}\r
endcase;\r
\r
-/* Perform 32*32 bit multiply for 64 bit result, by factoring into 16 bit quantities. */\r
-/* Using an improved algorithm suggested by Steve Green. */\r
- case ID_D_UMTIMES: /* UM* ( a b -- pl ph ) */ \r
+/* Assume 8-bit char and calculate cell width. */\r
+#define NBITS ((sizeof(ucell_t)) * 8)\r
+/* Define half the number of bits in a cell. */\r
+#define HNBITS (NBITS / 2)\r
+/* Assume two's-complement arithmetic to calculate lower half. */\r
+#define LOWER_HALF(n) ((n) & (((ucell_t)1 << HNBITS) - 1))\r
+#define HIGH_BIT ((ucell_t)1 << (NBITS - 1))\r
+\r
+/* Perform cell*cell multiply for a 2 cell result, by factoring into half cell quantities.\r
+ * Using an improved algorithm suggested by Steve Green.\r
+ * Converted to 64-bit by Aleksej Saushev.\r
+ */\r
+ case ID_D_UMTIMES: /* UM* ( a b -- lo hi ) */ \r
{\r
- ucell_t ahi, alo, bhi, blo, temp;\r
- ucell_t pl, ph;\r
+ ucell_t ahi, alo, bhi, blo; /* input parts */\r
+ ucell_t lo, hi, temp;\r
/* Get values from stack. */\r
ahi = M_POP;\r
bhi = TOS;\r
/* Break into hi and lo 16 bit parts. */\r
- alo = ahi & 0xFFFF;\r
- ahi = ahi>>16;\r
- blo = bhi & 0xFFFF;\r
- bhi = bhi>>16;\r
- ph = 0;\r
-/* ahi * bhi */\r
- pl = ahi * bhi;\r
- ph = pl >> 16; /* shift 64 bit value by 16 */\r
- pl = pl << 16;\r
-/* ahi * blo */\r
+ alo = LOWER_HALF(ahi);\r
+ ahi = ahi >> HNBITS;\r
+ blo = LOWER_HALF(bhi);\r
+ bhi = bhi >> HNBITS;\r
+\r
+ lo = 0;\r
+ hi = 0;\r
+/* higher part: ahi * bhi */\r
+ hi += ahi * bhi;\r
+/* middle (overlapping) part: ahi * blo */\r
temp = ahi * blo;\r
- pl += temp;\r
- if( pl < temp ) ph += 1; /* Carry */\r
-/* alo * bhi */\r
+ lo += LOWER_HALF(temp);\r
+ hi += temp >> HNBITS;\r
+/* middle (overlapping) part: alo * bhi */\r
temp = alo * bhi;\r
- pl += temp;\r
- if( pl < temp ) ph += 1; /* Carry */\r
- ph = (ph << 16) | (pl >> 16); /* shift 64 bit value by 16 */\r
- pl = pl << 16;\r
-/* alo * blo */\r
+ lo += LOWER_HALF(temp);\r
+ hi += temp >> HNBITS;\r
+/* lower part: alo * blo */\r
temp = alo * blo;\r
- pl += temp;\r
- if( pl < temp ) ph += 1; /* Carry */\r
-\r
- M_PUSH( pl );\r
- TOS = ph;\r
+/* its higher half overlaps with middle's lower half: */\r
+ lo += temp >> HNBITS;\r
+/* process carry: */\r
+ hi += lo >> HNBITS;\r
+ lo = LOWER_HALF(lo);\r
+/* combine lower part of result: */\r
+ lo = (lo << HNBITS) + LOWER_HALF(temp);\r
+\r
+ M_PUSH( lo );\r
+ TOS = hi;\r
}\r
endcase;\r
\r
-/* Perform 32*32 bit multiply for 64 bit result, using shift and add. */\r
+/* Perform signed cell*cell multiply for a 2 cell result, by taking absolute values and factoring into half cell quantities as in UM*. */\r
case ID_D_MTIMES: /* M* ( a b -- pl ph ) */ \r
{\r
- cell_t a,b;\r
- ucell_t ap,bp, ahi, alo, bhi, blo, temp;\r
- ucell_t pl, ph;\r
+ ucell_t ahi, alo, bhi, blo; /* input parts */\r
+ ucell_t lo, hi, temp;\r
+ int sg;\r
/* Get values from stack. */\r
- a = M_POP;\r
- b = TOS;\r
- ap = (a < 0) ? -a : a ; /* Positive A */\r
- bp = (b < 0) ? -b : b ; /* Positive B */\r
+ ahi = M_POP;\r
+ bhi = TOS;\r
+\r
+/* Calculate product sign: */\r
+ sg = ((cell_t)(ahi ^ bhi) < 0);\r
+/* Take absolute values and reduce to um* */\r
+ if ((cell_t)ahi < 0) ahi = (ucell_t)(-ahi);\r
+ if ((cell_t)bhi < 0) bhi = (ucell_t)(-bhi);\r
+\r
/* Break into hi and lo 16 bit parts. */\r
- alo = ap & 0xFFFF;\r
- ahi = ap>>16;\r
- blo = bp & 0xFFFF;\r
- bhi = bp>>16;\r
- ph = 0;\r
-/* ahi * bhi */\r
- pl = ahi * bhi;\r
- ph = pl >> 16; /* shift 64 bit value by 16 */\r
- pl = pl << 16;\r
-/* ahi * blo */\r
+ alo = LOWER_HALF(ahi);\r
+ ahi = ahi >> HNBITS;\r
+ blo = LOWER_HALF(bhi);\r
+ bhi = bhi >> HNBITS;\r
+\r
+ lo = 0;\r
+ hi = 0;\r
+/* higher part: ahi * bhi */\r
+ hi += ahi * bhi;\r
+/* middle (overlapping) part: ahi * blo */\r
temp = ahi * blo;\r
- pl += temp;\r
- if( pl < temp ) ph += 1; /* Carry */\r
-/* alo * bhi */\r
+ lo += LOWER_HALF(temp);\r
+ hi += temp >> HNBITS;\r
+/* middle (overlapping) part: alo * bhi */\r
temp = alo * bhi;\r
- pl += temp;\r
- if( pl < temp ) ph += 1; /* Carry */\r
- ph = (ph << 16) | (pl >> 16); /* shift 64 bit value by 16 */\r
- pl = pl << 16;\r
-/* alo * blo */\r
+ lo += LOWER_HALF(temp);\r
+ hi += temp >> HNBITS;\r
+/* lower part: alo * blo */\r
temp = alo * blo;\r
- pl += temp;\r
- if( pl < temp ) ph += 1; /* Carry */\r
+/* its higher half overlaps with middle's lower half: */\r
+ lo += temp >> HNBITS;\r
+/* process carry: */\r
+ hi += lo >> HNBITS;\r
+ lo = LOWER_HALF(lo);\r
+/* combine lower part of result: */\r
+ lo = (lo << HNBITS) + LOWER_HALF(temp);\r
\r
/* Negate product if one operand negative. */\r
- if( ((a ^ b) & 0x80000000) )\r
+ if(sg)\r
{\r
- pl = 0-pl;\r
- if( pl & 0x80000000 )\r
- {\r
- ph = -1 - ph; /* Borrow */\r
- }\r
- else\r
- {\r
- ph = 0 - ph;\r
- }\r
+ /* lo = (ucell_t)(- lo); */\r
+ lo = ~lo + 1;\r
+ hi = ~hi + ((lo == 0) ? 1 : 0);\r
}\r
\r
- M_PUSH( pl );\r
- TOS = ph;\r
+ M_PUSH( lo );\r
+ TOS = hi;\r
}\r
endcase;\r
\r
#define DULT(du1l,du1h,du2l,du2h) ( (du2h<du1h) ? FALSE : ( (du2h==du1h) ? (du1l<du2l) : TRUE) )\r
-/* Perform 64/32 bit divide for 32 bit result, using shift and subtract. */\r
+/* Perform 2 cell by 1 cell divide for 1 cell result and remainder, using shift and subtract. */\r
case ID_D_UMSMOD: /* UM/MOD ( al ah bdiv -- rem q ) */ \r
{\r
ucell_t ah,al, q,di, bl,bh, sl,sh;\r
bh = TOS;\r
bl = 0;\r
q = 0;\r
- for( di=0; di<32; di++ )\r
+ for( di=0; di<NBITS; di++ )\r
{\r
if( !DULT(al,ah,bl,bh) )\r
{\r
q |= 1;\r
}\r
q = q << 1;\r
- bl = (bl >> 1) | (bh << 31);\r
+ bl = (bl >> 1) | (bh << (NBITS-1));\r
bh = bh >> 1;\r
}\r
if( !DULT(al,ah,bl,bh) )\r
}\r
endcase;\r
\r
-/* Perform 64/32 bit divide for 64 bit result, using shift and subtract. */\r
+/* Perform 2 cell by 1 cell divide for 2 cell result and remainder, using shift and subtract. */\r
case ID_D_MUSMOD: /* MU/MOD ( al am bdiv -- rem ql qh ) */ \r
{\r
register ucell_t ah,am,al,ql,qh,di;\r
am = M_POP;\r
al = M_POP;\r
qh = ql = 0;\r
-#define NBITS (sizeof(cell_t)*8)\r
for( di=0; di<2*NBITS; di++ )\r
{\r
if( bdiv <= ah )\r
#if (defined(PF_BIG_ENDIAN_DIC) || defined(PF_LITTLE_ENDIAN_DIC))\r
if( IN_DICS( TOS ) )\r
{\r
- WRITE_CELL_DIC(TOS,M_POP);\r
+ WRITE_CELL_DIC((cell_t *)TOS,M_POP);\r
}\r
else\r
{\r
case ID_VAR_ECHO: DO_VAR(gVarEcho); endcase;\r
case ID_VAR_HEADERS_BASE: DO_VAR(gCurrentDictionary->dic_HeaderBase); endcase;\r
case ID_VAR_HEADERS_LIMIT: DO_VAR(gCurrentDictionary->dic_HeaderLimit); endcase;\r
- case ID_VAR_HEADERS_PTR: DO_VAR(gCurrentDictionary->dic_HeaderPtr.Cell); endcase;\r
+ case ID_VAR_HEADERS_PTR: DO_VAR(gCurrentDictionary->dic_HeaderPtr); endcase;\r
case ID_VAR_NUM_TIB: DO_VAR(gCurrentTask->td_SourceNum); endcase;\r
case ID_VAR_OUT: DO_VAR(gCurrentTask->td_OUT); endcase;\r
case ID_VAR_STATE: DO_VAR(gVarState); endcase;\r
#if (defined(PF_BIG_ENDIAN_DIC) || defined(PF_LITTLE_ENDIAN_DIC))\r
if( IN_DICS( TOS ) )\r
{\r
- TOS = (uint16_t) READ_SHORT_DIC((uint8_t *)TOS);\r
+ TOS = (uint16_t) READ_SHORT_DIC((uint16_t *)TOS);\r
}\r
else\r
{\r
#if (defined(PF_BIG_ENDIAN_DIC) || defined(PF_LITTLE_ENDIAN_DIC))\r
if( IN_DICS( TOS ) )\r
{\r
- WRITE_SHORT_DIC(TOS,M_POP);\r
+ WRITE_SHORT_DIC((uint16_t *)TOS,(uint16_t)M_POP);\r
}\r
else\r
{\r