X-Git-Url: https://git.gag.com/?a=blobdiff_plain;f=device%2Flib%2F_mullong.c;h=92ec722b3ea1d4b5d564fa574e1852c5bcde27f7;hb=1bb6a9b476754a7dd750c60972bbefe75c218f68;hp=1a37ce255c5e68d83237cb5185c9b09f59299714;hpb=8d326ee2d53edf7e720d694f73d29b8b1a27d87d;p=fw%2Fsdcc diff --git a/device/lib/_mullong.c b/device/lib/_mullong.c index 1a37ce25..92ec722b 100644 --- a/device/lib/_mullong.c +++ b/device/lib/_mullong.c @@ -50,24 +50,24 @@ #if defined(_MULLONG_ASM_SMALL) || defined(_MULLONG_ASM_SMALL_AUTO) void -_mullong_dummy (void) _naked +_mullong_dummy (void) __naked { - _asm + __asm - __mullong: +__mullong: - .globl __mullong + .globl __mullong - ; the result c will be stored in r4...r7 - #define c0 r4 - #define c1 r5 - #define c2 r6 - #define c3 r7 + ; the result c will be stored in r4...r7 + #define c0 r4 + #define c1 r5 + #define c2 r6 + #define c3 r7 - #define a0 dpl - #define a1 dph - #define a2 r2 - #define a3 r3 + #define a0 dpl + #define a1 dph + #define a2 r2 + #define a3 r3 ; c0 a0 * b0 ; c1 a1 * b0 + a0 * b1 @@ -76,255 +76,252 @@ _mullong_dummy (void) _naked #if !defined(SDCC_STACK_AUTO) || defined(SDCC_PARMS_IN_BANK1) #if defined(SDCC_PARMS_IN_BANK1) - #define b0 (b1_0) - #define b1 (b1_1) - #define b2 (b1_2) - #define b3 (b1_3) + #define b0 (b1_0) + #define b1 (b1_1) + #define b2 (b1_2) + #define b3 (b1_3) #else #if defined(SDCC_NOOVERLAY) - .area DSEG (DATA) + .area DSEG (DATA) #else - .area OSEG (OVR,DATA) + .area OSEG (OVR,DATA) #endif - __mullong_PARM_2: +__mullong_PARM_2: - .globl __mullong_PARM_2 + .globl __mullong_PARM_2 - .ds 4 + .ds 4 - b0 = __mullong_PARM_2 - b1 = (__mullong_PARM_2+1) - b2 = (__mullong_PARM_2+2) - b3 = (__mullong_PARM_2+3) + b0 = __mullong_PARM_2 + b1 = (__mullong_PARM_2+1) + b2 = (__mullong_PARM_2+2) + b3 = (__mullong_PARM_2+3) #endif - .area CSEG (CODE) - - ; parameter a comes in a, b, dph, dpl - mov r2,b ; save parameter a - mov r3,a - - ; Byte 0 - mov a,a0 - mov b,b0 - mul ab ; a0 * b0 - mov c0,a - mov c1,b - - ; Byte 1 - mov a,a1 - mov b,b0 - mul ab ; a1 * b0 - add a,c1 - mov c1,a - clr a - addc a,b - mov c2,a - - - mov a,a0 - mov b,b1 - mul ab ; a0 * b1 - add a,c1 - mov c1,a - mov a,b - addc a,c2 - mov c2,a - clr a - rlc a - mov c3,a - - ; Byte 2 - mov a,a2 - mov b,b0 - mul ab ; a2 * b0 - add a,c2 - mov c2,a - mov a,b - addc a,c3 - mov c3,a - - mov a,a1 - mov b,b1 - mul ab ; a1 * b1 - add a,c2 - mov c2,a - mov a,b - addc a,c3 - mov c3,a - - mov a,a0 - mov b,b2 - mul ab ; a0 * b2 - add a,c2 - mov c2,a - mov a,b - addc a,c3 - mov c3,a - - ; Byte 3 - mov a,a3 - mov b,b0 - mul ab ; a3 * b0 - add a,c3 - mov c3,a - - mov a,a2 - mov b,b1 - mul ab ; a2 * b1 - add a,c3 - mov c3,a - - mov a,a1 - mov b,b2 - mul ab ; a1 * b2 - add a,c3 - mov c3,a - - mov a,a0 - mov b,b3 - mul ab ; a0 * b3 - add a,c3 - - mov b,c2 - mov dph,c1 - mov dpl,c0 - ret + .area CSEG (CODE) + + ; parameter a comes in a, b, dph, dpl + mov r2,b ; save parameter a + mov r3,a + + ; Byte 0 + mov a,a0 + mov b,b0 + mul ab ; a0 * b0 + mov c0,a + mov c1,b + + ; Byte 1 + mov a,a1 + mov b,b0 + mul ab ; a1 * b0 + add a,c1 + mov c1,a + clr a + addc a,b + mov c2,a + + mov a,a0 + mov b,b1 + mul ab ; a0 * b1 + add a,c1 + mov c1,a + mov a,b + addc a,c2 + mov c2,a + clr a + rlc a + mov c3,a + + ; Byte 2 + mov a,a2 + mov b,b0 + mul ab ; a2 * b0 + add a,c2 + mov c2,a + mov a,b + addc a,c3 + mov c3,a + + mov a,a1 + mov b,b1 + mul ab ; a1 * b1 + add a,c2 + mov c2,a + mov a,b + addc a,c3 + mov c3,a + + mov a,a0 + mov b,b2 + mul ab ; a0 * b2 + add a,c2 + mov c2,a + mov a,b + addc a,c3 + mov c3,a + + ; Byte 3 + mov a,a3 + mov b,b0 + mul ab ; a3 * b0 + add a,c3 + mov c3,a + + mov a,a2 + mov b,b1 + mul ab ; a2 * b1 + add a,c3 + mov c3,a + + mov a,a1 + mov b,b2 + mul ab ; a1 * b2 + add a,c3 + mov c3,a + + mov a,a0 + mov b,b3 + mul ab ; a0 * b3 + add a,c3 + + mov b,c2 + mov dph,c1 + mov dpl,c0 + ret #else // SDCC_STACK_AUTO - ; parameter a comes in a, b, dph, dpl - mov r2,b ; save parameter a - mov r3,a - - #define a0 dpl - #define a1 dph - #define a2 r2 - #define a3 r3 - - #define b0 r1 - - mov a,#-2-3 ; 1 return address 2 bytes, b 4 bytes - add a,sp ; 1 - mov r0,a ; 1 r0 points to b0 - - ; Byte 0 - mov a,a0 - mov b,@r0 ; b0 - mov b0,b ; we need b0 several times - inc r0 ; r0 points to b1 - mul ab ; a0 * b0 - mov c0,a - mov c1,b - - ; Byte 1 - mov a,a1 - mov b,b0 - mul ab ; a1 * b0 - add a,c1 - mov c1,a - clr a - addc a,b - mov c2,a - - - mov a,a0 - mov b,@r0 ; b1 - mul ab ; a0 * b1 - add a,c1 - mov c1,a - mov a,b - addc a,c2 - mov c2,a - clr a - rlc a - mov c3,a - - ; Byte 2 - mov a,a2 - mov b,b0 - mul ab ; a2 * b0 - add a,c2 - mov c2,a - mov a,b - addc a,c3 - mov c3,a - - mov a,a1 - mov b,@r0 ; b1 - mul ab ; a1 * b1 - add a,c2 - mov c2,a - mov a,b - addc a,c3 - mov c3,a - - mov a,a0 - inc r0 - mov b,@r0 ; b2 - mul ab ; a0 * b2 - add a,c2 - mov c2,a - mov a,b - addc a,c3 - mov c3,a - - ; Byte 3 - mov a,a3 - mov b,b0 - mul ab ; a3 * b0 - add a,c3 - mov c3,a - - mov a,a1 - mov b,@r0 ; b2 - mul ab ; a1 * b2 - add a,c3 - mov c3,a - - mov a,a2 - dec r0 - mov b,@r0 ; b1 - mul ab ; a2 * b1 - add a,c3 - mov c3,a - - mov a,a0 - inc r0 - inc r0 - mov b,@r0 ; b3 - mul ab ; a0 * b3 - add a,c3 - - mov b,c2 - mov dph,c1 - mov dpl,c0 - - ret + ; parameter a comes in a, b, dph, dpl + mov r2,b ; save parameter a + mov r3,a + + #define a0 dpl + #define a1 dph + #define a2 r2 + #define a3 r3 + + #define b0 r1 + + mov a,#-2-3 ; 1 return address 2 bytes, b 4 bytes + add a,sp ; 1 + mov r0,a ; 1 r0 points to b0 + + ; Byte 0 + mov a,a0 + mov b,@r0 ; b0 + mov b0,b ; we need b0 several times + inc r0 ; r0 points to b1 + mul ab ; a0 * b0 + mov c0,a + mov c1,b + + ; Byte 1 + mov a,a1 + mov b,b0 + mul ab ; a1 * b0 + add a,c1 + mov c1,a + clr a + addc a,b + mov c2,a + + mov a,a0 + mov b,@r0 ; b1 + mul ab ; a0 * b1 + add a,c1 + mov c1,a + mov a,b + addc a,c2 + mov c2,a + clr a + rlc a + mov c3,a + + ; Byte 2 + mov a,a2 + mov b,b0 + mul ab ; a2 * b0 + add a,c2 + mov c2,a + mov a,b + addc a,c3 + mov c3,a + + mov a,a1 + mov b,@r0 ; b1 + mul ab ; a1 * b1 + add a,c2 + mov c2,a + mov a,b + addc a,c3 + mov c3,a + + mov a,a0 + inc r0 + mov b,@r0 ; b2 + mul ab ; a0 * b2 + add a,c2 + mov c2,a + mov a,b + addc a,c3 + mov c3,a + + ; Byte 3 + mov a,a3 + mov b,b0 + mul ab ; a3 * b0 + add a,c3 + mov c3,a + + mov a,a1 + mov b,@r0 ; b2 + mul ab ; a1 * b2 + add a,c3 + mov c3,a + + mov a,a2 + dec r0 + mov b,@r0 ; b1 + mul ab ; a2 * b1 + add a,c3 + mov c3,a + + mov a,a0 + inc r0 + inc r0 + mov b,@r0 ; b3 + mul ab ; a0 * b3 + add a,c3 + + mov b,c2 + mov dph,c1 + mov dpl,c0 + + ret #endif // SDCC_STACK_AUTO - _endasm ; + __endasm; } - #elif defined(_MULLONG_ASM_LARGE) void -_mullong_dummy (void) _naked +_mullong_dummy (void) __naked { - _asm + __asm - __mullong: +__mullong: - .globl __mullong + .globl __mullong - ; the result c will be stored in r4...r7 - #define c0 r4 - #define c1 r5 - #define c2 r6 - #define c3 r7 + ; the result c will be stored in r4...r7 + #define c0 r4 + #define c1 r5 + #define c2 r6 + #define c3 r7 ; c0 a0 * b0 ; c1 a1 * b0 + a0 * b1 @@ -332,162 +329,294 @@ _mullong_dummy (void) _naked ; c3 a3 * b0 + a2 * b1 + a1 * b2 + a0 * b3 #if !defined(SDCC_PARMS_IN_BANK1) - .area XSEG (XDATA) + .area XSEG (XDATA) - __mullong_PARM_2: +__mullong_PARM_2: - .globl __mullong_PARM_2 + .globl __mullong_PARM_2 - .ds 4 + .ds 4 #endif - .area CSEG (CODE) + .area CSEG (CODE) - ; parameter a comes in a, b, dph, dpl - mov r0,dpl ; save parameter a - mov r1,dph - mov r2,b - mov r3,a + ; parameter a comes in a, b, dph, dpl + mov r0,dpl ; save parameter a + mov r1,dph + mov r2,b + mov r3,a - #define a0 r0 - #define a1 r1 - #define a2 r2 - #define a3 r3 + #define a0 r0 + #define a1 r1 + #define a2 r2 + #define a3 r3 - ; Byte 0 - mov b,a0 + ; Byte 0 + mov b,a0 #if defined(SDCC_PARMS_IN_BANK1) - mov a,b1_0 ; b0 + mov a,b1_0 ; b0 #else - mov dptr,#__mullong_PARM_2 - movx a,@dptr ; b0 + mov dptr,#__mullong_PARM_2 + movx a,@dptr ; b0 #endif - mul ab ; a0 * b0 - mov c0,a - mov c1,b + mul ab ; a0 * b0 + mov c0,a + mov c1,b - ; Byte 1 - mov b,a1 + ; Byte 1 + mov b,a1 #if defined(SDCC_PARMS_IN_BANK1) - mov a,b1_0 ; b0 + mov a,b1_0 ; b0 #else - movx a,@dptr ; b0 + movx a,@dptr ; b0 #endif - mul ab ; a1 * b0 - add a,c1 - mov c1,a - clr a - addc a,b - mov c2,a - - - mov b,a0 + mul ab ; a1 * b0 + add a,c1 + mov c1,a + clr a + addc a,b + mov c2,a + + mov b,a0 #if defined(SDCC_PARMS_IN_BANK1) - mov a,b1_1 ; b1 + mov a,b1_1 ; b1 #else - inc dptr ; b1 - movx a,@dptr + inc dptr ; b1 + movx a,@dptr #endif - mul ab ; a0 * b1 - add a,c1 - mov c1,a - mov a,b - addc a,c2 - mov c2,a - clr a - rlc a - mov c3,a - - ; Byte 2 - mov b,a1 + mul ab ; a0 * b1 + add a,c1 + mov c1,a + mov a,b + addc a,c2 + mov c2,a + clr a + rlc a + mov c3,a + + ; Byte 2 + mov b,a1 #if defined(SDCC_PARMS_IN_BANK1) - mov a,b1_1 ; b1 + mov a,b1_1 ; b1 #else - movx a,@dptr ; b1 + movx a,@dptr ; b1 #endif - mul ab ; a1 * b1 - add a,c2 - mov c2,a - mov a,b - addc a,c3 - mov c3,a - - mov b,a0 + mul ab ; a1 * b1 + add a,c2 + mov c2,a + mov a,b + addc a,c3 + mov c3,a + + mov b,a0 #if defined(SDCC_PARMS_IN_BANK1) - mov a,b1_2 ; b2 + mov a,b1_2 ; b2 #else - inc dptr ; b2 - movx a,@dptr + inc dptr ; b2 + movx a,@dptr #endif - mul ab ; a0 * b2 - add a,c2 - mov c2,a - mov a,b - addc a,c3 - mov c3,a - - mov b,a2 + mul ab ; a0 * b2 + add a,c2 + mov c2,a + mov a,b + addc a,c3 + mov c3,a + + mov b,a2 #if defined(SDCC_PARMS_IN_BANK1) - mov a,b1_0 ; b0 + mov a,b1_0 ; b0 #else - mov dptr,#__mullong_PARM_2 - movx a,@dptr ; b0 + mov dptr,#__mullong_PARM_2 + movx a,@dptr ; b0 #endif - mul ab ; a2 * b0 - add a,c2 - mov c2,a - mov a,b - addc a,c3 - mov c3,a - - ; Byte 3 - mov b,a3 + mul ab ; a2 * b0 + add a,c2 + mov c2,a + mov a,b + addc a,c3 + mov c3,a + + ; Byte 3 + mov b,a3 #if defined(SDCC_PARMS_IN_BANK1) - mov a,b1_0 ; b0 + mov a,b1_0 ; b0 #else - movx a,@dptr ; b0 + movx a,@dptr ; b0 #endif - mul ab ; a3 * b0 - add a,c3 - mov c3,a + mul ab ; a3 * b0 + add a,c3 + mov c3,a - mov b,a2 + mov b,a2 #if defined(SDCC_PARMS_IN_BANK1) - mov a,b1_1 ; b1 + mov a,b1_1 ; b1 #else - inc dptr ; b1 - movx a,@dptr + inc dptr ; b1 + movx a,@dptr #endif - mul ab ; a2 * b1 - add a,c3 - mov c3,a + mul ab ; a2 * b1 + add a,c3 + mov c3,a - mov b,a1 + mov b,a1 #if defined(SDCC_PARMS_IN_BANK1) - mov a,b1_2 ; b2 + mov a,b1_2 ; b2 #else - inc dptr ; b2 - movx a,@dptr + inc dptr ; b2 + movx a,@dptr #endif - mul ab ; a1 * b2 - add a,c3 - mov c3,a + mul ab ; a1 * b2 + add a,c3 + mov c3,a - mov b,a0 + mov b,a0 #if defined(SDCC_PARMS_IN_BANK1) - mov a,b1_3 ; b3 + mov a,b1_3 ; b3 #else - inc dptr ; b3 - movx a,@dptr + inc dptr ; b3 + movx a,@dptr #endif - mul ab ; a0 * b3 - add a,c3 + mul ab ; a0 * b3 + add a,c3 + + mov b,c2 + mov dph,c1 + mov dpl,c0 + ret + + __endasm; +} + +#elif defined(SDCC_USE_XSTACK) && defined(SDCC_STACK_AUTO) + +void +_mullong_dummy (void) __naked +{ + __asm + +__mullong: + + .globl __mullong + + ; the result c will be stored in r4...r7 + #define c0 r4 + #define c1 r5 + #define c2 r6 + #define c3 r7 + + #define a0 dpl + #define a1 dph + #define a2 r2 + #define a3 r3 + + #define b0 r1 - mov b,c2 - mov dph,c1 - mov dpl,c0 - ret + ; c0 a0 * b0 + ; c1 a1 * b0 + a0 * b1 + ; c2 a2 * b0 + a1 * b1 + a0 * b2 + ; c3 a3 * b0 + a2 * b1 + a1 * b2 + a0 * b3 - _endasm ; + ; parameter a comes in a, b, dph, dpl + mov r2,b ; save parameter a + mov r3,a + + mov a,#-4 ; 1 b 4 bytes + add a,_spx ; 1 + mov r0,a ; 1 r0 points to b0 + + ; Byte 0 + movx a,@r0 ; b0 + mov b0,a ; we need b0 several times + inc r0 ; r0 points to b1 + mov b,a0 + mul ab ; a0 * b0 + mov c0,a + mov c1,b + + ; Byte 1 + mov a,a1 + mov b,b0 + mul ab ; a1 * b0 + add a,c1 + mov c1,a + clr a + addc a,b + mov c2,a + + mov b,a0 + movx a,@r0 ; b1 + mul ab ; a0 * b1 + add a,c1 + mov c1,a + mov a,b + addc a,c2 + mov c2,a + clr a + rlc a + mov c3,a + + ; Byte 2 + mov a,a2 + mov b,b0 + mul ab ; a2 * b0 + add a,c2 + mov c2,a + mov a,b + addc a,c3 + mov c3,a + + mov b,a1 + movx a,@r0 ; b1 + mul ab ; a1 * b1 + add a,c2 + mov c2,a + mov a,b + addc a,c3 + mov c3,a + + mov b,a0 + inc r0 + movx a,@r0 ; b2 + mul ab ; a0 * b2 + add a,c2 + mov c2,a + mov a,b + addc a,c3 + mov c3,a + + ; Byte 3 + mov a,a3 + mov b,b0 + mul ab ; a3 * b0 + add a,c3 + mov c3,a + + mov b,a1 + movx a,@r0 ; b2 + mul ab ; a1 * b2 + add a,c3 + mov c3,a + + mov b,a2 + dec r0 + movx a,@r0 ; b1 + mul ab ; a2 * b1 + add a,c3 + mov c3,a + + mov b,a0 + inc r0 + inc r0 + movx a,@r0 ; b3 + mul ab ; a0 * b3 + add a,c3 + + mov b,c2 + mov dph,c1 + mov dpl,c0 + + ret + + __endasm; } #else // _MULLONG_ASM @@ -514,14 +643,14 @@ union bil { } ; #endif -#if defined(SDCC_USE_XSTACK) -# define bcast(x) ((union bil pdata *)&(x)) +#if defined(SDCC_USE_XSTACK) || defined(SDCC_MODEL_MEDIUM) +# define bcast(x) ((union bil __pdata *)&(x)) #elif (defined(SDCC_MODEL_LARGE) || defined (SDCC_ds390) || defined (SDCC_ds400)) && !defined(SDCC_STACK_AUTO) -# define bcast(x) ((union bil xdata *)&(x)) +# define bcast(x) ((union bil __xdata *)&(x)) #elif defined(__z80) || defined(__gbz80) # define bcast(x) ((union bil *)&(x)) #else -# define bcast(x) ((union bil near *)&(x)) +# define bcast(x) ((union bil __near *)&(x)) #endif /* @@ -542,35 +671,59 @@ union bil { |3.0| G |-------> only this side 32 x 32 -> 32 */ +#if defined(SDCC_USE_XSTACK) +// currently the original code without u fails with --xstack +// it runs out of pointer registers +long +_mullong (long a, long b) +{ + union bil t, u; + + t.i.hi = bcast(a)->b.b0 * bcast(b)->b.b2; // A + t.i.lo = bcast(a)->b.b0 * bcast(b)->b.b0; // A + u.bi.b3 = bcast(a)->b.b0 * bcast(b)->b.b3; // B + u.bi.i12 = bcast(a)->b.b0 * bcast(b)->b.b1; // B + u.bi.b0 = 0; // B + t.l += u.l; + + t.b.b3 += bcast(a)->b.b3 * bcast(b)->b.b0; // G + t.b.b3 += bcast(a)->b.b2 * bcast(b)->b.b1; // F + t.i.hi += bcast(a)->b.b2 * bcast(b)->b.b0; // E + t.i.hi += bcast(a)->b.b1 * bcast(b)->b.b1; // D + + u.bi.b3 = bcast(a)->b.b1 * bcast(b)->b.b2; // C + u.bi.i12 = bcast(a)->b.b1 * bcast(b)->b.b0; // C + u.bi.b0 = 0; // C + t.l += u.l; + + return t.l; +} +#else long _mullong (long a, long b) { union bil t; - t.i.hi = bcast(a)->b.b0 * bcast(b)->b.b2; // A - t.i.lo = bcast(a)->b.b0 * bcast(b)->b.b0; // A - t.b.b3 += bcast(a)->b.b3 * - bcast(b)->b.b0; // G - t.b.b3 += bcast(a)->b.b2 * - bcast(b)->b.b1; // F - t.i.hi += bcast(a)->b.b2 * bcast(b)->b.b0; // E <- b lost in .lst + t.i.hi = bcast(a)->b.b0 * bcast(b)->b.b2; // A + t.i.lo = bcast(a)->b.b0 * bcast(b)->b.b0; // A + t.b.b3 += bcast(a)->b.b3 * bcast(b)->b.b0; // G + t.b.b3 += bcast(a)->b.b2 * bcast(b)->b.b1; // F + t.i.hi += bcast(a)->b.b2 * bcast(b)->b.b0; // E <- b lost in .lst // bcast(a)->i.hi is free ! - t.i.hi += bcast(a)->b.b1 * bcast(b)->b.b1; // D <- b lost in .lst - - bcast(a)->bi.b3 = bcast(a)->b.b1 * - bcast(b)->b.b2; - bcast(a)->bi.i12 = bcast(a)->b.b1 * - bcast(b)->b.b0; // C - - bcast(b)->bi.b3 = bcast(a)->b.b0 * - bcast(b)->b.b3; - bcast(b)->bi.i12 = bcast(a)->b.b0 * - bcast(b)->b.b1; // B - bcast(b)->bi.b0 = 0; // B - bcast(a)->bi.b0 = 0; // C + t.i.hi += bcast(a)->b.b1 * bcast(b)->b.b1; // D <- b lost in .lst + + bcast(a)->bi.b3 = bcast(a)->b.b1 * bcast(b)->b.b2; // C + bcast(a)->bi.i12 = bcast(a)->b.b1 * bcast(b)->b.b0; // C + + bcast(b)->bi.b3 = bcast(a)->b.b0 * bcast(b)->b.b3; // B + bcast(b)->bi.i12 = bcast(a)->b.b0 * bcast(b)->b.b1; // B + + bcast(b)->bi.b0 = 0; // B + bcast(a)->bi.b0 = 0; // C t.l += a; return t.l + b; } +#endif #endif // _MULLONG_ASM