From 081276d8f181c05fed4447d373d11d4940cbec2a Mon Sep 17 00:00:00 2001 From: pjs Date: Mon, 27 Dec 2004 00:04:03 +0000 Subject: [PATCH] Improvements to float support in printf_fast Add 6 new files to float library, shared helper functions for upcoming mcs51 float library git-svn-id: https://sdcc.svn.sourceforge.net/svnroot/sdcc/trunk/sdcc@3614 4a8a32a2-be11-0410-ad9d-d568d2c75423 --- ChangeLog | 12 ++++ device/include/float.h | 19 +++++- device/lib/Makefile.in | 2 + device/lib/_fsget1arg.c | 32 ++++++++++ device/lib/_fsget2args.c | 131 ++++++++++++++++++++++++++++++++++++++ device/lib/_fsnormalize.c | 52 +++++++++++++++ device/lib/_fsreturnval.c | 81 +++++++++++++++++++++++ device/lib/_fsrshift.c | 77 ++++++++++++++++++++++ device/lib/_fsswapargs.c | 35 ++++++++++ device/lib/libfloat.lib | 6 ++ device/lib/printf_fast.c | 57 +++++++++-------- 11 files changed, 478 insertions(+), 26 deletions(-) create mode 100644 device/lib/_fsget1arg.c create mode 100644 device/lib/_fsget2args.c create mode 100644 device/lib/_fsnormalize.c create mode 100644 device/lib/_fsreturnval.c create mode 100644 device/lib/_fsrshift.c create mode 100644 device/lib/_fsswapargs.c diff --git a/ChangeLog b/ChangeLog index 534b5110..568e529f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2004-12-25 Paul Stoffregen + * device/lib/printf_fast.c: improvements to float output + * device/include/float.h: add defines for assembly float library + * device/lib/_fsget1arg.c: receive 1 float arg + * device/lib/_fsget2args.c: receive 2 float args (reentrant) + * device/lib/_fsnormalize.c: normalize a float + * device/lib/_fsreturnval.c: return float, various helper routines + * device/lib/_fsrshift.c: right shift a float's mantissa + * device/lib/_fsswapargs.c: swap 2 floats + * device/lib/Makefile.in: build these 6 new files for mcs51 + * device/lib/libfloat.lib: add these 6 files to the library + 2004-12-26 Borut Razem * sim/ucsim/avr.src/arith_inst.cc: fixed bug #1088372- savr is not 
diff --git a/device/include/float.h b/device/include/float.h index c1d3c26b..1c8292b4 100644 --- a/device/include/float.h +++ b/device/include/float.h @@ -70,9 +70,26 @@ char __fslt (float, float); char __fseq (float, float); char __fsqt (float, float); -#endif +#if defined(SDCC_FLOAT_LIB) && defined(SDCC_mcs51) && !defined(SDCC_USE_XSTACK) && !defined(_SDCC_NO_ASM_LIB_FUNCS) +// This adds extra code for proper round-off, in +// an attempt to match the results from gcc. +#define FLOAT_FULL_ACCURACY +// This adds about 66 bytes to the code size and +// significantly speeds up shift operations more +// than 8 bits (common when subtracting numbers +// of significantly different magnitude and scaling +// to fixed point) +#define FLOAT_SHIFT_SPEEDUP +#define sign_a psw.1 +#define sign_b psw.5 +#define exp_a dpl +#define exp_b dph +#endif // using mcs51 assembly + + +#endif // __SDC51_FLOAT_H diff --git a/device/lib/Makefile.in b/device/lib/Makefile.in index 7786a2ec..7a301d60 100644 --- a/device/lib/Makefile.in +++ b/device/lib/Makefile.in @@ -66,6 +66,8 @@ SOURCES = _atof.c _atoi.c _atol.c _autobaud.c _bp.c _schar2fs.c \ serial.c ser_ir.c printfl.c \ printf_large.c sprintf.c vprintf.c puts.c gets.c \ assert.c time.c printf_fast.c printf_tiny.c bpx.c \ + _fsget1arg.c _fsget2args.c _fsnormalize.c \ + _fsreturnval.c _fsrshift.c _fsswapargs.c \ fabsf.c frexpf.c ldexpf.c expf.c powf.c sincosf.c sinf.c \ cosf.c logf.c log10f.c sqrtf.c tancotf.c tanf.c cotf.c \ asincosf.c asinf.c acosf.c atanf.c atan2f.c sincoshf.c \ diff --git a/device/lib/_fsget1arg.c b/device/lib/_fsget1arg.c new file mode 100644 index 00000000..bee256b9 --- /dev/null +++ b/device/lib/_fsget1arg.c @@ -0,0 +1,32 @@ +#define SDCC_FLOAT_LIB +#include <float.h> + + +static void dummy(void) _naked +{ + // input passed in a,b,dph,dpl + _asm + .globl fsgetarg +fsgetarg: + // extract the input, placing it into: + // sign exponent mantissa + // ---- -------- --------- + // a: sign_a exp_a r4/r3/r2 + // + mov r2, dpl + mov 
r3, dph + mov c, b.7 + rlc a + mov sign_a, c + jz 00001$ + setb b.7 +00001$: + mov exp_a, a + mov r4, b + ret + _endasm; +} + + + + diff --git a/device/lib/_fsget2args.c b/device/lib/_fsget2args.c new file mode 100644 index 00000000..3a9e9fe4 --- /dev/null +++ b/device/lib/_fsget2args.c @@ -0,0 +1,131 @@ +#define SDCC_FLOAT_LIB +#include <float.h> + + + + +static void dummy(void) _naked +{ + // arg1: passed in a,b,dph,dpl + // arg2: passed on stack + _asm + .globl fsgetargs +fsgetargs: + // extract the two inputs, placing them into: + // sign exponent mantissa + // ---- -------- --------- + // a: sign_a exp_a r4/r3/r2 + // b: sign_b exp_b r7/r6/r5 + // + mov r2, dpl + mov r3, dph + mov c, b.7 + rlc a + mov sign_a, c + jz 00001$ + setb b.7 +00001$: + mov exp_a, a + mov r4, b + // now extract the 2nd parameter from the stack + mov a, sp + add a, #249 + mov r0, a + mov a, @r0 + mov r5, a + inc r0 + mov a, @r0 + mov r6, a + inc r0 + mov b, @r0 + inc r0 + mov a, @r0 + mov c, b.7 + rlc a + mov sign_b, c + jz 00002$ + setb b.7 +00002$: + mov exp_b, a + mov r7, b + ret + _endasm; +} + + + + + +#if 0 +// This old version was designed before the change to make all this +// code fully reentrant. What a mess the 2nd parameter turns out to +// be. 
+ +void __fsgetargs (float a, float b) +{ + a; // passed in a,b,dph,dpl + b; // ___fsadd_PARM_2 + + _asm + // extract the two inputs, placing them into: + // sign exponent mantissa + // ---- -------- --------- + // a: sign_a exp_a r4/r3/r2 + // b: sign_b exp_b r7/r6/r5 + // + mov r2, dpl + mov r3, dph + mov c, b.7 + rlc a + mov sign_a, c + jz 00001$ + setb b.7 +00001$: + mov exp_a, a + mov r4, b +#ifdef SDCC_MODEL_SMALL + mov r5, (___fsadd_PARM_2 + 0) + mov r6, (___fsadd_PARM_2 + 1) + mov b, (___fsadd_PARM_2 + 2) + mov a, (___fsadd_PARM_2 + 3) + mov c, b.7 + rlc a + mov sign_b, c + jz 00002$ + setb b.7 +00002$: + mov exp_b, a + mov r7, b +#endif + _endasm; +} + + +#ifdef SDCC_MODEL_LARGE +void __fsgetarglarge2 (void) +{ + _asm + movx a, @dptr + mov r5, a + inc dptr + movx a, @dptr + mov r6, a + inc dptr + movx a, @dptr + mov b, a + inc dptr + movx a, @dptr + mov c, b.7 + rlc a + mov sign_b, c + jz 00002$ + setb b.7 +00002$: + mov exp_b, a + mov r7, b + _endasm; +} +#endif + +#endif + diff --git a/device/lib/_fsnormalize.c b/device/lib/_fsnormalize.c new file mode 100644 index 00000000..f985b8bb --- /dev/null +++ b/device/lib/_fsnormalize.c @@ -0,0 +1,52 @@ +#define SDCC_FLOAT_LIB +#include <float.h> + +static void dummy(void) _naked +{ + _asm + .globl fs_normalize_a +fs_normalize_a: +#ifdef FLOAT_SHIFT_SPEEDUP + mov r0, #4 +00001$: + mov a, r4 + jnz 00003$ + xch a, r1 + xch a, r2 + xch a, r3 + mov r4, a + //mov r4, ar3 + //mov r3, ar2 + //mov r2, ar1 + //mov r1, #0 + mov a, exp_a + add a, #248 + mov exp_a, a + djnz r0, 00001$ + ret +#else + mov a, r4 +#endif +00003$: + mov r0, #32 +00005$: + jb acc.7, 00006$ + dec exp_a + clr c + mov a, r1 + rlc a + mov r1, a + mov a, r2 + rlc a + mov r2, a + mov a, r3 + rlc a + mov r3, a + mov a, r4 + rlc a + mov r4, a + djnz r0, 00005$ +00006$: + ret + _endasm; +} diff --git a/device/lib/_fsreturnval.c b/device/lib/_fsreturnval.c new file mode 100644 index 00000000..95f01e11 --- /dev/null +++ b/device/lib/_fsreturnval.c @@ -0,0 
+1,81 @@ 
+#define SDCC_FLOAT_LIB +#include <float.h> + +static void dummy(void) _naked +{ + _asm + + .globl fs_round_and_return +fs_round_and_return: +#ifdef FLOAT_FULL_ACCURACY + // discard the extra 8 bits of precision we kept around in r1 + cjne r1, #128, 00001$ + mov a, r2 + rrc a + cpl c +00001$: + jc fs_zerocheck_return + mov a, r2 + add a, #1 + mov r2, a + clr a + addc a, r3 + mov r3, a + clr a + addc a, r4 + mov r4, a + jnc fs_zerocheck_return + mov r4, #0x80 + inc exp_a +#endif + + .globl fs_zerocheck_return +fs_zerocheck_return: + // zero output is a special case + cjne r4, #0, fs_direct_return + cjne r3, #0, fs_direct_return + cjne r2, #0, fs_direct_return + + .globl fs_return_zero +fs_return_zero: + clr a + mov b, a + mov dph, a + mov dpl, a + ret + + .globl fs_direct_return +fs_direct_return: + // collect all pieces and return + mov c, sign_a + mov a, exp_a + rrc a + mov b, r4 + mov b.7, c + mov dph, r3 + mov dpl, r2 + ret + + .globl fs_return_inf +fs_return_inf: + clr a + mov dph, a + mov dpl, a + mov b, #0x80 + cpl a + mov c, sign_a + rrc a + ret + + .globl fs_return_nan +fs_return_nan: + clr a + mov dph, a + mov dpl, a + mov b, #0xC0 + mov a, #0x7F + ret + + _endasm; +} + diff --git a/device/lib/_fsrshift.c b/device/lib/_fsrshift.c new file mode 100644 index 00000000..4928fa5f --- /dev/null +++ b/device/lib/_fsrshift.c @@ -0,0 +1,77 @@ +#define SDCC_FLOAT_LIB +#include <float.h> + + +static void dummy(void) _naked +{ + _asm + .globl fs_rshift_a +fs_rshift_a: + jz 00020$ + mov r0, a + add a, exp_a // adjust exponent + jnc 00001$ + mov a, #255 // don't roll over +00001$: + mov exp_a, a +#ifdef FLOAT_SHIFT_SPEEDUP + mov a, r0 + add a, #248 + jnc 00003$ + xch a, r4 + xch a, r3 + xch a, r2 + mov r1, a + clr a + xch a, r4 + //mov r1, ar2 // avoid dependence on register bank + //mov r2, ar3 + //mov r3, ar4 + //mov r4, #0 + add a, #248 + jnc 00003$ + xch a, r3 + xch a, r2 + mov r1, a + clr a + xch a, r3 + //mov r1, ar2 + //mov r2, ar3 + //mov r3, #0 + add a, #248 + jnc 00003$ + 
xch a, 
r2 + mov r1, a + clr a + xch a, r2 + //mov r1, ar2 + //mov r2, #0 + add a, #248 + jnc 00003$ + mov r1, #0 + ret +00003$: + add a, #8 + jz 00020$ + mov r0, a +#endif +00005$: + clr c + mov a, r4 + rrc a + mov r4, a + mov a, r3 + rrc a + mov r3, a + mov a, r2 + rrc a + mov r2, a + mov a, r1 + rrc a + mov r1, a + djnz r0, 00005$ +00020$: + ret + _endasm; +} + diff --git a/device/lib/_fsswapargs.c b/device/lib/_fsswapargs.c new file mode 100644 index 00000000..aba533b4 --- /dev/null +++ b/device/lib/_fsswapargs.c @@ -0,0 +1,35 @@ +#define SDCC_FLOAT_LIB +#include <float.h> + +static void dummy(void) _naked +{ + _asm + .globl fs_swap_a_b +fs_swap_a_b: + mov a, exp_a + xch a, exp_b + mov exp_a, a + // is there a faster way to swap these 2 bits??? + // this trick with psw can play havoc with the register bank setting + // mov a, psw + // swap a // depends on sign bits in psw.1 & psw.5 + // mov psw, a + mov c, sign_a + rlc a + mov c, sign_b + mov sign_a, c + rrc a + mov sign_b, c + mov a, r4 + xch a, r7 + mov r4, a + mov a, r3 + xch a, r6 + mov r3, a + mov a, r2 + xch a, r5 + mov r2, a + ret + _endasm; +} + diff --git a/device/lib/libfloat.lib b/device/lib/libfloat.lib index b4ff41f3..07854aed 100644 --- a/device/lib/libfloat.lib +++ b/device/lib/libfloat.lib @@ -46,3 +46,9 @@ floorf ceilf modff errno +_fsget1arg +_fsget2args +_fsnormalize +_fsreturnval +_fsrshift +_fsswapargs diff --git a/device/lib/printf_fast.c b/device/lib/printf_fast.c index e1f0aa03..d05457ee 100644 --- a/device/lib/printf_fast.c +++ b/device/lib/printf_fast.c @@ -97,6 +97,7 @@ static data unsigned char field_width; #endif #ifdef FLOAT +#define SDCC_FLOAT_LIB #include <float.h> static bit continue_float; #ifndef FLOAT_FIXED4 @@ -612,17 +613,20 @@ print_float: mov _field_width, #0 #endif print_float_begin: - mov exp_b, r0 // keep r0 safe, will need it again + push ar0 // keep r0 safe, will need it again lcall printf_get_float clr c mov a, #158 // check for large float we can't print - subb a, exp_a + 
subb a, r7 jnc 
print_float_size_ok printf_float_too_big: // TODO: should print some sort of overflow error?? + pop ar0 ljmp printf_format_loop print_float_size_ok: - lcall __fs_rshift_a + push dpl + lcall fs_rshift_a + pop dpl setb _continue_float #ifndef LONG mov a, r3 @@ -635,10 +639,12 @@ print_float_size_ok: lcall printf_putchar // now that the integer part is printed, we need to refetch the // float from the va_args and extract the fractional part - mov r0, exp_b + pop ar0 lcall printf_get_float push ar0 - mov a, exp_a + push dpl + push dph + mov a, r7 cjne a, #126, print_float_frac_lshift sjmp print_float_frac // input between 0.5 to 0.9999 print_float_frac_lshift: @@ -663,7 +669,7 @@ print_float_frac_rshift: //Acc (exponent) is less than 126 (input < 0.5) cpl a add a, #127 - lcall __fs_rshift_a + lcall fs_rshift_a print_float_frac: // now we've got the fractional part, so now is the time to // convert to BCD... just convert each bit to BCD using a @@ -672,8 +678,6 @@ print_float_frac: clr a mov r6, a mov r5, a - push dpl - push dph mov dptr, #_frac2bcd // FLOAT_FIXED4 version (14 entries) print_float_frac_loop: mov a, r3 @@ -784,7 +788,9 @@ print_float_default_done: print_float_begin: push ar0 // keep r0 safe, will need it again lcall printf_get_float - mov a, exp_a + push dpl + push dph + mov a, r7 cjne a, #126, print_float_frac_lshift sjmp print_float_frac // input between 0.5 to 0.9999 @@ -810,17 +816,15 @@ print_float_frac_rshift: //Acc (exponent) is less than 126 (input < 0.5) cpl a add a, #127 - lcall __fs_rshift_a + lcall fs_rshift_a print_float_frac: - // Convert the fraction in r4/r3/r2/r1 into 8 BCD digits in exb_b/r7/r6/r5 + // Convert the fraction in r4/r3/r2/r1 into 8 BCD digits in r0/r7/r6/r5 mov b, #27 clr a - mov exp_b, a + mov r0, a mov r7, a mov r6, a mov r5, a - push dpl - push dph mov dptr, #_frac2bcd // FLOAT version (27 entries) print_float_frac_loop: mov a, r1 @@ -853,17 +857,15 @@ print_float_frac_loop: mov r7, a mov a, #3 movc a, @a+dptr - addc a, 
exp_b + addc a, r0 da a - mov exp_b, a + mov r0, a print_float_frac_skip: inc dptr inc dptr inc dptr inc dptr djnz b, print_float_frac_loop - pop dph - pop dpl print_float_frac_roundoff: // Now it's time to round-off the BCD digits to the desired precision. clr a @@ -875,7 +877,8 @@ print_float_frac_roundoff: rl a rl a anl a, #0xFC - lcall __fs_rshift_a // divide r4/r3/r2/r1 by 10^frac_field_width + mov dph, r0 // fs_rshift_a will overwrite r0 & dpl + lcall fs_rshift_a // divide r4/r3/r2/r1 by 10^frac_field_width mov a, r5 add a, r1 // add rounding to fractional part da a @@ -888,11 +891,13 @@ print_float_frac_roundoff: addc a, r3 da a mov _float_frac_bcd+1, a - mov a, exp_b + mov a, dph addc a, r4 da a mov _float_frac_bcd+0, a mov sign_b, c // keep fractional carry in sign_b + pop dph + pop dpl print_float_int: // Time to work on the integer portion... fetch the float again, check // size (exponent), scale to integer, add the fraction's carry, and @@ -902,13 +907,15 @@ print_float_int: push ar0 clr c mov a, #158 // check for large float we can't print - subb a, exp_a + subb a, r7 jnc print_float_size_ok printf_float_too_big: // TODO: should print some sort of overflow error?? ljmp printf_format_loop print_float_size_ok: - lcall __fs_rshift_a + push dpl + lcall fs_rshift_a + pop dpl jnb sign_b, print_float_do_int // if we get here, the fractional round off caused the // integer part to increment. 
Add 1 for a proper result @@ -1002,7 +1009,7 @@ get_float_frac_digit_done: #if 0 pm2_print_float: - mov a, exp_a + mov a, r7 lcall pm2_entry_phex mov a, #0x20 lcall pm2_entry_cout @@ -1013,7 +1020,7 @@ pm2_print_float: #endif // Fetch a float from the va_args and put it into - // exp_a/r4/r3/r2 and also clear r1 and preset + // r7(exp) r4/r3/r2(mant) and also clear r1 and preset // the flags printf_get_float: mov a, @r0 @@ -1026,7 +1033,7 @@ printf_get_float: mov a, r1 rlc a mov _negative_flag, c - mov exp_a, a + mov r7, a jz printf_get_float_2 orl ar4, #0x80 printf_get_float_2: -- 2.30.2