+2004-12-25 Paul Stoffregen <paul AT pjrc.com>
+ * device/lib/printf_fast.c: improvements to float output
+ * device/include/float.h: add defines for assembly float library
+ * device/lib/_fsget1arg.c: receive 1 float arg
+ * device/lib/_fsget2args.c: receive 2 float args (reentrant)
+ * device/lib/_fsnormalize.c: normalize a float
+ * device/lib/_fsreturnval.c: return float, various helper routines
+ * device/lib/_fsrshift.c: right shift a float's mantissa
+ * device/lib/_fsswapargs.c: swap 2 floats
+ * device/lib/Makefile.in: build these 6 new files for mcs51
+ * device/lib/libfloat.lib: add these 6 files to the library
+
2004-12-26 Borut Razem <borut.razem AT siol.net>
* sim/ucsim/avr.src/arith_inst.cc: fixed bug #1088372- savr is not
char __fseq (float, float);
char __fsqt (float, float);
-#endif
+#if defined(SDCC_FLOAT_LIB) && defined(SDCC_mcs51) && !defined(SDCC_USE_XSTACK) && !defined(_SDCC_NO_ASM_LIB_FUNCS)
+// This adds extra code for proper round-off, in
+// an attempt to match the results from gcc.
+#define FLOAT_FULL_ACCURACY
+// This adds about 66 bytes to the code size and
+// significantly speeds up shift operations more
+// than 8 bits (common when subtracting numbers
+// of significantly different magnitude and scaling
+// to fixed point)
+#define FLOAT_SHIFT_SPEEDUP
+#define sign_a psw.1
+#define sign_b psw.5
+#define exp_a dpl
+#define exp_b dph
+#endif // using mcs51 assembly
+
+
+#endif // __SDC51_FLOAT_H
serial.c ser_ir.c printfl.c \
printf_large.c sprintf.c vprintf.c puts.c gets.c \
assert.c time.c printf_fast.c printf_tiny.c bpx.c \
+ _fsget1arg.c _fsget2args.c _fsnormalize.c \
+ _fsreturnval.c _fsrshift.c _fsswapargs.c \
fabsf.c frexpf.c ldexpf.c expf.c powf.c sincosf.c sinf.c \
cosf.c logf.c log10f.c sqrtf.c tancotf.c tanf.c cotf.c \
asincosf.c asinf.c acosf.c atanf.c atan2f.c sincoshf.c \
--- /dev/null
+#define SDCC_FLOAT_LIB
+#include <float.h>
+
+
+static void dummy(void) _naked // placeholder C function; it only hosts the fsgetarg assembly routine
+{
+ // input passed in a,b,dph,dpl
+ _asm
+ .globl fsgetarg
+fsgetarg:
+ // extract the input, placing it into:
+ // sign exponent mantissa
+ // ---- -------- ---------
+ // a: sign_a exp_a r4/r3/r2
+ //
+ mov r2, dpl // save dpl first: exp_a is an alias for dpl (float.h)
+ mov r3, dph
+ mov c, b.7 // b.7 is rotated into a as the exponent's low bit
+ rlc a // a = 8-bit exponent, carry = old a.7 (stored as the sign below)
+ mov sign_a, c
+ jz 00001$ // exponent is zero: b.7 is already clear, leave it that way
+ setb b.7 // nonzero exponent: force the mantissa's top bit to 1
+00001$:
+ mov exp_a, a
+ mov r4, b
+ ret
+ _endasm;
+}
+
+
+
+
--- /dev/null
+#define SDCC_FLOAT_LIB
+#include <float.h>
+
+
+
+
+static void dummy(void) _naked // placeholder C function; it only hosts the fsgetargs assembly routine
+{
+ // arg1: passed in a,b,dph,dpl
+ // arg2: passed on stack
+ _asm
+ .globl fsgetargs
+fsgetargs:
+ // extract the two inputs, placing them into:
+ // sign exponent mantissa
+ // ---- -------- ---------
+ // a: sign_a exp_a r4/r3/r2
+ // b: sign_b exp_b r7/r6/r5
+ //
+ mov r2, dpl // save dpl first: exp_a is an alias for dpl (float.h)
+ mov r3, dph // save dph first: exp_b is an alias for dph (float.h)
+ mov c, b.7 // b.7 is rotated into a as the exponent's low bit
+ rlc a // a = 8-bit exponent, carry = old a.7 (stored as the sign below)
+ mov sign_a, c
+ jz 00001$ // exponent is zero: b.7 is already clear, leave it that way
+ setb b.7 // nonzero exponent: force the mantissa's top bit to 1
+00001$:
+ mov exp_a, a
+ mov r4, b
+ // now extract the 2nd parameter from the stack
+ mov a, sp
+ add a, #249 // 249 == -7 mod 256, so a = sp - 7
+ mov r0, a // NOTE(review): presumably the lowest byte of arg2, sitting below two return addresses - confirm against callers
+ mov a, @r0
+ mov r5, a
+ inc r0
+ mov a, @r0
+ mov r6, a
+ inc r0
+ mov b, @r0
+ inc r0
+ mov a, @r0
+ mov c, b.7 // same unpacking as for the first argument
+ rlc a
+ mov sign_b, c
+ jz 00002$
+ setb b.7
+00002$:
+ mov exp_b, a
+ mov r7, b
+ ret
+ _endasm;
+}
+
+
+
+
+
+#if 0
+// This old version was designed before the change to make all this
+// code fully reentrant. What a mess the 2nd parameter turns out to
+// be.
+
+void __fsgetargs (float a, float b)
+{
+ a; // passed in a,b,dph,dpl
+ b; // ___fsadd_PARM_2
+
+ _asm
+ // extract the two inputs, placing them into:
+ // sign exponent mantissa
+ // ---- -------- ---------
+ // a: sign_a exp_a r4/r3/r2
+ // b: sign_b exp_b r7/r6/r5
+ //
+ mov r2, dpl // save dpl first: exp_a is an alias for dpl (float.h)
+ mov r3, dph // save dph first: exp_b is an alias for dph (float.h)
+ mov c, b.7 // b.7 is rotated into a as the exponent's low bit
+ rlc a // a = 8-bit exponent, carry = old a.7 (stored as the sign below)
+ mov sign_a, c
+ jz 00001$ // exponent is zero: leave b.7 clear
+ setb b.7 // nonzero exponent: force the mantissa's top bit to 1
+00001$:
+ mov exp_a, a
+ mov r4, b
+#ifdef SDCC_MODEL_SMALL
+ mov r5, (___fsadd_PARM_2 + 0) // second argument is read from the fixed location ___fsadd_PARM_2
+ mov r6, (___fsadd_PARM_2 + 1)
+ mov b, (___fsadd_PARM_2 + 2)
+ mov a, (___fsadd_PARM_2 + 3)
+ mov c, b.7 // same unpacking as for the first argument
+ rlc a
+ mov sign_b, c
+ jz 00002$
+ setb b.7
+00002$:
+ mov exp_b, a
+ mov r7, b
+#endif
+ _endasm;
+}
+
+
+#ifdef SDCC_MODEL_LARGE
+void __fsgetarglarge2 (void)
+{
+ _asm
+ movx a, @dptr // read four consecutive bytes through dptr, lowest byte first
+ mov r5, a
+ inc dptr
+ movx a, @dptr
+ mov r6, a
+ inc dptr
+ movx a, @dptr
+ mov b, a
+ inc dptr
+ movx a, @dptr
+ mov c, b.7 // b.7 is rotated into a as the exponent's low bit
+ rlc a // a = 8-bit exponent, carry = old a.7 (stored as the sign below)
+ mov sign_b, c
+ jz 00002$ // exponent is zero: leave b.7 clear
+ setb b.7 // nonzero exponent: force the mantissa's top bit to 1
+00002$:
+ mov exp_b, a // exp_b is an alias for dph (float.h), so dptr is no longer intact after this
+ mov r7, b
+ _endasm;
+}
+#endif
+
+#endif
+
--- /dev/null
+#define SDCC_FLOAT_LIB
+#include <float.h>
+
+static void dummy(void) _naked // placeholder C function; it only hosts the fs_normalize_a assembly routine
+{
+ _asm
+ .globl fs_normalize_a
+fs_normalize_a: // shift r4/r3/r2/r1 left until r4.7 is set (or the shift limit is hit), lowering exp_a to match
+#ifdef FLOAT_SHIFT_SPEEDUP
+ mov r0, #4 // try at most 4 whole-byte shifts
+00001$:
+ mov a, r4
+ jnz 00003$ // top byte is nonzero: finish with single-bit shifts
+ xch a, r1 // a is 0 here; the xch chain moves r1->r2->r3->r4 and leaves r1 = 0
+ xch a, r2
+ xch a, r3
+ mov r4, a
+ //mov r4, ar3
+ //mov r3, ar2
+ //mov r2, ar1
+ //mov r1, #0
+ mov a, exp_a
+ add a, #248 // 248 == -8 mod 256: exponent drops by 8 per byte shifted
+ mov exp_a, a
+ djnz r0, 00001$
+ ret // reached only when r4, r3, r2 and r1 were all zero
+#else
+ mov a, r4
+#endif
+00003$:
+ mov r0, #32 // upper bound on the number of single-bit shifts
+00005$:
+ jb acc.7, 00006$ // a holds r4 here; stop once its top bit is set
+ dec exp_a // one bit shifted left: exponent drops by one
+ clr c // a zero enters at the bottom of r1
+ mov a, r1
+ rlc a
+ mov r1, a
+ mov a, r2
+ rlc a
+ mov r2, a
+ mov a, r3
+ rlc a
+ mov r3, a
+ mov a, r4
+ rlc a
+ mov r4, a // a still equals the new r4 for the test at the top of the loop
+ djnz r0, 00005$
+00006$:
+ ret
+ _endasm;
+}
--- /dev/null
+#define SDCC_FLOAT_LIB
+#include <float.h>
+
+static void dummy(void) _naked // placeholder C function; it only hosts the return helpers below
+{
+ _asm
+
+ .globl fs_round_and_return
+fs_round_and_return:
+#ifdef FLOAT_FULL_ACCURACY
+ // discard the extra 8 bits of precision we kept around in r1
+ cjne r1, #128, 00001$ // carry = (r1 < 128); falls through only when r1 == 128
+ mov a, r2 // exact tie: decide by the lowest bit of r2
+ rrc a // carry = r2 bit 0
+ cpl c // carry = 1 when r2 is even
+00001$:
+ jc fs_zerocheck_return // carry set: no round-up
+ mov a, r2 // otherwise add 1 to the 24-bit value r4/r3/r2
+ add a, #1
+ mov r2, a
+ clr a
+ addc a, r3
+ mov r3, a
+ clr a
+ addc a, r4
+ mov r4, a
+ jnc fs_zerocheck_return // no carry out of r4: rounding is done
+ mov r4, #0x80 // carry out of r4: set r4 to 0x80 and raise the exponent by one
+ inc exp_a
+#endif
+
+ .globl fs_zerocheck_return
+fs_zerocheck_return:
+ // zero output is a special case
+ cjne r4, #0, fs_direct_return // any nonzero byte in r4/r3/r2 takes the normal return path
+ cjne r3, #0, fs_direct_return
+ cjne r2, #0, fs_direct_return
+
+ .globl fs_return_zero
+fs_return_zero:
+ clr a // return a = b = dph = dpl = 0
+ mov b, a
+ mov dph, a
+ mov dpl, a
+ ret
+
+ .globl fs_direct_return
+fs_direct_return:
+ // collect all pieces and return
+ mov c, sign_a
+ mov a, exp_a // read exp_a before dpl (its alias, see float.h) is overwritten below
+ rrc a // a = sign : exponent bits 7..1, carry = exponent bit 0
+ mov b, r4
+ mov b.7, c // exponent bit 0 replaces the top bit of r4's copy in b
+ mov dph, r3
+ mov dpl, r2
+ ret
+
+ .globl fs_return_inf
+fs_return_inf:
+ clr a
+ mov dph, a
+ mov dpl, a
+ mov b, #0x80
+ cpl a // a = 0xFF
+ mov c, sign_a
+ rrc a // a = sign_a : 1111111
+ ret // returns a:b:dph:dpl = s1111111 10000000 00000000 00000000
+
+ .globl fs_return_nan
+fs_return_nan:
+ clr a
+ mov dph, a
+ mov dpl, a
+ mov b, #0xC0
+ mov a, #0x7F
+ ret // returns a:b:dph:dpl = 0x7F 0xC0 0x00 0x00
+
+ _endasm;
+}
+
--- /dev/null
+#define SDCC_FLOAT_LIB
+#include <float.h>
+
+
+static void dummy(void) _naked // placeholder C function; it only hosts the fs_rshift_a assembly routine
+{
+ _asm
+ .globl fs_rshift_a
+fs_rshift_a: // shift r4/r3/r2/r1 right by the count in a and add that count to exp_a
+ jz 00020$ // count of zero: nothing to do
+ mov r0, a // r0 = shift count
+ add a, exp_a // adjust exponent
+ jnc 00001$
+ mov a, #255 // don't roll over
+00001$:
+ mov exp_a, a
+#ifdef FLOAT_SHIFT_SPEEDUP
+ mov a, r0
+ add a, #248 // a = count - 8; carry is set when count >= 8
+ jnc 00003$
+ xch a, r4 // shift right one byte: r1 = r2, r2 = r3, r3 = r4, r4 = 0; a ends up holding the remaining count
+ xch a, r3
+ xch a, r2
+ mov r1, a
+ clr a
+ xch a, r4
+ //mov r1, ar2 // avoid dependence on register bank
+ //mov r2, ar3
+ //mov r3, ar4
+ //mov r4, #0
+ add a, #248 // another 8 or more bits still to go?
+ jnc 00003$
+ xch a, r3 // r4 is already 0: r1 = r2, r2 = r3, r3 = 0
+ xch a, r2
+ mov r1, a
+ clr a
+ xch a, r3
+ //mov r1, ar2
+ //mov r2, ar3
+ //mov r3, #0
+ add a, #248
+ jnc 00003$
+ xch a, r2 // r4 and r3 are already 0: r1 = r2, r2 = 0
+ mov r1, a
+ clr a
+ xch a, r2
+ //mov r1, ar2
+ //mov r2, #0
+ add a, #248
+ jnc 00003$
+ mov r1, #0 // count was 32 or more: every bit has been shifted out
+ ret
+00003$:
+ add a, #8 // undo the last subtraction: a = remaining count (0..7)
+ jz 00020$
+ mov r0, a
+#endif
+00005$:
+ clr c // a zero enters at the top of r4
+ mov a, r4
+ rrc a
+ mov r4, a
+ mov a, r3
+ rrc a
+ mov r3, a
+ mov a, r2
+ rrc a
+ mov r2, a
+ mov a, r1
+ rrc a
+ mov r1, a
+ djnz r0, 00005$ // one bit per pass, r0 passes in total
+00020$:
+ ret
+ _endasm;
+}
+
--- /dev/null
+#define SDCC_FLOAT_LIB
+#include <float.h>
+
+static void dummy(void) _naked // placeholder C function; it only hosts the fs_swap_a_b assembly routine
+{
+ _asm
+ .globl fs_swap_a_b
+fs_swap_a_b: // exchange (sign_a, exp_a, r4/r3/r2) with (sign_b, exp_b, r7/r6/r5)
+ mov a, exp_a
+ xch a, exp_b
+ mov exp_a, a
+ // is there a faster way to swap these 2 bits???
+ // this trick with psw can play havoc with the register bank setting
+ // mov a, psw
+ // swap a // depends on sign bits in psw.1 & psw.5
+ // mov psw, a
+ mov c, sign_a
+ rlc a // park the old sign_a in acc.0
+ mov c, sign_b
+ mov sign_a, c
+ rrc a // carry = old sign_a again
+ mov sign_b, c
+ mov a, r4 // swap the mantissa bytes pairwise through a
+ xch a, r7
+ mov r4, a
+ mov a, r3
+ xch a, r6
+ mov r3, a
+ mov a, r2
+ xch a, r5
+ mov r2, a
+ ret
+ _endasm;
+}
+
ceilf
modff
errno
+_fsget1arg
+_fsget2args
+_fsnormalize
+_fsreturnval
+_fsrshift
+_fsswapargs
#endif
#ifdef FLOAT
+#define SDCC_FLOAT_LIB
#include <float.h>
static bit continue_float;
#ifndef FLOAT_FIXED4
mov _field_width, #0
#endif
print_float_begin:
- mov exp_b, r0 // keep r0 safe, will need it again
+ push ar0 // keep r0 safe, will need it again
lcall printf_get_float
clr c
mov a, #158 // check for large float we can't print
- subb a, exp_a
+ subb a, r7
jnc print_float_size_ok
printf_float_too_big:
// TODO: should print some sort of overflow error??
+ pop ar0
ljmp printf_format_loop
print_float_size_ok:
- lcall __fs_rshift_a
+ push dpl
+ lcall fs_rshift_a
+ pop dpl
setb _continue_float
#ifndef LONG
mov a, r3
lcall printf_putchar
// now that the integer part is printed, we need to refetch the
// float from the va_args and extract the fractional part
- mov r0, exp_b
+ pop ar0
lcall printf_get_float
push ar0
- mov a, exp_a
+ push dpl
+ push dph
+ mov a, r7
cjne a, #126, print_float_frac_lshift
sjmp print_float_frac // input between 0.5 to 0.9999
print_float_frac_lshift:
//Acc (exponent) is less than 126 (input < 0.5)
cpl a
add a, #127
- lcall __fs_rshift_a
+ lcall fs_rshift_a
print_float_frac:
// now we've got the fractional part, so now is the time to
// convert to BCD... just convert each bit to BCD using a
clr a
mov r6, a
mov r5, a
- push dpl
- push dph
mov dptr, #_frac2bcd // FLOAT_FIXED4 version (14 entries)
print_float_frac_loop:
mov a, r3
print_float_begin:
push ar0 // keep r0 safe, will need it again
lcall printf_get_float
- mov a, exp_a
+ push dpl
+ push dph
+ mov a, r7
cjne a, #126, print_float_frac_lshift
sjmp print_float_frac // input between 0.5 to 0.9999
//Acc (exponent) is less than 126 (input < 0.5)
cpl a
add a, #127
- lcall __fs_rshift_a
+ lcall fs_rshift_a
print_float_frac:
- // Convert the fraction in r4/r3/r2/r1 into 8 BCD digits in exb_b/r7/r6/r5
+ // Convert the fraction in r4/r3/r2/r1 into 8 BCD digits in r0/r7/r6/r5
mov b, #27
clr a
- mov exp_b, a
+ mov r0, a
mov r7, a
mov r6, a
mov r5, a
- push dpl
- push dph
mov dptr, #_frac2bcd // FLOAT version (27 entries)
print_float_frac_loop:
mov a, r1
mov r7, a
mov a, #3
movc a, @a+dptr
- addc a, exp_b
+ addc a, r0
da a
- mov exp_b, a
+ mov r0, a
print_float_frac_skip:
inc dptr
inc dptr
inc dptr
inc dptr
djnz b, print_float_frac_loop
- pop dph
- pop dpl
print_float_frac_roundoff:
// Now it's time to round-off the BCD digits to the desired precision.
clr a
rl a
rl a
anl a, #0xFC
- lcall __fs_rshift_a // divide r4/r3/r2/r1 by 10^frac_field_width
+ mov dph, r0 // fs_rshift_a will overwrite r0 & dpl
+ lcall fs_rshift_a // divide r4/r3/r2/r1 by 10^frac_field_width
mov a, r5
add a, r1 // add rounding to fractional part
da a
addc a, r3
da a
mov _float_frac_bcd+1, a
- mov a, exp_b
+ mov a, dph
addc a, r4
da a
mov _float_frac_bcd+0, a
mov sign_b, c // keep fractional carry in sign_b
+ pop dph
+ pop dpl
print_float_int:
// Time to work on the integer portion... fetch the float again, check
// size (exponent), scale to integer, add the fraction's carry, and
push ar0
clr c
mov a, #158 // check for large float we can't print
- subb a, exp_a
+ subb a, r7
jnc print_float_size_ok
printf_float_too_big:
// TODO: should print some sort of overflow error??
ljmp printf_format_loop
print_float_size_ok:
- lcall __fs_rshift_a
+ push dpl
+ lcall fs_rshift_a
+ pop dpl
jnb sign_b, print_float_do_int
// if we get here, the fractional round off caused the
// integer part to increment. Add 1 for a proper result
#if 0
pm2_print_float:
- mov a, exp_a
+ mov a, r7
lcall pm2_entry_phex
mov a, #0x20
lcall pm2_entry_cout
#endif
// Fetch a float from the va_args and put it into
- // exp_a/r4/r3/r2 and also clear r1 and preset
+ // r7(exp) r4/r3/r2(mant) and also clear r1 and preset
// the flags
printf_get_float:
mov a, @r0
mov a, r1
rlc a
mov _negative_flag, c
- mov exp_a, a
+ mov r7, a
jz printf_get_float_2
orl ar4, #0x80
printf_get_float_2: