From 081276d8f181c05fed4447d373d11d4940cbec2a Mon Sep 17 00:00:00 2001
From: pjs <pjs@4a8a32a2-be11-0410-ad9d-d568d2c75423>
Date: Mon, 27 Dec 2004 00:04:03 +0000
Subject: [PATCH] Improvements to float support in printf_fast. Add 6 new files
 to float library: shared helper functions for the upcoming mcs51 float
 library

git-svn-id: https://sdcc.svn.sourceforge.net/svnroot/sdcc/trunk/sdcc@3614 4a8a32a2-be11-0410-ad9d-d568d2c75423
---
 ChangeLog                 |  12 ++++
 device/include/float.h    |  19 +++++-
 device/lib/Makefile.in    |   2 +
 device/lib/_fsget1arg.c   |  32 ++++++++++
 device/lib/_fsget2args.c  | 131 ++++++++++++++++++++++++++++++++++++++
 device/lib/_fsnormalize.c |  52 +++++++++++++++
 device/lib/_fsreturnval.c |  81 +++++++++++++++++++++++
 device/lib/_fsrshift.c    |  77 ++++++++++++++++++++++
 device/lib/_fsswapargs.c  |  35 ++++++++++
 device/lib/libfloat.lib   |   6 ++
 device/lib/printf_fast.c  |  57 +++++++++--------
 11 files changed, 478 insertions(+), 26 deletions(-)
 create mode 100644 device/lib/_fsget1arg.c
 create mode 100644 device/lib/_fsget2args.c
 create mode 100644 device/lib/_fsnormalize.c
 create mode 100644 device/lib/_fsreturnval.c
 create mode 100644 device/lib/_fsrshift.c
 create mode 100644 device/lib/_fsswapargs.c
diff --git a/ChangeLog b/ChangeLog
index 534b5110..568e529f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2004-12-25 Paul Stoffregen <paul AT pjrc.com>
+	* device/lib/printf_fast.c: improvements to float output
+	* device/include/float.h: add defines for assembly float library
+	* device/lib/_fsget1arg.c: receive 1 float arg
+	* device/lib/_fsget2args.c: receive 2 float args (reentrant)
+	* device/lib/_fsnormalize.c: normalize a float
+	* device/lib/_fsreturnval.c: return float, various helper routines
+	* device/lib/_fsrshift.c: right shift a float's mantissa
+	* device/lib/_fsswapargs.c: swap 2 floats
+	* device/lib/Makefile.in: build these 6 new files for mcs51
+	* device/lib/libfloat.lib: add these 6 files to the library
+
 2004-12-26 Borut Razem <borut.razem AT siol.net>
 
 	* sim/ucsim/avr.src/arith_inst.cc: fixed bug #1088372- savr is not
diff --git a/device/include/float.h b/device/include/float.h
index c1d3c26b..1c8292b4 100644
--- a/device/include/float.h
+++ b/device/include/float.h
@@ -70,9 +70,26 @@ char __fslt (float, float);
 char __fseq (float, float);
 char __fsqt (float, float);
 
-#endif
 
+#if defined(SDCC_FLOAT_LIB) && defined(SDCC_mcs51) && !defined(SDCC_USE_XSTACK) && !defined(_SDCC_NO_ASM_LIB_FUNCS)
 
+// This adds extra code for proper round-off, in
+// an attempt to match the results from gcc.
+#define FLOAT_FULL_ACCURACY
 
+// This adds about 66 bytes to the code size and
+// significantly speeds up shift operations of more
+// than 8 bits (common when subtracting numbers
+// of significantly different magnitude and scaling
+// to fixed point)
+#define FLOAT_SHIFT_SPEEDUP
 
+#define sign_a  psw.1
+#define sign_b  psw.5
+#define exp_a dpl
+#define exp_b dph
+#endif	// using mcs51 assembly
+
+
+#endif	// __SDC51_FLOAT_H
 
diff --git a/device/lib/Makefile.in b/device/lib/Makefile.in
index 7786a2ec..7a301d60 100644
--- a/device/lib/Makefile.in
+++ b/device/lib/Makefile.in
@@ -66,6 +66,8 @@ SOURCES		= _atof.c _atoi.c _atol.c _autobaud.c _bp.c _schar2fs.c \
 		  serial.c ser_ir.c printfl.c \
 		  printf_large.c sprintf.c vprintf.c puts.c gets.c \
 		  assert.c time.c printf_fast.c printf_tiny.c bpx.c \
+		  _fsget1arg.c _fsget2args.c _fsnormalize.c \
+		  _fsreturnval.c _fsrshift.c _fsswapargs.c \
 		  fabsf.c frexpf.c ldexpf.c expf.c powf.c sincosf.c sinf.c \
 		  cosf.c logf.c log10f.c sqrtf.c tancotf.c tanf.c cotf.c \
 		  asincosf.c asinf.c acosf.c atanf.c atan2f.c sincoshf.c \
diff --git a/device/lib/_fsget1arg.c b/device/lib/_fsget1arg.c
new file mode 100644
index 00000000..bee256b9
--- /dev/null
+++ b/device/lib/_fsget1arg.c
@@ -0,0 +1,32 @@
+#define SDCC_FLOAT_LIB
+#include <float.h>
+
+
+static void dummy(void) _naked
+{
+	// input passed in a,b,dph,dpl
+	_asm
+	.globl	fsgetarg
+fsgetarg:
+	// extract the input, placing it into:
+	//      sign     exponent   mantiassa
+	//      ----     --------   ---------
+	//  a:  sign_a   exp_a     r4/r3/r2
+	//
+	mov	r2, dpl
+	mov	r3, dph
+	mov	c, b.7
+	rlc	a
+	mov	sign_a, c
+	jz	00001$
+	setb	b.7
+00001$:
+	mov	exp_a, a
+	mov	r4, b
+	ret
+	_endasm;
+}
+
+
+
+
diff --git a/device/lib/_fsget2args.c b/device/lib/_fsget2args.c
new file mode 100644
index 00000000..3a9e9fe4
--- /dev/null
+++ b/device/lib/_fsget2args.c
@@ -0,0 +1,131 @@
+#define SDCC_FLOAT_LIB
+#include <float.h>
+
+
+
+#if defined(SDCC_mcs51) && !defined(SDCC_USE_XSTACK) && !defined(_SDCC_NO_ASM_LIB_FUNCS)
+static void dummy(void) _naked
+{
+	// arg1: passed in a,b,dph,dpl
+	// arg2: passed on stack, read from sp-7..sp-4 (low byte first)
+	_asm
+	.globl	fsgetargs
+fsgetargs:
+	// extract the two inputs, placing them into:
+	//      sign     exponent   mantissa
+	//      ----     --------   ---------
+	//  a:  sign_a   exp_a     r4/r3/r2
+	//  b:  sign_b   exp_b     r7/r6/r5
+	// modifies a, b, r0, dptr and carry; guard must match <float.h>
+	mov	r2, dpl
+	mov	r3, dph
+	mov	c, b.7		// exponent lsb is stored in bit 7 of b
+	rlc	a		// a = 8-bit exponent, carry = sign
+	mov	sign_a, c
+	jz	00001$		// exponent 0: leave mantissa bit 23 clear
+	setb	b.7		// otherwise make the implied leading 1 explicit
+00001$:
+	mov	exp_a, a
+	mov	r4, b
+	// now extract the 2nd parameter from the stack
+	mov	a, sp
+	add	a, #249		// a = sp - 7 (presumably skips two return addresses)
+	mov	r0, a
+	mov	a, @r0
+	mov	r5, a
+	inc	r0
+	mov	a, @r0
+	mov	r6, a
+	inc	r0
+	mov	b, @r0
+	inc	r0
+	mov	a, @r0
+	mov	c, b.7		// same unpacking as for the first argument
+	rlc	a
+	mov	sign_b, c
+	jz	00002$
+	setb	b.7
+00002$:
+	mov	exp_b, a
+	mov	r7, b
+	ret
+	_endasm;
+}
+#endif
+
+
+
+
+#if 0
+// This old version was designed before the change to make all this
+// code fully reentrant.  What a mess the 2nd parameter turns out to
+// be.
+
+void __fsgetargs (float a, float b)
+{
+	a;	// passed in a,b,dph,dpl
+	b;	// ___fsadd_PARM_2
+
+	_asm
+	// extract the two inputs, placing them into:
+	//      sign     exponent   mantiassa
+	//      ----     --------   ---------
+	//  a:  sign_a   exp_a     r4/r3/r2
+	//  b:  sign_b   exp_b     r7/r6/r5
+	//
+	mov	r2, dpl
+	mov	r3, dph
+	mov	c, b.7
+	rlc	a
+	mov	sign_a, c
+	jz	00001$
+	setb	b.7
+00001$:
+	mov	exp_a, a
+	mov	r4, b
+#ifdef SDCC_MODEL_SMALL
+	mov	r5, (___fsadd_PARM_2 + 0)
+	mov	r6, (___fsadd_PARM_2 + 1)
+	mov	b, (___fsadd_PARM_2 + 2)
+	mov	a, (___fsadd_PARM_2 + 3)
+	mov	c, b.7
+	rlc	a
+	mov	sign_b, c
+	jz	00002$
+	setb	b.7
+00002$:
+	mov	exp_b, a
+	mov	r7, b
+#endif
+	_endasm;
+}
+
+
+#ifdef SDCC_MODEL_LARGE
+void __fsgetarglarge2 (void)
+{
+	_asm
+	movx	a, @dptr
+	mov	r5, a
+	inc	dptr
+	movx	a, @dptr
+	mov	r6, a
+	inc	dptr
+	movx	a, @dptr
+	mov	b, a
+	inc	dptr
+	movx	a, @dptr
+	mov	c, b.7
+	rlc	a
+	mov	sign_b, c
+	jz	00002$
+	setb	b.7
+00002$:
+	mov	exp_b, a
+	mov	r7, b
+	_endasm;
+}
+#endif
+
+#endif
+
diff --git a/device/lib/_fsnormalize.c b/device/lib/_fsnormalize.c
new file mode 100644
index 00000000..f985b8bb
--- /dev/null
+++ b/device/lib/_fsnormalize.c
@@ -0,0 +1,52 @@
+#define SDCC_FLOAT_LIB
+#include <float.h>
+#if defined(SDCC_mcs51) && !defined(SDCC_USE_XSTACK) && !defined(_SDCC_NO_ASM_LIB_FUNCS)
+static void dummy(void) _naked
+{
+	_asm
+	.globl	fs_normalize_a
+fs_normalize_a:		// shift r4/r3/r2/r1 left until bit 7 of r4 is set, lowering exp_a to match
+#ifdef FLOAT_SHIFT_SPEEDUP
+	mov	r0, #4		// at most 4 whole-byte shifts
+00001$:
+	mov	a, r4
+	jnz	00003$		// top byte non-zero: finish bit by bit
+	xch	a, r1		// a is 0 here, so r1 becomes 0
+	xch	a, r2
+	xch	a, r3
+	mov	r4, a
+	// the xch chain above is equivalent to
+	//   mov r4, ar3 / mov r3, ar2 / mov r2, ar1 / mov r1, #0
+	// but avoids dependence on the register bank
+	mov	a, exp_a
+	add	a, #248		// exp_a -= 8
+	mov	exp_a, a
+	djnz	r0, 00001$
+	ret			// r4 was zero on all 4 passes
+#else
+	mov	a, r4
+#endif
+00003$:
+	mov	r0, #32		// upper bound on single-bit shifts
+00005$:
+	jb	acc.7, 00006$	// a holds r4 here: done once bit 7 is set
+	dec	exp_a
+	clr	c
+	mov	a, r1
+	rlc	a
+	mov	r1, a
+	mov	a, r2
+	rlc	a
+	mov	r2, a
+	mov	a, r3
+	rlc	a
+	mov	r3, a
+	mov	a, r4
+	rlc	a
+	mov	r4, a
+	djnz	r0, 00005$
+00006$:
+	ret
+	_endasm;
+}
+#endif
diff --git a/device/lib/_fsreturnval.c b/device/lib/_fsreturnval.c
new file mode 100644
index 00000000..95f01e11
--- /dev/null
+++ b/device/lib/_fsreturnval.c
@@ -0,0 +1,81 @@
+#define SDCC_FLOAT_LIB
+#include <float.h>
+#if defined(SDCC_mcs51) && !defined(SDCC_USE_XSTACK) && !defined(_SDCC_NO_ASM_LIB_FUNCS)
+static void dummy(void) _naked
+{
+	_asm
+
+	.globl	fs_round_and_return
+fs_round_and_return:
+#ifdef FLOAT_FULL_ACCURACY
+	// round r4/r3/r2 to nearest (ties to even) using the extra 8 bits in r1
+	cjne	r1, #128, 00001$	// carry set if r1 < 128: round down
+	mov	a, r2			// r1 == 128 is an exact tie
+	rrc	a
+	cpl	c			// carry set if r2 is already even
+00001$:
+	jc	fs_zerocheck_return	// carry set: nothing to add
+	mov	a, r2
+	add	a, #1
+	mov	r2, a
+	clr	a
+	addc	a, r3
+	mov	r3, a
+	clr	a
+	addc	a, r4
+	mov	r4, a
+	jnc	fs_zerocheck_return
+	mov	r4, #0x80		// mantissa overflowed: renormalize
+	inc	exp_a
+#endif
+
+	.globl	fs_zerocheck_return
+fs_zerocheck_return:
+	// zero output is a special case
+	cjne	r4, #0, fs_direct_return
+	cjne	r3, #0, fs_direct_return
+	cjne	r2, #0, fs_direct_return
+
+	.globl	fs_return_zero
+fs_return_zero:
+	clr	a			// all four result bytes zero
+	mov	b, a
+	mov	dph, a
+	mov	dpl, a
+	ret
+
+	.globl	fs_direct_return
+fs_direct_return:
+	// collect all pieces and return
+	mov	c, sign_a
+	mov	a, exp_a
+	rrc	a			// a = sign + exponent bits 7..1, carry = exponent lsb
+	mov	b, r4
+	mov	b.7, c			// exponent lsb replaces the explicit leading 1
+	mov	dph, r3
+	mov	dpl, r2			// exp_a aliases dpl, so this must come last
+	ret
+
+	.globl	fs_return_inf
+fs_return_inf:
+	clr	a
+	mov	dph, a
+	mov	dpl, a
+	mov	b, #0x80		// exponent lsb set, mantissa zero
+	cpl	a			// a = 0xFF
+	mov	c, sign_a
+	rrc	a			// a = sign followed by seven 1 bits
+	ret
+
+	.globl	fs_return_nan
+fs_return_nan:
+	clr	a
+	mov	dph, a
+	mov	dpl, a
+	mov	b, #0xC0		// exponent lsb and top mantissa bit set
+	mov	a, #0x7F		// sign clear, exponent bits 7..1 set
+	ret
+
+	_endasm;
+}
+#endif
diff --git a/device/lib/_fsrshift.c b/device/lib/_fsrshift.c
new file mode 100644
index 00000000..4928fa5f
--- /dev/null
+++ b/device/lib/_fsrshift.c
@@ -0,0 +1,77 @@
+#define SDCC_FLOAT_LIB
+#include <float.h>
+#if defined(SDCC_mcs51) && !defined(SDCC_USE_XSTACK) && !defined(_SDCC_NO_ASM_LIB_FUNCS)
+// fs_rshift_a: shift r4/r3/r2/r1 right by the count in a and add the count to exp_a
+static void dummy(void) _naked
+{
+	_asm
+	.globl	fs_rshift_a
+fs_rshift_a:
+	jz	00020$		// shift count 0: nothing to do
+	mov	r0, a		// r0 = shift count
+	add	a, exp_a	// adjust exponent
+	jnc	00001$
+	mov	a, #255		// don't roll over
+00001$:
+	mov	exp_a, a
+#ifdef FLOAT_SHIFT_SPEEDUP
+	mov	a, r0
+	add	a, #248		// a = count - 8, carry set if count >= 8
+	jnc	00003$
+	xch	a, r4		// shift right one whole byte, count parked in r4
+	xch	a, r3
+	xch	a, r2
+	mov	r1, a
+	clr	a
+	xch	a, r4		// r4 = 0, a = remaining count
+	//mov	r1, ar2		// avoid dependence on register bank
+	//mov	r2, ar3
+	//mov	r3, ar4
+	//mov	r4, #0
+	add	a, #248
+	jnc	00003$
+	xch	a, r3		// second whole byte
+	xch	a, r2
+	mov	r1, a
+	clr	a
+	xch	a, r3
+	//mov	r1, ar2
+	//mov	r2, ar3
+	//mov	r3, #0
+	add	a, #248
+	jnc	00003$
+	xch	a, r2		// third whole byte
+	mov	r1, a
+	clr	a
+	xch	a, r2
+	//mov	r1, ar2
+	//mov	r2, #0
+	add	a, #248
+	jnc	00003$
+	mov	r1, #0		// count >= 32: everything shifted out
+	ret
+00003$:
+	add	a, #8		// undo the last subtraction: a = bits left (0..7)
+	jz	00020$
+	mov	r0, a
+#endif
+00005$:
+	clr	c		// shift in a zero from the top
+	mov	a, r4
+	rrc	a
+	mov	r4, a
+	mov	a, r3
+	rrc	a
+	mov	r3, a
+	mov	a, r2
+	rrc	a
+	mov	r2, a
+	mov	a, r1
+	rrc	a
+	mov	r1, a
+	djnz	r0, 00005$
+00020$:
+	ret
+	_endasm;
+}
+#endif
diff --git a/device/lib/_fsswapargs.c b/device/lib/_fsswapargs.c
new file mode 100644
index 00000000..aba533b4
--- /dev/null
+++ b/device/lib/_fsswapargs.c
@@ -0,0 +1,35 @@
+#define SDCC_FLOAT_LIB
+#include <float.h>
+
+static void dummy(void) _naked
+{
+	_asm
+	.globl	fs_swap_a_b
+fs_swap_a_b:
+	mov	a, exp_a
+	xch	a, exp_b
+	mov	exp_a, a
+	// is there a faster way to swap these 2 bits???
+	// this trick with psw can play havoc with the resiter bank setting
+	// mov	a, psw
+	// swap	a		// depends on sign bits in psw.1 & psw.5
+	// mov	psw, a
+	 mov	c, sign_a
+	 rlc	a
+	 mov	c, sign_b
+	 mov	sign_a, c
+	 rrc	a
+	 mov	sign_b, c
+	mov	a, r4
+	xch	a, r7
+	mov	r4, a
+	mov	a, r3
+	xch	a, r6
+	mov	r3, a
+	mov	a, r2
+	xch	a, r5
+	mov	r2, a
+	ret
+	_endasm;
+}
+
diff --git a/device/lib/libfloat.lib b/device/lib/libfloat.lib
index b4ff41f3..07854aed 100644
--- a/device/lib/libfloat.lib
+++ b/device/lib/libfloat.lib
@@ -46,3 +46,9 @@ floorf
 ceilf
 modff
 errno
+_fsget1arg
+_fsget2args
+_fsnormalize
+_fsreturnval
+_fsrshift
+_fsswapargs
diff --git a/device/lib/printf_fast.c b/device/lib/printf_fast.c
index e1f0aa03..d05457ee 100644
--- a/device/lib/printf_fast.c
+++ b/device/lib/printf_fast.c
@@ -97,6 +97,7 @@ static data unsigned char field_width;
 #endif
 
 #ifdef FLOAT
+#define SDCC_FLOAT_LIB
 #include <float.h>
 static bit continue_float;
 #ifndef FLOAT_FIXED4
@@ -612,17 +613,20 @@ print_float:
 	mov	_field_width, #0
 #endif
 print_float_begin:
-	mov	exp_b, r0		// keep r0 safe, will need it again
+	push	ar0		// keep r0 safe, will need it again
 	lcall	printf_get_float
 	clr	c
 	mov	a, #158			// check for large float we can't print
-	subb	a, exp_a
+	subb	a, r7
 	jnc	print_float_size_ok
 printf_float_too_big:
 	// TODO: should print some sort of overflow error??
+	pop	ar0
 	ljmp	printf_format_loop
 print_float_size_ok:
-	lcall	__fs_rshift_a
+	push	dpl
+	lcall	fs_rshift_a
+	pop	dpl
 	setb	_continue_float
 #ifndef LONG
 	mov	a, r3
@@ -635,10 +639,12 @@ print_float_size_ok:
 	lcall	printf_putchar
 	// now that the integer part is printed, we need to refetch the
 	// float from the va_args and extract the fractional part
-	mov	r0, exp_b
+	pop	ar0
 	lcall	printf_get_float
 	push	ar0
-	mov	a, exp_a
+	push	dpl
+	push	dph
+	mov	a, r7
 	cjne	a, #126, print_float_frac_lshift
 	sjmp	print_float_frac // input between 0.5 to 0.9999
 print_float_frac_lshift:
@@ -663,7 +669,7 @@ print_float_frac_rshift:
 	//Acc (exponent) is less than 126 (input < 0.5)
 	cpl	a
 	add	a, #127
-	lcall	__fs_rshift_a
+	lcall	fs_rshift_a
 print_float_frac:
 	// now we've got the fractional part, so now is the time to
 	// convert to BCD... just convert each bit to BCD using a
@@ -672,8 +678,6 @@ print_float_frac:
 	clr	a
 	mov	r6, a
 	mov	r5, a
-	push	dpl
-	push	dph
 	mov	dptr, #_frac2bcd	// FLOAT_FIXED4 version (14 entries)
 print_float_frac_loop:
 	mov	a, r3
@@ -784,7 +788,9 @@ print_float_default_done:
 print_float_begin:
 	push	ar0			// keep r0 safe, will need it again
 	lcall	printf_get_float
-	mov	a, exp_a
+	push	dpl
+	push	dph
+	mov	a, r7
 	cjne	a, #126, print_float_frac_lshift
 	sjmp	print_float_frac	// input between 0.5 to 0.9999
 
@@ -810,17 +816,15 @@ print_float_frac_rshift:
 	//Acc (exponent) is less than 126 (input < 0.5)
 	cpl	a
 	add	a, #127
-	lcall	__fs_rshift_a
+	lcall	fs_rshift_a
 print_float_frac:
-	// Convert the fraction in r4/r3/r2/r1 into 8 BCD digits in exb_b/r7/r6/r5
+	// Convert the fraction in r4/r3/r2/r1 into 8 BCD digits in r0/r7/r6/r5
 	mov	b, #27
 	clr	a
-	mov	exp_b, a
+	mov	r0, a
 	mov	r7, a
 	mov	r6, a
 	mov	r5, a
-	push	dpl
-	push	dph
 	mov	dptr, #_frac2bcd	// FLOAT version (27 entries)
 print_float_frac_loop:
 	mov	a, r1
@@ -853,17 +857,15 @@ print_float_frac_loop:
 	mov	r7, a
 	mov	a, #3
 	movc	a, @a+dptr
-	addc	a, exp_b
+	addc	a, r0
 	da	a
-	mov	exp_b, a
+	mov	r0, a
 print_float_frac_skip:
 	inc	dptr
 	inc	dptr
 	inc	dptr
 	inc	dptr
 	djnz	b, print_float_frac_loop
-	pop	dph
-	pop	dpl
 print_float_frac_roundoff:
 	// Now it's time to round-off the BCD digits to the desired precision.
 	clr	a
@@ -875,7 +877,8 @@ print_float_frac_roundoff:
 	rl	a
 	rl	a
 	anl	a, #0xFC
-	lcall	__fs_rshift_a		// divide r4/r3/r2/r1 by 10^frac_field_width
+	mov	dph, r0			// fs_rshift_a will overwrite r0 & dpl
+	lcall	fs_rshift_a		// divide r4/r3/r2/r1 by 10^frac_field_width
 	mov	a, r5
 	add	a, r1			// add rounding to fractional part
 	da	a
@@ -888,11 +891,13 @@ print_float_frac_roundoff:
 	addc	a, r3
 	da	a
 	mov	_float_frac_bcd+1, a
-	mov	a, exp_b
+	mov	a, dph
 	addc	a, r4
 	da	a
 	mov	_float_frac_bcd+0, a
 	mov	sign_b, c		// keep fractional carry in sign_b
+	pop	dph
+	pop	dpl
 print_float_int:
 	// Time to work on the integer portion... fetch the float again, check
 	// size (exponent), scale to integer, add the fraction's carry, and
@@ -902,13 +907,15 @@ print_float_int:
 	push	ar0
 	clr	c
 	mov	a, #158			// check for large float we can't print
-	subb	a, exp_a
+	subb	a, r7
 	jnc	print_float_size_ok
 printf_float_too_big:
 	// TODO: should print some sort of overflow error??
 	ljmp	printf_format_loop
 print_float_size_ok:
-	lcall	__fs_rshift_a
+	push	dpl
+	lcall	fs_rshift_a
+	pop	dpl
 	jnb	sign_b, print_float_do_int
 	// if we get here, the fractional round off caused the
 	// integer part to increment.  Add 1 for a proper result
@@ -1002,7 +1009,7 @@ get_float_frac_digit_done:
 
 #if 0
 pm2_print_float:
-	 mov	a, exp_a
+	 mov	a, r7
 	 lcall	pm2_entry_phex
 	 mov	a, #0x20
 	 lcall	pm2_entry_cout
@@ -1013,7 +1020,7 @@ pm2_print_float:
 #endif
 
 	// Fetch a float from the va_args and put it into
-	// exp_a/r4/r3/r2 and also clear r1 and preset
+	// r7(exp) r4/r3/r2(mant) and also clear r1 and preset
 	// the flags
 printf_get_float:
 	mov	a, @r0
@@ -1026,7 +1033,7 @@ printf_get_float:
 	mov	a, r1
 	rlc	a
 	mov	_negative_flag, c
-	mov	exp_a, a
+	mov	r7, a
 	jz	printf_get_float_2
 	orl	ar4, #0x80
 printf_get_float_2:
-- 
2.47.2