+2004-12-31 Paul Stoffregen <paul AT pjrc.com>
+
+ * device/lib/logf.c: added mcs51 assembly version
+ * device/lib/expf.c: added mcs51 assembly version
+ * device/lib/_logexpf.c: new shared asm code for expf and logf
+ * device/include/math.h: add defines for assembly math library
+ * device/lib/Makefile.in: build new _logexpf.c
+ * device/lib/libfloat.lib: use new _logexpf.c
+
2004-12-29 Slade Rich <slade_rich AT users.sourceforge.net>
* src/pic/device.c
long l;
};
+#if defined(SDCC_MATH_LIB) && defined(SDCC_mcs51) && !defined(SDCC_USE_XSTACK) && !defined(SDCC_STACK_AUTO) && !defined(_SDCC_NO_ASM_LIB_FUNCS)
+// Compile the mcs51 assembly version only when all these
+// conditions are met. Since not all the functions are
+// reentrant, do not compile with --stack-auto is used.
+#define MATH_ASM_MCS51
+#endif
+
+
/* Functions on the z80 & gbz80 are always reentrant and so the "reentrant" */
/* keyword is not defined. */
#if defined(SDCC_z80) || defined(SDCC_gbz80)
printf_fast.c printf_fast_f.c printf_tiny.c \
assert.c time.c bpx.c \
_fsget1arg.c _fsget2args.c _fsnormalize.c \
- _fsreturnval.c _fsrshift.c _fsswapargs.c \
+ _fsreturnval.c _fsrshift.c _fsswapargs.c _logexpf.c \
fabsf.c frexpf.c ldexpf.c expf.c powf.c sincosf.c sinf.c \
cosf.c logf.c log10f.c sqrtf.c tancotf.c tanf.c cotf.c \
asincosf.c asinf.c acosf.c atanf.c atan2f.c sincoshf.c \
--- /dev/null
+#define SDCC_MATH_LIB
+#include <math.h>
+
+
+#ifdef MATH_ASM_MCS51
+
+// This code is shared by both logf() and expf(), so it goes in this
+// separate file to allow the linker to include it when either
+// function is needed, but only 1 copy when both are used.
+
+void _fs_cordic_rshift_r765_unsigned(void)
+{
+ _asm
+ add a, #248
+ jnc 00003$
+ mov b, r5
+ mov r5, ar6
+ mov r6, ar7
+ mov r7, #0
+ add a, #248
+ jnc 00003$
+ mov b, r5
+ mov r5, ar6
+ mov r6, #0
+ add a, #248
+ jnc 00003$
+ mov b, r5
+ mov r5, #0
+ add a, #248
+ jnc 00003$
+ mov b, #0
+ ret
+00003$:
+ add a, #8
+ jz 00030$
+ push ar0
+ mov r0, a
+00010$:
+ clr c
+ mov a, r7
+ rrc a
+ mov r7, a
+ mov a, r6
+ rrc a
+ mov r6, a
+ mov a, r5
+ rrc a
+ mov r5, a
+ mov a, b
+ rrc a
+ mov b, a
+ djnz r0, 00010$
+ pop ar0
+00030$:
+ _endasm;
+}
+
+code unsigned char _fs_natural_log_table[] = {
+0xFF, 0x42, 0x2E, 0x16, // 0.693147180560
+0xF6, 0x91, 0xF9, 0x0C, // 0.405465108108
+0xF2, 0xFD, 0x23, 0x07, // 0.223143551314
+0xEE, 0xE0, 0xC4, 0x03, // 0.117783035656
+0x0C, 0xA3, 0xF0, 0x01, // 0.060624621816
+0xD8, 0x14, 0xFC, 0x00, // 0.030771658667
+0xA3, 0x02, 0x7F, 0x00, // 0.015504186536
+0x55, 0xC0, 0x3F, 0x00, // 0.007782140442
+0x0B, 0xF0, 0x1F, 0x00, // 0.003898640416
+0x01, 0xFC, 0x0F, 0x00, // 0.001951220131
+0x00, 0xFF, 0x07, 0x00, // 0.000976085973
+0xC0, 0xFF, 0x03, 0x00, // 0.000488162080
+0xF0, 0xFF, 0x01, 0x00, // 0.000244110828
+0xFC, 0xFF, 0x00, 0x00, // 0.000122062863
+0xFF, 0x7F, 0x00, 0x00, // 0.000061033294
+0x00, 0x40, 0x00, 0x00, // 0.000030517112
+0x00, 0x20, 0x00, 0x00, // 0.000015258673
+0x00, 0x10, 0x00, 0x00, // 0.000007629365
+0x00, 0x08, 0x00, 0x00, // 0.000003814690
+0x00, 0x04, 0x00, 0x00, // 0.000001907347
+0x00, 0x02, 0x00, 0x00, // 0.000000953674
+0x00, 0x01, 0x00, 0x00, // 0.000000476837
+0x80, 0x00, 0x00, 0x00, // 0.000000238419
+0x40, 0x00, 0x00, 0x00, // 0.000000119209
+0x20, 0x00, 0x00, 0x00, // 0.000000059605
+0x10, 0x00, 0x00, 0x00, // 0.000000029802
+0x08, 0x00, 0x00, 0x00, // 0.000000014901
+0x04, 0x00, 0x00, 0x00, // 0.000000007451
+0x02, 0x00, 0x00, 0x00, // 0.000000003725
+0x01, 0x00, 0x00, 0x00 // 0.000000001863
+};
+
+#endif
+
/* Version 1.0 - Initial release */
+#define SDCC_MATH_LIB
#include <math.h>
#include <errno.h>
#include <stdbool.h>
+
+#ifdef MATH_ASM_MCS51
+
+#define SDCC_FLOAT_LIB
+#include <float.h>
+
+// TODO: share with other temps
+static bit sign_bit;
+static data unsigned char expf_y[4];
+static unsigned char n;
+
+
+float expf(float x)
+{
+ x;
+ _asm
+ mov c, acc.7
+ mov _sign_bit, c // remember sign
+ clr acc.7 // and make input positive
+ mov r0, a
+ mov c, b.7
+ rlc a // extract exponent
+ add a, #153
+ jc expf_not_zero
+ // input is a very small number, so e^x is 1.000000
+ mov dptr, #0
+ mov b, #0x80
+ mov a, #0x3F
+ ret
+expf_not_zero:
+ // TODO: check exponent for very small values, and return zero
+ mov _n, #0
+ mov a, dpl
+ add a, #0xE8 // is x >= 0.69314718
+ mov a, dph
+ addc a, #0x8D
+ mov a, b
+ addc a, #0xCE
+ mov a, r0
+ addc a, #0xC0
+ mov a, r0
+ jnc expf_no_range_reduction
+expf_range_reduction:
+ mov (_expf_y + 0), dpl // keep copy of x in "exp_y"
+ mov (_expf_y + 1), dph
+ mov (_expf_y + 2), b
+ mov (_expf_y + 3), a
+ mov r0, #0x3B
+ push ar0
+ mov r0, #0xAA
+ push ar0
+ mov r0, #0xB8
+ push ar0
+ mov r0, #0x3F
+ push ar0
+ lcall ___fsmul // x * 1.442695041 = x / ln(2)
+ dec sp
+ dec sp
+ dec sp
+ dec sp
+ lcall ___fs2uchar // n = int(x * 1.442695041)
+ mov a, dpl
+ mov _n, a
+ add a, #128
+ jnc expf_range_ok
+ ljmp fs_return_inf // exponent overflow
+expf_range_ok:
+ mov r0,#0x00
+ mov r1,#0x80
+ mov r2,#0x31
+ mov r3,#0xBF
+ lcall expf_scale_and_add
+ mov (_expf_y + 0), dpl
+ mov (_expf_y + 1), dph
+ mov (_expf_y + 2), b
+ mov (_expf_y + 3), a
+ mov r0,#0x83
+ mov r1,#0x80
+ mov r2,#0x5E
+ mov r3,#0x39
+ lcall expf_scale_and_add
+expf_no_range_reduction:
+
+
+// Compute e^x using the cordic algorithm. This works over an
+// input range of 0 to 0.69314712. Can be extended to work from
+// 0 to 1.0 if the results are normalized, but over the range
+// we use, the result is always from 1.0 to 1.99999988 (fixed
+// exponent of 127)
+
+expf_cordic_begin:
+ mov c, b.7
+ rlc a // extract exponent to acc
+ setb b.7
+ mov r1, dpl // mantissa to r4/r3/r2/r1
+ mov r2, dph
+ mov r3, b
+ mov r4, #0
+
+ // first, align the input into a 32 bit long
+ cjne a, #121, exp_lshift
+ sjmp exp_noshift
+exp_lshift:
+ jc exp_rshift
+ // exp_a is greater than 121, so left shift
+ add a, #135
+ lcall fs_lshift_a
+ sjmp exp_noshift
+exp_rshift:
+ // exp_a is less than 121, so right shift
+ cpl a
+ add a, #122
+ lcall fs_rshift_a
+exp_noshift: // r4/r3/r2/r1 = x
+ clr a
+ mov (_expf_y + 0), a // y = 1.0;
+ mov (_expf_y + 1), a
+ mov (_expf_y + 2), a
+ mov (_expf_y + 3), #0x20
+ mov dptr, #__fs_natural_log_table
+ mov r0, a // r0 = i
+exp_cordic_loop:
+ clr a
+ movc a, @a+dptr
+ mov b, a
+ inc dptr
+ clr a
+ movc a, @a+dptr // r7/r6/r5/b = table[i]
+ mov r5, a
+ inc dptr
+ clr a
+ movc a, @a+dptr
+ mov r6, a
+ inc dptr
+ clr a
+ movc a, @a+dptr
+ mov r7, a
+ inc dptr
+ clr c
+ mov a, r1
+ subb a, b // compare x to table[i]
+ mov a, r2
+ subb a, r5
+ mov a, r3
+ subb a, r6
+ mov a, r4
+ subb a, r7
+ jc exp_cordic_skip // if table[i] < x
+ clr c
+ mov a, r1
+ subb a, b
+ mov r1, a // x -= table[i]
+ mov a, r2
+ subb a, r5
+ mov r2, a
+ mov a, r3
+ subb a, r6
+ mov r3, a
+ mov a, r4
+ subb a, r7
+ mov r4, a
+ mov b, (_expf_y + 0)
+ mov r5, (_expf_y + 1) // r7/r6/r5/b = y >> i
+ mov r6, (_expf_y + 2)
+ mov r7, (_expf_y + 3)
+ mov a, r0
+ lcall __fs_cordic_rshift_r765_unsigned
+ mov a, (_expf_y + 0)
+ add a, b
+ mov (_expf_y + 0), a
+ mov a, (_expf_y + 1)
+ addc a, r5
+ mov (_expf_y + 1), a // y += (y >> i)
+ mov a, (_expf_y + 2)
+ addc a, r6
+ mov (_expf_y + 2), a
+ mov a, (_expf_y + 3)
+ addc a, r7
+ mov (_expf_y + 3), a
+exp_cordic_skip:
+ inc r0
+ cjne r0, #27, exp_cordic_loop
+ mov r4, (_expf_y + 3)
+ mov r3, (_expf_y + 2)
+ mov r2, (_expf_y + 1)
+ mov r1, (_expf_y + 0)
+ lcall fs_normalize_a // end of cordic
+
+ mov a, #127
+ add a, _n // ldexpf(x, n)
+ mov exp_a, a
+ lcall fs_round_and_return
+
+ jnb _sign_bit, expf_done
+ push dpl
+ push dph
+ push b
+ push acc
+ mov dptr, #0
+ mov b, #0x80
+ mov a, #0x3F
+ lcall ___fsdiv // 1.0 / x
+ dec sp
+ dec sp
+ dec sp
+ dec sp
+expf_done:
+ _endasm;
+#pragma less_pedantic
+}
+
+
+
+static void dummy1(void) _naked
+{
+ _asm
+ .globl fs_lshift_a
+expf_scale_and_add:
+ push ar0
+ push ar1
+ push ar2
+ push ar3
+ mov dpl, _n
+ lcall ___uchar2fs // turn n into float
+ lcall ___fsmul // n * scale_factor
+ dec sp
+ dec sp
+ dec sp
+ dec sp
+ push dpl
+ push dph
+ push b
+ push acc
+ mov dpl, (_expf_y + 0)
+ mov dph, (_expf_y + 1)
+ mov b, (_expf_y + 2)
+ mov a, (_expf_y + 3)
+ lcall ___fsadd // x += (n * scale_factor)
+ dec sp
+ dec sp
+ dec sp
+ dec sp
+ ret
+ _endasm;
+}
+
+static void dummy(void) _naked
+{
+ _asm
+ .globl fs_lshift_a
+fs_lshift_a:
+ jz fs_lshift_done
+ push ar0
+ mov r0, a
+fs_lshift_loop:
+ clr c
+ mov a, r1
+ rlc a
+ mov r1, a
+ mov a, r2
+ rlc a
+ mov r2, a
+ mov a, r3
+ rlc a
+ mov r3, a
+ mov a, r4
+ rlc a
+ mov r4, a
+ djnz r0, fs_lshift_loop
+ pop ar0
+fs_lshift_done:
+ ret
+ _endasm;
+}
+
+
+
+#else // not MATH_ASM_MCS51
+
+
#define P0 0.2499999995E+0
#define P1 0.4160288626E-2
#define Q0 0.5000000000E+0
return z;
}
+#endif
+
_fsreturnval
_fsrshift
_fsswapargs
+_logexpf
/* Version 1.0 - Initial release */
+#define SDCC_MATH_LIB
#include <math.h>
#include <errno.h>
-/*Constans for 24 bits or less (8 decimal digits)*/
+
+#ifdef MATH_ASM_MCS51
+
+#define SDCC_FLOAT_LIB
+#include <float.h>
+
+// TODO: share with other temps
+static data unsigned char logf_tmp[4];
+
+float logf(float x)
+{
+ x;
+ _asm
+
+ // extract the two input, placing it into:
+ // sign exponent mantiassa
+ // ---- -------- ---------
+ // x: sign_a exp_a r4/r3/r2
+
+ lcall fsgetarg
+logf_neg_check:
+ jnb sign_a, logf_zero_check
+ // TODO: set errno to EDOM (negative numbers not allowed)
+ ljmp fs_return_nan
+
+logf_zero_check:
+ cjne r4, #0, logf_ok
+ // TODO: set errno to ERANGE (zero not allowed)
+ setb sign_a
+ ljmp fs_return_inf
+
+logf_ok:
+ push exp_a
+ mov a, #3
+ mov r1, #0
+ lcall fs_rshift_a
+
+ clr a
+ mov (_logf_tmp + 0), a // y = 0
+ mov (_logf_tmp + 1), a
+ mov (_logf_tmp + 2), a
+ mov (_logf_tmp + 3), a
+ mov dptr, #__fs_natural_log_table
+ mov r0, a
+logf_cordic_loop:
+ mov ar7, r4 // r7/r6/r5 = x >> i
+ mov ar6, r3
+ mov ar5, r2
+ mov b, r1
+ mov a, r0
+ lcall __fs_cordic_rshift_r765_unsigned
+ mov a, r1 // check if x + (x >> i) > 1.0
+ add a, b
+ mov a, r2
+ addc a, r5
+ mov a, r3
+ addc a, r6
+ mov a, r4
+ addc a, r7
+ anl a, #0xE0
+ jnz logf_cordic_skip
+ mov a, r1 // x = x + (x >> i)
+ add a, b
+ mov r1, a
+ mov a, r2
+ addc a, r5
+ mov r2, a
+ mov a, r3
+ addc a, r6
+ mov r3, a
+ mov a, r4
+ addc a, r7
+ mov r4, a
+ clr a // y = y + log_table[i]
+ movc a, @a+dptr
+ add a, (_logf_tmp + 0)
+ mov (_logf_tmp + 0), a
+ mov a, #1
+ movc a, @a+dptr
+ addc a, (_logf_tmp + 1)
+ mov (_logf_tmp + 1), a
+ mov a, #2
+ movc a, @a+dptr
+ addc a, (_logf_tmp + 2)
+ mov (_logf_tmp + 2), a
+ mov a, #3
+ movc a, @a+dptr
+ addc a, (_logf_tmp + 3)
+ mov (_logf_tmp + 3), a
+logf_cordic_skip:
+ inc dptr
+ inc dptr
+ inc dptr
+ inc dptr
+ inc r0
+ cjne r0, #30, logf_cordic_loop
+ // at this point, _logf_tmp has the natural log of the positive
+ // normalized fractional part (0.5 to 1.0 -> 0.693 to 0.0)
+ mov r4, (_logf_tmp + 3)
+ mov r3, (_logf_tmp + 2)
+ mov r2, (_logf_tmp + 1)
+ mov r1, (_logf_tmp + 0)
+ mov exp_a, #129
+ setb sign_a
+ lcall fs_normalize_a
+ pop acc
+ cjne a, #126, logf_exponent
+ // if the input exponent was 126, then we don't need to add
+ // anything and we can just return the with we have already
+
+ // TODO: which of these gives best accuracy???
+ ljmp fs_zerocheck_return
+ //ljmp fs_round_and_return
+logf_exponent:
+ jc logf_exp_neg
+ // the input exponent was greater than 126
+logf_exp_pos:
+ add a, #130
+ clr sign_b
+ sjmp logf_exp_scale
+logf_exp_neg:
+ // the input exponent was less than 126
+ cpl a
+ add a, #127
+ setb sign_b
+logf_exp_scale:
+ // r0 has abs(exp - 126)
+ mov r0, a
+ // put the log of faction into b, so we can use a to compute
+ // the offset to be added to it or subtracted from it
+ lcall fs_swap_a_b
+ // multiply r0 by log(2), or r0 * 0xB17218
+ mov a, #0x18
+ mov b, r0
+ mul ab
+ mov r1, a
+ mov r2, b
+ mov a, #0xB1
+ mov b, r0
+ mul ab
+ mov r3, a
+ mov r4, b
+ mov a, #0x72
+ mov b, r0
+ mul ab
+ add a, r2
+ mov r2, a
+ mov a, b
+ addc a, r3
+ mov r3, a
+ clr a
+ addc a, r4
+ mov r4, a
+ // turn r0 * log(2) into a proper float
+ mov exp_a, #134
+ lcall fs_normalize_a
+ // now just add log(fractional) +/- log(2) * abs(exp - 126)
+ ljmp fsadd_direct_entry
+ _endasm;
+#pragma less_pedantic
+}
+
+
+#else // not MATH_ASM_MCS51
+
+
+
+
+/*Constants for 24 bits or less (8 decimal digits)*/
#define A0 -0.5527074855E+0
#define B0 -0.6632718214E+1
#define A(w) (A0)
xn=n;
return ((xn*C2+Rz)+xn*C1);
}
+
+#endif
+