From b701130edff0ea3adb8ecbdac46c2f8bb1b95af3 Mon Sep 17 00:00:00 2001 From: pjs Date: Sat, 1 Jan 2005 04:08:04 +0000 Subject: [PATCH] Added mcs51 asm versions of logf and expf git-svn-id: https://sdcc.svn.sourceforge.net/svnroot/sdcc/trunk/sdcc@3624 4a8a32a2-be11-0410-ad9d-d568d2c75423 --- ChangeLog | 9 ++ device/include/math.h | 8 ++ device/lib/Makefile.in | 2 +- device/lib/_logexpf.c | 92 +++++++++++++ device/lib/expf.c | 283 ++++++++++++++++++++++++++++++++++++++++ device/lib/libfloat.lib | 1 + device/lib/logf.c | 174 +++++++++++++++++++++++- 7 files changed, 567 insertions(+), 2 deletions(-) create mode 100644 device/lib/_logexpf.c diff --git a/ChangeLog b/ChangeLog index 4e21c03f..b1bfac37 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2004-12-31 Paul Stoffregen + + * device/lib/logf.c: added mcs51 assembly version + * device/lib/expf.c: added mcs51 assembly version + * device/lib/_logexpf.c: new shared asm code for expf and logf + * device/include/math.h: add defines for assembly math library + * device/lib/Makefile.in: build new _logexpf.c + * device/lib/libfloat.lib: use new _logexpf.c + 2004-12-29 Slade Rich * src/pic/device.c diff --git a/device/include/math.h b/device/include/math.h index ce084d03..f5f0b709 100644 --- a/device/include/math.h +++ b/device/include/math.h @@ -42,6 +42,14 @@ union float_long long l; }; +#if defined(SDCC_MATH_LIB) && defined(SDCC_mcs51) && !defined(SDCC_USE_XSTACK) && !defined(SDCC_STACK_AUTO) && !defined(_SDCC_NO_ASM_LIB_FUNCS) +// Compile the mcs51 assembly version only when all these +// conditions are met. Since not all the functions are +// reentrant, do not compile with --stack-auto is used. +#define MATH_ASM_MCS51 +#endif + + /* Functions on the z80 & gbz80 are always reentrant and so the "reentrant" */ /* keyword is not defined. */ #if defined(SDCC_z80) || defined(SDCC_gbz80) diff --git a/device/lib/Makefile.in b/device/lib/Makefile.in index 97c226b4..9a662a9f 100644 --- a/device/lib/Makefile.in +++ b/device/lib/Makefile.in @@ -68,7 +68,7 @@ SOURCES = _atof.c _atoi.c _atol.c _autobaud.c _bp.c _schar2fs.c \ printf_fast.c printf_fast_f.c printf_tiny.c \ assert.c time.c bpx.c \ _fsget1arg.c _fsget2args.c _fsnormalize.c \ - _fsreturnval.c _fsrshift.c _fsswapargs.c \ + _fsreturnval.c _fsrshift.c _fsswapargs.c _logexpf.c \ fabsf.c frexpf.c ldexpf.c expf.c powf.c sincosf.c sinf.c \ cosf.c logf.c log10f.c sqrtf.c tancotf.c tanf.c cotf.c \ asincosf.c asinf.c acosf.c atanf.c atan2f.c sincoshf.c \ diff --git a/device/lib/_logexpf.c b/device/lib/_logexpf.c new file mode 100644 index 00000000..bd9b1003 --- /dev/null +++ b/device/lib/_logexpf.c @@ -0,0 +1,92 @@ +#define SDCC_MATH_LIB +#include + + +#ifdef MATH_ASM_MCS51 + +// This code is shared by both logf() and expf(), so it goes in this +// separate file to allow the linker to include it when either +// function is needed, but only 1 copy when both are used. + +void _fs_cordic_rshift_r765_unsigned(void) +{ + _asm + add a, #248 + jnc 00003$ + mov b, r5 + mov r5, ar6 + mov r6, ar7 + mov r7, #0 + add a, #248 + jnc 00003$ + mov b, r5 + mov r5, ar6 + mov r6, #0 + add a, #248 + jnc 00003$ + mov b, r5 + mov r5, #0 + add a, #248 + jnc 00003$ + mov b, #0 + ret +00003$: + add a, #8 + jz 00030$ + push ar0 + mov r0, a +00010$: + clr c + mov a, r7 + rrc a + mov r7, a + mov a, r6 + rrc a + mov r6, a + mov a, r5 + rrc a + mov r5, a + mov a, b + rrc a + mov b, a + djnz r0, 00010$ + pop ar0 +00030$: + _endasm; +} + +code unsigned char _fs_natural_log_table[] = { +0xFF, 0x42, 0x2E, 0x16, // 0.693147180560 +0xF6, 0x91, 0xF9, 0x0C, // 0.405465108108 +0xF2, 0xFD, 0x23, 0x07, // 0.223143551314 +0xEE, 0xE0, 0xC4, 0x03, // 0.117783035656 +0x0C, 0xA3, 0xF0, 0x01, // 0.060624621816 +0xD8, 0x14, 0xFC, 0x00, // 0.030771658667 +0xA3, 0x02, 0x7F, 0x00, // 0.015504186536 +0x55, 0xC0, 0x3F, 0x00, // 0.007782140442 +0x0B, 0xF0, 0x1F, 0x00, // 0.003898640416 +0x01, 0xFC, 0x0F, 0x00, // 0.001951220131 +0x00, 0xFF, 0x07, 0x00, // 0.000976085973 +0xC0, 0xFF, 0x03, 0x00, // 0.000488162080 +0xF0, 0xFF, 0x01, 0x00, // 0.000244110828 +0xFC, 0xFF, 0x00, 0x00, // 0.000122062863 +0xFF, 0x7F, 0x00, 0x00, // 0.000061033294 +0x00, 0x40, 0x00, 0x00, // 0.000030517112 +0x00, 0x20, 0x00, 0x00, // 0.000015258673 +0x00, 0x10, 0x00, 0x00, // 0.000007629365 +0x00, 0x08, 0x00, 0x00, // 0.000003814690 +0x00, 0x04, 0x00, 0x00, // 0.000001907347 +0x00, 0x02, 0x00, 0x00, // 0.000000953674 +0x00, 0x01, 0x00, 0x00, // 0.000000476837 +0x80, 0x00, 0x00, 0x00, // 0.000000238419 +0x40, 0x00, 0x00, 0x00, // 0.000000119209 +0x20, 0x00, 0x00, 0x00, // 0.000000059605 +0x10, 0x00, 0x00, 0x00, // 0.000000029802 +0x08, 0x00, 0x00, 0x00, // 0.000000014901 +0x04, 0x00, 0x00, 0x00, // 0.000000007451 +0x02, 0x00, 0x00, 0x00, // 0.000000003725 +0x01, 0x00, 0x00, 0x00 // 0.000000001863 +}; + +#endif + diff --git a/device/lib/expf.c b/device/lib/expf.c index 1bcd8145..84b73c60 100644 --- a/device/lib/expf.c +++ b/device/lib/expf.c @@ -21,10 +21,291 @@ /* Version 1.0 - Initial release */ +#define SDCC_MATH_LIB #include #include #include + +#ifdef MATH_ASM_MCS51 + +#define SDCC_FLOAT_LIB +#include + +// TODO: share with other temps +static bit sign_bit; +static data unsigned char expf_y[4]; +static unsigned char n; + + +float expf(float x) +{ + x; + _asm + mov c, acc.7 + mov _sign_bit, c // remember sign + clr acc.7 // and make input positive + mov r0, a + mov c, b.7 + rlc a // extract exponent + add a, #153 + jc expf_not_zero + // input is a very small number, so e^x is 1.000000 + mov dptr, #0 + mov b, #0x80 + mov a, #0x3F + ret +expf_not_zero: + // TODO: check exponent for very small values, and return zero + mov _n, #0 + mov a, dpl + add a, #0xE8 // is x >= 0.69314718 + mov a, dph + addc a, #0x8D + mov a, b + addc a, #0xCE + mov a, r0 + addc a, #0xC0 + mov a, r0 + jnc expf_no_range_reduction +expf_range_reduction: + mov (_expf_y + 0), dpl // keep copy of x in "exp_y" + mov (_expf_y + 1), dph + mov (_expf_y + 2), b + mov (_expf_y + 3), a + mov r0, #0x3B + push ar0 + mov r0, #0xAA + push ar0 + mov r0, #0xB8 + push ar0 + mov r0, #0x3F + push ar0 + lcall ___fsmul // x * 1.442695041 = x / ln(2) + dec sp + dec sp + dec sp + dec sp + lcall ___fs2uchar // n = int(x * 1.442695041) + mov a, dpl + mov _n, a + add a, #128 + jnc expf_range_ok + ljmp fs_return_inf // exponent overflow +expf_range_ok: + mov r0,#0x00 + mov r1,#0x80 + mov r2,#0x31 + mov r3,#0xBF + lcall expf_scale_and_add + mov (_expf_y + 0), dpl + mov (_expf_y + 1), dph + mov (_expf_y + 2), b + mov (_expf_y + 3), a + mov r0,#0x83 + mov r1,#0x80 + mov r2,#0x5E + mov r3,#0x39 + lcall expf_scale_and_add +expf_no_range_reduction: + + +// Compute e^x using the cordic algorithm. This works over an +// input range of 0 to 0.69314712. Can be extended to work from +// 0 to 1.0 if the results are normalized, but over the range +// we use, the result is always from 1.0 to 1.99999988 (fixed +// exponent of 127) + +expf_cordic_begin: + mov c, b.7 + rlc a // extract exponent to acc + setb b.7 + mov r1, dpl // mantissa to r4/r3/r2/r1 + mov r2, dph + mov r3, b + mov r4, #0 + + // first, align the input into a 32 bit long + cjne a, #121, exp_lshift + sjmp exp_noshift +exp_lshift: + jc exp_rshift + // exp_a is greater than 121, so left shift + add a, #135 + lcall fs_lshift_a + sjmp exp_noshift +exp_rshift: + // exp_a is less than 121, so right shift + cpl a + add a, #122 + lcall fs_rshift_a +exp_noshift: // r4/r3/r2/r1 = x + clr a + mov (_expf_y + 0), a // y = 1.0; + mov (_expf_y + 1), a + mov (_expf_y + 2), a + mov (_expf_y + 3), #0x20 + mov dptr, #__fs_natural_log_table + mov r0, a // r0 = i +exp_cordic_loop: + clr a + movc a, @a+dptr + mov b, a + inc dptr + clr a + movc a, @a+dptr // r7/r6/r5/b = table[i] + mov r5, a + inc dptr + clr a + movc a, @a+dptr + mov r6, a + inc dptr + clr a + movc a, @a+dptr + mov r7, a + inc dptr + clr c + mov a, r1 + subb a, b // compare x to table[i] + mov a, r2 + subb a, r5 + mov a, r3 + subb a, r6 + mov a, r4 + subb a, r7 + jc exp_cordic_skip // if table[i] < x + clr c + mov a, r1 + subb a, b + mov r1, a // x -= table[i] + mov a, r2 + subb a, r5 + mov r2, a + mov a, r3 + subb a, r6 + mov r3, a + mov a, r4 + subb a, r7 + mov r4, a + mov b, (_expf_y + 0) + mov r5, (_expf_y + 1) // r7/r6/r5/b = y >> i + mov r6, (_expf_y + 2) + mov r7, (_expf_y + 3) + mov a, r0 + lcall __fs_cordic_rshift_r765_unsigned + mov a, (_expf_y + 0) + add a, b + mov (_expf_y + 0), a + mov a, (_expf_y + 1) + addc a, r5 + mov (_expf_y + 1), a // y += (y >> i) + mov a, (_expf_y + 2) + addc a, r6 + mov (_expf_y + 2), a + mov a, (_expf_y + 3) + addc a, r7 + mov (_expf_y + 3), a +exp_cordic_skip: + inc r0 + cjne r0, #27, exp_cordic_loop + mov r4, (_expf_y + 3) + mov r3, (_expf_y + 2) + mov r2, (_expf_y + 1) + mov r1, (_expf_y + 0) + lcall fs_normalize_a // end of cordic + + mov a, #127 + add a, _n // ldexpf(x, n) + mov exp_a, a + lcall fs_round_and_return + + jnb _sign_bit, expf_done + push dpl + push dph + push b + push acc + mov dptr, #0 + mov b, #0x80 + mov a, #0x3F + lcall ___fsdiv // 1.0 / x + dec sp + dec sp + dec sp + dec sp +expf_done: + _endasm; +#pragma less_pedantic +} + + + +static void dummy1(void) _naked +{ + _asm + .globl fs_lshift_a +expf_scale_and_add: + push ar0 + push ar1 + push ar2 + push ar3 + mov dpl, _n + lcall ___uchar2fs // turn n into float + lcall ___fsmul // n * scale_factor + dec sp + dec sp + dec sp + dec sp + push dpl + push dph + push b + push acc + mov dpl, (_expf_y + 0) + mov dph, (_expf_y + 1) + mov b, (_expf_y + 2) + mov a, (_expf_y + 3) + lcall ___fsadd // x += (n * scale_factor) + dec sp + dec sp + dec sp + dec sp + ret + _endasm; +} + +static void dummy(void) _naked +{ + _asm + .globl fs_lshift_a +fs_lshift_a: + jz fs_lshift_done + push ar0 + mov r0, a +fs_lshift_loop: + clr c + mov a, r1 + rlc a + mov r1, a + mov a, r2 + rlc a + mov r2, a + mov a, r3 + rlc a + mov r3, a + mov a, r4 + rlc a + mov r4, a + djnz r0, fs_lshift_loop + pop ar0 +fs_lshift_done: + ret + _endasm; +} + + + +#else // not MATH_ASM_MCS51 + + #define P0 0.2499999995E+0 #define P1 0.4160288626E-2 #define Q0 0.5000000000E+0 @@ -85,3 +366,5 @@ float expf(const float x) return z; } +#endif + diff --git a/device/lib/libfloat.lib b/device/lib/libfloat.lib index 59abc3c5..ee97a369 100644 --- a/device/lib/libfloat.lib +++ b/device/lib/libfloat.lib @@ -53,3 +53,4 @@ _fsnormalize _fsreturnval _fsrshift _fsswapargs +_logexpf diff --git a/device/lib/logf.c b/device/lib/logf.c index 75c3f71b..31907a07 100644 --- a/device/lib/logf.c +++ b/device/lib/logf.c @@ -21,10 +21,179 @@ /* Version 1.0 - Initial release */ +#define SDCC_MATH_LIB #include #include -/*Constans for 24 bits or less (8 decimal digits)*/ + +#ifdef MATH_ASM_MCS51 + +#define SDCC_FLOAT_LIB +#include + +// TODO: share with other temps +static data unsigned char logf_tmp[4]; + +float logf(float x) +{ + x; + _asm + + // extract the two input, placing it into: + // sign exponent mantiassa + // ---- -------- --------- + // x: sign_a exp_a r4/r3/r2 + + lcall fsgetarg +logf_neg_check: + jnb sign_a, logf_zero_check + // TODO: set errno to EDOM (negative numbers not allowed) + ljmp fs_return_nan + +logf_zero_check: + cjne r4, #0, logf_ok + // TODO: set errno to ERANGE (zero not allowed) + setb sign_a + ljmp fs_return_inf + +logf_ok: + push exp_a + mov a, #3 + mov r1, #0 + lcall fs_rshift_a + + clr a + mov (_logf_tmp + 0), a // y = 0 + mov (_logf_tmp + 1), a + mov (_logf_tmp + 2), a + mov (_logf_tmp + 3), a + mov dptr, #__fs_natural_log_table + mov r0, a +logf_cordic_loop: + mov ar7, r4 // r7/r6/r5 = x >> i + mov ar6, r3 + mov ar5, r2 + mov b, r1 + mov a, r0 + lcall __fs_cordic_rshift_r765_unsigned + mov a, r1 // check if x + (x >> i) > 1.0 + add a, b + mov a, r2 + addc a, r5 + mov a, r3 + addc a, r6 + mov a, r4 + addc a, r7 + anl a, #0xE0 + jnz logf_cordic_skip + mov a, r1 // x = x + (x >> i) + add a, b + mov r1, a + mov a, r2 + addc a, r5 + mov r2, a + mov a, r3 + addc a, r6 + mov r3, a + mov a, r4 + addc a, r7 + mov r4, a + clr a // y = y + log_table[i] + movc a, @a+dptr + add a, (_logf_tmp + 0) + mov (_logf_tmp + 0), a + mov a, #1 + movc a, @a+dptr + addc a, (_logf_tmp + 1) + mov (_logf_tmp + 1), a + mov a, #2 + movc a, @a+dptr + addc a, (_logf_tmp + 2) + mov (_logf_tmp + 2), a + mov a, #3 + movc a, @a+dptr + addc a, (_logf_tmp + 3) + mov (_logf_tmp + 3), a +logf_cordic_skip: + inc dptr + inc dptr + inc dptr + inc dptr + inc r0 + cjne r0, #30, logf_cordic_loop + // at this point, _logf_tmp has the natural log of the positive + // normalized fractional part (0.5 to 1.0 -> 0.693 to 0.0) + mov r4, (_logf_tmp + 3) + mov r3, (_logf_tmp + 2) + mov r2, (_logf_tmp + 1) + mov r1, (_logf_tmp + 0) + mov exp_a, #129 + setb sign_a + lcall fs_normalize_a + pop acc + cjne a, #126, logf_exponent + // if the input exponent was 126, then we don't need to add + // anything and we can just return the with we have already + + // TODO: which of these gives best accuracy??? + ljmp fs_zerocheck_return + //ljmp fs_round_and_return +logf_exponent: + jc logf_exp_neg + // the input exponent was greater than 126 +logf_exp_pos: + add a, #130 + clr sign_b + sjmp logf_exp_scale +logf_exp_neg: + // the input exponent was less than 126 + cpl a + add a, #127 + setb sign_b +logf_exp_scale: + // r0 has abs(exp - 126) + mov r0, a + // put the log of faction into b, so we can use a to compute + // the offset to be added to it or subtracted from it + lcall fs_swap_a_b + // multiply r0 by log(2), or r0 * 0xB17218 + mov a, #0x18 + mov b, r0 + mul ab + mov r1, a + mov r2, b + mov a, #0xB1 + mov b, r0 + mul ab + mov r3, a + mov r4, b + mov a, #0x72 + mov b, r0 + mul ab + add a, r2 + mov r2, a + mov a, b + addc a, r3 + mov r3, a + clr a + addc a, r4 + mov r4, a + // turn r0 * log(2) into a proper float + mov exp_a, #134 + lcall fs_normalize_a + // now just add log(fractional) +/- log(2) * abs(exp - 126) + ljmp fsadd_direct_entry + _endasm; +#pragma less_pedantic +} + + +#else // not MATH_ASM_MCS51 + + + + +/*Constants for 24 bits or less (8 decimal digits)*/ #define A0 -0.5527074855E+0 #define B0 -0.6632718214E+1 #define A(w) (A0) @@ -68,3 +237,6 @@ float logf(const float x) _FLOAT_FUNC_REENTRANT xn=n; return ((xn*C2+Rz)+xn*C1); } + +#endif + -- 2.30.2