git.gag.com Git - fw/sdcc/blob - device/lib/expf.c

   1 /*  expf.c: Computes e**x of a 32-bit float as outlined in [1]
   2
   3     Copyright (C) 2001, 2002  Jesus Calvino-Fraga, jesusc@ieee.org
   4
   5     This library is free software; you can redistribute it and/or
   6     modify it under the terms of the GNU Lesser General Public
   7     License as published by the Free Software Foundation; either
   8     version 2.1 of the License, or (at your option) any later version.
   9
  10     This library is distributed in the hope that it will be useful,
  11     but WITHOUT ANY WARRANTY; without even the implied warranty of
  12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13     Lesser General Public License for more details.
  14
  15     You should have received a copy of the GNU Lesser General Public
  16     License along with this library; if not, write to the Free Software
  17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA */
  18
  19 /* [1] William James Cody and W.  M.  Waite.  _Software manual for the
  20    elementary functions_, Englewood Cliffs, N.J.:Prentice-Hall, 1980. */
  21
  22 /* Version 1.0 - Initial release */
  23
  24 #define SDCC_MATH_LIB
  25 #include <math.h>
  26 #include <errno.h>
  27 #include <stdbool.h>
  28
  29
  30 #ifdef MATH_ASM_MCS51
  31
  32 #define SDCC_FLOAT_LIB
  33 #include <float.h>
  34
  35 // TODO: share with other temps
  36 static bit sign_bit;
  37 static data unsigned char expf_y[4];
  38 static data unsigned char n;
  39
  40
  41 float expf(float x)
  42 {
  43         x;
  44         _asm
  45         mov     c, acc.7
  46         mov     _sign_bit, c    // remember sign
  47         clr     acc.7           // and make input positive
  48         mov     r0, a
  49         mov     c, b.7
  50         rlc     a               // extract exponent
  51         add     a, #153
  52         jc      expf_not_zero
  53         // input is a very small number, so e^x is 1.000000
  54         mov     dptr, #0
  55         mov     b, #0x80
  56         mov     a, #0x3F
  57         ret
  58 expf_not_zero:
  59         // TODO: check exponent for very small values, and return zero
  60         mov     _n, #0
  61         mov     a, dpl
  62         add     a, #0xE8        // is x >= 0.69314718
  63         mov     a, dph
  64         addc    a, #0x8D
  65         mov     a, b
  66         addc    a, #0xCE
  67         mov     a, r0
  68         addc    a, #0xC0
  69         mov     a, r0
  70         jnc     expf_no_range_reduction
  71 expf_range_reduction:
  72         mov     (_expf_y + 0), dpl      // keep copy of x in "exp_y"
  73         mov     (_expf_y + 1), dph
  74         mov     (_expf_y + 2), b
  75         mov     (_expf_y + 3), a
  76         mov     r0, #0x3B
  77         push    ar0
  78         mov     r0, #0xAA
  79         push    ar0
  80         mov     r0, #0xB8
  81         push    ar0
  82         mov     r0, #0x3F
  83         push    ar0
  84         lcall   ___fsmul        // x * 1.442695041 = x / ln(2)
  85         dec     sp
  86         dec     sp
  87         dec     sp
  88         dec     sp
  89         lcall   ___fs2uchar     // n = int(x * 1.442695041)
  90         mov     a, dpl
  91         mov     _n, a
  92         add     a, #128
  93         jnc     expf_range_ok
  94         ljmp    fs_return_inf   // exponent overflow
  95 expf_range_ok:
  96         mov     r0,#0x00
  97         mov     r1,#0x80
  98         mov     r2,#0x31
  99         mov     r3,#0xBF
 100         lcall   expf_scale_and_add
 101         mov     (_expf_y + 0), dpl
 102         mov     (_expf_y + 1), dph
 103         mov     (_expf_y + 2), b
 104         mov     (_expf_y + 3), a
 105         mov     r0,#0x83
 106         mov     r1,#0x80
 107         mov     r2,#0x5E
 108         mov     r3,#0x39
 109         lcall   expf_scale_and_add
 110 expf_no_range_reduction:
 111
 112
 113 // Compute e^x using the cordic algorithm.  This works over an
 114 // input range of 0 to 0.69314712.  Can be extended to work from
 115 // 0 to 1.0 if the results are normalized, but over the range
 116 // we use, the result is always from 1.0 to 1.99999988 (fixed
 117 // exponent of 127)
 118
 119 expf_cordic_begin:
 120         mov     c, b.7
 121         rlc     a               // extract exponent to acc
 122         setb    b.7
 123         mov     r1, dpl         // mantissa to r4/r3/r2/r1
 124         mov     r2, dph
 125         mov     r3, b
 126         mov     r4, #0
 127
 128         // first, align the input into a 32 bit long
 129         cjne    a, #121, exp_lshift
 130         sjmp    exp_noshift
 131 exp_lshift:
 132         jc      exp_rshift
 133         // exp_a is greater than 121, so left shift
 134         add     a, #135
 135         lcall   fs_lshift_a
 136         sjmp    exp_noshift
 137 exp_rshift:
 138         // exp_a is less than 121, so right shift
 139         cpl     a
 140         add     a, #122
 141         lcall   fs_rshift_a
 142 exp_noshift:                            // r4/r3/r2/r1 = x
 143         clr     a
 144         mov     (_expf_y + 0), a        // y = 1.0;
 145         mov     (_expf_y + 1), a
 146         mov     (_expf_y + 2), a
 147         mov     (_expf_y + 3), #0x20
 148         mov     dptr, #__fs_natural_log_table
 149         mov     r0, a                   // r0 = i
 150 exp_cordic_loop:
 151         clr     a
 152         movc    a, @a+dptr
 153         mov     b, a
 154         inc     dptr
 155         clr     a
 156         movc    a, @a+dptr              // r7/r6/r5/b = table[i]
 157         mov     r5, a
 158         inc     dptr
 159         clr     a
 160         movc    a, @a+dptr
 161         mov     r6, a
 162         inc     dptr
 163         clr     a
 164         movc    a, @a+dptr
 165         mov     r7, a
 166         inc     dptr
 167         clr     c
 168         mov     a, r1
 169         subb    a, b                    // compare x to table[i]
 170         mov     a, r2
 171         subb    a, r5
 172         mov     a, r3
 173         subb    a, r6
 174         mov     a, r4
 175         subb    a, r7
 176         jc      exp_cordic_skip         // if table[i] < x
 177         clr     c
 178         mov     a, r1
 179         subb    a, b
 180         mov     r1, a                   // x -= table[i]
 181         mov     a, r2
 182         subb    a, r5
 183         mov     r2, a
 184         mov     a, r3
 185         subb    a, r6
 186         mov     r3, a
 187         mov     a, r4
 188         subb    a, r7
 189         mov     r4, a
 190         mov     b,  (_expf_y + 0)
 191         mov     r5, (_expf_y + 1)       // r7/r6/r5/b = y >> i
 192         mov     r6, (_expf_y + 2)
 193         mov     r7, (_expf_y + 3)
 194         mov     a, r0
 195         lcall   __fs_cordic_rshift_r765_unsigned
 196         mov     a, (_expf_y + 0)
 197         add     a, b
 198         mov     (_expf_y + 0), a
 199         mov     a, (_expf_y + 1)
 200         addc    a, r5
 201         mov     (_expf_y + 1), a        // y += (y >> i)
 202         mov     a, (_expf_y + 2)
 203         addc    a, r6
 204         mov     (_expf_y + 2), a
 205         mov     a, (_expf_y + 3)
 206         addc    a, r7
 207         mov     (_expf_y + 3), a
 208 exp_cordic_skip:
 209         inc     r0
 210         cjne    r0, #27, exp_cordic_loop
 211         mov     r4, (_expf_y + 3)
 212         mov     r3, (_expf_y + 2)
 213         mov     r2, (_expf_y + 1)
 214         mov     r1, (_expf_y + 0)
 215         lcall   fs_normalize_a          // end of cordic
 216
 217         mov     a, #127
 218         add     a, _n                   // ldexpf(x, n)
 219         mov     exp_a, a
 220         lcall   fs_round_and_return
 221
 222         jnb     _sign_bit, expf_done
 223         push    dpl
 224         push    dph
 225         push    b
 226         push    acc
 227         mov     dptr, #0
 228         mov     b, #0x80
 229         mov     a, #0x3F
 230         lcall   ___fsdiv                // 1.0 / x
 231         dec     sp
 232         dec     sp
 233         dec     sp
 234         dec     sp
 235 expf_done:
 236         _endasm;
 237 #pragma less_pedantic
 238 }
 239
 240
 241
 242 static void dummy1(void) _naked
 243 {
 244         _asm
 245         .globl  fs_lshift_a
 246 expf_scale_and_add:
 247         push    ar0
 248         push    ar1
 249         push    ar2
 250         push    ar3
 251         mov     dpl, _n
 252         lcall   ___uchar2fs     // turn n into float
 253         lcall   ___fsmul        // n * scale_factor
 254         dec     sp
 255         dec     sp
 256         dec     sp
 257         dec     sp
 258         push    dpl
 259         push    dph
 260         push    b
 261         push    acc
 262         mov     dpl, (_expf_y + 0)
 263         mov     dph, (_expf_y + 1)
 264         mov     b,   (_expf_y + 2)
 265         mov     a,   (_expf_y + 3)
 266         lcall   ___fsadd        // x += (n * scale_factor)
 267         dec     sp
 268         dec     sp
 269         dec     sp
 270         dec     sp
 271         ret
 272         _endasm;
 273 }
 274
 275 static void dummy(void) _naked
 276 {
 277         _asm
 278         .globl  fs_lshift_a
 279 fs_lshift_a:
 280         jz      fs_lshift_done
 281         push    ar0
 282         mov     r0, a
 283 fs_lshift_loop:
 284         clr     c
 285         mov     a, r1
 286         rlc     a
 287         mov     r1, a
 288         mov     a, r2
 289         rlc     a
 290         mov     r2, a
 291         mov     a, r3
 292         rlc     a
 293         mov     r3, a
 294         mov     a, r4
 295         rlc     a
 296         mov     r4, a
 297         djnz    r0, fs_lshift_loop
 298         pop     ar0
 299 fs_lshift_done:
 300         ret
 301         _endasm;
 302 }
 303
 304
 305
 306 #else // not MATH_ASM_MCS51
 307
 308
 309 #define P0      0.2499999995E+0
 310 #define P1      0.4160288626E-2
 311 #define Q0      0.5000000000E+0
 312 #define Q1      0.4998717877E-1
 313
 314 #define P(z) ((P1*z)+P0)
 315 #define Q(z) ((Q1*z)+Q0)
 316
 317 #define C1       0.693359375
 318 #define C2      -2.1219444005469058277e-4
 319
 320 #define BIGX    88.72283911  /* ln(XMAX) */
 321 #define EXPEPS  1.0E-7       /* exp(1.0E-7)=0.0000001 */
 322 #define K1      1.4426950409 /* 1/ln(2) */
 323
 324 float expf(const float x)
 325 {
 326     int n;
 327     float xn, g, r, z, y;
 328         BOOL sign;
 329
 330     if(x>=0.0)
 331         { y=x;  sign=0; }
 332     else
 333         { y=-x; sign=1; }
 334
 335     if(y<EXPEPS) return 1.0;
 336
 337     if(y>BIGX)
 338     {
 339         if(sign)
 340         {
 341             errno=ERANGE;
 342             return XMAX;
 343         }
 344         else
 345         {
 346             return 0.0;
 347         }
 348     }
 349
 350     z=y*K1;
 351     n=z;
 352
 353     if(n<0) --n;
 354     if(z-n>=0.5) ++n;
 355     xn=n;
 356     g=((y-xn*C1))-xn*C2;
 357     z=g*g;
 358     r=P(z)*g;
 359     r=0.5+(r/(Q(z)-r));
 360
 361     n++;
 362     z=ldexpf(r, n);
 363     if(sign)
 364         return 1.0/z;
 365     else
 366         return z;
 367 }
 368
 369 #endif
 370