git.gag.com Git - debian/cc1111/blob - device/lib/expf.c

   1 /*  expf.c: Computes e**x of a 32-bit float as outlined in [1]
   2
   3     Copyright (C) 2001, 2002  Jesus Calvino-Fraga, jesusc@ieee.org
   4
   5     This library is free software; you can redistribute it and/or
   6     modify it under the terms of the GNU Lesser General Public
   7     License as published by the Free Software Foundation; either
   8     version 2.1 of the License, or (at your option) any later version.
   9
  10     This library is distributed in the hope that it will be useful,
  11     but WITHOUT ANY WARRANTY; without even the implied warranty of
  12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13     Lesser General Public License for more details.
  14
  15     You should have received a copy of the GNU Lesser General Public
  16     License along with this library; if not, write to the Free Software
  17     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA */
  18
  19 /* [1] William James Cody and W.  M.  Waite.  _Software manual for the
  20    elementary functions_, Englewood Cliffs, N.J.:Prentice-Hall, 1980. */
  21
  22 /* Version 1.0 - Initial release */
  23
  24 #define SDCC_MATH_LIB
  25 #include <math.h>
  26 #include <errno.h>
  27 #include <stdbool.h>
  28
  29
  30 #ifdef MATH_ASM_MCS51
  31
  32 #define SDCC_FLOAT_LIB
  33 #include <float.h>
  34
  35 // TODO: share with other temps
  36 static __bit sign_bit;
  37 static __data unsigned char expf_y[4];
  38 static __data unsigned char n;
  39
  40
  41 float expf(float x)
  42 {
  43         x;
  44         __asm
  45         mov     c, acc.7
  46         mov     _sign_bit, c    // remember sign
  47         clr     acc.7           // and make input positive
  48         mov     r0, a
  49         mov     c, b.7
  50         rlc     a               // extract exponent
  51         add     a, #153
  52         jc      expf_not_zero
  53         // input is a very small number, so e^x is 1.000000
  54         mov     dptr, #0
  55         mov     b, #0x80
  56         mov     a, #0x3F
  57         ret
  58 expf_not_zero:
  59         // TODO: check exponent for very small values, and return zero
  60         mov     _n, #0
  61         mov     a, dpl
  62         add     a, #0xE8        // is x >= 0.69314718
  63         mov     a, dph
  64         addc    a, #0x8D
  65         mov     a, b
  66         addc    a, #0xCE
  67         mov     a, r0
  68         addc    a, #0xC0
  69         mov     a, r0
  70         jnc     expf_no_range_reduction
  71 expf_range_reduction:
  72         mov     (_expf_y + 0), dpl      // keep copy of x in "exp_y"
  73         mov     (_expf_y + 1), dph
  74         mov     (_expf_y + 2), b
  75         mov     (_expf_y + 3), a
  76         mov     r0, #0x3B
  77         push    ar0
  78         mov     r0, #0xAA
  79         push    ar0
  80         mov     r0, #0xB8
  81         push    ar0
  82         mov     r0, #0x3F
  83         push    ar0
  84         lcall   ___fsmul        // x * 1.442695041 = x / ln(2)
  85         dec     sp
  86         dec     sp
  87         dec     sp
  88         dec     sp
  89         lcall   ___fs2uchar     // n = int(x * 1.442695041)
  90         mov     a, dpl
  91         mov     _n, a
  92         add     a, #128
  93         jnc     expf_range_ok
  94         ljmp    fs_return_inf   // exponent overflow
  95 expf_range_ok:
  96         mov     r0,#0x00
  97         mov     r1,#0x80
  98         mov     r2,#0x31
  99         mov     r3,#0xBF
 100         lcall   expf_scale_and_add
 101         mov     (_expf_y + 0), dpl
 102         mov     (_expf_y + 1), dph
 103         mov     (_expf_y + 2), b
 104         mov     (_expf_y + 3), a
 105         mov     r0,#0x83
 106         mov     r1,#0x80
 107         mov     r2,#0x5E
 108         mov     r3,#0x39
 109         lcall   expf_scale_and_add
 110 expf_no_range_reduction:
 111
 112
 113 // Compute e^x using the cordic algorithm.  This works over an
 114 // input range of 0 to 0.69314712.  Can be extended to work from
 115 // 0 to 1.0 if the results are normalized, but over the range
 116 // we use, the result is always from 1.0 to 1.99999988 (fixed
 117 // exponent of 127)
 118
 119 expf_cordic_begin:
 120         mov     c, b.7
 121         rlc     a               // extract exponent to acc
 122         setb    b.7
 123         mov     r1, dpl         // mantissa to r4/r3/r2/r1
 124         mov     r2, dph
 125         mov     r3, b
 126         mov     r4, #0
 127
 128         // first, align the input into a 32 bit long
 129         cjne    a, #121, exp_lshift
 130         sjmp    exp_noshift
 131 exp_lshift:
 132         jc      exp_rshift
 133         // exp_a is greater than 121, so left shift
 134         add     a, #135
 135         lcall   fs_lshift_a
 136         sjmp    exp_noshift
 137 exp_rshift:
 138         // exp_a is less than 121, so right shift
 139         cpl     a
 140         add     a, #122
 141         lcall   fs_rshift_a
 142 exp_noshift:                            // r4/r3/r2/r1 = x
 143         clr     a
 144         mov     (_expf_y + 0), a        // y = 1.0;
 145         mov     (_expf_y + 1), a
 146         mov     (_expf_y + 2), a
 147         mov     (_expf_y + 3), #0x20
 148         mov     dptr, #__fs_natural_log_table
 149         mov     r0, a                   // r0 = i
 150 exp_cordic_loop:
 151         clr     a
 152         movc    a, @a+dptr
 153         mov     b, a
 154         inc     dptr
 155         clr     a
 156         movc    a, @a+dptr              // r7/r6/r5/b = table[i]
 157         mov     r5, a
 158         inc     dptr
 159         clr     a
 160         movc    a, @a+dptr
 161         mov     r6, a
 162         inc     dptr
 163         clr     a
 164         movc    a, @a+dptr
 165         mov     r7, a
 166         inc     dptr
 167         clr     c
 168         mov     a, r1
 169         subb    a, b                    // compare x to table[i]
 170         mov     a, r2
 171         subb    a, r5
 172         mov     a, r3
 173         subb    a, r6
 174         mov     a, r4
 175         subb    a, r7
 176         jc      exp_cordic_skip         // if table[i] < x
 177         clr     c
 178         mov     a, r1
 179         subb    a, b
 180         mov     r1, a                   // x -= table[i]
 181         mov     a, r2
 182         subb    a, r5
 183         mov     r2, a
 184         mov     a, r3
 185         subb    a, r6
 186         mov     r3, a
 187         mov     a, r4
 188         subb    a, r7
 189         mov     r4, a
 190         mov     b,  (_expf_y + 0)
 191         mov     r5, (_expf_y + 1)       // r7/r6/r5/b = y >> i
 192         mov     r6, (_expf_y + 2)
 193         mov     r7, (_expf_y + 3)
 194         mov     a, r0
 195         lcall   __fs_cordic_rshift_r765_unsigned
 196         mov     a, (_expf_y + 0)
 197         add     a, b
 198         mov     (_expf_y + 0), a
 199         mov     a, (_expf_y + 1)
 200         addc    a, r5
 201         mov     (_expf_y + 1), a        // y += (y >> i)
 202         mov     a, (_expf_y + 2)
 203         addc    a, r6
 204         mov     (_expf_y + 2), a
 205         mov     a, (_expf_y + 3)
 206         addc    a, r7
 207         mov     (_expf_y + 3), a
 208 exp_cordic_skip:
 209         inc     r0
 210         cjne    r0, #27, exp_cordic_loop
 211         mov     r4, (_expf_y + 3)
 212         mov     r3, (_expf_y + 2)
 213         mov     r2, (_expf_y + 1)
 214         mov     r1, (_expf_y + 0)
 215         lcall   fs_normalize_a          // end of cordic
 216
 217         mov     a, #127
 218         add     a, _n                   // ldexpf(x, n)
 219         mov     exp_a, a
 220         lcall   fs_round_and_return
 221
 222         jnb     _sign_bit, expf_done
 223         push    dpl
 224         push    dph
 225         push    b
 226         push    acc
 227         mov     dptr, #0
 228         mov     b, #0x80
 229         mov     a, #0x3F
 230         lcall   ___fsdiv                // 1.0 / x
 231         dec     sp
 232         dec     sp
 233         dec     sp
 234         dec     sp
 235 expf_done:
 236         clr     acc.7           // Result is always positive!
 237         __endasm;
 238 #pragma less_pedantic
 239 }
 240
 241 static void dummy1(void) __naked
 242 {
 243         __asm
 244         .globl  fs_lshift_a
 245 expf_scale_and_add:
 246         push    ar0
 247         push    ar1
 248         push    ar2
 249         push    ar3
 250         mov     dpl, _n
 251         lcall   ___uchar2fs     // turn n into float
 252         lcall   ___fsmul        // n * scale_factor
 253         dec     sp
 254         dec     sp
 255         dec     sp
 256         dec     sp
 257         push    dpl
 258         push    dph
 259         push    b
 260         push    acc
 261         mov     dpl, (_expf_y + 0)
 262         mov     dph, (_expf_y + 1)
 263         mov     b,   (_expf_y + 2)
 264         mov     a,   (_expf_y + 3)
 265         lcall   ___fsadd        // x += (n * scale_factor)
 266         dec     sp
 267         dec     sp
 268         dec     sp
 269         dec     sp
 270         ret
 271         __endasm;
 272 }
 273
 274 static void dummy(void) __naked
 275 {
 276         __asm
 277         .globl  fs_lshift_a
 278 fs_lshift_a:
 279         jz      fs_lshift_done
 280         push    ar0
 281         mov     r0, a
 282 fs_lshift_loop:
 283         clr     c
 284         mov     a, r1
 285         rlc     a
 286         mov     r1, a
 287         mov     a, r2
 288         rlc     a
 289         mov     r2, a
 290         mov     a, r3
 291         rlc     a
 292         mov     r3, a
 293         mov     a, r4
 294         rlc     a
 295         mov     r4, a
 296         djnz    r0, fs_lshift_loop
 297         pop     ar0
 298 fs_lshift_done:
 299         ret
 300         __endasm;
 301 }
 302
 303 #else // not MATH_ASM_MCS51
 304
 305 #define P0      0.2499999995E+0
 306 #define P1      0.4160288626E-2
 307 #define Q0      0.5000000000E+0
 308 #define Q1      0.4998717877E-1
 309
 310 #define P(z) ((P1*z)+P0)
 311 #define Q(z) ((Q1*z)+Q0)
 312
 313 #define C1       0.693359375
 314 #define C2      -2.1219444005469058277e-4
 315
 316 #define BIGX    88.72283911  /* ln(HUGE_VALF) */
 317 #define EXPEPS  1.0E-7       /* exp(1.0E-7)=0.0000001 */
 318 #define K1      1.4426950409 /* 1/ln(2) */
 319
 320 float expf(const float x)
 321 {
 322     int n;
 323     float xn, g, r, z, y;
 324         BOOL sign;
 325
 326     if(x>=0.0)
 327         { y=x;  sign=0; }
 328     else
 329         { y=-x; sign=1; }
 330
 331     if(y<EXPEPS) return 1.0;
 332
 333     if(y>BIGX)
 334     {
 335         if(sign)
 336         {
 337             errno=ERANGE;
 338             return HUGE_VALF
 339             ;
 340         }
 341         else
 342         {
 343             return 0.0;
 344         }
 345     }
 346
 347     z=y*K1;
 348     n=z;
 349
 350     if(n<0) --n;
 351     if(z-n>=0.5) ++n;
 352     xn=n;
 353     g=((y-xn*C1))-xn*C2;
 354     z=g*g;
 355     r=P(z)*g;
 356     r=0.5+(r/(Q(z)-r));
 357
 358     n++;
 359     z=ldexpf(r, n);
 360     if(sign)
 361         return 1.0/z;
 362     else
 363         return z;
 364 }
 365
 366 #endif