git.gag.com Git - debian/cc1111/blob - device/lib/_fsmul.c

   1 /* Floating point library in optimized assembly for 8051
   2  * Copyright (c) 2004, Paul Stoffregen, paul@pjrc.com
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Library General Public License
   6  * as published by the Free Software Foundation; either version 2
   7  * of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write to the Free Software
  16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  17  */
  18
  19
  20 #define SDCC_FLOAT_LIB
  21 #include <float.h>
  22
  23
  24 #ifdef FLOAT_ASM_MCS51
  25
  26 // float __fsmul (float a, float b) __reentrant
     //
     // MCS-51 assembly implementation of single-precision multiply.  The C
     // function "dummy" is only a __naked wrapper that holds the assembly;
     // the callable entry point is the global label ___fsmul defined inside.
     //
     // Outline:
     //   1. call fsgetargs to unpack the operands (layout listed below)
     //   2. a zero high mantissa byte in either operand -> fs_return_zero
     //   3. result sign = sign_a XOR sign_b (kept in sign_a)
     //   4. an exponent of 0xFF in either operand -> fs_return_inf
     //   5. add the exponents and subtract 126 (done as +130 modulo 256),
     //      going to fs_return_zero on underflow and fs_return_inf on overflow
     //   6. 24x24-bit mantissa multiply built from nine 8x8 "mul ab" steps,
     //      keeping the upper bytes of the product in r4/r3/r2/r1
     //   7. call fs_normalize_a if bit 7 of r4 is clear, then jump to
     //      fs_round_and_return
     //
     // fsgetargs, fs_return_zero, fs_return_inf, fs_normalize_a and
     // fs_round_and_return are defined elsewhere; what is said about them
     // here is inferred from their names and the comments in this routine.
  27 static void dummy(void) __naked
  28 {
  29         __asm
  30         .globl  ___fsmul
  31 ___fsmul:
  32         // extract the two inputs, placing them into:
  33         //      sign     exponent   mantissa
  34         //      ----     --------   --------
  35         //  a:  sign_a   exp_a      r4/r3/r2
  36         //  b:  sign_b   exp_b      r7/r6/r5
  37
  38         lcall   fsgetargs
  39
  40         // first check if either input is zero
  41         cjne    r4, #0, 00002$          // high mantissa byte of a is nonzero: go test b
  42 00001$:
  43         ljmp    fs_return_zero
  44
  45 00002$:
  46         mov     a, r7
  47         jz      00001$                  // high mantissa byte of b is zero
  48
  49         // compute final sign bit
  50         jnb     sign_b, 00003$
  51         cpl     sign_a                  // sign_a = sign_a XOR sign_b
  52 00003$:
  53
  54         // check if either input is infinity
  55         mov     a, exp_b
  56         cjne    a, #0xFF, 00004$
  57         ljmp    fs_return_inf
  58 00004$:
  59         mov     a, exp_a
  60         cjne    a, #0xFF, 00005$
  61         ljmp    fs_return_inf
  62 00005$:
  63
  64         // add the exponents
  65         add     a, exp_b                // a = (exp_a + exp_b) mod 256, carry set iff sum >= 256
  66         // if carry then no underflow
  67         jc      00006$
  68         add     a, #130                 // +130 == -126 mod 256; carry set iff sum >= 126
  69         jc      00007$
  70         ljmp    fs_return_zero          // sum < 126: exponent underflow
  71
  72 00006$:
  73         add     a, #131                 // carry set iff low byte of the sum >= 125
  74         dec     a                       // net effect +130; NOTE(review): relies on dec leaving carry unchanged
  75         jnc     00007$
  76         ljmp    fs_return_inf           // sum - 126 would be 0xFF or more: exponent overflow
  77
  78 00007$:
  79         mov     exp_a, a
  80
  81         // now we need to multiply r4/r3/r2 * r7/r6/r5
  82         // ------------------------------------------
  83         //                              r2 * r5         << 0
  84         //                  r3 * r5  +  r2 * r6         << 8
  85         //      r4 * r5  +  r3 * r6  +  r2 * r7         << 16
  86         //      r4 * r6  +  r3 * r7                     << 24
  87         //      r4 * r7                                 << 32
  88         //
  89         // This adds quite a bit of code, but it is a LOT faster
  90         // than three calls to __mululong...
  91
  92         // output goes into r4/r3/r2/r1/r0/xx
  93
  94         mov     a, r2
  95         mov     b, r5
  96         mul     ab                      // r2 * r5
  97         // discard lowest 8 bits
  98         mov     r0, b                   // keep only the high byte of the product
  99         // range 0-FE
 100
 101         mov     a, r2
 102         mov     b, r6
 103         mul     ab                      // r2 * r6
 104         add     a, r0
 105         mov     r0, a
 106         clr     a
 107         addc    a, b
 108         mov     r1, a                   // r1:r0 = r2*r6 + high byte of r2*r5
 109         // range 0-FEFF
 110
 111         mov     a, r3
 112         mov     b, r5
 113         mul     ab                      // r3 * r5
 114         add     a, r0                   // sum is overwritten below; only the carry is used
 115         // discard lowest 8 bits
 116         mov     a, r1
 117         addc    a, b
 118         mov     r1, a
 119         clr     a
 120         rlc     a                       // accumulator = carry out of r1
 121         xch     a, r2                   // r2 = that carry; accumulator = original r2
 122         // range 0-1FD
 123
 124         mov     b, r7
 125         mul     ab                      // r2 * r7 (original r2, still in the accumulator)
 126         add     a, r1
 127         mov     r1, a
 128         mov     a, r2
 129         addc    a, b
 130         mov     r2, a
 131         // range 0-FFFE
 132
 133         mov     a, r3
 134         mov     r0, a                   // save original r3 in r0; r3 becomes a carry byte below
 135         mov     b, r6
 136         mul     ab                      // r3 * r6
 137         add     a, r1
 138         mov     r1, a
 139         mov     a, r2
 140         addc    a, b
 141         mov     r2, a
 142         clr     a
 143         rlc     a
 144         mov     r3, a                   // r3 = carry out of r2
 145         // range 0-1FDFF
 146
 147         mov     a, r4
 148         mov     b, r5
 149         mul     ab                      // r4 * r5
 150         add     a, r1
 151         mov     r1, a
 152         mov     a, r2
 153         addc    a, b
 154         mov     r2, a
 155         clr     a
 156         addc    a, r3                   // fold the new carry into r3
 157         mov     r3, a
 158         // range 0-2FC00
 159
 160         mov     a, r0 // original r3 (saved in r0 above)
 161         mov     b, r7
 162         mul     ab                      // r3 * r7
 163         add     a, r2
 164         mov     r2, a
 165         mov     a, r3
 166         addc    a, b
 167         mov     r3, a
 168         clr     a
 169         rlc     a
 170         xch     a, r4                   // r4 = carry out of r3; accumulator = original r4
 171         // range 0-100FD00
 172
 173         mov     r5, a                   // save original r4 in r5 (all products using r5 are done)
 174         mov     b, r6
 175         mul     ab                      // r4 * r6
 176         add     a, r2
 177         mov     r2, a
 178         mov     a, r3
 179         addc    a, b
 180         mov     r3, a
 181         clr     a
 182         addc    a, r4                   // fold the new carry into r4
 183         mov     r4, a
 184         // range 0-1FEFE00
 185
 186         mov     a, r5 // original r4 (saved in r5 above)
 187         mov     b, r7
 188         mul     ab                      // r4 * r7
 189         add     a, r3
 190         mov     r3, a
 191         mov     a, r4
 192         addc    a, b
 193         mov     r4, a
 194         // range 40000000-FFFFFE00
 195
 196         jb      acc.7, 00010$           // accumulator == r4 here; top bit set means no normalization needed
 197         lcall   fs_normalize_a
 198
 199 00010$:
 200         ljmp    fs_round_and_return
 201         __endasm;
 202 }
 203
 204 #else
 205
 206 /*
 207 ** libgcc support for software floating point.
 208 ** Copyright (C) 1991 by Pipeline Associates, Inc.  All rights reserved.
 209 ** Permission is granted to do *anything* you want with this file,
 210 ** commercial or otherwise, provided this message remains intact.  So there!
 211 ** I would appreciate receiving any updates/patches/changes that anyone
 212 ** makes, and am willing to be the repository for said changes (am I
 213 ** making a big mistake?).
 214 **
 215 ** Pat Wood
 216 ** Pipeline Associates, Inc.
 217 ** pipeline!phw@motown.com or
 218 ** sun!pipeline!phw or
 219 ** uunet!motown!pipeline!phw
 220 */
 221
 222 /* (c)2000/2001: hacked a little by johan.knol@iduna.nl for sdcc */
 223
 224 union float_long
 225   {
 226     float f;
 227     unsigned long l;
 228   };
 229
 230 /* multiply two floats */
 231 float __fsmul (float a1, float a2) {
 232   volatile union float_long fl1, fl2;
 233   volatile unsigned long result;
 234   volatile int exp;
 235   char sign;
 236
 237   fl1.f = a1;
 238   fl2.f = a2;
 239
 240   if (!fl1.l || !fl2.l)
 241     return (0);
 242
 243   /* compute sign and exponent */
 244   sign = SIGN (fl1.l) ^ SIGN (fl2.l);
 245   exp = EXP (fl1.l) - EXCESS;
 246   exp += EXP (fl2.l);
 247
 248   fl1.l = MANT (fl1.l);
 249   fl2.l = MANT (fl2.l);
 250
 251   /* the multiply is done as one 16x16 multiply and two 16x8 multiples */
 252   result = (fl1.l >> 8) * (fl2.l >> 8);
 253   result += ((fl1.l & (unsigned long) 0xFF) * (fl2.l >> 8)) >> 8;
 254   result += ((fl2.l & (unsigned long) 0xFF) * (fl1.l >> 8)) >> 8;
 255
 256   if (result & SIGNBIT)
 257     {
 258       /* round */
 259       result += 0x80;
 260       result >>= 8;
 261     }
 262   else
 263     {
 264       /* round */
 265       result += 0x40;
 266       result >>= 7;
 267       exp--;
 268     }
 269
 270   result &= ~HIDDEN;
 271
 272   /* pack up and go home */
 273   if (exp >= 0x100)
 274     fl1.l = (sign ? SIGNBIT : 0) | __INFINITY;
 275   else if (exp < 0)
 276     fl1.l = 0;
 277   else
 278     fl1.l = PACK (sign ? SIGNBIT : 0 , exp, result);
 279   return (fl1.f);
 280 }
 281
 282 #endif