git.gag.com Git - fw/sdcc/blob - device/lib/_fsmul.c

   1 /* Floating point library in optimized assembly for 8051
   2  * Copyright (c) 2004, Paul Stoffregen, paul@pjrc.com
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Library General Public License
   6  * as published by the Free Software Foundation; either version 2
   7  * of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12  * GNU General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write to the Free Software
  16  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  17  */
  18
  19
  20 #define SDCC_FLOAT_LIB
  21 #include <float.h>
  22
  23
  24 #ifdef FLOAT_ASM_MCS51
  25
  26 // float __fsmul (float a, float b) reentrant
// Carrier function for the hand-written 8051 implementation of ___fsmul.
// It is declared _naked and contains nothing but inline assembly; the
// routine callers actually reach is the global label ___fsmul defined
// inside the asm block, not dummy() itself.
//
// Outline of ___fsmul:
//   1. fsgetargs unpacks both operands (register layout in the table below).
//   2. If the top mantissa byte of either operand is 0, jump to
//      fs_return_zero.
//   3. Result sign = sign_a XOR sign_b; result exponent = exp_a + exp_b + 130
//      (8-bit arithmetic).
//   4. The two 24-bit mantissas are multiplied byte by byte with "mul ab";
//      the upper 32 bits of the product end up in r4/r3/r2/r1.
//   5. If the top bit of the product is clear, fs_normalize_a is called;
//      then control passes to fs_round_and_return.
//
// fsgetargs, fs_return_zero, fs_normalize_a and fs_round_and_return are
// defined elsewhere; what is said about them here is taken from how they
// are used in this routine.
static void dummy(void) _naked
{
        _asm
        .globl  ___fsmul
___fsmul:
        // extract the two inputs, placing them into:
        //      sign     exponent   mantissa
        //      ----     --------   --------
        //  a:  sign_a   exp_a      r4/r3/r2
        //  b:  sign_b   exp_b      r7/r6/r5

        lcall   fsgetargs

        // first check if either input is zero
        // (the test looks only at the most significant mantissa byte:
        // r4 for a, r7 for b -- presumably fsgetargs leaves that byte
        // non-zero for every non-zero input; TODO confirm)
        cjne    r4, #0, 00002$
00001$:
        ljmp    fs_return_zero

00002$:
        mov     a, r7
        jz      00001$

        // compute final sign bit
        // (sign_a is toggled when sign_b is set, i.e. sign_a ^= sign_b)
        jnb     sign_b, 00003$
        cpl     sign_a
00003$:

        // add the exponents
        // +130 is the same as -126 modulo 256 -- presumably this removes
        // the bias that was counted twice; TODO confirm against the
        // exponent convention used by fsgetargs.
        // NOTE(review): the 8-bit sum is stored without any carry test,
        // so exponent overflow/underflow is not detected in this routine.
        mov     a, exp_a
        add     a, exp_b
        add     a, #130
        mov     exp_a, a


        // now we need to multiply r4/r3/r2 * r7/r6/r5
        // ------------------------------------------
        //                              r2 * r5         << 0
        //                  r3 * r5  +  r2 * r6         << 8
        //      r4 * r5  +  r3 * r6  +  r2 * r7         << 16
        //      r4 * r6  +  r3 * r7                     << 24
        //      r4 * r7                                 << 32
        //
        // This adds quite a bit of code, but it is a LOT faster
        // than three calls to __mululong...

        // output goes into r4/r3/r2/r1/r0/xx
        //
        // The result registers overlap the input registers, so each input
        // byte is consumed (or parked in a spare register) before the
        // register holding it is reused as part of the running sum.

        mov     a, r2
        mov     b, r5
        mul     ab                      // r2 * r5
        // discard lowest 8 bits
        mov     r0, b
        // range 0-FE

        mov     a, r2
        mov     b, r6
        mul     ab                      // r2 * r6
        add     a, r0
        mov     r0, a
        clr     a
        addc    a, b
        mov     r1, a
        // range 0-FEFF

        mov     a, r3
        mov     b, r5
        mul     ab                      // r3 * r5
        // the add below is done only for its carry; the sum byte itself
        // is overwritten by the next instruction
        add     a, r0
        // discard lowest 8 bits
        mov     a, r1
        addc    a, b
        mov     r1, a
        // capture the carry in r2 (new accumulator byte) while fetching
        // the old r2, the low mantissa byte of a, into acc for the next mul
        clr     a
        rlc     a
        xch     a, r2
        // range 0-1FD

        mov     b, r7
        mul     ab                      // r2 * r7
        add     a, r1
        mov     r1, a
        mov     a, r2
        addc    a, b
        mov     r2, a
        // range 0-FFFE

        mov     a, r3
        // keep a copy of mantissa byte r3 in r0: r3 becomes an accumulator
        // byte below but is still needed for the r3 * r7 product
        mov     r0, a
        mov     b, r6
        mul     ab                      // r3 * r6
        add     a, r1
        mov     r1, a
        mov     a, r2
        addc    a, b
        mov     r2, a
        clr     a
        rlc     a
        mov     r3, a
        // range 0-1FDFF

        mov     a, r4
        mov     b, r5
        mul     ab                      // r4 * r5
        add     a, r1
        mov     r1, a
        mov     a, r2
        addc    a, b
        mov     r2, a
        clr     a
        addc    a, r3
        mov     r3, a
        // range 0-2FC00

        mov     a, r0 // r3
        mov     b, r7
        mul     ab                      // r3 * r7
        add     a, r2
        mov     r2, a
        mov     a, r3
        addc    a, b
        mov     r3, a
        // capture the carry in r4 (new accumulator byte) while fetching
        // the old r4, the high mantissa byte of a, into acc
        clr     a
        rlc     a
        xch     a, r4
        // range 0-100FD00

        // park the original r4 in r5 (r5 was last needed for r4 * r5)
        mov     r5, a
        mov     b, r6
        mul     ab                      // r4 * r6
        add     a, r2
        mov     r2, a
        mov     a, r3
        addc    a, b
        mov     r3, a
        clr     a
        addc    a, r4
        mov     r4, a
        // range 0-1FEFE00

        mov     a, r5 // r4
        mov     b, r7
        mul     ab                      // r4 * r7
        add     a, r3
        mov     r3, a
        mov     a, r4
        addc    a, b
        mov     r4, a
        // range 40000000-FFFFFE00

        // acc still holds the top product byte (r4): when its bit 7 is
        // already set the normalize call is skipped
        jb      acc.7, 00010$
        lcall   fs_normalize_a

00010$:
        ljmp    fs_round_and_return
        _endasm;
}
 183
 184 #else
 185
 186
 187 /*
 188 ** libgcc support for software floating point.
 189 ** Copyright (C) 1991 by Pipeline Associates, Inc.  All rights reserved.
 190 ** Permission is granted to do *anything* you want with this file,
 191 ** commercial or otherwise, provided this message remains intact.  So there!
 192 ** I would appreciate receiving any updates/patches/changes that anyone
 193 ** makes, and am willing to be the repository for said changes (am I
 194 ** making a big mistake?).
 195 **
 196 ** Pat Wood
 197 ** Pipeline Associates, Inc.
 198 ** pipeline!phw@motown.com or
 199 ** sun!pipeline!phw or
 200 ** uunet!motown!pipeline!phw
 201 */
 202
 203 /* (c)2000/2001: hacked a little by johan.knol@iduna.nl for sdcc */
 204
 205
 206 union float_long
 207   {
 208     float f;
 209     unsigned long l;
 210   };
 211
 212 /* multiply two floats */
 213 float __fsmul (float a1, float a2) {
 214   volatile union float_long fl1, fl2;
 215   volatile unsigned long result;
 216   volatile int exp;
 217   char sign;
 218
 219   fl1.f = a1;
 220   fl2.f = a2;
 221
 222   if (!fl1.l || !fl2.l)
 223     return (0);
 224
 225   /* compute sign and exponent */
 226   sign = SIGN (fl1.l) ^ SIGN (fl2.l);
 227   exp = EXP (fl1.l) - EXCESS;
 228   exp += EXP (fl2.l);
 229
 230   fl1.l = MANT (fl1.l);
 231   fl2.l = MANT (fl2.l);
 232
 233   /* the multiply is done as one 16x16 multiply and two 16x8 multiples */
 234   result = (fl1.l >> 8) * (fl2.l >> 8);
 235   result += ((fl1.l & (unsigned long) 0xFF) * (fl2.l >> 8)) >> 8;
 236   result += ((fl2.l & (unsigned long) 0xFF) * (fl1.l >> 8)) >> 8;
 237
 238   if (result & SIGNBIT)
 239     {
 240       /* round */
 241       result += 0x80;
 242       result >>= 8;
 243     }
 244   else
 245     {
 246       /* round */
 247       result += 0x40;
 248       result >>= 7;
 249       exp--;
 250     }
 251
 252   result &= ~HIDDEN;
 253
 254   /* pack up and go home */
 255   fl1.l = PACK (sign ? SIGNBIT : 0 , (unsigned long)exp, result);
 256   return (fl1.f);
 257 }
 258
 259 #endif
 260