X-Git-Url: https://git.gag.com/?a=blobdiff_plain;f=device%2Flib%2F_fsmul.c;h=95c12f110b3f8a20c4b1abd51fec47435e5ff443;hb=bf1925f901a9f45ad2529874f41e1e2b01f5bc9f;hp=09fd00a4fdba9092114e4d52538f139a366be3da;hpb=4e85b590d59de032a494ad439773f94d845db0f4;p=fw%2Fsdcc

diff --git a/device/lib/_fsmul.c b/device/lib/_fsmul.c
index 09fd00a4..95c12f11 100644
--- a/device/lib/_fsmul.c
+++ b/device/lib/_fsmul.c
@@ -1,3 +1,189 @@
+/* Floating point library in optimized assembly for 8051
+ * Copyright (c) 2004, Paul Stoffregen, paul@pjrc.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+
+#define SDCC_FLOAT_LIB
+#include <float.h>
+
+
+#ifdef FLOAT_ASM_MCS51
+
+// float __fsmul (float a, float b) reentrant
+static void dummy(void) _naked
+{
+        _asm
+        .globl  ___fsmul
+___fsmul:
+        // extract the two inputs, placing them into:
+        //      sign     exponent   mantissa
+        //      ----     --------   ---------
+        //  a:  sign_a   exp_a      r4/r3/r2
+        //  b:  sign_b   exp_b      r7/r6/r5
+
+        lcall   fsgetargs
+
+        // first check if either input is zero
+        cjne    r4, #0, 00002$
+00001$:
+        ljmp    fs_return_zero
+
+00002$:
+        mov     a, r7
+        jz      00001$
+
+        // compute final sign bit
+        jnb     sign_b, 00003$
+        cpl     sign_a
+00003$:
+
+        // add the exponents
+        mov     a, exp_a
+        add     a, exp_b
+        add     a, #130
+        mov     exp_a, a
+
+
+        // now we need to multiply r4/r3/r2 * r7/r6/r5
+        // ------------------------------------------
+        //                          r2 * r5 << 0
+        //                r3 * r5 + r2 * r6 << 8
+        //      r4 * r5 + r3 * r6 + r2 * r7 << 16
+        //      r4 * r6 + r3 * r7           << 24
+        //      r4 * r7                     << 32
+        //
+        // This adds quite a bit of code, but it is a LOT faster
+        // than three calls to __mululong...
+
+        // output goes into r4/r3/r2/r1/r0/xx
+
+        mov     a, r2
+        mov     b, r5
+        mul     ab                      // r2 * r5
+        // discard lowest 8 bits
+        mov     r0, b
+        // range 0-FE
+
+        mov     a, r2
+        mov     b, r6
+        mul     ab                      // r2 * r6
+        add     a, r0
+        mov     r0, a
+        clr     a
+        addc    a, b
+        mov     r1, a
+        // range 0-FEFF
+
+        mov     a, r3
+        mov     b, r5
+        mul     ab                      // r3 * r5
+        add     a, r0
+        // discard lowest 8 bits
+        mov     a, r1
+        addc    a, b
+        mov     r1, a
+        clr     a
+        rlc     a
+        xch     a, r2
+        // range 0-1FD
+
+        mov     b, r7
+        mul     ab                      // r2 * r7
+        add     a, r1
+        mov     r1, a
+        mov     a, r2
+        addc    a, b
+        mov     r2, a
+        // range 0-FFFE
+
+        mov     a, r3
+        mov     r0, a
+        mov     b, r6
+        mul     ab                      // r3 * r6
+        add     a, r1
+        mov     r1, a
+        mov     a, r2
+        addc    a, b
+        mov     r2, a
+        clr     a
+        rlc     a
+        mov     r3, a
+        // range 0-1FDFF
+
+        mov     a, r4
+        mov     b, r5
+        mul     ab                      // r4 * r5
+        add     a, r1
+        mov     r1, a
+        mov     a, r2
+        addc    a, b
+        mov     r2, a
+        clr     a
+        addc    a, r3
+        mov     r3, a
+        // range 0-2FC00
+
+        mov     a, r0                   // r3
+        mov     b, r7
+        mul     ab                      // r3 * r7
+        add     a, r2
+        mov     r2, a
+        mov     a, r3
+        addc    a, b
+        mov     r3, a
+        clr     a
+        rlc     a
+        xch     a, r4
+        // range 0-100FD00
+
+        mov     r5, a
+        mov     b, r6
+        mul     ab                      // r4 * r6
+        add     a, r2
+        mov     r2, a
+        mov     a, r3
+        addc    a, b
+        mov     r3, a
+        clr     a
+        addc    a, r4
+        mov     r4, a
+        // range 0-1FEFE00
+
+        mov     a, r5                   // r4
+        mov     b, r7
+        mul     ab                      // r4 * r7
+        add     a, r3
+        mov     r3, a
+        mov     a, r4
+        addc    a, b
+        mov     r4, a
+        // range 40000000-FFFFFE00
+
+        jb      acc.7, 00010$
+        lcall   fs_normalize_a
+
+00010$:
+        ljmp    fs_round_and_return
+        _endasm;
+}
+
+#else
+
+
 /*
 ** libgcc support for software floating point.
 ** Copyright (C) 1991 by Pipeline Associates, Inc. All rights reserved.
@@ -16,7 +202,6 @@
 
 /* (c)2000/2001: hacked a little by johan.knol@iduna.nl for sdcc */
 
-#include <float.h>
 
 union float_long
   {
@@ -71,6 +256,5 @@ float __fsmul (float a1, float a2) {
   return (fl1.f);
 }
 
-
-
+#endif
 