From: Keith Packard Date: Thu, 23 May 2013 00:08:55 +0000 (-0700) Subject: altos: Add 64x64 multiply. Test 64 ops for dest same as either source X-Git-Tag: 1.2.9.4~118^2~18 X-Git-Url: https://git.gag.com/?p=fw%2Faltos;a=commitdiff_plain;h=5ccd902d0fd2adc40c72982babb60fac4da6a087 altos: Add 64x64 multiply. Test 64 ops for dest same as either source The test change is to ensure that the destination may be one of the 64 bit sources. Signed-off-by: Keith Packard --- diff --git a/src/core/ao_int64.c b/src/core/ao_int64.c index 8e3caa24..5307342d 100644 --- a/src/core/ao_int64.c +++ b/src/core/ao_int64.c @@ -20,39 +20,40 @@ void ao_plus64(ao_int64_t *r, ao_int64_t *a, ao_int64_t *b) { uint32_t t; - r->low = t = a->low + b->low; r->high = a->high + b->high; + t = a->low + b->low; if (t < a->low) r->high++; + r->low = t; } void ao_rshift64(ao_int64_t *r, ao_int64_t *a, uint8_t d) { if (d < 32) { - r->high = (int32_t) a->high >> d; r->low = a->low >> d; if (d) r->low |= a->high << (32 - d); + r->high = (int32_t) a->high >> d; } else { d &= 0x1f; - r->high = 0; r->low = (int32_t) a->high >> d; + r->high = 0; } } void ao_lshift64(ao_int64_t *r, ao_int64_t *a, uint8_t d) { if (d < 32) { r->high = a->high << d; - r->low = a->low << d; if (d) r->high |= a->low >> (32 - d); + r->low = a->low << d; } else { d &= 0x1f; - r->low = 0; r->high = a->low << d; + r->low = 0; } } -void ao_umul64(ao_int64_t *r, uint32_t a, uint32_t b) +static void ao_umul64_32_32(ao_int64_t *r, uint32_t a, uint32_t b) { uint32_t r1; uint32_t r2, r3, r4; @@ -65,11 +66,11 @@ void ao_umul64(ao_int64_t *r, uint32_t a, uint32_t b) s.low = r1; s.high = r4; - t.high = (uint32_t) r2 >> 16; + t.high = r2 >> 16; t.low = r2 << 16; ao_plus64(&u, &s, &t); - v.high = (int32_t) r3 >> 16; + v.high = r3 >> 16; v.low = r3 << 16; ao_plus64(r, &u, &v); } @@ -81,7 +82,7 @@ void ao_neg64(ao_int64_t *r, ao_int64_t *a) { r->high++; } -void ao_mul64(ao_int64_t *r, int32_t a, int32_t b) { +void ao_mul64_32_32(ao_int64_t *r, int32_t 
a, int32_t b) { uint8_t negative = 0; if (a < 0) { @@ -92,23 +93,55 @@ void ao_mul64(ao_int64_t *r, int32_t a, int32_t b) { b = -b; negative = ~negative; } - ao_umul64(r, a, b); + ao_umul64_32_32(r, a, b); if (negative) ao_neg64(r, r); } -void ao_umul64_16(ao_int64_t *r, ao_int64_t *a, uint16_t b) { - ao_umul64(r, a->low, b); - r->high += a->high * b; +static void ao_umul64(ao_int64_t *r, ao_int64_t *a, ao_int64_t *b) { + ao_int64_t r2, r3; + + ao_umul64_32_32(&r2, a->high, b->low); + ao_umul64_32_32(&r3, a->low, b->high); + ao_umul64_32_32(r, a->low, b->low); + + r->high += r2.low + r3.low; } -void ao_mul64_16(ao_int64_t *r, ao_int64_t *a, uint16_t b) { - if ((int32_t) a->high < 0) { - ao_int64_t t; +void ao_mul64(ao_int64_t *r, ao_int64_t *a, ao_int64_t *b) { + uint8_t negative = 0; + ao_int64_t ap, bp; - ao_neg64(&t, a); - ao_umul64_16(r, &t, b); + if (ao_int64_negativep(a)) { + ao_neg64(&ap, a); + a = &ap; + negative = ~0; + } + if (ao_int64_negativep(b)) { + ao_neg64(&bp, b); + b = &bp; + negative = ~negative; + } + ao_umul64(r, a, b); + if (negative) ao_neg64(r, r); +} + +void ao_umul64_64_16(ao_int64_t *r, ao_int64_t *a, uint16_t b) { + uint32_t h = a->high * b; + ao_umul64_32_32(r, a->low, b); + r->high += h; +} + +void ao_mul64_64_16(ao_int64_t *r, ao_int64_t *a, uint16_t b) { + ao_int64_t ap; + uint8_t negative = 0; + if ((int32_t) a->high < 0) { + ao_neg64(&ap, a); + a = &ap; + negative = ~0; } else - ao_umul64_16(r, a, b); + ao_umul64_64_16(r, a, b); + if (negative) + ao_neg64(r, r); } diff --git a/src/core/ao_int64.h b/src/core/ao_int64.h index 93aa87e4..e5eee823 100644 --- a/src/core/ao_int64.h +++ b/src/core/ao_int64.h @@ -26,14 +26,19 @@ typedef struct { } ao_int64_t; void ao_plus64(ao_int64_t *r, ao_int64_t *a, ao_int64_t *b); +void ao_neg64(ao_int64_t *r, ao_int64_t *a); +void ao_lshift64_16(ao_int64_t *r, uint16_t a, uint8_t d); void ao_rshift64(ao_int64_t *r, ao_int64_t *a, uint8_t d); void ao_lshift64(ao_int64_t *r, ao_int64_t *a, uint8_t d); -void 
ao_mul64(ao_int64_t *r, int32_t a, int32_t b); -void ao_mul64_16(ao_int64_t *r, ao_int64_t *a, uint16_t b); +void ao_mul64_64_64(ao_int64_t *r, ao_int64_t *a, ao_int64_t *b); +void ao_mul64_32_32(ao_int64_t *r, int32_t a, int32_t b); +void ao_mul64_64_16(ao_int64_t *r, ao_int64_t *a, uint16_t b); #define ao_int64_init32(r, a) (((r)->high = 0), (r)->low = (a)) #define ao_int64_init64(r, a, b) (((r)->high = (a)), (r)->low = (b)) #define ao_cast64(a) (((int64_t) (a)->high << 32) | (a)->low) +#define ao_int64_negativep(a) (((int32_t) (a)->high) < 0) + #endif /* _AO_INT64_H_ */ diff --git a/src/test/ao_int64_test.c b/src/test/ao_int64_test.c index 67ba6ec5..4c88b1a1 100644 --- a/src/test/ao_int64_test.c +++ b/src/test/ao_int64_test.c @@ -22,17 +22,35 @@ int errors; -#define test(op,func,a,b,ao_a,ao_b) do { \ +#define test_o(op,func,mod,a,b,ao_a,ao_b) do { \ r = (a) op (b); \ func(&ao_r, ao_a, ao_b); \ c = ao_cast64(&ao_r); \ if (c != r) { \ - printf ("trial %4d: %lld " #func " %lld = %lld (should be %lld)\n", \ + printf ("trial %4d: %lld " #func mod " %lld = %lld (should be %lld)\n", \ trial, (int64_t) (a), (int64_t) b, c, r); \ ++errors; \ } \ } while (0) +#define test(op,func,a,b,ao_a,ao_b) test_o(op,func,"",a,b,ao_a,ao_b) + +#define test_a(op,func,a,b,ao_a,ao_b) do { \ + ao_r = *ao_a; \ + test_o(op,func,"_a",a,b,&ao_r,ao_b); \ + } while (0) + +#define test_b(op,func,a,b,ao_a,ao_b) do { \ + ao_r = *ao_b; \ + test_o(op,func,"_b",a,b,ao_a,&ao_r); \ + } while (0) + +#define test_x(op,func,a,b,ao_a,ao_b) do { \ + ao_r = *ao_a; \ + test_o(op,func,"_xa",a,a,&ao_r,&ao_r); \ + ao_r = *ao_b; \ + test_o(op,func,"_xb",b,b,&ao_r,&ao_r); \ + } while (0) void do_test(int trial, int64_t a, int64_t b) @@ -44,10 +62,20 @@ do_test(int trial, int64_t a, int64_t b) ao_int64_init64(&ao_b, b >> 32, b); test(+, ao_plus64, a, b, &ao_a, &ao_b); - test(*, ao_mul64,(int64_t) (int32_t) a, (int32_t) b, (int32_t) a, (int32_t) b); + test_a(+, ao_plus64, a, b, &ao_a, &ao_b); + test_b(+, ao_plus64, 
a, b, &ao_a, &ao_b); + test_x(+, ao_plus64, a, b, &ao_a, &ao_b); + test(*, ao_mul64_32_32,(int64_t) (int32_t) a, (int32_t) b, (int32_t) a, (int32_t) b); + test(*, ao_mul64, a, b, &ao_a, &ao_b); + test_a(*, ao_mul64, a, b, &ao_a, &ao_b); + test_b(*, ao_mul64, a, b, &ao_a, &ao_b); + test_x(*, ao_mul64, a, b, &ao_a, &ao_b); + test(*, ao_mul64_64_16, a, (uint16_t) b, &ao_a, (uint16_t) b); + test_a(*, ao_mul64_64_16, a, (uint16_t) b, &ao_a, (uint16_t) b); test(>>, ao_rshift64, a, (uint8_t) b & 0x3f, &ao_a, (uint8_t) b & 0x3f); + test_a(>>, ao_rshift64, a, (uint8_t) b & 0x3f, &ao_a, (uint8_t) b & 0x3f); test(<<, ao_lshift64, a, (uint8_t) b & 0x3f, &ao_a, (uint8_t) b & 0x3f); - test(*, ao_mul64_16, a, (uint16_t) b, &ao_a, (uint16_t) b); + test_a(<<, ao_lshift64, a, (uint8_t) b & 0x3f, &ao_a, (uint8_t) b & 0x3f); } #define TESTS 10000000