void ao_plus64(ao_int64_t *r, ao_int64_t *a, ao_int64_t *b) {
uint32_t t;
- r->low = t = a->low + b->low;
r->high = a->high + b->high;
+ t = a->low + b->low;
if (t < a->low)
r->high++;
+ r->low = t;
}
void ao_rshift64(ao_int64_t *r, ao_int64_t *a, uint8_t d) {
if (d < 32) {
- r->high = (int32_t) a->high >> d;
r->low = a->low >> d;
if (d)
r->low |= a->high << (32 - d);
+ r->high = (int32_t) a->high >> d;
} else {
d &= 0x1f;
- r->high = 0;
r->low = (int32_t) a->high >> d;
+ r->high = 0;
}
}
void ao_lshift64(ao_int64_t *r, ao_int64_t *a, uint8_t d) {
if (d < 32) {
r->high = a->high << d;
- r->low = a->low << d;
if (d)
r->high |= a->low >> (32 - d);
+ r->low = a->low << d;
} else {
d &= 0x1f;
- r->low = 0;
r->high = a->low << d;
+ r->low = 0;
}
}
-void ao_umul64(ao_int64_t *r, uint32_t a, uint32_t b)
+static void ao_umul64_32_32(ao_int64_t *r, uint32_t a, uint32_t b)
{
uint32_t r1;
uint32_t r2, r3, r4;
s.low = r1;
s.high = r4;
- t.high = (uint32_t) r2 >> 16;
+ t.high = r2 >> 16;
t.low = r2 << 16;
ao_plus64(&u, &s, &t);
- v.high = (int32_t) r3 >> 16;
+ v.high = r3 >> 16;
v.low = r3 << 16;
ao_plus64(r, &u, &v);
}
r->high++;
}
-void ao_mul64(ao_int64_t *r, int32_t a, int32_t b) {
+void ao_mul64_32_32(ao_int64_t *r, int32_t a, int32_t b) {
uint8_t negative = 0;
if (a < 0) {
b = -b;
negative = ~negative;
}
- ao_umul64(r, a, b);
+ ao_umul64_32_32(r, a, b);
if (negative)
ao_neg64(r, r);
}
-void ao_umul64_16(ao_int64_t *r, ao_int64_t *a, uint16_t b) {
- ao_umul64(r, a->low, b);
- r->high += a->high * b;
+static void ao_umul64(ao_int64_t *r, ao_int64_t *a, ao_int64_t *b) {
+ ao_int64_t r2, r3;
+
+ ao_umul64_32_32(&r2, a->high, b->low);
+ ao_umul64_32_32(&r3, a->low, b->high);
+ ao_umul64_32_32(r, a->low, b->low);
+
+ r->high += r2.low + r3.low;
}
-void ao_mul64_16(ao_int64_t *r, ao_int64_t *a, uint16_t b) {
- if ((int32_t) a->high < 0) {
- ao_int64_t t;
+void ao_mul64(ao_int64_t *r, ao_int64_t *a, ao_int64_t *b) {
+ uint8_t negative = 0;
+ ao_int64_t ap, bp;
- ao_neg64(&t, a);
- ao_umul64_16(r, &t, b);
+ if (ao_int64_negativep(a)) {
+ ao_neg64(&ap, a);
+ a = &ap;
+ negative = ~0;
+ }
+ if (ao_int64_negativep(b)) {
+ ao_neg64(&bp, b);
+ b = &bp;
+ negative = ~negative;
+ }
+ ao_umul64(r, a, b);
+ if (negative)
ao_neg64(r, r);
+}
+
+void ao_umul64_64_16(ao_int64_t *r, ao_int64_t *a, uint16_t b) {
+ uint32_t h = a->high * b;
+ ao_umul64_32_32(r, a->low, b);
+ r->high += h;
+}
+
+/*
+ * Multiply the 64-bit value *a by the 16-bit value b, storing the
+ * product in *r.  When the high word of *a is negative as an int32_t,
+ * *a is first passed through ao_neg64 into a local copy; the unsigned
+ * multiply then always runs, and the product is passed through
+ * ao_neg64 again if the input was negative.
+ */
+void ao_mul64_64_16(ao_int64_t *r, ao_int64_t *a, uint16_t b) {
+ ao_int64_t ap;
+ uint8_t negative = 0;
+ if ((int32_t) a->high < 0) {
+ ao_neg64(&ap, a);
+ a = &ap;
+ negative = ~0;
- } else
- ao_umul64_16(r, a, b);
+ }
+ /* Not in an else branch: the negative path needs the product too. */
+ ao_umul64_64_16(r, a, b);
+ if (negative)
+ ao_neg64(r, r);
}
} ao_int64_t;
void ao_plus64(ao_int64_t *r, ao_int64_t *a, ao_int64_t *b);
+void ao_neg64(ao_int64_t *r, ao_int64_t *a);
+void ao_lshift64_16(ao_int64_t *r, uint16_t a, uint8_t d);
void ao_rshift64(ao_int64_t *r, ao_int64_t *a, uint8_t d);
void ao_lshift64(ao_int64_t *r, ao_int64_t *a, uint8_t d);
-void ao_mul64(ao_int64_t *r, int32_t a, int32_t b);
-void ao_mul64_16(ao_int64_t *r, ao_int64_t *a, uint16_t b);
+void ao_mul64_64_64(ao_int64_t *r, ao_int64_t *a, ao_int64_t *b);
+void ao_mul64_32_32(ao_int64_t *r, int32_t a, int32_t b);
+void ao_mul64_64_16(ao_int64_t *r, ao_int64_t *a, uint16_t b);
#define ao_int64_init32(r, a) (((r)->high = 0), (r)->low = (a))
#define ao_int64_init64(r, a, b) (((r)->high = (a)), (r)->low = (b))
#define ao_cast64(a) (((int64_t) (a)->high << 32) | (a)->low)
+#define ao_int64_negativep(a) (((int32_t) (a)->high) < 0)
+
#endif /* _AO_INT64_H_ */
int errors;
-#define test(op,func,a,b,ao_a,ao_b) do { \
+#define test_o(op,func,mod,a,b,ao_a,ao_b) do { \
r = (a) op (b); \
func(&ao_r, ao_a, ao_b); \
c = ao_cast64(&ao_r); \
if (c != r) { \
- printf ("trial %4d: %lld " #func " %lld = %lld (should be %lld)\n", \
+ printf ("trial %4d: %lld " #func mod " %lld = %lld (should be %lld)\n", \
trial, (int64_t) (a), (int64_t) b, c, r); \
++errors; \
} \
} while (0)
+#define test(op,func,a,b,ao_a,ao_b) test_o(op,func,"",a,b,ao_a,ao_b)
+
+#define test_a(op,func,a,b,ao_a,ao_b) do { \
+ ao_r = *ao_a; \
+ test_o(op,func,"_a",a,b,&ao_r,ao_b); \
+ } while (0)
+
+#define test_b(op,func,a,b,ao_a,ao_b) do { \
+ ao_r = *ao_b; \
+ test_o(op,func,"_b",a,b,ao_a,&ao_r); \
+ } while (0)
+
+#define test_x(op,func,a,b,ao_a,ao_b) do { \
+ ao_r = *ao_a; \
+ test_o(op,func,"_xa",a,a,&ao_r,&ao_r); \
+ ao_r = *ao_b; \
+ test_o(op,func,"_xb",b,b,&ao_r,&ao_r); \
+ } while (0)
void
do_test(int trial, int64_t a, int64_t b)
ao_int64_init64(&ao_b, b >> 32, b);
test(+, ao_plus64, a, b, &ao_a, &ao_b);
- test(*, ao_mul64,(int64_t) (int32_t) a, (int32_t) b, (int32_t) a, (int32_t) b);
+ test_a(+, ao_plus64, a, b, &ao_a, &ao_b);
+ test_b(+, ao_plus64, a, b, &ao_a, &ao_b);
+ test_x(+, ao_plus64, a, b, &ao_a, &ao_b);
+ test(*, ao_mul64_32_32,(int64_t) (int32_t) a, (int32_t) b, (int32_t) a, (int32_t) b);
+ test(*, ao_mul64, a, b, &ao_a, &ao_b);
+ test_a(*, ao_mul64, a, b, &ao_a, &ao_b);
+ test_b(*, ao_mul64, a, b, &ao_a, &ao_b);
+ test_x(*, ao_mul64, a, b, &ao_a, &ao_b);
+ test(*, ao_mul64_64_16, a, (uint16_t) b, &ao_a, (uint16_t) b);
+ test_a(*, ao_mul64_64_16, a, (uint16_t) b, &ao_a, (uint16_t) b);
test(>>, ao_rshift64, a, (uint8_t) b & 0x3f, &ao_a, (uint8_t) b & 0x3f);
+ test_a(>>, ao_rshift64, a, (uint8_t) b & 0x3f, &ao_a, (uint8_t) b & 0x3f);
test(<<, ao_lshift64, a, (uint8_t) b & 0x3f, &ao_a, (uint8_t) b & 0x3f);
- test(*, ao_mul64_16, a, (uint16_t) b, &ao_a, (uint16_t) b);
+ test_a(<<, ao_lshift64, a, (uint8_t) b & 0x3f, &ao_a, (uint8_t) b & 0x3f);
}
#define TESTS 10000000