int
ar_cfadd64 (AR_CRAY_64 *x, const AR_CRAY_64 *a, const AR_CRAY_64 *b)
{
	int res = AR_STAT_OK, a_expo = a->expo, b_expo = b->expo;
	unsigned int carry, shift, inexact;
	AR_CRAY_64 y;

	/* Ensure that the first argument has the largest exponent */
	if (b_expo > a_expo)
		y = *a, *x = *b;
	else
		y = *b, *x = *a;

	/* Test for underflow */
	if (x->expo < AR_CRAY_MIN_EXPO) {
		ZEROCRAY64 (*x);
		return AR_STAT_UNDERFLOW | AR_STAT_ZERO;
	}

	/* Shift the coefficient of the second argument down (right).  We
	 * do this in parts so as not to overflow registers.
	 */
	inexact = 0;
	shift = x->expo - y.expo;
	if (shift > AR_CRAY64_COEFF_BITS)
		ZEROCRAY64 (y);
	else
		for (; shift; shift--) {
			inexact |= y.coeff2 & 1;
			SHRIGHTCRAY64 (y);
			y.expo++;
		}

	/* If signs differ, complement the first argument; this is equivalent
	 * to negating and then subtracting one, in a two's complement sense.
	 */
	if (x->sign != y.sign)
		NOTCRAY64 (*x);

	/* Compute sum of coefficients */
	carry = 0;
	ADDCRAY64 (*x, carry, *x, y);

	/* Check if the sign changed, and add 1 if so; otherwise, undo
	 * the complement.
	 */
	if (x->sign != y.sign) {
		if (carry) {
			x->sign ^= 1;
			carry = 1;
			INCCRAY64 (*x, carry);
			carry = 0;
		} else
			NOTCRAY64 (*x);
	}

	if (carry) {
		SHRIGHTCRAY64 (*x);
		x->coeff0 |= 1 << (AR_CRAY_C0_BITS - 1);
		x->expo++;
	}

	/* Check for a zero result, as a special case */
	if (!(x->coeff0 | x->coeff1 | x->coeff2)) {
		x->sign = x->expo = 0;
		return AR_STAT_ZERO;
	}

	/* Shift the result coefficient left until normalized */
	while (!(x->coeff0 >> (AR_CRAY_C0_BITS - 1))) {
		x->expo--;
		SHLEFTCRAY64 (*x);
	}

	/* Test for out-of-range result or operand */
	if (x->expo > AR_CRAY_MAX_EXPO ||
	    a_expo > AR_CRAY_MAX_EXPO ||
	    b_expo > AR_CRAY_MAX_EXPO) {
		x->sign = 0;
		x->expo = AR_CRAY_MAX_EXPO + 1;
		res |= AR_STAT_OVERFLOW;
	}

	if (inexact)
		res |= AR_STAT_INEXACT;
	if (x->sign)
		res |= AR_STAT_NEGATIVE;

	return res;
}
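/* A minimal usage sketch for ar_cfadd64 (an added illustration, not part
 * of the library).  It assumes only the AR_CRAY_64 fields and AR_STAT_*
 * flags used above, and that 1.0 is encoded as a normalized coefficient
 * of one half (top bit of coeff0 set) with a biased exponent of
 * AR_CRAY_EXPO_BIAS + 1.  The exact encodings depend on definitions
 * elsewhere in this library, so the sketch is compiled out by default.
 */
#ifdef AR_CFADD64_EXAMPLE
#include <stdio.h>

static void
example_cfadd64 (void)
{
	AR_CRAY_64 sum, one, two;
	int stat;

	one.sign = 0;
	one.expo = AR_CRAY_EXPO_BIAS + 1;	/* assumed encoding of 1.0 */
	one.coeff0 = 1 << (AR_CRAY_C0_BITS - 1);
	one.coeff1 = one.coeff2 = 0;

	two = one;
	two.expo++;				/* doubling bumps the exponent */

	/* Under these assumptions, sum should encode 3.0 */
	stat = ar_cfadd64 (&sum, &one, &two);
	if (stat & (AR_STAT_OVERFLOW | AR_STAT_UNDERFLOW))
		fprintf (stderr, "cfadd64: out-of-range result\n");
}
#endif /* AR_CFADD64_EXAMPLE */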
int
ar_cfmul64 (AR_CRAY_64 *x, const AR_CRAY_64 *a, const AR_CRAY_64 *b,
	    int roundmode)
{
	int i, res = AR_STAT_OK;
	long x_expo, a_expo = a->expo, b_expo = b->expo, test_expo;
	unsigned int x_lbits, y_rbits, zcoeff, carry, x_rbits = 0;
	AR_CRAY_64 y, z;

	x->sign = a->sign ^ b->sign;
	y = *a;
	z = *b;

	if (!a_expo && !b_expo)
		x->sign = x_expo = 0;
	else {
		x_expo = a_expo + b_expo - AR_CRAY_EXPO_BIAS;
		if (a_expo < AR_CRAY_MIN_EXPO - 1 ||
		    b_expo < AR_CRAY_MIN_EXPO - 1 ||
		    x_expo < AR_CRAY_MIN_EXPO - 1) {
			ZEROCRAY64 (*x);
			return AR_STAT_UNDERFLOW | AR_STAT_ZERO;
		}
	}

	switch (roundmode) {
	case AR_ROUNDED:
		/* CRAY-1 rounded multiply */
		x_lbits = 0;
		x->coeff0 = x->coeff1 = x->coeff2 = 0;
		x_rbits = 0151;
		break;
	case AR_UNROUNDED:
		/* CRAY-1 truncation compensation */
		x_lbits = 0;
		x->coeff0 = x->coeff1 = x->coeff2 = 0;
		x_rbits = 0011;
		break;
	case AR_RECIPROCAL_ITERATION:
		/* CRAY-1 recip iter */
		x_lbits = 1;
		x->coeff0 = ~0;
		x->coeff1 = ~0;
		x->coeff2 = (0011 - 0320) >> 7;
		x_rbits = (0011 - 0320) & MASKR (7);
		break;
	}

	/* Compute and sum the pyramid */
#if AR_CRAY_C0_BITS*3 == AR_CRAY64_COEFF_BITS
	y_rbits = y.coeff2 << 7;
	i = AR_CRAY_C0_BITS;
	zcoeff = z.coeff0;
	while (zcoeff) {
		if (zcoeff & 0x8000) {
			x_rbits += (y_rbits & 0177);
			carry = x_rbits >> 7;
			x_rbits &= 0177;
			ADDCRAY64 (*x, carry, *x, y);
			x_lbits += carry;
		}
		SHRIGHTCRAY64 (y);
		y_rbits >>= 1;
		zcoeff = (zcoeff & 0x7fff) << 1;
		i--;
	}
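	/* Worked sketch of the pyramid loop above (an added illustration,
	 * not part of the original algorithm): each set bit of z.coeff0,
	 * scanned from the most significant end, selects a copy of y's
	 * coefficient shifted right by that bit's distance from the top.
	 * The bits shifted out of y are tracked in y_rbits and accumulated
	 * in the seven guard bits of x_rbits, so that carries out of the
	 * guard bits (x_rbits >> 7) still propagate into the coefficient
	 * sum, while carries out of the top of the sum are counted in
	 * x_lbits.  For example, with z.coeff0 == 0xA000 (bits 15 and 13
	 * set), the loop adds y and y >> 2 into *x, i.e. y * 0xA000
	 * scaled by 2^-15.
	 */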