int
ar_ifmul32 (AR_IEEE_32 *x,
            const AR_IEEE_32 *a,
            const AR_IEEE_32 *b,
            int roundmode)
{
    int i, res = AR_STAT_OK;
    unsigned long x_lbits, z_lbits, rbits, carry;
    signed int x_expo;
    AR_IEEE_32 x2, y, y2, z;

    /* If either a or b is a NaN, it's the result. */
    if (IS_IEEE32_NaN (a)) {
        *x = *a;
        return res | AR_STAT_UNDEFINED;
    }
    if (IS_IEEE32_NaN (b)) {
        *x = *b;
        return res | AR_STAT_UNDEFINED;
    }

    /* Test for infinities */
    if (b->expo > AR_IEEE32_MAX_EXPO) {
        if (a->expo == 0 && !IS_IEEE32_NZ_COEFF (a)) {
            /* zero * infinity = quiet NaN */
            QNaNIEEE32 (x);
            return res | AR_STAT_UNDEFINED;
        }
        else {
            /* anything * infinity = infinity */
            if ((i = a->sign ^ b->sign))
                res |= AR_STAT_NEGATIVE;
            *x = *b;
            x->sign = i;
            return res | AR_STAT_OVERFLOW;
        }
    }

    if (a->expo > AR_IEEE32_MAX_EXPO) {
        if (b->expo == 0 && !IS_IEEE32_NZ_COEFF (b)) {
            /* infinity * zero = quiet NaN */
            QNaNIEEE32 (x);
            return res | AR_STAT_UNDEFINED;
        }
        else {
            /* infinity * anything = infinity */
            if ((i = a->sign ^ b->sign))
                res |= AR_STAT_NEGATIVE;
            *x = *a;
            x->sign = i;
            return res | AR_STAT_OVERFLOW;
        }
    }

    /* Test for denorms (they have zero exponents) to determine the
     * values of the implicit normalization bits; make them explicit.
     */
    if (ar_state_register.ar_denorms_trap &&
        ((!a->expo && IS_IEEE32_NZ_COEFF (a)) ||
         (!b->expo && IS_IEEE32_NZ_COEFF (b)))) {
        /* operand is a denorm and denorms cause a trap */
        x->expo = AR_IEEE32_MAX_EXPO + 1;
        return res | AR_STAT_UNDEFINED;
    }

    ZEROIEEE32 (y);
    y.coeff1 = !!a->expo;       /* implicit leading bit of a, made explicit */
    y2 = *a;
    z = *b;
    z_lbits = !!b->expo;        /* implicit leading bit of b, made explicit */
    x_expo = a->expo + b->expo + !a->expo + !b->expo - AR_IEEE32_EXPO_BIAS;
    if (x_expo <= 0)
        x_expo--;
    i = a->sign ^ b->sign;

    /* Sum the pyramid */
    if (z.coeff1 & 1) {
        x2 = *a;
        *x = y;
    }
    else {
        ZEROIEEE32 (*x);
        ZEROIEEE32 (x2);
    }
    x->sign = i;
    x_lbits = z_lbits & y.coeff1;
    SHLEFTIEEE32_2 (y, y2);
    SHRIGHTIEEE32 (z);
    z.coeff0 |= z_lbits << (AR_IEEE32_C0_BITS - 1);
    for (i = 1; i < AR_IEEE32_COEFF_BITS + 1; i++) {
        if (z.coeff1 & 1) {
            carry = 0;
            ADDIEEE32 (x2, carry, x2, y2);
            ADDIEEE32 (*x, carry, *x, y);
            x_lbits += carry;
        }
        SHLEFTIEEE32_2 (y, y2);
        SHRIGHTIEEE32 (z);
    }

    /* Extract rounding bits */
    rbits = x2.coeff0 >> (AR_IEEE32_C0_BITS - AR_IEEE32_ROUND_BITS);
    if ((x2.coeff0 & MASKR (AR_IEEE32_C0_BITS - AR_IEEE32_ROUND_BITS)) |
        x2.coeff1)
        rbits |= 1;             /* sticky bit */

    /* Normalize and round */
    return ar_i32norm (x_expo, x_lbits, rbits, x, roundmode);
}
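/* Illustrative sketch, not part of the library: the "pyramid" summed above
 * is an ordinary binary long multiplication of the two significands.
 * ar_ifmul32 walks the multiplier LSB-first, doubling the multiplicand
 * (SHLEFTIEEE32_2) into the double-width x/x2 accumulator; ar_cfmul64
 * below does the same MSB-first, halving the multiplicand (SHRIGHTCRAY64)
 * and catching the bits that fall off in y_rbits.  The standalone program
 * below shows the identical shift-and-add scheme on a plain 64-bit
 * integer; all names and values here are hypothetical.
 */
#if 0   /* example only; compile separately */
#include <stdio.h>
#include <stdint.h>

/* Shift-and-add multiply of two 24-bit significands (explicit leading
 * bit plus 23 coefficient bits), consuming the multiplier LSB-first.
 */
static uint64_t
mul_significands (uint32_t y, uint32_t z)
{
    uint64_t acc = 0;
    uint64_t yy = y;
    while (z) {
        if (z & 1)
            acc += yy;          /* add one row of the pyramid */
        yy <<= 1;               /* multiplicand doubles ... */
        z >>= 1;                /* ... as multiplier bits are consumed */
    }
    return acc;
}

int
main (void)
{
    uint32_t a = 0xC00000;      /* 1.100...b = 1.5 in Q1.23 */
    uint32_t b = 0xA00000;      /* 1.010...b = 1.25 in Q1.23 */
    uint64_t p = mul_significands (a, b);

    /* The Q2.46 product; the low bits feed the round/sticky logic,
     * just as rbits is extracted from x2 above.
     */
    printf ("raw product = %#llx\n", (unsigned long long) p);
    printf ("value       = %g\n", (double) p / (double) (1ULL << 46));
    printf ("sticky bits = %llx\n", (unsigned long long) (p & 0x7FFFFF));
    return 0;
}
#endif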
int
ar_cfmul64 (AR_CRAY_64 *x,
            const AR_CRAY_64 *a,
            const AR_CRAY_64 *b,
            int roundmode)
{
    int i, res = AR_STAT_OK;
    long x_expo, a_expo = a->expo, b_expo = b->expo, test_expo;
    unsigned int x_lbits, y_rbits, zcoeff, carry, x_rbits = 0;
    AR_CRAY_64 y, z;

    x->sign = a->sign ^ b->sign;
    y = *a;
    z = *b;

    if (!a_expo && !b_expo)
        x->sign = x_expo = 0;
    else {
        x_expo = a_expo + b_expo - AR_CRAY_EXPO_BIAS;
        if (a_expo < AR_CRAY_MIN_EXPO - 1 ||
            b_expo < AR_CRAY_MIN_EXPO - 1 ||
            x_expo < AR_CRAY_MIN_EXPO - 1) {
            ZEROCRAY64 (*x);
            return AR_STAT_UNDERFLOW | AR_STAT_ZERO;
        }
    }

    /* Preload the accumulator with the mode-dependent rounding constant. */
    switch (roundmode) {

    case AR_ROUNDED:
        /* CRAY-1 rounded multiply */
        x_lbits = 0;
        x->coeff0 = x->coeff1 = x->coeff2 = 0;
        x_rbits = 0151;
        break;

    case AR_UNROUNDED:
        /* CRAY-1 truncation compensation */
        x_lbits = 0;
        x->coeff0 = x->coeff1 = x->coeff2 = 0;
        x_rbits = 0011;
        break;

    case AR_RECIPROCAL_ITERATION:
        /* CRAY-1 recip iter */
        x_lbits = 1;
        x->coeff0 = ~0;
        x->coeff1 = ~0;
        x->coeff2 = (0011 - 0320) >> 7;
        x_rbits = (0011 - 0320) & MASKR (7);
        break;
    }

    /* Compute and sum the pyramid */
#if AR_CRAY_C0_BITS*3 == AR_CRAY64_COEFF_BITS
    y_rbits = y.coeff2 << 7;
    i = AR_CRAY_C0_BITS;
    zcoeff = z.coeff0;
    while (zcoeff) {
        if (zcoeff & 0x8000) {
            /* Accumulate the low-order (rounding) bits first so the
             * carry propagates into the 64-bit add.
             */
            x_rbits += (y_rbits & 0177);
            carry = x_rbits >> 7;
            x_rbits &= 0177;
            ADDCRAY64 (*x, carry, *x, y);
            x_lbits += carry;
        }
        SHRIGHTCRAY64 (y);
        y_rbits >>= 1;
        zcoeff = (zcoeff & 0x7fff) << 1;
        i--;
    }