/*
 * Add two CRAY-1 64-bit floating-point values: *x = *a + *b.
 * Returns an AR_STAT_* bit mask describing the result
 * (zero / negative / underflow / overflow / inexact).
 */
int
ar_cfadd64 (AR_CRAY_64 *x, const AR_CRAY_64 *a, const AR_CRAY_64 *b)
{
	int status = AR_STAT_OK;
	int ea = a->expo, eb = b->expo;
	unsigned int cy, shcnt, lost;
	AR_CRAY_64 small;

	/* Arrange operands so that *x holds the one with the larger
	 * exponent and `small` the other. */
	if (eb > ea) {
		small = *a;
		*x = *b;
	} else {
		small = *b;
		*x = *a;
	}

	/* Underflow: larger exponent already below the representable range. */
	if (x->expo < AR_CRAY_MIN_EXPO) {
		ZEROCRAY64 (*x);
		return AR_STAT_UNDERFLOW | AR_STAT_ZERO;
	}

	/* Align the smaller operand by shifting its coefficient right one
	 * bit at a time (done piecemeal so registers never overflow),
	 * remembering any bit shifted out for the inexact flag. */
	lost = 0;
	shcnt = x->expo - small.expo;
	if (shcnt > AR_CRAY64_COEFF_BITS) {
		ZEROCRAY64 (small);
	} else {
		for (; shcnt; shcnt--) {
			lost |= small.coeff2 & 1;
			SHRIGHTCRAY64 (small);
			small.expo++;
		}
	}

	/* Unlike signs: one's-complement the first operand, which equals
	 * negating it and subtracting one in two's-complement terms. */
	if (x->sign != small.sign)
		NOTCRAY64 (*x);

	/* Sum the coefficients. */
	cy = 0;
	ADDCRAY64 (*x, cy, *x, small);

	/* For unlike signs: a carry out means the true sign flipped, so
	 * flip it and add back the 1 we owe from the complement trick;
	 * no carry means we must undo the complement. */
	if (x->sign != small.sign) {
		if (cy) {
			x->sign ^= 1;
			cy = 1;
			INCCRAY64 (*x, cy);
			cy = 0;
		} else {
			NOTCRAY64 (*x);
		}
	}

	/* Like signs with a carry out: renormalize one place right. */
	if (cy) {
		SHRIGHTCRAY64 (*x);
		x->coeff0 |= 1 << (AR_CRAY_C0_BITS - 1);
		x->expo++;
	}

	/* Exactly-zero result is special-cased. */
	if (!(x->coeff0 | x->coeff1 | x->coeff2)) {
		x->sign = x->expo = 0;
		return AR_STAT_ZERO;
	}

	/* Normalize: shift left until the top coefficient bit is set. */
	while (!(x->coeff0 >> (AR_CRAY_C0_BITS - 1))) {
		x->expo--;
		SHLEFTCRAY64 (*x);
	}

	/* Out-of-range result, or an operand that was already out of range. */
	if (x->expo > AR_CRAY_MAX_EXPO ||
	    ea > AR_CRAY_MAX_EXPO ||
	    eb > AR_CRAY_MAX_EXPO) {
		x->sign = 0;
		x->expo = AR_CRAY_MAX_EXPO + 1;
		status |= AR_STAT_OVERFLOW;
	}

	if (lost)
		status |= AR_STAT_INEXACT;
	if (x->sign)
		status |= AR_STAT_NEGATIVE;

	return status;
}
/*
 * Convert an arithmetic value to a complex type.
 *
 * result/resulttype:  destination value and its (complex) type.
 * opnd/opndtype:      source value and its type.
 *
 * Complex sources are converted part-by-part; non-complex sources become
 * the real part with the imaginary part set to zero.  Returns an
 * AR_STAT_* bit mask (AR_STAT_INVALID_TYPE for an unknown result type).
 *
 * Fix: removed the unused local `ar_data from` that was declared but
 * never referenced.
 */
int
ar_convert_to_complex (ar_data *result, const AR_TYPE *resulttype,
		       const ar_data *opnd, const AR_TYPE *opndtype)
{
	ar_data re, im, cre, cim;
	AR_TYPE reimtype, parttype, temptype;
	int status = AR_STAT_OK, restat, imstat;

	/* Type of each real/imaginary part of the target complex type
	 * (clearing the complex bit yields the component float type). */
	parttype = (AR_TYPE) (*resulttype ^ AR_FLOAT_COMPLEX);

	if (AR_CLASS (*opndtype) == AR_CLASS_FLOAT &&
	    AR_FLOAT_IS_COMPLEX (*opndtype) == AR_FLOAT_COMPLEX) {

		/* Complex -> complex: split, convert each part, recompose. */
		status |= ar_decompose_complex (&re, &im, &reimtype,
						opnd, opndtype);
		restat = ar_convert_to_float (&cre, &parttype, &re, &reimtype);
		imstat = ar_convert_to_float (&cim, &parttype, &im, &reimtype);
		status |= ar_compose_complex (result, &temptype,
					      &cre, &cim, &parttype);

		/* ZERO/NEGATIVE from the helpers don't apply to the whole
		 * complex value; the result is zero only if BOTH parts
		 * converted to zero. */
		status &= ~(AR_STAT_ZERO | AR_STAT_NEGATIVE);
		status |= restat & imstat & AR_STAT_ZERO;
		return status;
	}

	/* Non-complex source: converted value becomes the real part,
	 * imaginary part is zeroed. */
	status |= ar_convert_to_float (&cre, &parttype, opnd, opndtype);

	switch (*resulttype) {

	case AR_Complex_Cray1_64:
	case AR_Complex_Cray1_64_F:
		result->ar_cplx_f64.real = cre.ar_f64;
		ZEROCRAY64 (result->ar_cplx_f64.imag);
		break;

	case AR_Complex_Cray1_128:
		result->ar_cplx_f128.real = cre.ar_f128;
		ZEROCRAY128 (result->ar_cplx_f128.imag);
		break;

	case AR_Complex_IEEE_NR_32:
	case AR_Complex_IEEE_ZE_32:
	case AR_Complex_IEEE_UP_32:
	case AR_Complex_IEEE_DN_32:
		IEEE32_TO_CPLX32_REAL (result->ar_cplx_ieee32, cre.ar_ieee32);
		/* Zero the imaginary part field-by-field. */
		result->ar_cplx_ieee32.isign = 0;
		result->ar_cplx_ieee32.iexpo = 0;
		result->ar_cplx_ieee32.icoeff0 = 0;
		result->ar_cplx_ieee32.icoeff1 = 0;
		break;

	case AR_Complex_IEEE_NR_64:
	case AR_Complex_IEEE_ZE_64:
	case AR_Complex_IEEE_UP_64:
	case AR_Complex_IEEE_DN_64:
		result->ar_cplx_ieee64.real = cre.ar_ieee64;
		ZEROIEEE64 (result->ar_cplx_ieee64.imag);
		break;

	case AR_Complex_IEEE_NR_128:
	case AR_Complex_IEEE_ZE_128:
	case AR_Complex_IEEE_UP_128:
	case AR_Complex_IEEE_DN_128:
		result->ar_cplx_ieee128.real = cre.ar_ieee128;
		ZEROIEEE128 (result->ar_cplx_ieee128.imag);
		break;

	default:
		return AR_STAT_INVALID_TYPE;
	}

	return status;
}
int ar_cfmul64 (AR_CRAY_64 *x, const AR_CRAY_64 *a, const AR_CRAY_64 *b, int roundmode) { int i, res = AR_STAT_OK; long x_expo, a_expo = a->expo, b_expo = b->expo, test_expo; unsigned int x_lbits, y_rbits, zcoeff, carry, x_rbits = 0; AR_CRAY_64 y, z; x->sign = a->sign ^ b->sign; y = *a; z = *b; if (!a_expo && !b_expo) x->sign = x_expo = 0; else { x_expo = a_expo + b_expo - AR_CRAY_EXPO_BIAS; if (a_expo < AR_CRAY_MIN_EXPO - 1 || b_expo < AR_CRAY_MIN_EXPO - 1 || x_expo < AR_CRAY_MIN_EXPO - 1) { ZEROCRAY64 (*x); return AR_STAT_UNDERFLOW | AR_STAT_ZERO; } } switch (roundmode) { case AR_ROUNDED: /* CRAY-1 rounded multiply */ x_lbits = 0; x->coeff0 = x->coeff1 = x->coeff2 = 0; x_rbits = 0151; break; case AR_UNROUNDED: /* CRAY-1 truncation compensation */ x_lbits = 0; x->coeff0 = x->coeff1 = x->coeff2 = 0; x_rbits = 0011; break; case AR_RECIPROCAL_ITERATION: /* CRAY-1 recip iter */ x_lbits = 1; x->coeff0 = ~0; x->coeff1 = ~0; x->coeff2 = (0011 - 0320) >> 7; x_rbits = (0011 - 0320) & MASKR (7); break; } /* Compute and sum the pyramid */ #if AR_CRAY_C0_BITS*3 == AR_CRAY64_COEFF_BITS y_rbits = y.coeff2<<7; i = AR_CRAY_C0_BITS; zcoeff = z.coeff0; while(zcoeff) { if(zcoeff & 0x8000) { x_rbits += (y_rbits & 0177); carry = x_rbits >> 7; x_rbits &= 0177; ADDCRAY64 (*x, carry, *x, y); x_lbits += carry; } SHRIGHTCRAY64 (y); y_rbits >>= 1; zcoeff = (zcoeff & 0x7fff) << 1; i--; }