Example #1
File: cray_fadd.c Project: xyuan/absoft
int
ar_cfadd64 (AR_CRAY_64 *x,
	    const AR_CRAY_64 *a,
	    const AR_CRAY_64 *b) {

	int res = AR_STAT_OK, a_expo = a->expo, b_expo = b->expo;
	unsigned int carry, shift, inexact;
	AR_CRAY_64 y;

	/* Ensure that the first argument has the largest exponent */
	if (b_expo > a_expo)
		y = *a, *x = *b;
	else
		y = *b, *x = *a;

	/* Test for underflow */
	if (x->expo < AR_CRAY_MIN_EXPO) {
		ZEROCRAY64 (*x);
		return AR_STAT_UNDERFLOW | AR_STAT_ZERO;
	}

	/* Shift the coefficient of the second argument down (right). We
	 * do this in parts so as not to overflow registers.
	 */
	inexact = 0;
	shift = x->expo - y.expo;
	if (shift > AR_CRAY64_COEFF_BITS)
		ZEROCRAY64 (y);
	else {
		for (; shift; shift--) {
			inexact |= y.coeff2 & 1;
			SHRIGHTCRAY64 (y);
			y.expo++;
		}
	}

	/* If signs differ, complement the first argument; this is equivalent
	 * to negating and then subtracting one, in a two's complement sense.
	 */
	if (x->sign != y.sign)
		NOTCRAY64 (*x);

	/* Compute sum of coefficients */
	carry = 0;
	ADDCRAY64 (*x, carry, *x, y);

	/* Check if the sign changed, and add 1 if so; otherwise, undo
	 * the complement.
	 */
	if (x->sign != y.sign)
		if (carry) {
			x->sign ^= 1;
			carry = 1;
			INCCRAY64 (*x, carry);
			carry = 0;
		} else
			NOTCRAY64 (*x);

	if (carry) {
		SHRIGHTCRAY64 (*x);
		x->coeff0 |= 1 << (AR_CRAY_C0_BITS - 1);
		x->expo++;
	}

	/* Check for a zero result, as a special case */
	if (!(x->coeff0 | x->coeff1 | x->coeff2)) {
		x->sign = x->expo = 0;
		return AR_STAT_ZERO;
	}

	/* Shift the result coefficient left until normalized */
	while (!(x->coeff0 >> (AR_CRAY_C0_BITS - 1))) {
		x->expo--;
		SHLEFTCRAY64 (*x);
	}

	/* Test for out-of-range result or operand */
	if (x->expo > AR_CRAY_MAX_EXPO ||
	    a_expo > AR_CRAY_MAX_EXPO ||
	    b_expo > AR_CRAY_MAX_EXPO) {
		x->sign = 0;
		x->expo = AR_CRAY_MAX_EXPO + 1;
		res |= AR_STAT_OVERFLOW;
	}

	if (inexact)
		res |= AR_STAT_INEXACT;
	if (x->sign)
		res |= AR_STAT_NEGATIVE;

	return res;
}
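The sign handling in ar_cfadd64 rests on the identity its own comment cites: complementing a word equals negating it and subtracting one in two's-complement terms, so adding the complement of one magnitude to the other and then either propagating the end-around carry or re-complementing yields their difference with the correct sign. A minimal standalone sketch of that trick on a single 16-bit word (magnitude_diff is a hypothetical helper, not part of the library):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper, not from the library: the same one's-complement
 * trick ar_cfadd64 uses when the operand signs differ, applied to one
 * 16-bit word instead of the three split coefficient words.
 */
static uint16_t
magnitude_diff (uint16_t x, uint16_t y, int *sign_flipped)
{
	uint32_t sum = (uint32_t) (uint16_t) ~x + y;	/* ~x + y, carry lands in bit 16 */

	if (sum >> 16) {			/* carry out: y was the larger magnitude */
		*sign_flipped = 1;		/* result takes y's sign */
		return (uint16_t) (sum + 1);	/* end-around carry: == y - x */
	}
	*sign_flipped = 0;			/* no carry: x >= y, keep x's sign */
	return (uint16_t) ~sum;			/* undo the complement: ~(~x + y) == x - y */
}

int
main (void)
{
	int flip;

	printf ("%d %d\n", magnitude_diff (100, 250, &flip), flip);	/* prints "150 1" */
	printf ("%d %d\n", magnitude_diff (250, 100, &flip), flip);	/* prints "150 0" */
	return 0;
}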
Example #2
File: cvt.c Project: keylentona/Path64
int
ar_convert_to_complex (ar_data *result, const AR_TYPE *resulttype,
		       const ar_data *opnd, const AR_TYPE *opndtype) {

	ar_data from, re, im, cre, cim;
	AR_TYPE reimtype, parttype, temptype;
	int status = AR_STAT_OK, restat, imstat;

	/* Component (part) type of the result: the matching scalar float
	 * type, obtained by clearing the complex flag from the result type.
	 */
	parttype = (AR_TYPE) (*resulttype ^ AR_FLOAT_COMPLEX);

	/* Complex operand: convert the real and imaginary parts separately,
	 * then recompose them in the result's component type.
	 */
	if (AR_CLASS (*opndtype) == AR_CLASS_FLOAT &&
	    AR_FLOAT_IS_COMPLEX (*opndtype) == AR_FLOAT_COMPLEX) {
		status |= ar_decompose_complex (&re, &im, &reimtype,
						opnd, opndtype);
		restat = ar_convert_to_float (&cre, &parttype, &re, &reimtype);
		imstat = ar_convert_to_float (&cim, &parttype, &im, &reimtype);
		status |= ar_compose_complex (result, &temptype,
					      &cre, &cim, &parttype);
		/* A complex value is zero only when both parts are, and the
		 * sign flag does not apply; drop both flags and recompute
		 * ZERO from the per-part conversion statuses.
		 */
		status &= ~(AR_STAT_ZERO | AR_STAT_NEGATIVE);
		status |= restat & imstat & AR_STAT_ZERO;
		return status;
	}

	/* Scalar operand: convert it to the component type; it becomes the
	 * real part, and the imaginary part is zeroed per format below.
	 */
	status |= ar_convert_to_float (&cre, &parttype, opnd, opndtype);

	switch (*resulttype) {
	case AR_Complex_Cray1_64:
	case AR_Complex_Cray1_64_F:
		result->ar_cplx_f64.real = cre.ar_f64;
		ZEROCRAY64 (result->ar_cplx_f64.imag);
		break;
	case AR_Complex_Cray1_128:
		result->ar_cplx_f128.real = cre.ar_f128;
		ZEROCRAY128 (result->ar_cplx_f128.imag);
		break;
	case AR_Complex_IEEE_NR_32:
	case AR_Complex_IEEE_ZE_32:
	case AR_Complex_IEEE_UP_32:
	case AR_Complex_IEEE_DN_32:
		IEEE32_TO_CPLX32_REAL(result->ar_cplx_ieee32, cre.ar_ieee32);
		result->ar_cplx_ieee32.isign = 0;
		result->ar_cplx_ieee32.iexpo = 0;
		result->ar_cplx_ieee32.icoeff0 = 0;
		result->ar_cplx_ieee32.icoeff1 = 0;
		break;
	case AR_Complex_IEEE_NR_64:
	case AR_Complex_IEEE_ZE_64:
	case AR_Complex_IEEE_UP_64:
	case AR_Complex_IEEE_DN_64:
		result->ar_cplx_ieee64.real = cre.ar_ieee64;
		ZEROIEEE64 (result->ar_cplx_ieee64.imag);
		break;
	case AR_Complex_IEEE_NR_128:
	case AR_Complex_IEEE_ZE_128:
	case AR_Complex_IEEE_UP_128:
	case AR_Complex_IEEE_DN_128:
		result->ar_cplx_ieee128.real = cre.ar_ieee128;
		ZEROIEEE128 (result->ar_cplx_ieee128.imag);
		break;
	default:
		return AR_STAT_INVALID_TYPE;
	}

	return status;
}
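The parttype computation in ar_convert_to_complex works because each complex type code is assumed to differ from its scalar component type only in the AR_FLOAT_COMPLEX flag, so XOR-ing that flag off a complex result type recovers the component type. A self-contained sketch of the idea with made-up type codes (the real AR_TYPE encoding lives in the library's headers):

#include <stdio.h>

/* Hypothetical type codes for illustration only; they are not the
 * library's values.
 */
enum {
	FLOAT_COMPLEX_BIT = 0x40,			/* assumed complex flag bit */
	T_IEEE_64	  = 0x12,			/* assumed scalar type code */
	T_COMPLEX_IEEE_64 = T_IEEE_64 | FLOAT_COMPLEX_BIT
};

int
main (void)
{
	int parttype = T_COMPLEX_IEEE_64 ^ FLOAT_COMPLEX_BIT;

	printf ("%s\n", parttype == T_IEEE_64 ?
		"recovered the scalar part type" : "mismatch");
	return 0;
}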
Example #3
int
ar_cfmul64 (AR_CRAY_64 *x,
	    const AR_CRAY_64 *a,
	    const AR_CRAY_64 *b,
	    int roundmode) {

	int i, res = AR_STAT_OK;
	long x_expo, a_expo = a->expo, b_expo = b->expo, test_expo;
	unsigned int x_lbits, y_rbits, zcoeff, carry, x_rbits = 0;
	AR_CRAY_64 y, z;

	x->sign = a->sign ^ b->sign;
	y = *a;
	z = *b;

	if (!a_expo && !b_expo)
		x->sign = x_expo = 0;
	else {
		x_expo = a_expo + b_expo - AR_CRAY_EXPO_BIAS;
		if (a_expo < AR_CRAY_MIN_EXPO - 1 ||
		    b_expo < AR_CRAY_MIN_EXPO - 1 ||
		    x_expo < AR_CRAY_MIN_EXPO - 1) {
			ZEROCRAY64 (*x);
			return AR_STAT_UNDERFLOW | AR_STAT_ZERO;
		}
	}

	switch (roundmode) {
	case AR_ROUNDED:		/* CRAY-1 rounded multiply */
		x_lbits = 0;
		x->coeff0 = x->coeff1 = x->coeff2 = 0;
		x_rbits = 0151;
		break;
	case AR_UNROUNDED:		/* CRAY-1 truncation compensation */
		x_lbits = 0;
		x->coeff0 = x->coeff1 = x->coeff2 = 0;
		x_rbits = 0011;
		break;
	case AR_RECIPROCAL_ITERATION:		/* CRAY-1 recip iter */
		x_lbits = 1;
		x->coeff0 = ~0;
		x->coeff1 = ~0;
		x->coeff2 = (0011 - 0320) >> 7;
		x_rbits = (0011 - 0320) & MASKR (7);
		break;
	}

	/* Compute and sum the pyramid */
#if AR_CRAY_C0_BITS*3 == AR_CRAY64_COEFF_BITS
	y_rbits = y.coeff2 << 7;
	i = AR_CRAY_C0_BITS;
	zcoeff = z.coeff0;
	while (zcoeff) {
		if (zcoeff & 0x8000) {
			x_rbits += (y_rbits & 0177);
			carry = x_rbits >> 7;
			x_rbits &= 0177;
			ADDCRAY64 (*x, carry, *x, y);
			x_lbits += carry;
		}
		SHRIGHTCRAY64 (y);
		y_rbits >>= 1;
		zcoeff = (zcoeff & 0x7fff) << 1;
		i--;
	}
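The "pyramid" summation above is a shift-and-add multiply: the top coefficient word of one operand is scanned from its most significant bit, and a progressively right-shifted copy of the other coefficient (together with its guard bits in y_rbits) is accumulated whenever that bit is set. A hedged standalone sketch of the same structure on plain 16-bit words (shift_add_mul16 is illustrative, not part of the library), producing the exact 32-bit product:

#include <stdint.h>
#include <stdio.h>

/* Illustrative shift-and-add multiply; mirrors the loop structure of the
 * pyramid summation in ar_cfmul64.
 */
static uint32_t
shift_add_mul16 (uint16_t a, uint16_t b)
{
	uint32_t acc = 0;
	uint32_t y = (uint32_t) a << 15;	/* partial product for the top bit of b */

	while (b) {
		if (b & 0x8000)			/* current top bit of b is set */
			acc += y;
		y >>= 1;			/* the next bit of b has half the weight */
		b = (b & 0x7fff) << 1;		/* shift the next bit of b into the top position */
	}
	return acc;
}

int
main (void)
{
	printf ("%u\n", (unsigned) shift_add_mul16 (1234, 5678));	/* prints "7006652" */
	return 0;
}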