/*
 * DSP_maxval - return the largest element of a vector of 16-bit values.
 *
 *   x   Input vector. It is read with aligned double-word loads, and the
 *       compiler is told (via _nassert) that it is 8-byte aligned.
 *   nx  Number of elements. Every loop pass consumes 8 shorts, so a count
 *       that is not a multiple of 8 reads beyond x[nx - 1]. The
 *       MUST_ITERATE pragma promises the compiler at least one pass.
 *
 * Eight running maxima are kept separated, packed two-per-int, and are
 * only merged after the loop. The result is the low 16 bits of the merged
 * value.
 */
short DSP_maxval(const short *x, int nx)
{
    /* View the shorts as 64-bit words: one word holds four elements. */
    const long long *src = (const long long *)x;
    double quad_a, quad_b;
    int best01, best23, best45, best67;
    int merged, swapped;
    int n;

    /* Seed both 16-bit halves of every accumulator with 0x8000,
     * the most negative 16-bit value. */
    best01 = 0x80008000;
    best23 = 0x80008000;
    best45 = 0x80008000;
    best67 = 0x80008000;

    _nassert((int)(src) % 8 == 0);
    #pragma MUST_ITERATE(1,,1);
    for (n = 0; n < nx; n += 8) {
        /* Two aligned double-word loads fetch 8 shorts per pass. */
        quad_a = _amemd8((void *)(src));
        quad_b = _amemd8((void *)(src + 1));
        src += 2;

        /* Four packed max operations update the 8 running maxima. */
        best01 = _max2(best01, _lo(quad_a));
        best23 = _max2(best23, _hi(quad_a));
        best45 = _max2(best45, _lo(quad_b));
        best67 = _max2(best67, _hi(quad_b));
    }

    /* Reduce 8 candidates to 2 (one per half-word). */
    merged = _max2(_max2(best01, best23), _max2(best45, best67));

    /* Swap the half-words and take the packed max once more, which
     * leaves the overall maximum in both halves. */
    swapped = _rotl(merged, 16);
    merged = _max2(merged, swapped);

    /* Only the low half-word is handed back to the caller. */
    return (merged & 0xFFFF);
}
/* Inform the compiler by _nasserts the following: */ /* a) The output array is word aligned */ /* b) The width is greater than or equal to 2 */ /* c) The width is a multiple of 2. */ /* -------------------------------------------------------------------- */ _nassert((int) imgout_ptr % 4 == 0); _nassert((int) width % 2 == 0); _nassert((int) width >= 2); /* -------------------------------------------------------------------- */ /* The 1st two mask values of each row are loaded into one int */ /* Reverse order of mask coefficients for rotation. */ /* -------------------------------------------------------------------- */ mask_tmp = _memd8_const((void *) &mask_ptr[5]); mask3_0 = _itod(_packlh2(_lo(mask_tmp),_lo(mask_tmp)), _packlh2(_hi(mask_tmp),_hi(mask_tmp))); mask_tmp = _memd8_const((void *) &mask_ptr[1]); mask7_4 = _itod(_packlh2(_lo(mask_tmp),_lo(mask_tmp)), _packlh2(_hi(mask_tmp),_hi(mask_tmp))); /* -------------------------------------------------------------------- */ /* The last mask values of each row are loaded into an int */ /* -------------------------------------------------------------------- */ mask8 = mask_ptr[0]; mask43 = _packlh2(_lo(mask7_4),_hi(mask3_0)); /* -------------------------------------------------------------------- */ /* mask2_88 contains the last mask of row 3 in its half words */ /* -------------------------------------------------------------------- */ mask88 = _pack2(mask8,mask8);
/*
 * logdp_c - double-precision natural logarithm approximation.
 *
 * The argument is split into a binary exponent and a mantissa rescaled to
 * [0.5, 1). The log of the mantissa comes from a rational polynomial in
 * the square of a ratio produced by divdpMod_clog(), and the exponent is
 * added back as k * (c1 + c2), where c1 + c2 approximates ln(2).
 *
 * NOTE(review): divdpMod_clog() is defined elsewhere; the commented-out
 * code it replaced suggests it computes numerator / denominator - confirm.
 *
 * Out-of-range handling (as implemented):
 *   a <  2.2250738585072014e-308  -> -708.3964185322641
 *                                    (this includes zero and all negatives)
 *   a >  1.7976931348623157e+308  ->  709.7827138470578
 */
double logdp_c(double a)
{
    const double kZero     = 0.0;
    const double kHalf     = 0.5;
    const double kSqrtHalf = 0.70710678118654752440;   /* sqrt(0.5) */

    /* Range limits and the results substituted beyond them. */
    const double kMin    = 2.2250738585072014e-308;
    const double kLogMin = -708.3964185322641;
    const double kMax    = 1.7976931348623157e+308;
    const double kLogMax = 709.7827138470578;

    /* Numerator coefficients of the rational approximation. */
    const double p0 = -0.64124943423745581147e+2;
    const double p1 =  0.16383943563021534222e+2;
    const double p2 = -0.78956112887491257267e+0;

    /* Denominator coefficients; the leading coefficient is 1.0. */
    const double q0 = -0.76949932108494879777e+3;
    const double q1 =  0.31203222091924532844e+3;
    const double q2 = -0.35667977739034646171e+2;

    /* Two-part constant applied to the exponent. */
    const double ln2_hi = 0.693359375;                  /* 355/512 */
    const double ln2_lo = -2.121944400546905827679e-4;

    double mant, num, den, ratio, ratio_sq;
    double poly_den, poly_num, corr, log_mant, scale, result;
    int biased_exp, k, hi_word;

    /* Exponent field of the high word (sign bit shifted out first). */
    biased_exp = _extu(_hi(a), 1, 21);
    k = biased_exp - 1022;

    /* Keep the mantissa bits, force the exponent field to 0x3fe so the
     * rebuilt value lies in [0.5, 1). */
    hi_word = 0x3fe00000 | _clr(_hi(a), 20, 31);
    mant = _itod(hi_word, _lo(a));
    if (biased_exp == 0) {
        mant = 0;
    }

    if (mant > kSqrtHalf) {
        num = (mant - kHalf) - kHalf;
        den = mant * kHalf + kHalf;
    } else {
        num = mant - kHalf;
        den = num * kHalf + kHalf;
        k = k - 1;
    }

    ratio = divdpMod_clog(num, den);
    ratio_sq = ratio * ratio;

    /* Horner evaluation of both polynomials in ratio_sq. */
    poly_den = ((ratio_sq + q2) * ratio_sq + q1) * ratio_sq + q0;
    poly_num = (ratio_sq * p2 + p1) * ratio_sq + p0;

    corr = ratio_sq * divdpMod_clog(poly_num, poly_den);
    log_mant = ratio + ratio * corr;

    /* Add the exponent contribution, small part first. */
    scale = k;
    result = (scale * ln2_lo + log_mant) + scale * ln2_hi;

    /* Range overrides. The kZero branch can never be selected, because
     * any a <= 0 is also below kMin; it is kept to mirror the original
     * sequence of checks. */
    if (a > kMax) {
        result = kLogMax;
    } else if (a < kMin) {
        result = kLogMin;
    } else if (a <= kZero) {
        result = 0;
    }

    return result;
}