/*
 * DSP_maxval - return the largest value in a vector of 16-bit elements.
 *
 *   x   Input vector.  It is read with aligned 64-bit loads, and the
 *       _nassert below tells the compiler that x sits on an 8-byte boundary.
 *   nx  Number of elements.  Each loop pass consumes 8 elements, so a count
 *       that is not a multiple of 8 makes the last pass read beyond
 *       x[nx - 1].  The MUST_ITERATE pragma additionally promises the
 *       compiler that the loop body runs at least once.
 *
 * Returns the maximum as a short.  If the loop body never executes, the
 * result is the 0x8000 seed pattern truncated to short.
 */
short DSP_maxval(const short *x, int nx)
{
    const long long *src;
    double quad_lo, quad_hi;
    int best01, best23, best45, best67;
    int swapped;
    int i;

    /*
     * Four 32-bit accumulators, each holding two packed 16-bit running
     * maxima.  Every half-word starts at 0x8000, the most negative short.
     */
    best01 = 0x80008000;
    best23 = 0x80008000;
    best45 = 0x80008000;
    best67 = 0x80008000;

    /* Walk the input as 64-bit double words (four shorts apiece). */
    src = (const long long *)x;
    _nassert((int)(src) % 8 == 0);

    /*
     * Per pass: two double-word loads fetch 8 shorts, and four _max2
     * operations fold them into the 8 independent running maxima.  The
     * eight lanes are only combined after the loop.
     */
    #pragma MUST_ITERATE(1,,1);
    for (i = 0; i < nx; i += 8) {
        quad_lo = _amemd8((void *)&src[0]);   /* shorts 0..3 */
        quad_hi = _amemd8((void *)&src[1]);   /* shorts 4..7 */
        src += 2;

        best01 = _max2(best01, _lo(quad_lo));
        best23 = _max2(best23, _hi(quad_lo));
        best45 = _max2(best45, _lo(quad_hi));
        best67 = _max2(best67, _hi(quad_hi));
    }

    /* Reduce 8 lanes -> 4 -> 2, still as packed pairs. */
    best01 = _max2(best01, best23);
    best45 = _max2(best45, best67);
    best01 = _max2(best01, best45);

    /*
     * Swap the two half-words and take the packed max once more, which
     * leaves the overall maximum in both the upper and the lower half.
     */
    swapped = _rotl(best01, 16);
    best01  = _max2(best01, swapped);

    /* Hand back only the lower 16 bits. */
    return (best01 & 0xFFFF);
}
_CODE_ACCESS void *memset(void *dst, int fill, size_t len) { char *restrict dst1, *restrict dst2; int pre_bytes, post_bytes, wfill, i; double dfill1, dfill2; dst1 = (char *)dst; /*--------------------------------------------------------------------*/ /* Replicate the 8-bit value in fill into all 4 bytes of wfill */ /*--------------------------------------------------------------------*/ wfill = _pack2 (fill, fill); wfill = _packl4(wfill, wfill); dfill1 = _itod (wfill, wfill); dfill2 = _itod (wfill, wfill); /*--------------------------------------------------------------------*/ /* Calculate number of bytes to pre-copy to get to an alignment of 8 */ /*--------------------------------------------------------------------*/ pre_bytes = (8 - (int) dst) & 7; if (len > pre_bytes) { len -= pre_bytes; if (pre_bytes & 1) { *dst1 = fill; dst1 += 1; } if (pre_bytes & 2) { _amem2(dst1) = wfill; dst1 += 2; } if (pre_bytes & 4) { _amem4(dst1) = wfill; dst1 += 4; } } /*--------------------------------------------------------------------*/ /* Double word fills */ /*--------------------------------------------------------------------*/ post_bytes = len > 0 ? len : 0; dst2 = dst1 + 8; if (len > 15) for (i = 0; i < len >> 4; i++) { _amemd8(dst1) = dfill1; dst1 += 16; _amemd8(dst2) = dfill2; dst2 += 16; post_bytes -= 16; } /*--------------------------------------------------------------------*/ /* Finish transfer with 8, 4, 2 and/or 1-byte writes */ /*--------------------------------------------------------------------*/ if (post_bytes & 8) { _memd8(dst1) = dfill1; dst1 += 8; } if (post_bytes & 4) { _mem4 (dst1) = wfill; dst1 += 4; } if (post_bytes & 2) { dst1[0] = wfill; dst1[1] = wfill; dst1 += 2; } if (post_bytes & 1) { *dst1 = fill; dst1 += 1; } return dst; }