static int pix_norm1_altivec(uint8_t *pix, int line_size) { int i; int s; __vector zero = __vzero(); /* vector unsigned char *tv; vector unsigned char pixv; vector unsigned int sv; vector signed int sum; */ __vector *tv; __vector pixv; __vector sv; __vector sum; sv = __vzero(); s = 0; for (i = 0; i < 16; i++) { /* Read in the potentially unaligned pixels */ //tv = (vector unsigned char *) pix; tv = (__vector*) pix; //pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix)); pixv = __vperm(tv[0], tv[1], __lvsl(pix,0)); /* Square the values, and add them to our sum */ sv = vec_msum(pixv, pixv, sv); pix += line_size; } /* Sum up the four partial sums, and put the result into s */ sum = vec_sums((vector signed int) sv, (vector signed int) zero); sum = vec_splat(sum, 3); vec_ste(sum, 0, &s); return s; }
static force_inline __vector4 pix_multiply (__vector4 p, __vector4 a) { __vector4 hi, lo, mod; __vector4 hiLow, hiHigh, modLow, modHigh, loLow, loHigh; __vector4 hiLowFP, hiHighFP, modLowFP, modHighFP, loLowFP, loHighFP; __vector4 himodLow, himodHigh, lomodLow, lomodHigh; __vector4 zeroVector = *(__vector4*)(&zeroVectori); __vector4 zeroEightVector = *(__vector4*)(&zeroEightVectori); __vector4 permVecLo = *(__vector4*)(&permVecLoi); __vector4 permVecHi = *(__vector4*)(&permVecHii); __vector4 permVec1Vec2 = *(__vector4*)(&permVec1Vec2i); /* unpack to short */ hi = __vmrghb(zeroVector,p); mod = __vmrghb(zeroVector,a); //+ What we want to do here is to multiply 8 unsigned shorts of the hi with 8 unsigned shorts of mod. /* Extract the hi vector into 4 Unsigned int by using 4 Lower unsigned shorts*/ hiLow = __vperm(hi,zeroVector, permVecLo); /* Extract the hi vector into 4 Unsigned int by using 4 Upper unsigned shorts*/ hiHigh = __vperm(hi,zeroVector, permVecHi); /* Extract the mod vector into 4 Unsigned int by using 4 Lower unsigned shorts*/ modLow = __vperm(mod,zeroVector, permVecLo); /* Extract the mod vector into 4 Unsigned int by using 4 Upper unsigned shorts*/ modHigh = __vperm(mod,zeroVector, permVecHi); /* Convert the 4 unsigned ints to floating point by treating them as Fixed point*/ hiLowFP = __vcfux(hiLow,0); hiHighFP = __vcfux(hiHigh,0); modLowFP = __vcfux(modLow,0); modHighFP = __vcfux(modHigh,0); /* Multiply the floating points */ himodLow = __vmaddfp(hiLowFP, modLowFP,zeroVector); himodHigh = __vmaddfp(hiHighFP, modHighFP,zeroVector); /* Convert the floating points to Fixed Point with zero digits after radix point - Effectively an unsigned int*/ himodLow = __vctuxs(himodLow,0); himodHigh = __vctuxs(himodHigh,0); /* Fuse the multiplication together to get the final product*/ hi = __vperm(himodLow,himodHigh,permVec1Vec2); //- hi = __vadduhm(hi,zeroEightVector); hi = __vadduhs(hi, __vsrh(hi, __vspltish (8))); hi = __vsrh (hi, __vspltish (8)); /* unpack to 
short */ lo = __vmrglb(zeroVector,p); mod = __vmrglb(zeroVector,a); //+ Comments from few lines above applicable here. loLow = __vperm(lo,zeroVector,permVecLo); loHigh = __vperm(lo,zeroVector, permVecHi); modLow = __vperm(mod,zeroVector, permVecLo); modHigh = __vperm(mod,zeroVector, permVecHi); loLowFP = __vcfux(loLow,0); loHighFP = __vcfux(loHigh,0); modLowFP = __vcfux(modLow,0); modHighFP = __vcfux(modHigh,0); lomodLow = __vmaddfp(loLowFP, modLowFP,zeroVector); lomodHigh = __vmaddfp(loHighFP, modHighFP,zeroVector); lomodLow = __vctuxs(lomodLow,0); lomodHigh = __vctuxs(lomodHigh,0); lo = __vperm(lomodLow,lomodHigh,permVec1Vec2); //- lo = __vadduhm(lo,zeroEightVector); lo = __vadduhs (lo, __vsrh (lo, __vspltish (8))); lo = __vsrh (lo, __vspltish (8)); return __vpkuhus (hi, lo); }
/*
 * Permute pix with itself using the control vector stored in
 * vmx128i_splat_alpha_vector and return the result.
 *
 * NOTE(review): going by the names, the control vector presumably replicates
 * each pixel's alpha byte across that pixel's channels -- confirm against the
 * definition of vmx128i_splat_alpha_vector.
 */
static force_inline __vector4
splat_alpha (__vector4 pix)
{
    __vector4 alphaSelect = *(__vector4*)(&vmx128i_splat_alpha_vector);

    return __vperm (pix, pix, alphaSelect);
}