Esempio n. 1
0
static int pix_norm1_altivec(uint8_t *pix, int line_size)
{
    int i;
    int s;
    __vector zero = __vzero();
/*
    vector unsigned char *tv;
    vector unsigned char pixv;
    vector unsigned int sv;
    vector signed int sum;	
*/

	__vector *tv;
    __vector pixv;
    __vector sv;
    __vector sum;

    sv = __vzero();

    s = 0;
    for (i = 0; i < 16; i++) {
        /* Read in the potentially unaligned pixels */
        //tv = (vector unsigned char *) pix;
		tv = (__vector*) pix;
        //pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix));
		pixv = __vperm(tv[0], tv[1], __lvsl(pix,0));

        /* Square the values, and add them to our sum */
        sv = vec_msum(pixv, pixv, sv);

        pix += line_size;
    }
    /* Sum up the four partial sums, and put the result into s */
    sum = vec_sums((vector signed int) sv, (vector signed int) zero);
    sum = vec_splat(sum, 3);
    
	vec_ste(sum, 0, &s);

    return s;
}
Esempio n. 2
0
static force_inline __vector4
pix_multiply (__vector4 p, __vector4 a)
{
	__vector4 hi, lo, mod;
	
	__vector4 hiLow, hiHigh, modLow, modHigh, loLow, loHigh;
	__vector4 hiLowFP, hiHighFP, modLowFP, modHighFP, loLowFP, loHighFP;
	__vector4 himodLow, himodHigh, lomodLow, lomodHigh;
	
 	__vector4 zeroVector = *(__vector4*)(&zeroVectori);
 	__vector4 zeroEightVector = *(__vector4*)(&zeroEightVectori);
	__vector4 permVecLo = *(__vector4*)(&permVecLoi);
	__vector4 permVecHi = *(__vector4*)(&permVecHii);
	__vector4 permVec1Vec2 = *(__vector4*)(&permVec1Vec2i);

	/* unpack to short */
	hi  = __vmrghb(zeroVector,p);
	mod = __vmrghb(zeroVector,a);
	
	//+ What we want to do here is to multiply 8 unsigned shorts of the hi with 8 unsigned shorts of mod.
	/* Extract the hi vector into 4 Unsigned int by using 4 Lower unsigned shorts*/
	hiLow	= __vperm(hi,zeroVector, permVecLo);
	/* Extract the hi vector into 4 Unsigned int by using 4 Upper unsigned shorts*/
	hiHigh	= __vperm(hi,zeroVector, permVecHi);

	/* Extract the mod vector into 4 Unsigned int by using 4 Lower unsigned shorts*/
	modLow	= __vperm(mod,zeroVector, permVecLo);
	/* Extract the mod vector into 4 Unsigned int by using 4 Upper unsigned shorts*/
	modHigh = __vperm(mod,zeroVector, permVecHi);

	/* Convert the 4 unsigned ints to floating point by treating them as Fixed point*/ 
	hiLowFP		= __vcfux(hiLow,0);
	hiHighFP	= __vcfux(hiHigh,0);
	modLowFP	= __vcfux(modLow,0);
	modHighFP	= __vcfux(modHigh,0);

	/* Multiply the floating points */
	himodLow	= __vmaddfp(hiLowFP, modLowFP,zeroVector);
	himodHigh	= __vmaddfp(hiHighFP, modHighFP,zeroVector);
	
	/* Convert the floating points to Fixed Point with zero digits after radix point - Effectively an unsigned int*/
	himodLow	= __vctuxs(himodLow,0);
	himodHigh	= __vctuxs(himodHigh,0);

	/* Fuse the multiplication together to get the final product*/
	hi = __vperm(himodLow,himodHigh,permVec1Vec2);

	//-



	hi = __vadduhm(hi,zeroEightVector);
	
	hi = __vadduhs(hi, __vsrh(hi, __vspltish (8)));
	hi = __vsrh (hi, __vspltish (8));


	/* unpack to short */
	lo  = __vmrglb(zeroVector,p);
	mod = __vmrglb(zeroVector,a);
	
	//+ Comments from few lines above applicable here.
	loLow	= __vperm(lo,zeroVector,permVecLo);
	loHigh	= __vperm(lo,zeroVector, permVecHi); 

	modLow	= __vperm(mod,zeroVector, permVecLo);
	modHigh = __vperm(mod,zeroVector, permVecHi);

	loLowFP		= __vcfux(loLow,0);
	loHighFP	= __vcfux(loHigh,0);
	modLowFP	= __vcfux(modLow,0);
	modHighFP	= __vcfux(modHigh,0);

	lomodLow	= __vmaddfp(loLowFP, modLowFP,zeroVector);
	lomodHigh	= __vmaddfp(loHighFP, modHighFP,zeroVector);
	
	lomodLow	= __vctuxs(lomodLow,0);
	lomodHigh	= __vctuxs(lomodHigh,0);

	lo = __vperm(lomodLow,lomodHigh,permVec1Vec2);
	//-

	lo = __vadduhm(lo,zeroEightVector);
	
	lo = __vadduhs (lo, __vsrh (lo, __vspltish (8)));
	lo = __vsrh (lo, __vspltish (8));

	return __vpkuhus (hi, lo);
}
Esempio n. 3
0
/*
 * Permutes pix with itself using the control vector stored in
 * vmx128i_splat_alpha_vector and returns the result.
 *
 * NOTE(review): going by the names, the control vector presumably replicates
 * each pixel's alpha byte across that pixel's channels -- confirm against the
 * definition of vmx128i_splat_alpha_vector.
 */
static force_inline __vector4
splat_alpha (__vector4 pix)
{
	__vector4 alpha_select = *(__vector4*)(&vmx128i_splat_alpha_vector);

	return __vperm (pix, pix, alpha_select);
}