Exemple #1
0
static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    LOAD_ZERO;
    vec_s16 *pv1 = (vec_s16*)v1;
    register vec_s16 muls = {mul,mul,mul,mul,mul,mul,mul,mul};
    register vec_s16 t0, t1, i0, i1, i4;
    register vec_s16 i2 = vec_ld(0, v2), i3 = vec_ld(0, v3);
    register vec_s32 res = zero_s32v;
    register vec_u8 align = vec_lvsl(0, v2);
    int32_t ires;
    order >>= 4;
    do {
        i1 = vec_ld(16, v2);
        t0 = vec_perm(i2, i1, align);
        i2 = vec_ld(32, v2);
        t1 = vec_perm(i1, i2, align);
        i0 = pv1[0];
        i1 = pv1[1];
        res = vec_msum(t0, i0, res);
        res = vec_msum(t1, i1, res);
        i4 = vec_ld(16, v3);
        t0 = vec_perm(i3, i4, align);
        i3 = vec_ld(32, v3);
        t1 = vec_perm(i4, i3, align);
        pv1[0] = vec_mladd(t0, muls, i0);
        pv1[1] = vec_mladd(t1, muls, i1);
        pv1 += 2;
        v2  += 16;
        v3  += 16;
    } while(--order);
    res = vec_splat(vec_sums(res, zero_s32v), 3);
    vec_ste(res, 0, &ires);
    return ires;
}
Exemple #2
0
uint32_t
sse8_16bit_altivec_c(const int16_t * b1,
			 const int16_t * b2,
			 const uint32_t stride)
{
    register vector signed short b1_vec;
    register vector signed short b2_vec;
    register vector signed short diff;
    register vector signed int sum;
    uint32_t result;
    
    /* initialize */
    sum = vec_splat_s32(0);
    
    SSE8_16BIT();
    SSE8_16BIT();
    SSE8_16BIT();
    SSE8_16BIT();
    
    SSE8_16BIT();
    SSE8_16BIT();
    SSE8_16BIT();
    SSE8_16BIT();
        
    /* sum the vector */
    sum = vec_sums(sum, vec_splat_s32(0));
    sum = vec_splat(sum,3);
    
    vec_ste(sum,0,(int*)&result);
    
    /* and return */
    return result;
}
static int pix_sum_altivec(uint8_t * pix, int line_size)
{
    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
    vector unsigned char perm = vec_lvsl(0, pix);
    vector unsigned char t1;
    vector unsigned int sad;
    vector signed int sumdiffs;

    int i;
    int s;

    sad = (vector unsigned int)vec_splat_u32(0);

    for (i = 0; i < 16; i++) {
        /* Read the potentially unaligned 16 pixels into t1 */
        vector unsigned char pixl = vec_ld( 0, pix);
        vector unsigned char pixr = vec_ld(15, pix);
        t1 = vec_perm(pixl, pixr, perm);

        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t1, sad);

        pix += line_size;
    }

    /* Sum up the four partial sums, and put the result into s */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}
Exemple #4
0
static int32_t scalarproduct_int16_altivec(int16_t * v1, int16_t * v2, int order, const int shift)
{
    int i;
    LOAD_ZERO;
    register vec_s16 vec1, *pv;
    register vec_s32 res = vec_splat_s32(0), t;
    register vec_u32 shifts;
    DECLARE_ALIGNED_16(int32_t, ires);

    shifts = zero_u32v;
    if(shift & 0x10) shifts = vec_add(shifts, vec_sl(vec_splat_u32(0x08), vec_splat_u32(0x1)));
    if(shift & 0x08) shifts = vec_add(shifts, vec_splat_u32(0x08));
    if(shift & 0x04) shifts = vec_add(shifts, vec_splat_u32(0x04));
    if(shift & 0x02) shifts = vec_add(shifts, vec_splat_u32(0x02));
    if(shift & 0x01) shifts = vec_add(shifts, vec_splat_u32(0x01));

    for(i = 0; i < order; i += 8){
        pv = (vec_s16*)v1;
        vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1));
        t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);
        t = vec_sr(t, shifts);
        res = vec_sums(t, res);
        v1 += 8;
        v2 += 8;
    }
    res = vec_splat(res, 3);
    vec_ste(res, 0, &ires);
    return ires;
}
Exemple #5
0
int pix_norm1_altivec(uint8_t *pix, int line_size)
{
    int i;
    int s __attribute__((aligned(16)));
    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
    vector unsigned char *tv;
    vector unsigned char pixv;
    vector unsigned int sv;
    vector signed int sum;
    
    sv = (vector unsigned int)vec_splat_u32(0);
    
    s = 0;
    for (i = 0; i < 16; i++) {
        /* Read in the potentially unaligned pixels */
        tv = (vector unsigned char *) pix;
        pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix));

        /* Square the values, and add them to our sum */
        sv = vec_msum(pixv, pixv, sv);

        pix += line_size;
    }
    /* Sum up the four partial sums, and put the result into s */
    sum = vec_sums((vector signed int) sv, (vector signed int) zero);
    sum = vec_splat(sum, 3);
    vec_ste(sum, 0, &s);

    return s;
}
Exemple #6
0
uint32_t
sad8_altivec_c(const uint8_t * cur,
	   const uint8_t *ref,
	   const uint32_t stride)
{
	uint32_t result = 0;
	
	register vector unsigned int sad;
	register vector unsigned char c;
	register vector unsigned char r;
	
	/* initialize */
	sad = vec_splat_u32(0);
	
	/* Perform sad operations */
	SAD8();
	SAD8();
	SAD8();
	SAD8();
	
	SAD8();
	SAD8();
	SAD8();
	SAD8();
	
	/* finish addition, add the first 2 together */
	sad = vec_and(sad, (vector unsigned int)vec_pack(vec_splat_u16(-1),vec_splat_u16(0)));
	sad = (vector unsigned int)vec_sums((vector signed int)sad, vec_splat_s32(0));
	sad = vec_splat(sad,3);
	vec_ste(sad, 0, &result);
		
	return result;
}
Exemple #7
0
int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int i;
    int s __attribute__((aligned(16)));
    const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
    vector unsigned char *tv;
    vector unsigned char pix1v, pix2v, pix3v, avgv, t5;
    vector unsigned int sad;
    vector signed int sumdiffs;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    sad = (vector unsigned int)vec_splat_u32(0);

    /*
       Due to the fact that pix3 = pix2 + line_size, the pix3 of one
       iteration becomes pix2 in the next iteration. We can use this
       fact to avoid a potentially expensive unaligned read, each
       time around the loop.
       Read unaligned pixels into our vectors. The vectors are as follows:
       pix2v: pix2[0]-pix2[15]
       Split the pixel vectors into shorts
    */
    tv = (vector unsigned char *) &pix2[0];
    pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));
    
    for(i=0;i<16;i++) {
        /*
           Read unaligned pixels into our vectors. The vectors are as follows:
           pix1v: pix1[0]-pix1[15]
           pix3v: pix3[0]-pix3[15]
        */
        tv = (vector unsigned char *) pix1;
        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));

        tv = (vector unsigned char *) &pix3[0];
        pix3v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[0]));

        /* Calculate the average vector */
        avgv = vec_avg(pix2v, pix3v);

        /* Calculate a sum of abs differences vector */
        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));

        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);
        
        pix1 += line_size;
        pix2v = pix3v;
        pix3 += line_size;
        
    }
    
    /* Sum up the four partial sums, and put the result into s */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);
    return s;    
}
Exemple #8
0
inline int v_signmask(const v_int32x4& a)
{
    static const vec_uint4 slm = {0, 1, 2, 3};
    vec_int4 sv = vec_sr(a.val, vec_uint4_sp(31));
    sv = vec_sl(sv, slm);
    sv = vec_sums(sv, vec_int4_z);
    return vec_extract(sv, 3);
}
Exemple #9
0
inline int v_signmask(const v_int16x8& a)
{
    static const vec_ushort8 slm = {0, 1, 2, 3, 4, 5, 6, 7};
    vec_short8 sv = vec_sr(a.val, vec_ushort8_sp(15));
    sv = vec_sl(sv, slm);
    vec_int4 svi = vec_int4_z;
    svi = vec_sums(vec_sum4s(sv, svi), svi);
    return vec_extract(svi, 3);
}
Exemple #10
0
uint32_t
quant_h263_inter_altivec_c(int16_t *coeff,
                            int16_t *data,
                            const uint32_t quant,
                            const uint16_t *mpeg_quant_matrices)
{
    vector unsigned char zerovec;
    vector unsigned short mult;
    vector unsigned short quant_m_2;
    vector unsigned short quant_d_2;
    vector unsigned short sum_short;
    vector signed short acLevel;
    
    vector unsigned int even;
    vector unsigned int odd;
    
    vector bool short m2_mask;
    vector bool short zero_mask;
    
    uint32_t result;

#ifdef DEBUG
    if(((unsigned)coeff) & 0x15)
        fprintf(stderr, "quant_h263_inter_altivec_c:incorrect align, coeff: %lx\n", (long)coeff);
#endif
    
    /* initialisation stuff */
    zerovec = vec_splat_u8(0);
    *((unsigned short*)&mult) = (unsigned short)multipliers[quant];
    mult = vec_splat(mult, 0);
    *((unsigned short*)&quant_m_2) = (unsigned short)quant;
    quant_m_2 = vec_splat(quant_m_2, 0);
    quant_m_2 = vec_sl(quant_m_2, vec_splat_u16(1));
    *((unsigned short*)&quant_d_2) = (unsigned short)quant;
    quant_d_2 = vec_splat(quant_d_2, 0);
    quant_d_2 = vec_sr(quant_d_2, vec_splat_u16(1));
    sum_short = (vector unsigned short)zerovec;
    
    /* Quantize */
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
    
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
    QUANT_H263_INTER_ALTIVEC();
        
    /* Calculate the return value */
    even = (vector unsigned int)vec_sum4s((vector signed short)sum_short, (vector signed int)zerovec);
    even = (vector unsigned int)vec_sums((vector signed int)even, (vector signed int)zerovec);
    even = vec_splat(even, 3);
    vec_ste(even, 0, &result);
    return result;
}
Exemple #11
0
/** Mask **/
inline int v_signmask(const v_uint8x16& a)
{
    vec_uchar16 sv  = vec_sr(a.val, vec_uchar16_sp(7));
    static const vec_uchar16 slm = {0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7};
    sv = vec_sl(sv, slm);
    vec_uint4 sv4 = vec_sum4s(sv, vec_uint4_z);
    static const vec_uint4 slm4 = {0, 0, 8, 8};
    sv4 = vec_sl(sv4, slm4);
    return vec_extract(vec_sums((vec_int4) sv4, vec_int4_z), 3);
}
Exemple #12
0
uint32_t
sad16bi_altivec_c(vector unsigned char *cur,
                        vector unsigned char *ref1,
                        vector unsigned char *ref2,
                        uint32_t stride)
{
    vector unsigned char t1, t2;
    vector unsigned char mask1, mask2;
    vector unsigned char sad;
    vector unsigned int sum;
    uint32_t result;
    
#ifdef DEBUG
    /* print alignment errors if this is on */
    if((long)cur & 0xf)
        fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lx\n", (long)cur);
    if(stride & 0xf)
        fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lu\n", stride);
#endif
    
    /* Initialisation stuff */
    stride >>= 4;
    mask1 = vec_lvsl(0, (unsigned char*)ref1);
    mask2 = vec_lvsl(0, (unsigned char*)ref2);
    sad = vec_splat_u8(0);
    sum = (vector unsigned int)sad;
    
    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();
    
    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();
    
    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();
    
    SAD16BI();
    SAD16BI();
    SAD16BI();
    SAD16BI();
    
    sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0));
    sum = vec_splat(sum, 3);
    vec_ste(sum, 0, (uint32_t*)&result);
    
    return result;
}
Exemple #13
0
/**
 * Sum of Squared Errors for a 8x8 block.
 * AltiVec-enhanced.
 * It's the pix_abs8x8_altivec code above w/ squaring added.
 */
int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int i;
    int s __attribute__((aligned(16)));
    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
    vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;
    vector unsigned char t1, t2, t3,t4, t5;
    vector unsigned int sum;
    vector signed int sumsqr;
    
    sum = (vector unsigned int)vec_splat_u32(0);

    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);

    
    for(i=0;i<8;i++) {
	/* Read potentially unaligned pixels into t1 and t2
	   Since we're reading 16 pixels, and actually only want 8,
	   mask out the last 8 pixels. The 0s don't change the sum. */
        perm1 = vec_lvsl(0, pix1);
        pix1v = (vector unsigned char *) pix1;
        perm2 = vec_lvsl(0, pix2);
        pix2v = (vector unsigned char *) pix2;
        t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);
        t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);

        /*
          Since we want to use unsigned chars, we can take advantage
          of the fact that abs(a-b)^2 = (a-b)^2.
        */
        
	/* Calculate abs differences vector */ 
        t3 = vec_max(t1, t2);
        t4 = vec_min(t1, t2);
        t5 = vec_sub(t3, t4);
        
        /* Square the values and add them to our sum */
        sum = vec_msum(t5, t5, sum);
        
        pix1 += line_size;
        pix2 += line_size;
    }
    
    /* Sum up the four partial sums, and put the result into s */
    sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);
    sumsqr = vec_splat(sumsqr, 3);
    vec_ste(sumsqr, 0, &s);
    
    return s;
}
Exemple #14
0
static void test()
{
  vector signed int va = {-7,11,-13,17};

#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  vector signed int vb = {128,0,0,0};
#else
  vector signed int vb = {0,0,0,128};
#endif

  vector signed int vd = vec_sums (va, vb);
  signed int r = vec_extract (vd, 3);

  check (r == 136, "sums");
}
Exemple #15
0
static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
                                     int size)
{
    int i, size16 = size >> 4;
    vector signed char vpix1;
    vector signed short vpix2, vdiff, vpix1l, vpix1h;
    union {
        vector signed int vscore;
        int32_t score[4];
    } u = { .vscore = vec_splat_s32(0) };

    while (size16) {
        // score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
        // load pix1 and the first batch of pix2

        vpix1 = vec_unaligned_load(pix1);
        vpix2 = vec_unaligned_load(pix2);
        pix2 += 8;
        // unpack
        vpix1h = vec_unpackh(vpix1);
        vdiff  = vec_sub(vpix1h, vpix2);
        vpix1l = vec_unpackl(vpix1);
        // load another batch from pix2
        vpix2    = vec_unaligned_load(pix2);
        u.vscore = vec_msum(vdiff, vdiff, u.vscore);
        vdiff    = vec_sub(vpix1l, vpix2);
        u.vscore = vec_msum(vdiff, vdiff, u.vscore);
        pix1    += 16;
        pix2    += 8;
        size16--;
    }
    u.vscore = vec_sums(u.vscore, vec_splat_s32(0));

    size %= 16;
    for (i = 0; i < size; i++)
        u.score[3] += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);

    return u.score[3];
}
#endif /* HAVE_ALTIVEC */

av_cold void ff_svq1enc_init_ppc(SVQ1EncContext *c)
{
#if HAVE_ALTIVEC
    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
#endif /* HAVE_ALTIVEC */
}
Exemple #16
0
int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int i;
    int s __attribute__((aligned(16)));
    const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
    vector unsigned char *tv;
    vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;
    vector unsigned int sad;
    vector signed int sumdiffs;

    s = 0;
    sad = (vector unsigned int)vec_splat_u32(0);
    for(i=0;i<16;i++) {
        /*
           Read unaligned pixels into our vectors. The vectors are as follows:
           pix1v: pix1[0]-pix1[15]
           pix2v: pix2[0]-pix2[15]	pix2iv: pix2[1]-pix2[16]
        */
        tv = (vector unsigned char *) pix1;
        pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
        
        tv = (vector unsigned char *) &pix2[0];
        pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));

        tv = (vector unsigned char *) &pix2[1];
        pix2iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[1]));

        /* Calculate the average vector */
        avgv = vec_avg(pix2v, pix2iv);

        /* Calculate a sum of abs differences vector */
        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));

        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);
        
        pix1 += line_size;
        pix2 += line_size;
    }
    /* Sum up the four partial sums, and put the result into s */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}
Exemple #17
0
/*
 * This function assumes cur is 8 bytes aligned, stride is 16 bytes
 * aligned and ref is unaligned
 */
unsigned long
sad8_altivec(const vector unsigned char *cur,
			 const vector unsigned char *ref,
			 unsigned long stride)
{
	vector unsigned char t1, t2, t3, t4, t5, tp;
	vector unsigned int sad;
	vector signed int sumdiffs;
	vector unsigned char perm_cur;
	vector unsigned char perm_ref1, perm_ref2;
	unsigned long result;

	ZERODEF;

#ifdef DEBUG
	if (((unsigned long) cur) & 0x7)
		fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);
//  if (((unsigned long)ref) & 0x7)
//      fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref);
	if (stride & 0xf)
		fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);
#endif

	perm_cur = get_perm((((unsigned long) cur) >> 3) & 0x01);
	perm_ref1 = vec_lvsl(0, (unsigned char *) ref);
	perm_ref2 = get_perm(0);

	/* initialization */
	sad = (vector unsigned int) (ZEROVEC);
	stride >>= 4;

	/* perform sum of differences between current and previous */
	SAD8();
	SAD8();
	SAD8();
	SAD8();

	/* sum all parts of difference into one 32 bit quantity */
	sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);

	/* copy vector sum into unaligned result */
	sumdiffs = vec_splat(sumdiffs, 3);
	vec_ste(sumdiffs, 0, (int *) &result);
	return (result);
}
Exemple #18
0
static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
                                     int size) {
    int i, size16;
    vector signed char vpix1;
    vector signed short vpix2, vdiff, vpix1l,vpix1h;
    union { vector signed int vscore;
            int32_t score[4];
          } u;
    u.vscore = vec_splat_s32(0);
//
//XXX lazy way, fix it later

#define vec_unaligned_load(b) \
    vec_perm(vec_ld(0,b),vec_ld(15,b),vec_lvsl(0, b));

    size16 = size >> 4;
    while(size16) {
//        score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
        //load pix1 and the first batch of pix2

        vpix1 = vec_unaligned_load(pix1);
        vpix2 = vec_unaligned_load(pix2);
        pix2 += 8;
        //unpack
        vpix1h = vec_unpackh(vpix1);
        vdiff  = vec_sub(vpix1h, vpix2);
        vpix1l = vec_unpackl(vpix1);
        // load another batch from pix2
        vpix2 = vec_unaligned_load(pix2);
        u.vscore = vec_msum(vdiff, vdiff, u.vscore);
        vdiff  = vec_sub(vpix1l, vpix2);
        u.vscore = vec_msum(vdiff, vdiff, u.vscore);
        pix1 += 16;
        pix2 += 8;
        size16--;
    }
    u.vscore = vec_sums(u.vscore, vec_splat_s32(0));

    size %= 16;
    for (i = 0; i < size; i++) {
        u.score[3] += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
    }
    return u.score[3];
}
Exemple #19
0
int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int i;
    int s __attribute__((aligned(16)));
    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
    vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v;
    vector unsigned char t1, t2, t3,t4, t5;
    vector unsigned int sad;
    vector signed int sumdiffs;

    sad = (vector unsigned int)vec_splat_u32(0);

    permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);

    for(i=0;i<8;i++) {
	/* Read potentially unaligned pixels into t1 and t2
	   Since we're reading 16 pixels, and actually only want 8,
	   mask out the last 8 pixels. The 0s don't change the sum. */
        perm1 = vec_lvsl(0, pix1);
        pix1v = (vector unsigned char *) pix1;
        perm2 = vec_lvsl(0, pix2);
        pix2v = (vector unsigned char *) pix2;
        t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);
        t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);

	/* Calculate a sum of abs differences vector */ 
        t3 = vec_max(t1, t2);
        t4 = vec_min(t1, t2);
        t5 = vec_sub(t3, t4);

	/* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);

        pix1 += line_size;
        pix2 += line_size;
    }

    /* Sum up the four partial sums, and put the result into s */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}
Exemple #20
0
static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int i;
    int s;
    const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
    vector unsigned char perm1 = vec_lvsl(0, pix2);
    vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1));
    vector unsigned char pix2l, pix2r;
    vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;
    vector unsigned int sad;
    vector signed int sumdiffs;

    s = 0;
    sad = (vector unsigned int)vec_splat_u32(0);
    for (i = 0; i < h; i++) {
        /* Read unaligned pixels into our vectors. The vectors are as follows:
           pix1v: pix1[0]-pix1[15]
           pix2v: pix2[0]-pix2[15]      pix2iv: pix2[1]-pix2[16] */
        pix1v  = vec_ld( 0, pix1);
        pix2l  = vec_ld( 0, pix2);
        pix2r  = vec_ld(16, pix2);
        pix2v  = vec_perm(pix2l, pix2r, perm1);
        pix2iv = vec_perm(pix2l, pix2r, perm2);

        /* Calculate the average vector */
        avgv = vec_avg(pix2v, pix2iv);

        /* Calculate a sum of abs differences vector */
        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));

        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);

        pix1 += line_size;
        pix2 += line_size;
    }
    /* Sum up the four partial sums, and put the result into s */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}
Exemple #21
0
static unsigned reg_sad_altivec(const kvz_pixel * const data1, const kvz_pixel * const data2,
                        const int width, const int height, const unsigned stride1, const unsigned stride2)
{
  vector unsigned int vsad = {0,0,0,0}, vzero = {0,0,0,0}; 
  vector signed int sumdiffs;
  int tmpsad, sad = 0;
  
  int y, x;
  
  for (y = 0; y < height; ++y) {
    vector unsigned char perm1, perm2;
    
    perm1 = vec_lvsl(0, &data1[y * stride1]);
    perm2 = vec_lvsl(0, &data2[y * stride2]);
    
    for (x = 0; x <= width-16; x+=16) {
      vector unsigned char t1, t2, t3, t4, t5;
      vector unsigned char *current, *previous;
      
      current = (vector unsigned char *) &data1[y * stride1 + x];
      previous = (vector unsigned char *) &data2[y * stride2 + x];
      
      t1  = vec_perm(current[0], current[1], perm1 );  /* align current vector  */ 
      t2  = vec_perm(previous[0], previous[1], perm2 );/* align previous vector */ 
      t3  = vec_max(t1, t2 );      /* find largest of two           */ 
      t4  = vec_min(t1, t2 );      /* find smaller of two           */ 
      t5  = vec_sub(t3, t4);       /* find absolute difference      */ 
      vsad = vec_sum4s(t5, vsad);    /* accumulate sum of differences */
    }

    for (; x < width; ++x) {
      sad += abs(data1[y * stride1 + x] - data2[y * stride2 + x]);
    }
  }
  
  sumdiffs = vec_sums((vector signed int) vsad, (vector signed int) vzero);
  /* copy vector sum into unaligned result */
  sumdiffs = vec_splat( sumdiffs, 3);
  vec_ste( sumdiffs, 0, &tmpsad );
  sad += tmpsad;
  
  return sad;
}
Exemple #22
0
static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
                                           int order)
{
    int i;
    LOAD_ZERO;
    register vec_s16 vec1;
    register vec_s32 res = vec_splat_s32(0), t;
    int32_t ires;

    for(i = 0; i < order; i += 8){
        vec1 = vec_unaligned_load(v1);
        t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);
        res = vec_sums(t, res);
        v1 += 8;
        v2 += 8;
    }
    res = vec_splat(res, 3);
    vec_ste(res, 0, &ires);
    return ires;
}
Exemple #23
0
int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size)
{
    int i;
    int s __attribute__((aligned(16)));
    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
    vector unsigned char perm1, perm2, *pix1v, *pix2v;
    vector unsigned char t1, t2, t3,t4, t5;
    vector unsigned int sad;
    vector signed int sumdiffs;
    
    sad = (vector unsigned int)vec_splat_u32(0);


    for(i=0;i<16;i++) {
	/* Read potentially unaligned pixels into t1 and t2 */
        perm1 = vec_lvsl(0, pix1);
        pix1v = (vector unsigned char *) pix1;
        perm2 = vec_lvsl(0, pix2);
        pix2v = (vector unsigned char *) pix2;
        t1 = vec_perm(pix1v[0], pix1v[1], perm1);
        t2 = vec_perm(pix2v[0], pix2v[1], perm2);
       
	/* Calculate a sum of abs differences vector */ 
        t3 = vec_max(t1, t2);
        t4 = vec_min(t1, t2);
        t5 = vec_sub(t3, t4);
	
	/* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);

        pix1 += line_size;
        pix2 += line_size;
    }

    /* Sum up the four partial sums, and put the result into s */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);
    
    return s;
}
Exemple #24
0
scalarproduct_int16_vsx (const signed short *v1, const signed short *v2,
			 int order)
{
  int i;
  LOAD_ZERO;
  register vec_s16 vec1;
  register vec_s32 res = vec_splat_s32 (0), t;
  signed int ires;

  for (i = 0; i < order; i += 8) {
    vec1 = vec_vsx_ld (0, v1);
    t    = vec_msum (vec1, vec_vsx_ld (0, v2), zero_s32v);
    res  = vec_sums (t, res);
    v1  += 8;
    v2  += 8;
  }
  res = vec_splat (res, 3);
  vec_ste (res, 0, &ires);

  return ires;
}
Exemple #25
0
static int pix_norm1_altivec(uint8_t *pix, int line_size)
{
    int i;
    int s;
    __vector zero = __vzero();
/*
    vector unsigned char *tv;
    vector unsigned char pixv;
    vector unsigned int sv;
    vector signed int sum;	
*/

	__vector *tv;
    __vector pixv;
    __vector sv;
    __vector sum;

    sv = __vzero();

    s = 0;
    for (i = 0; i < 16; i++) {
        /* Read in the potentially unaligned pixels */
        //tv = (vector unsigned char *) pix;
		tv = (__vector*) pix;
        //pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix));
		pixv = __vperm(tv[0], tv[1], __lvsl(pix,0));

        /* Square the values, and add them to our sum */
        sv = vec_msum(pixv, pixv, sv);

        pix += line_size;
    }
    /* Sum up the four partial sums, and put the result into s */
    sum = vec_sums((vector signed int) sv, (vector signed int) zero);
    sum = vec_splat(sum, 3);
    
	vec_ste(sum, 0, &s);

    return s;
}
Exemple #26
0
static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
                                           int order)
{
    int i;
    LOAD_ZERO;
    const vec_s16 *pv;
    register vec_s16 vec1;
    register vec_s32 res = vec_splat_s32(0), t;
    int32_t ires;

    for(i = 0; i < order; i += 8){
        pv = (const vec_s16*)v1;
        vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1));
        t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);
        res = vec_sums(t, res);
        v1 += 8;
        v2 += 8;
    }
    res = vec_splat(res, 3);
    vec_ste(res, 0, &ires);
    return ires;
}
Exemple #27
0
static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int i;
    int s;
    const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
    vector unsigned char perm = vec_lvsl(0, pix2);
    vector unsigned char t1, t2, t3,t4, t5;
    vector unsigned int sad;
    vector signed int sumdiffs;

    sad = (vector unsigned int)vec_splat_u32(0);


    for (i = 0; i < h; i++) {
        /* Read potentially unaligned pixels into t1 and t2 */
        vector unsigned char pix2l = vec_ld( 0, pix2);
        vector unsigned char pix2r = vec_ld(15, pix2);
        t1 = vec_ld(0, pix1);
        t2 = vec_perm(pix2l, pix2r, perm);

        /* Calculate a sum of abs differences vector */
        t3 = vec_max(t1, t2);
        t4 = vec_min(t1, t2);
        t5 = vec_sub(t3, t4);

        /* Add each 4 pixel group together and put 4 results into sad */
        sad = vec_sum4s(t5, sad);

        pix1 += line_size;
        pix2 += line_size;
    }

    /* Sum up the four partial sums, and put the result into s */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}
static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,
                                                    const int16_t *v2,
                                                    const int16_t *v3,
                                                    int order, int mul)
{
    LOAD_ZERO;
    vec_s16 *pv1 = (vec_s16 *) v1;
    register vec_s16 muls = { mul, mul, mul, mul, mul, mul, mul, mul };
    register vec_s16 t0, t1, i0, i1, i4, i2, i3;
    register vec_s32 res = zero_s32v;
#if HAVE_BIGENDIAN
    register vec_u8 align = vec_lvsl(0, v2);
    i2 = vec_ld(0, v2);
    i3 = vec_ld(0, v3);
#endif
    int32_t ires;

    order >>= 4;
    do {
        GET_T(t0,t1,v2,i1,i2);
        i0     = pv1[0];
        i1     = pv1[1];
        res    = vec_msum(t0, i0, res);
        res    = vec_msum(t1, i1, res);
        GET_T(t0,t1,v3,i4,i3);
        pv1[0] = vec_mladd(t0, muls, i0);
        pv1[1] = vec_mladd(t1, muls, i1);
        pv1   += 2;
        v2    += 16;
        v3    += 16;
    } while (--order);
    res = vec_splat(vec_sums(res, zero_s32v), 3);
    vec_ste(res, 0, &ires);

    return ires;
}
Exemple #29
0
/*
 * This function assumes cur and stride are 16 bytes aligned and ref is unaligned
 */
unsigned long
sad16_altivec(const vector unsigned char *cur,
			  const vector unsigned char *ref,
			  unsigned long stride,
			  const unsigned long best_sad)
{
	vector unsigned char perm;
	vector unsigned char t1, t2, t3, t4;
	vector unsigned int sad;
	vector signed int sumdiffs, best_vec;
	unsigned long result;

	ZERODEF;

#ifdef DEBUG
	if (((unsigned long) cur) & 0xf)
		fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);
//  if (((unsigned long)ref) & 0xf)
//      fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref);
	if (stride & 0xf)
		fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);
#endif
	/* initialization */
	sad = (vector unsigned int) (ZEROVEC);
	stride >>= 4;
	perm = vec_lvsl(0, (unsigned char *) ref);
	*((unsigned long *) &best_vec) = best_sad;
	best_vec = vec_splat(best_vec, 0);

	/* perform sum of differences between current and previous */
	SAD16();
	SAD16();
	SAD16();
	SAD16();
	/* Temp sum for exit */
	sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
	if (vec_all_ge(sumdiffs, best_vec))
		goto bail;
	SAD16();
	SAD16();
	SAD16();
	SAD16();
	sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
	if (vec_all_ge(sumdiffs, best_vec))
		goto bail;
	SAD16();
	SAD16();
	SAD16();
	SAD16();
	SAD16();
	SAD16();
	SAD16();
	SAD16();

	/* sum all parts of difference into one 32 bit quantity */
	sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
  bail:
	/* copy vector sum into unaligned result */
	sumdiffs = vec_splat(sumdiffs, 3);
	vec_ste(sumdiffs, 0, (int *) &result);
	return (result);
}
Exemple #30
0
unsigned long
dev16_altivec(const vector unsigned char *cur,
			  unsigned long stride)
{
	vector unsigned char t2, t3, t4, mn;
	vector unsigned int mean, dev;
	vector signed int sumdiffs;
	vector unsigned char c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,
		c13, c14, c15;
	unsigned long result;

	ZERODEF;

	mean = (vector unsigned int) (ZEROVEC);
	dev = (vector unsigned int) (ZEROVEC);
	stride >>= 4;

	MEAN16(0);
	MEAN16(1);
	MEAN16(2);
	MEAN16(3);
	MEAN16(4);
	MEAN16(5);
	MEAN16(6);
	MEAN16(7);
	MEAN16(8);
	MEAN16(9);
	MEAN16(10);
	MEAN16(11);
	MEAN16(12);
	MEAN16(13);
	MEAN16(14);
	MEAN16(15);

	sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC);
	mn = vec_perm((vector unsigned char) sumdiffs,
				  (vector unsigned char) sumdiffs, (vector unsigned char) (14,
																		   14,
																		   14,
																		   14,
																		   14,
																		   14,
																		   14,
																		   14,
																		   14,
																		   14,
																		   14,
																		   14,
																		   14,
																		   14,
																		   14,
																		   14));
	DEV16(0);
	DEV16(1);
	DEV16(2);
	DEV16(3);
	DEV16(4);
	DEV16(5);
	DEV16(6);
	DEV16(7);
	DEV16(8);
	DEV16(9);
	DEV16(10);
	DEV16(11);
	DEV16(12);
	DEV16(13);
	DEV16(14);
	DEV16(15);

	/* sum all parts of difference into one 32 bit quantity */
	sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC);

	/* copy vector sum into unaligned result */
	sumdiffs = vec_splat(sumdiffs, 3);
	vec_ste(sumdiffs, 0, (int *) &result);
	return (result);
}