Exemplo n.º 1
0
void f34() {
  vec_dstst(var_long_ptr[0], var_int[1], 0);
  vec_dstst(var_long_ptr[0], var_int[1], 1);
  vec_dstst(var_long_ptr[0], var_int[1], 2);
  vec_dstst(var_long_ptr[0], var_int[1], 3);
  vec_dstst(var_unsigned_long_ptr[0], var_int[1], 0);
  vec_dstst(var_unsigned_long_ptr[0], var_int[1], 1);
  vec_dstst(var_unsigned_long_ptr[0], var_int[1], 2);
  vec_dstst(var_unsigned_long_ptr[0], var_int[1], 3);
}
void
b()
{
  z = vec_add (x, y);

  /* Make sure the predicates accept correct argument types.  */

  int1 = vec_all_in (f, g);
  int1 = vec_all_ge (f, g);
  int1 = vec_all_eq (c, d);
  int1 = vec_all_ne (s, t);
  int1 = vec_any_eq (i, j);
  int1 = vec_any_ge (f, g);
  int1 = vec_all_ngt (f, g);
  int1 = vec_any_ge (c, d);
  int1 = vec_any_ge (s, t);
  int1 = vec_any_ge (i, j);
  int1 = vec_any_ge (c, d);
  int1 = vec_any_ge (s, t);
  int1 = vec_any_ge (i, j);

  vec_mtvscr (i);
  vec_dssall ();
  s = (vector signed short) vec_mfvscr ();
  vec_dss (3);

  vec_dst (pi, int1 + int2, 3);
  vec_dstst (pi, int1 + int2, 3);
  vec_dststt (pi, int1 + int2, 3);
  vec_dstt (pi, int1 + int2, 3);

  uc = (vector unsigned char) vec_lvsl (int1 + 69, (signed int *) pi);
  uc = (vector unsigned char) vec_lvsr (int1 + 69, (signed int *) pi);

  c = vec_lde (int1, (signed char *) pi);
  s = vec_lde (int1, (signed short *) pi);
  i = vec_lde (int1, (signed int *) pi);
  i = vec_ldl (int1, pi);
  i = vec_ld (int1, pi);

  vec_st (i, int2, pi);
  vec_ste (c, int2, (signed char *) pi);
  vec_ste (s, int2, (signed short *) pi);
  vec_ste (i, int2, (signed int *) pi);
  vec_stl (i, int2, pi);
}
Exemplo n.º 3
0
int
main (void)
{
    unsigned long ul = 2;
    signed long sl = 2;
    unsigned int ui = 2;
    signed int si = 2;
    float fl = 2.0;

    vec_dst (&vi, ul, '\0');
    vec_dst (&vi, sl, 0);
    vec_dst (&vi, ui, '\0');
    vec_dst (&vi, si, 0);
    vec_dstst (&vi, (short)fl, '\0');

    return 0;
}
Exemplo n.º 4
0
void foo(void) {
  const unsigned char *buf;
  vector pixel vp = { 3, 4, 5, 6 };
  vector bool int vbi = { 1, 0, 1, 0 };
  vector bool short vbs = { 1, 0, 1, 0, 1, 0, 1, 0 };
  vector bool char vbc = { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 };
  vector signed char vsc;
  int a = 3;
  
  vec_dst(buf, a, 1);
  vec_dstst(buf, a, 2);
  vec_dststt(buf, a, 3);
  vec_dststt(buf, a, 2);

  vp = vec_sld(vp, vp, 5);
  vbc = vec_splat(vbc, 7);
  vbs = vec_splat(vbs, 12);
  vp = vec_splat(vp, 17);
  vbi = vec_splat(vbi, 31);  
}
Exemplo n.º 5
0
void fluid_genPressure_black(fluid *in_f, int y, pvt_fluidMode *mode)
{
	struct pressure *p = &mode->pressure;
	
	int w = fieldWidth(p->velX);
	int h = fieldHeight(p->velX);

#ifdef __APPLE_ALTIVEC__
#elif defined __SSE3__
#else
	int sx = fieldStrideX(p->velX);
#endif
	int sy = fieldStrideY(p->velY);
	
	float *velX = fieldData(p->velX);
	float *velY = fieldData(p->velY);
	
	float *pressure = fieldData(p->pressure);
	
	if (y == 0)
	{
#ifdef X_SIMD
		x128f *vPressure = (x128f*)fluidFloatPointer(pressure, 0*sy);
		x128f *vPressureP = (x128f*)fluidFloatPointer(pressure, 1*sy);
		
		int x;
		w/=4;
		for (x=0; x<w; x++)
		{
			vPressure[x] = vPressureP[x];
		}
#else
		int x;
		for (x=0; x<w; x++)
		{
			fluidFloatPointer(pressure,x*sx)[0] = fluidFloatPointer(pressure,x*sx + sy)[0];
		}
#endif
	}
	else if (y == h-1)
	{
#ifdef X_SIMD
		x128f *vPressure = (x128f*)fluidFloatPointer(pressure, y*sy);
		x128f *vPressureP = (x128f*)fluidFloatPointer(pressure, (y-1)*sy);
		
		int x;
		w/=4;
		for (x=0; x<w; x++)
		{
			vPressure[x] = vPressureP[x];
		}
#else
		int x;
		for (x=0; x<w; x++)
		{
			fluidFloatPointer(pressure,x*sx + y*sy)[0] =
					fluidFloatPointer(pressure,x*sx + (y-1)*sy)[0];
		}
#endif
	}
	else
	{
#ifdef X_SIMD
		float *vPressureRow = fluidFloatPointer(pressure, y*sy);
		
		x128f *vPressure = (x128f*)vPressureRow;
		x128f *vVelX = (x128f*)fluidFloatPointer(velX, y*sy);
		
		x128f *vPressureN = (x128f*)fluidFloatPointer(pressure, (y+1)*sy);
		x128f *vVelYN = (x128f*)fluidFloatPointer(velY, (y+1)*sy);
		
		x128f *vPressureP = (x128f*)fluidFloatPointer(pressure, (y-1)*sy);
		x128f *vVelYP = (x128f*)fluidFloatPointer(velY, (y-1)*sy);
		
		x128f div4 = {0.0f, 1.0f/4.0f, 0.0f, 1.0f/4.0f};
		x128f mask = {1.0f, 0.0f, 1.0f, 0.0f};
#endif
	
#ifdef __APPLE_ALTIVEC__
		//int myTempVariable = __mfspr( 1023 );
		
		vector float vZero = {0,0,0,0};
		
		vec_dstst(vPressure, 0x01000001, 0);
		vec_dst(vVelX, 0x01000001, 1);
		vec_dst(vVelYN, 0x01000001, 2);
		vec_dst(vVelYP, 0x01000001, 3);
		
		int x;
		{
			vector float tmp;
			
			//Compute shifts
			vector float sl_p = vec_sld(vPressure[0], vPressure[1],4);
			vector float sr_p = vec_sld(vZero, vPressure[0], 12);
			
			vector float sl_vx = vec_sld(vVelX[0], vVelX[1],4);
			vector float sr_vx = vec_sld(vZero, vVelX[0], 12);
			
			//Sum everything!!!
			tmp = vec_add(sl_p, sr_p);
			tmp = vec_add(tmp, vPressureN[0]);
			tmp = vec_add(tmp, vPressureP[0]);
			tmp = vec_sub(tmp, sl_vx);
			tmp = vec_add(tmp, sr_vx);
			tmp = vec_sub(tmp, vVelYN[0]);
			tmp = vec_add(tmp, vVelYP[0]);
			
			vPressure[0] = vec_madd(tmp, div4, vZero);
			vPressureRow[0] = vPressureRow[1];
		}
		x=1;
		
		while (x<w/4-5)
		{
			PRESSURE_VEC_PRE(0)
			PRESSURE_VEC_PRE(1)
			PRESSURE_VEC_PRE(2)
			PRESSURE_VEC_PRE(3)
			
			PRESSURE_VEC_SHIFT(0)
			PRESSURE_VEC_SHIFT(1)
			PRESSURE_VEC_SHIFT(2)
			PRESSURE_VEC_SHIFT(3)
			
			PRESSURE_VEC_END(0)
			PRESSURE_VEC_END(1)
			PRESSURE_VEC_END(2)
			PRESSURE_VEC_END(3)
			
			x+=4;
		}

		while (x<w/4-1)
		{			
			PRESSURE_VEC_PRE(0)
			PRESSURE_VEC_SHIFT(0)
			PRESSURE_VEC_END(0)
			x++;
		}
		{
			vector float tmp;
			
			//Compute shifts
			vector float sl_p = vec_sld(vPressure[x], vZero,4);
			vector float sr_p = vec_sld(vPressure[x-1], vPressure[x], 12);
			
			vector float sl_vx = vec_sld(vVelX[x], vZero,4);
			vector float sr_vx = vec_sld(vVelX[x-1], vVelX[x], 12);
			
			//Sum everything!!!
			tmp = vec_add(sl_p, sr_p);
			tmp = vec_add(tmp, vPressureN[x]);
			tmp = vec_add(tmp, vPressureP[x]);
			tmp = vec_sub(tmp, sl_vx);
			tmp = vec_add(tmp, sr_vx);
			tmp = vec_sub(tmp, vVelYN[x]);
			tmp = vec_add(tmp, vVelYP[x]);
			
			vPressure[x] = vec_madd(tmp, div4, vZero);
			
			vPressureRow[w-1] = vPressureRow[w-2];
		}
		
#elif defined __SSE3__
		
		int x;
		{
			__m128 tmp;
			
			//Compute shifts
			__m128 sl_p = _mm_srli_sf128(vPressure[0],4);
			sl_p = _mm_add_ps(sl_p,_mm_slli_sf128(vPressure[1],12));
			
			__m128 sr_p = _mm_slli_sf128(vPressure[0],4);
			
			__m128 sl_vx = _mm_srli_sf128(vVelX[0],4);
			sl_vx = _mm_add_ps(sl_vx,_mm_slli_sf128(vVelX[1],12));
			
			__m128 sr_vx = _mm_slli_sf128(vVelX[0],4);
			
			//Sum everything!!!
			tmp = _mm_add_ps(sl_p, sr_p);
			tmp = _mm_add_ps(tmp, vPressureN[0]);
			tmp = _mm_add_ps(tmp, vPressureP[0]);
			tmp = _mm_sub_ps(tmp, sl_vx);
			tmp = _mm_add_ps(tmp, sr_vx);
			tmp = _mm_sub_ps(tmp, vVelYN[0]);
			tmp = _mm_add_ps(tmp, vVelYP[0]);
			
			vPressure[0] = _mm_mul_ps(tmp, div4);
			vPressureRow[0] = vPressureRow[1];
		}
		x=1;
		while (x<w/4-9)
		{
			//Compute shifts (1)
			PRESSURE_SSE_PRE(0);
			PRESSURE_SSE_PRE(1);
			PRESSURE_SSE_PRE(2);
			
			//Sum everything!!! (1)
			PRESSURE_SSE_POST(0);
			PRESSURE_SSE_POST(1);
			PRESSURE_SSE_POST(2);
			
			x+=3;
		}
		while (x<w/4-1)
		{
			//Compute shifts
			PRESSURE_SSE_PRE(0);
			
			//Sum everything!!!
			PRESSURE_SSE_POST(0);
			
			x++;
		}
		{
			__m128 tmp;
			
			//Compute shifts
			__m128 sl_p = _mm_srli_sf128(vPressure[x],4);
			
			__m128 sr_p = _mm_slli_sf128(vPressure[x],4);
			sr_p = _mm_add_ps(sr_p,_mm_srli_sf128(vPressure[x-1],12));
			
			__m128 sl_vx = _mm_srli_sf128(vVelX[x],4);
			
			__m128 sr_vx = _mm_slli_sf128(vVelX[x],4);
			sr_vx = _mm_add_ps(sr_vx,_mm_srli_sf128(vVelX[x-1],12));
			
			//Sum everything!!!
			tmp = _mm_add_ps(sl_p, sr_p);
			tmp = _mm_add_ps(tmp, vPressureN[x]);
			tmp = _mm_add_ps(tmp, vPressureP[x]);
			tmp = _mm_sub_ps(tmp, sl_vx);
			tmp = _mm_add_ps(tmp, sr_vx);
			tmp = _mm_sub_ps(tmp, vVelYN[x]);
			tmp = _mm_add_ps(tmp, vVelYP[x]);
			
			vPressure[x] = _mm_mul_ps(tmp, div4);
			
			vPressureRow[w-1] = vPressureRow[w-2];
		}
		
#else
		float lastPressureX = fluidFloatPointer(pressure,sx + y*sy)[0];
		float lastVelX = fluidFloatPointer(velX, y*sy)[0];
		
		float curPressureX = lastPressureX;
		float curVelX = fluidFloatPointer(velX, sx + y*sy)[0];
		
		fluidFloatPointer(pressure,y*sy)[0] = lastPressureX;
		
		int x;
		int curxy = sx + y*sy;
		for (x=1; x<w-1; x++)
		{
			float nextPressureX = fluidFloatPointer(pressure,curxy + sx)[0];
			float nextVelX = fluidFloatPointer(velX,curxy + sx)[0];
			
			fluidFloatPointer(pressure,curxy)[0] =
				(	  lastPressureX
				 	+ nextPressureX
				 	+ fluidFloatPointer(pressure,curxy - sy)[0]
					+ fluidFloatPointer(pressure,curxy + sy)[0]
				 - 		(  nextVelX
						 - lastVelX
						 + fluidFloatPointer(velY,curxy + sy)[0]
						 - fluidFloatPointer(velY,curxy - sy)[0])) / 4.0f;
			
			lastPressureX = curPressureX;
			curPressureX = nextPressureX;
			
			lastVelX = curVelX;
			curVelX = nextVelX;
			
			curxy += sx;
		}
		
		fluidFloatPointer(pressure,(w-1)*sx + y*sy)[0]
			= fluidFloatPointer(pressure,(w-2)*sx + y*sy)[0];
#endif
	}
}
Exemplo n.º 6
0
/*
 * subtract prediction from block data
 * pred % 8 == 0
 * cur % 8 == 0
 * lx % 16 == 0
 * blk % 16 == 0
 */
void sub_pred_altivec(SUB_PRED_PDECL)
{
    unsigned int dst;
    uint8_t *pCA, *pCB, *pPA, *pPB;
    int16_t *pBA, *pBB;
    vector unsigned char zero;
    vector unsigned char predA, predB, curA, curB;
    vector signed short blkA, blkB;


#ifdef ALTIVEC_VERIFY
#ifdef ALTIVEC_DST
    if (lx & (~0xffff) != 0)
	mjpeg_error_exit1("sub_pred: lx > vec_dst range", lx);
#endif

    if (NOT_VECTOR_ALIGNED(lx))
	mjpeg_error_exit1("sub_pred: lx %% 16 != 0, (%d)", lx);
    if (NOT_VECTOR_ALIGNED(blk))
	mjpeg_error_exit1("sub_pred: blk %% 16 != 0, (%d)", blk);

    if (((unsigned long)pred & 0xf) != ((unsigned long)cur & 0xf))
	mjpeg_error_exit1("sub_pred: (pred(0x%X) %% 16) != (cur(0x%X) %% 16)",
	    pred, cur);
    if ((((unsigned long)pred) & 0x7) != 0)
	mjpeg_error_exit1("sub_pred: pred %% 8 != 0, (0x%X)", pred);
    if ((((unsigned long)cur) & 0x7) != 0)
	mjpeg_error_exit1("sub_pred: cur %% 8 != 0, (0x%X)", cur);
#endif

/* A->B, B->A expand differently depending on input */
#define ABBA(symbol,ab)		_ABBA(ABBA_##ab,symbol) /* {{{ */
#define _ABBA(abba_ab,symbol)	abba_ab(symbol)
#define ABBA_A(symbol)		symbol##B
#define ABBA_B(symbol)		symbol##A
/* }}} */


#define	PERFORM_ITERATION(hl,ab,iter) /* iter {{{ */                         \
	pred##ab = vec_merge##hl(zero, pred##ab);                            \
	cur##ab = vec_merge##hl(zero, cur##ab);                              \
	blk##ab = vec_sub(vs16(cur##ab), vs16(pred##ab));                    \
	vec_st(blk##ab, 0, (signed short*)pB##ab);                           \
	/* }}} */

#define PREPARE_ITERATION(hl,ab,iter) /* iter {{{ */                         \
	pP##ab = ABBA(pP,ab) + lx;                                           \
	pC##ab = ABBA(pC,ab) + lx;                                           \
	pB##ab = ABBA(pB,ab) + 8;                                            \
	pred##ab = vec_ld(0, pP##ab);                                        \
	cur##ab = vec_ld(0, pC##ab);                                         \
	/* }}} */

#define NO_RESCHEDULE	asm volatile ("")

    AMBER_START;

    pPA = pred;
    pCA = cur;
    pBA = blk;

#ifdef ALTIVEC_DST
    dst = 0x01080000 | lx;
    vec_dst(pPA, dst, 0);
    vec_dst(pCA, dst, 1);
    dst = 0x01080010;
    vec_dstst(pBA, dst, 2);
#endif

    pPB = pPA + lx;         NO_RESCHEDULE;
    predA = vec_ld(0, pPA); NO_RESCHEDULE;
    pCB = pCA + lx;         NO_RESCHEDULE;
    curA = vec_ld(0, pCA);  NO_RESCHEDULE;
    pBB = pBA + 8;          NO_RESCHEDULE;
    predB = vec_ld(0, pPB); NO_RESCHEDULE;
    zero = vec_splat_u8(0); NO_RESCHEDULE;
    curB = vec_ld(0, pCB);

    if (VECTOR_ALIGNED(pPA)) {
	PERFORM_ITERATION(h,A,0);
	PREPARE_ITERATION(h,A,2);   /* prepare next A iteration */
	PERFORM_ITERATION(h,B,1);
	PREPARE_ITERATION(h,B,3);   /* prepare next B iteration */
	PERFORM_ITERATION(h,A,2);
	PREPARE_ITERATION(h,A,4);
	PERFORM_ITERATION(h,B,3);
	PREPARE_ITERATION(h,B,5);
	PERFORM_ITERATION(h,A,4);
	PREPARE_ITERATION(h,A,6);
	PERFORM_ITERATION(h,B,5);
	PREPARE_ITERATION(h,B,7);
	PERFORM_ITERATION(h,A,6);
	PERFORM_ITERATION(h,B,7);
    } else {
	PERFORM_ITERATION(l,A,0);
	PREPARE_ITERATION(l,A,2);   /* prepare next A iteration */
	PERFORM_ITERATION(l,B,1);
	PREPARE_ITERATION(l,B,3);   /* prepare next B iteration */
	PERFORM_ITERATION(l,A,2);
	PREPARE_ITERATION(l,A,4);
	PERFORM_ITERATION(l,B,3);
	PREPARE_ITERATION(l,B,5);
	PERFORM_ITERATION(l,A,4);
	PREPARE_ITERATION(l,A,6);
	PERFORM_ITERATION(l,B,5);
	PREPARE_ITERATION(l,B,7);
	PERFORM_ITERATION(l,A,6);
	PERFORM_ITERATION(l,B,7);
    }

#ifdef ALTIVEC_DST
    vec_dssall();
#endif

    AMBER_STOP;
}