/*
 * Calls vec_dstst on var_long_ptr[0] and on var_unsigned_long_ptr[0],
 * each time with var_int[1] as the second operand, once for every
 * literal third operand 0 through 3.
 *
 * The third operand is written as a literal in every call (it is not
 * folded into a loop) so that each call site stays exactly as it was.
 */
void f34()
{
    /* first operand: var_long_ptr[0] */
    vec_dstst(var_long_ptr[0], var_int[1], 0);
    vec_dstst(var_long_ptr[0], var_int[1], 1);
    vec_dstst(var_long_ptr[0], var_int[1], 2);
    vec_dstst(var_long_ptr[0], var_int[1], 3);

    /* first operand: var_unsigned_long_ptr[0] */
    vec_dstst(var_unsigned_long_ptr[0], var_int[1], 0);
    vec_dstst(var_unsigned_long_ptr[0], var_int[1], 1);
    vec_dstst(var_unsigned_long_ptr[0], var_int[1], 2);
    vec_dstst(var_unsigned_long_ptr[0], var_int[1], 3);
}
void b() { z = vec_add (x, y); /* Make sure the predicates accept correct argument types. */ int1 = vec_all_in (f, g); int1 = vec_all_ge (f, g); int1 = vec_all_eq (c, d); int1 = vec_all_ne (s, t); int1 = vec_any_eq (i, j); int1 = vec_any_ge (f, g); int1 = vec_all_ngt (f, g); int1 = vec_any_ge (c, d); int1 = vec_any_ge (s, t); int1 = vec_any_ge (i, j); int1 = vec_any_ge (c, d); int1 = vec_any_ge (s, t); int1 = vec_any_ge (i, j); vec_mtvscr (i); vec_dssall (); s = (vector signed short) vec_mfvscr (); vec_dss (3); vec_dst (pi, int1 + int2, 3); vec_dstst (pi, int1 + int2, 3); vec_dststt (pi, int1 + int2, 3); vec_dstt (pi, int1 + int2, 3); uc = (vector unsigned char) vec_lvsl (int1 + 69, (signed int *) pi); uc = (vector unsigned char) vec_lvsr (int1 + 69, (signed int *) pi); c = vec_lde (int1, (signed char *) pi); s = vec_lde (int1, (signed short *) pi); i = vec_lde (int1, (signed int *) pi); i = vec_ldl (int1, pi); i = vec_ld (int1, pi); vec_st (i, int2, pi); vec_ste (c, int2, (signed char *) pi); vec_ste (s, int2, (signed short *) pi); vec_ste (i, int2, (signed int *) pi); vec_stl (i, int2, pi); }
/*
 * Calls vec_dst / vec_dstst on &vi (vi is declared outside this function)
 * with second operands of several arithmetic types and with the third
 * operand spelled both as the character literal '\0' and as the integer
 * literal 0.  Always returns 0.
 */
int main (void)
{
    unsigned long ul = 2;
    signed long sl = 2;
    unsigned int ui = 2;
    signed int si = 2;
    float fl = 2.0;

    /* second operand: each of the four integer types in turn */
    vec_dst (&vi, ul, '\0');
    vec_dst (&vi, sl, 0);
    vec_dst (&vi, ui, '\0');
    vec_dst (&vi, si, 0);

    /* second operand: a float explicitly narrowed to short */
    vec_dstst (&vi, (short)fl, '\0');

    return 0;
}
void foo(void) { const unsigned char *buf; vector pixel vp = { 3, 4, 5, 6 }; vector bool int vbi = { 1, 0, 1, 0 }; vector bool short vbs = { 1, 0, 1, 0, 1, 0, 1, 0 }; vector bool char vbc = { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 }; vector signed char vsc; int a = 3; vec_dst(buf, a, 1); vec_dstst(buf, a, 2); vec_dststt(buf, a, 3); vec_dststt(buf, a, 2); vp = vec_sld(vp, vp, 5); vbc = vec_splat(vbc, 7); vbs = vec_splat(vbs, 12); vp = vec_splat(vp, 17); vbi = vec_splat(vbi, 31); }
/*
 * Update row `y` of the pressure field stored in mode->pressure.
 *
 *   in_f  not referenced by the code visible in this function
 *         (NOTE(review): the PRESSURE_* macros are defined elsewhere and
 *         may use it - confirm).
 *   y     row to process.
 *   mode  supplies the `pressure` struct whose velX, velY and pressure
 *         fields are read through fieldWidth/fieldHeight/fieldStride*
 *         and fieldData.
 *
 * Row 0 is overwritten with a copy of row 1 and row h-1 with a copy of
 * row h-2.  Any other row is recomputed from its four neighbouring
 * pressure values and the neighbouring velX/velY values, and its first
 * and last cells are then set equal to the adjacent cell.
 *
 * Three implementations of the interior row are selected at compile
 * time: __APPLE_ALTIVEC__, __SSE3__, and a portable scalar fallback.
 */
void fluid_genPressure_black(fluid *in_f, int y, pvt_fluidMode *mode)
{
    struct pressure *p = &mode->pressure;

    int w = fieldWidth(p->velX);
    int h = fieldHeight(p->velX);
    /* The x stride is only read by the non-vector code paths. */
#if !defined(__APPLE_ALTIVEC__) && !defined(__SSE3__)
    int sx = fieldStrideX(p->velX);
#endif
    int sy = fieldStrideY(p->velY);

    float *velX = fieldData(p->velX);
    float *velY = fieldData(p->velY);
    float *pressure = fieldData(p->pressure);

    /* ---- Boundary rows: copy the adjacent interior row and finish. ---- */
    if (y == 0 || y == h-1) {
        /* Row 0 copies from row 1; row h-1 copies from row h-2.
           y == 0 takes precedence when both conditions hold. */
        const int srcY = (y == 0) ? 1 : y - 1;
#ifdef X_SIMD
        x128f *dstRow = (x128f*)fluidFloatPointer(pressure, y*sy);
        x128f *srcRow = (x128f*)fluidFloatPointer(pressure, srcY*sy);
        /* presumably x128f holds four floats, hence w/4 - confirm */
        const int vecCount = w/4;
        for (int i = 0; i < vecCount; i++) {
            dstRow[i] = srcRow[i];
        }
#else
        for (int i = 0; i < w; i++) {
            fluidFloatPointer(pressure, i*sx + y*sy)[0]
                = fluidFloatPointer(pressure, i*sx + srcY*sy)[0];
        }
#endif
        return;
    }

    /* ---- Interior row ---- */
    /* Identifiers below are kept exactly as they were: the PRESSURE_*
       macros are defined outside this function and may refer to them. */
#ifdef X_SIMD
    float *vPressureRow = fluidFloatPointer(pressure, y*sy);
    x128f *vPressure = (x128f*)vPressureRow;
    x128f *vVelX = (x128f*)fluidFloatPointer(velX, y*sy);

    /* ...N: row y+1, ...P: row y-1 */
    x128f *vPressureN = (x128f*)fluidFloatPointer(pressure, (y+1)*sy);
    x128f *vVelYN = (x128f*)fluidFloatPointer(velY, (y+1)*sy);

    x128f *vPressureP = (x128f*)fluidFloatPointer(pressure, (y-1)*sy);
    x128f *vVelYP = (x128f*)fluidFloatPointer(velY, (y-1)*sy);

    /* NOTE(review): div4 is 1/4 in lanes 1 and 3 but 0 in lanes 0 and 2,
       so the head and tail blocks below store 0 into lanes 0 and 2 of the
       vector they write.  Presumably intentional (alternating-cell
       update); confirm against the PRESSURE_* macros. */
    x128f div4 = {0.0f, 1.0f/4.0f, 0.0f, 1.0f/4.0f};
    /* Not referenced in the code visible here; presumably used by the
       PRESSURE_* macros. */
    x128f mask = {1.0f, 0.0f, 1.0f, 0.0f};
#endif

#ifdef __APPLE_ALTIVEC__
    vector float vZero = {0,0,0,0};

    /* Start data-stream touches for the rows about to be used. */
    vec_dstst(vPressure, 0x01000001, 0);
    vec_dst(vVelX, 0x01000001, 1);
    vec_dst(vVelYN, 0x01000001, 2);
    vec_dst(vVelYP, 0x01000001, 3);

    int x;

    /* Head vector: nothing precedes it, so zeros are shifted in. */
    {
        vector float tmp;

        /* Same-row operands shifted by one float in each direction. */
        vector float sl_p = vec_sld(vPressure[0], vPressure[1],4);
        vector float sr_p = vec_sld(vZero, vPressure[0], 12);

        vector float sl_vx = vec_sld(vVelX[0], vVelX[1],4);
        vector float sr_vx = vec_sld(vZero, vVelX[0], 12);

        /* Accumulate, in this order. */
        tmp = vec_add(sl_p, sr_p);
        tmp = vec_add(tmp, vPressureN[0]);
        tmp = vec_add(tmp, vPressureP[0]);
        tmp = vec_sub(tmp, sl_vx);
        tmp = vec_add(tmp, sr_vx);
        tmp = vec_sub(tmp, vVelYN[0]);
        tmp = vec_add(tmp, vVelYP[0]);

        vPressure[0] = vec_madd(tmp, div4, vZero);

        /* First cell of the row takes the value of the second. */
        vPressureRow[0] = vPressureRow[1];
    }

    /* Middle vectors, four per pass while enough remain... */
    x=1;
    while (x<w/4-5) {
        PRESSURE_VEC_PRE(0)
        PRESSURE_VEC_PRE(1)
        PRESSURE_VEC_PRE(2)
        PRESSURE_VEC_PRE(3)

        PRESSURE_VEC_SHIFT(0)
        PRESSURE_VEC_SHIFT(1)
        PRESSURE_VEC_SHIFT(2)
        PRESSURE_VEC_SHIFT(3)

        PRESSURE_VEC_END(0)
        PRESSURE_VEC_END(1)
        PRESSURE_VEC_END(2)
        PRESSURE_VEC_END(3)
        x+=4;
    }

    /* ...then one at a time up to, but excluding, the last vector. */
    while (x<w/4-1) {
        PRESSURE_VEC_PRE(0)
        PRESSURE_VEC_SHIFT(0)
        PRESSURE_VEC_END(0)
        x++;
    }

    /* Tail vector: nothing follows it, so zeros are shifted in. */
    {
        vector float tmp;

        vector float sl_p = vec_sld(vPressure[x], vZero,4);
        vector float sr_p = vec_sld(vPressure[x-1], vPressure[x], 12);

        vector float sl_vx = vec_sld(vVelX[x], vZero,4);
        vector float sr_vx = vec_sld(vVelX[x-1], vVelX[x], 12);

        tmp = vec_add(sl_p, sr_p);
        tmp = vec_add(tmp, vPressureN[x]);
        tmp = vec_add(tmp, vPressureP[x]);
        tmp = vec_sub(tmp, sl_vx);
        tmp = vec_add(tmp, sr_vx);
        tmp = vec_sub(tmp, vVelYN[x]);
        tmp = vec_add(tmp, vVelYP[x]);

        vPressure[x] = vec_madd(tmp, div4, vZero);

        /* Last cell of the row takes the value of the one before it. */
        vPressureRow[w-1] = vPressureRow[w-2];
    }
#elif defined __SSE3__
    int x;

    /* Head vector.  _mm_srli_sf128 / _mm_slli_sf128 are not defined in
       this function; presumably whole-register byte shifts on float
       vectors - confirm. */
    {
        __m128 tmp;

        __m128 sl_p = _mm_srli_sf128(vPressure[0],4);
        sl_p = _mm_add_ps(sl_p,_mm_slli_sf128(vPressure[1],12));
        __m128 sr_p = _mm_slli_sf128(vPressure[0],4);

        __m128 sl_vx = _mm_srli_sf128(vVelX[0],4);
        sl_vx = _mm_add_ps(sl_vx,_mm_slli_sf128(vVelX[1],12));
        __m128 sr_vx = _mm_slli_sf128(vVelX[0],4);

        /* Accumulate, in this order. */
        tmp = _mm_add_ps(sl_p, sr_p);
        tmp = _mm_add_ps(tmp, vPressureN[0]);
        tmp = _mm_add_ps(tmp, vPressureP[0]);
        tmp = _mm_sub_ps(tmp, sl_vx);
        tmp = _mm_add_ps(tmp, sr_vx);
        tmp = _mm_sub_ps(tmp, vVelYN[0]);
        tmp = _mm_add_ps(tmp, vVelYP[0]);

        vPressure[0] = _mm_mul_ps(tmp, div4);

        /* First cell of the row takes the value of the second. */
        vPressureRow[0] = vPressureRow[1];
    }

    /* Middle vectors, three per pass while enough remain... */
    x=1;
    while (x<w/4-9) {
        PRESSURE_SSE_PRE(0);
        PRESSURE_SSE_PRE(1);
        PRESSURE_SSE_PRE(2);

        PRESSURE_SSE_POST(0);
        PRESSURE_SSE_POST(1);
        PRESSURE_SSE_POST(2);
        x+=3;
    }

    /* ...then one at a time up to, but excluding, the last vector. */
    while (x<w/4-1) {
        PRESSURE_SSE_PRE(0);
        PRESSURE_SSE_POST(0);
        x++;
    }

    /* Tail vector. */
    {
        __m128 tmp;

        __m128 sl_p = _mm_srli_sf128(vPressure[x],4);
        __m128 sr_p = _mm_slli_sf128(vPressure[x],4);
        sr_p = _mm_add_ps(sr_p,_mm_srli_sf128(vPressure[x-1],12));

        __m128 sl_vx = _mm_srli_sf128(vVelX[x],4);
        __m128 sr_vx = _mm_slli_sf128(vVelX[x],4);
        sr_vx = _mm_add_ps(sr_vx,_mm_srli_sf128(vVelX[x-1],12));

        tmp = _mm_add_ps(sl_p, sr_p);
        tmp = _mm_add_ps(tmp, vPressureN[x]);
        tmp = _mm_add_ps(tmp, vPressureP[x]);
        tmp = _mm_sub_ps(tmp, sl_vx);
        tmp = _mm_add_ps(tmp, sr_vx);
        tmp = _mm_sub_ps(tmp, vVelYN[x]);
        tmp = _mm_add_ps(tmp, vVelYP[x]);

        vPressure[x] = _mm_mul_ps(tmp, div4);

        /* Last cell of the row takes the value of the one before it. */
        vPressureRow[w-1] = vPressureRow[w-2];
    }
#else
    /* Scalar fallback.  last*/cur* hold values read before the cells they
       came from are overwritten, so the left-hand pressure term is always
       a pre-update value. */
    float lastPressureX = fluidFloatPointer(pressure,sx + y*sy)[0];
    float lastVelX = fluidFloatPointer(velX, y*sy)[0];

    float curPressureX = lastPressureX;
    float curVelX = fluidFloatPointer(velX, sx + y*sy)[0];

    /* First cell of the row takes the value of the second. */
    fluidFloatPointer(pressure,y*sy)[0] = lastPressureX;

    int x;
    int curxy = sx + y*sy;      /* offset of cell (x, y), starting at x = 1 */
    for (x=1; x<w-1; x++) {
        float nextPressureX = fluidFloatPointer(pressure,curxy + sx)[0];
        float nextVelX = fluidFloatPointer(velX,curxy + sx)[0];

        /* (sum of the four pressure terms
            - (velX difference + velY difference)) / 4 */
        fluidFloatPointer(pressure,curxy)[0] =
            ( lastPressureX
            + nextPressureX
            + fluidFloatPointer(pressure,curxy - sy)[0]
            + fluidFloatPointer(pressure,curxy + sy)[0]
            - ( nextVelX - lastVelX
              + fluidFloatPointer(velY,curxy + sy)[0]
              - fluidFloatPointer(velY,curxy - sy)[0])) / 4.0f;

        /* Slide the window one cell along the row. */
        lastPressureX = curPressureX;
        curPressureX = nextPressureX;
        lastVelX = curVelX;
        curVelX = nextVelX;
        curxy += sx;
    }

    /* Last cell of the row takes the value of the one before it. */
    fluidFloatPointer(pressure,(w-1)*sx + y*sy)[0]
        = fluidFloatPointer(pressure,(w-2)*sx + y*sy)[0];
#endif
}
/* * subtract prediction from block data * pred % 8 == 0 * cur % 8 == 0 * lx % 16 == 0 * blk % 16 == 0 */ void sub_pred_altivec(SUB_PRED_PDECL) { unsigned int dst; uint8_t *pCA, *pCB, *pPA, *pPB; int16_t *pBA, *pBB; vector unsigned char zero; vector unsigned char predA, predB, curA, curB; vector signed short blkA, blkB; #ifdef ALTIVEC_VERIFY #ifdef ALTIVEC_DST if (lx & (~0xffff) != 0) mjpeg_error_exit1("sub_pred: lx > vec_dst range", lx); #endif if (NOT_VECTOR_ALIGNED(lx)) mjpeg_error_exit1("sub_pred: lx %% 16 != 0, (%d)", lx); if (NOT_VECTOR_ALIGNED(blk)) mjpeg_error_exit1("sub_pred: blk %% 16 != 0, (%d)", blk); if (((unsigned long)pred & 0xf) != ((unsigned long)cur & 0xf)) mjpeg_error_exit1("sub_pred: (pred(0x%X) %% 16) != (cur(0x%X) %% 16)", pred, cur); if ((((unsigned long)pred) & 0x7) != 0) mjpeg_error_exit1("sub_pred: pred %% 8 != 0, (0x%X)", pred); if ((((unsigned long)cur) & 0x7) != 0) mjpeg_error_exit1("sub_pred: cur %% 8 != 0, (0x%X)", cur); #endif /* A->B, B->A expand differently depending on input */ #define ABBA(symbol,ab) _ABBA(ABBA_##ab,symbol) /* {{{ */ #define _ABBA(abba_ab,symbol) abba_ab(symbol) #define ABBA_A(symbol) symbol##B #define ABBA_B(symbol) symbol##A /* }}} */ #define PERFORM_ITERATION(hl,ab,iter) /* iter {{{ */ \ pred##ab = vec_merge##hl(zero, pred##ab); \ cur##ab = vec_merge##hl(zero, cur##ab); \ blk##ab = vec_sub(vs16(cur##ab), vs16(pred##ab)); \ vec_st(blk##ab, 0, (signed short*)pB##ab); \ /* }}} */ #define PREPARE_ITERATION(hl,ab,iter) /* iter {{{ */ \ pP##ab = ABBA(pP,ab) + lx; \ pC##ab = ABBA(pC,ab) + lx; \ pB##ab = ABBA(pB,ab) + 8; \ pred##ab = vec_ld(0, pP##ab); \ cur##ab = vec_ld(0, pC##ab); \ /* }}} */ #define NO_RESCHEDULE asm volatile ("") AMBER_START; pPA = pred; pCA = cur; pBA = blk; #ifdef ALTIVEC_DST dst = 0x01080000 | lx; vec_dst(pPA, dst, 0); vec_dst(pCA, dst, 1); dst = 0x01080010; vec_dstst(pBA, dst, 2); #endif pPB = pPA + lx; NO_RESCHEDULE; predA = vec_ld(0, pPA); NO_RESCHEDULE; pCB = pCA + lx; NO_RESCHEDULE; curA = 
vec_ld(0, pCA); NO_RESCHEDULE; pBB = pBA + 8; NO_RESCHEDULE; predB = vec_ld(0, pPB); NO_RESCHEDULE; zero = vec_splat_u8(0); NO_RESCHEDULE; curB = vec_ld(0, pCB); if (VECTOR_ALIGNED(pPA)) { PERFORM_ITERATION(h,A,0); PREPARE_ITERATION(h,A,2); /* prepare next A iteration */ PERFORM_ITERATION(h,B,1); PREPARE_ITERATION(h,B,3); /* prepare next B iteration */ PERFORM_ITERATION(h,A,2); PREPARE_ITERATION(h,A,4); PERFORM_ITERATION(h,B,3); PREPARE_ITERATION(h,B,5); PERFORM_ITERATION(h,A,4); PREPARE_ITERATION(h,A,6); PERFORM_ITERATION(h,B,5); PREPARE_ITERATION(h,B,7); PERFORM_ITERATION(h,A,6); PERFORM_ITERATION(h,B,7); } else { PERFORM_ITERATION(l,A,0); PREPARE_ITERATION(l,A,2); /* prepare next A iteration */ PERFORM_ITERATION(l,B,1); PREPARE_ITERATION(l,B,3); /* prepare next B iteration */ PERFORM_ITERATION(l,A,2); PREPARE_ITERATION(l,A,4); PERFORM_ITERATION(l,B,3); PREPARE_ITERATION(l,B,5); PERFORM_ITERATION(l,A,4); PREPARE_ITERATION(l,A,6); PERFORM_ITERATION(l,B,5); PREPARE_ITERATION(l,B,7); PERFORM_ITERATION(l,A,6); PERFORM_ITERATION(l,B,7); } #ifdef ALTIVEC_DST vec_dssall(); #endif AMBER_STOP; }