void decal_nofilter_scale_neon(uint32_t dst[], SkFixed fx, SkFixed dx, int count) { int i; if (count >= 8) { /* SkFixed is 16.16 fixed point */ SkFixed dx2 = dx+dx; SkFixed dx4 = dx2+dx2; SkFixed dx8 = dx4+dx4; /* now build fx/fx+dx/fx+2dx/fx+3dx */ SkFixed fx1, fx2, fx3; int32x4_t lbase, hbase; uint16_t *dst16 = (uint16_t *)dst; fx1 = fx+dx; fx2 = fx1+dx; fx3 = fx2+dx; /* avoid an 'lbase unitialized' warning */ lbase = vdupq_n_s32(fx); lbase = vsetq_lane_s32(fx1, lbase, 1); lbase = vsetq_lane_s32(fx2, lbase, 2); lbase = vsetq_lane_s32(fx3, lbase, 3); hbase = vaddq_s32(lbase, vdupq_n_s32(dx4)); /* take upper 16 of each, store, and bump everything */ do { int32x4_t lout, hout; uint16x8_t hi16; lout = lbase; hout = hbase; /* gets hi's of all louts then hi's of all houts */ asm ("vuzpq.16 %q0, %q1" : "+w" (lout), "+w" (hout)); hi16 = vreinterpretq_u16_s32(hout); vst1q_u16(dst16, hi16); /* on to the next */ lbase = vaddq_s32 (lbase, vdupq_n_s32(dx8)); hbase = vaddq_s32 (hbase, vdupq_n_s32(dx8)); dst16 += 8; count -= 8; fx += dx8; } while (count >= 8); dst = (uint32_t *) dst16; }
const int32 *rightInput, int inputSampleCount) { int32 outputSampleCount = (inputSampleCount + 1 - instance->m_analysisOdd) >> 1; #ifdef __ARM_NEON__ int32 Ktmp; int32x4_t K; int32x4_t MEM1; int32x4_t MEM2; int32x4_t OUT_HIGH, OUT_LOW, V; int32x4_t K1, K2; int32 i = 0; Ktmp = instance->m_coeff01; K = vsetq_lane_s32(Ktmp, K, 0); K = vsetq_lane_s32(Ktmp, K, 1); Ktmp = instance->m_coeff11; K = vsetq_lane_s32(Ktmp, K, 2); K = vsetq_lane_s32(Ktmp, K, 3); K1 = vshlq_n_s32(K, 16); Ktmp = instance->m_coeff02; K = vsetq_lane_s32(Ktmp, K, 0); K = vsetq_lane_s32(Ktmp, K, 1); Ktmp = instance->m_coeff12; K = vsetq_lane_s32(Ktmp, K, 2); K = vsetq_lane_s32(Ktmp, K, 3);
/*
 * Codegen test for the vsetq_lane_s32 intrinsic with lane index 1.
 *
 * Returns the result of vsetq_lane_s32(v1, v2, 1), i.e. the vector v2 with
 * the scalar v1 written into lane 1.
 *
 * The FileCheck directives inside the body are matched against the compiler
 * output, not against this source: they expect the function name to appear,
 * followed by an `ins` that writes element .s[1] of some v-register from
 * some w-register.
 * NOTE(review): the RUN line is not visible here; presumably this is checked
 * against AArch64 assembly output -- confirm against the file header.
 *
 * Each `//` directive must stay on its own line; if it shares a line with
 * code, the rest of that line (including the return statement and the
 * closing brace) becomes part of the comment.
 */
int32x4_t test_vsetq_lane_s32(int32_t v1, int32x4_t v2) {
  // CHECK: test_vsetq_lane_s32
  return vsetq_lane_s32(v1, v2, 1);
  // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
}
/*
 * Codegen test for the vsetq_lane_s32 intrinsic with lane index 3.
 *
 * Returns the result of vsetq_lane_s32(a, b, 3), i.e. the vector b with the
 * scalar a written into lane 3.
 *
 * The FileCheck directives inside the body are matched against the compiler
 * output, not against this source. They pin the emitted function to exactly
 * two instructions after its label: `ins.s v0[3], w0` and then `ret`.
 * NOTE(review): the RUN line is not visible here; the `ins.s v0[3], w0`
 * spelling suggests an AArch64 assembly syntax variant -- confirm against
 * the file header.
 *
 * Each `//` directive must stay on its own line; if it shares a line with
 * code, the rest of that line (including the return statement and the
 * closing brace) becomes part of the comment.
 */
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
  // CHECK-LABEL: test_vsetq_lane_s32:
  // CHECK-NEXT: ins.s v0[3], w0
  // CHECK-NEXT: ret
  return vsetq_lane_s32(a, b, 3);
}
// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #0 { // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> // CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3 // CHECK: ret <4 x i32> [[VSET_LANE]] int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) { return vsetq_lane_s32(a, b, 3); }