void decal_nofilter_scale_neon(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
{
    int i;

    if (count >= 8) {
        /* SkFixed is 16.16 fixed point */
        SkFixed dx2 = dx+dx;
        SkFixed dx4 = dx2+dx2;
        SkFixed dx8 = dx4+dx4;

        /* now build fx/fx+dx/fx+2dx/fx+3dx */
        SkFixed fx1, fx2, fx3;
        int32x4_t lbase, hbase;
        uint16_t *dst16 = (uint16_t *)dst;

        fx1 = fx+dx;
        fx2 = fx1+dx;
        fx3 = fx2+dx;

        /* avoid an 'lbase uninitialized' warning */
        lbase = vdupq_n_s32(fx);
        lbase = vsetq_lane_s32(fx1, lbase, 1);
        lbase = vsetq_lane_s32(fx2, lbase, 2);
        lbase = vsetq_lane_s32(fx3, lbase, 3);
        hbase = vaddq_s32(lbase, vdupq_n_s32(dx4));

        /* take upper 16 of each, store, and bump everything */
        do {
            int32x4_t lout, hout;
            uint16x8_t hi16;

            lout = lbase;
            hout = hbase;
            /* gets hi's of all louts then hi's of all houts */
            asm ("vuzpq.16 %q0, %q1" : "+w" (lout), "+w" (hout));
            hi16 = vreinterpretq_u16_s32(hout);
            vst1q_u16(dst16, hi16);

            /* on to the next */
            lbase = vaddq_s32 (lbase, vdupq_n_s32(dx8));
            hbase = vaddq_s32 (hbase, vdupq_n_s32(dx8));
            dst16 += 8;
            count -= 8;
            fx += dx8;
        } while (count >= 8);
        dst = (uint32_t *) dst16;
    }
                           const int32 *rightInput,
                           int inputSampleCount)
{
  int32 outputSampleCount =
      (inputSampleCount + 1 - instance->m_analysisOdd) >> 1;
#ifdef __ARM_NEON__
  int32 Ktmp;
  int32x4_t K;
  int32x4_t MEM1;
  int32x4_t MEM2;
  int32x4_t OUT_HIGH, OUT_LOW, V;
  int32x4_t K1, K2;
  int32 i = 0;

  Ktmp = instance->m_coeff01;
  K = vsetq_lane_s32(Ktmp, K, 0);
  K = vsetq_lane_s32(Ktmp, K, 1);

  Ktmp = instance->m_coeff11;
  K = vsetq_lane_s32(Ktmp, K, 2);
  K = vsetq_lane_s32(Ktmp, K, 3);

  K1 = vshlq_n_s32(K, 16);

  Ktmp = instance->m_coeff02;
  K = vsetq_lane_s32(Ktmp, K, 0);
  K = vsetq_lane_s32(Ktmp, K, 1);

  Ktmp = instance->m_coeff12;
  K = vsetq_lane_s32(Ktmp, K, 2);
  K = vsetq_lane_s32(Ktmp, K, 3);
Exemplo n.º 3
0
// Codegen test for the vsetq_lane_s32 NEON intrinsic: returns the result of
// setting lane 1 of the vector v2 to the scalar v1.
// NOTE(review): the CHECK comments look like LLVM FileCheck patterns matched
// against the compiler output (function label, then a single `ins` into
// element .s[1] from a w register) -- keep them verbatim when editing.
int32x4_t test_vsetq_lane_s32(int32_t v1, int32x4_t v2) {
   // CHECK: test_vsetq_lane_s32
  return vsetq_lane_s32(v1, v2, 1);
  // CHECK: ins {{v[0-9]+}}.s[1], {{w[0-9]+}}
}
Exemplo n.º 4
0
// Codegen test for the vsetq_lane_s32 NEON intrinsic: returns the result of
// setting lane 3 of the vector b to the scalar a.
// NOTE(review): the CHECK-LABEL / CHECK-NEXT comments look like LLVM FileCheck
// patterns; they expect the function to consist of exactly one `ins` into
// lane 3 of v0 from w0, immediately followed by `ret`. Keep them verbatim.
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
  // CHECK-LABEL: test_vsetq_lane_s32:
  // CHECK-NEXT:  ins.s v0[3], w0
  // CHECK-NEXT:  ret
  return vsetq_lane_s32(a, b, 3);
}
Exemplo n.º 5
0
// IR-level codegen test for the vsetq_lane_s32 NEON intrinsic: returns the
// result of setting lane 3 of the vector b to the scalar a.
// NOTE(review): the CHECK lines below look like LLVM FileCheck patterns over
// the emitted LLVM IR (two bitcasts, then an `insertelement` at index 3 whose
// result is returned). Keep them verbatim when editing.
// CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #0 {
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
  return vsetq_lane_s32(a, b, 3);
}