Beispiel #1
0
void test_vrecpeQf32 (void)
{
  float32x4_t out_float32x4_t;
  float32x4_t arg0_float32x4_t;

  out_float32x4_t = vrecpeq_f32 (arg0_float32x4_t);
}
inline float32x4_t cv_vrecpq_f32(float32x4_t val)
{
    float32x4_t reciprocal = vrecpeq_f32(val);
    reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
    reciprocal = vmulq_f32(vrecpsq_f32(val, reciprocal), reciprocal);
    return reciprocal;
}
Beispiel #3
0
inline v_float32x4 operator / (const v_float32x4& a, const v_float32x4& b)
{
    float32x4_t reciprocal = vrecpeq_f32(b.val);
    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
    return v_float32x4(vmulq_f32(a.val, reciprocal));
}
Beispiel #4
0
inline v_float32x4& operator /= (v_float32x4& a, const v_float32x4& b)
{
    float32x4_t reciprocal = vrecpeq_f32(b.val);
    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
    a.val = vmulq_f32(a.val, reciprocal);
    return a;
}
static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) {
  int i;
  float32x4_t x = vrecpeq_f32(b);
  // from arm documentation
  // The Newton-Raphson iteration:
  //     x[n+1] = x[n] * (2 - d * x[n])
  // converges to (1/d) if x0 is the result of VRECPE applied to d.
  //
  // Note: The precision did not improve after 2 iterations.
  for (i = 0; i < 2; i++) {
    x = vmulq_f32(vrecpsq_f32(b, x), x);
  }
  // a/b = a*(1/b)
  return vmulq_f32(a, x);
}
// http://stackoverflow.com/questions/6759897/
static inline __attribute__((always_inline)) float32x4_t reciprocal(float32x4_t x) {
    float32x4_t recip = vrecpeq_f32(x);
    recip = vmulq_f32(vrecpsq_f32(x, recip), recip);
    recip = vmulq_f32(vrecpsq_f32(x, recip), recip);
    return recip;
}