Exemple #1
0
static inline void check_wrap(const cmuk::KConstants& kc, vec3f& q, cmuk::LegIndex leg) {

  float min[2], max[2];

  min[0] = jl(kc, leg, cmuk::HIP_RY, 0);
  min[1] = jl(kc, leg, cmuk::KNEE_RY, 0);

  max[0] = jl(kc, leg, cmuk::HIP_RY, 1);
  max[1] = jl(kc, leg, cmuk::KNEE_RY, 1);

  for (int i=0; i<2; ++i) {

    float& angle = q[i+1];
    float b_old = compute_badness(angle, min[i], max[i]);      
    if (!b_old) { continue; }

    float a_new = angle;

    if (b_old > 0) {
      a_new = angle - 2*M_PI;
    } else {
      a_new = angle + 2*M_PI;
    }

    float b_new = compute_badness(a_new, min[i], max[i]);

    if (fabs(b_new) < fabs(b_old)) {
      angle = a_new;
    }

  }

  
}
Exemple #2
0
static bool check_limits(const cmuk::KConstants& kc, vec3f& angles, int leg) {
  bool rval = true;
  for (int i=0; i<3; ++i) {
    const float& min = jl(kc, leg, i, 0);
    const float& max = jl(kc, leg, i, 1);
    if (angles[i] < min) { angles[i] = min; rval = false; }
    if (angles[i] > max) { angles[i] = max; rval = false; }
  }
  return rval;
}
Exemple #3
0
Real MR::Omega(size_t i, size_t _j, size_t _l) {
    sub_nb jl(G.nb(i).size());
    jl.set(_j);
    jl.set(_l);
    Real Tijl = T(i,jl);
    return Tijl / (1.0 + tJ[i][_l] * M[i][_l] * Tijl);
}
/// Calculate the (2l+1)*A_{n,l} coefficients for each Lorentzian
std::vector<double>
InelasticDiffSphere::LorentzianCoefficients(double a) const {
  // precompute the 2+m_lmax spherical bessel functions (26 in total)
  std::vector<double> jl(2 + m_lmax);
  for (size_t l = 0; l < 2 + m_lmax; l++) {
    jl[l] = boost::math::sph_bessel(static_cast<unsigned int>(l), a);
  }

  // store the coefficient of each Lorentzian in vector YJ(a,w)
  size_t ncoeff = m_xnl.size();
  std::vector<double> YJ(ncoeff);

  for (size_t i = 0; i < ncoeff; i++) {
    double x = m_xnl[i].x;
    unsigned int l = static_cast<unsigned int>(m_xnl[i].l);

    double J;
    if (fabs(a - x) > m_divZone) {
      J = (a * jl[l + 1] - l * jl[l]) / (a * a - x * x);
    } else {
      J = m_linearJlist[i].slope * a +
          m_linearJlist[i].intercept; // linear interpolation instead
    }

    YJ[i] = m_alpha[i] * (J * J);
  }

  return YJ;
} // end of LorentzianCoefficients
Exemple #5
0
CMUK_ERROR_CODE cmuk::getJointLimits( LegIndex leg,
                                      vec3f* minrot,
                                      vec3f* maxrot ) const {
  
  if ((int)leg < 0  || (int) leg >= NUM_LEGS) {
    return CMUK_BAD_LEG_INDEX;
  }
  if (!minrot || !maxrot) { return CMUK_INSUFFICIENT_ARGUMENTS; }
  
  for (int i=0; i<3; ++i) {
    (*minrot)[i] = jl(_kc, leg, i, 0);
    (*maxrot)[i] = jl(_kc, leg, i, 1);
  }
  
  return CMUK_OKAY;

}
Exemple #6
0
void SimplexParameters::update(double y, const MnAlgebraicVector& p) {

  theSimplexParameters[jh()] = std::pair<double, MnAlgebraicVector>(y, p);
  if(y < theSimplexParameters[jl()].first) theJLow = jh();

  unsigned int jh = 0;
  for(unsigned int i = 1; i < theSimplexParameters.size(); i++) {
    if(theSimplexParameters[i].first > theSimplexParameters[jh].first) jh = i;
  }
  theJHigh = jh;

  return;
} 
Exemple #7
0
CMUK_ERROR_CODE cmuk::computeFootIK( LegIndex leg,
                                     const vec3f& pos,
                                     vec3f* q_bent_forward,
                                     vec3f* q_bent_rearward ) const {

  if ((int)leg < 0 || (int)leg >= NUM_LEGS) {
    return CMUK_BAD_LEG_INDEX;
  } else if (!q_bent_forward || !q_bent_rearward) {
    return CMUK_INSUFFICIENT_ARGUMENTS;
  }

  debug << "*** computing IK...\n";

  int hipflags = 0;

  // subtract off hip position
  vec3f p = pos - jo(_kc, leg, HIP_RX_OFFSET, _centeredFootIK); 
  vec3f orig = pos;

  // get dist from hip rx joint to y rotation plane
  const float& d = jo(_kc, leg, HIP_RY_OFFSET, _centeredFootIK)[1]; 

  // get the squared length of the distance on the plane
  float yz = p[1]*p[1] + p[2]*p[2];

  // alpha is the angle of the foot in the YZ plane with respect to the Y axis
  float alpha = atan2(p[2], p[1]);

  // h is the distance of foot from hip in YZ plane
  float h = sqrt(yz);

  // beta is the angle between the foot-hip vector (projected in YZ
  // plane) and the top hip link.
  float cosbeta = d / h;

  debug << "p = " << p << ", d = " << d << ", yz = " << yz << "\nalpha = " << alpha << ", h = " << h << ", cosbeta=" << cosbeta << "\n";

  if (fabs(cosbeta) > 1) {
    debug << "violated triangle inequality when calculating hip_rx_angle!\n" ;
    if (fabs(cosbeta) - 1 > 1e-4) {
      hipflags = hipflags | IK_UPPER_DISTANCE;
    }
    cosbeta = (cosbeta < 0) ? -1 : 1;
    if (yz < 1e-4) {
      p[1] = d;
      p[2] = 0;
    } else {
      float scl = fabs(d) / h;
      p[1] *= scl;
      p[2] *= scl;
      orig = p + jo(_kc, leg, HIP_RX_OFFSET, _centeredFootIK);
    }
  }

  float beta = acos(cosbeta);

  // Now compute the two possible hip angles
  float hip_rx_angles[2], badness[2];
  int flags[2];

  flags[0] = hipflags;
  flags[1] = hipflags;

  hip_rx_angles[0] = fix_angle(alpha - beta, -M_PI, M_PI);
  hip_rx_angles[1] = fix_angle(alpha + beta, -M_PI, M_PI);

  const float& min = jl(_kc, leg, HIP_RX, 0);
  const float& max = jl(_kc, leg, HIP_RX, 1);

  // See how badly we violate the joint limits for this hip angles
  for (int i=0; i<2; ++i) {
    float& angle = hip_rx_angles[i];
    badness[i] = fabs(compute_badness(angle, min, max));
    if (badness[i]) { flags[i] = flags[i] | IK_UPPER_ANGLE_RANGE; }
  }
  
  // Put the least bad (and smallest) hip angle first
  bool swap = false;

  if ( badness[1] <= badness[0] ) {
    // We want the less bad solution for hip angle
    swap = true;
  } else if (badness[0] == 0 && badness[1] == 0) {
    // We want the solution for hip angle that leaves the hip up.
    if ((leg == FL || leg == HL) && hip_rx_angles[0] > hip_rx_angles[1]) {
      swap = true;
    } else if ((leg == FR || leg == HR) && hip_rx_angles[0] < hip_rx_angles[1]) {
      swap = true;
    }
  } 

  if (swap) {
    std::swap(hip_rx_angles[0], hip_rx_angles[1]);
    std::swap(badness[0], badness[1]);  
    std::swap(flags[0], flags[1]);
  }
  
  int hip_solution_cnt = 2;

  if (badness[0] == 0 && badness[1] != 0) {
    hip_solution_cnt = 1;
  } 

  debug << "hip_rx_angles[0]=" << hip_rx_angles[0] 
        << ", badness=" << badness[0]
        << ", flags=" << flags[0] << "\n";

  debug << "hip_rx_angles[1]=" << hip_rx_angles[1] 
        << ", badness=" << badness[1]
        << ", flags=" << flags[1] << "\n";
  
  debug << "hip_solution_cnt = " << hip_solution_cnt << "\n";

  vec3f qfwd[2], qrear[2];
  
  for (int i=0; i<hip_solution_cnt; ++i) {

    debug << "** computing ll solution " << (i+1) << " of " << (hip_solution_cnt) << "\n";

    float hip_rx = hip_rx_angles[i];
    
    // now make inv. transform to get rid of hip rotation
    Transform3f tx = Transform3f::rx(hip_rx, jo(_kc, leg, HIP_RX_OFFSET, _centeredFootIK));
    vec3f ptx = tx.transformInv(orig);

    debug << "tx=[" << tx.translation() << ", " << tx.rotation() << "], ptx = " << ptx << "\n";
    
    // calculate lengths for cosine law
    float l1sqr = ol2(_kc, leg, KNEE_RY_OFFSET, _centeredFootIK);
    float l2sqr = ol2(_kc, leg, FOOT_OFFSET, _centeredFootIK);
    float l1 = ol(_kc, leg, KNEE_RY_OFFSET, _centeredFootIK);
    float l2 = ol(_kc, leg, FOOT_OFFSET, _centeredFootIK);
    
    float ksqr = ptx[0]*ptx[0] + ptx[2]*ptx[2];
    float k = sqrt(ksqr);

    debug << "l1=" << l1 << ", l2=" << l2 << ", k=" << k << "\n";
    
    // check triangle inequality
    if (k > l1 + l2) { 
      debug << "oops, violated the triangle inequality for lower segments: "
            << "k = " << k << ", "
            << "l1 + l2 = " << l1 + l2 << "\n";
      if (k - (l1 + l2) > 1e-4) {
        flags[i] = flags[i] | IK_LOWER_DISTANCE;
      }
      k = l1 + l2;
      ksqr = k * k;
    }
    
    // 2*theta is the acute angle formed by the spread
    // of the two hip rotations... 
    float costheta = (l1sqr + ksqr - l2sqr) / (2 * l1 * k);
    if (fabs(costheta) > 1) {
      debug << "costheta = " << costheta << " > 1\n";
      if (fabs(costheta) - 1 > 1e-4) {
        flags[i] = flags[i] | IK_LOWER_DISTANCE;
      }
      costheta = (costheta < 0) ? -1 : 1;
    }
    float theta = acos(costheta);
    
    // gamma is the angle of the foot with respect to the z axis
    float gamma = atan2(-ptx[0], -ptx[2]);
    
    // hip angles are just offsets off of gamma now
    float hip_ry_1 = gamma - theta;
    float hip_ry_2 = gamma + theta;
    
    // phi is the obtuse angle of the parallelogram
    float cosphi = (l1sqr + l2sqr - ksqr) / (2 * l1 * l2);
    if (fabs(cosphi) > 1) {
      debug << "cosphi = " << cosphi << " > 1\n";
      if (fabs(cosphi) - 1 > 1e-4) {
        flags[i] = flags[i] | IK_LOWER_DISTANCE;
      }
      cosphi = (cosphi < 0) ? -1 : 1;
    }
    float phi = acos(cosphi);
    
    // epsilon is the "error" caused by not having feet offset directly
    // along the z-axis (if they were, epsilon would equal zero)
    float epsilon = le(_kc, leg, _centeredFootIK);
    
    // now we can directly solve for knee angles
    float knee_ry_1 =  M_PI - phi - epsilon;
    float knee_ry_2 =  -M_PI + phi - epsilon;

    // now fill out angle structs and check limits
    qfwd[i] = vec3f(hip_rx, hip_ry_1, knee_ry_1);
    qrear[i] = vec3f(hip_rx, hip_ry_2, knee_ry_2);
    
    debug << "before wrap, qfwd =  " << qfwd[i] << "\n";
    debug << "before wrap, qrear = " << qrear[i] << "\n";

    check_wrap(_kc, qfwd[i], leg);
    check_wrap(_kc, qrear[i], leg);

    debug << "after wrap, qfwd =  " << qfwd[i] << "\n";
    debug << "after wrap, qrear = " << qrear[i] << "\n";
    
    if (!check_limits(_kc, qfwd[i], leg)) {
      debug << "violated limits forward!\n";
      flags[i] = flags[i] | IK_LOWER_ANGLE_RANGE_FWD;
    }
    if (!check_limits(_kc, qrear[i], leg)) {
      debug << "violated limits rearward!\n";
      flags[i] = flags[i] | IK_LOWER_ANGLE_RANGE_REAR;
    }
    
  } // for each viable hip solution

  int best = 0;

  if (hip_solution_cnt == 2) {
    if (howbad(flags[0]) > howbad(flags[1]))  {
      best = 1;
    }
    debug << "best overall solution is " << (best+1) << "\n";
  }


  *q_bent_forward = qfwd[best];
  *q_bent_rearward = qrear[best];
  return flags_to_errcode(flags[best]);

}
    void forward_avx2() {
        xor_(reg_soff, reg_soff);
        Label mb_sp_loop;
        L(mb_sp_loop); {

            channel_loop([=](size_t unroll) {
                        // Load 32 channels (two C16_blocks) in ymm, then
                        // split the work in half, each half splits in two
                        // regs with 8 channels per. When down converting,
                        // put the result in a temp register for the 1st
                        // iteration, combine the result at 2nd iteration
                        // and store ymm with 32 channels.
                        // If 16 channels, do just one half and store the
                        // result with mask.
                        Vmm v0 = Vmm(0);
                        Vmm v1 = Vmm(1);
                        Vmm vscale0 = Vmm(2);
                        Vmm vshift0 = Vmm(3);
                        Vmm vmean0 = Vmm(4);
                        Vmm vsqrtvar0 = Vmm(5);
                        Vmm vscale1 = Vmm(6);
                        Vmm vshift1 = Vmm(7);
                        Vmm vmean1 = Vmm(8);
                        Vmm vsqrtvar1 = Vmm(9);
                        Vmm tmp = Vmm(10);

                        for (size_t i = 0; i < unroll; i++) {
                            compute_vscaleshift(vscale0, vshift0, vmean0,
                                    vsqrtvar0, i * c_in_xmm_ * sizeof(float));
                            compute_vscaleshift(vscale1, vshift1, vmean1,
                                    vsqrtvar1, i * c_in_xmm_ * sizeof(float)
                                    + simd_w_ * sizeof(float));

                            vpmovsxbd(v0, src_ptr(i*c_in_xmm_));
                            vpmovsxbd(v1, src_ptr(i*c_in_xmm_ + simd_w_));
                            vcvtdq2ps(v0, v0);
                            vcvtdq2ps(v1, v1);

                            uni_vfmadd213ps(v0, vscale0, vshift0);
                            uni_vfmadd213ps(v1, vscale1, vshift1);
                            if (with_relu_) {
                                uni_vmaxps(v0, v0, vzero);
                                uni_vmaxps(v1, v1, vzero);
                            }

                            vcvtps2dq(v0, v0); // BA
                            vcvtps2dq(v1, v1); // DC
                            vpackssdw(v0, v0, v1); // BA + DC -> DBCA
                            vpermq(v0, v0, 0xD8); // DBCA -> DCBA
                            vperm2i128(v1, v0, v0, 0x1); // DCBA -> BADC
                            vpacksswb(v0, v0, v1); // DCBA + BADC -> badcDCBA
                            if (i == 0 && unroll != 1)
                                uni_vmovups(tmp, v0);
                            else if (i == 1) {
                                // badcDCBA + fehgHGFE -> HGFEDCBA
                                vperm2i128(v0, v0, tmp, 0x2);
                            }
                        }

                        if (unroll == 1)
                            vmaskmovps(dst_ptr(), vbody_mask, v0);
                        else
                            uni_vmovups(dst_ptr(), v0);
                    },
                    [=]() {
                        // handle first 8 channels. If tail is bigger,
                        // handle second part separately. There is no way
                        // to get performance as one has to work with bytes
                        // via xmm. vzeroupper kills all the perf.
                        Xmm x0 = Xmm(0);
                        Vmm v0 = Vmm(0);
                        Vmm vscale0 = Vmm(1);
                        Vmm vshift0 = Vmm(2);
                        Vmm vmean0 = Vmm(3);
                        Vmm vsqrtvar0 = Vmm(4);

                        size_t tail = nstl::min(c_tail_, simd_w_);
                        size_t num_iters = c_tail_ > simd_w_ ? 2 : 1;

                        for (size_t i = 0; i < num_iters; i++) {
                            if (i > 0)
                                tail = c_tail_ - simd_w_;

                            for (size_t tl = 0; tl < tail; tl++)
                                vpinsrb(x0, x0, src_ptr(8*i + tl), tl);

                            if (tail == simd_w_)
                                compute_vscaleshift(vscale0, vshift0, vmean0,
                                        vsqrtvar0, 32*i);
                            else
                                compute_vscaleshift(vscale0, vshift0, vmean0,
                                        vsqrtvar0, 32*i, true);

                            vpmovsxbd(v0, x0);
                            vcvtdq2ps(v0, v0);
                            uni_vfmadd213ps(v0, vscale0, vshift0);
                            if (with_relu_)
                                uni_vmaxps(v0, v0, vzero);
                            vcvtps2dq(v0, v0);
                            vpackssdw(v0, v0, vzero);
                            vpermq(v0, v0, 0xD8);
                            vpacksswb(v0, v0, vzero);

                            for (size_t tl = 0; tl < tail; tl++)
                                vpextrb(dst_ptr(8*i + tl), x0, tl);
                        }
                    });

            add(reg_soff, reg_coff_max);
            cmp(reg_soff, reg_soff_max);
            jl(mb_sp_loop);
        }
    }
    void forward_avx512() {
        xor_(reg_soff, reg_soff);
        Label mb_sp_loop;
        L(mb_sp_loop); {

            channel_loop([=](size_t unroll) {
                        // Works with 16c times @unroll blocks simultaneously.
                        // Each block up converts 16c, performs math and down
                        // converts.
                        for (size_t i = 0; i < unroll; i++) {
                            Vmm v = Vmm(i + 0*unroll);
                            Vmm vscale = Vmm(i + 1*unroll);
                            Vmm vshift = Vmm(i + 2*unroll);
                            Vmm vmean = Vmm(i + 3*unroll);
                            Vmm vsqrtvar = Vmm(i + 4*unroll);

                            compute_vscaleshift(vscale, vshift, vmean, vsqrtvar,
                                i * c_in_xmm_ * sizeof(float));

                            vpmovsxbd(v, src_ptr(i * c_in_xmm_));
                            vcvtdq2ps(v, v);

                            uni_vfmadd213ps(v, vscale, vshift);
                            if (with_relu_)
                                uni_vmaxps(v, v, vzero);

                            vcvtps2dq(v, v);
                            vpmovsdb(dst_ptr(i * c_in_xmm_), v);
                        }
                    },
                    [=]() {
                        // There is no way to get performance as one has to
                        // work with bytes via xmm. vzeroupper kills the perf.
                        Xmm x = Xmm(0);
                        Vmm v = Vmm(0);
                        Vmm vscale = Vmm(1);
                        Vmm vshift = Vmm(2);
                        Vmm vmean = Vmm(3);
                        Vmm vsqrtvar = Vmm(4);

                        for (size_t tl = 0; tl < c_tail_; tl++)
                            vpinsrb(x, x, src_ptr(tl), tl);

                        compute_vscaleshift(vscale, vshift, vmean, vsqrtvar, 0,
                                true);

                        vpmovsxbd(v, x);
                        vcvtdq2ps(v, v);

                        uni_vfmadd213ps(v, vscale, vshift);
                        if (with_relu_)
                            uni_vmaxps(v, v, vzero);

                        vcvtps2dq(v, v);
                        vpmovsdb(x, v);

                        for (size_t tl = 0; tl < c_tail_; tl++)
                            vpextrb(dst_ptr(tl), x, tl);
                    });

            add(reg_soff, reg_coff_max);
            cmp(reg_soff, reg_soff_max);
            jl(mb_sp_loop);
        }
    }