Пример #1
0
/**
 * Rotates vecSource by phi degrees about the axis rot and writes the
 * resulting 3-component vector into vecDest.
 *
 * The result is assembled as
 *     (rot . src) * rot  +  cos(phi) * ((rot x src) x rot)  +  sin(phi) * (rot x src)
 * assuming scalProd / vecProd / scalMult / add are the dot product, cross
 * product, scalar multiplication and vector addition their names suggest.
 * NOTE(review): this equals Rodrigues' rotation formula only when rot has
 * unit length - confirm that callers normalise the axis.
 *
 * @param vecDest   output vector; written only by the final addition
 * @param vecSource vector to rotate
 * @param rot       rotation axis
 * @param phi       rotation angle in degrees (converted via phi*M_PI/180)
 * @return vecDest
 */
GLfloat* rotate(GLfloat *vecDest, GLfloat *vecSource, GLfloat *rot, GLfloat phi)
{
    /* One scratch buffer per intermediate so no step overwrites another. */
    GLfloat alongAxisBuf[3];
    GLfloat crossForCosBuf[3];
    GLfloat doubleCrossBuf[3];
    GLfloat cosTermBuf[3];
    GLfloat partialSumBuf[3];
    GLfloat crossForSinBuf[3];
    GLfloat sinTermBuf[3];

    /* Component of the source along the axis: (rot . src) * rot. */
    GLfloat *alongAxis = scalMult(scalProd(rot, vecSource), rot, alongAxisBuf);

    /* cos(phi) * ((rot x src) x rot). */
    GLfloat *crossForCos = vecProd(rot, vecSource, crossForCosBuf);
    GLfloat *doubleCross = vecProd(crossForCos, rot, doubleCrossBuf);
    GLfloat *cosTerm = scalMult(cos(phi*M_PI/180), doubleCross, cosTermBuf);

    GLfloat *partialSum = add(alongAxis, cosTerm, partialSumBuf);

    /* sin(phi) * (rot x src). */
    GLfloat *crossForSin = vecProd(rot, vecSource, crossForSinBuf);
    GLfloat *sinTerm = scalMult(sin(phi*M_PI/180), crossForSin, sinTermBuf);

    add(partialSum, sinTerm, vecDest);
    return vecDest;
}
Пример #2
0
  // Performs one iteration step of the GpuBicgStab solver and returns the
  // value read back from Vars::norm: the reduction of the updated rvec on the
  // normal path, or the reduction of the intermediate vector s when the
  // early-exit branch is taken.
  //
  // Parameters:
  //   nr           - iteration index; nr == 0 selects the initial setup of
  //                  pvec instead of the beta-based update.
  //   log          - not used in this function (marked UNUSED).
  //   profilingRun - when true, both sanity ASSERTs pass unconditionally and
  //                  the early-exit branch is never taken, so the full
  //                  sequence of operations is always executed.
  //   prof         - forwarded unchanged to matVec ().apply ().
  //
  // NOTE(review): the exact semantics of vecProd, linComb.linComb,
  // linComb.reduce and matVec ().apply are defined outside this block; the
  // formulas in the comments below are what the argument order suggests and
  // should be confirmed against those helpers.
  template <typename F> F GpuBicgStab<F>::iteration (csize_t nr, UNUSED std::ostream& log, bool profilingRun, Core::ProfilingDataPtr prof) {
    // All scalar reads/writes on `vars` go through the first command queue.
    const std::vector<cl::CommandQueue>& queues = this->queues ();
    const cl::CommandQueue& queue = queues[0];

    // Solver state vectors owned by the object.
    DipVector<ftype>& rvec = this->rvec ();
    DipVector<ftype>& Avecbuffer = this->Avecbuffer ();
    DipVector<ftype>& xvec = this->xvec ();

    // Working vectors of this method, stored in the temporary vectors 1-4.
    DipVector<ftype>& pvec = this->tmpVec1 ();
    DipVector<ftype>& v = this->tmpVec2 ();
    DipVector<ftype>& s = this->tmpVec3 ();
    DipVector<ftype>& rtilda = this->tmpVec4 ();

    // ro_new = vecProd (rvec, rtilda)
    (vars + &Vars::ro_new).write (queue, vecProd (queues, rvec, rtilda));
    // Use higher precision to avoid underflow / overflow
    // dtmp = |ro_new| / inprodR (); the iteration is only allowed to continue
    // if this ratio is at least 1e-16 (check skipped for profiling runs).
    ftype dtmp = static_cast<ftype> (std::abs<ldouble> ((vars + &Vars::ro_new).read (queue)) / this->inprodR ());
    ASSERT (dtmp >= 1e-16 || profilingRun);
    if (nr == 0) {
      // First iteration: initialise pvec from rvec
      // (presumably a plain copy - confirm against linComb).
      this->linComb.linComb (queues, rvec, pvec);
    } else {
      // Use higher precision to avoid underflow / overflow
      // beta = (ro_new * alpha) / (ro_old * omega), with both products formed
      // in cldouble before the division.
      cldouble ro_new_alpha = (vars + &Vars::ro_new).read (queue) * (vars + &Vars::alpha).read (queue);
      cldouble ro_old_omega = (vars + &Vars::ro_old).read (queue) * (vars + &Vars::omega).read (queue);
      // Note: the literal 10e-10 equals 1e-9.
      ASSERT (std::abs (ro_old_omega) / std::abs (ro_new_alpha) >= 10e-10 || profilingRun);
      (vars + &Vars::beta).write (queue, static_cast<ctype> (ro_new_alpha / ro_old_omega));
      // mBetaOmega = -beta * omega
      (vars + &Vars::mBetaOmega).write (queue, -(vars + &Vars::beta).read (queue) * (vars + &Vars::omega).read (queue));
      // Presumably pvec = beta * pvec + mBetaOmega * v + rvec.
      this->linComb.linComb (queues, pvec, vars + &Vars::beta, v, vars + &Vars::mBetaOmega, rvec, pvec);
    }
    // Apply the matrix-vector operator to pvec, storing the result in v.
    this->matVec ().apply (queues, pvec, v, false, prof);
    // Use higher precision to avoid underflow / overflow
    // alpha = ro_new / vecProd (v, rtilda); mAlpha = -alpha
    cldouble ro_new = (vars + &Vars::ro_new).read (queue);
    cldouble vRtilda = vecProd (queues, v, rtilda);
    (vars + &Vars::alpha).write (queue, static_cast<ctype> (ro_new / vRtilda));
    (vars + &Vars::mAlpha).write (queue, -(vars + &Vars::alpha).read (queue));
    // Presumably s = mAlpha * v + rvec.
    this->linComb.linComb (queues, v, vars + &Vars::mAlpha, rvec, s);
    // Reduce s into Vars::norm and read the value back to the host.
    this->linComb.reduce (queues, s, vars + &Vars::norm);
    ftype inprodRplus1 = (vars + &Vars::norm).read (queue);
    if (inprodRplus1 < this->epsB && !profilingRun) {
      // Early exit: the reduced value of s is already below epsB, so only the
      // alpha step is applied to xvec (presumably xvec = alpha * pvec + xvec).
      // rvec, omega and ro_old are left untouched in this branch.
      this->linComb.linComb (queues, pvec, vars + &Vars::alpha, xvec, xvec);
    } else {
      // Apply the matrix-vector operator to s, storing the result in Avecbuffer.
      this->matVec ().apply (queues, s, Avecbuffer, false, prof);
      // denumOmega = reduction of Avecbuffer (the denominator of omega).
      this->linComb.reduce (queues, Avecbuffer, vars + &Vars::denumOmega);
      // Use higher precision to avoid underflow / overflow
      //(vars + &Vars::omega).write (queue, vecProd (queues, s, Avecbuffer) / (vars + &Vars::denumOmega).read (queue));
      // omega = vecProd (s, Avecbuffer) / denumOmega, divided in extended
      // precision and then narrowed to ctype; the line above is the earlier
      // version without the casts.
      (vars + &Vars::omega).write (queue, static_cast<ctype> (static_cast<cldouble> (vecProd (queues, s, Avecbuffer)) / static_cast<ldouble> ((vars + &Vars::denumOmega).read (queue))));
      // Presumably xvec = alpha * pvec + omega * s + xvec.
      this->linComb.linComb (queues, pvec, vars + &Vars::alpha, s, vars + &Vars::omega, xvec, xvec);
      // mOmega = -omega
      (vars + &Vars::mOmega).write (queue, -(vars + &Vars::omega).read (queue));
      // Presumably rvec = mOmega * Avecbuffer + s.
      this->linComb.linComb (queues, Avecbuffer, vars + &Vars::mOmega, s, rvec);
      // Reduce the new rvec into Vars::norm; this becomes the return value.
      this->linComb.reduce (queues, rvec, vars + &Vars::norm);
      inprodRplus1 = (vars + &Vars::norm).read (queue);
      // Remember ro_new as ro_old for the next call.
      (vars + &Vars::ro_old).write (queue, (vars + &Vars::ro_new).read (queue));
    }
    return inprodRplus1;
  }