/**
 * Solve the following KKT system (2.10) of [AHO98]:
 *
 *     [ 0  A^T  I ] [ dsx ] = [ rd ]
 *     [ A   0   0 ] [  dy ] = [ rp ]
 *     [ E   0   F ] [ dsz ] = [ rc ]
 *
 * where
 *
 *     A  = [ Asparse ]
 *          [ Adense  ]
 *     dy = [ dysparse  dydense ]
 *     E  = Z sym I
 *     F  = X sym I
 *
 * The block system is never assembled.  Instead the unknowns are eliminated
 * one after another:
 *
 *   1. dy  is obtained from  M dy = rp + A E^(-1) (F rd - rc)    (2.12)
 *   2. dsz = rd - A^T dy                                         (2.14)
 *   3. dsx = -E^(-1) (F dsz - rc)                                (2.13)
 *
 * Note that the parameter M is the coefficient matrix of the reduced system
 * in step 1 (one row/column per constraint), not the full block matrix shown
 * above.
 *
 * @param Asparse Sparse constraint rows of A.
 * @param Adense Dense constraint rows of A.
 * @param Z Matrix handed to SolveLyapunov() whenever E^(-1) v is needed.
 * @param M Coefficient matrix of the reduced system solved for dy.
 * @param F The matrix F of the system above.
 * @param rp Right-hand side of the second block row.
 * @param rd Right-hand side of the first block row.
 * @param rc Right-hand side of the third block row.
 * @param dsx Output: solution block dsx.
 * @param dysparse Output: leading Asparse.n_rows entries of dy (emptied when
 *     Asparse has no rows).
 * @param dydense Output: trailing Adense.n_rows entries of dy (emptied when
 *     Adense has no rows).
 * @param dsz Output: solution block dsz.
 */
static inline void
SolveKKTSystem(const arma::sp_mat& Asparse,
               const arma::mat& Adense,
               const arma::mat& Z,
               const arma::mat& M,
               const arma::mat& F,
               const arma::vec& rp,
               const arma::vec& rd,
               const arma::vec& rc,
               arma::vec& dsx,
               arma::vec& dysparse,
               arma::vec& dydense,
               arma::vec& dsz)
{
  arma::mat Frd_rc_Mat, Einv_Frd_rc_Mat,
            Einv_Frd_ATdy_rc_Mat, Frd_ATdy_rc_Mat;
  arma::vec Einv_Frd_rc, Einv_Frd_ATdy_rc, dy;

  // Note: Whenever a formula calls for E^(-1) v for some v, we solve Lyapunov
  // equations instead of forming an explicit inverse.
  // NOTE(review): the factor 2 on the right-hand side presumably comes from the
  // definition of the symmetric Kronecker product -- confirm against
  // SolveLyapunov().

  // Compute the RHS of (2.12)
  math::Smat(F * rd - rc, Frd_rc_Mat);
  SolveLyapunov(Einv_Frd_rc_Mat, Z, 2. * Frd_rc_Mat);
  math::Svec(Einv_Frd_rc_Mat, Einv_Frd_rc);

  arma::vec rhs = rp;
  const size_t numConstraints = Asparse.n_rows + Adense.n_rows;
  if (Asparse.n_rows)
    rhs(arma::span(0, Asparse.n_rows - 1)) += Asparse * Einv_Frd_rc;
  if (Adense.n_rows)
    rhs(arma::span(Asparse.n_rows, numConstraints - 1)) += Adense * Einv_Frd_rc;

  // TODO(stephentu): use a more efficient method (e.g. LU decomposition)
  if (!arma::solve(dy, M, rhs))
    Log::Fatal << "PrimalDualSolver::SolveKKTSystem(): Could not solve KKT "
        << "system." << std::endl;

  // Split dy into its sparse and dense parts.  A part whose constraint block is
  // empty is explicitly emptied so that stale contents supplied by the caller
  // cannot produce a dimension mismatch in A^T dy below.
  if (Asparse.n_rows)
    dysparse = dy(arma::span(0, Asparse.n_rows - 1));
  else
    dysparse.reset();
  if (Adense.n_rows)
    dydense = dy(arma::span(Asparse.n_rows, numConstraints - 1));
  else
    dydense.reset();

  // rd - A^T dy is needed by both (2.13) and (2.14); evaluate it once.  It is
  // kept in a local until the end so that no output is written while the
  // inputs are still being read.
  const arma::vec rd_ATdy =
      rd - Asparse.t() * dysparse - Adense.t() * dydense;

  // Compute dx from (2.13)
  math::Smat(F * rd_ATdy - rc, Frd_ATdy_rc_Mat);
  SolveLyapunov(Einv_Frd_ATdy_rc_Mat, Z, 2. * Frd_ATdy_rc_Mat);
  math::Svec(Einv_Frd_ATdy_rc_Mat, Einv_Frd_ATdy_rc);
  dsx = -Einv_Frd_ATdy_rc;

  // Compute dz from (2.14)
  dsz = rd_ATdy;
}
// ---- Example #2 ----
// Return the transpose of the sparse matrix SM as a new arma::sp_mat.
// SM is received by value and is not modified.
// [[Rcpp::export]]
arma::sp_mat sparseTranspose(arma::sp_mat SM) {
    arma::sp_mat transposed(SM.t());
    return transposed;
}
// Compute the log likelihood and its gradient w.r.t. theta.
//
// Returns 1 without touching any result when haveData or haveMyh is false or
// when spi < 1; returns 0 otherwise.  For spi > 1, loglikmat receives one
// log-likelihood term per (time step, time series) pair and gradloglik
// receives one entry per element of curtheta.  haveLoglik and haveGradloglik
// are set to true whenever 0 is returned.
int dtq::compGrad(void)
{
  // remember, everything here is for equispaced data
  // we'll save the non-equispaced case for our scala + spark code :)
  if ((! haveData) || (! haveMyh)) return 1;
  if (spi<1) return 1;

  loglikmat = arma::zeros(ltvec-1,numts);

  if (spi==1) // special case
  {
    // NOTE(review): nothing is computed in this branch -- loglikmat stays all
    // zeros and gradloglik is not assigned, yet both "have" flags are set
    // below.  Confirm whether this case is meant to be implemented.
  } 
  else
  {
    // strategy: precompute and store common elements in Mats and Cubs

    // compute gradf and gradg at all spatial grid points
    arma::mat gradfy = arma::zeros(ylen,curtheta.n_elem);
    arma::mat gradgy = arma::zeros(ylen,curtheta.n_elem);
    this->gradFGyvec(gradfy, gradgy);

    // compute gradf and gradg at all the data points
    arma::cube gradfdata = arma::zeros(curtheta.n_elem, (ltvec-1), numts);
    arma::cube gradgdata = arma::zeros(curtheta.n_elem, (ltvec-1), numts);
    this->gradFGdata(gradfdata, gradgdata);
    
    // initialize cubes to store all states and adjoints,
    // at all internal time points (spi-1),
    // for each pair of time series points (ltvec-1),
    // and at all spatial grid points (ylen)
    arma::cube dtqcube = arma::zeros(ylen,(ltvec-1),(spi-1));
    arma::cube adjcube = arma::zeros(ylen,(ltvec-1),(spi-1));

    // temporary matrix to store the initial state, phatinit
    arma::mat phatinit = arma::zeros(ylen,(ltvec-1));
    
    // cube to store the gradient of the initial state w.r.t. theta
    arma::cube phatgrad = arma::zeros(ylen,(ltvec-1),curtheta.n_elem);

    // build the big matrix of initial conditions
    // and the gradients of those initial conditions!
    this->phatinitgrad(phatinit, phatgrad, gradfdata, gradgdata);
    dtqcube.slice(0) = phatinit;

    // propagate states forward in time by (spi-2) steps
    if (spi >= 3)
      for (int i=1; i<=(spi-2); i++)
        dtqcube.slice(i) = myk * prop * dtqcube.slice(i-1);

    // now multiply on the left by the Gamma vectors
    const arma::vec muvec = yvec + fy*myh;
    const arma::vec sigvec = gy*sqrt(myh);
    arma::cube allgamma = arma::zeros(ylen,numts,(ltvec-1));
    for (int j=0; j<(ltvec-1); j++)
    {
      for (int l=0; l<numts; l++)
      {
        allgamma.slice(j).col(l) = myk*gausspdf((*odata)(j+1,l),muvec,sigvec);
        loglikmat(j,l) = log(arma::dot(allgamma.slice(j).col(l),dtqcube.slice(spi-2).col(j)));
      }
    }

    // std::cout << loglikmat << '\n';

    // initialize the adjoint calculation
    for (int j=0; j<(ltvec-1); j++)
      for (int l=0; l<numts; l++)
        adjcube.slice(spi-2).col(j) += allgamma.slice(j).col(l) / exp(loglikmat(j,l));

    // propagate adjoints backward in time by (spi-2) steps
    arma::sp_mat transprop = prop.t();
    if (spi >= 3)
      for (int i=(spi-2); i>=1; i--)
        adjcube.slice(i-1) = myk * transprop * adjcube.slice(i);

    // stuff that we need for a bunch of gradients
    gradloglik = arma::zeros(curtheta.n_elem);
    arma::vec gvecm1 = arma::pow(gy,-1);
    arma::vec gvecm2 = arma::pow(gy,-2);
    arma::vec gvecm3 = arma::pow(gy,-3);

    // (row, col) locations of the nonzeros of prop, in iterator order.  These
    // do not depend on theta, so they are extracted once here and reused for
    // every derivative matrix built below.
    arma::umat dkdtloc(2, prop.n_nonzero);
    {
      const arma::sp_mat::const_iterator end = prop.end();
      arma::uword dkdtc = 0;
      for (arma::sp_mat::const_iterator it = prop.begin(); it != end; ++it)
      {
        dkdtloc(0,dkdtc) = it.row();
        dkdtloc(1,dkdtc) = it.col();
        dkdtc++;
      }
    }

    // actual gradient calculation
    // proceed element-wise through theta_i
    for (arma::uword i=0; i<curtheta.n_elem; i++)
    {
      arma::vec temp1 = gvecm2 % gradfy.col(i);
      arma::vec temp2 = gvecm1 % gradgy.col(i);
      arma::vec temp3 = (1.0/myh)*gvecm3 % gradgy.col(i);

      // values of d(myk*prop)/d(theta_i) at the nonzero locations of prop;
      // every entry is overwritten in the loop below
      arma::vec dkdtval(prop.n_nonzero);
#pragma omp parallel for
      for (unsigned int dkdtcount=0; dkdtcount < prop.n_nonzero; dkdtcount++)
      {
        unsigned int orow = dkdtloc(0,dkdtcount);
        unsigned int ocol = dkdtloc(1,dkdtcount);
        double comval = yvec(orow) - muvec(ocol);
        dkdtval(dkdtcount) = myk*(prop.values[dkdtcount])*( comval*temp1(ocol) - temp2(ocol) + temp3(ocol)*comval*comval );
      }
      // locations are already in prop's own storage order, so no sorting is
      // requested (5th argument false)
      arma::sp_mat dkdtheta(dkdtloc, dkdtval, ylen, ylen, false, true);

      // implement formula (22) from the DSAA paper
      // need gradient of Gamma{F-1}
      double tally = 0.0;

      // contribution of the gradient of the initial state
#pragma omp parallel for reduction(+:tally)
      for (int j=0; j<(ltvec-1); j++)
      {
        tally += arma::dot(phatgrad.slice(i).col(j),adjcube.slice(0).col(j));
      }

      // contribution of the gradient of the Gamma vectors
#pragma omp parallel for collapse(2) reduction(+:tally)
      for (int j=0; j<(ltvec-1); j++)
        for (int l=0; l<numts; l++)
        {
          double xi = (*odata)((j+1),l);
          arma::vec gammagrad = (xi-muvec) % temp1;
          gammagrad += arma::pow(xi-muvec,2) % temp3;
          gammagrad -= temp2;
          gammagrad = gammagrad % allgamma.slice(j).col(l);
          tally += arma::dot(gammagrad,dtqcube.slice(spi-2).col(j)) / exp(loglikmat(j,l));
        }

      // contribution of the gradient of the propagator at each internal step
      // we have tested and found that the dot product is better than the
      // triple matrix product here, i.e., it is worth taking the transpose
      // arma::mat dkdtheta = dkdthetatrans.t();
#pragma omp parallel for collapse(2) reduction(+:tally)
      for (int j=0; j<(ltvec-1); j++)
        for (int l=0; l<(spi-2); l++)
        {
          tally += arma::dot((dkdtheta*dtqcube.slice(l).col(j)),adjcube.slice(l+1).col(j));
        }
      gradloglik(i) = tally;
    }
  }
  haveLoglik = true;
  haveGradloglik = true;
  return 0;
}