/**
 * Solve the following KKT system (2.10) of [AHO98]:
 *
 *     [ 0  A^T  I ] [ dsx ] = [ rd ]
 *     [ A   0   0 ] [  dy ] = [ rp ]
 *     [ E   0   F ] [ dsz ] = [ rc ]
 *     \---- M ----/
 *
 * where
 *
 *     A  = [ Asparse ]
 *          [ Adense  ]
 *     dy = [ dysparse  dydense ]
 *     E  = Z sym I
 *     F  = X sym I
 *
 * The system is solved by elimination: first dy from (2.12), then dsx from
 * (2.13) and dsz from (2.14).
 *
 * Note that the `M` *parameter* is not the full block matrix labelled M in the
 * picture above: it is handed directly to arma::solve() against a right-hand
 * side with one entry per constraint (Asparse.n_rows + Adense.n_rows), i.e. it
 * is the coefficient matrix of the reduced system (2.12) in dy.
 *
 * @param Asparse Sparse constraint rows (the first block of A).
 * @param Adense Dense constraint rows (the second block of A).
 * @param Z Matrix passed to SolveLyapunov() wherever E^(-1) v is needed.
 * @param M Coefficient matrix of the reduced system solved for dy.
 * @param F The F block of the KKT system.
 * @param rp Right-hand side of the second block row.
 * @param rd Right-hand side of the first block row.
 * @param rc Right-hand side of the third block row.
 * @param dsx Output: solution component dsx.
 * @param dysparse Output: leading Asparse.n_rows entries of dy. Only assigned
 *     when Asparse has at least one row; otherwise the value it holds on entry
 *     is used unchanged in the products below.
 * @param dydense Output: trailing Adense.n_rows entries of dy. Only assigned
 *     when Adense has at least one row; otherwise the value it holds on entry
 *     is used unchanged in the products below.
 * @param dsz Output: solution component dsz.
 */
static inline void
SolveKKTSystem(const arma::sp_mat& Asparse,
               const arma::mat& Adense,
               const arma::mat& Z,
               const arma::mat& M,
               const arma::mat& F,
               const arma::vec& rp,
               const arma::vec& rd,
               const arma::vec& rc,
               arma::vec& dsx,
               arma::vec& dysparse,
               arma::vec& dydense,
               arma::vec& dsz)
{
  // Note: Whenever a formula calls for E^(-1) v for some v, we solve Lyapunov
  // equations instead of forming an explicit inverse.

  // Compute the RHS of (2.12): rp + A * E^(-1) (F rd - rc).
  arma::mat Frd_rc_Mat, Einv_Frd_rc_Mat;
  arma::vec Einv_Frd_rc;
  math::Smat(F * rd - rc, Frd_rc_Mat);
  SolveLyapunov(Einv_Frd_rc_Mat, Z, 2. * Frd_rc_Mat);
  math::Svec(Einv_Frd_rc_Mat, Einv_Frd_rc);

  arma::vec rhs = rp;
  const size_t numConstraints = Asparse.n_rows + Adense.n_rows;
  // The guards keep the spans valid: n_rows - 1 would wrap around for an
  // empty block.
  if (Asparse.n_rows)
    rhs(arma::span(0, Asparse.n_rows - 1)) += Asparse * Einv_Frd_rc;
  if (Adense.n_rows)
    rhs(arma::span(Asparse.n_rows, numConstraints - 1)) += Adense * Einv_Frd_rc;

  // TODO(stephentu): use a more efficient method (e.g. LU decomposition)
  arma::vec dy;
  if (!arma::solve(dy, M, rhs))
    Log::Fatal << "PrimalDualSolver::SolveKKTSystem(): Could not solve KKT "
        << "system." << std::endl;

  // Split dy back into its sparse-constraint and dense-constraint parts.
  if (Asparse.n_rows)
    dysparse = dy(arma::span(0, Asparse.n_rows - 1));
  if (Adense.n_rows)
    dydense = dy(arma::span(Asparse.n_rows, numConstraints - 1));

  // rd - A^T dy is needed by both (2.13) and (2.14); form it once.  It is kept
  // in a local (rather than written straight into dsz) so that the outputs are
  // still assigned in the same order as before: dsx first, dsz last.
  const arma::vec rd_ATdy =
      rd - Asparse.t() * dysparse - Adense.t() * dydense;

  // Compute dsx from (2.13): dsx = -E^(-1) (F (rd - A^T dy) - rc).
  arma::mat Frd_ATdy_rc_Mat, Einv_Frd_ATdy_rc_Mat;
  arma::vec Einv_Frd_ATdy_rc;
  math::Smat(F * rd_ATdy - rc, Frd_ATdy_rc_Mat);
  SolveLyapunov(Einv_Frd_ATdy_rc_Mat, Z, 2. * Frd_ATdy_rc_Mat);
  math::Svec(Einv_Frd_ATdy_rc_Mat, Einv_Frd_ATdy_rc);
  dsx = -Einv_Frd_ATdy_rc;

  // Compute dsz from (2.14): dsz = rd - A^T dy.
  dsz = rd_ATdy;
}
// [[Rcpp::export]] arma::sp_mat sparseTranspose(arma::sp_mat SM) { return SM.t(); }
// compute the log likelihood and its gradient w.r.t. theta int dtq::compGrad(void) { // remember, everything here is for equispaced data // we'll save the non-equispaced case for our scala + spark code :) if ((! haveData) || (! haveMyh)) return 1; if (spi<1) return 1; loglikmat = arma::zeros(ltvec-1,numts); if (spi==1) // special case { } else { // strategy: precompute and store common elements in Mats and Cubs // compute gradf and gradg at all spatial grid points arma::mat gradfy = arma::zeros(ylen,curtheta.n_elem); arma::mat gradgy = arma::zeros(ylen,curtheta.n_elem); this->gradFGyvec(gradfy, gradgy); // ompute gradf and gradg at all the data points arma::cube gradfdata = arma::zeros(curtheta.n_elem, (ltvec-1), numts); arma::cube gradgdata = arma::zeros(curtheta.n_elem, (ltvec-1), numts); this->gradFGdata(gradfdata, gradgdata); // initialize cubes to store all states and adjoints, // at all internal time points (spi-1), // for each pair of time series points (ltvec-1), // and at all spatial grid points (ylen) arma::cube dtqcube = arma::zeros(ylen,(ltvec-1),(spi-1)); arma::cube adjcube = arma::zeros(ylen,(ltvec-1),(spi-1)); // temporary matrix to store the initial state, phatinit arma::mat phatinit = arma::zeros(ylen,(ltvec-1)); // cube to store the gradient of the initial state w.r.t. theta arma::cube phatgrad = arma::zeros(ylen,(ltvec-1),curtheta.n_elem); // build the big matrix of initial conditions // and the gradients of those initial conditions! 
this->phatinitgrad(phatinit, phatgrad, gradfdata, gradgdata); dtqcube.slice(0) = phatinit; // propagate states forward in time by (spi-2) steps if (spi >= 3) for (int i=1; i<=(spi-2); i++) dtqcube.slice(i) = myk * prop * dtqcube.slice(i-1); // now multiply on the left by the Gamma vectors const arma::vec muvec = yvec + fy*myh; const arma::vec sigvec = gy*sqrt(myh); arma::cube allgamma = arma::zeros(ylen,numts,(ltvec-1)); for (int j=0; j<(ltvec-1); j++) { for (int l=0; l<numts; l++) { allgamma.slice(j).col(l) = myk*gausspdf((*odata)(j+1,l),muvec,sigvec); loglikmat(j,l) = log(arma::dot(allgamma.slice(j).col(l),dtqcube.slice(spi-2).col(j))); } } // std::cout << loglikmat << '\n'; // initialize the adjoint calculation for (int j=0; j<(ltvec-1); j++) for (int l=0; l<numts; l++) adjcube.slice(spi-2).col(j) += allgamma.slice(j).col(l) / exp(loglikmat(j,l)); // propagate adjoints backward in time by (spi-2) steps arma::sp_mat transprop = prop.t(); if (spi >= 3) for (int i=(spi-2); i>=1; i--) adjcube.slice(i-1) = myk * transprop * adjcube.slice(i); // stuff that we need for a bunch of gradients gradloglik = arma::zeros(curtheta.n_elem); arma::vec gvecm1 = arma::pow(gy,-1); arma::vec gvecm2 = arma::pow(gy,-2); arma::vec gvecm3 = arma::pow(gy,-3); // actual gradient calculation // proceed element-wise through theta_i for (int i=0; i<curtheta.n_elem; i++) { arma::vec temp1 = gvecm2 % gradfy.col(i); arma::vec temp2 = gvecm1 % gradgy.col(i); arma::vec temp3 = (1.0/myh)*gvecm3 % gradgy.col(i); arma::sp_mat::const_iterator start = prop.begin(); arma::sp_mat::const_iterator end = prop.end(); arma::umat dkdtloc(2, prop.n_nonzero); arma::vec dkdtval(prop.n_nonzero); unsigned int dkdtc = 0; for (arma::sp_mat::const_iterator it = start; it != end; ++it) { dkdtloc(0,dkdtc) = it.row(); dkdtloc(1,dkdtc) = it.col(); dkdtc++; } #pragma omp parallel for for (unsigned int dkdtcount=0; dkdtcount < prop.n_nonzero; dkdtcount++) { unsigned int orow = dkdtloc(0,dkdtcount); unsigned int ocol = 
dkdtloc(1,dkdtcount); double comval = yvec(orow) - muvec(ocol); dkdtval(dkdtcount) = myk*(prop.values[dkdtcount])*( comval*temp1(ocol) - temp2(ocol) + temp3(ocol)*comval*comval ); } arma::sp_mat dkdtheta(dkdtloc, dkdtval, ylen, ylen, false, true); // implement formula (22) from the DSAA paper // need gradient of Gamma{F-1} double tally = 0.0; #pragma omp parallel for reduction(+:tally) for (int j=0; j<(ltvec-1); j++) { tally += arma::dot(phatgrad.slice(i).col(j),adjcube.slice(0).col(j)); } #pragma omp parallel for collapse(2) reduction(+:tally) for (int j=0; j<(ltvec-1); j++) for (int l=0; l<numts; l++) { double xi = (*odata)((j+1),l); arma::vec gammagrad = (xi-muvec) % temp1; gammagrad += arma::pow(xi-muvec,2) % temp3; gammagrad -= temp2; gammagrad = gammagrad % allgamma.slice(j).col(l); tally += arma::dot(gammagrad,dtqcube.slice(spi-2).col(j)) / exp(loglikmat(j,l)); } // we have tested and found that the dot product is better than the // triple matrix product here, i.e., it is worth taking the transpose // arma::mat dkdtheta = dkdthetatrans.t(); #pragma omp parallel for collapse(2) reduction(+:tally) for (int j=0; j<(ltvec-1); j++) for (int l=0; l<(spi-2); l++) { tally += arma::dot((dkdtheta*dtqcube.slice(l).col(j)),adjcube.slice(l+1).col(j)); } gradloglik(i) = tally; } } haveLoglik = true; haveGradloglik = true; return 0; }