/**
 * @brief   Covariance matrix between the functional and derivative training data,
 *          or its partial derivative,
 *          given pair-wise squared distances and differences
 * @param [in] logHyp       The log hyperparameters
 *                          - logHyp(0) = \f$\log(l)\f$
 *                          - logHyp(1) = \f$\log(\sigma_f)\f$
 * @param [in] pSqDist      The pair-wise squared distances between the functional and derivative training data
 * @param [in] pDelta       The pair-wise differences between the functional and derivative training data
 * @param [in] pdHypIndex   (Optional) Hyperparameter index for partial derivatives
 *                          - pdHypIndex = -1: return \f$\frac{\partial \mathbf{K}(\mathbf{X}, \mathbf{Z})}{\partial \mathbf{Z}_j}\f$ (default)
 *                          - pdHypIndex =  0: return \f$\frac{\partial^2 \mathbf{K}(\mathbf{X}, \mathbf{Z})}{\partial \log(l) \partial \mathbf{Z}_j}\f$
 *                          - pdHypIndex =  1: return \f$\frac{\partial^2 \mathbf{K}(\mathbf{X}, \mathbf{Z})}{\partial \log(\sigma_f) \partial \mathbf{Z}_j}\f$
 * @return  An NNxNN matrix pointer\n
 *          NN: The number of functional and derivative training data
 */
static MatrixPtr K_FD(const Hyp            &logHyp,
                      const MatrixConstPtr pSqDist,
                      const MatrixConstPtr pDelta,
                      const int            pdHypIndex = -1)
{
	// K: same size as the squared distances
	MatrixPtr pK = K(logHyp, pSqDist);

	// constants
	const Scalar inv_ell2 = exp(static_cast<Scalar>(-2.f) * logHyp(0)); // 1/ell^2

	// pre-calculation
	// k(x, z) = sigma_f^2 * exp(-r^2/(2*ell^2)), r = |x-z|
	// k(s)    = sigma_f^2 * exp(s),              s = -r^2/(2*ell^2)
	//
	// s       = -r^2/(2*ell^2) = (-1/(2*ell^2)) * sum_{i=1}^d (xi - zi)^2
	// ds/dzj  = (xj - zj) / ell^2
	//
	// dk/ds     = sigma_f^2 * exp(s) = k
	// dk(s)/dzj = dk/ds * ds/dzj
	//           = k(x, z) * (xj - zj) / ell^2
	pK->noalias() = (inv_ell2 * pK->array() * pDelta->array()).matrix();

	// mode
	switch(pdHypIndex)
	{
	// derivatives of covariance matrix w.r.t log ell
	case 0:
		{
			// dk/dzj             = sigma_f^2 * exp(s) * (xj - zj) / ell^2
			// d^2k/dzj dlog(ell) = sigma_f^2 * exp(s) * [(xj - zj) / ell^2] * (r^2/ell^2 - 2)
			//                    = dk/dzj * (r^2/ell^2 - 2)
			pK->noalias() = (pK->array() * (inv_ell2 * pSqDist->array() - static_cast<Scalar>(2.f))).matrix();
			break;
		}

	// derivatives of covariance matrix w.r.t log sigma_f
	case 1:
		{
			// d^2k/dzj dlog(sigma_f) = 2 * dk/dzj
			pK->noalias() = static_cast<Scalar>(2.f) * (*pK);
			break;
		}

	// covariance matrix, dk/dzj
	default:
		{
			break;
		}
	}

	return pK;
}
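/*
 * Sanity check for the chain-rule derivation above (illustrative only, not
 * part of this class): the closed form dk/dzj = k(x,z)*(xj - zj)/ell^2 can be
 * compared against a central finite difference. A minimal stand-alone sketch,
 * assuming only Eigen; se_cov and the test points are hypothetical names.
 *
 *   #include <Eigen/Dense>
 *   #include <cmath>
 *   #include <iostream>
 *
 *   // k(x, z) = sigma_f^2 * exp(-|x-z|^2 / (2*ell^2))
 *   double se_cov(const Eigen::Vector3d &x, const Eigen::Vector3d &z,
 *                 double ell, double sigma_f)
 *   {
 *       return sigma_f * sigma_f * std::exp(-(x - z).squaredNorm() / (2.0 * ell * ell));
 *   }
 *
 *   int main()
 *   {
 *       const double ell = 0.7, sigma_f = 1.3, h = 1e-6;
 *       const int j = 1; // coordinate of the derivative observation
 *       Eigen::Vector3d x(0.3, -0.2, 0.5), z(0.1, 0.4, -0.3);
 *
 *       // closed form: dk/dzj = k(x, z) * (xj - zj) / ell^2
 *       const double analytic = se_cov(x, z, ell, sigma_f) * (x(j) - z(j)) / (ell * ell);
 *
 *       // central finite difference in zj
 *       Eigen::Vector3d zp = z, zm = z;
 *       zp(j) += h; zm(j) -= h;
 *       const double numeric = (se_cov(x, zp, ell, sigma_f) - se_cov(x, zm, ell, sigma_f)) / (2.0 * h);
 *
 *       std::cout << analytic << " vs " << numeric << std::endl; // should agree to ~1e-9
 *       return 0;
 *   }
 */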
static MatrixPtr K(const Hyp                   &logHyp,
                   GeneralTrainingData<Scalar> &generalTrainingData,
                   const int                   pdHypIndex = -1)
{
	// Assertions appear only at the beginning of the public static member functions,
	// which can be accessed from outside.
	// The hyperparameter index should be less than the number of hyperparameters.
	assert(pdHypIndex < static_cast<int>(logHyp.size()));

	// copy hyperparameters
	Cov1::Hyp logHyp1;
	Cov2::Hyp logHyp2;
	copy(logHyp, logHyp1, logHyp2);

	// output
	MatrixPtr pK;

	// covariance matrix
	if(pdHypIndex < 0)
	{
		// Cov = Cov1 * Cov2
		pK = Cov1::K(logHyp1, generalTrainingData, pdHypIndex);                               // Cov1
		pK->noalias() = pK->cwiseProduct(*Cov2::K(logHyp2, generalTrainingData, pdHypIndex)); // Cov2
	}

	// partial derivatives of covariance matrix
	else
	{
		// w.r.t a hyperparameter of Cov1 only
		if(pdHypIndex < Cov1::N - 1)
		{
			// dCov = dCov1 * Cov2
			pK = Cov1::K(logHyp1, generalTrainingData, pdHypIndex);                       // dCov1
			pK->noalias() = pK->cwiseProduct(*Cov2::K(logHyp2, generalTrainingData, -1)); // Cov2
		}

		// w.r.t the shared last hyperparameter
		else if(pdHypIndex == N - 1)
		{
			// dCov = dCov1 * dCov2
			pK = Cov1::K(logHyp1, generalTrainingData, logHyp1.size() - 1);                               // dCov1
			pK->noalias() = pK->cwiseProduct(*Cov2::K(logHyp2, generalTrainingData, logHyp2.size() - 1)); // dCov2
		}

		// w.r.t a hyperparameter of Cov2 only
		else
		{
			// dCov = Cov1 * dCov2
			// Cov2's local index: skip the Cov1-only block of size Cov1::N - 1
			pK = Cov1::K(logHyp1, generalTrainingData, -1);                                                       // Cov1
			pK->noalias() = pK->cwiseProduct(*Cov2::K(logHyp2, generalTrainingData, pdHypIndex - (Cov1::N - 1))); // dCov2
		}
	}

	return pK;
}
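/*
 * Note on the index arithmetic above (this layout is an assumption made
 * explicit here, not something this function alone confirms): the composite
 * hyperparameter vector is taken to hold Cov1's hyperparameters except its
 * last, then Cov2's except its last, then the shared last hyperparameter,
 * so that N = Cov1::N + Cov2::N - 1:
 *
 *   composite index 0 .. Cov1::N-2    -> Cov1-only hyperparameters
 *   composite index Cov1::N-1 .. N-2  -> Cov2-only hyperparameters
 *   composite index N-1               -> shared last hyperparameter
 *
 * Under this layout, the first Cov2-only hyperparameter (composite index
 * Cov1::N-1) must map to Cov2's local index 0, which is why the last branch
 * passes pdHypIndex - (Cov1::N - 1) to Cov2::K. The dCov1 * dCov2 form in the
 * shared branch matches d(Cov1 o Cov2)/dlog(sigma_f) when each factor carries
 * its own sigma_f^2, since then dCovi = 2*Covi and dCov1 o dCov2
 * = 4*(Cov1 o Cov2); whether that holds depends on how copy() splits the
 * hyperparameters.
 */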
/**
 * @brief   Self [co]variance matrix between the test data, Kss(Z, Z)
 * @param [in] logHyp           The log hyperparameters
 *                              - logHyp(0) = \f$\log(l)\f$
 *                              - logHyp(1) = \f$\log(\sigma_f)\f$
 * @param [in] testData         The test data
 * @param [in] fVarianceVector  Flag for the return value
 *                              - fVarianceVector = true : return \f$\mathbf{k}_{**} \in \mathbb{R}^{M \times 1}, \mathbf{k}_{**}^i = k(\mathbf{Z}_i, \mathbf{Z}_i)\f$ (default)
 *                              - fVarianceVector = false: return \f$\mathbf{K}_{**} = \mathbf{K}(\mathbf{Z}, \mathbf{Z}) \in \mathbb{R}^{M \times M}\f$,\n
 *                                which can be used for Bayesian Committee Machines.
 * @return  A matrix pointer\n
 *          - Mx1 (fVarianceVector == true)
 *          - MxM (fVarianceVector == false)\n
 *          M: The number of test data
 */
static MatrixPtr Kss(const Hyp              &logHyp,
                     const TestData<Scalar> &testData,
                     const bool             fVarianceVector = true)
{
	// The number of test data
	const int M = testData.M();

	// Some constant values
	const Scalar sigma_f2 = exp(static_cast<Scalar>(2.0) * logHyp(1)); // sigma_f^2

	// Output
	MatrixPtr pKss;

	// K: self-variance vector (Mx1)
	if(fVarianceVector)
	{
		// k(z, z) = sigma_f^2
		pKss.reset(new Matrix(M, 1));
		pKss->fill(sigma_f2);
	}

	// K: self-covariance matrix (MxM)
	else
	{
		// R: absolute distances, taking the square root
		//    of the pair-wise squared distances in place
		MatrixPtr pAbsDistXsXs = PairwiseOp<Scalar>::sqDist(testData.pXs()); // MxM
		pAbsDistXsXs->noalias() = pAbsDistXsXs->cwiseSqrt();

		// K(R)
		pKss = K(logHyp, pAbsDistXsXs);
	}

	return pKss;
}
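/*
 * For intuition (illustrative only): at r = 0, a stationary kernel normalized
 * so that k(0) = sigma_f^2 gives k(z, z) = sigma_f^2, which is why the
 * variance-vector branch just fills a constant. A minimal stand-alone sketch
 * of the two return shapes, assuming only Eigen and an SE form for
 * concreteness; selfCovariance and Xs are hypothetical names.
 *
 *   #include <Eigen/Dense>
 *   #include <cmath>
 *
 *   Eigen::MatrixXd selfCovariance(const Eigen::MatrixXd &Xs, // M x D test inputs
 *                                  double ell, double sigma_f,
 *                                  bool fVarianceVector = true)
 *   {
 *       const int M = static_cast<int>(Xs.rows());
 *       const double sigma_f2 = sigma_f * sigma_f;
 *
 *       // Mx1 self-variance vector: k(z, z) = sigma_f^2
 *       if (fVarianceVector)
 *           return Eigen::MatrixXd::Constant(M, 1, sigma_f2);
 *
 *       // MxM self-covariance matrix, e.g. for Bayesian Committee Machines
 *       Eigen::MatrixXd K(M, M);
 *       for (int i = 0; i < M; ++i)
 *           for (int j = 0; j < M; ++j)
 *               K(i, j) = sigma_f2 * std::exp(-(Xs.row(i) - Xs.row(j)).squaredNorm()
 *                                             / (2.0 * ell * ell));
 *       return K;
 *   }
 */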
/**
 * @brief   Covariance matrix between the derivative training data,
 *          or its partial derivative,
 *          given pair-wise squared distances and differences
 * @param [in] logHyp       The log hyperparameters
 *                          - logHyp(0) = \f$\log(l)\f$
 *                          - logHyp(1) = \f$\log(\sigma_f)\f$
 * @param [in] pSqDist      The pair-wise squared distances between the derivative training data
 * @param [in] pDelta_i     The pair-wise differences of the i-th components between the derivative training data
 * @param [in] pDelta_j     The pair-wise differences of the j-th components between the derivative training data
 * @param [in] fSameCoord   Whether the two derivative directions are the same coordinate, i.e. i == j
 * @param [in] pdHypIndex   (Optional) Hyperparameter index for partial derivatives
 *                          - pdHypIndex = -1: return \f$\frac{\partial^2 \mathbf{K}(\mathbf{X}, \mathbf{Z})}{\partial \mathbf{X}_i \partial \mathbf{Z}_j}\f$ (default)
 *                          - pdHypIndex =  0: return \f$\frac{\partial^3 \mathbf{K}(\mathbf{X}, \mathbf{Z})}{\partial \log(l) \partial \mathbf{X}_i \partial \mathbf{Z}_j}\f$
 *                          - pdHypIndex =  1: return \f$\frac{\partial^3 \mathbf{K}(\mathbf{X}, \mathbf{Z})}{\partial \log(\sigma_f) \partial \mathbf{X}_i \partial \mathbf{Z}_j}\f$
 * @return  An NNxNN matrix pointer\n
 *          NN: The number of functional and derivative training data
 */
static MatrixPtr K_DD(const Hyp            &logHyp,
                      const MatrixConstPtr pSqDist,
                      const MatrixConstPtr pDelta_i,
                      const MatrixConstPtr pDelta_j,
                      const bool           fSameCoord,
                      const int            pdHypIndex = -1)
{
	// K, dK/dlog(ell) or dK/dlog(sigma_f) depending on pdHypIndex;
	// same size as the squared distances
	MatrixPtr pK = K(logHyp, pSqDist, pdHypIndex);

	// hyperparameters
	const Scalar inv_ell2      = exp(static_cast<Scalar>(-2.f) * logHyp(0)); // 1/ell^2
	const Scalar inv_ell4      = inv_ell2 * inv_ell2;                        // 1/ell^4
	const Scalar four_inv_ell4 = static_cast<Scalar>(4.f) * inv_ell4;        // 4/ell^4

	// delta
	const Scalar delta_inv_ell2            = fSameCoord ? inv_ell2 : static_cast<Scalar>(0.f); // delta(i, j)/ell^2
	const Scalar neg_double_delta_inv_ell2 = static_cast<Scalar>(-2.f) * delta_inv_ell2;       // -2*delta(i, j)/ell^2

	// pre-calculation
	// k(x, z) = sigma_f^2 * exp(-r^2/(2*ell^2)), r = |x-z|
	// k(s)    = sigma_f^2 * exp(s),              s = -r^2/(2*ell^2)
	//
	// s       = -r^2/(2*ell^2) = (-1/(2*ell^2)) * sum_{i=1}^d (xi - zi)^2
	// ds/dzj  = (xj - zj) / ell^2
	//
	// dk/ds     = sigma_f^2 * exp(s) = k
	// dk(s)/dzj = dk/ds * ds/dzj
	//           = k(x, z) * (xj - zj) / ell^2
	//
	// d^2k/dzj dxi = dk/dxi * (xj - zj) / ell^2 + k * delta(i, j) / ell^2
	//              = k * [-(xi - zi)/ell^2] * (xj - zj) / ell^2 + k * delta(i, j) / ell^2
	//              = k * [delta(i, j)/ell^2 - (xi - zi)*(xj - zj)/ell^4]
	pK->noalias() = (pK->array() * (delta_inv_ell2 - inv_ell4 * pDelta_i->array() * pDelta_j->array())).matrix();

	// the derivative w.r.t log(ell) needs an extra term,
	// because the bracket above also depends on ell
	if(pdHypIndex == 0)
	{
		// d^2k/dxi dzj           = k * [delta(i, j)/ell^2 - (xi - zi)*(xj - zj)/ell^4]
		// d^3k/dxi dzj dlog(ell) = dk/dlog(ell) * [delta(i, j)/ell^2 - (xi - zi)*(xj - zj)/ell^4]
		//                        + k * [-2*delta(i, j)/ell^2 + 4*(xi - zi)*(xj - zj)/ell^4]
		MatrixPtr pK0 = K(logHyp, pSqDist);
		pK->noalias() += (pK0->array() * (neg_double_delta_inv_ell2 + four_inv_ell4 * pDelta_i->array() * pDelta_j->array())).matrix();
	}

	return pK;
}
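/*
 * Sanity check for the Hessian derivation above (illustrative only, not part
 * of this class): d^2k/dxi dzj = k * [delta(i,j)/ell^2 - (xi - zi)(xj - zj)/ell^4]
 * can be compared against nested central finite differences. A minimal
 * stand-alone sketch, assuming only Eigen; k_se and the test points are
 * hypothetical names.
 *
 *   #include <Eigen/Dense>
 *   #include <cmath>
 *   #include <iostream>
 *
 *   double k_se(const Eigen::Vector2d &x, const Eigen::Vector2d &z, double ell, double sf)
 *   {
 *       return sf * sf * std::exp(-(x - z).squaredNorm() / (2.0 * ell * ell));
 *   }
 *
 *   int main()
 *   {
 *       const double ell = 0.8, sf = 1.1, h = 1e-5;
 *       const int i = 0, j = 1; // i != j, so delta(i, j) = 0
 *       Eigen::Vector2d x(0.2, -0.4), z(-0.1, 0.3);
 *
 *       const double delta    = (i == j) ? 1.0 : 0.0;
 *       const double inv_ell2 = 1.0 / (ell * ell);
 *       const double analytic = k_se(x, z, ell, sf)
 *           * (delta * inv_ell2
 *              - (x(i) - z(i)) * (x(j) - z(j)) * inv_ell2 * inv_ell2);
 *
 *       // dk/dzj by central difference, evaluated at perturbed x
 *       auto dk_dzj = [&](const Eigen::Vector2d &xx) {
 *           Eigen::Vector2d zp = z, zm = z;
 *           zp(j) += h; zm(j) -= h;
 *           return (k_se(xx, zp, ell, sf) - k_se(xx, zm, ell, sf)) / (2.0 * h);
 *       };
 *       Eigen::Vector2d xp = x, xm = x;
 *       xp(i) += h; xm(i) -= h;
 *       const double numeric = (dk_dzj(xp) - dk_dzj(xm)) / (2.0 * h);
 *
 *       std::cout << analytic << " vs " << numeric << std::endl;
 *       return 0;
 *   }
 */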
static MatrixPtr Ks(const Hyp                         &logHyp,
                    const GeneralTrainingData<Scalar> &generalTrainingData,
                    const TestData<Scalar>            &testData)
{
	// copy hyperparameters
	Cov1::Hyp logHyp1;
	Cov2::Hyp logHyp2;
	copy(logHyp, logHyp1, logHyp2);

	// Cov = Cov1 * Cov2
	MatrixPtr pKs = Cov1::Ks(logHyp1, generalTrainingData, testData);                      // Cov1
	pKs->noalias() = pKs->cwiseProduct(*Cov2::Ks(logHyp2, generalTrainingData, testData)); // Cov2

	return pKs;
}
static MatrixPtr Kss(const Hyp              &logHyp,
                     const TestData<Scalar> &testData,
                     const bool             fVarianceVector = true)
{
	// copy hyperparameters
	Cov1::Hyp logHyp1;
	Cov2::Hyp logHyp2;
	copy(logHyp, logHyp1, logHyp2);

	// Cov = Cov1 * Cov2
	MatrixPtr pKss = Cov1::Kss(logHyp1, testData, fVarianceVector);                      // Cov1
	pKss->noalias() = pKss->cwiseProduct(*Cov2::Kss(logHyp2, testData, fVarianceVector)); // Cov2

	return pKss;
}
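/*
 * Context (illustrative only): Ks and Kss are the pieces that enter the GP
 * predictive equations. A minimal Eigen-only sketch, assuming K (with noise),
 * Ks, kss and the targets y were already computed with the functions above;
 * the function and variable names here are hypothetical.
 *
 *   #include <Eigen/Dense>
 *
 *   void predict(const Eigen::MatrixXd &K,   // NxN training covariance (+ noise)
 *                const Eigen::MatrixXd &Ks,  // NxM cross covariance
 *                const Eigen::VectorXd &kss, // Mx1 self-variance vector
 *                const Eigen::VectorXd &y,   // Nx1 training targets
 *                Eigen::VectorXd &mean, Eigen::VectorXd &variance)
 *   {
 *       const Eigen::LLT<Eigen::MatrixXd> llt(K);                // K = L L^T
 *       mean = Ks.transpose() * llt.solve(y);                    // Ks^T K^{-1} y
 *       const Eigen::MatrixXd V = llt.matrixL().solve(Ks);       // L^{-1} Ks
 *       variance = kss - V.colwise().squaredNorm().transpose();  // kss - diag(Ks^T K^{-1} Ks)
 *   }
 */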
/**
 * @brief   Gets the cross absolute distances between the training and test inputs
 * @param [in] testData The test data containing the M test inputs
 * @return  A matrix pointer
 *          \f[
 *          \mathbf{R} \in \mathbb{R}^{N \times M}, \quad
 *          \mathbf{R}_{ij} = |\mathbf{x}_i - \mathbf{z}_j|
 *          \f]
 * @todo    Include this matrix as a member variable like m_pDistXX
 */
MatrixPtr pAbsDistXXs(const TestData<Scalar> &testData) const
{
	// take the square root of the pair-wise squared distances in place
	MatrixPtr pAbsDist = pSqDistXXs(testData); // NxM
	pAbsDist->noalias() = pAbsDist->cwiseSqrt();
	return pAbsDist;
}
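/*
 * One way to realize the @todo above (a sketch under stated assumptions, not
 * the library's actual design): cache the result in a hypothetical mutable
 * member m_pAbsDistXXs, mirroring m_pDistXX, and invalidate it whenever the
 * test data change.
 *
 *   MatrixPtr pAbsDistXXs(const TestData<Scalar> &testData) const
 *   {
 *       if(!m_pAbsDistXXs)
 *       {
 *           m_pAbsDistXXs = pSqDistXXs(testData);                  // NxM squared distances
 *           m_pAbsDistXXs->noalias() = m_pAbsDistXXs->cwiseSqrt(); // in-place sqrt
 *       }
 *       return m_pAbsDistXXs;
 *   }
 *
 * The cache must be reset (m_pAbsDistXXs.reset()) whenever new test inputs
 * are set, or stale distances would be returned.
 */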