// Evaluates the scalar derivative term associated with a covariance
// derivative matrix.
//
// Reads the cached members m_L, m_sW, m_V, m_alpha, m_dv, m_df and the
// length of m_W; none of them is modified.
//
// @param dK derivative matrix, mapped as a dense (num_rows x num_cols) matrix
// @return alpha'*dK*(alpha/2-df) - z'*dv, where z is built from
//         A=I-V'*U and A*dK as spelled out in the body
float64_t CKLDualInferenceMethod::get_derivative_related_cov(SGMatrix<float64_t> dK)
{
	// only the quantities that are actually read are mapped
	Map<MatrixXd> eigen_dK(dK.matrix, dK.num_rows, dK.num_cols);
	Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols);
	Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen);
	Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols);
	Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen);
	Map<VectorXd> eigen_dv(m_dv.vector, m_dv.vlen);
	Map<VectorXd> eigen_df(m_df.vector, m_df.vlen);

	index_t len=m_W.vlen;

	//U=inv(L')*diag(sW)
	MatrixXd eigen_U=eigen_L.triangularView<Upper>().adjoint().solve(MatrixXd(eigen_sW.asDiagonal()));

	//A=I-K*diag(sW)*inv(L)*inv(L')*diag(sW)
	//(m_V is the cached solution of L'*V=diag(sW)*K, so V'*U gives the product)
	MatrixXd eigen_A=MatrixXd::Identity(len, len)-eigen_V.transpose()*eigen_U;

	//AdK = A*dK;
	MatrixXd AdK=eigen_A*eigen_dK;

	//z = diag(AdK) + sum(A.*AdK,2) - sum(A'.*AdK,1)';
	VectorXd z=AdK.diagonal()+(eigen_A.array()*AdK.array()).rowwise().sum().matrix()
		-(eigen_A.transpose().array()*AdK.array()).colwise().sum().transpose().matrix();

	float64_t result=eigen_alpha.dot(eigen_dK*(eigen_alpha/2.0-eigen_df))-z.dot(eigen_dv);

	return result;
}
// Runs the L-BFGS solver over the dual variable stored in m_W.
//
// All solver settings are copied from the corresponding m_* members.
// The solver works directly on m_W.vector and is given
// CKLDualInferenceMethod::evaluate as objective callback and
// CKLDualInferenceMethod::adjust_step as step-adjustment callback, with
// this object passed as the opaque instance pointer.
//
// @return the objective value written by lbfgs() into nlml_opt
//         (0 if the solver never writes it)
float64_t CKLDualInferenceMethod::lbfgs_optimization()
{
	lbfgs_parameter_t lbfgs_param;
	lbfgs_param.m = m_m;
	lbfgs_param.max_linesearch = m_max_linesearch;
	lbfgs_param.linesearch = m_linesearch;
	lbfgs_param.max_iterations = m_max_iterations;
	lbfgs_param.delta = m_delta;
	lbfgs_param.past = m_past;
	lbfgs_param.epsilon = m_epsilon;
	lbfgs_param.min_step = m_min_step;
	lbfgs_param.max_step = m_max_step;
	lbfgs_param.ftol = m_ftol;
	lbfgs_param.wolfe = m_wolfe;
	lbfgs_param.gtol = m_gtol;
	lbfgs_param.xtol = m_xtol;
	lbfgs_param.orthantwise_c = m_orthantwise_c;
	lbfgs_param.orthantwise_start = m_orthantwise_start;
	lbfgs_param.orthantwise_end = m_orthantwise_end;

	float64_t nlml_opt=0;
	void * obj_prt = static_cast<void *>(this);

	// NOTE(review): the status code returned by lbfgs() is discarded, so a
	// failed or aborted run is not distinguishable here from a converged one.
	lbfgs(m_W.vlen, m_W.vector, &nlml_opt,
		CKLDualInferenceMethod::evaluate,
		NULL, obj_prt, &lbfgs_param, CKLDualInferenceMethod::adjust_step);

	return nlml_opt;
}
bool CKLDualInferenceMethod::precompute() { Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols); CDualVariationalGaussianLikelihood *lik= get_dual_variational_likelihood(); Map<VectorXd> eigen_W(m_W.vector, m_W.vlen); lik->set_dual_parameters(m_W, m_labels); m_is_dual_valid=lik->dual_parameters_valid(); if (!m_is_dual_valid) return false; //construct alpha m_alpha=lik->get_mu_dual_parameter(); Map<VectorXd> eigen_alpha(m_alpha.vector, m_alpha.vlen); eigen_alpha=-eigen_alpha; Map<VectorXd> eigen_sW(m_sW.vector, m_sW.vlen); eigen_sW=eigen_W.array().sqrt().matrix(); m_L=CMatrixOperations::get_choleksy(m_W, m_sW, m_ktrtr, CMath::exp(m_log_scale)); Map<MatrixXd> eigen_L(m_L.matrix, m_L.num_rows, m_L.num_cols); //solve L'*V=diag(sW)*K Map<MatrixXd> eigen_V(m_V.matrix, m_V.num_rows, m_V.num_cols); eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(eigen_sW.asDiagonal()*eigen_K*CMath::exp(m_log_scale*2.0)); Map<VectorXd> eigen_s2(m_s2.vector, m_s2.vlen); //Sigma=inv(inv(K)+diag(W))=K-K*diag(sW)*inv(L)'*inv(L)*diag(sW)*K //v=abs(diag(Sigma)) eigen_s2=(eigen_K.diagonal().array()*CMath::exp(m_log_scale*2.0)-(eigen_V.array().pow(2).colwise().sum().transpose())).abs().matrix(); //construct mu SGVector<float64_t> mean=m_mean->get_mean_vector(m_features); Map<VectorXd> eigen_mean(mean.vector, mean.vlen); Map<VectorXd> eigen_mu(m_mu.vector, m_mu.vlen); //mu=K*alpha+m eigen_mu=eigen_K*CMath::exp(m_log_scale*2.0)*eigen_alpha+eigen_mean; return true; }
void CKLDualInferenceMethod::update_alpha() { float64_t nlml_new=0; float64_t nlml_def=0; Map<MatrixXd> eigen_K(m_ktrtr.matrix, m_ktrtr.num_rows, m_ktrtr.num_cols); CDualVariationalGaussianLikelihood *lik= get_dual_variational_likelihood(); if (m_alpha.vlen == m_labels->get_num_labels()) { nlml_new=get_negative_log_marginal_likelihood_helper(); index_t len=m_labels->get_num_labels(); SGVector<float64_t> W_tmp(len); Map<VectorXd> eigen_W(W_tmp.vector, W_tmp.vlen); eigen_W.fill(0.5); SGVector<float64_t> sW_tmp(len); Map<VectorXd> eigen_sW(sW_tmp.vector, sW_tmp.vlen); eigen_sW=eigen_W.array().sqrt().matrix(); SGMatrix<float64_t> L_tmp=CMatrixOperations::get_choleksy(W_tmp, sW_tmp, m_ktrtr, CMath::exp(m_log_scale*2.0)); Map<MatrixXd> eigen_L(L_tmp.matrix, L_tmp.num_rows, L_tmp.num_cols); lik->set_dual_parameters(W_tmp, m_labels); //construct alpha SGVector<float64_t> alpha_tmp=lik->get_mu_dual_parameter(); Map<VectorXd> eigen_alpha(alpha_tmp.vector, alpha_tmp.vlen); eigen_alpha=-eigen_alpha; //construct mu SGVector<float64_t> mean=m_mean->get_mean_vector(m_features); Map<VectorXd> eigen_mean(mean.vector, mean.vlen); SGVector<float64_t> mu_tmp(len); Map<VectorXd> eigen_mu(mu_tmp.vector, mu_tmp.vlen); //mu=K*alpha+m eigen_mu=eigen_K*CMath::exp(m_log_scale*2.0)*eigen_alpha+eigen_mean; //construct s2 MatrixXd eigen_V=eigen_L.triangularView<Upper>().adjoint().solve(eigen_sW.asDiagonal()*eigen_K*CMath::exp(m_log_scale*2.0)); SGVector<float64_t> s2_tmp(len); Map<VectorXd> eigen_s2(s2_tmp.vector, s2_tmp.vlen); eigen_s2=(eigen_K.diagonal().array()*CMath::exp(m_log_scale*2.0)-(eigen_V.array().pow(2).colwise().sum().transpose())).abs().matrix(); lik->set_variational_distribution(mu_tmp, s2_tmp, m_labels); nlml_def=get_nlml_wrapper(alpha_tmp, mu_tmp, L_tmp); if (nlml_new<=nlml_def) { lik->set_dual_parameters(m_W, m_labels); lik->set_variational_distribution(m_mu, m_s2, m_labels); } } if (m_alpha.vlen != m_labels->get_num_labels() || nlml_def<nlml_new) { if(m_alpha.vlen != 
m_labels->get_num_labels()) m_alpha = SGVector<float64_t>(m_labels->get_num_labels()); index_t len=m_alpha.vlen; m_W=SGVector<float64_t>(len); for (index_t i=0; i<m_W.vlen; i++) m_W[i]=0.5; lik->set_dual_parameters(m_W, m_labels); m_sW=SGVector<float64_t>(len); m_mu=SGVector<float64_t>(len); m_s2=SGVector<float64_t>(len); m_Sigma=SGMatrix<float64_t>(len, len); m_Sigma.zero(); m_V=SGMatrix<float64_t>(len, len); } nlml_new=optimization(); lik->set_variational_distribution(m_mu, m_s2, m_labels); TParameter* s2_param=lik->m_parameters->get_parameter("sigma2"); m_dv=lik->get_variational_first_derivative(s2_param); TParameter* mu_param=lik->m_parameters->get_parameter("mu"); m_df=lik->get_variational_first_derivative(mu_param); }