/* * This function calculates the Hessian and mu_ij_k and Lambda_ij_k * for all pairs (i,j) in the batch * and for all components k * */ void Likelihood_Protein::compute_negLL(int nr_threads_prot) { //initialize likelihood vector log_likelihood.zeros(this->number_of_pairs); #pragma omp parallel for num_threads(nr_threads_prot) for(int pair = 0; pair < this->number_of_pairs; pair++){ int i = this->i_indices(pair); int j = this->j_indices(pair); int lin_index = i*(L - (i+1)/2.0 - 1) + j - 1; //i*L - i*(i+1)/2 + j-(i+1); int contact = this->protein_contacts(pair); //for computation of likelihood arma::vec log_density(this->nr_components, arma::fill::zeros); arma::vec vqij = mq_ij.col(lin_index); double N_ij = mN_ij(i,j); arma::vec w_ij = w_ij3d.tube(i,j); //arma::vec vqij = q_ij3d.tube(i,j); //diagonal matrix Qij = diag(q'ji) //q'ijab = q(x_i=a, x_j=b) - (lambda_w * wijab / N_ij) --> has been precomputed arma::mat Qij = arma::diagmat(vqij); //outer product arma::mat qij_prod = vqij * vqij.t(); //determine negative Hessian = Hij arma::mat diff = Qij - qij_prod; arma::mat H_ij = N_ij * diff + regularizer_w; //eq 37 //precompute product H_ij * wij arma::vec Hij_wij_prod = H_ij * w_ij; for(int k = 0; k < this->nr_components; k++){ //gaussian parameters of coupling prior double weight_k = this->parameters.get_weight(k, contact); arma::vec mu_k = this->parameters.get_mean(k); arma::mat lambda_k = this->parameters.get_precMat(k); //---------------- simplify computation in case that lambda_k is diagonal matrix // A, A_inv, lambda_k, Qij are diagonal matrices arma::vec A = N_ij * vqij + lambda_k.diag(); //represents diagonal matrix arma::vec A_inv = 1.0 / A; //represents diagonal matrix double log_det_A = arma::sum(arma::log(A)); arma::vec Ainv_qij_product = A_inv % vqij; ////column vector double triple_product = arma::sum(vqij % Ainv_qij_product); //---------------- matrix computations in case lambda_k is NOT diagonal matrix //A is a diagonal matrix, as Qij and lambda_k are 
diagonal matrices //arma::mat A = N_ij * Qij + lambda_k; //diagonal //arma::mat A_inv = arma::diagmat(1.0 / A.diag()); //diagonal //double log_det_A = arma::sum(arma::log(A.diag())); //arma::vec Ainv_qij_product = arma::vec(A_inv * vqij); //400x1 dim matrix //double triple_product = arma::as_scalar(vqij.t() * Ainv_qij_product); //compute lambda_ij_k_mat arma::mat lambda_ij_k_mat = H_ij - regularizer_w + lambda_k; //debugging: we assume diagonal Hessian ================================================================ // arma::mat lambda_ij_k_mat_inv(400,400,arma::fill::zeros); // lambda_ij_k_mat_inv.diag() = 1.0 / lambda_ij_k_mat.diag(); //debugging======================================================================================= //compute inverse of lambda_ij_k_mat //---------------- simplify computation in case that lambda_k is diagonal matrix arma::mat lambda_ij_k_mat_inv = arma::diagmat(A_inv) + (Ainv_qij_product * Ainv_qij_product.t()) / (1.0/N_ij - triple_product); //---------------- matrix computations in case lambda_k is NOT diagonal matrix //arma::mat lambda_ij_k_mat_inv = A_inv + (Ainv_qij_product * Ainv_qij_product.t()) / (1.0/N_ij - triple_product); //compute mu_ij_k from precomputed entities arma::vec mu_ij_k_vec = lambda_ij_k_mat_inv * ( Hij_wij_prod + lambda_k * mu_k); //debugging: we assume diagonal Hessian ================================================================ // log_det_lambda_ij_k(k) = arma::sum(arma::log(lambda_ij_k_mat.diag())); //debugging======================================================================================= //save log determinant of lambda_ij_k, see page 16 Saikats theory double log_det_lambda_ij = log(1 - N_ij * triple_product) + log_det_A; //ratio of two gaussians in log space // N(0 | mu_k, lambda_k) //------------------------------ // N(0 | mu_ij_k, lambda_ij,k) double gaussian_ratio_logdensity = log_density_gaussian_ratio( mu_k, mu_ij_k_vec, lambda_k, lambda_ij_k_mat, 
this->parameters.get_log_det_inv_covMat(k), log_det_lambda_ij); log_density(k) = log(weight_k) + gaussian_ratio_logdensity; // if ((i == 0) && (j == 12)){ // std::cout << " " << std::endl; // std::cout << protein_id << " i: " << i << " j: " << j << " contact=" << contact << " component: " << k << std::endl; // std::cout << "triple_product : " << triple_product << std::endl; // std::cout << "log_det_A : " << log_det_A << std::endl; // std::cout << "lambda_ij_k_mat(0,1) : " << lambda_ij_k_mat(0,1) << "lambda_ij_k_mat(0,2) : " << lambda_ij_k_mat(0,2) << "lambda_ij_k_mat(2,0) : " << lambda_ij_k_mat(2,0) << std::endl; // std::cout << "mu_ij_k_vec(0) : " << mu_ij_k_vec(0) << "mu_ij_k_vec(1) : " << mu_ij_k_vec(1) << "mu_ij_k_vec(2) : " << mu_ij_k_vec(2) << std::endl; // std::cout << "Gaussian log density: " << gaussian_ratio_logdensity << std::endl; // std::cout << "log_density(k): " << log_density(k)<< std::endl; // std::cout << "log(weight_k): " << log(weight_k) << " weight_k: " << weight_k << std::endl ; // } }//end loop over components k //Johannes suggestion how to precisely compute the responsibilities double a_max = arma::max(log_density); arma::vec resps = arma::exp(log_density - a_max);//r_nk = exp( a_nk - a_max) double sum_resp = arma::sum(resps); //sum += r_nk //save neg likelihood of current pair double f = log(sum_resp) + a_max; // if ((i == 0) && (j == 12)){ // std::cout << "a_max : " << a_max << std::endl; // std::cout << "sum_resp : " << sum_resp << std::endl; // std::cout << "f : " << f << std::endl; // } if(! 
std::isnormal(f)) { std::cout << "ERROR: likelihood cannot be computed for protein " << protein_id << ", i " << i << ", j " << j << " ("<< contact <<"): " << f << std::endl; std::cout << "Nij " << N_ij << ", sum_resp: " << sum_resp << ", a_max: " << a_max << std::endl; for(int k = 0; k < this->nr_components; k++){ std::cout << "component: " << k << ", sum_precMat(k)diag: "<< arma::sum(this->parameters.get_precMat(k).diag()) << ", responsibilty:" << resps(k)/sum_resp << ", log_density: " << log_density(k) << std::endl; } continue; } else log_likelihood(pair) = f; }//end of parallelized for loop over ij pairs }
// Fits a Gaussian mixture with `ncomps` components to the points in `pts`
// using expectation-maximisation.
//
// The means are initialised from KMeans::Estimate, the weights uniformly and
// the covariances to 1e-10 * Identity. The loop then alternates E and M steps
// until the relative change of the total log likelihood drops below
// `exit_thresh` or `max_iters` iterations have been performed.
//
// The shape of the covariance estimate depends on the `spherical` and
// `axis_aligned` flags; if neither is set a full covariance matrix is
// estimated.
//
// Returns a model allocated with `new`; this function never deletes it, so the
// caller is responsible for releasing it.
//
// Preconditions (checked with assert only): there are at least `ncomps`
// points, and the data provides at least as many free variables as the model.
GaussMixture* GaussMixtureEstimator::Estimate(vector<VecD>& pts) const {
  assert(pts.size() >= ncomps);
  int npts = pts.size();
  int dim = pts[0].size();

  // Compute the number of free variables in the model and in the
  // data. If the former is less than the latter then the system is
  // underspecified and will lead to singularities in the likelihood
  // function.
  int model_params;
  if (spherical) {
    model_params = ncomps * (2 + dim);
  } else if (axis_aligned) {
    model_params = ncomps * (1 + 2*dim);
  } else {
    model_params = ncomps * (1 + dim + dim*(dim+1)/2);
  }
  int data_params = npts * dim;  // number of free variables in the data
  assert(data_params >= model_params);  // is the system under-specified?

  // Initialize the model using k-means clustering.
  // resps is (npts x ncomps): row = data point, column = component.
  GaussMixture* model = new GaussMixture(ncomps, dim);
  MatD resps(npts, ncomps);
  vector<VecD> initmeans;
  KMeans::Estimate(pts, ncomps, initmeans, resps);
  for (int i = 0; i < ncomps; i++) {
    model->weights[i] = 1.0 / ncomps;
    model->comps[i]->mean = initmeans[i];
    // set covariances to 1e-10 * Identity so that in the first
    // iteration each point is assigned entirely to the nearest
    // component
    model->comps[i]->cov.SetIdentity(1e-10);
  }

  // Begin iterating
  double loglik = 0.0, prev_loglik = 0.0;
  for (int i = 0; i < max_iters; i++) {
    GaussMixtureEvaluator eval(*model);

    // Check for small determinants (indicates poor support)
    for (int j = 0; j < model->ncomps; j++) {
      double logdetcov = eval.Component(j).GetLogDetCov();
      if (logdetcov < -50*dim) {
        cerr << "Warning: log(det(covariance of component " << j << "))"
             << " is very small: " << logdetcov << endl;
        // The support of component j is the sum of its responsibilities over
        // all points, i.e. column j of resps (row j would be data point j).
        cerr << "  its total support is " << resps.GetColumn(j).Sum() << endl;
        cerr << "  at iteration " << i << endl;
      }
    }

    // Compute responsibilities (E step)
    //DLOG << "E step\n";
    prev_loglik = loglik;
    loglik = 0.0;
    for (int j = 0; j < npts; j++) {
      // log of the mixture density at point j; also the normaliser of the
      // responsibilities of that point
      double logdenom = eval.EvaluateLog(pts[j]);
      loglik += logdenom;
      // Compute the responsibilities
      for (int k = 0; k < ncomps; k++) {
        const GaussianEvaluator& g = eval.Component(k);
        double logresp = eval.logweights[k] + g.EvaluateLog(pts[j]);
        resps[j][k] = exp(logresp - logdenom);
      }
    }

    // Test for convergence.
    // In the first iteration prev_loglik is 0, so the quotient is not finite
    // and the comparison below is false.
    //DLOG << "After iteration " << i << " log likelihood = " << loglik << endl;
    double reldiff = fabs((prev_loglik - loglik) / prev_loglik);
    if (reldiff < exit_thresh) {
      cout << "EM converged after " << i << " iterations" << endl;
      return model;
    }

    // Estimate new parameters (M step)
    //DLOG << "M step\n";
    for (int k = 0; k < ncomps; k++) {
      Gaussian* comp = model->comps[k].get();
      VecD col = resps.GetColumn(k);
      // NOTE(review): colsum is 0 if component k received no responsibility
      // at all; the divisions below then yield non-finite parameters.
      double colsum = resps.GetColumn(k).Sum();

      // Estimate means: responsibility-weighted average of the points
      comp->mean.Fill(0.0);
      for (int j = 0; j < npts; j++) {
        comp->mean += col[j] * pts[j];
      }
      comp->mean /= colsum;

      // Estimate covariances
      // Initialize the forward diagonal to a small constant to
      // prevent singularities when components only have a small
      // number of points assigned to them.
      comp->cov.SetIdentity(1e-10);
      for (int j = 0; j < npts; j++) {
        if (spherical) {
          // one shared value for all diagonal entries
          double spread = col[j] * VectorSSD(pts[j], comp->mean) / dim;
          for (int a = 0; a < dim; a++) {
            comp->cov[a][a] += spread;
          }
        } else if (axis_aligned) {
          // independent value per axis, off-diagonal entries stay zero
          VecD delta = pts[j] - comp->mean;
          for (int a = 0; a < dim; a++) {
            comp->cov[a][a] += col[j] * delta[a]*delta[a];
          }
        } else {
          // full covariance matrix
          VecD v = pts[j] - comp->mean;
          comp->cov += col[j] * OuterProduct(v, v);
        }
      }
      comp->cov /= colsum;

      if (spherical) {
        // NOTE(review): the diagonal ends up holding the square root of the
        // averaged value - confirm that consumers of a spherical component
        // expect a standard deviation here rather than a variance.
        for (int a = 0; a < dim; a++) {
          comp->cov[a][a] = sqrt(comp->cov[a][a]);
        }
      }

      // Estimate weights (unnormalised, see below)
      model->weights[k] = colsum;
    }

    // Normalize mixing coefficients
    model->weights /= model->weights.Sum();
  }

  DLOG << "EM failed to converge after " << max_iters << " iterations" << endl;
  return model;
}