/*
 * This function computes the Hessian H_ij and the per-component parameters
 * mu_ij_k and Lambda_ij_k for all pairs (i,j) in the batch and all
 * components k, and accumulates the log likelihood of each pair.
 */
void Likelihood_Protein::compute_negLL(int nr_threads_prot)
{
    //initialize likelihood vector
    log_likelihood.zeros(this->number_of_pairs);

	#pragma omp parallel for num_threads(nr_threads_prot)
    for(int pair = 0; pair < this->number_of_pairs; pair++){

		int i 				= this->i_indices(pair);
		int j 				= this->j_indices(pair);
		int lin_index       = i*(L - (i+1)/2.0 - 1) + j - 1; //i*L - i*(i+1)/2 + j-(i+1);
		int contact         = this->protein_contacts(pair);
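		//lin_index enumerates the pairs (i<j) of the flattened upper triangle row by row,
		//e.g. (0,1)->0, (0,2)->1, ..., (0,L-1)->L-2, (1,2)->L-1, ...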

        //for computation of likelihood
		arma::vec log_density(this->nr_components, arma::fill::zeros);

        arma::vec vqij = mq_ij.col(lin_index);
        double N_ij = mN_ij(i,j);
		arma::vec w_ij = w_ij3d.tube(i,j);
		//arma::vec vqij = q_ij3d.tube(i,j);

		//diagonal matrix Qij = diag(q'ji)
		//q'ijab = q(x_i=a, x_j=b) - (lambda_w * wijab / N_ij) --> has been precomputed
		arma::mat Qij 		= arma::diagmat(vqij);

		//outer product
		arma::mat qij_prod = vqij * vqij.t();

		//determine negative Hessian = Hij
		arma::mat diff = Qij - qij_prod;
		arma::mat H_ij = N_ij * diff + regularizer_w; //eq 37
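		//i.e. H_ij = N_ij * ( diag(q'_ij) - q'_ij * q'_ij^T ) + regularizer_w,
		//where regularizer_w is presumably the diagonal L2 term lambda_w * I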

		//precompute product H_ij * wij
		arma::vec Hij_wij_prod = H_ij * w_ij;


		for(int k = 0; k < this->nr_components; k++){


            //gaussian parameters of coupling prior
            double weight_k 		        = this->parameters.get_weight(k, contact);
			arma::vec mu_k 				    = this->parameters.get_mean(k);
			arma::mat lambda_k 			    = this->parameters.get_precMat(k);


            //---------------- simplify computation in case that lambda_k is diagonal matrix
            // A, A_inv, lambda_k, Qij are diagonal matrices
            arma::vec A = N_ij * vqij + lambda_k.diag();     //represents diagonal matrix
            arma::vec A_inv = 1.0 / A;                    //represents diagonal matrix
            double log_det_A	= arma::sum(arma::log(A));
            arma::vec Ainv_qij_product  = A_inv % vqij;   //column vector
            double triple_product 	    = arma::sum(vqij % Ainv_qij_product);
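            //since Q_ij and lambda_k are diagonal, A = N_ij*Q_ij + lambda_k is diagonal as well,
            //so A^-1 and log|A| reduce to elementwise operations and
            //triple_product = q'_ij^T A^-1 q'_ij is a simple weighted sum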
            //---------------- matrix computations in case lambda_k is NOT diagonal matrix
			//A is a diagonal matrix, as Qij and lambda_k are diagonal matrices
			//arma::mat A 		= N_ij * Qij + lambda_k;                    //diagonal
			//arma::mat A_inv 	= arma::diagmat(1.0 / A.diag());            //diagonal
			//double log_det_A	= arma::sum(arma::log(A.diag()));
			//arma::vec Ainv_qij_product  = arma::vec(A_inv * vqij);          //400x1 dim matrix
			//double triple_product 	    = arma::as_scalar(vqij.t() * Ainv_qij_product);


			//compute lambda_ij_k_mat
			arma::mat lambda_ij_k_mat  	= H_ij - regularizer_w + lambda_k;


            //debugging: we assume diagonal Hessian ================================================================
//            arma::mat lambda_ij_k_mat_inv(400,400,arma::fill::zeros);
//            lambda_ij_k_mat_inv.diag() = 1.0 / lambda_ij_k_mat.diag();
            //debugging=======================================================================================
			//compute inverse of lambda_ij_k_mat
			//---------------- simplify computation in case that lambda_k is diagonal matrix
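			//Sherman-Morrison rank-1 update: lambda_ij_k = A - N_ij * q'_ij q'_ij^T with diagonal A,
			//hence lambda_ij_k^-1 = A^-1 + (A^-1 q'_ij)(A^-1 q'_ij)^T / (1/N_ij - q'_ij^T A^-1 q'_ij)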
			arma::mat lambda_ij_k_mat_inv = arma::diagmat(A_inv) + (Ainv_qij_product * Ainv_qij_product.t()) / (1.0/N_ij - triple_product);
			//---------------- matrix computations in case lambda_k is NOT diagonal matrix
			//arma::mat  lambda_ij_k_mat_inv  = A_inv + (Ainv_qij_product * Ainv_qij_product.t()) / (1.0/N_ij - triple_product);



		    //compute mu_ij_k from precomputed entities
			arma::vec mu_ij_k_vec      = lambda_ij_k_mat_inv * ( Hij_wij_prod + lambda_k * mu_k);



            //debugging: we assume diagonal Hessian ================================================================
//            log_det_lambda_ij_k(k) = arma::sum(arma::log(lambda_ij_k_mat.diag()));
            //debugging=======================================================================================
			//save log determinant of lambda_ij_k, see page 16 of Saikat's theory
			double log_det_lambda_ij = log(1 - N_ij * triple_product) + log_det_A;
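			//matrix determinant lemma: det(A - N_ij q'_ij q'_ij^T) = (1 - N_ij * q'_ij^T A^-1 q'_ij) * det(A),
			//hence log|lambda_ij_k| = log(1 - N_ij * triple_product) + log|A|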

			//ratio of two gaussians in log space
			//     N(0 | mu_k, lambda_k)
            //------------------------------
            //  N(0 | mu_ij_k, lambda_ij,k)
			double gaussian_ratio_logdensity = log_density_gaussian_ratio(	mu_k,
																			mu_ij_k_vec,
																			lambda_k,
																			lambda_ij_k_mat,
																			this->parameters.get_log_det_inv_covMat(k),
																			log_det_lambda_ij);


            log_density(k) = log(weight_k) + gaussian_ratio_logdensity;


//            if ((i == 0) && (j == 12)){
//                std::cout  << " " << std::endl;
//                std::cout  << protein_id << " i: " << i << " j: " << j << " contact=" << contact << " component: " << k << std::endl;
//                std::cout  << "triple_product : " << triple_product  << std::endl;
//                std::cout  << "log_det_A : " << log_det_A  << std::endl;
//                std::cout  << "lambda_ij_k_mat(0,1) : " << lambda_ij_k_mat(0,1) << "lambda_ij_k_mat(0,2) : " << lambda_ij_k_mat(0,2) << "lambda_ij_k_mat(2,0) : " << lambda_ij_k_mat(2,0)  << std::endl;
//                std::cout  << "mu_ij_k_vec(0) : " << mu_ij_k_vec(0) << "mu_ij_k_vec(1) : " << mu_ij_k_vec(1) << "mu_ij_k_vec(2) : " << mu_ij_k_vec(2)  << std::endl;
//                std::cout  << "Gaussian log density: " << gaussian_ratio_logdensity << std::endl;
//                std::cout  << "log_density(k): " << log_density(k)<< std::endl;
//                std::cout  << "log(weight_k): " << log(weight_k) << " weight_k: " << weight_k << std::endl ;
//            }



		}//end loop over components k



		//Johannes' suggestion for computing the responsibilities in a numerically stable way (log-sum-exp)
		double a_max = arma::max(log_density);
		arma::vec resps = arma::exp(log_density - a_max);//r_nk = exp( a_nk - a_max)
		double sum_resp = arma::sum(resps);    //sum += r_nk


		//save log likelihood of the current pair
		double f = log(sum_resp) + a_max;
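		//f = a_max + log( sum_k exp(log_density(k) - a_max) ) = log( sum_k exp(log_density(k)) ),
		//the usual log-sum-exp trick to avoid underflow of the exponentials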

//        if ((i == 0) && (j == 12)){
//            std::cout  << "a_max : " << a_max  << std::endl;
//            std::cout  << "sum_resp : " << sum_resp  << std::endl;
//            std::cout  << "f : " << f  << std::endl;
//        }


		if(! std::isnormal(f)) {
				std::cout  << "ERROR: likelihood cannot be computed for protein " << protein_id << ", i " << i << ", j " << j << " ("<< contact <<"): " << f << std::endl;
                std::cout  << "Nij " << N_ij << ", sum_resp: " << sum_resp << ", a_max: " << a_max << std::endl;
                for(int k = 0; k < this->nr_components; k++){
                    std::cout  << "component: " << k << ", sum_precMat(k)diag: "<< arma::sum(this->parameters.get_precMat(k).diag()) << ", responsibilty:" << resps(k)/sum_resp << ", log_density: " << log_density(k) << std::endl;
                }

				continue;
		} else log_likelihood(pair) = f;

	}//end of parallelized for loop over ij pairs

}
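
// ---------------------------------------------------------------------------
// Minimal standalone sketch (not part of the original sources): it checks the
// Sherman-Morrison and matrix-determinant-lemma shortcuts used in compute_negLL
// against direct dense Armadillo computations. All sizes, seeds and values
// below are illustrative assumptions only.
#include <armadillo>
#include <cmath>
#include <iostream>

int main()
{
    const int dim = 10;
    const double N_ij = 50.0;

    arma::arma_rng::set_seed(1);
    arma::vec q = arma::normalise(arma::randu<arma::vec>(dim), 1);  // mock q'_ij (non-negative, sums to 1)
    arma::vec lambda_diag = arma::ones<arma::vec>(dim);             // mock diagonal lambda_k

    // dense reference: lambda_ij_k = N_ij * (diag(q) - q q^T) + diag(lambda_diag)
    arma::mat lambda_ij = N_ij * (arma::diagmat(q) - q * q.t()) + arma::diagmat(lambda_diag);

    // shortcut: diagonal A = N_ij * q + lambda_diag plus a rank-1 correction
    arma::vec A          = N_ij * q + lambda_diag;
    arma::vec Ainv_q     = q / A;
    double triple        = arma::sum(q % Ainv_q);
    arma::mat inv_fast   = arma::diagmat(1.0 / A)
                           + (Ainv_q * Ainv_q.t()) / (1.0 / N_ij - triple);
    double log_det_fast  = std::log(1.0 - N_ij * triple) + arma::sum(arma::log(A));

    double log_det_ref, sign;
    arma::log_det(log_det_ref, sign, lambda_ij);

    std::cout << "max |inverse error| : " << arma::abs(inv_fast - arma::inv(lambda_ij)).max() << std::endl;
    std::cout << "log-det error       : " << std::abs(log_det_fast - log_det_ref) << std::endl;
    return 0;
}
// ---------------------------------------------------------------------------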
// Example 2
	GaussMixture* GaussMixtureEstimator::Estimate(vector<VecD>& pts) const {
		assert(pts.size() >= ncomps);
		int npts = pts.size();
		int dim = pts[0].size();

		// Compute the number of free variables in the model and in the
		// data. If the former is less than the latter then the system is
		// underspecified and will lead to singularities in the likelihood
		// function.
		int model_params;
		if (spherical) {
			model_params = ncomps * (2 + dim);
		} else if (axis_aligned) {
			model_params = ncomps * (1 + 2*dim);
		} else {
			model_params = ncomps * (1 + dim + dim*(dim+1)/2);
		}
		int data_params = npts * dim;  // number of free variables in the data
		assert(data_params >= model_params);  // otherwise the system is under-specified
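		// e.g. full covariances with ncomps = 3 and dim = 2: 3 * (1 + 2 + 3) = 18 model
		// parameters, so at least 9 points (18 free data values) are required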

		// Initialize the model using k-means clustering
		GaussMixture* model = new GaussMixture(ncomps, dim);
		MatD resps(npts, ncomps);
		vector<VecD> initmeans;
		KMeans::Estimate(pts, ncomps, initmeans, resps);
		for (int i = 0; i < ncomps; i++) {
			model->weights[i] = 1.0 / ncomps;
			model->comps[i]->mean = initmeans[i];
			// set covariances to 1e-10 * Identity so that in the first
			// iteration each point is assigned entirely to the nearest
			// component
			model->comps[i]->cov.SetIdentity(1e-10);
		}

		// Begin iterating
		double loglik = 0.0, prev_loglik = 0.0;
		for (int i = 0; i < max_iters; i++) {
			GaussMixtureEvaluator eval(*model);

			// Check for small determinants (indicates poor support)
			for (int j = 0; j < model->ncomps; j++) {
				double logdetcov = eval.Component(j).GetLogDetCov();
				if (logdetcov < -50*dim) {
					cerr << "Warning: log(det(covariance of component " << j << "))"
							 << " is very small: " << logdetcov << endl;
					cerr << "  its total support is " << resps.GetRow(j).Sum() << endl;
					cerr << "  at iteration " << i << endl;
				}
			}

			// Compute responsibilities (E step)
			//DLOG << "E step\n";
			prev_loglik = loglik;
			loglik = 0.0;
			for (int j = 0; j < npts; j++) {
				double logdenom = eval.EvaluateLog(pts[j]);
				loglik += logdenom;

				// Compute the responsibilities
				for (int k = 0; k < ncomps; k++) {
					const GaussianEvaluator& g = eval.Component(k);
					double logresp = eval.logweights[k] + g.EvaluateLog(pts[j]);
					resps[j][k] = exp(logresp - logdenom);
				}
			}
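			// Per point x_j this yields (assuming eval.EvaluateLog returns the log of the
			// full mixture density):
			//   resps[j][k] = w_k * N(x_j | mu_k, Sigma_k) / sum_l w_l * N(x_j | mu_l, Sigma_l)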

			// Test for convergence
			//DLOG << "After iteration " << i << " log likelihood = " << loglik << endl;
			double reldiff = fabs((prev_loglik - loglik) / prev_loglik);
			if (i > 0 && reldiff < exit_thresh) {  // skip the first iteration, where prev_loglik is still zero
				cout << "EM converged after " << i << " iterations" << endl;
				return model;
			}
			prev_loglik = loglik;

			// Estimate new parameters (M step)
			//DLOG << "M step\n";
			for (int k = 0; k < ncomps; k++) {
				Gaussian* comp = model->comps[k].get();
				VecD col = resps.GetColumn(k);
				double colsum = resps.GetColumn(k).Sum();

				// Estimate means
				comp->mean.Fill(0.0);
				for (int j = 0; j < npts; j++) {
					comp->mean += col[j] * pts[j];
				}
				comp->mean /= colsum;

				// Estimate covariances

				// Initialize the diagonal to a small constant to prevent
				// singularities when a component has only a small number of
				// points assigned to it.
				comp->cov.SetIdentity(1e-10);

				for (int j = 0; j < npts; j++) {
					if (spherical) {
						double d = col[j] * VectorSSD(pts[j], comp->mean) / dim;
						for (int a = 0; a < dim; a++) {  // 'a' avoids shadowing the component index k
							comp->cov[a][a] += d;
						}
					} else if (axis_aligned) {
						VecD d = pts[j] - comp->mean;
						for (int a = 0; a < dim; a++) {
							comp->cov[a][a] += col[j] * d[a]*d[a];
						}
					} else {
						VecD v = pts[j] - comp->mean;
						comp->cov += col[j] * OuterProduct(v, v);
					}
				}
				comp->cov /= colsum;
				if (spherical) {
					for (int a = 0; a < dim; a++) {
						comp->cov[a][a] = sqrt(comp->cov[a][a]);
					}
				}

				// Estimate weights
				model->weights[k] = colsum;
			}
			// Normalize mixing coefficients
			model->weights /= model->weights.Sum();
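			// Standard M-step (full-covariance case), with N_k = sum_j resps[j][k]:
			//   mu_k    = sum_j resps[j][k] * x_j / N_k
			//   Sigma_k = sum_j resps[j][k] * (x_j - mu_k)(x_j - mu_k)^T / N_k  (plus the small ridge)
			//   w_k     = N_k / sum_l N_l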
		}

		DLOG << "EM failed to converge after " << max_iters << " iterations" << endl;
		return model;
	}
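
// ---------------------------------------------------------------------------
// Small self-contained sketch (illustrative only, independent of the classes
// above) of the numerically stable log-space normalization both examples rely
// on: given per-component log densities a_k = log(w_k) + log N(x | mu_k, Sigma_k),
// responsibilities and the log likelihood follow from the log-sum-exp trick.
#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

int main()
{
	// made-up per-component log densities for a single data point
	std::vector<double> log_density = {-1050.2, -1048.7, -1055.9};

	// shift by the maximum so that at least one exponent is exactly 0
	double a_max = *std::max_element(log_density.begin(), log_density.end());
	double sum_resp = 0.0;
	std::vector<double> resps(log_density.size());
	for (std::size_t k = 0; k < log_density.size(); ++k) {
		resps[k] = std::exp(log_density[k] - a_max);  // unnormalized responsibility
		sum_resp += resps[k];
	}
	double log_lik = std::log(sum_resp) + a_max;      // = log( sum_k exp(a_k) )

	for (std::size_t k = 0; k < resps.size(); ++k)
		std::cout << "responsibility " << k << ": " << resps[k] / sum_resp << std::endl;
	std::cout << "log likelihood contribution: " << log_lik << std::endl;
	return 0;
}
// ---------------------------------------------------------------------------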