/*
 * Compute the following entities:
 *      - responsibilities r(k|ij)
 *      - mu_ij_k
 *      - Lambda_ij_k
 *      - log determinante Lamda_ij_k
 *
 *  And then use these to compute likelihood and gradients
 */
void Likelihood_Protein::compute_f_df(int hessian_pseudocount)
{


	//initialize parameters for second gaussian
    mu_ij_k.zeros(400, this->nr_components);
    lambda_ij_k.zeros(400, 400, this->nr_components);
    lambda_ij_k_inv.zeros(400, 400, this->nr_components);
    log_det_lambda_ij_k.zeros(this->nr_components);
	responsibilities.zeros(this->nr_components);

    //initialize likelihood vector
    log_likelihood.zeros(this->number_of_pairs);

    //intialize gradients to zero
	grad_weight.zeros(this->nr_components, 2);
	grad_mu.zeros(400,this-> nr_components);
	grad_precMat.zeros(400, this->nr_components);

    for(int pair = 0; pair < this->number_of_pairs; pair++){

		int i 				= this->i_indices(pair);
		int j 				= this->j_indices(pair);
		int lin_index       = i*(L - (i+1)/2.0 - 1) + j - 1; //i*L - i*(i+1)/2 + j-(i+1);
		int contact         = this->protein_contacts(pair);

        //for computation of likelihood
		arma::vec log_density(this->nr_components, arma::fill::zeros);

        arma::vec vqij = mq_ij.col(lin_index);
        double N_ij = mN_ij(i,j);
		arma::vec w_ij = w_ij3d.tube(i,j);
		//diagonal matrix Qij = diag(q'ji)
		//q'ijab = q(x_i=a, x_j=b) - (lambda_w * wijab / N_ij) --> has been precomputed
		arma::mat Qij 		= arma::diagmat(vqij);

		//outer product
		arma::mat qij_prod = vqij * vqij.t();

		//determine negative Hessian = Hij
		arma::mat diff = Qij - qij_prod;
		arma::mat H_ij = N_ij * diff + regularizer_w; //eq 37

        // remove after debugging ---------------------------------------------------------------------------------
		//debugging artefacts in learning
		//add counts to Hessian to see if we have problems with non-informative data
		H_ij.diag() += hessian_pseudocount;
		//H_ij = arma::diagmat(H_ij);
		// remove after debugging ---------------------------------------------------------------------------------

		//precompute product H_ij * wij
		arma::vec Hij_wij_prod = H_ij * w_ij;

		for(int k = 0; k < this->nr_components; k++){

            //gaussian parameters of coupling prior
            double weight_k 		        = this->parameters.get_weight(k, contact);
			arma::vec mu_k 				    = this->parameters.get_mean(k);
			arma::mat lambda_k 			    = this->parameters.get_precMat(k);


            //---------------- simplify computation in case that lambda_k is diagonal matrix
            // A, A_inv, lambda_k, Qij are diagonal matrices
            arma::vec A = N_ij * vqij + lambda_k.diag();     //represents diagonal matrix
            arma::vec A_inv = 1.0 / A;                    //represents diagonal matrix
            double log_det_A	= arma::sum(arma::log(A));
            arma::vec Ainv_qij_product  = A_inv % vqij;   ////column vector
            double triple_product 	    = arma::sum(vqij % Ainv_qij_product);
            //---------------- matrix computations in case lambda_k is NOT diagonal matrix
			//A is a diagonal matrix, as Qij and lambda_k are diagonal matrices
			//arma::mat A 		= N_ij * Qij + lambda_k;                    //diagonal
			//arma::mat A_inv 	= arma::diagmat(1.0 / A.diag());            //diagonal
			//double log_det_A	= arma::sum(arma::log(A.diag()));
			//arma::vec Ainv_qij_product  = arma::vec(A_inv * vqij);          //400x1 dim matrix
			//double triple_product 	    = arma::as_scalar(vqij.t() * Ainv_qij_product);


			//compute lambda_ij_k_mat
			arma::mat lambda_ij_k_mat  	= H_ij - regularizer_w + lambda_k;
			lambda_ij_k.slice(k) 	    = lambda_ij_k_mat;






            //debugging: we assume diagonal Hessian ================================================================
//            arma::mat lambda_ij_k_mat_inv(400,400,arma::fill::zeros);
//            lambda_ij_k_mat_inv.diag() = 1.0 / lambda_ij_k_mat.diag();
            //debugging=======================================================================================
			//compute inverse of lambda_ij_k_mat
			//---------------- simplify computation in case that lambda_k is diagonal matrix
			arma::mat lambda_ij_k_mat_inv = arma::diagmat(A_inv) + (Ainv_qij_product * Ainv_qij_product.t()) / (1.0/N_ij - triple_product);
			//---------------- matrix computations in case lambda_k is NOT diagonal matrix
			//arma::mat  lambda_ij_k_mat_inv  = A_inv + (Ainv_qij_product * Ainv_qij_product.t()) / (1.0/N_ij - triple_product);
			lambda_ij_k_inv.slice(k) 	    = lambda_ij_k_mat_inv;


		    //compute mu_ij_k from precomputed entities
			arma::vec mu_ij_k_vec      = lambda_ij_k_mat_inv * ( Hij_wij_prod + lambda_k * mu_k);
			mu_ij_k.col(k)             = mu_ij_k_vec;


            //debugging: we assume diagonal Hessian ================================================================
//            log_det_lambda_ij_k(k) = arma::sum(arma::log(lambda_ij_k_mat.diag()));
            //debugging=======================================================================================
			//save log determinant of lambda_ij_k, see page 16 Saikats theory
			log_det_lambda_ij_k(k) = log(1 - N_ij * triple_product) + log_det_A;

			//ratio of two gaussians in log space
			//     N(0 | mu_k, lambda_k)
            //------------------------------
            //  N(0 | mu_ij_k, lambda_ij,k)
			double gaussian_ratio_logdensity = log_density_gaussian_ratio(	mu_k,
																			mu_ij_k_vec,
																			lambda_k,
																			lambda_ij_k_mat,
																			this->parameters.get_log_det_inv_covMat(k),
																			log_det_lambda_ij_k(k) );


            if(this->debug > 0 and gaussian_ratio_logdensity>1000){
                std::cout  << protein_id << " " << i << " " << j << " " << pair << " " << contact << " " << k << std::endl;
                std::cout  << "Gaussian log density > 1: " << gaussian_ratio_logdensity << std::endl;
                std::cout  << "A_inv.max() : " << A_inv.max()  << std::endl;
                arma::uword max_ind = index_max(A_inv);
                std::cout  << "A(max_ind) : " << A(max_ind)  << std::endl;
                std::cout  << "vqij(max_ind): " << vqij(max_ind) << std::endl;
                std::cout  << "N_ij: " << N_ij << std::endl;
                std::cout  << "mu_ij_k_vec(max_ind) : " << mu_ij_k_vec(max_ind)  << std::endl;
                std::cout  << "mu_k(max_ind) : " << mu_k(max_ind)  << std::endl;
                std::cout  << "lambda_ij_k_mat(max_ind, max_ind) : " << lambda_ij_k_mat(max_ind, max_ind)  << std::endl;
                std::cout  << "lambda_ij_k_mat_inv(max_ind, max_ind) : " << lambda_ij_k_mat_inv(max_ind, max_ind)  << std::endl;
                std::cout  << "lambda_k(max_ind, max_ind) : " << lambda_k(max_ind, max_ind)  << std::endl;
                std::cout  << "parameters.get_log_det_inv_covMat(k) : " << this->parameters.get_log_det_inv_covMat(k)  << std::endl;
                std::cout  << "log_det_lambda_ij_k(k) : " << log_det_lambda_ij_k(k)  << std::endl;
            }


            log_density(k) = log(weight_k) + gaussian_ratio_logdensity;

		}//end loop over components k

		//Johannes suggestion how to precisely compute the responsibilities
		double a_max = arma::max(log_density);
		this->responsibilities = arma::exp(log_density - a_max);//r_nk = exp( a_nk - a_max)
		double sum_resp = arma::sum(this->responsibilities);    //sum += r_nk
		this->responsibilities /= sum_resp; //normalization of responsibilities, NOT in log space  => r_nk /= sum;



		//save neg likelihood of current pair
		double f = log(sum_resp) + a_max;
		if(! std::isnormal(f)) {
				std::cout  << "ERROR: likelihood cannot be computed for protein " << protein_id << ", i " << i << ", j " << j << " ("<< contact <<"): " << f << std::endl;
                std::cout  << "Nij: " << N_ij << ", sum_resp: " << sum_resp << ", a_max: " << a_max << std::endl;
                for(int k = 0; k < this->nr_components; k++){
                    std::cout  << "component: " << k << ", sum_precMat(k)diag: "<< arma::sum(this->parameters.get_precMat(k).diag()) << ", responsibilty:" << this->responsibilities(k) << ", log_density: " << log_density(k) << ", log_det_lambda_ij_k: " << log_det_lambda_ij_k(k) << std::endl;
                }

				continue;
		} else log_likelihood(pair) = f;


        // Compute ALL gradients for ALL components
        for(int k = 0; k < this->nr_components; k++){

            //weights (either for contact or non_contact)
            grad_weight(k, contact) += gradient_weight_comp(k, contact);

            //mu
            grad_mu.col(k) += gradient_mu_comp(k);

            //precMat - diagonal
            arma::mat grad_precMat_protein = gradient_precisionMatrix_comp(k);
            grad_precMat.col(k) += grad_precMat_protein.diag();

        }//end components


	}//end loop over ij pairs


}
double ChLcpInteriorPoint::Solve(
		ChLcpSystemDescriptor& sysd		///< system description with constraints and variables
)
{
	std::cout << "-------using interior point solver!!------" << std::endl;
	std::vector<ChLcpConstraint*>& mconstraints = sysd.GetConstraintsList();
	std::vector<ChLcpVariables*>&  mvariables	= sysd.GetVariablesList();



	ChMatrixDynamic <double> mv0;
	ChSparseMatrix mM;
	ChSparseMatrix mCq;
	ChSparseMatrix mE;
	ChMatrixDynamic <double> mf;
	ChMatrixDynamic <double> mb;
	ChMatrixDynamic <double> mfric;

	sysd.ConvertToMatrixForm(&mCq, &mM, &mE, &mf, &mb, &mfric);
	sysd.FromVariablesToVector(mv0);
	
		ChStreamOutAsciiFile file_V0( "dump_V_old.dat" ) ;
		mv0.StreamOUTdenseMatlabFormat(file_V0) ;
		ChStreamOutAsciiFile file_M ( "dump_M.dat" ) ;
		mM.StreamOUTsparseMatlabFormat ( file_M ) ;

		ChStreamOutAsciiFile file_Cq ( "dump_Cq.dat" ) ;
		mCq.StreamOUTsparseMatlabFormat ( file_Cq ) ;

		ChStreamOutAsciiFile file_E ( "dump_E.dat" ) ;
		mE.StreamOUTsparseMatlabFormat ( file_E ) ;

		ChStreamOutAsciiFile file_f ( "dump_f.dat" ) ;
		mf.StreamOUTdenseMatlabFormat ( file_f ) ;

		ChStreamOutAsciiFile file_b ( "dump_b.dat" ) ;
		mb.StreamOUTdenseMatlabFormat ( file_b ) ;

		ChStreamOutAsciiFile file_fric ( "dump_fric.dat" ) ;
		mfric.StreamOUTdenseMatlabFormat ( file_fric ) ;

		printf("Successfully writing chickenbutt files!\n");

/*	file_f.GetFstream().close();
	file_fric.GetFstream().close();
	file_V0.GetFstream().close();
	file_M.GetFstream().close();
	file_Cq.GetFstream().close();
	file_b.GetFstream().close();
*/

	

	int nBodies = mM.GetColumns()/6;
	size_t nVariables = mvariables.size();
	size_t nConstraints = sysd.CountActiveConstraints();
	int numContacts = nConstraints/3;
	size_t nOfConstraints = mconstraints.size();

	/* ALWYAS DO THIS IN THE LCP SOLVER!!!*/
	for (unsigned int ic = 0; ic < nConstraints; ic++)
		mconstraints[ic]->Update_auxiliary();
	//Get sparse info for contact jacobian and Minv matrix to pass on to Ang's solver
	std::vector<int> index_i_Cq;
	std::vector<int> index_j_Cq;
	std::vector<double> val_Cq;
	double val;
//	fprintf(stderr, "------------Cq(from C::E)----------\n");
	for (int ii = 0; ii < mCq.GetRows(); ii++){
		for (int jj = 0; jj < mCq.GetColumns(); jj++){
			val = mCq.GetElement(ii,jj);
			if (val){
				index_i_Cq.push_back(jj);
				index_j_Cq.push_back(ii);
				val_Cq.push_back(val);
//				fprintf(stderr, "%d %d %.20g\n", ii, jj, val);
			}
		}
	}

	

/*	for (int iv = 0; iv < mvariables.size(); iv++)
		if (mvariables[iv]->IsActive())
			mvariables[iv]->Compute_invMb_v(mvariables[iv]->Get_qb(), mvariables[iv]->Get_fb());
*/

//	int count = 0;
//	for (std::vector<int>::iterator it = index_i_Cq.begin(); it != index_i_Cq.end(); it ++){
//		std::cout << "(" << index_i_Cq[count] <<"," << index_j_Cq[count] <<"):" << val_Cq[count] << std::endl;
//		count ++;
//	}

	// Minv matrix
	std::vector<int> index_i_Minv;
	std::vector<int> index_j_Minv;
	std::vector<double> val_Minv;
	for (int i = 0; i < nBodies*6; i++){
		index_i_Minv.push_back(i);
		index_j_Minv.push_back(i);
		val_Minv.push_back(1.0/mM.GetElement(i,i));
	}

	// create reference to pass on to SPIKE
	int *Cq_i = &index_i_Cq[0];
	int *Cq_j = &index_j_Cq[0];
	int Cq_nnz = val_Cq.size();
	double *Cq_val = &val_Cq[0];

	int *Minv_i = &index_i_Minv[0];
	int *Minv_j = &index_j_Minv[0];
	double *Minv_val = &val_Minv[0];

	// formulate rhs of optimization problem f(x) = 1/2 *x'*N*x + r'*x
	ChMatrixDynamic <double> opt_r_tmp(nConstraints,1);
	// assemble r vector
	/** 1. put [M^-1]*k in q sparse vector of each variable **/
	for (unsigned int iv = 0; iv < nVariables; iv ++)
		if (mvariables[iv]->IsActive()){
			mvariables[iv]->Compute_invMb_v(mvariables[iv]->Get_qb(), mvariables[iv]->Get_fb());
			ChMatrix<double> k = mvariables[iv]->Get_fb();
			ChMatrix<double> Mk = mvariables[iv]->Get_qb();
//			fprintf(stderr, "Body %d k: %.12f %.12f %.12f\n", iv,  k(0,0), k(1,0), k(2,0));
//			fprintf(stderr, "Body %d M^[-1]*k: %.12f %.12f %.12f\n", iv,  Mk(0,0), Mk(1,0), Mk(2,0));
	}
	/** 2. now do rhs = D'*q = D'*(M^-1)*k **/
	int s_i = 0;
	opt_r.Resize(nConstraints,1);
	for (unsigned int ic = 0; ic < nConstraints; ic ++)
		if (mconstraints[ic]->IsActive()){
			opt_r(s_i,0) = mconstraints[ic]->Compute_Cq_q();
			++s_i;	
		}
//	fprintf(stderr, "------D'*M^(-1)*k-------\n");
//	for (int i = 0; i < opt_r.GetRows(); i++)
//		fprintf(stderr, "%.16f\n", opt_r(i,0));

	/** 3.  rhs = rhs + c **/
	sysd.BuildBiVector(opt_r_tmp);
	opt_r.MatrInc(opt_r_tmp);
	
//	fprintf(stderr, "------opt_r-------\n");
//	for (int i = 0; i < opt_r.GetRows(); i++)
//		fprintf(stderr, "%.12f\n", opt_r(i,0));


	///////////////////
	//velocity update//
	///////////////////
	ChMatrixDynamic<> mq;
	sysd.FromVariablesToVector(mq, true);

	

	for (int i = 0; i < mq.GetRows(); i++){
//		mq.SetElementN(i, mf.GetElementN(i)/mM.GetElement(i,i) + mv0.GetElementN(i));
//			mq.SetElementN(i, mf.GetElementN(i)/mM.GetElement(i,i));
//	fprintf(stderr, "%d: %g / %g + %g = %g\n",i, mf.GetElementN(i), mM.GetElement(i,i), mv0.GetElementN(i), mq.GetElementN(i));
//		fprintf(stderr, "%g\n", mq.GetElementN(i));
}

	////////////////////////////
	//assign solver parameters//
	////////////////////////////
	double barrier_t = 1;
	double eta_hat;
	int numStages = 500;
	int mu1 = 10;
	double b1 = 0.5;
	double a1 = 0.01;

	// assign vectors here
	ff.Resize(numContacts*2,1);
	lambda_k.Resize(numContacts*2,1); /*initialize lambda_k*/
	xk.Resize(numContacts*3,1);
	r_dual.Resize(numContacts*3,1);
	r_cent.Resize(numContacts*2,1);
	d_x.Resize(numContacts*3,1);
	d_lambda.Resize(numContacts*2,1);
	Schur_rhs.Resize(3*numContacts,1);
	grad_f.Resize(3*numContacts,1);


	if (mconstraints.size() == 0){
			sysd.FromVectorToConstraints(xk);
			sysd.FromVectorToVariables(mq);

			for (size_t ic = 0; ic < mconstraints.size(); ic ++){
				if (mconstraints[ic]->IsActive())
					mconstraints[ic]->Increment_q(mconstraints[ic]->Get_l_i());
			}

		return 1e-8;
	}



	double *BlockDiagonal_val = new double[9*numContacts];
	int *BlockDiagonal_i = new int[9*numContacts];
	int *BlockDiagonal_j = new int[9*numContacts];
	double *spike_rhs = new double[3*numContacts];

	int tmp0, tmp1, tmp2;
	for (int i = 0; i < numContacts; i ++){
		tmp0 = 3*i;
		tmp1 = 3*i+1;
		tmp2 = 3*i+2;
		*(BlockDiagonal_i + 9*i) = tmp0;
		*(BlockDiagonal_i + 9*i+1) = tmp0;
		*(BlockDiagonal_i + 9*i+2) = tmp0;
		*(BlockDiagonal_i + 9*i+3) = tmp1;
		*(BlockDiagonal_i + 9*i+4) = tmp1;
		*(BlockDiagonal_i + 9*i+5) = tmp1;
		*(BlockDiagonal_i + 9*i+6) = tmp2;
		*(BlockDiagonal_i + 9*i+7) = tmp2;
		*(BlockDiagonal_i + 9*i+8) = tmp2;

		*(BlockDiagonal_j + 9*i) = tmp0;
		*(BlockDiagonal_j + 9*i+1) = tmp1;
		*(BlockDiagonal_j + 9*i+2) = tmp2;
		*(BlockDiagonal_j + 9*i+3) = tmp0;
		*(BlockDiagonal_j + 9*i+4) = tmp1;
		*(BlockDiagonal_j + 9*i+5) = tmp2;
		*(BlockDiagonal_j + 9*i+6) = tmp0;
		*(BlockDiagonal_j + 9*i+7) = tmp1;
		*(BlockDiagonal_j + 9*i+8) = tmp2;
	}



	// initialize xk
	for (int i = 0; i < numContacts; i ++){
		xk(3*i, 0) = 1;
		xk(3*i+1, 0) = 0;
		xk(3*i+2, 0) = 0;
	}

	evaluateConstraints(mfric.GetAddress(), numContacts, false);



	//initialize lambda
	for (int i = 0; i < lambda_k.GetRows(); i++)
		lambda_k(i,0) = -1/(barrier_t * ff(i,0));

	/////////////////////////////
	////GO THROUGH EACH STAGE////
	/////////////////////////////
	for (int stage = 0; stage < numStages; stage++){
		eta_hat = - lambda_k.MatrDot(&lambda_k, &ff);
		barrier_t = mu1 * (2*numContacts)/eta_hat;
		// assemble grad_f = N*x + r
		sysd.ShurComplementProduct(grad_f, &xk, 0);
//		fprintf(stderr, "----------N*x----------\n");
//		for (int i = 0; i < grad_f.GetRows(); i++)
//		fprintf(stderr, "%.20f\n", grad_f.GetElementN(i));
		grad_f.MatrInc(opt_r);
//		fprintf(stderr, "----------grad_f----------\n");
//		for (int i = 0; i < grad_f.GetRows(); i++)
//			fprintf(stderr, "%.20f\n", grad_f.GetElementN(i));
	
		// compute r_d and r_c for schur implementation
		computeSchurRHS(grad_f.GetAddress(), mfric.GetAddress(), numContacts, barrier_t);
//		fprintf(stderr, "----------r_dual----------\n");
//		for (int i = 0; i < r_dual.GetRows(); i++)
//			fprintf(stderr, "%.16f\n", r_dual.GetElementN(i));
//		fprintf(stderr, "----------r_cent----------\n");
//		for (int i = 0; i < r_cent.GetRows(); i++)
//			fprintf(stderr, "%.16f\n", r_cent.GetElementN(i));



		// assemble block diagonal matrix
		computeBlockDiagonal(BlockDiagonal_val, mfric.GetAddress(), numContacts, barrier_t);

		// assemble rhs vector for spike solver
		computeSpikeRHS(spike_rhs, mfric.GetAddress(), numContacts, barrier_t);
//		fprintf(stderr, "----------spike_rhs----------\n");
//		for (int i = 0; i < 3*numContacts; i++){
//			fprintf(stderr, "%.16f\n", *(spike_rhs + i));
//		}

		double *spike_dx = new double [3*numContacts];

		//call ang's solver here....
		bool solveSuc = solveSPIKE(nBodies, numContacts, Cq_i, Cq_j, Cq_nnz, Cq_val, Minv_i, Minv_j, Minv_val, BlockDiagonal_i, BlockDiagonal_j, BlockDiagonal_val, spike_dx, spike_rhs);
		
		if (solveSuc == false)
			std::cerr << "Solve Failed!" << std::endl;
		// assume d_x is calculated perfectly!
		for (int i = 0; i < numContacts; i++){
			d_x(3*i,0) = *(spike_dx + 3*i);
			d_x(3*i+1,0) = *(spike_dx + 3*i + 1);
			d_x(3*i+2,0) = *(spike_dx + 3*i + 2);
		}

/*		fprintf(stderr, "-------d_x---------\n");
		for (int i = 0; i < d_x.GetRows(); i++){
			fprintf(stderr, "%.20f\n", d_x(i,0));
		}
*/
		// free the heap!
		delete [] spike_dx;

		// evaluate d_lambda
		for (int i = 0; i < numContacts; i++){
			d_lambda(i) = lambda_k(i,0)/ff(i,0) * (pow(mfric(3*i,0),2)*xk(3*i,0)*d_x(3*i,0) - xk(3*i+1,0)*d_x(3*i+1,0) -xk(3*i+2,0)*d_x(3*i+2,0) - r_cent(i,0) );
			d_lambda(i + numContacts) = lambda_k(i+numContacts,0)/ff(i+numContacts)*(d_x(3*i) - r_cent(i + numContacts));
		}
/*		fprintf(stderr, "----------d_lambda----------\n");
		for (int i = 0; i < 2*numContacts; i++){
			fprintf(stderr, "%.16f\n", d_lambda(i,0));
		}
*/
		///////////////
		//LINE SEARCH//
		///////////////
		double s_max = 1;
		double tmp;
		for (int i = 0; i < 2*numContacts; i ++){
			if (d_lambda(i,0) < 0){
				tmp = -lambda_k(i,0)/d_lambda(i,0);
//				fprintf(stderr, "i = %d, tmp = %.20f\n", i, tmp);
				if (tmp < s_max){
					s_max = tmp;
				}

			}
		}
		double bla = 0.99;
		double ss = bla * s_max;
//		fprintf(stderr, "s_max = %.20g\n", s_max);

		ff_tmp.Resize(2*numContacts,1);
		lambda_k_tmp.Resize(2*numContacts,1);
		xk_tmp.Resize(3*numContacts,1);;
		r_dual_tmp.Resize(3*numContacts,1);;
		r_cent_tmp.Resize(3*numContacts,1);;

		bool DO = true;
		int count = 0;
//		fprintf(stderr, "----line search----\n");
		while (DO){
			xk_tmp = d_x;
//			fprintf(stderr, "-----d_x----\n");
//			for (int i = 0; i < 3*numContacts; i ++)
//				fprintf(stderr, "%.20g\n", xk_tmp(i,0));
			xk_tmp.MatrScale(ss);
//			fprintf(stderr, "-----ss*d_x----\n");
//			for (int i = 0; i < 3*numContacts; i ++)
//				fprintf(stderr, "%.20g\n", xk_tmp(i,0));
			xk_tmp.MatrAdd(xk,xk_tmp);
//			fprintf(stderr, "-----xk+ss*d_x----\n");
//			for (int i = 0; i < 3*numContacts; i ++)
//				fprintf(stderr, "%.20g\n", xk_tmp(i,0));
			evaluateConstraints(mfric.GetAddress(), numContacts, true);
//			fprintf(stderr, "-----tmp_ff----\n");
//			for (int i = 0; i < 2*numContacts; i ++)
//				fprintf(stderr, "%.20g\n", ff_tmp(i,0));
//			fprintf(stderr, "max_ff = %.20g\n", ff_tmp.Max());
			if (ff_tmp.Max()<0){
				DO = false;
			}
			else{
				count++;
				ss = b1 * ss;
//			fprintf(stderr,"ss[%d] = %.20g\n", count, ss); 
			}
		}

		DO = true;
		double norm_r_t = sqrt(pow(r_dual.NormTwo(),2) + pow(r_cent.NormTwo(),2));
		double norm_r_t_ss;
		count = 0;
		while (DO){
			xk_tmp = d_x;
			xk_tmp.MatrScale(ss);
			xk_tmp.MatrAdd(xk,xk_tmp);

			lambda_k_tmp = d_lambda;
			lambda_k_tmp.MatrScale(ss);
			lambda_k_tmp.MatrAdd(lambda_k, lambda_k_tmp);
			evaluateConstraints(mfric.GetAddress(),numContacts,true);
			sysd.ShurComplementProduct(grad_f, &xk_tmp, 0);
			grad_f.MatrInc(opt_r);
			computeSchurKKT(grad_f.GetAddress(), mfric.GetAddress(), numContacts, barrier_t, true);
			norm_r_t_ss = sqrt(pow(r_dual_tmp.NormTwo(),2) + pow(r_cent_tmp.NormTwo(),2));
			if (norm_r_t_ss < (1 - a1*ss)*norm_r_t)
				DO = false;
			else{
				count ++;
				ss = b1*ss;
//				fprintf(stderr,"ss[%d] = %.20g\n", count, ss); 
		}

		}
		// upadate xk and lambda_k
		d_x.MatrScale(ss);
//		fprintf(stderr, "-------ss*d_x---------\n");
//		for (int i = 0; i < d_x.GetRows(); i++)
//			fprintf(stderr, "%.20f\n", d_x(i,0));
		
//		fprintf(stderr, "----------xk = xk + ss*d_x--------\n");
		xk.MatrInc(d_x);
//		for (int i = 0; i < xk.GetRows(); i++)
//			fprintf(stderr, "%.20f\n", xk(i,0));
		
		d_lambda.MatrScale(ss);
		lambda_k.MatrInc(d_lambda);
//		fprintf(stderr, "-------lambda_k------\n");
//		for (int i = 0; i < lambda_k.GetRows(); i++)
//			fprintf(stderr, "%.20f\n", lambda_k(i,0));

		sysd.ShurComplementProduct(grad_f, &xk, 0);
		grad_f.MatrInc(opt_r);
		evaluateConstraints(mfric.GetAddress(), numContacts, false);
		computeSchurKKT(grad_f.GetAddress(), mfric.GetAddress(), numContacts, barrier_t, false);
//		std::cout << "----r_dual-----" << std::endl;
//		for (int i = 0; i < r_dual.GetRows(); i++)
//			std::cout << r_dual(i,0) << std::endl;
//		std::cout << "-----r_cent-----" << std::endl;
//		for (int i = 0; i < r_cent.GetRows(); i++)
//			std::cout << r_cent(i,0) << std::endl;
fprintf(stderr, "stage[%d], rd = %e, rg = %e, s = %f, t = %f\n", stage+1, r_dual.NormInf(), r_cent.NormInf(), ss, barrier_t);


		if (r_cent.NormInf() < 1e-10 ||stage == (numStages - 1)){
			fprintf(stderr, "solution found after %d stages!\n", stage+1);
//			fprintf(stderr, "stage[%d], rd = %e, rg = %e, s = %f, t = %f\n", stage+1, r_dual.NormInf(), r_cent.NormInf(), ss, barrier_t);
			delete [] BlockDiagonal_val;
			delete [] BlockDiagonal_i;
			delete [] BlockDiagonal_j;
			delete [] spike_rhs;

			/////////////////////////////////////////////
			//set small-magnitude contact force to zero//
			/////////////////////////////////////////////
			
//			for (int i = 0; i < numContacts; i++){
//				if (sqrt(pow(xk(3*i,0),2) + pow(xk(3*i+1,0),2) + pow(xk(3*i+2,0),2)) < 1e-6){
///					xk(3*i,0) = 0;
//					xk(3*i+1,0) = 0;
//					xk(3*i+2, 0) = 0;
//				}
//			}

			sysd.FromVectorToConstraints(xk);
			sysd.FromVectorToVariables(mq);

			for (size_t ic = 0; ic < mconstraints.size(); ic ++){
				if (mconstraints[ic]->IsActive())
					mconstraints[ic]->Increment_q(mconstraints[ic]->Get_l_i());
			}

//			return r_cent.NormInf();
			return 1e-8;

		}
		evaluateConstraints(mfric.GetAddress(), numContacts, false);

	}

}