// use REML equation to estimate variance component // input P, calculate PA, H, R and varcmp void gcta::ai_reml(eigenMatrix &P, eigenMatrix &Hi, eigenVector &Py, eigenVector &prev_varcmp, eigenVector &varcmp, double dlogL) { int i=0, j=0; Py=P*_y; eigenVector cvec(_n); eigenMatrix APy(_n, _r_indx.size()); for(i=0; i<_r_indx.size(); i++) (APy.col(i))=(_A.block(0,_r_indx[i]*_n,_n,_n))*Py; // Calculate Hi eigenVector R(_r_indx.size()+1); for(i=0; i<_r_indx.size(); i++){ R(i)=(Py.transpose()*(APy.col(i)))(0,0); cvec=P*(APy.col(i)); Hi(i,i)=((APy.col(i)).transpose()*cvec)(0,0); for(j=0; j<i; j++) Hi(j,i)=Hi(i,j)=((APy.col(j)).transpose()*cvec)(0,0); } cvec=P*Py; for(j=0; j<_r_indx.size(); j++) Hi(j,_r_indx.size())=Hi(_r_indx.size(),j)=((APy.col(j)).transpose()*cvec)(0,0); R(_r_indx.size())=(Py.transpose()*Py)(0,0); Hi(_r_indx.size(),_r_indx.size())=(Py.transpose()*cvec)(0,0); Hi=0.5*Hi; // Calcualte tr(PA) and dL eigenVector tr_PA; calcu_tr_PA(P, tr_PA); R=-0.5*(tr_PA-R); // Calculate variance component comput_inverse_logdet_LU(Hi, "Error: AI matrix is not invertible."); eigenVector delta(_r_indx.size()+1); delta=Hi*R; if(dlogL>1.0) varcmp=prev_varcmp+0.316*delta; else varcmp=prev_varcmp+delta; }
// use REML equation to estimate variance component // input P, calculate PA, H, R and varcmp void gcta::reml_equation(eigenMatrix &P, eigenMatrix &Hi, eigenVector &Py, eigenVector &varcmp) { // Calculate Hi calcu_Hi(P, Hi); // Calculate R Py=P*_y; eigenVector R(_r_indx.size()+1); for(int i=0; i<_r_indx.size(); i++) R(i)=(Py.transpose()*(_A.block(0,_r_indx[i]*_n,_n,_n))*Py)(0,0); R(_r_indx.size())=Py.squaredNorm(); // Calculate variance component varcmp=Hi*R; Hi=2*Hi; // for calculation of SE }
// input P, calculate varcmp void gcta::em_reml(eigenMatrix &P, eigenVector &Py, eigenVector &prev_varcmp, eigenVector &varcmp) { int i=0; // Calculate trace(PA) eigenVector tr_PA; calcu_tr_PA(P, tr_PA); // Calculate R Py=P*_y; eigenVector R(_r_indx.size()+1); for(i=0; i<_r_indx.size(); i++) R(i)=(Py.transpose()*(_A.block(0,_r_indx[i]*_n,_n,_n))*Py)(0,0); R(_r_indx.size())=Py.squaredNorm(); // Calculate variance component for(i=0; i<_r_indx.size(); i++) varcmp(i)=(prev_varcmp(i)*_n-prev_varcmp(i)*prev_varcmp(i)*tr_PA(i)+prev_varcmp(i)*prev_varcmp(i)*R(i))/_n; int j=_r_indx.size(); varcmp(j)=(prev_varcmp(j)*_n-prev_varcmp(j)*prev_varcmp(j)*tr_PA(j)+prev_varcmp(j)*prev_varcmp(j)*R(j))/_n; }
// Replace negative eigenvalues in `eval` with small positive values ("bending")
// and rescale the whole vector so that its mean is unchanged.
// Returns false (leaving `eval` untouched) when every eigenvalue is strictly
// positive; returns true otherwise.
bool gcta::bending_eigenval(eigenVector &eval)
{
    const double mean_before=eval.mean();
    if(eval.minCoeff()>0.0) return false;

    // neg_sum accumulates all negative eigenvalues.
    // NOTE(review): neg_scale ends up as minus the LAST negative eigenvalue seen,
    // not an extreme or a sum - confirm this is the intended scaling constant.
    double neg_sum=0.0, neg_scale=0.0;
    for(int k=0; k<eval.size(); k++){
        if(eval[k]<0){
            neg_sum+=eval[k];
            neg_scale=-eval[k];
        }
    }

    const double denom=neg_sum*neg_sum*100.0+1;
    for(int k=0; k<eval.size(); k++){
        if(eval[k]<0){
            eval[k]=neg_scale*(neg_sum-eval[k])*(neg_sum-eval[k])/denom;
        }
    }

    // Restore the original mean.
    eval*=mean_before/eval.mean();
    return true;
}
void gcta::makex_eigenVector(int j, eigenVector &x, bool resize, bool minus_2p) { int i=0; if(resize) x.resize(_keep.size()); for(i=0; i<_keep.size(); i++){ if(!_snp_1[_include[j]][_keep[i]] || _snp_2[_include[j]][_keep[i]]){ if(_allele1[_include[j]]==_ref_A[_include[j]]) x[i]=(_snp_1[_include[j]][_keep[i]]+_snp_2[_include[j]][_keep[i]]); else x[i]=2.0-(_snp_1[_include[j]][_keep[i]]+_snp_2[_include[j]][_keep[i]]); } else x[i]=_mu[_include[j]]; if(minus_2p) x[i]-=_mu[_include[j]]; } }
// Invert `Vi` in place through an LDLT decomposition and report the sum of the
// logs of the D entries in `logdet`.
//   Vi          : in/out; replaced by the solution of (original Vi) * X = I
//   prev_varcmp : only inspected when D has a negative entry
//   logdet      : output; written only on the success path
// Returns true on success. If D has a negative entry: returns false when all
// entries of prev_varcmp are positive, otherwise throws a const char* message.
bool gcta::comput_inverse_logdet_LDLT(eigenMatrix &Vi, eigenVector &prev_varcmp, double &logdet)
{
    const int dim=Vi.cols();
    LDLT<eigenMatrix> decomp(Vi);
    eigenVector diag=decomp.vectorD();

    if(diag.minCoeff()<0){
        if(prev_varcmp.minCoeff()>0) return false;
        throw("Error: the matrix V becomes negative-definite because of one of the variance component is negative.\nPlease re-run the analysis without the --reml-no-constrain option.");
    }

    // Accumulate in index order so the rounding matches a plain sequential sum.
    double log_sum=0.0;
    for(int k=0; k<dim; k++) log_sum+=log(diag[k]);
    logdet=log_sum;

    Vi.setIdentity();
    decomp.solveInPlace(Vi);
    return true;
}
// input P, calculate tr(PA) void gcta::calcu_tr_PA(eigenMatrix &P, eigenVector &tr_PA) { int i=0, k=0, l=0; double d_buf=0.0; // Calculate trace(PA) tr_PA.resize(_r_indx.size()+1); for(i=0; i<_r_indx.size(); i++){ d_buf=0.0; for(k=0; k<_n; k++){ for(l=0; l<_n; l++) d_buf+=P(k,l)*(_A.block(0,_r_indx[i]*_n,_n,_n))(k,l); } tr_PA(i)=d_buf; } tr_PA(_r_indx.size())=P.trace(); }
double gcta::reml_iteration(double y_Ssq, eigenMatrix &Vi_X, eigenMatrix &Xt_Vi_X_i, eigenMatrix &Hi, eigenVector &Py, eigenVector &varcmp, bool prior_var_flag, bool no_constrain) { char *mtd_str[3]={"AI-REML algorithm", "REML equation ...", "EM-REML algorithm ..."}; int i=0, constrain_num=0, iter=0, reml_mtd_tmp=_reml_mtd; double logdet=0.0, logdet_Xt_Vi_X=0.0, prev_lgL=-1e20, lgL=-1e20, dlogL=1000.0; eigenVector prev_varcmp(varcmp), varcomp_init(varcmp); _Vi.resize(_n,_n); _P(_n,_n); for(iter=0; iter<_reml_max_iter; iter++){ if(iter==0){ prev_varcmp=varcomp_init; if(prior_var_flag){ cout<<"User-specified prior values of variance components: "<<varcmp.transpose()<<endl; //continue; } else{ _reml_mtd=2; cout<<"Calculating prior values of variance components by EM-REML ..."<<endl; } } if(iter==1){ _reml_mtd=reml_mtd_tmp; cout<<"Running "<<mtd_str[_reml_mtd]<<" ..."<<"\nIter.\tlogL\t"; for(i=0; i<_r_indx.size(); i++) cout<<_var_name[_r_indx[i]]<<"\t"; cout<<_var_name[_var_name.size()-1]<<endl; } if(!calcu_Vi(_Vi, prev_varcmp, logdet, iter)) continue; // Calculate Vi logdet_Xt_Vi_X=calcu_P(_Vi, Vi_X, Xt_Vi_X_i, _P); // Calculate P if(_reml_mtd==0) ai_reml(_P, Hi, Py, prev_varcmp, varcmp, dlogL); else if(_reml_mtd==1) reml_equation(_P, Hi, Py, varcmp); else if(_reml_mtd==2) em_reml(_P, Py, prev_varcmp, varcmp); lgL=-0.5*(logdet_Xt_Vi_X+logdet+(_y.transpose()*Py)(0,0)); // output log if(!no_constrain) constrain_num=constrain_varcmp(varcmp, y_Ssq); if(iter>0){ cout<<iter<<"\t"<<setiosflags(ios::fixed)<<setprecision(2)<<lgL<<"\t"; for(i=0; i<_r_indx.size()+1; i++) cout<<setprecision(5)<<varcmp[i]<<"\t"; if(constrain_num>0) cout<<"("<<constrain_num<<" component(s) constrained)"<<endl; else cout<<endl; } else{ if(!prior_var_flag) cout<<"Prior values updated from EM-REML: "<<varcmp.transpose()<<endl; cout<<"logL: "<<lgL<<endl; } if(constrain_num*2>_r_indx.size()+1) throw("Error: analysis stopped because more than half of the variance components are constrained. 
The result would be unreliable.\n Please have a try to add the option --reml-no-constrain."); // convergence dlogL=lgL-prev_lgL; if((varcmp-prev_varcmp).squaredNorm()/varcmp.squaredNorm()<1e-8 && (fabs(dlogL)<1e-4 || (fabs(dlogL)<1e-2 && dlogL<0))){ if(_reml_mtd==2){ calcu_Hi(_P, Hi); Hi=2*Hi; } // for calculation of SE break; } prev_varcmp=varcmp; prev_lgL=lgL; } if(iter==_reml_max_iter){ stringstream errmsg; errmsg<<"Error: Log-likelihood not converged (stop after "<<_reml_max_iter<<" iteractions). \nYou can specify the option --reml-maxit to allow for more iterations."<<endl; throw(errmsg.str()); } else cout<<"Log-likelihood ratio converged."<<endl; return lgL; }