// use REML equation to estimate variance component // input P, calculate PA, H, R and varcmp void gcta::reml_equation(eigenMatrix &P, eigenMatrix &Hi, eigenVector &Py, eigenVector &varcmp) { // Calculate Hi calcu_Hi(P, Hi); // Calculate R Py=P*_y; eigenVector R(_r_indx.size()+1); for(int i=0; i<_r_indx.size(); i++) R(i)=(Py.transpose()*(_A.block(0,_r_indx[i]*_n,_n,_n))*Py)(0,0); R(_r_indx.size())=Py.squaredNorm(); // Calculate variance component varcmp=Hi*R; Hi=2*Hi; // for calculation of SE }
// input P, calculate varcmp void gcta::em_reml(eigenMatrix &P, eigenVector &Py, eigenVector &prev_varcmp, eigenVector &varcmp) { int i=0; // Calculate trace(PA) eigenVector tr_PA; calcu_tr_PA(P, tr_PA); // Calculate R Py=P*_y; eigenVector R(_r_indx.size()+1); for(i=0; i<_r_indx.size(); i++) R(i)=(Py.transpose()*(_A.block(0,_r_indx[i]*_n,_n,_n))*Py)(0,0); R(_r_indx.size())=Py.squaredNorm(); // Calculate variance component for(i=0; i<_r_indx.size(); i++) varcmp(i)=(prev_varcmp(i)*_n-prev_varcmp(i)*prev_varcmp(i)*tr_PA(i)+prev_varcmp(i)*prev_varcmp(i)*R(i))/_n; int j=_r_indx.size(); varcmp(j)=(prev_varcmp(j)*_n-prev_varcmp(j)*prev_varcmp(j)*tr_PA(j)+prev_varcmp(j)*prev_varcmp(j)*R(j))/_n; }
double gcta::reml_iteration(double y_Ssq, eigenMatrix &Vi_X, eigenMatrix &Xt_Vi_X_i, eigenMatrix &Hi, eigenVector &Py, eigenVector &varcmp, bool prior_var_flag, bool no_constrain) { char *mtd_str[3]={"AI-REML algorithm", "REML equation ...", "EM-REML algorithm ..."}; int i=0, constrain_num=0, iter=0, reml_mtd_tmp=_reml_mtd; double logdet=0.0, logdet_Xt_Vi_X=0.0, prev_lgL=-1e20, lgL=-1e20, dlogL=1000.0; eigenVector prev_varcmp(varcmp), varcomp_init(varcmp); _Vi.resize(_n,_n); _P(_n,_n); for(iter=0; iter<_reml_max_iter; iter++){ if(iter==0){ prev_varcmp=varcomp_init; if(prior_var_flag){ cout<<"User-specified prior values of variance components: "<<varcmp.transpose()<<endl; //continue; } else{ _reml_mtd=2; cout<<"Calculating prior values of variance components by EM-REML ..."<<endl; } } if(iter==1){ _reml_mtd=reml_mtd_tmp; cout<<"Running "<<mtd_str[_reml_mtd]<<" ..."<<"\nIter.\tlogL\t"; for(i=0; i<_r_indx.size(); i++) cout<<_var_name[_r_indx[i]]<<"\t"; cout<<_var_name[_var_name.size()-1]<<endl; } if(!calcu_Vi(_Vi, prev_varcmp, logdet, iter)) continue; // Calculate Vi logdet_Xt_Vi_X=calcu_P(_Vi, Vi_X, Xt_Vi_X_i, _P); // Calculate P if(_reml_mtd==0) ai_reml(_P, Hi, Py, prev_varcmp, varcmp, dlogL); else if(_reml_mtd==1) reml_equation(_P, Hi, Py, varcmp); else if(_reml_mtd==2) em_reml(_P, Py, prev_varcmp, varcmp); lgL=-0.5*(logdet_Xt_Vi_X+logdet+(_y.transpose()*Py)(0,0)); // output log if(!no_constrain) constrain_num=constrain_varcmp(varcmp, y_Ssq); if(iter>0){ cout<<iter<<"\t"<<setiosflags(ios::fixed)<<setprecision(2)<<lgL<<"\t"; for(i=0; i<_r_indx.size()+1; i++) cout<<setprecision(5)<<varcmp[i]<<"\t"; if(constrain_num>0) cout<<"("<<constrain_num<<" component(s) constrained)"<<endl; else cout<<endl; } else{ if(!prior_var_flag) cout<<"Prior values updated from EM-REML: "<<varcmp.transpose()<<endl; cout<<"logL: "<<lgL<<endl; } if(constrain_num*2>_r_indx.size()+1) throw("Error: analysis stopped because more than half of the variance components are constrained. The result would be unreliable.\n Please have a try to add the option --reml-no-constrain."); // convergence dlogL=lgL-prev_lgL; if((varcmp-prev_varcmp).squaredNorm()/varcmp.squaredNorm()<1e-8 && (fabs(dlogL)<1e-4 || (fabs(dlogL)<1e-2 && dlogL<0))){ if(_reml_mtd==2){ calcu_Hi(_P, Hi); Hi=2*Hi; } // for calculation of SE break; } prev_varcmp=varcmp; prev_lgL=lgL; } if(iter==_reml_max_iter){ stringstream errmsg; errmsg<<"Error: Log-likelihood not converged (stop after "<<_reml_max_iter<<" iteractions). \nYou can specify the option --reml-maxit to allow for more iterations."<<endl; throw(errmsg.str()); } else cout<<"Log-likelihood ratio converged."<<endl; return lgL; }