/*
 * Set an environment variable.
 *
 * Builds a new environment entry from name/value with makenv(). If an
 * existing entry in environ matches (as decided by varcmp()), that slot is
 * overwritten in place; otherwise environ is replaced by a freshly allocated
 * array that holds all previous entries plus the new one.
 *
 * Returns 0 on success, -1 if the entry or the enlarged array cannot be
 * allocated.
 */
int mc_setenv(const char *name, const char *value)
{
	/* Array most recently installed into environ by this function, if any. */
	static char **owned = NULL;
	char *entry;
	char **e, **newe;
	size_t count = 0;

	if ((entry = makenv(name, value)) == NULL)
		return -1;

	/* Look for an existing definition; count the entries on the way. */
	if (environ != NULL) {
		for (e = environ; *e; e++) {
			if (varcmp(entry, *e)) {
				/*
				 * The previous string is not freed: it cannot be
				 * told from here whether it came from malloc().
				 */
				*e = entry;
				return 0;
			}
			count++;
		}
	}

	/* Room for the old entries, the new entry and the terminating NULL. */
	newe = (char **)malloc(sizeof(char *) * (count + 2));
	if (newe == NULL) {
		free(entry);
		return -1;
	}

	/*
	 * Copy exactly the existing entries. Copying count + 2 slots, as the
	 * previous version did, read one pointer past the end of environ.
	 */
	if (count > 0)
		memcpy(newe, environ, count * sizeof(char *));
	newe[count] = entry;
	newe[count + 1] = NULL;

	/*
	 * Release the old array only if it is still the one allocated here;
	 * if something else replaced environ in the meantime it is not ours
	 * to free.
	 */
	if (owned != NULL && environ == owned)
		free(owned);
	owned = newe;
	environ = newe;
	return 0;
}
double gcta::lgL_reduce_mdl(double y_Ssq, bool no_constrain) { if(_r_indx.size()==0) return 0; bool multi_comp=(_r_indx.size()-_r_indx_drop.size()>1); cout<<"\nCalculating the logLikelihood for the reduced model ...\n(variance component"<<(multi_comp?"s ":" "); for(int i=0; i<_r_indx.size(); i++){ if(find(_r_indx_drop.begin(), _r_indx_drop.end(), _r_indx[i])==_r_indx_drop.end()) cout<<_r_indx[i]+1<<" "; } cout<<(multi_comp?"are":"is")<<" dropped from the model)"<<endl; vector<int> vi_buf(_r_indx); _r_indx=_r_indx_drop; eigenMatrix Vi_X(_n, _X_c), Xt_Vi_X_i(_X_c, _X_c), Hi(_r_indx.size()+1, _r_indx.size()+1); eigenVector Py(_n); eigenVector varcmp(_r_indx.size()+1); varcmp.setConstant(y_Ssq/(_r_indx.size()+1)); double lgL=reml_iteration(y_Ssq, Vi_X, Xt_Vi_X_i, Hi, Py, varcmp, false, no_constrain); _r_indx=vi_buf; return(lgL); }
void gcta::reml(bool pred_rand_eff, bool est_fix_eff, vector<double> &reml_priors, vector<double> &reml_priors_var, double prevalence, bool no_constrain, bool no_lrt, bool mlmassoc) { int i=0, j=0, k=0; // case-control double ncase=0.0; bool flag_cc=check_case_control(ncase); if(flag_cc){ if(mlmassoc) throw("Error: the option --mlm-assoc is valid for the quantitative trait only."); if(prevalence<-1) cout<<"Warning: please specify the disease prevalence by the option --prevalence so that GCTA can transform the variance explained to the underlying liability scale."<<endl; } // Initialize variance component // 0: AI; 1: REML equation; 2: EM stringstream errmsg; double d_buf=0.0, y_mean=_y.mean(); eigenVector y(_y); for(i=0; i<_n; i++) y(i)-=y_mean; double y_Ssq=y.squaredNorm()/(_n-1.0); if(!(fabs(y_Ssq)<1e30)) throw("Error: the phenotypic variance is infinite. Please check the missing data in your phenotype file. Missing values should be represented by \"NA\" or \"-9\"."); bool reml_priors_flag=!reml_priors.empty(), reml_priors_var_flag=!reml_priors_var.empty(); if(reml_priors_flag && reml_priors.size()<_r_indx.size()){ errmsg<<"Error: in option --reml-priors. There are "<<_r_indx.size()+1<<" variance components. At least "<<_r_indx.size()<<" prior values should be specified."; throw(errmsg.str()); } if(reml_priors_var_flag && reml_priors_var.size()<_r_indx.size()){ errmsg<<"Error: in option --reml-priors-var. There are "<<_r_indx.size()+1<<" variance components. At least "<<_r_indx.size()<<" prior values should be specified."; throw(errmsg.str()); } cout<<"\nPerforming REML analysis ... 
(NOTE: may take hours depending on sample size)."<<endl; if(_n<10) throw("Error: sample size is too small."); cout<<_n<<" observations, "<<_X_c<<" fixed effect(s), and "<<_r_indx.size()+1<<" variance component(s)(including residual variance)."<<endl; eigenMatrix Vi_X(_n, _X_c), Xt_Vi_X_i(_X_c, _X_c), Hi(_r_indx.size()+1, _r_indx.size()+1); eigenVector Py(_n), varcmp(_r_indx.size()+1); if(reml_priors_var_flag){ for(i=0; i<_r_indx.size(); i++) varcmp[i]=reml_priors_var[i]; if(varcmp[_r_indx.size()]<1e-30) varcmp[i]=y_Ssq-varcmp.sum(); } else if(reml_priors_flag){ for(i=0; i<_r_indx.size(); i++) { varcmp[i]=reml_priors[i]*y_Ssq; d_buf+=reml_priors[i]; } varcmp[_r_indx.size()]=(1.0-d_buf)*y_Ssq; } else varcmp.setConstant(y_Ssq/(_r_indx.size()+1)); double lgL=reml_iteration(y_Ssq, Vi_X, Xt_Vi_X_i, Hi, Py, varcmp, reml_priors_var_flag|reml_priors_flag, no_constrain); eigenMatrix u; if(pred_rand_eff){ u.resize(_n, _r_indx.size()); for(i=0; i<_r_indx.size(); i++) (u.col(i))=(((_A.block(0,_r_indx[i]*_n,_n,_n))*Py)*varcmp[i]); } eigenVector b; if(est_fix_eff){ b.resize(_X_c); b=Xt_Vi_X_i*(Vi_X.transpose()*_y); } // calculate Hsq and SE double Vp=0.0, VarVp=0.0, Vp_f=0.0, VarVp_f=0.0; vector<double> Hsq(_r_indx.size()), VarHsq(_r_indx.size()); calcu_Vp(Vp, VarVp, -1, varcmp, Hi); for(i=0; i<_r_indx.size(); i++) calcu_hsq(i, Vp, VarVp, -1, Hsq[i], VarHsq[i], varcmp, Hi); // calculate the logL for a reduce model double lgL_rdu_mdl=0.0, LRT=0.0; if(!no_lrt){ lgL_rdu_mdl=lgL_reduce_mdl(y_Ssq, no_constrain); LRT=2.0*(lgL-lgL_rdu_mdl); if(LRT<0.0) LRT=0.0; } // output results cout<<"\nSummary result of REML analysis:"<<endl; cout<<"Source\tVariance\tSE"<<setiosflags(ios::fixed)<<setprecision(6)<<endl; for(i=0; i<_r_indx.size()+1; i++) cout<<_var_name[i]<<"\t"<<varcmp[i]<<"\t"<<sqrt(Hi(i,i))<<endl; cout<<"Vp\t"<<Vp<<"\t"<<sqrt(VarVp)<<endl; for(i=0; i<_r_indx.size(); i++) cout<<_hsq_name[i]<<"\t"<<Hsq[i]<<"\t"<<sqrt(VarHsq[i])<<endl; if(flag_cc && prevalence>-1){ cout<<"The estimate 
of variance explained on the observed scale is transformed to that on the underlying scale:"<<endl; cout<<"(Proportion of cases in the sample = "<<ncase<<"; User-specified disease prevalence = "<<prevalence<<")"<<endl; for(i=0; i<_r_indx.size(); i++) cout<<_hsq_name[i]<<"_L\t"<<transform_hsq_L(ncase, prevalence, Hsq[i])<<"\t"<<transform_hsq_L(ncase, prevalence, sqrt(VarHsq[i]))<<endl; } if(mlmassoc) return; cout<<"\nCovariance/Variance/Correlation Matrix:"<<endl; for(i=0; i<_r_indx.size()+1; i++){ for(j=0; j<=i; j++) cout<<setiosflags(ios::scientific)<<Hi(i,j)<<"\t"; cout<<endl; } if(est_fix_eff){ cout<<"Estimate"<<(_X_c>1?"s":"")<<"of fixed effect"<<(_X_c>1?"s":"")<<":"<<endl; cout<<"\nSource\tEstimate\tSE"<<endl; for(i=0; i<_X_c; i++){ if(i==0) cout<<"mean\t"; else cout<<"X_"<<i+1<<"\t"; cout<<setiosflags(ios::fixed)<<b[i]<<"\t"<<sqrt(Xt_Vi_X_i(i,i))<<endl; } } // save summary result into a file string reml_rst_file=_out+".hsq"; ofstream o_reml(reml_rst_file.c_str()); o_reml<<"Source\tVariance\tSE"<<setiosflags(ios::fixed)<<setprecision(6)<<endl; for(i=0; i<_r_indx.size()+1; i++) o_reml<<_var_name[i]<<"\t"<<varcmp[i]<<"\t"<<sqrt(Hi(i,i))<<endl; o_reml<<"Vp\t"<<Vp<<"\t"<<sqrt(VarVp)<<endl; for(i=0; i<_r_indx.size(); i++) o_reml<<_hsq_name[i]<<"\t"<<Hsq[i]<<"\t"<<sqrt(VarHsq[i])<<endl; if(flag_cc && prevalence>-1){ o_reml<<"The estimate of variance explained on the observed scale is transformed to that on the underlying scale:"<<endl; o_reml<<"(Proportion of cases in the sample = "<<ncase<<"; User-specified disease prevalence = "<<prevalence<<")"<<endl; for(i=0; i<_r_indx.size(); i++) o_reml<<_hsq_name[i]<<"_L\t"<<transform_hsq_L(ncase, prevalence, Hsq[i])<<"\t"<<transform_hsq_L(ncase, prevalence, sqrt(VarHsq[i]))<<endl; } o_reml<<"logL\t"<<setprecision(3)<<lgL<<endl; if(!no_lrt && _r_indx.size()>0){ o_reml<<"logL0\t"<<setprecision(3)<<lgL_rdu_mdl<<endl; o_reml<<"LRT\t"<<setprecision(3)<<LRT<<endl; 
o_reml<<"Pval\t"<<setiosflags(ios::scientific)<<0.5*StatFunc::chi_prob(_r_indx.size()-_r_indx_drop.size(), LRT)<<setiosflags(ios::fixed)<<endl; } o_reml<<"n\t"<<_n<<endl; if(est_fix_eff){ o_reml<<"\nFix_eff\tSE"<<endl; for(i=0; i<_X_c; i++) o_reml<<setprecision(6)<<b[i]<<"\t"<<sqrt(Xt_Vi_X_i(i,i))<<endl; o_reml.close(); } cout<<"\nSummary result of REML analysis has been saved in the file ["+reml_rst_file+"]."<<endl; // save random effect to a file if(pred_rand_eff){ string rand_eff_file=_out+".indi.blp"; ofstream o_rand_eff(rand_eff_file.c_str()); for(i=0; i<_keep.size(); i++){ o_rand_eff<<_fid[_keep[i]]<<"\t"<<_pid[_keep[i]]<<"\t"; for(j=0; j<_r_indx.size(); j++) o_rand_eff<<setprecision(6)<<Py[i]*varcmp[j]<<"\t"<<u(i,j)<<"\t"; o_rand_eff<<endl; } cout<<"\nBLUP of the genetic effects for "<<_keep.size()<<" individuals has been saved in the file ["+rand_eff_file+"]."<<endl; } }