/**
 * Read imputed dosage scores from a BEAGLE-style dosage file opened through gzifstream.
 *
 * Header line: the first three tokens are skipped; every remaining token is stored as an
 * individual ID (used for both _fid and _pid). _fa_id, _mo_id, _sex and _pheno are resized
 * to the number of individuals read.
 *
 * Data lines: line number `line` corresponds to entry `line` of the vector filled by
 * get_rsnp(). Lines whose entry is zero are skipped. For the others the first token must
 * equal _snp_name[line], the next two tokens are skipped, and then one dosage value per
 * individual is read; values of individuals flagged by get_rindi() are stored in
 * _geno_dose[individual][snp].
 *
 * @param zdosefile       path of the dosage file
 * @param kp_indi_file    if non-empty, passed to keep_indi()
 * @param rm_indi_file    if non-empty, passed to remove_indi() (applied after the other two)
 * @param blup_indi_file  if non-empty, passed to read_indi_blup()
 *
 * @throws const char*  when _include is empty
 * @throws std::string  when the file cannot be opened, a SNP name does not match, or the
 *                      file holds more non-blank data lines than get_rsnp() has entries
 */
void gcta::read_imp_dose_beagle(string zdosefile, string kp_indi_file, string rm_indi_file, string blup_indi_file)
{
    if(_include.size()==0) throw("Error: No SNP is retained for analysis.");

    int i=0, j=0;
    vector<int> rsnp;
    get_rsnp(rsnp);

    // The line buffer is ~10 MB. Keep it on the heap: an automatic array of this size
    // can exceed the default thread stack limit and crash before any line is read.
    const int MAX_LINE_LENGTH=10000000;
    vector<char> line_buf(MAX_LINE_LENGTH);
    char *buf=&line_buf[0];

    string str_buf;
    gzifstream zinf;
    zinf.open( zdosefile.c_str() );
    if(! zinf.is_open()) throw("Error: can not open the file ["+zdosefile+"] to read.");
    cout<<"Reading imputed dosage scores (BEAGLE output) ..."<<endl;

    // Header: skip the three leading columns, the rest are individual IDs.
    zinf.getline(buf, MAX_LINE_LENGTH, '\n');
    stringstream header(buf);
    for(i=0; i<3; i++) header>>str_buf;
    while(header>>str_buf){ _fid.push_back(str_buf); }
    _pid=_fid;
    _indi_num=_fid.size();
    _fa_id.resize(_indi_num);
    _mo_id.resize(_indi_num);
    _sex.resize(_indi_num);
    _pheno.resize(_indi_num);
    cout<<_indi_num<<" individuals to be included from ["+zdosefile+"]."<<endl;

    // Individual filters, in the same order as before: keep, blup, remove.
    init_keep();
    if(!kp_indi_file.empty()) keep_indi(kp_indi_file);
    if(!blup_indi_file.empty()) read_indi_blup(blup_indi_file);
    if(!rm_indi_file.empty()) remove_indi(rm_indi_file);

    _geno_dose.resize(_keep.size());
    for(i=0; i<_keep.size(); i++) _geno_dose[i].resize(_include.size());

    vector<int> rindi;
    get_rindi(rindi);

    int line=0;   // index of the current data line (0-based)
    int k=0;      // column of _geno_dose for the next retained SNP
    double d_buf=0.0;
    while(1){
        zinf.getline(buf, MAX_LINE_LENGTH, '\n');
        // NOTE(review): !good() is also true when the last line has no trailing newline
        // (eofbit set), so such a final line is not processed. Behaviour kept as is.
        if(zinf.fail() || !zinf.good()) break;

        // Guard the rsnp lookup: a file with more lines than rsnp has entries used to
        // read past the end of the vector.
        if(line>=(int)rsnp.size()){
            stringstream surplus(buf);
            if(!(surplus>>str_buf)) continue; // blank trailing line: nothing to read
            stringstream errmsg;
            errmsg<<"Error: the dosage file ["+zdosefile+"] contains more SNPs than expected ("<<rsnp.size()<<").";
            throw(errmsg.str());
        }

        if(!rsnp[line++]) continue;

        stringstream fields(buf);
        fields>>str_buf;
        if(str_buf!=_snp_name[line-1]){
            stringstream errmsg;
            errmsg<<"Error: the "<<line<<" th SNP ["+_snp_name[line-1]+"] in the summary file doesn't match to that in the dosage file."<<endl;
            throw(errmsg.str());
        }
        fields>>str_buf>>str_buf; // skip the two columns following the SNP name

        // One value per individual in header order; only flagged individuals are stored.
        for(i=0, j=0; i<_indi_num; i++){
            fields>>d_buf;
            if(rindi[i]){
                _geno_dose[j][k]=d_buf;
                j++;
            }
        }
        k++;
    }
    zinf.clear();
    zinf.close();
}
void gcta::fit_reml(string grm_file, string phen_file, string qcovar_file, string covar_file, string qGE_file, string GE_file, string keep_indi_file, string remove_indi_file, string sex_file, int mphen, double grm_cutoff, double adj_grm_fac, int dosage_compen, bool m_grm_flag, bool pred_rand_eff, bool est_fix_eff, int reml_mtd, int MaxIter, vector<double> reml_priors, vector<double> reml_priors_var, vector<int> drop, bool no_lrt, double prevalence, bool no_constrain, bool mlmassoc) { _reml_mtd=reml_mtd; _reml_max_iter=MaxIter; int i=0, j=0, k=0; bool grm_flag=(!grm_file.empty()); bool qcovar_flag=(!qcovar_file.empty()); bool covar_flag=(!covar_file.empty()); bool GE_flag=(!GE_file.empty()); bool qGE_flag=(!qGE_file.empty()); if(m_grm_flag) grm_flag=false; // Read data stringstream errmsg; int qcovar_num=0, covar_num=0, qE_fac_num=0, E_fac_num=0; vector<string> phen_ID, phen_buf, qcovar_ID, covar_ID, qGE_ID, GE_ID, grm_id, grm_files; vector< vector<string> > qcovar, covar, GE, qGE; // save individuals by column if(grm_flag){ read_grm_gz(grm_file, grm_id); update_id_map_kp(grm_id, _id_map, _keep); grm_files.push_back(grm_file); } else if(m_grm_flag){ read_grm_filenames(grm_file, grm_files, false); for(i=0; i<grm_files.size(); i++){ read_grm_gz(grm_files[i], grm_id, false, true); update_id_map_kp(grm_id, _id_map, _keep); } } read_phen(phen_file, phen_ID, phen_buf, mphen); update_id_map_kp(phen_ID, _id_map, _keep); if(qcovar_flag){ qcovar_num=read_covar(qcovar_file, qcovar_ID, qcovar, true); update_id_map_kp(qcovar_ID, _id_map, _keep); } if(covar_flag){ covar_num=read_covar(covar_file, covar_ID, covar, false); update_id_map_kp(covar_ID, _id_map, _keep); } if(qGE_flag){ qE_fac_num=read_GE(qGE_file, qGE_ID, qGE, true); update_id_map_kp(qGE_ID, _id_map, _keep); } if(GE_flag){ E_fac_num=read_GE(GE_file, GE_ID, GE, false); update_id_map_kp(GE_ID, _id_map, _keep); } if(!mlmassoc){ if(!keep_indi_file.empty()) keep_indi(keep_indi_file); if(!remove_indi_file.empty()) 
remove_indi(remove_indi_file); } if(grm_flag){ if(grm_cutoff>-1.0) rm_cor_indi(grm_cutoff); if(!sex_file.empty()) update_sex(sex_file); if(adj_grm_fac>-1.0) adj_grm(adj_grm_fac); if(dosage_compen>-1) dc(dosage_compen); _grm_N.resize(1,1); } vector<string> uni_id; map<string, int> uni_id_map; map<string, int>::iterator iter; for(i=0; i<_keep.size(); i++){ uni_id.push_back(_fid[_keep[i]]+":"+_pid[_keep[i]]); uni_id_map.insert(pair<string,int>(_fid[_keep[i]]+":"+_pid[_keep[i]], i)); } _n=_keep.size(); if(_n<1) throw("Error: no individual is in common in the input files."); // construct model terms _y.setZero(_n); for(i=0; i<phen_ID.size(); i++){ iter=uni_id_map.find(phen_ID[i]); if(iter==uni_id_map.end()) continue; _y[iter->second]=atof(phen_buf[i].c_str()); } int pos=0; _r_indx.clear(); eigenMatrix A_N(_n, _n); vector<int> kp; if(grm_flag){ for(i=0; i<1+qE_fac_num+E_fac_num; i++) _r_indx.push_back(i); if(!no_lrt) drop_comp(drop); _A=eigenMatrix::Zero(_n, _r_indx.size()*_n); if(mlmassoc) StrFunc::match(uni_id, grm_id, kp); else kp=_keep; for(i=0; i<_n; i++){ for(j=0; j<=i; j++) (_A.block(0,0,_n,_n))(j,i)=(_A.block(0,0,_n,_n))(i,j)=_grm(kp[i],kp[j]); } pos++; _grm.resize(1,1); } else if(m_grm_flag){ if(!sex_file.empty()) update_sex(sex_file); for(i=0; i<(1+qE_fac_num+E_fac_num)*grm_files.size(); i++) _r_indx.push_back(i); if(!no_lrt) drop_comp(drop); _A=eigenMatrix::Zero(_n, _r_indx.size()*_n); string prev_file=grm_files[0]; vector<string> prev_grm_id(grm_id); cout<<"There are "<<grm_files.size()<<" GRM file names specified in the file ["+grm_file+"]."<<endl; for(i=0; i<grm_files.size(); i++, pos++){ cout<<"Reading the GRM from the "<<i+1<<"th file ..."<<endl; read_grm_gz(grm_files[i], grm_id); if(adj_grm_fac>-1.0) adj_grm(adj_grm_fac); if(dosage_compen>-1) dc(dosage_compen); StrFunc::match(uni_id, grm_id, kp); int pos_n=pos*_n; for(j=0; j<_n; j++){ for(k=0; k<=j; k++){ if(kp[j]>=kp[k]) (_A.block(0,pos_n,_n,_n))(k,j)=(_A.block(0,pos_n,_n,_n))(j,k)=_grm(kp[j],kp[k]); 
else (_A.block(0,pos_n,_n,_n))(k,j)=(_A.block(0,pos_n,_n,_n))(j,k)=_grm(kp[k],kp[j]); } } prev_file=grm_files[i]; prev_grm_id=grm_id; } _grm_N.resize(1,1); _grm.resize(1,1); } // GE interaction vector<eigenMatrix> E_float(E_fac_num); eigenMatrix qE_float, mbuf; if(qGE_flag){ qE_float.resize(_n, qE_fac_num); for(i=0; i<qGE_ID.size(); i++){ iter=uni_id_map.find(qGE_ID[i]); if(iter==uni_id_map.end()) continue; for(j=0; j<qE_fac_num; j++) qE_float(iter->second,j)=atof(qGE[i][j].c_str()); } for(j=0; j<qE_fac_num; j++){ mbuf=((qE_float.block(0,j,_n,1))*(qE_float.block(0,j,_n,1)).transpose()); for(i=0; i<grm_files.size(); i++, pos++) (_A.block(0,pos*_n,_n,_n))=(_A.block(0,i*_n,_n,_n)).array()*mbuf.array(); } } if(GE_flag){ vector< vector<string> > E_str(E_fac_num); for(i=0; i<E_fac_num; i++) E_str[i].resize(_n); for(i=0; i<GE_ID.size(); i++){ iter=uni_id_map.find(GE_ID[i]); if(iter!=uni_id_map.end()){ for(j=0; j<E_fac_num; j++) E_str[j][iter->second]=GE[i][j]; } } for(j=0; j<E_fac_num; j++){ stringstream errmsg; errmsg<<"Error: too many classes for the "<<j+1<<"th environmental factor. 
\nPlease make sure you input a discrete variable as the environmental factor."; string errmsg1=errmsg.str(); errmsg.str(""); errmsg<<"Error: the "<<j+1<<"th envronmental factor has only one class."; string errmsg2=errmsg.str(); coeff_mat(E_str[j], E_float[j], errmsg1, errmsg2); mbuf=((E_float[j])*(E_float[j]).transpose()); for(i=0; i<grm_files.size(); i++, pos++) (_A.block(0,pos*_n,_n,_n))=(_A.block(0,i*_n,_n,_n)).array()*mbuf.array(); } } // construct X matrix construct_X(uni_id_map, qcovar_flag, qcovar_num, qcovar_ID, qcovar, covar_flag, covar_num, covar_ID, covar, E_float, qE_float); // names of variance component for(i=0; i<grm_files.size(); i++){ stringstream strstrm; if(grm_files.size()==1) strstrm<<""; else strstrm<<i+1; _var_name.push_back("V(G"+strstrm.str()+")"); _hsq_name.push_back("V(G"+strstrm.str()+")/Vp"); } for(j=0; j<qE_fac_num; j++){ for(i=0; i<grm_files.size(); i++){ stringstream strstrm1,strstrm2; if(grm_files.size()==1) strstrm1<<""; else strstrm1<<i+1; if(qE_fac_num==1) strstrm2<<""; else strstrm2<<j+1; _var_name.push_back("V(G"+strstrm1.str()+"xqE"+strstrm2.str()+")"); _hsq_name.push_back("V(G"+strstrm1.str()+"xqE"+strstrm2.str()+")"+"/Vp"); } } for(j=0; j<E_fac_num; j++){ for(i=0; i<grm_files.size(); i++){ stringstream strstrm1,strstrm2; if(grm_files.size()==1) strstrm1<<""; else strstrm1<<i+1; if(E_fac_num==1) strstrm2<<""; else strstrm2<<j+1; _var_name.push_back("V(G"+strstrm1.str()+"xE"+strstrm2.str()+")"); _hsq_name.push_back("V(G"+strstrm1.str()+"xE"+strstrm2.str()+")"+"/Vp"); } } _var_name.push_back("V(e)"); // run REML algorithm cout<<_n<<" individuals are in common in these files."<<endl; reml(pred_rand_eff, est_fix_eff, reml_priors, reml_priors_var, prevalence, no_constrain, no_lrt, mlmassoc); }