void gcta::blup_snp_dosage() { check_autosome(); if(_mu.empty()) calcu_mu(); int i=0, j=0, k=0, col_num=_varcmp_Py.cols(); // Substract each element by 2p for(i=0; i<_keep.size(); i++){ for(j=0; j<_include.size(); j++) _geno_dose[i][_include[j]]-=_mu[_include[j]]; } // Calcuate A matrix cout<<"Calculating the BLUP solution to SNP effects using imputed dosage scores ... "<<endl; vector<double> var_SNP(_include.size()); // variance of each SNP, 2pq eigenMatrix b_SNP=eigenMatrix::Zero(_include.size(), col_num); // variance of each SNP, 2pq for(j=0; j<_include.size(); j++){ for(i=0; i<_keep.size(); i++) var_SNP[j]+=_geno_dose[i][_include[j]]*_geno_dose[i][_include[j]]; var_SNP[j]/=(double)(_keep.size()-1); if(fabs(var_SNP[j])<1.0e-50) var_SNP[j]=0.0; else var_SNP[j]=1.0/var_SNP[j]; } for(k=0; k<_include.size(); k++){ for(i=0; i<_keep.size(); i++){ for(j=0; j<col_num; j++) b_SNP(k,j)+=_geno_dose[i][_include[k]]*_varcmp_Py(i,j); } for(j=0; j<col_num; j++) b_SNP(k,j)=b_SNP(k,j)*var_SNP[k]/(double)_include.size(); } output_blup_snp(b_SNP); }
// blue estimate of SNP effect void gcta::blup_snp_geno() { check_autosome(); if(_mu.empty()) calcu_mu(); int i=0, j=0, k=0, col_num=_varcmp_Py.cols(); double x=0.0, fcount=0.0; // Calcuate A matrix cout<<"Calculating the BLUP solution to SNP effects ..."<<endl; vector<double> var_SNP(_include.size()); eigenMatrix b_SNP=eigenMatrix::Zero(_include.size(), col_num); // variance of each SNP, 2pq for(j=0; j<_include.size(); j++){ var_SNP[j]=_mu[_include[j]]*(1.0-0.5*_mu[_include[j]]); if(fabs(var_SNP[j])<1.0e-50) var_SNP[j]=0.0; else var_SNP[j]=1.0/var_SNP[j]; } for(k=0; k<_include.size(); k++){ fcount=0.0; for(i=0; i<_keep.size(); i++){ if(!_snp_1[_include[k]][i] || _snp_2[_include[k]][i]){ if(_allele1[_include[k]]==_ref_A[_include[k]]) x=_snp_1[_include[k]][i]+_snp_2[_include[k]][i]; else x=2.0-(_snp_1[_include[k]][i]+_snp_2[_include[k]][i]); x=(x-_mu[_include[k]]); for(j=0; j<col_num; j++) b_SNP(k,j)+=x*_varcmp_Py(i,j); fcount+=1.0; } } for(j=0; j<col_num; j++) b_SNP(k,j)=(b_SNP(k,j)*var_SNP[k]/fcount)*((double)_keep.size()/(double)_include.size()); } output_blup_snp(b_SNP); }
void gcta::save_XMat(bool miss_with_mu) { if(miss_with_mu && _mu.empty()) calcu_mu(); // Save matrix X string X_zFile=_out+".xmat.gz"; gzofstream zoutf; zoutf.open( X_zFile.c_str() ); if(!zoutf.is_open()) throw("Error: can not open the file ["+X_zFile+"] to write."); cout<<"Saving the recoded genotype matrix to the file ["+X_zFile+"]."<<endl; int i=0, j=0; zoutf<<"FID IID "; for(j=0; j<_include.size(); j++) zoutf<<_snp_name[_include[j]]<<" "; zoutf<<endl; zoutf<<"Reference Allele "; for(j=0; j<_include.size(); j++) zoutf<<_ref_A[_include[j]]<<" "; zoutf<<endl; for(i=0; i<_keep.size(); i++){ zoutf<<_fid[_keep[i]]<<' '<<_pid[_keep[i]]<<' '; if(_dosage_flag){ for(j=0; j<_include.size(); j++){ if(_geno_dose[_keep[i]][_include[j]]<1e5){ if(_allele1[_include[j]]==_ref_A[_include[j]]) zoutf<<_geno_dose[_keep[i]][_include[j]]<<' '; else zoutf<<2.0-_geno_dose[_keep[i]][_include[j]]<<' '; } else{ if(miss_with_mu) zoutf<<_mu[_include[j]]<<' '; else zoutf<<"NA "; } } } else{ for(j=0; j<_include.size(); j++){ if(!_snp_1[_include[j]][_keep[i]] || _snp_2[_include[j]][_keep[i]]){ if(_allele1[_include[j]]==_ref_A[_include[j]]) zoutf<<_snp_1[_include[j]][_keep[i]]+_snp_2[_include[j]][_keep[i]]<<' '; else zoutf<<2.0-(_snp_1[_include[j]][_keep[i]]+_snp_2[_include[j]][_keep[i]])<<' '; } else{ if(miss_with_mu) zoutf<<_mu[_include[j]]<<' '; else zoutf<<"NA "; } } } zoutf<<endl; } zoutf.close(); cout<<"The recoded genotype matrix has been saved in the file ["+X_zFile+"] (in compressed text format) ."<<endl; }
// Writes the allele frequencies of the included SNPs to "<_out>.freq".
//
// Each output line holds, tab separated: SNP name, reference allele and the
// frequency computed as half of the SNP mean in _mu, printed with 15
// significant digits.
//
// ssq_flag: forwarded to calcu_mu() when _mu still has to be computed.
// Throws a std::string message when the output file cannot be opened.
void gcta::save_freq(bool ssq_flag)
{
    if(_mu.empty()) calcu_mu(ssq_flag);

    const string freq_file=_out+".freq";
    ofstream ofreq(freq_file.c_str());
    if(!ofreq) throw("Error: can not open the file ["+freq_file+"] to write.");

    cout<<"Writing allele frequencies of "<<_include.size()<<" SNPs to ["+freq_file+"]."<<endl;

    // The precision setting is sticky, so it only has to be applied once
    ofreq<<setprecision(15);
    for(size_t idx=0; idx<_include.size(); idx++){
        const int snp=_include[idx];
        ofreq<<_snp_name[snp]<<"\t"<<_ref_A[snp]<<"\t"<<_mu[snp]*0.5;
        // Disabled extra columns:
        // if(ssq_flag) ofreq<<"\t"<<_ssq[snp]<<"\t"<<_w[snp];
        ofreq<<endl;
    }
    ofreq.close();

    cout<<"Allele frequencies of "<<_include.size()<<" SNPs have been saved in the file ["+freq_file+"]."<<endl;
}
void gcta::std_XMat(vector< vector<float> > &X, vector<double> &sd_SNP, bool grm_xchr_flag, bool divid_by_std) { if(_mu.empty()) calcu_mu(); int i=0, j=0; sd_SNP.clear(); sd_SNP.resize(_include.size()); // SD of each SNP, sqrt(2pq) if(_dosage_flag){ for(j=0; j<_include.size(); j++){ for(i=0; i<_keep.size(); i++) sd_SNP[j]+=(X[i][j]-_mu[_include[j]])*(X[i][j]-_mu[_include[j]]); sd_SNP[j]/=(_keep.size()-1.0); } } else{ for(j=0; j<_include.size(); j++) sd_SNP[j]=_mu[_include[j]]*(1.0-0.5*_mu[_include[j]]); } for(j=0; j<_include.size(); j++){ if(fabs(sd_SNP[j])<1.0e-50) sd_SNP[j]=0.0; else sd_SNP[j]=sqrt(1.0/sd_SNP[j]); } for(i=0; i<_keep.size(); i++){ for(j=0; j<_include.size(); j++){ if(X[i][j]<1e5){ X[i][j]-=_mu[_include[j]]; if(divid_by_std) X[i][j]*=sd_SNP[j]; } } } if(!grm_xchr_flag) return; // for the X-chromosome check_sex(); double f_buf=sqrt(0.5); for(i=0; i<_keep.size(); i++){ if(_sex[_keep[i]]==1){ for(j=0; j<_include.size(); j++){ if(X[i][j]<1e5) X[i][j]*=f_buf; } } } }
// Restricts the analysis to SNPs whose minor allele frequency does not exceed
// max_maf.
//
// The frequency of a SNP is taken as half of its mean in _mu. A SNP is dropped
// when both that frequency and its complement are greater than max_maf. The
// survivors are written back to _snp_name_map and _include (sorted), and a
// summary is printed.
//
// Throws a C-string message when no SNP survives the filter.
void gcta::filter_snp_max_maf(double max_maf)
{
    if(_mu.empty()) calcu_mu();

    cout<<"Pruning SNPs with MAF < "<<max_maf<<" ..."<<endl;

    // Work from a snapshot of the current map because both containers are rebuilt
    const map<string, int> snapshot(_snp_name_map);
    int prev_size=_include.size();
    _include.clear();
    _snp_name_map.clear();

    for(map<string, int>::const_iterator it=snapshot.begin(); it!=snapshot.end(); ++it){
        const double freq=_mu[it->second]*0.5;
        const bool above_max=(freq>max_maf && 1.0-freq>max_maf);
        if(!above_max){
            _snp_name_map.insert(*it);
            _include.push_back(it->second);
        }
    }

    if(_include.size()==0) throw("Error: No SNP is retained for analysis.");

    stable_sort(_include.begin(), _include.end());
    cout<<"After pruning SNPs with MAF < "<<max_maf<<", there are "<<_include.size()<<" SNPs ("<<prev_size-_include.size()<<" SNPs with MAF > "<<max_maf<<")."<<endl;
}
void gcta::make_XMat(vector< vector<float> > &X, bool miss_with_mu) { if(_mu.empty() && miss_with_mu) calcu_mu(); cout<<"Recoding genotypes (individual major mode) ..."<<endl; int i=0, j=0; X.clear(); X.resize(_keep.size()); for(i=0; i<_keep.size(); i++){ X[i].resize(_include.size()); bool need2fill=false; if(_dosage_flag){ for(j=0; j<_include.size(); j++){ if(_geno_dose[_keep[i]][_include[j]]<1e5){ if(_allele1[_include[j]]==_ref_A[_include[j]]) X[i][j]=_geno_dose[_keep[i]][_include[j]]; else X[i][j]=2.0-_geno_dose[_keep[i]][_include[j]]; } else{ X[i][j]=1e6; need2fill=true; } } _geno_dose[i].clear(); } else{ for(j=0; j<_include.size(); j++){ if(!_snp_1[_include[j]][_keep[i]] || _snp_2[_include[j]][_keep[i]]){ if(_allele1[_include[j]]==_ref_A[_include[j]]) X[i][j]=_snp_1[_include[j]][_keep[i]]+_snp_2[_include[j]][_keep[i]]; else X[i][j]=2.0-(_snp_1[_include[j]][_keep[i]]+_snp_2[_include[j]][_keep[i]]); } else{ X[i][j]=1e6; need2fill=true; } } } // Fill the missing genotype with the mean of x (2p) for(j=0; j<_include.size() && miss_with_mu && need2fill; j++){ if(X[i][j]>1e5) X[i][j]=_mu[_include[j]]; } } }