Пример #1
0
/**
 * Computes BLUP solutions of SNP effects from imputed dosage scores and hands
 * them to output_blup_snp().
 *
 * Steps:
 *   1. Centre the dosage of every kept individual at every included SNP by
 *      subtracting _mu (described as 2p by the original comment).
 *   2. For each SNP, take the sample variance of the centred dosages over the
 *      kept individuals and store its reciprocal (0 when the variance is
 *      numerically zero).
 *   3. For each SNP k and each column j of _varcmp_Py, accumulate
 *      sum_i dose(i,k) * _varcmp_Py(i,j), then scale by the reciprocal
 *      variance and by 1 / (number of included SNPs).
 *
 * Side effect: the rows of _geno_dose belonging to kept individuals are
 * centred in place.
 *
 * NOTE(review): dosages flagged as missing elsewhere in this file (values
 * >= 1e5) are not treated specially here, and _keep.size() <= 1 makes the
 * variance denominator non-positive -- confirm callers rule both out.
 */
void gcta::blup_snp_dosage()
{
    check_autosome();

    if(_mu.empty()) calcu_mu();

    int i=0, j=0, k=0, col_num=_varcmp_Py.cols();

    // Subtract the SNP mean from each dosage.
    // Rows of _geno_dose are addressed through _keep[i] (as save_XMat() and
    // make_XMat() do); using the bare loop counter would read the wrong
    // individuals whenever some individuals have been excluded.
    for(i=0; i<_keep.size(); i++){
        for(j=0; j<_include.size(); j++) _geno_dose[_keep[i]][_include[j]]-=_mu[_include[j]];
    }

    cout<<"Calculating the BLUP solution to SNP effects using imputed dosage scores ... "<<endl;
    vector<double> var_SNP(_include.size()); // reciprocal of the sample variance of each SNP
    eigenMatrix b_SNP=eigenMatrix::Zero(_include.size(), col_num); // BLUP solutions, one row per SNP
    for(j=0; j<_include.size(); j++){
        for(i=0; i<_keep.size(); i++) var_SNP[j]+=_geno_dose[_keep[i]][_include[j]]*_geno_dose[_keep[i]][_include[j]];
        var_SNP[j]/=(double)(_keep.size()-1);
        // A (numerically) monomorphic SNP gets weight 0 instead of 1/0.
        if(fabs(var_SNP[j])<1.0e-50) var_SNP[j]=0.0;
        else var_SNP[j]=1.0/var_SNP[j];
    }
    for(k=0; k<_include.size(); k++){
        // Row i of _varcmp_Py is paired here with the i-th kept individual.
        for(i=0; i<_keep.size(); i++){
            for(j=0; j<col_num; j++) b_SNP(k,j)+=_geno_dose[_keep[i]][_include[k]]*_varcmp_Py(i,j);
        }
        for(j=0; j<col_num; j++) b_SNP(k,j)=b_SNP(k,j)*var_SNP[k]/(double)_include.size();
    }
    output_blup_snp(b_SNP);
}
Пример #2
0
/**
 * BLUP estimate of SNP effects from hard-called genotypes; the result is
 * passed to output_blup_snp().
 *
 * For each included SNP k the genotype of every kept individual is recoded
 * as a count relative to _ref_A (flipped to 2 - count when _allele1 is not
 * the reference allele), centred by _mu, and multiplied into the matching
 * row of _varcmp_Py. The accumulated sum is scaled by
 *   (1 / (mu*(1 - mu/2))) / fcount * (_keep.size() / _include.size()),
 * where fcount is the number of genotypes that passed the
 * (!_snp_1 || _snp_2) test for that SNP.
 *
 * NOTE(review): if no genotype passes that test for a SNP, fcount is 0 and
 * the scaling divides by zero -- confirm upstream filtering prevents this.
 */
void gcta::blup_snp_geno()
{
    check_autosome();

    if(_mu.empty()) calcu_mu();

    int i=0, j=0, k=0, col_num=_varcmp_Py.cols();
    double x=0.0, fcount=0.0;

    cout<<"Calculating the BLUP solution to SNP effects ..."<<endl;
    vector<double> var_SNP(_include.size()); // reciprocal of mu*(1-mu/2) per SNP (0 when degenerate)
    eigenMatrix b_SNP=eigenMatrix::Zero(_include.size(), col_num); // BLUP solutions, one row per SNP
    for(j=0; j<_include.size(); j++){
        var_SNP[j]=_mu[_include[j]]*(1.0-0.5*_mu[_include[j]]);
        // A (numerically) monomorphic SNP gets weight 0 instead of 1/0.
        if(fabs(var_SNP[j])<1.0e-50) var_SNP[j]=0.0;
        else var_SNP[j]=1.0/var_SNP[j];
    }
    for(k=0; k<_include.size(); k++){
        fcount=0.0;
        for(i=0; i<_keep.size(); i++){
            // Genotype bits are addressed through _keep[i] (as save_XMat() and
            // make_XMat() do); using the bare loop counter would read the wrong
            // individuals whenever some individuals have been excluded.
            // Genotypes failing this test are skipped (save_XMat() writes them as NA).
            if(!_snp_1[_include[k]][_keep[i]] || _snp_2[_include[k]][_keep[i]]){
                if(_allele1[_include[k]]==_ref_A[_include[k]]) x=_snp_1[_include[k]][_keep[i]]+_snp_2[_include[k]][_keep[i]];
                else x=2.0-(_snp_1[_include[k]][_keep[i]]+_snp_2[_include[k]][_keep[i]]);
                x=(x-_mu[_include[k]]);
                // Row i of _varcmp_Py is paired here with the i-th kept individual.
                for(j=0; j<col_num; j++) b_SNP(k,j)+=x*_varcmp_Py(i,j);
                fcount+=1.0;
            }
        }
        for(j=0; j<col_num; j++) b_SNP(k,j)=(b_SNP(k,j)*var_SNP[k]/fcount)*((double)_keep.size()/(double)_include.size());
    }
    output_blup_snp(b_SNP);
}
Пример #3
0
void gcta::save_XMat(bool miss_with_mu)
{
    if(miss_with_mu && _mu.empty()) calcu_mu();

	// Save matrix X
    string X_zFile=_out+".xmat.gz";
    gzofstream zoutf;
    zoutf.open( X_zFile.c_str() );
    if(!zoutf.is_open()) throw("Error: can not open the file ["+X_zFile+"] to write.");
	cout<<"Saving the recoded genotype matrix to the file ["+X_zFile+"]."<<endl;
    int i=0, j=0;
    zoutf<<"FID IID ";
    for(j=0; j<_include.size(); j++) zoutf<<_snp_name[_include[j]]<<" ";
    zoutf<<endl;
    zoutf<<"Reference Allele ";
    for(j=0; j<_include.size(); j++) zoutf<<_ref_A[_include[j]]<<" ";
    zoutf<<endl;
    for(i=0; i<_keep.size(); i++){
		zoutf<<_fid[_keep[i]]<<' '<<_pid[_keep[i]]<<' ';
        if(_dosage_flag){
            for(j=0; j<_include.size(); j++){
                if(_geno_dose[_keep[i]][_include[j]]<1e5){
                    if(_allele1[_include[j]]==_ref_A[_include[j]]) zoutf<<_geno_dose[_keep[i]][_include[j]]<<' ';
                    else zoutf<<2.0-_geno_dose[_keep[i]][_include[j]]<<' ';
                }
                else{
                    if(miss_with_mu) zoutf<<_mu[_include[j]]<<' ';
                    else zoutf<<"NA ";
                }
            }
		}
		else{
            for(j=0; j<_include.size(); j++){
                if(!_snp_1[_include[j]][_keep[i]] || _snp_2[_include[j]][_keep[i]]){
                    if(_allele1[_include[j]]==_ref_A[_include[j]]) zoutf<<_snp_1[_include[j]][_keep[i]]+_snp_2[_include[j]][_keep[i]]<<' ';
                    else zoutf<<2.0-(_snp_1[_include[j]][_keep[i]]+_snp_2[_include[j]][_keep[i]])<<' ';
                }
                else{
                    if(miss_with_mu) zoutf<<_mu[_include[j]]<<' ';
                    else zoutf<<"NA ";
                }
            }
		}
		zoutf<<endl;
    }
    zoutf.close();
    cout<<"The recoded genotype matrix has been saved in the file ["+X_zFile+"] (in compressed text format) ."<<endl;
}
Пример #4
0
void gcta::save_freq(bool ssq_flag)
{
    if(_mu.empty()) calcu_mu(ssq_flag);
    string save_freq=_out+".freq";
    ofstream ofreq(save_freq.c_str());
    if(!ofreq) throw("Error: can not open the file ["+save_freq+"] to write.");
    int i=0;
	cout<<"Writing allele frequencies of "<<_include.size()<<" SNPs to ["+save_freq+"]."<<endl;
    for(i=0; i<_include.size(); i++){
        ofreq<<_snp_name[_include[i]]<<"\t"<<_ref_A[_include[i]]<<"\t"<<setprecision(15)<<_mu[_include[i]]*0.5;
//        if(ssq_flag) ofreq<<"\t"<<_ssq[_include[i]]<<"\t"<<_w[_include[i]];
        ofreq<<endl;
    }
    ofreq.close();
	cout<<"Allele frequencies of "<<_include.size()<<" SNPs have been saved in the file ["+save_freq+"]."<<endl;
}
Пример #5
0
/**
 * Centres (and optionally scales) a genotype matrix in place.
 *
 * @param X              genotype matrix; indexed here as X[i][j] with
 *                       i < _keep.size() and j < _include.size(). Entries
 *                       that are not below 1e5 are left untouched.
 * @param sd_SNP         output, resized to _include.size(). Despite the
 *                       name it receives 1/sqrt(variance) per SNP, or 0
 *                       when the variance is numerically zero.
 * @param grm_xchr_flag  when true, check_sex() is called and rows whose
 *                       _sex entry equals 1 are further multiplied by
 *                       sqrt(0.5).
 * @param divid_by_std   when true, centred values are multiplied by sd_SNP.
 *
 * NOTE(review): in dosage mode the variance sum runs over every entry of a
 * column, including any that are not below 1e5 -- confirm X has no such
 * entries on that path.
 */
void gcta::std_XMat(vector< vector<float> > &X, vector<double> &sd_SNP, bool grm_xchr_flag, bool divid_by_std)
{
    if(_mu.empty()) calcu_mu();

    int row=0, col=0;

    // Per-SNP variance, immediately turned into its inverse square root.
    sd_SNP.assign(_include.size(), 0.0);
    for(col=0; col<_include.size(); col++){
        if(_dosage_flag){
            // Sample variance of the dosages around _mu.
            for(row=0; row<_keep.size(); row++) sd_SNP[col]+=(X[row][col]-_mu[_include[col]])*(X[row][col]-_mu[_include[col]]);
            sd_SNP[col]/=(_keep.size()-1.0);
        }
        else{
            // mu*(1 - mu/2); the original comment describes this as 2pq.
            sd_SNP[col]=_mu[_include[col]]*(1.0-0.5*_mu[_include[col]]);
        }

        if(fabs(sd_SNP[col])<1.0e-50) sd_SNP[col]=0.0;
        else sd_SNP[col]=sqrt(1.0/sd_SNP[col]);
    }

    // Centre every entry below 1e5, scaling it as well when requested.
    for(row=0; row<_keep.size(); row++){
        for(col=0; col<_include.size(); col++){
            if(!(X[row][col]<1e5)) continue;
            X[row][col]-=_mu[_include[col]];
            if(divid_by_std) X[row][col]*=sd_SNP[col];
        }
    }

    if(!grm_xchr_flag) return;

    // X-chromosome adjustment for individuals whose _sex entry is 1.
    check_sex();
    const double xchr_scale=sqrt(0.5);
    for(row=0; row<_keep.size(); row++){
        if(_sex[_keep[row]]!=1) continue;
        for(col=0; col<_include.size(); col++){
            if(X[row][col]<1e5) X[row][col]*=xchr_scale;
        }
    }
}
Пример #6
0
/**
 * Removes every SNP whose minor allele frequency exceeds max_maf.
 *
 * The allele frequency is taken as _mu * 0.5. A SNP is dropped when both
 * that frequency and its complement are greater than max_maf, i.e. when
 * its MAF is above the threshold. _snp_name_map and _include are rebuilt
 * from the survivors, and _include is sorted afterwards.
 *
 * @param max_maf  largest MAF a SNP may have and still be retained.
 *
 * Throws a C-string message if no SNP is retained.
 */
void gcta::filter_snp_max_maf(double max_maf)
{
    if(_mu.empty()) calcu_mu();

    // The SNPs removed are those with MAF above the threshold, so the message says ">".
    cout<<"Pruning SNPs with MAF > "<<max_maf<<" ..."<<endl;
    map<string, int> id_map_buf(_snp_name_map);
    map<string, int>::iterator iter, end=id_map_buf.end();
    int prev_size=_include.size();
    double fbuf=0.0;
    _include.clear();
    _snp_name_map.clear();
    for(iter=id_map_buf.begin(); iter!=end; ++iter){
        fbuf=_mu[iter->second]*0.5;
        // Both the frequency and its complement above the cut-off <=> MAF > max_maf.
        if(fbuf>max_maf && 1.0-fbuf>max_maf) continue;
        _snp_name_map.insert(*iter);
        _include.push_back(iter->second);
    }
    if(_include.size()==0) throw("Error: No SNP is retained for analysis.");
    else{
        // The map is iterated in name order; restore ascending index order.
        stable_sort(_include.begin(), _include.end());
        cout<<"After pruning SNPs with MAF > "<<max_maf<<", there are "<<_include.size()<<" SNPs ("<<prev_size-_include.size()<<" SNPs with MAF > "<<max_maf<<")."<<endl;
    }
}
Пример #7
0
/**
 * Builds the recoded genotype matrix X, one row per kept individual and one
 * column per included SNP.
 *
 * Values are expressed relative to _ref_A: when _allele1 differs from _ref_A
 * the stored value is 2 - value. A missing entry (dosage not below 1e5, or
 * hard-call bits (1, 0)) is first stored as 1e6.
 *
 * @param X             output matrix; cleared and resized here.
 * @param miss_with_mu  when true, missing entries are replaced by the SNP's
 *                      _mu (computed first if _mu is empty); otherwise the
 *                      1e6 marker is left in place.
 *
 * Side effect: in dosage mode, _geno_dose[i] is cleared after row i of X has
 * been filled.
 * NOTE(review): the row cleared is indexed by the loop counter, while the row
 * read is indexed by _keep[i] -- confirm this is intended.
 */
void gcta::make_XMat(vector< vector<float> > &X, bool miss_with_mu)
{
    if(miss_with_mu && _mu.empty()) calcu_mu();

    cout<<"Recoding genotypes (individual major mode) ..."<<endl;
    X.clear();
    X.resize(_keep.size());

    int row=0, col=0;
    for(row=0; row<_keep.size(); row++){
        X[row].resize(_include.size());
        bool has_missing=false;

        for(col=0; col<_include.size(); col++){
            if(_dosage_flag){
                if(_geno_dose[_keep[row]][_include[col]]<1e5){
                    if(_allele1[_include[col]]==_ref_A[_include[col]]) X[row][col]=_geno_dose[_keep[row]][_include[col]];
                    else X[row][col]=2.0-_geno_dose[_keep[row]][_include[col]];
                }
                else{
                    X[row][col]=1e6;
                    has_missing=true;
                }
            }
            else{
                if(!_snp_1[_include[col]][_keep[row]] || _snp_2[_include[col]][_keep[row]]){
                    if(_allele1[_include[col]]==_ref_A[_include[col]]) X[row][col]=_snp_1[_include[col]][_keep[row]]+_snp_2[_include[col]][_keep[row]];
                    else X[row][col]=2.0-(_snp_1[_include[col]][_keep[row]]+_snp_2[_include[col]][_keep[row]]);
                }
                else{
                    X[row][col]=1e6;
                    has_missing=true;
                }
            }
        }
        if(_dosage_flag) _geno_dose[row].clear();

        // Replace the missing marker with the SNP mean (2p per the original comment).
        if(miss_with_mu && has_missing){
            for(col=0; col<_include.size(); col++){
                if(X[row][col]>1e5) X[row][col]=_mu[_include[col]];
            }
        }
    }
}