Esempio n. 1
0
/**
 * Hardy-Weinberg test value for the genotype counts held by this counter.
 *
 * @return 0.0 when the three counts sum to zero or any of them is negative;
 *         otherwise whatever SNPHWE(nHet, nHomRef, nHomAlt) returns.
 */
double GenotypeCounter::getHWE() const {
  const bool noGenotypes = (nHomRef + nHet + nHomAlt == 0);
  const bool anyNegative = (nHet < 0 || nHomRef < 0 || nHomAlt < 0);

  // Degenerate counts: nothing meaningful to hand to SNPHWE.
  if (noGenotypes || anyNegative) {
    return 0.0;
  }
  return SNPHWE(nHet, nHomRef, nHomAlt);
}
Esempio n. 2
0
	/**
	 * Summarise one SNP's genotype vector into nine statistics.
	 *
	 * @param gt   genotype codes, one per individual. Code 0 is skipped (treated
	 *             as not measured); codes 1, 2 and 3 are tallied into three
	 *             classes. Any other code is also skipped, because it would
	 *             otherwise index outside the 3-element tally.
	 * @param nids number of entries in gt.
	 * @param out  caller-provided array of at least 9 doubles, fully overwritten:
	 *             out[0]  number of tallied (measured) genotypes
	 *             out[1]  out[0] / nids, or 0 when nids == 0
	 *             out[2]  sum of (code - 1) over measured genotypes divided by
	 *                     2 * out[0]; 0 when nothing was measured
	 *                     (presumably an allele frequency, with codes 1/2/3
	 *                     carrying 0/1/2 copies -- confirm against callers)
	 *             out[3..5] tallies of codes 1, 2 and 3
	 *             out[6]  SNPHWE(count of code 2, count of code 1, count of
	 *                     code 3); 1.0 when nothing was measured
	 *             out[7]  fmax, (4*n1*n3 - n2^2) / ((2*n1 + n2) * (2*n3 + n2));
	 *                     0 when the smaller of out[2] and 1 - out[2] is <= 1e-16
	 *             out[8]  2 * (loglik1 - loglik0), a likelihood-ratio statistic
	 *                     comparing the model with fmax against the model
	 *                     without it; 0 under the same condition as out[7]
	 */
	void snp_summary_exhwe_Processor(unsigned int *gt, unsigned int nids, double *out) {
		unsigned int count[3] = {0, 0, 0};
		double p = 0.;

		for (unsigned int i = 0; i < 9; i++) out[i] = 0.;

		for (unsigned int i = 0; i < nids; i++) {
			const unsigned int code = gt[i];
			// Only 1..3 are valid tally indices; a larger code used to write
			// past the end of count[].
			if (code < 1 || code > 3) continue;
			count[code - 1]++;
			p += (code - 1);
		}

		const double meaids = 1.*(count[0]+count[1]+count[2]);
		out[0] = meaids;
		// With nids == 0 the plain ratio would be 0/0 (NaN).
		out[1] = (nids > 0) ? meaids/nids : 0.0;
		out[2] = (meaids > 0) ? p/(2.*meaids) : 0.0;
		out[3] = count[0];
		out[4] = count[1];
		out[5] = count[2];

		if (!(meaids > 0)) {
			// Nothing measured: no test can be run.
			out[6] = 1.0;
			return;
		}

		out[6] = SNPHWE(count[1],count[0],count[2]);

		const double pmax = out[2];
		const double qmax = 1.-pmax;
		const double maf = (pmax < qmax) ? pmax : qmax;

		if (maf > 1.e-16) {
			// maf > 0 guarantees both factors of the denominator are non-zero.
			const double fmax = (4.*count[0]*count[2] - 1.*count[1]*count[1])/((2.*count[0]+1.*count[1])*(2.*count[2]+1.*count[1]));

			// Log-likelihood without fmax; empty classes are skipped so that
			// log(0) is never multiplied by a zero count.
			double loglik0 = 0.;
			if (count[0]) loglik0 += 2.*count[0]*log(qmax);
			if (count[1]) loglik0 += 1.*count[1]*log(2.*qmax*pmax);
			if (count[2]) loglik0 += 2.*count[2]*log(pmax);

			// Log-likelihood with genotype probabilities adjusted by fmax.
			double loglik1 = 0.;
			if (count[0]) loglik1 += 1.*count[0]*log(qmax*qmax+qmax*pmax*fmax);
			if (count[1]) loglik1 += 1.*count[1]*log(2.*qmax*pmax*(1.-fmax));
			if (count[2]) loglik1 += 1.*count[2]*log(pmax*pmax+qmax*pmax*fmax);

			out[7] = fmax;
			out[8] = 2*(loglik1-loglik0);
		} else {
			// Only one allele observed: fmax is undefined, report zeros.
			out[7] = 0.;
			out[8] = 0.;
		}
	}
int main (int argc, char *argv[]) {
	//HARDY WEINBERG EQUILIBRIUM QUALITY CONTROL
	//Get the data	
	Datacube datacube;
	datacube.loadData(argv[1]);
	datacube.setVariablesMeaning(argv[6]);
	
	int countsCol = datacube.values.at(0).size() - 1;//The column with the counts

	//Set the threshold
	float threshold;
	threshold = atof(argv[2]);

	//Set the target variables index if the test will only be applied on controls
	bool onlyOnControls = false;
	int targetVarIndex = countsCol - 1;
	int CONTROL_VAL;
	if(argv[3][0] == '1')
	{
		onlyOnControls = true;
		CONTROL_VAL = getControlValue(&datacube,targetVarIndex);
	}
	
	
	bool keep[countsCol];
	for(int i=0; i<datacube.values.at(0).size(); ++i)
	{
        //initialize the values of the vectors
		keep[i] = true;
	}
	
    //initialize the counters
	int minorHom=0, majorHom=0, h1count=0, h2count=0, heterozygous=0;
	
	//Check which variables pass the HWE QC
	double pValue;
	for(int column=0;column<countsCol;column++) 
	{
        //Only if the variable is a SNP
		if(datacube.getVariableType(column).compare("SNP")==0)
		{
			h1count = 0;h2count = 0; heterozygous = 0;
			for(int row=0;row<datacube.values.size();row++)
			{	
				//chech if only controls should be used 
				if(!onlyOnControls || (onlyOnControls && datacube.values.at(row).at(targetVarIndex) == CONTROL_VAL))
				{
					switch(datacube.values.at(row).at(column))
					{
                        //count the minor alleles
						case HOMOZYGOUS_1:
							h1count += datacube.values.at(row).at(countsCol);
							break;
						//count the heterozygous alleles
						case HETEROZYGOUS:
							heterozygous += datacube.values.at(row).at(countsCol);
							break;	
						//count the major alleles
						case HOMOZYGOUS_2:
							h2count += datacube.values.at(row).at(countsCol);
					}
				}
			}
				
			//Set the minor and major alleles
			if(h1count<h2count)
			{
				minorHom = h1count;
				majorHom = h2count;
			}
			else
			{
				minorHom = h2count;
				majorHom = h1count;				
			}
				
			//If no alleles were counted (e.g. all missing, no controls)
			if(h1count == 0 && h2count==0 && heterozygous==0)
			{
				keep[column] = false;
			}
			else
			{	
				//calculate the HWE pValue
				pValue = SNPHWE(heterozygous,minorHom,majorHom);
				//Check if the SNP will be kept
				if(pValue < threshold )
					keep[column] = false;
			}
		}
	}

	//Remove the variables that did not pass the HWE QC
    datacube.removeVariables(keep);
    //output the data cube
	datacube.writeToFile(argv[4]);		
	
	
	//Output the XML contents describing the algorithm
	ofstream outpXML;
	outpXML.open(argv[5]);
	
	outpXML << "<log>" << endl;
	outpXML << "<QualityControl algorithm_name=\"Hardy Weinberg Equilibrium Test\" algorithmParamsSetting=\"threshold=" << argv[2];
	outpXML << " only_on_controls=" << argv[3] << "\"/>" << endl;
	outpXML << "</log>";	
	
	outpXML.close();
	
}