Exemplo n.º 1
0
Arquivo: hmm.C Projeto: bnv2103/NGS
/*
 * Posterior (forward-backward) decoding of every sequence in obsSeq.
 *
 * For each sequence the scaled forward and backward passes are run, the
 * per-observation posterior of each state is formed as
 * alpha[t][k] * beta[t][k] * scale[t], and the READ carried by observation t
 * is assigned whichever of states 1 and 2 has the larger posterior.
 * UpdateGenotypes(snp_start, snp_end) is then called and the chosen state of
 * observations 2..T is written to outFile together with the read position.
 *
 * obsSeq     observation sequences (1-based, as indexed below)
 * outFile    receives one "<state>\t<read position>" line per observation 2..T
 * snp_start  forwarded unchanged to UpdateGenotypes
 * snp_end    forwarded unchanged to UpdateGenotypes
 */
void CHMM::FindFBDistance(CObsSeq *obsSeq, ostream &outFile, long snp_start, long snp_end)
{
	for(int i=1;i<=obsSeq->mNbSequences;i++){ // Loop over observation files:
		long T = obsSeq->mNbObs[i];
		CObs **obs = obsSeq->mObs[i];
		// Work buffers are sized per sequence, so they are allocated inside the loop.
		double **alpha = SetMatrix(T, mN);
		double **beta = SetMatrix(T, mN);
		double *scale = SetVector(T);
		int *q = SetIntVector(T);
		double **posterior = SetMatrix(T,mN);

		// Haplotype Calling
		// The forward pass fills alpha and scale; its return value (log P(x)
		// according to the original comment) is not needed here.
		ForwardAlgo(alpha, scale, obs, T, TRUE);
		BackwardAlgo(beta, scale, obs, T);

		for(long t=1;t<=T;t++) {
			READ *nd = (((CFlexibleObs<READ*>*)(obs[t]))->Get(1));
			for(int k=1;k<=mN;k++) {
				// Experimental: P(x) cancels scaling for alpha and beta
				// so posterior is just their product
				double post = (alpha[t][k]*beta[t][k]*scale[t]);
#ifdef DEBUG
cout << endl;
cout << "Scale = " << scale[t] << endl;
cout << "Scaled alpha = " << alpha[t][k] << endl;
cout << "beta = " << beta[t][k] << endl;
cout << "Scaled beta = " << beta[t][k]*scale[t] << endl;
#endif
				posterior[t][k] = post;
			}
			// REVISIT: hardcoding #states. Only states 1 and 2 are compared,
			// so this reads posterior[t][2] and needs mN >= 2.
			q[t] = posterior[t][1] > posterior[t][2] ? 1 : 2;
#ifdef DEBUG
//cout << "q[t] = " << q[t] << endl;
//cout << "posterior = " << posterior[t][q[t]] << endl;
#endif
			nd->assignHaplotype(q[t],posterior[t][q[t]]);
		}

		UpdateGenotypes(snp_start, snp_end);

		// Output starts at the second observation; the position is taken from
		// the global reads_list, indexing its last T entries.
		// NOTE(review): presumably the first observation is skipped on purpose
		// (e.g. already reported by a previous window) - confirm with callers.
		for(long j=2;j<=T;j++) {
			outFile << q[j] << "\t" << ((reads_list)[reads_list.size()-T+j-1])->GetPos() << endl;
		}
		delete [] q;
		// NOTE(review): alpha, beta, scale and posterior are never released in
		// this function, so they leak once per sequence. Free them here with
		// whatever deallocator pairs with SetMatrix/SetVector (not visible from
		// this block).
	}
}
Exemplo n.º 2
0
/*
 * Assigns a haplotype to each of the last T-1 reads in the global reads_list
 * by comparing it with the read immediately before it.
 *
 * For every adjacent pair (reads_list[t-1], reads_list[t]) the known SNPs
 * returned by GetKnownSnpList are scored with compute_new_emission for
 * relative haplotypes 1 and 2. The scores are summed per haplotype and
 * normalised into haprob[]; the larger one wins, with ties broken on the
 * parity of t. `flag` is set when a later SNP favours the opposite haplotype
 * from the first SNP (discordance). The result is stored on the read via
 * assignHaplotype.
 *
 * snp_start, snp_end  unused by this function (kept for interface compatibility)
 * T                   window size: reads reads_list_size-T+1 .. reads_list_size-1
 *                     (0-based) are processed, so T must not exceed the number
 *                     of reads or reads_list[t-1] is indexed out of range
 */
void NaiveBayes(int snp_start, int snp_end, int T)
{
	(void)snp_start;
	(void)snp_end;

	// Scratch buffers filled by GetKnownSnpList. Capacities are the same as the
	// heap allocations they replace; stack storage needs no delete[] and cannot
	// leak if a callee throws.
	// NOTE(review): nothing visible here bounds known_snp_count to
	// kMaxKnownSnps - confirm GetKnownSnpList enforces it.
	const int kMaxKnownSnps = 100;
	const int kMaxKnownIndex = 200;
	SNP *known_snp_list[kMaxKnownSnps];
	int known_index[kMaxKnownIndex];
	int reads_list_size = reads_list.size();

	for (int t = 1+reads_list_size-T; t <= reads_list_size-1; t++) {
		int known_snp_count = 0, flag = 0, direction = 1, hap = 0;
		double norm = 0.0;
		// Index 0 of the first dimension is unused; haplotypes are 1 and 2.
		double emission_list[3], haprob[3];
		// Zero-initialised so the discordance check below never reads
		// indeterminate values (no known SNPs, or a zero emission).
		double emission_set[3][kMaxKnownSnps] = {{0.0}};
		READ *pd = reads_list[t-1];
		READ *nd = reads_list[t];
		int plen = pd->GetLen();
		int nlen = nd->GetLen();
		int ppos = pd->GetPos();
		int npos = nd->GetPos();
		// Overlap of the two reads; only consumed by the DEBUG trace below.
		int rd_start = ppos < npos ? npos : ppos;
		int rd_end = ppos+plen > npos+nlen ? npos+nlen-1 : ppos+plen-1;

		//mismatchsum = GetMismatchSum(nd);
		//if(mismatchsum>=50)
		//	continue;

		GetKnownSnpList(known_snp_list, &known_snp_count, known_index, t);
		// This adds the count of known overlapping snps between adjacent reads
		(*nd).AddKnownCount(known_snp_count);
#ifdef DEBUG
if(known_snp_count>0)
cout << "Read " << nd->GetPos() << " ; "<< t << " and " << t+1 << ", " << rd_start << ", " << rd_end << " with " << known_snp_count << " known snps." << endl << endl;
cout << "SnpPos  R A LL..\tAA_gen AA_obs AA_genob\tAB_gen AB_obs AB_genob\tBB_gen BB_obs BB_genob\tprob\n";
cout << "Throwing emissions from read: " << nd->GetPos() << endl;
#endif

		for(int j=1;j<=2;j++) {
#ifdef DEBUG
cout << "Haplotype " << j << endl;
#endif
			emission_list[j] = 0.0;
			for(int count=0; count<known_snp_count; count++) {
#ifdef DEBUG
				SNP *sp = known_snp_list[count];
cout << sp->GetPos() << " " << sp->GetRef() << " " << sp->GetAlt();
#endif
				// Obtains naive bayes score for the count-th overlapping known snp
				// between the adjacent reads for haplotype, 'j'.
				// Working with relative haplotype here.
				double emission = compute_new_emission(known_snp_list, count, t, known_index, j);
				// A zero emission contributes nothing to the sum but is still
				// recorded, so the per-SNP comparison sees 0.0 rather than garbage.
				emission_list[j] += emission;
				emission_set[j][count] = emission;
			}
#ifdef DEBUG
cout << "Total Emission = " << emission_list[j] << endl;
#endif
		}

		// Discordance: the first SNP fixes which haplotype is favoured; any later
		// SNP that strictly favours the other one raises the flag.
		if(emission_set[1][0]<emission_set[2][0])
			direction = 2;
		for(int count=1; count<known_snp_count; count++) {
			if(emission_set[1][count]>emission_set[2][count]&&direction==2)
				flag = 1;
			if(emission_set[1][count]<emission_set[2][count]&&direction==1)
				flag = 1;
		}
#ifdef DEBUG
cout << "Discordance = " << flag << endl;
#endif
		norm = emission_list[1] + emission_list[2];
		if(norm!=0.0) {
			haprob[1] = emission_list[1] / norm;
			haprob[2] = emission_list[2] / norm;
		} else {
			// No evidence either way.
			haprob[1] = 0.5;
			haprob[2] = 0.5;
		}
#ifdef DEBUG
cout << "Happrob[1] = " << haprob[1] << endl;
cout << "Happrob[2] = " << haprob[2] << endl;
#endif
		// If haplotype probabilities are equal, the tie is broken
		// deterministically on the parity of the read index t (not randomly).
		hap = haprob[1] > haprob[2] ? 1 : haprob[1] == haprob[2] ? t%2+1 : 2;
		double happrob = haprob[hap];
		// convert relative haplotype to absolute haplotype
		hap = (*pd).GetHap() == hap ? 1 : 2;
		nd->assignHaplotype(hap, happrob, flag);
	}
}