Beispiel #1
0
//TODO: inline
/**
 * Re-scores the alignment path stored in a backtrace result for one HMM pair.
 *
 * For every step of the path a column score (S) and a secondary-structure score
 * (S_ss) are recorded: match-match steps are scored with Score() and ScoreSS(),
 * every other state gets 0. The alignment score is then corrected by the summed
 * secondary-structure score (only when ss_mode == Hit::SCORE_ALIGNMENT) and by a
 * correlation term over columns up to four steps apart. Template P-values are
 * derived from the corrected score when t->mu is non-zero.
 *
 * @param q_four            container from which query HMM number `elem` is fetched
 * @param t_four            container from which template HMM number `elem` is fetched
 * @param elem              selects the HMM pair and the entry of alignmentScore to use
 * @param backtraceResult   path to score (states, i_steps, j_steps, count)
 * @param alignmentScore    raw alignment scores; only alignmentScore[elem] is read
 * @param ss_hmm_mode       forwarded unchanged to ScoreSS()
 * @return BacktraceScore with scores, P-values and the two arrays allocated here
 *         with new[] (S and S_ss); this function does not release them.
 */
Viterbi::BacktraceScore Viterbi::ScoreForBacktrace(HMMSimd* q_four, HMMSimd* t_four,
        int elem,Viterbi::BacktraceResult * backtraceResult,
        float alignmentScore[VECSIZE_FLOAT],
        int ss_hmm_mode)
{
    const HMM * q = (const HMM *) q_four->GetHMM(elem);
    const HMM * t = (const HMM *) t_four->GetHMM(elem);

    const char * pathStates = backtraceResult->states;
    const int * queryCols = backtraceResult->i_steps;
    const int * templCols = backtraceResult->j_steps;
    const int nsteps = backtraceResult->count;

    // Per-step score arrays; index 0 is never written, steps run from 1 to nsteps.
    float * S = new float[nsteps+1];
    float * S_ss = new float[nsteps+1];
    // NOTE(review): plain new[] signals failure by throwing, so this check is not
    // expected to fire; it is kept as in the original.
    if (!S_ss) MemoryError("space for HMM-HMM alignments", __FILE__, __LINE__, __func__);

    // Record the score along the alignment and sum up the secondary structure score.
    float score_ss = 0.0f;
    for (int step = 1; step <= nsteps; ++step)
    {
        if (pathStates[step] == ViterbiMatrix::MM)
        {
            const int qi = queryCols[step];
            const int tj = templCols[step];
            S[step] = Score(q->p[qi], t->p[tj]);
            S_ss[step] = ScoreSS(q, t, qi, tj, ssw, ss_hmm_mode, S73, S37, S33);
            score_ss += S_ss[step];
        }
        else
        {
            // Any non match-match state (gap in query or template) contributes nothing.
            S[step] = 0.0f;
            S_ss[step] = 0.0f;
        }
    }

    float score = alignmentScore[elem];
    // Remove the SS score that was already added during alignment.
    if (ss_mode == Hit::SCORE_ALIGNMENT) score -= score_ss;

    // Correlation of neighbouring columns: products of column scores that are
    // 1, 2, 3 and 4 steps apart, accumulated lag by lag.
    float Scorr = 0;
    if (nsteps)
    {
        for (int lag = 1; lag <= 4; ++lag)
        {
            for (int step = lag + 1; step <= nsteps; ++step)
            {
                Scorr += S[step] * S[step - lag];
            }
        }
        score += correlation * Scorr;
    }

    // P-values stay at their neutral defaults unless the template carries a mu value.
    float logPvalt = 0.0f;
    float Pvalt = 1.0f;
    if (t->mu)
    {
        logPvalt = logPvalue(score, t->lamda, t->mu);
        Pvalt    = Pvalue(score, t->lamda, t->mu);
    }

    Viterbi::BacktraceScore backtraceScore;
    backtraceScore.S = S;
    backtraceScore.S_ss = S_ss;
    backtraceScore.score = score;
    backtraceScore.score_ss = score_ss;
    // Both sort keys are the negated score.
    backtraceScore.score_sort = -score;
    backtraceScore.score_aass = -score;
    backtraceScore.Pvalt = Pvalt;
    backtraceScore.logPvalt = logPvalt;

    // Dump the scored path when debug logging is enabled.
    if (Log::reporting_level() >= DEBUG1)
    {
        Viterbi::PrintDebug(q, t, &backtraceScore, backtraceResult, ss_mode);
    }

    return backtraceScore;
}
/**
 * Compute the recalibration deltas from the collected base and miss counters.
 *
 * Writes data->total_delta (global delta), data->qual_delta (per quality),
 * data->qual_cycle_delta (per quality and cycle) and data->qual_dinuc_delta
 * (per quality and dinucleotide). Entries whose base counter is zero are left
 * untouched.
 *
 * NOTE(review): the expected error rate is divided by data->total_bases without
 * the +1 applied to the empirical rate, so total_bases == 0 yields a 0/0
 * division -- confirm callers never pass empty statistics.
 *
 * @param data Recalibration statistics; counters are read, delta fields are written.
 * @return NO_ERROR (the only value returned by this function).
 */
ERROR_CODE
recal_calc_deltas(recal_info_t* data)
{
	double empirical_rate, expected_rate, err_prob;
	double miss_ratio;
	int cell;
	int q, k;

	//Time measures
	#ifdef D_TIME_DEBUG
		time_init_slot(D_SLOT_CALC_DELTAS, clock(), TIME_GLOBAL_STATS);
	#endif

	printf("Processing deltas...\n");

	//Global delta: empirical miss rate (with +1 smoothing) against the rate
	//expected from the reported qualities, weighted by bases per quality.
	empirical_rate = (double)(data->total_miss + 1) / (double)(data->total_bases + 1);
	expected_rate = 0.0;
	for(q = 0; q < data->num_quals; q++)
	{
		//Quality value 0 does not contribute to the expected rate
		if(data->min_qual + q == 0)
			continue;

		err_prob = Pvalue((double)(data->min_qual + q));
		expected_rate += err_prob * (double)data->qual_bases[q];
	}
	expected_rate /= (double)data->total_bases;
	data->total_delta = Qvalue(empirical_rate) - Qvalue(expected_rate);

	//Delta R: per-quality deviation left after the global delta
	for(q = 0; q < data->num_quals; q++)
	{
		if(data->qual_bases[q] == 0)
			continue;

		miss_ratio = (double)(data->qual_miss[q]) / (double)(data->qual_bases[q]);
		data->qual_delta[q] = Qvalue(miss_ratio)
			- (double)(q + data->min_qual) - data->total_delta;
	}

	//Delta R,C: per quality and cycle, relative to global + quality delta
	for(q = 0; q < data->num_quals; q++)
	{
		for(k = 0; k < data->num_cycles; k++)
		{
			cell = q * data->num_cycles + k;
			if(data->qual_cycle_bases[cell] == 0)
				continue;

			miss_ratio = (double)(data->qual_cycle_miss[cell]) / (double)(data->qual_cycle_bases[cell]);
			data->qual_cycle_delta[cell] = Qvalue(miss_ratio)
				- (double)(q + data->min_qual) - (data->total_delta + data->qual_delta[q]);
		}
	}

	//Delta R,D: per quality and dinucleotide, relative to global + quality delta
	for(q = 0; q < data->num_quals; q++)
	{
		for(k = 0; k < data->num_dinuc; k++)
		{
			cell = q * data->num_dinuc + k;
			if(data->qual_dinuc_bases[cell] == 0)
				continue;

			miss_ratio = (double)(data->qual_dinuc_miss[cell]) / (double)(data->qual_dinuc_bases[cell]);
			data->qual_dinuc_delta[cell] = Qvalue(miss_ratio)
				- (double)(q + data->min_qual) - (data->total_delta + data->qual_delta[q]);
		}
	}

	printf("Deltas processed.\n");

	#ifdef D_TIME_DEBUG
		time_set_slot(D_SLOT_CALC_DELTAS, clock(), TIME_GLOBAL_STATS);
	#endif

	return NO_ERROR;
}
Beispiel #3
0
/**
 * Traces the MAC alignment path back through the backtrace matrix and scores it.
 *
 * Starting at (hit.i2, hit.j2) the path is followed until a STOP cell is reached.
 * Every visited cell is stored in hit.i / hit.j / hit.states and appended to
 * hit.alt_i / hit.alt_j, and the cells up to two rows/columns away from it are
 * switched off in the backtrace matrix. Afterwards column scores, secondary
 * structure scores and posterior probabilities are written to newly allocated
 * arrays (hit.S, hit.S_ss, hit.P_posterior), and hit.score, the sort scores and
 * the P-values are updated.
 *
 * @param q                 query HMM
 * @param t                 template HMM
 * @param p_mm              posterior matrix queried for every match-match column
 * @param backtrace_matrix  backtrace states; modified here (STOP border, cell-off flags)
 * @param elem              element index passed through to the backtrace matrix
 * @param hit               receives the path, the score arrays and the derived scores
 * @param corr              weight of the neighbour-column correlation term
 */
void PosteriorDecoder::backtraceMAC(HMM & q, HMM & t, PosteriorMatrix & p_mm, ViterbiMatrix & backtrace_matrix, const int elem, Hit & hit, float corr) {
    // Decides whether the path is printed at the end; raised to DEBUG1 below when
    // an invalid state shows up during backtracing.
    LogLevel actual_level = Log::reporting_level();

    initializeBacktrace(t, hit);

    // Make sure backtracing stops when t:M1 or q:M1 is reached: column 1 and
    // row 1 of the matrix are marked STOP.
    for (int row = 0; row <= q.L; ++row)
        backtrace_matrix.setMatMat(row, 1, elem, ViterbiMatrix::STOP);
    for (int col = 1; col <= t.L; ++col)
        backtrace_matrix.setMatMat(1, col, elem, ViterbiMatrix::STOP);

    // In contrast to Viterbi backtracing, STOP marks the first match-match state
    // itself, so the column count starts at 1 to account for it.
    hit.matched_cols = 1;
    hit.state = ViterbiMatrix::MM;

    int step = 0;        // alignment column counter (columns run from 1 to nsteps)
    int i = hit.i2;      // query index of the last aligned pair
    int j = hit.j2;      // template index of the last aligned pair

    if (backtrace_matrix.getMatMat(i, j, elem) == ViterbiMatrix::MM) {
        while (hit.state != ViterbiMatrix::STOP) {
            ++step;
            hit.state = backtrace_matrix.getMatMat(i, j, elem);
            hit.states[step] = hit.state;
            hit.i[step] = i;
            hit.j[step] = j;
            hit.alt_i->push_back(i);
            hit.alt_j->push_back(j);

            // Exclude cells in the direct neighbourhood from all further alignments.
            const int lastRow = imin(i + 2, q.L);
            for (int ii = imax(i - 2, 1); ii <= lastRow; ++ii)
                backtrace_matrix.setCellOff(ii, j, elem, true);
            const int lastCol = imin(j + 2, t.L);
            for (int jj = imax(j - 2, 1); jj <= lastCol; ++jj)
                backtrace_matrix.setCellOff(i, jj, elem, true);

            switch (hit.state) {
                case ViterbiMatrix::MM:
                    hit.matched_cols++;
                    i--;
                    j--;
                    break;
                case ViterbiMatrix::IM:
                    j--;
                    break;
                case ViterbiMatrix::MI:
                    i--;
                    break;
                case ViterbiMatrix::STOP:
                    break;
                default:
                    fprintf(stderr,"Error: unallowed state value %i occurred during backtracing at step %i, (i,j)=(%i,%i)\n", hit.state, step, i, j);
                    // Abort the trace and force the debug dump of the path.
                    hit.state = 0;
                    actual_level = DEBUG1;
                    break;
            }
        }
    } else {
        // The start cell is not a match-match state: record it as an empty path.
        if (Log::reporting_level() > DEBUG)
            fprintf(stderr,"Error: backtrace does not start in match-match state, but in state %i, (i,j)=(%i,%i)\n",backtrace_matrix.getMatMat(i, j, elem),i,j);

        step = 0;
        hit.i[step] = i;
        hit.j[step] = j;
        hit.alt_i->push_back(i);
        hit.alt_j->push_back(j);
        hit.state = ViterbiMatrix::STOP;
    }

    hit.i1 = hit.i[step];
    hit.j1 = hit.j[step];
    hit.states[step] = ViterbiMatrix::MM;   // the first (STOP) state is stored as MM
    hit.nsteps = step;

    // Allocate new space for the per-column alignment scores.
    hit.S = new float[hit.nsteps+1];
    hit.S_ss = new float[hit.nsteps+1];
    hit.P_posterior = new float[hit.nsteps+1];

    // NOTE(review): plain new[] signals failure by throwing, so this check is not
    // expected to fire; it is kept as in the original.
    if (!hit.P_posterior)
        MemoryError("space for HMM-HMM alignments", __FILE__, __LINE__, __func__);

    // Record score, secondary structure score and posterior along the alignment.
    hit.score_ss = 0.0f;
    hit.sum_of_probs = 0.0;
    const int ssm = hit.ssm1 + hit.ssm2;

    for (int col = 1; col <= hit.nsteps; ++col) {
        if (hit.states[col] == ViterbiMatrix::MM) {
            const int qi = hit.i[col];
            const int tj = hit.j[col];

            hit.S[col] = Score(q.p[qi], t.p[tj]);
            hit.S_ss[col] = Viterbi::ScoreSS(&q, &t, qi, tj, ssw, ssm, S73, S37, S33);
            hit.score_ss += hit.S_ss[col];
            hit.P_posterior[col] = p_mm.getPosteriorValue(hit.i[col], hit.j[col]);

            // Count the probability only if no dssp states are given, or if the
            // dssp state of this template column is greater than 0.
            if (t.nss_dssp<0 || t.ss_dssp[tj]>0)
                hit.sum_of_probs += hit.P_posterior[col];
        } else {
            // Gap in template or query: nothing is scored for this column.
            hit.S[col] = 0.0f;
            hit.S_ss[col] = 0.0f;
            hit.P_posterior[col] = 0.0f;
        }
    }

    // Subtract the SS score that was added during alignment.
    if (hit.ssm2 >= 1)
        hit.score -= hit.score_ss;

    // Correlation of neighbouring columns: products of column scores that are
    // 1, 2, 3 and 4 steps apart, accumulated lag by lag.
    float Scorr = 0;
    if (hit.nsteps) {
        for (int lag = 1; lag <= 4; ++lag) {
            for (int col = 1; col <= hit.nsteps - lag; ++col)
                Scorr += hit.S[col] * hit.S[col + lag];
        }
        hit.score += corr * Scorr;
    }

    // Set score, P-value etc.
    hit.score_aass = -hit.score;
    hit.score_sort = hit.score_aass;
    hit.logPval = 0;
    hit.Pval = 1;
    if (t.mu) {
        hit.logPvalt = logPvalue(hit.score, t.lamda, t.mu);
        hit.Pvalt = Pvalue(hit.score,t.lamda,t.mu);
    } else {
        hit.logPvalt = 0;
        hit.Pvalt = 1;
    }

    //DEBUG: print the MAC alignment path, from the last recorded step down to step 1
    //TODO bad debugging code
    if (actual_level >= DEBUG1) {
        float sum_post = 0.0;
        printf("NAME=%7.7s score=%7.3f  score_ss=%7.3f\n", hit.name, hit.score, hit.score_ss);
        printf("step  Q T    i    j  state   score    T Q cf ss-score   P_post Sum_post\n");
        for (int s = hit.nsteps; s >= 1; --s) {
            if (hit.states[s] == ViterbiMatrix::MM) {
                sum_post += hit.P_posterior[s];
                printf("%4i  %1c %1c ",s,q.seq[q.nfirst][hit.i[s]], hit.seq[hit.nfirst][hit.j[s]]);
            } else if (hit.states[s] == ViterbiMatrix::IM) {
                printf("%4i  - %1c ",s, hit.seq[hit.nfirst][hit.j[s]]);
            } else if (hit.states[s] == ViterbiMatrix::MI) {
                printf("%4i  %1c - ",s,q.seq[q.nfirst][hit.i[s]]);
            }
            printf("%4i %4i     %2i %7.1f    ", hit.i[s], hit.j[s],static_cast<int>(hit.states[s]), hit.S[s]);
            printf("%c %c  %1i  %7.1f  ", i2ss(t.ss_dssp[hit.j[s]]),i2ss(q.ss_pred[hit.i[s]]),q.ss_conf[hit.i[s]]-1, hit.S_ss[s]);
            printf("%7.5f  %7.2f\n", hit.P_posterior[s],sum_post);
        }
    }
}