static void extendHitLeft(int qMax, int tMax, char **pStartQ, char **pStartT,
    int (*scoreMatch)(char a, char b), int maxDown)
/* Slide the start of a hit leftwards.  Characters immediately before
 * *pStartQ and *pStartT are scored pairwise with scoreMatch, walking
 * at most min(qMax, tMax) positions back.  The running total is tracked
 * and the offset where it was highest is remembered; the walk is
 * abandoned once it has gone more than maxDown positions past that
 * offset without setting a new high.  On return both pointers are moved
 * to the best offset found (unchanged if nothing beat a score of zero). */
{
    char *qBase = *pStartQ;
    char *tBase = *pStartT;
    int limit = -min(qMax, tMax);   /* most negative offset we may read */
    int running = 0;                /* cumulative score of the extension */
    int best = 0;                   /* highest cumulative score so far */
    int bestOffset = 0;             /* offset at which 'best' was reached */
    int off = -1;

    while (off >= limit)
        {
        running += scoreMatch(qBase[off], tBase[off]);
        if (running > best)
            {
            best = running;
            bestOffset = off;
            }
        else if (off < bestOffset - maxDown)
            {
            /* Too far past the best point with no improvement. */
            break;
            }
        --off;
        }

    *pStartQ = qBase + bestOffset;
    *pStartT = tBase + bestOffset;
}
int ffScoreSomething(struct ffAli *ali, enum ffStringency stringency, boolean isProt)
/* Return the total score of an alignment: the sum of the per-block
 * match scores (aaScoreMatch for protein, dnaScoreMatch otherwise)
 * minus ffGapPenalty for every pair of adjacent blocks.  The list is
 * rewound to its leftmost block first, so any block may be passed in.
 * A NULL alignment scores -0x7FFFFFFF. */
{
    int (*blockScorer)(char *a, char *b, int size);
    struct ffAli *cur;
    struct ffAli *next;
    int total = 0;

    if (ali == NULL)
        return -0x7FFFFFFF;

    if (isProt)
        blockScorer = aaScoreMatch;
    else
        blockScorer = dnaScoreMatch;

    /* Rewind to the leftmost block. */
    cur = ali;
    while (cur->left != NULL)
        cur = cur->left;

    /* Accumulate block scores, charging a gap penalty between neighbours. */
    for ( ; cur != NULL; cur = next)
        {
        int blockLen = cur->hEnd - cur->hStart;
        int blockScore;

        next = cur->right;
        blockScore = blockScorer(cur->hStart, cur->nStart, blockLen);
        total += blockScore;
        if (next != NULL)
            total -= ffGapPenalty(cur, next, stringency);
        }
    return total;
}
static void extendHitRight(int qMax, int tMax, char **pEndQ, char **pEndT,
    int (*scoreMatch)(char a, char b), int maxDown)
/* Slide the end of a hit rightwards.  Characters starting at *pEndQ and
 * *pEndT are scored pairwise with scoreMatch for at most min(qMax, tMax)
 * positions.  The running total is tracked and the offset where it was
 * highest is remembered; the walk is abandoned once it has gone more
 * than maxDown positions past that offset without setting a new high.
 * On return both pointers are placed one past the best offset found
 * (unchanged if nothing beat a score of zero). */
{
    char *qBase = *pEndQ;
    char *tBase = *pEndT;
    int limit = min(qMax, tMax);    /* number of positions we may read */
    int running = 0;                /* cumulative score of the extension */
    int best = 0;                   /* highest cumulative score so far */
    int bestOffset = -1;            /* offset of 'best'; -1 means no extension */
    int off = 0;

    while (off < limit)
        {
        running += scoreMatch(qBase[off], tBase[off]);
        if (running > best)
            {
            best = running;
            bestOffset = off;
            }
        else if (off > bestOffset + maxDown)
            {
            /* Too far past the best point with no improvement. */
            break;
            }
        ++off;
        }

    *pEndQ = qBase + bestOffset + 1;
    *pEndT = tBase + bestOffset + 1;
}
static int findCrossover(struct ffAli *left, struct ffAli *right, int overlap, boolean isProt)
/* Pick the position inside the overlap of two blocks at which to switch
 * from the left block to the right block.  The result is an offset from
 * the start of the overlapping region, which coincides with the start
 * of the right block.  The scan starts from the score of using the
 * right block for the whole overlap, then hands one position at a time
 * to the left block; the offset after the position giving the strictly
 * highest running score wins (0 if nothing beats the starting score). */
{
    char *needle = right->nStart;
    char *leftHay = left->hEnd - overlap;
    char *rightHay = right->hStart;
    int (*pairScore)(char a, char b);
    int running;
    int best;
    int crossover = 0;
    int pos = 0;

    if (isProt)
        {
        pairScore = aaScore2;
        running = aaScoreMatch(needle, rightHay, overlap);
        }
    else
        {
        pairScore = dnaScore2;
        running = dnaScoreMatch(needle, rightHay, overlap);
        }
    best = running;

    while (pos < overlap)
        {
        char n = needle[pos];

        /* Give this position to the left block instead of the right one. */
        running += pairScore(leftHay[pos], n);
        running -= pairScore(rightHay[pos], n);
        ++pos;
        if (running > best)
            {
            best = running;
            crossover = pos;
            }
        }
    return crossover;
}
//------------------------------------------ Private Function Definitions ----//
bool _alignEngine
     (const char * A0, long int Astart, long int & Aend,
      const char * B0, long int Bstart, long int & Bend,
      vector<long int> & Delta, unsigned int m_o)

     //  Core banded dynamic-programming routine. Scores the edit matrix one
     //  diagonal at a time, keeping DELETE, INSERT and MATCH scores per node,
     //  trimming nodes that fall too far below the best score seen so far.
     //
     //  A0 is a sequence such that A [1...\0]
     //  B0 is a sequence such that B [1...\0]
     //  The alignment should use bases A [Astart...Aend] (inclusive)
     //  The alignment should use bases B [Bstart...Bend] (inclusive)
     //       of [Aend...Astart] etc. if BACKWARD_SEARCH
     //       Aend must never equal Astart, same goes for Bend and Bstart
     //  Aend and Bend are in/out: they are read to size the matrix (N, M)
     //       and are overwritten with the finish position before returning
     //  Delta is an integer vector, not necessarily empty; it is handed to
     //       generateDelta only when SEARCH_BIT is clear in m_o
     //  m_o is the modus operandi of the function:
     //      FORWARD_ALIGN, FORWARD_SEARCH, BACKWARD_SEARCH
     //  Returns true on success (Aend & Bend reached) or false on failure
     //
     //  All memory obtained here (the diagonal list and every node array)
     //  is freed before the function returns.

{
  Diagonal * Diag;      // the list of diagonals to make up edit matrix
  bool TargetReached;   // the target was reached

  const char * A, * B;  // the sequence pointers to be used by this func

  long int min_score = (-1 * LONG_MAX);    // minimum possible score
  long int high_score = min_score;         // global maximum score
  long int xhigh_score = min_score;        // non-optimal high score

  // max score difference
  // (a node scoring more than this below high_score is trimmed)
  long int max_diff = GOOD_SCORE [getMatrixType( )] * _break_len;

  long int CDi;              // conceptual diagonal index (not relating to mem)
  long int Dct, Di;          // diagonal counter, actual diagonal index
  long int PDct, PPDct;      // previous diagonal and prev prev diagonal
  long int PDi, PPDi;        // previous diagonal index and prev prev diag index
  long int Ds, PDs, PPDs;    // diagonal size, prev, prev prev diagonal size
                             //   where 'size' = rbound - lbound + 1
  long int Ll = 100;         // capacity of the diagonal list
  long int Dl = 2;           // current conceptual diagonal length
  long int lbound = 0;       // current diagonal left(lower) node bound index
  long int rbound = 0;       // current diagonal right(upper) node bound index
  long int FinishCt = 0;     // diagonal containing the high_score
  long int FinishCDi = 0;    // conceptual index of the high_score on FinishCt
  long int xFinishCt = 0;    // non-optimal ...
  long int xFinishCDi = 0;   // non-optimal ...
  long int N, M, L;          // maximum matrix dimensions... N rows, M columns
                             //   L is the smaller of N and M
  long int tlb, trb;         // temporary left/right bounds for the hard band
  double Dmid = .5;          // diag midpoint
  double Dband = _banding/2.0; // diag banding

  int Iadj, Dadj, Madj;      // insert, delete and match adjust values

#ifdef _DEBUG_VERBOSE
  long int MaxL = 0;         // biggest diagonal seen
  long int TrimCt = 0;       // counter of nodes trimmed
  long int CalcCt = 0;       // counter of nodes calculated
#endif

  //-- Set up character pointers for the appropriate m_o
  //   With DIRECTION_BIT set the range runs Astart..Aend; otherwise it runs
  //   Aend..Astart, so the base pointers and the lengths are mirrored.
  if ( m_o & DIRECTION_BIT )
    {
      A = A0 + ( Astart - 1 );
      B = B0 + ( Bstart - 1 );
      N = Aend - Astart + 1;
      M = Bend - Bstart + 1;
    }
  else
    {
      A = A0 + ( Astart + 1 );
      B = B0 + ( Bstart + 1 );
      N = Astart - Aend + 1;
      M = Bstart - Bend + 1;
    }

  //-- Initialize the diagonals list
  Diag = (Diagonal *) Safe_malloc ( Ll * sizeof(Diagonal) );

  //-- Initialize position 0,0 in the matrices
  //   Diagonal 0 is the single start node: MATCH scores 0 and is marked
  //   START, while DELETE and INSERT are set to min_score and marked NONE.
  //   rbound is post-incremented so the first real diagonal spans 0..1.
  Diag[0] . lbound = lbound;
  Diag[0] . rbound = rbound ++;

  Diag[0] . I = (Node *) Safe_malloc ( 1 * sizeof(Node) );
  Diag[0] . I[0] . S[DELETE] . value = min_score;
  Diag[0] . I[0] . S[INSERT] . value = min_score;
  Diag[0] . I[0] . S[MATCH] . value = 0;
  Diag[0] . I[0] . max = Diag[0] . I[0] . S + MATCH;

  Diag[0] . I[0] . S[DELETE] . used = NONE;
  Diag[0] . I[0] . S[INSERT] . used = NONE;
  Diag[0] . I[0] . S[MATCH] . used = START;

  L = N < M ? N : M;

  //-- **START** of diagonal processing loop
  //-- Calculate the rest of the diagonals until goal reached or score worsens
  //   The loop stops when any of these holds: the last diagonal (N + M) has
  //   been processed, more than _break_len diagonals have passed since the
  //   one holding the high score, or trimming has emptied the diagonal
  //   (lbound > rbound).
  for ( Dct = 1;
        Dct <= N + M  &&  (Dct - FinishCt) <= _break_len  &&  lbound <= rbound;
        Dct++ )
    {
      //-- If diagonals capacity exceeded, realloc
      if ( Dct >= Ll )
        {
          Ll *= 2;
          Diag = (Diagonal *) Safe_realloc
            ( Diag, sizeof(Diagonal) * Ll );
        }

      Diag[Dct] . lbound = lbound;
      Diag[Dct] . rbound = rbound;

      //-- malloc space for the edit char and score nodes
      Ds = rbound - lbound + 1;
      Diag[Dct] . I = (Node *) Safe_malloc
        ( Ds * sizeof(Node) );

#ifdef _DEBUG_VERBOSE
      //-- Keep count of trimmed and calculated nodes
      CalcCt += Ds;
      TrimCt += Dl - Ds;
      if ( Ds > MaxL )
        MaxL = Ds;
#endif

      //-- Set diagonal index adjustment values
      //   The offsets from a node's conceptual index to its parent and
      //   grandparent indices change once Dct exceeds N.
      if ( Dct <= N )
        {
          Iadj = 0;
          Madj = -1;
        }
      else
        {
          Iadj = 1;
          Madj = Dct == N + 1 ? 0 : 1;
        }
      Dadj = Iadj - 1;

      //-- Set parent diagonal values
      //   PDi becomes the in-memory index, within the parent diagonal, of
      //   the DELETE predecessor of the first node on this diagonal.
      PDct = Dct - 1;
      PDs = Diag[PDct] . rbound - Diag[PDct] . lbound + 1;
      PDi = lbound + Dadj;
      PDi = PDi - Diag[PDct] . lbound;

      //-- Set grandparent diagonal values
      //   Diagonal 1 has no grandparent; PPDs == 0 makes the MATCH range
      //   test below fail for every node in that case.
      PPDct = Dct - 2;
      if ( PPDct >= 0 )
        {
          PPDs = Diag[PPDct] . rbound - Diag[PPDct] . lbound + 1;
          PPDi = lbound + Madj;
          PPDi = PPDi - Diag[PPDct] . lbound;
        }
      else
        PPDi = PPDs = 0;

      //-- If forced alignment, don't keep track of global max
      //   (resetting here makes high_score the maximum of this diagonal only)
      if ( m_o & FORCED_BIT )
        high_score = min_score;

      //-- **START** of internal node scoring loop
      //-- Calculate scores for every node (within bounds) for diagonal Dct
      for ( CDi = lbound; CDi <= rbound; CDi ++ )
        {
          //-- Set the index (in memory) of current node and clear score
          Di = CDi - Diag[Dct] . lbound;

          //-- Calculate DELETE score
          //   Candidates come from parent node PDi: its DELETE state plus
          //   the continue-gap score, or its INSERT / MATCH state plus the
          //   open-gap score. A parent state marked NONE is passed through
          //   without the addition.
          if ( PDi >= 0  &&  PDi < PDs )
            scoreEdit
              (Diag[Dct] . I[Di] . S[DELETE],
               Diag[PDct] . I[PDi] . S[DELETE] . used == NONE ?
               Diag[PDct] . I[PDi] . S[DELETE] . value :
               Diag[PDct] . I[PDi] . S[DELETE] . value +
               CONT_GAP_SCORE [_matrix_type],
               Diag[PDct] . I[PDi] . S[INSERT] . used == NONE ?
               Diag[PDct] . I[PDi] . S[INSERT] . value :
               Diag[PDct] . I[PDi] . S[INSERT] . value +
               OPEN_GAP_SCORE [_matrix_type],
               Diag[PDct] . I[PDi] . S[MATCH] . used == NONE ?
               Diag[PDct] . I[PDi] . S[MATCH] . value :
               Diag[PDct] . I[PDi] . S[MATCH] . value +
               OPEN_GAP_SCORE [_matrix_type]);
          else
            {
              // Parent node lies outside the stored range of the parent diag
              Diag[Dct] . I[Di] . S[DELETE] . value = min_score;
              Diag[Dct] . I[Di] . S[DELETE] . used = NONE;
            }

          // The INSERT predecessor is the next node along the parent
          // diagonal. This increment also positions PDi for the DELETE
          // predecessor of the next CDi.
          PDi ++;

          //-- Calculate INSERT score
          //   Same scheme as DELETE, except that continuing from INSERT
          //   takes the continue-gap score and DELETE / MATCH take the
          //   open-gap score.
          if ( PDi >= 0  &&  PDi < PDs )
            scoreEdit
              (Diag[Dct] . I[Di] . S[INSERT],
               Diag[PDct] . I[PDi] . S[DELETE] . used == NONE ?
               Diag[PDct] . I[PDi] . S[DELETE] . value :
               Diag[PDct] . I[PDi] . S[DELETE] . value +
               OPEN_GAP_SCORE [_matrix_type],
               Diag[PDct] . I[PDi] . S[INSERT] . used == NONE ?
               Diag[PDct] . I[PDi] . S[INSERT] . value :
               Diag[PDct] . I[PDi] . S[INSERT] . value +
               CONT_GAP_SCORE [_matrix_type],
               Diag[PDct] . I[PDi] . S[MATCH] . used == NONE ?
               Diag[PDct] . I[PDi] . S[MATCH] . value :
               Diag[PDct] . I[PDi] . S[MATCH] . value +
               OPEN_GAP_SCORE [_matrix_type]);
          else
            {
              Diag[Dct] . I[Di] . S[INSERT] . value = min_score;
              Diag[Dct] . I[Di] . S[INSERT] . used = NONE;
            }

          //-- Calculate MATCH/MIS-MATCH score
          //   Candidates are the three unadjusted state values of
          //   grandparent node PPDi; the value returned by scoreMatch for
          //   this node is then added to the chosen one.
          if ( PPDi >= 0  &&  PPDi < PPDs )
            {
              scoreEdit
                (Diag[Dct] . I[Di] . S[MATCH],
                 Diag[PPDct] . I[PPDi] . S[DELETE] . value,
                 Diag[PPDct] . I[PPDi] . S[INSERT] . value,
                 Diag[PPDct] . I[PPDi] . S[MATCH] . value);
              Diag[Dct] . I[Di] . S[MATCH] . value +=
                scoreMatch (Diag[Dct], Dct, CDi, A, B, N, m_o);
            }
          else
            {
              Diag[Dct] . I[Di] . S[MATCH] . value = min_score;
              Diag[Dct] . I[Di] . S[MATCH] . used = NONE;
            }

          PPDi ++;

          Diag[Dct] . I[Di] . max = maxScore (Diag[Dct] . I[Di] . S);

          //-- Reset high_score if new global max was found
          //   '>=' means a tie moves the finish point to the later node
          if ( Diag[Dct] . I[Di] . max->value >= high_score )
            {
              high_score = Diag[Dct] . I[Di] . max->value;
              FinishCt = Dct;
              FinishCDi = CDi;
            }
        }
      //-- **END** of internal node scoring loop

      //-- Calculate max non-optimal score
      //   Only with SEQEND_BIT, and only from diagonal L onwards. Records
      //   the best score seen at conceptual index 0 (when L == N) or at
      //   conceptual index M (otherwise), provided that node is still
      //   inside the current bounds.
      if ( m_o & SEQEND_BIT  &&  Dct >= L )
        {
          if ( L == N )
            {
              if ( lbound == 0 )
                {
                  if ( Diag[Dct] . I[0] . max->value >= xhigh_score )
                    {
                      xhigh_score = Diag[Dct] . I[0] . max->value;
                      xFinishCt = Dct;
                      xFinishCDi = 0;
                    }
                }
            }
          else // L == M
            {
              if ( rbound == M )
                {
                  if ( Diag[Dct] . I[M-Diag[Dct].lbound] .
                       max->value >= xhigh_score )
                    {
                      xhigh_score = Diag[Dct] . I[M-Diag[Dct].lbound] .
                        max->value;
                      xFinishCt = Dct;
                      xFinishCDi = M;
                    }
                }
            }
        }

      //-- If in extender modus operandi, free soon to be greatgrandparent diag
      //   (scoring only reads the parent and grandparent diagonals, so in
      //   search mode only the two most recent node arrays stay allocated)
      if ( m_o & SEARCH_BIT  &&  Dct > 1 )
        free ( Diag[PPDct] . I );

      //-- Trim hopeless diagonal nodes
      //   Shrink the bounds inward from each end while the end node scores
      //   more than max_diff below high_score.
      for ( Di = 0; Di < Ds; Di ++ )
        {
          if ( high_score - Diag[Dct] . I[Di] . max->value > max_diff )
            lbound ++;
          else
            break;
        }
      for ( Di = Ds - 1; Di >= 0; Di -- )
        {
          if ( high_score - Diag[Dct] . I[Di] . max->value > max_diff )
            rbound --;
          else
            break;
        }

      //-- Grow new diagonal and reset boundaries
      //   Adjusts the bounds, the conceptual length Dl and the midpoint
      //   Dmid for the next diagonal, depending on whether Dct has reached
      //   N and/or M.
      if ( Dct < N  &&  Dct < M )
        {
          Dl ++;
          rbound ++;
          Dmid = (Dct+1)/2.0;
        }
      else if ( Dct >= N  &&  Dct >= M )
        {
          Dl --;
          lbound --;
          Dmid = N - (Dct+1)/2.0;
        }
      else if ( Dct >= N )
        {
          lbound --;
          Dmid = N - (Dct+1)/2.0;
        }
      else
        {
          rbound ++;
          Dmid = (Dct+1)/2.0;
        }

      //-- Trim at hard band
      //   When banding is enabled (Dband > 0), clamp the bounds to within
      //   Dband of the diagonal midpoint.
      if ( Dband > 0 )
        {
          tlb = (long int)ceil(Dmid - Dband);
          if ( lbound < tlb )
            lbound = tlb;
          trb = (long int)floor(Dmid + Dband);
          if ( rbound > trb )
            rbound = trb;
        }

      // Keep the bounds inside the conceptual diagonal [0, Dl - 1]
      if ( lbound < 0 )
        lbound = 0;
      if ( rbound >= Dl )
        rbound = Dl - 1;
    }
  //-- **END** of diagonal processing loop

  // Step back to the last diagonal that was actually processed
  Dct --;

  //-- Check if the target was reached
  //   If OPTIMAL, backtrack to last high_score to maximize alignment score
  TargetReached = false;
  if ( Dct == N + M )
    {
      if ( ~m_o & OPTIMAL_BIT || m_o & SEQEND_BIT )
        {
          // Finish at the final diagonal regardless of where the high
          // score was found
          TargetReached = true;
          FinishCt = N + M;
          FinishCDi = 0;
        }
      else if ( FinishCt == Dct )
        TargetReached = true;
    }
  else if ( m_o & SEQEND_BIT  &&  xFinishCt != 0 )
    {
      //-- non-optimal, extend alignment to end of shortest seq if possible
      FinishCt = xFinishCt;
      FinishCDi = xFinishCDi;
    }

  //-- Set A/Bend to finish positions
  //   Convert the finish node (FinishCt, FinishCDi) into offsets from
  //   Astart / Bstart; the offsets are negated when DIRECTION_BIT is clear.
  long int Aadj = FinishCt <= N ? FinishCt - FinishCDi - 1 : N - FinishCDi - 1;
  long int Badj = FinishCt <= N ? FinishCDi - 1 : FinishCt - N + FinishCDi - 1;
  if ( ~m_o & DIRECTION_BIT )
    {
      Aadj *= -1;
      Badj *= -1;
    }
  Aend = Astart + Aadj;
  Bend = Bstart + Badj;

#ifdef _DEBUG_VERBOSE
  assert (FinishCt > 1);

  //-- Output calculation statistics
  if ( TargetReached )
    fprintf(stderr,"Finish score = %ld : %ld,%ld\n",
            Diag[FinishCt] . I[0] . max->value, N, M);
  else
    fprintf(stderr,"High score = %ld : %ld,%ld\n", high_score,
            labs(Aadj) + 1, labs(Badj) + 1);
  fprintf(stderr, "%ld nodes calculated, %ld nodes trimmed\n", CalcCt, TrimCt);
  if ( m_o & DIRECTION_BIT )
    fprintf(stderr, "%ld bytes used\n",
            (long int)sizeof(Diagonal) * Dct + (long int)sizeof(Node) * CalcCt);
  else
    fprintf(stderr, "%ld bytes used\n",
            ((long int)sizeof(Diagonal) + (long int)sizeof(Node) * MaxL) * 2);
#endif

  //-- If in forward alignment m_o, create the Delta information
  if ( ~m_o & SEARCH_BIT )
    generateDelta (Diag, FinishCt, FinishCDi, N, Delta);

  //-- Free the scoring and edit spaces remaining
  //   In search mode every node array before Dct - 1 was already freed
  //   inside the loop, so only the last two remain.
  //   NOTE(review): if the main loop body never executes, Dct is 0 here and
  //   the search-mode start index is -1 -- confirm that callers always
  //   guarantee at least one iteration (N + M >= 1 and _break_len >= 1).
  for ( Di = m_o & SEARCH_BIT ? Dct - 1 : 0; Di <= Dct; Di ++ )
    free ( Diag[Di] . I );
  free ( Diag );

  return TargetReached;
}