static int Rev_Prefix_Edit_Dist (char A[], int m, char T[], int n, int Error_Limit, int * A_End, int * T_End, int * Leftover, int * Match_To_End, Work_Area_t * WA) { double Score, Max_Score; int Max_Score_Len = 0, Max_Score_Best_d = 0, Max_Score_Best_e = 0; int Tail_Len; int Best_d, Best_e, From, Last, Longest, Max, Row; int Left, Right; int d, e, j, k; assert (m <= n); Best_d = Best_e = Longest = 0; WA->Left_Delta_Len = 0; for (Row = 0; Row < m && (A[- Row] == T[- Row] || A[- Row] == DONT_KNOW_CHAR || T[- Row] == DONT_KNOW_CHAR); Row++) ; if (WA->Edit_Array_Lazy[0] == NULL) Allocate_More_Edit_Space(WA); WA->Edit_Array_Lazy[0][0] = Row; if (Row == m) { (* A_End) = (* T_End) = - m; (* Leftover) = m; (* Match_To_End) = TRUE; return 0; } Left = Right = 0; Max_Score = 0.0; for (e = 1; e <= Error_Limit; e++) { if (WA->Edit_Array_Lazy[e] == NULL) Allocate_More_Edit_Space(WA); Left = MAX (Left - 1, -e); Right = MIN (Right + 1, e); WA->Edit_Array_Lazy[e - 1][Left] = -2; WA->Edit_Array_Lazy[e - 1][Left - 1] = -2; WA->Edit_Array_Lazy[e - 1][Right] = -2; WA->Edit_Array_Lazy[e - 1][Right + 1] = -2; for (d = Left; d <= Right; d++) { Row = 1 + WA->Edit_Array_Lazy[e - 1][d]; if ((j = WA->Edit_Array_Lazy[e - 1][d - 1]) > Row) Row = j; if ((j = 1 + WA->Edit_Array_Lazy[e - 1][d + 1]) > Row) Row = j; while (Row < m && Row + d < n && (A[- Row] == T[- Row - d] || A[- Row] == DONT_KNOW_CHAR || T[- Row - d] == DONT_KNOW_CHAR)) Row++; WA->Edit_Array_Lazy[e][d] = Row; if (Row == m || Row + d == n) { // Check for branch point here caused by uneven // distribution of errors Score = Row * Branch_Match_Value - e; // Assumes Branch_Match_Value // - Branch_Error_Value == 1.0 Tail_Len = Row - Max_Score_Len; if ((Doing_Partial_Overlaps && Score < Max_Score) || (e > MIN_BRANCH_END_DIST / 2 && Tail_Len >= MIN_BRANCH_END_DIST && (Max_Score - Score) / Tail_Len >= MIN_BRANCH_TAIL_SLOPE)) { (* A_End) = - Max_Score_Len; (* T_End) = - Max_Score_Len - Max_Score_Best_d; Set_Left_Delta (Max_Score_Best_e, Max_Score_Best_d, Leftover, T_End, n, WA); (* Match_To_End) = FALSE; return Max_Score_Best_e; } (* A_End) = - Row; // One past last align position (* T_End) = - Row - d; Set_Left_Delta (e, d, Leftover, T_End, n, WA); (* Match_To_End) = TRUE; return e; } } while (Left <= Right && Left < 0 && WA->Edit_Array_Lazy[e][Left] < WA->Edit_Match_Limit[e]) Left++; if (Left >= 0) while (Left <= Right && WA->Edit_Array_Lazy[e][Left] + Left < WA->Edit_Match_Limit[e]) Left++; if (Left > Right) break; while (Right > 0 && WA->Edit_Array_Lazy[e][Right] + Right < WA->Edit_Match_Limit[e]) Right--; if (Right <= 0) while (WA->Edit_Array_Lazy[e][Right] < WA->Edit_Match_Limit[e]) Right--; assert (Left <= Right); for (d = Left; d <= Right; d++) if (WA->Edit_Array_Lazy[e][d] > Longest) { Best_d = d; Best_e = e; Longest = WA->Edit_Array_Lazy[e][d]; } Score = Longest * Branch_Match_Value - e; // Assumes Branch_Match_Value - Branch_Error_Value == 1.0 if (Score > Max_Score) { Max_Score = Score; Max_Score_Len = Longest; Max_Score_Best_d = Best_d; Max_Score_Best_e = Best_e; } } (* A_End) = - Max_Score_Len; (* T_End) = - Max_Score_Len - Max_Score_Best_d; Set_Left_Delta (Max_Score_Best_e, Max_Score_Best_d, Leftover, T_End, n, WA); (* Match_To_End) = FALSE; return Max_Score_Best_e; }
void NDalgorithm::reverse(char *A, int32 Alen, // first sequence and length char *T, int32 Tlen, // second sequence and length int32 &A_End, int32 &T_End, int32 &Leftover, // <- novel bool &Match_To_End) { assert (Alen <= Tlen); int32 Best_d = 0; int32 Best_e = 0; int32 Best_row = 0; int32 Best_score = 0; int32 Row = 0; int32 Dst = 0; int32 Err = 0; int32 Sco = 0; int32 fromd = 0; // Skip ahead over matches. The original used to also skip if either sequence was N. while ((Row < Alen) && (isMatch(A[-Row], T[-Row]))) { Sco += matchScore(A[-Row], T[-Row]); Row++; } if (Edit_Array_Lazy[0] == NULL) allocateMoreEditSpace(); Edit_Array_Lazy[0][0].row = Row; Edit_Array_Lazy[0][0].dist = Dst; Edit_Array_Lazy[0][0].errs = 0; Edit_Array_Lazy[0][0].score = Sco; Edit_Array_Lazy[0][0].fromd = INT32_MAX; // Exact match? if (Row == Alen) { A_End = -Alen; T_End = -Alen; Leftover = Alen; Match_To_End = true; #ifdef DEBUG fprintf(stderr, "NDalgorithm::reverse()-- exact match\n"); #endif Left_Score = Sco; Left_Delta_Len = 0; return; } int32 Left = 0; int32 Right = 0; int32 Max_Score = PEDMINSCORE; int32 Max_Score_Len = 0; int32 Max_Score_Best_d = 0; int32 Max_Score_Best_e = 0; for (int32 ei=1; ei <= Edit_Space_Max; ei++) { if (Edit_Array_Lazy[ei] == NULL) if (allocateMoreEditSpace() == false) { // FAIL return; } Left = MAX (Left - 1, -ei); Right = MIN (Right + 1, ei); //fprintf(stderr, "REVERSE ei=%d Left=%d Right=%d\n", ei, Left, Right); Edit_Array_Lazy[ei-1][Left - 1].init(); Edit_Array_Lazy[ei-1][Left ].init(); // Of note, [0][0] on the first iteration is not reset here. Edit_Array_Lazy[ei-1][Right ].init(); Edit_Array_Lazy[ei-1][Right + 1].init(); for (int32 d = Left; d <= Right; d++) { // A mismatch. { int32 aPos = -(1 + Edit_Array_Lazy[ei-1][d].row) + 1; // +1 because we need to compare the base we are at, int32 tPos = -(1 + Edit_Array_Lazy[ei-1][d].row) - d + 1; // not the base we will be at after the mismatch Row = 1 + Edit_Array_Lazy[ei-1][d].row; Dst = Edit_Array_Lazy[ei-1][d].dist + 1; Err = Edit_Array_Lazy[ei-1][d].errs + 1; fromd = d; //fprintf(stderr, "aPos %d tPos %d\n", aPos, tPos); // If negative, we have a pointer into valid sequence. If not, this mismatch // doesn't make sense, and the row/score are set to bogus values. if ((aPos <= 0) && (tPos <= 0)) { assert(-aPos <= Alen); assert(-tPos <= Tlen); assert(A[aPos] != T[tPos]); Sco = Edit_Array_Lazy[ei-1][d].score + mismatchScore(A[aPos], T[tPos]); } else { Sco = PEDMINSCORE; } } // Insert a gap in A. Check the other sequence to see if this is a zero-cost gap. Note // agreement with future value of Row and what is used in isMatch() below. // Testcase test-st-ts shows this works. { int32 tPos = -(0 + Edit_Array_Lazy[ei-1][d-1].row) - d; //assert( tPos <= 0); Not true at the lower end; we'll just skip the cell since it's invalid //assert(-tPos < Tlen); if ((tPos <= 0) && (-tPos < Tlen)) { int32 gapCost = isFreeGap( T[tPos] ) ? PEDFREEGAP : PEDGAP; //if (gapCost == 0) // fprintf(stderr, "NDalgorithm::reverse()-- free A gap for aPos=%d tPos=%d t=%c/%d\n", tPos + d, tPos, T[tPos], T[tPos]); if (Edit_Array_Lazy[ei-1][d-1].score + gapCost > Sco) { Row = Edit_Array_Lazy[ei-1][d-1].row; Dst = Edit_Array_Lazy[ei-1][d-1].dist + (gapCost == PEDFREEGAP) ? 0 : 0; Err = Edit_Array_Lazy[ei-1][d-1].errs + (gapCost == PEDFREEGAP) ? 0 : 0; Sco = Edit_Array_Lazy[ei-1][d-1].score + gapCost; fromd = d-1; } } } // Insert a gap in T. { int32 aPos = -(1 + Edit_Array_Lazy[ei-1][d+1].row); //assert( aPos <= 0); //assert(-aPos < Tlen); if ((aPos <= 0) && (-aPos <= Alen)) { int32 gapCost = isFreeGap( A[aPos] ) ? 0 : PEDGAP; //if (gapCost == 0) // fprintf(stderr, "NDalgorithm::reverse()-- free T gap for aPos=%d tPos=%d t=%c/%d\n", aPos, aPos - d, A[aPos], A[aPos]); if (Edit_Array_Lazy[ei-1][d+1].score + gapCost > Sco) { Row = 1 + Edit_Array_Lazy[ei-1][d+1].row; Dst = Edit_Array_Lazy[ei-1][d+1].dist + (gapCost == PEDFREEGAP) ? 0 : 1; Err = Edit_Array_Lazy[ei-1][d+1].errs + (gapCost == PEDFREEGAP) ? 0 : 1; Sco = Edit_Array_Lazy[ei-1][d+1].score + gapCost; fromd = d+1; } } } // If A or B is N, that isn't a mismatch. // If A is lowercase and T is uppercase, it's a match. // If A is lowercase and T doesn't match, ignore the cost of the gap in B while ((Row < Alen) && (Row + d < Tlen) && (isMatch(A[-Row], T[-Row - d]))) { Sco += matchScore(A[-Row], T[-Row - d]); Row += 1; Dst += 1; Err += 0; } Edit_Array_Lazy[ei][d].row = Row; Edit_Array_Lazy[ei][d].dist = Dst; Edit_Array_Lazy[ei][d].errs = Err; Edit_Array_Lazy[ei][d].score = Sco; Edit_Array_Lazy[ei][d].fromd = fromd; if (Row == Alen || Row + d == Tlen) { A_End = - Row; // One past last align position T_End = - Row - d; Set_Left_Delta(A, T, ei, d, Leftover, T_End, Tlen); Match_To_End = true; return; //return(ei); } } // Over all diagonals. // Reset the band // // The .dist used to be .row. while ((Left <= Right) && (Left < 0) && (Edit_Array_Lazy[ei][Left].dist < Edit_Match_Limit[ Edit_Array_Lazy[ei][Left].errs ])) Left++; if (Left >= 0) while ((Left <= Right) && (Edit_Array_Lazy[ei][Left].dist + Left < Edit_Match_Limit[ Edit_Array_Lazy[ei][Left].errs ])) Left++; if (Left > Right) break; while ((Right > 0) && (Edit_Array_Lazy[ei][Right].dist + Right < Edit_Match_Limit[ Edit_Array_Lazy[ei][Right].errs ])) Right--; if (Right <= 0) while (Edit_Array_Lazy[ei][Right].dist < Edit_Match_Limit[ Edit_Array_Lazy[ei][Right].errs ]) Right--; assert (Left <= Right); for (int32 d = Left; d <= Right; d++) if (Edit_Array_Lazy[ei][d].score > Best_score) { Best_d = d; Best_e = ei; Best_row = Edit_Array_Lazy[ei][d].row; Best_score = Edit_Array_Lazy[ei][d].score; } if (Best_score > Max_Score) { Max_Score_Best_d = Best_d; Max_Score_Best_e = Best_e; Max_Score = Best_score; Max_Score_Len = Best_row; } } // Over all possible number of errors #ifdef DEBUG fprintf(stderr, "NDalgorithm::reverse()-- iterated over all errors, return best found\n"); #endif A_End = - Max_Score_Len; T_End = - Max_Score_Len - Max_Score_Best_d; Set_Left_Delta(A, T, Max_Score_Best_e, Max_Score_Best_d, Leftover, T_End, Tlen); Match_To_End = false; return; //return(Max_Score_Best_e); }