//  Rev_Prefix_Edit_Dist
//
//  Reverse-direction banded prefix edit distance.  Both strings are walked
//  BACKWARDS from the pointers supplied: position  Row  of  A  is  A [- Row]
//  and it is paired with  T [- Row - d]  on diagonal  d .  A position matches
//  when the characters are equal or when either one is  DONT_KNOW_CHAR .
//
//  WA -> Edit_Array_Lazy [e] [d]  records the furthest  Row  reached on
//  diagonal  d  using  e  errors.  Rows of that array are created on demand
//  by  Allocate_More_Edit_Space .  The code indexes them with negative  d
//  and with one extra cell on each side of the band.
//  NOTE(review): this presumes each row pointer is biased so those indices
//  are in range -- confirm in  Allocate_More_Edit_Space .
//
//  Parameters:
//    A, m          first string and the number of positions to align
//    T, n          second string and its length; the code asserts  m <= n
//    Error_Limit   largest number of errors tried
//    A_End, T_End  (out) negated offsets of the alignment end in  A  and  T
//    Leftover      (out) set to  m  on an exact match; otherwise handed to
//                  Set_Left_Delta  (its effect is not visible here)
//    Match_To_End  (out)  TRUE  if the end of  A  or of  T  was reached,
//                   FALSE  if the best-scoring partial extent was reported
//    WA            work area;  Left_Delta_Len  is reset to zero on entry
//
//  Returns  0  on an exact match, the error count  e  when an end of either
//  string is reached, or the error count of the best-scoring extent when a
//  branch point is detected or no alignment reaches an end.

static int
Rev_Prefix_Edit_Dist (char A [], int m, char T [], int n, int Error_Limit,
                      int * A_End, int * T_End, int * Leftover,
                      int * Match_To_End, Work_Area_t * WA)
  {
   //  Furthest row seen on any surviving diagonal so far, and where it was seen.
   int  longest = 0;
   int  longest_d = 0;
   int  longest_e = 0;

   //  Best branch-point score so far and the extent that produced it.
   double  peak_score = 0.0;
   int  peak_len = 0;
   int  peak_d = 0;
   int  peak_e = 0;

   //  Inclusive range of diagonals still being extended.
   int  lo = 0;
   int  hi = 0;

   int  row;
   int  d, e;

   assert (m <= n);

   WA -> Left_Delta_Len = 0;

   //  Error-free run at the start (diagonal 0).
   row = 0;
   while  (row < m)
     {
      char  a = A [- row];
      char  t = T [- row];

      if  (a != t && a != DONT_KNOW_CHAR && t != DONT_KNOW_CHAR)
          break;
      row ++;
     }

   if  (WA -> Edit_Array_Lazy [0] == NULL)
       Allocate_More_Edit_Space (WA);
   WA -> Edit_Array_Lazy [0] [0] = row;

   if  (row == m)
       {
        //  All of  A  matched without error.
        (* T_End) = - m;
        (* A_End) = (* T_End);
        (* Leftover) = m;
        (* Match_To_End) = TRUE;
        return  0;
       }

   for  (e = 1;  e <= Error_Limit;  e ++)
     {
      if  (WA -> Edit_Array_Lazy [e] == NULL)
          Allocate_More_Edit_Space (WA);

      //  Widen the band by one diagonal per side, clamped to  [-e, e] .
      lo -= 1;
      if  (lo < - e)
          lo = - e;
      hi += 1;
      if  (hi > e)
          hi = e;

      //  Sentinels in the previous row at and just outside the new band edges
      //  so the three-way maximum below never picks them.
      WA -> Edit_Array_Lazy [e - 1] [lo] = -2;
      WA -> Edit_Array_Lazy [e - 1] [lo - 1] = -2;
      WA -> Edit_Array_Lazy [e - 1] [hi] = -2;
      WA -> Edit_Array_Lazy [e - 1] [hi + 1] = -2;

      for  (d = lo;  d <= hi;  d ++)
        {
         int  same_diag = 1 + WA -> Edit_Array_Lazy [e - 1] [d];
         int  lower_diag = WA -> Edit_Array_Lazy [e - 1] [d - 1];
         int  upper_diag = 1 + WA -> Edit_Array_Lazy [e - 1] [d + 1];

         //  Best starting row reachable from the previous error level.
         row = same_diag;
         if  (lower_diag > row)
             row = lower_diag;
         if  (upper_diag > row)
             row = upper_diag;

         //  Slide along the diagonal while positions match.
         while  (row < m && row + d < n)
           {
            char  a = A [- row];
            char  t = T [- row - d];

            if  (a != t && a != DONT_KNOW_CHAR && t != DONT_KNOW_CHAR)
                break;
            row ++;
           }

         WA -> Edit_Array_Lazy [e] [d] = row;

         if  (row == m || row + d == n)
             {
              //  Reached the end of one string.  Check for a branch point
              //  here caused by uneven distribution of errors.
              //  Assumes  Branch_Match_Value - Branch_Error_Value == 1.0
              double  end_score = row * Branch_Match_Value - e;
              int  tail_len = row - peak_len;

              //  The division by  tail_len  is only evaluated after
              //   tail_len >= MIN_BRANCH_END_DIST  has been established.
              if  ((Doing_Partial_Overlaps && end_score < peak_score)
                     || (e > MIN_BRANCH_END_DIST / 2
                           && tail_len >= MIN_BRANCH_END_DIST
                           && (peak_score - end_score) / tail_len >= MIN_BRANCH_TAIL_SLOPE))
                  {
                   //  Report the earlier best-scoring extent instead.
                   (* A_End) = - peak_len;
                   (* T_End) = - peak_len - peak_d;
                   Set_Left_Delta (peak_e, peak_d, Leftover, T_End, n, WA);
                   (* Match_To_End) = FALSE;
                   return  peak_e;
                  }

              (* A_End) = - row;           // One past last align position
              (* T_End) = - row - d;
              Set_Left_Delta (e, d, Leftover, T_End, n, WA);
              (* Match_To_End) = TRUE;
              return  e;
             }
        }

      //  Trim diagonals from the low side that fall short of
      //   Edit_Match_Limit [e] .  Negative diagonals are judged on the row
      //  alone, non-negative ones on  row + diagonal .
      while  (lo <= hi && lo < 0
                && WA -> Edit_Array_Lazy [e] [lo] < WA -> Edit_Match_Limit [e])
        lo ++;
      if  (lo >= 0)
          {
           while  (lo <= hi
                     && WA -> Edit_Array_Lazy [e] [lo] + lo < WA -> Edit_Match_Limit [e])
             lo ++;
          }

      if  (lo > hi)
          break;            //  nothing left to extend

      //  Same trimming from the high side.
      while  (hi > 0
                && WA -> Edit_Array_Lazy [e] [hi] + hi < WA -> Edit_Match_Limit [e])
        hi --;
      if  (hi <= 0)
          {
           while  (WA -> Edit_Array_Lazy [e] [hi] < WA -> Edit_Match_Limit [e])
             hi --;
          }

      assert (lo <= hi);

      //  Track the longest extent seen on any surviving diagonal.
      for  (d = lo;  d <= hi;  d ++)
        {
         if  (WA -> Edit_Array_Lazy [e] [d] > longest)
             {
              longest_d = d;
              longest_e = e;
              longest = WA -> Edit_Array_Lazy [e] [d];
             }
        }

      //  Assumes  Branch_Match_Value - Branch_Error_Value == 1.0
      {
       double  level_score = longest * Branch_Match_Value - e;

       if  (level_score > peak_score)
           {
            peak_score = level_score;
            peak_len = longest;
            peak_d = longest_d;
            peak_e = longest_e;
           }
      }
     }

   //  No alignment reached the end of either string: report the best-scoring
   //  extent found.
   (* A_End) = - peak_len;
   (* T_End) = - peak_len - peak_d;
   Set_Left_Delta (peak_e, peak_d, Leftover, T_End, n, WA);
   (* Match_To_End) = FALSE;

   return  peak_e;
  }
//  Prefix_Edit_Dist
//
//  Forward banded prefix edit distance between A[0..m-1] and T[0..n-1], trying
//  at most Error_Limit errors.  Position Row of A is paired with position
//  Row + d of T on diagonal d.
//
//  WA->Edit_Array_Lazy[e][d] records the furthest Row reached on diagonal d
//  using e errors.  Rows of that array are created on demand by
//  Allocate_More_Edit_Space().  The code indexes them with negative d and with
//  one extra cell on each side of the band.
//  NOTE(review): this presumes each row pointer is biased so those indices are
//  in range -- confirm in Allocate_More_Edit_Space().
//
//  Outputs:
//    A_End, T_End   end of the alignment in A and in T (one past the last
//                   aligned position when an end was reached)
//    Match_To_End   true if the end of A or of T was reached, false otherwise
//    WA->deltaLen   reset to zero on entry; Compute_Delta() is called only on
//                   the path that reaches an end with one or more errors
//
//  Returns:
//    0                 the first min(m, n) characters match exactly
//    e (1..Error_Limit) an end of A or T was reached with e errors
//    Error_Limit + 1   no alignment reached an end; A_End / T_End describe the
//                      best-scoring extent found and Match_To_End is false
//
//  Unlike the reverse variant, m <= n is NOT required here: the initial
//  error-free scan is bounded by the shorter of the two lengths.

int32
Prefix_Edit_Dist(char    *A,  int32 m,
                 char    *T,  int32 n,
                 int32    Error_Limit,
                 int32   &A_End,
                 int32   &T_End,
                 bool    &Match_To_End,
                 pedWorkArea_t *WA) {

  int32  Best_d  = 0;
  int32  Best_e  = 0;
  int32  Longest = 0;

  WA->deltaLen = 0;

  //  Error-free run at the start (diagonal 0).

  int32  shorter = min(m, n);
  int32  Row     = 0;

  while ((Row < shorter) && (A[Row] == T[Row]))
    Row++;

  if (WA->Edit_Array_Lazy[0] == NULL)
    Allocate_More_Edit_Space(WA);

  WA->Edit_Array_Lazy[0][0] = Row;

  //  Exact match?

  if (Row == shorter) {
    A_End        = Row;
    T_End        = Row;
    Match_To_End = true;

    return(0);
  }

  int32   Left  = 0;
  int32   Right = 0;

  double  Max_Score        = 0.0;
  int32   Max_Score_Len    = 0;
  int32   Max_Score_Best_d = 0;
  int32   Max_Score_Best_e = 0;

  for (int32 e=1; e<=Error_Limit; e++) {
    if (WA->Edit_Array_Lazy[e] == NULL)
      Allocate_More_Edit_Space(WA);

    //  Widen the band by one diagonal per side, clamped to [-e, e].

    Left  = max(Left  - 1, -e);
    Right = min(Right + 1,  e);

    //  Sentinels in the previous row at and just outside the new band edges so
    //  the three-way maximum below never picks them.

    WA->Edit_Array_Lazy[e-1][Left]    = -2;
    WA->Edit_Array_Lazy[e-1][Left-1]  = -2;
    WA->Edit_Array_Lazy[e-1][Right]   = -2;
    WA->Edit_Array_Lazy[e-1][Right+1] = -2;

    for (int32 d=Left; d<=Right; d++) {

      //  Best starting row reachable from the previous error level.

      Row = 1 + WA->Edit_Array_Lazy[e-1][d];
      Row = max(Row, WA->Edit_Array_Lazy[e-1][d-1]);
      Row = max(Row, WA->Edit_Array_Lazy[e-1][d+1] + 1);

      //  Slide along the diagonal while characters match.

      while ((Row < m) && (Row + d < n) && (A[Row] == T[Row + d]))
        Row++;

      assert(e < WA->Edit_Array_Max);

      WA->Edit_Array_Lazy[e][d] = Row;

      if (Row == m || Row + d == n) {

        //  Force last error to be mismatch rather than insertion.  When the
        //  end of A was reached via the d+1 predecessor, move to diagonal d+1
        //  (if it is still inside the band) and carry the row value across.
        //  T_End below then uses the shifted d.

        if ((Row == m) &&
            (1 + WA->Edit_Array_Lazy[e-1][d+1] == WA->Edit_Array_Lazy[e][d]) &&
            (d < Right)) {
          d++;
          WA->Edit_Array_Lazy[e][d] = WA->Edit_Array_Lazy[e][d-1];
        }

        A_End        = Row;           // One past last align position
        T_End        = Row + d;
        Match_To_End = true;

        Compute_Delta(WA, e, d, Row);

        return(e);
      }
    }

    //  Trim diagonals from the low side that fall short of Edit_Match_Limit[e].
    //  Negative diagonals are judged on the row alone, non-negative ones on
    //  row + diagonal.

    while (Left <= Right && Left < 0 &&
           WA->Edit_Array_Lazy[e][Left] < WA->G->Edit_Match_Limit[e])
      Left++;

    if (Left >= 0)
      while (Left <= Right &&
             WA->Edit_Array_Lazy[e][Left] + Left < WA->G->Edit_Match_Limit[e])
        Left++;

    if (Left > Right)
      break;    //  Nothing left to extend; falls through to the final return.

    //  Same trimming from the high side.

    while (Right > 0 &&
           WA->Edit_Array_Lazy[e][Right] + Right < WA->G->Edit_Match_Limit[e])
      Right--;

    if (Right <= 0)
      while (WA->Edit_Array_Lazy[e][Right] < WA->G->Edit_Match_Limit[e])
        Right--;

    assert(Left <= Right);

    //  Track the longest extent seen on any surviving diagonal.

    for (int32 d=Left; d <= Right; d++)
      if (WA->Edit_Array_Lazy[e][d] > Longest) {
        Best_d  = d;
        Best_e  = e;
        Longest = WA->Edit_Array_Lazy[e][d];
      }

    //  Assumes BRANCH_PT_MATCH_VALUE - BRANCH_PT_ERROR_VALUE == 1.0
    //
    //  Score is kept as a double so the fractional part survives the comparison
    //  against the double Max_Score.  It used to be an int32, which truncated
    //  the value before comparing.

    double  Score = Longest * BRANCH_PT_MATCH_VALUE - e;

    //  findErrors also included a second test; overlapper doesn't.

    if (Score > Max_Score) {
      Max_Score        = Score;
      Max_Score_Len    = Longest;
      Max_Score_Best_d = Best_d;
      Max_Score_Best_e = Best_e;
    }
  }

  //  findErrors does this call.  Overlapper doesn't.
  //    Compute_Delta(WA, Max_Score_Best_e, Max_Score_Best_d, Max_Score_Len);

  A_End        = Max_Score_Len;
  T_End        = Max_Score_Len + Max_Score_Best_d;
  Match_To_End = false;

  //  We get here in two ways: the e loop ran past Error_Limit, or it stopped
  //  early because the band became empty (Left > Right).  Either way the value
  //  returned is Error_Limit + 1, which is larger than anything the
  //  match-to-end paths above can return.
  //
  //  Historical note carried over from earlier comments: other variants of this
  //  routine (findErrors, older overlapper) are said to have returned
  //  Max_Score_Best_e or e here -- verify against callers before changing this.

  return(Error_Limit + 1);
}