/*
 * Scan outward from the index pair (i1, i2) in 'x' (i1 moving left, i2 moving
 * right) and report every arm that is long enough.
 *
 * A pair is accepted when both indices lie inside [0, x_len) and either
 * is_match() accepts the two letters or the mismatch allowance 'max_nmis' is
 * still positive. The allowance is decremented every time is_match() rejects
 * an in-bounds pair. Accepted pairs extend the current arm.
 *
 * When a pair is rejected, the arm collected so far is reported through
 * _report_match(i1 + 2, i2 - i1 - 1) provided its length is at least
 * 'min_arm_len', and the arm length restarts from zero. At that point i1/i2
 * are one step beyond the last accepted pair, so the two arguments presumably
 * are the 1-based start and the width of the span enclosed by that pair.
 *
 * Scanning ends as soon as no arm is open and either an index has left the
 * sequence or the gap 'i2 - i1' exceeds 'max_loop_len1'.
 */
static void get_find_palindromes_at(const char *x, int x_len,
		int i1, int i2, int max_loop_len1, int min_arm_len, int max_nmis,
		const int *lkup, int lkup_len)
{
	int arm_len = 0;

	for (;; i1--, i2++) {
		int in_bounds = (i1 >= 0 && i2 < x_len);

		/* With no arm open, a new one may only start while we are
		   in bounds and the loop is not too wide. */
		if (arm_len == 0 && !(in_bounds && i2 - i1 <= max_loop_len1))
			break;

		/* The allowance is consumed only when the letters fail to
		   match, hence the short-circuit order below matters. */
		if (in_bounds
		 && (is_match(x[i1], x[i2], lkup, lkup_len) || max_nmis-- > 0))
		{
			arm_len++;
			continue;
		}

		/* The arm (if any) ends here. */
		if (arm_len >= min_arm_len)
			_report_match(i1 + 2, i2 - i1 - 1);
		arm_len = 0;
	}
}
static void match_naive_exact(const Chars_holder *P, const Chars_holder *S) { const char *p, *s; int plen, slen, start, n2; if (P->length <= 0) error("empty pattern"); p = P->ptr; plen = P->length; s = S->ptr; slen = S->length; for (start = 1, n2 = plen; n2 <= slen; start++, n2++, s++) { if (memcmp(p, s, plen) == 0) _report_match(start, P->length); } return; }
static void report_provisory_match(int start, int width, int nedit) { int end; end = start + width - 1; if (provisory_match_nedit != -1) { // Given how we walk on S, 'start' is always guaranteed to be > // 'provisory_match_start'. if (end > provisory_match_end) _report_match(provisory_match_start, provisory_match_width); else if (nedit > provisory_match_nedit) return; } provisory_match_start = start; provisory_match_end = end; provisory_match_width = width; provisory_match_nedit = nedit; return; }
static void match_naive_inexact(const Chars_holder *P, const Chars_holder *S, int max_nmis, int min_nmis, int fixedP, int fixedS) { int Pshift, // position of pattern left-most char relative to the subject n2, // 1 + position of pattern right-most char relative to the subject min_Pshift, max_n2, nmis; const BytewiseOpTable *bytewise_match_table; if (P->length <= 0) error("empty pattern"); bytewise_match_table = _select_bytewise_match_table(fixedP, fixedS); min_Pshift = P->length <= max_nmis ? 1 - P->length : -max_nmis; max_n2 = S->length - min_Pshift; for (Pshift = min_Pshift, n2 = min_Pshift + P->length; n2 <= max_n2; Pshift++, n2++) { nmis = _nmismatch_at_Pshift(P, S, Pshift, max_nmis, bytewise_match_table); if (nmis <= max_nmis && nmis >= min_nmis) _report_match(Pshift + 1, P->length); } return; }
void _match_pattern_indels(const cachedCharSeq *P, const cachedCharSeq *S, int max_nmis, int fixedP, int fixedS) { int i0, j0, max_nmis1, nedit1, width1; char c0; cachedCharSeq P1; if (P->length <= 0) error("empty pattern"); _select_nmismatch_at_Pshift_fun(fixedP, fixedS); if (!fixedP || !fixedS) error("'fixed' must be TRUE when 'algorithm=\"indels\"' (for now)"); // Before we can support fixedP=FALSE or fixedS=FALSE in // _match_pattern_indels(), we need to support them in // _init_byte2offset_with_cachedCharSeq() and _nedit_for_Ploffset(). _init_byte2offset_with_cachedCharSeq(byte2offset, P, 0); provisory_match_nedit = -1; // means no provisory match yet j0 = 0; while (j0 < S->length) { while (1) { c0 = S->seq[j0]; i0 = byte2offset[(unsigned char) c0]; if (i0 != NA_INTEGER) break; j0++; if (j0 >= S->length) goto done; } P1.seq = P->seq + i0 + 1; P1.length = P->length - i0 - 1; max_nmis1 = max_nmis - i0; /* #ifdef DEBUG_BIOSTRINGS if (debug) { Rprintf("[DEBUG] _match_pattern_indels(): " "j0=%d c0=%c i0=%d max_nmis1=%d\n", j0, c0, i0, max_nmis1); } #endif */ if (max_nmis1 >= 0) { if (max_nmis1 == 0) { nedit1 = _selected_nmismatch_at_Pshift_fun(&P1, S, j0 + 1, max_nmis1); width1 = P1.length; } else { nedit1 = _nedit_for_Ploffset(&P1, S, j0 + 1, max_nmis1, 1, &width1); } if (nedit1 <= max_nmis1) { #ifdef DEBUG_BIOSTRINGS if (debug) { Rprintf("[DEBUG] _match_pattern_indels(): " "provisory match found at "); print_match(j0 + 1, width1 + 1, P, S); } #endif report_provisory_match(j0 + 1, width1 + 1, nedit1 + i0); } } j0++; } done: if (provisory_match_nedit != -1) _report_match(provisory_match_start, provisory_match_width); return; }
void _match_pattern_indels(const Chars_holder *P, const Chars_holder *S, int max_nmis, int fixedP, int fixedS) { int i0, j0, max_nmis1, nedit1, width1; char c0; const BytewiseOpTable *bytewise_match_table; Chars_holder P1; if (P->length <= 0) error("empty pattern"); bytewise_match_table = _select_bytewise_match_table(fixedP, fixedS); _init_byte2offset_with_Chars_holder(&byte2offset, P, bytewise_match_table); provisory_match_nedit = -1; // means no provisory match yet j0 = 0; while (j0 < S->length) { while (1) { c0 = S->ptr[j0]; i0 = byte2offset.byte2code[(unsigned char) c0]; if (i0 != NA_INTEGER) break; j0++; if (j0 >= S->length) goto done; } P1.ptr = P->ptr + i0 + 1; P1.length = P->length - i0 - 1; max_nmis1 = max_nmis - i0; /* #ifdef DEBUG_BIOSTRINGS if (debug) { Rprintf("[DEBUG] _match_pattern_indels(): " "j0=%d c0=%c i0=%d max_nmis1=%d\n", j0, c0, i0, max_nmis1); } #endif */ if (max_nmis1 >= 0) { if (max_nmis1 == 0) { nedit1 = _nmismatch_at_Pshift(&P1, S, j0 + 1, max_nmis1, bytewise_match_table); width1 = P1.length; } else { nedit1 = _nedit_for_Ploffset(&P1, S, j0 + 1, max_nmis1, 1, &width1, bytewise_match_table); } if (nedit1 <= max_nmis1) { #ifdef DEBUG_BIOSTRINGS if (debug) { Rprintf("[DEBUG] _match_pattern_indels(): " "provisory match found at "); print_match(j0 + 1, width1 + 1, P, S, bytewise_match_table); } #endif report_provisory_match(j0 + 1, width1 + 1, nedit1 + i0); } } j0++; } done: if (provisory_match_nedit != -1) _report_match(provisory_match_start, provisory_match_width); return; }
static void BOC2_exact_search(const char *P, int nP, const char *S, int nS, char c1, char c2, char c3, char c4, const int *buf, const double *means, const int *table1, const int *table2, const int *table3, const int *table4) { int n1, n1max, n2, c1_oc, c2_oc, c3_oc, Psignature, nPsuf4, *Psuf4_offsets[4], Psuf4_noffsets[4], i, j, *offsets, noffsets; char c, Ppre4, codes[4]; const char *Psuf4, *Ssuf4; #ifdef DEBUG_BIOSTRINGS int count_preapprovals = 0; #endif c1_oc = c2_oc = c3_oc = 0; for (n2 = 0; n2 < nP; n2++) { c = P[n2]; if (c == c1) c1_oc++; else if (c == c2) c2_oc++; else if (c == c3) c3_oc++; else if (c != c4) error("'pattern' contains non-base DNA letters"); } Ppre4 = make_pre4(P, c1, c2, c3, c4); Psignature = make_32bit_signature(c1_oc, c2_oc, c3_oc, Ppre4); #ifdef DEBUG_BIOSTRINGS if (debug) Rprintf("[DEBUG] pattern: c1_oc=%d c2_oc=%d c3_oc=%d Ppre4=%d\n", c1_oc, c2_oc, c3_oc, Ppre4); #endif Psuf4 = P + 4; nPsuf4 = nP - 4; codes[0] = c1; codes[1] = c2; codes[2] = c3; codes[3] = c4; for (i = 0; i < 4; i++) Psuf4_offsets[i] = Salloc((long) nP, int); split4_offsets(codes, Psuf4_offsets, Psuf4_noffsets, Psuf4, nPsuf4); n1max = nS - nP; for (n1 = 0, Ssuf4 = S + 4; n1 <= n1max; n1++, Ssuf4++, buf++) { if (Psignature != *buf) continue; #ifdef DEBUG_BIOSTRINGS count_preapprovals++; #endif if (memcmp(Psuf4, Ssuf4, nPsuf4) != 0) continue; // same as goto continue0; /* // Uncomment the 2 lines above if you want to use the fancy // comparison method below. for (i = 0; i < 3; i++) { c = codes[i]; offsets = Psuf4_offsets[i]; noffsets = Psuf4_noffsets[i]; for (j = 0; j < noffsets; j++) if (c != Ssuf4[offsets[j]]) goto continue0; } */ _report_match(n1 + 1, nP); continue0: ; } #ifdef DEBUG_BIOSTRINGS if (debug) Rprintf("[DEBUG] count_preapprovals=%d\n", count_preapprovals); #endif return; }