Esempio n. 1
0
/*
 * Walk outward from the position pair (i1, i2) of 'x' -- i1 stepping left,
 * i2 stepping right -- growing palindrome arms and reporting them.
 *
 *   x, x_len        sequence to scan and its length
 *   i1, i2          starting left and right positions (0-based indices in x)
 *   max_loop_len1   a new arm can only be started while i2 - i1 does not
 *                   exceed this value
 *   min_arm_len     arms shorter than this are silently dropped
 *   max_nmis        number of non-matching pairs that can be absorbed into
 *                   an arm; the counter is never reset, so the budget is
 *                   shared by every arm found during this walk
 *   lkup, lkup_len  passed through untouched to is_match()
 *
 * Each retained arm is emitted with _report_match(); the reported range is
 * the stretch of x lying strictly between the i1 and i2 at which the arm
 * stopped growing.
 */
static void get_find_palindromes_at(const char *x, int x_len,
	int i1, int i2, int max_loop_len1, int min_arm_len, int max_nmis,
	const int *lkup, int lkup_len)
{
	int arm_len = 0;

	for (;; i1--, i2++) {
		int in_bounds = (i1 >= 0 && i2 < x_len);
		int grows_arm = 0;

		// With no arm in progress, give up as soon as we fall off
		// the sequence or the loop gets too long.
		if (arm_len == 0 &&
		    !(in_bounds && i2 - i1 <= max_loop_len1))
			break;

		if (in_bounds) {
			if (is_match(x[i1], x[i2], lkup, lkup_len))
				grows_arm = 1;
			else
				// A mismatch always consumes one unit of the
				// budget, even once the budget is exhausted.
				grows_arm = max_nmis-- > 0;
		}
		if (grows_arm) {
			arm_len++;
			continue;
		}

		// The arm (possibly empty) ends here.
		if (arm_len >= min_arm_len)
			_report_match(i1 + 2, i2 - i1 - 1);
		arm_len = 0;
	}
	return;
}
Esempio n. 2
0
/*
 * Brute-force exact matching.
 *
 * Slides pattern P along subject S one position at a time and compares the
 * pattern with the equally long window of S using memcmp(). Every window
 * that is byte-for-byte identical to P is reported through _report_match()
 * with a start equal to the window's 0-based offset plus one, and a width
 * equal to the pattern length.
 *
 * Calls error() if the pattern is empty.
 */
static void match_naive_exact(const Chars_holder *P, const Chars_holder *S)
{
	int pattern_len, subject_len, offset;
	const char *pattern, *subject;

	if (P->length <= 0)
		error("empty pattern");
	pattern = P->ptr;
	pattern_len = P->length;
	subject = S->ptr;
	subject_len = S->length;

	// 'offset' is the 0-based position of the window's first byte in S;
	// the window must fit entirely inside the subject.
	for (offset = 0; offset + pattern_len <= subject_len; offset++) {
		if (memcmp(pattern, subject + offset, pattern_len) != 0)
			continue;
		_report_match(offset + 1, pattern_len);
	}
	return;
}
/*
 * Submit (start, width, nedit) as a candidate ("provisory") match.
 *
 * At most one provisory match is held at a time in the file-level
 * provisory_match_* variables; provisory_match_nedit == -1 means that none
 * is currently held. The candidate is handled as follows:
 *   - nothing held: the candidate becomes the provisory match;
 *   - the candidate ends after the held match: the held match is reported
 *     with _report_match(), then the candidate takes its place;
 *   - otherwise: the candidate replaces the held match unless it has
 *     strictly more edits, in which case the candidate is discarded.
 *
 * Note from the original author: given how the subject is walked, 'start'
 * is always guaranteed to be greater than 'provisory_match_start'.
 */
static void report_provisory_match(int start, int width, int nedit)
{
	const int end = start + width - 1;

	if (provisory_match_nedit != -1) {
		if (end <= provisory_match_end) {
			// Candidate does not reach further than the held
			// match: keep whichever has fewer edits.
			if (nedit > provisory_match_nedit)
				return;
		} else {
			_report_match(provisory_match_start,
				      provisory_match_width);
		}
	}
	provisory_match_nedit = nedit;
	provisory_match_width = width;
	provisory_match_end = end;
	provisory_match_start = start;
	return;
}
Esempio n. 4
0
/*
 * Brute-force inexact matching (mismatches only).
 *
 * Tries every shift of pattern P relative to subject S, including shifts
 * where the pattern sticks out of the subject: the left-most shift is
 * -max_nmis (or 1 - P->length when the pattern is not longer than
 * max_nmis), and the right end of the pattern may go the same distance
 * past the end of the subject. For each shift the mismatch count returned
 * by _nmismatch_at_Pshift() is computed, and the shift is reported with
 * _report_match() when that count lies in [min_nmis, max_nmis].
 *
 *   fixedP, fixedS  forwarded to _select_bytewise_match_table() to pick
 *                   the table used for comparing letters
 *
 * Calls error() if the pattern is empty.
 */
static void match_naive_inexact(const Chars_holder *P, const Chars_holder *S,
		int max_nmis, int min_nmis, int fixedP, int fixedS)
{
	const BytewiseOpTable *match_table;
	int first_shift,  // left-most shift that gets tried
	    shift,        // position of P's first letter relative to S
	    right_end,    // 1 + position of P's last letter relative to S
	    last_right_end, nmis;

	if (P->length <= 0)
		error("empty pattern");
	match_table = _select_bytewise_match_table(fixedP, fixedS);

	if (P->length <= max_nmis)
		first_shift = 1 - P->length;
	else
		first_shift = -max_nmis;
	last_right_end = S->length - first_shift;

	shift = first_shift;
	right_end = first_shift + P->length;
	while (right_end <= last_right_end) {
		nmis = _nmismatch_at_Pshift(P, S, shift, max_nmis,
					    match_table);
		if (nmis >= min_nmis && nmis <= max_nmis)
			_report_match(shift + 1, P->length);
		shift++;
		right_end++;
	}
	return;
}
/*
 * Find the matches of pattern P in subject S, allowing up to max_nmis
 * edits (the "indels" algorithm).
 *
 * Every position of S whose letter has an offset in P (according to the
 * file-level byte2offset table) is used as an anchor. The pattern letters
 * located before that offset are charged as that many edits, and the
 * remaining edit budget is spent aligning the rest of the pattern with the
 * subject letters that follow the anchor. Successful alignments are handed
 * to report_provisory_match(); the provisory match still pending at the end
 * of the scan is reported with _report_match().
 *
 * Only fixedP = fixedS = TRUE is supported for now: lifting that
 * restriction first requires support for the non-fixed case in
 * _init_byte2offset_with_cachedCharSeq() and _nedit_for_Ploffset().
 *
 * Calls error() on an empty pattern or when fixedP or fixedS is FALSE.
 */
void _match_pattern_indels(const cachedCharSeq *P, const cachedCharSeq *S,
		int max_nmis, int fixedP, int fixedS)
{
	cachedCharSeq Ptail;
	int Spos, Poffset, budget, nedit, width;

	if (P->length <= 0)
		error("empty pattern");
	_select_nmismatch_at_Pshift_fun(fixedP, fixedS);
	if (!fixedP || !fixedS)
		error("'fixed' must be TRUE when 'algorithm=\"indels\"' (for now)");
	_init_byte2offset_with_cachedCharSeq(byte2offset, P, 0);
	provisory_match_nedit = -1; // no provisory match is pending yet

	for (Spos = 0; Spos < S->length; Spos++) {
		// Subject letters with no offset in P cannot anchor a match.
		Poffset = byte2offset[(unsigned char) S->seq[Spos]];
		if (Poffset == NA_INTEGER)
			continue;

		// Skipping the first 'Poffset' pattern letters costs that
		// many edits.
		budget = max_nmis - Poffset;
		if (budget < 0)
			continue;

		// What is left of the pattern after the anchoring letter.
		Ptail.seq = P->seq + Poffset + 1;
		Ptail.length = P->length - Poffset - 1;
		if (budget == 0) {
			// No edits left: plain mismatch counting is enough.
			nedit = _selected_nmismatch_at_Pshift_fun(&Ptail, S, Spos + 1, budget);
			width = Ptail.length;
		} else {
			nedit = _nedit_for_Ploffset(&Ptail, S, Spos + 1, budget, 1, &width);
		}
		if (nedit > budget)
			continue;
#ifdef DEBUG_BIOSTRINGS
		if (debug) {
			Rprintf("[DEBUG] _match_pattern_indels(): "
				"provisory match found at ");
			print_match(Spos + 1, width + 1, P, S);
		}
#endif
		report_provisory_match(Spos + 1, width + 1, nedit + Poffset);
	}

	// Flush the last provisory match, if any.
	if (provisory_match_nedit != -1)
		_report_match(provisory_match_start, provisory_match_width);
	return;
}
Esempio n. 6
0
/*
 * Find the matches of pattern P in subject S, allowing up to max_nmis
 * edits (the "indels" algorithm).
 *
 * Every position of S whose letter has an offset in P (according to the
 * file-level byte2offset table, initialized here from P and the selected
 * bytewise match table) is used as an anchor. The pattern letters located
 * before that offset are charged as that many edits, and the remaining
 * edit budget is spent aligning the rest of the pattern with the subject
 * letters that follow the anchor. Successful alignments are handed to
 * report_provisory_match(); the provisory match still pending at the end
 * of the scan is reported with _report_match().
 *
 *   fixedP, fixedS  forwarded to _select_bytewise_match_table() to pick
 *                   the table used for comparing letters
 *
 * Calls error() if the pattern is empty.
 */
void _match_pattern_indels(const Chars_holder *P, const Chars_holder *S,
		int max_nmis, int fixedP, int fixedS)
{
	const BytewiseOpTable *match_table;
	Chars_holder Ptail;
	int Spos, Poffset, budget, nedit, width;

	if (P->length <= 0)
		error("empty pattern");
	match_table = _select_bytewise_match_table(fixedP, fixedS);
	_init_byte2offset_with_Chars_holder(&byte2offset, P, match_table);
	provisory_match_nedit = -1; // no provisory match is pending yet

	for (Spos = 0; Spos < S->length; Spos++) {
		// Subject letters with no offset in P cannot anchor a match.
		Poffset = byte2offset.byte2code[(unsigned char) S->ptr[Spos]];
		if (Poffset == NA_INTEGER)
			continue;

		// Skipping the first 'Poffset' pattern letters costs that
		// many edits.
		budget = max_nmis - Poffset;
		if (budget < 0)
			continue;

		// What is left of the pattern after the anchoring letter.
		Ptail.ptr = P->ptr + Poffset + 1;
		Ptail.length = P->length - Poffset - 1;
		if (budget == 0) {
			// No edits left: plain mismatch counting is enough.
			nedit = _nmismatch_at_Pshift(&Ptail, S, Spos + 1,
						     budget, match_table);
			width = Ptail.length;
		} else {
			nedit = _nedit_for_Ploffset(&Ptail, S, Spos + 1,
						    budget, 1, &width,
						    match_table);
		}
		if (nedit > budget)
			continue;
#ifdef DEBUG_BIOSTRINGS
		if (debug) {
			Rprintf("[DEBUG] _match_pattern_indels(): "
				"provisory match found at ");
			print_match(Spos + 1, width + 1, P, S, match_table);
		}
#endif
		report_provisory_match(Spos + 1, width + 1, nedit + Poffset);
	}

	// Flush the last provisory match, if any.
	if (provisory_match_nedit != -1)
		_report_match(provisory_match_start, provisory_match_width);
	return;
}
/*
 * Exact search of pattern P (length nP) in subject S (length nS), using a
 * precomputed per-position signature buffer to filter candidates.
 *
 *   c1, c2, c3, c4  the 4 letters allowed in the pattern; error() is called
 *                   if P contains any other letter
 *   buf             one 32-bit signature per candidate start position in S;
 *                   a position is examined further only when its signature
 *                   equals the pattern's signature, which is built by
 *                   make_32bit_signature() from the number of occurrences
 *                   of c1, c2 and c3 in P and from make_pre4()'s encoding
 *                   of P
 *   means, table1..table4
 *                   not used by this function
 *
 * For a candidate position, the pattern minus its first 4 letters is
 * compared with memcmp() against the subject at the same relative offset;
 * on equality the match is reported with _report_match().
 *
 * NOTE(review): Psuf4 = P + 4 and nPsuf4 = nP - 4 assume nP >= 4; this is
 * presumably enforced by the caller -- confirm.
 */
static void BOC2_exact_search(const char *P, int nP, const char *S, int nS,
		char c1, char c2, char c3, char c4,
		const int *buf, const double *means,
		const int *table1, const int *table2, const int *table3, const int *table4)
{
	int count1 = 0, count2 = 0, count3 = 0;
	int k, pos, last_pos, Psignature, nPsuf4;
	int *Psuf4_offsets[4], Psuf4_noffsets[4];
	char letter, Ppre4, codes[4];
	const char *Psuf4;
#ifdef DEBUG_BIOSTRINGS
	int count_preapprovals = 0;
#endif

	// Tally the pattern letters (c4's count is implied by the others)
	// and reject anything that is not one of the 4 expected letters.
	for (k = 0; k < nP; k++) {
		letter = P[k];
		if (letter == c1)
			count1++;
		else if (letter == c2)
			count2++;
		else if (letter == c3)
			count3++;
		else if (letter != c4)
			error("'pattern' contains non-base DNA letters");
	}
	Ppre4 = make_pre4(P, c1, c2, c3, c4);
	Psignature = make_32bit_signature(count1, count2, count3, Ppre4);
#ifdef DEBUG_BIOSTRINGS
	if (debug)
		Rprintf("[DEBUG] pattern: c1_oc=%d c2_oc=%d c3_oc=%d Ppre4=%d\n",
			count1, count2, count3, Ppre4);
#endif

	// The pattern without its first 4 letters.
	Psuf4 = P + 4;
	nPsuf4 = nP - 4;

	// Per-letter offset tables for the pattern suffix. They are filled
	// by split4_offsets() but are not read by the memcmp()-based
	// comparison used below.
	codes[0] = c1;
	codes[1] = c2;
	codes[2] = c3;
	codes[3] = c4;
	for (k = 0; k < 4; k++)
		Psuf4_offsets[k] = Salloc((long) nP, int);
	split4_offsets(codes, Psuf4_offsets, Psuf4_noffsets, Psuf4, nPsuf4);

	last_pos = nS - nP;
	for (pos = 0; pos <= last_pos; pos++) {
		// Cheap filter first: the signatures must agree.
		if (buf[pos] != Psignature)
			continue;
#ifdef DEBUG_BIOSTRINGS
		count_preapprovals++;
#endif
		if (memcmp(Psuf4, S + 4 + pos, nPsuf4) == 0)
			_report_match(pos + 1, nP);
	}
#ifdef DEBUG_BIOSTRINGS
	if (debug)
		Rprintf("[DEBUG] count_preapprovals=%d\n", count_preapprovals);
#endif
	return;
}