Exemplo n.º 1
0
/*
 * Finds every inexact occurrence of 'string' within 'seq'.
 *
 * seq, seq_len        Text to search. NOTE: uppercased in place.
 * string, string_len  Pattern. It is handed to depad_seq() first, so it is
 *                     presumably edited in place with pads removed; the
 *                     length used afterwards is the one depad_seq() writes
 *                     back.
 * mis_match           Mismatch allowance, passed straight through to
 *                     pstrnstr_inexact().
 * match, score        Output arrays with room for max_matches entries.
 *                     match[i] is the 1-based offset of hit i in seq;
 *                     score[i] is the pattern length minus the number of
 *                     mismatches reported for that hit.
 * max_matches         Capacity of match[] and score[].
 *
 * Each new search restarts one character after the previous hit, so
 * overlapping hits are reported.
 *
 * Returns the number of hits stored,
 *         -1 if there were more hits than max_matches (the stored
 *            entries are still valid and 1-based),
 *         -2 if memory could not be allocated.
 */
int inexact_pad_match(char *seq,
		      int seq_len,
		      char *string,
		      int string_len,
		      int mis_match,
		      int *match,
		      int *score,
		      int max_matches)
{
    char *pos;
    char *uppert;
    int i;
    int n_matches = 0;
    int n_mis;
    int overflow = 0;

    /* remove any pads from the pattern search */
    depad_seq(string, &string_len, NULL);

    /* uppercase search string */
    if (NULL == (uppert = (char *)xmalloc(string_len + 1)))
	return -2;
    uppert[string_len] = 0;
    for (i = string_len-1; i >= 0; i--) {
	/* <ctype.h> functions need an unsigned char value */
	uppert[i] = toupper((unsigned char)string[i]);
    }
    for (i = 0; i < seq_len; i++) {
	seq[i] = toupper((unsigned char)seq[i]);
    }

    pos = pstrnstr_inexact(seq,seq_len, uppert,string_len, mis_match, &n_mis);
    while (pos) {
	if (n_matches >= max_matches) {
	    /* out of match storage */
	    overflow = 1;
	    break;
	}

	match[n_matches] = pos - seq;
	score[n_matches] = string_len - n_mis;
	n_matches++;

	/* resume one character past this hit */
	pos++;
	pos = pstrnstr_inexact(pos, seq_len - (pos-seq),
			       uppert, string_len, mis_match, &n_mis);
    }

    /* make positions start at 1 */
    for (i=0; i < n_matches; i++) {
	match[i]++;
    }

    /* Single exit: the pattern copy is released on the overflow path too. */
    xfree(uppert);
    return overflow ? -1 : n_matches;
}
Exemplo n.º 2
0
/*
 * Searches the sequences of the editor's contig for a pattern, starting
 * next to the cursor, and moves the cursor to the nearest hit.
 *
 * xx      Editor view; supplies the io handle, contig number and cursor.
 * dir     Non-zero searches forward from cursor_apos+1; zero searches
 *         backward from cursor_apos-1.
 * strand  '+' searches the pattern as given, '-' its complement_seq()
 *         form, '=' both.
 * value   "<string>[#<N.mismatches>[#<where>]]". Modified in place: the
 *         first '#' is overwritten with NUL and the string is passed to
 *         depad_seq(). <where> is parsed but not otherwise used in this
 *         function.
 *
 * Returns 0 if a match was found (cursor moved), -1 if no match was found,
 * the iterator could not be created, or memory allocation failed.
 */
int edview_search_sequence(edview *xx, int dir, int strand, char *value) {
    int mismatches = 0; /* exact match */
    int where = 2;      /* consensus */
    char *p;
    int start, end;
    int patlen;
    char *uppert = NULL, *upperb = NULL;
    int found = 0;
    tg_rec fseq;
    int fpos, i;
    contig_iterator *iter;
    rangec_t *(*ifunc)(GapIO *io, contig_iterator *ci);
    rangec_t *r;
    int best_pos;

    if (dir) {
	start = xx->cursor_apos + 1;
	end = CITER_CEND;
	iter = contig_iter_new(xx->io, xx->cnum, 1,
			       CITER_FIRST | CITER_ISTART,
			       start, end);
	ifunc = contig_iter_next;
	best_pos = INT_MAX;
    } else {
	start = CITER_CSTART;
	end = xx->cursor_apos -1;
	iter = contig_iter_new(xx->io, xx->cnum, 1,
			       CITER_LAST | CITER_IEND,
			       start, end);
	ifunc = contig_iter_prev;
	best_pos = INT_MIN;
    }

    if (!iter)
	return -1;


    /*
     * Parse value search string. It optionally includes two extra params
     * separated by #. Ie:
     *     <string>#<N.mismatches>#<where>.
     * <where> is 1 for readings, 2 for consensus, 3 for both.
     */
    if ((p = strchr(value, '#')) != NULL) {
	mismatches = atoi(p+1);
	*p = 0;
	if ((p = strchr(p+1, '#')) != NULL)
	    where = atoi(p+1);
    }
    (void)where; /* parsed for completeness; not consulted below */


    /* uppercase search string, remove pads, and store fwd/rev copies */
    patlen = (int)strlen(value);
    depad_seq(value, &patlen, NULL);
    uppert = (char *)xmalloc(patlen + 1);
    upperb = (char *)xmalloc(patlen + 1);
    if (NULL == uppert || NULL == upperb) {
	/*
	 * found is still 0 here, so the shared exit reports -1 and
	 * releases the iterator and whichever buffer was obtained.
	 */
	goto cleanup;
    }

    uppert[patlen] = upperb[patlen] = 0;
    for (i = patlen-1; i >= 0; i--) {
	/* <ctype.h> functions need an unsigned char value */
	upperb[i] = uppert[i] = toupper((unsigned char)value[i]);
    }
    complement_seq(upperb, patlen);

    while ((r = ifunc(xx->io, iter))) {
	seq_t *s, *sorig;
	char *ind, *indt = NULL, *indb = NULL, *seq;
	int seq_len, comp, off = 0;

	(void)comp;

	/*
	 * Stop once a range lies wholly beyond the best hit so far
	 * (presumably the iterator yields ranges in positional order).
	 */
	if (found && dir  && r->start > best_pos)
	    break;
	if (found && !dir && r->end < best_pos)
	    break;

	if (NULL == (s = sorig = cache_search(xx->io, GT_Seq, r->rec)))
	    break;

	/* Work on a complemented duplicate when the two flags disagree */
	if (r->comp ^ (s->len < 0)) {
	    s = dup_seq(s);
	    complement_seq_t(s);
	}

	seq = s->seq;
	seq_len = ABS(s->len);

	/* Clip the searched window to [start, end] */
	if (r->start < start) {
	    off      = start - r->start;
	    seq     += off;
	    seq_len -= off;
	}
	if (r->end - (patlen-1) > end)
	    seq_len -= r->end - (patlen-1) - end;

	if (dir) {
	    if (strand == '+' || strand == '=')
		indt = pstrnstr_inexact(seq, seq_len, uppert, patlen,
					mismatches, NULL);
	    if (strand == '-' || strand == '=')
		indb = pstrnstr_inexact(seq, seq_len, upperb, patlen,
					mismatches, NULL);
	} else {
	    if (strand == '+' || strand == '=')
		indt = prstrnstr_inexact(seq, seq_len, uppert, patlen,
					 mismatches, NULL);
	    if (strand == '-' || strand == '=')
		indb = prstrnstr_inexact(seq, seq_len, upperb, patlen,
					 mismatches, NULL);
	}

	if (indt && indb) {
	    /*
	     * Both strands hit: keep the one nearest the cursor, i.e. the
	     * lower address going forward and the higher one going
	     * backward, matching the best_pos comparisons below.
	     */
	    if (dir)
		ind = (indt < indb) ? indt : indb;
	    else
		ind = (indt > indb) ? indt : indb;
	} else if (indt) {
	    ind = indt;
	} else if (indb) {
	    ind = indb;
	} else {
	    ind = NULL;
	}

	if (ind) {
	    int pos =  r->start + ind - seq + off;
	    if (dir) {
		if (best_pos > pos) {
		    found = 1;
		    best_pos = pos;
		    fpos = ind - s->seq;
		    fseq = r->rec;
		}
	    } else {
		if (best_pos < pos) {
		    found = 1;
		    best_pos = pos;
		    fpos = ind - s->seq;
		    fseq = r->rec;
		}
	    }
	}

	/* Only the duplicate made above belongs to us */
	if (s != sorig)
	    free(s);
    }

    if (found) {
	edSetCursorPos(xx, fseq == xx->cnum ? GT_Contig : GT_Seq,
		       fseq, fpos, 1);
    }

 cleanup:
    free(uppert);
    free(upperb);

    contig_iter_del(iter);

    return found ? 0 : -1;
}