Пример #1
0
/*
 * Finds every inexact occurrence of a pattern within a sequence.
 *
 * seq/seq_len        sequence to search; it is uppercased IN PLACE.
 * string/string_len  pattern; it is passed to depad_seq() first, which is
 *                    given the pattern and a pointer to its length
 *                    (presumably stripping pads in place - confirm against
 *                    depad_seq()).
 * mis_match          mismatch allowance handed to pstrnstr_inexact().
 * match, score       output arrays with room for max_matches entries.
 *                    match[] receives 1-based offsets into seq, score[]
 *                    receives (pattern length - number of mismatches).
 *
 * Returns the number of matches stored,
 *         -1 if there were more than max_matches hits (the first
 *            max_matches entries are still filled in, 1-based),
 *         -2 if the working copy of the pattern could not be allocated.
 */
int inexact_pad_match(char *seq,
                      int seq_len,
                      char *string,
                      int string_len,
                      int mis_match,
                      int *match,
                      int *score,
                      int max_matches)
{
    char *pos;
    char *uppert;
    int i;
    int n_matches = 0;
    int n_mis;
    int overflow = 0;

    /* remove any pads from the pattern search */
    depad_seq(string, &string_len, NULL);

    /* uppercase copy of the search string */
    if (NULL == (uppert = (char *)xmalloc(string_len + 1)))
        return -2;
    uppert[string_len] = 0;
    for (i = string_len-1; i >= 0; i--) {
        /* ctype functions need a value representable as unsigned char */
        uppert[i] = toupper((unsigned char)string[i]);
    }
    for (i = 0; i < seq_len; i++) {
        seq[i] = toupper((unsigned char)seq[i]);
    }

    pos = pstrnstr_inexact(seq, seq_len, uppert, string_len,
                           mis_match, &n_mis);
    while (pos) {
        if (n_matches >= max_matches) {
            /* out of match storage; fall through to the shared cleanup */
            overflow = 1;
            break;
        }

        match[n_matches] = pos - seq;
        score[n_matches] = string_len - n_mis;
        n_matches++;

        /* resume the search one base beyond the current hit */
        pos++;
        pos = pstrnstr_inexact(pos, seq_len - (pos-seq),
                               uppert, string_len, mis_match, &n_mis);
    }

    /* make positions start at 1 */
    for (i = 0; i < n_matches; i++) {
        match[i]++;
    }

    /* released on every path, including the overflow return */
    xfree(uppert);

    return overflow ? -1 : n_matches;
}
Пример #2
0
/*
 * Masks regions of s (len characters, not required to be NUL terminated)
 * in place by overwriting alphabetic characters with '#'.
 *
 * A private copy of s is passed through depad_seq(), which also fills in
 * depad_to_pad[]; that array is then used to translate indices of the
 * depadded copy back to indices of s. The depadded copy is scanned in
 * steps of window2, scoring up to `window` characters at a time with wo().
 * When the score exceeds `level`, the range [a,b] reported by wo() is
 * masked. Only the part of the range below window2 is masked immediately;
 * any remainder is carried over in from/to and masked at the start of the
 * next iteration.
 *
 * window, window2, level and wo() are defined outside this block.
 *
 * If either working buffer cannot be allocated the function returns
 * without modifying s.
 */
void dust(int len, char *s)
{
    int i, j, l, from, to, a, b, v;
    char *depadded = (char *)malloc(len);
    int *depad_to_pad = (int *)calloc(len, sizeof(int));
    int depadded_len;

    if (!depadded || !depad_to_pad) {
        /* one allocation may have succeeded; free(NULL) is a no-op */
        free(depadded);
        free(depad_to_pad);
        return;
    }

    memcpy(depadded, s, len);
    depadded_len = len;
    depad_seq(depadded, &depadded_len, depad_to_pad);

    /* from > to means "nothing carried over from the previous window" */
    from = 0;
    to = -1;
    for (i=0; i < depadded_len; i += window2) {
        /* re-express the carried-over range relative to the new start i */
        from -= window2;
        to -= window2;
        l = (depadded_len > i+window) ? window : depadded_len-i;
        v = wo(l, depadded+i, &a, &b);

        /* mask whatever the previous window left pending */
        for (j = from; j <= to; j++) {
            if (isalpha((unsigned char)s[depad_to_pad[i+j]]))
                s[depad_to_pad[i+j]] = '#';
        }

        if (v > level) {
            /* mask the part of [a,b] that lies before the next step */
            for (j = a; j <= b && j < window2; j++) {
                if (isalpha((unsigned char)s[depad_to_pad[i+j]]))
                    s[depad_to_pad[i+j]] = '#';
            }
            /* the rest, [j,b], is handled on the next iteration */
            from = j;
            to = b;
        } else {
            from = 0;
            to = -1;
        }
    }

    free(depadded);
    free(depad_to_pad);
}
Пример #3
0
/*
 * Main picking function.
 * Picks primers from the right end of contig1 and the left end of contig2
 * suitable for a PCR reaction.
 * After calling, the returned value is an array of structures linking into
 * the primer3 primer_pair structure along with gap4 sanitised copies holding
 * padded position and length in each contig and the depadded sequence.
 * The number of elements in this array can be fetched from pstate->npairs;
 * note that pstate->npairs is overwritten with the number of pairs that
 * passed the secondary binding-site filter.
 *
 * Side effects: sets pstate->p3args.primer_task and .num_return, caches
 * filter results in the primers' excl fields, and prints the joined
 * consensus and target to stdout.
 *
 * The returned array is allocated with xmalloc(); the caller is expected to
 * release it.
 *
 * Returns g4_primer_pair array pointer for success,
 *        NULL for failure (including: either region shorter than 25 bases,
 *        allocation failure, primlib_choose_pcr() failure, or no pair
 *        surviving the filter)
 */
static g4_primer_pair *pick_pcr_primers2(finish_t *fin, primlib_state *pstate,
                                         int contig1, int contig2)
{
    char *cons1 = NULL, *cons2 = NULL, *cons_joined = NULL;
    int pos1l, pos1r, pos2l, pos2r;
    int len1, len2;
    int *depad1 = NULL, *depad2 = NULL;
    g4_primer_pair *pp = NULL;
    int i, j;

    /* Compute contig ranges */
    pos1l = MAX(1, io_clength(fin->io, contig1) - (fin->opts.pcr_offset1-1));
    pos1r = MAX(1, io_clength(fin->io, contig1) - (fin->opts.pcr_offset2-1));
    len1 = pos1r - pos1l + 1;
    if (len1 < 25)
        return NULL;

    pos2l = MIN(io_clength(fin->io, contig2), fin->opts.pcr_offset2);
    pos2r = MIN(io_clength(fin->io, contig2), fin->opts.pcr_offset1);
    len2 = pos2r - pos2l + 1;
    if (len2 < 25)
        return NULL;


    /* Get the depadded consensus */
    cons1 = (char *)xmalloc(len1+1);
    cons2 = (char *)xmalloc(len2+1);
    if (!cons1 || !cons2)
        goto error;

    calc_consensus(contig1, pos1l, pos1r, CON_SUM, cons1, NULL,
                   NULL, NULL, consensus_cutoff, quality_cutoff,
                   database_info, (void *)fin->io);
    calc_consensus(contig2, pos2l, pos2r, CON_SUM, cons2, NULL,
                   NULL, NULL, consensus_cutoff, quality_cutoff,
                   database_info, (void *)fin->io);
    cons1[pos1r-pos1l+1] = 0;
    cons2[pos2r-pos2l+1] = 0;

    /*
     * depad1/depad2 are filled in by depad_seq() and used below to map
     * depadded primer coordinates back to padded offsets. len1/len2 are
     * updated by depad_seq() as well.
     */
    if (!(depad1 = (int *)xmalloc((len1+1)*sizeof(int))))
        goto error;
    if (!(depad2 = (int *)xmalloc((len2+1)*sizeof(int))))
        goto error;
    depad_seq(cons1, &len1, depad1);
    depad_seq(cons2, &len2, depad2);

    /* Filter low complexity data from the consensus */
    finish_filter(fin, cons1, len1);
    finish_filter(fin, cons2, len2);


    /*
     * For primer3 we join our two sequences together thus, with 20 Ns:
     *
     * <CONS1>NNNNNNNNNNNNNNNNNNNN<CONS2>
     *        ^                  ^
     *        x                  y
     *
     * Points x and y define the target ("TARGET=x,y-x" in the normal
     * boulder-io input file).
     * We also need to redefine the product size range to be 20 to
     * 20 + 2*(len2-len1) allowing for full flexibility of primer
     * positioning within the two consensus fragments.
     *
     * PCR primers will then be chosen using one primer within <CONS1>
     * and the other within <CONS2>.
     */
    if (NULL == (cons_joined = (char *)xmalloc(2*(len1 + len2 +2)+20)))
        goto error;
    sprintf(cons_joined, "%sNNNNNNNNNNNNNNNNNNNN%s", cons1, cons2);

    /* Anything that is not an uppercase A, C, G or T becomes N */
    {
        size_t k, l = strlen(cons_joined);
        for (k = 0; k < l; k++)
            if (cons_joined[k] != 'A' &&
                cons_joined[k] != 'C' &&
                cons_joined[k] != 'G' &&
                cons_joined[k] != 'T')
                cons_joined[k] = 'N';
    }
    puts(cons_joined);
    /* argument is unsigned, so use the unsigned format macro */
    printf("target = %"PRIu64",%d\n", (uint64_t)strlen(cons1)+1, 20);

    /* Tweak arguments */
    pstate->p3args.primer_task = pick_pcr_primers;
    pstate->p3args.num_return = 20;

    /* Pick the primer pairs */
    if (-1 == primlib_choose_pcr(pstate, cons_joined, strlen(cons1)+1, 20))
        goto error;

    if (!(pp = (g4_primer_pair *)xmalloc(pstate->npairs * sizeof(*pp))))
        goto error;



    /*
     * Store the primer pairs in the return structures.
     * i walks all candidate pairs; j is the next free output slot and only
     * advances when a pair is accepted, so a rejected pair's slot is
     * simply overwritten by the next candidate.
     */
    for (i = j = 0; i < pstate->npairs; i++) {
        int p1, p2, len;

        /*
         * Only pick pairs that have not had one or both primers rejected
         * by the secondary primer-site detection code.
         */
        /*
        if (pstate->pairs[i].left->excl ||
            pstate->pairs[i].right->excl) {
            continue;
        }
        */

        pp[j].pair = &pstate->pairs[i];

        /* Compute padded start + length for these primers. */
        p1 = depad1[pstate->pairs[i].left->start];
        p2 = depad1[pstate->pairs[i].left->start +
                    pstate->pairs[i].left->length-1];
        pp[j].contig[0] = contig1;
        pp[j].pos[0] = pos1l + p1;
        pp[j].len[0] = p2-p1+1;

        /*
         * The right primer is indexed by its last base in the joined
         * sequence; subtracting len1 and the 20 Ns gives an index into
         * the second consensus.
         */
        p1 = depad2[pstate->pairs[i].right->start-
                    pstate->pairs[i].right->length+1 - len1 -20];
        p2 = depad2[pstate->pairs[i].right->start - len1 -20];
        pp[j].contig[1] = contig2;
        pp[j].pos[1] = pos2l + p1;
        pp[j].len[1] = p2-p1+1;

        /* Copy over depadded primer sequence */
        len = MIN(pstate->pairs[i].left->length, MAX_PRIMER_LEN);
        strncpy(pp[j].seq[0], &cons_joined[pstate->pairs[i].left->start], len);
        pp[j].seq[0][len] = '\0';

        len = MIN(pstate->pairs[i].right->length, MAX_PRIMER_LEN);
        strncpy(pp[j].seq[1],
                &cons_joined[pstate->pairs[i].right->start-
                             pstate->pairs[i].right->length+1],
                len);
        pp[j].seq[1][len] = '\0';
        complement_seq(pp[j].seq[1], len);

        /*
         * Check if left/right primers have secondary binding sites, caching
         * the result (in primer_rec.excl) to avoid subsequent searches.
         * excl: 0 = not yet checked, 1 = rejected, -1 = accepted.
         */
        if (pstate->pairs[i].left->excl == 0) {
            if (filter_primers(fin, 0, pp[j].seq[0]))
                pstate->pairs[i].left->excl = 1;
            else
                pstate->pairs[i].left->excl = -1;
        }

        if (pstate->pairs[i].right->excl == 0) {
            if (filter_primers(fin, 1, pp[j].seq[1]))
                pstate->pairs[i].right->excl = 1;
            else
                pstate->pairs[i].right->excl = -1;
        }

        /* Use only if both L & R have no 2ndary match */
        if (pstate->pairs[i].left->excl == -1 &&
            pstate->pairs[i].right->excl == -1)
            j++;
    }

    pstate->npairs = j;
    if (!pstate->npairs) {
        xfree(pp);
        pp = NULL;
    }


    xfree(cons1);
    xfree(cons2);
    xfree(cons_joined);
    xfree(depad1);
    xfree(depad2);

    return pp;

 error:
    if (cons1)
        xfree(cons1);
    if (cons2)
        xfree(cons2);
    if (cons_joined)
        xfree(cons_joined);
    if (depad1)
        xfree(depad1);
    if (depad2)
        xfree(depad2);
    if (pp)
        xfree(pp);

    return NULL;
}
Пример #4
0
/*
 * Searches the consensus of the contig being edited for a pattern,
 * starting next to the editor cursor, and moves the cursor to the hit.
 *
 * xx      editor view; xx->cursor_apos is the starting point.
 * dir     non-zero searches forwards, zero searches backwards.
 * strand  '+' searches for the pattern as given, '-' for its complement
 *         (as produced by complement_seq()), '=' for both.
 * value   "<string>#<N.mismatches>#<where>". The string is MODIFIED: it is
 *         truncated at the first '#' and passed to depad_seq(). The
 *         <where> field is accepted but not used by this function.
 *
 * The consensus is fetched in windows of WIN_WIDTH positions. Consecutive
 * windows overlap by enough columns to hold patlen non-'*' characters.
 *
 * Returns 0 if a match was found (cursor moved),
 *        -1 if not found or on failure.
 */
int edview_search_consensus(edview *xx, int dir, int strand, char *value) {
    int mismatches = 0; /* exact match */
    char *p;
    int start, end;
    char cons[WIN_WIDTH+1];
    int patlen;
    char *uppert, *upperb;
    int found = 0, at_end = 0;
    tg_rec fseq;
    int fpos, i, j;
    contig_t *c;

    /*
     * Parse value search string. It optionally includes two extra params
     * separated by #. Ie:
     *     <string>#<N.mismatches>#<where>.
     * Only the mismatch count is used here; atoi() stops at the second '#'.
     */
    if ((p = strchr(value, '#')) != NULL) {
        mismatches = atoi(p+1);
        *p = 0;
    }


    /* uppercase search string, remove pads, and store fwd/rev copies */
    patlen = strlen(value);
    depad_seq(value, &patlen, NULL);
    if (NULL == (uppert = (char *)xmalloc(patlen + 1)))
        return -1; /* 0 would be reported to the caller as "found" */
    if (NULL == (upperb = (char *)xmalloc(patlen + 1))) {
        free(uppert);
        return -1;
    }

    uppert[patlen] = upperb[patlen] = 0;
    for (i = patlen-1; i >= 0; i--) {
        upperb[i] = uppert[i] = toupper((unsigned char)value[i]);
    }
    complement_seq(upperb, patlen);


    /* First window: immediately after (or before) the cursor */
    if (dir) {
        start = xx->cursor_apos + 1;
        end   = start + (WIN_WIDTH-1);
    } else {
        end   = xx->cursor_apos - 1;
        start = end - (WIN_WIDTH-1);
    }
    fpos = xx->cursor_apos;

    if (NULL == (c = cache_search(xx->io, GT_Contig, xx->cnum))) {
        free(uppert);
        free(upperb);
        return -1;
    }
    cache_incr(xx->io, c);
    do {
        char *ind, *indt = NULL, *indb = NULL;

        calculate_consensus_simple(xx->io, xx->cnum, start, end, cons, NULL);
        cons[WIN_WIDTH] = 0;

        if (dir) {
            if (strand == '+' || strand == '=')
                indt = pstrstr_inexact(cons, uppert, mismatches, NULL);
            if (strand == '-' || strand == '=')
                indb = pstrstr_inexact(cons, upperb, mismatches, NULL);
        } else {
            if (strand == '+' || strand == '=')
                indt = prstrstr_inexact(cons, uppert, mismatches, NULL);
            if (strand == '-' || strand == '=')
                indb = prstrstr_inexact(cons, upperb, mismatches, NULL);
        }

        /*
         * Pick one hit when both strands matched.
         * NOTE(review): the lower address is taken for both directions;
         * for a backward search the higher one may be what is wanted -
         * confirm.
         */
        if (indt && indb)
            ind = MIN(indt, indb);
        else if (indt)
            ind = indt;
        else if (indb)
            ind = indb;
        else
            ind = NULL;

        if (ind != NULL) {
            /* Accept only a hit on the far side of the cursor */
            if (dir) {
                if (fpos <= start + ind-cons) {
                    found = 1;
                    fpos = start + ind-cons;
                    fseq = xx->cnum;
                }
            } else {
                if (fpos >= start + ind-cons) {
                    found = 1;
                    fpos = start + ind-cons;
                    fseq = xx->cnum;
                }
            }
            break;
        }

        /* Next search region - overlapping by patlen+pads */
        if (dir) {
            /* walk back from the window end over patlen non-pad columns */
            for (i = WIN_WIDTH-1, j = patlen; j && i; i--) {
                if (cons[i] != '*')
                    j--;
            }
            if (i == 0)
                break; /* window cannot advance */
            start += i;
            end   += i;

            if (start > c->end)
                at_end = 1;
        } else {
            /* walk forward from the window start over patlen non-pads */
            for (i = 0, j = patlen; j && i < WIN_WIDTH; i++) {
                if (cons[i] != '*')
                    j--;
            }
            if (i == WIN_WIDTH)
                break; /* window cannot advance */

            start -= WIN_WIDTH-i;
            end   -= WIN_WIDTH-i;

            if (end < c->start)
                at_end = 1;
        }
    } while (!at_end);
    cache_decr(xx->io, c);

    if (found) {
        edSetCursorPos(xx, fseq == xx->cnum ? GT_Contig : GT_Seq,
                       fseq, fpos, 1);
    }

    free(uppert);
    free(upperb);

    return found ? 0 : -1;
}
Пример #5
0
/*
 * Searches the individual sequences of the contig being edited for a
 * pattern, starting next to the editor cursor, and moves the cursor to the
 * best hit (smallest position when searching forwards, largest when
 * searching backwards).
 *
 * xx      editor view; xx->cursor_apos is the starting point.
 * dir     non-zero searches forwards, zero searches backwards.
 * strand  '+' searches for the pattern as given, '-' for its complement
 *         (as produced by complement_seq()), '=' for both.
 * value   "<string>#<N.mismatches>#<where>". The string is MODIFIED: it is
 *         truncated at the first '#' and passed to depad_seq(). The
 *         <where> field is accepted but not used by this function.
 *
 * Returns 0 if a match was found (cursor moved),
 *        -1 if not found or on failure.
 */
int edview_search_sequence(edview *xx, int dir, int strand, char *value) {
    int mismatches = 0; /* exact match */
    char *p;
    int start, end;
    int patlen;
    char *uppert, *upperb;
    int found = 0;
    tg_rec fseq;
    int fpos, i;
    contig_iterator *iter;
    rangec_t *(*ifunc)(GapIO *io, contig_iterator *ci);
    rangec_t *r;
    int best_pos;

    if (dir) {
        start = xx->cursor_apos + 1;
        end = CITER_CEND;
        iter = contig_iter_new(xx->io, xx->cnum, 1,
                               CITER_FIRST | CITER_ISTART,
                               start, end);
        ifunc = contig_iter_next;
        best_pos = INT_MAX;
    } else {
        start = CITER_CSTART;
        end = xx->cursor_apos -1;
        iter = contig_iter_new(xx->io, xx->cnum, 1,
                               CITER_LAST | CITER_IEND,
                               start, end);
        ifunc = contig_iter_prev;
        best_pos = INT_MIN;
    }

    if (!iter)
        return -1;


    /*
     * Parse value search string. It optionally includes two extra params
     * separated by #. Ie:
     *     <string>#<N.mismatches>#<where>.
     * Only the mismatch count is used here; atoi() stops at the second '#'.
     */
    if ((p = strchr(value, '#')) != NULL) {
        mismatches = atoi(p+1);
        *p = 0;
    }


    /* uppercase search string, remove pads, and store fwd/rev copies */
    patlen = strlen(value);
    depad_seq(value, &patlen, NULL);
    if (NULL == (uppert = (char *)xmalloc(patlen + 1))) {
        contig_iter_del(iter);
        return -1; /* 0 would be reported to the caller as "found" */
    }
    if (NULL == (upperb = (char *)xmalloc(patlen + 1))) {
        free(uppert);
        contig_iter_del(iter);
        return -1;
    }

    uppert[patlen] = upperb[patlen] = 0;
    for (i = patlen-1; i >= 0; i--) {
        upperb[i] = uppert[i] = toupper((unsigned char)value[i]);
    }
    complement_seq(upperb, patlen);

    while ((r = ifunc(xx->io, iter))) {
        seq_t *s, *sorig;
        char *ind, *indt = NULL, *indb = NULL, *seq;
        int seq_len, comp, off = 0;

        /* Stop once the iterator has moved beyond the best hit so far */
        if (found && dir  && r->start > best_pos)
            break;
        if (found && !dir && r->end < best_pos)
            break;

        if (NULL == (s = sorig = cache_search(xx->io, GT_Seq, r->rec)))
            break;

        /*
         * Work on a complemented private copy when the range's comp flag
         * and the sign of the stored length disagree.
         */
        if (r->comp ^ (s->len < 0)) {
            if (NULL == (s = dup_seq(s)))
                break;
            complement_seq_t(s);
        }

        seq = s->seq;
        seq_len = ABS(s->len);

        /* Trim the searched region to [start, end] */
        if (r->start < start) {
            off      = start - r->start;
            seq     += off;
            seq_len -= off;
        }
        if (r->end - (patlen-1) > end)
            seq_len -= r->end - (patlen-1) - end;

        if (dir) {
            if (strand == '+' || strand == '=')
                indt = pstrnstr_inexact(seq, seq_len, uppert, patlen,
                                        mismatches, NULL);
            if (strand == '-' || strand == '=')
                indb = pstrnstr_inexact(seq, seq_len, upperb, patlen,
                                        mismatches, NULL);
        } else {
            if (strand == '+' || strand == '=')
                indt = prstrnstr_inexact(seq, seq_len, uppert, patlen,
                                         mismatches, NULL);
            if (strand == '-' || strand == '=')
                indb = prstrnstr_inexact(seq, seq_len, upperb, patlen,
                                         mismatches, NULL);
        }

        if (indt && indb)
            ind = MIN(indt, indb);
        else if (indt)
            ind = indt;
        else if (indb)
            ind = indb;
        else
            ind = NULL;

        if (ind) {
            /* contig coordinate of the hit */
            int pos =  r->start + ind - seq + off;
            if (dir) {
                if (best_pos > pos) {
                    found = 1;
                    best_pos = pos;
                    fpos = ind - s->seq; /* offset within the sequence */
                    fseq = r->rec;
                }
            } else {
                if (best_pos < pos) {
                    found = 1;
                    best_pos = pos;
                    fpos = ind - s->seq; /* offset within the sequence */
                    fseq = r->rec;
                }
            }
            //printf("Matches #%"PRIrec": at abs pos %d\n", r->rec, pos);
        }

        if (s != sorig)
            free(s);
    }

    if (found) {
        edSetCursorPos(xx, fseq == xx->cnum ? GT_Contig : GT_Seq,
                       fseq, fpos, 1);
    }

    free(uppert);
    free(upperb);

    contig_iter_del(iter);

    return found ? 0 : -1;
}
Пример #6
0
/*
 * Finds repeats of a sequence against itself and/or against its
 * complement (as produced by complement_seq()).
 *
 * The search runs on a copy of seq1 that has been passed through
 * depad_seq(); hit positions and lengths are translated back to
 * coordinates of the original seq1 through the depad_to_pad[] array before
 * returning.
 *
 * seq1_match, seq2_match and len_match are passed straight to reps(),
 * which fills them in. max_mat and min_match are handed to init_hash8n().
 *
 * Returns the total number of matches (forward + reverse),
 *        -1 on allocation failure or when hash_seqn() rejects the
 *           sequence ("sequence too short"),
 *        -2 when init_hash8n() fails.
 * *num_f_matches / *num_r_matches receive the per-direction counts; they
 * are not written on the error paths that precede their assignment.
 */
int repeat_search (
                   int mode,            /* 1=f, 2=r, 3=b */
                   int min_match,       /* the minimum match length */
                   int **seq1_match,    /* positions of matches in seq1 */
                   int **seq2_match,    /* positions of matches in seq2 */
                   int **len_match,     /* length of matches */
                   int max_mat,         /* maximum number of matches */
                   char *seq1,          /* seq1 */
                   int seq1_len,        /* size of seq1 and its hash array */
                   int *num_f_matches,
                   int *num_r_matches
                   ) {

    int n_matches,seq2_len,nres;
    int ret;
    char *seq2 = NULL,sense;
    Hash *h;
    char *depadded_seq;
    int depadded_len;
    int *depad_to_pad;
    int i;

    /* Depad sequence */
    if (NULL == (depad_to_pad = (int *)xmalloc(sizeof(int) * seq1_len)))
        return -1;
    if (NULL == (depadded_seq = (char *)xmalloc(seq1_len+1))) {
        xfree(depad_to_pad);
        return -1;
    }
    copy_seq(depadded_seq, seq1, seq1_len);
    depadded_len = seq1_len;
    depad_seq(depadded_seq, &depadded_len, depad_to_pad);

    /* From here on seq1 refers to the depadded copy */
    seq1 = depadded_seq;
    seq1_len = depadded_len;

    seq2_len = seq1_len;

    if ( init_hash8n ( seq1_len, seq2_len,
                      8, max_mat, min_match, 1, &h )) {
        ret = -2;
        goto cleanup;
    }

    h->seq1 = seq1;
    h->seq1_len = seq1_len;

    if ( hash_seqn ( h, 1)) {
        verror(ERR_WARN, "hash_seqn", "sequence too short");
        /* h is released by the shared cleanup below */
        ret = -1;
        goto cleanup;
    }
    (void) store_hashn ( h );

    if ( ! (seq2 = (char *) xmalloc ( sizeof(char)*(seq1_len) ))) {
        ret = -1;
        goto cleanup;
    }

    (void) copy_seq ( seq2, seq1, seq1_len );

    h->seq2 = seq2;
    h->seq2_len = seq2_len;
    *num_f_matches = 0;
    nres = 0;
    n_matches = 0;
    if ( mode & 1 ) {

        if ( hash_seqn ( h, 2)) {
            verror(ERR_WARN, "hash_seqn", "sequence too short");
            ret = -1;
            goto cleanup;
        }
        sense = 'f';
        n_matches = reps ( h, seq1_match, seq2_match, len_match, 0, sense);
        *num_f_matches = n_matches;
        nres += n_matches;

    }

    *num_r_matches = 0;
    if ( mode & 2 )  {

        (void) complement_seq(seq2, seq2_len);

        if ( hash_seqn ( h, 2)) {
            verror(ERR_WARN, "hash_seqn", "sequence too short");
            ret = -1;
            goto cleanup;
        }

        /* reverse hits are stored after the nres forward ones */
        sense = 'r';
        n_matches = reps ( h, seq1_match, seq2_match, len_match, nres, sense);
        *num_r_matches = n_matches;
        n_matches += nres;
    }

    /* Remap depadded hits to padded positions */
    for (i = 0; i < n_matches; i++) {
        int p1, p2, p1_end;
        p1 = depad_to_pad[(*seq1_match)[i]];
        p2 = depad_to_pad[(*seq2_match)[i]];
        p1_end = depad_to_pad[(*seq1_match)[i]+(*len_match)[i]-1];

        (*seq1_match)[i] = p1;
        (*seq2_match)[i] = p2;
        (*len_match) [i] = p1_end - p1 + 1;
    }

    ret = n_matches;

 cleanup:
    free_hash8n ( h );
    if (seq2) xfree(seq2);
    xfree(depadded_seq);
    xfree(depad_to_pad);

    return ret;
}
Пример #7
0
/*
 * Older implementation of the join editor's "align contigs" operation.
 *
 * Computes the consensus of len0 bases from pos0 in xx0 and len1 bases
 * from pos1 in xx1, passes both through depad_seq(), aligns them with
 * calign(), lists the alignment, and then calls add_pads() on the two
 * editors so that they line up.
 *
 * The edit script written by calign() is interpreted as: 0 = one aligned
 * column, negative = advance sequence 0 by that many bases, positive =
 * advance sequence 1 by that many bases.
 *
 * Side effects: may adjust xx0->displayPos / xx1->displayPos and
 * xx0->link->lockOffset; temporarily sets default_conf_n to -1 in both
 * editors while padding.
 *
 * Returns 0 on success, -1 if working memory could not be allocated
 * (nothing has been modified in that case).
 */
static int align_old(EdStruct *xx0, int pos0, int len0,
                 EdStruct *xx1, int pos1, int len1)
{

    char *ol0,*ol1;
    int  *depad_to_pad0_m, *depad_to_pad1_m;
    int  *depad_to_pad0,   *depad_to_pad1;
    align_int *res, *S;
    int old_def_conf0 = xx0->default_conf_n;
    int old_def_conf1 = xx1->default_conf_n;
    int off0 = 0, off1 = 0;
    int left0 = 0, left1 = 0;

    vfuncheader("Align contigs (join editor)");

    /*
     * Memory allocation. The *_m pointers keep the allocation base, as
     * depad_to_pad0/1 are advanced when the left end is clipped.
     */
    ol0 = (char *) xmalloc(len0+1);
    ol1 = (char *) xmalloc(len1+1);
    depad_to_pad0 = depad_to_pad0_m = (int *)xmalloc((len0+1) * sizeof(int));
    depad_to_pad1 = depad_to_pad1_m = (int *)xmalloc((len1+1) * sizeof(int));
    S = res = (align_int *) xmalloc((len0+len1+1)*sizeof(align_int));

    if (!ol0 || !ol1 || !depad_to_pad0_m || !depad_to_pad1_m || !res) {
        if (ol0)
            xfree(ol0);
        if (ol1)
            xfree(ol1);
        if (depad_to_pad0_m)
            xfree(depad_to_pad0_m);
        if (depad_to_pad1_m)
            xfree(depad_to_pad1_m);
        if (res)
            xfree(res);
        return -1;
    }

    /* Compute the consensus */
    DBcalcConsensus(xx0,pos0,len0,ol0,NULL,BOTH_STRANDS);
    DBcalcConsensus(xx1,pos1,len1,ol1,NULL,BOTH_STRANDS);

    /* Strip the pads from the consensus */
    depad_seq(ol0, &len0, depad_to_pad0);
    depad_seq(ol1, &len1, depad_to_pad1);

    /* Do the actual alignment */
    (void)calign(ol0, ol1, len0, len1,
                 NULL, NULL, NULL, NULL,
                 0, 0, gopenval, gextendval, 3, 0, res);

    /* Clip left end */
    if (*S != 0) {
        /*
         * Pad at start, so shift contigs.
         * NOTE(review): len0/len1 are reduced by the padded offset here,
         * whereas the sibling align() reduces them by the script value
         * itself - confirm which is intended.
         */
        if (*S < 0) {
            left0 = -*S; /* used for display only */
            depad_to_pad0 += -*S;
            off0 = depad_to_pad0[0];
            xx1->displayPos -= off0;
            pos0 += off0;
            len0 -= off0;
        } else {
            left1 = *S; /* used for display only */
            depad_to_pad1 += *S;
            off1 = depad_to_pad1[0];
            xx0->displayPos -= off1;
            pos1 += off1;
            len1 -= off1;
        }
        S++;
        xx0->link->lockOffset = xx1->displayPos - xx0->displayPos;
    }

    /* Clip right end: replay the script until either sequence runs out */
    {
        int i = 0, j = 0, op;
        align_int *S2 = S;

        while (i < len0 && j < len1) {
            if ((op = *S2++) == 0)
                i++, j++;
            else if (op > 0)
                j += op;
            else
                i -= op;
        }

        len0 = i;
        len1 = j;
    }

    /* Display the alignment (skipped if its buffers cannot be allocated). */
    {
        char *exp0, *exp1;
        int exp_len0, exp_len1;
        char name0[100];
        char name1[100];

        exp0 = (char *) xmalloc(len0+len1+1);
        exp1 = (char *) xmalloc(len0+len1+1);

        if (exp0 && exp1) {
            sprintf(name0, "%d", xx0->DBi->DB_contigNum);
            sprintf(name1, "%d", xx1->DBi->DB_contigNum);
            cexpand(ol0+left0, ol1+left1, len0, len1,
                    exp0, exp1, &exp_len0, &exp_len1,
                    ALIGN_J_SSH | ALIGN_J_PADS, S);
            list_alignment(exp0, exp1, name0, name1, pos0, pos1, "");
        }

        if (exp0)
            xfree(exp0);
        if (exp1)
            xfree(exp1);
    }


    /*************************************************************************/
    /* Now actually make the edits, keeping track of old and new pads. */
    openUndo(DBI(xx0));
    openUndo(DBI(xx1));

    xx0->default_conf_n = -1;
    xx1->default_conf_n = -1;
    {
        int depad_pos0 = 0, depad_pos1 = 0;
        int curr_pad0;  /* Current padded position in seq 0 */
        int curr_pad1;  /* Current padded position in seq 1 */
        int extra_pads; /* Difference between padded positions */
        int last_pad0 = -1;
        int last_pad1 = -1;
        int inserted_bases0 = 0;
        int inserted_bases1 = 0;

        /*
         * NOTE(review): this loop continues while EITHER position is in
         * range and indexes depad_to_pad0/1 without a bounds check; the
         * sibling align() uses && plus an explicit range test - confirm
         * that no out-of-range read is possible here.
         */
        while (depad_pos0 < len0 || depad_pos1 < len1) {
            if (*S < 0) {
                depad_pos0 -= *S;
            } else if (*S > 0) {
                depad_pos1 += *S;
            }

            curr_pad0 = depad_to_pad0[depad_pos0]-off0;
            curr_pad1 = depad_to_pad1[depad_pos1]-off1;

            /*
             * Compare how far each sequence has advanced in padded
             * coordinates since the previous step; the one that advanced
             * less receives the difference as pads.
             */
            extra_pads = (curr_pad1 - last_pad1) - (curr_pad0 - last_pad0);

            if (extra_pads < 0) { /* pads go into editor xx1 */
                add_pads(xx1, pos1 + curr_pad1 + inserted_bases1, -extra_pads);
                inserted_bases1 -= extra_pads;
            } else if (extra_pads > 0) { /* pads go into editor xx0 */
                add_pads(xx0, pos0 + curr_pad0 + inserted_bases0,  extra_pads);
                inserted_bases0 += extra_pads;
            }

            last_pad0 = curr_pad0;
            last_pad1 = curr_pad1;

            if (*S == 0) {
                depad_pos0++;
                depad_pos1++;
            }

            S++;
        }
    }
    xx0->default_conf_n = old_def_conf0;
    xx1->default_conf_n = old_def_conf1;
    /*************************************************************************/

    closeUndo(xx1, DBI(xx1));
    closeUndo(xx0, DBI(xx0));

    xfree(res);
    xfree(ol0);
    xfree(ol1);
    xfree(depad_to_pad0_m);
    xfree(depad_to_pad1_m);

    return(0);
}
Пример #8
0
/*
 * Join editor "align contigs" operation.
 *
 * Computes the consensus of len0 bases from pos0 in xx0 and len1 bases
 * from pos1 in xx1, passes both through depad_seq(), aligns them with
 * align_contigs(), converts the aligned strings to an edit script with
 * rsalign2myers(), lists the alignment, and then calls add_pads() on the
 * two editors so that they line up.
 *
 * The edit script is interpreted as: 0 = one aligned column, negative =
 * advance sequence 0 by that many bases, positive = advance sequence 1 by
 * that many bases.
 *
 * Side effects: may adjust xx0->displayPos / xx1->displayPos and
 * xx0->link->lockOffset; temporarily sets default_conf_n to -1 in both
 * editors while padding.
 *
 * Returns 0 on success, -1 on failure (allocation, create_overlap(),
 * align_contigs() or rsalign2myers() failing). All working memory is
 * released on every path.
 */
static int align(EdStruct *xx0, int pos0, int len0,
                 EdStruct *xx1, int pos1, int len1)
{

    char *ol0,*ol1;
    int old_def_conf0 = xx0->default_conf_n;
    int old_def_conf1 = xx1->default_conf_n;
    OVERLAP *overlap = NULL;
    char PAD_SYM = '.';
    int  *depad_to_pad0, *dp0, *depad_to_pad1, *dp1;
    int *S, *res = NULL;
    int off0 = 0, off1 = 0;
    int left0 = 0, left1 = 0;
    int ret = -1;

    vfuncheader("Align contigs (join editor)");

    /*
     * Memory allocation. dp0/dp1 keep the allocation base, as
     * depad_to_pad0/1 are advanced when the left end is clipped.
     */
    ol0 = (char *) xmalloc(len0+1);
    ol1 = (char *) xmalloc(len1+1);
    dp0 = depad_to_pad0 = (int *)xmalloc((len0+1) * sizeof(int));
    dp1 = depad_to_pad1 = (int *)xmalloc((len1+1) * sizeof(int));
    if (!ol0 || !ol1 || !dp0 || !dp1)
        goto cleanup;

    /* Compute the consensus */
    DBcalcConsensus(xx0,pos0,len0,ol0,NULL,BOTH_STRANDS);
    DBcalcConsensus(xx1,pos1,len1,ol1,NULL,BOTH_STRANDS);

    /* Strip the pads from the consensus */
    depad_seq(ol0, &len0, depad_to_pad0);
    depad_seq(ol1, &len1, depad_to_pad1);

    if (NULL == (overlap = create_overlap()))
        goto cleanup;
    init_overlap (overlap, ol0, ol1, len0, len1);

    if (-1 == align_contigs (overlap))
        goto cleanup;

    /*
    overlap->seq1_out[overlap->right+1] = 0;
    overlap->seq2_out[overlap->right+1] = 0;
    */

    S = res = rsalign2myers(overlap->seq1_out, strlen(overlap->seq1_out),
                            overlap->seq2_out, strlen(overlap->seq2_out),
                            PAD_SYM);
    if (!res)
        goto cleanup;

    /* Clip left end */
    if (*S != 0) {
        /* Pad at start, so shift contigs */
        if (*S < 0) {
            left0 = -*S; /* used for display only */
            depad_to_pad0 += -*S;
            off0 = depad_to_pad0[0];
            xx1->displayPos -= off0;
            pos0 += off0;
            len0 -= -*S;
        } else {
            left1 = *S; /* used for display only */
            depad_to_pad1 += *S;
            off1 = depad_to_pad1[0];
            xx0->displayPos -= off1;
            pos1 += off1;
            len1 -= *S;
        }
        S++;
        xx0->link->lockOffset = xx1->displayPos - xx0->displayPos;
    }

    /*
     * Clip right end: replay the script until either sequence runs out,
     * then trim by the op at which the replay stopped.
     */
    {
        int i0 = 0, i1 = 0; /* depadded positions in seq 0 / seq 1 */
        int *s = S;

        while (i0 < len0 && i1 < len1) {
            if (*s < 0) {
                i0 -= *s;
            } else if (*s > 0) {
                /* a positive op advances sequence 1, as everywhere else */
                i1 += *s;
            } else {
                i0++;
                i1++;
            }

            s++;
        }

        if (*s < 0)
            len0 += *s;
        else if (*s > 0)
            len1 -= *s;
    }

    /* Display the alignment (skipped if its buffers cannot be allocated). */
    {
        char *exp0, *exp1;
        int exp_len0, exp_len1;
        char name0[100];
        char name1[100];

        exp0 = (char *) xmalloc(len0+len1+1);
        exp1 = (char *) xmalloc(len0+len1+1);

        if (exp0 && exp1) {
            sprintf(name0, "%d", xx0->DBi->DB_contigNum);
            sprintf(name1, "%d", xx1->DBi->DB_contigNum);
            cexpand(ol0+left0, ol1+left1, len0, len1,
                    exp0, exp1, &exp_len0, &exp_len1,
                    ALIGN_J_SSH | ALIGN_J_PADS, S);
            list_alignment(exp0, exp1, name0, name1, pos0, pos1, "");
        }

        if (exp0)
            xfree(exp0);
        if (exp1)
            xfree(exp1);
    }


    /*************************************************************************/
    /* Now actually make the edits, keeping track of old and new pads. */
    openUndo(DBI(xx0));
    openUndo(DBI(xx1));

    xx0->default_conf_n = -1;
    xx1->default_conf_n = -1;
    {
        int depad_pos0 = 0, depad_pos1 = 0;
        int curr_pad0;  /* Current padded position in seq 0 */
        int curr_pad1;  /* Current padded position in seq 1 */
        int extra_pads; /* Difference between padded positions */
        int last_pad0 = -1;
        int last_pad1 = -1;
        int inserted_bases0 = 0;
        int inserted_bases1 = 0;


        while (depad_pos0 < len0 && depad_pos1 < len1) {
            if (*S < 0) {
                depad_pos0 -= *S;
            } else if (*S > 0) {
                depad_pos1 += *S;
            }

            /* the skip may have taken us off the end of either sequence */
            if (depad_pos0 >= len0 || depad_pos1 >= len1)
                break;

            curr_pad0 = depad_to_pad0[depad_pos0]-off0;
            curr_pad1 = depad_to_pad1[depad_pos1]-off1;

            /*
             * Compare how far each sequence has advanced in padded
             * coordinates since the previous step; the one that advanced
             * less receives the difference as pads.
             */
            extra_pads = (curr_pad1 - last_pad1) - (curr_pad0 - last_pad0);

            if (extra_pads < 0) { /* pads go into editor xx1 */
                add_pads(xx1, pos1 + curr_pad1 + inserted_bases1, -extra_pads);
                inserted_bases1 -= extra_pads;
            } else if (extra_pads > 0) { /* pads go into editor xx0 */
                add_pads(xx0, pos0 + curr_pad0 + inserted_bases0,  extra_pads);
                inserted_bases0 += extra_pads;
            }

            last_pad0 = curr_pad0;
            last_pad1 = curr_pad1;

            if (*S == 0) {
                depad_pos0++;
                depad_pos1++;
            }

            S++;
        }
    }
    xx0->default_conf_n = old_def_conf0;
    xx1->default_conf_n = old_def_conf1;
    /*************************************************************************/

    closeUndo(xx1, DBI(xx1));
    closeUndo(xx0, DBI(xx0));

    ret = 0;

 cleanup:
    if (res)
        xfree(res);
    if (ol0)
        xfree(ol0);
    if (ol1)
        xfree(ol1);
    if (dp0)
        xfree(dp0);
    if (dp1)
        xfree(dp1);
    if (overlap)
        destroy_overlap(overlap);

    return ret;
}