/*
 * Finds every inexact match of 'string' within 'seq'.
 *
 * Both inputs are modified in place: 'string' has its pads removed by
 * depad_seq() and 'seq' is converted to upper case.
 *
 * seq, seq_len        sequence to search and its length
 * string, string_len  pattern to search for and its length
 * mis_match           passed through to pstrnstr_inexact()
 *                     (presumably the permitted mismatch count)
 * match, score        output arrays, each with room for max_matches entries;
 *                     match[] holds 1-based positions in seq and score[] holds
 *                     (depadded pattern length - mismatches) for each hit
 * max_matches         capacity of match[] and score[]
 *
 * Returns the number of matches found,
 *         -1 if more than max_matches matches exist (the first max_matches
 *            entries are still filled in),
 *         -2 if memory could not be allocated.
 */
int inexact_pad_match(char *seq, int seq_len, char *string, int string_len,
                      int mis_match, int *match, int *score, int max_matches)
{
    char *pos;
    char *uppert;
    int i;
    int n_matches = 0;
    int n_mis;
    int overflow = 0;

    /* remove any pads from the pattern search */
    depad_seq(string, &string_len, NULL);

    /* uppercase search string */
    if (NULL == (uppert = (char *)xmalloc(string_len + 1)))
        return -2;
    uppert[string_len] = 0;
    for (i = string_len - 1; i >= 0; i--) {
        /* <ctype.h> functions require an unsigned char value */
        uppert[i] = toupper((unsigned char)string[i]);
    }
    for (i = 0; i < seq_len; i++) {
        seq[i] = toupper((unsigned char)seq[i]);
    }

    pos = pstrnstr_inexact(seq, seq_len, uppert, string_len,
                           mis_match, &n_mis);
    while (pos) {
        if (n_matches >= max_matches) {
            /* out of match storage */
            overflow = 1;
            break;
        }

        match[n_matches] = pos - seq;
        score[n_matches] = string_len - n_mis;
        n_matches++;

        /* Resume the search one base beyond the start of this hit */
        pos++;
        pos = pstrnstr_inexact(pos, seq_len - (pos - seq), uppert,
                               string_len, mis_match, &n_mis);
    }

    /* make positions start at 1 */
    for (i = 0; i < n_matches; i++) {
        match[i]++;
    }

    /* Released on every path, including the overflow one */
    xfree(uppert);

    return overflow ? -1 : n_matches;
}
/*
 * Masks regions of 's' (length 'len', may contain pads) by overwriting
 * alphabetic characters with '#'.
 *
 * The sequence is depadded into a scratch copy, scored in steps of
 * 'window2' using wo() on up to 'window' bases, and wherever the score
 * exceeds 'level' the reported range a..b is mapped back to padded
 * coordinates and masked in 's'.  'window', 'window2', 'level' and wo() are
 * defined outside this function.
 *
 * Nothing is done if len <= 0 or if memory cannot be allocated.
 */
void dust(int len, char *s)
{
    int i, j, l, from, to, a, b, v;
    char *depadded;
    int *depad_to_pad;
    int depadded_len;

    if (len <= 0)
        return;

    depadded = (char *)malloc(len);
    depad_to_pad = (int *)calloc(len, sizeof(int));
    if (!depadded || !depad_to_pad) {
        /* One of the two may have succeeded; free(NULL) is a no-op */
        free(depadded);
        free(depad_to_pad);
        return;
    }

    memcpy(depadded, s, len);
    depadded_len = len;
    depad_seq(depadded, &depadded_len, depad_to_pad);

    from = 0;
    to = -1;
    for (i = 0; i < depadded_len; i += window2) {
        /*
         * from..to is the part of the previous window's masked range that
         * spilled beyond window2; rebase it onto this window's origin.
         */
        from -= window2;
        to -= window2;

        l = (depadded_len > i + window) ? window : depadded_len - i;
        v = wo(l, depadded + i, &a, &b);

        /* Finish masking the carry-over from the previous window */
        for (j = from; j <= to; j++) {
            if (isalpha((unsigned char)s[depad_to_pad[i + j]]))
                s[depad_to_pad[i + j]] = '#';
        }

        if (v > level) {
            /* Mask the part of a..b lying within this step */
            for (j = a; j <= b && j < window2; j++) {
                if (isalpha((unsigned char)s[depad_to_pad[i + j]]))
                    s[depad_to_pad[i + j]] = '#';
            }
            /* Remember the remainder for the next iteration */
            from = j;
            to = b;
        } else {
            from = 0;
            to = -1;
        }
    }

    free(depadded);
    free(depad_to_pad);
}
/*
 * Main picking function.
 * Picks primers from the right end of contig1 and the left end of contig2
 * suitable for a PCR reaction.
 *
 * On success the returned value is an array of structures linking into
 * the primer3 primer_pair structure along with gap4 sanitised copies holding
 * padded position and length in each contig and the depadded sequence.
 * The number of elements in this array is stored in pstate->npairs.
 * The array is allocated with xmalloc(); the caller takes ownership.
 *
 * Only pairs for which filter_primers() returns zero for both the left and
 * the right primer are kept.  The per-primer result is cached in the
 * primer's 'excl' field (1 = rejected, -1 = accepted, 0 = not yet checked).
 *
 * Returns g4_primer_pair array pointer for success,
 *         NULL for failure, or when no pair survives the filtering.
 */
static g4_primer_pair *pick_pcr_primers2(finish_t *fin, primlib_state *pstate,
                                         int contig1, int contig2)
{
    char *cons1 = NULL, *cons2 = NULL, *cons_joined = NULL;
    int pos1l, pos1r, pos2l, pos2r;
    int len1, len2;
    int *depad1 = NULL, *depad2 = NULL;
    g4_primer_pair *pp = NULL;
    size_t joined_len, target_start, k;
    int i, j;

    /* Compute contig ranges */
    pos1l = MAX(1, io_clength(fin->io, contig1) - (fin->opts.pcr_offset1-1));
    pos1r = MAX(1, io_clength(fin->io, contig1) - (fin->opts.pcr_offset2-1));
    len1 = pos1r - pos1l + 1;
    if (len1 < 25)
        return NULL;

    pos2l = MIN(io_clength(fin->io, contig2), fin->opts.pcr_offset2);
    pos2r = MIN(io_clength(fin->io, contig2), fin->opts.pcr_offset1);
    len2 = pos2r - pos2l + 1;
    if (len2 < 25)
        return NULL;

    /* Get the depadded consensus */
    cons1 = (char *)xmalloc(len1+1);
    cons2 = (char *)xmalloc(len2+1);
    if (!cons1 || !cons2)
        goto error;

    calc_consensus(contig1, pos1l, pos1r, CON_SUM,
                   cons1, NULL, NULL, NULL,
                   consensus_cutoff, quality_cutoff,
                   database_info, (void *)fin->io);
    calc_consensus(contig2, pos2l, pos2r, CON_SUM,
                   cons2, NULL, NULL, NULL,
                   consensus_cutoff, quality_cutoff,
                   database_info, (void *)fin->io);
    cons1[len1] = 0;
    cons2[len2] = 0;

    if (!(depad1 = (int *)xmalloc((len1+1)*sizeof(int))))
        goto error;
    if (!(depad2 = (int *)xmalloc((len2+1)*sizeof(int))))
        goto error;

    /*
     * len1/len2 become the depadded lengths; depad1/depad2 map depadded
     * offsets back to padded offsets.
     * NOTE(review): the code below relies on depad_seq() leaving cons1 and
     * cons2 NUL-terminated - confirm against its implementation.
     */
    depad_seq(cons1, &len1, depad1);
    depad_seq(cons2, &len2, depad2);

    /* Filter low complexity data from the consensus */
    finish_filter(fin, cons1, len1);
    finish_filter(fin, cons2, len2);

    /*
     * For primer3 we join our two sequences together thus, with 20 Ns:
     *
     * <CONS1>NNNNNNNNNNNNNNNNNNNN<CONS2>
     *        ^                   ^
     *        x                   y
     *
     * Points x and y define the target ("TARGET=x,y-x" in the normal
     * boulder-io input file).
     * We also need to redefine the product size range to be 20 to
     * 20 + 2*(len2-len1) allowing for full flexibility of primer
     * positioning within the two consensus fragments.
     *
     * PCR primers will then be chosen using one primer within <CONS1>
     * and the other within <CONS2>.
     */
    if (NULL == (cons_joined = (char *)xmalloc(2*(len1 + len2 +2)+20)))
        goto error;
    sprintf(cons_joined, "%sNNNNNNNNNNNNNNNNNNNN%s", cons1, cons2);

    /* Anything that is not an unambiguous base becomes N */
    joined_len = strlen(cons_joined);
    for (k = 0; k < joined_len; k++) {
        if (cons_joined[k] != 'A' &&
            cons_joined[k] != 'C' &&
            cons_joined[k] != 'G' &&
            cons_joined[k] != 'T')
            cons_joined[k] = 'N';
    }

    target_start = strlen(cons1) + 1;

    puts(cons_joined);
    /* PRId64 expects a signed 64-bit argument */
    printf("target = %"PRId64",%d\n", (int64_t)target_start, 20);

    /* Tweak arguments */
    pstate->p3args.primer_task = pick_pcr_primers;
    pstate->p3args.num_return = 20;

    /* Pick the primer pairs */
    if (-1 == primlib_choose_pcr(pstate, cons_joined, target_start, 20))
        goto error;

    if (!(pp = (g4_primer_pair *)xmalloc(pstate->npairs * sizeof(*pp))))
        goto error;

    /*
     * Store the primer pairs in the return structures.  'i' walks the
     * primer3 pairs, 'j' is the next free slot in pp[]; a slot is only
     * committed (j++) once both primers have passed the secondary
     * primer-site check.
     */
    for (i = j = 0; i < pstate->npairs; i++) {
        int lstart = pstate->pairs[i].left->start;
        int llen   = pstate->pairs[i].left->length;
        int rstart = pstate->pairs[i].right->start;
        int rlen   = pstate->pairs[i].right->length;
        /* rstart is used as the last base of the right primer */
        int rfirst = rstart - rlen + 1;
        int p1, p2, len;

        pp[j].pair = &pstate->pairs[i];

        /* Compute padded start + length for these primers. */
        p1 = depad1[lstart];
        p2 = depad1[lstart + llen - 1];
        pp[j].contig[0] = contig1;
        pp[j].pos[0] = pos1l + p1;
        pp[j].len[0] = p2-p1+1;

        /* Right primer: remove the <CONS1> + 20 N offset first */
        p1 = depad2[rfirst - len1 - 20];
        p2 = depad2[rstart - len1 - 20];
        pp[j].contig[1] = contig2;
        pp[j].pos[1] = pos2l + p1;
        pp[j].len[1] = p2-p1+1;

        /* Copy over depadded primer sequence */
        len = MIN(llen, MAX_PRIMER_LEN);
        strncpy(pp[j].seq[0], &cons_joined[lstart], len);
        pp[j].seq[0][len] = '\0';

        len = MIN(rlen, MAX_PRIMER_LEN);
        strncpy(pp[j].seq[1], &cons_joined[rfirst], len);
        pp[j].seq[1][len] = '\0';
        complement_seq(pp[j].seq[1], len);

        /*
         * Check if left/right primers have secondary binding sites, caching
         * the result (in primer_rec.excl) to avoid subsequent searches.
         */
        if (pstate->pairs[i].left->excl == 0) {
            if (filter_primers(fin, 0, pp[j].seq[0]))
                pstate->pairs[i].left->excl = 1;
            else
                pstate->pairs[i].left->excl = -1;
        }

        if (pstate->pairs[i].right->excl == 0) {
            if (filter_primers(fin, 1, pp[j].seq[1]))
                pstate->pairs[i].right->excl = 1;
            else
                pstate->pairs[i].right->excl = -1;
        }

        /* Use only if both L & R have no 2ndary match */
        if (pstate->pairs[i].left->excl == -1 &&
            pstate->pairs[i].right->excl == -1)
            j++;
    }

    pstate->npairs = j;
    if (!pstate->npairs) {
        xfree(pp);
        pp = NULL;
    }

    xfree(cons1);
    xfree(cons2);
    xfree(cons_joined);
    xfree(depad1);
    xfree(depad2);

    return pp;

 error:
    if (cons1)
        xfree(cons1);
    if (cons2)
        xfree(cons2);
    if (cons_joined)
        xfree(cons_joined);
    if (depad1)
        xfree(depad1);
    if (depad2)
        xfree(depad2);
    if (pp)
        xfree(pp);

    return NULL;
}
/*
 * Searches the consensus of the contig being edited for 'value', starting
 * next to the cursor, and moves the cursor to the match if one is found.
 *
 * xx      editor view; xx->cursor_apos is the search origin
 * dir     non-zero to search forwards, zero to search backwards
 * strand  '+' to search for the pattern as given, '-' for its complement
 *         (as produced by complement_seq()), '=' for both
 * value   search string, modified in place (split at '#', pads removed).
 *         It optionally includes two extra params separated by #. Ie:
 *             <string>#<N.mismatches>#<where>
 *         <where> is 1 for readings, 2 for consensus, 3 for both; it is
 *         parsed but not otherwise used here.
 *
 * The consensus is scanned in windows of WIN_WIDTH bases which overlap by
 * enough columns to contain 'patlen' non-pad characters.
 *
 * Returns 0 if a match was found (cursor moved),
 *        -1 if not, or on failure.
 */
int edview_search_consensus(edview *xx, int dir, int strand, char *value)
{
    int mismatches = 0; /* exact match */
    int where = 2;      /* consensus */
    char *p;
    int start, end;
    char cons[WIN_WIDTH+1];
    int patlen;
    char *uppert, *upperb;
    int found = 0, at_end = 0;
    tg_rec fseq = xx->cnum;
    int fpos, i, j;
    contig_t *c;

    /* Parse the optional #<N.mismatches>#<where> suffix */
    if ((p = strchr(value, '#')) != NULL) {
        mismatches = atoi(p+1);
        *p = 0;
        if ((p = strchr(p+1, '#')) != NULL)
            where = atoi(p+1);
    }
    (void)where;

    /* uppercase search string, remove pads, and store fwd/rev copies */
    patlen = strlen(value);
    depad_seq(value, &patlen, NULL);

    uppert = (char *)xmalloc(patlen + 1);
    upperb = (char *)xmalloc(patlen + 1);
    if (!uppert || !upperb) {
        /* Report "not found" (-1) rather than 0, and do not leak */
        free(uppert);
        free(upperb);
        return -1;
    }

    uppert[patlen] = upperb[patlen] = 0;
    for (i = patlen-1; i >= 0; i--) {
        upperb[i] = uppert[i] = toupper((unsigned char)value[i]);
    }
    complement_seq(upperb, patlen);

    /* First window: immediately after (or before) the cursor */
    if (dir) {
        start = xx->cursor_apos + 1;
        end   = start + (WIN_WIDTH-1);
    } else {
        end   = xx->cursor_apos - 1;
        start = end - (WIN_WIDTH-1);
    }
    fpos = xx->cursor_apos;

    c = cache_search(xx->io, GT_Contig, xx->cnum);
    if (!c) {
        free(uppert);
        free(upperb);
        return -1;
    }
    cache_incr(xx->io, c);

    do {
        char *ind, *indt = NULL, *indb = NULL;

        calculate_consensus_simple(xx->io, xx->cnum, start, end, cons, NULL);
        cons[WIN_WIDTH] = 0;

        if (dir) {
            if (strand == '+' || strand == '=')
                indt = pstrstr_inexact(cons, uppert, mismatches, NULL);
            if (strand == '-' || strand == '=')
                indb = pstrstr_inexact(cons, upperb, mismatches, NULL);
        } else {
            if (strand == '+' || strand == '=')
                indt = prstrstr_inexact(cons, uppert, mismatches, NULL);
            if (strand == '-' || strand == '=')
                indb = prstrstr_inexact(cons, upperb, mismatches, NULL);
        }

        /*
         * With hits on both strands keep the one nearest the cursor: the
         * leftmost when searching forwards, the rightmost when searching
         * backwards.
         */
        if (indt && indb) {
            if (dir)
                ind = (indt < indb) ? indt : indb;
            else
                ind = (indt > indb) ? indt : indb;
        } else if (indt) {
            ind = indt;
        } else if (indb) {
            ind = indb;
        } else {
            ind = NULL;
        }

        if (ind != NULL) {
            int hit = start + (int)(ind - cons);

            if (dir) {
                if (fpos <= hit) {
                    found = 1;
                    fpos = hit;
                    fseq = xx->cnum;
                }
            } else {
                if (fpos >= hit) {
                    found = 1;
                    fpos = hit;
                    fseq = xx->cnum;
                }
            }
            break;
        }

        /* Next search region - overlapping by patlen+pads */
        if (dir) {
            for (i = WIN_WIDTH-1, j = patlen; j && i; i--) {
                if (cons[i] != '*')
                    j--;
            }
            if (i == 0)
                break;
            start += i;
            end   += i;

            if (start > c->end)
                at_end = 1;
        } else {
            for (i = 0, j = patlen; j && i < WIN_WIDTH; i++) {
                if (cons[i] != '*')
                    j--;
            }
            if (i == WIN_WIDTH)
                break;

            start -= WIN_WIDTH-i;
            end   -= WIN_WIDTH-i;

            if (end < c->start)
                at_end = 1;
        }
    } while (!at_end);

    cache_decr(xx->io, c);

    if (found) {
        edSetCursorPos(xx, fseq == xx->cnum ? GT_Contig : GT_Seq,
                       fseq, fpos, 1);
    }

    free(uppert);
    free(upperb);

    return found ? 0 : -1;
}
/*
 * Searches the sequences overlapping the contig being edited for 'value',
 * starting next to the cursor, and moves the cursor onto the nearest match.
 *
 * xx      editor view; xx->cursor_apos is the search origin
 * dir     non-zero to search forwards, zero to search backwards
 * strand  '+' to search for the pattern as given, '-' for its complement
 *         (as produced by complement_seq()), '=' for both
 * value   search string, modified in place (split at '#', pads removed).
 *         It optionally includes two extra params separated by #. Ie:
 *             <string>#<N.mismatches>#<where>
 *         <where> is 1 for readings, 2 for consensus, 3 for both; it is
 *         parsed but not otherwise used here.
 *
 * Sequences are visited through a contig iterator.  Iteration stops once a
 * match has been found and the next sequence lies wholly beyond it.
 *
 * Returns 0 if a match was found (cursor moved),
 *        -1 if not, or on failure.
 */
int edview_search_sequence(edview *xx, int dir, int strand, char *value)
{
    int mismatches = 0; /* exact match */
    int where = 2;      /* consensus */
    char *p;
    int start, end;
    int patlen;
    char *uppert = NULL, *upperb = NULL;
    int found = 0;
    tg_rec fseq = xx->cnum;
    int fpos = 0, i;
    contig_iterator *iter;
    rangec_t *(*ifunc)(GapIO *io, contig_iterator *ci);
    rangec_t *r;
    int best_pos;
    int ret = -1;

    if (dir) {
        start = xx->cursor_apos + 1;
        end   = CITER_CEND;
        iter  = contig_iter_new(xx->io, xx->cnum, 1,
                                CITER_FIRST | CITER_ISTART,
                                start, end);
        ifunc = contig_iter_next;
        best_pos = INT_MAX;
    } else {
        start = CITER_CSTART;
        end   = xx->cursor_apos - 1;
        iter  = contig_iter_new(xx->io, xx->cnum, 1,
                                CITER_LAST | CITER_IEND,
                                start, end);
        ifunc = contig_iter_prev;
        best_pos = INT_MIN;
    }

    if (!iter)
        return -1;

    /* Parse the optional #<N.mismatches>#<where> suffix */
    if ((p = strchr(value, '#')) != NULL) {
        mismatches = atoi(p+1);
        *p = 0;
        if ((p = strchr(p+1, '#')) != NULL)
            where = atoi(p+1);
    }
    (void)where;

    /* uppercase search string, remove pads, and store fwd/rev copies */
    patlen = strlen(value);
    depad_seq(value, &patlen, NULL);

    uppert = (char *)xmalloc(patlen + 1);
    upperb = (char *)xmalloc(patlen + 1);
    if (!uppert || !upperb)
        goto cleanup;   /* reports -1 and releases the iterator */

    uppert[patlen] = upperb[patlen] = 0;
    for (i = patlen-1; i >= 0; i--) {
        upperb[i] = uppert[i] = toupper((unsigned char)value[i]);
    }
    complement_seq(upperb, patlen);

    while ((r = ifunc(xx->io, iter))) {
        seq_t *s, *sorig;
        char *ind, *indt = NULL, *indb = NULL, *seq;
        int seq_len, off = 0;

        /* No later sequence can hold a nearer match than the one we have */
        if (found && dir  && r->start > best_pos)
            break;
        if (found && !dir && r->end < best_pos)
            break;

        if (NULL == (s = sorig = cache_search(xx->io, GT_Seq, r->rec)))
            break;

        /* Work on a complemented private copy when orientations differ */
        if (r->comp ^ (s->len < 0)) {
            s = dup_seq(s);
            complement_seq_t(s);
        }

        seq = s->seq;
        seq_len = ABS(s->len);

        /* Clip the searched region to start..end */
        if (r->start < start) {
            off = start - r->start;
            seq += off;
            seq_len -= off;
        }

        if (r->end - (patlen-1) > end)
            seq_len -= r->end - (patlen-1) - end;

        if (dir) {
            if (strand == '+' || strand == '=')
                indt = pstrnstr_inexact(seq, seq_len, uppert, patlen,
                                        mismatches, NULL);
            if (strand == '-' || strand == '=')
                indb = pstrnstr_inexact(seq, seq_len, upperb, patlen,
                                        mismatches, NULL);
        } else {
            if (strand == '+' || strand == '=')
                indt = prstrnstr_inexact(seq, seq_len, uppert, patlen,
                                         mismatches, NULL);
            if (strand == '-' || strand == '=')
                indb = prstrnstr_inexact(seq, seq_len, upperb, patlen,
                                         mismatches, NULL);
        }

        /*
         * With hits on both strands keep the one nearest the cursor: the
         * leftmost when searching forwards, the rightmost when searching
         * backwards.
         */
        if (indt && indb) {
            if (dir)
                ind = (indt < indb) ? indt : indb;
            else
                ind = (indt > indb) ? indt : indb;
        } else if (indt) {
            ind = indt;
        } else if (indb) {
            ind = indb;
        } else {
            ind = NULL;
        }

        if (ind) {
            /* Position of the hit in contig coordinates */
            int pos = r->start + (int)(ind - seq) + off;

            if (dir) {
                if (best_pos > pos) {
                    found = 1;
                    best_pos = pos;
                    fpos = ind - s->seq;
                    fseq = r->rec;
                }
            } else {
                if (best_pos < pos) {
                    found = 1;
                    best_pos = pos;
                    fpos = ind - s->seq;
                    fseq = r->rec;
                }
            }
        }

        if (s != sorig)
            free(s);
    }

    if (found) {
        edSetCursorPos(xx, fseq == xx->cnum ? GT_Contig : GT_Seq,
                       fseq, fpos, 1);
        ret = 0;
    }

 cleanup:
    free(uppert);
    free(upperb);
    contig_iter_del(iter);

    return ret;
}
int repeat_search ( int mode, /* 1=f, 2=r, 3=b */ int min_match, /* the minimum match length */ int **seq1_match, /* positions of matches in seq1 */ int **seq2_match, /* positions of matches in seq2 */ int **len_match, /* length of matches */ int max_mat, /* maximum number of matches */ char *seq1, /* seq1 */ int seq1_len, /* size of seq1 and its hash array */ int *num_f_matches, int *num_r_matches ) { int n_matches,seq2_len,max_matches,nres; char *seq2,sense; Hash *h; char *depadded_seq; int depadded_len; int *depad_to_pad; int i; /* Depad sequence */ if (NULL == (depad_to_pad = (int *)xmalloc(sizeof(int) * seq1_len))) return -1; if (NULL == (depadded_seq = (char *)xmalloc(seq1_len+1))) { xfree(depad_to_pad); return -1; } copy_seq(depadded_seq, seq1, seq1_len); depadded_len = seq1_len; depad_seq(depadded_seq, &depadded_len, depad_to_pad); seq1 = depadded_seq; seq1_len = depadded_len; max_matches = max_mat; seq2_len = seq1_len; seq2 = NULL; if ( init_hash8n ( seq1_len, seq2_len, 8, max_mat, min_match, 1, &h )) { free_hash8n(h); xfree(depadded_seq); xfree(depad_to_pad); return -2; } h->seq1 = seq1; h->seq1_len = seq1_len; if ( hash_seqn ( h, 1)) { verror(ERR_WARN, "hash_seqn", "sequence too short"); xfree(depadded_seq); xfree(depad_to_pad); return -1; } (void) store_hashn ( h ); if ( ! 
(seq2 = (char *) xmalloc ( sizeof(char)*(seq1_len) ))) { free_hash8n ( h ); xfree(depadded_seq); xfree(depad_to_pad); return -1; } (void) copy_seq ( seq2, seq1, seq1_len ); h->seq2 = seq2; h->seq2_len = seq2_len; *num_f_matches = 0; nres = 0; n_matches = 0; if ( mode & 1 ) { if ( hash_seqn ( h, 2)) { verror(ERR_WARN, "hash_seqn", "sequence too short"); free_hash8n ( h ); if (seq2) xfree(seq2); xfree(depadded_seq); xfree(depad_to_pad); return -1; } sense = 'f'; n_matches = reps ( h, seq1_match, seq2_match, len_match, 0, sense); *num_f_matches = n_matches; nres += n_matches; } *num_r_matches = 0; if ( mode & 2 ) { (void) complement_seq(seq2, seq2_len); if ( hash_seqn ( h, 2)) { verror(ERR_WARN, "hash_seqn", "sequence too short"); free_hash8n ( h ); if (seq2) xfree(seq2); xfree(depadded_seq); xfree(depad_to_pad); return -1; } sense = 'r'; n_matches = reps ( h, seq1_match, seq2_match, len_match, nres, sense); *num_r_matches = n_matches; n_matches += nres; } /* Remap depadded hits to padded positions */ for (i = 0; i < n_matches; i++) { int p1, p2, p1_end; p1 = depad_to_pad[(*seq1_match)[i]]; p2 = depad_to_pad[(*seq2_match)[i]]; p1_end = depad_to_pad[(*seq1_match)[i]+(*len_match)[i]-1]; (*seq1_match)[i] = p1; (*seq2_match)[i] = p2; (*len_match) [i] = p1_end - p1 + 1; } free_hash8n ( h ); if (seq2) xfree(seq2); xfree(depadded_seq); xfree(depad_to_pad); return n_matches; }
/*
 * Aligns the consensus of two contig editors over the given regions and
 * inserts pads into both contigs so that they line up (join editor).
 *
 * xx0, pos0, len0  first editor, start position and length of its region
 * xx1, pos1, len1  second editor, start position and length of its region
 *
 * Steps: compute both consensus sequences, strip pads (remembering the
 * depadded->padded mapping), align with calign(), clip the overhanging
 * ends, list the alignment, then walk the edit script calling add_pads()
 * wherever the padded spacing of the two sequences differs.
 *
 * Returns 0 on success,
 *        -1 if memory could not be allocated (nothing is edited).
 */
static int align_old(EdStruct *xx0, int pos0, int len0,
                     EdStruct *xx1, int pos1, int len1)
{
    char *ol0, *ol1;
    int *depad_to_pad0_m, *depad_to_pad1_m;   /* as allocated, for freeing */
    int *depad_to_pad0, *depad_to_pad1;       /* may be advanced below */
    align_int *res, *S;
    int old_def_conf0 = xx0->default_conf_n;
    int old_def_conf1 = xx1->default_conf_n;
    int off0 = 0, off1 = 0;
    int left0 = 0, left1 = 0;

    vfuncheader("Align contigs (join editor)");

    /* Memory allocation */
    ol0 = (char *) xmalloc(len0+1);
    ol1 = (char *) xmalloc(len1+1);
    depad_to_pad0 = depad_to_pad0_m = (int *)xmalloc((len0+1) * sizeof(int));
    depad_to_pad1 = depad_to_pad1_m = (int *)xmalloc((len1+1) * sizeof(int));
    S = res = (align_int *) xmalloc((len0+len1+1)*sizeof(align_int));

    if (!ol0 || !ol1 || !depad_to_pad0_m || !depad_to_pad1_m || !res) {
        if (ol0)
            xfree(ol0);
        if (ol1)
            xfree(ol1);
        if (depad_to_pad0_m)
            xfree(depad_to_pad0_m);
        if (depad_to_pad1_m)
            xfree(depad_to_pad1_m);
        if (res)
            xfree(res);
        return -1;
    }

    /* Compute the consensus */
    DBcalcConsensus(xx0,pos0,len0,ol0,NULL,BOTH_STRANDS);
    DBcalcConsensus(xx1,pos1,len1,ol1,NULL,BOTH_STRANDS);

    /* Strip the pads from the consensus */
    depad_seq(ol0, &len0, depad_to_pad0);
    depad_seq(ol1, &len1, depad_to_pad1);

    /* Do the actual alignment */
    (void)calign(ol0, ol1, len0, len1,
                 NULL, NULL, NULL, NULL,
                 0, 0, gopenval, gextendval, 3, 0, res);

    /* Clip left end */
    if (*S != 0) {
        /* Pad at start, so shift contigs */
        if (*S < 0) {
            left0 = -*S; /* used for display only */
            depad_to_pad0 += -*S;
            off0 = depad_to_pad0[0];
            xx1->displayPos -= off0;
            pos0 += off0;
            len0 -= off0;
        } else {
            left1 = *S; /* used for display only */
            depad_to_pad1 += *S;
            off1 = depad_to_pad1[0];
            xx0->displayPos -= off1;
            pos1 += off1;
            len1 -= off1;
        }
        S++;
        xx0->link->lockOffset = xx1->displayPos - xx0->displayPos;
    }

    /* Clip right end: stop as soon as either sequence is used up */
    {
        int i = 0, j = 0, op;
        align_int *S2 = S;

        while (i < len0 && j < len1) {
            if ((op = *S2++) == 0)
                i++, j++;
            else if (op > 0)
                j += op;
            else
                i -= op;
        }

        len0 = i;
        len1 = j;
    }

    /* Display the alignment (skipped if the buffers cannot be allocated) */
    {
        char *exp0, *exp1;
        int exp_len0, exp_len1;
        char name0[100];
        char name1[100];

        exp0 = (char *) xmalloc(len0+len1+1);
        exp1 = (char *) xmalloc(len0+len1+1);

        if (exp0 && exp1) {
            sprintf(name0, "%d", xx0->DBi->DB_contigNum);
            sprintf(name1, "%d", xx1->DBi->DB_contigNum);
            cexpand(ol0+left0, ol1+left1, len0, len1,
                    exp0, exp1, &exp_len0, &exp_len1,
                    ALIGN_J_SSH | ALIGN_J_PADS, S);
            list_alignment(exp0, exp1, name0, name1, pos0, pos1, "");
        }

        if (exp0)
            xfree(exp0);
        if (exp1)
            xfree(exp1);
    }

    /*************************************************************************/
    /* Now actually make the edits, keeping track of old and new pads. */
    openUndo(DBI(xx0));
    openUndo(DBI(xx1));

    xx0->default_conf_n = -1;
    xx1->default_conf_n = -1;
    {
        int depad_pos0 = 0, depad_pos1 = 0;
        int curr_pad0;  /* Current padded position in seq 0 */
        int curr_pad1;  /* Current padded position in seq 1 */
        int extra_pads; /* Difference between padded positions */
        int last_pad0 = -1;
        int last_pad1 = -1;
        int inserted_bases0 = 0;
        int inserted_bases1 = 0;

        /*
         * NOTE(review): this loop continues while EITHER sequence has bases
         * left, so one of the depad_to_pad lookups can be made with an index
         * at or beyond the clipped length - confirm this is intended.
         */
        while (depad_pos0 < len0 || depad_pos1 < len1) {
            if (*S < 0) {
                depad_pos0 -= *S;
            } else if (*S > 0) {
                depad_pos1 += *S;
            }

            curr_pad0 = depad_to_pad0[depad_pos0]-off0;
            curr_pad1 = depad_to_pad1[depad_pos1]-off1;

            extra_pads = (curr_pad1 - last_pad1) - (curr_pad0 - last_pad0);

            if (extra_pads < 0) {
                /* xx0 moved further in padded coordinates: pad xx1 */
                add_pads(xx1, pos1 + curr_pad1 + inserted_bases1,
                         -extra_pads);
                inserted_bases1 -= extra_pads;
            } else if (extra_pads > 0) {
                /* xx1 moved further in padded coordinates: pad xx0 */
                add_pads(xx0, pos0 + curr_pad0 + inserted_bases0,
                         extra_pads);
                inserted_bases0 += extra_pads;
            }

            last_pad0 = curr_pad0;
            last_pad1 = curr_pad1;

            if (*S == 0) {
                depad_pos0++;
                depad_pos1++;
            }

            S++;
        }
    }
    xx0->default_conf_n = old_def_conf0;
    xx1->default_conf_n = old_def_conf1;
    /*************************************************************************/

    closeUndo(xx1, DBI(xx1));
    closeUndo(xx0, DBI(xx0));

    xfree(res);
    xfree(ol0);
    xfree(ol1);
    xfree(depad_to_pad0_m);
    xfree(depad_to_pad1_m);

    return(0);
}
/*
 * Aligns the consensus of two contig editors over the given regions and
 * inserts pads into both contigs so that they line up (join editor).
 *
 * xx0, pos0, len0  first editor, start position and length of its region
 * xx1, pos1, len1  second editor, start position and length of its region
 *
 * Steps: compute both consensus sequences, strip pads (remembering the
 * depadded->padded mapping), align with align_contigs(), convert the
 * result to an edit script with rsalign2myers(), clip the overhanging
 * ends, list the alignment, then walk the edit script calling add_pads()
 * wherever the padded spacing of the two sequences differs.
 *
 * Edit script convention, as used by the loops below: 0 consumes one base
 * from each sequence, a negative value -n consumes n bases of sequence 0,
 * and a positive value n consumes n bases of sequence 1.
 *
 * Returns 0 on success,
 *        -1 on failure (allocation or alignment); nothing is edited.
 */
static int align(EdStruct *xx0, int pos0, int len0,
                 EdStruct *xx1, int pos1, int len1)
{
    char *ol0, *ol1;
    int old_def_conf0 = xx0->default_conf_n;
    int old_def_conf1 = xx1->default_conf_n;
    OVERLAP *overlap = NULL;
    int ierr;
    char PAD_SYM = '.';
    int *depad_to_pad0, *dp0, *depad_to_pad1, *dp1;  /* dpN: as allocated */
    int *S, *res = NULL;
    int off0 = 0, off1 = 0;
    int left0 = 0, left1 = 0;
    int ret = -1;

    vfuncheader("Align contigs (join editor)");

    /* Memory allocation */
    ol0 = (char *) xmalloc(len0+1);
    ol1 = (char *) xmalloc(len1+1);
    dp0 = depad_to_pad0 = (int *)xmalloc((len0+1) * sizeof(int));
    dp1 = depad_to_pad1 = (int *)xmalloc((len1+1) * sizeof(int));
    if (!ol0 || !ol1 || !dp0 || !dp1)
        goto cleanup;

    /* Compute the consensus */
    DBcalcConsensus(xx0,pos0,len0,ol0,NULL,BOTH_STRANDS);
    DBcalcConsensus(xx1,pos1,len1,ol1,NULL,BOTH_STRANDS);

    /* Strip the pads from the consensus */
    depad_seq(ol0, &len0, depad_to_pad0);
    depad_seq(ol1, &len1, depad_to_pad1);

    if (NULL == (overlap = create_overlap()))
        goto cleanup;
    init_overlap (overlap, ol0, ol1, len0, len1);

    if (-1 == (ierr = align_contigs (overlap)))
        goto cleanup;

    S = res = rsalign2myers(overlap->seq1_out, strlen(overlap->seq1_out),
                            overlap->seq2_out, strlen(overlap->seq2_out),
                            PAD_SYM);
    if (!res)
        goto cleanup;

    /* Clip left end */
    if (*S != 0) {
        /* Pad at start, so shift contigs */
        if (*S < 0) {
            left0 = -*S; /* used for display only */
            depad_to_pad0 += -*S;
            off0 = depad_to_pad0[0];
            xx1->displayPos -= off0;
            pos0 += off0;
            len0 -= -*S;
        } else {
            left1 = *S; /* used for display only */
            depad_to_pad1 += *S;
            off1 = depad_to_pad1[0];
            xx0->displayPos -= off1;
            pos1 += off1;
            len1 -= *S;
        }
        S++;
        xx0->link->lockOffset = xx1->displayPos - xx0->displayPos;
    }

    /* Clip right end */
    {
        int i0 = 0, i1 = 0;
        int *s = S;

        while (i0 < len0 && i1 < len1) {
            if (*s < 0) {
                i0 -= *s;
            } else if (*s > 0) {
                /* Positive ops consume sequence 1, not sequence 0 */
                i1 += *s;
            } else {
                i0++;
                i1++;
            }

            s++;
        }

        /* A trailing non-zero op is an overhang; trim it from its owner */
        if (*s < 0)
            len0 += *s;
        else if (*s > 0)
            len1 -= *s;
    }

    /* Display the alignment (skipped if the buffers cannot be allocated) */
    {
        char *exp0, *exp1;
        int exp_len0, exp_len1;
        char name0[100];
        char name1[100];

        exp0 = (char *) xmalloc(len0+len1+1);
        exp1 = (char *) xmalloc(len0+len1+1);

        if (exp0 && exp1) {
            sprintf(name0, "%d", xx0->DBi->DB_contigNum);
            sprintf(name1, "%d", xx1->DBi->DB_contigNum);
            cexpand(ol0+left0, ol1+left1, len0, len1,
                    exp0, exp1, &exp_len0, &exp_len1,
                    ALIGN_J_SSH | ALIGN_J_PADS, S);
            list_alignment(exp0, exp1, name0, name1, pos0, pos1, "");
        }

        if (exp0)
            xfree(exp0);
        if (exp1)
            xfree(exp1);
    }

    /*************************************************************************/
    /* Now actually make the edits, keeping track of old and new pads. */
    openUndo(DBI(xx0));
    openUndo(DBI(xx1));

    xx0->default_conf_n = -1;
    xx1->default_conf_n = -1;
    {
        int depad_pos0 = 0, depad_pos1 = 0;
        int curr_pad0;  /* Current padded position in seq 0 */
        int curr_pad1;  /* Current padded position in seq 1 */
        int extra_pads; /* Difference between padded positions */
        int last_pad0 = -1;
        int last_pad1 = -1;
        int inserted_bases0 = 0;
        int inserted_bases1 = 0;

        while (depad_pos0 < len0 && depad_pos1 < len1) {
            if (*S < 0) {
                depad_pos0 -= *S;
            } else if (*S > 0) {
                depad_pos1 += *S;
            }

            /* The skip above may have run off the end of either sequence */
            if (depad_pos0 >= len0 || depad_pos1 >= len1)
                break;

            curr_pad0 = depad_to_pad0[depad_pos0]-off0;
            curr_pad1 = depad_to_pad1[depad_pos1]-off1;

            extra_pads = (curr_pad1 - last_pad1) - (curr_pad0 - last_pad0);

            if (extra_pads < 0) {
                /* xx0 moved further in padded coordinates: pad xx1 */
                add_pads(xx1, pos1 + curr_pad1 + inserted_bases1,
                         -extra_pads);
                inserted_bases1 -= extra_pads;
            } else if (extra_pads > 0) {
                /* xx1 moved further in padded coordinates: pad xx0 */
                add_pads(xx0, pos0 + curr_pad0 + inserted_bases0,
                         extra_pads);
                inserted_bases0 += extra_pads;
            }

            last_pad0 = curr_pad0;
            last_pad1 = curr_pad1;

            if (*S == 0) {
                depad_pos0++;
                depad_pos1++;
            }

            S++;
        }
    }
    xx0->default_conf_n = old_def_conf0;
    xx1->default_conf_n = old_def_conf1;
    /*************************************************************************/

    closeUndo(xx1, DBI(xx1));
    closeUndo(xx0, DBI(xx0));

    ret = 0;

 cleanup:
    /* Shared by the success and failure paths; frees whatever exists */
    if (res)
        xfree(res);
    if (ol0)
        xfree(ol0);
    if (ol1)
        xfree(ol1);
    if (dp0)
        xfree(dp0);
    if (dp1)
        xfree(dp1);
    if (overlap)
        destroy_overlap(overlap);

    return ret;
}