/* Write tests */

/*
 * Flips the case of every alphabetic character in the name of contig *cp
 * and writes the modified name back with contig_set_name().
 *
 * io  Database handle, passed through to contig_set_name().
 * cp  Contig whose name is read and then replaced.
 *
 * Returns the value of contig_set_name(), or -1 if the current name is
 * NULL or too long to fit in the local buffer (the name is then left
 * untouched rather than overflowing the stack or being truncated).
 */
int edit_contig_name(GapIO *io, contig_t **cp) {
    char name[1024];
    const char *cur = contig_get_name(cp);
    size_t len, i;

    /* Defensive: refuse names we cannot hold in full. */
    if (!cur)
        return -1;
    len = strlen(cur);
    if (len >= sizeof(name))
        return -1;
    memcpy(name, cur, len + 1); /* includes the terminating NUL */

    for (i = 0; i < len; i++) {
        /* <ctype.h> functions require an unsigned char value (or EOF). */
        if (isalpha((unsigned char)name[i]))
            name[i] ^= 0x20; /* change case */
    }

    return contig_set_name(io, cp, name);
}
/*
 * Draws one view of a contig: a consensus line, a coordinate ruler and the
 * sequences overlapping the visible window.
 *
 * io           Database handle.
 * c            Contig to display.
 * xpos         Contig position to centre the view on (wid/2 is subtracted
 *              to obtain the left-most displayed position).
 * ypos         Number of leading sequences (in the sorted range query) to
 *              skip before drawing.
 * nlines       Number of output rows available; rows 0 and 1 hold the
 *              consensus and ruler, sequences start at row 2.
 * wid          Total width in columns, including the MAX_NAME_LEN+2 columns
 *              reserved for the name field.
 * mode         Bit mask of DISPLAY_COLOURS / DISPLAY_DIFFS /
 *              DISPLAY_CUTOFFS / DISPLAY_QUAL.
 * qual_cutoff  With DISPLAY_QUAL, bases whose confidence is >= this value
 *              get A_BOLD (only visible in curses mode).
 * in_curses    Non-zero: draw with curses. Zero: print plain text to stdout.
 *
 * NOTE(review): line[] and cons[] are fixed 1024-byte buffers and seq_a[]
 * holds MAX_SEQ_LEN bytes; nothing here bounds wid or the sequence length
 * against them - confirm callers guarantee this.
 */
static void display_gap(GapIO *io, contig_t **c, int xpos, int ypos,
                        int nlines, int wid, int mode, int qual_cutoff,
                        int in_curses) {
    rangec_t *r;
    int i, nr, lno, y;
    int m;
    char line[1024], *lp;
    char cons[1024];
    int attr;
    static int lookup_1conf[256];
    static int lookup_4conf[256];
    static int lookup_init = 0;

    /*
     * Base -> offset within a per-base confidence record.  With one
     * confidence value per base every offset is 0; with four values per
     * base (SEQ_FORMAT_CNF4) the offset selects the A/C/G/T entry.
     * Built once; previously the flag was never set and the loop wrote
     * lookup_4conf[0] on every iteration instead of lookup_4conf[i].
     */
    if (!lookup_init) {
        for (i = 0; i < 256; i++)
            lookup_1conf[i] = lookup_4conf[i] = 0;

        lookup_4conf['a'] = lookup_4conf['A'] = 0;
        lookup_4conf['c'] = lookup_4conf['C'] = 1;
        lookup_4conf['g'] = lookup_4conf['G'] = 2;
        lookup_4conf['t'] = lookup_4conf['T'] = 3;

        lookup_init = 1;
    }

    /* Columns left for bases once the name field is accounted for. */
    wid -= MAX_NAME_LEN+2;

    //if (xpos < wid/2 + (*c)->start)
    //    xpos = wid/2 + (*c)->start;

    /* From here on xpos is the left-most displayed position. */
    xpos -= wid/2;

    /* Query visible objects */
    r = contig_seqs_in_range(io, c, xpos, xpos+wid-1, CSIR_SORT_BY_X, &nr);

    /* Consensus */
    calc_cons(io, r, nr, xpos, wid, cons);
    if (in_curses) {
        clear();
        mvaddnstr(0, 1, contig_get_name(c), strlen(contig_get_name(c)));
        mvaddnstr(0, MAX_NAME_LEN+2, cons, wid);
    } else {
        printf(" %-*s %.*s\n", MAX_NAME_LEN, contig_get_name(c), wid, cons);
    }

    /*
     * Position ruler: one right-justified 10 character label for every
     * multiple of 10 in (and slightly beyond) the window, laid end to end.
     */
    for (lp = line, i = xpos; i < xpos+wid+19; i++) {
        if (i % 10 == 0) {
            sprintf(lp, "%10d", i-10);
            lp += 10;
        }
    }

    /*
     * Offset of the window start within a block of 10.  C's % can yield a
     * negative remainder, so normalise it; this used to be done for the
     * curses branch only, leaving the plain-text ruler misaligned for
     * windows starting at or below position 0.
     */
    m = (xpos-1) % 10;
    if (m < 0)
        m += 10;

    if (in_curses) {
        mvaddnstr(1, MAX_NAME_LEN+2, line+10+m, wid);
    } else {
        /* NOTE(review): offset 9 here vs 10 in the curses branch - kept as is. */
        printf("%*s%.*s\n", MAX_NAME_LEN+2, "", wid, line+9+m);
    }

    /* Sequences: skip the first ypos entries, then draw one per row. */
    for (i = y = 0; i < nr && y < ypos; i++, y++)
        ;

    for (lno = 2; i < nr && lno < nlines; i++, lno++) {
        seq_t *s = get_seq(io, r[i].rec);
        seq_t *sorig = s;
        int sp = r[i].start;
        int l = s->len > 0 ? s->len : -s->len;
        unsigned char seq_a[MAX_SEQ_LEN], *seq = seq_a;
        int j, dir = '+';
        int left, right;
        char *conf;
        int nc = s->format == SEQ_FORMAT_CNF4 ? 4 : 1;
        int *L = s->format == SEQ_FORMAT_CNF4 ? lookup_4conf : lookup_1conf;

        /* Complement data on-the-fly */
        if ((s->len < 0) ^ r[i].comp) {
            dir = '-';
            s = dup_seq(s);
            complement_seq_t(s);
        }

        left = s->left;
        right = s->right;

        /* Work on a private copy of the bases; they are edited below. */
        memcpy(seq, s->seq, l);
        conf = s->conf;

        /* Clip the part of the sequence left of the window... */
        if (sp < xpos) {
            seq   += xpos - sp;
            conf  += nc * (xpos - sp);
            l     -= xpos - sp;
            left  -= xpos - sp;
            right -= xpos - sp;
            sp = xpos;
        }
        /* ...and the part right of it. */
        if (l > wid - (sp-xpos))
            l = wid - (sp-xpos);

        if (in_curses) {
            /* Test of sequence_get_position */
            /*
            int c, p;
            sequence_get_position(io, r[i].rec, &c, &p);
            s->name_len = sprintf(s->name, ":%d-%d:", p, p+ABS(s->len)-1);
            */
            mvaddch(lno, 0, dir);
            addnstr(s->name, MIN(MAX_NAME_LEN, s->name_len));
            move(lno, MAX_NAME_LEN+2+sp-xpos);
        } else {
            printf("%c%.*s%*s", dir,
                   MIN(MAX_NAME_LEN, s->name_len), s->name,
                   MAX_NAME_LEN+1-MIN(MAX_NAME_LEN, s->name_len) +sp-xpos,
                   "");
        }

        for (j = 0; j < l; j++) {
            /* Colour is chosen from the original base, before edits. */
            attr = (mode & DISPLAY_COLOURS)
                ? COLOR_PAIR(lookup[seq[j]]) : 0;

            /* Bases agreeing with the consensus become '.' */
            if (mode & DISPLAY_DIFFS
                && sp-xpos+j < wid
                && seq[j] == cons[sp-xpos+j])
                seq[j] = '.';

            /* Outside left..right: lower case or blanked out. */
            if (j < left-1 || j > right-1)
                seq[j] = (mode & DISPLAY_CUTOFFS) ? tolower(seq[j]) : ' ';

            if (conf[j*nc+L[seq[j]]] >= qual_cutoff && mode & DISPLAY_QUAL) {
                attr |= A_BOLD;
            }

            if (in_curses) {
                addch(seq[j] | attr);
            } else {
                putchar(seq[j]);
            }
        }

        if (!in_curses)
            putchar('\n');

        /* Release the complemented duplicate, if one was made. */
        if (s != sorig)
            free(s);
    }

    /* Useful debugging code to show bin locations. */
#if 0
    free(r);
    r = contig_bins_in_range(io, c, xpos, xpos+wid-1, &nr);

    /* Bins */
    for (i=0; i < nr && lno < nlines; i++, lno++) {
        bin_index_t *bin = (bin_index_t *)cache_search(io, GT_Bin, r[i].rec);
        unsigned char *seq, *seqm;
        int j, dir = "+-"[r[i].comp];
        int sp = r[i].start;
        int l = ABS(r[i].end - r[i].start + 1);
        char name[100];

        sprintf(name, "bin-%d", bin->rec);
        seqm = seq = malloc(l+1);
        memset(seq, '-', l);

        if (!(bin->start_used == 0 && bin->end_used == 0)) {
            if (r[i].comp) {
                memset(&seq[bin->size - bin->end_used - 1], '=',
                       bin->end_used - bin->start_used + 1);
            } else {
                memset(&seq[bin->start_used], '=',
                       bin->end_used - bin->start_used + 1);
            }
        }

        /*
        fprintf(stderr, "Bin-%d: %d+%d %d..%d\n",
                bin->rec, bin->pos, bin->size, bin->start_used, bin->end_used);
        */

        if (sp < xpos) {
            seq += xpos - sp;
            l   -= xpos - sp;
            sp = xpos;
        }
        if (l > wid - (sp-xpos))
            l = wid - (sp-xpos);

        if (in_curses) {
            mvaddch(lno, 0, dir);
            addnstr(name, strlen(name));
            move(lno, MAX_NAME_LEN+2+sp-xpos);
        } else {
            printf("%c%.*s%*s", dir,
                   (int)MIN(MAX_NAME_LEN, strlen(name)), name,
                   (int)(MAX_NAME_LEN+1-MIN(MAX_NAME_LEN, strlen(name)) +sp-xpos),
                   "");
        }

        for (j = 0; j < l; j++) {
            if (in_curses) {
                addch(seq[j]);
            } else {
                putchar(seq[j]);
            }
        }

        if (!in_curses)
            putchar('\n');

        free(seqm);
    }
#endif

    if (in_curses)
        refresh();

    free(r);
}
/*
 * Breaks contig crec in two around position cpos.
 *
 * A new contig is created to receive the right-hand part.  Its name is the
 * original contig's name (at most 1000 characters of it) followed by "#N",
 * using the first N >= 1 for which contig_index_query() finds no existing
 * entry.  The split itself is delegated to break_contig_recurse(); this
 * function then fixes up the start/end coordinates and the complement flag
 * of the new contig's root bin, removes empty bins, and destroys either
 * contig if it ends up with no bin.
 *
 * io    Database handle.
 * crec  Record number of the contig to break.
 * cpos  Break position, passed to break_contig_recurse().
 *
 * Returns 0 on success, -1 on failure (contig lookup, hash table creation,
 * new contig creation or contig index update failed).
 */
int break_contig(GapIO *io, tg_rec crec, int cpos) {
    contig_t *cl;
    contig_t *cr;
    int cid;
    char cname[1024], *cname_end;
    int left_end, right_start;
    bin_index_t *bin;
    int do_comp = 0;
    HacheTable *h;

    cl = (contig_t *)cache_search(io, GT_Contig, crec);
    if (!cl)
        return -1; /* defensive: nothing to break */

    //contig_dump_ps(io, &cl, "/tmp/tree.ps");

    /*
     * Our hash table is keyed on sequence record numbers for all sequences
     * in all bins spanning the break point. The value is either 0 or 1
     * for left/right contig.
     *
     * The purpose of this hash is to allow us to work out whether a tag
     * belongs in the left or right contig, as a tag could start beyond the
     * break point but be attached to a sequence before the break point.
     *
     * Further complicating this is that a tag could be in a smaller bin
     * than the sequence as it may not be as long. However we know
     * we'll recurse down these in a logical order so we can be sure
     * we've already "seen" the sequence that the tag has been
     * attached to.
     */
    h = HacheTableCreate(1024, HASH_DYNAMIC_SIZE);
    if (!h)
        return -1;

    /*
     * strncpy() does not terminate the destination when the source is
     * 1000 characters or longer, so terminate explicitly before strlen().
     * 1000 + '#' + up to 10 digits + NUL still fits in cname[1024].
     */
    strncpy(cname, contig_get_name(&cl), 1000);
    cname[1000] = '\0';
    cname_end = cname + strlen(cname);

    /* Find the first unused "<name>#N". */
    cid = 1;
    do {
        sprintf(cname_end, "#%d", cid++);
    } while (contig_index_query(io, cname) > 0);

    if (!(cr = contig_new(io, cname))) {
        HacheTableDestroy(h, 0); /* do not leak the table on failure */
        return -1;
    }
    cl = cache_rw(io, cl);
    cr = cache_rw(io, cr);
    if (0 != contig_index_update(io, cname, strlen(cname), cr->rec)) {
        HacheTableDestroy(h, 0);
        return -1;
    }

    printf("Break in contig %"PRIrec", pos %d\n", crec, cpos);
    printf("Existing left bin = %"PRIrec", right bin = %"PRIrec"\n",
           cl->bin, cr->bin);

    /* Hold references on both contigs until the final cache_decr(). */
    cache_incr(io, cl);
    cache_incr(io, cr);

    /* Remember the complement state of the original root bin. */
    bin = get_bin(io, cl->bin);
    do_comp = bin->flags & BIN_COMPLEMENTED;

    break_contig_recurse(io, h, cl, cr,
                         contig_get_bin(&cl), cpos, contig_offset(io, &cl),
                         0, cl->rec, cr->rec, 0, 0);

    /* Recompute end positions */
    left_end    = contig_visible_end(io, cl->rec);
    right_start = contig_visible_start(io, cr->rec);

    /* Ensure start/end positions of contigs work out */
    bin = cache_rw(io, get_bin(io, cr->bin));

//#define KEEP_POSITIONS 1
#ifndef KEEP_POSITIONS
    /* Renumber the right contig to start at 1, shifting its root bin. */
    cr->start = 1;
    cr->end = cl->end - right_start + 1;
    bin->pos -= right_start-1;
#else
    cr->start = right_start;
    cr->end = cl->end;
#endif

    /* Give the new root bin the same complement state as the original. */
    if ((do_comp && !(bin->flags & BIN_COMPLEMENTED)) ||
        (!do_comp && (bin->flags & BIN_COMPLEMENTED))) {
        bin->flags ^= BIN_COMPLEMENTED;
    }

    cl->end = left_end;

    // remove_redundant_bins(io, cl);
    // remove_redundant_bins(io, cr);

    printf("Final left bin = %"PRIrec", right bin = %"PRIrec"\n",
           cl->bin, cr->bin);

    HacheTableDestroy(h, 0);

    //if (cl->bin) contig_dump_ps(io, &cl, "/tmp/tree_l.ps");
    //if (cr->bin) contig_dump_ps(io, &cr, "/tmp/tree_r.ps");

    cache_flush(io);

    remove_empty_bins(io, cl->rec);
    remove_empty_bins(io, cr->rec);

    /* Empty contig? If so remove it completely */
    if (cl->bin == 0) {
        printf("Removing empty contig %"PRIrec"\n", cl->rec);
        contig_destroy(io, cl->rec);
    }
    if (cr->bin == 0) {
        printf("Removing empty contig %"PRIrec"\n", cr->rec);
        contig_destroy(io, cr->rec);
    }

    cache_decr(io, cl);
    cache_decr(io, cr);

    cache_flush(io);

    return 0;
}