/*
 * Adds a contig named ctg_name to a scaffold named scaf_name. The names are
 * looked up in the B+Tree index. If no scaffold called scaf_name is found
 * then a new one is created first.
 *
 * gap_size, gap_type and evidence are passed through to scaffold_add()
 * unchanged.
 *
 * Returns the result of scaffold_add() on success
 *        -1 if the contig name is not found or the scaffold could not
 *           be created.
 */
int scaffold_add_by_name(GapIO *io, char *scaf_name, char *ctg_name,
                         int gap_size, int gap_type, int evidence) {
    tg_rec srec, crec;

    if ((crec = contig_index_query(io, ctg_name)) <= 0)
        return -1;

    if ((srec = scaffold_index_query(io, scaf_name)) <= 0) {
        scaffold_t *f = scaffold_new(io, scaf_name);

        /* Creation can fail; do not dereference a null scaffold. */
        if (!f)
            return -1;

        srec = f->rec;
    }

    return scaffold_add(io, srec, crec, gap_size, gap_type, evidence);
}
/*
 * Breaks contig crec in two at position cpos.
 *
 * A new contig is created to receive the right-hand portion. It is named
 * after the original contig with a "#<n>" suffix, using the first n
 * (counting from 1) for which no contig of that name is already indexed.
 * If either contig ends up with no bin after empty bins are removed, that
 * contig is destroyed.
 *
 * Returns 0 on success
 *        -1 on failure
 */
int break_contig(GapIO *io, tg_rec crec, int cpos) {
    contig_t *cl;
    contig_t *cr;
    int cid;
    char cname[1024], *cname_end;
    int left_end, right_start;
    bin_index_t *bin;
    int do_comp = 0;
    HacheTable *h;

    cl = (contig_t *)cache_search(io, GT_Contig, crec);
    if (!cl)
        return -1;

    /*
     * Our hash table is keyed on sequence record numbers for all sequences
     * in all bins spanning the break point. The value is either 0 or 1
     * for left/right contig.
     *
     * The purpose of this hash is to allow us to work out whether a tag
     * belongs in the left or right contig, as a tag could start beyond the
     * break point but be attached to a sequence before the break point.
     *
     * Further complicating this is that a tag could be in a smaller bin
     * than the sequence as it may not be as long. However we know
     * we'll recurse down these in a logical order so we can be sure
     * we've already "seen" the sequence that the tag has been
     * attached to.
     */
    h = HacheTableCreate(1024, HASH_DYNAMIC_SIZE);
    if (!h)
        return -1;

    /*
     * strncpy() does not terminate the destination when the source is
     * 1000 characters or longer, so terminate explicitly. This leaves
     * 23 bytes of cname spare for the "#<n>" suffix and its NUL.
     */
    strncpy(cname, contig_get_name(&cl), 1000);
    cname[1000] = '\0';
    cname_end = cname + strlen(cname);

    /* Find the first unused "<name>#<n>" for the new right-hand contig. */
    cid = 1;
    do {
        sprintf(cname_end, "#%d", cid++);
    } while (contig_index_query(io, cname) > 0);

    if (!(cr = contig_new(io, cname)))
        goto fail;

    cl = cache_rw(io, cl);
    cr = cache_rw(io, cr);
    if (!cl || !cr)
        goto fail;

    if (0 != contig_index_update(io, cname, strlen(cname), cr->rec))
        goto fail;

    printf("Break in contig %"PRIrec", pos %d\n", crec, cpos);
    printf("Existing left bin = %"PRIrec", right bin = %"PRIrec"\n",
           cl->bin, cr->bin);

    cache_incr(io, cl);
    cache_incr(io, cr);

    /* Remember the left root bin's orientation before it is split. */
    bin = get_bin(io, cl->bin);
    if (!bin)
        goto fail_decr;
    do_comp = bin->flags & BIN_COMPLEMENTED;

    break_contig_recurse(io, h, cl, cr,
                         contig_get_bin(&cl), cpos, contig_offset(io, &cl),
                         0, cl->rec, cr->rec, 0, 0);

    /* Recompute end positions */
    left_end    = contig_visible_end(io, cl->rec);
    right_start = contig_visible_start(io, cr->rec);

    /* Ensure start/end positions of contigs work out */
    bin = get_bin(io, cr->bin);
    if (bin)
        bin = cache_rw(io, bin);
    if (!bin)
        goto fail_decr;

    /*
     * By default the right contig is renumbered to start at 1 and its
     * root bin is shifted to match. Defining KEEP_POSITIONS retains the
     * original coordinates instead.
     */
//#define KEEP_POSITIONS 1
#ifndef KEEP_POSITIONS
    cr->start = 1;
    cr->end = cl->end - right_start + 1;
    bin->pos -= right_start-1;
#else
    cr->start = right_start;
    cr->end = cl->end;
#endif

    /*
     * Make the right root bin's complemented flag agree with the one
     * recorded from the left root bin before the break.
     */
    if ((do_comp && !(bin->flags & BIN_COMPLEMENTED)) ||
        (!do_comp && (bin->flags & BIN_COMPLEMENTED))) {
        bin->flags ^= BIN_COMPLEMENTED;
    }

    cl->end = left_end;

    printf("Final left bin = %"PRIrec", right bin = %"PRIrec"\n",
           cl->bin, cr->bin);

    HacheTableDestroy(h, 0);

    cache_flush(io);

    remove_empty_bins(io, cl->rec);
    remove_empty_bins(io, cr->rec);

    /* Empty contig? If so remove it completely */
    if (cl->bin == 0) {
        printf("Removing empty contig %"PRIrec"\n", cl->rec);
        contig_destroy(io, cl->rec);
    }
    if (cr->bin == 0) {
        printf("Removing empty contig %"PRIrec"\n", cr->rec);
        contig_destroy(io, cr->rec);
    }

    cache_decr(io, cl);
    cache_decr(io, cr);

    cache_flush(io);

    return 0;

 fail_decr:
    /* Drop the references taken by cache_incr() above. */
    cache_decr(io, cl);
    cache_decr(io, cr);

 fail:
    /* Free the hash table on every failure path. */
    HacheTableDestroy(h, 0);
    return -1;
}