/*
 * Converts a contig selector cursor position into a position (in bases)
 * relative to the start of the contig it falls in.
 *
 * wx is compared against the running total of contig lengths, taken in
 * contig_order order.
 *
 * Returns wx unchanged when there is exactly one contig or when wx is
 *         negative (i.e. left of the first contig);
 *         otherwise wx minus the summed length of the contigs preceding
 *         the one containing it. If no contig matches, the total length
 *         of all contigs is subtracted.
 */
double CSLocalCursor(GapIO *io, double wx)
{
    GCardinal *contig_order = ArrayBase(GCardinal, io->contig_order);
    int total = NumContigs(io);
    int start = 0;      /* summed length of contigs before the current one */
    int end = 0;        /* summed length up to and including the current one */
    int idx;

    if (total == 1 || wx < 0)
        return wx;

    for (idx = 0; idx < total; idx++) {
        int contig = contig_order[idx];

        start = end;
        end += ABS(io_clength(io, contig));

        /* The upper bound is inclusive of one base past the contig end */
        if (wx > start && wx <= end + 1)
            return wx - start;
    }

    /* Fell off the right hand side: treat as relative to the last contig */
    return wx - end;
}
/*
 * Links a sequence to its bin and range entry and then creates it via
 * sequence_new_from().
 *
 * seq->bin is set to the record of 'bin' and seq->bin_index to the index
 * of 'r_out' within bin->rng, so r_out must point into that array.
 *
 * Returns whatever sequence_new_from() returns (the record number).
 */
tg_rec save_sequence(GapIO *io, seq_t *seq, bin_index_t *bin, range_t *r_out)
{
    range_t *first_range = ArrayBase(range_t, bin->rng);

    seq->bin_index = r_out - first_range;
    seq->bin = bin->rec;

    return sequence_new_from(io, seq);
}
/*
 * Translates a position within contig c_num into a position measured
 * across the whole database, i.e. with the lengths of all contigs that
 * precede c_num in contig_order added on. No separator is counted
 * between adjacent contigs.
 *
 * Returns the database-wide position on success
 *         -1 if c_num is not present in the contig order
 */
int find_position_in_DB(GapIO *io, int c_num, int position)
{
    GCardinal *order = ArrayBase(GCardinal, io->contig_order);
    int preceding = 0;  /* summed length of the contigs before c_num */
    int idx = 0;

    while (idx < NumContigs(io)) {
        int contig = order[idx];

        if (contig == c_num) {
#ifdef DEBUG
            printf("position %d cur_length %d c_num %d cur_contig %d\n",
                   position, preceding, c_num, contig);
#endif
            return preceding + position;
        }

        preceding += io_clength(io, contig);
        idx++;
    }

    return -1;
}
/* * Removes some or all tags from some or all contigs. * If the contig list or tag list is blank it implies all contigs or all tags. * * Returns 0 on success * -1 on failure */ int delete_tags(GapIO *io, int ncontigs, contig_list_t *contigs, char *tag_list, int verbose) { HashTable *h = NULL; int ret = 0; /* Hash tag types */ if (tag_list && *tag_list) { int i; if (SetActiveTags(tag_list) == -1) { return -1; } h = HashTableCreate(32, 0); for (i = 0; i < number_of_active_tags; i++) { HashData hd; hd.i = 0; HashTableAdd(h, active_tag_types[i], 4, hd, NULL); } } /* Iterate over contig list or all contigs */ if (verbose) vfuncheader("Delete Tags"); if (ncontigs) { int i; for (i = 0; i < ncontigs; i++) { contig_t *c = cache_search(io, GT_Contig, contigs[i].contig); vmessage("Scanning contig %d of %d (%s)\n", i+1, ncontigs, c->name); ret |= delete_tag_single_contig(io, contigs[i].contig, h, verbose); UpdateTextOutput(); cache_flush(io); } } else { int i; tg_rec *order = ArrayBase(tg_rec, io->contig_order); for (i = 0; i < NumContigs(io); i++) { contig_t *c = cache_search(io, GT_Contig, order[i]); vmessage("Scanning contig %d of %d (%s)\n", i+1, NumContigs(io), c->name); ret |= delete_tag_single_contig(io, order[i], h, verbose); UpdateTextOutput(); cache_flush(io); } } SetActiveTags(""); if (h) HashTableDestroy(h, 0); return ret; }
/*
 * Moves a set of contigs to a new place in the contig order, as chosen by
 * a canvas x coordinate in the contig selector, and notifies the moved
 * contigs.
 *
 * interp        Tcl interpreter used to run HighlightSeparator
 * io            database handle
 * cs_id         result id of the contig selector (looked up with
 *               result_data())
 * contig_array  contigs to move
 * num_contigs   number of entries in contig_array
 * cx            canvas x coordinate of the drop position
 *
 * Each moved contig receives REG_BUFFER_START, REG_ORDER and then
 * REG_BUFFER_END. Finally the separator at the original position of the
 * first moved contig is highlighted.
 *
 * Returns without doing anything if the contig selector cannot be found.
 * Entries of contig_array that are not in the contig order are not moved.
 */
void update_contig_order(Tcl_Interp *interp, GapIO *io, int cs_id,
                         int *contig_array, int num_contigs, int cx)
{
    GCardinal *order = ArrayBase(GCardinal, io->contig_order);
    obj_cs *cs;
    int i, j;
    int len;
    double wx, wy;
    int left_position;
    char cmd[1024];
    int orig_pos = 0;
    reg_buffer_start rs;
    reg_buffer_end re;
    reg_order ro;

    cs = result_data(io, cs_id, 0);
    if (!cs)
        return;

    CanvasToWorld(cs->canvas, cx, 0, &wx, &wy);

    /*
     * returns the nth contig to the left of the wx, NOT the contig number.
     * If this is to the left of the first contig, returns 0.
     */
    left_position = find_left_position(io, order, wx);

    /* Remember (1-based) where the first moved contig started out */
    if (num_contigs > 0) {
        for (i = 0; i < NumContigs(io); i++) {
            if (order[i] == contig_array[0]) {
                orig_pos = i+1;
                break;
            }
        }
    }

    /* convert index on order to index on contig num */
    for (i = 0; i < num_contigs; i++) {
        for (j = 0; j < NumContigs(io); j++) {
            if (order[j] == contig_array[i])
                break;
        }

        /* Not in the order array: j is out of range, so do not move it */
        if (j >= NumContigs(io))
            continue;

        ReOrder(io, order, j, left_position);

        if (j > left_position) {
            left_position++;
            orig_pos++;
        }
    }

    ro.job = REG_ORDER;
    ro.pos = left_position;

#ifdef HACK
    /* HACK is there a better way of representing this - only need to
     * replot once
     */
    contig_notify(io, 1, (reg_data *)&ro);
#endif

    /* Notify of the start of the flurry of updates */
    rs.job = REG_BUFFER_START;
    for (i = 0; i < num_contigs; i++) {
        contig_notify(io, contig_array[i], (reg_data *)&rs);
    }

    ro.job = REG_ORDER;
    ro.pos = left_position;

    for (i = 0; i < num_contigs; i++)
        contig_notify(io, contig_array[i], (reg_data *)&ro);

    /* Notify the end of our updates */
    re.job = REG_BUFFER_END;
    for (i = 0; i < num_contigs; i++) {
        contig_notify(io, contig_array[i], (reg_data *)&re);
    }

    /* draw larger separator tick to show where contig was moved from */
    len = snprintf(cmd, sizeof(cmd), "HighlightSeparator %s %d",
                   cs->hori, orig_pos);
    if (len < 0 || (size_t)len >= sizeof(cmd))
        return; /* never evaluate a truncated command */

    Tcl_Eval(interp, cmd);
}
/*
 * Reports an unrecoverable update_io() error and terminates.
 *
 * update_io() returns void and edits the contig in several dependent steps,
 * so once it has started modifying data it cannot report failure or roll
 * back. It therefore aborts rather than carrying on with a half-applied
 * edit.
 */
static void update_io_fatal(const char *msg)
{
    fprintf(stderr, "%s\n", msg);
    abort();
}

/*
 * Takes a multiple alignment and updates the on-disk data structures to
 * match. This needs to correct confidence values, original positions and
 * tags too.
 *
 * io      database handle
 * cnum    record of the contig being updated
 * malign  the new alignment; one CONTIGL entry per sequence
 * indels  array of con_indel_t: columns of pads to insert (size > 0) or
 *         remove (size < 0) before the per-sequence edits
 *
 * If the contig cannot be loaded a warning is issued and nothing is
 * changed. Any later inconsistency or resource failure aborts the process
 * (see update_io_fatal()).
 */
void update_io(GapIO *io, tg_rec cnum, MALIGN *malign, Array indels)
{
    CONTIGL *cl;
    tg_rec rnum;
    range_t r, *r_out;
    bin_index_t *bin;
    contig_t *c = cache_search(io, GT_Contig, cnum);
    size_t ind, nindel;

    if (!c) {
        verror(ERR_WARN, "update_io", "failed to load contig");
        return;
    }
    cache_incr(io, c);

    /*
     * To minimise number of data modifications we use a three step approach.
     *
     * Step 1: insert columns of pads, shifting reads as appropriate.
     * Step 2: edit sequence alignments as required, possibly involving
     *         moving sequences and/or adding and removing pads.
     * Step 3: remove columns of entire pads.
     *
     * This means that when we introduce a column of pads we don't have
     * to make edits to every single read position down stream, and can
     * instead make use of the optimised recursive bin functions to do this
     * for us.
     */

    /* Step 1: make indels */
    nindel = ArrayMax(indels);
    for (ind = 0; ind < nindel; ind++) {
        con_indel_t *id = arrp(con_indel_t, indels, ind);
        int j;

        if (id->size > 0) {
            contig_insert_bases(io, &c, id->pos+1, '*', -1, id->size);
        } else {
            for (j = 0; j < -id->size; j++) {
                contig_delete_pad(io, &c, id->pos+1);
            }
        }
    }

    /* Step 2: edit alignments */
    for (cl = malign->contigl; cl; cl = cl->next) {
        seq_t *s, *sorig;
        int len, update_range = 0;
        int shift;

        rnum = cl->id;

        sorig = cache_search(io, GT_Seq, rnum);
        if (!sorig)
            update_io_fatal("update_io: failed to load sequence");
        cache_incr(io, sorig);

        /* Work on a private copy, in alignment orientation */
        s = dup_seq(sorig);
        if (!s)
            update_io_fatal("update_io: failed to duplicate sequence");
        if (cl->mseg->comp)
            complement_seq_t(s);

        len = s->right - s->left + 1;

        /* Check if sequence has changed. If so assign a new one */
        if (cl->mseg->length != len ||
            memcmp(s->seq + s->left-1,
                   cl->mseg->seq,
                   cl->mseg->length) != 0) {
            int newlen = s->left-1 + ABS(s->len) - s->right + cl->mseg->length;
            int i, j, np;
            char   *newseq  = malloc(newlen+1);
            int8_t *newconf = malloc(newlen+1);

            if (!newseq || !newconf)
                update_io_fatal("update_io: out of memory");

            /* Build new seq/conf arrays */
            memcpy(newseq,  s->seq,  s->left-1);
            memcpy(newconf, s->conf, s->left-1);

            memcpy(&newseq[s->left-1], cl->mseg->seq, cl->mseg->length);

            /*
             * Step through both old and new sequences working out how
             * they differ. This will (*should*) be entire pad movements.
             * i  = index to old seq
             * j  = index to new seq
             * np = number of pads added minus removed from old seq.
             */
            np = 0;
            for (i = j = s->left-1;
                 i < ABS(s->len) && j < s->left-1 + cl->mseg->length;
                 ) {
                /* Bases match */
                if (toupper((unsigned char)newseq[j]) ==
                    toupper((unsigned char)s->seq[i]) ||
                    (s->seq[i] == '.' && newseq[j] == 'N')) {
                    /* Keep the case of the original base */
                    if (isupper((unsigned char)s->seq[i]))
                        newseq[j] = toupper((unsigned char)newseq[j]);
                    else
                        newseq[j] = tolower((unsigned char)newseq[j]);
                    newconf[j] = s->conf[i];
                    i++, j++;
                    continue;
                }

                /* Pad removed */
                if (s->seq[i] == '*') {
                    i++;
                    tag_shift_for_delete(io, cnum, rnum, cl->mseg->offset,
                                         cl->mseg->length, i+np--, s->bin);
                    continue;
                }

                /* Pad created */
                if (newseq[j] == '*') {
                    int k;
                    int ql = 0, qr = 0;

                    /* Confidence of the nearest non-pad on either side */
                    for (k = i-1; k >= 0; k--) {
                        if (s->seq[k] != '*') {
                            ql = s->conf[k];
                            break;
                        }
                    }
                    for (k = i+1; k < s->right; k++) {
                        if (s->seq[k] != '*') {
                            qr = s->conf[k];
                            break;
                        }
                    }
                    newconf[j] = MIN(ql, qr); /* min conf of neighbours */
                    j++;
                    tag_shift_for_insert(io, cnum, rnum, cl->mseg->offset,
                                         cl->mseg->length, i+ ++np, s->bin);
                    continue;
                }

                update_io_fatal("Alignment introduced non-pad character");
            }

            /* Pads previously at the end of the reading & now removed */
            while (i < s->right) {
                if (s->seq[i] == '*') {
                    i++;
                    tag_shift_for_delete(io, cnum, rnum, cl->mseg->offset,
                                         cl->mseg->length, i+np--, s->bin);
                } else {
                    /* Error: clipped data that wasn't a pad */
                    update_io_fatal("update_io: alignment removed a "
                                    "non-pad character");
                }
            }

            /* Should only be pads remaining in newseq, if anything */
            s->right = j;
            for (; j < s->left-1 + cl->mseg->length; j++) {
                if (newseq[j] != '*')
                    update_io_fatal("Alignment introduced non-pad character");
                newconf[j] = 0;
            }

            /* Append on the right hand cutoff data */
            for (; i < ABS(s->len); i++, j++) {
                newseq[j]  = s->seq[i];
                newconf[j] = s->conf[i];
            }
            if (j != newlen)
                update_io_fatal("update_io: rebuilt sequence has "
                                "unexpected length");

            /* Write it back out */

            /* Copy newseq/newconf into seq_t */
            s->seq  = newseq;
            s->conf = newconf;

            update_range = 0;
            if (ABS(s->len) != j) {
                /* Length change implies updating the range array too */
                s->len = s->len >= 0 ? j : -j;
                update_range = 1;
            }

            /* Back to the stored orientation */
            if (cl->mseg->comp)
                complement_seq_t(s);

            /* The memcpy trashes the block pointer, so special care needed */
            {
                void *blk;

                sorig = cache_rw(io, sorig);
                if (!sorig)
                    update_io_fatal("update_io: failed to get write access "
                                    "to sequence");
                blk = sorig->block;
                memcpy(sorig, s, sizeof(seq_t));
                sorig->block = blk;
            }

            if (update_range) {
                sorig = cache_item_resize(sorig,
                                          sizeof(*sorig) +
                                          sequence_extra_len(sorig));
                if (!sorig)
                    update_io_fatal("update_io: failed to resize sequence");
            }

            /* Point sorig's name/seq/conf etc. back into its own storage */
            sequence_reset_ptr(sorig);

            if (s->name)
                memcpy(sorig->name, s->name, s->name_len+1);
            if (s->trace_name)
                memcpy(sorig->trace_name, s->trace_name, s->trace_name_len+1);
            if (s->alignment)
                memcpy(sorig->alignment, s->alignment, s->alignment_len+1);
            memcpy(sorig->seq,  s->seq,  ABS(s->len));
            memcpy(sorig->conf, s->conf, ABS(s->len));

            xfree(newconf);
            xfree(newseq);
        }

        /* Has the sequence moved, even if its bases are unchanged? */
        {
            /*
             * Initialised so the comparison below is deterministic even if
             * sequence_get_position() leaves them untouched.
             */
            int st = 0, en = 0, orient = 0;

            sequence_get_position(io, s->rec, NULL, &st, &en, &orient);
            if (orient ^ (sorig->len < 0)) {
                shift = ABS(sorig->len) - sorig->right;
            } else {
                shift = sorig->left-1;
            }
            st += shift;

            if (st != cl->mseg->offset+1) {
                update_range = 1;
            }
        }

        free(s);

        if (update_range) {
            int bin_changed = 0;

            /* Get old range and pair data */
            s = sorig;
            bin = cache_search(io, GT_Bin, s->bin);
            if (!bin)
                update_io_fatal("update_io: failed to load bin");
            r = *arrp(range_t, bin->rng, s->bin_index);
            assert(r.rec == s->rec);

            /* Update range, tedious and slow way */
            bin_remove_item(io, &c, GT_Seq, s->rec);
            r.start = cl->mseg->offset + 1 - shift;
            r.end   = r.start + ABS(s->len) - 1;
            bin = bin_add_range(io, &c, &r, &r_out, NULL, 0);
            if (!bin)
                update_io_fatal("update_io: failed to add range to bin");

            /* Check if the new bin has a different complemented status too */
            if (s->bin != bin->rec) {
                int old_comp = bin_get_orient(io, s->bin);
                int new_comp = bin_get_orient(io, bin->rec);

                if (new_comp != old_comp) {
                    /* Only len and flags flip; left/right are not touched */
                    s = cache_rw(io, s);
                    if (!s)
                        update_io_fatal("update_io: failed to get write "
                                        "access to sequence");
                    s->len *= -1;
                    s->flags ^= SEQ_COMPLEMENTED;
                }

                bin_changed = 1;
            }

            /* Update seq bin & bin_index fields */
            s = cache_rw(io, s);
            if (!s)
                update_io_fatal("update_io: failed to get write access "
                                "to sequence");
            s->bin = bin->rec;
            s->bin_index = r_out - ArrayBase(range_t, bin->rng);

            if (bin_changed) {
                if (-1 == sequence_fix_anno_bins(io, &s)) {
                    verror(ERR_WARN, "update_io",
                           "sequence_fix_anno_bins() failure");
                }
            }

            /*
             * cache_rw() and sequence_fix_anno_bins() hand back the pointer
             * to use from now on; follow it so the reference is released on
             * the current item, as is done after cache_rw() above.
             * NOTE(review): assumes the reference count travels with the
             * returned pointer - confirm against the cache implementation.
             */
            sorig = s;
        }

        cache_decr(io, sorig);
    }

    /* Step 3 (remove pad columns) done in calling function. */

    cache_decr(io, c);
}
/*
 * Complements a scaffold; both complementing each contig within it and
 * reversing the order of contigs in the scaffold.
 *
 * io    database handle
 * srec  record of the scaffold to complement
 *
 * The positions in io->contig_order that hold members of this scaffold are
 * rewritten, in place, to follow the reversed scaffold order. Each member
 * contig is then sent REG_BUFFER_START, REG_ORDER (with its 1-based
 * position in contig_order) and REG_BUFFER_END.
 *
 * Returns 0 on success
 *        -1 on failure, in which case nothing has been modified
 */
int complement_scaffold(GapIO *io, tg_rec srec)
{
    scaffold_t *f;
    int i, j, nc = ArrayMax(io->contig_order);
    int nmemb;
    scaffold_member_t *contigs;
    tg_rec *crecs;
    HashTable *h;
    reg_order ro;
    reg_buffer_start rs;
    reg_buffer_end re;

    if (!(f = cache_search(io, GT_Scaffold, srec)))
        return -1;
    if (!(f = cache_rw(io, f)))
        return -1;

    cache_incr(io, f);

    /*
     * Create the lookup table up front so that an allocation failure is
     * detected before any contig or ordering has been changed.
     */
    h = HashTableCreate(nc, 0);
    if (!h) {
        cache_decr(io, f);
        return -1;
    }

    contigs = ArrayBase(scaffold_member_t, f->contig);
    nmemb = ArrayMax(f->contig);

    /* Complement contigs */
    for (i = 0; i < nmemb; i++) {
        complement_contig(io, contigs[i].rec);
    }

    /* Reverse the order of the contigs in the scaffold array */
    for (i = 0, j = nmemb-1; i < j; i++, j--) {
        scaffold_member_t cr1 = contigs[i];
        contigs[i] = contigs[j];
        contigs[j] = cr1;
    }

    /*
     * Reverse the order of contigs in the contig_order array too.
     * This is the part that really matters. It's also hard as the contigs
     * in the contig order array could be in any order and not adjacent.
     * For our purposes we'll just ensure the contigs in this scaffold in
     * the contig order array match our freshly complemented scaffold
     * ordering.
     *
     * We initially build a hash table of contigs in this scaffold, and
     * then iterate through contig_order copying out the new contigs whenever
     * one matches.
     */
    for (i = 0; i < nmemb; i++) {
        HashData hd;
        hd.i = 0;
        HashTableAdd(h, (char *)&contigs[i].rec, sizeof(tg_rec), hd, NULL);
    }

    /* Replace any contig matching the scaffold with the new order */
    crecs = ArrayBase(tg_rec, io->contig_order);
    for (i = j = 0; i < nc; i++) {
        if (!HashTableSearch(h, (char *)&crecs[i], sizeof(tg_rec)))
            continue;

        crecs[i] = contigs[j++].rec;
    }

    /* Send event messages around */
    rs.job = REG_BUFFER_START;
    for (i = 0; i < nc; i++) {
        if (!HashTableSearch(h, (char *)&crecs[i], sizeof(tg_rec)))
            continue;

        contig_notify(io, crecs[i], (reg_data *)&rs);
    }

    ro.job = REG_ORDER;
    for (i = 0; i < nc; i++) {
        if (!HashTableSearch(h, (char *)&crecs[i], sizeof(tg_rec)))
            continue;

        ro.pos = i+1;
        contig_notify(io, crecs[i], (reg_data *)&ro);
    }

    /* Notify the end of our updates */
    re.job = REG_BUFFER_END;
    for (i = 0; i < nc; i++) {
        if (!HashTableSearch(h, (char *)&crecs[i], sizeof(tg_rec)))
            continue;

        contig_notify(io, crecs[i], (reg_data *)&re);
    }

    HashTableDestroy(h, 0);
    cache_decr(io, f);

    return 0;
}
/*
 * Given a contig order and a set of current scaffolds, this updates the
 * order of entries within each scaffold to match the contig order.
 *
 * For example if we have contigs in order 1 3 5 2 6 8 4 7 9 and
 * scaffolds {1 2 3 4} {5 6 7 8 9} we would shuffle the scaffold members
 * to {1 3 2 4} {5 6 8 7 9}
 *
 * The purpose is for integration with contig shuffling in the Contig List
 * or Contig Selector. The master contig order array is what gets shuffled
 * manually by the user and it is also the definitive order to use when
 * outputting data (so it is completely under users control whether they
 * sort by name, size or scaffold).
 *
 * A scaffold is only marked read/write when its member order actually
 * differs from the order implied by io->contig_order.
 *
 * Returns 0 on success (including when there are no scaffolds or no
 *           contigs to process)
 *        -1 on failure
 */
int update_scaffold_order(GapIO *io)
{
    int i, j, ret = -1;
    int nc;
    tg_rec *crecs;
    scaf_ctg_t *a;

    if (!io->scaffold)
        return 0; /* Not supported, but considered success */

    nc = ArrayMax(io->contig_order);
    if (nc <= 0)
        return 0; /* Nothing to order; also avoids a zero-sized malloc */

    a = malloc(nc * sizeof(*a));
    if (!a)
        return -1;

    /*
     * Produce an array of scaffold and contig recs, so we can sort on
     * both fields.
     */
    crecs = ArrayBase(tg_rec, io->contig_order);
    for (i = 0; i < nc; i++) {
        contig_t *c = cache_search(io, GT_Contig, crecs[i]);
        if (!c)
            goto err;

        a[i].ctg_idx  = i;
        a[i].scaffold = c->scaffold;
    }

    qsort(a, nc, sizeof(*a), scaf_ctg_sort);

    /*
     * Now recreate scaffold orders from the sorted contig list.
     */
    i = 0;
    while (i < nc) {
        scaffold_t *f;
        int k, nmemb;

        /* Contigs that are not in any scaffold */
        if (!a[i].scaffold) {
            i++;
            continue;
        }

        /* Find the run j .. i-1 sharing the same scaffold */
        j = i;
        while (i < nc && a[i].scaffold == a[j].scaffold)
            i++;
        nmemb = i - j;

        f = cache_search(io, GT_Scaffold, a[j].scaffold);
        if (!f)
            goto err;

        if (!f->contig || ArrayMax(f->contig) != nmemb) {
            verror(ERR_WARN, "update_scaffold_order",
                   "Scaffold %"PRIrec
                   " has different number of entries than contigs claim.",
                   f->rec);
            goto err;
        }

        /* Only mark r/w and update if they differ */
        for (k = 0; k < nmemb; k++) {
            if ((arrp(scaffold_member_t, f->contig, k))->rec !=
                crecs[a[j+k].ctg_idx])
                break;
        }

        if (k != nmemb) {
            f = cache_rw(io, f);
            if (!f)
                goto err;

            for (k = 0; k < nmemb; k++)
                (arrp(scaffold_member_t, f->contig, k))->rec =
                    crecs[a[j+k].ctg_idx];
        }
    }

    ret = 0;

 err:
    free(a);
    return ret;
}