Exemplo n.º 1
0
/*
 * Translate a contig selector cursor position into a position (in bases)
 * local to a single contig.
 *
 * io  Database handle; supplies the contig order and the contig lengths.
 * wx  Cursor position measured across all contigs laid end to end in
 *     contig order.
 *
 * Returns wx unchanged when there is exactly one contig or when wx is
 * negative (i.e. left of the first contig). Otherwise returns wx minus the
 * start offset of the first contig whose span contains wx, where a span is
 * (start, end + 1]. If no span matches, the summed length of every contig
 * is subtracted instead.
 */
double
CSLocalCursor(GapIO *io,
	      double wx)
{
    GCardinal *contig_order = ArrayBase(GCardinal, io->contig_order);
    int total = NumContigs(io);
    int span_start;
    int span_end = 0;
    int idx;

    /* One contig, or cursor left of the first contig: wx is already local */
    if (total == 1 || wx < 0)
	return wx;

    for (idx = 0; idx < total; idx++) {
	int contig = contig_order[idx];

	/* This contig covers span_start .. span_end of the joined layout */
	span_start = span_end;
	span_end  += ABS(io_clength(io, contig));

	if ((wx > span_start) && (wx <= span_end + 1))
	    return wx - span_start;
    }

    /* Not inside any span: measure from the end of the final contig */
    return wx - span_end;
}
Exemplo n.º 2
0
/*
 * Records which bin and which range slot a sequence occupies, then stores
 * the sequence.
 *
 * io     Database handle, passed through to sequence_new_from().
 * seq    Sequence to store; its bin and bin_index fields are overwritten.
 * bin    Bin holding the sequence; bin->rec becomes seq->bin.
 * r_out  Pointer to this sequence's entry inside bin->rng; its element
 *        offset from the start of that array becomes seq->bin_index.
 *
 * Returns the value of sequence_new_from() (the new record number,
 * according to the original comment on this function).
 */
tg_rec save_sequence(GapIO *io, seq_t *seq, bin_index_t *bin, range_t *r_out) {
    range_t *first_range = ArrayBase(range_t, bin->rng);

    seq->bin_index = r_out - first_range;
    seq->bin       = bin->rec;

    return sequence_new_from(io, seq);
}
Exemplo n.º 3
0
/*
 * Converts a position within one contig into a position measured across
 * the entire database, i.e. with the contigs laid end to end in contig
 * order. No separator base is added between contigs.
 *
 * io        Database handle; supplies the contig order and contig lengths.
 * c_num     Contig number that 'position' is relative to.
 * position  Base position within c_num.
 *
 * Returns position plus the summed io_clength() of every contig that
 * precedes c_num in the contig order, or -1 if c_num is not in the order.
 */
int
find_position_in_DB(GapIO *io,
		    int c_num,
		    int position)
{
    GCardinal *contig_order = ArrayBase(GCardinal, io->contig_order);
    int preceding = 0;
    int idx = 0;
    int contig;

    while (idx < NumContigs(io)) {
	contig = contig_order[idx];

	/* Not the one we want: account for its length and move on */
	if (contig != c_num) {
	    preceding += io_clength(io, contig);
	    idx++;
	    continue;
	}

#ifdef DEBUG
	printf("position %d cur_length %d c_num %d cur_contig %d\n",
	       position, preceding, c_num, contig);
#endif
	return preceding + position;
    }

    return -1;
}
Exemplo n.º 4
0
/*
 * Removes some or all tags from some or all contigs.
 * If the contig list or tag list is blank it implies all contigs or all tags.
 *
 * Returns 0 on success
 *        -1 on failure
 */
int delete_tags(GapIO *io, int ncontigs, contig_list_t *contigs,
		char *tag_list, int verbose) {
    HashTable *h = NULL;
    int ret = 0;

    /* Hash tag types */
    if (tag_list && *tag_list) {
	int i;
	if (SetActiveTags(tag_list) == -1) {
	    return -1;
	}
	h = HashTableCreate(32, 0);
	for (i = 0; i < number_of_active_tags; i++) {
	    HashData hd;
	    hd.i = 0;
	    HashTableAdd(h, active_tag_types[i], 4, hd, NULL);
	}
    }

    /* Iterate over contig list or all contigs */
    if (verbose)
	vfuncheader("Delete Tags");

    if (ncontigs) {
	int i;

	for (i = 0; i < ncontigs; i++) {
	    contig_t *c = cache_search(io, GT_Contig, contigs[i].contig);
	    vmessage("Scanning contig %d of %d (%s)\n",
		     i+1, ncontigs, c->name);
	    ret |= delete_tag_single_contig(io, contigs[i].contig, h, verbose);
	    UpdateTextOutput();
	    cache_flush(io);
	}

    } else {
	int i;
	tg_rec *order = ArrayBase(tg_rec, io->contig_order);

	for (i = 0; i < NumContigs(io); i++) {
	    contig_t *c = cache_search(io, GT_Contig, order[i]);
	    vmessage("Scanning contig %d of %d (%s)\n",
		     i+1, NumContigs(io), c->name);
	    ret |= delete_tag_single_contig(io, order[i], h, verbose);
	    UpdateTextOutput();
	    cache_flush(io);
	}
    }

    SetActiveTags("");
    if (h)
	HashTableDestroy(h, 0);

    return ret;
}
Exemplo n.º 5
0
/*
 * Moves a set of contigs to a new place in the contig order, chosen by a
 * canvas x coordinate in the contig selector, then notifies everything
 * registered on those contigs and highlights the separator at the position
 * the first moved contig came from.
 *
 * interp        Tcl interpreter used to run the HighlightSeparator command.
 * io            Database handle owning the contig order.
 * cs_id         Id passed to result_data() to find the contig selector.
 * contig_array  Contig numbers to move.
 * num_contigs   Number of entries in contig_array; nothing is done when
 *               this is zero or negative.
 * cx            Canvas x coordinate to move the contigs to.
 *
 * Entries of contig_array that are not present in the contig order are
 * not moved, but still receive the notifications.
 */
void
update_contig_order(Tcl_Interp *interp,
		    GapIO *io,
		    int cs_id,
		    int *contig_array,
		    int num_contigs,
		    int cx)
{
    GCardinal *order = ArrayBase(GCardinal, io->contig_order);
    obj_cs *cs;
    int i, j;
    double wx, wy;
    int left_position;
    char cmd[1024];
    int cmd_len;
    int orig_pos = 0;
    reg_buffer_start rs;
    reg_buffer_end re;
    reg_order ro;

    /* Nothing to move; contig_array[0] below would be out of bounds */
    if (num_contigs <= 0)
	return;

    /* NOTE(review): assumes result_data() yields NULL for an unknown id */
    cs = result_data(io, cs_id, 0);
    if (!cs)
	return;

    CanvasToWorld(cs->canvas, cx, 0, &wx, &wy);

    /*
     * returns the nth contig to the left of the wx, NOT the contig number.
     * If this is to the left of the first contig, returns 0.
     */
    left_position = find_left_position(io, order, wx);

    /* Remember (1-based) where the first contig to be moved started out */
    for (i = 0; i < NumContigs(io); i++) {
	if (order[i] == contig_array[0]) {
	    orig_pos = i+1;
	    break;
	}
    }

    /* convert index on order to index on contig num */
    for (i = 0; i < num_contigs; i++) {

	for (j = 0; j < NumContigs(io); j++) {
	    if (order[j] == contig_array[i])
		break;
	}

	/* Not in the order: j is one past the end of order[], so skip it */
	if (j == NumContigs(io))
	    continue;

	ReOrder(io, order, j, left_position);

	/*
	 * A contig taken from the right of the insertion point advances
	 * both the insertion point and the original separator position.
	 */
	if (j > left_position) {
	    left_position++;
	    orig_pos++;
	}
    }

    ro.job = REG_ORDER;
    ro.pos = left_position;

#ifdef HACK
    /* HACK is there a better way of representing this - only need to
     * replot once
     */
    contig_notify(io, 1, (reg_data *)&ro);
#endif

    /* Notify of the start of the flurry of updates */
    rs.job = REG_BUFFER_START;
    for (i = 0; i < num_contigs; i++) {
	contig_notify(io, contig_array[i], (reg_data *)&rs);
    }

    ro.job = REG_ORDER;
    ro.pos = left_position;

    for (i = 0; i< num_contigs; i++)
	contig_notify(io, contig_array[i], (reg_data *)&ro);

    /* Notify the end of our updates */
    re.job = REG_BUFFER_END;
    for (i = 0; i < num_contigs; i++) {
	contig_notify(io, contig_array[i], (reg_data *)&re);
    }

    /* draw larger separator tick to show where contig was moved from */
    cmd_len = snprintf(cmd, sizeof(cmd), "HighlightSeparator %s %d",
		       cs->hori, orig_pos);
    /* Never evaluate a truncated command */
    if (cmd_len < 0 || (size_t)cmd_len >= sizeof(cmd))
	return;
    Tcl_Eval(interp, cmd);
}
Exemplo n.º 6
0
/*
 * Takes a multiple alignment and updates the on-disk data structures to
 * match. This needs to correct confidence values, original positions and
 * tags too.
 *
 * io      Database handle used for every cache and bin operation below.
 * cnum    Record number of the contig being updated.
 * malign  Multiple alignment; its contigl list supplies one realigned
 *         segment (mseg) per sequence record (cl->id).
 * indels  Array of con_indel_t. Entries with size > 0 insert that many
 *         pad columns at pos+1; other entries delete -size pads at pos+1.
 *         They are applied in array order.
 *
 * Returns nothing. If the realigned sequence differs from the stored one
 * by anything other than added or removed pads, or the rebuilt length does
 * not add up, the process is terminated with abort().
 */
void update_io(GapIO *io, tg_rec cnum, MALIGN *malign, Array indels) {
    CONTIGL *cl;
    tg_rec rnum;
    range_t r, *r_out;
    bin_index_t *bin;
    contig_t *c = cache_search(io, GT_Contig, cnum);
    size_t i, nindel;

    /* Hold a reference on the contig until the matching cache_decr below */
    cache_incr(io, c);

    /*
     * To minimise number of data modifications we use a three step approach.
     *
     * Step 1: insert columns of pads, shifting reads as appropriate.
     * Step 2: edit sequence alignments as required, possibly involving
     *         moving sequences and/or adding and removing pads.
     * Step 3: remove columns of entire pads.
     *
     * This means that when we introduce a column of pads we don't have
     * to make edits to every single read position down stream, and can
     * instead make use of the optimised recursive bin functions to do this
     * for us.
     */

    /* Step 1: make indels */
    nindel = ArrayMax(indels);
    for (i = 0; i < nindel; i++) {
	con_indel_t *id = arrp(con_indel_t, indels, i);
	int j;

	if (id->size > 0) {
	    /* Insert id->size '*' characters in one call */
	    contig_insert_bases(io, &c, id->pos+1, '*', -1, id->size);
	} else {
	    /* Deletions are done one pad at a time, always at the same spot */
	    for (j = 0; j < -id->size; j++) {
		contig_delete_pad(io, &c, id->pos+1);
	    }
	}
    }

    /* Step 2: edit alignments */
    for (cl = malign->contigl; cl; cl = cl->next) {
	seq_t *s, *sorig;
	int len, update_range = 0;
	int shift;

	rnum = cl->id;
	
	/*
	 * sorig is the cached record (kept referenced for this iteration);
	 * s is a private duplicate that all the editing is done on.
	 */
	sorig = cache_search(io, GT_Seq, rnum);
	cache_incr(io, sorig);
	s = dup_seq(sorig);
	/* Complemented here and complemented back again before storing */
	if (cl->mseg->comp)
	    complement_seq_t(s);

	/* Length of the used portion, left..right inclusive */
	len = s->right - s->left + 1;

	/* Check if sequence has changed. If so assign a new one */
	if (cl->mseg->length != len ||
	    memcmp(s->seq + s->left-1, cl->mseg->seq, cl->mseg->length) != 0) {
	    /* Left cutoff + right cutoff + realigned used portion */
	    int newlen = s->left-1 + ABS(s->len) - s->right + cl->mseg->length;
	    /* NOTE(review): this i shadows the function-level size_t i */
	    int i, j, np;
	    /* NOTE(review): neither malloc result is checked before use */
	    char   *newseq  = malloc(newlen+1);
	    int8_t *newconf = malloc(newlen+1);

	    /* Build new seq/conf arrays */
	    /* Left cutoff data is carried over untouched */
	    memcpy(newseq,  s->seq,  s->left-1);
	    memcpy(newconf, s->conf, s->left-1);

	    /* Realigned bases; their confidences are filled in by the loop */
	    memcpy(&newseq[s->left-1], cl->mseg->seq, cl->mseg->length);

	    /*
	     * Step through both old and new sequences working out how
	     * they differ. This will (*should*) be entire pad movements.
	     * i = index to old seq
	     * j = index to new seq
	     * np = number of pads added minus removed from old seq.
	     */
	    np = 0;
	    for (i =j =s->left-1;
		 i < ABS(s->len) && j < s->left-1 + cl->mseg->length;
		 ) {
		/* Bases match */
		/* (ignoring case, or old '.' against new 'N'): the new base
		 * takes the old base's case and confidence value. */
		if (toupper(newseq[j]) == toupper(s->seq[i]) ||
		    (s->seq[i] == '.' && newseq[j] == 'N')) {
		    if (isupper(s->seq[i]))
			newseq[j] = toupper(newseq[j]);
		    else
			newseq[j] = tolower(newseq[j]);
		    newconf[j] = s->conf[i];
		    i++, j++;
		    continue;
		}

		/* Pad removed */
		/* Only the old index advances. The position handed to
		 * tag_shift_for_delete is the already-incremented i plus
		 * np, after which np is decremented. */
		if (s->seq[i] == '*') {
		    i++;
		    tag_shift_for_delete(io, cnum, rnum, cl->mseg->offset,
					 cl->mseg->length, i+np--,
					 s->bin);
		    /*
		    if (io_length(io, rnum) < 0) {
			tag_shift_for_delete(io, rnum, r.length - i + 1);
		    } else {
			tag_shift_for_delete(io, rnum, i+np--);
		    }
		    */
		    continue;
		}

		/* Pad created */
		/* Only the new index advances. ql/qr are the confidences of
		 * the nearest non-pad old bases to the left (searching down
		 * to index 0) and right (searching up to, but excluding,
		 * index s->right); each stays 0 if none is found. */
		if (newseq[j] == '*') {
		    int k;
		    int ql = 0, qr = 0;
		    for (k = i-1; k >= 0; k--) {
			if (s->seq[k] != '*') {
			    ql = s->conf[k];
			    break;
			}
		    }
		    for (k = i+1; k < s->right; k++) {
			if (s->seq[k] != '*') {
			    qr = s->conf[k];
			    break;
			}
		    }
		    newconf[j] = MIN(ql, qr); /* min conf of neighbours */
		    j++;
		    /* np is incremented first, then added to i */
		    tag_shift_for_insert(io, cnum, rnum, cl->mseg->offset,
					 cl->mseg->length, i+ ++np,
					 s->bin);
		    /*
		    if (io_length(io, rnum) < 0) {
			tag_shift_for_insert(io, rnum, r.length - i + 1);
		    } else {
			tag_shift_for_insert(io, rnum, i+ ++np);
		    }
		    */
		    continue;
		}

		/* Neither a match nor a pad change: the alignment altered
		 * real bases, which this function cannot represent. */
		fprintf(stderr, "Alignment introduced non-pad character");
		abort();
	    }

	    /* Pads previously at the end of the reading & now removed */
	    while (i < s->right) {
		if (s->seq[i] == '*') {
		    i++;
		    tag_shift_for_delete(io, cnum, rnum, cl->mseg->offset,
					 cl->mseg->length, i+np--,
					 s->bin);
		    /*
		    if (io_length(io, rnum) < 0) {
			tag_shift_for_delete(io, rnum, r.length - i + 1);
		    } else {
			tag_shift_for_delete(io, rnum, i+np--);
		    }
		    */
		} else {
		    /* Error: clipped data that wasn't a pad */
		    abort();
		}
	    }

	    /* Should only be pads remaining in newseq, if anything */
	    /* The right clip point is set to the new index reached so far,
	     * so any trailing new pads fall outside the used portion and
	     * are given confidence 0. */
	    s->right = j;
	    for (; j < s->left-1 + cl->mseg->length; j++) {
		if (newseq[j] != '*') {
		    fprintf(stderr, "Alignment introduced non-pad character");
		    abort();
		}
		newconf[j] = 0;
	    }

	    /* Append on the right hand cutoff data */
	    for (; i < ABS(s->len); i++, j++) {
		newseq[j]  = s->seq[i];
		newconf[j] = s->conf[i];
	    }
	    /* Sanity check: everything written must add up to newlen */
	    if (j != newlen) {
		abort();
	    }

	    /* Write it back out */
	    /* Copy newseq/newconf into seq_t */

	    /* s borrows the temporary buffers; their contents are copied
	     * into sorig further down, before the buffers are released. */
	    s->seq = newseq;
	    s->conf = newconf;
	    update_range = 0;
	    if (ABS(s->len) != j) {
		/* Length change implies updating the range array too */
		/* The sign of s->len is preserved */
		s->len = s->len >= 0 ? j : -j;
		update_range = 1;
	    }

	    /* Undo the complement applied at the top of this iteration */
	    if (cl->mseg->comp)
		complement_seq_t(s);

	    /* The memcpy trashes the block pointer, so special care needed */
	    {
		/* NOTE(review): cache_rw result is used without a NULL check */
		sorig = cache_rw(io, sorig);
		void *blk = sorig->block;
		memcpy(sorig, s, sizeof(seq_t)); 
		sorig->block = blk;
	    }

	    /* Resize the cached item only when the sequence length changed */
	    if (update_range)
		sorig = cache_item_resize(sorig, sizeof(*sorig) +
					  sequence_extra_len(sorig));

	    /* The memcpy above left sorig's pointers equal to s's; presumably
	     * this call re-points them at sorig's own storage so the copies
	     * below have valid destinations - TODO confirm. */
	    sequence_reset_ptr(sorig);

	    if (s->name)
		memcpy(sorig->name,       s->name,       s->name_len+1);
	    if (s->trace_name)
		memcpy(sorig->trace_name, s->trace_name, s->trace_name_len+1);
	    if (s->alignment)
		memcpy(sorig->alignment,  s->alignment,  s->alignment_len+1);
	    memcpy(sorig->seq,  s->seq,  ABS(s->len));
	    memcpy(sorig->conf, s->conf, ABS(s->len));

	    /* NOTE(review): allocated with malloc() above but released with
	     * xfree() - confirm the two are compatible. */
	    xfree(newconf);
	    xfree(newseq);
	}

	{
	    /*
	     * Work out where the used portion currently starts: st as
	     * reported by sequence_get_position, plus the amount of cutoff
	     * data before the used portion (which end that is depends on
	     * the reported orientation and the sign of sorig->len). If that
	     * differs from where the alignment wants it (mseg->offset+1)
	     * the range needs updating as well.
	     */
	    int st, en, or;
	    sequence_get_position(io, s->rec, NULL, &st, &en, &or);
	    if (or ^ (sorig->len < 0)) {
		shift = ABS(sorig->len) - sorig->right;
	    } else {
		shift = sorig->left-1;
	    }
	    st += shift;
	    if (st != cl->mseg->offset+1) {
		update_range = 1;
	    }
	}

	/* Done with the private duplicate; from here on s aliases sorig */
	free(s);

	if (update_range) {
	    int bin_changed = 0;

	    /* Get old range and pair data */
	    s = sorig;
	    bin = cache_search(io, GT_Bin, s->bin);
	    r = *arrp(range_t, bin->rng, s->bin_index);
	    assert(r.rec == s->rec);

	    /* Update range, tedious and slow way */
	    /* Remove the item, then re-add the copied range with new
	     * start/end; bin_add_range returns the bin it ended up in and
	     * sets r_out to the stored range entry. */
	    bin_remove_item(io, &c, GT_Seq, s->rec);
	    r.start = cl->mseg->offset + 1 - shift;
	    r.end   = r.start + ABS(s->len) - 1;
	    bin = bin_add_range(io, &c, &r, &r_out, NULL, 0);

	    /* Check if the new bin has a different complemented status too */
	    if (s->bin != bin->rec) {
		int old_comp = bin_get_orient(io, s->bin);
		int new_comp = bin_get_orient(io, bin->rec);

		if (new_comp != old_comp) {
		    /* Flip the sequence's own orientation: negate len and
		     * toggle the SEQ_COMPLEMENTED flag. */
		    //int tmp;
		    s = cache_rw(io, s);
		    s->len *= -1;
		    s->flags ^= SEQ_COMPLEMENTED;
		    //tmp = s->left;
		    //s->left  = ABS(s->len) - (s->right-1);
		    //s->right = ABS(s->len) - (tmp-1);
		}

		bin_changed = 1;
	    }
	
	    /* Update seq bin & bin_index fields */
	    s = cache_rw(io, s);
	    s->bin = bin->rec;
	    s->bin_index = r_out - ArrayBase(range_t, bin->rng);

	    /* A failure here is reported as a warning only */
	    if (bin_changed) {
		if (-1 == sequence_fix_anno_bins(io, &s)) {
		    verror(ERR_WARN, "update_io",
			   "sequence_fix_anno_bins() failure");
		}
	    }
	}

	/* Release the reference taken at the top of this iteration */
	cache_decr(io, sorig);
    }

    /* Step 3 (remove pad columns) done in calling function. */

    cache_decr(io, c);
}
Exemplo n.º 7
0
/*
 * Complements a scaffold; both complementing each contig within it and
 * reversing the order of contigs in the scaffold.
 *
 * io    Database handle.
 * srec  Record number of the scaffold to complement.
 *
 * Registered listeners on every contig of the scaffold receive
 * REG_BUFFER_START, then REG_ORDER (with the contig's new 1-based position
 * in the contig order), then REG_BUFFER_END.
 *
 * Returns 0 on success
 *        -1 on failure (scaffold not found, not writable, or the lookup
 *           table could not be created); no contig has been complemented
 *           or reordered in that case.
 */
int complement_scaffold(GapIO *io, tg_rec srec) {
    scaffold_t *f;
    int i, j, nc = ArrayMax(io->contig_order);
    scaffold_member_t *contigs;
    tg_rec *crecs;
    HashTable *h;
    reg_order ro;
    reg_buffer_start rs;
    reg_buffer_end re;

    if (!(f = cache_search(io, GT_Scaffold, srec)))
	return -1;
    if (!(f = cache_rw(io, f)))
	return -1;
    cache_incr(io, f);

    /*
     * Create the (still empty) lookup table before touching any data, so
     * that an allocation failure cannot leave the scaffold half updated.
     */
    h = HashTableCreate(nc, 0);
    if (!h) {
	cache_decr(io, f);
	return -1;
    }

    /* Complement contigs */
    contigs = ArrayBase(scaffold_member_t, f->contig);
    for (i = 0; i < ArrayMax(f->contig); i++) {
	complement_contig(io, contigs[i].rec);
    }

    /* Reverse the order of the contigs in the scaffold array */
    for (i = 0, j = ArrayMax(f->contig)-1; i < j; i++, j--) {
	scaffold_member_t cr1 = contigs[i];
	contigs[i] = contigs[j];
	contigs[j] = cr1;
    }

    /*
     * Reverse the order of contigs in the contig_order array too.
     * This is the part that really matters. It's also hard as the contigs
     * in the contig order array could be in any order and not adjacent.
     * For our purposes we'll just ensure the contigs in this scaffold in
     * the contig order array match our freshly complemented scaffold
     * ordering.
     *
     * We initially build a hash table of contigs in this scaffold, and
     * then iterate through contig_order copying out the new contigs whenever
     * one matches.
     */
    for (i = 0; i < ArrayMax(f->contig); i++) {
	HashData hd;
	hd.i = 0;
	HashTableAdd(h, (char *)&contigs[i].rec, sizeof(tg_rec), hd, NULL);
    }

    /*
     * Replace any contig matching the scaffold with the new order: the
     * slots occupied by scaffold members keep their positions, but are
     * refilled with the members in their reversed sequence.
     */
    crecs = ArrayBase(tg_rec, io->contig_order);
    for (i = j = 0; i < nc; i++) {
	if (!HashTableSearch(h, (char *)&crecs[i], sizeof(tg_rec)))
	    continue;

	crecs[i] = contigs[j++].rec;
    }

    /* Send event messages around */
    rs.job = REG_BUFFER_START;
    for (i = 0; i < nc; i++) {
	if (!HashTableSearch(h, (char *)&crecs[i], sizeof(tg_rec)))
	    continue;

	contig_notify(io, crecs[i], (reg_data *)&rs);
    }

    ro.job = REG_ORDER;
    for (i = 0; i < nc; i++) {
	if (!HashTableSearch(h, (char *)&crecs[i], sizeof(tg_rec)))
	    continue;

	/* Position in the contig order, counted from 1 */
	ro.pos = i+1;
	contig_notify(io, crecs[i], (reg_data *)&ro);
    }

    /* Notify the end of our updates */
    re.job = REG_BUFFER_END;
    for (i = 0; i < nc; i++) {
	if (!HashTableSearch(h, (char *)&crecs[i], sizeof(tg_rec)))
	    continue;

	contig_notify(io, crecs[i], (reg_data *)&re);
    }

    HashTableDestroy(h, 0);
    cache_decr(io, f);

    return 0;
}
Exemplo n.º 8
0
/*
 * Given a contig order and a set of current scaffolds, this updates the
 * order of entries within each scaffold to match the contig order.
 *
 * For example if we have contigs in order 1 3 5 2 6 8 4 7 9 and
 * scaffolds {1 2 3 4} {5 6 7 8 9} we would shuffle the scaffold members
 * to        {1 3 2 4} {5 6 8 7 9}
 *
 * The purpose is for integration with contig shuffling in the Contig List
 * or Contig Selector. The master contig order array is what gets shuffled
 * manually by the user and it is also the definitive order to use when
 * outputting data (so it is completely under users control whether they
 * sort by name, size or scaffold).
 *
 * io  Database handle; io->contig_order is read, and scaffold records
 *     whose member order differs are made writable and rewritten.
 *
 * A database without scaffold support, or with an empty contig order, has
 * nothing to update and counts as success.
 *
 * Returns 0 on success
 *        -1 on failure (out of memory, a contig or scaffold could not be
 *           fetched or made writable, or a scaffold's member count
 *           disagrees with the number of contigs that reference it)
 */
int update_scaffold_order(GapIO *io) {
    int i, j, ret = -1;
    int nc;
    tg_rec *crecs;
    scaf_ctg_t *a;

    if (!io->scaffold)
	return 0; /* Not supported, but considered success */

    /*
     * malloc(0) is allowed to return NULL, which would otherwise be
     * misreported as an out-of-memory failure.
     */
    nc = ArrayMax(io->contig_order);
    if (nc <= 0)
	return 0;

    a = malloc(nc * sizeof(*a));
    if (!a)
	return -1;

    /*
     * Produce an array of scaffold and contig recs, so we can sort on
     * both fields.
     */
    crecs = ArrayBase(tg_rec, io->contig_order);
    for (i = 0; i < nc; i++) {
	contig_t *c = cache_search(io, GT_Contig, crecs[i]);
	if (!c)
	    goto err;

	a[i].ctg_idx = i;
	a[i].scaffold = c->scaffold;
    }

    qsort(a, nc, sizeof(*a), scaf_ctg_sort);

    /*
     * Now recreate scaffold orders from the sorted contig list.
     */
    for (i = 0; i < nc; i++) {
	scaffold_t *f;
	int k;

	/* Contigs that belong to no scaffold */
	if (!a[i].scaffold)
	    continue;

	/* Advance i to one past the run of entries sharing a[j].scaffold */
	j = i;
	while (i < nc && a[i].scaffold == a[j].scaffold)
	    i++;

	/* j .. i-1 share the same scaffold */
	f = cache_search(io, GT_Scaffold, a[j].scaffold);
	if (!f)
	    goto err;

	if (!f->contig || ArrayMax(f->contig) != i-j) {
	    verror(ERR_WARN, "update_scaffold_order", "Scaffold %"PRIrec
		   " has different number of entries than contigs claim.",
		   f->rec);
	    goto err;
	}

	/* Only mark r/w and update if they differ */
	for (k = 0; k < ArrayMax(f->contig); k++) {
	    if ((arrp(scaffold_member_t, f->contig, k))->rec
		!= crecs[a[j+k].ctg_idx])
		break;
	}

	if (k != ArrayMax(f->contig)) {
	    if (!(f = cache_rw(io, f)))
		goto err;
	    for (k = 0; k < ArrayMax(f->contig); k++)
		(arrp(scaffold_member_t, f->contig, k))->rec
		    = crecs[a[j+k].ctg_idx];
	}

	/* Compensate for the i++ of the enclosing for loop */
	i--;
    }

    ret = 0;
 err:
    free(a);
    return ret;
}