Ejemplo n.º 1
0
/*
 * Compute the visible end position of a contig. This isn't just the extents
 * of start_used / end_used in the bins as this can included invisible
 * data such as cached consensus sequences.
 */
int contig_visible_end(GapIO *io, tg_rec crec) {
    rangec_t *r;
    contig_iterator *ci;

    ci = contig_iter_new_by_type(io, crec, 1, CITER_LAST | CITER_IEND,
				 CITER_CSTART, CITER_CEND,
				 GRANGE_FLAG_ISANY);
    if (!ci) {
	contig_t *c = cache_search(io, GT_Contig, crec);
	return c->end;
    }
    
    while (r = contig_iter_prev(io, ci)) {
	int v;

	if ((r->flags & GRANGE_FLAG_ISMASK) == GRANGE_FLAG_ISCONS)
	    continue;
	
	v = r->end;
	contig_iter_del(ci);
	return v;
    }

    contig_iter_del(ci);
    return 0;
}
Ejemplo n.º 2
0
static void tag_shift_for_delete(GapIO *io, tg_rec crec, tg_rec srec,
				 int start, int end, int pos, tg_rec brec) {
    contig_iterator *ci;
    rangec_t *r;
    contig_t *c = cache_search(io, GT_Contig, crec);;

    //printf("< tag in seq %"PRIrec" at %d\n", srec, pos);

    cache_incr(io, c);

    ci = contig_iter_new_by_type(io, crec, 0, CITER_FIRST | CITER_ISTART,
				 start+pos, end, GRANGE_FLAG_ISANNO);
    if (!ci) {
	cache_decr(io, c);
	return;
    }

    while ((r = contig_iter_next(io, ci))) {
	range_t r2, *r_out;
	anno_ele_t *a;
	bin_index_t *bin;

	if (r->pair_rec != srec)
	    continue;

	bin_remove_item(io, &c, GT_AnnoEle, r->rec);
	r2.start    = (r->start > start+pos) ? r->start-1 : r->start;
	r2.end      = r->end-1;
	r2.mqual    = r->mqual;
	r2.rec      = r->rec;
	r2.pair_rec = r->pair_rec;
	r2.flags    = r->flags;

	if (r2.end < r2.start) {
	    /* Tag entirely removed now, it must have been on a pad */
	    a = cache_search(io, GT_AnnoEle, r->rec);
	    a = cache_rw(io, a);
	    cache_deallocate(io, a);
	    continue;
	}
	bin = bin_add_to_range(io, &c, brec, &r2, &r_out, NULL, 0);

	a = cache_search(io, GT_AnnoEle, r->rec);
	if (a->bin != bin->rec /*||
	    a->idx != r_out - ArrayBase(range_t, bin->rng)*/) {
	    /* Annotation moved bins */
	    a = cache_rw(io, a);
	    a->bin = bin->rec;
	    //a->bin_idx = r_out - ArrayBase(range_t, bin->rng);
	}
    }

    cache_decr(io, c);
    contig_iter_del(ci);
}
Ejemplo n.º 3
0
int edview_search_tag_type(edview *xx, int dir, int strand, char *value) {
    contig_iterator *iter;
    int start, end;
    rangec_t *r;
    rangec_t *(*ifunc)(GapIO *io, contig_iterator *ci);
    int type = str2type(value);
    contig_t *c = cache_search(xx->io, GT_Contig, xx->cnum);

    if (dir) {
	start = xx->cursor_apos + (dir ? 1 : -1);
	end   = c->end;
	ifunc = contig_iter_next;
    } else {
	start = c->start;
	end   = xx->cursor_apos + (dir ? 1 : -1);
	ifunc = contig_iter_prev;
    }

    iter = contig_iter_new_by_type(xx->io, xx->cnum, 1,
				   dir == 1 ? CITER_FIRST : CITER_LAST,
				   start, end, GRANGE_FLAG_ISANNO);
    if (!iter)
	/* Can happen legitimately when we're already at the end of contig */
	return -1;

    while (r = ifunc(xx->io, iter)) {
	if ((dir  && r->start < start) ||
	    (!dir && r->start > end))
	    continue;

	if (r->mqual == type) 
	    break;
    }

    if (r) {
	if (r->flags & GRANGE_FLAG_TAG_SEQ) {
	    int pos;
	    sequence_get_position(xx->io, r->pair_rec, NULL, &pos, NULL, NULL);
	    pos = r->start - pos;
	    edSetCursorPos(xx, GT_Seq, r->pair_rec, pos, 1);
	} else {
	    edSetCursorPos(xx, GT_Contig, xx->cnum, r->start, 1);
	}
	contig_iter_del(iter);
	return 0;
    }

    contig_iter_del(iter);
    return -1;
}
Ejemplo n.º 4
0
int edview_search_tag_indel(edview *xx, int dir, int strand, char *value) {
    contig_iterator *iter;
    int start, end;
    rangec_t *r;
    rangec_t *(*ifunc)(GapIO *io, contig_iterator *ci);
    contig_t *c = cache_search(xx->io, GT_Contig, xx->cnum);

    if (dir) {
	start = xx->cursor_apos + (dir ? 1 : -1);
	end   = c->end;
	ifunc = contig_iter_next;
    } else {
	start = c->start;
	end   = xx->cursor_apos + (dir ? 1 : -1);
	ifunc = contig_iter_prev;
    }

    iter = contig_iter_new_by_type(xx->io, xx->cnum, 1,
				   dir == 1 ? CITER_FIRST : CITER_LAST,
				   start, end, GRANGE_FLAG_ISREFPOS);
    if (!iter)
	return -1;

    while (r = ifunc(xx->io, iter)) {
	if ((dir  && r->start < start) ||
	    (!dir && r->start > end))
	    continue;

	break;
    }

    if (r) {
	edSetCursorPos(xx, GT_Contig, xx->cnum, r->start, 1);
	contig_iter_del(iter);
	return 0;
    }

    contig_iter_del(iter);
    return -1;
}
Ejemplo n.º 5
0
/*
 * Removes all tags of specific types (hashed in h, or all if h == NULL)
 * from a specified contig.
 *
 * Returns 0 on success
 *        -1 on failure
 */
static int delete_tag_single_contig(GapIO *io, tg_rec crec,
				    HashTable *h, int verbose) {
    contig_iterator *ci;
    rangec_t *r;
    contig_t *c;
    int ret = -1;

    ci = contig_iter_new_by_type(io, crec, 1, CITER_FIRST,
				 CITER_CSTART, CITER_CEND,
				 GRANGE_FLAG_ISANNO);
    if (!ci)
	return -1;
    
    if (!(c = cache_search(io, GT_Contig, crec))) {
	contig_iter_del(ci);
	return -1;
    }
    cache_incr(io, c);

    while (NULL != (r = contig_iter_next(io, ci))) {
	char t[5];
	(void)type2str(r->mqual, t);
	if (!h || HashTableSearch(h, t, 4)) {
	    anno_ele_t *e;

	    if (verbose)
		vmessage("Removing anno %s #%"PRIrec"\tContig %s\t%d..%d\n",
			 t, r->rec, c->name, r->start, r->end);
	    if (bin_remove_item(io, &c, GT_AnnoEle, r->rec)) goto fail;
	    /* FIXME: Need to reclaim the GT_AnnoEle record itself */
	}
    }

    ret = 0;
 fail:
    contig_iter_del(ci);
    cache_decr(io, c);

    return ret;
}
Ejemplo n.º 6
0
int edview_search_tag_anno(edview *xx, int dir, int strand, char *value) {
    contig_iterator *iter;
    int start, end;
    rangec_t *r;
    rangec_t *(*ifunc)(GapIO *io, contig_iterator *ci);
    char *r_exp = NULL;
    contig_t *c = cache_search(xx->io, GT_Contig, xx->cnum);

    if (value) {
	if (NULL == (r_exp = REGCMP(xx->interp, value))) {
	    verror(ERR_WARN, "Search by anno", "invalid regular expression");
	    return -1;
	}
    }

    if (dir) {
	start = xx->cursor_apos + (dir ? 1 : -1);
	end   = c->end;
	ifunc = contig_iter_next;
    } else {
	start = c->start;
	end   = xx->cursor_apos + (dir ? 1 : -1);
	ifunc = contig_iter_prev;
    }

    iter = contig_iter_new_by_type(xx->io, xx->cnum, 1,
				   dir == 1 ? CITER_FIRST : CITER_LAST,
				   start, end, GRANGE_FLAG_ISANNO);
    if (!iter)
	return -1;

    while (r = ifunc(xx->io, iter)) {
	anno_ele_t *ae;

	if ((dir  && r->start < start) ||
	    (!dir && r->start > end))
	    continue;

	if (!r_exp)
	    break; /* blank expr => match all */

	ae = cache_search(xx->io, GT_AnnoEle, r->rec);
	if (!ae->comment)
	    continue;

	if (REGEX(xx->interp, ae->comment, r_exp))
	    break;
    }

    REGFREE(xx->interp, r_exp);

    if (r) {
	if (r->flags & GRANGE_FLAG_TAG_SEQ) {
	    int pos;
	    sequence_get_position(xx->io, r->pair_rec, NULL, &pos, NULL, NULL);
	    pos = r->start - pos;
	    edSetCursorPos(xx, GT_Seq, r->pair_rec, pos, 1);
	} else {
	    edSetCursorPos(xx, GT_Contig, xx->cnum, r->start, 1);
	}
	contig_iter_del(iter);
	return 0;
    }

    contig_iter_del(iter);
    return -1;
}
Ejemplo n.º 7
0
int edview_search_name(edview *xx, int dir, int strand, char *value)
{
    tg_rec rec, *rp, cnum = -1, best_rec;
    int best_pos, best_off;
    int nr, i;
    rangec_t *(*ifunc)(GapIO *io, contig_iterator *ci);
    int start, end, cstart;
    contig_iterator *iter;
    contig_t *c;

    /* Check for #num where num is a sequence record in this contig */
    if (*value == '#') {
	char *endp;
	int64_t v = strtol64(value+1, &endp, 10);

	rec = v;

	if (*endp == '\0' && cache_exists(xx->io, GT_Seq, rec)) {
	    sequence_get_clipped_position(xx->io, rec, &cnum,
					  &start, NULL, &cstart, NULL, NULL);
	    if (cnum == xx->cnum) {
		edSetCursorPos(xx, GT_Seq, rec, cstart - start, 1);
		return 0;
	    }
	}
    }

    /* Find all hits matching this name */
    rp = sequence_index_query_all(xx->io, value, 1, &nr);

    /* Also get an position-based iterator */
    c = cache_search(xx->io, GT_Contig, xx->cnum);
    if (dir) {
	start    = xx->cursor_apos + 1;
	end      = c->end;
	ifunc    = contig_iter_next;
	best_pos = end + 1;
	best_off = 0;
    } else {
	start    = c->start;
	end      = xx->cursor_apos - 1;
	ifunc    = contig_iter_prev;
	best_pos = start - 1;
	best_off = 0;
    }

    iter = contig_iter_new_by_type(xx->io, xx->cnum, 1,
				   dir == 1 ? CITER_FIRST : CITER_LAST,
				   start-1, end+1, GRANGE_FLAG_ISSEQ);
    if (!iter)
	return -1;

    /*
     * The iterator also finds overlapping objects, not just ones beyond this
     * point. That's fine if we're on the consensus as we probably want to
     * jump to the first seq-name overlapping this point.
     *
     * However if we're on a sequence already, we want the first one
     * after or before that sequence. So we skip along iterator until we're
     * at the current record.
     */
    if (xx->cursor_type == GT_Seq) {
	rangec_t *r;
	while ((r = ifunc(xx->io, iter))) {
	    if (r->rec == xx->cursor_rec)
		break;
	}
    }
				   

    /* Alternate between the by-name and by-position scan */
    best_rec = -1;
    for (i = 0; i < nr; i++) {
	int start, end;
	rangec_t *r;

	/* From name index */
	rec = rp[i++];
	sequence_get_clipped_position(xx->io, rec, &cnum, &start, &end,
				      &cstart, NULL, NULL);
	if (cnum == xx->cnum) {
	    if ((dir  && best_pos > cstart && cstart > xx->cursor_apos) ||
		(!dir && best_pos < cstart && cstart < xx->cursor_apos)) {
		best_pos = cstart;
		best_off = cstart - start;
		best_rec = rec;
	    }
	}

	/* From iterator */
	if ((r = ifunc(xx->io, iter))) {
	    seq_t *s;
	    if (NULL == (s = cache_search(xx->io, GT_Seq, r->rec))) {
		/* No match */
		best_rec = -1;
		break;
	    }
	    
	    if (strncmp(s->name, value, strlen(value)) == 0) {
		/* prefix match */
		puts("Found by pos iterator");
		best_rec = r->rec;
		break;
	    }
	} else {
	    /* End of contig - bail out early */
	    best_rec = -1;
	    break;
	}
    }

    contig_iter_del(iter);
    if (rp)
	free(rp);
    
    if (best_rec != -1) {
	edSetCursorPos(xx, GT_Seq, best_rec, best_off, 1);
	return 0;
    }

    return -1;
}