Ejemplo n.º 1
0
/*
 * Point the reader's iterator at the region seq:start-end.
 *
 * Any iterator the reader still holds is destroyed and the record buffer is
 * marked empty before the new query is made.  The sequence name is resolved
 * through the tabix index when reader->tbx_idx is set, otherwise through the
 * BCF header; the matching index is then queried with start and end+1.
 *
 * start and end are presumably 0-based (the diagnostics print them +1);
 * confirm against the callers.
 *
 * Returns 0 once the query has been issued, or -1 when seq is not present in
 * this file (reader->itr is left NULL in that case).  Terminates the process
 * when end is at or beyond MAX_CSI_COOR.
 */
static int _reader_seek(bcf_sr_t *reader, const char *seq, int start, int end)
{
    if ( end>=MAX_CSI_COOR )
    {
        fprintf(stderr,"The coordinate is out of csi index limit: %d\n", end+1);
        exit(1);
    }

    // Discard the iterator left over from the previous seek, if any
    if ( reader->itr )
    {
        hts_itr_destroy(reader->itr);
        reader->itr = NULL;
    }
    reader->nbuffer = 0;

    // Translate the sequence name to a numeric id using whichever index is loaded
    const int use_tabix = reader->tbx_idx ? 1 : 0;
    int seq_id;
    if ( use_tabix )
        seq_id = tbx_name2id(reader->tbx_idx, seq);
    else
        seq_id = bcf_hdr_name2id(reader->header, seq);

    if ( seq_id==-1 ) return -1;    // the sequence not present in this file

    if ( use_tabix )
        reader->itr = tbx_itr_queryi(reader->tbx_idx, seq_id, start, end+1);
    else
        reader->itr = bcf_itr_queryi(reader->bcf_idx, seq_id, start, end+1);

    if ( reader->itr ) return 0;

    // Query failed: report it, then trip the assertion.  When assert() is
    // compiled out (NDEBUG) execution falls through and 0 is still returned
    // with reader->itr == NULL.
    fprintf(stderr,"Could not seek: %s:%d-%d\n",seq,start+1,end+1);
    assert(reader->itr);
    return 0;
}
Ejemplo n.º 2
0
/*
 * Refill the record cache of a tabix-indexed annotation file with the
 * records that cover the position of `line`.
 *
 * file     annotation file; file->idx must be loaded (asserted).  Its cache
 *          (buffer, cached, max) and the last_id/last_start/last_end
 *          bookkeeping are updated in place.
 * hdr_out  header used to look up the sequence name of `line`.
 * line     the variant record being annotated.
 *
 * Returns
 *   -1  the position falls inside the span that is already cached, so
 *       nothing was reloaded;
 *    0  the cache was refilled and holds at least one record;
 *    1  nothing usable was cached: the chromosome is unknown to the index,
 *       the index query failed, or no record passed the position filter.
 */
int beds_fill_buffer(struct beds_anno_file *file, bcf_hdr_t *hdr_out, bcf1_t *line)
{
    assert(file->idx);
    int tid = tbx_name2id(file->idx, bcf_seqname(hdr_out, line));

    // If this region is cached already, just skip the refill. This is
    // different from vcfs_fill_buffer().
    if ( tid == file->last_id && file->last_start <= line->pos + 1 && file->last_end > line->pos )
        return -1;

    if ( tid == -1 ) {
        // Warn only once per run of consecutive misses.
        if ( file->no_such_chrom == 0 ) {
            warnings("no chromosome %s found in databases %s.", bcf_seqname(hdr_out, line), file->fname);
            file->no_such_chrom = 1;
        }
        return 1;
    }
    file->no_such_chrom = 0;

    // Empty the cache.
    file->cached = 0;
    hts_itr_t *itr = tbx_itr_queryi(file->idx, tid, line->pos, line->pos + line->rlen);
    if ( itr == NULL )
        return 1;

    // The buffer is being refilled: reset the cached span.
    file->last_id = tid;
    file->last_start = -1;
    file->last_end = -1;

    while (1) {
        if ( file->cached == file->max ) {
            // Grow by 8 slots. Keep the old pointer until realloc succeeds so
            // that a failed allocation neither loses the buffer nor leaves
            // file->max pointing past it.
            struct beds_anno_tsv **grown = realloc(file->buffer, sizeof(*grown) * (file->max + 8));
            if ( grown == NULL ) {
                warnings("out of memory while caching records from %s.", file->fname);
                break;
            }
            file->buffer = grown;
            file->max += 8;
            for (int i = 8; i > 0; --i)
                file->buffer[file->max - i] = beds_anno_tsv_init();
        }

        // Read the next raw line straight into the first free slot; the slot
        // is only kept (cached++) if the record passes the filters below.
        if ( tbx_itr_next(file->fp, file->idx, itr, &file->buffer[file->cached]->string) < 0 )
            break;
        struct beds_anno_tsv *tsv = file->buffer[file->cached];
        convert_string_tsv(tsv);

        // Skip if the variant is located outside of the record's region.
        if ( line->pos < tsv->start || line->pos >= tsv->end )
            continue;
        // NOTE(review): for integer coordinates this looks already implied by
        // the range check above; kept as in the original.
        if ( tsv->end - tsv->start == 1 && line->pos != tsv->start )
            continue;
        file->cached++;

        // Track the union of the spans of all cached records.
        if ( file->last_end == -1 ) {
            file->last_end = tsv->end;
            file->last_start = tsv->start;
        } else {
            if ( file->last_end < tsv->end )
                file->last_end = tsv->end;
            if ( file->last_start > tsv->start )
                file->last_start = tsv->start;
        }
    }

    // The iterator is owned by this function; release it on every exit from
    // the loop.
    tbx_itr_destroy(itr);

    // If the buffer is filled return 0, else return 1.
    return file->cached ? 0 : 1;
}