Beispiel #1
0
static void
tabixiter_dealloc(TabixIteratorObject *self)
{
    ti_iter_destroy(self->iter);
    Py_DECREF(self->tbobj);
    PyObject_Del(self);
}
Beispiel #2
0
static void * downloadTabixFile(void * args) {
	BCFReaderData * data = (BCFReaderData *) args;
	char * line;
	char * last_chrom = "";

	while ((line = nextLine(data))) {
		if (line[0] == '#')
			continue;

		char * chrom = strtok(line, "\t");
		if (strcmp(chrom, last_chrom)) {
			last_chrom = calloc(strlen(chrom) + 1, sizeof(char));
			strcpy(last_chrom, chrom);
		}
		int pos = atoi(strtok(NULL, "\t"));

		if (data->tabix_iterator)
			free(line);

		if (pushValuesToBuffer(data->bufferedReaderData, last_chrom, pos, pos+1, 1))
			break;
	}

	if (data->tabix_iterator) 
		ti_iter_destroy(data->tabix_iterator);

	endBufferedSignal(data->bufferedReaderData);
	return NULL;
}
Beispiel #3
0
void lineFileClose(struct lineFile **pLf)
/* Close up a line file. */
{
struct lineFile *lf;
if ((lf = *pLf) != NULL)
    {
    if (lf->pl != NULL)
        {
        pipelineWait(lf->pl);
        pipelineFree(&lf->pl);
        }
    else if (lf->fd > 0 && lf->fd != fileno(stdin))
	{
	close(lf->fd);
	freeMem(lf->buf);
	}
#ifdef USE_TABIX
    else if (lf->tabix != NULL)
	{
	if (lf->tabixIter != NULL)
	    ti_iter_destroy(lf->tabixIter);
	ti_close(lf->tabix);
	}
#endif // USE_TABIX
    freeMem(lf->fileName);
    metaDataFree(lf);
    freez(pLf);
    }
}
Beispiel #4
0
boolean lineFileSetTabixRegion(struct lineFile *lf, char *seqName, int start, int end)
/* Assuming lf was created by lineFileTabixMayOpen, tell tabix to seek to the specified region
 * and return TRUE (or if there are no items in region, return FALSE). */
{
#ifdef USE_TABIX
if (lf->tabix == NULL)
    errAbort("lineFileSetTabixRegion: lf->tabix is NULL.  Did you open lf with lineFileTabixMayOpen?");
if (seqName == NULL)
    return FALSE;
int tabixSeqId = ti_get_tid(lf->tabix->idx, seqName);
if (tabixSeqId < 0 && startsWith("chr", seqName))
    // We will get some files that have chr-less Ensembl chromosome names:
    tabixSeqId = ti_get_tid(lf->tabix->idx, seqName+strlen("chr"));
if (tabixSeqId < 0)
    return FALSE;
ti_iter_t iter = ti_queryi(lf->tabix, tabixSeqId, start, end);
if (iter == NULL)
    return FALSE;
if (lf->tabixIter != NULL)
    ti_iter_destroy(lf->tabixIter);
lf->tabixIter = iter;
lf->bufOffsetInFile = ti_bgzf_tell(lf->tabix->fp);
lf->bytesInBuf = 0;
lf->lineIx = -1;
lf->lineStart = 0;
lf->lineEnd = 0;
return TRUE;
#else // no USE_TABIX
warn(COMPILE_WITH_TABIX, "lineFileSetTabixRegion");
return FALSE;
#endif // no USE_TABIX
}
Beispiel #5
0
int main(int argc, char *argv[])
{
	int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0, bed_reg = 0;
	ti_conf_t conf = ti_conf_gff;
    const char *reheader = NULL;
	while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhfBr:")) >= 0) {
		switch (c) {
		case 'B': bed_reg = 1; break;
		case '0': conf.preset |= TI_FLAG_UCSC; break;
		case 'S': skip = atoi(optarg); break;
		case 'c': meta = optarg[0]; break;
		case 'p':
			if (strcmp(optarg, "gff") == 0) conf = ti_conf_gff;
			else if (strcmp(optarg, "bed") == 0) conf = ti_conf_bed;
			else if (strcmp(optarg, "sam") == 0) conf = ti_conf_sam;
			else if (strcmp(optarg, "vcf") == 0 || strcmp(optarg, "vcf4") == 0) conf = ti_conf_vcf;
			else if (strcmp(optarg, "psltbl") == 0) conf = ti_conf_psltbl;
			else {
				fprintf(stderr, "[main] unrecognized preset '%s'\n", optarg);
				return 1;
			}
			break;
		case 's': conf.sc = atoi(optarg); break;
		case 'b': conf.bc = atoi(optarg); break;
		case 'e': conf.ec = atoi(optarg); break;
        case 'l': list_chrms = 1; break;
        case 'h': print_header = 1; break;
		case 'f': force = 1; break;
        case 'r': reheader = optarg; break;
		}
	}
	if (skip >= 0) conf.line_skip = skip;
	if (meta >= 0) conf.meta_char = meta;
	if (optind == argc) {
		fprintf(stderr, "\n");
		fprintf(stderr, "Program: tabix (TAB-delimited file InderXer)\n");
		fprintf(stderr, "Version: %s\n\n", PACKAGE_VERSION);
		fprintf(stderr, "Usage:   tabix <in.tab.bgz> [region1 [region2 [...]]]\n\n");
		fprintf(stderr, "Options: -p STR     preset: gff, bed, sam, vcf, psltbl [gff]\n");
		fprintf(stderr, "         -s INT     sequence name column [1]\n");
		fprintf(stderr, "         -b INT     start column [4]\n");
		fprintf(stderr, "         -e INT     end column; can be identical to '-b' [5]\n");
		fprintf(stderr, "         -S INT     skip first INT lines [0]\n");
		fprintf(stderr, "         -c CHAR    symbol for comment/meta lines [#]\n");
	    fprintf(stderr, "         -r FILE    replace the header with the content of FILE [null]\n");
		fprintf(stderr, "         -B         region1 is a BED file (entire file will be read)\n");
		fprintf(stderr, "         -0         zero-based coordinate\n");
		fprintf(stderr, "         -h         print the header lines\n");
		fprintf(stderr, "         -l         list chromosome names\n");
		fprintf(stderr, "         -f         force to overwrite the index\n");
		fprintf(stderr, "\n");
		return 1;
	}
    if (list_chrms) {
		ti_index_t *idx;
		int i, n;
		const char **names;
		idx = ti_index_load(argv[optind]);
		if (idx == 0) {
			fprintf(stderr, "[main] fail to load the index file.\n");
			return 1;
		}
		names = ti_seqname(idx, &n);
		for (i = 0; i < n; ++i) printf("%s\n", names[i]);
		free(names);
		ti_index_destroy(idx);
		return 0;
	}
    if (reheader)
        return reheader_file(reheader,argv[optind],conf.meta_char);

	struct stat stat_tbi,stat_vcf;
    char *fnidx = calloc(strlen(argv[optind]) + 5, 1);
   	strcat(strcpy(fnidx, argv[optind]), ".tbi");

	if (optind + 1 == argc) {
		if (force == 0) {
			if (stat(fnidx, &stat_tbi) == 0) 
            {
                // Before complaining, check if the VCF file isn't newer. This is a common source of errors,
                //  people tend not to notice that tabix failed
                stat(argv[optind], &stat_vcf);
                if ( stat_vcf.st_mtime <= stat_tbi.st_mtime )
                {
                    fprintf(stderr, "[tabix] the index file exists. Please use '-f' to overwrite.\n");
                    free(fnidx);
                    return 1;
                }
			}
		}
        if ( bgzf_check_bgzf(argv[optind])!=1 )
        {
            fprintf(stderr,"[tabix] was bgzip used to compress this file? %s\n", argv[optind]);
            free(fnidx);
            return 1;
        }
		return ti_index_build(argv[optind], &conf);
	}
	{ // retrieve
		tabix_t *t;
        // Common source of errors: new VCF is used with an old index
        stat(fnidx, &stat_tbi);
        stat(argv[optind], &stat_vcf);
        if ( force==0 && stat_vcf.st_mtime > stat_tbi.st_mtime )
        {
            fprintf(stderr, "[tabix] the index file is older than the vcf file. Please use '-f' to overwrite or reindex.\n");
            free(fnidx);
            return 1;
        }
        free(fnidx);

		if ((t = ti_open(argv[optind], 0)) == 0) {
			fprintf(stderr, "[main] fail to open the data file.\n");
			return 1;
		}
		if (strcmp(argv[optind+1], ".") == 0) { // retrieve all
			ti_iter_t iter;
			const char *s;
			int len;
			iter = ti_query(t, 0, 0, 0);
			while ((s = ti_read(t, iter, &len)) != 0) {
				fputs(s, stdout); fputc('\n', stdout);
			}
			ti_iter_destroy(iter);
		} else { // retrieve from specified regions
			int i, len;
            ti_iter_t iter;
            const char *s;
			const ti_conf_t *idxconf;

			if (ti_lazy_index_load(t) < 0 && bed_reg == 0) {
                fprintf(stderr,"[tabix] failed to load the index file.\n");
                return 1;
            }
			idxconf = ti_get_conf(t->idx);

            if ( print_header )
            {
                // If requested, print the header lines here
                iter = ti_query(t, 0, 0, 0);
                while ((s = ti_read(t, iter, &len)) != 0) {
                    if ((int)(*s) != idxconf->meta_char) break;
                    fputs(s, stdout); fputc('\n', stdout);
                }
                ti_iter_destroy(iter);
            }
			if (bed_reg) {
				extern int bed_overlap(const void *_h, const char *chr, int beg, int end);
				extern void *bed_read(const char *fn);
				extern void bed_destroy(void *_h);

				const ti_conf_t *conf_ = idxconf? idxconf : &conf; // use the index file if available
				void *bed = bed_read(argv[optind+1]); // load the BED file
				ti_interval_t intv;

				if (bed == 0) {
					fprintf(stderr, "[main] fail to read the BED file.\n");
					return 1;
				}
				iter = ti_query(t, 0, 0, 0);
				while ((s = ti_read(t, iter, &len)) != 0) {
					int c;
					ti_get_intv(conf_, len, (char*)s, &intv);
					c = *intv.se; *intv.se = '\0';
					if (bed_overlap(bed, intv.ss, intv.beg, intv.end)) {
						*intv.se = c;
						puts(s);
					}
					*intv.se = c;
				}
                ti_iter_destroy(iter);
				bed_destroy(bed);
			} else {
				for (i = optind + 1; i < argc; ++i) {
					int tid, beg, end;
					if (ti_parse_region(t->idx, argv[i], &tid, &beg, &end) == 0) {
						iter = ti_queryi(t, tid, beg, end);
							while ((s = ti_read(t, iter, &len)) != 0) {
							fputs(s, stdout); fputc('\n', stdout);
						}
						ti_iter_destroy(iter);
					} 
            	    // else fprintf(stderr, "[main] invalid region: unknown target name or minus interval.\n");
				}
			}
		}
		ti_close(t);
	}
	return 0;
}
tabix_header_streamer::
~tabix_header_streamer() {
    if (NULL != _titer) ti_iter_destroy(_titer);
    if (NULL != _tfp) ti_close(_tfp);
}