Beispiel #1
0
static void
tabix_dealloc(TabixObject *self)
{
    free(self->fn);
    ti_close(self->tb);
    PyObject_Del(self);
}
Beispiel #2
0
void lineFileClose(struct lineFile **pLf)
/* Close up a line file. */
{
struct lineFile *lf;
if ((lf = *pLf) != NULL)
    {
    if (lf->pl != NULL)
        {
        pipelineWait(lf->pl);
        pipelineFree(&lf->pl);
        }
    else if (lf->fd > 0 && lf->fd != fileno(stdin))
	{
	close(lf->fd);
	freeMem(lf->buf);
	}
#ifdef USE_TABIX
    else if (lf->tabix != NULL)
	{
	if (lf->tabixIter != NULL)
	    ti_iter_destroy(lf->tabixIter);
	ti_close(lf->tabix);
	}
#endif // USE_TABIX
    freeMem(lf->fileName);
    metaDataFree(lf);
    freez(pLf);
    }
}
Beispiel #3
0
struct lineFile *lineFileTabixMayOpen(char *fileOrUrl, bool zTerm)
/* Wrap a line file around a data file that has been compressed and indexed
 * by the tabix command line program.  The index file <fileOrUrl>.tbi must be
 * readable in addition to fileOrUrl. If there's a problem, warn & return NULL.
 * This works only if kent/src has been compiled with USE_TABIX=1 and linked
 * with the tabix C library. */
{
#ifdef USE_TABIX
int tbiNameSize = strlen(fileOrUrl) + strlen(".tbi") + 1;
char *tbiName = needMem(tbiNameSize);
safef(tbiName, tbiNameSize, "%s.tbi", fileOrUrl);
tabix_t *tabix = ti_open(fileOrUrl, tbiName);
if (tabix == NULL)
    {
    warn("Unable to open \"%s\"", fileOrUrl);
    freez(&tbiName);
    return NULL;
    }
if ((tabix->idx = ti_index_load(tbiName)) == NULL)
    {
    warn("Unable to load tabix index from \"%s\"", tbiName);
    ti_close(tabix);
    tabix = NULL;
    freez(&tbiName);
    return NULL;
    }
struct lineFile *lf = needMem(sizeof(struct lineFile));
lf->fileName = cloneString(fileOrUrl);
lf->fd = -1;
lf->bufSize = 64 * 1024;
lf->buf = needMem(lf->bufSize);
lf->zTerm = zTerm;
lf->tabix = tabix;
freez(&tbiName);
return lf;
#else // no USE_TABIX
warn(COMPILE_WITH_TABIX, "lineFileTabixMayOpen");
return NULL;
#endif // no USE_TABIX
}
Beispiel #4
0
bool
parse_tabix_region(const char* filename,
                   const char* region,
                   int& begin,
                   int& end) {

    if (NULL == region) {
        return false;
    }

    if (NULL == filename) {
        throw blt_exception("vcf filename is null ptr");
    }

    if ('\0' == *filename) {
        throw blt_exception("vcf filename is empty string");
    }

    enforce_tabix_index(filename);

    tabix_t* _tfp = ti_open(filename, 0);

    if (NULL == _tfp) {
        log_os << "ERROR: Failed to open VCF file: '" << filename << "'\n";
        exit(EXIT_FAILURE);
    }

    // read from a specific region:
    if (ti_lazy_index_load(_tfp) < 0) {
        log_os << "ERROR: Failed to load index for vcf file: '" << filename << "'\n";
        exit(EXIT_FAILURE);
    }

    int tid;
    const bool result(0 == ti_parse_region(_tfp->idx, region, &tid, &begin, &end));

    ti_close(_tfp);
    return result;
}
Beispiel #5
0
int main(int argc, char *argv[])
{
	int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0, bed_reg = 0;
	ti_conf_t conf = ti_conf_gff;
    const char *reheader = NULL;
	while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhfBr:")) >= 0) {
		switch (c) {
		case 'B': bed_reg = 1; break;
		case '0': conf.preset |= TI_FLAG_UCSC; break;
		case 'S': skip = atoi(optarg); break;
		case 'c': meta = optarg[0]; break;
		case 'p':
			if (strcmp(optarg, "gff") == 0) conf = ti_conf_gff;
			else if (strcmp(optarg, "bed") == 0) conf = ti_conf_bed;
			else if (strcmp(optarg, "sam") == 0) conf = ti_conf_sam;
			else if (strcmp(optarg, "vcf") == 0 || strcmp(optarg, "vcf4") == 0) conf = ti_conf_vcf;
			else if (strcmp(optarg, "psltbl") == 0) conf = ti_conf_psltbl;
			else {
				fprintf(stderr, "[main] unrecognized preset '%s'\n", optarg);
				return 1;
			}
			break;
		case 's': conf.sc = atoi(optarg); break;
		case 'b': conf.bc = atoi(optarg); break;
		case 'e': conf.ec = atoi(optarg); break;
        case 'l': list_chrms = 1; break;
        case 'h': print_header = 1; break;
		case 'f': force = 1; break;
        case 'r': reheader = optarg; break;
		}
	}
	if (skip >= 0) conf.line_skip = skip;
	if (meta >= 0) conf.meta_char = meta;
	if (optind == argc) {
		fprintf(stderr, "\n");
		fprintf(stderr, "Program: tabix (TAB-delimited file InderXer)\n");
		fprintf(stderr, "Version: %s\n\n", PACKAGE_VERSION);
		fprintf(stderr, "Usage:   tabix <in.tab.bgz> [region1 [region2 [...]]]\n\n");
		fprintf(stderr, "Options: -p STR     preset: gff, bed, sam, vcf, psltbl [gff]\n");
		fprintf(stderr, "         -s INT     sequence name column [1]\n");
		fprintf(stderr, "         -b INT     start column [4]\n");
		fprintf(stderr, "         -e INT     end column; can be identical to '-b' [5]\n");
		fprintf(stderr, "         -S INT     skip first INT lines [0]\n");
		fprintf(stderr, "         -c CHAR    symbol for comment/meta lines [#]\n");
	    fprintf(stderr, "         -r FILE    replace the header with the content of FILE [null]\n");
		fprintf(stderr, "         -B         region1 is a BED file (entire file will be read)\n");
		fprintf(stderr, "         -0         zero-based coordinate\n");
		fprintf(stderr, "         -h         print the header lines\n");
		fprintf(stderr, "         -l         list chromosome names\n");
		fprintf(stderr, "         -f         force to overwrite the index\n");
		fprintf(stderr, "\n");
		return 1;
	}
    if (list_chrms) {
		ti_index_t *idx;
		int i, n;
		const char **names;
		idx = ti_index_load(argv[optind]);
		if (idx == 0) {
			fprintf(stderr, "[main] fail to load the index file.\n");
			return 1;
		}
		names = ti_seqname(idx, &n);
		for (i = 0; i < n; ++i) printf("%s\n", names[i]);
		free(names);
		ti_index_destroy(idx);
		return 0;
	}
    if (reheader)
        return reheader_file(reheader,argv[optind],conf.meta_char);

	struct stat stat_tbi,stat_vcf;
    char *fnidx = calloc(strlen(argv[optind]) + 5, 1);
   	strcat(strcpy(fnidx, argv[optind]), ".tbi");

	if (optind + 1 == argc) {
		if (force == 0) {
			if (stat(fnidx, &stat_tbi) == 0) 
            {
                // Before complaining, check if the VCF file isn't newer. This is a common source of errors,
                //  people tend not to notice that tabix failed
                stat(argv[optind], &stat_vcf);
                if ( stat_vcf.st_mtime <= stat_tbi.st_mtime )
                {
                    fprintf(stderr, "[tabix] the index file exists. Please use '-f' to overwrite.\n");
                    free(fnidx);
                    return 1;
                }
			}
		}
        if ( bgzf_check_bgzf(argv[optind])!=1 )
        {
            fprintf(stderr,"[tabix] was bgzip used to compress this file? %s\n", argv[optind]);
            free(fnidx);
            return 1;
        }
		return ti_index_build(argv[optind], &conf);
	}
	{ // retrieve
		tabix_t *t;
        // Common source of errors: new VCF is used with an old index
        stat(fnidx, &stat_tbi);
        stat(argv[optind], &stat_vcf);
        if ( force==0 && stat_vcf.st_mtime > stat_tbi.st_mtime )
        {
            fprintf(stderr, "[tabix] the index file is older than the vcf file. Please use '-f' to overwrite or reindex.\n");
            free(fnidx);
            return 1;
        }
        free(fnidx);

		if ((t = ti_open(argv[optind], 0)) == 0) {
			fprintf(stderr, "[main] fail to open the data file.\n");
			return 1;
		}
		if (strcmp(argv[optind+1], ".") == 0) { // retrieve all
			ti_iter_t iter;
			const char *s;
			int len;
			iter = ti_query(t, 0, 0, 0);
			while ((s = ti_read(t, iter, &len)) != 0) {
				fputs(s, stdout); fputc('\n', stdout);
			}
			ti_iter_destroy(iter);
		} else { // retrieve from specified regions
			int i, len;
            ti_iter_t iter;
            const char *s;
			const ti_conf_t *idxconf;

			if (ti_lazy_index_load(t) < 0 && bed_reg == 0) {
                fprintf(stderr,"[tabix] failed to load the index file.\n");
                return 1;
            }
			idxconf = ti_get_conf(t->idx);

            if ( print_header )
            {
                // If requested, print the header lines here
                iter = ti_query(t, 0, 0, 0);
                while ((s = ti_read(t, iter, &len)) != 0) {
                    if ((int)(*s) != idxconf->meta_char) break;
                    fputs(s, stdout); fputc('\n', stdout);
                }
                ti_iter_destroy(iter);
            }
			if (bed_reg) {
				extern int bed_overlap(const void *_h, const char *chr, int beg, int end);
				extern void *bed_read(const char *fn);
				extern void bed_destroy(void *_h);

				const ti_conf_t *conf_ = idxconf? idxconf : &conf; // use the index file if available
				void *bed = bed_read(argv[optind+1]); // load the BED file
				ti_interval_t intv;

				if (bed == 0) {
					fprintf(stderr, "[main] fail to read the BED file.\n");
					return 1;
				}
				iter = ti_query(t, 0, 0, 0);
				while ((s = ti_read(t, iter, &len)) != 0) {
					int c;
					ti_get_intv(conf_, len, (char*)s, &intv);
					c = *intv.se; *intv.se = '\0';
					if (bed_overlap(bed, intv.ss, intv.beg, intv.end)) {
						*intv.se = c;
						puts(s);
					}
					*intv.se = c;
				}
                ti_iter_destroy(iter);
				bed_destroy(bed);
			} else {
				for (i = optind + 1; i < argc; ++i) {
					int tid, beg, end;
					if (ti_parse_region(t->idx, argv[i], &tid, &beg, &end) == 0) {
						iter = ti_queryi(t, tid, beg, end);
							while ((s = ti_read(t, iter, &len)) != 0) {
							fputs(s, stdout); fputc('\n', stdout);
						}
						ti_iter_destroy(iter);
					} 
            	    // else fprintf(stderr, "[main] invalid region: unknown target name or minus interval.\n");
				}
			}
		}
		ti_close(t);
	}
	return 0;
}
Beispiel #6
0
tabix_chrom_list::
~tabix_chrom_list() {
    if (NULL != _tptr) free(_tptr);
    if (NULL != _tfp) ti_close(_tfp);
}
tabix_header_streamer::
~tabix_header_streamer() {
    if (NULL != _titer) ti_iter_destroy(_titer);
    if (NULL != _tfp) ti_close(_tfp);
}
Beispiel #8
0
void closeTabixFile(BCFReaderData * data) {
	ti_close(data->tabix_file);
	gzclose(data->gz_file);
}