/*
 * Close the per-output files, index them where the output type calls for it,
 * and release the bookkeeping arrays held in args.
 *
 * Nothing is done unless args->prefix is set.  For every non-NULL entry of
 * args->fnames the matching handle in args->fh_out is closed, the file is
 * indexed (tbx_index_build for FT_VCF_GZ, bcf_index_build for FT_BCF_GZ) and
 * the name is freed.  Failures are reported through error().
 */
static void destroy_data(args_t *args)
{
    if ( !args->prefix ) return;

    // fclose(NULL) is undefined, so only close the log if it was opened
    if ( args->fh_log ) fclose(args->fh_log);

    int i, n = args->isec_op==OP_VENN ? 3 : args->files->nreaders;
    for (i=0; i<n; i++)
    {
        if ( !args->fnames[i] ) continue;

        // A failed close means the output may be incomplete; do not index it silently
        if ( hts_close(args->fh_out[i])!=0 ) error("Could not close %s\n", args->fnames[i]);

        if ( args->output_type==FT_VCF_GZ )
        {
            tbx_conf_t conf = tbx_conf_vcf;
            // Report indexing failures the same way the BCF branch does
            if ( tbx_index_build(args->fnames[i], -1, &conf) ) error("Could not index %s\n", args->fnames[i]);
        }
        else if ( args->output_type==FT_BCF_GZ )
        {
            if ( bcf_index_build(args->fnames[i],14) ) error("Could not index %s\n", args->fnames[i]);
        }
        free(args->fnames[i]);
    }
    free(args->fh_out);
    free(args->fnames);
    if ( args->fh_sites ) fclose(args->fh_sites);
    free(args->write);  // free(NULL) is a no-op, no guard needed
}
int main_vcfindex(int argc, char *argv[]) { int c, force = 0, tbi = 0, stats = 0; int min_shift = BCF_LIDX_SHIFT; static struct option loptions[] = { {"csi",no_argument,NULL,'c'}, {"tbi",no_argument,NULL,'t'}, {"force",no_argument,NULL,'f'}, {"min-shift",required_argument,NULL,'m'}, {"stats",no_argument,NULL,'s'}, {"nrecords",no_argument,NULL,'n'}, {NULL, 0, NULL, 0} }; char *tmp; while ((c = getopt_long(argc, argv, "ctfm:sn", loptions, NULL)) >= 0) { switch (c) { case 'c': tbi = 0; break; case 't': tbi = 1; min_shift = 0; break; case 'f': force = 1; break; case 'm': min_shift = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: --min-shift %s\n", optarg); break; case 's': stats |= 1; break; case 'n': stats |= 2; break; default: usage(); } } if ( optind==argc ) usage(); if (stats>2) { fprintf(stderr, "[E::%s] expected only one of --stats or --nrecords options\n", __func__); return 1; } if (tbi && min_shift>0) { fprintf(stderr, "[E::%s] min-shift option only expected for CSI indices \n", __func__); return 1; } if (min_shift < 0 || min_shift > 30) { fprintf(stderr, "[E::%s] expected min_shift in range [0,30] (%d)\n", __func__, min_shift); return 1; } char *fname = argv[optind]; if (stats) return vcf_index_stats(fname, stats); htsFile *fp = hts_open(fname,"r"); if ( !fp ) error("Failed to read %s\n", fname); htsFormat type = *hts_get_format(fp); hts_close(fp); if ( (type.format!=bcf && type.format!=vcf) || type.compression!=bgzf ) { fprintf(stderr, "[E::%s] unknown filetype; expected bgzip compressed VCF or BCF\n", __func__); if ( type.compression!=bgzf ) fprintf(stderr, "[E::%s] was the VCF/BCF compressed with bgzip?\n", __func__); return 1; } if (tbi && type.format==bcf) { fprintf(stderr, "[Warning] TBI-index does not work for BCF files. 
Generating CSI instead.\n"); tbi = 0; min_shift = BCF_LIDX_SHIFT; } if (min_shift == 0 && type.format==bcf) { fprintf(stderr, "[E::%s] Require min_shift>0 for BCF files.\n", __func__); return 1; } if (!tbi && type.format==vcf && min_shift == 0) { fprintf(stderr, "[Warning] min-shift set to 0 for VCF file. Generating TBI file.\n"); tbi = 1; } if (!force) { // Before complaining about existing index, check if the VCF file isn't newer. char *idx_fname = (char*)alloca(strlen(fname) + 5); strcat(strcpy(idx_fname, fname), tbi ? ".tbi" : ".csi"); struct stat stat_tbi, stat_file; if ( stat(idx_fname, &stat_tbi)==0 ) { stat(fname, &stat_file); if ( stat_file.st_mtime <= stat_tbi.st_mtime ) { fprintf(stderr,"[E::%s] the index file exists. Please use '-f' to overwrite.\n", __func__); return 1; } } } if (type.format==bcf) { if ( bcf_index_build(fname, min_shift) != 0 ) { fprintf(stderr,"[E::%s] bcf_index_build failed for %s\n", __func__, fname); return 1; } } else { if ( tbx_index_build(fname, min_shift, &tbx_conf_vcf) != 0 ) { fprintf(stderr,"[E::%s] tbx_index_build failed for %s\n", __func__, fname); return 1; } } return 0; }
int main_vcfindex(int argc, char *argv[]) { int c, min_shift = 14, force = 0; static struct option loptions[] = { {"help",0,0,'h'}, {"force",0,0,'f'}, {"min-shift",1,0,'m'}, {0,0,0,0} }; while ((c = getopt_long(argc, argv, "h?fm:", loptions,NULL)) >= 0) { switch (c) { case 'f': force = 1; break; case 'm': min_shift = atoi(optarg); break; default: usage(); } } if ( optind==argc ) usage(); if (min_shift < 0 || min_shift > 30) { fprintf(stderr, "[E::%s] expected min_shift in range [0,30] (%d)\n", __func__, min_shift); return 1; } char *fname = argv[optind]; int ftype = hts_file_type(fname); if (!ftype) { fprintf(stderr, "[E::%s] unknown filetype; expected .vcf.gz or .bcf\n", __func__); return 1; } if (!force) { // Before complaining about existing index, check if the VCF file isn't newer. char *idx_fname = (char*)alloca(strlen(fname) + 5); strcat(strcpy(idx_fname, fname), min_shift <= 0 ? ".tbi" : ".csi"); struct stat stat_tbi, stat_file; if ( stat(idx_fname, &stat_tbi)==0 ) { stat(fname, &stat_file); if ( stat_file.st_mtime <= stat_tbi.st_mtime ) { fprintf(stderr,"[E::%s] the index file exists. Please use '-f' to overwrite.\n", __func__); return 1; } } } if (ftype == FT_BCF_GZ) { if ( bcf_index_build(fname, min_shift) != 0 ) { fprintf(stderr,"[E::%s] bcf_index_build failed: %s\n", __func__, fname); return 1; } } else if (ftype == FT_VCF_GZ) { if ( tbx_index_build(fname, min_shift, &tbx_conf_vcf) != 0 ) { fprintf(stderr,"[E::%s] tbx_index_build failed for %s\n", __func__, fname); return 1; } } return 0; }
int main_vcfindex(int argc, char *argv[]) { int c, force = 0, tbi = 0; int min_shift = BCF_LIDX_SHIFT; static struct option loptions[] = { {"csi",no_argument,NULL,'c'}, {"tbi",no_argument,NULL,'t'}, {"force",no_argument,NULL,'f'}, {"min-shift",required_argument,NULL,'m'}, {NULL, 0, NULL, 0} }; while ((c = getopt_long(argc, argv, "ctfm:", loptions, NULL)) >= 0) { switch (c) { case 'c': tbi = 0; break; case 't': tbi = 1; min_shift = 0; break; case 'f': force = 1; break; case 'm': min_shift = atoi(optarg); break; default: usage(); } } if ( optind==argc ) usage(); if (tbi && min_shift>0) { fprintf(stderr, "[E::%s] min-shift option only expected for CSI indices \n", __func__); return 1; } if (min_shift < 0 || min_shift > 30) { fprintf(stderr, "[E::%s] expected min_shift in range [0,30] (%d)\n", __func__, min_shift); return 1; } char *fname = argv[optind]; int ftype = hts_file_type(fname); if (!ftype || (ftype != FT_BCF_GZ && ftype != FT_VCF_GZ)) { fprintf(stderr, "[E::%s] unknown filetype; expected bgzip compressed VCF or BCF\n", __func__); if (!(ftype & FT_GZ)) fprintf(stderr, "[E::%s] was the VCF/BCF compressed with bgzip?\n", __func__); return 1; } if (tbi && ftype == FT_BCF_GZ) { fprintf(stderr, "[Warning] TBI-index does not work for BCF files. Generating CSI instead.\n"); tbi = 0; min_shift = BCF_LIDX_SHIFT; } if (min_shift == 0 && ftype == FT_BCF_GZ) { fprintf(stderr, "[E::%s] Require min_shift>0 for BCF files.\n", __func__); return 1; } if (!tbi && ftype == FT_VCF_GZ && min_shift == 0) { fprintf(stderr, "[Warning] min-shift set to 0 for VCF file. Generating TBI file.\n"); tbi = 1; } if (!force) { // Before complaining about existing index, check if the VCF file isn't newer. char *idx_fname = (char*)alloca(strlen(fname) + 5); strcat(strcpy(idx_fname, fname), tbi ? 
".tbi" : ".csi"); struct stat stat_tbi, stat_file; if ( stat(idx_fname, &stat_tbi)==0 ) { stat(fname, &stat_file); if ( stat_file.st_mtime <= stat_tbi.st_mtime ) { fprintf(stderr,"[E::%s] the index file exists. Please use '-f' to overwrite.\n", __func__); return 1; } } } if (ftype == FT_BCF_GZ) { if ( bcf_index_build(fname, min_shift) != 0 ) { fprintf(stderr,"[E::%s] bcf_index_build failed for %s\n", __func__, fname); return 1; } } else { if ( tbx_index_build(fname, min_shift, &tbx_conf_vcf) != 0 ) { fprintf(stderr,"[E::%s] tbx_index_build failed for %s\n", __func__, fname); return 1; } } return 0; }
int main(int argc, char *argv[]) { int c, detect = 1, min_shift = 0, is_force = 0, list_chroms = 0, do_csi = 0; tbx_conf_t conf = tbx_conf_gff; char *reheader = NULL; args_t args; memset(&args,0,sizeof(args_t)); static const struct option loptions[] = { {"help", no_argument, NULL, 2}, {"regions", required_argument, NULL, 'R'}, {"targets", required_argument, NULL, 'T'}, {"csi", no_argument, NULL, 'C'}, {"zero-based", no_argument, NULL, '0'}, {"print-header", no_argument, NULL, 'h'}, {"only-header", no_argument, NULL, 'H'}, {"begin", required_argument, NULL, 'b'}, {"comment", required_argument, NULL, 'c'}, {"end", required_argument, NULL, 'e'}, {"force", no_argument, NULL, 'f'}, {"preset", required_argument, NULL, 'p'}, {"sequence", required_argument, NULL, 's'}, {"skip-lines", required_argument, NULL, 'S'}, {"list-chroms", no_argument, NULL, 'l'}, {"reheader", required_argument, NULL, 'r'}, {"version", no_argument, NULL, 1}, {NULL, 0, NULL, 0} }; char *tmp; while ((c = getopt_long(argc, argv, "hH?0b:c:e:fm:p:s:S:lr:CR:T:", loptions,NULL)) >= 0) { switch (c) { case 'R': args.regions_fname = optarg; break; case 'T': args.targets_fname = optarg; break; case 'C': do_csi = 1; break; case 'r': reheader = optarg; break; case 'h': args.print_header = 1; break; case 'H': args.print_header = 1; args.header_only = 1; break; case 'l': list_chroms = 1; break; case '0': conf.preset |= TBX_UCSC; detect = 0; break; case 'b': conf.bc = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -b %s\n", optarg); detect = 0; break; case 'e': conf.ec = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -e %s\n", optarg); detect = 0; break; case 'c': conf.meta_char = *optarg; detect = 0; break; case 'f': is_force = 1; break; case 'm': min_shift = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -m %s\n", optarg); break; case 'p': detect = 0; if (strcmp(optarg, "gff") == 0) conf = tbx_conf_gff; else if (strcmp(optarg, "bed") == 0) conf = 
tbx_conf_bed; else if (strcmp(optarg, "sam") == 0) conf = tbx_conf_sam; else if (strcmp(optarg, "vcf") == 0) conf = tbx_conf_vcf; else if (strcmp(optarg, "bcf") == 0) detect = 1; // bcf is autodetected, preset is not needed else if (strcmp(optarg, "bam") == 0) detect = 1; // same as bcf else error("The preset string not recognised: '%s'\n", optarg); break; case 's': conf.sc = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -s %s\n", optarg); detect = 0; break; case 'S': conf.line_skip = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -S %s\n", optarg); detect = 0; break; case 1: printf( "tabix (htslib) %s\n" "Copyright (C) 2017 Genome Research Ltd.\n", hts_version()); return EXIT_SUCCESS; default: return usage(); } } if ( optind==argc ) return usage(); if ( list_chroms ) return query_chroms(argv[optind]); if ( argc > optind+1 || args.header_only || args.regions_fname || args.targets_fname ) { int nregs = 0; char **regs = NULL; if ( !args.header_only ) regs = parse_regions(args.regions_fname, argv+optind+1, argc-optind-1, &nregs); return query_regions(&args, argv[optind], regs, nregs); } char *fname = argv[optind]; int ftype = file_type(fname); if ( detect ) // no preset given { if ( ftype==IS_GFF ) conf = tbx_conf_gff; else if ( ftype==IS_BED ) conf = tbx_conf_bed; else if ( ftype==IS_SAM ) conf = tbx_conf_sam; else if ( ftype==IS_VCF ) { conf = tbx_conf_vcf; if ( !min_shift && do_csi ) min_shift = 14; } else if ( ftype==IS_BCF ) { if ( !min_shift ) min_shift = 14; } else if ( ftype==IS_BAM ) { if ( !min_shift ) min_shift = 14; } } if ( do_csi ) { if ( !min_shift ) min_shift = 14; min_shift *= do_csi; // positive for CSIv2, negative for CSIv1 } if ( min_shift!=0 && !do_csi ) do_csi = 1; if ( reheader ) return reheader_file(fname, reheader, ftype, &conf); char *suffix = ".tbi"; if ( do_csi ) suffix = ".csi"; else if ( ftype==IS_BAM ) suffix = ".bai"; else if ( ftype==IS_CRAM ) suffix = ".crai"; char *idx_fname = 
calloc(strlen(fname) + 5, 1); strcat(strcpy(idx_fname, fname), suffix); struct stat stat_tbi, stat_file; if ( !is_force && stat(idx_fname, &stat_tbi)==0 ) { // Before complaining about existing index, check if the VCF file isn't // newer. This is a common source of errors, people tend not to notice // that tabix failed stat(fname, &stat_file); if ( stat_file.st_mtime <= stat_tbi.st_mtime ) error("[tabix] the index file exists. Please use '-f' to overwrite.\n"); } free(idx_fname); if ( ftype==IS_CRAM ) { if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname); return 0; } else if ( do_csi ) { if ( ftype==IS_BCF ) { if ( bcf_index_build(fname, min_shift)!=0 ) error("bcf_index_build failed: %s\n", fname); return 0; } if ( ftype==IS_BAM ) { if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname); return 0; } if ( tbx_index_build(fname, min_shift, &conf)!=0 ) error("tbx_index_build failed: %s\n", fname); return 0; } else // TBI index { if ( tbx_index_build(fname, min_shift, &conf) ) error("tbx_index_build failed: %s\n", fname); return 0; } return 0; }
int main_tabix(int argc, char *argv[]) { int c, min_shift = -1, is_force = 0, is_all = 0; tbx_conf_t conf = tbx_conf_gff, *conf_ptr = NULL; while ((c = getopt(argc, argv, "0fap:s:b:e:S:c:m:")) >= 0) if (c == '0') conf.preset |= TBX_UCSC; else if (c == 'f') is_force = 1; else if (c == 'a') is_all = 1; else if (c == 'm') min_shift = atoi(optarg); else if (c == 's') conf.sc = atoi(optarg); else if (c == 'b') conf.bc = atoi(optarg); else if (c == 'e') conf.ec = atoi(optarg); else if (c == 'c') conf.meta_char = *optarg; else if (c == 'S') conf.line_skip = atoi(optarg); else if (c == 'p') { if (strcmp(optarg, "gff") == 0) conf_ptr = &tbx_conf_gff; else if (strcmp(optarg, "bed") == 0) conf_ptr = &tbx_conf_bed; else if (strcmp(optarg, "sam") == 0) conf_ptr = &tbx_conf_sam; else if (strcmp(optarg, "vcf") == 0) conf_ptr = &tbx_conf_vcf; else { fprintf(stderr, "The type '%s' not recognised\n", optarg); return 1; } } if (optind == argc) { fprintf(stderr, "\nUsage: bcftools tabix [options] <in.gz> [reg1 [...]]\n\n"); fprintf(stderr, "Options: -p STR preset: gff, bed, sam or vcf [gff]\n"); fprintf(stderr, " -s INT column number for sequence names (suppressed by -p) [1]\n"); fprintf(stderr, " -b INT column number for region start [4]\n"); fprintf(stderr, " -e INT column number for region end (if no end, set INT to -b) [5]\n"); fprintf(stderr, " -0 specify coordinates are zero-based\n"); fprintf(stderr, " -S INT skip first INT lines [0]\n"); fprintf(stderr, " -c CHAR skip lines starting with CHAR [null]\n"); fprintf(stderr, " -a print all records\n"); fprintf(stderr, " -f force to overwrite existing index\n"); fprintf(stderr, " -m INT set the minimal interval size to 1<<INT; 0 for the old tabix index [0]\n"); fprintf(stderr, "\n"); return 1; } if (is_all) { // read without random access kstring_t s; BGZF *fp; s.l = s.m = 0; s.s = 0; fp = bgzf_open(argv[optind], "r"); while (bgzf_getline(fp, '\n', &s) >= 0) puts(s.s); bgzf_close(fp); free(s.s); } else if (optind + 2 > argc) { // 
create index if ( !conf_ptr ) { // auto-detect file type by file name int l = strlen(argv[optind]); int strcasecmp(const char *s1, const char *s2); if (l>=7 && strcasecmp(argv[optind]+l-7, ".gff.gz") == 0) conf_ptr = &tbx_conf_gff; else if (l>=7 && strcasecmp(argv[optind]+l-7, ".bed.gz") == 0) conf_ptr = &tbx_conf_bed; else if (l>=7 && strcasecmp(argv[optind]+l-7, ".sam.gz") == 0) conf_ptr = &tbx_conf_sam; else if (l>=7 && strcasecmp(argv[optind]+l-7, ".vcf.gz") == 0) conf_ptr = &tbx_conf_vcf; } if ( conf_ptr ) conf = *conf_ptr; if (!is_force) { char *fn; FILE *fp; fn = (char*)alloca(strlen(argv[optind]) + 5); strcat(strcpy(fn, argv[optind]), min_shift <= 0? ".tbi" : ".csi"); if ((fp = fopen(fn, "rb")) != 0) { fclose(fp); fprintf(stderr, "[E::%s] the index file exists; use option '-f' to overwrite\n", __func__); return 1; } } if ( tbx_index_build(argv[optind], min_shift, &conf) ) { fprintf(stderr,"tbx_index_build failed: Is the file bgzip-compressed? Was wrong -p [type] option used?\n"); return 1; } } else { // read with random access tbx_t *tbx; BGZF *fp; kstring_t s; int i; if ((tbx = tbx_index_load(argv[optind])) == 0) return 1; if ((fp = bgzf_open(argv[optind], "r")) == 0) return 1; s.s = 0; s.l = s.m = 0; for (i = optind + 1; i < argc; ++i) { hts_itr_t *itr; if ((itr = tbx_itr_querys(tbx, argv[i])) == 0) continue; while (tbx_bgzf_itr_next(fp, tbx, itr, &s) >= 0) puts(s.s); tbx_itr_destroy(itr); } free(s.s); bgzf_close(fp); tbx_destroy(tbx); } return 0; }