filter_t *region_exact_filter_new(char *region_descriptor, int use_region_file, char *type, const char *url, const char *species, const char *version) { assert(region_descriptor); assert(url); assert(species); assert(version); filter_t *filter = (filter_t*) malloc (sizeof(filter_t)); filter->type = REGION; filter->filter_func = region_filter; filter->free_func = region_filter_free; filter->priority = 2; region_filter_args *filter_args = (region_filter_args*) malloc (sizeof(region_filter_args)); if (use_region_file) { snprintf(filter->name, 11, "RegionFile"); snprintf(filter->description, 64, "Regions read from '%s'", region_descriptor); if (ends_with(region_descriptor, ".gff")) { filter_args->regions = parse_regions_from_gff_file(region_descriptor, url, species, version); } else if (ends_with(region_descriptor, ".bed")) { filter_args->regions = parse_regions_from_bed_file(region_descriptor, url, species, version); } else { LOG_FATAL_F("Region file %s format not supported! Please use BED or GFF formats\n", region_descriptor); } } else { snprintf(filter->name, 11, "RegionList"); snprintf(filter->description, 64, "Regions (could be more) %s", region_descriptor); filter_args->regions = parse_regions(region_descriptor, 1, url, species, version); } filter_args->type = type; filter->args = filter_args; return filter; }
int main(int argc, char *argv[]) { int c, detect = 1, min_shift = 0, is_force = 0, list_chroms = 0, do_csi = 0; tbx_conf_t conf = tbx_conf_gff; char *reheader = NULL; args_t args; memset(&args,0,sizeof(args_t)); static const struct option loptions[] = { {"help", no_argument, NULL, 2}, {"regions", required_argument, NULL, 'R'}, {"targets", required_argument, NULL, 'T'}, {"csi", no_argument, NULL, 'C'}, {"zero-based", no_argument, NULL, '0'}, {"print-header", no_argument, NULL, 'h'}, {"only-header", no_argument, NULL, 'H'}, {"begin", required_argument, NULL, 'b'}, {"comment", required_argument, NULL, 'c'}, {"end", required_argument, NULL, 'e'}, {"force", no_argument, NULL, 'f'}, {"preset", required_argument, NULL, 'p'}, {"sequence", required_argument, NULL, 's'}, {"skip-lines", required_argument, NULL, 'S'}, {"list-chroms", no_argument, NULL, 'l'}, {"reheader", required_argument, NULL, 'r'}, {"version", no_argument, NULL, 1}, {NULL, 0, NULL, 0} }; char *tmp; while ((c = getopt_long(argc, argv, "hH?0b:c:e:fm:p:s:S:lr:CR:T:", loptions,NULL)) >= 0) { switch (c) { case 'R': args.regions_fname = optarg; break; case 'T': args.targets_fname = optarg; break; case 'C': do_csi = 1; break; case 'r': reheader = optarg; break; case 'h': args.print_header = 1; break; case 'H': args.print_header = 1; args.header_only = 1; break; case 'l': list_chroms = 1; break; case '0': conf.preset |= TBX_UCSC; detect = 0; break; case 'b': conf.bc = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -b %s\n", optarg); detect = 0; break; case 'e': conf.ec = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -e %s\n", optarg); detect = 0; break; case 'c': conf.meta_char = *optarg; detect = 0; break; case 'f': is_force = 1; break; case 'm': min_shift = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -m %s\n", optarg); break; case 'p': detect = 0; if (strcmp(optarg, "gff") == 0) conf = tbx_conf_gff; else if (strcmp(optarg, "bed") == 0) conf = tbx_conf_bed; else if (strcmp(optarg, "sam") == 0) conf = tbx_conf_sam; else if (strcmp(optarg, "vcf") == 0) conf = tbx_conf_vcf; else if (strcmp(optarg, "bcf") == 0) detect = 1; // bcf is autodetected, preset is not needed else if (strcmp(optarg, "bam") == 0) detect = 1; // same as bcf else error("The preset string not recognised: '%s'\n", optarg); break; case 's': conf.sc = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -s %s\n", optarg); detect = 0; break; case 'S': conf.line_skip = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -S %s\n", optarg); detect = 0; break; case 1: printf( "tabix (htslib) %s\n" "Copyright (C) 2017 Genome Research Ltd.\n", hts_version()); return EXIT_SUCCESS; default: return usage(); } } if ( optind==argc ) return usage(); if ( list_chroms ) return query_chroms(argv[optind]); if ( argc > optind+1 || args.header_only || args.regions_fname || args.targets_fname ) { int nregs = 0; char **regs = NULL; if ( !args.header_only ) regs = parse_regions(args.regions_fname, argv+optind+1, argc-optind-1, &nregs); return query_regions(&args, argv[optind], regs, nregs); } char *fname = argv[optind]; int ftype = file_type(fname); if ( detect ) // no preset given { if ( ftype==IS_GFF ) conf = tbx_conf_gff; else if ( ftype==IS_BED ) conf = tbx_conf_bed; else if ( ftype==IS_SAM ) conf = tbx_conf_sam; else if ( ftype==IS_VCF ) { conf = tbx_conf_vcf; if ( !min_shift && do_csi ) min_shift = 14; } else if ( ftype==IS_BCF ) { if ( !min_shift ) min_shift = 14; } else if ( ftype==IS_BAM ) { if ( !min_shift ) min_shift = 14; } } if ( do_csi ) { if ( !min_shift ) min_shift = 14; min_shift *= do_csi; // positive for CSIv2, negative for CSIv1 } if ( min_shift!=0 && !do_csi ) do_csi = 1; if ( reheader ) return reheader_file(fname, reheader, ftype, &conf); char *suffix = ".tbi"; if ( do_csi ) suffix = ".csi"; else if ( ftype==IS_BAM ) suffix = ".bai"; else if ( ftype==IS_CRAM ) suffix = ".crai"; char *idx_fname = calloc(strlen(fname) + 5, 1); strcat(strcpy(idx_fname, fname), suffix); struct stat stat_tbi, stat_file; if ( !is_force && stat(idx_fname, &stat_tbi)==0 ) { // Before complaining about existing index, check if the VCF file isn't // newer. This is a common source of errors, people tend not to notice // that tabix failed stat(fname, &stat_file); if ( stat_file.st_mtime <= stat_tbi.st_mtime ) error("[tabix] the index file exists. Please use '-f' to overwrite.\n"); } free(idx_fname); if ( ftype==IS_CRAM ) { if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname); return 0; } else if ( do_csi ) { if ( ftype==IS_BCF ) { if ( bcf_index_build(fname, min_shift)!=0 ) error("bcf_index_build failed: %s\n", fname); return 0; } if ( ftype==IS_BAM ) { if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname); return 0; } if ( tbx_index_build(fname, min_shift, &conf)!=0 ) error("tbx_index_build failed: %s\n", fname); return 0; } else // TBI index { if ( tbx_index_build(fname, min_shift, &conf) ) error("tbx_index_build failed: %s\n", fname); return 0; } return 0; }
int main(int argc, char* argv[]) { try { if (argc < 13 || argc % 2 != 1) { std::cerr << "usage: " << argv[0] << " number_of_threads region_file gff_or_bed_format extension bam_file bam_file_key hdf5_file " << "log_file write_warnings_to_stderr strand chr1 length1 ...\n" << "\ne.g. " << argv[0] << " /grail/annotations/HG19_SUM159_BRD4_-0_+0.gff gff" << "\n /ifs/labs/bradner/bam/hg18/mm1s/04032013_D1L57ACXX_4.TTAGGC.hg18.bwt.sorted.bam 137 counts.hdf5 " << "\n output/log.txt 1 _ chr1 247249719 chr2 242951149 chr3 199501827\n" << "\nstrand value of _ means use strand that is specified in region file (and use . if strand not specified in region file)." << "\nnumber of threads <= 0 means use a number of threads equal to the number of logical cpus." << "\nnote that this application is intended to be run from bamliquidator_batch.py -- see" << "\nhttps://github.com/BradnerLab/pipeline/wiki for more information" << std::endl; return 1; } const int number_of_threads = boost::lexical_cast<int>(argv[1]); const std::string region_file_path = argv[2]; const std::string region_format = argv[3]; const unsigned int extension = boost::lexical_cast<unsigned int>(argv[4]); const std::string bam_file_path = argv[5]; const unsigned int bam_file_key = boost::lexical_cast<unsigned int>(argv[6]); const std::string hdf5_file_path = argv[7]; const std::string log_file_path = argv[8]; const bool write_warnings_to_stderr = boost::lexical_cast<bool>(argv[9]); const char strand = boost::lexical_cast<char>(argv[10]); const std::vector<std::pair<std::string, size_t>> chromosome_lengths = extract_chromosome_lengths(argc, argv, 11); tbb::task_scheduler_init init( number_of_threads <= 0 ? tbb::task_scheduler_init::automatic : number_of_threads); Logger::configure(log_file_path, write_warnings_to_stderr); hid_t h5file = H5Fopen(hdf5_file_path.c_str(), H5F_ACC_RDWR, H5P_DEFAULT); if (h5file < 0) { Logger::error() << "Failed to open H5 file " << hdf5_file_path; return 3; } #ifdef time_region_parsing boost::timer::cpu_timer timer; #endif std::map<std::string, size_t> chromosome_to_length; for (auto& chr_length : chromosome_lengths) { chromosome_to_length[chr_length.first] = chr_length.second; } std::vector<Region> regions = parse_regions(region_file_path, region_format, bam_file_key, chromosome_to_length, strand); #ifdef time_region_parsing timer.stop(); std::cout << "parsing regions took" << timer.format() << std::endl; #endif if (regions.size() == 0) { Logger::warn() << "No valid regions detected in " << region_file_path; return 0; } liquidate_and_write(h5file, regions, extension, bam_file_path); H5Fclose(h5file); return 0; } catch(const std::exception& e) { Logger::error() << "Unhandled exception: " << e.what(); return 4; } }