void buildTestBamFile( const bam_header_info& bamHeader, const std::vector<bam_record>& readsToAdd, const std::string& bamFilename) { const HtslibBamHeaderManager bamHeaderManager(bamHeader.chrom_data); bam_dumper bamDumper(bamFilename.c_str(), bamHeaderManager.get()); for (const bam_record& bamRecord : readsToAdd) { bamDumper.put_record(bamRecord.get_data()); } bamDumper.close(); const int indexStatus = bam_index_build(bamFilename.c_str(), 0); if (indexStatus < 0) { std::ostringstream oss; oss << "Failed to build index for bam file. bam_index_build return code: " << indexStatus << " bam filename: '" << bamFilename << "'\n"; BOOST_THROW_EXCEPTION(illumina::common::GeneralException(oss.str().c_str())); } }
/**
 * Command-line entry point that builds an index for an alignment file.
 *
 * Options:
 *   -b      clear the CSI flag (bam_index_build is then called with min_shift 0)
 *   -c      set the CSI flag (min_shift defaults to BAM_LIDX_SHIFT)
 *   -m INT  set the CSI flag and use INT as min_shift
 *
 * Positional arguments: the input file, optionally followed by the name of
 * the index file to write.  When an index name is given, bam_index_build2 is
 * used and the -b/-c/-m settings are not passed on.
 *
 * @return 0 on success; 1 on a usage error or when the index build reports
 *         a failure (negative status).
 */
int bam_index(int argc, char *argv[])
{
    int csi = 0;
    int min_shift = BAM_LIDX_SHIFT;
    int c;
    int status;

    while ((c = getopt(argc, argv, "bcm:")) >= 0) {
        switch (c) {
        case 'b': csi = 0; break;
        case 'c': csi = 1; break;
        case 'm': csi = 1; min_shift = atoi(optarg); break;
        default:
            index_usage(pysamerr);
            return 1;
        }
    }

    if (optind == argc) {
        index_usage(stdout);
        return 1;
    }

    if (argc - optind > 1)
        status = bam_index_build2(argv[optind], argv[optind+1]);
    else
        status = bam_index_build(argv[optind], csi? min_shift : 0);

    /* Propagate a failed build instead of unconditionally reporting success. */
    if (status < 0) {
        fprintf(pysamerr, "[bam_index] failed to build index for \"%s\"\n", argv[optind]);
        return 1;
    }
    return 0;
}
int main(int argc, char *argv[]) { int c, detect = 1, min_shift = 0, is_force = 0, list_chroms = 0, do_csi = 0; tbx_conf_t conf = tbx_conf_gff; char *reheader = NULL; args_t args; memset(&args,0,sizeof(args_t)); static const struct option loptions[] = { {"help", no_argument, NULL, 2}, {"regions", required_argument, NULL, 'R'}, {"targets", required_argument, NULL, 'T'}, {"csi", no_argument, NULL, 'C'}, {"zero-based", no_argument, NULL, '0'}, {"print-header", no_argument, NULL, 'h'}, {"only-header", no_argument, NULL, 'H'}, {"begin", required_argument, NULL, 'b'}, {"comment", required_argument, NULL, 'c'}, {"end", required_argument, NULL, 'e'}, {"force", no_argument, NULL, 'f'}, {"preset", required_argument, NULL, 'p'}, {"sequence", required_argument, NULL, 's'}, {"skip-lines", required_argument, NULL, 'S'}, {"list-chroms", no_argument, NULL, 'l'}, {"reheader", required_argument, NULL, 'r'}, {"version", no_argument, NULL, 1}, {NULL, 0, NULL, 0} }; char *tmp; while ((c = getopt_long(argc, argv, "hH?0b:c:e:fm:p:s:S:lr:CR:T:", loptions,NULL)) >= 0) { switch (c) { case 'R': args.regions_fname = optarg; break; case 'T': args.targets_fname = optarg; break; case 'C': do_csi = 1; break; case 'r': reheader = optarg; break; case 'h': args.print_header = 1; break; case 'H': args.print_header = 1; args.header_only = 1; break; case 'l': list_chroms = 1; break; case '0': conf.preset |= TBX_UCSC; detect = 0; break; case 'b': conf.bc = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -b %s\n", optarg); detect = 0; break; case 'e': conf.ec = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -e %s\n", optarg); detect = 0; break; case 'c': conf.meta_char = *optarg; detect = 0; break; case 'f': is_force = 1; break; case 'm': min_shift = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -m %s\n", optarg); break; case 'p': detect = 0; if (strcmp(optarg, "gff") == 0) conf = tbx_conf_gff; else if (strcmp(optarg, "bed") == 0) conf = 
tbx_conf_bed; else if (strcmp(optarg, "sam") == 0) conf = tbx_conf_sam; else if (strcmp(optarg, "vcf") == 0) conf = tbx_conf_vcf; else if (strcmp(optarg, "bcf") == 0) detect = 1; // bcf is autodetected, preset is not needed else if (strcmp(optarg, "bam") == 0) detect = 1; // same as bcf else error("The preset string not recognised: '%s'\n", optarg); break; case 's': conf.sc = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -s %s\n", optarg); detect = 0; break; case 'S': conf.line_skip = strtol(optarg,&tmp,10); if ( *tmp ) error("Could not parse argument: -S %s\n", optarg); detect = 0; break; case 1: printf( "tabix (htslib) %s\n" "Copyright (C) 2017 Genome Research Ltd.\n", hts_version()); return EXIT_SUCCESS; default: return usage(); } } if ( optind==argc ) return usage(); if ( list_chroms ) return query_chroms(argv[optind]); if ( argc > optind+1 || args.header_only || args.regions_fname || args.targets_fname ) { int nregs = 0; char **regs = NULL; if ( !args.header_only ) regs = parse_regions(args.regions_fname, argv+optind+1, argc-optind-1, &nregs); return query_regions(&args, argv[optind], regs, nregs); } char *fname = argv[optind]; int ftype = file_type(fname); if ( detect ) // no preset given { if ( ftype==IS_GFF ) conf = tbx_conf_gff; else if ( ftype==IS_BED ) conf = tbx_conf_bed; else if ( ftype==IS_SAM ) conf = tbx_conf_sam; else if ( ftype==IS_VCF ) { conf = tbx_conf_vcf; if ( !min_shift && do_csi ) min_shift = 14; } else if ( ftype==IS_BCF ) { if ( !min_shift ) min_shift = 14; } else if ( ftype==IS_BAM ) { if ( !min_shift ) min_shift = 14; } } if ( do_csi ) { if ( !min_shift ) min_shift = 14; min_shift *= do_csi; // positive for CSIv2, negative for CSIv1 } if ( min_shift!=0 && !do_csi ) do_csi = 1; if ( reheader ) return reheader_file(fname, reheader, ftype, &conf); char *suffix = ".tbi"; if ( do_csi ) suffix = ".csi"; else if ( ftype==IS_BAM ) suffix = ".bai"; else if ( ftype==IS_CRAM ) suffix = ".crai"; char *idx_fname = 
calloc(strlen(fname) + 5, 1); strcat(strcpy(idx_fname, fname), suffix); struct stat stat_tbi, stat_file; if ( !is_force && stat(idx_fname, &stat_tbi)==0 ) { // Before complaining about existing index, check if the VCF file isn't // newer. This is a common source of errors, people tend not to notice // that tabix failed stat(fname, &stat_file); if ( stat_file.st_mtime <= stat_tbi.st_mtime ) error("[tabix] the index file exists. Please use '-f' to overwrite.\n"); } free(idx_fname); if ( ftype==IS_CRAM ) { if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname); return 0; } else if ( do_csi ) { if ( ftype==IS_BCF ) { if ( bcf_index_build(fname, min_shift)!=0 ) error("bcf_index_build failed: %s\n", fname); return 0; } if ( ftype==IS_BAM ) { if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname); return 0; } if ( tbx_index_build(fname, min_shift, &conf)!=0 ) error("tbx_index_build failed: %s\n", fname); return 0; } else // TBI index { if ( tbx_index_build(fname, min_shift, &conf) ) error("tbx_index_build failed: %s\n", fname); return 0; } return 0; }
/// Builds an index for this file by calling bam_index_build on the stored
/// filename with a min_shift argument of 0.
///
/// Throws std::runtime_error when bam_index_build returns a non-zero status.
void BamFile::CreateStandardIndex(void) const
{
    const char* const path = d_->filename_.c_str();
    const int status = bam_index_build(path, 0);
    if (status == 0)
        return;

    throw std::runtime_error("could not build BAI index");
}
int extract_main(int argc, char *argv[]) { char *opref = NULL, *oname, *p; int c, i; Config config; //Defaults config.keepCpG = 1; config.keepCHG = 0; config.keepCHH = 0; config.minMapq = 10; config.minPhred = 5; config.keepDupes = 0; config.keepSingleton = 0, config.keepDiscordant = 0; config.merge = 0; config.maxDepth = 2000; config.fai = NULL; config.fp = NULL; config.bai = NULL; config.reg = NULL; config.bedName = NULL; config.bed = NULL; config.fraction = 0; config.counts = 0; config.logit = 0; for(i=0; i<16; i++) config.bounds[i] = 0; static struct option lopts[] = { {"opref", 1, NULL, 'o'}, {"fraction", 0, NULL, 'f'}, {"counts", 0, NULL, 'c'}, {"logit", 0, NULL, 'm'}, {"noCpG", 0, NULL, 1}, {"CHG", 0, NULL, 2}, {"CHH", 0, NULL, 3}, {"keepDupes", 0, NULL, 4}, {"keepSingleton",0, NULL, 5}, {"keepDiscordant",0,NULL, 6}, {"OT", 1, NULL, 7}, {"OB", 1, NULL, 8}, {"CTOT", 1, NULL, 9}, {"CTOB", 1, NULL, 10}, {"mergeContext", 0, NULL, 11}, {"help", 0, NULL, 'h'}, {0, 0, NULL, 0} }; while((c = getopt_long(argc, argv, "q:p:r:l:o:D:f:c:m:", lopts,NULL)) >=0) { switch(c) { case 'h' : extract_usage(); return 0; case 'o' : opref = strdup(optarg); break; case 'D' : config.maxDepth = atoi(optarg); break; case 'r': config.reg = strdup(optarg); break; case 'l' : config.bedName = optarg; break; case 1 : config.keepCpG = 0; break; case 2 : config.keepCHG = 1; break; case 3 : config.keepCHH = 1; break; case 4 : config.keepDupes = 1; break; case 5 : config.keepSingleton = 1; break; case 6 : config.keepDiscordant = 1; break; case 7 : parseBounds(optarg, config.bounds, 0); break; case 8 : parseBounds(optarg, config.bounds, 1); break; case 9 : parseBounds(optarg, config.bounds, 2); break; case 10 : parseBounds(optarg, config.bounds, 3); break; case 11 : config.merge = 1; break; case 'q' : config.minMapq = atoi(optarg); break; case 'p' : config.minPhred = atoi(optarg); break; case 'm' : config.logit = 1; break; case 'f' : config.fraction = 1; break; case 'c' : config.counts = 1; 
break; case '?' : default : fprintf(stderr, "Invalid option '%c'\n", c); extract_usage(); return 1; } } if(argc == 1) { extract_usage(); return 0; } if(argc-optind != 2) { fprintf(stderr, "You must supply a reference genome in fasta format and an input BAM file!!!\n"); extract_usage(); return -1; } //Are the options reasonable? if(config.minPhred < 1) { fprintf(stderr, "-p %i is invalid. resetting to 1, which is the lowest possible value.\n", config.minPhred); config.minPhred = 1; } if(config.minMapq < 0) { fprintf(stderr, "-q %i is invalid. Resetting to 0, which is the lowest possible value.\n", config.minMapq); config.minMapq = 0; } if(config.fraction+config.counts+config.logit > 1) { fprintf(stderr, "More than one of --fraction, --counts, and --logit were specified. These are mutually exclusive.\n"); extract_usage(); return 1; } //Has more than one output format been requested? if(config.fraction + config.counts + config.logit > 1) { fprintf(stderr, "You may specify AT MOST one of -c/--counts, -f/--fraction, or -m/--logit.\n"); return -6; } //Is there still a metric to output? if(!(config.keepCpG + config.keepCHG + config.keepCHH)) { fprintf(stderr, "You haven't specified any metrics to output!\nEither don't use the --noCpG option or specify --CHG and/or --CHH.\n"); return -1; } //Open the files if((config.fai = fai_load(argv[optind])) == NULL) { fprintf(stderr, "Couldn't open the index for %s!\n", argv[optind]); extract_usage(); return -2; } if((config.fp = hts_open(argv[optind+1], "rb")) == NULL) { fprintf(stderr, "Couldn't open %s for reading!\n", argv[optind+1]); return -4; } if((config.bai = sam_index_load(config.fp, argv[optind+1])) == NULL) { fprintf(stderr, "Couldn't load the index for %s, will attempt to build it.\n", argv[optind+1]); if(bam_index_build(argv[optind+1], 0) < 0) { fprintf(stderr, "Couldn't build the index for %s! 
File corrupted?\n", argv[optind+1]); return -5; } if((config.bai = sam_index_load(config.fp, argv[optind+1])) == NULL) { fprintf(stderr, "Still couldn't load the index, quiting.\n"); return -5; } } //Output files config.output_fp = malloc(sizeof(FILE *) * 3); assert(config.output_fp); if(opref == NULL) { opref = strdup(argv[optind+1]); assert(opref); p = strrchr(opref, '.'); if(p != NULL) *p = '\0'; fprintf(stderr, "writing to prefix:'%s'\n", opref); } if(config.fraction) { oname = malloc(sizeof(char) * (strlen(opref)+19)); } else if(config.counts) { oname = malloc(sizeof(char) * (strlen(opref)+21)); } else if(config.logit) { oname = malloc(sizeof(char) * (strlen(opref)+20)); } else { oname = malloc(sizeof(char) * (strlen(opref)+14)); } assert(oname); if(config.keepCpG) { if(config.fraction) { sprintf(oname, "%s_CpG.meth.bedGraph", opref); } else if(config.counts) { sprintf(oname, "%s_CpG.counts.bedGraph", opref); } else if(config.logit) { sprintf(oname, "%s_CpG.logit.bedGraph", opref); } else { sprintf(oname, "%s_CpG.bedGraph", opref); } config.output_fp[0] = fopen(oname, "w"); if(config.output_fp[0] == NULL) { fprintf(stderr, "Couldn't open the output CpG metrics file for writing! Insufficient permissions?\n"); return -3; } printHeader(config.output_fp[0], "CpG", opref, config); } if(config.keepCHG) { if(config.fraction) { sprintf(oname, "%s_CHG.meth.bedGraph", opref); } else if(config.counts) { sprintf(oname, "%s_CHG.counts.bedGraph", opref); } else if(config.logit) { sprintf(oname, "%s_CHG.logit.bedGraph", opref); } else { sprintf(oname, "%s_CHG.bedGraph", opref); } config.output_fp[1] = fopen(oname, "w"); if(config.output_fp[1] == NULL) { fprintf(stderr, "Couldn't open the output CHG metrics file for writing! 
Insufficient permissions?\n"); return -3; } printHeader(config.output_fp[1], "CHG", opref, config); } if(config.keepCHH) { if(config.fraction) { sprintf(oname, "%s_CHH.meth.bedGraph", opref); } else if(config.counts) { sprintf(oname, "%s_CHH.counts.bedGraph", opref); } else if(config.logit) { sprintf(oname, "%s_CHH.logit.bedGraph", opref); } else { sprintf(oname, "%s_CHH.bedGraph", opref); } config.output_fp[2] = fopen(oname, "w"); if(config.output_fp[2] == NULL) { fprintf(stderr, "Couldn't open the output CHH metrics file for writing! Insufficient permissions?\n"); return -3; } printHeader(config.output_fp[2], "CHH", opref, config); } //Run the pileup extractCalls(&config); //Close things up hts_close(config.fp); fai_destroy(config.fai); if(config.keepCpG) fclose(config.output_fp[0]); if(config.keepCHG) fclose(config.output_fp[1]); if(config.keepCHH) fclose(config.output_fp[2]); hts_idx_destroy(config.bai); free(opref); if(config.reg) free(config.reg); if(config.bed) destroyBED(config.bed); free(oname); free(config.output_fp); return 0; }
SR_BamInStream* SR_BamInStreamAlloc(const char* bamFilename, uint32_t binLen, unsigned int numThreads, unsigned int buffCapacity, unsigned int reportSize, const SR_StreamMode* pStreamMode) { SR_BamInStream* pBamInStream = (SR_BamInStream*) calloc(1, sizeof(SR_BamInStream)); if (pBamInStream == NULL) SR_ErrQuit("ERROR: Not enough memory for a bam input stream object."); pBamInStream->bam_cur_status = -1; pBamInStream->fpBamInput = bam_open(bamFilename, "r"); if (pBamInStream->fpBamInput == NULL) SR_ErrQuit("ERROR: Cannot open bam file %s for reading.\n", bamFilename); if ((pStreamMode->controlFlag & SR_USE_BAM_INDEX) != 0) { pBamInStream->pBamIndex = bam_index_load(bamFilename); if (pBamInStream->pBamIndex == NULL) { SR_ErrMsg("WARNING: Cannot open bam index file for reading. Creating it......"); bam_index_build(bamFilename); SR_ErrMsg(" The bam index is created."); pBamInStream->pBamIndex = bam_index_load(bamFilename); } } pBamInStream->filterFunc = pStreamMode->filterFunc; pBamInStream->filterData = pStreamMode->filterData; pBamInStream->numThreads = numThreads; pBamInStream->reportSize = reportSize; pBamInStream->currRefID = NO_QUERY_YET; pBamInStream->currBinPos = NO_QUERY_YET; pBamInStream->binLen = binLen; pBamInStream->pNewNode = NULL; pBamInStream->pBamIterator = NULL; if (numThreads > 0) { pBamInStream->pRetLists = (SR_BamList*) calloc(numThreads, sizeof(SR_BamList)); if (pBamInStream->pRetLists == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of retrun alignment lists in the bam input stream object.\n"); pBamInStream->pAlgnTypes = (SR_AlgnType*) malloc(numThreads * reportSize * sizeof(SR_AlgnType)); if (pBamInStream->pAlgnTypes == NULL) SR_ErrQuit("ERROR: Not enough memory for the storage of pair alignment type in the bam input stream object.\n"); } else { pBamInStream->pRetLists = NULL; pBamInStream->pAlgnTypes = NULL; pBamInStream->reportSize = 0; } if ((pStreamMode->controlFlag & SR_PAIR_GENOMICALLY) == 0) { 
pBamInStream->pNameHashes[PREV_BIN] = kh_init(queryName); kh_resize(queryName, pBamInStream->pNameHashes[PREV_BIN], reportSize); } else { pBamInStream->pNameHashes[PREV_BIN] = NULL; pBamInStream->binLen = SR_MAX_BIN_LEN; } pBamInStream->pNameHashes[CURR_BIN] = kh_init(queryName); kh_resize(queryName, pBamInStream->pNameHashes[CURR_BIN], reportSize); pBamInStream->pMemPool = SR_BamMemPoolAlloc(buffCapacity); pBamInStream->bam_cur_status = 1; return pBamInStream; }