static void copy_check_alignment(const char *infname, const char *informat, const char *outfname, const char *outmode, const char *outref) { samFile *in = sam_open(infname, "r"); samFile *out = sam_open(outfname, outmode); bam1_t *aln = bam_init1(); bam_hdr_t *header = NULL; int res; if (!in) { fail("couldn't open %s", infname); goto err; } if (!out) { fail("couldn't open %s with mode %s", outfname, outmode); goto err; } if (!aln) { fail("bam_init1() failed"); goto err; } if (outref) { if (hts_set_opt(out, CRAM_OPT_REFERENCE, outref) < 0) { fail("setting reference %s for %s", outref, outfname); goto err; } } header = sam_hdr_read(in); if (!header) { fail("reading header from %s", infname); goto err; } if (sam_hdr_write(out, header) < 0) fail("writing headers to %s", outfname); while ((res = sam_read1(in, header, aln)) >= 0) { int mod4 = ((intptr_t) bam_get_cigar(aln)) % 4; if (mod4 != 0) fail("%s CIGAR not 4-byte aligned; offset is 4k+%d for \"%s\"", informat, mod4, bam_get_qname(aln)); if (sam_write1(out, header, aln) < 0) fail("writing to %s", outfname); } if (res < -1) { fail("failed to read alignment from %s", infname); } err: bam_destroy1(aln); bam_hdr_destroy(header); if (in) sam_close(in); if (out) sam_close(out); }
/****************************************************************************** * * quit, while performing some cleanup * * int FLAG: What to free/close/etc. * 0x1 things created by create_fastq_names() * 0x2 things pthreads are closed and bam headers destroyed * In addition, the master node will free chromosomes.genome, close * the BAM file, and free everything in the chromosomes struct. * * int rv: return value * *******************************************************************************/ void quit(int FLAG, int rv) { int taskid, i; free(config.bowtie2_options); MPI_Comm_rank(MPI_COMM_WORLD, &taskid); if(FLAG & 1) { //FASTQ filenames set #ifndef DEBUG if(taskid == MASTER) { if(config.FASTQ1CT != NULL) remove(config.FASTQ1CT); if(config.paired && (config.FASTQ2GA != NULL)) remove(config.FASTQ2GA); if(!config.directional) { if(config.FASTQ1GA != NULL) remove(config.FASTQ1GA); if(config.paired && (config.FASTQ2CT != NULL)) remove(config.FASTQ2CT); } } #endif if(config.FASTQ1CT != NULL) free(config.FASTQ1CT); if(config.FASTQ1GA != NULL) free(config.FASTQ1GA); if(config.unmapped1 != NULL) free(config.unmapped1); if(config.paired) { if(config.FASTQ2CT != NULL) free(config.FASTQ2CT); if(config.FASTQ2GA != NULL) free(config.FASTQ2GA); if(config.unmapped2 != NULL) free(config.unmapped2); } free(config.basename); free(config.outname); if(config.fai) free(config.fai); } if(taskid == MASTER) { free(chromosomes.genome); for(i=0; i<chromosomes.nchromosomes; i++) { free((chromosomes.chromosome[i])->chrom); free(*(chromosomes.chromosome+i)); } free(chromosomes.chromosome); if(FLAG && OUTPUT_BAM) sam_close(OUTPUT_BAM); } MPI_Finalize(); if(taskid == MASTER && FLAG > 0) { #ifdef DEBUG if(fp1) sam_close(fp1); if(fp2) sam_close(fp2); if(!config.directional) { if(fp3) sam_close(fp3); if(fp4) sam_close(fp4); } #else if(config.unmapped) { pclose(unmapped1); if(config.paired) pclose(unmapped2); } #endif } exit(rv); }
/*
 * Releases everything held by a state_t and then the struct itself.
 * A NULL state is accepted and ignored.  The two file handles are only
 * closed when they are set; the headers are handed to bam_hdr_destroy()
 * unconditionally.
 */
static void cleanup_state(state_t* state)
{
    if (state == NULL) {
        return;
    }

    free(state->rg_id);

    /* Output side first, then input side. */
    if (state->output_file != NULL) {
        sam_close(state->output_file);
    }
    bam_hdr_destroy(state->output_header);

    if (state->input_file != NULL) {
        sam_close(state->input_file);
    }
    bam_hdr_destroy(state->input_header);

    free(state);
}
samfile_t *samopen(const char *fn, const char *mode, const void *aux) { // hts_open() is really sam_open(), except for #define games samFile *hts_fp = hts_open(fn, mode); if (hts_fp == NULL) return NULL; samfile_t *fp = malloc(sizeof (samfile_t)); if (!fp) { sam_close(hts_fp); return NULL; } fp->file = hts_fp; fp->x.bam = hts_fp->fp.bgzf; if (strchr(mode, 'r')) { if (aux) { if (hts_set_fai_filename(fp->file, aux) != 0) { sam_close(hts_fp); free(fp); return NULL; } } fp->header = sam_hdr_read(fp->file); // samclose() will free this if (fp->header == NULL) { sam_close(hts_fp); free(fp); return NULL; } fp->is_write = 0; if (fp->header->n_targets == 0 && bam_verbose >= 1) fprintf(samtools_stderr, "[samopen] no @SQ lines in the header.\n"); } else { enum htsExactFormat fmt = hts_get_format(fp->file)->format; fp->header = (bam_hdr_t *)aux; // For writing, we won't free it fp->is_write = 1; if (!(fmt == text_format || fmt == sam) || strchr(mode, 'h')) { if (sam_hdr_write(fp->file, fp->header) < 0) { if (bam_verbose >= 1) fprintf(samtools_stderr, "[samopen] Couldn't write header\n"); sam_close(hts_fp); free(fp); return NULL; } } } return fp; }
int main(int argc, char **argv) { dlib::BamHandle in = dlib::BamHandle("bed_test.bam"); dlib::ParsedBed bed = dlib::ParsedBed("bed_test.bed", in.header); bam1_t *b = bam_init1(); size_t diffs = 0; void *lh3bed = bed_read("bed_test.bed"); samFile *so = sam_open("disagreed.bam", "wb9"); sam_hdr_write(so, in.header); size_t disagrees = 0, agrees = 0; int dbr = 0, lh3r = 0; while(in.read(b) != -1) { if(b->core.flag & (BAM_FUNMAP)) continue; if((dbr = bed.bam1_test(b)) != (lh3r = bed_overlap(lh3bed, in.header->target_name[b->core.tid], b->core.pos, bam_endpos(b)))) { LOG_EXIT("dbr: %i. lh3r: %i. Contig: %s. Position: %i. endpos; %i\n", dbr, lh3r, in.header->target_name[b->core.tid], b->core.pos, bam_endpos(b)); if(++disagrees % 100 == 0) LOG_DEBUG("disagrees: %lu.\n", disagrees); sam_write1(so, in.header, b); } else { if(++agrees % 500000 == 0) LOG_DEBUG("agrees: %lu.\n", agrees); } } sam_close(so); bam_destroy1(b); bed_destroy(lh3bed); return EXIT_SUCCESS; }
int main_cat(int argc, char *argv[]) { bam_header_t *h = 0; char *outfn = 0; int c, ret; while ((c = getopt(argc, argv, "h:o:")) >= 0) { switch (c) { case 'h': { tamFile fph = sam_open(optarg); if (fph == 0) { fprintf(stderr, "[%s] ERROR: fail to read the header from '%s'.\n", __func__, argv[1]); return 1; } h = sam_header_read(fph); sam_close(fph); break; } case 'o': outfn = strdup(optarg); break; } } if (argc - optind < 2) { fprintf(stderr, "Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> <in2.bam> [...]\n"); return 1; } ret = bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-"); free(outfn); return ret; }
int main_reheader(int argc, char *argv[]) { bam_header_t *h; BGZF *in; if (argc != 3) { fprintf(stderr, "Usage: samtools reheader <in.header.sam> <in.bam>\n"); return 1; } { // read the header tamFile fph = sam_open(argv[1]); if (fph == 0) { fprintf(stderr, "[%s] fail to read the header from %s.\n", __func__, argv[1]); return 1; } h = sam_header_read(fph); sam_close(fph); } in = strcmp(argv[2], "-")? bam_open(argv[2], "r") : bam_dopen(fileno(stdin), "r"); if (in == 0) { fprintf(stderr, "[%s] fail to open file %s.\n", __func__, argv[2]); return 1; } bam_reheader(in, h, fileno(stdout)); bgzf_close(in); return 0; }
/*static*/
// Builds a new header from `header` in which every @SQ line is replaced by
// one "@SQ SN:<name> LN:<length>" line per entry of `list`.
// The old header is written to a temporary SAM file, its non-@SQ lines
// (lines shorter than 3 characters are dropped as well) are copied to a
// second temporary file, the new @SQ lines are appended, and that file is
// read back with sam_header_read().
// Returns the newly read header, or NULL when the rewritten file cannot be
// opened.
bam_header_t * SAM::update_header_from_list(bam_header_t *header, names_list_t & list) {
  Temporary_File samfile;
  samfile.close_file();
  samfile_t * sf = samopen(samfile.get_filename().c_str(),"wh",header);
  samclose(sf);

  Temporary_File tempfile;
  ofstream &output = tempfile.get_stream();
  ifstream input(samfile.get_filename().c_str());
  string temp;
  // Loop on the result of getline() rather than on eof(): a stream that
  // failed to open never reports eof and would make this spin forever.
  while (getline(input,temp)) {
    if ((temp.size() >= 3) and (temp[0] != '@' or temp[1] != 'S' or temp[2] != 'Q'))
      output << temp << '\n';
  }
  for (names_list_t::iterator iter = list.begin(); iter != list.end(); iter++)
    output << "@SQ\tSN:" << iter->first << "\tLN:" << iter->second << '\n';
  tempfile.close_file();

  tamFile fp = sam_open(tempfile.get_filename().c_str());
  if (fp == 0)
    return 0;
  bam_header_t * newheader = sam_header_read(fp);
  sam_close(fp);
  return newheader;
}
void metaBigClose(struct metaBig** pMb) /* close the file and free up everything. */ { struct metaBig* mb = *pMb; hashFree(&mb->chromSizeHash); if (mb->rgList) hashFree(&mb->rgList); if (mb->sections) bedFreeList(&mb->sections); if (mb->originalFileName) freeMem(mb->originalFileName); if (mb->fileName) freeMem(mb->fileName); if (mb->baseFileName) freeMem(mb->baseFileName); if (mb->remoteSiteAndDir) freeMem(mb->remoteSiteAndDir); #ifdef USE_HTSLIB if (mb->idx) hts_idx_destroy(mb->idx); #endif if (mb->type == isaBigBed) bigBedFileClose(&mb->big.bbi); #ifdef USE_HTSLIB else if (mb->type == isaBam) sam_close(mb->big.bam); #endif else bigWigFileClose(&mb->big.bbi); #ifdef USE_HTSLIB if (mb->header) bam_hdr_destroy(mb->header); #endif freez(pMb); }
int bam_idxstats(int argc, char *argv[]) { hts_idx_t* idx; bam_hdr_t* header; samFile* fp; if (argc < 2) { fprintf(pysamerr, "Usage: samtools idxstats <in.bam>\n"); return 1; } fp = sam_open(argv[1], "r"); if (fp == NULL) { fprintf(pysamerr, "[%s] fail to open BAM.\n", __func__); return 1; } header = sam_hdr_read(fp); idx = sam_index_load(fp, argv[1]); if (idx == NULL) { fprintf(pysamerr, "[%s] fail to load the index.\n", __func__); return 1; } int i; for (i = 0; i < header->n_targets; ++i) { // Print out contig name and length printf("%s\t%d", header->target_name[i], header->target_len[i]); // Now fetch info about it from the meta bin uint64_t u, v; hts_idx_get_stat(idx, i, &u, &v); printf("\t%" PRIu64 "\t%" PRIu64 "\n", u, v); } // Dump information about unmapped reads printf("*\t0\t0\t%" PRIu64 "\n", hts_idx_get_n_no_coor(idx)); bam_hdr_destroy(header); hts_idx_destroy(idx); sam_close(fp); return 0; }
// remember to clean up with bam_destroy1(b); bam1_t* alignment_to_bam(const string& sam_header, const Alignment& alignment, const string& refseq, const int32_t refpos, const string& cigar, const string& mateseq, const int32_t matepos, const int32_t tlen) { assert(!sam_header.empty()); string sam_file = "data:" + sam_header + alignment_to_sam(alignment, refseq, refpos, cigar, mateseq, matepos, tlen); const char* sam = sam_file.c_str(); samFile *in = sam_open(sam, "r"); bam_hdr_t *header = sam_hdr_read(in); bam1_t *aln = bam_init1(); if (sam_read1(in, header, aln) >= 0) { bam_hdr_destroy(header); sam_close(in); // clean up return aln; } else { cerr << "[vg::alignment] Failure to parse SAM record" << endl << sam << endl; exit(1); } }
static int aux_fields1(void) { static const char sam[] = "data:" "@SQ\tSN:one\tLN:1000\n" "@SQ\tSN:two\tLN:500\n" "r1\t0\tone\t500\t20\t8M\t*\t0\t0\tATGCATGC\tqqqqqqqq\tXA:A:k\tXi:i:37\tXf:f:" xstr(PI) "\tXd:d:" xstr(E) "\tXZ:Z:" HELLO "\tXH:H:" BEEF "\tXB:B:c,-2,0,+2\tZZ:i:1000000\n"; // Canonical form of the alignment record above, as output by sam_format1() static const char r1[] = "r1\t0\tone\t500\t20\t8M\t*\t0\t0\tATGCATGC\tqqqqqqqq\tXA:A:k\tXi:i:37\tXf:f:3.14159\tXd:d:2.71828\tXZ:Z:" HELLO "\tXH:H:" BEEF "\tXB:B:c,-2,0,2\tZZ:i:1000000"; samFile *in = sam_open(sam, "r"); bam_hdr_t *header = sam_hdr_read(in); bam1_t *aln = bam_init1(); uint8_t *p; uint32_t n; kstring_t ks = { 0, 0, NULL }; if (sam_read1(in, header, aln) >= 0) { if ((p = check_bam_aux_get(aln, "XA", 'A')) && bam_aux2A(p) != 'k') fail("XA field is '%c', expected 'k'", bam_aux2A(p)); if ((p = check_bam_aux_get(aln, "Xi", 'C')) && bam_aux2i(p) != 37) fail("Xi field is %d, expected 37", bam_aux2i(p)); if ((p = check_bam_aux_get(aln, "Xf", 'f')) && fabs(bam_aux2f(p) - PI) > 1E-6) fail("Xf field is %.12f, expected pi", bam_aux2f(p)); if ((p = check_bam_aux_get(aln, "Xd", 'd')) && fabs(bam_aux2f(p) - E) > 1E-6) fail("Xf field is %.12f, expected e", bam_aux2f(p)); if ((p = check_bam_aux_get(aln, "XZ", 'Z')) && strcmp(bam_aux2Z(p), HELLO) != 0) fail("XZ field is \"%s\", expected \"%s\"", bam_aux2Z(p), HELLO); if ((p = check_bam_aux_get(aln, "XH", 'H')) && strcmp(bam_aux2Z(p), BEEF) != 0) fail("XH field is \"%s\", expected \"%s\"", bam_aux2Z(p), BEEF); // TODO Invent and use bam_aux2B() if ((p = check_bam_aux_get(aln, "XB", 'B')) && ! 
(memcmp(p, "Bc", 2) == 0 && (memcpy(&n, p+2, 4), n) == 3 && memcmp(p+6, "\xfe\x00\x02", 3) == 0)) fail("XB field is %c,..., expected c,-2,0,+2", p[1]); if ((p = check_bam_aux_get(aln, "ZZ", 'I')) && bam_aux2i(p) != 1000000) fail("ZZ field is %d, expected 1000000", bam_aux2i(p)); if (sam_format1(header, aln, &ks) < 0) fail("can't format record"); if (strcmp(ks.s, r1) != 0) fail("record formatted incorrectly: \"%s\"", ks.s); free(ks.s); } else fail("can't read record"); bam_destroy1(aln); bam_hdr_destroy(header); sam_close(in); return 1; }
/*
 * Closes a samfile_t and frees it; a NULL handle is ignored.
 * The header is destroyed when present.  Bit 0 of fp->type selects
 * bam_close() on x.bam; otherwise a type of exactly 2 selects sam_close()
 * on x.tamr.  Any other type closes no file.
 */
void samclose(samfile_t *fp)
{
    if (fp != 0) {
        if (fp->header) {
            bam_header_destroy(fp->header);
        }

        if (fp->type & 1) {
            bam_close(fp->x.bam);
        } else if (fp->type == 2) {
            sam_close(fp->x.tamr);
        }

        free(fp);
    }
}
/*
 * Closes a samfile_t and frees it; a NULL handle is ignored.
 * The header is destroyed only for handles opened for reading
 * (is_write == 0); for writing it is left untouched.
 */
void samclose(samfile_t *fp)
{
    if (fp == NULL)
        return;

    if (fp->header && !fp->is_write)
        bam_hdr_destroy(fp->header);

    sam_close(fp->file);
    free(fp);
}
int bam_mating(int argc, char *argv[]) { samFile *in, *out; int c, remove_reads = 0, proper_pair_check = 1, add_ct = 0; sam_global_args ga = SAM_GLOBAL_ARGS_INIT; char wmode[3] = {'w', 'b', 0}; static const struct option lopts[] = { SAM_OPT_GLOBAL_OPTIONS('-', 0, 'O', 0, 0), { NULL, 0, NULL, 0 } }; // parse args if (argc == 1) { usage(stdout); return 0; } while ((c = getopt_long(argc, argv, "rpcO:", lopts, NULL)) >= 0) { switch (c) { case 'r': remove_reads = 1; break; case 'p': proper_pair_check = 0; break; case 'c': add_ct = 1; break; default: if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break; /* else fall-through */ case '?': usage(stderr); return 1; } } if (optind+1 >= argc) { usage(stderr); return 1; } // init if ((in = sam_open_format(argv[optind], "rb", &ga.in)) == NULL) { fprintf(stderr, "[bam_mating] cannot open input file\n"); return 1; } sam_open_mode(wmode+1, argv[optind+1], NULL); if ((out = sam_open_format(argv[optind+1], wmode, &ga.out)) == NULL) { fprintf(stderr, "[bam_mating] cannot open output file\n"); return 1; } // run bam_mating_core(in, out, remove_reads, proper_pair_check, add_ct); // cleanup sam_close(in); sam_close(out); sam_global_args_free(&ga); return 0; }
/*
 * Closes a samfile_t and frees it; a NULL handle is ignored.
 * The header is destroyed when present.  The underlying stream is closed
 * according to fp->type: TYPE_BAM uses bam_close() on x.bam, otherwise
 * TYPE_READ uses sam_close() on x.tamr, and anything else is closed with
 * fclose() on x.tamw.
 */
void samclose(samfile_t *fp)
{
    if (fp != 0) {
        if (fp->header) {
            bam_header_destroy(fp->header);
        }

        if (fp->type & TYPE_BAM) {
            bam_close(fp->x.bam);
        } else if (fp->type & TYPE_READ) {
            sam_close(fp->x.tamr);
        } else {
            fclose(fp->x.tamw);
        }

        free(fp);
    }
}
/*
 * Tears down a state_t: the optional "unaccounted" header and file, the
 * merged input file, every per-read-group header/file/id (output_count
 * entries), the merged input header, the arrays themselves, the rg_hash
 * table and finally the struct.  A NULL status is ignored.
 */
static void cleanup_state(state_t* status)
{
    size_t idx;

    if (status == NULL) return;

    if (status->unaccounted_header != NULL)
        bam_hdr_destroy(status->unaccounted_header);
    if (status->unaccounted_file != NULL)
        sam_close(status->unaccounted_file);

    sam_close(status->merged_input_file);

    idx = 0;
    while (idx < status->output_count) {
        bam_hdr_destroy(status->rg_output_header[idx]);
        sam_close(status->rg_output_file[idx]);
        free(status->rg_id[idx]);
        ++idx;
    }

    bam_hdr_destroy(status->merged_input_header);
    free(status->rg_output_header);
    free(status->rg_output_file);
    kh_destroy_c2i(status->rg_hash);
    free(status->rg_id);
    free(status);
}
/*
 * Closes fp and, when sam_close() returns a negative value, reports it via
 * print_error() and stores EXIT_FAILURE in *retp.
 *
 *   subcmd      passed through to print_error()
 *   fname       file name for the message (printed in quotes) or NULL
 *   null_fname  text used in the message when fname is NULL
 *   retp        left untouched on success
 */
static void check_sam_close(const char *subcmd, samFile *fp, const char *fname, const char *null_fname, int *retp)
{
    const int status = sam_close(fp);

    if (status < 0) {
        // TODO Need error infrastructure so we can print a message instead of r
        if (fname != NULL)
            print_error(subcmd, "error closing \"%s\": %d", fname, status);
        else
            print_error(subcmd, "error closing %s: %d", null_fname, status);

        *retp = EXIT_FAILURE;
    }
}
/*
 * Writes the header h followed by the l records in buf to file fn, opened
 * with the given mode.  hts_set_threads() is requested only when n_threads
 * is greater than 1.  Returns silently when the file cannot be opened; the
 * results of the individual write calls are not inspected.
 */
static void write_buffer(const char *fn, const char *mode, size_t l, bam1_p *buf, const bam_hdr_t *h, int n_threads)
{
    size_t k = 0;
    samFile *out = sam_open(fn, mode);

    if (!out) {
        return;
    }

    sam_hdr_write(out, h);
    if (n_threads > 1) {
        hts_set_threads(out, n_threads);
    }

    while (k < l) {
        sam_write1(out, h, buf[k]);
        ++k;
    }

    sam_close(out);
}
/*
 * Tears down a state_t and reports close failures.
 *
 *   status       state to release; NULL is accepted and yields 0
 *   check_close  when true, a failing sam_close() on the unaccounted file or
 *                on any per-read-group output file prints a message to
 *                pysam_stderr and makes the function return -1
 *
 * The merged input file is closed without looking at the result.
 * Returns 0, or -1 as described above.
 */
static int cleanup_state(state_t* status, bool check_close)
{
    int rc = 0;
    size_t k;

    if (status == NULL) return 0;

    if (status->unaccounted_header != NULL)
        bam_hdr_destroy(status->unaccounted_header);

    if (status->unaccounted_file != NULL) {
        const int closed = sam_close(status->unaccounted_file);
        if (closed < 0 && check_close) {
            fprintf(pysam_stderr, "Error on closing unaccounted file\n");
            rc = -1;
        }
    }

    sam_close(status->merged_input_file);

    /* Each per-read-group array may itself be NULL, so every access is
     * guarded by a test of the array pointer. */
    for (k = 0; k < status->output_count; k++) {
        if (status->rg_output_header != NULL && status->rg_output_header[k] != NULL)
            bam_hdr_destroy(status->rg_output_header[k]);

        if (status->rg_output_file != NULL && status->rg_output_file[k] != NULL) {
            const int closed = sam_close(status->rg_output_file[k]);
            if (closed < 0 && check_close) {
                fprintf(pysam_stderr, "Error on closing output file '%s'\n", status->rg_output_file_name[k]);
                rc = -1;
            }
        }

        if (status->rg_id != NULL)
            free(status->rg_id[k]);
        if (status->rg_output_file_name != NULL)
            free(status->rg_output_file_name[k]);
    }

    if (status->merged_input_header != NULL)
        bam_hdr_destroy(status->merged_input_header);

    free(status->rg_output_header);
    free(status->rg_output_file);
    free(status->rg_output_file_name);
    kh_destroy_c2i(status->rg_hash);
    free(status->rg_id);
    free(status);

    return rc;
}
/*
 * Opens opt->in_name and prints name, flag, sequence, qualities and lane id
 * of every record to stdout.  Calls die() when the input cannot be opened,
 * when opt->out_name already exists and opt->f is false, when the header
 * cannot be read, or when reading stops on an error rather than at
 * end-of-file.
 */
void bam_parser(opt_t *opt)
{
    samFile *in = sam_open(opt->in_name, "r");
    if(in == NULL) die("bam_parser: fail to open file '%s'", opt->in_name);

    // if output file exists but not force to overwrite
    if(access(opt->out_name, F_OK)!=-1 && opt->f==false)
        die("bam_parser: %s exists, use option -f to overwrite", opt->out_name);

    bam_hdr_t *header = sam_hdr_read(in);
    if(header == NULL)
        die("bam_parser: fail to read the header of '%s'", opt->in_name);

    bam1_t *aln = bam_init1();
    int ret;

    while((ret=sam_read1(in, header, aln)) >= 0)
        printf("name=%s\nflag=%d\nseq=%s\nqual=%s\nlane_id=%d\n",
               get_read_name(aln), aln->core.flag, get_sequence(aln),
               get_qualities(aln), get_lane_id(aln));

    // NOTE(review): header is never released in this function -- it should
    // be handed to the htslib header destructor once it is no longer needed.
    bam_destroy1(aln);
    sam_close(in);

    // -1 is the normal end-of-file result; anything lower is a read error.
    if(ret < -1)
        die("bam_parser: error while reading '%s'", opt->in_name);
}
/*
 * Closes a bwa_seqio_t reader and frees it; a NULL reader is ignored.
 * Non-BAM readers close the gz stream and destroy the kseq parser.  BAM
 * readers close bs->fp (sam_close() plus header destruction when built with
 * USE_HTSLIB, bam_close() otherwise) and call err_fatal_simple() when the
 * close reports a non-zero result.
 */
void bwa_seq_close(bwa_seqio_t *bs)
{
    if (bs == 0) return;

    if (!bs->is_bam) {
        err_gzclose(bs->ks->f->f);
        kseq_destroy(bs->ks);
    } else {
#ifdef USE_HTSLIB
        if (sam_close(bs->fp) != 0) err_fatal_simple("Error closing sam/bam file");
        bam_hdr_destroy(bs->h);
#else
        if (bam_close(bs->fp) != 0) err_fatal_simple("Error closing bam file");
#endif
    }

    free(bs);
}
// Builds SAM header text and parses it into a bam_hdr_t.
//
//   header       out: receives the generated header text
//   path_length  one @SQ line (SN/LN) per entry
//   rg_sample    one @RG line (ID/SM) per entry
//
// The text starts with a fixed @HD line and ends with a fixed @PG line.
// Returns the parsed header, or nullptr when the in-memory "data:" file
// cannot be opened (sam_hdr_read() may also return a null header).
bam_hdr_t* hts_string_header(string& header,
                             map<string, int64_t>& path_length,
                             map<string, string>& rg_sample) {
    stringstream hdr;
    hdr << "@HD\tVN:1.5\tSO:unknown\n";
    for (auto& p : path_length) {
        hdr << "@SQ\tSN:" << p.first << "\t" << "LN:" << p.second << "\n";
    }
    for (auto& s : rg_sample) {
        hdr << "@RG\tID:" << s.first << "\t" << "SM:" << s.second << "\n";
    }
    hdr << "@PG\tID:0\tPN:vg\n";
    header = hdr.str();

    string sam = "data:" + header;
    samFile *in = sam_open(sam.c_str(), "r");
    if (in == nullptr) {
        // Never hand a null handle to sam_hdr_read()/sam_close().
        return nullptr;
    }
    bam_hdr_t *h = sam_hdr_read(in);
    sam_close(in);
    return h;
}
void convert_sam_to_bam(char* sam_input, char* bam_input) { bam1_t* bam_p = bam_init1(); LOG_DEBUG("CONVERT-START: sam to bam\n"); //open SAM file for read if (time_flag) { start_timer(t1_convert); } tamFile sam_fd = sam_open(sam_input); //open BAM file for write bam_file_t* bam_file_p = bam_fopen_mode(bam_input, NULL, "w"); //read header from SAM file bam_header_t* bam_header_p = sam_header_read(sam_fd); //write header to BAM file bam_header_write(bam_file_p->bam_fd, bam_header_p); //write alignments to BAM file while (sam_read1(sam_fd, bam_header_p, bam_p) > 0) { bam_write1(bam_file_p->bam_fd, bam_p); num_alignments++; } //close BAM and SAM files, free bam alignment and bam file object bam_fclose(bam_file_p); sam_close(sam_fd); bam_header_destroy(bam_header_p); bam_destroy1(bam_p); if (time_flag) { stop_timer(t1_convert, t2_convert, convert_time); } //number_of_batchs = 1, convention value for statistics (not real batch) number_of_batchs = 1; }
BM_mappedRead * extractReads(char * bamFile, char ** contigs, int numContigs, uint16_t * groups, char * prettyName, int headersOnly, int minMapQual, int maxMisMatches, int ignoreSuppAlignments, int ignoreSecondaryAlignments) { //----- // code uses the pattern outlined in samtools view (sam_view.c) // thanks lh3! // int i = 0; int result = -1; int hh = 0; int supp_check = 0x0; // include supp mappings if (ignoreSuppAlignments) { supp_check |= BAM_FSUPPLEMENTARY; } if (ignoreSecondaryAlignments) { supp_check |= BAM_FSECONDARY; } // we need to let the users know if their pairings // will be corrupted int p_corrupt = 0; // helper variables samFile *in = 0; bam_hdr_t *header = NULL; bam1_t *b = bam_init1(); BM_mappedRead * root = 0; BM_mappedRead * prev = 0; // open file handlers if ((in = sam_open(bamFile, "r")) == 0) { fprintf(stderr, "ERROR: Failed to open \"%s\" for reading.\n", bamFile); } else { // retrieve the header if ((header = sam_hdr_read(in)) == 0) { fprintf(stderr, "ERROR: Failed to read the header from \"%s\".\n", bamFile); } else { // check the index is intact hts_idx_t *idx = sam_index_load(in, bamFile); // load index if (idx == 0) { // index is unavailable fprintf(stderr, "ERROR: Random retrieval only works "\ "for indexed files.\n"); } else { cfuhash_table_t *pair_buffer = \ cfuhash_new_with_initial_size(1000000); cfuhash_set_flag(pair_buffer, CFUHASH_FROZEN_UNTIL_GROWS); for (hh = 0; hh < numContigs; ++hh) { // parse a region in the format like `chr2:100-200' hts_itr_t *iter = sam_itr_querys(idx, header, contigs[hh]); if (iter == NULL) { // reference name is not found fprintf(stderr, "WARNING: Could not find contig: "\ "[%s] in BAM: [%s].\n", contigs[hh], bamFile); } // fetch alignments int line = 0; while ((result = sam_itr_next(in, iter, b)) >= 0) { bam1_core_t core = b->core; line += 1; // only high quality?, primary? 
mappings if ( core.qual < minMapQual) continue; if ((core.flag & supp_check) != 0) continue; if(bam_aux2i(bam_aux_get(b, "NM")) > maxMisMatches) { continue; } char * seqId = bam_get_qname(b); char * seq = 0; char * qual = 0; int qual_len = 0; int seq_len = 0; // get sequence and quality if(0 == headersOnly) { // no point allocating unused space seq = calloc(core.l_qseq+1, sizeof(char)); qual = calloc(core.l_qseq+1, sizeof(char)); uint8_t *s = bam_get_seq(b); if (core.flag&BAM_FREVERSE) { // reverse the read int r = 0; for (i = core.l_qseq-1; i >=0 ; --i) { seq[r]="=TGKCYSBAWRDMHVN"[bam_seqi(s, i)]; ++r; } } else { for (i = 0; i < core.l_qseq; ++i) { seq[i]="=ACMGRSVTWYHKDBN"[bam_seqi(s, i)]; } } seq_len = core.l_qseq; s = bam_get_qual(b); if (s[0] != 0xff) { qual_len = core.l_qseq; for (i = 0; i < core.l_qseq; ++i) { qual[i] = (char)(s[i] + 33); } } else if (qual != 0) { free(qual); qual = 0; } } // work out pairing information uint8_t rpi = RPI_ERROR; if (core.flag&BAM_FPAIRED) { if(core.flag&BAM_FMUNMAP) { if (core.flag&BAM_FREAD1) { rpi = RPI_SNGL_FIR; } else if (core.flag&BAM_FREAD2) { rpi = RPI_SNGL_SEC; } } else { if (core.flag&BAM_FREAD1) { rpi = RPI_FIR; } else if (core.flag&BAM_FREAD2) { rpi = RPI_SEC; } } } else { rpi = RPI_SNGL; } // make the funky Id #define MAX_SEQ_ID_LEN 80 char * seq_id = calloc(MAX_SEQ_ID_LEN, sizeof(char)); // allocate the string to the buffer but check to // ensure we're not cutting anything off int id_len = snprintf(seq_id, MAX_SEQ_ID_LEN, "b_%s;c_%s;r_%s", prettyName, contigs[hh], seqId); if(id_len >= MAX_SEQ_ID_LEN) { seq_id = calloc(id_len+1, sizeof(char)); snprintf(seq_id, id_len+1, // don't forget the NULL! 
"b_%s;c_%s;r_%s", prettyName, contigs[hh], seqId); } // make the mapped read struct prev = makeMappedRead(seq_id, seq, qual, id_len, seq_len, qual_len, rpi, groups[hh], prev); if (0 == root) { root = prev; } if(rpi == RPI_SNGL || \ rpi == RPI_SNGL_FIR || \ rpi == RPI_SNGL_SEC) { // we can just add away // indicate singleton reads by pointing the // partner pointer to itself prev->partnerRead = prev; } else { // RPI_FIR or RPI_SEC // work out pairing information using the hash // we append a 1 or 2 to the end so that // we don't accidentally pair 1's with 1's etc. char * stripped_result; if(rpi == RPI_FIR) { stripped_result = \ pairStripper(seqId, core.l_qname-1, '2'); } else { stripped_result = \ pairStripper(seqId, core.l_qname-1, '1'); } char * stripped = seqId; if(stripped_result) stripped = stripped_result; //fprintf(stdout, "SEARCH %s\n", stripped); // now stripped always holds a stripped value // see if it is in the hash already BM_mappedRead * stored_MR = \ cfuhash_get(pair_buffer, stripped); if (0 != stored_MR) { // exists in the hash -> Add the pair info if(rpi == RPI_FIR) { prev->partnerRead = stored_MR; } else { stored_MR->partnerRead = prev; } // delete the entry from the hash cfuhash_delete(pair_buffer, stripped); } else { // we should put it in the hash // make sure to change it into something // we will find next time if(rpi == RPI_FIR) stripped[strlen(stripped)-1] = '1'; else stripped[strlen(stripped)-1] = '2'; // check to make sure we're not overwriting // anything important. cfuhash overwrites // duplicate entries, so we need to grab // it and put it to "SNGL_XXX" before we // lose the pointer BM_mappedRead * OWMMR = \ cfuhash_put(pair_buffer, stripped, prev); if(OWMMR) { if(OWMMR->rpi == RPI_FIR) OWMMR->rpi = RPI_SNGL_FIR; else OWMMR->rpi = RPI_SNGL_SEC; OWMMR->partnerRead = OWMMR; printPairCorruptionWarning(p_corrupt); p_corrupt = 1; } } if(stripped_result != 0) { // free this! 
free(stripped_result); stripped_result = 0; } } } hts_itr_destroy(iter); if (result < -1) { fprintf(stderr, "ERROR: retrieval of reads from "\ "contig: \"%s\" failed due to "\ "truncated file or corrupt BAM index "\ "file\n", header->target_name[hh]); break; } } // any entries left in the hash are pairs whose mates did // not meet quality standards size_t key_size = 0; char * key; BM_mappedRead * LOMMR; size_t pr_size = 1; if(cfuhash_each_data(pair_buffer, (void**)&key, &key_size, (void**)&LOMMR, &pr_size)) { do { // get the mapped read // update it's pairing so we know it's really single if (LOMMR->rpi == RPI_FIR) LOMMR->rpi = RPI_SNGL_FIR; else if (LOMMR->rpi == RPI_SEC) LOMMR->rpi = RPI_SNGL_SEC; // indicate singleton reads by pointing the // partner pointer to itself LOMMR->partnerRead = LOMMR; } while(cfuhash_next_data(pair_buffer, (void**)&key, &key_size, (void**)&LOMMR, &pr_size)); } cfuhash_clear(pair_buffer); cfuhash_destroy(pair_buffer); } hts_idx_destroy(idx); // destroy the BAM index } } // always do this if (in) sam_close(in); bam_destroy1(b); if ( header ) bam_hdr_destroy(header); return root; }
int main(int argc, char *argv[]) { samFile *in; char *fn_ref = 0; int flag = 0, c, clevel = -1, ignore_sam_err = 0; char moder[8]; bam_hdr_t *h; bam1_t *b; htsFile *out; char modew[8]; int r = 0, exit_code = 0; hts_opt *in_opts = NULL, *out_opts = NULL, *last = NULL; int nreads = 0; int benchmark = 0; while ((c = getopt(argc, argv, "IbDCSl:t:i:o:N:B")) >= 0) { switch (c) { case 'S': flag |= 1; break; case 'b': flag |= 2; break; case 'D': flag |= 4; break; case 'C': flag |= 8; break; case 'B': benchmark = 1; break; case 'l': clevel = atoi(optarg); flag |= 2; break; case 't': fn_ref = optarg; break; case 'I': ignore_sam_err = 1; break; case 'i': if (add_option(&in_opts, optarg)) return 1; break; case 'o': if (add_option(&out_opts, optarg)) return 1; break; case 'N': nreads = atoi(optarg); } } if (argc == optind) { fprintf(stderr, "Usage: samview [-bSCSIB] [-N num_reads] [-l level] [-o option=value] <in.bam>|<in.sam>|<in.cram> [region]\n"); return 1; } strcpy(moder, "r"); if (flag&4) strcat(moder, "c"); else if ((flag&1) == 0) strcat(moder, "b"); in = sam_open(argv[optind], moder); if (in == NULL) { fprintf(stderr, "Error opening \"%s\"\n", argv[optind]); return EXIT_FAILURE; } h = sam_hdr_read(in); h->ignore_sam_err = ignore_sam_err; b = bam_init1(); strcpy(modew, "w"); if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel); if (flag&8) strcat(modew, "c"); else if (flag&2) strcat(modew, "b"); out = hts_open("-", modew); if (out == NULL) { fprintf(stderr, "Error opening standard output\n"); return EXIT_FAILURE; } /* CRAM output */ if (flag & 8) { int ret; // Parse input header and use for CRAM output out->fp.cram->header = sam_hdr_parse_(h->text, h->l_text); // Create CRAM references arrays if (fn_ref) ret = cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, fn_ref); else // Attempt to fill out a cram->refs[] array from @SQ headers ret = cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, NULL); if (ret != 0) return EXIT_FAILURE; } // Process any options; 
currently cram only. for (; in_opts; in_opts = (last=in_opts)->next, free(last)) { hts_set_opt(in, in_opts->opt, in_opts->val); if (in_opts->opt == CRAM_OPT_REFERENCE) if (hts_set_opt(out, in_opts->opt, in_opts->val) != 0) return EXIT_FAILURE; } for (; out_opts; out_opts = (last=out_opts)->next, free(last)) if (hts_set_opt(out, out_opts->opt, out_opts->val) != 0) return EXIT_FAILURE; if (!benchmark) sam_hdr_write(out, h); if (optind + 1 < argc && !(flag&1)) { // BAM input and has a region int i; hts_idx_t *idx; if ((idx = sam_index_load(in, argv[optind])) == 0) { fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__); return 1; } for (i = optind + 1; i < argc; ++i) { hts_itr_t *iter; if ((iter = sam_itr_querys(idx, h, argv[i])) == 0) { fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]); continue; } while ((r = sam_itr_next(in, iter, b)) >= 0) { if (!benchmark && sam_write1(out, h, b) < 0) { fprintf(stderr, "Error writing output.\n"); exit_code = 1; break; } if (nreads && --nreads == 0) break; } hts_itr_destroy(iter); } hts_idx_destroy(idx); } else while ((r = sam_read1(in, h, b)) >= 0) { if (!benchmark && sam_write1(out, h, b) < 0) { fprintf(stderr, "Error writing output.\n"); exit_code = 1; break; } if (nreads && --nreads == 0) break; } if (r < -1) { fprintf(stderr, "Error parsing input.\n"); exit_code = 1; } r = sam_close(out); if (r < 0) { fprintf(stderr, "Error closing output.\n"); exit_code = 1; } bam_destroy1(b); bam_hdr_destroy(h); r = sam_close(in); if (r < 0) { fprintf(stderr, "Error closing input.\n"); exit_code = 1; } return exit_code; }
int main(int argc, char *argv[]) { int ch; int count = INT_MAX; /* number of times to ping */ int pongcount = -1; char *samhost = "localhost"; unsigned short samport = 7656; while ((ch = getopt(argc, argv, "ac:h:mp:qv")) != -1) { switch (ch) { case 'a': /* bell */ bell = true; break; case 'c': /* packet count */ count = atoi(optarg); break; case 'h': /* SAM host */ samhost = optarg; break; case 'm': /* I2Ping emulation mode */ count = 3; mihi = true; quiet = true; break; case 'p': /* SAM port */ samport = atoi(optarg); break; case 'q': /* quiet mode */ quiet = true; break; case 'v': /* version */ puts("$Id: i2p-ping.c,v 1.6 2004/12/02 17:54:23 mpc Exp $"); puts("Copyright (c) 2004, Matthew P. Cashdollar <*****@*****.**>"); break; case '?': default: usage(); return 0; } } argc -= optind; argv += optind; if (argc == 0) { /* they forgot to specify a ping target */ fprintf(stderr, "Ping who?\n"); return 2; } /* Hook up the callback functions - required by LibSAM */ sam_closeback = &closeback; sam_connectback = &connectback; sam_databack = &databack; sam_diedback = &diedback; sam_logback = &logback; sam_namingback = &namingback; sam_statusback = &statusback; sam_sess_t *session = NULL; /* set to NULL to have LibSAM do the malloc */ session = sam_session_init(session); /* malloc and set defaults */ samerr_t rc = sam_connect(session, samhost, samport, "TRANSIENT", SAM_STREAM, 0); if (rc != SAM_OK) { fprintf(stderr, "SAM connection failed: %s\n", sam_strerror(rc)); sam_session_free(&session); return 3; } pongcount = 0; for (int j = 0; j < argc; j++) { if (strlen(argv[j]) == SAM_PUBKEY_LEN - 1) { memcpy(dest, argv[j], SAM_PUBKEY_LEN); gotdest = true; } else sam_naming_lookup(session, argv[j]); while (!gotdest) /* just wait for the naming lookup to complete */ sam_read_buffer(session); gotdest = false; for (int i = 0; i < count; ++i) { time_t start = time(0); sam_sid_t sid = sam_stream_connect(session, dest); while (laststream != sid && laststatus == SAM_NULL) 
sam_read_buffer(session); /* wait for the connect */ if (laststatus == SAM_OK) sam_stream_close(session, laststream); time_t finish = time(0); laststream = 0; if (laststatus == SAM_OK) { pongcount++; if (bell) printf("\a"); /* putchar() doesn't work for some reason */ if (!mihi) printf("%s: %.0fs\n", argv[j], difftime(finish, start)); else printf("+ "); } else { if (!mihi) printf("%s: %s\n", argv[j], sam_strerror(laststatus)); else printf("- "); } laststatus = SAM_NULL; } if (mihi) printf(" %s\n", argv[j]); } sam_close(session); sam_session_free(&session); return pongcount == 0 ? 1 : 0; }
int main_samview(int argc, char *argv[]) { samFile *in; char *fn_ref = 0; int flag = 0, c, clevel = -1, ignore_sam_err = 0; char moder[8]; bam_hdr_t *h; bam1_t *b; while ((c = getopt(argc, argv, "IbSl:t:")) >= 0) { switch (c) { case 'S': flag |= 1; break; case 'b': flag |= 2; break; case 'l': clevel = atoi(optarg); flag |= 2; break; case 't': fn_ref = optarg; break; case 'I': ignore_sam_err = 1; break; } } if (argc == optind) { fprintf(stderr, "Usage: samview [-bSI] [-l level] <in.bam>|<in.sam> [region]\n"); return 1; } strcpy(moder, "r"); if ((flag&1) == 0) strcat(moder, "b"); in = sam_open(argv[optind], moder, fn_ref); h = sam_hdr_read(in); h->ignore_sam_err = ignore_sam_err; b = bam_init1(); if ((flag&4) == 0) { // SAM/BAM output htsFile *out; char modew[8]; strcpy(modew, "w"); if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel); if (flag&2) strcat(modew, "b"); out = hts_open("-", modew, 0); sam_hdr_write(out, h); if (optind + 1 < argc && !(flag&1)) { // BAM input and has a region int i; hts_idx_t *idx; if ((idx = bam_index_load(argv[optind])) == 0) { fprintf(stderr, "[E::%s] fail to load the BAM index\n", __func__); return 1; } for (i = optind + 1; i < argc; ++i) { hts_itr_t *iter; if ((iter = bam_itr_querys(idx, h, argv[i])) == 0) { fprintf(stderr, "[E::%s] fail to parse region '%s'\n", __func__, argv[i]); continue; } while (bam_itr_next((BGZF*)in->fp, iter, b) >= 0) sam_write1(out, h, b); hts_itr_destroy(iter); } hts_idx_destroy(idx); } else while (sam_read1(in, h, b) >= 0) sam_write1(out, h, b); sam_close(out); } bam_destroy1(b); bam_hdr_destroy(h); sam_close(in); return 0; }
/**
 * Closes the reader's file by passing `sam` to sam_close().
 */
void BAMOrderedReader::close()
{
    sam_close(sam);
}
int main_bedcov(int argc, char *argv[]) { gzFile fp; kstring_t str; kstream_t *ks; hts_idx_t **idx; aux_t **aux; int *n_plp, dret, i, n, c, min_mapQ = 0; int64_t *cnt; const bam_pileup1_t **plp; int usage = 0; sam_global_args ga = SAM_GLOBAL_ARGS_INIT; static const struct option lopts[] = { SAM_OPT_GLOBAL_OPTIONS('-', 0, '-', '-', 0), { NULL, 0, NULL, 0 } }; while ((c = getopt_long(argc, argv, "Q:", lopts, NULL)) >= 0) { switch (c) { case 'Q': min_mapQ = atoi(optarg); break; default: if (parse_sam_global_opt(c, optarg, lopts, &ga) == 0) break; /* else fall-through */ case '?': usage = 1; break; } if (usage) break; } if (usage || optind + 2 > argc) { fprintf(pysam_stderr, "Usage: samtools bedcov [options] <in.bed> <in1.bam> [...]\n\n"); fprintf(pysam_stderr, " -Q INT Only count bases of at least INT quality [0]\n"); sam_global_opt_help(pysam_stderr, "-.--."); return 1; } memset(&str, 0, sizeof(kstring_t)); n = argc - optind - 1; aux = calloc(n, sizeof(aux_t*)); idx = calloc(n, sizeof(hts_idx_t*)); for (i = 0; i < n; ++i) { aux[i] = calloc(1, sizeof(aux_t)); aux[i]->min_mapQ = min_mapQ; aux[i]->fp = sam_open_format(argv[i+optind+1], "r", &ga.in); if (aux[i]->fp) idx[i] = sam_index_load(aux[i]->fp, argv[i+optind+1]); if (aux[i]->fp == 0 || idx[i] == 0) { fprintf(pysam_stderr, "ERROR: fail to open index BAM file '%s'\n", argv[i+optind+1]); return 2; } // TODO bgzf_set_cache_size(aux[i]->fp, 20); aux[i]->header = sam_hdr_read(aux[i]->fp); if (aux[i]->header == NULL) { fprintf(pysam_stderr, "ERROR: failed to read header for '%s'\n", argv[i+optind+1]); return 2; } } cnt = calloc(n, 8); fp = gzopen(argv[optind], "rb"); ks = ks_init(fp); n_plp = calloc(n, sizeof(int)); plp = calloc(n, sizeof(bam_pileup1_t*)); while (ks_getuntil(ks, KS_SEP_LINE, &str, &dret) >= 0) { char *p, *q; int tid, beg, end, pos; bam_mplp_t mplp; for (p = q = str.s; *p && *p != '\t'; ++p); if (*p != '\t') goto bed_error; *p = 0; tid = bam_name2id(aux[0]->header, q); *p = '\t'; if (tid < 0) goto 
bed_error; for (q = p = p + 1; isdigit(*p); ++p); if (*p != '\t') goto bed_error; *p = 0; beg = atoi(q); *p = '\t'; for (q = p = p + 1; isdigit(*p); ++p); if (*p == '\t' || *p == 0) { int c = *p; *p = 0; end = atoi(q); *p = c; } else goto bed_error; for (i = 0; i < n; ++i) { if (aux[i]->iter) hts_itr_destroy(aux[i]->iter); aux[i]->iter = sam_itr_queryi(idx[i], tid, beg, end); } mplp = bam_mplp_init(n, read_bam, (void**)aux); bam_mplp_set_maxcnt(mplp, 64000); memset(cnt, 0, 8 * n); while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0) if (pos >= beg && pos < end) for (i = 0; i < n; ++i) cnt[i] += n_plp[i]; for (i = 0; i < n; ++i) { kputc('\t', &str); kputl(cnt[i], &str); } fputs(str.s, pysam_stdout) & fputc('\n', pysam_stdout); bam_mplp_destroy(mplp); continue; bed_error: fprintf(pysam_stderr, "Errors in BED line '%s'\n", str.s); } free(n_plp); free(plp); ks_destroy(ks); gzclose(fp); free(cnt); for (i = 0; i < n; ++i) { if (aux[i]->iter) hts_itr_destroy(aux[i]->iter); hts_idx_destroy(idx[i]); bam_hdr_destroy(aux[i]->header); sam_close(aux[i]->fp); free(aux[i]); } free(aux); free(idx); free(str.s); sam_global_args_free(&ga); return 0; }