/*
 * Run a pileup over every record readable from `fp`.
 *
 * A pileup buffer is created around the callback `func` (which receives
 * `func_data`), configured with `mask`, and fed each record returned by
 * bam_read1() until that call reports a negative value.  A final push of 0
 * finishes the pileup before the buffer and the record are released.
 *
 * Always returns 0; the terminating bam_read1() status is not reported.
 */
int bam_pileup_file(bamFile fp, int mask, bam_pileup_f func, void *func_data)
{
	bam1_t *rec = bam_init1();
	bam_plbuf_t *plbuf = bam_plbuf_init(func, func_data);

	bam_plbuf_set_mask(plbuf, mask);

	for (;;) {
		int status = bam_read1(fp, rec);

		if (status < 0)
			break;
		bam_plbuf_push(rec, plbuf);
	}

	/* Pushing 0 signals that no more records will follow. */
	bam_plbuf_push(0, plbuf);

	bam_plbuf_destroy(plbuf);
	bam_destroy1(rec);
	return 0;
}
uint calculate_cov_params(const char* const bam_name, const int32_t tid, const int32_t start, const int32_t stop) { bamFile fp = bam_open(bam_name, "r"); bam_index_t* fp_index = bam_index_load(bam_name); bam_plbuf_t *buf; covdata* cvdt = ckallocz(sizeof(covdata)); cvdt->tid = tid; cvdt->begin = start; cvdt->end = stop; cvdt->coverage = ckallocz((cvdt->end - cvdt->begin) * sizeof(uint32_t)); buf = bam_plbuf_init(pileup_func, cvdt); bam_fetch(fp, fp_index, tid, start, stop, buf, fetch_func); bam_plbuf_push(0, buf); bam_plbuf_destroy(buf); // calculate the mean coverage in the region of the putative deletion uint i, covsum; for(i = 0, covsum = 0; i < (cvdt->end - cvdt->begin); i++){ covsum += cvdt->coverage[i]; } uint avgcov = floor(covsum * 1.0/(cvdt->end - cvdt->begin)); ckfree(cvdt->coverage); ckfree(cvdt); bam_close(fp); bam_index_destroy(fp_index); return avgcov; }
/* callback for bam_fetch() */ static int fetch_func(const bam1_t *b, void *data) { bam_plbuf_t *buf = data; if (b->core.flag & BAM_FPROPER_PAIR) { bam_plbuf_push(b, buf); } return 0; }
/*
 * Run a pileup over every record readable from the SAM/BAM handle `fp`.
 *
 * `mask` selects which flag bits disqualify a read:
 *   - negative: BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP
 *   - otherwise: the given bits plus BAM_FUNMAP
 * A read carrying any of those bits is marked BAM_FUNMAP before it is pushed
 * (the original author's note: bam_plp_push() itself now filters out
 * unmapped reads only).
 *
 * `func` is the pileup callback and receives `func_data`.
 * Always returns 0.
 */
int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data)
{
	bam1_t *rec = bam_init1();
	bam_plbuf_t *plbuf = bam_plbuf_init(func, func_data);
	int filter;

	if (mask < 0)
		filter = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP;
	else
		filter = mask | BAM_FUNMAP;

	while (samread(fp, rec) >= 0) {
		/* Flag the read as unmapped so that the push discards it. */
		if (rec->core.flag & filter)
			rec->core.flag |= BAM_FUNMAP;
		bam_plbuf_push(rec, plbuf);
	}

	/* Pushing 0 signals that no more records will follow. */
	bam_plbuf_push(0, plbuf);

	bam_plbuf_destroy(plbuf);
	bam_destroy1(rec);
	return 0;
}
int main(int argc, char *argv[]) { char *progname; char *bamfilename; int32_t tid; samfile_t *bamin; bam_index_t *bamidx; bam_plbuf_t *buf; bam1_t *bam_read; uint32_t next_pos = 1; progname = *argv; argv++; argc--; if (argc < 2) { printf("Usage: %s bam_file tid\n", progname); exit(1); } else { bamfilename = argv[0]; tid = strtol(argv[1], NULL, 10); } /* try to open bam file */ bamin = samopen(bamfilename, "rb", NULL); if (!bamin) { fprintf(stderr, "Error opening bamfile %s\n", bamfilename); exit(1); } /* try to open index */ bamidx = bam_index_load(bamfilename); if (!bamidx) { fprintf(stderr, "Error opening index for %s\n", bamfilename); exit(1); } bam_read = bam_init1(); buf = bam_plbuf_init(&pileup_func, &next_pos); /* disable maximum pileup depth */ bam_plp_set_maxcnt(buf->iter, INT_MAX); bam_fetch(bamin->x.bam, bamidx, tid, 0, INT_MAX, buf, &fetch_func); bam_plbuf_push(0, buf); /* finish pileup */ bam_plbuf_destroy(buf); bam_destroy1(bam_read); bam_index_destroy(bamidx); samclose(bamin); return 0; }
int main(int argc, char *argv[]) { tmpstruct_t tmp; if (argc == 1) { fprintf(stderr, "Usage: calDepth <in.bam> [region]\n"); return 1; } tmp.beg = 0; tmp.end = 0x7fffffff; tmp.in = samopen(argv[1], "rb", 0); if (tmp.in == 0) { fprintf(stderr, "Fail to open BAM file %s\n", argv[1]); return 1; } if (argc == 2) { // if a region is not specified sampileup(tmp.in, -1, pileup_func, &tmp); } else { int ref; bam_index_t *idx; bam_plbuf_t *buf; idx = bam_index_load(argv[1]); // load BAM index if (idx == 0) { fprintf(stderr, "BAM indexing file is not available.\n"); return 1; } bam_parse_region(tmp.in->header, argv[2], &ref, &tmp.beg, &tmp.end); // parse the region if (ref < 0) { fprintf(stderr, "Invalid region %s\n", argv[2]); return 1; } buf = bam_plbuf_init(pileup_func, &tmp); // initialize pileup bam_fetch(tmp.in->x.bam, idx, ref, tmp.beg, tmp.end, buf, fetch_func); bam_plbuf_push(0, buf); // finalize pileup bam_index_destroy(idx); bam_plbuf_destroy(buf); } samclose(tmp.in); return 0; }
void mapper( char *ref, int length, int start_base_pos, const char *bam ) { anal_t input; gzFile pRef; kseq_t * seq = NULL; char chr[8] = { 0, }; int ret; bam_plbuf_t *buf; bam1_t *b; /* fprintf( stderr, "ref: %s\n", ref ); fprintf( stderr, "length: %d\n", length ); fprintf( stderr, "start_base_pos: %d\n", start_base_pos ); fprintf( stderr, "bam: %s\n", bam ); */ input.beg = 0; input.end = 0x7fffffff; input.in = samopen(bam, "rb", 0); if (input.in == 0) { fprintf(stderr, "Fail to open BAM file %s\n", bam); return; } pRef = gzopen( ref, "r" ); fprintf( stderr, "ref : %s\n", ref ); fprintf( stderr, "pRef: %p\n", pRef ); if( pRef == NULL ) { fprintf( stderr, "ref : %s\n", ref ); fprintf( stderr, "pRef: %p\n", pRef ); return; } seq = kseq_init( pRef ); b = bam_init1(); // alloc memory size of bam1_t //fprintf( stderr, "%\pn", b ); buf = bam_plbuf_init(pileup_func, &input); // alloc memory bam_plbuf_set_mask(buf, -1); while ((ret = samread( input.in, b)) >= 0) { bam_plbuf_push(b, buf); //fprintf( stderr, "%x\n", b->core.flag ); if( b->core.flag & 0x0004 ) // unmapped { // do nothing /* qname1 = strtok(bam1_qname(b), ":\t\n "); qname2 = strtok(NULL, ":\t\n "); qname3 = atoi(qname2); fprintf( stderr, "%s:%10d:%s:%d\t%c:%d:%d:%d\n", qname1, qname3, "*", b->core.pos, '*', b->core.flag, b->core.qual, ret ); */ fprintf( stdout, "%s:%s:%d\t%c:0x%x:%d:%d\n", bam1_qname(b), "*", b->core.pos+1, '*', b->core.flag, b->core.qual, ret ); /* fprintf( stderr, "%s:%s:%d\t%c:0x%x:%d:%d\n", bam1_qname(b), "*", b->core.pos, '*', b->core.flag, b->core.qual, ret ); */ } else { // to find a base in the reference genome, seq. 
if( ( seq != NULL ) && ( strcmp( input.in->header->target_name[b->core.tid], chr ) == 0 ) ) { // already found that // fprintf( stderr, "found : %s\n", chr ); }else { if( find_chr(input.in->header->target_name[b->core.tid], seq, chr) < 0 ) { fprintf( stderr, "ERROR : cannot find chromosome %s\n", \ input.in->header->target_name[b->core.tid] ); }else { fprintf( stderr, "FOUND CHR : %s\n", chr ); } } // remove not aligned to the chromosome fprintf( stdout, "%s:%s:%d\t%c:%d:%d:%d\n", bam1_qname(b), input.in->header->target_name[b->core.tid], b->core.pos+1, seq->seq.s[b->core.pos], b->core.flag, b->core.qual, ret ); /* fprintf( stderr, "%s:%s:%d\t%c:%d:%d:%d\n", bam1_qname(b), input.in->header->target_name[b->core.tid], b->core.pos, seq->seq.s[b->core.pos], b->core.flag, b->core.qual, ret ); */ } } // for the last bases... // printf("pos:%d(%c), flag:%d qual: %d(ret %d)\n", // b->core.pos+1, seq->seq.s[b->core.pos], b->core.flag, b->core.qual, ret ); bam_plbuf_push(0, buf); bam_plbuf_destroy(buf); // release memory bam_destroy1(b); // release memory size of bam1_t samclose(input.in); kseq_destroy( seq ); gzclose( pRef ); return; }
/*
 * Push the alignment `b` into the pileup buffer stored in `tv->plbuf` and
 * return whatever bam_plbuf_push() returns.
 */
int bam_lplbuf_push(const bam1_t *b, bam_lplbuf_t *tv)
{
	int status;

	status = bam_plbuf_push(b, tv->plbuf);
	return status;
}
// callback for bam_fetch() static int fetch_func(const bam1_t *b, void *data) { bam_plbuf_t *buf = (bam_plbuf_t*)data; bam_plbuf_push(b, buf); return 0; }
int main_ld(int argc, char *argv[]) { int chr; //! chromosome identifier int beg; //! beginning coordinate for analysis int end; //! end coordinate for analysis int ref; //! ref long num_windows; //! number of windows std::string msg; //! string for error message bam_plbuf_t *buf; //! pileup buffer ldData t; // parse the command line options std::string region = t.parseCommandLine(argc, argv); // check input BAM file for errors t.checkBAM(); // initialize the sample data structure t.bam_smpl_init(); // add samples t.bam_smpl_add(); // initialize error model t.em = errmod_init(1.0-0.83); // parse genomic region int k = bam_parse_region(t.h, region, &chr, &beg, &end); if (k < 0) { msg = "Bad genome coordinates: " + region; fatal_error(msg, __FILE__, __LINE__, 0); } // fetch reference sequence t.ref_base = faidx_fetch_seq(t.fai_file, t.h->target_name[chr], 0, 0x7fffffff, &(t.len)); // calculate the number of windows if (t.flag & BAM_WINDOW) num_windows = ((end-beg)-1)/t.win_size; else { t.win_size = (end-beg); num_windows = 1; } // iterate through all windows along specified genomic region for (long cw=0; cw < num_windows; cw++) { // construct genome coordinate string std::string scaffold_name(t.h->target_name[chr]); std::ostringstream winc(scaffold_name); winc.seekp(0, std::ios::end); winc << ":" << beg+(cw*t.win_size)+1 << "-" << ((cw+1)*t.win_size)+(beg-1); std::string winCoord = winc.str(); // initialize number of sites to zero t.num_sites = 0; // parse the BAM file and check if region is retrieved from the reference if (t.flag & BAM_WINDOW) { k = bam_parse_region(t.h, winCoord, &ref, &(t.beg), &(t.end)); if (k < 0) { msg = "Bad window coordinates " + winCoord; fatal_error(msg, __FILE__, __LINE__, 0); } } else { ref = chr; t.beg = beg; t.end = end; if (ref < 0) { msg = "Bad scaffold name: " + region; fatal_error(msg, __FILE__, __LINE__, 0); } } // initialize nucdiv variables t.init_ld(); // create population assignments t.assign_pops(); // initialize pileup buf = 
bam_plbuf_init(make_ld, &t); // fetch region from bam file if ((bam_fetch(t.bam_in->x.bam, t.idx, ref, t.beg, t.end, buf, fetch_func)) < 0) { msg = "Failed to retrieve region " + region + " due to corrupted BAM index file"; fatal_error(msg, __FILE__, __LINE__, 0); } // finalize pileup bam_plbuf_push(0, buf); // calculate linkage disequilibrium statistics ld_func fp[3] = {&ldData::calc_zns, &ldData::calc_omegamax, &ldData::calc_wall}; (t.*fp[t.output])(); // print results to stdout t.print_ld(chr); // take out the garbage t.destroy_ld(); bam_plbuf_destroy(buf); } // end of window interation errmod_destroy(t.em); samclose(t.bam_in); bam_index_destroy(t.idx); t.bam_smpl_destroy(); free(t.ref_base); return 0; }