/*
 * Copy a BAM file, dropping the records whose 0-based ordinal is listed on
 * standard input.
 *
 * argv[1] is the input BAM and argv[2] the output BAM. Ids are read one at a
 * time with scanf and compared against a running record counter, so an id only
 * takes effect if it has not already been passed by the counter.
 *
 * Returns 0 on success, -1 on a usage, open, header or "no ids" error.
 */
int main(int argc, char** argv) {
    bamFile in = NULL;
    bamFile out = NULL;
    bam_header_t* header = NULL;
    bam1_t* b = NULL;
    long nextPrunedId = -1;
    long id = 0;
    int status = -1;

    if (argc < 3) {
        printf("No input nor output files provided");
        return -1;
    }

    in = bam_open(argv[1], "r");
    if (in == NULL) {
        printf("opening input file failed");
        goto cleanup;
    }

    out = bam_open(argv[2], "w");
    if (out == NULL) {
        printf("opening output file failed");
        goto cleanup;
    }

    header = bam_header_read(in);
    if (header == NULL) {
        printf("reading header failed");
        goto cleanup;
    }
    if (bam_header_write(out, header) < 0) {
        /* without a header the output is unusable, so stop here */
        printf("writing header failed");
        goto cleanup;
    }

    /* scanf returns EOF (non-zero) on empty input, so test for exactly one
     * converted item instead of negating the result */
    if (scanf("%ld", &nextPrunedId) != 1) {
        printf("warning: no ids provided");
        goto cleanup;
    }

    b = bam_init1();
    while (bam_read1(in, b) >= 0) {
        if (nextPrunedId != id++) {
            /* write BAM back */
            bam_write1(out, b);
        } else if (scanf("%ld", &nextPrunedId) != 1) {
            /* id list exhausted (or unreadable): record ordinals are never
             * negative, so -1 disables pruning and the rest is copied */
            nextPrunedId = -1;
        }
    }
    status = 0;

cleanup:
    /* closing all resources */
    if (header != NULL) {
        bam_header_destroy(header);
    }
    if (in != NULL) {
        bam_close(in);
    }
    if (out != NULL) {
        bam_close(out);
    }
    if (b != NULL) {
        bam_destroy1(b);
    }
    return status;
}
/*
 * Entry point of "samtools depad": reads the BAM named by argv[1] ("-" selects
 * stdin), passes it through bam_pad2unpad() and writes the result to stdout.
 *
 * Returns 0 on success, 1 on a usage error or when a stream cannot be opened.
 */
int main_pad2unpad(int argc, char *argv[])
{
	bamFile in, out;

	if (argc == 1) {
		fprintf(stderr, "Usage: samtools depad <in.bam>\n");
		return 1;
	}

	in = strcmp(argv[1], "-")? bam_open(argv[1], "r") : bam_dopen(fileno(stdin), "r");
	if (in == 0) {
		fprintf(stderr, "[depad] fail to open input file '%s'\n", argv[1]);
		return 1;
	}

	out = bam_dopen(fileno(stdout), "w");
	if (out == 0) {
		fprintf(stderr, "[depad] fail to open standard output for writing\n");
		bam_close(in);
		return 1;
	}

	bam_pad2unpad(in, out);

	bam_close(in);
	bam_close(out);
	return 0;
}
/*
 * Entry point of "samtools fixmate": opens argv[1] for reading and argv[2] for
 * writing ("-" selects stdin / stdout respectively) and hands both streams to
 * bam_mating_core().
 *
 * Returns 0 on success, 1 on a usage error or when a stream cannot be opened.
 */
int bam_mating(int argc, char *argv[])
{
	bamFile in, out;

	if (argc < 3) {
		fprintf(stderr, "Usage: samtools fixmate <in.nameSrt.bam> <out.nameSrt.bam>\n");
		return 1;
	}

	in = (strcmp(argv[1], "-") == 0)? bam_dopen(fileno(stdin), "r") : bam_open(argv[1], "r");
	if (in == 0) {
		fprintf(stderr, "[bam_mating] fail to open input file '%s'\n", argv[1]);
		return 1;
	}

	out = (strcmp(argv[2], "-") == 0)? bam_dopen(fileno(stdout), "w") : bam_open(argv[2], "w");
	if (out == 0) {
		fprintf(stderr, "[bam_mating] fail to open output file '%s'\n", argv[2]);
		bam_close(in);
		return 1;
	}

	bam_mating_core(in, out);

	bam_close(in);
	bam_close(out);
	return 0;
}
int bam_flagstat(int argc, char *argv[]) { bamFile fp; bam_header_t *header; bam_flagstat_t *s; if (argc == optind) { fprintf(pysamerr, "Usage: samtools flagstat <in.bam>\n"); return 1; } fp = strcmp(argv[optind], "-")? bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r"); assert(fp); header = bam_header_read(fp); s = bam_flagstat_core(fp); printf("%lld + %lld in total (QC-passed reads + QC-failed reads)\n", s->n_reads[0], s->n_reads[1]); printf("%lld + %lld duplicates\n", s->n_dup[0], s->n_dup[1]); printf("%lld + %lld mapped (%.2f%%:%.2f%%)\n", s->n_mapped[0], s->n_mapped[1], (float)s->n_mapped[0] / s->n_reads[0] * 100.0, (float)s->n_mapped[1] / s->n_reads[1] * 100.0); printf("%lld + %lld paired in sequencing\n", s->n_pair_all[0], s->n_pair_all[1]); printf("%lld + %lld read1\n", s->n_read1[0], s->n_read1[1]); printf("%lld + %lld read2\n", s->n_read2[0], s->n_read2[1]); printf("%lld + %lld properly paired (%.2f%%:%.2f%%)\n", s->n_pair_good[0], s->n_pair_good[1], (float)s->n_pair_good[0] / s->n_pair_all[0] * 100.0, (float)s->n_pair_good[1] / s->n_pair_all[1] * 100.0); printf("%lld + %lld with itself and mate mapped\n", s->n_pair_map[0], s->n_pair_map[1]); printf("%lld + %lld singletons (%.2f%%:%.2f%%)\n", s->n_sgltn[0], s->n_sgltn[1], (float)s->n_sgltn[0] / s->n_pair_all[0] * 100.0, (float)s->n_sgltn[1] / s->n_pair_all[1] * 100.0); printf("%lld + %lld with mate mapped to a different chr\n", s->n_diffchr[0], s->n_diffchr[1]); printf("%lld + %lld with mate mapped to a different chr (mapQ>=5)\n", s->n_diffhigh[0], s->n_diffhigh[1]); free(s); bam_header_destroy(header); bam_close(fp); return 0; }
/*
 * Release a BAM input stream and everything it owns: both query-name hash
 * tables, the return-list and alignment-type arrays, the memory pool, the
 * BAM file handle, the index and (when present) the iterator.
 * A NULL stream is ignored.
 */
void SR_BamInStreamFree(SR_BamInStream* pBamInStream)
{
    if (pBamInStream == NULL)
        return;

    kh_destroy(queryName, pBamInStream->pNameHashes[PREV_BIN]);
    kh_destroy(queryName, pBamInStream->pNameHashes[CURR_BIN]);

    /* free() accepts NULL, so these need no guard */
    free(pBamInStream->pRetLists);
    free(pBamInStream->pAlgnTypes);

    SR_BamMemPoolFree(pBamInStream->pMemPool);
    bam_close(pBamInStream->fpBamInput);
    bam_index_destroy(pBamInStream->pBamIndex);

    /* the iterator lives in its own heap cell: destroy the iterator, then the cell */
    if (pBamInStream->pBamIterator != NULL)
    {
        bam_iter_destroy(*(pBamInStream->pBamIterator));
        free(pBamInStream->pBamIterator);
        pBamInStream->pBamIterator = NULL;
    }

    free(pBamInStream);
}
/*
 * Sort the first k records of buf with ks_mergesort() and write them, after
 * the header h, to one BAM file.
 *
 *   n >= 0 : the file is "<prefix>.<n as 4 digits>.bam", opened with mode "w1"
 *   n <  0 : the file is "<prefix>.bam", opened with mode "w"
 *
 * When is_stdout is non-zero the records go to stdout instead of the named
 * file. On open failure a message is printed and nothing is written.
 */
static void sort_blocks(int n, int k, bam1_p *buf, const char *prefix, const bam_header_t *h, int is_stdout)
{
	char mode[3];
	char *path;
	bamFile out;
	int rec;

	ks_mergesort(sort, k, buf, 0);

	/* 20 spare bytes cover the ".NNNN.bam" suffix and the terminator */
	path = (char*)calloc(strlen(prefix) + 20, 1);
	if (n < 0) {
		sprintf(path, "%s.bam", prefix);
		strcpy(mode, "w");
	} else {
		sprintf(path, "%s.%.4d.bam", prefix, n);
		strcpy(mode, "w1");
	}

	if (is_stdout)
		out = bam_dopen(fileno(stdout), mode);
	else
		out = bam_open(path, mode);

	if (out == 0) {
		fprintf(stderr, "[sort_blocks] fail to create file %s.\n", path);
		free(path);
		return;
	}
	free(path);

	bam_header_write(out, h);
	rec = 0;
	while (rec < k) {
		bam_write1_core(out, &buf[rec]->core, buf[rec]->data_len, buf[rec]->data);
		++rec;
	}
	bam_close(out);
}
uint calculate_cov_params(const char* const bam_name, const int32_t tid, const int32_t start, const int32_t stop) { bamFile fp = bam_open(bam_name, "r"); bam_index_t* fp_index = bam_index_load(bam_name); bam_plbuf_t *buf; covdata* cvdt = ckallocz(sizeof(covdata)); cvdt->tid = tid; cvdt->begin = start; cvdt->end = stop; cvdt->coverage = ckallocz((cvdt->end - cvdt->begin) * sizeof(uint32_t)); buf = bam_plbuf_init(pileup_func, cvdt); bam_fetch(fp, fp_index, tid, start, stop, buf, fetch_func); bam_plbuf_push(0, buf); bam_plbuf_destroy(buf); // calculate the mean coverage in the region of the putative deletion uint i, covsum; for(i = 0, covsum = 0; i < (cvdt->end - cvdt->begin); i++){ covsum += cvdt->coverage[i]; } uint avgcov = floor(covsum * 1.0/(cvdt->end - cvdt->begin)); ckfree(cvdt->coverage); ckfree(cvdt); bam_close(fp); bam_index_destroy(fp_index); return avgcov; }
/*
 * Close a samfile_t handle and free it. A null handle is ignored.
 *
 * The header, if any, is destroyed first. The stream is then closed according
 * to fp->type: bit 0 set selects bam_close() on x.bam; a type of exactly 2
 * selects sam_close() on x.tamr; any other type closes nothing.
 */
void samclose(samfile_t *fp)
{
	if (fp != 0) {
		if (fp->header) {
			bam_header_destroy(fp->header);
		}

		if (fp->type & 1) {
			bam_close(fp->x.bam);
		} else if (fp->type == 2) {
			sam_close(fp->x.tamr);
		}

		free(fp);
	}
}
/*
 * Close a samfile_t handle and free it. A null handle is ignored.
 *
 * The header, if any, is destroyed first. The stream is then closed according
 * to the flags in fp->type:
 *   TYPE_BAM set           -> bam_close() on x.bam
 *   otherwise TYPE_READ set -> sam_close() on x.tamr
 *   otherwise              -> fclose() on x.tamw
 */
void samclose(samfile_t *fp)
{
	if (fp != 0) {
		if (fp->header) {
			bam_header_destroy(fp->header);
		}

		if (fp->type & TYPE_BAM) {
			bam_close(fp->x.bam);
		} else if (fp->type & TYPE_READ) {
			sam_close(fp->x.tamr);
		} else {
			fclose(fp->x.tamw);
		}

		free(fp);
	}
}
/* check match between reference and bam files. prints an error * message and return non-zero on mismatch */ int checkref(char *fasta_file, char *bam_file) { int i = -1; bam_header_t *header; faidx_t *fai; char *ref; int ref_len = -1; bamFile bam_fp; if (! file_exists(fasta_file)) { LOG_FATAL("Fsata file %s does not exist. Exiting...\n", fasta_file); return 1; } if (0 != strcmp(bam_file, "-") && ! file_exists(bam_file)) { LOG_FATAL("BAM file %s does not exist. Exiting...\n", bam_file); return 1; } bam_fp = strcmp(bam_file, "-") == 0 ? bam_dopen(fileno(stdin), "r") : bam_open(bam_file, "r"); header = bam_header_read(bam_fp); if (!header) { LOG_FATAL("Failed to read BAM header from %s\n", bam_file); return 1; } fai = fai_load(fasta_file); if (!fai) { LOG_FATAL("Failed to fasta index for %s\n", fasta_file); return 1; } for (i=0; i < header->n_targets; i++) { LOG_DEBUG("BAM header target %d of %d: name=%s len=%d\n", i+1, header->n_targets, header->target_name[i], header->target_len[i]); ref = faidx_fetch_seq(fai, header->target_name[i], 0, 0x7fffffff, &ref_len); if (NULL == ref) { LOG_FATAL("Failed to fetch sequence %s from fasta file\n", header->target_name[i]); return -1; } if (header->target_len[i] != ref_len) { LOG_FATAL("Sequence length mismatch for sequence %s (%dbp in fasta; %dbp in bam)\n", header->target_name[i], header->target_len[i], ref_len); return -1; } free(ref); } fai_destroy(fai); bam_header_destroy(header); bam_close(bam_fp); return 0; }
/*
 * Entry point of the fixmate command with option parsing.
 *
 *   -r   sets remove_reads, which is forwarded to bam_mating_core()
 *
 * The two positional arguments are the input and the output BAM; "-" selects
 * stdin / stdout respectively. usage() is called when fewer than two
 * positional arguments remain.
 *
 * Returns 0 on success, 1 when a stream cannot be opened.
 */
int bam_mating(int argc, char *argv[])
{
	bamFile in, out;
	int c, remove_reads=0;

	while ((c = getopt(argc, argv, "r")) >= 0) {
		switch (c) {
			case 'r': remove_reads=1; break;
		}
	}
	if (optind+1 >= argc) usage();

	in = (strcmp(argv[optind], "-") == 0)? bam_dopen(fileno(stdin), "r") : bam_open(argv[optind], "r");
	if (in == 0) {
		fprintf(stderr, "[bam_mating] fail to open input file '%s'\n", argv[optind]);
		return 1;
	}

	out = (strcmp(argv[optind+1], "-") == 0)? bam_dopen(fileno(stdout), "w") : bam_open(argv[optind+1], "w");
	if (out == 0) {
		fprintf(stderr, "[bam_mating] fail to open output file '%s'\n", argv[optind+1]);
		bam_close(in);
		return 1;
	}

	bam_mating_core(in, out, remove_reads);

	bam_close(in);
	bam_close(out);
	return 0;
}
/*
 * Close a sequence reader and free it. A null reader is ignored.
 *
 * A BAM-backed reader closes its BAM handle; any other reader closes the
 * gzip stream underneath the kseq parser and then destroys the parser.
 */
void bwa_seq_close(bwa_seqio_t *bs)
{
	if (bs != 0) {
		if (!bs->is_bam) {
			gzclose(bs->ks->f->f);
			kseq_destroy(bs->ks);
		} else {
			bam_close(bs->fp);
		}
		free(bs);
	}
}
/*
 * Tear down a BAM reader: the multi-pileup iterator, the BAM file handle,
 * the region iterator (when one was set), the per-file record and finally
 * the index.
 *
 * NOTE(review): the header (data->data->h) is not destroyed here, and neither
 * is `data` itself - confirm who owns them.
 */
void closeBamFile(BamReaderData * data)
{
	/* samtools offers no single destructor, so each piece is released by hand */
	bam_mplp_destroy(data->iter);

	bam_close(data->data->fp);
	if (data->data->iter != 0) {
		bam_iter_destroy(data->data->iter);
	}
	free(data->data);

	bam_index_destroy(data->idx);
}
/*
 * Close a sequence reader and free it. A null reader is ignored.
 *
 * A BAM-backed reader closes its BAM handle and reports a non-zero result
 * through err_fatal_simple(). Any other reader closes the gzip stream with
 * err_gzclose() and destroys the kseq parser.
 */
void bwa_seq_close(bwa_seqio_t *bs)
{
	if (bs != 0) {
		if (!bs->is_bam) {
			err_gzclose(bs->ks->f->f);
			kseq_destroy(bs->ks);
		} else if (bam_close(bs->fp) != 0) {
			err_fatal_simple("Error closing bam file");
		}
		free(bs);
	}
}
/*
 * Load a BAM header from file_path for a supported species/assembly pair.
 *
 * Only (HUMAN, NCBI37) is handled. For any other combination, or when the
 * file cannot be opened, NULL is returned. Otherwise the result of
 * bam_header_read() is returned.
 */
bam_header_t* bam_header_new(int specie, int assembly, char* file_path)
{
    /* start from NULL so unsupported combinations do not return garbage */
    bam_header_t* bam_header_p = NULL;

    if ((specie == HUMAN) && (assembly == NCBI37)) {
        bamFile bam_header_file = bam_open(file_path, "r");
        if (bam_header_file != 0) {
            bam_header_p = bam_header_read(bam_header_file);
            bam_close(bam_header_file);
        }
    }

    return bam_header_p;
}
/*
 * Write the header h followed by the first l records of buf to a BAM stream.
 *
 * fn is the output path, or "-" for stdout; mode is passed to the open call
 * unchanged. When n_threads is greater than 1, bgzf_mt() is enabled on the
 * stream after the header has been written. Nothing is written when the
 * stream cannot be opened.
 */
static void write_buffer(const char *fn, const char *mode, size_t l, bam1_p *buf, const bam_header_t *h, int n_threads)
{
	bamFile out;
	size_t rec;

	if (strcmp(fn, "-") == 0)
		out = bam_dopen(fileno(stdout), mode);
	else
		out = bam_open(fn, mode);
	if (out == 0)
		return;

	bam_header_write(out, h);
	if (n_threads > 1)
		bgzf_mt(out, n_threads, 256);

	for (rec = 0; rec < l; ++rec) {
		bam1_p cur = buf[rec];
		bam_write1_core(out, &cur->core, cur->data_len, cur->data);
	}

	bam_close(out);
}
/*
 * Close a sequence reader and free it. A null reader is ignored.
 *
 * The underlying source is closed first (BAM handle, or gzip stream plus kseq
 * parser), then each of the three sai[] streams that is non-null is closed.
 */
void bwa_seq_close(bwa_seqio_t *bs)
{
	int slot;

	if (bs == 0)
		return;

	if (!bs->is_bam) {
		gzclose(bs->ks->f->f);
		kseq_destroy(bs->ks);
	} else {
		bam_close(bs->fp);
	}

	slot = 0;
	while (slot != 3) {
		if (bs->sai[slot])
			fclose(bs->sai[slot]);
		++slot;
	}

	free(bs);
}
/*
 * Verify that filename starts with the 4-byte BAM magic "BAM\1".
 *
 * Raises an R error through Rf_error() when the file cannot be opened, or
 * when fewer than 4 bytes are read or they differ from the magic. The file is
 * closed before the magic is examined.
 */
void _check_is_bam(const char *filename)
{
	char magic[4];
	int n_read;
	bamFile fp;

	fp = bam_open(filename, "r");
	if (fp == 0)
		Rf_error("failed to open SAM/BAM file\n file: '%s'", filename);

	n_read = bam_read(fp, magic, 4);
	bam_close(fp);

	if (n_read == 4 && strncmp(magic, "BAM\001", 4) == 0)
		return;

	Rf_error("'filename' is not a BAM file\n file: %s", filename);
}
/*
 * Close a sequence reader and free it. A null reader is ignored.
 *
 * BAM-backed readers:
 *   with USE_HTSLIB    - sam_close() the handle, then destroy the header bs->h
 *   without USE_HTSLIB - bam_close() the handle
 *   a non-zero close result is reported through err_fatal_simple().
 * Other readers close the gzip stream with err_gzclose() and destroy the kseq
 * parser.
 */
void bwa_seq_close(bwa_seqio_t *bs)
{
	if (bs != 0) {
		if (!bs->is_bam) {
			err_gzclose(bs->ks->f->f);
			kseq_destroy(bs->ks);
		} else {
#ifdef USE_HTSLIB
			if (sam_close(bs->fp) != 0)
				err_fatal_simple("Error closing sam/bam file");
			bam_hdr_destroy(bs->h);
#else
			if (bam_close(bs->fp) != 0)
				err_fatal_simple("Error closing bam file");
#endif
		}
		free(bs);
	}
}
int add_dindel(const char *bam_in, const char *bam_out, const char *ref) { data_t_dindel tmp; int count = 0; bam1_t *b = NULL; if ((tmp.in = samopen(bam_in, "rb", 0)) == 0) { LOG_FATAL("Failed to open BAM file %s\n", bam_in); return 1; } if ((tmp.fai = fai_load(ref)) == 0) { LOG_FATAL("Failed to open reference file %s\n", ref); return 1; } /*warn_old_fai(ref);*/ if (!bam_out || bam_out[0] == '-') { tmp.out = bam_dopen(fileno(stdout), "w"); } else { tmp.out = bam_open(bam_out, "w"); } bam_header_write(tmp.out, tmp.in->header); b = bam_init1(); tmp.tid = -1; tmp.hpcount = 0; tmp.rlen = 0; while (samread(tmp.in, b) >= 0) { count++; dindel_fetch_func(b, &tmp); } bam_destroy1(b); if (tmp.hpcount) free(tmp.hpcount); samclose(tmp.in); bam_close(tmp.out); fai_destroy(tmp.fai); LOG_VERBOSE("Processed %d reads\n", count); return 0; }
int add_uniform(const char *bam_in, const char *bam_out, const int ins_qual, const int del_qual) { data_t_uniform tmp; uint8_t iq = ENCODE_Q(ins_qual+33); uint8_t dq = ENCODE_Q(del_qual+33); bam1_t *b = NULL; int count = 0; if ((tmp.in = samopen(bam_in, "rb", 0)) == 0) { LOG_FATAL("Failed to open BAM file %s\n", bam_in); return 1; } tmp.iq = iq; tmp.dq = dq; if (!bam_out || bam_out[0] == '-') { tmp.out = bam_dopen(fileno(stdout), "w"); } else { tmp.out = bam_open(bam_out, "w"); } bam_header_write(tmp.out, tmp.in->header); b = bam_init1(); while (samread(tmp.in, b) >= 0) { count++; uniform_fetch_func(b, &tmp); } bam_destroy1(b); samclose(tmp.in); bam_close(tmp.out); LOG_VERBOSE("Processed %d reads\n", count); return 0; }
/*
 * Function wrapper around bam_close(): closes fp and returns whatever
 * bam_close() returned.
 * NOTE(review): presumably exists because bam_close is a macro in some
 * samtools versions and a real symbol is needed - confirm.
 */
int bam_close_(bamFile fp)
{
	int rc = bam_close(fp);

	return rc;
}
int main(int argc, char *argv[]) { bamFile in; sqlite3 * db; sqlite3_stmt * stmt; char * sErrMsg = NULL; char * tail = 0; int nRetCode; char sSQL [BUFFER_SIZE] = "\0"; char database[BUFFER_SIZE]; clock_t startClock,startClock2; if (argc != 2) { fprintf(stderr, "Usage: bamRindex <in.bam>\n"); return 1; } // Open file and exit if error //in = strcmp(argv[1], "-")? bam_open(argv[1], "rb") : bam_dopen(fileno(stdin), "rb"); //fprintf(stderr,"Options ok\n"); in = bam_open(argv[1], "rb"); if (in == 0 ) { fprintf(stderr, "ERROR: Fail to open BAM file %s\n", argv[1]); return 1; } //fprintf(stderr,"BAM opened\n"); assert(strcpy(database,argv[1])!=NULL); assert(strcat(database,".ridx")!=NULL); remove(database); // *********** // Read header bam_header_t *header; header = bam_header_read(in); // sorted by name? // Should not rely on the value in SO bam1_t *aln=bam_init1(); unsigned long num_alns=0; /*********************************************/ /* Open the Database and create the Schema */ // TODO: check the errors sqlite3_open(database, &db); sqlite3_exec(db, TABLE, NULL, NULL, &sErrMsg); // create the table SQLITE_CHECK_ERROR(); startClock = clock(); sqlite3_exec(db, "PRAGMA synchronous = 0;", NULL, NULL, &sErrMsg); SQLITE_CHECK_ERROR(); sqlite3_exec(db, "PRAGMA journal_mode = OFF;", NULL, NULL, &sErrMsg); SQLITE_CHECK_ERROR(); // Use up to 8GB of memory sqlite3_exec(db, "PRAGMA cache_size = -8000000;", NULL, NULL, &sErrMsg); SQLITE_CHECK_ERROR(); sqlite3_exec(db, "BEGIN TRANSACTION;", NULL, NULL, &sErrMsg); SQLITE_CHECK_ERROR(); while(bam_read1(in,aln)>=0) { // read alignment //aln->core.tid < 0 ? uint8_t *nh = bam_aux_get(aln, "NH"); uint8_t *nm = bam_aux_get(aln, "NM"); uint8_t *xs = bam_aux_get(aln, "XS"); BOOLEAN isPrimary; BOOLEAN isMapped; BOOLEAN notMapped; BOOLEAN isDuplicate; BOOLEAN isNotPassingQualityControls; BOOLEAN isPaired; BOOLEAN isSecondMateRead,isProperPair; //secondary alignment notMapped=(aln->core.flag & BAM_FUNMAP) ? 
TRUE: FALSE; //notMapped=((aln->core.flag & BAM_FUNMAP) || (aln->core.mtid ==0)) ? TRUE: FALSE; isMapped=!notMapped; isPrimary= (aln->core.flag & BAM_FSECONDARY) ? FALSE:TRUE; isProperPair=(aln->core.flag & BAM_FPROPER_PAIR) ? TRUE:FALSE; isPaired=(aln->core.flag & BAM_FPAIRED ) ? TRUE:FALSE; isSecondMateRead=(aln->core.flag & BAM_FREAD2 ) ? TRUE: FALSE; isNotPassingQualityControls=(aln->core.flag & BAM_FQCFAIL ) ? TRUE:FALSE; isDuplicate=(aln->core.flag & BAM_FDUP) ? TRUE: FALSE; BOOLEAN isSpliced=FALSE; BOOLEAN hasSimpleCigar=TRUE; int nSpliced=0; int i; if (aln->core.n_cigar != 0) { for (i = 0; i < aln->core.n_cigar; ++i) { char l="MIDNSHP=X"[bam1_cigar(aln)[i]&BAM_CIGAR_MASK]; //fprintf(stderr,"%c",l); if ( l == 'N' ) { isSpliced=TRUE; hasSimpleCigar=FALSE;++nSpliced;} if ( l != 'M' && l!='=' ) { hasSimpleCigar=FALSE;} } } //fprintf(stderr,"read %ld\n",num_alns); // isDuplicate,isNotPassingQualityControls, // isSpliced,isPAired,isPrimary,hasSimpleCigar,isSecondMateRead,isProperPair,nh,nm,qual/mapq,xs sprintf(sSQL,"INSERT into bam_index values (%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,'%c')", isDuplicate,isNotPassingQualityControls, nSpliced,isPaired,isPrimary,isMapped,hasSimpleCigar,isSecondMateRead,isProperPair, (nh==0?0:bam_aux2i(nh)),(nm==0?0:bam_aux2i(nm)), aln->core.qual, (xs==0?' ':(bam_aux2A(xs)==0?' 
':bam_aux2A(xs)))); sqlite3_exec(db, sSQL, NULL, NULL, &sErrMsg); SQLITE_CHECK_ERROR(); ++num_alns; PRINT_ALNS_PROCESSED(num_alns); } bam_close(in); sqlite3_exec(db, "END TRANSACTION;", NULL, NULL, &sErrMsg); SQLITE_CHECK_ERROR(); printf("\nImported %d records in %4.2f seconds\n", num_alns, ( (double) (clock() - startClock))/CLOCKS_PER_SEC); // Create the indexes startClock2 = clock(); // generating the indexes does not pay off //sqlite3_exec(db, INDEXES, NULL, NULL, &sErrMsg); //printf("Indexed %d records in %4.2f seconds\n", num_alns, ( (double) (clock() - startClock2))/CLOCKS_PER_SEC); printf("Total time: %4.2f seconds\n", ((double)(clock() - startClock))/CLOCKS_PER_SEC); sqlite3_close(db); return 0; }
int main_depth(int argc, char *argv[]) #endif { int i, n, tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0; const bam_pileup1_t **plp; char *reg = 0; // specified region void *bed = 0; // BED data structure bam_header_t *h = 0; // BAM header of the 1st input aux_t **data; bam_mplp_t mplp; // parse the command line while ((n = getopt(argc, argv, "r:b:q:Q:")) >= 0) { switch (n) { case 'r': reg = strdup(optarg); break; // parsing a region requires a BAM header case 'b': bed = bed_read(optarg); break; // BED or position list file can be parsed now case 'q': baseQ = atoi(optarg); break; // base quality threshold case 'Q': mapQ = atoi(optarg); break; // mapping quality threshold } } if (optind == argc) { fprintf(stderr, "Usage: bam2depth [-r reg] [-q baseQthres] [-Q mapQthres] [-b in.bed] <in1.bam> [...]\n"); return 1; } // initialize the auxiliary data structures n = argc - optind; // the number of BAMs on the command line data = (aux_t **) calloc(n, sizeof(void*)); // data[i] for the i-th input beg = 0; end = 1<<30; tid = -1; // set the default region for (i = 0; i < n; ++i) { bam_header_t *htmp; data[i] = (aux_t *) calloc(1, sizeof(aux_t)); data[i]->fp = bam_open(argv[optind+i], "r"); // open BAM data[i]->min_mapQ = mapQ; // set the mapQ filter htmp = bam_header_read(data[i]->fp); // read the BAM header if (i == 0) { h = htmp; // keep the header of the 1st BAM if (reg) bam_parse_region(h, reg, &tid, &beg, &end); // also parse the region } else bam_header_destroy(htmp); // if not the 1st BAM, trash the header if (tid >= 0) { // if a region is specified and parsed successfully bam_index_t *idx = bam_index_load(argv[optind+i]); // load the index data[i]->iter = bam_iter_query(idx, tid, beg, end); // set the iterator bam_index_destroy(idx); // the index is not needed any more; phase out of the memory } } // the core multi-pileup loop mplp = bam_mplp_init(n, read_bam, (void**)data); // initialization n_plp = (int*) calloc(n, sizeof(int)); // n_plp[i] is the number of 
covering reads from the i-th BAM plp = (bam_pileup1_t **) calloc(n, sizeof(void*)); // plp[i] points to the array of covering reads (internal in mplp) while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0) { // come to the next covered position if (pos < beg || pos >= end) continue; // out of range; skip if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue; // not in BED; skip fputs(h->target_name[tid], stdout); printf("\t%d", pos+1); // a customized printf() would be faster for (i = 0; i < n; ++i) { // base level filters have to go here int j, m = 0; for (j = 0; j < n_plp[i]; ++j) { const bam_pileup1_t *p = plp[i] + j; // DON'T modfity plp[][] unless you really know if (p->is_del || p->is_refskip) ++m; // having dels or refskips at tid:pos else if (bam1_qual(p->b)[p->qpos] < baseQ) ++m; // low base quality } printf("\t%d", n_plp[i] - m); // this the depth to output } putchar('\n'); } free(n_plp); free(plp); bam_mplp_destroy(mplp); bam_header_destroy(h); for (i = 0; i < n; ++i) { bam_close(data[i]->fp); if (data[i]->iter) bam_iter_destroy(data[i]->iter); free(data[i]); } free(data); free(reg); if (bed) bed_destroy(bed); return 0; }
int main_depth(int argc, char *argv[]) #endif { int i, n, tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0, min_len = 0, nfiles; const bam_pileup1_t **plp; char *reg = 0; // specified region void *bed = 0; // BED data structure char *file_list = NULL, **fn = NULL; bam_header_t *h = 0; // BAM header of the 1st input aux_t **data; bam_mplp_t mplp; // parse the command line while ((n = getopt(argc, argv, "r:b:q:Q:l:f:")) >= 0) { switch (n) { case 'l': min_len = atoi(optarg); break; // minimum query length case 'r': reg = strdup(optarg); break; // parsing a region requires a BAM header case 'b': bed = bed_read(optarg); break; // BED or position list file can be parsed now case 'q': baseQ = atoi(optarg); break; // base quality threshold case 'Q': mapQ = atoi(optarg); break; // mapping quality threshold case 'f': file_list = optarg; break; } } if (optind == argc && !file_list) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: samtools depth [options] in1.bam [in2.bam [...]]\n"); fprintf(stderr, "Options:\n"); fprintf(stderr, " -b <bed> list of positions or regions\n"); fprintf(stderr, " -f <list> list of input BAM filenames, one per line [null]\n"); fprintf(stderr, " -l <int> minQLen\n"); fprintf(stderr, " -q <int> base quality threshold\n"); fprintf(stderr, " -Q <int> mapping quality threshold\n"); fprintf(stderr, " -r <chr:from-to> region\n"); fprintf(stderr, "\n"); return 1; } // initialize the auxiliary data structures if (file_list) { if ( read_file_list(file_list,&nfiles,&fn) ) return 1; n = nfiles; argv = fn; optind = 0; } else n = argc - optind; // the number of BAMs on the command line data = calloc(n, sizeof(void*)); // data[i] for the i-th input beg = 0; end = 1<<30; tid = -1; // set the default region for (i = 0; i < n; ++i) { bam_header_t *htmp; data[i] = calloc(1, sizeof(aux_t)); data[i]->fp = bam_open(argv[optind+i], "r"); // open BAM data[i]->min_mapQ = mapQ; // set the mapQ filter data[i]->min_len = min_len; // set the qlen filter htmp = 
bam_header_read(data[i]->fp); // read the BAM header if (i == 0) { h = htmp; // keep the header of the 1st BAM if (reg) bam_parse_region(h, reg, &tid, &beg, &end); // also parse the region } else bam_header_destroy(htmp); // if not the 1st BAM, trash the header if (tid >= 0) { // if a region is specified and parsed successfully bam_index_t *idx = bam_index_load(argv[optind+i]); // load the index data[i]->iter = bam_iter_query(idx, tid, beg, end); // set the iterator bam_index_destroy(idx); // the index is not needed any more; phase out of the memory } } // the core multi-pileup loop mplp = bam_mplp_init(n, read_bam, (void**)data); // initialization bam_mplp_set_maxcnt(mplp,2147483647); // set max_depth to int max n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM plp = calloc(n, sizeof(void*)); // plp[i] points to the array of covering reads (internal in mplp) while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0) { // come to the next covered position if (pos < beg || pos >= end) continue; // out of range; skip if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue; // not in BED; skip fputs(h->target_name[tid], stdout); printf("\t%d", pos+1); // a customized printf() would be faster for (i = 0; i < n; ++i) { // base level filters have to go here int j, m = 0; for (j = 0; j < n_plp[i]; ++j) { const bam_pileup1_t *p = plp[i] + j; // DON'T modfity plp[][] unless you really know if (p->is_del || p->is_refskip) ++m; // having dels or refskips at tid:pos else if (bam1_qual(p->b)[p->qpos] < baseQ) ++m; // low base quality } printf("\t%d", n_plp[i] - m); // this the depth to output } putchar('\n'); } free(n_plp); free(plp); bam_mplp_destroy(mplp); bam_header_destroy(h); for (i = 0; i < n; ++i) { bam_close(data[i]->fp); if (data[i]->iter) bam_iter_destroy(data[i]->iter); free(data[i]); } free(data); free(reg); if (bed) bed_destroy(bed); if ( file_list ) { for (i=0; i<n; i++) free(fn[i]); free(fn); } return 
0; }
/*
 * Merge n BAM files into one output BAM with a heap-based k-way merge.
 *
 *   by_qname  stored in the global g_is_by_qname before merging
 *   out       output file name; "-" writes to stdout
 *   headers   optional SAM file whose text header replaces the output header
 *   n, fn     number of inputs and their file names
 *   flag      MERGE_RG tags every record with an RG value derived from its
 *             input file name; MERGE_UNCOMP / MERGE_LEVEL1 override `level`
 *   reg       optional region; when set, each input is read through an
 *             index iterator restricted to that region
 *   level     compression level used to build the `mode` string
 *
 * Returns 0 on success and -1 on failure.
 *
 * NOTE(review): most of the early `return -1` paths below leave previously
 * opened files and allocated arrays unreleased.
 */
int bam_merge_core2(int by_qname, const char *out, const char *headers, int n, char * const *fn, int flag, const char *reg, int level)
#endif
{
	bamFile fpout, *fp;
	heap1_t *heap;
	bam_header_t *hout = 0;
	bam_header_t *hheaders = NULL;
	int i, j, *RG_len = 0;
	uint64_t idx = 0; // running counter stored in each heap entry's idx field
	char **RG = 0, mode[8];
	bam_iter_t *iter = 0;

	// load the replacement text header, if one was requested
	if (headers) {
		tamFile fpheaders = sam_open(headers);
		if (fpheaders == 0) {
			const char *message = strerror(errno);
			fprintf(stderr, "[bam_merge_core] cannot open '%s': %s\n", headers, message);
			return -1;
		}
		hheaders = sam_header_read(fpheaders);
		sam_close(fpheaders);
	}

	g_is_by_qname = by_qname;
	fp = (bamFile*)calloc(n, sizeof(bamFile));
	heap = (heap1_t*)calloc(n, sizeof(heap1_t));
	iter = (bam_iter_t*)calloc(n, sizeof(bam_iter_t));
	// prepare RG tag: the file's base name with a trailing ".bam" removed
	if (flag & MERGE_RG) {
		RG = (char**)calloc(n, sizeof(void*));
		RG_len = (int*)calloc(n, sizeof(int));
		for (i = 0; i != n; ++i) {
			int l = strlen(fn[i]);
			const char *s = fn[i];
			if (l > 4 && strcmp(s + l - 4, ".bam") == 0) l -= 4;
			// scan back to the last '/'; j ends up at the first base-name character
			for (j = l - 1; j >= 0; --j) if (s[j] == '/') break;
			++j; l -= j;
			// calloc(l + 1) leaves the byte after the copied name zeroed
			RG[i] = calloc(l + 1, 1);
			RG_len[i] = l;
			strncpy(RG[i], s + j, l);
		}
	}
	// open every input and read its header
	for (i = 0; i != n; ++i) {
		bam_header_t *hin;
		fp[i] = bam_open(fn[i], "r");
		if (fp[i] == 0) {
			int j;
			fprintf(stderr, "[bam_merge_core] fail to open file %s\n", fn[i]);
			for (j = 0; j < i; ++j) bam_close(fp[j]);
			free(fp); free(heap);
			// FIXME: possible memory leak
			return -1;
		}
		hin = bam_header_read(fp[i]);
		if (i == 0) { // the first BAM
			hout = hin;
		} else { // validate the headers of the other BAMs against the first
			int min_n_targets = hout->n_targets;
			if (hin->n_targets < min_n_targets) min_n_targets = hin->n_targets;

			for (j = 0; j < min_n_targets; ++j)
				if (strcmp(hout->target_name[j], hin->target_name[j]) != 0) {
					fprintf(stderr, "[bam_merge_core] different target sequence name: '%s' != '%s' in file '%s'\n",
							hout->target_name[j], hin->target_name[j], fn[i]);
					return -1;
				}

			// If this input file has additional target reference sequences,
			// add them to the headers to be output
			if (hin->n_targets > hout->n_targets) {
				swap_header_targets(hout, hin);
				// FIXME Possibly we should also create @SQ text headers
				// for the newly added reference sequences
			}

			bam_header_destroy(hin);
		}
	}

	if (hheaders) {
		// If the text headers to be swapped in include any @SQ headers,
		// check that they are consistent with the existing binary list
		// of reference information.
		if (hheaders->n_targets > 0) {
			if (hout->n_targets != hheaders->n_targets) {
				fprintf(stderr, "[bam_merge_core] number of @SQ headers in '%s' differs from number of target sequences\n", headers);
				if (!reg) return -1;
			}
			// NOTE(review): this loop runs to hout->n_targets even when the
			// two counts differ and a region is given - confirm the bound.
			for (j = 0; j < hout->n_targets; ++j)
				if (strcmp(hout->target_name[j], hheaders->target_name[j]) != 0) {
					fprintf(stderr, "[bam_merge_core] @SQ header '%s' in '%s' differs from target sequence\n", hheaders->target_name[j], headers);
					if (!reg) return -1;
				}
		}

		swap_header_text(hout, hheaders);
		bam_header_destroy(hheaders);
	}

	// restrict every input to the requested region through its index
	if (reg) {
		int tid, beg, end;
		if (bam_parse_region(hout, reg, &tid, &beg, &end) < 0) {
			fprintf(stderr, "[%s] Malformated region string or undefined reference name\n", __func__);
			return -1;
		}
		for (i = 0; i < n; ++i) {
			bam_index_t *idx;
			idx = bam_index_load(fn[i]);
			iter[i] = bam_iter_query(idx, tid, beg, end);
			bam_index_destroy(idx);
		}
	}

	// seed the heap with the first record of every input
	for (i = 0; i < n; ++i) {
		heap1_t *h = heap + i;
		h->i = i;
		h->b = (bam1_t*)calloc(1, sizeof(bam1_t));
		if (bam_iter_read(fp[i], iter[i], h->b) >= 0) {
			// sort key: tid in the high 32 bits, then (pos + 1) << 1 | strand
			h->pos = ((uint64_t)h->b->core.tid<<32) | (uint32_t)((int32_t)h->b->core.pos+1)<<1 | bam1_strand(h->b);
			h->idx = idx++;
		}
		else h->pos = HEAP_EMPTY;
	}
	if (flag & MERGE_UNCOMP) level = 0;
	else if (flag & MERGE_LEVEL1) level = 1;
	strcpy(mode, "w");
	if (level >= 0) sprintf(mode + 1, "%d", level < 9? level : 9);
	// NOTE(review): `mode` is built above, but the literal "w" is what both
	// open calls receive, so `level` has no effect here - confirm intent.
	if ((fpout = strcmp(out, "-")? bam_open(out, "w") : bam_dopen(fileno(stdout), "w")) == 0) {
		fprintf(stderr, "[%s] fail to create the output file.\n", __func__);
		return -1;
	}
	bam_header_write(fpout, hout);
	bam_header_destroy(hout);
#ifndef _PBGZF_USE
	// NOTE(review): n_threads is not a parameter of the signature visible here
	if (!(flag & MERGE_UNCOMP)) bgzf_mt(fpout, n_threads, 256);
#endif

	// merge: write the record at the top of the heap, then refill from the same input
	ks_heapmake(heap, n, heap);
	while (heap->pos != HEAP_EMPTY) {
		bam1_t *b = heap->b;
		if (flag & MERGE_RG) {
			// replace any existing RG tag with the one derived from the file name
			uint8_t *rg = bam_aux_get(b, "RG");
			if (rg) bam_aux_del(b, rg);
			bam_aux_append(b, "RG", 'Z', RG_len[heap->i] + 1, (uint8_t*)RG[heap->i]);
		}
		bam_write1_core(fpout, &b->core, b->data_len, b->data);
		if ((j = bam_iter_read(fp[heap->i], iter[heap->i], b)) >= 0) {
			heap->pos = ((uint64_t)b->core.tid<<32) | (uint32_t)((int)b->core.pos+1)<<1 | bam1_strand(b);
			heap->idx = idx++;
		} else if (j == -1) {
			// this input is exhausted: retire its heap slot
			heap->pos = HEAP_EMPTY;
			free(heap->b->data); free(heap->b);
			heap->b = 0;
		} else fprintf(stderr, "[bam_merge_core] '%s' is truncated. Continue anyway.\n", fn[heap->i]);
		ks_heapadjust(heap, 0, n, heap);
	}

	// release everything
	if (flag & MERGE_RG) {
		for (i = 0; i != n; ++i) free(RG[i]);
		free(RG); free(RG_len);
	}
	for (i = 0; i != n; ++i) {
		bam_iter_destroy(iter[i]);
		bam_close(fp[i]);
	}
	bam_close(fpout);
	free(fp); free(heap); free(iter);
	return 0;
}
/*!
  @abstract Sort an unsorted BAM file based on the chromosome order
  and the leftmost position of an alignment

  @param  is_by_qname whether to sort by query name
  @param  fn       name of the file to be sorted; "-" reads from stdin
  @param  prefix   prefix of the output and the temporary files; upon
	               success, prefix.bam will be written.
  @param  _max_mem approximate maximum memory per thread (very inaccurate);
                   it is multiplied by n_threads
  @param  is_stdout when non-zero the final output name is "-" instead of
                   prefix.bam
  @param  n_threads number of threads; values below 2 are treated as 1
  @param  level    compression level appended to the write mode when >= 0
                   (capped at 9)
  @param  sort_type passed through to sort_blocks() and sort_aux_core()

  @discussion It may create multiple temporary subalignment files
  and then merge them by calling bam_merge_core2(). This function is
  NOT thread safe.
 */
void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_t _max_mem, int is_stdout, int n_threads, int level, int sort_type)
{
	int ret, i, n_files = 0;
	size_t mem, max_k, k, max_mem;
	bam_header_t *header;
	bamFile fp;
	bam1_t *b, **buf;
	char *fnout = 0;

	if (n_threads < 2) n_threads = 1;
	g_is_by_qname = is_by_qname;
	max_k = k = 0; mem = 0;
	max_mem = _max_mem * n_threads;
	buf = 0;
	fp = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
	if (fp == 0) {
		fprintf(stderr, "[bam_sort_core] fail to open file %s\n", fn);
		return;
	}
	header = bam_header_read(fp);
	// record the sort order in the header
	if (is_by_qname) change_SO(header, "queryname");
	else change_SO(header, "coordinate");
	// write sub files
	for (;;) {
		// grow the record-pointer array: 0x10000 slots at first, then doubling
		if (k == max_k) {
			size_t old_max = max_k;
			max_k = max_k? max_k<<1 : 0x10000;
			// NOTE(review): the realloc result is used unchecked
			buf = realloc(buf, max_k * sizeof(void*));
			memset(buf + old_max, 0, sizeof(void*) * (max_k - old_max));
		}
		// record slots are allocated once and reused across blocks
		if (buf[k] == 0) buf[k] = (bam1_t*)calloc(1, sizeof(bam1_t));
		b = buf[k];
		if ((ret = bam_read1(fp, b)) < 0) break;
		if (b->data_len < b->m_data>>2) { // shrink
			b->m_data = b->data_len;
			kroundup32(b->m_data);
			b->data = realloc(b->data, b->m_data);
		}
		mem += sizeof(bam1_t) + b->m_data + sizeof(void*) + sizeof(void*); // two sizeof(void*) for the data allocated to pointer arrays
		++k;
		// memory budget reached: sort this block, write it out, start the next one
		if (mem >= max_mem) {
			n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads, sort_type);
			mem = k = 0;
		}
	}
	// any read result other than -1 is reported as a truncated file
	if (ret != -1)
		fprintf(stderr, "[bam_sort_core] truncated file. Continue anyway.\n");
	// output file name
	fnout = calloc(strlen(prefix) + 20, 1);
	if (is_stdout) sprintf(fnout, "-");
	else sprintf(fnout, "%s.bam", prefix);
	// write the final output
	if (n_files == 0) { // a single block
		char mode[8];
		strcpy(mode, "w");
		if (level >= 0) sprintf(mode + 1, "%d", level < 9? level : 9);
		sort_aux_core(k, buf, sort_type);
#ifndef _PBGZF_USE
		write_buffer(fnout, mode, k, buf, header, n_threads);
#else
		write_buffer(fnout, mode, k, buf, header);
#endif
	} else { // then merge
		char **fns;
		// flush the records still in memory as one more temporary block
		n_files = sort_blocks(n_files, k, buf, prefix, header, n_threads, sort_type);
		fprintf(stderr, "[bam_sort_core] merging from %d files...\n", n_files);
		// rebuild the temporary file names "<prefix>.NNNN.bam"
		fns = (char**)calloc(n_files, sizeof(char*));
		for (i = 0; i < n_files; ++i) {
			fns[i] = (char*)calloc(strlen(prefix) + 20, 1);
			sprintf(fns[i], "%s.%.4d.bam", prefix, i);
		}
#ifndef _PBGZF_USE
		bam_merge_core2(is_by_qname, fnout, 0, n_files, fns, 0, 0, n_threads, level);
#else
		bam_merge_core2(is_by_qname, fnout, 0, n_files, fns, 0, 0, level);
#endif
		// the temporary blocks are removed once merged
		for (i = 0; i < n_files; ++i) {
			unlink(fns[i]);
			free(fns[i]);
		}
		free(fns);
	}
	free(fnout);
	// free every record slot that was ever allocated, then the array itself
	for (k = 0; k < max_k; ++k) {
		if (!buf[k]) continue;
		free(buf[k]->data);
		free(buf[k]);
	}
	free(buf);
	bam_header_destroy(header);
	bam_close(fp);
}
int main(int argc, char **argv) { cram_fd *out; bam_file_t *in; bam_seq_t *s = NULL; char *out_fn; int level = '\0'; // nul terminate string => auto level char out_mode[4]; int c, verbose = 0; int s_opt = 0, S_opt = 0, embed_ref = 0; char *arg_list, *ref_fn = NULL; while ((c = getopt(argc, argv, "u0123456789hvs:S:V:r:X")) != -1) { switch (c) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': level = c; break; case 'u': level = '0'; break; case 'h': usage(stdout); return 0; case 'v': verbose++; break; case 's': s_opt = atoi(optarg); break; case 'S': S_opt = atoi(optarg); break; case 'V': cram_set_option(NULL, CRAM_OPT_VERSION, optarg); break; case 'r': ref_fn = optarg; break; case 'X': embed_ref = 1; break; case '?': fprintf(stderr, "Unrecognised option: -%c\n", optopt); usage(stderr); return 1; } } if (argc - optind != 1 && argc - optind != 2) { usage(stderr); return 1; } /* opening */ if (NULL == (in = bam_open(argv[optind], "rb"))) { perror(argv[optind]); return 1; } out_fn = argc - optind == 2 ? 
argv[optind+1] : "-"; sprintf(out_mode, "wb%c", level); if (NULL == (out = cram_open(out_fn, out_mode))) { fprintf(stderr, "Error opening CRAM file '%s'.\n", out_fn); return 1; } /* SAM Header */ if (!(arg_list = stringify_argv(argc, argv))) return 1; sam_hdr_add_PG(in->header, "sam_to_cram", "VN", PACKAGE_VERSION, "CL", arg_list, NULL); free(arg_list); /* Find and load reference */ if (!ref_fn) { SAM_hdr_type *ty = sam_hdr_find(in->header, "SQ", NULL, NULL); if (ty) { SAM_hdr_tag *tag; if ((tag = sam_hdr_find_key(in->header, ty, "UR", NULL))) { ref_fn = tag->str + 3; if (strncmp(ref_fn, "file:", 5) == 0) ref_fn += 5; } } } out->header = in->header; if (ref_fn) cram_load_reference(out, ref_fn); if (!out->refs) { fprintf(stderr, "Unable to open reference.\n" "Please specify a valid reference with -r ref.fa option.\n"); return 1; } refs2id(out->refs, out->header); if (-1 == cram_write_SAM_hdr(out, in->header)) return 1; cram_set_option(out, CRAM_OPT_VERBOSITY, verbose); if (s_opt) cram_set_option(out, CRAM_OPT_SEQS_PER_SLICE, s_opt); if (S_opt) cram_set_option(out, CRAM_OPT_SLICES_PER_CONTAINER, S_opt); if (embed_ref) cram_set_option(out, CRAM_OPT_EMBED_REF, embed_ref); /* Sequence iterators */ while (bam_get_seq(in, &s) > 0) { if (-1 == cram_put_bam_seq(out, s)) { fprintf(stderr, "Failed in cram_put_bam_seq()\n"); return 1; } } bam_close(in); out->header = NULL; // freed by bam_close() if (-1 == cram_close(out)) { fprintf(stderr, "Failed in cram_close()\n"); return 1; } if (s) free(s); return 0; }
/*
 * Run a pileup over n input BAM files in parallel and write one output
 * record/line per covered position.
 *
 * conf  run configuration: flag bits (MPLP_*), optional region string (reg),
 *       optional BED filter (bed), optional FASTA index (fai), depth limits
 *       and the quality parameters copied into the BCF caller.
 * n     number of input files.
 * fn    the n input file names; "-" reads from stdin.
 *
 * Output:
 *   - with MPLP_GLF set in conf->flag, BCF records are written through the
 *     handle opened with bcf_open("-", ...), one for the site and, unless
 *     MPLP_NO_INDEL is set, possibly one more for an indel call;
 *   - otherwise a tab-separated text line per position is printed on stdout.
 *
 * Returns 0. The process is terminated with exit(1) when an input cannot be
 * opened, its header cannot be read, or (when conf->reg is set) its index
 * cannot be loaded or the region cannot be parsed.
 */
static int mpileup(mplp_conf_t *conf, int n, char **fn)
{
	extern void *bcf_call_add_rg(void *rghash, const char *hdtext, const char *list);
	extern void bcf_call_del_rghash(void *rghash);
	mplp_aux_t **data;
	// ref_len starts at 0 so that it is never passed on while indeterminate when no FASTA index is configured
	int i, tid, pos, *n_plp, tid0 = -1, beg0 = 0, end0 = 1u<<29, ref_len = 0, ref_tid = -1, max_depth, max_indel_depth;
	const bam_pileup1_t **plp;
	bam_mplp_t iter;
	bam_header_t *h = 0;
	char *ref;
	void *rghash = 0;

	bcf_callaux_t *bca = 0;
	bcf_callret1_t *bcr = 0;
	bcf_call_t bc;
	bcf_t *bp = 0;
	bcf_hdr_t *bh = 0;

	bam_sample_t *sm = 0;
	kstring_t buf;
	mplp_pileup_t gplp;

	memset(&gplp, 0, sizeof(mplp_pileup_t));
	memset(&buf, 0, sizeof(kstring_t));
	memset(&bc, 0, sizeof(bcf_call_t));
	data = calloc(n, sizeof(void*));
	plp = calloc(n, sizeof(void*));
	n_plp = calloc(n, sizeof(int)); // one int per input
	sm = bam_smpl_init();

	// read the header and initialize data
	for (i = 0; i < n; ++i) {
		bam_header_t *h_tmp;
		data[i] = calloc(1, sizeof(mplp_aux_t));
		data[i]->fp = strcmp(fn[i], "-") == 0? bam_dopen(fileno(stdin), "r") : bam_open(fn[i], "r");
		if (data[i]->fp == 0) {
			fprintf(stderr, "[%s] fail to open %d-th input.\n", __func__, i+1);
			exit(1);
		}
		data[i]->conf = conf;
		h_tmp = bam_header_read(data[i]->fp);
		if (h_tmp == 0) {
			// h_tmp->text is read just below
			fprintf(stderr, "[%s] fail to read the header of %d-th input.\n", __func__, i+1);
			exit(1);
		}
		data[i]->h = i? h : h_tmp; // for i==0, "h" has not been set yet
		bam_smpl_add(sm, fn[i], (conf->flag&MPLP_IGNORE_RG)? 0 : h_tmp->text);
		rghash = bcf_call_add_rg(rghash, h_tmp->text, conf->pl_list);
		if (conf->reg) {
			int beg, end;
			bam_index_t *idx;
			idx = bam_index_load(fn[i]);
			if (idx == 0) {
				fprintf(stderr, "[%s] fail to load index for %d-th input.\n", __func__, i+1);
				exit(1);
			}
			if (bam_parse_region(h_tmp, conf->reg, &tid, &beg, &end) < 0) {
				fprintf(stderr, "[%s] malformatted region or wrong seqname for %d-th input.\n", __func__, i+1);
				exit(1);
			}
			// the region parsed against the first input is the one used for filtering below
			if (i == 0) tid0 = tid, beg0 = beg, end0 = end;
			data[i]->iter = bam_iter_query(idx, tid, beg, end);
			bam_index_destroy(idx);
		}
		if (i == 0) h = h_tmp;
		else {
			// FIXME: to check consistency
			bam_header_destroy(h_tmp);
		}
	}
	gplp.n = sm->n;
	gplp.n_plp = calloc(sm->n, sizeof(int));
	gplp.m_plp = calloc(sm->n, sizeof(int));
	gplp.plp = calloc(sm->n, sizeof(void*));

	fprintf(stderr, "[%s] %d samples in %d input files\n", __func__, sm->n, n);
	// write the VCF header
	if (conf->flag & MPLP_GLF) {
		kstring_t s;
		bh = calloc(1, sizeof(bcf_hdr_t));
		s.l = s.m = 0; s.s = 0;
		bp = bcf_open("-", (conf->flag&MPLP_NO_COMP)? "wu" : "w");
		// target names, each followed by a NUL, concatenated into one buffer
		for (i = 0; i < h->n_targets; ++i) {
			kputs(h->target_name[i], &s);
			kputc('\0', &s);
		}
		bh->l_nm = s.l;
		bh->name = malloc(s.l);
		memcpy(bh->name, s.s, s.l);
		s.l = 0;
		// sample names, packed the same way
		for (i = 0; i < sm->n; ++i) {
			kputs(sm->smpl[i], &s);
			kputc('\0', &s);
		}
		bh->l_smpl = s.l;
		bh->sname = malloc(s.l);
		memcpy(bh->sname, s.s, s.l);
		bh->txt = malloc(strlen(BAM_VERSION) + 64);
		bh->l_txt = 1 + sprintf(bh->txt, "##samtoolsVersion=%s\n", BAM_VERSION);
		free(s.s);
		bcf_hdr_sync(bh);
		bcf_hdr_write(bp, bh);
		bca = bcf_call_init(-1., conf->min_baseQ);
		bcr = calloc(sm->n, sizeof(bcf_callret1_t));
		bca->rghash = rghash;
		bca->openQ = conf->openQ, bca->extQ = conf->extQ, bca->tandemQ = conf->tandemQ;
		bca->min_frac = conf->min_frac;
		bca->min_support = conf->min_support;
	}
	if (tid0 >= 0 && conf->fai) { // region is set
		ref = faidx_fetch_seq(conf->fai, h->target_name[tid0], 0, 0x7fffffff, &ref_len);
		ref_tid = tid0;
		for (i = 0; i < n; ++i) data[i]->ref = ref, data[i]->ref_id = tid0;
	} else ref_tid = -1, ref = 0;
	iter = bam_mplp_init(n, mplp_func, (void**)data);
	max_depth = conf->max_depth;
	if (max_depth * sm->n > 1<<20)
		fprintf(stderr, "(%s) Max depth is above 1M. Potential memory hog!\n", __func__);
	if (max_depth * sm->n < 8000) {
		max_depth = 8000 / sm->n;
		fprintf(stderr, "<%s> Set max per-file depth to %d\n", __func__, max_depth);
	}
	max_indel_depth = conf->max_indel_depth * sm->n;
	bam_mplp_set_maxcnt(iter, max_depth);
	while (bam_mplp_auto(iter, &tid, &pos, n_plp, plp) > 0) {
		if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested
		if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, h->target_name[tid], pos, pos+1)) continue;
		if (tid != ref_tid) {
			// moved to another target: replace the cached reference sequence
			free(ref); ref = 0;
			if (conf->fai) ref = faidx_fetch_seq(conf->fai, h->target_name[tid], 0, 0x7fffffff, &ref_len);
			for (i = 0; i < n; ++i) data[i]->ref = ref, data[i]->ref_id = tid;
			ref_tid = tid;
		}
		if (conf->flag & MPLP_GLF) {
			int total_depth, _ref0, ref16;
			bcf1_t *b = calloc(1, sizeof(bcf1_t));
			for (i = total_depth = 0; i < n; ++i) total_depth += n_plp[i];
			group_smpl(&gplp, sm, &buf, n, fn, n_plp, plp, conf->flag & MPLP_IGNORE_RG);
			_ref0 = (ref && pos < ref_len)? ref[pos] : 'N';
			ref16 = bam_nt16_table[_ref0];
			for (i = 0; i < gplp.n; ++i)
				bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], ref16, bca, bcr + i);
			bcf_call_combine(gplp.n, bcr, ref16, &bc);
			bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0,
						 (conf->flag&MPLP_FMT_SP), 0, 0);
			bcf_write(bp, bh, b);
			bcf_destroy(b);
			// call indels
			if (!(conf->flag&MPLP_NO_INDEL) && total_depth < max_indel_depth && bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0) {
				for (i = 0; i < gplp.n; ++i)
					bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], -1, bca, bcr + i);
				if (bcf_call_combine(gplp.n, bcr, -1, &bc) >= 0) {
					b = calloc(1, sizeof(bcf1_t));
					bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0,
								 (conf->flag&MPLP_FMT_SP), bca, ref);
					bcf_write(bp, bh, b);
					bcf_destroy(b);
				}
			}
		} else {
			// text output: target name, 1-based position, reference base ('N' when unavailable)
			printf("%s\t%d\t%c", h->target_name[tid], pos + 1, (ref && pos < ref_len)? ref[pos] : 'N');
			for (i = 0; i < n; ++i) {
				int j;
				printf("\t%d\t", n_plp[i]);
				if (n_plp[i] == 0) {
					printf("*\t*"); // FIXME: printf() is very slow...
					if (conf->flag & MPLP_PRINT_POS) printf("\t*");
				} else {
					for (j = 0; j < n_plp[i]; ++j)
						pileup_seq(plp[i] + j, pos, ref_len, ref);
					putchar('\t');
					// base qualities, offset by 33 and capped at 126
					for (j = 0; j < n_plp[i]; ++j) {
						const bam_pileup1_t *p = plp[i] + j;
						int c = bam1_qual(p->b)[p->qpos] + 33;
						if (c > 126) c = 126;
						putchar(c);
					}
					if (conf->flag & MPLP_PRINT_MAPQ) {
						putchar('\t');
						// core.qual of each read, same offset and cap
						for (j = 0; j < n_plp[i]; ++j) {
							int c = plp[i][j].b->core.qual + 33;
							if (c > 126) c = 126;
							putchar(c);
						}
					}
					if (conf->flag & MPLP_PRINT_POS) {
						putchar('\t');
						// comma-separated 1-based positions within each read
						for (j = 0; j < n_plp[i]; ++j) {
							if (j > 0) putchar(',');
							printf("%d", plp[i][j].qpos + 1); // FIXME: printf() is very slow...
						}
					}
				}
			}
			putchar('\n');
		}
	}

	bcf_close(bp);
	bam_smpl_destroy(sm); free(buf.s);
	for (i = 0; i < gplp.n; ++i) free(gplp.plp[i]);
	free(gplp.plp); free(gplp.n_plp); free(gplp.m_plp);
	bcf_call_del_rghash(rghash);
	bcf_hdr_destroy(bh); bcf_call_destroy(bca); free(bc.PL); free(bcr);
	bam_mplp_destroy(iter);
	bam_header_destroy(h);
	for (i = 0; i < n; ++i) {
		bam_close(data[i]->fp);
		if (data[i]->iter) bam_iter_destroy(data[i]->iter);
		free(data[i]);
	}
	free(data); free(plp); free(ref); free(n_plp);
	return 0;
}
int main(int argc, char **argv) { cram_fd *fd; bam_file_t *bfd; bam_seq_t *bam = NULL; char mode[4] = {'w', '\0', '\0', '\0'}; char *prefix = NULL; int decode_md = 0; int C; int start, end; char ref_name[1024] = {0}, *arg_list, *ref_fn = NULL; int embed_ref = 0; while ((C = getopt(argc, argv, "bu0123456789mp:hr:R:X")) != -1) { switch (C) { case 'b': mode[1] = 'b'; break; case 'u': mode[2] = '0'; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': mode[2] = C; break; case 'm': decode_md = 1; break; case 'p': prefix = optarg; break; case 'h': usage(stdout); return 0; case 'r': ref_fn = optarg; break; case 'X': embed_ref = 1; break; case 'R': { char *cp = strchr(optarg, ':'); if (cp) { *cp = 0; switch (sscanf(cp+1, "%d-%d", &start, &end)) { case 1: end = start; break; case 2: break; default: fprintf(stderr, "Malformed range format\n"); return 1; } } else { start = INT_MIN; end = INT_MAX; } strncpy(ref_name, optarg, 1023); break; } case '?': fprintf(stderr, "Unrecognised option: -%c\n", optopt); usage(stderr); return 1; } } if (argc - optind != 1 && argc - optind != 2) { usage(stderr); return 1; } if (argc - optind == 1) { if (NULL == (bfd = bam_open("-", mode))) { fprintf(stderr, "Failed to open SAM/BAM output\n."); return 1; } } else { if (NULL == (bfd = bam_open(argv[optind+1], mode))) { fprintf(stderr, "Failed to open SAM/BAM output\n."); perror(argv[optind+1]); return 1; } } if (NULL == (fd = cram_open(argv[optind], "rb"))) { fprintf(stderr, "Error opening CRAM file '%s'.\n", argv[optind]); return 1; } if (*ref_name != 0) cram_index_load(fd, argv[optind]); if (prefix) cram_set_option(fd, CRAM_OPT_PREFIX, prefix); if (decode_md) cram_set_option(fd, CRAM_OPT_DECODE_MD, decode_md); if (embed_ref) cram_set_option(fd, CRAM_OPT_EMBED_REF, embed_ref); /* Find and load reference */ cram_load_reference(fd, ref_fn); if (!fd->refs && !embed_ref) { fprintf(stderr, "Unable to find an appropriate reference.\n" "Please 
specify a valid reference with -r ref.fa option.\n"); return 1; } bfd->header = fd->header; if (*ref_name != 0) { cram_range r; int refid = sam_hdr_name2ref(fd->header, ref_name); if (refid == -1 && *ref_name != '*') { fprintf(stderr, "Unknown reference name '%s'\n", ref_name); return 1; } r.refid = refid; r.start = start; r.end = end; cram_set_option(fd, CRAM_OPT_RANGE, &r); } /* SAM Header */ if (!(arg_list = stringify_argv(argc, argv))) return 1; sam_hdr_add_PG(bfd->header, "cram_to_sam", "VN", PACKAGE_VERSION, "CL", arg_list, NULL); free(arg_list); bam_write_header(bfd); while (cram_get_bam_seq(fd, &bam) == 0) { bam_put_seq(bfd, bam); } if (!cram_eof(fd)) { fprintf(stderr, "Error while reading file\n"); return 1; } cram_close(fd); bfd->header = NULL; bam_close(bfd); free(bam); return 0; }