/*
 * Reads a BAM file from 'in' and writes a new BAM file to 'fd' with 'h'
 * substituted as the header.  No checks are made on the validity of 'h'.
 *
 *   in        input BGZF handle; must not be open for writing
 *   h         replacement header; when add_PG is non-zero its text and
 *             l_text are replaced by a copy that carries the extra @PG line
 *   fd        output file descriptor, wrapped with bgzf_fdopen(); once the
 *             wrap has succeeded the handle is closed before returning
 *   arg_list  command line stored in the CL field of the @PG line, or NULL
 *             to omit that field
 *   add_PG    non-zero to add a "samtools" @PG line to the header
 *
 * Returns 0 on success and -1 on any failure.
 */
int bam_reheader(BGZF *in, bam_hdr_t *h, int fd, const char *arg_list, int add_PG)
{
    BGZF *fp = NULL;
    SAM_hdr *sh = NULL;
    bam_hdr_t *old_hdr;
    char *new_text;
    uint8_t *buf;
    ssize_t len;

    if (in->is_write) return -1;

    buf = (uint8_t *)malloc(BUF_SIZE);
    if (buf == NULL) {
        fprintf(stderr, "Out of memory\n");
        return -1;
    }

    /* The existing header is read only to move past it; its contents are discarded. */
    old_hdr = bam_hdr_read(in);
    if (old_hdr == NULL) {
        fprintf(stderr, "Couldn't read header\n");
        goto fail;
    }
    bam_hdr_destroy(old_hdr);

    fp = bgzf_fdopen(fd, "w");
    if (fp == NULL) {
        fprintf(stderr, "Couldn't open output file\n");
        goto fail;
    }

    if (add_PG) {
        // Around the houses, but it'll do until we can manipulate bam_hdr_t natively.
        sh = sam_hdr_parse_(h->text, h->l_text);
        if (sh == NULL) {
            fprintf(stderr, "Couldn't parse header\n");
            goto fail;
        }
        if (sam_hdr_add_PG(sh, "samtools", "VN", samtools_version(),
                           arg_list ? "CL" : NULL,
                           arg_list ? arg_list : NULL,
                           NULL) != 0)
            goto fail;

        /* Duplicate first so that 'h' is left untouched if the copy fails. */
        new_text = strdup(sam_hdr_str(sh));
        if (new_text == NULL) goto fail;
        free(h->text);
        h->text = new_text;
        h->l_text = sam_hdr_length(sh);

        sam_hdr_free(sh);
        sh = NULL;
    }

    if (bam_hdr_write(fp, h) < 0) goto write_fail;

    /* Whatever remains of the already-decompressed input block has to be recompressed. */
    if (in->block_offset < in->block_length) {
        if (bgzf_write(fp, (uint8_t *)in->uncompressed_block + in->block_offset,
                       in->block_length - in->block_offset) < 0)
            goto write_fail;
        if (bgzf_flush(fp) < 0) goto write_fail;
    }

    /* The rest of the input is copied through as raw (still compressed) bytes. */
    while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) {
        if (bgzf_raw_write(fp, buf, len) < 0) goto write_fail;
    }
    if (len < 0) {
        fprintf(stderr, "Error reading input file\n");
        goto fail;
    }

    free(buf);
    fp->block_offset = in->block_offset = 0;
    if (bgzf_close(fp) < 0) {
        fprintf(stderr, "Error closing output file\n");
        return -1;
    }
    return 0;

write_fail:
    fprintf(stderr, "Error writing to output file\n");
fail:
    if (sh) sam_hdr_free(sh);
    if (fp) bgzf_close(fp);
    free(buf);
    return -1;
}
void signalFromBAM(const string bamFileName, const string sigFileName, Parameters P) { bam1_t *bamA; bamA=bam_init1(); double nMult=0, nUniq=0; if (P.outWigFlags.norm==1) {//count reads in the BAM file BGZF *bamIn=bgzf_open(bamFileName.c_str(),"r"); bam_hdr_t *bamHeader=bam_hdr_read(bamIn); while ( true ) {//until the end of file int bamBytes1=bam_read1(bamIn, bamA); if (bamBytes1<0) break; //end of file if (bamA->core.tid<0) continue; //unmapped read // if ( !std::regex_match(chrName.at(bamA->core.tid),std::regex(P.outWigReferencesPrefix))) continue; //reference does not mathc required references if ( P.outWigReferencesPrefix!="-" && (P.outWigReferencesPrefix.compare(0,P.outWigReferencesPrefix.size(),bamHeader->target_name[bamA->core.tid],P.outWigReferencesPrefix.size())!=0) ) continue; //reference does not match required references uint8_t* aNHp=bam_aux_get(bamA,"NH"); if (aNHp!=NULL) { uint32_t aNH=bam_aux2i(aNHp); if (aNH==1) {//unique mappers ++nUniq; } else if (aNH>1) { nMult+=1.0/aNH; }; }; }; bgzf_close(bamIn); }; BGZF *bamIn=bgzf_open(bamFileName.c_str(),"r"); bam_hdr_t *bamHeader=bam_hdr_read(bamIn); int sigN=P.outWigFlags.strand ? 4 : 2; double *normFactor=new double[sigN]; ofstream **sigOutAll=new ofstream* [sigN]; string* sigOutFileName=new string[sigN]; sigOutFileName[0]=sigFileName+".Unique.str1.out"; sigOutFileName[1]=sigFileName+".UniqueMultiple.str1.out"; if (P.outWigFlags.strand) { sigOutFileName[2]=sigFileName+".Unique.str2.out"; sigOutFileName[3]=sigFileName+".UniqueMultiple.str2.out"; }; for (int ii=0; ii<sigN; ii++) { sigOutFileName[ii]+= (P.outWigFlags.format==0 ? 
".bg" : ".wig"); sigOutAll[ii]=new ofstream ( sigOutFileName[ii].c_str() ); }; if (P.outWigFlags.norm==0) {//raw counts normFactor[0]=1; normFactor[1]=1; } else if (P.outWigFlags.norm==1) {//normlaized normFactor[0]=1.0e6 / nUniq; normFactor[1]=1.0e6 / (nUniq+nMult); for (int is=0;is<sigN;is++) {//formatting double output *sigOutAll[is]<<setiosflags(ios::fixed) << setprecision(5); }; }; if (P.outWigFlags.strand) { normFactor[2]=normFactor[0]; normFactor[3]=normFactor[1]; }; int iChr=-999; double *sigAll=NULL; uint32_t chrLen=0; while ( true ) {//until the end of file int bamBytes1=bam_read1(bamIn, bamA); if (bamA->core.tid!=iChr || bamBytes1<0) { //output to file if (iChr!=-999) {//iChr=-999 marks chromosomes that are not output, including unmapped reads for (int is=0;is<sigN;is++) { if (P.outWigFlags.format==1) { *sigOutAll[is] <<"variableStep chrom="<<bamHeader->target_name[iChr] <<"\n"; }; double prevSig=0; for (uint32_t ig=0;ig<chrLen;ig++) { double newSig=sigAll[sigN*ig+is]; if (P.outWigFlags.format==0) {//bedGraph if (newSig!=prevSig) { if (prevSig!=0) {//finish previous record *sigOutAll[is] <<ig<<"\t"<<prevSig*normFactor[is] <<"\n"; //1-based end }; if (newSig!=0) { *sigOutAll[is] << bamHeader->target_name[iChr] <<"\t"<< ig <<"\t"; //0-based beginning }; prevSig=newSig; }; } else if (P.outWigFlags.format==1){//wiggle if (newSig!=0) { *sigOutAll[is] <<ig+1<<"\t"<<newSig*normFactor[is] <<"\n"; }; }; }; }; }; if (bamBytes1<0) {//no more reads break; }; iChr=bamA->core.tid; if ( iChr==-1 || (P.outWigReferencesPrefix!="-" && (P.outWigReferencesPrefix.compare(0,P.outWigReferencesPrefix.size(),bamHeader->target_name[bamA->core.tid],P.outWigReferencesPrefix.size())!=0) ) ) { iChr=-999; continue; //reference does not match required references }; chrLen=bamHeader->target_len[iChr]+1;//one extra base at the end which sohuld always be 0 delete [] sigAll; sigAll= new double[sigN*chrLen]; memset(sigAll, 0, sizeof(*sigAll)*sigN*chrLen); }; // uint32_t nCigar 
=(bamA->core.flag<<16)>>16; // uint32_t mapFlag=bamA->core.flag>>16; // uint32_t mapQ=(bamA->core.flag<<16)>>24; #define BAM_CIGAR_OperationShift 4 #define BAM_CIGAR_LengthBits 28 #define BAM_CIGAR_M 0 #define BAM_CIGAR_I 1 #define BAM_CIGAR_D 2 #define BAM_CIGAR_N 3 #define BAM_CIGAR_S 4 #define BAM_CIGAR_H 5 #define BAM_CIGAR_P 6 #define BAM_CIGAR_EQ 7 #define BAM_CIGAR_X 8 //by default, alignments marked as duplicate are not processed if ( (bamA->core.flag & 0x400) > 0 ) continue; //NH attribute uint8_t* aNHp=bam_aux_get(bamA,"NH"); uint32_t aNH; if (aNHp==NULL) { aNH=1; //no NH tag: assume NH=1 //continue; //do not process lines without NH field } else { aNH=bam_aux2i(bam_aux_get(bamA,"NH")); //write a safer function allowing for lacking NH tag }; if (aNH==0) continue; //do not process lines without NH=0 uint32_t aG=bamA->core.pos; uint32_t iStrand=0; if (P.outWigFlags.strand) {//strand for stranded data from SAM flag iStrand= ( (bamA->core.flag & 0x10) > 0 ) == ( (bamA->core.flag & 0x80) == 0 );//0/1 for +/- }; if (P.outWigFlags.type==1) {//5' of the1st read signal only, RAMPAGE/CAGE if ( (bamA->core.flag & 0x80)>0) continue; //skip if this the second mate if (iStrand==0) { if (aNH==1) {//unique mappers sigAll[aG*sigN+0+2*iStrand]++; }; sigAll[aG*sigN+1+2*iStrand]+=1.0/aNH;//U+M, normalized by the number of multi-mapping loci continue; //record only the first position }; }; uint32_t* cigar=(uint32_t*) (bamA->data+bamA->core.l_qname); for (uint32_t ic=0; ic<bamA->core.n_cigar; ic++) { uint32_t cigOp=(cigar[ic]<<BAM_CIGAR_LengthBits)>>BAM_CIGAR_LengthBits; uint32_t cigL=cigar[ic]>>BAM_CIGAR_OperationShift; switch (cigOp) { case(BAM_CIGAR_D): case(BAM_CIGAR_N): aG+=cigL; break; case(BAM_CIGAR_M): if (P.outWigFlags.type==0 || (P.outWigFlags.type==2 && (bamA->core.flag & 0x80)>0 )) {//full signal, or second mate onyl signal for (uint32_t ig=0;ig<cigL;ig++) { if (aG>=chrLen) { cerr << "BUG: alignment extends past chromosome in signalFromBAM.cpp\n"; exit(-1); }; if 
(aNH==1) {//unique mappers sigAll[aG*sigN+0+2*iStrand]++; }; sigAll[aG*sigN+1+2*iStrand]+=1.0/aNH;//U+M, normalized by the number of multi-mapping loci aG++; }; } else { aG+=cigL; }; }; }; if (P.outWigFlags.type==1) {//full signal --aG; if (aNH==1) {//unique mappers sigAll[aG*sigN+0+2*iStrand]++; }; sigAll[aG*sigN+1+2*iStrand]+=1.0/aNH;//U+M, normalized by the number of multi-mapping loci }; }; delete [] sigAll; for (int is=0; is<sigN; is++) {// flush/close all signal files sigOutAll[is]->flush(); sigOutAll[is]->close(); }; };
void filterReads(char * inBamFile, char * outBamFile, int minMapQual, int minLen, int maxMisMatches, float minPcId, float minPcAln, int ignoreSuppAlignments, int ignoreSecondaryAlignments) { // int result = -1; int outResult = -1; int supp_check = 0x0; if (ignoreSuppAlignments) { supp_check |= BAM_FSUPPLEMENTARY; } if (ignoreSecondaryAlignments) { supp_check |= BAM_FSECONDARY; } // helper variables BGZF* in = 0; BGZF* out = 0; bam1_t *b = bam_init1(); bam_hdr_t *h; // open bam if ((in = bgzf_open(inBamFile, "r")) == 0) { fprintf(stderr, "ERROR: Failed to open \"%s\" for reading.\n", inBamFile); } else if ((h = bam_hdr_read(in)) == 0) { // read header fprintf(stderr, "ERROR: Failed to read BAM header of file \"%s\".\n", inBamFile); } else if ((out = bgzf_open(outBamFile, "w")) == 0) { fprintf(stderr, "ERROR: Failed to open \"%s\" for writing.\n", outBamFile); } else { // write and destroy header bam_hdr_write(out, h); bam_hdr_destroy(h); int line = 0; int matches, mismatches, qLen; float pcAln, pcId; int showStats = 0; // fetch alignments while ((result = bam_read1(in, b)) >= 0) { line += 1; // only primary mappings if ((b->core.flag & supp_check) != 0) { if (showStats) fprintf(stdout, "Rejected %d, non-primary\n", line); continue; } // only high quality if (b->core.qual < minMapQual) { if (showStats) fprintf(stdout, "Rejected %d, quality: %d\n", line, b->core.qual); continue; } // not too many absolute mismatches mismatches = bam_aux2i(bam_aux_get(b, "NM")); if (mismatches > maxMisMatches) { if (showStats) fprintf(stdout, "Rejected %d, mismatches: %d\n", line, mismatches); continue; } // not too short qLen = bam_cigar2qlen((&b->core)->n_cigar, bam_get_cigar(b)); if (qLen < minLen) { if (showStats) fprintf(stdout, "Rejected %d, length: %d\n", line, qLen); continue; } // only high percent identity matches = bam_cigar2matches((&b->core)->n_cigar, bam_get_cigar(b)); pcId = (matches - mismatches) / (float)matches; // percentage as float between 0 to 1 if (pcId < 
minPcId) { if (showStats) fprintf(stdout, "Rejected %d, identity pc: %.4f\n", line, pcId); continue; } // only high percent alignment pcAln = matches / (float)qLen; // percentage as float between 0 to 1 if (pcAln < minPcAln) { if (showStats) fprintf(stdout, "Rejected %d, alignment pc: %.4f\n", line, pcAln); continue; } if ((outResult = bam_write1(out, b)) < -1) { fprintf(stderr, "ERROR: Attempt to write read no. %d to file \"%s\" failed with code %d.\n", line, outBamFile, outResult); } } if (result < -1) { fprintf(stderr, "ERROR: retrieval of read no. %d from file \"%s\" failed with code %d.\n", line, inBamFile, result); } } if (in) bgzf_close(in); if (out) bgzf_close(out); bam_destroy1(b); }
void profileReads(char* bamFile, int ignoreSuppAlignments, int ignoreSecondaryAlignments) { // int result = -1; int supp_check = 0x0; if (ignoreSuppAlignments) { supp_check |= BAM_FSUPPLEMENTARY; } if (ignoreSecondaryAlignments) { supp_check |= BAM_FSECONDARY; } // helper variables BGZF* in = 0 ; bam1_t *b = bam_init1(); bam_hdr_t *h; // open bam if ((in = bgzf_open(bamFile, "r")) == 0) { fprintf(stderr, "ERROR: Failed to open \"%s\" for reading.\n", bamFile); } else if ((h = bam_hdr_read(in)) == 0) { // read header fprintf(stderr, "ERROR: Failed to read BAM header of file \"%s\".\n", bamFile); } else { // destroy header bam_hdr_destroy(h); int line = 0; int supplementary, secondary; int mapQual; int matches, mismatches, qLen; float pcAln, pcId; int showStats = 0; uint8_t *aux_mismatches; // print header printf("line\tsupp\tsecondary\tmapQ\tmismatches\tmatches\tqLen\tpcId\tpcAln\n"); // fetch alignments while ((result = bam_read1(in, b)) >= 0) { line += 1; // only primary mappings if ((b->core.flag & supp_check) != 0) { if (showStats) fprintf(stdout, "Rejected %d, non-primary\n", line); continue; } supplementary = (b->core.flag & (1 | BAM_FSUPPLEMENTARY)) != 0; secondary = (b->core.flag & (1 | BAM_FSECONDARY)) != 0; // quality mapQual = b->core.qual; // bam_aux_get returns 0 if optional NM tag is missing if ((aux_mismatches = bam_aux_get(b, "NM"))) mismatches = bam_aux2i(aux_mismatches); else mismatches = 0; // length qLen = bam_cigar2qlen((&b->core)->n_cigar, bam_get_cigar(b)); // percent identity matches = bam_cigar2matches((&b->core)->n_cigar, bam_get_cigar(b)); pcId = (matches - mismatches) / (float)matches; // percentage as float between 0 to 1 // percent alignment pcAln = matches / (float)qLen; // percentage as float between 0 to 1 // print read values printf("%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.4f\t%.4f\n", line, supplementary, secondary, mapQual, mismatches, matches, qLen, pcId, pcAln); } if (result < -1) { fprintf(stderr, "ERROR: retrieval of read no. 
%d from file \"%s\" failed with code %d.\n", line, bamFile, result); } } if (in) bgzf_close(in); bam_destroy1(b); }
int main_bam2fq(int argc, char *argv[]) { BGZF *fp, *fpse = 0; bam1_t *b; uint8_t *buf; int max_buf, c, has12 = 0; kstring_t str; int64_t n_singletons = 0, n_reads = 0; char last[512], *fnse = 0; while ((c = getopt(argc, argv, "as:")) > 0) if (c == 'a') has12 = 1; else if (c == 's') fnse = optarg; if (argc == optind) { fprintf(stderr, "\nUsage: bam2fq [-a] [-s outSE] <in.bam>\n\n"); fprintf(stderr, "Options: -a append /1 and /2 to the read name\n"); fprintf(stderr, " -s FILE write singleton reads to FILE [assume single-end]\n"); fprintf(stderr, "\n"); return 1; } fp = strcmp(argv[optind], "-")? bgzf_open(argv[optind], "r") : bgzf_dopen(fileno(stdin), "r"); assert(fp); bam_hdr_destroy(bam_hdr_read(fp)); buf = 0; max_buf = 0; str.l = str.m = 0; str.s = 0; last[0] = 0; if (fnse) fpse = bgzf_open(fnse, "w1"); b = bam_init1(); while (bam_read1(fp, b) >= 0) { int i, qlen = b->core.l_qseq, is_print = 0; uint8_t *qual, *seq; if (b->flag&BAM_FSECONDARY) continue; // skip secondary alignments ++n_reads; if (fpse) { if (str.l && strcmp(last, bam_get_qname(b))) { bgzf_write(fpse, str.s, str.l); str.l = 0; ++n_singletons; } if (str.l) is_print = 1; strcpy(last, bam_get_qname(b)); } else is_print = 1; qual = bam_get_qual(b); kputc(qual[0] == 0xff? 
'>' : '@', &str); kputsn(bam_get_qname(b), b->core.l_qname - 1, &str); if (has12) { kputc('/', &str); kputw(b->core.flag>>6&3, &str); } kputc('\n', &str); if (max_buf < qlen + 1) { max_buf = qlen + 1; kroundup32(max_buf); buf = (uint8_t*)realloc(buf, max_buf); } buf[qlen] = 0; seq = bam_get_seq(b); for (i = 0; i < qlen; ++i) buf[i] = bam_seqi(seq, i); // copy the sequence if (bam_is_rev(b)) { // reverse complement for (i = 0; i < qlen>>1; ++i) { int8_t t = seq_comp_table[buf[qlen - 1 - i]]; buf[qlen - 1 - i] = seq_comp_table[buf[i]]; buf[i] = t; } if (qlen&1) buf[i] = seq_comp_table[buf[i]]; } for (i = 0; i < qlen; ++i) buf[i] = seq_nt16_str[buf[i]]; kputsn((char*)buf, qlen, &str); kputc('\n', &str); if (qual[0] != 0xff) { kputsn("+\n", 2, &str); for (i = 0; i < qlen; ++i) buf[i] = 33 + qual[i]; if (bam_is_rev(b)) { // reverse for (i = 0; i < qlen>>1; ++i) { uint8_t t = buf[qlen - 1 - i]; buf[qlen - 1 - i] = buf[i]; buf[i] = t; } } } kputsn((char*)buf, qlen, &str); kputc('\n', &str); if (is_print) { fwrite(str.s, 1, str.l, stdout); str.l = 0; } } if (fpse) { if (str.l) { bgzf_write(fpse, str.s, str.l); ++n_singletons; } fprintf(stderr, "[M::%s] discarded %lld singletons\n", __func__, (long long)n_singletons); bgzf_close(fpse); } fprintf(stderr, "[M::%s] processed %lld reads\n", __func__, (long long)n_reads); free(buf); free(str.s); bam_destroy1(b); bgzf_close(fp); return 0; }
/*
 * Computes per-base read coverage over 'slice' from an indexed BAM file and
 * prints it to stdout as "<1-based position> <coverage>" lines, preceded by a
 * summary of the number of reads used and rejected.
 *
 *   fName  not used by this function
 *   slice  region to cover; only slices whose chromosome start is 1 are accepted
 *   in     open alignment file; its header is read through in->fp.bgzf
 *   idx    index used to build the region iterator
 *   flags  when M_UCSC_NAMING is set the region name is prefixed with "chr"
 *
 * Reads flagged unmapped, secondary, QC-fail or duplicate are counted as bad
 * and ignored.  Match, mismatch and deletion CIGAR operations add coverage.
 *
 * Returns 1 both on completion and on the recoverable failures handled here;
 * an unknown region name or an unparsable region terminates the process.
 */
int calcCoverage(char *fName, Slice *slice, htsFile *in, hts_idx_t *idx, int flags) {
  int ref;
  int begRange;
  int endRange;
  char region[1024];
  char region_name[512];

  if (Slice_getChrStart(slice) != 1) {
    fprintf(stderr, "Currently only allow a slice start position of 1\n");
    return 1;
  }

  // Build the sequence name first; it is needed for the header lookup and for the region string
  if (flags & M_UCSC_NAMING) {
    snprintf(region_name, sizeof(region_name), "chr%s", Slice_getSeqRegionName(slice));
  } else {
    snprintf(region_name, sizeof(region_name), "%s", Slice_getSeqRegionName(slice));
  }

  // bam_hdr_read allocates the header itself, so no separate bam_hdr_init is needed
  bam_hdr_t *header = bam_hdr_read(in->fp.bgzf);
  if (header == NULL) {
    fprintf(stderr, "Failed to read BAM header\n");
    return 1;
  }

  ref = bam_name2id(header, region_name);
  if (ref < 0) {
    fprintf(stderr, "Invalid region %s\n", region_name);
    exit(1);
  }

  snprintf(region, sizeof(region), "%s:%ld-%ld", region_name,
           Slice_getSeqRegionStart(slice),
           Slice_getSeqRegionEnd(slice));

  if (hts_parse_reg(region, &begRange, &endRange) == NULL) {
    fprintf(stderr, "Could not parse %s\n", region);
    exit(2);
  }
  bam_hdr_destroy(header);

  hts_itr_t *iter = sam_itr_queryi(idx, ref, begRange, endRange);
  if (iter == NULL) {
    fprintf(stderr, "Failed to create iterator for %s\n", region);
    return 1;
  }
  bam1_t *b = bam_init1();

  long sliceLen = Slice_getLength(slice);
  Coverage *coverage = (Coverage *)calloc(sliceLen, sizeof(Coverage));
  if (coverage == NULL) {
    fprintf(stderr, "Failed to allocate coverage array for %s\n", region);
    sam_itr_destroy(iter);
    bam_destroy1(b);
    return 1;
  }

  long counter = 0;
  long overlapping = 0;
  long bad = 0;
  int startIndex = 0;
  while (bam_itr_next(in, iter, b) >= 0) {
    if (b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)) {
      bad++;
      continue;
    }

    int end;
    //end = bam_calend(&b->core, bam1_cigar(b));
    end = bam_endpos(b);

    // There is a special case for reads which have zero length and start at begRange (so end at begRange ie. before the first base we're interested in).
    // That is the reason for the end == begRange test
    if (end == begRange) {
      continue;
    }
    counter++;

    if (!(counter%1000000)) {
      if (verbosity > 1) {
        printf(".");
      }
      fflush(stdout);
    }

    // Remember: b->core.pos is zero based!
    int cigInd;
    int refPos;
    int readPos;
    uint32_t *cigar = bam_get_cigar(b);
    for (cigInd = readPos = 0, refPos = b->core.pos; cigInd < b->core.n_cigar; ++cigInd) {
      int k;
      int lenCigBlock = cigar[cigInd]>>4;
      int op = cigar[cigInd]&0xf;

      if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
        for (k = 0; k < lenCigBlock; ++k) {
          // positions outside the slice are skipped so the array is never overrun
          long covPos = (long)refPos + k;
          if (covPos >= 0 && covPos < sliceLen) {
            coverage[covPos].coverage++;
          }
        }
        refPos += lenCigBlock;
        readPos += lenCigBlock;
      } else if (op == BAM_CDEL) {
        for (k = 0; k < lenCigBlock; ++k) {
          long covPos = (long)refPos + k;
          if (covPos >= 0 && covPos < sliceLen) {
            coverage[covPos].coverage++;
          }
        }
        refPos += lenCigBlock;
      } else if (op == BAM_CSOFT_CLIP) {
        readPos += lenCigBlock;
      } else if (op == BAM_CHARD_CLIP) {
      } else if (op == BAM_CINS) {
        readPos += lenCigBlock;
      } else if (op == BAM_CREF_SKIP) {
        refPos += lenCigBlock;
      }
    }

#ifdef DONE
    int j;
    int done = 0;
    int hadOverlap = 0;

    for (j=startIndex; j < Vector_getNumElement(genes) && !done; j++) {
      Gene *gene = Vector_getElementAt(genes,j);
      if (!gene) {
        continue;
      }
      // Remember: b->core.pos is zero based!
      if (b->core.pos < Gene_getEnd(gene) && end >= Gene_getStart(gene)) {
        int k;
        int doneGene = 0;
        for (k=0; k<Gene_getTranscriptCount(gene) && !doneGene; k++) {
          Transcript *trans = Gene_getTranscriptAt(gene,k);

          if (b->core.pos < Transcript_getEnd(trans) && end >= Transcript_getStart(trans)) {
            int m;

            for (m=0; m<Transcript_getExonCount(trans) && !doneGene; m++) {
              Exon *exon = Transcript_getExonAt(trans,m);

              if (b->core.pos < Exon_getEnd(exon) && end >= Exon_getStart(exon)) {

                // Only count as overlapping once (could be that a read overlaps more than one gene)
                if (!hadOverlap) {
                  overlapping++;
                  hadOverlap = 1;
                }

                gs = IDHash_getValue(geneCountsHash, Gene_getDbID(gene));
                gs->score++;

                doneGene = 1;
              }
            }
          }
        }
      } else if (Gene_getStart(gene) > end) {
        done = 1;
      } else if (Gene_getEnd(gene) < b->core.pos+1) {
        gs = IDHash_getValue(geneCountsHash, Gene_getDbID(gene));
        printf("Gene %s (%s) score %ld\n",Gene_getStableId(gene),
                                          Gene_getDisplayXref(gene) ? DBEntry_getDisplayId(Gene_getDisplayXref(gene)) : "",
                                          gs->score);

        if (verbosity > 1) {
          printf("Removing gene %s (index %d) with extent %d to %d\n",
                 Gene_getStableId(gene),
                 gs->index,
                 Gene_getStart(gene),
                 Gene_getEnd(gene));
        }

        Vector_setElementAt(genes,j,NULL);

        // Magic (very important for speed) - move startIndex to first non null gene
        int n;
        startIndex = 0;
        for (n=0;n<Vector_getNumElement(genes);n++) {
          void *v = Vector_getElementAt(genes,n);

          if (v != NULL) {
            break;
          }
          startIndex++;
        }
        if (verbosity > 1) {
          printf("startIndex now %d\n",startIndex);
        }
      }
    }
#endif
  }
  if (verbosity > 1) {
    printf("\n");
  }

#ifdef DONE
  // Print out read counts for what ever's left in the genes array
  int n;
  for (n=0;n<Vector_getNumElement(genes);n++) {
    Gene *gene = Vector_getElementAt(genes,n);

    if (gene != NULL) {
      gs = IDHash_getValue(geneCountsHash, Gene_getDbID(gene));
      printf("Gene %s (%s) score %ld\n",Gene_getStableId(gene),
                                        Gene_getDisplayXref(gene) ? DBEntry_getDisplayId(Gene_getDisplayXref(gene)) : "",
                                        gs->score);
    }
  }
#endif

  printf("Read %ld reads. Number of bad reads (unmapped, qc fail, secondary, dup) %ld\n", counter, bad);

  long i;
  for (i=0; i< sliceLen; i++) {
    printf("%ld %ld\n", i+1, coverage[i].coverage);
  }

  free(coverage);
  sam_itr_destroy(iter);
  bam_destroy1(b);

  return 1;
}
int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam) { BGZF *fp; uint8_t *buf; uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE]; const int es=BGZF_EMPTY_BLOCK_SIZE; int i; fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(stdout), "w"); if (fp == 0) { fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outbam); return 1; } if (h) bam_hdr_write(fp, h); buf = (uint8_t*) malloc(BUF_SIZE); for(i = 0; i < nfn; ++i){ BGZF *in; bam_hdr_t *old; int len,j; in = strcmp(fn[i], "-")? bgzf_open(fn[i], "r") : bgzf_fdopen(fileno(stdin), "r"); if (in == 0) { fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]); return -1; } if (in->is_write) return -1; old = bam_hdr_read(in); if (old == NULL) { fprintf(stderr, "[%s] ERROR: couldn't read header for '%s'.\n", __func__, fn[i]); bgzf_close(in); return -1; } if (h == 0 && i == 0) bam_hdr_write(fp, old); if (in->block_offset < in->block_length) { bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset); bgzf_flush(fp); } j=0; while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) { if(len<es){ int diff=es-len; if(j==0) { fprintf(stderr, "[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]); return -1; } bgzf_raw_write(fp, ebuf, len); memcpy(ebuf,ebuf+len,diff); memcpy(ebuf+diff,buf,len); } else { if(j!=0) bgzf_raw_write(fp, ebuf, es); len-= es; memcpy(ebuf,buf+len,es); bgzf_raw_write(fp, buf, len); } j=1; } /* check final gzip block */ { const uint8_t gzip1=ebuf[0]; const uint8_t gzip2=ebuf[1]; const uint32_t isize=*((uint32_t*)(ebuf+es-4)); if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) { fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]); fprintf(stderr, " Possible output corruption.\n"); bgzf_raw_write(fp, ebuf, es); } } bam_hdr_destroy(old); bgzf_close(in); } free(buf); bgzf_close(fp); return 0; }