/*
 * Reads a BAM file from 'in' and writes a new BAM file to 'fd' with 'h'
 * substituted for the original header.  No checks are made on the validity
 * of the new header.
 *
 * in        BGZF handle opened for reading, positioned at the start of the
 *           file.  It is not closed by this function.
 * h         Replacement header.  When add_PG is non-zero its text/l_text
 *           fields are replaced with a copy that includes the new @PG line.
 * fd        Output file descriptor; it is wrapped in a BGZF handle which is
 *           closed before returning, on success and on failure alike.
 * arg_list  Command line recorded as the CL field of the @PG line; may be
 *           NULL, in which case no CL field is added.
 * add_PG    Non-zero to append a "samtools" @PG line to the header.
 *
 * Returns 0 on success, -1 on failure.
 */
int bam_reheader(BGZF *in, bam_hdr_t *h, int fd,
                 const char *arg_list, int add_PG)
{
    BGZF *fp = NULL;
    bam_hdr_t *old_hdr;
    SAM_hdr *sh = NULL;
    uint8_t *buf = NULL;
    ssize_t len;

    if (in->is_write) return -1;

    buf = malloc(BUF_SIZE);
    if (buf == NULL) {
        fprintf(stderr, "Out of memory\n");
        return -1;
    }

    /* The old header is read only to advance 'in' past it; release it
     * straight away so it is not leaked. */
    old_hdr = bam_hdr_read(in);
    if (old_hdr == NULL) {
        fprintf(stderr, "Couldn't read header\n");
        goto fail;
    }
    bam_hdr_destroy(old_hdr);

    fp = bgzf_fdopen(fd, "w");
    if (fp == NULL) {
        fprintf(stderr, "Couldn't open output file\n");
        goto fail;
    }

    if (add_PG) {
        // Around the houses, but it'll do until we can manipulate bam_hdr_t natively.
        const char *text;
        char *new_text;

        sh = sam_hdr_parse_(h->text, h->l_text);
        if (sh == NULL) {
            fprintf(stderr, "Couldn't parse header\n");
            goto fail;
        }
        if (sam_hdr_add_PG(sh, "samtools",
                           "VN", samtools_version(),
                           arg_list ? "CL" : NULL,
                           arg_list ? arg_list : NULL,
                           NULL) != 0)
            goto fail;

        /* Duplicate the new text before releasing the old one so that 'h'
         * never ends up holding a dangling pointer on failure. */
        text = sam_hdr_str(sh);
        new_text = text ? strdup(text) : NULL;
        if (new_text == NULL) {
            fprintf(stderr, "Out of memory\n");
            goto fail;
        }
        free(h->text);
        h->text = new_text;
        h->l_text = sam_hdr_length(sh);

        sam_hdr_free(sh);
        sh = NULL;
    }

    if (bam_hdr_write(fp, h) < 0) goto write_fail;

    /* Whatever is left of the already-decompressed block after the old
     * header has to be re-compressed; everything after that is copied as
     * raw (still compressed) bytes. */
    if (in->block_offset < in->block_length) {
        if (bgzf_write(fp,
                       (const uint8_t *)in->uncompressed_block + in->block_offset,
                       in->block_length - in->block_offset) < 0)
            goto write_fail;
        if (bgzf_flush(fp) < 0) goto write_fail;
    }

    while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) {
        if (bgzf_raw_write(fp, buf, len) < 0) goto write_fail;
    }
    if (len < 0) {
        fprintf(stderr, "[%s] Error reading input file\n", __func__);
        goto fail;
    }

    free(buf);

    /* NOTE(review): offsets are zeroed before closing, presumably so that
     * bgzf_close() has no pending buffered data to flush -- confirm. */
    fp->block_offset = in->block_offset = 0;
    if (bgzf_close(fp) < 0) {
        fprintf(stderr, "[%s] Error closing output file\n", __func__);
        return -1;
    }
    return 0;

write_fail:
    fprintf(stderr, "[%s] Error writing to output file\n", __func__);
fail:
    if (sh) sam_hdr_free(sh);
    if (fp) bgzf_close(fp);
    free(buf);
    return -1;
}
void filterReads(char * inBamFile, char * outBamFile, int minMapQual, int minLen, int maxMisMatches, float minPcId, float minPcAln, int ignoreSuppAlignments, int ignoreSecondaryAlignments) { // int result = -1; int outResult = -1; int supp_check = 0x0; if (ignoreSuppAlignments) { supp_check |= BAM_FSUPPLEMENTARY; } if (ignoreSecondaryAlignments) { supp_check |= BAM_FSECONDARY; } // helper variables BGZF* in = 0; BGZF* out = 0; bam1_t *b = bam_init1(); bam_hdr_t *h; // open bam if ((in = bgzf_open(inBamFile, "r")) == 0) { fprintf(stderr, "ERROR: Failed to open \"%s\" for reading.\n", inBamFile); } else if ((h = bam_hdr_read(in)) == 0) { // read header fprintf(stderr, "ERROR: Failed to read BAM header of file \"%s\".\n", inBamFile); } else if ((out = bgzf_open(outBamFile, "w")) == 0) { fprintf(stderr, "ERROR: Failed to open \"%s\" for writing.\n", outBamFile); } else { // write and destroy header bam_hdr_write(out, h); bam_hdr_destroy(h); int line = 0; int matches, mismatches, qLen; float pcAln, pcId; int showStats = 0; // fetch alignments while ((result = bam_read1(in, b)) >= 0) { line += 1; // only primary mappings if ((b->core.flag & supp_check) != 0) { if (showStats) fprintf(stdout, "Rejected %d, non-primary\n", line); continue; } // only high quality if (b->core.qual < minMapQual) { if (showStats) fprintf(stdout, "Rejected %d, quality: %d\n", line, b->core.qual); continue; } // not too many absolute mismatches mismatches = bam_aux2i(bam_aux_get(b, "NM")); if (mismatches > maxMisMatches) { if (showStats) fprintf(stdout, "Rejected %d, mismatches: %d\n", line, mismatches); continue; } // not too short qLen = bam_cigar2qlen((&b->core)->n_cigar, bam_get_cigar(b)); if (qLen < minLen) { if (showStats) fprintf(stdout, "Rejected %d, length: %d\n", line, qLen); continue; } // only high percent identity matches = bam_cigar2matches((&b->core)->n_cigar, bam_get_cigar(b)); pcId = (matches - mismatches) / (float)matches; // percentage as float between 0 to 1 if (pcId < 
minPcId) { if (showStats) fprintf(stdout, "Rejected %d, identity pc: %.4f\n", line, pcId); continue; } // only high percent alignment pcAln = matches / (float)qLen; // percentage as float between 0 to 1 if (pcAln < minPcAln) { if (showStats) fprintf(stdout, "Rejected %d, alignment pc: %.4f\n", line, pcAln); continue; } if ((outResult = bam_write1(out, b)) < -1) { fprintf(stderr, "ERROR: Attempt to write read no. %d to file \"%s\" failed with code %d.\n", line, outBamFile, outResult); } } if (result < -1) { fprintf(stderr, "ERROR: retrieval of read no. %d from file \"%s\" failed with code %d.\n", line, inBamFile, result); } } if (in) bgzf_close(in); if (out) bgzf_close(out); bam_destroy1(b); }
int bam_cat(int nfn, char * const *fn, const bam_hdr_t *h, const char* outbam) { BGZF *fp; uint8_t *buf; uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE]; const int es=BGZF_EMPTY_BLOCK_SIZE; int i; fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(stdout), "w"); if (fp == 0) { fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outbam); return 1; } if (h) bam_hdr_write(fp, h); buf = (uint8_t*) malloc(BUF_SIZE); for(i = 0; i < nfn; ++i){ BGZF *in; bam_hdr_t *old; int len,j; in = strcmp(fn[i], "-")? bgzf_open(fn[i], "r") : bgzf_fdopen(fileno(stdin), "r"); if (in == 0) { fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]); return -1; } if (in->is_write) return -1; old = bam_hdr_read(in); if (old == NULL) { fprintf(stderr, "[%s] ERROR: couldn't read header for '%s'.\n", __func__, fn[i]); bgzf_close(in); return -1; } if (h == 0 && i == 0) bam_hdr_write(fp, old); if (in->block_offset < in->block_length) { bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset); bgzf_flush(fp); } j=0; while ((len = bgzf_raw_read(in, buf, BUF_SIZE)) > 0) { if(len<es){ int diff=es-len; if(j==0) { fprintf(stderr, "[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]); return -1; } bgzf_raw_write(fp, ebuf, len); memcpy(ebuf,ebuf+len,diff); memcpy(ebuf+diff,buf,len); } else { if(j!=0) bgzf_raw_write(fp, ebuf, es); len-= es; memcpy(ebuf,buf+len,es); bgzf_raw_write(fp, buf, len); } j=1; } /* check final gzip block */ { const uint8_t gzip1=ebuf[0]; const uint8_t gzip2=ebuf[1]; const uint32_t isize=*((uint32_t*)(ebuf+es-4)); if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) { fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]); fprintf(stderr, " Possible output corruption.\n"); bgzf_raw_write(fp, ebuf, es); } } bam_hdr_destroy(old); bgzf_close(in); } free(buf); bgzf_close(fp); return 0; }