Esempio n. 1
0
/**
 * Copy the alignments of one BAM file that pass a set of filters into a
 * new BAM file.
 *
 * The header of inBamFile is written to outBamFile, followed by every
 * alignment record that passes all of the checks below, in input order.
 * A record is dropped when any of the following holds:
 *   - one of the flag bits selected by ignoreSuppAlignments /
 *     ignoreSecondaryAlignments is set,
 *   - its mapping quality is below minMapQual,
 *   - its NM tag value exceeds maxMisMatches (a missing NM tag counts as 0),
 *   - its query length, derived from the CIGAR, is below minLen,
 *   - (matches - mismatches) / matches is below minPcId,
 *   - matches / query length is below minPcAln.
 * A record with zero matches (or zero query length) is given a ratio of 0
 * for the corresponding check instead of dividing by zero.
 *
 * Problems (unopenable files, unreadable header, read/write failures) are
 * reported on stderr; the function itself returns nothing.
 *
 * @param inBamFile                 path of the BAM file to read
 * @param outBamFile                path of the BAM file to write
 * @param minMapQual                minimum mapping quality to keep a read
 * @param minLen                    minimum query length to keep a read
 * @param maxMisMatches             maximum NM value to keep a read
 * @param minPcId                   minimum identity ratio (0 to 1)
 * @param minPcAln                  minimum aligned ratio (0 to 1)
 * @param ignoreSuppAlignments      non-zero: drop supplementary alignments
 * @param ignoreSecondaryAlignments non-zero: drop secondary alignments
 */
void filterReads(char * inBamFile,
                 char * outBamFile,
                 int minMapQual,
                 int minLen,
                 int maxMisMatches,
                 float minPcId,
                 float minPcAln,
                 int ignoreSuppAlignments,
                 int ignoreSecondaryAlignments) {
    int result = -1;
    int outResult = -1;

    // flag bits that cause a record to be skipped
    int supp_check = 0x0;
    if (ignoreSuppAlignments) {
        supp_check |= BAM_FSUPPLEMENTARY;
    }
    if (ignoreSecondaryAlignments) {
        supp_check |= BAM_FSECONDARY;
    }

    // helper variables; all start out null so the shared cleanup at the
    // bottom can tell what was actually acquired
    BGZF* in = 0;
    BGZF* out = 0;
    bam_hdr_t *h = 0;
    bam1_t *b = bam_init1();

    if (b == 0) {
        fprintf(stderr,
                "ERROR: Failed to allocate memory for an alignment record.\n");
    }
    // open bam
    else if ((in = bgzf_open(inBamFile, "r")) == 0) {
        fprintf(stderr,
               "ERROR: Failed to open \"%s\" for reading.\n",
               inBamFile);
    }
    else if ((h = bam_hdr_read(in)) == 0) { // read header
        fprintf(stderr,
                "ERROR: Failed to read BAM header of file \"%s\".\n",
                inBamFile);
    }
    else if ((out = bgzf_open(outBamFile, "w")) == 0) {
        fprintf(stderr,
               "ERROR: Failed to open \"%s\" for writing.\n",
               outBamFile);
    }
    else if (bam_hdr_write(out, h) < 0) { // write header
        fprintf(stderr,
                "ERROR: Failed to write BAM header to file \"%s\".\n",
                outBamFile);
    }
    else {
        int line = 0;
        int matches, mismatches, qLen;
        float pcAln, pcId;
        int showStats = 0;  // debugging switch: report why reads are rejected
        uint8_t *aux_mismatches;

        // fetch alignments
        while ((result = bam_read1(in, b)) >= 0) {
            line += 1;

            // only primary mappings
            if ((b->core.flag & supp_check) != 0) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, non-primary\n", line);
                continue;
            }

            // only high quality
            if (b->core.qual < minMapQual) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, quality: %d\n", line, b->core.qual);
                continue;
            }

            // not too many absolute mismatches
            // bam_aux_get returns 0 if the optional NM tag is missing; such
            // a read is treated as having no mismatches rather than passing
            // a null pointer on to bam_aux2i
            aux_mismatches = bam_aux_get(b, "NM");
            mismatches = aux_mismatches ? bam_aux2i(aux_mismatches) : 0;
            if (mismatches > maxMisMatches) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, mismatches: %d\n", line, mismatches);
                continue;
            }

            // not too short
            qLen = bam_cigar2qlen((&b->core)->n_cigar, bam_get_cigar(b));
            if (qLen < minLen) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, length: %d\n", line, qLen);
                continue;
            }

            // only high percent identity
            matches = bam_cigar2matches((&b->core)->n_cigar, bam_get_cigar(b));
            // ratio between 0 and 1; no matches means no identity, which
            // also keeps NaN/inf out of the comparison below
            pcId = (matches > 0) ? (matches - mismatches) / (float)matches : 0.0f;
            if (pcId < minPcId) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, identity pc: %.4f\n", line, pcId);
                continue;
            }

            // only high percent alignment
            // ratio between 0 and 1; an empty query has nothing aligned
            pcAln = (qLen > 0) ? matches / (float)qLen : 0.0f;
            if (pcAln < minPcAln) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, alignment pc: %.4f\n", line, pcAln);
                continue;
            }

            // any negative return is a write failure; stop instead of
            // repeating the same error for every remaining record
            if ((outResult = bam_write1(out, b)) < 0) {
                fprintf(stderr,
                        "ERROR: Attempt to write read no. %d to file \"%s\" failed with code %d.\n",
                        line, outBamFile, outResult);
                break;
            }
        }
        // -1 is the regular end of the input, anything lower is an error
        if (result < -1) {
            fprintf(stderr,
                    "ERROR: retrieval of read no. %d from file \"%s\" failed with code %d.\n",
                    line, inBamFile, result);
        }
    }

    // shared cleanup for every path above
    if (h) bam_hdr_destroy(h);
    if (in) bgzf_close(in);
    // closing flushes buffered output, so a failure here means data was lost
    if (out && bgzf_close(out) < 0) {
        fprintf(stderr,
                "ERROR: Failed to close \"%s\" after writing.\n",
                outBamFile);
    }
    if (b) bam_destroy1(b);
}
Esempio n. 2
0
/**
 * Print a tab-separated table with one row of alignment statistics per
 * record of a BAM file.
 *
 * A header row is printed to stdout first, then one row per record that is
 * not skipped. Columns: running record number, supplementary flag (0/1),
 * secondary flag (0/1), mapping quality, NM mismatches (0 when the NM tag
 * is absent), CIGAR matches, query length, identity ratio
 * ((matches - mismatches) / matches) and aligned ratio (matches / query
 * length). A ratio whose denominator is zero is reported as 0.
 *
 * Problems (unopenable file, unreadable header, read failure) are reported
 * on stderr; the function itself returns nothing.
 *
 * @param bamFile                   path of the BAM file to read
 * @param ignoreSuppAlignments      non-zero: skip supplementary alignments
 * @param ignoreSecondaryAlignments non-zero: skip secondary alignments
 */
void profileReads(char* bamFile,
                  int ignoreSuppAlignments,
                  int ignoreSecondaryAlignments) {
    int result = -1;

    // flag bits that cause a record to be skipped
    int supp_check = 0x0;
    if (ignoreSuppAlignments) {
        supp_check |= BAM_FSUPPLEMENTARY;
    }
    if (ignoreSecondaryAlignments) {
        supp_check |= BAM_FSECONDARY;
    }

    // helper variables
    BGZF* in = 0 ;
    bam1_t *b = bam_init1();
    bam_hdr_t *h;

    if (b == 0) {
        fprintf(stderr,
                "ERROR: Failed to allocate memory for an alignment record.\n");
    }
    // open bam
    else if ((in = bgzf_open(bamFile, "r")) == 0) {
        fprintf(stderr,
               "ERROR: Failed to open \"%s\" for reading.\n",
               bamFile);
    }
    else if ((h = bam_hdr_read(in)) == 0) { // read header
        fprintf(stderr,
                "ERROR: Failed to read BAM header of file \"%s\".\n",
                bamFile);
    }
    else {
        // the header contents are not used here, so release it right away
        bam_hdr_destroy(h);

        int line = 0;

        int supplementary, secondary;
        int mapQual;
        int matches, mismatches, qLen;
        float pcAln, pcId;
        int showStats = 0;  // debugging switch: report skipped records
        uint8_t *aux_mismatches;

        // print header
        printf("line\tsupp\tsecondary\tmapQ\tmismatches\tmatches\tqLen\tpcId\tpcAln\n");

        // fetch alignments
        while ((result = bam_read1(in, b)) >= 0) {
            line += 1;

            // skip the alignment types the caller asked to ignore
            if ((b->core.flag & supp_check) != 0) {
                if (showStats)
                    fprintf(stdout, "Rejected %d, non-primary\n", line);
                continue;
            }
            // test each flag bit on its own; OR-ing in bit 0x1 (the
            // "paired" flag) would mark every paired-end read as both
            // supplementary and secondary
            supplementary = (b->core.flag & BAM_FSUPPLEMENTARY) != 0;
            secondary = (b->core.flag & BAM_FSECONDARY) != 0;
            // quality
            mapQual = b->core.qual;
            // bam_aux_get returns 0 if optional NM tag is missing
            aux_mismatches = bam_aux_get(b, "NM");
            mismatches = aux_mismatches ? bam_aux2i(aux_mismatches) : 0;
            // length
            qLen = bam_cigar2qlen((&b->core)->n_cigar, bam_get_cigar(b));
            // percent identity, as a ratio between 0 and 1; reported as 0
            // when there are no matches so the table never contains nan/inf
            matches = bam_cigar2matches((&b->core)->n_cigar, bam_get_cigar(b));
            pcId = (matches > 0) ? (matches - mismatches) / (float)matches : 0.0f;
            // percent alignment, as a ratio between 0 and 1; reported as 0
            // for an empty query
            pcAln = (qLen > 0) ? matches / (float)qLen : 0.0f;

            // print read values
            printf("%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.4f\t%.4f\n",
                   line, supplementary, secondary, mapQual, mismatches, matches,
                   qLen, pcId, pcAln);
        }
        // -1 is the regular end of the input, anything lower is an error
        if (result < -1) {
            fprintf(stderr,
                    "ERROR: retrieval of read no. %d from file \"%s\" failed with code %d.\n",
                    line, bamFile, result);
        }
    }
    if (in) bgzf_close(in);
    if (b) bam_destroy1(b);
}