Example #1
0
int main( int argc, const char * argv[] )
{
    args_t args = args_t( argc, argv );
    coverage_t::const_iterator cit;
    coverage_t coverage;
    vector< pair< int, int > > data;
    bam1_t * const in_bam = bam_init1();

    while ( args.bamin->next( in_bam ) ) {
        aligned_t read( in_bam );
        coverage.include( read );
    }

    for ( cit = coverage.begin(); cit != coverage.end(); ++cit ) {
        map< elem_t, int >::const_iterator it;

        if ( cit->op != MATCH )
            continue;

        it = cit->obs.begin();

        if ( it == cit->obs.end() )
            continue;

        int cov = it->second;
        int max = it->second;

        for ( ++it; it != cit->obs.end(); ++it ) {
            cov += it->second;
            if ( it->second > max )
                max = it->second;
        }

        for ( it = cit->obs.begin(); it != cit->obs.end(); ++it )
            if ( it->second && it->second != max )
                data.push_back( make_pair( cov, cov - it->second ) );
    }

    rateclass_t rc( data, 3 );
    double lg_L, aicc;
    vector< pair< double, double > > params;

    rc( lg_L, aicc, params );

    const double bg = weighted_harmonic_mean( params );
    const double lg_bg = log( bg );
    const double lg_invbg = log( 1.0 - bg );

    params_json_dump( stderr, lg_L, aicc, params, bg );

    for ( cit = coverage.begin(); cit != coverage.end(); ++cit ) {
        map< elem_t, int >::const_iterator it;

        if ( cit->op != MATCH )
            continue;

        it = cit->obs.begin();

        if ( it == cit->obs.end() )
            continue;

        int cov = it->second;
        int max = it->second;

        for ( ++it; it != cit->obs.end(); ++it ) {
            cov += it->second;
            if ( it->second > max )
                max = it->second;
        }

        string css;

        for ( it = cit->obs.begin(); it != cit->obs.end(); ++it )
            if ( it->second == max ) {
                string elem;
                it->first.get_seq( elem );
                css.append( elem );
                css.push_back( '/' );
            }

        // erase the trailing slash, in a compatible way
        css.erase( --css.end() );

        for ( it = cit->obs.begin(); it != cit->obs.end(); ++it ) {
            string elem;

            if ( !it->second || it->second == max )
                continue;

            const double prob = prob_background( lg_bg, lg_invbg, cov, it->second );

            if ( prob >= args.cutoff )
                continue;

            fprintf( stdout, "%d\t%s\t%d\t", cit->col + 1, css.c_str(), cov );

            it->first.get_seq( elem );
            fprintf( stdout, "%s", elem.c_str() );

            fprintf( stdout, ":%d:%.3e\n", it->second, prob );
        }

        fflush( stdout );
    }

    bam_destroy1( in_bam );

    return 0;
}
Example #2
0
// Entry point.
//
// 1. Folds every alignment delivered by args.bamin into a coverage_t and
//    collects one ( column depth, observation count ) pair per non-zero
//    observation.
// 2. Fits a rateclass_t model to those pairs, dumps the parameters to stderr,
//    and takes params[ 0 ].second as the background rate.
// 3. For every non-INS column, prints each observation whose
//    prob_background() value is below args.cutoff to stdout, and overwrites
//    the stored count with 1 (reported) or 0 (not reported).
//
// The function then returns 0.  The BAM re-writing stage that follows the
// early return is currently unreachable; it is kept, with its record leak
// fixed, so that it is correct if the early return is ever removed.
int main( int argc, const char * argv[] )
{
    args_t args = args_t( argc, argv );
    coverage_t coverage;
    vector< cov_t > variants;   // only filled by the disabled ( #if 0 ) code below
    vector< pair< int, int > > data;

    // accumulate the data at each position in a linked list
    {
        cov_citer cit;
        bam1_t * in_bam = bam_init1();

        while ( args.bamin->next( in_bam ) ) {
            aligned_t read( in_bam );
            coverage.include( read );
        }

        for ( cit = coverage.begin(); cit != coverage.end(); ++cit ) {
            // total number of observations in this column
            int cov = 0;

            for ( obs_citer it = cit->obs.begin(); it != cit->obs.end(); ++it )
                cov += it->second;

            // one training pair per observation that was actually seen
            for ( obs_citer it = cit->obs.begin(); it != cit->obs.end(); ++it )
                if ( it->second )
                    data.push_back( make_pair( cov, it->second ) );

#if 0
            obs_citer it = cit->obs.begin();
            int cov = 0, maj;

            if ( it == cit->obs.end() )
                continue;

            maj = it->second;
            cov += maj;

            for ( ++it; it != cit->obs.end(); ++it ) {
                if ( it->second > maj )
                    maj = it->second;
                cov += it->second;
            }

            data.push_back( make_pair( cov, maj ) );
#endif
        }

        bam_destroy1( in_bam );
    }

    // learn a joint multi-binomial model for the mutation rate classes
    {
        cov_iter cit;
        double lg_L, aicc, bg, lg_bg, lg_invbg;
        rateclass_t rc( data );
        vector< pair< double, double > > params;

        rc( lg_L, aicc, params );

        // NOTE(review): assumes rc() always yields at least one parameter pair
        // and that the first one is the background class -- confirm against
        // rateclass_t.
        bg = params[ 0 ].second;
        lg_bg = log( bg );
        lg_invbg = log( 1.0 - bg );

        params_json_dump( stderr, lg_L, aicc, params );

        // cerr << "background: " << bg << endl;

        // determine which variants are above background and those which are not
        for ( cit = coverage.begin(); cit != coverage.end(); ++cit ) {
            if ( cit->op == INS )
                continue;

            int cov = 0;

            for ( obs_citer it = cit->obs.begin(); it != cit->obs.end(); ++it )
                cov += it->second;

            // non-const iterator: the stored count is replaced by a 0/1 flag
            for ( obs_iter it = cit->obs.begin(); it != cit->obs.end(); ++it ) {
                const double p = prob_background( lg_bg, lg_invbg, cov, it->second );
                if ( p < args.cutoff ) {
                    // NOTE(review): the count and the nucleotides that follow
                    // are printed with no separator between them -- confirm
                    // this is the intended output format.
                    cout << cit->col << "\t" << cov << "\t" << it->second;
                    for ( unsigned i = 0; i < it->first.size(); ++i )
                        cout << bits2nuc( it->first[ i ] );
                    cout << ":" << p << endl;
                    it->second = 1;
                }
                else {
                    it->second = 0;
                }
            }

#if 0
            variants.push_back( *cit );
#endif
        }
    }

    // NOTE: this unconditional return makes everything below unreachable.
    return 0;

    // write out the input reads, but only with "real" variants this time
    {
        bam1_t * const in_bam = bam_init1();

        if ( !args.bamin->seek0() ) {
            cerr << "unable to seek( 0 )" << endl;
            exit( 1 );
        }

        if ( !args.bamout->write_header( args.bamin->hdr ) ) {
            cerr << "error writing out BAM header" << endl;
            exit( 1 );
        }

        while ( args.bamin->next( in_bam ) ) {
            aligned_t read( in_bam );

            bam1_t * const out_bam = punchout_read( in_bam, variants, read );

            // nothing left of the read: release the record before skipping it,
            // otherwise every fully punched-out read leaks its bam1_t
            if ( !out_bam->core.l_qseq ) {
                bam_destroy1( out_bam );
                continue;
            }

            if ( !args.bamout->write( out_bam ) ) {
                cerr << "error writing out read" << endl;
                exit( 1 );
            }

            bam_destroy1( out_bam );
        }

        bam_destroy1( in_bam );
    }

    return 0;
}