int main( int argc, const char * argv[] ) { args_t args = args_t( argc, argv ); coverage_t::const_iterator cit; coverage_t coverage; vector< pair< int, int > > data; bam1_t * const in_bam = bam_init1(); while ( args.bamin->next( in_bam ) ) { aligned_t read( in_bam ); coverage.include( read ); } for ( cit = coverage.begin(); cit != coverage.end(); ++cit ) { map< elem_t, int >::const_iterator it; if ( cit->op != MATCH ) continue; it = cit->obs.begin(); if ( it == cit->obs.end() ) continue; int cov = it->second; int max = it->second; for ( ++it; it != cit->obs.end(); ++it ) { cov += it->second; if ( it->second > max ) max = it->second; } for ( it = cit->obs.begin(); it != cit->obs.end(); ++it ) if ( it->second && it->second != max ) data.push_back( make_pair( cov, cov - it->second ) ); } rateclass_t rc( data, 3 ); double lg_L, aicc; vector< pair< double, double > > params; rc( lg_L, aicc, params ); const double bg = weighted_harmonic_mean( params ); const double lg_bg = log( bg ); const double lg_invbg = log( 1.0 - bg ); params_json_dump( stderr, lg_L, aicc, params, bg ); for ( cit = coverage.begin(); cit != coverage.end(); ++cit ) { map< elem_t, int >::const_iterator it; if ( cit->op != MATCH ) continue; it = cit->obs.begin(); if ( it == cit->obs.end() ) continue; int cov = it->second; int max = it->second; for ( ++it; it != cit->obs.end(); ++it ) { cov += it->second; if ( it->second > max ) max = it->second; } string css; for ( it = cit->obs.begin(); it != cit->obs.end(); ++it ) if ( it->second == max ) { string elem; it->first.get_seq( elem ); css.append( elem ); css.push_back( '/' ); } // erase the trailing slash, in a compatible way css.erase( --css.end() ); for ( it = cit->obs.begin(); it != cit->obs.end(); ++it ) { string elem; if ( !it->second || it->second == max ) continue; const double prob = prob_background( lg_bg, lg_invbg, cov, it->second ); if ( prob >= args.cutoff ) continue; fprintf( stdout, "%d\t%s\t%d\t", cit->col + 1, css.c_str(), cov ); it->first.get_seq( elem ); fprintf( stdout, "%s", elem.c_str() ); fprintf( stdout, ":%d:%.3e\n", it->second, prob ); } fflush( stdout ); } bam_destroy1( in_bam ); return 0; }
int main( int argc, const char * argv[] ) { args_t args = args_t( argc, argv ); coverage_t coverage; vector< cov_t > variants; vector< pair< int, int > > data; // accumulate the data at each position in a linked list { cov_citer cit; bam1_t * in_bam = bam_init1(); while ( args.bamin->next( in_bam ) ) { aligned_t read( in_bam ); coverage.include( read ); } for ( cit = coverage.begin(); cit != coverage.end(); ++cit ) { int cov = 0; for ( obs_citer it = cit->obs.begin(); it != cit->obs.end(); ++it ) cov += it->second; for ( obs_citer it = cit->obs.begin(); it != cit->obs.end(); ++it ) if ( it->second ) data.push_back( make_pair( cov, it->second ) ); #if 0 obs_citer it = cit->obs.begin(); int cov = 0, maj; if ( it == cit->obs.end() ) continue; maj = it->second; cov += maj; for ( ++it; it != cit->obs.end(); ++it ) { if ( it->second > maj ) maj = it->second; cov += it->second; } data.push_back( make_pair( cov, maj ) ); #endif } bam_destroy1( in_bam ); } // learn a joint multi-binomial model for the mutation rate classes { cov_iter cit; double lg_L, aicc, bg, lg_bg, lg_invbg; rateclass_t rc( data ); vector< pair< double, double > > params; rc( lg_L, aicc, params ); bg = params[ 0 ].second; lg_bg = log( bg ); lg_invbg = log( 1.0 - bg ); params_json_dump( stderr, lg_L, aicc, params ); // cerr << "background: " << bg << endl; // determine which variants are above background and those which are not for ( cit = coverage.begin(); cit != coverage.end(); ++cit ) { if ( cit->op == INS ) continue; int cov = 0; for ( obs_citer it = cit->obs.begin(); it != cit->obs.end(); ++it ) cov += it->second; for ( obs_iter it = cit->obs.begin(); it != cit->obs.end(); ++it ) { const double p = prob_background( lg_bg, lg_invbg, cov, it->second ); if ( p < args.cutoff ) { cout << cit->col << "\t" << cov << "\t" << it->second; for ( unsigned i = 0; i < it->first.size(); ++i ) cout << bits2nuc( it->first[ i ] ); cout << ":" << p << endl; it->second = 1; } else { it->second = 0; } } #if 0 variants.push_back( *cit ); #endif } } return 0; // write out the input reads, but only with "real" variants this time { bam1_t * const in_bam = bam_init1(); if ( !args.bamin->seek0() ) { cerr << "unable to seek( 0 )" << endl; exit( 1 ); } if ( !args.bamout->write_header( args.bamin->hdr ) ) { cerr << "error writing out BAM header" << endl; exit( 1 ); } while ( args.bamin->next( in_bam ) ) { aligned_t read( in_bam ); bam1_t * const out_bam = punchout_read( in_bam, variants, read ); if ( !out_bam->core.l_qseq ) continue; if ( !args.bamout->write( out_bam ) ) { cerr << "error writing out read" << endl; exit( 1 ); } bam_destroy1( out_bam ); } bam_destroy1( in_bam ); } return 0; }