Ejemplo n.º 1
0
static
void
runESL(const ESLOptions& opt)
{
    TimeTracker timer;
    timer.resume();
    {
        // early test that we have permission to write to output file
        OutStream outs(opt.outputFilename);
    }

    typedef std::shared_ptr<bam_streamer> stream_ptr;
    std::vector<stream_ptr> bamStreams;

    // setup all data for main alignment loop:
    for (const std::string& afile : opt.alignFileOpt.alignmentFilename)
    {
        stream_ptr tmp(new bam_streamer(afile.c_str(),
                                        (opt.region.empty()
                                         ? NULL
                                         : opt.region.c_str())));
        bamStreams.push_back(tmp);
    }

    const unsigned bamCount(bamStreams.size());

    assert(0 != bamCount);

    // check bam header compatibility:
    if (bamCount > 1)
    {
        /// TODO: provide a better error exception for failed bam header check:
        const bam_header_t* compareHeader(bamStreams[0]->get_header());
        for (unsigned bamIndex(1); bamIndex<bamCount; ++bamIndex)
        {
            const bam_header_t* indexHeader(bamStreams[bamIndex]->get_header());
            if (! check_header_compatibility(compareHeader,indexHeader))
            {
                log_os << "ERROR: incompatible bam headers between files:\n"
                       << "\t" << opt.alignFileOpt.alignmentFilename[0] << "\n"
                       << "\t" << opt.alignFileOpt.alignmentFilename[bamIndex] << "\n";
                exit(EXIT_FAILURE);
            }
        }
    }

    // assume headers compatible after this point....

    const bam_header_t& header(*(bamStreams[0]->get_header()));
    const bam_header_info bamHeader(header);

    int32_t tid(0), beginPos(0), endPos(0);
    parse_bam_region(bamHeader,opt.region,tid,beginPos,endPos);

    const GenomeInterval scanRegion(tid,beginPos,endPos);
#ifdef DEBUG_ESL
    static const std::string log_tag("EstimateSVLoci");
    log_os << log_tag << " scanRegion= " << scanRegion << "\n";
#endif

    // grab the reference for segment we're estimating plus a buffer around the segment edges:
    static const unsigned refEdgeBufferSize(500);

    reference_contig_segment refSegment;
    getIntervalReferenceSegment(opt.referenceFilename, bamHeader, refEdgeBufferSize, scanRegion, refSegment);

    SVLocusSetFinder locusFinder(opt, scanRegion, bamHeader, refSegment);

    input_stream_data sdata;
    for (unsigned bamIndex(0); bamIndex<bamCount; ++bamIndex)
    {
        sdata.register_reads(*bamStreams[bamIndex],bamIndex);
    }

    // loop through alignments:
    input_stream_handler sinput(sdata);
    while (sinput.next())
    {
        const input_record_info current(sinput.get_current());

        if (current.itype != INPUT_TYPE::READ)
        {
            log_os << "ERROR: invalid input condition.\n";
            exit(EXIT_FAILURE);
        }

        const bam_streamer& readStream(*bamStreams[current.sample_no]);
        const bam_record& read(*(readStream.get_record_ptr()));

        locusFinder.update(read, current.sample_no);
    }

    // finished updating:
    locusFinder.flush();
    timer.stop();
    const CpuTimes totalTimes(timer.getTimes());
#ifdef DEBUG_ESL
    log_os << log_tag << " found " << locusFinder.getLocusSet().size() << " loci. \n";
    log_os << log_tag << " totalTime: ";
    totalTimes.reportHr(log_os);
    log_os << "\n";
#endif
    locusFinder.setBuildTime(totalTimes);
    locusFinder.getLocusSet().save(opt.outputFilename.c_str());
}
Ejemplo n.º 2
0
static
void
runESL(const ESLOptions& opt)
{
    {
        // early test that we have permission to write to output file
        OutStream outs(opt.outputFilename);
    }

    typedef boost::shared_ptr<bam_streamer> stream_ptr;
    std::vector<stream_ptr> bam_streams;

    // setup all data for main alignment loop:
    BOOST_FOREACH(const std::string& afile, opt.alignmentFilename)
    {
        stream_ptr tmp(new bam_streamer(afile.c_str(),opt.region.c_str()));
        bam_streams.push_back(tmp);
    }

    // TODO check header compatibility between all open bam streams
    const unsigned n_inputs(bam_streams.size());

    // assume headers compatible after this point....

    assert(0 != n_inputs);

    const bam_header_t& header(*(bam_streams[0]->get_header()));
    const bam_header_info bamHeader(header);
    int32_t tid(0), beginPos(0), endPos(0);
    parse_bam_region(bamHeader,opt.region,tid,beginPos,endPos);

    const GenomeInterval scanRegion(tid,beginPos,endPos);

    SVLocusSetFinder locusFinder(opt,scanRegion);
    locusFinder.setBamHeader(header);

    input_stream_data sdata;
    for (unsigned i(0); i<n_inputs; ++i)
    {
        sdata.register_reads(*bam_streams[i],i);
    }

    // loop through alignments:
    input_stream_handler sinput(sdata);
    while (sinput.next())
    {
        const input_record_info current(sinput.get_current());

        if       (current.itype != INPUT_TYPE::READ)
        {
            log_os << "ERROR: invalid input condition.\n";
            exit(EXIT_FAILURE);
        }

        const bam_streamer& read_stream(*bam_streams[current.sample_no]);
        const bam_record& read(*(read_stream.get_record_ptr()));

        locusFinder.update(read,current.sample_no);
    }

    // finished updating:
    locusFinder.flush();

    locusFinder.getLocusSet().save(opt.outputFilename.c_str());
}