Пример #1
0
/// Open the tabix-indexed VCF file \p filename and fetch its sequence names.
///
/// On success the member handle _tfp owns the opened file, and _tptr/_nchrom
/// hold the result of ti_seqname() for the file's index.
///
/// \param filename path of the VCF file; must be a non-empty C string
///
/// \throws blt_exception if \p filename is NULL or empty
///
/// If the file cannot be opened or its index cannot be loaded, an error is
/// written to log_os and the process exits with EXIT_FAILURE.
tabix_chrom_list::
tabix_chrom_list(const char* filename)
    : _nchrom(0)
    , _index(0)
    , _tptr(NULL)
    , _tfp(NULL)
{
    if (NULL == filename) {
        throw blt_exception("vcf filename is null ptr");
    }

    if ('\0' == *filename) {
        throw blt_exception("vcf filename is empty string");
    }

    // store the handle in the member: declaring a local of the same name here
    // would shadow the member, leaving it NULL for the lifetime of the object
    // so that the opened file could never be released
    _tfp = ti_open(filename, 0);

    if (NULL == _tfp) {
        log_os << "ERROR: Failed to open VCF file: '" << filename << "'\n";
        exit(EXIT_FAILURE);
    }

    // the index must be loaded before the sequence names can be queried from it:
    if (ti_lazy_index_load(_tfp) < 0) {
        log_os << "ERROR: Failed to load index for vcf file: '" << filename << "'\n";
        exit(EXIT_FAILURE);
    }

    _tptr = ti_seqname(_tfp->idx,&_nchrom);
}
Пример #2
0
/// Open \p filename with tabix and create the iterator used to stream from it.
///
/// \throws blt_exception if \p filename is NULL or an empty string
///
/// A failure to open the file is reported on log_os and terminates the
/// process with EXIT_FAILURE.
tabix_header_streamer::
tabix_header_streamer(const char* filename)
    : _is_record_set(false),
      _is_stream_end(false),
      _tfp(NULL),
      _titer(NULL),
      _linebuf(NULL)
{
    // reject unusable filenames before touching the tabix library:
    if (filename == NULL) {
        throw blt_exception("vcf filename is null ptr");
    }
    if (*filename == '\0') {
        throw blt_exception("vcf filename is empty string");
    }

    _tfp = ti_open(filename, 0);
    if (_tfp == NULL) {
        log_os << "ERROR: Failed to open VCF file: '" << filename << "'\n";
        exit(EXIT_FAILURE);
    }

    // the query is issued with zero for every region argument:
    _titer = ti_query(_tfp, 0, 0, 0);
}
void
stage_data::
add_stage(const int id,
          const int parent_id,
          const unsigned parent_distance,
          const bool is_parent) {

    unsigned pos(0);
    if (is_parent) {
        idmap_t::iterator pit(_ids.find(parent_id));

        if (pit==_ids.end()) {
            std::ostringstream oss;
            oss << "ERROR: stage_data.add_stage() parent_id " << parent_id << " does not exist\n";
            throw blt_exception(oss.str().c_str());
        }

        pos=(pit->second+parent_distance);
    }
    const std::pair<idmap_t::iterator,bool> ret(_ids.insert(std::make_pair(id,pos)));
    if (! ret.second) {
        std::ostringstream oss;
        oss << "ERROR: stage_data.add_stage() id " << id << " already exists\n";
        throw blt_exception(oss.str().c_str());
    }
    _stage_pos.push_back(std::make_pair(pos,id));
    // not efficient to do this every time, but we always expect the
    // number of added stages to be very small:
    std::sort(_stage_pos.begin(),_stage_pos.end());
}
Пример #4
0
/// Build a VCF record from the words of an already split input line.
///
/// The fixed columns up to and including INFO are required; FORMAT and SAMPLE
/// are parsed only when present.
///
/// \throws blt_exception if the line has too few fields, or if the FORMAT and
///         SAMPLE columns split into a different number of entries
VcfRecord::
VcfRecord(const istream_line_splitter& vparse)
{
    const unsigned n_fields(vparse.n_word());

    // every column through INFO must be available:
    if (static_cast<int>(n_fields) <= VCFID::INFO) {
        std::ostringstream msg;
        msg << "Too few fields (" << n_fields << ") in vcf record input:\n";
        vparse.dump(msg);
        throw blt_exception(msg.str().c_str());
    }

    _chrom = vparse.word[VCFID::CHROM];

    // parse_unsigned is handed a named pointer rather than the word entry itself:
    const char* pos_str(vparse.word[VCFID::POS]);
    _pos = parse_unsigned(pos_str);

    _id = vparse.word[VCFID::ID];

    _ref = vparse.word[VCFID::REF];
    assert(_ref.size() > 0);

    Splitter(vparse.word[VCFID::ALT],',',_alt);
    for (unsigned alt_index(0); alt_index<_alt.size(); ++alt_index) {
        assert(_alt[alt_index].size() > 0);
    }

    _qual = vparse.word[VCFID::QUAL];

    Splitter(vparse.word[VCFID::FILT],';',_filt);
    Splitter(vparse.word[VCFID::INFO],';',_info);

    // optional trailing columns:
    if (n_fields > VCFID::FORMAT) {
        Splitter(vparse.word[VCFID::FORMAT],':',_format);
    }
    if (n_fields > VCFID::SAMPLE) {
        Splitter(vparse.word[VCFID::SAMPLE],':',_sample);
    }

    // each FORMAT key needs exactly one SAMPLE value:
    if (_sample.size() != _format.size()) {
        std::ostringstream msg;
        msg << "FORMAT and SAMPLE fields do not agree for vcf record:\n";
        vparse.dump(msg);
        throw blt_exception(msg.str().c_str());
    }
}
Пример #5
0
/// Open the tabix-indexed VCF file \p filename for streaming.
///
/// \param filename path of the VCF file; must be a non-empty C string
/// \param region   optional region string; when NULL the query is issued
///                 without a region so that the whole file is read
///
/// \throws blt_exception if \p filename is NULL or empty, or (region case
///         only) if enforce_tabix_index() rejects the file's index
///
/// If the file cannot be opened or its index cannot be loaded, an error is
/// written to log_os and the process exits with EXIT_FAILURE. A region string
/// that ti_parse_region() cannot resolve leaves the stream marked as ended.
tabix_streamer::
tabix_streamer(const char* filename,
               const char* region)
    : _is_record_set(false)
    , _is_stream_end(false)
    , _record_no(0)
    // member initializers run before the constructor body, so the NULL check
    // below cannot protect this one: substitute an empty name for a null
    // pointer here and let the body report the error
    , _stream_name((NULL != filename) ? filename : "")
    , _tfp(NULL)
    , _titer(NULL)
    , _linebuf(NULL)
{

    if (NULL == filename) {
        throw blt_exception("vcf filename is null ptr");
    }

    if ('\0' == *filename) {
        throw blt_exception("vcf filename is empty string");
    }

    _tfp = ti_open(filename, 0);

    if (NULL == _tfp) {
        log_os << "ERROR: Failed to open VCF file: '" << filename << "'\n";
        exit(EXIT_FAILURE);
    }

    // the index is needed both for iteration and for region parsing:
    if (ti_lazy_index_load(_tfp) < 0) {
        log_os << "ERROR: Failed to load index for vcf file: '" << filename << "'\n";
        exit(EXIT_FAILURE);
    }

    if (NULL == region) {
        // read the whole VCF file:
        _titer = ti_query(_tfp, 0, 0, 0);
        return;
    }

    // read from a specific region:
    enforce_tabix_index(filename);

    int tid,beg,end;
    if (ti_parse_region(_tfp->idx, region, &tid, &beg, &end) == 0) {
        _titer = ti_queryi(_tfp, tid, beg, end);
    } else {
        // the region could not be parsed against this index: nothing to stream
        _is_stream_end=true;
    }
}
/// Report a request for a stage id which was never registered.
///
/// \throws blt_exception always
void
stage_data::
unknown_id_error(const int id) const {
    std::ostringstream msg;
    msg << "ERROR: unknown stage_id requested: ";
    msg << id;
    msg << "\n";

    const std::string text(msg.str());
    throw blt_exception(text.c_str());
}
/// Advance to the next export line.
///
/// \return true if a new line was read and handed to the line parser, false
///         if there is no input stream or the stream reached end-of-file
///
/// \throws blt_exception on a read failure other than end-of-file; exceptions
///         of that type raised while parsing the line are logged and rethrown
bool
export_stream_reader::
next() {

    if (0 == _isp) return false;

    _isp->getline(_line_buf,_line_buf_size);

    if (_isp->fail()) {
        // running out of input is the expected way to finish:
        if (_isp->eof()) {
            _is_line_set=false;
            return false;
        }

        std::ostringstream msg;
        msg << "ERROR: Unexpected read failure in export_stream_reader.next(). Current object state:\n";
        msg << "\texport_stream_label: " << name() << "\n";
        msg << "\tfailed attempting to read export_line_no: " << (_line_no+1) << "\n";

        // a completely filled buffer on a stream which is not bad hints at
        // an input line longer than the buffer:
        const bool is_buffer_full((strlen(_line_buf)+1) == _line_buf_size);
        if ((! _isp->bad()) && is_buffer_full) {
            msg << "\texport line length possibly exceeded line buffer size of: " << (_line_buf_size-1) << "\n";
        }
        throw blt_exception(msg.str().c_str());
    }

    try {
        _is_line_set=true;
        _line_no++;
        _elp.set_export_line(_line_buf);
    } catch (const blt_exception&) {
        // add the reader state to the log, then let the caller handle it:
        log_os << "ERROR:: Exception caught in export_stream_reader.next() Current object state:\n";
        report_state(log_os);
        throw;
    }

    return true;
}
Пример #8
0
/// Require that is_tabix_index() accepts the file \p f.
///
/// \throws blt_exception if the check fails
void
enforce_tabix_index(const char* f) {
    if (! is_tabix_index(f)) {
        std::ostringstream msg;
        msg << "ERROR: Missing or outdated index for vcf file: " << f << "\n";
        throw blt_exception(msg.str().c_str());
    }
}
// Read the next line from the input stream into _buf and split it in place
// into words delimited by _sep.
//
// On success each word[] entry points at the start of one field inside _buf
// (the separators found are overwritten with '\0') and _n_word holds the
// number of fields recorded.
//
// Returns false when check_istream() reports false for the first read
// attempt (the end-of-file case), or when _buf is NULL; returns true
// otherwise.
//
// Throws blt_exception when a continuation read fails or when the measured
// string length does not fit the buffer size.
bool
istream_line_splitter::
parse_line() {
    _n_word=0;
    _is.getline(_buf,_buf_size);
    // snapshot of the line counter taken before check_istream() is given the
    // chance to modify it:
    const unsigned previous_line_no(_line_no);
    if(! check_istream(_is,_line_no)) return false; // normal eof
    unsigned buflen(strlen(_buf));

    // a completely filled buffer together with an unchanged line counter is
    // treated as a partially read line: grow the buffer and keep reading into
    // its unused tail until the line counter changes
    while(((buflen+1) == _buf_size) && (previous_line_no==_line_no)) {
        increase_buffer_size();
        _is.getline(_buf+buflen,_buf_size-buflen);
        if(! check_istream(_is,_line_no)) {
            std::ostringstream oss;
            oss << "ERROR: Unexpected read failure in parse_line() at line_no: " << _line_no << "\n";
            throw blt_exception(oss.str().c_str());
        } 
        buflen=(strlen(_buf));
    }

    // sanity check: the string including its terminator must fit the buffer
    if((buflen+1) >_buf_size) {
        std::ostringstream oss;
        oss << "ERROR: Unexpected read failure in parse_line() at line_no: " << _line_no << "\n";
        throw blt_exception(oss.str().c_str());
    }
    
    // NOTE(review): _buf has already been passed to getline() and strlen()
    // above, so this NULL test comes after the pointer was used -- confirm
    // whether it can be moved to the top of the function or removed
    if(NULL == _buf) return false;
    // NOTE(review): this fires for an empty input line when assertions are
    // enabled -- confirm that empty lines are not expected here
    assert(buflen);
    
    // do a low-level separator parse:
    {  
        char* p(_buf);
        word[0]=p;
        unsigned i(1);
        // stops as soon as _max_word fields have been recorded; any further
        // separators are left untouched inside the last recorded word
        while(i<_max_word){  
            if((*p == '\n') || (*p == '\0')) break;
            if (*p == _sep) {
                // terminate the current word and start the next one just
                // behind the separator
                *p = '\0';
                word[i++] = p+1;
            }  
            ++p;
        }
        _n_word=i;
    }
    return true;
}
Пример #10
0
/// Report that \p parse_str could not be converted to the type named by
/// \p type_label.
///
/// \throws blt_exception always
static
void
parse_exception(const char* type_label,
                const char* parse_str) {

    std::ostringstream msg;
    msg << "ERROR: Can't parse " << type_label;
    msg << " from string: '" << parse_str << "'";

    const std::string text(msg.str());
    throw blt_exception(text.c_str());
}
Пример #11
0
/// Report a lookup of a quality score above the cached maximum.
///
/// \param qscore the offending score
/// \param label  name of the score type used in the message
///
/// \throws blt_exception always
void
qphred_cache::
high_qscore_error(const int qscore,
                  const char* label)
{
    std::stringstream msg;
    msg << "ERROR: Attempting to lookup " << label << " score " << qscore;
    msg << " which exceeds the maximum cached " << label << " score of " <<  MAX_QSCORE;

    const std::string text(msg.str());
    throw blt_exception(text.c_str());
}
Пример #12
0
/// Report a lookup of an invalid quality score.
///
/// \param qscore the offending score
/// \param label  name of the score type used in the message
///
/// \throws blt_exception always
void
qphred_cache::
invalid_qscore_error(const int qscore,
                     const char* label)
{
    std::stringstream msg;
    msg << "ERROR: Attempting to lookup invalid " << label;
    msg << " score: " << qscore;

    const std::string text(msg.str());
    throw blt_exception(text.c_str());
}
Пример #13
0
/// Report an unparsable character in a match descriptor string.
///
/// \param md    start of the match descriptor string
/// \param mdptr pointer to the offending character inside \p md
///
/// \throws blt_exception always; the message gives the character and its
///         1-based position
static
void
unknown_md_error(const char* const md,
                 const char* const mdptr)
{
    std::ostringstream msg;
    msg << "ERROR: can't parse match descriptor string: " << md << "\n";
    msg << "\tunexpected character: '" << *mdptr << "' at position: " << (mdptr-md+1) << "\n";

    throw blt_exception(msg.str().c_str());
}
Пример #14
0
/// Parse the genotype string \p gt into \p gti via gt_parse_helper::start().
///
/// \param gt  genotype string
/// \param gti receives the parse result
/// \param is_allow_bad_end_char forwarded unchanged to the parse helper
///
/// \throws blt_exception if the helper reports a failure
void
parse_gt(const char* gt,
         std::vector<int>& gti,
         const bool is_allow_bad_end_char)
{
    const bool is_parsed(gt_parse_helper::start(gt,gti,is_allow_bad_end_char));
    if (is_parsed) return;

    std::ostringstream msg;
    msg << "ERROR: can't parse genotype string: '" << gt << "'\n";
    throw blt_exception(msg.str().c_str());
}
Пример #15
0
bool
parse_tabix_region(const char* filename,
                   const char* region,
                   int& begin,
                   int& end) {

    if (NULL == region) {
        return false;
    }

    if (NULL == filename) {
        throw blt_exception("vcf filename is null ptr");
    }

    if ('\0' == *filename) {
        throw blt_exception("vcf filename is empty string");
    }

    enforce_tabix_index(filename);

    tabix_t* _tfp = ti_open(filename, 0);

    if (NULL == _tfp) {
        log_os << "ERROR: Failed to open VCF file: '" << filename << "'\n";
        exit(EXIT_FAILURE);
    }

    // read from a specific region:
    if (ti_lazy_index_load(_tfp) < 0) {
        log_os << "ERROR: Failed to load index for vcf file: '" << filename << "'\n";
        exit(EXIT_FAILURE);
    }

    int tid;
    const bool result(0 == ti_parse_region(_tfp->idx, region, &tid, &begin, &end));

    ti_close(_tfp);
    return result;
}
Пример #16
0
/// Require that is_new_pos_value_valid() accepts \p pos for \p stage_id.
///
/// \throws blt_exception if the check fails; the message reports \p pos and
///         _max_pos each incremented by one
void
stage_manager::
validate_new_pos_value(const pos_t pos,
                       const int stage_id) {
    if (is_new_pos_value_valid(pos,stage_id)) return;

    std::ostringstream msg;
    msg << "ERROR:: reference sequence position difference too high for multi_stage_circular_buffer\n";
    msg << "current position:\t" << (pos+1) << "\n";
    msg << "top position for stage:\t" << (_max_pos+1) << "\n";
    msg << "stage id:\t" << stage_id << "\n";
    throw blt_exception(msg.str().c_str());
}
Пример #17
0
/// Open a SAM/BAM/CRAM file for streaming.
///
/// \param filename path of the alignment file; must not be null (asserted)
/// \param region   optional region string; when null no region is set
///
/// \throws blt_exception if \p filename is an empty string
///
/// A failure to open the file is reported on log_os and terminates the
/// process with EXIT_FAILURE.
bam_streamer::
bam_streamer(
    const char* filename,
    const char* region)
    : _is_record_set(false)
    , _bfp(nullptr)
    , _hidx(nullptr)
    , _hitr(nullptr)
    , _record_no(0)
    , _stream_name(filename)
    , _is_region(false)
{
    assert(filename != nullptr);
    if (*filename == '\0')
    {
        throw blt_exception("Can't initialize bam_streamer with empty filename\n");
    }

    _bfp = samopen(filename, "rb", 0);
    if (_bfp == nullptr)
    {
        log_os << "ERROR: Failed to open SAM/BAM/CRAM file: " << filename << "\n";
        exit(EXIT_FAILURE);
    }

    if (region != nullptr)
    {
        // read a specific region of the bam file:
        set_new_region(region);
        return;
    }

    // read the whole BAM file:
    if (_bfp->header->n_targets)
    {
        // parse a fake region so that header->hash is created;
        // the parsed coordinates themselves are not used
        std::string fake_region(target_id_to_name(0));
        fake_region += ":1-1";
        int ref,beg,end;
        bam_parse_region(_bfp->header, fake_region.c_str(), &ref, &beg, &end);
    }
}
/// Classify the state of \p is after a line read attempt.
///
/// \param is      stream which was just read from; a failed-but-not-bad
///                state is cleared before returning
/// \param line_no incremented when the stream is in a good state
///
/// \return false if the stream is at end-of-file, true otherwise (either a
///         successful read or an incomplete one)
///
/// \throws blt_exception if the stream is in a bad state
static
bool
check_istream(std::istream& is,
              unsigned& line_no) {

    // regular successful line read:
    if(is) {
        ++line_no;
        return true;
    }

    if(is.eof()) return false;

    if(is.fail()) {
        if(is.bad()) {
            std::ostringstream msg;
            msg << "ERROR: unexpected failure while attempting to read line " << (line_no+1) << "\n";
            throw blt_exception(msg.str().c_str());
        }
        // reset the stream so that the caller can continue reading:
        is.clear();
    }

    // incomplete line read in this case, have to increase buffer size:
    return true;
}
//
// Look up the error probabilities used for an indel candidate:
//
// "indel_error_prob" receives the probability that the read supporting the indel case is an error
// "ref_error_prob" receives the probability that the read supporting the ref case is an error
//
// The lookup table is a function-local static which is filled from
// "client_opt" on the first call only and reused by every later call.
//
// Throws blt_exception if a simple indel is neither an insertion nor a deletion.
//
void
get_indel_error_prob(const starling_options& client_opt,
                     const starling_indel_report_info& iri,
                     double& indel_error_prob,
                     double& ref_error_prob) {

    // cache results for any realistic homopolymer length:
    static const unsigned max_hpol_len(40);
    static bool is_init(false);

    // entry [len-1] holds the (insertion,deletion) pair for homopolymer length len:
    static std::pair<double,double> indel_error_prob_len[max_hpol_len];

    if(! is_init) {
        if(client_opt.is_simple_indel_error) {
            // one flat value for every length and for both indel types:
            const double flat_error(client_opt.simple_indel_error);
            for(unsigned len_index(0); len_index<max_hpol_len; ++len_index) {
                indel_error_prob_len[len_index] = std::make_pair(flat_error,flat_error);
            }
        } else {
            double insert_error(0);
            double delete_error(0);
            for(unsigned len_index(0); len_index<max_hpol_len; ++len_index) {
                get_indel_error_prob_hpol_len(len_index+1,insert_error,delete_error);
                indel_error_prob_len[len_index] = std::make_pair(insert_error,delete_error);
            }
        }
        is_init=true;
    }

    // values for homopolymer length 1 serve as the baseline everywhere below:
    const std::pair<double,double>& base_error(indel_error_prob_len[0]);

    const bool is_insert(iri.it==INDEL::INSERT);
    const bool is_delete(iri.it==INDEL::DELETE);

    if(! (is_insert || is_delete)) {
        // breakpoints and swaps --
        // use zero repeat error for now.
        //
        // TODO - provide estimates for complex indels
        //
        indel_error_prob=std::max(base_error.first,base_error.second);
        ref_error_prob=indel_error_prob;
        return;
    }

    // treat everything besides simple homopolymer
    // contractions/expansions as homopolymer length 1:
    //
    if(iri.repeat_unit.size() != 1) {
        if(is_insert) {
            indel_error_prob=base_error.first;
            ref_error_prob=base_error.second;
        } else if(is_delete) {
            indel_error_prob=base_error.second;
            ref_error_prob=base_error.first;
        } else {
            log_os << "ERROR: Unknown indel type: " << iri.desc << "\n";
            throw blt_exception("Unknown indel type.");
        }
        return;
    }

    // homopolymer case: clamp both repeat counts into [1,max_hpol_len]
    static const unsigned one(1);
    const unsigned ref_hpol_len = std::min(std::max(iri.ref_repeat_count,one),max_hpol_len);
    const unsigned indel_hpol_len = std::min(std::max(iri.indel_repeat_count,one),max_hpol_len);

    // the size dependent exponent is switched off, leaving an exponent of 1:
    int indel_size(1);
    static const bool is_indel_size_dependent_error(false);
    if(is_indel_size_dependent_error) {
        indel_size=(std::abs(static_cast<long>(iri.ref_repeat_count)-
                             static_cast<long>(iri.indel_repeat_count)));
    }

    const std::pair<double,double>& ref_len_error(indel_error_prob_len[ref_hpol_len-1]);
    const std::pair<double,double>& indel_len_error(indel_error_prob_len[indel_hpol_len-1]);

    // neither result is allowed to drop below the length 1 baseline:
    if(is_insert) {
        indel_error_prob=std::max(base_error.first,
                                  std::pow(ref_len_error.first,indel_size));
        ref_error_prob=std::max(base_error.second,
                                std::pow(indel_len_error.second,indel_size));
    } else if(is_delete) {
        indel_error_prob=std::max(base_error.second,
                                  std::pow(ref_len_error.second,indel_size));
        ref_error_prob=std::max(base_error.first,
                                std::pow(indel_len_error.first,indel_size));
    } else {
        log_os << "ERROR: Unknown indel type: " << iri.desc << "\n";
        throw blt_exception("Unknown indel type.");
    }
}