/// Open the VCF file at \p filename, load its tabix index and fetch the
/// sequence-name list of that index via ti_seqname().
///
/// \param filename path of the VCF file; must be neither null nor empty
/// \throws blt_exception if \p filename is a null pointer or an empty string
///
/// A failure to open the file or to load its index is reported on log_os
/// and ends the process through exit(EXIT_FAILURE).
tabix_chrom_list::
tabix_chrom_list(const char* filename)
    : _nchrom(0)
    , _index(0)
    , _tptr(NULL)
    , _tfp(NULL)
{
    if (NULL == filename)
    {
        throw blt_exception("vcf filename is null ptr");
    }
    if ('\0' == *filename)
    {
        throw blt_exception("vcf filename is empty string");
    }

    // Assign to the member. This statement used to declare a local
    // 'tabix_t* _tfp', which shadowed the member of the same name: the
    // member stayed NULL and the handle returned by ti_open() became
    // unreachable once the constructor returned.
    _tfp = ti_open(filename, 0);
    if (NULL == _tfp)
    {
        log_os << "ERROR: Failed to open VCF file: '" << filename << "'\n";
        exit(EXIT_FAILURE);
    }

    // load the index; its 'idx' field is handed to ti_seqname() below:
    if (ti_lazy_index_load(_tfp) < 0)
    {
        log_os << "ERROR: Failed to load index for vcf file: '" << filename << "'\n";
        exit(EXIT_FAILURE);
    }

    // ti_seqname() also writes the number of names into _nchrom
    _tptr = ti_seqname(_tfp->idx,&_nchrom);
}
/// Open the VCF file at \p filename and start a ti_query() iteration with
/// no region arguments.
///
/// \param filename path of the VCF file; must be neither null nor empty
/// \throws blt_exception if \p filename is a null pointer or an empty string
///
/// If the file cannot be opened, the error is written to log_os and the
/// process exits with EXIT_FAILURE.
tabix_header_streamer::
tabix_header_streamer(const char* filename)
    : _is_record_set(false)
    , _is_stream_end(false)
    , _tfp(NULL)
    , _titer(NULL)
    , _linebuf(NULL)
{
    // reject unusable filenames before touching the tabix API
    if (filename == NULL)
    {
        throw blt_exception("vcf filename is null ptr");
    }
    else if (*filename == '\0')
    {
        throw blt_exception("vcf filename is empty string");
    }

    _tfp = ti_open(filename, 0);
    if (_tfp == NULL)
    {
        log_os << "ERROR: Failed to open VCF file: '" << filename << "'\n";
        exit(EXIT_FAILURE);
    }

    _titer = ti_query(_tfp, 0, 0, 0);
}
void stage_data:: add_stage(const int id, const int parent_id, const unsigned parent_distance, const bool is_parent) { unsigned pos(0); if (is_parent) { idmap_t::iterator pit(_ids.find(parent_id)); if (pit==_ids.end()) { std::ostringstream oss; oss << "ERROR: stage_data.add_stage() parent_id " << parent_id << " does not exist\n"; throw blt_exception(oss.str().c_str()); } pos=(pit->second+parent_distance); } const std::pair<idmap_t::iterator,bool> ret(_ids.insert(std::make_pair(id,pos))); if (! ret.second) { std::ostringstream oss; oss << "ERROR: stage_data.add_stage() id " << id << " already exists\n"; throw blt_exception(oss.str().c_str()); } _stage_pos.push_back(std::make_pair(pos,id)); // not efficient to do this every time, but we always expect the // number of added stages to be very small: std::sort(_stage_pos.begin(),_stage_pos.end()); }
/// Build a VCF record from the words of the line currently held by \p vparse.
///
/// CHROM through INFO are required; FORMAT and SAMPLE are read only when
/// the line has enough words for them.
///
/// \param vparse line splitter positioned on the record to parse
/// \throws blt_exception if the line has too few fields, if REF or any ALT
///         allele is empty, or if FORMAT and SAMPLE split into a different
///         number of entries
VcfRecord::
VcfRecord(const istream_line_splitter& vparse)
{
    const unsigned ws(vparse.n_word());
    if (static_cast<int>(ws) <= VCFID::INFO)
    {
        std::ostringstream oss;
        oss << "Too few fields (" << ws << ") in vcf record input:\n";
        vparse.dump(oss);
        throw blt_exception(oss.str().c_str());
    }

    _chrom = vparse.word[VCFID::CHROM];

    const char* pos_ptr(vparse.word[VCFID::POS]);
    _pos = parse_unsigned(pos_ptr);

    _id = vparse.word[VCFID::ID];

    // Empty REF/ALT values come from the input file, not from a programming
    // error, so they are reported with an exception. They used to be checked
    // with assert(), which is compiled out when NDEBUG is defined.
    _ref = vparse.word[VCFID::REF];
    if (_ref.empty())
    {
        std::ostringstream oss;
        oss << "Empty REF field in vcf record input:\n";
        vparse.dump(oss);
        throw blt_exception(oss.str().c_str());
    }

    Splitter(vparse.word[VCFID::ALT],',',_alt);
    for (unsigned i(0); i<_alt.size(); ++i)
    {
        if (_alt[i].empty())
        {
            std::ostringstream oss;
            oss << "Empty ALT allele in vcf record input:\n";
            vparse.dump(oss);
            throw blt_exception(oss.str().c_str());
        }
    }

    _qual = vparse.word[VCFID::QUAL];

    Splitter(vparse.word[VCFID::FILT],';',_filt);
    Splitter(vparse.word[VCFID::INFO],';',_info);

    // FORMAT and SAMPLE are optional columns
    if (ws > VCFID::FORMAT)
    {
        Splitter(vparse.word[VCFID::FORMAT],':',_format);
    }
    if (ws > VCFID::SAMPLE)
    {
        Splitter(vparse.word[VCFID::SAMPLE],':',_sample);
    }

    if (_format.size() != _sample.size())
    {
        std::ostringstream oss;
        oss << "FORMAT and SAMPLE fields do not agree for vcf record:\n";
        vparse.dump(oss);
        throw blt_exception(oss.str().c_str());
    }
}
/// Open the tabix-indexed VCF file at \p filename and prepare an iterator
/// over either the whole file or a single region.
///
/// \param filename path of the VCF file; must be neither null nor empty
/// \param region   region string passed to ti_parse_region(), or NULL to
///                 read the whole file
/// \throws blt_exception if \p filename is null or empty, or if
///         enforce_tabix_index() rejects the file's index (region case only)
///
/// A failure to open the file or to load its index is reported on log_os
/// and ends the process through exit(EXIT_FAILURE). If \p region cannot be
/// parsed, no iterator is created and the stream is marked as ended.
tabix_streamer::
tabix_streamer(const char* filename,
               const char* region)
    : _is_record_set(false)
    , _is_stream_end(false)
    , _record_no(0)
    // The member initializers run before the null check in the body, so a
    // null filename must not reach _stream_name unguarded (assuming
    // _stream_name is a std::string, constructing it from a null pointer
    // is undefined behaviour).
    , _stream_name((NULL != filename) ? filename : "")
    , _tfp(NULL)
    , _titer(NULL)
    , _linebuf(NULL)
{
    if (NULL == filename)
    {
        throw blt_exception("vcf filename is null ptr");
    }
    if ('\0' == *filename)
    {
        throw blt_exception("vcf filename is empty string");
    }

    _tfp = ti_open(filename, 0);
    if (NULL == _tfp)
    {
        log_os << "ERROR: Failed to open VCF file: '" << filename << "'\n";
        exit(EXIT_FAILURE);
    }

    // the index is loaded for both the whole-file and the region case:
    if (ti_lazy_index_load(_tfp) < 0)
    {
        log_os << "ERROR: Failed to load index for vcf file: '" << filename << "'\n";
        exit(EXIT_FAILURE);
    }

    if (NULL == region)
    {
        // read the whole VCF file:
        _titer = ti_query(_tfp, 0, 0, 0);
        return;
    }

    // read from a specific region:
    enforce_tabix_index(filename);

    int tid,beg,end;
    if (ti_parse_region(_tfp->idx, region, &tid, &beg, &end) == 0)
    {
        _titer = ti_queryi(_tfp, tid, beg, end);
    }
    else
    {
        // unparsable region: leave _titer NULL and report an empty stream
        _is_stream_end=true;
    }
}
/// Report a request for a stage id that is not known.
///
/// \param id the offending stage id, included in the message
/// \throws blt_exception always
void
stage_data::
unknown_id_error(const int id) const
{
    std::ostringstream msg;
    msg << "ERROR: unknown stage_id requested: " << id << "\n";

    const std::string text(msg.str());
    throw blt_exception(text.c_str());
}
bool export_stream_reader:: next() { if (_isp==0) return false; _isp->getline(_line_buf,_line_buf_size); if (! *_isp) { if (_isp->eof()) { // normal eof: _is_line_set=false; return false; } std::ostringstream oss; oss << "ERROR: Unexpected read failure in export_stream_reader.next(). Current object state:\n"; oss << "\texport_stream_label: " << name() << "\n"; oss << "\tfailed attempting to read export_line_no: " << (_line_no+1) << "\n"; if ((! _isp->bad()) && ((strlen(_line_buf)+1) == _line_buf_size)) { oss << "\texport line length possibly exceeded line buffer size of: " << (_line_buf_size-1) << "\n"; } throw blt_exception(oss.str().c_str()); } try { _is_line_set=true; _line_no++; _elp.set_export_line(_line_buf); } catch (const blt_exception&) { log_os << "ERROR:: Exception caught in export_stream_reader.next() Current object state:\n"; report_state(log_os); throw; } return true; }
/// Require that is_tabix_index() accepts file \p f.
///
/// \param f path of the vcf file, included in the error message
/// \throws blt_exception if is_tabix_index(f) returns false
void
enforce_tabix_index(const char* f)
{
    if (! is_tabix_index(f))
    {
        std::ostringstream msg;
        msg << "ERROR: Missing or outdated index for vcf file: " << f << "\n";
        throw blt_exception(msg.str().c_str());
    }
}
bool istream_line_splitter:: parse_line() { _n_word=0; _is.getline(_buf,_buf_size); const unsigned previous_line_no(_line_no); if(! check_istream(_is,_line_no)) return false; // normal eof unsigned buflen(strlen(_buf)); while(((buflen+1) == _buf_size) && (previous_line_no==_line_no)) { increase_buffer_size(); _is.getline(_buf+buflen,_buf_size-buflen); if(! check_istream(_is,_line_no)) { std::ostringstream oss; oss << "ERROR: Unexpected read failure in parse_line() at line_no: " << _line_no << "\n"; throw blt_exception(oss.str().c_str()); } buflen=(strlen(_buf)); } if((buflen+1) >_buf_size) { std::ostringstream oss; oss << "ERROR: Unexpected read failure in parse_line() at line_no: " << _line_no << "\n"; throw blt_exception(oss.str().c_str()); } if(NULL == _buf) return false; assert(buflen); // do a low-level separator parse: { char* p(_buf); word[0]=p; unsigned i(1); while(i<_max_word){ if((*p == '\n') || (*p == '\0')) break; if (*p == _sep) { *p = '\0'; word[i++] = p+1; } ++p; } _n_word=i; } return true; }
/// Report that \p parse_str could not be parsed as a \p type_label.
///
/// \param type_label name of the type that was expected
/// \param parse_str  the string that failed to parse
/// \throws blt_exception always
static
void
parse_exception(const char* type_label,
                const char* parse_str)
{
    std::ostringstream msg;
    msg << "ERROR: Can't parse " << type_label;
    msg << " from string: '" << parse_str << "'";

    throw blt_exception(msg.str().c_str());
}
/// Report a lookup of a score above MAX_QSCORE.
///
/// \param qscore the requested score
/// \param label  name of the score kind, used twice in the message
/// \throws blt_exception always
void
qphred_cache::
high_qscore_error(const int qscore,
                  const char* label)
{
    std::stringstream msg;
    msg << "ERROR: Attempting to lookup " << label << " score " << qscore;
    msg << " which exceeds the maximum cached " << label << " score of " << MAX_QSCORE;

    throw blt_exception(msg.str().c_str());
}
/// Report a lookup of an invalid score.
///
/// \param qscore the requested score
/// \param label  name of the score kind, included in the message
/// \throws blt_exception always
void
qphred_cache::
invalid_qscore_error(const int qscore,
                     const char* label)
{
    std::stringstream msg;
    msg << "ERROR: Attempting to lookup invalid " << label;
    msg << " score: " << qscore;

    throw blt_exception(msg.str().c_str());
}
/// Report an unexpected character in a match descriptor string.
///
/// \param md    start of the match descriptor string
/// \param mdptr pointer to the offending character inside \p md
/// \throws blt_exception always; the message gives the character and its
///         1-based offset from \p md
static
void
unknown_md_error(const char* const md,
                 const char* const mdptr)
{
    std::ostringstream msg;
    msg << "ERROR: can't parse match descriptor string: " << md << "\n";
    msg << "\tunexpected character: '" << *mdptr << "' at position: " << (mdptr-md+1) << "\n";

    throw blt_exception(msg.str().c_str());
}
/// Parse genotype string \p gt into \p gti using gt_parse_helper::start().
///
/// \param gt                    genotype string to parse
/// \param gti                   output vector, passed through to the helper
/// \param is_allow_bad_end_char passed through to the helper
/// \throws blt_exception if the helper reports failure
void
parse_gt(const char* gt,
         std::vector<int>& gti,
         const bool is_allow_bad_end_char)
{
    const bool is_parsed(gt_parse_helper::start(gt,gti,is_allow_bad_end_char));
    if (is_parsed) return;

    std::ostringstream msg;
    msg << "ERROR: can't parse genotype string: '" << gt << "'\n";
    throw blt_exception(msg.str().c_str());
}
bool parse_tabix_region(const char* filename, const char* region, int& begin, int& end) { if (NULL == region) { return false; } if (NULL == filename) { throw blt_exception("vcf filename is null ptr"); } if ('\0' == *filename) { throw blt_exception("vcf filename is empty string"); } enforce_tabix_index(filename); tabix_t* _tfp = ti_open(filename, 0); if (NULL == _tfp) { log_os << "ERROR: Failed to open VCF file: '" << filename << "'\n"; exit(EXIT_FAILURE); } // read from a specific region: if (ti_lazy_index_load(_tfp) < 0) { log_os << "ERROR: Failed to load index for vcf file: '" << filename << "'\n"; exit(EXIT_FAILURE); } int tid; const bool result(0 == ti_parse_region(_tfp->idx, region, &tid, &begin, &end)); ti_close(_tfp); return result; }
/// Require that is_new_pos_value_valid() accepts \p pos for \p stage_id.
///
/// \param pos      position to validate
/// \param stage_id stage the position belongs to
/// \throws blt_exception if the check fails; the message reports pos+1,
///         _max_pos+1 and the stage id
void
stage_manager::
validate_new_pos_value(const pos_t pos,
                       const int stage_id)
{
    if (is_new_pos_value_valid(pos,stage_id)) return;

    std::ostringstream msg;
    msg << "ERROR:: reference sequence position difference too high for multi_stage_circular_buffer\n";
    msg << "current position:\t" << (pos+1) << "\n";
    msg << "top position for stage:\t" << (_max_pos+1) << "\n";
    msg << "stage id:\t" << stage_id << "\n";

    throw blt_exception(msg.str().c_str());
}
/// Open the SAM/BAM/CRAM file at \p filename and optionally restrict
/// reading to \p region.
///
/// \param filename path of the alignment file; must be non-null (asserted)
///                 and non-empty
/// \param region   region string handed to set_new_region(), or nullptr to
///                 read the whole file
/// \throws blt_exception if \p filename is empty (or null in builds where
///         the assert is compiled out)
///
/// If the file cannot be opened, the error is written to log_os and the
/// process exits with EXIT_FAILURE.
bam_streamer::
bam_streamer(
    const char* filename,
    const char* region)
    : _is_record_set(false),
      _bfp(nullptr),
      _hidx(nullptr),
      _hitr(nullptr),
      _record_no(0),
      // The member initializers run before the assert in the body, so a
      // null filename must not reach _stream_name unguarded (assuming
      // _stream_name is a std::string, constructing it from a null pointer
      // is undefined behaviour).
      _stream_name((nullptr != filename) ? filename : ""),
      _is_region(false)
{
    assert(nullptr != filename);

    // with NDEBUG the assert above vanishes; treat a null filename like an
    // empty one instead of dereferencing it
    if ((nullptr == filename) || ('\0' == *filename))
    {
        throw blt_exception("Can't initialize bam_streamer with empty filename\n");
    }

    _bfp = samopen(filename, "rb", 0);

    if (nullptr == _bfp)
    {
        log_os << "ERROR: Failed to open SAM/BAM/CRAM file: " << filename << "\n";
        exit(EXIT_FAILURE);
    }

    if (nullptr == region)
    {
        // read the whole BAM file:

        if (_bfp->header->n_targets)
        {
            // parse a fake region so that header->hash is created
            std::string fake_region(target_id_to_name(0));
            fake_region += ":1-1";
            int ref,beg,end;
            bam_parse_region(_bfp->header, fake_region.c_str(), &ref, &beg, &end);
        }
        return;
    }

    // read a specific region of the bam file:
    set_new_region(region);
}
static bool check_istream(std::istream& is, unsigned& line_no) { if(is) { line_no++; // regular successful line read: return true; } if (is.eof()) return false; else if(is.fail()) { if(is.bad()) { std::ostringstream oss; oss << "ERROR: unexpected failure while attempting to read line " << (line_no+1) << "\n"; throw blt_exception(oss.str().c_str()); } is.clear(); } // incomplete line read in this case, have to increase buffer size: return true; }
// // "indel_error" is the probability that the read supporting the indel case is an error // "ref_error" is the probability that the read supporting the ref case is an error // void get_indel_error_prob(const starling_options& client_opt, const starling_indel_report_info& iri, double& indel_error_prob, double& ref_error_prob) { // cache results for any realistic homopolymer length: static const unsigned max_hpol_len(40); static bool is_init(false); // the pair is the spurious value for (insertion,deletion): // static std::pair<double,double> indel_error_prob_len[max_hpol_len]; if(! is_init) { if(! client_opt.is_simple_indel_error) { double itmp(0); double dtmp(0); for(unsigned i(0); i<max_hpol_len; ++i) { get_indel_error_prob_hpol_len(i+1,itmp,dtmp); indel_error_prob_len[i] = std::make_pair(itmp,dtmp); } } else { const double ie(client_opt.simple_indel_error); for(unsigned i(0); i<max_hpol_len; ++i) { indel_error_prob_len[i] = std::make_pair(ie,ie); } } is_init=true; } const bool is_simple_indel(iri.it==INDEL::INSERT || iri.it==INDEL::DELETE); if(! is_simple_indel) { // breakpoints and swaps -- // use zero repeat error for now. 
// // TODO - provide estimates for complex indels // indel_error_prob=std::max(indel_error_prob_len[0].first,indel_error_prob_len[0].second); ref_error_prob=indel_error_prob; } else { // treat everything besides simple homopolymer // contractions/expansions as homopolymer length 1: // if(iri.repeat_unit.size() == 1) { static const unsigned one(1); const unsigned ref_hpol_len = std::min(std::max(iri.ref_repeat_count,one),max_hpol_len); const unsigned indel_hpol_len = std::min(std::max(iri.indel_repeat_count,one),max_hpol_len); int indel_size(1); static const bool is_indel_size_dependent_error(false); if(is_indel_size_dependent_error) { indel_size=(std::abs(static_cast<long>(iri.ref_repeat_count)- static_cast<long>(iri.indel_repeat_count))); } if (iri.it == INDEL::INSERT) { indel_error_prob=std::max(indel_error_prob_len[0].first, std::pow(indel_error_prob_len[ref_hpol_len-1].first,indel_size)); ref_error_prob=std::max(indel_error_prob_len[0].second, std::pow(indel_error_prob_len[indel_hpol_len-1].second,indel_size)); } else if(iri.it == INDEL::DELETE) { indel_error_prob=std::max(indel_error_prob_len[0].second, std::pow(indel_error_prob_len[ref_hpol_len-1].second,indel_size)); ref_error_prob=std::max(indel_error_prob_len[0].first, std::pow(indel_error_prob_len[indel_hpol_len-1].first,indel_size)); } else { log_os << "ERROR: Unknown indel type: " << iri.desc << "\n"; throw blt_exception("Unknown indel type."); } } else { if (iri.it == INDEL::INSERT) { indel_error_prob=indel_error_prob_len[0].first; ref_error_prob=indel_error_prob_len[0].second; } else if(iri.it == INDEL::DELETE) { indel_error_prob=indel_error_prob_len[0].second; ref_error_prob=indel_error_prob_len[0].first; } else { log_os << "ERROR: Unknown indel type: " << iri.desc << "\n"; throw blt_exception("Unknown indel type."); } } } }