// // First time through, malloc and fill the variant list from the config table // static vmiEnumParameterP getVariantList() { static vmiEnumParameterP list = NULL; if (!list) { Uns32 v = 1 + countVariants(); list = STYPE_CALLOC_N(vmiEnumParameter, v); vmiEnumParameterP prm; ppc32ConfigCP cfg; Uns32 i; for (i = 0, cfg = ppc32ConfigTable, prm = list; cfg->name; i++, cfg++, prm++) { prm->name = cfg->name; prm->value = i; } } return list; }
void BlockQuantify::count() { _impl->fasta_to_use.reset(new FastaFile(_impl->ref_fasta)); #ifdef DEBUG_BLOCKQUANTIFY int lastpos = 0; std::cerr << "starting block." << "\n"; #endif auto current_bs_start = _impl->variants.begin(); std::string current_chr; int current_bs = -1; bool current_bs_valid = false; // function to compute the QQ values for truth variants in the current // benchmarking superlocus const auto update_bs_qq = [this, ¤t_bs_start](BlockQuantifyImpl::variantlist_t::iterator to) { std::vector<float> tp_qqs; for(auto cur = current_bs_start; cur != to; ++cur) { const float qqq = bcfhelpers::getFormatFloat(_impl->hdr, *cur, "QQ", 1); if(std::isnan(qqq)) { continue; } const std::string bd = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 1); // we want the scores of all TPs in this BS if(bd == "TP") { tp_qqs.push_back(qqq); } } float t_qq = bcfhelpers::missing_float(); if(!tp_qqs.empty()) { t_qq = *(std::max_element(tp_qqs.begin(), tp_qqs.end())); } /** compute the median over all variants */ int fsize = bcf_hdr_nsamples(_impl->hdr); float * fmt = (float*)calloc((size_t) fsize, sizeof(float)); for(auto cur = current_bs_start; cur != to; ++cur) { const std::string bd = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 0); bcf_get_format_float(_impl->hdr, *cur, "QQ", &fmt, &fsize); if(bd != "TP") { fmt[0] = bcfhelpers::missing_float(); } else { fmt[0] = t_qq; } bcf_update_format_float(_impl->hdr, *cur, "QQ", fmt, fsize); } free(fmt); #ifdef DEBUG_BLOCKQUANTIFY const int bs = bcfhelpers::getInfoInt(_impl->hdr, *current_bs_start, "BS", -1); std::string values; for(float x : tp_qqs) { values += std::to_string(x) + ","; } std::cerr << "BS: " << bs << " T_QQ = " << t_qq << " [" << values << "]" << "\n"; #endif }; for(auto v_it = _impl->variants.begin(); v_it != _impl->variants.end(); ++v_it) { // update fields, must output GA4GH-compliant fields countVariants(*v_it); // determine benchmarking superlocus const std::string vchr = bcfhelpers::getChrom(_impl->hdr, *v_it); const int vbs = bcfhelpers::getInfoInt(_impl->hdr, *v_it, "BS"); if(!current_bs_valid) { current_bs = vbs; current_chr = vchr; current_bs_valid = true; } #ifdef DEBUG_BLOCKQUANTIFY std::cerr << "current BS = " << current_bs << " vbs = " << vbs << "\n"; #endif if( current_bs_start != v_it && (vbs != current_bs || vbs < 0 || vchr != current_chr)) { update_bs_qq(v_it); current_bs = vbs; current_chr = vchr; current_bs_start = v_it; } } // write out final superlocus (if any) update_bs_qq(_impl->variants.end()); for(auto & v : _impl->variants) { #ifdef DEBUG_BLOCKQUANTIFY lastpos = v->pos; #endif // use BD and BVT to make ROCs rocEvaluate(v); } #ifdef DEBUG_BLOCKQUANTIFY std::cerr << "finished block " << lastpos << " - " << _impl->variants.size() << " records on thread " << std::this_thread::get_id() << "\n"; #endif _impl->fasta_to_use.reset(nullptr); }
void BlockQuantify::count() { _impl->fasta_to_use.reset(new FastaFile(_impl->ref_fasta)); #ifdef DEBUG_BLOCKQUANTIFY int lastpos = 0; std::cerr << "starting block." << "\n"; #endif auto current_bs_start = _impl->variants.begin(); std::string current_chr; int current_bs = -1; bool current_bs_valid = false; // function to compute the QQ values for truth variants in the current // benchmarking superlocus const auto update_bs_filters = [this, ¤t_bs_start](BlockQuantifyImpl::variantlist_t::iterator to) { std::set<int> bs_filters; for(auto cur = current_bs_start; cur != to; ++cur) { for(int nf = 0; nf < (*cur)->d.n_flt; ++nf) { const int f = (*cur)->d.flt[nf]; if(f != bcf_hdr_id2int(_impl->hdr, BCF_DT_ID, "PASS")) { bs_filters.insert(f); } } } if(bs_filters.empty()) { return; } for(auto cur = current_bs_start; cur != to; ++cur) { const std::string bdt = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 0); const std::string bvq = bcfhelpers::getFormatString(_impl->hdr, *cur, "BVT", 1); // filter TPs where the query call in NOCALL if(bdt == "TP" && bvq == "NOCALL") { for(auto f : bs_filters) { bcf_add_filter(_impl->hdr, *cur, f); } } } }; // function to compute the QQ values for truth variants in the current // benchmarking superlocus const auto update_bs_qq = [this, ¤t_bs_start](BlockQuantifyImpl::variantlist_t::iterator to) { std::vector<float> tp_qqs; for(auto cur = current_bs_start; cur != to; ++cur) { const float qqq = bcfhelpers::getFormatFloat(_impl->hdr, *cur, "QQ", 1); if(std::isnan(qqq)) { continue; } const std::string bd = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 1); // we want the scores of all TPs in this BS if(bd == "TP") { tp_qqs.push_back(qqq); } } float t_qq = bcfhelpers::missing_float(); if(!tp_qqs.empty()) { t_qq = *(std::min_element(tp_qqs.begin(), tp_qqs.end())); } /** compute the median over all variants */ int fsize = bcf_hdr_nsamples(_impl->hdr); float * fmt = (float*)calloc((size_t) fsize, sizeof(float)); for(auto cur = current_bs_start; cur != to; ++cur) { const std::string bd = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 0); bcf_get_format_float(_impl->hdr, *cur, "QQ", &fmt, &fsize); if(bd != "TP") { fmt[0] = bcfhelpers::missing_float(); } else { const float qqq = bcfhelpers::getFormatFloat(_impl->hdr, *cur, "QQ", 1); const std::string bd = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 1); if(bd == "TP" && !std::isnan(qqq)) { fmt[0] = qqq; } else { fmt[0] = t_qq; } } bcf_update_format_float(_impl->hdr, *cur, "QQ", fmt, fsize); } free(fmt); #ifdef DEBUG_BLOCKQUANTIFY const int bs = bcfhelpers::getInfoInt(_impl->hdr, *current_bs_start, "BS", -1); std::string values; for(float x : tp_qqs) { values += std::to_string(x) + ","; } std::cerr << "BS: " << bs << " T_QQ = " << t_qq << " [" << values << "]" << "\n"; #endif }; const auto update_bs_conf_boundary_flag = [this, ¤t_bs_start](BlockQuantifyImpl::variantlist_t::iterator to) { static const int has_conf = 1; static const int has_non_conf = 2; int conf_non_conf = 0; for(auto cur = current_bs_start; cur != to; ++cur) { const std::string regions = bcfhelpers::getInfoString(_impl->hdr, *cur, "Regions", ""); if(regions.find("CONF") == std::string::npos) { conf_non_conf |= has_non_conf; } else { conf_non_conf |= has_conf; } if(regions.find("TS_boundary") != std::string::npos) { conf_non_conf |= has_non_conf | has_conf; } } for(auto cur = current_bs_start; cur != to; ++cur) { const std::string regions = bcfhelpers::getInfoString(_impl->hdr, *cur, "Regions", ""); if(conf_non_conf == (has_conf | has_non_conf)) { if(regions.find("TS_boundary") == std::string::npos) { bcf_update_info_string(_impl->hdr, *cur, "Regions", (regions.empty() ? "TS_boundary" : regions + ",TS_boundary").c_str()); } } else if(conf_non_conf == has_conf) { if(regions.find("TS_contained") == std::string::npos) { // also flag fully confident superloci bcf_update_info_string(_impl->hdr, *cur, "Regions", (regions.empty() ? "TS_contained" : regions + ",TS_contained").c_str()); } } } }; for(auto v_it = _impl->variants.begin(); v_it != _impl->variants.end(); ++v_it) { // update fields, must output GA4GH-compliant fields countVariants(*v_it); // determine benchmarking superlocus const std::string vchr = bcfhelpers::getChrom(_impl->hdr, *v_it); const int vbs = bcfhelpers::getInfoInt(_impl->hdr, *v_it, "BS"); if(!current_bs_valid) { current_bs = vbs; current_chr = vchr; current_bs_valid = true; } #ifdef DEBUG_BLOCKQUANTIFY std::cerr << "current BS = " << current_bs << " vbs = " << vbs << "\n"; #endif if( current_bs_start != v_it && (vbs != current_bs || vbs < 0 || vchr != current_chr)) { #ifdef DEBUG_BLOCKQUANTIFY std::cerr << "finishing BS = " << current_bs << " vbs = " << vbs << "\n"; #endif update_bs_qq(v_it); update_bs_filters(v_it); update_bs_conf_boundary_flag(v_it); current_bs = vbs; current_chr = vchr; current_bs_start = v_it; } } // do final superlocus (if any) update_bs_qq(_impl->variants.end()); update_bs_filters(_impl->variants.end()); update_bs_conf_boundary_flag(_impl->variants.end()); for(auto & v : _impl->variants) { #ifdef DEBUG_BLOCKQUANTIFY lastpos = v->pos; #endif // use BD and BVT to make ROCs rocEvaluate(v); } #ifdef DEBUG_BLOCKQUANTIFY std::cerr << "finished block " << lastpos << " - " << _impl->variants.size() << " records on thread " << std::this_thread::get_id() << "\n"; #endif _impl->fasta_to_use.reset(nullptr); }