示例#1
0
/**
 * Process all records buffered in _impl->variants.
 *
 * In order, this
 *  1. opens a FastaFile on _impl->ref_fasta for the duration of the call,
 *  2. calls countVariants() on each record and splits the records into runs
 *     ("benchmarking superloci") using INFO/BS and the chromosome name,
 *  3. rewrites FORMAT/QQ of sample 0 for each run (see update_bs_qq),
 *  4. calls rocEvaluate() on each record,
 *  5. releases the FastaFile again.
 *
 * Sample 0 appears to be the truth column and sample 1 the query column
 * (going by the comments of the original code) -- TODO confirm.
 */
void BlockQuantify::count()
{
    _impl->fasta_to_use.reset(new FastaFile(_impl->ref_fasta));
#ifdef DEBUG_BLOCKQUANTIFY
    int lastpos = 0;
    std::cerr << "starting block." << "\n";
#endif
    // first record of the benchmarking superlocus (BS) currently being collected
    auto current_bs_start = _impl->variants.begin();
    std::string current_chr;
    int current_bs = -1;
    bool current_bs_valid = false;

    /**
     * Rewrite FORMAT/QQ of sample 0 for the records in [current_bs_start, to).
     *
     * The QQ of sample 1 is collected from every record whose sample-1 BD is
     * "TP" and whose QQ is not NaN. The maximum of these values (missing if
     * there are none) is stored as sample-0 QQ on every record whose sample-0
     * BD is "TP"; all other records get a missing sample-0 QQ.
     */
    const auto update_bs_qq = [this, &current_bs_start](BlockQuantifyImpl::variantlist_t::iterator to)
    {
        if(current_bs_start == to)
        {
            // empty range (e.g. a block without records): nothing to update,
            // and the debug output below must not dereference current_bs_start
            return;
        }

        std::vector<float> tp_qqs;
        for(auto cur = current_bs_start; cur != to; ++cur)
        {
            const float qqq = bcfhelpers::getFormatFloat(_impl->hdr, *cur, "QQ", 1);
            if(std::isnan(qqq))
            {
                continue;
            }
            const std::string bd = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 1);
            // we want the scores of all TPs in this BS
            if(bd == "TP")
            {
                tp_qqs.push_back(qqq);
            }
        }

        float t_qq = bcfhelpers::missing_float();
        if(!tp_qqs.empty())
        {
            t_qq = *(std::max_element(tp_qqs.begin(), tp_qqs.end()));
        }

        const int nsamples = bcf_hdr_nsamples(_impl->hdr);
        if(nsamples < 1)
        {
            // no sample columns: there is no slot 0 that could be written
            return;
        }

        /** write the superlocus-level value into sample 0 of every record */
        int fsize = nsamples;
        float * fmt = static_cast<float*>(calloc((size_t) fsize, sizeof(float)));
        for(auto cur = current_bs_start; cur != to; ++cur)
        {
            const std::string bd = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 0);
            // may reallocate fmt and enlarge fsize (the buffer capacity); the
            // return value is the number of floats read for this record, or a
            // negative code if QQ could not be read
            int nvalues = bcf_get_format_float(_impl->hdr, *cur, "QQ", &fmt, &fsize);
            if(nvalues < nsamples)
            {
                // no usable QQ on this record: start from all-missing values
                // instead of re-using what the previous record left in fmt
                for(int s = 0; s < nsamples; ++s)
                {
                    fmt[s] = bcfhelpers::missing_float();
                }
                nvalues = nsamples;
            }
            if(bd != "TP")
            {
                fmt[0] = bcfhelpers::missing_float();
            }
            else
            {
                fmt[0] = t_qq;
            }
            // write back exactly the values that belong to this record; fsize
            // may be larger if an earlier record needed a bigger buffer
            bcf_update_format_float(_impl->hdr, *cur, "QQ", fmt, nvalues);
        }
        free(fmt);

#ifdef DEBUG_BLOCKQUANTIFY
        const int bs = bcfhelpers::getInfoInt(_impl->hdr, *current_bs_start, "BS", -1);
        std::string values;
        for(float x : tp_qqs)
        {
            values += std::to_string(x) + ",";
        }
        std::cerr << "BS: " << bs << " T_QQ = " << t_qq << " [" << values << "]" << "\n";
#endif
    };


    for(auto v_it = _impl->variants.begin(); v_it != _impl->variants.end(); ++v_it)
    {
        // update fields, must output GA4GH-compliant fields
        countVariants(*v_it);

        // determine benchmarking superlocus
        const std::string vchr = bcfhelpers::getChrom(_impl->hdr, *v_it);
        const int vbs = bcfhelpers::getInfoInt(_impl->hdr, *v_it, "BS");
        if(!current_bs_valid)
        {
            // the very first record defines the first superlocus
            current_bs = vbs;
            current_chr = vchr;
            current_bs_valid = true;
        }

#ifdef DEBUG_BLOCKQUANTIFY
        std::cerr << "current BS = " << current_bs << " vbs = " << vbs << "\n";
#endif

        // a new superlocus starts when BS or the chromosome changes; a record
        // with a negative BS always closes the run before it
        if(   current_bs_start != v_it
                && (vbs != current_bs || vbs < 0 || vchr != current_chr))
        {
            update_bs_qq(v_it);
            current_bs = vbs;
            current_chr = vchr;
            current_bs_start = v_it;
        }
    }

    // write out final superlocus (if any)
    update_bs_qq(_impl->variants.end());

    for(auto & v : _impl->variants)
    {
#ifdef DEBUG_BLOCKQUANTIFY
        lastpos = v->pos;
#endif
        // use BD and BVT to make ROCs
        rocEvaluate(v);
    }
#ifdef DEBUG_BLOCKQUANTIFY
    std::cerr << "finished block " << lastpos << " - " << _impl->variants.size() << " records on thread " << std::this_thread::get_id() << "\n";
#endif
    _impl->fasta_to_use.reset(nullptr);
}
示例#2
0
    /**
     * Process all records buffered in _impl->variants.
     *
     * In order, this
     *  1. opens a FastaFile on _impl->ref_fasta for the duration of the call,
     *  2. calls countVariants() on each record and splits the records into runs
     *     ("benchmarking superloci") using INFO/BS and the chromosome name,
     *  3. for each run rewrites FORMAT/QQ of sample 0, propagates FILTERs and
     *     tags INFO/Regions (see the three helpers below),
     *  4. calls rocEvaluate() on each record,
     *  5. releases the FastaFile again.
     *
     * Sample 0 appears to be the truth column and sample 1 the query column
     * (going by the comments of the original code) -- TODO confirm.
     */
    void BlockQuantify::count()
    {
        _impl->fasta_to_use.reset(new FastaFile(_impl->ref_fasta));
#ifdef DEBUG_BLOCKQUANTIFY
        int lastpos = 0;
        std::cerr << "starting block." << "\n";
#endif
        // first record of the benchmarking superlocus (BS) currently being collected
        auto current_bs_start = _impl->variants.begin();
        std::string current_chr;
        int current_bs = -1;
        bool current_bs_valid = false;

        // id of the PASS filter; looked up once because it is the same for
        // every record that shares this header
        const int pass_filter_id = bcf_hdr_id2int(_impl->hdr, BCF_DT_ID, "PASS");

        /**
         * Propagate filters within the records in [current_bs_start, to).
         *
         * Collects every non-PASS filter id found on any record of the range
         * and adds all of them to each record whose sample-0 BD is "TP" and
         * whose sample-1 BVT is "NOCALL".
         */
        const auto update_bs_filters = [this, &current_bs_start, pass_filter_id](BlockQuantifyImpl::variantlist_t::iterator to)
        {
            std::set<int> bs_filters;
            for(auto cur = current_bs_start; cur != to; ++cur)
            {
                for(int nf = 0; nf < (*cur)->d.n_flt; ++nf)
                {
                    const int f = (*cur)->d.flt[nf];
                    if(f != pass_filter_id)
                    {
                        bs_filters.insert(f);
                    }
                }
            }

            if(bs_filters.empty())
            {
                return;
            }

            for(auto cur = current_bs_start; cur != to; ++cur)
            {
                const std::string bdt = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 0);
                const std::string bvq = bcfhelpers::getFormatString(_impl->hdr, *cur, "BVT", 1);
                // filter TPs where the query call is NOCALL
                if(bdt == "TP" && bvq == "NOCALL")
                {
                    for(auto f : bs_filters)
                    {
                        bcf_add_filter(_impl->hdr, *cur, f);
                    }
                }
            }
        };

        /**
         * Rewrite FORMAT/QQ of sample 0 for the records in [current_bs_start, to).
         *
         * The QQ of sample 1 is collected from every record whose sample-1 BD
         * is "TP" and whose QQ is not NaN; t_qq is the minimum of these values
         * (missing if there are none). For a record whose sample-0 BD is "TP",
         * sample-0 QQ becomes the record's own sample-1 QQ if sample 1 is also
         * "TP" with a non-NaN QQ, and t_qq otherwise. All other records get a
         * missing sample-0 QQ.
         */
        const auto update_bs_qq = [this, &current_bs_start](BlockQuantifyImpl::variantlist_t::iterator to)
        {
            if(current_bs_start == to)
            {
                // empty range (e.g. a block without records): nothing to update,
                // and the debug output below must not dereference current_bs_start
                return;
            }

            std::vector<float> tp_qqs;
            for(auto cur = current_bs_start; cur != to; ++cur)
            {
                const float qqq = bcfhelpers::getFormatFloat(_impl->hdr, *cur, "QQ", 1);
                if(std::isnan(qqq))
                {
                    continue;
                }
                const std::string bd = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 1);
                // we want the scores of all TPs in this BS
                if(bd == "TP")
                {
                    tp_qqs.push_back(qqq);
                }
            }

            float t_qq = bcfhelpers::missing_float();
            if(!tp_qqs.empty())
            {
                t_qq = *(std::min_element(tp_qqs.begin(), tp_qqs.end()));
            }

            const int nsamples = bcf_hdr_nsamples(_impl->hdr);
            if(nsamples < 1)
            {
                // no sample columns: there is no slot 0 that could be written
                return;
            }

            /** write the per-record / superlocus-level value into sample 0 */
            int fsize = nsamples;
            float * fmt = static_cast<float*>(calloc((size_t) fsize, sizeof(float)));
            for(auto cur = current_bs_start; cur != to; ++cur)
            {
                const std::string bd_truth = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 0);
                // may reallocate fmt and enlarge fsize (the buffer capacity);
                // the return value is the number of floats read for this
                // record, or a negative code if QQ could not be read
                int nvalues = bcf_get_format_float(_impl->hdr, *cur, "QQ", &fmt, &fsize);
                if(nvalues < nsamples)
                {
                    // no usable QQ on this record: start from all-missing values
                    // instead of re-using what the previous record left in fmt
                    for(int s = 0; s < nsamples; ++s)
                    {
                        fmt[s] = bcfhelpers::missing_float();
                    }
                    nvalues = nsamples;
                }
                if(bd_truth != "TP")
                {
                    fmt[0] = bcfhelpers::missing_float();
                }
                else
                {
                    const float qqq = bcfhelpers::getFormatFloat(_impl->hdr, *cur, "QQ", 1);
                    const std::string bd_query = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 1);
                    if(bd_query == "TP" && !std::isnan(qqq))
                    {
                        // both samples are TP on this record: keep its own score
                        fmt[0] = qqq;
                    }
                    else
                    {
                        fmt[0] = t_qq;
                    }
                }
                // write back exactly the values that belong to this record;
                // fsize may be larger if an earlier record needed a bigger buffer
                bcf_update_format_float(_impl->hdr, *cur, "QQ", fmt, nvalues);
            }
            free(fmt);

#ifdef DEBUG_BLOCKQUANTIFY
            const int bs = bcfhelpers::getInfoInt(_impl->hdr, *current_bs_start, "BS", -1);
            std::string values;
            for(float x : tp_qqs)
            {
                values += std::to_string(x) + ",";
            }
            std::cerr << "BS: " << bs << " T_QQ = " << t_qq << " [" << values << "]" << "\n";
#endif
        };

        /**
         * Tag INFO/Regions of the records in [current_bs_start, to).
         *
         * A record counts as confident if its Regions string contains "CONF"
         * and as non-confident otherwise; a record already carrying
         * "TS_boundary" marks the range as containing both kinds. If the range
         * contains both kinds, "TS_boundary" is appended to every record that
         * does not have it yet; if it contains only confident records,
         * "TS_contained" is appended instead. Otherwise nothing is changed.
         */
        const auto update_bs_conf_boundary_flag = [this, &current_bs_start](BlockQuantifyImpl::variantlist_t::iterator to)
        {
            static const int has_conf = 1;
            static const int has_non_conf = 2;
            int conf_non_conf = 0;
            for(auto cur = current_bs_start; cur != to; ++cur)
            {
                const std::string regions = bcfhelpers::getInfoString(_impl->hdr, *cur, "Regions", "");

                if(regions.find("CONF") == std::string::npos)
                {
                    conf_non_conf |= has_non_conf;
                }
                else
                {
                    conf_non_conf |= has_conf;
                }
                if(regions.find("TS_boundary") != std::string::npos)
                {
                    conf_non_conf |= has_non_conf | has_conf;
                }
            }

            // the tag depends only on the range as a whole, so pick it once
            const char * tag = nullptr;
            if(conf_non_conf == (has_conf | has_non_conf))
            {
                tag = "TS_boundary";
            }
            else if(conf_non_conf == has_conf)
            {
                // also flag fully confident superloci
                tag = "TS_contained";
            }
            if(tag == nullptr)
            {
                return;
            }

            for(auto cur = current_bs_start; cur != to; ++cur)
            {
                const std::string regions = bcfhelpers::getInfoString(_impl->hdr, *cur, "Regions", "");
                if(regions.find(tag) == std::string::npos)
                {
                    const std::string updated = regions.empty() ? std::string(tag) : regions + "," + tag;
                    bcf_update_info_string(_impl->hdr, *cur, "Regions", updated.c_str());
                }
            }
        };


        for(auto v_it = _impl->variants.begin(); v_it != _impl->variants.end(); ++v_it)
        {
            // update fields, must output GA4GH-compliant fields
            countVariants(*v_it);

            // determine benchmarking superlocus
            const std::string vchr = bcfhelpers::getChrom(_impl->hdr, *v_it);
            const int vbs = bcfhelpers::getInfoInt(_impl->hdr, *v_it, "BS");
            if(!current_bs_valid)
            {
                // the very first record defines the first superlocus
                current_bs = vbs;
                current_chr = vchr;
                current_bs_valid = true;
            }

#ifdef DEBUG_BLOCKQUANTIFY
            std::cerr << "current BS = " << current_bs << " vbs = " << vbs << "\n";
#endif

            // a new superlocus starts when BS or the chromosome changes; a
            // record with a negative BS always closes the run before it
            if(   current_bs_start != v_it
               && (vbs != current_bs || vbs < 0 || vchr != current_chr))
            {
#ifdef DEBUG_BLOCKQUANTIFY
                std::cerr << "finishing BS = " << current_bs << " vbs = " << vbs << "\n";
#endif
                update_bs_qq(v_it);
                update_bs_filters(v_it);
                update_bs_conf_boundary_flag(v_it);
                current_bs = vbs;
                current_chr = vchr;
                current_bs_start = v_it;
            }
        }

        // do final superlocus (if any)
        update_bs_qq(_impl->variants.end());
        update_bs_filters(_impl->variants.end());
        update_bs_conf_boundary_flag(_impl->variants.end());

        for(auto & v : _impl->variants)
        {
#ifdef DEBUG_BLOCKQUANTIFY
            lastpos = v->pos;
#endif
            // use BD and BVT to make ROCs
            rocEvaluate(v);
        }
#ifdef DEBUG_BLOCKQUANTIFY
        std::cerr << "finished block " << lastpos << " - " << _impl->variants.size() << " records on thread " << std::this_thread::get_id() << "\n";
#endif
        _impl->fasta_to_use.reset(nullptr);
    }