Example #1
0
/*
 * Per-record callback.
 *
 * Records with exactly two alleles get their FORMAT/GL values read into the
 * file-level `gl` buffer, estimate_gt() is run, and the record is annotated
 * with FORMAT/DS (from `dosage`), INFO/AF (the value returned by
 * estimate_gt()) and GT (from `gt`/`ngt`).  All other records are returned
 * untouched.
 *
 * NOTE(review): `dosage`, `gt` and `ngt` are not set in this function;
 * presumably estimate_gt() fills them -- confirm against its definition.
 *
 * Returns the (possibly modified) input record.
 */
bcf1_t *process(bcf1_t *rec)
{
  if(rec->n_allele!=2) return rec;

  /* `ngl` is only the capacity of the `gl` buffer; the number of values
     actually read for this record is the function's return value, and it is
     negative when the tag could not be read.  Validate that, not `ngl`. */
  const int nread = bcf_get_format_float(in_hdr, rec, "GL", &gl, &ngl);
  assert(nread==(3*n));
  (void)nread;  /* keeps builds with NDEBUG free of unused-variable warnings */

  const float af = estimate_gt();
  bcf_update_format_float(out_hdr,rec,"DS",dosage,n);
  bcf_update_info_float(out_hdr,rec,"AF",&af,1);
  bcf_update_genotypes(out_hdr, rec, gt, ngt);
  return rec;
}
Example #2
0
/*
 * Per-record callback.
 *
 * In GP_TO_GL mode the FORMAT/GP values are read into the file-level `farr`
 * buffer, converted in place to log10 scale and written out as FORMAT/GL.
 * A value of exactly zero is mapped to -99; missing and vector-end values
 * are left as they are.  When `drop_source_tag` is set the GP tag is removed
 * afterwards.  In any other mode the record is returned unchanged.
 *
 * Returns the (possibly modified) input record.
 */
bcf1_t *process(bcf1_t *rec)
{
    int i, n;
    if ( mode==GP_TO_GL )
    {
        n = bcf_get_format_float(in_hdr,rec,"GP",&farr,&mfarr);

        // A non-positive count means there is nothing to convert (tag absent
        // or unreadable); it must not be passed on as the number of values.
        if ( n<=0 ) return rec;

        for (i=0; i<n; i++)
        {
            if ( bcf_float_is_missing(farr[i]) || bcf_float_is_vector_end(farr[i]) ) continue;
            // GL is defined as log10-scaled, hence log10 rather than the natural log
            farr[i] = farr[i] ? log10(farr[i]) : -99;
        }
        bcf_update_format_float(out_hdr,rec,"GL",farr,n);
        if ( drop_source_tag )
            bcf_update_format_float(out_hdr,rec,"GP",NULL,0);
    }
    return rec;
}
Example #3
0
// Reads the input VCF/BCF and prepares everything the later stages need:
//
//  - opens args->fname through a synced reader, optionally restricted by
//    args->regions_list / args->targets_list;
//  - selects the sample (args->sample, or the only sample of the file when
//    the option was not given) and verifies that FORMAT/BAF is defined;
//  - builds one BAF histogram (args->nbins bins spanning [0,1]) for every
//    run of records sharing the same chromosome and stores them in
//    args->dist / args->ndist;
//  - calls init_dist() on every histogram;
//  - opens "<output_dir>/dist.dat" (kept open in args->dat_fp) and writes its
//    header, and writes the executable plotting script "<output_dir>/dist.py".
//
// Failures are reported through error().
static void init_data(args_t *args)
{
    bcf_srs_t *files = bcf_sr_init();
    if ( args->regions_list )
    {
        if ( bcf_sr_set_regions(files, args->regions_list, args->regions_is_file)<0 )
            error("Failed to read the regions: %s\n", args->regions_list);
    }
    if ( args->targets_list )
    {
        if ( bcf_sr_set_targets(files, args->targets_list, args->targets_is_file, 0)<0 )
            error("Failed to read the targets: %s\n", args->targets_list);
    }
    if ( !bcf_sr_add_reader(files, args->fname) ) error("Failed to open %s: %s\n", args->fname,bcf_sr_strerror(files->errnum));
    bcf_hdr_t *hdr = files->readers[0].header;
    if ( !args->sample )
    {
        if ( bcf_hdr_nsamples(hdr)>1 ) error("Missing the option -s, --sample\n");
        // NOTE(review): this aliases a string stored in the reader's header and
        // the reader is destroyed further down -- confirm args->sample is not
        // dereferenced after this function returns.
        args->sample = hdr->samples[0];
    }
    else if ( bcf_hdr_id2int(hdr,BCF_DT_SAMPLE,args->sample)<0 ) error("No such sample: %s\n", args->sample);
    int ret = bcf_hdr_set_samples(hdr, args->sample, 0);
    if ( ret<0 ) error("Error setting the sample: %s\n", args->sample);

    if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,bcf_hdr_id2int(hdr,BCF_DT_ID,"BAF")) )
        error("The tag FORMAT/BAF is not present in the VCF: %s\n", args->fname);

    // x-coordinates of the bins, shared by all distributions
    int i;
    args->xvals = (double*) calloc(args->nbins,sizeof(double));
    for (i=0; i<args->nbins; i++) args->xvals[i] = 1.0*i/(args->nbins-1);

    // collect BAF distributions for all chromosomes
    int idist = -1, nbaf = 0, prev_chr = -1;
    float *baf = NULL;
    while ( bcf_sr_next_line(files) )
    {
        bcf1_t *line = bcf_sr_get_line(files,0);
        if ( bcf_get_format_float(hdr,line,"BAF",&baf,&nbaf) != 1 ) continue;
        if ( bcf_float_is_missing(baf[0]) ) continue;

        // The bin index below is only valid for values within [0,1]; anything
        // else (including NaN) would address memory outside of yvals.
        if ( !(baf[0]>=0 && baf[0]<=1) ) continue;

        if ( prev_chr==-1 || prev_chr!=line->rid )
        {
            // new chromosome
            idist = args->ndist++;
            args->dist = (dist_t*) realloc(args->dist, sizeof(dist_t)*args->ndist);
            memset(&args->dist[idist],0,sizeof(dist_t));
            args->dist[idist].chr   = strdup(bcf_seqname(hdr,line));
            args->dist[idist].yvals = (double*) calloc(args->nbins,sizeof(double));
            args->dist[idist].xvals = args->xvals;
            args->dist[idist].nvals = args->nbins;
            prev_chr = line->rid;
        }
        int bin = baf[0]*(args->nbins-1);
        args->dist[idist].yvals[bin]++;   // the distribution
    }
    free(baf);
    bcf_sr_destroy(files);

    for (idist=0; idist<args->ndist; idist++)
    {
        // disabled debugging aid: replaces the data with a peak centred at 0.5
        #if 0
            int j;
            for (j=0; j<args->nbins; j++)
            {
                double x = args->dist[idist].xvals[j];
                args->dist[idist].yvals[j] = exp(-(x-0.5)*(x-0.5)/1e-3);
            }
        #endif
        init_dist(args, &args->dist[idist],args->verbose);
    }

    // header of the data file: provenance, command line and column legend
    args->dat_fp = open_file(&args->dat_fname,"w","%s/dist.dat", args->output_dir);
    fprintf(args->dat_fp, "# This file was produced by: bcftools polysomy(%s+htslib-%s), the command line was:\n", bcftools_version(),hts_version());
    fprintf(args->dat_fp, "# \t bcftools %s ", args->argv[0]);
    for (i=1; i<args->argc; i++)
        fprintf(args->dat_fp, " %s",args->argv[i]);
    fprintf(args->dat_fp,"\n#\n");
    fprintf(args->dat_fp,"# DIST\t[2]Chrom\t[3]BAF\t[4]Normalized Count\n");
    fprintf(args->dat_fp,"# FIT\t[2]Goodness of Fit\t[3]iFrom\t[4]iTo\t[5]The Fitted Function\n");
    fprintf(args->dat_fp,"# CN\t[2]Chrom\t[3]Estimated Copy Number\t[4]Absolute fit deviation\n");

    // The plotting script reads <output_dir>/dist.dat; the only substitution
    // made in the text below is the output directory.
    char *fname = NULL;
    FILE *fp = open_file(&fname,"w","%s/dist.py", args->output_dir);
//-------- matplotlib script --------------
    fprintf(fp,
        "#!/usr/bin/env python\n"
        "#\n"
        "import matplotlib as mpl\n"
        "mpl.use('Agg')\n"
        "import matplotlib.pyplot as plt\n"
        "import csv,sys,argparse\n"
        "from math import exp\n"
        "\n"
        "outdir = '%s'\n"
        "\n"
        "def read_dat(dat,fit,cn):\n"
        "   csv.register_dialect('tab', delimiter='\t', quoting=csv.QUOTE_NONE)\n"
        "   with open(outdir+'/dist.dat', 'rb') as f:\n"
        "      reader = csv.reader(f, 'tab')\n"
        "      for row in reader:\n"
        "          if row[0][0]=='#': continue\n"
        "          type = row[0]\n"
        "          chr  = row[1]\n"
        "          if type=='DIST':\n"
        "              if chr not in dat: dat[chr] = []\n"
        "              dat[chr].append(row)\n"
        "          elif type=='FIT':\n"
        "              if chr not in fit: fit[chr] = []\n"
        "              fit[chr].append(row)\n"
        "          elif type=='CN':\n"
        "              cn[chr] = row[2]\n"
        "\n"
        "def plot_dist(dat,fit,chr):\n"
        "   fig, ax = plt.subplots(1, 1, figsize=(7,5))\n"
        "   ax.plot([x[2] for x in dat[chr]],[x[3] for x in dat[chr]],'k-',label='Distribution')\n"
        "   if chr in fit:\n"
        "       for i in range(len(fit[chr])):\n"
        "           pfit = fit[chr][i]\n"
        "           exec('def xfit(x): return '+pfit[5])\n"
        "           istart = int(pfit[3])\n"
        "           iend   = int(pfit[4])+1\n"
        "           vals   = dat[chr][istart:iend]\n"
        "           args   = {}\n"
        "           if i==0: args = {'label':'Target to Fit'}\n"
        "           ax.plot([x[2] for x in vals],[x[3] for x in vals],'r-',**args)\n"
        "           if i==0: args = {'label':'Best Fit'}\n"
        "           ax.plot([x[2] for x in vals],[xfit(float(x[2])) for x in vals],'g-',**args)\n"
        "   ax.set_title('BAF distribution, chr'+chr)\n"
        "   ax.set_xlabel('BAF')\n"
        "   ax.set_ylabel('Frequency')\n"
        "   ax.legend(loc='best',prop={'size':7},frameon=False)\n"
        "   plt.savefig(outdir+'/dist.chr'+chr+'.png')\n"
        "   plt.close()\n"
        "\n"
        "def plot_copy_number(cn):\n"
        "   fig, ax = plt.subplots(1, 1, figsize=(7,5))\n"
        "   xlabels = sorted(cn.keys())\n"
        "   xvals = range(len(xlabels))\n"
        "   yvals = [float(cn[x]) for x in xlabels]\n"
        "   ax.plot(xvals,yvals,'o',color='red')\n"
        "   for i in range(len(xvals)):\n"
        "       if yvals[i]==-1: ax.annotate('?', xy=(xvals[i],0.5),va='center',ha='center',color='red',fontweight='bold')\n"
        "   ax.tick_params(axis='both', which='major', labelsize=9)\n"
        "   ax.set_xticks(xvals)\n"
        "   ax.set_xticklabels(xlabels,rotation=45)\n"
        "   ax.set_xlim(-1,len(xlabels))\n"
        "   ax.set_ylim(0,5.0)\n"
        "   ax.set_yticks([1.0,2.0,3.0,4.0])\n"
        "   ax.set_xlabel('Chromosome')\n"
        "   ax.set_ylabel('Copy Number')\n"
        "   plt.savefig(outdir+'/copy-number.png')\n"
        "   plt.close()\n"
        "\n"
        "class myParser(argparse.ArgumentParser):\n"
        "   def error(self, message):\n"
        "       self.print_help()\n"
        "       sys.stderr.write('error: %%s\\n' %% message)\n"
        "       sys.exit(2)\n"
        "\n"
        "def main():\n"
        "   parser = myParser()\n"
        "   parser.add_argument('-a', '--all', action='store_true', help='Create all plots')\n"
        "   parser.add_argument('-c', '--copy-number', action='store_true', help='Create copy-number plot')\n"
        "   parser.add_argument('-d', '--distrib', metavar='CHR', help='Plot BAF distribution of a single chromosome')\n"
        "   args = parser.parse_args()\n"
        "   dat = {}; fit = {}; cn = {}\n"
        "   read_dat(dat,fit,cn)\n"
        "   if args.distrib!=None:\n"
        "       plot_dist(dat,fit,args.distrib)\n"
        "   if args.all:\n"
        "       for chr in dat: plot_dist(dat,fit,chr)\n"
        "       plot_copy_number(cn)\n"
        "   elif args.copy_number:\n"
        "       plot_copy_number(cn)\n"
        "   else:\n"
        "       for chr in dat: plot_dist(dat,fit,chr)\n"
        "\n"
        "if __name__ == '__main__':\n"
        "   main()\n",
        args->output_dir);
//---------------------------------------
    // rwxr-xr-x, so that the script can be run directly
    chmod(fname, S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH|S_IXUSR|S_IXGRP|S_IXOTH);
    free(fname);
    fclose(fp);
}
Example #4
0
void BlockQuantify::count()
{
    _impl->fasta_to_use.reset(new FastaFile(_impl->ref_fasta));
#ifdef DEBUG_BLOCKQUANTIFY
    int lastpos = 0;
    std::cerr << "starting block." << "\n";
#endif
    auto current_bs_start = _impl->variants.begin();
    std::string current_chr;
    int current_bs = -1;
    bool current_bs_valid = false;

    // function to compute the QQ values for truth variants in the current
    // benchmarking superlocus
    const auto update_bs_qq = [this, &current_bs_start](BlockQuantifyImpl::variantlist_t::iterator to)
    {
        std::vector<float> tp_qqs;
        for(auto cur = current_bs_start; cur != to; ++cur)
        {
            const float qqq = bcfhelpers::getFormatFloat(_impl->hdr, *cur, "QQ", 1);
            if(std::isnan(qqq))
            {
                continue;
            }
            const std::string bd = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 1);
            // we want the scores of all TPs in this BS
            if(bd == "TP")
            {
                tp_qqs.push_back(qqq);
            }
        }

        float t_qq = bcfhelpers::missing_float();
        if(!tp_qqs.empty())
        {
            t_qq = *(std::max_element(tp_qqs.begin(), tp_qqs.end()));
        }

        /** compute the median over all variants */
        int fsize = bcf_hdr_nsamples(_impl->hdr);
        float * fmt = (float*)calloc((size_t) fsize, sizeof(float));
        for(auto cur = current_bs_start; cur != to; ++cur)
        {
            const std::string bd = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 0);
            bcf_get_format_float(_impl->hdr, *cur, "QQ", &fmt, &fsize);
            if(bd != "TP")
            {
                fmt[0] = bcfhelpers::missing_float();
            }
            else
            {
                fmt[0] = t_qq;
            }
            bcf_update_format_float(_impl->hdr, *cur, "QQ", fmt, fsize);
        }
        free(fmt);

#ifdef DEBUG_BLOCKQUANTIFY
        const int bs = bcfhelpers::getInfoInt(_impl->hdr, *current_bs_start, "BS", -1);
        std::string values;
        for(float x : tp_qqs)
        {
            values += std::to_string(x) + ",";
        }
        std::cerr << "BS: " << bs << " T_QQ = " << t_qq << " [" << values << "]" << "\n";
#endif
    };


    for(auto v_it = _impl->variants.begin(); v_it != _impl->variants.end(); ++v_it)
    {
        // update fields, must output GA4GH-compliant fields
        countVariants(*v_it);

        // determine benchmarking superlocus
        const std::string vchr = bcfhelpers::getChrom(_impl->hdr, *v_it);
        const int vbs = bcfhelpers::getInfoInt(_impl->hdr, *v_it, "BS");
        if(!current_bs_valid)
        {
            current_bs = vbs;
            current_chr = vchr;
            current_bs_valid = true;
        }

#ifdef DEBUG_BLOCKQUANTIFY
        std::cerr << "current BS = " << current_bs << " vbs = " << vbs << "\n";
#endif

        if(   current_bs_start != v_it
                && (vbs != current_bs || vbs < 0 || vchr != current_chr))
        {
            update_bs_qq(v_it);
            current_bs = vbs;
            current_chr = vchr;
            current_bs_start = v_it;
        }
    }

    // write out final superlocus (if any)
    update_bs_qq(_impl->variants.end());

    for(auto & v : _impl->variants)
    {
#ifdef DEBUG_BLOCKQUANTIFY
        lastpos = v->pos;
#endif
        // use BD and BVT to make ROCs
        rocEvaluate(v);
    }
#ifdef DEBUG_BLOCKQUANTIFY
    std::cerr << "finished block " << lastpos << " - " << _impl->variants.size() << " records on thread " << std::this_thread::get_id() << "\n";
#endif
    _impl->fasta_to_use.reset(nullptr);
}
// Loads the bi-allelic variants of `region` from the indexed VCF/BCF `fvcf`
// into the genotype_* members (which are cleared first).
//
// A variant is kept when it has exactly two alleles, carries either one DS
// value per sample or a diploid GT for every sample, and its ID is accepted by
// filter_genotype. For each kept variant the ID, chromosome, 1-based start,
// end (INFO/END when present, otherwise start + length of REF - 1) and one
// value per known sample are stored. The per-sample value is DS when
// available, otherwise the sum of the two GT allele indices.
// genotype_id_to_idx maps each ID to its index and genotype_count receives
// the number of kept variants.
void union_data::readGenotypesVCF(string fvcf,string region) {
	int n_includedG = 0;
	int n_excludedG_mult = 0;
	int n_excludedG_void = 0;
	int n_excludedG_user = 0;
	int n_includedS = 0;
	vector < int > mappingS;
	genotype_id.clear();
	genotype_chr.clear();
	genotype_start.clear();
	genotype_end.clear();
	genotype_val.clear();
	genotype_count=0;
	genotype_id_to_idx.clear();

	//Opening files
	bcf_srs_t * sr =  bcf_sr_init();

	bcf_sr_set_regions(sr, region.c_str(), 0);
	if(!(bcf_sr_add_reader (sr, fvcf.c_str()))) {
		// NOTE(review): vrb.error() presumably does not return; the code below
		// relies on a reader having been added.
		switch (sr->errnum) {
		case not_bgzf: vrb.error("File not compressed with bgzip!"); break;
		case idx_load_failed: vrb.error("Impossible to load index file!"); break;
		case file_type_error: vrb.error("File format not detected by htslib!"); break;
		default : vrb.error("Unknown error!"); break;
		}
	}

	//Sample processing: mappingS[i] is the internal index of the i-th sample of
	//the file, negative when findSample() does not know it
	int n_samples = bcf_hdr_nsamples(sr->readers[0].header);
	for (int i0 = 0 ; i0 < n_samples ; i0 ++) {
		mappingS.push_back(findSample(string(sr->readers[0].header->samples[i0])));
		if (mappingS.back() >= 0) n_includedS++;
	}

	//Read genotype data
	//The *_arr buffers are (re)allocated by htslib and reused across records
	int ngt, ngt_arr = 0, nds, nds_arr = 0, * gt_arr = NULL, nsl, nsl_arr = 0, * sl_arr = NULL;
	float * ds_arr = NULL;
	bcf1_t * line;
	unsigned int linecount = 0;
	while(bcf_sr_next_line (sr)) {
		linecount ++;
		if (linecount % 100000 == 0) vrb.bullet("Read " + stb.str(linecount) + " lines");
		line =  bcf_sr_get_line(sr, 0);
		if (line->n_allele != 2) { n_excludedG_mult ++; continue; }

		ngt = bcf_get_genotypes(sr->readers[0].header, line, &gt_arr, &ngt_arr);
		nds = bcf_get_format_float(sr->readers[0].header, line,"DS", &ds_arr, &nds_arr);
		if (nds != n_samples && ngt != 2*n_samples) { n_excludedG_void ++; continue; }

		bcf_unpack(line, BCF_UN_STR);
		string sid = string(line->d.id);
		if (!filter_genotype.check(sid)) { n_excludedG_user ++; continue; }

		genotype_id.push_back(sid);
		genotype_chr.push_back(string(bcf_hdr_id2name(sr->readers[0].header, line->rid)));
		string genotype_ref = string(line->d.allele[0]);
		genotype_start.push_back(line->pos + 1);

		//nsl is the number of values read for this record; nsl_arr is only the
		//capacity of sl_arr and says nothing about the current record
		nsl = bcf_get_info_int32(sr->readers[0].header, line, "END", &sl_arr, &nsl_arr);
		if (nsl == 1) genotype_end.push_back(sl_arr[0]);
		else genotype_end.push_back(genotype_start.back() + genotype_ref.size() - 1);
		genotype_val.push_back(vector < float > (sample_count, 0.0));

		//DS is indexed per sample, so it is only usable with exactly one value
		//per sample; otherwise the diploid GT (guaranteed by the check above)
		//is used
		const bool use_dosage = (nds == n_samples);
		for(int i = 0 ; i < n_samples ; i ++) {
			if (mappingS[i] < 0) continue;
			float & value = genotype_val.back()[mappingS[i]];
			if (use_dosage) value = ds_arr[i];
			else {
				//a negative allele index covers both the unphased and the
				//phased missing allele as well as the end-of-vector marker
				const int a0 = bcf_gt_allele(gt_arr[2*i+0]);
				const int a1 = bcf_gt_allele(gt_arr[2*i+1]);
				if (a0 < 0 || a1 < 0) value = bcf_float_missing;
				else value = a0 + a1;
			}
		}
		pair < string, int > temp (sid,n_includedG);
		genotype_id_to_idx.insert(temp);
		n_includedG++;
	}

	//Finalize
	free(gt_arr);
	free(ds_arr);
	free(sl_arr);
	bcf_sr_destroy(sr);
	genotype_count = n_includedG;
	//vrb.bullet(stb.str(n_includedG) + " variants included");
	//if (n_excludedG_user > 0) vrb.bullet(stb.str(n_excludedG_user) + " variants excluded by user");
	//if (n_excludedG_mult > 0) vrb.bullet(stb.str(n_excludedG_mult) + " multi-allelic variants excluded");
	//if (n_excludedG_void > 0) vrb.bullet(stb.str(n_excludedG_void) + " uninformative variants excluded [no GT/DS]");
	//if (genotype_count == 0) vrb.leave("Cannot find genotypes in target region!");
}
Example #6
0
    /**
     * Process all variants held by this block.
     *
     * Each variant is passed to countVariants(). Consecutive variants sharing
     * the same non-negative INFO/BS value on the same chromosome are treated
     * as one benchmarking superlocus. Whenever a superlocus ends, three
     * updates are applied to its records, in this order: QQ of the truth
     * sample, filters, and the confident-region flag in INFO/Regions.
     * Finally every variant is passed to rocEvaluate().
     */
    void BlockQuantify::count()
    {
        _impl->fasta_to_use.reset(new FastaFile(_impl->ref_fasta));
#ifdef DEBUG_BLOCKQUANTIFY
        int lastpos = 0;
        std::cerr << "starting block." << "\n";
#endif
        auto current_bs_start = _impl->variants.begin();
        std::string current_chr;
        int current_bs = -1;
        bool current_bs_valid = false;

        // Collects every non-PASS filter found on any record of the current
        // benchmarking superlocus and adds all of them to those records that
        // are TP in sample 0 while BVT of sample 1 is NOCALL.
        const auto update_bs_filters = [this, &current_bs_start](BlockQuantifyImpl::variantlist_t::iterator to)
        {
            // the id of PASS does not depend on the record, look it up once
            const int pass_id = bcf_hdr_id2int(_impl->hdr, BCF_DT_ID, "PASS");

            std::set<int> bs_filters;
            for(auto cur = current_bs_start; cur != to; ++cur)
            {
                for(int nf = 0; nf < (*cur)->d.n_flt; ++nf)
                {
                    const int f = (*cur)->d.flt[nf];
                    if(f != pass_id)
                    {
                        bs_filters.insert(f);
                    }
                }
            }

            if(bs_filters.empty())
            {
                return;
            }

            for(auto cur = current_bs_start; cur != to; ++cur)
            {
                const std::string bdt = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 0);
                const std::string bvq = bcfhelpers::getFormatString(_impl->hdr, *cur, "BVT", 1);
                // filter TPs where the query call is NOCALL
                if(bdt == "TP" && bvq == "NOCALL")
                {
                    for(auto f : bs_filters)
                    {
                        bcf_add_filter(_impl->hdr, *cur, f);
                    }
                }
            }
        };

        // Rewrites QQ of sample 0 for the records of the current benchmarking
        // superlocus. A record that is TP in sample 0 receives the QQ of
        // sample 1 from the same record when that sample is TP with a usable
        // QQ as well, otherwise the smallest such QQ found in the superlocus.
        // Records that are not TP in sample 0 receive the missing value.
        const auto update_bs_qq = [this, &current_bs_start](BlockQuantifyImpl::variantlist_t::iterator to)
        {
            std::vector<float> tp_qqs;
            for(auto cur = current_bs_start; cur != to; ++cur)
            {
                const float qqq = bcfhelpers::getFormatFloat(_impl->hdr, *cur, "QQ", 1);
                if(std::isnan(qqq))
                {
                    continue;
                }
                const std::string bd = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 1);
                // we want the scores of all TPs in this BS
                if(bd == "TP")
                {
                    tp_qqs.push_back(qqq);
                }
            }

            // lowest TP score of the superlocus, missing when there is none
            float t_qq = bcfhelpers::missing_float();
            if(!tp_qqs.empty())
            {
                t_qq = *(std::min_element(tp_qqs.begin(), tp_qqs.end()));
            }

            // scratch buffer for the per-sample QQ values, reused for every record
            int fsize = bcf_hdr_nsamples(_impl->hdr);
            float * fmt = (float*)calloc((size_t) fsize, sizeof(float));
            for(auto cur = current_bs_start; cur != to; ++cur)
            {
                const std::string truth_bd = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 0);
                bcf_get_format_float(_impl->hdr, *cur, "QQ", &fmt, &fsize);
                if(truth_bd != "TP")
                {
                    fmt[0] = bcfhelpers::missing_float();
                }
                else
                {
                    const float qqq = bcfhelpers::getFormatFloat(_impl->hdr, *cur, "QQ", 1);
                    const std::string query_bd = bcfhelpers::getFormatString(_impl->hdr, *cur, "BD", 1);
                    if(query_bd == "TP" && !std::isnan(qqq))
                    {
                        fmt[0] = qqq;
                    }
                    else
                    {
                        fmt[0] = t_qq;
                    }

                }
                bcf_update_format_float(_impl->hdr, *cur, "QQ", fmt, fsize);
            }
            free(fmt);

#ifdef DEBUG_BLOCKQUANTIFY
            // the range is empty when the block holds no variants at all
            const int bs = (current_bs_start != to)
                           ? bcfhelpers::getInfoInt(_impl->hdr, *current_bs_start, "BS", -1)
                           : -1;
            std::string values;
            for(float x : tp_qqs)
            {
                values += std::to_string(x) + ",";
            }
            std::cerr << "BS: " << bs << " T_QQ = " << t_qq << " [" << values << "]" << "\n";
#endif
        };

        // Appends "TS_boundary" to INFO/Regions of every record of a
        // superlocus that mixes records with and without "CONF" in Regions
        // (or already contains a TS_boundary record), and "TS_contained" when
        // all of its records have "CONF" in Regions.
        const auto update_bs_conf_boundary_flag = [this, &current_bs_start](BlockQuantifyImpl::variantlist_t::iterator to)
        {
            static const int has_conf = 1;
            static const int has_non_conf = 2;
            int conf_non_conf = 0;
            for(auto cur = current_bs_start; cur != to; ++cur)
            {
                const std::string regions = bcfhelpers::getInfoString(_impl->hdr, *cur, "Regions", "");

                if(regions.find("CONF") == std::string::npos)
                {
                    conf_non_conf |= has_non_conf;
                }
                else
                {
                    conf_non_conf |= has_conf;
                }
                if(regions.find("TS_boundary") != std::string::npos)
                {
                    conf_non_conf |= has_non_conf | has_conf;
                }
            }

            for(auto cur = current_bs_start; cur != to; ++cur)
            {
                const std::string regions = bcfhelpers::getInfoString(_impl->hdr, *cur, "Regions", "");

                if(conf_non_conf == (has_conf | has_non_conf))
                {
                    if(regions.find("TS_boundary") == std::string::npos)
                    {
                        bcf_update_info_string(_impl->hdr,
                                               *cur, "Regions",
                                               (regions.empty() ? "TS_boundary" :
                                                regions + ",TS_boundary").c_str());
                    }
                }
                else if(conf_non_conf == has_conf)
                {
                    if(regions.find("TS_contained") == std::string::npos)
                    {
                        // also flag fully confident superloci
                        bcf_update_info_string(_impl->hdr,
                                               *cur, "Regions",
                                               (regions.empty() ? "TS_contained" :
                                                regions + ",TS_contained").c_str());
                    }
                }
            }
        };


        for(auto v_it = _impl->variants.begin(); v_it != _impl->variants.end(); ++v_it)
        {
            // update fields, must output GA4GH-compliant fields
            countVariants(*v_it);

            // determine benchmarking superlocus
            const std::string vchr = bcfhelpers::getChrom(_impl->hdr, *v_it);
            const int vbs = bcfhelpers::getInfoInt(_impl->hdr, *v_it, "BS");
            if(!current_bs_valid)
            {
                current_bs = vbs;
                current_chr = vchr;
                current_bs_valid = true;
            }

#ifdef DEBUG_BLOCKQUANTIFY
            std::cerr << "current BS = " << current_bs << " vbs = " << vbs << "\n";
#endif

            // a record with a different or negative BS, or on another
            // chromosome, closes the superlocus collected so far
            if(   current_bs_start != v_it
               && (vbs != current_bs || vbs < 0 || vchr != current_chr))
            {
#ifdef DEBUG_BLOCKQUANTIFY
                std::cerr << "finishing BS = " << current_bs << " vbs = " << vbs << "\n";
#endif
                update_bs_qq(v_it);
                update_bs_filters(v_it);
                update_bs_conf_boundary_flag(v_it);
                current_bs = vbs;
                current_chr = vchr;
                current_bs_start = v_it;
            }
        }

        // do final superlocus (if any)
        update_bs_qq(_impl->variants.end());
        update_bs_filters(_impl->variants.end());
        update_bs_conf_boundary_flag(_impl->variants.end());

        for(auto & v : _impl->variants)
        {
#ifdef DEBUG_BLOCKQUANTIFY
            lastpos = v->pos;
#endif
            // use BD and BVT to make ROCs
            rocEvaluate(v);
        }
#ifdef DEBUG_BLOCKQUANTIFY
        std::cerr << "finished block " << lastpos << " - " << _impl->variants.size() << " records on thread " << std::this_thread::get_id() << "\n";
#endif
        _impl->fasta_to_use.reset(nullptr);
    }