//void EnsembleProcessOneVariant(BamMultiReader * bamReader, const string & local_contig_sequence, void EnsembleProcessOneVariant(PersistingThreadObjects &thread_objects, vcf::Variant ** candidate_variant, ExtendParameters * parameters, InputStructures &global_context) { ; EnsembleEval my_ensemble; my_ensemble.multi_allele_var.SetupAllAlleles(candidate_variant, thread_objects.local_contig_sequence, parameters, global_context); my_ensemble.multi_allele_var.FilterAllAlleles(candidate_variant, parameters->my_controls.filter_variant); // put filtering here in case we want to skip below entries my_ensemble.allele_eval.resize(my_ensemble.multi_allele_var.allele_identity_vector.size()); my_ensemble.SetupHypothesisChecks(parameters); // We read in one stack per multi-allele variant my_ensemble.my_data.StackUpOneVariant(thread_objects, my_ensemble.multi_allele_var.window_start, my_ensemble.multi_allele_var.window_end, candidate_variant, parameters, global_context); // glue in variants if (!my_ensemble.my_data.no_coverage) { int best_allele = TrySolveAllAllelesVsRef(my_ensemble, thread_objects, global_context); // output to variant GlueOutputVariant(my_ensemble, parameters, best_allele); // test diagnostic output for this ensemble if (parameters->program_flow.rich_json_diagnostic & (!((*(my_ensemble.multi_allele_var.variant))->isFiltered) | parameters->program_flow.skipCandidateGeneration | (*my_ensemble.multi_allele_var.variant)->isHotSpot)) // look at everything that came through JustOneDiagnosis(my_ensemble, parameters->program_flow.json_plot_dir); } else { AutoFailTheCandidate(my_ensemble.multi_allele_var.variant, parameters->my_controls.suppress_no_calls); } // Pointer to candidate variant should still point to the same element as my_ensemble.multi_allele_var.variant }
// return 0: normal termination // return 1: no data (empty read stack) // return 2: no data (no valid functional families on read stack) int EnsembleProcessOneVariant(PersistingThreadObjects &thread_objects, VariantCallerContext& vc, VariantCandidate &candidate_variant, const PositionInProgress& bam_position, vector< vector<MolecularFamily> > &molecular_families, int sample_index) { unsigned long t0 = clock(); string sample_name = (sample_index >= 0)? candidate_variant.variant.sampleNames[sample_index] : ""; const bool use_molecular_tag = vc.mol_tag_manager->tag_trimmer->HaveTags(); if(vc.parameters->program_flow.DEBUG > 0 ){ cout<< endl << "[tvc] Start EnsembleProcessOneVariant for (" << PrintVariant(candidate_variant.variant) << ")"<< endl << endl; } if (not use_molecular_tag){ RemoveVcfFormat(candidate_variant.variant, {"MDP", "MAO", "MRO", "MAF"}); } EnsembleEval my_ensemble(candidate_variant.variant); // Allele preparation my_ensemble.SetupAllAlleles(*vc.parameters, *vc.global_context, *vc.ref_reader); my_ensemble.FilterAllAlleles(vc.parameters->my_controls, candidate_variant.variant_specific_params); // put filtering here in case we want to skip below entries // set parameters for the evaluator my_ensemble.SetAndPropagateParameters(vc.parameters, use_molecular_tag, candidate_variant.variant_specific_params); if (vc.parameters->program_flow.DEBUG > 0){ list<list<int> >allele_groups; CandidateExaminer my_examiner(&thread_objects, &vc); my_examiner.SetupVariantCandidate(candidate_variant); my_examiner.FindLookAheadEnd0(); my_examiner.SplitCandidateVariant(allele_groups); } // We read in one stack per multi-allele variant if (use_molecular_tag){ my_ensemble.StackUpOneVariantMolTag(*vc.parameters, molecular_families, sample_index); } else{ my_ensemble.StackUpOneVariant(*vc.parameters, bam_position, sample_index); } // No data if (my_ensemble.read_stack.empty()) { DoStepsForNoData(candidate_variant, sample_name, sample_index, use_molecular_tag, "NODATA"); if(vc.parameters->program_flow.DEBUG > 0 ){ cout<< "+ No data: empty read stack!" << endl << endl << "[tvc] Complete EnsembleProcessOneVariant for ("<< PrintVariant(candidate_variant.variant) << "). Processing time = " << (double) (clock() - t0) / 1E6 << " sec." << endl << endl; } return 1; } // glue in variants my_ensemble.SpliceAllelesIntoReads(thread_objects, *vc.global_context, *vc.parameters, *vc.ref_reader); // Calculate flow-disruptiveness in the read level my_ensemble.FlowDisruptivenessInReadLevel(*vc.global_context); // fill in quantities derived from predictions my_ensemble.allele_eval.InitForInference(thread_objects, my_ensemble.read_stack, *vc.global_context, my_ensemble.allele_identity_vector); // No valid function family if (use_molecular_tag){ if (my_ensemble.allele_eval.total_theory.GetNumFuncFamilies() == 0) { DoStepsForNoData(candidate_variant, sample_name, sample_index, use_molecular_tag, "NOVALIDFUNCFAM"); if (vc.parameters->program_flow.DEBUG > 0){ cout << "+ No valid functional families on read stack!" << endl << endl << "[tvc] Complete EnsembleProcessOneVariant for ("<< PrintVariant(candidate_variant.variant) << "). Processing time = " << (double) (clock() - t0) / 1E6 << " sec." << endl << endl; } return 2; } } // do inference my_ensemble.allele_eval.ExecuteInference(); // set fd in the read_stack level. my_ensemble.FlowDisruptivenessInReadStackLevel(vc.parameters->my_controls.min_ratio_for_fd); // now we're in the guaranteed state of best index vector<float> semi_soft_allele_freq_est; int best_allele = my_ensemble.DetectBestMultiAllelePair(semi_soft_allele_freq_est); if (vc.parameters->my_controls.report_ppa){ my_ensemble.DetectPossiblePolyploidyAlleles(semi_soft_allele_freq_est, vc.parameters->my_controls, candidate_variant.variant_specific_params); } if (use_molecular_tag){ my_ensemble.CalculateTagSimilarity(*vc.mol_tag_manager, vc.parameters->my_controls.tag_sim_max_cov, sample_index); my_ensemble.VariantFamilySizeHistogram(); } // output to variant GlueOutputVariant(my_ensemble, candidate_variant, *vc.parameters, best_allele, sample_index); // output the inference results (MUQUAL, MUGT, MUGQ) if I turn on multi_min_allele_freq if (vc.parameters->program_flow.is_multi_min_allele_freq){ my_ensemble.MultiMinAlleleFreq(vc.parameters->program_flow.multi_min_allele_freq); } // test diagnostic output for this ensemble if (vc.parameters->program_flow.rich_json_diagnostic & (!(my_ensemble.variant->isFiltered) | my_ensemble.variant->isHotSpot)){ // look at everything that came through cout << "+ Dumping rich json diagnostic for (" << PrintVariant(candidate_variant.variant) << ")" << endl; JustOneDiagnosis(my_ensemble, *vc.global_context, vc.parameters->json_plot_dir, true); } if (vc.parameters->program_flow.minimal_diagnostic & (!(my_ensemble.variant->isFiltered) | my_ensemble.variant->isHotSpot)){ // look at everything that came through cout << "+ Dumping minimal json diagnostic for (" << PrintVariant(candidate_variant.variant) << ")" << endl; JustOneDiagnosis(my_ensemble, *vc.global_context, vc.parameters->json_plot_dir, false); } if(vc.parameters->program_flow.DEBUG > 0){ cout << endl << "[tvc] Complete EnsembleProcessOneVariant for (" << PrintVariant(candidate_variant.variant) << "). Processing time = " << (double) (clock() - t0) / 1E6 << " sec." << endl << endl; } return 0; }