コード例 #1
0
ファイル: HandleVariant.cpp プロジェクト: GerritvanNiekerk/TS
/**
 * Evaluates one (possibly multi-allele) candidate variant.
 *
 * Steps, in order:
 *   1. Set up and filter all alleles of the candidate.
 *   2. Size the per-allele evaluators and set up hypothesis checks.
 *   3. Stack up reads for the variant via StackUpOneVariant, using the
 *      window_start / window_end stored on the multi-allele variant.
 *   4. If the stack reports no coverage, auto-fail the candidate and return.
 *      Otherwise solve all alleles vs. reference, write the result to the
 *      variant and optionally dump a JSON diagnostic.
 *
 * @param thread_objects     Per-thread state; provides local_contig_sequence and
 *                           is forwarded to read stacking and the solver.
 * @param candidate_variant  Candidate to evaluate. On exit it should still point to
 *                           the same element as my_ensemble.multi_allele_var.variant.
 * @param parameters         Run parameters (filter controls, program-flow flags).
 * @param global_context     Shared input structures forwarded to the helpers.
 */
void EnsembleProcessOneVariant(PersistingThreadObjects &thread_objects, vcf::Variant ** candidate_variant,
                               ExtendParameters * parameters, InputStructures &global_context) {
    EnsembleEval my_ensemble;

    // Allele preparation; filtering happens here in case we want to skip the entries below.
    my_ensemble.multi_allele_var.SetupAllAlleles(candidate_variant, thread_objects.local_contig_sequence,
                                                 parameters, global_context);
    my_ensemble.multi_allele_var.FilterAllAlleles(candidate_variant, parameters->my_controls.filter_variant);

    // One evaluator per allele identity.
    my_ensemble.allele_eval.resize(my_ensemble.multi_allele_var.allele_identity_vector.size());
    my_ensemble.SetupHypothesisChecks(parameters);

    // We read in one stack per multi-allele variant.
    my_ensemble.my_data.StackUpOneVariant(thread_objects,
                                          my_ensemble.multi_allele_var.window_start,
                                          my_ensemble.multi_allele_var.window_end,
                                          candidate_variant, parameters, global_context);

    // Nothing to evaluate without reads: fail the candidate and stop here.
    if (my_ensemble.my_data.no_coverage) {
        AutoFailTheCandidate(my_ensemble.multi_allele_var.variant, parameters->my_controls.suppress_no_calls);
        return;
    }

    // Glue in variants.
    int best_allele = TrySolveAllAllelesVsRef(my_ensemble, thread_objects, global_context);

    // Output to variant.
    GlueOutputVariant(my_ensemble, parameters, best_allele);

    // Diagnostic output for this ensemble: look at everything that came through,
    // i.e. unfiltered variants, hotspots, or anything when candidate generation is skipped.
    // Logical operators are used so the test is correct even if a flag holds a
    // non-0/1 integer value (bitwise '&' / '|' would not be).
    if (parameters->program_flow.rich_json_diagnostic
            && (!(*my_ensemble.multi_allele_var.variant)->isFiltered
                || parameters->program_flow.skipCandidateGeneration
                || (*my_ensemble.multi_allele_var.variant)->isHotSpot)) {
        JustOneDiagnosis(my_ensemble, parameters->program_flow.json_plot_dir);
    }
    // Pointer to candidate variant should still point to the same element as my_ensemble.multi_allele_var.variant
}
コード例 #2
0
ファイル: HandleVariant.cpp プロジェクト: biocyberman/TS
// Evaluates one candidate variant for one sample and writes the result into the variant.
//
// Parameters:
//   thread_objects     - per-thread state forwarded to allele splicing, inference setup
//                        and (in debug mode) the candidate examiner.
//   vc                 - caller context; supplies parameters, global_context, ref_reader
//                        and mol_tag_manager.
//   candidate_variant  - candidate to evaluate; its variant and variant_specific_params are used.
//   bam_position       - passed to StackUpOneVariant when molecular tags are not in use.
//   molecular_families - passed to StackUpOneVariantMolTag when molecular tags are in use.
//   sample_index       - index into variant.sampleNames; a negative value yields an empty sample name.
//
// return 0: normal termination
// return 1: no data (empty read stack)
// return 2: no data (no valid functional families on read stack)
int EnsembleProcessOneVariant(PersistingThreadObjects &thread_objects, VariantCallerContext& vc,
    VariantCandidate &candidate_variant, const PositionInProgress& bam_position,
	vector< vector<MolecularFamily> > &molecular_families, int sample_index)
{
  // clock() yields processor time in clock_t ticks; keep the native type for the subtraction below.
  const clock_t t0 = clock();
  string sample_name = (sample_index >= 0)? candidate_variant.variant.sampleNames[sample_index] : "";
  const bool use_molecular_tag = vc.mol_tag_manager->tag_trimmer->HaveTags();
  const bool debug_on = vc.parameters->program_flow.DEBUG > 0;

  // Prints the closing debug banner with the processor time spent since entry.
  // Ticks are converted with CLOCKS_PER_SEC rather than a hard-coded 1E6, which is only
  // correct on platforms where the tick rate happens to be one million per second.
  const auto print_completion = [&]() {
    cout << "[tvc] Complete EnsembleProcessOneVariant for (" << PrintVariant(candidate_variant.variant)
         << "). Processing time = " << static_cast<double>(clock() - t0) / CLOCKS_PER_SEC
         << " sec." << endl << endl;
  };

  if (debug_on) {
    cout << endl << "[tvc] Start EnsembleProcessOneVariant for (" << PrintVariant(candidate_variant.variant) << ")" << endl << endl;
  }

  // These VCF format fields are removed when molecular tags are not in use.
  if (not use_molecular_tag) {
    RemoveVcfFormat(candidate_variant.variant, {"MDP", "MAO", "MRO", "MAF"});
  }

  EnsembleEval my_ensemble(candidate_variant.variant);

  // Allele preparation
  my_ensemble.SetupAllAlleles(*vc.parameters, *vc.global_context, *vc.ref_reader);
  // put filtering here in case we want to skip below entries
  my_ensemble.FilterAllAlleles(vc.parameters->my_controls, candidate_variant.variant_specific_params);

  // set parameters for the evaluator
  my_ensemble.SetAndPropagateParameters(vc.parameters, use_molecular_tag, candidate_variant.variant_specific_params);

  // Debug only: run the candidate examiner on this variant. The resulting allele_groups
  // is not used afterwards here. NOTE(review): presumably these calls emit debug output - confirm.
  if (debug_on) {
    list<list<int> > allele_groups;
    CandidateExaminer my_examiner(&thread_objects, &vc);
    my_examiner.SetupVariantCandidate(candidate_variant);
    my_examiner.FindLookAheadEnd0();
    my_examiner.SplitCandidateVariant(allele_groups);
  }

  // We read in one stack per multi-allele variant
  if (use_molecular_tag) {
    my_ensemble.StackUpOneVariantMolTag(*vc.parameters, molecular_families, sample_index);
  } else {
    my_ensemble.StackUpOneVariant(*vc.parameters, bam_position, sample_index);
  }

  // No data
  if (my_ensemble.read_stack.empty()) {
    DoStepsForNoData(candidate_variant, sample_name, sample_index, use_molecular_tag, "NODATA");
    if (debug_on) {
      cout << "+ No data: empty read stack!" << endl << endl;
      print_completion();
    }
    return 1;
  }

  // glue in variants
  my_ensemble.SpliceAllelesIntoReads(thread_objects, *vc.global_context, *vc.parameters, *vc.ref_reader);

  // Calculate flow-disruptiveness in the read level
  my_ensemble.FlowDisruptivenessInReadLevel(*vc.global_context);

  // fill in quantities derived from predictions
  my_ensemble.allele_eval.InitForInference(thread_objects, my_ensemble.read_stack, *vc.global_context, my_ensemble.allele_identity_vector);

  // No valid function family
  if (use_molecular_tag && my_ensemble.allele_eval.total_theory.GetNumFuncFamilies() == 0) {
    DoStepsForNoData(candidate_variant, sample_name, sample_index, use_molecular_tag, "NOVALIDFUNCFAM");
    if (debug_on) {
      cout << "+ No valid functional families on read stack!" << endl << endl;
      print_completion();
    }
    return 2;
  }

  // do inference
  my_ensemble.allele_eval.ExecuteInference();

  // set fd in the read_stack level.
  my_ensemble.FlowDisruptivenessInReadStackLevel(vc.parameters->my_controls.min_ratio_for_fd);

  // now we're in the guaranteed state of best index
  vector<float> semi_soft_allele_freq_est;
  int best_allele = my_ensemble.DetectBestMultiAllelePair(semi_soft_allele_freq_est);
  if (vc.parameters->my_controls.report_ppa) {
    my_ensemble.DetectPossiblePolyploidyAlleles(semi_soft_allele_freq_est, vc.parameters->my_controls, candidate_variant.variant_specific_params);
  }

  if (use_molecular_tag) {
    my_ensemble.CalculateTagSimilarity(*vc.mol_tag_manager, vc.parameters->my_controls.tag_sim_max_cov, sample_index);
    my_ensemble.VariantFamilySizeHistogram();
  }

  // output to variant
  GlueOutputVariant(my_ensemble, candidate_variant, *vc.parameters, best_allele, sample_index);

  // output the inference results (MUQUAL, MUGT, MUGQ) if I turn on multi_min_allele_freq
  if (vc.parameters->program_flow.is_multi_min_allele_freq) {
    my_ensemble.MultiMinAlleleFreq(vc.parameters->program_flow.multi_min_allele_freq);
  }

  // test diagnostic output for this ensemble: look at everything that came through
  // (variants that are not filtered, or hotspots). Logical operators are used instead of
  // bitwise '&' / '|' so the test holds even if a flag carries a non-0/1 integer value.
  if (vc.parameters->program_flow.rich_json_diagnostic
      && (!my_ensemble.variant->isFiltered || my_ensemble.variant->isHotSpot)) {
    cout << "+ Dumping rich json diagnostic for (" << PrintVariant(candidate_variant.variant) << ")" << endl;
    JustOneDiagnosis(my_ensemble, *vc.global_context, vc.parameters->json_plot_dir, true);
  }
  if (vc.parameters->program_flow.minimal_diagnostic
      && (!my_ensemble.variant->isFiltered || my_ensemble.variant->isHotSpot)) {
    cout << "+ Dumping minimal json diagnostic for (" << PrintVariant(candidate_variant.variant) << ")" << endl;
    JustOneDiagnosis(my_ensemble, *vc.global_context, vc.parameters->json_plot_dir, false);
  }

  if (debug_on) {
    cout << endl;
    print_completion();
  }
  return 0;
}