// Function to fill in predicted signal values void BaseHypothesisEvaluator(BamTools::BamAlignment &alignment, const string &flow_order_str, const string &alt_base_hyp, float &delta_score, float &fit_score, int heavy_verbose) { // --- Step 1: Initialize Objects and retrieve relevant tags delta_score = 1e5; fit_score = 1e5; vector<string> Hypotheses(2); vector<float> measurements, phase_params; int start_flow, num_flows, prefix_flow=0; if (not GetBamTags(alignment, flow_order_str.length(), measurements, phase_params, start_flow)) return; num_flows = measurements.size(); ion::FlowOrder flow_order(flow_order_str, num_flows); BasecallerRead master_read; master_read.SetData(measurements, flow_order.num_flows()); TreephaserLite treephaser(flow_order); treephaser.SetModelParameters(phase_params[0], phase_params[1]); // --- Step 2: Solve beginning of the read // Look at mapped vs. unmapped reads in BAM Hypotheses[0] = alignment.QueryBases; Hypotheses[1] = alt_base_hyp; // Safety: reverse complement reverse strand reads in mapped bam if (alignment.IsMapped() and alignment.IsReverseStrand()) { RevComplementInPlace(Hypotheses[0]); RevComplementInPlace(Hypotheses[1]); } prefix_flow = GetMasterReadPrefix(treephaser, flow_order, start_flow, Hypotheses[0], master_read); unsigned int prefix_size = master_read.sequence.size(); // --- Step 3: creating predictions for the individual hypotheses vector<BasecallerRead> hypothesesReads(Hypotheses.size()); vector<float> squared_distances(Hypotheses.size(), 0.0); int max_last_flow = 0; for (unsigned int i_hyp=0; i_hyp<hypothesesReads.size(); ++i_hyp) { hypothesesReads[i_hyp] = master_read; // --- add hypothesis sequence to clipped prefix unsigned int i_base = 0; int i_flow = prefix_flow; while (i_base<Hypotheses[i_hyp].length() and i_base<(2*(unsigned int)flow_order.num_flows()-prefix_size)) { while (i_flow < flow_order.num_flows() and flow_order.nuc_at(i_flow) != Hypotheses[i_hyp][i_base]) i_flow++; if (i_flow < flow_order.num_flows() and i_flow > max_last_flow) max_last_flow = i_flow; if (i_flow >= flow_order.num_flows()) break; // Add base to sequence only if it fits into flow order hypothesesReads[i_hyp].sequence.push_back(Hypotheses[i_hyp][i_base]); i_base++; } i_flow = min(i_flow, flow_order.num_flows()-1); // Solver simulates beginning of the read and then fills in the remaining clipped bases for which we have flow information treephaser.Solve(hypothesesReads[i_hyp], num_flows, i_flow); } // Compute L2-distance of measurements and predictions for (unsigned int i_hyp=0; i_hyp<hypothesesReads.size(); ++i_hyp) { for (int iFlow=0; iFlow<=max_last_flow; iFlow++) squared_distances[i_hyp] += (measurements.at(iFlow) - hypothesesReads[i_hyp].prediction.at(iFlow)) * (measurements.at(iFlow) - hypothesesReads[i_hyp].prediction.at(iFlow)); } // Delta: L2-distance of alternative base Hypothesis - L2-distance of bases as called delta_score = squared_distances.at(1) - squared_distances.at(0); fit_score = min(squared_distances.at(1), squared_distances.at(0)); // --- verbose --- if (heavy_verbose > 1 or (delta_score < 0 and heavy_verbose > 0)) { cout << "Processed read " << alignment.Name << endl; cout << "Delta Fit: " << delta_score << " Overall Fit: " << fit_score << endl; PredictionGenerationVerbose(Hypotheses, hypothesesReads, phase_params, flow_order, start_flow, prefix_size); } }