// Builds one solver per entry of global_context.flow_order_vector.
// When compiled with __SSE3__ and use_SSE_basecaller is set, TreephaserSSE
// objects are created; in every other case DPTreephaser objects are created.
PersistingThreadObjects::PersistingThreadObjects(const InputStructures &global_context)
    : use_SSE_basecaller(global_context.use_SSE_basecaller), realigner(50, 1)
{
#ifdef __SSE3__
  if (use_SSE_basecaller) {
    for (unsigned int idx = 0; idx < global_context.flow_order_vector.size(); ++idx) {
      TreephaserSSE sse_solver(global_context.flow_order_vector.at(idx),
                               DPTreephaser::kWindowSizeDefault_);
      treephaserSSE_vector.push_back(sse_solver);
    }
    return;  // SSE solvers are in place; the scalar solvers are not needed
  }
#endif

  for (unsigned int idx = 0; idx < global_context.flow_order_vector.size(); ++idx) {
    DPTreephaser scalar_solver(global_context.flow_order_vector.at(idx));
    dpTreephaser_vector.push_back(scalar_solver);
  }
}
// R entry point: solves every row of Rsignal with DPTreephaser.
//
// Rsignal      numeric matrix, one read per row and one flow per column
// RkeyFlow     integer vector with the key flows used for key normalization
// RflowCycle   flow order string; must be at least as long as the number of flows
// Rcf/Rie/Rdr  scalar phasing parameters; Rdr is only applied for "dp-treephaser",
//              the other basecallers run with a droop of 0
// Rbasecaller  one of "treephaser-swan", "dp-treephaser", "treephaser-adaptive"
//
// Returns a list with elements "seq", "predicted", "residual" and "hpFlow".
// Invalid arguments and C++ exceptions are reported through Rf_error().
// If Rsignal has no rows, R_NilValue is returned.
RcppExport SEXP treePhaser(SEXP Rsignal, SEXP RkeyFlow, SEXP RflowCycle, SEXP Rcf, SEXP Rie, SEXP Rdr, SEXP Rbasecaller)
{
  SEXP ret = R_NilValue;
  char *exceptionMesg = NULL;

  try {
    RcppMatrix<double> signal(Rsignal);
    RcppVector<int>    keyFlow(RkeyFlow);
    string flowCycle  = Rcpp::as<string>(RflowCycle);
    double cf         = Rcpp::as<double>(Rcf);
    double ie         = Rcpp::as<double>(Rie);
    double dr         = Rcpp::as<double>(Rdr);
    string basecaller = Rcpp::as<string>(Rbasecaller);

    unsigned int nFlow = signal.cols();
    unsigned int nRead = signal.rows();

    if (basecaller != "treephaser-swan" && basecaller != "dp-treephaser" && basecaller != "treephaser-adaptive") {
      std::string exception = "base value for basecaller supplied: " + basecaller;
      exceptionMesg = copyMessageToR(exception.c_str());
    } else if (flowCycle.length() < nFlow) {
      std::string exception = "Flow cycle is shorter than number of flows to solve";
      exceptionMesg = copyMessageToR(exception.c_str());
    } else {

      // Prepare objects for holding and passing back results
      RcppMatrix<double>        predicted_out(nRead, nFlow);
      RcppMatrix<double>        residual_out(nRead, nFlow);
      RcppMatrix<int>           hpFlow_out(nRead, nFlow);
      std::vector<std::string>  seq_out(nRead);

      // Set up key flow vector
      int nKeyFlow = keyFlow.size();
      vector<int> keyVec(nKeyFlow);
      for (int iFlow = 0; iFlow < nKeyFlow; iFlow++)
        keyVec[iFlow] = keyFlow(iFlow);

      // Iterate over all reads
      vector<float> sigVec(nFlow);
      string result;
      for (unsigned int iRead = 0; iRead < nRead; iRead++) {
        for (unsigned int iFlow = 0; iFlow < nFlow; iFlow++)
          sigVec[iFlow] = (float) signal(iRead, iFlow);

        BasecallerRead read;
        read.SetDataAndKeyNormalize(&(sigVec[0]), (int)nFlow, &(keyVec[0]), nKeyFlow - 1);

        DPTreephaser dpTreephaser(flowCycle.c_str(), flowCycle.length(), 8);
        if (basecaller == "dp-treephaser")
          dpTreephaser.SetModelParameters(cf, ie, dr);
        else
          dpTreephaser.SetModelParameters(cf, ie, 0); // Adaptive normalization

        // Execute the iterative solving-normalization routine
        if (basecaller == "dp-treephaser")
          dpTreephaser.NormalizeAndSolve4(read, nFlow);
        else if (basecaller == "treephaser-adaptive")
          dpTreephaser.NormalizeAndSolve3(read, nFlow); // Adaptive normalization
        else
          dpTreephaser.NormalizeAndSolve5(read, nFlow); // sliding window adaptive normalization

        read.flowToString(flowCycle, seq_out[iRead]);
        for (unsigned int iFlow = 0; iFlow < nFlow; iFlow++) {
          predicted_out(iRead, iFlow) = (double) read.prediction[iFlow];
          residual_out(iRead, iFlow)  = (double) read.normalizedMeasurements[iFlow] - read.prediction[iFlow];
          hpFlow_out(iRead, iFlow)    = (int)    read.solution[iFlow];
        }
      }

      // Store results once, after all reads are solved. The result set used to
      // be rebuilt inside the read loop, which re-copied every matrix per read.
      // The nRead > 0 guard keeps the previous empty-input result (R_NilValue).
      if (nRead > 0) {
        RcppResultSet rs;
        rs.add("seq",       seq_out);
        rs.add("predicted", predicted_out);
        rs.add("residual",  residual_out);
        rs.add("hpFlow",    hpFlow_out);
        ret = rs.getReturnList();
      }
    }
  } catch (std::exception& ex) {
    exceptionMesg = copyMessageToR(ex.what());
  } catch (...) {
    exceptionMesg = copyMessageToR("unknown reason");
  }

  // The message may contain caller-supplied text (the basecaller name), so it
  // must be passed as an argument and never as the format string itself.
  if (exceptionMesg != NULL)
    Rf_error("%s", exceptionMesg);

  return ret;
}
void RegionAnalysis::worker_Treephaser() { // Worker method: load regions one by one and process them until done // int numFlows = wells->NumFlows(); // DPTreephaser dpTreephaser(wells->FlowOrder(), numFlows, 8); DPTreephaser dpTreephaser(flowOrder.c_str(), numFlows, 8); std::deque<int> wellX; std::deque<int> wellY; std::deque<std::vector<float> > wellMeasurements; int iRegion; std::vector<BasecallerRead> data; data.reserve(MAX_CAFIE_READS_PER_REGION); while (wellsReader.loadNextRegion(wellX, wellY, wellMeasurements, iRegion)) { float parameters[3]; parameters[0] = 0.00; // CF - initial guess parameters[1] = 0.00; // IE - initial guess parameters[2] = 0.000; // DR - initial guess for (int globalIteration = 0; globalIteration < 5; globalIteration++) { dpTreephaser.SetModelParameters(parameters[0], parameters[1], parameters[2]); data.clear(); // Iterate over live library wells and consider them as a part of the phase training set std::deque<int>::iterator x = wellX.begin(); std::deque<int>::iterator y = wellY.begin(); std::deque<std::vector<float> >::iterator measurements = wellMeasurements.begin(); for (; (x != wellX.end()) && (data.size() < MAX_CAFIE_READS_PER_REGION); x++, y++, measurements++) { if (!mask->Match(*x, *y, MaskLive)) continue; if (!mask->Match(*x, *y, MaskBead)) continue; int beadClass = 1; // 1 - library, 0 - TF if (!mask->Match(*x, *y, MaskLib)) { // Is it a library bead? if (!mask->Match(*x, *y, MaskTF)) // OK, is it at least a TF? 
continue; beadClass = 0; } data.push_back(BasecallerRead()); data.back().SetDataAndKeyNormalize(&(measurements->at(0)), numFlows, libraryInfo[beadClass].Ionogram, libraryInfo[beadClass].numKeyFlows - 1); bool keypass = true; for (int iFlow = 0; iFlow < (libraryInfo[beadClass].numKeyFlows - 1); iFlow++) { if ((int) (data.back().measurements[iFlow] + 0.5) != libraryInfo[beadClass].Ionogram[iFlow]) keypass = false; if (isnan(data.back().measurements[iFlow])) keypass = false; } if (!keypass) { data.pop_back(); continue; } dpTreephaser.Solve(data.back(), std::min(100, numFlows)); data.back().Normalize(11, std::min(80, numFlows)); dpTreephaser.Solve(data.back(), std::min(120, numFlows)); data.back().Normalize(11, std::min(100, numFlows)); dpTreephaser.Solve(data.back(), std::min(120, numFlows)); float metric = 0; for (int iFlow = 20; (iFlow < 100) && (iFlow < numFlows); iFlow++) { if (data.back().normalizedMeasurements[iFlow] > 1.2) continue; float delta = data.back().normalizedMeasurements[iFlow] - data.back().prediction[iFlow]; if (!isnan(delta)) metric += delta * delta; else metric += 1e10; } if (metric > 1) { data.pop_back(); continue; } } if (data.size() < 10) break; // Perform parameter estimation NelderMeadOptimization(data, dpTreephaser, parameters, 50, 3); } pthread_mutex_lock(common_output_mutex); if (data.size() < 10) printf("Region % 3d: Using default phase parameters, %d reads insufficient for training\n", iRegion + 1, (int) data.size()); else printf("Region % 3d: Using %d reads for phase parameter training\n", iRegion + 1, (int) data.size()); // printf("o"); fflush(stdout); pthread_mutex_unlock(common_output_mutex); (*cf)[iRegion] = parameters[0]; (*ie)[iRegion] = parameters[1]; (*dr)[iRegion] = parameters[2]; } }
// R entry point: solves every row of Rsignal with DPTreephaser, optionally
// applying a recalibration model.
//
// Rsignal          numeric matrix, one read per row and one flow per column
// RkeyFlow         integer vector of key flows used for key normalization
// RflowCycle       flow order string; must be at least as long as the number of flows
// Rcf/Rie/Rdr      phasing parameters; if Rcf has length 1 the first element of each
//                  vector is used for all reads, otherwise element iRead is used
// Rbasecaller      "treephaser-swan", "treephaser-solve", "dp-treephaser" or
//                  "treephaser-adaptive"
// RdiagonalStates  values > 0 are passed as true to SetStateProgression()
// RmodelFile       recalibration model file; an empty string skips model loading
// RmodelThreshold  threshold passed to RecalibrationModel::InitializeModel()
// Rxval/Ryval      per-read coordinates used to look up the recalibration model
//
// Returns a list with "seq", "predicted", "residual", "norm_additive" and
// "norm_multipl". Argument errors are raised through Rf_error(); C++
// exceptions are forwarded to R.
RcppExport SEXP treePhaser(SEXP Rsignal, SEXP RkeyFlow, SEXP RflowCycle,
                           SEXP Rcf, SEXP Rie, SEXP Rdr, SEXP Rbasecaller, SEXP RdiagonalStates,
                           SEXP RmodelFile, SEXP RmodelThreshold, SEXP Rxval, SEXP Ryval)
{
  SEXP ret = R_NilValue;
  char *exceptionMesg = NULL;

  try {
    Rcpp::NumericMatrix signal(Rsignal);
    Rcpp::IntegerVector keyFlow(RkeyFlow);
    string flowCycle = Rcpp::as<string>(RflowCycle);
    Rcpp::NumericVector cf_vec(Rcf);
    Rcpp::NumericVector ie_vec(Rie);
    Rcpp::NumericVector dr_vec(Rdr);
    string basecaller = Rcpp::as<string>(Rbasecaller);
    unsigned int diagonalStates = Rcpp::as<int>(RdiagonalStates);

    // Recalibration Variables
    string model_file = Rcpp::as<string>(RmodelFile);
    int model_threshold = Rcpp::as<int>(RmodelThreshold);
    Rcpp::IntegerVector x_values(Rxval);
    Rcpp::IntegerVector y_values(Ryval);
    RecalibrationModel recalModel;
    if (model_file.length() > 0) {
      recalModel.InitializeModel(model_file, model_threshold);
    }

    ion::FlowOrder flow_order(flowCycle, flowCycle.length());
    unsigned int nFlow = signal.cols();
    unsigned int nRead = signal.rows();

    if (basecaller != "treephaser-swan" && basecaller != "treephaser-solve" &&
        basecaller != "dp-treephaser" && basecaller != "treephaser-adaptive") {
      std::string exception = "base value for basecaller supplied: " + basecaller;
      exceptionMesg = strdup(exception.c_str());
    } else if (flowCycle.length() < nFlow) {
      std::string exception = "Flow cycle is shorter than number of flows to solve";
      exceptionMesg = strdup(exception.c_str());
    } else {

      // Prepare objects for holding and passing back results
      Rcpp::NumericMatrix       predicted_out(nRead, nFlow);
      Rcpp::NumericMatrix       residual_out(nRead, nFlow);
      Rcpp::NumericMatrix       norm_additive_out(nRead, nFlow);
      Rcpp::NumericMatrix       norm_multipl_out(nRead, nFlow);
      std::vector<std::string>  seq_out(nRead);

      // Set up key flow vector
      int nKeyFlow = keyFlow.size();
      vector<int> keyVec(nKeyFlow);
      for (int iFlow = 0; iFlow < nKeyFlow; iFlow++)
        keyVec[iFlow] = keyFlow(iFlow);

      // Iterate over all reads
      vector<float> sigVec(nFlow);
      BasecallerRead read;
      DPTreephaser dpTreephaser(flow_order);
      dpTreephaser.SetStateProgression((diagonalStates > 0));

      // In contrast to pipeline, we always use droop here.
      // To have the same behavior of treephaser-swan as in the pipeline, supply dr=0
      bool per_read_phasing = true;
      if (cf_vec.size() == 1) {
        per_read_phasing = false;
        dpTreephaser.SetModelParameters((double)cf_vec(0), (double)ie_vec(0), (double)dr_vec(0));
      }

      // Main loop iterating over reads and solving them
      for (unsigned int iRead = 0; iRead < nRead; iRead++) {

        // Set phasing parameters for this read
        if (per_read_phasing)
          dpTreephaser.SetModelParameters((double)cf_vec(iRead), (double)ie_vec(iRead), (double)dr_vec(iRead));

        // And load recalibration model
        if (recalModel.is_enabled()) {
          int my_x = (int)x_values(iRead);
          int my_y = (int)y_values(iRead);
          const vector<vector<vector<float> > > * aPtr = 0;
          const vector<vector<vector<float> > > * bPtr = 0;
          aPtr = recalModel.getAs(my_x, my_y);
          bPtr = recalModel.getBs(my_x, my_y);
          if (aPtr == 0 or bPtr == 0) {
            cout << "Error finding a recalibration model for x: " << x_values(iRead) << " y: " << y_values(iRead);
            cout << endl;
          }
          // The pointers are handed over even when the lookup failed.
          dpTreephaser.SetAsBs(aPtr, bPtr);
        }

        for (unsigned int iFlow = 0; iFlow < nFlow; iFlow++)
          sigVec[iFlow] = (float) signal(iRead, iFlow);

        if (basecaller == "treephaser-solve") {
          // Interface to just solve without any normalization
          read.SetData(sigVec, (int)nFlow);
        } else {
          read.SetDataAndKeyNormalize(&(sigVec[0]), (int)nFlow, &(keyVec[0]), nKeyFlow - 1);
        }

        // Execute the iterative solving-normalization routine
        if (basecaller == "dp-treephaser") {
          dpTreephaser.NormalizeAndSolve_GainNorm(read, nFlow);
        } else if (basecaller == "treephaser-solve") {
          dpTreephaser.Solve(read, nFlow);
        } else if (basecaller == "treephaser-adaptive") {
          dpTreephaser.NormalizeAndSolve_Adaptive(read, nFlow); // Adaptive normalization
        } else {
          dpTreephaser.NormalizeAndSolve_SWnorm(read, nFlow); // sliding window adaptive normalization
        }

        seq_out[iRead].assign(read.sequence.begin(), read.sequence.end());
        for (unsigned int iFlow = 0; iFlow < nFlow; iFlow++) {
          predicted_out(iRead, iFlow)     = (double) read.prediction[iFlow];
          residual_out(iRead, iFlow)      = (double) read.normalized_measurements[iFlow] - read.prediction[iFlow];
          norm_multipl_out(iRead, iFlow)  = (double) read.multiplicative_correction.at(iFlow);
          norm_additive_out(iRead, iFlow) = (double) read.additive_correction.at(iFlow);
        }
      }

      // Store results
      ret = Rcpp::List::create(Rcpp::Named("seq")           = seq_out,
                               Rcpp::Named("predicted")     = predicted_out,
                               Rcpp::Named("residual")      = residual_out,
                               Rcpp::Named("norm_additive") = norm_additive_out,
                               Rcpp::Named("norm_multipl")  = norm_multipl_out);
    }
  } catch (std::exception& ex) {
    forward_exception_to_r(ex);
  } catch (...) {
    ::Rf_error("c++ exception (unknown reason)");
  }

  // The message embeds the caller-supplied basecaller name, so it is passed as
  // an argument and never used as the format string itself.
  // NOTE(review): the strdup() buffer is never freed in this function.
  if (exceptionMesg != NULL)
    Rf_error("%s", exceptionMesg);

  return ret;
}
// R entry point: for every read, fits an incomplete-extension (IE) value that
// applies from a given burst flow onward, using a three-stage grid search.
//
// R_signal         numeric matrix of measurements, one read per row
// R_flowCycle      flow order string
// R_read_sequence  character vector with the base sequence of every read
// R_phasing        numeric matrix; columns 0, 1, 2 are read as cf, ie, dr per read
// R_burstFlows     per-read flow at which the burst starts
// R_maxEvalFlow    per-read upper flow limit for evaluating the fit
// R_maxSimFlow     per-read upper flow limit for the simulation
//
// Returns a list with "null_fit", "null_prediction", "burst_flow", "best_fit",
// "best_ie_value" and "best_prediction". C++ exceptions are forwarded to R.
RcppExport SEXP FitPhasingBurst(SEXP R_signal, SEXP R_flowCycle, SEXP R_read_sequence,
                                SEXP R_phasing, SEXP R_burstFlows, SEXP R_maxEvalFlow, SEXP R_maxSimFlow)
{
  SEXP ret = R_NilValue;

  try {
    Rcpp::NumericMatrix signal(R_signal);
    Rcpp::NumericMatrix phasing(R_phasing); // Standard phasing parameters
    string flowCycle = Rcpp::as<string>(R_flowCycle);
    Rcpp::StringVector  read_sequences(R_read_sequence);
    Rcpp::NumericVector phasing_burst(R_burstFlows);
    Rcpp::NumericVector max_eval_flow(R_maxEvalFlow);
    Rcpp::NumericVector max_sim_flow(R_maxSimFlow);
    int norm_window = 38; // For normalization

    ion::FlowOrder flow_order(flowCycle, flowCycle.length());
    unsigned int num_flows = flow_order.num_flows();
    unsigned int num_reads = read_sequences.size();

    // Result containers
    Rcpp::NumericVector null_fit(num_reads);
    Rcpp::NumericMatrix null_prediction(num_reads, num_flows);
    Rcpp::NumericVector best_fit(num_reads);
    Rcpp::NumericVector best_ie_value(num_reads);
    Rcpp::NumericMatrix best_prediction(num_reads, num_flows);

    BasecallerRead bc_read;
    DPTreephaser dpTreephaser(flow_order);
    DPPhaseSimulator PhaseSimulator(flow_order);
    vector<double> cf_vec(num_flows, 0.0);
    vector<double> ie_vec(num_flows, 0.0);
    vector<double> dr_vec(num_flows, 0.0);

    // One burst estimation per read
    for (unsigned int iRead = 0; iRead < num_reads; iRead++) {

      // Load the measurements of this read
      vector<float> read_signal(num_flows);
      for (unsigned int f = 0; f < num_flows; f++)
        read_signal.at(f) = signal(iRead, f);
      bc_read.SetData(read_signal, num_flows);
      string read_bases = Rcpp::as<std::string>(read_sequences(iRead));

      // Baseline: default phasing without a burst
      double top_fit, top_ie;
      double base_cf = (double)phasing(iRead, 0);
      double base_ie = (double)phasing(iRead, 1);
      double base_dr = (double)phasing(iRead, 2);
      int burst_flow = (int)phasing_burst(iRead);
      vector<float> top_prediction;
      cf_vec.assign(num_flows, base_cf);
      dr_vec.assign(num_flows, base_dr);
      int sim_limit  = min((int)num_flows, (int)max_sim_flow(iRead));
      int eval_limit = min(sim_limit, (int)max_eval_flow(iRead));

      PhaseSimulator.SetBaseSequence(read_bases);
      PhaseSimulator.SetMaxFlows(sim_limit);
      PhaseSimulator.SetPhasingParameters_Basic(base_cf, base_ie, base_dr);
      PhaseSimulator.UpdateStates(sim_limit);
      PhaseSimulator.GetPredictions(bc_read.prediction);
      dpTreephaser.WindowedNormalize(bc_read, (eval_limit / norm_window), norm_window, true);

      top_ie = base_ie;
      top_prediction = bc_read.prediction;
      top_fit = 0;
      for (int f = 0; f < eval_limit; f++) {
        double residual = bc_read.raw_measurements.at(f) - bc_read.prediction.at(f);
        top_fit += residual * residual;
      }
      for (unsigned int f = 0; f < num_flows; f++)
        null_prediction(iRead, f) = bc_read.prediction.at(f);
      null_fit(iRead) = top_fit;

      // A burst is only fitted when more than 10 evaluation flows follow it
      if (burst_flow < eval_limit - 10) {
        int    grid_points = 0;
        double grid_step   = 0.0;
        double grid_lo     = 0.0;
        double grid_hi     = 0.0;

        // Brute force grid search: coarse pass first, then two refinements
        // centered on the best value found so far.
        for (unsigned int pass = 0; pass < 3; pass++) {

          if (pass == 0) {
            grid_step = 0.05;
            grid_hi   = 0.8;
          } else {
            // The new bounds are derived from the step size of the previous pass.
            grid_hi = (floor(top_ie / grid_step) * grid_step) + grid_step;
            grid_lo = max(0.0, (grid_hi - 2.0 * grid_step));
            if (pass == 1)
              grid_step = 0.01;
            else
              grid_step = grid_step / 10;
          }
          grid_points = 1 + ((grid_hi - grid_lo) / grid_step);

          for (int k = 0; k <= grid_points; k++) {

            double try_ie = grid_lo + (k * grid_step);
            ie_vec.assign(num_flows, try_ie);

            PhaseSimulator.SetBasePhasingParameters(burst_flow, cf_vec, ie_vec, dr_vec);
            PhaseSimulator.UpdateStates(sim_limit);
            PhaseSimulator.GetPredictions(bc_read.prediction);
            dpTreephaser.WindowedNormalize(bc_read, (eval_limit / norm_window), norm_window, true);

            // Only flows after the burst flow enter the candidate fit
            double candidate_fit = 0.0;
            for (int f = burst_flow + 1; f < eval_limit; f++) {
              double residual = bc_read.raw_measurements.at(f) - bc_read.prediction.at(f);
              candidate_fit += residual * residual;
            }

            if (candidate_fit < top_fit) {
              top_fit        = candidate_fit;
              top_ie         = try_ie;
              top_prediction = bc_read.prediction;
            }
          }
        }
      }

      // Record the outcome for this read
      best_fit(iRead)      = top_fit;
      best_ie_value(iRead) = top_ie;
      for (unsigned int f = 0; f < num_flows; f++)
        best_prediction(iRead, f) = top_prediction.at(f);
    }

    ret = Rcpp::List::create(Rcpp::Named("null_fit")        = null_fit,
                             Rcpp::Named("null_prediction") = null_prediction,
                             Rcpp::Named("burst_flow")      = phasing_burst,
                             Rcpp::Named("best_fit")        = best_fit,
                             Rcpp::Named("best_ie_value")   = best_ie_value,
                             Rcpp::Named("best_prediction") = best_prediction);

  } catch (std::exception& ex) {
    forward_exception_to_r(ex);
  } catch (...) {
    ::Rf_error("c++ exception (unknown reason)");
  }

  return ret;
}
// R entry point: simulates the predicted flow signal of base sequences with
// DPTreephaser, optionally applying a recalibration model.
//
// Rsequence        character vector of base sequences
// RflowCycle       flow order string
// Rcf/Rie/Rdr      numeric matrices of phasing parameters; with a single column
//                  element (0,0) is used for all reads, otherwise (0,iRead)
// Rmaxflows        number of flows to simulate, capped at the flow order length
// RgetStates       if > 0 and exactly one sequence is given, all states are
//                  queried and returned as well
// RdiagonalStates  values > 0 are passed as true to SetStateProgression()
// RmodelFile       recalibration model file; an empty string skips model loading
// RmodelThreshold  threshold passed to RecalibrationModel::InitializeModel()
// Rxval/Ryval      per-read coordinates used to look up the recalibration model
//
// Returns a list with "sig" and, in the state query case, also "states" and
// "HPlengths". C++ exceptions are forwarded to R.
RcppExport SEXP treePhaserSim(SEXP Rsequence, SEXP RflowCycle, SEXP Rcf, SEXP Rie, SEXP Rdr,
                              SEXP Rmaxflows, SEXP RgetStates, SEXP RdiagonalStates,
                              SEXP RmodelFile, SEXP RmodelThreshold, SEXP Rxval, SEXP Ryval)
{
  SEXP ret = R_NilValue;

  try {
    Rcpp::StringVector  sequences(Rsequence);
    string flowCycle = Rcpp::as<string>(RflowCycle);
    Rcpp::NumericMatrix cf_vec(Rcf);
    Rcpp::NumericMatrix ie_vec(Rie);
    Rcpp::NumericMatrix dr_vec(Rdr);
    unsigned int max_flows      = Rcpp::as<int>(Rmaxflows);
    unsigned int get_states     = Rcpp::as<int>(RgetStates);
    unsigned int diagonalStates = Rcpp::as<int>(RdiagonalStates);

    // Recalibration Variables
    string model_file = Rcpp::as<string>(RmodelFile);
    int model_threshold = Rcpp::as<int>(RmodelThreshold);
    Rcpp::IntegerVector x_values(Rxval);
    Rcpp::IntegerVector y_values(Ryval);
    RecalibrationModel recalModel;
    if (model_file.length() > 0) {
      recalModel.InitializeModel(model_file, model_threshold);
    }

    ion::FlowOrder flow_order(flowCycle, flowCycle.length());
    unsigned int nFlow = flow_order.num_flows();
    unsigned int nRead = sequences.size();
    max_flows = min(max_flows, nFlow);

    // Output containers
    Rcpp::NumericMatrix    predicted_out(nRead, nFlow);
    vector<vector<float> > query_states;
    vector<int>            hp_lengths;

    BasecallerRead read;
    DPTreephaser dpTreephaser(flow_order);

    // A single-column parameter matrix means one parameter set for all reads
    bool per_read_phasing = true;
    if (cf_vec.ncol() == 1) {
      per_read_phasing = false;
      dpTreephaser.SetModelParameters((double)cf_vec(0,0), (double)ie_vec(0,0), (double)dr_vec(0,0));
    }
    dpTreephaser.SetStateProgression((diagonalStates > 0));

    // Sequences longer than twice the flow order length are truncated
    unsigned int max_length = (2 * flow_order.num_flows());

    // State output is only produced for a single input sequence
    const bool want_states = (nRead == 1 and get_states > 0);

    for (unsigned int iRead = 0; iRead < nRead; iRead++) {

      string bases = Rcpp::as<std::string>(sequences(iRead));
      read.sequence.clear();
      read.sequence.reserve(2 * flow_order.num_flows());
      for (unsigned int pos = 0; pos < bases.length() and pos < max_length; ++pos) {
        read.sequence.push_back(bases.at(pos));
      }

      // Per-read phasing parameters, if supplied
      if (per_read_phasing)
        dpTreephaser.SetModelParameters((double)cf_vec(0,iRead), (double)ie_vec(0,iRead), (double)dr_vec(0,iRead));

      // If you bothered specifying a recalibration model you probably want its effect on the predictions...
      if (recalModel.is_enabled()) {
        int my_x = (int)x_values(iRead);
        int my_y = (int)y_values(iRead);
        const vector<vector<vector<float> > > * aPtr = 0;
        const vector<vector<vector<float> > > * bPtr = 0;
        aPtr = recalModel.getAs(my_x, my_y);
        bPtr = recalModel.getBs(my_x, my_y);
        if (aPtr == 0 or bPtr == 0) {
          cout << "Error finding a recalibration model for x: " << x_values(iRead) << " y: " << y_values(iRead);
          cout << endl;
        }
        dpTreephaser.SetAsBs(aPtr, bPtr);
      }

      if (want_states)
        dpTreephaser.QueryAllStates(read, query_states, hp_lengths, max_flows);
      else
        dpTreephaser.Simulate(read, max_flows);

      for (unsigned int f = 0; f < nFlow and f < max_flows; ++f) {
        predicted_out(iRead, f) = (double) read.prediction.at(f);
      }
    }

    // Assemble the return value
    if (want_states) {
      Rcpp::NumericMatrix states(hp_lengths.size(), nFlow);
      Rcpp::NumericVector HPlengths(hp_lengths.size());
      for (unsigned int row = 0; row < hp_lengths.size(); row++) {
        HPlengths(row) = (double)hp_lengths[row];
        for (unsigned int f = 0; f < nFlow; f++)
          states(row, f) = (double)query_states.at(row).at(f);
      }
      ret = Rcpp::List::create(Rcpp::Named("sig")       = predicted_out,
                               Rcpp::Named("states")    = states,
                               Rcpp::Named("HPlengths") = HPlengths);
    } else {
      ret = Rcpp::List::create(Rcpp::Named("sig") = predicted_out);
    }

  } catch (std::exception& ex) {
    forward_exception_to_r(ex);
  } catch (...) {
    ::Rf_error("c++ exception (unknown reason)");
  }

  return ret;
}
void CalculateHypDistances(const vector<float>& NormalizedMeasurements, const float& cf, const float& ie, const float& droop, const ion::FlowOrder& flow_order, const vector<string>& Hypotheses, const int& startFlow, vector<float>& DistanceObserved, vector<float>& DistanceHypotheses, vector<vector<float> >& predictions, vector<vector<float> >& normalizedMeasurements, int applyNormalization, int verbose) { // Create return data structures // Distance of normalized observations to different hypotheses: d(obs,h1), ... , d(obs,hN) DistanceObserved.assign(Hypotheses.size(), 0); // Distance of hypotheses to first hypothesis: d(h1,h2), ... , d(h1, hN) DistanceHypotheses.assign(Hypotheses.size()-1, 0); predictions.resize(Hypotheses.size()); normalizedMeasurements.resize(Hypotheses.size()); // Loading key normalized values into a read and performing adaptive normalization BasecallerRead read; read.key_normalizer = 1; read.raw_measurements = NormalizedMeasurements; read.normalized_measurements = NormalizedMeasurements; read.sequence.clear(); read.sequence.reserve(2*flow_order.num_flows()); read.prediction.assign(flow_order.num_flows(), 0); read.additive_correction.assign(flow_order.num_flows(), 0); read.multiplicative_correction.assign(flow_order.num_flows(), 1.0); int steps, window_size = 50; DPTreephaser dpTreephaser(flow_order); dpTreephaser.SetModelParameters(cf, ie, droop); // Solve beginning of maybe clipped read if (startFlow>0) dpTreephaser.Solve(read, (startFlow+20), 0); // StartFlow clipped? Get solved HP length at startFlow unsigned int base = 0; int flow = 0; int HPlength = 0; while (base<read.sequence.size()){ while (flow < flow_order.num_flows() and flow_order.nuc_at(flow) != read.sequence[base]) flow++; if (flow > startFlow or flow == flow_order.num_flows()) break; if (flow == startFlow) HPlength++; base++; } if (verbose>0) Rprintf("Solved %d bases until (not incl.) flow %d. 
HP of height %d at flow %d.\n", base, flow, HPlength, startFlow); // Get HP size at the start of the reference, i.e., Hypotheses[0] int count = 1; while (Hypotheses[0][count] == Hypotheses[0][0]) count++; if (verbose>0) Rprintf("Hypothesis starts with an HP of length %d\n", count); // Adjust the length of the prefix and erase extra solved bases if (HPlength>count) base -= count; else base -= HPlength; read.sequence.erase(read.sequence.begin()+base, read.sequence.end()); unsigned int prefix_size = read.sequence.size(); // creating predictions for the individual hypotheses vector<BasecallerRead> hypothesesReads(Hypotheses.size()); int max_last_flow = 0; for (unsigned int r=0; r<hypothesesReads.size(); ++r) { hypothesesReads[r] = read; // add hypothesis sequence to prefix for (base=0; base<Hypotheses[r].length() and base<(2*(unsigned int)flow_order.num_flows()-prefix_size); base++) hypothesesReads[r].sequence.push_back(Hypotheses[r][base]); // get last main incorporating flow int last_incorporating_flow = 0; base = 0; flow = 0; while (base<hypothesesReads[r].sequence.size() and flow<flow_order.num_flows()){ while (flow_order.nuc_at(flow) != hypothesesReads[r].sequence[base]) flow++; last_incorporating_flow = flow; if (last_incorporating_flow > max_last_flow) max_last_flow = last_incorporating_flow; base++; } // Simulate sequence dpTreephaser.Simulate(hypothesesReads[r], flow_order.num_flows()); // Adaptively normalize each hypothesis if (applyNormalization>0) { steps = last_incorporating_flow / window_size; dpTreephaser.WindowedNormalize(hypothesesReads[r], steps, window_size); } // Solver simulates beginning of the read and then fills in the remaining clipped bases dpTreephaser.Solve(hypothesesReads[r], flow_order.num_flows(), last_incorporating_flow); // Store predictions and adaptively normalized measurements predictions[r] = hypothesesReads[r].prediction; normalizedMeasurements[r] = hypothesesReads[r].normalized_measurements; } // --- Calculating distances --- // 
Include only flow values in the distance where the predictions differ by more than "threshold" float threshold = 0.05; // Do not include flows after main inc. flow of lastest hypothesis for (int flow=0; flow<(max_last_flow+1); ++flow) { bool includeFlow = false; for (unsigned int hyp=1; hyp<hypothesesReads.size(); ++hyp) if (abs(hypothesesReads[hyp].prediction[flow] - hypothesesReads[0].prediction[flow])>threshold) includeFlow = true; if (includeFlow) { for (unsigned int hyp=0; hyp<hypothesesReads.size(); ++hyp) { float residual = hypothesesReads[hyp].normalized_measurements[flow] - hypothesesReads[hyp].prediction[flow]; DistanceObserved[hyp] += residual * residual; if (hyp>0) { residual = hypothesesReads[0].prediction[flow] - hypothesesReads[hyp].prediction[flow]; DistanceHypotheses[hyp-1] += residual * residual; } } } } // --- verbose --- if (verbose>0){ Rprintf("Calculating distances between %d hypotheses starting at flow %d:\n", Hypotheses.size(), startFlow); for (unsigned int i=0; i<Hypotheses.size(); ++i){ for (unsigned int j=0; j<Hypotheses[i].length(); ++j) Rprintf("%c", Hypotheses[i][j]); Rprintf("\n"); } Rprintf("Solved read prefix: "); for (unsigned int j=0; j<prefix_size; ++j) Rprintf("%c", read.sequence[j]); Rprintf("\n"); Rprintf("Extended Hypotheses reads to:\n"); for (unsigned int i=0; i<hypothesesReads.size(); ++i){ for (unsigned int j=0; j<hypothesesReads[i].sequence.size(); ++j) Rprintf("%c", hypothesesReads[i].sequence[j]); Rprintf("\n"); } Rprintf("Calculated Distances d2(obs, H_i), d2(H_i, H_0):\n"); Rprintf("%f, 0\n", DistanceObserved[0]); for (unsigned int i=1; i<Hypotheses.size(); ++i) Rprintf("%f, %f\n", DistanceObserved[i], DistanceHypotheses[i-1]); } // --------------- */ }