float PhaseEstimator::EvaluateParameters(vector<BasecallerRead *>& useful_reads, DPTreephaser& treephaser, const float *parameters, const bool usePIDNorm) { float try_cf = parameters[0]; float try_ie = parameters[1]; float try_dr = parameters[2]; if (try_cf < 0 or try_ie < 0 or try_dr < 0 or try_cf > 0.04 or try_ie > 0.04 or try_dr > 0.01) return 1e10; treephaser.SetModelParameters(try_cf, try_ie, try_dr); float metric = 0; for (vector<BasecallerRead *>::iterator read = useful_reads.begin(); read != useful_reads.end(); ++read) { treephaser.Simulate(**read, 120); float normalizer = (usePIDNorm ? treephaser.PIDNormalize(**read, 8, 100) : treephaser.Normalize(**read, 20, 100)); for (unsigned int flow = 20; flow < 100 and flow < (*read)->raw_measurements.size(); flow++) { if ((*read)->raw_measurements[flow] > 1.2) continue; float delta = (*read)->raw_measurements[flow] - (*read)->prediction[flow] * normalizer; metric += delta * delta; } } return isnan(metric) ? 1e10 : metric; }
float PhaseEstimator::EvaluateParameters(vector<BasecallerRead *>& useful_reads, DPTreephaser& treephaser, const float *parameters) { float try_cf = parameters[0]; float try_ie = parameters[1]; float try_dr = parameters[2]; if (try_cf < 0 or try_ie < 0 or try_dr < 0 or try_cf > 0.04 or try_ie > 0.04 or try_dr > 0.01) return 1e10; treephaser.SetModelParameters(try_cf, try_ie, try_dr); float metric = 0; for (vector<BasecallerRead *>::iterator read = useful_reads.begin(); read != useful_reads.end(); ++read) { // Simulate phasing parameter treephaser.Simulate(**read, phasing_end_flow_+20); // Optionally determine optimal normalization for this parameter set? if (norm_during_param_eval_) NormalizeBasecallerRead(treephaser, **read, phasing_start_flow_, phasing_end_flow_); // Determine squared distance penalty for this parameter set for (int flow = phasing_start_flow_; flow < phasing_end_flow_ and flow < (int)(*read)->raw_measurements.size(); ++flow) { if ((*read)->raw_measurements[flow] > inclusion_threshold_) continue; // Keep key normalized raw measurements as a constant and normalize predictions towards key normalized values float delta = ((*read)->normalized_measurements[flow] - (*read)->prediction[flow]) * (*read)->multiplicative_correction[flow]; metric += delta * delta; } } return isnan(metric) ? 1e10 : metric; }
// Normalizes a read with the method selected by norm_method_:
//   0 - treephaser.Normalize over [start_flow, end_flow]
//   1 - treephaser.WindowedNormalize with end_flow / windowSize_ windows
//   2 - treephaser.PIDNormalize over [start_flow, end_flow]
//   3 - per-read choice: windowed when read.penalty_residual.at(0) exceeds
//       maxfrac_negative_flows_, plain Normalize otherwise
//   4 - "off": the read is left untouched
// Any other value prints an error to stderr and terminates the process.
void PhaseEstimator::NormalizeBasecallerRead(DPTreephaser& treephaser, BasecallerRead& read, int start_flow, int end_flow)
{
  bool use_windowed = false;

  switch (norm_method_) {
    case 0:
      break;

    case 1:
      use_windowed = true;
      break;

    case 2:
      treephaser.PIDNormalize(read, start_flow, end_flow);
      return;

    case 3:
      // Variable per-read normalization, keyed on the first penalty_residual entry.
      use_windowed = (read.penalty_residual.at(0) > maxfrac_negative_flows_);
      break;

    case 4:
      // Normalization disabled.
      return;

    default:
      cerr << "PhaseEstimator: Unknown normalization method " << norm_method_ << endl;
      exit(EXIT_FAILURE);
  }

  // The window count is only computed on the windowed path.
  if (use_windowed)
    treephaser.WindowedNormalize(read, (end_flow / windowSize_), windowSize_);
  else
    treephaser.Normalize(read, start_flow, end_flow);
}
int HypothesisEvaluator::EvaluateOneHypothesis(DPTreephaser &working_treephaser, BasecallerRead ¤t_hypothesis, int applyNormalization) { int last_incorporating_flow = LastIncorporatingFlow(current_hypothesis); // Simulate sequence working_treephaser.Simulate(current_hypothesis, nFlows); // Adaptively normalize each hypothesis if (applyNormalization>0) { int window_size= 50; int steps = last_incorporating_flow / window_size; working_treephaser.WindowedNormalize(current_hypothesis, steps, window_size); } // Solver simulates beginning of the read and then fills in the remaining clipped bases working_treephaser.Solve(current_hypothesis, nFlows, last_incorporating_flow); /*cout << "Solved sequence of length: " << hypothesesReadsVector[r].sequence.size() << " ;nFlows = " << nFlows << endl; cout << "Total read: "; for (int i=0; i<hypothesesReadsVector[r].sequence.size(); i++) cout << hypothesesReadsVector[r].sequence[i]; cout << endl;*/ return(last_incorporating_flow); }
unsigned int HypothesisEvaluator::SolveBeginningOfRead(DPTreephaser &working_treephaser, BasecallerRead &master_read, const vector<string>& Hypotheses, int startFlow) { //cout << "Hypothesis sequence: " << Hypotheses[0] << endl; // Solve beginning of maybe clipped read if (startFlow>0) { int until_flow = min((startFlow+20), nFlows); working_treephaser.Solve(master_read, until_flow, 0); } /*cout << "Solved prefix of size " << read.sequence.size() << ": "; for (int i=0; i<read.sequence.size(); i++) cout << read.sequence[i]; cout << endl;*/ // StartFlow clipped? Get solved HP length at startFlow unsigned int base = 0; int flow = 0; int HPlength = 0; while (base<master_read.sequence.size()) { while (flow < treePhaserFlowOrder.num_flows() and treePhaserFlowOrder.nuc_at(flow) != master_read.sequence[base]) flow++; if (flow > startFlow or flow == treePhaserFlowOrder.num_flows()) break; if (flow == startFlow) HPlength++; base++; } // Get HP size at the start of the reference, i.e., Hypotheses[0] int count = 1; while (Hypotheses[0][count] == Hypotheses[0][0]) count++; // Adjust the length of the base prefix and erase extra solved bases if (HPlength>count) base -= count; else base -= HPlength; master_read.sequence.erase(master_read.sequence.begin()+base, master_read.sequence.end()); unsigned int prefix_size = master_read.sequence.size(); /*cout << "Shortened prefix to size " << prefix_size << " until startFlow" << startFlow << ": "; for (int i=0; i<read.sequence.size(); i++) cout << read.sequence[i]; cout << endl;*/ return(prefix_size); }
int HypothesisEvaluator::MatchedFilter(DPTreephaser &working_treephaser, vector<BasecallerRead> &hypothesesReadsVector,int max_last_flow, int refHpLen, int flowPosition, int startFlow, vector<float>& DistanceObserved, vector<float>& DistanceHypotheses) { // Matched Filter HP distance is computed here vector<float> query_state(nFlows); int rHpLen = 0; if (flowPosition<startFlow || flowPosition >= nFlows) { cout << "Calculate Distances: Unsupported flowPosition! startFlow: " << startFlow << " flowPosition: " << flowPosition << " nFlows: " << nFlows << endl; return -1; } //cout << "Calling Query state " << endl; //cout << "Hypothesis = " << hypothesesReadsVector[0] << endl; //cout << "flow position = " << flowPosition << endl; //cout << "Nflows = " << nFlows << endl; //int readSize = hypothesesReadsVector[0].sequence.size(); // for (int i = 0; i < readSize ; i++) // cout << "Base = " << hypothesesReadsVector[0].sequence.at(i) << " Measure = " << hypothesesReadsVector[0].normalized_measurements.at(i) << endl; working_treephaser.QueryState(hypothesesReadsVector[0], query_state, rHpLen, nFlows, flowPosition); if (rHpLen == 0) { if (DEBUG) { cerr << "Hypothesis evaluator error ReadHpLen = 0 " << endl; cerr << "Calling Query state " << endl; cerr << "Hypothesis = " << hypothesesReadsVector[0].sequence.size() << endl; cerr << "flow position = " << flowPosition << endl; cerr << "Nflows = " << nFlows << endl; int readSize = hypothesesReadsVector[0].sequence.size(); for (int i = 0; i < readSize ; i++) cerr << " i = " << i << " Base = " << hypothesesReadsVector[0].sequence.at(i) << " Measure = " << hypothesesReadsVector[0].normalized_measurements.at(i) << endl; } return -1; } //return -1; if (abs(rHpLen-refHpLen) < 3) { float filter_num = 0.0f; float filter_den = 0.0f; if (this->DEBUG) cout << "Matched filter details: " << endl; for (int flow=0; flow<nFlows; flow++) { filter_num += (hypothesesReadsVector[0].normalized_measurements[flow] - 
hypothesesReadsVector[0].prediction[flow] + ((float)rHpLen*query_state[flow])) * query_state[flow]; filter_den += query_state[flow] * query_state[flow]; if ((this->DEBUG) and(query_state[flow] > 0.02 or flow == flowPosition)) { cout << "Flow " << flow << " State " << query_state[flow] << " Local delta " << ((hypothesesReadsVector[0].normalized_measurements[flow] - hypothesesReadsVector[0].prediction[flow] + ((float)rHpLen*query_state[flow])) * query_state[flow]) << " Measurements " << hypothesesReadsVector[0].normalized_measurements[flow]; //printf("Flow %4d State %1.4f Local delta %1.4f Measurements %1.4f"); //flow, query_state[flow], //((read.normalized_measurements[flow] - read.prediction[flow] + ((float)rHpLen*query_state[flow])) * query_state[flow]), //read.normalized_measurements[flow]); if (flow == flowPosition) //printf(" ***\n"); cout << " ***" << endl; else //printf("\n"); cout << endl; } } DistanceObserved[0] = filter_num / filter_den; //cout << DistanceObserved[0] << endl; } else { if (DEBUG) cerr << "Wrong rHpLen : " << rHpLen << " " << refHpLen << endl; return -1; } return(0); }
float RegionAnalysis::evaluateParameters(std::vector<BasecallerRead> &dataAll, DPTreephaser& treephaser, float *parameters) { float metric = 0; if (clo->cfe_control.libPhaseEstimator == "nel-mead-treephaser") { if (parameters[0] < 0) // cf metric = 1e10; if (parameters[1] < 0) // ie metric = 1e10; if (parameters[2] < 0) // dr metric = 1e10; if (parameters[0] > 0.04) // cf metric = 1e10; if (parameters[1] > 0.04) // ie metric = 1e10; if (parameters[2] > 0.01) // dr metric = 1e10; if (metric == 0) { treephaser.SetModelParameters(parameters[0], parameters[1], parameters[2]); for (std::vector<BasecallerRead>::iterator data = dataAll.begin(); data != dataAll.end(); data++) { treephaser.Simulate3(*data, 120); data->Normalize(20, 100); for (int iFlow = 20; iFlow < std::min(100, data->numFlows); iFlow++) { if (data->measurements[iFlow] > 1.2) continue; float delta = data->measurements[iFlow] - data->prediction[iFlow] * data->miscNormalizer; metric += delta * delta; } } } } else if (clo->cfe_control.libPhaseEstimator == "nel-mead-adaptive-treephaser") { if (parameters[0] < 0) // cf metric = 1e10; if (parameters[1] < 0) // ie metric = 1e10; if (parameters[0] > 0.04) // cf metric = 1e10; if (parameters[1] > 0.04) // ie metric = 1e10; if (metric == 0) { treephaser.SetModelParameters(parameters[0], parameters[1], 0); for (std::vector<BasecallerRead>::iterator data = dataAll.begin(); data != dataAll.end(); data++) { // treephaser.Simulate3(*data, 150); // data->AdaptiveNormalizationOfPredictions(3, 50); treephaser.Simulate3(*data, 200); data->AdaptiveNormalizationOfPredictions(4, 50); for (int iFlow = 25; iFlow < std::min(175, data->numFlows); iFlow++) { if (data->measurements[iFlow] > 1.2) continue; float delta = data->measurements[iFlow] - data->prediction[iFlow]; metric += delta * delta; } } } } // printf("CF = %1.2f%% IE = %1.2f%% DR = %1.2f%% V = %1.6f\n", // 100*parameters[0], 100*parameters[1], 100*parameters[2], metric); if (isnan(metric)) metric = 1e10; return metric; }