void PhaseEstimator::EstimatorWorker() { DPTreephaser treephaser(flow_order_, windowSize_); vector<BasecallerRead *> useful_reads; useful_reads.reserve(10000); while (true) { pthread_mutex_lock(&job_queue_mutex_); while (job_queue_.empty()) { if (jobs_in_progress_ == 0) { pthread_mutex_unlock(&job_queue_mutex_); return; } // No jobs available now, but more may come, so stick around pthread_cond_wait(&job_queue_cond_, &job_queue_mutex_); } Subblock &s = *job_queue_.front(); job_queue_.pop_front(); jobs_in_progress_++; pthread_mutex_unlock(&job_queue_mutex_); // Processing int numGlobalIterations = 1; // 3 iterations at top level, 1 at all other levels if (s.level == 1) numGlobalIterations = 3; for (int iGlobalIteration = 0; iGlobalIteration < numGlobalIterations; iGlobalIteration++) { ClockTimer timer; timer.StartTimer(); size_t iotimer = 0; treephaser.SetModelParameters(s.cf, s.ie, s.dr); useful_reads.clear(); for (vector<int>::iterator region = s.sorted_regions.begin(); region != s.sorted_regions.end(); ++region) { iotimer += LoadRegion(*region); // Ensure region loaded. // Grab reads, filter // Enough reads? Stop. if (action_map_[*region] == 0 and region_num_reads_[*region]) action_map_[*region] = s.level; // Filter. Reads that survive filtering are stored in useful_reads //! \todo: Rethink filtering. Maybe a rule that adjusts the threshold to keep at least 20% of candidate reads. for (vector<BasecallerRead>::iterator R = region_reads_[*region].begin(); R != region_reads_[*region].end(); ++R) { for (int flow = 0; flow < flow_order_.num_flows(); flow++) R->normalized_measurements[flow] = R->raw_measurements[flow]; treephaser.Solve (*R, min(100, flow_order_.num_flows())); use_pid_norm_ ? (void)treephaser.PIDNormalize(*R, 8, 40) : (void)treephaser.Normalize(*R, 11, 80); treephaser.Solve (*R, min(120, flow_order_.num_flows())); use_pid_norm_ ? (void)treephaser.PIDNormalize(*R, 8, 80) : (void)treephaser.Normalize(*R, 11, 100); treephaser.Solve (*R, min(120, flow_order_.num_flows())); float metric = 0; for (int flow = 20; flow < 100 and flow < flow_order_.num_flows(); ++flow) { if (R->normalized_measurements[flow] > 1.2) continue; float delta = R->normalized_measurements[flow] - R->prediction[flow]; if (!isnan(delta)) metric += delta * delta; else metric += 1e10; } if (metric > residual_threshold_) { //printf("\nRejecting metric=%1.5f solution=%s", metric, R->sequence.c_str()); continue; } useful_reads.push_back(&(*R)); } if (useful_reads.size() >= 5000) break; } if (s.level > 1 and useful_reads.size() < 1000) // Not enough reads to even try break; // Do estimation with reads collected, update estimates float parameters[3]; parameters[0] = s.cf; parameters[1] = s.ie; parameters[2] = s.dr; NelderMeadOptimization(useful_reads, treephaser, parameters, use_pid_norm_); s.cf = parameters[0]; s.ie = parameters[1]; s.dr = parameters[2]; printf("Completed (%d,%d,%d) :(%2d-%2d)x(%2d-%2d), total time %5.2lf sec, i/o time %5.2lf sec, %d reads, CF=%1.2f%% IE=%1.2f%% DR=%1.2f%%\n", s.level, s.pos_x, s.pos_y, s.begin_x, s.end_x, s.begin_y, s.end_y, (double)timer.GetMicroSec()/1000000.0, (double)iotimer/1000000.0, (int)useful_reads.size(), 100.0*s.cf, 100.0*s.ie, 100.0*s.dr); } if (useful_reads.size() >= 1000 or s.level == 1) { for (int region_x = s.begin_x; region_x <= s.end_x and region_x < num_regions_x_; region_x++) { for (int region_y = s.begin_y; region_y <= s.end_y and region_y < num_regions_y_; region_y++) { int region = region_x + region_y * num_regions_x_; if (region_x == s.begin_x and region_y == s.begin_y) subblock_map_[region] = '+'; else if(region_x == s.begin_x and region_y == s.end_y) subblock_map_[region] = '+'; else if(region_x == s.end_x and region_y == s.begin_y) subblock_map_[region] = '+'; else if(region_x == s.end_x and region_y == s.end_y) subblock_map_[region] = '+'; else if (region_x == s.begin_x) subblock_map_[region] = '|'; else if (region_x == s.end_x) subblock_map_[region] = '|'; else if (region_y == s.begin_y) subblock_map_[region] = '-'; else if (region_y == s.end_y) subblock_map_[region] = '-'; } } } if (s.subblocks[0] == NULL or useful_reads.size() < 4000) { // Do not subdivide this block for (vector<int>::iterator region = s.sorted_regions.begin(); region != s.sorted_regions.end(); region++) region_reads_[*region].clear(); pthread_mutex_lock(&job_queue_mutex_); jobs_in_progress_--; if (jobs_in_progress_ == 0) // No more work, let everyone know pthread_cond_broadcast(&job_queue_cond_); pthread_mutex_unlock(&job_queue_mutex_); } else { // Subdivide. Spawn new jobs: pthread_mutex_lock(&job_queue_mutex_); jobs_in_progress_--; for (int subjob = 0; subjob < 4; subjob++) { s.subblocks[subjob]->cf = s.cf; s.subblocks[subjob]->ie = s.ie; s.subblocks[subjob]->dr = s.dr; job_queue_.push_back(s.subblocks[subjob]); } pthread_cond_broadcast(&job_queue_cond_); // More work, let everyone know pthread_mutex_unlock(&job_queue_mutex_); } } }
// Function to fill in predicted signal values void BaseHypothesisEvaluator(BamTools::BamAlignment &alignment, const string &flow_order_str, const string &alt_base_hyp, float &delta_score, float &fit_score, int heavy_verbose) { // --- Step 1: Initialize Objects and retrieve relevant tags delta_score = 1e5; fit_score = 1e5; vector<string> Hypotheses(2); vector<float> measurements, phase_params; int start_flow, num_flows, prefix_flow=0; if (not GetBamTags(alignment, flow_order_str.length(), measurements, phase_params, start_flow)) return; num_flows = measurements.size(); ion::FlowOrder flow_order(flow_order_str, num_flows); BasecallerRead master_read; master_read.SetData(measurements, flow_order.num_flows()); TreephaserLite treephaser(flow_order); treephaser.SetModelParameters(phase_params[0], phase_params[1]); // --- Step 2: Solve beginning of the read // Look at mapped vs. unmapped reads in BAM Hypotheses[0] = alignment.QueryBases; Hypotheses[1] = alt_base_hyp; // Safety: reverse complement reverse strand reads in mapped bam if (alignment.IsMapped() and alignment.IsReverseStrand()) { RevComplementInPlace(Hypotheses[0]); RevComplementInPlace(Hypotheses[1]); } prefix_flow = GetMasterReadPrefix(treephaser, flow_order, start_flow, Hypotheses[0], master_read); unsigned int prefix_size = master_read.sequence.size(); // --- Step 3: creating predictions for the individual hypotheses vector<BasecallerRead> hypothesesReads(Hypotheses.size()); vector<float> squared_distances(Hypotheses.size(), 0.0); int max_last_flow = 0; for (unsigned int i_hyp=0; i_hyp<hypothesesReads.size(); ++i_hyp) { hypothesesReads[i_hyp] = master_read; // --- add hypothesis sequence to clipped prefix unsigned int i_base = 0; int i_flow = prefix_flow; while (i_base<Hypotheses[i_hyp].length() and i_base<(2*(unsigned int)flow_order.num_flows()-prefix_size)) { while (i_flow < flow_order.num_flows() and flow_order.nuc_at(i_flow) != Hypotheses[i_hyp][i_base]) i_flow++; if (i_flow < flow_order.num_flows() and i_flow > max_last_flow) max_last_flow = i_flow; if (i_flow >= flow_order.num_flows()) break; // Add base to sequence only if it fits into flow order hypothesesReads[i_hyp].sequence.push_back(Hypotheses[i_hyp][i_base]); i_base++; } i_flow = min(i_flow, flow_order.num_flows()-1); // Solver simulates beginning of the read and then fills in the remaining clipped bases for which we have flow information treephaser.Solve(hypothesesReads[i_hyp], num_flows, i_flow); } // Compute L2-distance of measurements and predictions for (unsigned int i_hyp=0; i_hyp<hypothesesReads.size(); ++i_hyp) { for (int iFlow=0; iFlow<=max_last_flow; iFlow++) squared_distances[i_hyp] += (measurements.at(iFlow) - hypothesesReads[i_hyp].prediction.at(iFlow)) * (measurements.at(iFlow) - hypothesesReads[i_hyp].prediction.at(iFlow)); } // Delta: L2-distance of alternative base Hypothesis - L2-distance of bases as called delta_score = squared_distances.at(1) - squared_distances.at(0); fit_score = min(squared_distances.at(1), squared_distances.at(0)); // --- verbose --- if (heavy_verbose > 1 or (delta_score < 0 and heavy_verbose > 0)) { cout << "Processed read " << alignment.Name << endl; cout << "Delta Fit: " << delta_score << " Overall Fit: " << fit_score << endl; PredictionGenerationVerbose(Hypotheses, hypothesesReads, phase_params, flow_order, start_flow, prefix_size); } }