void PhaseEstimator::EstimatorWorker() { DPTreephaser treephaser(flow_order_, windowSize_); vector<BasecallerRead *> useful_reads; useful_reads.reserve(10000); while (true) { pthread_mutex_lock(&job_queue_mutex_); while (job_queue_.empty()) { if (jobs_in_progress_ == 0) { pthread_mutex_unlock(&job_queue_mutex_); return; } // No jobs available now, but more may come, so stick around pthread_cond_wait(&job_queue_cond_, &job_queue_mutex_); } Subblock &s = *job_queue_.front(); job_queue_.pop_front(); jobs_in_progress_++; pthread_mutex_unlock(&job_queue_mutex_); // Processing int numGlobalIterations = 1; // 3 iterations at top level, 1 at all other levels if (s.level == 1) numGlobalIterations = 3; for (int iGlobalIteration = 0; iGlobalIteration < numGlobalIterations; iGlobalIteration++) { ClockTimer timer; timer.StartTimer(); size_t iotimer = 0; treephaser.SetModelParameters(s.cf, s.ie, s.dr); useful_reads.clear(); for (vector<int>::iterator region = s.sorted_regions.begin(); region != s.sorted_regions.end(); ++region) { iotimer += LoadRegion(*region); // Ensure region loaded. // Grab reads, filter // Enough reads? Stop. if (action_map_[*region] == 0 and region_num_reads_[*region]) action_map_[*region] = s.level; // Filter. Reads that survive filtering are stored in useful_reads //! \todo: Rethink filtering. Maybe a rule that adjusts the threshold to keep at least 20% of candidate reads. for (vector<BasecallerRead>::iterator R = region_reads_[*region].begin(); R != region_reads_[*region].end(); ++R) { for (int flow = 0; flow < flow_order_.num_flows(); flow++) R->normalized_measurements[flow] = R->raw_measurements[flow]; treephaser.Solve (*R, min(100, flow_order_.num_flows())); use_pid_norm_ ? (void)treephaser.PIDNormalize(*R, 8, 40) : (void)treephaser.Normalize(*R, 11, 80); treephaser.Solve (*R, min(120, flow_order_.num_flows())); use_pid_norm_ ? (void)treephaser.PIDNormalize(*R, 8, 80) : (void)treephaser.Normalize(*R, 11, 100); treephaser.Solve (*R, min(120, flow_order_.num_flows())); float metric = 0; for (int flow = 20; flow < 100 and flow < flow_order_.num_flows(); ++flow) { if (R->normalized_measurements[flow] > 1.2) continue; float delta = R->normalized_measurements[flow] - R->prediction[flow]; if (!isnan(delta)) metric += delta * delta; else metric += 1e10; } if (metric > residual_threshold_) { //printf("\nRejecting metric=%1.5f solution=%s", metric, R->sequence.c_str()); continue; } useful_reads.push_back(&(*R)); } if (useful_reads.size() >= 5000) break; } if (s.level > 1 and useful_reads.size() < 1000) // Not enough reads to even try break; // Do estimation with reads collected, update estimates float parameters[3]; parameters[0] = s.cf; parameters[1] = s.ie; parameters[2] = s.dr; NelderMeadOptimization(useful_reads, treephaser, parameters, use_pid_norm_); s.cf = parameters[0]; s.ie = parameters[1]; s.dr = parameters[2]; printf("Completed (%d,%d,%d) :(%2d-%2d)x(%2d-%2d), total time %5.2lf sec, i/o time %5.2lf sec, %d reads, CF=%1.2f%% IE=%1.2f%% DR=%1.2f%%\n", s.level, s.pos_x, s.pos_y, s.begin_x, s.end_x, s.begin_y, s.end_y, (double)timer.GetMicroSec()/1000000.0, (double)iotimer/1000000.0, (int)useful_reads.size(), 100.0*s.cf, 100.0*s.ie, 100.0*s.dr); } if (useful_reads.size() >= 1000 or s.level == 1) { for (int region_x = s.begin_x; region_x <= s.end_x and region_x < num_regions_x_; region_x++) { for (int region_y = s.begin_y; region_y <= s.end_y and region_y < num_regions_y_; region_y++) { int region = region_x + region_y * num_regions_x_; if (region_x == s.begin_x and region_y == s.begin_y) subblock_map_[region] = '+'; else if(region_x == s.begin_x and region_y == s.end_y) subblock_map_[region] = '+'; else if(region_x == s.end_x and region_y == s.begin_y) subblock_map_[region] = '+'; else if(region_x == s.end_x and region_y == s.end_y) subblock_map_[region] = '+'; else if (region_x == s.begin_x) subblock_map_[region] = '|'; else if (region_x == s.end_x) subblock_map_[region] = '|'; else if (region_y == s.begin_y) subblock_map_[region] = '-'; else if (region_y == s.end_y) subblock_map_[region] = '-'; } } } if (s.subblocks[0] == NULL or useful_reads.size() < 4000) { // Do not subdivide this block for (vector<int>::iterator region = s.sorted_regions.begin(); region != s.sorted_regions.end(); region++) region_reads_[*region].clear(); pthread_mutex_lock(&job_queue_mutex_); jobs_in_progress_--; if (jobs_in_progress_ == 0) // No more work, let everyone know pthread_cond_broadcast(&job_queue_cond_); pthread_mutex_unlock(&job_queue_mutex_); } else { // Subdivide. Spawn new jobs: pthread_mutex_lock(&job_queue_mutex_); jobs_in_progress_--; for (int subjob = 0; subjob < 4; subjob++) { s.subblocks[subjob]->cf = s.cf; s.subblocks[subjob]->ie = s.ie; s.subblocks[subjob]->dr = s.dr; job_queue_.push_back(s.subblocks[subjob]); } pthread_cond_broadcast(&job_queue_cond_); // More work, let everyone know pthread_mutex_unlock(&job_queue_mutex_); } } }
void RegionAnalysis::worker_Treephaser() { // Worker method: load regions one by one and process them until done // int numFlows = wells->NumFlows(); // DPTreephaser dpTreephaser(wells->FlowOrder(), numFlows, 8); DPTreephaser dpTreephaser(flowOrder.c_str(), numFlows, 8); std::deque<int> wellX; std::deque<int> wellY; std::deque<std::vector<float> > wellMeasurements; int iRegion; std::vector<BasecallerRead> data; data.reserve(MAX_CAFIE_READS_PER_REGION); while (wellsReader.loadNextRegion(wellX, wellY, wellMeasurements, iRegion)) { float parameters[3]; parameters[0] = 0.00; // CF - initial guess parameters[1] = 0.00; // IE - initial guess parameters[2] = 0.000; // DR - initial guess for (int globalIteration = 0; globalIteration < 5; globalIteration++) { dpTreephaser.SetModelParameters(parameters[0], parameters[1], parameters[2]); data.clear(); // Iterate over live library wells and consider them as a part of the phase training set std::deque<int>::iterator x = wellX.begin(); std::deque<int>::iterator y = wellY.begin(); std::deque<std::vector<float> >::iterator measurements = wellMeasurements.begin(); for (; (x != wellX.end()) && (data.size() < MAX_CAFIE_READS_PER_REGION); x++, y++, measurements++) { if (!mask->Match(*x, *y, MaskLive)) continue; if (!mask->Match(*x, *y, MaskBead)) continue; int beadClass = 1; // 1 - library, 0 - TF if (!mask->Match(*x, *y, MaskLib)) { // Is it a library bead? if (!mask->Match(*x, *y, MaskTF)) // OK, is it at least a TF? continue; beadClass = 0; } data.push_back(BasecallerRead()); data.back().SetDataAndKeyNormalize(&(measurements->at(0)), numFlows, libraryInfo[beadClass].Ionogram, libraryInfo[beadClass].numKeyFlows - 1); bool keypass = true; for (int iFlow = 0; iFlow < (libraryInfo[beadClass].numKeyFlows - 1); iFlow++) { if ((int) (data.back().measurements[iFlow] + 0.5) != libraryInfo[beadClass].Ionogram[iFlow]) keypass = false; if (isnan(data.back().measurements[iFlow])) keypass = false; } if (!keypass) { data.pop_back(); continue; } dpTreephaser.Solve(data.back(), std::min(100, numFlows)); data.back().Normalize(11, std::min(80, numFlows)); dpTreephaser.Solve(data.back(), std::min(120, numFlows)); data.back().Normalize(11, std::min(100, numFlows)); dpTreephaser.Solve(data.back(), std::min(120, numFlows)); float metric = 0; for (int iFlow = 20; (iFlow < 100) && (iFlow < numFlows); iFlow++) { if (data.back().normalizedMeasurements[iFlow] > 1.2) continue; float delta = data.back().normalizedMeasurements[iFlow] - data.back().prediction[iFlow]; if (!isnan(delta)) metric += delta * delta; else metric += 1e10; } if (metric > 1) { data.pop_back(); continue; } } if (data.size() < 10) break; // Perform parameter estimation NelderMeadOptimization(data, dpTreephaser, parameters, 50, 3); } pthread_mutex_lock(common_output_mutex); if (data.size() < 10) printf("Region % 3d: Using default phase parameters, %d reads insufficient for training\n", iRegion + 1, (int) data.size()); else printf("Region % 3d: Using %d reads for phase parameter training\n", iRegion + 1, (int) data.size()); // printf("o"); fflush(stdout); pthread_mutex_unlock(common_output_mutex); (*cf)[iRegion] = parameters[0]; (*ie)[iRegion] = parameters[1]; (*dr)[iRegion] = parameters[2]; } }