// Worker thread body: repeatedly pulls one region of wells from wellsReader and
// estimates per-region phasing parameters (CF, IE, DR) by alternating between
// (a) re-basecalling candidate reads with the current parameter guess and
// (b) Nelder-Mead optimization over the surviving training reads.
// Results are written into the shared (*cf), (*ie), (*dr) per-region arrays.
// NOTE(review): numFlows, flowOrder, wellsReader, mask, libraryInfo, cf/ie/dr and
// common_output_mutex are members/globals declared outside this view — presumed
// initialized before worker threads start; verify against the class definition.
void RegionAnalysis::worker_Treephaser()
{
  // Worker method: load regions one by one and process them until done
  // int numFlows = wells->NumFlows();
  // DPTreephaser dpTreephaser(wells->FlowOrder(), numFlows, 8);
  DPTreephaser dpTreephaser(flowOrder.c_str(), numFlows, 8);

  // Per-region well coordinates and raw flow measurements, refilled by loadNextRegion.
  std::deque<int> wellX;
  std::deque<int> wellY;
  std::deque<std::vector<float> > wellMeasurements;
  int iRegion;

  // Training set for this region; capped at MAX_CAFIE_READS_PER_REGION reads.
  std::vector<BasecallerRead> data;
  data.reserve(MAX_CAFIE_READS_PER_REGION);

  // loadNextRegion is presumably thread-safe hand-out of regions to workers —
  // TODO confirm; each iteration processes one region to completion.
  while (wellsReader.loadNextRegion(wellX, wellY, wellMeasurements, iRegion)) {

    float parameters[3];
    parameters[0] = 0.00;   // CF - initial guess
    parameters[1] = 0.00;   // IE - initial guess
    parameters[2] = 0.000;  // DR - initial guess

    // Alternate candidate-selection and optimization a fixed 5 times; each pass
    // rebuilds the training set under the latest parameter estimate.
    for (int globalIteration = 0; globalIteration < 5; globalIteration++) {

      dpTreephaser.SetModelParameters(parameters[0], parameters[1], parameters[2]);

      data.clear();

      // Iterate over live library wells and consider them as a part of the phase training set
      std::deque<int>::iterator x = wellX.begin();
      std::deque<int>::iterator y = wellY.begin();
      std::deque<std::vector<float> >::iterator measurements = wellMeasurements.begin();
      // The three iterators advance in lockstep; stop early once enough reads collected.
      for (; (x != wellX.end()) && (data.size() < MAX_CAFIE_READS_PER_REGION); x++, y++, measurements++) {

        // Only live beads qualify as training candidates.
        if (!mask->Match(*x, *y, MaskLive))
          continue;
        if (!mask->Match(*x, *y, MaskBead))
          continue;

        int beadClass = 1; // 1 - library, 0 - TF
        if (!mask->Match(*x, *y, MaskLib)) {    // Is it a library bead?
          if (!mask->Match(*x, *y, MaskTF))     // OK, is it at least a TF?
            continue;
          beadClass = 0;
        }

        // Tentatively add the read; it is popped again below if it fails any filter.
        data.push_back(BasecallerRead());
        data.back().SetDataAndKeyNormalize(&(measurements->at(0)), numFlows, libraryInfo[beadClass].Ionogram, libraryInfo[beadClass].numKeyFlows - 1);

        // Key-pass filter: rounded signal in each key flow must match the expected
        // key ionogram, and no key flow may be NaN.
        bool keypass = true;
        for (int iFlow = 0; iFlow < (libraryInfo[beadClass].numKeyFlows - 1); iFlow++) {
          if ((int) (data.back().measurements[iFlow] + 0.5) != libraryInfo[beadClass].Ionogram[iFlow])
            keypass = false;
          if (isnan(data.back().measurements[iFlow]))
            keypass = false;
        }
        if (!keypass) {
          data.pop_back();
          continue;
        }

        // Progressive solve/normalize refinement over widening flow windows
        // (windows clamped to numFlows for short runs).
        dpTreephaser.Solve(data.back(), std::min(100, numFlows));
        data.back().Normalize(11, std::min(80, numFlows));
        dpTreephaser.Solve(data.back(), std::min(120, numFlows));
        data.back().Normalize(11, std::min(100, numFlows));
        dpTreephaser.Solve(data.back(), std::min(120, numFlows));

        // Residual metric over flows 20..99: sum of squared prediction errors,
        // skipping high signals (> 1.2) and penalizing NaN deltas heavily.
        float metric = 0;
        for (int iFlow = 20; (iFlow < 100) && (iFlow < numFlows); iFlow++) {
          if (data.back().normalizedMeasurements[iFlow] > 1.2)
            continue;
          float delta = data.back().normalizedMeasurements[iFlow] - data.back().prediction[iFlow];
          if (!isnan(delta))
            metric += delta * delta;
          else
            metric += 1e10;
        }

        // Reject noisy reads from the training set.
        if (metric > 1) {
          data.pop_back();
          continue;
        }
      }

      // Too few reads to train reliably — keep the current parameter guess.
      if (data.size() < 10)
        break;

      // Perform parameter estimation
      NelderMeadOptimization(data, dpTreephaser, parameters, 50, 3);
    }

    // Serialize progress output across worker threads.
    pthread_mutex_lock(common_output_mutex);
    if (data.size() < 10)
      printf("Region % 3d: Using default phase parameters, %d reads insufficient for training\n", iRegion + 1, (int) data.size());
    else
      printf("Region % 3d: Using %d reads for phase parameter training\n", iRegion + 1, (int) data.size());
    // printf("o");
    fflush(stdout);
    pthread_mutex_unlock(common_output_mutex);

    // Publish the final (or default) estimates for this region. Each region index
    // is written by exactly one worker, so no lock is taken here — TODO confirm.
    (*cf)[iRegion] = parameters[0];
    (*ie)[iRegion] = parameters[1];
    (*dr)[iRegion] = parameters[2];
  }
}
size_t PhaseEstimator::LoadRegion(int region) { if (region_num_reads_[region] == 0) // Nothing to load ? return 0; if (region_reads_[region].size() > 0) // Region already loaded? return 0; ClockTimer timer; timer.StartTimer(); region_reads_[region].reserve(region_num_reads_[region]); int region_x = region % num_regions_x_; int region_y = region / num_regions_x_; int begin_x = region_x * region_size_x_; int begin_y = region_y * region_size_y_; int end_x = min(begin_x + region_size_x_, chip_size_x_); int end_y = min(begin_y + region_size_y_, chip_size_y_); // Mutex needed for wells access, but not needed for region_reads access pthread_mutex_lock(®ion_loader_mutex_); wells_->SetChunk(begin_y, end_y-begin_y, begin_x, end_x-begin_x, 0, flow_order_.num_flows()); wells_->ReadWells(); vector<float> well_buffer(flow_order_.num_flows()); for (int y = begin_y; y < end_y; y++) { for (int x = begin_x; x < end_x; x++) { if (get_subset(x,y) != train_subset_) continue; if (!mask_->Match(x, y, MaskLive)) continue; if (!mask_->Match(x, y, MaskBead)) continue; // A little help from friends in BkgModel if (mask_->Match(x, y, MaskFilteredBadResidual)) continue; if (mask_->Match(x, y, MaskFilteredBadPPF)) continue; if (mask_->Match(x, y, MaskFilteredBadKey)) continue; int cls = 0; if (!mask_->Match(x, y, MaskLib)) { // Not a library bead? cls = 1; if (!mask_->Match(x, y, MaskTF)) // Not a tf bead? continue; } for (int flow = 0; flow < flow_order_.num_flows(); ++flow) well_buffer[flow] = wells_->At(y,x,flow); // Sanity check. 
If there are NaNs in this read, print warning vector<int> nanflow; for (int flow = 0; flow < flow_order_.num_flows(); ++flow) { if (!isnan(well_buffer[flow])) continue; well_buffer[flow] = 0; nanflow.push_back(flow); } if(nanflow.size() > 0) { fprintf(stderr, "ERROR: BaseCaller read NaNs from wells file, x=%d y=%d flow=%d", x, y, nanflow[0]); for(unsigned int flow=1; flow < nanflow.size(); flow++) { fprintf(stderr, ",%d", nanflow[flow]); } fprintf(stderr, "\n"); fflush(stderr); } region_reads_[region].push_back(BasecallerRead()); if (use_pid_norm_) { region_reads_[region].back().SetDataAndKeyNormalizeNew(&well_buffer[0], flow_order_.num_flows(), keys_[cls].flows(), keys_[cls].flows_length()-1, false /*true*/); } else { region_reads_[region].back().SetDataAndKeyNormalize(&well_buffer[0], flow_order_.num_flows(), keys_[cls].flows(), keys_[cls].flows_length()-1); } bool keypass = true; for (int flow = 0; flow < (keys_[cls].flows_length() - 1); flow++) { if ((int) (region_reads_[region].back().raw_measurements[flow] + 0.5) != keys_[cls][flow]) keypass = false; if (isnan(region_reads_[region].back().raw_measurements[flow])) keypass = false; } if (!keypass) { region_reads_[region].pop_back(); continue; } } } pthread_mutex_unlock(®ion_loader_mutex_); region_num_reads_[region] = region_reads_[region].size(); return timer.GetMicroSec(); }
size_t PhaseEstimator::LoadRegion(int region) { if (region_num_reads_[region] == 0) // Nothing to load ? return 0; if (region_reads_[region].size() > 0) // Region already loaded? return 0; ClockTimer timer; timer.StartTimer(); region_reads_[region].reserve(region_num_reads_[region]); int region_x = region % num_regions_x_; int region_y = region / num_regions_x_; int begin_x = region_x * region_size_x_; int begin_y = region_y * region_size_y_; int end_x = min(begin_x + region_size_x_, chip_size_x_); int end_y = min(begin_y + region_size_y_, chip_size_y_); // Mutex needed for wells access, but not needed for region_reads access pthread_mutex_lock(®ion_loader_mutex_); wells_->SetChunk(begin_y, end_y-begin_y, begin_x, end_x-begin_x, 0, flow_order_.num_flows()); wells_->ReadWells(); vector<float> well_buffer(flow_order_.num_flows()); for (int y = begin_y; y < end_y; y++) { for (int x = begin_x; x < end_x; x++) { if (train_subset_count_ > 0 and get_subset(x,y) != train_subset_) continue; if (!mask_->Match(x, y, MaskLive)) continue; if (!mask_->Match(x, y, MaskBead)) continue; // A little help from friends in BkgModel if (mask_->Match(x, y, MaskFilteredBadResidual)) continue; if (mask_->Match(x, y, MaskFilteredBadPPF)) continue; if (mask_->Match(x, y, MaskFilteredBadKey)) continue; int cls = 0; if (!mask_->Match(x, y, MaskLib)) { // Not a library bead? cls = 1; if (!mask_->Match(x, y, MaskTF)) // Not a tf bead? continue; } for (int flow = 0; flow < flow_order_.num_flows(); ++flow) well_buffer[flow] = wells_->At(y,x,flow); // Sanity check. 
If there are NaNs in this read, print warning vector<int> nanflow; for (int flow = 0; flow < flow_order_.num_flows(); ++flow) { if (!isnan(well_buffer[flow])) continue; well_buffer[flow] = 0; nanflow.push_back(flow); } if(nanflow.size() > 0) { fprintf(stderr, "ERROR: BaseCaller read NaNs from wells file, x=%d y=%d flow=%d", x, y, nanflow[0]); for(unsigned int flow=1; flow < nanflow.size(); flow++) { fprintf(stderr, ",%d", nanflow[flow]); } fprintf(stderr, "\n"); fflush(stderr); } region_reads_[region].push_back(BasecallerRead()); bool keypass = true; if (key_norm_method_ == "adaptive") { keypass = region_reads_[region].back().SetDataAndKeyNormalizeNew(&well_buffer[0], flow_order_.num_flows(), keys_[cls].flows(), keys_[cls].flows_length()-1, false); } else if (key_norm_method_ == "off") { keypass = region_reads_[region].back().SetDataAndKeyPass(well_buffer, flow_order_.num_flows(), keys_[cls].flows(), keys_[cls].flows_length()-1); } else { keypass = region_reads_[region].back().SetDataAndKeyNormalize(&well_buffer[0], flow_order_.num_flows(), keys_[cls].flows(), keys_[cls].flows_length()-1); } // *** Compute some metrics - overload read.penalty_residual to store them if (keypass) { unsigned int num_zeromer_flows = 0, num_neg_zeromer_flows = 0; double squared_dist_int = 0.0; for (int flow=phasing_start_flow_; flow < phasing_end_flow_; ++flow){ if (region_reads_[region].back().raw_measurements.at(flow) < 0.5) { ++num_zeromer_flows; if (region_reads_[region].back().raw_measurements.at(flow) < 0.0) ++num_neg_zeromer_flows; } if (region_reads_[region].back().raw_measurements.at(flow) < inclusion_threshold_) { double delta = region_reads_[region].back().raw_measurements.at(flow) - round(region_reads_[region].back().raw_measurements.at(flow)); squared_dist_int += delta * delta; } } // Too few zero-mers or too much noise? Moving on along, don't waste time on investigating hopeless candidates. 
if (num_zeromer_flows < 5 or (float)squared_dist_int > residual_threshold_ + 1.5) keypass = false; else { // [0]=percent_neg_zeromer_flows [1]=squared_dist_int region_reads_[region].back().penalty_residual.assign(2, 0.0f); region_reads_[region].back().penalty_residual.at(0) = (float)num_neg_zeromer_flows / (float)num_zeromer_flows; region_reads_[region].back().penalty_residual.at(1) = squared_dist_int; } } // *** if (not keypass) { region_reads_[region].pop_back(); continue; } } } pthread_mutex_unlock(®ion_loader_mutex_); region_num_reads_[region] = region_reads_[region].size(); return timer.GetMicroSec(); }