示例#1
0
void PhaseEstimator::EstimatorWorker()
{

  DPTreephaser treephaser(flow_order_, windowSize_);
  vector<BasecallerRead *>  useful_reads;
  useful_reads.reserve(10000);

  while (true) {

    pthread_mutex_lock(&job_queue_mutex_);
    while (job_queue_.empty()) {
      if (jobs_in_progress_ == 0) {
        pthread_mutex_unlock(&job_queue_mutex_);
        return;
      }
      // No jobs available now, but more may come, so stick around
      pthread_cond_wait(&job_queue_cond_, &job_queue_mutex_);
    }
    Subblock &s = *job_queue_.front();
    job_queue_.pop_front();
    jobs_in_progress_++;
    pthread_mutex_unlock(&job_queue_mutex_);


    // Processing

    int numGlobalIterations = 1;  // 3 iterations at top level, 1 at all other levels
    if (s.level == 1)
      numGlobalIterations = 3;

    for (int iGlobalIteration = 0; iGlobalIteration < numGlobalIterations; iGlobalIteration++) {

      ClockTimer timer;
      timer.StartTimer();
      size_t iotimer = 0;

      treephaser.SetModelParameters(s.cf, s.ie, s.dr);
      useful_reads.clear();

      for (vector<int>::iterator region = s.sorted_regions.begin(); region != s.sorted_regions.end(); ++region) {


        iotimer += LoadRegion(*region);
        // Ensure region loaded.
        // Grab reads, filter
        // Enough reads? Stop.

        if (action_map_[*region] == 0 and region_num_reads_[*region])
          action_map_[*region] = s.level;

        // Filter. Reads that survive filtering are stored in useful_reads
        //! \todo: Rethink filtering. Maybe a rule that adjusts the threshold to keep at least 20% of candidate reads.

        for (vector<BasecallerRead>::iterator R = region_reads_[*region].begin(); R != region_reads_[*region].end(); ++R) {

          for (int flow = 0; flow < flow_order_.num_flows(); flow++)
            R->normalized_measurements[flow] = R->raw_measurements[flow];

          treephaser.Solve    (*R, min(100, flow_order_.num_flows()));
          use_pid_norm_ ? (void)treephaser.PIDNormalize(*R, 8, 40) : (void)treephaser.Normalize(*R, 11, 80);
          treephaser.Solve    (*R, min(120, flow_order_.num_flows()));
          use_pid_norm_ ? (void)treephaser.PIDNormalize(*R, 8, 80) : (void)treephaser.Normalize(*R, 11, 100);
          treephaser.Solve    (*R, min(120, flow_order_.num_flows()));

          float metric = 0;
          for (int flow = 20; flow < 100 and flow < flow_order_.num_flows(); ++flow) {
            if (R->normalized_measurements[flow] > 1.2)
              continue;
            float delta = R->normalized_measurements[flow] - R->prediction[flow];
            if (!isnan(delta))
              metric += delta * delta;
            else
              metric += 1e10;
          }

          if (metric > residual_threshold_) {
            //printf("\nRejecting metric=%1.5f solution=%s", metric, R->sequence.c_str());
            continue;
          }
          useful_reads.push_back(&(*R));
        }

        if (useful_reads.size() >= 5000)
          break;
      }

      if (s.level > 1 and useful_reads.size() < 1000) // Not enough reads to even try
        break;

      // Do estimation with reads collected, update estimates
      float parameters[3];
      parameters[0] = s.cf;
      parameters[1] = s.ie;
      parameters[2] = s.dr;
      NelderMeadOptimization(useful_reads, treephaser, parameters, use_pid_norm_);
      s.cf = parameters[0];
      s.ie = parameters[1];
      s.dr = parameters[2];

      printf("Completed (%d,%d,%d) :(%2d-%2d)x(%2d-%2d), total time %5.2lf sec, i/o time %5.2lf sec, %d reads, CF=%1.2f%% IE=%1.2f%% DR=%1.2f%%\n",
          s.level, s.pos_x, s.pos_y, s.begin_x, s.end_x, s.begin_y, s.end_y,
          (double)timer.GetMicroSec()/1000000.0, (double)iotimer/1000000.0, (int)useful_reads.size(),
          100.0*s.cf, 100.0*s.ie, 100.0*s.dr);
    }

    if (useful_reads.size() >= 1000 or s.level == 1) {

      for (int region_x = s.begin_x; region_x <= s.end_x and region_x < num_regions_x_; region_x++) {
        for (int region_y = s.begin_y; region_y <= s.end_y and region_y < num_regions_y_; region_y++) {
          int region = region_x + region_y * num_regions_x_;
          if     (region_x == s.begin_x and region_y == s.begin_y)
            subblock_map_[region] = '+';
          else if(region_x == s.begin_x and region_y == s.end_y)
            subblock_map_[region] = '+';
          else if(region_x == s.end_x and region_y == s.begin_y)
            subblock_map_[region] = '+';
          else if(region_x == s.end_x and region_y == s.end_y)
            subblock_map_[region] = '+';
          else if (region_x == s.begin_x)
            subblock_map_[region] = '|';
          else if (region_x == s.end_x)
            subblock_map_[region] = '|';
          else if (region_y == s.begin_y)
            subblock_map_[region] = '-';
          else if (region_y == s.end_y)
            subblock_map_[region] = '-';
        }
      }
    }


    if (s.subblocks[0] == NULL or useful_reads.size() < 4000) {
      // Do not subdivide this block
      for (vector<int>::iterator region = s.sorted_regions.begin(); region != s.sorted_regions.end(); region++)
        region_reads_[*region].clear();

      pthread_mutex_lock(&job_queue_mutex_);
      jobs_in_progress_--;
      if (jobs_in_progress_ == 0)  // No more work, let everyone know
        pthread_cond_broadcast(&job_queue_cond_);
      pthread_mutex_unlock(&job_queue_mutex_);

    } else {
      // Subdivide. Spawn new jobs:
      pthread_mutex_lock(&job_queue_mutex_);
      jobs_in_progress_--;
      for (int subjob = 0; subjob < 4; subjob++) {
        s.subblocks[subjob]->cf = s.cf;
        s.subblocks[subjob]->ie = s.ie;
        s.subblocks[subjob]->dr = s.dr;
        job_queue_.push_back(s.subblocks[subjob]);
      }
      pthread_cond_broadcast(&job_queue_cond_);  // More work, let everyone know
      pthread_mutex_unlock(&job_queue_mutex_);
    }
  }
}
示例#2
0
/**
 * Worker-thread body: estimates phase parameters region by region.
 *
 * Regions are fetched with wellsReader.loadNextRegion() until it returns
 * false. For each region, up to five rounds are run; each round
 *   1. rebuilds the training set from wells flagged MaskLive and MaskBead
 *      that are either library (MaskLib) or TF (MaskTF) beads,
 *   2. drops reads that fail the key check or whose squared residual over
 *      flows [20, 100) exceeds 1,
 *   3. refines parameters[] = {CF, IE, DR} with NelderMeadOptimization.
 * Rounds stop early when fewer than 10 reads survive. The parameters held
 * at that point are written to (*cf), (*ie) and (*dr) at index iRegion.
 *
 * Progress messages are printed while holding common_output_mutex.
 */
void RegionAnalysis::worker_Treephaser()
{

  // Worker method: load regions one by one and process them until done

//  int numFlows = wells->NumFlows();

//  DPTreephaser dpTreephaser(wells->FlowOrder(), numFlows, 8);
  DPTreephaser dpTreephaser(flowOrder.c_str(), numFlows, 8);

  // Per-region well coordinates and signals, filled by loadNextRegion().
  std::deque<int> wellX;
  std::deque<int> wellY;
  std::deque<std::vector<float> > wellMeasurements;
  int iRegion;

  // Training reads. Capacity is reserved once and the fill loop is capped at
  // the same size, so push_back never reallocates while data.back() is in use.
  std::vector<BasecallerRead> data;
  data.reserve(MAX_CAFIE_READS_PER_REGION);

  while (wellsReader.loadNextRegion(wellX, wellY, wellMeasurements, iRegion)) {

    float parameters[3];
    parameters[0] = 0.00; // CF - initial guess
    parameters[1] = 0.00; // IE - initial guess
    parameters[2] = 0.000; // DR - initial guess

    for (int globalIteration = 0; globalIteration < 5; globalIteration++) {

      // Solve with the estimates produced by the previous round.
      dpTreephaser.SetModelParameters(parameters[0], parameters[1], parameters[2]);

      data.clear();

      // Iterate over live library wells and consider them as a part of the phase training set

      // The three deques are walked in lockstep; only wellX is checked against
      // its end (assumes all three have the same length — TODO confirm).
      std::deque<int>::iterator x = wellX.begin();
      std::deque<int>::iterator y = wellY.begin();
      std::deque<std::vector<float> >::iterator measurements = wellMeasurements.begin();

      for (; (x != wellX.end()) && (data.size() < MAX_CAFIE_READS_PER_REGION); ++x, ++y, ++measurements) {

        if (!mask->Match(*x, *y, MaskLive))
          continue;
        if (!mask->Match(*x, *y, MaskBead))
          continue;

        int beadClass = 1; // 1 - library, 0 - TF

        if (!mask->Match(*x, *y, MaskLib)) {  // Is it a library bead?
          if (!mask->Match(*x, *y, MaskTF))   // OK, is it at least a TF?
            continue;
          beadClass = 0;
        }

        // Tentatively add the read; it is popped again if it fails a filter.
        data.push_back(BasecallerRead());

        data.back().SetDataAndKeyNormalize(&(measurements->at(0)), numFlows, libraryInfo[beadClass].Ionogram, libraryInfo[beadClass].numKeyFlows - 1);

        // Key check: every key flow except the last must round to the
        // expected key value. NaN is tested first so that a NaN signal is
        // never converted to int (that conversion is undefined behaviour);
        // the first failing flow decides, so the scan stops there.
        bool keypass = true;
        for (int iFlow = 0; iFlow < (libraryInfo[beadClass].numKeyFlows - 1); iFlow++) {
          if (isnan(data.back().measurements[iFlow]) ||
              (int) (data.back().measurements[iFlow] + 0.5) != libraryInfo[beadClass].Ionogram[iFlow]) {
            keypass = false;
            break;
          }
        }

        if (!keypass) {
          data.pop_back();
          continue;
        }

        // Alternate solving and normalization over growing flow windows.
        dpTreephaser.Solve(data.back(), std::min(100, numFlows));
        data.back().Normalize(11, std::min(80, numFlows));
        dpTreephaser.Solve(data.back(), std::min(120, numFlows));
        data.back().Normalize(11, std::min(100, numFlows));
        dpTreephaser.Solve(data.back(), std::min(120, numFlows));


        // Sum of squared residuals over flows [20, 100), ignoring flows whose
        // normalized signal exceeds 1.2. A NaN residual is charged a huge
        // penalty so the read gets rejected below.
        float metric = 0;
        for (int iFlow = 20; (iFlow < 100) && (iFlow < numFlows); iFlow++) {
          if (data.back().normalizedMeasurements[iFlow] > 1.2)
            continue;
          float delta = data.back().normalizedMeasurements[iFlow] - data.back().prediction[iFlow];
          if (!isnan(delta))
            metric += delta * delta;
          else
            metric += 1e10;
        }

        // Poorly fitting read: remove it from the training set.
        if (metric > 1) {
          data.pop_back();
          continue;
        }

      }

      // Too few reads to train on; keep whatever parameters we have so far.
      if (data.size() < 10)
        break;

      // Perform parameter estimation
      // NOTE(review): the meaning of the trailing arguments (50, 3) is not
      // visible here — confirm against NelderMeadOptimization's declaration.
      NelderMeadOptimization(data, dpTreephaser, parameters, 50, 3);
    }

    // Serialize console output across worker threads.
    pthread_mutex_lock(common_output_mutex);
    if (data.size() < 10)
      printf("Region % 3d: Using default phase parameters, %d reads insufficient for training\n", iRegion + 1, (int) data.size());
    else
      printf("Region % 3d: Using %d reads for phase parameter training\n", iRegion + 1, (int) data.size());
    //      printf("o");
    fflush(stdout);
    pthread_mutex_unlock(common_output_mutex);

    // Publish this region's estimates.
    (*cf)[iRegion] = parameters[0];
    (*ie)[iRegion] = parameters[1];
    (*dr)[iRegion] = parameters[2];

  }

}