Code example #1
File: PhaseEstimator.cpp  Project: Brainiarc7/TS
void PhaseEstimator::EstimatorWorker()
{

  DPTreephaser treephaser(flow_order_, windowSize_);
  vector<BasecallerRead *>  useful_reads;
  useful_reads.reserve(10000);

  while (true) {

    pthread_mutex_lock(&job_queue_mutex_);
    while (job_queue_.empty()) {
      if (jobs_in_progress_ == 0) {
        pthread_mutex_unlock(&job_queue_mutex_);
        return;
      }
      // No jobs available now, but more may come, so stick around
      pthread_cond_wait(&job_queue_cond_, &job_queue_mutex_);
    }
    Subblock &s = *job_queue_.front();
    job_queue_.pop_front();
    jobs_in_progress_++;
    pthread_mutex_unlock(&job_queue_mutex_);


    // Processing

    int numGlobalIterations = 1;  // 3 iterations at top level, 1 at all other levels
    if (s.level == 1)
      numGlobalIterations = 3;

    for (int iGlobalIteration = 0; iGlobalIteration < numGlobalIterations; iGlobalIteration++) {

      ClockTimer timer;
      timer.StartTimer();
      size_t iotimer = 0;

      treephaser.SetModelParameters(s.cf, s.ie, s.dr);
      useful_reads.clear();

      for (vector<int>::iterator region = s.sorted_regions.begin(); region != s.sorted_regions.end(); ++region) {


        iotimer += LoadRegion(*region);
        // Ensure the region is loaded, grab its reads and filter them,
        // and stop once enough useful reads have been collected (see break below).

        if (action_map_[*region] == 0 and region_num_reads_[*region])
          action_map_[*region] = s.level;

        // Filter. Reads that survive filtering are stored in useful_reads
        //! \todo: Rethink filtering. Maybe a rule that adjusts the threshold to keep at least 20% of candidate reads.

        for (vector<BasecallerRead>::iterator R = region_reads_[*region].begin(); R != region_reads_[*region].end(); ++R) {

          for (int flow = 0; flow < flow_order_.num_flows(); flow++)
            R->normalized_measurements[flow] = R->raw_measurements[flow];

          treephaser.Solve    (*R, min(100, flow_order_.num_flows()));
          use_pid_norm_ ? (void)treephaser.PIDNormalize(*R, 8, 40) : (void)treephaser.Normalize(*R, 11, 80);
          treephaser.Solve    (*R, min(120, flow_order_.num_flows()));
          use_pid_norm_ ? (void)treephaser.PIDNormalize(*R, 8, 80) : (void)treephaser.Normalize(*R, 11, 100);
          treephaser.Solve    (*R, min(120, flow_order_.num_flows()));

          float metric = 0;
          for (int flow = 20; flow < 100 and flow < flow_order_.num_flows(); ++flow) {
            if (R->normalized_measurements[flow] > 1.2)
              continue;
            float delta = R->normalized_measurements[flow] - R->prediction[flow];
            if (!isnan(delta))
              metric += delta * delta;
            else
              metric += 1e10;
          }

          if (metric > residual_threshold_) {
            //printf("\nRejecting metric=%1.5f solution=%s", metric, R->sequence.c_str());
            continue;
          }
          useful_reads.push_back(&(*R));
        }

        if (useful_reads.size() >= 5000)
          break;
      }

      if (s.level > 1 and useful_reads.size() < 1000) // Not enough reads to even try
        break;

      // Do estimation with reads collected, update estimates
      float parameters[3];
      parameters[0] = s.cf;
      parameters[1] = s.ie;
      parameters[2] = s.dr;
      NelderMeadOptimization(useful_reads, treephaser, parameters, use_pid_norm_);
      s.cf = parameters[0];
      s.ie = parameters[1];
      s.dr = parameters[2];

      printf("Completed (%d,%d,%d) :(%2d-%2d)x(%2d-%2d), total time %5.2lf sec, i/o time %5.2lf sec, %d reads, CF=%1.2f%% IE=%1.2f%% DR=%1.2f%%\n",
          s.level, s.pos_x, s.pos_y, s.begin_x, s.end_x, s.begin_y, s.end_y,
          (double)timer.GetMicroSec()/1000000.0, (double)iotimer/1000000.0, (int)useful_reads.size(),
          100.0*s.cf, 100.0*s.ie, 100.0*s.dr);
    }

    if (useful_reads.size() >= 1000 or s.level == 1) {

      for (int region_x = s.begin_x; region_x <= s.end_x and region_x < num_regions_x_; region_x++) {
        for (int region_y = s.begin_y; region_y <= s.end_y and region_y < num_regions_y_; region_y++) {
          int region = region_x + region_y * num_regions_x_;
          if     (region_x == s.begin_x and region_y == s.begin_y)
            subblock_map_[region] = '+';
          else if(region_x == s.begin_x and region_y == s.end_y)
            subblock_map_[region] = '+';
          else if(region_x == s.end_x and region_y == s.begin_y)
            subblock_map_[region] = '+';
          else if(region_x == s.end_x and region_y == s.end_y)
            subblock_map_[region] = '+';
          else if (region_x == s.begin_x)
            subblock_map_[region] = '|';
          else if (region_x == s.end_x)
            subblock_map_[region] = '|';
          else if (region_y == s.begin_y)
            subblock_map_[region] = '-';
          else if (region_y == s.end_y)
            subblock_map_[region] = '-';
        }
      }
    }


    if (s.subblocks[0] == NULL or useful_reads.size() < 4000) {
      // Do not subdivide this block
      for (vector<int>::iterator region = s.sorted_regions.begin(); region != s.sorted_regions.end(); region++)
        region_reads_[*region].clear();

      pthread_mutex_lock(&job_queue_mutex_);
      jobs_in_progress_--;
      if (jobs_in_progress_ == 0)  // No more work, let everyone know
        pthread_cond_broadcast(&job_queue_cond_);
      pthread_mutex_unlock(&job_queue_mutex_);

    } else {
      // Subdivide. Spawn new jobs:
      pthread_mutex_lock(&job_queue_mutex_);
      jobs_in_progress_--;
      for (int subjob = 0; subjob < 4; subjob++) {
        s.subblocks[subjob]->cf = s.cf;
        s.subblocks[subjob]->ie = s.ie;
        s.subblocks[subjob]->dr = s.dr;
        job_queue_.push_back(s.subblocks[subjob]);
      }
      pthread_cond_broadcast(&job_queue_cond_);  // More work, let everyone know
      pthread_mutex_unlock(&job_queue_mutex_);
    }
  }
}
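EstimatorWorker is built around a common pthread producer/consumer pattern: workers pop subblocks from a shared deque protected by a mutex, may push new sub-jobs back onto the queue, and all workers terminate once the queue is empty and no job is still in progress (tracked by jobs_in_progress_ and signalled through the condition variable). Below is a minimal, self-contained sketch of that same pattern using hypothetical names (Job, JobQueue, Worker); it is an illustration of the queue/termination logic only, not code from PhaseEstimator.

#include <pthread.h>
#include <deque>
#include <cstdio>

struct Job { int level; };

struct JobQueue {
  std::deque<Job*> jobs;
  int              in_progress;
  pthread_mutex_t  mutex;
  pthread_cond_t   cond;
};

static void* Worker(void* arg)
{
  JobQueue& q = *static_cast<JobQueue*>(arg);
  while (true) {
    pthread_mutex_lock(&q.mutex);
    while (q.jobs.empty()) {
      if (q.in_progress == 0) {              // queue drained and nothing running: shut down
        pthread_mutex_unlock(&q.mutex);
        return NULL;
      }
      pthread_cond_wait(&q.cond, &q.mutex);  // a running job may still spawn new work
    }
    Job* job = q.jobs.front();
    q.jobs.pop_front();
    q.in_progress++;
    pthread_mutex_unlock(&q.mutex);

    // ... process *job outside the lock, possibly deciding to subdivide it ...
    printf("processed job at level %d\n", job->level);

    pthread_mutex_lock(&q.mutex);
    if (job->level < 3)                      // example rule: subdivide shallow jobs
      q.jobs.push_back(new Job{ job->level + 1 });
    q.in_progress--;
    pthread_cond_broadcast(&q.cond);         // wake waiters: new work, or possible shutdown
    pthread_mutex_unlock(&q.mutex);
    delete job;
  }
}

int main()
{
  JobQueue q;
  q.in_progress = 0;
  pthread_mutex_init(&q.mutex, NULL);
  pthread_cond_init(&q.cond, NULL);
  q.jobs.push_back(new Job{ 1 });            // seed with a root job before starting workers

  pthread_t threads[4];
  for (int i = 0; i < 4; i++)
    pthread_create(&threads[i], NULL, Worker, &q);
  for (int i = 0; i < 4; i++)
    pthread_join(threads[i], NULL);

  pthread_mutex_destroy(&q.mutex);
  pthread_cond_destroy(&q.cond);
  return 0;
}

The sketch broadcasts after every completed job, which is slightly noisier than the original (which only broadcasts when work is added or when the last job finishes) but preserves the same invariant: a worker exits only when the queue is empty and in_progress is zero.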
// Fills in predicted signal values for the called and alternative base hypotheses
// and reports delta_score and fit_score from their squared distances to the measured signal
void BaseHypothesisEvaluator(BamTools::BamAlignment    &alignment,
                             const string              &flow_order_str,
                             const string              &alt_base_hyp,
                             float                     &delta_score,
                             float                     &fit_score,
                             int                       heavy_verbose) {

    // --- Step 1: Initialize Objects and retrieve relevant tags

    delta_score = 1e5;
    fit_score   = 1e5;
    vector<string>   Hypotheses(2);
    vector<float>    measurements, phase_params;
    int              start_flow, num_flows, prefix_flow=0;

    if (not GetBamTags(alignment, flow_order_str.length(), measurements, phase_params, start_flow))
      return;
    num_flows = measurements.size();
    ion::FlowOrder flow_order(flow_order_str, num_flows);
    BasecallerRead master_read;
    master_read.SetData(measurements, flow_order.num_flows());
    TreephaserLite   treephaser(flow_order);
    treephaser.SetModelParameters(phase_params[0], phase_params[1]);

    // --- Step 2: Solve beginning of the read
    // Look at mapped vs. unmapped reads in BAM
    Hypotheses[0] = alignment.QueryBases;
    Hypotheses[1] = alt_base_hyp;
    // Safety: reverse complement reverse strand reads in mapped bam
    if (alignment.IsMapped() and alignment.IsReverseStrand()) {
      RevComplementInPlace(Hypotheses[0]);
      RevComplementInPlace(Hypotheses[1]);
    }

    prefix_flow = GetMasterReadPrefix(treephaser, flow_order, start_flow, Hypotheses[0], master_read);
    unsigned int prefix_size = master_read.sequence.size();

    // --- Step 3: Create predictions for the individual hypotheses

    vector<BasecallerRead> hypothesesReads(Hypotheses.size());
    vector<float> squared_distances(Hypotheses.size(), 0.0);
    int max_last_flow = 0;

    for (unsigned int i_hyp=0; i_hyp<hypothesesReads.size(); ++i_hyp) {

      hypothesesReads[i_hyp] = master_read;
      // --- add hypothesis sequence to clipped prefix
      unsigned int i_base = 0;
      int i_flow = prefix_flow;

      while (i_base<Hypotheses[i_hyp].length() and i_base<(2*(unsigned int)flow_order.num_flows()-prefix_size)) {
        while (i_flow < flow_order.num_flows() and flow_order.nuc_at(i_flow) != Hypotheses[i_hyp][i_base])
          i_flow++;
        if (i_flow < flow_order.num_flows() and i_flow > max_last_flow)
          max_last_flow = i_flow;
        if (i_flow >= flow_order.num_flows())
          break;
        // Add base to sequence only if it fits into flow order
        hypothesesReads[i_hyp].sequence.push_back(Hypotheses[i_hyp][i_base]);
        i_base++;
      }
      i_flow = min(i_flow, flow_order.num_flows()-1);

      // Solver simulates beginning of the read and then fills in the remaining clipped bases for which we have flow information
      treephaser.Solve(hypothesesReads[i_hyp], num_flows, i_flow);
    }
    // Compute L2-distance of measurements and predictions
    for (unsigned int i_hyp=0; i_hyp<hypothesesReads.size(); ++i_hyp) {
      for (int iFlow=0; iFlow<=max_last_flow; iFlow++)
        squared_distances[i_hyp] += (measurements.at(iFlow) - hypothesesReads[i_hyp].prediction.at(iFlow)) *
                                    (measurements.at(iFlow) - hypothesesReads[i_hyp].prediction.at(iFlow));
    }

    // Delta: L2-distance of alternative base Hypothesis - L2-distance of bases as called
    delta_score = squared_distances.at(1) - squared_distances.at(0);
    fit_score   = min(squared_distances.at(1), squared_distances.at(0));


    // --- verbose ---
    if (heavy_verbose > 1 or (delta_score < 0 and heavy_verbose > 0)) {
      cout << "Processed read " << alignment.Name << endl;
      cout << "Delta Fit: " << delta_score << " Overall Fit: " << fit_score << endl;
      PredictionGenerationVerbose(Hypotheses, hypothesesReads, phase_params, flow_order, start_flow, prefix_size);
    }

}
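For orientation, here is a hedged sketch of how BaseHypothesisEvaluator might be driven from a BAM file using the BamTools reader API. The input file name, the flow-order string, and the way the alternative hypothesis is constructed are placeholders invented for this example, not values taken from the project; the function prototype is simply restated from the definition above.

#include <iostream>
#include <string>
#include "api/BamReader.h"
#include "api/BamAlignment.h"

// Prototype as defined above; in the project this would come from the corresponding header.
void BaseHypothesisEvaluator(BamTools::BamAlignment &alignment,
                             const std::string      &flow_order_str,
                             const std::string      &alt_base_hyp,
                             float                  &delta_score,
                             float                  &fit_score,
                             int                    heavy_verbose);

int main()
{
  BamTools::BamReader reader;
  if (!reader.Open("example.bam"))                 // hypothetical input BAM
    return 1;

  // Placeholder flow-order string; a real run would use the sequencing flow order
  // associated with the data, not this made-up value.
  const std::string flow_order_str = "TACGTACGTACGTACG";

  BamTools::BamAlignment alignment;
  while (reader.GetNextAlignment(alignment)) {
    if (alignment.QueryBases.empty())
      continue;

    // Hypothetical alternative hypothesis: the called bases with the first base substituted.
    std::string alt_hyp = alignment.QueryBases;
    alt_hyp[0] = (alt_hyp[0] == 'A') ? 'C' : 'A';

    float delta_score = 0.0f, fit_score = 0.0f;
    BaseHypothesisEvaluator(alignment, flow_order_str, alt_hyp, delta_score, fit_score, 0);
    std::cout << alignment.Name << " delta=" << delta_score
              << " fit=" << fit_score << std::endl;
  }
  reader.Close();
  return 0;
}

A negative delta_score would mean the alternative hypothesis fits the measured signal better than the bases as called, which is also the condition under which the function prints verbose output when heavy_verbose is greater than zero.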