Пример #1
0
// Scores one candidate phasing parameter set (cf, ie, dr) against a collection of reads.
//
// The treephaser receives the candidate parameters, then each read is passed to
// Simulate() and to one of the two normalizers. The returned metric is the sum of
// squared differences between raw measurements and normalizer-scaled predictions,
// taken over flows 20..99 (or fewer, if a read has fewer raw measurements).
//
//   useful_reads - reads to score; each pointee is handed to the treephaser by reference
//   treephaser   - solver; its model parameters are overwritten by this call
//   parameters   - array read at indices 0, 1, 2 as cf, ie, dr
//   usePIDNorm   - true: PIDNormalize(read, 8, 100); false: Normalize(read, 20, 100)
//
// Returns 1e10 when any parameter lies outside [0, 0.04] x [0, 0.04] x [0, 0.01]
// or when the accumulated metric is NaN; otherwise returns the metric.
float PhaseEstimator::EvaluateParameters(vector<BasecallerRead *>& useful_reads, DPTreephaser& treephaser, const float *parameters, const bool usePIDNorm)
{
  const float cf = parameters[0];
  const float ie = parameters[1];
  const float dr = parameters[2];

  // Candidates outside the admissible box get a large penalty so a minimizer steers away.
  const bool below_zero = cf < 0 or ie < 0 or dr < 0;
  const bool above_cap = cf > 0.04 or ie > 0.04 or dr > 0.01;
  if (below_zero or above_cap)
    return 1e10;

  treephaser.SetModelParameters(cf, ie, dr);

  float squared_error = 0;
  for (size_t idx = 0; idx < useful_reads.size(); ++idx) {
    BasecallerRead& current = *useful_reads[idx];

    treephaser.Simulate(current, 120);

    float scale;
    if (usePIDNorm)
      scale = treephaser.PIDNormalize(current, 8, 100);
    else
      scale = treephaser.Normalize(current, 20, 100);

    // Upper flow bound: 100 or the number of raw measurements, whichever is smaller.
    const size_t num_measured = current.raw_measurements.size();
    const unsigned int flow_end = (num_measured < 100) ? (unsigned int)num_measured : 100;

    for (unsigned int flow = 20; flow < flow_end; ++flow) {
      // Flows with a raw signal above 1.2 do not contribute to the metric.
      if (current.raw_measurements[flow] > 1.2)
        continue;
      float residual = current.raw_measurements[flow] - current.prediction[flow] * scale;
      squared_error += residual * residual;
    }
  }

  if (isnan(squared_error))
    return 1e10;
  return squared_error;
}
Пример #2
0
// Scores one candidate phasing parameter set (cf, ie, dr) against a collection of reads,
// using the estimator's configured flow window and inclusion threshold.
//
//   useful_reads - reads to score; each pointee is handed to the treephaser by reference
//   treephaser   - solver; its model parameters are overwritten by this call
//   parameters   - array read at indices 0, 1, 2 as cf, ie, dr
//
// Returns 1e10 when any parameter lies outside [0, 0.04] x [0, 0.04] x [0, 0.01]
// or when the accumulated metric is NaN; otherwise returns the sum of squared,
// correction-weighted residuals over [phasing_start_flow_, phasing_end_flow_).
float PhaseEstimator::EvaluateParameters(vector<BasecallerRead *>& useful_reads, DPTreephaser& treephaser, const float *parameters)
{
  const float cf = parameters[0];
  const float ie = parameters[1];
  const float dr = parameters[2];

  // Candidates outside the admissible box get a large penalty so a minimizer steers away.
  const bool below_zero = cf < 0 or ie < 0 or dr < 0;
  const bool above_cap = cf > 0.04 or ie > 0.04 or dr > 0.01;
  if (below_zero or above_cap)
    return 1e10;

  treephaser.SetModelParameters(cf, ie, dr);

  float squared_error = 0;
  for (size_t idx = 0; idx < useful_reads.size(); ++idx) {
    BasecallerRead& current = *useful_reads[idx];

    // Simulate 20 flows past the end of the evaluation window.
    treephaser.Simulate(current, phasing_end_flow_+20);

    // Re-normalization for every candidate is optional and controlled by a member flag.
    if (norm_during_param_eval_)
      NormalizeBasecallerRead(treephaser, current, phasing_start_flow_, phasing_end_flow_);

    // Upper flow bound: window end or number of raw measurements, whichever is smaller.
    const int num_measured = (int)current.raw_measurements.size();
    const int flow_end = (phasing_end_flow_ < num_measured) ? phasing_end_flow_ : num_measured;

    for (int flow = phasing_start_flow_; flow < flow_end; ++flow) {
      // Flows whose raw signal exceeds the inclusion threshold do not contribute.
      if (current.raw_measurements[flow] > inclusion_threshold_)
        continue;
      // Residual between normalized measurement and prediction, scaled by the
      // per-flow multiplicative correction.
      float residual = (current.normalized_measurements[flow] - current.prediction[flow]) * current.multiplicative_correction[flow];
      squared_error += residual * residual;
    }
  }

  if (isnan(squared_error))
    return 1e10;
  return squared_error;
}
Пример #3
0
// Applies the normalization selected by norm_method_ to a single read.
//
//   0 - treephaser.Normalize over [start_flow, end_flow]
//   1 - treephaser.WindowedNormalize with end_flow / windowSize_ steps
//   2 - treephaser.PIDNormalize over [start_flow, end_flow]
//   3 - per-read choice: windowed when read.penalty_residual.at(0) exceeds
//       maxfrac_negative_flows_, plain Normalize otherwise
//   4 - no normalization
//
// Any other value prints an error to cerr and terminates the process.
void PhaseEstimator::NormalizeBasecallerRead(DPTreephaser& treephaser, BasecallerRead& read, int start_flow, int end_flow)
{
    if (norm_method_ == 0) {
        treephaser.Normalize(read, start_flow, end_flow);
    }
    else if (norm_method_ == 1) {
        treephaser.WindowedNormalize(read, (end_flow / windowSize_), windowSize_);
    }
    else if (norm_method_ == 2) {
        treephaser.PIDNormalize(read, start_flow, end_flow);
    }
    else if (norm_method_ == 3) {
        // Variable per-read normalization keyed on the first penalty_residual entry.
        const bool prefer_windowed = read.penalty_residual.at(0) > maxfrac_negative_flows_;
        if (prefer_windowed)
            treephaser.WindowedNormalize(read, (end_flow / windowSize_), windowSize_);
        else
            treephaser.Normalize(read, start_flow, end_flow);
    }
    else if (norm_method_ == 4) {
        // "off": intentionally leaves the read untouched.
    }
    else {
        cerr << "PhaseEstimator: Unknown normalization method " << norm_method_ << endl;
        exit(EXIT_FAILURE);
    }
}
Пример #4
0
// Simulates, optionally normalizes, and solves a single hypothesis read.
//
//   working_treephaser  - solver used for Simulate / WindowedNormalize / Solve
//   current_hypothesis  - read that is passed by reference to each solver call
//   applyNormalization  - values > 0 enable windowed normalization (window of 50 flows)
//
// Returns the value of LastIncorporatingFlow(current_hypothesis) as computed
// before simulation.
int HypothesisEvaluator::EvaluateOneHypothesis(DPTreephaser &working_treephaser, BasecallerRead &current_hypothesis, int applyNormalization) {

  // Must be determined up front: it drives both the normalization span and the solve.
  int final_flow = LastIncorporatingFlow(current_hypothesis);

  working_treephaser.Simulate(current_hypothesis, nFlows);

  if (applyNormalization > 0) {
    // Number of whole 50-flow windows up to the last incorporating flow.
    int window = 50;
    int num_windows = final_flow / window;
    working_treephaser.WindowedNormalize(current_hypothesis, num_windows, window);
  }

  // Third argument tells the solver where to resume, per the original comment:
  // the beginning of the read is simulated and the remaining bases are filled in.
  working_treephaser.Solve(current_hypothesis, nFlows, final_flow);

  return final_flow;
}
Пример #5
0
// Solves the start of a (possibly clipped) read and trims the solved prefix so that
// it ends just before the bases that the first hypothesis supplies.
//
//   working_treephaser - solver used to call the prefix when startFlow > 0
//   master_read        - read whose `sequence` is solved and then truncated in place
//   Hypotheses         - hypothesis sequences; only Hypotheses[0] is inspected, and only
//                        for the length of its leading homopolymer. An empty vector or
//                        an empty first string counts as a leading homopolymer of 0.
//   startFlow          - flow at which the hypotheses begin
//
// Returns the length of master_read.sequence after trimming.
unsigned int HypothesisEvaluator::SolveBeginningOfRead(DPTreephaser &working_treephaser, BasecallerRead &master_read,
    const vector<string>& Hypotheses, int startFlow) {

  // Solve a little past startFlow (20 flows, capped at nFlows) when the read is clipped.
  if (startFlow>0) {
    int until_flow = min((startFlow+20), nFlows);
    working_treephaser.Solve(master_read, until_flow, 0);
  }

  // Walk the solved bases along the flow order. `base` ends up as the number of bases
  // incorporated at or before startFlow; HPlength counts those landing exactly on it.
  unsigned int base = 0;
  int flow = 0;
  int HPlength = 0;
  while (base<master_read.sequence.size()) {
    while (flow < treePhaserFlowOrder.num_flows() and treePhaserFlowOrder.nuc_at(flow) != master_read.sequence[base])
      flow++;
    if (flow > startFlow or flow == treePhaserFlowOrder.num_flows())
      break;
    if (flow == startFlow)
      HPlength++;
    base++;
  }

  // Length of the leading homopolymer of the first hypothesis. The scan is bounded by
  // the string length so that an empty hypothesis list or an empty first hypothesis
  // yields 0 instead of reading out of range.
  int count = 0;
  if (not Hypotheses.empty()) {
    const string& reference = Hypotheses[0];
    const int reference_size = (int)reference.size();
    while (count < reference_size and reference[count] == reference[0])
      count++;
  }

  // Drop from the prefix the bases at startFlow that the hypothesis provides itself
  // (never more than were actually solved at that flow), then erase the remainder.
  base -= min(HPlength, count);
  master_read.sequence.erase(master_read.sequence.begin()+base, master_read.sequence.end());

  unsigned int prefix_size = master_read.sequence.size();
  return(prefix_size);
}
Пример #6
0
// Computes the matched-filter homopolymer estimate at one flow for the first read in
// hypothesesReadsVector and stores it in DistanceObserved[0].
//
//   working_treephaser    - solver providing QueryState()
//   hypothesesReadsVector - only element 0 is used; must be non-empty
//   max_last_flow         - not used by this function
//   refHpLen              - reference homopolymer length; the read's own length at
//                           flowPosition must differ from it by less than 3
//   flowPosition          - flow to query; must satisfy startFlow <= flowPosition < nFlows
//   startFlow             - lower bound for flowPosition
//   DistanceObserved      - output; element 0 receives filter_num / filter_den
//   DistanceHypotheses    - not used by this function
//
// Returns 0 on success and -1 on any failure (bad flowPosition, missing input/output
// slot, zero read homopolymer length, homopolymer mismatch, or a zero filter
// denominator). On failure DistanceObserved is left unmodified.
int HypothesisEvaluator::MatchedFilter(DPTreephaser &working_treephaser, vector<BasecallerRead> &hypothesesReadsVector,int max_last_flow,
                                       int refHpLen, int flowPosition, int startFlow,
                                       vector<float>& DistanceObserved,
                                       vector<float>& DistanceHypotheses) {
  if (flowPosition<startFlow || flowPosition >= nFlows) {
    cout << "Calculate Distances: Unsupported flowPosition! startFlow: " << startFlow << " flowPosition: " << flowPosition << " nFlows: " << nFlows << endl;
    return -1;
  }

  // Element 0 of both vectors is accessed unconditionally below.
  if (hypothesesReadsVector.empty() or DistanceObserved.empty()) {
    if (DEBUG)
      cerr << "Hypothesis evaluator error: no hypothesis read or no output slot" << endl;
    return -1;
  }

  BasecallerRead &read = hypothesesReadsVector[0];

  // QueryState fills the per-flow state vector and the read's HP length at flowPosition.
  vector<float> query_state(nFlows);
  int rHpLen = 0;
  working_treephaser.QueryState(read, query_state, rHpLen, nFlows, flowPosition);

  if (rHpLen == 0) {
    if (DEBUG) {
      cerr << "Hypothesis evaluator error ReadHpLen = 0 " << endl;
      cerr << "Calling Query state " << endl;
      cerr << "Hypothesis = " << read.sequence.size() << endl;
      cerr << "flow position = " << flowPosition << endl;
      cerr << "Nflows = " << nFlows << endl;
      // NOTE(review): measurements are indexed by base position here and accessed with
      // at(); this throws if the sequence is longer than normalized_measurements.
      int readSize = read.sequence.size();
      for (int i = 0; i < readSize ; i++)
        cerr << " i = " << i << " Base = " << read.sequence.at(i) << " Measure = " << read.normalized_measurements.at(i) << endl;
    }
    return -1;
  }

  if (abs(rHpLen-refHpLen) >= 3) {
    if (DEBUG)
      cerr << "Wrong rHpLen : " << rHpLen << " " << refHpLen << endl;
    return -1;
  }

  float filter_num = 0.0f;
  float filter_den = 0.0f;
  if (DEBUG)
    cout << "Matched filter details: " << endl;

  for (int flow=0; flow<nFlows; flow++) {
    // Residual with the read's own HP contribution added back, projected onto the state.
    const float local_delta = (read.normalized_measurements[flow] - read.prediction[flow] +
                               ((float)rHpLen*query_state[flow])) * query_state[flow];
    filter_num += local_delta;
    filter_den += query_state[flow] * query_state[flow];

    if (DEBUG and (query_state[flow] > 0.02 or flow == flowPosition)) {
      cout << "Flow " << flow << " State " << query_state[flow] << " Local delta "
           << local_delta
           << " Measurements " << read.normalized_measurements[flow];
      if (flow == flowPosition)
        cout << " ***" << endl;
      else
        cout << endl;
    }
  }

  // An all-zero state vector would make the quotient NaN/inf; report it as a failure.
  if (filter_den == 0.0f) {
    if (DEBUG)
      cerr << "Hypothesis evaluator error: matched filter denominator is zero" << endl;
    return -1;
  }

  DistanceObserved[0] = filter_num / filter_den;
  return(0);
}
Пример #7
0
float RegionAnalysis::evaluateParameters(std::vector<BasecallerRead> &dataAll, DPTreephaser& treephaser, float *parameters)
{
  float metric = 0;
  if (clo->cfe_control.libPhaseEstimator == "nel-mead-treephaser") {

    if (parameters[0] < 0) // cf
      metric = 1e10;
    if (parameters[1] < 0) // ie
      metric = 1e10;
    if (parameters[2] < 0) // dr
      metric = 1e10;

    if (parameters[0] > 0.04) // cf
      metric = 1e10;
    if (parameters[1] > 0.04) // ie
      metric = 1e10;
    if (parameters[2] > 0.01) // dr
      metric = 1e10;

    if (metric == 0) {

      treephaser.SetModelParameters(parameters[0], parameters[1], parameters[2]);
      for (std::vector<BasecallerRead>::iterator data = dataAll.begin(); data != dataAll.end(); data++) {

        treephaser.Simulate3(*data, 120);
        data->Normalize(20, 100);

        for (int iFlow = 20; iFlow < std::min(100, data->numFlows); iFlow++) {
          if (data->measurements[iFlow] > 1.2)
            continue;
          float delta = data->measurements[iFlow] - data->prediction[iFlow] * data->miscNormalizer;
          metric += delta * delta;
        }
      }

    }

  } else if (clo->cfe_control.libPhaseEstimator == "nel-mead-adaptive-treephaser") {

    if (parameters[0] < 0) // cf
      metric = 1e10;
    if (parameters[1] < 0) // ie
      metric = 1e10;

    if (parameters[0] > 0.04) // cf
      metric = 1e10;
    if (parameters[1] > 0.04) // ie
      metric = 1e10;

    if (metric == 0) {

      treephaser.SetModelParameters(parameters[0], parameters[1], 0);
      for (std::vector<BasecallerRead>::iterator data = dataAll.begin(); data != dataAll.end(); data++) {

//        treephaser.Simulate3(*data, 150);
//        data->AdaptiveNormalizationOfPredictions(3, 50);
        treephaser.Simulate3(*data, 200);
        data->AdaptiveNormalizationOfPredictions(4, 50);

        for (int iFlow = 25; iFlow < std::min(175, data->numFlows); iFlow++) {
          if (data->measurements[iFlow] > 1.2)
            continue;
          float delta = data->measurements[iFlow] - data->prediction[iFlow];
          metric += delta * delta;
        }
      }
    }
  }
  //  printf("CF = %1.2f%%  IE = %1.2f%%  DR = %1.2f%%  V = %1.6f\n",
  //      100*parameters[0], 100*parameters[1], 100*parameters[2], metric);
  if (isnan(metric))
    metric = 1e10;

  return metric;
}