예제 #1
0
// Creates one phasing solver per entry of global_context.flow_order_vector.
// When compiled with SSE3 support and use_SSE_basecaller is set, the solvers are
// TreephaserSSE objects stored in treephaserSSE_vector; in every other case they
// are DPTreephaser objects stored in dpTreephaser_vector.
PersistingThreadObjects::PersistingThreadObjects(const InputStructures &global_context)
    : use_SSE_basecaller(global_context.use_SSE_basecaller), realigner(50, 1)
{
#ifdef __SSE3__
  if (use_SSE_basecaller) {
    for (unsigned int order_idx = 0; order_idx < global_context.flow_order_vector.size(); ++order_idx) {
      TreephaserSSE sse_solver(global_context.flow_order_vector.at(order_idx), DPTreephaser::kWindowSizeDefault_);
      treephaserSSE_vector.push_back(sse_solver);
    }
    // The SSE solvers replace the generic ones, so there is nothing left to build.
    return;
  }
#endif

  for (unsigned int order_idx = 0; order_idx < global_context.flow_order_vector.size(); ++order_idx) {
    DPTreephaser generic_solver(global_context.flow_order_vector.at(order_idx));
    dpTreephaser_vector.push_back(generic_solver);
  }
}
예제 #2
0
파일: treePhaser.cpp 프로젝트: alecw/TS
// R entry point: solves every row of a signal matrix with DPTreephaser.
//
// Arguments (all SEXP, converted below):
//   Rsignal     - numeric matrix, one read per row and one flow per column
//   RkeyFlow    - integer vector with the key flow values
//   RflowCycle  - flow order string; must be at least as long as the number of flows
//   Rcf/Rie/Rdr - scalar model parameters handed to SetModelParameters
//   Rbasecaller - "dp-treephaser", "treephaser-adaptive" or "treephaser-swan"
//
// Returns a list with the elements "seq", "predicted", "residual" and "hpFlow",
// or R_NilValue when the signal matrix has no rows. Invalid arguments and C++
// exceptions are reported to R through Rf_error.
RcppExport SEXP treePhaser(SEXP Rsignal, SEXP RkeyFlow, SEXP RflowCycle, SEXP Rcf, SEXP Rie, SEXP Rdr, SEXP Rbasecaller)
{
  SEXP ret = R_NilValue;
  char *exceptionMesg = NULL;

  try {
    RcppMatrix<double>   signal(Rsignal);
    RcppVector<int>      keyFlow(RkeyFlow);
    string flowCycle   = Rcpp::as<string>(RflowCycle);
    double cf          = Rcpp::as<double>(Rcf);
    double ie          = Rcpp::as<double>(Rie);
    double dr          = Rcpp::as<double>(Rdr);
    string basecaller  = Rcpp::as<string>(Rbasecaller);

    unsigned int nFlow = signal.cols();
    unsigned int nRead = signal.rows();

    // Decide the solver once instead of comparing strings for every read
    const bool useDpTreephaser = (basecaller == "dp-treephaser");
    const bool useAdaptive     = (basecaller == "treephaser-adaptive");
    const bool useSwan         = (basecaller == "treephaser-swan");

    if(!useSwan && !useDpTreephaser && !useAdaptive) {
      std::string exception = "bad value for basecaller supplied: " + basecaller;
      exceptionMesg = copyMessageToR(exception.c_str());
    } else if (flowCycle.length() < nFlow) {
      std::string exception = "Flow cycle is shorter than number of flows to solve";
      exceptionMesg = copyMessageToR(exception.c_str());
    } else {

      // Prepare objects for holding and passing back results
      RcppMatrix<double>        predicted_out(nRead,nFlow);
      RcppMatrix<double>        residual_out(nRead,nFlow);
      RcppMatrix<int>           hpFlow_out(nRead,nFlow);
      std::vector< std::string> seq_out(nRead);

      // Set up key flow vector
      int nKeyFlow = keyFlow.size();
      vector <int> keyVec(nKeyFlow);
      for(int iFlow=0; iFlow < nKeyFlow; iFlow++)
        keyVec[iFlow] = keyFlow(iFlow);

      // Iterate over all reads
      vector <float> sigVec(nFlow);
      for(unsigned int iRead=0; iRead < nRead; iRead++) {
        for(unsigned int iFlow=0; iFlow < nFlow; iFlow++)
          sigVec[iFlow] = (float) signal(iRead,iFlow);
        BasecallerRead read;
        read.SetDataAndKeyNormalize(&(sigVec[0]), (int)nFlow, &(keyVec[0]), nKeyFlow-1);
        DPTreephaser dpTreephaser(flowCycle.c_str(), flowCycle.length(), 8);

        // Execute the iterative solving-normalization routine.
        // Only "dp-treephaser" passes dr through; the other modes use 0 as third parameter.
        if (useDpTreephaser) {
          dpTreephaser.SetModelParameters(cf, ie, dr);
          dpTreephaser.NormalizeAndSolve4(read, nFlow);
        } else if (useAdaptive) {
          dpTreephaser.SetModelParameters(cf, ie, 0);
          dpTreephaser.NormalizeAndSolve3(read, nFlow); // Adaptive normalization
        } else {
          dpTreephaser.SetModelParameters(cf, ie, 0);
          dpTreephaser.NormalizeAndSolve5(read, nFlow); // sliding window adaptive normalization
        }

        read.flowToString(flowCycle,seq_out[iRead]);
        for(unsigned int iFlow=0; iFlow < nFlow; iFlow++) {
          predicted_out(iRead,iFlow) = (double) read.prediction[iFlow];
          residual_out(iRead,iFlow)  = (double) read.normalizedMeasurements[iFlow] - read.prediction[iFlow];
          hpFlow_out(iRead,iFlow)    = (int)    read.solution[iFlow];
        }
      }

      // Store results once, after all reads are solved. Building the result set
      // inside the read loop copied every output matrix once per read.
      // With zero reads the function keeps returning R_NilValue, as before.
      if (nRead > 0) {
        RcppResultSet rs;
        rs.add("seq",        seq_out);
        rs.add("predicted",  predicted_out);
        rs.add("residual",   residual_out);
        rs.add("hpFlow",     hpFlow_out);

        ret = rs.getReturnList();
      }
    }
  } catch(std::exception& ex) {
    exceptionMesg = copyMessageToR(ex.what());
  } catch(...) {
    exceptionMesg = copyMessageToR("unknown reason");
  }

  // The message may contain caller-supplied text, so it must never be used as the format string.
  if(exceptionMesg != NULL)
    Rf_error("%s", exceptionMesg);

  return ret;
}
예제 #3
0
파일: RegionAnalysis.cpp 프로젝트: alecw/TS
// Worker method: loads regions one by one from wellsReader and, for each region,
// estimates the three phasing parameters (CF, IE, DR) that are written to
// (*cf)[iRegion], (*ie)[iRegion] and (*dr)[iRegion].
//
// Per region, up to five rounds are run. Each round solves the eligible wells with
// the current parameters, keeps the reads that pass the key and fit filters as the
// training set and then refines the parameters with NelderMeadOptimization.
// Training stops early when fewer than 10 reads survive the filters.
void RegionAnalysis::worker_Treephaser()
{
  DPTreephaser dpTreephaser(flowOrder.c_str(), numFlows, 8);

  std::deque<int> wellX;
  std::deque<int> wellY;
  std::deque<std::vector<float> > wellMeasurements;
  int iRegion;

  std::vector<BasecallerRead> data;
  data.reserve(MAX_CAFIE_READS_PER_REGION);

  while (wellsReader.loadNextRegion(wellX, wellY, wellMeasurements, iRegion)) {

    float parameters[3];
    parameters[0] = 0.00; // CF - initial guess
    parameters[1] = 0.00; // IE - initial guess
    parameters[2] = 0.000; // DR - initial guess

    for (int globalIteration = 0; globalIteration < 5; globalIteration++) {

      dpTreephaser.SetModelParameters(parameters[0], parameters[1], parameters[2]);

      data.clear();

      // Iterate over live library wells and consider them as a part of the phase training set

      std::deque<int>::iterator x = wellX.begin();
      std::deque<int>::iterator y = wellY.begin();
      std::deque<std::vector<float> >::iterator measurements = wellMeasurements.begin();

      for (; (x != wellX.end()) && (data.size() < MAX_CAFIE_READS_PER_REGION); ++x, ++y, ++measurements) {

        if (!mask->Match(*x, *y, MaskLive))
          continue;
        if (!mask->Match(*x, *y, MaskBead))
          continue;

        int beadClass = 1; // 1 - library, 0 - TF

        if (!mask->Match(*x, *y, MaskLib)) {  // Is it a library bead?
          if (!mask->Match(*x, *y, MaskTF))   // OK, is it at least a TF?
            continue;
          beadClass = 0;
        }

        data.push_back(BasecallerRead());

        data.back().SetDataAndKeyNormalize(&(measurements->at(0)), numFlows, libraryInfo[beadClass].Ionogram, libraryInfo[beadClass].numKeyFlows - 1);

        // Key-pass filter: every checked key flow must be a number that rounds to the expected key value.
        // The NaN test has to come first because converting NaN to int is undefined behaviour.
        bool keypass = true;
        for (int iFlow = 0; iFlow < (libraryInfo[beadClass].numKeyFlows - 1); iFlow++) {
          if (isnan(data.back().measurements[iFlow])
              || (int) (data.back().measurements[iFlow] + 0.5) != libraryInfo[beadClass].Ionogram[iFlow]) {
            keypass = false;
            break;
          }
        }

        if (!keypass) {
          data.pop_back();
          continue;
        }

        // Alternate solving and normalization over a growing flow window
        dpTreephaser.Solve(data.back(), std::min(100, numFlows));
        data.back().Normalize(11, std::min(80, numFlows));
        dpTreephaser.Solve(data.back(), std::min(120, numFlows));
        data.back().Normalize(11, std::min(100, numFlows));
        dpTreephaser.Solve(data.back(), std::min(120, numFlows));

        // Fit filter: squared residual over flows 20..99, skipping flows whose
        // normalized signal exceeds 1.2. A NaN residual adds a penalty large
        // enough to reject the read.
        float metric = 0;
        for (int iFlow = 20; (iFlow < 100) && (iFlow < numFlows); iFlow++) {
          if (data.back().normalizedMeasurements[iFlow] > 1.2)
            continue;
          float delta = data.back().normalizedMeasurements[iFlow] - data.back().prediction[iFlow];
          if (!isnan(delta))
            metric += delta * delta;
          else
            metric += 1e10;
        }

        if (metric > 1) {
          data.pop_back();
          continue;
        }

      }

      // Too few reads to train on: keep the parameters from the previous round
      if (data.size() < 10)
        break;

      // Perform parameter estimation

      NelderMeadOptimization(data, dpTreephaser, parameters, 50, 3);
    }

    // Progress output goes through the shared output mutex
    pthread_mutex_lock(common_output_mutex);
    if (data.size() < 10)
      printf("Region % 3d: Using default phase parameters, %d reads insufficient for training\n", iRegion + 1, (int) data.size());
    else
      printf("Region % 3d: Using %d reads for phase parameter training\n", iRegion + 1, (int) data.size());
    fflush(stdout);
    pthread_mutex_unlock(common_output_mutex);

    (*cf)[iRegion] = parameters[0];
    (*ie)[iRegion] = parameters[1];
    (*dr)[iRegion] = parameters[2];

  }

}
예제 #4
0
// R entry point: solves every row of a signal matrix with DPTreephaser,
// optionally applying a recalibration model.
//
// Arguments (all SEXP, converted below):
//   Rsignal          - numeric matrix, one read per row and one flow per column
//   RkeyFlow         - integer vector with the key flow values
//   RflowCycle       - flow order string; must be at least as long as the number of flows
//   Rcf/Rie/Rdr      - numeric vectors; a cf vector of length 1 sets one parameter
//                      set for all reads, otherwise element iRead is used for read iRead
//   Rbasecaller      - "treephaser-swan", "treephaser-solve", "dp-treephaser" or "treephaser-adaptive"
//   RdiagonalStates  - integer; values > 0 are passed as true to SetStateProgression
//   RmodelFile       - recalibration model file; an empty string skips model loading
//   RmodelThreshold  - integer passed to InitializeModel
//   Rxval/Ryval      - per-read integer coordinates used to look up the recalibration model
//
// Returns a list with "seq", "predicted", "residual", "norm_additive" and
// "norm_multipl". Errors are raised in R.
RcppExport SEXP treePhaser(SEXP Rsignal, SEXP RkeyFlow, SEXP RflowCycle,
                           SEXP Rcf, SEXP Rie, SEXP Rdr, SEXP Rbasecaller, SEXP RdiagonalStates,
                           SEXP RmodelFile, SEXP RmodelThreshold, SEXP Rxval, SEXP Ryval)
{
  SEXP ret = R_NilValue;
  char *exceptionMesg = NULL;

  try {
    Rcpp::NumericMatrix      signal(Rsignal);
    Rcpp::IntegerVector      keyFlow(RkeyFlow);
    string flowCycle       = Rcpp::as<string>(RflowCycle);
    Rcpp::NumericVector      cf_vec(Rcf);
    Rcpp::NumericVector      ie_vec(Rie);
    Rcpp::NumericVector      dr_vec(Rdr);
    string basecaller      = Rcpp::as<string>(Rbasecaller);
    unsigned int diagonalStates = Rcpp::as<int>(RdiagonalStates);

    // Recalibration Variables
    string model_file      = Rcpp::as<string>(RmodelFile);
    int model_threshold    = Rcpp::as<int>(RmodelThreshold);
    Rcpp::IntegerVector      x_values(Rxval);
    Rcpp::IntegerVector      y_values(Ryval);
    RecalibrationModel       recalModel;
    if (model_file.length() > 0) {
      recalModel.InitializeModel(model_file, model_threshold);
    }


    ion::FlowOrder flow_order(flowCycle, flowCycle.length());
    unsigned int nFlow = signal.cols();
    unsigned int nRead = signal.rows();

    // Decide the solver once instead of comparing strings for every read
    const bool useSwan         = (basecaller == "treephaser-swan");
    const bool useSolveOnly    = (basecaller == "treephaser-solve");
    const bool useDpTreephaser = (basecaller == "dp-treephaser");
    const bool useAdaptive     = (basecaller == "treephaser-adaptive");

    // NOTE(review): the strdup'ed messages are never freed because Rf_error does not return.
    if(!useSwan && !useSolveOnly && !useDpTreephaser && !useAdaptive) {
      std::string exception = "bad value for basecaller supplied: " + basecaller;
      exceptionMesg = strdup(exception.c_str());
    } else if (flowCycle.length() < nFlow) {
      std::string exception = "Flow cycle is shorter than number of flows to solve";
      exceptionMesg = strdup(exception.c_str());
    } else {

      // Prepare objects for holding and passing back results
      Rcpp::NumericMatrix        predicted_out(nRead,nFlow);
      Rcpp::NumericMatrix        residual_out(nRead,nFlow);
      Rcpp::NumericMatrix        norm_additive_out(nRead,nFlow);
      Rcpp::NumericMatrix        norm_multipl_out(nRead,nFlow);
      std::vector< std::string> seq_out(nRead);

      // Set up key flow vector
      int nKeyFlow = keyFlow.size();
      vector <int> keyVec(nKeyFlow);
      for(int iFlow=0; iFlow < nKeyFlow; iFlow++)
        keyVec[iFlow] = keyFlow(iFlow);

      // Iterate over all reads
      vector <float> sigVec(nFlow);
      BasecallerRead read;
      DPTreephaser dpTreephaser(flow_order);
      dpTreephaser.SetStateProgression((diagonalStates>0));

      // In contrast to pipeline, we always use droop here.
      // To have the same behavior of treephaser-swan as in the pipeline, supply dr=0
      bool per_read_phasing = true;
      if (cf_vec.size() == 1) {
        per_read_phasing = false;
        dpTreephaser.SetModelParameters((double)cf_vec(0), (double)ie_vec(0), (double)dr_vec(0));
      }

      // Main loop iterating over reads and solving them
      for(unsigned int iRead=0; iRead < nRead; iRead++) {

        // Set phasing parameters for this read
        if (per_read_phasing)
          dpTreephaser.SetModelParameters((double)cf_vec(iRead), (double)ie_vec(iRead), (double)dr_vec(iRead));
        // And load recalibration model
        if (recalModel.is_enabled()) {
          int my_x = (int)x_values(iRead);
          int my_y = (int)y_values(iRead);
          const vector<vector<vector<float> > > * aPtr = 0;
          const vector<vector<vector<float> > > * bPtr = 0;
          aPtr = recalModel.getAs(my_x, my_y);
          bPtr = recalModel.getBs(my_x, my_y);
          if (aPtr == 0 or bPtr == 0) {
            cout << "Error finding a recalibration model for x: " << x_values(iRead) << " y: " << y_values(iRead);
            cout << endl;
          }
          // The pointers are handed over even when the lookup failed
          dpTreephaser.SetAsBs(aPtr, bPtr);
        }

        for(unsigned int iFlow=0; iFlow < nFlow; iFlow++)
          sigVec[iFlow] = (float) signal(iRead,iFlow);

        // "treephaser-solve" loads the signal as is; every other mode key-normalizes it
        if (useSolveOnly) {
          read.SetData(sigVec, (int)nFlow);
        }
        else {
          read.SetDataAndKeyNormalize(&(sigVec[0]), (int)nFlow, &(keyVec[0]), nKeyFlow-1);
        }

        // Execute the iterative solving-normalization routine
        if (useDpTreephaser) {
          dpTreephaser.NormalizeAndSolve_GainNorm(read, nFlow);
        }
        else if (useSolveOnly) {
          dpTreephaser.Solve(read, nFlow);
        }
        else if (useAdaptive) {
          dpTreephaser.NormalizeAndSolve_Adaptive(read, nFlow); // Adaptive normalization
        }
        else {
          dpTreephaser.NormalizeAndSolve_SWnorm(read, nFlow); // sliding window adaptive normalization
        }

        seq_out[iRead].assign(read.sequence.begin(), read.sequence.end());
        for(unsigned int iFlow=0; iFlow < nFlow; iFlow++) {
          predicted_out(iRead,iFlow)     = (double) read.prediction[iFlow];
          residual_out(iRead,iFlow)      = (double) read.normalized_measurements[iFlow] - read.prediction[iFlow];
          norm_multipl_out(iRead,iFlow)  = (double) read.multiplicative_correction.at(iFlow);
          norm_additive_out(iRead,iFlow) = (double) read.additive_correction.at(iFlow);
        }
      }

      // Store results
      ret = Rcpp::List::create(Rcpp::Named("seq")       = seq_out,
                               Rcpp::Named("predicted") = predicted_out,
                               Rcpp::Named("residual")  = residual_out,
                               Rcpp::Named("norm_additive") = norm_additive_out,
                               Rcpp::Named("norm_multipl")  = norm_multipl_out);
    }
  } catch(std::exception& ex) {
    forward_exception_to_r(ex);
  } catch(...) {
    ::Rf_error("c++ exception (unknown reason)");
  }

  // The message may contain the caller-supplied basecaller name, so it must never be used as the format string.
  if(exceptionMesg != NULL)
    Rf_error("%s", exceptionMesg);

  return ret;
}
예제 #5
0
// R entry point: for every read, fits an incomplete-extension (IE) value that
// applies from a given burst flow on, using a three-stage grid search.
//
// Arguments (all SEXP, converted below):
//   R_signal        - numeric matrix, one read per row, one column per flow of the flow cycle
//   R_flowCycle     - flow order string
//   R_read_sequence - character vector with one base sequence per read
//   R_phasing       - numeric matrix, one row per read; columns 0..2 are read as cf, ie, dr
//   R_burstFlows    - numeric vector, burst flow per read
//   R_maxEvalFlow   - numeric vector, upper flow limit for the fit evaluation per read
//   R_maxSimFlow    - numeric vector, upper flow limit for the simulation per read
//
// Returns a list with "null_fit", "null_prediction", "burst_flow", "best_fit",
// "best_ie_value" and "best_prediction". Errors are raised in R.
RcppExport SEXP FitPhasingBurst(SEXP R_signal, SEXP R_flowCycle, SEXP R_read_sequence,
                SEXP R_phasing, SEXP R_burstFlows, SEXP R_maxEvalFlow, SEXP R_maxSimFlow) {

 SEXP ret = R_NilValue;
 const char *exceptionMesg = NULL;

 try {

     Rcpp::NumericMatrix  signal(R_signal);
     Rcpp::NumericMatrix  phasing(R_phasing);     // Standard phasing parameters
     string flowCycle   = Rcpp::as<string>(R_flowCycle);
     Rcpp::StringVector   read_sequences(R_read_sequence);
     Rcpp::NumericVector  phasing_burst(R_burstFlows);
     Rcpp::NumericVector  max_eval_flow(R_maxEvalFlow);
     Rcpp::NumericVector  max_sim_flow(R_maxSimFlow);
     int window_size    = 38; // For normalization


     ion::FlowOrder flow_order(flowCycle, flowCycle.length());
     unsigned int num_flows = flow_order.num_flows();
     unsigned int num_reads = read_sequences.size();

     // The matrices are indexed with operator(), which is not guaranteed to check
     // bounds, so reject inputs that are too small for the loops below.
     if (signal.nrow() < (int)num_reads or signal.ncol() < (int)num_flows) {
       exceptionMesg = "Signal matrix needs one row per read sequence and one column per flow of the flow cycle";
     } else if (phasing.nrow() < (int)num_reads or phasing.ncol() < 3) {
       exceptionMesg = "Phasing matrix needs one row per read sequence and three columns";
     } else {

       // Containers to store results
       Rcpp::NumericVector null_fit(num_reads);
       Rcpp::NumericMatrix null_prediction(num_reads, num_flows);
       Rcpp::NumericVector best_fit(num_reads);
       Rcpp::NumericVector best_ie_value(num_reads);
       Rcpp::NumericMatrix best_prediction(num_reads, num_flows);


       BasecallerRead bc_read;
       DPTreephaser dpTreephaser(flow_order);
       DPPhaseSimulator PhaseSimulator(flow_order);
       vector<double> cf_vec(num_flows, 0.0);
       vector<double> ie_vec(num_flows, 0.0);
       vector<double> dr_vec(num_flows, 0.0);
       vector<float>  my_signal(num_flows);   // reused for every read; fully overwritten below


       // IE Burst Estimation Loop
       for (unsigned int iRead=0; iRead<num_reads; iRead++) {

         // Set read object
         for (unsigned int iFlow=0; iFlow<num_flows; iFlow++)
           my_signal.at(iFlow) = signal(iRead, iFlow);
         bc_read.SetData(my_signal, num_flows);
         string my_sequence = Rcpp::as<std::string>(read_sequences(iRead));

         // Default phasing as baseline
         double my_best_fit, my_best_ie;
         double base_cf  = (double)phasing(iRead, 0);
         double base_ie  = (double)phasing(iRead, 1);
         double base_dr  = (double)phasing(iRead, 2);
         int burst_flow = (int)phasing_burst(iRead);
         vector<float> my_best_prediction;

         cf_vec.assign(num_flows, base_cf);
         dr_vec.assign(num_flows, base_dr);
         int my_max_flow  = min((int)num_flows, (int)max_sim_flow(iRead));
         int my_eval_flow = min(my_max_flow, (int)max_eval_flow(iRead));

         PhaseSimulator.SetBaseSequence(my_sequence);
         PhaseSimulator.SetMaxFlows(my_max_flow);
         PhaseSimulator.SetPhasingParameters_Basic(base_cf, base_ie, base_dr);
         PhaseSimulator.UpdateStates(my_max_flow);
         PhaseSimulator.GetPredictions(bc_read.prediction);
         dpTreephaser.WindowedNormalize(bc_read, (my_eval_flow/window_size), window_size, true);

         // Null model: squared residual of the baseline prediction over flows [0, my_eval_flow)
         my_best_ie = base_ie;
         my_best_prediction = bc_read.prediction;
         my_best_fit = 0;
         for (int iFlow=0; iFlow<my_eval_flow; iFlow++) {
           double residual = bc_read.raw_measurements.at(iFlow) - bc_read.prediction.at(iFlow);
           my_best_fit += residual*residual;
         }
         for (unsigned int iFlow=0; iFlow<num_flows; iFlow++)
           null_prediction(iRead, iFlow) = bc_read.prediction.at(iFlow);
         null_fit(iRead) = my_best_fit;

         // Make sure that there are enough flows to fit a burst
         if (burst_flow < my_eval_flow-10) {
           int    num_steps  = 0;
           double step_size  = 0.0;
           double step_start = 0.0;
           double step_end   = 0.0;

           // Brute force phasing burst value estimation using grid search, crude first, then refine
           for (unsigned int iIteration = 0; iIteration<3; iIteration++) {

             // Each refinement brackets the current best value with the previous
             // step size before shrinking the step size itself.
             switch(iIteration) {
               case 0:
                 step_size = 0.05;
                 step_end = 0.8;
                 break;
               case 1:
                 step_end   = (floor(my_best_ie / step_size)*step_size) + step_size;
                 step_start = max(0.0, (step_end - 2.0*step_size));
                 step_size  = 0.01;
                 break;
               default:
                 step_end   = (floor(my_best_ie / step_size)*step_size) + step_size;
                 step_start = max(0.0, step_end - 2*step_size);
                 step_size = step_size / 10;
             }
             num_steps  = 1+ ((step_end - step_start) / step_size);

             for (int iPhase=0; iPhase <= num_steps; iPhase++) {

               double try_ie = step_start+(iPhase*step_size);
               ie_vec.assign(num_flows, try_ie);

               PhaseSimulator.SetBasePhasingParameters(burst_flow, cf_vec, ie_vec, dr_vec);
               PhaseSimulator.UpdateStates(my_max_flow);
               PhaseSimulator.GetPredictions(bc_read.prediction);
               dpTreephaser.WindowedNormalize(bc_read, (my_eval_flow/window_size), window_size, true);

               // NOTE(review): this sum starts after burst_flow while the null fit above starts
               // at flow 0, so the two values cover different flow ranges - confirm this is intended.
               double my_fit = 0.0;
               for (int iFlow=burst_flow+1; iFlow<my_eval_flow; iFlow++) {
                 double residual = bc_read.raw_measurements.at(iFlow) - bc_read.prediction.at(iFlow);
                 my_fit += residual*residual;
               }
               if (my_fit < my_best_fit) {
                 my_best_fit = my_fit;
                 my_best_ie  = try_ie;
                 my_best_prediction = bc_read.prediction;
               }
             }
           }
         }

         // Set output information for this read
         best_fit(iRead) = my_best_fit;
         best_ie_value(iRead)   = my_best_ie;
         for (unsigned int iFlow=0; iFlow<num_flows; iFlow++)
           best_prediction(iRead, iFlow) = my_best_prediction.at(iFlow);
       }

       ret = Rcpp::List::create(Rcpp::Named("null_fit")        = null_fit,
                                Rcpp::Named("null_prediction") = null_prediction,
                                Rcpp::Named("burst_flow")      = phasing_burst,
                                Rcpp::Named("best_fit")        = best_fit,
                                Rcpp::Named("best_ie_value")   = best_ie_value,
                                Rcpp::Named("best_prediction") = best_prediction);
     }

 } catch(std::exception& ex) {
   forward_exception_to_r(ex);
 } catch(...) {
   ::Rf_error("c++ exception (unknown reason)");
 }

 // Raised outside the try block so that all C++ objects are destroyed first
 if(exceptionMesg != NULL)
   Rf_error("%s", exceptionMesg);
 return ret;

}
예제 #6
0
// R entry point: simulates the flow signal predicted by DPTreephaser for a set
// of base sequences.
//
// Arguments (all SEXP, converted below):
//   Rsequence        - character vector, one base sequence per read
//   RflowCycle       - flow order string
//   Rcf/Rie/Rdr      - numeric matrices read at row 0; a cf matrix with exactly one
//                      column sets one parameter set for all reads, otherwise
//                      column iRead is used for read iRead
//   Rmaxflows        - integer, number of flows to simulate (capped at the flow order length)
//   RgetStates       - integer; values > 0 together with exactly one read request the state output
//   RdiagonalStates  - integer; values > 0 are passed as true to SetStateProgression
//   RmodelFile       - recalibration model file; an empty string skips model loading
//   RmodelThreshold  - integer passed to InitializeModel
//   Rxval/Ryval      - per-read integer coordinates used to look up the recalibration model
//
// Returns a list with "sig" and, when states were requested, "states" and
// "HPlengths". Errors are raised in R.
RcppExport SEXP treePhaserSim(SEXP Rsequence, SEXP RflowCycle, SEXP Rcf, SEXP Rie, SEXP Rdr,
                              SEXP Rmaxflows, SEXP RgetStates, SEXP RdiagonalStates,
                              SEXP RmodelFile, SEXP RmodelThreshold, SEXP Rxval, SEXP Ryval)
{
  SEXP ret = R_NilValue;
  const char *exceptionMesg = NULL;

  try {

    Rcpp::StringVector            sequences(Rsequence);
    string flowCycle            = Rcpp::as<string>(RflowCycle);
    Rcpp::NumericMatrix           cf_vec(Rcf);
    Rcpp::NumericMatrix           ie_vec(Rie);
    Rcpp::NumericMatrix           dr_vec(Rdr);
    unsigned int max_flows      = Rcpp::as<int>(Rmaxflows);
    unsigned int get_states     = Rcpp::as<int>(RgetStates);
    unsigned int diagonalStates = Rcpp::as<int>(RdiagonalStates);

    // Recalibration Variables
    string model_file      = Rcpp::as<string>(RmodelFile);
    int model_threshold    = Rcpp::as<int>(RmodelThreshold);
    Rcpp::IntegerVector      x_values(Rxval);
    Rcpp::IntegerVector      y_values(Ryval);
    RecalibrationModel       recalModel;
    if (model_file.length() > 0) {
      recalModel.InitializeModel(model_file, model_threshold);
    }

    ion::FlowOrder flow_order(flowCycle, flowCycle.length());
    unsigned int nFlow = flow_order.num_flows();
    unsigned int nRead = sequences.size();
    max_flows = min(max_flows, nFlow);

    // One column means one parameter set for all reads, anything else is read per read
    const bool per_read_phasing = (cf_vec.ncol() != 1);

    // The phasing matrices are indexed with operator(), which is not guaranteed to
    // check bounds, so make sure every element read below exists.
    const int required_cols = per_read_phasing ? (int)nRead : 1;
    const bool phasing_ok = (required_cols == 0) or
        (cf_vec.nrow() >= 1 and ie_vec.nrow() >= 1 and dr_vec.nrow() >= 1 and
         cf_vec.ncol() >= required_cols and ie_vec.ncol() >= required_cols and dr_vec.ncol() >= required_cols);

    if (not phasing_ok) {
      exceptionMesg = "Phasing matrices cf, ie and dr need at least one row and either one column or one column per sequence";
    } else {

      // Prepare objects for holding and passing back results
      Rcpp::NumericMatrix       predicted_out(nRead,nFlow);
      vector<vector<float> >    query_states;
      vector<int>               hp_lengths;

      // The state query is only supported for a single read
      const bool query_all_states = (nRead == 1 and get_states > 0);

      // Iterate over all sequences
      BasecallerRead read;
      DPTreephaser dpTreephaser(flow_order);
      if (not per_read_phasing)
        dpTreephaser.SetModelParameters((double)cf_vec(0,0), (double)ie_vec(0,0), (double)dr_vec(0,0));
      dpTreephaser.SetStateProgression((diagonalStates>0));
      unsigned int max_length = (2*flow_order.num_flows());

      for(unsigned int iRead=0; iRead<nRead; iRead++) {

        // Copy the sequence, truncated to max_length bases
        string mySequence = Rcpp::as<std::string>(sequences(iRead));
        read.sequence.clear();
        read.sequence.reserve(max_length);
        for(unsigned int iBase=0; iBase<mySequence.length() and iBase<max_length; ++iBase){
          read.sequence.push_back(mySequence.at(iBase));
        }
        // Set phasing parameters for this read
        if (per_read_phasing)
          dpTreephaser.SetModelParameters((double)cf_vec(0,iRead), (double)ie_vec(0,iRead), (double)dr_vec(0,iRead));

        // If you bothered specifying a recalibration model you probably want its effect on the predictions...
        if (recalModel.is_enabled()) {
          int my_x = (int)x_values(iRead);
          int my_y = (int)y_values(iRead);
          const vector<vector<vector<float> > > * aPtr = 0;
          const vector<vector<vector<float> > > * bPtr = 0;
          aPtr = recalModel.getAs(my_x, my_y);
          bPtr = recalModel.getBs(my_x, my_y);
          if (aPtr == 0 or bPtr == 0) {
            cout << "Error finding a recalibration model for x: " << x_values(iRead) << " y: " << y_values(iRead);
            cout << endl;
          }
          // The pointers are handed over even when the lookup failed
          dpTreephaser.SetAsBs(aPtr, bPtr);
        }

        if (query_all_states)
          dpTreephaser.QueryAllStates(read, query_states, hp_lengths, max_flows);
        else
          dpTreephaser.Simulate(read, max_flows);

        for(unsigned int iFlow=0; iFlow<nFlow and iFlow<max_flows; ++iFlow){
          predicted_out(iRead,iFlow) = (double) read.prediction.at(iFlow);
        }
      }

      // Store results
      if (query_all_states) {
        Rcpp::NumericMatrix        states(hp_lengths.size(), nFlow);
        Rcpp::NumericVector        HPlengths(hp_lengths.size());
        for (unsigned int iHP=0; iHP<hp_lengths.size(); iHP++){
          HPlengths(iHP) = (double)hp_lengths[iHP];
          for (unsigned int iFlow=0; iFlow<nFlow; iFlow++)
            states(iHP, iFlow) = (double)query_states.at(iHP).at(iFlow);
        }
        ret = Rcpp::List::create(Rcpp::Named("sig")  = predicted_out,
                                 Rcpp::Named("states")  = states,
                                 Rcpp::Named("HPlengths")  = HPlengths);
      } else {
        ret = Rcpp::List::create(Rcpp::Named("sig")  = predicted_out);
      }
    }

  } catch(std::exception& ex) {
    forward_exception_to_r(ex);
  } catch(...) {
    ::Rf_error("c++ exception (unknown reason)");
  }

  // Raised outside the try block so that all C++ objects are destroyed first
  if(exceptionMesg != NULL)
    Rf_error("%s", exceptionMesg);

  return ret;
}
예제 #7
0
// Computes squared distances between a measured flow signal and the predicted
// signals of several sequence hypotheses.
//
// Inputs:
//   NormalizedMeasurements - measured signal, used as raw and normalized measurements of the read
//   cf, ie, droop          - model parameters passed to DPTreephaser::SetModelParameters
//   flow_order             - flow order used for solving and simulation
//   Hypotheses             - candidate sequences; Hypotheses[0] is the reference hypothesis
//   startFlow              - flow at which the hypotheses start; when > 0 the read is
//                            solved up to startFlow+20 to obtain a sequence prefix
//   applyNormalization     - values > 0 apply WindowedNormalize to each hypothesis
//   verbose                - values > 0 print diagnostics through Rprintf
// Outputs:
//   DistanceObserved[i]     - squared distance between measurements and prediction of hypothesis i
//   DistanceHypotheses[i-1] - squared distance between predictions of hypothesis 0 and hypothesis i
//   predictions[i], normalizedMeasurements[i] - per-hypothesis prediction and normalized signal
//
// Only flows up to the last incorporating flow of any hypothesis, and only flows
// where some hypothesis prediction differs from hypothesis 0 by more than 0.05,
// contribute to the distances. With no hypotheses, all outputs are left empty.
void CalculateHypDistances(const vector<float>& NormalizedMeasurements,
				  const float& cf,
				  const float& ie,
				  const float& droop,
				  const ion::FlowOrder& flow_order,
				  const vector<string>& Hypotheses,
				  const int& startFlow,
				  vector<float>& DistanceObserved,
				  vector<float>& DistanceHypotheses,
				  vector<vector<float> >& predictions,
				  vector<vector<float> >& normalizedMeasurements,
				  int applyNormalization,
				  int verbose)
{
	// Without a hypothesis there is nothing to compare; this also keeps
	// Hypotheses.size()-1 below from wrapping around.
	if (Hypotheses.empty()) {
		DistanceObserved.clear();
		DistanceHypotheses.clear();
		predictions.clear();
		normalizedMeasurements.clear();
		return;
	}

	// Create return data structures
	// Distance of normalized observations to different hypotheses: d(obs,h1), ... , d(obs,hN)
	DistanceObserved.assign(Hypotheses.size(), 0);
	// Distance of hypotheses to first hypothesis: d(h1,h2), ... , d(h1, hN)
	DistanceHypotheses.assign(Hypotheses.size()-1, 0);
	predictions.resize(Hypotheses.size());
	normalizedMeasurements.resize(Hypotheses.size());

	// Loading key normalized values into a read and performing adaptive normalization
	BasecallerRead read;
	read.key_normalizer = 1;
	read.raw_measurements = NormalizedMeasurements;
	read.normalized_measurements = NormalizedMeasurements;
	read.sequence.clear();
	read.sequence.reserve(2*flow_order.num_flows());
	read.prediction.assign(flow_order.num_flows(), 0);
	read.additive_correction.assign(flow_order.num_flows(), 0);
	read.multiplicative_correction.assign(flow_order.num_flows(), 1.0);

	const int window_size = 50;
	DPTreephaser dpTreephaser(flow_order);
	dpTreephaser.SetModelParameters(cf, ie, droop);

	// Solve beginning of maybe clipped read
	if (startFlow>0)
		dpTreephaser.Solve(read, (startFlow+20), 0);

	// StartFlow clipped? Get solved HP length at startFlow
	unsigned int base = 0;
	int flow = 0;
	int HPlength = 0;
	while (base<read.sequence.size()){
		while (flow < flow_order.num_flows() and flow_order.nuc_at(flow) != read.sequence[base])
			flow++;
		if (flow > startFlow or flow == flow_order.num_flows())
			break;
		if (flow == startFlow)
			HPlength++;
		base++;
	}
	if (verbose>0)
		Rprintf("Solved %d bases until (not incl.) flow %d. HP of height %d at flow %d.\n", (int)base, flow, HPlength, startFlow);

	// Get HP size at the start of the reference, i.e., Hypotheses[0].
	// The scan is bounded by the hypothesis length; an empty reference has an HP of length 0.
	const int reference_length = (int)Hypotheses[0].length();
	int count = (reference_length > 0) ? 1 : 0;
	while (count < reference_length and Hypotheses[0][count] == Hypotheses[0][0])
		count++;
	if (verbose>0)
		Rprintf("Hypothesis starts with an HP of length %d\n", count);

	// Adjust the length of the prefix and erase extra solved bases
	if (HPlength>count)
		base -= count;
	else
		base -= HPlength;
	read.sequence.erase(read.sequence.begin()+base, read.sequence.end());
	unsigned int prefix_size = read.sequence.size();

	// creating predictions for the individual hypotheses
	vector<BasecallerRead> hypothesesReads(Hypotheses.size());
	int max_last_flow  = 0;

	for (unsigned int r=0; r<hypothesesReads.size(); ++r) {

		hypothesesReads[r] = read;
		// add hypothesis sequence to prefix
		for (base=0; base<Hypotheses[r].length() and base<(2*(unsigned int)flow_order.num_flows()-prefix_size); base++)
			hypothesesReads[r].sequence.push_back(Hypotheses[r][base]);

		// get last main incorporating flow
		int last_incorporating_flow = 0;
		base = 0;
		flow = 0;
		while (base<hypothesesReads[r].sequence.size() and flow<flow_order.num_flows()){
			// Bounded search: a base that cannot be placed in the remaining flows
			// must not push the flow index past the end of the flow order.
			while (flow<flow_order.num_flows() and flow_order.nuc_at(flow) != hypothesesReads[r].sequence[base])
				flow++;
			if (flow >= flow_order.num_flows())
				break;
			last_incorporating_flow = flow;
			if (last_incorporating_flow > max_last_flow)
				max_last_flow = last_incorporating_flow;
			base++;
		}

		// Simulate sequence
		dpTreephaser.Simulate(hypothesesReads[r], flow_order.num_flows());

		// Adaptively normalize each hypothesis
		if (applyNormalization>0) {
			const int steps = last_incorporating_flow / window_size;
			dpTreephaser.WindowedNormalize(hypothesesReads[r], steps, window_size);
		}

		// Solver simulates beginning of the read and then fills in the remaining clipped bases
		dpTreephaser.Solve(hypothesesReads[r], flow_order.num_flows(), last_incorporating_flow);

		// Store predictions and adaptively normalized measurements
		predictions[r] = hypothesesReads[r].prediction;
		normalizedMeasurements[r] = hypothesesReads[r].normalized_measurements;
	}


	// --- Calculating distances ---
	// Include only flow values in the distance where the predictions differ by more than "threshold"
	float threshold = 0.05;

	// Do not include flows after main inc. flow of lastest hypothesis
	for (int iFlow=0; iFlow<(max_last_flow+1); ++iFlow) {
		bool includeFlow = false;
		for (unsigned int hyp=1; hyp<hypothesesReads.size(); ++hyp) {
			// Explicit absolute value, so the result does not depend on which
			// abs() overload (integer or floating point) is visible here.
			float difference = hypothesesReads[hyp].prediction[iFlow] - hypothesesReads[0].prediction[iFlow];
			if (difference < 0)
				difference = -difference;
			if (difference > threshold)
				includeFlow = true;
		}

		if (includeFlow) {
			for (unsigned int hyp=0; hyp<hypothesesReads.size(); ++hyp) {
				float residual = hypothesesReads[hyp].normalized_measurements[iFlow] - hypothesesReads[hyp].prediction[iFlow];
				DistanceObserved[hyp] += residual * residual;
				if (hyp>0) {
					residual = hypothesesReads[0].prediction[iFlow] - hypothesesReads[hyp].prediction[iFlow];
					DistanceHypotheses[hyp-1] += residual * residual;
				}
			}
		}

	}

	// --- verbose ---
	if (verbose>0){
		// %d expects an int, so the size is converted explicitly
		Rprintf("Calculating distances between %d hypotheses starting at flow %d:\n", (int)Hypotheses.size(), startFlow);
		for (unsigned int i=0; i<Hypotheses.size(); ++i){
			for (unsigned int j=0; j<Hypotheses[i].length(); ++j)
				Rprintf("%c", Hypotheses[i][j]);
			Rprintf("\n");
		}
		Rprintf("Solved read prefix: ");
		for (unsigned int j=0; j<prefix_size; ++j)
			Rprintf("%c", read.sequence[j]);
		Rprintf("\n");
		Rprintf("Extended Hypotheses reads to:\n");
		for (unsigned int i=0; i<hypothesesReads.size(); ++i){
			for (unsigned int j=0; j<hypothesesReads[i].sequence.size(); ++j)
				Rprintf("%c", hypothesesReads[i].sequence[j]);
			Rprintf("\n");
		}
		Rprintf("Calculated Distances d2(obs, H_i), d2(H_i, H_0):\n");
		Rprintf("%f, 0\n", DistanceObserved[0]);
		for (unsigned int i=1; i<Hypotheses.size(); ++i)
			Rprintf("%f, %f\n", DistanceObserved[i], DistanceHypotheses[i-1]);
	}

}