Exemplo n.º 1
0
RcppExport SEXP treePhaser(SEXP Rsignal, SEXP RkeyFlow, SEXP RflowCycle,
                           SEXP Rcf, SEXP Rie, SEXP Rdr, SEXP Rbasecaller, SEXP RdiagonalStates,
                           SEXP RmodelFile, SEXP RmodelThreshold, SEXP Rxval, SEXP Ryval)
{
  SEXP ret = R_NilValue;
  char *exceptionMesg = NULL;

  try {
    Rcpp::NumericMatrix      signal(Rsignal);
    Rcpp::IntegerVector      keyFlow(RkeyFlow);
    string flowCycle       = Rcpp::as<string>(RflowCycle);
    Rcpp::NumericVector      cf_vec(Rcf);
    Rcpp::NumericVector      ie_vec(Rie);
    Rcpp::NumericVector      dr_vec(Rdr);
    string basecaller      = Rcpp::as<string>(Rbasecaller);
    unsigned int diagonalStates = Rcpp::as<int>(RdiagonalStates);

    // Recalibration Variables
    string model_file      = Rcpp::as<string>(RmodelFile);
    int model_threshold    = Rcpp::as<int>(RmodelThreshold);
    Rcpp::IntegerVector      x_values(Rxval);
    Rcpp::IntegerVector      y_values(Ryval);
    RecalibrationModel       recalModel;
    if (model_file.length() > 0) {
      recalModel.InitializeModel(model_file, model_threshold);
    }


    ion::FlowOrder flow_order(flowCycle, flowCycle.length());
    unsigned int nFlow = signal.cols();
    unsigned int nRead = signal.rows();

    if(basecaller != "treephaser-swan" && basecaller != "treephaser-solve" && basecaller != "dp-treephaser" && basecaller != "treephaser-adaptive") {
      std::string exception = "base value for basecaller supplied: " + basecaller;
      exceptionMesg = strdup(exception.c_str());
    } else if (flowCycle.length() < nFlow) {
      std::string exception = "Flow cycle is shorter than number of flows to solve";
      exceptionMesg = strdup(exception.c_str());
    } else {

      // Prepare objects for holding and passing back results
      Rcpp::NumericMatrix        predicted_out(nRead,nFlow);
      Rcpp::NumericMatrix        residual_out(nRead,nFlow);
      Rcpp::NumericMatrix        norm_additive_out(nRead,nFlow);
      Rcpp::NumericMatrix        norm_multipl_out(nRead,nFlow);
      std::vector< std::string> seq_out(nRead);

      // Set up key flow vector
      int nKeyFlow = keyFlow.size(); 
      vector <int> keyVec(nKeyFlow);
      for(int iFlow=0; iFlow < nKeyFlow; iFlow++)
        keyVec[iFlow] = keyFlow(iFlow);

      // Iterate over all reads
      vector <float> sigVec(nFlow);
      BasecallerRead read;
      DPTreephaser dpTreephaser(flow_order);
      dpTreephaser.SetStateProgression((diagonalStates>0));

      // In contrast to pipeline, we always use droop here.
      // To have the same behavior of treephaser-swan as in the pipeline, supply dr=0
      bool per_read_phasing = true;
      if (cf_vec.size() == 1) {
        per_read_phasing = false;
        dpTreephaser.SetModelParameters((double)cf_vec(0), (double)ie_vec(0), (double)dr_vec(0));
      }
 
      // Main loop iterating over reads and solving them
      for(unsigned int iRead=0; iRead < nRead; iRead++) {

        // Set phasing parameters for this read
        if (per_read_phasing)
          dpTreephaser.SetModelParameters((double)cf_vec(iRead), (double)ie_vec(iRead), (double)dr_vec(iRead));
        // And load recalibration model
        if (recalModel.is_enabled()) {
          int my_x = (int)x_values(iRead);
          int my_y = (int)y_values(iRead);
          const vector<vector<vector<float> > > * aPtr = 0;
          const vector<vector<vector<float> > > * bPtr = 0;
          aPtr = recalModel.getAs(my_x, my_y);
          bPtr = recalModel.getBs(my_x, my_y);
          if (aPtr == 0 or bPtr == 0) {
            cout << "Error finding a recalibration model for x: " << x_values(iRead) << " y: " << y_values(iRead);
            cout << endl;
          }
          dpTreephaser.SetAsBs(aPtr, bPtr);
        }

        for(unsigned int iFlow=0; iFlow < nFlow; iFlow++)
          sigVec[iFlow] = (float) signal(iRead,iFlow);
        
        // Interface to just solve without any normalization
        if (basecaller == "treephaser-solve") { // Interface to just solve without any normalization
          read.SetData(sigVec, (int)nFlow);
        } 
        else {
          read.SetDataAndKeyNormalize(&(sigVec[0]), (int)nFlow, &(keyVec[0]), nKeyFlow-1);
        }
          
        // Execute the iterative solving-normalization routine
        if (basecaller == "dp-treephaser") {
          dpTreephaser.NormalizeAndSolve_GainNorm(read, nFlow);
        }
        else if (basecaller == "treephaser-solve") {
          dpTreephaser.Solve(read, nFlow);
        }
        else if (basecaller == "treephaser-adaptive") {
          dpTreephaser.NormalizeAndSolve_Adaptive(read, nFlow); // Adaptive normalization
        }
        else {
          dpTreephaser.NormalizeAndSolve_SWnorm(read, nFlow); // sliding window adaptive normalization
        }

        seq_out[iRead].assign(read.sequence.begin(), read.sequence.end());
        for(unsigned int iFlow=0; iFlow < nFlow; iFlow++) {
          predicted_out(iRead,iFlow)     = (double) read.prediction[iFlow];
          residual_out(iRead,iFlow)      = (double) read.normalized_measurements[iFlow] - read.prediction[iFlow];
          norm_multipl_out(iRead,iFlow)  = (double) read.multiplicative_correction.at(iFlow);
          norm_additive_out(iRead,iFlow) = (double) read.additive_correction.at(iFlow);
        }
      }

      // Store results
      ret = Rcpp::List::create(Rcpp::Named("seq")       = seq_out,
                               Rcpp::Named("predicted") = predicted_out,
                               Rcpp::Named("residual")  = residual_out,
                               Rcpp::Named("norm_additive") = norm_additive_out,
                               Rcpp::Named("norm_multipl")  = norm_multipl_out);
    }
  } catch(std::exception& ex) {
    forward_exception_to_r(ex);
  } catch(...) {
    ::Rf_error("c++ exception (unknown reason)");
  }
    
  if(exceptionMesg != NULL)
    Rf_error(exceptionMesg);

  return ret;
}
Exemplo n.º 2
0
RcppExport SEXP treePhaser(SEXP Rsignal, SEXP RkeyFlow, SEXP RflowCycle, SEXP Rcf, SEXP Rie, SEXP Rdr, SEXP Rbasecaller)
{
  SEXP ret = R_NilValue;
  char *exceptionMesg = NULL;

  try {
    RcppMatrix<double>   signal(Rsignal);
    RcppVector<int>      keyFlow(RkeyFlow);
    string flowCycle   = Rcpp::as<string>(RflowCycle);
    double cf          = Rcpp::as<double>(Rcf);
    double ie          = Rcpp::as<double>(Rie);
    double dr          = Rcpp::as<double>(Rdr);
    string basecaller  = Rcpp::as<string>(Rbasecaller);
  
    unsigned int nFlow = signal.cols();
    unsigned int nRead = signal.rows();

    if(basecaller != "treephaser-swan" && basecaller != "dp-treephaser" && basecaller != "treephaser-adaptive") {
      std::string exception = "base value for basecaller supplied: " + basecaller;
      exceptionMesg = copyMessageToR(exception.c_str());
    } else if (flowCycle.length() < nFlow) {
      std::string exception = "Flow cycle is shorter than number of flows to solve";
      exceptionMesg = copyMessageToR(exception.c_str());
    } else {

      // Prepare objects for holding and passing back results
      RcppMatrix<double>        predicted_out(nRead,nFlow);
      RcppMatrix<double>        residual_out(nRead,nFlow);
      RcppMatrix<int>           hpFlow_out(nRead,nFlow);
      std::vector< std::string> seq_out(nRead);

      // Set up key flow vector
      int nKeyFlow = keyFlow.size(); 
      vector <int> keyVec(nKeyFlow);
      for(int iFlow=0; iFlow < nKeyFlow; iFlow++)
        keyVec[iFlow] = keyFlow(iFlow);

      // Iterate over all reads
      vector <float> sigVec(nFlow);
      string result;
      for(unsigned int iRead=0; iRead < nRead; iRead++) {
        for(unsigned int iFlow=0; iFlow < nFlow; iFlow++)
          sigVec[iFlow] = (float) signal(iRead,iFlow);
        BasecallerRead read;
        read.SetDataAndKeyNormalize(&(sigVec[0]), (int)nFlow, &(keyVec[0]), nKeyFlow-1);
        DPTreephaser dpTreephaser(flowCycle.c_str(), flowCycle.length(), 8);
        if (basecaller == "dp-treephaser")
          dpTreephaser.SetModelParameters(cf, ie, dr);
        else
          dpTreephaser.SetModelParameters(cf, ie, 0); // Adaptive normalization
          
        // Execute the iterative solving-normalization routine
        if (basecaller == "dp-treephaser")
          dpTreephaser.NormalizeAndSolve4(read, nFlow);
        else if (basecaller == "treephaser-adaptive")
          dpTreephaser.NormalizeAndSolve3(read, nFlow); // Adaptive normalization
        else
          dpTreephaser.NormalizeAndSolve5(read, nFlow); // sliding window adaptive normalization

        read.flowToString(flowCycle,seq_out[iRead]);
        for(unsigned int iFlow=0; iFlow < nFlow; iFlow++) {
          predicted_out(iRead,iFlow) = (double) read.prediction[iFlow];
          residual_out(iRead,iFlow)  = (double) read.normalizedMeasurements[iFlow] - read.prediction[iFlow];
          hpFlow_out(iRead,iFlow)    = (int)    read.solution[iFlow];
        }

        // Store results
        RcppResultSet rs;
        rs.add("seq",        seq_out);
        rs.add("predicted",  predicted_out);
        rs.add("residual",   residual_out);
        rs.add("hpFlow",     hpFlow_out);

        ret = rs.getReturnList();
      }
    }
  } catch(std::exception& ex) {
    exceptionMesg = copyMessageToR(ex.what());
  } catch(...) {
    exceptionMesg = copyMessageToR("unknown reason");
  }
    
  if(exceptionMesg != NULL)
    Rf_error(exceptionMesg);

  return ret;
}
Exemplo n.º 3
0
RcppExport SEXP phaseSolve(SEXP Rsignal, SEXP RflowCycle, SEXP RnucConc, SEXP Rcf, SEXP Rie, SEXP Rdr, SEXP RhpScale, SEXP RdroopType, SEXP RmaxAdv, SEXP RnIterations, SEXP RresidualScale, SEXP RresidualScaleMinFlow, SEXP RresidualScaleMaxFlow, SEXP RextraTaps, SEXP RdebugBaseCall)
{
    SEXP ret = R_NilValue;
    char *exceptionMesg = NULL;

    try {
        Rcpp::NumericMatrix signal(Rsignal);
        string flowCycle         = Rcpp::as<string>(RflowCycle);
        Rcpp::NumericMatrix cc(RnucConc);
        Rcpp::NumericVector cf(Rcf);
        Rcpp::NumericVector ie(Rie);
        Rcpp::NumericVector dr(Rdr);
        Rcpp::NumericVector hpScale(RhpScale);
        string drType                     = Rcpp::as<string>(RdroopType);
        unsigned int maxAdv               = (unsigned int) Rcpp::as<int>(RmaxAdv);
        unsigned int nIterations          = (unsigned int) Rcpp::as<int>(RnIterations);
        unsigned int extraTaps            = (unsigned int) Rcpp::as<int>(RextraTaps);
        bool residualScale                = Rcpp::as<bool>(RresidualScale);
        int residualScaleMinFlow          = Rcpp::as<int>(RresidualScaleMinFlow);
        int residualScaleMaxFlow          = Rcpp::as<int>(RresidualScaleMaxFlow);
        bool debugBaseCall                = Rcpp::as<bool>(RdebugBaseCall);

        unsigned int nFlow = signal.cols();
        unsigned int nRead = signal.rows();

        DroopType droopType;
        bool badDroopType = false;
        if(drType == "ONLY_WHEN_INCORPORATING") {
            droopType = ONLY_WHEN_INCORPORATING;
        } else if(drType == "EVERY_FLOW") {
            droopType = EVERY_FLOW;
        } else {
            badDroopType = true;
        }

        if(badDroopType) {
            std::string exception = "bad droop type supplied\n";
            exceptionMesg = strdup(exception.c_str());
        } else if(cc.rows() != (int) N_NUCLEOTIDES) {
            std::string exception = "concentration matrix should have 4 rows\n";
            exceptionMesg = strdup(exception.c_str());
        } else if(cc.cols() != (int) N_NUCLEOTIDES) {
            std::string exception = "concentration matrix should have 4 columns\n";
            exceptionMesg = strdup(exception.c_str());
        } else {
            // recast cf, ie, dr, hpScale
            weight_vec_t cfMod(cf.size());
            for(int i=0; i<cf.size(); i++)
                cfMod[i] = cf(i);
            weight_vec_t ieMod(ie.size());
            for(int i=0; i<ie.size(); i++)
                ieMod[i] = ie(i);
            weight_vec_t drMod(dr.size());
            for(int i=0; i<dr.size(); i++)
                drMod[i] = dr(i);
            weight_vec_t hpScaleMod(hpScale.size());
            for(int i=0; i<hpScale.size(); i++)
                hpScaleMod[i] = hpScale(i);

            // recast nuc concentration
            vector<weight_vec_t> ccMod(cc.rows());
            for(int iRow=0; iRow < cc.rows(); iRow++) {
                ccMod[iRow].resize(cc.cols());
                for(unsigned int iCol=0; iCol < N_NUCLEOTIDES; iCol++)
                    ccMod[iRow][iCol] = cc(iRow,iCol);
            }

            // Other recasts
            hpLen_t maxAdvMod = (hpLen_t) maxAdv;

            // Prepare objects for holding and passing back results
            Rcpp::NumericMatrix        predicted_out(nRead,nFlow);
            Rcpp::NumericMatrix        residual_out(nRead,nFlow);
            Rcpp::IntegerMatrix        hpFlow_out(nRead,nFlow);
            std::vector< std::string>  seq_out(nRead);
            Rcpp::NumericMatrix        multiplier_out(nRead,1+nIterations);

            // Iterate over all reads
            weight_vec_t sigMod(nFlow);
            string result;
            for(unsigned int iRead=0; iRead < nRead; iRead++) {
                for(unsigned int iFlow=0; iFlow < nFlow; iFlow++)
                    sigMod[iFlow] = (weight_t) signal(iRead,iFlow);

                PhaseSolve p;
                p.SetResidualScale(residualScale);
                if(residualScaleMinFlow >= 0)
                    p.SetResidualScaleMinFlow((unsigned int) residualScaleMinFlow);
                if(residualScaleMaxFlow >= 0)
                    p.SetResidualScaleMaxFlow((unsigned int) residualScaleMaxFlow);
                p.setExtraTaps(extraTaps);
                p.setHpScale(hpScaleMod);
                p.setPhaseParam(flowCycle,maxAdvMod,ccMod,cfMod,ieMod,drMod,droopType);
                p.GreedyBaseCall(sigMod, nIterations, debugBaseCall);
                p.getSeq(result);
                seq_out[iRead] = result;
                weight_vec_t & predicted = p.GetPredictedSignal();
                weight_vec_t & residual  = p.GetResidualSignal();
                hpLen_vec_t &  hpFlow    = p.GetPredictedHpFlow();
                for(unsigned int iFlow=0; iFlow < nFlow; iFlow++) {
                    predicted_out(iRead,iFlow) = (double) predicted[iFlow];
                    residual_out(iRead,iFlow)  = (double) residual[iFlow];
                    hpFlow_out(iRead,iFlow)    = (int)    hpFlow[iFlow];
                }
                if(residualScale) {
                    weight_vec_t & multiplier  = p.GetMultiplier();
                    // We re-order these so the last multiplier comes first.  This is for convenience
                    // as it allows us grab the first col of the matrix as the last multiplier applied
                    // even if each read ended up taking different numbers of iterations.
                    unsigned int i1,i2;
                    for(i1=0,i2=multiplier.size()-1; i1 < multiplier.size(); i1++,i2--) {
                        multiplier_out(iRead,i1) = (double) multiplier[i2];
                    }
                    // If the read took fewer than all available iterations, pad with zero
                    for(; i1 <= nIterations; i1++) {
                        multiplier_out(iRead,i1) = 0;
                    }
                }

            }

            // Store results
            std::map<std::string,SEXP> map;
            map["seq"]          = Rcpp::wrap( seq_out );
            map["predicted"]    = Rcpp::wrap( predicted_out );
            map["residual"]     = Rcpp::wrap( residual_out );
            map["hpFlow"]       = Rcpp::wrap( hpFlow_out );
            if(residualScale)
                map["multiplier"] = Rcpp::wrap( multiplier_out );

            ret = Rcpp::wrap( map );
        }
    } catch(std::exception& ex) {
        forward_exception_to_r(ex);
    } catch(...) {
        ::Rf_error("c++ exception (unknown reason)");
    }

    if(exceptionMesg != NULL)
        Rf_error(exceptionMesg);

    return ret;
}