static bool same_size( const FusionSeq &x1 , const FusionSeq &x2 )
 {
     typedef boost::fusion::vector< const FusionSeq& , const FusionSeq& > Sequences;
     Sequences sequences( x1 , x2 );
     return boost::fusion::all( boost::fusion::zip_view< Sequences >( sequences ) ,
                                boost::fusion::make_fused( detail::same_size_fusion() ) );
 }
void test_sequence_collection()
{
    ds2i::global_parameters params;
    uint64_t universe = 10000;
    typedef ds2i::sequence_collection<BaseSequence>
        collection_type;
    typename collection_type::builder b(params);

    std::vector<std::vector<uint64_t>> sequences(30);
    for (auto& seq: sequences) {
        double avg_gap = 1.1 + double(rand()) / RAND_MAX * 10;
        uint64_t n = uint64_t(universe / avg_gap);
        seq = random_sequence(universe, n, true);
        b.add_sequence(seq.begin(), seq.back() + 1, n);
    }

    {
        collection_type coll;
        b.build(coll);
        succinct::mapper::freeze(coll, "temp.bin");
    }

    {
        collection_type coll;
        boost::iostreams::mapped_file_source m("temp.bin");
        succinct::mapper::map(coll, m);

        for (size_t i = 0; i < sequences.size(); ++i) {
            test_sequence(coll[i], sequences[i]);
        }
    }
}
Example #3
0
shared_ptr<VectorSiteContainer> SiteContainerBuilder::read_phylip_dna_file(
        string filename, bool interleaved) {
    Phylip reader{true, !interleaved, 100, true, "  "};
    SequenceContainer* alignment = reader.readSequences(filename, &AlphabetTools::DNA_ALPHABET);
    shared_ptr<VectorSiteContainer> sequences(new VectorSiteContainer(*alignment));
    delete alignment;
    if (sequences->getNumberOfSequences() == 0) {
        sequences.reset();
        throw Exception("The alignment is empty - did you specify the right file format?");
    }
    return sequences;
}
Example #4
0
shared_ptr<VectorSiteContainer> SiteContainerBuilder::read_fasta_protein_file(
        string filename) {
    Fasta reader;
    SequenceContainer* alignment = reader.readSequences(filename, &AlphabetTools::PROTEIN_ALPHABET);
    shared_ptr<VectorSiteContainer> sequences(new VectorSiteContainer(*alignment));
    delete alignment;
    if (sequences->getNumberOfSequences() == 0) {
        sequences.reset();
        throw Exception("The alignment is empty - did you specify the right file format?");
    }
    return sequences;
}
Example #5
0
bool ParserFactory::tryToParse (std::string & text)
{
	BOOST_FOREACH (auto p, parsers)
	{
		boost::shared_ptr <std::vector<Sequence>> sequences (new std::vector<Sequence>);
		if (p->tryToParse (text))
		{
			p->getSequences (*sequences);
			if (sequences->size())
				allSequences.push_back (sequences);
			return true;
		}
	}
Example #6
0
 static void resize( FusionSeq &x1 , const FusionSeq &x2 )
 {
     typedef boost::fusion::vector< FusionSeq& , const FusionSeq& > Sequences;
     Sequences sequences( x1 , x2 );
     boost::fusion::for_each( boost::fusion::zip_view< Sequences >( sequences ) , boost::fusion::make_fused( detail::resizer() ) );
 }
Example #7
0
int main (int argc, const char ** argv)
{

    
    ProgramArgument program_args=ProcessArguments(argc,argv);
    
    
    bool print_ali=program_args.print_ali;

    ParseMatrix submatParser(program_args.matrixname);
#ifdef DEBUG
    std::cout << "Print Matrix\n";
    submatParser.print();
#endif
    SubstitutionMatrix subMatrix(submatParser.scores, submatParser.row_index);
#ifdef DEBUG
    std::cout << "Print SubMatrix!\n";
    subMatrix.print();
#endif
    SequenceLibrary sequences(program_args.seqlib);
    const size_t max_seq_size = sequences.get_max_seq_size();
    PairsLibrary pairs(program_args.pairs,&sequences);
    GotohEight gotohAlgo(max_seq_size,&subMatrix, program_args.go, program_args.ge, program_args.mode);
    SequenceEight sequenceQuery(max_seq_size, subMatrix.aa2short,subMatrix.short2aa);
    SequenceEight sequenceTemplate(max_seq_size, subMatrix.aa2short,subMatrix.short2aa);

    
        
    char buffer[2097152];
    std::cout.rdbuf()->pubsetbuf(buffer, 2097152);
    
    const char ** template_sequences=(const char **)memalign(16,(VEC_SIZE)*sizeof(const char *)); 
    const char ** template_keys=(const char **)memalign(16,(VEC_SIZE)*sizeof(const char *)); 
    
    std::map< std::string,std::vector<std::pair<std::string,size_t> > >::iterator it = pairs.pairs.begin();    
    for( ; it != pairs.pairs.end(); ++it ){
        std::pair<std::string, size_t> query = sequences.get_sequence(it->first);
        std::string query_key = it->first.c_str();
        sequenceQuery.MapOneSEQToSEQ8(query.first.c_str());
        std::vector<std::pair<std::string, size_t> > t_seq = it->second;
        sort (t_seq.begin(), t_seq.end(), sort_seq_vector); 

        int elem_count=0;
        size_t t_seq_size = t_seq.size();
        for(std::vector<int>::size_type i = 0; i < t_seq_size ; i++) {
            std::string template_key=t_seq[i].first;
            template_keys[i%VEC_SIZE] = template_key.c_str();
            template_sequences[i%VEC_SIZE] = sequences.get_sequence(template_key).first.c_str();
            elem_count++;
            if((i+1)%VEC_SIZE==0){
                elem_count=0;
                sequenceTemplate.MapSEQArrayToSEQ8(template_sequences, VEC_SIZE);
                GotohEight::GotohMatrix matrix= gotohAlgo.calc_matrix(&sequenceQuery, &sequenceTemplate);
                if(print_ali == true){
                    std::vector<char *> alignments=gotohAlgo.backtrace(matrix,&sequenceQuery,&sequenceTemplate);
                        for(size_t i = 0; i < 8; i++) {
                            if(program_args.check==true){
                                float check_score=gotohAlgo.calc_check_score(alignments[(i*2)],alignments[(i*2)+1]);
                                printf("check score: %.3f\n",check_score);
                            }
                            print_ali_result(alignments[(i*2)],alignments[(i*2)+1],matrix.score[i],query_key,template_keys[i]);
                        }
                }else{    
                    print_score(matrix,query_key,template_keys,VEC_SIZE);
                }
            }
            
        }
        if(elem_count!=0){
            sequenceTemplate.MapSEQArrayToSEQ8(template_sequences, elem_count);
            GotohEight::GotohMatrix matrix= gotohAlgo.calc_matrix(&sequenceQuery, &sequenceTemplate);
            if(print_ali == true){
                std::vector<char *> alignments=gotohAlgo.backtrace(matrix,&sequenceQuery,&sequenceTemplate);
                for(size_t i = 0; i < elem_count; i++) {
                    if(program_args.check==true){
                        float check_score=gotohAlgo.calc_check_score(alignments[(i*2)],alignments[(i*2)+1]);
                        printf("check score: %.3f\n",check_score);
                    }
                print_ali_result(alignments[(i*2)],alignments[(i*2)+1],matrix.score[i],query_key,template_keys[i]);
                }
            }else{    
                print_score(matrix,query_key,template_keys,elem_count);
            }
        }
        
    }
    std::cout.flush();
    free(template_keys);
    free(template_sequences);
    
    
    
    
    return 0;
}
Example #8
0
RcppExport SEXP DPPhaseSim(SEXP Rsequence, SEXP RflowCycle, SEXP Rcf, SEXP Rie, SEXP Rdr,
                           SEXP Rmaxflows, SEXP RgetStates, SEXP RnucContamination)
{
  SEXP ret = R_NilValue;
  char *exceptionMesg = NULL;

  try {

    Rcpp::StringVector        sequences(Rsequence);
    string flowCycle        = Rcpp::as<string>(RflowCycle);
    Rcpp::NumericMatrix       cf_mat(Rcf);
    Rcpp::NumericMatrix       ie_mat(Rie);
    Rcpp::NumericMatrix       dr_mat(Rdr);
    Rcpp::NumericMatrix       nuc_contamination(RnucContamination);
    unsigned int max_flows  = Rcpp::as<int>(Rmaxflows);
    unsigned int get_states = Rcpp::as<int>(RgetStates);

    ion::FlowOrder flow_order(flowCycle, flowCycle.length());
    unsigned int nFlow = flow_order.num_flows();
    unsigned int nRead = sequences.size();
    max_flows = min(max_flows, nFlow);

    vector<vector<double> >    nuc_availbality;
    nuc_availbality.resize(nuc_contamination.nrow());
    for (unsigned int iFlowNuc=0; iFlowNuc < nuc_availbality.size(); iFlowNuc++){
      nuc_availbality.at(iFlowNuc).resize(nuc_contamination.ncol());
      for (unsigned int iNuc=0; iNuc < nuc_availbality.at(iFlowNuc).size(); iNuc++){
        nuc_availbality.at(iFlowNuc).at(iNuc) = nuc_contamination(iFlowNuc, iNuc);
      }
    }

    // Prepare objects for holding and passing back results
    Rcpp::NumericMatrix       predicted_out(nRead,nFlow);
    Rcpp::StringVector        seq_out(nRead);


    // Set Phasing Model
    DPPhaseSimulator PhaseSimulator(flow_order);
    PhaseSimulator.UpdateNucAvailability(nuc_availbality);

    bool per_read_phasing = true;
    if (cf_mat.nrow() == 1 and cf_mat.ncol() == 1) {
      per_read_phasing = false;
      cout << "DPPhaseSim: Single Phase Parameter set detected." << endl; // XXX
      PhaseSimulator.SetPhasingParameters_Basic((double)cf_mat(0,0), (double)ie_mat(0,0), (double)dr_mat(0,0));
    } else if (cf_mat.nrow() == (int)nFlow and cf_mat.ncol() == (int)nFlow) {
    	cout << "DPPhaseSim: Full Phase Parameter set detected." << endl; // XXX
      per_read_phasing = false;
      vector<vector<double> > cf(nFlow);
      vector<vector<double> > ie(nFlow);
      vector<vector<double> > dr(nFlow);
      for (unsigned int iFlowNuc=0; iFlowNuc < nFlow; iFlowNuc++){
        cf.at(iFlowNuc).resize(nFlow);
        ie.at(iFlowNuc).resize(nFlow);
        dr.at(iFlowNuc).resize(nFlow);
        for (unsigned int iFlow=0; iFlow < nFlow; iFlow++){
          cf.at(iFlowNuc).at(iFlow) = cf_mat(iFlowNuc, iFlow);
          ie.at(iFlowNuc).at(iFlow) = ie_mat(iFlowNuc, iFlow);
          dr.at(iFlowNuc).at(iFlow) = dr_mat(iFlowNuc, iFlow);
        }
      }
      PhaseSimulator.SetPhasingParameters_Full(cf, ie, dr);
    }
    else
      cout << "DPPhaseSim: Per Read Phase Parameter set detected." << endl; //XXX


    // --- Iterate over all sequences

    string         my_sequence, sim_sequence;
    vector<float>  my_prediction;

    for(unsigned int iRead=0; iRead<nRead; iRead++) {

      if (per_read_phasing)
        PhaseSimulator.SetPhasingParameters_Basic((double)cf_mat(0,iRead), (double)ie_mat(0,iRead), (double)dr_mat(0,iRead));

      my_sequence = Rcpp::as<std::string>(sequences(iRead));
      PhaseSimulator.Simulate(my_sequence, my_prediction, max_flows);

      PhaseSimulator.GetSimSequence(sim_sequence); // Simulated sequence might be shorter than input sequence.
      seq_out(iRead) = sim_sequence;
      for(unsigned int iFlow=0; iFlow<nFlow and iFlow<max_flows; ++iFlow) {
		predicted_out(iRead,iFlow) = (double) my_prediction.at(iFlow);
      }
      //cout << "--- DPPhaseSim: Done simulating read "<< iRead << " of " << nRead << endl; // XXX
    }

    // --- Store results
    if (nRead == 1 and get_states > 0) {

      vector<vector<float> >    query_states;
      vector<int>               hp_lengths;
      PhaseSimulator.GetStates(query_states, hp_lengths);

      Rcpp::NumericMatrix       states(hp_lengths.size(), nFlow);
      Rcpp::NumericVector       HPlengths(hp_lengths.size());

      for (unsigned int iHP=0; iHP<hp_lengths.size(); iHP++){
        HPlengths(iHP) = (double)hp_lengths[iHP];
        for (unsigned int iFlow=0; iFlow<nFlow; iFlow++)
          states(iHP, iFlow) = (double)query_states.at(iHP).at(iFlow);
      }

      ret = Rcpp::List::create(Rcpp::Named("sig")       = predicted_out,
                               Rcpp::Named("seq")       = seq_out,
                               Rcpp::Named("states")    = states,
                               Rcpp::Named("HPlengths") = HPlengths);
    } else {
      ret = Rcpp::List::create(Rcpp::Named("sig")  = predicted_out,
                               Rcpp::Named("seq")  = seq_out);
    }

  } catch(std::exception& ex) {
    forward_exception_to_r(ex);
  } catch(...) {
    ::Rf_error("c++ exception (unknown reason)");
  }

  if(exceptionMesg != NULL)
    Rf_error(exceptionMesg);

  return ret;
}
Example #9
0
RcppExport SEXP treePhaserSim(SEXP Rsequence, SEXP RflowCycle, SEXP Rcf, SEXP Rie, SEXP Rdr,
                              SEXP Rmaxflows, SEXP RgetStates, SEXP RdiagonalStates,
                              SEXP RmodelFile, SEXP RmodelThreshold, SEXP Rxval, SEXP Ryval)
{
  SEXP ret = R_NilValue;
  char *exceptionMesg = NULL;

  try {

    Rcpp::StringVector            sequences(Rsequence);
    string flowCycle            = Rcpp::as<string>(RflowCycle);
    Rcpp::NumericMatrix           cf_vec(Rcf);
    Rcpp::NumericMatrix           ie_vec(Rie);
    Rcpp::NumericMatrix           dr_vec(Rdr);
    unsigned int max_flows      = Rcpp::as<int>(Rmaxflows);
    unsigned int get_states     = Rcpp::as<int>(RgetStates);
    unsigned int diagonalStates = Rcpp::as<int>(RdiagonalStates);

    // Recalibration Variables
    string model_file      = Rcpp::as<string>(RmodelFile);
    int model_threshold    = Rcpp::as<int>(RmodelThreshold);
    Rcpp::IntegerVector      x_values(Rxval);
    Rcpp::IntegerVector      y_values(Ryval);
    RecalibrationModel       recalModel;
    if (model_file.length() > 0) {
      recalModel.InitializeModel(model_file, model_threshold);
    }

    ion::FlowOrder flow_order(flowCycle, flowCycle.length());
    unsigned int nFlow = flow_order.num_flows();
    unsigned int nRead = sequences.size();
    max_flows = min(max_flows, nFlow);

    // Prepare objects for holding and passing back results
    Rcpp::NumericMatrix       predicted_out(nRead,nFlow);
    vector<vector<float> >    query_states;
    vector<int>               hp_lengths;

    // Iterate over all sequences
    BasecallerRead read;
    DPTreephaser dpTreephaser(flow_order);
    bool per_read_phasing = true;
    if (cf_vec.ncol() == 1) {
      per_read_phasing = false;
      dpTreephaser.SetModelParameters((double)cf_vec(0,0), (double)ie_vec(0,0), (double)dr_vec(0,0));
    }
    dpTreephaser.SetStateProgression((diagonalStates>0));
    unsigned int max_length = (2*flow_order.num_flows());

    for(unsigned int iRead=0; iRead<nRead; iRead++) {

      string mySequence = Rcpp::as<std::string>(sequences(iRead));
      read.sequence.clear();
      read.sequence.reserve(2*flow_order.num_flows());
      for(unsigned int iBase=0; iBase<mySequence.length() and iBase<max_length; ++iBase){
        read.sequence.push_back(mySequence.at(iBase));
      }
      // Set phasing parameters for this read
      if (per_read_phasing)
        dpTreephaser.SetModelParameters((double)cf_vec(0,iRead), (double)ie_vec(0,iRead), (double)dr_vec(0,iRead));

      // If you bothered specifying a recalibration model you probably want its effect on the predictions...
      if (recalModel.is_enabled()) {
        int my_x = (int)x_values(iRead);
        int my_y = (int)y_values(iRead);
        const vector<vector<vector<float> > > * aPtr = 0;
        const vector<vector<vector<float> > > * bPtr = 0;
        aPtr = recalModel.getAs(my_x, my_y);
        bPtr = recalModel.getBs(my_x, my_y);
        if (aPtr == 0 or bPtr == 0) {
          cout << "Error finding a recalibration model for x: " << x_values(iRead) << " y: " << y_values(iRead);
          cout << endl;
        }
        dpTreephaser.SetAsBs(aPtr, bPtr);
      }

      if (nRead == 1 and get_states > 0)
        dpTreephaser.QueryAllStates(read, query_states, hp_lengths, max_flows);
      else
        dpTreephaser.Simulate(read, max_flows);

      for(unsigned int iFlow=0; iFlow<nFlow and iFlow<max_flows; ++iFlow){
		predicted_out(iRead,iFlow) = (double) read.prediction.at(iFlow);
      }
    }

    // Store results
    if (nRead == 1 and get_states > 0) {
      Rcpp::NumericMatrix        states(hp_lengths.size(), nFlow);
      Rcpp::NumericVector        HPlengths(hp_lengths.size());
      for (unsigned int iHP=0; iHP<hp_lengths.size(); iHP++){
        HPlengths(iHP) = (double)hp_lengths[iHP];
        for (unsigned int iFlow=0; iFlow<nFlow; iFlow++)
          states(iHP, iFlow) = (double)query_states.at(iHP).at(iFlow);
      }
      ret = Rcpp::List::create(Rcpp::Named("sig")  = predicted_out,
                               Rcpp::Named("states")  = states,
                               Rcpp::Named("HPlengths")  = HPlengths);
    } else {
      ret = Rcpp::List::create(Rcpp::Named("sig")  = predicted_out);
    }

  } catch(std::exception& ex) {
    forward_exception_to_r(ex);
  } catch(...) {
    ::Rf_error("c++ exception (unknown reason)");
  }

  if(exceptionMesg != NULL)
    Rf_error(exceptionMesg);

  return ret;
}