static bool same_size( const FusionSeq &x1 , const FusionSeq &x2 ) { typedef boost::fusion::vector< const FusionSeq& , const FusionSeq& > Sequences; Sequences sequences( x1 , x2 ); return boost::fusion::all( boost::fusion::zip_view< Sequences >( sequences ) , boost::fusion::make_fused( detail::same_size_fusion() ) ); }
void test_sequence_collection() { ds2i::global_parameters params; uint64_t universe = 10000; typedef ds2i::sequence_collection<BaseSequence> collection_type; typename collection_type::builder b(params); std::vector<std::vector<uint64_t>> sequences(30); for (auto& seq: sequences) { double avg_gap = 1.1 + double(rand()) / RAND_MAX * 10; uint64_t n = uint64_t(universe / avg_gap); seq = random_sequence(universe, n, true); b.add_sequence(seq.begin(), seq.back() + 1, n); } { collection_type coll; b.build(coll); succinct::mapper::freeze(coll, "temp.bin"); } { collection_type coll; boost::iostreams::mapped_file_source m("temp.bin"); succinct::mapper::map(coll, m); for (size_t i = 0; i < sequences.size(); ++i) { test_sequence(coll[i], sequences[i]); } } }
shared_ptr<VectorSiteContainer> SiteContainerBuilder::read_phylip_dna_file( string filename, bool interleaved) { Phylip reader{true, !interleaved, 100, true, " "}; SequenceContainer* alignment = reader.readSequences(filename, &AlphabetTools::DNA_ALPHABET); shared_ptr<VectorSiteContainer> sequences(new VectorSiteContainer(*alignment)); delete alignment; if (sequences->getNumberOfSequences() == 0) { sequences.reset(); throw Exception("The alignment is empty - did you specify the right file format?"); } return sequences; }
shared_ptr<VectorSiteContainer> SiteContainerBuilder::read_fasta_protein_file( string filename) { Fasta reader; SequenceContainer* alignment = reader.readSequences(filename, &AlphabetTools::PROTEIN_ALPHABET); shared_ptr<VectorSiteContainer> sequences(new VectorSiteContainer(*alignment)); delete alignment; if (sequences->getNumberOfSequences() == 0) { sequences.reset(); throw Exception("The alignment is empty - did you specify the right file format?"); } return sequences; }
bool ParserFactory::tryToParse (std::string & text) { BOOST_FOREACH (auto p, parsers) { boost::shared_ptr <std::vector<Sequence>> sequences (new std::vector<Sequence>); if (p->tryToParse (text)) { p->getSequences (*sequences); if (sequences->size()) allSequences.push_back (sequences); return true; } }
static void resize( FusionSeq &x1 , const FusionSeq &x2 ) { typedef boost::fusion::vector< FusionSeq& , const FusionSeq& > Sequences; Sequences sequences( x1 , x2 ); boost::fusion::for_each( boost::fusion::zip_view< Sequences >( sequences ) , boost::fusion::make_fused( detail::resizer() ) ); }
int main (int argc, const char ** argv) { ProgramArgument program_args=ProcessArguments(argc,argv); bool print_ali=program_args.print_ali; ParseMatrix submatParser(program_args.matrixname); #ifdef DEBUG std::cout << "Print Matrix\n"; submatParser.print(); #endif SubstitutionMatrix subMatrix(submatParser.scores, submatParser.row_index); #ifdef DEBUG std::cout << "Print SubMatrix!\n"; subMatrix.print(); #endif SequenceLibrary sequences(program_args.seqlib); const size_t max_seq_size = sequences.get_max_seq_size(); PairsLibrary pairs(program_args.pairs,&sequences); GotohEight gotohAlgo(max_seq_size,&subMatrix, program_args.go, program_args.ge, program_args.mode); SequenceEight sequenceQuery(max_seq_size, subMatrix.aa2short,subMatrix.short2aa); SequenceEight sequenceTemplate(max_seq_size, subMatrix.aa2short,subMatrix.short2aa); char buffer[2097152]; std::cout.rdbuf()->pubsetbuf(buffer, 2097152); const char ** template_sequences=(const char **)memalign(16,(VEC_SIZE)*sizeof(const char *)); const char ** template_keys=(const char **)memalign(16,(VEC_SIZE)*sizeof(const char *)); std::map< std::string,std::vector<std::pair<std::string,size_t> > >::iterator it = pairs.pairs.begin(); for( ; it != pairs.pairs.end(); ++it ){ std::pair<std::string, size_t> query = sequences.get_sequence(it->first); std::string query_key = it->first.c_str(); sequenceQuery.MapOneSEQToSEQ8(query.first.c_str()); std::vector<std::pair<std::string, size_t> > t_seq = it->second; sort (t_seq.begin(), t_seq.end(), sort_seq_vector); int elem_count=0; size_t t_seq_size = t_seq.size(); for(std::vector<int>::size_type i = 0; i < t_seq_size ; i++) { std::string template_key=t_seq[i].first; template_keys[i%VEC_SIZE] = template_key.c_str(); template_sequences[i%VEC_SIZE] = sequences.get_sequence(template_key).first.c_str(); elem_count++; if((i+1)%VEC_SIZE==0){ elem_count=0; sequenceTemplate.MapSEQArrayToSEQ8(template_sequences, VEC_SIZE); GotohEight::GotohMatrix matrix= gotohAlgo.calc_matrix(&sequenceQuery, &sequenceTemplate); if(print_ali == true){ std::vector<char *> alignments=gotohAlgo.backtrace(matrix,&sequenceQuery,&sequenceTemplate); for(size_t i = 0; i < 8; i++) { if(program_args.check==true){ float check_score=gotohAlgo.calc_check_score(alignments[(i*2)],alignments[(i*2)+1]); printf("check score: %.3f\n",check_score); } print_ali_result(alignments[(i*2)],alignments[(i*2)+1],matrix.score[i],query_key,template_keys[i]); } }else{ print_score(matrix,query_key,template_keys,VEC_SIZE); } } } if(elem_count!=0){ sequenceTemplate.MapSEQArrayToSEQ8(template_sequences, elem_count); GotohEight::GotohMatrix matrix= gotohAlgo.calc_matrix(&sequenceQuery, &sequenceTemplate); if(print_ali == true){ std::vector<char *> alignments=gotohAlgo.backtrace(matrix,&sequenceQuery,&sequenceTemplate); for(size_t i = 0; i < elem_count; i++) { if(program_args.check==true){ float check_score=gotohAlgo.calc_check_score(alignments[(i*2)],alignments[(i*2)+1]); printf("check score: %.3f\n",check_score); } print_ali_result(alignments[(i*2)],alignments[(i*2)+1],matrix.score[i],query_key,template_keys[i]); } }else{ print_score(matrix,query_key,template_keys,elem_count); } } } std::cout.flush(); free(template_keys); free(template_sequences); return 0; }
RcppExport SEXP DPPhaseSim(SEXP Rsequence, SEXP RflowCycle, SEXP Rcf, SEXP Rie, SEXP Rdr, SEXP Rmaxflows, SEXP RgetStates, SEXP RnucContamination) { SEXP ret = R_NilValue; char *exceptionMesg = NULL; try { Rcpp::StringVector sequences(Rsequence); string flowCycle = Rcpp::as<string>(RflowCycle); Rcpp::NumericMatrix cf_mat(Rcf); Rcpp::NumericMatrix ie_mat(Rie); Rcpp::NumericMatrix dr_mat(Rdr); Rcpp::NumericMatrix nuc_contamination(RnucContamination); unsigned int max_flows = Rcpp::as<int>(Rmaxflows); unsigned int get_states = Rcpp::as<int>(RgetStates); ion::FlowOrder flow_order(flowCycle, flowCycle.length()); unsigned int nFlow = flow_order.num_flows(); unsigned int nRead = sequences.size(); max_flows = min(max_flows, nFlow); vector<vector<double> > nuc_availbality; nuc_availbality.resize(nuc_contamination.nrow()); for (unsigned int iFlowNuc=0; iFlowNuc < nuc_availbality.size(); iFlowNuc++){ nuc_availbality.at(iFlowNuc).resize(nuc_contamination.ncol()); for (unsigned int iNuc=0; iNuc < nuc_availbality.at(iFlowNuc).size(); iNuc++){ nuc_availbality.at(iFlowNuc).at(iNuc) = nuc_contamination(iFlowNuc, iNuc); } } // Prepare objects for holding and passing back results Rcpp::NumericMatrix predicted_out(nRead,nFlow); Rcpp::StringVector seq_out(nRead); // Set Phasing Model DPPhaseSimulator PhaseSimulator(flow_order); PhaseSimulator.UpdateNucAvailability(nuc_availbality); bool per_read_phasing = true; if (cf_mat.nrow() == 1 and cf_mat.ncol() == 1) { per_read_phasing = false; cout << "DPPhaseSim: Single Phase Parameter set detected." << endl; // XXX PhaseSimulator.SetPhasingParameters_Basic((double)cf_mat(0,0), (double)ie_mat(0,0), (double)dr_mat(0,0)); } else if (cf_mat.nrow() == (int)nFlow and cf_mat.ncol() == (int)nFlow) { cout << "DPPhaseSim: Full Phase Parameter set detected." << endl; // XXX per_read_phasing = false; vector<vector<double> > cf(nFlow); vector<vector<double> > ie(nFlow); vector<vector<double> > dr(nFlow); for (unsigned int iFlowNuc=0; iFlowNuc < nFlow; iFlowNuc++){ cf.at(iFlowNuc).resize(nFlow); ie.at(iFlowNuc).resize(nFlow); dr.at(iFlowNuc).resize(nFlow); for (unsigned int iFlow=0; iFlow < nFlow; iFlow++){ cf.at(iFlowNuc).at(iFlow) = cf_mat(iFlowNuc, iFlow); ie.at(iFlowNuc).at(iFlow) = ie_mat(iFlowNuc, iFlow); dr.at(iFlowNuc).at(iFlow) = dr_mat(iFlowNuc, iFlow); } } PhaseSimulator.SetPhasingParameters_Full(cf, ie, dr); } else cout << "DPPhaseSim: Per Read Phase Parameter set detected." << endl; //XXX // --- Iterate over all sequences string my_sequence, sim_sequence; vector<float> my_prediction; for(unsigned int iRead=0; iRead<nRead; iRead++) { if (per_read_phasing) PhaseSimulator.SetPhasingParameters_Basic((double)cf_mat(0,iRead), (double)ie_mat(0,iRead), (double)dr_mat(0,iRead)); my_sequence = Rcpp::as<std::string>(sequences(iRead)); PhaseSimulator.Simulate(my_sequence, my_prediction, max_flows); PhaseSimulator.GetSimSequence(sim_sequence); // Simulated sequence might be shorter than input sequence. seq_out(iRead) = sim_sequence; for(unsigned int iFlow=0; iFlow<nFlow and iFlow<max_flows; ++iFlow) { predicted_out(iRead,iFlow) = (double) my_prediction.at(iFlow); } //cout << "--- DPPhaseSim: Done simulating read "<< iRead << " of " << nRead << endl; // XXX } // --- Store results if (nRead == 1 and get_states > 0) { vector<vector<float> > query_states; vector<int> hp_lengths; PhaseSimulator.GetStates(query_states, hp_lengths); Rcpp::NumericMatrix states(hp_lengths.size(), nFlow); Rcpp::NumericVector HPlengths(hp_lengths.size()); for (unsigned int iHP=0; iHP<hp_lengths.size(); iHP++){ HPlengths(iHP) = (double)hp_lengths[iHP]; for (unsigned int iFlow=0; iFlow<nFlow; iFlow++) states(iHP, iFlow) = (double)query_states.at(iHP).at(iFlow); } ret = Rcpp::List::create(Rcpp::Named("sig") = predicted_out, Rcpp::Named("seq") = seq_out, Rcpp::Named("states") = states, Rcpp::Named("HPlengths") = HPlengths); } else { ret = Rcpp::List::create(Rcpp::Named("sig") = predicted_out, Rcpp::Named("seq") = seq_out); } } catch(std::exception& ex) { forward_exception_to_r(ex); } catch(...) { ::Rf_error("c++ exception (unknown reason)"); } if(exceptionMesg != NULL) Rf_error(exceptionMesg); return ret; }
RcppExport SEXP treePhaserSim(SEXP Rsequence, SEXP RflowCycle, SEXP Rcf, SEXP Rie, SEXP Rdr, SEXP Rmaxflows, SEXP RgetStates, SEXP RdiagonalStates, SEXP RmodelFile, SEXP RmodelThreshold, SEXP Rxval, SEXP Ryval) { SEXP ret = R_NilValue; char *exceptionMesg = NULL; try { Rcpp::StringVector sequences(Rsequence); string flowCycle = Rcpp::as<string>(RflowCycle); Rcpp::NumericMatrix cf_vec(Rcf); Rcpp::NumericMatrix ie_vec(Rie); Rcpp::NumericMatrix dr_vec(Rdr); unsigned int max_flows = Rcpp::as<int>(Rmaxflows); unsigned int get_states = Rcpp::as<int>(RgetStates); unsigned int diagonalStates = Rcpp::as<int>(RdiagonalStates); // Recalibration Variables string model_file = Rcpp::as<string>(RmodelFile); int model_threshold = Rcpp::as<int>(RmodelThreshold); Rcpp::IntegerVector x_values(Rxval); Rcpp::IntegerVector y_values(Ryval); RecalibrationModel recalModel; if (model_file.length() > 0) { recalModel.InitializeModel(model_file, model_threshold); } ion::FlowOrder flow_order(flowCycle, flowCycle.length()); unsigned int nFlow = flow_order.num_flows(); unsigned int nRead = sequences.size(); max_flows = min(max_flows, nFlow); // Prepare objects for holding and passing back results Rcpp::NumericMatrix predicted_out(nRead,nFlow); vector<vector<float> > query_states; vector<int> hp_lengths; // Iterate over all sequences BasecallerRead read; DPTreephaser dpTreephaser(flow_order); bool per_read_phasing = true; if (cf_vec.ncol() == 1) { per_read_phasing = false; dpTreephaser.SetModelParameters((double)cf_vec(0,0), (double)ie_vec(0,0), (double)dr_vec(0,0)); } dpTreephaser.SetStateProgression((diagonalStates>0)); unsigned int max_length = (2*flow_order.num_flows()); for(unsigned int iRead=0; iRead<nRead; iRead++) { string mySequence = Rcpp::as<std::string>(sequences(iRead)); read.sequence.clear(); read.sequence.reserve(2*flow_order.num_flows()); for(unsigned int iBase=0; iBase<mySequence.length() and iBase<max_length; ++iBase){ read.sequence.push_back(mySequence.at(iBase)); } // Set phasing parameters for this read if (per_read_phasing) dpTreephaser.SetModelParameters((double)cf_vec(0,iRead), (double)ie_vec(0,iRead), (double)dr_vec(0,iRead)); // If you bothered specifying a recalibration model you probably want its effect on the predictions... if (recalModel.is_enabled()) { int my_x = (int)x_values(iRead); int my_y = (int)y_values(iRead); const vector<vector<vector<float> > > * aPtr = 0; const vector<vector<vector<float> > > * bPtr = 0; aPtr = recalModel.getAs(my_x, my_y); bPtr = recalModel.getBs(my_x, my_y); if (aPtr == 0 or bPtr == 0) { cout << "Error finding a recalibration model for x: " << x_values(iRead) << " y: " << y_values(iRead); cout << endl; } dpTreephaser.SetAsBs(aPtr, bPtr); } if (nRead == 1 and get_states > 0) dpTreephaser.QueryAllStates(read, query_states, hp_lengths, max_flows); else dpTreephaser.Simulate(read, max_flows); for(unsigned int iFlow=0; iFlow<nFlow and iFlow<max_flows; ++iFlow){ predicted_out(iRead,iFlow) = (double) read.prediction.at(iFlow); } } // Store results if (nRead == 1 and get_states > 0) { Rcpp::NumericMatrix states(hp_lengths.size(), nFlow); Rcpp::NumericVector HPlengths(hp_lengths.size()); for (unsigned int iHP=0; iHP<hp_lengths.size(); iHP++){ HPlengths(iHP) = (double)hp_lengths[iHP]; for (unsigned int iFlow=0; iFlow<nFlow; iFlow++) states(iHP, iFlow) = (double)query_states.at(iHP).at(iFlow); } ret = Rcpp::List::create(Rcpp::Named("sig") = predicted_out, Rcpp::Named("states") = states, Rcpp::Named("HPlengths") = HPlengths); } else { ret = Rcpp::List::create(Rcpp::Named("sig") = predicted_out); } } catch(std::exception& ex) { forward_exception_to_r(ex); } catch(...) { ::Rf_error("c++ exception (unknown reason)"); } if(exceptionMesg != NULL) Rf_error(exceptionMesg); return ret; }