/*private*/
/**
 * Computes one directed-edge sequence for every connected subgraph of
 * the graph.
 *
 * The subgraphs handed back by ConnectedSubgraphFinder are deleted here,
 * one by one, once they have been processed.
 *
 * @return a list allocated with `new` holding the result of
 *         findSequence() for each connected subgraph, or nullptr as soon
 *         as hasSequence() rejects a subgraph. In the nullptr case every
 *         sequence collected so far is released through delAll().
 */
LineSequencer::Sequences*
LineSequencer::findSequences()
{
    Sequences* sequences = new Sequences();

    planargraph::algorithm::ConnectedSubgraphFinder csFinder(graph);
    vector<planargraph::Subgraph*> subgraphs;
    csFinder.getConnectedSubgraphs(subgraphs);

    typedef vector<planargraph::Subgraph*>::const_iterator SubgraphIt;
    for (SubgraphIt it = subgraphs.begin(), endIt = subgraphs.end();
            it != endIt; ++it) {
        planargraph::Subgraph* subgraph = *it;

        if (!hasSequence(*subgraph)) {
            // If any subgraph cannot be sequenced, abort.
            // Free the current subgraph AND all the ones not visited yet;
            // deleting only the current one would leak the remainder.
            for (; it != endIt; ++it) {
                delete *it;
            }
            delAll(*sequences);
            delete sequences;
            return nullptr;
        }

        planargraph::DirectedEdge::NonConstList* seq = findSequence(*subgraph);
        sequences->push_back(seq);
        delete subgraph;
    }

    return sequences;
}
// la séquence initiale est lue et découpée en sous-séquences de // longueur 20 ; // chaque sous-séquence est terminée par une observation END pour que // le HMM généré possède un état terminal Sequences build_initial_sequences(const std::string& name) { Sequence sequence = read_initial_sequence(name, false); Sequences sequences; unsigned int length = sequence.size() / 20; Sequence::const_iterator it = sequence.begin(); unsigned int added = 0; unsigned int index = 0; sequences.push_back(Sequence()); while (it != sequence.end()) { if (added == length) { sequences[index].push_back(Observation(0, END)); ++index; added = 0; sequences.push_back(Sequence()); } sequences[index].push_back(*it); ++added; ++it; } if (sequence.size() % 20 != 0) { sequences[index].push_back(Observation(0, END)); } return sequences; }
int main(int argc, char** argv) { if (argc < 4) { std::cout << "Usage: get_TrieArray <fastafile> <matrix> <peptideLength> <outfile> " << std::endl; return -1; } string fastafile(argv[1]); string matrix(argv[2]); cout << "test" << endl; int peptideLength(atoi(argv[3])); string outname(argv[4]); //std::cout << fastafile << "\t" << outname << std::endl; //---------------------------------------------------------------------------------------- cout << "Reading FASTA file..." << endl; Sequences s(fastafile); cout << "Read " << s.size() << " sequences." << endl; cout << "Generating peptides..." << endl; Sequences ninemers; generateAllSubstrings(ninemers, s, peptideLength); cout << "Generated " << ninemers.size() << " peptides." << endl; s.clear(); //Matrix m("/abi-projects/dist2self/matrices/BLOSUM45_distance_normal.dat"); cout << "Initializing trie. " << endl; Trie t; Matrix m(matrix); cout << "Initializing trie. " << endl; Trie t; Matrix::IndexSequence indices; for (size_t i = 0; i < ninemers.size(); ++i) { m.translate(ninemers[i], indices); t.add(indices); } t.dump(); cout << "Converting to trie array." << endl; TrieArray ta(t, peptideLength); cout << "Done." << endl; // std::ofstream ofs("test.trie"); std::ofstream ofs(outname.c_str()); boost::archive::text_oarchive oa(ofs); ta.save(oa,1); }
// fonction de sauvegarde de la liste des sous-séquences dans les // fichiers .input void save_sequences(const std::string& name, const Sequences& sequences) { std::ofstream f((boost::format("seq-%1%.input") % name).str().c_str()); for (Sequences::const_iterator it = sequences.begin(); it != sequences.end(); ++it) { for (Sequence::const_iterator it2 = it->begin(); it2 != it->end(); ++it2) { f << it2->first << "|" << it2->second << " "; } f << std::endl; } f.close(); }
/**
 * Appends to `substrings` every window of `len` consecutive characters
 * found in each entry of `s`, in order of entry and then start position.
 * Entries shorter than `len` contribute nothing.
 *
 * @param substrings  container receiving the windows (appended to)
 * @param s           input entries to slide the window over
 * @param len         window length in characters
 */
void generateAllSubstrings(Sequences& substrings, const Sequences& s, size_t len)
{
    for (size_t seqIdx = 0; seqIdx < s.size(); ++seqIdx)
    {
        // Signed on purpose: the value is negative when the entry is
        // shorter than len, which makes the inner loop run zero times.
        const int lastStart = static_cast<int>(s[seqIdx].size()) - static_cast<int>(len);

        for (int start = 0; start <= lastStart; ++start)
        {
            const string window(&(s[seqIdx][start]), len);
            substrings.push_back(window);

            // Sanity check on the extracted window length.
            if (window.size() != len)
            {
                cout << "generateAllSubstrings size: " << window.size() << endl;
            }
        }
    }
}
int main(int argc, char** argv) { if (argc < 2) { std::cout << "Usage: get_TrieArray <fastafile> <outfile> " << std::endl; return -1; } ofstream myfile; string fastafile(argv[1]); const char* outname(argv[2]); myfile.open(outname); std::cout << fastafile << "\t" << outname << std::endl; //---------------------------------------------------------------------------------------- cout << "Reading FASTA file..." << endl; Sequences s(fastafile); cout << "Read " << s.size() << " sequences." << endl; cout << "Generating ninemers..." << endl; Sequences ninemers; generateAllSubstrings(ninemers, s, 9); cout << "Generated " << ninemers.size() << " ninemers." << endl; s.clear(); for (size_t i = 0; i < ninemers.size(); ++i) { myfile << "> " << i << endl; myfile << ninemers[i] << endl; } myfile.close(); cout << "Done." << endl; }