// la séquence initiale est lue et découpée en sous-séquences de // longueur 20 ; // chaque sous-séquence est terminée par une observation END pour que // le HMM généré possède un état terminal Sequences build_initial_sequences(const std::string& name) { Sequence sequence = read_initial_sequence(name, false); Sequences sequences; unsigned int length = sequence.size() / 20; Sequence::const_iterator it = sequence.begin(); unsigned int added = 0; unsigned int index = 0; sequences.push_back(Sequence()); while (it != sequence.end()) { if (added == length) { sequences[index].push_back(Observation(0, END)); ++index; added = 0; sequences.push_back(Sequence()); } sequences[index].push_back(*it); ++added; ++it; } if (sequence.size() % 20 != 0) { sequences[index].push_back(Observation(0, END)); } return sequences; }
/*private*/
/**
 * Computes one directed-edge sequence per connected subgraph of `graph`.
 *
 * @return a newly allocated list holding the result of findSequence() for
 *         each connected subgraph, returned by raw pointer (this function
 *         keeps no reference to it), or nullptr as soon as one subgraph
 *         cannot be sequenced.
 *
 * The subgraphs produced by ConnectedSubgraphFinder are released here, on
 * both the success and the abort path.
 */
LineSequencer::Sequences*
LineSequencer::findSequences()
{
    Sequences* sequences = new Sequences();

    planargraph::algorithm::ConnectedSubgraphFinder csFinder(graph);
    vector<planargraph::Subgraph*> subgraphs;
    csFinder.getConnectedSubgraphs(subgraphs);

    for (auto it = subgraphs.begin(), endIt = subgraphs.end();
         it != endIt; ++it) {
        planargraph::Subgraph* subgraph = *it;

        if (!hasSequence(*subgraph)) {
            // If any subgraph cannot be sequenced, abort. Earlier subgraphs
            // were already deleted; release this one and every subgraph not
            // yet visited so none of them leaks.
            for (; it != endIt; ++it) {
                delete *it;
            }
            delAll(*sequences);
            delete sequences;
            return nullptr;
        }

        planargraph::DirectedEdge::NonConstList* seq = findSequence(*subgraph);
        sequences->push_back(seq);
        delete subgraph;
    }
    return sequences;
}
// Appends to `substrings` every contiguous window of exactly `len`
// characters found in each element of `s`, scanning elements in order and
// windows from left to right. Elements shorter than `len` contribute nothing.
void generateAllSubstrings(Sequences& substrings, const Sequences& s, size_t len)
{
    for (const auto& row : s) {
        // No window of `len` characters fits in this element.
        if (row.size() < len) {
            continue;
        }

        const size_t lastStart = row.size() - len;
        for (size_t start = 0; start <= lastStart; ++start) {
            const char* first = &row[start];
            string sub(first, len);
            substrings.push_back(sub);

            // Sanity check on the window that was just built.
            if (sub.size() != len) {
                cout << "generateAllSubstrings size: " << sub.size() << endl;
            }
        }
    }
}