/**
   Convenience overload: builds a hypergraph from a single UTF-8 string.

   The string is first broken up by Util::toUtf8Chs into a vector of strings
   (presumably one entry per UTF-8 character - confirm against Util), and that
   vector is then handed to the token-sequence overload of stringToHypergraph.

   \param utf8string   text to convert.
   \param pHgResult    hypergraph passed through to the token-sequence overload.
   \param opts         conversion options, forwarded unchanged.
   \param inputWeights token weights, forwarded unchanged.
   \return the value returned by the token-sequence overload.
*/
Position stringToHypergraph(std::string const& utf8string, IMutableHypergraph<Arc>* pHgResult,
                            StringToHypergraphOptions const& opts = StringToHypergraphOptions(),
                            TokenWeights const& inputWeights = TokenWeights()) {
  std::vector<std::string> chars;
  Util::toUtf8Chs(utf8string, chars);
  return stringToHypergraph(chars, pHgResult, opts, inputWeights);
}
void toHypergraph(std::string const& line, IMutableHypergraph<A>* phg, std::size_t lineNum = 0) const { Strings words = parseTokens(line, (ParseTokensOptions const&)*this); SDL_DEBUG(Hypergraph.HgConvertString, lineNum << ": " << printer(words, Util::RangeSep(" ", "", ""))); SDL_INFO(Hypergraph.HgConvertString, lineNum << ": len=" << words.size()); phg->clear(properties()); assert(phg->storesArcs()); assert(phg->getVocabulary()); stringToHypergraph(words, phg); }
/**
   Builds a hypergraph from inputTokens and then attaches the tokens of
   outputTokens as FSM output labels, one per arc, in order.

   The two token sequences must have the same length; otherwise an
   IndexException is reported through SDL_THROW_LOG.

   After stringToHypergraph has filled *pHgResult, the function starts at
   pHgResult->start() and repeatedly takes out-arc 0 of the current state,
   sets that arc's output label to the symbol obtained from
   opts.terminalMaybeUnk for the next output token, and moves to the arc's
   head, until pHgResult->final() is reached.

   \param inputTokens  tokens used to build the hypergraph.
   \param outputTokens tokens written as output labels; same size as inputTokens.
   \param pHgResult    hypergraph to fill; must be non-null (dereferenced
                       unconditionally).
   \param opts         forwarded to stringToHypergraph and used to map output
                       tokens to symbols.
*/
void stringPairToFst(Strings const& inputTokens, std::vector<std::string> const& outputTokens,
                     IMutableHypergraph<Arc>* pHgResult,
                     StringToHypergraphOptions const& opts = StringToHypergraphOptions()) {
  if (inputTokens.size() != outputTokens.size()) {
    SDL_THROW_LOG(Hypergraph.stringPairToFst, IndexException,
                  "The two strings must have same number of words");
  }

  // 1. Create simple FSA from input tokens:
  stringToHypergraph(inputTokens, pHgResult, opts);

  // 2. Insert output tokens:
  IVocabularyPtr pVoc = pHgResult->getVocabulary();
  std::vector<std::string>::const_iterator it = outputTokens.begin();
  StateId stateId = pHgResult->start();
  const StateId finalId = pHgResult->final();
  // The loop consumes one output token per arc on the start->final path and
  // does not test `it` against end(); it relies on that path having no more
  // arcs than there are output tokens (sizes were checked equal above).
  while (stateId != finalId) {
    Arc* arc = pHgResult->outArc(stateId, 0);
    const Sym sym = opts.terminalMaybeUnk(pVoc.get(), *it);
    setFsmOutputLabel(pHgResult, *arc, sym);
    ++it;
    stateId = arc->head();
  }
}