示例#1
0
/**
   Convenience overload that accepts one UTF-8 encoded string.

   The string is first broken into pieces by Util::toUtf8Chs (presumably one
   entry per UTF-8 character -- confirm against Util), and those pieces are
   then forwarded, together with the remaining arguments, to the overload of
   stringToHypergraph that takes a sequence of strings.

   \param utf8string    text to convert
   \param pHgResult     hypergraph passed through to the token-sequence overload
   \param opts          conversion options, passed through unchanged
   \param inputWeights  token weights, passed through unchanged
   \return whatever the token-sequence overload returns
*/
Position stringToHypergraph(std::string const& utf8string, IMutableHypergraph<Arc>* pHgResult,
                            StringToHypergraphOptions const& opts = StringToHypergraphOptions(),
                            TokenWeights const& inputWeights = TokenWeights()) {
  // Filled by toUtf8Chs with the individual pieces of utf8string.
  std::vector<std::string> pieces;
  Util::toUtf8Chs(utf8string, pieces);

  return stringToHypergraph(pieces, pHgResult, opts, inputWeights);
}
示例#2
0
 void toHypergraph(std::string const& line, IMutableHypergraph<A>* phg, std::size_t lineNum = 0) const {
   Strings words = parseTokens(line, (ParseTokensOptions const&)*this);
   SDL_DEBUG(Hypergraph.HgConvertString, lineNum << ": " << printer(words, Util::RangeSep(" ", "", "")));
   SDL_INFO(Hypergraph.HgConvertString, lineNum << ": len=" << words.size());
   phg->clear(properties());
   assert(phg->storesArcs());
   assert(phg->getVocabulary());
   stringToHypergraph(words, phg);
 }
示例#3
0
/**
   Builds a hypergraph from inputTokens and then attaches the corresponding
   entry of outputTokens as FSM output label along the way from the start
   state to the final state.

   Starting at pHgResult->start(), the first out arc (index 0) of the current
   state gets the symbol obtained from opts.terminalMaybeUnk for the next
   output token; traversal then continues at that arc's head until the final
   state is reached.

   NOTE(review): the traversal does not check the output-token iterator
   against end(); it relies on the path built by stringToHypergraph having
   exactly one arc per token -- confirm against stringToHypergraph.

   \param inputTokens   tokens used to build the hypergraph
   \param outputTokens  tokens used for the output labels; must have the same
                        size as inputTokens
   \param pHgResult     hypergraph that is built and then relabeled
   \param opts          options for building and for symbol lookup
   \throws IndexException (via SDL_THROW_LOG) if the two token sequences
           differ in size
*/
void stringPairToFst(Strings const& inputTokens, std::vector<std::string> const& outputTokens,
                     IMutableHypergraph<Arc>* pHgResult,
                     StringToHypergraphOptions const& opts = StringToHypergraphOptions()) {
  const bool sameLength = inputTokens.size() == outputTokens.size();
  if (!sameLength) {
    SDL_THROW_LOG(Hypergraph.stringPairToFst, IndexException,
                  "The two strings must have same number of words");
  }

  // Step 1: build the plain acceptor over the input tokens.
  stringToHypergraph(inputTokens, pHgResult, opts);

  // Step 2: follow the first out arc of each state from start to final,
  // putting one output token on each arc visited.
  IVocabularyPtr vocab = pHgResult->getVocabulary();
  std::vector<std::string>::const_iterator outTok = outputTokens.begin();
  StateId current = pHgResult->start();
  const StateId last = pHgResult->final();
  for (; current != last; ++outTok) {
    Arc* firstArc = pHgResult->outArc(current, 0);
    const Sym outLabel = opts.terminalMaybeUnk(vocab.get(), *outTok);
    setFsmOutputLabel(pHgResult, *firstArc, outLabel);
    current = firstArc->head();
  }
}