/// Reads three whitespace-separated fields (word, pos, label) from \a line,
/// registers each with \a pVoc as a kTerminal symbol and appends the returned
/// ids to `words`, `poss` and `labels` (declared outside this block --
/// presumably members of the enclosing class).
///
/// \param line       input line; must contain at least three fields.
/// \param allLabels  receives the label id unless \a testMode is set; it is
///                   dereferenced without a null check in that case.
/// \param pVoc       vocabulary used to map the strings to ids.
/// \param testMode   when true, \a allLabels is left untouched.
///
/// Raises InvalidInputException through SDL_THROW_LOG when any of the three
/// fields is missing.
void add(std::string const& line, std::set<Sym>* allLabels, IVocabularyPtr& pVoc, bool testMode) {
  std::string word;
  std::string pos;
  std::string label;
  {
    std::stringstream fields(line);
    fields >> word >> pos >> label;
  }

  // A field that could not be extracted is left empty.
  bool const complete = !word.empty() && !pos.empty() && !label.empty();
  if (!complete) {
    SDL_THROW_LOG(CrfDemo, InvalidInputException, "Bad line: " << line);
  }

  words.push_back(pVoc->add(word, kTerminal));
  poss.push_back(pVoc->add(pos, kTerminal));
  labels.push_back(pVoc->add(label, kTerminal));

  SDL_DEBUG(CrfDemo, "id(" << word << "): " << words.back());
  SDL_DEBUG(CrfDemo, "id(" << pos << "): " << poss.back());
  SDL_DEBUG(CrfDemo, "id(" << label << "): " << labels.back());

  if (testMode) {
    return;
  }
  allLabels->insert(labels.back());
}
/// Builds a hypergraph from \a inputTokens via stringToHypergraph and then
/// attaches the i-th entry of \a outputTokens as the output label of the arc
/// reached at step i when walking from the start state to the final state.
///
/// \param inputTokens   tokens passed to stringToHypergraph.
/// \param outputTokens  output-side tokens; must have the same size as
///                      \a inputTokens.
/// \param pHgResult     hypergraph that is filled in and then relabelled.
/// \param opts          forwarded to stringToHypergraph and used to map each
///                      output token to a symbol (terminalMaybeUnk).
///
/// Raises IndexException through SDL_THROW_LOG when the two token sequences
/// differ in length.
///
/// NOTE(review): `Arc` is not declared inside this block; presumably it is
/// supplied by an enclosing template or typedef -- confirm.
void stringPairToFst(Strings const& inputTokens, std::vector<std::string> const& outputTokens,
                     IMutableHypergraph<Arc>* pHgResult,
                     StringToHypergraphOptions const& opts = StringToHypergraphOptions()) {
  if (inputTokens.size() != outputTokens.size()) {
    SDL_THROW_LOG(Hypergraph.stringPairToFst, IndexException,
                  "The two strings must have same number of words");
  }

  // 1. Create simple FSA from input tokens:
  stringToHypergraph(inputTokens, pHgResult, opts);

  // 2. Insert output tokens:
  IVocabularyPtr vocab = pHgResult->getVocabulary();
  StateId state = pHgResult->start();
  StateId const lastState = pHgResult->final();
  // Only the first out-arc (index 0) of each state is followed.
  for (std::vector<std::string>::const_iterator outTok = outputTokens.begin(); state != lastState;
       ++outTok) {
    Arc* const outgoing = pHgResult->outArc(state, 0);
    Sym const outputSym = opts.terminalMaybeUnk(vocab.get(), *outTok);
    setFsmOutputLabel(pHgResult, *outgoing, outputSym);
    state = outgoing->head();
  }
}
/// Convenience overload for a Util::StringBuilder sink: takes the vocabulary
/// as an IVocabularyPtr and forwards to the writeLabel overload that accepts
/// the value returned by voc.get(). \a quote defaults to kQuoted.
inline void writeLabel(Util::StringBuilder& out, Sym sym, IVocabularyPtr const& voc,
                       SymbolQuotation quote = kQuoted) {
  writeLabel(out, sym, voc.get(), quote);
}
/// Convenience overload for a std::ostream sink: takes the vocabulary as an
/// IVocabularyPtr and forwards to the writeLabel overload that accepts the
/// value returned by voc.get(). \a quote defaults to kQuoted.
inline void writeLabel(std::ostream& out, Sym sym, IVocabularyPtr const& voc,
                       SymbolQuotation quote = kQuoted) {
  writeLabel(out, sym, voc.get(), quote);
}
/// Forwards the range [begin, end) and \a out to the lookupAndPrintSymbols
/// overload that accepts the value returned by pVoc.get().
///
/// NOTE(review): `ForwardIterator` is not declared inside this block;
/// presumably a template parameter introduced just above -- confirm.
void lookupAndPrintSymbols(ForwardIterator begin, ForwardIterator end, IVocabularyPtr const& pVoc,
                           std::ostream& out) {
  lookupAndPrintSymbols(begin, end, pVoc.get(), out);
}
/// Constructs from a vocabulary handle. The member `pVoc` is initialised with
/// the result of calling get() on the constructor argument of the same name
/// (inside the initialiser's parentheses the parameter hides the member).
GetSymbol(IVocabularyPtr const& pVoc)
    : pVoc(pVoc.get())
{
}
/// Prints \a phrase to \a out by delegating to Util::printRangeState, passing
/// the value returned by vocab.get() as the state argument.
inline void print(std::ostream& out, Syms const& phrase, IVocabularyPtr const& vocab) {
  Util::printRangeState(out, vocab.get(), phrase);
}
/// Prints a single symbol to \a out by forwarding to the print overload that
/// accepts the value returned by vocab.get(). \a variablePrefix defaults to
/// "X" and is passed through unchanged.
inline void print(std::ostream& out, Sym sym, IVocabularyPtr const& vocab,
                  char const* variablePrefix = "X") {
  print(out, sym, vocab.get(), variablePrefix);
}
/// Returns the result of the typedStrOrIndex overload that accepts the value
/// returned by pVoc.get(), called with the same \a sym.
inline std::string typedStrOrIndex(Sym sym, IVocabularyPtr const& pVoc) {
  return typedStrOrIndex(sym, pVoc.get());
}
/// Returns the result of the orTypedIndex overload that accepts the value
/// returned by pVoc.get(), called with the same \a sym.
inline std::string orTypedIndex(Sym sym, IVocabularyPtr const& pVoc) {
  return orTypedIndex(sym, pVoc.get());
}
/// Returns the result of the orId overload that accepts the value returned by
/// pVoc.get(), called with the same \a sym.
inline std::string orId(Sym sym, IVocabularyPtr const& pVoc) {
  return orId(sym, pVoc.get());
}