void PhraseNode::AddTargetPhrase(size_t pos, const SourcePhrase &sourcePhrase , TargetPhrase *targetPhrase, OnDiskWrapper &onDiskWrapper , size_t tableLimit, const std::vector<float> &counts, OnDiskPt::PhrasePtr spShort) { size_t phraseSize = sourcePhrase.GetSize(); if (pos < phraseSize) { const Word &word = sourcePhrase.GetWord(pos); PhraseNode &node = m_children[word]; if (m_currChild != &node) { // new node node.SetPos(pos); if (m_currChild) { m_currChild->Save(onDiskWrapper, pos, tableLimit); } m_currChild = &node; } // keep searching for target phrase node.. node.AddTargetPhrase(pos + 1, sourcePhrase, targetPhrase, onDiskWrapper, tableLimit, counts, spShort); } else { // drilled down to the right node m_counts = counts; targetPhrase->SetSourcePhrase(spShort); m_targetPhraseColl.AddTargetPhrase(targetPhrase); } }
void XenIO::writeNewPT(boost::shared_ptr<PhraseTable> ptrPT, boost::shared_ptr<Score> ptrScore) { XenOption* opt = XenOption::getInstance(); Score sc2; if (opt->getLocal()) { std::vector<SourcePhrase> srcPh = ptrPT->getSrcPhrases(); for (unsigned int i = 0; i < srcPh.size(); i++) { SourcePhrase sP = srcPh[i]; for (unsigned int j = 0; j < sP.getScoresXE()->getSize(); j++) { sc2.addScore(sP.getScoresXE()->getScore(j)); } } } try { std::cout << "Writing new phrase-table to " + opt->getOutName() << std::endl; std::string oF = opt->getOutName(); boost::iostreams::filtering_ostream out; out.push(boost::iostreams::gzip_compressor()); out.push(boost::iostreams::file_sink(oF.c_str(), std::ios_base::out | std::ios_base::binary)); if (!out.good()) throw XenCommon::XenCEption("Can't write to " + opt->getOutName() + ".gz"); for (unsigned int i = 0; i < ptrScore->getSize(); i++) { out << ptrPT->getSource(i) << " ||| " << ptrPT->getTarget(i) << " ||| " << ptrPT->getScores(i) << " " << XenCommon::toString(ptrScore->getScore(i)); if (opt->getLocal()) out << " " << XenCommon::toString(sc2.getScore(i)); out << " ||| " << ptrPT->getAlignment(i) << " ||| " << ptrPT->getCounts(i) << std::endl; if (out.bad()) throw XenCommon::XenCEption("Something went wrong in output stream..."); } out.flush(); out.reset(); } catch (XenCommon::XenCEption &e) { throw; } }
int main(int argc, char **argv) { int tableLimit = 20; std::string ttable = ""; bool useAlignments = false; for(int i = 1; i < argc; i++) { if(!strcmp(argv[i], "-tlimit")) { if(i + 1 == argc) usage(); tableLimit = atoi(argv[++i]); } else if(!strcmp(argv[i], "-t")) { if(i + 1 == argc) usage(); ttable = argv[++i]; } else usage(); } if(ttable == "") usage(); OnDiskWrapper onDiskWrapper; bool retDb = onDiskWrapper.BeginLoad(ttable); CHECK(retDb); cerr << "Ready..." << endl; std::string line; while(getline(std::cin, line)) { std::vector<std::string> tokens; tokens = Moses::Tokenize(line, " "); cerr << "line: " << line << endl; // create source phrase SourcePhrase sourcePhrase; for (size_t pos = 0; pos < tokens.size(); ++pos) { const string &tok = tokens[pos]; if (pos == tokens.size() - 1) { // last position. LHS non-term Tokenize(sourcePhrase, tok, false, true, onDiskWrapper); } else { Tokenize(sourcePhrase, tok, true, true, onDiskWrapper); } } const PhraseNode *node = &onDiskWrapper.GetRootSourceNode(); cerr << "node=" << node << endl; assert(node); for (size_t pos = 0; pos < sourcePhrase.GetSize(); ++pos) { const Word &word = sourcePhrase.GetWord(pos); cerr << word << " "; node = node->GetChild(word, onDiskWrapper); cerr << "node=" << node << endl; if (node == NULL) { break; } } if (node) { // source phrase points to a bunch of rules const TargetPhraseCollection *coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper); string str = coll->GetDebugStr(); cout << "Found " << coll->GetSize() << endl; for (size_t ind = 0; ind < coll->GetSize(); ++ind) { const TargetPhrase &targetPhrase = coll->GetTargetPhrase(ind); cerr << " "; targetPhrase.DebugPrint(cerr, onDiskWrapper.GetVocab()); cerr << endl; } } else { cout << "Not found" << endl; } std::cout << '\n'; std::cout.flush(); } cerr << "Finished." << endl; }