// Loads the sentence-specific grammar for `source`.
//
// The grammar file name is derived from the sentence's translation id:
//   <GetFilePath()>/grammar.<translationId>.gz
// A loader is obtained from RuleTableLoaderFactory for that file and asked to
// load it into this dictionary (`*this` is handed to Load()).
//
// Failure to obtain a loader, or a failed load, trips CHECK.
void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
{
  // populate with rules for this sentence
  long translationId = source.GetTranslationId();

  string grammarFile = GetFilePath() + "/grammar." + SPrint(translationId) + ".gz";

  std::auto_ptr<RuleTableLoader> loader = RuleTableLoaderFactory::Create(grammarFile);

  // The factory hands back a smart pointer, which may be empty; fail loudly
  // here instead of dereferencing a null pointer on the next line.
  CHECK(loader.get() != NULL);

  bool ret = loader->Load(m_input, m_output, grammarFile, m_tableLimit, *this);
  CHECK(ret);
}
// Replaces the rules held in m_collection with the sentence-specific grammar
// for `source`.
//
// The grammar is read from
//   <GetFilePath()>/grammar.out.<translationId>
// through an InputFileStream, using a loader obtained from
// RuleTableLoaderFactory for that file name.
//
// Failure to obtain a loader, or a failed load, trips CHECK.
void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
{
  // clear out rules for previous sentence
  m_collection.Clear();

  // populate with rules for this sentence
  long translationId = source.GetTranslationId();

  string grammarFile = GetFilePath() + "/grammar.out." + SPrint(translationId);

  // data from file
  InputFileStream inFile(grammarFile);

  std::auto_ptr<RuleTableLoader> loader = RuleTableLoaderFactory::Create(grammarFile);

  // The factory hands back a smart pointer, which may be empty; fail loudly
  // here instead of dereferencing a null pointer on the next line.
  CHECK(loader.get() != NULL);

  bool ret = loader->Load(*m_input, *m_output, inFile, *m_weight, m_tableLimit,
                          *m_languageModels, m_wpProducer, *this);
  CHECK(ret);
}
void CoveredReferenceFeature::EvaluateWithSourceContext(const InputType &input , const InputPath &inputPath , const TargetPhrase &targetPhrase , const StackVec *stackVec , ScoreComponentCollection &scoreBreakdown , ScoreComponentCollection *estimatedFutureScore) const { long id = input.GetTranslationId(); boost::unordered_map<long, std::multiset<string> >::const_iterator refIt = m_refs.find(id); multiset<string> wordsInPhrase = GetWordsInPhrase(targetPhrase); multiset<string> covered; set_intersection(wordsInPhrase.begin(), wordsInPhrase.end(), refIt->second.begin(), refIt->second.end(), inserter(covered, covered.begin())); vector<float> scores; scores.push_back(covered.size()); scoreBreakdown.Assign(this, scores); estimatedFutureScore->Assign(this, scores); }
// Discards the m_collection entry keyed by the translation id of `source`,
// i.e. the entry InitializeForInput() created for that sentence.
void PhraseDictionaryFuzzyMatch::CleanUpAfterSentenceProcessing(const InputType &source)
{
  const long sentenceId = source.GetTranslationId();
  m_collection.erase(sentenceId);
}
// Builds the sentence-specific rule table for `inputSentence`.
//
// Steps:
//   1. Create a fresh temporary directory (/tmp/moses.XXXXXX) and write the
//      sentence's words, minus the first and last position, to "<dir>/in".
//   2. Ask m_FuzzyMatchWrapper to Extract() a phrase-table file for this
//      translation id, working in that directory.
//   3. Parse the resulting file line by line
//      ("source ||| target ||| scores ||| alignment [||| ...]") and add every
//      rule to the node stored in m_collection under the translation id.
//   4. Sort and prune the collected target phrase collections.
//
// Malformed lines (wrong field count, wrong number of scores) are reported via
// UserMessage and abort the process. Failure to create the temporary
// directory or to open the input file throws via UTIL_THROW_IF2.
//
// NOTE(review): the temporary directory is never removed (the cleanup call at
// the end is commented out), so each sentence leaves files behind in /tmp.
void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSentence)
{
  char dirName[] = "/tmp/moses.XXXXXX";
  char *temp = mkdtemp(dirName);
  UTIL_THROW_IF2(temp == NULL,
                 "Couldn't create temporary directory " << dirName);

  string dirNameStr(dirName);

  string inFileName(dirNameStr + "/in");

  ofstream inFile(inFileName.c_str());
  UTIL_THROW_IF2(!inFile, "Couldn't open " << inFileName << " for writing");

  // Positions 0 and size-1 are skipped (presumably the sentence boundary
  // markers - TODO confirm). Written as `i + 1 < size` so that an empty input
  // does not wrap around in unsigned arithmetic.
  const size_t inputSize = inputSentence.GetSize();
  for (size_t i = 1; i + 1 < inputSize; ++i) {
    inFile << inputSentence.GetWord(i);
  }
  inFile << endl;
  inFile.close();

  long translationId = inputSentence.GetTranslationId();
  string ptFileName = m_FuzzyMatchWrapper->Extract(translationId, dirNameStr);

  // populate with rules for this sentence
  PhraseDictionaryNodeMemory &rootNode = m_collection[translationId];
  FormatType format = MosesFormat;

  // data from file
  InputFileStream inStream(ptFileName);

  // copied from class LoaderStandard
  PrintUserTime("Start loading fuzzy-match phrase model");

  const StaticData &staticData = StaticData::Instance();
  const std::string& factorDelimiter = staticData.GetFactorDelimiter();

  // Only the Moses format is supported here; `format` is fixed above, so this
  // guard documents the restriction rather than handling a live case.
  if (format == HieroFormat) {
    UTIL_THROW(util::Exception, "Cannot be Hiero format");
  }

  string lineOrig;
  size_t count = 0; // 1-based number of the line currently being processed

  while(getline(inStream, lineOrig)) {
    // Advance for every line read, including skipped ones, so diagnostics
    // always name the actual line in the file.
    ++count;

    const string &line = lineOrig;

    vector<string> tokens;
    vector<float> scoreVector;

    TokenizeMultiCharSeparator(tokens, line , "|||" );

    if (tokens.size() != 4 && tokens.size() != 5) {
      stringstream strme;
      strme << "Syntax error at " << ptFileName << ":" << count;
      UserMessage::Add(strme.str());
      abort();
    }

    const string &sourcePhraseString = tokens[0]
                                       , &targetPhraseString = tokens[1]
                                       , &scoreString = tokens[2]
                                       , &alignString = tokens[3];

    // NOTE(review): the test inspects the *source* side although the message
    // below says "empty target"; message text left untouched.
    bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
    if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
      TRACE_ERR( ptFileName << ":" << count << ": pt entry contains empty target, skipping\n");
      continue;
    }

    Tokenize<float>(scoreVector, scoreString);
    const size_t numScoreComponents = GetNumScoreComponents();
    if (scoreVector.size() != numScoreComponents) {
      stringstream strme;
      strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
            << numScoreComponents << ") of score components on line " << count;
      UserMessage::Add(strme.str());
      abort();
    }

    // parse source & find pt node

    // constituent labels (filled in by CreateFromString below)
    Word *sourceLHS = NULL;
    Word *targetLHS = NULL;

    // source
    Phrase sourcePhrase( 0);
    sourcePhrase.CreateFromString(Input, m_input, sourcePhraseString, factorDelimiter, &sourceLHS);

    // create target phrase obj
    TargetPhrase *targetPhrase = new TargetPhrase();
    targetPhrase->CreateFromString(Output, m_output, targetPhraseString, factorDelimiter, &targetLHS);

    // rest of target phrase
    targetPhrase->SetAlignmentInfo(alignString);
    targetPhrase->SetTargetLHS(targetLHS);
    //targetPhrase->SetDebugOutput(string("New Format pt ") + line);

    // component score, for n-best output
    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
    std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);

    targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
    targetPhrase->Evaluate(sourcePhrase, GetFeaturesToApply());

    // The raw pointer is handed to the collection via Add().
    TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS);
    phraseColl.Add(targetPhrase);
  }

  // sort and prune each target phrase collection
  SortAndPrune(rootNode);

  //removedirectoryrecursively(dirName);
}
// Erases from m_collection the entry whose key is the translation id of
// `source` (the entry InitializeForInput() populated for that sentence).
void PhraseDictionaryFuzzyMatch::CleanUp(const InputType &source)
{
  const long sentenceId = source.GetTranslationId();
  m_collection.erase(sentenceId);
}