/**
 * Scores the translation option applied by `hypo` and produces the follow-up
 * reordering state.
 *
 * @param hypo        hypothesis whose translation option is handed to Expand().
 * @param prev_state  feature state to expand from; must actually be a
 *                    LexicalReorderingState.
 * @param out         score collection that receives this feature's scores via
 *                    PlusEquals().
 * @return the state returned by LexicalReorderingState::Expand().
 */
FFState* LexicalReordering::Evaluate(const Hypothesis& hypo,
                                     const FFState* prev_state,
                                     ScoreComponentCollection* out) const
{
  // One zero-initialised slot per score component; handed to Expand() below.
  Scores score(GetNumScoreComponents(), 0);

  const LexicalReorderingState *prev =
    dynamic_cast<const LexicalReorderingState *>(prev_state);
  // dynamic_cast yields NULL for a null pointer or a state of another type;
  // catch that here instead of dereferencing it on the next line.
  assert(prev != NULL);

  LexicalReorderingState *next_state =
    prev->Expand(hypo.GetTranslationOption(), score);

  out->PlusEquals(this, score);

  return next_state;
}
/**
 * Builds the reordering score vector for `hypothesis`.
 *
 * For every configured direction the cached reordering scores are fetched
 * (from the previous hypothesis for Forward, from `hypothesis` itself for
 * Backward) and the entry matching the hypothesis' orientation is copied into
 * the result.
 *
 * @param hypothesis  hypothesis to score.
 * @return a vector with GetNumScoreComponents() entries; slots that were not
 *         written stay at 0.
 */
std::vector<float> LexicalReordering::CalcScore(Hypothesis* hypothesis) const
{
  std::vector<float> score(GetNumScoreComponents(), 0);
  // NOTE(review): kept at function scope as before, so a direction that is
  // neither Forward nor Backward sees the values loaded by an earlier
  // iteration.
  std::vector<float> values;

  // for every direction
  for (size_t i = 0; i < m_Direction.size(); ++i) {
    // grab data
    if (Forward == m_Direction[i]) {
      // Relates to the previous hypothesis, as the next phrase for the
      // current one is not known yet.
      // Sanity check: skip this direction when there is no previous
      // hypothesis at all, or when the previous hypothesis has id 0.
      if (hypothesis->GetPrevHypo() == NULL
          || 0 == hypothesis->GetPrevHypo()->GetId()) {
        continue; // no score, continue with next direction
      }
      // grab probs for prev hypothesis
      const ScoreComponentCollection &reorderingScoreColl =
        hypothesis->GetPrevHypo()->GetCachedReorderingScore();
      values = reorderingScoreColl.GetScoresForProducer(this);
    }
    if (Backward == m_Direction[i]) {
      const ScoreComponentCollection &reorderingScoreColl =
        hypothesis->GetCachedReorderingScore();
      values = reorderingScoreColl.GetScoresForProducer(this);
    }

    // add score
    // sanity check: do we have any probs?
    assert(values.size() == (GetNumOrientationTypes() * m_Direction.size()));

    OrientationType orientation = GetOrientationType(hypothesis);
    // Values are laid out as one run of GetNumOrientationTypes() entries per
    // direction; pick the entry for this direction and orientation.
    float value = values[orientation + i * GetNumOrientationTypes()];
    if (m_OneScorePerDirection) {
      // one score per direction
      score[i] = value;
    } else {
      // one score per direction and orientation
      score[orientation + i * GetNumOrientationTypes()] = value;
    }
  }
  return score;
}
/**
 * Prepares the per-sentence rule collection for `inputSentence`.
 *
 * Steps, in order:
 *  1. create a scratch directory under /tmp and write the input words to
 *     "<dir>/in";
 *  2. ask m_FuzzyMatchWrapper to extract a phrase-table file for this
 *     translation id;
 *  3. read that file line by line ("|||"-separated fields: source, target,
 *     scores, alignment, optional fifth field) and add one TargetPhrase per
 *     line to the node stored in m_collection[translationId];
 *  4. sort and prune the resulting collections.
 *
 * Malformed lines (wrong field count or wrong number of scores) are reported
 * through UserMessage and terminate the process with abort().
 *
 * @param inputSentence  the input to build rules for.
 * @throws via UTIL_THROW_IF2 when the scratch directory cannot be created.
 */
void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSentence)
{
  // mkdtemp() rewrites the XXXXXX suffix of dirName in place.
  char dirName[] = "/tmp/moses.XXXXXX";
  char *temp = mkdtemp(dirName);
  UTIL_THROW_IF2(temp == NULL,
                 "Couldn't create temporary directory " << dirName);

  string dirNameStr(dirName);

  string inFileName(dirNameStr + "/in");

  ofstream inFile(inFileName.c_str());

  // Write every word except the first and the last one (presumably the
  // sentence boundary markers -- confirm against the input type).
  // `i + 1 < inputSize` rather than `i < inputSize - 1`: the latter wraps
  // around for an empty input when the size type is unsigned.
  const size_t inputSize = inputSentence.GetSize();
  for (size_t i = 1; i + 1 < inputSize; ++i) {
    inFile << inputSentence.GetWord(i);
  }
  inFile << endl;
  inFile.close();

  long translationId = inputSentence.GetTranslationId();
  string ptFileName = m_FuzzyMatchWrapper->Extract(translationId, dirNameStr);

  // populate with rules for this sentence
  PhraseDictionaryNodeMemory &rootNode = m_collection[translationId];
  FormatType format = MosesFormat;

  // data from file
  InputFileStream inStream(ptFileName);

  // copied from class LoaderStandard
  PrintUserTime("Start loading fuzzy-match phrase model");

  const StaticData &staticData = StaticData::Instance();
  const std::string& factorDelimiter = staticData.GetFactorDelimiter();

  string lineOrig;
  // Index of the line currently being processed; used in diagnostics.
  size_t count = 0;

  while (getline(inStream, lineOrig)) {
    if (format == HieroFormat) {
      // Hiero rules would need reformatting, which is not supported here.
      UTIL_THROW(util::Exception, "Cannot be Hiero format");
    }
    // Moses format: the line is used as read.
    const string &line = lineOrig;

    vector<string> tokens;
    vector<float> scoreVector;

    TokenizeMultiCharSeparator(tokens, line, "|||");

    if (tokens.size() != 4 && tokens.size() != 5) {
      stringstream strme;
      strme << "Syntax error at " << ptFileName << ":" << count;
      UserMessage::Add(strme.str());
      abort();
    }

    const string &sourcePhraseString = tokens[0]
                                       , &targetPhraseString = tokens[1]
                                           , &scoreString = tokens[2]
                                               , &alignString = tokens[3];

    // NOTE(review): the test looks at the *source* side while the message
    // below says "empty target"; message text left untouched -- confirm which
    // one is intended.
    bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
    if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
      TRACE_ERR( ptFileName << ":" << count << ": pt entry contains empty target, skipping\n");
      // Still one line consumed: keep the reported line index in step.
      ++count;
      continue;
    }

    Tokenize<float>(scoreVector, scoreString);
    const size_t numScoreComponents = GetNumScoreComponents();
    if (scoreVector.size() != numScoreComponents) {
      stringstream strme;
      strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
            << numScoreComponents << ") of score components on line " << count;
      UserMessage::Add(strme.str());
      abort();
    }

    // parse source & find pt node

    // constituent labels, filled in by the CreateFromString() calls below
    Word *sourceLHS = NULL;
    Word *targetLHS = NULL;

    // source
    Phrase sourcePhrase( 0);
    sourcePhrase.CreateFromString(Input, m_input, sourcePhraseString, factorDelimiter, &sourceLHS);

    // create target phrase obj
    TargetPhrase *targetPhrase = new TargetPhrase();
    targetPhrase->CreateFromString(Output, m_output, targetPhraseString, factorDelimiter, &targetLHS);

    // rest of target phrase
    targetPhrase->SetAlignmentInfo(alignString);
    targetPhrase->SetTargetLHS(targetLHS);
    //targetPhrase->SetDebugOutput(string("New Format pt ") + line);

    // component score, for n-best output
    std::transform(scoreVector.begin(), scoreVector.end(), scoreVector.begin(), TransformScore);
    std::transform(scoreVector.begin(), scoreVector.end(), scoreVector.begin(), FloorScore);

    targetPhrase->GetScoreBreakdown().Assign(this, scoreVector);
    targetPhrase->Evaluate(sourcePhrase, GetFeaturesToApply());

    // The raw pointer is handed to the collection via Add().
    TargetPhraseCollection &phraseColl =
      GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS);
    phraseColl.Add(targetPhrase);

    count++;
  }

  // sort and prune each target phrase collection
  SortAndPrune(rootNode);

  // NOTE(review): the scratch directory created above is never removed.
  //removedirectoryrecursively(dirName);
}