void ChartParser::Create(const WordsRange &wordsRange, ChartParserCallback &to) { assert(m_decodeGraphList.size() == m_ruleLookupManagers.size()); std::vector <DecodeGraph*>::const_iterator iterDecodeGraph; std::vector <ChartRuleLookupManager*>::const_iterator iterRuleLookupManagers = m_ruleLookupManagers.begin(); for (iterDecodeGraph = m_decodeGraphList.begin(); iterDecodeGraph != m_decodeGraphList.end(); ++iterDecodeGraph, ++iterRuleLookupManagers) { const DecodeGraph &decodeGraph = **iterDecodeGraph; assert(decodeGraph.GetSize() == 1); ChartRuleLookupManager &ruleLookupManager = **iterRuleLookupManagers; size_t maxSpan = decodeGraph.GetMaxChartSpan(); if (maxSpan == 0 || wordsRange.GetNumWordsCovered() <= maxSpan) { ruleLookupManager.GetChartRuleCollection(wordsRange, to); } } if (wordsRange.GetNumWordsCovered() == 1 && wordsRange.GetStartPos() != 0 && wordsRange.GetStartPos() != m_source.GetSize()-1) { bool alwaysCreateDirectTranslationOption = StaticData::Instance().IsAlwaysCreateDirectTranslationOption(); if (to.Empty() || alwaysCreateDirectTranslationOption) { // create unknown words for 1 word coverage where we don't have any trans options const Word &sourceWord = m_source.GetWord(wordsRange.GetStartPos()); m_unknown.Process(sourceWord, wordsRange, to); } } }
void ChartRuleLookupManagerCYKPlus::AddCompletedRule( const DottedRule &dottedRule, const TargetPhraseCollection &tpc, const WordsRange &range, ChartParserCallback &outColl) { // Determine the rule's rank. size_t rank = 0; const DottedRule *node = &dottedRule; while (!node->IsRoot()) { if (node->IsNonTerminal()) { ++rank; } node = node->GetPrev(); } // Fill m_stackVec with a stack pointer for each non-terminal. m_stackVec.resize(rank); node = &dottedRule; while (rank > 0) { if (node->IsNonTerminal()) { m_stackVec[--rank] = &node->GetChartCellLabel(); } node = node->GetPrev(); } // Add the (TargetPhraseCollection, StackVec) pair to the collection. outColl.Add(tpc, m_stackVec, range); }
void ChartRuleLookupManagerMemoryPerSentence::GetChartRuleCollection( const InputPath &inputPath, size_t lastPos, ChartParserCallback &outColl) { const Range &range = inputPath.GetWordsRange(); size_t startPos = range.GetStartPos(); size_t absEndPos = range.GetEndPos(); m_lastPos = lastPos; m_stackVec.clear(); m_stackScores.clear(); m_outColl = &outColl; m_unaryPos = absEndPos-1; // rules ending in this position are unary and should not be added to collection // create/update data structure to quickly look up all chart cells that match start position and label. UpdateCompressedMatrix(startPos, absEndPos, lastPos); const PhraseDictionaryNodeMemory &rootNode = m_ruleTable.GetRootNode(GetParser().GetTranslationId()); // all rules starting with terminal if (startPos == absEndPos) { GetTerminalExtension(&rootNode, startPos); } // all rules starting with nonterminal else if (absEndPos > startPos) { GetNonTerminalExtension(&rootNode, startPos); } // copy temporarily stored rules to out collection CompletedRuleCollection & rules = m_completedRules[absEndPos]; for (vector<CompletedRule*>::const_iterator iter = rules.begin(); iter != rules.end(); ++iter) { outColl.Add((*iter)->GetTPC(), (*iter)->GetStackVector(), range); } rules.Clear(); }
void ChartParserUnknown::Process(const Word &sourceWord, const WordsRange &range, ChartParserCallback &to) { // unknown word, add as trans opt const StaticData &staticData = StaticData::Instance(); const UnknownWordPenaltyProducer &unknownWordPenaltyProducer = UnknownWordPenaltyProducer::Instance(); size_t isDigit = 0; if (staticData.GetDropUnknown()) { const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface const StringPiece s = f->GetString(); isDigit = s.find_first_of("0123456789"); if (isDigit == string::npos) isDigit = 0; else isDigit = 1; // modify the starting bitmap } Phrase* unksrc = new Phrase(1); unksrc->AddWord() = sourceWord; Word &newWord = unksrc->GetWord(0); newWord.SetIsOOV(true); m_unksrcs.push_back(unksrc); //TranslationOption *transOpt; if (! staticData.GetDropUnknown() || isDigit) { // loop const UnknownLHSList &lhsList = staticData.GetUnknownLHS(); UnknownLHSList::const_iterator iterLHS; for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) { const string &targetLHSStr = iterLHS->first; float prob = iterLHS->second; // lhs //const Word &sourceLHS = staticData.GetInputDefaultNonTerminal(); Word *targetLHS = new Word(true); targetLHS->CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true); UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS"); // add to dictionary TargetPhrase *targetPhrase = new TargetPhrase(); Word &targetWord = targetPhrase->AddWord(); targetWord.CreateUnknownWord(sourceWord); // scores float unknownScore = FloorScore(TransformScore(prob)); targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore); targetPhrase->Evaluate(*unksrc); targetPhrase->SetTargetLHS(targetLHS); targetPhrase->SetAlignmentInfo("0-0"); if (staticData.IsDetailedTreeFragmentsTranslationReportingEnabled()) { targetPhrase->SetProperty("Tree","[ " + (*targetLHS)[0]->GetString().as_string() + " "+sourceWord[0]->GetString().as_string()+" ]"); } // chart rule to.AddPhraseOOV(*targetPhrase, m_cacheTargetPhraseCollection, range); } // for (iterLHS } else { // drop source word. create blank trans opt float unknownScore = FloorScore(-numeric_limits<float>::infinity()); TargetPhrase *targetPhrase = new TargetPhrase(); // loop const UnknownLHSList &lhsList = staticData.GetUnknownLHS(); UnknownLHSList::const_iterator iterLHS; for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) { const string &targetLHSStr = iterLHS->first; //float prob = iterLHS->second; Word *targetLHS = new Word(true); targetLHS->CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true); UTIL_THROW_IF2(targetLHS->GetFactor(0) == NULL, "Null factor for target LHS"); targetPhrase->GetScoreBreakdown().Assign(&unknownWordPenaltyProducer, unknownScore); targetPhrase->Evaluate(*unksrc); targetPhrase->SetTargetLHS(targetLHS); // chart rule to.AddPhraseOOV(*targetPhrase, m_cacheTargetPhraseCollection, range); } } }