// Descends from m_collection along the source side of a rule, one source word
// per step, calling GetOrCreateChild at each step, and returns the node
// reached after the last source word.
//
// Terminal source words use the single-word GetOrCreateChild overload.
// Non-terminal source words use the two-word overload, keyed by the source
// label and the target label it is aligned to. The target label is read from
// the target phrase's alignment info, which is consumed one entry per source
// non-terminal, in iteration order.
//
// source: source side of the rule.
// target: target side of the rule; also supplies the alignment info.
// Returns a reference to the node for the full source side (m_collection
// itself when the source phrase is empty).
PhraseDictionaryNodeSCFG &PhraseDictionarySCFG::GetOrCreateNode(const Phrase &source, const TargetPhrase &target)
{
  const size_t sourceLength = source.GetSize();

  const AlignmentInfo &alignments = target.GetAlignmentInfo();
  AlignmentInfo::const_iterator nextAlignment = alignments.begin();

  PhraseDictionaryNodeSCFG *node = &m_collection;

  size_t index = 0;
  while (index < sourceLength) {
    const Word &sourceWord = source.GetWord(index);

    if (!sourceWord.IsNonTerminal()) {
      // Terminal: keyed by the word alone.
      node = node->GetOrCreateChild(sourceWord);
    } else {
      // Non-terminal: keyed by the source label first, then the target label.
      // The next unread alignment entry must belong to this source position.
      assert(nextAlignment != target.GetAlignmentInfo().end());
      assert(nextAlignment->first == index);

      const size_t targetIndex = nextAlignment->second;
      ++nextAlignment;

      const Word &targetLabel = target.GetWord(targetIndex);
      node = node->GetOrCreateChild(sourceWord, targetLabel);
    }

    assert(node != NULL);
    ++index;
  }

  return *node;
}
// Descends from m_collection along the source side of a rule, one source word
// per step, calling GetOrCreateChild at each step, and returns the node
// reached after the last source word.
//
// Terminal source words use the single-word GetOrCreateChild overload.
// Non-terminal source words use the two-word overload, keyed by the source
// label and the target label it is aligned to (taken from the target phrase's
// alignment info, consumed one entry per source non-terminal).
//
// Diagnostics added for testing: before each alignment assertion, and when
// the aligned target word is not a non-terminal, the offending rule is
// reported through VERBOSE at level 1.
//
// NOTE(review): assert() is a no-op when NDEBUG is defined, so in such builds
// a rule reported as bad is still processed (including the dereference of the
// alignment iterator) -- confirm whether that is acceptable.
//
// source: source side of the rule.
// target: target side of the rule; also supplies the alignment info.
// Returns a reference to the node for the full source side.
PhraseDictionaryNodeSCFG &PhraseDictionaryMinSpan::GetOrCreateNode(const Phrase &source, const TargetPhrase &target)
{
  const size_t sourceLength = source.GetSize();

  const AlignmentInfo &alignments = target.GetAlignmentInfo();
  AlignmentInfo::const_iterator nextAlignment = alignments.begin();

  PhraseDictionaryNodeSCFG *node = &m_collection;

  size_t index = 0;
  while (index < sourceLength) {
    const Word &sourceWord = source.GetWord(index);

    if (!sourceWord.IsNonTerminal()) {
      // Terminal: keyed by the word alone.
      node = node->GetOrCreateChild(sourceWord);
    } else {
      // Non-terminal: keyed by the source label first, then the target label.

      // There must still be an unread alignment entry...
      const bool hasAlignment = (nextAlignment != target.GetAlignmentInfo().end());
      if (!hasAlignment) {
        VERBOSE(1,"Bad rule : " << source << " ||| " << target << endl);
      }
      assert(hasAlignment);

      // ...and it must belong to this source position.
      const bool alignedHere = (nextAlignment->first == index);
      if (!alignedHere) {
        VERBOSE(1,"Bad rule : " << source << " ||| " << target << endl);
      }
      assert(alignedHere);

      const size_t targetIndex = nextAlignment->second;
      ++nextAlignment;

      const Word &targetLabel = target.GetWord(targetIndex);

      // Report (but do not reject) an alignment that points at a terminal.
      if (!targetLabel.IsNonTerminal()) {
        VERBOSE(1,"Bad rule : " << source << " ||| " << target << endl);
      }

      node = node->GetOrCreateChild(sourceWord, targetLabel);
    }

    assert(node != NULL);
    ++index;
  }

  return *node;
}
// Calls Sort on the given root node with the configured table limit.
// Does nothing when GetTableLimit() evaluates to false (i.e. is zero).
//
// rootNode: root of the trie to sort.
void PhraseDictionaryFuzzyMatch::SortAndPrune(PhraseDictionaryNodeSCFG &rootNode)
{
  if (!GetTableLimit()) {
    return;
  }
  rootNode.Sort(GetTableLimit());
}
// Descends from rootNode along the source side of a rule, one source word per
// step, calling GetOrCreateChild at each step, and returns the node reached
// after the last source word.
//
// Terminal source words use the single-word GetOrCreateChild overload.
// Non-terminal source words use the two-word overload, keyed by the source
// label and the target label it is aligned to (taken from
// target.GetAlignNonTerm(), consumed one entry per source non-terminal).
//
// The source and target phrases are written to cerr on every call.
//
// rootNode:  node the descent starts from.
// source:    source side of the rule.
// target:    target side of the rule; also supplies the alignment info.
// sourceLHS: currently unused -- the step that would use it is disabled below.
// Returns a reference to the node for the full source side.
PhraseDictionaryNodeSCFG &PhraseDictionaryFuzzyMatch::GetOrCreateNode(PhraseDictionaryNodeSCFG &rootNode
    , const Phrase &source
    , const TargetPhrase &target
    , const Word &sourceLHS)
{
  cerr << source << endl << target << endl;

  const size_t sourceLength = source.GetSize();

  const AlignmentInfo &alignments = target.GetAlignNonTerm();
  AlignmentInfo::const_iterator nextAlignment = alignments.begin();

  PhraseDictionaryNodeSCFG *node = &rootNode;

  size_t index = 0;
  while (index < sourceLength) {
    const Word &sourceWord = source.GetWord(index);

    if (!sourceWord.IsNonTerminal()) {
      // Terminal: keyed by the word alone.
      node = node->GetOrCreateChild(sourceWord);
    } else {
      // Non-terminal: keyed by the source label first, then the target label.
      // The next unread alignment entry must belong to this source position.
      CHECK(nextAlignment != alignments.end());
      CHECK(nextAlignment->first == index);

      const size_t targetIndex = nextAlignment->second;
      ++nextAlignment;

      const Word &targetLabel = target.GetWord(targetIndex);
      node = node->GetOrCreateChild(sourceWord, targetLabel);
    }

    CHECK(node != NULL);
    ++index;
  }

  // Disabled: a final step keyed by the source LHS.
  //node = node->GetOrCreateChild(sourceLHS);
  //CHECK(node != NULL);

  return *node;
}
// Given a partial rule application ending at startPos-1 and given the sets of // source and target non-terminals covering the span [startPos, endPos], // determines the full or partial rule applications that can be produced through // extending the current rule application by a single non-terminal. void ChartRuleLookupManagerMemory::ExtendPartialRuleApplication( const PhraseDictionaryNodeSCFG & node, const WordConsumed *prevWordConsumed, size_t startPos, size_t endPos, size_t stackInd, const NonTerminalSet & sourceNonTerms, const NonTerminalSet & targetNonTerms, ProcessedRuleColl & processedRuleColl) { const PhraseDictionaryNodeSCFG::NonTerminalMap & nonTermMap = node.GetNonTerminalMap(); const size_t numChildren = nonTermMap.size(); if (numChildren == 0) { return; } const size_t numSourceNonTerms = sourceNonTerms.size(); const size_t numTargetNonTerms = targetNonTerms.size(); const size_t numCombinations = numSourceNonTerms * numTargetNonTerms; // We can search by either: // 1. Enumerating all possible source-target NT pairs that are valid for // the span and then searching for matching children in the node, // or // 2. Iterating over all the NT children in the node, searching // for each source and target NT in the span's sets. 
// We'll do whichever minimises the number of lookups: if (numCombinations <= numChildren*2) { NonTerminalSet::const_iterator p = sourceNonTerms.begin(); NonTerminalSet::const_iterator sEnd = sourceNonTerms.end(); for (; p != sEnd; ++p) { const Word & sourceNonTerm = *p; NonTerminalSet::const_iterator q = targetNonTerms.begin(); NonTerminalSet::const_iterator tEnd = targetNonTerms.end(); for (; q != tEnd; ++q) { const Word & targetNonTerm = *q; const PhraseDictionaryNodeSCFG * child = node.GetChild(sourceNonTerm, targetNonTerm); if (child == NULL) { continue; } WordConsumed * wc = new WordConsumed(startPos, endPos, targetNonTerm, prevWordConsumed); ProcessedRule * rule = new ProcessedRule(*child, wc); processedRuleColl.Add(stackInd, rule); } } } else { PhraseDictionaryNodeSCFG::NonTerminalMap::const_iterator p; PhraseDictionaryNodeSCFG::NonTerminalMap::const_iterator end = nonTermMap.end(); for (p = nonTermMap.begin(); p != end; ++p) { const PhraseDictionaryNodeSCFG::NonTerminalMapKey & key = p->first; const Word & sourceNonTerm = key.first; if (sourceNonTerms.find(sourceNonTerm) == sourceNonTerms.end()) { continue; } const Word & targetNonTerm = key.second; if (targetNonTerms.find(targetNonTerm) == targetNonTerms.end()) { continue; } const PhraseDictionaryNodeSCFG & child = p->second; WordConsumed * wc = new WordConsumed(startPos, endPos, targetNonTerm, prevWordConsumed); ProcessedRule * rule = new ProcessedRule(child, wc); processedRuleColl.Add(stackInd, rule); } } }