/**
 * Walks the node structure rooted at m_collection along the words of
 * `source` and returns the node reached after the last word.
 *
 * Each step goes through GetOrCreateChild() (which, judging by its name,
 * presumably creates the child when it does not exist yet):
 *  - a terminal source word selects the child by that word alone;
 *  - a non-terminal source word selects the child by the pair
 *    (source non-terminal, target word it is aligned to).
 *
 * The alignment entries of `target` are consumed front to back, one per
 * source non-terminal. The asserts require that an entry is still available
 * and that its `first` equals the current source position.
 *
 * @param source  source side of the rule; drives the walk
 * @param target  target side; supplies the alignment and the aligned words
 * @return reference to the node for the complete source phrase (the root
 *         m_collection itself when `source` is empty)
 */
PhraseDictionaryNodeSCFG &PhraseDictionarySCFG::GetOrCreateNode(const Phrase &source, const TargetPhrase &target)
{
  const AlignmentInfo &alignments = target.GetAlignmentInfo();
  AlignmentInfo::const_iterator nextAlign = alignments.begin();

  PhraseDictionaryNodeSCFG *node = &m_collection;

  const size_t numWords = source.GetSize();
  for (size_t srcPos = 0; srcPos != numWords; ++srcPos) {
    const Word &srcWord = source.GetWord(srcPos);

    if (!srcWord.IsNonTerminal()) {
      // Terminal: the child is keyed by the source word only.
      node = node->GetOrCreateChild(srcWord);
    } else {
      // Non-terminal: keyed by the source label first, then by the target
      // word found at the aligned target position.
      assert(nextAlign != alignments.end());
      assert(nextAlign->first == srcPos);
      const size_t tgtPos = nextAlign->second;
      ++nextAlign;

      node = node->GetOrCreateChild(srcWord, target.GetWord(tgtPos));
    }

    assert(node != NULL);
  }

  return *node;
}
/**
 * Walks the node structure rooted at m_collection along the words of
 * `source` and returns the node reached after the last word.
 *
 * Terminal source words select a child by the word alone; non-terminal
 * source words select a child by (source non-terminal, aligned target word).
 * Alignment entries of `target` are consumed front to back, one per source
 * non-terminal.
 *
 * In addition to the asserts, three diagnostics report a "Bad rule" through
 * VERBOSE(1, ...):
 *  1. no alignment entry is left for a source non-terminal,
 *  2. the next alignment entry does not start at the current source position,
 *  3. the aligned target word is not a non-terminal.
 *
 * NOTE(review): diagnostics 1 and 2 are each followed by an assert on the
 * same condition. When asserts are compiled out (NDEBUG), execution continues
 * after diagnostic 1 and dereferences the iterator even though it was just
 * found to be at end() -- confirm whether such rules should be rejected
 * instead.
 *
 * @param source  source side of the rule; drives the walk
 * @param target  target side; supplies the alignment and the aligned words
 * @return reference to the node for the complete source phrase (the root
 *         m_collection itself when `source` is empty)
 */
PhraseDictionaryNodeSCFG &PhraseDictionaryMinSpan::GetOrCreateNode(const Phrase &source, const TargetPhrase &target)
{
  const size_t numWords = source.GetSize();

  const AlignmentInfo &alignments = target.GetAlignmentInfo();
  AlignmentInfo::const_iterator nextAlign = alignments.begin();

  PhraseDictionaryNodeSCFG *node = &m_collection;
  for (size_t srcPos = 0; srcPos != numWords; ++srcPos) {
    const Word &srcWord = source.GetWord(srcPos);

    if (!srcWord.IsNonTerminal()) {
      // Terminal: the child is keyed by the source word only.
      node = node->GetOrCreateChild(srcWord);
      assert(node != NULL);
      continue;
    }

    // From here on srcWord is a non-terminal (indexed by source label 1st).

    // Diagnostic 1: the alignment ran out before the source non-terminals did.
    if (nextAlign == alignments.end()) {
      VERBOSE(1,"Bad rule : " << source << " ||| " << target << endl);
    }
    assert(nextAlign != alignments.end());

    // Diagnostic 2: the next alignment entry belongs to another source position.
    if (nextAlign->first != srcPos) {
      VERBOSE(1,"Bad rule : " << source << " ||| " << target << endl);
    }
    assert(nextAlign->first == srcPos);

    const size_t tgtPos = nextAlign->second;
    ++nextAlign;
    const Word &tgtWord = target.GetWord(tgtPos);

    // Diagnostic 3: the aligned target word is not a non-terminal.
    // This one is only logged; there is no matching assert.
    if (!tgtWord.IsNonTerminal()) {
      VERBOSE(1,"Bad rule : " << source << " ||| " << target << endl);
    }

    node = node->GetOrCreateChild(srcWord, tgtWord);
    assert(node != NULL);
  }

  return *node;
}
Example #3
0
 /**
  * Calls rootNode.Sort() with the configured table limit.
  *
  * Nothing happens when GetTableLimit() evaluates to false (e.g. zero).
  * What Sort() does with the limit (presumably the pruning this method is
  * named after) is not visible here.
  *
  * @param rootNode  node on which Sort() is invoked
  */
 void PhraseDictionaryFuzzyMatch::SortAndPrune(PhraseDictionaryNodeSCFG &rootNode)
 {
   if (!GetTableLimit()) {
     return;
   }

   rootNode.Sort(GetTableLimit());
 }
Example #4
0
 /**
  * Walks the node structure below `rootNode` along the words of `source`
  * and returns the node reached after the last word.
  *
  * Terminal source words select a child by the word alone; non-terminal
  * source words select a child by (source non-terminal, aligned target word).
  * The entries of target.GetAlignNonTerm() are consumed front to back, one
  * per source non-terminal; CHECK enforces that an entry is available and
  * that its `first` equals the current source position.
  *
  * NOTE(review): every call writes `source` and `target` to cerr
  * unconditionally -- this looks like leftover debug output; confirm before
  * removing, since it is observable behaviour.
  *
  * @param rootNode   node at which the walk starts
  * @param source     source side of the rule; drives the walk
  * @param target     target side; supplies the alignment and aligned words
  * @param sourceLHS  currently unused (the step that would consume it is
  *                   commented out at the end of the function)
  * @return reference to the node for the complete source phrase (`rootNode`
  *         itself when `source` is empty)
  */
 PhraseDictionaryNodeSCFG &PhraseDictionaryFuzzyMatch::GetOrCreateNode(PhraseDictionaryNodeSCFG &rootNode
                                                                 , const Phrase &source
                                                                 , const TargetPhrase &target
                                                                 , const Word &sourceLHS)
 {
   cerr << source << endl << target << endl;

   const AlignmentInfo &nonTermAlign = target.GetAlignNonTerm();
   AlignmentInfo::const_iterator nextAlign = nonTermAlign.begin();

   PhraseDictionaryNodeSCFG *node = &rootNode;

   const size_t numWords = source.GetSize();
   for (size_t srcPos = 0; srcPos != numWords; ++srcPos) {
     const Word &srcWord = source.GetWord(srcPos);

     if (!srcWord.IsNonTerminal()) {
       // Terminal: the child is keyed by the source word only.
       node = node->GetOrCreateChild(srcWord);
     } else {
       // Non-terminal: keyed by the source label first, then by the target
       // word found at the aligned target position.
       CHECK(nextAlign != nonTermAlign.end());
       CHECK(nextAlign->first == srcPos);
       const size_t tgtPos = nextAlign->second;
       ++nextAlign;

       node = node->GetOrCreateChild(srcWord, target.GetWord(tgtPos));
     }

     CHECK(node != NULL);
   }

   // Disabled final step that would descend once more by the source LHS:
   //node = node->GetOrCreateChild(sourceLHS);
   //CHECK(node != NULL);

   return *node;
 }
// Given a partial rule application ending at startPos-1 and given the sets of
// source and target non-terminals covering the span [startPos, endPos],
// determines the full or partial rule applications that can be produced through
// extending the current rule application by a single non-terminal.
void ChartRuleLookupManagerMemory::ExtendPartialRuleApplication(
    const PhraseDictionaryNodeSCFG & node,
    const WordConsumed *prevWordConsumed,
    size_t startPos,
    size_t endPos,
    size_t stackInd,
    const NonTerminalSet & sourceNonTerms,
    const NonTerminalSet & targetNonTerms,
    ProcessedRuleColl & processedRuleColl)
{
    const PhraseDictionaryNodeSCFG::NonTerminalMap & nonTermMap =
        node.GetNonTerminalMap();

    const size_t numChildren = nonTermMap.size();
    if (numChildren == 0)
    {
        return;
    }
    const size_t numSourceNonTerms = sourceNonTerms.size();
    const size_t numTargetNonTerms = targetNonTerms.size();
    const size_t numCombinations = numSourceNonTerms * numTargetNonTerms;

    // We can search by either:
    //   1. Enumerating all possible source-target NT pairs that are valid for
    //      the span and then searching for matching children in the node,
    // or
    //   2. Iterating over all the NT children in the node, searching
    //      for each source and target NT in the span's sets.
    // We'll do whichever minimises the number of lookups:
    if (numCombinations <= numChildren*2)
    {
        NonTerminalSet::const_iterator p = sourceNonTerms.begin();
        NonTerminalSet::const_iterator sEnd = sourceNonTerms.end();
        for (; p != sEnd; ++p)
        {
            const Word & sourceNonTerm = *p;
            NonTerminalSet::const_iterator q = targetNonTerms.begin();
            NonTerminalSet::const_iterator tEnd = targetNonTerms.end();
            for (; q != tEnd; ++q)
            {
                const Word & targetNonTerm = *q;
                const PhraseDictionaryNodeSCFG * child =
                    node.GetChild(sourceNonTerm, targetNonTerm);
                if (child == NULL)
                {
                    continue;
                }
                WordConsumed * wc = new WordConsumed(startPos, endPos,
                                                     targetNonTerm,
                                                     prevWordConsumed);
                ProcessedRule * rule = new ProcessedRule(*child, wc);
                processedRuleColl.Add(stackInd, rule);
            }
        }
    }
    else
    {
        PhraseDictionaryNodeSCFG::NonTerminalMap::const_iterator p;
        PhraseDictionaryNodeSCFG::NonTerminalMap::const_iterator end =
                                                            nonTermMap.end();
        for (p = nonTermMap.begin(); p != end; ++p)
        {
            const PhraseDictionaryNodeSCFG::NonTerminalMapKey & key = p->first;
            const Word & sourceNonTerm = key.first;
            if (sourceNonTerms.find(sourceNonTerm) == sourceNonTerms.end())
            {
                continue;
            }
            const Word & targetNonTerm = key.second;
            if (targetNonTerms.find(targetNonTerm) == targetNonTerms.end())
            {
                continue;
            }
            const PhraseDictionaryNodeSCFG & child = p->second;
            WordConsumed * wc = new WordConsumed(startPos, endPos,
                                                 targetNonTerm,
                                                 prevWordConsumed);
            ProcessedRule * rule = new ProcessedRule(child, wc);
            processedRuleColl.Add(stackInd, rule);
        }
    }
}