// search all possible terminal extensions of a partial rule (pointed at by node) at a given position
// recursively try to expand partial rules into full rules up to m_lastPos.
void ChartRuleLookupManagerMemoryPerSentence::GetTerminalExtension(
  const PhraseDictionaryNodeMemory *node,
  size_t pos)
{

  const Word &sourceWord = GetSourceAt(pos).GetLabel();
  const PhraseDictionaryNodeMemory::TerminalMap & terminals = node->GetTerminalMap();

  // if the node has a small number of terminal edges, test word equality for each.
  if (terminals.size() < 5) {
    for (PhraseDictionaryNodeMemory::TerminalMap::const_iterator iter = terminals.begin(); iter != terminals.end(); ++iter) {
      const Word & word = iter->first;
      if (TerminalEqualityPred()(word, sourceWord)) {
        const PhraseDictionaryNodeMemory *child = & iter->second;
        AddAndExtend(child, pos);
        break;
      }
    }
  }
  // else, do hash lookup
  else {
    const PhraseDictionaryNodeMemory *child = node->GetChild(sourceWord);
    if (child != NULL) {
      AddAndExtend(child, pos);
    }
  }
}
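
// A standalone sketch (not part of Moses) of the child-lookup heuristic used
// in GetTerminalExtension() above: when a node has only a handful of terminal
// edges, a plain linear scan with an equality test is usually cheaper than a
// hashed lookup, so the map is scanned directly below a small threshold and
// the container's own find() is used otherwise. FindChild and
// kLinearScanThreshold are illustrative names, not part of the Moses API.
#include <cstddef>

template <typename Map>
const typename Map::mapped_type *FindChild(const Map &children,
                                           const typename Map::key_type &key)
{
  const std::size_t kLinearScanThreshold = 5;  // same cut-off as above
  if (children.size() < kLinearScanThreshold) {
    // few entries: compare keys one by one and stop at the first match
    for (typename Map::const_iterator it = children.begin();
         it != children.end(); ++it) {
      if (it->first == key) {
        return &it->second;
      }
    }
    return NULL;
  }
  // many entries: let the container do a hashed (or tree-based) lookup
  typename Map::const_iterator it = children.find(key);
  return it == children.end() ? NULL : &it->second;
}
// usage (hypothetical): const Node *child = FindChild(childMap, word);
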
void ChartRuleLookupManagerMemory::GetChartRuleCollection(
  const WordsRange &range,
  ChartParserCallback &outColl)
{
  size_t relEndPos = range.GetEndPos() - range.GetStartPos();
  size_t absEndPos = range.GetEndPos();

  // MAIN LOOP. build the list of dotted rules (partially applied rules) for
  // this span, then look up their target phrases

  // get the list of all partial rules that start at this span's start position
  DottedRuleColl &dottedRuleCol = *m_dottedRuleColls[range.GetStartPos()];
  const DottedRuleList &expandableDottedRuleList = dottedRuleCol.GetExpandableDottedRuleList();
  
  const ChartCellLabel &sourceWordLabel = GetSourceAt(absEndPos);

  // loop through the rules
  // (note that expandableDottedRuleList can be expanded as the loop runs 
  //  through calls to ExtendPartialRuleApplication())
  for (size_t ind = 0; ind < expandableDottedRuleList.size(); ++ind) {
    // rule we are about to extend
    const DottedRuleInMemory &prevDottedRule = *expandableDottedRuleList[ind];
    // we will now try to extend it, starting after where it ended
    size_t startPos = prevDottedRule.IsRoot()
                    ? range.GetStartPos()
                    : prevDottedRule.GetWordsRange().GetEndPos() + 1;

    // search for terminal symbol
    // (if only one more word position needs to be covered)
    if (startPos == absEndPos) {

      // look up in the rule dictionary whether the current rule can be
      // extended with the source word in the last position
      const Word &sourceWord = sourceWordLabel.GetLabel();
      const PhraseDictionaryNodeSCFG *node = prevDottedRule.GetLastNode().GetChild(sourceWord);

      // if we found a new rule -> create it and add it to the list
      if (node != NULL) {
        // create the rule
#ifdef USE_BOOST_POOL
        DottedRuleInMemory *dottedRule = m_dottedRulePool.malloc();
        new (dottedRule) DottedRuleInMemory(*node, sourceWordLabel,
                                            prevDottedRule);
#else
        DottedRuleInMemory *dottedRule = new DottedRuleInMemory(*node,
                                                                sourceWordLabel,
                                                                prevDottedRule);
#endif
        dottedRuleCol.Add(relEndPos+1, dottedRule);
      }
    }

    // search for non-terminals
    size_t endPos, stackInd;

    // span already completely covered? nothing more can be done
    if (startPos > absEndPos)
      continue;

    else if (startPos == range.GetStartPos() && range.GetEndPos() > range.GetStartPos()) {
      // We're at the root of the prefix tree so won't try to cover the full
      // span (i.e. we don't allow non-lexical unary rules).  However, we need
      // to match non-unary rules that begin with a non-terminal child, so we
      // do that in two steps: during this iteration we search for non-terminals
      // that cover all but the last source word in the span (there won't
      // already be running nodes for these because that would have required a
      // non-lexical unary rule match for an earlier span).  Any matches will
      // result in running nodes being appended to the list and on subsequent
      // iterations (for this same span), we'll extend them to cover the final
      // word.
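      // (a standalone sketch of this start/end/stack-index arithmetic follows
      //  this function)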
      endPos = absEndPos - 1;
      stackInd = relEndPos;
    }
    else {
      endPos = absEndPos;
      stackInd = relEndPos + 1;
    }

    ExtendPartialRuleApplication(prevDottedRule, startPos, endPos, stackInd,
                                 dottedRuleCol);
  }

  // list of rules that cover the entire span
  DottedRuleList &rules = dottedRuleCol.Get(relEndPos + 1);

  // look up target sides for the rules
  DottedRuleList::const_iterator iterRule;
  for (iterRule = rules.begin(); iterRule != rules.end(); ++iterRule) {
    const DottedRuleInMemory &dottedRule = **iterRule;
    const PhraseDictionaryNodeSCFG &node = dottedRule.GetLastNode();

    // look up target sides
    const TargetPhraseCollection *tpc = node.GetTargetPhraseCollection();

    // add the fully expanded rule (with lexical target side)
    if (tpc != NULL) {
      AddCompletedRule(dottedRule, *tpc, range, outColl);
    }
  }

  dottedRuleCol.Clear(relEndPos+1);
}
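
// A standalone sketch (not part of Moses) of the start/end/stack-index
// arithmetic used in GetChartRuleCollection() above. Given the span being
// parsed and the partial rule extended so far, it decides which sub-span a
// non-terminal may cover next and into which dotted-rule stack the result is
// filed. NonTermSpan and ComputeNonTermSpan are illustrative names only.
#include <cstddef>

struct NonTermSpan {
  bool valid;            // false when the span is already completely covered
  std::size_t startPos;  // first source position the non-terminal may cover
  std::size_t endPos;    // last source position the non-terminal may cover
  std::size_t stackInd;  // index of the dotted-rule stack to append to
};

NonTermSpan ComputeNonTermSpan(std::size_t spanStart, std::size_t spanEnd,
                               bool ruleIsRoot, std::size_t ruleEndPos)
{
  NonTermSpan r;
  r.valid = false;
  r.endPos = r.stackInd = 0;
  std::size_t relEndPos = spanEnd - spanStart;
  // extension starts right after the part of the rule matched so far
  r.startPos = ruleIsRoot ? spanStart : ruleEndPos + 1;
  if (r.startPos > spanEnd) {
    return r;  // nothing left to cover in this span
  }
  r.valid = true;
  if (r.startPos == spanStart && spanEnd > spanStart) {
    // at the root of the prefix tree: leave the last word uncovered so that
    // non-lexical unary rules over the whole span are never produced
    r.endPos = spanEnd - 1;
    r.stackInd = relEndPos;
  } else {
    r.endPos = spanEnd;
    r.stackInd = relEndPos + 1;
  }
  return r;
}
// Examples: ComputeNonTermSpan(2, 5, true, 0)  -> startPos=2, endPos=4, stackInd=3
//           ComputeNonTermSpan(2, 5, false, 3) -> startPos=4, endPos=5, stackInd=4
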
void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
  const WordsRange &range,
  ChartParserCallback &outColl)
{
  const StaticData &staticData = StaticData::Instance();
  size_t relEndPos = range.GetEndPos() - range.GetStartPos();
  size_t absEndPos = range.GetEndPos();

  // MAIN LOOP. build the list of dotted rules for this span, then look up their target phrases
  DottedRuleStackOnDisk &expandableDottedRuleList = *m_expandableDottedRuleListVec[range.GetStartPos()];

  // sort the saved nodes so that only the nodes with the highest counts are processed
  expandableDottedRuleList.SortSavedNodes();

  const DottedRuleStackOnDisk::SavedNodeColl &savedNodeColl = expandableDottedRuleList.GetSavedNodeColl();
  //cerr << "savedNodeColl=" << savedNodeColl.size() << " ";

  const ChartCellLabel &sourceWordLabel = GetSourceAt(absEndPos);

  for (size_t ind = 0; ind < savedNodeColl.size(); ++ind) {
    const SavedNodeOnDisk &savedNode = *savedNodeColl[ind];

    const DottedRuleOnDisk &prevDottedRule = savedNode.GetDottedRule();
    const OnDiskPt::PhraseNode &prevNode = prevDottedRule.GetLastNode();
    size_t startPos = prevDottedRule.IsRoot() ? range.GetStartPos() : prevDottedRule.GetWordsRange().GetEndPos() + 1;

    // search for terminal symbol
    if (startPos == absEndPos) {
      OnDiskPt::Word *sourceWordBerkeleyDb = m_dbWrapper.ConvertFromMoses(Input, m_inputFactorsVec, sourceWordLabel.GetLabel());
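      // ConvertFromMoses() returns NULL if the word is not in the on-disk
      // phrase table's vocabulary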

      if (sourceWordBerkeleyDb != NULL) {
        const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceWordBerkeleyDb, m_dbWrapper);
        if (node != NULL) {
          // TODO figure out why the source word is needed from the node, not from the sentence
          // probably to do with factors or non-terminals
          //const Word &sourceWord = node->GetSourceWord();
          DottedRuleOnDisk *dottedRule = new DottedRuleOnDisk(*node, sourceWordLabel, prevDottedRule);
          expandableDottedRuleList.Add(relEndPos+1, dottedRule);

          // cache for cleanup
          m_sourcePhraseNode.push_back(node);
        }

        delete sourceWordBerkeleyDb;
      }
    }

    // search for non-terminals
    size_t endPos, stackInd;
    if (startPos > absEndPos)
      continue;
    else if (startPos == range.GetStartPos() && range.GetEndPos() > range.GetStartPos()) {
      // at the start of the span: as in the in-memory lookup above, cover all
      // but the last source word so that non-lexical unary rules over the
      // whole span are not produced
      endPos = absEndPos - 1;
      stackInd = relEndPos;
    } else {
      endPos = absEndPos;
      stackInd = relEndPos + 1;
    }

    // size_t nonTermNumWordsCovered = endPos - startPos + 1;

    // get target nonterminals in this span from chart
    const ChartCellLabelSet &chartNonTermSet =
      GetTargetLabelSet(startPos, endPos);

    //const Word &defaultSourceNonTerm = staticData.GetInputDefaultNonTerminal()
    //                                   ,&defaultTargetNonTerm = staticData.GetOutputDefaultNonTerminal();

    // go through each SOURCE lhs
    const NonTerminalSet &sourceLHSSet = GetSentence().GetLabelSet(startPos, endPos);

    NonTerminalSet::const_iterator iterSourceLHS;
    for (iterSourceLHS = sourceLHSSet.begin(); iterSourceLHS != sourceLHSSet.end(); ++iterSourceLHS) {
      const Word &sourceLHS = *iterSourceLHS;

      OnDiskPt::Word *sourceLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(Input, m_inputFactorsVec, sourceLHS);

      if (sourceLHSBerkeleyDb == NULL) {
        delete sourceLHSBerkeleyDb;
        continue; // vocab not in pt. node definitely won't be in there
      }

      const OnDiskPt::PhraseNode *sourceNode = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper);
      delete sourceLHSBerkeleyDb;

      if (sourceNode == NULL)
        continue; // didn't find source node

      // go through each TARGET lhs
      ChartCellLabelSet::const_iterator iterChartNonTerm;
      for (iterChartNonTerm = chartNonTermSet.begin(); iterChartNonTerm != chartNonTermSet.end(); ++iterChartNonTerm) {
        const ChartCellLabel &cellLabel = iterChartNonTerm->second;

        //cerr << sourceLHS << " " << defaultSourceNonTerm << " " << chartNonTerm << " " << defaultTargetNonTerm << endl;

        //bool isSyntaxNonTerm = (sourceLHS != defaultSourceNonTerm) || (chartNonTerm != defaultTargetNonTerm);
        bool doSearch = true; //isSyntaxNonTerm ? nonTermNumWordsCovered <=  maxSyntaxSpan :
        //						nonTermNumWordsCovered <= maxDefaultSpan;

        if (doSearch) {

          OnDiskPt::Word *chartNonTermBerkeleyDb = m_dbWrapper.ConvertFromMoses(Output, m_outputFactorsVec, cellLabel.GetLabel());

          if (chartNonTermBerkeleyDb == NULL)
            continue;

          const OnDiskPt::PhraseNode *node = sourceNode->GetChild(*chartNonTermBerkeleyDb, m_dbWrapper);
          delete chartNonTermBerkeleyDb;

          if (node == NULL)
            continue;

          // found matching entry
          //const Word &sourceWord = node->GetSourceWord();
          DottedRuleOnDisk *dottedRule = new DottedRuleOnDisk(*node, cellLabel, prevDottedRule);
          expandableDottedRuleList.Add(stackInd, dottedRule);

          m_sourcePhraseNode.push_back(node);
        }
      } // for (iterChartNonTerm

      delete sourceNode;

    } // for (iterSourceLHS

    // dotted rules that cover the entire span: look up their target phrases
    DottedRuleCollOnDisk &nodes = expandableDottedRuleList.Get(relEndPos + 1);

    DottedRuleCollOnDisk::const_iterator iterDottedRuleColl;
    for (iterDottedRuleColl = nodes.begin(); iterDottedRuleColl != nodes.end(); ++iterDottedRuleColl) {
      // node of last source word
      const DottedRuleOnDisk &prevDottedRule = **iterDottedRuleColl;
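      // process each dotted rule only once, even though this block is run on
      // every iteration of the enclosing loop over saved nodes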
      if (prevDottedRule.Done())
        continue;
      prevDottedRule.Done(true);

      const OnDiskPt::PhraseNode &prevNode = prevDottedRule.GetLastNode();

      // get the node for each source LHS
      const NonTerminalSet &lhsSet = GetSentence().GetLabelSet(range.GetStartPos(), range.GetEndPos());
      NonTerminalSet::const_iterator iterLabelSet;
      for (iterLabelSet = lhsSet.begin(); iterLabelSet != lhsSet.end(); ++iterLabelSet) {
        const Word &sourceLHS = *iterLabelSet;

        OnDiskPt::Word *sourceLHSBerkeleyDb = m_dbWrapper.ConvertFromMoses(Input, m_inputFactorsVec, sourceLHS);
        if (sourceLHSBerkeleyDb == NULL)
          continue;

        const TargetPhraseCollection *targetPhraseCollection = NULL;
        const OnDiskPt::PhraseNode *node = prevNode.GetChild(*sourceLHSBerkeleyDb, m_dbWrapper);
        if (node) {
          UINT64 tpCollFilePos = node->GetValue();
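          // the file offset of the target phrase collection serves as the cache key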
          std::map<UINT64, const TargetPhraseCollection*>::const_iterator iterCache = m_cache.find(tpCollFilePos);
          if (iterCache == m_cache.end()) {

            const OnDiskPt::TargetPhraseCollection *tpcollBerkeleyDb = node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper);

            std::vector<float> weightT = staticData.GetWeights(&m_dictionary);
            targetPhraseCollection
            = tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec
                                               ,m_outputFactorsVec
                                               ,m_dictionary
                                               ,weightT
                                               ,m_filePath
                                               , m_dbWrapper.GetVocab());

            delete tpcollBerkeleyDb;
            m_cache[tpCollFilePos] = targetPhraseCollection;
          } else {
            // already converted: take it from the cache
            targetPhraseCollection = iterCache->second;
          }

          CHECK(targetPhraseCollection);
          if (!targetPhraseCollection->IsEmpty()) {
            AddCompletedRule(prevDottedRule, *targetPhraseCollection,
                             range, outColl);
          }

        } // if (node)

        delete node;
        delete sourceLHSBerkeleyDb;
      }
    }
  } // for (size_t ind = 0; ind < savedNodeColl.size(); ++ind)

  //cerr << numDerivations << " ";
}
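
// A standalone sketch (not part of Moses) of the caching pattern used above
// when on-disk target phrase collections are converted: conversions are
// memoised in a map keyed by the collection's file offset, so each collection
// is read from disk and converted at most once. CollectionCache, Collection,
// ConvertFromDisk and LoadCollection are illustrative names only.
#include <map>
#include <string>
#include <vector>

typedef unsigned long long FileOffset;

struct Collection {
  std::vector<std::string> phrases;  // stand-in for the converted target phrases
};

// placeholder for the real disk read plus conversion (GetTargetPhraseCollection()
// followed by ConvertToMoses() in the code above)
const Collection *ConvertFromDisk(FileOffset offset)
{
  (void)offset;
  return new Collection();
}

class CollectionCache
{
public:
  // return the converted collection for a file offset, converting it only on
  // the first request (ownership and cleanup are omitted in this sketch)
  const Collection *LoadCollection(FileOffset offset) {
    std::map<FileOffset, const Collection*>::const_iterator it = m_cache.find(offset);
    if (it != m_cache.end()) {
      return it->second;  // cache hit: reuse the earlier conversion
    }
    const Collection *converted = ConvertFromDisk(offset);  // slow path
    m_cache[offset] = converted;  // remember it for later spans
    return converted;
  }

private:
  std::map<FileOffset, const Collection*> m_cache;
};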