예제 #1
0
/// Constructs a composed rule that starts out as a single base rule.
///
/// The depth, size and node count members are initialised from the
/// corresponding getters of baseRule.  Every leaf of baseRule whose type is
/// TREE is pushed onto m_openAttachmentPoints, in the iteration order of the
/// leaf set.
///
/// @param baseRule the subgraph this composed rule is initialised from.
ComposedRule::ComposedRule(const Subgraph &baseRule)
    : m_baseRule(baseRule)
    , m_depth(baseRule.GetDepth())
    , m_size(baseRule.GetSize())
    , m_nodeCount(baseRule.GetNodeCount())
{
    typedef std::set<const Node *> NodeSet;

    const NodeSet &baseLeaves = baseRule.GetLeaves();
    NodeSet::const_iterator it = baseLeaves.begin();
    const NodeSet::const_iterator last = baseLeaves.end();
    while (it != last) {
        const Node *candidate = *it;
        ++it;
        // Leaves of any other type are not attachment points.
        if (candidate->GetType() != TREE) {
            continue;
        }
        m_openAttachmentPoints.push(candidate);
    }
}
예제 #2
0
/// Builds a synchronous CFG rule from a subgraph fragment.
///
/// The source LHS is always the non-terminal "X"; the target LHS is a
/// non-terminal carrying the label of the fragment's root.  The PCFG score is
/// taken from the fragment.
///
/// Source RHS: the fragment's leaves that have a non-empty span, sorted with
/// PartitionOrderComp.  TREE leaves become the non-terminal "X"; all other
/// leaves are asserted to be SOURCE nodes and become terminals with the
/// node's label.
///
/// Target RHS and alignment: the fragment's target leaves are visited in the
/// order returned by GetTargetLeaves().  Leaves with an empty span contribute
/// their target words as terminals and no alignment points; SOURCE leaves are
/// skipped; every other leaf contributes one symbol plus one
/// (source index, target index) alignment pair per source position recorded
/// for it.
///
/// @param fragment the subgraph the rule is extracted from.
ScfgRule::ScfgRule(const Subgraph &fragment)
    : m_sourceLHS("X", NonTerminal)
    , m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal)
    , m_pcfgScore(fragment.GetPcfgScore())
{
  typedef std::set<const Node *> NodeSet;
  typedef std::vector<const Node *> NodeVec;
  typedef std::vector<int> IndexVec;
  typedef std::map<const Node *, IndexVec> NodeToIndices;

  // ---- Source RHS ----

  // Drop the leaves whose span is empty; the rest are ordered by span.
  const NodeSet &fragmentLeaves = fragment.GetLeaves();
  NodeVec orderedSinks;
  orderedSinks.reserve(fragmentLeaves.size());
  for (NodeSet::const_iterator it = fragmentLeaves.begin();
       it != fragmentLeaves.end(); ++it) {
    const Node *candidate = *it;
    if (candidate->GetSpan().empty()) {
      continue;
    }
    orderedSinks.push_back(candidate);
  }
  std::sort(orderedSinks.begin(), orderedSinks.end(), PartitionOrderComp);

  // For each target-side node, the source RHS positions it is linked to.
  // Consulted further down when the alignment is assembled.
  NodeToIndices srcPositionsOf;

  m_sourceRHS.reserve(orderedSinks.size());
  for (NodeVec::size_type pos = 0; pos < orderedSinks.size(); ++pos) {
    const Node *sink = orderedSinks[pos];
    const int srcPos = static_cast<int>(pos);

    if (sink->GetType() == TREE) {
      m_sourceRHS.push_back(Symbol("X", NonTerminal));
      srcPositionsOf[sink].push_back(srcPos);
      continue;
    }

    assert(sink->GetType() == SOURCE);
    m_sourceRHS.push_back(Symbol(sink->GetLabel(), Terminal));

    // Record this source position against every TARGET parent of the word.
    const std::vector<Node *> &sinkParents = sink->GetParents();
    for (std::vector<Node *>::size_type k = 0; k < sinkParents.size(); ++k) {
      Node *parent = sinkParents[k];
      if (parent->GetType() == TARGET) {
        srcPositionsOf[parent].push_back(srcPos);
      }
    }
  }

  // ---- Target RHS + alignment ----

  NodeVec targetLeaves;
  fragment.GetTargetLeaves(targetLeaves);

  // Upper bounds only: the alignment may end up smaller than this.
  m_alignment.reserve(targetLeaves.size());
  m_targetRHS.reserve(targetLeaves.size());

  for (NodeVec::const_iterator it = targetLeaves.begin();
       it != targetLeaves.end(); ++it) {
    const Node *leaf = *it;

    if (leaf->GetSpan().empty()) {
      // No source words are covered, so the leaf can only contribute
      // terminals to the target RHS (never a non-terminal).
      const std::vector<std::string> words(leaf->GetTargetWords());
      for (std::vector<std::string>::size_type w = 0; w < words.size(); ++w) {
        m_targetRHS.push_back(Symbol(words[w], Terminal));
      }
      continue;
    }

    if (leaf->GetType() == SOURCE) {
      // Source words do not appear on the target side.
      continue;
    }

    const SymbolType symbolType =
      (leaf->GetType() == TREE) ? NonTerminal : Terminal;
    m_targetRHS.push_back(Symbol(leaf->GetLabel(), symbolType));
    const int tgtPos = static_cast<int>(m_targetRHS.size() - 1);

    NodeToIndices::iterator found = srcPositionsOf.find(leaf);
    assert(found != srcPositionsOf.end());
    IndexVec &linkedSrcPositions = found->second;
    for (IndexVec::iterator s = linkedSrcPositions.begin();
         s != linkedSrcPositions.end(); ++s) {
      m_alignment.push_back(std::make_pair(*s, tgtPos));
    }
  }
}
예제 #3
0
/// Builds an STSG rule from a subgraph fragment.
///
/// The target side (m_targetSide) is constructed directly from the fragment.
/// NOTE(review): the meaning of the `true` flag passed to m_targetSide is not
/// visible from this constructor -- confirm against its declaration.
///
/// Source side: the fragment's leaves that have a non-empty span, sorted with
/// PartitionOrderComp.  TREE leaves become the non-terminal "X"; all other
/// leaves are asserted to be SOURCE nodes and become terminals with the
/// node's label.
///
/// Alignments, built by walking the target leaves of m_targetSide in order
/// and skipping those with an empty span:
///  - m_alignment receives one (source index, target leaf index) pair per
///    source position recorded for the leaf;
///  - m_nonTermAlignment is indexed by a non-terminal's rank on the source
///    side and stores its rank among the TREE leaves visited on the target
///    side.
///
/// @param fragment the subgraph the rule is extracted from.
StsgRule::StsgRule(const Subgraph &fragment)
  : m_targetSide(fragment, true)
{
  // Source side

  const std::set<const Node *> &sinkNodes = fragment.GetLeaves();

  // Collect the subset of sink nodes that excludes target nodes with
  // empty spans.
  std::vector<const Node *> productiveSinks;
  productiveSinks.reserve(sinkNodes.size());
  for (std::set<const Node *>::const_iterator p = sinkNodes.begin();
       p != sinkNodes.end(); ++p) {
    const Node *sink = *p;
    if (!sink->GetSpan().empty()) {
      productiveSinks.push_back(sink);
    }
  }

  // Sort them into the order defined by their spans.
  std::sort(productiveSinks.begin(), productiveSinks.end(), PartitionOrderComp);

  // Build a map from target nodes to source-order indices, so that we
  // can construct the Alignment object later.  For TREE sinks, also remember
  // the rank of the non-terminal among the source-side non-terminals.
  std::map<const Node *, std::vector<int> > sinkToSourceIndices;
  std::map<const Node *, int> nonTermSinkToSourceIndex;

  m_sourceSide.reserve(productiveSinks.size());
  int srcIndex = 0;
  int nonTermCount = 0;
  for (std::vector<const Node *>::const_iterator p = productiveSinks.begin();
       p != productiveSinks.end(); ++p, ++srcIndex) {
    const Node &sink = **p;
    if (sink.GetType() == TREE) {
      m_sourceSide.push_back(Symbol("X", NonTerminal));
      sinkToSourceIndices[&sink].push_back(srcIndex);
      nonTermSinkToSourceIndex[&sink] = nonTermCount++;
    } else {
      assert(sink.GetType() == SOURCE);
      m_sourceSide.push_back(Symbol(sink.GetLabel(), Terminal));
      // Add all aligned target words to the sinkToSourceIndices map
      const std::vector<Node *> &parents(sink.GetParents());
      for (std::vector<Node *>::const_iterator q = parents.begin();
           q != parents.end(); ++q) {
        if ((*q)->GetType() == TARGET) {
          sinkToSourceIndices[*q].push_back(srcIndex);
        }
      }
    }
  }

  // Alignment

  std::vector<const Node *> targetLeaves;
  m_targetSide.GetTargetLeaves(targetLeaves);

  m_alignment.reserve(targetLeaves.size());
  m_nonTermAlignment.resize(nonTermCount);

  // Rank of the next non-terminal encountered on the target side.
  int tgtNonTermIndex = 0;

  // Index with the container's own unsigned size_type so the loop condition
  // does not mix signed and unsigned operands.
  typedef std::vector<const Node *>::size_type LeafIndex;
  for (LeafIndex i = 0; i < targetLeaves.size(); ++i) {
    const Node *leaf = targetLeaves[i];
    assert(leaf->GetType() != SOURCE);
    if (leaf->GetSpan().empty()) {
      // Leaves that cover no source words have no alignment points.
      continue;
    }
    std::map<const Node *, std::vector<int> >::iterator found =
      sinkToSourceIndices.find(leaf);
    assert(found != sinkToSourceIndices.end());
    const std::vector<int> &alignedSrcIndices = found->second;

    // Alignment points are stored as pairs of int.
    const int tgtIndex = static_cast<int>(i);
    for (std::vector<int>::const_iterator r = alignedSrcIndices.begin();
         r != alignedSrcIndices.end(); ++r) {
      m_alignment.push_back(std::make_pair(*r, tgtIndex));
    }
    if (leaf->GetType() == TREE) {
      m_nonTermAlignment[nonTermSinkToSourceIndex[leaf]] = tgtNonTermIndex++;
    }
  }
}