// Constructs a composed rule from a single base rule.
//
// The depth, size and node count are taken directly from baseRule.  Every
// leaf of baseRule whose type is TREE is pushed onto m_openAttachmentPoints;
// leaves of any other type are ignored.
ComposedRule::ComposedRule(const Subgraph &baseRule)
  : m_baseRule(baseRule)
  , m_depth(baseRule.GetDepth())
  , m_size(baseRule.GetSize())
  , m_nodeCount(baseRule.GetNodeCount())
{
  typedef std::set<const Node *> NodeSet;

  const NodeSet &baseLeaves = baseRule.GetLeaves();
  for (NodeSet::const_iterator it = baseLeaves.begin();
       it != baseLeaves.end(); ++it) {
    const Node *candidate = *it;
    if (candidate->GetType() != TREE) {
      continue;  // only TREE leaves are attachment points
    }
    m_openAttachmentPoints.push(candidate);
  }
}
// Constructs an SCFG rule from a subgraph fragment.
//
// The source LHS is always the non-terminal "X"; the target LHS is a
// non-terminal carrying the label of the fragment's root.  The constructor
// then fills, in order:
//   - m_sourceRHS:  one symbol per fragment leaf with a non-empty span,
//                   after sorting those leaves with PartitionOrderComp;
//   - m_targetRHS:  symbols derived from the fragment's target leaves;
//   - m_alignment:  (source index, target index) pairs linking the two.
ScfgRule::ScfgRule(const Subgraph &fragment)
  : m_sourceLHS("X", NonTerminal)
  , m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal)
  , m_pcfgScore(fragment.GetPcfgScore())
{
  typedef std::set<const Node *> NodeSet;
  typedef std::vector<const Node *> NodeVec;
  typedef std::map<const Node *, std::vector<int> > IndexMap;

  // ---- Source RHS -------------------------------------------------------

  // Keep only the leaves that have a non-empty span.
  const NodeSet &fragLeaves = fragment.GetLeaves();
  NodeVec orderedSinks;
  orderedSinks.reserve(fragLeaves.size());
  for (NodeSet::const_iterator it = fragLeaves.begin();
       it != fragLeaves.end(); ++it) {
    const Node *candidate = *it;
    if (candidate->GetSpan().empty()) {
      continue;
    }
    orderedSinks.push_back(candidate);
  }
  std::sort(orderedSinks.begin(), orderedSinks.end(), PartitionOrderComp);

  // While emitting the source RHS, remember for each target-side node the
  // source RHS positions it is linked to.  This is consumed below when the
  // alignment is assembled.
  IndexMap srcPositions;
  m_sourceRHS.reserve(orderedSinks.size());
  for (NodeVec::size_type k = 0; k < orderedSinks.size(); ++k) {
    const Node &sink = *orderedSinks[k];
    const int pos = static_cast<int>(k);
    if (sink.GetType() != TREE) {
      assert(sink.GetType() == SOURCE);
      m_sourceRHS.push_back(Symbol(sink.GetLabel(), Terminal));
      // Record this position against every TARGET-type parent of the
      // source word.
      const std::vector<Node *> &sinkParents(sink.GetParents());
      for (std::vector<Node *>::const_iterator par = sinkParents.begin();
           par != sinkParents.end(); ++par) {
        if ((*par)->GetType() == TARGET) {
          srcPositions[*par].push_back(pos);
        }
      }
    } else {
      m_sourceRHS.push_back(Symbol("X", NonTerminal));
      srcPositions[&sink].push_back(pos);
    }
  }

  // ---- Target RHS + alignment -------------------------------------------

  NodeVec tgtLeaves;
  fragment.GetTargetLeaves(tgtLeaves);

  // An upper bound for the alignment is not known here; over-reserving is
  // harmless.
  m_alignment.reserve(tgtLeaves.size());
  m_targetRHS.reserve(tgtLeaves.size());

  for (NodeVec::const_iterator it = tgtLeaves.begin();
       it != tgtLeaves.end(); ++it) {
    const Node &leaf = **it;

    if (leaf.GetSpan().empty()) {
      // No source words are covered, so the leaf can only contribute
      // terminals (never a non-terminal) to the target RHS.
      std::vector<std::string> words(leaf.GetTargetWords());
      for (std::vector<std::string>::const_iterator w = words.begin();
           w != words.end(); ++w) {
        m_targetRHS.push_back(Symbol(*w, Terminal));
      }
      continue;
    }

    if (leaf.GetType() == SOURCE) {
      continue;  // source nodes contribute nothing to the target side
    }

    SymbolType symType;
    if (leaf.GetType() == TREE) {
      symType = NonTerminal;
    } else {
      symType = Terminal;
    }
    m_targetRHS.push_back(Symbol(leaf.GetLabel(), symType));
    const int tgtPos = m_targetRHS.size() - 1;

    // Every target leaf reaching this point is expected to have been
    // recorded while the source RHS was built.
    IndexMap::iterator found(srcPositions.find(&leaf));
    assert(found != srcPositions.end());
    std::vector<int> &linked = found->second;
    for (std::vector<int>::size_type k = 0; k < linked.size(); ++k) {
      const int srcPos = linked[k];
      m_alignment.push_back(std::make_pair(srcPos, tgtPos));
    }
  }
}
// Constructs an STSG rule from a subgraph fragment.
//
// m_targetSide is initialised from the fragment.  The constructor then
// builds:
//   - m_sourceSide:        one symbol per fragment sink node with a
//                          non-empty span, sorted with PartitionOrderComp;
//   - m_alignment:         (source index, target leaf index) pairs;
//   - m_nonTermAlignment:  for each source-side non-terminal (in source
//                          order), the rank of its TREE leaf among the
//                          TREE leaves visited on the target side.
StsgRule::StsgRule(const Subgraph &fragment)
  : m_targetSide(fragment, true)
{
  typedef std::set<const Node *> NodeSet;
  typedef std::vector<const Node *> NodeVec;
  typedef std::map<const Node *, std::vector<int> > IndexListMap;

  // ---- Source side ------------------------------------------------------

  // Drop sink nodes whose span is empty; the rest are "productive".
  const NodeSet &allSinks = fragment.GetLeaves();
  NodeVec usableSinks;
  usableSinks.reserve(allSinks.size());
  for (NodeSet::const_iterator it = allSinks.begin();
       it != allSinks.end(); ++it) {
    if ((*it)->GetSpan().empty()) {
      continue;
    }
    usableSinks.push_back(*it);
  }

  // Put them into span order.
  std::sort(usableSinks.begin(), usableSinks.end(), PartitionOrderComp);

  // While emitting the source side, record (a) the source positions linked
  // to each target-side node and (b) the ordinal of each non-terminal sink
  // among the source-side non-terminals.  Both feed the alignment step.
  IndexListMap linkedPositions;
  std::map<const Node *, int> nonTermOrdinal;
  m_sourceSide.reserve(usableSinks.size());
  int numNonTerms = 0;
  for (NodeVec::size_type k = 0; k < usableSinks.size(); ++k) {
    const Node &sink = *usableSinks[k];
    const int pos = static_cast<int>(k);
    if (sink.GetType() != TREE) {
      assert(sink.GetType() == SOURCE);
      m_sourceSide.push_back(Symbol(sink.GetLabel(), Terminal));
      // Record this position against every TARGET-type parent of the
      // source word.
      const std::vector<Node *> &sinkParents(sink.GetParents());
      for (std::vector<Node *>::const_iterator par = sinkParents.begin();
           par != sinkParents.end(); ++par) {
        if ((*par)->GetType() == TARGET) {
          linkedPositions[*par].push_back(pos);
        }
      }
    } else {
      m_sourceSide.push_back(Symbol("X", NonTerminal));
      linkedPositions[&sink].push_back(pos);
      nonTermOrdinal[&sink] = numNonTerms;
      ++numNonTerms;
    }
  }

  // ---- Alignment --------------------------------------------------------

  NodeVec tgtLeaves;
  m_targetSide.GetTargetLeaves(tgtLeaves);
  m_alignment.reserve(tgtLeaves.size());
  m_nonTermAlignment.resize(numNonTerms);

  int leafPos = 0;       // index of the current leaf within tgtLeaves
  int treeLeafRank = 0;  // number of non-empty-span TREE leaves seen so far
  for (NodeVec::const_iterator it = tgtLeaves.begin();
       it != tgtLeaves.end(); ++it, ++leafPos) {
    const Node *leaf = *it;
    assert(leaf->GetType() != SOURCE);
    if (leaf->GetSpan().empty()) {
      continue;  // nothing on the source side to align with
    }

    IndexListMap::iterator found = linkedPositions.find(leaf);
    assert(found != linkedPositions.end());
    std::vector<int> &linked = found->second;
    for (std::vector<int>::size_type k = 0; k < linked.size(); ++k) {
      const int srcPos = linked[k];
      m_alignment.push_back(std::make_pair(srcPos, leafPos));
    }

    if (leaf->GetType() == TREE) {
      const int slot = nonTermOrdinal[leaf];
      m_nonTermAlignment[slot] = treeLeafRank;
      ++treeLeafRank;
    }
  }
}