// Converts a SyntaxNode tree to a Moses::GHKM::ParseTree. std::auto_ptr<ParseTree> XmlTreeParser::ConvertTree( const SyntaxNode &tree, const std::vector<std::string> &words) { std::auto_ptr<ParseTree> root(new ParseTree(tree.GetLabel())); const std::vector<SyntaxNode*> &children = tree.GetChildren(); if (children.empty()) { if (tree.GetStart() != tree.GetEnd()) { std::ostringstream msg; msg << "leaf node covers multiple words (" << tree.GetStart() << "-" << tree.GetEnd() << "): this is currently unsupported"; throw Exception(msg.str()); } std::auto_ptr<ParseTree> leaf(new ParseTree(words[tree.GetStart()])); leaf->SetParent(root.get()); root->AddChild(leaf.release()); } else { for (std::vector<SyntaxNode*>::const_iterator p = children.begin(); p != children.end(); ++p) { assert(*p); std::auto_ptr<ParseTree> child = ConvertTree(**p, words); child->SetParent(root.get()); root->AddChild(child.release()); } } return root; }
void SyntaxTree::ConnectNodes() { typedef SyntaxTreeIndex2::const_reverse_iterator InnerIterator; SyntaxNode *prev = 0; // Iterate over all start indices from lowest to highest. for (SyntaxTreeIndexIterator p = m_index.begin(); p != m_index.end(); ++p) { const SyntaxTreeIndex2 &inner = p->second; // Iterate over all end indices from highest to lowest. for (InnerIterator q = inner.rbegin(); q != inner.rend(); ++q) { const std::vector<SyntaxNode*> &nodes = q->second; // Iterate over all nodes that cover the same span in order of tree // depth, top-most first. for (std::vector<SyntaxNode*>::const_reverse_iterator r = nodes.rbegin(); r != nodes.rend(); ++r) { SyntaxNode *node = *r; if (!prev) { // node is the root. m_top = node; node->SetParent(0); } else if (prev->GetStart() == node->GetStart()) { // prev is the parent of node. assert(prev->GetEnd() >= node->GetEnd()); node->SetParent(prev); prev->AddChild(node); } else { // prev is a descendant of node's parent. The lowest common // ancestor of prev and node will be node's parent. SyntaxNode *ancestor = prev->GetParent(); while (ancestor->GetEnd() < node->GetEnd()) { ancestor = ancestor->GetParent(); } assert(ancestor); node->SetParent(ancestor); ancestor->AddChild(node); } prev = node; } } } }