/**
 * Loads the XML file `file` and prepares both the input documents and the
 * output skeleton.
 *
 * Steps, in order:
 *  1. Parse `file` into a DOM; any errors collected by the error handler are
 *     logged. If no document is produced, an error is logged and the process
 *     exits with status 1.
 *  2. Every node matched by "/mteval/srcset/doc" is wrapped in a
 *     NistXmlDocument and appended to documents_ in document order.
 *  3. The parsed document is deep-cloned into outdoc_. In the clone, the
 *     first "/mteval/srcset" element is replaced by a new <tstset> element
 *     that receives all of its children, a copy of its "setid" and "srclang"
 *     attributes, and the placeholder attributes trglang="TRGLANG" and
 *     sysid="SYSID".
 *  4. Each <doc> element moved into <tstset> is handed to the matching
 *     documents_ entry through setOutputNode().
 *
 * If the input contains no "/mteval/srcset" element, an error is logged and
 * the process exits with status 1 instead of indexing an empty node set.
 *
 * @param file  Location of the XML input, passed to the SAX input source.
 */
NistXmlTestset::NistXmlTestset(const std::string &file)
    : logger_(logkw::channel = "NistXmlTestset") {
  Arabica::SAX2DOM::Parser<std::string> domParser;
  Arabica::SAX::InputSource<std::string> is(file);
  Arabica::SAX::CatchErrorHandler<std::string> errh;
  domParser.setErrorHandler(errh);
  domParser.parse(is);
  if(errh.errorsReported())
    BOOST_LOG_SEV(logger_, error) << errh.errors();

  Arabica::DOM::Document<std::string> doc = domParser.getDocument();
  if(doc == 0) {
    BOOST_LOG_SEV(logger_, error) << "Error parsing input file: " << file;
    exit(1);
  }

  doc.getDocumentElement().normalize();

  // Collect the source documents in the order they appear in the file.
  Arabica::XPath::XPath<std::string> xp;
  Arabica::XPath::NodeSet<std::string> docnodes =
    xp.compile("/mteval/srcset/doc").evaluateAsNodeSet(doc.getDocumentElement());
  docnodes.to_document_order();
  BOOST_FOREACH(Arabica::DOM::Node<std::string> n, docnodes)
    documents_.push_back(boost::make_shared<NistXmlDocument>(n));

  // The output tree starts out as a deep copy of the input tree.
  outdoc_ = static_cast<Arabica::DOM::Document<std::string> >(doc.cloneNode(true));

  // Guard against inputs without a <srcset>: indexing an empty node set
  // would read past its end.
  Arabica::XPath::NodeSet<std::string> srcsets =
    xp.compile("/mteval/srcset").evaluateAsNodeSet(outdoc_.getDocumentElement());
  if(srcsets.size() == 0) {
    BOOST_LOG_SEV(logger_, error) << "No /mteval/srcset element found in input file: " << file;
    exit(1);
  }
  Arabica::DOM::Element<std::string> srcset =
    static_cast<Arabica::DOM::Element<std::string> >(srcsets[0]);

  Arabica::DOM::Element<std::string> tstset = outdoc_.createElement("tstset");

  // Move every child of <srcset> over to <tstset>, keeping their order.
  // The n-th <doc> element encountered belongs to documents_[n].
  int docno = 0;
  while(srcset.hasChildNodes()) {
    Arabica::DOM::Node<std::string> n = srcset.removeChild(srcset.getFirstChild());
    tstset.appendChild(n);
    if(n.getNodeType() == Arabica::DOM::Node<std::string>::ELEMENT_NODE && n.getNodeName() == "doc")
      documents_[docno++]->setOutputNode(n);
  }

  tstset.setAttribute("setid", srcset.getAttribute("setid"));
  tstset.setAttribute("srclang", srcset.getAttribute("srclang"));
  tstset.setAttribute("trglang", "TRGLANG");
  tstset.setAttribute("sysid", "SYSID");

  srcset.getParentNode().replaceChild(tstset, srcset);
}
int main(int argc, char** argv) { factory_ = Arabica::SimpleDOM::DOMImplementation<string_type, string_adaptor>::getDOMImplementation(); document_ = factory_.createDocument(SA::construct_from_utf8(""), SA::construct_from_utf8(""), 0); root_ = document_.createElement("root"); document_.appendChild(root_); assert(root_); element1_ = document_.createElement(SA::construct_from_utf8("child1")); element2_ = document_.createElement(SA::construct_from_utf8("child2")); element3_ = document_.createElement(SA::construct_from_utf8("child3")); element1_.setAttribute(SA::construct_from_utf8("one"), SA::construct_from_utf8("1")); element2_.setAttribute(SA::construct_from_utf8("one"), SA::construct_from_utf8("1")); element2_.setAttribute(SA::construct_from_utf8("two"), SA::construct_from_utf8("1")); element2_.setAttribute(SA::construct_from_utf8("three"), SA::construct_from_utf8("1")); element2_.setAttribute(SA::construct_from_utf8("four"), SA::construct_from_utf8("1")); text_ = document_.createTextNode(SA::construct_from_utf8("data")); comment_ = document_.createComment(SA::construct_from_utf8("comment")); processingInstruction_ = document_.createProcessingInstruction(SA::construct_from_utf8("target"), SA::construct_from_utf8("data")); element2_.appendChild(text_); spinkle_ = document_.createElement(SA::construct_from_utf8("spinkle")); element2_.appendChild(spinkle_); element2_.appendChild(comment_); element2_.appendChild(processingInstruction_); attr_ = element1_.getAttributeNode(SA::construct_from_utf8("one")); root_.appendChild(element1_); root_.appendChild(element2_); root_.appendChild(element3_); chapters_ = factory_.createDocument(SA::construct_from_utf8(""), SA::construct_from_utf8(""), 0); chapters_.appendChild(chapters_.createElement(SA::construct_from_utf8("document"))); chapters_.getFirstChild().appendChild(chapters_.createElement(SA::construct_from_utf8("chapter"))).appendChild(chapters_.createTextNode(SA::construct_from_utf8("one"))); 
chapters_.getFirstChild().appendChild(chapters_.createElement(SA::construct_from_utf8("chapter"))).appendChild(chapters_.createTextNode(SA::construct_from_utf8("two"))); chapters_.getFirstChild().appendChild(chapters_.createElement(SA::construct_from_utf8("chapter"))).appendChild(chapters_.createTextNode(SA::construct_from_utf8("three"))); chapters_.getFirstChild().appendChild(chapters_.createElement(SA::construct_from_utf8("chapter"))).appendChild(chapters_.createTextNode(SA::construct_from_utf8("four"))); chapters_.getFirstChild().appendChild(chapters_.createElement(SA::construct_from_utf8("chapter"))).appendChild(chapters_.createTextNode(SA::construct_from_utf8("five"))); numbers_ = factory_.createDocument(SA::construct_from_utf8(""), SA::construct_from_utf8(""), 0); numbers_.appendChild(numbers_.createElement(SA::construct_from_utf8("doc"))); numbers_.getFirstChild().appendChild(numbers_.createElement(SA::construct_from_utf8("number"))).appendChild(numbers_.createTextNode(SA::construct_from_utf8("1"))); numbers_.getFirstChild().appendChild(numbers_.createElement(SA::construct_from_utf8("number"))).appendChild(numbers_.createTextNode(SA::construct_from_utf8("2"))); numbers_.getFirstChild().appendChild(numbers_.createElement(SA::construct_from_utf8("number"))).appendChild(numbers_.createTextNode(SA::construct_from_utf8("3"))); numbers_.getFirstChild().appendChild(numbers_.createElement(SA::construct_from_utf8("number"))).appendChild(numbers_.createTextNode(SA::construct_from_utf8("4"))); numbers_.getFirstChild().appendChild(numbers_.createElement(SA::construct_from_utf8("number"))).appendChild(numbers_.createTextNode(SA::construct_from_utf8("5"))); numbers_.getFirstChild().appendChild(numbers_.createElement(SA::construct_from_utf8("number"))).appendChild(numbers_.createTextNode(SA::construct_from_utf8("6"))); 
numbers_.getFirstChild().appendChild(numbers_.createElement(SA::construct_from_utf8("number"))).appendChild(numbers_.createTextNode(SA::construct_from_utf8("7"))); numbers_.getFirstChild().appendChild(numbers_.createElement(SA::construct_from_utf8("number"))).appendChild(numbers_.createTextNode(SA::construct_from_utf8("8"))); numbers_.getFirstChild().appendChild(numbers_.createElement(SA::construct_from_utf8("number"))).appendChild(numbers_.createTextNode(SA::construct_from_utf8("9"))); std::cout << document_ << std::endl; std::cout << numbers_ << std::endl; std::cout << chapters_ << std::endl; if (true) { using namespace Arabica::XPath; using namespace Arabica::DOM; XPathValue<string_type, string_adaptor> result = parser.evaluate(SA::construct_from_utf8("//*"), document_); for(int i = 0; i < result.asNodeSet().size(); i++) { Node<string_type, string_adaptor> node = result.asNodeSet()[i]; std::string xpathExpr = uscxml::DOMUtils::xPathForNode(node); if (xpathExpr.size()) { XPathValue<string_type, string_adaptor> innerResult = parser.evaluate(xpathExpr, document_); assert(innerResult.asNodeSet().size() > 0); assert(innerResult.asNodeSet().size() == 1); assert(innerResult.asNodeSet()[0] == node); } else { assert(node.getNodeType() != Node_base::ELEMENT_NODE); } } } if (false) { using namespace Arabica::XPath; StringVariableResolver svr; svr.setVariable(SA::construct_from_utf8("index"), SA::construct_from_utf8("1")); parser.setVariableResolver(svr); XPathValue<string_type, string_adaptor> result = parser.evaluate(SA::construct_from_utf8("/root/*[@two = $index]"), document_); assert(NODE_SET == result.type()); assert(element2_ == result.asNodeSet()[0]); parser.resetVariableResolver(); } // test18 if (false) { using namespace Arabica::XPath; XPathExpression<string_type, string_adaptor> xpath = parser.compile(SA::construct_from_utf8("root/*[position() = 2]")); XPathValue<string_type, string_adaptor> result = xpath.evaluate(document_); assert(NODE_SET == result.type()); 
assert(1 == result.asNodeSet().size()); Arabica::DOM::Node<string_type, string_adaptor> n = result.asNodeSet()[0]; assert(element2_ == n); } // test19 if (false) { using namespace Arabica::XPath; Arabica::DOM::DocumentFragment<string_type, string_adaptor> frag = document_.createDocumentFragment(); frag.appendChild(document_.createElement(SA::construct_from_utf8("foo"))); NodeSetVariableResolver svr; NodeSet<string_type, string_adaptor> ns; ns.push_back(frag); svr.setVariable(SA::construct_from_utf8("fruit"), ns); parser.setVariableResolver(svr); XPathValue<string_type, string_adaptor> result = parser.evaluate_expr(SA::construct_from_utf8("$fruit/foo|/root/child3"), document_); assert(NODE_SET == result.type()); assert(2 == result.asNodeSet().size()); assert(element3_ == result.asNodeSet()[0]); } // testUnion11 if (false) { using namespace Arabica::XPath; XPathValue<string_type, string_adaptor> result = parser.evaluate_expr(SA::construct_from_utf8("local-name(/root)"), document_); assert(STRING == result.type()); assert(SA::construct_from_utf8("root") == result.asString()); } // testLocalNameFn1 if (0) { using namespace Arabica::XPath; Arabica::DOM::DocumentFragment<std::string> frag = document_.createDocumentFragment(); frag.appendChild(document_.createElement("foo")); NodeSetVariableResolver svr; NodeSet<string_type, string_adaptor> ns; ns.push_back(frag); svr.setVariable("fruit", ns); parser.setVariableResolver(svr); XPathValue<string_type, string_adaptor> result = parser.evaluate(SA::construct_from_utf8("local-name($fruit/foo) == 'foo'"), document_); std::cout << result.asBool() << std::endl; } }
Copy_base() { Arabica::XPath::XPath<std::string> compiler; namespace_select_ = compiler.compile("namespace::node()"); } // Copy_base