/******************************************************************************* 14.9.1.1 fn-zorba-xml:canonicalize ********************************************************************************/ bool FnZorbaCanonicalizeIterator::nextImpl(store::Item_t& result, PlanState& planState) const { zstring lDocString; xmlDocPtr lDoc; xmlChar* lResult; std::istream* lInstream = NULL; char buf[1024]; store::Item_t tempItem; FnZorbaCanonicalizeIteratorState* state; DEFAULT_STACK_INIT(FnZorbaCanonicalizeIteratorState, state, planState); // Read the XML string // if the XML string is a streamable string it will have to be materialized // since the libxml2 xmlReadMemory functions can't work with streamable strings consumeNext(result, theChildren[0].getp(), planState); // read options if (theChildren.size() == 2) { consumeNext(tempItem, theChildren[1].getp(), planState); zorba::processOptions(tempItem, state->theProperties, theSctx, loc); } try { if (result->isStreamable()) { lInstream = &result->getStream(); while (lInstream->good()) { lInstream->read(buf, 1024); lDocString.append(buf, static_cast<zstring::size_type>(lInstream->gcount())); } } else { result->getStringValue2(lDocString); } int lOptions = XML_PARSE_NOERROR | state->theProperties.toLibXmlOptions(); lDoc = xmlReadMemory(lDocString.c_str(), lDocString.size(), "input.xml", NULL, lOptions); if (!lDoc) { zstring lErrorMsg; lErrorMsg = "\"" + lDocString + "\""; throw XQUERY_EXCEPTION(err::FOCZ0001, ERROR_PARAMS("x:canonicalize()", lErrorMsg ), ERROR_LOC(loc)); } xmlC14NDocDumpMemory(lDoc, NULL, 2/*XML_C14N_1_1*/, NULL, 1, &lResult); lDocString = zstring((char*)lResult); xmlFree(lResult); xmlFreeDoc(lDoc); } catch ( std::exception const& ) { zstring lErrorMsg; lErrorMsg = "\"" + lDocString + "\""; throw XQUERY_EXCEPTION(err::FOCZ0001, ERROR_PARAMS("x:canonicalize()", lErrorMsg ), ERROR_LOC(loc)); } STACK_PUSH(GENV_ITEMFACTORY->createString(result, lDocString), state); STACK_END(state); }
/*
 * :call-seq:
 *   document.canonicalize -> String
 *   document.canonicalize(options) -> String
 *
 * Returns a string containing the canonicalized form of the document.
 * Implemented to include all of the functionality of the libxml2
 * {xmlC14NDocDumpMemory}[http://xmlsoft.org/html/libxml-c14n.html#xmlC14NDocDumpMemory]
 * method. Returns nil when libxml2 does not hand back an output buffer.
 *
 * === Options
 * [comments]
 *   * *Type:* Boolean
 *   * *Default:* false
 *   Specifies if comments should be output.
 *   * Evaluated by Ruby truthiness: nil and false disable comments, any
 *     other value enables them.
 * [inclusive_ns_prefixes]
 *   * *Type:* Array of strings
 *   * *Default:* empty array
 *   Array of namespace prefixes to include in exclusive canonicalization only.
 *   * The last item in the list is reserved for a NULL value because the C
 *     method demands it, therefore up to the first 255 valid entries will be
 *     used. Entries that are nil, false or not a String are skipped.
 *   * <em>Only used for *XML_C14N_EXCLUSIVE_1_0* mode. Ignored otherwise.</em>
 * [mode]
 *   * *Type:* XML::Document Constant
 *   * *Default:* XML_C14N_1_0
 *   Specifies the mode of canonicalization.
 *   * *NOTE:* XML_C14N_1_1 may not be fully implemented upon compilation due
 *     to C library compatibility. Please check if XML_C14N_1_0 and
 *     XML_C14N_1_1 are the same value prior to using XML_C14N_1_1.
 * [nodes]
 *   * *Type:* Array of XML::Node objects
 *   * *Default:* empty array
 *   XML::Nodes to include in the canonicalization process
 *   * For large lists of more than 256 nodes, up to the first 256 entries
 *     that are neither nil nor false will be used.
 */
static VALUE rxml_document_canonicalize( int argc, VALUE *argv, VALUE self )
{
  xmlDocPtr xdoc;
  xmlChar *buffer = NULL;
  VALUE option_hash = Qnil;
  VALUE result = Qnil;

  // :comments option (plain C flag handed to libxml2)
  int comments = 0;

  // :mode option
  int c14n_mode = XML_C14N_1_0;

  // :inclusive_ns_prefixes option (ARRAY)
#define C14N_NS_LIMIT 256
  xmlChar * inc_ns_prefixes_ptr[C14N_NS_LIMIT];

  // :nodes option (ARRAY)
#define C14N_NODESET_LIMIT 256
  xmlNodePtr node_ptr_array[C14N_NODESET_LIMIT];
  xmlNodeSet nodeset = { 0, C14N_NODESET_LIMIT, NULL };

  /* At least one NULL value must be defined in the array or the extension will
   * segfault when using XML_C14N_EXCLUSIVE_1_0 mode.
   * API docs: "list of inclusive namespace prefixes ended with a NULL"
   */
  inc_ns_prefixes_ptr[0] = NULL;

  rb_scan_args(argc, argv, "01", &option_hash);

  // Only look at options when a hash was passed as argument
  if (!NIL_P(option_hash))
  {
    VALUE o_comments;
    VALUE o_mode;
    VALUE o_i_ns_prefixes;
    VALUE o_nodes;

    Check_Type(option_hash, T_HASH);

    o_comments = rb_hash_aref(option_hash, ID2SYM(rb_intern("comments")));
    comments = (RTEST(o_comments) ? 1 : 0);

    o_mode = rb_hash_aref(option_hash, ID2SYM(rb_intern("mode")));
    if (!NIL_P(o_mode))
    {
      Check_Type(o_mode, T_FIXNUM);
      // TODO: the mode value is passed to libxml2 without range validation
      c14n_mode = NUM2INT(o_mode);
    }

    o_i_ns_prefixes = rb_hash_aref(option_hash, ID2SYM(rb_intern("inclusive_ns_prefixes")));
    if (!NIL_P(o_i_ns_prefixes))
    {
      VALUE * list_in;
      int list_size;
      int i;
      int p = 0; // next free slot in inc_ns_prefixes_ptr

      Check_Type(o_i_ns_prefixes, T_ARRAY);
      list_in = RARRAY(o_i_ns_prefixes)->ptr;
      list_size = RARRAY(o_i_ns_prefixes)->len;

      // Stop one slot early: the final slot is reserved for the terminator.
      for (i = 0; i < list_size && p < C14N_NS_LIMIT - 1; ++i)
      {
        if (RTEST(list_in[i]) && TYPE(list_in[i]) == T_STRING)
        {
          inc_ns_prefixes_ptr[p] = (xmlChar *)StringValueCStr(list_in[i]);
          p++;
        }
      }

      // API docs: "list of inclusive namespace prefixes ended with a NULL"
      inc_ns_prefixes_ptr[p] = NULL;
    }

    o_nodes = rb_hash_aref(option_hash, ID2SYM(rb_intern("nodes")));
    if (!NIL_P(o_nodes))
    {
      VALUE * list_in;
      int node_list_size;
      int i;
      int p = 0; // next free slot in node_ptr_array

      Check_Type(o_nodes, T_ARRAY);
      list_in = RARRAY(o_nodes)->ptr;
      node_list_size = RARRAY(o_nodes)->len;

      for (i = 0; i < node_list_size && p < C14N_NODESET_LIMIT; ++i)
      {
        if (RTEST(list_in[i]))
        {
          xmlNodePtr node_ptr;
          Data_Get_Struct(list_in[i], xmlNode, node_ptr);
          node_ptr_array[p] = node_ptr;
          p++;
        }
      }

      // Report only the slots that were actually filled. Using the Ruby
      // array length here would expose uninitialized pointers to libxml2
      // whenever nil/false entries were skipped above.
      nodeset.nodeNr = p;
      nodeset.nodeTab = node_ptr_array;
    }
  }//option_hash

  Data_Get_Struct(self, xmlDoc, xdoc);

  // The array decays to the xmlChar ** the API expects; the returned byte
  // count is not needed because success is detected through `buffer`.
  xmlC14NDocDumpMemory(
    xdoc,
    (nodeset.nodeNr == 0 ? NULL : &nodeset),
    c14n_mode,
    inc_ns_prefixes_ptr,
    comments,
    &buffer
  );

  if (buffer)
  {
    result = rxml_new_cstr((const char*) buffer, NULL);
    xmlFree(buffer);
  }

  return result;
}
/*
 * test_c14n:
 * @xml_filename:          path of the XML document to load
 * @with_comments:         forwarded as the with_comments argument of
 *                         xmlC14NDocDumpMemory
 * @exclusive:             forwarded as the mode argument of
 *                         xmlC14NDocDumpMemory
 * @xpath_filename:        optional file from which load_xpath_expr() builds
 *                         the node set to canonicalize; NULL means no node
 *                         set is passed
 * @inclusive_namespaces:  forwarded as the inclusive namespace prefix list
 *
 * Parses @xml_filename, canonicalizes it and writes the canonical form to
 * file descriptor 1.
 *
 * Returns the value returned by xmlC14NDocDumpMemory on success, or -1 when
 * the document cannot be parsed, is empty, the XPath expression cannot be
 * evaluated, or canonicalization fails.
 */
static int
test_c14n(const char* xml_filename, int with_comments, int exclusive,
          const char* xpath_filename, xmlChar **inclusive_namespaces) {
    xmlDocPtr doc;
    xmlXPathObjectPtr xpath = NULL;
    xmlChar *result = NULL;
    int ret;

    /*
     * build an XML tree from the file; we need to add default
     * attributes and resolve all character and entities references
     */
    xmlLoadExtDtdDefaultValue = XML_DETECT_IDS | XML_COMPLETE_ATTRS;
    xmlSubstituteEntitiesDefault(1);

    doc = xmlReadFile(xml_filename, NULL, XML_PARSE_DTDATTR | XML_PARSE_NOENT);
    if (doc == NULL) {
        fprintf(stderr, "Error: unable to parse file \"%s\"\n", xml_filename);
        return(-1);
    }

    /*
     * Check the document is of the right kind
     */
    if(xmlDocGetRootElement(doc) == NULL) {
        fprintf(stderr,"Error: empty document for file \"%s\"\n", xml_filename);
        xmlFreeDoc(doc);
        return(-1);
    }

    /*
     * load xpath file if specified
     */
    if(xpath_filename) {
        xpath = load_xpath_expr(doc, xpath_filename);
        if(xpath == NULL) {
            fprintf(stderr,"Error: unable to evaluate xpath expression\n");
            xmlFreeDoc(doc);
            return(-1);
        }
    }

    /*
     * Canonical form
     */
    /* fprintf(stderr,"File \"%s\" loaded: start canonization\n", xml_filename); */
    ret = xmlC14NDocDumpMemory(doc,
            (xpath) ? xpath->nodesetval : NULL,
            exclusive, inclusive_namespaces,
            with_comments, &result);
    if(ret >= 0) {
        if(result != NULL) {
            /*
             * write() may transfer fewer bytes than requested, so keep
             * going until everything is out or an error is reported.
             */
            int offset = 0;

            while(offset < ret) {
                int written = (int) write(1, result + offset, ret - offset);

                if(written <= 0) {
                    fprintf(stderr,
                            "Error: unable to write canonical form of \"%s\"\n",
                            xml_filename);
                    break;
                }
                offset += written;
            }
        }
    } else {
        fprintf(stderr,"Error: failed to canonicalize XML file \"%s\" (ret=%d)\n", xml_filename, ret);
        ret = -1;
    }

    /*
     * Cleanup, shared by the success and the failure path so that the
     * XPath object is released in both cases.
     */
    if(result != NULL) xmlFree(result);
    if(xpath != NULL) xmlXPathFreeObject(xpath);
    xmlFreeDoc(doc);

    return(ret);
}