/*
 * Run a whole parsed document through the translation handle "convset"
 * (the historical purpose, per the original comment, is conversion to
 * EBCDIC).
 *
 * pool    - pool used for the new namespace array and the URI copies
 * pdoc    - document to convert; pdoc->namespaces is replaced on success
 * convset - translation handle passed to apr_xlate_conv_buffer()
 *
 * Returns APR_ENOMEM if the array or a URI copy cannot be obtained, the
 * status of apr_xlate_conv_buffer() if a URI fails to convert, and
 * otherwise whatever apr_xml_parser_convert_elem() returns for the root.
 */
APU_DECLARE(apr_status_t) apr_xml_parser_convert_doc(apr_pool_t *pool,
                                                     apr_xml_doc *pdoc,
                                                     apr_xlate_t *convset)
{
    apr_array_header_t *src_uris = pdoc->namespaces;

    /*
     * The stored namespace strings are never modified (the original note
     * here calls them constant).  Each URI is duplicated and the copy is
     * converted in place; the copies are collected in a fresh array that
     * takes the place of the old one once every URI has been converted.
     */
    if (src_uris != NULL) {
        apr_array_header_t *converted;
        int idx;

        converted = apr_array_make(pool, src_uris->nelts,
                                   sizeof(const char *));
        if (converted == NULL) {
            return APR_ENOMEM;
        }

        for (idx = 0; idx < src_uris->nelts; ++idx) {
            apr_status_t rv;
            apr_size_t in_left;
            apr_size_t out_left;
            char *uri_copy;

            uri_copy = apr_pstrdup(pool,
                                   APR_XML_GET_URI_ITEM(src_uris, idx));
            if (uri_copy == NULL) {
                return APR_ENOMEM;
            }

            /* source and destination are the same buffer, same length */
            in_left = strlen(uri_copy);
            out_left = in_left;
            rv = apr_xlate_conv_buffer(convset, uri_copy, &in_left,
                                       uri_copy, &out_left);
            if (rv) {
                return rv;
            }

            apr_xml_insert_uri(converted, uri_copy);
        }

        pdoc->namespaces = converted;
    }

    /* the element tree is converted starting from the root */
    return apr_xml_parser_convert_elem(pdoc->root, convset);
}
/*
 * Create an XML parser whose storage lives in "pool" and whose Expat
 * callbacks are supplied by the caller.
 *
 * pool       - pool for the parser, its document and the namespace array;
 *              cleanup_parser is registered on it for the new parser
 * start_func - passed to XML_SetElementHandler() as the start handler
 * end_func   - passed to XML_SetElementHandler() as the end handler
 * cdata_func - passed to XML_SetCharacterDataHandler()
 *
 * Returns the new parser.  If Expat cannot create its parser object, the
 * pool's abort function (when one is installed) is invoked with
 * APR_ENOMEM and NULL is returned.
 */
APR_DECLARE(apr_xml_parser *) apr_xml_parser_create_ex(apr_pool_t *pool,
                                                       void *start_func,
                                                       void *end_func,
                                                       void *cdata_func)
{
    apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));

    parser->impl = apr_xml_get_parser_impl();

    parser->p = pool;
    parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));

    parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));

    /* ### is there a way to avoid hard-coding this? */
    apr_xml_insert_uri(parser->doc->namespaces, APR_KW_DAV);

    parser->xp = XML_ParserCreate(NULL);
    if (parser->xp == NULL) {
        /*
         * A pool is not required to have an abort function; calling
         * through a NULL pointer here would crash instead of reporting
         * the failure, so only invoke it when one is actually set.
         */
        apr_abortfunc_t abort_fn = apr_pool_abort_get(pool);

        if (abort_fn != NULL) {
            abort_fn(APR_ENOMEM);
        }
        return NULL;
    }

    apr_pool_cleanup_register(pool, parser, cleanup_parser,
                              apr_pool_cleanup_null);

    XML_SetUserData(parser->xp, parser);
    XML_SetElementHandler(parser->xp, start_func, end_func);
    XML_SetCharacterDataHandler(parser->xp, cdata_func);

    /* Prevent the "billion laughs" attack against expat by disabling
     * internal entity expansion.  With 2.x, forcibly stop the parser
     * if an entity is declared - this is safer and a more obvious
     * failure mode.  With older versions, installing a noop
     * DefaultHandler means that internal entities will be expanded as
     * the empty string, which is also sufficient to prevent the
     * attack. */
#if XML_MAJOR_VERSION > 1
    XML_SetEntityDeclHandler(parser->xp, entity_declaration);
#else
    XML_SetDefaultHandler(parser->xp, default_handler);
#endif

    return parser;
}
static void start_handler(void *userdata, const char *name, const char **attrs) { apr_xml_parser *parser = userdata; apr_xml_elem *elem; apr_xml_attr *attr; apr_xml_attr *prev; char *colon; const char *quoted; char *elem_name; /* punt once we find an error */ if (parser->error) return; elem = apr_pcalloc(parser->p, sizeof(*elem)); /* prep the element */ elem->name = elem_name = apr_pstrdup(parser->p, name); /* fill in the attributes (note: ends up in reverse order) */ while (*attrs) { attr = apr_palloc(parser->p, sizeof(*attr)); attr->name = apr_pstrdup(parser->p, *attrs++); attr->value = apr_pstrdup(parser->p, *attrs++); attr->next = elem->attr; elem->attr = attr; } /* hook the element into the tree */ if (parser->cur_elem == NULL) { /* no current element; this also becomes the root */ parser->cur_elem = parser->doc->root = elem; } else { /* this element appeared within the current elem */ elem->parent = parser->cur_elem; /* set up the child/sibling links */ if (elem->parent->last_child == NULL) { /* no first child either */ elem->parent->first_child = elem->parent->last_child = elem; } else { /* hook onto the end of the parent's children */ elem->parent->last_child->next = elem; elem->parent->last_child = elem; } /* this element is now the current element */ parser->cur_elem = elem; } /* scan the attributes for namespace declarations */ for (prev = NULL, attr = elem->attr; attr; attr = attr->next) { if (strncmp(attr->name, APR_KW_xmlns, 5) == 0) { const char *prefix = &attr->name[5]; apr_xml_ns_scope *ns_scope; /* test for xmlns:foo= form and xmlns= form */ if (*prefix == 0x3A) { /* a namespace prefix declaration must have a non-empty value. 
*/ if (attr->value[0] == '\0') { parser->error = APR_XML_NS_ERROR_INVALID_DECL; return; } ++prefix; } else if (*prefix != '\0') { /* advance "prev" since "attr" is still present */ prev = attr; continue; } /* quote the URI before we ever start working with it */ quoted = apr_xml_quote_string(parser->p, attr->value, 1); /* build and insert the new scope */ ns_scope = apr_pcalloc(parser->p, sizeof(*ns_scope)); ns_scope->prefix = prefix; ns_scope->ns = apr_xml_insert_uri(parser->doc->namespaces, quoted); ns_scope->emptyURI = *quoted == '\0'; ns_scope->next = elem->ns_scope; elem->ns_scope = ns_scope; /* remove this attribute from the element */ if (prev == NULL) elem->attr = attr->next; else prev->next = attr->next; /* Note: prev will not be advanced since we just removed "attr" */ } else if (strcmp(attr->name, APR_KW_xmlns_lang) == 0) { /* save away the language (in quoted form) */ elem->lang = apr_xml_quote_string(parser->p, attr->value, 1); /* remove this attribute from the element */ if (prev == NULL) elem->attr = attr->next; else prev->next = attr->next; /* Note: prev will not be advanced since we just removed "attr" */ } else { /* advance "prev" since "attr" is still present */ prev = attr; } } /* ** If an xml:lang attribute didn't exist (lang==NULL), then copy the ** language from the parent element (if present). ** ** NOTE: elem_size() *depends* upon this pointer equality. */ if (elem->lang == NULL && elem->parent != NULL) elem->lang = elem->parent->lang; /* adjust the element's namespace */ colon = strchr(elem_name, 0x3A); if (colon == NULL) { /* * The element is using the default namespace, which will always * be found. Either it will be "no namespace", or a default * namespace URI has been specified at some point. 
*/ elem->ns = find_prefix(parser, ""); } else if (APR_XML_NS_IS_RESERVED(elem->name)) { elem->ns = APR_XML_NS_NONE; } else { *colon = '\0'; elem->ns = find_prefix(parser, elem->name); elem->name = colon + 1; if (APR_XML_NS_IS_ERROR(elem->ns)) { parser->error = elem->ns; return; } } /* adjust all remaining attributes' namespaces */ for (attr = elem->attr; attr; attr = attr->next) { /* * apr_xml_attr defines this as "const" but we dup'd it, so we * know that we can change it. a bit hacky, but the existing * structure def is best. */ char *attr_name = (char *)attr->name; colon = strchr(attr_name, 0x3A); if (colon == NULL) { /* * Attributes do NOT use the default namespace. Therefore, * we place them into the "no namespace" category. */ attr->ns = APR_XML_NS_NONE; } else if (APR_XML_NS_IS_RESERVED(attr->name)) { attr->ns = APR_XML_NS_NONE; } else { *colon = '\0'; attr->ns = find_prefix(parser, attr->name); attr->name = colon + 1; if (APR_XML_NS_IS_ERROR(attr->ns)) { parser->error = attr->ns; return; } } } }