/** * xmlAddDocEntity: * @doc: the document * @name: the entity name * @type: the entity type XML_xxx_yyy_ENTITY * @ExternalID: the entity external ID if available * @SystemID: the entity system ID if available * @content: the entity content * * Register a new entity for this document. * * Returns a pointer to the entity or NULL in case of error */ xmlEntityPtr xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type, const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) { xmlEntityPtr ret; xmlDtdPtr dtd; if (doc == NULL) { xmlEntitiesErr(XML_DTD_NO_DOC, "xmlAddDocEntity: document is NULL"); return(NULL); } if (doc->intSubset == NULL) { xmlEntitiesErr(XML_DTD_NO_DTD, "xmlAddDocEntity: document without internal subset"); return(NULL); } dtd = doc->intSubset; ret = xmlAddEntity(dtd, name, type, ExternalID, SystemID, content); if (ret == NULL) return(NULL); /* * Link it to the DTD */ ret->parent = dtd; ret->doc = dtd->doc; if (dtd->last == NULL) { dtd->children = dtd->last = (xmlNodePtr) ret; } else { dtd->last->next = (xmlNodePtr) ret; ret->prev = dtd->last; dtd->last = (xmlNodePtr) ret; } return(ret); }
/** * xmlEncodeEntitiesReentrant: * @doc: the document containing the string * @input: A string to convert to XML. * * Do a global encoding of a string, replacing the predefined entities * and non ASCII values with their entities and CharRef counterparts. * Contrary to xmlEncodeEntities, this routine is reentrant, and result * must be deallocated. * * Returns A newly allocated string with the substitution done. */ xmlChar * xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { const xmlChar *cur = input; xmlChar *buffer = NULL; xmlChar *out = NULL; int buffer_size = 0; int html = 0; if (input == NULL) return(NULL); if (doc != NULL) html = (doc->type == XML_HTML_DOCUMENT_NODE); /* * allocate an translation buffer. */ buffer_size = 1000; buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); if (buffer == NULL) { xmlEntitiesErrMemory("xmlEncodeEntitiesReentrant: malloc failed"); return(NULL); } out = buffer; while (*cur != '\0') { if (out - buffer > buffer_size - 100) { int indx = out - buffer; growBufferReentrant(); out = &buffer[indx]; } /* * By default one have to encode at least '<', '>', '"' and '&' ! */ if (*cur == '<') { *out++ = '&'; *out++ = 'l'; *out++ = 't'; *out++ = ';'; } else if (*cur == '>') { *out++ = '&'; *out++ = 'g'; *out++ = 't'; *out++ = ';'; } else if (*cur == '&') { *out++ = '&'; *out++ = 'a'; *out++ = 'm'; *out++ = 'p'; *out++ = ';'; } else if (((*cur >= 0x20) && (*cur < 0x80)) || (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) { /* * default case, just copy ! */ *out++ = *cur; } else if (*cur >= 0x80) { if (((doc != NULL) && (doc->encoding != NULL)) || (html)) { /* * Bjørn Reese <*****@*****.**> provided the patch xmlChar xc; xc = (*cur & 0x3F) << 6; if (cur[1] != 0) { xc += *(++cur) & 0x3F; *out++ = xc; } else */ *out++ = *cur; } else { /* * We assume we have UTF-8 input. */ char buf[11], *ptr; int val = 0, l = 1; if (*cur < 0xC0) { xmlEntitiesErr(XML_CHECK_NOT_UTF8, "xmlEncodeEntitiesReentrant : input not UTF-8"); if (doc != NULL) doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); snprintf(buf, sizeof(buf), "&#%d;", *cur); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; cur++; continue; } else if (*cur < 0xE0) { val = (cur[0]) & 0x1F; val <<= 6; val |= (cur[1]) & 0x3F; l = 2; } else if (*cur < 0xF0) { val = (cur[0]) & 0x0F; val <<= 6; val |= (cur[1]) & 0x3F; val <<= 6; val |= (cur[2]) & 0x3F; l = 3; } else if (*cur < 0xF8) { val = (cur[0]) & 0x07; val <<= 6; val |= (cur[1]) & 0x3F; val <<= 6; val |= (cur[2]) & 0x3F; val <<= 6; val |= (cur[3]) & 0x3F; l = 4; } if ((l == 1) || (!IS_CHAR(val))) { xmlEntitiesErr(XML_ERR_INVALID_CHAR, "xmlEncodeEntitiesReentrant : char out of range\n"); if (doc != NULL) doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); snprintf(buf, sizeof(buf), "&#%d;", *cur); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; cur++; continue; } /* * We could do multiple things here. Just save as a char ref */ if (html) snprintf(buf, sizeof(buf), "&#%d;", val); else snprintf(buf, sizeof(buf), "&#x%X;", val); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; cur += l; continue; } } else if (IS_BYTE_CHAR(*cur)) { char buf[11], *ptr; snprintf(buf, sizeof(buf), "&#%d;", *cur); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; } cur++; } *out++ = 0; return(buffer); }
/** * xmlEncodeEntitiesInternal: * @doc: the document containing the string * @input: A string to convert to XML. * @attr: are we handling an atrbute value * * Do a global encoding of a string, replacing the predefined entities * and non ASCII values with their entities and CharRef counterparts. * Contrary to xmlEncodeEntities, this routine is reentrant, and result * must be deallocated. * * Returns A newly allocated string with the substitution done. */ static xmlChar * xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) { const xmlChar *cur = input; xmlChar *buffer = NULL; xmlChar *out = NULL; size_t buffer_size = 0; int html = 0; if (input == NULL) return(NULL); if (doc != NULL) html = (doc->type == XML_HTML_DOCUMENT_NODE); /* * allocate an translation buffer. */ buffer_size = 1000; buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); if (buffer == NULL) { xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed"); return(NULL); } out = buffer; while (*cur != '\0') { size_t indx = out - buffer; if (indx + 100 > buffer_size) { growBufferReentrant(); out = &buffer[indx]; } /* * By default one have to encode at least '<', '>', '"' and '&' ! */ if (*cur == '<') { const xmlChar *end; /* * Special handling of server side include in HTML attributes */ if (html && attr && (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') && ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) { while (cur != end) { *out++ = *cur++; indx = out - buffer; if (indx + 100 > buffer_size) { growBufferReentrant(); out = &buffer[indx]; } } *out++ = *cur++; *out++ = *cur++; *out++ = *cur++; continue; } *out++ = '&'; *out++ = 'l'; *out++ = 't'; *out++ = ';'; } else if (*cur == '>') { *out++ = '&'; *out++ = 'g'; *out++ = 't'; *out++ = ';'; } else if (*cur == '&') { /* * Special handling of &{...} construct from HTML 4, see * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1 */ if (html && attr && (cur[1] == '{') && (strchr((const char *) cur, '}'))) { while (*cur != '}') { *out++ = *cur++; indx = out - buffer; if (indx + 100 > buffer_size) { growBufferReentrant(); out = &buffer[indx]; } } *out++ = *cur++; continue; } *out++ = '&'; *out++ = 'a'; *out++ = 'm'; *out++ = 'p'; *out++ = ';'; } else if (((*cur >= 0x20) && (*cur < 0x80)) || (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) { /* * default case, just copy ! */ *out++ = *cur; } else if (*cur >= 0x80) { if (((doc != NULL) && (doc->encoding != NULL)) || (html)) { /* * Bjørn Reese <*****@*****.**> provided the patch xmlChar xc; xc = (*cur & 0x3F) << 6; if (cur[1] != 0) { xc += *(++cur) & 0x3F; *out++ = xc; } else */ *out++ = *cur; } else { /* * We assume we have UTF-8 input. */ char buf[11], *ptr; int val = 0, l = 1; if (*cur < 0xC0) { xmlEntitiesErr(XML_CHECK_NOT_UTF8, "xmlEncodeEntities: input not UTF-8"); if (doc != NULL) doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); snprintf(buf, sizeof(buf), "&#%d;", *cur); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; cur++; continue; } else if (*cur < 0xE0) { val = (cur[0]) & 0x1F; val <<= 6; val |= (cur[1]) & 0x3F; l = 2; } else if (*cur < 0xF0) { val = (cur[0]) & 0x0F; val <<= 6; val |= (cur[1]) & 0x3F; val <<= 6; val |= (cur[2]) & 0x3F; l = 3; } else if (*cur < 0xF8) { val = (cur[0]) & 0x07; val <<= 6; val |= (cur[1]) & 0x3F; val <<= 6; val |= (cur[2]) & 0x3F; val <<= 6; val |= (cur[3]) & 0x3F; l = 4; } if ((l == 1) || (!IS_CHAR(val))) { xmlEntitiesErr(XML_ERR_INVALID_CHAR, "xmlEncodeEntities: char out of range\n"); if (doc != NULL) doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); snprintf(buf, sizeof(buf), "&#%d;", *cur); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; cur++; continue; } /* * We could do multiple things here. Just save as a char ref */ snprintf(buf, sizeof(buf), "&#x%X;", val); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; cur += l; continue; } } else if (IS_BYTE_CHAR(*cur)) { char buf[11], *ptr; snprintf(buf, sizeof(buf), "&#%d;", *cur); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; } cur++; } *out = 0; return(buffer); mem_error: xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed"); xmlFree(buffer); return(NULL); }