/* * xmlCreateEntity: * * internal routine doing the entity node strutures allocations */ static xmlEntityPtr xmlCreateEntity(xmlDictPtr dict, const xmlChar *name, int type, const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) { xmlEntityPtr ret; ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity)); if (ret == NULL) { xmlEntitiesErrMemory("xmlCreateEntity: malloc failed"); return(NULL); } memset(ret, 0, sizeof(xmlEntity)); ret->type = XML_ENTITY_DECL; ret->checked = 0; /* * fill the structure. */ ret->etype = (xmlEntityType) type; if (dict == NULL) { ret->name = xmlStrdup(name); if (ExternalID != NULL) ret->ExternalID = xmlStrdup(ExternalID); if (SystemID != NULL) ret->SystemID = xmlStrdup(SystemID); } else { ret->name = xmlDictLookup(dict, name, -1); if (ExternalID != NULL) ret->ExternalID = xmlDictLookup(dict, ExternalID, -1); if (SystemID != NULL) ret->SystemID = xmlDictLookup(dict, SystemID, -1); } if (content != NULL) { ret->length = xmlStrlen(content); if ((dict != NULL) && (ret->length < 5)) ret->content = (xmlChar *) xmlDictLookup(dict, content, ret->length); else ret->content = xmlStrndup(content, ret->length); } else { ret->length = 0; ret->content = NULL; } ret->URI = NULL; /* to be computed by the layer knowing the defining entity */ ret->orig = NULL; ret->owner = 0; return(ret); }
/** * xmlEncodeSpecialChars: * @doc: the document containing the string * @input: A string to convert to XML. * * Do a global encoding of a string, replacing the predefined entities * this routine is reentrant, and result must be deallocated. * * Returns A newly allocated string with the substitution done. */ xmlChar * xmlEncodeSpecialChars(xmlDocPtr doc ATTRIBUTE_UNUSED, const xmlChar *input) { const xmlChar *cur = input; xmlChar *buffer = NULL; xmlChar *out = NULL; int buffer_size = 0; if (input == NULL) return(NULL); /* * allocate an translation buffer. */ buffer_size = 1000; buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); if (buffer == NULL) { xmlEntitiesErrMemory("xmlEncodeSpecialChars: malloc failed"); return(NULL); } out = buffer; while (*cur != '\0') { if (out - buffer > buffer_size - 10) { int indx = out - buffer; growBufferReentrant(); out = &buffer[indx]; } /* * By default one have to encode at least '<', '>', '"' and '&' ! */ if (*cur == '<') { *out++ = '&'; *out++ = 'l'; *out++ = 't'; *out++ = ';'; } else if (*cur == '>') { *out++ = '&'; *out++ = 'g'; *out++ = 't'; *out++ = ';'; } else if (*cur == '&') { *out++ = '&'; *out++ = 'a'; *out++ = 'm'; *out++ = 'p'; *out++ = ';'; } else if (*cur == '"') { *out++ = '&'; *out++ = 'q'; *out++ = 'u'; *out++ = 'o'; *out++ = 't'; *out++ = ';'; } else if (*cur == '\r') { *out++ = '&'; *out++ = '#'; *out++ = '1'; *out++ = '3'; *out++ = ';'; } else { /* * Works because on UTF-8, all extended sequences cannot * result in bytes in the ASCII range. */ *out++ = *cur; } cur++; } *out++ = 0; return(buffer); }
/** * xmlEncodeEntitiesReentrant: * @doc: the document containing the string * @input: A string to convert to XML. * * Do a global encoding of a string, replacing the predefined entities * and non ASCII values with their entities and CharRef counterparts. * Contrary to xmlEncodeEntities, this routine is reentrant, and result * must be deallocated. * * Returns A newly allocated string with the substitution done. */ xmlChar * xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) { const xmlChar *cur = input; xmlChar *buffer = NULL; xmlChar *out = NULL; int buffer_size = 0; int html = 0; if (input == NULL) return(NULL); if (doc != NULL) html = (doc->type == XML_HTML_DOCUMENT_NODE); /* * allocate an translation buffer. */ buffer_size = 1000; buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); if (buffer == NULL) { xmlEntitiesErrMemory("xmlEncodeEntitiesReentrant: malloc failed"); return(NULL); } out = buffer; while (*cur != '\0') { if (out - buffer > buffer_size - 100) { int indx = out - buffer; growBufferReentrant(); out = &buffer[indx]; } /* * By default one have to encode at least '<', '>', '"' and '&' ! */ if (*cur == '<') { *out++ = '&'; *out++ = 'l'; *out++ = 't'; *out++ = ';'; } else if (*cur == '>') { *out++ = '&'; *out++ = 'g'; *out++ = 't'; *out++ = ';'; } else if (*cur == '&') { *out++ = '&'; *out++ = 'a'; *out++ = 'm'; *out++ = 'p'; *out++ = ';'; } else if (((*cur >= 0x20) && (*cur < 0x80)) || (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) { /* * default case, just copy ! */ *out++ = *cur; } else if (*cur >= 0x80) { if (((doc != NULL) && (doc->encoding != NULL)) || (html)) { /* * Bjørn Reese <*****@*****.**> provided the patch xmlChar xc; xc = (*cur & 0x3F) << 6; if (cur[1] != 0) { xc += *(++cur) & 0x3F; *out++ = xc; } else */ *out++ = *cur; } else { /* * We assume we have UTF-8 input. */ char buf[11], *ptr; int val = 0, l = 1; if (*cur < 0xC0) { xmlEntitiesErr(XML_CHECK_NOT_UTF8, "xmlEncodeEntitiesReentrant : input not UTF-8"); if (doc != NULL) doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); snprintf(buf, sizeof(buf), "&#%d;", *cur); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; cur++; continue; } else if (*cur < 0xE0) { val = (cur[0]) & 0x1F; val <<= 6; val |= (cur[1]) & 0x3F; l = 2; } else if (*cur < 0xF0) { val = (cur[0]) & 0x0F; val <<= 6; val |= (cur[1]) & 0x3F; val <<= 6; val |= (cur[2]) & 0x3F; l = 3; } else if (*cur < 0xF8) { val = (cur[0]) & 0x07; val <<= 6; val |= (cur[1]) & 0x3F; val <<= 6; val |= (cur[2]) & 0x3F; val <<= 6; val |= (cur[3]) & 0x3F; l = 4; } if ((l == 1) || (!IS_CHAR(val))) { xmlEntitiesErr(XML_ERR_INVALID_CHAR, "xmlEncodeEntitiesReentrant : char out of range\n"); if (doc != NULL) doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); snprintf(buf, sizeof(buf), "&#%d;", *cur); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; cur++; continue; } /* * We could do multiple things here. Just save as a char ref */ if (html) snprintf(buf, sizeof(buf), "&#%d;", val); else snprintf(buf, sizeof(buf), "&#x%X;", val); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; cur += l; continue; } } else if (IS_BYTE_CHAR(*cur)) { char buf[11], *ptr; snprintf(buf, sizeof(buf), "&#%d;", *cur); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; } cur++; } *out++ = 0; return(buffer); }
/* * xmlAddEntity : register a new entity for an entities table. */ static xmlEntityPtr xmlAddEntity(xmlDtdPtr dtd, const xmlChar *name, int type, const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) { xmlDictPtr dict = NULL; xmlEntitiesTablePtr table = NULL; xmlEntityPtr ret; if (name == NULL) return(NULL); if (dtd == NULL) return(NULL); if (dtd->doc != NULL) dict = dtd->doc->dict; switch (type) { case XML_INTERNAL_GENERAL_ENTITY: case XML_EXTERNAL_GENERAL_PARSED_ENTITY: case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: if (dtd->entities == NULL) dtd->entities = xmlHashCreateDict(0, dict); table = dtd->entities; break; case XML_INTERNAL_PARAMETER_ENTITY: case XML_EXTERNAL_PARAMETER_ENTITY: if (dtd->pentities == NULL) dtd->pentities = xmlHashCreateDict(0, dict); table = dtd->pentities; break; case XML_INTERNAL_PREDEFINED_ENTITY: return(NULL); } if (table == NULL) return(NULL); ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity)); if (ret == NULL) { xmlEntitiesErrMemory("xmlAddEntity:: malloc failed"); return(NULL); } memset(ret, 0, sizeof(xmlEntity)); ret->type = XML_ENTITY_DECL; /* * fill the structure. */ ret->etype = (xmlEntityType) type; if (dict == NULL) { ret->name = xmlStrdup(name); if (ExternalID != NULL) ret->ExternalID = xmlStrdup(ExternalID); if (SystemID != NULL) ret->SystemID = xmlStrdup(SystemID); } else { ret->name = xmlDictLookup(dict, name, -1); if (ExternalID != NULL) ret->ExternalID = xmlDictLookup(dict, ExternalID, -1); if (SystemID != NULL) ret->SystemID = xmlDictLookup(dict, SystemID, -1); } if (content != NULL) { ret->length = xmlStrlen(content); if ((dict != NULL) && (ret->length < 5)) ret->content = (xmlChar *) xmlDictLookup(dict, content, ret->length); else ret->content = xmlStrndup(content, ret->length); } else { ret->length = 0; ret->content = NULL; } ret->URI = NULL; /* to be computed by the layer knowing the defining entity */ ret->orig = NULL; ret->owner = 0; ret->doc = dtd->doc; if (xmlHashAddEntry(table, name, ret)) { /* * entity was already defined at another level. */ xmlFreeEntity(ret); return(NULL); } return(ret); }
/** * xmlEncodeEntitiesInternal: * @doc: the document containing the string * @input: A string to convert to XML. * @attr: are we handling an atrbute value * * Do a global encoding of a string, replacing the predefined entities * and non ASCII values with their entities and CharRef counterparts. * Contrary to xmlEncodeEntities, this routine is reentrant, and result * must be deallocated. * * Returns A newly allocated string with the substitution done. */ static xmlChar * xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) { const xmlChar *cur = input; xmlChar *buffer = NULL; xmlChar *out = NULL; size_t buffer_size = 0; int html = 0; if (input == NULL) return(NULL); if (doc != NULL) html = (doc->type == XML_HTML_DOCUMENT_NODE); /* * allocate an translation buffer. */ buffer_size = 1000; buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar)); if (buffer == NULL) { xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed"); return(NULL); } out = buffer; while (*cur != '\0') { size_t indx = out - buffer; if (indx + 100 > buffer_size) { growBufferReentrant(); out = &buffer[indx]; } /* * By default one have to encode at least '<', '>', '"' and '&' ! */ if (*cur == '<') { const xmlChar *end; /* * Special handling of server side include in HTML attributes */ if (html && attr && (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') && ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) { while (cur != end) { *out++ = *cur++; indx = out - buffer; if (indx + 100 > buffer_size) { growBufferReentrant(); out = &buffer[indx]; } } *out++ = *cur++; *out++ = *cur++; *out++ = *cur++; continue; } *out++ = '&'; *out++ = 'l'; *out++ = 't'; *out++ = ';'; } else if (*cur == '>') { *out++ = '&'; *out++ = 'g'; *out++ = 't'; *out++ = ';'; } else if (*cur == '&') { /* * Special handling of &{...} construct from HTML 4, see * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1 */ if (html && attr && (cur[1] == '{') && (strchr((const char *) cur, '}'))) { while (*cur != '}') { *out++ = *cur++; indx = out - buffer; if (indx + 100 > buffer_size) { growBufferReentrant(); out = &buffer[indx]; } } *out++ = *cur++; continue; } *out++ = '&'; *out++ = 'a'; *out++ = 'm'; *out++ = 'p'; *out++ = ';'; } else if (((*cur >= 0x20) && (*cur < 0x80)) || (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) { /* * default case, just copy ! */ *out++ = *cur; } else if (*cur >= 0x80) { if (((doc != NULL) && (doc->encoding != NULL)) || (html)) { /* * Bjørn Reese <*****@*****.**> provided the patch xmlChar xc; xc = (*cur & 0x3F) << 6; if (cur[1] != 0) { xc += *(++cur) & 0x3F; *out++ = xc; } else */ *out++ = *cur; } else { /* * We assume we have UTF-8 input. */ char buf[11], *ptr; int val = 0, l = 1; if (*cur < 0xC0) { xmlEntitiesErr(XML_CHECK_NOT_UTF8, "xmlEncodeEntities: input not UTF-8"); if (doc != NULL) doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); snprintf(buf, sizeof(buf), "&#%d;", *cur); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; cur++; continue; } else if (*cur < 0xE0) { val = (cur[0]) & 0x1F; val <<= 6; val |= (cur[1]) & 0x3F; l = 2; } else if (*cur < 0xF0) { val = (cur[0]) & 0x0F; val <<= 6; val |= (cur[1]) & 0x3F; val <<= 6; val |= (cur[2]) & 0x3F; l = 3; } else if (*cur < 0xF8) { val = (cur[0]) & 0x07; val <<= 6; val |= (cur[1]) & 0x3F; val <<= 6; val |= (cur[2]) & 0x3F; val <<= 6; val |= (cur[3]) & 0x3F; l = 4; } if ((l == 1) || (!IS_CHAR(val))) { xmlEntitiesErr(XML_ERR_INVALID_CHAR, "xmlEncodeEntities: char out of range\n"); if (doc != NULL) doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1"); snprintf(buf, sizeof(buf), "&#%d;", *cur); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; cur++; continue; } /* * We could do multiple things here. Just save as a char ref */ snprintf(buf, sizeof(buf), "&#x%X;", val); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; cur += l; continue; } } else if (IS_BYTE_CHAR(*cur)) { char buf[11], *ptr; snprintf(buf, sizeof(buf), "&#%d;", *cur); buf[sizeof(buf) - 1] = 0; ptr = buf; while (*ptr != 0) *out++ = *ptr++; } cur++; } *out = 0; return(buffer); mem_error: xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed"); xmlFree(buffer); return(NULL); }