コード例 #1
0
ファイル: entities.c プロジェクト: SCIInstitute/SCIRun
/**
 * xmlEncodeEntitiesReentrant:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * '<', '>' and '&' are replaced by "&lt;", "&gt;" and "&amp;".  Bytes
 * of 0x80 and above are copied unchanged when @doc declares an encoding
 * or is an HTML document; otherwise they are decoded as UTF-8 and
 * written as hexadecimal character references.  A lead byte whose
 * sequence is invalid, or is cut short by the end of @input, is written
 * as a decimal reference to that single byte and @doc (if any) is
 * switched to "ISO-8859-1".
 *
 * Returns A newly allocated string with the substitution done, or NULL
 * if @input is NULL or the initial allocation fails.
 */
xmlChar *
xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
    const xmlChar *cur = input;
    xmlChar *buffer = NULL;
    xmlChar *out = NULL;
    int buffer_size = 0;
    int html = 0;

    if (input == NULL) return(NULL);
    if (doc != NULL)
        html = (doc->type == XML_HTML_DOCUMENT_NODE);

    /*
     * allocate a translation buffer.
     */
    buffer_size = 1000;
    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) {
        xmlEntitiesErrMemory("xmlEncodeEntitiesReentrant: malloc failed");
        return(NULL);
    }
    out = buffer;

    while (*cur != '\0') {
        /*
         * Keep at least 100 bytes of headroom so that the unchecked
         * writes below (at most 10 bytes per iteration) always fit.
         * NOTE(review): growBufferReentrant() is a macro defined outside
         * this block; it is assumed to enlarge buffer/buffer_size and to
         * deal with reallocation failure itself -- confirm.
         */
        if (out - buffer > buffer_size - 100) {
            int indx = out - buffer;

            growBufferReentrant();
            out = &buffer[indx];
        }

        /*
         * By default one have to encode at least '<', '>', '"' and '&' !
         */
        if (*cur == '<') {
            *out++ = '&';
            *out++ = 'l';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '>') {
            *out++ = '&';
            *out++ = 'g';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '&') {
            *out++ = '&';
            *out++ = 'a';
            *out++ = 'm';
            *out++ = 'p';
            *out++ = ';';
        } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
                   (*cur == '\n') || (*cur == '\t') ||
                   ((html) && (*cur == '\r'))) {
            /*
             * default case, just copy !
             */
            *out++ = *cur;
        } else if (*cur >= 0x80) {
            if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
                /*
                 * The document has a declared encoding (or is HTML):
                 * pass the byte through untouched.
                 *
                 * Bjørn Reese <*****@*****.**> provided the patch
                xmlChar xc;
                xc = (*cur & 0x3F) << 6;
                if (cur[1] != 0) {
                    xc += *(++cur) & 0x3F;
                    *out++ = xc;
                } else
                 */
                *out++ = *cur;
            } else {
                /*
                 * We assume we have UTF-8 input.
                 */
                char buf[11], *ptr;
                int val = 0, l = 1;

                if (*cur < 0xC0) {
                    /* a continuation byte cannot start a sequence */
                    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
                            "xmlEncodeEntitiesReentrant : input not UTF-8");
                    if (doc != NULL)
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) *out++ = *ptr++;
                    cur++;
                    continue;
                } else if (*cur < 0xE0) {
                    /*
                     * Only decode when every trailing byte is present;
                     * otherwise l stays 1 and the sequence is reported
                     * below instead of reading past the terminator.
                     */
                    if (cur[1] != 0) {
                        val = (cur[0]) & 0x1F;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        l = 2;
                    }
                } else if (*cur < 0xF0) {
                    if ((cur[1] != 0) && (cur[2] != 0)) {
                        val = (cur[0]) & 0x0F;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        val <<= 6;
                        val |= (cur[2]) & 0x3F;
                        l = 3;
                    }
                } else if (*cur < 0xF8) {
                    if ((cur[1] != 0) && (cur[2] != 0) && (cur[3] != 0)) {
                        val = (cur[0]) & 0x07;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        val <<= 6;
                        val |= (cur[2]) & 0x3F;
                        val <<= 6;
                        val |= (cur[3]) & 0x3F;
                        l = 4;
                    }
                }
                if ((l == 1) || (!IS_CHAR(val))) {
                    /*
                     * Unsupported lead byte, truncated sequence or a
                     * code point rejected by IS_CHAR: emit the lead byte
                     * alone and resume with the next byte.
                     */
                    xmlEntitiesErr(XML_ERR_INVALID_CHAR,
                        "xmlEncodeEntitiesReentrant : char out of range\n");
                    if (doc != NULL)
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) *out++ = *ptr++;
                    cur++;
                    continue;
                }
                /*
                 * We could do multiple things here. Just save as a char ref.
                 * html is always 0 on this path (HTML documents take the
                 * pass-through branch above), so the reference is always
                 * hexadecimal.
                 */
                snprintf(buf, sizeof(buf), "&#x%X;", val);
                buf[sizeof(buf) - 1] = 0;
                ptr = buf;
                while (*ptr != 0) *out++ = *ptr++;
                cur += l;
                continue;
            }
        } else if (IS_BYTE_CHAR(*cur)) {
            /*
             * Remaining bytes below 0x20 accepted by IS_BYTE_CHAR are
             * written as decimal references; the others produce no output.
             */
            char buf[11], *ptr;

            snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
            while (*ptr != 0) *out++ = *ptr++;
        }
        cur++;
    }
    *out++ = 0;
    return(buffer);
}
コード例 #2
0
/**
 * xmlEncodeEntitiesReentrant:
 * @param doc the document containing the string
 * @param input A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * '<', '>' and '&' are replaced by "&lt;", "&gt;" and "&amp;".  Bytes
 * of 0x80 and above are copied unchanged when doc declares an encoding
 * or is an HTML document; otherwise they are decoded as UTF-8 and
 * written as hexadecimal character references.  A lead byte whose
 * sequence is invalid, or is cut short by the end of input, is written
 * as a decimal reference to that single byte and doc (if any) is
 * switched to "ISO-8859-1".
 *
 * Returns A newly allocated string with the substitution done.
 *
 * OOM: possible --> returns NULL (for input!=NULL), sets OOM flag
 */
XMLPUBFUNEXPORT xmlChar*
xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
    const xmlChar* cur = input;
    xmlChar* buffer = NULL;
    xmlChar* out = NULL;
    int buffer_size;
    int html;
    LOAD_GS_SAFE_DOC(doc)

    if (input == NULL)
        return(NULL);

    html = doc && (doc->type == XML_HTML_DOCUMENT_NODE);
    /*
     * allocate a translation buffer.
     */
    buffer_size = 1000;
    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) {
        xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("malloc failed\n"));
        return(NULL);
    }
    out = buffer;

    while (*cur != '\0') {
        /*
         * Keep at least 100 bytes of headroom so that the unchecked
         * writes below (at most 10 bytes per iteration) always fit.
         */
        if (out - buffer > buffer_size - 100) {
            xmlChar* newbuf;
            int indx = out - buffer;

            // on OOM returns NULL (buffer is not freed)
            newbuf = (xmlChar*)xmlGrowBufferReentrant(&buffer_size, buffer);
            // Test the NEW pointer: the old buffer is still ours to free,
            // and must not be replaced by NULL before it is released.
            if (newbuf == NULL)
                goto OOM;
            buffer = newbuf;
            out = &buffer[indx];
        }

        /*
         * By default one have to encode at least '<', '>', '"' and '&' !
         */
        if (*cur == '<') {
            *out++ = '&';
            *out++ = 'l';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '>') {
            *out++ = '&';
            *out++ = 'g';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '&') {
            *out++ = '&';
            *out++ = 'a';
            *out++ = 'm';
            *out++ = 'p';
            *out++ = ';';
        } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
                   (*cur == '\n') || (*cur == '\t') ||
                   ((html) && (*cur == '\r'))) {
            /*
             * default case, just copy !
             */
            *out++ = *cur;
        } else if (*cur >= 0x80) {
            if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
                /*
                 * The document has a declared encoding (or is HTML):
                 * pass the byte through untouched.
                 *
                 * Bjorn Reese <*****@*****.**> provided the patch
                xmlChar xc;
                xc = (*cur & 0x3F) << 6;
                if (cur[1] != 0) {
                    xc += *(++cur) & 0x3F;
                    *out++ = xc;
                } else
                 */
                *out++ = *cur;
            } else {
                /*
                 * We assume we have UTF-8 input.
                 */
                char buf[11], *ptr;
                int val = 0, len = 1;

                if (*cur < 0xC0) {
                    // a continuation byte cannot start a sequence
                    xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlEncodeEntitiesReentrant : input not UTF-8\n"));
                    if (doc != NULL) {
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                        if (OOM_FLAG)
                            goto OOM;
                    }
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0)
                        *out++ = *ptr++;
                    cur++;
                    continue;
                } else if (*cur < 0xE0) {
                    // Only decode when every trailing byte is present;
                    // otherwise len stays 1 and the sequence is reported
                    // below instead of reading past the terminator.
                    if (cur[1] != 0) {
                        val = (cur[0]) & 0x1F;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        len = 2;
                    }
                } else if (*cur < 0xF0) {
                    if ((cur[1] != 0) && (cur[2] != 0)) {
                        val = (cur[0]) & 0x0F;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        val <<= 6;
                        val |= (cur[2]) & 0x3F;
                        len = 3;
                    }
                } else if (*cur < 0xF8) {
                    if ((cur[1] != 0) && (cur[2] != 0) && (cur[3] != 0)) {
                        val = (cur[0]) & 0x07;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        val <<= 6;
                        val |= (cur[2]) & 0x3F;
                        val <<= 6;
                        val |= (cur[3]) & 0x3F;
                        len = 4;
                    }
                }
                if ((len == 1) || (!IS_CHAR(val))) {
                    // Unsupported lead byte, truncated sequence or a code
                    // point rejected by IS_CHAR: emit the lead byte alone
                    // and resume with the next byte.
                    xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlEncodeEntitiesReentrant : char out of range\n"));
                    if (doc != NULL) {
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                        if (OOM_FLAG)
                            goto OOM;
                    }
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0)
                        *out++ = *ptr++;
                    cur++;
                    continue;
                }
                /*
                 * We could do multiple things here. Just save as a char ref.
                 * html is always 0 on this path (HTML documents take the
                 * pass-through branch above), so the reference is always
                 * hexadecimal.
                 */
                snprintf(buf, sizeof(buf), "&#x%X;", val);
                buf[sizeof(buf) - 1] = 0;
                ptr = buf;
                while (*ptr != 0)
                    *out++ = *ptr++;
                cur += len;
                continue;
            }
        } else if (IS_BYTE_CHAR(*cur)) {
            // Remaining bytes below 0x20 accepted by IS_BYTE_CHAR are
            // written as decimal references; the others produce no output.
            char buf[11], *ptr;

            snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
            while (*ptr != 0)
                *out++ = *ptr++;
        }
        cur++;
    } // while (*cur != '\0')
    *out++ = 0;
    return(buffer);
OOM:
    // Single cleanup path for every allocation failure inside the loop.
    xmlFree(buffer);
    return NULL;
}
コード例 #3
0
/**
 * xmlEncodeEntitiesReentrant:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * '<', '>' and '&' are replaced by "&lt;", "&gt;" and "&amp;".  Bytes
 * of 0x80 and above are copied unchanged when @doc declares an encoding
 * or is an HTML document; otherwise they are decoded as UTF-8 and
 * written as hexadecimal character references.  A lead byte whose
 * sequence is invalid, or is cut short by the end of @input, is written
 * as a decimal reference to that single byte and @doc (if any) is
 * switched to "ISO-8859-1".
 *
 * Returns A newly allocated string with the substitution done.
 *
 * OOM: possible --> returns NULL (for input!=NULL), sets OOM flag
 */
xmlChar*
xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
    const xmlChar* cur = input;
    xmlChar* buffer = NULL;
    xmlChar* out = NULL;
    int buffer_size;
    int html;

    if (input == NULL)
        return(NULL);

    html = doc && (doc->type == XML_HTML_DOCUMENT_NODE);
    /*
     * allocate a translation buffer.
     */
    // TODO: "Magic number" for buffer size - make a controlled parameter
    // TODO: OPTIMIZE: Select most appropriate buffer size by default / reuse some preallocated buffer
    buffer_size = 1000;
    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) {
        SET_OOM_FLAG;
        xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("malloc failed\n"));
        return(NULL);
    }
    out = buffer;

    while (*cur != '\0') {
        // Keep at least 100 bytes of headroom so that the unchecked writes
        // below (at most 10 bytes per iteration) always fit.
        // TODO: Another "magic" number -- select optimal for current buffer_size
        if (out - buffer > buffer_size - 100) {
            xmlChar* newbuf;
            int indx = out - buffer;

            // on OOM returns NULL (buffer is not freed)
            newbuf = (xmlChar*)xmlGrowBufferReentrant(&buffer_size, buffer);
            // Test the NEW pointer: the old buffer is still ours to free,
            // and must not be replaced by NULL before it is released.
            if (newbuf == NULL)
                goto OOM;
            buffer = newbuf;
            out = &buffer[indx];
        }

        /*
         * By default one have to encode at least '<', '>', '"' and '&' !
         */
        // TODO: OPTIMIZE: Rearrange IFs according expectations of conditions
        //                 Obviously, the default case is the most expected
        if (*cur == '<') {
            *out++ = '&';
            *out++ = 'l';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '>') {
            *out++ = '&';
            *out++ = 'g';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '&') {
            *out++ = '&';
            *out++ = 'a';
            *out++ = 'm';
            *out++ = 'p';
            *out++ = ';';
        } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
                   (*cur == '\n') || (*cur == '\t') ||
                   ((html) && (*cur == '\r'))) {
            /*
             * default case, just copy !
             */
            *out++ = *cur;
        } else if (*cur >= 0x80) {
            if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
                /*
                 * The document has a declared encoding (or is HTML):
                 * pass the byte through untouched.
                 *
                 * Bjørn Reese <*****@*****.**> provided the patch
                xmlChar xc;
                xc = (*cur & 0x3F) << 6;
                if (cur[1] != 0) {
                    xc += *(++cur) & 0x3F;
                    *out++ = xc;
                } else
                 */
                *out++ = *cur;
            } else {
                /*
                 * We assume we have UTF-8 input.
                 */
                char buf[11], *ptr;
                int val = 0, len = 1;

                // TODO: the "format into buf, copy to out" lines repeat
                // several times in this loop -- try to combine them
                if (*cur < 0xC0) {
                    // a continuation byte cannot start a sequence
                    xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlEncodeEntitiesReentrant : input not UTF-8\n"));
                    if (doc != NULL) {
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                        if (OOM_FLAG)
                            goto OOM;
                    }
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0)
                        *out++ = *ptr++;
                    cur++;
                    continue;
                } else if (*cur < 0xE0) {
                    // Only decode when every trailing byte is present;
                    // otherwise len stays 1 and the sequence is reported
                    // below instead of reading past the terminator.
                    if (cur[1] != 0) {
                        val = (cur[0]) & 0x1F;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        len = 2;
                    }
                } else if (*cur < 0xF0) {
                    if ((cur[1] != 0) && (cur[2] != 0)) {
                        val = (cur[0]) & 0x0F;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        val <<= 6;
                        val |= (cur[2]) & 0x3F;
                        len = 3;
                    }
                } else if (*cur < 0xF8) {
                    if ((cur[1] != 0) && (cur[2] != 0) && (cur[3] != 0)) {
                        val = (cur[0]) & 0x07;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        val <<= 6;
                        val |= (cur[2]) & 0x3F;
                        val <<= 6;
                        val |= (cur[3]) & 0x3F;
                        len = 4;
                    }
                }
                if ((len == 1) || (!IS_CHAR(val))) {
                    // Unsupported lead byte, truncated sequence or a code
                    // point rejected by IS_CHAR: emit the lead byte alone
                    // and resume with the next byte.
                    xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlEncodeEntitiesReentrant : char out of range\n"));
                    if (doc != NULL) {
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                        if (OOM_FLAG)
                            goto OOM;
                    }
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0)
                        *out++ = *ptr++;
                    cur++;
                    continue;
                }
                /*
                 * We could do multiple things here. Just save as a char ref.
                 * html is always 0 on this path (HTML documents take the
                 * pass-through branch above), so the reference is always
                 * hexadecimal.
                 */
                snprintf(buf, sizeof(buf), "&#x%X;", val);
                buf[sizeof(buf) - 1] = 0;
                ptr = buf;
                while (*ptr != 0)
                    *out++ = *ptr++;
                cur += len;
                continue;
            }
        } else if (IS_BYTE_CHAR(*cur)) {
            // Remaining bytes below 0x20 accepted by IS_BYTE_CHAR are
            // written as decimal references; the others produce no output.
            char buf[11], *ptr;

            snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
            while (*ptr != 0)
                *out++ = *ptr++;
        }
        cur++;
    } // while (*cur != '\0')
    *out++ = 0;
    return(buffer);
OOM:
    // Single cleanup path for every allocation failure inside the loop.
    xmlFree(buffer);
    return NULL;
}
コード例 #4
0
ファイル: entities.c プロジェクト: alexandervnuchkov/core
/**
 * xmlEncodeEntitiesInternal:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 * @attr: are we handling an attribute value
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * '<', '>' and '&' are replaced by "&lt;", "&gt;" and "&amp;", except
 * that in HTML attribute values a "<!-- ... -->" run and a "&{ ... }"
 * run are copied verbatim.  Bytes of 0x80 and above are copied unchanged
 * when @doc declares an encoding or is an HTML document; otherwise they
 * are decoded as UTF-8 and written as hexadecimal character references.
 * A lead byte whose sequence is invalid, or is cut short by the end of
 * @input, is written as a decimal reference to that single byte and
 * @doc (if any) is switched to "ISO-8859-1".
 *
 * Returns A newly allocated string with the substitution done, or NULL
 * if @input is NULL or memory could not be allocated.
 */
static xmlChar *
xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
    const xmlChar *cur = input;
    xmlChar *buffer = NULL;
    xmlChar *out = NULL;
    size_t buffer_size = 0;
    int html = 0;

    if (input == NULL) return(NULL);
    if (doc != NULL)
        html = (doc->type == XML_HTML_DOCUMENT_NODE);

    /*
     * allocate a translation buffer.
     */
    buffer_size = 1000;
    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) {
        xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed");
        return(NULL);
    }
    out = buffer;

    while (*cur != '\0') {
        /*
         * Keep at least 100 bytes of headroom so that the unchecked
         * writes below (at most 10 bytes per step) always fit.
         * NOTE(review): growBufferReentrant() is a macro defined outside
         * this block; it is assumed to enlarge buffer/buffer_size and to
         * jump to mem_error on failure -- confirm.
         */
        size_t indx = out - buffer;
        if (indx + 100 > buffer_size) {

            growBufferReentrant();
            out = &buffer[indx];
        }

        /*
         * By default one have to encode at least '<', '>', '"' and '&' !
         */
        if (*cur == '<') {
            const xmlChar *end;

            /*
             * Special handling of server side include in HTML attributes
             */
            if (html && attr &&
                (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
                ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
                while (cur != end) {
                    *out++ = *cur++;
                    indx = out - buffer;
                    if (indx + 100 > buffer_size) {
                        growBufferReentrant();
                        out = &buffer[indx];
                    }
                }
                /* copy the closing "-->" itself */
                *out++ = *cur++;
                *out++ = *cur++;
                *out++ = *cur++;
                continue;
            }
            *out++ = '&';
            *out++ = 'l';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '>') {
            *out++ = '&';
            *out++ = 'g';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '&') {
            /*
             * Special handling of &{...} construct from HTML 4, see
             * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
             */
            if (html && attr && (cur[1] == '{') &&
                (strchr((const char *) cur, '}'))) {
                while (*cur != '}') {
                    *out++ = *cur++;
                    indx = out - buffer;
                    if (indx + 100 > buffer_size) {
                        growBufferReentrant();
                        out = &buffer[indx];
                    }
                }
                /* copy the closing '}' itself */
                *out++ = *cur++;
                continue;
            }
            *out++ = '&';
            *out++ = 'a';
            *out++ = 'm';
            *out++ = 'p';
            *out++ = ';';
        } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
                   (*cur == '\n') || (*cur == '\t') ||
                   ((html) && (*cur == '\r'))) {
            /*
             * default case, just copy !
             */
            *out++ = *cur;
        } else if (*cur >= 0x80) {
            if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
                /*
                 * The document has a declared encoding (or is HTML):
                 * pass the byte through untouched.
                 *
                 * Bjørn Reese <*****@*****.**> provided the patch
                xmlChar xc;
                xc = (*cur & 0x3F) << 6;
                if (cur[1] != 0) {
                    xc += *(++cur) & 0x3F;
                    *out++ = xc;
                } else
                 */
                *out++ = *cur;
            } else {
                /*
                 * We assume we have UTF-8 input.
                 */
                char buf[11], *ptr;
                int val = 0, l = 1;

                if (*cur < 0xC0) {
                    /* a continuation byte cannot start a sequence */
                    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
                            "xmlEncodeEntities: input not UTF-8");
                    if (doc != NULL)
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) *out++ = *ptr++;
                    cur++;
                    continue;
                } else if (*cur < 0xE0) {
                    /*
                     * Only decode when every trailing byte is present;
                     * otherwise l stays 1 and the sequence is reported
                     * below instead of reading past the terminator.
                     */
                    if (cur[1] != 0) {
                        val = (cur[0]) & 0x1F;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        l = 2;
                    }
                } else if (*cur < 0xF0) {
                    if ((cur[1] != 0) && (cur[2] != 0)) {
                        val = (cur[0]) & 0x0F;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        val <<= 6;
                        val |= (cur[2]) & 0x3F;
                        l = 3;
                    }
                } else if (*cur < 0xF8) {
                    if ((cur[1] != 0) && (cur[2] != 0) && (cur[3] != 0)) {
                        val = (cur[0]) & 0x07;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        val <<= 6;
                        val |= (cur[2]) & 0x3F;
                        val <<= 6;
                        val |= (cur[3]) & 0x3F;
                        l = 4;
                    }
                }
                if ((l == 1) || (!IS_CHAR(val))) {
                    /*
                     * Unsupported lead byte, truncated sequence or a
                     * code point rejected by IS_CHAR: emit the lead byte
                     * alone and resume with the next byte.
                     */
                    xmlEntitiesErr(XML_ERR_INVALID_CHAR,
                        "xmlEncodeEntities: char out of range\n");
                    if (doc != NULL)
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) *out++ = *ptr++;
                    cur++;
                    continue;
                }
                /*
                 * We could do multiple things here. Just save as a char ref
                 */
                snprintf(buf, sizeof(buf), "&#x%X;", val);
                buf[sizeof(buf) - 1] = 0;
                ptr = buf;
                while (*ptr != 0) *out++ = *ptr++;
                cur += l;
                continue;
            }
        } else if (IS_BYTE_CHAR(*cur)) {
            /*
             * Remaining bytes below 0x20 accepted by IS_BYTE_CHAR are
             * written as decimal references; the others produce no output.
             */
            char buf[11], *ptr;

            snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
            while (*ptr != 0) *out++ = *ptr++;
        }
        cur++;
    }
    *out = 0;
    return(buffer);

mem_error:
    xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed");
    xmlFree(buffer);
    return(NULL);
}