/**
 * xmlEncodeEntitiesReentrant:
 * @param doc the document containing the string
 * @param input A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * Returns A newly allocated string with the substitution done.
 *
 * OOM: possible --> returns NULL (for input!=NULL), sets OOM flag
 */
XMLPUBFUNEXPORT xmlChar*
xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
    const xmlChar* cur = input;
    xmlChar* buffer = NULL;
    xmlChar* out = NULL;
    int buffer_size;
    int html;
    LOAD_GS_SAFE_DOC(doc)

    if (input == NULL)
        return(NULL);

    html = doc && (doc->type == XML_HTML_DOCUMENT_NODE);
    /*
     * allocate an translation buffer.
     */
    
    
    buffer_size = 1000;
    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) {
        xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("malloc failed\n"));
        return(NULL);
    }
    out = buffer;

    while (*cur != '\0') {
        
        if (out - buffer > buffer_size - 100) {
            xmlChar* newbuf;
            int indx = out - buffer;

            newbuf = (xmlChar*)xmlGrowBufferReentrant(&buffer_size, buffer); // on OOM returns NULL (buffer is not freed)
            if(!buffer)
                {
                xmlFree(buffer);
                return NULL;
                }
            buffer = newbuf;
            out = &buffer[indx]; 
        }

    /*
     * By default one have to encode at least '<', '>', '"' and '&' !
     */
    if (*cur == '<') {
        *out++ = '&';
        *out++ = 'l';
        *out++ = 't';
        *out++ = ';';
    } else if (*cur == '>') {
        *out++ = '&';
        *out++ = 'g';
        *out++ = 't';
        *out++ = ';';
    } else if (*cur == '&') {
        *out++ = '&';
        *out++ = 'a';
        *out++ = 'm';
        *out++ = 'p';
        *out++ = ';';
    } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
        (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) {
        /*
         * default case, just copy !
         */
        *out++ = *cur;
    } else if (*cur >= 0x80) {
        if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
        /*
         * Bjorn Reese <*****@*****.**> provided the patch
            xmlChar xc;
            xc = (*cur & 0x3F) << 6;
            if (cur[1] != 0) {
            xc += *(++cur) & 0x3F;
            *out++ = xc;
            } else
         */
            *out++ = *cur;
        } else {
        /*
         * We assume we have UTF-8 input.
         */
        char buf[11], *ptr;
        // DONE: rename 'l' variable -- hard to understand and error-prone otherwise (looks like '1')
        int val = 0, len = 1; // 'l' became 'len'

        if (*cur < 0xC0) {
            xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlEncodeEntitiesReentrant : input not UTF-8\n"));
            if (doc != NULL){
                doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                if(OOM_FLAG)
                    goto OOM;
            }
            
            
            snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
            while (*ptr != 0)
            {
                *out++ = *ptr++;
            }
            cur++;
            continue;
            
        } else if (*cur < 0xE0) {
                val = (cur[0]) & 0x1F;
                val <<= 6;
                val |= (cur[1]) & 0x3F;
                len = 2;
        } else if (*cur < 0xF0) {
                val = (cur[0]) & 0x0F;
                val <<= 6;
                val |= (cur[1]) & 0x3F;
                val <<= 6;
                val |= (cur[2]) & 0x3F;
                len = 3;
        } else if (*cur < 0xF8) {
                val = (cur[0]) & 0x07;
                val <<= 6;
                val |= (cur[1]) & 0x3F;
                val <<= 6;
                val |= (cur[2]) & 0x3F;
                val <<= 6;
                val |= (cur[3]) & 0x3F;
                len = 4;
        }
        if ((len == 1) || (!IS_CHAR(val))) {
            xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("xmlEncodeEntitiesReentrant : char out of range\n"));
            if (doc != NULL){
                doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                if(OOM_FLAG)
                    goto OOM;
            }
            // 2-->
            snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
            while (*ptr != 0)
            {
                *out++ = *ptr++;
            }
            cur++;
            continue;
            // <--2
        }
        /*
         * We could do multiple things here. Just save as a char ref
         */
        // 3-->
        if (html)
            snprintf(buf, sizeof(buf), "&#%d;", val);
        else
            snprintf(buf, sizeof(buf), "&#x%X;", val);
        buf[sizeof(buf) - 1] = 0;
        ptr = buf;
        while (*ptr != 0)
        {
            *out++ = *ptr++;
        }
        cur += len;
        continue;
        // <--3
        }
    } else if (IS_BYTE_CHAR(*cur)) {
        char buf[11], *ptr;
        // 4-->
        snprintf(buf, sizeof(buf), "&#%d;", *cur);
        buf[sizeof(buf) - 1] = 0;
        ptr = buf;
        while (*ptr != 0)
        {
            *out++ = *ptr++;
        }
    }
    cur++;
    // continue; is implied here
    // <--4
    } // while (*cur != '\0')
    *out++ = 0;
    return(buffer);
OOM:
    xmlFree(buffer);
    return NULL;
}
Exemple #2
0
//
//  Change_Case: C
//
// Common code for string case handling.
//
void Change_Case(
    REBVAL *out,
    REBVAL *val, // !!! Not const--uses Partial(), may change index, review
    const REBVAL *part,
    bool upper
){
    if (IS_CHAR(val)) {
        REBUNI c = VAL_CHAR(val);
        Init_Char_Unchecked(out, upper ? UP_CASE(c) : LO_CASE(c));
        return;
    }

    assert(ANY_STRING(val));
    FAIL_IF_READ_ONLY(val);

    // This is a mutating operation, and we want to return the same series at
    // the same index.  However, R3-Alpha code would use Partial() and may
    // change val's index.  Capture it before potential change, review.
    //
    Move_Value(out, val);

    REBCNT len = Part_Len_May_Modify_Index(val, part);

    // !!! This assumes that all case changes will preserve the encoding size,
    // but that's not true (some strange multibyte accented characters have
    // capital or lowercase versions that are single byte).  This may be
    // uncommon enough to have special handling (only do something weird, e.g.
    // use the mold buffer, if it happens...for the remaining portion of such
    // a string...and only if the size *expands*).  Expansions also may never
    // be possible, only contractions (is that true?)  Review when UTF-8
    // Everywhere is more mature to the point this is worth worrying about.
    //
    REBCHR(*) up = VAL_STRING_AT(val);
    REBCHR(*) dp;
    if (upper) {
        REBCNT n;
        for (n = 0; n < len; n++) {
            dp = up;

            REBUNI c;
            up = NEXT_CHR(&c, up);
            if (c < UNICODE_CASES) {
                dp = WRITE_CHR(dp, UP_CASE(c));
                assert(dp == up); // !!! not all case changes same byte size?
            }
        }
    }
    else {
        REBCNT n;
        for (n = 0; n < len; n++) {
            dp = up;

            REBUNI c;
            up = NEXT_CHR(&c, up);
            if (c < UNICODE_CASES) {
                dp = WRITE_CHR(dp, LO_CASE(c));
                assert(dp == up); // !!! not all case changes same byte size?
            }
        }
    }
}
Exemple #3
0
*/	static REBCNT Parse_To(REBPARSE *parse, REBCNT index, REBVAL *item, REBFLG is_thru)
/*
**		Parse TO a specific:
**			1. integer - index position
**			2. END - end of input
**			3. value - according to datatype
**			4. block of values - the first one we hit
**
***********************************************************************/
{
	REBSER *series = parse->series;
	REBCNT i;
	REBSER *ser;

	// TO a specific index position.
	if (IS_INTEGER(item)) {
		i = (REBCNT)Int32(item) - (is_thru ? 0 : 1);
		if (i > series->tail) i = series->tail;
	}
	// END
	else if (IS_WORD(item) && VAL_WORD_CANON(item) == SYM_END) {
		i = series->tail;
	}
	else if (IS_BLOCK(item)) {
		i = To_Thru(parse, index, item, is_thru);
	}
	else {
		if (IS_BLOCK_INPUT(parse)) {
			REBVAL word; /// !!!Temp, but where can we put it?
			if (IS_LIT_WORD(item)) {  // patch to search for word, not lit.
				word = *item;
				VAL_SET(&word, REB_WORD);
				item = &word;
			}
			///i = Find_Value(series, index, tail-index, item, 1, (REBOOL)(PF_CASE & flags), FALSE, 1);
			i = Find_Block(series, index, series->tail, item, 1, HAS_CASE(parse)?AM_FIND_CASE:0, 1);
			if (i != NOT_FOUND && is_thru) i++;
		}
		else {
			// "str"
			if (ANY_BINSTR(item)) {
				if (!IS_STRING(item) && !IS_BINARY(item)) {
					// !!! Can this be optimized not to use COPY?
					ser = Copy_Form_Value(item, 0);
					i = Find_Str_Str(series, 0, index, series->tail, 1, ser, 0, ser->tail, HAS_CASE(parse));
					if (i != NOT_FOUND && is_thru) i += ser->tail;
				}
				else {
					i = Find_Str_Str(series, 0, index, series->tail, 1, VAL_SERIES(item), VAL_INDEX(item), VAL_LEN(item), HAS_CASE(parse));
					if (i != NOT_FOUND && is_thru) i += VAL_LEN(item);
				}
			}
			// #"A"
			else if (IS_CHAR(item)) {
				i = Find_Str_Char(series, 0, index, series->tail, 1, VAL_CHAR(item), HAS_CASE(parse));
				if (i != NOT_FOUND && is_thru) i++;
			}
		}
	}

	return i;
}
/**
 * xmlEncodeEntitiesReentrant:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * Returns A newly allocated string with the substitution done.
 */
xmlChar *
xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
    const xmlChar *cur = input;
    xmlChar *buffer = NULL;
    xmlChar *out = NULL;
    int buffer_size = 0;
    int html = 0;

    if (input == NULL) return(NULL);
    if (doc != NULL)
        html = (doc->type == XML_HTML_DOCUMENT_NODE);

    /*
     * allocate an translation buffer.
     */
    buffer_size = 1000;
    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) {
        xmlEntitiesErrMemory("xmlEncodeEntitiesReentrant: malloc failed");
	return(NULL);
    }
    out = buffer;

    while (*cur != '\0') {
        if (out - buffer > buffer_size - 100) {
	    int indx = out - buffer;

	    growBufferReentrant();
	    out = &buffer[indx];
	}

	/*
	 * By default one have to encode at least '<', '>', '"' and '&' !
	 */
	if (*cur == '<') {
	    *out++ = '&';
	    *out++ = 'l';
	    *out++ = 't';
	    *out++ = ';';
	} else if (*cur == '>') {
	    *out++ = '&';
	    *out++ = 'g';
	    *out++ = 't';
	    *out++ = ';';
	} else if (*cur == '&') {
	    *out++ = '&';
	    *out++ = 'a';
	    *out++ = 'm';
	    *out++ = 'p';
	    *out++ = ';';
	} else if (((*cur >= 0x20) && (*cur < 0x80)) ||
	    (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) {
	    /*
	     * default case, just copy !
	     */
	    *out++ = *cur;
	} else if (*cur >= 0x80) {
	    if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
		/*
		 * Bjørn Reese <*****@*****.**> provided the patch
	        xmlChar xc;
	        xc = (*cur & 0x3F) << 6;
	        if (cur[1] != 0) {
		    xc += *(++cur) & 0x3F;
		    *out++ = xc;
	        } else
		 */
		*out++ = *cur;
	    } else {
		/*
		 * We assume we have UTF-8 input.
		 */
		char buf[11], *ptr;
		int val = 0, l = 1;

		if (*cur < 0xC0) {
		    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
			    "xmlEncodeEntitiesReentrant : input not UTF-8");
		    if (doc != NULL)
			doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
		    snprintf(buf, sizeof(buf), "&#%d;", *cur);
		    buf[sizeof(buf) - 1] = 0;
		    ptr = buf;
		    while (*ptr != 0) *out++ = *ptr++;
		    cur++;
		    continue;
		} else if (*cur < 0xE0) {
                    val = (cur[0]) & 0x1F;
		    val <<= 6;
		    val |= (cur[1]) & 0x3F;
		    l = 2;
		} else if (*cur < 0xF0) {
                    val = (cur[0]) & 0x0F;
		    val <<= 6;
		    val |= (cur[1]) & 0x3F;
		    val <<= 6;
		    val |= (cur[2]) & 0x3F;
		    l = 3;
		} else if (*cur < 0xF8) {
                    val = (cur[0]) & 0x07;
		    val <<= 6;
		    val |= (cur[1]) & 0x3F;
		    val <<= 6;
		    val |= (cur[2]) & 0x3F;
		    val <<= 6;
		    val |= (cur[3]) & 0x3F;
		    l = 4;
		}
		if ((l == 1) || (!IS_CHAR(val))) {
		    xmlEntitiesErr(XML_ERR_INVALID_CHAR,
			"xmlEncodeEntitiesReentrant : char out of range\n");
		    if (doc != NULL)
			doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
		    snprintf(buf, sizeof(buf), "&#%d;", *cur);
		    buf[sizeof(buf) - 1] = 0;
		    ptr = buf;
		    while (*ptr != 0) *out++ = *ptr++;
		    cur++;
		    continue;
		}
		/*
		 * We could do multiple things here. Just save as a char ref
		 */
		snprintf(buf, sizeof(buf), "&#x%X;", val);
		buf[sizeof(buf) - 1] = 0;
		ptr = buf;
		while (*ptr != 0) *out++ = *ptr++;
		cur += l;
		continue;
	    }
	} else if (IS_BYTE_CHAR(*cur)) {
	    char buf[11], *ptr;

	    snprintf(buf, sizeof(buf), "&#%d;", *cur);
	    buf[sizeof(buf) - 1] = 0;
            ptr = buf;
	    while (*ptr != 0) *out++ = *ptr++;
	}
	cur++;
    }
    *out = 0;
    return(buffer);
}
Exemple #5
0
*/	REBSER *Parse_String(REBSER *series, REBCNT index, REBVAL *rules, REBCNT flags)
/*
***********************************************************************/
{
	REBCNT tail = series->tail;
	REBSER *blk;
	REBSER *set;
	REBCNT begin;
	REBCNT end;
	REBOOL skip_spaces = !(flags & PF_ALL);
	REBUNI uc;

	blk = BUF_EMIT;	// shared series
	RESET_SERIES(blk);

	// String of delimiters or single character:
	if (IS_STRING(rules) || IS_CHAR(rules)) {
		begin = Find_Max_Bit(rules);
		if (begin <= ' ') begin = ' ' + 1;
		set = Make_Bitset(begin);
		Set_Bits(set, rules, TRUE);
	}
	// None, so use defaults ",;":
	else {
		set = Make_Bitset(1+MAX(',',';'));
		Set_Bit(set, ',', TRUE);
		Set_Bit(set, ';', TRUE);
	}
	SAVE_SERIES(set);

	// If required, make space delimiters too:
	if (skip_spaces) {
		for (uc = 1; uc <= ' '; uc++) Set_Bit(set, uc, TRUE);
	}

	while (index < tail) {

		if (--Eval_Count <= 0 || Eval_Signals) Do_Signals();

		// Skip whitespace if not /all refinement: 
		if (skip_spaces) {
			uc = 0;
			for (; index < tail; index++) {
				uc = GET_ANY_CHAR(series, index);
				if (!IS_WHITE(uc)) break;
			}
		}
		else
			uc = GET_ANY_CHAR(series, index); // prefetch

		if (index < tail) {

			// Handle quoted strings (in a simple way):
			if (uc == '"') {
				begin = ++index; // eat quote
				for (; index < tail; index++) {
					uc = GET_ANY_CHAR(series, index);
					if (uc == '"') break;
				}
				end = index;
				if (index < tail) index++;
			}
			// All other tokens:
			else {
				begin = index;
				for (; index < tail; index++) {
					if (Check_Bit(set, GET_ANY_CHAR(series, index), !(flags & PF_CASE))) break;
				}
				end = index;
			}

			// Skip trailing spaces:
			if (skip_spaces)
				for (; index < tail; index++) {
					uc = GET_ANY_CHAR(series, index);
					if (!IS_WHITE(uc)) break;
				}

			// Check for and remove separator:
			if (Check_Bit(set, GET_ANY_CHAR(series, index), !(flags & PF_CASE))) index++;

			// Append new string:
			Set_String(Append_Value(blk), Copy_String(series, begin, end - begin)); 
		}
	}
	UNSAVE_SERIES(set);

	return Copy_Block(blk, 0);
}
Exemple #6
0
*/	static To_Thru(REBPARSE *parse, REBCNT index, REBVAL *block, REBFLG is_thru)
/*
***********************************************************************/
{
	REBSER *series = parse->series;
	REBCNT type = parse->type;
	REBVAL *blk;
	REBVAL *item;
	REBCNT cmd;
	REBCNT i;
	REBCNT len;

	for (; index <= series->tail; index++) {

		for (blk = VAL_BLK(block); NOT_END(blk); blk++) {

			item = blk;

			// Deal with words and commands
			if (IS_WORD(item)) {
				if (cmd = VAL_CMD(item)) {
					if (cmd == SYM_END) {
						if (index >= series->tail) {
							index = series->tail;
							goto found;
						}
						goto next;
					}
					else if (cmd == SYM_QUOTE) {
						item = ++blk; // next item is the quoted value
						if (IS_END(item)) goto bad_target;
						if (IS_PAREN(item)) {
							item = Do_Block_Value_Throw(item); // might GC
						}

					}
					else goto bad_target;
				}
				else {
					item = Get_Var(item);
				}
			}
			else if (IS_PATH(item)) {
				item = Get_Parse_Value(item);
			}

			// Try to match it:
			if (type >= REB_BLOCK) {
				if (ANY_BLOCK(item)) goto bad_target;
				i = Parse_Next_Block(parse, index, item, 0);
				if (i != NOT_FOUND) {
					if (!is_thru) i--;
					index = i;
					goto found;
				}
			}
			else if (type == REB_BINARY) {
				REBYTE ch1 = *BIN_SKIP(series, index);

				// Handle special string types:
				if (IS_CHAR(item)) {
					if (VAL_CHAR(item) > 0xff) goto bad_target;
					if (ch1 == VAL_CHAR(item)) goto found1;
				}
				else if (IS_BINARY(item)) {
					if (ch1 == *VAL_BIN_DATA(item)) {
						len = VAL_LEN(item);
						if (len == 1) goto found1;
						if (0 == Compare_Bytes(BIN_SKIP(series, index), VAL_BIN_DATA(item), len, 0)) {
							if (is_thru) index += len;
							goto found;
						}
					}
				}
				else if (IS_INTEGER(item)) {
					if (VAL_INT64(item) > 0xff) goto bad_target;
					if (ch1 == VAL_INT32(item)) goto found1;
				}
				else goto bad_target;
			}
			else { // String
				REBCNT ch1 = GET_ANY_CHAR(series, index);
				REBCNT ch2;

				if (!HAS_CASE(parse)) ch1 = UP_CASE(ch1);

				// Handle special string types:
				if (IS_CHAR(item)) {
					ch2 = VAL_CHAR(item);
					if (!HAS_CASE(parse)) ch2 = UP_CASE(ch2);
					if (ch1 == ch2) goto found1;
				}
				else if (ANY_STR(item)) {
					ch2 = VAL_ANY_CHAR(item);
					if (!HAS_CASE(parse)) ch2 = UP_CASE(ch2);
					if (ch1 == ch2) {
						len = VAL_LEN(item);
						if (len == 1) goto found1;
						i = Find_Str_Str(series, 0, index, SERIES_TAIL(series), 1, VAL_SERIES(item), VAL_INDEX(item), len, AM_FIND_MATCH | parse->flags);
						if (i != NOT_FOUND) {
							if (is_thru) i += len;
							index = i;
							goto found;
						}
					}
				}
				else if (IS_INTEGER(item)) {
					ch1 = GET_ANY_CHAR(series, index);  // No casing!
					if (ch1 == (REBCNT)VAL_INT32(item)) goto found1;
				}
				else goto bad_target;
			}

next:		// Check for | (required if not end)
			blk++;
			if (IS_PAREN(blk)) blk++;
			if (IS_END(blk)) break;
			if (!IS_OR_BAR(blk)) {
				item = blk;
				goto bad_target;
			}
		}
	}
	return NOT_FOUND;

found:
	if (IS_PAREN(blk+1)) Do_Block_Value_Throw(blk+1);
	return index;

found1:
	if (IS_PAREN(blk+1)) Do_Block_Value_Throw(blk+1);
	return index + (is_thru ? 1 : 0);

bad_target:
	Trap1(RE_PARSE_RULE, item);
	return 0;
}
Exemple #7
0
*/	static REBCNT Find_Entry(REBSER *series, REBVAL *key, REBVAL *val)
/*
**		Try to find the entry in the map. If not found
**		and val is SET, create the entry and store the key and
**		val.
**
**		RETURNS: the index to the VALUE or zero if there is none.
**
***********************************************************************/
{
	REBSER *hser = series->extra.series; // can be null
	REBCNT *hashes;
	REBCNT hash;
	REBVAL *v;
	REBCNT n;

	if (IS_NONE(key)) return 0;

	// We may not be large enough yet for the hash table to
	// be worthwhile, so just do a linear search:
	if (!hser) {
		if (series->tail < MIN_DICT*2) {
			v = BLK_HEAD(series);
			if (ANY_WORD(key)) {
				for (n = 0; n < series->tail; n += 2, v += 2) {
					if (
						ANY_WORD(v)
						&& SAME_SYM(VAL_WORD_SYM(key), VAL_WORD_SYM(v))
					) {
						if (val) *++v = *val;
						return n/2+1;
					}
				}
			}
			else if (ANY_BINSTR(key)) {
				for (n = 0; n < series->tail; n += 2, v += 2) {
					if (VAL_TYPE(key) == VAL_TYPE(v) && 0 == Compare_String_Vals(key, v, (REBOOL)!IS_BINARY(v))) {
						if (val)
							*++v = *val;

						return n/2+1;
					}
				}
			}
			else if (IS_INTEGER(key)) {
				for (n = 0; n < series->tail; n += 2, v += 2) {
					if (IS_INTEGER(v) && VAL_INT64(key) == VAL_INT64(v)) {
						if (val) *++v = *val;
						return n/2+1;
					}
				}
			}
			else if (IS_CHAR(key)) {
				for (n = 0; n < series->tail; n += 2, v += 2) {
					if (IS_CHAR(v) && VAL_CHAR(key) == VAL_CHAR(v)) {
						if (val) *++v = *val;
						return n/2+1;
					}
				}
			}
			else
				raise Error_Has_Bad_Type(key);

			if (!val) return 0;
			Append_Value(series, key);
			Append_Value(series, val); // does not copy value, e.g. if string
			return series->tail/2;
		}

		// Add hash table:
		//Print("hash added %d", series->tail);
		series->extra.series = hser = Make_Hash_Sequence(series->tail);
		MANAGE_SERIES(hser);
		Rehash_Hash(series);
	}

	// Get hash table, expand it if needed:
	if (series->tail > hser->tail/2) {
		Expand_Hash(hser); // modifies size value
		Rehash_Hash(series);
	}

	hash = Find_Key(series, hser, key, 2, 0, 0);
	hashes = (REBCNT*)hser->data;
	n = hashes[hash];

	// Just a GET of value:
	if (!val) return n;

	// Must set the value:
	if (n) {  // re-set it:
		*BLK_SKIP(series, ((n-1)*2)+1) = *val; // set it
		return n;
	}

	// Create new entry:
	Append_Value(series, key);
	Append_Value(series, val);  // does not copy value, e.g. if string

	return (hashes[hash] = series->tail/2);
}
Exemple #8
0
*/	REBCNT Modify_String(REBCNT action, REBSER *dst_ser, REBCNT dst_idx, REBVAL *src_val, REBCNT flags, REBINT dst_len, REBINT dups)
/*
**		action: INSERT, APPEND, CHANGE
**
**		dst_ser:	target
**		dst_idx:	position
**		src_val:	source
**		flags:		AN_PART
**		dst_len:	length to remove
**		dups:		dup count
**
**		return: new dst_idx
**
***********************************************************************/
{
	REBSER *src_ser = 0;
	REBCNT src_idx = 0;
	REBCNT src_len;
	REBCNT tail  = SERIES_TAIL(dst_ser);
	REBINT size;		// total to insert

	if (dups < 0) return (action == A_APPEND) ? 0 : dst_idx;
	if (action == A_APPEND || dst_idx > tail) dst_idx = tail;

	// If the src_val is not a string, then we need to create a string:
	if (GET_FLAG(flags, AN_SERIES)) { // used to indicate a BINARY series
		if (IS_INTEGER(src_val)) {
			src_ser = Append_Byte(0, Int8u(src_val)); // creates a binary
		}
		else if (IS_BLOCK(src_val)) {
			src_ser = Join_Binary(src_val); // NOTE: it's the shared FORM buffer!
		}
		else if (IS_CHAR(src_val)) {
			src_ser = Make_Binary(6); // (I hate unicode)
			src_ser->tail = Encode_UTF8_Char(BIN_HEAD(src_ser), VAL_CHAR(src_val));
		}
		else if (!ANY_BINSTR(src_val)) Trap_Arg(src_val);
	}
	else if (IS_CHAR(src_val)) {
		src_ser = Append_Byte(0, VAL_CHAR(src_val)); // unicode ok too
	}
	else if (IS_BLOCK(src_val)) {
		src_ser = Form_Tight_Block(src_val);
	}
	else if (!ANY_STR(src_val) || IS_TAG(src_val)) {
		src_ser = Copy_Form_Value(src_val, 0);
	}

	// Use either new src or the one that was passed:
	if (src_ser) {
		src_len = SERIES_TAIL(src_ser);
	}
	else {
		src_ser = VAL_SERIES(src_val);
		src_idx = VAL_INDEX(src_val);
		src_len = VAL_LEN(src_val);
	}

	// For INSERT or APPEND with /PART use the dst_len not src_len:
	if (action != A_CHANGE && GET_FLAG(flags, AN_PART)) src_len = dst_len;

	// If Source == Destination we need to prevent possible conflicts.
	// Clone the argument just to be safe.
	// (Note: It may be possible to optimize special cases like append !!)
	if (dst_ser == src_ser) {
		src_ser = Copy_Series_Part(src_ser, src_idx, src_len);
		src_idx = 0;
	}

	// Total to insert:
	size = dups * src_len;

	if (action != A_CHANGE) {
		// Always expand dst_ser for INSERT and APPEND actions:
		Expand_Series(dst_ser, dst_idx, size);
	} else {
		if (size > dst_len) 
			Expand_Series(dst_ser, dst_idx, size - dst_len);
		else if (size < dst_len && GET_FLAG(flags, AN_PART))
			Remove_Series(dst_ser, dst_idx, dst_len - size);
		else if (size + dst_idx > tail) {
			EXPAND_SERIES_TAIL(dst_ser, size - (tail - dst_idx));
		}
	}

	// For dup count:
	for (; dups > 0; dups--) {
		Insert_String(dst_ser, dst_idx, src_ser, src_idx, src_len, TRUE);
		dst_idx += src_len;
	}

	TERM_SERIES(dst_ser);

	return (action == A_APPEND) ? 0 : dst_idx;
}
Exemple #9
0
static REBCNT find_string(
    REBSER *series,
    REBCNT index,
    REBCNT end,
    REBVAL *target,
    REBCNT target_len,
    REBCNT flags,
    REBINT skip
) {
    assert(end >= index);
    
    if (target_len > end - index) // series not long enough to have target
        return NOT_FOUND;

    REBCNT start = index;

    if (flags & (AM_FIND_REVERSE | AM_FIND_LAST)) {
        skip = -1;
        start = 0;
        if (flags & AM_FIND_LAST) index = end - target_len;
        else index--;
    }

    if (ANY_BINSTR(target)) {
        // Do the optimal search or the general search?
        if (
            BYTE_SIZE(series)
            && VAL_BYTE_SIZE(target)
            && !(flags & ~(AM_FIND_CASE|AM_FIND_MATCH))
        ) {
            return Find_Byte_Str(
                series,
                start,
                VAL_BIN_AT(target),
                target_len,
                NOT(GET_FLAG(flags, ARG_FIND_CASE - 1)),
                GET_FLAG(flags, ARG_FIND_MATCH - 1)
            );
        }
        else {
            return Find_Str_Str(
                series,
                start,
                index,
                end,
                skip,
                VAL_SERIES(target),
                VAL_INDEX(target),
                target_len,
                flags & (AM_FIND_MATCH|AM_FIND_CASE)
            );
        }
    }
    else if (IS_BINARY(target)) {
        const REBOOL uncase = FALSE;
        return Find_Byte_Str(
            series,
            start,
            VAL_BIN_AT(target),
            target_len,
            uncase, // "don't treat case insensitively"
            GET_FLAG(flags, ARG_FIND_MATCH - 1)
        );
    }
    else if (IS_CHAR(target)) {
        return Find_Str_Char(
            VAL_CHAR(target),
            series,
            start,
            index,
            end,
            skip,
            flags
        );
    }
    else if (IS_INTEGER(target)) {
        return Find_Str_Char(
            cast(REBUNI, VAL_INT32(target)),
            series,
            start,
            index,
            end,
            skip,
            flags
        );
    }
    else if (IS_BITSET(target)) {
        return Find_Str_Bitset(
            series,
            start,
            index,
            end,
            skip,
            VAL_SERIES(target),
            flags
        );
    }

    return NOT_FOUND;
}
Exemple #10
0
/* this function is not thread safe */
char *kek_obj_print(kek_obj_t *kek_obj) {
	static char str[1024];

	if (kek_obj == (kek_obj_t *) 0xffffffffffffffff) {
		(void) snprintf(str, 1024, "kek_obj == 0xffffffffffffffff");
		assert(0);
		goto out;
	}

	if (kek_obj == NULL) {
		(void) snprintf(str, 1024, "kek_obj == NULL");
		goto out;
	}

	/* vm_debug(DBG_STACK | DBG_STACK_FULL, "kek_obj = %p\n", kek_obj); */
	if (!IS_PTR(kek_obj)) {
		if (IS_CHAR(kek_obj)) {
			(void) snprintf(str, 1024, "char -%c-", CHAR_VAL(kek_obj));
		} else if (IS_INT(kek_obj)) {
			(void) snprintf(str, 1024, "int -%d-", INT_VAL(kek_obj));
		}
	} else {

		vm_assert(TYPE_CHECK(kek_obj->h.t), //
				"kek_obj=%p, "//
				"type=%d, "//
				"state=%d, "//
				"is_const=%d, "//
				"fromspace=%d, "//
				"tospace=%d\n",//
				kek_obj,//
				kek_obj->h.t,//
				kek_obj->h.state,//
				vm_is_const(kek_obj),//
				gc_cheney_ptr_in_from_space(kek_obj, 1),//
				gc_cheney_ptr_in_to_space(kek_obj, 1));

		switch (kek_obj->h.t) {
		case KEK_INT:
			(void) snprintf(str, 1024, "int -%d-", INT_VAL(kek_obj));
			break;
		case KEK_STR:
			(void) snprintf(str, 1024, "str -%s-",
					((kek_string_t *) kek_obj)->string);
			break;
		case KEK_ARR:
			(void) snprintf(str, 1024, "arr -%p-", (void*) kek_obj);
			break;
		case KEK_SYM:
			(void) snprintf(str, 1024, "sym -%s-",
					((kek_symbol_t *) kek_obj)->symbol);
			break;
		case KEK_NIL:
			(void) snprintf(str, 1024, "nil");
			break;
		case KEK_UDO:
			(void) snprintf(str, 1024, "udo");
			break;
		case KEK_ARR_OBJS:
			(void) snprintf(str, 1024, "arr_objs");
			break;
		case KEK_EXINFO:
			(void) snprintf(str, 1024, "exinfo");
			break;
		case KEK_EXPT:
			(void) snprintf(str, 1024, "expt");
			break;
		case KEK_FILE:
			(void) snprintf(str, 1024, "file");
			break;
		case KEK_TERM:
			(void) snprintf(str, 1024, "term");
			break;
		case KEK_CLASS:
			(void) snprintf(str, 1024, "class");
			break;
		case KEK_STACK:
			(void) snprintf(str, 1024, "stack");
			break;
		case KEK_COPIED:
			(void) snprintf(str, 1024, "COPIED!");
			break;
		default:
			assert(0);
			break;
		}
	}

	out: /* */
	return ((char *) (&str));
}
/**
 * xmlEncodeEntities:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 *
 * TODO: remove xmlEncodeEntities, once we are not afraid of breaking binary
 *       compatibility
 *
 * People must migrate their code to xmlEncodeEntitiesReentrant !
 * This routine will issue a warning when encountered.
 * 
 * Returns A newly allocated string with the substitution done.
 */
const xmlChar *
xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) {
    const xmlChar *cur = input;
    xmlChar *out = static_buffer;
    static int warning = 1;
    int html = 0;


    if (warning) {
    xmlGenericError(xmlGenericErrorContext,
	    "Deprecated API xmlEncodeEntities() used\n");
    xmlGenericError(xmlGenericErrorContext,
	    "   change code to use xmlEncodeEntitiesReentrant()\n");
    warning = 0;
    }

    if (input == NULL) return(NULL);
    if (doc != NULL)
        html = (doc->type == XML_HTML_DOCUMENT_NODE);

    if (static_buffer == NULL) {
        static_buffer_size = 1000;
        static_buffer = (xmlChar *) xmlMalloc(static_buffer_size * sizeof(xmlChar));
	if (static_buffer == NULL) {
	    perror("malloc failed");
            return(NULL);
	}
	out = static_buffer;
    }
    while (*cur != '\0') {
        if (out - static_buffer > static_buffer_size - 100) {
	    int indx = out - static_buffer;

	    growBuffer();
	    out = &static_buffer[indx];
	}

	/*
	 * By default one have to encode at least '<', '>', '"' and '&' !
	 */
	if (*cur == '<') {
	    *out++ = '&';
	    *out++ = 'l';
	    *out++ = 't';
	    *out++ = ';';
	} else if (*cur == '>') {
	    *out++ = '&';
	    *out++ = 'g';
	    *out++ = 't';
	    *out++ = ';';
	} else if (*cur == '&') {
	    *out++ = '&';
	    *out++ = 'a';
	    *out++ = 'm';
	    *out++ = 'p';
	    *out++ = ';';
	} else if (*cur == '"') {
	    *out++ = '&';
	    *out++ = 'q';
	    *out++ = 'u';
	    *out++ = 'o';
	    *out++ = 't';
	    *out++ = ';';
	} else if ((*cur == '\'') && (!html)) {
	    *out++ = '&';
	    *out++ = 'a';
	    *out++ = 'p';
	    *out++ = 'o';
	    *out++ = 's';
	    *out++ = ';';
	} else if (((*cur >= 0x20) && (*cur < 0x80)) ||
	    (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
	    /*
	     * default case, just copy !
	     */
	    *out++ = *cur;
#ifndef USE_UTF_8
	} else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
	    char buf[10], *ptr;

	    snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
	    while (*ptr != 0) *out++ = *ptr++;
#endif
	} else if (IS_CHAR(*cur)) {
	    char buf[10], *ptr;

	    snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
	    while (*ptr != 0) *out++ = *ptr++;
	}
#if 0
	else {
	    /*
	     * default case, this is not a valid char !
	     * Skip it...
	     */
	    xmlGenericError(xmlGenericErrorContext,
		    "xmlEncodeEntities: invalid char %d\n", (int) *cur);
	}
#endif
	cur++;
    }
    *out++ = 0;
    return(static_buffer);
}
Exemple #12
0
/*
 * Parse the type and its initializer and emit it (recursively).
 */
static void
emitInitVal(struct dbuf_s *oBuf, symbol *topsym, sym_link *my_type, initList *list)
{
    symbol *sym;
    int size, i;
    long lit;
    unsigned char *str;

    /* Handle designated initializers */
    if (list)
      list = reorderIlist (my_type, list);

    /* If this is a hole, substitute an appropriate initializer. */
    if (list && list->type == INIT_HOLE)
      {
        if (IS_AGGREGATE (my_type))
          {
            list = newiList(INIT_DEEP, NULL); /* init w/ {} */
          }
        else
          {
            ast *ast = newAst_VALUE (constVal("0"));
            ast = decorateType (ast, RESULT_TYPE_NONE);
            list = newiList(INIT_NODE, ast);
          }
      }

    size = getSize(my_type);

    if (IS_PTR(my_type)) {
        DEBUGprintf ("(pointer, %d byte) %p\n", size, list ? (void *)(long)list2int(list) : NULL);
        emitIvals(oBuf, topsym, list, 0, size);
        return;
    }

    if (IS_ARRAY(my_type) && topsym && topsym->isstrlit) {
        str = (unsigned char *)SPEC_CVAL(topsym->etype).v_char;
        emitIvalLabel(oBuf, topsym);
        do {
            dbuf_printf (oBuf, "\tretlw 0x%02x ; '%c'\n", str[0], (str[0] >= 0x20 && str[0] < 128) ? str[0] : '.');
        } while (*(str++));
        return;
    }

    if (IS_ARRAY(my_type) && list && list->type == INIT_NODE) {
        fprintf (stderr, "Unhandled initialized symbol: %s\n", topsym->name);
        assert ( !"Initialized char-arrays are not yet supported, assign at runtime instead." );
        return;
    }

    if (IS_ARRAY(my_type)) {
        DEBUGprintf ("(array, %d items, %d byte) below\n", DCL_ELEM(my_type), size);
        assert (!list || list->type == INIT_DEEP);
        if (list) list = list->init.deep;
        for (i = 0; i < DCL_ELEM(my_type); i++) {
            emitInitVal(oBuf, topsym, my_type->next, list);
            topsym = NULL;
            if (list) list = list->next;
        } // for i
        return;
    }

    if (IS_FLOAT(my_type)) {
        // float, 32 bit
        DEBUGprintf ("(float, %d byte) %lf\n", size, list ? list2int(list) : 0.0);
        emitIvals(oBuf, topsym, list, 0, size);
        return;
    }

    if (IS_CHAR(my_type) || IS_INT(my_type) || IS_LONG(my_type)) {
        // integral type, 8, 16, or 32 bit
        DEBUGprintf ("(integral, %d byte) 0x%lx/%ld\n", size, list ? (long)list2int(list) : 0, list ? (long)list2int(list) : 0);
        emitIvals(oBuf, topsym, list, 0, size);
        return;

    } else if (IS_STRUCT(my_type) && SPEC_STRUCT(my_type)->type == STRUCT) {
        // struct
        DEBUGprintf ("(struct, %d byte) handled below\n", size);
        assert (!list || (list->type == INIT_DEEP));

        // iterate over struct members and initList
        if (list) list = list->init.deep;
        sym = SPEC_STRUCT(my_type)->fields;
        while (sym) {
            long bitfield = 0;
            int len = 0;
            if (IS_BITFIELD(sym->type)) {
                while (sym && IS_BITFIELD(sym->type)) {
                    int bitoff = SPEC_BSTR(getSpec(sym->type)) + 8 * sym->offset;
                    assert (!list || ((list->type == INIT_NODE)
                                && IS_AST_LIT_VALUE(list->init.node)));
                    lit = (long) (list ? list2int(list) : 0);
                    DEBUGprintf ( "(bitfield member) %02lx (%d bit, starting at %d, bitfield %02lx)\n",
                            lit, SPEC_BLEN(getSpec(sym->type)),
                            bitoff, bitfield);
                    bitfield |= (lit & ((1ul << SPEC_BLEN(getSpec(sym->type))) - 1)) << bitoff;
                    len += SPEC_BLEN(getSpec(sym->type));

                    sym = sym->next;
                    if (list) list = list->next;
                } // while
                assert (len < sizeof (long) * 8); // did we overflow our initializer?!?
                len = (len + 7) & ~0x07; // round up to full bytes
                emitIvals(oBuf, topsym, NULL, bitfield, len / 8);
                topsym = NULL;
            } // if

            if (sym) {
                emitInitVal(oBuf, topsym, sym->type, list);
                topsym = NULL;
                sym = sym->next;
                if (list) list = list->next;
            } // if
        } // while
        if (list) {
            assert ( !"Excess initializers." );
        } // if
        return;

    } else if (IS_STRUCT(my_type) && SPEC_STRUCT(my_type)->type == UNION) {
        // union
        DEBUGprintf ("(union, %d byte) handled below\n", size);
        assert (list && list->type == INIT_DEEP);

        // iterate over union members and initList, try to map number and type of fields and initializers
        my_type = matchIvalToUnion(list, my_type, size);
        if (my_type) {
            emitInitVal(oBuf, topsym, my_type, list->init.deep);
            topsym = NULL;
            size -= getSize(my_type);
            if (size > 0) {
                // pad with (leading) zeros
                emitIvals(oBuf, NULL, NULL, 0, size);
            }
            return;
        } // if

        assert ( !"No UNION member matches the initializer structure.");
    } else if (IS_BITFIELD(my_type)) {
        assert ( !"bitfields should only occur in structs..." );

    } else {
        printf ("SPEC_NOUN: %d\n", SPEC_NOUN(my_type));
        assert( !"Unhandled initialized type.");
    }
}
Exemple #13
0
/*
 * For UNIONs, we first have to find the correct alternative to map the
 * initializer to. This function maps the structure of the initializer to
 * the UNION members recursively.
 * Returns the type of the first `fitting' member.
 */
static sym_link *
matchIvalToUnion (initList *list, sym_link *type, int size)
{
    symbol *sym;

    assert (type);

    if (IS_PTR(type) || IS_CHAR(type) || IS_INT(type) || IS_LONG(type)
            || IS_FLOAT(type))
    {
        if (!list || (list->type == INIT_NODE)) {
            DEBUGprintf ("OK, simple type\n");
            return (type);
        } else {
            DEBUGprintf ("ERROR, simple type\n");
            return (NULL);
        }
    } else if (IS_BITFIELD(type)) {
        if (!list || (list->type == INIT_NODE)) {
            DEBUGprintf ("OK, bitfield\n");
            return (type);
        } else {
            DEBUGprintf ("ERROR, bitfield\n");
            return (NULL);
        }
    } else if (IS_STRUCT(type) && SPEC_STRUCT(getSpec(type))->type == STRUCT) {
        if (!list || (list->type == INIT_DEEP)) {
            if (list) list = list->init.deep;
            sym = SPEC_STRUCT(type)->fields;
            while (sym) {
                DEBUGprintf ("Checking STRUCT member %s\n", sym->name);
                if (!matchIvalToUnion(list, sym->type, 0)) {
                    DEBUGprintf ("ERROR, STRUCT member %s\n", sym->name);
                    return (NULL);
                }
                if (list) list = list->next;
                sym = sym->next;
            } // while

            // excess initializers?
            if (list) {
                DEBUGprintf ("ERROR, excess initializers\n");
                return (NULL);
            }

            DEBUGprintf ("OK, struct\n");
            return (type);
        }
        return (NULL);
    } else if (IS_STRUCT(type) && SPEC_STRUCT(getSpec(type))->type == UNION) {
        if (!list || (list->type == INIT_DEEP)) {
            if (list) list = list->init.deep;
            sym = SPEC_STRUCT(type)->fields;
            while (sym) {
                while (list && list->type == INIT_HOLE) {
                  list = list->next;
                  sym = sym->next;
                }
                DEBUGprintf ("Checking UNION member %s.\n", sym->name);
                if (((IS_STRUCT(sym->type) || getSize(sym->type) == size))
                        && matchIvalToUnion(list, sym->type, size))
                {
                    DEBUGprintf ("Matched UNION member %s.\n", sym->name);
                    return (sym->type);
                }
                sym = sym->next;
            } // while
        } // if
        // no match found
        DEBUGprintf ("ERROR, no match found.\n");
        return (NULL);
    } else {
        assert ( !"Unhandled type in UNION." );
    }

    assert ( !"No match found in UNION for the given initializer structure." );
    return (NULL);
}
Exemple #14
0
/**
 * xmlEncodeEntitiesInternal:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 * @attr: are we handling an atrbute value
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * Returns A newly allocated string with the substitution done.
 */
static xmlChar *
xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
    const xmlChar *cur = input;
    xmlChar *buffer = NULL;
    xmlChar *out = NULL;
    size_t buffer_size = 0;
    int html = 0;

    if (input == NULL) return(NULL);
    if (doc != NULL)
        html = (doc->type == XML_HTML_DOCUMENT_NODE);

    /*
     * allocate an translation buffer.
     */
    buffer_size = 1000;
    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) {
        xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed");
	return(NULL);
    }
    out = buffer;

    while (*cur != '\0') {
        size_t indx = out - buffer;
        if (indx + 100 > buffer_size) {

	    growBufferReentrant();
	    out = &buffer[indx];
	}

	/*
	 * By default one have to encode at least '<', '>', '"' and '&' !
	 */
	if (*cur == '<') {
	    const xmlChar *end;

	    /*
	     * Special handling of server side include in HTML attributes
	     */
	    if (html && attr &&
	        (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
	        ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
	        while (cur != end) {
		    *out++ = *cur++;
		    indx = out - buffer;
		    if (indx + 100 > buffer_size) {
			growBufferReentrant();
			out = &buffer[indx];
		    }
		}
		*out++ = *cur++;
		*out++ = *cur++;
		*out++ = *cur++;
		continue;
	    }
	    *out++ = '&';
	    *out++ = 'l';
	    *out++ = 't';
	    *out++ = ';';
	} else if (*cur == '>') {
	    *out++ = '&';
	    *out++ = 'g';
	    *out++ = 't';
	    *out++ = ';';
	} else if (*cur == '&') {
	    /*
	     * Special handling of &{...} construct from HTML 4, see
	     * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
	     */
	    if (html && attr && (cur[1] == '{') &&
	        (strchr((const char *) cur, '}'))) {
	        while (*cur != '}') {
		    *out++ = *cur++;
		    indx = out - buffer;
		    if (indx + 100 > buffer_size) {
			growBufferReentrant();
			out = &buffer[indx];
		    }
		}
		*out++ = *cur++;
		continue;
	    }
	    *out++ = '&';
	    *out++ = 'a';
	    *out++ = 'm';
	    *out++ = 'p';
	    *out++ = ';';
	} else if (((*cur >= 0x20) && (*cur < 0x80)) ||
	    (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) {
	    /*
	     * default case, just copy !
	     */
	    *out++ = *cur;
	} else if (*cur >= 0x80) {
	    if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
		/*
		 * Bjørn Reese <*****@*****.**> provided the patch
	        xmlChar xc;
	        xc = (*cur & 0x3F) << 6;
	        if (cur[1] != 0) {
		    xc += *(++cur) & 0x3F;
		    *out++ = xc;
	        } else
		 */
		*out++ = *cur;
	    } else {
		/*
		 * We assume we have UTF-8 input.
		 */
		char buf[11], *ptr;
		int val = 0, l = 1;

		if (*cur < 0xC0) {
		    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
			    "xmlEncodeEntities: input not UTF-8");
		    if (doc != NULL)
			doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
		    snprintf(buf, sizeof(buf), "&#%d;", *cur);
		    buf[sizeof(buf) - 1] = 0;
		    ptr = buf;
		    while (*ptr != 0) *out++ = *ptr++;
		    cur++;
		    continue;
		} else if (*cur < 0xE0) {
                    val = (cur[0]) & 0x1F;
		    val <<= 6;
		    val |= (cur[1]) & 0x3F;
		    l = 2;
		} else if (*cur < 0xF0) {
                    val = (cur[0]) & 0x0F;
		    val <<= 6;
		    val |= (cur[1]) & 0x3F;
		    val <<= 6;
		    val |= (cur[2]) & 0x3F;
		    l = 3;
		} else if (*cur < 0xF8) {
                    val = (cur[0]) & 0x07;
		    val <<= 6;
		    val |= (cur[1]) & 0x3F;
		    val <<= 6;
		    val |= (cur[2]) & 0x3F;
		    val <<= 6;
		    val |= (cur[3]) & 0x3F;
		    l = 4;
		}
		if ((l == 1) || (!IS_CHAR(val))) {
		    xmlEntitiesErr(XML_ERR_INVALID_CHAR,
			"xmlEncodeEntities: char out of range\n");
		    if (doc != NULL)
			doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
		    snprintf(buf, sizeof(buf), "&#%d;", *cur);
		    buf[sizeof(buf) - 1] = 0;
		    ptr = buf;
		    while (*ptr != 0) *out++ = *ptr++;
		    cur++;
		    continue;
		}
		/*
		 * We could do multiple things here. Just save as a char ref
		 */
		snprintf(buf, sizeof(buf), "&#x%X;", val);
		buf[sizeof(buf) - 1] = 0;
		ptr = buf;
		while (*ptr != 0) *out++ = *ptr++;
		cur += l;
		continue;
	    }
	} else if (IS_BYTE_CHAR(*cur)) {
	    char buf[11], *ptr;

	    snprintf(buf, sizeof(buf), "&#%d;", *cur);
	    buf[sizeof(buf) - 1] = 0;
            ptr = buf;
	    while (*ptr != 0) *out++ = *ptr++;
	}
	cur++;
    }
    *out = 0;
    return(buffer);

mem_error:
    xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed");
    xmlFree(buffer);
    return(NULL);
}
Exemple #15
0
x*/	void Modify_StringX(REBCNT action, REBVAL *string, REBVAL *arg)
/*
**		Actions: INSERT, APPEND, CHANGE
**
**		string [string!] {Series at point to insert}
**		value [any-type!] {The value to insert}
**		/part {Limits to a given length or position.}
**		length [number! series! pair!]
**		/only {Inserts a series as a series.}
**		/dup {Duplicates the insert a specified number of times.}
**		count [number! pair!]
**
***********************************************************************/
{
	REBSER *series = VAL_SERIES(string);
	REBCNT index = VAL_INDEX(string);
	REBCNT tail  = VAL_TAIL(string);
	REBINT rlen;  // length to be removed
	REBINT ilen  = 1;  // length to be inserted
	REBINT cnt   = 1;  // DUP count
	REBINT size;
	REBVAL *val;
	REBSER *arg_ser = 0; // argument series

	// Length of target (may modify index): (arg can be anything)
	rlen = Partial1((action == A_CHANGE) ? string : arg, DS_ARG(AN_LENGTH));

	index = VAL_INDEX(string);
	if (action == A_APPEND || index > tail) index = tail;

	// If the arg is not a string, then we need to create a string:
	if (IS_BINARY(string)) {
		if (IS_INTEGER(arg)) {
			if (VAL_INT64(arg) > 255 || VAL_INT64(arg) < 0)
				Trap_Range(arg);
			arg_ser = Make_Binary(1);
			Append_Byte(arg_ser, VAL_CHAR(arg)); // check for size!!!
		}
		else if (!ANY_BINSTR(arg)) Trap_Arg(arg);
	}
	else if (IS_BLOCK(arg)) {
		// MOVE!
		REB_MOLD mo = {0};
		arg_ser = mo.series = Make_Unicode(VAL_BLK_LEN(arg) * 10); // GC!?
		for (val = VAL_BLK_DATA(arg); NOT_END(val); val++)
			Mold_Value(&mo, val, 0);
	}
	else if (IS_CHAR(arg)) {
		// Optimize this case !!!
		arg_ser = Make_Unicode(1);
		Append_Byte(arg_ser, VAL_CHAR(arg));
	}
	else if (!ANY_STR(arg) || IS_TAG(arg)) {
		arg_ser = Copy_Form_Value(arg, 0);
	}
	if (arg_ser) Set_String(arg, arg_ser);
	else arg_ser = VAL_SERIES(arg);

	// Length of insertion:
	ilen = (action != A_CHANGE && DS_REF(AN_PART)) ? rlen : VAL_LEN(arg);

	// If Source == Destination we need to prevent possible conflicts.
	// Clone the argument just to be safe.
	// (Note: It may be possible to optimize special cases like append !!)
	if (series == VAL_SERIES(arg)) {
		arg_ser = Copy_Series_Part(arg_ser, VAL_INDEX(arg), ilen);  // GC!?
	}

	// Get /DUP count:
	if (DS_REF(AN_DUP)) {
		cnt = Int32(DS_ARG(AN_COUNT));
		if (cnt <= 0) return; // no changes
	}

	// Total to insert:
	size = cnt * ilen;

	if (action != A_CHANGE) {
		// Always expand series for INSERT and APPEND actions:
		Expand_Series(series, index, size);
	} else {
		if (size > rlen) 
			Expand_Series(series, index, size-rlen);
		else if (size < rlen && DS_REF(AN_PART))
			Remove_Series(series, index, rlen-size);
		else if (size + index > tail) {
			EXPAND_SERIES_TAIL(series, size - (tail - index));
		}
	}

	// For dup count:
	for (; cnt > 0; cnt--) {
		Insert_String(series, index, arg_ser, VAL_INDEX(arg), ilen, TRUE);
		index += ilen;
	}

	TERM_SERIES(series);

	VAL_INDEX(string) = (action == A_APPEND) ? 0 : index;
}