/**
 * xmlEncodeEntitiesReentrant:
 * @param doc the document containing the string
 * @param input A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * '<', '>' and '&' become "&lt;", "&gt;" and "&amp;".  Printable ASCII,
 * '\n' and '\t' (plus '\r' for HTML documents) are copied unchanged.
 * Bytes >= 0x80 are copied unchanged when the document declares an
 * encoding or is HTML; otherwise the input is decoded as UTF-8 and every
 * character is written as a character reference.  A byte that does not
 * start a well-formed UTF-8 sequence is written as a decimal reference to
 * that single byte and, when doc is not NULL, doc->encoding is set to
 * "ISO-8859-1".  Any other byte is written as a decimal reference if
 * IS_BYTE_CHAR accepts it and is dropped otherwise.
 *
 * Returns A newly allocated string with the substitution done.
 *
 * OOM: possible --> returns NULL (for input!=NULL), sets OOM flag
 */
XMLPUBFUNEXPORT xmlChar*
xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input)
{
    const xmlChar* cur = input;
    xmlChar* buffer = NULL;
    xmlChar* out = NULL;
    int buffer_size;
    int html;
    LOAD_GS_SAFE_DOC(doc)

    if (input == NULL)
        return(NULL);
    html = doc && (doc->type == XML_HTML_DOCUMENT_NODE);

    /*
     * allocate a translation buffer.
     */
    buffer_size = 1000;
    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) {
        xmlGenericError(xmlGenericErrorContext, EMBED_ERRTXT("malloc failed\n"));
        return(NULL);
    }
    out = buffer;

    while (*cur != '\0') {
        /* Keep at least 100 bytes of headroom: one pass writes at most 10. */
        if (out - buffer > buffer_size - 100) {
            xmlChar* newbuf;
            int indx = out - buffer;

            newbuf = (xmlChar*)xmlGrowBufferReentrant(&buffer_size, buffer);
            /*
             * On OOM the helper returns NULL and does not free the old
             * buffer, so the RESULT must be tested (testing 'buffer' here
             * can never detect the failure) and the old buffer released.
             */
            if (!newbuf) {
                xmlFree(buffer);
                return NULL;
            }
            buffer = newbuf;
            out = &buffer[indx];
        }

        /*
         * By default one has to encode at least '<', '>' and '&' !
         */
        if (*cur == '<') {
            *out++ = '&';
            *out++ = 'l';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '>') {
            *out++ = '&';
            *out++ = 'g';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '&') {
            *out++ = '&';
            *out++ = 'a';
            *out++ = 'm';
            *out++ = 'p';
            *out++ = ';';
        } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
                   (*cur == '\n') || (*cur == '\t') ||
                   ((html) && (*cur == '\r'))) {
            /*
             * default case, just copy !
             */
            *out++ = *cur;
        } else if (*cur >= 0x80) {
            if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
                /*
                 * The document has a declared encoding (or is HTML):
                 * pass the byte through untouched.
                 */
                *out++ = *cur;
            } else {
                /*
                 * We assume we have UTF-8 input.
                 */
                char buf[11], *ptr;
                int val = 0, len = 1; /* len stays 1 when decoding fails */

                if (*cur < 0xC0) {
                    xmlGenericError(xmlGenericErrorContext,
                        EMBED_ERRTXT("xmlEncodeEntitiesReentrant : input not UTF-8\n"));
                    if (doc != NULL) {
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                        if (OOM_FLAG)
                            goto OOM;
                    }
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) {
                        *out++ = *ptr++;
                    }
                    cur++;
                    continue;
                } else if (*cur < 0xE0) {
                    /*
                     * Every trailing byte must look like 10xxxxxx.  The
                     * short-circuit tests also stop the decoder from
                     * reading (and later skipping) past the terminating
                     * NUL when the input ends inside a sequence.
                     */
                    if ((cur[1] & 0xC0) == 0x80) {
                        val = (cur[0]) & 0x1F;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        len = 2;
                    }
                } else if (*cur < 0xF0) {
                    if (((cur[1] & 0xC0) == 0x80) &&
                        ((cur[2] & 0xC0) == 0x80)) {
                        val = (cur[0]) & 0x0F;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        val <<= 6;
                        val |= (cur[2]) & 0x3F;
                        len = 3;
                    }
                } else if (*cur < 0xF8) {
                    if (((cur[1] & 0xC0) == 0x80) &&
                        ((cur[2] & 0xC0) == 0x80) &&
                        ((cur[3] & 0xC0) == 0x80)) {
                        val = (cur[0]) & 0x07;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        val <<= 6;
                        val |= (cur[2]) & 0x3F;
                        val <<= 6;
                        val |= (cur[3]) & 0x3F;
                        len = 4;
                    }
                }

                if ((len == 1) || (!IS_CHAR(val))) {
                    xmlGenericError(xmlGenericErrorContext,
                        EMBED_ERRTXT("xmlEncodeEntitiesReentrant : char out of range\n"));
                    if (doc != NULL) {
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                        if (OOM_FLAG)
                            goto OOM;
                    }
                    /* Emit the offending byte alone and resynchronise. */
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) {
                        *out++ = *ptr++;
                    }
                    cur++;
                    continue;
                }

                /*
                 * We could do multiple things here. Just save as a char ref
                 */
                if (html)
                    snprintf(buf, sizeof(buf), "&#%d;", val);
                else
                    snprintf(buf, sizeof(buf), "&#x%X;", val);
                buf[sizeof(buf) - 1] = 0;
                ptr = buf;
                while (*ptr != 0) {
                    *out++ = *ptr++;
                }
                cur += len;
                continue;
            }
        } else if (IS_BYTE_CHAR(*cur)) {
            char buf[11], *ptr;

            snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
            while (*ptr != 0) {
                *out++ = *ptr++;
            }
        }
        cur++;
    } /* while (*cur != '\0') */

    *out++ = 0;
    return(buffer);

OOM:
    xmlFree(buffer);
    return NULL;
}
// // Change_Case: C // // Common code for string case handling. // void Change_Case( REBVAL *out, REBVAL *val, // !!! Not const--uses Partial(), may change index, review const REBVAL *part, bool upper ){ if (IS_CHAR(val)) { REBUNI c = VAL_CHAR(val); Init_Char_Unchecked(out, upper ? UP_CASE(c) : LO_CASE(c)); return; } assert(ANY_STRING(val)); FAIL_IF_READ_ONLY(val); // This is a mutating operation, and we want to return the same series at // the same index. However, R3-Alpha code would use Partial() and may // change val's index. Capture it before potential change, review. // Move_Value(out, val); REBCNT len = Part_Len_May_Modify_Index(val, part); // !!! This assumes that all case changes will preserve the encoding size, // but that's not true (some strange multibyte accented characters have // capital or lowercase versions that are single byte). This may be // uncommon enough to have special handling (only do something weird, e.g. // use the mold buffer, if it happens...for the remaining portion of such // a string...and only if the size *expands*). Expansions also may never // be possible, only contractions (is that true?) Review when UTF-8 // Everywhere is more mature to the point this is worth worrying about. // REBCHR(*) up = VAL_STRING_AT(val); REBCHR(*) dp; if (upper) { REBCNT n; for (n = 0; n < len; n++) { dp = up; REBUNI c; up = NEXT_CHR(&c, up); if (c < UNICODE_CASES) { dp = WRITE_CHR(dp, UP_CASE(c)); assert(dp == up); // !!! not all case changes same byte size? } } } else { REBCNT n; for (n = 0; n < len; n++) { dp = up; REBUNI c; up = NEXT_CHR(&c, up); if (c < UNICODE_CASES) { dp = WRITE_CHR(dp, LO_CASE(c)); assert(dp == up); // !!! not all case changes same byte size? } } } }
*/  static REBCNT Parse_To(REBPARSE *parse, REBCNT index, REBVAL *item, REBFLG is_thru)
/*
**      Parse TO a specific:
**          1. integer - index position
**          2. END - end of input
**          3. value - according to datatype
**          4. block of values - the first one we hit
**
**      When is_thru is set the returned position is just past the
**      matched item rather than at its start.
**
**      Returns the resulting index, or NOT_FOUND when nothing matched
**      or when the item type is not one handled for string input.
**
***********************************************************************/
{
    REBSER *series = parse->series;
    // Start from NOT_FOUND: for string input an item that is neither
    // ANY_BINSTR nor CHAR assigns nothing below, and the result must
    // not be an indeterminate value.
    REBCNT i = NOT_FOUND;
    REBSER *ser;

    // TO a specific index position.
    if (IS_INTEGER(item)) {
        i = (REBCNT)Int32(item) - (is_thru ? 0 : 1);
        if (i > series->tail) i = series->tail;
    }
    // END
    else if (IS_WORD(item) && VAL_WORD_CANON(item) == SYM_END) {
        i = series->tail;
    }
    // Block of alternatives: first one we hit
    else if (IS_BLOCK(item)) {
        i = To_Thru(parse, index, item, is_thru);
    }
    // Single value, searched in a block
    else if (IS_BLOCK_INPUT(parse)) {
        REBVAL word; /// !!!Temp, but where can we put it?

        if (IS_LIT_WORD(item)) {  // patch to search for word, not lit.
            word = *item;
            VAL_SET(&word, REB_WORD);
            item = &word;
        }
        i = Find_Block(series, index, series->tail, item, 1, HAS_CASE(parse)?AM_FIND_CASE:0, 1);
        if (i != NOT_FOUND && is_thru) i++;
    }
    // Single value, searched in a string: "str"
    else if (ANY_BINSTR(item)) {
        if (!IS_STRING(item) && !IS_BINARY(item)) {
            // !!! Can this be optimized not to use COPY?
            ser = Copy_Form_Value(item, 0);
            i = Find_Str_Str(series, 0, index, series->tail, 1, ser, 0, ser->tail, HAS_CASE(parse));
            if (i != NOT_FOUND && is_thru) i += ser->tail;
        }
        else {
            i = Find_Str_Str(series, 0, index, series->tail, 1, VAL_SERIES(item), VAL_INDEX(item), VAL_LEN(item), HAS_CASE(parse));
            if (i != NOT_FOUND && is_thru) i += VAL_LEN(item);
        }
    }
    // #"A"
    else if (IS_CHAR(item)) {
        i = Find_Str_Char(series, 0, index, series->tail, 1, VAL_CHAR(item), HAS_CASE(parse));
        if (i != NOT_FOUND && is_thru) i++;
    }

    return i;
}
/**
 * xmlEncodeEntitiesReentrant:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * '<', '>' and '&' become "&lt;", "&gt;" and "&amp;".  Printable ASCII,
 * '\n' and '\t' (plus '\r' for HTML documents) are copied unchanged.
 * Bytes >= 0x80 are copied unchanged when the document declares an
 * encoding or is HTML; otherwise the input is decoded as UTF-8 and every
 * character is written as a hexadecimal character reference.  A byte that
 * does not start a well-formed UTF-8 sequence is written as a decimal
 * reference to that single byte and, when @doc is not NULL, doc->encoding
 * is set to "ISO-8859-1".
 *
 * Returns A newly allocated string with the substitution done, or NULL
 * when @input is NULL or the initial allocation fails.
 */
xmlChar *
xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
    const xmlChar *cur = input;
    xmlChar *buffer = NULL;
    xmlChar *out = NULL;
    int buffer_size = 0;
    int html = 0;

    if (input == NULL) return(NULL);
    if (doc != NULL)
        html = (doc->type == XML_HTML_DOCUMENT_NODE);

    /*
     * allocate a translation buffer.
     */
    buffer_size = 1000;
    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) {
        xmlEntitiesErrMemory("xmlEncodeEntitiesReentrant: malloc failed");
        return(NULL);
    }
    out = buffer;

    while (*cur != '\0') {
        /* Keep at least 100 bytes of headroom: one pass writes at most 10. */
        if (out - buffer > buffer_size - 100) {
            int indx = out - buffer;

            growBufferReentrant();
            out = &buffer[indx];
        }

        /*
         * By default one has to encode at least '<', '>' and '&' !
         */
        if (*cur == '<') {
            *out++ = '&';
            *out++ = 'l';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '>') {
            *out++ = '&';
            *out++ = 'g';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '&') {
            *out++ = '&';
            *out++ = 'a';
            *out++ = 'm';
            *out++ = 'p';
            *out++ = ';';
        } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
                   (*cur == '\n') || (*cur == '\t') ||
                   ((html) && (*cur == '\r'))) {
            /*
             * default case, just copy !
             */
            *out++ = *cur;
        } else if (*cur >= 0x80) {
            if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
                /*
                 * The document has a declared encoding (or is HTML):
                 * pass the byte through untouched.
                 */
                *out++ = *cur;
            } else {
                /*
                 * We assume we have UTF-8 input.
                 */
                char buf[11], *ptr;
                int val = 0, l = 1; /* l stays 1 when decoding fails */

                if (*cur < 0xC0) {
                    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
                            "xmlEncodeEntitiesReentrant : input not UTF-8");
                    if (doc != NULL)
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) *out++ = *ptr++;
                    cur++;
                    continue;
                } else if (*cur < 0xE0) {
                    /*
                     * Every trailing byte must look like 10xxxxxx.  The
                     * short-circuit tests also stop the decoder from
                     * reading (and later skipping) past the terminating
                     * NUL when the input ends inside a sequence.
                     */
                    if ((cur[1] & 0xC0) == 0x80) {
                        val = (cur[0]) & 0x1F;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        l = 2;
                    }
                } else if (*cur < 0xF0) {
                    if (((cur[1] & 0xC0) == 0x80) &&
                        ((cur[2] & 0xC0) == 0x80)) {
                        val = (cur[0]) & 0x0F;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        val <<= 6;
                        val |= (cur[2]) & 0x3F;
                        l = 3;
                    }
                } else if (*cur < 0xF8) {
                    if (((cur[1] & 0xC0) == 0x80) &&
                        ((cur[2] & 0xC0) == 0x80) &&
                        ((cur[3] & 0xC0) == 0x80)) {
                        val = (cur[0]) & 0x07;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        val <<= 6;
                        val |= (cur[2]) & 0x3F;
                        val <<= 6;
                        val |= (cur[3]) & 0x3F;
                        l = 4;
                    }
                }
                if ((l == 1) || (!IS_CHAR(val))) {
                    xmlEntitiesErr(XML_ERR_INVALID_CHAR,
                        "xmlEncodeEntitiesReentrant : char out of range\n");
                    if (doc != NULL)
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                    /* Emit the offending byte alone and resynchronise. */
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) *out++ = *ptr++;
                    cur++;
                    continue;
                }
                /*
                 * We could do multiple things here. Just save as a char ref
                 */
                snprintf(buf, sizeof(buf), "&#x%X;", val);
                buf[sizeof(buf) - 1] = 0;
                ptr = buf;
                while (*ptr != 0) *out++ = *ptr++;
                cur += l;
                continue;
            }
        } else if (IS_BYTE_CHAR(*cur)) {
            char buf[11], *ptr;

            snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
            while (*ptr != 0) *out++ = *ptr++;
        }
        cur++;
    }
    *out = 0;
    return(buffer);
}
*/  REBSER *Parse_String(REBSER *series, REBCNT index, REBVAL *rules, REBCNT flags)
/*
**      Split a string into tokens and return them as a new block of
**      strings.
**
**      rules: a STRING! or CHAR! giving the delimiter characters;
**             anything else selects the defaults "," and ";".
**      flags: without PF_ALL, characters 1 through space are delimiters
**             too and whitespace around tokens is skipped; PF_CASE is
**             handed (negated) to Check_Bit.
**
**      A token that starts with a double quote runs to the next double
**      quote (or the tail) and the quotes are not part of the token.
**
***********************************************************************/
{
    REBCNT limit = series->tail;
    REBSER *emit = BUF_EMIT; // shared series
    REBSER *delims;
    REBCNT tok_start;
    REBCNT tok_end;
    REBOOL trim = !(flags & PF_ALL);
    REBUNI ch;

    RESET_SERIES(emit);

    if (IS_STRING(rules) || IS_CHAR(rules)) {
        // Delimiters supplied by the caller; the bitset always has room
        // for the space character and everything below it.
        REBCNT bits = Find_Max_Bit(rules);
        if (bits <= ' ') bits = ' ' + 1;
        delims = Make_Bitset(bits);
        Set_Bits(delims, rules, TRUE);
    }
    else {
        // No usable rule: fall back to ",;"
        delims = Make_Bitset(1+MAX(',',';'));
        Set_Bit(delims, ',', TRUE);
        Set_Bit(delims, ';', TRUE);
    }
    SAVE_SERIES(delims);

    if (trim) {
        // Treat control characters and space as delimiters as well
        ch = 1;
        while (ch <= ' ') {
            Set_Bit(delims, ch, TRUE);
            ch++;
        }
    }

    while (index < limit) {

        if (--Eval_Count <= 0 || Eval_Signals) Do_Signals();

        if (trim) {
            // Leading whitespace
            ch = 0;
            while (index < limit) {
                ch = GET_ANY_CHAR(series, index);
                if (!IS_WHITE(ch)) break;
                index++;
            }
        }
        else {
            ch = GET_ANY_CHAR(series, index); // prefetch
        }

        if (index >= limit) continue; // only whitespace was left

        if (ch == '"') {
            // Quoted token (handled in a simple way)
            tok_start = ++index; // eat quote
            while (index < limit) {
                ch = GET_ANY_CHAR(series, index);
                if (ch == '"') break;
                index++;
            }
            tok_end = index;
            if (index < limit) index++; // eat closing quote
        }
        else {
            // Plain token: runs up to the next delimiter
            tok_start = index;
            while (index < limit) {
                if (Check_Bit(delims, GET_ANY_CHAR(series, index), !(flags & PF_CASE))) break;
                index++;
            }
            tok_end = index;
        }

        if (trim) {
            // Trailing whitespace
            while (index < limit) {
                ch = GET_ANY_CHAR(series, index);
                if (!IS_WHITE(ch)) break;
                index++;
            }
        }

        // Consume the separator, if one follows
        if (Check_Bit(delims, GET_ANY_CHAR(series, index), !(flags & PF_CASE)))
            index++;

        // Emit the token
        Set_String(Append_Value(emit), Copy_String(series, tok_start, tok_end - tok_start));
    }

    UNSAVE_SERIES(delims);

    return Copy_Block(emit, 0);
}
*/  static To_Thru(REBPARSE *parse, REBCNT index, REBVAL *block, REBFLG is_thru)
/*
**      Handle TO/THRU with a block of alternatives.
**
**      Walks the input from index up to and including series->tail and,
**      at each position, tries every alternative in the rule block in
**      order. Alternatives must be separated by | (a paren may follow
**      an alternative).
**
**      Returns the position of the first match (advanced past the match
**      when is_thru is set), or NOT_FOUND when no position matches.
**      An unusable alternative is reported with Trap1(RE_PARSE_RULE).
**
**      NOTE(review): no return type is declared, so this relies on
**      implicit int, although every caller-visible value is an index.
**
***********************************************************************/
{
    REBSER *series = parse->series;
    REBCNT type = parse->type;
    REBVAL *blk;
    REBVAL *item;
    REBCNT cmd;
    REBCNT i;
    REBCNT len;

    // Outer loop: candidate input positions. Inner loop: alternatives.
    for (; index <= series->tail; index++) {

        for (blk = VAL_BLK(block); NOT_END(blk); blk++) {

            item = blk;

            // Deal with words and commands
            if (IS_WORD(item)) {
                if (cmd = VAL_CMD(item)) {
                    if (cmd == SYM_END) {
                        // END only matches once the tail is reached
                        if (index >= series->tail) {
                            index = series->tail;
                            goto found;
                        }
                        goto next;
                    }
                    else if (cmd == SYM_QUOTE) {
                        item = ++blk; // next item is the quoted value
                        if (IS_END(item)) goto bad_target;
                        if (IS_PAREN(item)) {
                            item = Do_Block_Value_Throw(item); // might GC
                        }
                    }
                    else goto bad_target; // no other command is accepted here
                }
                else {
                    // Not a command: use the word's value as the target
                    item = Get_Var(item);
                }
            }
            else if (IS_PATH(item)) {
                item = Get_Parse_Value(item);
            }

            // Try to match it:
            if (type >= REB_BLOCK) {
                // Block input: a block target is rejected
                if (ANY_BLOCK(item)) goto bad_target;
                i = Parse_Next_Block(parse, index, item, 0);
                if (i != NOT_FOUND) {
                    if (!is_thru) i--; // TO: step back onto the match
                    index = i;
                    goto found;
                }
            }
            else if (type == REB_BINARY) {
                REBYTE ch1 = *BIN_SKIP(series, index);

                // Handle special string types:
                if (IS_CHAR(item)) {
                    if (VAL_CHAR(item) > 0xff) goto bad_target; // cannot be a byte
                    if (ch1 == VAL_CHAR(item)) goto found1;
                }
                else if (IS_BINARY(item)) {
                    // Compare the first byte before doing the full compare
                    if (ch1 == *VAL_BIN_DATA(item)) {
                        len = VAL_LEN(item);
                        if (len == 1) goto found1;
                        if (0 == Compare_Bytes(BIN_SKIP(series, index), VAL_BIN_DATA(item), len, 0)) {
                            if (is_thru) index += len;
                            goto found;
                        }
                    }
                }
                else if (IS_INTEGER(item)) {
                    if (VAL_INT64(item) > 0xff) goto bad_target; // cannot be a byte
                    if (ch1 == VAL_INT32(item)) goto found1;
                }
                else goto bad_target;
            }
            else { // String
                REBCNT ch1 = GET_ANY_CHAR(series, index);
                REBCNT ch2;

                // Without case sensitivity both sides are compared upper-cased
                if (!HAS_CASE(parse)) ch1 = UP_CASE(ch1);

                // Handle special string types:
                if (IS_CHAR(item)) {
                    ch2 = VAL_CHAR(item);
                    if (!HAS_CASE(parse)) ch2 = UP_CASE(ch2);
                    if (ch1 == ch2) goto found1;
                }
                else if (ANY_STR(item)) {
                    // Compare the first character before the full search
                    ch2 = VAL_ANY_CHAR(item);
                    if (!HAS_CASE(parse)) ch2 = UP_CASE(ch2);
                    if (ch1 == ch2) {
                        len = VAL_LEN(item);
                        if (len == 1) goto found1;
                        i = Find_Str_Str(series, 0, index, SERIES_TAIL(series), 1, VAL_SERIES(item), VAL_INDEX(item), len, AM_FIND_MATCH | parse->flags);
                        if (i != NOT_FOUND) {
                            if (is_thru) i += len;
                            index = i;
                            goto found;
                        }
                    }
                }
                else if (IS_INTEGER(item)) {
                    ch1 = GET_ANY_CHAR(series, index); // No casing!
                    if (ch1 == (REBCNT)VAL_INT32(item)) goto found1;
                }
                else goto bad_target;
            }

next:
            // Check for | (required if not end)
            blk++;
            if (IS_PAREN(blk)) blk++; // skip the paren attached to this alternative
            if (IS_END(blk)) break;
            if (!IS_OR_BAR(blk)) {
                item = blk;
                goto bad_target;
            }
        }
    }
    return NOT_FOUND;

found:
    // Match whose final index has already been computed; evaluate a
    // paren that follows the matched alternative.
    if (IS_PAREN(blk+1)) Do_Block_Value_Throw(blk+1);
    return index;

found1:
    // Single-unit match at index: THRU moves one past it.
    if (IS_PAREN(blk+1)) Do_Block_Value_Throw(blk+1);
    return index + (is_thru ? 1 : 0);

bad_target:
    Trap1(RE_PARSE_RULE, item);
    return 0;
}
*/  static REBCNT Find_Entry(REBSER *series, REBVAL *key, REBVAL *val)
/*
**      Look the key up in the map. When val is given, the entry is
**      overwritten if it exists and appended (key then val) if it
**      does not. A NONE key never matches.
**
**      Small maps (no hash table yet and fewer than MIN_DICT*2
**      values) are searched linearly; otherwise the hash table is
**      created or expanded as needed and used for the lookup.
**
**      RETURNS: the index to the VALUE or zero if there is none.
**
***********************************************************************/
{
    REBSER *table = series->extra.series; // can be null
    REBCNT *slots;
    REBCNT slot;
    REBVAL *pair;
    REBCNT pos;

    if (IS_NONE(key)) return 0;

    // Not large enough yet for a hash table to be worthwhile:
    // walk the key/value pairs directly.
    if (!table && series->tail < MIN_DICT*2) {
        pair = BLK_HEAD(series);
        pos = 0;

        if (ANY_WORD(key)) {
            while (pos < series->tail) {
                if (
                    ANY_WORD(pair)
                    && SAME_SYM(VAL_WORD_SYM(key), VAL_WORD_SYM(pair))
                ) goto linear_match;
                pos += 2;
                pair += 2;
            }
        }
        else if (ANY_BINSTR(key)) {
            while (pos < series->tail) {
                if (
                    VAL_TYPE(key) == VAL_TYPE(pair)
                    && 0 == Compare_String_Vals(key, pair, (REBOOL)!IS_BINARY(pair))
                ) goto linear_match;
                pos += 2;
                pair += 2;
            }
        }
        else if (IS_INTEGER(key)) {
            while (pos < series->tail) {
                if (IS_INTEGER(pair) && VAL_INT64(key) == VAL_INT64(pair))
                    goto linear_match;
                pos += 2;
                pair += 2;
            }
        }
        else if (IS_CHAR(key)) {
            while (pos < series->tail) {
                if (IS_CHAR(pair) && VAL_CHAR(key) == VAL_CHAR(pair))
                    goto linear_match;
                pos += 2;
                pair += 2;
            }
        }
        else raise Error_Has_Bad_Type(key);

        // Not present: a plain lookup reports zero, a store appends.
        if (!val) return 0;
        Append_Value(series, key);
        Append_Value(series, val); // does not copy value, e.g. if string
        return series->tail/2;
    }

    if (!table) {
        // Add hash table:
        series->extra.series = table = Make_Hash_Sequence(series->tail);
        MANAGE_SERIES(table);
        Rehash_Hash(series);
    }

    // Expand the hash table if needed:
    if (series->tail > table->tail/2) {
        Expand_Hash(table); // modifies size value
        Rehash_Hash(series);
    }

    slot = Find_Key(series, table, key, 2, 0, 0);
    slots = (REBCNT*)table->data;
    pos = slots[slot];

    // Just a GET of value:
    if (!val) return pos;

    // Existing entry: overwrite its value
    if (pos) {
        *BLK_SKIP(series, ((pos-1)*2)+1) = *val;
        return pos;
    }

    // Create new entry:
    Append_Value(series, key);
    Append_Value(series, val); // does not copy value, e.g. if string
    return (slots[slot] = series->tail/2);

linear_match:
    // pair points at the matching key; its value is the next cell
    if (val) pair[1] = *val;
    return pos/2+1;
}
*/  REBCNT Modify_String(REBCNT action, REBSER *dst_ser, REBCNT dst_idx, REBVAL *src_val, REBCNT flags, REBINT dst_len, REBINT dups)
/*
**      Insert into, append to, or change part of a string/binary.
**
**      action:  INSERT, APPEND, CHANGE
**      dst_ser: target
**      dst_idx: position
**      src_val: source
**      flags:   AN_PART (AN_SERIES marks a binary target)
**      dst_len: length to remove
**      dups:    dup count
**
**      return:  new dst_idx (always 0 for APPEND)
**
***********************************************************************/
{
    REBSER *from = 0;       // source series, built below when needed
    REBCNT from_at = 0;
    REBCNT from_len;
    REBCNT old_tail = SERIES_TAIL(dst_ser);
    REBINT total;           // total to insert

    if (dups < 0) {
        if (action == A_APPEND) return 0;
        return dst_idx;
    }
    if (action == A_APPEND || dst_idx > old_tail) dst_idx = old_tail;

    // A source that is not already a suitable string gets converted:
    if (GET_FLAG(flags, AN_SERIES)) { // used to indicate a BINARY series
        if (IS_INTEGER(src_val)) {
            from = Append_Byte(0, Int8u(src_val)); // creates a binary
        }
        else if (IS_BLOCK(src_val)) {
            from = Join_Binary(src_val); // NOTE: it's the shared FORM buffer!
        }
        else if (IS_CHAR(src_val)) {
            from = Make_Binary(6); // (I hate unicode)
            from->tail = Encode_UTF8_Char(BIN_HEAD(from), VAL_CHAR(src_val));
        }
        else if (!ANY_BINSTR(src_val)) Trap_Arg(src_val);
    }
    else if (IS_CHAR(src_val)) {
        from = Append_Byte(0, VAL_CHAR(src_val)); // unicode ok too
    }
    else if (IS_BLOCK(src_val)) {
        from = Form_Tight_Block(src_val);
    }
    else if (!ANY_STR(src_val) || IS_TAG(src_val)) {
        from = Copy_Form_Value(src_val, 0);
    }

    // Nothing was built: read straight from the value that was passed
    if (!from) {
        from = VAL_SERIES(src_val);
        from_at = VAL_INDEX(src_val);
        from_len = VAL_LEN(src_val);
    }
    else {
        from_len = SERIES_TAIL(from);
    }

    // For INSERT or APPEND with /PART use the dst_len not src_len:
    if (action != A_CHANGE && GET_FLAG(flags, AN_PART)) from_len = dst_len;

    // Source and destination are the same series: work from a copy so
    // that moving data in the target cannot disturb the source.
    // (Note: It may be possible to optimize special cases like append !!)
    if (dst_ser == from) {
        from = Copy_Series_Part(from, from_at, from_len);
        from_at = 0;
    }

    total = dups * from_len;

    if (action == A_CHANGE) {
        // Grow, shrink (only with /PART) or extend the tail as required
        if (total > dst_len)
            Expand_Series(dst_ser, dst_idx, total - dst_len);
        else if (total < dst_len && GET_FLAG(flags, AN_PART))
            Remove_Series(dst_ser, dst_idx, dst_len - total);
        else if (total + dst_idx > old_tail) {
            EXPAND_SERIES_TAIL(dst_ser, total - (old_tail - dst_idx));
        }
    }
    else {
        // INSERT and APPEND always make room for everything
        Expand_Series(dst_ser, dst_idx, total);
    }

    // One copy of the source per dup:
    while (dups > 0) {
        Insert_String(dst_ser, dst_idx, from, from_at, from_len, TRUE);
        dst_idx += from_len;
        dups--;
    }

    TERM_SERIES(dst_ser);

    if (action == A_APPEND) return 0;
    return dst_idx;
}
static REBCNT find_string( REBSER *series, REBCNT index, REBCNT end, REBVAL *target, REBCNT target_len, REBCNT flags, REBINT skip ) { assert(end >= index); if (target_len > end - index) // series not long enough to have target return NOT_FOUND; REBCNT start = index; if (flags & (AM_FIND_REVERSE | AM_FIND_LAST)) { skip = -1; start = 0; if (flags & AM_FIND_LAST) index = end - target_len; else index--; } if (ANY_BINSTR(target)) { // Do the optimal search or the general search? if ( BYTE_SIZE(series) && VAL_BYTE_SIZE(target) && !(flags & ~(AM_FIND_CASE|AM_FIND_MATCH)) ) { return Find_Byte_Str( series, start, VAL_BIN_AT(target), target_len, NOT(GET_FLAG(flags, ARG_FIND_CASE - 1)), GET_FLAG(flags, ARG_FIND_MATCH - 1) ); } else { return Find_Str_Str( series, start, index, end, skip, VAL_SERIES(target), VAL_INDEX(target), target_len, flags & (AM_FIND_MATCH|AM_FIND_CASE) ); } } else if (IS_BINARY(target)) { const REBOOL uncase = FALSE; return Find_Byte_Str( series, start, VAL_BIN_AT(target), target_len, uncase, // "don't treat case insensitively" GET_FLAG(flags, ARG_FIND_MATCH - 1) ); } else if (IS_CHAR(target)) { return Find_Str_Char( VAL_CHAR(target), series, start, index, end, skip, flags ); } else if (IS_INTEGER(target)) { return Find_Str_Char( cast(REBUNI, VAL_INT32(target)), series, start, index, end, skip, flags ); } else if (IS_BITSET(target)) { return Find_Str_Bitset( series, start, index, end, skip, VAL_SERIES(target), flags ); } return NOT_FOUND; }
/*
 * Format a short human-readable description of kek_obj for debug output.
 *
 * The text is written into a single static buffer and a pointer to that
 * buffer is returned: the result is overwritten by the next call and must
 * not be freed.  This function is not thread safe.
 *
 * Every path writes the buffer, so text from an earlier call is never
 * returned for a value this function does not recognise.
 */
char *kek_obj_print(kek_obj_t *kek_obj) {
    static char str[1024];

    /* All-ones pointer: reported and treated as a fatal condition. */
    if (kek_obj == (kek_obj_t *) 0xffffffffffffffff) {
        (void) snprintf(str, sizeof str, "kek_obj == 0xffffffffffffffff");
        assert(0);
        goto out;
    }

    if (kek_obj == NULL) {
        (void) snprintf(str, sizeof str, "kek_obj == NULL");
        goto out;
    }

    /* vm_debug(DBG_STACK | DBG_STACK_FULL, "kek_obj = %p\n", kek_obj); */

    if (!IS_PTR(kek_obj)) {
        /* Value carried in the pointer word itself. */
        if (IS_CHAR(kek_obj)) {
            (void) snprintf(str, sizeof str, "char -%c-", CHAR_VAL(kek_obj));
        } else if (IS_INT(kek_obj)) {
            (void) snprintf(str, sizeof str, "int -%d-", INT_VAL(kek_obj));
        } else {
            /* Neither char nor int: say so instead of keeping old text. */
            (void) snprintf(str, sizeof str, "unknown immediate -%p-",
                    (void *) kek_obj);
        }
    } else {
        vm_assert(TYPE_CHECK(kek_obj->h.t),
                "kek_obj=%p, "
                "type=%d, "
                "state=%d, "
                "is_const=%d, "
                "fromspace=%d, "
                "tospace=%d\n",
                kek_obj,
                kek_obj->h.t,
                kek_obj->h.state,
                vm_is_const(kek_obj),
                gc_cheney_ptr_in_from_space(kek_obj, 1),
                gc_cheney_ptr_in_to_space(kek_obj, 1));

        switch (kek_obj->h.t) {
        case KEK_INT:
            (void) snprintf(str, sizeof str, "int -%d-", INT_VAL(kek_obj));
            break;
        case KEK_STR:
            (void) snprintf(str, sizeof str, "str -%s-",
                    ((kek_string_t *) kek_obj)->string);
            break;
        case KEK_ARR:
            (void) snprintf(str, sizeof str, "arr -%p-", (void*) kek_obj);
            break;
        case KEK_SYM:
            (void) snprintf(str, sizeof str, "sym -%s-",
                    ((kek_symbol_t *) kek_obj)->symbol);
            break;
        case KEK_NIL:
            (void) snprintf(str, sizeof str, "nil");
            break;
        case KEK_UDO:
            (void) snprintf(str, sizeof str, "udo");
            break;
        case KEK_ARR_OBJS:
            (void) snprintf(str, sizeof str, "arr_objs");
            break;
        case KEK_EXINFO:
            (void) snprintf(str, sizeof str, "exinfo");
            break;
        case KEK_EXPT:
            (void) snprintf(str, sizeof str, "expt");
            break;
        case KEK_FILE:
            (void) snprintf(str, sizeof str, "file");
            break;
        case KEK_TERM:
            (void) snprintf(str, sizeof str, "term");
            break;
        case KEK_CLASS:
            (void) snprintf(str, sizeof str, "class");
            break;
        case KEK_STACK:
            (void) snprintf(str, sizeof str, "stack");
            break;
        case KEK_COPIED:
            (void) snprintf(str, sizeof str, "COPIED!");
            break;
        default:
            /* Written before the assert so NDEBUG builds get fresh text. */
            (void) snprintf(str, sizeof str, "unknown type %d",
                    (int) kek_obj->h.t);
            assert(0);
            break;
        }
    }

out:
    return str;
}
/**
 * xmlEncodeEntities:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 *
 * '<', '>', '&' and '"' are replaced by their predefined entities, and
 * so is the apostrophe unless @doc is an HTML document.  Printable
 * ASCII, '\n', '\r' and '\t' are copied unchanged; other characters
 * are written as decimal character references.
 *
 * TODO: remove xmlEncodeEntities, once we are not afraid of breaking binary
 *       compatibility
 *
 * People must migrate their code to xmlEncodeEntitiesReentrant !
 * This routine will issue a warning when encountered (once only: the
 * warning flag is static and cleared after the first call).
 *
 * Returns a pointer to the shared static translation buffer holding the
 * substituted string, or NULL if @input is NULL or the buffer could not
 * be allocated.  The buffer is reused by every call, so the result is
 * overwritten by the next call and must not be freed by the caller.
 */
const xmlChar *
xmlEncodeEntities(xmlDocPtr doc, const xmlChar *input) {
    const xmlChar *cur = input;
    xmlChar *out = static_buffer;
    static int warning = 1;
    int html = 0;

    /* Deprecation notice, printed on the first call only. */
    if (warning) {
        xmlGenericError(xmlGenericErrorContext,
                "Deprecated API xmlEncodeEntities() used\n");
        xmlGenericError(xmlGenericErrorContext,
                " change code to use xmlEncodeEntitiesReentrant()\n");
        warning = 0;
    }

    if (input == NULL) return(NULL);
    if (doc != NULL)
        html = (doc->type == XML_HTML_DOCUMENT_NODE);

    /* The shared buffer is allocated lazily on first use. */
    if (static_buffer == NULL) {
        static_buffer_size = 1000;
        static_buffer = (xmlChar *)
            xmlMalloc(static_buffer_size * sizeof(xmlChar));
        if (static_buffer == NULL) {
            perror("malloc failed");
            return(NULL);
        }
        out = static_buffer;
    }
    while (*cur != '\0') {
        /*
         * Keep 100 bytes of headroom.  growBuffer() may move the
         * buffer, so the write position is saved as an offset and
         * rebuilt afterwards.
         */
        if (out - static_buffer > static_buffer_size - 100) {
            int indx = out - static_buffer;

            growBuffer();
            out = &static_buffer[indx];
        }

        /*
         * By default one has to encode at least '<', '>', '"' and '&' !
         */
        if (*cur == '<') {
            *out++ = '&';
            *out++ = 'l';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '>') {
            *out++ = '&';
            *out++ = 'g';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '&') {
            *out++ = '&';
            *out++ = 'a';
            *out++ = 'm';
            *out++ = 'p';
            *out++ = ';';
        } else if (*cur == '"') {
            *out++ = '&';
            *out++ = 'q';
            *out++ = 'u';
            *out++ = 'o';
            *out++ = 't';
            *out++ = ';';
        } else if ((*cur == '\'') && (!html)) {
            /* "&apos;" is only emitted for non-HTML documents */
            *out++ = '&';
            *out++ = 'a';
            *out++ = 'p';
            *out++ = 'o';
            *out++ = 's';
            *out++ = ';';
        } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
            (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
            /*
             * default case, just copy !
             */
            *out++ = *cur;
#ifndef USE_UTF_8
        } else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
            /* Single-byte xmlChar: high bytes become decimal char refs */
            char buf[10], *ptr;

            snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
            while (*ptr != 0) *out++ = *ptr++;
#endif
        } else if (IS_CHAR(*cur)) {
            /* Any other character accepted by IS_CHAR: decimal char ref */
            char buf[10], *ptr;

            snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
            while (*ptr != 0) *out++ = *ptr++;
        }
#if 0
        else {
            /*
             * default case, this is not a valid char !
             * Skip it...
             */
            xmlGenericError(xmlGenericErrorContext,
                    "xmlEncodeEntities: invalid char %d\n", (int) *cur);
        }
#endif
        /* Characters matched by no branch above are silently dropped. */
        cur++;
    }
    *out++ = 0;
    return(static_buffer);
}
/*
 * Parse the type and its initializer and emit it (recursively).
 *
 * oBuf    - output buffer the generated text is written to
 * topsym  - symbol being initialized; passed on with the first emitted
 *           piece only and then replaced by NULL for the rest
 * my_type - type of the (sub-)object being initialized
 * list    - initializer for this (sub-)object; may be NULL
 *
 * Scalars (pointers, floats, integral types) are handed to emitIvals().
 * Arrays, structs and unions are walked member by member, calling this
 * function again for each element or field.
 */
static void
emitInitVal(struct dbuf_s *oBuf, symbol *topsym, sym_link *my_type, initList *list)
{
    symbol *sym;
    int size, i;
    long lit;
    unsigned char *str;

    /* Handle designated initializers */
    if (list)
        list = reorderIlist (my_type, list);

    /* If this is a hole, substitute an appropriate initializer. */
    if (list && list->type == INIT_HOLE) {
        if (IS_AGGREGATE (my_type)) {
            list = newiList(INIT_DEEP, NULL); /* init w/ {} */
        } else {
            ast *ast = newAst_VALUE (constVal("0"));
            ast = decorateType (ast, RESULT_TYPE_NONE);
            list = newiList(INIT_NODE, ast);
        }
    }

    size = getSize(my_type);

    if (IS_PTR(my_type)) {
        DEBUGprintf ("(pointer, %d byte) %p\n", size, list ? (void *)(long)list2int(list) : NULL);
        emitIvals(oBuf, topsym, list, 0, size);
        return;
    }

    /* String literal: one retlw line per byte, terminating NUL included
     * (the loop condition is tested after the byte has been printed). */
    if (IS_ARRAY(my_type) && topsym && topsym->isstrlit) {
        str = (unsigned char *)SPEC_CVAL(topsym->etype).v_char;
        emitIvalLabel(oBuf, topsym);
        do {
            dbuf_printf (oBuf, "\tretlw 0x%02x ; '%c'\n", str[0], (str[0] >= 0x20 && str[0] < 128) ? str[0] : '.');
        } while (*(str++));
        return;
    }

    /* Array initialized from a single node is not supported.
     * NOTE(review): topsym is dereferenced here without a NULL check,
     * although recursive calls below pass topsym == NULL - confirm. */
    if (IS_ARRAY(my_type) && list && list->type == INIT_NODE) {
        fprintf (stderr, "Unhandled initialized symbol: %s\n", topsym->name);
        assert ( !"Initialized char-arrays are not yet supported, assign at runtime instead." );
        return;
    }

    if (IS_ARRAY(my_type)) {
        DEBUGprintf ("(array, %d items, %d byte) below\n", DCL_ELEM(my_type), size);
        assert (!list || list->type == INIT_DEEP);
        if (list) list = list->init.deep;
        /* One recursive call per element; when the initializer list runs
         * out, the remaining elements are emitted with list == NULL. */
        for (i = 0; i < DCL_ELEM(my_type); i++) {
            emitInitVal(oBuf, topsym, my_type->next, list);
            topsym = NULL;
            if (list) list = list->next;
        } // for i
        return;
    }

    if (IS_FLOAT(my_type)) {
        // float, 32 bit
        DEBUGprintf ("(float, %d byte) %lf\n", size, list ? list2int(list) : 0.0);
        emitIvals(oBuf, topsym, list, 0, size);
        return;
    }

    if (IS_CHAR(my_type) || IS_INT(my_type) || IS_LONG(my_type)) {
        // integral type, 8, 16, or 32 bit
        DEBUGprintf ("(integral, %d byte) 0x%lx/%ld\n", size, list ? (long)list2int(list) : 0, list ? (long)list2int(list) : 0);
        emitIvals(oBuf, topsym, list, 0, size);
        return;

    } else if (IS_STRUCT(my_type) && SPEC_STRUCT(my_type)->type == STRUCT) {
        // struct
        DEBUGprintf ("(struct, %d byte) handled below\n", size);
        assert (!list || (list->type == INIT_DEEP));

        // iterate over struct members and initList
        if (list) list = list->init.deep;
        sym = SPEC_STRUCT(my_type)->fields;
        while (sym) {
            long bitfield = 0;
            int len = 0;
            if (IS_BITFIELD(sym->type)) {
                /* Pack a run of consecutive bitfield members into one
                 * value that is emitted as a whole number of bytes. */
                while (sym && IS_BITFIELD(sym->type)) {
                    int bitoff = SPEC_BSTR(getSpec(sym->type)) + 8 * sym->offset;
                    assert (!list || ((list->type == INIT_NODE)
                                && IS_AST_LIT_VALUE(list->init.node)));
                    lit = (long) (list ? list2int(list) : 0);
                    DEBUGprintf ( "(bitfield member) %02lx (%d bit, starting at %d, bitfield %02lx)\n",
                            lit, SPEC_BLEN(getSpec(sym->type)),
                            bitoff, bitfield);
                    /* mask the literal to the field width, then shift
                     * it to the field's bit offset */
                    bitfield |= (lit & ((1ul << SPEC_BLEN(getSpec(sym->type))) - 1)) << bitoff;
                    len += SPEC_BLEN(getSpec(sym->type));

                    sym = sym->next;
                    if (list) list = list->next;
                } // while
                assert (len < sizeof (long) * 8); // did we overflow our initializer?!?
                len = (len + 7) & ~0x07; // round up to full bytes
                emitIvals(oBuf, topsym, NULL, bitfield, len / 8);
                topsym = NULL;
            } // if

            /* sym may have reached the end of the fields while packing
             * bitfields above, hence the re-check. */
            if (sym) {
                emitInitVal(oBuf, topsym, sym->type, list);
                topsym = NULL;
                sym = sym->next;
                if (list) list = list->next;
            } // if
        } // while
        if (list) {
            assert ( !"Excess initializers." );
        } // if
        return;

    } else if (IS_STRUCT(my_type) && SPEC_STRUCT(my_type)->type == UNION) {
        // union
        DEBUGprintf ("(union, %d byte) handled below\n", size);
        assert (list && list->type == INIT_DEEP);

        // iterate over union members and initList, try to map number and type of fields and initializers
        my_type = matchIvalToUnion(list, my_type, size);
        if (my_type) {
            emitInitVal(oBuf, topsym, my_type, list->init.deep);
            topsym = NULL;
            /* the matched member may be smaller than the union */
            size -= getSize(my_type);
            if (size > 0) {
                // pad with (leading) zeros
                emitIvals(oBuf, NULL, NULL, 0, size);
            }
            return;
        } // if

        assert ( !"No UNION member matches the initializer structure.");
    } else if (IS_BITFIELD(my_type)) {
        assert ( !"bitfields should only occur in structs..." );

    } else {
        printf ("SPEC_NOUN: %d\n", SPEC_NOUN(my_type));
        assert( !"Unhandled initialized type.");
    }
}
/*
 * For UNIONs, we first have to find the correct alternative to map the
 * initializer to. This function maps the structure of the initializer to
 * the UNION members recursively.
 *
 * list - initializer to match (may be NULL)
 * type - type to match it against
 * size - size a non-struct union member must have to be considered
 *        (struct members are passed 0 for their fields)
 *
 * Returns the type of the first `fitting' member, or NULL if the
 * initializer does not fit.
 */
static sym_link *
matchIvalToUnion (initList *list, sym_link *type, int size)
{
    symbol *sym;

    assert (type);

    if (IS_PTR(type) || IS_CHAR(type) || IS_INT(type) || IS_LONG(type)
            || IS_FLOAT(type))
    {
        /* A simple type fits a single node or no initializer at all. */
        if (!list || (list->type == INIT_NODE)) {
            DEBUGprintf ("OK, simple type\n");
            return (type);
        } else {
            DEBUGprintf ("ERROR, simple type\n");
            return (NULL);
        }
    } else if (IS_BITFIELD(type)) {
        if (!list || (list->type == INIT_NODE)) {
            DEBUGprintf ("OK, bitfield\n");
            return (type);
        } else {
            DEBUGprintf ("ERROR, bitfield\n");
            return (NULL);
        }
    } else if (IS_STRUCT(type) && SPEC_STRUCT(getSpec(type))->type == STRUCT) {
        /* Every field must accept its initializer and none may be left. */
        if (!list || (list->type == INIT_DEEP)) {
            if (list) list = list->init.deep;
            sym = SPEC_STRUCT(type)->fields;
            while (sym) {
                DEBUGprintf ("Checking STRUCT member %s\n", sym->name);
                if (!matchIvalToUnion(list, sym->type, 0)) {
                    DEBUGprintf ("ERROR, STRUCT member %s\n", sym->name);
                    return (NULL);
                }
                if (list) list = list->next;
                sym = sym->next;
            } // while

            // excess initializers?
            if (list) {
                DEBUGprintf ("ERROR, excess initializers\n");
                return (NULL);
            }

            DEBUGprintf ("OK, struct\n");
            return (type);
        }
        return (NULL);
    } else if (IS_STRUCT(type) && SPEC_STRUCT(getSpec(type))->type == UNION) {
        if (!list || (list->type == INIT_DEEP)) {
            if (list) list = list->init.deep;
            sym = SPEC_STRUCT(type)->fields;
            while (sym) {
                /*
                 * Each hole skips one member.  Stop as soon as the
                 * members are exhausted: more holes than members would
                 * otherwise walk sym off the end of the field list and
                 * dereference NULL below.
                 */
                while (sym && list && list->type == INIT_HOLE) {
                    list = list->next;
                    sym = sym->next;
                }
                if (!sym)
                    break;

                DEBUGprintf ("Checking UNION member %s.\n", sym->name);
                if (((IS_STRUCT(sym->type) || getSize(sym->type) == size))
                        && matchIvalToUnion(list, sym->type, size))
                {
                    DEBUGprintf ("Matched UNION member %s.\n", sym->name);
                    return (sym->type);
                }
                sym = sym->next;
            } // while
        } // if
        // no match found
        DEBUGprintf ("ERROR, no match found.\n");
        return (NULL);
    } else {
        assert ( !"Unhandled type in UNION." );
    }

    assert ( !"No match found in UNION for the given initializer structure." );
    return (NULL);
}
/**
 * xmlEncodeEntitiesInternal:
 * @doc:  the document containing the string
 * @input:  A string to convert to XML.
 * @attr:  are we handling an attribute value
 *
 * Do a global encoding of a string, replacing the predefined entities
 * and non ASCII values with their entities and CharRef counterparts.
 * Contrary to xmlEncodeEntities, this routine is reentrant, and result
 * must be deallocated.
 *
 * Returns A newly allocated string with the substitution done, or NULL
 * if @input is NULL or the initial buffer allocation fails; the
 * mem_error exit also returns NULL.
 */
static xmlChar *
xmlEncodeEntitiesInternal(xmlDocPtr doc, const xmlChar *input, int attr) {
    const xmlChar *cur = input;
    xmlChar *buffer = NULL;
    xmlChar *out = NULL;
    size_t buffer_size = 0;
    int html = 0;

    if (input == NULL) return(NULL);
    if (doc != NULL)
        html = (doc->type == XML_HTML_DOCUMENT_NODE);

    /*
     * allocate a translation buffer.
     */
    buffer_size = 1000;
    buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
    if (buffer == NULL) {
        xmlEntitiesErrMemory("xmlEncodeEntities: malloc failed");
        return(NULL);
    }
    out = buffer;

    while (*cur != '\0') {
        size_t indx = out - buffer;

        /*
         * Keep at least 100 bytes of headroom before writing anything.
         * growBufferReentrant() is a macro defined outside this function;
         * `out' is re-derived from the saved offset afterwards because
         * `buffer' may have changed (presumably the macro jumps to
         * mem_error on failure - confirm against its definition).
         */
        if (indx + 100 > buffer_size) {
            growBufferReentrant();
            out = &buffer[indx];
        }

        /*
         * By default one have to encode at least '<', '>', '"' and '&' !
         */
        if (*cur == '<') {
            const xmlChar *end;

            /*
             * Special handling of server side include in HTML attributes
             */
            if (html && attr &&
                (cur[1] == '!') && (cur[2] == '-') && (cur[3] == '-') &&
                ((end = xmlStrstr(cur, BAD_CAST "-->")) != NULL)) {
                /* Copy the comment verbatim up to the closing marker ... */
                while (cur != end) {
                    *out++ = *cur++;
                    indx = out - buffer;
                    if (indx + 100 > buffer_size) {
                        growBufferReentrant();
                        out = &buffer[indx];
                    }
                }
                /* ... then the three characters of "-->" itself. */
                *out++ = *cur++;
                *out++ = *cur++;
                *out++ = *cur++;
                continue;
            }
            *out++ = '&';
            *out++ = 'l';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '>') {
            *out++ = '&';
            *out++ = 'g';
            *out++ = 't';
            *out++ = ';';
        } else if (*cur == '&') {
            /*
             * Special handling of &{...} construct from HTML 4, see
             * http://www.w3.org/TR/html401/appendix/notes.html#h-B.7.1
             */
            if (html && attr && (cur[1] == '{') &&
                (strchr((const char *) cur, '}'))) {
                /* Copy everything up to and including the closing '}'. */
                while (*cur != '}') {
                    *out++ = *cur++;
                    indx = out - buffer;
                    if (indx + 100 > buffer_size) {
                        growBufferReentrant();
                        out = &buffer[indx];
                    }
                }
                *out++ = *cur++;
                continue;
            }
            *out++ = '&';
            *out++ = 'a';
            *out++ = 'm';
            *out++ = 'p';
            *out++ = ';';
        } else if (((*cur >= 0x20) && (*cur < 0x80)) ||
            (*cur == '\n') || (*cur == '\t') || ((html) && (*cur == '\r'))) {
            /*
             * default case, just copy !
             */
            *out++ = *cur;
        } else if (*cur >= 0x80) {
            if (((doc != NULL) && (doc->encoding != NULL)) || (html)) {
                /*
                 * Bjørn Reese <*****@*****.**> provided the patch
                xmlChar xc;
                xc = (*cur & 0x3F) << 6;
                if (cur[1] != 0) {
                    xc += *(++cur) & 0x3F;
                    *out++ = xc;
                } else
                 */
                *out++ = *cur;
            } else {
                /*
                 * We assume we have UTF-8 input.
                 */
                char buf[11], *ptr;
                int val = 0, l = 1;

                if (*cur < 0xC0) {
                    /* A continuation byte cannot start a sequence. */
                    xmlEntitiesErr(XML_CHECK_NOT_UTF8,
                            "xmlEncodeEntities: input not UTF-8");
                    if (doc != NULL)
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) *out++ = *ptr++;
                    cur++;
                    continue;
                } else if (*cur < 0xE0) {
                    /*
                     * Only decode when every continuation byte is present.
                     * A sequence cut short by the terminating NUL leaves
                     * l == 1 and is reported below, so nothing is read
                     * past the end of the string and `cur' never skips
                     * over the terminator.
                     */
                    if (cur[1] != 0) {
                        val = (cur[0]) & 0x1F;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        l = 2;
                    }
                } else if (*cur < 0xF0) {
                    if ((cur[1] != 0) && (cur[2] != 0)) {
                        val = (cur[0]) & 0x0F;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        val <<= 6;
                        val |= (cur[2]) & 0x3F;
                        l = 3;
                    }
                } else if (*cur < 0xF8) {
                    if ((cur[1] != 0) && (cur[2] != 0) && (cur[3] != 0)) {
                        val = (cur[0]) & 0x07;
                        val <<= 6;
                        val |= (cur[1]) & 0x3F;
                        val <<= 6;
                        val |= (cur[2]) & 0x3F;
                        val <<= 6;
                        val |= (cur[3]) & 0x3F;
                        l = 4;
                    }
                }
                if ((l == 1) || (!IS_CHAR(val))) {
                    /* Undecodable lead byte, truncated sequence or a
                       code point rejected by IS_CHAR: emit the single
                       byte as a decimal char ref and move on by one. */
                    xmlEntitiesErr(XML_ERR_INVALID_CHAR,
                        "xmlEncodeEntities: char out of range\n");
                    if (doc != NULL)
                        doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
                    snprintf(buf, sizeof(buf), "&#%d;", *cur);
                    buf[sizeof(buf) - 1] = 0;
                    ptr = buf;
                    while (*ptr != 0) *out++ = *ptr++;
                    cur++;
                    continue;
                }
                /*
                 * We could do multiple things here. Just save as a char ref
                 */
                snprintf(buf, sizeof(buf), "&#x%X;", val);
                buf[sizeof(buf) - 1] = 0;
                ptr = buf;
                while (*ptr != 0) *out++ = *ptr++;
                cur += l;
                continue;
            }
        } else if (IS_BYTE_CHAR(*cur)) {
            char buf[11], *ptr;

            snprintf(buf, sizeof(buf), "&#%d;", *cur);
            buf[sizeof(buf) - 1] = 0;
            ptr = buf;
            while (*ptr != 0) *out++ = *ptr++;
        }
        cur++;
    }
    *out = 0;
    return(buffer);

mem_error:
    xmlEntitiesErrMemory("xmlEncodeEntities: realloc failed");
    xmlFree(buffer);
    return(NULL);
}
x*/	void Modify_StringX(REBCNT action, REBVAL *string, REBVAL *arg)
/*
**	Actions: INSERT, APPEND, CHANGE
**
**		string [string!] {Series at point to insert}
**		value [any-type!] {The value to insert}
**		/part {Limits to a given length or position.}
**		length [number! series! pair!]
**		/only {Inserts a series as a series.}
**		/dup {Duplicates the insert a specified number of times.}
**		count [number! pair!]
**
**	An argument that is not already a usable string is first turned
**	into a temporary series, which is then written into the target
**	once per /dup count. On exit the index of `string` is 0 for
**	APPEND, otherwise it is the position just past the written data.
**
***********************************************************************/
{
	REBSER *target = VAL_SERIES(string);
	REBCNT at = VAL_INDEX(string);
	REBCNT end = VAL_TAIL(string);
	REBINT remove_len;		// length to be removed
	REBINT insert_len = 1;	// length to be inserted
	REBINT dups = 1;		// DUP count
	REBINT total;
	REBVAL *item;
	REBSER *source = 0;		// argument series

	// Length of target (may modify index): (arg can be anything)
	if (action == A_CHANGE)
		remove_len = Partial1(string, DS_ARG(AN_LENGTH));
	else
		remove_len = Partial1(arg, DS_ARG(AN_LENGTH));

	// Re-read the index after Partial1, then clamp it to the tail.
	at = VAL_INDEX(string);
	if (action == A_APPEND || at > end) at = end;

	// If the arg is not a string, then we need to create a string:
	if (IS_BINARY(string)) {
		if (IS_INTEGER(arg)) {
			if (VAL_INT64(arg) > 255 || VAL_INT64(arg) < 0) Trap_Range(arg);
			source = Make_Binary(1);
			Append_Byte(source, VAL_CHAR(arg)); // check for size!!!
		}
		else if (!ANY_BINSTR(arg)) Trap_Arg(arg);
	}
	else if (IS_BLOCK(arg)) {
		// MOVE!
		REB_MOLD mo = {0};
		mo.series = Make_Unicode(VAL_BLK_LEN(arg) * 10); // GC!?
		source = mo.series;
		item = VAL_BLK_DATA(arg);
		while (NOT_END(item)) {
			Mold_Value(&mo, item, 0);
			item++;
		}
	}
	else if (IS_CHAR(arg)) {
		// Optimize this case !!!
		source = Make_Unicode(1);
		Append_Byte(source, VAL_CHAR(arg));
	}
	else if (!ANY_STR(arg) || IS_TAG(arg)) {
		source = Copy_Form_Value(arg, 0);
	}

	// A freshly built series replaces the argument value; otherwise the
	// argument's own series is the source.
	if (source) {
		Set_String(arg, source);
	}
	else {
		source = VAL_SERIES(arg);
	}

	// Length of insertion:
	if (action != A_CHANGE && DS_REF(AN_PART))
		insert_len = remove_len;
	else
		insert_len = VAL_LEN(arg);

	// If Source == Destination we need to prevent possible conflicts.
	// Clone the argument just to be safe.
	// (Note: It may be possible to optimize special cases like append !!)
	if (target == VAL_SERIES(arg)) {
		source = Copy_Series_Part(source, VAL_INDEX(arg), insert_len); // GC!?
	}

	// Get /DUP count:
	if (DS_REF(AN_DUP)) {
		dups = Int32(DS_ARG(AN_COUNT));
		if (dups <= 0) return; // no changes
	}

	// Total to insert:
	total = dups * insert_len;

	if (action == A_CHANGE) {
		// CHANGE grows, shrinks (only with /part) or extends the tail:
		if (total > remove_len) {
			Expand_Series(target, at, total-remove_len);
		}
		else if (total < remove_len && DS_REF(AN_PART)) {
			Remove_Series(target, at, remove_len-total);
		}
		else if (total + at > end) {
			EXPAND_SERIES_TAIL(target, total - (end - at));
		}
	}
	else {
		// Always expand series for INSERT and APPEND actions:
		Expand_Series(target, at, total);
	}

	// For dup count:
	while (dups > 0) {
		Insert_String(target, at, source, VAL_INDEX(arg), insert_len, TRUE);
		at += insert_len;
		dups--;
	}

	TERM_SERIES(target);

	if (action == A_APPEND)
		VAL_INDEX(string) = 0;
	else
		VAL_INDEX(string) = at;
}