static REBSER *make_string(REBVAL *arg, REBOOL make) { REBSER *ser = 0; // MAKE <type> 123 if (make && (IS_INTEGER(arg) || IS_DECIMAL(arg))) { ser = Make_Binary(Int32s(arg, 0)); } // MAKE/TO <type> <binary!> else if (IS_BINARY(arg)) { REBYTE *bp = VAL_BIN_DATA(arg); REBCNT len = VAL_LEN(arg); switch (What_UTF(bp, len)) { case 0: break; case 8: // UTF-8 encoded bp += 3; len -= 3; break; default: Trap0(RE_BAD_DECODE); } ser = Decode_UTF_String(bp, len, 8); // UTF-8 } // MAKE/TO <type> <any-string> else if (ANY_BINSTR(arg)) { ser = Copy_String(VAL_SERIES(arg), VAL_INDEX(arg), VAL_LEN(arg)); } // MAKE/TO <type> <any-word> else if (ANY_WORD(arg)) { ser = Copy_Mold_Value(arg, TRUE); //ser = Append_UTF8(0, Get_Word_Name(arg), -1); } // MAKE/TO <type> #"A" else if (IS_CHAR(arg)) { ser = (VAL_CHAR(arg) > 0xff) ? Make_Unicode(2) : Make_Binary(2); Append_Byte(ser, VAL_CHAR(arg)); } // MAKE/TO <type> <any-value> // else if (IS_NONE(arg)) { // ser = Make_Binary(0); // } else ser = Copy_Form_Value(arg, 1<<MOPT_TIGHT); return ser; }
static REBSER *MAKE_TO_String_Common(const REBVAL *arg) { REBSER *ser = 0; // MAKE/TO <type> <binary!> if (IS_BINARY(arg)) { REBYTE *bp = VAL_BIN_AT(arg); REBCNT len = VAL_LEN_AT(arg); switch (What_UTF(bp, len)) { case 0: break; case 8: // UTF-8 encoded bp += 3; len -= 3; break; default: fail (Error(RE_BAD_UTF8)); } ser = Decode_UTF_String(bp, len, 8); // UTF-8 } // MAKE/TO <type> <any-string> else if (ANY_BINSTR(arg)) { ser = Copy_String_Slimming(VAL_SERIES(arg), VAL_INDEX(arg), VAL_LEN_AT(arg)); } // MAKE/TO <type> <any-word> else if (ANY_WORD(arg)) { ser = Copy_Mold_Value(arg, 0 /* opts... MOPT_0? */); } // MAKE/TO <type> #"A" else if (IS_CHAR(arg)) { ser = (VAL_CHAR(arg) > 0xff) ? Make_Unicode(2) : Make_Binary(2); Append_Codepoint_Raw(ser, VAL_CHAR(arg)); } else ser = Copy_Form_Value(arg, 1 << MOPT_TIGHT); return ser; }
x*/ void Modify_StringX(REBCNT action, REBVAL *string, REBVAL *arg) /* ** Actions: INSERT, APPEND, CHANGE ** ** string [string!] {Series at point to insert} ** value [any-type!] {The value to insert} ** /part {Limits to a given length or position.} ** length [number! series! pair!] ** /only {Inserts a series as a series.} ** /dup {Duplicates the insert a specified number of times.} ** count [number! pair!] ** ***********************************************************************/ { REBSER *series = VAL_SERIES(string); REBCNT index = VAL_INDEX(string); REBCNT tail = VAL_TAIL(string); REBINT rlen; // length to be removed REBINT ilen = 1; // length to be inserted REBINT cnt = 1; // DUP count REBINT size; REBVAL *val; REBSER *arg_ser = 0; // argument series // Length of target (may modify index): (arg can be anything) rlen = Partial1((action == A_CHANGE) ? string : arg, DS_ARG(AN_LENGTH)); index = VAL_INDEX(string); if (action == A_APPEND || index > tail) index = tail; // If the arg is not a string, then we need to create a string: if (IS_BINARY(string)) { if (IS_INTEGER(arg)) { if (VAL_INT64(arg) > 255 || VAL_INT64(arg) < 0) Trap_Range(arg); arg_ser = Make_Binary(1); Append_Byte(arg_ser, VAL_CHAR(arg)); // check for size!!! } else if (!ANY_BINSTR(arg)) Trap_Arg(arg); } else if (IS_BLOCK(arg)) { // MOVE! REB_MOLD mo = {0}; arg_ser = mo.series = Make_Unicode(VAL_BLK_LEN(arg) * 10); // GC!? for (val = VAL_BLK_DATA(arg); NOT_END(val); val++) Mold_Value(&mo, val, 0); } else if (IS_CHAR(arg)) { // Optimize this case !!! arg_ser = Make_Unicode(1); Append_Byte(arg_ser, VAL_CHAR(arg)); } else if (!ANY_STR(arg) || IS_TAG(arg)) { arg_ser = Copy_Form_Value(arg, 0); } if (arg_ser) Set_String(arg, arg_ser); else arg_ser = VAL_SERIES(arg); // Length of insertion: ilen = (action != A_CHANGE && DS_REF(AN_PART)) ? rlen : VAL_LEN(arg); // If Source == Destination we need to prevent possible conflicts. // Clone the argument just to be safe. // (Note: It may be possible to optimize special cases like append !!) if (series == VAL_SERIES(arg)) { arg_ser = Copy_Series_Part(arg_ser, VAL_INDEX(arg), ilen); // GC!? } // Get /DUP count: if (DS_REF(AN_DUP)) { cnt = Int32(DS_ARG(AN_COUNT)); if (cnt <= 0) return; // no changes } // Total to insert: size = cnt * ilen; if (action != A_CHANGE) { // Always expand series for INSERT and APPEND actions: Expand_Series(series, index, size); } else { if (size > rlen) Expand_Series(series, index, size-rlen); else if (size < rlen && DS_REF(AN_PART)) Remove_Series(series, index, rlen-size); else if (size + index > tail) { EXPAND_SERIES_TAIL(series, size - (tail - index)); } } // For dup count: for (; cnt > 0; cnt--) { Insert_String(series, index, arg_ser, VAL_INDEX(arg), ilen, TRUE); index += ilen; } TERM_SERIES(series); VAL_INDEX(string) = (action == A_APPEND) ? 0 : index; }
// // Modify_String: C // // Returns new dst_idx. // REBCNT Modify_String( REBCNT action, // INSERT, APPEND, CHANGE REBSER *dst_ser, // target REBCNT dst_idx, // position const REBVAL *src_val, // source REBFLGS flags, // AN_PART REBINT dst_len, // length to remove REBINT dups // dup count ) { REBSER *src_ser = 0; REBCNT src_idx = 0; REBCNT src_len; REBCNT tail = SER_LEN(dst_ser); REBINT size; // total to insert REBOOL needs_free; REBINT limit; // For INSERT/PART and APPEND/PART if (action != SYM_CHANGE && GET_FLAG(flags, AN_PART)) limit = dst_len; // should be non-negative else limit = -1; if (limit == 0 || dups < 0) return (action == SYM_APPEND) ? 0 : dst_idx; if (action == SYM_APPEND || dst_idx > tail) dst_idx = tail; // If the src_val is not a string, then we need to create a string: if (GET_FLAG(flags, AN_SERIES)) { // used to indicate a BINARY series if (IS_INTEGER(src_val)) { src_ser = Make_Series_Codepoint(Int8u(src_val)); needs_free = TRUE; limit = -1; } else if (IS_BLOCK(src_val)) { src_ser = Join_Binary(src_val, limit); // NOTE: it's the shared FORM buffer! needs_free = FALSE; limit = -1; } else if (IS_CHAR(src_val)) { // // "UTF-8 was originally specified to allow codepoints with up to // 31 bits (or 6 bytes). But with RFC3629, this was reduced to 4 // bytes max. to be more compatible to UTF-16." So depending on // which RFC you consider "the UTF-8", max size is either 4 or 6. // src_ser = Make_Binary(6); SET_SERIES_LEN( src_ser, Encode_UTF8_Char(BIN_HEAD(src_ser), VAL_CHAR(src_val)) ); needs_free = TRUE; limit = -1; } else if (ANY_STRING(src_val)) { src_len = VAL_LEN_AT(src_val); if (limit >= 0 && src_len > cast(REBCNT, limit)) src_len = limit; src_ser = Make_UTF8_From_Any_String(src_val, src_len, 0); needs_free = TRUE; limit = -1; } else if (!IS_BINARY(src_val)) fail (Error_Invalid_Arg(src_val)); } else if (IS_CHAR(src_val)) { src_ser = Make_Series_Codepoint(VAL_CHAR(src_val)); needs_free = TRUE; } else if (IS_BLOCK(src_val)) { src_ser = Form_Tight_Block(src_val); needs_free = TRUE; } else if (!ANY_STRING(src_val) || IS_TAG(src_val)) { src_ser = Copy_Form_Value(src_val, 0); needs_free = TRUE; } // Use either new src or the one that was passed: if (src_ser) { src_len = SER_LEN(src_ser); } else { src_ser = VAL_SERIES(src_val); src_idx = VAL_INDEX(src_val); src_len = VAL_LEN_AT(src_val); needs_free = FALSE; } if (limit >= 0) src_len = limit; // If Source == Destination we need to prevent possible conflicts. // Clone the argument just to be safe. // (Note: It may be possible to optimize special cases like append !!) if (dst_ser == src_ser) { assert(!needs_free); src_ser = Copy_Sequence_At_Len(src_ser, src_idx, src_len); needs_free = TRUE; src_idx = 0; } // Total to insert: size = dups * src_len; if (action != SYM_CHANGE) { // Always expand dst_ser for INSERT and APPEND actions: Expand_Series(dst_ser, dst_idx, size); } else { if (size > dst_len) Expand_Series(dst_ser, dst_idx, size - dst_len); else if (size < dst_len && GET_FLAG(flags, AN_PART)) Remove_Series(dst_ser, dst_idx, dst_len - size); else if (size + dst_idx > tail) { EXPAND_SERIES_TAIL(dst_ser, size - (tail - dst_idx)); } } // For dup count: for (; dups > 0; dups--) { Insert_String(dst_ser, dst_idx, src_ser, src_idx, src_len, TRUE); dst_idx += src_len; } TERM_SEQUENCE(dst_ser); if (needs_free) { // If we did not use the series that was passed in, but rather // created an internal temporary one, we need to free it. Free_Series(src_ser); } return (action == SYM_APPEND) ? 0 : dst_idx; }
*/ REBCNT Modify_String(REBCNT action, REBSER *dst_ser, REBCNT dst_idx, const REBVAL *src_val, REBCNT flags, REBINT dst_len, REBINT dups) /* ** action: INSERT, APPEND, CHANGE ** ** dst_ser: target ** dst_idx: position ** src_val: source ** flags: AN_PART ** dst_len: length to remove ** dups: dup count ** ** return: new dst_idx ** ***********************************************************************/ { REBSER *src_ser = 0; REBCNT src_idx = 0; REBCNT src_len; REBCNT tail = SERIES_TAIL(dst_ser); REBINT size; // total to insert if (dups < 0) return (action == A_APPEND) ? 0 : dst_idx; if (action == A_APPEND || dst_idx > tail) dst_idx = tail; // If the src_val is not a string, then we need to create a string: if (GET_FLAG(flags, AN_SERIES)) { // used to indicate a BINARY series if (IS_INTEGER(src_val)) { src_ser = Append_Byte(0, Int8u(src_val)); // creates a binary } else if (IS_BLOCK(src_val)) { src_ser = Join_Binary(src_val); // NOTE: it's the shared FORM buffer! } else if (IS_CHAR(src_val)) { src_ser = Make_Binary(6); // (I hate unicode) src_ser->tail = Encode_UTF8_Char(BIN_HEAD(src_ser), VAL_CHAR(src_val)); } else if (!ANY_BINSTR(src_val)) Trap_Arg_DEAD_END(src_val); } else if (IS_CHAR(src_val)) { src_ser = Append_Byte(0, VAL_CHAR(src_val)); // unicode ok too } else if (IS_BLOCK(src_val)) { src_ser = Form_Tight_Block(src_val); } else if (!ANY_STR(src_val) || IS_TAG(src_val)) { src_ser = Copy_Form_Value(src_val, 0); } // Use either new src or the one that was passed: if (src_ser) { src_len = SERIES_TAIL(src_ser); } else { src_ser = VAL_SERIES(src_val); src_idx = VAL_INDEX(src_val); src_len = VAL_LEN(src_val); } // For INSERT or APPEND with /PART use the dst_len not src_len: if (action != A_CHANGE && GET_FLAG(flags, AN_PART)) src_len = dst_len; // If Source == Destination we need to prevent possible conflicts. // Clone the argument just to be safe. // (Note: It may be possible to optimize special cases like append !!) if (dst_ser == src_ser) { src_ser = Copy_Series_Part(src_ser, src_idx, src_len); src_idx = 0; } // Total to insert: size = dups * src_len; if (action != A_CHANGE) { // Always expand dst_ser for INSERT and APPEND actions: Expand_Series(dst_ser, dst_idx, size); } else { if (size > dst_len) Expand_Series(dst_ser, dst_idx, size - dst_len); else if (size < dst_len && GET_FLAG(flags, AN_PART)) Remove_Series(dst_ser, dst_idx, dst_len - size); else if (size + dst_idx > tail) { EXPAND_SERIES_TAIL(dst_ser, size - (tail - dst_idx)); } } // For dup count: for (; dups > 0; dups--) { Insert_String(dst_ser, dst_idx, src_ser, src_idx, src_len, TRUE); dst_idx += src_len; } TERM_SERIES(dst_ser); return (action == A_APPEND) ? 0 : dst_idx; }
if (Form_Value_Throws( D_OUT, &mo, &pending_delimiter, value, (REF(quote) || !IS_BLOCK(value) ? 0 : FORM_FLAG_REDUCE), delimiter, 0 // depth )) { return R_OUT_IS_THROWN; } Val_Init_String(D_OUT, Pop_Molded_String(&mo)); } else { Val_Init_String(D_OUT, Copy_Form_Value(value, 0)); } return R_OUT; } // // mold: native [ // // "Converts a value to a REBOL-readable string." // // value [any-value!] "The value to mold" // /only {For a block value, mold only its contents, no outer []} // /all "Use construction syntax" // /flat "No indentation" // ]
*/ static REBCNT Parse_To(REBPARSE *parse, REBCNT index, REBVAL *item, REBFLG is_thru) /* ** Parse TO a specific: ** 1. integer - index position ** 2. END - end of input ** 3. value - according to datatype ** 4. block of values - the first one we hit ** ***********************************************************************/ { REBSER *series = parse->series; REBCNT i; REBSER *ser; // TO a specific index position. if (IS_INTEGER(item)) { i = (REBCNT)Int32(item) - (is_thru ? 0 : 1); if (i > series->tail) i = series->tail; } // END else if (IS_WORD(item) && VAL_WORD_CANON(item) == SYM_END) { i = series->tail; } else if (IS_BLOCK(item)) { i = To_Thru(parse, index, item, is_thru); } else { if (IS_BLOCK_INPUT(parse)) { REBVAL word; /// !!!Temp, but where can we put it? if (IS_LIT_WORD(item)) { // patch to search for word, not lit. word = *item; VAL_SET(&word, REB_WORD); item = &word; } ///i = Find_Value(series, index, tail-index, item, 1, (REBOOL)(PF_CASE & flags), FALSE, 1); i = Find_Block(series, index, series->tail, item, 1, HAS_CASE(parse)?AM_FIND_CASE:0, 1); if (i != NOT_FOUND && is_thru) i++; } else { // "str" if (ANY_BINSTR(item)) { if (!IS_STRING(item) && !IS_BINARY(item)) { // !!! Can this be optimized not to use COPY? ser = Copy_Form_Value(item, 0); i = Find_Str_Str(series, 0, index, series->tail, 1, ser, 0, ser->tail, HAS_CASE(parse)); if (i != NOT_FOUND && is_thru) i += ser->tail; } else { i = Find_Str_Str(series, 0, index, series->tail, 1, VAL_SERIES(item), VAL_INDEX(item), VAL_LEN(item), HAS_CASE(parse)); if (i != NOT_FOUND && is_thru) i += VAL_LEN(item); } } // #"A" else if (IS_CHAR(item)) { i = Find_Str_Char(series, 0, index, series->tail, 1, VAL_CHAR(item), HAS_CASE(parse)); if (i != NOT_FOUND && is_thru) i++; } } } return i; }
*/ static REBCNT Parse_Next_String(REBPARSE *parse, REBCNT index, REBVAL *item, REBCNT depth) /* ** Match the next item in the string ruleset. ** ** If it matches, return the index just past it. ** Otherwise return NOT_FOUND. ** ***********************************************************************/ { // !!! THIS CODE NEEDS CLEANUP AND REWRITE BASED ON OTHER CHANGES REBSER *series = parse->series; REBSER *ser; REBCNT flags = parse->flags | AM_FIND_MATCH | AM_FIND_TAIL; int rewrite_needed; if (Trace_Level) { Trace_Value(7, item); Trace_String(8, STR_SKIP(series, index), series->tail - index); } if (IS_NONE(item)) return index; if (index >= series->tail) return NOT_FOUND; switch (VAL_TYPE(item)) { // Do we match a single character? case REB_CHAR: if (HAS_CASE(parse)) index = (VAL_CHAR(item) == GET_ANY_CHAR(series, index)) ? index+1 : NOT_FOUND; else index = (UP_CASE(VAL_CHAR(item)) == UP_CASE(GET_ANY_CHAR(series, index))) ? index+1 : NOT_FOUND; break; case REB_EMAIL: case REB_STRING: case REB_BINARY: index = Find_Str_Str(series, 0, index, SERIES_TAIL(series), 1, VAL_SERIES(item), VAL_INDEX(item), VAL_LEN(item), flags); break; // Do we match to a char set? case REB_BITSET: flags = Check_Bit(VAL_SERIES(item), GET_ANY_CHAR(series, index), !HAS_CASE(parse)); index = flags ? index + 1 : NOT_FOUND; break; /* case REB_DATATYPE: // Currently: integer! if (VAL_DATATYPE(item) == REB_INTEGER) { REBCNT begin = index; while (IS_LEX_NUMBER(*str)) str++, index++; if (begin == index) index = NOT_FOUND; } break; */ case REB_TAG: case REB_FILE: // case REB_ISSUE: // !! Can be optimized (w/o COPY) ser = Copy_Form_Value(item, 0); index = Find_Str_Str(series, 0, index, SERIES_TAIL(series), 1, ser, 0, ser->tail, flags); break; case REB_NONE: break; // Parse a sub-rule block: case REB_BLOCK: index = Parse_Rules_Loop(parse, index, VAL_BLK_DATA(item), depth); break; // Do an expression: case REB_PAREN: item = Do_Block_Value_Throw(item); // might GC // old: if (IS_ERROR(item)) Throw_Error(VAL_ERR_OBJECT(item)); index = MIN(index, series->tail); // may affect tail break; default: Trap1(RE_PARSE_RULE, item); } return index; }