*/ REBSER *Copy_String(REBSER *src, REBCNT index, REBINT length)
/*
**      Copies a portion of any string (byte or unicode) into a newly
**      made series, starting at `index` and running for `length`
**      elements.  A negative `length` means "everything from `index`
**      up to the tail of src".
**
**      Will slim the string, if needed: when src is not byte-sized but
**      no element in the copied range is above 0xFF, the copy is made
**      one byte wide; otherwise it is made REBUNI wide.
**
**      The index + length must be in range unsigned int 32.
**
**      Returns the new series, with its tail set to `length` and
**      terminated.
**
***********************************************************************/
{
    REBUNI *up;
    REBYTE wide = 1;
    REBSER *dst;
    REBINT n;

    if (length < 0) {
        // Default to what remains *after* index rather than the whole
        // tail.  Using the full tail with a nonzero index would make
        // both the width scan and the copy below run `index` elements
        // past the end of src.
        REBCNT tail = src->tail;
        length = (index < tail) ? (REBINT)(tail - index) : 0;
    }

    // Can it be slimmed down?
    if (!BYTE_SIZE(src)) {
        up = UNI_SKIP(src, index);
        for (n = 0; n < length; n++) {
            if (up[n] > 0xff)
                break;
        }
        // Stopping early means a wide character was found in range.
        if (n < length)
            wide = sizeof(REBUNI);
    }

    dst = Make_Series(length + 1, wide, MKS_NONE); // +1 for terminator
    Insert_String(dst, 0, src, index, length, TRUE);
    SERIES_TAIL(dst) = length;
    TERM_SEQUENCE(dst);

    return dst;
}
// // Trim_Tail: C // // Used to trim off hanging spaces during FORM and MOLD. // void Trim_Tail(REBSER *src, REBYTE chr) { REBOOL unicode = NOT(BYTE_SIZE(src)); REBCNT tail; REBUNI c; assert(!Is_Array_Series(src)); for (tail = SER_LEN(src); tail > 0; tail--) { c = unicode ? *UNI_AT(src, tail - 1) : *BIN_AT(src, tail - 1); if (c != chr) break; } SET_SERIES_LEN(src, tail); TERM_SEQUENCE(src); }
*/ void Trim_Tail(REBSER *src, REBYTE chr)
/*
**      Shortens a string or binary series so that it no longer ends
**      in any run of `chr`, then re-terminates it.  Used to trim off
**      hanging spaces during FORM and MOLD.
**
***********************************************************************/
{
    REBOOL wide = !BYTE_SIZE(src);
    REBCNT len;

    assert(!Is_Array_Series(src));

    // Walk backward from the end while the last element equals `chr`.
    len = SERIES_TAIL(src);
    while (len > 0) {
        REBUNI last;

        if (wide)
            last = *UNI_SKIP(src, len - 1);
        else
            last = *BIN_SKIP(src, len - 1);

        if (last != chr)
            break;

        len--;
    }

    SERIES_TAIL(src) = len;
    TERM_SEQUENCE(src);
}
*/ REBSER *Make_Unicode(REBCNT length)
/*
**      Make a REBUNI-wide string series.  Used for internal strings.
**      One element beyond `length` is requested for the terminator.
**
***********************************************************************/
{
    REBSER *uni;

    uni = Make_Series(length + 1, sizeof(REBUNI), MKS_NONE);
    LABEL_SERIES(uni, "make unicode");

    // !!! Callers disagree about whether `length` is only the capacity
    // (with the new string starting out empty) or a fixed length that
    // is being preallocated.  Until that is settled, cover both
    // readings: store a zero at position `length` and also run
    // TERM_SEQUENCE on the series.
    UNI_HEAD(uni)[length] = 0;
    TERM_SEQUENCE(uni);

    return uni;
}
*/ REBSER *Make_Binary(REBCNT length)
/*
**      Make a byte-wide series.  For byte, C, and UTF8 strings.
**      One element beyond `length` is requested for the terminator.
**
***********************************************************************/
{
    REBSER *bin;

    bin = Make_Series(length + 1, sizeof(REBYTE), MKS_NONE);
    LABEL_SERIES(bin, "make binary");

    // !!! Callers disagree about whether `length` is only the capacity
    // (with the new binary starting out empty) or a fixed length that
    // is being preallocated.  Until that is settled, cover both
    // readings: store a zero at position `length` and also run
    // TERM_SEQUENCE on the series.
    BIN_DATA(bin)[length] = 0;
    TERM_SEQUENCE(bin);

    return bin;
}
// // Modify_String: C // // Returns new dst_idx. // REBCNT Modify_String( REBCNT action, // INSERT, APPEND, CHANGE REBSER *dst_ser, // target REBCNT dst_idx, // position const REBVAL *src_val, // source REBFLGS flags, // AN_PART REBINT dst_len, // length to remove REBINT dups // dup count ) { REBSER *src_ser = 0; REBCNT src_idx = 0; REBCNT src_len; REBCNT tail = SER_LEN(dst_ser); REBINT size; // total to insert REBOOL needs_free; REBINT limit; // For INSERT/PART and APPEND/PART if (action != SYM_CHANGE && GET_FLAG(flags, AN_PART)) limit = dst_len; // should be non-negative else limit = -1; if (limit == 0 || dups < 0) return (action == SYM_APPEND) ? 0 : dst_idx; if (action == SYM_APPEND || dst_idx > tail) dst_idx = tail; // If the src_val is not a string, then we need to create a string: if (GET_FLAG(flags, AN_SERIES)) { // used to indicate a BINARY series if (IS_INTEGER(src_val)) { src_ser = Make_Series_Codepoint(Int8u(src_val)); needs_free = TRUE; limit = -1; } else if (IS_BLOCK(src_val)) { src_ser = Join_Binary(src_val, limit); // NOTE: it's the shared FORM buffer! needs_free = FALSE; limit = -1; } else if (IS_CHAR(src_val)) { // // "UTF-8 was originally specified to allow codepoints with up to // 31 bits (or 6 bytes). But with RFC3629, this was reduced to 4 // bytes max. to be more compatible to UTF-16." So depending on // which RFC you consider "the UTF-8", max size is either 4 or 6. 
// src_ser = Make_Binary(6); SET_SERIES_LEN( src_ser, Encode_UTF8_Char(BIN_HEAD(src_ser), VAL_CHAR(src_val)) ); needs_free = TRUE; limit = -1; } else if (ANY_STRING(src_val)) { src_len = VAL_LEN_AT(src_val); if (limit >= 0 && src_len > cast(REBCNT, limit)) src_len = limit; src_ser = Make_UTF8_From_Any_String(src_val, src_len, 0); needs_free = TRUE; limit = -1; } else if (!IS_BINARY(src_val)) fail (Error_Invalid_Arg(src_val)); } else if (IS_CHAR(src_val)) { src_ser = Make_Series_Codepoint(VAL_CHAR(src_val)); needs_free = TRUE; } else if (IS_BLOCK(src_val)) { src_ser = Form_Tight_Block(src_val); needs_free = TRUE; } else if (!ANY_STRING(src_val) || IS_TAG(src_val)) { src_ser = Copy_Form_Value(src_val, 0); needs_free = TRUE; } // Use either new src or the one that was passed: if (src_ser) { src_len = SER_LEN(src_ser); } else { src_ser = VAL_SERIES(src_val); src_idx = VAL_INDEX(src_val); src_len = VAL_LEN_AT(src_val); needs_free = FALSE; } if (limit >= 0) src_len = limit; // If Source == Destination we need to prevent possible conflicts. // Clone the argument just to be safe. // (Note: It may be possible to optimize special cases like append !!) 
if (dst_ser == src_ser) { assert(!needs_free); src_ser = Copy_Sequence_At_Len(src_ser, src_idx, src_len); needs_free = TRUE; src_idx = 0; } // Total to insert: size = dups * src_len; if (action != SYM_CHANGE) { // Always expand dst_ser for INSERT and APPEND actions: Expand_Series(dst_ser, dst_idx, size); } else { if (size > dst_len) Expand_Series(dst_ser, dst_idx, size - dst_len); else if (size < dst_len && GET_FLAG(flags, AN_PART)) Remove_Series(dst_ser, dst_idx, dst_len - size); else if (size + dst_idx > tail) { EXPAND_SERIES_TAIL(dst_ser, size - (tail - dst_idx)); } } // For dup count: for (; dups > 0; dups--) { Insert_String(dst_ser, dst_idx, src_ser, src_idx, src_len, TRUE); dst_idx += src_len; } TERM_SEQUENCE(dst_ser); if (needs_free) { // If we did not use the series that was passed in, but rather // created an internal temporary one, we need to free it. Free_Series(src_ser); } return (action == SYM_APPEND) ? 0 : dst_idx; }