*/ REBINT PD_File(REBPVS *pvs) /* ***********************************************************************/ { REBSER *ser; REB_MOLD mo = {0}; REBCNT n; REBUNI c; REBSER *arg; if (pvs->setval) return PE_BAD_SET; ser = Copy_Series_Value(pvs->value); n = SERIES_TAIL(ser); if (n > 0) c = GET_ANY_CHAR(ser, n-1); if (n == 0 || c != '/') Append_Byte(ser, '/'); if (ANY_STR(pvs->select)) arg = VAL_SERIES(pvs->select); else { Reset_Mold(&mo); Mold_Value(&mo, pvs->select, 0); arg = mo.series; } c = GET_ANY_CHAR(arg, 0); n = (c == '/' || c == '\\') ? 1 : 0; Append_String(ser, arg, n, arg->tail-n); Set_Series(VAL_TYPE(pvs->value), pvs->store, ser); return PE_USE; }
STOID Mold_File(REBVAL *value, REB_MOLD *mold) { REBUNI *dp; REBCNT n; REBUNI c; REBCNT len = VAL_LEN(value); REBSER *ser = VAL_SERIES(value); // Compute extra space needed for hex encoded characters: for (n = VAL_INDEX(value); n < VAL_TAIL(value); n++) { c = GET_ANY_CHAR(ser, n); if (IS_FILE_ESC(c)) len += 2; } len++; // room for % at start dp = Prep_Uni_Series(mold, len); *dp++ = '%'; for (n = VAL_INDEX(value); n < VAL_TAIL(value); n++) { c = GET_ANY_CHAR(ser, n); if (IS_FILE_ESC(c)) dp = Form_Hex_Esc_Uni(dp, c); // c => %xx else *dp++ = c; } *dp = 0; }
// // Temp_Byte_Chars_May_Fail: C // // NOTE: This function returns a temporary result, and uses an internal // buffer. Do not use it recursively. Also, it will Trap on errors. // // Prequalifies a string before using it with a function that // expects it to be 8-bits. It would be used for instance to convert // a string that is potentially REBUNI-wide into a form that can be used // with a Scan_XXX routine, that is expecting ASCII or UTF-8 source. // (Many TO-XXX conversions from STRING re-use that scanner logic.) // // Returns a temporary string and sets the length field. // // If `allow_utf8`, the constructed result is converted to UTF8. // // Checks or converts it: // // 1. it is byte string (not unicode) // 2. if unicode, copy and return as temp byte string // 3. it's actual content (less space, newlines) <= max len // 4. it does not contain other values ("123 456") // 5. it's not empty or only whitespace // REBYTE *Temp_Byte_Chars_May_Fail( const REBVAL *val, REBINT max_len, REBCNT *length, REBOOL allow_utf8 ) { REBCNT tail = VAL_LEN_HEAD(val); REBCNT index = VAL_INDEX(val); REBCNT len; REBUNI c; REBYTE *bp; REBSER *src = VAL_SERIES(val); if (index > tail) fail (Error(RE_PAST_END)); Resize_Series(BYTE_BUF, max_len+1); bp = BIN_HEAD(BYTE_BUF); // Skip leading whitespace: for (; index < tail; index++) { c = GET_ANY_CHAR(src, index); if (!IS_SPACE(c)) break; } // Copy chars that are valid: for (; index < tail; index++) { c = GET_ANY_CHAR(src, index); if (c >= 0x80) { if (!allow_utf8) fail (Error(RE_INVALID_CHARS)); len = Encode_UTF8_Char(bp, c); max_len -= len; bp += len; } else if (!IS_SPACE(c)) { *bp++ = (REBYTE)c; max_len--; } else break; if (max_len < 0) fail (Error(RE_TOO_LONG)); } // Rest better be just spaces: for (; index < tail; index++) { c = GET_ANY_CHAR(src, index); if (!IS_SPACE(c)) fail (Error(RE_INVALID_CHARS)); } *bp = '\0'; len = bp - BIN_HEAD(BYTE_BUF); if (len == 0) fail (Error(RE_TOO_SHORT)); if (length) *length = len; return BIN_HEAD(BYTE_BUF); }
*/ static void trim_auto(REBSER *ser, REBCNT index, REBCNT tail) /* ** Skip any blank lines and then determine indent of ** first line and make the rest align with it. ** ** BUG!!! If the indentation uses TABS, then it could ** fill past the source pointer! ** ***********************************************************************/ { REBCNT out = index; REBCNT line; REBCNT len; REBCNT indent; REBUNI uc = 0; // Skip whitespace, remember start of last line: for (line = index; index < tail; index++) { uc = GET_ANY_CHAR(ser, index); if (!IS_WHITE(uc)) break; if (uc == LF) line = index+1; } // Count the indentation used: for (indent = 0; line < index; line++) { if (GET_ANY_CHAR(ser, line) == ' ') indent++; else indent = (indent + TAB_SIZE) & ~3; } // For each line, pad with necessary indentation: while (index < tail) { // Skip to next content, track indentation: for (len = 0; index < tail; index++) { uc = GET_ANY_CHAR(ser, index); if (!IS_SPACE(uc) || len >= indent) break; if (uc == ' ') len++; else len = (len + TAB_SIZE) & ~3; } // Indent the line: for (; len > indent; len--) { SET_ANY_CHAR(ser, out, ' '); out++; } // Copy line contents: while (index < tail) { uc = GET_ANY_CHAR(ser, index); SET_ANY_CHAR(ser, out, uc); out++; index++; if (uc == LF) break; } } SET_ANY_CHAR(ser, out, 0); SERIES_TAIL(ser) = out; }
*/ REBINT PD_String(REBPVS *pvs) /* ***********************************************************************/ { REBVAL *data = pvs->value; REBVAL *val = pvs->setval; REBINT n = 0; REBCNT i; REBINT c; REBSER *ser = VAL_SERIES(data); if (IS_INTEGER(pvs->select)) { n = Int32(pvs->select) + VAL_INDEX(data) - 1; } else return PE_BAD_SELECT; if (val == 0) { if (n < 0 || (REBCNT)n >= SERIES_TAIL(ser)) return PE_NONE; if (IS_BINARY(data)) { SET_INTEGER(pvs->store, *BIN_SKIP(ser, n)); } else { SET_CHAR(pvs->store, GET_ANY_CHAR(ser, n)); } return PE_USE; } if (n < 0 || (REBCNT)n >= SERIES_TAIL(ser)) return PE_BAD_RANGE; if (IS_CHAR(val)) { c = VAL_CHAR(val); if (c > MAX_CHAR) return PE_BAD_SET; } else if (IS_INTEGER(val)) { c = Int32(val); if (c > MAX_CHAR || c < 0) return PE_BAD_SET; if (IS_BINARY(data)) { // special case for binary if (c > 0xff) Trap_Range(val); BIN_HEAD(ser)[n] = (REBYTE)c; return PE_OK; } } else if (ANY_BINSTR(val)) { i = VAL_INDEX(val); if (i >= VAL_TAIL(val)) return PE_BAD_SET; c = GET_ANY_CHAR(VAL_SERIES(val), i); } else return PE_BAD_SELECT; TRAP_PROTECT(ser); if (BYTE_SIZE(ser) && c > 0xff) Widen_String(ser); SET_ANY_CHAR(ser, n, c); return PE_OK; }
static void swap_chars(REBVAL *val1, REBVAL *val2) { REBUNI c1; REBUNI c2; REBSER *s1 = VAL_SERIES(val1); REBSER *s2 = VAL_SERIES(val2); c1 = GET_ANY_CHAR(s1, VAL_INDEX(val1)); c2 = GET_ANY_CHAR(s2, VAL_INDEX(val2)); if (BYTE_SIZE(s1) && c2 > 0xff) Widen_String(s1); SET_ANY_CHAR(s1, VAL_INDEX(val1), c2); if (BYTE_SIZE(s2) && c1 > 0xff) Widen_String(s2); SET_ANY_CHAR(s2, VAL_INDEX(val2), c1); }
// // Shuffle_String: C // // Randomize a string. Return a new string series. // Handles both BYTE and UNICODE strings. // void Shuffle_String(REBVAL *value, REBOOL secure) { REBCNT n; REBCNT k; REBSER *series = VAL_SERIES(value); REBCNT idx = VAL_INDEX(value); REBUNI swap; for (n = VAL_LEN_AT(value); n > 1;) { k = idx + (REBCNT)Random_Int(secure) % n; n--; swap = GET_ANY_CHAR(series, k); SET_ANY_CHAR(series, k, GET_ANY_CHAR(series, n + idx)); SET_ANY_CHAR(series, n + idx, swap); } }
*/ REBCNT Find_Str_Char(REBSER *ser, REBCNT head, REBCNT index, REBCNT tail, REBINT skip, REBUNI c2, REBCNT flags) /* ** General purpose find a char in a string. ** ** Supports: forward/reverse with skip, cased/uncase, Unicode/byte. ** ** Skip can be set positive or negative (for reverse). ** ** Flags are set according to ALL_FIND_REFS ** ***********************************************************************/ { REBUNI c1; REBOOL uncase = !GET_FLAG(flags, ARG_FIND_CASE-1); // uncase = case insenstive if (uncase && c2 < UNICODE_CASES) c2 = LO_CASE(c2); for (; index >= head && index < tail; index += skip) { c1 = GET_ANY_CHAR(ser, index); if (uncase && c1 < UNICODE_CASES) c1 = LO_CASE(c1); if (c1 == c2) return index; if GET_FLAG(flags, ARG_FIND_MATCH-1) break; } return NOT_FOUND; }
*/ REBCNT Find_Str_Bitset(REBSER *ser, REBCNT head, REBCNT index, REBCNT tail, REBINT skip, REBSER *bset, REBCNT flags) /* ** General purpose find a bitset char in a string. ** ** Supports: forward/reverse with skip, cased/uncase, Unicode/byte. ** ** Skip can be set positive or negative (for reverse). ** ** Flags are set according to ALL_FIND_REFS ** ***********************************************************************/ { REBUNI c1; REBOOL uncase = !GET_FLAG(flags, ARG_FIND_CASE-1); // uncase = case insenstive for (; index >= head && index < tail; index += skip) { c1 = GET_ANY_CHAR(ser, index); //if (uncase && c1 < UNICODE_CASES) { // if (Check_Bit(bset, LO_CASE(c1)) || Check_Bit(bset, UP_CASE(c1))) // return index; //} //else if (Check_Bit(bset, c1, uncase)) return index; if (flags & AM_FIND_MATCH) break; } return NOT_FOUND; }
*/ static void trim_lines(REBSER *ser, REBCNT index, REBCNT tail) /* ** Remove all newlines and extra space. ** ***********************************************************************/ { REBINT pad = 1; // used to allow a single space REBUNI uc; REBCNT out = index; for (; index < tail; index++) { uc = GET_ANY_CHAR(ser, index); if (IS_WHITE(uc)) { uc = ' '; if (!pad) { SET_ANY_CHAR(ser, out, uc); out++; pad = 2; } } else { SET_ANY_CHAR(ser, out, uc); out++; pad = 0; } } // Remove extra end pad if found: if (pad == 2) out--; SET_ANY_CHAR(ser, out, 0); SERIES_TAIL(ser) = out; }
*/ REBCNT Find_Str_Str(REBSER *ser1, REBCNT head, REBCNT index, REBCNT tail, REBINT skip, REBSER *ser2, REBCNT index2, REBCNT len, REBCNT flags) /* ** General purpose find a substring. ** ** Supports: forward/reverse with skip, cased/uncase, Unicode/byte. ** ** Skip can be set positive or negative (for reverse). ** ** Flags are set according to ALL_FIND_REFS ** ***********************************************************************/ { REBUNI c1; REBUNI c2; REBUNI c3; REBCNT n = 0; REBOOL uncase = !(flags & AM_FIND_CASE); // uncase = case insenstive c2 = GET_ANY_CHAR(ser2, index2); // starting char if (uncase && c2 < UNICODE_CASES) c2 = LO_CASE(c2); for (; index >= head && index < tail; index += skip) { c1 = GET_ANY_CHAR(ser1, index); if (uncase && c1 < UNICODE_CASES) c1 = LO_CASE(c1); if (c1 == c2) { for (n = 1; n < len; n++) { c1 = GET_ANY_CHAR(ser1, index+n); c3 = GET_ANY_CHAR(ser2, index2+n); if (uncase && c1 < UNICODE_CASES && c3 < UNICODE_CASES) { if (LO_CASE(c1) != LO_CASE(c3)) break; } else { if (c1 != c3) break; } } if (n == len) { if (flags & AM_FIND_TAIL) return index + len; return index; } } if (flags & AM_FIND_MATCH) break; } return NOT_FOUND; }
*/ static void replace_with(REBSER *ser, REBCNT index, REBCNT tail, REBVAL *with) /* ** Replace whitespace chars that match WITH string. ** ** Resulting string is always smaller than it was to start. ** ***********************************************************************/ { #define MAX_WITH 32 REBCNT wlen; REBUNI with_chars[MAX_WITH]; // chars to be trimmed REBUNI *up = with_chars; REBYTE *bp; REBCNT n; REBUNI uc; // Setup WITH array from arg or the default: n = 0; if (IS_NONE(with)) { bp = "\n \r\t"; wlen = n = 4; } else if (IS_CHAR(with)) { wlen = 1; *up++ = VAL_CHAR(with); } else if (IS_INTEGER(with)) { wlen = 1; *up++ = Int32s(with, 0); } else if (ANY_BINSTR(with)) { n = VAL_LEN(with); if (n >= MAX_WITH) n = MAX_WITH-1; wlen = n; if (VAL_BYTE_SIZE(with)) { bp = VAL_BIN_DATA(with); } else { memcpy(up, VAL_UNI_DATA(with), n * sizeof(REBUNI)); n = 0; } } for (; n > 0; n--) *up++ = (REBUNI)*bp++; // Remove all occurances of chars found in WITH string: for (n = index; index < tail; index++) { uc = GET_ANY_CHAR(ser, index); if (!find_in_uni(with_chars, wlen, uc)) { SET_ANY_CHAR(ser, n, uc); n++; } } SET_ANY_CHAR(ser, n, 0); SERIES_TAIL(ser) = n; }
*/ REBSER *Split_Lines(REBVAL *val) /* ** Given a string series, split lines on CR-LF. ** Series can be bytes or Unicode. ** ***********************************************************************/ { REBSER *ser = BUF_EMIT; // GC protected (because it is emit buffer) REBSER *str = VAL_SERIES(val); REBCNT len = VAL_LEN(val); REBCNT idx = VAL_INDEX(val); REBCNT start = idx; REBSER *out; REBUNI c; BLK_RESET(ser); while (idx < len) { c = GET_ANY_CHAR(str, idx); if (c == LF || c == CR) { out = Copy_String(str, start, idx - start); val = Alloc_Tail_Array(ser); Val_Init_String(val, out); VAL_SET_OPT(val, OPT_VALUE_LINE); idx++; if (c == CR && GET_ANY_CHAR(str, idx) == LF) idx++; start = idx; } else idx++; } // Possible remainder (no terminator) if (idx > start) { out = Copy_String(str, start, idx - start); val = Alloc_Tail_Array(ser); Val_Init_String(val, out); VAL_SET_OPT(val, OPT_VALUE_LINE); } return Copy_Array_Shallow(ser); }
*/ void Shuffle_String(REBVAL *value, REBFLG secure) /* ** Randomize a string. Return a new string series. ** Handles both BYTE and UNICODE strings. ** ***********************************************************************/ { REBCNT n; REBCNT k; REBSER *series = VAL_SERIES(value); REBCNT idx = VAL_INDEX(value); REBUNI swap; for (n = VAL_LEN(value); n > 1;) { k = idx + (REBCNT)Random_Int(secure) % n; n--; swap = GET_ANY_CHAR(series, k); SET_ANY_CHAR(series, k, GET_ANY_CHAR(series, n + idx)); SET_ANY_CHAR(series, n + idx, swap); } }
*/ REBFLG Match_Sub_Path(REBSER *s1, REBSER *s2) /* ** Compare two file path series, regardless of char size. ** Return TRUE if s1 is a subpath of s2. ** Case insensitive. ** ***********************************************************************/ { REBCNT len = s1->tail; REBCNT n; REBUNI c1 = 0; REBUNI c2; // Debug_Series(s1); // Debug_Series(s2); // s1 len must be <= s2 len if (len > s2->tail) return FALSE; for (n = 0; n < len; n++) { // includes terminator c1 = GET_ANY_CHAR(s1, n); c2 = GET_ANY_CHAR(s2, n); if (c1 < UNICODE_CASES) c1 = LO_CASE(c1); if (c2 < UNICODE_CASES) c2 = LO_CASE(c2); if (c1 != c2) break; } // a/b matches: a/b, a/b/, a/b/c c2 = GET_ANY_CHAR(s2, n); return ( n >= len // all chars matched && // Must be at end or at dir sep: (c1 == '/' || c1 == '\\' || c2 == 0 || c2 == '/' || c2 == '\\') ); }
// // Binary_To_Decimal: C // static void Binary_To_Decimal(const REBVAL *bin, REBVAL *out) { REBI64 n = 0; REBSER *ser = VAL_SERIES(bin); REBCNT idx = VAL_INDEX(bin); REBCNT len = VAL_LEN_AT(bin); if (len > 8) len = 8; for (; len; len--, idx++) n = (n << 8) | (REBI64)(GET_ANY_CHAR(ser, idx)); VAL_RESET_HEADER(out, REB_DECIMAL); INIT_DECIMAL_BITS(out, n); }
static void Mold_Url(const REBVAL *value, REB_MOLD *mold) { REBUNI *dp; REBCNT n; REBUNI c; REBCNT len = VAL_LEN(value); REBSER *ser = VAL_SERIES(value); // Compute extra space needed for hex encoded characters: for (n = VAL_INDEX(value); n < VAL_TAIL(value); n++) { c = GET_ANY_CHAR(ser, n); if (IS_URL_ESC(c)) len += 2; } dp = Prep_Uni_Series(mold, len); for (n = VAL_INDEX(value); n < VAL_TAIL(value); n++) { c = GET_ANY_CHAR(ser, n); if (IS_URL_ESC(c)) dp = Form_Hex_Esc_Uni(dp, c); // c => %xx else *dp++ = c; } *dp = 0; }
// // Split_Lines: C // // Given a string series, split lines on CR-LF. // Series can be bytes or Unicode. // REBARR *Split_Lines(REBVAL *val) { REBARR *array = BUF_EMIT; // GC protected (because it is emit buffer) REBSER *str = VAL_SERIES(val); REBCNT len = VAL_LEN_AT(val); REBCNT idx = VAL_INDEX(val); REBCNT start = idx; REBSER *out; REBUNI c; RESET_ARRAY(array); while (idx < len) { c = GET_ANY_CHAR(str, idx); if (c == LF || c == CR) { out = Copy_String_Slimming(str, start, idx - start); val = Alloc_Tail_Array(array); Val_Init_String(val, out); SET_VAL_FLAG(val, VALUE_FLAG_LINE); idx++; if (c == CR && GET_ANY_CHAR(str, idx) == LF) idx++; start = idx; } else idx++; } // Possible remainder (no terminator) if (idx > start) { out = Copy_String_Slimming(str, start, idx - start); val = Alloc_Tail_Array(array); Val_Init_String(val, out); SET_VAL_FLAG(val, VALUE_FLAG_LINE); } return Copy_Array_Shallow(array, SPECIFIED); // no relative values }
*/ static void Binary_To_Decimal(REBVAL *bin, REBVAL *dec) /* ***********************************************************************/ { REBI64 n = 0; REBSER *ser = VAL_SERIES(bin); REBCNT idx = VAL_INDEX(bin); REBCNT len = VAL_LEN(bin); if (len > 8) len = 8; for (; len; len--, idx++) n = (n << 8) | (REBI64)(GET_ANY_CHAR(ser, idx)); VAL_SET(dec, REB_DECIMAL); VAL_INT64(dec) = n; // aliasing the bits! }
RL_API int RL_Get_Char(REBSER *series, u32 index) /* ** Get a character from byte or unicode string. ** ** Returns: ** A Unicode character point from string. If index is ** at or past the tail, a -1 is returned. ** Arguments: ** series - string series pointer ** index - zero based index of character ** Notes: ** This function works for byte and unicoded strings. ** The maximum size of a Unicode char is determined by ** R3 build options. The default is 16 bits. */ { if (index >= series->tail) return -1; return GET_ANY_CHAR(series, index); }
STOID Mold_Issue(REBVAL *value, REB_MOLD *mold) { REBUNI *dp; REBCNT n; REBUNI c; REBSER *ser = VAL_SERIES(value); dp = Prep_Uni_Series(mold, VAL_LEN(value)+1); // '#' extra *dp++ = '#'; for (n = VAL_INDEX(value); n < VAL_TAIL(value); n++) { c = GET_ANY_CHAR(ser, n); if (IS_LEX_DELIMIT(c)) c = '?'; *dp++ = c; } *dp = 0; }
*/ static REB_R Console_Actor(struct Reb_Call *call_, REBSER *port, REBCNT action) /* ***********************************************************************/ { REBREQ *req; REBINT result; REBVAL *arg = D_ARG(2); REBSER *ser; Validate_Port(port, action); arg = D_ARG(2); *D_OUT = *D_ARG(1); req = cast(REBREQ*, Use_Port_State(port, RDI_STDIO, sizeof(REBREQ))); switch (action) { case A_READ: // If not open, open it: if (!IS_OPEN(req)) { if (OS_DO_DEVICE(req, RDC_OPEN)) Trap_Port_DEAD_END(RE_CANNOT_OPEN, port, req->error); } // If no buffer, create a buffer: arg = OFV(port, STD_PORT_DATA); if (!IS_STRING(arg) && !IS_BINARY(arg)) { Set_Binary(arg, MAKE_OS_BUFFER(OUT_BUF_SIZE)); } ser = VAL_SERIES(arg); RESET_SERIES(ser); req->common.data = BIN_HEAD(ser); req->length = SERIES_AVAIL(ser); #ifdef nono // Is the buffer large enough? req->length = SERIES_AVAIL(ser); // space available if (req->length < OUT_BUF_SIZE/2) Extend_Series(ser, OUT_BUF_SIZE); req->length = SERIES_AVAIL(ser); // Don't make buffer too large: Bug #174 ????? if (req->length > 1024) req->length = 1024; //??? req->common.data = STR_TAIL(ser); // write at tail //??? if (SERIES_TAIL(ser) == 0) req->actual = 0; //??? #endif result = OS_DO_DEVICE(req, RDC_READ); if (result < 0) Trap_Port_DEAD_END(RE_READ_ERROR, port, req->error); #ifdef nono // Does not belong here!! // Remove or replace CRs: result = 0; for (n = 0; n < req->actual; n++) { chr = GET_ANY_CHAR(ser, n); if (chr == CR) { chr = LF; // Skip LF if it follows: if ((n+1) < req->actual && LF == GET_ANY_CHAR(ser, n+1)) n++; } SET_ANY_CHAR(ser, result, chr); result++; } #endif // !!! Among many confusions in this file, it said "Another copy???" //Set_String(D_OUT, Copy_OS_Str(ser->data, result)); Set_Binary(D_OUT, Copy_Bytes(req->common.data, req->actual)); break; case A_OPEN: // ?? why??? //if (OS_DO_DEVICE(req, RDC_OPEN)) Trap_Port_DEAD_END(RE_CANNOT_OPEN, port); SET_OPEN(req); break; case A_CLOSE: SET_CLOSED(req); //OS_DO_DEVICE(req, RDC_CLOSE); break; case A_OPENQ: if (IS_OPEN(req)) return R_TRUE; return R_FALSE; default: Trap_Action_DEAD_END(REB_PORT, action); } return R_OUT; }
*/ static void trim_head_tail(REBSER *ser, REBCNT index, REBCNT tail, REBFLG h, REBFLG t) /* ** Trim from head and tail of each line, trim any leading or ** trailing lines as well, leaving one at the end if present ** ***********************************************************************/ { REBCNT start = index; REBCNT out = index; REBUNI uc; // Skip head lines if required: if (h || !t) { for (; index < tail; index++) { uc = GET_ANY_CHAR(ser, index); if (!IS_WHITE(uc)) break; } } // Trim the head and tail parts of a line: if (!h && !t) { REBINT hf = 1; // head space flag REBINT tf = 0; // tail space flag and index // Trim lines: for (; index < tail; index++) { uc = GET_ANY_CHAR(ser, index); if (IS_SPACE(uc)) { if (hf) continue; // trim from head tf = index; // tailing spaces? } else if (uc == LF) { hf = 1; if (tf) out = tf; tf = 0; } else hf = tf = 0; SET_ANY_CHAR(ser, out, uc); out++; } } else { for (; index < tail; index++) { uc = GET_ANY_CHAR(ser, index); SET_ANY_CHAR(ser, out, uc); out++; } } // Trim tail lines if required: if (t || !h) { REBOOL flag = FALSE; // found newline for (out--; out >= start; out--) { uc = GET_ANY_CHAR(ser, out); if (!IS_WHITE(uc)) break; if (uc == LF) flag = TRUE; } out++; if (!t && flag) { SET_ANY_CHAR(ser, out, LF); out++; } } SET_ANY_CHAR(ser, out, 0); SERIES_TAIL(ser) = out; }
*/ REBYTE *Temp_Byte_Chars_May_Fail(const REBVAL *val, REBINT max_len, REBCNT *length, REBINT opts) /* ** NOTE: This function returns a temporary result, and uses an internal ** buffer. Do not use it recursively. Also, it will Trap on errors. ** ** Prequalifies a string before using it with a function that ** expects it to be 8-bits. It would be used for instance to convert ** a string that is potentially REBUNI-wide into a form that can be used ** with a Scan_XXX routine, that is expecting ASCII or UTF-8 source. ** (Many TO-XXX conversions from STRING re-use that scanner logic.) ** ** Returns a temporary string and sets the length field. ** ** Opts can be: ** 0 - no special options ** 1 - allow UTF8 (val is converted to UTF8 during qualification) ** 2 - allow binary ** ** Checks or converts it: ** ** 1. it is byte string (not unicode) ** 2. if unicode, copy and return as temp byte string ** 3. it's actual content (less space, newlines) <= max len ** 4. it does not contain other values ("123 456") ** 5. it's not empty or only whitespace ** ***********************************************************************/ { REBCNT tail = VAL_TAIL(val); REBCNT index = VAL_INDEX(val); REBCNT len; REBUNI c; REBYTE *bp; REBSER *src = VAL_SERIES(val); if (index > tail) raise Error_0(RE_PAST_END); Resize_Series(BUF_FORM, max_len+1); bp = BIN_HEAD(BUF_FORM); // Skip leading whitespace: for (; index < tail; index++) { c = GET_ANY_CHAR(src, index); if (!IS_SPACE(c)) break; } // Copy chars that are valid: for (; index < tail; index++) { c = GET_ANY_CHAR(src, index); if (opts < 2 && c >= 0x80) { if (opts == 0) raise Error_0(RE_INVALID_CHARS); len = Encode_UTF8_Char(bp, c); max_len -= len; bp += len; } else if (!IS_SPACE(c)) { *bp++ = (REBYTE)c; max_len--; } else break; if (max_len < 0) raise Error_0(RE_TOO_LONG); } // Rest better be just spaces: for (; index < tail; index++) { c = GET_ANY_CHAR(src, index); if (!IS_SPACE(c)) raise Error_0(RE_INVALID_CHARS); } *bp= 0; len = bp - BIN_HEAD(BUF_FORM); if (len == 0) raise Error_0(RE_TOO_SHORT); if (length) *length = len; return BIN_HEAD(BUF_FORM); }
// // RL_Get_Char: C // // Get a character from byte or unicode string. // // Returns: // A Unicode character point from string. If index is // at or past the tail, a -1 is returned. // Arguments: // series - string series pointer // index - zero based index of character // Notes: // This function works for byte and unicoded strings. // The maximum size of a Unicode char is determined by // R3 build options. The default is 16 bits. // RL_API int RL_Get_Char(REBSER *series, u32 index) { if (index >= SER_LEN(series)) return -1; return GET_ANY_CHAR(series, index); }
// // Make_Set_Operation_Series: C // // Do set operations on a series. Case-sensitive if `cased` is TRUE. // `skip` is the record size. // static REBSER *Make_Set_Operation_Series( const REBVAL *val1, const REBVAL *val2, REBFLGS flags, REBOOL cased, REBCNT skip ) { REBCNT i; REBINT h = 1; // used for both logic true/false and hash check REBOOL first_pass = TRUE; // are we in the first pass over the series? REBSER *out_ser; assert(ANY_SERIES(val1)); if (val2) { assert(ANY_SERIES(val2)); if (ANY_ARRAY(val1)) { if (!ANY_ARRAY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); // As long as they're both arrays, we're willing to do: // // >> union quote (a b c) 'b/d/e // (a b c d e) // // The type of the result will match the first value. } else if (!IS_BINARY(val1)) { // We will similarly do any two ANY-STRING! types: // // >> union <abc> "bde" // <abcde> if (IS_BINARY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); } else { // Binaries only operate with other binaries if (!IS_BINARY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); } } // Calculate `i` as maximum length of result block. The temporary buffer // will be allocated at this size, but copied out at the exact size of // the actual result. // i = VAL_LEN_AT(val1); if (flags & SOP_FLAG_BOTH) i += VAL_LEN_AT(val2); if (ANY_ARRAY(val1)) { REBSER *hser = 0; // hash table for series REBSER *hret; // hash table for return series // The buffer used for building the return series. Currently it // reuses BUF_EMIT, because that buffer is not likely to be in // use (emit doesn't call set operations, nor vice versa). However, // other routines may get the same idea and start recursing so it // may be better to use something more similar to the mold stack // approach of marking off successive ranges in the array. // REBSER *buffer = ARR_SERIES(BUF_EMIT); Resize_Series(buffer, i); hret = Make_Hash_Sequence(i); // allocated // Optimization note: !! // This code could be optimized for small blocks by not hashing them // and extending Find_Key to FIND on the value itself w/o the hash. do { REBARR *array1 = VAL_ARRAY(val1); // val1 and val2 swapped 2nd pass! // Check what is in series1 but not in series2 // if (flags & SOP_FLAG_CHECK) hser = Hash_Block(val2, skip, cased); // Iterate over first series // i = VAL_INDEX(val1); for (; i < ARR_LEN(array1); i += skip) { RELVAL *item = ARR_AT(array1, i); if (flags & SOP_FLAG_CHECK) { h = Find_Key_Hashed( VAL_ARRAY(val2), hser, item, VAL_SPECIFIER(val1), skip, cased, 1 ); h = (h >= 0); if (flags & SOP_FLAG_INVERT) h = !h; } if (h) { Find_Key_Hashed( AS_ARRAY(buffer), hret, item, VAL_SPECIFIER(val1), skip, cased, 2 ); } } if (i != ARR_LEN(array1)) { // // In the current philosophy, the semantics of what to do // with things like `intersect/skip [1 2 3] [7] 2` is too // shaky to deal with, so an error is reported if it does // not work out evenly to the skip size. // fail (Error(RE_BLOCK_SKIP_WRONG)); } if (flags & SOP_FLAG_CHECK) Free_Series(hser); if (!first_pass) break; first_pass = FALSE; // Iterate over second series? // if ((i = ((flags & SOP_FLAG_BOTH) != 0))) { const REBVAL *temp = val1; val1 = val2; val2 = temp; } } while (i); if (hret) Free_Series(hret); out_ser = ARR_SERIES(Copy_Array_Shallow(AS_ARRAY(buffer), SPECIFIED)); SET_SERIES_LEN(buffer, 0); // required - allow reuse } else { REB_MOLD mo; CLEARS(&mo); if (IS_BINARY(val1)) { // // All binaries use "case-sensitive" comparison (e.g. each byte // is treated distinctly) // cased = TRUE; } // ask mo.series to have at least `i` capacity beyond mo.start // mo.opts = MOPT_RESERVE; mo.reserve = i; Push_Mold(&mo); do { REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass! REBUNI uc; // Iterate over first series // i = VAL_INDEX(val1); for (; i < SER_LEN(ser); i += skip) { uc = GET_ANY_CHAR(ser, i); if (flags & SOP_FLAG_CHECK) { h = (NOT_FOUND != Find_Str_Char( uc, VAL_SERIES(val2), 0, VAL_INDEX(val2), VAL_LEN_HEAD(val2), skip, cased ? AM_FIND_CASE : 0 )); if (flags & SOP_FLAG_INVERT) h = !h; } if (!h) continue; if ( NOT_FOUND == Find_Str_Char( uc, // c2 (the character to find) mo.series, // ser mo.start, // head mo.start, // index SER_LEN(mo.series), // tail skip, // skip cased ? AM_FIND_CASE : 0 // flags ) ) { Append_String(mo.series, ser, i, skip); } } if (!first_pass) break; first_pass = FALSE; // Iterate over second series? // if ((i = ((flags & SOP_FLAG_BOTH) != 0))) { const REBVAL *temp = val1; val1 = val2; val2 = temp; } } while (i); out_ser = Pop_Molded_String(&mo); } return out_ser; }
// // Make_Set_Operation_Series: C // // Do set operations on a series. Case-sensitive if `cased` is TRUE. // `skip` is the record size. // static REBSER *Make_Set_Operation_Series(const REBVAL *val1, const REBVAL *val2, REBCNT flags, REBCNT cased, REBCNT skip) { REBSER *buffer; // buffer for building the return series REBCNT i; REBINT h = TRUE; REBFLG first_pass = TRUE; // are we in the first pass over the series? REBSER *out_ser; // This routine should only be called with SERIES! values assert(ANY_SERIES(val1)); if (val2) { assert(ANY_SERIES(val2)); if (ANY_ARRAY(val1)) { if (!ANY_ARRAY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); // As long as they're both arrays, we're willing to do: // // >> union quote (a b c) 'b/d/e // (a b c d e) // // The type of the result will match the first value. } else if (!IS_BINARY(val1)) { // We will similarly do any two ANY-STRING! types: // // >> union <abc> "bde" // <abcde> if (IS_BINARY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); } else { // Binaries only operate with other binaries if (!IS_BINARY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); } } // Calculate i as length of result block. i = VAL_LEN(val1); if (flags & SOP_FLAG_BOTH) i += VAL_LEN(val2); if (ANY_ARRAY(val1)) { REBSER *hser = 0; // hash table for series REBSER *hret; // hash table for return series buffer = BUF_EMIT; // use preallocated shared block Resize_Series(buffer, i); hret = Make_Hash_Sequence(i); // allocated // Optimization note: !! // This code could be optimized for small blocks by not hashing them // and extending Find_Key to do a FIND on the value itself w/o the hash. do { REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass! // Check what is in series1 but not in series2: if (flags & SOP_FLAG_CHECK) hser = Hash_Block(val2, cased); // Iterate over first series: i = VAL_INDEX(val1); for (; i < SERIES_TAIL(ser); i += skip) { REBVAL *item = BLK_SKIP(ser, i); if (flags & SOP_FLAG_CHECK) { h = Find_Key(VAL_SERIES(val2), hser, item, skip, cased, 1); h = (h >= 0); if (flags & SOP_FLAG_INVERT) h = !h; } if (h) Find_Key(buffer, hret, item, skip, cased, 2); } if (flags & SOP_FLAG_CHECK) Free_Series(hser); if (!first_pass) break; first_pass = FALSE; // Iterate over second series? if ((i = ((flags & SOP_FLAG_BOTH) != 0))) { const REBVAL *temp = val1; val1 = val2; val2 = temp; } } while (i); if (hret) Free_Series(hret); out_ser = Copy_Array_Shallow(buffer); RESET_TAIL(buffer); // required - allow reuse } else { if (IS_BINARY(val1)) { // All binaries use "case-sensitive" comparison (e.g. each byte // is treated distinctly) cased = TRUE; } buffer = BUF_MOLD; Reset_Buffer(buffer, i); RESET_TAIL(buffer); do { REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass! REBUNI uc; // Iterate over first series: i = VAL_INDEX(val1); for (; i < SERIES_TAIL(ser); i += skip) { uc = GET_ANY_CHAR(ser, i); if (flags & SOP_FLAG_CHECK) { h = (NOT_FOUND != Find_Str_Char( VAL_SERIES(val2), 0, VAL_INDEX(val2), VAL_TAIL(val2), skip, uc, cased ? AM_FIND_CASE : 0 )); if (flags & SOP_FLAG_INVERT) h = !h; } if (!h) continue; if ( NOT_FOUND == Find_Str_Char( buffer, 0, 0, SERIES_TAIL(buffer), skip, uc, cased ? AM_FIND_CASE : 0 ) ) { Append_String(buffer, ser, i, skip); } } if (!first_pass) break; first_pass = FALSE; // Iterate over second series? if ((i = ((flags & SOP_FLAG_BOTH) != 0))) { const REBVAL *temp = val1; val1 = val2; val2 = temp; } } while (i); out_ser = Copy_String(buffer, 0, -1); } return out_ser; }
*/ static REBINT Do_Set_Operation(struct Reb_Call *call_, REBCNT flags) /* ** Do set operations on a series. ** ***********************************************************************/ { REBVAL *val; REBVAL *val1; REBVAL *val2 = 0; REBSER *ser; REBSER *hser = 0; // hash table for series REBSER *retser; // return series REBSER *hret; // hash table for return series REBCNT i; REBINT h = TRUE; REBCNT skip = 1; // record size REBCNT cased = 0; // case sensitive when TRUE SET_NONE(D_OUT); val1 = D_ARG(1); i = 2; // Check for second series argument: if (flags != SET_OP_UNIQUE) { val2 = D_ARG(i++); if (VAL_TYPE(val1) != VAL_TYPE(val2)) raise Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2)); } // Refinements /case and /skip N cased = D_REF(i++); // cased if (D_REF(i++)) skip = Int32s(D_ARG(i), 1); switch (VAL_TYPE(val1)) { case REB_BLOCK: i = VAL_LEN(val1); // Setup result block: if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2); retser = BUF_EMIT; // use preallocated shared block Resize_Series(retser, i); hret = Make_Hash_Sequence(i); // allocated // Optimization note: !! // This code could be optimized for small blocks by not hashing them // and extending Find_Key to do a FIND on the value itself w/o the hash. do { // Check what is in series1 but not in series2: if (GET_FLAG(flags, SOP_CHECK)) hser = Hash_Block(val2, cased); // Iterate over first series: ser = VAL_SERIES(val1); i = VAL_INDEX(val1); for (; val = BLK_SKIP(ser, i), i < SERIES_TAIL(ser); i += skip) { if (GET_FLAG(flags, SOP_CHECK)) { h = Find_Key(VAL_SERIES(val2), hser, val, skip, cased, 1) >= 0; if (GET_FLAG(flags, SOP_INVERT)) h = !h; } if (h) Find_Key(retser, hret, val, skip, cased, 2); } // Iterate over second series? if ((i = GET_FLAG(flags, SOP_BOTH))) { val = val1; val1 = val2; val2 = val; CLR_FLAG(flags, SOP_BOTH); } if (GET_FLAG(flags, SOP_CHECK)) Free_Series(hser); } while (i); if (hret) Free_Series(hret); Val_Init_Block(D_OUT, Copy_Array_Shallow(retser)); RESET_TAIL(retser); // required - allow reuse break; case REB_BINARY: cased = TRUE; SET_TYPE(D_OUT, REB_BINARY); case REB_STRING: i = VAL_LEN(val1); // Setup result block: if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2); retser = BUF_MOLD; Reset_Buffer(retser, i); RESET_TAIL(retser); do { REBUNI uc; cased = cased ? AM_FIND_CASE : 0; // Iterate over first series: ser = VAL_SERIES(val1); i = VAL_INDEX(val1); for (; i < SERIES_TAIL(ser); i += skip) { uc = GET_ANY_CHAR(ser, i); if (GET_FLAG(flags, SOP_CHECK)) { h = Find_Str_Char(VAL_SERIES(val2), 0, VAL_INDEX(val2), VAL_TAIL(val2), skip, uc, cased) != NOT_FOUND; if (GET_FLAG(flags, SOP_INVERT)) h = !h; } if (h && (Find_Str_Char(retser, 0, 0, SERIES_TAIL(retser), skip, uc, cased) == NOT_FOUND)) { Append_String(retser, ser, i, skip); } } // Iterate over second series? if ((i = GET_FLAG(flags, SOP_BOTH))) { val = val1; val1 = val2; val2 = val; CLR_FLAG(flags, SOP_BOTH); } } while (i); ser = Copy_String(retser, 0, -1); if (IS_BINARY(D_OUT)) Val_Init_Binary(D_OUT, ser); else Val_Init_String(D_OUT, ser); break; case REB_BITSET: switch (flags) { case SET_OP_UNIQUE: return R_ARG1; case SET_OP_UNION: i = A_OR; break; case SET_OP_INTERSECT: i = A_AND; break; case SET_OP_DIFFERENCE: i = A_XOR; break; case SET_OP_EXCLUDE: i = 0; // special case break; } ser = Xandor_Binary(i, val1, val2); Val_Init_Bitset(D_OUT, ser); break; case REB_TYPESET: switch (flags) { case SET_OP_UNIQUE: break; case SET_OP_UNION: VAL_TYPESET(val1) |= VAL_TYPESET(val2); break; case SET_OP_INTERSECT: VAL_TYPESET(val1) &= VAL_TYPESET(val2); break; case SET_OP_DIFFERENCE: VAL_TYPESET(val1) ^= VAL_TYPESET(val2); break; case SET_OP_EXCLUDE: VAL_TYPESET(val1) &= ~VAL_TYPESET(val2); break; } return R_ARG1; default: raise Error_Invalid_Arg(val1); } return R_OUT; }
*/ static REB_R Loop_Each(struct Reb_Call *call_, REBINT mode) /* ** Supports these natives (modes): ** 0: foreach ** 1: remove-each ** 2: map ** ***********************************************************************/ { REBSER *body; REBVAL *vars; REBVAL *words; REBSER *frame; REBVAL *value; REBSER *series; REBSER *out; // output block (for MAP, mode = 2) REBINT index; // !!!! should these be REBCNT? REBINT tail; REBINT windex; // write REBINT rindex; // read REBINT err; REBCNT i; REBCNT j; REBVAL *ds; assert(mode >= 0 && mode < 3); value = D_ARG(2); // series if (IS_NONE(value)) return R_NONE; body = Init_Loop(D_ARG(1), D_ARG(3), &frame); // vars, body SET_OBJECT(D_ARG(1), frame); // keep GC safe Set_Block(D_ARG(3), body); // keep GC safe SET_NONE(D_OUT); // Default result to NONE if the loop does not run // If it's MAP, create result block: if (mode == 2) { out = Make_Block(VAL_LEN(value)); SAVE_SERIES(out); } // Get series info: if (ANY_OBJECT(value)) { series = VAL_OBJ_FRAME(value); out = FRM_WORD_SERIES(series); // words (the out local reused) index = 1; //if (frame->tail > 3) Trap_Arg_DEAD_END(FRM_WORD(frame, 3)); } else if (IS_MAP(value)) { series = VAL_SERIES(value); index = 0; //if (frame->tail > 3) Trap_Arg_DEAD_END(FRM_WORD(frame, 3)); } else { series = VAL_SERIES(value); index = VAL_INDEX(value); if (index >= cast(REBINT, SERIES_TAIL(series))) { if (mode == 1) { SET_INTEGER(D_OUT, 0); } else if (mode == 2) { Set_Block(D_OUT, out); UNSAVE_SERIES(out); } return R_OUT; } } windex = index; // Iterate over each value in the series block: while (index < (tail = SERIES_TAIL(series))) { rindex = index; // remember starting spot j = 0; // Set the FOREACH loop variables from the series: for (i = 1; i < frame->tail; i++) { vars = FRM_VALUE(frame, i); words = FRM_WORD(frame, i); // var spec is WORD if (IS_WORD(words)) { if (index < tail) { if (ANY_BLOCK(value)) { *vars = *BLK_SKIP(series, index); } else if (ANY_OBJECT(value)) { if (!VAL_GET_EXT(BLK_SKIP(out, index), EXT_WORD_HIDE)) { // Alternate between word and value parts of object: if (j == 0) { Init_Word(vars, REB_WORD, VAL_WORD_SYM(BLK_SKIP(out, index)), series, index); if (NOT_END(vars+1)) index--; // reset index for the value part } else if (j == 1) *vars = *BLK_SKIP(series, index); else Trap_Arg_DEAD_END(words); j++; } else { // Do not evaluate this iteration index++; goto skip_hidden; } } else if (IS_VECTOR(value)) { Set_Vector_Value(vars, series, index); } else if (IS_MAP(value)) { REBVAL *val = BLK_SKIP(series, index | 1); if (!IS_NONE(val)) { if (j == 0) { *vars = *BLK_SKIP(series, index & ~1); if (IS_END(vars+1)) index++; // only words } else if (j == 1) *vars = *BLK_SKIP(series, index); else Trap_Arg_DEAD_END(words); j++; } else { index += 2; goto skip_hidden; } } else { // A string or binary if (IS_BINARY(value)) { SET_INTEGER(vars, (REBI64)(BIN_HEAD(series)[index])); } else if (IS_IMAGE(value)) { Set_Tuple_Pixel(BIN_SKIP(series, index), vars); } else { VAL_SET(vars, REB_CHAR); VAL_CHAR(vars) = GET_ANY_CHAR(series, index); } } index++; } else SET_NONE(vars); } // var spec is SET_WORD: else if (IS_SET_WORD(words)) { if (ANY_OBJECT(value) || IS_MAP(value)) { *vars = *value; } else { VAL_SET(vars, REB_BLOCK); VAL_SERIES(vars) = series; VAL_INDEX(vars) = index; } //if (index < tail) index++; // do not increment block. } else Trap_Arg_DEAD_END(words); } if (index == rindex) index++; //the word block has only set-words: foreach [a:] [1 2 3][] if (!DO_BLOCK(D_OUT, body, 0)) { if ((err = Check_Error(D_OUT)) >= 0) { index = rindex; break; } // else CONTINUE: if (mode == 1) SET_FALSE(D_OUT); // keep the value (for mode == 1) } else { err = 0; // prevent later test against uninitialized value } if (mode > 0) { //if (ANY_OBJECT(value)) Trap_Types_DEAD_END(words, REB_BLOCK, VAL_TYPE(value)); //check not needed // If FALSE return, copy values to the write location: if (mode == 1) { // remove-each if (IS_CONDITIONAL_FALSE(D_OUT)) { REBCNT wide = SERIES_WIDE(series); // memory areas may overlap, so use memmove and not memcpy! memmove(series->data + (windex * wide), series->data + (rindex * wide), (index - rindex) * wide); windex += index - rindex; // old: while (rindex < index) *BLK_SKIP(series, windex++) = *BLK_SKIP(series, rindex++); } } else if (!IS_UNSET(D_OUT)) Append_Value(out, D_OUT); // (mode == 2) } skip_hidden: ; } // Finish up: if (mode == 1) { // Remove hole (updates tail): if (windex < index) Remove_Series(series, windex, index - windex); SET_INTEGER(D_OUT, index - windex); return R_OUT; } // If MAP... if (mode == 2) { UNSAVE_SERIES(out); if (err != 2) { // ...and not BREAK/RETURN: Set_Block(D_OUT, out); return R_OUT; } } return R_OUT; }
static void str_to_char(REBVAL *out, REBVAL *val, REBCNT idx) { // STRING value to CHAR value (save some code space) SET_CHAR(out, GET_ANY_CHAR(VAL_SERIES(val), idx)); }