// // Temp_Byte_Chars_May_Fail: C // // NOTE: This function returns a temporary result, and uses an internal // buffer. Do not use it recursively. Also, it will Trap on errors. // // Prequalifies a string before using it with a function that // expects it to be 8-bits. It would be used for instance to convert // a string that is potentially REBUNI-wide into a form that can be used // with a Scan_XXX routine, that is expecting ASCII or UTF-8 source. // (Many TO-XXX conversions from STRING re-use that scanner logic.) // // Returns a temporary string and sets the length field. // // If `allow_utf8`, the constructed result is converted to UTF8. // // Checks or converts it: // // 1. it is byte string (not unicode) // 2. if unicode, copy and return as temp byte string // 3. it's actual content (less space, newlines) <= max len // 4. it does not contain other values ("123 456") // 5. it's not empty or only whitespace // REBYTE *Temp_Byte_Chars_May_Fail( const REBVAL *val, REBINT max_len, REBCNT *length, REBOOL allow_utf8 ) { REBCNT tail = VAL_LEN_HEAD(val); REBCNT index = VAL_INDEX(val); REBCNT len; REBUNI c; REBYTE *bp; REBSER *src = VAL_SERIES(val); if (index > tail) fail (Error(RE_PAST_END)); Resize_Series(BYTE_BUF, max_len+1); bp = BIN_HEAD(BYTE_BUF); // Skip leading whitespace: for (; index < tail; index++) { c = GET_ANY_CHAR(src, index); if (!IS_SPACE(c)) break; } // Copy chars that are valid: for (; index < tail; index++) { c = GET_ANY_CHAR(src, index); if (c >= 0x80) { if (!allow_utf8) fail (Error(RE_INVALID_CHARS)); len = Encode_UTF8_Char(bp, c); max_len -= len; bp += len; } else if (!IS_SPACE(c)) { *bp++ = (REBYTE)c; max_len--; } else break; if (max_len < 0) fail (Error(RE_TOO_LONG)); } // Rest better be just spaces: for (; index < tail; index++) { c = GET_ANY_CHAR(src, index); if (!IS_SPACE(c)) fail (Error(RE_INVALID_CHARS)); } *bp = '\0'; len = bp - BIN_HEAD(BYTE_BUF); if (len == 0) fail (Error(RE_TOO_SHORT)); if (length) *length = len; return BIN_HEAD(BYTE_BUF); }
// // Make_Set_Operation_Series: C // // Do set operations on a series. Case-sensitive if `cased` is TRUE. // `skip` is the record size. // static REBSER *Make_Set_Operation_Series(const REBVAL *val1, const REBVAL *val2, REBCNT flags, REBCNT cased, REBCNT skip) { REBSER *buffer; // buffer for building the return series REBCNT i; REBINT h = TRUE; REBFLG first_pass = TRUE; // are we in the first pass over the series? REBSER *out_ser; // This routine should only be called with SERIES! values assert(ANY_SERIES(val1)); if (val2) { assert(ANY_SERIES(val2)); if (ANY_ARRAY(val1)) { if (!ANY_ARRAY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); // As long as they're both arrays, we're willing to do: // // >> union quote (a b c) 'b/d/e // (a b c d e) // // The type of the result will match the first value. } else if (!IS_BINARY(val1)) { // We will similarly do any two ANY-STRING! types: // // >> union <abc> "bde" // <abcde> if (IS_BINARY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); } else { // Binaries only operate with other binaries if (!IS_BINARY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); } } // Calculate i as length of result block. i = VAL_LEN(val1); if (flags & SOP_FLAG_BOTH) i += VAL_LEN(val2); if (ANY_ARRAY(val1)) { REBSER *hser = 0; // hash table for series REBSER *hret; // hash table for return series buffer = BUF_EMIT; // use preallocated shared block Resize_Series(buffer, i); hret = Make_Hash_Sequence(i); // allocated // Optimization note: !! // This code could be optimized for small blocks by not hashing them // and extending Find_Key to do a FIND on the value itself w/o the hash. do { REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass! // Check what is in series1 but not in series2: if (flags & SOP_FLAG_CHECK) hser = Hash_Block(val2, cased); // Iterate over first series: i = VAL_INDEX(val1); for (; i < SERIES_TAIL(ser); i += skip) { REBVAL *item = BLK_SKIP(ser, i); if (flags & SOP_FLAG_CHECK) { h = Find_Key(VAL_SERIES(val2), hser, item, skip, cased, 1); h = (h >= 0); if (flags & SOP_FLAG_INVERT) h = !h; } if (h) Find_Key(buffer, hret, item, skip, cased, 2); } if (flags & SOP_FLAG_CHECK) Free_Series(hser); if (!first_pass) break; first_pass = FALSE; // Iterate over second series? if ((i = ((flags & SOP_FLAG_BOTH) != 0))) { const REBVAL *temp = val1; val1 = val2; val2 = temp; } } while (i); if (hret) Free_Series(hret); out_ser = Copy_Array_Shallow(buffer); RESET_TAIL(buffer); // required - allow reuse } else { if (IS_BINARY(val1)) { // All binaries use "case-sensitive" comparison (e.g. each byte // is treated distinctly) cased = TRUE; } buffer = BUF_MOLD; Reset_Buffer(buffer, i); RESET_TAIL(buffer); do { REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass! REBUNI uc; // Iterate over first series: i = VAL_INDEX(val1); for (; i < SERIES_TAIL(ser); i += skip) { uc = GET_ANY_CHAR(ser, i); if (flags & SOP_FLAG_CHECK) { h = (NOT_FOUND != Find_Str_Char( VAL_SERIES(val2), 0, VAL_INDEX(val2), VAL_TAIL(val2), skip, uc, cased ? AM_FIND_CASE : 0 )); if (flags & SOP_FLAG_INVERT) h = !h; } if (!h) continue; if ( NOT_FOUND == Find_Str_Char( buffer, 0, 0, SERIES_TAIL(buffer), skip, uc, cased ? AM_FIND_CASE : 0 ) ) { Append_String(buffer, ser, i, skip); } } if (!first_pass) break; first_pass = FALSE; // Iterate over second series? if ((i = ((flags & SOP_FLAG_BOTH) != 0))) { const REBVAL *temp = val1; val1 = val2; val2 = temp; } } while (i); out_ser = Copy_String(buffer, 0, -1); } return out_ser; }
*/ static REBINT Do_Set_Operation(struct Reb_Call *call_, REBCNT flags) /* ** Do set operations on a series. ** ***********************************************************************/ { REBVAL *val; REBVAL *val1; REBVAL *val2 = 0; REBSER *ser; REBSER *hser = 0; // hash table for series REBSER *retser; // return series REBSER *hret; // hash table for return series REBCNT i; REBINT h = TRUE; REBCNT skip = 1; // record size REBCNT cased = 0; // case sensitive when TRUE SET_NONE(D_OUT); val1 = D_ARG(1); i = 2; // Check for second series argument: if (flags != SET_OP_UNIQUE) { val2 = D_ARG(i++); if (VAL_TYPE(val1) != VAL_TYPE(val2)) raise Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2)); } // Refinements /case and /skip N cased = D_REF(i++); // cased if (D_REF(i++)) skip = Int32s(D_ARG(i), 1); switch (VAL_TYPE(val1)) { case REB_BLOCK: i = VAL_LEN(val1); // Setup result block: if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2); retser = BUF_EMIT; // use preallocated shared block Resize_Series(retser, i); hret = Make_Hash_Sequence(i); // allocated // Optimization note: !! // This code could be optimized for small blocks by not hashing them // and extending Find_Key to do a FIND on the value itself w/o the hash. do { // Check what is in series1 but not in series2: if (GET_FLAG(flags, SOP_CHECK)) hser = Hash_Block(val2, cased); // Iterate over first series: ser = VAL_SERIES(val1); i = VAL_INDEX(val1); for (; val = BLK_SKIP(ser, i), i < SERIES_TAIL(ser); i += skip) { if (GET_FLAG(flags, SOP_CHECK)) { h = Find_Key(VAL_SERIES(val2), hser, val, skip, cased, 1) >= 0; if (GET_FLAG(flags, SOP_INVERT)) h = !h; } if (h) Find_Key(retser, hret, val, skip, cased, 2); } // Iterate over second series? if ((i = GET_FLAG(flags, SOP_BOTH))) { val = val1; val1 = val2; val2 = val; CLR_FLAG(flags, SOP_BOTH); } if (GET_FLAG(flags, SOP_CHECK)) Free_Series(hser); } while (i); if (hret) Free_Series(hret); Val_Init_Block(D_OUT, Copy_Array_Shallow(retser)); RESET_TAIL(retser); // required - allow reuse break; case REB_BINARY: cased = TRUE; SET_TYPE(D_OUT, REB_BINARY); case REB_STRING: i = VAL_LEN(val1); // Setup result block: if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2); retser = BUF_MOLD; Reset_Buffer(retser, i); RESET_TAIL(retser); do { REBUNI uc; cased = cased ? AM_FIND_CASE : 0; // Iterate over first series: ser = VAL_SERIES(val1); i = VAL_INDEX(val1); for (; i < SERIES_TAIL(ser); i += skip) { uc = GET_ANY_CHAR(ser, i); if (GET_FLAG(flags, SOP_CHECK)) { h = Find_Str_Char(VAL_SERIES(val2), 0, VAL_INDEX(val2), VAL_TAIL(val2), skip, uc, cased) != NOT_FOUND; if (GET_FLAG(flags, SOP_INVERT)) h = !h; } if (h && (Find_Str_Char(retser, 0, 0, SERIES_TAIL(retser), skip, uc, cased) == NOT_FOUND)) { Append_String(retser, ser, i, skip); } } // Iterate over second series? if ((i = GET_FLAG(flags, SOP_BOTH))) { val = val1; val1 = val2; val2 = val; CLR_FLAG(flags, SOP_BOTH); } } while (i); ser = Copy_String(retser, 0, -1); if (IS_BINARY(D_OUT)) Val_Init_Binary(D_OUT, ser); else Val_Init_String(D_OUT, ser); break; case REB_BITSET: switch (flags) { case SET_OP_UNIQUE: return R_ARG1; case SET_OP_UNION: i = A_OR; break; case SET_OP_INTERSECT: i = A_AND; break; case SET_OP_DIFFERENCE: i = A_XOR; break; case SET_OP_EXCLUDE: i = 0; // special case break; } ser = Xandor_Binary(i, val1, val2); Val_Init_Bitset(D_OUT, ser); break; case REB_TYPESET: switch (flags) { case SET_OP_UNIQUE: break; case SET_OP_UNION: VAL_TYPESET(val1) |= VAL_TYPESET(val2); break; case SET_OP_INTERSECT: VAL_TYPESET(val1) &= VAL_TYPESET(val2); break; case SET_OP_DIFFERENCE: VAL_TYPESET(val1) ^= VAL_TYPESET(val2); break; case SET_OP_EXCLUDE: VAL_TYPESET(val1) &= ~VAL_TYPESET(val2); break; } return R_ARG1; default: raise Error_Invalid_Arg(val1); } return R_OUT; }
// // Make_Set_Operation_Series: C // // Do set operations on a series. Case-sensitive if `cased` is TRUE. // `skip` is the record size. // static REBSER *Make_Set_Operation_Series( const REBVAL *val1, const REBVAL *val2, REBFLGS flags, REBOOL cased, REBCNT skip ) { REBCNT i; REBINT h = 1; // used for both logic true/false and hash check REBOOL first_pass = TRUE; // are we in the first pass over the series? REBSER *out_ser; assert(ANY_SERIES(val1)); if (val2) { assert(ANY_SERIES(val2)); if (ANY_ARRAY(val1)) { if (!ANY_ARRAY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); // As long as they're both arrays, we're willing to do: // // >> union quote (a b c) 'b/d/e // (a b c d e) // // The type of the result will match the first value. } else if (!IS_BINARY(val1)) { // We will similarly do any two ANY-STRING! types: // // >> union <abc> "bde" // <abcde> if (IS_BINARY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); } else { // Binaries only operate with other binaries if (!IS_BINARY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); } } // Calculate `i` as maximum length of result block. The temporary buffer // will be allocated at this size, but copied out at the exact size of // the actual result. // i = VAL_LEN_AT(val1); if (flags & SOP_FLAG_BOTH) i += VAL_LEN_AT(val2); if (ANY_ARRAY(val1)) { REBSER *hser = 0; // hash table for series REBSER *hret; // hash table for return series // The buffer used for building the return series. Currently it // reuses BUF_EMIT, because that buffer is not likely to be in // use (emit doesn't call set operations, nor vice versa). However, // other routines may get the same idea and start recursing so it // may be better to use something more similar to the mold stack // approach of marking off successive ranges in the array. // REBSER *buffer = ARR_SERIES(BUF_EMIT); Resize_Series(buffer, i); hret = Make_Hash_Sequence(i); // allocated // Optimization note: !! // This code could be optimized for small blocks by not hashing them // and extending Find_Key to FIND on the value itself w/o the hash. do { REBARR *array1 = VAL_ARRAY(val1); // val1 and val2 swapped 2nd pass! // Check what is in series1 but not in series2 // if (flags & SOP_FLAG_CHECK) hser = Hash_Block(val2, skip, cased); // Iterate over first series // i = VAL_INDEX(val1); for (; i < ARR_LEN(array1); i += skip) { RELVAL *item = ARR_AT(array1, i); if (flags & SOP_FLAG_CHECK) { h = Find_Key_Hashed( VAL_ARRAY(val2), hser, item, VAL_SPECIFIER(val1), skip, cased, 1 ); h = (h >= 0); if (flags & SOP_FLAG_INVERT) h = !h; } if (h) { Find_Key_Hashed( AS_ARRAY(buffer), hret, item, VAL_SPECIFIER(val1), skip, cased, 2 ); } } if (i != ARR_LEN(array1)) { // // In the current philosophy, the semantics of what to do // with things like `intersect/skip [1 2 3] [7] 2` is too // shaky to deal with, so an error is reported if it does // not work out evenly to the skip size. // fail (Error(RE_BLOCK_SKIP_WRONG)); } if (flags & SOP_FLAG_CHECK) Free_Series(hser); if (!first_pass) break; first_pass = FALSE; // Iterate over second series? // if ((i = ((flags & SOP_FLAG_BOTH) != 0))) { const REBVAL *temp = val1; val1 = val2; val2 = temp; } } while (i); if (hret) Free_Series(hret); out_ser = ARR_SERIES(Copy_Array_Shallow(AS_ARRAY(buffer), SPECIFIED)); SET_SERIES_LEN(buffer, 0); // required - allow reuse } else { REB_MOLD mo; CLEARS(&mo); if (IS_BINARY(val1)) { // // All binaries use "case-sensitive" comparison (e.g. each byte // is treated distinctly) // cased = TRUE; } // ask mo.series to have at least `i` capacity beyond mo.start // mo.opts = MOPT_RESERVE; mo.reserve = i; Push_Mold(&mo); do { REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass! REBUNI uc; // Iterate over first series // i = VAL_INDEX(val1); for (; i < SER_LEN(ser); i += skip) { uc = GET_ANY_CHAR(ser, i); if (flags & SOP_FLAG_CHECK) { h = (NOT_FOUND != Find_Str_Char( uc, VAL_SERIES(val2), 0, VAL_INDEX(val2), VAL_LEN_HEAD(val2), skip, cased ? AM_FIND_CASE : 0 )); if (flags & SOP_FLAG_INVERT) h = !h; } if (!h) continue; if ( NOT_FOUND == Find_Str_Char( uc, // c2 (the character to find) mo.series, // ser mo.start, // head mo.start, // index SER_LEN(mo.series), // tail skip, // skip cased ? AM_FIND_CASE : 0 // flags ) ) { Append_String(mo.series, ser, i, skip); } } if (!first_pass) break; first_pass = FALSE; // Iterate over second series? // if ((i = ((flags & SOP_FLAG_BOTH) != 0))) { const REBVAL *temp = val1; val1 = val2; val2 = temp; } } while (i); out_ser = Pop_Molded_String(&mo); } return out_ser; }
*/ static REBINT Do_Cmd(REBDIA *dia) /* ** Returns the length of command processed or error. See below. ** ***********************************************************************/ { REBVAL *fargs; REBINT size; REBVAL *val; REBINT err; REBINT n; // Get formal arguments block for this command: fargs = FRM_VALUES(dia->dialect) + dia->cmd; if (!IS_BLOCK(fargs)) return -REB_DIALECT_BAD_SPEC; dia->fargs = VAL_SERIES(fargs); fargs = VAL_BLK_DATA(fargs); size = Count_Dia_Args(fargs); // approximate // Preallocate output block (optimize for large blocks): if (dia->len > size) size = dia->len; if (GET_FLAG(dia->flags, RDIA_ALL)) { Extend_Series(dia->out, size+1); } else { Resize_Series(dia->out, size+1); // tail = 0 CLEAR_SERIES(dia->out); // Be sure it is entirely cleared } // Insert command word: if (!GET_FLAG(dia->flags, RDIA_NO_CMD)) { val = Append_Value(dia->out); Set_Word(val, FRM_WORD_SYM(dia->dialect, dia->cmd), dia->dialect, dia->cmd); if (GET_FLAG(dia->flags, RDIA_LIT_CMD)) VAL_SET(val, REB_LIT_WORD); dia->outi++; size++; } if (dia->cmd > 1) dia->argi++; // default cmd has no word arg // Foreach argument provided: for (n = dia->len; n > 0; n--, dia->argi++) { val = Eval_Arg(dia); if (!val) return -REB_DIALECT_BAD_ARG; if (IS_END(val)) break; if (!IS_NONE(val)) { //Print("n %d len %d argi %d", n, dia->len, dia->argi); err = Add_Arg(dia, val); // 1: good, 0: no-type, -N: error if (err == 0) return n; // remainder if (err < 0) return err; } } // If not enough args, pad with NONE values: if (dia->cmd > 1) { for (n = SERIES_TAIL(dia->out); n < size; n++) { Append_Value(dia->out); } } dia->outi = SERIES_TAIL(dia->out); return 0; }
*/ REBYTE *Temp_Byte_Chars_May_Fail(const REBVAL *val, REBINT max_len, REBCNT *length, REBINT opts) /* ** NOTE: This function returns a temporary result, and uses an internal ** buffer. Do not use it recursively. Also, it will Trap on errors. ** ** Prequalifies a string before using it with a function that ** expects it to be 8-bits. It would be used for instance to convert ** a string that is potentially REBUNI-wide into a form that can be used ** with a Scan_XXX routine, that is expecting ASCII or UTF-8 source. ** (Many TO-XXX conversions from STRING re-use that scanner logic.) ** ** Returns a temporary string and sets the length field. ** ** Opts can be: ** 0 - no special options ** 1 - allow UTF8 (val is converted to UTF8 during qualification) ** 2 - allow binary ** ** Checks or converts it: ** ** 1. it is byte string (not unicode) ** 2. if unicode, copy and return as temp byte string ** 3. it's actual content (less space, newlines) <= max len ** 4. it does not contain other values ("123 456") ** 5. it's not empty or only whitespace ** ***********************************************************************/ { REBCNT tail = VAL_TAIL(val); REBCNT index = VAL_INDEX(val); REBCNT len; REBUNI c; REBYTE *bp; REBSER *src = VAL_SERIES(val); if (index > tail) raise Error_0(RE_PAST_END); Resize_Series(BUF_FORM, max_len+1); bp = BIN_HEAD(BUF_FORM); // Skip leading whitespace: for (; index < tail; index++) { c = GET_ANY_CHAR(src, index); if (!IS_SPACE(c)) break; } // Copy chars that are valid: for (; index < tail; index++) { c = GET_ANY_CHAR(src, index); if (opts < 2 && c >= 0x80) { if (opts == 0) raise Error_0(RE_INVALID_CHARS); len = Encode_UTF8_Char(bp, c); max_len -= len; bp += len; } else if (!IS_SPACE(c)) { *bp++ = (REBYTE)c; max_len--; } else break; if (max_len < 0) raise Error_0(RE_TOO_LONG); } // Rest better be just spaces: for (; index < tail; index++) { c = GET_ANY_CHAR(src, index); if (!IS_SPACE(c)) raise Error_0(RE_INVALID_CHARS); } *bp= 0; len = bp - BIN_HEAD(BUF_FORM); if (len == 0) raise Error_0(RE_TOO_SHORT); if (length) *length = len; return BIN_HEAD(BUF_FORM); }