*/	REBSER *Decode_UTF_String(REBYTE *bp, REBCNT len, REBINT utf, REBFLG ccr)
/*
**		Decode a run of bytes into a newly made string series.
**
**		bp, len - input bytes and their count.
**		utf     - encoding selector: 0 or 8 (UTF-8), +/-16 (UTF-16),
**		          +/-32 (UTF-32). For the 16/32 cases `utf < 0` is
**		          handed to the decoder as a flag (presumably the
**		          byte order - confirm against Decode_UTF16/32).
**		          The special value -1 means: ask What_UTF() to pick
**		          the encoding from the BOM, then skip the BOM bytes.
**		ccr     - passed through unchanged to the decoder.
**
**		Returns NULL when utf is none of the supported values.
**		When the decoder reports a negative size, its magnitude is
**		used and the result is built with Make_Binary() and
**		Append_Uni_Bytes(); otherwise Make_Unicode() and
**		Append_Uni_Uni() are used.
**
***********************************************************************/
{
	REBSER *scratch = BUF_UTF8; // buffer is Unicode width
	REBSER *result;
	REBINT count;

	//REBFLG ccr = FALSE; // in original R3-alpha if was TRUE
	//@@ https://github.com/rebol/rebol-issues/issues/2336

	if (utf == -1) {
		// Let the BOM choose the encoding, then step over it.
		utf = What_UTF(bp, len);
		switch (utf) {
		case 8:
			bp += 3;
			len -= 3;
			break;
		case 16:
		case -16:
			bp += 2;
			len -= 2;
			break;
		case 32:
		case -32:
			bp += 4;
			len -= 4;
			break;
		default:
			// Zero (nothing detected) or anything else: skip nothing.
			break;
		}
	}

	switch (utf) {
	case 0:
	case 8:
		count = Decode_UTF8((REBUNI*)Reset_Buffer(scratch, len), bp, len, ccr);
		break;
	case 16:
	case -16:
		count = Decode_UTF16((REBUNI*)Reset_Buffer(scratch, len/2 + 1), bp, len, utf < 0, ccr);
		break;
	case 32:
	case -32:
		count = Decode_UTF32((REBUNI*)Reset_Buffer(scratch, len/4 + 1), bp, len, utf < 0, ccr);
		break;
	default:
		return NULL;
	}

	if (count >= 0) {
		result = Make_Unicode(count);
		Append_Uni_Uni(result, UNI_HEAD(scratch), count);
		return result;
	}

	// Negative size: use its magnitude and build a byte-width series.
	count = -count;
	result = Make_Binary(count);
	Append_Uni_Bytes(result, UNI_HEAD(scratch), count);
	return result;
}
// // Detab_Bytes: C // // Detab a string and return a new series. // REBSER *Detab_Bytes(REBYTE *bp, REBCNT index, REBCNT len, REBINT tabsize) { REBCNT cnt = 0; REBCNT n; REBYTE *dp; REBYTE c; // Estimate new length based on tab expansion: for (n = index; n < len; n++) if (bp[n] == TAB) cnt++; dp = Reset_Buffer(BYTE_BUF, len + (cnt * (tabsize-1))); n = 0; while (index < len) { c = bp[index++]; if (c == '\t') { *dp++ = ' '; n++; for (; n % tabsize != 0; n++) *dp++ = ' '; continue; } if (c == '\n') n = 0; else n++; *dp++ = c; } return Copy_Buffer(BYTE_BUF, 0, dp); }
*/	REBSER *Encode_UTF8_String(void *src, REBCNT len, REBFLG uni, REBFLG opts)
/*
**		Do all the details to encode a string as UTF8.
**
**		src, len - source characters and their count; src is read as
**		           REBUNI units when `uni` is set, as bytes otherwise.
**		opts     - flag set; ENC_OPT_CRLF is forwarded to the length
**		           and encode helpers, ENC_OPT_BOM only enlarges the
**		           buffer request by 3 (no BOM is written here), and
**		           ENC_OPT_NO_COPY is described below.
**
**		Byte input for which Is_Not_ASCII() is false is not encoded:
**		the function returns 0 when ENC_OPT_NO_COPY is set, else a
**		plain Copy_Bytes() of the input. Everything else is encoded
**		into the shared BUF_FORM buffer and a Copy_Bytes() of that
**		buffer is returned.
**
***********************************************************************/
{
	REBSER *buf = BUF_FORM;  // a shared buffer
	REBCNT need;
	REBYTE *out;
	REBFLG crlf = GET_FLAG(opts, ENC_OPT_CRLF);

	if (!uni) {
		REBYTE *bytes = (REBYTE*)src;

		if (!Is_Not_ASCII(bytes, len)) {
			// Nothing to encode.
			if (GET_FLAG(opts, ENC_OPT_NO_COPY)) return 0;
			return Copy_Bytes(bytes, len);
		}

		need = Length_As_UTF8((REBUNI*)bytes, len, FALSE, (REBOOL)crlf);
		out = Reset_Buffer(buf, need + (GET_FLAG(opts, ENC_OPT_BOM) ? 3 : 0));
		Encode_UTF8(out, need, bytes, &len, FALSE, crlf);
	}
	else {
		REBUNI *wide = (REBUNI*)src;

		need = Length_As_UTF8(wide, len, TRUE, (REBOOL)crlf);
		// The buffer is reset twice: first with room for a BOM, then
		// with the exact size; only the second pointer is written to.
		out = Reset_Buffer(buf, need + (GET_FLAG(opts, ENC_OPT_BOM) ? 3 : 0));
		UNUSED(out);
		Encode_UTF8(Reset_Buffer(buf, need), need, wide, &len, TRUE, crlf);
	}

	// `len` was passed by address to Encode_UTF8() and is used from
	// here on as the length of the encoded result.
	SERIES_TAIL(buf) = len;
	STR_TERM(buf);

	return Copy_Bytes(BIN_HEAD(buf), len);
}
*/	REBSER *Entab_Unicode(REBUNI *bp, REBCNT index, REBCNT len, REBINT tabsize)
/*
**		Entab a unicode string and return a new series.
**
**		Scans bp[index] up to (not including) bp[len]. At the start
**		of each line every run of `tabsize` spaces is replaced by one
**		TAB; a leading TAB is copied and restarts the space count;
**		an incomplete run is written back as spaces. From the first
**		other character on, the rest of the line (through its
**		newline) is copied unchanged. Output is assembled in BUF_MOLD
**		and returned through Copy_Buffer().
**
**		Note: spaces still pending when the input ends are not
**		emitted.
**
***********************************************************************/
{
	REBINT n = 0;	// leading spaces seen but not yet emitted
	REBUNI *dp;
	REBUNI c;

	dp = (REBUNI *)Reset_Buffer(BUF_MOLD, len);

	for (; index < len; index++) {

		c = bp[index];

		// Count leading spaces, insert TAB for each tabsize:
		if (c == ' ') {
			if (++n >= tabsize) {
				*dp++ = '\t';
				n = 0;
			}
			continue;
		}

		// Hitting a leading TAB resets space counter:
		if (c == '\t') {
			*dp++ = '\t';
			n = 0;
		}
		else {
			// Incomplete tab space, pad with spaces:
			for (; n > 0; n--) *dp++ = ' ';

			// Copy chars thru end-of-line (or end of buffer):
			while (index < len) {
				if ((*dp++ = bp[index++]) == '\n') {
					// index is already one past the newline. Step
					// back onto it so the outer loop's increment
					// lands on the first character of the next
					// line instead of skipping it.
					index--;
					break;
				}
			}
		}
	}

	return Copy_Buffer(BUF_MOLD, dp);
}
*/	static int Read_Dir(REBREQ *dir, REBSER *files)
/*
**		Read directory entries through the device layer and append
**		one REB_FILE value per entry to `files`, whose tail is reset
**		first. Entries flagged RFM_DIR get a trailing '/'.
**
**		Returns the last OS_DO_DEVICE() result, except that a
**		negative result becomes 0 when dir->error is not
**		-RFE_OPEN_FAIL and the path contains '*' or '?'.
**
**		Provide option to get file info too.
**		Provide option to prepend dir path.
**		Provide option to use wildcards.
**
***********************************************************************/
{
	REBREQ entry;		// request record filled in for each entry
	REBSER *scratch;
	REBINT status;

	RESET_TAIL(files);
	CLEARS(&entry);

	// Temporary filename storage:
	scratch = BUF_OS_STR;
	entry.file.path = (REBCHR*)Reset_Buffer(scratch, MAX_FILE_NAME);

	SET_FLAG(dir->modes, RFM_DIR);

	dir->data = (REBYTE*)(&entry);

	for (;;) {
		REBCNT len;
		REBSER *name;

		status = OS_DO_DEVICE(dir, RDC_READ);
		if (status != 0 || GET_FLAG(dir->flags, RRF_DONE)) break;

		len = LEN_STR(entry.file.path);

		// Directories get one extra character, set to '/' below.
		if (GET_FLAG(entry.modes, RFM_DIR)) len++;

		name = Copy_OS_Str(entry.file.path, len);

		if (GET_FLAG(entry.modes, RFM_DIR)) {
			SET_ANY_CHAR(name, name->tail-1, '/');
		}

		Set_Series(REB_FILE, Append_Value(files), name);
	}

	if (status < 0 && dir->error != -RFE_OPEN_FAIL) {
		if (FIND_CHR(dir->file.path, '*') || FIND_CHR(dir->file.path, '?'))
			status = 0;  // no matches found, but not an error
	}

	return status;
}
// // Entab_Bytes: C // // Entab a string and return a new series. // REBSER *Entab_Bytes(REBYTE *bp, REBCNT index, REBCNT len, REBINT tabsize) { REBINT n = 0; REBYTE *dp; REBYTE c; dp = Reset_Buffer(BYTE_BUF, len); for (; index < len; index++) { c = bp[index]; // Count leading spaces, insert TAB for each tabsize: if (c == ' ') { if (++n >= tabsize) { *dp++ = '\t'; n = 0; } continue; } // Hitting a leading TAB resets space counter: if (c == '\t') { *dp++ = (REBYTE)c; n = 0; } else { // Incomplete tab space, pad with spaces: for (; n > 0; n--) *dp++ = ' '; // Copy chars thru end-of-line (or end of buffer): while (index < len) { if ((*dp++ = bp[index++]) == '\n') break; } } } return Copy_Buffer(BYTE_BUF, 0, dp); }
*/	REBSER *Detab_Unicode(REBUNI *bp, REBCNT index, REBCNT len, REBINT tabsize)
/*
**		Detab a unicode string and return a new series.
**
**		Input is bp[index] up to (not including) bp[len]. A tab
**		always emits at least one space and then pads until the
**		column is a multiple of `tabsize`; a newline resets the
**		column to zero. Output is assembled in BUF_MOLD and handed
**		to Copy_Buffer(). `tabsize` is used as a modulus, so it must
**		not be zero.
**
***********************************************************************/
{
	REBCNT tabs = 0;
	REBCNT col;
	REBCNT pos;
	REBUNI *out;

	// Count the tabs so the buffer can be sized for the expansion:
	for (pos = index; pos < len; pos++) {
		if (bp[pos] == TAB)
			tabs++;
	}

	out = (REBUNI *)Reset_Buffer(BUF_MOLD, len + (tabs * (tabsize-1)));

	col = 0;
	for (pos = index; pos < len; pos++) {
		REBUNI ch = bp[pos];

		if (ch == '\t') {
			// One space unconditionally, then pad to the next tab stop.
			do {
				*out++ = ' ';
				col++;
			} while (col % tabsize != 0);
		}
		else {
			if (ch == '\n')
				col = 0;
			else
				col++;
			*out++ = ch;
		}
	}

	return Copy_Buffer(BUF_MOLD, out);
}
// // Make_Set_Operation_Series: C // // Do set operations on a series. Case-sensitive if `cased` is TRUE. // `skip` is the record size. // static REBSER *Make_Set_Operation_Series(const REBVAL *val1, const REBVAL *val2, REBCNT flags, REBCNT cased, REBCNT skip) { REBSER *buffer; // buffer for building the return series REBCNT i; REBINT h = TRUE; REBFLG first_pass = TRUE; // are we in the first pass over the series? REBSER *out_ser; // This routine should only be called with SERIES! values assert(ANY_SERIES(val1)); if (val2) { assert(ANY_SERIES(val2)); if (ANY_ARRAY(val1)) { if (!ANY_ARRAY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); // As long as they're both arrays, we're willing to do: // // >> union quote (a b c) 'b/d/e // (a b c d e) // // The type of the result will match the first value. } else if (!IS_BINARY(val1)) { // We will similarly do any two ANY-STRING! types: // // >> union <abc> "bde" // <abcde> if (IS_BINARY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); } else { // Binaries only operate with other binaries if (!IS_BINARY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); } } // Calculate i as length of result block. i = VAL_LEN(val1); if (flags & SOP_FLAG_BOTH) i += VAL_LEN(val2); if (ANY_ARRAY(val1)) { REBSER *hser = 0; // hash table for series REBSER *hret; // hash table for return series buffer = BUF_EMIT; // use preallocated shared block Resize_Series(buffer, i); hret = Make_Hash_Sequence(i); // allocated // Optimization note: !! // This code could be optimized for small blocks by not hashing them // and extending Find_Key to do a FIND on the value itself w/o the hash. do { REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass! 
// Check what is in series1 but not in series2: if (flags & SOP_FLAG_CHECK) hser = Hash_Block(val2, cased); // Iterate over first series: i = VAL_INDEX(val1); for (; i < SERIES_TAIL(ser); i += skip) { REBVAL *item = BLK_SKIP(ser, i); if (flags & SOP_FLAG_CHECK) { h = Find_Key(VAL_SERIES(val2), hser, item, skip, cased, 1); h = (h >= 0); if (flags & SOP_FLAG_INVERT) h = !h; } if (h) Find_Key(buffer, hret, item, skip, cased, 2); } if (flags & SOP_FLAG_CHECK) Free_Series(hser); if (!first_pass) break; first_pass = FALSE; // Iterate over second series? if ((i = ((flags & SOP_FLAG_BOTH) != 0))) { const REBVAL *temp = val1; val1 = val2; val2 = temp; } } while (i); if (hret) Free_Series(hret); out_ser = Copy_Array_Shallow(buffer); RESET_TAIL(buffer); // required - allow reuse } else { if (IS_BINARY(val1)) { // All binaries use "case-sensitive" comparison (e.g. each byte // is treated distinctly) cased = TRUE; } buffer = BUF_MOLD; Reset_Buffer(buffer, i); RESET_TAIL(buffer); do { REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass! REBUNI uc; // Iterate over first series: i = VAL_INDEX(val1); for (; i < SERIES_TAIL(ser); i += skip) { uc = GET_ANY_CHAR(ser, i); if (flags & SOP_FLAG_CHECK) { h = (NOT_FOUND != Find_Str_Char( VAL_SERIES(val2), 0, VAL_INDEX(val2), VAL_TAIL(val2), skip, uc, cased ? AM_FIND_CASE : 0 )); if (flags & SOP_FLAG_INVERT) h = !h; } if (!h) continue; if ( NOT_FOUND == Find_Str_Char( buffer, 0, 0, SERIES_TAIL(buffer), skip, uc, cased ? AM_FIND_CASE : 0 ) ) { Append_String(buffer, ser, i, skip); } } if (!first_pass) break; first_pass = FALSE; // Iterate over second series? if ((i = ((flags & SOP_FLAG_BOTH) != 0))) { const REBVAL *temp = val1; val1 = val2; val2 = temp; } } while (i); out_ser = Copy_String(buffer, 0, -1); } return out_ser; }
*/	static REBINT Do_Set_Operation(struct Reb_Call *call_, REBCNT flags)
/*
**		Do set operations on a series.
**
**		Arguments are read through the D_ARG/D_REF macros: the first
**		value, a second value unless `flags` is SET_OP_UNIQUE, then
**		the /case refinement and the /skip refinement with its size.
**		When a second value is read its type must equal the first's.
**
**		Dispatch is on the type of the first value:
**		  block   - result built in BUF_EMIT with hashed lookups and
**		            stored in D_OUT as a shallow-copied block
**		  binary  - like string, but always case-sensitive
**		  string  - result built in BUF_MOLD, copied into D_OUT
**		  bitset  - SET_OP_UNIQUE returns R_ARG1; otherwise the pair
**		            is combined through Xandor_Binary()
**		  typeset - the first argument's bits are modified in place
**		            and R_ARG1 is returned
**		Any other type raises an invalid-argument error.
**
**		Within the block/string loops the flag bits mean:
**		  SOP_CHECK  - keep an item only if found in the other series
**		  SOP_INVERT - with CHECK: keep it only if NOT found
**		  SOP_BOTH   - run a second pass with val1/val2 swapped
**
***********************************************************************/
{
	REBVAL *val;		// current item; also the temp for swapping
	REBVAL *val1;
	REBVAL *val2 = 0;
	REBSER *ser;
	REBSER *hser = 0;	// hash table for series
	REBSER *retser;		// return series
	REBSER *hret;		// hash table for return series
	REBCNT i;			// argument index, then length / position / loop flag
	REBINT h = TRUE;	// keep-this-item flag; stays TRUE without SOP_CHECK
	REBCNT skip = 1;	// record size
	REBCNT cased = 0;	// case sensitive when TRUE

	SET_NONE(D_OUT);
	val1 = D_ARG(1);
	i = 2;

	// Check for second series argument:
	if (flags != SET_OP_UNIQUE) {
		val2 = D_ARG(i++);
		if (VAL_TYPE(val1) != VAL_TYPE(val2))
			raise Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2));
	}

	// Refinements /case and /skip N
	cased = D_REF(i++); // cased
	// After the post-increment `i` indexes the argument that follows
	// the /skip refinement, i.e. its size value.
	if (D_REF(i++)) skip = Int32s(D_ARG(i), 1);

	switch (VAL_TYPE(val1)) {

	case REB_BLOCK:
		i = VAL_LEN(val1);
		// Setup result block:
		if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2);

		retser = BUF_EMIT;			// use preallocated shared block
		Resize_Series(retser, i);
		hret = Make_Hash_Sequence(i);	// allocated

		// Optimization note: !!
		// This code could be optimized for small blocks by not hashing them
		// and extending Find_Key to do a FIND on the value itself w/o the hash.

		do {
			// Check what is in series1 but not in series2:
			if (GET_FLAG(flags, SOP_CHECK))
				hser = Hash_Block(val2, cased);

			// Iterate over first series:
			ser = VAL_SERIES(val1);
			i = VAL_INDEX(val1);
			// Note: the comma expression computes the item pointer
			// before the bounds test on every evaluation.
			for (; val = BLK_SKIP(ser, i), i < SERIES_TAIL(ser); i += skip) {
				if (GET_FLAG(flags, SOP_CHECK)) {
					h = Find_Key(VAL_SERIES(val2), hser, val, skip, cased, 1) >= 0;
					if (GET_FLAG(flags, SOP_INVERT)) h = !h;
				}
				// Mode 2 is used on the result series (presumably
				// "add if absent" - confirm against Find_Key).
				if (h) Find_Key(retser, hret, val, skip, cased, 2);
			}

			// Iterate over second series?
			// SOP_BOTH is cleared here, so the swap and the extra
			// pass happen at most once.
			if ((i = GET_FLAG(flags, SOP_BOTH))) {
				val = val1;
				val1 = val2;
				val2 = val;
				CLR_FLAG(flags, SOP_BOTH);
			}

			if (GET_FLAG(flags, SOP_CHECK))
				Free_Series(hser);
		} while (i);

		if (hret) Free_Series(hret);

		Val_Init_Block(D_OUT, Copy_Array_Shallow(retser));
		RESET_TAIL(retser); // required - allow reuse

		break;

	case REB_BINARY:
		cased = TRUE;
		SET_TYPE(D_OUT, REB_BINARY);
		// No break: execution continues into the REB_STRING case. The
		// type stored in D_OUT is tested after the loop to pick the
		// result initializer.
	case REB_STRING:
		i = VAL_LEN(val1);
		// Setup result block:
		if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2);

		retser = BUF_MOLD;
		Reset_Buffer(retser, i);
		RESET_TAIL(retser);

		do {
			REBUNI uc;

			// Turn the boolean into the find flag. This runs again on
			// a second pass, where a non-zero value maps to the same
			// flag.
			cased = cased ? AM_FIND_CASE : 0;

			// Iterate over first series:
			ser = VAL_SERIES(val1);
			i = VAL_INDEX(val1);
			for (; i < SERIES_TAIL(ser); i += skip) {
				uc = GET_ANY_CHAR(ser, i);
				if (GET_FLAG(flags, SOP_CHECK)) {
					h = Find_Str_Char(VAL_SERIES(val2), 0, VAL_INDEX(val2), VAL_TAIL(val2), skip, uc, cased) != NOT_FOUND;
					if (GET_FLAG(flags, SOP_INVERT)) h = !h;
				}
				// Append only what is wanted and not yet in the result.
				if (h && (Find_Str_Char(retser, 0, 0, SERIES_TAIL(retser), skip, uc, cased) == NOT_FOUND)) {
					Append_String(retser, ser, i, skip);
				}
			}

			// Iterate over second series?
			if ((i = GET_FLAG(flags, SOP_BOTH))) {
				val = val1;
				val1 = val2;
				val2 = val;
				CLR_FLAG(flags, SOP_BOTH);
			}
		} while (i);

		ser = Copy_String(retser, 0, -1);
		if (IS_BINARY(D_OUT))
			Val_Init_Binary(D_OUT, ser);
		else
			Val_Init_String(D_OUT, ser);
		break;

	case REB_BITSET:
		// Pick the operation code handed to Xandor_Binary().
		// NOTE(review): there is no default case; for any other
		// `flags` value `i` keeps the value left by the argument
		// parsing above.
		switch (flags) {
		case SET_OP_UNIQUE:
			return R_ARG1;
		case SET_OP_UNION:
			i = A_OR;
			break;
		case SET_OP_INTERSECT:
			i = A_AND;
			break;
		case SET_OP_DIFFERENCE:
			i = A_XOR;
			break;
		case SET_OP_EXCLUDE:
			i = 0; // special case
			break;
		}
		ser = Xandor_Binary(i, val1, val2);
		Val_Init_Bitset(D_OUT, ser);
		break;

	case REB_TYPESET:
		// The first argument is updated in place and returned.
		switch (flags) {
		case SET_OP_UNIQUE:
			break;
		case SET_OP_UNION:
			VAL_TYPESET(val1) |= VAL_TYPESET(val2);
			break;
		case SET_OP_INTERSECT:
			VAL_TYPESET(val1) &= VAL_TYPESET(val2);
			break;
		case SET_OP_DIFFERENCE:
			VAL_TYPESET(val1) ^= VAL_TYPESET(val2);
			break;
		case SET_OP_EXCLUDE:
			VAL_TYPESET(val1) &= ~VAL_TYPESET(val2);
			break;
		}
		return R_ARG1;

	default:
		raise Error_Invalid_Arg(val1);
	}

	return R_OUT;
}