*/ RL_API void *RL_Make_String(u32 size, int unicode) /* ** Allocate a new string or binary series. ** ** Returns: ** A pointer to a string or binary series. ** Arguments: ** size - the length of the string. The system will add one extra ** for a null terminator (not strictly required, but good for C.) ** unicode - set FALSE for ASCII/Latin1 strings, set TRUE for Unicode. ** Notes: ** Strings can be REBYTE or REBCHR sized (depends on R3 config.) ** Strings are allocated with REBOL's internal memory manager. ** Internal structures may change, so NO assumptions should be made! ** Strings are automatically garbage collected if there are ** no references to them from REBOL code (C code does nothing.) ** However, you can lock strings to prevent deallocation. (?? default) ** ***********************************************************************/ { REBSER *result = unicode ? Make_Unicode(size) : Make_Binary(size); // !!! Assume client does not have Free_Series() or MANAGE_SERIES() // APIs, so the series we give back must be managed. But how can // we be sure they get what usage they needed before the GC happens? MANAGE_SERIES(result); return result; }
*/ void Init_Mold(REBCNT size) /* ***********************************************************************/ { REBYTE *cp; REBYTE c; REBYTE *dc; Set_Root_Series(TASK_MOLD_LOOP, Make_Block(size/10), "mold loop"); Set_Root_Series(TASK_BUF_MOLD, Make_Unicode(size), "mold buffer"); // Create quoted char escape table: Char_Escapes = cp = Make_Mem(MAX_ESC_CHAR+1); // cleared for (c = '@'; c <= '_'; c++) *cp++ = c; Char_Escapes[TAB] = '-'; Char_Escapes[LF] = '/'; Char_Escapes['"'] = '"'; Char_Escapes['^'] = '^'; URL_Escapes = cp = Make_Mem(MAX_URL_CHAR+1); // cleared //for (c = 0; c <= MAX_URL_CHAR; c++) if (IS_LEX_DELIMIT(c)) cp[c] = ESC_URL; for (c = 0; c <= ' '; c++) cp[c] = ESC_URL | ESC_FILE; dc = ";%\"()[]{}<>"; for (c = LEN_BYTES(dc); c > 0; c--) URL_Escapes[*dc++] = ESC_URL | ESC_FILE; }
*/ void Init_Mold(REBCNT size) /* ***********************************************************************/ { REBYTE *cp; REBYTE c; const REBYTE *dc; Set_Root_Series(TASK_MOLD_LOOP, Make_Block(size/10), "mold loop"); Set_Root_Series(TASK_BUF_MOLD, Make_Unicode(size), "mold buffer"); // Create quoted char escape table: Char_Escapes = cp = ALLOC_ARRAY_ZEROFILL(REBYTE, MAX_ESC_CHAR + 1); for (c = '@'; c <= '_'; c++) *cp++ = c; Char_Escapes[cast(REBYTE, TAB)] = '-'; Char_Escapes[cast(REBYTE, LF)] = '/'; Char_Escapes[cast(REBYTE, '"')] = '"'; Char_Escapes[cast(REBYTE, '^')] = '^'; URL_Escapes = cp = ALLOC_ARRAY_ZEROFILL(REBYTE, MAX_URL_CHAR + 1); //for (c = 0; c <= MAX_URL_CHAR; c++) if (IS_LEX_DELIMIT(c)) cp[c] = ESC_URL; for (c = 0; c <= ' '; c++) cp[c] = ESC_URL | ESC_FILE; dc = cb_cast(";%\"()[]{}<>"); for (c = LEN_BYTES(dc); c > 0; c--) URL_Escapes[*dc++] = ESC_URL | ESC_FILE; }
*/ REBSER *Copy_Wide_Str(void *src, REBINT len) /* ** Create a REBOL string series from a wide char string. ** Minimize to bytes if possible */ { REBSER *dst; REBUNI *str = (REBUNI*)src; if (Is_Wide(str, len)) { REBUNI *up; dst = Make_Unicode(len); SERIES_TAIL(dst) = len; up = UNI_HEAD(dst); while (len-- > 0) *up++ = *str++; *up = 0; } else { REBYTE *bp; dst = Make_Binary(len); SERIES_TAIL(dst) = len; bp = BIN_HEAD(dst); while (len-- > 0) *bp++ = (REBYTE)*str++; *bp = 0; } return dst; }
// // RL_Make_String: C // // Allocate a new string or binary series. // // Returns: // A pointer to a string or binary series. // Arguments: // size - the length of the string. The system will add one extra // for a null terminator (not strictly required, but good for C.) // unicode - set FALSE for ASCII/Latin1 strings, set TRUE for Unicode. // Notes: // Strings can be REBYTE or REBCHR sized (depends on R3 config.) // Strings are allocated with REBOL's internal memory manager. // Internal structures may change, so NO assumptions should be made! // Strings are automatically garbage collected if there are // no references to them from REBOL code (C code does nothing.) // However, you can lock strings to prevent deallocation. (?? default) // RL_API REBSER *RL_Make_String(u32 size, REBOOL unicode) { REBSER *result = unicode ? Make_Unicode(size) : Make_Binary(size); // !!! Assume client does not have Free_Series() or MANAGE_SERIES() // APIs, so the series we give back must be managed. But how can // we be sure they get what usage they needed before the GC happens? MANAGE_SERIES(result); return result; }
*/ REBSER *Decode_UTF_String(REBYTE *bp, REBCNT len, REBINT utf, REBFLG ccr) /* ** Do all the details to decode a string. ** Input is a byte series. Len is len of input. ** The utf is 0, 8, +/-16, +/-32. ** A special -1 means use the BOM. ** ***********************************************************************/ { REBSER *ser = BUF_UTF8; // buffer is Unicode width REBSER *dst; REBINT size; //REBFLG ccr = FALSE; // in original R3-alpha if was TRUE //@@ https://github.com/rebol/rebol-issues/issues/2336 if (utf == -1) { utf = What_UTF(bp, len); if (utf) { if (utf == 8) bp += 3, len -= 3; else if (utf == -16 || utf == 16) bp += 2, len -= 2; else if (utf == -32 || utf == 32) bp += 4, len -= 4; } } if (utf == 0 || utf == 8) { size = Decode_UTF8((REBUNI*)Reset_Buffer(ser, len), bp, len, ccr); } else if (utf == -16 || utf == 16) { size = Decode_UTF16((REBUNI*)Reset_Buffer(ser, len/2 + 1), bp, len, utf < 0, ccr); } else if (utf == -32 || utf == 32) { size = Decode_UTF32((REBUNI*)Reset_Buffer(ser, len/4 + 1), bp, len, utf < 0, ccr); } else { return NULL; } if (size < 0) { size = -size; dst = Make_Binary(size); Append_Uni_Bytes(dst, UNI_HEAD(ser), size); } else { dst = Make_Unicode(size); Append_Uni_Uni(dst, UNI_HEAD(ser), size); } return dst; }
static REBSER *make_string(REBVAL *arg, REBOOL make) { REBSER *ser = 0; // MAKE <type> 123 if (make && (IS_INTEGER(arg) || IS_DECIMAL(arg))) { ser = Make_Binary(Int32s(arg, 0)); } // MAKE/TO <type> <binary!> else if (IS_BINARY(arg)) { REBYTE *bp = VAL_BIN_DATA(arg); REBCNT len = VAL_LEN(arg); switch (What_UTF(bp, len)) { case 0: break; case 8: // UTF-8 encoded bp += 3; len -= 3; break; default: Trap0(RE_BAD_DECODE); } ser = Decode_UTF_String(bp, len, 8); // UTF-8 } // MAKE/TO <type> <any-string> else if (ANY_BINSTR(arg)) { ser = Copy_String(VAL_SERIES(arg), VAL_INDEX(arg), VAL_LEN(arg)); } // MAKE/TO <type> <any-word> else if (ANY_WORD(arg)) { ser = Copy_Mold_Value(arg, TRUE); //ser = Append_UTF8(0, Get_Word_Name(arg), -1); } // MAKE/TO <type> #"A" else if (IS_CHAR(arg)) { ser = (VAL_CHAR(arg) > 0xff) ? Make_Unicode(2) : Make_Binary(2); Append_Byte(ser, VAL_CHAR(arg)); } // MAKE/TO <type> <any-value> // else if (IS_NONE(arg)) { // ser = Make_Binary(0); // } else ser = Copy_Form_Value(arg, 1<<MOPT_TIGHT); return ser; }
*/ REBCHR *Val_Str_To_OS_Managed(REBSER **out, REBVAL *val) /* ** This is used to pass a REBOL value string to an OS API. ** ** The REBOL (input) string can be byte or wide sized. ** The OS (output) string is in the native OS format. ** On Windows, its a wide-char, but on Linux, its UTF-8. ** ** If we know that the string can be used directly as-is, ** (because it's in the OS size format), we can used it ** like that. ** ** !!! The series is created but just let up to the garbage ** collector to free. This is a "leaky" approach. You may ** optionally request to have the series returned if it is ** important for you to protect it from GC, but you cannot ** currently get a "freeable" series out of this. ** ***********************************************************************/ { #ifdef OS_WIDE_CHAR if (VAL_BYTE_SIZE(val)) { // On windows, we need to convert byte to wide: REBINT n = VAL_LEN(val); REBSER *up = Make_Unicode(n); // !!!"Leaks" in the sense that the GC has to take care of this MANAGE_SERIES(up); n = Decode_UTF8(UNI_HEAD(up), VAL_BIN_DATA(val), n, FALSE); SERIES_TAIL(up) = abs(n); UNI_TERM(up); if (out) *out = up; return cast(REBCHR*, UNI_HEAD(up)); } else { // Already wide, we can use it as-is: // !Assumes the OS uses same wide format! if (out) *out = VAL_SERIES(val);
RL_API void *RL_Make_String(u32 size, int unicode) /* ** Allocate a new string or binary series. ** ** Returns: ** A pointer to a string or binary series. ** Arguments: ** size - the length of the string. The system will add one extra ** for a null terminator (not strictly required, but good for C.) ** unicode - set FALSE for ASCII/Latin1 strings, set TRUE for Unicode. ** Notes: ** Strings can be REBYTE or REBCHR sized (depends on R3 config.) ** Strings are allocated with REBOL's internal memory manager. ** Internal structures may change, so NO assumptions should be made! ** Strings are automatically garbage collected if there are ** no references to them from REBOL code (C code does nothing.) ** However, you can lock strings to prevent deallocation. (?? default) */ { return unicode ? Make_Unicode(size) : Make_Binary(size); }
*/ REBSER *Copy_Bytes_To_Unicode(REBYTE *src, REBINT len) /* ** Convert a byte string to a unicode string. This can ** be used for ASCII or LATIN-8 strings. ** ***********************************************************************/ { REBSER *series; REBUNI *dst; series = Make_Unicode(len); dst = UNI_HEAD(series); SERIES_TAIL(series) = len; for (; len > 0; len--) { *dst++ = (REBUNI)(*src++); } UNI_TERM(series); return series; }
static REBSER *MAKE_TO_String_Common(const REBVAL *arg) { REBSER *ser = 0; // MAKE/TO <type> <binary!> if (IS_BINARY(arg)) { REBYTE *bp = VAL_BIN_AT(arg); REBCNT len = VAL_LEN_AT(arg); switch (What_UTF(bp, len)) { case 0: break; case 8: // UTF-8 encoded bp += 3; len -= 3; break; default: fail (Error(RE_BAD_UTF8)); } ser = Decode_UTF_String(bp, len, 8); // UTF-8 } // MAKE/TO <type> <any-string> else if (ANY_BINSTR(arg)) { ser = Copy_String_Slimming(VAL_SERIES(arg), VAL_INDEX(arg), VAL_LEN_AT(arg)); } // MAKE/TO <type> <any-word> else if (ANY_WORD(arg)) { ser = Copy_Mold_Value(arg, 0 /* opts... MOPT_0? */); } // MAKE/TO <type> #"A" else if (IS_CHAR(arg)) { ser = (VAL_CHAR(arg) > 0xff) ? Make_Unicode(2) : Make_Binary(2); Append_Codepoint_Raw(ser, VAL_CHAR(arg)); } else ser = Copy_Form_Value(arg, 1 << MOPT_TIGHT); return ser; }
x*/ void Modify_StringX(REBCNT action, REBVAL *string, REBVAL *arg) /* ** Actions: INSERT, APPEND, CHANGE ** ** string [string!] {Series at point to insert} ** value [any-type!] {The value to insert} ** /part {Limits to a given length or position.} ** length [number! series! pair!] ** /only {Inserts a series as a series.} ** /dup {Duplicates the insert a specified number of times.} ** count [number! pair!] ** ***********************************************************************/ { REBSER *series = VAL_SERIES(string); REBCNT index = VAL_INDEX(string); REBCNT tail = VAL_TAIL(string); REBINT rlen; // length to be removed REBINT ilen = 1; // length to be inserted REBINT cnt = 1; // DUP count REBINT size; REBVAL *val; REBSER *arg_ser = 0; // argument series // Length of target (may modify index): (arg can be anything) rlen = Partial1((action == A_CHANGE) ? string : arg, DS_ARG(AN_LENGTH)); index = VAL_INDEX(string); if (action == A_APPEND || index > tail) index = tail; // If the arg is not a string, then we need to create a string: if (IS_BINARY(string)) { if (IS_INTEGER(arg)) { if (VAL_INT64(arg) > 255 || VAL_INT64(arg) < 0) Trap_Range(arg); arg_ser = Make_Binary(1); Append_Byte(arg_ser, VAL_CHAR(arg)); // check for size!!! } else if (!ANY_BINSTR(arg)) Trap_Arg(arg); } else if (IS_BLOCK(arg)) { // MOVE! REB_MOLD mo = {0}; arg_ser = mo.series = Make_Unicode(VAL_BLK_LEN(arg) * 10); // GC!? for (val = VAL_BLK_DATA(arg); NOT_END(val); val++) Mold_Value(&mo, val, 0); } else if (IS_CHAR(arg)) { // Optimize this case !!! arg_ser = Make_Unicode(1); Append_Byte(arg_ser, VAL_CHAR(arg)); } else if (!ANY_STR(arg) || IS_TAG(arg)) { arg_ser = Copy_Form_Value(arg, 0); } if (arg_ser) Set_String(arg, arg_ser); else arg_ser = VAL_SERIES(arg); // Length of insertion: ilen = (action != A_CHANGE && DS_REF(AN_PART)) ? rlen : VAL_LEN(arg); // If Source == Destination we need to prevent possible conflicts. // Clone the argument just to be safe. // (Note: It may be possible to optimize special cases like append !!) if (series == VAL_SERIES(arg)) { arg_ser = Copy_Series_Part(arg_ser, VAL_INDEX(arg), ilen); // GC!? } // Get /DUP count: if (DS_REF(AN_DUP)) { cnt = Int32(DS_ARG(AN_COUNT)); if (cnt <= 0) return; // no changes } // Total to insert: size = cnt * ilen; if (action != A_CHANGE) { // Always expand series for INSERT and APPEND actions: Expand_Series(series, index, size); } else { if (size > rlen) Expand_Series(series, index, size-rlen); else if (size < rlen && DS_REF(AN_PART)) Remove_Series(series, index, rlen-size); else if (size + index > tail) { EXPAND_SERIES_TAIL(series, size - (tail - index)); } } // For dup count: for (; cnt > 0; cnt--) { Insert_String(series, index, arg_ser, VAL_INDEX(arg), ilen, TRUE); index += ilen; } TERM_SERIES(series); VAL_INDEX(string) = (action == A_APPEND) ? 0 : index; }
// // Clipboard_Actor: C // static REB_R Clipboard_Actor(struct Reb_Call *call_, REBSER *port, REBCNT action) { REBREQ *req; REBINT result; REBVAL *arg; REBCNT refs; // refinement argument flags REBINT len; REBSER *ser; Validate_Port(port, action); arg = DS_ARGC > 1 ? D_ARG(2) : NULL; req = cast(REBREQ*, Use_Port_State(port, RDI_CLIPBOARD, sizeof(REBREQ))); switch (action) { case A_UPDATE: // Update the port object after a READ or WRITE operation. // This is normally called by the WAKE-UP function. arg = OFV(port, STD_PORT_DATA); if (req->command == RDC_READ) { // this could be executed twice: // once for an event READ, once for the CLOSE following the READ if (!req->common.data) return R_NONE; len = req->actual; if (GET_FLAG(req->flags, RRF_WIDE)) { // convert to UTF8, so that it can be converted back to string! Val_Init_Binary(arg, Make_UTF8_Binary( req->common.data, len / sizeof(REBUNI), 0, OPT_ENC_UNISRC )); } else { REBSER *ser = Make_Binary(len); memcpy(BIN_HEAD(ser), req->common.data, len); SERIES_TAIL(ser) = len; Val_Init_Binary(arg, ser); } OS_FREE(req->common.data); // release the copy buffer req->common.data = 0; } else if (req->command == RDC_WRITE) { SET_NONE(arg); // Write is done. } return R_NONE; case A_READ: // This device is opened on the READ: if (!IS_OPEN(req)) { if (OS_DO_DEVICE(req, RDC_OPEN)) fail (Error_On_Port(RE_CANNOT_OPEN, port, req->error)); } // Issue the read request: CLR_FLAG(req->flags, RRF_WIDE); // allow byte or wide chars result = OS_DO_DEVICE(req, RDC_READ); if (result < 0) fail (Error_On_Port(RE_READ_ERROR, port, req->error)); if (result > 0) return R_NONE; /* pending */ // Copy and set the string result: arg = OFV(port, STD_PORT_DATA); len = req->actual; if (GET_FLAG(req->flags, RRF_WIDE)) { // convert to UTF8, so that it can be converted back to string! Val_Init_Binary(arg, Make_UTF8_Binary( req->common.data, len / sizeof(REBUNI), 0, OPT_ENC_UNISRC )); } else { REBSER *ser = Make_Binary(len); memcpy(BIN_HEAD(ser), req->common.data, len); SERIES_TAIL(ser) = len; Val_Init_Binary(arg, ser); } *D_OUT = *arg; return R_OUT; case A_WRITE: if (!IS_STRING(arg) && !IS_BINARY(arg)) fail (Error(RE_INVALID_PORT_ARG, arg)); // This device is opened on the WRITE: if (!IS_OPEN(req)) { if (OS_DO_DEVICE(req, RDC_OPEN)) fail (Error_On_Port(RE_CANNOT_OPEN, port, req->error)); } refs = Find_Refines(call_, ALL_WRITE_REFS); // Handle /part refinement: len = VAL_LEN(arg); if (refs & AM_WRITE_PART && VAL_INT32(D_ARG(ARG_WRITE_LIMIT)) < len) len = VAL_INT32(D_ARG(ARG_WRITE_LIMIT)); // If bytes, see if we can fit it: if (SERIES_WIDE(VAL_SERIES(arg)) == 1) { #ifdef ARG_STRINGS_ALLOWED if (!All_Bytes_ASCII(VAL_BIN_DATA(arg), len)) { Val_Init_String( arg, Copy_Bytes_To_Unicode(VAL_BIN_DATA(arg), len) ); } else req->common.data = VAL_BIN_DATA(arg); #endif // Temp conversion:!!! ser = Make_Unicode(len); len = Decode_UTF8(UNI_HEAD(ser), VAL_BIN_DATA(arg), len, FALSE); SERIES_TAIL(ser) = len = abs(len); UNI_TERM(ser); Val_Init_String(arg, ser); req->common.data = cast(REBYTE*, UNI_HEAD(ser)); SET_FLAG(req->flags, RRF_WIDE); } else // If unicode (may be from above conversion), handle it: if (SERIES_WIDE(VAL_SERIES(arg)) == sizeof(REBUNI)) { req->common.data = cast(REBYTE *, VAL_UNI_DATA(arg)); SET_FLAG(req->flags, RRF_WIDE); }