*/ REBSER *Decode_UTF_String(REBYTE *bp, REBCNT len, REBINT utf, REBFLG ccr)
/*
**      Decode a run of bytes into a freshly made string series.
**
**      bp, len - the input bytes and how many of them there are.
**      utf     - encoding selector: 0 or 8 pick the UTF-8 decoder,
**                +/-16 the UTF-16 decoder, +/-32 the UTF-32 decoder
**                (a negative value is handed on as the `utf < 0` flag).
**                The special value -1 asks What_UTF() to decide, and
**                the detected BOM bytes are then skipped.
**      ccr     - forwarded untouched to the decoder. R3-alpha forced
**                this on; see
**                https://github.com/rebol/rebol-issues/issues/2336
**
**      Returns NULL for any other `utf` value. When the decoder
**      reports a negative count the result is built with Make_Binary
**      and Append_Uni_Bytes, otherwise with Make_Unicode and
**      Append_Uni_Uni.
**
***********************************************************************/
{
    REBSER *scratch = BUF_UTF8;     // buffer is Unicode width
    REBSER *result;
    REBINT count;

    // Resolve "use the BOM" into a concrete encoding and step past it.
    if (utf == -1) {
        utf = What_UTF(bp, len);
        switch (utf) {
        case 8:
            bp += 3, len -= 3;
            break;
        case 16:
        case -16:
            bp += 2, len -= 2;
            break;
        case 32:
        case -32:
            bp += 4, len -= 4;
            break;
        default:
            // 0 (nothing detected) or unknown: leave the input alone.
            break;
        }
    }

    // Decode into the shared scratch buffer.
    switch (utf) {
    case 0:
    case 8:
        count = Decode_UTF8(
            (REBUNI*)Reset_Buffer(scratch, len), bp, len, ccr
        );
        break;
    case 16:
    case -16:
        count = Decode_UTF16(
            (REBUNI*)Reset_Buffer(scratch, len/2 + 1), bp, len, utf < 0, ccr
        );
        break;
    case 32:
    case -32:
        count = Decode_UTF32(
            (REBUNI*)Reset_Buffer(scratch, len/4 + 1), bp, len, utf < 0, ccr
        );
        break;
    default:
        return NULL;
    }

    // Copy the decoded characters out of the scratch buffer.
    if (count < 0) {
        count = -count;
        result = Make_Binary(count);
        Append_Uni_Bytes(result, UNI_HEAD(scratch), count);
        return result;
    }

    result = Make_Unicode(count);
    Append_Uni_Uni(result, UNI_HEAD(scratch), count);
    return result;
}
*/ void Debug_Uni(const REBSER *ser)
/*
**      Write a unicode series to the debug output, then a newline.
**
**      The text is encoded to UTF-8 through a fixed 1024-byte stack
**      buffer, at most 1020 bytes per pass, and each piece is handed
**      to Debug_String. GC_Disabled is forced to 1 for the duration
**      and restored to its previous value on the way out.
**
***********************************************************************/
{
    REBYTE chunk[1024];
    REBCNT consumed;
    // NOTE(review): `emitted` is passed by address without being set
    // first; confirm Encode_UTF8 treats it purely as an output.
    REBCNT emitted;
    REBUNI *src = UNI_HEAD(ser);
    REBINT remaining = Length_As_UTF8(src, SERIES_TAIL(ser), TRUE, OS_CRLF);
    REBINT saved_gc = GC_Disabled;

    GC_Disabled = 1;

    for (; remaining > 0; remaining -= consumed, src += consumed) {
        consumed = Encode_UTF8(
            chunk, MIN(remaining, 1020), src, &emitted, TRUE, OS_CRLF
        );
        Debug_String(chunk, emitted, 0, 0);
    }

    Debug_Line();

    // Nothing called above is expected to have touched the flag.
    assert(GC_Disabled == 1);
    GC_Disabled = saved_gc;
}
// // Enline_Uni: C // void Enline_Uni(REBSER *ser, REBCNT idx, REBCNT len) { REBCNT cnt = 0; REBUNI *bp; REBUNI c = 0; REBCNT tail; // Calculate the size difference by counting the number of LF's // that have no CR's in front of them. bp = UNI_AT(ser, idx); for (; len > 0; len--) { if (*bp == LF && c != CR) cnt++; c = *bp++; } if (cnt == 0) return; // Extend series: len = SER_LEN(ser); // before expansion EXPAND_SERIES_TAIL(ser, cnt); tail = SER_LEN(ser); // after expansion bp = UNI_HEAD(ser); // expand may change it // Add missing CRs: while (cnt > 0) { bp[tail--] = bp[len]; // Copy src to dst. if (bp[len] == LF && (len == 0 || bp[len - 1] != CR)) { bp[tail--] = CR; cnt--; } len--; } }
*/ REBSER *Copy_Wide_Str(void *src, REBINT len)
/*
**      Build a REBOL string series from `len` wide characters at `src`.
**
**      If Is_Wide() reports that the text needs it, the result is a
**      unicode series (Make_Unicode); otherwise every character is
**      narrowed to a byte and the result comes from Make_Binary.
**      Either way the tail is set to `len` and a zero terminator is
**      written after the copied characters.
*/
{
    REBUNI *in = (REBUNI*)src;
    REBSER *result;

    if (!Is_Wide(in, len)) {
        // Byte-sized result: narrow each character.
        REBYTE *out;

        result = Make_Binary(len);
        SERIES_TAIL(result) = len;
        out = BIN_HEAD(result);
        for (; len > 0; len--) *out++ = (REBYTE)*in++;
        *out = 0;
    }
    else {
        // Wide result: straight copy.
        REBUNI *out;

        result = Make_Unicode(len);
        SERIES_TAIL(result) = len;
        out = UNI_HEAD(result);
        for (; len > 0; len--) *out++ = *in++;
        *out = 0;
    }

    return result;
}
*/ REBSER *Make_Unicode(REBCNT length)
/*
**      Allocate a series of REBUNI-wide elements for internal strings.
**      Room is requested for `length` characters plus one terminator.
**
***********************************************************************/
{
    REBSER *ser;
    REBUNI *head;

    ser = Make_Series(length + 1, sizeof(REBUNI), MKS_NONE);
    LABEL_SERIES(ser, "make unicode");

    // !!! Callers disagree on whether `length` is only a capacity (so
    // the string starts out empty) or a preallocated fixed length.
    // Until that is settled, a terminator is written for both readings:
    // one at index `length`, and one via TERM_SEQUENCE.
    head = UNI_HEAD(ser);
    head[length] = 0;
    TERM_SEQUENCE(ser);

    return ser;
}
*/ REBSER *Parse_Lines(REBSER *src)
/*
**      Convert a string buffer to a block of strings, one per line.
**
**      A line ends at LF, CR, or CRLF; a CRLF pair counts as a single
**      terminator. Terminators are not part of the line strings.
**      Text after the last terminator becomes a final (partial) line.
**      Empty input, or input ending in a terminator, adds no extra
**      trailing line.
**
**      The original contract asks for input already in REBOL LF
**      format; CR and CRLF are handled all the same.
**
**      Both byte-sized and unicode-sized series are accepted. Lines
**      are gathered in the shared BUF_EMIT block and a copy of it is
**      returned.
**
***********************************************************************/
{
    REBSER *blk;
    REBUNI c;
    REBCNT i;
    REBCNT s;       // index where the current line starts
    REBCNT tail = SERIES_TAIL(src);
    REBVAL *val;
    REBOOL uni = !BYTE_SIZE(src);
    REBYTE *bp = BIN_HEAD(src);
    REBUNI *up = UNI_HEAD(src);

    blk = BUF_EMIT;
    RESET_SERIES(blk);

    // Scan string, looking for LF and CR terminators:
    for (i = s = 0; i < tail; i++) {
        c = uni ? up[i] : bp[i];
        if (c == LF || c == CR) {
            val = Append_Value(blk);
            Set_String(val, Copy_String(src, s, i - s));
            VAL_SET_LINE(val);

            // Skip the LF of a CRLF pair. The lookahead is at i + 1
            // and bounds-checked; the conditional is parenthesized
            // because ?: binds more loosely than == and &&.
            if (
                c == CR
                && i + 1 < tail
                && (uni ? up[i + 1] : bp[i + 1]) == LF
            ) {
                i++;
            }

            // The next line begins just past the terminator.
            s = i + 1;
        }
    }

    // Partial line (text left over with no terminator):
    if (s < tail) {
        val = Append_Value(blk);
        Set_String(val, Copy_String(src, s, tail - s));
        VAL_SET_LINE(val);
    }

    return Copy_Block(blk, 0);
}
*/ REBCHR *Val_Str_To_OS_Managed(REBSER **out, REBVAL *val) /* ** This is used to pass a REBOL value string to an OS API. ** ** The REBOL (input) string can be byte or wide sized. ** The OS (output) string is in the native OS format. ** On Windows, its a wide-char, but on Linux, its UTF-8. ** ** If we know that the string can be used directly as-is, ** (because it's in the OS size format), we can used it ** like that. ** ** !!! The series is created but just let up to the garbage ** collector to free. This is a "leaky" approach. You may ** optionally request to have the series returned if it is ** important for you to protect it from GC, but you cannot ** currently get a "freeable" series out of this. ** ***********************************************************************/ { #ifdef OS_WIDE_CHAR if (VAL_BYTE_SIZE(val)) { // On windows, we need to convert byte to wide: REBINT n = VAL_LEN(val); REBSER *up = Make_Unicode(n); // !!!"Leaks" in the sense that the GC has to take care of this MANAGE_SERIES(up); n = Decode_UTF8(UNI_HEAD(up), VAL_BIN_DATA(val), n, FALSE); SERIES_TAIL(up) = abs(n); UNI_TERM(up); if (out) *out = up; return cast(REBCHR*, UNI_HEAD(up)); } else { // Already wide, we can use it as-is: // !Assumes the OS uses same wide format! if (out) *out = VAL_SERIES(val);
// Append one character to the unicode series `dst`.
//
// When `molded` is false the character is appended as-is. Otherwise it
// is wrapped as #"...", with Emit_Uni_Char producing the text between
// the quotes (`parened` is passed through to it). The series is
// terminated afterwards in both cases.
STOID Mold_Uni_Char(REBSER *dst, REBUNI chr, REBOOL molded, REBOOL parened)
{
    REBCNT start = SERIES_TAIL(dst);    // tail before any expansion

    if (molded) {
        REBUNI *out;

        EXPAND_SERIES_TAIL(dst, 10);    // worst case: #"^(1234)"
        out = UNI_SKIP(dst, start);
        out[0] = '#';
        out[1] = '"';
        out = Emit_Uni_Char(out + 2, chr, parened);
        *out++ = '"';

        // Pull the tail back to what was actually written.
        dst->tail = out - UNI_HEAD(dst);
    }
    else {
        EXPAND_SERIES_TAIL(dst, 1);
        *UNI_SKIP(dst, start) = chr;
    }

    UNI_TERM(dst);
}
*/ REBSER *Copy_Bytes_To_Unicode(REBYTE *src, REBINT len)
/*
**      Widen a byte string into a new unicode series: each input
**      byte becomes one REBUNI with the same numeric value. Usable
**      for ASCII or Latin-1 text.
**
***********************************************************************/
{
    REBSER *result = Make_Unicode(len);
    REBUNI *out = UNI_HEAD(result);
    REBINT left = len;

    SERIES_TAIL(result) = len;

    while (left > 0) {
        *out++ = (REBUNI)(*src++);
        left--;
    }

    UNI_TERM(result);
    return result;
}
*/ REBSER *Copy_Buffer(REBSER *buf, void *end)
/*
**      Copy the used part of a shared buffer into a new series.
**
**      `end` points just past the last used element of `buf`; the
**      element count is the distance from the buffer head to `end`,
**      measured in bytes or in REBUNI units according to
**      BYTE_SIZE(buf). The new series has the same element width,
**      its tail set to that count, and is terminated.
**
***********************************************************************/
{
    REBSER *copy;
    REBCNT count;

    if (BYTE_SIZE(buf))
        count = ((REBYTE *)end) - BIN_HEAD(buf);
    else
        count = ((REBUNI *)end) - UNI_HEAD(buf);

    // One extra element for the terminator.
    copy = Make_Series(
        count + 1,
        SERIES_WIDE(buf),
        Is_Array_Series(buf) ? MKS_ARRAY : MKS_NONE
    );

    memcpy(copy->data, buf->data, SERIES_WIDE(buf) * count);
    copy->tail = count;
    TERM_SERIES(copy);

    return copy;
}
*/ int Encode_UTF8_Line(REBSER *dst, REBSER *src, REBCNT idx)
/*
**      Encode one line of a unicode source series into `dst` as UTF-8.
**
**      Encoding starts at `idx` in `src` and stops after the first LF
**      (which is included in the output) or at the source tail.
**      `dst` is reset first, grown as needed, zero-terminated, and its
**      tail set to the number of bytes written.
**
**      Returns the source index just past the last character consumed.
**
***********************************************************************/
{
    REBUNI *chars = UNI_HEAD(src);
    REBCNT limit = SERIES_TAIL(src);
    REBCNT used;            // bytes written to dst so far
    REBYTE enc[8];          // scratch for one multi-byte sequence
    REBINT enc_len;
    REBUNI ch;

    used = RESET_TAIL(dst);

    for (; idx < limit; ) {
        ch = chars[idx++];

        if (ch >= 0x80) {
            // Multi-byte: encode to scratch, then append.
            enc_len = Encode_UTF8_Char(enc, ch);
            EXPAND_SERIES_TAIL(dst, enc_len);
            memcpy(BIN_SKIP(dst, used), enc, enc_len);
            used += enc_len;
        }
        else {
            // Below 0x80: stored as a single byte.
            EXPAND_SERIES_TAIL(dst, 1);
            BIN_HEAD(dst)[used++] = (REBYTE)ch;
        }

        if (ch == LF) break;
    }

    BIN_HEAD(dst)[used] = 0;
    SERIES_TAIL(dst) = used;

    return idx;
}
// Mold a string value into mold->series.
//
// A value whose index is at or past its tail is emitted as "".
// Otherwise Sniff_String() collects statistics about the text, which
// drive both the output form and the space requested from
// Prep_Uni_Series():
//   - "quoted" form when the length is at most MAX_QUOTED_STR, the
//     quote count is zero, and there are fewer than 3 newlines;
//   - {braced} form in every other case.
// The source series may be byte-sized or unicode-sized.
STOID Mold_String_Series(REBVAL *value, REB_MOLD *mold)
{
    REBCNT len = VAL_LEN(value);
    REBSER *ser = VAL_SERIES(value);
    REBCNT idx = VAL_INDEX(value);
    REBOOL uni = !BYTE_SIZE(ser);
    REB_STRF sf = {0};
    REBYTE *bytes = 0;      // source head when byte-sized
    REBUNI *wides = 0;      // source head when unicode-sized
    REBUNI *out;            // write cursor in the mold buffer
    REBCNT pos;
    REBUNI ch;

    // Empty string:
    if (idx >= VAL_TAIL(value)) {
        Append_Bytes(mold->series, "\"\"");
        return;
    }

    Sniff_String(ser, idx, &sf);

    // The paren count only contributes to the size when the
    // MOPT_ANSI_ONLY option is set.
    if (!GET_MOPT(mold, MOPT_ANSI_ONLY)) sf.paren = 0;

    // Source can be 8 or 16 bits:
    if (uni)
        wides = UNI_HEAD(ser);
    else
        bytes = STR_HEAD(ser);

    if (len > MAX_QUOTED_STR || sf.quote != 0 || sf.newline >= 3) {
        // Braced form: {string}

        // Brace counts only add to the size when braces need escaping.
        if (!sf.malign) sf.brace_in = sf.brace_out = 0;

        // +2 for the two delimiters.
        out = Prep_Uni_Series(
            mold,
            len + sf.brace_in + sf.brace_out
                + sf.escape + sf.paren + sf.chr1e + 2
        );

        *out++ = '{';
        for (pos = idx; pos < VAL_TAIL(value); pos++) {
            if (uni)
                ch = wides[pos];
            else
                ch = (REBUNI)(bytes[pos]);

            if ((ch == '{' || ch == '}') && sf.malign) {
                // Braces are escaped with a caret in this case.
                *out++ = '^';
                *out++ = ch;
            }
            else if (ch == '{' || ch == '}' || ch == '\n' || ch == '"') {
                // Emitted literally inside braces.
                *out++ = ch;
            }
            else {
                out = Emit_Uni_Char(
                    out, ch, (REBOOL)GET_MOPT(mold, MOPT_ANSI_ONLY)
                ); // parened
            }
        }
        *out++ = '}';
        *out = 0;
    }
    else {
        // Short quoted form: "string"

        // +2 for the two delimiters.
        out = Prep_Uni_Series(
            mold,
            len + sf.newline + sf.escape + sf.paren + sf.chr1e + 2
        );

        *out++ = '"';
        for (pos = idx; pos < VAL_TAIL(value); pos++) {
            if (uni)
                ch = wides[pos];
            else
                ch = (REBUNI)(bytes[pos]);

            out = Emit_Uni_Char(
                out, ch, (REBOOL)GET_MOPT(mold, MOPT_ANSI_ONLY)
            ); // parened
        }
        *out++ = '"';
        *out = 0;
    }
}
// // Clipboard_Actor: C // static REB_R Clipboard_Actor(struct Reb_Call *call_, REBSER *port, REBCNT action) { REBREQ *req; REBINT result; REBVAL *arg; REBCNT refs; // refinement argument flags REBINT len; REBSER *ser; Validate_Port(port, action); arg = DS_ARGC > 1 ? D_ARG(2) : NULL; req = cast(REBREQ*, Use_Port_State(port, RDI_CLIPBOARD, sizeof(REBREQ))); switch (action) { case A_UPDATE: // Update the port object after a READ or WRITE operation. // This is normally called by the WAKE-UP function. arg = OFV(port, STD_PORT_DATA); if (req->command == RDC_READ) { // this could be executed twice: // once for an event READ, once for the CLOSE following the READ if (!req->common.data) return R_NONE; len = req->actual; if (GET_FLAG(req->flags, RRF_WIDE)) { // convert to UTF8, so that it can be converted back to string! Val_Init_Binary(arg, Make_UTF8_Binary( req->common.data, len / sizeof(REBUNI), 0, OPT_ENC_UNISRC )); } else { REBSER *ser = Make_Binary(len); memcpy(BIN_HEAD(ser), req->common.data, len); SERIES_TAIL(ser) = len; Val_Init_Binary(arg, ser); } OS_FREE(req->common.data); // release the copy buffer req->common.data = 0; } else if (req->command == RDC_WRITE) { SET_NONE(arg); // Write is done. } return R_NONE; case A_READ: // This device is opened on the READ: if (!IS_OPEN(req)) { if (OS_DO_DEVICE(req, RDC_OPEN)) fail (Error_On_Port(RE_CANNOT_OPEN, port, req->error)); } // Issue the read request: CLR_FLAG(req->flags, RRF_WIDE); // allow byte or wide chars result = OS_DO_DEVICE(req, RDC_READ); if (result < 0) fail (Error_On_Port(RE_READ_ERROR, port, req->error)); if (result > 0) return R_NONE; /* pending */ // Copy and set the string result: arg = OFV(port, STD_PORT_DATA); len = req->actual; if (GET_FLAG(req->flags, RRF_WIDE)) { // convert to UTF8, so that it can be converted back to string! 
Val_Init_Binary(arg, Make_UTF8_Binary( req->common.data, len / sizeof(REBUNI), 0, OPT_ENC_UNISRC )); } else { REBSER *ser = Make_Binary(len); memcpy(BIN_HEAD(ser), req->common.data, len); SERIES_TAIL(ser) = len; Val_Init_Binary(arg, ser); } *D_OUT = *arg; return R_OUT; case A_WRITE: if (!IS_STRING(arg) && !IS_BINARY(arg)) fail (Error(RE_INVALID_PORT_ARG, arg)); // This device is opened on the WRITE: if (!IS_OPEN(req)) { if (OS_DO_DEVICE(req, RDC_OPEN)) fail (Error_On_Port(RE_CANNOT_OPEN, port, req->error)); } refs = Find_Refines(call_, ALL_WRITE_REFS); // Handle /part refinement: len = VAL_LEN(arg); if (refs & AM_WRITE_PART && VAL_INT32(D_ARG(ARG_WRITE_LIMIT)) < len) len = VAL_INT32(D_ARG(ARG_WRITE_LIMIT)); // If bytes, see if we can fit it: if (SERIES_WIDE(VAL_SERIES(arg)) == 1) { #ifdef ARG_STRINGS_ALLOWED if (!All_Bytes_ASCII(VAL_BIN_DATA(arg), len)) { Val_Init_String( arg, Copy_Bytes_To_Unicode(VAL_BIN_DATA(arg), len) ); } else req->common.data = VAL_BIN_DATA(arg); #endif // Temp conversion:!!! ser = Make_Unicode(len); len = Decode_UTF8(UNI_HEAD(ser), VAL_BIN_DATA(arg), len, FALSE); SERIES_TAIL(ser) = len = abs(len); UNI_TERM(ser); Val_Init_String(arg, ser); req->common.data = cast(REBYTE*, UNI_HEAD(ser)); SET_FLAG(req->flags, RRF_WIDE); } else // If unicode (may be from above conversion), handle it: if (SERIES_WIDE(VAL_SERIES(arg)) == sizeof(REBUNI)) { req->common.data = cast(REBYTE *, VAL_UNI_DATA(arg)); SET_FLAG(req->flags, RRF_WIDE); }