*/	void Debug_Uni(const REBSER *ser)
/*
**		Print debug unicode string followed by a newline.
**
**		The wide-character contents of `ser` are encoded to UTF-8 in
**		chunks through a fixed stack buffer; each chunk is handed to
**		Debug_String, and Debug_Line is called once at the end.
**
**		GC_Disabled is forced to 1 while printing and restored to its
**		previous value before returning.
**
***********************************************************************/
{
	REBYTE buf[1024];
	REBUNI *up = UNI_HEAD(ser);
	REBCNT remaining = SERIES_TAIL(ser); // source characters not yet printed

	REBINT disabled = GC_Disabled;
	GC_Disabled = 1;

	while (remaining > 0) {
		// In/out argument of Encode_UTF8: source chars available on
		// entry, source chars actually consumed on return.
		REBCNT used = remaining;

		// The return value is the number of UTF-8 bytes written to buf.
		// The 1020 limit (vs. 1024 capacity) is kept from the original;
		// presumably slack for a trailing multi-byte sequence.
		REBCNT nbytes = Encode_UTF8(buf, 1020, up, &used, TRUE, OS_CRLF);

		if (used == 0) break; // no forward progress; avoid spinning forever

		Debug_String(buf, nbytes, 0, 0);

		// Advance by characters consumed, not by bytes produced.
		up += used;
		remaining -= used;
	}

	Debug_Line();

	assert(GC_Disabled == 1);
	GC_Disabled = disabled;
}
*/	RL_API REBCNT RL_Encode_UTF8(REBYTE *dst, REBINT max, const void *src, REBCNT *len, REBFLG uni, REBFLG ccr)
/*
**	Host-facing wrapper around Encode_UTF8: encode a source buffer
**	into UTF8 bytes.
**
**	Returns:
**		Number of dst bytes used.
**
**	Arguments:
**		dst - destination for encoded UTF8 bytes
**		max - maximum size of the result in bytes
**		src - source array of bytes or wide characters
**		len - input is source length, updated to reflect src chars used
**		uni - non-zero if src is in wide character format
**		      (passes OPT_ENC_UNISRC to Encode_UTF8)
**		ccr - non-zero to request CR/LF line-ending handling
**		      (passes OPT_ENC_CRLF to Encode_UTF8, which performs
**		      the actual conversion)
**
**	Notes:
**		Does not add a terminator.
**
**		!!! Host code is not supposed to call any Rebol routines except
**		for those in the RL_Api.  This exposes Rebol's internal UTF8
**		encoding routine, as it was being used by the Linux host code
**		by Atronix.  Should be reviewed along with the rest of the
**		RL_Api.
**
***********************************************************************/
{
	REBCNT opts = 0;

	// Translate the two boolean-style arguments into encoder option bits.
	if (uni) opts |= OPT_ENC_UNISRC;
	if (ccr) opts |= OPT_ENC_CRLF;

	return Encode_UTF8(dst, max, src, len, opts);
}
*/	REBSER *Encode_UTF8_String(void *src, REBCNT len, REBFLG uni, REBFLG opts)
/*
**		Do all the details to encode a string as UTF8.
**
**		src  - source characters: REBUNI when `uni` is set, else bytes
**		len  - number of source characters
**		uni  - non-zero if src is in wide character format
**		opts - ENC_OPT_CRLF, ENC_OPT_BOM, ENC_OPT_NO_COPY flags
**
**		Returns:
**		  - 0 if the source is byte-sized, pure ASCII, and
**		    ENC_OPT_NO_COPY is set (nothing needed encoding, and the
**		    caller asked not to copy).
**		  - Copy_Bytes of the source if it is byte-sized pure ASCII
**		    and ENC_OPT_NO_COPY is not set.
**		  - Otherwise, Copy_Bytes of the UTF8 encoding.
**
**		The shared BUF_FORM buffer is used as scratch space for the
**		encoding, so its previous contents are overwritten.
**
**		When ENC_OPT_BOM is set, 3 extra bytes are reserved in the
**		scratch buffer; this function does not itself write a BOM.
**
***********************************************************************/
{
	REBSER *ser = BUF_FORM;  // a shared buffer
	REBCNT size;             // UTF8 bytes needed for the source
	REBCNT used;             // UTF8 bytes actually written by the encoder
	REBYTE *cp;
	REBFLG ccr = GET_FLAG(opts, ENC_OPT_CRLF);
	REBCNT bom = GET_FLAG(opts, ENC_OPT_BOM) ? 3 : 0;

	if (uni) {
		REBUNI *up = (REBUNI*)src;

		size = Length_As_UTF8(up, len, TRUE, (REBOOL)ccr);
		cp = Reset_Buffer(ser, size + bom);
		used = Encode_UTF8(cp, size, up, &len, TRUE, ccr);
	}
	else {
		REBYTE *bp = (REBYTE*)src;

		if (!Is_Not_ASCII(bp, len)) {
			// Pure ASCII is already valid UTF8; nothing to encode.
			if (GET_FLAG(opts, ENC_OPT_NO_COPY)) return 0;
			return Copy_Bytes(bp, len);
		}

		size = Length_As_UTF8((REBUNI*)bp, len, FALSE, (REBOOL)ccr);
		cp = Reset_Buffer(ser, size + bom);
		used = Encode_UTF8(cp, size, bp, &len, FALSE, ccr);
	}

	// Encode_UTF8 updates `len` to the number of SOURCE characters it
	// consumed; the encoded length is its return value.  Using `len`
	// here would truncate any output containing multi-byte sequences.
	SERIES_TAIL(ser) = used;
	STR_TERM(ser);

	return Copy_Bytes(BIN_HEAD(ser), used);
}
// // RL_Encode_UTF8: C // // Encode the unicode into UTF8 byte string. // // Returns: // Number of dst bytes used. // // Arguments: // dst - destination for encoded UTF8 bytes // max - maximum size of the result in bytes // src - source array of bytes or wide characters // len - input is source length, updated to reflect src chars used // unicode - true if src is in wide character format // crlf_to_lf - convert carriage-return + linefeed into just linefeed // // Notes: // Does not add a terminator. // // !!! Host code is not supposed to call any Rebol routines except // for those in the RL_Api. This exposes Rebol's internal UTF8 // length routine, as it was being used by the Linux host code by // Atronix. Should be reviewed along with the rest of the RL_Api. // RL_API REBCNT RL_Encode_UTF8( REBYTE *dst, REBINT max, const void *src, REBCNT *len, REBOOL unicode, REBOOL crlf_to_lf ) { return Encode_UTF8( dst, max, src, len, (unicode ? OPT_ENC_UNISRC : 0) | (crlf_to_lf ? OPT_ENC_CRLF : 0) ); }