*/	void Debug_Uni(const REBSER *ser)
/*
**		Print debug unicode string followed by a newline.
**
**		The wide-character contents of `ser` are converted to UTF-8 one
**		chunk at a time through a fixed stack buffer; each chunk is passed
**		to Debug_String() and Debug_Line() finishes the output.
**		GC_Disabled is forced to 1 while printing and restored on exit.
**
**		NOTE(review): this assumes Encode_UTF8() returns the number of
**		source code units it consumed and uses its fourth argument as
**		in/out (source units available on entry, UTF-8 bytes written on
**		return), matching how Encode_UTF8_String() uses it -- confirm
**		against the Encode_UTF8() definition.
**
***********************************************************************/
{
	REBYTE buf[1024];
	REBUNI *up = UNI_HEAD(ser);
	REBCNT left = SERIES_TAIL(ser);	// source code units not yet encoded
	REBINT size = Length_As_UTF8(up, left, TRUE, OS_CRLF);	// UTF-8 bytes not yet printed
	REBCNT ul;	// source code units consumed by one chunk
	REBCNT bl;	// in: source units available, out: bytes produced

	REBINT disabled = GC_Disabled;
	GC_Disabled = 1;

	while (size > 0) {
		// Never hand the encoder an indeterminate count.
		bl = left;
		ul = Encode_UTF8(buf, MIN(size, 1020), up, &bl, TRUE, OS_CRLF);

		// No input consumed and no output produced: bail out instead of
		// spinning forever on the same position.
		if (ul == 0 && bl == 0) break;

		Debug_String(buf, bl, 0, 0);

		// `size` is measured in output bytes, `up`/`left` in source units;
		// the two differ for non-ASCII text and for expanded line endings.
		size -= (REBINT)bl;
		up += ul;
		left -= ul;
	}

	Debug_Line();

	assert(GC_Disabled == 1);
	GC_Disabled = disabled;
}
*/	RL_API REBCNT RL_Length_As_UTF8(const void *p, REBCNT len, REBOOL uni, REBOOL ccr)
/*
**	Report how long an array of codepoints would be once encoded as UTF8.
**
**	Returns:
**		The length the UTF8 encoded string would have.
**
**	Arguments:
**		p   - pointer to an array of bytes or wide characters
**		len - length of the source in codepoints (terminator not counted)
**		uni - true if the source is in wide character format
**		ccr - true to account for converting each linefeed into
**		      carriage-return + linefeed
**
**	The two booleans are translated into the OPT_ENC_UNISRC and
**	OPT_ENC_CRLF option bits expected by Length_As_UTF8().
**
**	!!! Host code is not supposed to call any Rebol routines except
**	for those in the RL_Api.  This exposes Rebol's internal UTF8
**	length routine, as it was being used by host code.  It should
**	be reviewed along with the rest of the RL_Api.
**
***********************************************************************/
{
	// Wide-character source: always set UNISRC, optionally add CRLF.
	if (uni) {
		return Length_As_UTF8(p, len, OPT_ENC_UNISRC | (ccr ? OPT_ENC_CRLF : 0));
	}

	// Byte source: only the CRLF option can apply.
	return Length_As_UTF8(p, len, ccr ? OPT_ENC_CRLF : 0);
}
// // RL_Length_As_UTF8: C // // Calculate the UTF8 length of an array of unicode codepoints // // Returns: // How long the UTF8 encoded string would be // // Arguments: // p - pointer to array of bytes or wide characters // len - length of src in codepoints (not including terminator) // unicode - true if src is in wide character format // lf_to_crlf - convert linefeeds into carraige-return + linefeed // // !!! Host code is not supposed to call any Rebol routines except // for those in the RL_Api. This exposes Rebol's internal UTF8 // length routine, as it was being used by host code. It should // be reviewed along with the rest of the RL_Api. // RL_API REBCNT RL_Length_As_UTF8( const void *p, REBCNT len, REBOOL unicode, REBOOL lf_to_crlf ) { return Length_As_UTF8( p, len, (unicode ? OPT_ENC_UNISRC : 0) | (lf_to_crlf ? OPT_ENC_CRLF : 0) ); }
*/	REBSER *Encode_UTF8_String(void *src, REBCNT len, REBFLG uni, REBFLG opts)
/*
**		Do all the details to encode a string as UTF8.
**
**		Arguments:
**			src  - source characters: REBUNI units when `uni` is set,
**			       otherwise bytes
**			len  - number of source units in `src`
**			uni  - non-zero if `src` holds wide (REBUNI) characters
**			opts - flag word tested for ENC_OPT_CRLF, ENC_OPT_BOM and
**			       ENC_OPT_NO_COPY
**
**		Returns:
**			- When encoding is performed: the data is staged in the
**			  shared BUF_FORM buffer and the result of Copy_Bytes() on
**			  that buffer is returned.
**			- For byte input where Is_Not_ASCII() reports false: 0 if
**			  ENC_OPT_NO_COPY is set (the caller can keep using `src`),
**			  otherwise the result of Copy_Bytes() on `src`.
**
**		NOTE(review): ENC_OPT_BOM only reserves 3 extra bytes in the
**		staging buffer; nothing in this routine writes a byte-order mark
**		and the reserved bytes are not part of the returned length.
**		Confirm whether callers expect a BOM to be emitted here.
**
**		NOTE(review): BUF_FORM is shared, so this routine presumably must
**		not be re-entered while another user holds that buffer -- verify.
**
***********************************************************************/
{
	REBSER *ser = BUF_FORM;  // a shared buffer
	REBFLG ccr = GET_FLAG(opts, ENC_OPT_CRLF);
	REBCNT bom_room = GET_FLAG(opts, ENC_OPT_BOM) ? 3 : 0;
	REBCNT size;
	REBYTE *cp;

	if (uni) {
		REBUNI *up = (REBUNI*)src;

		size = Length_As_UTF8(up, len, TRUE, (REBOOL)ccr);

		// Reserve once and encode into that reservation, exactly as the
		// byte branch does (previously the buffer was reset a second time
		// with a smaller size and the first pointer was discarded).
		cp = Reset_Buffer(ser, size + bom_room);
		Encode_UTF8(cp, size, up, &len, TRUE, ccr);
	}
	else {
		REBYTE *bp = (REBYTE*)src;

		if (!Is_Not_ASCII(bp, len)) {
			// Pure ASCII bytes need no re-encoding.
			if (GET_FLAG(opts, ENC_OPT_NO_COPY)) return 0;
			return Copy_Bytes(bp, len);
		}

		// The cast only satisfies the pointer type at the call site; the
		// FALSE argument marks the data as byte-sized.
		size = Length_As_UTF8((REBUNI*)bp, len, FALSE, (REBOOL)ccr);
		cp = Reset_Buffer(ser, size + bom_room);
		Encode_UTF8(cp, size, bp, &len, FALSE, ccr);
	}

	// After Encode_UTF8() `len` is used as the encoded byte count.
	SERIES_TAIL(ser) = len;
	STR_TERM(ser);

	return Copy_Bytes(BIN_HEAD(ser), len);
}