Пример #1
0
*/	void Debug_Uni(const REBSER *ser)
/*
**		Print debug unicode string followed by a newline.
**
***********************************************************************/
{
	REBCNT ul;
	REBCNT bl;
	REBYTE buf[1024];
	REBUNI *up = UNI_HEAD(ser);
	REBINT size = Length_As_UTF8(up, SERIES_TAIL(ser), TRUE, OS_CRLF);

	REBINT disabled = GC_Disabled;
	GC_Disabled = 1;

	while (size > 0) {
		ul = Encode_UTF8(buf, MIN(size, 1020), up, &bl, TRUE, OS_CRLF);
		Debug_String(buf, bl, 0, 0);
		size -= ul;
		up += ul;
	}

	Debug_Line();

	assert(GC_Disabled == 1);
	GC_Disabled = disabled;
}
Пример #2
0
*/	RL_API REBCNT RL_Encode_UTF8(REBYTE *dst, REBINT max, const void *src, REBCNT *len, REBFLG uni, REBFLG ccr)
/*
**		Encode the unicode into UTF8 byte string.
**
**	Returns:
**		Number of dst bytes used.
**
**	Arguments:
**		dst - destination for encoded UTF8 bytes
**		max - maximum size of the result in bytes
**		src - source array of bytes or wide characters
**		len - input is source length, updated to reflect src chars used
**		uni - true if src is in wide character format
**		ccr - convert linefeed + carriage-return into just linefeed
**
**	Notes:
**		Does not add a terminator.
**
**		!!! Host code is not supposed to call any Rebol routines except
**		for those in the RL_Api.  This exposes Rebol's internal UTF8
**		length routine, as it was being used by the Linux host code by
**		Atronix.  Should be reviewed along with the rest of the RL_Api.
**
***********************************************************************/
{
	return Encode_UTF8(
		dst,
		max,
		src,
		len,
		(uni ? OPT_ENC_UNISRC : 0) | (ccr ? OPT_ENC_CRLF : 0)
	);
}
Пример #3
0
*/	REBSER *Encode_UTF8_String(void *src, REBCNT len, REBFLG uni, REBFLG opts)
/*
**		Do all the details to encode a string as UTF8.
**		No_copy means do not make a copy.
**		Result can be a shared buffer!
**
***********************************************************************/
{
	REBSER *ser = BUF_FORM; // a shared buffer
	REBCNT size;
	REBYTE *cp;
	REBFLG ccr = GET_FLAG(opts, ENC_OPT_CRLF);

	if (uni) {
		REBUNI *up = (REBUNI*)src;

		size = Length_As_UTF8(up, len, TRUE, (REBOOL)ccr);
		cp = Reset_Buffer(ser, size + (GET_FLAG(opts, ENC_OPT_BOM) ? 3 : 0));
        UNUSED(cp);
		Encode_UTF8(Reset_Buffer(ser, size), size, up, &len, TRUE, ccr);
	}
	else {
		REBYTE *bp = (REBYTE*)src;

		if (Is_Not_ASCII(bp, len)) {
			size = Length_As_UTF8((REBUNI*)bp, len, FALSE, (REBOOL)ccr);
			cp = Reset_Buffer(ser, size + (GET_FLAG(opts, ENC_OPT_BOM) ? 3 : 0));
			Encode_UTF8(cp, size, bp, &len, FALSE, ccr);
		}
		else if (GET_FLAG(opts, ENC_OPT_NO_COPY)) return 0;
		else return Copy_Bytes(bp, len);
	}

	SERIES_TAIL(ser) = len;
	STR_TERM(ser);

	return Copy_Bytes(BIN_HEAD(ser), len);
}
Пример #4
0
//
//  RL_Encode_UTF8: C
//
// API entry point that forwards to the internal Encode_UTF8() routine
// to encode unicode into a UTF8 byte string.
//
// Returns:
// The value returned by Encode_UTF8(), passed through as is.
// (Historically described here as the number of dst bytes used --
// NOTE(review): confirm against Encode_UTF8().)
//
// Arguments:
// dst - destination for encoded UTF8 bytes
// max - maximum size of the result in bytes
// src - source array of bytes or wide characters
// len - in/out length, handed to Encode_UTF8() untouched (historically
//       described as: input is source length, updated to reflect src
//       chars used -- NOTE(review): confirm against Encode_UTF8())
// unicode - true if src is in wide character format; selects
//       OPT_ENC_UNISRC
// crlf_to_lf - true to request CR/LF handling; selects OPT_ENC_CRLF
//
// Notes:
// No terminator is added by this wrapper.
//
// !!! Host code is not supposed to call any Rebol routines except
// for those in the RL_Api.  This exposes Rebol's internal UTF8
// encoding routine, as it was being used by the Linux host code by
// Atronix.  Should be reviewed along with the rest of the RL_Api.
//
RL_API REBCNT RL_Encode_UTF8(
    REBYTE *dst,
    REBINT max,
    const void *src,
    REBCNT *len,
    REBOOL unicode,
    REBOOL crlf_to_lf
) {
    int opts = 0;

    // Translate the two boolean arguments into encoder option bits.
    if (unicode)
        opts |= OPT_ENC_UNISRC;
    if (crlf_to_lf)
        opts |= OPT_ENC_CRLF;

    return Encode_UTF8(dst, max, src, len, opts);
}