Esempi in C++ (Cpp) per Make_UTF8_From_Any_String

Esempio n. 1

0

Mostra file

File: s-ops.c Progetto: asampal/ren-c

*/	REBSER *Temp_Bin_Str_Managed(REBVAL *val, REBCNT *index, REBCNT *length)
/*
**	Produces a series for a BINARY! or ANY-STRING! value that can be
**	handed to a byte-oriented function, converting to UTF8 only when
**	that is actually necessary.
**
**	val:    the BINARY! or ANY-STRING! value to examine.
**	index:  optional (may be null).  Receives the position in the
**	        returned series: the value's own index when its series is
**	        reused, or 0 when a new UTF8 series was made.
**	length: optional (may be null).  On input, a non-zero *length
**	        overrides VAL_LEN(val).  On output it receives the length
**	        in use for the reused series, or the tail of the newly
**	        made UTF8 series.
**
**	Caveat: only use the result for temporary purposes.  Whether the
**	source series is handed back or a fresh one is created depends on
**	the internal encoding of the string, which is an implementation
**	detail the user should not need to know about.  As a consequence,
**	the returned data may or may not have earlier content available
**	before the returned index.
**
**	!!! If performance ever demands it, callsites could be told
**	whether a new series was produced, and free it themselves rather
**	than having it managed here.
**
***********************************************************************/
{
	REBSER *result;
	REBCNT count;

	if (length && *length)
		count = *length;
	else
		count = VAL_LEN(val);

	assert(IS_BINARY(val) || ANY_STR(val));

	if (count != 0 && !IS_BINARY(val) && !VAL_STR_IS_ASCII(val)) {
		// A non-empty ANY-STRING! that is not pure ASCII: it has to be
		// encoded as UTF-8, and the new series is handed to management.

		result = Make_UTF8_From_Any_String(val, count, OPT_ENC_CRLF_MAYBE);
		MANAGE_SERIES(result);

		if (index) *index = 0;
		if (length) *length = SERIES_TAIL(result);

		return result;
	}

	// Zero length, BINARY!, or an ANY-STRING! whose bytes are all below
	// 128: the value's own series is returned as-is.

	result = VAL_SERIES(val);
	ASSERT_SERIES_MANAGED(result);

	if (index) *index = VAL_INDEX(val);
	if (length) *length = count;

	return result;
}

Esempio n. 2

0

Mostra file

File: f-modify.c Progetto: rgchris/ren-c

//
//  Modify_String: C
//
// Inserts, appends, or changes content in the string/binary series
// `dst_ser`, using `src_val` as the source and repeating it `dups` times.
//
// If `src_val` cannot be used directly, a temporary source series is built
// from it (UTF-8 encoded bytes when AN_SERIES is flagged, a formed string
// otherwise) and freed before returning--except for the Join_Binary result,
// which is a shared buffer and must not be freed.
//
// For INSERT/PART and APPEND/PART, `dst_len` is the limit on how much of the
// source is used.  For CHANGE, `dst_len` is the length in the destination
// being replaced.
//
// Returns new dst_idx (always 0 for APPEND).
//
REBCNT Modify_String(
    REBCNT action,          // INSERT, APPEND, CHANGE
    REBSER *dst_ser,        // target
    REBCNT dst_idx,         // position
    const REBVAL *src_val,  // source
    REBFLGS flags,          // AN_PART
    REBINT dst_len,         // length to remove
    REBINT dups             // dup count
) {
    REBSER *src_ser = 0;
    REBCNT src_idx = 0;
    REBCNT src_len;
    REBCNT tail  = SER_LEN(dst_ser);
    REBINT size;        // total to insert
    REBOOL needs_free;  // set on every path that assigns src_ser
    REBINT limit;

    // For INSERT/PART and APPEND/PART
    if (action != SYM_CHANGE && GET_FLAG(flags, AN_PART))
        limit = dst_len; // should be non-negative
    else
        limit = -1;

    // Nothing to do: a /PART of zero, or a negative dup count.
    if (limit == 0 || dups < 0) return (action == SYM_APPEND) ? 0 : dst_idx;

    // APPEND always works at the tail; positions past the tail are clipped.
    if (action == SYM_APPEND || dst_idx > tail) dst_idx = tail;

    // If the src_val is not a string, then we need to create a string:
    if (GET_FLAG(flags, AN_SERIES)) { // used to indicate a BINARY series
        if (IS_INTEGER(src_val)) {
            src_ser = Make_Series_Codepoint(Int8u(src_val));
            needs_free = TRUE;
            limit = -1;
        }
        else if (IS_BLOCK(src_val)) {
            src_ser = Join_Binary(src_val, limit); // NOTE: it's the shared FORM buffer!
            needs_free = FALSE;
            limit = -1;
        }
        else if (IS_CHAR(src_val)) {
            //
            // "UTF-8 was originally specified to allow codepoints with up to
            // 31 bits (or 6 bytes). But with RFC3629, this was reduced to 4
            // bytes max. to be more compatible to UTF-16."  So depending on
            // which RFC you consider "the UTF-8", max size is either 4 or 6.
            //
            src_ser = Make_Binary(6);
            SET_SERIES_LEN(
                src_ser,
                Encode_UTF8_Char(BIN_HEAD(src_ser), VAL_CHAR(src_val))
            );
            needs_free = TRUE;
            limit = -1;
        }
        else if (ANY_STRING(src_val)) {
            // The limit is applied here, before encoding, so it is cleared
            // afterwards and not applied a second time to the result.
            src_len = VAL_LEN_AT(src_val);
            if (limit >= 0 && src_len > cast(REBCNT, limit))
                src_len = limit;
            src_ser = Make_UTF8_From_Any_String(src_val, src_len, 0);
            needs_free = TRUE;
            limit = -1;
        }
        else if (!IS_BINARY(src_val))
            fail (Error_Invalid_Arg(src_val));
    }
    else if (IS_CHAR(src_val)) {
        src_ser = Make_Series_Codepoint(VAL_CHAR(src_val));
        needs_free = TRUE;
    }
    else if (IS_BLOCK(src_val)) {
        src_ser = Form_Tight_Block(src_val);
        needs_free = TRUE;
    }
    else if (!ANY_STRING(src_val) || IS_TAG(src_val)) {
        src_ser = Copy_Form_Value(src_val, 0);
        needs_free = TRUE;
    }

    // Use either new src or the one that was passed:
    if (src_ser) {
        src_len = SER_LEN(src_ser);
    }
    else {
        src_ser = VAL_SERIES(src_val);
        src_idx = VAL_INDEX(src_val);
        src_len = VAL_LEN_AT(src_val);
        needs_free = FALSE;
    }

    // A /PART limit may only shorten the source.  Assigning it
    // unconditionally would let src_len exceed the data actually available
    // in src_ser, and Insert_String would then read past the source's tail.
    if (limit >= 0 && src_len > cast(REBCNT, limit))
        src_len = limit;

    // If Source == Destination we need to prevent possible conflicts.
    // Clone the argument just to be safe.
    // (Note: It may be possible to optimize special cases like append !!)
    if (dst_ser == src_ser) {
        assert(!needs_free);
        src_ser = Copy_Sequence_At_Len(src_ser, src_idx, src_len);
        needs_free = TRUE;
        src_idx = 0;
    }

    // Total to insert:
    size = dups * src_len;

    if (action != SYM_CHANGE) {
        // Always expand dst_ser for INSERT and APPEND actions:
        Expand_Series(dst_ser, dst_idx, size);
    } else {
        // CHANGE: grow or shrink the destination so that the `dst_len`
        // region being replaced ends up able to hold `size` units.
        if (size > dst_len)
            Expand_Series(dst_ser, dst_idx, size - dst_len);
        else if (size < dst_len && GET_FLAG(flags, AN_PART))
            Remove_Series(dst_ser, dst_idx, dst_len - size);
        else if (size + dst_idx > tail) {
            EXPAND_SERIES_TAIL(dst_ser, size - (tail - dst_idx));
        }
    }

    // For dup count:
    for (; dups > 0; dups--) {
        Insert_String(dst_ser, dst_idx, src_ser, src_idx, src_len, TRUE);
        dst_idx += src_len;
    }

    TERM_SEQUENCE(dst_ser);

    if (needs_free) {
        // If we did not use the series that was passed in, but rather
        // created an internal temporary one, we need to free it.
        Free_Series(src_ser);
    }

    return (action == SYM_APPEND) ? 0 : dst_idx;
}

Esempio n. 3

0

Mostra file

File: s-ops.c Progetto: rhencke/rebol

//
//  Temp_Bin_Str_Managed: C
//
// Yields a byte-sized series for a BINARY! or ANY-STRING! value so that it
// can be passed to a byte-oriented function.  A UTF8 series is created only
// when conversion is required; otherwise the value's own series is returned.
//
// `index` and `length` are optional (either may be null).  A non-zero
// incoming `*length` overrides VAL_LEN_AT(val).  On return, `*index` is the
// position within the returned series and `*length` the length to use.
//
// Note: Only use the result for temporary purposes.  Whether the source
// series gets reused hinges on an internal encoding detail the user should
// not have to know about (how many bytes per codepoint an ASCII string
// happens to use internally).  Because of that copy-or-not variance, the
// returned data may or may not have earlier content to step back into
// before the returned index.
//
// !!! If performance demands it, callsites could be adapted to learn whether
// a new series was produced, and free it themselves instead of relying on
// the result being managed.
//
REBSER *Temp_Bin_Str_Managed(const REBVAL *val, REBCNT *index, REBCNT *length)
{
    REBSER *result;
    REBCNT count;

    if (length && *length)
        count = *length;
    else
        count = VAL_LEN_AT(val);

    assert(IS_BINARY(val) || ANY_STRING(val));

    // !!! A zero length string used to be reused here as well.  But such a
    // string could have the wrong width, while callers expect a BYTE_SIZE()
    // series back.  Mutating the source (e.g. reallocating at a new width)
    // is not a good idea, so consider a protected EMPTY_BYTE_STRING--in the
    // spirit of EMPTY_ARRAY--to hand back in that case.
    //
    if (
        !IS_BINARY(val)
        && !(
            VAL_BYTE_SIZE(val)
            && All_Bytes_ASCII(VAL_BIN_AT(val), VAL_LEN_AT(val))
        )
    ){
        // An ANY-STRING! that is either not byte-sized or holds something
        // outside of ASCII: encode it as UTF-8 and manage the new series.

        result = Make_UTF8_From_Any_String(val, count, OPT_ENC_CRLF_MAYBE);
        MANAGE_SERIES(result);

    #if !defined(NDEBUG)
        //
        // Debug builds additionally PROTECT the new series.  The caller
        // cannot tell whether it received fresh data or the original, so it
        // must not modify either.  (The original is not protected, since
        // this model has no later call at which it could be unlocked.)
        {
            REBVAL guard;
            Val_Init_String(&guard, result);

            Protect_Value(&guard, FLAGIT(PROT_SET));

            // just a string...not /DEEP...shouldn't need to Unmark()
        }
    #endif

        if (index)
            *index = 0;
        if (length)
            *length = SER_LEN(result);
    }
    else {
        //
        // BINARY!, or a byte-sized ANY-STRING! whose codepoints are all in
        // ASCII (0x00 => 0x7F): no UTF-8 encoding needed, reuse the series.
        //
        result = VAL_SERIES(val);
        ASSERT_SERIES_MANAGED(result);

        if (index)
            *index = VAL_INDEX(val);
        if (length)
            *length = count;
    }

    assert(BYTE_SIZE(result));
    return result;
}

Esempio n. 4

0

Mostra file

File: t-string.c Progetto: rgchris/ren-c

// Creates a new binary series from `arg`, dispatching on its datatype.
//
// `make` only affects INTEGER! and DECIMAL! arguments: when true the number
// is passed to Make_Binary(), otherwise the value goes to Make_Binary_BE64().
//
// Returns 0 when the datatype of `arg` is not one handled here.
//
static REBSER *make_binary(const REBVAL *arg, REBOOL make)
{
    switch (VAL_TYPE(arg)) {
    // MAKE BINARY! 123
    case REB_INTEGER:
    case REB_DECIMAL:
        return make ? Make_Binary(Int32s(arg, 0)) : Make_Binary_BE64(arg);

    // MAKE/TO BINARY! BINARY!
    case REB_BINARY:
        return Copy_Bytes(VAL_BIN_AT(arg), VAL_LEN_AT(arg));

    // MAKE/TO BINARY! <any-string>
    case REB_STRING:
    case REB_FILE:
    case REB_EMAIL:
    case REB_URL:
    case REB_TAG:
//  case REB_ISSUE:
        return Make_UTF8_From_Any_String(arg, VAL_LEN_AT(arg), 0);

    // MAKE/TO BINARY! <block!>
    case REB_BLOCK:
        // Join_Binary hands back a shared buffer, so a copy is returned:
        return Copy_Sequence(Join_Binary(arg, -1));

    // MAKE/TO BINARY! <tuple!>
    case REB_TUPLE:
        return Copy_Bytes(VAL_TUPLE(arg), VAL_TUPLE_LEN(arg));

    // MAKE/TO BINARY! <char!>
    case REB_CHAR: {
        // 6 bytes of capacity for the encoded character
        REBSER *encoded = Make_Binary(6);
        TERM_SEQUENCE_LEN(
            encoded,
            Encode_UTF8_Char(BIN_HEAD(encoded), VAL_CHAR(arg))
        );
        return encoded;
    }

    // MAKE/TO BINARY! <bitset!>
    case REB_BITSET:
        return Copy_Bytes(VAL_BIN(arg), VAL_LEN_HEAD(arg));

    // MAKE/TO BINARY! <image!>
    case REB_IMAGE:
        return Make_Image_Binary(arg);

    // MAKE/TO BINARY! <money!>
    case REB_MONEY: {
        // the amount is written out as exactly 12 bytes
        REBSER *amount = Make_Binary(12);
        deci_to_binary(BIN_HEAD(amount), VAL_MONEY_AMOUNT(arg));
        TERM_SEQUENCE_LEN(amount, 12);
        return amount;
    }

    default:
        break;
    }

    // Datatype not handled by this routine.
    return 0;
}