Exemple #1
0
*/	REBSER *Decode_UTF_String(REBYTE *bp, REBCNT len, REBINT utf, REBFLG ccr)
/*
**		Decode a run of encoded bytes into a newly made string series.
**
**		bp  - first input byte
**		len - number of input bytes
**		utf - encoding selector: 0 or 8 (UTF-8), +/-16 (UTF-16),
**		      +/-32 (UTF-32). For the 16 and 32 bit forms the test
**		      (utf < 0) is handed to the decoder as a flag.
**		      -1 asks What_UTF() to pick the encoding from a BOM; a
**		      detected BOM is skipped, and a result of 0 (no BOM)
**		      falls through to the UTF-8 path.
**		ccr - forwarded unchanged to the decoder that runs.
**
**		Returns NULL when utf is none of the values above.
**		A negative count from the decoder yields a series built with
**		Make_Binary/Append_Uni_Bytes, otherwise one built with
**		Make_Unicode/Append_Uni_Uni.
**
***********************************************************************/
{
	REBSER *buf = BUF_UTF8; // scratch buffer, Unicode width
	REBSER *result;
	REBINT count;

	// Historical note: ccr used to be a local; original R3-alpha had it TRUE.
	//@@ https://github.com/rebol/rebol-issues/issues/2336

	if (utf == -1) {
		// Let the BOM choose the encoding, then step over the BOM bytes.
		utf = What_UTF(bp, len);
		switch (utf) {
		case 8:
			bp += 3;
			len -= 3;
			break;
		case 16:
		case -16:
			bp += 2;
			len -= 2;
			break;
		case 32:
		case -32:
			bp += 4;
			len -= 4;
			break;
		default:
			break; // 0 (no BOM) or anything else: leave the input untouched
		}
	}

	// Decode into the scratch buffer; the buffer is sized from the byte count.
	switch (utf) {
	case 0:
	case 8:
		count = Decode_UTF8((REBUNI*)Reset_Buffer(buf, len), bp, len, ccr);
		break;
	case 16:
	case -16:
		count = Decode_UTF16((REBUNI*)Reset_Buffer(buf, len/2 + 1), bp, len, utf < 0, ccr);
		break;
	case 32:
	case -32:
		count = Decode_UTF32((REBUNI*)Reset_Buffer(buf, len/4 + 1), bp, len, utf < 0, ccr);
		break;
	default:
		return NULL; // unsupported selector
	}

	if (count >= 0) {
		result = Make_Unicode(count);
		Append_Uni_Uni(result, UNI_HEAD(buf), count);
		return result;
	}

	// Negative count: copy the decoded units out as bytes.
	// NOTE(review): presumably means every character fits in one byte -- confirm.
	count = -count;
	result = Make_Binary(count);
	Append_Uni_Bytes(result, UNI_HEAD(buf), count);
	return result;
}
Exemple #2
0
// Build the series behind a MAKE or TO string conversion of `arg`.
//
// `make` is TRUE for MAKE: only then is an integer!/decimal! argument passed
// (through Int32s) to Make_Binary rather than being formed as text.
// Returns whatever series the selected constructor produced.
static REBSER *make_string(REBVAL *arg, REBOOL make)
{
	// MAKE <type> 123
	if (make && (IS_INTEGER(arg) || IS_DECIMAL(arg)))
		return Make_Binary(Int32s(arg, 0));

	// MAKE/TO <type> <binary!>
	// Must be tested before ANY_BINSTR, as in the original ordering.
	if (IS_BINARY(arg)) {
		REBYTE *bytes = VAL_BIN_DATA(arg);
		REBCNT count = VAL_LEN(arg);

		switch (What_UTF(bytes, count)) {
		case 8: // UTF-8 BOM present: skip its three bytes
			bytes += 3;
			count -= 3;
			break;
		case 0: // no BOM
			break;
		default: // any other detected encoding is rejected
			Trap0(RE_BAD_DECODE);
		}
		return Decode_UTF_String(bytes, count, 8); // UTF-8
	}

	// MAKE/TO <type> <any-string>
	if (ANY_BINSTR(arg))
		return Copy_String(VAL_SERIES(arg), VAL_INDEX(arg), VAL_LEN(arg));

	// MAKE/TO <type> <any-word>
	// (an earlier variant appended the word's UTF-8 name instead of molding)
	if (ANY_WORD(arg))
		return Copy_Mold_Value(arg, TRUE);

	// MAKE/TO <type> #"A"
	if (IS_CHAR(arg)) {
		REBSER *one;

		// Code points above 0xff get a Unicode-width series.
		if (VAL_CHAR(arg) > 0xff)
			one = Make_Unicode(2);
		else
			one = Make_Binary(2);
		Append_Byte(one, VAL_CHAR(arg));
		return one;
	}

	// MAKE/TO <type> <any-value>
	// (a former none! special case is disabled; none! is formed like the rest)
	return Copy_Form_Value(arg, 1<<MOPT_TIGHT);
}
Exemple #3
0
//
//  Decode_UTF_String: C
// 
// Do all the details to decode a string.
// Input is a byte series; len is the length of the input in bytes.
// The utf is 0, 8, or +/-16.
// A special -1 means use the BOM, if present, or UTF-8 otherwise.
// 
// Returns the decoded string or NULL for unsupported encodings.
//
REBSER *Decode_UTF_String(REBYTE *bp, REBCNT len, REBINT utf)
{
    REBSER *ser = BUF_UTF8; // buffer is Unicode width
    REBSER *dst;
    REBINT size;

    if (utf == -1) {
        // Try to detect UTF encoding from a BOM. Returns 0 if no BOM present.
        utf = What_UTF(bp, len);
        if (utf != 0) {
            if (utf == 8) bp += 3, len -= 3;
            else if (utf == -16 || utf == 16) bp += 2, len -= 2;
            else return NULL;
        }
    }

    if (utf == 0 || utf == 8) {
        size = Decode_UTF8_May_Fail(
            cast(REBUNI*, Reset_Buffer(ser, len)), bp, len, TRUE
        );
    }
Exemple #4
0
// Shared conversion used by MAKE and TO for string types: picks a
// constructor based on the datatype of `arg` and returns the new series.
static REBSER *MAKE_TO_String_Common(const REBVAL *arg)
{
    // MAKE/TO <type> <binary!>
    // Must be tested before ANY_BINSTR, as in the original ordering.
    if (IS_BINARY(arg)) {
        REBYTE *bytes = VAL_BIN_AT(arg);
        REBCNT count = VAL_LEN_AT(arg);

        switch (What_UTF(bytes, count)) {
        case 8: // UTF-8 BOM present: skip its three bytes
            bytes += 3;
            count -= 3;
            break;
        case 0: // no BOM
            break;
        default: // any other detected encoding is rejected
            fail (Error(RE_BAD_UTF8));
        }
        return Decode_UTF_String(bytes, count, 8); // UTF-8
    }

    // MAKE/TO <type> <any-string>
    if (ANY_BINSTR(arg)) {
        return Copy_String_Slimming(
            VAL_SERIES(arg), VAL_INDEX(arg), VAL_LEN_AT(arg)
        );
    }

    // MAKE/TO <type> <any-word>
    if (ANY_WORD(arg))
        return Copy_Mold_Value(arg, 0 /* opts... MOPT_0? */);

    // MAKE/TO <type> #"A"
    if (IS_CHAR(arg)) {
        REBSER *one;

        // Code points above 0xff get a Unicode-width series.
        if (VAL_CHAR(arg) > 0xff)
            one = Make_Unicode(2);
        else
            one = Make_Binary(2);
        Append_Codepoint_Raw(one, VAL_CHAR(arg));
        return one;
    }

    // Everything else is formed as text.
    return Copy_Form_Value(arg, 1 << MOPT_TIGHT);
}