Exemple #1
0
*/	REBSER *Decode_UTF_String(REBYTE *bp, REBCNT len, REBINT utf, REBFLG ccr)
/*
**		Do all the details to decode a string.
**		Input is a byte series. Len is len of input.
**		The utf is 0, 8, +/-16, +/-32.
**		A special -1 means use the BOM.
**
***********************************************************************/
{
	REBSER *ser = BUF_UTF8; // buffer is Unicode width
	REBSER *dst;
	REBINT size;

	//REBFLG ccr = FALSE; // in original R3-alpha if was TRUE
	//@@ https://github.com/rebol/rebol-issues/issues/2336

	if (utf == -1) {
		utf = What_UTF(bp, len);
		if (utf) {
			if (utf == 8) bp += 3, len -= 3;
			else if (utf == -16 || utf == 16) bp += 2, len -= 2;
			else if (utf == -32 || utf == 32) bp += 4, len -= 4;
		}
	}

	if (utf == 0 || utf == 8) {
		size = Decode_UTF8((REBUNI*)Reset_Buffer(ser, len), bp, len, ccr);
	} 
	else if (utf == -16 || utf == 16) {
		size = Decode_UTF16((REBUNI*)Reset_Buffer(ser, len/2 + 1), bp, len, utf < 0, ccr);
	}
	else if (utf == -32 || utf == 32) {
		size = Decode_UTF32((REBUNI*)Reset_Buffer(ser, len/4 + 1), bp, len, utf < 0, ccr);
	}
    else {
        return NULL;
    }

	if (size < 0) {
		size = -size;
		dst = Make_Binary(size);
		Append_Uni_Bytes(dst, UNI_HEAD(ser), size);
	}
	else {
		dst = Make_Unicode(size);
		Append_Uni_Uni(dst, UNI_HEAD(ser), size);
	}

	return dst;
}
Exemple #2
0
*/	void Debug_Uni(const REBSER *ser)
/*
**		Print debug unicode string followed by a newline.
**
***********************************************************************/
{
	REBCNT ul;
	REBCNT bl;
	REBYTE buf[1024];
	REBUNI *up = UNI_HEAD(ser);
	REBINT size = Length_As_UTF8(up, SERIES_TAIL(ser), TRUE, OS_CRLF);

	REBINT disabled = GC_Disabled;
	GC_Disabled = 1;

	while (size > 0) {
		ul = Encode_UTF8(buf, MIN(size, 1020), up, &bl, TRUE, OS_CRLF);
		Debug_String(buf, bl, 0, 0);
		size -= ul;
		up += ul;
	}

	Debug_Line();

	assert(GC_Disabled == 1);
	GC_Disabled = disabled;
}
Exemple #3
0
//
//  Enline_Uni: C
//
void Enline_Uni(REBSER *ser, REBCNT idx, REBCNT len)
{
    REBCNT cnt = 0;
    REBUNI *bp;
    REBUNI c = 0;
    REBCNT tail;

    // Calculate the size difference by counting the number of LF's
    // that have no CR's in front of them.
    bp = UNI_AT(ser, idx);
    for (; len > 0; len--) {
        if (*bp == LF && c != CR) cnt++;
        c = *bp++;
    }
    if (cnt == 0) return;

    // Extend series:
    len = SER_LEN(ser); // before expansion
    EXPAND_SERIES_TAIL(ser, cnt);
    tail = SER_LEN(ser); // after expansion
    bp = UNI_HEAD(ser); // expand may change it

    // Add missing CRs:
    while (cnt > 0) {
        bp[tail--] = bp[len]; // Copy src to dst.
        if (bp[len] == LF && (len == 0 || bp[len - 1] != CR)) {
            bp[tail--] = CR;
            cnt--;
        }
        len--;
    }
}
Exemple #4
0
*/	REBSER *Copy_Wide_Str(void *src, REBINT len)
/*
**		Create a REBOL string series from a wide char string.
**		Minimize to bytes if possible
*/
{
    REBSER *dst;
    REBUNI *str = (REBUNI*)src;
    if (Is_Wide(str, len)) {
        REBUNI *up;
        dst = Make_Unicode(len);
        SERIES_TAIL(dst) = len;
        up = UNI_HEAD(dst);
        while (len-- > 0) *up++ = *str++;
        *up = 0;
    }
    else {
        REBYTE *bp;
        dst = Make_Binary(len);
        SERIES_TAIL(dst) = len;
        bp = BIN_HEAD(dst);
        while (len-- > 0) *bp++ = (REBYTE)*str++;
        *bp = 0;
    }
    return dst;
}
Exemple #5
0
*/	REBSER *Make_Unicode(REBCNT length)
/*
**		Make a unicode string series. Used for internal strings.
**		Add 1 extra for terminator.
**
***********************************************************************/
{
    REBSER *series = Make_Series(length + 1, sizeof(REBUNI), MKS_NONE);
    LABEL_SERIES(series, "make unicode");

    // !!! Clients seem to have different expectations of if `length` is
    // total capacity (and the binary should be empty) or actually is
    // specifically being preallocated at a fixed length.  Until this
    // is straightened out, terminate for both possibilities.

    UNI_HEAD(series)[length] = 0;
    TERM_SEQUENCE(series);
    return series;
}
Exemple #6
0
*/	REBSER *Parse_Lines(REBSER *src)
/*
**		Convert a string buffer to a block of strings.
**		Note that the string must already be converted
**		to REBOL LF format (no CRs).
**
***********************************************************************/
{
	REBSER	*blk;
	REBUNI c;
	REBCNT i;
	REBCNT s;
	REBVAL *val;
	REBOOL uni = !BYTE_SIZE(src);
	REBYTE *bp = BIN_HEAD(src);
	REBUNI *up = UNI_HEAD(src);

	blk = BUF_EMIT;
	RESET_SERIES(blk);

	// Scan string, looking for LF and CR terminators:
	for (i = s = 0; i < SERIES_TAIL(src); i++) {
		c = uni ? up[i] : bp[i];
		if (c == LF || c == CR) {
			val = Append_Value(blk);
			Set_String(val, Copy_String(src, s, i - s));
			VAL_SET_LINE(val);
			// Skip CRLF if found:
			if (c == CR && LF == uni ? up[i] : bp[i]) i++; 
			s = i;
		}
	}

	// Partial line (no linefeed):
	if (s + 1 != i) {
		val = Append_Value(blk);
		Set_String(val, Copy_String(src, s, i - s));
		VAL_SET_LINE(val);
	}

	return Copy_Block(blk, 0);
}
Exemple #7
0
*/	REBCHR *Val_Str_To_OS_Managed(REBSER **out, REBVAL *val)
/*
**		This is used to pass a REBOL value string to an OS API.
**
**		The REBOL (input) string can be byte or wide sized.
**		The OS (output) string is in the native OS format.
**		On Windows, its a wide-char, but on Linux, its UTF-8.
**
**		If we know that the string can be used directly as-is,
**		(because it's in the OS size format), we can used it
**		like that.
**
**		!!! The series is created but just let up to the garbage
**		collector to free.  This is a "leaky" approach.  You may
**		optionally request to have the series returned if it is
**		important for you to protect it from GC, but you cannot
**		currently get a "freeable" series out of this.
**
***********************************************************************/
{
#ifdef OS_WIDE_CHAR
    if (VAL_BYTE_SIZE(val)) {
        // On windows, we need to convert byte to wide:
        REBINT n = VAL_LEN(val);
        REBSER *up = Make_Unicode(n);

        // !!!"Leaks" in the sense that the GC has to take care of this
        MANAGE_SERIES(up);

        n = Decode_UTF8(UNI_HEAD(up), VAL_BIN_DATA(val), n, FALSE);
        SERIES_TAIL(up) = abs(n);
        UNI_TERM(up);

        if (out) *out = up;

        return cast(REBCHR*, UNI_HEAD(up));
    }
    else {
        // Already wide, we can use it as-is:
        // !Assumes the OS uses same wide format!

        if (out) *out = VAL_SERIES(val);
Exemple #8
0
STOID Mold_Uni_Char(REBSER *dst, REBUNI chr, REBOOL molded, REBOOL parened)
{
	REBCNT tail = SERIES_TAIL(dst);
	REBUNI *up;

	if (!molded) {
		EXPAND_SERIES_TAIL(dst, 1);
		*UNI_SKIP(dst, tail) = chr;
	}
	else {
		EXPAND_SERIES_TAIL(dst, 10); // worst case: #"^(1234)"
		up = UNI_SKIP(dst, tail);
		*up++ = '#';
		*up++ = '"';
		up = Emit_Uni_Char(up, chr, parened);
		*up++ = '"';
		dst->tail = up - UNI_HEAD(dst);
	}
	UNI_TERM(dst);
}
Exemple #9
0
*/	REBSER *Copy_Bytes_To_Unicode(REBYTE *src, REBINT len)
/*
**		Convert a byte string to a unicode string. This can
**		be used for ASCII or LATIN-8 strings.
**
***********************************************************************/
{
    REBSER *series;
    REBUNI *dst;

    series = Make_Unicode(len);
    dst = UNI_HEAD(series);
    SERIES_TAIL(series) = len;

    for (; len > 0; len--) {
        *dst++ = (REBUNI)(*src++);
    }

    UNI_TERM(series);

    return series;
}
Exemple #10
0
*/  REBSER *Copy_Buffer(REBSER *buf, void *end)
/*
**		Copy a shared buffer. Set tail and termination.
**
***********************************************************************/
{
	REBSER *ser;
	REBCNT len;

	len = BYTE_SIZE(buf) ? ((REBYTE *)end) - BIN_HEAD(buf)
		: ((REBUNI *)end) - UNI_HEAD(buf);

	ser = Make_Series(
		len + 1,
		SERIES_WIDE(buf),
		Is_Array_Series(buf) ? MKS_ARRAY : MKS_NONE
	);

	memcpy(ser->data, buf->data, SERIES_WIDE(buf) * len);
	ser->tail = len;
	TERM_SERIES(ser);

	return ser;
}
Exemple #11
0
*/  int Encode_UTF8_Line(REBSER *dst, REBSER *src, REBCNT idx)
/*
**		Encode a unicode source buffer into a binary line of UTF8.
**		Include the LF terminator in the result.
**		Return the length of the line buffer.
**
***********************************************************************/
{
	REBUNI *up = UNI_HEAD(src);
	REBCNT len  = SERIES_TAIL(src);
	REBCNT tail;
	REBUNI c;
	REBINT n;
	REBYTE buf[8];

	tail = RESET_TAIL(dst);

	while (idx < len) {
		if ((c = up[idx]) < 0x80) {
			EXPAND_SERIES_TAIL(dst, 1);
			BIN_HEAD(dst)[tail++] = (REBYTE)c;
		}
		else {
			n = Encode_UTF8_Char(buf, c);
			EXPAND_SERIES_TAIL(dst, n);
			memcpy(BIN_SKIP(dst, tail), buf, n);
			tail += n;
		}
		idx++;
		if (c == LF) break;
	}

	BIN_HEAD(dst)[tail] = 0;
	SERIES_TAIL(dst) = tail;
	return idx;
}
Exemple #12
0
STOID Mold_String_Series(REBVAL *value, REB_MOLD *mold)
{
	REBCNT len = VAL_LEN(value);
	REBSER *ser = VAL_SERIES(value);
	REBCNT idx = VAL_INDEX(value);
	REB_STRF sf = {0};
	REBYTE *bp;
	REBUNI *up;
	REBUNI *dp;
	REBOOL uni = !BYTE_SIZE(ser);
	REBCNT n;
	REBUNI c;

	// Empty string:
	if (idx >= VAL_TAIL(value)) {
		Append_Bytes(mold->series, "\"\"");  //Trap0(RE_PAST_END);
		return;
	}

	Sniff_String(ser, idx, &sf);
	if (!GET_MOPT(mold, MOPT_ANSI_ONLY)) sf.paren = 0;

	// Source can be 8 or 16 bits:
	if (uni) up = UNI_HEAD(ser);
	else bp = STR_HEAD(ser);

	// If it is a short quoted string, emit it as "string":
	if (len <= MAX_QUOTED_STR && sf.quote == 0 && sf.newline < 3) {

		dp = Prep_Uni_Series(mold, len + sf.newline + sf.escape + sf.paren + sf.chr1e + 2);

		*dp++ = '"';

		for (n = idx; n < VAL_TAIL(value); n++) {
			c = uni ? up[n] : (REBUNI)(bp[n]);
			dp = Emit_Uni_Char(dp, c, (REBOOL)GET_MOPT(mold, MOPT_ANSI_ONLY)); // parened
		}

		*dp++ = '"';
		*dp = 0;
		return;
	}

	// It is a braced string, emit it as {string}:
	if (!sf.malign) sf.brace_in = sf.brace_out = 0;

	dp = Prep_Uni_Series(mold, len + sf.brace_in + sf.brace_out + sf.escape + sf.paren + sf.chr1e + 2);

	*dp++ = '{';

	for (n = idx; n < VAL_TAIL(value); n++) {

		c = uni ? up[n] : (REBUNI)(bp[n]);
		switch (c) {
		case '{':
		case '}':
			if (sf.malign) {
				*dp++ = '^';
				*dp++ = c;
				break;
			}
		case '\n':
		case '"':
			*dp++ = c;
			break;
		default:
			dp = Emit_Uni_Char(dp, c, (REBOOL)GET_MOPT(mold, MOPT_ANSI_ONLY)); // parened
		}
	}

	*dp++ = '}';
	*dp = 0;
}
Exemple #13
0
//
//  Clipboard_Actor: C
//
static REB_R Clipboard_Actor(struct Reb_Call *call_, REBSER *port, REBCNT action)
{
    REBREQ *req;
    REBINT result;
    REBVAL *arg;
    REBCNT refs;    // refinement argument flags
    REBINT len;
    REBSER *ser;

    Validate_Port(port, action);

    arg = DS_ARGC > 1 ? D_ARG(2) : NULL;

    req = cast(REBREQ*, Use_Port_State(port, RDI_CLIPBOARD, sizeof(REBREQ)));

    switch (action) {
    case A_UPDATE:
        // Update the port object after a READ or WRITE operation.
        // This is normally called by the WAKE-UP function.
        arg = OFV(port, STD_PORT_DATA);
        if (req->command == RDC_READ) {
            // this could be executed twice:
            // once for an event READ, once for the CLOSE following the READ
            if (!req->common.data) return R_NONE;
            len = req->actual;
            if (GET_FLAG(req->flags, RRF_WIDE)) {
                // convert to UTF8, so that it can be converted back to string!
                Val_Init_Binary(arg, Make_UTF8_Binary(
                    req->common.data,
                    len / sizeof(REBUNI),
                    0,
                    OPT_ENC_UNISRC
                ));
            }
            else {
                REBSER *ser = Make_Binary(len);
                memcpy(BIN_HEAD(ser), req->common.data, len);
                SERIES_TAIL(ser) = len;
                Val_Init_Binary(arg, ser);
            }
            OS_FREE(req->common.data); // release the copy buffer
            req->common.data = 0;
        }
        else if (req->command == RDC_WRITE) {
            SET_NONE(arg);  // Write is done.
        }
        return R_NONE;

    case A_READ:
        // This device is opened on the READ:
        if (!IS_OPEN(req)) {
            if (OS_DO_DEVICE(req, RDC_OPEN))
                fail (Error_On_Port(RE_CANNOT_OPEN, port, req->error));
        }
        // Issue the read request:
        CLR_FLAG(req->flags, RRF_WIDE); // allow byte or wide chars
        result = OS_DO_DEVICE(req, RDC_READ);
        if (result < 0) fail (Error_On_Port(RE_READ_ERROR, port, req->error));
        if (result > 0) return R_NONE; /* pending */

        // Copy and set the string result:
        arg = OFV(port, STD_PORT_DATA);

        len = req->actual;
        if (GET_FLAG(req->flags, RRF_WIDE)) {
            // convert to UTF8, so that it can be converted back to string!
            Val_Init_Binary(arg, Make_UTF8_Binary(
                req->common.data,
                len / sizeof(REBUNI),
                0,
                OPT_ENC_UNISRC
            ));
        }
        else {
            REBSER *ser = Make_Binary(len);
            memcpy(BIN_HEAD(ser), req->common.data, len);
            SERIES_TAIL(ser) = len;
            Val_Init_Binary(arg, ser);
        }

        *D_OUT = *arg;
        return R_OUT;

    case A_WRITE:
        if (!IS_STRING(arg) && !IS_BINARY(arg))
            fail (Error(RE_INVALID_PORT_ARG, arg));
        // This device is opened on the WRITE:
        if (!IS_OPEN(req)) {
            if (OS_DO_DEVICE(req, RDC_OPEN))
                fail (Error_On_Port(RE_CANNOT_OPEN, port, req->error));
        }

        refs = Find_Refines(call_, ALL_WRITE_REFS);

        // Handle /part refinement:
        len = VAL_LEN(arg);
        if (refs & AM_WRITE_PART && VAL_INT32(D_ARG(ARG_WRITE_LIMIT)) < len)
            len = VAL_INT32(D_ARG(ARG_WRITE_LIMIT));

        // If bytes, see if we can fit it:
        if (SERIES_WIDE(VAL_SERIES(arg)) == 1) {
#ifdef ARG_STRINGS_ALLOWED
            if (!All_Bytes_ASCII(VAL_BIN_DATA(arg), len)) {
                Val_Init_String(
                    arg, Copy_Bytes_To_Unicode(VAL_BIN_DATA(arg), len)
                );
            } else
                req->common.data = VAL_BIN_DATA(arg);
#endif

            // Temp conversion:!!!
            ser = Make_Unicode(len);
            len = Decode_UTF8(UNI_HEAD(ser), VAL_BIN_DATA(arg), len, FALSE);
            SERIES_TAIL(ser) = len = abs(len);
            UNI_TERM(ser);
            Val_Init_String(arg, ser);
            req->common.data = cast(REBYTE*, UNI_HEAD(ser));
            SET_FLAG(req->flags, RRF_WIDE);
        }
        else
        // If unicode (may be from above conversion), handle it:
        if (SERIES_WIDE(VAL_SERIES(arg)) == sizeof(REBUNI)) {
            req->common.data = cast(REBYTE *, VAL_UNI_DATA(arg));
            SET_FLAG(req->flags, RRF_WIDE);
        }