예제 #1
0
파일: s-unicode.c 프로젝트: Oldes/r3
*/	REBSER *Decode_UTF_String(REBYTE *bp, REBCNT len, REBINT utf, REBFLG ccr)
/*
**		Do all the details to decode a string.
**		Input is a byte series. Len is len of input.
**		The utf is 0, 8, +/-16, +/-32.
**		A special -1 means use the BOM.
**
***********************************************************************/
{
	REBSER *ser = BUF_UTF8; // buffer is Unicode width
	REBSER *dst;
	REBINT size;

	//REBFLG ccr = FALSE; // in original R3-alpha if was TRUE
	//@@ https://github.com/rebol/rebol-issues/issues/2336

	if (utf == -1) {
		utf = What_UTF(bp, len);
		if (utf) {
			if (utf == 8) bp += 3, len -= 3;
			else if (utf == -16 || utf == 16) bp += 2, len -= 2;
			else if (utf == -32 || utf == 32) bp += 4, len -= 4;
		}
	}

	if (utf == 0 || utf == 8) {
		size = Decode_UTF8((REBUNI*)Reset_Buffer(ser, len), bp, len, ccr);
	} 
	else if (utf == -16 || utf == 16) {
		size = Decode_UTF16((REBUNI*)Reset_Buffer(ser, len/2 + 1), bp, len, utf < 0, ccr);
	}
	else if (utf == -32 || utf == 32) {
		size = Decode_UTF32((REBUNI*)Reset_Buffer(ser, len/4 + 1), bp, len, utf < 0, ccr);
	}
    else {
        return NULL;
    }

	if (size < 0) {
		size = -size;
		dst = Make_Binary(size);
		Append_Uni_Bytes(dst, UNI_HEAD(ser), size);
	}
	else {
		dst = Make_Unicode(size);
		Append_Uni_Uni(dst, UNI_HEAD(ser), size);
	}

	return dst;
}
예제 #2
0
파일: s-ops.c 프로젝트: rhencke/rebol
//
//  Detab_Bytes: C
// 
// Detab a string and return a new series.
//
REBSER *Detab_Bytes(REBYTE *bp, REBCNT index, REBCNT len, REBINT tabsize)
{
    REBCNT cnt = 0;
    REBCNT n;
    REBYTE *dp;
    REBYTE c;

    // Estimate new length based on tab expansion:
    for (n = index; n < len; n++)
        if (bp[n] == TAB) cnt++;

    dp = Reset_Buffer(BYTE_BUF, len + (cnt * (tabsize-1)));

    n = 0;
    while (index < len) {

        c = bp[index++];

        if (c == '\t') {
            *dp++ = ' ';
            n++;
            for (; n % tabsize != 0; n++) *dp++ = ' ';
            continue;
        }

        if (c == '\n') n = 0;
        else n++;

        *dp++ = c;
    }

    return Copy_Buffer(BYTE_BUF, 0, dp);
}
예제 #3
0
파일: s-unicode.c 프로젝트: Oldes/r3
*/	REBSER *Encode_UTF8_String(void *src, REBCNT len, REBFLG uni, REBFLG opts)
/*
**		Do all the details to encode a string as UTF8.
**		No_copy means do not make a copy.
**		Result can be a shared buffer!
**
***********************************************************************/
{
	REBSER *ser = BUF_FORM; // a shared buffer
	REBCNT size;
	REBYTE *cp;
	REBFLG ccr = GET_FLAG(opts, ENC_OPT_CRLF);

	if (uni) {
		REBUNI *up = (REBUNI*)src;

		size = Length_As_UTF8(up, len, TRUE, (REBOOL)ccr);
		cp = Reset_Buffer(ser, size + (GET_FLAG(opts, ENC_OPT_BOM) ? 3 : 0));
        UNUSED(cp);
		Encode_UTF8(Reset_Buffer(ser, size), size, up, &len, TRUE, ccr);
	}
	else {
		REBYTE *bp = (REBYTE*)src;

		if (Is_Not_ASCII(bp, len)) {
			size = Length_As_UTF8((REBUNI*)bp, len, FALSE, (REBOOL)ccr);
			cp = Reset_Buffer(ser, size + (GET_FLAG(opts, ENC_OPT_BOM) ? 3 : 0));
			Encode_UTF8(cp, size, bp, &len, FALSE, ccr);
		}
		else if (GET_FLAG(opts, ENC_OPT_NO_COPY)) return 0;
		else return Copy_Bytes(bp, len);
	}

	SERIES_TAIL(ser) = len;
	STR_TERM(ser);

	return Copy_Bytes(BIN_HEAD(ser), len);
}
예제 #4
0
파일: s-ops.c 프로젝트: asampal/ren-c
*/  REBSER *Entab_Unicode(REBUNI *bp, REBCNT index, REBCNT len, REBINT tabsize)
/*
**		Entab a string and return a new series.
**
***********************************************************************/
{
	REBINT n = 0;
	REBUNI *dp;
	REBUNI c;

	dp = (REBUNI *)Reset_Buffer(BUF_MOLD, len);

	for (; index < len; index++) {

		c = bp[index];

		// Count leading spaces, insert TAB for each tabsize:
		if (c == ' ') {
			if (++n >= tabsize) {
				*dp++ = '\t';
				n = 0;
			}
			continue;
		}

		// Hitting a leading TAB resets space counter:
		if (c == '\t') {
			*dp++ = (REBYTE)c;
			n = 0;
		}
		else {
			// Incomplete tab space, pad with spaces:
			for (; n > 0; n--) *dp++ = ' ';

			// Copy chars thru end-of-line (or end of buffer):
			while (index < len) {
				if ((*dp++ = bp[index++]) == '\n') break;
			}
		}
	}

	return Copy_Buffer(BUF_MOLD, dp);
}
예제 #5
0
파일: p-dir.c 프로젝트: 51weekend/r3
*/	static int Read_Dir(REBREQ *dir, REBSER *files)
/*
**		Provide option to get file info too.
**		Provide option to prepend dir path.
**		Provide option to use wildcards.
**
***********************************************************************/
{
	REBINT result;
	REBCNT len;
	REBSER *fname;
	REBSER *name;
	REBREQ file;

	RESET_TAIL(files);
	CLEARS(&file);

	// Temporary filename storage:
	fname = BUF_OS_STR;
	file.file.path = (REBCHR*)Reset_Buffer(fname, MAX_FILE_NAME);

	SET_FLAG(dir->modes, RFM_DIR);

	dir->data = (REBYTE*)(&file);

	while ((result = OS_DO_DEVICE(dir, RDC_READ)) == 0 && !GET_FLAG(dir->flags, RRF_DONE)) {
		len = LEN_STR(file.file.path);
		if (GET_FLAG(file.modes, RFM_DIR)) len++;
		name = Copy_OS_Str(file.file.path, len);
		if (GET_FLAG(file.modes, RFM_DIR))
			SET_ANY_CHAR(name, name->tail-1, '/');
		Set_Series(REB_FILE, Append_Value(files), name);
	}

	if (result < 0 && dir->error != -RFE_OPEN_FAIL
		&& (FIND_CHR(dir->file.path, '*') || FIND_CHR(dir->file.path, '?')))
		result = 0;  // no matches found, but not an error

	return result;
}
예제 #6
0
파일: s-ops.c 프로젝트: rhencke/rebol
//
//  Entab_Bytes: C
// 
// Entab a string and return a new series.
//
REBSER *Entab_Bytes(REBYTE *bp, REBCNT index, REBCNT len, REBINT tabsize)
{
    REBINT n = 0;
    REBYTE *dp;
    REBYTE c;

    dp = Reset_Buffer(BYTE_BUF, len);

    for (; index < len; index++) {

        c = bp[index];

        // Count leading spaces, insert TAB for each tabsize:
        if (c == ' ') {
            if (++n >= tabsize) {
                *dp++ = '\t';
                n = 0;
            }
            continue;
        }

        // Hitting a leading TAB resets space counter:
        if (c == '\t') {
            *dp++ = (REBYTE)c;
            n = 0;
        }
        else {
            // Incomplete tab space, pad with spaces:
            for (; n > 0; n--) *dp++ = ' ';

            // Copy chars thru end-of-line (or end of buffer):
            while (index < len) {
                if ((*dp++ = bp[index++]) == '\n') break;
            }
        }
    }

    return Copy_Buffer(BYTE_BUF, 0, dp);
}
예제 #7
0
파일: s-ops.c 프로젝트: asampal/ren-c
*/  REBSER *Detab_Unicode(REBUNI *bp, REBCNT index, REBCNT len, REBINT tabsize)
/*
**		Detab a unicode string and return a new series.
**
***********************************************************************/
{
	REBCNT cnt = 0;
	REBCNT n;
	REBUNI *dp;
	REBUNI c;

	// Estimate new length based on tab expansion:
	for (n = index; n < len; n++)
		if (bp[n] == TAB) cnt++;

	dp = (REBUNI *)Reset_Buffer(BUF_MOLD, len + (cnt * (tabsize-1)));

	n = 0;
	while (index < len) {

		c = bp[index++];

		if (c == '\t') {
			*dp++ = ' ';
			n++;
			for (; n % tabsize != 0; n++) *dp++ = ' ';
			continue;
		}

		if (c == '\n') n = 0;
		else n++;

		*dp++ = c;
	}

	return Copy_Buffer(BUF_MOLD, dp);
}
예제 #8
0
//
//  Make_Set_Operation_Series: C
// 
// Do set operations on a series.  Case-sensitive if `cased` is TRUE.
// `skip` is the record size.
//
static REBSER *Make_Set_Operation_Series(const REBVAL *val1, const REBVAL *val2, REBCNT flags, REBCNT cased, REBCNT skip)
{
    REBSER *buffer;     // buffer for building the return series
    REBCNT i;
    REBINT h = TRUE;
    REBFLG first_pass = TRUE; // are we in the first pass over the series?
    REBSER *out_ser;

    // This routine should only be called with SERIES! values
    assert(ANY_SERIES(val1));

    if (val2) {
        assert(ANY_SERIES(val2));

        if (ANY_ARRAY(val1)) {
            if (!ANY_ARRAY(val2))
                fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2)));

            // As long as they're both arrays, we're willing to do:
            //
            //     >> union quote (a b c) 'b/d/e
            //     (a b c d e)
            //
            // The type of the result will match the first value.
        }
        else if (!IS_BINARY(val1)) {

            // We will similarly do any two ANY-STRING! types:
            //
            //      >> union <abc> "bde"
            //      <abcde>

            if (IS_BINARY(val2))
                fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2)));
        }
        else {
            // Binaries only operate with other binaries

            if (!IS_BINARY(val2))
                fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2)));
        }
    }

    // Calculate i as length of result block.
    i = VAL_LEN(val1);
    if (flags & SOP_FLAG_BOTH) i += VAL_LEN(val2);

    if (ANY_ARRAY(val1)) {
        REBSER *hser = 0;   // hash table for series
        REBSER *hret;       // hash table for return series

        buffer = BUF_EMIT;          // use preallocated shared block
        Resize_Series(buffer, i);
        hret = Make_Hash_Sequence(i);   // allocated

        // Optimization note: !!
        // This code could be optimized for small blocks by not hashing them
        // and extending Find_Key to do a FIND on the value itself w/o the hash.

        do {
            REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass!

            // Check what is in series1 but not in series2:
            if (flags & SOP_FLAG_CHECK)
                hser = Hash_Block(val2, cased);

            // Iterate over first series:
            i = VAL_INDEX(val1);
            for (; i < SERIES_TAIL(ser); i += skip) {
                REBVAL *item = BLK_SKIP(ser, i);
                if (flags & SOP_FLAG_CHECK) {
                    h = Find_Key(VAL_SERIES(val2), hser, item, skip, cased, 1);
                    h = (h >= 0);
                    if (flags & SOP_FLAG_INVERT) h = !h;
                }
                if (h) Find_Key(buffer, hret, item, skip, cased, 2);
            }

            if (flags & SOP_FLAG_CHECK)
                Free_Series(hser);

            if (!first_pass) break;
            first_pass = FALSE;

            // Iterate over second series?
            if ((i = ((flags & SOP_FLAG_BOTH) != 0))) {
                const REBVAL *temp = val1;
                val1 = val2;
                val2 = temp;
            }
        } while (i);

        if (hret)
            Free_Series(hret);

        out_ser = Copy_Array_Shallow(buffer);
        RESET_TAIL(buffer); // required - allow reuse
    }
    else {
        if (IS_BINARY(val1)) {
            // All binaries use "case-sensitive" comparison (e.g. each byte
            // is treated distinctly)
            cased = TRUE;
        }

        buffer = BUF_MOLD;
        Reset_Buffer(buffer, i);
        RESET_TAIL(buffer);

        do {
            REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass!
            REBUNI uc;

            // Iterate over first series:
            i = VAL_INDEX(val1);
            for (; i < SERIES_TAIL(ser); i += skip) {
                uc = GET_ANY_CHAR(ser, i);
                if (flags & SOP_FLAG_CHECK) {
                    h = (NOT_FOUND != Find_Str_Char(
                        VAL_SERIES(val2),
                        0,
                        VAL_INDEX(val2),
                        VAL_TAIL(val2),
                        skip,
                        uc,
                        cased ? AM_FIND_CASE : 0
                    ));

                    if (flags & SOP_FLAG_INVERT) h = !h;
                }

                if (!h) continue;

                if (
                    NOT_FOUND == Find_Str_Char(
                        buffer,
                        0,
                        0,
                        SERIES_TAIL(buffer),
                        skip,
                        uc,
                        cased ? AM_FIND_CASE : 0
                    )
                ) {
                    Append_String(buffer, ser, i, skip);
                }
            }

            if (!first_pass) break;
            first_pass = FALSE;

            // Iterate over second series?
            if ((i = ((flags & SOP_FLAG_BOTH) != 0))) {
                const REBVAL *temp = val1;
                val1 = val2;
                val2 = temp;
            }
        } while (i);

        out_ser = Copy_String(buffer, 0, -1);
    }

    return out_ser;
}
예제 #9
0
파일: n-sets.c 프로젝트: kealist/ren-c
*/	static REBINT Do_Set_Operation(struct Reb_Call *call_, REBCNT flags)
/*
**		Do set operations on a series.
**
***********************************************************************/
{
	REBVAL *val;
	REBVAL *val1;
	REBVAL *val2 = 0;
	REBSER *ser;
	REBSER *hser = 0;	// hash table for series
	REBSER *retser;		// return series
	REBSER *hret;		// hash table for return series
	REBCNT i;
	REBINT h = TRUE;
	REBCNT skip = 1;	// record size
	REBCNT cased = 0;	// case sensitive when TRUE

	SET_NONE(D_OUT);
	val1 = D_ARG(1);
	i = 2;

	// Check for second series argument:
	if (flags != SET_OP_UNIQUE) {
		val2 = D_ARG(i++);
		if (VAL_TYPE(val1) != VAL_TYPE(val2))
			raise Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2));
	}

	// Refinements /case and /skip N
	cased = D_REF(i++); // cased
	if (D_REF(i++)) skip = Int32s(D_ARG(i), 1);

	switch (VAL_TYPE(val1)) {

	case REB_BLOCK:
		i = VAL_LEN(val1);
		// Setup result block:
		if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2);
		retser = BUF_EMIT;			// use preallocated shared block
		Resize_Series(retser, i);
		hret = Make_Hash_Sequence(i);	// allocated

		// Optimization note: !!
		// This code could be optimized for small blocks by not hashing them
		// and extending Find_Key to do a FIND on the value itself w/o the hash.

		do {
			// Check what is in series1 but not in series2:
			if (GET_FLAG(flags, SOP_CHECK))
				hser = Hash_Block(val2, cased);

			// Iterate over first series:
			ser = VAL_SERIES(val1);
			i = VAL_INDEX(val1);
			for (; val = BLK_SKIP(ser, i), i < SERIES_TAIL(ser); i += skip) {
				if (GET_FLAG(flags, SOP_CHECK)) {
					h = Find_Key(VAL_SERIES(val2), hser, val, skip, cased, 1) >= 0;
					if (GET_FLAG(flags, SOP_INVERT)) h = !h;
				}
				if (h) Find_Key(retser, hret, val, skip, cased, 2);
			}

			// Iterate over second series?
			if ((i = GET_FLAG(flags, SOP_BOTH))) {
				val = val1;
				val1 = val2;
				val2 = val;
				CLR_FLAG(flags, SOP_BOTH);
			}

			if (GET_FLAG(flags, SOP_CHECK))
				Free_Series(hser);
		} while (i);

		if (hret)
			Free_Series(hret);

		Val_Init_Block(D_OUT, Copy_Array_Shallow(retser));
		RESET_TAIL(retser); // required - allow reuse

		break;

	case REB_BINARY:
		cased = TRUE;
		SET_TYPE(D_OUT, REB_BINARY);
	case REB_STRING:
		i = VAL_LEN(val1);
		// Setup result block:
		if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2);

		retser = BUF_MOLD;
		Reset_Buffer(retser, i);
		RESET_TAIL(retser);

		do {
			REBUNI uc;

			cased = cased ? AM_FIND_CASE : 0;

			// Iterate over first series:
			ser = VAL_SERIES(val1);
			i = VAL_INDEX(val1);
			for (; i < SERIES_TAIL(ser); i += skip) {
				uc = GET_ANY_CHAR(ser, i);
				if (GET_FLAG(flags, SOP_CHECK)) {
					h = Find_Str_Char(VAL_SERIES(val2), 0, VAL_INDEX(val2), VAL_TAIL(val2), skip, uc, cased) != NOT_FOUND;
					if (GET_FLAG(flags, SOP_INVERT)) h = !h;
				}
				if (h && (Find_Str_Char(retser, 0, 0, SERIES_TAIL(retser), skip, uc, cased) == NOT_FOUND)) {
					Append_String(retser, ser, i, skip);
				}
			}

			// Iterate over second series?
			if ((i = GET_FLAG(flags, SOP_BOTH))) {
				val = val1;
				val1 = val2;
				val2 = val;
				CLR_FLAG(flags, SOP_BOTH);
			}
		} while (i);

		ser = Copy_String(retser, 0, -1);
		if (IS_BINARY(D_OUT))
			Val_Init_Binary(D_OUT, ser);
		else
			Val_Init_String(D_OUT, ser);
		break;

	case REB_BITSET:
		switch (flags) {
		case SET_OP_UNIQUE:
			return R_ARG1;
		case SET_OP_UNION:
			i = A_OR;
			break;
		case SET_OP_INTERSECT:
			i = A_AND;
			break;
		case SET_OP_DIFFERENCE:
			i = A_XOR;
			break;
		case SET_OP_EXCLUDE:
			i = 0; // special case
			break;
		}
		ser = Xandor_Binary(i, val1, val2);
		Val_Init_Bitset(D_OUT, ser);
		break;

	case REB_TYPESET:
		switch (flags) {
		case SET_OP_UNIQUE:
			break;
		case SET_OP_UNION:
			VAL_TYPESET(val1) |= VAL_TYPESET(val2);
			break;
		case SET_OP_INTERSECT:
			VAL_TYPESET(val1) &= VAL_TYPESET(val2);
			break;
		case SET_OP_DIFFERENCE:
			VAL_TYPESET(val1) ^= VAL_TYPESET(val2);
			break;
		case SET_OP_EXCLUDE:
			VAL_TYPESET(val1) &= ~VAL_TYPESET(val2);
			break;
		}
		return R_ARG1;

	default:
		raise Error_Invalid_Arg(val1);
	}

	return R_OUT;
}