Beispiel #1
0
/**
 * Fetch one character from current->fd.
 *
 * With USE_UTF8 the lead byte is read first, utf8_charlen() says how many
 * bytes the sequence has, the remaining bytes are read one at a time and
 * the NUL-terminated sequence is handed to utf8_tounicode().
 * Without USE_UTF8 the work is delegated to fd_read_char().
 *
 * Returns the decoded value, or -1 if a read() does not deliver exactly
 * one byte or the reported sequence length is outside 1..3.
 */
static int fd_read(struct current *current)
{
#ifndef USE_UTF8
    return fd_read_char(current->fd, -1);
#else
    char seq[4];
    int codepoint;
    int seqlen;
    int pos;

    /* Lead byte: determines the length of the whole sequence */
    if (read(current->fd, seq, 1) != 1) {
        return -1;
    }

    seqlen = utf8_charlen(seq[0]);
    /* At most 3 bytes, so the terminator below still fits in seq[] */
    if (!(seqlen >= 1 && seqlen <= 3)) {
        return -1;
    }

    /* Remaining bytes of the sequence, one read() each */
    pos = 1;
    while (pos < seqlen) {
        if (read(current->fd, seq + pos, 1) != 1) {
            return -1;
        }
        pos++;
    }
    seq[seqlen] = '\0';

    utf8_tounicode(seq, &codepoint);
    return codepoint;
#endif
}
// Unary operator handler for a string object.
//  - MP_UNARY_OP_BOOL: true when the string's byte length is non-zero.
//  - MP_UNARY_OP_LEN:  the value of utf8_charlen(str_data, str_len) as a
//                      small int (presumably the character count; confirm
//                      against utf8_charlen's definition).
//  - any other op:     MP_OBJ_NULL, meaning the op is not supported.
STATIC mp_obj_t uni_unary_op(mp_unary_op_t op, mp_obj_t self_in) {
    GET_STR_DATA_LEN(self_in, str_data, str_len);

    if (op == MP_UNARY_OP_BOOL) {
        return mp_obj_new_bool(str_len != 0);
    }

    if (op == MP_UNARY_OP_LEN) {
        return MP_OBJ_NEW_SMALL_INT(utf8_charlen(str_data, str_len));
    }

    return MP_OBJ_NULL; // op not supported
}
Beispiel #3
0
/*
 *	Work out how many bytes of "buffer" belong on the next output
 *	line when the line may be at most "cols" characters wide.
 *
 *	- If a control character (byte below ' ') is hit within the
 *	  first "cols" characters, the run of control characters is
 *	  included and the length up to its end is returned.
 *	- If the text ends within "cols" characters, its full length
 *	  is returned.
 *	- Otherwise the line is cut at the last space (or control
 *	  byte) at or before the column limit; if there is none, the
 *	  whole first word is returned even though it is too long.
 *
 *	Bytes are examined as unsigned char, so bytes >= 0x80 (UTF-8
 *	sequences, Latin-1 letters) count as printable text rather
 *	than as control characters on platforms where char is signed.
 *
 *	Returns a byte count, never more than strlen(buffer).
 */
static size_t linelen(const char *buffer, size_t cols)
{
	size_t len = 0;
	const unsigned char *start = (const unsigned char *) buffer;
	const unsigned char *p = start;

	/*
	 *	Step 1: go up "cols" characters
	 */
	while (*p && (len < cols) && (*p >= ' ')) {
#ifdef USE_UTF8
		int n = utf8_charlen((int) *p);

		/*
		 *	Always move forward by at least one byte, and
		 *	never step over the terminating NUL if the
		 *	sequence is invalid or truncated.
		 */
		if (n < 1) n = 1;
		while ((n-- > 0) && *p) p++;
#else
		p++;
#endif
		len++;
	}

	/*
	 *	Ended at a CR/LF.  Skip it, and return all of it.
	 */
	if (*p && (*p < ' ')) {
		while (*p && (*p < ' ')) p++;
		return (size_t) (p - start);
	}

	if (!*p) return (size_t) (p - start); /* short */

	/*
	 *	Step 2: back up to the previous space.  Stop at the
	 *	first byte of the buffer: stepping to buffer - 1 would
	 *	make the scan below read outside the string.
	 */
	while ((p > start) && (*p > ' ')) p--;

	if (p > start) return (size_t) (p - start);

	/*
	 *	No previous space, print the entire word.
	 */
	while (*p && (*p > ' ')) p++;

	return (size_t) (p - start);
}
Beispiel #4
0
/**
 * (Experimental) Implementation of mbrtowc for Windows.
 * This is required because the other, commonly available implementations
 * seem to not work very well, based on user reports.  Someone who is
 * really, really good at windows programming needs to review this stuff!
 *
 * Converts the single UTF-8 character at the start of s to UTF-16 with
 * MultiByteToWideChar() and stores its first code unit in *pwc.
 *
 * @param pwc Receives one wide character; may be NULL, in which case
 *            nothing is stored.
 * @param s   UTF-8 input; when NULL the function returns 0.
 * @param n   Only tested for zero; the number of bytes consumed is taken
 *            from utf8_charlen(s), not bounded by n.
 * @param ps  Unused; no conversion state is kept.
 * @return 0 for a NULL s, a NUL byte, or a zero utf8_charlen();
 *         (size_t)-2 when n is 0; a negative utf8_charlen() result
 *         converted to size_t; (size_t)-1 when the conversion fails;
 *         otherwise the number of bytes of s that were converted.
 *
 * A character outside the BMP converts to a surrogate pair, which does
 * not fit in the single wchar_t that pwc points to; only the first unit
 * of the pair is stored.
 */
size_t lg_mbrtowc(wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
	/* Scratch space so the conversion never writes more than one
	 * wchar_t through pwc. If the output would need more than four
	 * units the call fails and (size_t)-1 is returned. */
	wchar_t wbuf[4];
	int nb, nb2;

	(void) ps;

	if (NULL == s) return 0;
	if (0 == n) return (size_t)-2;
	if (0 == *s) {
		if (NULL != pwc) *pwc = 0;
		return 0;
	}

	nb = utf8_charlen(s);
	if (0 == nb) return 0;
	if (0 > nb) return (size_t)nb;

	nb2 = MultiByteToWideChar(CP_UTF8, 0, s, nb, wbuf,
	                          (int)(sizeof(wbuf) / sizeof(wbuf[0])));
	if (0 == nb2) return (size_t)-1;

	if (NULL != pwc) *pwc = wbuf[0];
	return (size_t)nb;
}
Beispiel #5
0
/*
 - regexec - match a regexp against a string
 *
 * preg    compiled expression; it is also written to: eflags, pmatch,
 *         nmatch, start and regbol are stored in it, and the repeat
 *         counters embedded in preg->program are reset.
 * string  NUL-terminated subject text.
 * nmatch  stored in preg->nmatch for the matcher.
 * pmatch  stored in preg->pmatch for the matcher.
 * eflags  execution flags; REG_NOTBOL is examined here.
 *
 * Returns REG_NOERROR when regtry() reports a match,
 * REG_NOMATCH when no position matches,
 * REG_ERR_NULL_ARGUMENT when preg, preg->program or string is NULL,
 * REG_ERR_CORRUPTED when the program does not begin with REG_MAGIC.
 */
int regexec(regex_t  *preg,  const  char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
{
	const char *s;
	int scan;

	/* Be paranoid... */
	if (preg == NULL || preg->program == NULL || string == NULL) {
		return REG_ERR_NULL_ARGUMENT;
	}

	/* Check validity of program. */
	if (*preg->program != REG_MAGIC) {
		return REG_ERR_CORRUPTED;
	}

#ifdef DEBUG
	fprintf(stderr, "regexec: %s\n", string);
	regdump(preg);
#endif

	/* Stash the per-call arguments in preg */
	preg->eflags = eflags;
	preg->pmatch = pmatch;
	preg->nmatch = nmatch;
	preg->start = string;	/* All offsets are computed from here */

	/* Must clear out the embedded repeat counts */
	/* Walks the node chain via regnext() until it yields 0 */
	for (scan = OPERAND(1); scan != 0; scan = regnext(preg, scan)) {
		switch (OP(preg, scan)) {
		case REP:
		case REPMIN:
		case REPX:
		case REPXMIN:
			/* The slot at offset 4 of a repeat node is zeroed */
			preg->program[scan + 4] = 0;
			break;
		}
	}

	/* If there is a "must appear" string, look for it. */
	if (preg->regmust != 0) {
		s = string;
		/*
		 * str_find() locates the next candidate for the first element of
		 * the must-string; prefix_cmp() then checks regmlen elements.
		 * NOTE(review): a result >= 0 from prefix_cmp() is treated as
		 * "found" - confirm against its definition.
		 */
		while ((s = str_find(s, preg->program[preg->regmust], preg->cflags & REG_ICASE)) != NULL) {
			if (prefix_cmp(preg->program + preg->regmust, preg->regmlen, s, preg->cflags & REG_ICASE) >= 0) {
				break;
			}
			s++;
		}
		if (s == NULL)	/* Not present. */
			return REG_NOMATCH;
	}

	/* Mark beginning of line for ^ . */
	preg->regbol = string;

	/* Simplest case:  anchored match need be tried only once (maybe per line). */
	if (preg->reganch) {
		if (eflags & REG_NOTBOL) {
			/* This is an anchored search, but not an BOL, so possibly skip to the next line */
			/* Jumps into the loop below, skipping the first regtry() */
			goto nextline;
		}
		while (1) {
			int ret = regtry(preg, string);
			if (ret) {
				return REG_NOERROR;
			}
			if (*string) {
nextline:
				/* Only with REG_NEWLINE is each line start another anchor point */
				if (preg->cflags & REG_NEWLINE) {
					/* Try the next anchor? */
					string = strchr(string, '\n');
					if (string) {
						/* Resume just past the newline; that is the new BOL */
						preg->regbol = ++string;
						continue;
					}
				}
			}
			/* Empty remainder, no REG_NEWLINE, or no further newline */
			return REG_NOMATCH;
		}
	}

	/* Messy cases:  unanchored match. */
	s = string;
	if (preg->regstart != '\0') {
		/* We know what char it must start with. */
		/* Only positions returned by str_find() are tried */
		while ((s = str_find(s, preg->regstart, preg->cflags & REG_ICASE)) != NULL) {
			if (regtry(preg, s))
				return REG_NOERROR;
			s++;
		}
	}
	else
		/* We don't -- general case. */
		/*
		 * Every position is tried, including the terminating NUL
		 * (regtry() runs before the end-of-string test). The step size
		 * comes from utf8_charlen(*s).
		 */
		while (1) {
			if (regtry(preg, s))
				return REG_NOERROR;
			if (*s == '\0') {
				break;
			}
			s += utf8_charlen(*s);
		}

	/* Failure. */
	return REG_NOMATCH;
}