C++ (Cpp) utf8_tounicode Exemples

Exemple #1

0

Afficher le fichier

Fichier : jim-tclprefix.c Projet : msteveb/jimtcl

/**
 * Returns the common initial length of the two strings.
 */
static int JimStringCommonLength(const char *str1, int charlen1, const char *str2, int charlen2)
{
    int maxlen = 0;
    while (charlen1-- && charlen2--) {
        int c1;
        int c2;
        str1 += utf8_tounicode(str1, &c1);
        str2 += utf8_tounicode(str2, &c2);
        if (c1 != c2) {
            break;
        }
        maxlen++;
    }
    return maxlen;
}

Exemple #2

0

Afficher le fichier

Fichier : linenoise.c Projet : evanhunter/jimtcl

static int outputChars(struct current *current, const char *buf, int len)
{
    COORD pos;
    DWORD n;

    pos.Y = (SHORT)current->y;

#ifdef USE_UTF8
    while ( len > 0 ) {
        int c, s;
        wchar_t wc;

        s = utf8_tounicode(buf, &c);

        len -= s;
        buf += s;

        wc = (wchar_t)c;

        pos.X = (SHORT)current->x;

        /* fixed display utf8 character */
        WriteConsoleOutputCharacterW(current->outh, &wc, 1, pos, &n);

        current->x += utf8_width(c);
    }
#else
    pos.X = (SHORT)current->x;

    WriteConsoleOutputCharacterA(current->outh, buf, len, pos, &n);
    current->x += len;
#endif

    return 0;
}

Exemple #3

0

Afficher le fichier

Fichier : linenoise.c Projet : evanhunter/jimtcl

/**
 * Reads a complete utf-8 character
 * and returns the unicode value, or -1 on error.
 */
static int fd_read(struct current *current)
{
#ifdef USE_UTF8
    char buf[4];
    int n;
    int i;
    int c;

    if (read(current->fd, &buf[0], 1) != 1) {
        return -1;
    }
    n = utf8_charlen(buf[0]);
    if (n < 1 || n > 3) {
        return -1;
    }
    for (i = 1; i < n; i++) {
        if (read(current->fd, &buf[i], 1) != 1) {
            return -1;
        }
    }
    buf[n] = 0;
    /* decode and return the character */
    utf8_tounicode(buf, &c);
    return c;
#else
    return fd_read_char(current->fd, -1);
#endif
}

Exemple #4

0

Afficher le fichier

Fichier : jimregexp.c Projet : BitThunder/bitthunder

/**
 * Extracts the next unicode char from utf8.
 *
 * If 'upper' is set, converts the char to uppercase.
 */
static int reg_utf8_tounicode_case(const char *s, int *uc, int upper)
{
	int l = utf8_tounicode(s, uc);
	if (upper) {
		*uc = utf8_upper(*uc);
	}
	return l;
}

Exemple #5

0

Afficher le fichier

Fichier : linenoise.c Projet : hscarter/revbayes

/**
 * Returns the unicode character at the given offset,
 * or -1 if none.
 */
static int get_char(struct current *current, int pos) {
    if (pos >= 0 && pos < current->chars) {
        int c;
        int i = utf8_index(current->buf, pos);
        (void) utf8_tounicode(current->buf + i, &c);
        return c;
    }
    return -1;
}

Exemple #6

0

Afficher le fichier

Fichier : linenoise.c Projet : hscarter/revbayes

/**
 * Inserts the characters (string) 'chars' at the cursor position 'pos'.
 *
 * Returns 0 if no chars were inserted or non-zero otherwise.
 */
static int insert_chars(struct current *current, int pos, const char *chars) {
    int inserted = 0;

    while (*chars) {
        int ch;
        int n = utf8_tounicode(chars, &ch);
        if (insert_char(current, pos, ch) == 0) {
            break;
        }
        inserted++;
        pos++;
        chars += n;
    }
    return inserted;
}

Exemple #7

0

Afficher le fichier

Fichier : linenoise.c Projet : evanhunter/jimtcl

static void refreshLine(const char *prompt, struct current *current)
{
    int plen;
    int pchars;
    int backup = 0;
    int i;
    const char *buf = current->buf;
    int chars = current->chars;
    int pos = current->pos;
    int b;
    int ch;
    int n;
    int width;
    int bufwidth;

    /* Should intercept SIGWINCH. For now, just get the size every time */
    getWindowSize(current);

    plen = strlen(prompt);
    pchars = utf8_strwidth(prompt, utf8_strlen(prompt, plen));

    /* Scan the prompt for embedded ansi color control sequences and
     * discount them as characters/columns.
     */
    pchars -= countColorControlChars(prompt);

    /* Account for a line which is too long to fit in the window.
     * Note that control chars require an extra column
     */

    /* How many cols are required to the left of 'pos'?
     * The prompt, plus one extra for each control char
     */
    n = pchars + utf8_strwidth(buf, utf8_strlen(buf, current->len));
    b = 0;
    for (i = 0; i < pos; i++) {
        b += utf8_tounicode(buf + b, &ch);
        if (ch < ' ') {
            n++;
        }
    }

    /* Pluse one if the current char is a control character */
    if (current->pos < current->chars && get_char(current, current->pos) < ' ') {
        n++;
    }

    /* If too many are needed, strip chars off the front of 'buf'
     * until it fits. Note that if the current char is a control character,
     * we need one extra col.
     */
    while (n >= current->cols && pos > 0) {
        b = utf8_tounicode(buf, &ch);
        if (ch < ' ') {
            n--;
        }
        n -= utf8_width(ch);
        buf += b;
        pos--;
        chars--;
    }

    /* Cursor to left edge, then the prompt */
    cursorToLeft(current);
    outputChars(current, prompt, plen);

    /* Now the current buffer content */

    /* Need special handling for control characters.
     * If we hit 'cols', stop.
     */
    b = 0; /* unwritted bytes */
    n = 0; /* How many control chars were written */
    width = 0; /* current display width */
    bufwidth = utf8_strwidth(buf, pos);

    for (i = 0; i < chars; i++) {
        int ch;
        int w = utf8_tounicode(buf + b, &ch);
        if (ch < ' ') {
            n++;
        }

        width += utf8_width(ch);

        if (pchars + width + n >= current->cols) {
            break;
        }
        if (ch < ' ') {
            /* A control character, so write the buffer so far */
            outputChars(current, buf, b);
            buf += b + w;
            b = 0;
            outputControlChar(current, ch + '@');
            if (i < pos) {
                backup++;
            }
        }
        else {
            b += w;
        }
    }
    outputChars(current, buf, b);

    /* Erase to right, move cursor to original position */
    eraseEol(current);
    setCursorPos(current, bufwidth + pchars + backup);
}

Exemple #8

0

Afficher le fichier

Fichier : jimregexp.c Projet : BitThunder/bitthunder

/*
 - regatom - the lowest level
 *
 * Optimization:  gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run.  Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 */
static int regatom(regex_t *preg, int *flagp)
{
	int ret;
	int flags;
	int nocase = (preg->cflags & REG_ICASE);

	int ch;
	int n = reg_utf8_tounicode_case(preg->regparse, &ch, nocase);

	*flagp = WORST;		/* Tentatively. */

	preg->regparse += n;
	switch (ch) {
	/* FIXME: these chars only have meaning at beg/end of pat? */
	case '^':
		ret = regnode(preg, BOL);
		break;
	case '$':
		ret = regnode(preg, EOL);
		break;
	case '.':
		ret = regnode(preg, ANY);
		*flagp |= HASWIDTH|SIMPLE;
		break;
	case '[': {
			const char *pattern = preg->regparse;

			if (*pattern == '^') {	/* Complement of range. */
				ret = regnode(preg, ANYBUT);
				pattern++;
			} else
				ret = regnode(preg, ANYOF);

			/* Special case. If the first char is ']' or '-', it is part of the set */
			if (*pattern == ']' || *pattern == '-') {
				reg_addrange(preg, *pattern, *pattern);
				pattern++;
			}

			while (*pattern && *pattern != ']') {
				/* Is this a range? a-z */
				int start;
				int end;

				pattern += reg_utf8_tounicode_case(pattern, &start, nocase);
				if (start == '\\') {
					pattern += reg_decode_escape(pattern, &start);
					if (start == 0) {
						preg->err = REG_ERR_NULL_CHAR;
						return 0;
					}
				}
				if (pattern[0] == '-' && pattern[1] && pattern[1] != ']') {
					/* skip '-' */
					pattern += utf8_tounicode(pattern, &end);
					pattern += reg_utf8_tounicode_case(pattern, &end, nocase);
					if (end == '\\') {
						pattern += reg_decode_escape(pattern, &end);
						if (end == 0) {
							preg->err = REG_ERR_NULL_CHAR;
							return 0;
						}
					}

					reg_addrange(preg, start, end);
					continue;
				}
				if (start == '[') {
					if (strncmp(pattern, ":alpha:]", 8) == 0) {
						if ((preg->cflags & REG_ICASE) == 0) {
							reg_addrange(preg, 'a', 'z');
						}
						reg_addrange(preg, 'A', 'Z');
						pattern += 8;
						continue;
					}
					if (strncmp(pattern, ":alnum:]", 8) == 0) {
						if ((preg->cflags & REG_ICASE) == 0) {
							reg_addrange(preg, 'a', 'z');
						}
						reg_addrange(preg, 'A', 'Z');
						reg_addrange(preg, '0', '9');
						pattern += 8;
						continue;
					}
					if (strncmp(pattern, ":space:]", 8) == 0) {
						reg_addrange_str(preg, " \t\r\n\f\v");
						pattern += 8;
						continue;
					}
				}
				/* Not a range, so just add the char */
				reg_addrange(preg, start, start);
			}
			regc(preg, '\0');

			if (*pattern) {
				pattern++;
			}
			preg->regparse = pattern;

			*flagp |= HASWIDTH|SIMPLE;
		}
		break;
	case '(':
		ret = reg(preg, 1, &flags);
		if (ret == 0)
			return 0;
		*flagp |= flags&(HASWIDTH|SPSTART);
		break;
	case '\0':
	case '|':
	case ')':
		preg->err = REG_ERR_INTERNAL;
		return 0;	/* Supposed to be caught earlier. */
	case '?':
	case '+':
	case '*':
	case '{':
		preg->err = REG_ERR_COUNT_FOLLOWS_NOTHING;
		return 0;
	case '\\':
		switch (*preg->regparse++) {
		case '\0':
			preg->err = REG_ERR_TRAILING_BACKSLASH;
			return 0;
		case '<':
		case 'm':
			ret = regnode(preg, WORDA);
			break;
		case '>':
		case 'M':
			ret = regnode(preg, WORDZ);
			break;
		case 'd':
			ret = regnode(preg, ANYOF);
			reg_addrange(preg, '0', '9');
			regc(preg, '\0');
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 'w':
			ret = regnode(preg, ANYOF);
			if ((preg->cflags & REG_ICASE) == 0) {
				reg_addrange(preg, 'a', 'z');
			}
			reg_addrange(preg, 'A', 'Z');
			reg_addrange(preg, '0', '9');
			reg_addrange(preg, '_', '_');
			regc(preg, '\0');
			*flagp |= HASWIDTH|SIMPLE;
			break;
		case 's':
			ret = regnode(preg, ANYOF);
			reg_addrange_str(preg," \t\r\n\f\v");
			regc(preg, '\0');
			*flagp |= HASWIDTH|SIMPLE;
			break;
		/* FIXME: Someday handle \1, \2, ... */
		default:
			/* Handle general quoted chars in exact-match routine */
			/* Back up to include the backslash */
			preg->regparse--;
			goto de_fault;
		}
		break;
	de_fault:
	default: {
			/*
			 * Encode a string of characters to be matched exactly.
			 */
			int added = 0;

			/* Back up to pick up the first char of interest */
			preg->regparse -= n;

			ret = regnode(preg, EXACTLY);

			/* Note that a META operator such as ? or * consumes the
			 * preceding char.
			 * Thus we must be careful to look ahead by 2 and add the
			 * last char as it's own EXACTLY if necessary
			 */

			/* Until end of string or a META char is reached */
			while (*preg->regparse && strchr(META, *preg->regparse) == NULL) {
				n = reg_utf8_tounicode_case(preg->regparse, &ch, (preg->cflags & REG_ICASE));
				if (ch == '\\' && preg->regparse[n]) {
					/* Non-trailing backslash.
					 * Is this a special escape, or a regular escape?
					 */
					if (strchr("<>mMwds", preg->regparse[n])) {
						/* A special escape. All done with EXACTLY */
						break;
					}
					/* Decode it. Note that we add the length for the escape
					 * sequence to the length for the backlash so we can skip
					 * the entire sequence, or not as required.
					 */
					n += reg_decode_escape(preg->regparse + n, &ch);
					if (ch == 0) {
						preg->err = REG_ERR_NULL_CHAR;
						return 0;
					}
				}

				/* Now we have one char 'ch' of length 'n'.
				 * Check to see if the following char is a MULT
				 */

				if (ISMULT(preg->regparse[n])) {
					/* Yes. But do we already have some EXACTLY chars? */
					if (added) {
						/* Yes, so return what we have and pick up the current char next time around */
						break;
					}
					/* No, so add this single char and finish */
					regc(preg, ch);
					added++;
					preg->regparse += n;
					break;
				}

				/* No, so just add this char normally */
				regc(preg, ch);
				added++;
				preg->regparse += n;
			}
			regc(preg, '\0');

			*flagp |= HASWIDTH;
			if (added == 1)
				*flagp |= SIMPLE;
			break;
		}
		break;
	}

	return(ret);
}

Exemple #9

0

Afficher le fichier

Fichier : jimregexp.c Projet : BitThunder/bitthunder

/*
 - regexec - match a regexp against a string
 */
int regexec(regex_t  *preg,  const  char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
{
	const char *s;
	int scan;

	/* Be paranoid... */
	if (preg == NULL || preg->program == NULL || string == NULL) {
		return REG_ERR_NULL_ARGUMENT;
	}

	/* Check validity of program. */
	if (*preg->program != REG_MAGIC) {
		return REG_ERR_CORRUPTED;
	}

#ifdef DEBUG
	fprintf(stderr, "regexec: %s\n", string);
	regdump(preg);
#endif

	preg->eflags = eflags;
	preg->pmatch = pmatch;
	preg->nmatch = nmatch;
	preg->start = string;	/* All offsets are computed from here */

	/* Must clear out the embedded repeat counts of REPX and REPXMIN opcodes */
	for (scan = OPERAND(1); scan != 0; scan += regopsize(preg, scan)) {
		int op = OP(preg, scan);
		if (op == END)
			break;
		if (op == REPX || op == REPXMIN)
			preg->program[scan + 4] = 0;
	}

	/* If there is a "must appear" string, look for it. */
	if (preg->regmust != 0) {
		s = string;
		while ((s = str_find(s, preg->program[preg->regmust], preg->cflags & REG_ICASE)) != NULL) {
			if (prefix_cmp(preg->program + preg->regmust, preg->regmlen, s, preg->cflags & REG_ICASE) >= 0) {
				break;
			}
			s++;
		}
		if (s == NULL)	/* Not present. */
			return REG_NOMATCH;
	}

	/* Mark beginning of line for ^ . */
	preg->regbol = string;

	/* Simplest case:  anchored match need be tried only once (maybe per line). */
	if (preg->reganch) {
		if (eflags & REG_NOTBOL) {
			/* This is an anchored search, but not an BOL, so possibly skip to the next line */
			goto nextline;
		}
		while (1) {
			if (regtry(preg, string)) {
				return REG_NOERROR;
			}
			if (*string) {
nextline:
				if (preg->cflags & REG_NEWLINE) {
					/* Try the next anchor? */
					string = strchr(string, '\n');
					if (string) {
						preg->regbol = ++string;
						continue;
					}
				}
			}
			return REG_NOMATCH;
		}
	}

	/* Messy cases:  unanchored match. */
	s = string;
	if (preg->regstart != '\0') {
		/* We know what char it must start with. */
		while ((s = str_find(s, preg->regstart, preg->cflags & REG_ICASE)) != NULL) {
			if (regtry(preg, s))
				return REG_NOERROR;
			s++;
		}
	}
	else
		/* We don't -- general case. */
		while (1) {
			if (regtry(preg, s))
				return REG_NOERROR;
			if (*s == '\0') {
				break;
			}
			else {
				int c;
				s += utf8_tounicode(s, &c);
			}
		}

	/* Failure. */
	return REG_NOMATCH;
}

Exemple #10

0

Afficher le fichier

Fichier : liblinenoise.c Projet : h3rald/ickle

static void refreshLine(const char *prompt, struct current *current)
{
    int plen;
    int pchars;
    int backup = 0;
    int i;
    const char *buf = current->buf;
    int chars = current->chars;
    int pos = current->pos;
    int b;
    int ch;
    int n;

    /* Should intercept SIGWINCH. For now, just get the size every time */
    getWindowSize(current);

    plen = strlen(prompt);
    pchars = utf8_strlen(prompt, plen);

    /* Scan the prompt for embedded ansi color control sequences and
     * discount them as characters/columns.
     */
    pchars -= countColorControlChars(prompt);

    /* Account for a line which is too long to fit in the window.
     * Note that control chars require an extra column
     */

    /* How many cols are required to the left of 'pos'?
     * The prompt, plus one extra for each control char
     */
    n = pchars + utf8_strlen(buf, current->len);
    b = 0;
    for (i = 0; i < pos; i++) {
        b += utf8_tounicode(buf + b, &ch);
        if (ch < ' ') {
            n++;
        }
    }

    /* If too many are needed, strip chars off the front of 'buf'
     * until it fits. Note that if the current char is a control character,
     * we need one extra col.
     */
    if (current->pos < current->chars && get_char(current, current->pos) < ' ') {
        n++;
    }

    while (n >= current->cols && pos > 0) {
        b = utf8_tounicode(buf, &ch);
        if (ch < ' ') {
            n--;
        }
        n--;
        buf += b;
        pos--;
        chars--;
    }

    /* Cursor to left edge, then the prompt */
    cursorToLeft(current);
    outputChars(current, prompt, plen);

    /* Now the current buffer content, but only if not fully hidden */

    if (is_hidden != LN_HIDDEN_ALL) {
        /* Need special handling for control characters.
	 * If we hit 'cols', stop.
	 */
        b = 0; /* unwritted bytes */
	n = 0; /* How many control chars were written */
	for (i = 0; i < chars; i++) {
	    int ch;
	    int w = utf8_tounicode(buf + b, &ch);
	    if (!is_hidden && ch < ' ') {
	        n++;
	    }
	    if (pchars + i + n >= current->cols) {
	        break;
	    }
	    if (is_hidden == LN_HIDDEN_STAR) {
	        /* In hidden/star mode all user-entered characters are
		 * shown as astericks ('*'). This is like control chars,
		 * except for a different translation. */
	        /* assert (b == 0) */
	        outputChars(current, "*", 1);
		buf += w;
		/* keep b = 0; */
	    }
	    else if (ch < ' ') {
	        /* A control character, so write the buffer so far */
	        outputChars(current, buf, b);
		buf += b + w;
		b = 0;
		outputControlChar(current, ch + '@');
		if (i < pos) {
		    backup++;
		}
	    }
	    else {
	        b += w;
	    }
	}

	/* if (is_hidden) assert (b==0) */
	outputChars(current, buf, b);

	/* Erase to right, move cursor to original position */
	eraseEol(current);
	setCursorPos(current, pos + pchars + backup);
    }
}