/**
 * Counts how many leading characters two UTF-8 strings share.
 *
 * Both strings are decoded one character at a time with utf8_tounicode().
 * 'charlen1' and 'charlen2' are consumed once per decoded character, so
 * they bound the comparison in characters rather than bytes.
 *
 * Returns the number of characters that matched before either count ran
 * out or the first differing character was seen.
 */
static int JimStringCommonLength(const char *str1, int charlen1, const char *str2, int charlen2)
{
    int matched = 0;

    for (;;) {
        int left;
        int right;

        /* Stop as soon as either string has no characters left */
        if (charlen1 == 0 || charlen2 == 0) {
            return matched;
        }
        charlen1--;
        charlen2--;

        str1 += utf8_tounicode(str1, &left);
        str2 += utf8_tounicode(str2, &right);

        if (left != right) {
            return matched;
        }
        matched++;
    }
}
static int outputChars(struct current *current, const char *buf, int len) { COORD pos; DWORD n; pos.Y = (SHORT)current->y; #ifdef USE_UTF8 while ( len > 0 ) { int c, s; wchar_t wc; s = utf8_tounicode(buf, &c); len -= s; buf += s; wc = (wchar_t)c; pos.X = (SHORT)current->x; /* fixed display utf8 character */ WriteConsoleOutputCharacterW(current->outh, &wc, 1, pos, &n); current->x += utf8_width(c); } #else pos.X = (SHORT)current->x; WriteConsoleOutputCharacterA(current->outh, buf, len, pos, &n); current->x += len; #endif return 0; }
/**
 * Reads one complete character from current->fd.
 *
 * With USE_UTF8 the lead byte is read first, utf8_charlen() gives the
 * sequence length, and the remaining bytes are read one at a time before
 * the sequence is decoded with utf8_tounicode(). Sequences whose reported
 * length is below 1 or above 3 are rejected.
 *
 * Without USE_UTF8 the work is delegated to fd_read_char().
 *
 * Returns the decoded character, or -1 if a read fails or the lead byte
 * is rejected.
 */
static int fd_read(struct current *current)
{
#ifdef USE_UTF8
    char seq[4];
    int seqlen;
    int got;
    int codepoint;

    if (read(current->fd, &seq[0], 1) != 1) {
        return -1;
    }

    seqlen = utf8_charlen(seq[0]);
    if (seqlen < 1 || seqlen > 3) {
        return -1;
    }

    /* Pull in the continuation bytes, if any */
    got = 1;
    while (got < seqlen) {
        if (read(current->fd, &seq[got], 1) != 1) {
            return -1;
        }
        got++;
    }
    seq[seqlen] = 0;

    utf8_tounicode(seq, &codepoint);
    return codepoint;
#else
    return fd_read_char(current->fd, -1);
#endif
}
/**
 * Decodes the next character of the UTF-8 string 's' into '*uc'.
 *
 * When 'upper' is non-zero the decoded character is additionally passed
 * through utf8_upper() before being stored.
 *
 * Returns whatever utf8_tounicode() returned for the decode.
 */
static int reg_utf8_tounicode_case(const char *s, int *uc, int upper)
{
    const int consumed = utf8_tounicode(s, uc);

    if (!upper) {
        return consumed;
    }

    *uc = utf8_upper(*uc);
    return consumed;
}
/**
 * Fetches the character at character index 'pos' of current->buf.
 *
 * Returns the decoded character, or -1 when 'pos' is negative or not
 * less than current->chars.
 */
static int get_char(struct current *current, int pos)
{
    int codepoint;
    int byte_offset;

    if (pos < 0 || pos >= current->chars) {
        return -1;
    }

    /* Translate the character index into an offset into the buffer */
    byte_offset = utf8_index(current->buf, pos);
    (void) utf8_tounicode(current->buf + byte_offset, &codepoint);

    return codepoint;
}
/**
 * Inserts every character of the string 'chars' into 'current', starting
 * at position 'pos' and moving one position forward per character.
 *
 * Insertion stops at the end of the string or as soon as insert_char()
 * returns 0.
 *
 * Returns the number of characters inserted (0 if none were).
 */
static int insert_chars(struct current *current, int pos, const char *chars)
{
    int count;

    for (count = 0; *chars != '\0'; count++) {
        int codepoint;
        int nbytes = utf8_tounicode(chars, &codepoint);

        /* 'count' characters are already in place, so the target moves with it */
        if (insert_char(current, pos + count, codepoint) == 0) {
            break;
        }
        chars += nbytes;
    }

    return count;
}
/**
 * Redraws the prompt and the edit buffer, then positions the cursor.
 *
 * Steps, in order:
 *  1. Refresh the window size and compute the prompt's display width,
 *     less whatever countColorControlChars() reports for the prompt.
 *  2. Estimate the columns needed. While the estimate is >= current->cols
 *     and characters remain before the cursor, drop characters from the
 *     front of the buffer.
 *  3. Output the prompt, then the buffer. Characters below ' ' are handed
 *     to outputControlChar() as (ch + '@') and are counted as needing one
 *     extra column.
 *  4. Erase to end of line and set the cursor position.
 *
 * Note that 'b' and 'n' are reused with different meanings in the
 * trimming phase and in the output phase.
 */
static void refreshLine(const char *prompt, struct current *current)
{
    int plen;           /* prompt length in bytes */
    int pchars;         /* prompt display width, less color control chars */
    int backup = 0;     /* control chars written before the cursor position */
    int i;
    const char *buf = current->buf;
    int chars = current->chars;
    int pos = current->pos;
    int b;
    int ch;
    int n;
    int width;
    int bufwidth;

    /* Should intercept SIGWINCH. For now, just get the size every time */
    getWindowSize(current);

    plen = strlen(prompt);
    pchars = utf8_strwidth(prompt, utf8_strlen(prompt, plen));

    /* Scan the prompt for embedded ansi color control sequences and
     * discount them as characters/columns.
     */
    pchars -= countColorControlChars(prompt);

    /* Account for a line which is too long to fit in the window.
     * Note that control chars require an extra column
     */

    /* Column estimate: the prompt width plus the width of the whole buffer
     * (current->len bytes), plus one extra for each control char found
     * before 'pos'.
     */
    n = pchars + utf8_strwidth(buf, utf8_strlen(buf, current->len));
    b = 0;
    for (i = 0; i < pos; i++) {
        b += utf8_tounicode(buf + b, &ch);
        if (ch < ' ') {
            n++;
        }
    }

    /* Plus one if the current char is a control character */
    if (current->pos < current->chars && get_char(current, current->pos) < ' ') {
        n++;
    }

    /* If too many are needed, strip chars off the front of 'buf'
     * until it fits. Each stripped char gives back its utf8_width(),
     * plus the extra column if it was a control character.
     */
    while (n >= current->cols && pos > 0) {
        b = utf8_tounicode(buf, &ch);
        if (ch < ' ') {
            n--;
        }
        n -= utf8_width(ch);
        buf += b;
        pos--;
        chars--;
    }

    /* Cursor to left edge, then the prompt */
    cursorToLeft(current);
    outputChars(current, prompt, plen);

    /* Now the current buffer content */

    /* Need special handling for control characters.
     * If we hit 'cols', stop.
     */
    b = 0; /* bytes scanned but not yet written */
    n = 0; /* How many control chars were seen so far */
    width = 0; /* current display width */
    /* Width of the text left of the cursor, taken after any trimming above */
    bufwidth = utf8_strwidth(buf, pos);

    for (i = 0; i < chars; i++) {
        int ch; /* shadows the function-level 'ch' */
        int w = utf8_tounicode(buf + b, &ch);
        if (ch < ' ') {
            n++;
        }

        width += utf8_width(ch);

        /* Stop before writing a char that would reach the last column */
        if (pchars + width + n >= current->cols) {
            break;
        }
        if (ch < ' ') {
            /* A control character, so write the buffer so far */
            outputChars(current, buf, b);
            /* Skip both the bytes just written and the control char itself */
            buf += b + w;
            b = 0;
            outputControlChar(current, ch + '@');
            if (i < pos) {
                backup++;
            }
        }
        else {
            b += w;
        }
    }
    /* Flush whatever is still pending */
    outputChars(current, buf, b);

    /* Erase to right, move cursor to original position */
    eraseEol(current);
    setCursorPos(current, bufwidth + pchars + backup);
}
/*
 - regatom - the lowest level
 *
 * Parses one atom at preg->regparse and emits the node(s) for it.
 *
 * Optimization: gobbles an entire sequence of ordinary characters so that
 * it can turn them into a single node, which is smaller to store and
 * faster to run. Backslashed characters are exceptions, each becoming a
 * separate node; the code is simpler that way and it's not worth fixing.
 *
 * '*flagp' is first set to WORST and then has HASWIDTH, SIMPLE and/or
 * SPSTART or'ed in depending on the atom.
 *
 * Returns the value obtained from regnode()/reg() for the atom, or 0
 * after setting preg->err on failure.
 */
static int regatom(regex_t *preg, int *flagp)
{
    int ret;
    int flags;
    int nocase = (preg->cflags & REG_ICASE);

    int ch;
    /* 'n' is the length of the first char; the default case backs up by it */
    int n = reg_utf8_tounicode_case(preg->regparse, &ch, nocase);

    *flagp = WORST; /* Tentatively. */

    preg->regparse += n;
    switch (ch) {
    /* FIXME: these chars only have meaning at beg/end of pat? */
    case '^':
        ret = regnode(preg, BOL);
        break;
    case '$':
        ret = regnode(preg, EOL);
        break;
    case '.':
        ret = regnode(preg, ANY);
        *flagp |= HASWIDTH|SIMPLE;
        break;
    case '[': {
            /* Bracket expression: scanned with a local cursor which is
             * stored back into preg->regparse at the end.
             */
            const char *pattern = preg->regparse;

            if (*pattern == '^') { /* Complement of range. */
                ret = regnode(preg, ANYBUT);
                pattern++;
            } else
                ret = regnode(preg, ANYOF);

            /* Special case. If the first char is ']' or '-', it is part of the set */
            if (*pattern == ']' || *pattern == '-') {
                reg_addrange(preg, *pattern, *pattern);
                pattern++;
            }

            while (*pattern && *pattern != ']') {
                /* Is this a range? a-z */
                int start;
                int end;

                pattern += reg_utf8_tounicode_case(pattern, &start, nocase);
                if (start == '\\') {
                    pattern += reg_decode_escape(pattern, &start);
                    if (start == 0) {
                        preg->err = REG_ERR_NULL_CHAR;
                        return 0;
                    }
                }
                /* A '-' only forms a range if something other than ']' follows it */
                if (pattern[0] == '-' && pattern[1] && pattern[1] != ']') {
                    /* skip '-' */
                    pattern += utf8_tounicode(pattern, &end);
                    pattern += reg_utf8_tounicode_case(pattern, &end, nocase);
                    if (end == '\\') {
                        pattern += reg_decode_escape(pattern, &end);
                        if (end == 0) {
                            preg->err = REG_ERR_NULL_CHAR;
                            return 0;
                        }
                    }

                    reg_addrange(preg, start, end);
                    continue;
                }
                if (start == '[') {
                    /* Named classes. With REG_ICASE set only the upper case
                     * letter range is added.
                     */
                    if (strncmp(pattern, ":alpha:]", 8) == 0) {
                        if ((preg->cflags & REG_ICASE) == 0) {
                            reg_addrange(preg, 'a', 'z');
                        }
                        reg_addrange(preg, 'A', 'Z');
                        pattern += 8;
                        continue;
                    }
                    if (strncmp(pattern, ":alnum:]", 8) == 0) {
                        if ((preg->cflags & REG_ICASE) == 0) {
                            reg_addrange(preg, 'a', 'z');
                        }
                        reg_addrange(preg, 'A', 'Z');
                        reg_addrange(preg, '0', '9');
                        pattern += 8;
                        continue;
                    }
                    if (strncmp(pattern, ":space:]", 8) == 0) {
                        reg_addrange_str(preg, " \t\r\n\f\v");
                        pattern += 8;
                        continue;
                    }
                }
                /* Not a range, so just add the char */
                reg_addrange(preg, start, start);
            }
            /* Terminate the set */
            regc(preg, '\0');

            /* Step over the closing ']' unless the pattern ended first */
            if (*pattern) {
                pattern++;
            }
            preg->regparse = pattern;

            *flagp |= HASWIDTH|SIMPLE;
        }
        break;
    case '(':
        ret = reg(preg, 1, &flags);
        if (ret == 0)
            return 0;
        *flagp |= flags&(HASWIDTH|SPSTART);
        break;
    case '\0':
    case '|':
    case ')':
        preg->err = REG_ERR_INTERNAL;
        return 0; /* Supposed to be caught earlier. */
    case '?':
    case '+':
    case '*':
    case '{':
        preg->err = REG_ERR_COUNT_FOLLOWS_NOTHING;
        return 0;
    case '\\':
        /* Dispatch on the char after the backslash, consuming it */
        switch (*preg->regparse++) {
        case '\0':
            preg->err = REG_ERR_TRAILING_BACKSLASH;
            return 0;
        case '<':
        case 'm':
            ret = regnode(preg, WORDA);
            break;
        case '>':
        case 'M':
            ret = regnode(preg, WORDZ);
            break;
        case 'd':
            ret = regnode(preg, ANYOF);
            reg_addrange(preg, '0', '9');
            regc(preg, '\0');
            *flagp |= HASWIDTH|SIMPLE;
            break;
        case 'w':
            ret = regnode(preg, ANYOF);
            if ((preg->cflags & REG_ICASE) == 0) {
                reg_addrange(preg, 'a', 'z');
            }
            reg_addrange(preg, 'A', 'Z');
            reg_addrange(preg, '0', '9');
            reg_addrange(preg, '_', '_');
            regc(preg, '\0');
            *flagp |= HASWIDTH|SIMPLE;
            break;
        case 's':
            ret = regnode(preg, ANYOF);
            reg_addrange_str(preg," \t\r\n\f\v");
            regc(preg, '\0');
            *flagp |= HASWIDTH|SIMPLE;
            break;
        /* FIXME: Someday handle \1, \2, ... */
        default:
            /* Handle general quoted chars in exact-match routine */
            /* Undo the increment above; the 'regparse -= n' at de_fault
             * then backs up over the backslash itself.
             */
            preg->regparse--;
            goto de_fault;
        }
        break;
    de_fault:
    default: {
            /*
             * Encode a string of characters to be matched exactly.
             */
            int added = 0; /* number of chars emitted into this EXACTLY node */

            /* Back up to pick up the first char of interest */
            preg->regparse -= n;

            ret = regnode(preg, EXACTLY);

            /* Note that a META operator such as ? or * consumes the
             * preceding char.
             * Thus we must be careful to look ahead by 2 and add the
             * last char as its own EXACTLY if necessary
             */

            /* Until end of string or a META char is reached */
            while (*preg->regparse && strchr(META, *preg->regparse) == NULL) {
                n = reg_utf8_tounicode_case(preg->regparse, &ch, (preg->cflags & REG_ICASE));
                if (ch == '\\' && preg->regparse[n]) {
                    /* Non-trailing backslash.
                     * Is this a special escape, or a regular escape?
                     */
                    if (strchr("<>mMwds", preg->regparse[n])) {
                        /* A special escape. All done with EXACTLY */
                        break;
                    }
                    /* Decode it. Note that we add the length for the escape
                     * sequence to the length for the backslash so we can skip
                     * the entire sequence, or not as required.
                     */
                    n += reg_decode_escape(preg->regparse + n, &ch);
                    if (ch == 0) {
                        preg->err = REG_ERR_NULL_CHAR;
                        return 0;
                    }
                }

                /* Now we have one char 'ch' of length 'n'.
                 * Check to see if the following char is a MULT
                 */

                if (ISMULT(preg->regparse[n])) {
                    /* Yes. But do we already have some EXACTLY chars? */
                    if (added) {
                        /* Yes, so return what we have and pick up the current char next time around */
                        break;
                    }
                    /* No, so add this single char and finish */
                    regc(preg, ch);
                    added++;
                    preg->regparse += n;
                    break;
                }

                /* No, so just add this char normally */
                regc(preg, ch);
                added++;
                preg->regparse += n;
            }
            /* Terminate the literal string */
            regc(preg, '\0');

            *flagp |= HASWIDTH;
            /* Only a single-char literal is flagged SIMPLE */
            if (added == 1)
                *flagp |= SIMPLE;
            break;
        }
        break;
    }

    return(ret);
}
/*
 - regexec - match a regexp against a string
 *
 * Runs the compiled program in 'preg' against 'string'. 'pmatch', 'nmatch'
 * and 'eflags' are stored in 'preg' for use during matching.
 *
 * Returns REG_NOERROR on a match, REG_NOMATCH when nothing matches,
 * REG_ERR_NULL_ARGUMENT if 'preg', its program or 'string' is NULL, and
 * REG_ERR_CORRUPTED if the program does not begin with REG_MAGIC.
 */
int regexec(regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags)
{
    const char *cursor;
    int node;

    /* Refuse to work with missing arguments */
    if (preg == NULL || preg->program == NULL || string == NULL) {
        return REG_ERR_NULL_ARGUMENT;
    }

    /* The program must carry the magic marker */
    if (*preg->program != REG_MAGIC) {
        return REG_ERR_CORRUPTED;
    }

#ifdef DEBUG
    fprintf(stderr, "regexec: %s\n", string);
    regdump(preg);
#endif

    preg->eflags = eflags;
    preg->pmatch = pmatch;
    preg->nmatch = nmatch;
    /* Every offset reported is relative to this pointer */
    preg->start = string;

    /* Reset the repeat counts embedded in REPX and REPXMIN nodes */
    node = OPERAND(1);
    while (node != 0) {
        int opcode = OP(preg, node);

        if (opcode == END) {
            break;
        }
        if (opcode == REPX || opcode == REPXMIN) {
            preg->program[node + 4] = 0;
        }
        node += regopsize(preg, node);
    }

    /* When a "must appear" string is known, give up early if it is absent */
    if (preg->regmust != 0) {
        for (cursor = string; ; cursor++) {
            cursor = str_find(cursor, preg->program[preg->regmust], preg->cflags & REG_ICASE);
            if (cursor == NULL) {
                /* Not present. */
                return REG_NOMATCH;
            }
            if (prefix_cmp(preg->program + preg->regmust, preg->regmlen, cursor, preg->cflags & REG_ICASE) >= 0) {
                break;
            }
        }
    }

    /* Beginning of line, for ^ */
    preg->regbol = string;

    /* Anchored pattern: one attempt, or one attempt per line */
    if (preg->reganch) {
        /* With REG_NOTBOL the start of 'string' is not a line start, so
         * the first attempt is skipped and we go straight to the next line.
         */
        int skip_attempt = (eflags & REG_NOTBOL) != 0;

        for (;;) {
            if (!skip_attempt) {
                if (regtry(preg, string)) {
                    return REG_NOERROR;
                }
                if (*string == '\0') {
                    return REG_NOMATCH;
                }
            }
            skip_attempt = 0;

            /* Further anchors exist only in newline-sensitive mode */
            if ((preg->cflags & REG_NEWLINE) == 0) {
                return REG_NOMATCH;
            }
            string = strchr(string, '\n');
            if (string == NULL) {
                return REG_NOMATCH;
            }
            preg->regbol = ++string;
        }
    }

    /* Unanchored pattern */
    cursor = string;
    if (preg->regstart != '\0') {
        /* The first char of any match is known, so try only where it occurs */
        for (; (cursor = str_find(cursor, preg->regstart, preg->cflags & REG_ICASE)) != NULL; cursor++) {
            if (regtry(preg, cursor)) {
                return REG_NOERROR;
            }
        }
    }
    else {
        /* General case: try at every character, including the terminator */
        for (;;) {
            int skipped;

            if (regtry(preg, cursor)) {
                return REG_NOERROR;
            }
            if (*cursor == '\0') {
                break;
            }
            cursor += utf8_tounicode(cursor, &skipped);
        }
    }

    /* Nothing matched */
    return REG_NOMATCH;
}
/**
 * Redraws the prompt and, unless input is fully hidden, the edit buffer.
 *
 * Steps, in order:
 *  1. Refresh the window size and compute the prompt's character count,
 *     less whatever countColorControlChars() reports for the prompt.
 *  2. Estimate the columns needed. While the estimate is >= current->cols
 *     and characters remain before the cursor, drop characters from the
 *     front of the buffer.
 *  3. Output the prompt.
 *  4. If is_hidden is not LN_HIDDEN_ALL: output the buffer (as '*' per
 *     character when is_hidden is LN_HIDDEN_STAR, with characters below
 *     ' ' passed to outputControlChar() as (ch + '@') otherwise), erase to
 *     end of line and set the cursor position. When is_hidden is
 *     LN_HIDDEN_ALL none of this step runs.
 *
 * NOTE(review): 'is_hidden' and the LN_HIDDEN_* constants are defined
 * outside this function; confirm their definitions.
 */
static void refreshLine(const char *prompt, struct current *current)
{
    int plen;           /* prompt length in bytes */
    int pchars;         /* prompt character count, less color control chars */
    int backup = 0;     /* control chars written before the cursor position */
    int i;
    const char *buf = current->buf;
    int chars = current->chars;
    int pos = current->pos;
    int b;
    int ch;
    int n;

    /* Should intercept SIGWINCH. For now, just get the size every time */
    getWindowSize(current);

    plen = strlen(prompt);
    pchars = utf8_strlen(prompt, plen);

    /* Scan the prompt for embedded ansi color control sequences and
     * discount them as characters/columns.
     */
    pchars -= countColorControlChars(prompt);

    /* Account for a line which is too long to fit in the window.
     * Note that control chars require an extra column
     */

    /* Column estimate: the prompt plus the character count of the whole
     * buffer (current->len bytes), plus one extra for each control char
     * found before 'pos'.
     */
    n = pchars + utf8_strlen(buf, current->len);
    b = 0;
    for (i = 0; i < pos; i++) {
        b += utf8_tounicode(buf + b, &ch);
        if (ch < ' ') {
            n++;
        }
    }

    /* If the current char is a control character, we need one extra col. */
    if (current->pos < current->chars && get_char(current, current->pos) < ' ') {
        n++;
    }

    /* If too many are needed, strip chars off the front of 'buf'
     * until it fits. Each stripped char gives back one column, plus
     * the extra column if it was a control character.
     */
    while (n >= current->cols && pos > 0) {
        b = utf8_tounicode(buf, &ch);
        if (ch < ' ') {
            n--;
        }
        n--;
        buf += b;
        pos--;
        chars--;
    }

    /* Cursor to left edge, then the prompt */
    cursorToLeft(current);
    outputChars(current, prompt, plen);

    /* Now the current buffer content, but only if not fully hidden */
    if (is_hidden != LN_HIDDEN_ALL) {
        /* Need special handling for control characters.
         * If we hit 'cols', stop.
         */
        b = 0; /* bytes scanned but not yet written */
        n = 0; /* How many control chars were seen so far */

        for (i = 0; i < chars; i++) {
            int ch; /* shadows the function-level 'ch' */
            int w = utf8_tounicode(buf + b, &ch);
            /* Control chars only need the extra column when shown as such */
            if (!is_hidden && ch < ' ') {
                n++;
            }
            /* Stop before writing a char that would reach the last column */
            if (pchars + i + n >= current->cols) {
                break;
            }
            if (is_hidden == LN_HIDDEN_STAR) {
                /* In hidden/star mode all user-entered characters are
                 * shown as asterisks ('*'). This is like control chars,
                 * except for a different translation.
                 */
                /* assert (b == 0) */
                outputChars(current, "*", 1);
                buf += w;
                /* keep b = 0; */
            }
            else if (ch < ' ') {
                /* A control character, so write the buffer so far */
                outputChars(current, buf, b);
                /* Skip both the bytes just written and the control char itself */
                buf += b + w;
                b = 0;
                outputControlChar(current, ch + '@');
                if (i < pos) {
                    backup++;
                }
            }
            else {
                b += w;
            }
        }
        /* if (is_hidden) assert (b==0) */
        /* Flush whatever is still pending */
        outputChars(current, buf, b);

        /* Erase to right, move cursor to original position */
        eraseEol(current);
        setCursorPos(current, pos + pchars + backup);
    }
}