/*
** Fetch a wide character with get_wchar() and emit it.
** A zero wide character goes through flag_default_char(string, 0);
** any other value goes through add_conv_wchar().
** Returns i + 1.
** NOTE(review): presumably the wide-char ("%C") conversion of a printf
** clone -- confirm against the dispatch table.
*/
int	conv_big_c(t_string *string, int i)
{
	wchar_t	wide;

	wide = get_wchar(string);
	if (wide != 0)
		add_conv_wchar(string, wide);
	else
		flag_default_char(string, 0);
	return (i + 1);
}
/*
** Emit a character argument.
** When the converter type starts with "l" the argument is fetched as a
** wide character (zero goes through flag_default_char(string, 0), anything
** else through add_conv_wchar()); otherwise it is fetched with get_char()
** and handed to flag_default_char().
** Returns i + 1.
*/
int	conv_c(t_string *string, int i)
{
	wchar_t	wide;

	if (ft_strncmp(string->converter.type, "l", 1) != 0)
	{
		flag_default_char(string, get_char(string));
		return (i + 1);
	}
	wide = get_wchar(string);
	if (wide != 0)
		add_conv_wchar(string, wide);
	else
		flag_default_char(string, 0);
	return (i + 1);
}
/*
 * Append one decoded character to the line buffer.
 *
 *	ch	the character to append
 *	rep	handed to STORE_CHAR together with ch; replaced by
 *		linebuf + curr when an underscore overstrikes an
 *		existing character
 *	pos	handed unchanged to the STORE_* macros
 *
 * Handles backspace according to bs_mode, turns overstrikes into
 * bold/underline attributes, hands tabs to STORE_TAB and routes control
 * and binary characters to STORE_PRCHAR / prutfchar().
 *
 * Returns 0 on the paths visible here, or 1 when the prutfchar() form of
 * a binary character would not fit within sc_width.
 * NOTE(review): the STORE_* macros are defined elsewhere and may contain
 * return statements of their own -- confirm.
 */
static int
do_append(LWCHAR ch, char *rep, off_t pos)
{
	int a;
	LWCHAR prev_ch;

	a = AT_NORMAL;

	if (ch == '\b') {
		/* In BS_CONTROL mode a backspace is just another control char. */
		if (bs_mode == BS_CONTROL)
			goto do_control_char;

		/*
		 * A better test is needed here so we don't
		 * backspace over part of the printed
		 * representation of a binary character.
		 */
		if (curr <= lmargin ||
		    column <= lmargin ||
		    (attr[curr - 1] & (AT_ANSI|AT_BINARY))) {
			STORE_PRCHAR('\b', pos);
		} else if (bs_mode == BS_NORMAL) {
			STORE_CHAR(ch, AT_NORMAL, NULL, pos);
		} else if (bs_mode == BS_SPECIAL) {
			/* Remember the result of backc() for the next char. */
			overstrike = backc();
		}

		return (0);
	}

	if (overstrike > 0) {
		/*
		 * Overstrike the character at the current position
		 * in the line buffer.  This will cause either
		 * underline (if a "_" is overstruck),
		 * bold (if an identical character is overstruck),
		 * or just deletion of the character in the buffer.
		 */
		overstrike = utf_mode ? -1 : 0;
		/* To be correct, this must be a base character. */
		prev_ch = get_wchar(linebuf + curr);
		a = attr[curr];
		if (ch == prev_ch) {
			/*
			 * Overstriking a char with itself means make it bold.
			 * But overstriking an underscore with itself is
			 * ambiguous.  It could mean make it bold, or
			 * it could mean make it underlined.
			 * Use the previous overstrike to resolve it.
			 */
			if (ch == '_') {
				if ((a & (AT_BOLD|AT_UNDERLINE)) != AT_NORMAL)
					a |= (AT_BOLD|AT_UNDERLINE);
				else if (last_overstrike != AT_NORMAL)
					a |= last_overstrike;
				else
					a |= AT_BOLD;
			} else {
				a |= AT_BOLD;
			}
		} else if (ch == '_') {
			/* Keep the existing char, underlined, instead of the "_". */
			a |= AT_UNDERLINE;
			ch = prev_ch;
			rep = linebuf + curr;
		} else if (prev_ch == '_') {
			a |= AT_UNDERLINE;
		}
		/* Else we replace prev_ch, but we keep its attributes. */
	} else if (overstrike < 0) {
		/* Negative overstrike is only set above, when utf_mode is on. */
		if (is_composing_char(ch) ||
		    is_combining_char(get_wchar(linebuf + curr), ch))
			/* Continuation of the same overstrike. */
			a = last_overstrike;
		else
			overstrike = 0;
	}

	if (ch == '\t') {
		/*
		 * Expand a tab into spaces.
		 */
		switch (bs_mode) {
		case BS_CONTROL:
			goto do_control_char;
		case BS_NORMAL:
		case BS_SPECIAL:
			STORE_TAB(a, pos);
			break;
		}
	} else if ((!utf_mode || is_ascii_char(ch)) && control_char((char)ch)) {
do_control_char:
		/* Also reached by goto for '\b' and '\t' in BS_CONTROL mode. */
		if (ctldisp == OPT_ON ||
		    (ctldisp == OPT_ONPLUS && IS_CSI_START(ch))) {
			/*
			 * Output as a normal character.
			 */
			STORE_CHAR(ch, AT_NORMAL, rep, pos);
		} else {
			STORE_PRCHAR((char)ch, pos);
		}
	} else if (utf_mode && ctldisp != OPT_ON && is_ubin_char(ch)) {
		char *s;

		s = prutfchar(ch);

		/* Refuse the char if its whole representation won't fit. */
		if (column + (int)strlen(s) - 1 +
		    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
			return (1);

		for (; *s != 0; s++)
			STORE_CHAR(*s, AT_BINARY, NULL, pos);
	} else {
		STORE_CHAR(ch, a, rep, pos);
	}
	return (0);
}
/*
 * Append a character to the line buffer.
 * Expand tabs into spaces, handle underlining, boldfacing, etc.
 *
 * A CR seen while bs_mode == BS_SPECIAL is not appended at once: it is
 * parked in pendc/pendpos and appended at the start of the next call.
 * In utf_mode the bytes of a multibyte sequence are collected in mbc_buf
 * and appended as one character once the sequence is complete and well
 * formed; other sequences are handed to flush_mbc_buf().
 *
 * Returns 0 if ok.  Otherwise returns the number of chars the caller
 * should back up: 1 when utf_mode is off or when the pending char could
 * not be appended, mbc_buf_index in the remaining utf_mode cases.
 */
int
pappend(char c, off_t pos)
{
	int r;

	if (pendc) {
		if (do_append(pendc, NULL, pendpos))
			/*
			 * Oops.  We've probably lost the char which
			 * was in pendc, since caller won't back up.
			 */
			return (1);
		pendc = '\0';
	}

	if (c == '\r' && bs_mode == BS_SPECIAL) {
		if (mbc_buf_len > 0)  /* utf_mode must be on. */ {
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			if (r)
				return (mbc_buf_index);
		}

		/*
		 * Don't put the CR into the buffer until we see
		 * the next char.  If the next char is a newline,
		 * discard the CR.
		 */
		pendc = c;
		pendpos = pos;
		return (0);
	}

	if (!utf_mode) {
		r = do_append((LWCHAR) c, NULL, pos);
	} else {
		/* Perform strict validation in all possible cases. */
		if (mbc_buf_len == 0) {
retry:
			/* Start a fresh sequence with c as its first byte. */
			mbc_buf_index = 1;
			*mbc_buf = c;
			if (IS_ASCII_OCTET(c)) {
				r = do_append((LWCHAR) c, NULL, pos);
			} else if (IS_UTF8_LEAD(c)) {
				/* Wait for the trail bytes before appending. */
				mbc_buf_len = utf_len(c);
				mbc_pos = pos;
				return (0);
			} else {
				/* UTF8_INVALID or stray UTF8_TRAIL */
				r = flush_mbc_buf(pos);
			}
		} else if (IS_UTF8_TRAIL(c)) {
			mbc_buf[mbc_buf_index++] = c;
			if (mbc_buf_index < mbc_buf_len)
				return (0);
			if (is_utf8_well_formed(mbc_buf))
				r = do_append(get_wchar(mbc_buf), mbc_buf,
				    mbc_pos);
			else
				/* Complete, but not shortest form, sequence. */
				mbc_buf_index = r = flush_mbc_buf(mbc_pos);
			mbc_buf_len = 0;
		} else {
			/* Flush incomplete (truncated) sequence.  */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			/* Handle new char.  */
			if (!r)
				goto retry;
		}
	}

	/*
	 * If we need to shift the line, do it.
	 * But wait until we get to at least the middle of the screen,
	 * so shifting it doesn't affect the chars we're currently
	 * pappending.  (Bold & underline can get messed up otherwise.)
	 */
	if (cshift < hshift && column > sc_width / 2) {
		linebuf[curr] = '\0';
		pshift(hshift - cshift);
	}
	if (r) {
		/* How many chars should caller back up? */
		r = (!utf_mode) ? 1 : mbc_buf_index;
	}
	return (r);
}
/*
 * Shift the input line left.
 * This means discarding N printable chars at the start of the buffer.
 *
 * shift is first clamped to column - lmargin and to curr - lmargin.
 * Bytes from lmargin onwards are then either dropped (their display
 * width is counted in "shifted") or, for CSI sequences when
 * ctldisp == OPT_ONPLUS, copied down so that they are kept.
 * On exit curr is the new end of the buffer, column is reduced by the
 * width actually shifted and cshift is increased by the same amount.
 */
static void
pshift(int shift)
{
	LWCHAR prev_ch = 0;
	unsigned char c;
	int shifted = 0;	/* display width discarded so far */
	int to;			/* write index into linebuf/attr */
	int from;		/* read index into linebuf/attr */
	int len;		/* byte length of the current char */
	int width;		/* display width of the current char */
	int prev_attr;
	int next_attr;

	if (shift > column - lmargin)
		shift = column - lmargin;
	if (shift > curr - lmargin)
		shift = curr - lmargin;

	to = from = lmargin;
	/*
	 * We keep on going when shifted == shift
	 * to get all combining chars.
	 */
	while (shifted <= shift && from < curr) {
		c = linebuf[from];
		if (ctldisp == OPT_ONPLUS && IS_CSI_START(c)) {
			/* Keep cumulative effect.  */
			linebuf[to] = c;
			attr[to++] = attr[from++];
			/* Copy up to and including the first non-middle byte. */
			while (from < curr && linebuf[from]) {
				linebuf[to] = linebuf[from];
				attr[to++] = attr[from];
				if (!is_ansi_middle(linebuf[from++]))
					break;
			}
			continue;
		}

		width = 0;

		if (!IS_ASCII_OCTET(c) && utf_mode) {
			/* Assumes well-formedness validation already done.  */
			LWCHAR ch;

			len = utf_len(c);
			if (from + len > curr)
				break;
			ch = get_wchar(linebuf + from);
			/* Composing/combining chars keep width 0. */
			if (!is_composing_char(ch) &&
			    !is_combining_char(prev_ch, ch))
				width = is_wide_char(ch) ? 2 : 1;
			prev_ch = ch;
		} else {
			len = 1;
			if (c == '\b')
				/* XXX - Incorrect if several '\b' in a row.  */
				width = (utf_mode && is_wide_char(prev_ch)) ?
				    -2 : -1;
			else if (!control_char(c))
				width = 1;
			prev_ch = 0;
		}

		if (width == 2 && shift - shifted == 1) {
			/* Should never happen when called by pshift_all().  */
			attr[to] = attr[from];
			/*
			 * Assume a wide_char will never be the first half of a
			 * combining_char pair, so reset prev_ch in case we're
			 * followed by a '\b'.
			 */
			prev_ch = linebuf[to++] = ' ';
			from += len;
			shifted++;
			continue;
		}

		/* Adjust width for magic cookies. */
		prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
		next_attr = (from + len < curr) ? attr[from + len] : prev_attr;
		if (!is_at_equiv(attr[from], prev_attr) &&
		    !is_at_equiv(attr[from], next_attr)) {
			width += attr_swidth(attr[from]);
			if (from + len < curr)
				width += attr_ewidth(attr[from]);
			if (is_at_equiv(prev_attr, next_attr)) {
				width += attr_ewidth(prev_attr);
				if (from + len < curr)
					width += attr_swidth(next_attr);
			}
		}

		/* Stop before a char that is wider than what is left to shift. */
		if (shift - shifted < width)
			break;
		from += len;
		shifted += width;
		/* A backspace can drive the count negative; clamp it. */
		if (shifted < 0)
			shifted = 0;
	}
	/* Move the surviving tail of the line down to the write index. */
	while (from < curr) {
		linebuf[to] = linebuf[from];
		attr[to++] = attr[from++];
	}
	curr = to;
	column -= shifted;
	cshift += shifted;
}
static const char * test_string(const char *stringp, const char *regexp) { /* * returns a pointer to the first character following the first * substring of the string addressed by stringp that matches * the compiled regular expression addressed by regexp */ unsigned int group_length; int nextra_matches_allowed; int nmust_match; wchar_t regex_wchar; int regex_char_size; const char *repeat_startp; unsigned int return_argn; wchar_t string_wchar; int string_char_size; unsigned int substringn; char_test_condition_t test_condition; const char *test_stringp; for (;;) { /* * Exit the loop via a return whenever there's a match * or it's clear that there can be no match. */ switch ((int)*regexp) { /* * No fall-through. * Each case ends with either a return or with stringp * addressing the next character to be tested and regexp * addressing the next compiled regular expression * * NOTE: The comments for each case give the meaning * of the compiled regular expression decoded by the case * and the character string that the compiled regular * expression uses to encode the case. Each single * character encoded in the compiled regular expression * is shown enclosed in angle brackets (<>). Each * compiled regular expression begins with a marker * character which is shown as a named constant * (e.g. <ASCII_CHAR>). Character constants are shown * enclosed in single quotes (e.g. <'$'>). All other * single characters encoded in the compiled regular * expression are shown as lower case variable names * (e.g. <ascii_char> or <multibyte_char>). Multicharacter * strings encoded in the compiled regular expression * are shown as variable names followed by elipses * (e.g. <compiled_regex...>). 
*/ case ASCII_CHAR: /* single ASCII char */ /* encoded as <ASCII_CHAR><ascii_char> */ regexp++; if (*regexp == *stringp) { regexp++; stringp++; } else { return ((char *)0); } break; /* end case ASCII_CHAR */ case MULTIBYTE_CHAR: /* single multibyte char */ /* encoded as <MULTIBYTE_CHAR><multibyte_char> */ regexp++; regex_char_size = get_wchar(®ex_wchar, regexp); string_char_size = get_wchar(&string_wchar, stringp); if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { return ((char *)0); } else { regexp += regex_char_size; stringp += string_char_size; } break; /* end case MULTIBYTE_CHAR */ case ANY_CHAR: /* any single ASCII or multibyte char */ /* encoded as <ANY_CHAR> */ if (!multibyte) { if (*stringp == '\0') { return ((char *)0); } else { regexp++; stringp++; } } else { string_char_size = get_wchar(&string_wchar, stringp); if (string_char_size <= 0) { return ((char *)0); } else { regexp++; stringp += string_char_size; } } break; /* end case ANY_CHAR */ case IN_ASCII_CHAR_CLASS: /* [.....] */ case NOT_IN_ASCII_CHAR_CLASS: /* * encoded as <IN_ASCII_CHAR_CLASS><class_length><class...> * or <NOT_IN_ASCII_CHAR_CLASS><class_length><class...> * * NOTE: <class_length> includes the <class_length> byte */ if ((int)*regexp == (int)IN_ASCII_CHAR_CLASS) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the <class_length> byte */ if ((*stringp != '\0') && (test_char_against_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { regexp += (int)*regexp; /* add the class length to regexp */ stringp++; } else { return ((char *)0); } break; /* end case IN_ASCII_CHAR_CLASS */ case IN_MULTIBYTE_CHAR_CLASS: /* [....] 
*/ case NOT_IN_MULTIBYTE_CHAR_CLASS: /* * encoded as <IN_MULTIBYTE_CHAR_CLASS><class_length><class...> * or <NOT_IN_MULTIBYTE_CHAR_CLASS><class_length><class...> * * NOTE: <class_length> includes the <class_length> byte */ if ((int)*regexp == (int)IN_MULTIBYTE_CHAR_CLASS) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the <class_length> byte */ string_char_size = get_wchar(&string_wchar, stringp); if ((string_char_size > 0) && (test_char_against_multibyte_class(string_wchar, regexp, test_condition) == CONDITION_TRUE)) { regexp += (int)*regexp; /* add the class length to regexp */ stringp += string_char_size; } else { return ((char *)0); } break; /* end case IN_MULTIBYTE_CHAR_CLASS */ case IN_OLD_ASCII_CHAR_CLASS: /* [...] */ case NOT_IN_OLD_ASCII_CHAR_CLASS: /* * encoded as <IN_OLD_ASCII_CHAR_CLASS><class_length><class...> * or <NOT_IN_OLD_ASCII_CHAR_CLASS><class_length><class...> * * NOTE: <class_length> includes the <class_length> byte */ if ((int)*regexp == (int)IN_OLD_ASCII_CHAR_CLASS) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the <class_length> byte */ if ((*stringp != '\0') && (test_char_against_old_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { regexp += (int)*regexp; /* add the class length to regexp */ stringp++; } else { return ((char *)0); } break; /* end case [NOT_]IN_OLD_ASCII_CHAR_CLASS */ case SIMPLE_GROUP: /* (.....) */ /* encoded as <SIMPLE_GROUP><group_length> */ regexp += 2; break; /* end case SIMPLE_GROUP */ case END_GROUP: /* (.....) 
*/ /* encoded as <END_GROUP><groupn> */ regexp += 2; break; /* end case END_GROUP */ case SAVED_GROUP: /* (.....)$0-9 */ /* encoded as <SAVED_GROUP><substringn> */ regexp++; substringn = (unsigned int)*regexp; if (substringn >= NSUBSTRINGS) return ((char *)0); substring_startp[substringn] = stringp; regexp++; break; /* end case SAVED_GROUP */ case END_SAVED_GROUP: /* (.....)$0-9 */ /* * encoded as <END_SAVED_GROUP><substringn>\ * <return_arg_number[substringn]> */ regexp++; substringn = (unsigned int)*regexp; if (substringn >= NSUBSTRINGS) return ((char *)0); substring_endp[substringn] = stringp; regexp++; return_argn = (unsigned int)*regexp; if (return_argn >= NSUBSTRINGS) return ((char *)0); return_arg_number[substringn] = return_argn; regexp++; break; /* end case END_SAVED_GROUP */ case ASCII_CHAR|ZERO_OR_MORE: /* char* */ /* encoded as <ASCII_CHAR|ZERO_OR_MORE><ascii_char> */ regexp++; repeat_startp = stringp; while (*stringp == *regexp) { stringp++; } regexp++; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); /* end case ASCII_CHAR|ZERO_OR_MORE */ case ASCII_CHAR|ONE_OR_MORE: /* char+ */ /* encoded as <ASCII_CHAR|ONE_OR_MORE><ascii_char> */ regexp++; if (*stringp != *regexp) { return ((char *)0); } else { stringp++; repeat_startp = stringp; while (*stringp == *regexp) { stringp++; } regexp++; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } /* end case ASCII_CHAR|ONE_OR_MORE */ case ASCII_CHAR|COUNT: /* char{min_count,max_count} */ /* * encoded as <ASCII_CHAR|COUNT><ascii_char>\ * <minimum_match_count><maximum_match_count> */ regexp++; get_match_counts(&nmust_match, &nextra_matches_allowed, regexp + 1); while ((*stringp == *regexp) && (nmust_match > 0)) { nmust_match--; stringp++; } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while (*stringp == *regexp) { stringp++; } regexp += 3; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } 
else { repeat_startp = stringp; while ((*stringp == *regexp) && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; stringp++; } regexp += 3; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } /* end case ASCII_CHAR|COUNT */ case MULTIBYTE_CHAR|ZERO_OR_MORE: /* char* */ /* encoded as <MULTIBYTE_CHAR|ZERO_OR_MORE><multibyte_char> */ regexp++; regex_char_size = get_wchar(®ex_wchar, regexp); repeat_startp = stringp; string_char_size = get_wchar(&string_wchar, stringp); while ((string_char_size > 0) && (string_wchar == regex_wchar)) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += regex_char_size; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); /* end case MULTIBYTE_CHAR|ZERO_OR_MORE */ case MULTIBYTE_CHAR|ONE_OR_MORE: /* char+ */ /* encoded as <MULTIBYTE_CHAR|ONE_OR_MORE><multibyte_char> */ regexp++; regex_char_size = get_wchar(®ex_wchar, regexp); string_char_size = get_wchar(&string_wchar, stringp); if ((string_char_size <= 0) || (string_wchar != regex_wchar)) { return ((char *)0); } else { stringp += string_char_size; repeat_startp = stringp; string_char_size = get_wchar(&string_wchar, stringp); while ((string_char_size > 0) && (string_wchar == regex_wchar)) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += regex_char_size; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } /* end case MULTIBYTE_CHAR|ONE_OR_MORE */ case MULTIBYTE_CHAR|COUNT: /* char{min_count,max_count} */ /* * encoded as <MULTIBYTE_CHAR|COUNT><multibyte_char>\ * <minimum_match_count><maximum_match_count> */ regexp++; regex_char_size = get_wchar(®ex_wchar, regexp); get_match_counts(&nmust_match, &nextra_matches_allowed, regexp + regex_char_size); string_char_size = get_wchar(&string_wchar, stringp); while ((string_char_size > 0) && (string_wchar == regex_wchar) && (nmust_match > 0)) { nmust_match--; stringp += string_char_size; 
string_char_size = get_wchar(&string_wchar, stringp); } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while ((string_char_size > 0) && (string_wchar == regex_wchar)) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += regex_char_size + 2; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } else { repeat_startp = stringp; while ((string_char_size > 0) && (string_wchar == regex_wchar) && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += regex_char_size + 2; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } /* end case MULTIBYTE_CHAR|COUNT */ case ANY_CHAR|ZERO_OR_MORE: /* .* */ /* encoded as <ANY_CHAR|ZERO_OR_MORE> */ repeat_startp = stringp; if (!multibyte) { while (*stringp != '\0') { stringp++; } regexp++; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } else { string_char_size = get_wchar(&string_wchar, stringp); while (string_char_size > 0) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp++; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } /* end case <ANY_CHAR|ZERO_OR_MORE> */ case ANY_CHAR|ONE_OR_MORE: /* .+ */ /* encoded as <ANY_CHAR|ONE_OR_MORE> */ if (!multibyte) { if (*stringp == '\0') { return ((char *)0); } else { stringp++; repeat_startp = stringp; while (*stringp != '\0') { stringp++; } regexp++; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } } else { string_char_size = get_wchar(&string_wchar, stringp); if (string_char_size <= 0) { return ((char *)0); } else { stringp += string_char_size; repeat_startp = stringp; string_char_size = get_wchar(&string_wchar, stringp); while (string_char_size > 0) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } 
regexp++; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } } /* end case <ANY_CHAR|ONE_OR_MORE> */ case ANY_CHAR|COUNT: /* .{min_count,max_count} */ /* * encoded as <ANY_CHAR|COUNT>\ * <minimum_match_count><maximum_match_count> */ get_match_counts(&nmust_match, &nextra_matches_allowed, regexp + 1); if (!multibyte) { while ((*stringp != '\0') && (nmust_match > 0)) { nmust_match--; stringp++; } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while (*stringp != '\0') { stringp++; } regexp += 3; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } else { repeat_startp = stringp; while ((*stringp != '\0') && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; stringp++; } regexp += 3; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } } else { /* multibyte character */ string_char_size = get_wchar(&string_wchar, stringp); while ((string_char_size > 0) && (nmust_match > 0)) { nmust_match--; stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while (string_char_size > 0) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += 3; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } else { repeat_startp = stringp; while ((string_char_size > 0) && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += 3; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } } /* end case ANY_CHAR|COUNT */ case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ case NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* * encoded as <IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ * <class_length><class ...> * or <NOT_IN_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ * 
<class_length><class ...> * * NOTE: <class_length> includes the <class_length> byte */ if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the <class_length> byte */ repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { stringp++; } regexp += (int)*regexp; /* add the class length to regexp */ return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); /* end case IN_ASCII_CHAR_CLASS|ZERO_OR_MORE */ case IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ case NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE: /* * encoded as <IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ * <class_length><class ...> * or <NOT_IN_ASCII_CHAR_CLASS|ONE_OR_MORE>\ * <class_length><class ...> * * NOTE: <class_length> includes the <class_length> byte */ if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|ONE_OR_MORE)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the <class_length> byte */ if ((*stringp == '\0') || (test_char_against_ascii_class(*stringp, regexp, test_condition) != CONDITION_TRUE)) { return ((char *)0); } else { stringp++; repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { stringp++; } regexp += (int)*regexp; /* add the class length to regexp */ return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } /* end case IN_ASCII_CHAR_CLASS|ONE_OR_MORE */ case IN_ASCII_CHAR_CLASS | COUNT: /* [.....]{max_count,min_count} */ case NOT_IN_ASCII_CHAR_CLASS | COUNT: /* * endoded as <IN_ASCII_CHAR_CLASS|COUNT><class_length>\ * <class ...><minimum_match_count>\ * <maximum_match_count> * or <NOT_IN_ASCII_CHAR_CLASS|COUNT><class_length>\ * <class ...><minimum_match_count>\ * <maximum_match_count> * * NOTE: <class_length> includes the <class_length> byte, * but not the 
<minimum_match_count> or * <maximum_match_count> bytes */ if ((int)*regexp == (int)(IN_ASCII_CHAR_CLASS|COUNT)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the <class_length> byte */ get_match_counts(&nmust_match, &nextra_matches_allowed, regexp + (int)*regexp); while ((*stringp != '\0') && (test_char_against_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE) && (nmust_match > 0)) { nmust_match--; stringp++; } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { stringp++; } regexp += (int)*regexp + 2; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } else { repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE) && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; stringp++; } regexp += (int)*regexp + 2; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } /* end case IN_ASCII_CHAR_CLASS|COUNT */ case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ case NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE: /* * encoded as <IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ * <class_length><class ...> * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE>\ * <class_length><class ...> * * NOTE: <class_length> includes the <class_length> byte */ if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the <class_length> byte */ repeat_startp = stringp; string_char_size = get_wchar(&string_wchar, stringp); while ((string_char_size > 0) && (test_char_against_multibyte_class(string_wchar, regexp, test_condition) == CONDITION_TRUE)) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += 
(int)*regexp; /* add the class length to regexp */ return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); /* end case IN_MULTIBYTE_CHAR_CLASS|ZERO_OR_MORE */ case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ case NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE: /* * encoded as <IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ * <class_length><class ...> * or <NOT_IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE>\ * <class_length><class ...> * * NOTE: <class_length> includes the <class_length> byte */ if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the <class_length> byte */ string_char_size = get_wchar(&string_wchar, stringp); if ((string_char_size <= 0) || (test_char_against_multibyte_class(string_wchar, regexp, test_condition) != CONDITION_TRUE)) { return ((char *)0); } else { stringp += string_char_size; repeat_startp = stringp; string_char_size = get_wchar(&string_wchar, stringp); while ((string_char_size > 0) && (test_char_against_multibyte_class(string_wchar, regexp, test_condition) == CONDITION_TRUE)) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += (int)*regexp; /* add the class length to regexp */ return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } /* end case IN_MULTIBYTE_CHAR_CLASS|ONE_OR_MORE */ case IN_MULTIBYTE_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ case NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT: /* * encoded as <IN_MULTIBYTE_CHAR_CLASS|COUNT>\ * <class_length><class ...><min_count><max_count> * or <NOT_IN_MULTIBYTE_CHAR_CLASS|COUNT>\ * <class_length><class ...><min_count><max_count> * * NOTE: <class_length> includes the <class_length> byte * but not the <minimum_match_count> or * <maximum_match_count> bytes */ if ((int)*regexp == (int)(IN_MULTIBYTE_CHAR_CLASS|COUNT)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the <class_length> 
byte */ get_match_counts(&nmust_match, &nextra_matches_allowed, regexp + (int)*regexp); string_char_size = get_wchar(&string_wchar, stringp); while ((string_char_size > 0) && (test_char_against_multibyte_class(string_wchar, regexp, test_condition) == CONDITION_TRUE) && (nmust_match > 0)) { nmust_match--; stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while ((string_char_size > 0) && (test_char_against_multibyte_class(string_wchar, regexp, test_condition) == CONDITION_TRUE)) { stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += (int)*regexp + 2; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } else { repeat_startp = stringp; while ((string_char_size > 0) && (test_char_against_multibyte_class(string_wchar, regexp, test_condition) == CONDITION_TRUE) && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; stringp += string_char_size; string_char_size = get_wchar(&string_wchar, stringp); } regexp += (int)*regexp + 2; return (test_repeated_multibyte_char(repeat_startp, stringp, regexp)); } /* end case IN_MULTIBYTE_CHAR_CLASS|COUNT */ case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* [.....]* */ case NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE: /* * encoded as <IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ * <class_length><class ...> * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE>\ * <class_length><class ...> * * NOTE: <class_length> includes the <class_length> byte */ if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the <class_length> byte */ repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_old_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { stringp++; } regexp += (int)*regexp; /* add the class length to regexp 
*/ return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); /* end case IN_OLD_ASCII_CHAR_CLASS|ZERO_OR_MORE */ case IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* [.....]+ */ case NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE: /* * encoded as <IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ * <class_length><class ...> * or <NOT_IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE>\ * <class_length><class ...> * * NOTE: <class length> includes the <class_length> byte */ if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|ONE_OR_MORE)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the <class_length> byte */ if ((*stringp == '\0') || (test_char_against_old_ascii_class(*stringp, regexp, test_condition) != CONDITION_TRUE)) { return ((char *)0); } else { stringp++; repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_old_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { stringp++; } regexp += (int)*regexp; /* add the class length to regexp */ return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } /* end case IN_OLD_ASCII_CHAR_CLASS | ONE_OR_MORE */ case IN_OLD_ASCII_CHAR_CLASS|COUNT: /* [...]{min_count,max_count} */ case NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT: /* * encoded as <IN_OLD_ASCII_CHAR_CLASS|COUNT><class_length>\ * <class ...><minimum_match_count>\ * <maximum_match_count> * or <NOT_IN_OLD_ASCII_CHAR_CLASS|COUNT>\ * <class_length><class ...><minimum_match_count>\ * <maximum_match_count> * * NOTE: <class_length> includes the <class_length> byte * but not the <minimum_match_count> or * <maximum_match_count> bytes */ if ((int)*regexp == (int)(IN_OLD_ASCII_CHAR_CLASS|COUNT)) { test_condition = IN_CLASS; } else { test_condition = NOT_IN_CLASS; } regexp++; /* point to the <class_length> byte */ get_match_counts(&nmust_match, &nextra_matches_allowed, regexp + (int)*regexp); while ((*stringp != '\0') && (test_char_against_old_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE) && (nmust_match > 
0)) { nmust_match--; stringp++; } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_old_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE)) { stringp++; } regexp += (int)*regexp + 2; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } else { repeat_startp = stringp; while ((*stringp != '\0') && (test_char_against_old_ascii_class(*stringp, regexp, test_condition) == CONDITION_TRUE) && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; stringp++; } regexp += (int)*regexp + 2; return (test_repeated_ascii_char(repeat_startp, stringp, regexp)); } /* end case IN_OLD_ASCII_CHAR_CLASS|COUNT */ case ZERO_OR_MORE_GROUP: /* (.....)* */ case ZERO_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: case ZERO_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: case ZERO_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: /* * encoded as <ZERO_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ * <group_length><compiled_regex...>\ * <END_GROUP|ZERO_OR_MORE><groupn> * * NOTE: * * group_length + (256 * ADDED_LENGTH_BITS) == * length_of(<compiled_regex...><END_GROUP|ZERO_OR_MORE>\ * <groupn>) * */ group_length = (((unsigned int)*regexp & ADDED_LENGTH_BITS) << TIMES_256_SHIFT); regexp++; group_length += (unsigned int)*regexp; regexp++; repeat_startp = stringp; test_stringp = test_string(stringp, regexp); while (test_stringp != (char *)0) { if (push_stringp(stringp) == (char *)0) return ((char *)0); stringp = test_stringp; test_stringp = test_string(stringp, regexp); } regexp += group_length; return (test_repeated_group(repeat_startp, stringp, regexp)); /* end case ZERO_OR_MORE_GROUP */ case END_GROUP|ZERO_OR_MORE: /* (.....)* */ /* encoded as <END_GROUP|ZERO_OR_MORE> */ /* return from recursive call to test_string() */ return ((char *)stringp); /* end case END_GROUP|ZERO_OR_MORE */ case ONE_OR_MORE_GROUP: /* (.....)+ */ case ONE_OR_MORE_GROUP|ADD_256_TO_GROUP_LENGTH: case 
ONE_OR_MORE_GROUP|ADD_512_TO_GROUP_LENGTH: case ONE_OR_MORE_GROUP|ADD_768_TO_GROUP_LENGTH: /* * encoded as <ONE_OR_MORE_GROUP|ADDED_LENGTH_BITS>\ * <group_length><compiled_regex...>\ * <END_GROUP|ONE_OR_MORE><groupn> * * NOTE: * * group_length + (256 * ADDED_LENGTH_BITS) == * length_of(<compiled_regex...><END_GROUP|ONE_OR_MORE>\ * <groupn>) */ group_length = (((unsigned int)*regexp & ADDED_LENGTH_BITS) << TIMES_256_SHIFT); regexp++; group_length += (unsigned int)*regexp; regexp++; stringp = test_string(stringp, regexp); if (stringp == (char *)0) return ((char *)0); repeat_startp = stringp; test_stringp = test_string(stringp, regexp); while (test_stringp != (char *)0) { if (push_stringp(stringp) == (char *)0) return ((char *)0); stringp = test_stringp; test_stringp = test_string(stringp, regexp); } regexp += group_length; return (test_repeated_group(repeat_startp, stringp, regexp)); /* end case ONE_OR_MORE_GROUP */ case END_GROUP|ONE_OR_MORE: /* (.....)+ */ /* encoded as <END_GROUP|ONE_OR_MORE><groupn> */ /* return from recursive call to test_string() */ return ((char *)stringp); /* end case END_GROUP|ONE_OR_MORE */ case COUNTED_GROUP: /* (.....){max_count,min_count} */ case COUNTED_GROUP|ADD_256_TO_GROUP_LENGTH: case COUNTED_GROUP|ADD_512_TO_GROUP_LENGTH: case COUNTED_GROUP|ADD_768_TO_GROUP_LENGTH: /* * encoded as <COUNTED_GROUP|ADDED_LENGTH_BITS><group_length>\ * <compiled_regex...>\<END_GROUP|COUNT><groupn>\ * <minimum_match_count><maximum_match_count> * * NOTE: * * group_length + (256 * ADDED_LENGTH_BITS) == * length_of(<compiled_regex...><END_GROUP|COUNT><groupn>) * * but does not include the <minimum_match_count> or * <maximum_match_count> bytes */ group_length = (((unsigned int)*regexp & ADDED_LENGTH_BITS) << TIMES_256_SHIFT); regexp++; group_length += (unsigned int)*regexp; regexp++; get_match_counts(&nmust_match, &nextra_matches_allowed, regexp + group_length); test_stringp = test_string(stringp, regexp); while ((test_stringp != (char *)0) && (nmust_match > 
0)) { stringp = test_stringp; nmust_match--; test_stringp = test_string(stringp, regexp); } if (nmust_match > 0) { return ((char *)0); } else if (nextra_matches_allowed == UNLIMITED) { repeat_startp = stringp; while (test_stringp != (char *)0) { if (push_stringp(stringp) == (char *)0) return ((char *)0); stringp = test_stringp; test_stringp = test_string(stringp, regexp); } regexp += group_length + 2; return (test_repeated_group(repeat_startp, stringp, regexp)); } else { repeat_startp = stringp; while ((test_stringp != (char *)0) && (nextra_matches_allowed > 0)) { nextra_matches_allowed--; if (push_stringp(stringp) == (char *)0) return ((char *)0); stringp = test_stringp; test_stringp = test_string(stringp, regexp); } regexp += group_length + 2; return (test_repeated_group(repeat_startp, stringp, regexp)); } /* end case COUNTED_GROUP */ case END_GROUP|COUNT: /* (.....){max_count,min_count} */ /* encoded as <END_GROUP|COUNT> */ /* return from recursive call to test_string() */ return (stringp); /* end case END_GROUP|COUNT */ case END_OF_STRING_MARK: /* encoded as <END_OF_STRING_MARK><END_REGEX> */ if (*stringp == '\0') { regexp++; } else { return ((char *)0); } break; /* end case END_OF_STRING_MARK */ case END_REGEX: /* end of the compiled regular expression */ /* encoded as <END_REGEX> */ return (stringp); /* end case END_REGEX */ default: return ((char *)0); } /* end switch (*regexp) */ } /* end for (;;) */ } /* test_string() */
/*
 * test_char_against_multibyte_class()
 *
 * Tests a wide character for membership in a compiled multibyte
 * character class.
 *
 * Arguments:
 *	test_char	the character being tested
 *	classp		points at the compiled class; the first byte is a
 *			length byte, followed by the encoded class
 *			characters (the length is decremented once after
 *			the length byte is skipped, so it appears to count
 *			the length byte itself)
 *	test_condition	IN_CLASS or NOT_IN_CLASS
 *
 * Returns:
 *	CONDITION_TRUE	the character satisfies test_condition
 *	CONDITION_FALSE	the character does not satisfy test_condition
 *	CHAR_TEST_ERROR	get_wchar() reported a size <= 0 while decoding
 *			a class character
 *
 * NOTE: The range a-z in a multibyte character class compiles to
 * aTHRUz.
 */
static char_test_result_t
test_char_against_multibyte_class(wchar_t test_char,
	const char *classp,
	char_test_condition_t test_condition)
{
	int			char_size;
	wchar_t			current_char;
	char_test_result_t	match_result;
	int			nbytes_to_check;
	wchar_t			previous_char;

	/* result to return as soon as test_char is found in the class */
	if (test_condition == IN_CLASS) {
		match_result = CONDITION_TRUE;
	} else {
		match_result = CONDITION_FALSE;
	}

	nbytes_to_check = (int)*classp;
	classp++;
	nbytes_to_check--;

	/*
	 * The first class character is handled outside the loop because
	 * it cannot be the THRU marker of a range (there is no previous
	 * character yet).
	 */
	char_size = get_wchar(&current_char, classp);
	if (char_size <= 0) {
		return (CHAR_TEST_ERROR);
	}
	if (test_char == current_char) {
		return (match_result);
	}
	classp += char_size;
	nbytes_to_check -= char_size;

	while (nbytes_to_check > 0) {
		previous_char = current_char;
		char_size = get_wchar(&current_char, classp);
		if (char_size <= 0) {
			return (CHAR_TEST_ERROR);
		}

		/*
		 * The literal comparison is done before the THRU check,
		 * so a test character equal to THRU itself matches here.
		 */
		if (test_char == current_char) {
			return (match_result);
		}

		if (current_char == THRU) {
			/*
			 * previous_char THRU <next char> is a range;
			 * step over THRU and decode the range's upper end.
			 */
			classp += char_size;
			nbytes_to_check -= char_size;
			char_size = get_wchar(&current_char, classp);
			if (char_size <= 0) {
				return (CHAR_TEST_ERROR);
			}
			if (in_wchar_range(test_char, previous_char,
			    current_char)) {
				return (match_result);
			}
		}

		classp += char_size;
		nbytes_to_check -= char_size;
	}

	/* test_char was not found anywhere in the class */
	if (test_condition == NOT_IN_CLASS) {
		return (CONDITION_TRUE);
	} else {
		return (CONDITION_FALSE);
	}
} /* test_char_against_multibyte_class() */
extern char * regex(const char *regexp, const char *stringp, ...) { va_list arg_listp; int char_size; const char *end_of_matchp; wchar_t regex_wchar; char *return_argp[NSUBSTRINGS]; char *returned_substringp; int substringn; const char *substringp; wchar_t string_wchar; if (____loc1() == (char **)0) { return ((char *)0); } else { lmutex_lock(®ex_lock); __loc1 = (char *)0; } if ((stringp == (char *)0) || (regexp == (char *)0)) { lmutex_unlock(®ex_lock); return ((char *)0); } /* INITIALIZE SUBSTRINGS THAT MIGHT BE RETURNED IN VARARGS */ substringn = 0; va_start(arg_listp, stringp); while (substringn < NSUBSTRINGS) { return_argp[substringn] = va_arg(arg_listp, char *); substring_startp[substringn] = (char *)0; return_arg_number[substringn] = -1; substringn++; } va_end(arg_listp); /* TEST THE STRING AGAINST THE REGULAR EXPRESSION */ end_of_matchp = (char *)0; stringp_stackp = &stringp_stack[STRINGP_STACK_SIZE]; if ((int)*regexp == (int)START_OF_STRING_MARK) { /* * the match must start at the beginning of the string */ __loc1 = (char *)stringp; regexp++; end_of_matchp = test_string(stringp, regexp); } else if ((int)*regexp == (int)ASCII_CHAR) { /* * test a string against a regular expression * that starts with a single ASCII character: * * move to each character in the string that matches * the first character in the regular expression * and test the remaining string */ while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { stringp++; } while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { end_of_matchp = test_string(stringp, regexp); if (end_of_matchp != (char *)0) { __loc1 = (char *)stringp; } else { stringp++; while ((*stringp != *(regexp + 1)) && (*stringp != '\0')) { stringp++; } } } } else if (!multibyte) { /* * if the value of the "multibyte" macro defined in <euc.h> * is false, regex() is running in an ASCII locale; * test an ASCII string against an ASCII regular expression * that doesn't start with a single ASCII character: * * move forward in the 
string one byte at a time, testing * the remaining string against the regular expression */ end_of_matchp = test_string(stringp, regexp); while ((end_of_matchp == (char *)0) && (*stringp != '\0')) { stringp++; end_of_matchp = test_string(stringp, regexp); } if (end_of_matchp != (char *)0) { __loc1 = (char *)stringp; } } else if ((int)*regexp == (int)MULTIBYTE_CHAR) { /* * test a multibyte string against a multibyte regular expression * that starts with a single multibyte character: * * move to each character in the string that matches * the first character in the regular expression * and test the remaining string */ (void) get_wchar(®ex_wchar, regexp + 1); char_size = get_wchar(&string_wchar, stringp); while ((string_wchar != regex_wchar) && (char_size > 0)) { stringp += char_size; char_size = get_wchar(&string_wchar, stringp); } while ((end_of_matchp == (char *)0) && (char_size > 0)) { end_of_matchp = test_string(stringp, regexp); if (end_of_matchp != (char *)0) { __loc1 = (char *)stringp; } else { stringp += char_size; char_size = get_wchar(&string_wchar, stringp); while ((string_wchar != regex_wchar) && (char_size > 0)) { stringp += char_size; char_size = get_wchar(&string_wchar, stringp); } } } } else { /* * test a multibyte string against a multibyte regular expression * that doesn't start with a single multibyte character * * move forward in the string one multibyte character at a time, * testing the remaining string against the regular expression */ end_of_matchp = test_string(stringp, regexp); char_size = get_wchar(&string_wchar, stringp); while ((end_of_matchp == (char *)0) && (char_size > 0)) { stringp += char_size; end_of_matchp = test_string(stringp, regexp); char_size = get_wchar(&string_wchar, stringp); } if (end_of_matchp != (char *)0) { __loc1 = (char *)stringp; } } /* * Return substrings that matched subexpressions for which * matching substrings are to be returned. 
* * NOTE: * * According to manual page regcmp(3G), regex() returns substrings * that match subexpressions even when no substring matches the * entire regular expression. */ substringn = 0; while (substringn < NSUBSTRINGS) { substringp = substring_startp[substringn]; if ((substringp != (char *)0) && (return_arg_number[substringn] >= 0)) { returned_substringp = return_argp[return_arg_number[substringn]]; if (returned_substringp != (char *)0) { while (substringp < substring_endp[substringn]) { *returned_substringp = (char)*substringp; returned_substringp++; substringp++; } *returned_substringp = '\0'; } } substringn++; } lmutex_unlock(®ex_lock); return ((char *)end_of_matchp); } /* regex() */