/* Return the internal code representing the syntax SPEC, or -1 if SPEC is invalid. The `m4_syntax_map' table is searched case insensitively, after replacing any spaces or dashes in SPEC with underscore characters. Possible matches for the "GNU_M4" element then, are "gnu m4", "GNU-m4" or "Gnu_M4". */ int m4_regexp_syntax_encode (const char *spec) { const m4_resyntax *resyntax; char *canonical; char *p; /* Unless specified otherwise, return the historical GNU M4 default. */ if (!spec) return RE_SYNTAX_EMACS; canonical = xstrdup (spec); /* Canonicalise SPEC. */ for (p = canonical; *p != '\0'; ++p) { if ((*p == ' ') || (*p == '-')) *p = '_'; else if (islower (to_uchar (*p))) *p = toupper (to_uchar (*p)); } for (resyntax = m4_resyntax_map; resyntax->spec != NULL; ++resyntax) { if (STREQ (resyntax->spec, canonical)) break; } free (canonical); return resyntax->code; }
/* Convert the NUL-terminated ISO-8859-1 string STR to UTF-8.

   Returns a newly malloc'd string that the caller must free, or NULL
   if the allocation fails.  Every Latin-1 byte expands to at most two
   UTF-8 bytes, hence the 2 * strlen (STR) + 1 buffer.  */
char *
latin1toutf8 (const char *str)
{
  char *out = malloc (2 * strlen (str) + 1);
  const char *in;
  size_t used = 0;

  if (out == NULL)
    return out;

  for (in = str; *in != '\0'; in++)
    {
      if (to_uchar (*in) < 0x80)
        {
          /* Plain ASCII is copied unchanged.  */
          out[used++] = *in;
        }
      else if (to_uchar (*in) < 0xC0)
        {
          /* 0x80..0xBF: lead byte 0xC2, trail byte is the input byte.  */
          out[used++] = (unsigned char) 0xC2;
          out[used++] = *in;
        }
      else
        {
          /* 0xC0..0xFF: lead byte 0xC3, trail byte is input minus 0x40.  */
          out[used++] = (unsigned char) 0xC3;
          out[used++] = *in - 64;
        }
    }
  out[used] = 0x00;

  return out;
}
// Decode the first UTF-8 encoded code point in [begin, end).
//
// Returns the decoded value, or 0 when the range is empty, when the
// sequence is cut short by `end`, or when the lead byte is 0xFE/0xFF.
// Continuation bytes are not validated: only their low six bits are used.
// A stray continuation byte (0x80..0xBF) in lead position yields its low
// six bits, as before.
//
// Lead bytes 0xFE and 0xFF are rejected up front: they would drive the
// loop below six or seven times, making the final `<< num_bits` shift a
// 32-bit unsigned by 36 or 42 bits, which is undefined behaviour.
unsigned utf8_to_unicode(char const * begin, char const * end)
{
    unsigned result = 0;
    if (begin == end)
        return result;

    char const * it = begin;
    unsigned c = static_cast<unsigned char>(*it);
    ++it;

    if (c < 128)
        return c;               // single-byte (ASCII) sequence
    if (c >= 0xFE)
        return 0;               // never a valid lead byte; avoids the UB shift

    unsigned const mask = (1u << 6) - 1;  // payload bits of a continuation byte
    unsigned hmask = mask;                // payload bits left in the lead byte
    unsigned shift = 0;                   // how far the lead byte was shifted left
    unsigned num_bits = 0;                // payload bits gathered from continuations

    // Each leading 1 bit after the first announces one continuation byte.
    while ((c & 0xC0) == 0xC0)
    {
        c <<= 1;
        c &= 0xff;
        num_bits += 6;
        hmask >>= 1;
        shift++;
        result <<= 6;
        if (it == end)
            return 0;           // truncated sequence
        result |= *it & mask;
        ++it;
    }

    // Place the lead byte's payload above the continuation bits.
    result |= ((c >> shift) & hmask) << num_bits;
    return result;
}
/* Check whether the upcoming input spells out the string S.

   Returns true on a match.  A one-character S is consumed only when
   CONSUME is set.  For a longer S the characters are read while they
   keep matching; on a full match with CONSUME set they stay consumed,
   otherwise (mismatch, or match without CONSUME) the characters read
   so far are pushed back onto the input.  */
static bool
match_input (const char *s, bool consume)
{
  int matched;                  /* number of characters of S read so far */
  bool found = false;

  if (peek_input () != to_uchar (*s))
    return false;               /* first character differs */

  if (s[1] == '\0')
    {
      /* Single-character string: the peek already decided it.  */
      if (consume)
        next_char ();
      return true;
    }

  next_char ();
  matched = 1;
  while (peek_input () == to_uchar (s[matched]))
    {
      next_char ();
      matched++;
      if (s[matched] == '\0')
        {
          /* Whole string seen.  */
          if (consume)
            return true;
          found = true;
          break;
        }
    }

  /* Mismatch, or caller did not want the text consumed: push back
     the prefix of S that was read.  */
  {
    struct obstack *h = push_string_init ();

    /* `obstack_grow' may be macro evaluating its arg 1 several times. */
    obstack_grow (h, s, matched);
  }
  push_string_finish ();
  return found;
}
/* Overwrite, in place, every byte of the LEN-byte buffer BUF for which
   ISPRINT is false with '?'.  Returns LEN unchanged.  */
static size_t
unibyte_qmark_chars (char *buf, size_t len)
{
  size_t i;

  for (i = 0; i < len; i++)
    {
      if (! ISPRINT (to_uchar (buf[i])))
        buf[i] = '?';
    }

  return len;
}
/* Convert the NUL-terminated UTF-8 string PASSWD to ISO-8859-1 when
   every character in it is representable in Latin-1; otherwise return
   an unmodified copy of PASSWD.

   The result is always a newly malloc'd string that the caller must
   free, or NULL if allocation fails.

   Only two-byte sequences whose lead byte is 0xC2 or 0xC3 followed by
   a trail byte in 0x80..0xBF are converted (U+0080..U+00FF).  The lead
   bytes 0xC0 and 0xC1 are rejected: they are overlong encodings of
   ASCII, and accepting them would let the sequence C0 80 decode to an
   embedded NUL that silently truncates the result.  */
char *
utf8tolatin1ifpossible (const char *passwd)
{
  size_t len = strlen (passwd);
  size_t i;
  size_t j = 0;
  int convertible = 1;
  char *p;

  /* Pass 1: decide whether the whole string fits in Latin-1.  */
  for (i = 0; i < len; i++)
    {
      unsigned char lead = (unsigned char) passwd[i];

      if (lead > 0x7F)
        {
          unsigned char trail;

          if (lead < 0xC2 || lead > 0xC3)
            {
              convertible = 0;
              break;
            }
          /* If the string ends here, TRAIL is the terminating NUL,
             which fails the range test below.  */
          trail = (unsigned char) passwd[++i];
          if (trail < 0x80 || trail > 0xBF)
            {
              convertible = 0;
              break;
            }
        }
    }

  /* The Latin-1 form is never longer than the UTF-8 form.  */
  p = malloc (len + 1);
  if (p == NULL)
    return NULL;

  if (!convertible)
    {
      /* Hand back a plain copy, terminator included.  */
      memcpy (p, passwd, len + 1);
      return p;
    }

  /* Pass 2: fold each validated two-byte sequence into one byte.  */
  for (i = 0; i < len; i++)
    {
      unsigned char lead = (unsigned char) passwd[i];

      if (lead > 0x7F)
        {
          /* passwd[i + 1] was validated as a trail byte in pass 1.  */
          p[j++] = (char) (((lead & 0x3) << 6)
                           | ((unsigned char) passwd[i + 1] & 0x3F));
          i++;
        }
      else
        p[j++] = passwd[i];
    }
  p[j] = 0x00;

  return p;
}
static int peek_input (void) { int ch; input_block *block = isp; while (1) { if (block == NULL) return CHAR_EOF; switch (block->type) { case INPUT_STRING: ch = to_uchar (block->u.u_s.string[0]); if (ch != '\0') return ch; break; case INPUT_FILE: ch = getc (block->u.u_f.fp); if (ch != EOF) { ungetc (ch, block->u.u_f.fp); return ch; } block->u.u_f.end = true; break; case INPUT_MACRO: return CHAR_MACRO; default: M4ERROR ((warning_status, 0, "INTERNAL ERROR: input stack botch in peek_input ()")); abort (); } block = block->prev; } }
void get_field (const struct linebuffer *line, size_t field, const char** /* OUT*/ _ptr, size_t /*OUT*/ *_len) { size_t pos = 0; size_t flen = 0; const size_t buflen = line->length; char* fptr = line->buffer; /* Move 'fptr' to point to the beginning of 'field' */ if (tab != TAB_DEFAULT) { /* delimiter is explicit character */ while ((pos<buflen) && --field) { while ( (pos<buflen) && (*fptr != tab)) { ++fptr; ++pos; } if ( (pos<buflen) && (*fptr == tab)) { ++fptr; ++pos; } } } else { /* delimiter is white-space transition (multiple whitespaces are one delimiter) */ while ((pos<buflen) && --field) { while ( (pos<buflen) && !blanks[to_uchar(*fptr)]) { ++fptr; ++pos; } while ( (pos<buflen) && blanks[to_uchar(*fptr)]) { ++fptr; ++pos; } } } /* Find the length of the field (until the next delimiter/eol) */ if (tab != TAB_DEFAULT) { while ( (pos+flen<buflen) && (*(fptr+flen) != tab) ) flen++; } else { while ( (pos+flen<buflen) && !blanks[to_uchar(*(fptr+flen))] ) flen++; } /* Chomp field if needed */ if ( (flen>1) && ((*(fptr + flen -1) == 0) || (*(fptr+flen-1)==eolchar)) ) flen--; *_len = flen; *_ptr = fptr; }
static void cut_fields(FILE *stream ) { int c ; size_t field_idx ; _Bool found_any_selected_field ; _Bool buffer_first_field ; _Bool tmp ; int tmp___0 ; ssize_t len ; size_t n_bytes ; int tmp___1 ; int tmp___2 ; unsigned char tmp___3 ; _Bool tmp___4 ; _Bool tmp___5 ; { field_idx = 1UL; found_any_selected_field = (_Bool)0; c = getc_unlocked(stream); if (c == -1) { return; } else { } ungetc(c, stream); tmp = print_kth(1UL, (_Bool *)((void *)0)); if (tmp) { tmp___0 = 0; } else { tmp___0 = 1; } buffer_first_field = (_Bool )((int )suppress_non_delimited ^ tmp___0); while (1) { if (field_idx == 1UL) { if (buffer_first_field) { len = getndelim2(& field_1_buffer, & field_1_bufsize, 0UL, 4294967295UL, (int )delim, '\n', stream); if (len < 0L) { free((void *)field_1_buffer); field_1_buffer = (char *)((void *)0); tmp___1 = ferror_unlocked(stream); if (tmp___1) { break; } else { tmp___2 = feof_unlocked(stream); if (tmp___2) { break; } else { } } xalloc_die(); } else { } n_bytes = (unsigned long )len; if (! (n_bytes != 0UL)) { __assert_fail("n_bytes != 0", "cut.c", 626U, "cut_fields"); } else { } tmp___3 = to_uchar(*(field_1_buffer + (n_bytes - 1UL))); if ((int )tmp___3 != (int )delim) { if (! 
suppress_non_delimited) { fwrite_unlocked((void const */* __restrict */)field_1_buffer, sizeof(char ), n_bytes, (FILE */* __restrict */)stdout); if ((int )*(field_1_buffer + (n_bytes - 1UL)) != 10) { putchar_unlocked('\n'); } else { } } else { } continue; } else { } tmp___4 = print_kth(1UL, (_Bool *)((void *)0)); if (tmp___4) { fwrite_unlocked((void const */* __restrict */)field_1_buffer, sizeof(char ), n_bytes - 1UL, (FILE */* __restrict */)stdout); found_any_selected_field = (_Bool)1; } else { } field_idx ++; } else { } } else { } if (c != -1) { tmp___5 = print_kth(field_idx, (_Bool *)((void *)0)); if (tmp___5) { if (found_any_selected_field) { fwrite_unlocked((void const */* __restrict */)output_delimiter_string, sizeof(char ), output_delimiter_length, (FILE */* __restrict */)stdout); } else { } found_any_selected_field = (_Bool)1; while (1) { c = getc_unlocked(stream); if (c != (int )delim) { if (c != 10) { if (! (c != -1)) { break; } else { } } else { break; } } else { break; } putchar_unlocked(c); } } else { while (1) { c = getc_unlocked(stream); if (c != (int )delim) { if (c != 10) { if (! (c != -1)) { break; } else { } } else { break; } } else { break; } } } } else { } if (c == 10) { c = getc_unlocked(stream); if (c != -1) { ungetc(c, stream); c = '\n'; } else { } } else { } if (c == (int )delim) { field_idx ++; } else { if (c == 10) { goto _L; } else { if (c == -1) { _L: if (found_any_selected_field) { putchar_unlocked('\n'); } else { if (suppress_non_delimited) { if (! (field_idx == 1UL)) { putchar_unlocked('\n'); } else { } } else { putchar_unlocked('\n'); } } if (c == -1) { break; } else { } field_idx = 1UL; found_any_selected_field = (_Bool)0; } else { } } } } return; } }
static _Bool set_fields(char const *fieldstr ) { size_t initial ; size_t value ; _Bool lhs_specified ; _Bool rhs_specified ; _Bool dash_found ; _Bool field_found ; struct range_pair *rp ; size_t n_rp ; size_t n_rp_allocated ; size_t i ; _Bool in_digits ; char *tmp ; char *tmp___0 ; char *tmp___1 ; char *tmp___2 ; void *tmp___3 ; char *tmp___4 ; void *tmp___5 ; char *tmp___6 ; void *tmp___7 ; size_t len ; size_t tmp___8 ; char *bad_num ; char *tmp___9 ; char const *tmp___10 ; char *tmp___11 ; char const *tmp___12 ; char *tmp___13 ; int tmp___14 ; char *tmp___15 ; unsigned short const **tmp___16 ; unsigned char tmp___17 ; void *tmp___18 ; size_t j ; size_t rsi_candidate ; _Bool tmp___19 ; _Bool tmp___20 ; { initial = 1UL; value = 0UL; lhs_specified = (_Bool)0; rhs_specified = (_Bool)0; dash_found = (_Bool)0; field_found = (_Bool)0; rp = (struct range_pair *)((void *)0); n_rp = 0UL; n_rp_allocated = 0UL; in_digits = (_Bool)0; while (1) { __repair_app_270__3a5: /* CIL Label */ { if ((int const )*fieldstr == 45) { in_digits = (_Bool)0; if (dash_found) { while (1) { tmp = gettext("invalid byte or field list"); error(0, 0, (char const *)tmp); usage(1); break; } } else { } dash_found = (_Bool)1; fieldstr ++; if (lhs_specified) { initial = value; } else { initial = 1UL; } value = 0UL; } else { if ((int const )*fieldstr == 44) { goto _L___2; } else { tmp___16 = __ctype_b_loc(); tmp___17 = to_uchar((char )*fieldstr); if ((int const )*(*tmp___16 + (int )tmp___17) & 1) { goto _L___2; } else { if ((int const )*fieldstr == 0) { _L___2: in_digits = (_Bool)0; if (dash_found) { dash_found = (_Bool)0; if (! lhs_specified) { if (! rhs_specified) { while (1) { tmp___0 = gettext("invalid range with no endpoint: -"); error(0, 0, (char const *)tmp___0); usage(1); break; } } else { } } else { } if (! 
rhs_specified) { eol_range_start = initial; field_found = (_Bool)1; } else { if (value < initial) { while (1) { tmp___1 = gettext("invalid decreasing range"); error(0, 0, (char const *)tmp___1); usage(1); break; } } else { } if (eol_range_start != 0UL) { if (initial < eol_range_start) { if (eol_range_start <= value) { eol_range_start = initial; } else { while (1) { if (initial == 0UL) { goto _L; } else { if (value == 0UL) { _L: while (1) { tmp___2 = gettext("fields and positions are numbered from 1"); error(0, 0, (char const *)tmp___2); usage(1); break; } } else { } } if (n_rp >= n_rp_allocated) { rp = (struct range_pair *)tmp___3; } else { } (rp + n_rp)->lo = initial; (rp + n_rp)->hi = value; n_rp ++; break; } } field_found = (_Bool)1; } else { } } else { while (1) { if (initial == 0UL) { goto _L___0; } else { if (value == 0UL) { _L___0: while (1) { tmp___4 = gettext("fields and positions are numbered from 1"); error(0, 0, (char const *)tmp___4); usage(1); break; } } else { } } if (n_rp >= n_rp_allocated) { rp = (struct range_pair *)tmp___5; } else { } (rp + n_rp)->lo = initial; (rp + n_rp)->hi = value; n_rp ++; break; } field_found = (_Bool)1; } value = 0UL; } } else { while (1) { if (value == 0UL) { goto _L___1; } else { if (value == 0UL) { _L___1: while (1) { tmp___6 = gettext("fields and positions are numbered from 1"); error(0, 0, (char const *)tmp___6); usage(1); break; } } else { } } if (n_rp >= n_rp_allocated) { rp = (struct range_pair *)tmp___7; } else { } (rp + n_rp)->lo = value; (rp + n_rp)->hi = value; n_rp ++; break; } value = 0UL; field_found = (_Bool)1; } if ((int const )*fieldstr == 0) { break; } else { } fieldstr ++; lhs_specified = (_Bool)0; rhs_specified = (_Bool)0; } else { if ((unsigned int )*fieldstr - 48U <= 9U) { if (! in_digits) { num_start = fieldstr; } else { if (! 
num_start) { num_start = fieldstr; } else { } } in_digits = (_Bool)1; if (dash_found) { rhs_specified = (_Bool)1; } else { lhs_specified = (_Bool)1; } if (0UL < value) { tmp___14 = 0; } else { if (value * 10UL + (unsigned long )((int const )*fieldstr - 48) < value) { tmp___14 = 0; } else { value = value * 10UL + (unsigned long )((int const )*fieldstr - 48); tmp___14 = 1; } } if (! tmp___14) { tmp___8 = strspn(num_start, "0123456789"); len = tmp___8; tmp___9 = xstrndup(num_start, len); bad_num = tmp___9; if ((unsigned int )operating_mode == 1U) { tmp___10 = quote((char const *)bad_num); tmp___11 = gettext("byte offset %s is too large"); error(0, 0, (char const *)tmp___11, tmp___10); } else { tmp___12 = quote((char const *)bad_num); tmp___13 = gettext("field number %s is too large"); error(0, 0, (char const *)tmp___13, tmp___12); } free((void *)bad_num); exit(1); } else { } fieldstr ++; } else { while (1) { tmp___15 = gettext("invalid byte or field list"); error(0, 0, (char const *)tmp___15); usage(1); break; } } } } } } value = 0UL; } } max_range_endpoint = 0UL; i = 0UL; while (i < n_rp) { if ((rp + i)->hi > max_range_endpoint) { max_range_endpoint = (rp + i)->hi; } else { } i ++; } tmp___18 = xzalloc(max_range_endpoint / 8UL + 1UL); printable_field = (unsigned char *)tmp___18; qsort((void *)rp, n_rp, sizeof(*(rp + 0)), & compare_ranges); i = 0UL; while (i < n_rp) { if (complement) { rsi_candidate = (rp + i)->hi + 1UL; } else { rsi_candidate = (rp + i)->lo; } if (output_delimiter_specified) { tmp___19 = is_printable_field(rsi_candidate); if (! tmp___19) { mark_range_start(rsi_candidate); } else { } } else { } j = (rp + i)->lo; while (j <= (rp + i)->hi) { mark_printable_field(j); j ++; } i ++; } if (output_delimiter_specified) { if (! complement) { if (eol_range_start) { tmp___20 = is_printable_field(eol_range_start); if (! tmp___20) { mark_range_start(eol_range_start); } else { } } else { } } else { } } else { } free((void *)rp); return (field_found); } }
/* Search the SIZE-byte buffer BUF for a match of the compiled PCRE
   pattern.

   START_PTR, when non-null, is the position inside BUF at which the
   search begins; otherwise the search starts at BUF.  Note that the
   byte just before the starting position is read (p[-1]) to decide
   whether the search starts at the beginning of a line.

   On success the return value is the offset from BUF of the reported
   region and *MATCH_SIZE is its length: the exact match when
   START_PTR was given, otherwise the whole line(s) containing it,
   including the trailing end-of-line byte.  Returns (size_t) -1 when
   nothing matches.  PCRE failures other than "no match" are reported
   through error (EXIT_TROUBLE, ...).

   Without HAVE_LIBPCRE the function only reports an internal error.  */
size_t
Pexecute (char const *buf, size_t size, size_t *match_size,
          char const *start_ptr)
{
#if !HAVE_LIBPCRE
  /* We can't get here, because Pcompile would have been called earlier. */
  error (EXIT_TROUBLE, 0, _("internal error"));
  return -1;
#else
  int sub[NSUB];
  char const *p = start_ptr ? start_ptr : buf;
  bool bol = p[-1] == eolbyte;
  char const *line_start = buf;
  int e = PCRE_ERROR_NOMATCH;
  char const *line_end;

  /* The search address to pass to pcre_exec. This is the start of the
     buffer, or just past the most-recently discovered encoding error. */
  char const *subject = buf;

  /* If the input type is unknown, the caller is still testing the input,
     which means the current buffer cannot contain encoding errors and a
     multiline search is typically more efficient. Otherwise, a
     single-line search is typically faster, so that pcre_exec doesn't
     waste time validating the entire input buffer. */
  bool multiline = input_textbin == TEXTBIN_UNKNOWN;

  /* One iteration per search region: up to the last end-of-line byte
     of the scan window in multiline mode, a single line otherwise.  */
  for (; p < buf + size; p = line_start = line_end + 1)
    {
      bool too_big;

      if (multiline)
        {
          size_t pcre_size_max = MIN (INT_MAX, SIZE_MAX - 1);
          size_t scan_size = MIN (pcre_size_max + 1, buf + size - p);
          line_end = memrchr (p, eolbyte, scan_size);
          too_big = ! line_end;
        }
      else
        {
          line_end = memchr (p, eolbyte, buf + size - p);
          too_big = INT_MAX < line_end - p;
        }

      if (too_big)
        error (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));

      /* Retry loop: each pass either settles E for this region or
         steps past one encoding error / one line-crossing match.  */
      for (;;)
        {
          /* Skip past bytes that are easily determined to be encoding
             errors, treating them as data that cannot match. This is
             faster than having pcre_exec check them. */
          while (mbclen_cache[to_uchar (*p)] == (size_t) -1)
            {
              p++;
              bol = false;
            }

          int search_offset = p - subject;

          /* Check for an empty match; this is faster than letting
             pcre_exec do it. */
          if (p == line_end)
            {
              sub[0] = sub[1] = search_offset;
              e = empty_match[bol];
              break;
            }

          int options = 0;
          if (!bol)
            options |= PCRE_NOTBOL;
          if (multiline)
            options |= PCRE_NO_UTF8_CHECK;

          e = jit_exec (subject, line_end - subject, search_offset,
                        options, sub);
          if (e != PCRE_ERROR_BADUTF8)
            {
              if (0 < e && multiline && sub[1] - sub[0] != 0)
                {
                  char const *nl = memchr (subject + sub[0], eolbyte,
                                           sub[1] - sub[0]);
                  if (nl)
                    {
                      /* This match crosses a line boundary; reject it. */
                      p = subject + sub[0];
                      line_end = nl;
                      continue;
                    }
                }
              break;
            }

          /* On PCRE_ERROR_BADUTF8 the code below treats sub[0] as the
             count of valid bytes preceding the encoding error.  */
          int valid_bytes = sub[0];

          /* Try to match the string before the encoding error. */
          if (valid_bytes < search_offset)
            e = PCRE_ERROR_NOMATCH;
          else if (valid_bytes == 0)
            {
              /* Handle the empty-match case specially, for speed.
                 This optimization is valid if VALID_BYTES is zero,
                 which means SEARCH_OFFSET is also zero. */
              sub[1] = 0;
              e = empty_match[bol];
            }
          else
            e = jit_exec (subject, valid_bytes, search_offset,
                          options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, sub);

          if (e != PCRE_ERROR_NOMATCH)
            break;

          /* Treat the encoding error as data that cannot match. */
          p = subject += valid_bytes + 1;
          bol = false;
        }

      if (e != PCRE_ERROR_NOMATCH)
        break;
      bol = true;
    }

  if (e <= 0)
    {
      /* NOTE(review): the cases below fall through into one another;
         presumably error () does not return when given EXIT_TROUBLE --
         confirm.  */
      switch (e)
        {
        case PCRE_ERROR_NOMATCH:
          break;

        case PCRE_ERROR_NOMEMORY:
          error (EXIT_TROUBLE, 0, _("memory exhausted"));

# if PCRE_STUDY_JIT_COMPILE
        case PCRE_ERROR_JIT_STACKLIMIT:
          error (EXIT_TROUBLE, 0, _("exhausted PCRE JIT stack"));
# endif

        case PCRE_ERROR_MATCHLIMIT:
          error (EXIT_TROUBLE, 0, _("exceeded PCRE's backtracking limit"));

        default:
          /* For now, we lump all remaining PCRE failures into this basket.
             If anyone cares to provide sample grep usage that can trigger
             particular PCRE errors, we can add to the list (above) of more
             detailed diagnostics. */
          error (EXIT_TROUBLE, 0, _("internal PCRE error: %d"), e);
        }

      return -1;
    }
  else
    {
      char const *matchbeg = subject + sub[0];
      char const *matchend = subject + sub[1];
      char const *beg;
      char const *end;
      if (start_ptr)
        {
          /* The caller asked for the exact match.  */
          beg = matchbeg;
          end = matchend;
        }
      else if (multiline)
        {
          /* Widen the match to the line(s) that contain it.  */
          char const *prev_nl = memrchr (line_start - 1, eolbyte,
                                         matchbeg - (line_start - 1));
          char const *next_nl = memchr (matchend, eolbyte,
                                        line_end + 1 - matchend);
          beg = prev_nl + 1;
          end = next_nl + 1;
        }
      else
        {
          /* Single-line search: report the whole current line.  */
          beg = line_start;
          end = line_end + 1;
        }
      *match_size = end - beg;
      return beg - buf;
    }
#endif
}
static int next_char_1 (void) { int ch; while (1) { if (isp == NULL) { current_file = ""; current_line = 0; return CHAR_EOF; } if (input_change) { current_file = isp->file; current_line = isp->line; input_change = false; } switch (isp->type) { case INPUT_STRING: ch = to_uchar (*isp->u.u_s.string++); if (ch != '\0') return ch; break; case INPUT_FILE: if (start_of_input_line) { start_of_input_line = false; current_line = ++isp->line; } /* If stdin is a terminal, calling getc after peek_input already called it would make the user have to hit ^D twice to quit. */ ch = isp->u.u_f.end ? EOF : getc (isp->u.u_f.fp); if (ch != EOF) { if (ch == '\n') start_of_input_line = true; return ch; } break; case INPUT_MACRO: pop_input (); /* INPUT_MACRO input sources has only one token */ return CHAR_MACRO; default: M4ERROR ((warning_status, 0, "INTERNAL ERROR: input stack botch in next_char ()")); abort (); } /* End of input source --- pop one level. */ pop_input (); } }
// Left-shift operator for `byte`: converts B with to_uchar, shifts the
// result left by SHIFT, converts the shifted value with to_uchar again and
// brace-initialises a `byte` from it.
// NOTE(review): IntegerType is presumably a template parameter declared on
// a line outside this view, and to_uchar presumably narrows to unsigned
// char (dropping bits shifted past bit 7) -- confirm both.
constexpr byte operator<<(byte b, IntegerType shift) noexcept { return byte{ to_uchar(to_uchar( b ) << shift) }; }
/* Collect one macro argument from the input.

   Tokens are read and expanded onto OBS until a comma or a closing
   parenthesis is met outside any nested parentheses.  The finished
   text is stored in ARGP as TOKEN_TEXT, unless ARGP was already
   filled in as TOKEN_FUNC because a macro-definition token arrived
   while OBS was still empty.  Leading white space is skipped.

   Returns true when the argument ended with a comma (more arguments
   follow) and false when it ended with the closing parenthesis.  End
   of input inside the argument list is reported as a fatal error
   against the file and line where the argument began.  */
static bool
expand_argument (struct obstack *obs, token_data *argp)
{
  const char *start_file = current_file;
  int start_line = current_line;
  int depth = 0;                /* nesting level of parentheses */
  token_data tok;
  token_type kind;
  char *str;

  TOKEN_DATA_TYPE (argp) = TOKEN_VOID;

  /* Skip leading white space.  */
  kind = next_token (&tok, NULL);
  while (kind == TOKEN_SIMPLE
         && isspace (to_uchar (*TOKEN_DATA_TEXT (&tok))))
    kind = next_token (&tok, NULL);

  for (;; kind = next_token (&tok, NULL))
    {
      switch (kind)
        { /* TOKSW */
        case TOKEN_COMMA:
        case TOKEN_CLOSE:
        case TOKEN_OPEN:
        case TOKEN_SIMPLE:
          if ((kind == TOKEN_COMMA || kind == TOKEN_CLOSE) && depth == 0)
            {
              /* The argument MUST be finished, whether we want it or
                 not.  */
              obstack_1grow (obs, '\0');
              str = (char *) obstack_finish (obs);

              if (TOKEN_DATA_TYPE (argp) == TOKEN_VOID)
                {
                  TOKEN_DATA_TYPE (argp) = TOKEN_TEXT;
                  TOKEN_DATA_TEXT (argp) = str;
                }
              return kind == TOKEN_COMMA;
            }

          /* Part of the argument text: track parenthesis nesting.  */
          str = TOKEN_DATA_TEXT (&tok);
          if (*str == '(')
            depth++;
          else if (*str == ')')
            depth--;
          expand_token (obs, kind, &tok, start_line);
          break;

        case TOKEN_EOF:
          /* current_file changed to "" if we see TOKEN_EOF, use the
             previous value we stored earlier.  */
          M4ERROR_AT_LINE ((EXIT_FAILURE, 0, start_file, start_line,
                            "ERROR: end of file in argument list"));
          break;

        case TOKEN_WORD:
        case TOKEN_STRING:
          expand_token (obs, kind, &tok, start_line);
          break;

        case TOKEN_MACDEF:
          /* A builtin is passed through only as the very first thing
             in the argument.  */
          if (obstack_object_size (obs) == 0)
            {
              TOKEN_DATA_TYPE (argp) = TOKEN_FUNC;
              TOKEN_DATA_FUNC (argp) = TOKEN_DATA_FUNC (&tok);
            }
          break;

        default:
          M4ERROR ((warning_status, 0,
                    "INTERNAL ERROR: bad token type in expand_argument ()"));
          abort ();
        }
    }
}
/* Expand a printf-like format and append the result to OBS.

   The format control string is fetched with ARG_STR; each conversion
   then fetches its operands (a `*' width, a `*' precision, the value)
   with the ARG_* macros.  NOTE(review): those macros are defined
   outside this view; presumably each one consumes the next element of
   ARGV and updates ARGC/ARGV -- confirm.

   Text outside conversions is copied verbatim and "%%" yields a
   single '%'.  Each conversion is formatted by passing its own
   specification text to xasprintf; for that purpose the format string
   is temporarily NUL-terminated in place right after the conversion
   character.  Conversions with an unrecognised conversion character,
   or for which xasprintf returns NULL, produce no output.  */
void
format (struct obstack *obs, int argc, token_data **argv)
{
  char *fmt;                    /* format control string */
  const char *fstart;           /* beginning of current format spec */
  int c;                        /* a simple character */

  /* Flags. */
  char flags;                   /* 1 iff treating flags */

  /* Precision specifiers. */
  int width;                    /* minimum field width */
  int prec;                     /* precision */
  char lflag;                   /* long flag */
  char hflag;                   /* short flag */

  /* Buffer and stuff. */
  char *str;                    /* malloc'd buffer of formatted text */
  enum {INT, UINT, LONG, ULONG, DOUBLE, STR} datatype;

  fmt = (char *) ARG_STR (argc, argv);
  for (;;)
    {
      /* Copy literal text up to the next '%'; stop at end of string.  */
      while ((c = *fmt++) != '%')
        {
          if (c == 0)
            return;
          obstack_1grow (obs, c);
        }
      fstart = fmt - 1;
      if (*fmt == '%')
        {
          obstack_1grow (obs, '%');
          fmt++;
          continue;
        }

      /* Parse flags. */
      flags = 1;
      do
        {
          switch (*fmt)
            {
            case '-':           /* left justification */
            case '+':           /* mandatory sign */
            case ' ':           /* space instead of positive sign */
            case '0':           /* zero padding */
            case '#':           /* alternate output */
              break;

            default:
              flags = 0;
              break;
            }
        }
      while (flags && fmt++);

      /* Minimum field width. */
      /* WIDTH and PREC only record a `*' operand; literal digits are
         skipped here and reach xasprintf as part of the text at
         FSTART.  */
      width = -1;
      if (*fmt == '*')
        {
          width = ARG_INT (argc, argv);
          fmt++;
        }
      else if (isdigit (to_uchar (*fmt)))
        {
          do
            {
              fmt++;
            }
          while (isdigit (to_uchar (*fmt)));
        }

      /* Maximum precision. */
      prec = -1;
      if (*fmt == '.')
        {
          if (*(++fmt) == '*')
            {
              prec = ARG_INT (argc, argv);
              ++fmt;
            }
          else if (isdigit (to_uchar (*fmt)))
            {
              do
                {
                  fmt++;
                }
              while (isdigit (to_uchar (*fmt)));
            }
        }

      /* Length modifiers. */
      /* NOTE(review): HFLAG only causes the modifier to be skipped; it
         does not influence the operand type chosen below.  */
      lflag = (*fmt == 'l');
      hflag = (*fmt == 'h');
      if (lflag || hflag)
        fmt++;

      /* Map the conversion character to the operand type to fetch.  */
      switch (*fmt++)
        {
        case '\0':
          return;

        case 'c':
          datatype = INT;
          break;

        case 's':
          datatype = STR;
          break;

        case 'd':
        case 'i':
          if (lflag)
            {
              datatype = LONG;
            }
          else
            {
              datatype = INT;
            }
          break;

        case 'o':
        case 'x':
        case 'X':
        case 'u':
          if (lflag)
            {
              datatype = ULONG;
            }
          else
            {
              datatype = UINT;
            }
          break;

        case 'e':
        case 'E':
        case 'f':
        case 'F':
        case 'g':
        case 'G':
          datatype = DOUBLE;
          break;

        default:
          continue;
        }

      /* Temporarily terminate the format string after this conversion
         so that FSTART is a complete, single-conversion format.  */
      c = *fmt;
      *fmt = '\0';

      /* For each type, the xasprintf argument list depends on whether
         the width and/or precision came from `*' operands.  */
      switch(datatype)
        {
        case INT:
          if (width != -1 && prec != -1)
            str = xasprintf (fstart, width, prec, ARG_INT(argc, argv));
          else if (width != -1)
            str = xasprintf (fstart, width, ARG_INT(argc, argv));
          else if (prec != -1)
            str = xasprintf (fstart, prec, ARG_INT(argc, argv));
          else
            str = xasprintf (fstart, ARG_INT(argc, argv));
          break;

        case UINT:
          if (width != -1 && prec != -1)
            str = xasprintf (fstart, width, prec, ARG_UINT(argc, argv));
          else if (width != -1)
            str = xasprintf (fstart, width, ARG_UINT(argc, argv));
          else if (prec != -1)
            str = xasprintf (fstart, prec, ARG_UINT(argc, argv));
          else
            str = xasprintf (fstart, ARG_UINT(argc, argv));
          break;

        case LONG:
          if (width != -1 && prec != -1)
            str = xasprintf (fstart, width, prec, ARG_LONG(argc, argv));
          else if (width != -1)
            str = xasprintf (fstart, width, ARG_LONG(argc, argv));
          else if (prec != -1)
            str = xasprintf (fstart, prec, ARG_LONG(argc, argv));
          else
            str = xasprintf (fstart, ARG_LONG(argc, argv));
          break;

        case ULONG:
          if (width != -1 && prec != -1)
            str = xasprintf (fstart, width, prec, ARG_ULONG(argc, argv));
          else if (width != -1)
            str = xasprintf (fstart, width, ARG_ULONG(argc, argv));
          else if (prec != -1)
            str = xasprintf (fstart, prec, ARG_ULONG(argc, argv));
          else
            str = xasprintf (fstart, ARG_ULONG(argc, argv));
          break;

        case DOUBLE:
          if (width != -1 && prec != -1)
            str = xasprintf (fstart, width, prec, ARG_DOUBLE(argc, argv));
          else if (width != -1)
            str = xasprintf (fstart, width, ARG_DOUBLE(argc, argv));
          else if (prec != -1)
            str = xasprintf (fstart, prec, ARG_DOUBLE(argc, argv));
          else
            str = xasprintf (fstart, ARG_DOUBLE(argc, argv));
          break;

        case STR:
          if (width != -1 && prec != -1)
            str = xasprintf (fstart, width, prec, ARG_STR(argc, argv));
          else if (width != -1)
            str = xasprintf (fstart, width, ARG_STR(argc, argv));
          else if (prec != -1)
            str = xasprintf (fstart, prec, ARG_STR(argc, argv));
          else
            str = xasprintf (fstart, ARG_STR(argc, argv));
          break;

        default:
          abort();
        }

      /* Restore the character that was overwritten above.  */
      *fmt = c;

      /* NULL was returned on failure, such as invalid format string. For
         now, just silently ignore that bad specifier. */
      if (str == NULL)
        continue;

      obstack_grow (obs, str, strlen (str));
      free (str);
    }
}
/* Read the next token from the input and store it in TD.

   Returns the token type.  For TOKEN_EOF nothing is stored in TD; for
   TOKEN_MACDEF TD is filled in by init_macro_token; for every other
   type TD becomes a TOKEN_TEXT whose text lives on `token_stack'.
   If LINE is non-null, *LINE receives the value of current_line at
   the point where the token began.

   Token classes, tried in this order: a comment (returned verbatim,
   delimiters included, as TOKEN_STRING); a word (TOKEN_WORD), using
   either the built-in identifier syntax or, with ENABLE_CHANGEWORD,
   the user-supplied `word_regexp'; a quoted string (outermost quotes
   stripped, nested ones kept, TOKEN_STRING); otherwise a single
   character, typed TOKEN_OPEN, TOKEN_COMMA, TOKEN_CLOSE or
   TOKEN_SIMPLE.

   End of input inside a comment or quoted string is a fatal error
   reported against the file and line where the token began.

   Fix: the re_search call passed a mis-encoded token in place of
   `&regs'; the address of the register set is now passed so that the
   ENABLE_CHANGEWORD branch compiles and fills in `regs'.  */
token_type
next_token (token_data *td, int *line)
{
  int ch;
  int quote_level;
  token_type type;
#ifdef ENABLE_CHANGEWORD
  int startpos;
  char *orig_text = NULL;
#endif
  const char *file;
  int dummy;

  /* Discard the text of the previous token.  */
  obstack_free (&token_stack, token_bottom);
  if (!line)
    line = &dummy;

  /* Can't consume character until after CHAR_MACRO is handled.  */
  ch = peek_input ();
  if (ch == CHAR_EOF)
    {
#ifdef DEBUG_INPUT
      xfprintf (stderr, "next_token -> EOF\n");
#endif
      next_char ();
      return TOKEN_EOF;
    }
  if (ch == CHAR_MACRO)
    {
      init_macro_token (td);
      next_char ();
#ifdef DEBUG_INPUT
      xfprintf (stderr, "next_token -> MACDEF (%s)\n",
                find_builtin_by_addr (TOKEN_DATA_FUNC (td))->name);
#endif
      return TOKEN_MACDEF;
    }

  next_char (); /* Consume character we already peeked at.  */
  file = current_file;
  *line = current_line;
  if (MATCH (ch, bcomm.string, true))
    {
      /* Comment: copy everything through the end delimiter.  */
      obstack_grow (&token_stack, bcomm.string, bcomm.length);
      while ((ch = next_char ()) != CHAR_EOF
             && !MATCH (ch, ecomm.string, true))
        obstack_1grow (&token_stack, ch);
      if (ch != CHAR_EOF)
        obstack_grow (&token_stack, ecomm.string, ecomm.length);
      else
        /* current_file changed to "" if we see CHAR_EOF, use the
           previous value we stored earlier.  */
        M4ERROR_AT_LINE ((EXIT_FAILURE, 0, file, *line,
                          "ERROR: end of file in comment"));

      type = TOKEN_STRING;
    }
  else if (default_word_regexp && (isalpha (ch) || ch == '_'))
    {
      /* Word in the default syntax: [A-Za-z_][A-Za-z0-9_]*.  */
      obstack_1grow (&token_stack, ch);
      while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_'))
        {
          obstack_1grow (&token_stack, ch);
          next_char ();
        }
      type = TOKEN_WORD;
    }

#ifdef ENABLE_CHANGEWORD

  else if (!default_word_regexp && word_regexp.fastmap[ch])
    {
      /* Word in the user-defined syntax: extend the candidate one
         character at a time for as long as the regexp still matches
         all of it.  */
      obstack_1grow (&token_stack, ch);
      while (1)
        {
          ch = peek_input ();
          if (ch == CHAR_EOF)
            break;
          obstack_1grow (&token_stack, ch);
          startpos = re_search (&word_regexp,
                                (char *) obstack_base (&token_stack),
                                obstack_object_size (&token_stack), 0, 0,
                                &regs);
          if (startpos
              || regs.end [0] != (regoff_t) obstack_object_size (&token_stack))
            {
              /* The last character broke the match: overwrite it with
                 a NUL and leave it unread in the input.  */
              *(((char *) obstack_base (&token_stack)
                 + obstack_object_size (&token_stack)) - 1) = '\0';
              break;
            }
          next_char ();
        }

      obstack_1grow (&token_stack, '\0');
      orig_text = (char *) obstack_finish (&token_stack);

      /* The token text is the first subexpression when the regexp has
         one, else the whole match.  */
      if (regs.start[1] != -1)
        obstack_grow (&token_stack,orig_text + regs.start[1],
                      regs.end[1] - regs.start[1]);
      else
        obstack_grow (&token_stack, orig_text,regs.end[0]);

      type = TOKEN_WORD;
    }

#endif /* ENABLE_CHANGEWORD */

  else if (!MATCH (ch, lquote.string, true))
    {
      /* Any other single character.  */
      switch (ch)
        {
        case '(':
          type = TOKEN_OPEN;
          break;
        case ',':
          type = TOKEN_COMMA;
          break;
        case ')':
          type = TOKEN_CLOSE;
          break;
        default:
          type = TOKEN_SIMPLE;
          break;
        }
      obstack_1grow (&token_stack, ch);
    }
  else
    {
      /* Quoted string.  FAST is set when both quote delimiters are a
         single character; the buffer scan below can then track the
         nesting level by itself.  */
      bool fast = lquote.length == 1 && rquote.length == 1;
      quote_level = 1;
      while (1)
        {
          /* Try scanning a buffer first.  */
          const char *buffer = (isp && isp->type == INPUT_STRING
                                ? isp->u.u_s.string : NULL);
          if (buffer && *buffer)
            {
              size_t len = isp->u.u_s.end - buffer;
              const char *p = buffer;
              do
                {
                  p = (char *) memchr2 (p, *lquote.string, *rquote.string,
                                        buffer + len - p);
                }
              while (p && fast && (*p++ == *rquote.string
                                   ? --quote_level : ++quote_level));
              if (p)
                {
                  if (fast)
                    {
                      /* P is one past the closing quote: keep the text
                         before the quote and consume through it.  */
                      assert (!quote_level);
                      obstack_grow (&token_stack, buffer, p - buffer - 1);
                      isp->u.u_s.string += p - buffer;
                      break;
                    }
                  /* P is at a possible delimiter start; let the MATCH
                     tests below decide.  */
                  obstack_grow (&token_stack, buffer, p - buffer);
                  ch = to_uchar (*p);
                  isp->u.u_s.string += p - buffer + 1;
                }
              else
                {
                  /* No delimiter in this buffer: take all of it.  */
                  obstack_grow (&token_stack, buffer, len);
                  isp->u.u_s.string += len;
                  continue;
                }
            }
          /* Fall back to a byte.  */
          else
            ch = next_char ();
          if (ch == CHAR_EOF)
            /* current_file changed to "" if we see CHAR_EOF, use
               the previous value we stored earlier.  */
            M4ERROR_AT_LINE ((EXIT_FAILURE, 0, file, *line,
                              "ERROR: end of file in string"));

          if (MATCH (ch, rquote.string, true))
            {
              if (--quote_level == 0)
                break;
              obstack_grow (&token_stack, rquote.string, rquote.length);
            }
          else if (MATCH (ch, lquote.string, true))
            {
              quote_level++;
              obstack_grow (&token_stack, lquote.string, lquote.length);
            }
          else
            obstack_1grow (&token_stack, ch);
        }
      type = TOKEN_STRING;
    }

  obstack_1grow (&token_stack, '\0');

  TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
  TOKEN_DATA_TEXT (td) = (char *) obstack_finish (&token_stack);
#ifdef ENABLE_CHANGEWORD
  if (orig_text == NULL)
    orig_text = TOKEN_DATA_TEXT (td);
  TOKEN_DATA_ORIG_TEXT (td) = orig_text;
#endif
#ifdef DEBUG_INPUT
  xfprintf (stderr, "next_token -> %s (%s)\n",
            token_type_string (type), TOKEN_DATA_TEXT (td));
#endif
  return type;
}
/* The 64 output characters of the base64 alphabet, indexed by 6-bit
   value.  The array has exactly 64 elements, so it carries no
   terminating NUL.  */
static const char b64c[64] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* Base64 encode IN array of size INLEN into OUT array. OUT needs
   to be of length >= BASE64_LENGTH(INLEN), and INLEN needs to be
   a multiple of 3.

   Each iteration reads three input bytes and writes four output
   characters; no padding and no terminating NUL are written.  The
   loop ends only when INLEN reaches exactly zero, so an INLEN that is
   not a multiple of 3 would wrap around instead of terminating.  */
static void
base64_encode_fast (const char *restrict in, size_t inlen, char *restrict out)
{
  while (inlen)
    {
      /* Top six bits of byte 0.  */
      *out++ = b64c[to_uchar (in[0]) >> 2];
      /* Low two bits of byte 0, top four bits of byte 1.  */
      *out++ = b64c[((to_uchar (in[0]) << 4) + (to_uchar (in[1]) >> 4)) & 0x3f];
      /* Low four bits of byte 1, top two bits of byte 2.  */
      *out++ = b64c[((to_uchar (in[1]) << 2) + (to_uchar (in[2]) >> 6)) & 0x3f];
      /* Low six bits of byte 2.  */
      *out++ = b64c[to_uchar (in[2]) & 0x3f];

      inlen -= 3;
      in += 3;
    }
}

/* Base64 encode IN array of size INLEN into OUT array of size OUTLEN.
   If OUTLEN is less than BASE64_LENGTH(INLEN), write as many bytes as
   possible. If OUTLEN is larger than BASE64_LENGTH(INLEN), also zero
   terminate the output buffer. */
void
base64_encode (const char *restrict in, size_t inlen,
               char *restrict out, size_t outlen)
{
  /* Note this outlen constraint can be enforced at compile time.
/* Given the list of field or byte range specifications FIELDSTR, allocate and initialize the FRP array. FIELDSTR should be composed of one or more numbers or ranges of numbers, separated by blanks or commas. Incomplete ranges may be given: '-m' means '1-m'; 'n-' means 'n' through end of line. n=0 and n>=SIZE_MAX values will trigger an error. if SETFLD_ALLOW_DASH option is used, a single '-' means all fields (otherwise a single dash triggers an error). if SETFLD_COMPLEMENT option is used, the specified field list is complemented (e.g. '1-3' will result in fields '4-'). if SETFLD_ERRMSG_USE_POS option is used, error messages will say 'position' (or 'byte/character positions') instead of fields (used with cut -b/-c). The function terminates on failure. Upon return, the FRP array is initialized to contain a non-overlapping, increasing list of field ranges. N_FRP holds the number of field ranges in the FRP array. The first field is stored as 1 (zero is not used). An open-ended range (i.e., until the last field of the input line) is indicated with hi = SIZE_MAX. A sentinel of SIZE_MAX/SIZE_MAX is always added as the last field range pair. Examples: given '1-2,4', frp = [ { .lo = 1, .hi = 2 }, { .lo = 4, .hi = 4 }, { .lo = SIZE_MAX, .hi = SIZE_MAX } ]; given '3-', frp = [ { .lo = 3, .hi = SIZE_MAX }, { .lo = SIZE_MAX, .hi = SIZE_MAX } ]; */ void set_fields (const char *fieldstr, unsigned int options) { size_t initial = 1; /* Value of first number in a range. */ size_t value = 0; /* If nonzero, a number being accumulated. */ bool lhs_specified = false; bool rhs_specified = false; bool dash_found = false; /* True if a '-' is found in this field. */ size_t i; bool in_digits = false; /* Collect and store in RP the range end points. */ /* Special case: '--field=-' means all fields, emulate '--field=1-' . 
*/ if ((options & SETFLD_ALLOW_DASH) && STREQ (fieldstr,"-")) { value = 1; lhs_specified = true; dash_found = true; fieldstr++; } while (true) { if (*fieldstr == '-') { in_digits = false; /* Starting a range. */ if (dash_found) FATAL_ERROR ( (options & SETFLD_ERRMSG_USE_POS) ?_("invalid byte or character range") :_("invalid field range")); dash_found = true; fieldstr++; if (lhs_specified && !value) FATAL_ERROR ( (options & SETFLD_ERRMSG_USE_POS) ?_("byte/character positions are numbered from 1") :_("fields are numbered from 1")); initial = (lhs_specified ? value : 1); value = 0; } else if (*fieldstr == ',' || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0') { in_digits = false; /* Ending the string, or this field/byte sublist. */ if (dash_found) { dash_found = false; if (!lhs_specified && !rhs_specified) { /* if a lone dash is allowed, emulate '1-' for all fields */ if (options & SETFLD_ALLOW_DASH) initial = 1; else FATAL_ERROR (_("invalid range with no endpoint: -")); } /* A range. Possibilities: -n, m-n, n-. In any case, 'initial' contains the start of the range. */ if (!rhs_specified) { /* 'n-'. From 'initial' to end of line. */ add_range_pair (initial, SIZE_MAX); } else { /* 'm-n' or '-n' (1-n). */ if (value < initial) FATAL_ERROR (_("invalid decreasing range")); add_range_pair (initial, value); } value = 0; } else { /* A simple field number, not a range. */ if (value == 0) FATAL_ERROR ( (options & SETFLD_ERRMSG_USE_POS) ?_("byte/character positions are numbered from 1") :_("fields are numbered from 1")); add_range_pair (value, value); value = 0; } if (*fieldstr == '\0') break; fieldstr++; lhs_specified = false; rhs_specified = false; } else if (ISDIGIT (*fieldstr)) { /* Record beginning of digit string, in case we have to complain about it. */ static char const *num_start; if (!in_digits || !num_start) num_start = fieldstr; in_digits = true; if (dash_found) rhs_specified = 1; else lhs_specified = 1; /* Detect overflow. 
*/ if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t) || value == SIZE_MAX) { /* In case the user specified -c$(echo 2^64|bc),22, complain only about the first number. */ /* Determine the length of the offending number. */ size_t len = strspn (num_start, "0123456789"); char *bad_num = xstrndup (num_start, len); error (0, 0, (options & SETFLD_ERRMSG_USE_POS) ?_("byte/character offset %s is too large") :_("field number %s is too large"), quote (bad_num)); free (bad_num); usage (EXIT_FAILURE); } fieldstr++; } else { error (0, 0, (options & SETFLD_ERRMSG_USE_POS) ?_("invalid byte/character position %s") :_("invalid field value %s"), quote (fieldstr)); usage (EXIT_FAILURE); } } if (!n_frp) FATAL_ERROR ( (options&SETFLD_ERRMSG_USE_POS) ?_("missing list of byte/character positions") :_("missing list of fields")); qsort (frp, n_frp, sizeof (frp[0]), compare_ranges); /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5'). */ for (i = 0; i < n_frp; ++i) { for (size_t j = i + 1; j < n_frp; ++j) { if (frp[j].lo <= frp[i].hi) { frp[i].hi = MAX (frp[j].hi, frp[i].hi); memmove (frp + j, frp + j + 1, (n_frp - j - 1) * sizeof *frp); n_frp--; j--; } else break; } } if (options & SETFLD_COMPLEMENT) complement_rp (); /* After merging, reallocate RP so we release memory to the system. Also add a sentinel at the end of RP, to avoid out of bounds access and for performance reasons. */ ++n_frp; frp = xrealloc (frp, n_frp * sizeof (struct field_range_pair)); frp[n_frp - 1].lo = frp[n_frp - 1].hi = SIZE_MAX; }