Пример #1
0
/* Translate the regular expression syntax name SPEC into its internal
   code.

   A null SPEC selects RE_SYNTAX_EMACS.  Otherwise a private copy of
   SPEC is canonicalised -- each space or dash becomes an underscore
   and each lower-case letter is upper-cased -- and then compared
   against the entries of `m4_resyntax_map'.  "gnu m4", "GNU-m4" and
   "Gnu_M4" therefore all canonicalise to "GNU_M4".  When no entry
   matches, the code stored in the table's terminating (null spec)
   entry is returned; the table is defined elsewhere and is expected
   to hold -1 there to signal an invalid SPEC.  */
int
m4_regexp_syntax_encode (const char *spec)
{
  const m4_resyntax *entry = m4_resyntax_map;
  char *key;
  char *cursor;

  /* Unless specified otherwise, return the historical GNU M4 default.  */
  if (spec == NULL)
    return RE_SYNTAX_EMACS;

  /* Build the canonical lookup key from a writable copy.  */
  key = xstrdup (spec);
  for (cursor = key; *cursor != '\0'; cursor++)
    {
      switch (*cursor)
        {
        case ' ':
        case '-':
          *cursor = '_';
          break;

        default:
          if (islower (to_uchar (*cursor)))
            *cursor = toupper (to_uchar (*cursor));
          break;
        }
    }

  /* Stop on the first matching entry, or on the terminating entry.  */
  while (entry->spec != NULL && !STREQ (entry->spec, key))
    entry++;

  free (key);

  return entry->code;
}
Пример #2
0
/* Convert the NUL-terminated ISO-8859-1 string STR to UTF-8.

   Returns a newly malloc'd, NUL-terminated string that the caller
   must free, or NULL when the allocation fails.  A Latin-1 byte needs
   at most two UTF-8 bytes, which is why 2 * strlen (STR) + 1 bytes
   are reserved.  */
char *
latin1toutf8 (const char *str)
{
  char *out = malloc (2 * strlen (str) + 1);
  char *dst = out;
  const char *src;

  if (out == NULL)
    return NULL;

  for (src = str; *src != '\0'; src++)
    {
      unsigned char byte = to_uchar (*src);

      if (byte < 0x80)
        {
          /* ASCII is copied through unchanged.  */
          *dst++ = *src;
        }
      else if (byte < 0xC0)
        {
          /* 0x80..0xBF: lead byte 0xC2, trail byte keeps its value.  */
          *dst++ = (unsigned char) 0xC2;
          *dst++ = *src;
        }
      else
        {
          /* 0xC0..0xFF: lead byte 0xC3, trail byte is the value - 0x40.  */
          *dst++ = (unsigned char) 0xC3;
          *dst++ = *src - 64;
        }
    }
  *dst = 0x00;

  return out;
}
Пример #3
0
/* Decode the UTF-8 sequence that starts at BEGIN and return its code point.

   - An empty range yields 0.
   - A byte below 0x80 is returned as is.
   - A lead byte (top two bits set) consumes one continuation byte per
     leading 1 bit after the first; the low six bits of every continuation
     byte are merged without checking that the byte really has the 10xxxxxx
     form.
   - If END is reached before the sequence is complete, 0 is returned.
   - A stray continuation byte (0x80..0xBF) is returned as its low six bits.
   - Lead bytes 0xFE and 0xFF would ask for more than five continuation
     bytes; no encoding uses those, and honouring them would shift by 36 or
     42 bits (undefined for `unsigned`), so they yield 0 as well.  */
unsigned utf8_to_unicode(char const * begin, char const * end) {
    unsigned result = 0;
    if (begin == end)
        return result;
    auto it = begin;
    unsigned c = to_uchar(*it);
    ++it;
    if (c < 128)
        return c;
    /* Longest form handled: 1111110x followed by five continuation bytes.  */
    const unsigned max_continuations = 5;
    unsigned mask     = (1u << 6) -1;   /* payload bits of a continuation byte */
    unsigned hmask    = mask;           /* payload bits left in the lead byte  */
    unsigned shift    = 0;              /* continuation bytes consumed so far  */
    unsigned num_bits = 0;              /* payload bits gathered after the lead */
    while ((c & 0xC0) == 0xC0) {
        /* Invalid lead byte (0xFE/0xFF): bail out before num_bits can reach
           the width of `unsigned`.  */
        if (shift == max_continuations)
            return 0;
        c <<= 1;
        c &= 0xff;
        num_bits += 6;
        hmask >>= 1;
        shift++;
        result <<= 6;
        if (it == end)
            return 0;
        result |= *it & mask;
        ++it;
    }
    /* c was shifted left once per continuation byte; undo that to recover the
       lead byte's payload and place it above the continuation bits.  */
    result |= ((c >> shift) & hmask) << num_bits;
    return result;
}
Пример #4
0
/* Test whether the upcoming input matches the string S.

   Returns false when it does not.  When S is a single character the
   match is decided by peeking only, and the character is read just if
   CONSUME is true.  For longer strings the input is read character by
   character; on a full match with CONSUME true the characters stay
   consumed, otherwise (partial match, or full match with CONSUME
   false) everything read so far is pushed back onto the input.  */
static bool
match_input (const char *s, bool consume)
{
  int matched;                  /* number of characters of S read so far */
  bool found = false;

  if (peek_input () != to_uchar (*s))
    return false;                       /* fail */

  if (s[1] == '\0')
    {
      /* Short match: nothing has been read yet.  */
      if (consume)
        next_char ();
      return true;
    }

  /* Read the first character, then keep going while the input agrees.  */
  next_char ();
  matched = 1;
  while (peek_input () == to_uchar (s[matched]))
    {
      next_char ();
      matched++;
      if (s[matched] == '\0')
        {
          /* Long match.  */
          if (consume)
            return true;
          found = true;
          break;
        }
    }

  /* Failed or shouldn't consume, push back input.  */
  {
    struct obstack *h = push_string_init ();

    /* `obstack_grow' may be macro evaluating its arg 1 several times. */
    obstack_grow (h, s, matched);
  }
  push_string_finish ();
  return found;
}
Пример #5
0
/* Overwrite with '?' every byte of BUF[0..LEN-1] for which ISPRINT is
   false.  The buffer is modified in place and LEN is returned
   unchanged.  */
static size_t
unibyte_qmark_chars(char *buf, size_t len)
{
  size_t i;

  for (i = 0; i < len; i++)
    {
      if (ISPRINT (to_uchar (buf[i])))
	continue;
      buf[i] = '?';
    }

  return len;
}
Пример #6
0
/* Convert the NUL-terminated UTF-8 string PASSWD to ISO-8859-1 when
   every character in it is representable in ISO-8859-1.

   Returns a newly allocated string that the caller must free: the
   Latin-1 conversion when it is possible, otherwise an unmodified
   copy of PASSWD.  Returns NULL if the allocation fails.

   Only the two-byte sequences 0xC2 0x80..0xBF and 0xC3 0x80..0xBF
   encode the Latin-1 range U+0080..U+00FF.  Lead bytes 0xC0 and 0xC1
   would be overlong encodings of ASCII, which are not valid UTF-8
   (0xC0 0x80 would even decode to an embedded NUL and truncate the
   result), so they are treated as "not convertible".  */
char *
utf8tolatin1ifpossible (const char *passwd)
{
  char *p;
  size_t i;

  /* First pass: verify that every non-ASCII character is a well-formed
     two-byte sequence within the Latin-1 range.  */
  for (i = 0; passwd[i]; i++)
    {
      if (to_uchar (passwd[i]) > 0x7F)
	{
	  if (to_uchar (passwd[i]) < 0xC2 || to_uchar (passwd[i]) > 0xC3)
	    return strdup (passwd);
	  i++;
	  /* This also rejects a lead byte directly followed by the
	     terminating NUL.  */
	  if (to_uchar (passwd[i]) < 0x80 || to_uchar (passwd[i]) > 0xBF)
	    return strdup (passwd);
	}
    }

  /* Second pass: the output is never longer than the input.  */
  p = malloc (strlen (passwd) + 1);
  if (p)
    {
      size_t j = 0;
      for (i = 0; passwd[i]; i++)
	{
	  if (to_uchar (passwd[i]) > 0x7F)
	    {
	      /* passwd[i + 1] can't be zero here: the first pass checked
		 that a continuation byte follows every lead byte.  */
	      p[j++] =
		((to_uchar (passwd[i]) & 0x3) << 6)
		| (to_uchar (passwd[i + 1]) & 0x3F);
	      i++;
	    }
	  else
	    p[j++] = passwd[i];
	}
      p[j] = 0x00;
    }
  return p;
}
Пример #7
0
/* Return the next input character without consuming it.

   The input stack is walked from `isp' towards older blocks, skipping
   blocks that have nothing left: an exhausted string block, or a file
   block at end of file (which is then flagged via its `end' member).
   Returns CHAR_MACRO when the first non-exhausted block is a macro
   block, and CHAR_EOF when the whole stack is exhausted.  */
static int
peek_input (void)
{
  input_block *block;

  for (block = isp; block != NULL; block = block->prev)
    {
      int ch;

      switch (block->type)
        {
        case INPUT_MACRO:
          return CHAR_MACRO;

        case INPUT_STRING:
          ch = to_uchar (block->u.u_s.string[0]);
          if (ch != '\0')
            return ch;
          break;

        case INPUT_FILE:
          ch = getc (block->u.u_f.fp);
          if (ch != EOF)
            {
              /* Put the character back so that the peek is not a read.  */
              ungetc (ch, block->u.u_f.fp);
              return ch;
            }
          block->u.u_f.end = true;
          break;

        default:
          M4ERROR ((warning_status, 0,
                    "INTERNAL ERROR: input stack botch in peek_input ()"));
          abort ();
        }
    }

  return CHAR_EOF;
}
Пример #8
0
/* Locate field number FIELD (counted from 1) inside LINE.

   Stores a pointer to the first byte of the field in *_PTR and the
   field's length in *_LEN; the pointer refers into LINE's buffer, no
   copy is made.  Fields are separated by the character `tab' when it
   is not TAB_DEFAULT, otherwise by runs of bytes flagged in `blanks'
   (a whole run counts as one separator).  If LINE has fewer fields
   than FIELD, the result is an empty field at the end of the line.  A
   field longer than one byte that ends in NUL or `eolchar' has that
   last byte dropped.  */
void
get_field (const struct linebuffer *line, size_t field,
               const char** /* OUT*/ _ptr, size_t /*OUT*/ *_len)
{
  char *const text = line->buffer;
  const size_t limit = line->length;
  size_t start = 0;     /* offset of the first byte of the field */
  size_t end;           /* offset just past the last byte of the field */
  size_t length;

  if (tab != TAB_DEFAULT)
    {
      /* Explicit delimiter: skip FIELD - 1 fields, each followed by at
         most one delimiter character.  */
      for (; start < limit && --field; )
        {
          while (start < limit && text[start] != tab)
            start++;
          /* Here we are either at the end or on a delimiter.  */
          if (start < limit)
            start++;
        }

      for (end = start; end < limit && text[end] != tab; )
        end++;
    }
  else
    {
      /* White-space transition: skip FIELD - 1 fields, each followed
         by a run of blanks.  */
      for (; start < limit && --field; )
        {
          while (start < limit && !blanks[to_uchar (text[start])])
            start++;
          while (start < limit && blanks[to_uchar (text[start])])
            start++;
        }

      for (end = start; end < limit && !blanks[to_uchar (text[end])]; )
        end++;
    }

  /* Chomp field if needed */
  length = end - start;
  if (length > 1 && (text[end - 1] == 0 || text[end - 1] == eolchar))
    length--;

  *_len = length;
  *_ptr = text + start;
}
Пример #9
0
/* Read STREAM line by line and write the selected fields of each line
   to stdout.

   This is machine-flattened code: the empty `else' arms, the tmp___N
   temporaries and the `_L' label come from that transformation.

   File-level names used here (defined outside this block):
     delim                    - input field delimiter character
     output_delimiter_string  - written between two printed fields
     suppress_non_delimited   - when set, lines without any delimiter
                                are not printed
     print_kth (k, ...)       - presumably tells whether field K is
                                selected; confirm against its definition
     field_1_buffer/_bufsize  - buffer used to hold the first field  */
static void cut_fields(FILE *stream ) 
{ int c ;
  size_t field_idx ;
  _Bool found_any_selected_field ;
  _Bool buffer_first_field ;
  _Bool tmp ;
  int tmp___0 ;
  ssize_t len ;
  size_t n_bytes ;
  int tmp___1 ;
  int tmp___2 ;
  unsigned char tmp___3 ;
  _Bool tmp___4 ;
  _Bool tmp___5 ;

  {
  field_idx = 1UL;
  found_any_selected_field = (_Bool)0;
  /* Peek at the first character: nothing to do on empty input.  */
  c = getc_unlocked(stream);
  if (c == -1) {
    return;
  } else {

  }
  ungetc(c, stream);
  /* The first field is read into a buffer exactly when
     suppress_non_delimited differs from "field 1 is not selected",
     i.e. buffer_first_field = suppress_non_delimited XOR !print_kth (1).  */
  tmp = print_kth(1UL, (_Bool *)((void *)0));
  if (tmp) {
    tmp___0 = 0;
  } else {
    tmp___0 = 1;
  }
  buffer_first_field = (_Bool )((int )suppress_non_delimited ^ tmp___0);
  while (1) {
    if (field_idx == 1UL) {
      if (buffer_first_field) {
        /* Read up to and including the first delimiter or newline.  */
        len = getndelim2(& field_1_buffer, & field_1_bufsize, 0UL, 4294967295UL, (int )delim, '\n', stream);
        if (len < 0L) {
          /* Stop on a read error or end of file; any other failure is
             handed to xalloc_die.  */
          free((void *)field_1_buffer);
          field_1_buffer = (char *)((void *)0);
          tmp___1 = ferror_unlocked(stream);
          if (tmp___1) {
            break;
          } else {
            tmp___2 = feof_unlocked(stream);
            if (tmp___2) {
              break;
            } else {

            }
          }
          xalloc_die();
        } else {

        }
        n_bytes = (unsigned long )len;
        if (! (n_bytes != 0UL)) {
          __assert_fail("n_bytes != 0", "cut.c", 626U, "cut_fields");
        } else {

        }
        /* If the last byte read is not the delimiter, the whole line
           holds no delimiter at all.  */
        tmp___3 = to_uchar(*(field_1_buffer + (n_bytes - 1UL)));
        if ((int )tmp___3 != (int )delim) {
          if (! suppress_non_delimited) {
            /* Print the line as is, adding a newline if it lacks one.  */
            fwrite_unlocked((void const   */* __restrict  */)field_1_buffer, sizeof(char ), n_bytes, (FILE */* __restrict  */)stdout);
            if ((int )*(field_1_buffer + (n_bytes - 1UL)) != 10) {
              putchar_unlocked('\n');
            } else {

            }
          } else {

          }
          continue;
        } else {

        }
        /* Field 1 ends in a delimiter: print it without that delimiter
           when it is selected.  */
        tmp___4 = print_kth(1UL, (_Bool *)((void *)0));
        if (tmp___4) {
          fwrite_unlocked((void const   */* __restrict  */)field_1_buffer, sizeof(char ), n_bytes - 1UL, (FILE */* __restrict  */)stdout);
          found_any_selected_field = (_Bool)1;
        } else {

        }
        field_idx ++;
      } else {

      }
    } else {

    }
    /* Process the current field character by character: copy it to
       stdout when selected, otherwise just skip over it.  Either loop
       stops at the delimiter, a newline or end of file, leaving that
       terminator in C.  */
    if (c != -1) {
      tmp___5 = print_kth(field_idx, (_Bool *)((void *)0));
      if (tmp___5) {
        /* Separate this field from a previously printed one.  */
        if (found_any_selected_field) {
          fwrite_unlocked((void const   */* __restrict  */)output_delimiter_string, sizeof(char ), output_delimiter_length, (FILE */* __restrict  */)stdout);
        } else {

        }
        found_any_selected_field = (_Bool)1;
        while (1) {
          c = getc_unlocked(stream);
          if (c != (int )delim) {
            if (c != 10) {
              if (! (c != -1)) {
                break;
              } else {

              }
            } else {
              break;
            }
          } else {
            break;
          }
          putchar_unlocked(c);
        }
      } else {
        while (1) {
          c = getc_unlocked(stream);
          if (c != (int )delim) {
            if (c != 10) {
              if (! (c != -1)) {
                break;
              } else {

              }
            } else {
              break;
            }
          } else {
            break;
          }
        }
      }
    } else {

    }
    /* After a newline, look one character ahead: if the input ends
       here, C becomes EOF so the loop terminates below; otherwise the
       character is pushed back and C stays '\n'.  */
    if (c == 10) {
      c = getc_unlocked(stream);
      if (c != -1) {
        ungetc(c, stream);
        c = '\n';
      } else {

      }
    } else {

    }
    if (c == (int )delim) {
      field_idx ++;
    } else {
      if (c == 10) {
        goto _L;
      } else {
        if (c == -1) {
          /* End of line (reached via the goto for '\n') or end of
             file: terminate the output line.  The newline is omitted
             only when nothing was printed, suppress_non_delimited is
             set and the line never got past its first field.  */
          _L: 
          if (found_any_selected_field) {
            putchar_unlocked('\n');
          } else {
            if (suppress_non_delimited) {
              if (! (field_idx == 1UL)) {
                putchar_unlocked('\n');
              } else {

              }
            } else {
              putchar_unlocked('\n');
            }
          }
          if (c == -1) {
            break;
          } else {

          }
          /* Reset the per-line state for the next line.  */
          field_idx = 1UL;
          found_any_selected_field = (_Bool)0;
        } else {

        }
      }
    }
  }
  return;
}
}
Пример #10
0
static _Bool set_fields(char const   *fieldstr ) 
{ size_t initial ;
  size_t value ;
  _Bool lhs_specified ;
  _Bool rhs_specified ;
  _Bool dash_found ;
  _Bool field_found ;
  struct range_pair *rp ;
  size_t n_rp ;
  size_t n_rp_allocated ;
  size_t i ;
  _Bool in_digits ;
  char *tmp ;
  char *tmp___0 ;
  char *tmp___1 ;
  char *tmp___2 ;
  void *tmp___3 ;
  char *tmp___4 ;
  void *tmp___5 ;
  char *tmp___6 ;
  void *tmp___7 ;
  size_t len ;
  size_t tmp___8 ;
  char *bad_num ;
  char *tmp___9 ;
  char const   *tmp___10 ;
  char *tmp___11 ;
  char const   *tmp___12 ;
  char *tmp___13 ;
  int tmp___14 ;
  char *tmp___15 ;
  unsigned short const   **tmp___16 ;
  unsigned char tmp___17 ;
  void *tmp___18 ;
  size_t j ;
  size_t rsi_candidate ;
  _Bool tmp___19 ;
  _Bool tmp___20 ;

  {
  initial = 1UL;
  value = 0UL;
  lhs_specified = (_Bool)0;
  rhs_specified = (_Bool)0;
  dash_found = (_Bool)0;
  field_found = (_Bool)0;
  rp = (struct range_pair *)((void *)0);
  n_rp = 0UL;
  n_rp_allocated = 0UL;
  in_digits = (_Bool)0;
  while (1) {
    __repair_app_270__3a5: /* CIL Label */ 
    {
    if ((int const   )*fieldstr == 45) {
      in_digits = (_Bool)0;
      if (dash_found) {
        while (1) {
          tmp = gettext("invalid byte or field list");
          error(0, 0, (char const   *)tmp);
          usage(1);
          break;
        }
      } else {

      }
      dash_found = (_Bool)1;
      fieldstr ++;
      if (lhs_specified) {
        initial = value;
      } else {
        initial = 1UL;
      }
      value = 0UL;
    } else {
      if ((int const   )*fieldstr == 44) {
        goto _L___2;
      } else {
        tmp___16 = __ctype_b_loc();
        tmp___17 = to_uchar((char )*fieldstr);
        if ((int const   )*(*tmp___16 + (int )tmp___17) & 1) {
          goto _L___2;
        } else {
          if ((int const   )*fieldstr == 0) {
            _L___2: 
            in_digits = (_Bool)0;
            if (dash_found) {
              dash_found = (_Bool)0;
              if (! lhs_specified) {
                if (! rhs_specified) {
                  while (1) {
                    tmp___0 = gettext("invalid range with no endpoint: -");
                    error(0, 0, (char const   *)tmp___0);
                    usage(1);
                    break;
                  }
                } else {

                }
              } else {

              }
              if (! rhs_specified) {
                eol_range_start = initial;
                field_found = (_Bool)1;
              } else {
                if (value < initial) {
                  while (1) {
                    tmp___1 = gettext("invalid decreasing range");
                    error(0, 0, (char const   *)tmp___1);
                    usage(1);
                    break;
                  }
                } else {

                }
                if (eol_range_start != 0UL) {
                  if (initial < eol_range_start) {
                    if (eol_range_start <= value) {
                      eol_range_start = initial;
                    } else {
                      while (1) {
                        if (initial == 0UL) {
                          goto _L;
                        } else {
                          if (value == 0UL) {
                            _L: 
                            while (1) {
                              tmp___2 = gettext("fields and positions are numbered from 1");
                              error(0, 0, (char const   *)tmp___2);
                              usage(1);
                              break;
                            }
                          } else {

                          }
                        }
                        if (n_rp >= n_rp_allocated) {
                          rp = (struct range_pair *)tmp___3;
                        } else {

                        }
                        (rp + n_rp)->lo = initial;
                        (rp + n_rp)->hi = value;
                        n_rp ++;
                        break;
                      }
                    }
                    field_found = (_Bool)1;
                  } else {

                  }
                } else {
                  while (1) {
                    if (initial == 0UL) {
                      goto _L___0;
                    } else {
                      if (value == 0UL) {
                        _L___0: 
                        while (1) {
                          tmp___4 = gettext("fields and positions are numbered from 1");
                          error(0, 0, (char const   *)tmp___4);
                          usage(1);
                          break;
                        }
                      } else {

                      }
                    }
                    if (n_rp >= n_rp_allocated) {
                      rp = (struct range_pair *)tmp___5;
                    } else {

                    }
                    (rp + n_rp)->lo = initial;
                    (rp + n_rp)->hi = value;
                    n_rp ++;
                    break;
                  }
                  field_found = (_Bool)1;
                }
                value = 0UL;
              }
            } else {
              while (1) {
                if (value == 0UL) {
                  goto _L___1;
                } else {
                  if (value == 0UL) {
                    _L___1: 
                    while (1) {
                      tmp___6 = gettext("fields and positions are numbered from 1");
                      error(0, 0, (char const   *)tmp___6);
                      usage(1);
                      break;
                    }
                  } else {

                  }
                }
                if (n_rp >= n_rp_allocated) {
                  rp = (struct range_pair *)tmp___7;
                } else {

                }
                (rp + n_rp)->lo = value;
                (rp + n_rp)->hi = value;
                n_rp ++;
                break;
              }
              value = 0UL;
              field_found = (_Bool)1;
            }
            if ((int const   )*fieldstr == 0) {
              break;
            } else {

            }
            fieldstr ++;
            lhs_specified = (_Bool)0;
            rhs_specified = (_Bool)0;
          } else {
            if ((unsigned int )*fieldstr - 48U <= 9U) {
              if (! in_digits) {
                num_start = fieldstr;
              } else {
                if (! num_start) {
                  num_start = fieldstr;
                } else {

                }
              }
              in_digits = (_Bool)1;
              if (dash_found) {
                rhs_specified = (_Bool)1;
              } else {
                lhs_specified = (_Bool)1;
              }
              if (0UL < value) {
                tmp___14 = 0;
              } else {
                if (value * 10UL + (unsigned long )((int const   )*fieldstr - 48) < value) {
                  tmp___14 = 0;
                } else {
                  value = value * 10UL + (unsigned long )((int const   )*fieldstr - 48);
                  tmp___14 = 1;
                }
              }
              if (! tmp___14) {
                tmp___8 = strspn(num_start, "0123456789");
                len = tmp___8;
                tmp___9 = xstrndup(num_start, len);
                bad_num = tmp___9;
                if ((unsigned int )operating_mode == 1U) {
                  tmp___10 = quote((char const   *)bad_num);
                  tmp___11 = gettext("byte offset %s is too large");
                  error(0, 0, (char const   *)tmp___11, tmp___10);
                } else {
                  tmp___12 = quote((char const   *)bad_num);
                  tmp___13 = gettext("field number %s is too large");
                  error(0, 0, (char const   *)tmp___13, tmp___12);
                }
                free((void *)bad_num);
                exit(1);
              } else {

              }
              fieldstr ++;
            } else {
              while (1) {
                tmp___15 = gettext("invalid byte or field list");
                error(0, 0, (char const   *)tmp___15);
                usage(1);
                break;
              }
            }
          }
        }
      }
    }
    value = 0UL;
    }
  }
  max_range_endpoint = 0UL;
  i = 0UL;
  while (i < n_rp) {
    if ((rp + i)->hi > max_range_endpoint) {
      max_range_endpoint = (rp + i)->hi;
    } else {

    }
    i ++;
  }
  tmp___18 = xzalloc(max_range_endpoint / 8UL + 1UL);
  printable_field = (unsigned char *)tmp___18;
  qsort((void *)rp, n_rp, sizeof(*(rp + 0)), & compare_ranges);
  i = 0UL;
  while (i < n_rp) {
    if (complement) {
      rsi_candidate = (rp + i)->hi + 1UL;
    } else {
      rsi_candidate = (rp + i)->lo;
    }
    if (output_delimiter_specified) {
      tmp___19 = is_printable_field(rsi_candidate);
      if (! tmp___19) {
        mark_range_start(rsi_candidate);
      } else {

      }
    } else {

    }
    j = (rp + i)->lo;
    while (j <= (rp + i)->hi) {
      mark_printable_field(j);
      j ++;
    }
    i ++;
  }
  if (output_delimiter_specified) {
    if (! complement) {
      if (eol_range_start) {
        tmp___20 = is_printable_field(eol_range_start);
        if (! tmp___20) {
          mark_range_start(eol_range_start);
        } else {

        }
      } else {

      }
    } else {

    }
  } else {

  }
  free((void *)rp);
  return (field_found);
}
}
Пример #11
0
size_t
Pexecute (char const *buf, size_t size, size_t *match_size,
          char const *start_ptr)
{
#if !HAVE_LIBPCRE
  /* We can't get here, because Pcompile would have been called earlier.  */
  error (EXIT_TROUBLE, 0, _("internal error"));
  return -1;
#else
  int sub[NSUB];
  char const *p = start_ptr ? start_ptr : buf;
  bool bol = p[-1] == eolbyte;
  char const *line_start = buf;
  int e = PCRE_ERROR_NOMATCH;
  char const *line_end;

  /* The search address to pass to pcre_exec.  This is the start of
     the buffer, or just past the most-recently discovered encoding
     error.  */
  char const *subject = buf;

  /* If the input type is unknown, the caller is still testing the
     input, which means the current buffer cannot contain encoding
     errors and a multiline search is typically more efficient.
     Otherwise, a single-line search is typically faster, so that
     pcre_exec doesn't waste time validating the entire input
     buffer.  */
  bool multiline = input_textbin == TEXTBIN_UNKNOWN;

  for (; p < buf + size; p = line_start = line_end + 1)
    {
      bool too_big;

      if (multiline)
        {
          size_t pcre_size_max = MIN (INT_MAX, SIZE_MAX - 1);
          size_t scan_size = MIN (pcre_size_max + 1, buf + size - p);
          line_end = memrchr (p, eolbyte, scan_size);
          too_big = ! line_end;
        }
      else
        {
          line_end = memchr (p, eolbyte, buf + size - p);
          too_big = INT_MAX < line_end - p;
        }

      if (too_big)
        error (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));

      for (;;)
        {
          /* Skip past bytes that are easily determined to be encoding
             errors, treating them as data that cannot match.  This is
             faster than having pcre_exec check them.  */
          while (mbclen_cache[to_uchar (*p)] == (size_t) -1)
            {
              p++;
              bol = false;
            }

          int search_offset = p - subject;

          /* Check for an empty match; this is faster than letting
             pcre_exec do it.  */
          if (p == line_end)
            {
              sub[0] = sub[1] = search_offset;
              e = empty_match[bol];
              break;
            }

          int options = 0;
          if (!bol)
            options |= PCRE_NOTBOL;
          if (multiline)
            options |= PCRE_NO_UTF8_CHECK;

          e = jit_exec (subject, line_end - subject, search_offset,
                        options, sub);
          if (e != PCRE_ERROR_BADUTF8)
            {
              if (0 < e && multiline && sub[1] - sub[0] != 0)
                {
                  char const *nl = memchr (subject + sub[0], eolbyte,
                                           sub[1] - sub[0]);
                  if (nl)
                    {
                      /* This match crosses a line boundary; reject it.  */
                      p = subject + sub[0];
                      line_end = nl;
                      continue;
                    }
                }
              break;
            }
          int valid_bytes = sub[0];

          /* Try to match the string before the encoding error.  */
          if (valid_bytes < search_offset)
            e = PCRE_ERROR_NOMATCH;
          else if (valid_bytes == 0)
            {
              /* Handle the empty-match case specially, for speed.
                 This optimization is valid if VALID_BYTES is zero,
                 which means SEARCH_OFFSET is also zero.  */
              sub[1] = 0;
              e = empty_match[bol];
            }
          else
            e = jit_exec (subject, valid_bytes, search_offset,
                          options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, sub);

          if (e != PCRE_ERROR_NOMATCH)
            break;

          /* Treat the encoding error as data that cannot match.  */
          p = subject += valid_bytes + 1;
          bol = false;
        }

      if (e != PCRE_ERROR_NOMATCH)
        break;
      bol = true;
    }

  if (e <= 0)
    {
      switch (e)
        {
        case PCRE_ERROR_NOMATCH:
          break;

        case PCRE_ERROR_NOMEMORY:
          error (EXIT_TROUBLE, 0, _("memory exhausted"));

# if PCRE_STUDY_JIT_COMPILE
        case PCRE_ERROR_JIT_STACKLIMIT:
          error (EXIT_TROUBLE, 0, _("exhausted PCRE JIT stack"));
# endif

        case PCRE_ERROR_MATCHLIMIT:
          error (EXIT_TROUBLE, 0, _("exceeded PCRE's backtracking limit"));

        default:
          /* For now, we lump all remaining PCRE failures into this basket.
             If anyone cares to provide sample grep usage that can trigger
             particular PCRE errors, we can add to the list (above) of more
             detailed diagnostics.  */
          error (EXIT_TROUBLE, 0, _("internal PCRE error: %d"), e);
        }

      return -1;
    }
  else
    {
      char const *matchbeg = subject + sub[0];
      char const *matchend = subject + sub[1];
      char const *beg;
      char const *end;
      if (start_ptr)
        {
          beg = matchbeg;
          end = matchend;
        }
      else if (multiline)
        {
          char const *prev_nl = memrchr (line_start - 1, eolbyte,
                                         matchbeg - (line_start - 1));
          char const *next_nl = memchr (matchend, eolbyte,
                                        line_end + 1 - matchend);
          beg = prev_nl + 1;
          end = next_nl + 1;
        }
      else
        {
          beg = line_start;
          end = line_end + 1;
        }
      *match_size = end - beg;
      return beg - buf;
    }
#endif
}
Пример #12
0
/* Read and return the next input character, popping exhausted input
   blocks off the stack headed by `isp' as needed.

   Returns CHAR_EOF (after resetting current_file and current_line)
   once the stack is empty, and CHAR_MACRO for a macro block, which is
   popped right away.  When `input_change' is set, current_file and
   current_line are first refreshed from the block on top of the
   stack.  */
static int
next_char_1 (void)
{
  for (;;)
    {
      int ch;

      if (isp == NULL)
        {
          current_file = "";
          current_line = 0;
          return CHAR_EOF;
        }

      if (input_change)
        {
          current_file = isp->file;
          current_line = isp->line;
          input_change = false;
        }

      if (isp->type == INPUT_STRING)
        {
          ch = to_uchar (*isp->u.u_s.string++);
          if (ch != '\0')
            return ch;
        }
      else if (isp->type == INPUT_FILE)
        {
          /* The line counter advances when the first character after
             a newline is read.  */
          if (start_of_input_line)
            {
              start_of_input_line = false;
              current_line = ++isp->line;
            }

          /* If stdin is a terminal, calling getc after peek_input
             already called it would make the user have to hit ^D
             twice to quit.  */
          if (isp->u.u_f.end)
            ch = EOF;
          else
            ch = getc (isp->u.u_f.fp);

          if (ch != EOF)
            {
              if (ch == '\n')
                start_of_input_line = true;
              return ch;
            }
        }
      else if (isp->type == INPUT_MACRO)
        {
          pop_input (); /* INPUT_MACRO input sources has only one token */
          return CHAR_MACRO;
        }
      else
        {
          M4ERROR ((warning_status, 0,
                    "INTERNAL ERROR: input stack botch in next_char ()"));
          abort ();
        }

      /* End of input source --- pop one level.  */
      pop_input ();
    }
}
Пример #13
0
 // Left shift for `byte`: converts B with to_uchar(), shifts that value left
 // by SHIFT, passes the shifted result through to_uchar() again and builds
 // the returned byte from it.
 // NOTE(review): IntegerType is not declared inside this block -- presumably
 // a template parameter introduced on the line(s) just above; confirm.
 constexpr byte operator<<(byte b, IntegerType shift) noexcept
 {
   return byte{ to_uchar(to_uchar( b ) << shift) };
 }
Пример #14
0
/* Collect one macro argument from the input, expanding it into OBS.

   Leading white-space tokens are skipped.  Tokens are then expanded
   onto OBS until a comma or close parenthesis is met outside of any
   nested parentheses; at that point the text is NUL-terminated and
   finished on OBS and, unless ARGP already became a TOKEN_FUNC
   argument, stored in ARGP as TOKEN_TEXT.  A TOKEN_MACDEF seen while
   OBS is still empty turns ARGP into a TOKEN_FUNC argument.

   Returns true when the argument ended with a comma (more arguments
   follow) and false when it ended with the closing parenthesis.
   Running into end of file is reported with EXIT_FAILURE against the
   file and line where the argument collection started.  */
static bool
expand_argument (struct obstack *obs, token_data *argp)
{
  const char *start_file = current_file;
  int start_line = current_line;
  int depth = 0;                /* nesting level of open parentheses */
  token_data td;
  token_type t;
  char *text;

  TOKEN_DATA_TYPE (argp) = TOKEN_VOID;

  /* Skip leading white space.  */
  t = next_token (&td, NULL);
  while (t == TOKEN_SIMPLE && isspace (to_uchar (*TOKEN_DATA_TEXT (&td))))
    t = next_token (&td, NULL);

  for (;; t = next_token (&td, NULL))
    {
      switch (t)
        { /* TOKSW */
        case TOKEN_COMMA:
        case TOKEN_CLOSE:
          if (depth == 0)
            {
              /* The argument MUST be finished, whether we want it or not.  */
              obstack_1grow (obs, '\0');
              text = (char *) obstack_finish (obs);

              if (TOKEN_DATA_TYPE (argp) == TOKEN_VOID)
                {
                  TOKEN_DATA_TYPE (argp) = TOKEN_TEXT;
                  TOKEN_DATA_TEXT (argp) = text;
                }
              return t == TOKEN_COMMA;
            }
          /* Nested inside parentheses: part of the argument text.  */
          /* fallthru */
        case TOKEN_OPEN:
        case TOKEN_SIMPLE:
          text = TOKEN_DATA_TEXT (&td);

          /* Track parenthesis nesting.  */
          if (*text == '(')
            depth++;
          else if (*text == ')')
            depth--;
          expand_token (obs, t, &td, start_line);
          break;

        case TOKEN_WORD:
        case TOKEN_STRING:
          expand_token (obs, t, &td, start_line);
          break;

        case TOKEN_MACDEF:
          if (obstack_object_size (obs) == 0)
            {
              TOKEN_DATA_TYPE (argp) = TOKEN_FUNC;
              TOKEN_DATA_FUNC (argp) = TOKEN_DATA_FUNC (&td);
            }
          break;

        case TOKEN_EOF:
          /* current_file changed to "" if we see TOKEN_EOF, use the
             previous value we stored earlier.  */
          M4ERROR_AT_LINE ((EXIT_FAILURE, 0, start_file, start_line,
                            "ERROR: end of file in argument list"));
          break;

        default:
          M4ERROR ((warning_status, 0,
                    "INTERNAL ERROR: bad token type in expand_argument ()"));
          abort ();
        }
    }
}
Пример #15
0
void
format (struct obstack *obs, int argc, token_data **argv)
{
  char *fmt;			/* format control string */
  const char *fstart;		/* beginning of current format spec */
  int c;			/* a simple character */

  /* Flags.  */
  char flags;			/* 1 iff treating flags */

  /* Precision specifiers.  */
  int width;			/* minimum field width */
  int prec;			/* precision */
  char lflag;			/* long flag */
  char hflag;			/* short flag */

  /* Buffer and stuff.  */
  char *str;			/* malloc'd buffer of formatted text */
  enum {INT, UINT, LONG, ULONG, DOUBLE, STR} datatype;

  fmt = (char *) ARG_STR (argc, argv);
  for (;;)
    {
      while ((c = *fmt++) != '%')
	{
	  if (c == 0)
	    return;
	  obstack_1grow (obs, c);
	}

      fstart = fmt - 1;

      if (*fmt == '%')
	{
	  obstack_1grow (obs, '%');
	  fmt++;
	  continue;
	}

      /* Parse flags.  */
      flags = 1;
      do
	{
	  switch (*fmt)
	    {
	    case '-':		/* left justification */
	    case '+':		/* mandatory sign */
	    case ' ':		/* space instead of positive sign */
	    case '0':		/* zero padding */
	    case '#':		/* alternate output */
	      break;

	    default:
	      flags = 0;
	      break;
	    }
	}
      while (flags && fmt++);

      /* Minimum field width.  */
      width = -1;
      if (*fmt == '*')
	{
	  width = ARG_INT (argc, argv);
	  fmt++;
	}
      else if (isdigit (to_uchar (*fmt)))
	{
	  do
	    {
	      fmt++;
	    }
	  while (isdigit (to_uchar (*fmt)));
	}

      /* Maximum precision.  */
      prec = -1;
      if (*fmt == '.')
	{
	  if (*(++fmt) == '*')
	    {
	      prec = ARG_INT (argc, argv);
	      ++fmt;
	    }
	  else if (isdigit (to_uchar (*fmt)))
	    {
	      do
		{
		  fmt++;
		}
	      while (isdigit (to_uchar (*fmt)));
	    }
	}

      /* Length modifiers.  */
      lflag = (*fmt == 'l');
      hflag = (*fmt == 'h');
      if (lflag || hflag)
	fmt++;

      switch (*fmt++)
	{

	case '\0':
	  return;

	case 'c':
	  datatype = INT;
	  break;

	case 's':
	  datatype = STR;
	  break;

	case 'd':
	case 'i':
	  if (lflag)
	    {
	      datatype = LONG;
	    }
	  else
	    {
	      datatype = INT;
	    }
	  break;

	case 'o':
	case 'x':
	case 'X':
	case 'u':
	  if (lflag)
	    {
	      datatype = ULONG;
	    }
	  else
	    {
	      datatype = UINT;
	    }
	  break;

	case 'e':
	case 'E':
	case 'f':
	case 'F':
	case 'g':
	case 'G':
	  datatype = DOUBLE;
	  break;

	default:
	  continue;
	}

      c = *fmt;
      *fmt = '\0';

      switch(datatype)
	{
	case INT:
	  if (width != -1 && prec != -1)
	    str = xasprintf (fstart, width, prec, ARG_INT(argc, argv));
	  else if (width != -1)
	    str = xasprintf (fstart, width, ARG_INT(argc, argv));
	  else if (prec != -1)
	    str = xasprintf (fstart, prec, ARG_INT(argc, argv));
	  else
	    str = xasprintf (fstart, ARG_INT(argc, argv));
	  break;

	case UINT:
	  if (width != -1 && prec != -1)
	    str = xasprintf (fstart, width, prec, ARG_UINT(argc, argv));
	  else if (width != -1)
	    str = xasprintf (fstart, width, ARG_UINT(argc, argv));
	  else if (prec != -1)
	    str = xasprintf (fstart, prec, ARG_UINT(argc, argv));
	  else
	    str = xasprintf (fstart, ARG_UINT(argc, argv));
	  break;

	case LONG:
	  if (width != -1 && prec != -1)
	    str = xasprintf (fstart, width, prec, ARG_LONG(argc, argv));
	  else if (width != -1)
	    str = xasprintf (fstart, width, ARG_LONG(argc, argv));
	  else if (prec != -1)
	    str = xasprintf (fstart, prec, ARG_LONG(argc, argv));
	  else
	    str = xasprintf (fstart, ARG_LONG(argc, argv));
	  break;

	case ULONG:
	  if (width != -1 && prec != -1)
	    str = xasprintf (fstart, width, prec, ARG_ULONG(argc, argv));
	  else if (width != -1)
	    str = xasprintf (fstart, width, ARG_ULONG(argc, argv));
	  else if (prec != -1)
	    str = xasprintf (fstart, prec, ARG_ULONG(argc, argv));
	  else
	    str = xasprintf (fstart, ARG_ULONG(argc, argv));
	  break;

	case DOUBLE:
	  if (width != -1 && prec != -1)
	    str = xasprintf (fstart, width, prec, ARG_DOUBLE(argc, argv));
	  else if (width != -1)
	    str = xasprintf (fstart, width, ARG_DOUBLE(argc, argv));
	  else if (prec != -1)
	    str = xasprintf (fstart, prec, ARG_DOUBLE(argc, argv));
	  else
	    str = xasprintf (fstart, ARG_DOUBLE(argc, argv));
	  break;

	case STR:
	  if (width != -1 && prec != -1)
	    str = xasprintf (fstart, width, prec, ARG_STR(argc, argv));
	  else if (width != -1)
	    str = xasprintf (fstart, width, ARG_STR(argc, argv));
	  else if (prec != -1)
	    str = xasprintf (fstart, prec, ARG_STR(argc, argv));
	  else
	    str = xasprintf (fstart, ARG_STR(argc, argv));
	  break;

	default:
	  abort();
	}

      *fmt = c;

      /* NULL was returned on failure, such as invalid format string.  For
	 now, just silently ignore that bad specifier.  */
      if (str == NULL)
	continue;

      obstack_grow (obs, str, strlen (str));
      free (str);
    }
}
Пример #16
0
/* Read the next token from the input and describe it in *TD.

   Returns TOKEN_EOF at end of input, and TOKEN_MACDEF when the input
   holds a CHAR_MACRO marker (TD is then filled in by
   init_macro_token).  In every other case the token text is
   accumulated on token_stack, TD is marked TOKEN_TEXT and pointed at
   the NUL-terminated text, and the return value is one of
   TOKEN_STRING (a comment or a quoted string), TOKEN_WORD,
   TOKEN_OPEN, TOKEN_COMMA, TOKEN_CLOSE or TOKEN_SIMPLE.

   If LINE is non-NULL, *LINE receives current_line as sampled right
   after the first character of the token has been consumed; it is
   not written for TOKEN_EOF or TOKEN_MACDEF.

   With ENABLE_CHANGEWORD, TOKEN_DATA_ORIG_TEXT (td) is also set: to
   the full text matched by word_regexp for a changeword-style word,
   otherwise to the same pointer as TOKEN_DATA_TEXT (td).

   NOTE(review): MATCH and M4ERROR_AT_LINE are macros defined outside
   this function.  The comments below assume that MATCH (ch, s, true)
   tests whether the input beginning with CH matches the string S and
   consumes the remainder on success, and that M4ERROR_AT_LINE does
   not return when given EXIT_FAILURE -- confirm against their
   definitions.  */
token_type
next_token (token_data *td, int *line)
{
  int ch;
  int quote_level;
  token_type type;
#ifdef ENABLE_CHANGEWORD
  int startpos;
  char *orig_text = NULL;
#endif
  const char *file;
  int dummy;

  /* Release whatever the previous call left on token_stack.  */
  obstack_free (&token_stack, token_bottom);
  /* Let the rest of the function store through LINE unconditionally.  */
  if (!line)
    line = &dummy;

 /* Can't consume character until after CHAR_MACRO is handled.  */
  ch = peek_input ();
  if (ch == CHAR_EOF)
    {
#ifdef DEBUG_INPUT
      xfprintf (stderr, "next_token -> EOF\n");
#endif
      next_char ();
      return TOKEN_EOF;
    }
  if (ch == CHAR_MACRO)
    {
      /* TD is filled in while the marker is still pending; only then
         is the marker consumed.  */
      init_macro_token (td);
      next_char ();
#ifdef DEBUG_INPUT
      xfprintf (stderr, "next_token -> MACDEF (%s)\n",
                find_builtin_by_addr (TOKEN_DATA_FUNC (td))->name);
#endif
      return TOKEN_MACDEF;
    }

  next_char (); /* Consume character we already peeked at.  */
  /* Remember where the token starts, for the diagnostics below.  */
  file = current_file;
  *line = current_line;
  if (MATCH (ch, bcomm.string, true))
    {
      /* Comment: copy the begin-comment string, every byte up to the
         end-comment string, and the end-comment string itself.  */
      obstack_grow (&token_stack, bcomm.string, bcomm.length);
      while ((ch = next_char ()) != CHAR_EOF
             && !MATCH (ch, ecomm.string, true))
        obstack_1grow (&token_stack, ch);
      if (ch != CHAR_EOF)
        obstack_grow (&token_stack, ecomm.string, ecomm.length);
      else
        /* current_file changed to "" if we see CHAR_EOF, use the
           previous value we stored earlier.  */
        M4ERROR_AT_LINE ((EXIT_FAILURE, 0, file, *line,
                          "ERROR: end of file in comment"));

      type = TOKEN_STRING;
    }
  else if (default_word_regexp && (isalpha (ch) || ch == '_'))
    {
      /* Default word syntax: a letter or underscore followed by any
         number of alphanumerics or underscores.  Each following
         character is peeked first and consumed only if it belongs to
         the word.  */
      obstack_1grow (&token_stack, ch);
      while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_'))
        {
          obstack_1grow (&token_stack, ch);
          next_char ();
        }
      type = TOKEN_WORD;
    }

#ifdef ENABLE_CHANGEWORD

  else if (!default_word_regexp && word_regexp.fastmap[ch])
    {
      /* User-supplied word syntax: extend the candidate one peeked
         byte at a time for as long as word_regexp still matches the
         whole candidate.  `regs' is declared outside this function.  */
      obstack_1grow (&token_stack, ch);
      while (1)
        {
          ch = peek_input ();
          if (ch == CHAR_EOF)
            break;
          obstack_1grow (&token_stack, ch);
          /* Start 0 and range 0: only a match anchored at the first
             byte of the candidate is attempted.  */
          startpos = re_search (&word_regexp,
                                (char *) obstack_base (&token_stack),
                                obstack_object_size (&token_stack), 0, 0,
                                &regs);
          if (startpos ||
              regs.end [0] != (regoff_t) obstack_object_size (&token_stack))
            {
              /* The byte just appended broke the match: overwrite it
                 with NUL and leave it unconsumed in the input.  */
              *(((char *) obstack_base (&token_stack)
                 + obstack_object_size (&token_stack)) - 1) = '\0';
              break;
            }
          next_char ();
        }

      /* Finish the full candidate as its own object (ORIG_TEXT), then
         start a second object that will become the token text.  */
      obstack_1grow (&token_stack, '\0');
      orig_text = (char *) obstack_finish (&token_stack);

      /* Use sub-expression 1 as the token text when it took part in
         the match, otherwise the whole match.  */
      if (regs.start[1] != -1)
        obstack_grow (&token_stack,orig_text + regs.start[1],
                      regs.end[1] - regs.start[1]);
      else
        obstack_grow (&token_stack, orig_text,regs.end[0]);

      type = TOKEN_WORD;
    }

#endif /* ENABLE_CHANGEWORD */

  else if (!MATCH (ch, lquote.string, true))
    {
      /* Neither comment, word nor open quote: a one-character token
         whose type depends only on the character.  */
      switch (ch)
        {
        case '(':
          type = TOKEN_OPEN;
          break;
        case ',':
          type = TOKEN_COMMA;
          break;
        case ')':
          type = TOKEN_CLOSE;
          break;
        default:
          type = TOKEN_SIMPLE;
          break;
        }
      obstack_1grow (&token_stack, ch);
    }
  else
    {
      /* Quoted string.  The open quote has been matched; collect text
         until the nesting level drops back to zero.  FAST is set when
         both delimiters are a single byte, which lets the buffer scan
         below do the level counting itself.  */
      bool fast = lquote.length == 1 && rquote.length == 1;
      quote_level = 1;
      while (1)
        {
          /* Try scanning a buffer first.  */
          const char *buffer = (isp && isp->type == INPUT_STRING
                                ? isp->u.u_s.string : NULL);
          if (buffer && *buffer)
            {
              size_t len = isp->u.u_s.end - buffer;
              const char *p = buffer;
              /* Find the next byte equal to the first byte of either
                 delimiter.  In fast mode the loop condition also
                 adjusts quote_level and steps P past that byte, and
                 repeats until the level reaches zero or no delimiter
                 is left.  Otherwise the body runs once and P is left
                 pointing at the candidate byte.  */
              do
                {
                  p = (char *) memchr2 (p, *lquote.string, *rquote.string,
                                        buffer + len - p);
                }
              while (p && fast && (*p++ == *rquote.string
                                   ? --quote_level : ++quote_level));
              if (p)
                {
                  if (fast)
                    {
                      /* P is one past the closing quote: keep the text
                         before the quote and skip the quote itself.  */
                      assert (!quote_level);
                      obstack_grow (&token_stack, buffer, p - buffer - 1);
                      isp->u.u_s.string += p - buffer;
                      break;
                    }
                  /* Multi-byte delimiters: keep the text before the
                     candidate byte, consume that byte, and let the
                     MATCH tests below decide what it starts.  */
                  obstack_grow (&token_stack, buffer, p - buffer);
                  ch = to_uchar (*p);
                  isp->u.u_s.string += p - buffer + 1;
                }
              else
                {
                  /* No delimiter in this buffer: take all of it and
                     go around again.  */
                  obstack_grow (&token_stack, buffer, len);
                  isp->u.u_s.string += len;
                  continue;
                }
            }
          /* Fall back to a byte.  */
          else
            ch = next_char ();
          if (ch == CHAR_EOF)
            /* current_file changed to "" if we see CHAR_EOF, use
               the previous value we stored earlier.  */
            M4ERROR_AT_LINE ((EXIT_FAILURE, 0, file, *line,
                              "ERROR: end of file in string"));

          if (MATCH (ch, rquote.string, true))
            {
              /* The outermost close quote ends the token and is not
                 copied; nested ones are kept verbatim.  */
              if (--quote_level == 0)
                break;
              obstack_grow (&token_stack, rquote.string, rquote.length);
            }
          else if (MATCH (ch, lquote.string, true))
            {
              quote_level++;
              obstack_grow (&token_stack, lquote.string, lquote.length);
            }
          else
            obstack_1grow (&token_stack, ch);
        }
      type = TOKEN_STRING;
    }

  /* Terminate the text and hand the finished object to TD.  */
  obstack_1grow (&token_stack, '\0');

  TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
  TOKEN_DATA_TEXT (td) = (char *) obstack_finish (&token_stack);
#ifdef ENABLE_CHANGEWORD
  /* Only the changeword branch sets orig_text; every other token uses
     its own text as the original text.  */
  if (orig_text == NULL)
    orig_text = TOKEN_DATA_TEXT (td);
  TOKEN_DATA_ORIG_TEXT (td) = orig_text;
#endif
#ifdef DEBUG_INPUT
  xfprintf (stderr, "next_token -> %s (%s)\n",
            token_type_string (type), TOKEN_DATA_TEXT (td));
#endif
  return type;
}
Пример #17
0
/* The Base64 alphabet, indexed by 6-bit value (0..63).  The array is
   declared with exactly 64 elements, so the string literal's
   terminating NUL is not stored: treat it as a lookup table, never
   as a C string.  */
static const char b64c[64] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* Encode the INLEN bytes at IN as Base64 symbols stored at OUT.
   The caller must guarantee that INLEN is a multiple of 3 and that
   OUT has room for at least BASE64_LENGTH (INLEN) bytes.  No
   terminating NUL is written.  */
static void
base64_encode_fast (const char *restrict in, size_t inlen, char *restrict out)
{
  /* Each pass turns one 3-byte group into four 6-bit symbols.  */
  for (; inlen != 0; inlen -= 3, in += 3, out += 4)
    {
      unsigned char b0 = to_uchar (in[0]);
      unsigned char b1 = to_uchar (in[1]);
      unsigned char b2 = to_uchar (in[2]);

      out[0] = b64c[b0 >> 2];
      out[1] = b64c[((b0 << 4) + (b1 >> 4)) & 0x3f];
      out[2] = b64c[((b1 << 2) + (b2 >> 6)) & 0x3f];
      out[3] = b64c[b2 & 0x3f];
    }
}

/* Base64 encode IN array of size INLEN into OUT array of size OUTLEN.
   If OUTLEN is less than BASE64_LENGTH(INLEN), write as many bytes as
   possible.  If OUTLEN is larger than BASE64_LENGTH(INLEN), also zero
   terminate the output buffer. */
void
base64_encode (const char *restrict in, size_t inlen,
               char *restrict out, size_t outlen)
{
  /* Note this outlen constraint can be enforced at compile time.
Пример #18
0
/* Given the list of field or byte range specifications FIELDSTR,
   allocate and initialize the FRP array. FIELDSTR should
   be composed of one or more numbers or ranges of numbers, separated
   by blanks or commas.  Incomplete ranges may be given: '-m' means '1-m';
   'n-' means 'n' through end of line.
   n=0 and n>=SIZE_MAX values will trigger an error.

   OPTIONS is a bit mask of SETFLD_* flags:

   if SETFLD_ALLOW_DASH option is used, a single '-' means all fields
   (otherwise a single dash triggers an error).

   if SETFLD_COMPLEMENT option is used, the specified field list
   is complemented (e.g. '1-3' will result in fields '4-').

   if SETFLD_ERRMSG_USE_POS option is used, error messages
   will say 'position' (or 'byte/character positions')
   instead of fields (used with cut -b/-c).

   The function terminates on failure.

   Upon return, the FRP array is initialized to contain
   a non-overlapping, increasing list of field ranges.

   N_FRP holds the number of field ranges in the FRP array.

   The first field is stored as 1 (zero is not used).
   An open-ended range (i.e., until the last field of the input line)
   is indicated with hi = SIZE_MAX.

   A sentinel of SIZE_MAX/SIZE_MAX is always added as the last
   field range pair.

   Examples:
   given '1-2,4', frp = [ { .lo = 1,        .hi = 2 },
                          { .lo = 4,        .hi = 4 },
                          { .lo = SIZE_MAX, .hi = SIZE_MAX } ];

   given '3-',    frp = [ { .lo = 3,        .hi = SIZE_MAX },
                          { .lo = SIZE_MAX, .hi = SIZE_MAX } ];
*/
void
set_fields (const char *fieldstr, unsigned int options)
{
  size_t initial = 1;		/* Value of first number in a range.  */
  size_t value = 0;		/* If nonzero, a number being accumulated.  */
  bool lhs_specified = false;	/* A number was seen with no '-' before it.  */
  bool rhs_specified = false;	/* A number was seen after a '-'.  */
  bool dash_found = false;	/* True if a '-' is found in this field.  */
  bool in_digits = false;	/* True while inside a run of digits.  */

  /* Start of the digit run currently being scanned, so that an
     overflow diagnostic can quote the whole offending number.  It is
     (re)assigned whenever a new run begins, hence it is never read
     while still NULL.  */
  char const *num_start = NULL;

  /* Collect and store in FRP the range end points. */

  /* Special case: '--field=-' means all fields, emulate '--field=1-' . */
  if ((options & SETFLD_ALLOW_DASH) && STREQ (fieldstr,"-"))
    {
      value = 1;
      lhs_specified = true;
      dash_found = true;
      fieldstr++;
    }

  while (true)
    {
      if (*fieldstr == '-')
        {
          in_digits = false;
          /* Starting a range. */
          if (dash_found)
            FATAL_ERROR ( (options & SETFLD_ERRMSG_USE_POS)
                          ?_("invalid byte or character range")
                          :_("invalid field range"));

          dash_found = true;
          fieldstr++;

          if (lhs_specified && !value)
            FATAL_ERROR ( (options & SETFLD_ERRMSG_USE_POS)
                          ?_("byte/character positions are numbered from 1")
                          :_("fields are numbered from 1"));

          initial = (lhs_specified ? value : 1);
          value = 0;
        }
      else if (*fieldstr == ','
               || isblank (to_uchar (*fieldstr)) || *fieldstr == '\0')
        {
          in_digits = false;
          /* Ending the string, or this field/byte sublist. */
          if (dash_found)
            {
              dash_found = false;

              if (!lhs_specified && !rhs_specified)
                {
                  /* if a lone dash is allowed, emulate '1-' for all fields */
                  if (options & SETFLD_ALLOW_DASH)
                    initial = 1;
                  else
                    FATAL_ERROR (_("invalid range with no endpoint: -"));
                }

              /* A range.  Possibilities: -n, m-n, n-.
                 In any case, 'initial' contains the start of the range. */
              if (!rhs_specified)
                {
                  /* 'n-'.  From 'initial' to end of line. */
                  add_range_pair (initial, SIZE_MAX);
                }
              else
                {
                  /* 'm-n' or '-n' (1-n). */
                  if (value < initial)
                    FATAL_ERROR (_("invalid decreasing range"));

                  add_range_pair (initial, value);
                }
              value = 0;
            }
          else
            {
              /* A simple field number, not a range. */
              if (value == 0)
                FATAL_ERROR ( (options & SETFLD_ERRMSG_USE_POS)
                              ?_("byte/character positions are numbered from 1")
                              :_("fields are numbered from 1"));

              add_range_pair (value, value);
              value = 0;
            }

          if (*fieldstr == '\0')
            break;

          fieldstr++;
          lhs_specified = false;
          rhs_specified = false;
        }
      else if (ISDIGIT (*fieldstr))
        {
          /* Record beginning of digit string, in case we have to
             complain about it.  */
          if (!in_digits)
            num_start = fieldstr;
          in_digits = true;

          if (dash_found)
            rhs_specified = true;
          else
            lhs_specified = true;

          /* Detect overflow.  SIZE_MAX itself is rejected too, because
             it is reserved to mean "through end of line".  */
          if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t)
              || value == SIZE_MAX)
            {
              /* In case the user specified -c$(echo 2^64|bc),22,
                 complain only about the first number.  */
              /* Determine the length of the offending number.  */
              size_t len = strspn (num_start, "0123456789");
              char *bad_num = xstrndup (num_start, len);
              error (0, 0, (options & SETFLD_ERRMSG_USE_POS)
                           ?_("byte/character offset %s is too large")
                           :_("field number %s is too large"),
                           quote (bad_num));
              free (bad_num);
              usage (EXIT_FAILURE);
            }

          fieldstr++;
        }
      else
        {
          error (0, 0, (options & SETFLD_ERRMSG_USE_POS)
                       ?_("invalid byte/character position %s")
                       :_("invalid field value %s"),
                       quote (fieldstr));
          usage (EXIT_FAILURE);
        }
    }

  if (!n_frp)
    FATAL_ERROR ( (options&SETFLD_ERRMSG_USE_POS)
                  ?_("missing list of byte/character positions")
                  :_("missing list of fields"));

  qsort (frp, n_frp, sizeof (frp[0]), compare_ranges);

  /* Merge range pairs (e.g. `2-5,3-4' becomes `2-5').  The array is
     compacted in place in one pass: each pair either extends the last
     pair kept so far (when it starts at or before that pair's end) or
     becomes the next kept pair.  */
  {
    size_t n_kept = 0;
    for (size_t i = 0; i < n_frp; ++i)
      {
        if (n_kept > 0 && frp[i].lo <= frp[n_kept - 1].hi)
          frp[n_kept - 1].hi = MAX (frp[i].hi, frp[n_kept - 1].hi);
        else
          frp[n_kept++] = frp[i];
      }
    n_frp = n_kept;
  }

  if (options & SETFLD_COMPLEMENT)
    complement_rp ();

  /* After merging, reallocate FRP so we release memory to the system.
     Also add a sentinel at the end of FRP, to avoid out of bounds access
     and for performance reasons.  */
  ++n_frp;
  frp = xrealloc (frp, n_frp * sizeof (struct field_range_pair));
  frp[n_frp - 1].lo = frp[n_frp - 1].hi = SIZE_MAX;
}