Exemple #1
0
static void
phase3_scan_regex ()
{
    int c;

    for (;;)
      {
        c = phase1_getc ();
        if (c == '/')
          break;
        if (c == '\\')
          {
            c = phase1_getc ();
            if (c != EOF)
              continue;
          }
        if (c == EOF)
          {
            error_with_progname = false;
            error (0, 0,
                   _("%s:%d: warning: regular expression literal terminated too early"),
                   logical_file_name, line_number);
            error_with_progname = true;
            return;
          }
      }

    c = phase2_getc ();
    if (!(c == 'i' || c == 's' || c == 'm' || c == 'x'))
      phase2_ungetc (c);
}
Exemple #2
0
static int
phase3_getc ()
{
  if (phase3_pushback_length)
    return phase3_pushback[--phase3_pushback_length];
  for (;;)
    {
      int c = phase2_getc ();
      if (c != '\\')
	return c;
      c = phase2_getc ();
      if (c != '\n')
	{
	  phase2_ungetc (c);
	  return '\\';
	}
    }
}
Exemple #3
0
static int
phase3_getc ()
{
  int c = phase2_getc ();

  for (;;)
    {
      if (c != '\\')
	return c;

      c = phase2_getc ();
      if (c != '\n')
	{
	  phase2_ungetc (c);
	  return '\\';
	}

      /* Skip the backslash-newline and all whitespace that follows it.  */
      do
	c = phase2_getc ();
      while (c == ' ' || c == '\t' || c == '\r' || c == '\f');
    }
}
Exemple #4
0
static void
phase5_get (token_ty *tp)
{
  static char *buffer;
  static int bufmax;
  int bufpos;
  int c;

  if (phase5_pushback_length)
    {
      *tp = phase5_pushback[--phase5_pushback_length];
      return;
    }
  for (;;)
    {
      tp->line_number = line_number;
      c = phase2_getc ();

      switch (c)
        {
        case EOF:
          tp->type = token_type_eof;
          return;

        case '\n':
          if (last_non_comment_line > last_comment_line)
            savable_comment_reset ();
          /* FALLTHROUGH */
        case '\r':
        case '\t':
        case ' ':
          /* Ignore whitespace and comments.  */
          continue;
        }

      last_non_comment_line = tp->line_number;

      switch (c)
        {
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
        case 'Y': case 'Z':
        case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
        case 's': case 't': case 'u': case 'v': case 'w': case 'x':
        case 'y': case 'z':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
          /* Symbol, or part of a number.  */
          bufpos = 0;
          for (;;)
            {
              if (bufpos >= bufmax)
                {
                  bufmax = 2 * bufmax + 10;
                  buffer = xrealloc (buffer, bufmax);
                }
              buffer[bufpos++] = c;
              c = phase2_getc ();
              switch (c)
                {
                case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
                case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
                case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
                case 'Y': case 'Z':
                case '_':
                case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
                case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
                case 's': case 't': case 'u': case 'v': case 'w': case 'x':
                case 'y': case 'z':
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                  continue;
                default:
                  if (bufpos == 1 && buffer[0] == '_' && c == '(')
                    {
                      tp->type = token_type_i18n;
                      return;
                    }
                  phase2_ungetc (c);
                  break;
                }
              break;
            }
          if (bufpos >= bufmax)
            {
              bufmax = 2 * bufmax + 10;
              buffer = xrealloc (buffer, bufmax);
            }
          buffer[bufpos] = '\0';
          tp->string = xstrdup (buffer);
          tp->type = token_type_symbol;
          return;

        case '"':
          bufpos = 0;
          for (;;)
            {
              c = phase7_getc ();
              if (c == EOF || c == P7_QUOTES)
                break;
              if (bufpos >= bufmax)
                {
                  bufmax = 2 * bufmax + 10;
                  buffer = xrealloc (buffer, bufmax);
                }
              buffer[bufpos++] = c;
            }
          if (bufpos >= bufmax)
            {
              bufmax = 2 * bufmax + 10;
              buffer = xrealloc (buffer, bufmax);
            }
          buffer[bufpos] = '\0';
          tp->string = xstrdup (buffer);
          tp->type = token_type_string_literal;
          tp->comment = add_reference (savable_comment);
          return;

        case '(':
          tp->type = token_type_lparen;
          return;

        case ')':
          tp->type = token_type_rparen;
          return;

        case ',':
          tp->type = token_type_comma;
          return;

        default:
          /* We could carefully recognize each of the 2 and 3 character
             operators, but it is not necessary, as we only need to recognize
             gettext invocations.  Don't bother.  */
          tp->type = token_type_other;
          return;
        }
    }
}
Exemple #5
0
static void
x_awk_lex (token_ty *tp)
{
  static char *buffer;
  static int bufmax;
  int bufpos;
  int c;

  for (;;)
    {
      tp->line_number = line_number;
      c = phase2_getc ();

      switch (c)
        {
        case EOF:
          tp->type = token_type_eof;
          return;

        case '\n':
          if (last_non_comment_line > last_comment_line)
            savable_comment_reset ();
          /* Newline is not allowed inside expressions.  It usually
             introduces a fresh statement.
             FIXME: Newlines after any of ',' '{' '?' ':' '||' '&&' 'do' 'else'
             does *not* introduce a fresh statement.  */
          prefer_division_over_regexp = false;
          /* FALLTHROUGH */
        case '\t':
        case ' ':
          /* Ignore whitespace and comments.  */
          continue;

        case '\\':
          /* Backslash ought to be immediately followed by a newline.  */
          continue;
        }

      last_non_comment_line = tp->line_number;

      switch (c)
        {
        case '.':
          {
            int c2 = phase2_getc ();
            phase2_ungetc (c2);
            if (!(c2 >= '0' && c2 <= '9'))
              {

                tp->type = token_type_other;
                prefer_division_over_regexp = false;
                return;
              }
          }
          /* FALLTHROUGH */
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
        case 'Y': case 'Z':
        case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
        case 's': case 't': case 'u': case 'v': case 'w': case 'x':
        case 'y': case 'z':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
          /* Symbol, or part of a number.  */
          bufpos = 0;
          for (;;)
            {
              if (bufpos >= bufmax)
                {
                  bufmax = 2 * bufmax + 10;
                  buffer = xrealloc (buffer, bufmax);
                }
              buffer[bufpos++] = c;
              c = phase2_getc ();
              switch (c)
                {
                case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
                case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
                case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
                case 'Y': case 'Z':
                case '_':
                case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
                case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
                case 's': case 't': case 'u': case 'v': case 'w': case 'x':
                case 'y': case 'z':
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                  continue;
                default:
                  if (bufpos == 1 && buffer[0] == '_' && c == '"')
                    {
                      tp->type = token_type_i18nstring;
                      goto case_string;
                    }
                  phase2_ungetc (c);
                  break;
                }
              break;
            }
          if (bufpos >= bufmax)
            {
              bufmax = 2 * bufmax + 10;
              buffer = xrealloc (buffer, bufmax);
            }
          buffer[bufpos] = '\0';
          tp->string = xstrdup (buffer);
          tp->type = token_type_symbol;
          /* Most identifiers can be variable names; after them we must
             interpret '/' as division operator.  But for awk's builtin
             keywords we have three cases:
             (a) Must interpret '/' as division operator. "length".
             (b) Must interpret '/' as start of a regular expression.
                 "do", "exit", "print", "printf", "return".
             (c) '/' after this keyword in invalid anyway. All others.
             I used the following script for the distinction.
                for k in $awk_keywords; do
                  echo; echo $k; awk "function foo () { $k / 10 }" < /dev/null
                done
           */
          if (strcmp (buffer, "do") == 0
              || strcmp (buffer, "exit") == 0
              || strcmp (buffer, "print") == 0
              || strcmp (buffer, "printf") == 0
              || strcmp (buffer, "return") == 0)
            prefer_division_over_regexp = false;
          else
            prefer_division_over_regexp = true;
          return;

        case '"':
          tp->type = token_type_string;
        case_string:
          bufpos = 0;
          for (;;)
            {
              c = phase7_getc ();
              if (c == EOF || c == P7_QUOTES)
                break;
              if (bufpos >= bufmax)
                {
                  bufmax = 2 * bufmax + 10;
                  buffer = xrealloc (buffer, bufmax);
                }
              buffer[bufpos++] = c;
            }
          if (bufpos >= bufmax)
            {
              bufmax = 2 * bufmax + 10;
              buffer = xrealloc (buffer, bufmax);
            }
          buffer[bufpos] = '\0';
          tp->string = xstrdup (buffer);
          prefer_division_over_regexp = true;
          return;

        case '(':
          tp->type = token_type_lparen;
          prefer_division_over_regexp = false;
          return;

        case ')':
          tp->type = token_type_rparen;
          prefer_division_over_regexp = true;
          return;

        case ',':
          tp->type = token_type_comma;
          prefer_division_over_regexp = false;
          return;

        case ';':
          tp->type = token_type_semicolon;
          prefer_division_over_regexp = false;
          return;

        case ']':
          tp->type = token_type_other;
          prefer_division_over_regexp = true;
          return;

        case '/':
          if (!prefer_division_over_regexp)
            {
              /* Regular expression.
                 Counting brackets is non-trivial. [[] is balanced, and so is
                 [\]]. Also, /[/]/ is balanced and ends at the third slash.
                 Do not count [ or ] if either one is preceded by a \.
                 A '[' should be counted if
                  a) it is the first one so far (brackets == 0), or
                  b) it is the '[' in '[:'.
                 A ']' should be counted if not preceded by a \.
                 According to POSIX, []] is how you put a ] into a set.
                 Try to handle that too.
               */
              int brackets = 0;
              bool pos0 = true;         /* true at start of regexp */
              bool pos1_open = false;   /* true after [ at start of regexp */
              bool pos2_open_not = false; /* true after [^ at start of regexp */

              for (;;)
                {
                  c = phase1_getc ();

                  if (c == EOF || c == '\n')
                    {
                      phase1_ungetc (c);
                      error_with_progname = false;
                      error (0, 0, _("%s:%d: warning: unterminated regular expression"),
                             logical_file_name, line_number);
                      error_with_progname = true;
                      break;
                    }
                  else if (c == '[')
                    {
                      if (brackets == 0)
                        brackets++;
                      else
                        {
                          c = phase1_getc ();
                          if (c == ':')
                            brackets++;
                          phase1_ungetc (c);
                        }
                      if (pos0)
                        {
                          pos0 = false;
                          pos1_open = true;
                          continue;
                        }
                    }
                  else if (c == ']')
                    {
                      if (!(pos1_open || pos2_open_not))
                        brackets--;
                    }
                  else if (c == '^')
                    {
                      if (pos1_open)
                        {
                          pos1_open = false;
                          pos2_open_not = true;
                          continue;
                        }
                    }
                  else if (c == '\\')
                    {
                      c = phase1_getc ();
                      /* Backslash-newline is valid and ignored.  */
                    }
                  else if (c == '/')
                    {
                      if (brackets <= 0)
                        break;
                    }

                  pos0 = false;
                  pos1_open = false;
                  pos2_open_not = false;
                }

              tp->type = token_type_other;
              prefer_division_over_regexp = false;
              return;
            }
          /* FALLTHROUGH */

        default:
          /* We could carefully recognize each of the 2 and 3 character
             operators, but it is not necessary, as we only need to recognize
             gettext invocations.  Don't bother.  */
          tp->type = token_type_other;
          prefer_division_over_regexp = false;
          return;
        }
    }
}
Exemple #6
0
static inline void
phase3_ungetc (int c)
{
  phase2_ungetc (c);
}