Esempio n. 1
0
static void
phase3_scan_regex ()
{
    int c;

    for (;;)
      {
        c = phase1_getc ();
        if (c == '/')
          break;
        if (c == '\\')
          {
            c = phase1_getc ();
            if (c != EOF)
              continue;
          }
        if (c == EOF)
          {
            error_with_progname = false;
            error (0, 0,
                   _("%s:%d: warning: regular expression literal terminated too early"),
                   logical_file_name, line_number);
            error_with_progname = true;
            return;
          }
      }

    c = phase2_getc ();
    if (!(c == 'i' || c == 's' || c == 'm' || c == 'x'))
      phase2_ungetc (c);
}
Esempio n. 2
0
File: x-c.c Progetto: alan707/senuti
static int
phase3_getc ()
{
  if (phase3_pushback_length)
    return phase3_pushback[--phase3_pushback_length];
  for (;;)
    {
      int c = phase2_getc ();
      if (c != '\\')
	return c;
      c = phase2_getc ();
      if (c != '\n')
	{
	  phase2_ungetc (c);
	  return '\\';
	}
    }
}
Esempio n. 3
0
static int
phase3_getc ()
{
  int c = phase2_getc ();

  for (;;)
    {
      if (c != '\\')
	return c;

      c = phase2_getc ();
      if (c != '\n')
	{
	  phase2_ungetc (c);
	  return '\\';
	}

      /* Skip the backslash-newline and all whitespace that follows it.  */
      do
	c = phase2_getc ();
      while (c == ' ' || c == '\t' || c == '\r' || c == '\f');
    }
}
Esempio n. 4
0
static void
phase5_get (token_ty *tp)
{
  static char *buffer;
  static int bufmax;
  int bufpos;
  int c;

  if (phase5_pushback_length)
    {
      *tp = phase5_pushback[--phase5_pushback_length];
      return;
    }
  for (;;)
    {
      tp->line_number = line_number;
      c = phase2_getc ();

      switch (c)
        {
        case EOF:
          tp->type = token_type_eof;
          return;

        case '\n':
          if (last_non_comment_line > last_comment_line)
            savable_comment_reset ();
          /* FALLTHROUGH */
        case '\r':
        case '\t':
        case ' ':
          /* Ignore whitespace and comments.  */
          continue;
        }

      last_non_comment_line = tp->line_number;

      switch (c)
        {
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
        case 'Y': case 'Z':
        case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
        case 's': case 't': case 'u': case 'v': case 'w': case 'x':
        case 'y': case 'z':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
          /* Symbol, or part of a number.  */
          bufpos = 0;
          for (;;)
            {
              if (bufpos >= bufmax)
                {
                  bufmax = 2 * bufmax + 10;
                  buffer = xrealloc (buffer, bufmax);
                }
              buffer[bufpos++] = c;
              c = phase2_getc ();
              switch (c)
                {
                case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
                case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
                case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
                case 'Y': case 'Z':
                case '_':
                case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
                case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
                case 's': case 't': case 'u': case 'v': case 'w': case 'x':
                case 'y': case 'z':
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                  continue;
                default:
                  if (bufpos == 1 && buffer[0] == '_' && c == '(')
                    {
                      tp->type = token_type_i18n;
                      return;
                    }
                  phase2_ungetc (c);
                  break;
                }
              break;
            }
          if (bufpos >= bufmax)
            {
              bufmax = 2 * bufmax + 10;
              buffer = xrealloc (buffer, bufmax);
            }
          buffer[bufpos] = '\0';
          tp->string = xstrdup (buffer);
          tp->type = token_type_symbol;
          return;

        case '"':
          bufpos = 0;
          for (;;)
            {
              c = phase7_getc ();
              if (c == EOF || c == P7_QUOTES)
                break;
              if (bufpos >= bufmax)
                {
                  bufmax = 2 * bufmax + 10;
                  buffer = xrealloc (buffer, bufmax);
                }
              buffer[bufpos++] = c;
            }
          if (bufpos >= bufmax)
            {
              bufmax = 2 * bufmax + 10;
              buffer = xrealloc (buffer, bufmax);
            }
          buffer[bufpos] = '\0';
          tp->string = xstrdup (buffer);
          tp->type = token_type_string_literal;
          tp->comment = add_reference (savable_comment);
          return;

        case '(':
          tp->type = token_type_lparen;
          return;

        case ')':
          tp->type = token_type_rparen;
          return;

        case ',':
          tp->type = token_type_comma;
          return;

        default:
          /* We could carefully recognize each of the 2 and 3 character
             operators, but it is not necessary, as we only need to recognize
             gettext invocations.  Don't bother.  */
          tp->type = token_type_other;
          return;
        }
    }
}
Esempio n. 5
0
static void
x_awk_lex (token_ty *tp)
{
  static char *buffer;
  static int bufmax;
  int bufpos;
  int c;

  for (;;)
    {
      tp->line_number = line_number;
      c = phase2_getc ();

      switch (c)
        {
        case EOF:
          tp->type = token_type_eof;
          return;

        case '\n':
          if (last_non_comment_line > last_comment_line)
            savable_comment_reset ();
          /* Newline is not allowed inside expressions.  It usually
             introduces a fresh statement.
             FIXME: Newlines after any of ',' '{' '?' ':' '||' '&&' 'do' 'else'
             does *not* introduce a fresh statement.  */
          prefer_division_over_regexp = false;
          /* FALLTHROUGH */
        case '\t':
        case ' ':
          /* Ignore whitespace and comments.  */
          continue;

        case '\\':
          /* Backslash ought to be immediately followed by a newline.  */
          continue;
        }

      last_non_comment_line = tp->line_number;

      switch (c)
        {
        case '.':
          {
            int c2 = phase2_getc ();
            phase2_ungetc (c2);
            if (!(c2 >= '0' && c2 <= '9'))
              {

                tp->type = token_type_other;
                prefer_division_over_regexp = false;
                return;
              }
          }
          /* FALLTHROUGH */
        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
        case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
        case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
        case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
        case 'Y': case 'Z':
        case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
        case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
        case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
        case 's': case 't': case 'u': case 'v': case 'w': case 'x':
        case 'y': case 'z':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
          /* Symbol, or part of a number.  */
          bufpos = 0;
          for (;;)
            {
              if (bufpos >= bufmax)
                {
                  bufmax = 2 * bufmax + 10;
                  buffer = xrealloc (buffer, bufmax);
                }
              buffer[bufpos++] = c;
              c = phase2_getc ();
              switch (c)
                {
                case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
                case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
                case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
                case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
                case 'Y': case 'Z':
                case '_':
                case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
                case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
                case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
                case 's': case 't': case 'u': case 'v': case 'w': case 'x':
                case 'y': case 'z':
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                  continue;
                default:
                  if (bufpos == 1 && buffer[0] == '_' && c == '"')
                    {
                      tp->type = token_type_i18nstring;
                      goto case_string;
                    }
                  phase2_ungetc (c);
                  break;
                }
              break;
            }
          if (bufpos >= bufmax)
            {
              bufmax = 2 * bufmax + 10;
              buffer = xrealloc (buffer, bufmax);
            }
          buffer[bufpos] = '\0';
          tp->string = xstrdup (buffer);
          tp->type = token_type_symbol;
          /* Most identifiers can be variable names; after them we must
             interpret '/' as division operator.  But for awk's builtin
             keywords we have three cases:
             (a) Must interpret '/' as division operator. "length".
             (b) Must interpret '/' as start of a regular expression.
                 "do", "exit", "print", "printf", "return".
             (c) '/' after this keyword in invalid anyway. All others.
             I used the following script for the distinction.
                for k in $awk_keywords; do
                  echo; echo $k; awk "function foo () { $k / 10 }" < /dev/null
                done
           */
          if (strcmp (buffer, "do") == 0
              || strcmp (buffer, "exit") == 0
              || strcmp (buffer, "print") == 0
              || strcmp (buffer, "printf") == 0
              || strcmp (buffer, "return") == 0)
            prefer_division_over_regexp = false;
          else
            prefer_division_over_regexp = true;
          return;

        case '"':
          tp->type = token_type_string;
        case_string:
          bufpos = 0;
          for (;;)
            {
              c = phase7_getc ();
              if (c == EOF || c == P7_QUOTES)
                break;
              if (bufpos >= bufmax)
                {
                  bufmax = 2 * bufmax + 10;
                  buffer = xrealloc (buffer, bufmax);
                }
              buffer[bufpos++] = c;
            }
          if (bufpos >= bufmax)
            {
              bufmax = 2 * bufmax + 10;
              buffer = xrealloc (buffer, bufmax);
            }
          buffer[bufpos] = '\0';
          tp->string = xstrdup (buffer);
          prefer_division_over_regexp = true;
          return;

        case '(':
          tp->type = token_type_lparen;
          prefer_division_over_regexp = false;
          return;

        case ')':
          tp->type = token_type_rparen;
          prefer_division_over_regexp = true;
          return;

        case ',':
          tp->type = token_type_comma;
          prefer_division_over_regexp = false;
          return;

        case ';':
          tp->type = token_type_semicolon;
          prefer_division_over_regexp = false;
          return;

        case ']':
          tp->type = token_type_other;
          prefer_division_over_regexp = true;
          return;

        case '/':
          if (!prefer_division_over_regexp)
            {
              /* Regular expression.
                 Counting brackets is non-trivial. [[] is balanced, and so is
                 [\]]. Also, /[/]/ is balanced and ends at the third slash.
                 Do not count [ or ] if either one is preceded by a \.
                 A '[' should be counted if
                  a) it is the first one so far (brackets == 0), or
                  b) it is the '[' in '[:'.
                 A ']' should be counted if not preceded by a \.
                 According to POSIX, []] is how you put a ] into a set.
                 Try to handle that too.
               */
              int brackets = 0;
              bool pos0 = true;         /* true at start of regexp */
              bool pos1_open = false;   /* true after [ at start of regexp */
              bool pos2_open_not = false; /* true after [^ at start of regexp */

              for (;;)
                {
                  c = phase1_getc ();

                  if (c == EOF || c == '\n')
                    {
                      phase1_ungetc (c);
                      error_with_progname = false;
                      error (0, 0, _("%s:%d: warning: unterminated regular expression"),
                             logical_file_name, line_number);
                      error_with_progname = true;
                      break;
                    }
                  else if (c == '[')
                    {
                      if (brackets == 0)
                        brackets++;
                      else
                        {
                          c = phase1_getc ();
                          if (c == ':')
                            brackets++;
                          phase1_ungetc (c);
                        }
                      if (pos0)
                        {
                          pos0 = false;
                          pos1_open = true;
                          continue;
                        }
                    }
                  else if (c == ']')
                    {
                      if (!(pos1_open || pos2_open_not))
                        brackets--;
                    }
                  else if (c == '^')
                    {
                      if (pos1_open)
                        {
                          pos1_open = false;
                          pos2_open_not = true;
                          continue;
                        }
                    }
                  else if (c == '\\')
                    {
                      c = phase1_getc ();
                      /* Backslash-newline is valid and ignored.  */
                    }
                  else if (c == '/')
                    {
                      if (brackets <= 0)
                        break;
                    }

                  pos0 = false;
                  pos1_open = false;
                  pos2_open_not = false;
                }

              tp->type = token_type_other;
              prefer_division_over_regexp = false;
              return;
            }
          /* FALLTHROUGH */

        default:
          /* We could carefully recognize each of the 2 and 3 character
             operators, but it is not necessary, as we only need to recognize
             gettext invocations.  Don't bother.  */
          tp->type = token_type_other;
          prefer_division_over_regexp = false;
          return;
        }
    }
}
Esempio n. 6
0
  if (phase3_pushback_length)
    {
      *tp = phase3_pushback[--phase3_pushback_length];
      last_token_type = tp->type;
      return;
    }

  for (;;)
    {
      bool template;
      bool verbatim;
      int c;

      tp->line_number = line_number;
      c = phase2_getc ();

      switch (c)
        {
        case EOF:
          tp->type = last_token_type = token_type_eof;
          return;

        case '\n':
          if (last_non_comment_line > last_comment_line)
            savable_comment_reset ();
          /* FALLTHROUGH */
        case ' ':
        case '\f':
        case '\t':
          /* Ignore whitespace and comments.  */
Esempio n. 7
0
static void
phase3_get (token_ty *tp)
{
  int c;
  int c2;
  int c_start;

  if (phase3_pushback_length)
    {
      *tp = phase3_pushback[--phase3_pushback_length];
      return;
    }

  tp->string = NULL;

  for (;;)
    {
      tp->line_number = line_number;
      c = phase2_getc ();

      switch (c)
        {
        case EOF:
          tp->type = token_type_eof;
          return;

        case '\n':
          if (last_non_comment_line > last_comment_line)
            savable_comment_reset ();
          /* Intentionally not breaking.  */
        case ' ':
        case '\t':
        case '\f':
          continue;

        case '+':
        case '-':
        case '*':
        case '/':
        case '^':
        case '%':
        case '#':
          tp->type = token_type_operator1;
          return;
        case '<':
        case '>':
        case '=':
          c2 = phase1_getc ();
          if (c2 != '=')
            phase1_ungetc (c2);
          tp->type = token_type_operator2;
          return;
        case '~':
          c2 = phase1_getc ();
          if (c2 == '=')
            {
              tp->type = token_type_operator2;
              return;
            }
          else
            phase1_ungetc (c2);
          continue;
        case '(':
          tp->type = token_type_lparen;
          return;
        case ')':
          tp->type = token_type_rparen;
          return;
        case ',':
          tp->type = token_type_comma;
          return;

        case ';':
          tp->type = token_type_other;
          return;

          /* There are three operators beginning with a dot.  '.',
             '..' and '...'.  The most useful for us is the string
             concatenation operator ('..').  */
        case '.':
          c = phase1_getc ();
          if (c == '.')
            {
              c = phase1_getc ();
              if (c == '.')
                {
                  tp->type = token_type_other;
                  return;
                }
              else
                {
                  phase1_ungetc (c);
                  tp->type = token_type_doubledot;
                  return;
                }
            }
          else if (c >= '0' && c <= '9')
            {
              /* It's a number.  We aren't interested in the actual
                 numeric value, so ignore the dot and let next
                 iteration eat the number.  */
              phase1_ungetc (c);
              continue;
            }
          else
            {
              phase1_ungetc (c);
              tp->type = token_type_dot;
              return;
            }

        case '"':
        case '\'':
          c_start = c;
          string_start ();

          for (;;)
            {
              /* We need unprocessed characters from phase 1.  */
              c = phase1_getc ();

              /* We got '\', this is probably an escape sequence.  */
              if (c == '\\')
                {
                  c = phase1_getc ();
                  switch (c)
                    {
                    case 'a':
                      string_add ('\a');
                      break;
                    case 'b':
                      string_add ('\b');
                      break;
                    case 'f':
                      string_add ('\f');
                      break;
                    case 'n':
                      string_add ('\n');
                      break;
                    case 'r':
                      string_add ('\r');
                      break;
                    case 't':
                      string_add ('\t');
                      break;
                    case 'v':
                      string_add ('\v');
                      break;
                    case 'x':
                      {
                        int num = 0;
                        int i = 0;

                        for (i = 0; i < 2; i++)
                          {
                            c = phase1_getc ();
                            if (c >= '0' && c <= '9')
                              num += c - '0';
                            else if (c >= 'a' && c <= 'f')
                              num += c - 'a' + 10;
                            else if (c >= 'A' && c <= 'F')
                              num += c - 'A' + 10;
                            else
                              {
                                phase1_ungetc (c);
                                break;
                              }

                            if (i == 0)
                              num *= 16;
                          }

                        if (i == 2)
                          string_add (num);
                      }

                      break;
                    case 'z':
                      /* Ignore the following whitespace.  */
                      do
                        {
                          c = phase1_getc ();
                        }
                      while (c == ' ' || c == '\n' || c == '\t' || c == '\r'
                             || c == '\f' || c == '\v');

                      phase1_ungetc (c);

                      break;
                    default:
                      /* Check if it's a '\ddd' sequence.  */
                      if (c >= '0' && c <= '9')
                        {
                          int num = 0;
                          int i = 0;

                          while (c >= '0' && c <= '9' && i < 3)
                            {
                              num *= 10;
                              num += (c - '0');
                              c = phase1_getc ();
                              i++;
                            }

                          /* The last read character is either a
                             non-number or another number after our
                             '\ddd' sequence.  We need to ungetc it.  */
                          phase1_ungetc (c);

                          /* The sequence number is too big, this
                             causes a lexical error.  Ignore it.  */
                          if (num < 256)
                            string_add (num);
                        }
                      else
                        string_add (c);
                    }
                }
              else if (c == c_start || c == EOF || c == '\n')
                {
                  /* End of string.  */
                  string_end ();
                  tp->string = xstrdup (string_buf);
                  tp->comment = add_reference (savable_comment);
                  tp->type = token_type_string;
                  return;
                }
              else
                string_add (c);
            }
          break;

        case '[':
          c = phase1_getc ();

          /* Count the number of equal signs.  */
          int esigns = 0;
          while (c == '=')
            {
              esigns++;
              c = phase1_getc ();
            }

          if (c != '[')
            {
              /* We did not find what we were looking for, ungetc it.  */
              phase1_ungetc (c);
              if (esigns == 0)
                {
                  /* Our current character isn't '[' and we got 0 equal
                     signs, so the first '[' must have been a left
                     bracket.  */
                  tp->type = token_type_lbracket;
                  return;
                }
              else
                /* Lexical error, ignore it.  */
                continue;
            }

          string_start ();

          for (;;)
            {
              c = phase1_getc ();

              if (c == ']')
                {
                  c = phase1_getc ();

                  /* Count the number of equal signs.  */
                  int esigns2 = 0;
                  while (c == '=')
                    {
                      esigns2++;
                      c = phase1_getc ();
                    }

                  if (c == ']' && esigns == esigns2)
                    {
                      /* We got ']==...==]', where the number of equal
                         signs matches the number of equal signs in
                         the opening bracket.  */
                      string_end ();
                      tp->string = xstrdup (string_buf);
                      tp->comment = add_reference (savable_comment);
                      tp->type = token_type_string;
                      return;
                    }
                  else
                    {
                      /* Otherwise we got either ']==' garbage or
                         ']==...==]' with a different number of equal
                         signs.

                         Add ']' and equal signs to the string, and
                         ungetc the current character, because the
                         second ']' might be a part of another closing
                         long bracket, e.g. '==]===]'.  */
                      phase1_ungetc (c);

                      string_add (']');
                      while (esigns2--)
                        string_add ('=');
                    }
                }
              else
                {
                  if (c == EOF)
                    {
                      string_end ();
                      tp->string = xstrdup (string_buf);
                      tp->comment = add_reference (savable_comment);
                      tp->type = token_type_string;
                      return;
                    }
                  else
                    string_add (c);
                }
            }
          break;

        case ']':
          tp->type = token_type_rbracket;
          return;

        default:
          if (c >= '0' && c <= '9')
            {
              while (c >= '0' && c <= '9')
                c = phase1_getc ();

              if (c == '.')
                {
                  c = phase1_getc ();
                  while (c >= '0' && c <= '9')
                    c = phase1_getc ();
                }

              if (c == 'e' || c == 'E')
                {
                  if (c == '+' || c == '-')
                    c = phase1_getc ();
                  while (c >= '0' && c <= '9')
                    c = phase1_getc ();
                }

              phase1_ungetc (c);

              tp->type = token_type_number;
              return;
            }
          else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
                   || c == '_')
            {
              string_start ();
              while ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
                     || c == '_' || (c >= '0' && c <= '9'))
                {
                  string_add (c);
                  c = phase1_getc ();
                }
              string_end ();
              phase1_ungetc (c);

              if (strcmp (string_buf, "not") == 0)
                tp->type = token_type_operator1;
              else if (strcmp (string_buf, "and") == 0)
                tp->type = token_type_operator2;
              else if (strcmp (string_buf, "or") == 0)
                tp->type = token_type_operator2;
              else
                {
                  tp->string = xstrdup (string_buf);
                  tp->type = token_type_symbol;
                }
              return;
            }
          else
            tp->type = token_type_other;
        }
    }
}