コード例 #1
0
ファイル: input.c プロジェクト: WndSks/msys
token_type
next_token (token_data *td)
{
  int ch;
  int quote_level;
  token_type type;
#ifdef ENABLE_CHANGEWORD
  int startpos;
  char *orig_text = 0;
#endif

  obstack_free (&token_stack, token_bottom);
  obstack_1grow (&token_stack, '\0');
  token_bottom = obstack_finish (&token_stack);

  ch = peek_input ();
  if (ch == CHAR_EOF)
    {
      return TOKEN_EOF;
#ifdef DEBUG_INPUT
      fprintf (stderr, "next_token -> EOF\n");
#endif
    }
  if (ch == CHAR_MACRO)
    {
      init_macro_token (td);
      (void) next_char ();
      return TOKEN_MACDEF;
    }

  (void) next_char ();
  if (MATCH (ch, bcomm.string))
    {
      obstack_grow (&token_stack, bcomm.string, bcomm.length);
      while ((ch = next_char ()) != CHAR_EOF && !MATCH (ch, ecomm.string))
	obstack_1grow (&token_stack, ch);
      if (ch != CHAR_EOF)
	obstack_grow (&token_stack, ecomm.string, ecomm.length);
      type = TOKEN_STRING;
    }
#ifdef ENABLE_CHANGEWORD
  else if (default_word_regexp && (isalpha (ch) || ch == '_'))
#else
  else if (isalpha (ch) || ch == '_')
#endif
    {
      obstack_1grow (&token_stack, ch);
      while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_'))
	{
	  obstack_1grow (&token_stack, ch);
	  (void) next_char ();
	}
      type = TOKEN_WORD;
    }

#ifdef ENABLE_CHANGEWORD

  else if (!default_word_regexp && strchr (word_start, ch))
    {
      obstack_1grow (&token_stack, ch);
      while (1)
        {
	  ch = peek_input ();
	  if (ch == CHAR_EOF)
	    break;
	  obstack_1grow (&token_stack, ch);
	  startpos = re_search (&word_regexp, obstack_base (&token_stack),
				obstack_object_size (&token_stack), 0, 0,
				&regs);
	  if (startpos != 0 ||
	      regs.end [0] != obstack_object_size (&token_stack))
	    {
	      *(((char *) obstack_base (&token_stack)
		 + obstack_object_size (&token_stack)) - 1) = '\0';
	      break;
	    }
	  next_char ();
	}

      obstack_1grow (&token_stack, '\0');
      orig_text = obstack_finish (&token_stack);

      if (regs.start[1] != -1)
	obstack_grow (&token_stack,orig_text + regs.start[1],
		      regs.end[1] - regs.start[1]);
      else
	obstack_grow (&token_stack, orig_text,regs.end[0]);

      type = TOKEN_WORD;
    }

#endif /* ENABLE_CHANGEWORD */

  else if (!MATCH (ch, lquote.string))
    {
      type = TOKEN_SIMPLE;
      obstack_1grow (&token_stack, ch);
    }
  else
    {
      quote_level = 1;
      while (1)
	{
	  ch = next_char ();
	  if (ch == CHAR_EOF)
	    M4ERROR ((EXIT_FAILURE, 0,
		      "ERROR: EOF in string"));

	  if (MATCH (ch, rquote.string))
	    {
	      if (--quote_level == 0)
		break;
	      obstack_grow (&token_stack, rquote.string, rquote.length);
	    }
	  else if (MATCH (ch, lquote.string))
	    {
	      quote_level++;
	      obstack_grow (&token_stack, lquote.string, lquote.length);
	    }
	  else
	    obstack_1grow (&token_stack, ch);
	}
      type = TOKEN_STRING;
    }

  obstack_1grow (&token_stack, '\0');

  TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
  TOKEN_DATA_TEXT (td) = obstack_finish (&token_stack);
#ifdef ENABLE_CHANGEWORD
  if (orig_text == NULL)
    orig_text = TOKEN_DATA_TEXT (td);
  TOKEN_DATA_ORIG_TEXT (td) = orig_text;
#endif
#ifdef DEBUG_INPUT
  fprintf (stderr, "next_token -> %d (%s)\n", type, TOKEN_DATA_TEXT (td));
#endif
  return type;
}
コード例 #2
0
ファイル: input.c プロジェクト: thewml/wml
token_type
next_token (token_data *td, read_type expansion, boolean in_string)
{
  int ch;
  token_type type = TOKEN_NONE;
  int quote_level;

  if (TOKEN_DATA_TYPE (&token_read) != TOKEN_VOID)
    {
      type = TOKEN_STRING;
      obstack_grow (&token_stack, TOKEN_DATA_TEXT (&token_read),
              strlen (TOKEN_DATA_TEXT (&token_read)));
      xfree ((voidstar) TOKEN_DATA_TEXT (&token_read));
      TOKEN_DATA_TYPE (&token_read) = TOKEN_VOID;
    }

  while (type == TOKEN_NONE)
    {
      obstack_free (&token_stack, token_bottom);
      obstack_1grow (&token_stack, '\0');
      token_bottom = obstack_finish (&token_stack);

      ch = peek_input ();
      if (ch == CHAR_EOF)                 /* EOF */
        {
#ifdef DEBUG_INPUT
          fprintf (stderr, "next_token -> EOF\n");
#endif
          return TOKEN_EOF;
        }

      if (ch == CHAR_MACRO)               /* MACRO TOKEN */
        {
          init_macro_token (td);
          (void) next_char ();
#ifdef DEBUG_INPUT
          print_token("next_token", TOKEN_MACDEF, td);
#endif
          return TOKEN_MACDEF;
        }

      (void) next_char ();
      if (IS_TAG(ch))             /* ESCAPED WORD */
        {
          if (lquote.length > 0 && MATCH (ch, lquote.string))
            {
              if (visible_quotes || expansion == READ_ATTR_VERB
                  || expansion == READ_ATTR_ASIS || expansion == READ_BODY)
                obstack_grow (&token_stack, lquote.string, lquote.length);
              while ((ch = next_char ()) != CHAR_EOF)
                {
                  if (rquote.length > 0 && MATCH (ch, rquote.string))
                    break;
                  obstack_1grow (&token_stack, ch);
                }
              if (visible_quotes || expansion == READ_ATTR_VERB
                  || expansion == READ_ATTR_ASIS || expansion == READ_BODY)
                {
                  obstack_grow (&token_stack, rquote.string, rquote.length);
                  type = TOKEN_STRING;
                }
              else
                type = TOKEN_QUOTED;
            }
          else
            {
              obstack_1grow (&token_stack, ch);
              if ((ch = peek_input ()) != CHAR_EOF)
                {
                  if (ch == '/')
                    {
                      obstack_1grow (&token_stack, '/');
                      (void) next_char ();
                      ch = peek_input ();
                    }
                  if (IS_ALPHA(ch))
                    {
                      ch = next_char ();
                      obstack_1grow (&token_stack, ch);
                      while ((ch = next_char ()) != CHAR_EOF && IS_ALNUM(ch))
                        {
                          obstack_1grow (&token_stack, ch);
                        }
                      if (ch == '*')
                        {
                          obstack_1grow (&token_stack, ch);
                          ch = peek_input ();
                        }
                      else
                        unget_input(ch);

                      if (IS_SPACE(ch) || IS_CLOSE(ch) || IS_SLASH (ch))
                        type = TOKEN_WORD;
                      else
                        type = TOKEN_STRING;
                    }
                  else
                    type = TOKEN_STRING;
                }
              else
                type = TOKEN_SIMPLE;        /* escape before eof */
            }
        }
      else if (IS_CLOSE(ch))
        {
          obstack_1grow (&token_stack, ch);
          type = TOKEN_SIMPLE;
        }
      else if (IS_ENTITY(ch))             /* entity  */
        {
          obstack_1grow (&token_stack, ch);
          if ((ch = peek_input ()) != CHAR_EOF)
            {
              if (IS_ALPHA(ch))
                {
                  ch = next_char ();
                  obstack_1grow (&token_stack, ch);
                  while ((ch = next_char ()) != CHAR_EOF && IS_ALNUM(ch))
                    {
                      obstack_1grow (&token_stack, ch);
                    }

                  if (ch == ';')
                    {
                      obstack_1grow (&token_stack, ch);
                      type = TOKEN_ENTITY;
                    }
                  else
                    {
                      type = TOKEN_STRING;
                      unget_input(ch);
                    }
                }
              else
                type = TOKEN_STRING;
            }
          else
            type = TOKEN_SIMPLE;        /* escape before eof */
        }
      else if (eolcomm.length > 0 && MATCH (ch, eolcomm.string))
        skip_line ();
      else if (expansion == READ_BODY)
        {
          if (ch == '"')
            obstack_1grow (&token_stack, CHAR_QUOTE);
          else
            obstack_1grow (&token_stack, ch);
          while ((ch = next_char ()) != CHAR_EOF
                  && ! IS_TAG(ch) && ! IS_CLOSE (ch))
            {
              if (eolcomm.length > 0 && MATCH (ch, eolcomm.string))
                {
                  skip_line ();
                  ch = CHAR_EOF;
                  break;
                }
              if (ch == '"')
                obstack_1grow (&token_stack, CHAR_QUOTE);
              else
                obstack_1grow (&token_stack, ch);
            }
          unget_input(ch);

          type = TOKEN_STRING;
        }
      /*  Below we know that expansion != READ_BODY  */
      else if (IS_ALPHA(ch))
        {
          obstack_1grow (&token_stack, ch);
          while ((ch = next_char ()) != CHAR_EOF && (IS_ALNUM(ch)))
            {
              obstack_1grow (&token_stack, ch);
            }
          if (ch == '*')
            {
              obstack_1grow (&token_stack, ch);
              ch = peek_input ();
            }
          else
            unget_input(ch);

          type = TOKEN_STRING;
        }
      else if (IS_RQUOTE(ch))
        {
          MP4HERROR ((EXIT_FAILURE, 0,
             "INTERNAL ERROR: CHAR_RQUOTE found."));
        }
      else if (IS_LQUOTE(ch))             /* QUOTED STRING */
        {
          quote_level = 1;
          while (1)
            {
              ch = next_char ();
              if (ch == CHAR_EOF)
                MP4HERROR ((EXIT_FAILURE, 0,
                   "INTERNAL ERROR: EOF in string"));

              if (IS_BGROUP(ch) || IS_EGROUP(ch))
                continue;
              else if (IS_RQUOTE(ch))
                {
                  quote_level--;
                  if (quote_level == 0)
                      break;
                }
              else if (IS_LQUOTE(ch))
                quote_level++;
              else
                obstack_1grow (&token_stack, ch);
            }
          type = TOKEN_QUOTED;
        }
      else if (IS_BGROUP(ch))             /* BEGIN GROUP */
        type = TOKEN_BGROUP;
      else if (IS_EGROUP(ch))             /* END GROUP */
        type = TOKEN_EGROUP;
      else if (ch == '"')                 /* QUOTED STRING */
        {
          switch (expansion)
          {
            case READ_NORMAL:
              obstack_1grow (&token_stack, CHAR_QUOTE);
              type = TOKEN_SIMPLE;
              break;

            case READ_ATTRIBUTE:
            case READ_ATTR_VERB:
              type = TOKEN_QUOTE;
              break;

            case READ_ATTR_ASIS:
            case READ_ATTR_QUOT:
              obstack_1grow (&token_stack, '"');
              type = TOKEN_QUOTE;
              break;

            default:
              MP4HERROR ((warning_status, 0,
                "INTERNAL ERROR: Unknown expansion type"));
              exit (1);
          }
        }
      else if (ch == '\\')
        {
          switch (expansion)
          {
            case READ_NORMAL:
              obstack_1grow (&token_stack, ch);
              type = TOKEN_SIMPLE;
              break;

            case READ_ATTRIBUTE:
            case READ_ATTR_QUOT:
              ch = next_char();
              if (ch == 'n')
                obstack_1grow (&token_stack, '\n');
              else if (ch == 't')
                obstack_1grow (&token_stack, '\t');
              else if (ch == 'r')
                obstack_1grow (&token_stack, '\r');
              else if (ch == '\\')
                obstack_1grow (&token_stack, ch);
              else if (ch == '"' && in_string)
                obstack_1grow (&token_stack, CHAR_QUOTE);
              else
                {
                  if (!(exp_flags & EXP_STD_BSLASH))
                    obstack_1grow (&token_stack, '\\');
                  obstack_1grow (&token_stack, ch);
                }

              type = TOKEN_STRING;
              break;

            case READ_ATTR_VERB:
              ch = next_char();
              if (ch == '"' && in_string)
                obstack_1grow (&token_stack, CHAR_QUOTE);
              else
                {
                  obstack_1grow (&token_stack, '\\');
                  obstack_1grow (&token_stack, ch);
                }

              type = TOKEN_STRING;
              break;

            case READ_ATTR_ASIS:
              obstack_1grow (&token_stack, ch);
              ch = next_char();
              obstack_1grow (&token_stack, ch);
              type = TOKEN_STRING;
              break;

            default:
              MP4HERROR ((warning_status, 0,
                "INTERNAL ERROR: Unknown expansion type"));
              exit (1);
          }
        }
      else /* EVERYTHING ELSE */
        {
          obstack_1grow (&token_stack, ch);

          if (IS_OTHER(ch) || IS_NUM(ch))
            type = TOKEN_STRING;
          else if (IS_SPACE(ch))
            {
              while ((ch = next_char ()) != CHAR_EOF && IS_SPACE(ch))
                obstack_1grow (&token_stack, ch);
              unget_input(ch);
              type = TOKEN_SPACE;
            }
          else
            type = TOKEN_SIMPLE;
        }
    }

  obstack_1grow (&token_stack, '\0');

  TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
  TOKEN_DATA_TEXT (td) = obstack_finish (&token_stack);

#ifdef DEBUG_INPUT
  print_token("next_token", type, td);
#endif

  return type;
}
コード例 #3
0
ファイル: input.c プロジェクト: LambdaCalculus379/SLS-1.02
token_type
next_token (token_data *td)
{
  int ch;
  int quote_level;
  token_type type;

  obstack_free (&token_stack, token_bottom);
  obstack_1grow (&token_stack, '\0');
  token_bottom = obstack_finish (&token_stack);

  ch = peek_input ();
  if (ch == CHAR_EOF)
    {
      return TOKEN_EOF;
#ifdef DEBUG_INPUT
      fprintf (stderr, "next_token -> EOF\n");
#endif
    }
  if (ch == CHAR_MACRO)
    {
      init_macro_token (td);
      (void) next_char ();
      return TOKEN_MACDEF;
    }

  (void) next_char ();
  if (MATCH (ch, bcomm))
    {

      obstack_grow (&token_stack, bcomm, len_bcomm);
      while ((ch = next_char ()) != CHAR_EOF && !MATCH (ch, ecomm))
	obstack_1grow (&token_stack, ch);
      if (ch != CHAR_EOF)
	obstack_grow (&token_stack, ecomm, len_ecomm);
      type = TOKEN_STRING;

    }
  else if (isalpha (ch) || ch == '_')
    {

      obstack_1grow (&token_stack, ch);
      while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_'))
	{
	  obstack_1grow (&token_stack, ch);
	  (void) next_char ();
	}
      type = TOKEN_WORD;

    }
  else if (!MATCH (ch, lquote))
    {

      type = TOKEN_SIMPLE;
      obstack_1grow (&token_stack, ch);

    }
  else
    {

      quote_level = 1;
      while (1)
	{
	  ch = next_char ();
	  if (ch == CHAR_EOF)
	    fatal ("EOF in string");

	  if (MATCH (ch, rquote))
	    {
	      if (--quote_level == 0)
		break;
	      obstack_grow (&token_stack, rquote, len_rquote);
	    }
	  else if (MATCH (ch, lquote))
	    {
	      quote_level++;
	      obstack_grow (&token_stack, lquote, len_lquote);
	    }
	  else
	    obstack_1grow (&token_stack, ch);
	}
      type = TOKEN_STRING;
    }

  obstack_1grow (&token_stack, '\0');

  TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
  TOKEN_DATA_TEXT (td) = obstack_finish (&token_stack);
#ifdef DEBUG_INPUT
  fprintf (stderr, "next_token -> %d (%s)\n", type, TOKEN_DATA_TEXT (td));
#endif
  return type;
}
コード例 #4
0
ファイル: input.c プロジェクト: huther/personal_projects
token_type
next_token (token_data *td, int *line)
{
  int ch;
  int quote_level;
  token_type type;
#ifdef ENABLE_CHANGEWORD
  int startpos;
  char *orig_text = NULL;
#endif
  const char *file;
  int dummy;

  obstack_free (&token_stack, token_bottom);
  if (!line)
    line = &dummy;

 /* Can't consume character until after CHAR_MACRO is handled.  */
  ch = peek_input ();
  if (ch == CHAR_EOF)
    {
#ifdef DEBUG_INPUT
      xfprintf (stderr, "next_token -> EOF\n");
#endif
      next_char ();
      return TOKEN_EOF;
    }
  if (ch == CHAR_MACRO)
    {
      init_macro_token (td);
      next_char ();
#ifdef DEBUG_INPUT
      xfprintf (stderr, "next_token -> MACDEF (%s)\n",
                find_builtin_by_addr (TOKEN_DATA_FUNC (td))->name);
#endif
      return TOKEN_MACDEF;
    }

  next_char (); /* Consume character we already peeked at.  */
  file = current_file;
  *line = current_line;
  if (MATCH (ch, bcomm.string, true))
    {
      obstack_grow (&token_stack, bcomm.string, bcomm.length);
      while ((ch = next_char ()) != CHAR_EOF
             && !MATCH (ch, ecomm.string, true))
        obstack_1grow (&token_stack, ch);
      if (ch != CHAR_EOF)
        obstack_grow (&token_stack, ecomm.string, ecomm.length);
      else
        /* current_file changed to "" if we see CHAR_EOF, use the
           previous value we stored earlier.  */
        M4ERROR_AT_LINE ((EXIT_FAILURE, 0, file, *line,
                          "ERROR: end of file in comment"));

      type = TOKEN_STRING;
    }
  else if (default_word_regexp && (isalpha (ch) || ch == '_'))
    {
      obstack_1grow (&token_stack, ch);
      while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_'))
        {
          obstack_1grow (&token_stack, ch);
          next_char ();
        }
      type = TOKEN_WORD;
    }

#ifdef ENABLE_CHANGEWORD

  else if (!default_word_regexp && word_regexp.fastmap[ch])
    {
      obstack_1grow (&token_stack, ch);
      while (1)
        {
          ch = peek_input ();
          if (ch == CHAR_EOF)
            break;
          obstack_1grow (&token_stack, ch);
          startpos = re_search (&word_regexp,
                                (char *) obstack_base (&token_stack),
                                obstack_object_size (&token_stack), 0, 0,
                                &regs);
          if (startpos ||
              regs.end [0] != (regoff_t) obstack_object_size (&token_stack))
            {
              *(((char *) obstack_base (&token_stack)
                 + obstack_object_size (&token_stack)) - 1) = '\0';
              break;
            }
          next_char ();
        }

      obstack_1grow (&token_stack, '\0');
      orig_text = (char *) obstack_finish (&token_stack);

      if (regs.start[1] != -1)
        obstack_grow (&token_stack,orig_text + regs.start[1],
                      regs.end[1] - regs.start[1]);
      else
        obstack_grow (&token_stack, orig_text,regs.end[0]);

      type = TOKEN_WORD;
    }

#endif /* ENABLE_CHANGEWORD */

  else if (!MATCH (ch, lquote.string, true))
    {
      switch (ch)
        {
        case '(':
          type = TOKEN_OPEN;
          break;
        case ',':
          type = TOKEN_COMMA;
          break;
        case ')':
          type = TOKEN_CLOSE;
          break;
        default:
          type = TOKEN_SIMPLE;
          break;
        }
      obstack_1grow (&token_stack, ch);
    }
  else
    {
      bool fast = lquote.length == 1 && rquote.length == 1;
      quote_level = 1;
      while (1)
        {
          /* Try scanning a buffer first.  */
          const char *buffer = (isp && isp->type == INPUT_STRING
                                ? isp->u.u_s.string : NULL);
          if (buffer && *buffer)
            {
              size_t len = isp->u.u_s.end - buffer;
              const char *p = buffer;
              do
                {
                  p = (char *) memchr2 (p, *lquote.string, *rquote.string,
                                        buffer + len - p);
                }
              while (p && fast && (*p++ == *rquote.string
                                   ? --quote_level : ++quote_level));
              if (p)
                {
                  if (fast)
                    {
                      assert (!quote_level);
                      obstack_grow (&token_stack, buffer, p - buffer - 1);
                      isp->u.u_s.string += p - buffer;
                      break;
                    }
                  obstack_grow (&token_stack, buffer, p - buffer);
                  ch = to_uchar (*p);
                  isp->u.u_s.string += p - buffer + 1;
                }
              else
                {
                  obstack_grow (&token_stack, buffer, len);
                  isp->u.u_s.string += len;
                  continue;
                }
            }
          /* Fall back to a byte.  */
          else
            ch = next_char ();
          if (ch == CHAR_EOF)
            /* current_file changed to "" if we see CHAR_EOF, use
               the previous value we stored earlier.  */
            M4ERROR_AT_LINE ((EXIT_FAILURE, 0, file, *line,
                              "ERROR: end of file in string"));

          if (MATCH (ch, rquote.string, true))
            {
              if (--quote_level == 0)
                break;
              obstack_grow (&token_stack, rquote.string, rquote.length);
            }
          else if (MATCH (ch, lquote.string, true))
            {
              quote_level++;
              obstack_grow (&token_stack, lquote.string, lquote.length);
            }
          else
            obstack_1grow (&token_stack, ch);
        }
      type = TOKEN_STRING;
    }

  obstack_1grow (&token_stack, '\0');

  TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
  TOKEN_DATA_TEXT (td) = (char *) obstack_finish (&token_stack);
#ifdef ENABLE_CHANGEWORD
  if (orig_text == NULL)
    orig_text = TOKEN_DATA_TEXT (td);
  TOKEN_DATA_ORIG_TEXT (td) = orig_text;
#endif
#ifdef DEBUG_INPUT
  xfprintf (stderr, "next_token -> %s (%s)\n",
            token_type_string (type), TOKEN_DATA_TEXT (td));
#endif
  return type;
}