Пример #1
0
/* This is the lexer.  Build a token up in r->buf, null terminate it, and
   return a pointer to r->buf.  Clients must copy the token out of the
   buffer before making any more calls to gettoken.

   Returns NULL when the character reader reports EOF before the first
   character of a token has been read (including EOF inside a comment).
   If EOF is reached after at least one token character has been
   collected, the token collected so far is returned instead of being
   discarded.
   NOTE(review): this relies on r->char_reader reporting EOF again on
   the following call, so that the next gettoken returns NULL; confirm
   against the reader implementations. */
static char *gettoken(reader *r) {
  int ch;

  /* If we have a valid lookahead, just return the buffer again. */
  if(r->token_la_valid) { r->token_la_valid = 0; return r->buf; }

  /* Empty the buffer */
  r->bufused = 0;

  /* Skip whitespace and comments.
     Comments begin with a ; and continue to the end of
     the line.  The newline that ends a comment is itself whitespace,
     so the loop keeps going after a comment. */
  do {
    if((ch = r->char_reader(r)) == EOF)
        return NULL;
    if(ch == ';') {
      do {
        ch = r->char_reader(r);
      } while(ch != '\n' && ch != EOF);
      if(ch == EOF) return NULL;
    }
  } while(isspace(ch));
  add_to_buf(r, ch);

  /* If the token begins with one of these characters, then we know
     we are about to return.
     NOTE(review): strchr also matches the terminating NUL of the set,
     so a '\0' character read from the input is treated as a
     delimiter here and in the loop below. */
  if(strchr("()\'`,;", ch)) {
    if(ch == ',') {
      /* Treat ,@ as a single token.  It will be converted into
         unquote_splicing. */
      ch = r->char_reader(r);
      if(ch == '@')
        add_to_buf(r, ch);
      else
        put_back_char(r, ch);
    }
    r->buf[r->bufused] = '\0';
    return r->buf;
  }


  /* Keep reading characters until we find a whitespace, a character
     that begins a new token, or the end of the input. */
  for(;;) {
    ch = r->char_reader(r);
    if(ch == EOF) {
      /* End of input also ends the token.  Returning NULL here would
         silently drop the characters already collected. */
      r->buf[r->bufused] = '\0';
      return r->buf;
    }
    if(strchr("()\'", ch) || isspace(ch)) {
      /* We have found a whitespace, a parenthesis, or a single
         quote.  Put it back, null terminate the buffer, and return. */
      put_back_char(r, ch);
      r->buf[r->bufused] = '\0';
      return r->buf;
    }
    add_to_buf(r, ch);
  }
}
Пример #2
0
/*
	Read the next token from the input stream into buf.

	Characters are obtained with get_next_char(); a '\0' from it is
	treated as end of input.  The function is a small state machine
	over four conditions checked in this order: inside a comment,
	right after an escape character, inside a double-quoted string,
	and in the middle of an unquoted token.  Otherwise the character
	is examined as a possible token start.

	Characters are stored through the ADD_CHAR macro, which is defined
	elsewhere.  token_index, limit and token_truncated are not modified
	directly in this function, so ADD_CHAR is presumably what advances
	token_index, honours limit and records truncation -- TODO confirm
	against the macro definition.

	The file-level variables cont_line and first_word are read and
	updated here: while cont_line is non-zero a newline does not end
	the logical line, and first_word is set to TRUE whenever an end of
	line, end of file or error is returned.

	Returns GW_MAYBE_WORD when a token has been stored (null
	terminated) in buf, GW_EOLN or GW_EOF at the end of a line or of
	the input, and one of GW_ERRCONTINUATION, GW_ERRILLEGALCHAR or
	GW_ERRDANGLINGQUOTE on error.
*/
int parse_get_token (char *buf, int limit)
{
	int token_index = 0;
	int escape_state = FALSE;
	int quoted_state = FALSE;
	int comment_state = FALSE;
	int token_truncated = FALSE;

	char	current_ch, ch;

	do
	{
		current_ch = get_next_char();
		if (comment_state)
		{
			/*
				A comment is ended by an EOLN or EOF.
				If cont_line is not 0, we keep looking
				for a token (or until a EOF is hit).
				If it is, we return EOF or EOLN.
			*/
			if (current_ch == '\n')
			{
				if (cont_line == 0)
				{
					first_word = TRUE;
					return GW_EOLN;
				}
				comment_state = FALSE;
			}
			else if (current_ch == '\0')
			{
				first_word = TRUE;
				if (cont_line)
					return GW_ERRCONTINUATION;
				else
					return GW_EOF;
			}
		}
		else if (escape_state)
		{
			/*
				An escape character was hit.  The next
				character is copied in (re-escaped) with
				the exception of the end of line - which is
				copied unescaped.

				If a digit follows the escape character there
				are supposed to be two more, but that error
				checking is left to the next level of parsing.
			*/
			if (current_ch == '\n')
			{
				ADD_CHAR (buf, current_ch);
			}
			else if (current_ch == 0x7F || current_ch < 0x20)
			{
				/* Illegal character: discard the rest of the line. */
				do
					ch = get_next_char();
				while (ch != '\n' && ch != '\0');
				if (ch== '\0') put_back_char (ch);

				first_word = TRUE;
				cont_line = 0;
				return GW_ERRILLEGALCHAR;
			}
			else
			{
				ADD_CHAR (buf, '\\');
				ADD_CHAR (buf, current_ch);
			}
			escape_state = FALSE;
		}
		else if (quoted_state)
		{
			/*
				In the middle of a quote, escapes matter,
				and we don't return the enclosing quote
				marks.
			*/
			if (current_ch == '\\')
				escape_state = TRUE;
			else if (current_ch == '\n' || current_ch == '\0')
			{
				/*
					The line or the input ended before the
					closing quote.  Skip one further line per
					open continuation level before reporting
					the error.
				*/
				buf[token_index]='\0';
				if (cont_line > 0)
				{
					while (cont_line > 0)
					{
						do
							ch = get_next_char();
						while (ch != '\n' && ch != '\0');
						cont_line--;
					}
					put_back_char (ch);
				}
				first_word = TRUE;
				cont_line = 0;
				return GW_ERRDANGLINGQUOTE;
			}
			else if (current_ch == '"')
			{
				buf[token_index]='\0';
				return GW_MAYBE_WORD;
			}
			else if (current_ch!='\t' && (current_ch<0x20 || current_ch==0x7F))
			{
				/*
					Illegal character: discard the rest of the
					line.
					NOTE(review): unlike the other illegal
					character paths, a terminating '\0' is not
					put back here -- confirm whether that is
					intended.
				*/
				do
					ch = get_next_char();
				while (ch != '\n' && ch != '\0');

				first_word = TRUE;
				cont_line = 0;
				return GW_ERRILLEGALCHAR;
			}
			else
			{
				ADD_CHAR (buf, current_ch);
			}
		}
		else if (token_index > 0)
		{
			/*
				In this case, there is a token being
				built.  It is either added to or a
				separating character ends it and itself
				may be left (e.g., a ; beginning a comment
				ends the previous token and is left to
				signal the start of the comment next time
				around.
			*/
			/*
				The cast keeps isspace() defined when char is
				signed and the character has its high bit set.
				Whitespace other than a newline ends the token
				and is consumed.
			*/
			if (isspace((unsigned char)current_ch)&&current_ch!='\n'&&current_ch!='\0')
			{
				first_word = FALSE;
				buf[token_index]='\0';
				return GW_MAYBE_WORD;
			}
			else if (current_ch == '\\')
			{
				escape_state = TRUE;
			}
			else if (SEPARATOR(current_ch))
			{
				first_word = FALSE;
				put_back_char (current_ch);
				buf[token_index]='\0';
				return GW_MAYBE_WORD;
			}
			else if (current_ch < 0x20 || current_ch == 0x7F)
			{
				/* Illegal character: discard the rest of the line. */
				do
					ch = get_next_char();
				while (ch != '\n' && ch != '\0');
				if (ch== '\0') put_back_char (ch);

				first_word = TRUE;
				cont_line = 0;
				return GW_ERRILLEGALCHAR;
			}
			else
				ADD_CHAR (buf, current_ch);
		}
		else
			/*
				The character is potentially the
				start of a token.  Some of these
				may have been seen by the above code and
				returned to the input stream.
			*/
			switch (current_ch)
			{
				case ';':
					/* Start of a comment. */
					buf[token_index]='\0';
					comment_state = TRUE;
					break;
				case '(':
					/*
						Opens a continuation level; the
						rest of the current line is
						discarded.
					*/
					cont_line++;
					do
						ch = get_next_char();
					while (ch != '\n' && ch != '\0');
					if (ch== '\0') put_back_char (ch);

					first_word = FALSE;
					break;
				case ')':
					/*
						Closes a continuation level.  The
						rest of the line is skipped, except
						that a further ')' stops the skip
						and is put back for the next pass.
					*/
					cont_line--;
					while ((ch = get_next_char()) != '\n')
					{
						if (ch == '\0') return GW_EOF;
						if (ch == ')')
						{
							put_back_char (ch);
							break;
						}
					}
					if (cont_line==0)
					{
						first_word = TRUE;
						return GW_EOLN;
					}
					if (cont_line < 0)
					{
						/*
							We've already read to
							the end of the line
						*/
						first_word = TRUE;
						cont_line = 0;
						return GW_ERRCONTINUATION;
					}
					break;
				case ' ': case '\t':
					/*
						Leading whitespace at the start of
						a line is returned as a token of
						its own; elsewhere it is skipped.
					*/
					if (first_word)
					{
						ADD_CHAR (buf, current_ch);
						buf[token_index]='\0';
						first_word = FALSE;
						return GW_MAYBE_WORD;
					}
					break;
				case '\\':
					escape_state = TRUE;
					break;
				case '"':
					quoted_state = TRUE;
					break;
				case '\n':
					if (cont_line==0)
					{
						first_word = TRUE;
						return GW_EOLN;
					}
					break;
				case '\0':
					if (cont_line == 0)
						return GW_EOF;
					else
					{
						cont_line = 0;
						return GW_ERRCONTINUATION;
					}
				default:
					if (current_ch<0x20 || current_ch==0x7F)
					{
						/* Illegal character: discard the rest of the line. */
						do
							ch = get_next_char();
						while (ch != '\n' && ch != '\0');
						if (ch== '\0') put_back_char (ch);

						first_word = TRUE;
						cont_line = 0;
						return GW_ERRILLEGALCHAR;
					}
					else
					{
						ADD_CHAR (buf, current_ch);
					}
					break;
			} /* end switch body */
	} while (TRUE); /* end the do-while loop */
}