Ejemplo n.º 1
0
Token
Lexer::on_string()
{
  // Claim the text accumulated for the current lexeme.
  String spelling = build_.take();

  // Compute the value of the literal in the execution character
  // set from its spelling in the basic character set. Scanning
  // starts one character past the beginning of the spelling
  // (presumably the opening quote) and stops at the first
  // unescaped double quote.
  String value;
  value.reserve(spelling.size());
  for (char const* cur = spelling.c_str() + 1; *cur != '\"'; ++cur) {
    if (*cur == '\\')
      // An escape sequence: step over the backslash and translate
      // the character that follows it.
      value.push_back(translate_escape(*++cur));
    else
      // An ordinary character is copied through unchanged.
      value.push_back(*cur);
  }

  // Record the spelling together with its translated value in the
  // symbol table and hand back a token that refers to the entry.
  Symbol* entry = syms_.put<String_sym>(spelling, string_tok, value);
  return Token(loc_, string_tok, entry);
}
Ejemplo n.º 2
0
Token
Lexer::on_character()
{
  String str = build_.take();

  // Translate the spelling of the lexeme in the
  // basic character set into the execution character
  // set.
  //
  // TODO: This belongs in a separate facility
  // in order to better enable translation between
  // the basic and execution character sets.
  int rep;

  // Step past the first character of the spelling (presumably the
  // opening single quote) to reach the first character of the
  // literal's content.
  char const* p = str.c_str() + 1;
  if (*p != '\\')
    // An ordinary character stands for itself.
    rep = *p;
  else
    // An escape sequence: translate the character that follows
    // the backslash. This mirrors the handling in on_string().
    rep = translate_escape(*++p);
  Symbol* sym = syms_.put<Character_sym>(str, character_tok, rep);

  return Token(loc_, character_tok, sym);
}
Ejemplo n.º 3
0
/*****************************************************************************
 * tokenize_word - extract the next token from a line of text.
 *
 * Leading whitespace is skipped, then the token is classified by its first
 * character:
 *	 - iscsymf(c) : handed to po_chop_csym(); the type is TOK_UNDEF.
 *	 - isdigit(c) : handed to get_digits(), which supplies the type.
 *	 - otherwise  : handled by the switch below (quoted strings, quoted
 *					chars, multi-character operators).  Any other character
 *					becomes a one-character token whose type is the character
 *					value itself.
 *
 * The token text is stored through 'word' and nul-terminated.	When the token
 * is a double-quoted string and 'qstring' is not NULL, the text is stored in
 * 'qstring' instead, and *plen is then measured from 'qstring'.
 *
 * When 'quote' is true the delimiters are kept and a backslash is copied
 * together with the character after it; when false the delimiters are
 * dropped and each escape is converted by translate_escape().
 *
 * Returns the position in 'line' just past the token, or NULL when nothing
 * but whitespace remained.
 *
 * NOTE(review): on the NULL-return path 'toktype' has not been assigned
 * before it is copied into *ttype at OUT -- callers presumably ignore *ttype
 * in that case; confirm.
 * NOTE(review): 'ttype' is dereferenced unconditionally, unlike 'plen'.
 ****************************************************************************/
UBYTE *tokenize_word(register UBYTE  *line,/* (in)	-> current line position   */
					 register UBYTE  *word,/* (in)	-> output token buffer	   */
					 UBYTE	*qstring,	   /* (in)	-> quoted string o/p buffer*/
					 SHORT	*plen,		   /* (out) # of bytes put in word buf */
					 SHORT	*ttype, 	   /* (out) token type				   */
					 Boolean quote		   /* (in)	preserve quotes on string? */
				   )
{
UBYTE	*wrkptr;			/* scratch pointer passed to helper routines	*/
UBYTE	*sword = word;		/* start of output, used to compute *plen		*/
int 	toklen;
SHORT	toktype;
register unsigned int c;	/* current character							*/
UBYTE	c1;
UBYTE	c2;

/*----------------------------------------------------------------------------
 * Skip leading whitespace, get the first character, return NULL if no char.
 *--------------------------------------------------------------------------*/

	while (isspace(*line) )
		++line;

	if ('\0' == (c = *line))
		{
		line = NULL;
		goto OUT;
		}

/*----------------------------------------------------------------------------
 * Handle keywords, types, symbols
 *--------------------------------------------------------------------------*/

	if (iscsymf(c))
		{
		/* presumably wrkptr receives the end of the text copied into word */
		line = po_chop_csym(line, word, MAX_SYM_LEN-1, &wrkptr);
		word = wrkptr;
		toktype = TOK_UNDEF;
		}
/* The DEADWOOD branch tests the same condition as the 'if' above, so it
 * appears it could never be taken even if it were compiled in. */
#ifdef DEADWOOD
	else if (iscsymf(c))
		{
		toktype = TOK_UNDEF;
		*word++ = c;
		++line;
		toklen = MAX_SYM_LEN;
		for (;;)
			{
			c = *line;
			if (iscsym(c))
				{
				++line;
				if (toklen)
					{
					*word++ = c;
					--toklen;
					}
				 }
			else
				break;
			}
		}

#endif /* DEADWOOD */

/*----------------------------------------------------------------------------
 * Handle numeric constants
 *--------------------------------------------------------------------------*/

	else if (isdigit(c))
		{
		/* input and output both advance by the length get_digits() returns;
		 * presumably it copies that many characters into word -- confirm */
		toklen = get_digits(line,word,&toktype);
		line  += toklen;
		word  += toklen;
		}

/*----------------------------------------------------------------------------
 * Handle C operators and string/char constants...
 * This processes non-alphanumeric characters.	Most will be passed through
 * as single character tokens.	Some, like ==, !=, >= and <= are easier to
 * handle here than in parser (which only has a one-token look-ahead).
 * Also, quoted strings and chars are now handled in this switch statement.
 *--------------------------------------------------------------------------*/

	else
		{

		c1 = line[1];		/* lookahead characters */
		c2 = line[2];

		switch (c)
			{

			case '"':                       /* note: a shortcut in this loop */
											/* assumes that a quoted string  */
				if (qstring != NULL)		/* could never be longer than	 */
					sword = word = qstring; /* SZTOKE-3 chars.				 */
				toktype = TOK_QUO;
				if (quote)
					*word++ = '"';
				++line;
				toklen = SZTOKE-3; /* room for nullterm and two quotes */
				/* The loop ends at the closing quote, at end of line, or when
				 * the counter runs out; in the last two cases no closing quote
				 * has been consumed from the input. */
				while(--toklen)
					{
					c = *line++;
					if (c == 0)
						break;
					else if (c == '\\')
						{
						if (quote)
							{
							/* keep the escape as written: two bytes stored */
							*word++  = c;
							*word++ = *line++;
							}
						else
							{
							wrkptr = line;
							*word++ = translate_escape(&wrkptr);
							line = wrkptr;
							}
						}
					else if (c == '\"')
						{
						break;
						}
					else
						{
						*word++ = c;
						}
					}
				if (quote)
					*word++ = '"';
				break;

			case '\'':

				/* Same scheme as the double-quote case, but output always
				 * goes to word and there is no length counter.
				 * NOTE(review): nothing here bounds the number of bytes
				 * written -- confirm the buffer is sized for the whole line. */
				if (quote)
					*word++ = '\'';
				toktype = TOK_SQUO;
				++line;
				for (;;)
					{
					c = *line++;
					if (c == 0)
						break;
					else if (c == '\\')
						{
						if (quote)
							{
							*word++  = c;
							*word++ = *line++;
							}
						else
							{
							wrkptr = line;
							*word++ = translate_escape(&wrkptr);
							line = wrkptr;
							}
						}
					else if (c == '\'')
						{
						break;
						}
					else
						{
						*word++ = c;
						}
					}
				if (quote)
					*word++ = '\'';
				break;

			case '.':

				if (c1 == '.' && c2 == '.')    /* ... is typed TOK_UNDEF */
					{
					toktype = TOK_UNDEF;
					goto THREECHAR;
					}
				else
					goto SIMPLE;

			case '%':

				if (c1 == '=') /* mod-equals */
					{
					toktype = TOK_MOD_EQUALS;
					goto TWOCHAR;
					}
				else
					goto SIMPLE;

			case '/':

				if (c1 == '=') /* div-equals */
					{
					toktype = TOK_DIV_EQUALS;
					goto TWOCHAR;
					}
				else
					goto SIMPLE;

			case '*':

				if (c1 == '=')
					{
					toktype = TOK_MUL_EQUALS;
					goto TWOCHAR;
					}
				else
					goto SIMPLE;

			case '+':

				if (c1 == '+')
					{
					toktype = TOK_PLUS_PLUS;
					goto TWOCHAR;
					}
				else if (c1 == '=')
					{
					toktype = TOK_PLUS_EQUALS;
					goto TWOCHAR;
					}
				else
					goto SIMPLE;

			case '-':

				if (c1 == '-')
					{
					toktype = TOK_MINUS_MINUS;
					goto TWOCHAR;
					}
				else if (c1 == '=')
					{
					toktype = TOK_MINUS_EQUALS;
					goto TWOCHAR;
					}
				else if (c1 == '>')
					{
					toktype = TOK_ARROW;
					goto TWOCHAR;
					}
				else
					goto SIMPLE;

			case '=':

				if (c1 == '=') /* double equals */
					{
					toktype = TOK_EQ;
					goto TWOCHAR;
					}
				else
					goto SIMPLE;

			case '!':

				if (c1 == '=') /* != */
					{
					toktype = TOK_NE;
					goto TWOCHAR;
					}
				else
					goto SIMPLE;

			case '<':

				if (c1 == '=') /* <= */
					{
					toktype = TOK_LE;
					goto TWOCHAR;
					}
				else if (c1 == '<')    /* << */
					{
					if (c2 == '=')
						{
						toktype = TOK_LSHIFT_EQUALS;
						goto THREECHAR;
						}
					else
						{
						toktype = TOK_LSHIFT;
						goto TWOCHAR;
						}
					}
				else
					goto SIMPLE;

			case '>':

				if (c1 == '=') /* >= */
					{
					toktype = TOK_GE;
					goto TWOCHAR;
					}
				else if (c1 == '>')    /* >> */
					{
					if (c2 == '=')
						{
						toktype = TOK_RSHIFT_EQUALS;
						goto THREECHAR;
						}
					else
						{
						toktype = TOK_RSHIFT;
						goto TWOCHAR;
						}
					}
				else
					goto SIMPLE;

			case '&':

				if (c1 == '&') /* logical and - && */
					{
					toktype = TOK_LAND;
					goto TWOCHAR;
					}
				else if (c1 == '=')
					{
					toktype = TOK_AND_EQUALS;
					goto TWOCHAR;
					}
				else
					goto SIMPLE;

			case '|':

				if (c1 == '|') /* logical or - || */
					{
					toktype = TOK_LOR;
					goto TWOCHAR;
					}
				else if (c1 == '=')
					{
					toktype = TOK_OR_EQUALS;
					goto TWOCHAR;
					}
				else
					goto SIMPLE;

			case '^':

				if (c1 == '=')
					{
					toktype = TOK_XOR_EQUALS;
					goto TWOCHAR;
					}
				else
					goto SIMPLE;
			default:
	/* Single-character token: the character value doubles as the token
	 * type.  Also reached by goto from the operator cases above. */
	SIMPLE:
				toktype = *word++ = c;
				++line;
				break;
			}
		}

/*----------------------------------------------------------------------------
 * Common exit: report the type, terminate the output, report the length.
 *--------------------------------------------------------------------------*/

OUT:

	*ttype = toktype;
	*word = 0;
	if (plen != NULL)
		*plen = word - sword;
	return line;

/*----------------------------------------------------------------------------
 * Shared tails for multi-character operators: copy the characters, advance
 * the input past them, then rejoin the common exit.
 *--------------------------------------------------------------------------*/

TWOCHAR:

	*word++ = c;
	*word++ = c1;
	line += 2;
	goto OUT;

THREECHAR:

	*word++ = c;
	*word++ = c1;
	*word++ = c2;
	line += 3;
	goto OUT;

}
Ejemplo n.º 4
0
/*
 * tcl_value
 *
 * Rewrites a value string in place.  A NULL input is returned as NULL.
 *
 * A value that is at least two characters long and is wrapped in '{' ... '}'
 * is treated as an array: the closing brace is overwritten with a
 * terminator, the returned pointer skips the opening brace, every ','
 * outside double quotes becomes a space, and every backslash inside double
 * quotes is passed to translate_escape(ptr, 1).
 *
 * Any other value is treated as a scalar: every backslash is passed to
 * translate_escape(ptr, 0).
 *
 * In both cases scanning resumes one character after the position that
 * translate_escape() returns, or stops if that position holds the
 * terminator.
 */
static char *
tcl_value(char *value)
{
	char	   *cur;
	int			end;
	int			in_quotes;
	int			is_array;

	if (value == NULL)
		return (char *) NULL;

#ifdef TCL_ARRAYS_DEBUG
	printf("pq_value  = '%s'\n", value);
#endif

	/* index of the final character; below 1 for strings shorter than two */
	end = strlen(value) - 1;
	is_array = (end >= 1) && (value[0] == '{') && (value[end] == '}');

	if (!is_array)
	{
		/* Scalar: only escape sequences need attention */
		cur = value;
		while (*cur)
		{
			if (*cur == '\\')
			{
				cur = translate_escape(cur, 0);
				if (!*cur)
					break;
			}
			cur++;
		}
	}
	else
	{
		/* Array: drop the closing brace first, then step over the opener */
		value[end] = '\0';
		value++;

		in_quotes = 0;
		for (cur = value; *cur; cur++)
		{
			if (in_quotes)
			{
				/* Inside a quoted element */
				if (*cur == '"')
					in_quotes = 0;
				else if (*cur == '\\')
				{
					cur = translate_escape(cur, 1);
					if (!*cur)
						break;
				}
			}
			else if (*cur == '"')
			{
				/* A quoted element starts here */
				in_quotes = 1;
			}
			else if (*cur == ',')
			{
				/* Element separator at list level becomes a space */
				*cur = ' ';
			}
		}
	}

#ifdef TCL_ARRAYS_DEBUG
	printf("tcl_value = '%s'\n\n", value);
#endif
	return value;
}