Beispiel #1
0
/**
 * Do lexical analysis of the NUL-terminated buffer 'src'.
 *
 * New tokens are appended after 'prev'. The file name 'f' and the current
 * line number are handed to every token-creating helper.
 *
 * Returns a pointer to the last token, or NULL if an error occurred.
 * If an error occurred, 'ei' is filled in with message, file and line.
 */
token_t *
glw_view_lexer(glw_root_t *gr, const char *src, errorinfo_t *ei,
               rstr_t *f, token_t *prev)
{
  const char *start;
  int line = 1;
  token_t *t;

  while(*src != 0) {

    if(*src == '\n') {
      /* newline */
      /* TODO: DOS CR support ? */
      src++;
      line++;
      continue;
    }

    if(*src <= 32) {
      /* whitespace */
      src++;
      continue;
    }

    /* Keywords. Short-circuit evaluation keeps us from reading past NUL. */
    if(src[0] == 'v' && src[1] == 'o' && src[2] == 'i' && src[3] == 'd') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_VOID);
      src+=4;
      continue;
    }

    if(src[0] == 't' && src[1] == 'r' && src[2] == 'u' && src[3] == 'e') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_INT);
      src+=4;
      prev->t_int = 1;
      continue;
    }

    if(src[0] == 'f' && src[1] == 'a' && src[2] == 'l' && src[3] == 's' &&
       src[4] == 'e') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_INT);
      src+=5;
      prev->t_int = 0;
      continue;
    }

    if(*src == '/' && src[1] == '/') {
      /* C++ style comment: runs to end of line or end of input */
      src += 2;
      while(*src != '\n' && *src != 0)
        src++;

      /* Only step over (and count) the newline if there actually is one,
         otherwise we would walk past the terminating NUL */
      if(*src == '\n') {
        src++;
        line++;
      }
      continue;
    }

    if(*src == '/' && src[1] == '*') {
      /* A normal C-comment */
      src += 2;

      /* Search for the closing pair strictly after the opener, so the
         opener's own '*' can never double as the terminator */
      while(*src != 0 && !(src[0] == '*' && src[1] == '/')) {
        if(*src == '\n')
          line++;
        src++;
      }

      if(*src == 0) {
        snprintf(ei->error, sizeof(ei->error), "Unterminated comment");
        snprintf(ei->file,  sizeof(ei->file),  "%s", rstr_get(f));
        ei->line = line;
        return NULL;
      }

      src += 2;
      continue;
    }

    /* Multi-character operators */
    if(src[0] == '&' && src[1] == '&') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_BOOLEAN_AND);
      src+=2;
      continue;
    }

    if(src[0] == '?' && src[1] == '=') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_COND_ASSIGNMENT);
      src+=2;
      continue;
    }

    if(src[0] == '<' && src[1] == '-') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_LINK_ASSIGNMENT);
      src+=2;
      continue;
    }

    if(src[0] == ':' && src[1] == '=') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_REF_ASSIGNMENT);
      src+=2;
      continue;
    }

    if(src[0] == '_' && src[1] == '=' && src[2] == '_') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_DEBUG_ASSIGNMENT);
      src+=3;
      continue;
    }

    if(src[0] == '|' && src[1] == '|') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_BOOLEAN_OR);
      src+=2;
      continue;
    }

    if(src[0] == '^' && src[1] == '^') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_BOOLEAN_XOR);
      src+=2;
      continue;
    }

    if(src[0] == '=' && src[1] == '=') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_EQ);
      src+=2;
      continue;
    }

    if(src[0] == '!' && src[1] == '=') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_NEQ);
      src+=2;
      continue;
    }

    if(src[0] == '?' && src[1] == '?') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_NULL_COALESCE);
      src+=2;
      continue;
    }


    /* Single character tokens. A '-' directly followed by a digit is
       not offered to lexer_single_char() */
    if(!(src[0] == '-' && lex_isdigit(src[1]))) {
      if((t = lexer_single_char(gr, prev, f, line, *src)) != NULL) {
        src++;
        prev = t;
        continue;
      }
    }


    start = src;


    if(*src == '"' || *src == '\'') {
      /* A quoted string " ... " or ' ... ' */
      char stop = *src;
      src++;
      start++;

      /* A quote preceded by a single backslash does not end the string */
      while((*src != stop || (src[-1] == '\\' && src[-2] != '\\')) && *src != 0) {
        if(*src == '\n')
          line++;
        src++;
      }
      if(*src != stop) {
        snprintf(ei->error, sizeof(ei->error), "Unterminated quote");
        snprintf(ei->file,  sizeof(ei->file),  "%s", rstr_get(f));
        ei->line = line;
        return NULL;
      }

      prev = lexer_add_token_string(gr, prev, f, line, start, src,
                                    TOKEN_RSTRING);
      if(stop == '\'')
        prev->t_rstrtype = PROP_STR_RICH;
      src++;
      continue;
    }


    if(lex_isalpha(*src)) {
      /* Alphanumeric string */
      while(lex_isalnum(*src))
        src++;

      prev = lexer_add_token_string(gr, prev, f, line, start, src,
                                    TOKEN_IDENTIFIER);
      continue;
    }

    if(lex_isdigit(*src)) {
      /* Integer */
      while(lex_isdigit(*src))
        src++;

      if(*src == '.') {
        src++;
        /* , or a float */
        while(lex_isdigit(*src))
          src++;

      }
      if(*src == 'f')
        /* we support having the 'f' postfix around too */
        src++;

      prev = lexer_add_token_float(gr, prev, f, line, start, src);
      continue;
    }

    /* Nothing matched: report the offending character (control
       characters are shown as a blank) */
    snprintf(ei->error, sizeof(ei->error), "Invalid char '%c'",
             *src > 31 ? *src : ' ');
    snprintf(ei->file,  sizeof(ei->file),  "%s", rstr_get(f));
    ei->line = line;
    return NULL;
  }
  return prev;
}
Beispiel #2
0
/*
** Reads a wide string literal delimited by `del' and stores the resulting
** string in seminfo->ts.
**
** The opening delimiter, the body, the closing delimiter and a final
** terminator are all written to the lexer buffer with the wsave* macros.
** NOTE(review): the final length computation ((l - 3 * 2) / 2) implies
** that `l' is a byte offset and that every wsave advances it by 2 --
** confirm against the macro definitions.
**
** Lexical errors (unfinished string, escape too large) are reported
** through luaX_lexerror.
*/
static void read_wstring (LexState *LS, int del, SemInfo *seminfo) {
  size_t l = 0;
  checkbuffer(LS, l * 2);
  wsave_and_next(LS, l);  /* keep the opening delimiter */
  while (LS->current != del) {
    checkbuffer(LS, l * 2);
    switch (LS->current) {
      case EOZ:
        wsave(LS, '\0', l);
        luaX_lexerror(LS, "unfinished string", TK_EOS);
        break;  /* to avoid warnings */
      case '\n':
        wsave(LS, '\0', l);
        luaX_lexerror(LS, "unfinished string", TK_STRING);
        break;  /* to avoid warnings */
      case '\\':
        next(LS);  /* do not save the `\' */
        switch (LS->current) {
          case 'a': wsave(LS, '\a', l); next(LS); break;
          case 'b': wsave(LS, '\b', l); next(LS); break;
          case 'f': wsave(LS, '\f', l); next(LS); break;
          case 'n': wsave(LS, '\n', l); next(LS); break;
          case 'r': wsave(LS, '\r', l); next(LS); break;
          case 't': wsave(LS, '\t', l); next(LS); break;
          case 'v': wsave(LS, '\v', l); next(LS); break;
          case '\n': wsave(LS, '\n', l); inclinenumber(LS); break;
          case EOZ: break;  /* will raise an error next loop */
          case 'x': {
            int ch;
            next(LS);  /* skip the `x' */
            ch = tolower(LS->current);
            if (!lex_isdigit(ch) && !(ch >= 'a' && ch <= 'f')) {
              /* `\x' without hex digits: keep a literal `x'.  It must be
                 stored as a wide element like everything else here; a
                 narrow save() would misalign the rest of the buffer. */
              wsave(LS, 'x', l);
            }
            else {  /* \xXXXX: up to four hex digits */
              int c = 0;
              int i = 0;
              int numDigits = 4;
              do {
                ch = tolower(LS->current);
                if (lex_isdigit(ch))
                  c = 16*c + (ch-'0');
                else if (ch >= 'a' && ch <= 'f')
                  c = 16*c + (ch-'a') + 10;
                next(LS);
                ch = tolower(LS->current);
              } while (++i<numDigits && (lex_isdigit(ch) || (ch >= 'a' && ch <= 'f')));
              wsave(LS, c, l);
            }
            break;
          }
          default: {
            if (!lex_isdigit(LS->current))
              wsave_and_next(LS, l);  /* handles \\, \", \', and \? */
            else {  /* \ddd: up to three decimal digits */
              int c = 0;
              int i = 0;
              do {
                c = 10*c + (LS->current-'0');
                next(LS);
              } while (++i<3 && lex_isdigit(LS->current));
              if (c > UCHAR_MAX) {
                wsave(LS, '\0', l);
                luaX_lexerror(LS, "escape sequence too large", TK_STRING);
              }
              wsave(LS, c, l);
            }
          }
        }
        break;
      default:
        wsave_and_next(LS, l);
    }
  }
  wsave_and_next(LS, l);  /* skip delimiter */
  wsave(LS, '\0', l);
  /* Drop the two delimiters and the terminator from the stored string */
  seminfo->ts = luaS_newlwstr(LS->L, (const lua_WChar*)(luaZ_buffer(LS->buff) + 1 * 2), (l - 3 * 2) / 2);
}
Beispiel #3
0
/*
** Scans the input and returns the next token.
**
** Single-character tokens are returned as their character value, other
** tokens as TK_* codes.  For strings, wide strings, names and numbers the
** semantic value is delivered through `seminfo' (filled in here for names,
** and by the read_* helpers for the rest).  Newlines, whitespace and
** comments are skipped.
*/
int luaX_lex (LexState *LS, SemInfo *seminfo) {
  for (;;) {
    switch (LS->current) {

      case EOZ:
        return TK_EOS;

      case '\n':
        inclinenumber(LS);
        continue;

      case '-':
        next(LS);
        if (LS->current != '-')
          return '-';
        /* two dashes: a comment */
        next(LS);
        if (LS->current == '[') {
          next(LS);
          if (LS->current == '[') {
            read_long_string(LS, NULL);  /* long comment */
            continue;
          }
        }
        /* short comment: discard the rest of the line */
        while (LS->current != '\n' && LS->current != EOZ)
          next(LS);
        continue;

      case '[':
        next(LS);
        if (LS->current == '[') {
          read_long_string(LS, seminfo);
          return TK_STRING;
        }
        return '[';

      case '=':
        next(LS);
        if (LS->current == '=') {
          next(LS);
          return TK_EQ;
        }
        return '=';

      case '<':
        next(LS);
        if (LS->current == '<') {
          next(LS);
          return TK_SHL;
        }
        if (LS->current == '=') {
          next(LS);
          return TK_LE;
        }
        return '<';

      case '>':
        next(LS);
        if (LS->current == '>') {
          next(LS);
          return TK_SHR;
        }
        if (LS->current == '=') {
          next(LS);
          return TK_GE;
        }
        return '>';

      case '~':
        next(LS);
        if (LS->current == '=') {
          next(LS);
          return TK_NE;
        }
        return '~';

      case '"':
      case '\'':
        read_string(LS, LS->current, seminfo);
        return TK_STRING;

      case '.':
        next(LS);
        if (LS->current != '.') {
          /* a lone dot, or the start of a number such as `.5' */
          if (lex_isdigit(LS->current))
            return read_numeral(LS, 1, seminfo);
          return '.';
        }
        next(LS);
        if (LS->current != '.')
          return TK_CONCAT;   /* .. */
        next(LS);
        return TK_DOTS;   /* ... */

      default: {
        int ch = LS->current;

        if (isspace(ch)) {
          next(LS);
          continue;
        }

        if (lex_isdigit(ch))
          return read_numeral(LS, 0, seminfo);

        if (lex_isalpha(ch) || ch == '_') {
          char pending = 0;  /* already-consumed char handed to readname */
          size_t len;
          TString *name;

          if (ch == 'L') {
            /* `L"' introduces a wide string; otherwise the `L' is just
               the first character of a name */
            next(LS);
            if (LS->current == '"') {
              read_wstring(LS, LS->current, seminfo);
              return TK_WSTRING;
            }
            pending = 'L';
          }

          /* identifier or reserved word */
          len = readname(LS, pending);
          name = luaS_newlstr(LS->L, luaZ_buffer(LS->buff), len);
          if (name->tsv.reserved > 0)  /* reserved word? */
            return name->tsv.reserved - 1 + FIRST_RESERVED;
          seminfo->ts = name;
          return TK_NAME;
        }

        /* anything else is a single-char token (+ - / ...) */
        if (iscntrl(ch))
          luaX_error(LS, "invalid control char",
                         luaO_pushfstring(LS->L, "char(%d)", ch));
        next(LS);
        return ch;
      }
    }
  }
}
Beispiel #4
0
/* LUA_NUMBER */
/*
** Reads a numeric literal and stores its value in seminfo->r.
**
** `period' is non-zero when the caller has already consumed a leading
** `.', in which case the literal is treated as a real and the dot is put
** back into the buffer first.
**
** A literal starting with "0x" is read as hexadecimal (at most 8 digits)
** and converted directly; everything else is collected in the lexer
** buffer and converted with luaO_str2d.
**
** Always returns TK_NUMBER; malformed input is reported through
** luaX_lexerror.
*/
static int read_numeral (LexState *LS, int period, SemInfo *seminfo) {
  int isReal = 0;
  int startsWithZero = LS->current == '0';
  size_t l = 0;
  checkbuffer(LS, l);
  if (period) {
    save(LS, '.', l);
    isReal = 1;
  }
  if (startsWithZero) {
    next(LS);
    /* A hex literal cannot follow a decimal point, so ".0x10" must not
       take this path. */
    if (!period && LS->current == 'x') {
      /* Process a hex number.  Accumulate unsigned: eight hex digits do
         not fit a signed int without overflowing. */
      unsigned int value = 0;
      int ch;
      int i = 0;
      int numDigits = 8;
      next(LS);
      ch = tolower(LS->current);
      if (!(lex_isdigit(ch) || (ch >= 'a' && ch <= 'f'))) {
        /* "0x" with no digits: report it instead of silently eating
           whatever character follows.  The buffer is filled so the
           error helper has token text to work with. */
        checkbuffer(LS, l + 3);
        save(LS, '0', l);
        save(LS, 'x', l);
        save(LS, '\0', l);
        luaX_lexerror(LS, "malformed number", TK_NUMBER);
      }
      do {
        ch = tolower(LS->current);
        if (lex_isdigit(ch))
          value = 16u*value + (unsigned int)(ch-'0');
        else if (ch >= 'a' && ch <= 'f')
          value = 16u*value + (unsigned int)(ch-'a') + 10u;
        next(LS);
        ch = tolower(LS->current);
      } while (++i<numDigits && (lex_isdigit(ch) || (ch >= 'a' && ch <= 'f')));
      /* Convert through int so values with the top bit set keep the
         wrapped (negative) result earlier versions produced. */
      seminfo->r = (int)value;
      return TK_NUMBER;
    }
    else {
      /* plain leading zero: it was skipped above, so store it now */
      checkbuffer(LS, 1);
      save(LS, '0', l);
    }
  }
  /* integer part */
  while (lex_isdigit(LS->current)) {
    checkbuffer(LS, l);
    save_and_next(LS, l);
  }
  if (LS->current == '.') {
    isReal = 1;
    save_and_next(LS, l);
    if (LS->current == '.') {
      save_and_next(LS, l);
      save(LS, '\0', l);
      luaX_lexerror(LS,
                 "ambiguous syntax (decimal point x string concatenation)",
                 TK_NUMBER);
    }
  }
  /* fractional part */
  while (lex_isdigit(LS->current)) {
    checkbuffer(LS, l);
    save_and_next(LS, l);
  }
  if (LS->current == 'e' || LS->current == 'E') {
    isReal = 1;
    save_and_next(LS, l);  /* read `E' */
    if (LS->current == '+' || LS->current == '-')
      save_and_next(LS, l);  /* optional exponent sign */
    while (lex_isdigit(LS->current)) {
      checkbuffer(LS, l);
      save_and_next(LS, l);
    }
  }
  save(LS, '\0', l);
  /* Reals and integers are converted the same way; only the error
     message differs. */
  if (!luaO_str2d(luaZ_buffer(LS->buff), &seminfo->r))
    luaX_lexerror(LS, isReal ? "malformed number" : "malformed integer",
                  TK_NUMBER);
  return TK_NUMBER;
}