static int parseEscapedCharacter (void) { int d = '\0'; int c = fileGetc (); switch (c) { case 'A': d = '@'; break; case 'B': d = '\b'; break; case 'C': d = '^'; break; case 'D': d = '$'; break; case 'F': d = '\f'; break; case 'H': d = '\\'; break; case 'L': d = '~'; break; case 'N': d = '\n'; break; #ifdef QDOS case 'Q': d = 0x9F; break; #else case 'Q': d = '`'; break; #endif case 'R': d = '\r'; break; case 'S': d = '#'; break; case 'T': d = '\t'; break; case 'U': d = '\0'; break; case 'V': d = '|'; break; case '%': d = '%'; break; case '\'': d = '\''; break; case '"': d = '"'; break; case '(': d = '['; break; case ')': d = ']'; break; case '<': d = '{'; break; case '>': d = '}'; break; case '\n': skipToCharacter ('%'); break; case '/': { vString *string = parseInteger ('\0'); const char *value = vStringValue (string); const unsigned long ascii = atol (value); vStringDelete (string); c = fileGetc (); if (c == '/' && ascii < 256) d = ascii; break; } default: break; } return d; }
/*
 * Reads the body of a character literal; the opening quote has already been
 * consumed by the caller.
 *
 * A leading '%' is handed to parseEscapedCharacter (); any other character
 * stands for itself.  If the character read next is not the closing single
 * quote, the rest of the line is skipped.
 *
 * Returns the literal's character value.
 */
static int parseCharacter (void)
{
	const int first = getcFromInputFile ();
	const int value = (first == '%') ? parseEscapedCharacter () : first;

	/* a well-formed literal ends right here with a single quote */
	if (getcFromInputFile () != '\'')
		skipToCharacter ('\n');

	return value;
}
/*
 * Reads the next token from the input into `token`.
 *
 * Tabs, spaces and newlines are skipped, as are block comments and
 * single-line comments.  The token's line number and file position are
 * recorded where the token starts and refreshed after strings, characters,
 * regular expressions and identifiers have been consumed.
 *
 * A '/' that does not open a comment is classified from the previous token
 * type (LastTokenType): after something that can end an operand it is a
 * TOKEN_FORWARD_SLASH, otherwise a regular expression is parsed.
 *
 * Does not return at end of input: longjmp ()s to Exception with
 * ExceptionEOF.  On return LastTokenType holds the type just produced.
 */
static void readToken (tokenInfo *const token)
{
	int ch;

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
		ch = fileGetc ();
	while (ch == ' '  ||  ch == '\t'  ||  ch == '\n');

	token->lineNumber   = getSourceLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (ch)
	{
		case EOF:
			longjmp (Exception, (int)ExceptionEOF);
			break;

		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case ';': token->type = TOKEN_SEMICOLON;    break;
		case ',': token->type = TOKEN_COMMA;        break;
		case '.': token->type = TOKEN_PERIOD;       break;
		case ':': token->type = TOKEN_COLON;        break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case '=': token->type = TOKEN_EQUAL_SIGN;   break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, ch);
			token->lineNumber   = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '\\':
			/* the character after the backslash is swallowed when it is a
			 * backslash, a double quote or whitespace; otherwise pushed back
			 */
			ch = fileGetc ();
			if (! (ch == '\\'  ||  ch == '"'  ||  isspace (ch)))
				fileUngetc (ch);
			token->type = TOKEN_CHARACTER;
			token->lineNumber   = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '/':
		{
			const int next = fileGetc ();

			if (next == '*')	/* block comment */
			{
				for (;;)
				{
					skipToCharacter ('*');
					ch = fileGetc ();
					if (ch == '/')
						break;
					fileUngetc (ch);
					if (ch == EOF  ||  ch == '\0')
						break;
				}
				goto getNextChar;
			}
			if (next == '/')	/* single-line comment */
			{
				skipToCharacter ('\n');
				goto getNextChar;
			}

			/* not a comment: division operator or regular expression */
			fileUngetc (next);
			if (LastTokenType == TOKEN_CHARACTER     ||
				LastTokenType == TOKEN_KEYWORD       ||
				LastTokenType == TOKEN_IDENTIFIER    ||
				LastTokenType == TOKEN_STRING        ||
				LastTokenType == TOKEN_CLOSE_CURLY   ||
				LastTokenType == TOKEN_CLOSE_PAREN   ||
				LastTokenType == TOKEN_CLOSE_SQUARE)
			{
				token->type = TOKEN_FORWARD_SLASH;
			}
			else
			{
				token->type = TOKEN_REGEXP;
				parseRegExp ();
				token->lineNumber   = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
			}
			break;
		}

		default:
			if (isIdentChar (ch))
			{
				parseIdentifier (token->string, ch);
				token->lineNumber   = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			else
				token->type = TOKEN_UNDEFINED;
			break;
	}

	LastTokenType = token->type;
}
/*
 * Reads the next token from the input into `token`.
 *
 * token            receives type, keyword, string, line number and position.
 * include_newlines when true, '\r' and '\n' are not skipped as whitespace;
 *                  depending on the previous token type they are either
 *                  dropped or reported as TOKEN_SEMICOLON (see below).
 * repr             optional (may be NULL); when given, a textual
 *                  representation of the token is appended to it, preceded
 *                  by a single space if whitespace was skipped first.
 *
 * Block comments, single-line comments and a "#!" line on the first line are
 * skipped.  End of input yields TOKEN_EOF.  On return LastTokenType holds
 * the type just produced.
 */
static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr)
{
	int c;
	int i;

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	/* i counts the characters read, so i > 1 means whitespace was skipped */
	i = 0;
	do
	{
		c = fileGetc ();
		i++;
	}
	while (c == '\t'  ||  c == ' '  ||
		   ((c == '\r' || c == '\n') && ! include_newlines));

	token->lineNumber   = getSourceLineNumber ();
	token->filePosition = getInputFilePosition ();

	/* EOF is a sentinel, not a character: never copy it into the
	 * representation (it would end up there as a stray byte).
	 */
	if (repr && c != EOF)
	{
		if (i > 1)
			vStringPut (repr, ' ');
		vStringPut (repr, c);
	}

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;          break;
		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case ';': token->type = TOKEN_SEMICOLON;    break;
		case ',': token->type = TOKEN_COMMA;        break;
		case '.': token->type = TOKEN_PERIOD;       break;
		case ':': token->type = TOKEN_COLON;        break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case '=': token->type = TOKEN_EQUAL_SIGN;   break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;

		case '+':
		case '-':
		{
			int d = fileGetc ();
			if (d == c) /* ++ or -- */
				token->type = TOKEN_POSTFIX_OPERATOR;
			else
			{
				fileUngetc (d);
				token->type = TOKEN_BINARY_OPERATOR;
			}
			break;
		}

		case '*':
		case '%':
		case '?':
		case '>':
		case '<':
		case '^':
		case '|':
		case '&':
			token->type = TOKEN_BINARY_OPERATOR;
			break;

		case '\r':
		case '\n':
			/* This isn't strictly correct per the standard, but following the
			 * real rules means understanding all statements, and that's not
			 * what the parser currently does.  What we do here is a guess, by
			 * avoiding inserting semicolons that would make the statement on
			 * the left invalid.  Hopefully this should not have false negatives
			 * (e.g. should not miss insertion of a semicolon) but might have
			 * false positives (e.g. it will wrongfully emit a semicolon for the
			 * newline in "foo\n+bar").
			 * This should however be mostly harmless as we only deal with
			 * newlines in specific situations where we know a false positive
			 * wouldn't hurt too bad. */
			switch (LastTokenType)
			{
				/* these cannot be the end of a statement, so hold the newline */
				case TOKEN_EQUAL_SIGN:
				case TOKEN_COLON:
				case TOKEN_PERIOD:
				case TOKEN_FORWARD_SLASH:
				case TOKEN_BINARY_OPERATOR:
				/* and these already end one, no need to duplicate it */
				case TOKEN_SEMICOLON:
				case TOKEN_COMMA:
				case TOKEN_CLOSE_CURLY:
				case TOKEN_OPEN_CURLY:
					include_newlines = FALSE; /* no need to recheck */
					goto getNextChar;
					break;
				default:
					token->type = TOKEN_SEMICOLON;
			}
			break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber   = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				/* the opening quote is already in repr; add body and closer */
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '\\':
			c = fileGetc ();
			if (c != '\\'  &&  c != '"'  &&  !isspace (c))
				fileUngetc (c);
			token->type = TOKEN_CHARACTER;
			token->lineNumber   = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '/':
		{
			int d = fileGetc ();
			if ( (d != '*') &&		/* is this the start of a comment? */
				 (d != '/') )		/* is a one line comment? */
			{
				fileUngetc (d);
				/* division if the previous token can end an operand,
				 * otherwise the start of a regular expression
				 */
				switch (LastTokenType)
				{
					case TOKEN_CHARACTER:
					case TOKEN_IDENTIFIER:
					case TOKEN_STRING:
					case TOKEN_CLOSE_CURLY:
					case TOKEN_CLOSE_PAREN:
					case TOKEN_CLOSE_SQUARE:
						token->type = TOKEN_FORWARD_SLASH;
						break;

					default:
						token->type = TOKEN_REGEXP;
						parseRegExp ();
						token->lineNumber   = getSourceLineNumber ();
						token->filePosition = getInputFilePosition ();
						break;
				}
			}
			else
			{
				if (repr) /* remove the / we added */
					repr->buffer[--repr->length] = 0;

				if (d == '*')
				{
					do
					{
						skipToCharacter ('*');
						c = fileGetc ();
						if (c == '/')
							break;
						else
							fileUngetc (c);
					} while (c != EOF  &&  c != '\0');
					goto getNextChar;
				}
				else if (d == '/')	/* single-line comment */
				{
					skipToCharacter ('\n');
					/* if we care about newlines, put it back so it is seen */
					if (include_newlines)
						fileUngetc ('\n');
					goto getNextChar;
				}
			}
			break;
		}

		case '#':
			/* skip shebang in case of e.g. Node.js scripts */
			if (token->lineNumber > 1)
				token->type = TOKEN_UNDEFINED;
			else if ((c = fileGetc ()) != '!')
			{
				fileUngetc (c);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				skipToCharacter ('\n');
				goto getNextChar;
			}
			break;

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber   = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
				/* the first character is already in repr; append the rest */
				if (repr  &&  vStringLength (token->string) > 1)
					vStringCatS (repr, vStringValue (token->string) + 1);
			}
			break;
	}

	LastTokenType = token->type;
}
/*
 * Reads the next token from the input into `token`.
 *
 * While InPhp is false the first character comes from findPhpStart ()
 * (InPhp is set once that returns something other than EOF); otherwise
 * characters are read directly.  Whitespace and comments ('#', '//' and
 * block comments) are skipped.  "?>" and a "</script>" closing tag clear
 * InPhp and restart the scan.  End of input yields TOKEN_EOF.
 *
 * When the token is a semicolon or a curly brace, the access and
 * implementation details recorded in CurrentStatement are reset.
 */
static void readToken (tokenInfo *const token)
{
	int ch;

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	if (InPhp)
		ch = getcFromInputFile ();
	else
	{
		ch = findPhpStart ();
		if (ch != EOF)
			InPhp = TRUE;
	}
	ch = skipWhitespaces (ch);

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (ch)
	{
		case EOF:  token->type = TOKEN_EOF;          break;
		case '(':  token->type = TOKEN_OPEN_PAREN;   break;
		case ')':  token->type = TOKEN_CLOSE_PAREN;  break;
		case ';':  token->type = TOKEN_SEMICOLON;    break;
		case ',':  token->type = TOKEN_COMMA;        break;
		case '.':  token->type = TOKEN_PERIOD;       break;
		case ':':  token->type = TOKEN_COLON;        break;
		case '{':  token->type = TOKEN_OPEN_CURLY;   break;
		case '}':  token->type = TOKEN_CLOSE_CURLY;  break;
		case '[':  token->type = TOKEN_OPEN_SQUARE;  break;
		case ']':  token->type = TOKEN_CLOSE_SQUARE; break;
		case '&':  token->type = TOKEN_AMPERSAND;    break;
		case '\\': token->type = TOKEN_BACKSLASH;    break;

		case '=':
		{
			const int next = getcFromInputFile ();
			if (next == '='  ||  next == '>')
				token->type = TOKEN_OPERATOR;
			else
			{
				ungetcToInputFile (next);
				token->type = TOKEN_EQUAL_SIGN;
			}
			break;
		}

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, ch);
			token->lineNumber   = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '<':
		{
			int next = getcFromInputFile ();

			if (next == '/')
			{
				/* </script[:white:]*> */
				const char *expect = "script";

				while (*expect != '\0')
				{
					next = getcFromInputFile ();
					if (tolower (next) != *expect)
						break;
					expect++;
				}
				if (*expect == '\0')
				{
					next = skipWhitespaces (getcFromInputFile ());
					if (next == '>')
					{
						InPhp = FALSE;
						goto getNextChar;
					}
				}
				/* only the last character read is pushed back */
				ungetcToInputFile (next);
				token->type = TOKEN_UNDEFINED;
			}
			else if (next == '<'  &&  (next = getcFromInputFile ()) == '<')
			{
				token->type = TOKEN_STRING;
				parseHeredoc (token->string);
			}
			else
			{
				ungetcToInputFile (next);
				token->type = TOKEN_UNDEFINED;
			}
			break;
		}

		case '#': /* comment */
			skipSingleComment ();
			goto getNextChar;
			break;

		case '+':
		case '-':
		case '*':
		case '%':
		{
			/* swallow a trailing '=' (compound assignment) or the '>' of "->" */
			const int next = getcFromInputFile ();
			if (! (next == '='  ||  (ch == '-'  &&  next == '>')))
				ungetcToInputFile (next);
			token->type = TOKEN_OPERATOR;
			break;
		}

		case '/': /* division or comment start */
		{
			const int next = getcFromInputFile ();

			if (next == '/') /* single-line comment */
			{
				skipSingleComment ();
				goto getNextChar;
			}
			if (next == '*') /* block comment */
			{
				for (;;)
				{
					ch = skipToCharacter ('*');
					if (ch == EOF)
						break;
					ch = getcFromInputFile ();
					if (ch == '/')
						break;
					ungetcToInputFile (ch);
					if (ch == EOF  ||  ch == '\0')
						break;
				}
				goto getNextChar;
			}

			if (next != '=')
				ungetcToInputFile (next);
			token->type = TOKEN_OPERATOR;
			break;
		}

		case '$': /* variable start */
		{
			const int next = getcFromInputFile ();
			if (isIdentChar (next))
			{
				parseIdentifier (token->string, next);
				token->type = TOKEN_VARIABLE;
			}
			else
			{
				ungetcToInputFile (next);
				token->type = TOKEN_UNDEFINED;
			}
			break;
		}

		case '?': /* maybe the end of the PHP chunk */
		{
			const int next = getcFromInputFile ();
			if (next == '>')
			{
				InPhp = FALSE;
				goto getNextChar;
			}
			ungetcToInputFile (next);
			token->type = TOKEN_UNDEFINED;
			break;
		}

		default:
			if (isIdentChar (ch))
			{
				parseIdentifier (token->string, ch);
				token->keyword = analyzeToken (token->string, getInputLanguage ());
				if (token->keyword == KEYWORD_NONE)
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			else
				token->type = TOKEN_UNDEFINED;
			break;
	}

	switch (token->type)
	{
		case TOKEN_SEMICOLON:
		case TOKEN_OPEN_CURLY:
		case TOKEN_CLOSE_CURLY:
			/* reset current statement details on statement end, and when
			 * entering a deeper scope.
			 * it is a bit ugly to do this in readToken(), but it makes
			 * everything a lot simpler. */
			CurrentStatement.access = ACCESS_UNDEFINED;
			CurrentStatement.impl   = IMPL_UNDEFINED;
			break;

		default:
			break;
	}
}
/*
 * Reads the next token from the input into `token`.
 *
 * Tabs, spaces and newlines are skipped, as are "--" comments (to the end
 * of the line).  For multi-character operators the second character is
 * consumed when it belongs to the operator; a whitespace character read
 * while looking ahead is dropped, anything else is pushed back.
 * End of input yields TOKEN_EOF.
 */
static void readToken (tokenInfo *const token)
{
	int ch;

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
		ch = getcFromInputFile ();
	while (ch == ' '  ||  ch == '\t'  ||  ch == '\n');

	switch (ch)
	{
		case EOF: token->type = TOKEN_EOF;           break;
		case ';': token->type = TOKEN_SEMICOLON;     break;
		case '!': token->type = TOKEN_BANG;          break;
		case '}': token->type = TOKEN_CLOSE_BRACE;   break;
		case ']': token->type = TOKEN_CLOSE_BRACKET; break;
		case ')': token->type = TOKEN_CLOSE_PAREN;   break;
		case ',': token->type = TOKEN_COMMA;         break;
		case '$': token->type = TOKEN_DOLLAR;        break;
		case '.': token->type = TOKEN_DOT;           break;
		case '{': token->type = TOKEN_OPEN_BRACE;    break;
		case '[': token->type = TOKEN_OPEN_BRACKET;  break;
		case '(': token->type = TOKEN_OPEN_PAREN;    break;
		case '~': token->type = TOKEN_TILDE;         break;

		case '+':
		case '*':
		case '^':
		case '=':
			token->type = TOKEN_OPERATOR;
			break;

		case '-':
			ch = getcFromInputFile ();
			if (ch == '-')		/* comment: ignore the rest of the line */
			{
				skipToCharacter ('\n');
				goto getNextChar;
			}
			if (ch == '>')
				token->type = TOKEN_CONSTRAINT;
			else
			{
				if (! isspace (ch))
					ungetcToInputFile (ch);
				token->type = TOKEN_OPERATOR;
			}
			break;

		case '?':
		case ':':
		{
			const int next = getcFromInputFile ();
			if (next == '=')
				token->type = TOKEN_OPERATOR;
			else
			{
				if (! isspace (next))
					ungetcToInputFile (next);
				token->type = (ch == ':') ? TOKEN_COLON : TOKEN_QUESTION;
			}
			break;
		}

		case '<':
		case '>':
			/* both accept a trailing '=' or '>' */
			ch = getcFromInputFile ();
			if (! (ch == '='  ||  ch == '>'  ||  isspace (ch)))
				ungetcToInputFile (ch);
			token->type = TOKEN_OPERATOR;
			break;

		case '/':
			ch = getcFromInputFile ();
			if (! (ch == '/'  ||  ch == '='  ||  isspace (ch)))
				ungetcToInputFile (ch);
			token->type = TOKEN_OPERATOR;
			break;

		case '\\':
			ch = getcFromInputFile ();
			if (! (ch == '\\'  ||  isspace (ch)))
				ungetcToInputFile (ch);
			token->type = TOKEN_OPERATOR;
			break;

		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string);
			break;

		case '\'':
			token->type = TOKEN_CHARACTER;
			parseCharacter ();
			break;

		default:
			if (isalpha (ch))
			{
				parseIdentifier (token->string, ch);
				token->keyword = analyzeToken (token->string, Lang_eiffel);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			else if (isdigit (ch))
			{
				/* copy the number's text, then release the temporary */
				vString *const digits = parseNumeric (ch);
				vStringCat (token->string, digits);
				vStringDelete (digits);
				token->type = TOKEN_NUMERIC;
			}
			else if (isFreeOperatorChar (ch))
			{
				parseFreeOperator (token->string, ch);
				token->type = TOKEN_OPERATOR;
			}
			else
				token->type = TOKEN_UNDEFINED;
			break;
	}
}
/*
 * Reads the next token from the input into `token`.
 *
 * Tabs, spaces, newlines and semicolons are skipped, as are "--" comments
 * (to the end of the line).  For multi-character operators the second
 * character is consumed when it belongs to the operator; a whitespace
 * character read while looking ahead is dropped, anything else is pushed
 * back.  Both '?' and ':' produce TOKEN_COLON unless followed by '='.
 *
 * Does not return at end of input: longjmp ()s to Exception with
 * ExceptionEOF.  A character that starts no known token leaves the type at
 * TOKEN_UNDEFINED.
 */
static void readToken (tokenInfo *const token)
{
	int c;

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
		c = fileGetc ();
	while (c == '\t'  ||  c == ' '  ||  c == '\n');

	switch (c)
	{
		case EOF: longjmp (Exception, (int)ExceptionEOF); break;
		case '!': token->type = TOKEN_BANG;          break;
		case '$': token->type = TOKEN_DOLLAR;        break;
		case '(': token->type = TOKEN_OPEN_PAREN;    break;
		case ')': token->type = TOKEN_CLOSE_PAREN;   break;
		case ',': token->type = TOKEN_COMMA;         break;
		case '.': token->type = TOKEN_DOT;           break;
		case ';': goto getNextChar;
		case '[': token->type = TOKEN_OPEN_BRACKET;  break;
		case ']': token->type = TOKEN_CLOSE_BRACKET; break;
		case '{': token->type = TOKEN_OPEN_BRACE;    break;
		case '}': token->type = TOKEN_CLOSE_BRACE;   break;
		case '~': token->type = TOKEN_TILDE;         break;

		case '+':
		case '*':
		case '^':
		case '=':
			token->type = TOKEN_OPERATOR;
			break;

		case '-':
			c = fileGetc ();
			if (c == '>')
				token->type = TOKEN_CONSTRAINT;
			else if (c == '-')		/* is this the start of a comment? */
			{
				skipToCharacter ('\n');
				goto getNextChar;
			}
			else
			{
				if (!isspace (c))
					fileUngetc (c);
				token->type = TOKEN_OPERATOR;
			}
			break;

		case '?':
		case ':':
			c = fileGetc ();
			if (c == '=')
				token->type = TOKEN_OPERATOR;
			else
			{
				token->type = TOKEN_COLON;
				if (!isspace (c))
					fileUngetc (c);
			}
			break;

		case '<':
			c = fileGetc ();
			if (c != '='  &&  c != '>'  &&  !isspace (c))
				fileUngetc (c);
			token->type = TOKEN_OPERATOR;
			break;

		case '>':
			c = fileGetc ();
			if (c != '='  &&  c != '>'  &&  !isspace (c))
				fileUngetc (c);
			token->type = TOKEN_OPERATOR;
			break;

		case '/':
			c = fileGetc ();
			if (c != '/'  &&  c != '='  &&  !isspace (c))
				fileUngetc (c);
			token->type = TOKEN_OPERATOR;
			break;

		case '\\':
			c = fileGetc ();
			if (c != '\\'  &&  !isspace (c))
				fileUngetc (c);
			token->type = TOKEN_OPERATOR;
			break;

		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string);
			break;

		case '\'':
			token->type = TOKEN_CHARACTER;
			parseCharacter ();
			break;

		default:
			if (isalpha (c))
			{
				parseIdentifier (token->string, c);
				token->keyword = analyzeToken (token->string);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			else if (isdigit (c))
			{
				/* Only the text is needed: copy it and release the string
				 * instead of dropping the pointer.
				 * NOTE(review): assumes parseNumeric () hands back a freshly
				 * allocated vString owned by the caller, like parseInteger ()
				 * does -- confirm against its definition.
				 */
				vString *numeric = parseNumeric (c);
				vStringCat (token->string, numeric);
				vStringDelete (numeric);
				token->type = TOKEN_NUMERIC;
			}
			else if (isFreeOperatorChar (c))
			{
				parseFreeOperator (token->string, c);
				token->type = TOKEN_OPERATOR;
			}
			else
			{
				/* Unrecognised input character.  No assertion here: it would
				 * fire for any such character in the file being parsed, and
				 * malformed input should not abort the parser.
				 */
				token->type = TOKEN_UNDEFINED;
			}
			break;
	}
}
/*
 * Reads the next token from the input into `token`.
 *
 * Tabs, spaces and newlines are skipped, as are "--" comments (to the end
 * of the line), block comments, and lines introduced by the keyword
 * KEYWORD_rem.  A '/' that does not open a block comment leaves the token
 * type at TOKEN_UNDEFINED.
 *
 * Does not return at end of input: longjmp ()s to Exception with
 * ExceptionEOF.
 */
static void readToken (tokenInfo *const token)
{
	int c;

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
		c = fileGetc ();
	while (c == '\t'  ||  c == ' '  ||  c == '\n');

	switch (c)
	{
		case EOF: longjmp (Exception, (int)ExceptionEOF); break;
		case '(': token->type = TOKEN_OPEN_PAREN;  break;
		case ')': token->type = TOKEN_CLOSE_PAREN; break;
		case ';': token->type = TOKEN_SEMICOLON;   break;
		case ',': token->type = TOKEN_COMMA;       break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			break;

		case '-':
			c = fileGetc ();
			if (c == '-')		/* is this the start of a comment? */
			{
				skipToCharacter ('\n');
				goto getNextChar;
			}
			else
			{
				if (!isspace (c))
					fileUngetc (c);
				token->type = TOKEN_OPERATOR;
			}
			break;

		case '/':
		{
			int d = fileGetc ();
			if (d != '*')		/* is this the start of a comment? */
				fileUngetc (d);
			else
			{
				do
				{
					skipToCharacter ('*');
					c = fileGetc ();
					if (c == '/')
						break;
					else
						fileUngetc (c);
					/* Stop at end of input as well: with an unterminated
					 * comment c stays EOF, and testing for '\0' alone would
					 * never leave this loop.  The pushed-back EOF is then
					 * picked up at getNextChar.
					 */
				} while (c != EOF  &&  c != '\0');
				goto getNextChar;
			}
			break;
		}

		default:
			if (! isIdentChar1 (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->keyword = analyzeToken (token->string);
				if (isKeyword (token, KEYWORD_rem))
				{
					/* remark line: discard it and read on */
					vStringClear (token->string);
					skipToCharacter ('\n');
					goto getNextChar;
				}
				else if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			break;
	}
}