/// Reads an identifier, emitting each of its characters via putChar().
///
/// @param startChar  First character of the identifier. It is emitted
///                   unconditionally; readToken() only calls this function
///                   after isLetter(startChar) has succeeded.
///
/// After the first character, input is consumed for as long as the next
/// character is a letter or a digit. The first character that is neither is
/// left unconsumed.
void DefaultLexer::readIdentifier(char startChar) {
  char c = startChar;
  putChar(c);
  skipChar();
  c = lookChar();
  // Logical OR (not bitwise '|'): this is a boolean test, and short-circuit
  // evaluation skips the isDigit() call once isLetter() has matched.
  while (isLetter(c) || isDigit(c)) {
    putChar(c);
    skipChar();
    c = lookChar();
  }
}
bool DefaultLexer::readCharacter() { skipChar(); // skip leading ''' char c = lookChar(); while (c != '\'') { if (!readEscapeCharacter(c)) return false; c = lookChar(); } skipChar(); // skip trailing ''' return true; }
void DefaultLexer::readLineComment() { skipChar(); // skip '/' skipChar(); // skip '/' char c = lookChar(); while (c && !isNewline(c)) { skipChar(); c = lookChar(); } if (isNewline(c)) readNewline(c); }
/// Consumes one line terminator and reports it via signalNewline().
///
/// @param c  The character at the current position. When it is '\n' or '\r'
///           it is skipped, and if the following character is the other
///           member of the pair ("\n\r" or "\r\n") that one is skipped too.
///
/// signalNewline() is invoked in every case, even if c is neither '\n'
/// nor '\r'.
void DefaultLexer::readNewline(char c) {
  if (c == '\n' || c == '\r') {
    // The character that would complete a two-character line ending.
    const char partner = (c == '\n') ? '\r' : '\n';
    skipChar();
    if (lookChar() == partner)
      skipChar();
  }
  signalNewline();
}
/// Reads a run of operator characters, emitting each one via putChar().
///
/// @param startChar  First character of the operator; emitted unconditionally.
///
/// Consumption continues while isOperatorChar() accepts the next character.
/// The first rejected character is left unconsumed.
void DefaultLexer::readOperator(char startChar) {
  // The first character is always part of the operator.
  putChar(startChar);
  skipChar();

  for (char ch = lookChar(); isOperatorChar(ch); ch = lookChar()) {
    putChar(ch);
    skipChar();
  }
}
void DefaultLexer::readInteger(char startChar) { char c = startChar; do { putChar(c); skipChar(); c = lookChar(); } while (isDigit(c)); }
/// Reads a run of hexadecimal digits, emitting each one via putChar().
///
/// Unlike readInteger(), no start character is passed in: reading begins at
/// the current position and may consume nothing at all. A character is
/// accepted when either isDigit() or isHexDigit() accepts it.
void DefaultLexer::readHexInteger() {
  for (char ch = lookChar(); isDigit(ch) || isHexDigit(ch); ch = lookChar()) {
    putChar(ch);
    skipChar();
  }
}
bool DefaultLexer::readFloatExp(char startChar) { putChar(startChar); // put 'e' or 'E' skipChar(); char c = lookChar(); if (c == '+' || c == '-') { putChar(c); skipChar(); } else return false; c = lookChar(); if (isDigit(c)) { readInteger(c); return true; } return false; }
bool DefaultLexer::readEscapeCharacter(char c) { // translate string contents on the fly. if (c == 0) { signalLexicalError(); return false; } if (c == '\n' || c == '\r' || c == '\t') { signalLexicalError(); return false; } if (c == '\\') { skipChar(); c = lookChar(); switch (c) { case 0: signalLexicalError(); return false; case 'n': putChar('\n'); break; case 'r': putChar('\r'); break; case 't': putChar('\t'); break; default: putChar(c); break; } } else { putChar(c); } skipChar(); return true; }
/// Reads the next token from the input.
///
/// Whitespace, newlines and line comments are skipped first. A newline is
/// only returned as a TK_Newline token when interactive_ is set and the
/// current brace nesting is zero. After that the token is classified by its
/// first character, in this order: punctuation, identifier/keyword,
/// operator/keyword, integer literal, character literal, string literal,
/// end of input.
///
/// @return The token that was read. TK_Error is returned when a character or
///         string literal is malformed, or when the current character does
///         not start any known token (signalLexicalError() is called first
///         in the latter case).
Token DefaultLexer::readToken() {
  char c = lookChar();

  // Skip everything that cannot start a token.
  for (;;) {
    while (isWhiteSpace(c)) {
      skipChar();
      c = lookChar();
    }

    // Newlines.
    if (isNewline(c)) {
      readNewline(c);
      if (interactive_ && getCurrentBraceNesting() == 0)
        return Token(TK_Newline);
      c = lookChar();
      continue;
    }

    // Anything other than a line comment ends the skipping phase.
    if (c != '/' || lookChar(1) != '/')
      break;

    // Line comments are treated like newlines.
    readLineComment();
    c = lookChar();
  }

  SourceLocation sloc = getCurrentLocation();

  // Punctuation. Closing braces are signalled with the token kind of the
  // matching opening brace.
  switch (c) {
    case '(':
      skipChar();
      signalOpenBrace(TK_LParen);
      return Token(TK_LParen, "(", sloc);
    case ')':
      skipChar();
      signalCloseBrace(TK_LParen);
      return Token(TK_RParen, ")", sloc);
    case '{':
      skipChar();
      signalOpenBrace(TK_LCurlyBrace);
      return Token(TK_LCurlyBrace, "{", sloc);
    case '}':
      skipChar();
      signalCloseBrace(TK_LCurlyBrace);
      return Token(TK_RCurlyBrace, "}", sloc);
    case '[':
      skipChar();
      signalOpenBrace(TK_LSquareBrace);
      return Token(TK_LSquareBrace, "[", sloc);
    case ']':
      skipChar();
      signalCloseBrace(TK_LSquareBrace);
      return Token(TK_RSquareBrace, "]", sloc);
    case ',':
      skipChar();
      return Token(TK_Comma, ",", sloc);
    case ';':
      skipChar();
      return Token(TK_Semicolon, ";", sloc);
    case ':':
      // A ':' directly followed by an operator character is not a colon
      // token; it falls through to the classification below.
      if (!isOperatorChar(lookChar(1))) {
        skipChar();
        return Token(TK_Colon, ":", sloc);
      }
      break;
    case '.':
      skipChar();
      return Token(TK_Period, ".", sloc);
    default:
      break;
  }

  // Identifiers; a spelling known to lookupKeyword() yields that keyword id.
  if (isLetter(c)) {
    readIdentifier(c);
    StringRef text = copyStr(finishToken());
    unsigned keywordId = lookupKeyword(text.c_str());
    if (keywordId) {
      return Token(keywordId, text, sloc);
    }
    return Token(TK_Identifier, text, sloc);
  }

  // Generic operators; these may also be registered as keywords.
  if (isOperatorChar(c)) {
    readOperator(c);
    StringRef text = copyStr(finishToken());
    unsigned keywordId = lookupKeyword(text.c_str());
    if (keywordId) {
      return Token(keywordId, text, sloc);
    }
    return Token(TK_Operator, text, sloc);
  }

  // Numbers.
  if (isDigit(c)) {
    readInteger(c);
    StringRef text = copyStr(finishToken());
    return Token(TK_LitInteger, text, sloc);
  }

  // Characters.
  if (c == '\'') {
    if (!readCharacter())
      return Token(TK_Error);
    StringRef text = copyStr(finishToken());
    return Token(TK_LitCharacter, text, sloc);
  }

  // Strings.
  if (c == '\"') {
    if (!readString())
      return Token(TK_Error);
    StringRef text = copyStr(finishToken());
    return Token(TK_LitString, text, sloc);
  }

  // If we are out of buffer, produce an EOF token.
  if (c == 0 || stream_eof()) {
    return Token(TK_EOF, "", sloc);
  }

  // Cannot form a token from this character -- signal an error and bail.
  signalLexicalError();
  return Token(TK_Error, "", sloc);
}