/*
 *  Append the character c to the token and then complete the token with the given
 *  token id, sub-token id and group mask. Yields T_ERR when the character cannot be
 *  appended (addCharToToken reports a negative result), otherwise whatever
 *  finishToken returns.
 */
static int makeSubToken(EcToken *tp, int c, int tokenId, int subId, int groupMask)
{
    int     rc;

    rc = addCharToToken(tp, c);
    if (rc < 0) {
        return T_ERR;
    }
    return finishToken(tp, tokenId, subId, groupMask);
}
/*
 *  Complete a token that has no sub-token id (finishToken is given -1 for it).
 *  When c is non-zero it is appended to the token first; a failed append yields T_ERR.
 *  A zero c means "nothing to append" and the token is finished as it stands.
 */
static int makeToken(EcToken *tp, int c, int tokenId, int groupMask)
{
    if (c != 0) {
        if (addCharToToken(tp, c) < 0) {
            return T_ERR;
        }
    }
    return finishToken(tp, tokenId, -1, groupMask);
}
// Scans a name token. The first character is consumed unconditionally; after that the
// name extends over letters, numbers, '_', '!' and '?'. The words "yes", "no", "true"
// and "false" are finished as Boolean tokens, everything else as Name tokens.
Token *Lexer::scanName() {
    startToken();
    consume(); // first character of the name

    for(;;) {
        const bool partOfName = _currentChar.isLetterOrNumber()
                                || _currentChar == '_'
                                || _currentChar == '!'
                                || _currentChar == '?';
        if(!partOfName) break;
        consume();
    }

    const QStringRef word(tokenTextRef());
    const bool isBooleanWord = word == "yes" || word == "no" || word == "true" || word == "false";
    if(isBooleanWord) {
        return finishToken(Token::Boolean);
    }
    return finishToken(Token::Name);
}
// Scans an operator token. Starting from the current character, the operator is grown
// one character at a time for as long as the operator table still contains the
// candidate text extended by the next character.
Token *Lexer::scanOperator() {
    startToken();
    QString candidate(_currentChar);

    for(;;) {
        consume();
        // Try the operator extended by the character that is now current
        candidate.append(_currentChar);
        if(!operatorTable()->has(candidate)) break;
    }

    return finishToken(Token::Operator);
}
// Scans a double-quoted text literal, quotes included. Backslash sequences are handed to
// consumeEscapeSequence(). Throws a lexer exception if the input ends before the
// closing quote.
Token *Lexer::scanText() {
    startToken();
    consume(); // opening double quote

    for(;;) {
        if(_currentChar == '"') break;
        if(isEof()) {
            throw lexerException("unexpected EOF found in a text literal");
        }
        if(_currentChar == '\\') {
            consumeEscapeSequence();
        } else {
            consume();
        }
    }

    consume(); // closing double quote
    return finishToken(Token::Text);
}
// Scans a numeric literal and finishes it as a Number token.
//
// Recognised forms, as implemented below:
//   - a leading "0x"/"0X" switches to base 16, a leading "0b"/"0B" to base 2,
//     and a leading '0' directly followed by a digit to base 8;
//   - a '.' counts as a decimal point only when the character after it is a digit;
//   - 'e'/'E' starts an exponential part (outside base 16, where they are digits),
//     optionally followed by one '+' or '-' sign.
// Throws a lexer exception for digits that do not fit the base, for misplaced decimal
// points or exponents, for name characters glued to the number, and when the literal
// ends where a digit is still required.
Token *Lexer::scanNumber() {
    startToken();
    consume(); // first digit

    short radix = 10;
    bool sawDecimalPoint = false;
    bool sawExponent = false;
    bool digitRequired = false;

    // Work out the base from what follows a leading zero
    if(_previousChar == '0') {
        if(_currentChar.isNumber()) {
            // The digit is not consumed here: the main loop validates it as octal
            radix = 8;
            digitRequired = true;
        } else {
            short prefixedRadix = 0;
            if(_currentChar == 'x' || _currentChar == 'X') {
                prefixedRadix = 16;
            } else if(_currentChar == 'b' || _currentChar == 'B') {
                prefixedRadix = 2;
            }
            if(prefixedRadix != 0) {
                radix = prefixedRadix;
                consume(); // the base prefix letter
                digitRequired = true;
            }
        }
    }

    for(;;) {
        if(_currentChar.isNumber()) {
            if(radix == 2 && _currentChar != '0' && _currentChar != '1') {
                throw lexerException("a binary number can only contain 0 or 1");
            }
            if(radix == 8 && !QString("01234567").contains(_currentChar)) {
                throw lexerException("an octal number can only contain digits from 0 to 7");
            }
            digitRequired = false;
        } else if(radix == 16 && QString("abcdef").contains(_currentChar, Qt::CaseInsensitive)) {
            digitRequired = false;
        } else if(_currentChar == '.' && _nextChar.isNumber()) {
            if(sawDecimalPoint) {
                throw lexerException("too many decimal points in a number");
            }
            if(sawExponent) {
                throw lexerException("the exponential part of a number cannot contain a decimal point");
            }
            if(radix == 16) {
                throw lexerException("an hexadecimal number cannot contain a decimal point");
            }
            if(radix == 8) {
                throw lexerException("an octal number cannot contain a decimal point");
            }
            if(radix == 2) {
                throw lexerException("a binary number cannot contain a decimal point");
            }
            sawDecimalPoint = true;
            digitRequired = true;
        } else if(radix != 16 && (_currentChar == 'e' || _currentChar == 'E')) {
            if(sawExponent) {
                throw lexerException("a number cannot contain more than one exponential part");
            }
            if(radix == 8) {
                throw lexerException("an octal number cannot contain an exponential part");
            }
            if(radix == 2) {
                throw lexerException("a binary number cannot contain an exponential part");
            }
            sawExponent = true;
            digitRequired = true;
        } else if((_currentChar == '+' || _currentChar == '-')
                  && radix == 10
                  && (_previousChar == 'e' || _previousChar == 'E')) {
            // Sign of the exponent: nothing to record, a digit is still required
        } else if(isName()) {
            throw lexerException(QString("unexpected character found in a number: '%1'").arg(_currentChar));
        } else {
            break; // first character that is not part of the number
        }
        consume();
    }

    if(digitRequired) {
        throw lexerException(QString("unexpected character found in a number: '%1'").arg(_currentChar));
    }
    return finishToken(Token::Number);
}
// Scans a name enclosed in backquotes and finishes it as a Name token.
// The token is started after the opening backquote has been consumed and finished
// before the closing one is consumed, so neither backquote is between the start and
// finish calls. Throws a lexer exception if the input ends before the closing backquote.
Token *Lexer::scanBackquotedName() {
    consume(); // opening backquote
    startToken();
    while(_currentChar != '`') {
        if(isEof()) {
            // The message used to say "text literal", copied from scanText()
            throw lexerException("unexpected EOF found in a backquoted name");
        }
        consume();
    }
    Token *token = finishToken(Token::Name);
    consume(); // closing backquote
    return token;
}
// Scans a single-quoted character literal, quotes included. The literal holds at most
// one character or one escape sequence; an immediately closed literal ('') is accepted
// as well. Throws a lexer exception on EOF inside the literal or when more than one
// character precedes the closing quote.
Token *Lexer::scanCharacter() {
    startToken();
    consume(); // opening single quote

    if(isEof()) {
        throw lexerException("unexpected EOF found in a character literal");
    }

    const bool closedImmediately = (_currentChar == '\'');
    if(!closedImmediately) {
        // The one character (or escape sequence) of the literal
        if(_currentChar == '\\') {
            consumeEscapeSequence();
        } else {
            consume();
        }
        if(isEof()) {
            throw lexerException("unexpected EOF found in a character literal");
        }
        if(_currentChar != '\'') {
            throw lexerException("a character literal can't have more than one character");
        }
    }

    consume(); // closing single quote
    return finishToken(Token::Character);
}
// TODO - handle triple quoting static int getQuotedToken(EcInput *input, EcToken *tp, int c) { EcStream *stream; int quoteType; stream = input->stream; quoteType = c; for (c = getNextChar(stream); c && c != quoteType; c = getNextChar(stream)) { if (c == 0) { return makeToken(tp, 0, T_ERR, 0); } if (c == '\\') { c = getNextChar(stream); switch (c) { // TBD -- others case '\\': break; case '\'': case '\"': break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'u': case 'x': c = decodeNumber(input, 16, 4); break; case '0': c = decodeNumber(input, 8, 3); break; default: break; } } addCharToToken(tp, c); } return finishToken(tp, T_STRING, -1, 0); }
/*
 *  Read an identifier or reserved word. The caller has already established that c is
 *  alphabetic. Alphanumerics, '_', '$' and backslash sequences are accumulated into the
 *  token; the first character that does not belong is put back on the stream. The token
 *  text is then looked up in the lexer keyword table: a hit finishes the token with the
 *  reserved word's ids and group mask, a miss finishes it as T_ID.
 */
static int getAlphaToken(EcInput *input, EcToken *tp, int c)
{
    EcStream        *in;
    ReservedWord    *word;

    in = input->stream;

    //  TBD -- does ES4 allow $
    for (; isalnum(c) || c == '_' || c == '$' || c == '\\'; c = getNextChar(in)) {
        if (c == '\\') {
            c = getNextChar(in);
            if (c == '\n' || c == '\r') {
                /* A backslash followed by a line ending terminates the identifier */
                break;
            }
            if (c == 'u') {
                c = decodeNumber(input, 16, 4);
            }
        }
        addCharToToken(tp, c);
    }

    /* Return the terminating character to the stream unless it is zero */
    if (c != 0) {
        putBackChar(in, c);
    }

    //  TODO - need to take into account contextually reserved and
    //  full reserved words.
    word = (ReservedWord*) mprLookupHash(input->lexer->keywords, (char*) tp->text);
    if (!word) {
        return finishToken(tp, T_ID, -1, 0);
    }
    return finishToken(tp, word->tokenId, word->subId, word->groupMask);
}
/*
 *  Read a numeric token starting with the character c and finish it as T_NUMBER.
 *
 *  A leading "0x"/"0X" reads a run of hexadecimal digits. Otherwise digits together
 *  with '.', 'e' and 'f' (either case) are accumulated without further validation.
 *  The character that ends the number is put back on the stream, unless it is zero,
 *  matching the guard getAlphaToken applies before putBackChar.
 *
 *  TODO rationalize with ecParser T_NUMBER code. This could be a lot faster.
 */
static int getNumberToken(EcInput *input, EcToken *tp, int c)
{
    EcStream    *stream;
    int         lowc;

    stream = input->stream;

    if (c == '0') {
        addCharToToken(tp, c);
        c = getNextChar(stream);
        if (tolower(c) == 'x') {
            /* The first pass appends the 'x' itself, later passes the hex digits */
            do {
                addCharToToken(tp, c);
                c = getNextChar(stream);
            } while (isxdigit(c));
            if (c) {
                putBackChar(stream, c);
            }
            return finishToken(tp, T_NUMBER, -1, 0);
        }
    }

    lowc = tolower(c);
    while (isdigit(lowc) || lowc == '.' || lowc == 'e' || lowc == 'f') {
        addCharToToken(tp, c);
        c = getNextChar(stream);
        lowc = tolower(c);
    }
    if (c) {
        putBackChar(stream, c);
    }
    return finishToken(tp, T_NUMBER, -1, 0);
}
// Scans a run of line breaks as a single Newline token. The first character is
// consumed unconditionally; any newlines or spaces that follow are folded into the
// same token.
Token *Lexer::scanNewline() {
    startToken();
    consume(); // the character that triggered the scan
    while(isNewline() || isSpace()) {
        consume();
    }
    return finishToken(Token::Newline);
}
// Scans a closing bracket together with any '!' and '?' characters that directly
// follow it, and finishes the whole run as one RightBracket token.
Token *Lexer::scanRightBracket() {
    startToken();
    consume(); // ]

    for(;;) {
        const bool isSuffix = (_currentChar == '!' || _currentChar == '?');
        if(!isSuffix) break;
        consume();
    }

    return finishToken(Token::RightBracket);
}
// Reads and returns the next token from the input.
//
// Whitespace, newlines and "//" line comments are skipped first; in interactive mode a
// newline met while the brace nesting is zero is returned as a TK_Newline token instead.
// After that the token kind is chosen from the current character, in this order:
// punctuation, identifiers/keywords, operators, integers, character literals, string
// literals, end of input. Anything else is reported through signalLexicalError() and
// returned as TK_Error.
Token DefaultLexer::readToken() {
  char c = lookChar();

  // Discard everything that does not start a token
  for (;;) {
    for (; isWhiteSpace(c); c = lookChar()) {
      skipChar();
    }

    if (isNewline(c)) {
      readNewline(c);
      if (interactive_ && getCurrentBraceNesting() == 0) {
        return Token(TK_Newline);
      }
    } else if (c == '/' && lookChar(1) == '/') {
      // line comments are skipped just like newlines
      readLineComment();
    } else {
      break;
    }
    c = lookChar();
  }

  SourceLocation sloc = getCurrentLocation();

  // punctuation
  switch (c) {
  case '(':
    skipChar();
    signalOpenBrace(TK_LParen);
    return Token(TK_LParen, "(", sloc);
  case ')':
    skipChar();
    signalCloseBrace(TK_LParen);
    return Token(TK_RParen, ")", sloc);
  case '{':
    skipChar();
    signalOpenBrace(TK_LCurlyBrace);
    return Token(TK_LCurlyBrace, "{", sloc);
  case '}':
    skipChar();
    signalCloseBrace(TK_LCurlyBrace);
    return Token(TK_RCurlyBrace, "}", sloc);
  case '[':
    skipChar();
    signalOpenBrace(TK_LSquareBrace);
    return Token(TK_LSquareBrace, "[", sloc);
  case ']':
    skipChar();
    signalCloseBrace(TK_LSquareBrace);
    return Token(TK_RSquareBrace, "]", sloc);
  case ',':
    skipChar();
    return Token(TK_Comma, ",", sloc);
  case ';':
    skipChar();
    return Token(TK_Semicolon, ";", sloc);
  case ':':
    // A colon directly followed by an operator character is left for the
    // generic operator branch further down.
    if (!isOperatorChar(lookChar(1))) {
      skipChar();
      return Token(TK_Colon, ":", sloc);
    }
    break;
  case '.':
    skipChar();
    return Token(TK_Period, ".", sloc);
  default:
    break;
  }

  // identifiers, which may turn out to be keywords
  if (isLetter(c)) {
    readIdentifier(c);
    StringRef text = copyStr(finishToken());
    unsigned keywordId = lookupKeyword(text.c_str());
    if (!keywordId) {
      return Token(TK_Identifier, text, sloc);
    }
    return Token(keywordId, text, sloc);
  }

  // generic operators, which may also be registered as keywords
  if (isOperatorChar(c)) {
    readOperator(c);
    StringRef text = copyStr(finishToken());
    unsigned keywordId = lookupKeyword(text.c_str());
    if (!keywordId) {
      return Token(TK_Operator, text, sloc);
    }
    return Token(keywordId, text, sloc);
  }

  // numbers
  if (isDigit(c)) {
    readInteger(c);
    StringRef digits = copyStr(finishToken());
    return Token(TK_LitInteger, digits, sloc);
  }

  // characters
  if (c == '\'') {
    if (!readCharacter()) {
      return Token(TK_Error);
    }
    StringRef literal = copyStr(finishToken());
    return Token(TK_LitCharacter, literal, sloc);
  }

  // strings
  if (c == '\"') {
    if (!readString()) {
      return Token(TK_Error);
    }
    StringRef literal = copyStr(finishToken());
    return Token(TK_LitString, literal, sloc);
  }

  // if we're out of buffer, put in an EOF token.
  if (c == 0 || stream_eof()) {
    return Token(TK_EOF, "", sloc);
  }

  // Can't get the next token -- signal an error and bail.
  signalLexicalError();
  return Token(TK_Error, "", sloc);
}