// Reads the body of a character constant and returns it as a new token.
//
// The incoming `currentChar` argument is not inspected: it is overwritten by
// the first GetChar() call, so scanning starts at the character that follows
// the one the caller already consumed.
//
// Returns a heap-allocated Token positioned at the line/column recorded on
// entry; its type is ESCAPE_SEQUENCE when the constant starts with a
// backslash and CONST_CHAR otherwise. The token text holds the character(s)
// between the separators (both characters for an escape sequence).
//
// Throws ScannerException when the constant is empty, contains a line break
// or a tabulation symbol, holds an unknown escape sequence, or is not closed
// by a character separator right after its payload.
Token* Scanner::GetSymbol(char currentChar)
{
    const int startPos = currentPos;
    const int startLine = currentLine;

    currentChar = GetChar();
    string text(1, currentChar);

    if (IsCharSeparator(currentChar))
        throw ScannerException(currentLine, currentPos, "Empty character constant");
    if (IsEndOfLine(currentChar))
        throw ScannerException(currentLine, currentPos, "Newline in character constant");
    if (IsTabulationSymbol(currentChar))
        throw ScannerException(currentLine, currentPos, "Tabulation symbol in character constant");

    // A backslash introduces a two-character escape sequence.
    const bool isEscape = (currentChar == '\\');
    if (isEscape)
    {
        currentChar = GetChar();
        text += currentChar;
        if (!IsEscapeSequence(currentChar))
            throw ScannerException(currentLine, currentPos, "Invalid ESCAPE-sequence: \"" + text + "\"");
    }

    // The very next character has to close the constant.
    currentChar = GetChar();
    if (!IsCharSeparator(currentChar))
    {
        text += currentChar;
        throw ScannerException(currentLine, currentPos, "Too many long character constant: \"" + text + "\"");
    }

    return new Token(startLine, startPos, isEscape ? ESCAPE_SEQUENCE : CONST_CHAR, DEFAULT, text);
}
bool Scanner::Next() { char currentChar; do { if(lastString) { currentToken = new Token(currentLine, currentPos, END_OF_FILE, DEFAULT, ""); return false; } currentChar = GetChar(); } while(IsSpace(currentChar) || IsTabulationSymbol(currentChar) || IsEndOfLine(currentChar)); if(IsCommentBegin(currentChar)) { char secondChar; if( IsCommentBegin(secondChar = GetChar()) ) { NextLine(); return Next(); } if(secondChar == '*') { while( GetChar() != '*' || !IsCommentBegin(GetChar()) ) {} return Next(); } else BackToPreviousChar(); } if(IsLetter(currentChar)) currentToken = GetIdentificator(currentChar); else if(IsNumber(currentChar)) currentToken = GetNumber(currentChar); else if(IsCharSeparator(currentChar)) currentToken = GetSymbol(currentChar); else if(IsStringSeparator(currentChar)) currentToken = GetString(currentChar); else if(IsSeparator(currentChar)) currentToken = GetSeparator(currentChar); else if(IsSpecialSymbol(currentChar)) currentToken = GetOperation(currentChar); else throw ScannerException(currentLine, currentPos, "Indefinite character: \"" + string(1, currentChar) + "\""); return true; }
// Scans an integer or real constant whose first character is `currentChar`.
//
// Characters are read with GetChar() until '\0', a space, or a character that
// cannot belong to the number. In the last case the character is handed back
// with BackToPreviousChar() so the caller can scan it as the next token.
//
// Returns a heap-allocated Token positioned at the line/column recorded on
// entry; its type is CONST_REAL when the text contains a dot or an exponent
// marker and CONST_INTEGER otherwise.
//
// Throws ScannerException when
//   - a second dot or a second exponent marker is found,
//   - a dot or a letter follows the exponent marker,
//   - a letter directly follows the digits (reported as invalid identifier),
//   - the number ends with a dot, an exponent marker or an exponent sign.
Token* Scanner::GetNumber(char currentChar)
{
    const int pos = currentPos;
    const int line = currentLine;
    bool hasDot = false;
    bool hasExponent = false;
    string s(1, currentChar);

    while ((currentChar = GetChar()) != '\0' && !IsSpace(currentChar))
    {
        if (hasDot && IsDot(currentChar))
            throw ScannerException(currentLine, currentPos, "Too many dots in real number: \"" + GetInvalidToken(pos) + "\"");

        if (hasExponent)
        {
            if (IsE(currentChar))
                throw ScannerException(currentLine, currentPos, "Too many symbol \"E\" in real number: \"" + GetInvalidToken(pos) + "\"");

            // A sign belongs to the exponent only when it directly follows
            // the exponent marker; anywhere else it is the next operator
            // (e.g. "1e5+3") and must not be swallowed into the constant.
            const bool isSign = (currentChar == '-' || currentChar == '+');
            const bool signAfterMarker = isSign && IsE(s[s.length() - 1]);

            if (IsNumber(currentChar) || signAfterMarker)
            {
                s += currentChar;
                continue;
            }

            if (IsDot(currentChar) || IsLetter(currentChar))
                throw ScannerException(currentLine, currentPos, "Invalid real number: \"" + GetInvalidToken(pos) + "\"");

            // Any other character (separator, operator, ...) simply ends the
            // number, exactly as it does in the non-exponent path below.
            BackToPreviousChar();
            break;
        }

        if (IsNumber(currentChar) || IsE(currentChar) || IsDot(currentChar))
        {
            if (IsE(currentChar))
                hasExponent = true;
            if (IsDot(currentChar))
                hasDot = true;
            s += currentChar;
        }
        else
        {
            if (IsLetter(currentChar))
                throw ScannerException(currentLine, currentPos, "Invalid identificator: \"" + GetInvalidToken(pos) + "\"");
            BackToPreviousChar();
            break;
        }
    }

    // A constant must end in a digit: reject a dangling dot, exponent marker
    // or exponent sign ("1.", "1e", "1e+"). A sign can only be in `s` when it
    // was accepted right after the exponent marker.
    const char last = s[s.length() - 1];
    if (IsE(last) || IsDot(last) || last == '-' || last == '+')
        throw ScannerException(currentLine, currentPos, "Invalid real number: \"" + s + "\"");

    return new Token(line, pos, (hasDot || hasExponent) ? CONST_REAL : CONST_INTEGER, DEFAULT, s);
}
// ------------------------------------------------------------------- // Scan for special characters // ------------------------------------------------------------------- void Scanner::getSpecial() { char tch; switch (fch) { case '\13': //Octal ?? ftoken = CodeTypes::tEolToken; nextChar(); break; case ';': ftoken = CodeTypes::tSemiColonToken; nextChar(); break; case ',': ftoken = CodeTypes::tCommaToken; nextChar(); break; case ':': ftoken = CodeTypes::tColonToken; nextChar(); break; case '=': nextChar(); if (fch == '>') { ftoken = CodeTypes::tReversibleArrow; nextChar(); } else ftoken = CodeTypes::tEqualsToken; break; case '+': ftoken = CodeTypes::tPlusToken; nextChar(); break; case '-': nextChar(); if (fch == '>') { ftoken = CodeTypes::tIrreversibleArrow; nextChar(); } else ftoken = CodeTypes::tMinusToken; break; case '*': nextChar(); ftoken = CodeTypes::tMultToken; break; case '/': // look ahead at next ch tch = nextChar(); if (tch == '/') { ftoken = CodeTypes::tStartComment; nextChar(); } else ftoken = CodeTypes::tDivToken; break; case '(': nextChar(); ftoken = CodeTypes::tLParenToken; break; case ')': nextChar(); ftoken = CodeTypes::tRParenToken; break; case '[': nextChar(); ftoken = CodeTypes::tLBracToken; break; case ']': nextChar(); ftoken = CodeTypes::tRBracToken; break; case '{': nextChar(); ftoken = CodeTypes::tLCBracToken; break; case '}': nextChar(); ftoken = CodeTypes::tRCBracToken; break; case '^': nextChar(); ftoken = CodeTypes::tPowerToken; break; case '<': nextChar(); if (fch == '=') { ftoken = CodeTypes::tLessThanOrEqualToken; nextChar(); } else ftoken = CodeTypes::tLessThanToken; break; case '>': nextChar(); if (fch == '=') { ftoken = CodeTypes::tMoreThanOrEqualToken; nextChar(); } else ftoken = CodeTypes::tMoreThanToken; break; case '!': nextChar(); if (fch == '=') { ftoken = CodeTypes::tNotEqualToken; nextChar(); } break; case '.': nextChar(); ftoken = CodeTypes::tPointToken; break; case '$': nextChar(); ftoken = CodeTypes::tDollarToken; 
break; default: { string aToken; aToken.push_back(fch); throw ScannerException("Syntax error: Unknown special token [" + aToken + "]"); } } }
void Scanner::getNumber() { const int MAX_DIGIT_COUNT = 3; // Max number of digits in exponent int single_digit; double scale; double evalue; int exponent_sign; int digit_count; tokenInteger = 0; tokenDouble = 0.0; tokenScalar = 0.0; evalue = 0.0; exponent_sign = 1; // Assume first it's an integer ftoken = CodeTypes::tIntToken; // Pick up number before any decimal place if (fch != '.') try { do { single_digit = fch - '0'; tokenInteger = 10*tokenInteger + single_digit; tokenScalar = tokenInteger; nextChar(); } while (FCharTable[fch] == TCharCode::cDIGIT); } catch(int) { throw ScannerException("Integer Overflow - constant value too large to read"); } scale = 1; if (fch == '.') { // Then it's a float. Start collecting fractional part ftoken = CodeTypes::tDoubleToken; tokenDouble = tokenInteger; nextChar(); if (FCharTable[fch] != TCharCode::cDIGIT) { throw ScannerException("Syntax error: expecting number after decimal point"); } try { while (FCharTable[fch] == TCharCode::cDIGIT) { scale = scale*0.1; single_digit = fch - '0'; tokenDouble = tokenDouble + (single_digit*scale); tokenScalar = tokenDouble; nextChar(); } } catch(const Exception&) { throw new ScannerException("Floating point overflow - constant value too large to read in"); } } // Next check for scientific notation if ((fch == 'e') || (fch == 'E')) { // Then it's a float. 
Start collecting exponent part if (ftoken == CodeTypes::tIntToken) { ftoken = CodeTypes::tDoubleToken; tokenDouble = tokenInteger; tokenScalar = tokenInteger; } nextChar(); if ((fch == '-') || (fch == '+')) { if (fch == '-') exponent_sign = -1; nextChar(); } // accumulate exponent, check that first ch is a digit if (FCharTable[fch] != TCharCode::cDIGIT) throw new ScannerException("Syntax error: number expected in exponent"); digit_count = 0; try { do { digit_count++; single_digit = fch - '0'; evalue = 10*evalue + single_digit; nextChar(); } while ((FCharTable[fch] == TCharCode::cDIGIT) && (digit_count <= MAX_DIGIT_COUNT)); } catch(const Exception&) { throw new ScannerException("Floating point overflow - Constant value too large to read"); } if (digit_count > MAX_DIGIT_COUNT) throw new ScannerException("Syntax error: too many digits in exponent"); evalue = evalue*exponent_sign; if (evalue > 300) { throw new ScannerException("Exponent overflow while parsing floating point number"); } evalue = pow(10.0, evalue); tokenDouble = tokenDouble*evalue; tokenScalar = tokenDouble; } // Check for complex number if ((fch == 'i') || (fch == 'j')) { if (ftoken == CodeTypes::tIntToken) tokenDouble = tokenInteger; ftoken = CodeTypes::tComplexToken; nextChar(); } }