Example #1
0
/// Returns the human-readable name of this token's class.
///
/// Each known TokenClass enumerator maps to its own spelling; any other
/// value yields "None".
std::string Token::tokenClassString() const
{
  const char* label = "None";  // fallback for any class not listed below

  switch (tokenClass()) {
  case TokenClass::Keyword:
    label = "Keyword";
    break;
  case TokenClass::Identifier:
    label = "Identifier";
    break;
  case TokenClass::Constant:
    label = "Constant";
    break;
  case TokenClass::StringLiteral:
    label = "StringLiteral";
    break;
  case TokenClass::Punctuator:
    label = "Punctuator";
    break;
  default:
    break;
  }

  return label;
}
Example #2
0
//Reads characters from fileManager and drives the stateMatrix transition table
//until one complete token has been recognized, then returns that token.
//
//Returns:
//  - the recognized token (type, subtype, lexeme) when an ACCEPT state is reached;
//    identifiers are additionally matched, case-insensitively, against the
//    operator words (or/and/div/mod) and the keyword table;
//  - tokenClass(EOF_T,NONE_ST,"EOF") when the end-of-file state is reached, and
//    also after every reported error (in case errorAndExit returns).
//
//Errors are reported through errorAndExit for: a character outside the matrix
//range, an invalid transition, an over-long integer/string/identifier lexeme,
//and any ERROR_ACTION state.
//Side effects: compiler directives ({$p+}, {$p-}, {$e+}, {$e-}, {$s+}) toggle the
//print status / expressionDebugging flag or dump the symbol table to cout.
tokenClass scannerClass::getToken()
{
	int currentStateNum = 0;
	string currentLexeme;
	while(true)
	{
		//Being back in state 0 means a new lexeme starts here, so drop the old buffer.
		if(currentStateNum == 0)
			currentLexeme = "";

		int c = (int)fileManager.getNextChar();
		if(c == EOF)//EOF(-1) is not a valid matrix column, so map it to its dedicated column.
			c = EOF_INDEX;

		//Reject anything that cannot be used as a column index. The lower bound guards
		//against negative values other than EOF (e.g. a sign-extended char).
		if(c < 0 || c >= MAX_CHAR)
		{
			//Build a real string: adding an int to a string literal is pointer arithmetic.
			errorAndExit(string("Illegal symbol: ")+static_cast<char>(c));
			return tokenClass(EOF_T,NONE_ST,"EOF");
		}

		const State& s = stateMatrix[currentStateNum][c];//Transition for (state, char).
		if(s.nextStateNum == INVALID_STATE)//The char sequence is not recognized.
		{
			errorAndExit("Invalid char sequence: "+(currentLexeme+static_cast<char>(c)));
			return tokenClass(EOF_T,NONE_ST,"EOF");
		}
		if(s.nextStateNum == EOF_INDEX)//Reached EOF, no action needs to be taken.
			return tokenClass(EOF_T,NONE_ST,"EOF");

		switch(s.action)
		{
		case NO_ACTION:
			currentLexeme += static_cast<char>(c);//Store the current char.
			break;
		case ACCEPT:
			{
				//The accepting char either belongs to the lexeme or is handed back
				//to the file manager so the next call reads it again.
				if(s.needPushBack)
					fileManager.pushBack();
				else
					currentLexeme += static_cast<char>(c);

				int type = s.token->type;

				if(type == INTEGER_T && currentLexeme.length() > 4)
				{
					errorAndExit("Integer can have at most four digits: "+currentLexeme);
					return tokenClass(EOF_T,NONE_ST,"EOF");
				}

				//NOTE(review): limit is 52 while the message says fifty - presumably the
				//lexeme still contains its two delimiters; confirm.
				if(type == STRING_T && currentLexeme.length() > 52)
				{
					errorAndExit("String can have at most fifty characters: "+currentLexeme);
					return tokenClass(EOF_T,NONE_ST,"EOF");
				}

				if(type == IDENTIFIER_T)
				{
					if(currentLexeme.length() > 12)
					{
						errorAndExit("Identifier can have at most twelve characters: "+currentLexeme);
						return tokenClass(EOF_T,NONE_ST,"EOF");
					}

					//Operator words are spelled like identifiers; match them ignoring case.
					const char* cString = currentLexeme.c_str();
					if(_strcmpi("or",cString)==0)
						return tokenClass(ADDOP_T,OR_ST,currentLexeme);
					if(_strcmpi("and",cString)==0)
						return tokenClass(MULOP_T,AND_ST,currentLexeme);
					if(_strcmpi("div",cString)==0)
						return tokenClass(MULOP_T,DIV_ST,currentLexeme);
					if(_strcmpi("mod",cString)==0)
						return tokenClass(MULOP_T,MOD_ST,currentLexeme);

					//Reserved words, also matched ignoring case.
					Keyword keywords[16]={PROGRAM_KEYWORD,FUNCTION_KEYWORD,BEGIN_KEYWORD,END_KEYWORD,IF_KEYWORD,THEN_KEYWORD,ELSE_KEYWORD,WHILE_KEYWORD,DO_KEYWORD,COUT_KEYWORD,CIN_KEYWORD,ENDL_KEYWORD,INT_KEYWORD,BOOLEAN_KEYWORD,TRUE_KEYWORD,FALSE_KEYWORD};
					for(int i = 0;i<16;i++)
						if(_strcmpi(keywords[i].keyword,cString)==0)
							return tokenClass(keywords[i].type,NONE_ST,currentLexeme);
				}

				//Not a keyword/operator word: report the token attached to the state.
				return tokenClass(s.token->type,s.token->subtype,currentLexeme);
			}
		case WARNING_ACTION:
			cout<<"warning"<<endl;//Currently not used because no warning state is checked by the state matrix.
			break;
		case ERROR_ACTION:
			{
				if(s.actionInfo != NULL)
					errorAndExit(*s.actionInfo);
				else //Only a double check. Normally an error message should be included.
					errorAndExit("Error");
				//Same convention as the other error paths; also keeps control from
				//falling through into CLEAR_BUFFER_ACTION should errorAndExit return.
				return tokenClass(EOF_T,NONE_ST,"EOF");
			}
		case CLEAR_BUFFER_ACTION:
			currentLexeme = "";//Clear buffer, for comments only.
			break;
		case CHECK_COMPILER_DIRECTIVE_ACTION:
			currentLexeme += static_cast<char>(c);//Include the closing char before comparing.
			if(currentLexeme == "{$p+}")
				fileManager.setPrintStatus(true);
			else if (currentLexeme == "{$p-}")
				fileManager.setPrintStatus(false);
			else if (currentLexeme == "{$e+}")
				expressionDebugging = true;
			else if (currentLexeme == "{$e-}")
				expressionDebugging = false;
			else if (currentLexeme == "{$s+}")
				cout<<symbolTable.toString()<<endl;
			else
				cout<<"Warning, compiler directive "+currentLexeme+" is undefined."<<endl;
			break;
		}

		currentStateNum = s.nextStateNum;
	}

	//This should be unreachable
	return tokenClass(EMPTY_T,EMPTY_ST,EMPTY_LEXEME);
}