void cScanner::startScanning(){ cCode code(codeFile); cConstRecognizer Const(&code); cWordRecognizer Word(&code); std::string Token; while(!code.IsEnd()){ char ch = code.ShowCh(); if(((int)ch >= DIGIT_ASCII_LOWER_LIMIT && (int)ch <= DIGIT_ASCII_UPPER_LIMIT) || (ch == '\'') || (ch == '#')){ int Class = Const.getClass(); Token = Const.getToken(); tokensFlow->addToken(cToken(code.getStrNum(), Class, Token)); } else if(((int)ch >= LETTER_LOWERCASE_ASCII_LOWER_LIMIT && (int)ch <= LETTER_LOWERCASE_ASCII_UPPER_LIMIT) || ((int)ch >= LETTER_UPPERCASE_ASCII_LOWER_LIMIT && (int)ch <= LETTER_UPPERCASE_ASCII_UPPER_LIMIT) || (ch == '_') || (ch == '<')){ tokensFlow->addToken(Word.getToken()); } else if(ch == ' ' || ch == '\n' || ch == '\t'){ code.GiveCh(); } else if(ch == '>' || ch == '=' || ch == '/'){ code.GiveCh(); switch (ch){ case '=': { Token = "="; tokensFlow->addToken(cToken(code.getStrNum(), CLASS_OPERATION_SIGN, Token)); break; } case '>': { Token = ">"; tokensFlow->addToken(cToken(code.getStrNum(), CLASS_BRACKET, Token)); break; } } } } }
/************************************************************** * Purpose: Takes a char vector by reference, and * uses the cAnalyzer object iterator to identify * the current position, and finds the current token * in the file vector, identifies it as a number, * organizes it into a cToken obect and adds it into * the cToken vector. * * Entry: Method takes a vector of char of C text by reference, * and uses the cAnalyzer char vector iterator. * * Exit: Modifies the cAnalyzer char vector iterator * by advancing the iterator as needed, * and adds a cToken object to the cToken result vector. ****************************************************************/ int cAnalyzer::number(std::vector<char> const &file) { std::string type = "integer"; start = it; int state = 0; int errorState = 0; while (isdigit(*it) || *it == '.') { if (*it == '.') { if (!state) { state = 1; type = "float"; } else { type = error; errorState = 1; } } it++; } if (*it == 'l' && !state && !errorState) { type = "long"; it++; } else if (*it == 'd' && !errorState) { type = "double"; it++; } else if (*it == 'f' && !errorState) { type = "float"; it++; } if (isalpha(*it) || *it == '_') { type = error; while (it != file.end() && isalpha(*it) || *it == '_') { it++; } } std::string temp = std::string(start, it); result.push_back(cToken(temp, type)); return 0; }
/************************************************************** * Purpose: Takes a char vector by reference, and * uses the cAnalyzer object iterator to identify * the current position, and finds the current token * in the file vector, identifies it as an identifier, * organizes it into a cToken obect and adds it into * the cToken vector. * * Entry: Method takes a vector of char of C text by reference, * and uses the cAnalyzer char vector iterator. * * Exit: Modifies the cAnalyzer char vector iterator * by advancing the iterator as needed, * and adds a cToken object to the cToken result vector. ****************************************************************/ int cAnalyzer::identifier(std::vector<char> const &file) { std::string type = "identifier"; start = it; while (it != file.end() && isalpha(*it) || isdigit(*it) || *it == '_') { it++; } std::string temp = std::string(start, it); std::hash_set<std::string>::iterator hashIt = keywords.find(temp); if (hashIt != keywords.end()) type = "keyword"; result.push_back(cToken(temp, type)); return 0; }
/************************************************************** * Purpose: Takes a char vector by reference, and * uses the cAnalyzer object iterator to identify * the current position, and finds the current token * in the file vector, identifies it as a string literal, * organizes it into a cToken obect and adds it into * the cToken vector. * * Entry: Method takes a vector of char of C text by reference, * and uses the cAnalyzer char vector iterator. * * Exit: Modifies the cAnalyzer char vector iterator * by advancing the iterator as needed, * and adds a cToken object to the cToken result vector. ****************************************************************/ int cAnalyzer::stringLiteral(std::vector<char> const &file) { std::string type = "str literal"; start = it; char previous = *it; if (*it == '\"') { it++; while (it != file.end() && *it != '\"' || previous == '\\') { if (it == file.end()) { type = error; break; } previous = *it; it++; } } else { it++; while (it != file.end() && *it != '\'' || previous == '\\') { if (it == file.end()) { type = error; break; } previous = *it; it++; } } it++; std::string temp = std::string(start, it); result.push_back(cToken(temp, type)); return 0; }
/************************************************************** * Purpose: Takes a char vector by reference, and * uses the cAnalyzer object iterator to identify * the current position, and finds the current token * in the file vector, identifies it as an operator, * organizes it into a cToken obect and adds it into * the cToken vector. * * Entry: Method takes a vector of char of C text by reference, * and uses the cAnalyzer char vector iterator. * * Exit: Modifies the cAnalyzer char vector iterator * by advancing the iterator as needed, * and adds a cToken object to the cToken result vector. ****************************************************************/ int cAnalyzer::isOperator(std::vector<char> const &file) { std::string type = "operator"; int state = 0; // used to track path through graph - value of unnlucky 1313 indicates error, 1314 and 1315 indicate temporary errors start = it; while (it != file.end() && std::find(operators.begin(), operators.end(), *it) != operators.end()) { if (*it == '+') { switch (state) { case 0: state = 1; break; case 1: state = 10; break; case 10: state = 1314; break; case 1314: state = 10; break; default: state = 1313; } } else if (*it == '-') { switch (state) { case 0: state = 2; break; case 2: state = 11; break; case 11: state = 1315; break; case 1315: state = 11; break; default: state = 1313; } } else if (*it == '*') { state = (state == 0) ? 3 : 1313; } else if (*it == '/') { if (state == 4) { it--; if (it > start) { std::string temp = std::string(start, it); result.push_back(cToken(temp, type)); } state = 1212; // special circumstance - this is a comment! break; } else state = (state == 0) ? 4 : 1313; } else if (*it == '%') { state = (state == 0) ? 5 : 1313; } else if (*it == '|') { state = (state == 0 || state == 6) ? 6 : 1313; } else if (*it == '&') { state = (state == 0 || state == 7) ? 7 : 1313; } else if (*it == '^') { state = (state == 0) ? 8 : 1313; } else if (*it == '=') { if (state == 0) state = 9; else if (state < 9) state = 12; else if (state == 9) state = 13; else state = 1313; } it++; } if (state == 1313 || state == 1314 || state == 1315) type = error; if (state == 1212) { type = "comment"; while (it != file.end() && *it != '\n') { it++; } } std::string temp = std::string(start, it); if (state != 1212) result.push_back(cToken(temp, type)); return 0; }
/************************************************************** * Purpose: Organizes the identification of tokens and types * by branching program into different directions, * utilyzing various other methods as needed, * depending on the first character of a subset, * places the token and type into a cToken object * and adds the cToken object to a vector. * * Entry: Method takes a vector of char of C text by reference. * * Exit: Returns a vector of cToken objects in the same * order that they appeared in the file vector ****************************************************************/ std::vector<cToken> cAnalyzer::analyze(std::vector<char> const &file) { result.clear(); it = file.begin(); while (it != file.end()) { if (*it == '\n') { result.push_back(cToken("RETURN", "Crg Rtrn")); it++; } else if (isspace(*it)) { while (it != file.end() && isspace(*it) && *it != '\n') { it++; } result.push_back(cToken("SPACES", "spaces")); } else if (isdigit(*it) || *it == '.') { if (number(file) < 0) return result; } else if (isalpha(*it) || *it == '_') { if (identifier(file) < 0) return result; } else if (*it == '\"' || *it == '\'') { if (stringLiteral(file) < 0) return result; } else if (*it == '#') { std::string temp = std::string(it, it + 1); result.push_back(cToken(temp, "preprocessor")); it++; } else if (std::find(symbols.begin(), symbols.end(), *it) != symbols.end()) { std::string temp = std::string(it, it + 1); result.push_back(cToken(temp, "symbol")); it++; } else if (std::find(operators.begin(), operators.end(), *it) != operators.end()) { if (isOperator(file) < 0) return result; } else { std::string temp = std::string(it, it + 1); result.push_back(cToken(temp, error)); it++; } } return result; }