// Returns the lexem stored under `token` in _lexems.
// When no entry exists yet, a Lexem is constructed from the token text,
// stored in _lexems under that key, and a copy of it is returned.
Lexem Interpret::_get_lexem(std::string& token)
{
    std::map<std::string, Lexem>::iterator known = _lexems.find(token);

    // Already registered: hand back the stored lexem.
    if (known != _lexems.end())
        return known->second;

    // First occurrence of this token: build the lexem and remember it.
    Lexem fresh = Lexem(token);
    _lexems[token] = fresh;
    return fresh;
}
LexemVector tokenize( const std::string & expression) { struct Graph { char ch; Token token; }; static Graph graph[] = { {'+', TOKEN_PLUS}, {'-', TOKEN_MINUS}, {'*', TOKEN_MULTIPLY}, {'/', TOKEN_DIVIDE}, {'%', TOKEN_PERCENT}, {'^', TOKEN_EXPONENTIATION}, {'?', TOKEN_QUESTION}, {',', TOKEN_COMMA}, {':', TOKEN_COLON}, {';', TOKEN_SEMI}, {'(', TOKEN_LPAREN}, {')', TOKEN_RPAREN}, {'[', TOKEN_LBRACK}, {']', TOKEN_RBRACK}, {'=', TOKEN_ASSIGN}, {'<', TOKEN_LESS}, {'>', TOKEN_GREATER}, {'|', TOKEN_ARITHMETIC_OR}, {'&', TOKEN_ARITHMETIC_AND}, {'!', TOKEN_NOT} }; struct Digraph { char ch1; char ch2; Token token; }; static Digraph digraph[] = { {'=', '=', TOKEN_EQUAL}, {'!', '=', TOKEN_NOT_EQUAL}, {'>', '=', TOKEN_GREATER_EQUAL}, {'<', '=', TOKEN_LESS_EQUAL}, {'|', '|', TOKEN_LOGICAL_OR}, {'&', '&', TOKEN_LOGICAL_AND} }; LexemVector lex_vector; const char *it = expression.c_str(); while (*it != '\0') { if (std::isspace(*it) || ::iscntrl(*it)) ++it; // Parse constant [0-9]*(\.[0-9*])?(E([+-]?[0-9]*))? // take unary plus and minus into account else if (std::isdigit(*it) || *it == '.' || ((*it == '-' || *it == '+') && (std::isdigit(*(it + 1)) || *(it + 1) == '.') && (lex_vector.empty() || lex_vector.back().getToken() == TOKEN_COMMA || lex_vector.back().getToken() == TOKEN_LPAREN || is_operator(lex_vector.back().getToken())))) { bool is_real = false; const char *from = it; if (*it == '-' || *it == '+') ++it; while (std::isdigit(*it)) ++it; if (*it == '.') { is_real = true; ++it; while (std::isdigit(*it)) ++it; } if (*from == '.' && it == from + 1) throw std::runtime_error(std::string("'.' is not a valid real number ") + *it); if (*it == '.') throw std::runtime_error(std::string("'.' 
is not a valid real number ") + *it); if (std::toupper(*it) == 'E') { is_real = true; ++it; if (*it == '+' || *it == '-') { ++it; } while (std::isdigit(*it)) { ++it; } } if (is_real) lex_vector.push_back(Lexem(TOKEN_REAL_CONSTANT, from, it)); else lex_vector.push_back(Lexem(TOKEN_INTEGER_CONSTANT, from, it)); } // Parse literal else if (*it == '"') { std::string s; ++it; for (; *it && *it != '"'; ++it) { if (*it == '\\') { ++it; if (*it) switch (*it) { case '"': s += '"'; break; case '\\': s += '\\'; break; case 'n': s += '\n'; break; default: s += *it; } } else s += *it; } ++it; lex_vector.push_back(Lexem(TOKEN_LITERAL, s.c_str())); } // Parse identifier [a-zA-Z][a-zA-Z0-9_.]* else if (std::isalpha(*it)) { const char *from = it; while (std::isalpha(*it) || std::isdigit(*it) || *it == '_' || *it == '.') ++it; lex_vector.push_back(Lexem(TOKEN_IDENTIFIER, from, it)); } // Parse graphs and digraphs else if (ispunct(*it)) { const char *from = it; if (*(it + 1) != '\0') { for (size_t i = 0; i < sizeof(digraph)/sizeof(digraph[0]); ++i) { if (*it == digraph[i].ch1 && *(it + 1) == digraph[i].ch2) { ++it; ++it; lex_vector.push_back(Lexem(digraph[i].token, from, it)); goto next_token; } } } for (size_t i = 0; i < sizeof(graph)/sizeof(graph[0]); ++i) if (*it == graph[i].ch) { ++it; lex_vector.push_back(Lexem(graph[i].token, from, it)); goto next_token; } throw std::runtime_error(std::string("std::expreval::tokenize: Invalid graphic character '") + *it + "'"); } else throw std::runtime_error("Impossible expression parse error"); next_token: continue; } lex_vector.push_back(Lexem(TOKEN_END, "")); return lex_vector; }
//***************************************************************************** // Lex::NextLexem (m) - returns next extracted lexem. // Your should define the function yourself. //***************************************************************************** Lexem Lex::NextLexem( void ) { return Lexem( 0 ); }