struct Token get_special(char c) {
    struct Token tokenOneChar, tokenTwoChar, tokenFinal;
    TokenCode codeOneChar, codeTwoChar, codeFinal;

    tokenOneChar.literalType = STRING_LIT;
    tokenTwoChar.literalType = STRING_LIT;

    tokenOneChar.literalValue.valString[0] = c;
    tokenOneChar.literalValue.valString[1] = '\0';

    tokenTwoChar.literalValue.valString[0] = c;
    tokenTwoChar.literalValue.valString[1] = peek_char();
    tokenTwoChar.literalValue.valString[2] = '\0';

    codeOneChar = is_reserved_word(tokenOneChar.literalValue.valString);
    codeTwoChar = is_reserved_word(tokenTwoChar.literalValue.valString);

    if (codeTwoChar != NO_TOKEN) {
        get_char();
        codeFinal = codeTwoChar;
        tokenFinal = tokenTwoChar;
    } else {
        codeFinal = codeOneChar;
        tokenFinal = tokenOneChar;
    }

    tokenFinal.tokenCode = codeFinal;
    return tokenFinal;
}
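This snippet and the last get_word variant below call is_reserved_word with a plain string and compare the result against NO_TOKEN, but the lookup itself is never shown. Below is a minimal sketch of what such a table-driven lookup could look like; the rw_table contents and the specific TokenCode values are made up for illustration and do not come from the original sources.

#include <cstring>

/* Hypothetical token codes and reserved-word table, for illustration only;
 * each scanner above defines its own TokenCode enum and table elsewhere. */
typedef enum { NO_TOKEN = 0, TOKEN_BEGIN, TOKEN_END, TOKEN_PLUS_EQ } TokenCode;

static const struct { const char *text; TokenCode code; } rw_table[] = {
    { "begin", TOKEN_BEGIN },
    { "end",   TOKEN_END },
    { "+=",    TOKEN_PLUS_EQ },
};
static const int rw_count = sizeof(rw_table) / sizeof(rw_table[0]);

TokenCode is_reserved_word(const char *s) {
    for (int i = 0; i < rw_count; i++) {
        if (std::strcmp(s, rw_table[i].text) == 0)
            return rw_table[i].code;      /* reserved word or special symbol */
    }
    return NO_TOKEN;                      /* caller falls back to IDENTIFIER */
}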
token create_string_token(char *s, int is_string_constant) {
    token t;
    if (is_string_constant) {
        t.type = string_constant;
        t.detail.name = s;
    } else if (is_reserved_word(s)) {
        t.type = reserved_word;
        t.detail.id = is_reserved_word(s);
    } else {
        t.type = identifier;
        t.detail.name = s;
    }
    return t;
}
static char* get_word(Token* theToken, char token_string[MAX_TOKEN_STRING_LENGTH], char *token_ptr) {
    /* Extract the word. */
    int charCount = 0;
    while ((char_table[(*token_ptr)] == LETTER || char_table[(*token_ptr)] == DIGIT)
            && charCount < (MAX_TOKEN_STRING_LENGTH - 1)) {
        token_string[charCount] = *token_ptr;
        token_string[charCount + 1] = '\0';
        charCount++;
        token_ptr++;
    }

    /* Downshift the word, to make it lower case. */
    theToken->literal_value = downshift_word(token_string);

    /* Check whether the word is a reserved word; if it is not, it's an identifier. */
    if (is_reserved_word(theToken)) {
        theToken->literal_type = STRING_LIT;
    } else {
        theToken->literal_type = STRING_LIT;
        theToken->token_code = IDENTIFIER;
        theToken->nextToken = NULL;
    }
    return token_ptr;
}
static Token *get_word(char token_string[], Token *token2) {
    /* Extract the word, downshift it to lower case, then check whether it is
       a reserved word; if it is not a reserved word, it's an identifier. */
    downshift_word(token_string);
    token2->nextptr = NULL;
    token2->type = STRING_LIT;
    strcpy(token2->token_string, token_string);

    /* is_reserved_word() consults the reserved-word arrays provided with the
       source; if the word is not found there, the token is an identifier. */
    is_reserved_word(token_string, token2);
    return token2;
}
bool Grammar::is_identifier(const std::string& str) {
    if (str.size() == 0 || !str_utils::is_alpha(str[0]))
        return false;
    for (int i = 1; i < int(str.size()); i++)
        if (!str_utils::is_alphanumeric(str[i]) && str[i] != '_')
            return false;
    return !is_reserved_word(str);
}
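The str_utils::is_alpha and str_utils::is_alphanumeric helpers that is_identifier relies on are not part of the excerpt. A minimal sketch, assuming they are thin wrappers around <cctype> (the names are taken from the snippet; the bodies are guesses):

#include <cctype>

namespace str_utils {
    // Assumed implementations: <cctype> wrappers, with the char converted to
    // unsigned char first to avoid undefined behaviour for negative values.
    inline bool is_alpha(char c) {
        return std::isalpha(static_cast<unsigned char>(c)) != 0;
    }
    inline bool is_alphanumeric(char c) {
        return std::isalnum(static_cast<unsigned char>(c)) != 0;
    }
}

Under that reading, is_identifier accepts names like total_2 but rejects anything that begins with a digit or underscore, anything containing other punctuation, and any string for which is_reserved_word returns true.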
int read_next_token(TokenCacheManager *tcm, Token **token, pval *phplval) {
    /* YY_TLS_VARS; TLS_VARS; */
    if (GLOBAL(tc)->count == GLOBAL(tc)->pos || GLOBAL(php3_display_source)) {
        /* we need to read from the lexical scanner */
        Token next_token;

        phplval->type = IS_LONG;  /* the lex scanner doesn't always set phplval->type, make sure the type is not 'dirty' */
        phplval->cs_data.switched = 0;
        next_token.token_type = lex_scan(phplval);
        /* printf("Read token: %c (%d)\n", next_token.token_type, next_token.token_type); */
        if (next_token.token_type == DONE_EVAL) {
            return DONE_EVAL;
        }
        if (GLOBAL(tc)->count >= GLOBAL(tc)->max_tokens) {
            /* we need more memory */
            GLOBAL(tc)->tokens = (Token *) erealloc(GLOBAL(tc)->tokens,
                (sizeof(Token) * (GLOBAL(tc)->max_tokens += GLOBAL(tc)->block_size)));
            if (!GLOBAL(tc)->tokens) {
                return FAILURE;
            }
        }
        next_token.phplval = *phplval;
        next_token.lineno = GLOBAL(phplineno);
        next_token.phplval.offset = (tcm->active << TOKEN_BITS) + GLOBAL(tc)->count;

        /* ugly hack to support $keyword */
        if (last_token_suggests_variable_reference() && is_reserved_word(next_token.token_type)) {
            next_token.phplval.value.str.val = estrndup(phptext, phpleng);
            next_token.phplval.value.str.len = phpleng;
            next_token.phplval.type = IS_STRING;
            next_token.token_type = STRING;
        }
        /* end of ugly hack */

        last_token_type = next_token.token_type;

        if (GLOBAL(php3_display_source)) {
            syntax_highlight(&next_token);
            *token = &next_token;
            return (*token)->token_type;
        }

        GLOBAL(tc)->tokens[GLOBAL(tc)->count] = next_token;
        GLOBAL(tc)->count++;
    }
    *token = &GLOBAL(tc)->tokens[GLOBAL(tc)->pos++];
    return (*token)->token_type;
}
char* get_word(char** original, char ch[], char* ch_ptr, Token* t) {
    /* Extract the word. ch[0] presumably already holds the character the
       caller consumed before calling get_word, so copying starts at index 1. */
    ch_ptr = ch;
    int i = 1;
    int c;
    /* Read one character per iteration, appending it while it is alphabetic. */
    while (isalpha(c = get_char(original))) {
        ch[i] = (char) c;
        i++;
    }
    ch[i] = '\0';

    /* Downshift the word, to make it lower case. */
    downshift_word(ch);

    /* Check whether the word is a reserved word; if it is not, it's an identifier. */
    if (is_reserved_word(ch, t) == FALSE) {
        t->token_code = IDENTIFIER;
    }
    return ch_ptr;
}
struct Token get_word(char c) {
    struct Token token;
    TokenCode code;
    int i = 1;

    token.literalType = STRING_LIT;
    token.literalValue.valString[0] = c;

    /* Collect following letters and digits, stopping before valString overflows. */
    while ((char_table[peek_char()] == LETTER || char_table[peek_char()] == NUMBER)
            && i < MAX_TOKEN_STRING_LENGTH - 1) {
        c = get_char();
        token.literalValue.valString[i] = c;
        i++;
    }
    while (i < MAX_TOKEN_STRING_LENGTH) {
        token.literalValue.valString[i] = '\0';
        i++;
    }

    downshift_word(token.literalValue.valString);
    code = is_reserved_word(token.literalValue.valString);
    token.tokenCode = (code == NO_TOKEN) ? IDENTIFIER : code;
    return token;
}
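downshift_word appears in several of the get_word variants but its body is never shown; one caller even assigns its return value. A plausible in-place version consistent with those call sites (an assumption, not the original code):

#include <cctype>

// Assumed behaviour: lower-case the NUL-terminated word in place and return
// the same buffer, so the reserved-word comparison is case-insensitive.
char *downshift_word(char *word) {
    for (char *p = word; *p != '\0'; ++p) {
        *p = static_cast<char>(std::tolower(static_cast<unsigned char>(*p)));
    }
    return word;
}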
token_list tokenize(std::string source_) {
    std::list<token> token_stream;
    std::string token_str;
    bool in_string = false;
    bool is_comment_line = false;
    bool is_comment_block = false;
    std::string string_quote_type = "'";

    std::regex replace_macros("[a-zA-Z0-9_]+?\\([a-zA-Z0-9_\\(\\), ]*?\\)");
    source_ = std::regex_replace(source_, replace_macros, "MACRO_REPLACED_BY_TOKENIZE");
    //std::cout << source_;

    for (std::string::size_type stream_pos = 0; stream_pos < source_.size(); ++stream_pos) {
        std::string cur_char = source_.substr(stream_pos, 1);
        if (!in_string) {
            if (cur_char == "\"" || cur_char == "'" || cur_char == " " || cur_char == "\t" ||
                cur_char == "+" || cur_char == "-" || cur_char == "*" || cur_char == "/" ||
                cur_char == ">" || cur_char == "<" || cur_char == "=" || cur_char == "]" ||
                cur_char == "[" || cur_char == "(" || cur_char == ")" || cur_char == "}" ||
                cur_char == "{" || cur_char == "," || cur_char == "!" || cur_char == "." ||
                cur_char == ":" || cur_char == ";") {
                if (token_str != "") {
                    if (!is_comment_line && !is_comment_block) {
                        token new_token;
                        new_token.val = token_str;
                        if (is_number(token_str)) {
                            new_token.type = type::SCALAR;
                        } else {
                            new_token.type = type::LITERAL;
                        }
                        new_token.stream_pos = stream_pos - token_str.length();
                        if (token_str[0] == '_') new_token.type = type::LOCALVARIABLE;
                        if (is_reserved_word(token_str)) new_token.type = type::SCRIPTCOMMAND;
                        token_stream.push_back(new_token);
                    }
                }
                if (cur_char == "+" || cur_char == "-" || cur_char == "*" || cur_char == "/" ||
                    cur_char == ">" || cur_char == "<" || cur_char == "=" || cur_char == "]" ||
                    cur_char == "[" || cur_char == "(" || cur_char == ")" || cur_char == "}" ||
                    cur_char == "{" || cur_char == "," || cur_char == "!" || cur_char == "." ||
                    cur_char == ":" || cur_char == ";") {
                    bool skip = false;
                    if (cur_char == ">" || cur_char == "<" || cur_char == "!" || cur_char == "=") {
                        if (source_.substr(stream_pos + 1, 1) == "=") {
                            skip = true;
                            cur_char += "=";
                        }
                    }
                    if (cur_char == ":") {
                        if (source_.substr(stream_pos + 1, 1) == ":") {
                            skip = true;
                            cur_char += ":";
                        }
                    }
                    if (cur_char == "/") {
                        if (source_.substr(stream_pos + 1, 1) == "*") {
                            skip = true;
                            cur_char += "*";
                            is_comment_block = true;
                        }
                    }
                    if (cur_char == "/") {
                        if (source_.substr(stream_pos + 1, 1) == "/") {
                            skip = true;
                            cur_char += "/";
                            is_comment_line = true;
                        }
                    }
                    if (!is_comment_line && !is_comment_block) {
                        token operator_token;
                        operator_token.val = cur_char;
                        operator_token.stream_pos = stream_pos;
                        operator_token.type = type::OPERATOR;
                        if (cur_char == ";") {
                            operator_token.type = type::ENDOFSTATEMENT;
                        }
                        token_stream.push_back(operator_token);
                    }
                    if (cur_char == "*") {
                        if (source_.substr(stream_pos + 1, 1) == "/") {
                            skip = true;
                            cur_char += "/";
                            is_comment_block = false;
                            token_str = "";
                        }
                    }
                    if (skip) {
                        stream_pos++;
                    }
                }
                token_str = "";
                if (cur_char == "\"") {
                    token_str = "\"" + token_str;
                }
                if (cur_char == "'") {
                    token_str = "'" + token_str;
                }
            } else {
                if (token_str == "<=" || token_str == ">=" || token_str == "!=" ||
                    token_str == "::" || token_str == "==") {
                    if (!is_comment_line && !is_comment_block) {
                        token new_token;
                        new_token.val = token_str;
                        new_token.stream_pos = stream_pos - token_str.length();
                        new_token.type = type::OPERATOR;
                        token_stream.push_back(new_token);
                    }
                    token_str = "";
                } else {
                    if (cur_char != "\n") {
                        token_str += cur_char;
                    } else {
                        if (!is_comment_line && !is_comment_block && token_str != "") {
                            token new_token;
                            new_token.val = token_str;
                            if (is_number(token_str)) {
                                new_token.type = type::SCALAR;
                            } else {
                                new_token.type = type::LITERAL;
                            }
                            new_token.stream_pos = stream_pos - token_str.length();
                            if (token_str[0] == '_') new_token.type = type::LOCALVARIABLE;
                            if (is_reserved_word(token_str)) new_token.type = type::SCRIPTCOMMAND;
                            token_stream.push_back(new_token);
                        }
                        if (is_comment_line) {
                            is_comment_line = false;
                            token_str = "";
                        }
                    }
                }
            }
        } else {
            token_str += cur_char;
        }
        if (!is_comment_line && !is_comment_block) {
            if (cur_char == "\"" || cur_char == "'") {
                if (!in_string) {
                    string_quote_type = cur_char;
                    in_string = true;
                } else if (in_string && cur_char == string_quote_type) {
                    if (source_.substr(stream_pos + 1, 1) == string_quote_type) {
                        token_str += string_quote_type;
                        stream_pos++;
                    } else {
                        in_string = false;
                    }
                }
            }
        }
    }
    return token_stream;
}
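tokenize classifies a token as type::SCALAR when is_number returns true, but that predicate is not included either. A minimal sketch, assuming it accepts unsigned integers and simple decimals (the script language's real numeric rules may be broader):

#include <cctype>
#include <string>

// Assumed predicate: one or more digits with at most one decimal point;
// signs are not handled here because '+' and '-' are tokenized as operators.
bool is_number(const std::string &s) {
    bool saw_digit = false;
    bool saw_dot = false;
    for (std::string::size_type i = 0; i < s.size(); ++i) {
        if (std::isdigit(static_cast<unsigned char>(s[i]))) {
            saw_digit = true;
        } else if (s[i] == '.' && !saw_dot) {
            saw_dot = true;
        } else {
            return false;
        }
    }
    return saw_digit;
}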