Example #1
0
struct Token get_special(char c)
{
    struct Token tokenOneChar, tokenTwoChar, tokenFinal;
    TokenCode codeOneChar, codeTwoChar, codeFinal;

    tokenOneChar.literalType = STRING_LIT;
    tokenTwoChar.literalType = STRING_LIT;


    tokenOneChar.literalValue.valString[0] = c;
    tokenOneChar.literalValue.valString[1] = '\0';

    tokenTwoChar.literalValue.valString[0] = c;
    tokenTwoChar.literalValue.valString[1] = peek_char();
    tokenTwoChar.literalValue.valString[2] = '\0';

    codeOneChar = is_reserved_word(tokenOneChar.literalValue.valString);
    codeTwoChar = is_reserved_word(tokenTwoChar.literalValue.valString);

    if(codeTwoChar != NO_TOKEN) {
        get_char();
        codeFinal = codeTwoChar;
        tokenFinal = tokenTwoChar;
    } else {
        codeFinal = codeOneChar;
        tokenFinal = tokenOneChar;
    }
    tokenFinal.tokenCode = codeFinal;

    return tokenFinal;
}
Example #2
0
/*
 * Wraps the text `s` in a token.
 *
 *   - is_string_constant != 0      -> string_constant, detail.name = s
 *   - is_reserved_word(s) non-zero -> reserved_word, detail.id = that lookup result
 *   - otherwise                    -> identifier, detail.name = s
 *
 * The pointer `s` is stored as-is; no copy of the text is made here.
 */
token create_string_token(char * s, int is_string_constant)
{
  token result;

  if (is_string_constant)
  {
    result.type = string_constant;
    result.detail.name = s;
    return result;
  }

  if (is_reserved_word(s))
  {
    result.type = reserved_word;
    result.detail.id = is_reserved_word(s);
    return result;
  }

  result.type = identifier;
  result.detail.name = s;
  return result;
}
Example #3
0
/*
 * Extracts a word (a run of LETTER/DIGIT characters) starting at token_ptr.
 *
 * The characters are copied into token_string, at most
 * MAX_TOKEN_STRING_LENGTH-1 of them, and the buffer is always left
 * NUL-terminated. The result of downshift_word(token_string) is stored in
 * theToken->literal_value, and is_reserved_word(theToken) decides whether the
 * token is a reserved word; if it is not, the token is marked as an
 * IDENTIFIER and its nextToken link is cleared.
 *
 * Returns the position in the input just past the last character consumed.
 */
static char* get_word(Token* theToken, char token_string[MAX_TOKEN_STRING_LENGTH],char *token_ptr)
{
    int charCount = 0;

    /* Start from an empty string so the buffer is terminated even when the
       very first character does not qualify. */
    token_string[0] = '\0';

    /* char_table is subscripted through unsigned char: plain char may be
       signed, in which case bytes >= 0x80 would give a negative index. */
    while((char_table[(unsigned char)*token_ptr] == LETTER || char_table[(unsigned char)*token_ptr] == DIGIT) && charCount < (MAX_TOKEN_STRING_LENGTH-1))
    {
        token_string[charCount] = *token_ptr;
        charCount++;
        token_string[charCount] = '\0';
        token_ptr++;
    }

    theToken->literal_value = downshift_word(token_string); //Downshift the word, to make it lower case

    /* Anything that is not a reserved word is an identifier. */
    if(!is_reserved_word(theToken))
    {
        theToken->token_code = IDENTIFIER;
        theToken->nextToken = NULL;
    }

    /* Set after the lookup, as before, so the lookup cannot override it. */
    theToken->literal_type = STRING_LIT;

    return token_ptr;
}
Example #4
0
/*
 * Fills in token2 for the word held in token_string and returns token2.
 *
 * The word is lower-cased in place via downshift_word(), copied into
 * token2->token_string, and then handed to is_reserved_word() together with
 * the token.
 */
static Token *get_word(char token_string[], Token *token2) {
	/* Normalise the spelling before anything is stored or looked up. */
	downshift_word(token_string);

	/* Defaults for a freshly scanned word: string literal, not linked yet. */
	token2->type = STRING_LIT;
	token2->nextptr = NULL;

	/* NOTE(review): unbounded copy -- assumes the token's buffer is at least
	   as large as the caller's token_string; confirm against the Token
	   definition. */
	strcpy(token2->token_string, token_string);

	/* Reserved-word check; presumably this sets the token's code when the
	   word is reserved and marks it as an identifier otherwise -- TODO
	   confirm against is_reserved_word(). Its return value is not used here. */
	is_reserved_word(token_string, token2);

	return token2;
}
Example #5
0
/// Tells whether `str` is a valid identifier: non-empty, first character
/// accepted by str_utils::is_alpha, every following character either accepted
/// by str_utils::is_alphanumeric or an underscore, and the whole string not a
/// reserved word.
bool Grammar::is_identifier(const std::string& str) {
    if (str.empty())
        return false;
    if (!str_utils::is_alpha(str[0]))
        return false;

    // Remaining characters: alphanumeric or '_'.
    for (int pos = 1; pos < int(str.size()); ++pos) {
        const bool allowed = str_utils::is_alphanumeric(str[pos]) || str[pos] == '_';
        if (!allowed)
            return false;
    }

    // A well-formed name still does not count if the grammar reserves it.
    return !is_reserved_word(str);
}
Example #6
0
/*
 * Fetches the next token, either from the token cache or from the scanner.
 *
 * When the cache has been fully consumed (count == pos) or display-source
 * mode is on, a new token is read with lex_scan(). Outside display-source
 * mode the new token is appended to the cache, and the token at the cache's
 * current position is then handed out through *token (advancing pos).
 *
 * tcm     - only tcm->active is read here, to build the token's offset.
 * token   - out: receives a pointer to the token being returned.
 * phplval - scratch value passed to the scanner; copied into the new token.
 *
 * Returns the token type, DONE_EVAL when the scanner reports it, or FAILURE
 * when growing the cache fails.
 */
int read_next_token(TokenCacheManager *tcm, Token **token, pval *phplval)
{
/*	YY_TLS_VARS;
	TLS_VARS; */

	if (GLOBAL(tc)->count == GLOBAL(tc)->pos || GLOBAL(php3_display_source)) {
		/* we need to read from the lexical scanner */
		Token next_token;

		phplval->type = IS_LONG;	/* the lex scanner doesn't always set phplval->type, make sure the type is not 'dirty' */
		phplval->cs_data.switched = 0;
		next_token.token_type = lex_scan(phplval);
		/*printf("Read token:  %c (%d)\n",next_token.token_type,next_token.token_type);*/
		if (next_token.token_type == DONE_EVAL) {
			return DONE_EVAL;
		}
		/* Grow the cache by one block_size step; max_tokens is bumped as part of the size expression. */
		/* NOTE(review): the result is assigned straight over tokens, so if erealloc can return NULL the old buffer pointer is lost -- confirm erealloc's failure behaviour. */
		if (GLOBAL(tc)->count >= GLOBAL(tc)->max_tokens) {	/* we need more memory */
			GLOBAL(tc)->tokens = (Token *) erealloc(GLOBAL(tc)->tokens,
										   (sizeof(Token) * (GLOBAL(tc)->max_tokens += GLOBAL(tc)->block_size)));
			if (!GLOBAL(tc)->tokens) {
				return FAILURE;
			}
		}
		next_token.phplval = *phplval;
		next_token.lineno = GLOBAL(phplineno);
		/* offset combines the active cache index (shifted by TOKEN_BITS) with the token's slot in that cache */
		next_token.phplval.offset = (tcm->active<<TOKEN_BITS) + GLOBAL(tc)->count;
		
		/* ugly hack to support $keyword */
		/* a reserved word in this position is turned into a plain STRING token carrying a copy of the scanned text */
		if (last_token_suggests_variable_reference() && is_reserved_word(next_token.token_type)) {
			next_token.phplval.value.str.val = estrndup(phptext,phpleng);
			next_token.phplval.value.str.len = phpleng;
			next_token.phplval.type = IS_STRING;
			next_token.token_type = STRING;
		}
		/* end of ugly hack */
		last_token_type=next_token.token_type;
		
		/* display-source mode: highlight the token and return without caching it */
		/* NOTE(review): *token is left pointing at the local next_token, which no longer exists once this function returns; callers must only use the returned type -- verify. */
		if (GLOBAL(php3_display_source)) {
			syntax_highlight(&next_token);
			*token = &next_token;
			return (*token)->token_type;
		}
		GLOBAL(tc)->tokens[GLOBAL(tc)->count] = next_token;
		GLOBAL(tc)->count++;
	}
	/* hand out the cached token at the current position and advance */
	*token = &GLOBAL(tc)->tokens[GLOBAL(tc)->pos++];
	return (*token)->token_type;
}
/*
 * Collects a word into `ch`, lower-cases it and classifies it.
 *
 * Writing starts at ch[1]; ch[0] is not touched here (presumably the caller
 * has already stored the first letter there -- TODO confirm). Characters are
 * taken from get_char(original) for as long as isalpha() accepts them, then
 * the buffer is NUL-terminated and passed through downshift_word(). When
 * is_reserved_word() reports FALSE the token code is set to IDENTIFIER.
 *
 * Returns `ch`; the incoming value of ch_ptr is ignored.
 */
char* get_word(char** original, char ch[], char* ch_ptr, Token* t)
{
    int len;

    ch_ptr = ch;

    /* NOTE(review): get_char() is called once for the test and once for the
       store; if it advances the input, every other character is skipped --
       verify. There is also no bound on len. */
    for (len = 1; isalpha(get_char(original)); ++len) {
        ch[len] = get_char(original);
    }
    ch[len] = '\0';

    /* Lower-case the word before the reserved-word lookup. */
    downshift_word(ch);

    /* Anything that is not a reserved word is an identifier. */
    if (is_reserved_word(ch, t) == FALSE) {
        t->token_code = IDENTIFIER;
    }

    return ch_ptr;
}
Example #8
0
/*
 * Scans a word whose first character `c` has already been read.
 *
 * Further characters are consumed with get_char() for as long as the next
 * one (peek_char()) is classified as LETTER or NUMBER by char_table. The word
 * is stored in token.literalValue.valString, truncated to
 * MAX_TOKEN_STRING_LENGTH-1 characters; the rest of the buffer is filled
 * with NULs, so the string is always terminated. Characters beyond the limit
 * are still consumed so that an over-long word stays a single token.
 *
 * The word is then lower-cased and looked up: a reserved word gets its own
 * token code, anything else becomes an IDENTIFIER.
 */
struct Token get_word(char c)
{
    struct Token token;
    TokenCode code;
    int i = 1;

    token.literalType = STRING_LIT;
    token.literalValue.valString[0] = c;
    while(char_table[peek_char()] == LETTER || char_table[peek_char()] == NUMBER) {
        c = get_char();
        /* Store only while there is still room for the terminator; without
           this check a long word would run past the end of valString. */
        if(i < MAX_TOKEN_STRING_LENGTH - 1) {
            token.literalValue.valString[i] = c;
            i++;
        }
    }
    /* NUL-fill the remainder of the buffer (this also terminates the string). */
    while(i < MAX_TOKEN_STRING_LENGTH) {
        token.literalValue.valString[i] = '\0';
        i++;
    }

    downshift_word(token.literalValue.valString);
    code = is_reserved_word(token.literalValue.valString);
    token.tokenCode = (code == NO_TOKEN)? IDENTIFIER : code;

    return token;
}
Example #9
0
		// Splits script source text into a flat list of tokens.
		//
		// Before scanning, every span matching the call-like pattern
		// `name(args)` is replaced by the placeholder MACRO_REPLACED_BY_TOKENIZE.
		//
		// Token kinds produced:
		//   - words: SCALAR when is_number() accepts them, otherwise LITERAL;
		//     overridden to LOCALVARIABLE when the word starts with '_', and
		//     finally to SCRIPTCOMMAND when is_reserved_word() accepts it.
		//     Quoted strings (either quote character, a doubled quote inside
		//     the string stands for the quote itself) are carried in word
		//     tokens with their quotes included.
		//   - operators: one of + - * / > < = ] [ ( ) } { , ! . : ; with the
		//     two-character forms >=, <=, !=, == and ::. A ';' is reported as
		//     ENDOFSTATEMENT instead of OPERATOR.
		//   - comments (line comments introduced by two slashes, and
		//     slash-star block comments) produce no tokens.
		//
		// Each token records the position in the (macro-replaced) text where
		// it starts.
		//
		// A word that is pending when a newline is reached is emitted once and
		// then discarded, and a word still pending at the end of the input is
		// emitted as well. An unterminated string at end of input is dropped.
		token_list tokenize(std::string source_) {
			std::list<token> token_stream;
			std::string token_str;
			bool in_string = false;
			bool is_comment_line = false;
			bool is_comment_block = false;
			std::string string_quote_type = "'";

			// Characters that are tokens in their own right.
			const std::string operator_chars = "+-*/><=][()}{,!.:;";

			// Emits the pending word, if there is one and we are not inside a
			// comment. `end_pos` is the position just past the word.
			auto flush_word = [&](std::string::size_type end_pos) {
				if (token_str == "" || is_comment_line || is_comment_block) {
					return;
				}
				token new_token;
				new_token.val = token_str;
				if (is_number(token_str)) {
					new_token.type = type::SCALAR;
				}
				else {
					new_token.type = type::LITERAL;
				}
				new_token.stream_pos = end_pos - token_str.length();
				// Later classifications deliberately override earlier ones.
				if (token_str[0] == '_')
					new_token.type = type::LOCALVARIABLE;
				if (is_reserved_word(token_str))
					new_token.type = type::SCRIPTCOMMAND;
				token_stream.push_back(new_token);
			};

			std::regex replace_macros("[a-zA-Z0-9_]+?\\([a-zA-Z0-9_\\(\\), ]*?\\)");
			source_ = std::regex_replace(source_, replace_macros, "MACRO_REPLACED_BY_TOKENIZE");

			for (std::string::size_type stream_pos = 0; stream_pos < source_.size(); ++stream_pos) {
				const char ch = source_[stream_pos];
				std::string cur_char = source_.substr(stream_pos, 1);
				// One character of lookahead; empty at the end of the input.
				const std::string next_char = source_.substr(stream_pos + 1, 1);
				const bool is_quote = (ch == '"' || ch == '\'');
				const bool is_operator = operator_chars.find(ch) != std::string::npos;
				const bool is_delimiter = is_operator || is_quote || ch == ' ' || ch == '\t';

				if (in_string) {
					// Inside a string every character is taken verbatim.
					token_str += cur_char;
				}
				else if (is_delimiter) {
					// A delimiter ends whatever word was being collected.
					flush_word(stream_pos);

					if (is_operator) {
						bool skip = false;
						if (next_char == "=" && (ch == '>' || ch == '<' || ch == '!' || ch == '=')) {
							skip = true;
							cur_char += "=";
						}
						else if (ch == ':' && next_char == ":") {
							skip = true;
							cur_char += ":";
						}
						else if (ch == '/' && next_char == "*") {
							skip = true;
							cur_char += "*";
							is_comment_block = true;
						}
						else if (ch == '/' && next_char == "/") {
							skip = true;
							cur_char += "/";
							is_comment_line = true;
						}

						if (!is_comment_line && !is_comment_block) {
							token operator_token;
							operator_token.val = cur_char;
							operator_token.stream_pos = stream_pos;
							operator_token.type = type::OPERATOR;
							if (cur_char == ";") {
								operator_token.type = type::ENDOFSTATEMENT;
							}
							token_stream.push_back(operator_token);
						}

						// End of a block comment. Checked after the emit above so
						// that the closing '*' itself is still treated as comment.
						if (ch == '*' && next_char == "/") {
							skip = true;
							is_comment_block = false;
						}
						if (skip) {
							stream_pos++;
						}
					}

					// Start the next word; an opening quote is kept as its first character.
					token_str = "";
					if (is_quote) {
						token_str = cur_char;
					}
				}
				else {
					// NOTE(review): operator characters only ever enter token_str
					// inside a quoted string, so this branch looks unreachable --
					// confirm before removing it.
					if (
						token_str == "<=" ||
						token_str == ">=" ||
						token_str == "!=" ||
						token_str == "::" ||
						token_str == "=="
						) {
						if (!is_comment_line && !is_comment_block) {
							token new_token;
							new_token.val = token_str;
							new_token.stream_pos = stream_pos - token_str.length();
							new_token.type = type::OPERATOR;
							token_stream.push_back(new_token);
						}
						token_str = "";
					}
					else if (cur_char != "\n") {
						token_str += cur_char;
					}
					else {
						// A newline ends the current word and any line comment.
						flush_word(stream_pos);
						// The word must be discarded once emitted; otherwise the
						// following text is appended to it, or it is emitted a
						// second time at the next delimiter.
						token_str = "";
						is_comment_line = false;
					}
				}

				// Track string state; quotes inside comments are ignored.
				if (is_quote && !is_comment_line && !is_comment_block) {
					if (!in_string) {
						string_quote_type = cur_char;
						in_string = true;
					}
					else if (cur_char == string_quote_type) {
						if (next_char == string_quote_type) {
							// Doubled quote: an escaped quote, the string goes on.
							token_str += string_quote_type;
							stream_pos++;
						}
						else {
							in_string = false;
						}
					}
				}
			}

			// Input that does not end in a delimiter or newline would otherwise
			// lose its last word.
			if (!in_string) {
				flush_word(source_.size());
			}

			return token_stream;
		}