Пример #1
0
/*
 *  Test driver: pushes one hard-coded C source file through preprocessing,
 *  lexing, parsing and code generation, printing the result of each stage
 *  to stdout.
 *
 *  Every stage is attempted even when an earlier stage reported failure, and
 *  the function always returns 0.
 *  NOTE(review): the exit status never reflects a failed stage -- confirm
 *  whether callers of this driver rely on that before changing it.
 */
int main(void){
	const char * file_to_open = "test/c89/empty-main-return-zero.c";
	struct c_lexer_state c_lexer_state;
	struct memory_pool_collection memory_pool_collection;
	struct unsigned_char_list input_characters;
	int rtn = 0;
	struct parser_state parser_state;
	struct unsigned_char_list generated_code;
	struct unsigned_char_list lexer_output;
	struct unsigned_char_list buffered_symbol_table;
	struct preprocessor_state * preprocessor_state;
	struct unsigned_char_list preprocssed_characters;
	struct struct_c_lexer_token_ptr_list output_tokens;
	struct code_gen_state code_gen_state;

	/*  Set up every container and the shared memory pool before any stage runs. */
	struct_c_lexer_token_ptr_list_create(&output_tokens);
	unsigned_char_list_create(&lexer_output);
	unsigned_char_list_create(&input_characters);
	unsigned_char_list_create(&buffered_symbol_table);
	unsigned_char_list_create(&preprocssed_characters);
	memory_pool_collection_create(&memory_pool_collection);
	unsigned_char_list_create(&generated_code);
	preprocessor_state = create_preprocessor_state(&memory_pool_collection);

	/*  Stage 1: preprocess.  The bytes of every output token are echoed to stdout */
	/*  and also collected into one contiguous buffer that the lexer reads below.  */
	if(!(rtn = get_preprocessed_output_from_file(preprocessor_state, (unsigned char *)file_to_open, &output_tokens))){
		unsigned int i;
		for(i = 0; i < struct_c_lexer_token_ptr_list_size(&output_tokens); i++){
			struct c_lexer_token * current_token = struct_c_lexer_token_ptr_list_get(&output_tokens, i);
			unsigned char * j;
			/*  last_byte is inclusive, hence the + 1. */
			for(j = current_token->first_byte; j < (current_token->last_byte + 1); j++){
				unsigned_char_list_add_end(&preprocssed_characters, *j);
				putchar(*j);
			}
		}
	}else{
		printf("Nothing to output.  Preprocessing failed for %s\n", file_to_open);
	}
	printf("\nCompleted preprocessing\n");
	printf("\n");

	/*  Stage 2: lex the preprocessed buffer and list the type name of each token. */
	create_c_lexer_state(&c_lexer_state, &lexer_output, &memory_pool_collection, (unsigned char *)file_to_open, unsigned_char_list_data(&preprocssed_characters), unsigned_char_list_size(&preprocssed_characters));
	rtn = lex_c(&c_lexer_state);
	if(!rtn){
		unsigned int i;
		for(i = 0; i < struct_c_lexer_token_ptr_list_size(&c_lexer_state.tokens); i++){
			struct c_lexer_token * tok = struct_c_lexer_token_ptr_list_get(&c_lexer_state.tokens, i);
			printf("%s\n", get_c_token_type_names()[tok->type]);
		}
		printf("Lex was successful.\n");
	}

	/*  Stage 3: parse; a zero return from parse() is treated as success. */
	printf("Begin parsing:\n");
	
	create_parser_state(&parser_state, &memory_pool_collection, &c_lexer_state, &generated_code, unsigned_char_list_data(&preprocssed_characters));
	if(parse(&parser_state)){
		printf("Parsing failed.\n");
	}else{
		printf("Full parser true:\n");
		print_parser_nodes(parser_state.top_node, 0);
	}

	/*  Stage 4: generate code and dump whatever ended up in generated_code. */
	create_code_gen_state(&code_gen_state, &parser_state, &generated_code, &buffered_symbol_table);
	if(generate_code(&code_gen_state)){
		/*  Report the stage that actually failed (this used to say "Parsing failed."). */
		printf("Code generation failed.\n");
	}else{
		unsigned int i;
		unsigned int size;
		unsigned char * data;
		printf("Code generation was successful.\n");
		size = unsigned_char_list_size(&generated_code);
		data = unsigned_char_list_data(&generated_code);

		printf("About to print\n");
		printf("%c\n",'a');
		/*  size is unsigned, so it must be printed with %u rather than %d. */
		printf("print %u size\n", size);
		/*  Only look at the first byte when there is one: reading data[0] of an */
		/*  empty buffer would be an out-of-bounds access.                       */
		if(size > 0){
			printf("print %c first \n", data[0]);
		}
		for(i = 0; i < size; i++){
			printf("%c", data[i]);
		}
	}

	/*  Tear everything down; the memory pool collection goes last. */
	struct_c_lexer_token_ptr_list_destroy(&output_tokens);
	unsigned_char_list_destroy(&input_characters);
	unsigned_char_list_destroy(&generated_code);
	unsigned_char_list_destroy(&buffered_symbol_table);
	unsigned_char_list_destroy(&lexer_output);
	destroy_code_gen_state(&code_gen_state);
	destroy_parser_state(&parser_state);
	destroy_preprocessor_state(preprocessor_state);
	unsigned_char_list_destroy(&preprocssed_characters);
	destroy_c_lexer_state(&c_lexer_state);
	memory_pool_collection_destroy(&memory_pool_collection);
	return 0;
}
Пример #2
0
int lex_c(struct c_lexer_state * c_lexer_state){
	c_lexer_state->c.position = 0;
	c_lexer_state->c.current_line = 0;

	struct_c_lexer_token_ptr_list_create(&c_lexer_state->tokens);

	while(c_lexer_state->c.position < c_lexer_state->c.buffer_size){
		unsigned int rtn = 0;
		unsigned char * first_byte = &c_lexer_state->c.buf[c_lexer_state->c.position];
		enum c_token_type type;
		struct c_lexer_token * new_token;

		if((rtn = t_space(&c_lexer_state->c, c_lexer_state->c.position))){
			type = SPACE;
		}else if((rtn = t_newline(&c_lexer_state->c, c_lexer_state->c.position))){
			type = NEWLINE;
			c_lexer_state->c.current_line = c_lexer_state->c.current_line + rtn;
		}else if((rtn = t_comment(&c_lexer_state->c, c_lexer_state->c.position, &c_lexer_state->c.current_line))){
			type = COMMENT;
		}else if((rtn = t_keyword((const unsigned char *)"auto", &c_lexer_state->c, c_lexer_state->c.position))){
			type = AUTO;
		}else if((rtn = t_keyword((const unsigned char *)"break", &c_lexer_state->c, c_lexer_state->c.position))){
			type = BREAK;
		}else if((rtn = t_keyword((const unsigned char *)"case", &c_lexer_state->c, c_lexer_state->c.position))){
			type = CASE;
		}else if((rtn = t_keyword((const unsigned char *)"char", &c_lexer_state->c, c_lexer_state->c.position))){
			type = CHAR;
		}else if((rtn = t_keyword((const unsigned char *)"const", &c_lexer_state->c, c_lexer_state->c.position))){
			type = CONST;
		}else if((rtn = t_keyword((const unsigned char *)"continue", &c_lexer_state->c, c_lexer_state->c.position))){
			type = CONTINUE;
		}else if((rtn = t_keyword((const unsigned char *)"default", &c_lexer_state->c, c_lexer_state->c.position))){
			type = DEFAULT;
		}else if((rtn = t_keyword((const unsigned char *)"do", &c_lexer_state->c, c_lexer_state->c.position))){
			type = DO;
		}else if((rtn = t_keyword((const unsigned char *)"double", &c_lexer_state->c, c_lexer_state->c.position))){
			type = DOUBLE;
		}else if((rtn = t_keyword((const unsigned char *)"else", &c_lexer_state->c, c_lexer_state->c.position))){
			type = ELSE;
		}else if((rtn = t_keyword((const unsigned char *)"enum", &c_lexer_state->c, c_lexer_state->c.position))){
			type = ENUM;
		}else if((rtn = t_keyword((const unsigned char *)"extern", &c_lexer_state->c, c_lexer_state->c.position))){
			type = EXTERN;
		}else if((rtn = t_keyword((const unsigned char *)"float", &c_lexer_state->c, c_lexer_state->c.position))){
			type = FLOAT;
		}else if((rtn = t_keyword((const unsigned char *)"for", &c_lexer_state->c, c_lexer_state->c.position))){
			type = FOR;
		}else if((rtn = t_keyword((const unsigned char *)"goto", &c_lexer_state->c, c_lexer_state->c.position))){
			type = GOTO;
		}else if((rtn = t_keyword((const unsigned char *)"if", &c_lexer_state->c, c_lexer_state->c.position))){
			type = IF;
		}else if((rtn = t_keyword((const unsigned char *)"int", &c_lexer_state->c, c_lexer_state->c.position))){
			type = INT;
		}else if((rtn = t_keyword((const unsigned char *)"long", &c_lexer_state->c, c_lexer_state->c.position))){
			type = LONG;
		}else if((rtn = t_keyword((const unsigned char *)"register", &c_lexer_state->c, c_lexer_state->c.position))){
			type = REGISTER;
		}else if((rtn = t_keyword((const unsigned char *)"return", &c_lexer_state->c, c_lexer_state->c.position))){
			type = RETURN;
		}else if((rtn = t_keyword((const unsigned char *)"short", &c_lexer_state->c, c_lexer_state->c.position))){
			type = SHORT;
		}else if((rtn = t_keyword((const unsigned char *)"signed", &c_lexer_state->c, c_lexer_state->c.position))){
			type = SIGNED;
		}else if((rtn = t_keyword((const unsigned char *)"sizeof", &c_lexer_state->c, c_lexer_state->c.position))){
			type = SIZEOF;
		}else if((rtn = t_keyword((const unsigned char *)"static", &c_lexer_state->c, c_lexer_state->c.position))){
			type = STATIC;
		}else if((rtn = t_keyword((const unsigned char *)"struct", &c_lexer_state->c, c_lexer_state->c.position))){
			type = STRUCT;
		}else if((rtn = t_keyword((const unsigned char *)"switch", &c_lexer_state->c, c_lexer_state->c.position))){
			type = SWITCH;
		}else if((rtn = t_keyword((const unsigned char *)"typedef", &c_lexer_state->c, c_lexer_state->c.position))){
			type = TYPEDEF;
		}else if((rtn = t_keyword((const unsigned char *)"union", &c_lexer_state->c, c_lexer_state->c.position))){
			type = UNION;
		}else if((rtn = t_keyword((const unsigned char *)"unsigned", &c_lexer_state->c, c_lexer_state->c.position))){
			type = UNSIGNED;
		}else if((rtn = t_keyword((const unsigned char *)"void", &c_lexer_state->c, c_lexer_state->c.position))){
			type = VOID;
		}else if((rtn = t_keyword((const unsigned char *)"volatile", &c_lexer_state->c, c_lexer_state->c.position))){
			type = VOLATILE;
		}else if((rtn = t_keyword((const unsigned char *)"while", &c_lexer_state->c, c_lexer_state->c.position))){
			type = WHILE;
		}else if((rtn = t_identifier(&c_lexer_state->c, c_lexer_state->c.position))){
			type = IDENTIFIER;
		}else if((rtn = t_constant_hex(&c_lexer_state->c, c_lexer_state->c.position))){
			type = CONSTANT_HEX;
		}else if((rtn = t_constant_exponent(&c_lexer_state->c, c_lexer_state->c.position))){
			type = CONSTANT_EXPONENT;
		}else if((rtn = t_constant_float_small(&c_lexer_state->c, c_lexer_state->c.position))){
			type = CONSTANT_FLOAT_SMALL;
		}else if((rtn = t_constant_float_large(&c_lexer_state->c, c_lexer_state->c.position))){
			type = CONSTANT_FLOAT_LARGE;
		}else if((rtn = t_constant_decimal(&c_lexer_state->c, c_lexer_state->c.position))){
			type = CONSTANT_DECIMAL;
		}else if((rtn = t_constant_character(&c_lexer_state->c, c_lexer_state->c.position))){
			type = CONSTANT_CHARACTER;
		}else if((rtn = t_constant_string(&c_lexer_state->c, c_lexer_state->c.position))){
			type = STRING_LITERAL;
		}else if((rtn = t_symbol((const unsigned char *)"...", &c_lexer_state->c, c_lexer_state->c.position))){
			type = ELLIPSIS;
		}else if((rtn = t_symbol((const unsigned char *)">>=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = RIGHT_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"<<=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = LEFT_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"+=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = ADD_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"-=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = SUB_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"*=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = MUL_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"/=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = DIV_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"%=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = MOD_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"&=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = AND_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"^=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = XOR_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"|=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = OR_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)">>", &c_lexer_state->c, c_lexer_state->c.position))){
			type = RIGHT_OP;
		}else if((rtn = t_symbol((const unsigned char *)"<<", &c_lexer_state->c, c_lexer_state->c.position))){
			type = LEFT_OP;
		}else if((rtn = t_symbol((const unsigned char *)"++", &c_lexer_state->c, c_lexer_state->c.position))){
			type = INC_OP;
		}else if((rtn = t_symbol((const unsigned char *)"->", &c_lexer_state->c, c_lexer_state->c.position))){
			type = PTR_OP;
		}else if((rtn = t_symbol((const unsigned char *)"--", &c_lexer_state->c, c_lexer_state->c.position))){
			type = DEC_OP;
		}else if((rtn = t_symbol((const unsigned char *)"&&", &c_lexer_state->c, c_lexer_state->c.position))){
			type = AND_OP;
		}else if((rtn = t_symbol((const unsigned char *)"||", &c_lexer_state->c, c_lexer_state->c.position))){
			type = OR_OP;
		}else if((rtn = t_symbol((const unsigned char *)"<=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = LE_OP;
		}else if((rtn = t_symbol((const unsigned char *)">=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = GE_OP;
		}else if((rtn = t_symbol((const unsigned char *)"==", &c_lexer_state->c, c_lexer_state->c.position))){
			type = EQ_OP;
		}else if((rtn = t_symbol((const unsigned char *)"!=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = NE_OP;
		}else if((rtn = t_symbol((const unsigned char *)";", &c_lexer_state->c, c_lexer_state->c.position))){
			type = SEMICOLON_CHAR;
		}else if(((rtn = t_symbol((const unsigned char *)"{", &c_lexer_state->c, c_lexer_state->c.position)) || (rtn = t_symbol((const unsigned char *)"<%", &c_lexer_state->c, c_lexer_state->c.position)))){
			type = OPEN_BRACE_CHAR;
		}else if(((rtn = t_symbol((const unsigned char *)"}", &c_lexer_state->c, c_lexer_state->c.position)) || (rtn = t_symbol((const unsigned char *)"%>", &c_lexer_state->c, c_lexer_state->c.position)))){
			type = CLOSE_BRACE_CHAR;
		}else if(((rtn = t_symbol((const unsigned char *)"[", &c_lexer_state->c, c_lexer_state->c.position)) || (rtn = t_symbol((const unsigned char *)"<:", &c_lexer_state->c, c_lexer_state->c.position)))){
			type = OPEN_SQUARE_BRACKET_CHAR;
		}else if(((rtn = t_symbol((const unsigned char *)"]", &c_lexer_state->c, c_lexer_state->c.position)) || (rtn = t_symbol((const unsigned char *)":>", &c_lexer_state->c, c_lexer_state->c.position)))){
			type = CLOSE_SQUARE_BRACKET_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)",", &c_lexer_state->c, c_lexer_state->c.position))){
			type = COMMA_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)":", &c_lexer_state->c, c_lexer_state->c.position))){
			type = COLON_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = EQUALS_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"(", &c_lexer_state->c, c_lexer_state->c.position))){
			type = OPEN_PAREN_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)")", &c_lexer_state->c, c_lexer_state->c.position))){
			type = CLOSE_PAREN_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)".", &c_lexer_state->c, c_lexer_state->c.position))){
			type = DOT_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"&", &c_lexer_state->c, c_lexer_state->c.position))){
			type = AMPERSAND_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"!", &c_lexer_state->c, c_lexer_state->c.position))){
			type = EXCLAMATION_MARK_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"~", &c_lexer_state->c, c_lexer_state->c.position))){
			type = TILDE_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"-", &c_lexer_state->c, c_lexer_state->c.position))){
			type = MINUS_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"+", &c_lexer_state->c, c_lexer_state->c.position))){
			type = PLUS_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"*", &c_lexer_state->c, c_lexer_state->c.position))){
			type = MULTIPLY_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"/", &c_lexer_state->c, c_lexer_state->c.position))){
			type = DIVIDE_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"%", &c_lexer_state->c, c_lexer_state->c.position))){
			type = PERCENT_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"<", &c_lexer_state->c, c_lexer_state->c.position))){
			type = OPEN_ANGLE_BRACKET_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)">", &c_lexer_state->c, c_lexer_state->c.position))){
			type = CLOSE_ANGLE_BRACKET_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"^", &c_lexer_state->c, c_lexer_state->c.position))){
			type = CARET_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"|", &c_lexer_state->c, c_lexer_state->c.position))){
			type = PIPE_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"?", &c_lexer_state->c, c_lexer_state->c.position))){
			type = QUESTION_MARK_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"#", &c_lexer_state->c, c_lexer_state->c.position))){
			type = NUMBER_SIGN_CHAR;
		}else{
			unsigned int i = 0;
			printf("Lexer stopping on character 0x%02x.  This was character %u of %u\nFollowing chars are:", c_lexer_state->c.buf[c_lexer_state->c.position], (c_lexer_state->c.position + 1), c_lexer_state->c.buffer_size);
			for(i = c_lexer_state->c.position; i < (c_lexer_state->c.position + 100) && i < c_lexer_state->c.buffer_size; i++){
				if(c_lexer_state->c.buf[i]){
					printf("%c", c_lexer_state->c.buf[i]);
				}
			}
			buffered_printf(c_lexer_state->c.buffered_output, "\n");
			assert(0 && "Invalid character in lexer.");
			return 1;
		}

		new_token = struct_c_lexer_token_memory_pool_malloc(c_lexer_state->c.memory_pool_collection->struct_c_lexer_token_pool);
		new_token->type = type;
		new_token->first_byte = first_byte;
		new_token->last_byte = (unsigned char *)((first_byte + rtn) - 1);
		{
			/*
			unsigned char *g = new_token->first_byte;
			printf("Got token: %s ", get_c_token_type_names()[new_token->type]);
			do{
				if(*g == '\n'){
					printf("\\n");
				}else if(*g == '\r'){
					printf("\\r");
				}else{
					printf("%c", *g);
				}
			}while(g++ != new_token->last_byte);
			printf("\n");
			*/
		}

		struct_c_lexer_token_ptr_list_add_end(&c_lexer_state->tokens, new_token);
		show_lexer_token(c_lexer_state->c.buffered_output, get_c_token_type_names()[new_token->type], new_token->first_byte, new_token->last_byte, SHOW_LEXER_TOKENS);
		c_lexer_state->c.position += rtn;
	}
	return 0;
}