示例#1
0
/*
 * Test driver: pushes one fixed C89 input file through preprocessing, lexing,
 * parsing and code generation, printing the result of every stage to stdout.
 *
 * Each stage is attempted even when an earlier stage reported failure, and the
 * function always returns 0.
 * NOTE(review): a failing stage is therefore only visible in the printed
 * output, never in the exit status -- confirm that this is intended.
 */
int main(void){
	const char * file_to_open = "test/c89/empty-main-return-zero.c";
	struct c_lexer_state c_lexer_state;
	struct memory_pool_collection memory_pool_collection;
	struct unsigned_char_list input_characters;
	int rtn = 0;
	struct parser_state parser_state;
	struct unsigned_char_list generated_code;
	struct unsigned_char_list lexer_output;
	struct unsigned_char_list buffered_symbol_table;
	struct preprocessor_state * preprocessor_state;
	struct unsigned_char_list preprocssed_characters;
	struct struct_c_lexer_token_ptr_list output_tokens;
	struct code_gen_state code_gen_state;

	/* Set up every container and the shared memory pools before any stage runs. */
	struct_c_lexer_token_ptr_list_create(&output_tokens);
	unsigned_char_list_create(&lexer_output);
	unsigned_char_list_create(&input_characters);
	unsigned_char_list_create(&buffered_symbol_table);
	unsigned_char_list_create(&preprocssed_characters);
	memory_pool_collection_create(&memory_pool_collection);
	unsigned_char_list_create(&generated_code);
	preprocessor_state = create_preprocessor_state(&memory_pool_collection);

	/* Stage 1: preprocess.  The bytes of every output token are echoed to stdout
	   and collected in preprocssed_characters, which is the lexer's input below. */
	if(!(rtn = get_preprocessed_output_from_file(preprocessor_state, (unsigned char *)file_to_open, &output_tokens))){
		unsigned int i;
		for(i = 0; i < struct_c_lexer_token_ptr_list_size(&output_tokens); i++){
			struct c_lexer_token * current_token = struct_c_lexer_token_ptr_list_get(&output_tokens, i);
			unsigned char * j;
			/* last_byte is inclusive, hence the + 1. */
			for(j = current_token->first_byte; j < (current_token->last_byte + 1); j++){
				unsigned_char_list_add_end(&preprocssed_characters, *j);
				putchar(*j);
			}
		}
	}else{
		printf("Nothing to output.  Preprocessing failed for %s\n", file_to_open);
	}
	printf("\nCompleted preprocessing\n");
	printf("\n");

	/* Stage 2: lex the preprocessed bytes and list the type name of every token. */
	create_c_lexer_state(&c_lexer_state, &lexer_output, &memory_pool_collection, (unsigned char *)file_to_open, unsigned_char_list_data(&preprocssed_characters), unsigned_char_list_size(&preprocssed_characters));
	rtn = lex_c(&c_lexer_state);
	if(!rtn){
		unsigned int i;
		for(i = 0; i < struct_c_lexer_token_ptr_list_size(&c_lexer_state.tokens); i++){
			struct c_lexer_token * tok = struct_c_lexer_token_ptr_list_get(&c_lexer_state.tokens, i);
			printf("%s\n", get_c_token_type_names()[tok->type]);
		}
		printf("Lex was successful.\n");
	}

	/* Stage 3: parse; parse() returning non-zero is treated as failure. */
	printf("Begin parsing:\n");
	
	create_parser_state(&parser_state, &memory_pool_collection, &c_lexer_state, &generated_code, unsigned_char_list_data(&preprocssed_characters));
	if(parse(&parser_state)){
		printf("Parsing failed.\n");
	}else{
		printf("Full parser true:\n");
		print_parser_nodes(parser_state.top_node, 0);
	}

	/* Stage 4: generate code and dump whatever ended up in generated_code. */
	create_code_gen_state(&code_gen_state, &parser_state, &generated_code, &buffered_symbol_table);
	if(generate_code(&code_gen_state)){
		printf("Code generation failed.\n");
	}else{
		unsigned int i;
		unsigned int size;
		unsigned char * data;
		printf("Code generation was successful.\n");
		size = unsigned_char_list_size(&generated_code);
		data = unsigned_char_list_data(&generated_code);

		printf("About to print\n");
		printf("%c\n",'a');
		printf("print %u size\n", size);
		/* Only peek at the first byte when there is one. */
		if(size > 0){
			printf("print %c first \n", data[0]);
		}
		for(i = 0; i < size; i++){
			printf("%c", data[i]);
		}
	}

	/* Tear-down.
	   NOTE(review): generated_code and buffered_symbol_table are destroyed before
	   the code generator and parser states that were created with pointers to
	   them; confirm the destroy_* functions below do not touch those lists. */
	struct_c_lexer_token_ptr_list_destroy(&output_tokens);
	unsigned_char_list_destroy(&input_characters);
	unsigned_char_list_destroy(&generated_code);
	unsigned_char_list_destroy(&buffered_symbol_table);
	unsigned_char_list_destroy(&lexer_output);
	destroy_code_gen_state(&code_gen_state);
	destroy_parser_state(&parser_state);
	destroy_preprocessor_state(preprocessor_state);
	unsigned_char_list_destroy(&preprocssed_characters);
	destroy_c_lexer_state(&c_lexer_state);
	memory_pool_collection_destroy(&memory_pool_collection);
	return 0;
}
示例#2
0
文件: lexer.c 项目: chyyuu/recc
/* Shorthand used only inside lex_asm: each form probes one matcher at the lexer's current position. */
#define LEX_ASM_PROBE(matcher) matcher(&l2_lexer_state->c, l2_lexer_state->c.position)
#define LEX_ASM_KEYWORD(text) t_keyword((const unsigned char *)(text), &l2_lexer_state->c, l2_lexer_state->c.position)
#define LEX_ASM_SYMBOL(text) t_symbol((const unsigned char *)(text), &l2_lexer_state->c, l2_lexer_state->c.position)

/*
 * Splits `buffer` (buffer_size bytes) into tokens appended to
 * l2_lexer_state->tokens.
 *
 * At every position the matchers are probed in a fixed order; the first one
 * that returns a non-zero value decides the token type, and that value is
 * taken as the token length in bytes.
 *
 * Returns 0 once the whole buffer has been consumed.  Returns 1 when no
 * matcher accepts the input at the current position, after writing a
 * diagnostic (offending byte plus up to 100 following bytes) to
 * c.buffered_output.
 */
int lex_asm(struct l2_lexer_state * l2_lexer_state, unsigned char * filename, unsigned char * buffer, unsigned int buffer_size){
	/* Bind the lexer to the caller's buffer and rewind it. */
	l2_lexer_state->c.filename = filename;
	l2_lexer_state->c.buf = buffer;
	l2_lexer_state->c.buffer_size = buffer_size;
	l2_lexer_state->c.position = 0;
	l2_lexer_state->c.current_line = 0;

	struct_l2_lexer_token_ptr_list_create(&l2_lexer_state->tokens);

	while(l2_lexer_state->c.position < buffer_size){
		unsigned char * token_start = &l2_lexer_state->c.buf[l2_lexer_state->c.position];
		unsigned int length = 0;
		enum l2_token_type type;
		struct l2_lexer_token * token;

		/* Whitespace, newlines and comments. */
		if((length = LEX_ASM_PROBE(t_space))){
			type = L2_SPACE;
		}else if((length = LEX_ASM_PROBE(t_newline))){
			type = L2_NEWLINE;
			/* The match length is added to the line counter. */
			l2_lexer_state->c.current_line += length;
		}else if((length = LEX_ASM_PROBE(t_l2_comment))){
			type = L2_COMMENT;
		/* Punctuation. */
		}else if((length = LEX_ASM_SYMBOL(":"))){
			type = L2_COLON_CHAR;
		}else if((length = LEX_ASM_SYMBOL(","))){
			type = L2_COMMA_CHAR;
		}else if((length = LEX_ASM_SYMBOL("-"))){
			type = L2_MINUS_CHAR;
		/* Upper-case directive keywords. */
		}else if((length = LEX_ASM_KEYWORD("OFFSET"))){
			type = L2_OFFSET;
		}else if((length = LEX_ASM_KEYWORD("RELOCATABLE"))){
			type = L2_RELOCATABLE;
		}else if((length = LEX_ASM_KEYWORD("IMPLEMENTS"))){
			type = L2_IMPLEMENTS;
		}else if((length = LEX_ASM_KEYWORD("IMPLEMENTED"))){
			type = L2_IMPLEMENTED;
		}else if((length = LEX_ASM_KEYWORD("FUNCTION"))){
			type = L2_FUNCTION;
		}else if((length = LEX_ASM_KEYWORD("VARIABLE"))){
			type = L2_VARIABLE;
		}else if((length = LEX_ASM_KEYWORD("CONSTANT"))){
			type = L2_CONSTANT;
		}else if((length = LEX_ASM_KEYWORD("REQUIRES"))){
			type = L2_REQUIRES;
		}else if((length = LEX_ASM_KEYWORD("REQUIRED"))){
			type = L2_REQUIRED;
		}else if((length = LEX_ASM_KEYWORD("INTERNAL"))){
			type = L2_INTERNAL;
		}else if((length = LEX_ASM_KEYWORD("EXTERNAL"))){
			type = L2_EXTERNAL;
		}else if((length = LEX_ASM_KEYWORD("STRING"))){
			type = L2_STRING;
		}else if((length = LEX_ASM_KEYWORD("UNRESOLVED"))){
			type = L2_UNRESOLVED;
		}else if((length = LEX_ASM_KEYWORD("REGION"))){
			type = L2_REGION;
		}else if((length = LEX_ASM_KEYWORD("START"))){
			type = L2_START;
		}else if((length = LEX_ASM_KEYWORD("END"))){
			type = L2_END;
		}else if((length = LEX_ASM_KEYWORD("PERMISSION"))){
			type = L2_PERMISSION;
		/* Registers and numeric constants. */
		}else if((length = LEX_ASM_PROBE(t_l2_register))){
			type = L2_REGISTER;
		}else if((length = LEX_ASM_PROBE(t_constant_hex))){
			type = L2_CONSTANT_HEX;
		}else if((length = LEX_ASM_PROBE(t_constant_decimal))){
			type = L2_CONSTANT_DECIMAL;
		/* Instruction mnemonics and data directives. */
		}else if((length = LEX_ASM_KEYWORD("add"))){
			type = L2_ADD;
		}else if((length = LEX_ASM_KEYWORD("sub"))){
			type = L2_SUB;
		}else if((length = LEX_ASM_KEYWORD("mul"))){
			type = L2_MUL;
		}else if((length = LEX_ASM_KEYWORD("div"))){
			type = L2_DIV;
		}else if((length = LEX_ASM_KEYWORD("beq"))){
			type = L2_BEQ;
		}else if((length = LEX_ASM_KEYWORD("blt"))){
			type = L2_BLT;
		}else if((length = LEX_ASM_KEYWORD("loa"))){
			type = L2_LOA;
		}else if((length = LEX_ASM_KEYWORD("sto"))){
			type = L2_STO;
		}else if((length = LEX_ASM_KEYWORD("ll"))){
			type = L2_LL;
		}else if((length = LEX_ASM_KEYWORD("and"))){
			type = L2_AND;
		}else if((length = LEX_ASM_KEYWORD("or"))){
			type = L2_OR;
		}else if((length = LEX_ASM_KEYWORD("not"))){
			type = L2_NOT;
		}else if((length = LEX_ASM_KEYWORD("shr"))){
			type = L2_SHR;
		}else if((length = LEX_ASM_KEYWORD("shl"))){
			type = L2_SHL;
		}else if((length = LEX_ASM_KEYWORD("DW"))){
			type = L2_DW;
		}else if((length = LEX_ASM_KEYWORD("SW"))){
			type = L2_SW;
		/* Anything else that looks like a name. */
		}else if((length = LEX_ASM_PROBE(t_identifier))){
			type = L2_IDENTIFIER;
		}else{
			/* Nothing matched: report the offending byte and some context, then give up. */
			unsigned int k;
			buffered_printf(l2_lexer_state->c.buffered_output, "Lexer stopping on character 0x%02x.  This was character %u of %u\nFollowing chars are:", l2_lexer_state->c.buf[l2_lexer_state->c.position], (l2_lexer_state->c.position + 1), l2_lexer_state->c.buffer_size);
			k = l2_lexer_state->c.position;
			while(k < (l2_lexer_state->c.position + 100) && k < l2_lexer_state->c.buffer_size){
				/* Zero bytes are skipped rather than printed. */
				if(l2_lexer_state->c.buf[k]){
					buffered_printf(l2_lexer_state->c.buffered_output, "%c", l2_lexer_state->c.buf[k]);
				}
				k++;
			}
			buffered_printf(l2_lexer_state->c.buffered_output, "\n");
			return 1;
		}

		/* Record the token; last_byte is inclusive. */
		token = struct_l2_lexer_token_memory_pool_malloc(l2_lexer_state->c.memory_pool_collection->struct_l2_lexer_token_pool);
		token->type = type;
		token->first_byte = token_start;
		token->last_byte = (unsigned char *)((token_start + length) - 1);

		struct_l2_lexer_token_ptr_list_add_end(&l2_lexer_state->tokens, token);
		/* NOTE(review): the name table comes from get_c_token_type_names() but is indexed
		   with an l2 token type -- confirm this is the intended table. */
		show_lexer_token(l2_lexer_state->c.buffered_output, get_c_token_type_names()[token->type], token->first_byte, token->last_byte, SHOW_LEXER_TOKENS);
		l2_lexer_state->c.position += length;
	}
	return 0;
}

#undef LEX_ASM_SYMBOL
#undef LEX_ASM_KEYWORD
#undef LEX_ASM_PROBE
示例#3
0
文件: lexer.c 项目: ezhangle/recc
/*
 * Splits `buffer` (buffer_size bytes) into tokens appended to
 * asm_lexer_state->tokens.  Tokens are allocated from the pool obtained for
 * sizeof(struct asm_lexer_token).
 *
 * At every position the matchers are probed in a fixed order; the first one
 * that returns a non-zero value decides the token type, and that value is
 * taken as the token length in bytes.
 *
 * Returns 0 once the whole buffer has been consumed.  Returns 1 when no
 * matcher accepts the input at the current position; in that case the
 * offending byte and up to 100 bytes starting at it (never reading past
 * buffer_size) are written to c.buffered_output, and c.position is left at
 * the failing offset.
 *
 * g_format_buffer_use() is paired with g_format_buffer_release() on every
 * return path.
 */
int lex_asm(struct asm_lexer_state * asm_lexer_state, unsigned char * filename, unsigned char * buffer, unsigned int buffer_size){
	struct memory_pooler * asm_lexer_token_pool = memory_pooler_collection_get_pool(asm_lexer_state->c.memory_pooler_collection, sizeof(struct asm_lexer_token));
	asm_lexer_state->c.buf = buffer;
	asm_lexer_state->c.position = 0;
	asm_lexer_state->c.current_line = 0;
	asm_lexer_state->c.filename = filename;
	asm_lexer_state->c.buffer_size = buffer_size;

	g_format_buffer_use();

	struct_asm_lexer_token_ptr_list_create(&asm_lexer_state->tokens);

	while(asm_lexer_state->c.position < buffer_size){
		unsigned int rtn = 0;
		unsigned char * first_byte = &asm_lexer_state->c.buf[asm_lexer_state->c.position];
		enum asm_token_type type;
		struct asm_lexer_token * new_token;

		if((rtn = t_space(&asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_SPACE;
		}else if((rtn = t_newline(&asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_NEWLINE;
			asm_lexer_state->c.current_line = asm_lexer_state->c.current_line + rtn; /* NEWLINE token can be multiple newlines */
		}else if((rtn = t_asm_comment(&asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_ASM_COMMENT;
		}else if((rtn = t_symbol((const unsigned char *)":", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_COLON_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)",", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_COMMA_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"-", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_MINUS_CHAR;
		}else if((rtn = t_keyword((const unsigned char *)"OFFSET", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_OFFSET;
		}else if((rtn = t_keyword((const unsigned char *)"RELOCATABLE", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_RELOCATABLE;
		}else if((rtn = t_keyword((const unsigned char *)"IMPLEMENTS", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_IMPLEMENTS;
		}else if((rtn = t_keyword((const unsigned char *)"REQUIRES", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_REQUIRES;
		}else if((rtn = t_keyword((const unsigned char *)"INTERNAL", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_INTERNAL;
		}else if((rtn = t_keyword((const unsigned char *)"EXTERNAL", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_EXTERNAL;
		}else if((rtn = t_asm_register(&asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_REGISTER;
		}else if((rtn = t_constant_hex(&asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_CONSTANT_HEX;
		}else if((rtn = t_constant_decimal(&asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_CONSTANT_DECIMAL;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"add", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_ADD;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"sub", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_SUB;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"mul", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_MUL;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"div", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_DIV;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"beq", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_BEQ;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"blt", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_BLT;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"loa", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_LOA;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"sto", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_STO;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"ll", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_LL;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"and", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_AND;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"or", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_OR;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"not", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_NOT;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"shr", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_SHR;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"shl", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_SHL;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"dw", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_DW;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"sw", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_SW;
		}else if((rtn = t_identifier(&asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_IDENTIFIER;
		}else{
			unsigned int i;
			buffered_printf(asm_lexer_state->c.buffered_output, "Lexer stopping on character '%c' 0x(%x)\n", asm_lexer_state->c.buf[asm_lexer_state->c.position], asm_lexer_state->c.buf[asm_lexer_state->c.position]);
			/* Show context starting at the failing byte.  The walk is clamped to
			   buffer_size so short buffers are never over-read, and c.position is
			   left untouched so callers can still see where lexing stopped. */
			for(i = asm_lexer_state->c.position; i < (asm_lexer_state->c.position + 100) && i < asm_lexer_state->c.buffer_size; i++){
				/* Zero bytes are skipped rather than printed. */
				if(asm_lexer_state->c.buf[i]){
					buffered_printf(asm_lexer_state->c.buffered_output, "%c", asm_lexer_state->c.buf[i]);
				}
			}
			g_format_buffer_release();
			return 1;
		}

		/* Record the token; last_byte is inclusive. */
		new_token = (struct asm_lexer_token *)memory_pooler_malloc(asm_lexer_token_pool);
		new_token->type = type;
		new_token->first_byte = first_byte;
		new_token->last_byte = (unsigned char *)((first_byte + rtn) - 1);

		struct_asm_lexer_token_ptr_list_add_end(&asm_lexer_state->tokens, new_token);
		/* NOTE(review): the name table comes from get_c_token_type_names() but is indexed
		   with an asm token type -- confirm this is the intended table. */
		show_lexer_token(asm_lexer_state->c.buffered_output, get_c_token_type_names()[new_token->type], new_token->first_byte, new_token->last_byte, SHOW_LEXER_TOKENS);
		asm_lexer_state->c.position += rtn;
	}
	g_format_buffer_release();
	return 0;
}
示例#4
0
文件: lexer.c 项目: chyyuu/recc
/* Shorthand used only inside lex_c: each form probes one matcher at the lexer's current position. */
#define LEX_C_PROBE(matcher) matcher(&c_lexer_state->c, c_lexer_state->c.position)
#define LEX_C_KEYWORD(text) t_keyword((const unsigned char *)(text), &c_lexer_state->c, c_lexer_state->c.position)
#define LEX_C_SYMBOL(text) t_symbol((const unsigned char *)(text), &c_lexer_state->c, c_lexer_state->c.position)

/*
 * Splits the buffer already attached to c_lexer_state (c.buf, c.buffer_size)
 * into C tokens appended to c_lexer_state->tokens.
 *
 * At every position the matchers are probed in a fixed order; the first one
 * that returns a non-zero value decides the token type, and that value is
 * taken as the token length in bytes.  Keywords are probed before
 * identifiers, and longer operators before the shorter operators they start
 * with.  The digraphs <% %> <: :> produce the same token types as { } [ ].
 *
 * Returns 0 once the whole buffer has been consumed.  When nothing matches, a
 * diagnostic is printed and assert(0) fires; if assertions are compiled out
 * the function returns 1.
 */
int lex_c(struct c_lexer_state * c_lexer_state){
	c_lexer_state->c.current_line = 0;
	c_lexer_state->c.position = 0;

	struct_c_lexer_token_ptr_list_create(&c_lexer_state->tokens);

	while(c_lexer_state->c.position < c_lexer_state->c.buffer_size){
		unsigned char * token_start = &c_lexer_state->c.buf[c_lexer_state->c.position];
		unsigned int length = 0;
		enum c_token_type type;
		struct c_lexer_token * token;

		/* Whitespace, newlines and comments. */
		if((length = LEX_C_PROBE(t_space))){
			type = SPACE;
		}else if((length = LEX_C_PROBE(t_newline))){
			type = NEWLINE;
			/* The match length is added to the line counter. */
			c_lexer_state->c.current_line += length;
		}else if((length = t_comment(&c_lexer_state->c, c_lexer_state->c.position, &c_lexer_state->c.current_line))){
			/* t_comment is handed the line counter as well. */
			type = COMMENT;
		/* Keywords. */
		}else if((length = LEX_C_KEYWORD("auto"))){
			type = AUTO;
		}else if((length = LEX_C_KEYWORD("break"))){
			type = BREAK;
		}else if((length = LEX_C_KEYWORD("case"))){
			type = CASE;
		}else if((length = LEX_C_KEYWORD("char"))){
			type = CHAR;
		}else if((length = LEX_C_KEYWORD("const"))){
			type = CONST;
		}else if((length = LEX_C_KEYWORD("continue"))){
			type = CONTINUE;
		}else if((length = LEX_C_KEYWORD("default"))){
			type = DEFAULT;
		}else if((length = LEX_C_KEYWORD("do"))){
			type = DO;
		}else if((length = LEX_C_KEYWORD("double"))){
			type = DOUBLE;
		}else if((length = LEX_C_KEYWORD("else"))){
			type = ELSE;
		}else if((length = LEX_C_KEYWORD("enum"))){
			type = ENUM;
		}else if((length = LEX_C_KEYWORD("extern"))){
			type = EXTERN;
		}else if((length = LEX_C_KEYWORD("float"))){
			type = FLOAT;
		}else if((length = LEX_C_KEYWORD("for"))){
			type = FOR;
		}else if((length = LEX_C_KEYWORD("goto"))){
			type = GOTO;
		}else if((length = LEX_C_KEYWORD("if"))){
			type = IF;
		}else if((length = LEX_C_KEYWORD("int"))){
			type = INT;
		}else if((length = LEX_C_KEYWORD("long"))){
			type = LONG;
		}else if((length = LEX_C_KEYWORD("register"))){
			type = REGISTER;
		}else if((length = LEX_C_KEYWORD("return"))){
			type = RETURN;
		}else if((length = LEX_C_KEYWORD("short"))){
			type = SHORT;
		}else if((length = LEX_C_KEYWORD("signed"))){
			type = SIGNED;
		}else if((length = LEX_C_KEYWORD("sizeof"))){
			type = SIZEOF;
		}else if((length = LEX_C_KEYWORD("static"))){
			type = STATIC;
		}else if((length = LEX_C_KEYWORD("struct"))){
			type = STRUCT;
		}else if((length = LEX_C_KEYWORD("switch"))){
			type = SWITCH;
		}else if((length = LEX_C_KEYWORD("typedef"))){
			type = TYPEDEF;
		}else if((length = LEX_C_KEYWORD("union"))){
			type = UNION;
		}else if((length = LEX_C_KEYWORD("unsigned"))){
			type = UNSIGNED;
		}else if((length = LEX_C_KEYWORD("void"))){
			type = VOID;
		}else if((length = LEX_C_KEYWORD("volatile"))){
			type = VOLATILE;
		}else if((length = LEX_C_KEYWORD("while"))){
			type = WHILE;
		/* Identifiers and literals. */
		}else if((length = LEX_C_PROBE(t_identifier))){
			type = IDENTIFIER;
		}else if((length = LEX_C_PROBE(t_constant_hex))){
			type = CONSTANT_HEX;
		}else if((length = LEX_C_PROBE(t_constant_exponent))){
			type = CONSTANT_EXPONENT;
		}else if((length = LEX_C_PROBE(t_constant_float_small))){
			type = CONSTANT_FLOAT_SMALL;
		}else if((length = LEX_C_PROBE(t_constant_float_large))){
			type = CONSTANT_FLOAT_LARGE;
		}else if((length = LEX_C_PROBE(t_constant_decimal))){
			type = CONSTANT_DECIMAL;
		}else if((length = LEX_C_PROBE(t_constant_character))){
			type = CONSTANT_CHARACTER;
		}else if((length = LEX_C_PROBE(t_constant_string))){
			type = STRING_LITERAL;
		/* Three-character operators. */
		}else if((length = LEX_C_SYMBOL("..."))){
			type = ELLIPSIS;
		}else if((length = LEX_C_SYMBOL(">>="))){
			type = RIGHT_ASSIGN;
		}else if((length = LEX_C_SYMBOL("<<="))){
			type = LEFT_ASSIGN;
		/* Two-character operators. */
		}else if((length = LEX_C_SYMBOL("+="))){
			type = ADD_ASSIGN;
		}else if((length = LEX_C_SYMBOL("-="))){
			type = SUB_ASSIGN;
		}else if((length = LEX_C_SYMBOL("*="))){
			type = MUL_ASSIGN;
		}else if((length = LEX_C_SYMBOL("/="))){
			type = DIV_ASSIGN;
		}else if((length = LEX_C_SYMBOL("%="))){
			type = MOD_ASSIGN;
		}else if((length = LEX_C_SYMBOL("&="))){
			type = AND_ASSIGN;
		}else if((length = LEX_C_SYMBOL("^="))){
			type = XOR_ASSIGN;
		}else if((length = LEX_C_SYMBOL("|="))){
			type = OR_ASSIGN;
		}else if((length = LEX_C_SYMBOL(">>"))){
			type = RIGHT_OP;
		}else if((length = LEX_C_SYMBOL("<<"))){
			type = LEFT_OP;
		}else if((length = LEX_C_SYMBOL("++"))){
			type = INC_OP;
		}else if((length = LEX_C_SYMBOL("->"))){
			type = PTR_OP;
		}else if((length = LEX_C_SYMBOL("--"))){
			type = DEC_OP;
		}else if((length = LEX_C_SYMBOL("&&"))){
			type = AND_OP;
		}else if((length = LEX_C_SYMBOL("||"))){
			type = OR_OP;
		}else if((length = LEX_C_SYMBOL("<="))){
			type = LE_OP;
		}else if((length = LEX_C_SYMBOL(">="))){
			type = GE_OP;
		}else if((length = LEX_C_SYMBOL("=="))){
			type = EQ_OP;
		}else if((length = LEX_C_SYMBOL("!="))){
			type = NE_OP;
		}else if((length = LEX_C_SYMBOL(";"))){
			type = SEMICOLON_CHAR;
		/* Braces and brackets, each with its digraph spelling as a fallback. */
		}else if((length = LEX_C_SYMBOL("{")) || (length = LEX_C_SYMBOL("<%"))){
			type = OPEN_BRACE_CHAR;
		}else if((length = LEX_C_SYMBOL("}")) || (length = LEX_C_SYMBOL("%>"))){
			type = CLOSE_BRACE_CHAR;
		}else if((length = LEX_C_SYMBOL("[")) || (length = LEX_C_SYMBOL("<:"))){
			type = OPEN_SQUARE_BRACKET_CHAR;
		}else if((length = LEX_C_SYMBOL("]")) || (length = LEX_C_SYMBOL(":>"))){
			type = CLOSE_SQUARE_BRACKET_CHAR;
		/* Single-character punctuation and operators. */
		}else if((length = LEX_C_SYMBOL(","))){
			type = COMMA_CHAR;
		}else if((length = LEX_C_SYMBOL(":"))){
			type = COLON_CHAR;
		}else if((length = LEX_C_SYMBOL("="))){
			type = EQUALS_CHAR;
		}else if((length = LEX_C_SYMBOL("("))){
			type = OPEN_PAREN_CHAR;
		}else if((length = LEX_C_SYMBOL(")"))){
			type = CLOSE_PAREN_CHAR;
		}else if((length = LEX_C_SYMBOL("."))){
			type = DOT_CHAR;
		}else if((length = LEX_C_SYMBOL("&"))){
			type = AMPERSAND_CHAR;
		}else if((length = LEX_C_SYMBOL("!"))){
			type = EXCLAMATION_MARK_CHAR;
		}else if((length = LEX_C_SYMBOL("~"))){
			type = TILDE_CHAR;
		}else if((length = LEX_C_SYMBOL("-"))){
			type = MINUS_CHAR;
		}else if((length = LEX_C_SYMBOL("+"))){
			type = PLUS_CHAR;
		}else if((length = LEX_C_SYMBOL("*"))){
			type = MULTIPLY_CHAR;
		}else if((length = LEX_C_SYMBOL("/"))){
			type = DIVIDE_CHAR;
		}else if((length = LEX_C_SYMBOL("%"))){
			type = PERCENT_CHAR;
		}else if((length = LEX_C_SYMBOL("<"))){
			type = OPEN_ANGLE_BRACKET_CHAR;
		}else if((length = LEX_C_SYMBOL(">"))){
			type = CLOSE_ANGLE_BRACKET_CHAR;
		}else if((length = LEX_C_SYMBOL("^"))){
			type = CARET_CHAR;
		}else if((length = LEX_C_SYMBOL("|"))){
			type = PIPE_CHAR;
		}else if((length = LEX_C_SYMBOL("?"))){
			type = QUESTION_MARK_CHAR;
		}else if((length = LEX_C_SYMBOL("#"))){
			type = NUMBER_SIGN_CHAR;
		}else{
			/* Nothing matched: print the offending byte plus up to 100 bytes of context. */
			unsigned int k;
			printf("Lexer stopping on character 0x%02x.  This was character %u of %u\nFollowing chars are:", c_lexer_state->c.buf[c_lexer_state->c.position], (c_lexer_state->c.position + 1), c_lexer_state->c.buffer_size);
			k = c_lexer_state->c.position;
			while(k < (c_lexer_state->c.position + 100) && k < c_lexer_state->c.buffer_size){
				/* Zero bytes are skipped rather than printed. */
				if(c_lexer_state->c.buf[k]){
					printf("%c", c_lexer_state->c.buf[k]);
				}
				k++;
			}
			/* The trailing newline goes to the buffered output, not stdout. */
			buffered_printf(c_lexer_state->c.buffered_output, "\n");
			assert(0 && "Invalid character in lexer.");
			return 1;
		}

		/* Record the token; last_byte is inclusive. */
		token = struct_c_lexer_token_memory_pool_malloc(c_lexer_state->c.memory_pool_collection->struct_c_lexer_token_pool);
		token->type = type;
		token->first_byte = token_start;
		token->last_byte = (unsigned char *)((token_start + length) - 1);

		struct_c_lexer_token_ptr_list_add_end(&c_lexer_state->tokens, token);
		show_lexer_token(c_lexer_state->c.buffered_output, get_c_token_type_names()[token->type], token->first_byte, token->last_byte, SHOW_LEXER_TOKENS);
		c_lexer_state->c.position += length;
	}
	return 0;
}

#undef LEX_C_SYMBOL
#undef LEX_C_KEYWORD
#undef LEX_C_PROBE
示例#5
0
文件: lexer.c 项目: ezhangle/recc
/*
 * Splits `buffer` (buffer_size bytes) into build-script tokens appended to
 * build_script_lexer_state->tokens.  Tokens are allocated from the pool
 * obtained for sizeof(struct build_script_lexer_token).
 *
 * At every position the matchers are probed in a fixed order; the first one
 * that returns a non-zero value decides the token type, and that value is
 * taken as the token length in bytes.
 *
 * Returns 0 once the whole buffer has been consumed.  Returns 1 when no
 * matcher accepts the input at the current position; in that case the
 * offending byte and up to 100 bytes starting at it (never reading past
 * buffer_size) are written to c.buffered_output, and c.position is left at
 * the failing offset.
 *
 * g_format_buffer_use() is paired with g_format_buffer_release() on every
 * return path.
 */
int lex_build_script(struct build_script_lexer_state * build_script_lexer_state, unsigned char * filename, unsigned char * buffer, unsigned int buffer_size){
	struct memory_pooler * build_script_lexer_token_pool = memory_pooler_collection_get_pool(build_script_lexer_state->c.memory_pooler_collection, sizeof(struct build_script_lexer_token));
	build_script_lexer_state->c.buf = buffer;
	build_script_lexer_state->c.position = 0;
	build_script_lexer_state->c.current_line = 0;
	build_script_lexer_state->c.filename = filename;
	build_script_lexer_state->c.buffer_size = buffer_size;

	g_format_buffer_use();

	struct_build_script_lexer_token_ptr_list_create(&build_script_lexer_state->tokens);

	while(build_script_lexer_state->c.position < buffer_size){
		unsigned int rtn = 0;
		unsigned char * first_byte = &build_script_lexer_state->c.buf[build_script_lexer_state->c.position];
		enum build_script_token_type type;
		struct build_script_lexer_token * new_token;

		if((rtn = t_space(&build_script_lexer_state->c, build_script_lexer_state->c.position))){
			type = B_SPACE;
		}else if((rtn = t_newline(&build_script_lexer_state->c, build_script_lexer_state->c.position))){
			type = B_NEWLINE;
			build_script_lexer_state->c.current_line = build_script_lexer_state->c.current_line + rtn; /* NEWLINE token can be multiple newlines */
		}else if((rtn = t_keyword((const unsigned char *)"PREPROCESS", &build_script_lexer_state->c, build_script_lexer_state->c.position))){
			type = B_PREPROCESS;
		}else if((rtn = t_keyword((const unsigned char *)"CODE GENERATE", &build_script_lexer_state->c, build_script_lexer_state->c.position))){
			type = B_CODE_GENERATE;
		}else if((rtn = t_keyword((const unsigned char *)"LINK", &build_script_lexer_state->c, build_script_lexer_state->c.position))){
			type = B_LINK;
		}else if((rtn = t_keyword((const unsigned char *)"SYMBOLS", &build_script_lexer_state->c, build_script_lexer_state->c.position))){
			type = B_SYMBOLS;
		}else if((rtn = t_keyword((const unsigned char *)"TO", &build_script_lexer_state->c, build_script_lexer_state->c.position))){
			type = B_TO;
		}else if((rtn = t_filename(&build_script_lexer_state->c, build_script_lexer_state->c.position))){
			type = B_FILENAME;
		}else if((rtn = t_symbol((const unsigned char *)",", &build_script_lexer_state->c, build_script_lexer_state->c.position))){
			type = B_COMMA_CHAR;
		}else{
			unsigned int i;
			buffered_printf(build_script_lexer_state->c.buffered_output, "Lexer stopping on character '%c' 0x(%x)\n", build_script_lexer_state->c.buf[build_script_lexer_state->c.position], build_script_lexer_state->c.buf[build_script_lexer_state->c.position]);
			/* Show context starting at the failing byte.  The walk is clamped to
			   buffer_size so short buffers are never over-read, and c.position is
			   left untouched so callers can still see where lexing stopped. */
			for(i = build_script_lexer_state->c.position; i < (build_script_lexer_state->c.position + 100) && i < build_script_lexer_state->c.buffer_size; i++){
				/* Zero bytes are skipped rather than printed. */
				if(build_script_lexer_state->c.buf[i]){
					buffered_printf(build_script_lexer_state->c.buffered_output, "%c", build_script_lexer_state->c.buf[i]);
				}
			}
			g_format_buffer_release();
			return 1;
		}

		/* Record the token; last_byte is inclusive. */
		new_token = (struct build_script_lexer_token *)memory_pooler_malloc(build_script_lexer_token_pool);
		new_token->type = type;
		new_token->first_byte = first_byte;
		new_token->last_byte = (unsigned char *)((first_byte + rtn) - 1);

		struct_build_script_lexer_token_ptr_list_add_end(&build_script_lexer_state->tokens, new_token);
		/* NOTE(review): the name table comes from get_c_token_type_names() but is indexed
		   with a build-script token type -- confirm this is the intended table. */
		show_lexer_token(build_script_lexer_state->c.buffered_output, get_c_token_type_names()[new_token->type], new_token->first_byte, new_token->last_byte, SHOW_LEXER_TOKENS);
		build_script_lexer_state->c.position += rtn;
	}
	g_format_buffer_release();
	return 0;
}