Ejemplo n.º 1
0
Archivo: lexer.c Proyecto: chyyuu/recc
int lex_c(struct c_lexer_state * c_lexer_state){
	c_lexer_state->c.position = 0;
	c_lexer_state->c.current_line = 0;

	struct_c_lexer_token_ptr_list_create(&c_lexer_state->tokens);

	while(c_lexer_state->c.position < c_lexer_state->c.buffer_size){
		unsigned int rtn = 0;
		unsigned char * first_byte = &c_lexer_state->c.buf[c_lexer_state->c.position];
		enum c_token_type type;
		struct c_lexer_token * new_token;

		if((rtn = t_space(&c_lexer_state->c, c_lexer_state->c.position))){
			type = SPACE;
		}else if((rtn = t_newline(&c_lexer_state->c, c_lexer_state->c.position))){
			type = NEWLINE;
			c_lexer_state->c.current_line = c_lexer_state->c.current_line + rtn;
		}else if((rtn = t_comment(&c_lexer_state->c, c_lexer_state->c.position, &c_lexer_state->c.current_line))){
			type = COMMENT;
		}else if((rtn = t_keyword((const unsigned char *)"auto", &c_lexer_state->c, c_lexer_state->c.position))){
			type = AUTO;
		}else if((rtn = t_keyword((const unsigned char *)"break", &c_lexer_state->c, c_lexer_state->c.position))){
			type = BREAK;
		}else if((rtn = t_keyword((const unsigned char *)"case", &c_lexer_state->c, c_lexer_state->c.position))){
			type = CASE;
		}else if((rtn = t_keyword((const unsigned char *)"char", &c_lexer_state->c, c_lexer_state->c.position))){
			type = CHAR;
		}else if((rtn = t_keyword((const unsigned char *)"const", &c_lexer_state->c, c_lexer_state->c.position))){
			type = CONST;
		}else if((rtn = t_keyword((const unsigned char *)"continue", &c_lexer_state->c, c_lexer_state->c.position))){
			type = CONTINUE;
		}else if((rtn = t_keyword((const unsigned char *)"default", &c_lexer_state->c, c_lexer_state->c.position))){
			type = DEFAULT;
		}else if((rtn = t_keyword((const unsigned char *)"do", &c_lexer_state->c, c_lexer_state->c.position))){
			type = DO;
		}else if((rtn = t_keyword((const unsigned char *)"double", &c_lexer_state->c, c_lexer_state->c.position))){
			type = DOUBLE;
		}else if((rtn = t_keyword((const unsigned char *)"else", &c_lexer_state->c, c_lexer_state->c.position))){
			type = ELSE;
		}else if((rtn = t_keyword((const unsigned char *)"enum", &c_lexer_state->c, c_lexer_state->c.position))){
			type = ENUM;
		}else if((rtn = t_keyword((const unsigned char *)"extern", &c_lexer_state->c, c_lexer_state->c.position))){
			type = EXTERN;
		}else if((rtn = t_keyword((const unsigned char *)"float", &c_lexer_state->c, c_lexer_state->c.position))){
			type = FLOAT;
		}else if((rtn = t_keyword((const unsigned char *)"for", &c_lexer_state->c, c_lexer_state->c.position))){
			type = FOR;
		}else if((rtn = t_keyword((const unsigned char *)"goto", &c_lexer_state->c, c_lexer_state->c.position))){
			type = GOTO;
		}else if((rtn = t_keyword((const unsigned char *)"if", &c_lexer_state->c, c_lexer_state->c.position))){
			type = IF;
		}else if((rtn = t_keyword((const unsigned char *)"int", &c_lexer_state->c, c_lexer_state->c.position))){
			type = INT;
		}else if((rtn = t_keyword((const unsigned char *)"long", &c_lexer_state->c, c_lexer_state->c.position))){
			type = LONG;
		}else if((rtn = t_keyword((const unsigned char *)"register", &c_lexer_state->c, c_lexer_state->c.position))){
			type = REGISTER;
		}else if((rtn = t_keyword((const unsigned char *)"return", &c_lexer_state->c, c_lexer_state->c.position))){
			type = RETURN;
		}else if((rtn = t_keyword((const unsigned char *)"short", &c_lexer_state->c, c_lexer_state->c.position))){
			type = SHORT;
		}else if((rtn = t_keyword((const unsigned char *)"signed", &c_lexer_state->c, c_lexer_state->c.position))){
			type = SIGNED;
		}else if((rtn = t_keyword((const unsigned char *)"sizeof", &c_lexer_state->c, c_lexer_state->c.position))){
			type = SIZEOF;
		}else if((rtn = t_keyword((const unsigned char *)"static", &c_lexer_state->c, c_lexer_state->c.position))){
			type = STATIC;
		}else if((rtn = t_keyword((const unsigned char *)"struct", &c_lexer_state->c, c_lexer_state->c.position))){
			type = STRUCT;
		}else if((rtn = t_keyword((const unsigned char *)"switch", &c_lexer_state->c, c_lexer_state->c.position))){
			type = SWITCH;
		}else if((rtn = t_keyword((const unsigned char *)"typedef", &c_lexer_state->c, c_lexer_state->c.position))){
			type = TYPEDEF;
		}else if((rtn = t_keyword((const unsigned char *)"union", &c_lexer_state->c, c_lexer_state->c.position))){
			type = UNION;
		}else if((rtn = t_keyword((const unsigned char *)"unsigned", &c_lexer_state->c, c_lexer_state->c.position))){
			type = UNSIGNED;
		}else if((rtn = t_keyword((const unsigned char *)"void", &c_lexer_state->c, c_lexer_state->c.position))){
			type = VOID;
		}else if((rtn = t_keyword((const unsigned char *)"volatile", &c_lexer_state->c, c_lexer_state->c.position))){
			type = VOLATILE;
		}else if((rtn = t_keyword((const unsigned char *)"while", &c_lexer_state->c, c_lexer_state->c.position))){
			type = WHILE;
		}else if((rtn = t_identifier(&c_lexer_state->c, c_lexer_state->c.position))){
			type = IDENTIFIER;
		}else if((rtn = t_constant_hex(&c_lexer_state->c, c_lexer_state->c.position))){
			type = CONSTANT_HEX;
		}else if((rtn = t_constant_exponent(&c_lexer_state->c, c_lexer_state->c.position))){
			type = CONSTANT_EXPONENT;
		}else if((rtn = t_constant_float_small(&c_lexer_state->c, c_lexer_state->c.position))){
			type = CONSTANT_FLOAT_SMALL;
		}else if((rtn = t_constant_float_large(&c_lexer_state->c, c_lexer_state->c.position))){
			type = CONSTANT_FLOAT_LARGE;
		}else if((rtn = t_constant_decimal(&c_lexer_state->c, c_lexer_state->c.position))){
			type = CONSTANT_DECIMAL;
		}else if((rtn = t_constant_character(&c_lexer_state->c, c_lexer_state->c.position))){
			type = CONSTANT_CHARACTER;
		}else if((rtn = t_constant_string(&c_lexer_state->c, c_lexer_state->c.position))){
			type = STRING_LITERAL;
		}else if((rtn = t_symbol((const unsigned char *)"...", &c_lexer_state->c, c_lexer_state->c.position))){
			type = ELLIPSIS;
		}else if((rtn = t_symbol((const unsigned char *)">>=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = RIGHT_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"<<=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = LEFT_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"+=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = ADD_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"-=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = SUB_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"*=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = MUL_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"/=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = DIV_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"%=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = MOD_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"&=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = AND_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"^=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = XOR_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)"|=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = OR_ASSIGN;
		}else if((rtn = t_symbol((const unsigned char *)">>", &c_lexer_state->c, c_lexer_state->c.position))){
			type = RIGHT_OP;
		}else if((rtn = t_symbol((const unsigned char *)"<<", &c_lexer_state->c, c_lexer_state->c.position))){
			type = LEFT_OP;
		}else if((rtn = t_symbol((const unsigned char *)"++", &c_lexer_state->c, c_lexer_state->c.position))){
			type = INC_OP;
		}else if((rtn = t_symbol((const unsigned char *)"->", &c_lexer_state->c, c_lexer_state->c.position))){
			type = PTR_OP;
		}else if((rtn = t_symbol((const unsigned char *)"--", &c_lexer_state->c, c_lexer_state->c.position))){
			type = DEC_OP;
		}else if((rtn = t_symbol((const unsigned char *)"&&", &c_lexer_state->c, c_lexer_state->c.position))){
			type = AND_OP;
		}else if((rtn = t_symbol((const unsigned char *)"||", &c_lexer_state->c, c_lexer_state->c.position))){
			type = OR_OP;
		}else if((rtn = t_symbol((const unsigned char *)"<=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = LE_OP;
		}else if((rtn = t_symbol((const unsigned char *)">=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = GE_OP;
		}else if((rtn = t_symbol((const unsigned char *)"==", &c_lexer_state->c, c_lexer_state->c.position))){
			type = EQ_OP;
		}else if((rtn = t_symbol((const unsigned char *)"!=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = NE_OP;
		}else if((rtn = t_symbol((const unsigned char *)";", &c_lexer_state->c, c_lexer_state->c.position))){
			type = SEMICOLON_CHAR;
		}else if(((rtn = t_symbol((const unsigned char *)"{", &c_lexer_state->c, c_lexer_state->c.position)) || (rtn = t_symbol((const unsigned char *)"<%", &c_lexer_state->c, c_lexer_state->c.position)))){
			type = OPEN_BRACE_CHAR;
		}else if(((rtn = t_symbol((const unsigned char *)"}", &c_lexer_state->c, c_lexer_state->c.position)) || (rtn = t_symbol((const unsigned char *)"%>", &c_lexer_state->c, c_lexer_state->c.position)))){
			type = CLOSE_BRACE_CHAR;
		}else if(((rtn = t_symbol((const unsigned char *)"[", &c_lexer_state->c, c_lexer_state->c.position)) || (rtn = t_symbol((const unsigned char *)"<:", &c_lexer_state->c, c_lexer_state->c.position)))){
			type = OPEN_SQUARE_BRACKET_CHAR;
		}else if(((rtn = t_symbol((const unsigned char *)"]", &c_lexer_state->c, c_lexer_state->c.position)) || (rtn = t_symbol((const unsigned char *)":>", &c_lexer_state->c, c_lexer_state->c.position)))){
			type = CLOSE_SQUARE_BRACKET_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)",", &c_lexer_state->c, c_lexer_state->c.position))){
			type = COMMA_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)":", &c_lexer_state->c, c_lexer_state->c.position))){
			type = COLON_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"=", &c_lexer_state->c, c_lexer_state->c.position))){
			type = EQUALS_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"(", &c_lexer_state->c, c_lexer_state->c.position))){
			type = OPEN_PAREN_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)")", &c_lexer_state->c, c_lexer_state->c.position))){
			type = CLOSE_PAREN_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)".", &c_lexer_state->c, c_lexer_state->c.position))){
			type = DOT_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"&", &c_lexer_state->c, c_lexer_state->c.position))){
			type = AMPERSAND_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"!", &c_lexer_state->c, c_lexer_state->c.position))){
			type = EXCLAMATION_MARK_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"~", &c_lexer_state->c, c_lexer_state->c.position))){
			type = TILDE_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"-", &c_lexer_state->c, c_lexer_state->c.position))){
			type = MINUS_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"+", &c_lexer_state->c, c_lexer_state->c.position))){
			type = PLUS_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"*", &c_lexer_state->c, c_lexer_state->c.position))){
			type = MULTIPLY_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"/", &c_lexer_state->c, c_lexer_state->c.position))){
			type = DIVIDE_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"%", &c_lexer_state->c, c_lexer_state->c.position))){
			type = PERCENT_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"<", &c_lexer_state->c, c_lexer_state->c.position))){
			type = OPEN_ANGLE_BRACKET_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)">", &c_lexer_state->c, c_lexer_state->c.position))){
			type = CLOSE_ANGLE_BRACKET_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"^", &c_lexer_state->c, c_lexer_state->c.position))){
			type = CARET_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"|", &c_lexer_state->c, c_lexer_state->c.position))){
			type = PIPE_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"?", &c_lexer_state->c, c_lexer_state->c.position))){
			type = QUESTION_MARK_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"#", &c_lexer_state->c, c_lexer_state->c.position))){
			type = NUMBER_SIGN_CHAR;
		}else{
			unsigned int i = 0;
			printf("Lexer stopping on character 0x%02x.  This was character %u of %u\nFollowing chars are:", c_lexer_state->c.buf[c_lexer_state->c.position], (c_lexer_state->c.position + 1), c_lexer_state->c.buffer_size);
			for(i = c_lexer_state->c.position; i < (c_lexer_state->c.position + 100) && i < c_lexer_state->c.buffer_size; i++){
				if(c_lexer_state->c.buf[i]){
					printf("%c", c_lexer_state->c.buf[i]);
				}
			}
			buffered_printf(c_lexer_state->c.buffered_output, "\n");
			assert(0 && "Invalid character in lexer.");
			return 1;
		}

		new_token = struct_c_lexer_token_memory_pool_malloc(c_lexer_state->c.memory_pool_collection->struct_c_lexer_token_pool);
		new_token->type = type;
		new_token->first_byte = first_byte;
		new_token->last_byte = (unsigned char *)((first_byte + rtn) - 1);
		{
			/*
			unsigned char *g = new_token->first_byte;
			printf("Got token: %s ", get_c_token_type_names()[new_token->type]);
			do{
				if(*g == '\n'){
					printf("\\n");
				}else if(*g == '\r'){
					printf("\\r");
				}else{
					printf("%c", *g);
				}
			}while(g++ != new_token->last_byte);
			printf("\n");
			*/
		}

		struct_c_lexer_token_ptr_list_add_end(&c_lexer_state->tokens, new_token);
		show_lexer_token(c_lexer_state->c.buffered_output, get_c_token_type_names()[new_token->type], new_token->first_byte, new_token->last_byte, SHOW_LEXER_TOKENS);
		c_lexer_state->c.position += rtn;
	}
	return 0;
}
Ejemplo n.º 2
0
Archivo: lexer.c Proyecto: chyyuu/recc
int lex_asm(struct l2_lexer_state * l2_lexer_state, unsigned char * filename, unsigned char * buffer, unsigned int buffer_size){
	l2_lexer_state->c.buf = buffer;
	l2_lexer_state->c.position = 0;
	l2_lexer_state->c.current_line = 0;
	l2_lexer_state->c.filename = filename;
	l2_lexer_state->c.buffer_size = buffer_size;

	struct_l2_lexer_token_ptr_list_create(&l2_lexer_state->tokens);

	while(l2_lexer_state->c.position < buffer_size){
		unsigned int rtn = 0;
		unsigned char * first_byte = &l2_lexer_state->c.buf[l2_lexer_state->c.position];
		enum l2_token_type type;
		struct l2_lexer_token * new_token;

		if((rtn = t_space(&l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_SPACE;
		}else if((rtn = t_newline(&l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_NEWLINE;
			l2_lexer_state->c.current_line = l2_lexer_state->c.current_line + rtn;
		}else if((rtn = t_l2_comment(&l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_COMMENT;
		}else if((rtn = t_symbol((const unsigned char *)":", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_COLON_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)",", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_COMMA_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"-", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_MINUS_CHAR;
		}else if((rtn = t_keyword((const unsigned char *)"OFFSET", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_OFFSET;
		}else if((rtn = t_keyword((const unsigned char *)"RELOCATABLE", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_RELOCATABLE;
		}else if((rtn = t_keyword((const unsigned char *)"IMPLEMENTS", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_IMPLEMENTS;
		}else if((rtn = t_keyword((const unsigned char *)"IMPLEMENTED", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_IMPLEMENTED;
		}else if((rtn = t_keyword((const unsigned char *)"FUNCTION", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_FUNCTION;
		}else if((rtn = t_keyword((const unsigned char *)"VARIABLE", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_VARIABLE;
		}else if((rtn = t_keyword((const unsigned char *)"CONSTANT", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_CONSTANT;
		}else if((rtn = t_keyword((const unsigned char *)"REQUIRES", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_REQUIRES;
		}else if((rtn = t_keyword((const unsigned char *)"REQUIRED", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_REQUIRED;
		}else if((rtn = t_keyword((const unsigned char *)"INTERNAL", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_INTERNAL;
		}else if((rtn = t_keyword((const unsigned char *)"EXTERNAL", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_EXTERNAL;
		}else if((rtn = t_keyword((const unsigned char *)"STRING", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_STRING;
		}else if((rtn = t_keyword((const unsigned char *)"UNRESOLVED", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_UNRESOLVED;
		}else if((rtn = t_keyword((const unsigned char *)"REGION", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_REGION;
		}else if((rtn = t_keyword((const unsigned char *)"START", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_START;
		}else if((rtn = t_keyword((const unsigned char *)"END", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_END;
		}else if((rtn = t_keyword((const unsigned char *)"PERMISSION", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_PERMISSION;
		}else if((rtn = t_l2_register(&l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_REGISTER;
		}else if((rtn = t_constant_hex(&l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_CONSTANT_HEX;
		}else if((rtn = t_constant_decimal(&l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_CONSTANT_DECIMAL;
		}else if((rtn = t_keyword((const unsigned char *)"add", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_ADD;
		}else if((rtn = t_keyword((const unsigned char *)"sub", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_SUB;
		}else if((rtn = t_keyword((const unsigned char *)"mul", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_MUL;
		}else if((rtn = t_keyword((const unsigned char *)"div", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_DIV;
		}else if((rtn = t_keyword((const unsigned char *)"beq", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_BEQ;
		}else if((rtn = t_keyword((const unsigned char *)"blt", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_BLT;
		}else if((rtn = t_keyword((const unsigned char *)"loa", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_LOA;
		}else if((rtn = t_keyword((const unsigned char *)"sto", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_STO;
		}else if((rtn = t_keyword((const unsigned char *)"ll", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_LL;
		}else if((rtn = t_keyword((const unsigned char *)"and", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_AND;
		}else if((rtn = t_keyword((const unsigned char *)"or", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_OR;
		}else if((rtn = t_keyword((const unsigned char *)"not", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_NOT;
		}else if((rtn = t_keyword((const unsigned char *)"shr", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_SHR;
		}else if((rtn = t_keyword((const unsigned char *)"shl", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_SHL;
		}else if((rtn = t_keyword((const unsigned char *)"DW", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_DW;
		}else if((rtn = t_keyword((const unsigned char *)"SW", &l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_SW;
		}else if((rtn = t_identifier(&l2_lexer_state->c, l2_lexer_state->c.position))){
			type = L2_IDENTIFIER;
		}else{
			unsigned int i = 0;
			buffered_printf(l2_lexer_state->c.buffered_output, "Lexer stopping on character 0x%02x.  This was character %u of %u\nFollowing chars are:", l2_lexer_state->c.buf[l2_lexer_state->c.position], (l2_lexer_state->c.position + 1), l2_lexer_state->c.buffer_size);
			for(i = l2_lexer_state->c.position; i < (l2_lexer_state->c.position + 100) && i < l2_lexer_state->c.buffer_size; i++){
				if(l2_lexer_state->c.buf[i]){
					buffered_printf(l2_lexer_state->c.buffered_output, "%c", l2_lexer_state->c.buf[i]);
				}
			}
			buffered_printf(l2_lexer_state->c.buffered_output, "\n");
			return 1;
		}

		new_token = struct_l2_lexer_token_memory_pool_malloc(l2_lexer_state->c.memory_pool_collection->struct_l2_lexer_token_pool);
		new_token->type = type;
		new_token->first_byte = first_byte;
		new_token->last_byte = (unsigned char *)((first_byte + rtn) - 1);

		struct_l2_lexer_token_ptr_list_add_end(&l2_lexer_state->tokens, new_token);
		show_lexer_token(l2_lexer_state->c.buffered_output, get_c_token_type_names()[new_token->type], new_token->first_byte, new_token->last_byte, SHOW_LEXER_TOKENS);
		l2_lexer_state->c.position += rtn;
	}
	return 0;
}
Ejemplo n.º 3
0
int lex_asm(struct asm_lexer_state * asm_lexer_state, unsigned char * filename, unsigned char * buffer, unsigned int buffer_size){
	struct memory_pooler * asm_lexer_token_pool = memory_pooler_collection_get_pool(asm_lexer_state->c.memory_pooler_collection, sizeof(struct asm_lexer_token));
	asm_lexer_state->c.buf = buffer;
	asm_lexer_state->c.position = 0;
	asm_lexer_state->c.current_line = 0;
	asm_lexer_state->c.filename = filename;
	asm_lexer_state->c.buffer_size = buffer_size;

	g_format_buffer_use();

	struct_asm_lexer_token_ptr_list_create(&asm_lexer_state->tokens);

	while(asm_lexer_state->c.position < buffer_size){
		unsigned int rtn = 0;
		unsigned char * first_byte = &asm_lexer_state->c.buf[asm_lexer_state->c.position];
		enum asm_token_type type;
		struct asm_lexer_token * new_token;

		if((rtn = t_space(&asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_SPACE;
		}else if((rtn = t_newline(&asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_NEWLINE;
			asm_lexer_state->c.current_line = asm_lexer_state->c.current_line + rtn; /* NEWLINE token can be multiple newlines */
		}else if((rtn = t_asm_comment(&asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_ASM_COMMENT;
		}else if((rtn = t_symbol((const unsigned char *)":", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_COLON_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)",", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_COMMA_CHAR;
		}else if((rtn = t_symbol((const unsigned char *)"-", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_MINUS_CHAR;
		}else if((rtn = t_keyword((const unsigned char *)"OFFSET", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_OFFSET;
		}else if((rtn = t_keyword((const unsigned char *)"RELOCATABLE", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_RELOCATABLE;
		}else if((rtn = t_keyword((const unsigned char *)"IMPLEMENTS", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_IMPLEMENTS;
		}else if((rtn = t_keyword((const unsigned char *)"REQUIRES", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_REQUIRES;
		}else if((rtn = t_keyword((const unsigned char *)"INTERNAL", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_INTERNAL;
		}else if((rtn = t_keyword((const unsigned char *)"EXTERNAL", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_EXTERNAL;
		}else if((rtn = t_asm_register(&asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_REGISTER;
		}else if((rtn = t_constant_hex(&asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_CONSTANT_HEX;
		}else if((rtn = t_constant_decimal(&asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_CONSTANT_DECIMAL;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"add", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_ADD;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"sub", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_SUB;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"mul", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_MUL;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"div", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_DIV;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"beq", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_BEQ;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"blt", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_BLT;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"loa", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_LOA;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"sto", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_STO;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"ll", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_LL;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"and", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_AND;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"or", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_OR;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"not", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_NOT;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"shr", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_SHR;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"shl", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_SHL;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"dw", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_DW;
		}else if((rtn = t_keyword_space_check((const unsigned char *)"sw", &asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_SW;
		}else if((rtn = t_identifier(&asm_lexer_state->c, asm_lexer_state->c.position))){
			type = A_IDENTIFIER;
		}else{
			unsigned int i = 0;
			buffered_printf(asm_lexer_state->c.buffered_output, "Lexer stopping on character '%c' 0x(%x)\n", asm_lexer_state->c.buf[asm_lexer_state->c.position], asm_lexer_state->c.buf[asm_lexer_state->c.position]);
			for(asm_lexer_state->c.position = 0; i < asm_lexer_state->c.position + 100; i++){
				if(asm_lexer_state->c.buf[i]){
					buffered_printf(asm_lexer_state->c.buffered_output, "%c", asm_lexer_state->c.buf[i]);
				}
			}
			g_format_buffer_release();
			return 1;
		}

		new_token = (struct asm_lexer_token *)memory_pooler_malloc(asm_lexer_token_pool);
		new_token->type = type;
		new_token->first_byte = first_byte;
		new_token->last_byte = (unsigned char *)((first_byte + rtn) - 1);

		struct_asm_lexer_token_ptr_list_add_end(&asm_lexer_state->tokens, new_token);
		show_lexer_token(asm_lexer_state->c.buffered_output, get_c_token_type_names()[new_token->type], new_token->first_byte, new_token->last_byte, SHOW_LEXER_TOKENS);
		asm_lexer_state->c.position += rtn;
	}
	g_format_buffer_release();
	return 0;
}