/*
 * get_token - scan the next lexical token from the input.
 *
 * Characters are pulled one at a time with get_next_char() and fed through a
 * small state machine (lexer_status).  The text of the token is accumulated
 * in token_buf and NUL-terminated when the token is complete; the token text
 * and its length are then printed to stdout.
 *
 * token: not read or written by this function at present.
 *
 * Returns the type of the token that was scanned.  ERROR is returned for
 * unrecognised characters and for unterminated strings/regexps.
 *
 * NOTE(review): the string, number and regexp states do not assign a token
 * type (no such constants are referenced here), so they currently return the
 * ERROR default instead of an indeterminate value - TODO assign proper types.
 * NOTE(review): assumes get_next_char() returns an int in the getc() style
 * (an unsigned char value or EOF) - confirm against its declaration.
 */
token_type get_token(token *token)
{
    token_type current_token_t = ERROR; /* deterministic default, never garbage */
    lexer_status status = INITIAL_STATUS;
    int cc;                             /* int so that EOF is distinguishable */
    char skip;
    unsigned char token_pos = 0;

    (void)token;

    while (status != INFINISH_STATUS) {
        skip = FALSE;
        cc = get_next_char();

        switch (status) {
        case INITIAL_STATUS:
            if ('"' == cc) {
                status = INSTRING_STATUS;
            } else if (isdigit(cc)) {
                status = INNUM_INT_STATUS;
            } else if (isalpha(cc) || ('_' == cc)) {
                /* Set the type on entry so one-character identifiers are
                 * classified too. */
                current_token_t = IDENTIFIER_TOKEN;
                status = INIDENTIFIER_STATUS;
            } else if ('`' == cc) {
                status = INREGEXP_STATUS;
            } else if ('#' == cc) {
                skip = TRUE;
                status = INCOMMENT_STATUS;
            } else if (isseparator(cc)) {
                skip = TRUE;
            } else {
                /* Single-character token, or the first character of a
                 * two-character operator (handled in INSIGN_STATUS). */
                status = INFINISH_STATUS;
                switch (cc) {
                case '+':
                    current_token_t = PLUS;
                    break;
                case '-':
                    current_token_t = MINUS;
                    break;
                case '*':
                    current_token_t = TIMES;
                    break;
                case '/':
                    current_token_t = OVER;
                    break;
                case '=':
                    current_token_t = ASSIGN;
                    status = INSIGN_STATUS;
                    break;
                case '<':
                    current_token_t = LT;
                    status = INSIGN_STATUS;
                    break;
                case '>':
                    current_token_t = GT;
                    status = INSIGN_STATUS;
                    break;
                case '(':
                    current_token_t = LPAREN;
                    break;
                case ')':
                    current_token_t = RPAREN;
                    break;
                case ';':
                    current_token_t = SEMI;
                    break;
                case EOF:
                    /* EOF is not a character; keep it out of the token text. */
                    skip = TRUE;
                    current_token_t = END_OF_FILE_TOKEN;
                    break;
                default:
                    current_token_t = ERROR;
                    break;
                }
            }
            break;

        case INSTRING_STATUS:
            if ('"' == cc) {
                status = INFINISH_STATUS;
            } else if (EOF == cc) {
                /* Unterminated string: stop instead of looping forever. */
                skip = TRUE;
                current_token_t = ERROR;
                status = INFINISH_STATUS;
            }
            break;

        case INSIGN_STATUS:
            /* Second character of a possible two-character operator.  If it
             * does not extend the operator it is pushed back. */
            if (current_token_t == LT) {
                if ('<' == cc) {
                    current_token_t = LSHIFT;
                } else if ('=' == cc) {
                    current_token_t = LTOREQ;
                } else {
                    skip = TRUE;
                    unget_next_char();
                }
            } else if (current_token_t == GT) {
                if ('>' == cc) {
                    current_token_t = RSHIFT;
                } else if ('=' == cc) {
                    current_token_t = GTOREQ;
                } else {
                    skip = TRUE;
                    unget_next_char();
                }
            } else if (current_token_t == ASSIGN) {
                if ('=' == cc) {
                    current_token_t = EQUAL;
                } else {
                    skip = TRUE;
                    unget_next_char();
                }
            } else {
                /* no way to get here */
            }
            status = INFINISH_STATUS;
            break;

        case INCOMMENT_STATUS:
            /* Comment text never becomes part of a token. */
            skip = TRUE;
            if ('\n' == cc) {
                status = INITIAL_STATUS;
            } else if (EOF == cc) {
                /* Was "status == INFINISH_STATUS;", a no-op comparison that
                 * left the loop spinning at end of input. */
                status = INFINISH_STATUS;
                current_token_t = END_OF_FILE_TOKEN;
            }
            break;

        case INNUM_INT_STATUS:
            if ('.' == cc) {
                status = INNUM_DOT_STATUS;
            } else if (!isdigit(cc)) {
                status = INFINISH_STATUS;
                skip = TRUE;
                unget_next_char();
            }
            break;

        case INNUM_DOT_STATUS:
            if (isdigit(cc)) {
                status = INNUM_DEC_STATUS;
            } else {
                /* No fractional digits follow the dot: end the number here
                 * and leave the character for the next token. */
                status = INFINISH_STATUS;
                skip = TRUE;
                unget_next_char();
            }
            break;

        case INNUM_DEC_STATUS:
            if (!isdigit(cc)) {
                /* The terminating character belongs to the next token. */
                status = INFINISH_STATUS;
                skip = TRUE;
                unget_next_char();
            }
            break;

        case INIDENTIFIER_STATUS:
            if (isdigit(cc) || isalpha(cc) || ('_' == cc)) {
                current_token_t = IDENTIFIER_TOKEN;
            } else {
                skip = TRUE;
                status = INFINISH_STATUS;
                unget_next_char();
            }
            break;

        case INREGEXP_STATUS:
            if ('`' == cc) {
                status = INFINISH_STATUS;
            } else if (EOF == cc) {
                /* Unterminated regexp: stop instead of looping forever. */
                skip = TRUE;
                current_token_t = ERROR;
                status = INFINISH_STATUS;
            }
            break;

        case INFINISH_STATUS:
            break;

        case INERROR_STATUS:
            /* point to where is wrong */
            exit(-1);

        default:
            break;
        }

        /*
         * Append the character unless it was skipped or the buffer is full;
         * overlong tokens are silently truncated.
         * NOTE(review): token_buf's declaration is not visible here.  The
         * bound keeps both this write and the terminator below inside a
         * buffer of MAX_TOKEN_SIZE bytes - confirm the intended capacity.
         */
        if (!skip && (token_pos + 1 < MAX_TOKEN_SIZE)) {
            token_buf[token_pos++] = (char)cc;
        }

        if (INFINISH_STATUS == status) {
            /* Do some saving token work */
            token_buf[token_pos] = '\0';
            if (IDENTIFIER_TOKEN == current_token_t) {
                if (NONE != is_key_word(token_buf, token_pos)) {
                    current_token_t = KEYWORD_TOKEN;
                }
            }
            printf("token = %d = %s\n", token_pos, token_buf);
        }
    }

    return current_token_t;
}
int assemble(struct _asm_context *asm_context) { char token[TOKENLEN]; int token_type; while(1) { token_type = get_token(asm_context, token, TOKENLEN); #ifdef DEBUG printf("%d: <%d> %s\n", asm_context->line, token_type, token); #endif if (token_type == TOKEN_EOF) break; if (token_type == TOKEN_EOL) { if (asm_context->macros.stack_ptr == 0) { asm_context->line++; } } else if (token_type == TOKEN_LABEL) { int param_count_temp; if (macros_lookup(&asm_context->macros, token, ¶m_count_temp) != NULL) { print_already_defined(asm_context, token); return -1; } if (symbols_append(&asm_context->symbols, token, asm_context->address / asm_context->bytes_per_address) == -1) { return -1; } } else if (token_type == TOKEN_POUND || IS_TOKEN(token,'.')) { token_type = get_token(asm_context, token, TOKENLEN); #ifdef DEBUG printf("%d: <%d> %s\n", asm_context->line, token_type, token); #endif if (token_type == TOKEN_EOF) break; if (strcasecmp(token, "define") == 0) { if (macros_parse(asm_context, IS_DEFINE) != 0) return -1; } else if (strcasecmp(token, "ifdef") == 0) { parse_ifdef(asm_context, 0); } else if (strcasecmp(token, "ifndef") == 0) { parse_ifdef(asm_context, 1); } else if (strcasecmp(token, "if") == 0) { parse_if(asm_context); } else if (strcasecmp(token, "endif") == 0) { if (asm_context->ifdef_count < 1) { printf("Error: unmatched #endif at %s:%d\n", asm_context->filename, asm_context->ifdef_count); return -1; } return 0; } else if (strcasecmp(token, "else") == 0) { if (asm_context->ifdef_count < 1) { printf("Error: unmatched #else at %s:%d\n", asm_context->filename, asm_context->ifdef_count); return -1; } return 2; } else if (strcasecmp(token, "include") == 0) { if (parse_include(asm_context) != 0) return -1; } else if (strcasecmp(token, "binfile") == 0) { if (parse_binfile(asm_context) != 0) return -1; } else if (strcasecmp(token, "code") == 0) { asm_context->segment = SEGMENT_CODE; } else if (strcasecmp(token, "bss") == 0) { asm_context->segment = SEGMENT_BSS; } else if 
(strcasecmp(token, "msp430_cpu4") == 0) { asm_context->msp430_cpu4 = 1; } else if (strcasecmp(token, "macro") == 0) { if (macros_parse(asm_context, IS_MACRO) != 0) return -1; } else if (strcasecmp(token, "pragma") == 0) { if (parse_pragma(asm_context) != 0) return -1; } else if (strcasecmp(token, "device") == 0) { if (parse_device(asm_context) != 0) return -1; } else if (strcasecmp(token, "set") == 0) { if (parse_set(asm_context) != 0) return -1; } else if (strcasecmp(token, "export") == 0) { if (parse_export(asm_context) != 0) return -1; } else if (strcasecmp(token, "equ") == 0 || strcasecmp(token, "def")==0) { if (parse_equ(asm_context) != 0) return -1; } else { int ret = check_for_directive(asm_context, token); if (ret == 2) break; if (ret == -1) return -1; if (ret != 1) { printf("Error: Unknown directive '%s' at %s:%d.\n", token, asm_context->filename, asm_context->line); return -1; } } } else if (token_type == TOKEN_STRING) { int ret = check_for_directive(asm_context, token); if (ret == 2) break; if (ret == -1) return -1; if (ret != 1) { int start_address = asm_context->address; char token2[TOKENLEN]; int token_type2; token_type2 = get_token(asm_context, token2, TOKENLEN); if (strcasecmp(token2, "equ") == 0) { //token_type2=get_token(asm_context, token2, TOKENLEN); int ptr = 0; int ch = '\n'; while(1) { ch = get_next_char(asm_context); if (ch == EOF || ch == '\n') break; if (ch == '*' && ptr > 0 && token2[ptr-1] == '/') { macros_strip_comment(asm_context); ptr--; continue; } token2[ptr++] = ch; if (ptr == TOKENLEN-1) { printf("Internal Error: token overflow at %s:%d.\n", __FILE__, __LINE__); return -1; } } token2[ptr] = 0; unget_next_char(asm_context, ch); macros_strip(token2); macros_append(asm_context, token, token2, 0); } else { pushback(asm_context, token2, token_type2); //int address=asm_context->address; //ret=parse_instruction_msp430(asm_context, token); ret = asm_context->parse_instruction(asm_context, token); if (asm_context->pass == 2 && 
asm_context->list != NULL && asm_context->include_count==0) { asm_context->list_output(asm_context, start_address); fprintf(asm_context->list, "\n"); } if (ret < 0) return -1; #if 0 if (asm_context->address-start_address==0) { printf("ZOMG %x ret=%d %d\n", start_address, ret, asm_context->address-start_address); } #endif if (asm_context->macros.stack_ptr == 0) { asm_context->line++; } asm_context->instruction_count++; if (asm_context->address>start_address) { asm_context->code_count += (asm_context->address - start_address); } } } } else { print_error_unexp(token, asm_context); return -1; } } if (asm_context->error == 1) { return -1; } return 0; }