int get_token(void) { int type = TOKEN_TYPE_ERR; int index = 0; int status = SCAN_STATUS_START; int save; int c; while (status != SCAN_STATUS_DONE) { c = get_char(); save = BOOL_YES; switch (status) { case SCAN_STATUS_START: if (' ' == c || '\t' == c) { save = BOOL_NO; } else if ('\n' == c) { save = BOOL_NO; ++g_line_numer; } else if (isdigit(c)) { status = SCAN_STATUS_IN_CINT; } else if (isalpha(c) || '_' == c) { status = SCAN_STATUS_IN_ID; } else if ('.' == c) { status = SCAN_STATUS_IN_ACCESS; } else if ('#' == c) { save = BOOL_NO; status = SCAN_STATUE_IN_COMMENT; } else { status = SCAN_STATUS_DONE; switch (c) { case EOF: save = BOOL_NO; type = TOKEN_TYPE_EOF; break; case '=': type = TOKEN_TYPE_ASSIGN; break; case '<': type = TOKEN_TYPE_INHERIT; break; case '[': type = TOKEN_TYPE_LBRACKET; break; case ']': type = TOKEN_TYPE_RBRACKET; break; case '{': type = TOKEN_TYPE_LBRACE; break; case '}': type = TOKEN_TYPE_RBRACE; break; default: save = BOOL_NO; type = TOKEN_TYPE_ERR; break; } } break; case SCAN_STATUS_IN_ACCESS: if (isalpha(c) || '_' == c) { unget_char(); save = BOOL_NO; status = SCAN_STATUS_DONE; type = TOKEN_TYPE_ACCESS; } else { fprintf(stderr, "Lexial error: [%d] after '.' ...\n", g_line_numer); exit(1); } break; case SCAN_STATUS_IN_ID: if (!isalnum(c) && '_' != c) { unget_char(); save = BOOL_NO; status = SCAN_STATUS_DONE; type = TOKEN_TYPE_ID; } break; case SCAN_STATUS_IN_CINT: if ('.' == c) { status = SCAN_STATUS_IN_CREAL; } else { if (!isdigit(c)) { unget_char(); save = BOOL_NO; status = SCAN_STATUS_DONE; type = TOKEN_TYPE_CINT; } } break; case SCAN_STATUS_IN_CREAL: if (!isdigit(c)) { unget_char(); save = BOOL_NO; status = SCAN_STATUS_DONE; type = TOKEN_TYPE_CREAL; } break; case SCAN_STATUE_IN_COMMENT: save = BOOL_NO; if (EOF == c) { status = SCAN_STATUS_DONE; type = TOKEN_TYPE_EOF; } else if ('\n' == c) { ++g_line_numer; status = SCAN_STATUS_START; } break; case SCAN_STATUS_DONE: default: fprintf(g_scan_stream, "Scanner bug: status = %d\n", status); status = SCAN_STATUS_DONE; type = TOKEN_TYPE_ERR; break; } if (save && index < MAX_TOKEN) g_token[index++] = (char)c; if (SCAN_STATUS_DONE == status) { g_token[index] = 0; if (TOKEN_TYPE_ID == type) type = lookup_reserved(g_token); } } echo_scanner(g_scan_stream, g_line_numer, type, g_token); return type; }
static int ScanOneToken (FILE *fp, struct token_t *token) { int i, ch, nextch, prevch; ch = getc(fp); // read next char from input stream while (isspace(ch)) // if necessary, keep reading til non-space char ch = getc(fp); // (discard any white space) switch(ch) { case '/': // could either begin comment or T_DIVIDE op nextch = getc(fp); if (nextch == '/' || nextch == '*') ; // here you would skip over the comment else ungetc(nextch, fp); // fall-through to single-char token case case ';': case '(': case ')': case ',': case '=': // ... and other single char tokens token->type = ch; // ASCII value is used as token type return ch; // ASCII value used as token type case '\"': token->type = T_STRING; prevch = ch; ch = getc(fp); for (i = 0; (prevch != '\\') && (ch != '\"'); i++) { token->val.stringValue[i] = ch; prevch = ch; ch = getc(fp); } token->val.stringValue[i] = '\0'; return token->type; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': token->val.stringValue[0] = ch; for (i = 1; isupper(ch = getc(fp)); i++) // gather uppercase token->val.stringValue[i] = ch; ungetc(ch, fp); token->val.stringValue[i] = '\0'; // lookup reserved word token->type = lookup_reserved(token->val.stringValue); return token->type; case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': token->type = T_IDENTIFIER; token->val.stringValue[0] = ch; for (i = 1; islower(ch = getc(fp)); i++) token->val.stringValue[i] = ch; // gather lowercase ungetc(ch, fp); token->val.stringValue[i] = '\0'; return T_IDENTIFIER; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': token->type = T_INTEGER; token->val.intValue = ch - '0'; while (isdigit(ch = getc(fp))) // convert digit char to number token->val.intValue = token->val.intValue * 10 + ch - '0'; ungetc(ch, fp); return T_INTEGER; case EOF: return T_END; default: // anything else is not recognized token->val.intValue = ch; token->type = T_UNKNOWN; return T_UNKNOWN; } }