/*
 * Write the string tp to the current output stream (cur_fpo).
 *
 * mode == ESCOFF : the string is written verbatim.
 * otherwise      : the string is written in escaped form:
 *                    - an empty string is written as "||";
 *                    - a string for which num() (read.h) is true gets a
 *                      leading backslash;
 *                    - a byte pair accepted by iskanji()/iskanji2() is copied
 *                      through unchanged;
 *                    - a byte rejected by isprkana() is written as "#\" plus
 *                      its value as a zero-padded 3-digit decimal;
 *                    - a byte accepted by isesc() is preceded by a backslash.
 */
static void putstr(int mode, STR tp) {
    /* Unescaped mode: dump the text as it is. */
    if (mode == ESCOFF) {
        fprintf(cur_fpo, "%s", tp);
        return;
    }

    /* "||" stands for the null (empty) string. */
    if (*tp == '\0') {
        fprintf(cur_fpo, "||");
        return;
    }

    /*
     * When the text consists of digits but is not a numeric value, prefix it
     * with '\\' so the two can be told apart (num() comes from read.h).
     */
    if (num(tp)) {
        fputc('\\', cur_fpo);
    }

    /* The string is known to be non-empty here, so a plain while loop runs
     * the body at least once. */
    while (*tp != '\0') {
        if (iskanji(*tp) && iskanji2(*(tp + 1))) {
            /* Two-byte character: copy both bytes untouched. */
            fputc(*tp, cur_fpo);
            tp++;
            fputc(*tp, cur_fpo);
            tp++;
            continue;
        }

        if (isprkana(*tp)) {
            /* Ordinary byte; escape it first when isesc() says so. */
            if (isesc(*tp)) {
                fputc('\\', cur_fpo);
            }
            fputc(*tp, cur_fpo);
        } else {
            /* Not accepted by isprkana(): emit "#\" + decimal code. */
            fprintf(cur_fpo, "#\\%03d", *tp);
        }
        tp++;
    }
}
/*
 * Print the string tp on cur_fpo.
 *
 * With mode == ESCOFF the bytes are printed unchanged.  In any other mode the
 * escaped notation is produced: "||" for an empty string, a leading backslash
 * when num(tp) is true (presumably to mark digit strings that are not
 * numbers -- confirm against num()), raw copy of byte pairs accepted by
 * iskanji()/iskanji2(), "#\" followed by a 3-digit decimal code for bytes
 * rejected by isprkana(), and a backslash in front of bytes accepted by
 * isesc().
 */
static void putstr(int mode, STR tp) {//N//
    if (mode != ESCOFF) {
        if (*tp != '\0') {
            if (num(tp)) {
                fputc('\\', cur_fpo);
            }

            /* tp is non-empty on entry, so testing the terminator up front
             * is equivalent to testing it after each pass. */
            for (; *tp != '\0'; ) {
                if (iskanji(*tp) && iskanji2(*(tp + 1))) {
                    /* Double-byte character: pass both bytes through. */
                    fputc(*tp, cur_fpo);
                    ++tp;
                    fputc(*tp, cur_fpo);
                    ++tp;
                } else if (isprkana(*tp)) {
                    /* Regular byte, optionally backslash-escaped. */
                    if (isesc(*tp)) {
                        fputc('\\', cur_fpo);
                    }
                    fputc(*tp, cur_fpo);
                    ++tp;
                } else {
                    /* Byte rejected by isprkana(): numeric escape. */
                    fprintf(cur_fpo, "#\\%03d", *tp);
                    ++tp;
                }
            }
        } else {
            /* Empty string marker. */
            fprintf(cur_fpo, "||");
        }
    } else {
        /* Escaping disabled: print verbatim. */
        fprintf(cur_fpo, "%s", tp);
    }
}
oo_token *lex(FILE *fp, size_t sz) { size_t l; char *source = malloc(sz); oo_token *head = NULL, *curr = NULL; if(!source) goto err1; if(fseek(fp, 0L, SEEK_SET) != 0) goto err0; l = fread(source, sizeof(char), sz, fp); source[++l] = 0; char *cp = source; oo_floc empty = { .line = 0, .column = 0, .offset = 0}; curr = head = alloc_token(oot_START, empty, 0, NULL); oo_floc loc = { .line = 1, .column = 1, .offset = -1 }; oo_states state = oos_start, prev = state; uint32_t end_offset = 0; oo_floc block; do { char a = *cp; oo_tokens token = oot_UNKNOWN; step(&state, &cp, &loc); if(state == oos_err) goto syn; if (state == oos_str || state == oos_ident || state == oos_num) { block.line = loc.line; block.column = loc.column; block.offset = loc.offset; } int end_of_str = (state == oos_eo_str || state == oos_eo_id || state == oos_eo_num); if (end_of_str) { /* id, num & str */ end_offset = loc.offset; int t = -1; if (state == oos_eo_id) t = oot_IDENT; if (state == oos_eo_str) t = oot_STR; if (state == oos_eo_num) t = oot_NUM; if (t == oot_IDENT || t == oot_STR || t == oot_NUM) { curr = alloc_token(t, block, end_offset - block.offset, curr); } if(state == oos_eo_id || state == oos_eo_num) { cp--; loc.offset--; } end_offset = 0; } else { /* sym */ if (state == oos_sym) { char c = *(source + (loc.offset)); token = oo_tokens_from_char(c); } if(token != oot_UNKNOWN) { curr = alloc_token(token, loc, 1, curr); } } syn: printf("o: %d, s -> %d, s_s = '%s' (%d)", (int)loc.offset, (int)state, oo_state_to_string(state), (int)a); if(isalpha(a) || isdigit(a) || issym(a)) { printf(", '%c'", a); } printf("\n"); if(state == oos_err) { if(prev == oos_in_str) { printf("Syntax error, unterminated string constant, line %d column %d\n", block.line, block.column - 1); } /* TODO: Add oot_ERR token */ curr = alloc_token(oot_EOF, loc, 1, curr); return head; } prev = state; } while(state != oos_eof); if(state == oos_eof) { curr = alloc_token(oot_EOF, loc, 0, curr); } err0: if(source) free(source), 
source = NULL; err1: return head; } static void step(oo_states *state, char **cp, oo_floc *loc) { int c = **cp, k = 0; int i = -1; /* unknown */ if(c != 0 && *(*cp+1) != 0) k = *(*cp +1); i = isspace(c) ? 0 : i; /* ws */ i = c == '\n' ? 1 : i; /* new line */ i = issym(c) ? 2 : i; /* sym */ i = isdigit(c) ? 3 : i; /* numeric */ i = isid(c) ? 4 : i; /* id */ i = c == '"' ? 5 : i; /* str */ i = isesc(c, k) ? 6 : i; /* escape */ i = c == 0 ? 7 : i; /* eof */ if(i == -1) printf("---> '%c'\n", c); oo_branch *b = &table[*state][i]; *state = b->state; if(b->advance) { if(b->state == oos_nl) { loc->line++; loc->column = 0; } loc->column++; loc->offset++; (*cp)++; } } static oo_tokens oo_tokens_from_char(int t) { if (t == '~') return oot_TILDE; if (t == '`') return oot_GACCENT; if (t == '!') return oot_BANG; if (t == '@') return oot_AT; if (t == '#') return oot_POUND; if (t == '$') return oot_DOLLAR; if (t == '%') return oot_PERCENT; if (t == '^') return oot_EXP; if (t == '&') return oot_AND; if (t == '*') return oot_STAR; if (t == '(') return oot_LPAREN; if (t == ')') return oot_RPAREN; if (t == '-') return oot_MINUS; if (t == '=') return oot_EQUAL; if (t == '_') return oot_UNDER; if (t == '+') return oot_PLUS; if (t == '[') return oot_LBRACKET; if (t == ']') return oot_RBRACKET; if (t == '\\') return oot_BSLASH; if (t == '{') return oot_LBRACE; if (t == '}') return oot_RBRACE; if (t == '|') return oot_BAR; if (t == ';') return oot_SEMI; if (t == '\'') return oot_SQUOTE; if (t == ':') return oot_COLON; if (t == '"') return oot_DQUOTE; if (t == ',') return oot_COMMA; if (t == '.') return oot_DOT; if (t == '/') return oot_FSLASH; if (t == '<') return oot_LT; if (t == '>') return oot_GT; if (t == '?') return oot_WHAT; return oot_UNKNOWN; } char *string_from_oo_tokens(oo_tokens t) { if (t == oot_START) return "oot_START"; if (t == '~') return "oot_TILDE"; if (t == '`') return "oot_GACCENT"; if (t == '!') return "oot_BANG"; if (t == '@') return "oot_AT"; if (t == '#') return 
"oot_POUND"; if (t == '$') return "oot_DOLLAR"; if (t == '%') return "oot_PERCENT"; if (t == '^') return "oot_EXP"; if (t == '&') return "oot_AND"; if (t == '*') return "oot_STAR"; if (t == '(') return "oot_LPAREN"; if (t == ')') return "oot_RPAREN"; if (t == '-') return "oot_MINUS"; if (t == '=') return "oot_EQUAL"; if (t == '_') return "oot_UNDER"; if (t == '+') return "oot_PLUS"; if (t == '[') return "oot_LBRACKET"; if (t == ']') return "oot_RBRACKET"; if (t == '\\') return "oot_BSLASH"; if (t == '{') return "oot_LBRACE"; if (t == '}') return "oot_RBRACE"; if (t == '|') return "oot_BAR"; if (t == ';') return "oot_SEMI"; if (t == '\'') return "oot_SQUOTE"; if (t == ':') return "oot_COLON"; if (t == '"') return "oot_DQUOTE"; if (t == ',') return "oot_COMMA"; if (t == '.') return "oot_DOT"; if (t == '/') return "oot_FSLASH"; if (t == '<') return "oot_LT"; if (t == '>') return "oot_GT"; if (t == '?') return "oot_WHAT"; if (t == oot_IDENT) return "oot_IDENT"; if (t == oot_NUM) return "oot_NUM"; if (t == oot_STR) return "oot_STR"; if (t == oot_EOF) return "oot_EOF"; return "oot_UNKNOWN"; } static char *oo_state_to_string(oo_states state) { if (state == oos_start) return "oos_start"; if (state == oos_ws) return "oos_ws"; if (state == oos_nl) return "oos_nl"; if (state == oos_sym) return "oos_sym"; if (state == oos_num) return "oos_num"; if (state == oos_ident) return "oos_ident"; if (state == oos_str) return "oos_str"; if (state == oos_esc) return "oos_esc"; if (state == oos_in_str) return "oos_in_str"; if (state == oos_in_id) return "oos_in_id"; if (state == oos_in_num) return "oos_in_num"; if (state == oos_eo_str) return "oos_eo_str"; if (state == oos_eo_id) return "oos_eo_id"; if (state == oos_eo_num) return "oos_eo_num"; if (state == oos_err) return "oos_err"; if (state == oos_eof) return "oos_eof"; return "undefined"; } static int issym(int c) { return c == '[' || c == ']' || c == '\\' || c == '{' || c == '}' || c == '|' || c == ';' || c == '\''|| c == ':' || c == 
'"' || c == ',' || c == '.' || c == '/' || c == '<' || c == '>' || c == '?' || c == '~' || c == '`' || c == '!' || c == '@' || c == '#' || c == '$' || c == '%' || c == '^' || c == '&' || c == '*' || c == '(' || c == ')' || c == '-' || c == '_' || c == '=' || c == '+' ; } static int isid(int c) { return isalpha(c) || c == '_'; } static int isesc(int c, int k) { if(c == 0) return 0; if(k == 0) return 0; if(c == '\\') { if (k == '"') return 1; if (k == '\\') return 1; if (k == 'r') return 1; if (k == 'n') return 1; } return 0; }