static int pg_wc_isalnum(pg_wchar c) { switch (pg_regex_strategy) { case PG_REGEX_LOCALE_C: return (c <= (pg_wchar) 127 && (pg_char_properties[c] & PG_ISALNUM)); case PG_REGEX_LOCALE_WIDE: #ifdef USE_WIDE_UPPER_LOWER if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswalnum((wint_t) c); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE: return (c <= (pg_wchar) UCHAR_MAX && isalnum((unsigned char) c)); case PG_REGEX_LOCALE_WIDE_L: #if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER) if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) return iswalnum_l((wint_t) c, pg_regex_locale); #endif /* FALL THRU */ case PG_REGEX_LOCALE_1BYTE_L: #ifdef HAVE_LOCALE_T return (c <= (pg_wchar) UCHAR_MAX && isalnum_l((unsigned char) c, pg_regex_locale)); #endif break; } return 0; /* can't get here, but keep compiler quiet */ }
void xlocale2_check_functions(nl_item ni, locale_t l) { /* ctype.h */ (void)isalnum_l(0, l); (void)isdigit_l(0, l); (void)isxdigit_l(0, l); /* inttypes.h */ (void)strtoimax_l("", (char**)1234, 10, l); /* langinfo.h */ (void)nl_langinfo_l(ni, l); /* monetary.h */ (void)strfmon_l((char*)1234, (size_t)0, l, "%n", 0.0); /* stdio.h */ (void)printf_l(l, "%d", 0); /* stdlib.h */ (void)strtol_l("", (char**)1234, 10, l); /* string.h */ (void)strcoll_l("", "", l); /* time.h */ (void)strftime_l((char*)1234, (size_t)0, "%s", (const struct tm *)1234, l); /* wchar.h */ (void)wcstol_l(L"", (wchar_t**)1234, 10, l); /* wctype.h */ (void)iswalnum_l((wint_t)0, l); (void)iswdigit_l((wint_t)0, l); (void)iswxdigit_l((wint_t)0, l); }
int ispunct_l(int c, locale_t loc) { return isgraph_l(c, loc) && !isalnum_l(c, loc); }
int getNextToken(void **attr) { string_t *s = NULL; keywords_t kw; int c; FSM { // --------------------------------------------------------------- // initial state // --------------------------------------------------------------- STATE(S_INIT) { SRC_GET(c); if (c == EOF) { NEXTSTATE(S_EOF); } else if (c == '\n') { NEXTSTATE(S_EOL); } else if (isspace(c)) { NEXTSTATE(S_INIT); } else if (isdigit(c)) { CHECKED(s = string_new(), alloc_failed); NEXTSTATE(S_INTEGRAL); } else if (isalpha_l(c, c_locale) || c == '_') { CHECKED(s = string_new(), alloc_failed); NEXTSTATE(S_IDENTIFIER); } else { switch (c) { default: NEXTSTATE(S_ERROR); case '+': NEXTSTATE(S_PLUS); case '-': NEXTSTATE(S_MINUS); case '!': NEXTSTATE(S_EXCL); case '=': NEXTSTATE(S_EQUAL); case '*': NEXTSTATE(S_ASTERISK); case '/': NEXTSTATE(S_SLASH); case '"': CHECKED(s = string_new(), alloc_failed); NEXTSTATE(S_QUOT); case '(': NEXTSTATE(S_LPAR); case ')': NEXTSTATE(S_RPAR); case '[': NEXTSTATE(S_LBRACK); case ']': NEXTSTATE(S_RBRACK); case '<': NEXTSTATE(S_LCHEV); case '>': NEXTSTATE(S_RCHEV); case ':': NEXTSTATE(S_COLON); case ',': NEXTSTATE(S_COMMA); } } } // --------------------------------------------------------------- // character + // --------------------------------------------------------------- STATE(S_PLUS) { _ifj_lexical_log("TOK_OP_SUM"); return TOK_OP_SUM; } // --------------------------------------------------------------- // character - // --------------------------------------------------------------- STATE(S_MINUS) { _ifj_lexical_log("TOK_OP_SUB"); return TOK_OP_SUB; } // --------------------------------------------------------------- // character ! // --------------------------------------------------------------- STATE(S_EXCL) { if (SRC_GET(c) == '=') { _ifj_lexical_log("TOK_OP_NEQ"); return TOK_OP_NEQ; } else { SRC_UNGET(c); NEXTSTATE(S_ERROR); } } // --------------------------------------------------------------- // character = // --------------------------------------------------------------- STATE(S_EQUAL) { if (SRC_GET(c) == '=') { _ifj_lexical_log("TOK_OP_EQ"); return TOK_OP_EQ; } else { SRC_UNGET(c); _ifj_lexical_log("TOK_OP_ASSIGN"); return TOK_OP_ASSIGN; } } // --------------------------------------------------------------- // character * // --------------------------------------------------------------- STATE(S_ASTERISK) { if (SRC_GET(c) == '*') { _ifj_lexical_log("TOK_OP_POW"); return TOK_OP_POW; } else { SRC_UNGET(c); _ifj_lexical_log("TOK_OP_MULT"); return TOK_OP_MULT; } } // --------------------------------------------------------------- // character ( // --------------------------------------------------------------- STATE(S_LPAR) { _ifj_lexical_log("TOK_LPAR"); return TOK_LPAR; } // --------------------------------------------------------------- // character ) // --------------------------------------------------------------- STATE(S_RPAR) { _ifj_lexical_log("TOK_RPAR"); return TOK_RPAR; } // --------------------------------------------------------------- // character [ // --------------------------------------------------------------- STATE(S_LBRACK) { _ifj_lexical_log("TOK_LBRACK"); return TOK_LBRACK; } // --------------------------------------------------------------- // character ] // --------------------------------------------------------------- STATE(S_RBRACK) { _ifj_lexical_log("TOK_RBRACK"); return TOK_RBRACK; } // --------------------------------------------------------------- // character < // --------------------------------------------------------------- STATE(S_LCHEV) { if (SRC_GET(c) == '=') { _ifj_lexical_log("TOK_OP_LE"); return TOK_OP_LE; } else { SRC_UNGET(c); _ifj_lexical_log("TOK_OP_LT"); return TOK_OP_LT; } } // --------------------------------------------------------------- // character > // --------------------------------------------------------------- STATE(S_RCHEV) { if (SRC_GET(c) == '=') { _ifj_lexical_log("TOK_OP_GE"); return TOK_OP_GE; } else { SRC_UNGET(c); _ifj_lexical_log("TOK_OP_GT"); return TOK_OP_GT; } } // --------------------------------------------------------------- // character : // --------------------------------------------------------------- STATE(S_COLON) { _ifj_lexical_log("TOK_COLON"); return TOK_COLON; } // --------------------------------------------------------------- // character , // --------------------------------------------------------------- STATE(S_COMMA) { _ifj_lexical_log("TOK_COMMA"); return TOK_COMMA; } // --------------------------------------------------------------- // character / // --------------------------------------------------------------- STATE(S_SLASH) { switch (SRC_GET(c)) { case '/': NEXTSTATE(S_COMMENT); case '*': NEXTSTATE(S_COMMENT_BLOCK); default: SRC_UNGET(c); _ifj_lexical_log("TOK_OP_DIV"); return TOK_OP_DIV; } } // --------------------------------------------------------------- // string -- inside quotes // --------------------------------------------------------------- STATE(S_QUOT) { SRC_GET(c); if (c == '"') { NEXTSTATE(S_STRING); } else if (c == '\\') { NEXTSTATE(S_ESCAPECHAR); } else if (isprint(c)) { CHECKED(string_append(&s, c), alloc_failed); NEXTSTATE(S_QUOT); } else { NEXTSTATE(S_ESTRING); } } // --------------------------------------------------------------- // string -- escape characters // --------------------------------------------------------------- STATE(S_ESCAPECHAR) { switch (SRC_GET(c)) { case 'n': CHECKED(string_append(&s, '\n'), alloc_failed); NEXTSTATE(S_QUOT); case 't': CHECKED(string_append(&s, '\t'), alloc_failed); NEXTSTATE(S_QUOT); case '"': CHECKED(string_append(&s, '"'), alloc_failed); NEXTSTATE(S_QUOT); case '\\': CHECKED(string_append(&s, '\\'), alloc_failed); NEXTSTATE(S_QUOT); case 'x': NEXTSTATE(S_HEXCHAR); default: NEXTSTATE(S_ESTRING); } } // --------------------------------------------------------------- // string -- character in hexadecimal notation // --------------------------------------------------------------- STATE(S_HEXCHAR) { unsigned char val = 0; if (isxdigit(SRC_GET(c))) { val = HEX_TO_INT(c) << 4; } else { NEXTSTATE(S_ESTRING); } if (isxdigit(SRC_GET(c))) { val += HEX_TO_INT(c); } else { NEXTSTATE(S_ESTRING); } CHECKED(string_append(&s, val), alloc_failed); NEXTSTATE(S_QUOT); } // --------------------------------------------------------------- // numeric -- integral part // --------------------------------------------------------------- STATE(S_INTEGRAL) { CHECKED(string_append(&s, c), alloc_failed); SRC_GET(c); if (isdigit(c)) { NEXTSTATE(S_INTEGRAL); } else if (c == '.') { CHECKED(string_append(&s, c), alloc_failed); SRC_GET(c); if (isdigit(c)) { NEXTSTATE(S_DECIMAL); } else { NEXTSTATE(S_ENUM); } } else if (c == 'e') { CHECKED(string_append(&s, c), alloc_failed); NEXTSTATE(S_EXPONENT_E); } else { NEXTSTATE(S_ENUM); } } // --------------------------------------------------------------- // numeric -- decimal part // --------------------------------------------------------------- STATE(S_DECIMAL) { CHECKED(string_append(&s, c), alloc_failed); SRC_GET(c); if (isdigit(c)) { NEXTSTATE(S_DECIMAL); } else if (c == 'e') { CHECKED(string_append(&s, c), alloc_failed); NEXTSTATE(S_EXPONENT_E); } else { SRC_UNGET(c); NEXTSTATE(S_NUMERIC); } } // --------------------------------------------------------------- // numeric -- exponent part // --------------------------------------------------------------- STATE(S_EXPONENT_E) { SRC_GET(c); if (c == '+' || c == '-') { CHECKED(string_append(&s, c), alloc_failed); SRC_GET(c); } if (isdigit(c)) { NEXTSTATE(S_EXPONENT); } else { NEXTSTATE(S_ENUM); } } STATE(S_EXPONENT) { CHECKED(string_append(&s, c), alloc_failed); if (isdigit(SRC_GET(c))) { NEXTSTATE(S_EXPONENT); } else { SRC_UNGET(c); NEXTSTATE(S_NUMERIC); } } // --------------------------------------------------------------- // identifier // --------------------------------------------------------------- STATE(S_IDENTIFIER) { CHECKED(string_append(&s, c), alloc_failed); if (isalnum_l(SRC_GET(c), c_locale) || c == '_') { NEXTSTATE(S_IDENTIFIER); } SRC_UNGET(c); #define X_(pstate, str, e) \ if (0 == strcmp(string_getstr(s), (str))) { \ kw = e; \ string_dispose(s); s = NULL; \ NEXTSTATE(pstate); \ } else #define X(a,b) X_(S_KEYWORD, b, KW_##a) #include "keywords.def" X_KEYWORD #undef X #define X(a,b) X_(S_BUILTIN, b, FN_##a) #include "builtin.def" //X_BUILTIN_FN #undef X #define X(a,b) X_(S_RESERVED_KEYWORD, b, RKW_##a) #include "keywords.def" X_KEYWORD_RESERVED #undef X #undef X_ // ------------------------------------------------------- // identifier // ------------------------------------------------------- { // else *attr = s; _ifj_lexical_log("TOK_TYPE_ID \"%s\"", string_getstr(s)); return TOK_TYPE_ID; } } // --------------------------------------------------------------- // reserved keyword // --------------------------------------------------------------- STATE(S_RESERVED_KEYWORD) { switch (kw) { default: break; #define X(a,b) \ case (RKW_##a): \ _ifj_lexical_log("TOK_TYPE_RKW \"%s\"", (b)); \ break; #include "keywords.def" X_KEYWORD_RESERVED #undef X } *attr = (void *) kw; return TOK_TYPE_RKW; } // --------------------------------------------------------------- // built-in function // --------------------------------------------------------------- #if 0 STATE(S_BUILTIN) { switch (kw) { default: break; #define X(a,b) \ case (FN_##a): \ _ifj_lexical_log("TOK_TYPE_FN \"%s\"", (b)); \ break; #include "builtin.def" //X_BUILTIN_FN #undef X } *attr = (void *) kw; return TOK_TYPE_FN; } #endif // --------------------------------------------------------------- // term -- numeric // --------------------------------------------------------------- STATE(S_NUMERIC) { *attr = s; _ifj_lexical_log("TOK_TYPE_NUM \"%s\"", string_getstr(s)); return TOK_TYPE_NUM; } STATE(S_KEYWORD) { switch (kw) { // ----------------------------------------------------------- // keyword // ----------------------------------------------------------- default: switch (kw) { default: break; #define X(a,b) \ case (KW_##a): \ _ifj_lexical_log("TOK_TYPE_KW \"%s\"", (b)); \ break; #include "keywords.def" X_KEYWORD #undef X } *attr = (void *) kw; return TOK_TYPE_KW; // ----------------------------------------------------------- // term -- boolean // ----------------------------------------------------------- case KW_TRUE: *attr = (void *) true; _ifj_lexical_log("TOK_TYPE_BOOL \"true\""); return TOK_TYPE_BOOL; case KW_FALSE: _ifj_lexical_log("TOK_TYPE_BOOL \"false\""); *attr = (void *) false; return TOK_TYPE_BOOL; // ----------------------------------------------------------- // term -- nil // ----------------------------------------------------------- case KW_NIL: _ifj_lexical_log("TOK_TYPE_NIL \"Nil\""); *attr = NULL; return TOK_TYPE_NIL; } } // --------------------------------------------------------------- // term -- string // --------------------------------------------------------------- STATE(S_STRING) { *attr = s; _ifj_lexical_log("TOK_TYPE_STR \"%s\"", string_getstr(s)); return TOK_TYPE_STR; } // --------------------------------------------------------------- // comment // --------------------------------------------------------------- STATE(S_COMMENT) { SRC_GET(c); if (c == EOF) { NEXTSTATE(S_EOF); } else if (c == '\n') { NEXTSTATE(S_EOL); } else { NEXTSTATE(S_COMMENT); } } // --------------------------------------------------------------- // block comment // --------------------------------------------------------------- STATE(S_COMMENT_BLOCK) { SRC_GET(c); if (c == EOF) { NEXTSTATE(S_EEOF); } else if (c != '*') { NEXTSTATE(S_COMMENT_BLOCK); } else { NEXTSTATE(S_COMMENT_BLOCK_2); } } STATE(S_COMMENT_BLOCK_2) { SRC_GET(c); if (c == EOF) { NEXTSTATE(S_EEOF); } else if (c == '/') { NEXTSTATE(S_INIT); } else { NEXTSTATE(S_COMMENT_BLOCK); } } // --------------------------------------------------------------- // eol // --------------------------------------------------------------- STATE(S_EOL) { ++_line; _col = 0; if (SRC_GET(c) == '\n') { NEXTSTATE(S_EOL); } else { SRC_UNGET(c); _ifj_lexical_log("TOK_EOL"); return TOK_EOL; } } // --------------------------------------------------------------- // eof // --------------------------------------------------------------- STATE(S_EOF) { _ifj_lexical_log("TOK_EOF"); return TOK_EOF; } // --------------------------------------------------------------- // error states // --------------------------------------------------------------- STATE(S_ERROR) { _ifj_error_log("%s:%zu:%zu: Neznamy znak %c", src_name, getCurrLine(), getCurrCol(), c); _ifj_lexical_log("TOK_EINVAL"); return TOK_EINVAL; } STATE(S_EEOF) { _ifj_error_log("%s:%zu:%zu: Neocekavany konec souboru", src_name, getCurrLine(), getCurrCol()); _ifj_lexical_log("TOK_EEOF"); return TOK_EEOF; } STATE(S_ESTRING) { _ifj_error_log ("%s:%zu:%zu: Neocekavany znak v retezci %c", src_name, getCurrLine(), getCurrCol(), c); _ifj_lexical_log("TOK_ESTRING"); return TOK_EINVAL; } STATE(S_ENUM) { _ifj_error_log("%s:%zu:%zu: Neocekavany tvar cisla %c", src_name, getCurrLine(), getCurrCol(), c); _ifj_lexical_log("TOK_ENUM"); return TOK_ENUM; } } NEXTSTATE(S_ERROR); alloc_failed: string_dispose(s); _ifj_error_log("%s", strerror(ENOMEM)); _ifj_lexical_log("TOK_ENOMEM"); return TOK_ENOMEM; }