// return NFC-normalized UTF8-encoded version of s static char *normalize(char *s) { static size_t buflen = 0; static void *buf = NULL; // persistent buffer (avoid repeated malloc/free) // options equivalent to utf8proc_NFC: const int options = UTF8PROC_NULLTERM|UTF8PROC_STABLE|UTF8PROC_COMPOSE; ssize_t result; size_t newlen; result = utf8proc_decompose((uint8_t*) s, 0, NULL, 0, options); if (result < 0) goto error; newlen = result * sizeof(int32_t) + 1; if (newlen > buflen) { buflen = newlen * 2; buf = realloc(buf, buflen); if (!buf) lerror(MemoryError, "error allocating UTF8 buffer"); } result = utf8proc_decompose((uint8_t*)s,0, (int32_t*)buf,result, options); if (result < 0) goto error; result = utf8proc_reencode((int32_t*)buf,result, options); if (result < 0) goto error; return (char*) buf; error: lerrorf(symbol("error"), "error normalizing identifier %s: %s", s, utf8proc_errmsg(result)); }
// return NFC-normalized UTF8-encoded version of s, with // additional custom normalizations defined by jl_charmap above. static char *normalize(fl_context_t *fl_ctx, char *s) { // options equivalent to utf8proc_NFC: const int options = UTF8PROC_NULLTERM|UTF8PROC_STABLE|UTF8PROC_COMPOSE; ssize_t result; size_t newlen; result = utf8proc_decompose_custom((uint8_t*) s, 0, NULL, 0, (utf8proc_option_t)options, jl_charmap_map, NULL); if (result < 0) goto error; newlen = result * sizeof(int32_t) + 1; if (newlen > fl_ctx->jlbuflen) { fl_ctx->jlbuflen = newlen * 2; fl_ctx->jlbuf = realloc(fl_ctx->jlbuf, fl_ctx->jlbuflen); if (!fl_ctx->jlbuf) lerror(fl_ctx, fl_ctx->OutOfMemoryError, "error allocating UTF8 buffer"); } result = utf8proc_decompose_custom((uint8_t*)s,0, (int32_t*)fl_ctx->jlbuf,result, (utf8proc_option_t)options, jl_charmap_map, NULL); if (result < 0) goto error; result = utf8proc_reencode((int32_t*)fl_ctx->jlbuf,result, (utf8proc_option_t)options); if (result < 0) goto error; return (char*) fl_ctx->jlbuf; error: lerrorf(fl_ctx, symbol(fl_ctx, "error"), "error normalizing identifier %s: %s", s, utf8proc_errmsg(result)); }
static int read_numtok(char *tok, value_t *pval, int base) { int result; errno = 0; result = isnumtok_base(tok, pval, base); if (errno == ERANGE) lerrorf(ParseError, "read: overflow in numeric constant %s", tok); return result; }
static value_t fl_path_cwd(value_t *args, uint32_t nargs) { uv_err_t err; if (nargs > 1) argcount("path.cwd", nargs, 1); if (nargs == 0) { char buf[1024]; err = uv_cwd(buf, sizeof(buf)); if (err.code != UV_OK) lerrorf(IOError, "path.cwd: could not get cwd: %s", uv_strerror(err)); return string_from_cstr(buf); } char *ptr = tostring(args[0], "path.cwd"); err = uv_chdir(ptr); if (err.code != UV_OK) lerrorf(IOError, "path.cwd: could not cd to %s: %s", ptr, uv_strerror(err)); return FL_T; }
static value_t fl_path_cwd(value_t *args, uint32_t nargs) { if (nargs > 1) argcount("path.cwd", nargs, 1); if (nargs == 0) { char buf[1024]; get_cwd(buf, sizeof(buf)); return string_from_cstr(buf); } char *ptr = tostring(args[0], "path.cwd"); if (set_cwd(ptr)) lerrorf(IOError, "path.cwd: could not cd to %s", ptr); return FL_T; }
// label is the backreference we'd like to fix up with this read static value_t do_read_sexpr(value_t label) { value_t v, sym, oldtokval, *head; value_t *pv; u_int32_t t; char c; t = peek(); take(); switch (t) { case TOK_CLOSE: lerror(ParseError, "read: unexpected ')'"); case TOK_CLOSEB: lerror(ParseError, "read: unexpected ']'"); case TOK_DOT: lerror(ParseError, "read: unexpected '.'"); case TOK_SYM: case TOK_NUM: return tokval; case TOK_COMMA: head = &COMMA; goto listwith; case TOK_COMMAAT: head = &COMMAAT; goto listwith; case TOK_COMMADOT: head = &COMMADOT; goto listwith; case TOK_BQ: head = &BACKQUOTE; goto listwith; case TOK_QUOTE: head = "E; listwith: v = cons_reserve(2); car_(v) = *head; cdr_(v) = tagptr(((cons_t*)ptr(v))+1, TAG_CONS); car_(cdr_(v)) = cdr_(cdr_(v)) = NIL; PUSH(v); if (label != UNBOUND) ptrhash_put(&readstate->backrefs, (void*)label, (void*)v); v = do_read_sexpr(UNBOUND); car_(cdr_(Stack[SP-1])) = v; return POP(); case TOK_SHARPQUOTE: // femtoLisp doesn't need symbol-function, so #' does nothing return do_read_sexpr(label); case TOK_OPEN: PUSH(NIL); read_list(&Stack[SP-1], label); return POP(); case TOK_SHARPSYM: sym = tokval; if (sym == tsym || sym == Tsym) return FL_T; else if (sym == fsym || sym == Fsym) return FL_F; // constructor notation c = nextchar(); if (c != '(') { take(); lerrorf(ParseError, "read: expected argument list for %s", symbol_name(tokval)); } PUSH(NIL); read_list(&Stack[SP-1], UNBOUND); if (sym == vu8sym) { sym = arraysym; Stack[SP-1] = fl_cons(uint8sym, Stack[SP-1]); } else if (sym == fnsym) { sym = FUNCTION; } v = symbol_value(sym); if (v == UNBOUND) fl_raise(fl_list2(UnboundError, sym)); return fl_apply(v, POP()); case TOK_OPENB: return read_vector(label, TOK_CLOSEB); case TOK_SHARPOPEN: return read_vector(label, TOK_CLOSE); case TOK_SHARPDOT: // eval-when-read // evaluated expressions can refer to existing backreferences, but they // cannot see pending labels. in other words: // (... #2=#.#0# ... ) OK // (... #2=#.(#2#) ... ) DO NOT WANT sym = do_read_sexpr(UNBOUND); if (issymbol(sym)) { v = symbol_value(sym); if (v == UNBOUND) fl_raise(fl_list2(UnboundError, sym)); return v; } return fl_toplevel_eval(sym); case TOK_LABEL: // create backreference label if (ptrhash_has(&readstate->backrefs, (void*)tokval)) lerrorf(ParseError, "read: label %ld redefined", numval(tokval)); oldtokval = tokval; v = do_read_sexpr(tokval); ptrhash_put(&readstate->backrefs, (void*)oldtokval, (void*)v); return v; case TOK_BACKREF: // look up backreference v = (value_t)ptrhash_get(&readstate->backrefs, (void*)tokval); if (v == (value_t)HT_NOTFOUND) lerrorf(ParseError, "read: undefined label %ld", numval(tokval)); return v; case TOK_GENSYM: pv = (value_t*)ptrhash_bp(&readstate->gensyms, (void*)tokval); if (*pv == (value_t)HT_NOTFOUND) *pv = fl_gensym(NULL, 0); return *pv; case TOK_DOUBLEQUOTE: return read_string(); } return FL_UNSPECIFIED; }
static u_int32_t peek(void) { char c, *end; fixnum_t x; int ch, base; if (toktype != TOK_NONE) return toktype; c = nextchar(); if (ios_eof(F)) return TOK_NONE; if (c == '(') { toktype = TOK_OPEN; } else if (c == ')') { toktype = TOK_CLOSE; } else if (c == '[') { toktype = TOK_OPENB; } else if (c == ']') { toktype = TOK_CLOSEB; } else if (c == '\'') { toktype = TOK_QUOTE; } else if (c == '`') { toktype = TOK_BQ; } else if (c == '"') { toktype = TOK_DOUBLEQUOTE; } else if (c == '#') { ch = ios_getc(F); c = (char)ch; if (ch == IOS_EOF) lerror(ParseError, "read: invalid read macro"); if (c == '.') { toktype = TOK_SHARPDOT; } else if (c == '\'') { toktype = TOK_SHARPQUOTE; } else if (c == '\\') { uint32_t cval; if (ios_getutf8(F, &cval) == IOS_EOF) lerror(ParseError, "read: end of input in character constant"); if (cval == (uint32_t)'u' || cval == (uint32_t)'U' || cval == (uint32_t)'x') { read_token('u', 0); if (buf[1] != '\0') { // not a solitary 'u','U','x' if (!read_numtok(&buf[1], &tokval, 16)) lerror(ParseError, "read: invalid hex character constant"); cval = numval(tokval); } } else if (cval >= 'a' && cval <= 'z') { read_token((char)cval, 0); tokval = symbol(buf); if (buf[1] == '\0') /* one character */; else if (tokval == nulsym) cval = 0x00; else if (tokval == alarmsym) cval = 0x07; else if (tokval == backspacesym) cval = 0x08; else if (tokval == tabsym) cval = 0x09; else if (tokval == linefeedsym) cval = 0x0A; else if (tokval == newlinesym) cval = 0x0A; else if (tokval == vtabsym) cval = 0x0B; else if (tokval == pagesym) cval = 0x0C; else if (tokval == returnsym) cval = 0x0D; else if (tokval == escsym) cval = 0x1B; else if (tokval == spacesym) cval = 0x20; else if (tokval == deletesym) cval = 0x7F; else lerrorf(ParseError, "read: unknown character #\\%s", buf); } toktype = TOK_NUM; tokval = mk_wchar(cval); } else if (c == '(') { toktype = TOK_SHARPOPEN; } else if (c == '<') { lerror(ParseError, "read: unreadable object"); } else if (isdigit(c)) { read_token(c, 1); c = (char)ios_getc(F); if (c == '#') toktype = TOK_BACKREF; else if (c == '=') toktype = TOK_LABEL; else lerror(ParseError, "read: invalid label"); errno = 0; x = strtol(buf, &end, 10); if (*end != '\0' || errno) lerror(ParseError, "read: invalid label"); tokval = fixnum(x); } else if (c == '!') { // #! single line comment for shbang script support do { ch = ios_getc(F); } while (ch != IOS_EOF && (char)ch != '\n'); return peek(); } else if (c == '|') { // multiline comment int commentlevel=1; while (1) { ch = ios_getc(F); hashpipe_gotc: if (ch == IOS_EOF) lerror(ParseError, "read: eof within comment"); if ((char)ch == '|') { ch = ios_getc(F); if ((char)ch == '#') { commentlevel--; if (commentlevel == 0) break; else continue; } goto hashpipe_gotc; } else if ((char)ch == '#') { ch = ios_getc(F); if ((char)ch == '|') commentlevel++; else goto hashpipe_gotc; } } // this was whitespace, so keep peeking return peek(); } else if (c == ';') { // datum comment (void)do_read_sexpr(UNBOUND); // skip return peek(); } else if (c == ':') { // gensym ch = ios_getc(F); if ((char)ch == 'g') ch = ios_getc(F); read_token((char)ch, 0); errno = 0; x = strtol(buf, &end, 10); if (*end != '\0' || buf[0] == '\0' || errno) lerror(ParseError, "read: invalid gensym label"); toktype = TOK_GENSYM; tokval = fixnum(x); } else if (symchar(c)) { read_token(ch, 0); if (((c == 'b' && (base= 2)) || (c == 'o' && (base= 8)) || (c == 'd' && (base=10)) || (c == 'x' && (base=16))) && (isdigit_base(buf[1],base) || buf[1]=='-')) { if (!read_numtok(&buf[1], &tokval, base)) lerrorf(ParseError, "read: invalid base %d constant", base); return (toktype=TOK_NUM); } toktype = TOK_SHARPSYM; tokval = symbol(buf); } else { lerror(ParseError, "read: unknown read macro"); } } else if (c == ',') { toktype = TOK_COMMA; ch = ios_getc(F); if (ch == IOS_EOF) return toktype; if ((char)ch == '@') toktype = TOK_COMMAAT; else if ((char)ch == '.') toktype = TOK_COMMADOT; else ios_ungetc((char)ch, F); } else { if (!read_token(c, 0)) { if (buf[0]=='.' && buf[1]=='\0') { return (toktype=TOK_DOT); } else { if (read_numtok(buf, &tokval, 0)) return (toktype=TOK_NUM); } } toktype = TOK_SYM; tokval = symbol(buf); } return toktype; }
void __attribute__((noreturn)) yyerror(const char *s) { lerrorf(¤t_loc,"%s",s); }