/*
 * Builtin "accum-julia-symbol": accumulate identifier characters from a
 * stream and return them as a symbol.
 *
 * args[0] must be a wchar cprim (a type error is raised otherwise); its
 * value seeds the loop condition.  args[1] is converted to an ios_t stream
 * with fl_toiostream.  Exactly 2 arguments are required (argcount).
 *
 * While the current code point satisfies jl_id_char, one UTF-8 character is
 * consumed from the stream and appended to a memory buffer, then the next
 * character is peeked to decide whether to continue.  A '!' immediately
 * followed by '=' is pushed back and ends the identifier, so that "!=" is
 * always an operator.
 *
 * Returns symbol(normalize(<accumulated text>)).
 *
 * NOTE(review): `str` is never closed in this function; confirm whether
 * ios_mem can heap-allocate for long identifiers.
 */
value_t fl_accum_julia_symbol(value_t *args, u_int32_t nargs)
{
    argcount("accum-julia-symbol", nargs, 2);
    ios_t *s = fl_toiostream(args[1], "accum-julia-symbol");
    if (!iscprim(args[0]) || ((cprim_t*)ptr(args[0]))->type != wchartype)
        type_error("accum-julia-symbol", "wchar", args[0]);
    uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0]));
    ios_t str;
    ios_mem(&str, 0);
    while (jl_id_char(wc)) {
        ios_getutf8(s, &wc);
        if (wc == '!') {
            // Initialized so that a failed peek (e.g. at end of input)
            // cannot leave an indeterminate value to compare against '='.
            uint32_t nwc = 0;
            ios_peekutf8(s, &nwc);
            // make sure != is always an operator
            if (nwc == '=') {
                ios_ungetc('!', s);
                break;
            }
        }
        ios_pututf8(&str, wc);
        if (ios_peekutf8(s, &wc) == IOS_EOF)
            break;
    }
    // NUL-terminate the accumulated text before handing it on as a C string
    ios_pututf8(&str, 0);
    return symbol(normalize(str.buf));
}
// return: 1 if escaped (forced to be symbol) static int read_token(char c, int digits) { int i=0, ch, escaped=0, issym=0, first=1; while (1) { if (!first) { ch = ios_getc(F); if (ch == IOS_EOF) goto terminate; c = (char)ch; } first = 0; if (c == '|') { issym = 1; escaped = !escaped; } else if (c == '\\') { issym = 1; ch = ios_getc(F); if (ch == IOS_EOF) goto terminate; accumchar((char)ch, &i); } else if (!escaped && !(symchar(c) && (!digits || isdigit(c)))) { break; } else { accumchar(c, &i); } } ios_ungetc(c, F); terminate: buf[i++] = '\0'; return issym; }
/*
 * Builtin "accum-julia-symbol" (context-passing variant): accumulate
 * identifier characters from a stream and return them as a symbol.
 *
 * args[0] must be a wchar cprim (a type error is raised otherwise); its
 * value is the first code point considered.  args[1] is converted to an
 * ios_t stream with fl_toiostream.  Exactly 2 arguments are required.
 *
 * The loop body always runs at least once: it consumes one UTF-8 character
 * from the stream, appends it to a memory buffer, and peeks the next
 * character; it repeats while that peeked character satisfies jl_id_char.
 * A '!' immediately followed by '=' is pushed back and ends the identifier,
 * so that "!=" is always an operator.
 *
 * `allascii` tracks whether every code point held in `wc` at the top of an
 * iteration was <= 0x7f; when it is still set, the normalize() call is
 * skipped and the raw buffer is interned directly.
 *
 * NOTE(review): `str` is never closed in this function; confirm whether
 * ios_mem can heap-allocate for long identifiers.
 */
value_t fl_accum_julia_symbol(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
{
    argcount(fl_ctx, "accum-julia-symbol", nargs, 2);
    ios_t *s = fl_toiostream(fl_ctx, args[1], "accum-julia-symbol");
    if (!iscprim(args[0]) || ((cprim_t*)ptr(args[0]))->type != fl_ctx->wchartype)
        type_error(fl_ctx, "accum-julia-symbol", "wchar", args[0]);
    uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0]));
    ios_t str;
    int allascii = 1;
    ios_mem(&str, 0);
    do {
        allascii &= (wc <= 0x7f);
        ios_getutf8(s, &wc);
        if (wc == '!') {
            // Initialized so that a failed peek (e.g. at end of input)
            // cannot leave an indeterminate value to compare against '='.
            uint32_t nwc = 0;
            ios_peekutf8(s, &nwc);
            // make sure != is always an operator
            if (nwc == '=') {
                ios_ungetc('!', s);
                break;
            }
        }
        ios_pututf8(&str, wc);
        if (ios_peekutf8(s, &wc) == IOS_EOF)
            break;
    } while (jl_id_char(wc));
    // NUL-terminate the accumulated text before handing it on as a C string
    ios_pututf8(&str, 0);
    return symbol(fl_ctx, allascii ? str.buf : normalize(fl_ctx, str.buf));
}
static value_t read_string(void) { char *buf, *temp; char eseq[10]; size_t i=0, j, sz = 64, ndig; int c; value_t s; u_int32_t wc=0; buf = malloc(sz); while (1) { if (i >= sz-4) { // -4: leaves room for longest utf8 sequence sz *= 2; temp = realloc(buf, sz); if (temp == NULL) { free(buf); lerror(ParseError, "read: out of memory reading string"); } buf = temp; } c = ios_getc(F); if (c == IOS_EOF) { free(buf); lerror(ParseError, "read: unexpected end of input in string"); } if (c == '"') break; else if (c == '\\') { c = ios_getc(F); if (c == IOS_EOF) { free(buf); lerror(ParseError, "read: end of input in escape sequence"); } j=0; if (octal_digit(c)) { do { eseq[j++] = c; c = ios_getc(F); } while (octal_digit(c) && j<3 && (c!=IOS_EOF)); if (c!=IOS_EOF) ios_ungetc(c, F); eseq[j] = '\0'; wc = strtol(eseq, NULL, 8); // \DDD and \xXX read bytes, not characters buf[i++] = ((char)wc); } else if ((c=='x' && (ndig=2)) || (c=='u' && (ndig=4)) || (c=='U' && (ndig=8))) { c = ios_getc(F); while (hex_digit(c) && j<ndig && (c!=IOS_EOF)) { eseq[j++] = c; c = ios_getc(F); } if (c!=IOS_EOF) ios_ungetc(c, F); eseq[j] = '\0'; if (j) wc = strtol(eseq, NULL, 16); if (!j || wc > 0x10ffff) { free(buf); lerror(ParseError, "read: invalid escape sequence"); } if (ndig == 2) buf[i++] = ((char)wc); else i += u8_wc_toutf8(&buf[i], wc); } else { buf[i++] = read_escape_control_char((char)c); } } else { buf[i++] = c; } } s = cvalue_string(i); memcpy(cvalue_data(s), buf, i); free(buf); return s; }
static u_int32_t peek(void) { char c, *end; fixnum_t x; int ch, base; if (toktype != TOK_NONE) return toktype; c = nextchar(); if (ios_eof(F)) return TOK_NONE; if (c == '(') { toktype = TOK_OPEN; } else if (c == ')') { toktype = TOK_CLOSE; } else if (c == '[') { toktype = TOK_OPENB; } else if (c == ']') { toktype = TOK_CLOSEB; } else if (c == '\'') { toktype = TOK_QUOTE; } else if (c == '`') { toktype = TOK_BQ; } else if (c == '"') { toktype = TOK_DOUBLEQUOTE; } else if (c == '#') { ch = ios_getc(F); c = (char)ch; if (ch == IOS_EOF) lerror(ParseError, "read: invalid read macro"); if (c == '.') { toktype = TOK_SHARPDOT; } else if (c == '\'') { toktype = TOK_SHARPQUOTE; } else if (c == '\\') { uint32_t cval; if (ios_getutf8(F, &cval) == IOS_EOF) lerror(ParseError, "read: end of input in character constant"); if (cval == (uint32_t)'u' || cval == (uint32_t)'U' || cval == (uint32_t)'x') { read_token('u', 0); if (buf[1] != '\0') { // not a solitary 'u','U','x' if (!read_numtok(&buf[1], &tokval, 16)) lerror(ParseError, "read: invalid hex character constant"); cval = numval(tokval); } } else if (cval >= 'a' && cval <= 'z') { read_token((char)cval, 0); tokval = symbol(buf); if (buf[1] == '\0') /* one character */; else if (tokval == nulsym) cval = 0x00; else if (tokval == alarmsym) cval = 0x07; else if (tokval == backspacesym) cval = 0x08; else if (tokval == tabsym) cval = 0x09; else if (tokval == linefeedsym) cval = 0x0A; else if (tokval == newlinesym) cval = 0x0A; else if (tokval == vtabsym) cval = 0x0B; else if (tokval == pagesym) cval = 0x0C; else if (tokval == returnsym) cval = 0x0D; else if (tokval == escsym) cval = 0x1B; else if (tokval == spacesym) cval = 0x20; else if (tokval == deletesym) cval = 0x7F; else lerrorf(ParseError, "read: unknown character #\\%s", buf); } toktype = TOK_NUM; tokval = mk_wchar(cval); } else if (c == '(') { toktype = TOK_SHARPOPEN; } else if (c == '<') { lerror(ParseError, "read: unreadable object"); } else if (isdigit(c)) { read_token(c, 
1); c = (char)ios_getc(F); if (c == '#') toktype = TOK_BACKREF; else if (c == '=') toktype = TOK_LABEL; else lerror(ParseError, "read: invalid label"); errno = 0; x = strtol(buf, &end, 10); if (*end != '\0' || errno) lerror(ParseError, "read: invalid label"); tokval = fixnum(x); } else if (c == '!') { // #! single line comment for shbang script support do { ch = ios_getc(F); } while (ch != IOS_EOF && (char)ch != '\n'); return peek(); } else if (c == '|') { // multiline comment int commentlevel=1; while (1) { ch = ios_getc(F); hashpipe_gotc: if (ch == IOS_EOF) lerror(ParseError, "read: eof within comment"); if ((char)ch == '|') { ch = ios_getc(F); if ((char)ch == '#') { commentlevel--; if (commentlevel == 0) break; else continue; } goto hashpipe_gotc; } else if ((char)ch == '#') { ch = ios_getc(F); if ((char)ch == '|') commentlevel++; else goto hashpipe_gotc; } } // this was whitespace, so keep peeking return peek(); } else if (c == ';') { // datum comment (void)do_read_sexpr(UNBOUND); // skip return peek(); } else if (c == ':') { // gensym ch = ios_getc(F); if ((char)ch == 'g') ch = ios_getc(F); read_token((char)ch, 0); errno = 0; x = strtol(buf, &end, 10); if (*end != '\0' || buf[0] == '\0' || errno) lerror(ParseError, "read: invalid gensym label"); toktype = TOK_GENSYM; tokval = fixnum(x); } else if (symchar(c)) { read_token(ch, 0); if (((c == 'b' && (base= 2)) || (c == 'o' && (base= 8)) || (c == 'd' && (base=10)) || (c == 'x' && (base=16))) && (isdigit_base(buf[1],base) || buf[1]=='-')) { if (!read_numtok(&buf[1], &tokval, base)) lerrorf(ParseError, "read: invalid base %d constant", base); return (toktype=TOK_NUM); } toktype = TOK_SHARPSYM; tokval = symbol(buf); } else { lerror(ParseError, "read: unknown read macro"); } } else if (c == ',') { toktype = TOK_COMMA; ch = ios_getc(F); if (ch == IOS_EOF) return toktype; if ((char)ch == '@') toktype = TOK_COMMAAT; else if ((char)ch == '.') toktype = TOK_COMMADOT; else ios_ungetc((char)ch, F); } else { if 
(!read_token(c, 0)) { if (buf[0]=='.' && buf[1]=='\0') { return (toktype=TOK_DOT); } else { if (read_numtok(buf, &tokval, 0)) return (toktype=TOK_NUM); } } toktype = TOK_SYM; tokval = symbol(buf); } return toktype; }