Example #1
0
value_t fl_accum_julia_symbol(value_t *args, u_int32_t nargs)
{
    argcount("accum-julia-symbol", nargs, 2);
    ios_t *s = fl_toiostream(args[1], "accum-julia-symbol");
    if (!iscprim(args[0]) || ((cprim_t*)ptr(args[0]))->type != wchartype)
        type_error("accum-julia-symbol", "wchar", args[0]);
    uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0]));
    ios_t str;
    ios_mem(&str, 0);
    while (jl_id_char(wc)) {
        ios_getutf8(s, &wc);
        if (wc == '!') {
            uint32_t nwc;
            ios_peekutf8(s, &nwc);
            // make sure != is always an operator
            if (nwc == '=') {
                ios_ungetc('!', s);
                break;
            }
        }
        ios_pututf8(&str, wc);
        if (ios_peekutf8(s, &wc) == IOS_EOF)
            break;
    }
    ios_pututf8(&str, 0);
    return symbol(normalize(str.buf));
}
Example #2
0
// return: 1 if escaped (forced to be symbol)
static int read_token(char c, int digits)
{
    int i=0, ch, escaped=0, issym=0, first=1;

    while (1) {
        if (!first) {
            ch = ios_getc(F);
            if (ch == IOS_EOF)
                goto terminate;
            c = (char)ch;
        }
        first = 0;
        if (c == '|') {
            issym = 1;
            escaped = !escaped;
        }
        else if (c == '\\') {
            issym = 1;
            ch = ios_getc(F);
            if (ch == IOS_EOF)
                goto terminate;
            accumchar((char)ch, &i);
        }
        else if (!escaped && !(symchar(c) && (!digits || isdigit(c)))) {
            break;
        }
        else {
            accumchar(c, &i);
        }
    }
    ios_ungetc(c, F);
 terminate:
    buf[i++] = '\0';
    return issym;
}
Example #3
0
value_t fl_accum_julia_symbol(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
{
    argcount(fl_ctx, "accum-julia-symbol", nargs, 2);
    ios_t *s = fl_toiostream(fl_ctx, args[1], "accum-julia-symbol");
    if (!iscprim(args[0]) || ((cprim_t*)ptr(args[0]))->type != fl_ctx->wchartype)
        type_error(fl_ctx, "accum-julia-symbol", "wchar", args[0]);
    uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0]));
    ios_t str;
    int allascii=1;
    ios_mem(&str, 0);
    do {
        allascii &= (wc <= 0x7f);
        ios_getutf8(s, &wc);
        if (wc == '!') {
            uint32_t nwc;
            ios_peekutf8(s, &nwc);
            // make sure != is always an operator
            if (nwc == '=') {
                ios_ungetc('!', s);
                break;
            }
        }
        ios_pututf8(&str, wc);
        if (ios_peekutf8(s, &wc) == IOS_EOF)
            break;
    } while (jl_id_char(wc));
    ios_pututf8(&str, 0);
    return symbol(fl_ctx, allascii ? str.buf : normalize(fl_ctx, str.buf));
}
Example #4
0
static value_t read_string(void)
{
    char *buf, *temp;
    char eseq[10];
    size_t i=0, j, sz = 64, ndig;
    int c;
    value_t s;
    u_int32_t wc=0;

    buf = malloc(sz);
    while (1) {
        if (i >= sz-4) {  // -4: leaves room for longest utf8 sequence
            sz *= 2;
            temp = realloc(buf, sz);
            if (temp == NULL) {
                free(buf);
                lerror(ParseError, "read: out of memory reading string");
            }
            buf = temp;
        }
        c = ios_getc(F);
        if (c == IOS_EOF) {
            free(buf);
            lerror(ParseError, "read: unexpected end of input in string");
        }
        if (c == '"')
            break;
        else if (c == '\\') {
            c = ios_getc(F);
            if (c == IOS_EOF) {
                free(buf);
                lerror(ParseError, "read: end of input in escape sequence");
            }
            j=0;
            if (octal_digit(c)) {
                do {
                    eseq[j++] = c;
                    c = ios_getc(F);
                } while (octal_digit(c) && j<3 && (c!=IOS_EOF));
                if (c!=IOS_EOF) ios_ungetc(c, F);
                eseq[j] = '\0';
                wc = strtol(eseq, NULL, 8);
                // \DDD and \xXX read bytes, not characters
                buf[i++] = ((char)wc);
            }
            else if ((c=='x' && (ndig=2)) ||
                     (c=='u' && (ndig=4)) ||
                     (c=='U' && (ndig=8))) {
                c = ios_getc(F);
                while (hex_digit(c) && j<ndig && (c!=IOS_EOF)) {
                    eseq[j++] = c;
                    c = ios_getc(F);
                }
                if (c!=IOS_EOF) ios_ungetc(c, F);
                eseq[j] = '\0';
                if (j) wc = strtol(eseq, NULL, 16);
                if (!j || wc > 0x10ffff) {
                    free(buf);
                    lerror(ParseError, "read: invalid escape sequence");
                }
                if (ndig == 2)
                    buf[i++] = ((char)wc);
                else
                    i += u8_wc_toutf8(&buf[i], wc);
            }
            else {
                buf[i++] = read_escape_control_char((char)c);
            }
        }
        else {
            buf[i++] = c;
        }
    }
    s = cvalue_string(i);
    memcpy(cvalue_data(s), buf, i);
    free(buf);
    return s;
}
Example #5
0
static u_int32_t peek(void)
{
    char c, *end;
    fixnum_t x;
    int ch, base;

    if (toktype != TOK_NONE)
        return toktype;
    c = nextchar();
    if (ios_eof(F)) return TOK_NONE;
    if (c == '(') {
        toktype = TOK_OPEN;
    }
    else if (c == ')') {
        toktype = TOK_CLOSE;
    }
    else if (c == '[') {
        toktype = TOK_OPENB;
    }
    else if (c == ']') {
        toktype = TOK_CLOSEB;
    }
    else if (c == '\'') {
        toktype = TOK_QUOTE;
    }
    else if (c == '`') {
        toktype = TOK_BQ;
    }
    else if (c == '"') {
        toktype = TOK_DOUBLEQUOTE;
    }
    else if (c == '#') {
        ch = ios_getc(F); c = (char)ch;
        if (ch == IOS_EOF)
            lerror(ParseError, "read: invalid read macro");
        if (c == '.') {
            toktype = TOK_SHARPDOT;
        }
        else if (c == '\'') {
            toktype = TOK_SHARPQUOTE;
        }
        else if (c == '\\') {
            uint32_t cval;
            if (ios_getutf8(F, &cval) == IOS_EOF)
                lerror(ParseError, "read: end of input in character constant");
            if (cval == (uint32_t)'u' || cval == (uint32_t)'U' ||
                cval == (uint32_t)'x') {
                read_token('u', 0);
                if (buf[1] != '\0') {  // not a solitary 'u','U','x'
                    if (!read_numtok(&buf[1], &tokval, 16))
                        lerror(ParseError,
                               "read: invalid hex character constant");
                    cval = numval(tokval);
                }
            }
            else if (cval >= 'a' && cval <= 'z') {
                read_token((char)cval, 0);
                tokval = symbol(buf);
                if (buf[1] == '\0')       /* one character */;
                else if (tokval == nulsym)        cval = 0x00;
                else if (tokval == alarmsym)      cval = 0x07;
                else if (tokval == backspacesym)  cval = 0x08;
                else if (tokval == tabsym)        cval = 0x09;
                else if (tokval == linefeedsym)   cval = 0x0A;
                else if (tokval == newlinesym)    cval = 0x0A;
                else if (tokval == vtabsym)       cval = 0x0B;
                else if (tokval == pagesym)       cval = 0x0C;
                else if (tokval == returnsym)     cval = 0x0D;
                else if (tokval == escsym)        cval = 0x1B;
                else if (tokval == spacesym)      cval = 0x20;
                else if (tokval == deletesym)     cval = 0x7F;
                else
                    lerrorf(ParseError, "read: unknown character #\\%s", buf);
            }
            toktype = TOK_NUM;
            tokval = mk_wchar(cval);
        }
        else if (c == '(') {
            toktype = TOK_SHARPOPEN;
        }
        else if (c == '<') {
            lerror(ParseError, "read: unreadable object");
        }
        else if (isdigit(c)) {
            read_token(c, 1);
            c = (char)ios_getc(F);
            if (c == '#')
                toktype = TOK_BACKREF;
            else if (c == '=')
                toktype = TOK_LABEL;
            else
                lerror(ParseError, "read: invalid label");
            errno = 0;
            x = strtol(buf, &end, 10);
            if (*end != '\0' || errno)
                lerror(ParseError, "read: invalid label");
            tokval = fixnum(x);
        }
        else if (c == '!') {
            // #! single line comment for shbang script support
            do {
                ch = ios_getc(F);
            } while (ch != IOS_EOF && (char)ch != '\n');
            return peek();
        }
        else if (c == '|') {
            // multiline comment
            int commentlevel=1;
            while (1) {
                ch = ios_getc(F);
            hashpipe_gotc:
                if (ch == IOS_EOF)
                    lerror(ParseError, "read: eof within comment");
                if ((char)ch == '|') {
                    ch = ios_getc(F);
                    if ((char)ch == '#') {
                        commentlevel--;
                        if (commentlevel == 0)
                            break;
                        else
                            continue;
                    }
                    goto hashpipe_gotc;
                }
                else if ((char)ch == '#') {
                    ch = ios_getc(F);
                    if ((char)ch == '|')
                        commentlevel++;
                    else
                        goto hashpipe_gotc;
                }
            }
            // this was whitespace, so keep peeking
            return peek();
        }
        else if (c == ';') {
            // datum comment
            (void)do_read_sexpr(UNBOUND); // skip
            return peek();
        }
        else if (c == ':') {
            // gensym
            ch = ios_getc(F);
            if ((char)ch == 'g')
                ch = ios_getc(F);
            read_token((char)ch, 0);
            errno = 0;
            x = strtol(buf, &end, 10);
            if (*end != '\0' || buf[0] == '\0' || errno)
                lerror(ParseError, "read: invalid gensym label");
            toktype = TOK_GENSYM;
            tokval = fixnum(x);
        }
        else if (symchar(c)) {
            read_token(ch, 0);

            if (((c == 'b' && (base= 2)) ||
                 (c == 'o' && (base= 8)) ||
                 (c == 'd' && (base=10)) ||
                 (c == 'x' && (base=16))) &&
                (isdigit_base(buf[1],base) ||
                 buf[1]=='-')) {
                if (!read_numtok(&buf[1], &tokval, base))
                    lerrorf(ParseError, "read: invalid base %d constant", base);
                return (toktype=TOK_NUM);
            }

            toktype = TOK_SHARPSYM;
            tokval = symbol(buf);
        }
        else {
            lerror(ParseError, "read: unknown read macro");
        }
    }
    else if (c == ',') {
        toktype = TOK_COMMA;
        ch = ios_getc(F);
        if (ch == IOS_EOF)
            return toktype;
        if ((char)ch == '@')
            toktype = TOK_COMMAAT;
        else if ((char)ch == '.')
            toktype = TOK_COMMADOT;
        else
            ios_ungetc((char)ch, F);
    }
    else {
        if (!read_token(c, 0)) {
            if (buf[0]=='.' && buf[1]=='\0') {
                return (toktype=TOK_DOT);
            }
            else {
                if (read_numtok(buf, &tokval, 0))
                    return (toktype=TOK_NUM);
            }
        }
        toktype = TOK_SYM;
        tokval = symbol(buf);
    }
    return toktype;
}