static Tok *toknext() { Tok *t; int c; eatspace(); c = peek(); if (c == End) { t = mktok(0); } else if (c == '\n') { curloc.line++; next(); t = mktok(Tendln); } else if (isalpha(c) || c == '_' || c == '$') { t = kwident(); } else if (c == '"') { t = strlit(); } else if (c == '\'') { t = charlit(); } else if (isdigit(c)) { t = numlit(); } else if (c == '@') { t = typaram(); } else { t = oper(); } if (!t || t->type == Terror) lfatal(curloc, "Unable to parse token starting with %c", c); return t; }
static Tok *charlit(void) { Tok *t; int c; uint32_t val; size_t len, sz; char *buf; assert(next() == '\''); buf = NULL; len = 0; sz = 0; val = 0; c = next(); if (c == End) lfatal(curloc, "Unexpected EOF within char lit"); else if (c == '\n') lfatal(curloc, "Newlines not allowed in char lit"); else if (c == '\\') val = decode(&buf, &len, &sz); else val = readutf(c, &buf, &len, &sz); append(&buf, &len, &sz, '\0'); if (next() != '\'') lfatal(curloc, "Character constant with multiple characters"); t = mktok(Tchrlit); t->chrval = val; t->id = buf; return t; }
static Tok *strlit(void) { Tok *t; int c; size_t len, sz; char *buf; assert(next() == '"'); buf = NULL; len = 0; sz = 0; while (1) { c = next(); /* we don't unescape here, but on output */ if (c == '"') break; else if (c == End) lfatal(curloc, "Unexpected EOF within string"); else if (c == '\n') lfatal(curloc, "Newlines not allowed in strings"); else if (c == '\\') decode(&buf, &len, &sz); else append(&buf, &len, &sz, c); }; t = mktok(Tstrlit); t->strval.len = len; /* null terminator should not count towards length */ append(&buf, &len, &sz, '\0'); t->strval.buf = buf; t->id = buf; return t; }
static Tok *strlit() { Tok *t; int c; size_t len, sz; char *buf; assert(next() == '"'); buf = NULL; len = 0; sz = 0; while (1) { c = next(); /* we don't unescape here, but on output */ if (c == '"') break; else if (c == End) fatal(line, "Unexpected EOF within string"); else if (c == '\n') fatal(line, "Newlines not allowed in strings"); else if (c == '\\') decode(&buf, &len, &sz); else append(&buf, &len, &sz, c); }; append(&buf, &len, &sz, '\0'); t = mktok(Tstrlit); t->str = buf; return t; }
/*
 * Lexes a keyword or identifier.
 *
 * identstr() fills a local buffer with the identifier text; kwd() maps
 * that text to the token type passed to mktok().  The token's id is a
 * strdup()'d copy of the text.
 *
 * Returns NULL when identstr() reports failure.
 */
static Tok *kwident(void)
{
    char name[1024];
    Tok *tok = NULL;

    if (identstr(name, sizeof name)) {
        tok = mktok(kwd(name));
        tok->id = strdup(name);
    }
    return tok;
}
/*
 * Lexes a type parameter: an '@' followed by an identifier.
 *
 * Returns a Ttyparam token whose id is a strdup()'d copy of the
 * identifier text (without the '@'), or NULL if either the '@' or the
 * identifier is missing.
 */
static Tok *typaram(void)
{
    char name[1024];
    Tok *tok;

    /* short-circuit keeps the original order: '@' first, then the name */
    if (!match('@') || !identstr(name, sizeof name))
        return NULL;

    tok = mktok(Ttyparam);
    tok->id = strdup(name);
    return tok;
}
static Tok *number(int base) { Tok *t; int start; int c; int isfloat; int unsignedval; /* because we allow '_' in numbers, and strtod/stroull don't, we * need a buffer that holds the number without '_'. */ char buf[2048]; size_t nbuf; t = NULL; isfloat = 0; start = fidx; nbuf = 0; for (c = peek(); isxdigit(c) || c == '.' || c == '_'; c = peek()) { next(); if (c == '_') continue; if (c == '.') isfloat = 1; else if (hexval(c) < 0 || hexval(c) > base) lfatal(curloc, "Integer digit '%c' outside of base %d", c, base); if (nbuf >= sizeof buf - 1) { buf[nbuf-1] = '\0'; lfatal(curloc, "number %s... too long to represent", buf); } buf[nbuf++] = c; } buf[nbuf] = '\0'; /* we only support base 10 floats */ if (isfloat && base == 10) { t = mktok(Tfloatlit); t->id = strdupn(&fbuf[start], fidx - start); t->fltval = strtod(buf, NULL); } else { t = mktok(Tintlit); t->id = strdupn(&fbuf[start], fidx - start); t->intval = strtoull(buf, NULL, base); /* check suffixes: * u -> unsigned * l -> 64 bit * i -> 32 bit * w -> 16 bit * b -> 8 bit */ unsignedval = 0; nextsuffix: switch (peek()) { case 'u': if (unsignedval == 1) lfatal(curloc, "Duplicate 'u' integer specifier"); next(); unsignedval = 1; goto nextsuffix; case 'l': next(); if (unsignedval) t->inttype = Tyuint64; else t->inttype = Tyint64; break; case 'i': next(); if (unsignedval) t->inttype = Tyuint32; else t->inttype = Tyint32; break; case 's': next(); if (unsignedval) t->inttype = Tyuint16; else t->inttype = Tyint16; break; case 'b': next(); if (unsignedval) t->inttype = Tyuint8; else t->inttype = Tyint8; break; default: if (unsignedval) lfatal(curloc, "Unrecognized character int type specifier after 'u'"); break; } } return t; }
static Tok *oper(void) { int tt; char c; c = next(); switch (c) { case '{': tt = Tobrace; break; case '}': tt = Tcbrace; break; case '(': tt = Toparen; break; case ')': tt = Tcparen; break; case '[': tt = Tosqbrac; break; case ']': tt = Tcsqbrac; break; case ',': tt = Tcomma; break; case '`': tt = Ttick; break; case '#': tt = Tderef; break; case ':': if (match(':')) tt = Twith; else tt = Tcolon; break; case '~': tt = Tbnot; break; case ';': if (match(';')) tt = Tendblk; else tt = Tendln; break; case '.': if (match('.')) { if (match('.')) { tt = Tellipsis; } else { unget(); tt = Tdot; } } else { tt = Tdot; } break; case '+': if (match('=')) tt = Taddeq; else if (match('+')) tt = Tinc; else tt = Tplus; break; case '-': if (match('=')) tt = Tsubeq; else if (match('-')) tt = Tdec; else if (match('>')) tt = Tret; else tt = Tminus; break; case '*': if (match('=')) tt = Tmuleq; else tt = Tmul; break; case '/': if (match('=')) tt = Tdiveq; else tt = Tdiv; break; case '%': if (match('=')) tt = Tmodeq; else tt = Tmod; break; case '=': if (match('=')) tt = Teq; else tt = Tasn; break; case '|': if (match('=')) tt = Tboreq; else if (match('|')) tt = Tlor; else tt = Tbor; break; case '&': if (match('=')) tt = Tbandeq; else if (match('&')) tt = Tland; else tt = Tband; break; case '^': if (match('=')) tt = Tbxoreq; else tt = Tbxor; break; case '<': if (match('=')) { tt = Tle; } else if (match('<')) { if (match('=')) tt = Tbsleq; else tt = Tbsl; } else { tt = Tlt; } break; case '>': if (match('=')) { tt = Tge; } else if (match('>')) { if (match('=')) tt = Tbsreq; else tt = Tbsr; } else { tt = Tgt; } break; case '!': if (match('=')) tt = Tne; else tt = Tlnot; break; default: tt = Terror; lfatal(curloc, "Junk character %c", c); break; } return mktok(tt); }
static Tok *number(int base) { Tok *t; int start; int c; int isfloat; int unsignedval; t = NULL; isfloat = 0; start = fidx; for (c = peek(); isxdigit(c) || c == '.' || c == '_'; c = peek()) { next(); if (c == '_') continue; if (c == '.') isfloat = 1; else if (hexval(c) < 0 || hexval(c) > base) fatal(line, "Integer digit '%c' outside of base %d", c, base); } /* we only support base 10 floats */ if (isfloat && base == 10) { t = mktok(Tfloatlit); t->str = strdupn(&fbuf[start], fidx - start); t->fltval = strtod(t->str, NULL); } else { t = mktok(Tintlit); t->str = strdupn(&fbuf[start], fidx - start); t->intval = strtol(t->str, NULL, base); /* check suffixes: * u -> unsigned * l -> 64 bit * i -> 32 bit * w -> 16 bit * b -> 8 bit */ unsignedval = 0; nextsuffix: switch (peek()) { case 'u': if (unsignedval == 1) fatal(line, "Duplicate 'u' integer specifier"); next(); unsignedval = 1; goto nextsuffix; case 'l': next(); if (unsignedval) t->inttype = Tyuint64; else t->inttype = Tyint64; break; case 'i': next(); if (unsignedval) t->inttype = Tyuint32; else t->inttype = Tyint32; break; case 's': next(); if (unsignedval) t->inttype = Tyuint16; else t->inttype = Tyint16; break; case 'b': next(); if (unsignedval) t->inttype = Tyuint8; else t->inttype = Tyint8; break; default: if (unsignedval) fatal(line, "Unrecognized character int type specifier after 'u'"); break; } } return t; }