/*
 * Reads the next token from the lexer.
 *
 * The lexer 'lex' is used as a stack of files: the file on top is the one
 * being read. When the current file is exhausted it is popped and lexing
 * resumes in the previous one. An "include" directive followed by a quoted
 * file name pushes a new file, unless buzzdarray_find() reports that an
 * equal file is already on the stack.
 *
 * Whitespace (as defined by buzzlex_isspace()) and '#' comments are skipped.
 * The newline that ends a comment is consumed without producing a
 * statement-end token.
 *
 * @param lex The lexer.
 * @return The next token, or NULL when there is no file left to read or when
 *         an error occurred. Errors are reported on stderr.
 *
 * NOTE(review): string values handed to buzzlex_newtok() are heap-allocated
 * here; presumably the token takes ownership of them -- confirm against
 * buzzlex_newtok() / the token destructor.
 */
buzztok_t buzzlex_nexttok(buzzlex_t lex) {
   static const char include_kw[] = "include";
   const size_t include_len = sizeof(include_kw) - 1;
   buzzlex_file_t lexf = buzzlex_getfile(lex);
   for(;;) {
      /* Look for a non-space character, unwinding finished files */
      for(;;) {
         /* Keep reading until a non-space character or end of stream */
         while(lexf->cur_c < lexf->buf_size &&
               buzzlex_isspace(lexf->buf[lexf->cur_c])) {
            nextchar();
         }
         /* Non-space character found */
         if(lexf->cur_c < lexf->buf_size) {
            break;
         }
         /* Done with current file, go back to previous */
         buzzdarray_pop(lex);
         if(buzzdarray_isempty(lex)) {
            /* No file to go back to, done parsing */
            return NULL;
         }
         lexf = buzzlex_getfile(lex);
      }
      if(lexf->buf[lexf->cur_c] == '#') {
         /* Comment: ignore the rest of the line */
         do {
            nextchar();
         } while(lexf->cur_c < lexf->buf_size &&
                 lexf->buf[lexf->cur_c] != '\n');
         if(lexf->cur_c >= lexf->buf_size) {
            /* Done with current file, go back to previous */
            buzzdarray_pop(lex);
            if(buzzdarray_isempty(lex)) {
               /* No file to go back to, done parsing */
               return NULL;
            }
            lexf = buzzlex_getfile(lex);
         }
         else {
            /* Step over the newline and carry on */
            ++lexf->cur_line;
            lexf->cur_col = 0;
            ++lexf->cur_c;
         }
      }
      else if(lexf->buf_size - lexf->cur_c >= include_len &&
              strncmp(lexf->buf + lexf->cur_c, include_kw, include_len) == 0 &&
              (lexf->buf_size - lexf->cur_c == include_len ||
               !buzzlex_isid(lexf->buf[lexf->cur_c + include_len]))) {
         /*
          * File inclusion. The keyword must fit in what is left of the
          * buffer and must not be the prefix of a longer identifier
          * (e.g. "included"), which is an ordinary id handled below.
          */
         lexf->cur_c += include_len;
         lexf->cur_col += include_len;
         /* Skip whitespace */
         while(lexf->cur_c < lexf->buf_size &&
               buzzlex_isspace(lexf->buf[lexf->cur_c])) {
            nextchar();
         }
         /* End of file or not-string opening -> syntax error */
         if(lexf->cur_c >= lexf->buf_size ||
            !buzzlex_isquote(lexf->buf[lexf->cur_c])) {
            fprintf(stderr,
                    "%s:%" PRIu64 ":%" PRIu64 ": Syntax error: expected string after include\n",
                    lexf->fname,
                    lexf->cur_line,
                    lexf->cur_col);
            return NULL;
         }
         /* Read the file name up to the matching quote */
         char quote = lexf->buf[lexf->cur_c];
         size_t start = lexf->cur_c + 1;
         nextchar();
         while(lexf->cur_c < lexf->buf_size &&
               lexf->buf[lexf->cur_c] != quote &&
               lexf->buf[lexf->cur_c] != '\n') {
            nextchar();
         }
         /* End of file or newline -> syntax error */
         if(lexf->cur_c >= lexf->buf_size ||
            lexf->buf[lexf->cur_c] == '\n') {
            fprintf(stderr,
                    "%s:%" PRIu64 ":%" PRIu64 ": Syntax error: expected end of string\n",
                    lexf->fname,
                    lexf->cur_line,
                    lexf->cur_col);
            return NULL;
         }
         /* Copy the file name into a new NUL-terminated string */
         size_t fname_len = lexf->cur_c - start;
         char* fname = (char*)malloc(fname_len + 1);
         if(!fname) {
            fprintf(stderr,
                    "%s:%" PRIu64 ":%" PRIu64 ": Out of memory\n",
                    lexf->fname,
                    lexf->cur_line,
                    lexf->cur_col);
            return NULL;
         }
         memcpy(fname, lexf->buf + start, fname_len);
         fname[fname_len] = '\0';
         /* Step over the closing quote in this file */
         nextchar();
         /* Create new file structure */
         buzzlex_file_t f = buzzlex_file_new(fname);
         if(!f) {
            fprintf(stderr,
                    "%s:%" PRIu64 ":%" PRIu64 ": Can't read '%s'\n",
                    lexf->fname,
                    lexf->cur_line,
                    lexf->cur_col,
                    fname);
            free(fname);
            return NULL;
         }
         free(fname);
         if(buzzdarray_find(lex, buzzlex_file_cmp, &f) < buzzdarray_size(lex)) {
            /* Already on the stack: drop the duplicate */
            buzzlex_file_destroy(0, &f, NULL);
         }
         else {
            /* Push the new file and continue lexing from it */
            buzzdarray_push(lex, &f);
            lexf = buzzlex_getfile(lex);
         }
      }
      else {
         /* The character must be parsed */
         break;
      }
   }
   /* If we get here it's because we read a potential token character */
   /* NOTE(review): if cur_col can be 0 at this point (it is reset to 0
    * after a newline), this subtraction wraps around -- confirm the column
    * convention used by nextchar() */
   uint64_t tokstart = lexf->cur_col - 1;
   char c = lexf->buf[lexf->cur_c];
   nextchar();
   /* Consider the 1-char non-alphanumeric cases first */
   switch(c) {
      case '\n': {
         buzztok_t tok = buzzlex_newtok(BUZZTOK_STATEND,
                                        NULL,
                                        lexf->cur_line,
                                        tokstart,
                                        lexf->fname);
         ++lexf->cur_line;
         lexf->cur_col = 0;
         return tok;
      }
      casetokchar(';', BUZZTOK_STATEND);
      casetokchar('{', BUZZTOK_BLOCKOPEN);
      casetokchar('}', BUZZTOK_BLOCKCLOSE);
      casetokchar('(', BUZZTOK_PAROPEN);
      casetokchar(')', BUZZTOK_PARCLOSE);
      casetokchar('[', BUZZTOK_IDXOPEN);
      casetokchar(']', BUZZTOK_IDXCLOSE);
      casetokchar(',', BUZZTOK_LISTSEP);
      casetokchar('.', BUZZTOK_DOT);
      default:
         break;
   }
   /* If we get here, it's because we found either a constant, an
    * identifier, a keyword, an assignment, a comparison operator,
    * an arithmetic operator, a string, or an unexpected character.
    * The <ctype.h> classifiers need an unsigned char value: passing a
    * negative plain char is undefined behavior. */
   if(isdigit((unsigned char)c)) {
      /* It's a constant */
      readval(buzzlex_isnumber);
      return buzzlex_newtok(BUZZTOK_CONST,
                            val,
                            lexf->cur_line,
                            tokstart,
                            lexf->fname);
   }
   else if(isalpha((unsigned char)c)) {
      /* It's either a keyword or an identifier */
      readval(buzzlex_isid);
      /* Go through the possible keywords */
      checkkeyword("var",      BUZZTOK_VAR);
      checkkeyword("if",       BUZZTOK_IF);
      checkkeyword("else",     BUZZTOK_ELSE);
      checkkeyword("function", BUZZTOK_FUN);
      checkkeyword("return",   BUZZTOK_RETURN);
      checkkeyword("for",      BUZZTOK_FOR);
      checkkeyword("while",    BUZZTOK_WHILE);
      checkkeyword("and",      BUZZTOK_ANDOR);
      checkkeyword("or",       BUZZTOK_ANDOR);
      checkkeyword("not",      BUZZTOK_NOT);
      checkkeyword("nil",      BUZZTOK_NIL);
      /* No keyword found, consider it an id */
      return buzzlex_newtok(BUZZTOK_ID,
                            val,
                            lexf->cur_line,
                            tokstart,
                            lexf->fname);
   }
   else if(c == '=') {
      /* Either an assignment or a comparison */
      if(lexf->cur_c < lexf->buf_size &&
         lexf->buf[lexf->cur_c] == '=') {
         /* It's a comparison */
         nextchar();
         return buzzlex_newtok(BUZZTOK_CMP,
                               strdup("=="),
                               lexf->cur_line,
                               tokstart,
                               lexf->fname);
      }
      else {
         /* It's an assignment */
         return buzzlex_newtok(BUZZTOK_ASSIGN,
                               NULL,
                               lexf->cur_line,
                               tokstart,
                               lexf->fname);
      }
   }
   else if(c == '!') {
      /* Only valid as the first half of "!=" */
      if(lexf->cur_c < lexf->buf_size &&
         lexf->buf[lexf->cur_c] == '=') {
         /* It's a comparison */
         nextchar();
         return buzzlex_newtok(BUZZTOK_CMP,
                               strdup("!="),
                               lexf->cur_line,
                               tokstart,
                               lexf->fname);
      }
      else {
         /* Syntax error */
         fprintf(stderr,
                 "%s:%" PRIu64 ":%" PRIu64 ": Syntax error: expected '=' after '!'\n",
                 lexf->fname,
                 lexf->cur_line,
                 tokstart);
         return NULL;
      }
   }
   else if((c == '<') || (c == '>')) {
      /* It's a comparison operator */
      size_t start = lexf->cur_c - 1;
      /* Include the '=' if present */
      if(lexf->cur_c < lexf->buf_size &&
         lexf->buf[lexf->cur_c] == '=') {
         nextchar();
      }
      size_t len = lexf->cur_c - start;
      char* val = (char*)malloc(len + 1);
      if(!val) {
         fprintf(stderr,
                 "%s:%" PRIu64 ":%" PRIu64 ": Out of memory\n",
                 lexf->fname,
                 lexf->cur_line,
                 tokstart);
         return NULL;
      }
      memcpy(val, lexf->buf + start, len);
      val[len] = '\0';
      return buzzlex_newtok(BUZZTOK_CMP,
                            val,
                            lexf->cur_line,
                            tokstart,
                            lexf->fname);
   }
   else if(buzzlex_isarith(c)) {
      /* Arithmetic operator: the value is the operator character itself */
      char* val = (char*)malloc(2);
      if(!val) {
         fprintf(stderr,
                 "%s:%" PRIu64 ":%" PRIu64 ": Out of memory\n",
                 lexf->fname,
                 lexf->cur_line,
                 tokstart);
         return NULL;
      }
      val[0] = c;
      val[1] = '\0';
      switch(c) {
         case '+': case '-': {
            return buzzlex_newtok(BUZZTOK_ADDSUB,
                                  val,
                                  lexf->cur_line,
                                  tokstart,
                                  lexf->fname);
         }
         case '*': case '/': {
            return buzzlex_newtok(BUZZTOK_MULDIV,
                                  val,
                                  lexf->cur_line,
                                  tokstart,
                                  lexf->fname);
         }
         case '%': {
            return buzzlex_newtok(BUZZTOK_MOD,
                                  val,
                                  lexf->cur_line,
                                  tokstart,
                                  lexf->fname);
         }
         case '^': {
            return buzzlex_newtok(BUZZTOK_POW,
                                  val,
                                  lexf->cur_line,
                                  tokstart,
                                  lexf->fname);
         }
         default:
            /* No token is created, so release the value */
            free(val);
            return NULL;
      }
   }
   else if(buzzlex_isquote(c)) {
      /* String - eat any character until you find the next matching quote */
      size_t start = lexf->cur_c;
      char last1 = 0, last2 = 0;
      while(lexf->cur_c < lexf->buf_size &&      /* Not end of stream */
            ((lexf->buf[lexf->cur_c] != c) ||    /* Matching quote not found */
             (lexf->buf[lexf->cur_c] == c &&     /* Matching quote found, but preceded by \ and not \\ */
              last1 == '\\' && last2 != '\\'))) {
         /* Remember the last two characters read */
         last2 = last1;
         last1 = lexf->buf[lexf->cur_c];
         /* Strings cannot span lines */
         if(lexf->buf[lexf->cur_c] != '\n') {
            nextchar();
         }
         else {
            fprintf(stderr,
                    "%s:%" PRIu64 ":%" PRIu64 ": Syntax error: string closing quote not found\n",
                    lexf->fname,
                    lexf->cur_line,
                    tokstart);
            return NULL;
         }
      }
      /* End of stream? Syntax error */
      if(lexf->cur_c >= lexf->buf_size) {
         fprintf(stderr,
                 "%s:%" PRIu64 ":%" PRIu64 ": Syntax error: string closing quote not found\n",
                 lexf->fname,
                 lexf->cur_line,
                 tokstart);
         return NULL;
      }
      /* We have a valid string; the quotes are not part of the value */
      char* val = buzzlex_newstring(lexf->buf + start,
                                    lexf->cur_c - start);
      /* Step over the closing quote */
      nextchar();
      return buzzlex_newtok(BUZZTOK_STRING,
                            val,
                            lexf->cur_line,
                            tokstart,
                            lexf->fname);
   }
   else {
      /* Unknown character; print its byte value for the octal/hex forms */
      fprintf(stderr,
              "%s:%" PRIu64 ":%" PRIu64 ": Syntax error: unknown character '%c' (octal: %o; hex: %x)\n",
              lexf->fname,
              lexf->cur_line,
              tokstart,
              c,
              (unsigned int)(unsigned char)c,
              (unsigned int)(unsigned char)c);
      return NULL;
   }
}
/*
 * Reads the next token from the lexer buffer.
 *
 * Whitespace (as defined by buzzlex_isspace()) and '#' comments are skipped.
 * The newline that ends a comment is consumed without producing a
 * statement-end token. Strings may span several lines; the line counter is
 * updated while the string is read.
 *
 * The line and column stored in a token are the lexer's position fields as
 * they stand after the token's characters have been consumed.
 *
 * @param lex The lexer.
 * @return The next token, or NULL when the end of the buffer is reached or
 *         when an error occurred. Errors are reported on stderr.
 *
 * NOTE(review): string values handed to buzzlex_newtok() are heap-allocated
 * here; presumably the token takes ownership of them -- confirm against
 * buzzlex_newtok() / the token destructor.
 */
buzztok_t buzzlex_nexttok(buzzlex_t lex) {
   for(;;) {
      /* Keep reading until a non-space character or end of stream */
      while(lex->cur_c < lex->buf_size &&
            buzzlex_isspace(lex->buf[lex->cur_c])) {
         nextchar();
      }
      /* End of stream? No token */
      if(lex->cur_c >= lex->buf_size) {
         return NULL;
      }
      /* Anything but a comment must be parsed */
      if(lex->buf[lex->cur_c] != '#') {
         break;
      }
      /* Comment: ignore the rest of the line */
      do {
         nextchar();
      } while(lex->cur_c < lex->buf_size &&
              lex->buf[lex->cur_c] != '\n');
      /* End of stream? No token */
      if(lex->cur_c >= lex->buf_size) {
         return NULL;
      }
      /* Step over the newline and carry on */
      ++lex->cur_line;
      lex->cur_col = 0;
      ++lex->cur_c;
   }
   /* If we get here it's because we read a non-space character */
   char c = lex->buf[lex->cur_c];
   nextchar();
   /* Consider the 1-char non-alphanumeric cases first */
   switch(c) {
      case '\n': {
         buzztok_t tok = buzzlex_newtok(BUZZTOK_STATEND,
                                        NULL,
                                        lex->cur_line,
                                        lex->cur_col);
         ++lex->cur_line;
         lex->cur_col = 0;
         return tok;
      }
      casetokchar(';', BUZZTOK_STATEND);
      casetokchar('{', BUZZTOK_BLOCKOPEN);
      casetokchar('}', BUZZTOK_BLOCKCLOSE);
      casetokchar('(', BUZZTOK_PAROPEN);
      casetokchar(')', BUZZTOK_PARCLOSE);
      casetokchar('[', BUZZTOK_IDXOPEN);
      casetokchar(']', BUZZTOK_IDXCLOSE);
      casetokchar(',', BUZZTOK_LISTSEP);
      casetokchar('.', BUZZTOK_DOT);
      default:
         break;
   }
   /* If we get here, it's because we found either a constant, an
    * identifier, a keyword, an assignment, a comparison operator,
    * an arithmetic operator, a string, or an unexpected character.
    * The <ctype.h> classifiers need an unsigned char value: passing a
    * negative plain char is undefined behavior. */
   if(isdigit((unsigned char)c)) {
      /* It's a constant */
      readval(buzzlex_isnumber);
      return buzzlex_newtok(BUZZTOK_CONST,
                            val,
                            lex->cur_line,
                            lex->cur_col);
   }
   else if(isalpha((unsigned char)c)) {
      /* It's either a keyword or an identifier */
      readval(buzzlex_isid);
      /* Go through the possible keywords */
      checkkeyword("var",      BUZZTOK_VAR);
      checkkeyword("if",       BUZZTOK_IF);
      checkkeyword("else",     BUZZTOK_ELSE);
      checkkeyword("function", BUZZTOK_FUN);
      checkkeyword("return",   BUZZTOK_RETURN);
      checkkeyword("for",      BUZZTOK_FOR);
      checkkeyword("while",    BUZZTOK_WHILE);
      checkkeyword("and",      BUZZTOK_ANDOR);
      checkkeyword("or",       BUZZTOK_ANDOR);
      checkkeyword("not",      BUZZTOK_NOT);
      checkkeyword("nil",      BUZZTOK_NIL);
      /* No keyword found, consider it an id */
      return buzzlex_newtok(BUZZTOK_ID,
                            val,
                            lex->cur_line,
                            lex->cur_col);
   }
   else if(c == '=') {
      /* Either an assignment or a comparison */
      if(lex->cur_c < lex->buf_size &&
         lex->buf[lex->cur_c] == '=') {
         /* It's a comparison */
         nextchar();
         return buzzlex_newtok(BUZZTOK_CMP,
                               strdup("=="),
                               lex->cur_line,
                               lex->cur_col);
      }
      else {
         /* It's an assignment */
         return buzzlex_newtok(BUZZTOK_ASSIGN,
                               NULL,
                               lex->cur_line,
                               lex->cur_col);
      }
   }
   else if(c == '!') {
      /* Only valid as the first half of "!=" */
      if(lex->cur_c < lex->buf_size &&
         lex->buf[lex->cur_c] == '=') {
         /* It's a comparison */
         nextchar();
         return buzzlex_newtok(BUZZTOK_CMP,
                               strdup("!="),
                               lex->cur_line,
                               lex->cur_col);
      }
      else {
         /* Syntax error. The casts make the arguments match %llu
          * whatever integer type the position fields have. */
         fprintf(stderr,
                 "%s:%llu:%llu: Syntax error: expected '=' after '!'\n",
                 lex->fname,
                 (unsigned long long)lex->cur_line,
                 (unsigned long long)lex->cur_col);
         return NULL;
      }
   }
   else if((c == '<') || (c == '>')) {
      /* It's a comparison operator */
      size_t start = lex->cur_c - 1;
      /* Include the '=' if present */
      if(lex->cur_c < lex->buf_size &&
         lex->buf[lex->cur_c] == '=') {
         nextchar();
      }
      size_t len = lex->cur_c - start;
      char* val = (char*)malloc(len + 1);
      if(!val) {
         fprintf(stderr,
                 "%s:%llu:%llu: Out of memory\n",
                 lex->fname,
                 (unsigned long long)lex->cur_line,
                 (unsigned long long)lex->cur_col);
         return NULL;
      }
      memcpy(val, lex->buf + start, len);
      val[len] = '\0';
      return buzzlex_newtok(BUZZTOK_CMP,
                            val,
                            lex->cur_line,
                            lex->cur_col);
   }
   else if(buzzlex_isarith(c)) {
      /* Arithmetic operator: the value is the operator character itself */
      char* val = (char*)malloc(2);
      if(!val) {
         fprintf(stderr,
                 "%s:%llu:%llu: Out of memory\n",
                 lex->fname,
                 (unsigned long long)lex->cur_line,
                 (unsigned long long)lex->cur_col);
         return NULL;
      }
      val[0] = c;
      val[1] = '\0';
      switch(c) {
         case '+': case '-': {
            return buzzlex_newtok(BUZZTOK_ADDSUB,
                                  val,
                                  lex->cur_line,
                                  lex->cur_col);
         }
         case '*': case '/': {
            return buzzlex_newtok(BUZZTOK_MULDIV,
                                  val,
                                  lex->cur_line,
                                  lex->cur_col);
         }
         case '%': {
            return buzzlex_newtok(BUZZTOK_MOD,
                                  val,
                                  lex->cur_line,
                                  lex->cur_col);
         }
         case '^': {
            return buzzlex_newtok(BUZZTOK_POW,
                                  val,
                                  lex->cur_line,
                                  lex->cur_col);
         }
         default:
            /* No token is created, so release the value */
            free(val);
            return NULL;
      }
   }
   else if(buzzlex_isquote(c)) {
      /* String - eat any character until you find the next matching quote */
      size_t start = lex->cur_c;
      while(lex->cur_c < lex->buf_size &&
            lex->buf[lex->cur_c] != c) {
         if(lex->buf[lex->cur_c] != '\n') {
            nextchar();
         }
         else {
            /* Newline inside the string: keep the position in sync */
            ++lex->cur_line;
            lex->cur_col = 0;
            ++lex->cur_c;
         }
      }
      /* End of stream? Syntax error */
      if(lex->cur_c >= lex->buf_size) {
         fprintf(stderr,
                 "%s:%llu:%llu: Syntax error: string closing quote not found\n",
                 lex->fname,
                 (unsigned long long)lex->cur_line,
                 (unsigned long long)lex->cur_col);
         return NULL;
      }
      /* Valid string; the quotes are not part of the value */
      size_t len = lex->cur_c - start;
      char* val = (char*)malloc(len + 1);
      if(!val) {
         fprintf(stderr,
                 "%s:%llu:%llu: Out of memory\n",
                 lex->fname,
                 (unsigned long long)lex->cur_line,
                 (unsigned long long)lex->cur_col);
         return NULL;
      }
      memcpy(val, lex->buf + start, len);
      val[len] = '\0';
      /* Step over the closing quote */
      nextchar();
      return buzzlex_newtok(BUZZTOK_STRING,
                            val,
                            lex->cur_line,
                            lex->cur_col);
   }
   else {
      /* Unknown character; print its byte value for the octal/hex forms */
      fprintf(stderr,
              "%s:%llu:%llu: Syntax error: unknown character '%c' (octal: %o; hex: %x)\n",
              lex->fname,
              (unsigned long long)lex->cur_line,
              (unsigned long long)lex->cur_col,
              c,
              (unsigned int)(unsigned char)c,
              (unsigned int)(unsigned char)c);
      return NULL;
   }
}