Пример #1
0
buzztok_t buzzlex_nexttok(buzzlex_t lex) {
   buzzlex_file_t lexf = buzzlex_getfile(lex);
   do {
      /* Look for a non-space character */
      do {
         /* Keep reading until you find a non-space character or end of stream */
         while(lexf->cur_c < lexf->buf_size &&
               buzzlex_isspace(lexf->buf[lexf->cur_c])) {
            nextchar();
         }
         /* End of stream? */
         if(lexf->cur_c >= lexf->buf_size) {
            /* Done with current file, go back to previous */
            buzzdarray_pop(lex);
            if(buzzdarray_isempty(lex))
               /* No file to go back to, done parsing */
               return NULL;
            lexf = buzzlex_getfile(lex);
         }
         else
            /* Non-space character found */
            break;
      } while(1);
      /* Non-space character found */
      /* If the current character is a '#' ignore the rest of the line */
      if(lexf->buf[lexf->cur_c] == '#') {
         do {
            nextchar();
         }
         while(lexf->cur_c < lexf->buf_size &&
               lexf->buf[lexf->cur_c] != '\n');
         /* End of stream? */
         if(lexf->cur_c >= lexf->buf_size) {
            /* Done with current file, go back to previous */
            buzzdarray_pop(lex);
            if(buzzdarray_isempty(lex))
               /* No file to go back to, done parsing */
               return NULL;
            lexf = buzzlex_getfile(lex);
         }
         else {
            /* New line and carry on */
            ++lexf->cur_line;
            lexf->cur_col = 0;
            ++lexf->cur_c;
         }
      }
      else if(strncmp(lexf->buf + lexf->cur_c, "include", 7) == 0) {
         /* Manage file inclusion */
         lexf->cur_c += 7;
         lexf->cur_col += 7;
         /* Skip whitespace */
         while(lexf->cur_c < lexf->buf_size &&
               buzzlex_isspace(lexf->buf[lexf->cur_c])) {
            nextchar();
         }
         /* End of file or not-string opening -> syntax error */
         if(lexf->cur_c >= lexf->buf_size ||
            !buzzlex_isquote(lexf->buf[lexf->cur_c])) {
            fprintf(stderr,
                    "%s:%" PRIu64 ":%" PRIu64 ": Syntax error: expected string after include\n",
                    lexf->fname,
                    lexf->cur_line,
                    lexf->cur_col);
            return NULL;
         }
         /* Read string */
         char quote = lexf->buf[lexf->cur_c];
         size_t start = lexf->cur_c + 1;
         nextchar();
         while(lexf->cur_c < lexf->buf_size &&
               lexf->buf[lexf->cur_c] != quote &&
               lexf->buf[lexf->cur_c] != '\n') {
            nextchar();
         }
         /* End of file or newline -> syntax error */
         if(lexf->cur_c >= lexf->buf_size ||
            lexf->buf[lexf->cur_c] == '\n') {
            fprintf(stderr,
                    "%s:%" PRIu64 ":%" PRIu64 ": Syntax error: expected end of string\n",
                    lexf->fname,
                    lexf->cur_line,
                    lexf->cur_col);
            return NULL;
         }
         /* Copy data into a new string */
         char* fname = (char*)malloc(lexf->cur_c - start + 1);
         strncpy(fname, lexf->buf + start, lexf->cur_c - start);
         fname[lexf->cur_c - start] = '\0';
         /* Get to next character in this file */
         nextchar();
         /* Create new file structure */
         buzzlex_file_t f = buzzlex_file_new(fname);
         if(!f) {
            fprintf(stderr,
                    "%s:%" PRIu64 ":%" PRIu64 ": Can't read '%s'\n",
                    lexf->fname,
                    lexf->cur_line,
                    lexf->cur_col,
                    fname);
            free(fname);
            return NULL;
         }
         free(fname);
         /* Make sure the file hasn't been already included */
         if(buzzdarray_find(lex, buzzlex_file_cmp, &f) < buzzdarray_size(lex)) {
            buzzlex_file_destroy(0, &f, NULL);
         }
         else {
            /* Push file structure */
            buzzdarray_push(lex, &f);
            lexf = buzzlex_getfile(lex);
         }
      }
      else
         /* The character must be parsed */
         break;
   }
   while(1);
   /* If we get here it's because we read potential token character */
   uint64_t tokstart = lexf->cur_col - 1;
   char c = lexf->buf[lexf->cur_c];
   nextchar();
   /* Consider the 1-char non-alphanumeric cases first */
   switch(c) {
      case '\n': {
         buzztok_t tok = buzzlex_newtok(BUZZTOK_STATEND,
                                        NULL,
                                        lexf->cur_line,
                                        tokstart,
                                        lexf->fname);
         ++lexf->cur_line;
         lexf->cur_col = 0;
         return tok;
      }
      casetokchar(';', BUZZTOK_STATEND);
      casetokchar('{', BUZZTOK_BLOCKOPEN);
      casetokchar('}', BUZZTOK_BLOCKCLOSE);
      casetokchar('(', BUZZTOK_PAROPEN);
      casetokchar(')', BUZZTOK_PARCLOSE);
      casetokchar('[', BUZZTOK_IDXOPEN);
      casetokchar(']', BUZZTOK_IDXCLOSE);
      casetokchar(',', BUZZTOK_LISTSEP);
      casetokchar('.', BUZZTOK_DOT);
   }
   /* If we get here, it's because we found either a constant, an
    * identifier, a keyword, an assignment, a comparison operator,
    * an arithmetic operator, or an unexpected character */
   if(isdigit(c)) {
      /* It's a constant */
      readval(buzzlex_isnumber);
      return buzzlex_newtok(BUZZTOK_CONST,
                            val,
                            lexf->cur_line,
                            tokstart,
                            lexf->fname);
   }
   else if(isalpha(c)) {
      /* It's either a keyword or an identifier */
      readval(buzzlex_isid);
      /* Go through the possible keywords */
      checkkeyword("var",      BUZZTOK_VAR);
      checkkeyword("if",       BUZZTOK_IF);
      checkkeyword("else",     BUZZTOK_ELSE);
      checkkeyword("function", BUZZTOK_FUN);
      checkkeyword("return",   BUZZTOK_RETURN);
      checkkeyword("for",      BUZZTOK_FOR);
      checkkeyword("while",    BUZZTOK_WHILE);
      checkkeyword("and",      BUZZTOK_ANDOR);
      checkkeyword("or",       BUZZTOK_ANDOR);
      checkkeyword("not",      BUZZTOK_NOT);
      checkkeyword("nil",      BUZZTOK_NIL);
      /* No keyword found, consider it an id */
      return buzzlex_newtok(BUZZTOK_ID,
                            val,
                            lexf->cur_line,
                            tokstart,
                            lexf->fname);
   }
   else if(c == '=') {
      /* Either an assignment or a comparison */
      if(lexf->cur_c < lexf->buf_size &&
         lexf->buf[lexf->cur_c] == '=') {
         /* It's a comparison */
         nextchar();
         return buzzlex_newtok(BUZZTOK_CMP,
                               strdup("=="),
                               lexf->cur_line,
                               tokstart,
                               lexf->fname);
      }
      else {
         /* It's an assignment */
         return buzzlex_newtok(BUZZTOK_ASSIGN,
                               NULL,
                               lexf->cur_line,
                               tokstart,
                               lexf->fname);
      }
   }
   else if(c == '!') {
      /* Comparison operator? */
      if(lexf->cur_c < lexf->buf_size &&
         lexf->buf[lexf->cur_c] == '=') {
         /* It's a comparison */
         nextchar();
         return buzzlex_newtok(BUZZTOK_CMP,
                               strdup("!="),
                               lexf->cur_line,
                               tokstart,
                               lexf->fname);
      }
      else {
         /* Syntax error */
         fprintf(stderr,
                 "%s:%" PRIu64 ":%" PRIu64 ": Syntax error: expected '=' after '!'\n",
                 lexf->fname,
                 lexf->cur_line,
                 tokstart);
         return NULL;
      }
   }
   else if((c == '<') || (c == '>')) {
      /* It's a comparison operator */
      size_t start = lexf->cur_c - 1;
      /* Include the '=' if present */
      if(lexf->cur_c < lexf->buf_size &&
         lexf->buf[lexf->cur_c] == '=') {
         nextchar();
      }
      char* val = (char*)malloc(lexf->cur_c - start + 1);
      strncpy(val, lexf->buf + start, lexf->cur_c - start);
      val[lexf->cur_c - start] = 0;
      return buzzlex_newtok(BUZZTOK_CMP,
                            val,
                            lexf->cur_line,
                            tokstart,
                            lexf->fname);
   }
   else if(buzzlex_isarith(c)) {
      /* Arithmetic operator */
      char* val = (char*)malloc(2);
      strncpy(val, lexf->buf + lexf->cur_c - 1, 1);
      val[1] = 0;
      switch(c) {
         case '+': case '-': {
            return buzzlex_newtok(BUZZTOK_ADDSUB,
                                  val,
                                  lexf->cur_line,
                                  tokstart,
                                  lexf->fname);
         }
         case '*': case '/': {
            return buzzlex_newtok(BUZZTOK_MULDIV,
                                  val,
                                  lexf->cur_line,
                                  tokstart,
                                  lexf->fname);
         }
         case '%': {
            return buzzlex_newtok(BUZZTOK_MOD,
                                  val,
                                  lexf->cur_line,
                                  tokstart,
                                  lexf->fname);
         }
         case '^': {
            return buzzlex_newtok(BUZZTOK_POW,
                                  val,
                                  lexf->cur_line,
                                  tokstart,
                                  lexf->fname);
         }
         default:
            return NULL;
      }
   }
   else if(buzzlex_isquote(c)) {
      /* String - eat any character until you find the next matching quote */
      size_t start = lexf->cur_c;
      char last1 = 0, last2 = 0;
      while(lexf->cur_c < lexf->buf_size &&    /* Not end of stream */
            ((lexf->buf[lexf->cur_c] != c) ||  /* Matching quote not found */
             (lexf->buf[lexf->cur_c] == c &&   /* Matching quote found, but preceded by \ and not \\ */
              last1 == '\\' && last2 != '\\'))) {
         /* Remember the last two characters read */
         last2 = last1;
         last1 = lexf->buf[lexf->cur_c];
         /* Keep parsing the string */
         if(lexf->buf[lexf->cur_c] != '\n') {
            nextchar();
         }
         else {
            fprintf(stderr,
                    "%s:%" PRIu64 ":%" PRIu64 ": Syntax error: string closing quote not found\n",
                    lexf->fname,
                    lexf->cur_line,
                    tokstart);
            return NULL;
         }
      }
      /* End of stream? Syntax error */
      if(lexf->cur_c >= lexf->buf_size) {
         fprintf(stderr,
                 "%s:%" PRIu64 ":%" PRIu64 ": Syntax error: string closing quote not found\n",
                 lexf->fname,
                 lexf->cur_line,
                 tokstart);
         return NULL;
      }
      /* We have a valid string */
      char* val = buzzlex_newstring(lexf->buf + start,
                                    lexf->cur_c - start);
      nextchar();
      return buzzlex_newtok(BUZZTOK_STRING,
                            val,
                            lexf->cur_line,
                            tokstart,
                            lexf->fname);
   }
   else {
      /* Unknown character */
      fprintf(stderr,
              "%s:%" PRIu64 ":%" PRIu64 ": Syntax error: unknown character '%c' (octal: %o; hex: %x)\n",
              lexf->fname,
              lexf->cur_line,
              tokstart,
              c, c, c);
      return NULL;
   }
}
Пример #2
0
buzztok_t buzzlex_nexttok(buzzlex_t lex) {
   do {
      /* Keep reading until you find a non-space character or end of stream */
      while(lex->cur_c < lex->buf_size &&
            buzzlex_isspace(lex->buf[lex->cur_c])) {
         nextchar();
      }
      /* End of stream? No token */
      if(lex->cur_c >= lex->buf_size) return NULL;
      /* If the current character is a '#' ignore the rest of the line */
      if(lex->buf[lex->cur_c] == '#') {
         do {
            nextchar();
         }
         while(lex->cur_c < lex->buf_size &&
               lex->buf[lex->cur_c] != '\n');
         /* End of stream? No token */
         if(lex->cur_c >= lex->buf_size) return NULL;
         /* New line and carry on */
         ++lex->cur_line;
         lex->cur_col = 0;
         ++lex->cur_c;
      }
      else {
         /* The current character must be parsed */
         break;
      }
   }
   while(1);
   /* If we get here it's because we read a non-space character */
   char c = lex->buf[lex->cur_c];
   nextchar();
   /* Consider the 1-char non-alphanumeric cases first */
   switch(c) {
      case '\n': {
         buzztok_t tok = buzzlex_newtok(BUZZTOK_STATEND,
                                        NULL,
                                        lex->cur_line,
                                        lex->cur_col);
         ++lex->cur_line;
         lex->cur_col = 0;
         return tok;
      }
      casetokchar(';', BUZZTOK_STATEND);
      casetokchar('{', BUZZTOK_BLOCKOPEN);
      casetokchar('}', BUZZTOK_BLOCKCLOSE);
      casetokchar('(', BUZZTOK_PAROPEN);
      casetokchar(')', BUZZTOK_PARCLOSE);
      casetokchar('[', BUZZTOK_IDXOPEN);
      casetokchar(']', BUZZTOK_IDXCLOSE);
      casetokchar(',', BUZZTOK_LISTSEP);
      casetokchar('.', BUZZTOK_DOT);
   }
   /* If we get here, it's because we found either a constant, an
    * identifier, a keyword, an assignment, a comparison operator,
    * an arithmetic operator, or an unexpected character */
   if(isdigit(c)) {
      /* It's a constant */
      readval(buzzlex_isnumber);
      return buzzlex_newtok(BUZZTOK_CONST,
                            val,
                            lex->cur_line,
                            lex->cur_col);
   }
   else if(isalpha(c)) {
      /* It's either a keyword or an identifier */
      readval(buzzlex_isid);
      /* Go through the possible keywords */
      checkkeyword("var",      BUZZTOK_VAR);
      checkkeyword("if",       BUZZTOK_IF);
      checkkeyword("else",     BUZZTOK_ELSE);
      checkkeyword("function", BUZZTOK_FUN);
      checkkeyword("return",   BUZZTOK_RETURN);
      checkkeyword("for",      BUZZTOK_FOR);
      checkkeyword("while",    BUZZTOK_WHILE);
      checkkeyword("and",      BUZZTOK_ANDOR);
      checkkeyword("or",       BUZZTOK_ANDOR);
      checkkeyword("not",      BUZZTOK_NOT);
      checkkeyword("nil",      BUZZTOK_NIL);
      /* No keyword found, consider it an id */
      return buzzlex_newtok(BUZZTOK_ID,
                            val,
                            lex->cur_line,
                            lex->cur_col);
   }
   else if(c == '=') {
      /* Either an assignment or a comparison */
      if(lex->cur_c < lex->buf_size &&
         lex->buf[lex->cur_c] == '=') {
         /* It's a comparison */
         nextchar();
         return buzzlex_newtok(BUZZTOK_CMP,
                               strdup("=="),
                               lex->cur_line,
                               lex->cur_col);
      }
      else {
         /* It's an assignment */
         return buzzlex_newtok(BUZZTOK_ASSIGN,
                               NULL,
                               lex->cur_line,
                               lex->cur_col);
      }
   }
   else if(c == '!') {
      /* Comparison operator? */
      if(lex->cur_c < lex->buf_size &&
         lex->buf[lex->cur_c] == '=') {
         /* It's a comparison */
         nextchar();
         return buzzlex_newtok(BUZZTOK_CMP,
                               strdup("!="),
                               lex->cur_line,
                               lex->cur_col);
      }
      else {
         /* Syntax error */
         fprintf(stderr,
                 "%s:%llu:%llu: Syntax error: expected '=' after '!'\n",
                 lex->fname,
                 lex->cur_line,
                 lex->cur_col);
         return NULL;
      }
   }
   else if((c == '<') || (c == '>')) {
      /* It's a comparison operator */
      size_t start = lex->cur_c - 1;
      /* Include the '=' if present */
      if(lex->cur_c < lex->buf_size &&
         lex->buf[lex->cur_c] == '=') {
         nextchar();
      }
      char* val = (char*)malloc(lex->cur_c - start + 1);
      strncpy(val, lex->buf + start, lex->cur_c - start);
      val[lex->cur_c - start] = 0;
      return buzzlex_newtok(BUZZTOK_CMP,
                            val,
                            lex->cur_line,
                            lex->cur_col);
   }
   else if(buzzlex_isarith(c)) {
      /* Arithmetic operator */
      char* val = (char*)malloc(2);
      strncpy(val, lex->buf + lex->cur_c - 1, 1);
      val[1] = 0;
      switch(c) {
         case '+': case '-': {
            return buzzlex_newtok(BUZZTOK_ADDSUB,
                                  val,
                                  lex->cur_line,
                                  lex->cur_col);
         }
         case '*': case '/': {
            return buzzlex_newtok(BUZZTOK_MULDIV,
                                  val,
                                  lex->cur_line,
                                  lex->cur_col);
         }
         case '%': {
            return buzzlex_newtok(BUZZTOK_MOD,
                                  val,
                                  lex->cur_line,
                                  lex->cur_col);
         }
         case '^': {
            return buzzlex_newtok(BUZZTOK_POW,
                                  val,
                                  lex->cur_line,
                                  lex->cur_col);
         }
         default:
            return NULL;
      }
   }
   else if(buzzlex_isquote(c)) {
      /* String - eat any character until you find the next matching quote */
      size_t start = lex->cur_c;
      while(lex->cur_c < lex->buf_size &&
            lex->buf[lex->cur_c] != c) {
         if(lex->buf[lex->cur_c] != '\n') {
            nextchar();
         }
         else {
            ++lex->cur_line;
            lex->cur_col = 0;
            ++lex->cur_c;
         }
      }
      /* End of stream? Syntax error */
      if(lex->cur_c >= lex->buf_size) {
         fprintf(stderr,
                 "%s:%llu:%llu: Syntax error: string closing quote not found\n",
                 lex->fname,
                 lex->cur_line,
                 lex->cur_col);
         return NULL;
      }
      /* Valid string */
      char* val = (char*)malloc(lex->cur_c - start + 1);
      strncpy(val, lex->buf + start, lex->cur_c - start);
      val[lex->cur_c - start] = '\0';
      nextchar();
      return buzzlex_newtok(BUZZTOK_STRING,
                            val,
                            lex->cur_line,
                            lex->cur_col);
   }
   else {
      /* Unknown character */
      fprintf(stderr,
              "%s:%llu:%llu: Syntax error: unknown character '%c' (octal: %o; hex: %x)\n",
              lex->fname,
              lex->cur_line,
              lex->cur_col,
              c, c, c);
      return NULL;
   }
}