/* *** CVC4 NOTE ***
 * This is copied from antlr3lexer.c; the CVC4-specific changes are marked
 * with "CVC4 EDIT" below.
 */

/**
 * Token-source callback: produces the next token that is neither skipped
 * nor rejected, or the token source's EOF token once the character stream
 * is exhausted.
 *
 * The upstream code wraps the scanning loop in a second, outer loop. That
 * outer loop can never start a second pass (the inner loop is only left
 * through `return`), so the state reset it performs is done once up front
 * here and a single loop remains.
 *
 * @param toksource token source whose `super` member is the lexer
 * @return the token left in the recognizer state by the lexer rules, or
 *         `toksource->eofToken` at end of input
 */
pANTLR3_COMMON_TOKEN
AntlrInput::nextTokenStr (pANTLR3_TOKEN_SOURCE toksource)
{
  pANTLR3_LEXER lexer = (pANTLR3_LEXER)(toksource->super);

  // Drop any previous token and start out without an exception. The token
  // factory takes care of de-allocating the old token once it is used up.
  lexer->rec->state->token  = NULL;
  lexer->rec->state->error  = ANTLR3_FALSE;
  lexer->rec->state->failed = ANTLR3_FALSE;

  // Keep calling the matching rules until one of them yields a token we
  // can hand out. Everything is re-read through `lexer` on every pass on
  // purpose: nothing is cached across the calls into the generated lexer.
  while (true)
  {
    // Remember where the upcoming token starts in the input stream.
    lexer->rec->state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL;
    lexer->rec->state->tokenStartCharIndex =
        lexer->input->istream->index(lexer->input->istream);
    lexer->rec->state->tokenStartCharPositionInLine =
        lexer->input->getCharPositionInLine(lexer->input);
    lexer->rec->state->tokenStartLine = lexer->input->getLine(lexer->input);
    lexer->rec->state->text = NULL;

    if (lexer->input->istream->_LA(lexer->input->istream, 1) == ANTLR3_CHARSTREAM_EOF)
    {
      // End of the current stream: fill in the shared EOF token and hand
      // it back.
      pANTLR3_COMMON_TOKEN eofToken = &(toksource->eofToken);

      eofToken->setStartIndex(eofToken, lexer->getCharIndex(lexer));
      eofToken->setStopIndex(eofToken, lexer->getCharIndex(lexer));
      eofToken->setLine(eofToken, lexer->getLine(lexer));
      // Not really manufactured by a factory, but the flag stops things
      // from trying to free it.
      eofToken->factoryMade = ANTLR3_TRUE;
      return eofToken;
    }

    // Fresh attempt: no token yet, no exception.
    lexer->rec->state->token  = NULL;
    lexer->rec->state->error  = ANTLR3_FALSE;
    lexer->rec->state->failed = ANTLR3_FALSE;

    // Let the generated lexer try to put a token together.
    lexer->mTokens(lexer->ctx);

    if (lexer->rec->state->error == ANTLR3_TRUE)
    {
      // Recognition exception: report it, recover, and scan again.
      lexer->rec->state->failed = ANTLR3_TRUE;
      // *** CVC4 EDIT: Just call the AntlrInput error routine
      lexerError(lexer->rec);
      lexer->recover(lexer);
      continue;
    }

    if (lexer->rec->state->token == &(toksource->skipToken))
    {
      // The rule matched, but its result is to be skipped altogether.
      continue;
    }

    if (lexer->rec->state->token == NULL)
    {
      // The rule did not emit anything itself, so emit the real token now.
      // *** CVC4 Edit: call emit on the lexer object
      lexer->emit(lexer);
    }

    // Good token: not skipped, not EOF.
    return lexer->rec->state->token;
  }
}
NodeList* parse(char *string) { NodeList *begin = new NodeList; begin->next = begin->prev = null; begin->node = null; NodeList *nl = begin; Sym *s = new Sym; s->row = 1; s->col = 0; s->pos = 0; s->string = string; char c; while ((c = getc(s))) { if (isspace(c)) { continue; } if (nl->node) { nl->next = new NodeList; nl->next->prev = nl; nl = nl->next; nl->next = NULL; nl->node = NULL; } Node *n; switch (c) { case '+': n = new Node; n->value = new char[2]; n->value[0] = c; n->value[1] = 0; n->nodeType = NODE_TYPE_PLUS; nl->node = n; writePos(n, s); continue; case '-': n = new Node; n->value = new char[2]; n->value[0] = c; n->value[1] = 0; n->nodeType = NODE_TYPE_MINUS; nl->node = n; writePos(n, s); continue; case '*': n = new Node; n->value = new char[2]; n->value[0] = c; n->value[1] = 0; n->nodeType = NODE_TYPE_MUL; nl->node = n; writePos(n, s); continue; case '/': n = new Node; n->value = new char[2]; n->value[0] = c; n->value[1] = 0; n->nodeType = NODE_TYPE_DIV; nl->node = n; writePos(n, s); continue; case ',': n = new Node; n->value = new char[2]; n->value[0] = c; n->value[1] = 0; n->nodeType = NODE_TYPE_COMMA; nl->node = n; writePos(n, s); continue; case '=': n = new Node; if (getc(s) == '=') { n->value = new char[3]; n->value[0] = c; n->value[1] = c; n->value[2] = 0; n->nodeType = NODE_TYPE_EQUAL; } else { revokec(s); n->value = new char[2]; n->value[0] = c; n->value[1] = 0; n->nodeType = NODE_TYPE_ASSIGN; } nl->node = n; writePos(n, s); continue; case '>': n = new Node; if (getc(s) == '=') { n->value = new char[3]; n->value[0] = '>'; n->value[1] = '='; n->value[2] = 0; n->nodeType = NODE_TYPE_GREATER_EQ; } else { revokec(s); n->value = new char[2]; n->value[0] = '>'; n->value[1] = 0; n->nodeType = NODE_TYPE_GREATER; } nl->node = n; writePos(n, s); continue; case '!': if (getc(s) == '=') { n = new Node; n->value = new char[3]; n->value[0] = '!'; n->value[1] = '='; n->value[2] = 0; n->nodeType = NODE_TYPE_NOT_EQUAL; nl->node = n; writePos(n, s); } else { 
revokec(s); lexerError(s, "Unknown construction, did you mean '!='"); } continue; case '<': n = new Node; if (getc(s) == '=') { n->value = new char[3]; n->value[0] = '<'; n->value[1] = '='; n->value[2] = 0; n->nodeType = NODE_TYPE_LESS_EQ; } else { revokec(s); n->value = new char[2]; n->value[0] = '<'; n->value[1] = 0; n->nodeType = NODE_TYPE_LESS; } nl->node = n; writePos(n, s); continue; case ';': n = new Node; n->value = new char[2]; n->value[0] = c; n->value[1] = 0; n->nodeType = NODE_TYPE_SEMICOLON; nl->node = n; writePos(n, s); continue; case '@': n = new Node; n->value = new char[2]; n->value[0] = c; n->value[1] = 0; n->nodeType = NODE_TYPE_AMPERSAT; nl->node = n; writePos(n, s); continue; case '(': n = new Node; n->value = new char[2]; n->value[0] = c; n->value[1] = 0; n->nodeType = NODE_TYPE_LPAREN; nl->node = n; writePos(n, s); continue; case ')': n = new Node; n->value = new char[2]; n->value[0] = c; n->value[1] = 0; n->nodeType = NODE_TYPE_RPAREN; nl->node = n; writePos(n, s); continue; case '{': n = new Node; n->value = new char[2]; n->value[0] = c; n->value[1] = 0; n->nodeType = NODE_TYPE_LBRACE; nl->node = n; writePos(n, s); continue; case '}': n = new Node; n->value = new char[2]; n->value[0] = c; n->value[1] = 0; n->nodeType = NODE_TYPE_RBRACE; nl->node = n; writePos(n, s); continue; case '#': c = getc(s); if (c == '*') { do { c = getc(s); if (c == '*') { c = getc(s); if (c == '#') { // comment is closed! 
break; } } } while (c); } else { do { c = getc(s); if (c == '\n') { break; } } while (c); } continue; case '"': int pos = s->pos + 1; do { c = getc(s); if (c == '\n') { lexerError(s, "Multi line strings are not supported."); break; } else if (c == '"') { // end of string n = new Node; int num = s->pos - pos; n->value = (char*) malloc(sizeof(char) * num + 1); strncpy(n->value, s->string + sizeof(char) * pos - 1, num); n->nodeType = NODE_TYPE_STRING; nl->node = n; writePos(n, s); break; } } while (c); continue; } if (isdigit(c)) { n = new Node(); n->nodeType = NODE_TYPE_NUMBER; n->value = new char[1]; n->value[0] = 0; do { char* str = new char[2]; str[0] = c; str[1] = 0; strcat(n->value, str); c = getc(s); } while (isdigit(c)); if (c) revokec(s); nl->node = n; writePos(n, s); } else if (isalpha(c)) { n = new Node(); n->value = new char[1]; n->value[0] = 0; do { char* str = new char[2]; str[0] = c; str[1] = 0; strcat(n->value, str); c = getc(s); } while (isalpha(c) || isdigit(c)); if (c) revokec(s); nl->node = n; if (strcmp(n->value, "def") == 0) { n->nodeType = NODE_TYPE_DEF; } else if (strcmp(n->value, "print") == 0) { n->nodeType = NODE_TYPE_PRINT; } else if (strcmp(n->value, "if") == 0) { n->nodeType = NODE_TYPE_IF; } else if (strcmp(n->value, "then") == 0) { n->nodeType = NODE_TYPE_THEN; } else if (strcmp(n->value, "else") == 0) { n->nodeType = NODE_TYPE_ELSE; } else if (strcmp(n->value, "while") == 0) { n->nodeType = NODE_TYPE_WHILE; } else if (strcmp(n->value, "break") == 0) { n->nodeType = NODE_TYPE_BREAK; } else if (strcmp(n->value, "continue") == 0) { n->nodeType = NODE_TYPE_CONTINUE; } else if (strcmp(n->value, "func") == 0) { n->nodeType = NODE_TYPE_FUNC; } else { n->nodeType = NODE_TYPE_IDENTIFIER; } writePos(n, s); } else if (c == EOF || c == 25) { // rollback to previous node and break reading nl = nl->prev; break; } else { lexerError(s, "Unexpected symbol."); } } nl->next = new NodeList; nl->next->node = new Node; nl->next->prev = nl; nl = nl->next; 
nl->node->nodeType = NODE_TYPE_EOF; nl->node->value = "eof"; writePos(nl->node, s); return begin; }