Esempio n. 1
0
/* *** CVC4 NOTE ***
 * This is copied, largely unmodified, from antlr3lexer.c
 *
 */
pANTLR3_COMMON_TOKEN
AntlrInput::nextTokenStr (pANTLR3_TOKEN_SOURCE toksource)
{
    // The lexer is reached through the token source's `super` pointer.
    pANTLR3_LEXER lexer = (pANTLR3_LEXER)(toksource->super);

    // Drop any previously produced token and clear the error/failure flags
    // once up front, so that an immediate end-of-stream returns with a
    // clean recognizer state.
    lexer->rec->state->token = NULL;
    lexer->rec->state->error = ANTLR3_FALSE;
    lexer->rec->state->failed = ANTLR3_FALSE;

    // Keep asking the generated lexer for tokens until it yields one that
    // is not the skip token, or until the character stream is exhausted.
    // Every path out of this loop is a `return`.
    while (true)
    {
        // Remember where the upcoming token starts in the input stream.
        lexer->rec->state->channel = ANTLR3_TOKEN_DEFAULT_CHANNEL;
        lexer->rec->state->tokenStartCharIndex = lexer->input->istream->index(lexer->input->istream);
        lexer->rec->state->tokenStartCharPositionInLine = lexer->input->getCharPositionInLine(lexer->input);
        lexer->rec->state->tokenStartLine = lexer->input->getLine(lexer->input);
        lexer->rec->state->text = NULL;

        // End of stream: stamp the token source's own EOF token with the
        // current position and hand it back.
        if (lexer->input->istream->_LA(lexer->input->istream, 1) == ANTLR3_CHARSTREAM_EOF)
        {
            pANTLR3_COMMON_TOKEN eofTok = &(toksource->eofToken);

            eofTok->setStartIndex (eofTok, lexer->getCharIndex(lexer));
            eofTok->setStopIndex (eofTok, lexer->getCharIndex(lexer));
            eofTok->setLine (eofTok, lexer->getLine(lexer));
            // Not actually factory-made; the flag is set so that nothing
            // tries to free this embedded token.
            eofTok->factoryMade = ANTLR3_TRUE;
            return eofTok;
        }

        // Fresh attempt: no token yet, no error, not failed.
        lexer->rec->state->token = NULL;
        lexer->rec->state->error = ANTLR3_FALSE;
        lexer->rec->state->failed = ANTLR3_FALSE;

        // Run the generated lexer rules.
        lexer->mTokens(lexer->ctx);

        if (lexer->rec->state->error == ANTLR3_TRUE)
        {
            // Recognition error: mark the failure, report it through the
            // AntlrInput error routine (CVC4 edit), recover, and try again.
            lexer->rec->state->failed = ANTLR3_TRUE;
            lexerError(lexer->rec);
            lexer->recover(lexer);
            continue;
        }

        if (lexer->rec->state->token == &(toksource->skipToken))
        {
            // The rule matched but asked for its text to be skipped.
            continue;
        }

        if (lexer->rec->state->token == NULL)
        {
            // The rule did not set a token itself, so emit one on the
            // lexer object (CVC4 edit).
            lexer->emit(lexer);
        }

        // A real token: not skipped and not EOF.
        return lexer->rec->state->token;
    }
}
Esempio n. 2
0
NodeList* parse(char *string) {

	NodeList *begin = new NodeList;
	begin->next = begin->prev = null;
	begin->node = null;

	NodeList *nl = begin;

	Sym *s = new Sym;
	s->row = 1;
	s->col = 0;
	s->pos = 0;
	s->string = string;

	char c;
	while ((c = getc(s))) {

		if (isspace(c)) {
			continue;
		}

		if (nl->node) {
			nl->next = new NodeList;
			nl->next->prev = nl;
			nl = nl->next;
			nl->next = NULL;
			nl->node = NULL;
		}

		Node *n;
		switch (c) {
		case '+':
			n = new Node;
			n->value = new char[2];
			n->value[0] = c;
			n->value[1] = 0;
			n->nodeType = NODE_TYPE_PLUS;
			nl->node = n;
			writePos(n, s);
			continue;
		case '-':
			n = new Node;
			n->value = new char[2];
			n->value[0] = c;
			n->value[1] = 0;
			n->nodeType = NODE_TYPE_MINUS;
			nl->node = n;
			writePos(n, s);
			continue;
		case '*':
			n = new Node;
			n->value = new char[2];
			n->value[0] = c;
			n->value[1] = 0;
			n->nodeType = NODE_TYPE_MUL;
			nl->node = n;
			writePos(n, s);
			continue;
		case '/':
			n = new Node;
			n->value = new char[2];
			n->value[0] = c;
			n->value[1] = 0;
			n->nodeType = NODE_TYPE_DIV;
			nl->node = n;
			writePos(n, s);
			continue;
		case ',':
			n = new Node;
			n->value = new char[2];
			n->value[0] = c;
			n->value[1] = 0;
			n->nodeType = NODE_TYPE_COMMA;
			nl->node = n;
			writePos(n, s);
			continue;
		case '=':
			n = new Node;
			if (getc(s) == '=') {
				n->value = new char[3];
				n->value[0] = c;
				n->value[1] = c;
				n->value[2] = 0;
				n->nodeType = NODE_TYPE_EQUAL;
			}
			else {
				revokec(s);
				n->value = new char[2];
				n->value[0] = c;
				n->value[1] = 0;
				n->nodeType = NODE_TYPE_ASSIGN;
			}
			nl->node = n;
			writePos(n, s);
			continue;
		case '>':
			n = new Node;
			if (getc(s) == '=') {
				n->value = new char[3];
				n->value[0] = '>';
				n->value[1] = '=';
				n->value[2] = 0;
				n->nodeType = NODE_TYPE_GREATER_EQ;
			}
			else {
				revokec(s);
				n->value = new char[2];
				n->value[0] = '>';
				n->value[1] = 0;
				n->nodeType = NODE_TYPE_GREATER;
			}
			nl->node = n;
			writePos(n, s);
			continue;
		case '!':
			if (getc(s) == '=') {
				n = new Node;
				n->value = new char[3];
				n->value[0] = '!';
				n->value[1] = '=';
				n->value[2] = 0;
				n->nodeType = NODE_TYPE_NOT_EQUAL;
				nl->node = n;
				writePos(n, s);
			}
			else {
				revokec(s);
				lexerError(s, "Unknown construction, did you mean '!='");
			}

			continue;
		case '<':
			n = new Node;
			if (getc(s) == '=') {
				n->value = new char[3];
				n->value[0] = '<';
				n->value[1] = '=';
				n->value[2] = 0;
				n->nodeType = NODE_TYPE_LESS_EQ;
			}
			else {
				revokec(s);
				n->value = new char[2];
				n->value[0] = '<';
				n->value[1] = 0;
				n->nodeType = NODE_TYPE_LESS;
			}
			nl->node = n;
			writePos(n, s);
			continue;
		case ';':
			n = new Node;
			n->value = new char[2];
			n->value[0] = c;
			n->value[1] = 0;
			n->nodeType = NODE_TYPE_SEMICOLON;
			nl->node = n;
			writePos(n, s);
			continue;
		case '@':
			n = new Node;
			n->value = new char[2];
			n->value[0] = c;
			n->value[1] = 0;
			n->nodeType = NODE_TYPE_AMPERSAT;
			nl->node = n;
			writePos(n, s);
			continue;
		case '(':
			n = new Node;
			n->value = new char[2];
			n->value[0] = c;
			n->value[1] = 0;
			n->nodeType = NODE_TYPE_LPAREN;
			nl->node = n;
			writePos(n, s);
			continue;
		case ')':
			n = new Node;
			n->value = new char[2];
			n->value[0] = c;
			n->value[1] = 0;
			n->nodeType = NODE_TYPE_RPAREN;
			nl->node = n;
			writePos(n, s);
			continue;
		case '{':
			n = new Node;
			n->value = new char[2];
			n->value[0] = c;
			n->value[1] = 0;
			n->nodeType = NODE_TYPE_LBRACE;
			nl->node = n;
			writePos(n, s);
			continue;
		case '}':
			n = new Node;
			n->value = new char[2];
			n->value[0] = c;
			n->value[1] = 0;
			n->nodeType = NODE_TYPE_RBRACE;
			nl->node = n;
			writePos(n, s);
			continue;
		case '#':
			c = getc(s);
			if (c == '*') {
				do {
					c = getc(s);
					if (c == '*') {
						c = getc(s);
						if (c == '#') {
							// comment is closed!
							break;
						}
					}
				} while (c);
			}
			else {
				do {
					c = getc(s);
					if (c == '\n') {
						break;
					}
				} while (c);
			}
			continue;
		case '"':
			int pos = s->pos + 1;
			do {
				c = getc(s);
				if (c == '\n') {
					lexerError(s, "Multi line strings are not supported.");
					break;
				}
				else if (c == '"') {
					// end of string
					n = new Node;
					int num = s->pos - pos;
					n->value = (char*) malloc(sizeof(char) * num + 1);
					strncpy(n->value, s->string + sizeof(char) * pos - 1, num);
					n->nodeType = NODE_TYPE_STRING;
					nl->node = n;
					writePos(n, s);
					break;
				}
			} while (c);
			continue;
		}

		if (isdigit(c)) {
			n = new Node();
			n->nodeType = NODE_TYPE_NUMBER;
			n->value = new char[1];
			n->value[0] = 0;
			do {
				char* str = new char[2];
				str[0] = c;
				str[1] = 0;
				strcat(n->value, str);
				c = getc(s);
			} while (isdigit(c));
			if (c) revokec(s);
			nl->node = n;
			writePos(n, s);
		}
		else if (isalpha(c)) {
			n = new Node();
			n->value = new char[1];
			n->value[0] = 0;
			do {
				char* str = new char[2];
				str[0] = c;
				str[1] = 0;
				strcat(n->value, str);
				c = getc(s);
			} while (isalpha(c) || isdigit(c));
			if (c) revokec(s);
			nl->node = n;

			if (strcmp(n->value, "def") == 0) {
				n->nodeType = NODE_TYPE_DEF;
			}
			else if (strcmp(n->value, "print") == 0) {
				n->nodeType = NODE_TYPE_PRINT;
			}
			else if (strcmp(n->value, "if") == 0) {
				n->nodeType = NODE_TYPE_IF;
			}
			else if (strcmp(n->value, "then") == 0) {
				n->nodeType = NODE_TYPE_THEN;
			}
			else if (strcmp(n->value, "else") == 0) {
				n->nodeType = NODE_TYPE_ELSE;
			}
			else if (strcmp(n->value, "while") == 0) {
				n->nodeType = NODE_TYPE_WHILE;
			}
			else if (strcmp(n->value, "break") == 0) {
				n->nodeType = NODE_TYPE_BREAK;
			}
			else if (strcmp(n->value, "continue") == 0) {
				n->nodeType = NODE_TYPE_CONTINUE;
			}
			else if (strcmp(n->value, "func") == 0) {
				n->nodeType = NODE_TYPE_FUNC;
			}
			else {
				n->nodeType = NODE_TYPE_IDENTIFIER;
			}
			writePos(n, s);
		}
		else if (c == EOF || c == 25) {
			// rollback to previous node and break reading
			nl = nl->prev;
			break;
		}
		else {
			lexerError(s, "Unexpected symbol.");
		}
	}

	nl->next = new NodeList;
	nl->next->node = new Node;
	nl->next->prev = nl;
	nl = nl->next;
	nl->node->nodeType = NODE_TYPE_EOF;
	nl->node->value = "eof";
	writePos(nl->node, s);

	return begin;
}