/**
 * Report a recognition error for this recognizer.
 *
 * Increments the error counter kept in the recognizer's state structure
 * and then calls the installed displayRecognitionError() hook, handing it
 * the token name table from that same state structure.
 *
 * \param rec  Recognizer on which the error was detected.
 */
static void
reportError(pANTLR3_BASE_RECOGNIZER rec)
{
    /* Indicate this recognizer had an error while processing.
     * (Block comments are used on purpose: a line comment here would
     * swallow the statements below if the source is ever re-flowed.)
     */
    rec->state->errorCount++;

    rec->displayRecognitionError(rec, rec->state->tokenNames);
}
/**
 * Report a recognition error, unless error recovery is already in progress.
 *
 * When the errorRecovery flag is already ANTLR3_TRUE nothing happens, so
 * that no further error is displayed while recovering. Otherwise the
 * flag is raised and the displayRecognitionError() hook is called with the
 * recognizer's token name table.
 *
 * \param recognizer  Recognizer on which the error was detected.
 */
static void
reportError(pANTLR3_BASE_RECOGNIZER recognizer)
{
    if (recognizer->errorRecovery != ANTLR3_TRUE)
    {
        /* Signal that we are in error recovery from now on, then show the error */
        recognizer->errorRecovery = ANTLR3_TRUE;
        recognizer->displayRecognitionError(recognizer, recognizer->tokenNames);
    }
}
/**
 * Attempt to recover after a set mismatch.
 *
 * Only parsers and tree parsers are handled; any other recognizer type
 * gets a diagnostic on stderr and the function returns without touching
 * the recognizer. For the supported types the work is delegated to the
 * recoverFromMismatchedElement() hook; if that reports ANTLR3_FALSE the
 * recognizer's error and failed flags are both raised.
 *
 * \param recognizer  Recognizer that hit the mismatch.
 * \param follow      Follow set passed through to recoverFromMismatchedElement().
 */
static void
recoverFromMismatchedSet(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET follow)
{
    /* The input stream itself is not needed here, so we only validate the
     * recognizer type rather than digging the stream out of the parser.
     */
    switch (recognizer->type)
    {
        case ANTLR3_TYPE_PARSER:
        case ANTLR3_TYPE_TREE_PARSER:
            break;

        default:
            fprintf(stderr, "Base recognizerfunction recoverFromMismatchedSet called by unknown paresr type - provide override for this function\n");
            return;
    }

    /* TODO - Single token deletion like in recoverFromMismatchedToken() */
    if (recognizer->recoverFromMismatchedElement(recognizer, follow) == ANTLR3_FALSE)
    {
        recognizer->error  = ANTLR3_TRUE;
        recognizer->failed = ANTLR3_TRUE;
    }
}
/**
 * Handle a token mismatch detected by match().
 *
 * Installs a mismatched token exception on the recognizer (via
 * antlr3MTExceptionNew()), records the expected token type in it and then
 * enters error recovery through the recoverFromMismatchedToken() hook.
 *
 * \param recognizer  Recognizer that hit the mismatch.
 * \param ttype       Token type that was expected.
 * \param follow      Follow set passed through to recoverFromMismatchedToken().
 *
 * \remark Mismatch only works for parsers and must be overridden for
 *         anything else. For any other recognizer type a diagnostic is
 *         written to stderr and no recovery is attempted; the exception
 *         has already been installed by then.
 */
static void
mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET follow)
{
    /* Install a mismatched token exception in the exception stack */
    antlr3MTExceptionNew(recognizer);
    recognizer->exception->expecting = ttype;

    /* The input stream is not used here, so only the type is validated */
    if (recognizer->type != ANTLR3_TYPE_PARSER)
    {
        fprintf(stderr, "Base recognizerfunction 'mismatch' called by unknown parser type - provide override for this function\n");
        return;
    }

    /* Enter error recovery mode */
    recognizer->recoverFromMismatchedToken(recognizer, ttype, follow);
}
/**
 * Override for the standard base recognizer mismatch function, as we have
 * DOWN/UP nodes in the stream that have no line info, plus we want to
 * alter the exception type.
 *
 * Builds the exception through the recognizer's exConstruct() hook and
 * then hands control to the recoverFromMismatchedToken() hook.
 *
 * \param recognizer  Recognizer that hit the mismatch.
 * \param ttype       Token type that was expected.
 * \param follow      Follow set list passed through to recoverFromMismatchedToken().
 */
static void
mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
{
    /* Construct the exception first, then try to recover */
    recognizer->exConstruct(recognizer);

    recognizer->recoverFromMismatchedToken(recognizer, ttype, follow);
}
/**
 * Report a recognition error by calling the recognizer's
 * displayRecognitionError() hook with the token name table held in the
 * recognizer's state structure. No other state is modified here.
 *
 * \param rec  Recognizer on which the error was detected.
 */
static void
reportError(pANTLR3_BASE_RECOGNIZER rec)
{
    rec->displayRecognitionError(
        rec,
        rec->state->tokenNames
    );
}
/**
 * This code is factored out from mismatched token and mismatched set
 * recovery. It handles "single token insertion" error recovery for
 * both. No tokens are consumed to recover from insertions.
 *
 * If the follow set contains the end-of-rule marker, it is first combined
 * with the context-sensitive rule follow set (with the marker removed from
 * the combined copy only; the caller's set is never modified). If the
 * current lookahead token is a member of the resulting set, the error is
 * reported, the error and failed flags are cleared and recovery succeeds.
 *
 * \param recognizer  Recognizer that hit the mismatch (parser or tree parser).
 * \param follow      Follow set for the element that failed to match; may be NULL.
 *
 * \return ANTLR3_TRUE if recovery was possible, ANTLR3_FALSE otherwise
 *         (including a NULL follow set or an unsupported recognizer type,
 *         for which a diagnostic is written to stderr).
 */
static ANTLR3_BOOLEAN
recoverFromMismatchedElement(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET follow)
{
    pANTLR3_BITSET      viableToksFollowingRule;
    pANTLR3_BITSET      newFollow = NULL;
    pANTLR3_INT_STREAM  is;
    ANTLR3_BOOLEAN      recovered = ANTLR3_FALSE;

    switch (recognizer->type)
    {
        case ANTLR3_TYPE_PARSER:
            is = ((pANTLR3_PARSER)(recognizer->super))->tstream->istream;
            break;

        case ANTLR3_TYPE_TREE_PARSER:
            is = ((pANTLR3_TREE_PARSER)(recognizer->super))->ctnstream->tnstream->istream;
            break;

        default:
            fprintf(stderr, "Base recognizerfunction recover called by unknown paresr type - provide override for this function\n");
            return ANTLR3_FALSE;
    }

    if (follow == NULL)
    {
        /* The follow set is NULL, which means we don't know what can come
         * next, so we "hit and hope" by just signifying that we cannot
         * recover, which will just cause the next token to be consumed,
         * which might dig us out.
         */
        return ANTLR3_FALSE;
    }

    /* We have a bitmap for the follow set, hence we can compute
     * what can follow this grammar element reference.
     */
    if (follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE)
    {
        /* First we need to know which of the available tokens are viable
         * to follow this reference.
         */
        viableToksFollowingRule = recognizer->computeCSRuleFollow(recognizer);

        /* Knowing that, we can or in the follow set */
        newFollow = follow->or(follow, viableToksFollowingRule);

        /* Remove the EOR token, which we do not wish to compute with.
         * This must operate on the combined set: passing the caller's
         * 'follow' here would alter the caller's set and leave EOR in
         * the set we are about to test against.
         */
        newFollow->remove(newFollow, ANTLR3_EOR_TOKEN_TYPE);
        viableToksFollowingRule->free(viableToksFollowingRule);

        /* We now have the computed set of what can follow the current token */
        follow = newFollow;
    }

    /* We can now see if the current token works with the set of tokens
     * that could follow the current grammar reference. If it looks like it
     * is consistent, then we can "insert" that token by not throwing
     * an exception and assuming that we saw it.
     */
    if (follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE)
    {
        /* Report the error, but don't cause any rules to abort and stuff */
        recognizer->reportError(recognizer);

        recognizer->error  = ANTLR3_FALSE;
        recognizer->failed = ANTLR3_FALSE;
        recovered          = ANTLR3_TRUE;   /* Success in recovery */
    }

    /* The combined set, if we built one, is a temporary owned by us */
    if (newFollow != NULL)
    {
        newFollow->free(newFollow);
    }

    /* If nothing viable was found this is going to cause an exception */
    return recovered;
}
/**
 * Attempt to recover from a single missing or extra token.
 *
 * EXTRA TOKEN
 *
 * LA(1) is not what we are looking for. If LA(2) has the right token,
 * however, then assume LA(1) is some extra spurious token. Delete it
 * and LA(2) as if we were doing a normal match(), which advances the
 * input.
 *
 * MISSING TOKEN
 *
 * If current token is consistent with what could come after
 * ttype then it is ok to "insert" the missing token, else throw
 * exception. For example, input "i=(3;" is clearly missing the
 * ')'. When the parser returns from the nested call to expr, it
 * will have call chain:
 *
 *    stat -> expr -> atom
 *
 * and it will be trying to match the ')' at this point in the
 * derivation:
 *
 *       => ID '=' '(' INT ')' ('+' atom)* ';'
 *                          ^
 * match() will see that ';' doesn't match ')' and report a
 * mismatched token error. To recover, it sees that LA(1)==';'
 * is in the set of tokens that can follow the ')' token
 * reference in rule atom. It can assume that you forgot the ')'.
 *
 * May need to come back and look at the exception stuff here, I am assuming
 * that the exception that was passed in in the Java implementation is
 * stored in the recognizer exception stack. To 'throw' it we set the
 * error flag and rules can cascade back when this is set.
 *
 * \param recognizer  Recognizer that hit the mismatch (parser or tree parser;
 *                    any other type gets a diagnostic on stderr and no recovery).
 * \param ttype       Token type that was expected.
 * \param follow      Follow set passed on to recoverFromMismatchedElement().
 */
static void
recoverFromMismatchedToken(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET follow)
{
    pANTLR3_INT_STREAM  is;

    switch (recognizer->type)
    {
        case ANTLR3_TYPE_PARSER:
            is = ((pANTLR3_PARSER)(recognizer->super))->tstream->istream;
            break;

        case ANTLR3_TYPE_TREE_PARSER:
            is = ((pANTLR3_TREE_PARSER)(recognizer->super))->ctnstream->tnstream->istream;
            break;

        default:
            fprintf(stderr, "Base recognizerfunction recoverFromMismatchedToken called by unknown paresr type - provide override for this function\n");
            return;
    }

    /* If the next token after the one we are looking at in the input stream
     * is what we are looking for then we remove the one we have discovered
     * from the stream by consuming it, then consume this next one along too as
     * if nothing had happened.
     */
    if (is->_LA(is, 2) == ttype)
    {
        /* Print out the error */
        recognizer->reportError(recognizer);

        /* Call resync hook (for debuggers and so on) */
        recognizer->beginResync(recognizer);

        /* "delete" the extra token */
        is->consume(is);

        /* End resync hook */
        recognizer->endResync(recognizer);

        /* Consume the token that the rule actually expected to get */
        is->consume(is);

        recognizer->error = ANTLR3_FALSE;   /* Exception is not outstanding any more */

        /* Recovery by deletion is complete. Without this return we would
         * fall into the insertion logic below, which could report the same
         * error a second time or raise the error flags again.
         */
        return;
    }

    /* The next token (after the one that is current) is not the one
     * that we were expecting, so the input is in more of an error state
     * than we hoped.
     * If we are able to recover from the error using the follow set, then
     * we are hunky dory again and can move on, if we cannot, then we resort
     * to throwing the exception.
     */
    if (recognizer->recoverFromMismatchedElement(recognizer, follow) == ANTLR3_FALSE)
    {
        recognizer->error  = ANTLR3_TRUE;
        recognizer->failed = ANTLR3_TRUE;
    }
}
/**
 * Recover from an error found on the input stream. Mostly this is
 * NoViableAlt exceptions, but could be a mismatched token that
 * the match() routine could not recover from.
 *
 * Tokens are consumed until one in the error recovery set is reached,
 * with the beginResync()/endResync() hooks called around the consumption,
 * and the recognizer's error flag is cleared afterwards.
 *
 * \param recognizer  Recognizer to resynchronize (parser or tree parser;
 *                    any other type gets a diagnostic on stderr and
 *                    nothing else is done).
 */
static void
recover(pANTLR3_BASE_RECOGNIZER recognizer)
{
    pANTLR3_INT_STREAM  is;
    pANTLR3_BITSET      followSet;  /* Used to compute the follow set of tokens */

    /* All the indirection looks confusing at first, but it is deliberately
     * spelled out rather than hidden behind macros as the generated C code
     * does: here we want to see exactly what is going on.
     */
    if (recognizer->type == ANTLR3_TYPE_PARSER)
    {
        is = ((pANTLR3_PARSER)(recognizer->super))->tstream->istream;
    }
    else if (recognizer->type == ANTLR3_TYPE_TREE_PARSER)
    {
        is = ((pANTLR3_TREE_PARSER)(recognizer->super))->ctnstream->tnstream->istream;
    }
    else
    {
        fprintf(stderr, "Base recognizerfunction recover called by unknown paresr type - provide override for this function\n");
        return;
    }

    /* If the last error was at this very token index then LT(1) must be in
     * the recovery token set and nothing was consumed last time. Consume a
     * single token so that we at least prevent an infinite loop; this is a
     * failsafe.
     */
    if (recognizer->lastErrorIndex == is->index(is))
    {
        is->consume(is);
    }

    /* Record error index position */
    recognizer->lastErrorIndex = is->index(is);

    /* Work out the follows set for error recovery */
    followSet = recognizer->computeErrorRecoverySet(recognizer);

    /* Consume tokens until we have resynced to something in the follows
     * set, bracketed by the resync hooks (for debuggers and so on).
     */
    recognizer->beginResync(recognizer);
    recognizer->consumeUntilSet(recognizer, followSet);
    recognizer->endResync(recognizer);

    /* Destroy the temporary bitset we produced */
    followSet->free(followSet);

    /* Reset the in error bit so we don't re-report the exception */
    recognizer->error = ANTLR3_FALSE;
}
/**
 * Compute the context-sensitive FOLLOW set for current rule.
 * This is set of token types that can follow a specific rule
 * reference given a specific call chain. You get the set of
 * viable tokens that can possibly come next (lookahead depth 1)
 * given the current call chain. Contrast this with the
 * definition of plain FOLLOW for rule r:
 *
 *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
 *
 * where x in T* and alpha, beta in V*; T is set of terminals and
 * V is the set of terminals and nonterminals. In other words,
 * FOLLOW(r) is the set of all tokens that can possibly follow
 * references to r in *any* sentential form (context). At
 * runtime, however, we know precisely which context applies as
 * we have the call chain. We may compute the exact (rather
 * than covering superset) set of following tokens.
 *
 * For example, consider grammar:
 *
 *   stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
 *        | "return" expr '.'
 *        ;
 *   expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
 *   atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
 *        | '(' expr ')'
 *        ;
 *
 * The FOLLOW sets are all inclusive whereas context-sensitive
 * FOLLOW sets are precisely what could follow a rule reference.
 * For input "i=(3);", here is the derivation:
 *
 *   stat => ID '=' expr ';'
 *        => ID '=' atom ('+' atom)* ';'
 *        => ID '=' '(' expr ')' ('+' atom)* ';'
 *        => ID '=' '(' atom ')' ('+' atom)* ';'
 *        => ID '=' '(' INT ')' ('+' atom)* ';'
 *        => ID '=' '(' INT ')' ';'
 *
 * At the "3" token, you'd have a call chain of
 *
 *   stat -> expr -> atom -> expr -> atom
 *
 * What can follow that specific nested ref to atom? Exactly ')'
 * as you can see by looking at the derivation of this specific
 * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
 *
 * You want the exact viable token set when recovering from a
 * token mismatch. Upon token mismatch, if LA(1) is member of
 * the viable next token set, then you know there is most likely
 * a missing token in the input stream. "Insert" one by just not
 * throwing an exception.
 *
 * \param recognizer  Recognizer whose follow information is combined.
 *
 * \return Whatever the recognizer's combineFollows() hook returns when
 *         called with ANTLR3_FALSE as its second argument.
 *
 * NOTE(review): this makes exactly the same combineFollows() call as
 * computeErrorRecoverySet(), although the text above describes an *exact*
 * set. Confirm whether ANTLR3_FALSE is really intended here.
 */
static pANTLR3_BITSET
computeCSRuleFollow(pANTLR3_BASE_RECOGNIZER recognizer)
{
    pANTLR3_BITSET viableFollow;

    viableFollow = recognizer->combineFollows(recognizer, ANTLR3_FALSE);

    return viableFollow;
}
/**
 * Documentation below is from the Java implementation.
 *
 * Compute the error recovery set for the current rule. During
 * rule invocation, the parser pushes the set of tokens that can
 * follow that rule reference on the stack; this amounts to
 * computing FIRST of what follows the rule reference in the
 * enclosing rule. This local follow set only includes tokens
 * from within the rule; i.e., the FIRST computation done by
 * ANTLR stops at the end of a rule.
 *
 * EXAMPLE
 *
 * When you find a "no viable alt exception", the input is not
 * consistent with any of the alternatives for rule r. The best
 * thing to do is to consume tokens until you see something that
 * can legally follow a call to r *or* any rule that called r.
 * You don't want the exact set of viable next tokens because the
 * input might just be missing a token--you might consume the
 * rest of the input looking for one of the missing tokens.
 *
 * Consider grammar:
 *
 *   a : '[' b ']'
 *     | '(' b ')'
 *     ;
 *   b : c '^' INT ;
 *   c : ID
 *     | INT
 *     ;
 *
 * At each rule invocation, the set of tokens that could follow
 * that rule is pushed on a stack. Here are the various "local"
 * follow sets:
 *
 *   FOLLOW(b1_in_a) = FIRST(']') = ']'
 *   FOLLOW(b2_in_a) = FIRST(')') = ')'
 *   FOLLOW(c_in_b)  = FIRST('^') = '^'
 *
 * Upon erroneous input "[]", the call chain is
 *
 *   a -> b -> c
 *
 * and, hence, the follow context stack is:
 *
 *   depth  local follow set     after call to rule
 *     0         <EOF>                a (from main())
 *     1          ']'                 b
 *     2          '^'                 c
 *
 * Notice that ')' is not included, because b would have to have
 * been called from a different context in rule a for ')' to be
 * included.
 *
 * For error recovery, we cannot consider FOLLOW(c)
 * (context-sensitive or otherwise). We need the combined set of
 * all context-sensitive FOLLOW sets--the set of all tokens that
 * could follow any reference in the call chain. We need to
 * resync to one of those tokens. Note that FOLLOW(c)='^' and if
 * we resync'd to that token, we'd consume until EOF. We need to
 * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
 * In this case, for input "[]", LA(1) is in this set so we would
 * not consume anything and after printing an error rule c would
 * return normally. It would not find the required '^' though.
 * At this point, it gets a mismatched token error and throws an
 * exception (since LA(1) is not in the viable following token
 * set). The rule exception handler tries to recover, but finds
 * the same recovery set and doesn't consume anything. Rule b
 * exits normally returning to rule a. Now it finds the ']' (and
 * with the successful match exits errorRecovery mode).
 *
 * So, you can see that the parser walks up call chain looking
 * for the token that was a member of the recovery set.
 *
 * Errors are not generated in errorRecovery mode.
 *
 * ANTLR's error recovery mechanism is based upon original ideas:
 *
 *   "Algorithms + Data Structures = Programs" by Niklaus Wirth
 *
 * and
 *
 *   "A note on error recovery in recursive descent parsers":
 *   http://portal.acm.org/citation.cfm?id=947902.947905
 *
 * Later, Josef Grosch had some good ideas:
 *
 *   "Efficient and Comfortable Error Recovery in Recursive Descent
 *   Parsers":
 *   ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
 *
 * Like Grosch I implemented local FOLLOW sets that are combined
 * at run-time upon error to avoid overhead during parsing.
 *
 * \param recognizer  Recognizer whose follow information is combined.
 *
 * \return Whatever the recognizer's combineFollows() hook returns when
 *         called with ANTLR3_FALSE as its second argument.
 */
static pANTLR3_BITSET
computeErrorRecoverySet(pANTLR3_BASE_RECOGNIZER recognizer)
{
    pANTLR3_BITSET recoverySet;

    recoverySet = recognizer->combineFollows(recognizer, ANTLR3_FALSE);

    return recoverySet;
}
/**
 * Match current input symbol against ttype. Upon error, do one token
 * insertion or deletion if possible. You can override to not recover
 * here and bail out of the current production to the normal error
 * exception catch (at the end of the method) by just throwing
 * MismatchedTokenException upon input._LA(1)!=ttype.
 *
 * \param recognizer  Recognizer doing the match (parser or tree parser;
 *                    any other type gets a diagnostic on stderr).
 * \param ttype       Token type that is expected next.
 * \param follow      Follow set handed to the mismatch() hook on failure.
 *
 * \return ANTLR3_TRUE if LA(1) was ttype and has been consumed,
 *         ANTLR3_FALSE in every other case.
 */
static ANTLR3_BOOLEAN
match(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET follow)
{
    pANTLR3_INT_STREAM  is;

    if (recognizer->type == ANTLR3_TYPE_PARSER)
    {
        is = ((pANTLR3_PARSER)(recognizer->super))->tstream->istream;
    }
    else if (recognizer->type == ANTLR3_TYPE_TREE_PARSER)
    {
        is = ((pANTLR3_TREE_PARSER)(recognizer->super))->ctnstream->tnstream->istream;
    }
    else
    {
        fprintf(stderr, "Base recognizerfunction 'match' called by unknown paresr type - provide override for this function\n");
        return ANTLR3_FALSE;
    }

    if (is->_LA(is, 1) != ttype)
    {
        if (recognizer->backtracking > 0)
        {
            /* We did not find the expected token type, but backtracking
             * is going on, so we just set the failed flag and return.
             */
            recognizer->failed = ANTLR3_TRUE;
        }
        else
        {
            /* No backtracking is going on, so we mismatch, which creates
             * an exception in the recognizer exception stack.
             */
            recognizer->mismatch(recognizer, ttype, follow);
        }

        return ANTLR3_FALSE;
    }

    /* The token was the one we were told to expect: consume it from the
     * stream, leave error recovery (if we were in it) and flag success.
     */
    is->consume(is);
    recognizer->errorRecovery = ANTLR3_FALSE;
    recognizer->failed        = ANTLR3_FALSE;

    return ANTLR3_TRUE;
}
/**
 * Has this rule already parsed input at the current index in the
 * input stream? Return ANTLR3_TRUE if we have and ANTLR3_FALSE
 * if we have not.
 *
 * This method has a side-effect: if we have seen this input for
 * this rule and successfully parsed before, then seek ahead to
 * 1 past the stop token matched for this rule last time. If the
 * memoized result is a failure, the recognizer's failed flag is set
 * instead and the stream is not moved.
 *
 * \param recognizer  Recognizer to query (parser, tree parser or lexer;
 *                    any other type gets a diagnostic on stderr and
 *                    ANTLR3_FALSE is returned).
 * \param ruleIndex   Index of the rule being asked about.
 *
 * \return ANTLR3_TRUE if a memo entry exists for this rule at the current
 *         input index, ANTLR3_FALSE if not.
 */
static ANTLR3_BOOLEAN
alreadyParsedRule(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ruleIndex)
{
    ANTLR3_UINT64       stopIndex;
    pANTLR3_INT_STREAM  is;

    switch (recognizer->type)
    {
        case ANTLR3_TYPE_PARSER:
            is = ((pANTLR3_PARSER)(recognizer->super))->tstream->istream;
            break;

        case ANTLR3_TYPE_TREE_PARSER:
            is = ((pANTLR3_TREE_PARSER)(recognizer->super))->ctnstream->tnstream->istream;
            break;

        case ANTLR3_TYPE_LEXER:
            is = ((pANTLR3_LEXER)(recognizer->super))->input->istream;
            /* Lexers are a supported type: do not fall through into the
             * "unknown type" handling below.
             */
            break;

        default:
            fprintf(stderr, "Base recognizerfunction 'alreadyParsedRule' called by unknown paresr type - provide override for this function\n");
            return ANTLR3_FALSE;
    }

    /* See if we have a memo marker for this. */
    stopIndex = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is));

    if (stopIndex == MEMO_RULE_UNKNOWN)
    {
        return ANTLR3_FALSE;
    }

    if (stopIndex == MEMO_RULE_FAILED)
    {
        recognizer->failed = ANTLR3_TRUE;
    }
    else
    {
        /* Skip past everything the rule matched last time */
        is->seek(is, stopIndex + 1);
    }

    /* If here then the rule was executed for this input already */
    return ANTLR3_TRUE;
}