/** Attempt to recover from a single missing or extra token.
 *
 *  EXTRA TOKEN
 *
 *  LA(1) is not what we are looking for. If LA(2) has the right token,
 *  however, then assume LA(1) is some extra spurious token. Delete it,
 *  then consume LA(2) as if we were doing a normal match(), which
 *  advances the input.
 *
 *  MISSING TOKEN
 *
 *  If the current token is consistent with what could come after
 *  ttype then it is ok to "insert" the missing token, else throw an
 *  exception. For example, input "i=(3;" is clearly missing the
 *  ')'. When the parser returns from the nested call to expr, it
 *  will have call chain:
 *
 *    stat -> expr -> atom
 *
 *  and it will be trying to match the ')' at this point in the
 *  derivation:
 *
 *       => ID '=' '(' INT ')' ('+' atom)* ';'
 *                         ^
 *  match() will see that ';' doesn't match ')' and report a
 *  mismatched token error. To recover, it sees that LA(1)==';'
 *  is in the set of tokens that can follow the ')' token
 *  reference in rule atom. It can assume that you forgot the ')'.
 *
 *  May need to come back and look at the exception stuff here; I am assuming
 *  that the exception that was passed in, in the Java implementation, is
 *  stored in the recognizer exception stack. To 'throw' it we set the
 *  error flag, and rules can cascade back when this is set.
 *
 *  @param recognizer  The recognizer in error. Its type selects which input
 *                     stream is inspected; its error/failed flags are updated
 *                     to reflect the outcome of the recovery attempt.
 *  @param ttype       The token type that was expected at LA(1).
 *  @param follow      Bitset handed to recoverFromMismatchedElement() when the
 *                     single-token deletion does not apply.
 */
static void
recoverFromMismatchedToken(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET follow)
{
    pANTLR3_PARSER      parser;
    pANTLR3_TREE_PARSER tparser;
    pANTLR3_INT_STREAM  is;

    /* Locate the integer stream that belongs to this kind of recognizer. */
    switch (recognizer->type)
    {
        case ANTLR3_TYPE_PARSER:
            parser = (pANTLR3_PARSER) (recognizer->super);
            is     = parser->tstream->istream;
            break;

        case ANTLR3_TYPE_TREE_PARSER:
            tparser = (pANTLR3_TREE_PARSER) (recognizer->super);
            is      = tparser->ctnstream->tnstream->istream;
            break;

        default:
            fprintf(stderr, "Base recognizerfunction recoverFromMismatchedToken called by unknown paresr type - provide override for this function\n");
            return;
    }

    /* If the next token after the one we are looking at in the input stream
     * is what we are looking for then we remove the one we have discovered
     * from the stream by consuming it, then consume this next one along too as
     * if nothing had happened.
     */
    if (is->_LA(is, 2) == ttype)
    {
        /* Print out the error */
        recognizer->reportError(recognizer);

        /* Call resync hook (for debuggers and so on) */
        recognizer->beginResync(recognizer);

        /* "delete" the extra token */
        is->consume(is);

        /* End resync hook */
        recognizer->endResync(recognizer);

        /* Consume the token that the rule actually expected to get */
        is->consume(is);

        /* Exception is not outstanding any more */
        recognizer->error = ANTLR3_FALSE;

        /* Recovery is complete. Return here so that the follow-set recovery
         * below is not attempted against the already resynchronised stream,
         * where a failure would set the error flags again.
         */
        return;
    }

    /* The next token (after the one that is current) is not the one
     * that we were expecting, so the input is in more of an error state
     * than we hoped.
     * If we are able to recover from the error using the follow set, then
     * we are hunky dory again and can move on; if we cannot, then we resort
     * to "throwing" the exception by raising the error and failed flags.
     */
    if (recognizer->recoverFromMismatchedElement(recognizer, follow) == ANTLR3_FALSE)
    {
        recognizer->error  = ANTLR3_TRUE;
        recognizer->failed = ANTLR3_TRUE;
        return;
    }
}
/** Resynchronise the input stream after an error has been detected.
 *
 *  Mostly this is reached for NoViableAlt exceptions, but it can also be a
 *  mismatched token that the match() routine could not recover from.
 *
 *  Tokens are consumed until the stream reaches something in the set produced
 *  by computeErrorRecoverySet(); the recognizer's error flag is then cleared
 *  so the exception is not reported again.
 *
 *  @param recognizer  The recognizer in error. Its type selects which input
 *                     stream is resynchronised.
 */
static void
recover(pANTLR3_BASE_RECOGNIZER recognizer)
{
    pANTLR3_INT_STREAM input;       /* Stream being resynchronised            */
    pANTLR3_BITSET     resyncSet;   /* Tokens we are willing to resume at     */

    /* The chains of indirection below look confusing at first, but you get
     * used to them. Resist hiding them behind macros the way the generated C
     * code does: there the aim is to conceal implementation detail, not to
     * make the code clearer.
     */
    if (recognizer->type == ANTLR3_TYPE_PARSER)
    {
        pANTLR3_PARSER tokenParser = (pANTLR3_PARSER) (recognizer->super);

        input = tokenParser->tstream->istream;
    }
    else if (recognizer->type == ANTLR3_TYPE_TREE_PARSER)
    {
        pANTLR3_TREE_PARSER treeParser = (pANTLR3_TREE_PARSER) (recognizer->super);

        input = treeParser->ctnstream->tnstream->istream;
    }
    else
    {
        fprintf(stderr, "Base recognizerfunction recover called by unknown paresr type - provide override for this function\n");
        return;
    }

    /* Failsafe: if the previous error was raised at this very index, LT(1)
     * must be in the recovery set and nothing was consumed last time round.
     * Swallow one token so that we cannot loop forever.
     */
    if (recognizer->lastErrorIndex == input->index(input))
    {
        input->consume(input);
    }

    /* Remember where this error occurred for the check above. */
    recognizer->lastErrorIndex = input->index(input);

    /* Build the set of tokens at which parsing may resume. */
    resyncSet = recognizer->computeErrorRecoverySet(recognizer);

    /* Skip input until a member of that set is reached, bracketing the skip
     * with the resync hooks (for debuggers and so on).
     */
    recognizer->beginResync(recognizer);
    recognizer->consumeUntilSet(recognizer, resyncSet);
    recognizer->endResync(recognizer);

    /* The bitset was produced only for this call; release it. */
    resyncSet->free(resyncSet);

    /* Clear the in-error bit so the exception is not re-reported. */
    recognizer->error = ANTLR3_FALSE;
}