Exemple #1
0
/** Record and display a recognition error.
 *
 *  Increments the error counter held in the recognizer's shared state and
 *  then invokes the recognizer's displayRecognitionError callback, passing
 *  it the tokenNames member of that same state.
 *
 *  \param rec  Recognizer that encountered the error.
 */
static void
reportError		    (pANTLR3_BASE_RECOGNIZER rec)
{
    /* One more error seen by this recognizer.
     */
    rec->state->errorCount += 1;

    /* Hand the error to the installed display routine.
     */
    rec->displayRecognitionError(rec, rec->state->tokenNames);
}
Exemple #2
0
/** Display a recognition error unless one is already being recovered from.
 *
 *  When the errorRecovery flag is already ANTLR3_TRUE nothing happens, so
 *  that no further error is displayed while recovery is in progress.
 *  Otherwise the flag is raised and the recognizer's
 *  displayRecognitionError callback is invoked with its tokenNames.
 *
 *  \param recognizer  Recognizer that encountered the error.
 */
static void
reportError		    (pANTLR3_BASE_RECOGNIZER recognizer)
{
    if	(recognizer->errorRecovery != ANTLR3_TRUE)
    {
	/* Not recovering yet: flag that we are from now on, then show the error.
	 */
	recognizer->errorRecovery = ANTLR3_TRUE;

	recognizer->displayRecognitionError(recognizer, recognizer->tokenNames);
    }
}
Exemple #3
0
/** Attempt recovery after a mismatched-set error.
 *
 *  This default implementation only supports recognizers of type
 *  ANTLR3_TYPE_PARSER and ANTLR3_TYPE_TREE_PARSER; for any other type a
 *  diagnostic is written to stderr and nothing else happens.
 *
 *  Recovery is delegated to the recognizer's recoverFromMismatchedElement
 *  callback. If that reports ANTLR3_FALSE, the error and failed flags of
 *  the recognizer are both set to ANTLR3_TRUE.
 *
 *  \param recognizer  Recognizer in which the mismatch occurred.
 *  \param follow      Follow set passed on to recoverFromMismatchedElement.
 */
static void
recoverFromMismatchedSet	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET follow)
{
    /* The input stream itself is never consulted here, so the switch only
     * needs to validate the recognizer type.
     */
    switch	(recognizer->type)
    {
    case	ANTLR3_TYPE_PARSER:
    case	ANTLR3_TYPE_TREE_PARSER:

	break;

    default:

	fprintf(stderr, "Base recognizerfunction recoverFromMismatchedSet called by unknown paresr type - provide override for this function\n");
	return;
    }

    /* TODO - Single token deletion like in recoverFromMismatchedToken()
     */
    if	(recognizer->recoverFromMismatchedElement(recognizer, follow) == ANTLR3_FALSE)
    {
	/* Could not recover: leave the error outstanding for the caller.
	 */
	recognizer->error	= ANTLR3_TRUE;
	recognizer->failed	= ANTLR3_TRUE;
    }
}
Exemple #4
0
/**
 * Handle a token mismatch: install a mismatched token exception, record the
 * expected token type in it, and enter error recovery through the
 * recognizer's recoverFromMismatchedToken callback.
 *
 * \param recognizer  Recognizer in which the mismatch occurred.
 * \param ttype       Token type that was expected; stored in the
 *                    exception's `expecting` field.
 * \param follow      Follow set passed on to recoverFromMismatchedToken.
 *
 * \remark Mismatch only works for parsers and must be overridden for anything else.
 *         For any recognizer type other than ANTLR3_TYPE_PARSER a diagnostic
 *         is written to stderr and no recovery is attempted (the exception
 *         has already been installed at that point).
 */
static	void
mismatch(pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET follow)
{
    /* Install a mismatched token exception in the exception stack
     */
    antlr3MTExceptionNew(recognizer);
    recognizer->exception->expecting    = ttype;

    /* The token stream is not needed here, only the type check is.
     */
    if	(recognizer->type != ANTLR3_TYPE_PARSER)
    {
	fprintf(stderr, "Base recognizerfunction 'mismatch' called by unknown parser type - provide override for this function\n");
	return;
    }

    /* Enter error recovery mode
     */
    recognizer->recoverFromMismatchedToken(recognizer, ttype, follow);
}
/** Override for standard base recognizer mismatch function
 *  as we have DOWN/UP nodes in the stream that have no line info,
 *  plus we want to alter the exception type.
 *
 *  \param recognizer  Recognizer in which the mismatch occurred.
 *  \param ttype       Token type passed through to recoverFromMismatchedToken.
 *  \param follow      Follow set list passed through to recoverFromMismatchedToken.
 */
static void
mismatch	    (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET_LIST follow)
{
    /* Build the exception through the recognizer's own exConstruct hook
     * (presumably this installs the recognizer-specific exception; its
     * implementation is not visible here - confirm).
     */
    recognizer->exConstruct(recognizer);

    /* Hand over to the installed recovery routine.
     */
    recognizer->recoverFromMismatchedToken(recognizer, ttype, follow);
}
/** Report a recognition error by invoking the recognizer's
 *  displayRecognitionError callback with the tokenNames member of the
 *  recognizer's shared state.
 *
 *  \param rec  Recognizer that encountered the error.
 */
static void			
reportError		    (pANTLR3_BASE_RECOGNIZER rec)
{
    rec->displayRecognitionError(rec, rec->state->tokenNames);
}
Exemple #7
0
/** This code is factored out from mismatched token and mismatched set
 *  recovery.  It handles "single token insertion" error recovery for
 *  both.  No tokens are consumed to recover from insertions.
 *
 *  If the supplied follow set contains ANTLR3_EOR_TOKEN_TYPE, a temporary
 *  set is built by or-ing it with the result of computeCSRuleFollow() and
 *  removing the EOR marker from that temporary set; the caller's follow
 *  set is not modified. The temporary set is freed before returning.
 *
 *  \param recognizer  Recognizer in which the mismatch occurred. Only
 *                     ANTLR3_TYPE_PARSER and ANTLR3_TYPE_TREE_PARSER are
 *                     supported; other types print a diagnostic to stderr.
 *  \param follow      Follow set for the current grammar element, may be NULL.
 *
 *  \return ANTLR3_TRUE if LA(1) is a member of the (possibly extended)
 *          follow set, in which case the error is reported and the
 *          recognizer's error and failed flags are cleared;
 *          ANTLR3_FALSE if follow is NULL, the recognizer type is
 *          unsupported, or LA(1) is not in the set.
 */
static ANTLR3_BOOLEAN	
recoverFromMismatchedElement	    (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_BITSET follow)
{
    pANTLR3_BITSET	    viableToksFollowingRule;
    pANTLR3_BITSET	    newFollow;
    pANTLR3_INT_STREAM	    is;
    ANTLR3_BOOLEAN	    recovered;

    /* Locate the integer stream that belongs to this kind of recognizer.
     */
    switch	(recognizer->type)
    {
    case	ANTLR3_TYPE_PARSER:

	is	= ((pANTLR3_PARSER) (recognizer->super))->tstream->istream;
	break;

    case	ANTLR3_TYPE_TREE_PARSER:

	is	= ((pANTLR3_TREE_PARSER) (recognizer->super))->ctnstream->tnstream->istream;
	break;

    default:

	fprintf(stderr, "Base recognizerfunction recover called by unknown paresr type - provide override for this function\n");
	return ANTLR3_FALSE;
    }

    if	(follow == NULL)
    {
	/* The follow set is NULL, which means we don't know what can come 
	 * next, so we "hit and hope" by just signifying that we cannot
	 * recover, which will just cause the next token to be consumed,
	 * which might dig us out.
	 */
	return	ANTLR3_FALSE;
    }

    newFollow	= NULL;

    /* We have a bitmap for the follow set, hence we can compute 
     * what can follow this grammar element reference.
     */
    if	(follow->isMember(follow, ANTLR3_EOR_TOKEN_TYPE) == ANTLR3_TRUE)
    {
	/* First we need to know which of the available tokens are viable
	 * to follow this reference.
	 */
	viableToksFollowingRule	= recognizer->computeCSRuleFollow(recognizer);

	/* Knowing that, we can or in the follow set
	 */
	newFollow   = follow->or(follow, viableToksFollowingRule);

	/* Remove the EOR token, which we do not wish to compute with.
	 * This must operate on the combined set, not on the caller's
	 * follow set, otherwise the caller's set is altered and the EOR
	 * marker stays in the set we test against below.
	 */
	newFollow->remove(newFollow, ANTLR3_EOR_TOKEN_TYPE);
	viableToksFollowingRule->free(viableToksFollowingRule);

	/* We now have the computed set of what can follow the current token
	 */
	follow	= newFollow;
    }

    /* We can now see if the current token works with the set of tokens
     * that could follow the current grammar reference. If it looks like it
     * is consistent, then we can "insert" that token by not throwing
     * an exception and assuming that we saw it. 
     */
    recovered	= ANTLR3_FALSE;

    if	(follow->isMember(follow, is->_LA(is, 1)) == ANTLR3_TRUE)
    {
	/* report the error, but don't cause any rules to abort and stuff
	 */
	recognizer->reportError(recognizer);

	recognizer->error	= ANTLR3_FALSE;
	recognizer->failed	= ANTLR3_FALSE;
	recovered		= ANTLR3_TRUE;	/* Success in recovery	*/
    }

    /* The combined set (if we built one) is ours to release on every path.
     */
    if	(newFollow != NULL)
    {
	newFollow->free(newFollow);
    }

    /* When not recovered, we could not find anything viable to do, so
     * this is going to cause an exception.
     */
    return  recovered;
}
Exemple #8
0
/** Attempt to recover from a single missing or extra token.
 *
 *  EXTRA TOKEN
 *
 *  LA(1) is not what we are looking for.  If LA(2) has the right token,
 *  however, then assume LA(1) is some extra spurious token.  Delete it
 *  and LA(2) as if we were doing a normal match(), which advances the
 *  input.
 *
 *  MISSING TOKEN
 *
 *  If current token is consistent with what could come after
 *  ttype then it is ok to "insert" the missing token, else throw
 *  exception For example, Input "i=(3;" is clearly missing the
 *  ')'.  When the parser returns from the nested call to expr, it
 *  will have call chain:
 *
 *    stat -> expr -> atom
 *
 *  and it will be trying to match the ')' at this point in the
 *  derivation:
 *
 *       => ID '=' '(' INT ')' ('+' atom)* ';'
 *                          ^
 *  match() will see that ';' doesn't match ')' and report a
 *  mismatched token error.  To recover, it sees that LA(1)==';'
 *  is in the set of tokens that can follow the ')' token
 *  reference in rule atom.  It can assume that you forgot the ')'.
 *
 * May need to come back and look at the exception stuff here, I am assuming 
 * that the exception that was passed in in the java implementation is
 * stored in the recognizer exception stack. To 'throw' it we set the
 * error flag and rules can cascade back when this is set.
 *
 * \param recognizer  Recognizer in which the mismatch occurred. Only
 *                    ANTLR3_TYPE_PARSER and ANTLR3_TYPE_TREE_PARSER are
 *                    supported; other types print a diagnostic to stderr.
 * \param ttype       Token type that was expected.
 * \param follow      Follow set passed on to recoverFromMismatchedElement
 *                    when single token deletion does not apply.
 */
static void			
recoverFromMismatchedToken  (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ttype, pANTLR3_BITSET follow)
{
    pANTLR3_INT_STREAM	    is;

    /* Locate the integer stream that belongs to this kind of recognizer.
     */
    switch	(recognizer->type)
    {
    case	ANTLR3_TYPE_PARSER:

	is	= ((pANTLR3_PARSER) (recognizer->super))->tstream->istream;
	break;

    case	ANTLR3_TYPE_TREE_PARSER:

	is	= ((pANTLR3_TREE_PARSER) (recognizer->super))->ctnstream->tnstream->istream;
	break;

    default:

	fprintf(stderr, "Base recognizerfunction recoverFromMismatchedToken called by unknown paresr type - provide override for this function\n");
	return;
    }

    /* If the next token after the one we are looking at in the input stream
     * is what we are looking for then we remove the one we have discovered
     * from the stream by consuming it, then consume this next one along too as
     * if nothing had happened.
     */
    if	( is->_LA(is, 2) == ttype)
    {
	/* Print out the error
	 */
	recognizer->reportError(recognizer);

	/* Call resync hook (for debuggers and so on)
	 */
	recognizer->beginResync(recognizer);

	/* "delete" the extra token
	 */
	is->consume(is);

	/* End resync hook 
	 */
	recognizer->endResync(recognizer);

	/* consume the token that the rule actually expected to get
	 */
	is->consume(is);

	recognizer->error  = ANTLR3_FALSE;	/* Exception is not outstanding any more */

	/* Recovery by single token deletion succeeded, so we are done.
	 * Without this return we would drop into the follow set recovery
	 * below, which could report a second error or re-raise the error
	 * flags for an input position that has already been repaired.
	 */
	return;
    }

    /* The next token (after the one that is current, is not the one
     * that we were expecting, so the input is in more of an error state
     * than we hoped. 
     * If we are able to recover from the error using the follow set, then
     * we are hunky dory again and can move on, if we cannot, then we resort
     * to throwing the exception.
     */
    if	(recognizer->recoverFromMismatchedElement(recognizer, follow) == ANTLR3_FALSE)
    {
	recognizer->error	    = ANTLR3_TRUE;
	recognizer->failed	    = ANTLR3_TRUE;
    }
}
Exemple #9
0
/** Recover from an error found on the input stream.  Mostly this is
 *  NoViableAlt exceptions, but could be a mismatched token that
 *  the match() routine could not recover from.
 *
 *  The stream is resynchronized by consuming tokens until one in the
 *  error recovery set is found; the recognizer's error flag is cleared
 *  afterwards. Only ANTLR3_TYPE_PARSER and ANTLR3_TYPE_TREE_PARSER are
 *  supported; other recognizer types print a diagnostic to stderr.
 *
 *  \param recognizer  Recognizer that needs to resynchronize.
 */
static void			
recover			    (pANTLR3_BASE_RECOGNIZER recognizer)
{
    pANTLR3_INT_STREAM	    is;
    pANTLR3_BITSET	    resyncSet;	    /* Tokens we are allowed to resync to */

    /* Pick the integer stream that matches the recognizer type.
     */
    switch	(recognizer->type)
    {
    case	ANTLR3_TYPE_PARSER:

	is	= ((pANTLR3_PARSER) (recognizer->super))->tstream->istream;
	break;

    case	ANTLR3_TYPE_TREE_PARSER:

	is	= ((pANTLR3_TREE_PARSER) (recognizer->super))->ctnstream->tnstream->istream;
	break;

    default:

	fprintf(stderr, "Base recognizerfunction recover called by unknown paresr type - provide override for this function\n");
	return;
    }

    /* Failsafe against an infinite loop: if the previous error was raised
     * at this very same stream index then nothing was consumed since, so
     * force progress by eating exactly one token.
     */
    if	(recognizer->lastErrorIndex == is->index(is))
    {
	is->consume(is);
    }

    /* Remember where this error happened for the check above next time.
     */
    recognizer->lastErrorIndex	 = is->index(is);

    /* Build the set of tokens we may resynchronize on.
     */
    resyncSet	= recognizer->computeErrorRecoverySet(recognizer);

    /* Skip input until a member of that set shows up, bracketing the
     * skipping with the resync hooks (for debuggers and so on).
     */
    recognizer->beginResync(recognizer);
    recognizer->consumeUntilSet(recognizer, resyncSet);
    recognizer->endResync(recognizer);

    /* The set was only needed for this recovery, release it.
     */
    resyncSet->free(resyncSet);

    /* Clear the in-error bit so the exception is not reported again.
     */
    recognizer->error	= ANTLR3_FALSE;
}
Exemple #10
0
/** Compute the context-sensitive FOLLOW set for current rule.
 *  This is set of token types that can follow a specific rule
 *  reference given a specific call chain.  You get the set of
 *  viable tokens that can possibly come next (lookahead depth 1)
 *  given the current call chain.  Contrast this with the
 *  definition of plain FOLLOW for rule r:
 *
 *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
 *
 *  where x in T* and alpha, beta in V*; T is set of terminals and
 *  V is the set of terminals and nonterminals.  In other words,
 *  FOLLOW(r) is the set of all tokens that can possibly follow
 *  references to r in *any* sentential form (context).  At
 *  runtime, however, we know precisely which context applies as
 *  we have the call chain.  We may compute the exact (rather
 *  than covering superset) set of following tokens.
 *
 *  For example, consider grammar:
 *
 *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
 *       | "return" expr '.'
 *       ;
 *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
 *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
 *       | '(' expr ')'
 *       ;
 *
 *  The FOLLOW sets are all inclusive whereas context-sensitive
 *  FOLLOW sets are precisely what could follow a rule reference.
 *  For input "i=(3);", here is the derivation:
 *
 *  stat => ID '=' expr ';'
 *       => ID '=' atom ('+' atom)* ';'
 *       => ID '=' '(' expr ')' ('+' atom)* ';'
 *       => ID '=' '(' atom ')' ('+' atom)* ';'
 *       => ID '=' '(' INT ')' ('+' atom)* ';'
 *       => ID '=' '(' INT ')' ';'
 *
 *  At the "3" token, you'd have a call chain of
 *
 *    stat -> expr -> atom -> expr -> atom
 *
 *  What can follow that specific nested ref to atom?  Exactly ')'
 *  as you can see by looking at the derivation of this specific
 *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
 *
 *  You want the exact viable token set when recovering from a
 *  token mismatch.  Upon token mismatch, if LA(1) is member of
 *  the viable next token set, then you know there is most likely
 *  a missing token in the input stream.  "Insert" one by just not
 *  throwing an exception.
 */
static pANTLR3_BITSET		
computeCSRuleFollow	    (pANTLR3_BASE_RECOGNIZER recognizer)
{
    pANTLR3_BITSET	    csFollow;

    /* All the work is done by the recognizer's combineFollows callback.
     * NOTE(review): the meaning of the ANTLR3_FALSE argument is defined by
     * combineFollows(), which is not visible here - confirm that it selects
     * the context-sensitive computation described for this function.
     */
    csFollow	= recognizer->combineFollows(recognizer, ANTLR3_FALSE);

    return  csFollow;
}
Exemple #11
0
/**
 * Documentation below is from the Java implementation.
 *
 * Compute the error recovery set for the current rule.  During
 *  rule invocation, the parser pushes the set of tokens that can
 *  follow that rule reference on the stack; this amounts to
 *  computing FIRST of what follows the rule reference in the
 *  enclosing rule. This local follow set only includes tokens
 *  from within the rule; i.e., the FIRST computation done by
 *  ANTLR stops at the end of a rule.
 *
 *  EXAMPLE
 *
 *  When you find a "no viable alt exception", the input is not
 *  consistent with any of the alternatives for rule r.  The best
 *  thing to do is to consume tokens until you see something that
 *  can legally follow a call to r *or* any rule that called r.
 *  You don't want the exact set of viable next tokens because the
 *  input might just be missing a token--you might consume the
 *  rest of the input looking for one of the missing tokens.
 *
 *  Consider grammar:
 *
 *  a : '[' b ']'
 *    | '(' b ')'
 *    ;
 *  b : c '^' INT ;
 *  c : ID
 *    | INT
 *    ;
 *
 *  At each rule invocation, the set of tokens that could follow
 *  that rule is pushed on a stack.  Here are the various "local"
 *  follow sets:
 *
 *  FOLLOW(b1_in_a) = FIRST(']') = ']'
 *  FOLLOW(b2_in_a) = FIRST(')') = ')'
 *  FOLLOW(c_in_b) = FIRST('^') = '^'
 *
 *  Upon erroneous input "[]", the call chain is
 *
 *  a -> b -> c
 *
 *  and, hence, the follow context stack is:
 *
 *  depth  local follow set     after call to rule
 *    0         <EOF>                    a (from main())
 *    1          ']'                     b
 *    2          '^'                     c
 *
 *  Notice that ')' is not included, because b would have to have
 *  been called from a different context in rule a for ')' to be
 *  included.
 *
 *  For error recovery, we cannot consider FOLLOW(c)
 *  (context-sensitive or otherwise).  We need the combined set of
 *  all context-sensitive FOLLOW sets--the set of all tokens that
 *  could follow any reference in the call chain.  We need to
 *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
 *  we resync'd to that token, we'd consume until EOF.  We need to
 *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
 *  In this case, for input "[]", LA(1) is in this set so we would
 *  not consume anything and after printing an error rule c would
 *  return normally.  It would not find the required '^' though.
 *  At this point, it gets a mismatched token error and throws an
 *  exception (since LA(1) is not in the viable following token
 *  set).  The rule exception handler tries to recover, but finds
 *  the same recovery set and doesn't consume anything.  Rule b
 *  exits normally returning to rule a.  Now it finds the ']' (and
 *  with the successful match exits errorRecovery mode).
 *
 *  So, you can see that the parser walks up call chain looking
 *  for the token that was a member of the recovery set.
 *
 *  Errors are not generated in errorRecovery mode.
 *
 *  ANTLR's error recovery mechanism is based upon original ideas:
 *
 *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
 *
 *  and
 *
 *  "A note on error recovery in recursive descent parsers":
 *  http://portal.acm.org/citation.cfm?id=947902.947905
 *
 *  Later, Josef Grosch had some good ideas:
 *
 *  "Efficient and Comfortable Error Recovery in Recursive Descent
 *  Parsers":
 *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
 *
 *  Like Grosch I implemented local FOLLOW sets that are combined
 *  at run-time upon error to avoid overhead during parsing.
 */
static pANTLR3_BITSET		
computeErrorRecoverySet	    (pANTLR3_BASE_RECOGNIZER recognizer)
{
    pANTLR3_BITSET	    recoverySet;

    /* Delegate to the recognizer's combineFollows callback, passing
     * ANTLR3_FALSE as its second argument.
     */
    recoverySet	= recognizer->combineFollows(recognizer, ANTLR3_FALSE);

    return  recoverySet;
}
Exemple #12
0
/** Match current input symbol against ttype.  Upon error, do one token
 *  insertion or deletion if possible.  You can override to not recover
 *  here and bail out of the current production to the normal error
 *  exception catch (at the end of the method) by just throwing
 *  MismatchedTokenException upon input._LA(1)!=ttype.
 *
 *  \param recognizer  Recognizer doing the match. Only ANTLR3_TYPE_PARSER
 *                     and ANTLR3_TYPE_TREE_PARSER are supported; other
 *                     types print a diagnostic to stderr.
 *  \param ttype       Token type that LA(1) is expected to be.
 *  \param follow      Follow set handed to the mismatch callback.
 *
 *  \return ANTLR3_TRUE if LA(1) was ttype and has been consumed,
 *          ANTLR3_FALSE in every other case.
 */
static ANTLR3_BOOLEAN
match(	pANTLR3_BASE_RECOGNIZER recognizer,
		ANTLR3_UINT32 ttype, pANTLR3_BITSET follow)
{
    pANTLR3_INT_STREAM	    is;

    /* Pick the integer stream that matches the recognizer type.
     */
    switch	(recognizer->type)
    {
    case	ANTLR3_TYPE_PARSER:

	is	= ((pANTLR3_PARSER) (recognizer->super))->tstream->istream;
	break;

    case	ANTLR3_TYPE_TREE_PARSER:

	is	= ((pANTLR3_TREE_PARSER) (recognizer->super))->ctnstream->tnstream->istream;
	break;

    default:

	fprintf(stderr, "Base recognizerfunction 'match' called by unknown paresr type - provide override for this function\n");
	return ANTLR3_FALSE;
    }

    if	(is->_LA(is, 1) != ttype)
    {
	if	(recognizer->backtracking > 0)
	{
	    /* While backtracking a mismatch is not an error to report,
	     * we only note that this attempt failed.
	     */
	    recognizer->failed  = ANTLR3_TRUE;
	}
	else
	{
	    /* Genuine mismatch: let the mismatch callback create the
	     * exception and try to recover.
	     */
	    recognizer->mismatch(recognizer, ttype, follow);
	}

	return ANTLR3_FALSE;
    }

    /* LA(1) is what we were told to expect: take it off the stream,
     * leave error recovery mode (if we were in it) and flag success.
     */
    is->consume(is);
    recognizer->errorRecovery   = ANTLR3_FALSE;
    recognizer->failed	    = ANTLR3_FALSE;

    return ANTLR3_TRUE;
}
Exemple #13
0
/** Has this rule already parsed input at the current index in the
 *  input stream?  Return ANTLR3_TRUE if we have and ANTLR3_FALSE
 *  if we have not.
 *
 *  This method has a side-effect: if we have seen this input for
 *  this rule and successfully parsed before, then seek ahead to
 *  1 past the stop token matched for this rule last time. If the
 *  memoized result is MEMO_RULE_FAILED, the recognizer's failed flag
 *  is set instead and the stream is not moved.
 *
 *  \param recognizer  Recognizer to query. ANTLR3_TYPE_PARSER,
 *                     ANTLR3_TYPE_TREE_PARSER and ANTLR3_TYPE_LEXER are
 *                     supported; other types print a diagnostic to stderr
 *                     and yield ANTLR3_FALSE.
 *  \param ruleIndex   Index of the rule being looked up.
 *
 *  \return ANTLR3_TRUE if a memo entry exists for this rule at the
 *          current stream index, ANTLR3_FALSE otherwise.
 */
static ANTLR3_BOOLEAN	
alreadyParsedRule		    (pANTLR3_BASE_RECOGNIZER recognizer, ANTLR3_UINT32 ruleIndex)
{
    ANTLR3_UINT64	    stopIndex;
    pANTLR3_INT_STREAM	    is;

    /* Locate the integer stream that belongs to this kind of recognizer.
     */
    switch	(recognizer->type)
    {
    case	ANTLR3_TYPE_PARSER:

	is	= ((pANTLR3_PARSER) (recognizer->super))->tstream->istream;
	break;

    case	ANTLR3_TYPE_TREE_PARSER:

	is	= ((pANTLR3_TREE_PARSER) (recognizer->super))->ctnstream->tnstream->istream;
	break;

    case	ANTLR3_TYPE_LEXER:

	is	= ((pANTLR3_LEXER) (recognizer->super))->input->istream;

	/* This break is essential: without it a lexer falls through into
	 * the default case, prints the error and never consults the memo.
	 */
	break;

    default:

	fprintf(stderr, "Base recognizerfunction 'alreadyParsedRule' called by unknown paresr type - provide override for this function\n");
	return ANTLR3_FALSE;
    }

    /* See if we have a memo marker for this.
     */
    stopIndex	    = recognizer->getRuleMemoization(recognizer, ruleIndex, is->index(is));

    if	(stopIndex  == MEMO_RULE_UNKNOWN)
    {
	return ANTLR3_FALSE;
    }

    if	(stopIndex == MEMO_RULE_FAILED)
    {
	/* The rule was tried here before and did not match.
	 */
	recognizer->failed = ANTLR3_TRUE;
    }
    else
    {
	/* The rule matched here before: skip straight past what it matched.
	 */
	is->seek(is, stopIndex+1);
    }

    /* If here then the rule was executed for this input already
     */
    return  ANTLR3_TRUE;
}