Ejemplo n.º 1
0
/* *** CVC4 NOTE ***
 * This function is has been modified in not-completely-trivial ways from its
 * libantlr3c implementation to support more informative error messages and to
 * invoke the error reporting mechanism of the Input class instead of the
 * default error printer.
 */
void AntlrInput::reportError(pANTLR3_BASE_RECOGNIZER recognizer) {
    pANTLR3_EXCEPTION ex = recognizer->state->exception;
    pANTLR3_UINT8 * tokenNames = recognizer->state->tokenNames;
    stringstream ss;

    // Dig the CVC4 objects out of the ANTLR3 mess
    pANTLR3_PARSER antlr3Parser = (pANTLR3_PARSER)(recognizer->super);
    assert(antlr3Parser!=NULL);
    Parser *parser = (Parser*)(antlr3Parser->super);
    assert(parser!=NULL);
    AntlrInput *input = (AntlrInput*) parser->getInput() ;
    assert(input!=NULL);

    // Signal we are in error recovery now
    recognizer->state->errorRecovery = ANTLR3_TRUE;

    // Indicate this recognizer had an error while processing.
    recognizer->state->errorCount++;

    // Call the builtin error formatter
    // recognizer->displayRecognitionError(recognizer, recognizer->state->tokenNames);

    /* TODO: Make error messages more useful, maybe by including more expected tokens and information
     * about the current token. */
    switch(ex->type) {
    case ANTLR3_UNWANTED_TOKEN_EXCEPTION:

        // Indicates that the recognizer was fed a token which seems to be
        // spurious input. We can detect this when the token that follows
        // this unwanted token would normally be part of the syntactically
        // correct stream. Then we can see that the token we are looking at
        // is just something that should not be there and throw this exception.
        //
        if(tokenNames == NULL) {
            ss << "Unexpected token." ;
        } else {
            if(ex->expecting == ANTLR3_TOKEN_EOF) {
                ss << "Expected end of file.";
            } else {
                ss << "Expected " << tokenNames[ex->expecting]
                   << ", found '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
            }
        }
        break;

    case ANTLR3_MISSING_TOKEN_EXCEPTION:

        // Indicates that the recognizer detected that the token we just
        // hit would be valid syntactically if preceded by a particular
        // token. Perhaps a missing ';' at line end or a missing ',' in an
        // expression list, and such like.
        //
        if(tokenNames == NULL) {
            ss << "Missing token (" << ex->expecting << ").";
        } else {
            if(ex->expecting == ANTLR3_TOKEN_EOF) {
                ss << "Missing end of file marker.";
            } else if( ex->expecting == 0 ) {
                ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
                if( std::string(tokenText((pANTLR3_COMMON_TOKEN)ex->token)) == std::string("IN") ) {
                    ss << " Did you mean: `IS_IN'?";
                }
            } else {
                ss << "Missing " << tokenNames[ex->expecting] << ".";
            }
        }
        break;

    case ANTLR3_RECOGNITION_EXCEPTION:

        // Indicates that the recognizer received a token
        // in the input that was not predicted. This is the basic exception type
        // from which all others are derived. So we assume it was a syntax error.
        // You may get this if there are not more tokens and more are needed
        // to complete a parse for instance.
        //
        ss <<"Syntax error.";
        break;

    case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:

        // We were expecting to see one thing and got another. This is the
        // most common error if we could not detect a missing or unwanted token.
        // Here you can spend your efforts to
        // derive more useful error messages based on the expected
        // token set and the last token and so on. The error following
        // bitmaps do a good job of reducing the set that we were looking
        // for down to something small. Knowing what you are parsing may be
        // able to allow you to be even more specific about an error.
        //
        if(tokenNames == NULL) {
            ss << "Syntax error.";
        } else {
            if(ex->expecting == ANTLR3_TOKEN_EOF) {
                ss << "Expected end of file.";
            } else if( ex->expecting == 0 ) {
                ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
            } else {
                ss << "Expected " << tokenNames[ex->expecting] << ".";
            }
        }
        break;

    case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
        // We could not pick any alt decision from the input given
        // so god knows what happened - however when you examine your grammar,
        // you should. It means that at the point where the current token occurred
        // that the DFA indicates nowhere to go from here.
        //
        ss << "Unexpected token: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
        break;

    case ANTLR3_MISMATCHED_SET_EXCEPTION:

    {
        ANTLR3_UINT32 count;
        ANTLR3_UINT32 bit;
        ANTLR3_UINT32 size;
        ANTLR3_UINT32 numbits;
        pANTLR3_BITSET errBits;

        // This means we were able to deal with one of a set of
        // possible tokens at this point, but we did not see any
        // member of that set.
        //
        ss << "Unexpected input: '" << tokenText((pANTLR3_COMMON_TOKEN)ex->token)
           << "'. Expected one of: ";

        // What tokens could we have accepted at this point in the
        // parse?
        //
        count = 0;
        errBits = antlr3BitsetLoad(ex->expectingSet);
        numbits = errBits->numBits(errBits);
        size = errBits->size(errBits);

        if(size > 0) {
            // However many tokens we could have dealt with here, it is usually
            // not useful to print ALL of the set here. I arbitrarily chose 8
            // here, but you should do whatever makes sense for you of course.
            // No token number 0, so look for bit 1 and on.
            //
            for(bit = 1; bit < numbits && count < 8 && count < size; bit++) {
                // TODO: This doesn;t look right - should be asking if the bit is set!!
                //
                if(tokenNames[bit]) {
                    if( count++ > 0 ) {
                        ss << ", ";
                    }
                    ss << tokenNames[bit];
                }
            }
        } else {
            assert(false);//("Parse error with empty set of expected tokens.");
        }
    }
    break;

    case ANTLR3_EARLY_EXIT_EXCEPTION:

        // We entered a loop requiring a number of token sequences
        // but found a token that ended that sequence earlier than
        // we should have done.
        //
        ss << "Sequence terminated early by token: '"
           << tokenText((pANTLR3_COMMON_TOKEN)ex->token) << "'.";
        break;

    default:

        // We don't handle any other exceptions here, but you can
        // if you wish. If we get an exception that hits this point
        // then we are just going to report what we know about the
        // token.
        //
        assert(false);//("Unexpected exception in parser.");
        break;
    }

    // Call the error display routine
    input->parseError(ss.str(), ((pANTLR3_COMMON_TOKEN)ex->token)->type == ANTLR3_TOKEN_EOF);
}
Ejemplo n.º 2
0
/**
displayRecognitionError()함수를 변경
*/
void produceError4Parser(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
{
	pANTLR3_PARSER			parser;
	pANTLR3_TREE_PARSER	    tparser;
	pANTLR3_INT_STREAM	    is;
	pANTLR3_STRING			ttext;
	pANTLR3_STRING			ftext;
	pANTLR3_EXCEPTION	    ex;
	pANTLR3_COMMON_TOKEN    theToken;
	pANTLR3_BASE_TREE	    theBaseTree;
	pANTLR3_COMMON_TREE	    theCommonTree;

	//char* p;
	FILE *out;
	out = fopen("ParErr.txt", "ab"); // 에러를 저장할 파일
	ANTLR3_FPRINTF(out, "\r\n");

	// Retrieve some info for easy reading.
	//
	ex	    =		recognizer->state->exception;
	ttext   =		NULL;

	// See if there is a 'filename' we can use
	//
	if	(ex->streamName == NULL)
	{
		if	(((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
		{
			ANTLR3_FPRINTF(out, "-end of input-(");
		}
		else
		{
			ANTLR3_FPRINTF(out, "-unknown source-(");
		}
	}
	else
	{   
		/* rwkim 수정*/
		char curPath[200];
		getcwd(curPath, 200);
		ftext = ex->streamName->to8(ex->streamName);
		delstring(ftext->chars, curPath); //앞의 경로 제거
		ANTLR3_FPRINTF(out, "%s(", ftext->chars);
	}

	// Next comes the line number
	//

	ANTLR3_FPRINTF(out, "%d) ", recognizer->state->exception->line);
	//ANTLR3_FPRINTF(out, " : error %d : %s", 
	//									recognizer->state->exception->type,
	//				(pANTLR3_UINT8)	   (recognizer->state->exception->message));
	//다음과 같아 바꾸었음
	ANTLR3_FPRINTF(out, " : %s", (pANTLR3_UINT8)(recognizer->state->exception->message));

	// How we determine the next piece is dependent on which thing raised the
	// error.
	//
	
	switch	(recognizer->type)
	{
	case	ANTLR3_TYPE_PARSER:

		// Prepare the knowledge we know we have
		//
		parser	    = (pANTLR3_PARSER) (recognizer->super);
		tparser	    = NULL;
		is			= parser->tstream->istream;
		theToken    = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token);
		ttext	    = theToken->toString(theToken);

		/* rwkim 수정*/
		//p = strchr(ttext->chars, '=');
		//ttext->chars = p;

		ANTLR3_FPRINTF(out, " at offset %d", recognizer->state->exception->charPositionInLine);
		if  (theToken != NULL)
		{
			if (theToken->type == ANTLR3_TOKEN_EOF)
			{
				ANTLR3_FPRINTF(out, ", at <EOF>");
			}
			else
			{
				// Guard against null text in a token
				//
				ANTLR3_FPRINTF(out, "\r\n    near %s\r\n    ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
			}
		}
		break;

	case	ANTLR3_TYPE_TREE_PARSER:

		tparser		= (pANTLR3_TREE_PARSER) (recognizer->super);
		parser		= NULL;
		is			= tparser->ctnstream->tnstream->istream;
		theBaseTree	= (pANTLR3_BASE_TREE)(recognizer->state->exception->token);
		ttext		= theBaseTree->toStringTree(theBaseTree);

		if  (theBaseTree != NULL)
		{
			theCommonTree	= (pANTLR3_COMMON_TREE)	    theBaseTree->super;

			if	(theCommonTree != NULL)
			{
				theToken	= (pANTLR3_COMMON_TOKEN)    theBaseTree->getToken(theBaseTree);
			}
			ANTLR3_FPRINTF(out, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));
			ANTLR3_FPRINTF(out, ", near %s", ttext->chars);
		}
		break;

	default:

		ANTLR3_FPRINTF(out, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\r\n");
		return;
		break;
	}

	// Although this function should generally be provided by the implementation, this one
	// should be as helpful as possible for grammar developers and serve as an example
	// of what you can do with each exception type. In general, when you make up your
	// 'real' handler, you should debug the routine with all possible errors you expect
	// which will then let you be as specific as possible about all circumstances.
	//
	// Note that in the general case, errors thrown by tree parsers indicate a problem
	// with the output of the parser or with the tree grammar itself. The job of the parser
	// is to produce a perfect (in traversal terms) syntactically correct tree, so errors
	// at that stage should really be semantic errors that your own code determines and handles
	// in whatever way is appropriate.
	//
	switch  (ex->type)
	{
	case	ANTLR3_UNWANTED_TOKEN_EXCEPTION:

		// Indicates that the recognizer was fed a token which seesm to be
		// spurious input. We can detect this when the token that follows
		// this unwanted token would normally be part of the syntactically
		// correct stream. Then we can see that the token we are looking at
		// is just something that should not be there and throw this exception.
		//
		if	(tokenNames == NULL)
		{
			ANTLR3_FPRINTF(out, " : Extraneous input...");
		}
		else
		{
			if	(ex->expecting == ANTLR3_TOKEN_EOF)
			{
				ANTLR3_FPRINTF(out, " : Extraneous input - expected <EOF>\r\n");
			}
			else
			{
				ANTLR3_FPRINTF(out, " : Extraneous input - expected %s ...\r\n", tokenNames[ex->expecting]);
			}
		}
		break;

	case	ANTLR3_MISSING_TOKEN_EXCEPTION:

		// Indicates that the recognizer detected that the token we just
		// hit would be valid syntactically if preceeded by a particular 
		// token. Perhaps a missing ';' at line end or a missing ',' in an
		// expression list, and such like.
		//
		if	(tokenNames == NULL)
		{
			ANTLR3_FPRINTF(out, " : Missing token (%d)...\r\n", ex->expecting);
		}
		else
		{
			if	(ex->expecting == ANTLR3_TOKEN_EOF)
			{
				ANTLR3_FPRINTF(out, " : Missing <EOF>\r\n");
			}
			else
			{
				ANTLR3_FPRINTF(out, " : Missing %s \r\n", tokenNames[ex->expecting]);
			}
		}
		break;

	case	ANTLR3_RECOGNITION_EXCEPTION:

		// Indicates that the recognizer received a token
		// in the input that was not predicted. This is the basic exception type 
		// from which all others are derived. So we assume it was a syntax error.
		// You may get this if there are not more tokens and more are needed
		// to complete a parse for instance.
		//
		ANTLR3_FPRINTF(out, " : syntax error...\r\n");    
		break;

	case    ANTLR3_MISMATCHED_TOKEN_EXCEPTION:

		// We were expecting to see one thing and got another. This is the
		// most common error if we coudl not detect a missing or unwanted token.
		// Here you can spend your efforts to
		// derive more useful error messages based on the expected
		// token set and the last token and so on. The error following
		// bitmaps do a good job of reducing the set that we were looking
		// for down to something small. Knowing what you are parsing may be
		// able to allow you to be even more specific about an error.
		//
		if	(tokenNames == NULL)
		{
			ANTLR3_FPRINTF(out, " : syntax error...\r\n");
		}
		else
		{
			if	(ex->expecting == ANTLR3_TOKEN_EOF)
			{
				ANTLR3_FPRINTF(out, " : expected <EOF>\r\n");
			}
			else
			{
				ANTLR3_FPRINTF(out, " : expected %s ...\r\n", tokenNames[ex->expecting]);
			}
		}
		break;

	case	ANTLR3_NO_VIABLE_ALT_EXCEPTION:

		// We could not pick any alt decision from the input given
		// so god knows what happened - however when you examine your grammar,
		// you should. It means that at the point where the current token occurred
		// that the DFA indicates nowhere to go from here.
		//
		ANTLR3_FPRINTF(out, " : cannot match to any predicted input...\r\n");

		break;

	case	ANTLR3_MISMATCHED_SET_EXCEPTION:

		{
			ANTLR3_UINT32	  count;
			ANTLR3_UINT32	  bit;
			ANTLR3_UINT32	  size;
			ANTLR3_UINT32	  numbits;
			pANTLR3_BITSET	  errBits;

			// This means we were able to deal with one of a set of
			// possible tokens at this point, but we did not see any
			// member of that set.
			//
			ANTLR3_FPRINTF(out, " : unexpected input...\r\n  expected one of : ");

			// What tokens could we have accepted at this point in the
			// parse?
			//
			count   = 0;
			errBits = antlr3BitsetLoad		(ex->expectingSet);
			numbits = errBits->numBits		(errBits);
			size    = errBits->size			(errBits);

			if  (size > 0)
			{
				// However many tokens we could have dealt with here, it is usually
				// not useful to print ALL of the set here. I arbitrarily chose 8
				// here, but you should do whatever makes sense for you of course.
				// No token number 0, so look for bit 1 and on.
				//
				for	(bit = 1; bit < numbits && count < 8 && count < size; bit++)
				{
					// TODO: This doesn;t look right - should be asking if the bit is set!!
					//
					if  (tokenNames[bit])
					{
						ANTLR3_FPRINTF(out, "%s%s", count > 0 ? ", " : "", tokenNames[bit]); 
						count++;
					}
				}
				ANTLR3_FPRINTF(out, "\r\n");
			}
			else
			{
				ANTLR3_FPRINTF(out, "Actually dude, we didn't seem to be expecting anything here, or at least\r\n");
				ANTLR3_FPRINTF(out, "I could not work out what I was expecting, like so many of us these days!\r\n");
			}
		}
		break;

	case	ANTLR3_EARLY_EXIT_EXCEPTION:

		// We entered a loop requiring a number of token sequences
		// but found a token that ended that sequence earlier than
		// we should have done.
		//
		ANTLR3_FPRINTF(out, " : missing elements...\r\n");
		break;

	default:

		// We don't handle any other exceptions here, but you can
		// if you wish. If we get an exception that hits this point
		// then we are just going to report what we know about the
		// token.
		//
		ANTLR3_FPRINTF(out, " : syntax not recognized...\r\n");
		break;
	}

	// Here you have the token that was in error which if this is
	// the standard implementation will tell you the line and offset
	// and also record the address of the start of the line in the
	// input stream. You could therefore print the source line and so on.
	// Generally though, I would expect that your lexer/parser will keep
	// its own map of lines and source pointers or whatever as there
	// are a lot of specific things you need to know about the input
	// to do something like that.
	// Here is where you do it though :-).
	//

	fclose(out); 
}
Ejemplo n.º 3
0
    static void DisplayRecognitionError(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 *tokenNames)
    {
      pANTLR3_EXCEPTION ex = recognizer->state->exception;
      
      freettcn::translator::CTranslator &translator = freettcn::translator::CTranslator::Instance();
      unsigned line = ex->line;
      unsigned line2;
      unsigned pos = ex->charPositionInLine + 1;
      unsigned pos2;
      std::stringstream msg;
      std::stringstream msg2;
      std::string note;
      
      // How we determine the next piece is dependent on which thing raised the error.
      switch(recognizer->type) {
      case ANTLR3_TYPE_LEXER:
        {
          pANTLR3_LEXER lexer = (pANTLR3_LEXER)recognizer->super;
          
          if(ex->type == ANTLR3_NO_VIABLE_ALT_EXCEPTION)
            msg << "Cannot match to any predicted input";
          else
            msg << (pANTLR3_UINT8)ex->message;

          ANTLR3_INT32 width;
          
          width = ANTLR3_UINT32_CAST(( (pANTLR3_UINT8)lexer->input->data + (lexer->input->size(lexer->input) )) - (pANTLR3_UINT8)(ex->index));

          if(width >= 1) {
            if(isprint(ex->c))
              msg << " near '" << (char)ex->c << "'";
            else
              msg << " near char(0x" << std::hex << std::setw(2) << (int)ex->c << std::dec << ")";
          }
          else {
            msg << " '<EOF>'";
            
            // prepare second error
            line2 = lexer->rec->state->tokenStartLine;
            pos2 = lexer->rec->state->tokenStartCharPositionInLine;
            //            msg << fileName << ":" << (ANTLR3_UINT32)(lexer->rec->state->tokenStartLine) <<
            //              ":" << (ANTLR3_UINT32)(lexer->rec->state->tokenStartCharPositionInLine) << ": ";
            width = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)lexer->input->data + (lexer->input->size(lexer->input))) - (pANTLR3_UINT8)lexer->rec->state->tokenStartCharIndex);
            if(width >= 1)
              msg2 << "The lexer was matching from: " << std::string((char *)lexer->rec->state->tokenStartCharIndex, 0, 20) << std::endl;
            else
              msg2 << "The lexer was matching from the end of the line" << std::endl;
            
            note = "Above errors indicates a poorly specified lexer RULE or unterminated input element such as: \"STRING[\"]";
          }
        }
        break;

      case ANTLR3_TYPE_PARSER:
        {
          pANTLR3_COMMON_TOKEN token = (pANTLR3_COMMON_TOKEN)ex->token;
          
          if(ex->type == ANTLR3_NO_VIABLE_ALT_EXCEPTION)
            msg << "Cannot match '" << token->getText(token)->chars << "' to any predicted input";
          else {
            msg << (pANTLR3_UINT8)ex->message;
            
            if(token) {
              if (token->type == ANTLR3_TOKEN_EOF)
                msg << " at '<EOF>'";
              else {
                if(ex->type == ANTLR3_MISSING_TOKEN_EXCEPTION) {
                  if(!tokenNames)
                    msg << " [" << ex->expecting << "]";
                  else {
                    if(ex->expecting == ANTLR3_TOKEN_EOF)
                      msg << " '<EOF>'";
                    else
                      msg << " '" << tokenNames[ex->expecting] << "'";
                  }
                }
                else {
                  msg << " near '" << token->getText(token)->chars << "'";
                }
              }
            }
            
            if(ex->type == ANTLR3_UNWANTED_TOKEN_EXCEPTION) {
              if(tokenNames) {
                if(ex->expecting == ANTLR3_TOKEN_EOF)
                  msg << " ('<EOF>' expected)";
                else
                  msg << " ('" << tokenNames[ex->expecting] << "' expected)";
              }
            }
          }
        }
        break;
        
      default:
        std::cerr << "DisplayRecognitionError() called by unknown parser type - provide override for this function" << std::endl;
        return;
      }
      
      switch(ex->type) {
      case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
        // Indicates that the recognizer was fed a token which seesm to be
        // spurious input. We can detect this when the token that follows
        // this unwanted token would normally be part of the syntactically
        // correct stream. Then we can see that the token we are looking at
        // is just something that should not be there and throw this exception.
        //
        if(recognizer->type != ANTLR3_TYPE_PARSER) {
          if(tokenNames == NULL)
            ANTLR3_FPRINTF(stderr, " : Extraneous input...");
          else {
            if(ex->expecting == ANTLR3_TOKEN_EOF)
              ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n");
            else
              ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]);
          }
        }
        break;
        
      case ANTLR3_MISSING_TOKEN_EXCEPTION:
        // Indicates that the recognizer detected that the token we just
        // hit would be valid syntactically if preceeded by a particular 
        // token. Perhaps a missing ';' at line end or a missing ',' in an
        // expression list, and such like.
        break;
        
      case ANTLR3_RECOGNITION_EXCEPTION:
        // Indicates that the recognizer received a token
        // in the input that was not predicted. This is the basic exception type 
        // from which all others are derived. So we assume it was a syntax error.
        // You may get this if there are not more tokens and more are needed
        // to complete a parse for instance.
        break;
        
      case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
        // We were expecting to see one thing and got another. This is the
        // most common error if we coudl not detect a missing or unwanted token.
        // Here you can spend your efforts to
        // derive more useful error messages based on the expected
        // token set and the last token and so on. The error following
        // bitmaps do a good job of reducing the set that we were looking
        // for down to something small. Knowing what you are parsing may be
        // able to allow you to be even more specific about an error.
        //
        if(!tokenNames)
          ANTLR3_FPRINTF(stderr, " : syntax error...\n");
        else {
          if(ex->expecting == ANTLR3_TOKEN_EOF)
            ANTLR3_FPRINTF(stderr, " : expected <EOF>\n");
          else
            ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]);
        }
        break;
        
      case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
        // We could not pick any alt decision from the input given
        // so god knows what happened - however when you examine your grammar,
        // you should. It means that at the point where the current token occurred
        // that the DFA indicates nowhere to go from here.
        //
        if(recognizer->type != ANTLR3_TYPE_LEXER && recognizer->type != ANTLR3_TYPE_PARSER)
          ANTLR3_FPRINTF(stderr, " : cannot match to any predicted input...\n");
        break;
        
      case ANTLR3_MISMATCHED_SET_EXCEPTION:
        {
          ANTLR3_UINT32 count;
          ANTLR3_UINT32 bit;
          ANTLR3_UINT32 size;
          ANTLR3_UINT32 numbits;
          pANTLR3_BITSET errBits;
          
          // This means we were able to deal with one of a set of
          // possible tokens at this point, but we did not see any
          // member of that set.
          //
          ANTLR3_FPRINTF(stderr, " : unexpected input...\n  expected one of : ");
          
          // What tokens could we have accepted at this point in the
          // parse?
          //
          count   = 0;
          errBits = antlr3BitsetLoad(ex->expectingSet);
          numbits = errBits->numBits(errBits);
          size    = errBits->size(errBits);
          
          if(size > 0) {
            // However many tokens we could have dealt with here, it is usually
            // not useful to print ALL of the set here. I arbitrarily chose 8
            // here, but you should do whatever makes sense for you of course.
            // No token number 0, so look for bit 1 and on.
            //
            for(bit = 1; bit < numbits && count < 8 && count < size; bit++) {
              // TODO: This doesn;t look right - should be asking if the bit is set!!
              //
              if  (tokenNames[bit]) {
                ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : nullptr, tokenNames[bit]); 
                count++;
              }
            }
            ANTLR3_FPRINTF(stderr, "\n");
          }
          else {
            ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n");
            ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n");
          }
        }
        break;
        
      case ANTLR3_EARLY_EXIT_EXCEPTION:
        // We entered a loop requiring a number of token sequences
        // but found a token that ended that sequence earlier than
        // we should have done.
        //
        ANTLR3_FPRINTF(stderr, " : missing elements...\n");
        break;
        
      default:
        
        // We don't handle any other exceptions here, but you can
        // if you wish. If we get an exception that hits this point
        // then we are just going to report what we know about the
        // token.
        //
        ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n");
        std::cout << "Unknown exception type: " << ex->type << std::endl;
        break;
      }
      
      translator.Error(CLocation(translator.File(), line, pos), msg.str());
      if(!msg2.str().empty())
        translator.Error(CLocation(translator.File(), line2, pos2), msg2.str());
      if(!note.empty())
        translator.Note(CLocation(translator.File(), line2, pos2), note);
    }
Ejemplo n.º 4
0
void displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
{
    // Adopted code from https://github.com/antlr/antlr3/blob/master/runtime/C/src/antlr3baserecognizer.c
    pANTLR3_EXCEPTION ex;
    pANTLR3_COMMON_TOKEN theToken;
    pANTLR3_BASE_TREE theBaseTree;
    pANTLR3_COMMON_TREE theCommonTree;

    // Retrieve some info for easy reading.
    ex = recognizer->state->exception;
    String fileName;
    // See if there is a 'filename' we can use
    if	(ex->streamName != NULL)
    {
        pANTLR3_STRING ftext;
        ftext = ex->streamName->to8(ex->streamName);
        fileName.assign((const char*)ftext->chars);
    }

    int line = recognizer->state->exception->line;
    StringStream ss;
    ss << (const char*) (recognizer->state->exception->message);

    // How we determine the next piece is dependent on which thing raised the
    // error.
    pANTLR3_STRING ttext;
    switch	(recognizer->type)
    {
    case ANTLR3_TYPE_PARSER:
    {
        // Prepare the knowledge we know we have
        theToken = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token);
        ttext = theToken->toString(theToken);
        ss << ", at offset " << recognizer->state->exception->charPositionInLine;
        if (theToken != NULL)
        {
            if (theToken->type == ANTLR3_TOKEN_EOF)
                ss << ", at <EOF>";
            else
                // Guard against null text in a token
                ss << "\n near " << (ttext == NULL ? "<no text for the token>" : (const char*)ttext->chars) << "\n ";
        }
    }
        break;

    case ANTLR3_TYPE_TREE_PARSER:
        theBaseTree = (pANTLR3_BASE_TREE)(recognizer->state->exception->token);
        ttext = theBaseTree->toStringTree(theBaseTree);
        if (theBaseTree != NULL)
        {
            theCommonTree = (pANTLR3_COMMON_TREE) theBaseTree->super;
            if	(theCommonTree != NULL)
            {
                theToken = (pANTLR3_COMMON_TOKEN) theBaseTree->getToken(theBaseTree);
            }
            ss << ", at offset " << theBaseTree->getCharPositionInLine(theBaseTree);
            ss << ", near " << (const char*)ttext->chars;
        }
        break;

    default:
        std::cerr << "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\n";
        return;
        break;
    }

    // Although this function should generally be provided by the implementation, this one
    // should be as helpful as possible for grammar developers and serve as an example
    // of what you can do with each exception type. In general, when you make up your
    // 'real' handler, you should debug the routine with all possible errors you expect
    // which will then let you be as specific as possible about all circumstances.
    //
    // Note that in the general case, errors thrown by tree parsers indicate a problem
    // with the output of the parser or with the tree grammar itself. The job of the parser
    // is to produce a perfect (in traversal terms) syntactically correct tree, so errors
    // at that stage should really be semantic errors that your own code determines and handles
    // in whatever way is appropriate.
    switch (ex->type)
    {
    case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
        // Indicates that the recognizer was fed a token which seesm to be
        // spurious input. We can detect this when the token that follows
        // this unwanted token would normally be part of the syntactically
        // correct stream. Then we can see that the token we are looking at
        // is just something that should not be there and throw this exception.
        if	(tokenNames == NULL)
            ss << " : Extraneous input...";
        else
        {
            if	(ex->expecting == ANTLR3_TOKEN_EOF)
                ss << " : Extraneous input - expected <EOF>\n";
            else
                ss << " : Extraneous input - expected " << tokenNames[ex->expecting] << " ...\n";
        }
        break;

    case ANTLR3_MISSING_TOKEN_EXCEPTION:
        // Indicates that the recognizer detected that the token we just
        // hit would be valid syntactically if preceeded by a particular
        // token. Perhaps a missing ';' at line end or a missing ',' in an
        // expression list, and such like.
        if	(tokenNames == NULL)
            ss << " : Missing token (" << ex->expecting << ")...\n";
        else
        {
            if	(ex->expecting == ANTLR3_TOKEN_EOF)
                ss << " : Missing <EOF>\n";
            else
                ss << " : Missing " << tokenNames[ex->expecting] << " \n";
        }
        break;

    case ANTLR3_RECOGNITION_EXCEPTION:
        // Indicates that the recognizer received a token
        // in the input that was not predicted. This is the basic exception type
        // from which all others are derived. So we assume it was a syntax error.
        // You may get this if there are not more tokens and more are needed
        // to complete a parse for instance.
        ss << " : syntax error...\n";
        break;

    case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
        // We were expecting to see one thing and got another. This is the
        // most common error if we coudl not detect a missing or unwanted token.
        // Here you can spend your efforts to
        // derive more useful error messages based on the expected
        // token set and the last token and so on. The error following
        // bitmaps do a good job of reducing the set that we were looking
        // for down to something small. Knowing what you are parsing may be
        // able to allow you to be even more specific about an error.
        if (tokenNames == NULL)
            ss << " : syntax error...\n";
        else
        {
            if	(ex->expecting == ANTLR3_TOKEN_EOF)
                ss << " : expected <EOF>\n";
            else
                ss << " : expected " << tokenNames[ex->expecting] << " ...\n";
        }
        break;

    case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
        // We could not pick any alt decision from the input given
        // so god knows what happened - however when you examine your grammar,
        // you should. It means that at the point where the current token occurred
        // that the DFA indicates nowhere to go from here.
        ss << " : cannot match to any predicted input...\n";
        break;

    case ANTLR3_MISMATCHED_SET_EXCEPTION:
        {
            ANTLR3_UINT32 count;
            ANTLR3_UINT32 bit;
            ANTLR3_UINT32 size;
            ANTLR3_UINT32 numbits;
            pANTLR3_BITSET errBits;
            // This means we were able to deal with one of a set of
            // possible tokens at this point, but we did not see any
            // member of that set.

            ss << " : unexpected input...\n expected one of : ";
            // What tokens could we have accepted at this point in the
            // parse?
            count = 0;
            errBits = antlr3BitsetLoad	(ex->expectingSet);
            numbits = errBits->numBits	(errBits);
            size = errBits->size	(errBits);

            if (size > 0)
            {
                // However many tokens we could have dealt with here, it is usually
                // not useful to print ALL of the set here. I arbitrarily chose 8
                // here, but you should do whatever makes sense for you of course.
                // No token number 0, so look for bit 1 and on.

                for	(bit = 1; bit < numbits && count < 8 && count < size; bit++)
                {
                    // TODO: This doesn;t look right - should be asking if the bit is set!!
                    if (tokenNames[bit])
                    {
                        ss << (count > 0 ? ", " : "") << tokenNames[bit];
                        count++;
                    }
                }
                ss << "\n";
            }
            else
            {
                std::cerr << "Actually dude, we didn't seem to be expecting anything here, or at least\n";
                std::cerr << "I could not work out what I was expecting, like so many of us these days!\n";
            }
        }
        break;

    case ANTLR3_EARLY_EXIT_EXCEPTION:
        // We entered a loop requiring a number of token sequences
        // but found a token that ended that sequence earlier than
        // we should have done.
        ss << " : missing elements...\n";
        break;

    default:
        // We don't handle any other exceptions here, but you can
        // if you wish. If we get an exception that hits this point
        // then we are just going to report what we know about the
        // token.
        ss << " : syntax not recognized...\n";
        break;
    }

    THROW_EXCEPT(Exception::ERR_PARSE,
                 ss.str(),
                 fileName,
                 line);
}