/**
 * \brief
 * Builds the lexer -> token stream -> parser chain for the JpVocabulary grammar.
 *
 * \param[in]  input    Address of an already created ANTLR3 input stream.
 * \param[out] lxr      Receives the lexer created over *input.
 * \param[out] tstream  Receives the token stream fed by *lxr.
 * \param[out] psr      Receives the parser reading from *tstream.
 *
 * \remarks
 * Every failure is fatal: a message is written to stderr and the process
 * exits (ANTLR3_ERR_NOFILE when no input stream was supplied,
 * ANTLR3_ERR_NOMEM when one of the three objects cannot be created).
 */
void initParser(pANTLR3_INPUT_STREAM* input, pJpVocabularyLexer* lxr,
                pANTLR3_COMMON_TOKEN_STREAM* tstream, pJpVocabularyParser* psr)
{
    /* A missing stream (for example a file that could not be opened) must
     * not reach the lexer constructor, which would dereference it.
     */
    if (input == NULL || *input == NULL)
    {
        ANTLR3_FPRINTF(stderr, "Unable to create the lexer: no input stream was supplied\n");
        exit(ANTLR3_ERR_NOFILE);
    }

    /* JpVocabularyLexerNew is generated by ANTLR */
    *lxr = JpVocabularyLexerNew(*input);
    if (*lxr == NULL)
    {
        ANTLR3_FPRINTF(stderr, "Unable to create the lexer due to malloc() failure1\n");
        exit(ANTLR3_ERR_NOMEM);
    }

    *tstream = antlr3CommonTokenStreamSourceNew(ANTLR3_SIZE_HINT, TOKENSOURCE((*lxr)));
    if (*tstream == NULL)
    {
        ANTLR3_FPRINTF(stderr, "Out of memory trying to allocate token stream\n");
        exit(ANTLR3_ERR_NOMEM);
    }

    /* JpVocabularyParserNew is generated by ANTLR */
    *psr = JpVocabularyParserNew(*tstream);
    if (*psr == NULL)
    {
        ANTLR3_FPRINTF(stderr, "Out of memory trying to allocate parser\n");
        exit(ANTLR3_ERR_NOMEM);
    }
}
/**
 * \brief
 * Writes one line to stderr for every exception in the supplied chain.
 *
 * \param[in] ex
 * Head of the exception chain; NULL prints nothing.
 *
 * \remarks
 * An exception that carries a message prints the message; one without a
 * message prints its type number in decimal and hexadecimal. The chain is
 * followed through nextException until it ends.
 *
 * \see
 * ANTLR3_BASE_RECOGNIZER
 */
static void
antlr3ExceptionPrint(pANTLR3_EXCEPTION ex)
{
    pANTLR3_EXCEPTION current;

    for (current = ex; current != NULL; current = current->nextException)
    {
        if (current->message != NULL)
        {
            /* A textual description is available, so use it */
            ANTLR3_FPRINTF(stderr, "ANTLR3_EXCEPTION: %s\n", (char *)(current->message));
            continue;
        }

        /* No message: fall back to the numeric exception type */
        ANTLR3_FPRINTF(stderr, "ANTLR3_EXCEPTION number %d (%08X).\n", current->type, current->type);
    }
}
/**
 * Placeholder token matcher: reports on stderr that no lexer rules have been
 * installed. Nothing is printed when lexer is NULL.
 */
static void
mTokens(pANTLR3_LEXER lexer)
{
    /* Testing the parameter keeps it "used" without resorting to pragmas */
    if (!lexer)
    {
        return;
    }

    ANTLR3_FPRINTF(stderr, "lexer->mTokens(): Error: No lexer rules were added to the lexer yet!\n");
}
/**
 * \brief
 * Parses a single vocabulary entry held in a NUL terminated UTF-8 string.
 *
 * \param[in] str  The entry text. Must not be NULL.
 * \param[in] vi   Record whose pronun, writing, partOfSpeech and expl members
 *                 are handed to the grammar's vocabulary rule. The structure
 *                 is received by value.
 *
 * \return 0 after the parse has run, -1 when str is NULL or the input stream
 *         could not be created (nothing is parsed in that case).
 */
short parseJpVoc(const char* str, VocInfo vi)
{
    pANTLR3_INPUT_STREAM        input;
    pJpVocabularyLexer          lxr;
    pANTLR3_COMMON_TOKEN_STREAM tstream;
    pJpVocabularyParser         psr;

    /* strlen() and the "%s" trace below both require a real string */
    if (str == NULL)
    {
        ANTLR3_FPRINTF(stderr, "parseJpVoc: no input string supplied\n");
        return -1;
    }

    ANTLR3_FPRINTF(stdout,"parseJpVoc:%s\n",str);

    input = antlr3StringStreamNew((pANTLR3_UINT8)str, ANTLR3_ENC_UTF8,
                                  (ANTLR3_UINT32)strlen(str), (pANTLR3_UINT8)"jpVocabu");
    if (input == NULL)
    {
        ANTLR3_FPRINTF(stderr, "parseJpVoc: unable to create the input stream\n");
        return -1;
    }

    initParser(&input,&lxr,&tstream,&psr);

    psr->vocabulary(psr,vi.pronun, vi.writing,vi.partOfSpeech,vi.expl);

    ANTLR3_FPRINTF(stdout,"parseJpVoc:pronun:%s\twriting:%s\tpartOfSpeech:%s\texpl:%s\n",
                   vi.pronun,vi.writing,vi.partOfSpeech,vi.expl);

    cleanParser(&input,&lxr,&tstream,&psr);
    return 0;
}
ConversionResult ConvertUTF16toUTF32 ( const UTF16** sourceStart, const UTF16* sourceEnd, UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { ConversionResult result = conversionOK; const UTF16* source = *sourceStart; UTF32* target = *targetStart; UTF32 ch, ch2; while (source < sourceEnd) { const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */ ch = *source++; /* If we have a surrogate pair, convert to UTF32 first. */ if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { /* If the 16 bits following the high surrogate are in the source buffer... */ if (source < sourceEnd) { ch2 = *source; /* If it's a low surrogate, convert to UTF32. */ if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) { ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + (ch2 - UNI_SUR_LOW_START) + halfBase; ++source; } else if (flags == strictConversion) { /* it's an unpaired high surrogate */ --source; /* return to the illegal value itself */ result = sourceIllegal; break; } } else { /* We don't have the 16 bits following the high surrogate. */ --source; /* return to the high surrogate */ result = sourceExhausted; break; } } else if (flags == strictConversion) { /* UTF-16 surrogate values are illegal in UTF-32 */ if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { --source; /* return to the illegal value itself */ result = sourceIllegal; break; } } if (target >= targetEnd) { source = oldSource; /* Back up source pointer! */ result = targetExhausted; break; } *target++ = ch; } *sourceStart = source; *targetStart = target; #ifdef CVTUTF_DEBUG if (result == sourceIllegal) { ANTLR3_FPRINTF(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2); fflush(stderr); } #endif return result; }
// Main entry point for this example // int ANTLR3_CDECL main(int argc, char *argv[]) { pANTLR3_UINT8 fName; pANTLR3_INPUT_STREAM input; pJpVocabularyLexer lxr; pANTLR3_COMMON_TOKEN_STREAM tstream; pJpVocabularyParser psr; if (argc < 2 || argv[1] == NULL) { //fName = (pANTLR3_UINT8) "./input"; // Note in VS2005 debug, working directory must be configured VocInfo vi; initVocInfo(&vi); short ret = parseJpVoc("あいかわらず(相変わらず)[副]照旧,依然", vi); printf("pronun:%s\twriting:%s\tpartOfSpeech:%s\texpl:%s\n", vi.pronun,vi.writing, vi.partOfSpeech,vi.expl); /*printf("pronun:%s\twriting:%s\tpartOfSpeech:%s\texpl:%s\n", emptyIfNull(vi.pronun),emptyIfNull(vi.writing), emptyIfNull(vi.partOfSpeech),emptyIfNull(vi.expl));*/ clearVocInfo(&vi); return 0; } else { fName = (pANTLR3_UINT8) argv[1]; } ANTLR3_FPRINTF(stdout,"\xE6\x85\x8C\xE3\x81\xA6\xE3\x81\xBE\xE3\x81\x99\n"); ANTLR3_FPRINTF(stdout,"input file name:%s\n",fName); //ANTLR3_ENC_8BIT, ANTLR3_ENC_UTF8, ANTLR3_ENC_UTF16 input = antlr3FileStreamNew(fName, ANTLR3_ENC_UTF8); initParser(&input,&lxr,&tstream,&psr); psr->voclist(psr); cleanParser(&input,&lxr,&tstream,&psr); return 0; }
/** If oldRoot is a nil root, just copy or move the children to newRoot.
 *  If not a nil root, make oldRoot a child of newRoot.
 *
 * \code
 *    old=^(nil a b c), new=r yields ^(r a b c)
 *    old=^(a b c), new=r yields ^(r ^(a b c))
 * \endcode
 *
 *  If newRoot is a nil-rooted single child tree, use the single
 *  child as the new root node.
 *
 * \code
 *    old=^(nil a b c), new=^(nil r) yields ^(r a b c)
 *    old=^(a b c), new=^(nil r) yields ^(r ^(a b c))
 * \endcode
 *
 *  If oldRoot was null, it's ok, just return newRoot (even if isNilNode).
 *
 * \code
 *    old=null, new=r yields r
 *    old=null, new=^(nil r) yields ^(nil r)
 * \endcode
 *
 *  If newRoot is null, oldRoot is returned untouched.
 *
 *  A nil newRoot with more than one child cannot act as a root: a message is
 *  written to stderr and newRoot is returned without oldRoot being attached.
 *  A nil newRoot with no children is used as the root as it stands.
 *
 *  Be advised that it's ok for newRoot to point at oldRoot's
 *  children; i.e., you don't have to copy the list.  We are
 *  constructing these nodes so we should have this control for
 *  efficiency.
 */
static pANTLR3_BASE_TREE
becomeRoot (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE newRootTree, pANTLR3_BASE_TREE oldRootTree)
{
    /* Protect against tree rewrites if we are in some sort of error
     * state, but have tried to recover. In C we can end up with a null pointer
     * for a tree that was not produced.
     */
    if (newRootTree == NULL)
    {
        return oldRootTree;
    }

    /* root is just the new tree as is if there is no
     * current root tree.
     */
    if (oldRootTree == NULL)
    {
        return newRootTree;
    }

    /* Produce ^(nil real-node)
     */
    if (newRootTree->isNilNode(newRootTree))
    {
        ANTLR3_UINT32 childCount;

        childCount = newRootTree->getChildCount(newRootTree);

        if (childCount > 1)
        {
            /* TODO: Handle tree exceptions
             */
            ANTLR3_FPRINTF(stderr, "More than one node as root! TODO: Create tree exception hndling\n");
            return newRootTree;
        }

        /* The new root is the first child. Only unwrap when that child
         * exists: with an empty nil node there is nothing to promote, and the
         * addChild call below needs a valid node to be invoked on.
         */
        if (childCount == 1)
        {
            newRootTree = newRootTree->getChild(newRootTree, 0);
        }
    }

    /* Add old root into new root. addChild takes care of the case where oldRoot
     * is a flat list (nil rooted tree). All children of oldRoot are added to
     * new root.
     */
    newRootTree->addChild(newRootTree, oldRootTree);

    /* Always returns new root structure
     */
    return newRootTree;
}
int main(int /*argc*/, char** /*argv*/) { pANTLR3_UINT8 fileName = (pANTLR3_UINT8)"test.mat"; pANTLR3_COMMON_TREE_NODE_STREAM treeNodes; pANTLR3_INPUT_STREAM input = antlr3FileStreamNew(fileName, ANTLR3_ENC_UTF8); if (input == nullptr) { ANTLR3_FPRINTF(stderr, "unable to open %s", (char *)fileName); return ANTLR3_ERR_NOFILE; } pTalonMaterialLexer lexer = TalonMaterialLexerNew(input); if (lexer == nullptr) { ANTLR3_FPRINTF(stderr, "unable to create lexer."); return ANTLR3_ERR_NOMEM; } pANTLR3_COMMON_TOKEN_STREAM tokenStream = antlr3CommonTokenStreamSourceNew(ANTLR3_SIZE_HINT, TOKENSOURCE(lexer)); if (tokenStream == nullptr) { ANTLR3_FPRINTF(stderr, "unable to create tokenStream."); return ANTLR3_ERR_NOMEM; } pTalonMaterialParser parser = TalonMaterialParserNew(tokenStream); if (parser == nullptr) { ANTLR3_FPRINTF(stderr, "unable to create parser."); return ANTLR3_ERR_NOMEM; } auto module = parser->module(parser); auto errorCount = parser->pParser->rec->getNumberOfSyntaxErrors(parser->pParser->rec); if (errorCount > 0) { ANTLR3_FPRINTF(stderr, "The parser returned %d errors, tree walking aborted.\n", errorCount); } else { ANTLR3_FPRINTF(stdout, "Parser found no errors."); } return 0; }
/** Stand-in for getText(): the real implementation is expected to be
 *  installed by the super class. Reaching this version is reported on stderr
 *  as an internal error and NULL is returned.
 */
static pANTLR3_STRING
getText (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE t)
{
    /* Neither argument is consulted by the stand-in */
    (void)adaptor;
    (void)t;

    ANTLR3_FPRINTF(stderr, "Internal error - implementor of superclass containing ANTLR3_TREE_ADAPTOR did not implement getText()\n");

    return NULL;
}
/** Default lexer error handler (works for 8 bit streams only!!!)
 *
 *  Writes to stderr: the stream name (or "-unknown source-"), the line of the
 *  exception, the exception type and message, the offset in the line, and
 *  then either the offending character with up to 20 bytes of the input that
 *  follows it or, when the error sits at the end of the input, where the
 *  token being matched began.
 *
 *  \param[in] recognizer  Base recognizer whose super pointer is the lexer.
 *  \param[in] tokenNames  Not used by the lexer handler.
 */
static void
displayRecognitionError (pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
{
    pANTLR3_LEXER       lexer;
    pANTLR3_EXCEPTION   ex;
    pANTLR3_STRING      ftext;

    (void)tokenNames;

    lexer   = (pANTLR3_LEXER)(recognizer->super);
    ex      = lexer->rec->state->exception;

    // See if there is a 'filename' we can use. The test has to be made on
    // streamName, because that is the field dereferenced in the else branch.
    //
    if  (ex->streamName == NULL)
    {
        ANTLR3_FPRINTF(stderr, "-unknown source-(");
    }
    else
    {
        ftext = ex->streamName->to8(ex->streamName);
        ANTLR3_FPRINTF(stderr, "%s(", ftext->chars);
    }

    ANTLR3_FPRINTF(stderr, "%d) ", recognizer->state->exception->line);
    ANTLR3_FPRINTF(stderr, ": lexer error %d :\n\t%s at offset %d, ",
                        ex->type,
                        (pANTLR3_UINT8)    (ex->message),
                        ex->charPositionInLine+1
                    );
    {
        ANTLR3_INT32    width;

        // Number of bytes between the error position and the end of the input
        //
        width   = ANTLR3_UINT32_CAST(( (pANTLR3_UINT8)(lexer->input->data) + (lexer->input->size(lexer->input) )) - (pANTLR3_UINT8)(ex->index));

        if  (width >= 1)
        {
            // isprint() is only defined for values that fit an unsigned char
            // (or EOF), so anything wider goes straight to the hex form.
            //
            if  ((ANTLR3_UINT32)(ex->c) <= 0xFF && isprint((int)(ex->c)))
            {
                ANTLR3_FPRINTF(stderr, "near '%c' :\n", ex->c);
            }
            else
            {
                ANTLR3_FPRINTF(stderr, "near char(%#02X) :\n", (ANTLR3_UINT8)(ex->c));
            }
            ANTLR3_FPRINTF(stderr, "\t%.*s\n", width > 20 ? 20 : width ,((pANTLR3_UINT8)ex->index));
        }
        else
        {
            ANTLR3_FPRINTF(stderr, "(end of input).\n\t This indicates a poorly specified lexer RULE\n\t or unterminated input element such as: \"STRING[\"]\n");
            ANTLR3_FPRINTF(stderr, "\t The lexer was matching from line %d, offset %d, which\n\t ",
                                (ANTLR3_UINT32)(lexer->rec->state->tokenStartLine),
                                (ANTLR3_UINT32)(lexer->rec->state->tokenStartCharPositionInLine)
                                );

            // Same measurement, taken from the start of the token being matched
            //
            width = ANTLR3_UINT32_CAST(((pANTLR3_UINT8)(lexer->input->data)+(lexer->input->size(lexer->input))) - (pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));

            if  (width >= 1)
            {
                ANTLR3_FPRINTF(stderr, "looks like this:\n\t\t%.*s\n", width > 20 ? 20 : width ,(pANTLR3_UINT8)(lexer->rec->state->tokenStartCharIndex));
            }
            else
            {
                ANTLR3_FPRINTF(stderr, "is also the end of the line, so you must check your lexer rules\n");
            }
        }
    }
}
/**
 * Adds child to tree.
 *
 * - A NULL child is ignored.
 * - A child that is not a nil node is added to tree's children list, which
 *   is created first if tree has none.
 * - A nil-node child contributes its own children instead: when tree has no
 *   list yet the child's list is taken over wholesale (and the child is left
 *   without one); otherwise each non-NULL entry is added to tree's list.
 *   Offering tree its own children list is reported on stderr and ignored.
 */
void
addChild (pANTLR3_BASE_TREE tree, pANTLR3_BASE_TREE child)
{
    ANTLR3_UINT32   total;
    ANTLR3_UINT32   idx;

    if  (child == NULL)
    {
        return;
    }

    // Ordinary node: it becomes a direct child
    //
    if  (child->isNilNode(child) != ANTLR3_TRUE)
    {
        if  (tree->children == NULL)
        {
            // Nothing to hold the child yet, so create the list on the fly
            //
            tree->createChildrenList(tree);
        }
        tree->children->add(tree->children, child, (void (ANTLR3_CDECL *)(void *))child->free);
        return;
    }

    // From here on the child is a nil node, i.e. a flat list of nodes.
    // An empty list has nothing to contribute.
    //
    if  (child->children == NULL)
    {
        return;
    }

    if  (child->children == tree->children)
    {
        // TODO: Change to exception rather than ANTLR3_FPRINTF?
        //
        ANTLR3_FPRINTF(stderr, "ANTLR3: An attempt was made to add a child list to itself!\n");
        return;
    }

    if  (tree->children == NULL)
    {
        // The tree is still being built, and the node factory only cleans
        // up each node once, so the list pointer itself can simply change
        // hands: the nil node has no root of its own to keep it for.
        //
        tree->children  = child->children;
        child->children = NULL;
        freshenPACIndexesAll(tree);
        return;
    }

    // Both sides own a list: copy the entries across one by one
    //
    total = child->children->size(child->children);
    for (idx = 0; idx < total; idx++)
    {
        pANTLR3_BASE_TREE node;

        node = child->children->get(child->children, idx);

        // ANTLR3 lists can be sparse, unlike Array Lists
        //
        if  (node == NULL)
        {
            continue;
        }
        tree->children->add(tree->children, node, (void (ANTLR3_CDECL *) (void *))child->free);
    }
}
/// When constructing trees, sometimes we need to dup a token or AST /// subtree. Dup'ing a token means just creating another AST node /// around it. For trees, you must call the adaptor.dupTree(). /// static void * dupTreeNode (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream, void * element) { ANTLR3_FPRINTF(stderr, "dup() cannot be called on a node rewrite stream!!!"); return NULL; }
/// When constructing trees, sometimes we need to dup a token or AST /// subtree. Dup'ing a token means just creating another AST node /// around it. For trees, you must call the adaptor.dupTree(). /// static void * dupTok (pANTLR3_REWRITE_RULE_ELEMENT_STREAM stream, void * el) { ANTLR3_FPRINTF(stderr, "dup() cannot be called on a token rewrite stream!!"); return NULL; }
/** Stand-in for getChildCount(): reports on stderr that the implementor did
 *  not supply the function and answers 0.
 */
static ANTLR3_UINT32
getChildCount (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE tree)
{
    /* Neither argument is consulted by the stand-in */
    (void)adaptor;
    (void)tree;

    ANTLR3_FPRINTF(stderr, "Internal error - implementor of superclass containing ANTLR3_TREE_ADAPTOR did not implement getChildCount()\n");

    return 0;
}
/** Stand-in for getChild(): reports on stderr that the implementor did not
 *  supply the function and answers NULL.
 */
static pANTLR3_BASE_TREE
getChild (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE tree, ANTLR3_UINT32 i)
{
    /* None of the arguments is consulted by the stand-in */
    (void)adaptor;
    (void)tree;
    (void)i;

    ANTLR3_FPRINTF(stderr, "Internal error - implementor of superclass containing ANTLR3_TREE_ADAPTOR did not implement getChild()\n");

    return NULL;
}
/** Stand-in for the 8 bit setText(): the real implementation is expected to
 *  be installed by the super class. Reaching this version is reported on
 *  stderr as an internal error; nothing is stored.
 */
static void
setText8 (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_UINT8 t)
{
    /* Neither argument is consulted by the stand-in */
    (void)adaptor;
    (void)t;

    ANTLR3_FPRINTF(stderr, "Internal error - implementor of superclass containing ANTLR3_TREE_ADAPTOR did not implement setText()\n");
}
/** Modified version of displayRecognitionError().
 *
 *  Appends a description of the recognizer's current exception to the file
 *  "ParErr.txt" (opened in binary append mode, lines end in "\r\n"). When
 *  that file cannot be opened the report is written to stderr instead.
 *
 *  The report consists of: the stream name, the line number, the exception
 *  message, the position and text of the offending token (parser) or tree
 *  node (tree parser), and a line that depends on the exception type.
 *
 *  \param[in] recognizer  Recognizer whose state->exception is reported.
 *  \param[in] tokenNames  Token name table indexed by token type; may be
 *                         NULL, in which case names are left out.
 */
void
produceError4Parser(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 * tokenNames)
{
    pANTLR3_STRING          ttext;
    pANTLR3_STRING          ftext;
    pANTLR3_EXCEPTION       ex;
    pANTLR3_COMMON_TOKEN    theToken;
    pANTLR3_BASE_TREE       theBaseTree;
    pANTLR3_COMMON_TREE     theCommonTree;
    FILE                  * out;

    // File the errors are stored in. Fall back to stderr when it cannot be
    // opened, so that the report is not lost and no NULL stream is written to.
    //
    out = fopen("ParErr.txt", "ab");
    if  (out == NULL)
    {
        out = stderr;
    }

    ANTLR3_FPRINTF(out, "\r\n");

    // Retrieve some info for easy reading.
    //
    ex      = recognizer->state->exception;
    ttext   = NULL;

    // See if there is a 'filename' we can use
    //
    if  (ex->streamName == NULL)
    {
        if  (ex->token != NULL && ((pANTLR3_COMMON_TOKEN)(ex->token))->type == ANTLR3_TOKEN_EOF)
        {
            ANTLR3_FPRINTF(out, "-end of input-(");
        }
        else
        {
            ANTLR3_FPRINTF(out, "-unknown source-(");
        }
    }
    else
    {
        /* Modification by rwkim */
        char curPath[200];

        ftext = ex->streamName->to8(ex->streamName);

        // Remove the leading path. Only attempted when the working directory
        // was actually obtained; otherwise curPath holds no valid string.
        // NOTE(review): delstring() is defined elsewhere - presumably it
        // removes curPath from ftext->chars in place; confirm.
        //
        if  (getcwd(curPath, sizeof(curPath)) != NULL)
        {
            delstring(ftext->chars, curPath);
        }
        ANTLR3_FPRINTF(out, "%s(", ftext->chars);
    }

    // Next comes the line number
    //
    ANTLR3_FPRINTF(out, "%d) ", recognizer->state->exception->line);

    // The stock handler printed " : error <type> : <message>" here; this
    // version was changed to print the message only.
    //
    ANTLR3_FPRINTF(out, " : %s", (pANTLR3_UINT8)(recognizer->state->exception->message));

    // How we determine the next piece is dependent on which thing raised the
    // error.
    //
    switch  (recognizer->type)
    {
    case    ANTLR3_TYPE_PARSER:

        theToken = (pANTLR3_COMMON_TOKEN)(recognizer->state->exception->token);

        ANTLR3_FPRINTF(out, " at offset %d", recognizer->state->exception->charPositionInLine);
        if  (theToken != NULL)
        {
            // Only ask the token for its text once we know there is a token
            //
            ttext = theToken->toString(theToken);

            if (theToken->type == ANTLR3_TOKEN_EOF)
            {
                ANTLR3_FPRINTF(out, ", at <EOF>");
            }
            else
            {
                // Guard against null text in a token
                //
                ANTLR3_FPRINTF(out, "\r\n near %s\r\n ", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
            }
        }
        break;

    case    ANTLR3_TYPE_TREE_PARSER:

        theBaseTree = (pANTLR3_BASE_TREE)(recognizer->state->exception->token);

        if  (theBaseTree != NULL)
        {
            // Only ask the node for its text once we know there is a node
            //
            ttext = theBaseTree->toStringTree(theBaseTree);

            theCommonTree = (pANTLR3_COMMON_TREE) theBaseTree->super;

            if  (theCommonTree != NULL)
            {
                theToken = (pANTLR3_COMMON_TOKEN) theBaseTree->getToken(theBaseTree);
            }
            ANTLR3_FPRINTF(out, ", at offset %d", theBaseTree->getCharPositionInLine(theBaseTree));
            ANTLR3_FPRINTF(out, ", near %s", ttext == NULL ? (pANTLR3_UINT8)"<no text for the token>" : ttext->chars);
        }
        break;

    default:

        ANTLR3_FPRINTF(out, "Base recognizer function displayRecognitionError called by unknown parser type - provide override for this function\r\n");

        // Do not leave the error file open on the early exit
        //
        if  (out != stderr)
        {
            fclose(out);
        }
        return;
    }

    // Although this function should generally be provided by the implementation, this one
    // should be as helpful as possible for grammar developers and serve as an example
    // of what you can do with each exception type. In general, when you make up your
    // 'real' handler, you should debug the routine with all possible errors you expect
    // which will then let you be as specific as possible about all circumstances.
    //
    // Note that in the general case, errors thrown by tree parsers indicate a problem
    // with the output of the parser or with the tree grammar itself. The job of the parser
    // is to produce a perfect (in traversal terms) syntactically correct tree, so errors
    // at that stage should really be semantic errors that your own code determines and handles
    // in whatever way is appropriate.
    //
    switch  (ex->type)
    {
    case    ANTLR3_UNWANTED_TOKEN_EXCEPTION:

        // Indicates that the recognizer was fed a token which seems to be
        // spurious input. We can detect this when the token that follows
        // this unwanted token would normally be part of the syntactically
        // correct stream. Then we can see that the token we are looking at
        // is just something that should not be there and throw this exception.
        //
        if  (tokenNames == NULL)
        {
            ANTLR3_FPRINTF(out, " : Extraneous input...");
        }
        else
        {
            if  (ex->expecting == ANTLR3_TOKEN_EOF)
            {
                ANTLR3_FPRINTF(out, " : Extraneous input - expected <EOF>\r\n");
            }
            else
            {
                ANTLR3_FPRINTF(out, " : Extraneous input - expected %s ...\r\n", tokenNames[ex->expecting]);
            }
        }
        break;

    case    ANTLR3_MISSING_TOKEN_EXCEPTION:

        // Indicates that the recognizer detected that the token we just
        // hit would be valid syntactically if preceded by a particular
        // token. Perhaps a missing ';' at line end or a missing ',' in an
        // expression list, and such like.
        //
        if  (tokenNames == NULL)
        {
            ANTLR3_FPRINTF(out, " : Missing token (%d)...\r\n", ex->expecting);
        }
        else
        {
            if  (ex->expecting == ANTLR3_TOKEN_EOF)
            {
                ANTLR3_FPRINTF(out, " : Missing <EOF>\r\n");
            }
            else
            {
                ANTLR3_FPRINTF(out, " : Missing %s \r\n", tokenNames[ex->expecting]);
            }
        }
        break;

    case    ANTLR3_RECOGNITION_EXCEPTION:

        // Indicates that the recognizer received a token
        // in the input that was not predicted. This is the basic exception type
        // from which all others are derived. So we assume it was a syntax error.
        // You may get this if there are not more tokens and more are needed
        // to complete a parse for instance.
        //
        ANTLR3_FPRINTF(out, " : syntax error...\r\n");
        break;

    case    ANTLR3_MISMATCHED_TOKEN_EXCEPTION:

        // We were expecting to see one thing and got another. This is the
        // most common error if we could not detect a missing or unwanted token.
        // Here you can spend your efforts to
        // derive more useful error messages based on the expected
        // token set and the last token and so on. The error following
        // bitmaps do a good job of reducing the set that we were looking
        // for down to something small. Knowing what you are parsing may be
        // able to allow you to be even more specific about an error.
        //
        if  (tokenNames == NULL)
        {
            ANTLR3_FPRINTF(out, " : syntax error...\r\n");
        }
        else
        {
            if  (ex->expecting == ANTLR3_TOKEN_EOF)
            {
                ANTLR3_FPRINTF(out, " : expected <EOF>\r\n");
            }
            else
            {
                ANTLR3_FPRINTF(out, " : expected %s ...\r\n", tokenNames[ex->expecting]);
            }
        }
        break;

    case    ANTLR3_NO_VIABLE_ALT_EXCEPTION:

        // We could not pick any alt decision from the input given
        // so god knows what happened - however when you examine your grammar,
        // you should. It means that at the point where the current token occurred
        // that the DFA indicates nowhere to go from here.
        //
        ANTLR3_FPRINTF(out, " : cannot match to any predicted input...\r\n");
        break;

    case    ANTLR3_MISMATCHED_SET_EXCEPTION:

        {
            ANTLR3_UINT32     count;
            ANTLR3_UINT32     bit;
            ANTLR3_UINT32     size;
            ANTLR3_UINT32     numbits;
            pANTLR3_BITSET    errBits;

            // This means we were able to deal with one of a set of
            // possible tokens at this point, but we did not see any
            // member of that set.
            //
            ANTLR3_FPRINTF(out, " : unexpected input...\r\n expected one of : ");

            // What tokens could we have accepted at this point in the
            // parse?
            //
            count   = 0;
            errBits = antlr3BitsetLoad      (ex->expectingSet);
            numbits = errBits->numBits      (errBits);
            size    = errBits->size         (errBits);

            if  (size > 0)
            {
                // However many tokens we could have dealt with here, it is usually
                // not useful to print ALL of the set here. I arbitrarily chose 8
                // here, but you should do whatever makes sense for you of course.
                // No token number 0, so look for bit 1 and on. Without a name
                // table there is nothing to print, so the loop is skipped.
                //
                for (bit = 1; tokenNames != NULL && bit < numbits && count < 8 && count < size; bit++)
                {
                    // TODO: This doesn't look right - should be asking if the bit is set!!
                    //
                    if  (tokenNames[bit])
                    {
                        ANTLR3_FPRINTF(out, "%s%s", count > 0 ? ", " : "", tokenNames[bit]);
                        count++;
                    }
                }
                ANTLR3_FPRINTF(out, "\r\n");
            }
            else
            {
                ANTLR3_FPRINTF(out, "Actually dude, we didn't seem to be expecting anything here, or at least\r\n");
                ANTLR3_FPRINTF(out, "I could not work out what I was expecting, like so many of us these days!\r\n");
            }
        }
        break;

    case    ANTLR3_EARLY_EXIT_EXCEPTION:

        // We entered a loop requiring a number of token sequences
        // but found a token that ended that sequence earlier than
        // we should have done.
        //
        ANTLR3_FPRINTF(out, " : missing elements...\r\n");
        break;

    default:

        // We don't handle any other exceptions here, but you can
        // if you wish. If we get an exception that hits this point
        // then we are just going to report what we know about the
        // token.
        //
        ANTLR3_FPRINTF(out, " : syntax not recognized...\r\n");
        break;
    }

    // Here you have the token that was in error which if this is
    // the standard implementation will tell you the line and offset
    // and also record the address of the start of the line in the
    // input stream. You could therefore print the source line and so on.
    // Generally though, I would expect that your lexer/parser will keep
    // its own map of lines and source pointers or whatever as there
    // are a lot of specific things you need to know about the input
    // to do something like that.
    // Here is where you do it though :-).
    //
    if  (out != stderr)
    {
        fclose(out);
    }
}
/** If oldRoot is a nil root, just copy or move the children to newRoot.
 *  If not a nil root, make oldRoot a child of newRoot.
 *
 * \code
 *    old=^(nil a b c), new=r yields ^(r a b c)
 *    old=^(a b c), new=r yields ^(r ^(a b c))
 * \endcode
 *
 *  If newRoot is a nil-rooted single child tree, use the single
 *  child as the new root node.
 *
 * \code
 *    old=^(nil a b c), new=^(nil r) yields ^(r a b c)
 *    old=^(a b c), new=^(nil r) yields ^(r ^(a b c))
 * \endcode
 *
 *  If oldRoot was null, it's ok, just return newRoot (even if isNilNode).
 *
 * \code
 *    old=null, new=r yields r
 *    old=null, new=^(nil r) yields ^(nil r)
 * \endcode
 *
 *  If newRoot is null, oldRoot is returned untouched.
 *
 *  A nil newRoot with more than one child cannot act as a root: a message is
 *  written to stderr and newRoot is returned without oldRoot being attached.
 *  A nil newRoot with no children is used as the root as it stands.
 *
 *  Nil nodes that are no longer needed after the rewrite (an unwrapped nil
 *  newRoot, a nil oldRoot) are handed to their own reuse() function.
 *
 *  Be advised that it's ok for newRoot to point at oldRoot's
 *  children; i.e., you don't have to copy the list.  We are
 *  constructing these nodes so we should have this control for
 *  efficiency.
 */
static pANTLR3_BASE_TREE
becomeRoot (pANTLR3_BASE_TREE_ADAPTOR adaptor, pANTLR3_BASE_TREE newRootTree, pANTLR3_BASE_TREE oldRootTree)
{
    pANTLR3_BASE_TREE saveRoot;

    /* Protect against tree rewrites if we are in some sort of error
     * state, but have tried to recover. In C we can end up with a null pointer
     * for a tree that was not produced.
     */
    if (newRootTree == NULL)
    {
        return oldRootTree;
    }

    /* root is just the new tree as is if there is no
     * current root tree.
     */
    if (oldRootTree == NULL)
    {
        return newRootTree;
    }

    /* Produce ^(nil real-node)
     */
    if (newRootTree->isNilNode(newRootTree))
    {
        ANTLR3_UINT32 childCount;

        childCount = newRootTree->getChildCount(newRootTree);

        if (childCount > 1)
        {
            /* TODO: Handle tree exceptions
             */
            ANTLR3_FPRINTF(stderr, "More than one node as root! TODO: Create tree exception handling\n");
            return newRootTree;
        }

        /* The new root is the first child. Only unwrap when that child
         * exists: with an empty nil node there is nothing to promote, and the
         * nil node itself stays in use as the root, so it must not be
         * reclaimed either.
         */
        if (childCount == 1)
        {
            /* Keep track of the original newRoot: it was a nil node, so it
             * can be reused once its child has been taken out.
             */
            saveRoot    = newRootTree;
            newRootTree = (pANTLR3_BASE_TREE)newRootTree->getChild(newRootTree, 0);

            // Reclaim the old nilNode()
            //
            saveRoot->reuse(saveRoot);
        }
    }

    /* Add old root into new root. addChild takes care of the case where oldRoot
     * is a flat list (nil rooted tree). All children of oldRoot are added to
     * new root.
     */
    newRootTree->addChild(newRootTree, oldRootTree);

    // If the oldroot tree was a nil node, then we know at this point
    // it has become orphaned by the rewrite logic, so we tell it to do
    // whatever it needs to do to be reused.
    //
    if (oldRootTree->isNilNode(oldRootTree))
    {
        // We have taken an old Root Tree and appended all its children to the new
        // root. In addition though it was a nil node, which means the generated code
        // will not reuse it again, so we will reclaim it here. First we want to zero out
        // any pointers it was carrying around. We are just the baseTree handler so we
        // don't necessarily know how to do this for the real node, we just ask the tree itself
        // to do it.
        //
        oldRootTree->reuse(oldRootTree);
    }

    /* Always returns new root structure
     */
    return newRootTree;
}
/// Delete children from start to stop and replace with t even if t is /// a list (nil-root tree). Num of children can increase or decrease. /// For huge child lists, inserting children can force walking rest of /// children to set their child index; could be slow. /// static void replaceChildren (pANTLR3_BASE_TREE parent, ANTLR3_INT32 startChildIndex, ANTLR3_INT32 stopChildIndex, pANTLR3_BASE_TREE newTree) { ANTLR3_INT32 replacingHowMany; // How many nodes will go away ANTLR3_INT32 replacingWithHowMany; // How many nodes will replace them ANTLR3_INT32 numNewChildren; // Tracking variable ANTLR3_INT32 delta; // Difference in new vs existing count ANTLR3_INT32 i; ANTLR3_INT32 j; pANTLR3_VECTOR newChildren; // Iterator for whatever we are going to add in ANTLR3_BOOLEAN freeNewChildren; // Whether we created the iterator locally or reused it if (parent->children == NULL) { ANTLR3_FPRINTF(stderr, "replaceChildren call: Indexes are invalid; no children in list for %s", parent->getText(parent)->chars); return; } // Either use the existing list of children in the supplied nil node, or build a vector of the // tree we were given if it is not a nil node, then we treat both situations exactly the same // if (newTree->isNilNode(newTree)) { newChildren = newTree->children; freeNewChildren = ANTLR3_FALSE; // We must NO free this memory } else { newChildren = antlr3VectorNew(1); if (newChildren == NULL) { ANTLR3_FPRINTF(stderr, "replaceChildren: out of memory!!"); exit(1); } newChildren->add(newChildren, (void *)newTree, NULL); freeNewChildren = ANTLR3_TRUE; // We must free this memory } // Initialize // replacingHowMany = stopChildIndex - startChildIndex + 1; replacingWithHowMany = newChildren->size(newChildren); delta = replacingHowMany - replacingWithHowMany; numNewChildren = newChildren->size(newChildren); // If it is the same number of nodes, then do a direct replacement // if (delta == 0) { pANTLR3_BASE_TREE child; // Same number of nodes // j = 0; for (i = 
startChildIndex; i <= stopChildIndex; i++) { child = (pANTLR3_BASE_TREE) newChildren->get(newChildren, j); parent->children->set(parent->children, i, child, NULL, ANTLR3_FALSE); child->setParent(child, parent); child->setChildIndex(child, i); } } else if (delta > 0) { ANTLR3_UINT32 indexToDelete; // Less nodes than there were before // reuse what we have then delete the rest // for (j = 0; j < numNewChildren; j++) { parent->children->set(parent->children, startChildIndex + j, newChildren->get(newChildren, j), NULL, ANTLR3_FALSE); } // We just delete the same index position until done // indexToDelete = startChildIndex + numNewChildren; for (j = indexToDelete; j <= (ANTLR3_INT32)stopChildIndex; j++) { parent->children->remove(parent->children, indexToDelete); } parent->freshenPACIndexes(parent, startChildIndex); } else { ANTLR3_UINT32 numToInsert; // More nodes than there were before // Use what we can, then start adding // for (j = 0; j < replacingHowMany; j++) { parent->children->set(parent->children, startChildIndex + j, newChildren->get(newChildren, j), NULL, ANTLR3_FALSE); } numToInsert = replacingWithHowMany - replacingHowMany; for (j = replacingHowMany; j < replacingWithHowMany; j++) { parent->children->add(parent->children, newChildren->get(newChildren, j), NULL); } parent->freshenPACIndexes(parent, startChildIndex); } if (freeNewChildren == ANTLR3_TRUE) { ANTLR3_FREE(newChildren->elements); newChildren->elements = NULL; newChildren->size = 0; ANTLR3_FREE(newChildren); // Will not free the nodes } }
bool parseProto (const char*filename, const char *outputFilename,const char * outputInternalNamespace, const char*outputExternalNamespace, char**package,pANTLR3_HASH_TABLE typeTable, bool cleanUp,ProtoJSParser_protocol_return*retval, pProtoJSLexer*ret_lxr, pProtoJSParser*ret_psr,pANTLR3_COMMON_TOKEN_STREAM*ret_tstream, pANTLR3_INPUT_STREAM* ret_stream) { pANTLR3_INPUT_STREAM input = antlr3AsciiFileStreamNew((pANTLR3_UINT8)filename); if ( input == NULL ) { fprintf(stderr, "Failed to open file %s\n", (char *)filename); exit(1); } pProtoJSLexer lxr = ProtoJSLexerNew(input); if ( lxr == NULL ) { fprintf(stderr, "Unable to create the lexer due to malloc() failure1\n"); exit(1); } pANTLR3_COMMON_TOKEN_STREAM tstream = antlr3CommonTokenStreamSourceNew(ANTLR3_SIZE_HINT, TOKENSOURCE(lxr)); if (tstream == NULL) { fprintf(stderr, "Out of memory trying to allocate token stream\n"); exit(1); } pProtoJSParser ctx; pProtoJSParser psr = ctx = ProtoJSParserNew(tstream); if (psr == NULL) { fprintf(stderr, "Out of memory trying to allocate parser\n"); exit(ANTLR3_ERR_NOMEM); } SCOPE_TYPE(NameSpace) ns=NameSpacePush(ctx); ctx->pProtoJSParser_NameSpaceTop=ns; ns->filename=tstream->tstream->tokenSource->strFactory->newRaw(tstream->tstream->tokenSource->strFactory); ns->filename->append8(SCOPE_TOP(NameSpace)->filename,(const char*)outputFilename); ns->internalNamespace=tstream->tstream->tokenSource->strFactory->newRaw(tstream->tstream->tokenSource->strFactory); ns->internalNamespace->append8(ns->internalNamespace,(const char*)outputInternalNamespace); ns->externalNamespace=tstream->tstream->tokenSource->strFactory->newRaw(tstream->tstream->tokenSource->strFactory); ns->externalNamespace->append8(SCOPE_TOP(NameSpace)->externalNamespace,(const char*)outputExternalNamespace); if (strlen(outputExternalNamespace)) { ns->externalNamespace->append8(ns->externalNamespace,"."); } initNameSpace(ctx,ns); pANTLR3_HASH_TABLE tempTable=ns->qualifiedTypes; if (*package){ 
ns->package->set8(ns->package,*package); ns->packageDot->set8(ns->packageDot,*package); ns->packageDot->append8(ns->packageDot,"."); } if (typeTable) { ns->qualifiedTypes=typeTable; } ProtoJSParser_protocol_return pbjAST=psr->protocol(psr); if (!*package) { *package=strdup((const char*)ns->package->chars); } ns->qualifiedTypes=tempTable; bool success=true; if (psr->pParser->rec->getNumberOfSyntaxErrors(psr->pParser->rec) > 0) { success=false; ANTLR3_FPRINTF(stderr, "The parser returned %d errors, tree walking aborted.\n", psr->pParser->rec->getNumberOfSyntaxErrors(psr->pParser->rec)); }else { } if (cleanUp) { NameSpacePop(ctx); psr->free(psr); psr = NULL; tstream->free(tstream); tstream = NULL; lxr->free(lxr); lxr = NULL; input->close(input); input = NULL; }else { *retval=pbjAST; *ret_lxr=lxr; *ret_psr=psr; *ret_tstream=tstream; *ret_stream=input; } return success; }
/**
 * \brief
 * Reports the recognizer's current exception through the translator.
 *
 * The primary message is assembled in a string stream and passed to
 * CTranslator::Error() together with the exception's line and column
 * (column is reported 1-based). For a lexer error at end of input, a second
 * error and a note are emitted at the position where the current token
 * started. For some exception types additional fragments are written
 * directly to stderr.
 *
 * \param[in] recognizer
 * Recognizer (lexer or parser) whose \c state->exception is reported.
 * For any other recognizer type a diagnostic is written to std::cerr and
 * nothing else is reported.
 * \param[in] tokenNames
 * Token name table indexed by token type; may be NULL, in which case
 * numeric or generic text is used instead.
 */
static void DisplayRecognitionError(pANTLR3_BASE_RECOGNIZER recognizer, pANTLR3_UINT8 *tokenNames)
{
    pANTLR3_EXCEPTION ex = recognizer->state->exception;
    freettcn::translator::CTranslator &translator = freettcn::translator::CTranslator::Instance();

    unsigned line = ex->line;
    unsigned pos = ex->charPositionInLine + 1;
    // Location of the follow-up message; overwritten in the lexer EOF branch.
    // Defaulting to the primary location keeps them from ever being read
    // uninitialised.
    unsigned line2 = line;
    unsigned pos2 = pos;
    std::stringstream msg;
    std::stringstream msg2;
    std::string note;

    // How we determine the next piece is dependent on which thing raised the error.
    switch (recognizer->type) {
    case ANTLR3_TYPE_LEXER: {
        pANTLR3_LEXER lexer = (pANTLR3_LEXER)recognizer->super;

        if (ex->type == ANTLR3_NO_VIABLE_ALT_EXCEPTION)
            msg << "Cannot match to any predicted input";
        else
            msg << (pANTLR3_UINT8)ex->message;

        // One past the last byte of the input buffer.
        pANTLR3_UINT8 inputEnd = (pANTLR3_UINT8)lexer->input->data + lexer->input->size(lexer->input);

        // Number of input bytes left from the error position.
        ANTLR3_INT32 width = ANTLR3_UINT32_CAST(inputEnd - (pANTLR3_UINT8)(ex->index));
        if (width >= 1) {
            // isprint() is only defined for values representable as unsigned char.
            if ((ANTLR3_UINT32)ex->c <= 0xFF && isprint((int)ex->c)) {
                msg << " near '" << (char)ex->c << "'";
            } else {
                msg << " near char(0x" << std::hex << std::setfill('0') << std::setw(2) << (int)ex->c
                    << std::dec << std::setfill(' ') << ")";
            }
        } else {
            msg << " '<EOF>'";

            // Prepare the second error, located where the current token started.
            line2 = lexer->rec->state->tokenStartLine;
            pos2 = lexer->rec->state->tokenStartCharPositionInLine;

            pANTLR3_UINT8 tokenStart = (pANTLR3_UINT8)lexer->rec->state->tokenStartCharIndex;
            width = ANTLR3_UINT32_CAST(inputEnd - tokenStart);
            if (width >= 1) {
                // Show at most 20 bytes and never read past the end of the input buffer.
                msg2 << "The lexer was matching from: "
                     << std::string((char *)tokenStart,
                                    static_cast<std::string::size_type>(width < 20 ? width : 20))
                     << std::endl;
            } else {
                msg2 << "The lexer was matching from the end of the line" << std::endl;
            }
            note = "Above errors indicates a poorly specified lexer RULE or unterminated input element such as: \"STRING[\"]";
        }
        break;
    }

    case ANTLR3_TYPE_PARSER: {
        pANTLR3_COMMON_TOKEN token = (pANTLR3_COMMON_TOKEN)ex->token;

        // Text of a token, or "" when the token or its text is unavailable.
        auto tokenText = [](pANTLR3_COMMON_TOKEN t) -> const char * {
            if (!t)
                return "";
            auto text = t->getText(t);
            return (text && text->chars) ? (const char *)text->chars : "";
        };

        if (ex->type == ANTLR3_NO_VIABLE_ALT_EXCEPTION) {
            msg << "Cannot match '" << tokenText(token) << "' to any predicted input";
        } else {
            msg << (pANTLR3_UINT8)ex->message;

            if (token) {
                if (token->type == ANTLR3_TOKEN_EOF) {
                    msg << " at '<EOF>'";
                } else if (ex->type == ANTLR3_MISSING_TOKEN_EXCEPTION) {
                    if (!tokenNames)
                        msg << " [" << ex->expecting << "]";
                    else if (ex->expecting == ANTLR3_TOKEN_EOF)
                        msg << " '<EOF>'";
                    else
                        msg << " '" << tokenNames[ex->expecting] << "'";
                } else {
                    msg << " near '" << tokenText(token) << "'";
                }
            }

            if (ex->type == ANTLR3_UNWANTED_TOKEN_EXCEPTION && tokenNames) {
                if (ex->expecting == ANTLR3_TOKEN_EOF)
                    msg << " ('<EOF>' expected)";
                else
                    msg << " ('" << tokenNames[ex->expecting] << "' expected)";
            }
        }
        break;
    }

    default:
        std::cerr << "DisplayRecognitionError() called by unknown parser type - provide override for this function" << std::endl;
        return;
    }

    // From here on recognizer->type is either ANTLR3_TYPE_LEXER or ANTLR3_TYPE_PARSER.
    switch (ex->type) {
    case ANTLR3_UNWANTED_TOKEN_EXCEPTION:
        // Indicates that the recognizer was fed a token which seems to be
        // spurious input. We can detect this when the token that follows
        // this unwanted token would normally be part of the syntactically
        // correct stream. Then we can see that the token we are looking at
        // is just something that should not be there and throw this exception.
        // For the parser this information is already part of msg.
        if (recognizer->type != ANTLR3_TYPE_PARSER) {
            if (tokenNames == NULL)
                ANTLR3_FPRINTF(stderr, " : Extraneous input...");
            else if (ex->expecting == ANTLR3_TOKEN_EOF)
                ANTLR3_FPRINTF(stderr, " : Extraneous input - expected <EOF>\n");
            else
                ANTLR3_FPRINTF(stderr, " : Extraneous input - expected %s ...\n", tokenNames[ex->expecting]);
        }
        break;

    case ANTLR3_MISSING_TOKEN_EXCEPTION:
        // Indicates that the recognizer detected that the token we just
        // hit would be valid syntactically if preceded by a particular
        // token. Perhaps a missing ';' at line end or a missing ',' in an
        // expression list, and such like.
        break;

    case ANTLR3_RECOGNITION_EXCEPTION:
        // Indicates that the recognizer received a token
        // in the input that was not predicted. This is the basic exception type
        // from which all others are derived. So we assume it was a syntax error.
        // You may get this if there are not more tokens and more are needed
        // to complete a parse for instance.
        break;

    case ANTLR3_MISMATCHED_TOKEN_EXCEPTION:
        // We were expecting to see one thing and got another. This is the
        // most common error if we could not detect a missing or unwanted token.
        if (!tokenNames)
            ANTLR3_FPRINTF(stderr, " : syntax error...\n");
        else if (ex->expecting == ANTLR3_TOKEN_EOF)
            ANTLR3_FPRINTF(stderr, " : expected <EOF>\n");
        else
            ANTLR3_FPRINTF(stderr, " : expected %s ...\n", tokenNames[ex->expecting]);
        break;

    case ANTLR3_NO_VIABLE_ALT_EXCEPTION:
        // We could not pick any alt decision from the input given: the DFA
        // indicates nowhere to go from the current token. The text for this
        // case is already in msg for both lexer and parser.
        break;

    case ANTLR3_MISMATCHED_SET_EXCEPTION: {
        // This means we were able to deal with one of a set of
        // possible tokens at this point, but we did not see any
        // member of that set.
        ANTLR3_FPRINTF(stderr, " : unexpected input...\n expected one of : ");

        // What tokens could we have accepted at this point in the parse?
        pANTLR3_BITSET errBits = antlr3BitsetLoad(ex->expectingSet);
        ANTLR3_UINT32 numbits = errBits ? errBits->numBits(errBits) : 0;
        ANTLR3_UINT32 size = errBits ? errBits->size(errBits) : 0;

        if (size > 0) {
            // However many tokens we could have dealt with here, it is usually
            // not useful to print ALL of the set here, so stop after 8.
            // No token number 0, so look for bit 1 and on.
            ANTLR3_UINT32 count = 0;
            for (ANTLR3_UINT32 bit = 1; tokenNames && bit < numbits && count < 8 && count < size; bit++) {
                // Only tokens that are actually members of the expected set are listed.
                if (errBits->isMember(errBits, bit) && tokenNames[bit]) {
                    ANTLR3_FPRINTF(stderr, "%s%s", count > 0 ? ", " : "", tokenNames[bit]);
                    count++;
                }
            }
            ANTLR3_FPRINTF(stderr, "\n");
        } else {
            ANTLR3_FPRINTF(stderr, "Actually dude, we didn't seem to be expecting anything here, or at least\n");
            ANTLR3_FPRINTF(stderr, "I could not work out what I was expecting, like so many of us these days!\n");
        }

        // antlr3BitsetLoad() hands back a freshly allocated bitset.
        if (errBits)
            errBits->free(errBits);
        break;
    }

    case ANTLR3_EARLY_EXIT_EXCEPTION:
        // We entered a loop requiring a number of token sequences
        // but found a token that ended that sequence earlier than
        // we should have done.
        ANTLR3_FPRINTF(stderr, " : missing elements...\n");
        break;

    default:
        // We don't handle any other exceptions here; just report what we
        // know about the exception type.
        ANTLR3_FPRINTF(stderr, " : syntax not recognized...\n");
        std::cout << "Unknown exception type: " << ex->type << std::endl;
        break;
    }

    translator.Error(CLocation(translator.File(), line, pos), msg.str());
    if (!msg2.str().empty())
        translator.Error(CLocation(translator.File(), line2, pos2), msg2.str());
    if (!note.empty())
        translator.Note(CLocation(translator.File(), line2, pos2), note);
}