bool cxxParserParseNextToken(void) { CXXToken * t = cxxTokenCreate(); // The token chain should not be allowed to grow arbitrairly large. // The token structures are quite big and it's easy to grow up to // 5-6GB or memory usage. However this limit should be large enough // to accomodate all the reasonable statements that could have some // information in them. This includes multiple function prototypes // in a single statement (ImageMagick has some examples) but probably // does NOT include large data tables. if(g_cxx.pTokenChain->iCount > 16384) cxxTokenChainDestroyLast(g_cxx.pTokenChain); cxxTokenChainAppend(g_cxx.pTokenChain,t); g_cxx.pToken = t; cxxParserSkipToNonWhiteSpace(); // FIXME: this cpp handling is kind of broken: // it works only because the moon is in the correct phase. cppBeginStatement(); // This must be done after getting char from input t->iLineNumber = getInputLineNumber(); t->oFilePosition = getInputFilePosition(); if(g_cxx.iChar == EOF) { t->eType = CXXTokenTypeEOF; t->bFollowedBySpace = false; return false; } unsigned int uInfo = UINFO(g_cxx.iChar); //fprintf(stderr,"Char %c %02x info %u\n",g_cxx.iChar,g_cxx.iChar,uInfo); if(uInfo & CXXCharTypeStartOfIdentifier) { // word t->eType = CXXTokenTypeIdentifier; t->bFollowedBySpace = false; vStringPut(t->pszWord,g_cxx.iChar); // special case for tile, which may actually be an operator if(g_cxx.iChar == '~') { // may be followed by space! g_cxx.iChar = cppGetc(); if(isspace(g_cxx.iChar)) { t->bFollowedBySpace = true; g_cxx.iChar = cppGetc(); while(isspace(g_cxx.iChar)) g_cxx.iChar = cppGetc(); } // non space uInfo = UINFO(g_cxx.iChar); if(!(uInfo & CXXCharTypeStartOfIdentifier)) { // this is not an identifier after all t->eType = CXXTokenTypeOperator; if((!t->bFollowedBySpace) && g_cxx.iChar == '=') { // make ~= single token so it's not handled as // a separate assignment vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); } return true; } } else { g_cxx.iChar = cppGetc(); } for(;;) { uInfo = UINFO(g_cxx.iChar); if(!(uInfo & CXXCharTypePartOfIdentifier)) break; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } int iCXXKeyword = lookupKeyword(t->pszWord->buffer,g_cxx.eLanguage); if(iCXXKeyword >= 0) { if( ( (iCXXKeyword == CXXKeywordFINAL) && (!g_cxx.bParsingClassStructOrUnionDeclaration) ) || ( ( (iCXXKeyword == CXXKeywordPUBLIC) || (iCXXKeyword == CXXKeywordPROTECTED) || (iCXXKeyword == CXXKeywordPRIVATE) ) && (!g_cxx.bEnablePublicProtectedPrivateKeywords) ) ) { t->eType = CXXTokenTypeIdentifier; } else { t->eType = CXXTokenTypeKeyword; t->eKeyword = (enum CXXKeyword)iCXXKeyword; if(iCXXKeyword == CXXKeyword__ATTRIBUTE__) { // special handling for __attribute__ return cxxParserParseNextTokenCondenseAttribute(); } } } else { const cppMacroInfo * pMacro = cppFindMacro(vStringValue(t->pszWord)); if(pMacro) { CXX_DEBUG_PRINT("Macro %s",vStringValue(t->pszWord)); cxxTokenChainDestroyLast(g_cxx.pTokenChain); CXXToken * pParameterChain = NULL; if(pMacro->hasParameterList) { CXX_DEBUG_PRINT("Macro has parameter list"); if(!cxxParserParseNextTokenSkipMacroParenthesis(&pParameterChain)) return false; } // This is used to avoid infinite recursion in substitution // (things like -D foo=foo or similar) static int iReplacementRecursionCount = 0; if(pMacro->replacements) { CXX_DEBUG_PRINT("The token has replacements: applying"); if(iReplacementRecursionCount < 1024) { // unget last char cppUngetc(g_cxx.iChar); // unget the replacement cxxParserParseNextTokenApplyReplacement( pMacro, pParameterChain ); g_cxx.iChar = cppGetc(); } } if(pParameterChain) cxxTokenDestroy(pParameterChain); iReplacementRecursionCount++; // Have no token to return: parse it CXX_DEBUG_PRINT("Parse inner token"); bool bRet = cxxParserParseNextToken(); CXX_DEBUG_PRINT("Parsed inner token: %s type %d",g_cxx.pToken->pszWord->buffer,g_cxx.pToken->eType); iReplacementRecursionCount--; return bRet; } } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } if(g_cxx.iChar == '-') { // special case for pointer vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); if(g_cxx.iChar == '>') { t->eType = CXXTokenTypePointerOperator; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } else { t->eType = CXXTokenTypeOperator; if(g_cxx.iChar == '-') { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } #if 0 // As long as we use cppGetc() we don't need this if(g_cxx.iChar == '"') { // special case for strings t->eType = CXXTokenTypeStringConstant; vStringPut(t->pszWord,g_cxx.iChar); // We don't even care of storing the other chars: we don't need // them for parsing // FIXME: We might need them in signature:() tag.. maybe add // them up to a certain length only? for(;;) { g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = false; return true; } if(g_cxx.iChar == '\\') { // escape g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = false; return true; } } else if(g_cxx.iChar == '"') { g_cxx.iChar = cppGetc(); break; } } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } #else if(g_cxx.iChar == STRING_SYMBOL) { t->eType = CXXTokenTypeStringConstant; vStringPut(t->pszWord,'"'); vStringPut(t->pszWord,'"'); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } #endif #if 0 // As long as we use cppGetc() we don't need this if(g_cxx.iChar == '\'') { // special case for strings t->eType = CXXTokenTypeCharacterConstant; vStringPut(t->pszWord,g_cxx.iChar); // We don't even care storing the other chars: we don't // need them for parsing for(;;) { g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = false; return true; } if(g_cxx.iChar == '\\') { // escape g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = false; return true; } } else if(g_cxx.iChar == '\'') { g_cxx.iChar = cppGetc(); break; } } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } #else if(g_cxx.iChar == CHAR_SYMBOL) { t->eType = CXXTokenTypeCharacterConstant; vStringPut(t->pszWord,'\''); vStringPut(t->pszWord,'\''); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } #endif if(uInfo & CXXCharTypeDecimalDigit) { // number t->eType = CXXTokenTypeNumber; vStringPut(t->pszWord,g_cxx.iChar); for(;;) { g_cxx.iChar = cppGetc(); uInfo = UINFO(g_cxx.iChar); if(!(uInfo & CXXCharTypeValidInNumber)) break; vStringPut(t->pszWord,g_cxx.iChar); } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } if(uInfo & CXXCharTypeNamedSingleOrRepeatedCharToken) { t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType; vStringPut(t->pszWord,g_cxx.iChar); int iChar = g_cxx.iChar; g_cxx.iChar = cppGetc(); if(g_cxx.iChar == iChar) { t->eType = g_aCharTable[g_cxx.iChar].uMultiTokenType; // We could signal a syntax error with more than two colons // or equal signs...but we're tolerant do { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } while(g_cxx.iChar == iChar); } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } if(uInfo & CXXCharTypeNamedSingleOrOperatorToken) { t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = UINFO(g_cxx.iChar); if(uInfo & (CXXCharTypeOperator | CXXCharTypeNamedSingleOrOperatorToken)) { t->eType = CXXTokenTypeOperator; do { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = UINFO(g_cxx.iChar); } while( uInfo & (CXXCharTypeOperator | CXXCharTypeNamedSingleOrOperatorToken) ); } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } if(uInfo & CXXCharTypeNamedSingleCharToken) { t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } if(uInfo & CXXCharTypeOperator) { t->eType = CXXTokenTypeOperator; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = UINFO(g_cxx.iChar); while(uInfo & CXXCharTypeOperator) { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = UINFO(g_cxx.iChar); } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } t->eType = CXXTokenTypeUnknown; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return true; }
static bool cxxParserParseBlockInternal(bool bExpectClosingBracket) { CXX_DEBUG_ENTER(); //char * szScopeName = cxxScopeGetFullName(); //CXX_DEBUG_PRINT("Scope name is '%s'",szScopeName ? szScopeName : ""); cxxParserNewStatement(); if(bExpectClosingBracket) { // FIXME: this cpp handling is kind of broken: // it works only because the moon is in the correct phase. cppBeginStatement(); } for(;;) { if(!cxxParserParseNextToken()) { found_eof: if(bExpectClosingBracket) { CXX_DEBUG_LEAVE_TEXT( "Syntax error: found EOF in block but a closing " \ "bracket was expected!" ); return false; } CXX_DEBUG_LEAVE_TEXT("EOF in main block"); return true; // EOF } process_token: CXX_DEBUG_PRINT( "Token '%s' of type 0x%02x", vStringValue(g_cxx.pToken->pszWord), g_cxx.pToken->eType ); switch(g_cxx.pToken->eType) { case CXXTokenTypeKeyword: { switch(g_cxx.pToken->eKeyword) { case CXXKeywordNAMESPACE: { enum CXXScopeType eScopeType = cxxScopeGetType(); if( ( // toplevel or nested within a namespace (eScopeType == CXXScopeTypeNamespace) || // namespace X = Y inside a function (eScopeType == CXXScopeTypeFunction) ) && ( // either certainly C++ g_cxx.bConfirmedCPPLanguage || // or a "sane" namespace syntax ( !cxxTokenChainPreviousTokenOfType( g_cxx.pToken, CXXTokenTypeStar | CXXTokenTypeAnd | CXXTokenTypeKeyword ) ) ) ) { if(!cxxParserParseNamespace()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse namespace"); return false; } } else { // If we're pretty sure this is C++ then this is a syntax error. // If we're not sure (namely when we're in a *.h file) then // let's try to be flexible: treat the namespace keyword as an identifier. if(!g_cxx.bConfirmedCPPLanguage) { CXX_DEBUG_LEAVE_TEXT( "Found namespace in unexpected place, but we're not sure it's really C++ " "so we'll treat it as an identifier instead" ); g_cxx.pToken->eType = CXXTokenTypeIdentifier; continue; } CXX_DEBUG_LEAVE_TEXT( "Found namespace in a wrong place: we're probably out of sync" ); return false; } cxxParserNewStatement(); } break; case CXXKeywordTEMPLATE: if(!cxxParserParseTemplatePrefix()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse template"); return false; } // Here we are just after the "template<parameters>" prefix. break; case CXXKeywordTYPEDEF: // Mark the next declaration as a typedef g_cxx.uKeywordState |= CXXParserKeywordStateSeenTypedef; cxxTokenChainClear(g_cxx.pTokenChain); break; case CXXKeywordENUM: if(!cxxParserParseEnum()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse enum"); return false; } break; case CXXKeywordCLASS: if(!cxxParserParseClassStructOrUnion(CXXKeywordCLASS,CXXTagCPPKindCLASS,CXXScopeTypeClass)) { CXX_DEBUG_LEAVE_TEXT("Failed to parse class/struct/union"); return false; } break; case CXXKeywordSTRUCT: if(!cxxParserParseClassStructOrUnion(CXXKeywordSTRUCT,CXXTagKindSTRUCT,CXXScopeTypeStruct)) { CXX_DEBUG_LEAVE_TEXT("Failed to parse class/struct/union"); return false; } break; case CXXKeywordUNION: if(!cxxParserParseClassStructOrUnion(CXXKeywordUNION,CXXTagKindUNION,CXXScopeTypeUnion)) { CXX_DEBUG_LEAVE_TEXT("Failed to parse class/struct/union"); return false; } break; case CXXKeywordPUBLIC: case CXXKeywordPROTECTED: case CXXKeywordPRIVATE: // Note that the class keyword has its own handler // so the only possibility here is an access specifier if(!cxxParserParseAccessSpecifier()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse access specifier"); return false; } break; case CXXKeywordUSING: if(!cxxParserParseUsingClause()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse using clause"); return false; } cxxParserNewStatement(); break; case CXXKeywordIF: case CXXKeywordFOR: case CXXKeywordWHILE: case CXXKeywordSWITCH: if(!cxxParserParseIfForWhileSwitch()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse if/for/while/switch"); return false; } cxxParserNewStatement(); // Force the cpp preprocessor to think that we're in the middle of a statement. cppBeginStatement(); break; case CXXKeywordTRY: case CXXKeywordELSE: case CXXKeywordDO: // parse as normal statement/block cxxParserNewStatement(); // Force the cpp preprocessor to think that we're in the middle of a statement. cppBeginStatement(); break; case CXXKeywordRETURN: if(cxxParserCurrentLanguageIsCPP()) { // may be followed by a lambda, otherwise it's not interesting. cxxParserNewStatement(); g_cxx.uKeywordState |= CXXParserKeywordStateSeenReturn; } else { // ignore if(!cxxParserParseUpToOneOf(CXXTokenTypeSemicolon | CXXTokenTypeEOF, false)) { CXX_DEBUG_LEAVE_TEXT("Failed to parse return"); return false; } cxxParserNewStatement(); } break; case CXXKeywordCONTINUE: case CXXKeywordBREAK: case CXXKeywordGOTO: // ignore if(!cxxParserParseUpToOneOf(CXXTokenTypeSemicolon | CXXTokenTypeEOF, false)) { CXX_DEBUG_LEAVE_TEXT("Failed to parse continue/break/goto"); return false; } cxxParserNewStatement(); break; case CXXKeywordTHROW: // ignore when inside a function if(cxxScopeGetType() == CXXScopeTypeFunction) { if(!cxxParserParseUpToOneOf(CXXTokenTypeSemicolon | CXXTokenTypeEOF, false)) { CXX_DEBUG_LEAVE_TEXT("Failed to parse return/continue/break"); return false; } cxxParserNewStatement(); } break; case CXXKeywordCASE: // ignore if(!cxxParserParseUpToOneOf( CXXTokenTypeSemicolon | CXXTokenTypeEOF | CXXTokenTypeSingleColon, false )) { CXX_DEBUG_LEAVE_TEXT("Failed to parse case keyword"); return false; } cxxParserNewStatement(); break; case CXXKeywordEXTERN: g_cxx.uKeywordState |= CXXParserKeywordStateSeenExtern; cxxTokenChainDestroyLast(g_cxx.pTokenChain); if(!cxxParserParseNextToken()) goto found_eof; if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeStringConstant)) { // assume extern "language" // Strictly speaking this is a C++ only syntax. // However we allow it also in C as it doesn't really hurt. cxxTokenChainDestroyLast(g_cxx.pTokenChain); // Note that extern "C" may be followed by a block with declarations // // extern "C" { ... } // // However in this case the declarations are ALSO definitions // and extern "C" is used only to specify the name mangling mode. // // extern "C" int x; <-- a declaration and not a definition // extern "C" { int x; } <-- a declaration and definition: x IS defined // here and is NOT extern. // // A variable in an extern "C" block has to be re-declared extern again // to be really treated as declaration only. // // extern "C" { extern int x; } // // So in this case we do NOT treat the inner declarations as extern // and we don't need specific handling code for this case. } else { // something else: handle it the normal way goto process_token; } break; case CXXKeywordSTATIC: g_cxx.uKeywordState |= CXXParserKeywordStateSeenStatic; cxxTokenChainDestroyLast(g_cxx.pTokenChain); break; case CXXKeywordINLINE: case CXXKeyword__INLINE: case CXXKeyword__INLINE__: case CXXKeyword__FORCEINLINE: case CXXKeyword__FORCEINLINE__: g_cxx.uKeywordState |= CXXParserKeywordStateSeenInline; cxxTokenChainDestroyLast(g_cxx.pTokenChain); break; case CXXKeywordEXPLICIT: g_cxx.uKeywordState |= CXXParserKeywordStateSeenExplicit; cxxTokenChainDestroyLast(g_cxx.pTokenChain); break; case CXXKeywordOPERATOR: g_cxx.uKeywordState |= CXXParserKeywordStateSeenOperator; break; case CXXKeywordVIRTUAL: g_cxx.uKeywordState |= CXXParserKeywordStateSeenVirtual; cxxTokenChainDestroyLast(g_cxx.pTokenChain); break; case CXXKeywordMUTABLE: g_cxx.uKeywordState |= CXXParserKeywordStateSeenMutable; cxxTokenChainDestroyLast(g_cxx.pTokenChain); break; // "const" and "volatile" are part of the type. Don't treat them specially // and don't attempt to extract an eventual typedef yet, // as there might be a struct/class/union keyword following. case CXXKeywordVOLATILE: g_cxx.uKeywordState |= CXXParserKeywordStateSeenVolatile; break; case CXXKeywordCONST: g_cxx.uKeywordState |= CXXParserKeywordStateSeenConst; break; default: if(g_cxx.uKeywordState & CXXParserKeywordStateSeenTypedef) { g_cxx.uKeywordState &= ~CXXParserKeywordStateSeenTypedef; if(!cxxParserParseGenericTypedef()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse generic typedef"); return false; } cxxParserNewStatement(); } break; } } break; case CXXTokenTypeSemicolon: { if( (cxxParserCurrentLanguageIsC()) && cxxScopeIsGlobal() && (!(g_cxx.uKeywordState & CXXParserKeywordStateSeenExtern)) && (!(g_cxx.uKeywordState & CXXParserKeywordStateSeenTypedef)) ) { // Special handling of K&R style function declarations. // We might be in the following situation: // // type whatever fname(par1,par2) int par1; int par2; { // ^ // switch(cxxParserMaybeParseKnRStyleFunctionDefinition()) { case 1: // K&R parser did the job and started a new statement break; case 0: // something else cxxParserAnalyzeOtherStatement(); break; default: CXX_DEBUG_LEAVE_TEXT("Failed to check for K&R style function definition"); return false; break; } } else { // K&R style function declarations not allowed here. cxxParserAnalyzeOtherStatement(); } cxxParserNewStatement(); } break; case CXXTokenTypeSingleColon: { // label ? if( (g_cxx.pTokenChain->iCount == 2) && cxxTokenTypeIs( cxxTokenChainFirst(g_cxx.pTokenChain), CXXTokenTypeIdentifier ) ) { CXXToken * pFirst = cxxTokenChainFirst(g_cxx.pTokenChain); // assume it's label tagEntryInfo * tag = cxxTagBegin(CXXTagKindLABEL,pFirst); if(tag) { tag->isFileScope = true; cxxTagCommit(); } } else { // what is this? (default: and similar things have been handled at keyword level) } } break; case CXXTokenTypeOpeningBracket: if(!cxxParserParseBlockHandleOpeningBracket()) { CXX_DEBUG_LEAVE_TEXT("Failed to handle opening bracket"); return false; } break; case CXXTokenTypeClosingBracket: // scope finished if(!bExpectClosingBracket) { CXX_DEBUG_LEAVE_TEXT( "Found unexpected closing bracket: probably preprocessing problem" ); return false; } CXX_DEBUG_LEAVE_TEXT("Closing bracket!"); cxxParserNewStatement(); return true; break; case CXXTokenTypeOpeningParenthesis: case CXXTokenTypeOpeningSquareParenthesis: if(!cxxParserParseAndCondenseCurrentSubchain( CXXTokenTypeOpeningBracket | CXXTokenTypeOpeningParenthesis | CXXTokenTypeOpeningSquareParenthesis, true, false )) { CXX_DEBUG_LEAVE_TEXT("Parsing the parenthesis failed"); return false; } if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeEOF)) { if(bExpectClosingBracket) { CXX_DEBUG_LEAVE_TEXT( "Syntax error: found EOF in block but a closing bracket was expected!" ); return false; } return true; // EOF } break; case CXXTokenTypeIdentifier: if(g_cxx.uKeywordState & CXXParserKeywordStateSeenTypedef) { g_cxx.uKeywordState &= ~CXXParserKeywordStateSeenTypedef; if(!cxxParserParseGenericTypedef()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse generic typedef"); return false; } cxxParserNewStatement(); } break; default: // something else we didn't handle break; } } CXX_DEBUG_LEAVE_TEXT("WARNING: Not reached"); return true; }
boolean cxxParserParseNextToken(void) { CXXToken * t = cxxTokenCreate(); // The token chain should not be allowed to grow arbitrairly large. // The token structures are quite big and it's easy to grow up to // 5-6GB or memory usage. However this limit should be large enough // to accomodate all the reasonable statements that could have some // information in them. This includes multiple function prototypes // in a single statement (ImageMagick has some examples) but probably // does NOT include large data tables. if(g_cxx.pTokenChain->iCount > 16384) cxxTokenChainDestroyLast(g_cxx.pTokenChain); cxxTokenChainAppend(g_cxx.pTokenChain,t); g_cxx.pToken = t; cxxParserSkipToNonWhiteSpace(); // FIXME: this cpp handling is kind of broken: // it works only because the moon is in the correct phase. cppBeginStatement(); // This must be done after getting char from input t->iLineNumber = getInputLineNumber(); t->oFilePosition = getInputFilePosition(); if(g_cxx.iChar == EOF) { t->eType = CXXTokenTypeEOF; t->bFollowedBySpace = FALSE; return FALSE; } unsigned int uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; //printf("Char %c %02x info %u\n",g_cxx.iChar,g_cxx.iChar,uInfo); if(uInfo & CXXCharTypeStartOfIdentifier) { // word t->eType = CXXTokenTypeIdentifier; t->bFollowedBySpace = FALSE; vStringPut(t->pszWord,g_cxx.iChar); // special case for tile, which may actually be an operator if(g_cxx.iChar == '~') { // may be followed by space! g_cxx.iChar = cppGetc(); if(isspace(g_cxx.iChar)) { t->bFollowedBySpace = TRUE; g_cxx.iChar = cppGetc(); while(isspace(g_cxx.iChar)) g_cxx.iChar = cppGetc(); } // non space uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; if(!(uInfo & CXXCharTypeStartOfIdentifier)) { // this is not an identifier after all t->eType = CXXTokenTypeOperator; if((!t->bFollowedBySpace) && g_cxx.iChar == '=') { // make ~= single token so it's not handled as // a separate assignment vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); } return TRUE; } } else { g_cxx.iChar = cppGetc(); } for(;;) { uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; if(!(uInfo & CXXCharTypePartOfIdentifier)) break; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } int iCXXKeyword = lookupKeyword(t->pszWord->buffer,g_cxx.eLanguage); if(iCXXKeyword >= 0) { if( ( (iCXXKeyword == CXXKeywordFINAL) && (!g_cxx.bParsingClassStructOrUnionDeclaration) ) || ( ( (iCXXKeyword == CXXKeywordPUBLIC) || (iCXXKeyword == CXXKeywordPROTECTED) || (iCXXKeyword == CXXKeywordPRIVATE) ) && (!g_cxx.bEnablePublicProtectedPrivateKeywords) ) ) { t->eType = CXXTokenTypeIdentifier; } else { t->eType = CXXTokenTypeKeyword; t->eKeyword = (enum CXXKeyword)iCXXKeyword; if(iCXXKeyword == CXXKeyword__ATTRIBUTE__) { // special handling for __attribute__ return cxxParserParseNextTokenCondenseAttribute(); } } } else { boolean bIgnoreParens = FALSE; const char * szReplacement = NULL; if(isIgnoreToken( vStringValue(t->pszWord), &bIgnoreParens, &szReplacement )) { CXX_DEBUG_PRINT("Ignore token %s",vStringValue(t->pszWord)); // FIXME: Handle ignore parens! if(szReplacement && *szReplacement) { vStringClear(t->pszWord); vStringCatS(t->pszWord,szReplacement); } else { // skip cxxTokenChainDestroyLast(g_cxx.pTokenChain); return cxxParserParseNextToken(); } } } t->bFollowedBySpace = t->bFollowedBySpace | isspace(g_cxx.iChar); return TRUE; } if(g_cxx.iChar == '-') { // special case for pointer vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); if(g_cxx.iChar == '>') { t->eType = CXXTokenTypePointerOperator; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } else { t->eType = CXXTokenTypeOperator; if(g_cxx.iChar == '-') { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } } t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } #if 0 // As long as we use cppGetc() we don't need this if(g_cxx.iChar == '"') { // special case for strings t->eType = CXXTokenTypeStringConstant; vStringPut(t->pszWord,g_cxx.iChar); // We don't even care of storing the other chars: we don't need // them for parsing // FIXME: We might need them in signature:() tag.. maybe add // them up to a certain length only? for(;;) { g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = FALSE; return TRUE; } if(g_cxx.iChar == '\\') { // escape g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = FALSE; return TRUE; } } else if(g_cxx.iChar == '"') { g_cxx.iChar = cppGetc(); break; } } t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } #else if(g_cxx.iChar == STRING_SYMBOL) { t->eType = CXXTokenTypeStringConstant; vStringPut(t->pszWord,'"'); vStringPut(t->pszWord,'"'); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } #endif #if 0 // As long as we use cppGetc() we don't need this if(g_cxx.iChar == '\'') { // special case for strings t->eType = CXXTokenTypeCharacterConstant; vStringPut(t->pszWord,g_cxx.iChar); // We don't even care storing the other chars: we don't // need them for parsing for(;;) { g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = FALSE; return TRUE; } if(g_cxx.iChar == '\\') { // escape g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = FALSE; return TRUE; } } else if(g_cxx.iChar == '\'') { g_cxx.iChar = cppGetc(); break; } } t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } #else if(g_cxx.iChar == CHAR_SYMBOL) { t->eType = CXXTokenTypeCharacterConstant; vStringPut(t->pszWord,'\''); vStringPut(t->pszWord,'\''); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } #endif if(uInfo & CXXCharTypeDecimalDigit) { // number t->eType = CXXTokenTypeNumber; vStringPut(t->pszWord,g_cxx.iChar); for(;;) { g_cxx.iChar = cppGetc(); uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; if(!(uInfo & CXXCharTypeValidInNumber)) break; vStringPut(t->pszWord,g_cxx.iChar); } t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } if(uInfo & CXXCharTypeNamedSingleOrRepeatedCharToken) { t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType; vStringPut(t->pszWord,g_cxx.iChar); int iChar = g_cxx.iChar; g_cxx.iChar = cppGetc(); if(g_cxx.iChar == iChar) { t->eType = g_aCharTable[g_cxx.iChar].uMultiTokenType; // We could signal a syntax error with more than two colons // or equal signs...but we're tolerant do { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } while(g_cxx.iChar == iChar); } t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } if(uInfo & CXXCharTypeNamedSingleOrOperatorToken) { t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; if(uInfo & (CXXCharTypeOperator | CXXCharTypeNamedSingleOrOperatorToken)) { t->eType = CXXTokenTypeOperator; do { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; } while( uInfo & (CXXCharTypeOperator | CXXCharTypeNamedSingleOrOperatorToken) ); } t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } if(uInfo & CXXCharTypeNamedSingleCharToken) { t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } if(uInfo & CXXCharTypeOperator) { t->eType = CXXTokenTypeOperator; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; while(uInfo & CXXCharTypeOperator) { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; } t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } t->eType = CXXTokenTypeUnknown; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; }
// // This is the toplevel scanning function. It's a forward-only scanner that keeps // accumulating tokens in the chain until either a characteristic token is found // or the statement ends. When a characteristic token is found it usually enters // a specialized scanning routine (e.g for classes, namespaces, structs...). // When the statement ends without finding any characteristic token the chain // is passed to an analysis routine which does a second scan pass. // boolean cxxParserParseBlock(boolean bExpectClosingBracket) { CXX_DEBUG_ENTER(); //char * szScopeName = cxxScopeGetFullName(); //CXX_DEBUG_PRINT("Scope name is '%s'",szScopeName ? szScopeName : ""); cxxParserNewStatement(); if(bExpectClosingBracket) cppBeginStatement(); // FIXME: this cpp handling is broken: it works only because the moon is in the correct phase. for(;;) { if(!cxxParserParseNextToken()) { if(bExpectClosingBracket) { CXX_DEBUG_LEAVE_TEXT("Syntax error: found EOF in block but a closing bracket was expected!"); return FALSE; } CXX_DEBUG_LEAVE_TEXT("EOF in main block"); return TRUE; // EOF } CXX_DEBUG_PRINT("Token '%s' of type 0x%02x",vStringValue(g_cxx.pToken->pszWord),g_cxx.pToken->eType); switch(g_cxx.pToken->eType) { case CXXTokenTypeKeyword: { switch(g_cxx.pToken->eKeyword) { case CXXKeywordNAMESPACE: { int iCurrentScopeKind = cxxScopeGetKind(); if(iCurrentScopeKind == CXXTagKindNAMESPACE) { // namespaces can be nested only within themselves if(!cxxParserParseNamespace()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse namespace"); return FALSE; } } else { // hm... syntax error? CXX_DEBUG_LEAVE_TEXT("Found namespace in a wrong place: we're probably out of sync"); return FALSE; } //cxxParserNewStatement(); <-- already called by cxxParserParseNamespace() } break; case CXXKeywordTEMPLATE: if(!cxxParserParseTemplatePrefix()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse template"); return FALSE; } // Here we are just after the "template<parameters>" prefix. break; case CXXKeywordTYPEDEF: // Mark the next declaration as a typedef g_cxx.uKeywordState |= CXXParserKeywordStateSeenTypedef; cxxTokenChainClear(g_cxx.pTokenChain); break; case CXXKeywordENUM: if(!cxxParserParseEnum()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse enum"); return FALSE; } break; case CXXKeywordCLASS: if(!cxxParserParseClassStructOrUnion(CXXKeywordCLASS,CXXTagKindCLASS)) { CXX_DEBUG_LEAVE_TEXT("Failed to parse class/struct/union"); return FALSE; } break; case CXXKeywordSTRUCT: if(!cxxParserParseClassStructOrUnion(CXXKeywordSTRUCT,CXXTagKindSTRUCT)) { CXX_DEBUG_LEAVE_TEXT("Failed to parse class/struct/union"); return FALSE; } break; case CXXKeywordUNION: if(!cxxParserParseClassStructOrUnion(CXXKeywordUNION,CXXTagKindUNION)) { CXX_DEBUG_LEAVE_TEXT("Failed to parse class/struct/union"); return FALSE; } break; case CXXKeywordPUBLIC: case CXXKeywordPROTECTED: case CXXKeywordPRIVATE: // Note that the class keyword has its own handler so the only possibility here is an access specifier if(!cxxParserParseAccessSpecifier()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse access specifier"); return FALSE; } break; case CXXKeywordUSING: if(!cxxParserParseUsingClause()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse using clause"); return FALSE; } cxxParserNewStatement(); break; case CXXKeywordIF: case CXXKeywordFOR: case CXXKeywordWHILE: case CXXKeywordSWITCH: if(!cxxParserParseIfForWhileSwitch()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse if/for/while/switch"); return FALSE; } cxxParserNewStatement(); break; case CXXKeywordTRY: case CXXKeywordELSE: case CXXKeywordDO: // parse as normal statement/block cxxParserNewStatement(); break; case CXXKeywordRETURN: if(cxxParserCurrentLanguageIsCPP()) { // may be followed by a lambda, otherwise it's not interesting. g_cxx.uKeywordState |= CXXParserKeywordStateSeenReturn; cxxParserNewStatement(); } else { // ignore if(!cxxParserParseUpToOneOf(CXXTokenTypeSemicolon | CXXTokenTypeEOF)) { CXX_DEBUG_LEAVE_TEXT("Failed to parse return"); return FALSE; } cxxParserNewStatement(); } break; case CXXKeywordCONTINUE: case CXXKeywordBREAK: case CXXKeywordGOTO: // ignore if(!cxxParserParseUpToOneOf(CXXTokenTypeSemicolon | CXXTokenTypeEOF)) { CXX_DEBUG_LEAVE_TEXT("Failed to parse continue/break/goto"); return FALSE; } cxxParserNewStatement(); break; case CXXKeywordTHROW: // ignore when inside a function if(cxxScopeGetKind() == CXXTagKindFUNCTION) { if(!cxxParserParseUpToOneOf(CXXTokenTypeSemicolon | CXXTokenTypeEOF)) { CXX_DEBUG_LEAVE_TEXT("Failed to parse return/continue/break"); return FALSE; } cxxParserNewStatement(); } break; break; case CXXKeywordCASE: // ignore if(!cxxParserParseUpToOneOf(CXXTokenTypeSemicolon | CXXTokenTypeEOF | CXXTokenTypeSingleColon)) { CXX_DEBUG_LEAVE_TEXT("Failed to parse case keyword"); return FALSE; } cxxParserNewStatement(); break; case CXXKeywordEXTERN: g_cxx.uKeywordState |= CXXParserKeywordStateSeenExtern; cxxTokenChainClear(g_cxx.pTokenChain); break; case CXXKeywordSTATIC: g_cxx.uKeywordState |= CXXParserKeywordStateSeenStatic; cxxTokenChainClear(g_cxx.pTokenChain); break; case CXXKeywordINLINE: g_cxx.uKeywordState |= CXXParserKeywordStateSeenInline; cxxTokenChainClear(g_cxx.pTokenChain); break; case CXXKeywordEXPLICIT: g_cxx.uKeywordState |= CXXParserKeywordStateSeenExplicit; cxxTokenChainClear(g_cxx.pTokenChain); break; case CXXKeywordOPERATOR: g_cxx.uKeywordState |= CXXParserKeywordStateSeenOperator; break; case CXXKeywordVIRTUAL: g_cxx.uKeywordState |= CXXParserKeywordStateSeenVirtual; break; default: if(g_cxx.uKeywordState & CXXParserKeywordStateSeenTypedef) { g_cxx.uKeywordState &= ~CXXParserKeywordStateSeenTypedef; if(!cxxParserParseGenericTypedef()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse generic typedef"); return FALSE; } cxxParserNewStatement(); } break; } } break; case CXXTokenTypeSemicolon: { if( (g_cxx.eLanguage == g_cxx.eCLanguage) && cxxScopeIsGlobal() && (!(g_cxx.uKeywordState & CXXParserKeywordStateSeenExtern)) && (!(g_cxx.uKeywordState & CXXParserKeywordStateSeenTypedef)) ) { // Special handling of K&R style function declarations. // We might be in the following situation: // // type whatever fname(par1,par2) int par1; int par2; { // ^ // switch(cxxParserMaybeExtractKnRStyleFunctionDefinition()) { case 1: // got K&R style function definition, one scope was pushed. cxxParserNewStatement(); if(!cxxParserParseBlock(TRUE)) { CXX_DEBUG_LEAVE_TEXT("Failed to parse nested block"); return FALSE; } cxxScopePop(); break; case 0: // something else cxxParserAnalyzeOtherStatement(); break; default: CXX_DEBUG_LEAVE_TEXT("Failed to check for K&R style function definition"); return FALSE; break; } } else { // K&R style function declarations not allowed here. cxxParserAnalyzeOtherStatement(); } cxxParserNewStatement(); } break; case CXXTokenTypeSingleColon: { // label ? if((g_cxx.pTokenChain->iCount == 2) && cxxTokenTypeIs(cxxTokenChainFirst(g_cxx.pTokenChain),CXXTokenTypeIdentifier)) { CXXToken * pFirst = cxxTokenChainFirst(g_cxx.pTokenChain); // assume it's label tagEntryInfo * tag = cxxTagBegin(CXXTagKindLABEL,pFirst); if(tag) { tag->isFileScope = TRUE; cxxTagCommit(); } } else { // what is this? (default: and similar things have been handled at keyword level) } } break; case CXXTokenTypeOpeningBracket: if(!cxxParserParseBlockHandleOpeningBracket()) { CXX_DEBUG_LEAVE_TEXT("Failed to handle opening bracket"); return FALSE; } break; case CXXTokenTypeClosingBracket: // scope finished CXX_DEBUG_LEAVE_TEXT("Closing bracket!"); cxxParserNewStatement(); return TRUE; break; case CXXTokenTypeOpeningParenthesis: case CXXTokenTypeOpeningSquareParenthesis: if(!cxxParserParseAndCondenseCurrentSubchain( CXXTokenTypeOpeningBracket | CXXTokenTypeOpeningParenthesis | CXXTokenTypeOpeningSquareParenthesis, TRUE )) { CXX_DEBUG_LEAVE_TEXT("Parsing the parenthesis failed"); return FALSE; } if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeEOF)) { if(bExpectClosingBracket) { CXX_DEBUG_LEAVE_TEXT("Syntax error: found EOF in block but a closing bracket was expected!"); return FALSE; } return TRUE; // EOF } break; case CXXTokenTypeIdentifier: if(g_cxx.uKeywordState & CXXParserKeywordStateSeenTypedef) { g_cxx.uKeywordState &= ~CXXParserKeywordStateSeenTypedef; if(!cxxParserParseGenericTypedef()) { CXX_DEBUG_LEAVE_TEXT("Failed to parse generic typedef"); return FALSE; } cxxParserNewStatement(); } break; default: // something else we didn't handle break; } } CXX_DEBUG_LEAVE_TEXT("WARNING: Not reached"); return TRUE; }