// An macro token was encountered and it expects a parameter list. // The routine has to check if there is a following parenthesis // and eventually skip it but it MUST NOT parse the next token // if it is not a parenthesis. This is because the macro token // may have a replacement and is that one that has to be returned // back to the caller from cxxParserParseNextToken(). static bool cxxParserParseNextTokenSkipMacroParenthesis(CXXToken ** ppChain) { CXX_DEBUG_ENTER(); CXX_DEBUG_ASSERT(ppChain,"ppChain should not be null here"); cxxParserSkipToNonWhiteSpace(); if(g_cxx.iChar != '(') { *ppChain = NULL; return true; // no parenthesis } if(!cxxParserParseNextToken()) { CXX_DEBUG_LEAVE_TEXT("No next token after ignored identifier"); return false; } if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeOpeningParenthesis)) { CXX_DEBUG_ASSERT(false,"Should have found an open parenthesis token here!"); CXX_DEBUG_LEAVE_TEXT("Internal error"); return false; } if(!cxxParserParseAndCondenseCurrentSubchain( CXXTokenTypeOpeningParenthesis | CXXTokenTypeOpeningSquareParenthesis | CXXTokenTypeOpeningBracket, false )) { CXX_DEBUG_LEAVE_TEXT("Failed to parse and condense subchains"); return false; } CXX_DEBUG_ASSERT( cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeParenthesisChain), "Should have a parenthesis chain as last token!" ); // Now just kill the chain. *ppChain = cxxTokenChainTakeLast(g_cxx.pTokenChain); CXX_DEBUG_LEAVE(); return true; }
bool cxxParserParseNextToken(void) { CXXToken * t = cxxTokenCreate(); // The token chain should not be allowed to grow arbitrairly large. // The token structures are quite big and it's easy to grow up to // 5-6GB or memory usage. However this limit should be large enough // to accomodate all the reasonable statements that could have some // information in them. This includes multiple function prototypes // in a single statement (ImageMagick has some examples) but probably // does NOT include large data tables. if(g_cxx.pTokenChain->iCount > 16384) cxxTokenChainDestroyLast(g_cxx.pTokenChain); cxxTokenChainAppend(g_cxx.pTokenChain,t); g_cxx.pToken = t; cxxParserSkipToNonWhiteSpace(); // FIXME: this cpp handling is kind of broken: // it works only because the moon is in the correct phase. cppBeginStatement(); // This must be done after getting char from input t->iLineNumber = getInputLineNumber(); t->oFilePosition = getInputFilePosition(); if(g_cxx.iChar == EOF) { t->eType = CXXTokenTypeEOF; t->bFollowedBySpace = false; return false; } unsigned int uInfo = UINFO(g_cxx.iChar); //fprintf(stderr,"Char %c %02x info %u\n",g_cxx.iChar,g_cxx.iChar,uInfo); if(uInfo & CXXCharTypeStartOfIdentifier) { // word t->eType = CXXTokenTypeIdentifier; t->bFollowedBySpace = false; vStringPut(t->pszWord,g_cxx.iChar); // special case for tile, which may actually be an operator if(g_cxx.iChar == '~') { // may be followed by space! g_cxx.iChar = cppGetc(); if(isspace(g_cxx.iChar)) { t->bFollowedBySpace = true; g_cxx.iChar = cppGetc(); while(isspace(g_cxx.iChar)) g_cxx.iChar = cppGetc(); } // non space uInfo = UINFO(g_cxx.iChar); if(!(uInfo & CXXCharTypeStartOfIdentifier)) { // this is not an identifier after all t->eType = CXXTokenTypeOperator; if((!t->bFollowedBySpace) && g_cxx.iChar == '=') { // make ~= single token so it's not handled as // a separate assignment vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); } return true; } } else { g_cxx.iChar = cppGetc(); } for(;;) { uInfo = UINFO(g_cxx.iChar); if(!(uInfo & CXXCharTypePartOfIdentifier)) break; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } int iCXXKeyword = lookupKeyword(t->pszWord->buffer,g_cxx.eLanguage); if(iCXXKeyword >= 0) { if( ( (iCXXKeyword == CXXKeywordFINAL) && (!g_cxx.bParsingClassStructOrUnionDeclaration) ) || ( ( (iCXXKeyword == CXXKeywordPUBLIC) || (iCXXKeyword == CXXKeywordPROTECTED) || (iCXXKeyword == CXXKeywordPRIVATE) ) && (!g_cxx.bEnablePublicProtectedPrivateKeywords) ) ) { t->eType = CXXTokenTypeIdentifier; } else { t->eType = CXXTokenTypeKeyword; t->eKeyword = (enum CXXKeyword)iCXXKeyword; if(iCXXKeyword == CXXKeyword__ATTRIBUTE__) { // special handling for __attribute__ return cxxParserParseNextTokenCondenseAttribute(); } } } else { const cppMacroInfo * pMacro = cppFindMacro(vStringValue(t->pszWord)); if(pMacro) { CXX_DEBUG_PRINT("Macro %s",vStringValue(t->pszWord)); cxxTokenChainDestroyLast(g_cxx.pTokenChain); CXXToken * pParameterChain = NULL; if(pMacro->hasParameterList) { CXX_DEBUG_PRINT("Macro has parameter list"); if(!cxxParserParseNextTokenSkipMacroParenthesis(&pParameterChain)) return false; } // This is used to avoid infinite recursion in substitution // (things like -D foo=foo or similar) static int iReplacementRecursionCount = 0; if(pMacro->replacements) { CXX_DEBUG_PRINT("The token has replacements: applying"); if(iReplacementRecursionCount < 1024) { // unget last char cppUngetc(g_cxx.iChar); // unget the replacement cxxParserParseNextTokenApplyReplacement( pMacro, pParameterChain ); g_cxx.iChar = cppGetc(); } } if(pParameterChain) cxxTokenDestroy(pParameterChain); iReplacementRecursionCount++; // Have no token to return: parse it CXX_DEBUG_PRINT("Parse inner token"); bool bRet = cxxParserParseNextToken(); CXX_DEBUG_PRINT("Parsed inner token: %s type %d",g_cxx.pToken->pszWord->buffer,g_cxx.pToken->eType); iReplacementRecursionCount--; return bRet; } } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } if(g_cxx.iChar == '-') { // special case for pointer vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); if(g_cxx.iChar == '>') { t->eType = CXXTokenTypePointerOperator; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } else { t->eType = CXXTokenTypeOperator; if(g_cxx.iChar == '-') { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } #if 0 // As long as we use cppGetc() we don't need this if(g_cxx.iChar == '"') { // special case for strings t->eType = CXXTokenTypeStringConstant; vStringPut(t->pszWord,g_cxx.iChar); // We don't even care of storing the other chars: we don't need // them for parsing // FIXME: We might need them in signature:() tag.. maybe add // them up to a certain length only? for(;;) { g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = false; return true; } if(g_cxx.iChar == '\\') { // escape g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = false; return true; } } else if(g_cxx.iChar == '"') { g_cxx.iChar = cppGetc(); break; } } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } #else if(g_cxx.iChar == STRING_SYMBOL) { t->eType = CXXTokenTypeStringConstant; vStringPut(t->pszWord,'"'); vStringPut(t->pszWord,'"'); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } #endif #if 0 // As long as we use cppGetc() we don't need this if(g_cxx.iChar == '\'') { // special case for strings t->eType = CXXTokenTypeCharacterConstant; vStringPut(t->pszWord,g_cxx.iChar); // We don't even care storing the other chars: we don't // need them for parsing for(;;) { g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = false; return true; } if(g_cxx.iChar == '\\') { // escape g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = false; return true; } } else if(g_cxx.iChar == '\'') { g_cxx.iChar = cppGetc(); break; } } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } #else if(g_cxx.iChar == CHAR_SYMBOL) { t->eType = CXXTokenTypeCharacterConstant; vStringPut(t->pszWord,'\''); vStringPut(t->pszWord,'\''); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } #endif if(uInfo & CXXCharTypeDecimalDigit) { // number t->eType = CXXTokenTypeNumber; vStringPut(t->pszWord,g_cxx.iChar); for(;;) { g_cxx.iChar = cppGetc(); uInfo = UINFO(g_cxx.iChar); if(!(uInfo & CXXCharTypeValidInNumber)) break; vStringPut(t->pszWord,g_cxx.iChar); } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } if(uInfo & CXXCharTypeNamedSingleOrRepeatedCharToken) { t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType; vStringPut(t->pszWord,g_cxx.iChar); int iChar = g_cxx.iChar; g_cxx.iChar = cppGetc(); if(g_cxx.iChar == iChar) { t->eType = g_aCharTable[g_cxx.iChar].uMultiTokenType; // We could signal a syntax error with more than two colons // or equal signs...but we're tolerant do { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } while(g_cxx.iChar == iChar); } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } if(uInfo & CXXCharTypeNamedSingleOrOperatorToken) { t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = UINFO(g_cxx.iChar); if(uInfo & (CXXCharTypeOperator | CXXCharTypeNamedSingleOrOperatorToken)) { t->eType = CXXTokenTypeOperator; do { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = UINFO(g_cxx.iChar); } while( uInfo & (CXXCharTypeOperator | CXXCharTypeNamedSingleOrOperatorToken) ); } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } if(uInfo & CXXCharTypeNamedSingleCharToken) { t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } if(uInfo & CXXCharTypeOperator) { t->eType = CXXTokenTypeOperator; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = UINFO(g_cxx.iChar); while(uInfo & CXXCharTypeOperator) { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = UINFO(g_cxx.iChar); } t->bFollowedBySpace = isspace(g_cxx.iChar); return true; } t->eType = CXXTokenTypeUnknown; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return true; }
boolean cxxParserParseNextToken(void) { CXXToken * t = cxxTokenCreate(); // The token chain should not be allowed to grow arbitrairly large. // The token structures are quite big and it's easy to grow up to // 5-6GB or memory usage. However this limit should be large enough // to accomodate all the reasonable statements that could have some // information in them. This includes multiple function prototypes // in a single statement (ImageMagick has some examples) but probably // does NOT include large data tables. if(g_cxx.pTokenChain->iCount > 16384) cxxTokenChainDestroyLast(g_cxx.pTokenChain); cxxTokenChainAppend(g_cxx.pTokenChain,t); g_cxx.pToken = t; cxxParserSkipToNonWhiteSpace(); // FIXME: this cpp handling is kind of broken: // it works only because the moon is in the correct phase. cppBeginStatement(); // This must be done after getting char from input t->iLineNumber = getInputLineNumber(); t->oFilePosition = getInputFilePosition(); if(g_cxx.iChar == EOF) { t->eType = CXXTokenTypeEOF; t->bFollowedBySpace = FALSE; return FALSE; } unsigned int uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; //printf("Char %c %02x info %u\n",g_cxx.iChar,g_cxx.iChar,uInfo); if(uInfo & CXXCharTypeStartOfIdentifier) { // word t->eType = CXXTokenTypeIdentifier; t->bFollowedBySpace = FALSE; vStringPut(t->pszWord,g_cxx.iChar); // special case for tile, which may actually be an operator if(g_cxx.iChar == '~') { // may be followed by space! g_cxx.iChar = cppGetc(); if(isspace(g_cxx.iChar)) { t->bFollowedBySpace = TRUE; g_cxx.iChar = cppGetc(); while(isspace(g_cxx.iChar)) g_cxx.iChar = cppGetc(); } // non space uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; if(!(uInfo & CXXCharTypeStartOfIdentifier)) { // this is not an identifier after all t->eType = CXXTokenTypeOperator; if((!t->bFollowedBySpace) && g_cxx.iChar == '=') { // make ~= single token so it's not handled as // a separate assignment vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); } return TRUE; } } else { g_cxx.iChar = cppGetc(); } for(;;) { uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; if(!(uInfo & CXXCharTypePartOfIdentifier)) break; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } int iCXXKeyword = lookupKeyword(t->pszWord->buffer,g_cxx.eLanguage); if(iCXXKeyword >= 0) { if( ( (iCXXKeyword == CXXKeywordFINAL) && (!g_cxx.bParsingClassStructOrUnionDeclaration) ) || ( ( (iCXXKeyword == CXXKeywordPUBLIC) || (iCXXKeyword == CXXKeywordPROTECTED) || (iCXXKeyword == CXXKeywordPRIVATE) ) && (!g_cxx.bEnablePublicProtectedPrivateKeywords) ) ) { t->eType = CXXTokenTypeIdentifier; } else { t->eType = CXXTokenTypeKeyword; t->eKeyword = (enum CXXKeyword)iCXXKeyword; if(iCXXKeyword == CXXKeyword__ATTRIBUTE__) { // special handling for __attribute__ return cxxParserParseNextTokenCondenseAttribute(); } } } else { boolean bIgnoreParens = FALSE; const char * szReplacement = NULL; if(isIgnoreToken( vStringValue(t->pszWord), &bIgnoreParens, &szReplacement )) { CXX_DEBUG_PRINT("Ignore token %s",vStringValue(t->pszWord)); // FIXME: Handle ignore parens! if(szReplacement && *szReplacement) { vStringClear(t->pszWord); vStringCatS(t->pszWord,szReplacement); } else { // skip cxxTokenChainDestroyLast(g_cxx.pTokenChain); return cxxParserParseNextToken(); } } } t->bFollowedBySpace = t->bFollowedBySpace | isspace(g_cxx.iChar); return TRUE; } if(g_cxx.iChar == '-') { // special case for pointer vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); if(g_cxx.iChar == '>') { t->eType = CXXTokenTypePointerOperator; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } else { t->eType = CXXTokenTypeOperator; if(g_cxx.iChar == '-') { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } } t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } #if 0 // As long as we use cppGetc() we don't need this if(g_cxx.iChar == '"') { // special case for strings t->eType = CXXTokenTypeStringConstant; vStringPut(t->pszWord,g_cxx.iChar); // We don't even care of storing the other chars: we don't need // them for parsing // FIXME: We might need them in signature:() tag.. maybe add // them up to a certain length only? for(;;) { g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = FALSE; return TRUE; } if(g_cxx.iChar == '\\') { // escape g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = FALSE; return TRUE; } } else if(g_cxx.iChar == '"') { g_cxx.iChar = cppGetc(); break; } } t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } #else if(g_cxx.iChar == STRING_SYMBOL) { t->eType = CXXTokenTypeStringConstant; vStringPut(t->pszWord,'"'); vStringPut(t->pszWord,'"'); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } #endif #if 0 // As long as we use cppGetc() we don't need this if(g_cxx.iChar == '\'') { // special case for strings t->eType = CXXTokenTypeCharacterConstant; vStringPut(t->pszWord,g_cxx.iChar); // We don't even care storing the other chars: we don't // need them for parsing for(;;) { g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = FALSE; return TRUE; } if(g_cxx.iChar == '\\') { // escape g_cxx.iChar = cppGetc(); if(g_cxx.iChar == EOF) { t->bFollowedBySpace = FALSE; return TRUE; } } else if(g_cxx.iChar == '\'') { g_cxx.iChar = cppGetc(); break; } } t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } #else if(g_cxx.iChar == CHAR_SYMBOL) { t->eType = CXXTokenTypeCharacterConstant; vStringPut(t->pszWord,'\''); vStringPut(t->pszWord,'\''); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } #endif if(uInfo & CXXCharTypeDecimalDigit) { // number t->eType = CXXTokenTypeNumber; vStringPut(t->pszWord,g_cxx.iChar); for(;;) { g_cxx.iChar = cppGetc(); uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; if(!(uInfo & CXXCharTypeValidInNumber)) break; vStringPut(t->pszWord,g_cxx.iChar); } t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } if(uInfo & CXXCharTypeNamedSingleOrRepeatedCharToken) { t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType; vStringPut(t->pszWord,g_cxx.iChar); int iChar = g_cxx.iChar; g_cxx.iChar = cppGetc(); if(g_cxx.iChar == iChar) { t->eType = g_aCharTable[g_cxx.iChar].uMultiTokenType; // We could signal a syntax error with more than two colons // or equal signs...but we're tolerant do { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); } while(g_cxx.iChar == iChar); } t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } if(uInfo & CXXCharTypeNamedSingleOrOperatorToken) { t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; if(uInfo & (CXXCharTypeOperator | CXXCharTypeNamedSingleOrOperatorToken)) { t->eType = CXXTokenTypeOperator; do { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; } while( uInfo & (CXXCharTypeOperator | CXXCharTypeNamedSingleOrOperatorToken) ); } t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } if(uInfo & CXXCharTypeNamedSingleCharToken) { t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } if(uInfo & CXXCharTypeOperator) { t->eType = CXXTokenTypeOperator; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; while(uInfo & CXXCharTypeOperator) { vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0; } t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; } t->eType = CXXTokenTypeUnknown; vStringPut(t->pszWord,g_cxx.iChar); g_cxx.iChar = cppGetc(); t->bFollowedBySpace = isspace(g_cxx.iChar); return TRUE; }