Exemple #1
0
static void cxxParserParseNextTokenApplyReplacement(
		const cppMacroInfo * pInfo,
		CXXToken * pParameterChainToken
	)
{
	CXX_DEBUG_ENTER();

	CXX_DEBUG_ASSERT(pInfo,"Info must be not null");
	CXX_DEBUG_ASSERT(pInfo->replacements,"There should be a replacement");

	if(!pInfo->hasParameterList)
	{
		CXX_DEBUG_ASSERT(!pParameterChainToken,"This shouldn't have been extracted");
	}

	CXXTokenChain * pParameters = NULL;
	const char ** aParameters = NULL;
	int iParameterCount = 0;

	if(pInfo->hasParameterList && pParameterChainToken && (pParameterChainToken->pChain->iCount >= 3))
	{
		// kill parenthesis
		cxxTokenChainDestroyFirst(pParameterChainToken->pChain);
		cxxTokenChainDestroyLast(pParameterChainToken->pChain);

		pParameters = cxxTokenChainSplitOnComma(
				pParameterChainToken->pChain
			);

		aParameters = (const char **)eMalloc(sizeof(const char *) * pParameters->iCount);
		CXXToken * pParam = cxxTokenChainFirst(pParameters);
		while(pParam)
		{
			aParameters[iParameterCount] = vStringValue(pParam->pszWord);
			iParameterCount++;
			pParam = pParam->pNext;
		}

		CXX_DEBUG_ASSERT(iParameterCount == pParameters->iCount,"Bad number of parameters found");
	}

	vString * pReplacement = cppBuildMacroReplacement(pInfo,aParameters,iParameterCount);

	if(pParameters)
	{
		cxxTokenChainDestroy(pParameters);
		eFree(aParameters);
	}

	CXX_DEBUG_PRINT("Applying complex replacement '%s'",vStringValue(pReplacement));

	cppUngetString(vStringValue(pReplacement),vStringLength(pReplacement));

	vStringDelete(pReplacement);

	CXX_DEBUG_LEAVE();
}
//
// Parses a template<anything> prefix.
// The parsed template parameter definition is stored in a separate token chain.
//
boolean cxxParserParseTemplatePrefix(void)
{
	CXX_DEBUG_ENTER();

	CXX_DEBUG_ASSERT(cxxTokenTypeIs(cxxTokenChainLast(g_cxx.pTokenChain),CXXTokenTypeKeyword),"We should be pointing at the template keyword here");

	cxxTokenChainDestroyLast(g_cxx.pTokenChain); // kill the template keyword

	if(!cxxParserParseUpToOneOf(CXXTokenTypeSmallerThanSign | CXXTokenTypeEOF | CXXTokenTypeSemicolon))
	{
		CXX_DEBUG_LEAVE_TEXT("Failed to parse up to the < sign");
		return FALSE;
	}

	if(cxxTokenTypeIsOneOf(g_cxx.pToken,CXXTokenTypeEOF | CXXTokenTypeSemicolon))
	{
		CXX_DEBUG_LEAVE_TEXT("Found EOF or semicolon: assuming this is unparseable");
		cxxParserNewStatement();
		return TRUE; // tolerate syntax error
	}

	CXXTokenChain * pSave = g_cxx.pTokenChain;
	g_cxx.pTokenChain = cxxTokenChainCreate();
	cxxTokenChainAppend(g_cxx.pTokenChain,cxxTokenChainTakeLast(pSave));

	if(!cxxParserParseTemplatePrefixAngleBrackets())
	{
		CXX_DEBUG_LEAVE_TEXT("Failed to parse angle brackets");
		cxxTokenChainDestroy(pSave);
		return FALSE;
	}

	if(g_cxx.pTemplateTokenChain)
		cxxTokenChainDestroy(g_cxx.pTemplateTokenChain);

	g_cxx.pTemplateTokenChain = g_cxx.pTokenChain;
	g_cxx.pTokenChain = pSave;

	CXX_DEBUG_LEAVE();
	return TRUE;
}
Exemple #3
0
bool cxxParserParseNextToken(void)
{
	CXXToken * t = cxxTokenCreate();

	// The token chain should not be allowed to grow arbitrairly large.
	// The token structures are quite big and it's easy to grow up to
	// 5-6GB or memory usage. However this limit should be large enough
	// to accomodate all the reasonable statements that could have some
	// information in them. This includes multiple function prototypes
	// in a single statement (ImageMagick has some examples) but probably
	// does NOT include large data tables.
	if(g_cxx.pTokenChain->iCount > 16384)
		cxxTokenChainDestroyLast(g_cxx.pTokenChain);

	cxxTokenChainAppend(g_cxx.pTokenChain,t);

	g_cxx.pToken = t;

	cxxParserSkipToNonWhiteSpace();

	// FIXME: this cpp handling is kind of broken:
	// it works only because the moon is in the correct phase.
	cppBeginStatement();

	// This must be done after getting char from input
	t->iLineNumber = getInputLineNumber();
	t->oFilePosition = getInputFilePosition();

	if(g_cxx.iChar == EOF)
	{
		t->eType = CXXTokenTypeEOF;
		t->bFollowedBySpace = false;
		return false;
	}

	unsigned int uInfo = UINFO(g_cxx.iChar);

	//fprintf(stderr,"Char %c %02x info %u\n",g_cxx.iChar,g_cxx.iChar,uInfo);

	if(uInfo & CXXCharTypeStartOfIdentifier)
	{
		// word
		t->eType = CXXTokenTypeIdentifier;
		t->bFollowedBySpace = false;

		vStringPut(t->pszWord,g_cxx.iChar);

		// special case for tile, which may actually be an operator
		if(g_cxx.iChar == '~')
		{
			// may be followed by space!
			g_cxx.iChar = cppGetc();
			if(isspace(g_cxx.iChar))
			{
				t->bFollowedBySpace = true;
				g_cxx.iChar = cppGetc();
				while(isspace(g_cxx.iChar))
					g_cxx.iChar = cppGetc();
			}

			// non space
			uInfo = UINFO(g_cxx.iChar);
			if(!(uInfo & CXXCharTypeStartOfIdentifier))
			{
				// this is not an identifier after all
				t->eType = CXXTokenTypeOperator;
				if((!t->bFollowedBySpace) && g_cxx.iChar == '=')
				{
					// make ~= single token so it's not handled as
					// a separate assignment
					vStringPut(t->pszWord,g_cxx.iChar);
					g_cxx.iChar = cppGetc();
					t->bFollowedBySpace = isspace(g_cxx.iChar);
				}
				return true;
			}
		} else {
			g_cxx.iChar = cppGetc();
		}

		for(;;)
		{
			uInfo = UINFO(g_cxx.iChar);
			if(!(uInfo & CXXCharTypePartOfIdentifier))
				break;
			vStringPut(t->pszWord,g_cxx.iChar);
			g_cxx.iChar = cppGetc();
		}

		int iCXXKeyword = lookupKeyword(t->pszWord->buffer,g_cxx.eLanguage);
		if(iCXXKeyword >= 0)
		{
			if(
					(
						(iCXXKeyword == CXXKeywordFINAL) &&
						(!g_cxx.bParsingClassStructOrUnionDeclaration)
					) || (
						(
							(iCXXKeyword == CXXKeywordPUBLIC) ||
							(iCXXKeyword == CXXKeywordPROTECTED) ||
							(iCXXKeyword == CXXKeywordPRIVATE)
						) &&
						(!g_cxx.bEnablePublicProtectedPrivateKeywords)
					)
				)
			{
				t->eType = CXXTokenTypeIdentifier;
			} else {
				t->eType = CXXTokenTypeKeyword;
				t->eKeyword = (enum CXXKeyword)iCXXKeyword;

				if(iCXXKeyword == CXXKeyword__ATTRIBUTE__)
				{
					// special handling for __attribute__
					return cxxParserParseNextTokenCondenseAttribute();
				}
			}
		} else {

			const cppMacroInfo * pMacro = cppFindMacro(vStringValue(t->pszWord));

			if(pMacro)
			{
				CXX_DEBUG_PRINT("Macro %s",vStringValue(t->pszWord));

				cxxTokenChainDestroyLast(g_cxx.pTokenChain);

				CXXToken * pParameterChain = NULL;

				if(pMacro->hasParameterList)
				{
					CXX_DEBUG_PRINT("Macro has parameter list");
					if(!cxxParserParseNextTokenSkipMacroParenthesis(&pParameterChain))
						return false;
				}

				// This is used to avoid infinite recursion in substitution
				// (things like -D foo=foo or similar)
				static int iReplacementRecursionCount = 0;

				if(pMacro->replacements)
				{
					CXX_DEBUG_PRINT("The token has replacements: applying");

					if(iReplacementRecursionCount < 1024)
					{
						// unget last char
						cppUngetc(g_cxx.iChar);
						// unget the replacement
						cxxParserParseNextTokenApplyReplacement(
								pMacro,
								pParameterChain
							);

						g_cxx.iChar = cppGetc();
					}
				}

				if(pParameterChain)
					cxxTokenDestroy(pParameterChain);

				iReplacementRecursionCount++;
				// Have no token to return: parse it
				CXX_DEBUG_PRINT("Parse inner token");
				bool bRet = cxxParserParseNextToken();
				CXX_DEBUG_PRINT("Parsed inner token: %s type %d",g_cxx.pToken->pszWord->buffer,g_cxx.pToken->eType);
				iReplacementRecursionCount--;
				return bRet;
			}
		}

		t->bFollowedBySpace = isspace(g_cxx.iChar);

		return true;
	}

	if(g_cxx.iChar == '-')
	{
		// special case for pointer
		vStringPut(t->pszWord,g_cxx.iChar);
		g_cxx.iChar = cppGetc();
		if(g_cxx.iChar == '>')
		{
			t->eType = CXXTokenTypePointerOperator;
			vStringPut(t->pszWord,g_cxx.iChar);
			g_cxx.iChar = cppGetc();
		} else {
			t->eType = CXXTokenTypeOperator;
			if(g_cxx.iChar == '-')
			{
				vStringPut(t->pszWord,g_cxx.iChar);
				g_cxx.iChar = cppGetc();
			}
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}

#if 0
	// As long as we use cppGetc() we don't need this

	if(g_cxx.iChar == '"')
	{
		// special case for strings
		t->eType = CXXTokenTypeStringConstant;
		vStringPut(t->pszWord,g_cxx.iChar);
		// We don't even care of storing the other chars: we don't need
		// them for parsing
		// FIXME: We might need them in signature:() tag.. maybe add
		// them up to a certain length only?
		for(;;)
		{
			g_cxx.iChar = cppGetc();
			if(g_cxx.iChar == EOF)
			{
				t->bFollowedBySpace = false;
				return true;
			}
			if(g_cxx.iChar == '\\')
			{
				// escape
				g_cxx.iChar = cppGetc();
				if(g_cxx.iChar == EOF)
				{
					t->bFollowedBySpace = false;
					return true;
				}
			} else if(g_cxx.iChar == '"')
			{
				g_cxx.iChar = cppGetc();
				break;
			}
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}
#else
	if(g_cxx.iChar == STRING_SYMBOL)
	{
		t->eType = CXXTokenTypeStringConstant;
		vStringPut(t->pszWord,'"');
		vStringPut(t->pszWord,'"');
		g_cxx.iChar = cppGetc();
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}
#endif

#if 0
	// As long as we use cppGetc() we don't need this
	if(g_cxx.iChar == '\'')
	{
		// special case for strings
		t->eType = CXXTokenTypeCharacterConstant;
		vStringPut(t->pszWord,g_cxx.iChar);
		// We don't even care storing the other chars: we don't
		// need them for parsing
		for(;;)
		{
			g_cxx.iChar = cppGetc();
			if(g_cxx.iChar == EOF)
			{
				t->bFollowedBySpace = false;
				return true;
			}
			if(g_cxx.iChar == '\\')
			{
				// escape
				g_cxx.iChar = cppGetc();
				if(g_cxx.iChar == EOF)
				{
					t->bFollowedBySpace = false;
					return true;
				}
			} else if(g_cxx.iChar == '\'')
			{
				g_cxx.iChar = cppGetc();
				break;
			}
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}
#else
	if(g_cxx.iChar == CHAR_SYMBOL)
	{
		t->eType = CXXTokenTypeCharacterConstant;
		vStringPut(t->pszWord,'\'');
		vStringPut(t->pszWord,'\'');
		g_cxx.iChar = cppGetc();
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}
#endif

	if(uInfo & CXXCharTypeDecimalDigit)
	{
		// number
		t->eType = CXXTokenTypeNumber;
		vStringPut(t->pszWord,g_cxx.iChar);

		for(;;)
		{
			g_cxx.iChar = cppGetc();
			uInfo = UINFO(g_cxx.iChar);
			if(!(uInfo & CXXCharTypeValidInNumber))
				break;
			vStringPut(t->pszWord,g_cxx.iChar);
		}

		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}

	if(uInfo & CXXCharTypeNamedSingleOrRepeatedCharToken)
	{
		t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType;
		vStringPut(t->pszWord,g_cxx.iChar);
		int iChar = g_cxx.iChar;
		g_cxx.iChar = cppGetc();
		if(g_cxx.iChar == iChar)
		{
			t->eType = g_aCharTable[g_cxx.iChar].uMultiTokenType;
			// We could signal a syntax error with more than two colons
			// or equal signs...but we're tolerant
			do {
				vStringPut(t->pszWord,g_cxx.iChar);
				g_cxx.iChar = cppGetc();
			} while(g_cxx.iChar == iChar);
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}

	if(uInfo & CXXCharTypeNamedSingleOrOperatorToken)
	{
		t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType;
		vStringPut(t->pszWord,g_cxx.iChar);
		g_cxx.iChar = cppGetc();
		uInfo = UINFO(g_cxx.iChar);
		if(uInfo & (CXXCharTypeOperator | CXXCharTypeNamedSingleOrOperatorToken))
		{
			t->eType = CXXTokenTypeOperator;
			do {
				vStringPut(t->pszWord,g_cxx.iChar);
				g_cxx.iChar = cppGetc();
				uInfo = UINFO(g_cxx.iChar);
			} while(
					uInfo &
						(CXXCharTypeOperator | CXXCharTypeNamedSingleOrOperatorToken)
				);
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}

	if(uInfo & CXXCharTypeNamedSingleCharToken)
	{
		t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType;
		vStringPut(t->pszWord,g_cxx.iChar);
		g_cxx.iChar = cppGetc();
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}

	if(uInfo & CXXCharTypeOperator)
	{
		t->eType = CXXTokenTypeOperator;
		vStringPut(t->pszWord,g_cxx.iChar);
		g_cxx.iChar = cppGetc();
		uInfo = UINFO(g_cxx.iChar);
		while(uInfo & CXXCharTypeOperator)
		{
			vStringPut(t->pszWord,g_cxx.iChar);
			g_cxx.iChar = cppGetc();
			uInfo = UINFO(g_cxx.iChar);
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}

	t->eType = CXXTokenTypeUnknown;
	vStringPut(t->pszWord,g_cxx.iChar);
	g_cxx.iChar = cppGetc();
	t->bFollowedBySpace = isspace(g_cxx.iChar);

	return true;
}
//
// Parses the <parameters> part of the template prefix.
// Here we are pointing at the initial < (but the token chain has been emptied by cxxParserParseTemplatePrefix())
//
static boolean cxxParserParseTemplatePrefixAngleBrackets(void)
{
	CXX_DEBUG_ENTER();

	// Here we have the big prolem of <> characters which may be
	// template argument delimiters, less than/greater than operators, shift left/right operators.
	//
	// A well written code will have parentheses around all the ambiguous cases. We handle that
	// and we permit any kind of syntax inside a parenthesis.
	//
	// Without parentheses we still try to handle the << and >> shift operator cases:
	// - << is always recognized as shift operator
	// - >> is recognized as shift unless it's non-nested. This is what C++11 spec says
	//      and theoretically it should be also pseudo-compatible with C++03 which treats this
	//      case as a syntax error.
	//
	// The 'less-than' and 'greater-than' operators are hopeless in the general case: gcc
	// is smart enough to figure them out by looking at the identifiers around but without
	// proper state (include headers, macro expansion, full type database etc) we simply can't
	// do the same. However, we try to recover if we figure out we (or the programmer?) screwed up.

	int iTemplateLevel = 0;

	for(;;)
	{
		// Within parentheses everything is permitted.
		if(!cxxParserParseAndCondenseSubchainsUpToOneOf(
				CXXTokenTypeGreaterThanSign | CXXTokenTypeSmallerThanSign | CXXTokenTypeOpeningBracket | CXXTokenTypeSemicolon | CXXTokenTypeEOF | CXXTokenTypeAssignment,
				CXXTokenTypeOpeningParenthesis | CXXTokenTypeOpeningSquareParenthesis
			))
		{
			CXX_DEBUG_LEAVE_TEXT("Failed to parse up to '<>{EOF'");
			return FALSE;
		}

evaluate_current_token:
		switch(g_cxx.pToken->eType)
		{
			case CXXTokenTypeSmallerThanSign:
				if(g_cxx.pToken->pPrev && cxxTokenTypeIsOneOf(g_cxx.pToken->pPrev,CXXTokenTypeIdentifier | CXXTokenTypeKeyword))
				{
					if(!cxxParserParseNextToken())
					{
						CXX_DEBUG_LEAVE_TEXT("Syntax error, but tolerate it at this level");
						return TRUE;
					}

					CXX_DEBUG_PRINT("Got token '%s' of type 0x%02x",vStringValue(g_cxx.pToken->pszWord),g_cxx.pToken->eType);

					if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSmallerThanSign))
					{
						// assume it's an operator
						CXX_DEBUG_PRINT("Treating < as shift-left operator");
					} else {
						CXX_DEBUG_PRINT("Increasing template level");
						iTemplateLevel++;

						if(cxxTokenTypeIsOneOf(g_cxx.pToken,CXXTokenTypeOpeningParenthesis | CXXTokenTypeOpeningSquareParenthesis))
						{
							CXX_DEBUG_PRINT("Condensing subchain");
							// hmmm.. must condense as subchain
							if(!cxxParserParseAndCondenseCurrentSubchain(CXXTokenTypeOpeningParenthesis | CXXTokenTypeOpeningSquareParenthesis,TRUE))
							{
								CXX_DEBUG_LEAVE_TEXT("Failed to condense current subchain");
								return TRUE;
							}
							if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeEOF))
							{
								CXX_DEBUG_LEAVE_TEXT("Found EOF, syntax error but we tolerate it");
								return TRUE;
							}
						} else {
							// it's ok
							CXX_DEBUG_PRINT("No need to condense subchain");
						}
					}
				} else {
					// assume it's an operator
					CXX_DEBUG_PRINT("Treating < as less-than operator");
				}
			break;
			case CXXTokenTypeGreaterThanSign:
				if(iTemplateLevel == 0)
				{
					// Non-nested > : always a terminator
					cxxTokenChainDestroyLast(g_cxx.pTokenChain);
					CXX_DEBUG_LEAVE_TEXT("Found end of template");
					return TRUE;
				}
				// Nested > : is is a shift operator?

				if(!cxxParserParseNextToken())
				{
					CXX_DEBUG_LEAVE_TEXT("Syntax error, but tolerate it at this level");
					return TRUE;
				}

				CXX_DEBUG_PRINT("Got token '%s' of type 0x%02x",vStringValue(g_cxx.pToken->pszWord),g_cxx.pToken->eType);

				if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeGreaterThanSign))
				{
					// assume it's an operator
					CXX_DEBUG_PRINT("Treating > as shift-right operator");
				} else {
					CXX_DEBUG_PRINT("Decreasing template level");
					iTemplateLevel--;

					if(cxxTokenTypeIsOneOf(g_cxx.pToken,CXXTokenTypeOpeningParenthesis | CXXTokenTypeOpeningSquareParenthesis))
					{
						CXX_DEBUG_PRINT("Condensing subchain");
						// hmmm.. must condense as subchain
						if(!cxxParserParseAndCondenseCurrentSubchain(CXXTokenTypeOpeningParenthesis | CXXTokenTypeOpeningSquareParenthesis,TRUE))
						{
							CXX_DEBUG_LEAVE_TEXT("Failed to condense current subchain");
							return TRUE;
						}
						if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeEOF))
						{
							CXX_DEBUG_LEAVE_TEXT("Found EOF, syntax error but we tolerate it");
							return TRUE;
						}
					} else {
						// it's ok
						CXX_DEBUG_PRINT("No need to condense subchain");
					}
				}
			break;
			case CXXTokenTypeAssignment:
				CXX_DEBUG_PRINT("Found assignment, trying to skip up to a 'notable' point");
				// try to skip to the next > or , without stopping at < characters.
				if(!cxxParserParseUpToOneOf(
						CXXTokenTypeGreaterThanSign | CXXTokenTypeComma | CXXTokenTypeOpeningBracket | CXXTokenTypeSemicolon | CXXTokenTypeEOF
					))
				{
					CXX_DEBUG_LEAVE_TEXT("Failed to parse up to '}EOF'");
					return FALSE;
				}
				goto evaluate_current_token; // backward jump!
			break;
			case CXXTokenTypeEOF:
				CXX_DEBUG_LEAVE_TEXT("Syntax error, but tolerate it at this level");
				return TRUE;
			break;
			case CXXTokenTypeSemicolon:
				cxxParserNewStatement();
				CXX_DEBUG_LEAVE_TEXT("Broken template arguments, attempting to continue");
				return TRUE;
			break;
			case CXXTokenTypeOpeningBracket:
				CXX_DEBUG_PRINT("Found opening bracket: either syntax error or we screwed up parsing the template parameters (some kind of ugly C++11 syntax?), but we try to recover...");
				// skip the whole bracketed part.
				if(!cxxParserParseUpToOneOf(CXXTokenTypeClosingBracket | CXXTokenTypeEOF))
				{
					CXX_DEBUG_LEAVE_TEXT("Failed to parse up to '}EOF'");
					return FALSE;
				}
				cxxParserNewStatement();
				CXX_DEBUG_LEAVE_TEXT("Broken template arguments recovery complete");
				return TRUE;
			break;
			default:
				CXX_DEBUG_ASSERT(FALSE,"Should not end up here");
				CXX_DEBUG_LEAVE_TEXT("Found unexpected token type 0x%02x",g_cxx.pToken->eType);
				return FALSE;
			break;
		}
	}

	// never reached
	CXX_DEBUG_LEAVE();
	return TRUE;
}
Exemple #5
0
boolean cxxParserParseNextToken(void)
{
	CXXToken * t = cxxTokenCreate();

	// The token chain should not be allowed to grow arbitrairly large.
	// The token structures are quite big and it's easy to grow up to
	// 5-6GB or memory usage. However this limit should be large enough
	// to accomodate all the reasonable statements that could have some
	// information in them. This includes multiple function prototypes
	// in a single statement (ImageMagick has some examples) but probably
	// does NOT include large data tables.
	if(g_cxx.pTokenChain->iCount > 16384)
		cxxTokenChainDestroyLast(g_cxx.pTokenChain);

	cxxTokenChainAppend(g_cxx.pTokenChain,t);

	g_cxx.pToken = t;

	cxxParserSkipToNonWhiteSpace();

	// FIXME: this cpp handling is kind of broken:
	// it works only because the moon is in the correct phase.
	cppBeginStatement();

	// This must be done after getting char from input
	t->iLineNumber = getInputLineNumber();
	t->oFilePosition = getInputFilePosition();

	if(g_cxx.iChar == EOF)
	{
		t->eType = CXXTokenTypeEOF;
		t->bFollowedBySpace = FALSE;
		return FALSE;
	}

	unsigned int uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0;

	//printf("Char %c %02x info %u\n",g_cxx.iChar,g_cxx.iChar,uInfo);

	if(uInfo & CXXCharTypeStartOfIdentifier)
	{
		// word
		t->eType = CXXTokenTypeIdentifier;
		t->bFollowedBySpace = FALSE;

		vStringPut(t->pszWord,g_cxx.iChar);

		// special case for tile, which may actually be an operator
		if(g_cxx.iChar == '~')
		{
			// may be followed by space!
			g_cxx.iChar = cppGetc();
			if(isspace(g_cxx.iChar))
			{
				t->bFollowedBySpace = TRUE;
				g_cxx.iChar = cppGetc();
				while(isspace(g_cxx.iChar))
					g_cxx.iChar = cppGetc();
			}

			// non space
			uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0;
			if(!(uInfo & CXXCharTypeStartOfIdentifier))
			{
				// this is not an identifier after all
				t->eType = CXXTokenTypeOperator;
				if((!t->bFollowedBySpace) && g_cxx.iChar == '=')
				{
					// make ~= single token so it's not handled as
					// a separate assignment
					vStringPut(t->pszWord,g_cxx.iChar);
					g_cxx.iChar = cppGetc();
					t->bFollowedBySpace = isspace(g_cxx.iChar);
				}
				return TRUE;
			}
		} else {
			g_cxx.iChar = cppGetc();
		}

		for(;;)
		{
			uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0;
			if(!(uInfo & CXXCharTypePartOfIdentifier))
				break;
			vStringPut(t->pszWord,g_cxx.iChar);
			g_cxx.iChar = cppGetc();
		}

		int iCXXKeyword = lookupKeyword(t->pszWord->buffer,g_cxx.eLanguage);
		if(iCXXKeyword >= 0)
		{
			if(
					(
						(iCXXKeyword == CXXKeywordFINAL) &&
						(!g_cxx.bParsingClassStructOrUnionDeclaration)
					) || (
						(
							(iCXXKeyword == CXXKeywordPUBLIC) ||
							(iCXXKeyword == CXXKeywordPROTECTED) ||
							(iCXXKeyword == CXXKeywordPRIVATE)
						) &&
						(!g_cxx.bEnablePublicProtectedPrivateKeywords)
					)
				)
			{
				t->eType = CXXTokenTypeIdentifier;
			} else {
				t->eType = CXXTokenTypeKeyword;
				t->eKeyword = (enum CXXKeyword)iCXXKeyword;

				if(iCXXKeyword == CXXKeyword__ATTRIBUTE__)
				{
					// special handling for __attribute__
					return cxxParserParseNextTokenCondenseAttribute();
				}
			}
		} else {
			boolean bIgnoreParens = FALSE;
			const char * szReplacement = NULL;
			if(isIgnoreToken(
					vStringValue(t->pszWord),
					&bIgnoreParens,
					&szReplacement
				))
			{
				CXX_DEBUG_PRINT("Ignore token %s",vStringValue(t->pszWord));
				// FIXME: Handle ignore parens!
				if(szReplacement && *szReplacement)
				{
					vStringClear(t->pszWord);
					vStringCatS(t->pszWord,szReplacement);
				} else {
					// skip
					cxxTokenChainDestroyLast(g_cxx.pTokenChain);
					return cxxParserParseNextToken();
				}
			}
		}

		t->bFollowedBySpace = t->bFollowedBySpace | isspace(g_cxx.iChar);

		return TRUE;
	}

	if(g_cxx.iChar == '-')
	{
		// special case for pointer
		vStringPut(t->pszWord,g_cxx.iChar);
		g_cxx.iChar = cppGetc();
		if(g_cxx.iChar == '>')
		{
			t->eType = CXXTokenTypePointerOperator;
			vStringPut(t->pszWord,g_cxx.iChar);
			g_cxx.iChar = cppGetc();
		} else {
			t->eType = CXXTokenTypeOperator;
			if(g_cxx.iChar == '-')
			{
				vStringPut(t->pszWord,g_cxx.iChar);
				g_cxx.iChar = cppGetc();
			}
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return TRUE;
	}

#if 0
	// As long as we use cppGetc() we don't need this

	if(g_cxx.iChar == '"')
	{
		// special case for strings
		t->eType = CXXTokenTypeStringConstant;
		vStringPut(t->pszWord,g_cxx.iChar);
		// We don't even care of storing the other chars: we don't need
		// them for parsing
		// FIXME: We might need them in signature:() tag.. maybe add
		// them up to a certain length only?
		for(;;)
		{
			g_cxx.iChar = cppGetc();
			if(g_cxx.iChar == EOF)
			{
				t->bFollowedBySpace = FALSE;
				return TRUE;
			}
			if(g_cxx.iChar == '\\')
			{
				// escape
				g_cxx.iChar = cppGetc();
				if(g_cxx.iChar == EOF)
				{
					t->bFollowedBySpace = FALSE;
					return TRUE;
				}
			} else if(g_cxx.iChar == '"')
			{
				g_cxx.iChar = cppGetc();
				break;
			}
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return TRUE;
	}
#else
	if(g_cxx.iChar == STRING_SYMBOL)
	{
		t->eType = CXXTokenTypeStringConstant;
		vStringPut(t->pszWord,'"');
		vStringPut(t->pszWord,'"');
		g_cxx.iChar = cppGetc();
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return TRUE;
	}
#endif

#if 0
	// As long as we use cppGetc() we don't need this
	if(g_cxx.iChar == '\'')
	{
		// special case for strings
		t->eType = CXXTokenTypeCharacterConstant;
		vStringPut(t->pszWord,g_cxx.iChar);
		// We don't even care storing the other chars: we don't
		// need them for parsing
		for(;;)
		{
			g_cxx.iChar = cppGetc();
			if(g_cxx.iChar == EOF)
			{
				t->bFollowedBySpace = FALSE;
				return TRUE;
			}
			if(g_cxx.iChar == '\\')
			{
				// escape
				g_cxx.iChar = cppGetc();
				if(g_cxx.iChar == EOF)
				{
					t->bFollowedBySpace = FALSE;
					return TRUE;
				}
			} else if(g_cxx.iChar == '\'')
			{
				g_cxx.iChar = cppGetc();
				break;
			}
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return TRUE;
	}
#else
	if(g_cxx.iChar == CHAR_SYMBOL)
	{
		t->eType = CXXTokenTypeCharacterConstant;
		vStringPut(t->pszWord,'\'');
		vStringPut(t->pszWord,'\'');
		g_cxx.iChar = cppGetc();
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return TRUE;
	}
#endif

	if(uInfo & CXXCharTypeDecimalDigit)
	{
		// number
		t->eType = CXXTokenTypeNumber;
		vStringPut(t->pszWord,g_cxx.iChar);

		for(;;)
		{
			g_cxx.iChar = cppGetc();
			uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0;
			if(!(uInfo & CXXCharTypeValidInNumber))
				break;
			vStringPut(t->pszWord,g_cxx.iChar);
		}

		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return TRUE;
	}

	if(uInfo & CXXCharTypeNamedSingleOrRepeatedCharToken)
	{
		t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType;
		vStringPut(t->pszWord,g_cxx.iChar);
		int iChar = g_cxx.iChar;
		g_cxx.iChar = cppGetc();
		if(g_cxx.iChar == iChar)
		{
			t->eType = g_aCharTable[g_cxx.iChar].uMultiTokenType;
			// We could signal a syntax error with more than two colons
			// or equal signs...but we're tolerant
			do {
				vStringPut(t->pszWord,g_cxx.iChar);
				g_cxx.iChar = cppGetc();
			} while(g_cxx.iChar == iChar);
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return TRUE;
	}

	if(uInfo & CXXCharTypeNamedSingleOrOperatorToken)
	{
		t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType;
		vStringPut(t->pszWord,g_cxx.iChar);
		g_cxx.iChar = cppGetc();
		uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0;
		if(uInfo & (CXXCharTypeOperator | CXXCharTypeNamedSingleOrOperatorToken))
		{
			t->eType = CXXTokenTypeOperator;
			do {
				vStringPut(t->pszWord,g_cxx.iChar);
				g_cxx.iChar = cppGetc();
				uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0;
			} while(
					uInfo &
						(CXXCharTypeOperator | CXXCharTypeNamedSingleOrOperatorToken)
				);
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return TRUE;
	}

	if(uInfo & CXXCharTypeNamedSingleCharToken)
	{
		t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType;
		vStringPut(t->pszWord,g_cxx.iChar);
		g_cxx.iChar = cppGetc();
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return TRUE;
	}

	if(uInfo & CXXCharTypeOperator)
	{
		t->eType = CXXTokenTypeOperator;
		vStringPut(t->pszWord,g_cxx.iChar);
		g_cxx.iChar = cppGetc();
		uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0;
		while(uInfo & CXXCharTypeOperator)
		{
			vStringPut(t->pszWord,g_cxx.iChar);
			g_cxx.iChar = cppGetc();
			uInfo = (g_cxx.iChar < 0x80) ? g_aCharTable[g_cxx.iChar].uType : 0;
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return TRUE;
	}

	t->eType = CXXTokenTypeUnknown;
	vStringPut(t->pszWord,g_cxx.iChar);
	g_cxx.iChar = cppGetc();
	t->bFollowedBySpace = isspace(g_cxx.iChar);

	return TRUE;
}
Exemple #6
0
static bool cxxParserParseBlockInternal(bool bExpectClosingBracket)
{
	CXX_DEBUG_ENTER();

	//char * szScopeName = cxxScopeGetFullName();
	//CXX_DEBUG_PRINT("Scope name is '%s'",szScopeName ? szScopeName : "");

	cxxParserNewStatement();

	if(bExpectClosingBracket)
	{
		// FIXME: this cpp handling is kind of broken:
		//        it works only because the moon is in the correct phase.
		cppBeginStatement();
	}

	for(;;)
	{
		if(!cxxParserParseNextToken())
		{
found_eof:

			if(bExpectClosingBracket)
			{
				CXX_DEBUG_LEAVE_TEXT(
						"Syntax error: found EOF in block but a closing " \
							"bracket was expected!"
					);
				return false;
			}

			CXX_DEBUG_LEAVE_TEXT("EOF in main block");
			return true; // EOF
		}

process_token:

		CXX_DEBUG_PRINT(
				"Token '%s' of type 0x%02x",
				vStringValue(g_cxx.pToken->pszWord),
				g_cxx.pToken->eType
			);

		switch(g_cxx.pToken->eType)
		{
			case CXXTokenTypeKeyword:
			{
				switch(g_cxx.pToken->eKeyword)
				{
					case CXXKeywordNAMESPACE:
					{
						enum CXXScopeType eScopeType = cxxScopeGetType();

						if(
								(
									// toplevel or nested within a namespace
									(eScopeType == CXXScopeTypeNamespace) ||
									// namespace X = Y inside a function
									(eScopeType == CXXScopeTypeFunction)
								) && (
									// either certainly C++
									g_cxx.bConfirmedCPPLanguage ||
									// or a "sane" namespace syntax
									(
										!cxxTokenChainPreviousTokenOfType(
												g_cxx.pToken,
												CXXTokenTypeStar |
												CXXTokenTypeAnd |
												CXXTokenTypeKeyword
											)
									)
								)
							)
						{
							if(!cxxParserParseNamespace())
							{
								CXX_DEBUG_LEAVE_TEXT("Failed to parse namespace");
								return false;
							}
						} else {
							// If we're pretty sure this is C++ then this is a syntax error.
							// If we're not sure (namely when we're in a *.h file) then
							// let's try to be flexible: treat the namespace keyword as an identifier.
							if(!g_cxx.bConfirmedCPPLanguage)
							{
								CXX_DEBUG_LEAVE_TEXT(
									"Found namespace in unexpected place, but we're not sure it's really C++ "
									"so we'll treat it as an identifier instead"
								);
								g_cxx.pToken->eType = CXXTokenTypeIdentifier;
								continue;
							}

							CXX_DEBUG_LEAVE_TEXT(
								"Found namespace in a wrong place: we're probably out of sync"
							);
							return false;
						}

						cxxParserNewStatement();
					}
					break;
					case CXXKeywordTEMPLATE:
						if(!cxxParserParseTemplatePrefix())
						{
							CXX_DEBUG_LEAVE_TEXT("Failed to parse template");
							return false;
						}
						// Here we are just after the "template<parameters>" prefix.
					break;
					case CXXKeywordTYPEDEF:
						// Mark the next declaration as a typedef
						g_cxx.uKeywordState |= CXXParserKeywordStateSeenTypedef;
						cxxTokenChainClear(g_cxx.pTokenChain);
					break;
					case CXXKeywordENUM:
						if(!cxxParserParseEnum())
						{
							CXX_DEBUG_LEAVE_TEXT("Failed to parse enum");
							return false;
						}
					break;
					case CXXKeywordCLASS:
						if(!cxxParserParseClassStructOrUnion(CXXKeywordCLASS,CXXTagCPPKindCLASS,CXXScopeTypeClass))
						{
							CXX_DEBUG_LEAVE_TEXT("Failed to parse class/struct/union");
							return false;
						}
					break;
					case CXXKeywordSTRUCT:
						if(!cxxParserParseClassStructOrUnion(CXXKeywordSTRUCT,CXXTagKindSTRUCT,CXXScopeTypeStruct))
						{
							CXX_DEBUG_LEAVE_TEXT("Failed to parse class/struct/union");
							return false;
						}
					break;
					case CXXKeywordUNION:
						if(!cxxParserParseClassStructOrUnion(CXXKeywordUNION,CXXTagKindUNION,CXXScopeTypeUnion))
						{
							CXX_DEBUG_LEAVE_TEXT("Failed to parse class/struct/union");
							return false;
						}
					break;
					case CXXKeywordPUBLIC:
					case CXXKeywordPROTECTED:
					case CXXKeywordPRIVATE:
						// Note that the class keyword has its own handler
						// so the only possibility here is an access specifier
						if(!cxxParserParseAccessSpecifier())
						{
							CXX_DEBUG_LEAVE_TEXT("Failed to parse access specifier");
							return false;
						}
					break;
					case CXXKeywordUSING:
						if(!cxxParserParseUsingClause())
						{
							CXX_DEBUG_LEAVE_TEXT("Failed to parse using clause");
							return false;
						}
						cxxParserNewStatement();
					break;
					case CXXKeywordIF:
					case CXXKeywordFOR:
					case CXXKeywordWHILE:
					case CXXKeywordSWITCH:
						if(!cxxParserParseIfForWhileSwitch())
						{
							CXX_DEBUG_LEAVE_TEXT("Failed to parse if/for/while/switch");
							return false;
						}
						cxxParserNewStatement();
						// Force the cpp preprocessor to think that we're in the middle of a statement.
						cppBeginStatement();
					break;
					case CXXKeywordTRY:
					case CXXKeywordELSE:
					case CXXKeywordDO:
						// parse as normal statement/block
						cxxParserNewStatement();
						// Force the cpp preprocessor to think that we're in the middle of a statement.
						cppBeginStatement();
					break;
					case CXXKeywordRETURN:
						if(cxxParserCurrentLanguageIsCPP())
						{
							// may be followed by a lambda, otherwise it's not interesting.
							cxxParserNewStatement();
							g_cxx.uKeywordState |= CXXParserKeywordStateSeenReturn;
						} else {
							// ignore
							if(!cxxParserParseUpToOneOf(CXXTokenTypeSemicolon | CXXTokenTypeEOF,
								   false))
							{
								CXX_DEBUG_LEAVE_TEXT("Failed to parse return");
								return false;
							}
							cxxParserNewStatement();
						}
					break;
					case CXXKeywordCONTINUE:
					case CXXKeywordBREAK:
					case CXXKeywordGOTO:
						// ignore
						if(!cxxParserParseUpToOneOf(CXXTokenTypeSemicolon | CXXTokenTypeEOF,
							   false))
						{
							CXX_DEBUG_LEAVE_TEXT("Failed to parse continue/break/goto");
							return false;
						}
						cxxParserNewStatement();
					break;
					case CXXKeywordTHROW:
						// ignore when inside a function
						if(cxxScopeGetType() == CXXScopeTypeFunction)
						{
							if(!cxxParserParseUpToOneOf(CXXTokenTypeSemicolon | CXXTokenTypeEOF,
								   false))
							{
								CXX_DEBUG_LEAVE_TEXT("Failed to parse return/continue/break");
								return false;
							}
							cxxParserNewStatement();
						}
					break;
					case CXXKeywordCASE:
						// ignore
						if(!cxxParserParseUpToOneOf(
								CXXTokenTypeSemicolon | CXXTokenTypeEOF | CXXTokenTypeSingleColon,
								false
							))
						{
							CXX_DEBUG_LEAVE_TEXT("Failed to parse case keyword");
							return false;
						}
						cxxParserNewStatement();
					break;
					case CXXKeywordEXTERN:
						g_cxx.uKeywordState |= CXXParserKeywordStateSeenExtern;

						cxxTokenChainDestroyLast(g_cxx.pTokenChain);

						if(!cxxParserParseNextToken())
							goto found_eof;

						if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeStringConstant))
						{
							// assume extern "language"

							// Strictly speaking this is a C++ only syntax.
							// However we allow it also in C as it doesn't really hurt.

							cxxTokenChainDestroyLast(g_cxx.pTokenChain);

							// Note that extern "C" may be followed by a block with declarations
							//
							//   extern "C" { ... }
							//
							// However in this case the declarations are ALSO definitions
							// and extern "C" is used only to specify the name mangling mode.
							//
							//   extern "C" int x; <-- a declaration and not a definition
							//   extern "C" { int x; } <-- a declaration and definition: x IS defined
							//                             here and is NOT extern.
							//
							// A variable in an extern "C" block has to be re-declared extern again
							// to be really treated as declaration only.
							//
							//   extern "C" { extern int x; }
							//
							// So in this case we do NOT treat the inner declarations as extern
							// and we don't need specific handling code for this case.
						} else {
							// something else: handle it the normal way
							goto process_token;
						}
					break;
					case CXXKeywordSTATIC:
						g_cxx.uKeywordState |= CXXParserKeywordStateSeenStatic;
						cxxTokenChainDestroyLast(g_cxx.pTokenChain);
					break;
					case CXXKeywordINLINE:
					case CXXKeyword__INLINE:
					case CXXKeyword__INLINE__:
					case CXXKeyword__FORCEINLINE:
					case CXXKeyword__FORCEINLINE__:
						g_cxx.uKeywordState |= CXXParserKeywordStateSeenInline;
						cxxTokenChainDestroyLast(g_cxx.pTokenChain);
					break;
					case CXXKeywordEXPLICIT:
						g_cxx.uKeywordState |= CXXParserKeywordStateSeenExplicit;
						cxxTokenChainDestroyLast(g_cxx.pTokenChain);
					break;
					case CXXKeywordOPERATOR:
						g_cxx.uKeywordState |= CXXParserKeywordStateSeenOperator;
					break;
					case CXXKeywordVIRTUAL:
						g_cxx.uKeywordState |= CXXParserKeywordStateSeenVirtual;
						cxxTokenChainDestroyLast(g_cxx.pTokenChain);
					break;
					case CXXKeywordMUTABLE:
						g_cxx.uKeywordState |= CXXParserKeywordStateSeenMutable;
						cxxTokenChainDestroyLast(g_cxx.pTokenChain);
					break;
					// "const" and "volatile" are part of the type. Don't treat them specially
					// and don't attempt to extract an eventual typedef yet,
					// as there might be a struct/class/union keyword following.
					case CXXKeywordVOLATILE:
						g_cxx.uKeywordState |= CXXParserKeywordStateSeenVolatile;
					break;
					case CXXKeywordCONST:
						g_cxx.uKeywordState |= CXXParserKeywordStateSeenConst;
					break;
					default:
						if(g_cxx.uKeywordState & CXXParserKeywordStateSeenTypedef)
						{
							g_cxx.uKeywordState &= ~CXXParserKeywordStateSeenTypedef;
							if(!cxxParserParseGenericTypedef())
							{
								CXX_DEBUG_LEAVE_TEXT("Failed to parse generic typedef");
								return false;
							}
							cxxParserNewStatement();
						}
					break;
				}
			}
			break;
			case CXXTokenTypeSemicolon:
			{
				if(
						(cxxParserCurrentLanguageIsC()) &&
						cxxScopeIsGlobal() &&
						(!(g_cxx.uKeywordState & CXXParserKeywordStateSeenExtern)) &&
						(!(g_cxx.uKeywordState & CXXParserKeywordStateSeenTypedef))
					)
				{
					// Special handling of K&R style function declarations.
					// We might be in the following situation:
					//
					//  type whatever fname(par1,par2) int par1; int par2; {
					//                                        ^
					//
					switch(cxxParserMaybeParseKnRStyleFunctionDefinition())
					{
						case 1:
							// K&R parser did the job and started a new statement
						break;
						case 0:
							// something else
							cxxParserAnalyzeOtherStatement();
						break;
						default:
							CXX_DEBUG_LEAVE_TEXT("Failed to check for K&R style function definition");
							return false;
						break;
					}
				} else {
					// K&R style function declarations not allowed here.
					cxxParserAnalyzeOtherStatement();
				}
				cxxParserNewStatement();
			}
			break;
			case CXXTokenTypeSingleColon:
			{
				// label ?
				if(
						(g_cxx.pTokenChain->iCount == 2) &&
						cxxTokenTypeIs(
								cxxTokenChainFirst(g_cxx.pTokenChain),
								CXXTokenTypeIdentifier
							)
					)
				{
					CXXToken * pFirst = cxxTokenChainFirst(g_cxx.pTokenChain);
					// assume it's label
					tagEntryInfo * tag = cxxTagBegin(CXXTagKindLABEL,pFirst);

					if(tag)
					{
						tag->isFileScope = true;
						cxxTagCommit();
					}
				} else {
					// what is this? (default: and similar things have been handled at keyword level)
				}
			}
			break;
			case CXXTokenTypeOpeningBracket:
				if(!cxxParserParseBlockHandleOpeningBracket())
				{
					CXX_DEBUG_LEAVE_TEXT("Failed to handle opening bracket");
					return false;
				}
			break;
			case CXXTokenTypeClosingBracket:
				// scope finished
				if(!bExpectClosingBracket)
				{
					CXX_DEBUG_LEAVE_TEXT(
						"Found unexpected closing bracket: probably preprocessing problem"
					);
					return false;
				}
				CXX_DEBUG_LEAVE_TEXT("Closing bracket!");
				cxxParserNewStatement();
				return true;
			break;
			case CXXTokenTypeOpeningParenthesis:
			case CXXTokenTypeOpeningSquareParenthesis:
				if(!cxxParserParseAndCondenseCurrentSubchain(
						CXXTokenTypeOpeningBracket | CXXTokenTypeOpeningParenthesis |
							CXXTokenTypeOpeningSquareParenthesis,
						true,
						false
					))
				{
					CXX_DEBUG_LEAVE_TEXT("Parsing the parenthesis failed");
					return false;
				}

				if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeEOF))
				{
					if(bExpectClosingBracket)
					{
						CXX_DEBUG_LEAVE_TEXT(
								"Syntax error: found EOF in block but a closing bracket was expected!"
							);
						return false;
					}
					return true; // EOF
				}
			break;
			case CXXTokenTypeIdentifier:
				if(g_cxx.uKeywordState & CXXParserKeywordStateSeenTypedef)
				{
					g_cxx.uKeywordState &= ~CXXParserKeywordStateSeenTypedef;
					if(!cxxParserParseGenericTypedef())
					{
						CXX_DEBUG_LEAVE_TEXT("Failed to parse generic typedef");
						return false;
					}
					cxxParserNewStatement();
				}
			break;
			default:
				// something else we didn't handle
			break;
		}
	}

	CXX_DEBUG_LEAVE_TEXT("WARNING: Not reached");
	return true;
}
Exemple #7
0
//
// The __attribute__((...)) sequence complicates parsing quite a lot.
// For this reason we attempt to "hide" it from the rest of the parser
// at tokenizer level.
//
// Returns false if it finds an EOF. This is an important invariant required by
// cxxParserParseNextToken(), the only caller.
//
static bool cxxParserParseNextTokenCondenseAttribute(void)
{
	// Since cxxParserParseNextToken() returns false only when it has found
	// an EOF, this function must do the same.
	// This means that any broken input must be discarded here.

	CXX_DEBUG_ENTER();

	CXX_DEBUG_ASSERT(
			cxxTokenIsKeyword(g_cxx.pToken,CXXKeyword__ATTRIBUTE__),
			"This function should be called only after we have parsed __attribute__"
		);

	// Kill it
	cxxTokenChainDestroyLast(g_cxx.pTokenChain);

	// And go ahead.

	if(!cxxParserParseNextToken())
	{
		CXX_DEBUG_LEAVE_TEXT("No next token after __attribute__");
		return false;
	}

	if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeOpeningParenthesis))
	{
		CXX_DEBUG_LEAVE_TEXT("Something that is not an opening parenthesis");
		return true;
	}

	// Do NOT accept EOF as a valid terminator as it implies broken input.
	if(!cxxParserParseAndCondenseCurrentSubchain(
			CXXTokenTypeOpeningParenthesis |
				CXXTokenTypeOpeningSquareParenthesis |
				CXXTokenTypeOpeningBracket,
			false,
			false
		))
	{
		// Pasing and/or condensation of the subchain failed. This implies broken
		// input (mismatched parenthesis/bracket, early EOF).

		CXX_DEBUG_LEAVE_TEXT("Failed to parse subchains. The input is broken...");

		// However our invariant (see comment at the beginning of the function)
		// forbids us to return false if we didn't find an EOF. So we attempt
		// to resume parsing anyway. If there is an EOF, cxxParserParseNextToken()
		// will report it.

		// Kill the token chain
		cxxTokenChainDestroyLast(g_cxx.pTokenChain);

		return cxxParserParseNextToken();
	}

	CXX_DEBUG_ASSERT(
			cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeParenthesisChain),
			"Should have a parenthesis chain as last token!"
		);

	// Try to make sense of certain kinds of __attribute__.
	// the proper syntax is __attribute__(()), so look at the inner chain

	CXXToken * pInner = cxxTokenChainFirst(g_cxx.pToken->pChain);
	if(pInner)
	{
		if(pInner->pNext && cxxTokenTypeIs(pInner->pNext,CXXTokenTypeParenthesisChain))
			pInner = cxxTokenChainFirst(pInner->pNext->pChain);

		while(pInner)
		{
			if(cxxTokenTypeIs(pInner,CXXTokenTypeIdentifier))
			{
				CXX_DEBUG_PRINT("Analyzing attribyte %s",vStringValue(pInner->pszWord));
				if(
						(strcmp(vStringValue(pInner->pszWord),"always_inline") == 0) ||
						(strcmp(vStringValue(pInner->pszWord),"__always_inline__") == 0)
					)
				{
					CXX_DEBUG_PRINT("Found attribute 'always_inline'");
					// assume "inline" has been seen.
					g_cxx.uKeywordState |= CXXParserKeywordStateSeenInline;
				} else if(
						(strcmp(vStringValue(pInner->pszWord),"deprecated") == 0) ||
						(strcmp(vStringValue(pInner->pszWord),"__deprecated__") == 0)
					)
				{
					CXX_DEBUG_PRINT("Found attribute 'deprecated'");
					// assume "inline" has been seen.
					g_cxx.uKeywordState |= CXXParserKeywordStateSeenAttributeDeprecated;
				}
			}

			// If needed, we could attach certain attributes to previous
			// identifiers. But note that __attribute__ may apply to a
			// following identifier too.

			pInner = pInner->pNext;
		}
	}

	// Now just kill the chain.
	cxxTokenChainDestroyLast(g_cxx.pTokenChain);

	// And finally extract yet another token.
	CXX_DEBUG_LEAVE();
	return cxxParserParseNextToken();
}
Exemple #8
0
boolean cxxParserParseClassStructOrUnion(
		enum CXXKeyword eKeyword,
		enum CXXTagKind eTagKind
	)
{
	CXX_DEBUG_ENTER();

	// make sure that there is only the class/struct/union keyword in the chain
	while(g_cxx.pTokenChain->iCount > 1)
		cxxTokenChainDestroyFirst(g_cxx.pTokenChain);

	// this may be cleared below
	boolean bParsingTypedef = (g_cxx.uKeywordState & CXXParserKeywordStateSeenTypedef);

	/*
		Spec is:
			class-key attr class-head-name base-clause { member-specification }

			class-key	-	one of class or struct. The keywords are identical
				except for the default member access and the default base class access.
			attr(C++11)	-	optional sequence of any number of attributes,
				may include alignas specifier
			class-head-name	-	the name of the class that's being defined.
				Optionally qualified, optionally followed by keyword final.
				The name may be omitted, in which case the class is unnamed (note
				that unnamed class cannot be final)
			base-clause	-	optional list of one or more parent classes and the
				model of inheritance used for each (see derived class)
			member-specification	-	list of access specifiers, member object and
				member function declarations and definitions (see below)
	*/

	// Skip attr and class-head-name

	// enable "final" keyword handling
	g_cxx.bParsingClassStructOrUnionDeclaration = TRUE;

	unsigned int uTerminatorTypes = CXXTokenTypeEOF | CXXTokenTypeSingleColon |
			CXXTokenTypeSemicolon | CXXTokenTypeOpeningBracket |
			CXXTokenTypeSmallerThanSign;

	if(eTagKind != CXXTagKindCLASS)
		uTerminatorTypes |= CXXTokenTypeParenthesisChain | CXXTokenTypeAssignment;

	boolean bRet;

	for(;;)
	{
		bRet = cxxParserParseUpToOneOf(uTerminatorTypes);

		if(!bRet)
		{
			g_cxx.bParsingClassStructOrUnionDeclaration = FALSE;
			CXX_DEBUG_LEAVE_TEXT("Could not parse class/struct/union name");
			return FALSE;
		}

		if(!cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSmallerThanSign))
			break;

		// Probably a template specialisation

		// template<typename T> struct X<int>
		// {
		// }

		// FIXME: Should we add the specialisation arguments somewhere?
		//        Maye as a separate field?

		bRet = cxxParserParseAndCondenseCurrentSubchain(
					CXXTokenTypeOpeningParenthesis | CXXTokenTypeOpeningBracket |
						CXXTokenTypeOpeningSquareParenthesis |
						CXXTokenTypeSmallerThanSign,
					FALSE
				);

		if(!bRet)
		{
			g_cxx.bParsingClassStructOrUnionDeclaration = FALSE;
			CXX_DEBUG_LEAVE_TEXT("Could not parse class/struct/union name");
			return FALSE;
		}
	}

	g_cxx.bParsingClassStructOrUnionDeclaration = FALSE;

	if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeParenthesisChain))
	{
		// probably a function declaration/prototype
		// something like struct x * func()....
		// do not clear statement
		CXX_DEBUG_LEAVE_TEXT("Probably a function declaration!");
		return TRUE;
	}

	// FIXME: This block is duplicated in enum
	if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSemicolon))
	{
		if(g_cxx.pTokenChain->iCount > 3)
		{
			// [typedef] struct X Y; <-- typedef has been removed!
			if(bParsingTypedef)
				cxxParserExtractTypedef(g_cxx.pTokenChain,TRUE);
			else
				cxxParserExtractVariableDeclarations(g_cxx.pTokenChain,0);
		}

		cxxParserNewStatement();
		CXX_DEBUG_LEAVE();
		return TRUE;
	}

	if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeAssignment))
	{
		if(g_cxx.pTokenChain->iCount > 3)
		{
			// struct X Y = ...;
			cxxParserExtractVariableDeclarations(g_cxx.pTokenChain,0);
		}

		// Skip the initialization (which almost certainly contains a block)
		if(!cxxParserParseUpToOneOf(CXXTokenTypeEOF | CXXTokenTypeSemicolon))
		{
			CXX_DEBUG_LEAVE_TEXT("Failed to parse up to EOF/semicolon");
			return FALSE;
		}

		cxxParserNewStatement();
		CXX_DEBUG_LEAVE();
		return TRUE;
	}

	if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeEOF))
	{
		// tolerate EOF, just ignore this
		cxxParserNewStatement();
		CXX_DEBUG_LEAVE_TEXT("EOF: ignoring");
		return TRUE;
	}

	// semicolon or opening bracket

	// check if we can extract a class name identifier
	CXXToken * pClassName = cxxTokenChainLastTokenOfType(
			g_cxx.pTokenChain,
			CXXTokenTypeIdentifier
		);

	int iPushedScopes = 0;

	if(pClassName)
	{
		// good.
		// It may be qualified though.
		CXXToken * pNamespaceBegin = pClassName;
		CXXToken * pPrev = pClassName->pPrev;
		while(pPrev)
		{
			if(!cxxTokenTypeIs(pPrev,CXXTokenTypeMultipleColons))
				break;
			pPrev = pPrev->pPrev;
			if(!pPrev)
				break;
			if(!cxxTokenTypeIs(pPrev,CXXTokenTypeIdentifier))
				break;
			pNamespaceBegin = pPrev;
			pPrev = pPrev->pPrev;
		}

		while(pNamespaceBegin != pClassName)
		{
			CXXToken * pNext = pNamespaceBegin->pNext;
			cxxTokenChainTake(g_cxx.pTokenChain,pNamespaceBegin);
			// FIXME: We don't really know if it's a class!
			cxxScopePush(pNamespaceBegin,CXXTagKindCLASS,CXXScopeAccessUnknown);
			iPushedScopes++;
			pNamespaceBegin = pNext->pNext;
		}

		CXX_DEBUG_PRINT(
				"Class/struct/union name is %s",
				vStringValue(pClassName->pszWord)
			);
		cxxTokenChainTake(g_cxx.pTokenChain,pClassName);
	} else {
		pClassName = cxxTokenCreateAnonymousIdentifier(eTagKind);
		CXX_DEBUG_PRINT(
				"Class/struct/union name is %s (anonymous)",
				vStringValue(pClassName->pszWord)
			);
	}

	if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeSingleColon))
	{
		// check for base classes
		cxxTokenChainClear(g_cxx.pTokenChain);

		if(!cxxParserParseUpToOneOf(
				CXXTokenTypeEOF | CXXTokenTypeSemicolon | CXXTokenTypeOpeningBracket
			))
		{
			cxxTokenDestroy(pClassName);
			CXX_DEBUG_LEAVE_TEXT("Failed to parse base class part");
			return FALSE;
		}

		if(cxxTokenTypeIsOneOf(g_cxx.pToken,CXXTokenTypeSemicolon | CXXTokenTypeEOF))
		{
			cxxTokenDestroy(pClassName);
			cxxParserNewStatement();
			CXX_DEBUG_LEAVE_TEXT("Syntax error: ignoring");
			return TRUE;
		}

		cxxTokenChainDestroyLast(g_cxx.pTokenChain); // remove the {
	} else {
		cxxTokenChainClear(g_cxx.pTokenChain);
	}

	tagEntryInfo * tag = cxxTagBegin(eTagKind,pClassName);

	int iCorkQueueIndex = CORK_NIL;

	if(tag)
	{
		if(g_cxx.pTokenChain->iCount > 0)
		{
			// Strip inheritance type information
			// FIXME: This could be optional!

			CXXToken * t = cxxTokenChainFirst(g_cxx.pTokenChain);
			while(t)
			{
				if(
					cxxTokenTypeIs(t,CXXTokenTypeKeyword) &&
					(
						(t->eKeyword == CXXKeywordPUBLIC) ||
						(t->eKeyword == CXXKeywordPROTECTED) ||
						(t->eKeyword == CXXKeywordPRIVATE) ||
						(t->eKeyword == CXXKeywordVIRTUAL)
					)
				)
				{
					CXXToken * pNext = t->pNext;
					cxxTokenChainTake(g_cxx.pTokenChain,t);
					cxxTokenDestroy(t);
					t = pNext;
				} else {
					t = t->pNext;
				}
			}

			if(g_cxx.pTokenChain->iCount > 0)
			{
				cxxTokenChainCondense(
						g_cxx.pTokenChain,
						CXXTokenChainCondenseNoTrailingSpaces
					);
				tag->extensionFields.inheritance = vStringValue(
						g_cxx.pTokenChain->pHead->pszWord
					);
			}
		}

		if(
			g_cxx.pTemplateTokenChain && (g_cxx.pTemplateTokenChain->iCount > 0) &&
			cxxTagCPPFieldEnabled(CXXTagCPPFieldTemplate)
		)
		{
			cxxTokenChainNormalizeTypeNameSpacing(g_cxx.pTemplateTokenChain);
			cxxTokenChainCondense(g_cxx.pTemplateTokenChain,0);
			cxxTagSetCPPField(
					CXXTagCPPFieldTemplate,
					vStringValue(cxxTokenChainFirst(g_cxx.pTemplateTokenChain)->pszWord)
				);
		}

		tag->isFileScope = !isInputHeaderFile();

		iCorkQueueIndex = cxxTagCommit();
	}

	cxxScopePush(
			pClassName,
			eTagKind,
			(eTagKind == CXXTagKindCLASS) ?
				CXXScopeAccessPrivate : CXXScopeAccessPublic
		);

	vString * pScopeName = cxxScopeGetFullNameAsString();

	if(!cxxParserParseBlock(TRUE))
	{
		CXX_DEBUG_LEAVE_TEXT("Failed to parse scope");
		if(pScopeName)
			vStringDelete(pScopeName);
		return FALSE;
	}

	if(iCorkQueueIndex > CORK_NIL)
		cxxParserMarkEndLineForTagInCorkQueue(iCorkQueueIndex);

	iPushedScopes++;
	while(iPushedScopes > 0)
	{
		cxxScopePop();
		iPushedScopes--;
	}

	bRet = cxxParserParseEnumStructClassOrUnionFullDeclarationTrailer(
			bParsingTypedef,
			eKeyword,
			eTagKind,
			vStringValue(pScopeName)
		);

	if(pScopeName)
		vStringDelete(pScopeName);

	cxxParserNewStatement();
	CXX_DEBUG_LEAVE();
	return bRet;
};