// Reads the next token from the (preprocessed) input and makes it the
// current token in g_cxx.pToken.
//
// Returns false if it finds an EOF. Returns true otherwise.
// It also returns false when skipping the parameter list of a macro fails.
//
// In some special cases this function may parse more than one token,
// however only a single token will always be returned.
//
// On entry g_cxx.iChar holds the look-ahead character; on exit it holds
// the first character that follows the returned token.
bool cxxParserParseNextToken(void)
{
	// The token chain should not be allowed to grow arbitrarily large.
	// The token structures are quite big and it's easy to grow up to
	// 5-6GB of memory usage. However this limit should be large enough
	// to accommodate all the reasonable statements that could have some
	// information in them. This includes multiple function prototypes
	// in a single statement (ImageMagick has some examples) but probably
	// does NOT include large data tables.
	if(g_cxx.pTokenChain->iCount > 16384)
		cxxTokenChainDestroyLast(g_cxx.pTokenChain);

	if(g_cxx.pUngetToken)
	{
		// got some tokens in the unget chain.
		// The pushed-back token is re-appended and becomes the current one;
		// no input is consumed on this path.
		cxxTokenChainAppend(g_cxx.pTokenChain,g_cxx.pUngetToken);
		g_cxx.pToken = g_cxx.pUngetToken;
		g_cxx.pUngetToken = NULL;
		return !cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeEOF);
	}

	// Create the token up front: it is appended to the chain before we know
	// its type, and the branches below fill it in.
	CXXToken * t = cxxTokenCreate();

	cxxTokenChainAppend(g_cxx.pTokenChain,t);

	g_cxx.pToken = t;

	cxxParserSkipToNonWhiteSpace();

	// FIXME: this cpp handling is kind of broken:
	//        it works only because the moon is in the correct phase.
	cppBeginStatement();

	// This must be done after getting char from input
	t->iLineNumber = getInputLineNumber();
	t->oFilePosition = getInputFilePosition();

	if(g_cxx.iChar == EOF)
	{
		t->eType = CXXTokenTypeEOF;
		t->bFollowedBySpace = false;
		return false;
	}

	// Character class flags of the look-ahead character; they select
	// which of the branches below builds the token.
	unsigned int uInfo = UINFO(g_cxx.iChar);

	//fprintf(stderr,"Char %c %02x info %u\n",g_cxx.iChar,g_cxx.iChar,uInfo);

	if(uInfo & CXXCharTypeStartOfIdentifier)
	{
		// word
		t->eType = CXXTokenTypeIdentifier;
		t->bFollowedBySpace = false;

		vStringPut(t->pszWord,g_cxx.iChar);

		// special case for tilde, which may actually be an operator
		if(g_cxx.iChar == '~')
		{
			// may be followed by space!
			g_cxx.iChar = cppGetc();
			if(isspace(g_cxx.iChar))
			{
				t->bFollowedBySpace = true;
				g_cxx.iChar = cppGetc();
				while(isspace(g_cxx.iChar))
					g_cxx.iChar = cppGetc();
			}

			// non space
			uInfo = UINFO(g_cxx.iChar);
			if(!(uInfo & CXXCharTypeStartOfIdentifier))
			{
				// this is not an identifier after all
				t->eType = CXXTokenTypeOperator;
				if((!t->bFollowedBySpace) && g_cxx.iChar == '=')
				{
					// make ~= single token so it's not handled as
					// a separate assignment
					vStringPut(t->pszWord,g_cxx.iChar);
					g_cxx.iChar = cppGetc();
					t->bFollowedBySpace = isspace(g_cxx.iChar);
				}
				return true;
			}
		} else {
			g_cxx.iChar = cppGetc();
		}

		// Accumulate the rest of the identifier. In the '~' case the
		// look-ahead is the first identifier character and is appended here.
		for(;;)
		{
			uInfo = UINFO(g_cxx.iChar);
			if(!(uInfo & CXXCharTypePartOfIdentifier))
				break;
			vStringPut(t->pszWord,g_cxx.iChar);
			g_cxx.iChar = cppGetc();
		}

		int iCXXKeyword = lookupKeyword(t->pszWord->buffer,g_cxx.eLangType);
		if(iCXXKeyword >= 0)
		{
			if(cxxKeywordIsDisabled((CXXKeyword)iCXXKeyword))
			{
				// A disabled keyword is reported as a plain identifier.
				t->eType = CXXTokenTypeIdentifier;
			} else if (isInputHeaderFile () &&
					(
						iCXXKeyword == CXXKeywordPUBLIC ||
						iCXXKeyword == CXXKeywordPROTECTED ||
						iCXXKeyword == CXXKeywordPRIVATE
					)
				)
			{
				// In header files public/protected/private are accepted as
				// keywords only when followed by ':' or, after whitespace,
				// by ':' or an alphabetic character. Otherwise the word is
				// treated as an identifier and the three keywords are
				// disabled from now on.
				int c0 = g_cxx.iChar;

				if (c0 == ':')
				{
					/* Specifying the scope of struct/union/class member */
					goto assign_keyword;
				}

				if (isspace (c0))
				{
					int c1;

					// Peek at the first non-blank character, then push it
					// back and restore the original look-ahead.
					cxxParserSkipToNonWhiteSpace ();
					c1 = g_cxx.iChar;
					cppUngetc (c1);
					g_cxx.iChar = c0;

					if (c1 == ':')
					{
						/* Specifying the scope of struct/union/class member */
						goto assign_keyword;
					}
					else if (isalpha (c1))
					{
						/* Specifying the scope of class inheritance */
						goto assign_keyword;
					}
				}

				t->eType = CXXTokenTypeIdentifier;
				g_cxx.bConfirmedCPPLanguage = false;
				cxxKeywordEnablePublicProtectedPrivate(false);
			} else {
			assign_keyword:
				t->eType = CXXTokenTypeKeyword;
				t->eKeyword = (CXXKeyword)iCXXKeyword;

				if(iCXXKeyword == CXXKeyword__ATTRIBUTE__)
				{
					// special handling for __attribute__
					return cxxParserParseNextTokenCondenseAttribute();
				}
			}
		} else {

			const cppMacroInfo * pMacro = cppFindMacro(vStringValue(t->pszWord));

			if(pMacro)
			{
				CXX_DEBUG_PRINT("Macro %s",vStringValue(t->pszWord));

				// The macro name itself is never returned: drop the token
				// just appended (t must not be used after this point).
				cxxTokenChainDestroyLast(g_cxx.pTokenChain);

				CXXToken * pParameterChain = NULL;

				if(pMacro->hasParameterList)
				{
					CXX_DEBUG_PRINT("Macro has parameter list");
					if(!cxxParserParseNextTokenSkipMacroParenthesis(&pParameterChain))
						return false;
				}

				// This is used to avoid infinite recursion in substitution
				// (things like -D foo=foo or similar)
				static int iReplacementRecursionCount = 0;

				if(pMacro->replacements)
				{
					CXX_DEBUG_PRINT("The token has replacements: applying");

					// Past the recursion limit the replacement is skipped,
					// so the macro simply expands to nothing.
					if(iReplacementRecursionCount < 1024)
					{
						// unget last char
						cppUngetc(g_cxx.iChar);
						// unget the replacement
						cxxParserParseNextTokenApplyReplacement(
								pMacro,
								pParameterChain
							);

						g_cxx.iChar = cppGetc();
					}
				}

				if(pParameterChain)
					cxxTokenDestroy(pParameterChain);

				iReplacementRecursionCount++;
				// Have no token to return: parse it
				CXX_DEBUG_PRINT("Parse inner token");
				bool bRet = cxxParserParseNextToken();
				CXX_DEBUG_PRINT("Parsed inner token: %s type %d",g_cxx.pToken->pszWord->buffer,g_cxx.pToken->eType);
				iReplacementRecursionCount--;
				return bRet;
			}
		}

		t->bFollowedBySpace = isspace(g_cxx.iChar);

		return true;
	}

	if(g_cxx.iChar == '-')
	{
		// special case for pointer
		// "->" becomes a pointer operator; "-" and "--" become plain operators.
		vStringPut(t->pszWord,g_cxx.iChar);
		g_cxx.iChar = cppGetc();
		if(g_cxx.iChar == '>')
		{
			t->eType = CXXTokenTypePointerOperator;
			vStringPut(t->pszWord,g_cxx.iChar);
			g_cxx.iChar = cppGetc();
		} else {
			t->eType = CXXTokenTypeOperator;
			if(g_cxx.iChar == '-')
			{
				vStringPut(t->pszWord,g_cxx.iChar);
				g_cxx.iChar = cppGetc();
			}
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}

#if 0
	// As long as we use cppGetc() we don't need this
	if(g_cxx.iChar == '"')
	{
		// special case for strings
		t->eType = CXXTokenTypeStringConstant;
		vStringPut(t->pszWord,g_cxx.iChar);
		// We don't even care of storing the other chars: we don't need
		// them for parsing
		// FIXME: We might need them in signature:() tag.. maybe add
		// them up to a certain length only?
		for(;;)
		{
			g_cxx.iChar = cppGetc();
			if(g_cxx.iChar == EOF)
			{
				t->bFollowedBySpace = false;
				return true;
			}
			if(g_cxx.iChar == '\\')
			{
				// escape
				g_cxx.iChar = cppGetc();
				if(g_cxx.iChar == EOF)
				{
					t->bFollowedBySpace = false;
					return true;
				}
			} else if(g_cxx.iChar == '"')
			{
				g_cxx.iChar = cppGetc();
				break;
			}
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}
#else
	// A whole string literal arrives as the single value STRING_SYMBOL;
	// the token text is just an empty pair of double quotes.
	if(g_cxx.iChar == STRING_SYMBOL)
	{
		t->eType = CXXTokenTypeStringConstant;
		vStringPut(t->pszWord,'"');
		vStringPut(t->pszWord,'"');
		g_cxx.iChar = cppGetc();
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}
#endif

#if 0
	// As long as we use cppGetc() we don't need this
	if(g_cxx.iChar == '\'')
	{
		// special case for strings
		t->eType = CXXTokenTypeCharacterConstant;
		vStringPut(t->pszWord,g_cxx.iChar);
		// We don't even care storing the other chars: we don't
		// need them for parsing
		for(;;)
		{
			g_cxx.iChar = cppGetc();
			if(g_cxx.iChar == EOF)
			{
				t->bFollowedBySpace = false;
				return true;
			}
			if(g_cxx.iChar == '\\')
			{
				// escape
				g_cxx.iChar = cppGetc();
				if(g_cxx.iChar == EOF)
				{
					t->bFollowedBySpace = false;
					return true;
				}
			} else if(g_cxx.iChar == '\'')
			{
				g_cxx.iChar = cppGetc();
				break;
			}
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}
#else
	// Same as above for character literals: CHAR_SYMBOL stands for the
	// whole literal and the token text is an empty pair of single quotes.
	if(g_cxx.iChar == CHAR_SYMBOL)
	{
		t->eType = CXXTokenTypeCharacterConstant;
		vStringPut(t->pszWord,'\'');
		vStringPut(t->pszWord,'\'');
		g_cxx.iChar = cppGetc();
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}
#endif

	if(uInfo & CXXCharTypeDecimalDigit)
	{
		// number
		t->eType = CXXTokenTypeNumber;
		vStringPut(t->pszWord,g_cxx.iChar);

		for(;;)
		{
			g_cxx.iChar = cppGetc();
			uInfo = UINFO(g_cxx.iChar);
			if(!(uInfo & CXXCharTypeValidInNumber))
				break;
			vStringPut(t->pszWord,g_cxx.iChar);
		}

		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}

	if(uInfo & CXXCharTypeNamedSingleOrRepeatedCharToken)
	{
		// A character with one token type when alone and another when it
		// is repeated; the types come from g_aCharTable.
		t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType;
		vStringPut(t->pszWord,g_cxx.iChar);
		int iChar = g_cxx.iChar;
		g_cxx.iChar = cppGetc();
		if(g_cxx.iChar == iChar)
		{
			t->eType = g_aCharTable[g_cxx.iChar].uMultiTokenType;
			// We could signal a syntax error with more than two colons
			// or equal signs...but we're tolerant
			do {
				vStringPut(t->pszWord,g_cxx.iChar);
				g_cxx.iChar = cppGetc();
			} while(g_cxx.iChar == iChar);
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}

	if(uInfo & CXXCharTypeNamedSingleOrOperatorToken)
	{
		// A character that has its own token type when alone but becomes a
		// generic operator when followed by further operator characters.
		t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType;
		vStringPut(t->pszWord,g_cxx.iChar);
		g_cxx.iChar = cppGetc();
		uInfo = UINFO(g_cxx.iChar);
		if(uInfo & (CXXCharTypeOperator | CXXCharTypeNamedSingleOrOperatorToken))
		{
			t->eType = CXXTokenTypeOperator;
			do {
				vStringPut(t->pszWord,g_cxx.iChar);
				g_cxx.iChar = cppGetc();
				uInfo = UINFO(g_cxx.iChar);
			} while(
					uInfo &
						(CXXCharTypeOperator | CXXCharTypeNamedSingleOrOperatorToken)
				);
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}

	if(uInfo & CXXCharTypeNamedSingleCharToken)
	{
		// Always a one-character token with a dedicated type.
		t->eType = g_aCharTable[g_cxx.iChar].uSingleTokenType;
		vStringPut(t->pszWord,g_cxx.iChar);
		g_cxx.iChar = cppGetc();
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}

	if(uInfo & CXXCharTypeOperator)
	{
		// A run of operator characters is condensed into one operator token.
		t->eType = CXXTokenTypeOperator;
		vStringPut(t->pszWord,g_cxx.iChar);
		g_cxx.iChar = cppGetc();
		uInfo = UINFO(g_cxx.iChar);
		while(uInfo & CXXCharTypeOperator)
		{
			vStringPut(t->pszWord,g_cxx.iChar);
			g_cxx.iChar = cppGetc();
			uInfo = UINFO(g_cxx.iChar);
		}
		t->bFollowedBySpace = isspace(g_cxx.iChar);
		return true;
	}

	// Anything else is returned as a one-character token of unknown type.
	t->eType = CXXTokenTypeUnknown;
	vStringPut(t->pszWord,g_cxx.iChar);
	g_cxx.iChar = cppGetc();
	t->bFollowedBySpace = isspace(g_cxx.iChar);

	return true;
}
/*
 * Reads the next PHP token from the input file into `token`.
 *
 * While outside a PHP section (InPhp is false) the input is first advanced
 * with findPhpStart().  Comments, "?>" and "</script>" produce no token:
 * the scanner simply starts over.  The token's line number and file
 * position are recorded after leading whitespace has been skipped (and
 * again after a string has been parsed).
 *
 * As a side effect, a ';', '{' or '}' token resets the access and
 * implementation details stored in CurrentStatement.
 */
static void readToken (tokenInfo *const token)
{
	int ch;
	int rescan; /* non-zero when the last pass produced no token */

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

	do
	{
		rescan = 0;

		if (InPhp)
			ch = getcFromInputFile ();
		else
		{
			ch = findPhpStart ();
			if (ch != EOF)
				InPhp = TRUE;
		}

		ch = skipWhitespaces (ch);

		token->lineNumber   = getInputLineNumber ();
		token->filePosition = getInputFilePosition ();

		switch (ch)
		{
			/* single-character tokens */
			case EOF:  token->type = TOKEN_EOF;          break;
			case '(':  token->type = TOKEN_OPEN_PAREN;   break;
			case ')':  token->type = TOKEN_CLOSE_PAREN;  break;
			case ';':  token->type = TOKEN_SEMICOLON;    break;
			case ',':  token->type = TOKEN_COMMA;        break;
			case '.':  token->type = TOKEN_PERIOD;       break;
			case ':':  token->type = TOKEN_COLON;        break;
			case '{':  token->type = TOKEN_OPEN_CURLY;   break;
			case '}':  token->type = TOKEN_CLOSE_CURLY;  break;
			case '[':  token->type = TOKEN_OPEN_SQUARE;  break;
			case ']':  token->type = TOKEN_CLOSE_SQUARE; break;
			case '&':  token->type = TOKEN_AMPERSAND;    break;
			case '\\': token->type = TOKEN_BACKSLASH;    break;

			case '=':
			{
				/* "==" and "=>" are operators, a lone '=' is an assignment */
				const int next = getcFromInputFile ();

				if (next != '=' && next != '>')
				{
					ungetcToInputFile (next);
					token->type = TOKEN_EQUAL_SIGN;
				}
				else
					token->type = TOKEN_OPERATOR;
				break;
			}

			case '\'':
			case '"':
				token->type = TOKEN_STRING;
				parseString (token->string, ch);
				token->lineNumber   = getInputLineNumber ();
				token->filePosition = getInputFilePosition ();
				break;

			case '<':
			{
				int next = getcFromInputFile ();

				if (next == '/')
				{
					/* </script[:white:]*> */
					static const char tagName[] = "script";
					int matched = 0;

					/* consume the tag name, stopping at the first mismatch */
					while (tagName[matched] != '\0')
					{
						next = getcFromInputFile ();
						if (tolower (next) != tagName[matched])
							break;
						matched++;
					}
					if (tagName[matched] == '\0')
						next = skipWhitespaces (getcFromInputFile ());

					if (tagName[matched] == '\0' && next == '>')
					{
						/* end of the PHP chunk: look for the next one */
						InPhp = FALSE;
						rescan = 1;
					}
					else
					{
						/* only the last character read is pushed back */
						ungetcToInputFile (next);
						token->type = TOKEN_UNDEFINED;
					}
				}
				else if (next == '<')
				{
					next = getcFromInputFile ();
					if (next == '<')
					{
						/* "<<<" introduces a heredoc */
						token->type = TOKEN_STRING;
						parseHeredoc (token->string);
					}
					else
					{
						ungetcToInputFile (next);
						token->type = TOKEN_UNDEFINED;
					}
				}
				else
				{
					ungetcToInputFile (next);
					token->type = TOKEN_UNDEFINED;
				}
				break;
			}

			case '#': /* comment */
				skipSingleComment ();
				rescan = 1;
				break;

			case '+':
			case '-':
			case '*':
			case '%':
			{
				/* "X=" for any of these, and "->", are two-character operators */
				const int next = getcFromInputFile ();
				const int twoChars = (next == '=') || (ch == '-' && next == '>');

				if (! twoChars)
					ungetcToInputFile (next);
				token->type = TOKEN_OPERATOR;
				break;
			}

			case '/': /* division or comment start */
			{
				const int next = getcFromInputFile ();

				if (next == '/') /* single-line comment */
				{
					skipSingleComment ();
					rescan = 1;
				}
				else if (next == '*')
				{
					/* block comment: look for a '*' directly followed by '/' */
					for (;;)
					{
						ch = skipToCharacter ('*');
						if (ch != EOF)
						{
							ch = getcFromInputFile ();
							if (ch == '/')
								break;
							ungetcToInputFile (ch);
						}
						if (ch == EOF || ch == '\0')
							break;
					}
					rescan = 1;
				}
				else
				{
					/* "/=" is consumed as a whole */
					if (next != '=')
						ungetcToInputFile (next);
					token->type = TOKEN_OPERATOR;
				}
				break;
			}

			case '$': /* variable start */
			{
				const int next = getcFromInputFile ();

				if (isIdentChar (next))
				{
					parseIdentifier (token->string, next);
					token->type = TOKEN_VARIABLE;
				}
				else
				{
					ungetcToInputFile (next);
					token->type = TOKEN_UNDEFINED;
				}
				break;
			}

			case '?': /* maybe the end of the PHP chunk */
			{
				const int next = getcFromInputFile ();

				if (next == '>')
				{
					InPhp = FALSE;
					rescan = 1;
				}
				else
				{
					ungetcToInputFile (next);
					token->type = TOKEN_UNDEFINED;
				}
				break;
			}

			default:
				if (isIdentChar (ch))
				{
					parseIdentifier (token->string, ch);
					token->keyword = analyzeToken (token->string, getInputLanguage ());
					token->type = (token->keyword == KEYWORD_NONE)
						? TOKEN_IDENTIFIER
						: TOKEN_KEYWORD;
				}
				else
					token->type = TOKEN_UNDEFINED;
				break;
		}
	}
	while (rescan);

	switch (token->type)
	{
		case TOKEN_SEMICOLON:
		case TOKEN_OPEN_CURLY:
		case TOKEN_CLOSE_CURLY:
			/* reset current statement details on statement end, and when
			 * entering a deeper scope.
			 * it is a bit ugly to do this in readToken(), but it makes
			 * everything a lot simpler. */
			CurrentStatement.access = ACCESS_UNDEFINED;
			CurrentStatement.impl = IMPL_UNDEFINED;
			break;
		default:
			break;
	}
}
/*
 * Reads the next Go token from the input file into `token`.
 *
 * A newline is turned into a TOKEN_SEMICOLON when the previously returned
 * token was an identifier, a string, an "other" token or a closing
 * parenthesis/brace/bracket.  The type of the last returned token is kept
 * in a function-local static for that purpose.
 *
 * Comments produce no token: a line comment is replaced by a newline and a
 * block comment by a newline (if it contained one) or a space, which is
 * pushed back before scanning restarts.
 */
static void readToken (tokenInfo *const token)
{
	static tokenType lastTokenType = TOKEN_NONE;
	int c;

	/* lastTokenType does not change until this call returns, so decide once
	 * whether a newline terminates the statement */
	const int newlineEndsStatement =
		lastTokenType == TOKEN_IDENTIFIER ||
		lastTokenType == TOKEN_STRING ||
		lastTokenType == TOKEN_OTHER ||
		lastTokenType == TOKEN_CLOSE_PAREN ||
		lastTokenType == TOKEN_CLOSE_CURLY ||
		lastTokenType == TOKEN_CLOSE_SQUARE;

	token->type = TOKEN_NONE;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

	for (;;)
	{
		/* skip blanks, recording the position of every character read */
		do
		{
			c = fileGetc ();
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (newlineEndsStatement && c == '\n')
			{
				token->type = TOKEN_SEMICOLON;
				lastTokenType = token->type;
				return;
			}
		}
		while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

		switch (c)
		{
			case EOF: token->type = TOKEN_EOF;          break;
			case ';': token->type = TOKEN_SEMICOLON;    break;
			case '(': token->type = TOKEN_OPEN_PAREN;   break;
			case ')': token->type = TOKEN_CLOSE_PAREN;  break;
			case '{': token->type = TOKEN_OPEN_CURLY;   break;
			case '}': token->type = TOKEN_CLOSE_CURLY;  break;
			case '[': token->type = TOKEN_OPEN_SQUARE;  break;
			case ']': token->type = TOKEN_CLOSE_SQUARE; break;
			case '*': token->type = TOKEN_STAR;         break;
			case '.': token->type = TOKEN_DOT;          break;
			case ',': token->type = TOKEN_COMMA;        break;

			case '/':
			{
				int next = fileGetc ();

				if (next == '/')
				{
					fileSkipToCharacter ('\n');
					/* Line comments start with the
					 * character sequence // and
					 * continue through the next
					 * newline. A line comment acts
					 * like a newline.  */
					fileUngetc ('\n');
					continue;
				}
				if (next == '*')
				{
					boolean sawNewline = FALSE;

					for (;;)
					{
						/* run up to the next '*' (or EOF), noting newlines */
						do
						{
							next = fileGetc ();
							if (next == '\n')
								sawNewline = TRUE;
						}
						while (next != EOF && next != '*');

						c = fileGetc ();
						if (c == '/')
							break;
						fileUngetc (c);
						if (c == EOF || c == '\0')
							break;
					}

					fileUngetc (sawNewline ? '\n' : ' ');
					continue;
				}

				/* a lone slash */
				token->type = TOKEN_OTHER;
				fileUngetc (next);
				break;
			}

			case '"':
			case '\'':
			case '`':
				token->type = TOKEN_STRING;
				parseString (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				break;

			case '<':
			{
				/* "<-" is the only multi-character operator recognized */
				const int next = fileGetc ();

				if (next != '-')
				{
					fileUngetc (next);
					token->type = TOKEN_OTHER;
				}
				else
					token->type = TOKEN_LEFT_ARROW;
				break;
			}

			default:
				if (! isStartIdentChar (c))
					token->type = TOKEN_OTHER;
				else
				{
					parseIdentifier (token->string, c);
					token->lineNumber = getSourceLineNumber ();
					token->filePosition = getInputFilePosition ();
					token->keyword = lookupKeyword (vStringValue (token->string), Lang_go);
					token->type = isKeyword (token, KEYWORD_NONE)
						? TOKEN_IDENTIFIER
						: TOKEN_KEYWORD;
				}
				break;
		}

		/* a token was produced */
		break;
	}

	lastTokenType = token->type;
}
/* Moves the lexer to the next token and returns it (the same value is also
 * stored in lexer->cur_token).
 *
 * Runs of whitespace and comments are first consumed.  Unless
 * skip_whitspace is set, such a run is reported as one TOKEN_WHITESPACE
 * token when something other than EOF follows it; whitespace is needed to
 * properly render function signatures.
 *
 * Characters that do not start a recognized token are returned literally,
 * e.g. the token may be equal to the character '#'. */
static int advanceToken (lexerState *lexer, boolean skip_whitspace)
{
	boolean sawBlank = FALSE;

	lexer->line = getSourceLineNumber();
	lexer->pos = getInputFilePosition();

	/* Consume whitespace and comments */
	for (;;)
	{
		if (lexer->cur_c == EOF)
			break;

		if (isWhitespace(lexer->cur_c))
			scanWhitespace(lexer);
		else if (lexer->cur_c == '/' && (lexer->next_c == '/' || lexer->next_c == '*'))
			scanComments(lexer);
		else
			break;

		sawBlank = TRUE;
	}

	if (lexer->cur_c != EOF && sawBlank && !skip_whitspace)
	{
		lexer->cur_token = TOKEN_WHITESPACE;
		return lexer->cur_token;
	}

	/* The real token starts here */
	lexer->line = getSourceLineNumber();
	lexer->pos = getInputFilePosition();

	if (lexer->cur_c == EOF)
	{
		lexer->cur_token = TOKEN_EOF;
		return lexer->cur_token;
	}

	if (lexer->cur_c == '"')
	{
		scanString(lexer);
		lexer->cur_token = TOKEN_STRING;
		return lexer->cur_token;
	}

	if (lexer->cur_c == 'r' && (lexer->next_c == '#' || lexer->next_c == '"'))
	{
		scanRawString(lexer);
		lexer->cur_token = TOKEN_STRING;
		return lexer->cur_token;
	}

	if (isIdentifierStart(lexer->cur_c))
	{
		scanIdentifier(lexer);
		lexer->cur_token = TOKEN_IDENT;
		return lexer->cur_token;
	}

	/* These shift tokens aren't too important for tag-generation per se,
	 * but they confuse the skipUntil code which tracks the <> pairs. */
	if (lexer->cur_c == '>' && lexer->next_c == '>')
	{
		advanceNChar(lexer, 2);
		lexer->cur_token = TOKEN_RSHIFT;
		return lexer->cur_token;
	}

	if (lexer->cur_c == '<' && lexer->next_c == '<')
	{
		advanceNChar(lexer, 2);
		lexer->cur_token = TOKEN_LSHIFT;
		return lexer->cur_token;
	}

	if (lexer->cur_c == '-' && lexer->next_c == '>')
	{
		advanceNChar(lexer, 2);
		lexer->cur_token = TOKEN_RARROW;
		return lexer->cur_token;
	}

	/* Anything else: the character itself is the token */
	{
		const int literal = lexer->cur_c;

		advanceChar(lexer);
		lexer->cur_token = literal;
		return lexer->cur_token;
	}
}
/*
 * Reads the next JSON token from the input file into `token`.
 *
 * token             - receives the token type, its line number and file
 *                     position, and (for some tokens) its text.
 * includeStringRepr - when true, the contents of a string token (without
 *                     the surrounding quotes, escapes kept verbatim) are
 *                     stored in token->string; when false the string is
 *                     skipped and token->string stays empty.
 *
 * Whitespace (tab, space, CR, LF) before the token is skipped.  A word made
 * of identifier characters is classified as true/false/null through the
 * keyword table; any other such word is reported as TOKEN_NUMBER.
 */
static void readTokenFull (tokenInfo *const token,
						   boolean includeStringRepr)
{
	int c;

	token->type = TOKEN_UNDEFINED;
	vStringClear (token->string);

	do
		c = getcFromInputFile ();
	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;			break;
		case '[': token->type = TOKEN_OPEN_SQUARE;	break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;	break;
		case '{': token->type = TOKEN_OPEN_CURLY;	break;
		case '}': token->type = TOKEN_CLOSE_CURLY;	break;
		case ':': token->type = TOKEN_COLON;		break;
		case ',': token->type = TOKEN_COMMA;		break;

		case '"':
		{
			boolean escaped = FALSE;
			token->type = TOKEN_STRING;
			while (TRUE)
			{
				c = getcFromInputFile ();
				/* EOF ends the string even right after a backslash:
				 * it must never be treated as an escaped character and
				 * stored in the token text */
				if (c == EOF)
					break;
				/* we don't handle unicode escapes but they are safe */
				if (escaped)
					escaped = FALSE;
				else if (c == '\\')
					escaped = TRUE;
				else if (c >= 0x00 && c <= 0x1F)
					break; /* break on invalid, unescaped, control characters */
				else if (c == '"')
					break;
				if (includeStringRepr)
					vStringPut (token->string, c);
			}
			vStringTerminate (token->string);
			break;
		}

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				/* collect the whole word, then push back the character
				 * that ended it */
				do
				{
					vStringPut (token->string, c);
					c = getcFromInputFile ();
				}
				while (c != EOF && isIdentChar (c));
				vStringTerminate (token->string);
				ungetcToInputFile (c);
				switch (lookupKeyword (vStringValue (token->string), Lang_json))
				{
					case KEYWORD_true:	token->type = TOKEN_TRUE;	break;
					case KEYWORD_false:	token->type = TOKEN_FALSE;	break;
					case KEYWORD_null:	token->type = TOKEN_NULL;	break;
					/* anything that is not a known literal is taken to be
					 * a number */
					default:			token->type = TOKEN_NUMBER;	break;
				}
			}
			break;
	}
}
// // This is called after a full enum/struct/class/union declaration // that ends with a closing bracket. // static boolean cxxParserParseEnumStructClassOrUnionFullDeclarationTrailer( boolean bParsingTypedef, enum CXXKeyword eTagKeyword, enum CXXTagKind eTagKind, const char * szTypeName ) { CXX_DEBUG_ENTER(); cxxTokenChainClear(g_cxx.pTokenChain); CXX_DEBUG_PRINT( "Parse enum/struct/class/union trailer, typename is '%s'", szTypeName ); MIOPos oFilePosition = getInputFilePosition(); int iFileLine = getInputLineNumber(); if(!cxxParserParseUpToOneOf(CXXTokenTypeEOF | CXXTokenTypeSemicolon)) { CXX_DEBUG_LEAVE_TEXT("Failed to parse up to EOF/semicolon"); return FALSE; } if(cxxTokenTypeIs(g_cxx.pToken,CXXTokenTypeEOF)) { // It's a syntax error, but we can be tolerant here. CXX_DEBUG_LEAVE_TEXT("Got EOF after enum/class/struct/union block"); return TRUE; } if(g_cxx.pTokenChain->iCount < 2) { CXX_DEBUG_LEAVE_TEXT("Nothing interesting after enum/class/struct block"); return TRUE; } // fake the initial two tokens CXXToken * pIdentifier = cxxTokenCreate(); pIdentifier->oFilePosition = oFilePosition; pIdentifier->iLineNumber = iFileLine; pIdentifier->eType = CXXTokenTypeIdentifier; pIdentifier->bFollowedBySpace = TRUE; vStringCatS(pIdentifier->pszWord,szTypeName); cxxTokenChainPrepend(g_cxx.pTokenChain,pIdentifier); CXXToken * pKeyword = cxxTokenCreate(); pKeyword->oFilePosition = oFilePosition; pKeyword->iLineNumber = iFileLine; pKeyword->eType = CXXTokenTypeKeyword; pKeyword->eKeyword = eTagKeyword; pKeyword->bFollowedBySpace = TRUE; vStringCatS(pKeyword->pszWord,cxxTagGetKindOptions()[eTagKind].name); cxxTokenChainPrepend(g_cxx.pTokenChain,pKeyword); if(bParsingTypedef) cxxParserExtractTypedef(g_cxx.pTokenChain,TRUE); else cxxParserExtractVariableDeclarations(g_cxx.pTokenChain,0); CXX_DEBUG_LEAVE(); return TRUE; }
/*
 * Reads the next JavaScript token from the input file into `token`.
 *
 * token            - receives type, keyword, text, line number and position.
 * include_newlines - when set, a line break is not skipped as whitespace:
 *                    depending on the previous token it is either dropped
 *                    or reported as a TOKEN_SEMICOLON.
 * repr             - optional; when not NULL, a textual representation of
 *                    the token is appended to it (a single space stands for
 *                    any skipped whitespace).
 *
 * Comments and a shebang on the first line produce no token.  The type of
 * the returned token is remembered in LastTokenType.
 */
static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr)
{
	int c;
	int nread; /* characters read by the whitespace loop, the token start included */

	token->type    = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

	for (;;)
	{
		nread = 0;
		for (;;)
		{
			c = fileGetc ();
			nread++;
			if (c == '\t' || c == ' ')
				continue;
			if ((c == '\r' || c == '\n') && ! include_newlines)
				continue;
			break;
		}

		token->lineNumber   = getSourceLineNumber ();
		token->filePosition = getInputFilePosition ();

		if (repr)
		{
			if (nread > 1)
				vStringPut (repr, ' ');
			vStringPut (repr, c);
		}

		switch (c)
		{
			case EOF: token->type = TOKEN_EOF;          break;
			case '(': token->type = TOKEN_OPEN_PAREN;   break;
			case ')': token->type = TOKEN_CLOSE_PAREN;  break;
			case ';': token->type = TOKEN_SEMICOLON;    break;
			case ',': token->type = TOKEN_COMMA;        break;
			case '.': token->type = TOKEN_PERIOD;       break;
			case ':': token->type = TOKEN_COLON;        break;
			case '{': token->type = TOKEN_OPEN_CURLY;   break;
			case '}': token->type = TOKEN_CLOSE_CURLY;  break;
			case '=': token->type = TOKEN_EQUAL_SIGN;   break;
			case '[': token->type = TOKEN_OPEN_SQUARE;  break;
			case ']': token->type = TOKEN_CLOSE_SQUARE; break;

			case '+':
			case '-':
			{
				const int next = fileGetc ();

				if (next != c)
				{
					fileUngetc (next);
					token->type = TOKEN_BINARY_OPERATOR;
				}
				else /* ++ or -- */
					token->type = TOKEN_POSTFIX_OPERATOR;
				break;
			}

			case '*':
			case '%':
			case '?':
			case '>':
			case '<':
			case '^':
			case '|':
			case '&':
				token->type = TOKEN_BINARY_OPERATOR;
				break;

			case '\r':
			case '\n':
				/* This isn't strictly correct per the standard, but following
				 * the real rules means understanding all statements, and
				 * that's not what the parser currently does.  What we do here
				 * is a guess, by avoiding inserting semicolons that would make
				 * the statement on the left invalid.  Hopefully this should
				 * not have false negatives (e.g. should not miss insertion of
				 * a semicolon) but might have false positives (e.g. it will
				 * wrongfully emit a semicolon for the newline in "foo\n+bar").
				 * This should however be mostly harmless as we only deal with
				 * newlines in specific situations where we know a false
				 * positive wouldn't hurt too bad. */
				switch (LastTokenType)
				{
					/* these cannot be the end of a statement, so hold the newline */
					case TOKEN_EQUAL_SIGN:
					case TOKEN_COLON:
					case TOKEN_PERIOD:
					case TOKEN_FORWARD_SLASH:
					case TOKEN_BINARY_OPERATOR:
					/* and these already end one, no need to duplicate it */
					case TOKEN_SEMICOLON:
					case TOKEN_COMMA:
					case TOKEN_CLOSE_CURLY:
					case TOKEN_OPEN_CURLY:
						include_newlines = FALSE; /* no need to recheck */
						continue;
					default:
						token->type = TOKEN_SEMICOLON;
						break;
				}
				break;

			case '\'':
			case '"':
				token->type = TOKEN_STRING;
				parseString (token->string, c);
				token->lineNumber   = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				if (repr)
				{
					vStringCat (repr, token->string);
					vStringPut (repr, c);
				}
				break;

			case '\\':
				c = fileGetc ();
				if (c != '\\' && c != '"' && !isspace (c))
					fileUngetc (c);
				token->type         = TOKEN_CHARACTER;
				token->lineNumber   = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				break;

			case '/':
			{
				const int next = fileGetc ();

				if (next == '*' || next == '/')
				{
					/* a comment: no token comes out of it */
					if (repr) /* remove the / we added */
						repr->buffer[--repr->length] = 0;

					if (next == '*')
					{
						for (;;)
						{
							skipToCharacter ('*');
							c = fileGetc ();
							if (c == '/')
								break;
							fileUngetc (c);
							if (c == EOF || c == '\0')
								break;
						}
					}
					else
					{
						skipToCharacter ('\n');
						/* if we care about newlines, put it back so it is seen */
						if (include_newlines)
							fileUngetc ('\n');
					}
					continue;
				}

				/* not a comment: division or the start of a regexp, depending
				 * on what came before */
				fileUngetc (next);
				switch (LastTokenType)
				{
					case TOKEN_CHARACTER:
					case TOKEN_IDENTIFIER:
					case TOKEN_STRING:
					case TOKEN_CLOSE_CURLY:
					case TOKEN_CLOSE_PAREN:
					case TOKEN_CLOSE_SQUARE:
						token->type = TOKEN_FORWARD_SLASH;
						break;

					default:
						token->type = TOKEN_REGEXP;
						parseRegExp ();
						token->lineNumber   = getSourceLineNumber ();
						token->filePosition = getInputFilePosition ();
						break;
				}
				break;
			}

			case '#':
				/* skip shebang in case of e.g. Node.js scripts */
				if (token->lineNumber <= 1)
				{
					c = fileGetc ();
					if (c == '!')
					{
						skipToCharacter ('\n');
						continue;
					}
					fileUngetc (c);
				}
				token->type = TOKEN_UNDEFINED;
				break;

			default:
				if (isIdentChar (c))
				{
					parseIdentifier (token->string, c);
					token->lineNumber   = getSourceLineNumber ();
					token->filePosition = getInputFilePosition ();
					token->keyword = analyzeToken (token->string);
					token->type = isKeyword (token, KEYWORD_NONE)
						? TOKEN_IDENTIFIER
						: TOKEN_KEYWORD;
					/* the first character is already in repr */
					if (repr && vStringLength (token->string) > 1)
						vStringCatS (repr, vStringValue (token->string) + 1);
				}
				else
					token->type = TOKEN_UNDEFINED;
				break;
		}

		/* a token was produced */
		break;
	}

	LastTokenType = token->type;
}
/*
 * Reads the next JavaScript token from the input file into `token`.
 *
 * token            - receives type, keyword, text, line number and position.
 * include_newlines - when set, a line break found before the token may cause
 *                    a TOKEN_SEMICOLON to be returned in its place; the real
 *                    token is then held in NextToken and returned by the
 *                    next call.
 * repr             - optional; when not NULL, a textual representation of
 *                    the token is appended to it (a single space stands for
 *                    any skipped whitespace).
 *
 * Comments and a shebang on the first line produce no token.  The type of
 * the returned token is remembered in LastTokenType; it drives both the
 * division/regexp decision for '/' and the semicolon insertion.
 */
static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr)
{
	int c;
	int i;
	boolean newline_encountered = FALSE;

	/* if we've got a token held back, emit it */
	if (NextToken)
	{
		copyToken (token, NextToken, FALSE);
		deleteToken (NextToken);
		NextToken = NULL;
		/* The held token is now the last one returned.  Without this,
		 * LastTokenType would still be the inserted semicolon: a following
		 * '/' would be taken for a regexp start and the next newline would
		 * never get its semicolon. */
		LastTokenType = token->type;
		return;
	}

	token->type			= TOKEN_UNDEFINED;
	token->keyword		= KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	/* skip whitespace; i counts the characters read, token start included */
	i = 0;
	do
	{
		c = getcFromInputFile ();
		if (include_newlines && (c == '\r' || c == '\n'))
			newline_encountered = TRUE;
		i++;
	}
	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	if (repr)
	{
		if (i > 1)
			vStringPut (repr, ' ');
		vStringPut (repr, c);
	}

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;					break;
		case '(': token->type = TOKEN_OPEN_PAREN;			break;
		case ')': token->type = TOKEN_CLOSE_PAREN;			break;
		case ';': token->type = TOKEN_SEMICOLON;			break;
		case ',': token->type = TOKEN_COMMA;				break;
		case '.': token->type = TOKEN_PERIOD;				break;
		case ':': token->type = TOKEN_COLON;				break;
		case '{': token->type = TOKEN_OPEN_CURLY;			break;
		case '}': token->type = TOKEN_CLOSE_CURLY;			break;
		case '=': token->type = TOKEN_EQUAL_SIGN;			break;
		case '[': token->type = TOKEN_OPEN_SQUARE;			break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;			break;

		case '+':
		case '-':
		{
			int d = getcFromInputFile ();
			if (d == c) /* ++ or -- */
				token->type = TOKEN_POSTFIX_OPERATOR;
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_BINARY_OPERATOR;
			}
			break;
		}

		case '*':
		case '%':
		case '?':
		case '>':
		case '<':
		case '^':
		case '|':
		case '&':
			token->type = TOKEN_BINARY_OPERATOR;
			break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '`':
			token->type = TOKEN_TEMPLATE_STRING;
			parseTemplateString (token->string);
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '\\':
			c = getcFromInputFile ();
			if (c != '\\' && c != '"' && !isspace (c))
				ungetcToInputFile (c);
			token->type = TOKEN_CHARACTER;
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '/':
		{
			int d = getcFromInputFile ();
			if ( (d != '*') &&		/* is this the start of a comment? */
				 (d != '/') )		/* is a one line comment? */
			{
				ungetcToInputFile (d);
				/* after something that can end an operand this is a
				 * division, otherwise it starts a regexp */
				switch (LastTokenType)
				{
					case TOKEN_CHARACTER:
					case TOKEN_IDENTIFIER:
					case TOKEN_STRING:
					case TOKEN_TEMPLATE_STRING:
					case TOKEN_CLOSE_CURLY:
					case TOKEN_CLOSE_PAREN:
					case TOKEN_CLOSE_SQUARE:
						token->type = TOKEN_BINARY_OPERATOR;
						break;

					default:
						token->type = TOKEN_REGEXP;
						parseRegExp ();
						token->lineNumber = getInputLineNumber ();
						token->filePosition = getInputFilePosition ();
						break;
				}
			}
			else
			{
				if (repr) /* remove the / we added */
					repr->buffer[--repr->length] = 0;

				if (d == '*')
				{
					/* block comment: look for a '*' directly followed by '/' */
					do
					{
						skipToCharacterInInputFile ('*');
						c = getcFromInputFile ();
						if (c == '/')
							break;
						else
							ungetcToInputFile (c);
					} while (c != EOF && c != '\0');
					goto getNextChar;
				}
				else if (d == '/')	/* is this the start of a comment?  */
				{
					skipToCharacterInInputFile ('\n');
					/* if we care about newlines, put it back so it is seen */
					if (include_newlines)
						ungetcToInputFile ('\n');
					goto getNextChar;
				}
			}
			break;
		}

		case '#':
			/* skip shebang in case of e.g. Node.js scripts */
			if (token->lineNumber > 1)
				token->type = TOKEN_UNDEFINED;
			else if ((c = getcFromInputFile ()) != '!')
			{
				ungetcToInputFile (c);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				skipToCharacterInInputFile ('\n');
				goto getNextChar;
			}
			break;

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getInputLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string, Lang_js);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
				/* the first character is already in repr */
				if (repr && vStringLength (token->string) > 1)
					vStringCatS (repr, vStringValue (token->string) + 1);
			}
			break;
	}

	if (include_newlines && newline_encountered)
	{
		/* This isn't strictly correct per the standard, but following the
		 * real rules means understanding all statements, and that's not
		 * what the parser currently does.  What we do here is a guess, by
		 * avoiding inserting semicolons that would make the statement on
		 * the left or right obviously invalid.  Hopefully this should not
		 * have false negatives (e.g. should not miss insertion of a semicolon)
		 * but might have false positives (e.g. it will wrongfully emit a
		 * semicolon sometimes, i.e. for the newline in "foo\n(bar)").
		 * This should however be mostly harmless as we only deal with
		 * newlines in specific situations where we know a false positive
		 * wouldn't hurt too bad. */

		/* these already end a statement, so no need to duplicate it */
		#define IS_STMT_SEPARATOR(t) ((t) == TOKEN_SEMICOLON    || \
		                              (t) == TOKEN_EOF          || \
		                              (t) == TOKEN_COMMA        || \
		                              (t) == TOKEN_CLOSE_CURLY  || \
		                              (t) == TOKEN_OPEN_CURLY)
		/* these cannot be the start or end of a statement */
		#define IS_BINARY_OPERATOR(t) ((t) == TOKEN_EQUAL_SIGN      || \
		                               (t) == TOKEN_COLON           || \
		                               (t) == TOKEN_PERIOD          || \
		                               (t) == TOKEN_BINARY_OPERATOR)

		if (! IS_STMT_SEPARATOR(LastTokenType) &&
		    ! IS_STMT_SEPARATOR(token->type) &&
		    ! IS_BINARY_OPERATOR(LastTokenType) &&
		    ! IS_BINARY_OPERATOR(token->type) &&
		    /* these cannot be followed by a semicolon */
		    ! (LastTokenType == TOKEN_OPEN_PAREN ||
		       LastTokenType == TOKEN_OPEN_SQUARE))
		{
			/* hold the token... */
			Assert (NextToken == NULL);
			NextToken = newToken ();
			copyToken (NextToken, token, FALSE);

			/* ...and emit a semicolon instead */
			token->type		= TOKEN_SEMICOLON;
			token->keyword	= KEYWORD_NONE;
			vStringClear (token->string);
			if (repr)
				vStringPut (token->string, '\n');
		}

		#undef IS_STMT_SEPARATOR
		#undef IS_BINARY_OPERATOR
	}

	LastTokenType = token->type;
}
/*
 * Reads the next token from the input into `token`.
 *
 * Blanks, tabs, newlines and both comment forms are skipped first.  The
 * token's line number and file position are recorded once the first
 * significant character is found, and refreshed after a string, an escape,
 * a regular expression or an identifier has been consumed.
 *
 * On end of input this function does not return: it longjmp()s to
 * `Exception` with ExceptionEOF.
 *
 * The resulting token type is stored in LastTokenType; that value is what
 * decides, on the next call, whether a '/' is a plain slash or the start
 * of a regular expression.
 */
static void readToken (tokenInfo *const token)
{
	int ch;

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

	/* Find the first character that is neither whitespace nor part of a
	 * comment; each skipped comment restarts the search. */
	for (;;)
	{
		int next;

		do
			ch = fileGetc ();
		while (ch == ' ' || ch == '\t' || ch == '\n');

		token->lineNumber = getSourceLineNumber ();
		token->filePosition = getInputFilePosition ();

		if (ch != '/')
			break;

		next = fileGetc ();
		if (next == '*')
		{
			/* block comment: look for a '*' directly followed by '/' */
			for (;;)
			{
				fileSkipToCharacter ('*');
				ch = fileGetc ();
				if (ch == '/')
					break;
				fileUngetc (ch);
				if (ch == EOF || ch == '\0')
					break;
			}
			continue;
		}
		if (next == '/')
		{
			/* one line comment */
			fileSkipToCharacter ('\n');
			continue;
		}

		/* a lone '/': give the look-ahead back and classify it below */
		fileUngetc (next);
		break;
	}

	switch (ch)
	{
		case EOF:
			longjmp (Exception, (int)ExceptionEOF);
			break;

		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case ';': token->type = TOKEN_SEMICOLON;    break;
		case ',': token->type = TOKEN_COMMA;        break;
		case '.': token->type = TOKEN_PERIOD;       break;
		case ':': token->type = TOKEN_COLON;        break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case '=': token->type = TOKEN_EQUAL_SIGN;   break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, ch);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '\\':
			/* The character after the backslash is swallowed only when it
			 * is a backslash, a double quote or whitespace. */
			ch = fileGetc ();
			if (ch != '\\' && ch != '"' && !isspace (ch))
				fileUngetc (ch);
			token->type = TOKEN_CHARACTER;
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '/':
			/* Directly after one of these token types the slash is kept
			 * as a slash; anywhere else it opens a regular expression. */
			if (LastTokenType == TOKEN_CHARACTER ||
				LastTokenType == TOKEN_KEYWORD ||
				LastTokenType == TOKEN_IDENTIFIER ||
				LastTokenType == TOKEN_STRING ||
				LastTokenType == TOKEN_CLOSE_CURLY ||
				LastTokenType == TOKEN_CLOSE_PAREN ||
				LastTokenType == TOKEN_CLOSE_SQUARE)
			{
				token->type = TOKEN_FORWARD_SLASH;
			}
			else
			{
				token->type = TOKEN_REGEXP;
				parseRegExp ();
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
			}
			break;

		default:
			if (isIdentChar (ch))
			{
				parseIdentifier (token->string, ch);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string, Lang_js);
				token->type = isKeyword (token, KEYWORD_NONE)
					? TOKEN_IDENTIFIER
					: TOKEN_KEYWORD;
			}
			else
				token->type = TOKEN_UNDEFINED;
			break;
	}

	LastTokenType = token->type;
}
/*
 * Reads the next token from the input into `token`.
 *
 * Blanks, tabs and newlines are skipped, as are '%' comments, which run
 * to the end of the line.  The token's line number and file position are
 * updated for every character read while skipping, and again after an
 * identifier has been consumed.
 *
 * A backslash followed by an alphabetic character starts a name that is
 * looked up as a keyword; a backslash followed by anything else leaves
 * the token as TOKEN_UNDEFINED and pushes that character back.
 *
 * On end of input this function does not return: it longjmp()s to
 * `Exception` with ExceptionEOF.
 */
static void readToken (tokenInfo *const token)
{
	int ch;

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

	/* Skip whitespace; every '%' comment found restarts the search. */
	for (;;)
	{
		do
		{
			ch = fileGetc ();
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
		}
		while (ch == ' ' || ch == '\t' || ch == '\n');

		if (ch != '%')
			break;

		fileSkipToCharacter ('\n');	/* % are single line comments */
	}

	switch (ch)
	{
		case EOF:
			longjmp (Exception, (int)ExceptionEOF);
			break;

		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case ',': token->type = TOKEN_COMMA;        break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;
		case '*': token->type = TOKEN_STAR;         break;

		case '\\':
			/*
			 * All Tex tags start with a backslash; only a backslash
			 * followed by an alphabetic character can be one.
			 */
			ch = fileGetc ();
			if (isalpha (ch))
			{
				parseIdentifier (token->string, ch);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				/* NOTE(review): the lookup uses the Lang_js handle --
				 * confirm this is the intended language variable for
				 * this parser. */
				token->keyword = analyzeToken (token->string, Lang_js);
				token->type = isKeyword (token, KEYWORD_NONE)
					? TOKEN_IDENTIFIER
					: TOKEN_KEYWORD;
			}
			else
				fileUngetc (ch);
			break;

		default:
			if (isIdentChar (ch))
			{
				parseIdentifier (token->string, ch);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->type = TOKEN_IDENTIFIER;
			}
			else
				token->type = TOKEN_UNDEFINED;
			break;
	}
}
/*
 * Reads the next token from the input into `token`.
 *
 * Blanks, tabs and newlines are skipped, as are "--" comments, which run
 * to the end of the line.  The token's line number and file position are
 * updated for every character read while skipping, and again after a
 * string or an identifier has been consumed.
 *
 * End of input is reported as TOKEN_EOF.  Identifiers are looked up with
 * lookupCaseKeyword() and become TOKEN_KEYWORD on a match, TOKEN_IDENTIFIER
 * otherwise.  A single quote, and any character that cannot start an
 * identifier, leaves the token as TOKEN_NONE.
 */
static void readToken (tokenInfo * const token)
{
	int ch;

	token->type = TOKEN_NONE;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

	/* Skip whitespace; every "--" comment found restarts the search. */
	for (;;)
	{
		do
		{
			ch = getcFromInputFile ();
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
		}
		while (ch == ' ' || ch == '\t' || ch == '\n');

		if (ch != '-')
			break;

		ch = getcFromInputFile ();
		if (ch != '-')
		{
			/* a lone '-' is an operator; whitespace after it is dropped,
			 * anything else is pushed back for the next call */
			if (!isspace (ch))
				ungetcToInputFile (ch);
			token->type = TOKEN_OPERATOR;
			return;
		}

		/* start of a comment */
		skipToCharacterInInputFile ('\n');
	}

	switch (ch)
	{
		case EOF: token->type = TOKEN_EOF;         break;
		case '(': token->type = TOKEN_OPEN_PAREN;  break;
		case ')': token->type = TOKEN_CLOSE_PAREN; break;
		case ';': token->type = TOKEN_SEMICOLON;   break;
		case '.': token->type = TOKEN_PERIOD;      break;
		case ',': token->type = TOKEN_COMMA;       break;

		case '\'':
			/* only single chars are inside simple quotes, or it is for
			 * attributes, so we don't care: the type stays TOKEN_NONE */
			break;

		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, ch);
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		default:
			if (isIdentChar1 (ch))
			{
				parseIdentifier (token->string, ch);
				token->lineNumber = getInputLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = lookupCaseKeyword (
					vStringValue (token->string), Lang_vhdl);
				token->type = isKeyword (token, KEYWORD_NONE)
					? TOKEN_IDENTIFIER
					: TOKEN_KEYWORD;
			}
			else
				token->type = TOKEN_NONE;
			break;
	}
}
static void findCssTags (void) { bool readNextToken = true; tokenInfo token; token.string = vStringNew (); do { if (readNextToken) readToken (&token); readNextToken = true; if (token.type == '@') { /* At-rules, from the "@" to the next block or semicolon */ bool useContents; readToken (&token); useContents = (strcmp (vStringValue (token.string), "media") == 0 || strcmp (vStringValue (token.string), "supports") == 0); while (token.type != TOKEN_EOF && token.type != ';' && token.type != '{') { readToken (&token); } /* HACK: we *eat* the opening '{' for medias and the like so that * the content is parsed as if it was at the root */ readNextToken = useContents && token.type == '{'; } else if (token.type == TOKEN_SELECTOR) { /* collect selectors and make a tag */ cssKind kind = K_SELECTOR; MIOPos filePosition; unsigned long lineNumber; vString *selector = vStringNew (); do { if (vStringLength (selector) > 0) vStringPut (selector, ' '); vStringCat (selector, token.string); kind = classifySelector (token.string); lineNumber = getInputLineNumber (); filePosition = getInputFilePosition (); readToken (&token); /* handle attribute selectors */ if (token.type == '[') { int depth = 1; while (depth > 0 && token.type != TOKEN_EOF) { vStringCat (selector, token.string); readToken (&token); if (token.type == '[') depth++; else if (token.type == ']') depth--; } if (token.type != TOKEN_EOF) vStringCat (selector, token.string); readToken (&token); } } while (token.type == TOKEN_SELECTOR); /* we already consumed the next token, don't read it twice */ readNextToken = false; if (CssKinds[kind].enabled) { tagEntryInfo e; initTagEntry (&e, vStringValue (selector), &(CssKinds[kind])); e.lineNumber = lineNumber; e.filePosition = filePosition; makeTagEntry (&e); } vStringDelete (selector); } else if (token.type == '{') { /* skip over { ... 
} */ int depth = 1; while (depth > 0 && token.type != TOKEN_EOF) { readToken (&token); if (token.type == '{') depth++; else if (token.type == '}') depth--; } } } while (token.type != TOKEN_EOF); vStringDelete (token.string); }
/*
 * Reads the next token from the input into `token`.
 *
 * Whitespace is skipped through skipWhitespaces(); '#' comments and
 * "<# ... #>" multiline comments are skipped as well.  The token's line
 * number and file position are recorded once the first significant
 * character is found, and refreshed after a string has been consumed.
 *
 * End of input is reported as TOKEN_EOF.  A '$' followed by an identifier
 * character yields TOKEN_VARIABLE holding the name without the '$'.  A
 * bare identifier is TOKEN_KEYWORD when isTokenFunction() accepts it and
 * TOKEN_IDENTIFIER otherwise.  Anything unrecognized is TOKEN_UNDEFINED.
 */
static void readToken (tokenInfo *const token)
{
	int ch;

	token->type = TOKEN_UNDEFINED;
	vStringClear (token->string);

	/* Find the first character that is neither whitespace nor part of a
	 * comment; each skipped comment restarts the search. */
	for (;;)
	{
		ch = skipWhitespaces (getcFromInputFile ());

		token->lineNumber = getInputLineNumber ();
		token->filePosition = getInputFilePosition ();

		if (ch == '#')
		{
			/* comment */
			skipSingleComment ();
			continue;
		}

		if (ch == '<')
		{
			const int next = getcFromInputFile ();

			if (next != '#')
			{
				/* a plain '<' is not a token we know about */
				ungetcToInputFile (next);
				token->type = TOKEN_UNDEFINED;
				return;
			}

			/* <# ... #> multiline comment: look for a '#' directly
			 * followed by '>' */
			for (;;)
			{
				ch = skipToCharacterInInputFile ('#');
				if (ch == EOF)
					break;
				ch = getcFromInputFile ();
				if (ch == '>')
					break;
				ungetcToInputFile (ch);
				if (ch == EOF)
					break;
			}
			continue;
		}

		break;
	}

	switch (ch)
	{
		case EOF: token->type = TOKEN_EOF;          break;
		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case ';': token->type = TOKEN_SEMICOLON;    break;
		case ',': token->type = TOKEN_COMMA;        break;
		case '.': token->type = TOKEN_PERIOD;       break;
		case ':': token->type = TOKEN_COLON;        break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;
		case '=': token->type = TOKEN_EQUAL_SIGN;   break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, ch);
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '+':
		case '-':
		case '*':
		case '/':
		case '%':
		{
			/* an '=' directly after the operator belongs to it */
			const int next = getcFromInputFile ();

			if (next != '=')
				ungetcToInputFile (next);
			token->type = TOKEN_OPERATOR;
			break;
		}

		case '$': /* variable start */
		{
			const int next = getcFromInputFile ();

			if (isIdentChar (next))
			{
				parseIdentifier (token->string, next);
				token->type = TOKEN_VARIABLE;
			}
			else
			{
				ungetcToInputFile (next);
				token->type = TOKEN_UNDEFINED;
			}
			break;
		}

		default:
			if (isIdentChar (ch))
			{
				parseIdentifier (token->string, ch);
				if (isTokenFunction (token->string))
					token->type = TOKEN_KEYWORD;
				else
					token->type = TOKEN_IDENTIFIER;
			}
			else
				token->type = TOKEN_UNDEFINED;
			break;
	}
}