/** * At the heart of this algorithm are two stacks. * There is the Paren Stack (PS) and the Frame stack. * * The PS (pse in the code) keeps track of braces, parens, * if/else/switch/do/while/etc items -- anything that is nestable. * Complex statements go through stages. * Take this simple if statement as an example: * if ( x ) { x--; } * * The stack would change like so: 'token' stack afterwards * 'if' [IF - 1] * '(' [IF - 1] [PAREN OPEN] * 'x' [IF - 1] [PAREN OPEN] * ')' [IF - 2] <- note that the state was incremented * '{' [IF - 2] [BRACE OPEN] * 'x' [IF - 2] [BRACE OPEN] * '--' [IF - 2] [BRACE OPEN] * ';' [IF - 2] [BRACE OPEN] * '}' [IF - 3] * <- lack of else kills the IF, closes statement * * Virtual braces example: * if ( x ) x--; else x++; * * 'if' [IF - 1] * '(' [IF - 1] [PAREN OPEN] * 'x' [IF - 1] [PAREN OPEN] * ')' [IF - 2] * 'x' [IF - 2] [VBRACE OPEN] <- VBrace open inserted before because '{' was not next * '--' [IF - 2] [VBRACE OPEN] * ';' [IF - 3] <- VBrace close inserted after semicolon * 'else' [ELSE - 0] <- IF changed into ELSE * 'x' [ELSE - 0] [VBRACE OPEN] <- lack of '{' -> VBrace * '++' [ELSE - 0] [VBRACE OPEN] * ';' [ELSE - 0] <- VBrace close inserted after semicolon * <- ELSE removed after statement close * * The pse stack is kept on a frame stack. * The frame stack is need for languages that support preprocessors (C, C++, C#) * that can arbitrarily change code flow. It also isolates #define macros so * that they are indented independently and do not affect the rest of the program. * * When an #if is hit, a copy of the current frame is push on the frame stack. * When an #else/#elif is hit, a copy of the current stack is pushed under the * #if frame and the original (pre-#if) frame is copied to the current frame. * When #endif is hit, the top frame is popped. * This has the following effects: * - a simple #if / #endif does not affect program flow * - #if / #else /#endif - continues from the #if clause * * When a #define is entered, the current frame is pushed and cleared. * When a #define is exited, the frame is popped. */ static void parse_cleanup(struct parse_frame *frm, chunk_t *pc) { c_token_t parent = CT_NONE; chunk_t *prev; LOG_FMT(LTOK, "%s:%d] %16s - tos:%d/%16s stg:%d\n", __func__, pc->orig_line, get_token_name(pc->type), frm->pse_tos, get_token_name(frm->pse[frm->pse_tos].type), frm->pse[frm->pse_tos].stage); /* Mark statement starts */ if (((frm->stmt_count == 0) || (frm->expr_count == 0)) && !chunk_is_semicolon(pc) && (pc->type != CT_BRACE_CLOSE) && (pc->type != CT_VBRACE_CLOSE) && !chunk_is_str(pc, ")", 1) && !chunk_is_str(pc, "]", 1)) { pc->flags |= PCF_EXPR_START; pc->flags |= (frm->stmt_count == 0) ? PCF_STMT_START : 0; LOG_FMT(LSTMT, "%d] 1.marked %s as %s start st:%d ex:%d\n", pc->orig_line, pc->str.c_str(), (pc->flags &PCF_STMT_START) ? "stmt" : "expr", frm->stmt_count, frm->expr_count); } frm->stmt_count++; frm->expr_count++; if (frm->sparen_count > 0) { int tmp; pc->flags |= PCF_IN_SPAREN; /* Mark everything in the a for statement */ for (tmp = frm->pse_tos - 1; tmp >= 0; tmp--) { if (frm->pse[tmp].type == CT_FOR) { pc->flags |= PCF_IN_FOR; break; } } /* Mark the parent on semicolons in for() stmts */ if ((pc->type == CT_SEMICOLON) && (frm->pse_tos > 1) && (frm->pse[frm->pse_tos - 1].type == CT_FOR)) { pc->parent_type = CT_FOR; } } /* Check the progression of complex statements */ if (frm->pse[frm->pse_tos].stage != BS_NONE) { if (check_complex_statements(frm, pc)) { return; } } /** * Check for a virtual brace statement close due to a semicolon. * The virtual brace will get handled the next time through. * The semicolon isn't handled at all. * TODO: may need to float VBRACE past comments until newline? */ if (frm->pse[frm->pse_tos].type == CT_VBRACE_OPEN) { if (chunk_is_semicolon(pc)) { cpd.consumed = true; close_statement(frm, pc); } else if ((cpd.lang_flags & LANG_PAWN) != 0) { if (pc->type == CT_BRACE_CLOSE) { close_statement(frm, pc); } } } /* Handle close paren, vbrace, brace, and square */ if ((pc->type == CT_PAREN_CLOSE) || (pc->type == CT_BRACE_CLOSE) || (pc->type == CT_VBRACE_CLOSE) || (pc->type == CT_ANGLE_CLOSE) || (pc->type == CT_MACRO_CLOSE) || (pc->type == CT_SQUARE_CLOSE)) { /* Change CT_PAREN_CLOSE into CT_SPAREN_CLOSE or CT_FPAREN_CLOSE */ if ((pc->type == CT_PAREN_CLOSE) && ((frm->pse[frm->pse_tos].type == CT_FPAREN_OPEN) || (frm->pse[frm->pse_tos].type == CT_SPAREN_OPEN))) { pc->type = (c_token_t)(frm->pse[frm->pse_tos].type + 1); if (pc->type == CT_SPAREN_CLOSE) { frm->sparen_count--; pc->flags &= ~PCF_IN_SPAREN; } } /* Make sure the open / close match */ if (pc->type != (frm->pse[frm->pse_tos].type + 1)) { if ((frm->pse[frm->pse_tos].type != CT_NONE) && (frm->pse[frm->pse_tos].type != CT_PP_DEFINE)) { LOG_FMT(LWARN, "%s:%d Error: Unexpected '%s' for '%s', which was on line %d\n", cpd.filename, pc->orig_line, pc->str.c_str(), get_token_name(frm->pse[frm->pse_tos].pc->type), frm->pse[frm->pse_tos].pc->orig_line); print_stack(LBCSPOP, "=Error ", frm, pc); cpd.error_count++; } } else { cpd.consumed = true; /* Copy the parent, update the paren/brace levels */ pc->parent_type = frm->pse[frm->pse_tos].parent; frm->level--; if ((pc->type == CT_BRACE_CLOSE) || (pc->type == CT_VBRACE_CLOSE) || (pc->type == CT_MACRO_CLOSE)) { frm->brace_level--; } pc->level = frm->level; pc->brace_level = frm->brace_level; /* Pop the entry */ frm->pse_tos--; print_stack(LBCSPOP, "-Close ", frm, pc); /* See if we are in a complex statement */ if (frm->pse[frm->pse_tos].stage != BS_NONE) { handle_complex_close(frm, pc); } } } /* In this state, we expect a semicolon, but we'll also hit the closing * sparen, so we need to check cpd.consumed to see if the close sparen was * aleady handled. */ if (frm->pse[frm->pse_tos].stage == BS_WOD_SEMI) { chunk_t *tmp = pc; if (cpd.consumed) { /* If consumed, then we are on the close sparen. * PAWN: Check the next chunk for a semicolon. If it isn't, then * add a virtual semicolon, which will get handled on the next pass. */ if (cpd.lang_flags & LANG_PAWN) { tmp = chunk_get_next_ncnl(pc); if ((tmp->type != CT_SEMICOLON) && (tmp->type != CT_VSEMICOLON)) { pawn_add_vsemi_after(pc); } } } else { /* Complain if this ISN'T a semicolon, but close out WHILE_OF_DO anyway */ if ((pc->type == CT_SEMICOLON) || (pc->type == CT_VSEMICOLON)) { cpd.consumed = true; pc->parent_type = CT_WHILE_OF_DO; } else { LOG_FMT(LWARN, "%s:%d: Error: Expected a semicolon for WHILE_OF_DO, but got '%s'\n", cpd.filename, pc->orig_line, get_token_name(pc->type)); cpd.error_count++; } handle_complex_close(frm, pc); } } /* Get the parent type for brace and paren open */ parent = pc->parent_type; if ((pc->type == CT_PAREN_OPEN) || (pc->type == CT_FPAREN_OPEN) || (pc->type == CT_SPAREN_OPEN) || (pc->type == CT_BRACE_OPEN)) { prev = chunk_get_prev_ncnl(pc); if (prev != NULL) { if ((pc->type == CT_PAREN_OPEN) || (pc->type == CT_FPAREN_OPEN) || (pc->type == CT_SPAREN_OPEN)) { /* Set the parent for parens and change paren type */ if (frm->pse[frm->pse_tos].stage != BS_NONE) { pc->type = CT_SPAREN_OPEN; parent = frm->pse[frm->pse_tos].type; frm->sparen_count++; } else if (prev->type == CT_FUNCTION) { pc->type = CT_FPAREN_OPEN; parent = CT_FUNCTION; } else { /* no need to set parent */ } } else /* must be CT_BRACE_OPEN */ { /* Set the parent for open braces */ if (frm->pse[frm->pse_tos].stage != BS_NONE) { parent = frm->pse[frm->pse_tos].type; } else if ((prev->type == CT_ASSIGN) && (prev->str[0] == '=')) { parent = CT_ASSIGN; } else if (prev->type == CT_FPAREN_CLOSE) { parent = CT_FUNCTION; } else { /* no need to set parent */ } } } } /** * Adjust the level for opens & create a stack entry * Note that CT_VBRACE_OPEN has already been handled. */ if ((pc->type == CT_BRACE_OPEN) || (pc->type == CT_PAREN_OPEN) || (pc->type == CT_FPAREN_OPEN) || (pc->type == CT_SPAREN_OPEN) || (pc->type == CT_ANGLE_OPEN) || (pc->type == CT_MACRO_OPEN) || (pc->type == CT_SQUARE_OPEN)) { frm->level++; if ((pc->type == CT_BRACE_OPEN) || (pc->type == CT_MACRO_OPEN)) { frm->brace_level++; } push_fmr_pse(frm, pc, BS_NONE, "+Open "); frm->pse[frm->pse_tos].parent = parent; pc->parent_type = parent; } pattern_class patcls = get_token_pattern_class(pc->type); /** Create a stack entry for complex statements IF/DO/FOR/WHILE/SWITCH */ if (patcls == PATCLS_BRACED) { push_fmr_pse(frm, pc, (pc->type == CT_DO) ? BS_BRACE_DO : BS_BRACE2, "+ComplexBraced"); } else if (patcls == PATCLS_PBRACED) { brstage_e bs = BS_PAREN1; if ((pc->type == CT_WHILE) && maybe_while_of_do(pc)) { pc->type = CT_WHILE_OF_DO; bs = BS_WOD_PAREN; } push_fmr_pse(frm, pc, bs, "+ComplexParenBraced"); } else if (patcls == PATCLS_OPBRACED) { push_fmr_pse(frm, pc, BS_OP_PAREN1, "+ComplexOpParenBraced"); } else if (patcls == PATCLS_ELSE) { push_fmr_pse(frm, pc, BS_ELSEIF, "+ComplexElse"); } /* Mark simple statement/expression starts * - after { or } * - after ';', but not if the paren stack top is a paren * - after '(' that has a parent type of CT_FOR */ if ((pc->type == CT_SQUARE_OPEN) || ((pc->type == CT_BRACE_OPEN) && (pc->parent_type != CT_ASSIGN)) || (pc->type == CT_BRACE_CLOSE) || (pc->type == CT_VBRACE_CLOSE) || ((pc->type == CT_SPAREN_OPEN) && (pc->parent_type == CT_FOR)) || (chunk_is_semicolon(pc) && (frm->pse[frm->pse_tos].type != CT_PAREN_OPEN) && (frm->pse[frm->pse_tos].type != CT_FPAREN_OPEN) && (frm->pse[frm->pse_tos].type != CT_SPAREN_OPEN))) { LOG_FMT(LSTMT, "%s: %d> reset1 stmt on %s\n", __func__, pc->orig_line, pc->str.c_str()); frm->stmt_count = 0; frm->expr_count = 0; } /* Mark expression starts */ chunk_t *tmp = chunk_get_next_ncnl(pc); if ((pc->type == CT_ARITH) || (pc->type == CT_ASSIGN) || (pc->type == CT_CASE) || (pc->type == CT_COMPARE) || ((pc->type == CT_STAR) && tmp && (tmp->type != CT_STAR)) || (pc->type == CT_BOOL) || (pc->type == CT_MINUS) || (pc->type == CT_PLUS) || (pc->type == CT_ANGLE_OPEN) || (pc->type == CT_ANGLE_CLOSE) || (pc->type == CT_RETURN) || (pc->type == CT_GOTO) || (pc->type == CT_CONTINUE) || (pc->type == CT_PAREN_OPEN) || (pc->type == CT_FPAREN_OPEN) || (pc->type == CT_SPAREN_OPEN) || (pc->type == CT_BRACE_OPEN) || chunk_is_semicolon(pc) || (pc->type == CT_COMMA) || (pc->type == CT_NOT) || (pc->type == CT_INV) || (pc->type == CT_COLON) || (pc->type == CT_QUESTION)) { frm->expr_count = 0; LOG_FMT(LSTMT, "%s: %d> reset expr on %s\n", __func__, pc->orig_line, pc->str.c_str()); } }
/** * At the heart of this algorithm are two stacks. * There is the Paren Stack (PS) and the Frame stack. * * The PS (pse in the code) keeps track of braces, parens, * if/else/switch/do/while/etc items -- anything that is nestable. * Complex statements go through stages. * Take this simple if statement as an example: * if ( x ) { x--; } * * The stack would change like so: 'token' stack afterwards * 'if' [IF - 1] * '(' [IF - 1] [PAREN OPEN] * 'x' [IF - 1] [PAREN OPEN] * ')' [IF - 2] <- note that the state was incremented * '{' [IF - 2] [BRACE OPEN] * 'x' [IF - 2] [BRACE OPEN] * '--' [IF - 2] [BRACE OPEN] * ';' [IF - 2] [BRACE OPEN] * '}' [IF - 3] * <- lack of else kills the IF, closes statement * * Virtual braces example: * if ( x ) x--; else x++; * * 'if' [IF - 1] * '(' [IF - 1] [PAREN OPEN] * 'x' [IF - 1] [PAREN OPEN] * ')' [IF - 2] * 'x' [IF - 2] [VBRACE OPEN] <- VBrace open inserted before because '{' was not next * '--' [IF - 2] [VBRACE OPEN] * ';' [IF - 3] <- VBrace close inserted after semicolon * 'else' [ELSE - 0] <- IF changed into ELSE * 'x' [ELSE - 0] [VBRACE OPEN] <- lack of '{' -> VBrace * '++' [ELSE - 0] [VBRACE OPEN] * ';' [ELSE - 0] <- VBrace close inserted after semicolon * <- ELSE removed after statement close * * The pse stack is kept on a frame stack. * The frame stack is need for languages that support preprocessors (C, C++, C#) * that can arbitrarily change code flow. It also isolates #define macros so * that they are indented independently and do not affect the rest of the program. * * When an #if is hit, a copy of the current frame is push on the frame stack. * When an #else/#elif is hit, a copy of the current stack is pushed under the * #if frame and the original (pre-#if) frame is copied to the current frame. * When #endif is hit, the top frame is popped. * This has the following effects: * - a simple #if / #endif does not affect program flow * - #if / #else /#endif - continues from the #if clause * * When a #define is entered, the current frame is pushed and cleared. * When a #define is exited, the frame is popped. */ static void parse_cleanup(parse_frame_t *frm, chunk_t *pc) { LOG_FUNC_ENTRY(); LOG_FMT(LTOK, "%s(%d): orig_line is %zu, type is %s, tos is %zu, TOS.type is %s, TOS.stage is %u\n", __func__, __LINE__, pc->orig_line, get_token_name(pc->type), frm->pse_tos, get_token_name(frm->pse[frm->pse_tos].type), (unsigned int)frm->pse[frm->pse_tos].stage); // Mark statement starts if ( (frm->stmt_count == 0 || frm->expr_count == 0) && !chunk_is_semicolon(pc) && pc->type != CT_BRACE_CLOSE && pc->type != CT_VBRACE_CLOSE && !chunk_is_str(pc, ")", 1) && !chunk_is_str(pc, "]", 1)) { chunk_flags_set(pc, PCF_EXPR_START | ((frm->stmt_count == 0) ? PCF_STMT_START : 0)); LOG_FMT(LSTMT, "%s(%d): orig_line is %zu, 1.marked '%s' as %s, start stmt_count is %d, expr_count is %d\n", __func__, __LINE__, pc->orig_line, pc->text(), (pc->flags & PCF_STMT_START) ? "stmt" : "expr", frm->stmt_count, frm->expr_count); } frm->stmt_count++; frm->expr_count++; if (frm->sparen_count > 0) { chunk_flags_set(pc, PCF_IN_SPAREN); // Mark everything in the for statement for (int tmp = frm->pse_tos - 1; tmp >= 0; tmp--) { if (frm->pse[tmp].type == CT_FOR) { chunk_flags_set(pc, PCF_IN_FOR); break; } } // Mark the parent on semicolons in for() statements if ( pc->type == CT_SEMICOLON && frm->pse_tos > 1 && (frm->pse[frm->pse_tos - 1].type == CT_FOR)) { set_chunk_parent(pc, CT_FOR); } } // Check the progression of complex statements if (frm->pse[frm->pse_tos].stage != brace_stage_e::NONE) { if (check_complex_statements(frm, pc)) { return; } } /* * Check for a virtual brace statement close due to a semicolon. * The virtual brace will get handled the next time through. * The semicolon isn't handled at all. * TODO: may need to float VBRACE past comments until newline? */ if (frm->pse[frm->pse_tos].type == CT_VBRACE_OPEN) { if (chunk_is_semicolon(pc)) { cpd.consumed = true; close_statement(frm, pc); } else if (cpd.lang_flags & LANG_PAWN) { if (pc->type == CT_BRACE_CLOSE) { close_statement(frm, pc); } } } // Handle close parenthesis, vbrace, brace, and square if ( pc->type == CT_PAREN_CLOSE || pc->type == CT_BRACE_CLOSE || pc->type == CT_VBRACE_CLOSE || pc->type == CT_ANGLE_CLOSE || pc->type == CT_MACRO_CLOSE || pc->type == CT_SQUARE_CLOSE) { // Change CT_PAREN_CLOSE into CT_SPAREN_CLOSE or CT_FPAREN_CLOSE if ( pc->type == CT_PAREN_CLOSE && ( (frm->pse[frm->pse_tos].type == CT_FPAREN_OPEN) || (frm->pse[frm->pse_tos].type == CT_SPAREN_OPEN))) { set_chunk_type(pc, (c_token_t)(frm->pse[frm->pse_tos].type + 1)); if (pc->type == CT_SPAREN_CLOSE) { frm->sparen_count--; chunk_flags_clr(pc, PCF_IN_SPAREN); } } // Make sure the open / close match if (pc->type != (frm->pse[frm->pse_tos].type + 1)) { if ( (frm->pse[frm->pse_tos].type != CT_NONE) && (frm->pse[frm->pse_tos].type != CT_PP_DEFINE)) { LOG_FMT(LWARN, "%s(%d): %s, orig_line is %zu, Error: Unexpected '%s' for '%s', which was on line %zu\n", __func__, __LINE__, cpd.filename, pc->orig_line, pc->text(), get_token_name(frm->pse[frm->pse_tos].pc->type), frm->pse[frm->pse_tos].pc->orig_line); print_stack(LBCSPOP, "=Error ", frm, pc); cpd.error_count++; } } else { cpd.consumed = true; // Copy the parent, update the parenthesis/brace levels set_chunk_parent(pc, frm->pse[frm->pse_tos].parent); frm->level--; if ( pc->type == CT_BRACE_CLOSE || pc->type == CT_VBRACE_CLOSE || pc->type == CT_MACRO_CLOSE) { frm->brace_level--; } pc->level = frm->level; pc->brace_level = frm->brace_level; // Pop the entry frm->pse_tos--; print_stack(LBCSPOP, "-Close ", frm, pc); // See if we are in a complex statement if (frm->pse[frm->pse_tos].stage != brace_stage_e::NONE) { handle_complex_close(frm, pc); } } } /* * In this state, we expect a semicolon, but we'll also hit the closing * sparen, so we need to check cpd.consumed to see if the close sparen was * aleady handled. */ if (frm->pse[frm->pse_tos].stage == brace_stage_e::WOD_SEMI) { chunk_t *tmp = pc; if (cpd.consumed) { /* * If consumed, then we are on the close sparen. * PAWN: Check the next chunk for a semicolon. If it isn't, then * add a virtual semicolon, which will get handled on the next pass. */ if (cpd.lang_flags & LANG_PAWN) { tmp = chunk_get_next_ncnl(pc); if (tmp->type != CT_SEMICOLON && tmp->type != CT_VSEMICOLON) { pawn_add_vsemi_after(pc); } } } else { // Complain if this ISN'T a semicolon, but close out WHILE_OF_DO anyway if (pc->type == CT_SEMICOLON || pc->type == CT_VSEMICOLON) { cpd.consumed = true; set_chunk_parent(pc, CT_WHILE_OF_DO); } else { LOG_FMT(LWARN, "%s: %s(%d): %zu: Error: Expected a semicolon for WHILE_OF_DO, but got '%s'\n", cpd.filename, __func__, __LINE__, pc->orig_line, get_token_name(pc->type)); cpd.error_count++; } handle_complex_close(frm, pc); } } // Get the parent type for brace and parenthesis open c_token_t parent = pc->parent_type; if ( pc->type == CT_PAREN_OPEN || pc->type == CT_FPAREN_OPEN || pc->type == CT_SPAREN_OPEN || pc->type == CT_BRACE_OPEN) { chunk_t *prev = chunk_get_prev_ncnl(pc); if (prev != nullptr) { if ( pc->type == CT_PAREN_OPEN || pc->type == CT_FPAREN_OPEN || pc->type == CT_SPAREN_OPEN) { // Set the parent for parenthesis and change parenthesis type if (frm->pse[frm->pse_tos].stage != brace_stage_e::NONE) { set_chunk_type(pc, CT_SPAREN_OPEN); parent = frm->pse[frm->pse_tos].type; frm->sparen_count++; } else if (prev->type == CT_FUNCTION) { set_chunk_type(pc, CT_FPAREN_OPEN); parent = CT_FUNCTION; } // NS_ENUM and NS_OPTIONS are followed by a (type, name) pair else if (prev->type == CT_ENUM && (cpd.lang_flags & LANG_OC)) { // Treat both as CT_ENUM since the syntax is identical set_chunk_type(pc, CT_FPAREN_OPEN); parent = CT_ENUM; } else { // no need to set parent } } else // must be CT_BRACE_OPEN { // Set the parent for open braces if (frm->pse[frm->pse_tos].stage != brace_stage_e::NONE) { parent = frm->pse[frm->pse_tos].type; } else if (prev->type == CT_ASSIGN && (prev->str[0] == '=')) { parent = CT_ASSIGN; } // Carry through CT_ENUM parent in NS_ENUM (type, name) { else if ( prev->type == CT_FPAREN_CLOSE && (cpd.lang_flags & LANG_OC) && prev->parent_type == CT_ENUM) { parent = CT_ENUM; } else if (prev->type == CT_FPAREN_CLOSE) { parent = CT_FUNCTION; } else { // no need to set parent } } } } /* * Adjust the level for opens & create a stack entry * Note that CT_VBRACE_OPEN has already been handled. */ if ( pc->type == CT_BRACE_OPEN || pc->type == CT_PAREN_OPEN || pc->type == CT_FPAREN_OPEN || pc->type == CT_SPAREN_OPEN || pc->type == CT_ANGLE_OPEN || pc->type == CT_MACRO_OPEN || pc->type == CT_SQUARE_OPEN) { frm->level++; if (pc->type == CT_BRACE_OPEN || pc->type == CT_MACRO_OPEN) { frm->brace_level++; } push_fmr_pse(frm, pc, brace_stage_e::NONE, "+Open "); frm->pse[frm->pse_tos].parent = parent; set_chunk_parent(pc, parent); } pattern_class_e patcls = get_token_pattern_class(pc->type); /* * Create a stack entry for complex statements: * if, elseif, switch, for, while, synchronized, using, lock, with, * version, CT_D_SCOPE_IF */ if (patcls == pattern_class_e::BRACED) { push_fmr_pse(frm, pc, (pc->type == CT_DO) ? brace_stage_e::BRACE_DO : brace_stage_e::BRACE2, "+ComplexBraced"); } else if (patcls == pattern_class_e::PBRACED) { brace_stage_e bs = brace_stage_e::PAREN1; if (pc->type == CT_WHILE && maybe_while_of_do(pc)) { set_chunk_type(pc, CT_WHILE_OF_DO); bs = brace_stage_e::WOD_PAREN; } push_fmr_pse(frm, pc, bs, "+ComplexParenBraced"); } else if (patcls == pattern_class_e::OPBRACED) { push_fmr_pse(frm, pc, brace_stage_e::OP_PAREN1, "+ComplexOpParenBraced"); } else if (patcls == pattern_class_e::ELSE) { push_fmr_pse(frm, pc, brace_stage_e::ELSEIF, "+ComplexElse"); } /* * Mark simple statement/expression starts * - after { or } * - after ';', but not if the paren stack top is a paren * - after '(' that has a parent type of CT_FOR */ if ( pc->type == CT_SQUARE_OPEN || (pc->type == CT_BRACE_OPEN && pc->parent_type != CT_ASSIGN) || pc->type == CT_BRACE_CLOSE || pc->type == CT_VBRACE_CLOSE || (pc->type == CT_SPAREN_OPEN && pc->parent_type == CT_FOR) || pc->type == CT_COLON || pc->type == CT_OC_END || ( chunk_is_semicolon(pc) && frm->pse[frm->pse_tos].type != CT_PAREN_OPEN && frm->pse[frm->pse_tos].type != CT_FPAREN_OPEN && frm->pse[frm->pse_tos].type != CT_SPAREN_OPEN)) { LOG_FMT(LSTMT, "%s(%d): orig_line is %zu, reset1 stmt on '%s'\n", __func__, __LINE__, pc->orig_line, pc->text()); frm->stmt_count = 0; frm->expr_count = 0; } // Mark expression starts chunk_t *tmp = chunk_get_next_ncnl(pc); if ( pc->type == CT_ARITH || pc->type == CT_ASSIGN || pc->type == CT_CASE || pc->type == CT_COMPARE || ( pc->type == CT_STAR && tmp != nullptr && tmp->type != CT_STAR) || pc->type == CT_BOOL || pc->type == CT_MINUS || pc->type == CT_PLUS || pc->type == CT_CARET || pc->type == CT_ANGLE_OPEN || pc->type == CT_ANGLE_CLOSE || pc->type == CT_RETURN || pc->type == CT_THROW || pc->type == CT_GOTO || pc->type == CT_CONTINUE || pc->type == CT_PAREN_OPEN || pc->type == CT_FPAREN_OPEN || pc->type == CT_SPAREN_OPEN || pc->type == CT_BRACE_OPEN || chunk_is_semicolon(pc) || pc->type == CT_COMMA || pc->type == CT_NOT || pc->type == CT_INV || pc->type == CT_COLON || pc->type == CT_QUESTION) { frm->expr_count = 0; LOG_FMT(LSTMT, "%s(%d): orig_line is %zu, reset expr on '%s'\n", __func__, __LINE__, pc->orig_line, pc->text()); } else if (pc->type == CT_BRACE_CLOSE) { if (!cpd.consumed) { size_t file_pp_level = ifdef_over_whole_file() ? 1 : 0; if (!cpd.unc_off_used && pc->pp_level == file_pp_level) { // fatal error char *outputMessage; if (cpd.settings[UO_tok_split_gte].b) { outputMessage = make_message("Unmatched BRACE_CLOSE\nat orig_line=%zu, orig_col=%zu\n", pc->orig_line, pc->orig_col); } else { outputMessage = make_message("Unmatched BRACE_CLOSE\nat orig_line=%zu, orig_col=%zu\nTry the option 'tok_split_gte = true'\n", pc->orig_line, pc->orig_col); } fprintf(stderr, "%s", outputMessage); free(outputMessage); log_flush(true); exit(EXIT_FAILURE); } } } } // parse_cleanup
void tokenize_cleanup(void) { LOG_FUNC_ENTRY(); chunk_t *pc = chunk_get_head(); chunk_t *prev = NULL; chunk_t *next; chunk_t *tmp; chunk_t *tmp2; bool in_type_cast = false; cpd.unc_stage = US_TOKENIZE_CLEANUP; /* Since [] is expected to be TSQUARE for the 'operator', we need to make * this change in the first pass. */ for (pc = chunk_get_head(); pc != NULL; pc = chunk_get_next_ncnl(pc)) { if (pc->type == CT_SQUARE_OPEN) { next = chunk_get_next_ncnl(pc); if (chunk_is_token(next, CT_SQUARE_CLOSE)) { /* Change '[' + ']' into '[]' */ set_chunk_type(pc, CT_TSQUARE); pc->str = "[]"; // bug # 664 // The original orig_col_end of CT_SQUARE_CLOSE is stored at orig_col_end of CT_TSQUARE. // pc->orig_col_end += 1; pc->orig_col_end = next->orig_col_end; chunk_del(next); } } if ((pc->type == CT_SEMICOLON) && (pc->flags & PCF_IN_PREPROC) && !chunk_get_next_ncnl(pc, CNAV_PREPROC)) { LOG_FMT(LNOTE, "%s:%d Detected a macro that ends with a semicolon. Possible failures if used.\n", cpd.filename, pc->orig_line); } } /* We can handle everything else in the second pass */ pc = chunk_get_head(); next = chunk_get_next_ncnl(pc); while ((pc != NULL) && (next != NULL)) { if ((pc->type == CT_DOT) && (cpd.lang_flags & LANG_ALLC)) { set_chunk_type(pc, CT_MEMBER); } if ((pc->type == CT_NULLCOND) && (cpd.lang_flags & LANG_CS)) { set_chunk_type(pc, CT_MEMBER); } /* Determine the version stuff (D only) */ if (pc->type == CT_D_VERSION) { if (next->type == CT_PAREN_OPEN) { set_chunk_type(pc, CT_D_VERSION_IF); } else { if (next->type != CT_ASSIGN) { LOG_FMT(LERR, "%s:%d %s: version: Unexpected token %s\n", cpd.filename, pc->orig_line, __func__, get_token_name(next->type)); cpd.error_count++; } set_chunk_type(pc, CT_WORD); } } /* Determine the scope stuff (D only) */ if (pc->type == CT_D_SCOPE) { if (next->type == CT_PAREN_OPEN) { set_chunk_type(pc, CT_D_SCOPE_IF); } else { set_chunk_type(pc, CT_TYPE); } } /** * Change CT_BASE before CT_PAREN_OPEN to CT_WORD. * public myclass() : base() { * } */ if ((pc->type == CT_BASE) && (next->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_WORD); } if ((pc->type == CT_ENUM) && (next->type == CT_CLASS)) { set_chunk_type(next, CT_ENUM_CLASS); } /** * Change CT_WORD after CT_ENUM, CT_UNION, or CT_STRUCT to CT_TYPE * Change CT_WORD before CT_WORD to CT_TYPE */ if (next->type == CT_WORD) { if ((pc->type == CT_ENUM) || (pc->type == CT_ENUM_CLASS) || (pc->type == CT_UNION) || (pc->type == CT_STRUCT)) { set_chunk_type(next, CT_TYPE); } if (pc->type == CT_WORD) { set_chunk_type(pc, CT_TYPE); } } /* change extern to qualifier if extern isn't followed by a string or * an open paren */ if (pc->type == CT_EXTERN) { if (next->type == CT_STRING) { /* Probably 'extern "C"' */ } else if (next->type == CT_PAREN_OPEN) { /* Probably 'extern (C)' */ } else { /* Something else followed by a open brace */ tmp = chunk_get_next_ncnl(next); if ((tmp == NULL) || (tmp->type != CT_BRACE_OPEN)) { set_chunk_type(pc, CT_QUALIFIER); } } } /** * Change CT_STAR to CT_PTR_TYPE if preceded by CT_TYPE, * CT_QUALIFIER, or CT_PTR_TYPE. */ if ((next->type == CT_STAR) && ((pc->type == CT_TYPE) || (pc->type == CT_QUALIFIER) || (pc->type == CT_PTR_TYPE))) { set_chunk_type(next, CT_PTR_TYPE); } if ((pc->type == CT_TYPE_CAST) && (next->type == CT_ANGLE_OPEN)) { set_chunk_parent(next, CT_TYPE_CAST); in_type_cast = true; } /** * Change angle open/close to CT_COMPARE, if not a template thingy */ if ((pc->type == CT_ANGLE_OPEN) && (pc->parent_type != CT_TYPE_CAST)) { /* pretty much all languages except C use <> for something other than * comparisons. "#include<xxx>" is handled elsewhere. */ if (cpd.lang_flags & (LANG_CPP | LANG_CS | LANG_JAVA | LANG_VALA | LANG_OC)) { // bug #663 check_template(pc); } else { /* convert CT_ANGLE_OPEN to CT_COMPARE */ set_chunk_type(pc, CT_COMPARE); } } if ((pc->type == CT_ANGLE_CLOSE) && (pc->parent_type != CT_TEMPLATE)) { if (in_type_cast) { in_type_cast = false; set_chunk_parent(pc, CT_TYPE_CAST); } else { next = handle_double_angle_close(pc); } } if (cpd.lang_flags & LANG_D) { /* Check for the D string concat symbol '~' */ if ((pc->type == CT_INV) && ((prev->type == CT_STRING) || (prev->type == CT_WORD) || (next->type == CT_STRING))) { set_chunk_type(pc, CT_CONCAT); } /* Check for the D template symbol '!' (word + '!' + word or '(') */ if ((pc->type == CT_NOT) && (prev->type == CT_WORD) && ((next->type == CT_PAREN_OPEN) || (next->type == CT_WORD) || (next->type == CT_TYPE))) { set_chunk_type(pc, CT_D_TEMPLATE); } /* handle "version(unittest) { }" vs "unittest { }" */ if (prev && (pc->type == CT_UNITTEST) && (prev->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_WORD); } /* handle 'static if' and merge the tokens */ if (prev && (pc->type == CT_IF) && chunk_is_str(prev, "static", 6)) { /* delete PREV and merge with IF */ pc->str.insert(0, ' '); pc->str.insert(0, prev->str); pc->orig_col = prev->orig_col; pc->orig_line = prev->orig_line; chunk_t *to_be_deleted = prev; prev = chunk_get_prev_ncnl(prev); chunk_del(to_be_deleted); } } if (cpd.lang_flags & LANG_CPP) { /* Change Word before '::' into a type */ if ((pc->type == CT_WORD) && (next->type == CT_DC_MEMBER)) { set_chunk_type(pc, CT_TYPE); } } /* Change get/set to CT_WORD if not followed by a brace open */ if ((pc->type == CT_GETSET) && (next->type != CT_BRACE_OPEN)) { if ((next->type == CT_SEMICOLON) && ((prev->type == CT_BRACE_CLOSE) || (prev->type == CT_BRACE_OPEN) || (prev->type == CT_SEMICOLON))) { set_chunk_type(pc, CT_GETSET_EMPTY); set_chunk_parent(next, CT_GETSET); } else { set_chunk_type(pc, CT_WORD); } } /* Interface is only a keyword in MS land if followed by 'class' or 'struct' * likewise, 'class' may be a member name in Java. */ if ((pc->type == CT_CLASS) && !CharTable::IsKw1(next->str[0])) { set_chunk_type(pc, CT_WORD); } /* Change item after operator (>=, ==, etc) to a CT_OPERATOR_VAL * Usually the next item is part of the operator. * In a few cases the next few tokens are part of it: * operator + - common case * operator >> - need to combine '>' and '>' * operator () * operator [] - already converted to TSQUARE * operator new [] * operator delete [] * operator const char * * operator const B& * operator std::allocator<U> * * In all cases except the last, this will put the entire operator value * in one chunk. */ if (pc->type == CT_OPERATOR) { tmp2 = chunk_get_next(next); /* Handle special case of () operator -- [] already handled */ if (next->type == CT_PAREN_OPEN) { tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->type == CT_PAREN_CLOSE)) { next->str = "()"; set_chunk_type(next, CT_OPERATOR_VAL); chunk_del(tmp); next->orig_col_end += 1; } } else if ((next->type == CT_ANGLE_CLOSE) && tmp2 && (tmp2->type == CT_ANGLE_CLOSE) && (tmp2->orig_col == next->orig_col_end)) { next->str.append('>'); next->orig_col_end++; set_chunk_type(next, CT_OPERATOR_VAL); chunk_del(tmp2); } else if (next->flags & PCF_PUNCTUATOR) { set_chunk_type(next, CT_OPERATOR_VAL); } else { set_chunk_type(next, CT_TYPE); /* Replace next with a collection of all tokens that are part of * the type. */ tmp2 = next; while ((tmp = chunk_get_next(tmp2)) != NULL) { if ((tmp->type != CT_WORD) && (tmp->type != CT_TYPE) && (tmp->type != CT_QUALIFIER) && (tmp->type != CT_STAR) && (tmp->type != CT_CARET) && (tmp->type != CT_AMP) && (tmp->type != CT_TSQUARE)) { break; } /* Change tmp into a type so that space_needed() works right */ make_type(tmp); int num_sp = space_needed(tmp2, tmp); while (num_sp-- > 0) { next->str.append(" "); } next->str.append(tmp->str); tmp2 = tmp; } while ((tmp2 = chunk_get_next(next)) != tmp) { chunk_del(tmp2); } set_chunk_type(next, CT_OPERATOR_VAL); next->orig_col_end = next->orig_col + next->len(); } set_chunk_parent(next, CT_OPERATOR); LOG_FMT(LOPERATOR, "%s: %d:%d operator '%s'\n", __func__, pc->orig_line, pc->orig_col, next->text()); } /* Change private, public, protected into either a qualifier or label */ if (pc->type == CT_PRIVATE) { /* Handle Qt slots - maybe should just check for a CT_WORD? */ if (chunk_is_str(next, "slots", 5) || chunk_is_str(next, "Q_SLOTS", 7)) { tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->type == CT_COLON)) { next = tmp; } } if (next->type == CT_COLON) { set_chunk_type(next, CT_PRIVATE_COLON); if ((tmp = chunk_get_next_ncnl(next)) != NULL) { chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START); } } else { set_chunk_type(pc, (chunk_is_str(pc, "signals", 7) || chunk_is_str(pc, "Q_SIGNALS", 9)) ? CT_WORD : CT_QUALIFIER); } } /* Look for <newline> 'EXEC' 'SQL' */ if ((chunk_is_str(pc, "EXEC", 4) && chunk_is_str(next, "SQL", 3)) || ((*pc->str == '$') && (pc->type != CT_SQL_WORD))) { tmp = chunk_get_prev(pc); if (chunk_is_newline(tmp)) { if (*pc->str == '$') { set_chunk_type(pc, CT_SQL_EXEC); if (pc->len() > 1) { /* SPLIT OFF '$' */ chunk_t nc; nc = *pc; pc->str.resize(1); pc->orig_col_end = pc->orig_col + 1; nc.type = CT_SQL_WORD; nc.str.pop_front(); nc.orig_col++; nc.column++; chunk_add_after(&nc, pc); next = chunk_get_next(pc); } } tmp = chunk_get_next(next); if (chunk_is_str_case(tmp, "BEGIN", 5)) { set_chunk_type(pc, CT_SQL_BEGIN); } else if (chunk_is_str_case(tmp, "END", 3)) { set_chunk_type(pc, CT_SQL_END); } else { set_chunk_type(pc, CT_SQL_EXEC); } /* Change words into CT_SQL_WORD until CT_SEMICOLON */ while (tmp != NULL) { if (tmp->type == CT_SEMICOLON) { break; } if ((tmp->len() > 0) && (unc_isalpha(*tmp->str) || (*tmp->str == '$'))) { set_chunk_type(tmp, CT_SQL_WORD); } tmp = chunk_get_next_ncnl(tmp); } } } /* handle MS abomination 'for each' */ if ((pc->type == CT_FOR) && chunk_is_str(next, "each", 4) && (next == chunk_get_next(pc))) { /* merge the two with a space between */ pc->str.append(' '); pc->str += next->str; pc->orig_col_end = next->orig_col_end; chunk_del(next); next = chunk_get_next_ncnl(pc); /* label the 'in' */ if (next && (next->type == CT_PAREN_OPEN)) { tmp = chunk_get_next_ncnl(next); while (tmp && (tmp->type != CT_PAREN_CLOSE)) { if (chunk_is_str(tmp, "in", 2)) { set_chunk_type(tmp, CT_IN); break; } tmp = chunk_get_next_ncnl(tmp); } } } /* ObjectiveC allows keywords to be used as identifiers in some situations * This is a dirty hack to allow some of the more common situations. */ if (cpd.lang_flags & LANG_OC) { if (((pc->type == CT_IF) || (pc->type == CT_FOR) || (pc->type == CT_WHILE)) && !chunk_is_token(next, CT_PAREN_OPEN)) { set_chunk_type(pc, CT_WORD); } if ((pc->type == CT_DO) && (chunk_is_token(prev, CT_MINUS) || chunk_is_token(next, CT_SQUARE_CLOSE))) { set_chunk_type(pc, CT_WORD); } } /* Another hack to clean up more keyword abuse */ if ((pc->type == CT_CLASS) && (chunk_is_token(prev, CT_DOT) || chunk_is_token(next, CT_DOT))) { set_chunk_type(pc, CT_WORD); } /* Detect Objective C class name */ if ((pc->type == CT_OC_IMPL) || (pc->type == CT_OC_INTF) || (pc->type == CT_OC_PROTOCOL)) { if (next->type != CT_PAREN_OPEN) { set_chunk_type(next, CT_OC_CLASS); } set_chunk_parent(next, pc->type); tmp = chunk_get_next_ncnl(next); if (tmp != NULL) { chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START); } tmp = chunk_get_next_type(pc, CT_OC_END, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->type); } } if (pc->type == CT_OC_INTF) { tmp = chunk_get_next_ncnl(pc, CNAV_PREPROC); while ((tmp != NULL) && (tmp->type != CT_OC_END)) { if (get_token_pattern_class(tmp->type) != PATCLS_NONE) { LOG_FMT(LOBJCWORD, "@interface %d:%d change '%s' (%s) to CT_WORD\n", pc->orig_line, pc->orig_col, tmp->text(), get_token_name(tmp->type)); set_chunk_type(tmp, CT_WORD); } tmp = chunk_get_next_ncnl(tmp, CNAV_PREPROC); } } /* Detect Objective-C categories and class extensions */ /* @interface ClassName (CategoryName) */ /* @implementation ClassName (CategoryName) */ /* @interface ClassName () */ /* @implementation ClassName () */ if (((pc->parent_type == CT_OC_IMPL) || (pc->parent_type == CT_OC_INTF) || (pc->type == CT_OC_CLASS)) && (next->type == CT_PAREN_OPEN)) { set_chunk_parent(next, pc->parent_type); tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->next != NULL)) { if (tmp->type == CT_PAREN_CLOSE) { //set_chunk_type(tmp, CT_OC_CLASS_EXT); set_chunk_parent(tmp, pc->parent_type); } else { set_chunk_type(tmp, CT_OC_CATEGORY); set_chunk_parent(tmp, pc->parent_type); } } tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->parent_type); } } /* Detect Objective C @property * @property NSString *stringProperty; * @property(nonatomic, retain) NSMutableDictionary *shareWith; */ if (pc->type == CT_OC_PROPERTY) { if (next->type != CT_PAREN_OPEN) { chunk_flags_set(next, PCF_STMT_START | PCF_EXPR_START); } else { set_chunk_parent(next, pc->type); tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->type); tmp = chunk_get_next_ncnl(tmp); if (tmp != NULL) { chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START); tmp = chunk_get_next_type(tmp, CT_SEMICOLON, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->type); } } } } } /* Detect Objective C @selector * @selector(msgNameWithNoArg) * @selector(msgNameWith1Arg:) * @selector(msgNameWith2Args:arg2Name:) */ if ((pc->type == CT_OC_SEL) && (next->type == CT_PAREN_OPEN)) { set_chunk_parent(next, pc->type); tmp = chunk_get_next(next); if (tmp != NULL) { set_chunk_type(tmp, CT_OC_SEL_NAME); set_chunk_parent(tmp, pc->type); while ((tmp = chunk_get_next_ncnl(tmp)) != NULL) { if (tmp->type == CT_PAREN_CLOSE) { set_chunk_parent(tmp, CT_OC_SEL); break; } set_chunk_type(tmp, CT_OC_SEL_NAME); set_chunk_parent(tmp, pc->type); } } } /* Handle special preprocessor junk */ if (pc->type == CT_PREPROC) { set_chunk_parent(pc, next->type); } /* Detect "pragma region" and "pragma endregion" */ if ((pc->type == CT_PP_PRAGMA) && (next->type == CT_PREPROC_BODY)) { if ((memcmp(next->str, "region", 6) == 0) || (memcmp(next->str, "endregion", 9) == 0)) { set_chunk_type(pc, (*next->str == 'r') ? CT_PP_REGION : CT_PP_ENDREGION); set_chunk_parent(prev, pc->type); } } /* Check for C# nullable types '?' is in next */ if ((cpd.lang_flags & LANG_CS) && (next->type == CT_QUESTION) && (next->orig_col == (pc->orig_col + pc->len()))) { tmp = chunk_get_next_ncnl(next); if (tmp != NULL) { bool doit = ((tmp->type == CT_PAREN_CLOSE) || (tmp->type == CT_ANGLE_CLOSE)); if (tmp->type == CT_WORD) { tmp2 = chunk_get_next_ncnl(tmp); if ((tmp2 != NULL) && ((tmp2->type == CT_SEMICOLON) || (tmp2->type == CT_ASSIGN) || (tmp2->type == CT_COMMA) || (tmp2->type == CT_BRACE_OPEN))) { doit = true; } } if (doit) { pc->str += next->str; pc->orig_col_end = next->orig_col_end; chunk_del(next); next = tmp; } } } /* Change 'default(' into a sizeof-like statement */ if ((cpd.lang_flags & LANG_CS) && (pc->type == CT_DEFAULT) && (next->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_SIZEOF); } if ((pc->type == CT_UNSAFE) && (next->type != CT_BRACE_OPEN)) { set_chunk_type(pc, CT_QUALIFIER); } if (((pc->type == CT_USING) || ((pc->type == CT_TRY) && (cpd.lang_flags & LANG_JAVA))) && (next->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_USING_STMT); } /* Add minimal support for C++0x rvalue references */ if ((pc->type == CT_BOOL) && chunk_is_str(pc, "&&", 2)) { if (prev->type == CT_TYPE) { set_chunk_type(pc, CT_BYREF); } } /* HACK: treat try followed by a colon as a qualifier to handle this: * A::A(int) try : B() { } catch (...) { } */ if ((pc->type == CT_TRY) && chunk_is_str(pc, "try", 3) && (next != NULL) && (next->type == CT_COLON)) { set_chunk_type(pc, CT_QUALIFIER); } /* If Java's 'synchronized' is in a method declaration, it should be * a qualifier. */ if ((cpd.lang_flags & LANG_JAVA) && (pc->type == CT_SYNCHRONIZED) && (next->type != CT_PAREN_OPEN)) { set_chunk_type(pc, CT_QUALIFIER); } // guy 2015-11-05 // change CT_DC_MEMBER + CT_FOR into CT_DC_MEMBER + CT_FUNC_CALL if ((pc->type == CT_FOR) && (pc->prev->type == CT_DC_MEMBER)) { set_chunk_type(pc, CT_FUNC_CALL); } /* TODO: determine other stuff here */ prev = pc; pc = next; next = chunk_get_next_ncnl(pc); } } // tokenize_cleanup
void tokenize_cleanup(void) { chunk_t *pc = chunk_get_head(); chunk_t *prev = NULL; chunk_t *next; chunk_t *tmp; chunk_t *tmp2; bool in_type_cast = false; pc = chunk_get_head(); next = chunk_get_next_ncnl(pc); while ((pc != NULL) && (next != NULL)) { /* Change '[' + ']' into '[]' */ if ((pc->type == CT_SQUARE_OPEN) && (next->type == CT_SQUARE_CLOSE)) { pc->type = CT_TSQUARE; pc->str = "[]"; pc->len = 2; chunk_del(next); pc->orig_col_end += 1; next = chunk_get_next_ncnl(pc); } if ((pc->type == CT_DOT) && ((cpd.lang_flags & LANG_ALLC) != 0)) { pc->type = CT_MEMBER; } /* Determine the version stuff (D only) */ if (pc->type == CT_D_VERSION) { if (next->type == CT_PAREN_OPEN) { pc->type = CT_D_VERSION_IF; } else { if (next->type != CT_ASSIGN) { LOG_FMT(LERR, "%s:%d %s: version: Unexpected token %s\n", cpd.filename, pc->orig_line, __func__, get_token_name(next->type)); cpd.error_count++; } pc->type = CT_WORD; } } /* Determine the scope stuff (D only) */ if (pc->type == CT_D_SCOPE) { if (next->type == CT_PAREN_OPEN) { pc->type = CT_D_SCOPE_IF; } else { pc->type = CT_TYPE; } } /** * Change CT_BASE before CT_PAREN_OPEN to CT_WORD. * public myclass() : base() { * } */ if ((pc->type == CT_BASE) && (next->type == CT_PAREN_OPEN)) { pc->type = CT_WORD; } /** * Change CT_WORD after CT_ENUM, CT_UNION, or CT_STRUCT to CT_TYPE * Change CT_WORD before CT_WORD to CT_TYPE */ if (next->type == CT_WORD) { if ((pc->type == CT_ENUM) || (pc->type == CT_UNION) || (pc->type == CT_STRUCT)) { next->type = CT_TYPE; } if (pc->type == CT_WORD) { pc->type = CT_TYPE; } } /* change extern to qualifier if extern isn't followed by a string or * an open paren */ if (pc->type == CT_EXTERN) { if (next->type == CT_STRING) { /* Probably 'extern "C"' */ } else if (next->type == CT_PAREN_OPEN) { /* Probably 'extern (C)' */ } else { /* Something else followed by a open brace */ tmp = chunk_get_next_ncnl(next); if ((tmp != NULL) || (tmp->type != CT_BRACE_OPEN)) { pc->type = CT_QUALIFIER; } } } /** * Change CT_STAR to CT_PTR_TYPE if preceded by CT_TYPE, * CT_QUALIFIER, or CT_PTR_TYPE. */ if ((next->type == CT_STAR) && ((pc->type == CT_TYPE) || (pc->type == CT_QUALIFIER) || (pc->type == CT_PTR_TYPE))) { next->type = CT_PTR_TYPE; } if ((pc->type == CT_TYPE_CAST) && (next->type == CT_ANGLE_OPEN)) { next->parent_type = CT_TYPE_CAST; in_type_cast = true; } /** * Change angle open/close to CT_COMPARE, if not a template thingy */ if ((pc->type == CT_ANGLE_OPEN) && (pc->parent_type != CT_TYPE_CAST)) { check_template(pc); } if ((pc->type == CT_ANGLE_CLOSE) && (pc->parent_type != CT_TEMPLATE)) { if (in_type_cast) { in_type_cast = false; pc->parent_type = CT_TYPE_CAST; } else { next = handle_double_angle_close(pc); } } if ((cpd.lang_flags & LANG_D) != 0) { /* Check for the D string concat symbol '~' */ if ((pc->type == CT_INV) && ((prev->type == CT_STRING) || (prev->type == CT_WORD) || (next->type == CT_STRING))) { pc->type = CT_CONCAT; } /* Check for the D template symbol '!' (word + '!' + word or '(') */ if ((pc->type == CT_NOT) && (prev->type == CT_WORD) && ((next->type == CT_PAREN_OPEN) || (next->type == CT_WORD) || (next->type == CT_TYPE))) { pc->type = CT_D_TEMPLATE; } /* handle "version(unittest) { }" vs "unittest { }" */ if ((pc->type == CT_UNITTEST) && (prev->type == CT_PAREN_OPEN)) { pc->type = CT_WORD; } } if ((cpd.lang_flags & LANG_CPP) != 0) { /* Change Word before '::' into a type */ if ((pc->type == CT_WORD) && (next->type == CT_DC_MEMBER)) { pc->type = CT_TYPE; } } /* Change get/set to CT_WORD if not followed by a brace open */ if ((pc->type == CT_GETSET) && (next->type != CT_BRACE_OPEN)) { if ((next->type == CT_SEMICOLON) && ((prev->type == CT_BRACE_CLOSE) || (prev->type == CT_BRACE_OPEN) || (prev->type == CT_SEMICOLON))) { pc->type = CT_GETSET_EMPTY; next->parent_type = CT_GETSET; } else { pc->type = CT_WORD; } } /* Change item after operator (>=, ==, etc) to a CT_OPERATOR_VAL * Usually the next item is part of the operator. * In a few cases the next few tokens are part of it: * operator + - common case * operator >> - need to combine '>' and '>' * operator () * operator [] - already converted to TSQUARE * operator new [] * operator delete [] * operator const char * * operator const B& * operator std::allocator<U> * * In all cases except the last, this will put the entire operator value * in one chunk. */ if (pc->type == CT_OPERATOR) { /* Handle special case of () operator -- [] already handled */ if (next->type == CT_PAREN_OPEN) { tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->type == CT_PAREN_CLOSE)) { next->str = "()"; next->len = 2; next->type = CT_OPERATOR_VAL; chunk_del(tmp); next->orig_col_end += 1; } } else if ((next->type == CT_ANGLE_CLOSE) && (memcmp(next->str, ">>", 2) == 0)) { next->len++; next->orig_col_end++; next->type = CT_OPERATOR_VAL; chunk_del(chunk_get_next(next)); } else if (next->flags & PCF_PUNCTUATOR) { next->type = CT_OPERATOR_VAL; } else { next->type = CT_TYPE; /* Replace next with a collection of all tokens that are part of * the type. */ char opbuf[256]; int len; len = snprintf(opbuf, sizeof(opbuf), "%.*s", next->len, next->str); tmp2 = next; while ((tmp = chunk_get_next(tmp2)) != NULL) { if ((tmp->type != CT_WORD) && (tmp->type != CT_TYPE) && (tmp->type != CT_QUALIFIER) && (tmp->type != CT_STAR) && (tmp->type != CT_AMP) && (tmp->type != CT_TSQUARE)) { break; } /* Change tmp into a type so that space_needed() works right */ make_type(tmp); len += snprintf(opbuf + len, sizeof(opbuf) - len, "%s%.*s", space_needed(tmp2, tmp) ? " " : "", tmp->len, tmp->str); tmp2 = tmp; } while ((tmp2 = chunk_get_next(next)) != tmp) { chunk_del(tmp2); } next->str = strdup(opbuf); next->len = len; next->flags |= PCF_OWN_STR; next->type = CT_OPERATOR_VAL; next->orig_col_end = next->orig_col + next->len; } next->parent_type = CT_OPERATOR; LOG_FMT(LOPERATOR, "%s: %d:%d operator '%.*s'\n", __func__, pc->orig_line, pc->orig_col, next->len, next->str); } /* Change private, public, protected into either a qualifier or label */ if (pc->type == CT_PRIVATE) { /* Handle Qt slots - maybe should just check for a CT_WORD? */ if (chunk_is_str(next, "slots", 5) || chunk_is_str(next, "Q_SLOTS", 7)) { tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->type == CT_COLON)) { next = tmp; } } if (next->type == CT_COLON) { next->type = CT_PRIVATE_COLON; if ((tmp = chunk_get_next_ncnl(next)) != NULL) { tmp->flags |= PCF_STMT_START | PCF_EXPR_START; } } else { pc->type = (chunk_is_str(pc, "signals", 7) || chunk_is_str(pc, "Q_SIGNALS", 9)) ? CT_WORD : CT_QUALIFIER; } } /* Look for <newline> 'EXEC' 'SQL' */ if (chunk_is_str(pc, "EXEC", 4) && chunk_is_str(next, "SQL", 3)) { tmp = chunk_get_prev(pc); if (chunk_is_newline(tmp)) { tmp = chunk_get_next(next); if (chunk_is_str_case(tmp, "BEGIN", 5)) { pc->type = CT_SQL_BEGIN; } else if (chunk_is_str_case(tmp, "END", 3)) { pc->type = CT_SQL_END; } else { pc->type = CT_SQL_EXEC; } /* Change words into CT_SQL_WORD until CT_SEMICOLON */ while (tmp != NULL) { if (tmp->type == CT_SEMICOLON) { break; } if ((tmp->len > 0) && isalpha(*tmp->str)) { tmp->type = CT_SQL_WORD; } tmp = chunk_get_next_ncnl(tmp); } } } /* handle MS abomination 'for each' */ if ((pc->type == CT_FOR) && chunk_is_str(next, "each", 4) && (next == chunk_get_next(pc))) { /* merge the two */ pc->len = next->orig_col_end - pc->orig_col; pc->orig_col_end = next->orig_col_end; chunk_del(next); next = chunk_get_next_ncnl(pc); /* label the 'in' */ if (next && (next->type == CT_PAREN_OPEN)) { tmp = chunk_get_next_ncnl(next); while (tmp && (tmp->type != CT_PAREN_CLOSE)) { if (chunk_is_str(tmp, "in", 2)) { tmp->type = CT_IN; break; } tmp = chunk_get_next_ncnl(tmp); } } } /* ObjectiveC allows keywords to be used as identifiers in some situations * This is a dirty hack to allow some of the more common situations. */ if (cpd.lang_flags & LANG_OC) { if (((pc->type == CT_IF) || (pc->type == CT_FOR) || (pc->type == CT_WHILE)) && !chunk_is_token(next, CT_PAREN_OPEN)) { pc->type = CT_WORD; } if ((pc->type == CT_DO) && (chunk_is_token(prev, CT_MINUS) || chunk_is_token(next, CT_SQUARE_CLOSE))) { pc->type = CT_WORD; } } /* Another hack to clean up more keyword abuse */ if ((pc->type == CT_CLASS) && (chunk_is_token(prev, CT_DOT) || chunk_is_token(next, CT_DOT))) { pc->type = CT_WORD; } /* Detect Objective C class name */ if ((pc->type == CT_OC_IMPL) || (pc->type == CT_OC_INTF) || (pc->type == CT_OC_PROTOCOL)) { if (next->type != CT_PAREN_OPEN) { next->type = CT_OC_CLASS; } next->parent_type = pc->type; tmp = chunk_get_next_ncnl(next); if (tmp != NULL) { tmp->flags |= PCF_STMT_START | PCF_EXPR_START; } tmp = chunk_get_next_type(pc, CT_OC_END, pc->level); if (tmp != NULL) { tmp->parent_type = pc->type; } } if (pc->type == CT_OC_INTF) { tmp = chunk_get_next_ncnl(pc, CNAV_PREPROC); while ((tmp != NULL) && (tmp->type != CT_OC_END)) { if (get_token_pattern_class(tmp->type) != PATCLS_NONE) { LOG_FMT(LOBJCWORD, "@interface %d:%d change '%.*s' (%s) to CT_WORD\n", pc->orig_line, pc->orig_col, tmp->len, tmp->str, get_token_name(tmp->type)); tmp->type = CT_WORD; } tmp = chunk_get_next_ncnl(tmp, CNAV_PREPROC); } } /* Detect Objective-C categories and class extensions */ /* @interface ClassName (CategoryName) */ /* @implementation ClassName (CategoryName) */ /* @interface ClassName () */ /* @implementation ClassName () */ if (((pc->parent_type == CT_OC_IMPL) || (pc->parent_type == CT_OC_INTF) || (pc->type == CT_OC_CLASS)) && (next->type == CT_PAREN_OPEN)) { next->parent_type = pc->parent_type; tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->next != NULL)) { if (tmp->type == CT_PAREN_CLOSE) { //tmp->type = CT_OC_CLASS_EXT; tmp->parent_type = pc->parent_type; } else { tmp->type = CT_OC_CATEGORY; tmp->parent_type = pc->parent_type; } } tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level); if (tmp != NULL) { tmp->parent_type = pc->parent_type; } } /** * Objective C @dynamic and @synthesize * @dynamic xxx, yyy; * @synthesize xxx, yyy; * Just find the semicolon and mark it. */ if (pc->type == CT_OC_DYNAMIC) { tmp = chunk_get_next_type(pc, CT_SEMICOLON, pc->level); if (tmp != NULL) { tmp->parent_type = pc->type; } } /* Detect Objective C @property * @property NSString *stringProperty; * @property(nonatomic, retain) NSMutableDictionary *shareWith; */ if (pc->type == CT_OC_PROPERTY) { if (next->type != CT_PAREN_OPEN) { next->flags |= PCF_STMT_START | PCF_EXPR_START; } else { next->parent_type = pc->type; tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level); if (tmp != NULL) { tmp->parent_type = pc->type; tmp = chunk_get_next_ncnl(tmp); if (tmp != NULL) { tmp->flags |= PCF_STMT_START | PCF_EXPR_START; tmp = chunk_get_next_type(tmp, CT_SEMICOLON, pc->level); if (tmp != NULL) { tmp->parent_type = pc->type; } } } } } /* Detect Objective C @selector * @selector(msgNameWithNoArg) * @selector(msgNameWith1Arg:) * @selector(msgNameWith2Args:arg2Name:) */ if ((pc->type == CT_OC_SEL) && (next->type == CT_PAREN_OPEN)) { next->parent_type = pc->type; tmp = chunk_get_next(next); if (tmp != NULL) { tmp->type = CT_OC_SEL_NAME; tmp->parent_type = pc->type; while ((tmp = chunk_get_next_ncnl(tmp)) != NULL) { if (tmp->type == CT_PAREN_CLOSE) { tmp->parent_type = CT_OC_SEL; break; } tmp->type = CT_OC_SEL_NAME; tmp->parent_type = pc->type; } } } /* Mark Objective-C blocks (aka lambdas or closures) * The syntax and usage is exactly like C function pointers with two exceptions: * Instead of an asterisk they have a caret as pointer symbol. * In method declarations which take a block as parameter, there can be anonymous blocks, e.g.: (^) * 1. block literal: ^{ ... }; * 2. block declaration: return_t (^name) (int arg1, int arg2, ...) NB: return_t is optional and name can be optional if found as param in a method declaration. * 3. block expression: ^ return_t (int arg) { ... }; NB: return_t is optional * * See http://developer.apple.com/mac/library/documentation/Cocoa/Conceptual/Blocks for more info... */ if ((cpd.lang_flags & LANG_OC) && (pc->type == CT_ARITH) && (chunk_is_str(pc, "^", 1)) && (prev->type != CT_NUMBER) && (prev->type != CT_NUMBER_FP)) { /* mark objc blocks caret so that we can process it later*/ pc->type = CT_OC_BLOCK_CARET; if (prev->type == CT_PAREN_OPEN) { /* block declaration */ pc->parent_type = CT_OC_BLOCK_TYPE; } else if ((next->type == CT_PAREN_OPEN) || (next->type == CT_BRACE_OPEN)) { /* block expression without return type */ /* block literal */ pc->parent_type = CT_OC_BLOCK_EXPR; } else { /* block expression with return type (seldomly used) */ if (prev->type == CT_ASSIGN) { /* shortcut to spare the peeking below * the XOR operator wouldn't be used directly * after an assign all by itself */ pc->parent_type = CT_OC_BLOCK_EXPR; } else { /* this ones tricky because we don't know how many * stars the return type has - if there even is one */ tmp = pc; while ((tmp = chunk_get_next(tmp)) != NULL) { /* we just peek ahead and see if the line contains * an open brace somewhere. * FIXME: this check needs to be more thorough. */ if (tmp->type == CT_BRACE_OPEN) { pc->parent_type = CT_OC_BLOCK_EXPR; break; } } } } } /* Handle special preprocessor junk */ if (pc->type == CT_PREPROC) { pc->parent_type = next->type; } /* Detect "pragma region" and "pragma endregion" */ if ((pc->type == CT_PP_PRAGMA) && (next->type == CT_PREPROC_BODY)) { if ((memcmp(next->str, "region", 6) == 0) || (memcmp(next->str, "endregion", 9) == 0)) { pc->type = (*next->str == 'r') ? CT_PP_REGION : CT_PP_ENDREGION; prev->parent_type = pc->type; } } /* Check for C# nullable types '?' is in next */ if ((cpd.lang_flags & LANG_CS) && (next->type == CT_QUESTION) && (next->orig_col == (pc->orig_col + pc->len))) { tmp = chunk_get_next_ncnl(next); if (tmp != NULL) { bool doit = ((tmp->type == CT_PAREN_CLOSE) || (tmp->type == CT_ANGLE_CLOSE)); if (tmp->type == CT_WORD) { tmp2 = chunk_get_next_ncnl(tmp); if ((tmp2 != NULL) && ((tmp2->type == CT_SEMICOLON) || (tmp2->type == CT_ASSIGN) || (tmp2->type == CT_COMMA))) { doit = true; } } if (doit) { pc->len++; chunk_del(next); next = tmp; } } } /* Change 'default(' into a sizeof-like statement */ if ((cpd.lang_flags & LANG_CS) && (pc->type == CT_DEFAULT) && (next->type == CT_PAREN_OPEN)) { pc->type = CT_SIZEOF; } if ((pc->type == CT_UNSAFE) && (next->type != CT_BRACE_OPEN)) { pc->type = CT_QUALIFIER; } if ((pc->type == CT_USING) && (next->type == CT_PAREN_OPEN)) { pc->type = CT_USING_STMT; } /* Add minimal support for C++0x rvalue references */ if ((pc->type == CT_BOOL) && chunk_is_str(pc, "&&", 2)) { if (prev->type == CT_TYPE) { pc->type = CT_BYREF; } } /* HACK: treat try followed by a colon as a qualifier to handle this: * A::A(int) try : B() { } catch (...) { } */ if ((pc->type == CT_TRY) && chunk_is_str(pc, "try", 3) && (next != NULL) && (next->type == CT_COLON)) { pc->type = CT_QUALIFIER; } /* TODO: determine other stuff here */ prev = pc; pc = next; next = chunk_get_next_ncnl(pc); } }