/** * If there is nothing but CT_WORD and CT_MEMBER, then it's probably a * template thingy. Otherwise, it's likely a comparison. */ static void check_template(chunk_t *start) { chunk_t *pc; chunk_t *end; chunk_t *prev; chunk_t *next; bool in_if = false; LOG_FMT(LTEMPL, "%s: Line %d, col %d:", __func__, start->orig_line, start->orig_col); prev = chunk_get_prev_ncnl(start, CNAV_PREPROC); if (prev == NULL) { return; } if (prev->type == CT_TEMPLATE) { LOG_FMT(LTEMPL, " CT_TEMPLATE:"); /* We have: "template< ... >", which is a template declaration */ int level = 1; for (pc = chunk_get_next_ncnl(start, CNAV_PREPROC); pc != NULL; pc = chunk_get_next_ncnl(pc, CNAV_PREPROC)) { LOG_FMT(LTEMPL, " [%s,%d]", get_token_name(pc->type), level); if ((pc->str[0] == '>') && (pc->len() > 1)) { LOG_FMT(LTEMPL, " {split '%s' at %d:%d}", pc->text(), pc->orig_line, pc->orig_col); split_off_angle_close(pc); } if (chunk_is_str(pc, "<", 1)) { level++; } else if (chunk_is_str(pc, ">", 1)) { level--; if (level == 0) { break; } } } end = pc; } else { /* We may have something like "a< ... >", which is a template where * '...' may consist of anything except braces {}, a semicolon, and * unbalanced parens. * if we are inside an 'if' statement and hit a CT_BOOL, then it isn't a * template. */ /* A template requires a word/type right before the open angle */ if ((prev->type != CT_WORD) && (prev->type != CT_TYPE) && (prev->type != CT_COMMA) && (prev->type != CT_OPERATOR_VAL) && (prev->parent_type != CT_OPERATOR)) { LOG_FMT(LTEMPL, " - after %s + ( - Not a template\n", get_token_name(prev->type)); set_chunk_type(start, CT_COMPARE); return; } LOG_FMT(LTEMPL, " - prev %s -", get_token_name(prev->type)); /* Scan back and make sure we aren't inside square parens */ pc = start; while ((pc = chunk_get_prev_ncnl(pc, CNAV_PREPROC)) != NULL) { if ((pc->type == CT_SEMICOLON) || (pc->type == CT_BRACE_OPEN) || (pc->type == CT_BRACE_CLOSE) || (pc->type == CT_SQUARE_CLOSE)) { break; } if ((pc->type == CT_IF) || (pc->type == CT_RETURN)) { in_if = true; break; } if (pc->type == CT_SQUARE_OPEN) { LOG_FMT(LTEMPL, " - Not a template: after a square open\n"); set_chunk_type(start, CT_COMPARE); return; } } /* Scan forward to the angle close * If we have a comparison in there, then it can't be a template. */ c_token_t tokens[1024]; int num_tokens = 1; tokens[0] = CT_ANGLE_OPEN; for (pc = chunk_get_next_ncnl(start, CNAV_PREPROC); pc != NULL; pc = chunk_get_next_ncnl(pc, CNAV_PREPROC)) { LOG_FMT(LTEMPL, " [%s,%d]", get_token_name(pc->type), num_tokens); if ((tokens[num_tokens - 1] == CT_ANGLE_OPEN) && (pc->str[0] == '>') && (pc->len() > 1) && (cpd.settings[UO_tok_split_gte].b || (chunk_is_str(pc, ">>", 2) && (num_tokens >= 2)))) { LOG_FMT(LTEMPL, " {split '%s' at %d:%d}", pc->text(), pc->orig_line, pc->orig_col); split_off_angle_close(pc); } if (chunk_is_str(pc, "<", 1)) { tokens[num_tokens++] = CT_ANGLE_OPEN; } else if (chunk_is_str(pc, ">", 1)) { if ((num_tokens > 0) && (tokens[num_tokens - 1] == CT_PAREN_OPEN)) { handle_double_angle_close(pc); } else if (--num_tokens <= 0) { break; } else if (tokens[num_tokens] != CT_ANGLE_OPEN) { /* unbalanced parens */ break; } } else if (in_if && ((pc->type == CT_BOOL) || (pc->type == CT_COMPARE))) { break; } else if ((pc->type == CT_BRACE_OPEN) || (pc->type == CT_BRACE_CLOSE) || (pc->type == CT_SEMICOLON)) { break; } else if (pc->type == CT_PAREN_OPEN) { if (num_tokens >= (int)(ARRAY_SIZE(tokens) - 1)) { break; } tokens[num_tokens++] = CT_PAREN_OPEN; } else if (pc->type == CT_PAREN_CLOSE) { num_tokens--; if (tokens[num_tokens] != CT_PAREN_OPEN) { /* unbalanced parens */ break; } } } end = pc; } if ((end != NULL) && (end->type == CT_ANGLE_CLOSE)) { pc = chunk_get_next_ncnl(end, CNAV_PREPROC); if ((pc == NULL) || (pc->type != CT_NUMBER)) { LOG_FMT(LTEMPL, " - Template Detected\n"); set_chunk_parent(start, CT_TEMPLATE); pc = start; while (pc != end) { next = chunk_get_next_ncnl(pc, CNAV_PREPROC); chunk_flags_set(pc, PCF_IN_TEMPLATE); if (next->type != CT_PAREN_OPEN) { make_type(pc); } pc = next; } set_chunk_parent(end, CT_TEMPLATE); chunk_flags_set(end, PCF_IN_TEMPLATE); return; } } LOG_FMT(LTEMPL, " - Not a template: end = %s\n", (end != NULL) ? get_token_name(end->type) : "<null>"); set_chunk_type(start, CT_COMPARE); } // check_template
/** * Checks the progression of complex statements. * - checks for else after if * - checks for if after else * - checks for while after do * - checks for open brace in BRACE2 and BRACE_DO stages, inserts open VBRACE * - checks for open paren in PAREN1 and PAREN2 stages, complains * * @param frm The parse frame * @param pc The current chunk * @return true - done with this chunk, false - keep processing */ static bool check_complex_statements(struct parse_frame *frm, chunk_t *pc) { LOG_FUNC_ENTRY(); c_token_t parent; chunk_t *vbrace; /* Turn an optional paren into either a real paren or a brace */ if (frm->pse[frm->pse_tos].stage == BS_OP_PAREN1) { frm->pse[frm->pse_tos].stage = (pc->type != CT_PAREN_OPEN) ? BS_BRACE2 : BS_PAREN1; } /* Check for CT_ELSE after CT_IF */ while (frm->pse[frm->pse_tos].stage == BS_ELSE) { if (pc->type == CT_ELSE) { /* Replace CT_IF with CT_ELSE on the stack & we are done */ frm->pse[frm->pse_tos].type = CT_ELSE; frm->pse[frm->pse_tos].stage = BS_ELSEIF; print_stack(LBCSSWAP, "=Swap ", frm, pc); return(true); } /* Remove the CT_IF and close the statement */ frm->pse_tos--; print_stack(LBCSPOP, "-IF-CCS ", frm, pc); if (close_statement(frm, pc)) { return(true); } } /* Check for CT_IF after CT_ELSE */ if (frm->pse[frm->pse_tos].stage == BS_ELSEIF) { if (pc->type == CT_IF) { if (!cpd.settings[UO_indent_else_if].b || !chunk_is_newline(chunk_get_prev_nc(pc))) { /* Replace CT_ELSE with CT_IF */ set_chunk_type(pc, CT_ELSEIF); frm->pse[frm->pse_tos].type = CT_ELSEIF; frm->pse[frm->pse_tos].stage = BS_PAREN1; return(true); } } /* Jump to the 'expecting brace' stage */ frm->pse[frm->pse_tos].stage = BS_BRACE2; } /* Check for CT_CATCH or CT_FINALLY after CT_TRY or CT_CATCH */ while (frm->pse[frm->pse_tos].stage == BS_CATCH) { if ((pc->type == CT_CATCH) || (pc->type == CT_FINALLY)) { /* Replace CT_TRY with CT_CATCH on the stack & we are done */ frm->pse[frm->pse_tos].type = pc->type; frm->pse[frm->pse_tos].stage = (pc->type == CT_CATCH) ? BS_CATCH_WHEN : BS_BRACE2; print_stack(LBCSSWAP, "=Swap ", frm, pc); return(true); } /* Remove the CT_TRY and close the statement */ frm->pse_tos--; print_stack(LBCSPOP, "-TRY-CCS ", frm, pc); if (close_statement(frm, pc)) { return(true); } } /* Check for optional paren and optional CT_WHEN after CT_CATCH */ if (frm->pse[frm->pse_tos].stage == BS_CATCH_WHEN) { if (pc->type == CT_PAREN_OPEN) // this is for the paren after "catch" { /* Replace CT_PAREN_OPEN with CT_SPAREN_OPEN */ set_chunk_type(pc, CT_SPAREN_OPEN); frm->pse[frm->pse_tos].type = pc->type; frm->pse[frm->pse_tos].stage = BS_PAREN1; return(false); } else if (pc->type == CT_WHEN) { frm->pse[frm->pse_tos].type = pc->type; frm->pse[frm->pse_tos].stage = BS_OP_PAREN1; return(true); } else if (pc->type == CT_BRACE_OPEN) { frm->pse[frm->pse_tos].stage = BS_BRACE2; return(false); } } /* Check for CT_WHILE after the CT_DO */ if (frm->pse[frm->pse_tos].stage == BS_WHILE) { if (pc->type == CT_WHILE) { set_chunk_type(pc, CT_WHILE_OF_DO); frm->pse[frm->pse_tos].type = CT_WHILE_OF_DO; //CT_WHILE; frm->pse[frm->pse_tos].stage = BS_WOD_PAREN; return(true); } LOG_FMT(LWARN, "%s:%lu Error: Expected 'while', got '%s'\n", cpd.filename, pc->orig_line, pc->text()); frm->pse_tos--; print_stack(LBCSPOP, "-Error ", frm, pc); cpd.error_count++; } /* Insert a CT_VBRACE_OPEN, if needed */ if ((pc->type != CT_BRACE_OPEN) && ((frm->pse[frm->pse_tos].stage == BS_BRACE2) || (frm->pse[frm->pse_tos].stage == BS_BRACE_DO))) { if ((cpd.lang_flags & LANG_CS) && (pc->type == CT_USING_STMT) && (!cpd.settings[UO_indent_using_block].b)) { // don't indent the using block } else { parent = frm->pse[frm->pse_tos].type; vbrace = insert_vbrace_open_before(pc, frm); set_chunk_parent(vbrace, parent); frm->level++; frm->brace_level++; push_fmr_pse(frm, vbrace, BS_NONE, "+VBrace "); frm->pse[frm->pse_tos].parent = parent; /* update the level of pc */ pc->level = frm->level; pc->brace_level = frm->brace_level; /* Mark as a start of a statement */ frm->stmt_count = 0; frm->expr_count = 0; pc->flags |= PCF_STMT_START | PCF_EXPR_START; frm->stmt_count = 1; frm->expr_count = 1; LOG_FMT(LSTMT, "%lu] 2.marked %s as stmt start\n", pc->orig_line, pc->text()); } } /* Verify open paren in complex statement */ if ((pc->type != CT_PAREN_OPEN) && ((frm->pse[frm->pse_tos].stage == BS_PAREN1) || (frm->pse[frm->pse_tos].stage == BS_WOD_PAREN))) { LOG_FMT(LWARN, "%s:%lu Error: Expected '(', got '%s' for '%s'\n", cpd.filename, pc->orig_line, pc->text(), get_token_name(frm->pse[frm->pse_tos].type)); /* Throw out the complex statement */ frm->pse_tos--; print_stack(LBCSPOP, "-Error ", frm, pc); cpd.error_count++; } return(false); } // check_complex_statements
void tokenize_cleanup(void) { LOG_FUNC_ENTRY(); chunk_t *pc = chunk_get_head(); chunk_t *prev = NULL; chunk_t *next; chunk_t *tmp; chunk_t *tmp2; bool in_type_cast = false; cpd.unc_stage = US_TOKENIZE_CLEANUP; /* Since [] is expected to be TSQUARE for the 'operator', we need to make * this change in the first pass. */ for (pc = chunk_get_head(); pc != NULL; pc = chunk_get_next_ncnl(pc)) { if (pc->type == CT_SQUARE_OPEN) { next = chunk_get_next_ncnl(pc); if (chunk_is_token(next, CT_SQUARE_CLOSE)) { /* Change '[' + ']' into '[]' */ set_chunk_type(pc, CT_TSQUARE); pc->str = "[]"; // bug # 664 // The original orig_col_end of CT_SQUARE_CLOSE is stored at orig_col_end of CT_TSQUARE. // pc->orig_col_end += 1; pc->orig_col_end = next->orig_col_end; chunk_del(next); } } if ((pc->type == CT_SEMICOLON) && (pc->flags & PCF_IN_PREPROC) && !chunk_get_next_ncnl(pc, CNAV_PREPROC)) { LOG_FMT(LNOTE, "%s:%d Detected a macro that ends with a semicolon. Possible failures if used.\n", cpd.filename, pc->orig_line); } } /* We can handle everything else in the second pass */ pc = chunk_get_head(); next = chunk_get_next_ncnl(pc); while ((pc != NULL) && (next != NULL)) { if ((pc->type == CT_DOT) && (cpd.lang_flags & LANG_ALLC)) { set_chunk_type(pc, CT_MEMBER); } if ((pc->type == CT_NULLCOND) && (cpd.lang_flags & LANG_CS)) { set_chunk_type(pc, CT_MEMBER); } /* Determine the version stuff (D only) */ if (pc->type == CT_D_VERSION) { if (next->type == CT_PAREN_OPEN) { set_chunk_type(pc, CT_D_VERSION_IF); } else { if (next->type != CT_ASSIGN) { LOG_FMT(LERR, "%s:%d %s: version: Unexpected token %s\n", cpd.filename, pc->orig_line, __func__, get_token_name(next->type)); cpd.error_count++; } set_chunk_type(pc, CT_WORD); } } /* Determine the scope stuff (D only) */ if (pc->type == CT_D_SCOPE) { if (next->type == CT_PAREN_OPEN) { set_chunk_type(pc, CT_D_SCOPE_IF); } else { set_chunk_type(pc, CT_TYPE); } } /** * Change CT_BASE before CT_PAREN_OPEN to CT_WORD. * public myclass() : base() { * } */ if ((pc->type == CT_BASE) && (next->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_WORD); } if ((pc->type == CT_ENUM) && (next->type == CT_CLASS)) { set_chunk_type(next, CT_ENUM_CLASS); } /** * Change CT_WORD after CT_ENUM, CT_UNION, or CT_STRUCT to CT_TYPE * Change CT_WORD before CT_WORD to CT_TYPE */ if (next->type == CT_WORD) { if ((pc->type == CT_ENUM) || (pc->type == CT_ENUM_CLASS) || (pc->type == CT_UNION) || (pc->type == CT_STRUCT)) { set_chunk_type(next, CT_TYPE); } if (pc->type == CT_WORD) { set_chunk_type(pc, CT_TYPE); } } /* change extern to qualifier if extern isn't followed by a string or * an open paren */ if (pc->type == CT_EXTERN) { if (next->type == CT_STRING) { /* Probably 'extern "C"' */ } else if (next->type == CT_PAREN_OPEN) { /* Probably 'extern (C)' */ } else { /* Something else followed by a open brace */ tmp = chunk_get_next_ncnl(next); if ((tmp == NULL) || (tmp->type != CT_BRACE_OPEN)) { set_chunk_type(pc, CT_QUALIFIER); } } } /** * Change CT_STAR to CT_PTR_TYPE if preceded by CT_TYPE, * CT_QUALIFIER, or CT_PTR_TYPE. */ if ((next->type == CT_STAR) && ((pc->type == CT_TYPE) || (pc->type == CT_QUALIFIER) || (pc->type == CT_PTR_TYPE))) { set_chunk_type(next, CT_PTR_TYPE); } if ((pc->type == CT_TYPE_CAST) && (next->type == CT_ANGLE_OPEN)) { set_chunk_parent(next, CT_TYPE_CAST); in_type_cast = true; } /** * Change angle open/close to CT_COMPARE, if not a template thingy */ if ((pc->type == CT_ANGLE_OPEN) && (pc->parent_type != CT_TYPE_CAST)) { /* pretty much all languages except C use <> for something other than * comparisons. "#include<xxx>" is handled elsewhere. */ if (cpd.lang_flags & (LANG_CPP | LANG_CS | LANG_JAVA | LANG_VALA | LANG_OC)) { // bug #663 check_template(pc); } else { /* convert CT_ANGLE_OPEN to CT_COMPARE */ set_chunk_type(pc, CT_COMPARE); } } if ((pc->type == CT_ANGLE_CLOSE) && (pc->parent_type != CT_TEMPLATE)) { if (in_type_cast) { in_type_cast = false; set_chunk_parent(pc, CT_TYPE_CAST); } else { next = handle_double_angle_close(pc); } } if (cpd.lang_flags & LANG_D) { /* Check for the D string concat symbol '~' */ if ((pc->type == CT_INV) && ((prev->type == CT_STRING) || (prev->type == CT_WORD) || (next->type == CT_STRING))) { set_chunk_type(pc, CT_CONCAT); } /* Check for the D template symbol '!' (word + '!' + word or '(') */ if ((pc->type == CT_NOT) && (prev->type == CT_WORD) && ((next->type == CT_PAREN_OPEN) || (next->type == CT_WORD) || (next->type == CT_TYPE))) { set_chunk_type(pc, CT_D_TEMPLATE); } /* handle "version(unittest) { }" vs "unittest { }" */ if (prev && (pc->type == CT_UNITTEST) && (prev->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_WORD); } /* handle 'static if' and merge the tokens */ if (prev && (pc->type == CT_IF) && chunk_is_str(prev, "static", 6)) { /* delete PREV and merge with IF */ pc->str.insert(0, ' '); pc->str.insert(0, prev->str); pc->orig_col = prev->orig_col; pc->orig_line = prev->orig_line; chunk_t *to_be_deleted = prev; prev = chunk_get_prev_ncnl(prev); chunk_del(to_be_deleted); } } if (cpd.lang_flags & LANG_CPP) { /* Change Word before '::' into a type */ if ((pc->type == CT_WORD) && (next->type == CT_DC_MEMBER)) { set_chunk_type(pc, CT_TYPE); } } /* Change get/set to CT_WORD if not followed by a brace open */ if ((pc->type == CT_GETSET) && (next->type != CT_BRACE_OPEN)) { if ((next->type == CT_SEMICOLON) && ((prev->type == CT_BRACE_CLOSE) || (prev->type == CT_BRACE_OPEN) || (prev->type == CT_SEMICOLON))) { set_chunk_type(pc, CT_GETSET_EMPTY); set_chunk_parent(next, CT_GETSET); } else { set_chunk_type(pc, CT_WORD); } } /* Interface is only a keyword in MS land if followed by 'class' or 'struct' * likewise, 'class' may be a member name in Java. */ if ((pc->type == CT_CLASS) && !CharTable::IsKw1(next->str[0])) { set_chunk_type(pc, CT_WORD); } /* Change item after operator (>=, ==, etc) to a CT_OPERATOR_VAL * Usually the next item is part of the operator. * In a few cases the next few tokens are part of it: * operator + - common case * operator >> - need to combine '>' and '>' * operator () * operator [] - already converted to TSQUARE * operator new [] * operator delete [] * operator const char * * operator const B& * operator std::allocator<U> * * In all cases except the last, this will put the entire operator value * in one chunk. */ if (pc->type == CT_OPERATOR) { tmp2 = chunk_get_next(next); /* Handle special case of () operator -- [] already handled */ if (next->type == CT_PAREN_OPEN) { tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->type == CT_PAREN_CLOSE)) { next->str = "()"; set_chunk_type(next, CT_OPERATOR_VAL); chunk_del(tmp); next->orig_col_end += 1; } } else if ((next->type == CT_ANGLE_CLOSE) && tmp2 && (tmp2->type == CT_ANGLE_CLOSE) && (tmp2->orig_col == next->orig_col_end)) { next->str.append('>'); next->orig_col_end++; set_chunk_type(next, CT_OPERATOR_VAL); chunk_del(tmp2); } else if (next->flags & PCF_PUNCTUATOR) { set_chunk_type(next, CT_OPERATOR_VAL); } else { set_chunk_type(next, CT_TYPE); /* Replace next with a collection of all tokens that are part of * the type. */ tmp2 = next; while ((tmp = chunk_get_next(tmp2)) != NULL) { if ((tmp->type != CT_WORD) && (tmp->type != CT_TYPE) && (tmp->type != CT_QUALIFIER) && (tmp->type != CT_STAR) && (tmp->type != CT_CARET) && (tmp->type != CT_AMP) && (tmp->type != CT_TSQUARE)) { break; } /* Change tmp into a type so that space_needed() works right */ make_type(tmp); int num_sp = space_needed(tmp2, tmp); while (num_sp-- > 0) { next->str.append(" "); } next->str.append(tmp->str); tmp2 = tmp; } while ((tmp2 = chunk_get_next(next)) != tmp) { chunk_del(tmp2); } set_chunk_type(next, CT_OPERATOR_VAL); next->orig_col_end = next->orig_col + next->len(); } set_chunk_parent(next, CT_OPERATOR); LOG_FMT(LOPERATOR, "%s: %d:%d operator '%s'\n", __func__, pc->orig_line, pc->orig_col, next->text()); } /* Change private, public, protected into either a qualifier or label */ if (pc->type == CT_PRIVATE) { /* Handle Qt slots - maybe should just check for a CT_WORD? */ if (chunk_is_str(next, "slots", 5) || chunk_is_str(next, "Q_SLOTS", 7)) { tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->type == CT_COLON)) { next = tmp; } } if (next->type == CT_COLON) { set_chunk_type(next, CT_PRIVATE_COLON); if ((tmp = chunk_get_next_ncnl(next)) != NULL) { chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START); } } else { set_chunk_type(pc, (chunk_is_str(pc, "signals", 7) || chunk_is_str(pc, "Q_SIGNALS", 9)) ? CT_WORD : CT_QUALIFIER); } } /* Look for <newline> 'EXEC' 'SQL' */ if ((chunk_is_str(pc, "EXEC", 4) && chunk_is_str(next, "SQL", 3)) || ((*pc->str == '$') && (pc->type != CT_SQL_WORD))) { tmp = chunk_get_prev(pc); if (chunk_is_newline(tmp)) { if (*pc->str == '$') { set_chunk_type(pc, CT_SQL_EXEC); if (pc->len() > 1) { /* SPLIT OFF '$' */ chunk_t nc; nc = *pc; pc->str.resize(1); pc->orig_col_end = pc->orig_col + 1; nc.type = CT_SQL_WORD; nc.str.pop_front(); nc.orig_col++; nc.column++; chunk_add_after(&nc, pc); next = chunk_get_next(pc); } } tmp = chunk_get_next(next); if (chunk_is_str_case(tmp, "BEGIN", 5)) { set_chunk_type(pc, CT_SQL_BEGIN); } else if (chunk_is_str_case(tmp, "END", 3)) { set_chunk_type(pc, CT_SQL_END); } else { set_chunk_type(pc, CT_SQL_EXEC); } /* Change words into CT_SQL_WORD until CT_SEMICOLON */ while (tmp != NULL) { if (tmp->type == CT_SEMICOLON) { break; } if ((tmp->len() > 0) && (unc_isalpha(*tmp->str) || (*tmp->str == '$'))) { set_chunk_type(tmp, CT_SQL_WORD); } tmp = chunk_get_next_ncnl(tmp); } } } /* handle MS abomination 'for each' */ if ((pc->type == CT_FOR) && chunk_is_str(next, "each", 4) && (next == chunk_get_next(pc))) { /* merge the two with a space between */ pc->str.append(' '); pc->str += next->str; pc->orig_col_end = next->orig_col_end; chunk_del(next); next = chunk_get_next_ncnl(pc); /* label the 'in' */ if (next && (next->type == CT_PAREN_OPEN)) { tmp = chunk_get_next_ncnl(next); while (tmp && (tmp->type != CT_PAREN_CLOSE)) { if (chunk_is_str(tmp, "in", 2)) { set_chunk_type(tmp, CT_IN); break; } tmp = chunk_get_next_ncnl(tmp); } } } /* ObjectiveC allows keywords to be used as identifiers in some situations * This is a dirty hack to allow some of the more common situations. */ if (cpd.lang_flags & LANG_OC) { if (((pc->type == CT_IF) || (pc->type == CT_FOR) || (pc->type == CT_WHILE)) && !chunk_is_token(next, CT_PAREN_OPEN)) { set_chunk_type(pc, CT_WORD); } if ((pc->type == CT_DO) && (chunk_is_token(prev, CT_MINUS) || chunk_is_token(next, CT_SQUARE_CLOSE))) { set_chunk_type(pc, CT_WORD); } } /* Another hack to clean up more keyword abuse */ if ((pc->type == CT_CLASS) && (chunk_is_token(prev, CT_DOT) || chunk_is_token(next, CT_DOT))) { set_chunk_type(pc, CT_WORD); } /* Detect Objective C class name */ if ((pc->type == CT_OC_IMPL) || (pc->type == CT_OC_INTF) || (pc->type == CT_OC_PROTOCOL)) { if (next->type != CT_PAREN_OPEN) { set_chunk_type(next, CT_OC_CLASS); } set_chunk_parent(next, pc->type); tmp = chunk_get_next_ncnl(next); if (tmp != NULL) { chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START); } tmp = chunk_get_next_type(pc, CT_OC_END, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->type); } } if (pc->type == CT_OC_INTF) { tmp = chunk_get_next_ncnl(pc, CNAV_PREPROC); while ((tmp != NULL) && (tmp->type != CT_OC_END)) { if (get_token_pattern_class(tmp->type) != PATCLS_NONE) { LOG_FMT(LOBJCWORD, "@interface %d:%d change '%s' (%s) to CT_WORD\n", pc->orig_line, pc->orig_col, tmp->text(), get_token_name(tmp->type)); set_chunk_type(tmp, CT_WORD); } tmp = chunk_get_next_ncnl(tmp, CNAV_PREPROC); } } /* Detect Objective-C categories and class extensions */ /* @interface ClassName (CategoryName) */ /* @implementation ClassName (CategoryName) */ /* @interface ClassName () */ /* @implementation ClassName () */ if (((pc->parent_type == CT_OC_IMPL) || (pc->parent_type == CT_OC_INTF) || (pc->type == CT_OC_CLASS)) && (next->type == CT_PAREN_OPEN)) { set_chunk_parent(next, pc->parent_type); tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->next != NULL)) { if (tmp->type == CT_PAREN_CLOSE) { //set_chunk_type(tmp, CT_OC_CLASS_EXT); set_chunk_parent(tmp, pc->parent_type); } else { set_chunk_type(tmp, CT_OC_CATEGORY); set_chunk_parent(tmp, pc->parent_type); } } tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->parent_type); } } /* Detect Objective C @property * @property NSString *stringProperty; * @property(nonatomic, retain) NSMutableDictionary *shareWith; */ if (pc->type == CT_OC_PROPERTY) { if (next->type != CT_PAREN_OPEN) { chunk_flags_set(next, PCF_STMT_START | PCF_EXPR_START); } else { set_chunk_parent(next, pc->type); tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->type); tmp = chunk_get_next_ncnl(tmp); if (tmp != NULL) { chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START); tmp = chunk_get_next_type(tmp, CT_SEMICOLON, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->type); } } } } } /* Detect Objective C @selector * @selector(msgNameWithNoArg) * @selector(msgNameWith1Arg:) * @selector(msgNameWith2Args:arg2Name:) */ if ((pc->type == CT_OC_SEL) && (next->type == CT_PAREN_OPEN)) { set_chunk_parent(next, pc->type); tmp = chunk_get_next(next); if (tmp != NULL) { set_chunk_type(tmp, CT_OC_SEL_NAME); set_chunk_parent(tmp, pc->type); while ((tmp = chunk_get_next_ncnl(tmp)) != NULL) { if (tmp->type == CT_PAREN_CLOSE) { set_chunk_parent(tmp, CT_OC_SEL); break; } set_chunk_type(tmp, CT_OC_SEL_NAME); set_chunk_parent(tmp, pc->type); } } } /* Handle special preprocessor junk */ if (pc->type == CT_PREPROC) { set_chunk_parent(pc, next->type); } /* Detect "pragma region" and "pragma endregion" */ if ((pc->type == CT_PP_PRAGMA) && (next->type == CT_PREPROC_BODY)) { if ((memcmp(next->str, "region", 6) == 0) || (memcmp(next->str, "endregion", 9) == 0)) { set_chunk_type(pc, (*next->str == 'r') ? CT_PP_REGION : CT_PP_ENDREGION); set_chunk_parent(prev, pc->type); } } /* Check for C# nullable types '?' is in next */ if ((cpd.lang_flags & LANG_CS) && (next->type == CT_QUESTION) && (next->orig_col == (pc->orig_col + pc->len()))) { tmp = chunk_get_next_ncnl(next); if (tmp != NULL) { bool doit = ((tmp->type == CT_PAREN_CLOSE) || (tmp->type == CT_ANGLE_CLOSE)); if (tmp->type == CT_WORD) { tmp2 = chunk_get_next_ncnl(tmp); if ((tmp2 != NULL) && ((tmp2->type == CT_SEMICOLON) || (tmp2->type == CT_ASSIGN) || (tmp2->type == CT_COMMA) || (tmp2->type == CT_BRACE_OPEN))) { doit = true; } } if (doit) { pc->str += next->str; pc->orig_col_end = next->orig_col_end; chunk_del(next); next = tmp; } } } /* Change 'default(' into a sizeof-like statement */ if ((cpd.lang_flags & LANG_CS) && (pc->type == CT_DEFAULT) && (next->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_SIZEOF); } if ((pc->type == CT_UNSAFE) && (next->type != CT_BRACE_OPEN)) { set_chunk_type(pc, CT_QUALIFIER); } if (((pc->type == CT_USING) || ((pc->type == CT_TRY) && (cpd.lang_flags & LANG_JAVA))) && (next->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_USING_STMT); } /* Add minimal support for C++0x rvalue references */ if ((pc->type == CT_BOOL) && chunk_is_str(pc, "&&", 2)) { if (prev->type == CT_TYPE) { set_chunk_type(pc, CT_BYREF); } } /* HACK: treat try followed by a colon as a qualifier to handle this: * A::A(int) try : B() { } catch (...) { } */ if ((pc->type == CT_TRY) && chunk_is_str(pc, "try", 3) && (next != NULL) && (next->type == CT_COLON)) { set_chunk_type(pc, CT_QUALIFIER); } /* If Java's 'synchronized' is in a method declaration, it should be * a qualifier. */ if ((cpd.lang_flags & LANG_JAVA) && (pc->type == CT_SYNCHRONIZED) && (next->type != CT_PAREN_OPEN)) { set_chunk_type(pc, CT_QUALIFIER); } // guy 2015-11-05 // change CT_DC_MEMBER + CT_FOR into CT_DC_MEMBER + CT_FUNC_CALL if ((pc->type == CT_FOR) && (pc->prev->type == CT_DC_MEMBER)) { set_chunk_type(pc, CT_FUNC_CALL); } /* TODO: determine other stuff here */ prev = pc; pc = next; next = chunk_get_next_ncnl(pc); } } // tokenize_cleanup
/** * At the heart of this algorithm are two stacks. * There is the Paren Stack (PS) and the Frame stack. * * The PS (pse in the code) keeps track of braces, parens, * if/else/switch/do/while/etc items -- anything that is nestable. * Complex statements go through stages. * Take this simple if statement as an example: * if ( x ) { x--; } * * The stack would change like so: 'token' stack afterwards * 'if' [IF - 1] * '(' [IF - 1] [PAREN OPEN] * 'x' [IF - 1] [PAREN OPEN] * ')' [IF - 2] <- note that the state was incremented * '{' [IF - 2] [BRACE OPEN] * 'x' [IF - 2] [BRACE OPEN] * '--' [IF - 2] [BRACE OPEN] * ';' [IF - 2] [BRACE OPEN] * '}' [IF - 3] * <- lack of else kills the IF, closes statement * * Virtual braces example: * if ( x ) x--; else x++; * * 'if' [IF - 1] * '(' [IF - 1] [PAREN OPEN] * 'x' [IF - 1] [PAREN OPEN] * ')' [IF - 2] * 'x' [IF - 2] [VBRACE OPEN] <- VBrace open inserted before because '{' was not next * '--' [IF - 2] [VBRACE OPEN] * ';' [IF - 3] <- VBrace close inserted after semicolon * 'else' [ELSE - 0] <- IF changed into ELSE * 'x' [ELSE - 0] [VBRACE OPEN] <- lack of '{' -> VBrace * '++' [ELSE - 0] [VBRACE OPEN] * ';' [ELSE - 0] <- VBrace close inserted after semicolon * <- ELSE removed after statement close * * The pse stack is kept on a frame stack. * The frame stack is need for languages that support preprocessors (C, C++, C#) * that can arbitrarily change code flow. It also isolates #define macros so * that they are indented independently and do not affect the rest of the program. * * When an #if is hit, a copy of the current frame is push on the frame stack. * When an #else/#elif is hit, a copy of the current stack is pushed under the * #if frame and the original (pre-#if) frame is copied to the current frame. * When #endif is hit, the top frame is popped. * This has the following effects: * - a simple #if / #endif does not affect program flow * - #if / #else /#endif - continues from the #if clause * * When a #define is entered, the current frame is pushed and cleared. * When a #define is exited, the frame is popped. */ static void parse_cleanup(struct parse_frame *frm, chunk_t *pc) { LOG_FUNC_ENTRY(); c_token_t parent = CT_NONE; chunk_t *prev; LOG_FMT(LTOK, "%s:%lu] %16s - tos:%d/%16s stg:%d\n", __func__, pc->orig_line, get_token_name(pc->type), frm->pse_tos, get_token_name(frm->pse[frm->pse_tos].type), frm->pse[frm->pse_tos].stage); /* Mark statement starts */ if (((frm->stmt_count == 0) || (frm->expr_count == 0)) && !chunk_is_semicolon(pc) && (pc->type != CT_BRACE_CLOSE) && (pc->type != CT_VBRACE_CLOSE) && !chunk_is_str(pc, ")", 1) && !chunk_is_str(pc, "]", 1)) { chunk_flags_set(pc, PCF_EXPR_START | ((frm->stmt_count == 0) ? PCF_STMT_START : 0)); LOG_FMT(LSTMT, "%lu] 1.marked %s as %s start st:%d ex:%d\n", pc->orig_line, pc->text(), (pc->flags & PCF_STMT_START) ? "stmt" : "expr", frm->stmt_count, frm->expr_count); } frm->stmt_count++; frm->expr_count++; if (frm->sparen_count > 0) { int tmp; chunk_flags_set(pc, PCF_IN_SPAREN); /* Mark everything in the a for statement */ for (tmp = frm->pse_tos - 1; tmp >= 0; tmp--) { if (frm->pse[tmp].type == CT_FOR) { chunk_flags_set(pc, PCF_IN_FOR); break; } } /* Mark the parent on semicolons in for() stmts */ if ((pc->type == CT_SEMICOLON) && (frm->pse_tos > 1) && (frm->pse[frm->pse_tos - 1].type == CT_FOR)) { set_chunk_parent(pc, CT_FOR); } } /* Check the progression of complex statements */ if (frm->pse[frm->pse_tos].stage != BS_NONE) { if (check_complex_statements(frm, pc)) { return; } } /** * Check for a virtual brace statement close due to a semicolon. * The virtual brace will get handled the next time through. * The semicolon isn't handled at all. * TODO: may need to float VBRACE past comments until newline? */ if (frm->pse[frm->pse_tos].type == CT_VBRACE_OPEN) { if (chunk_is_semicolon(pc)) { cpd.consumed = true; close_statement(frm, pc); } else if (cpd.lang_flags & LANG_PAWN) { if (pc->type == CT_BRACE_CLOSE) { close_statement(frm, pc); } } } /* Handle close paren, vbrace, brace, and square */ if ((pc->type == CT_PAREN_CLOSE) || (pc->type == CT_BRACE_CLOSE) || (pc->type == CT_VBRACE_CLOSE) || (pc->type == CT_ANGLE_CLOSE) || (pc->type == CT_MACRO_CLOSE) || (pc->type == CT_SQUARE_CLOSE)) { /* Change CT_PAREN_CLOSE into CT_SPAREN_CLOSE or CT_FPAREN_CLOSE */ if ((pc->type == CT_PAREN_CLOSE) && ((frm->pse[frm->pse_tos].type == CT_FPAREN_OPEN) || (frm->pse[frm->pse_tos].type == CT_SPAREN_OPEN))) { set_chunk_type(pc, (c_token_t)(frm->pse[frm->pse_tos].type + 1)); if (pc->type == CT_SPAREN_CLOSE) { frm->sparen_count--; chunk_flags_clr(pc, PCF_IN_SPAREN); } } /* Make sure the open / close match */ if (pc->type != (frm->pse[frm->pse_tos].type + 1)) { if ((frm->pse[frm->pse_tos].type != CT_NONE) && (frm->pse[frm->pse_tos].type != CT_PP_DEFINE)) { LOG_FMT(LWARN, "%s: %s:%lu Error: Unexpected '%s' for '%s', which was on line %lu\n", __func__, cpd.filename, pc->orig_line, pc->text(), get_token_name(frm->pse[frm->pse_tos].pc->type), frm->pse[frm->pse_tos].pc->orig_line); print_stack(LBCSPOP, "=Error ", frm, pc); cpd.error_count++; } } else { cpd.consumed = true; /* Copy the parent, update the paren/brace levels */ set_chunk_parent(pc, frm->pse[frm->pse_tos].parent); frm->level--; if ((pc->type == CT_BRACE_CLOSE) || (pc->type == CT_VBRACE_CLOSE) || (pc->type == CT_MACRO_CLOSE)) { frm->brace_level--; } pc->level = frm->level; pc->brace_level = frm->brace_level; /* Pop the entry */ frm->pse_tos--; print_stack(LBCSPOP, "-Close ", frm, pc); /* See if we are in a complex statement */ if (frm->pse[frm->pse_tos].stage != BS_NONE) { handle_complex_close(frm, pc); } } } /* In this state, we expect a semicolon, but we'll also hit the closing * sparen, so we need to check cpd.consumed to see if the close sparen was * aleady handled. */ if (frm->pse[frm->pse_tos].stage == BS_WOD_SEMI) { chunk_t *tmp = pc; if (cpd.consumed) { /* If consumed, then we are on the close sparen. * PAWN: Check the next chunk for a semicolon. If it isn't, then * add a virtual semicolon, which will get handled on the next pass. */ if (cpd.lang_flags & LANG_PAWN) { tmp = chunk_get_next_ncnl(pc); if ((tmp->type != CT_SEMICOLON) && (tmp->type != CT_VSEMICOLON)) { pawn_add_vsemi_after(pc); } } } else { /* Complain if this ISN'T a semicolon, but close out WHILE_OF_DO anyway */ if ((pc->type == CT_SEMICOLON) || (pc->type == CT_VSEMICOLON)) { cpd.consumed = true; set_chunk_parent(pc, CT_WHILE_OF_DO); } else { LOG_FMT(LWARN, "%s:%lu: Error: Expected a semicolon for WHILE_OF_DO, but got '%s'\n", cpd.filename, pc->orig_line, get_token_name(pc->type)); cpd.error_count++; } handle_complex_close(frm, pc); } } /* Get the parent type for brace and paren open */ parent = pc->parent_type; if ((pc->type == CT_PAREN_OPEN) || (pc->type == CT_FPAREN_OPEN) || (pc->type == CT_SPAREN_OPEN) || (pc->type == CT_BRACE_OPEN)) { prev = chunk_get_prev_ncnl(pc); if (prev != NULL) { if ((pc->type == CT_PAREN_OPEN) || (pc->type == CT_FPAREN_OPEN) || (pc->type == CT_SPAREN_OPEN)) { /* Set the parent for parens and change paren type */ if (frm->pse[frm->pse_tos].stage != BS_NONE) { set_chunk_type(pc, CT_SPAREN_OPEN); parent = frm->pse[frm->pse_tos].type; frm->sparen_count++; } else if (prev->type == CT_FUNCTION) { set_chunk_type(pc, CT_FPAREN_OPEN); parent = CT_FUNCTION; } /* NS_ENUM and NS_OPTIONS are followed by a (type, name) pair */ else if ((prev->type == CT_ENUM) && (cpd.lang_flags & LANG_OC)) { /* Treat both as CT_ENUM since the syntax is identical */ set_chunk_type(pc, CT_FPAREN_OPEN); parent = CT_ENUM; } else { /* no need to set parent */ } } else /* must be CT_BRACE_OPEN */ { /* Set the parent for open braces */ if (frm->pse[frm->pse_tos].stage != BS_NONE) { parent = frm->pse[frm->pse_tos].type; } else if ((prev->type == CT_ASSIGN) && (prev->str[0] == '=')) { parent = CT_ASSIGN; } /* Carry through CT_ENUM parent in NS_ENUM (type, name) { */ else if ((prev->type == CT_FPAREN_CLOSE) && (cpd.lang_flags & LANG_OC) && (prev->parent_type == CT_ENUM)) { parent = CT_ENUM; } else if (prev->type == CT_FPAREN_CLOSE) { parent = CT_FUNCTION; } else { /* no need to set parent */ } } } } /** * Adjust the level for opens & create a stack entry * Note that CT_VBRACE_OPEN has already been handled. */ if ((pc->type == CT_BRACE_OPEN) || (pc->type == CT_PAREN_OPEN) || (pc->type == CT_FPAREN_OPEN) || (pc->type == CT_SPAREN_OPEN) || (pc->type == CT_ANGLE_OPEN) || (pc->type == CT_MACRO_OPEN) || (pc->type == CT_SQUARE_OPEN)) { frm->level++; if ((pc->type == CT_BRACE_OPEN) || (pc->type == CT_MACRO_OPEN)) { frm->brace_level++; } push_fmr_pse(frm, pc, BS_NONE, "+Open "); frm->pse[frm->pse_tos].parent = parent; set_chunk_parent(pc, parent); } pattern_class patcls = get_token_pattern_class(pc->type); /** Create a stack entry for complex statements: */ /** if, elseif, switch, for, while, synchronized, using, lock, with, version, CT_D_SCOPE_IF */ if (patcls == PATCLS_BRACED) { push_fmr_pse(frm, pc, (pc->type == CT_DO) ? BS_BRACE_DO : BS_BRACE2, "+ComplexBraced"); } else if (patcls == PATCLS_PBRACED) { brstage_e bs = BS_PAREN1; if ((pc->type == CT_WHILE) && maybe_while_of_do(pc)) { set_chunk_type(pc, CT_WHILE_OF_DO); bs = BS_WOD_PAREN; } push_fmr_pse(frm, pc, bs, "+ComplexParenBraced"); } else if (patcls == PATCLS_OPBRACED) { push_fmr_pse(frm, pc, BS_OP_PAREN1, "+ComplexOpParenBraced"); } else if (patcls == PATCLS_ELSE) { push_fmr_pse(frm, pc, BS_ELSEIF, "+ComplexElse"); } /* Mark simple statement/expression starts * - after { or } * - after ';', but not if the paren stack top is a paren * - after '(' that has a parent type of CT_FOR */ if ((pc->type == CT_SQUARE_OPEN) || ((pc->type == CT_BRACE_OPEN) && (pc->parent_type != CT_ASSIGN)) || (pc->type == CT_BRACE_CLOSE) || (pc->type == CT_VBRACE_CLOSE) || ((pc->type == CT_SPAREN_OPEN) && (pc->parent_type == CT_FOR)) || (pc->type == CT_COLON) || (pc->type == CT_OC_END) || (chunk_is_semicolon(pc) && (frm->pse[frm->pse_tos].type != CT_PAREN_OPEN) && (frm->pse[frm->pse_tos].type != CT_FPAREN_OPEN) && (frm->pse[frm->pse_tos].type != CT_SPAREN_OPEN))) { LOG_FMT(LSTMT, "%s: %lu> reset1 stmt on %s\n", __func__, pc->orig_line, pc->text()); frm->stmt_count = 0; frm->expr_count = 0; } /* Mark expression starts */ chunk_t *tmp = chunk_get_next_ncnl(pc); if ((pc->type == CT_ARITH) || (pc->type == CT_ASSIGN) || (pc->type == CT_CASE) || (pc->type == CT_COMPARE) || ((pc->type == CT_STAR) && tmp && (tmp->type != CT_STAR)) || (pc->type == CT_BOOL) || (pc->type == CT_MINUS) || (pc->type == CT_PLUS) || (pc->type == CT_CARET) || (pc->type == CT_ANGLE_OPEN) || (pc->type == CT_ANGLE_CLOSE) || (pc->type == CT_RETURN) || (pc->type == CT_THROW) || (pc->type == CT_GOTO) || (pc->type == CT_CONTINUE) || (pc->type == CT_PAREN_OPEN) || (pc->type == CT_FPAREN_OPEN) || (pc->type == CT_SPAREN_OPEN) || (pc->type == CT_BRACE_OPEN) || chunk_is_semicolon(pc) || (pc->type == CT_COMMA) || (pc->type == CT_NOT) || (pc->type == CT_INV) || (pc->type == CT_COLON) || (pc->type == CT_QUESTION)) { frm->expr_count = 0; LOG_FMT(LSTMT, "%s: %lu> reset expr on %s\n", __func__, pc->orig_line, pc->text()); } } // parse_cleanup
/** * Called when a statement was just closed and the pse_tos was just * decremented. * * - if the TOS is now VBRACE, insert a CT_VBRACE_CLOSE and recurse. * - if the TOS is a complex statement, call handle_complex_close() * * @return true - done with this chunk, false - keep processing */ bool close_statement(struct parse_frame *frm, chunk_t *pc) { LOG_FUNC_ENTRY(); chunk_t *vbc = pc; LOG_FMT(LTOK, "%s:%lu] %s '%s' type %s stage %d\n", __func__, pc->orig_line, get_token_name(pc->type), pc->text(), get_token_name(frm->pse[frm->pse_tos].type), frm->pse[frm->pse_tos].stage); if (cpd.consumed) { frm->stmt_count = 0; frm->expr_count = 0; LOG_FMT(LSTMT, "%s: %lu> reset2 stmt on %s\n", __func__, pc->orig_line, pc->text()); } /** * Insert a CT_VBRACE_CLOSE, if needed: * If we are in a virtual brace and we are not ON a CT_VBRACE_CLOSE add one */ if (frm->pse[frm->pse_tos].type == CT_VBRACE_OPEN) { /* If the current token has already been consumed, then add after it */ if (cpd.consumed) { insert_vbrace_close_after(pc, frm); } else { /* otherwise, add before it and consume the vbrace */ vbc = chunk_get_prev_ncnl(pc); vbc = insert_vbrace_close_after(vbc, frm); set_chunk_parent(vbc, frm->pse[frm->pse_tos].parent); frm->level--; frm->brace_level--; frm->pse_tos--; /* Update the token level */ pc->level = frm->level; pc->brace_level = frm->brace_level; print_stack(LBCSPOP, "-CS VB ", frm, pc); /* And repeat the close */ close_statement(frm, pc); return(true); } } /* See if we are done with a complex statement */ if (frm->pse[frm->pse_tos].stage != BS_NONE) { if (handle_complex_close(frm, vbc)) { return(true); } } return(false); } // close_statement
static bool check_complex_statements(parse_frame_t *frm, chunk_t *pc) { LOG_FUNC_ENTRY(); c_token_t parent; // Turn an optional parenthesis into either a real parenthesis or a brace if (frm->pse[frm->pse_tos].stage == brace_stage_e::OP_PAREN1) { frm->pse[frm->pse_tos].stage = (pc->type != CT_PAREN_OPEN) ? brace_stage_e::BRACE2 : brace_stage_e::PAREN1; } // Check for CT_ELSE after CT_IF while (frm->pse[frm->pse_tos].stage == brace_stage_e::ELSE) { if (pc->type == CT_ELSE) { // Replace CT_IF with CT_ELSE on the stack & we are done frm->pse[frm->pse_tos].type = CT_ELSE; frm->pse[frm->pse_tos].stage = brace_stage_e::ELSEIF; print_stack(LBCSSWAP, "=Swap ", frm, pc); return(true); } // Remove the CT_IF and close the statement frm->pse_tos--; print_stack(LBCSPOP, "-IF-CCS ", frm, pc); if (close_statement(frm, pc)) { return(true); } } // Check for CT_IF after CT_ELSE if (frm->pse[frm->pse_tos].stage == brace_stage_e::ELSEIF) { if (pc->type == CT_IF) { if ( !cpd.settings[UO_indent_else_if].b || !chunk_is_newline(chunk_get_prev_nc(pc))) { // Replace CT_ELSE with CT_IF set_chunk_type(pc, CT_ELSEIF); frm->pse[frm->pse_tos].type = CT_ELSEIF; frm->pse[frm->pse_tos].stage = brace_stage_e::PAREN1; return(true); } } // Jump to the 'expecting brace' stage frm->pse[frm->pse_tos].stage = brace_stage_e::BRACE2; } // Check for CT_CATCH or CT_FINALLY after CT_TRY or CT_CATCH while (frm->pse[frm->pse_tos].stage == brace_stage_e::CATCH) { if (pc->type == CT_CATCH || pc->type == CT_FINALLY) { // Replace CT_TRY with CT_CATCH on the stack & we are done frm->pse[frm->pse_tos].type = pc->type; frm->pse[frm->pse_tos].stage = (pc->type == CT_CATCH) ? brace_stage_e::CATCH_WHEN : brace_stage_e::BRACE2; print_stack(LBCSSWAP, "=Swap ", frm, pc); return(true); } // Remove the CT_TRY and close the statement frm->pse_tos--; print_stack(LBCSPOP, "-TRY-CCS ", frm, pc); if (close_statement(frm, pc)) { return(true); } } // Check for optional parenthesis and optional CT_WHEN after CT_CATCH if (frm->pse[frm->pse_tos].stage == brace_stage_e::CATCH_WHEN) { if (pc->type == CT_PAREN_OPEN) // this is for the paren after "catch" { // Replace CT_PAREN_OPEN with CT_SPAREN_OPEN set_chunk_type(pc, CT_SPAREN_OPEN); frm->pse[frm->pse_tos].type = pc->type; frm->pse[frm->pse_tos].stage = brace_stage_e::PAREN1; return(false); } if (pc->type == CT_WHEN) { frm->pse[frm->pse_tos].type = pc->type; frm->pse[frm->pse_tos].stage = brace_stage_e::OP_PAREN1; return(true); } if (pc->type == CT_BRACE_OPEN) { frm->pse[frm->pse_tos].stage = brace_stage_e::BRACE2; return(false); } } // Check for CT_WHILE after the CT_DO if (frm->pse[frm->pse_tos].stage == brace_stage_e::WHILE) { if (pc->type == CT_WHILE) { set_chunk_type(pc, CT_WHILE_OF_DO); frm->pse[frm->pse_tos].type = CT_WHILE_OF_DO; //CT_WHILE; frm->pse[frm->pse_tos].stage = brace_stage_e::WOD_PAREN; return(true); } LOG_FMT(LWARN, "%s(%d): %s, orig_line is %zu, Error: Expected 'while', got '%s'\n", __func__, __LINE__, cpd.filename, pc->orig_line, pc->text()); frm->pse_tos--; print_stack(LBCSPOP, "-Error ", frm, pc); cpd.error_count++; } // Insert a CT_VBRACE_OPEN, if needed if ( pc->type != CT_BRACE_OPEN && ( (frm->pse[frm->pse_tos].stage == brace_stage_e::BRACE2) || (frm->pse[frm->pse_tos].stage == brace_stage_e::BRACE_DO))) { if ( (cpd.lang_flags & LANG_CS) && pc->type == CT_USING_STMT && (!cpd.settings[UO_indent_using_block].b)) { // don't indent the using block } else { parent = frm->pse[frm->pse_tos].type; chunk_t *vbrace = insert_vbrace_open_before(pc, frm); set_chunk_parent(vbrace, parent); frm->level++; frm->brace_level++; push_fmr_pse(frm, vbrace, brace_stage_e::NONE, "+VBrace "); frm->pse[frm->pse_tos].parent = parent; // update the level of pc pc->level = frm->level; pc->brace_level = frm->brace_level; // Mark as a start of a statement frm->stmt_count = 0; frm->expr_count = 0; pc->flags |= PCF_STMT_START | PCF_EXPR_START; frm->stmt_count = 1; frm->expr_count = 1; LOG_FMT(LSTMT, "%s(%d): orig_line is %zu, 2.marked '%s' as stmt start\n", __func__, __LINE__, pc->orig_line, pc->text()); } } // Verify open parenthesis in complex statement if ( pc->type != CT_PAREN_OPEN && ( (frm->pse[frm->pse_tos].stage == brace_stage_e::PAREN1) || (frm->pse[frm->pse_tos].stage == brace_stage_e::WOD_PAREN))) { LOG_FMT(LWARN, "%s(%d): %s, orig_line is %zu, Error: Expected '(', got '%s' for '%s'\n", __func__, __LINE__, cpd.filename, pc->orig_line, pc->text(), get_token_name(frm->pse[frm->pse_tos].type)); // Throw out the complex statement frm->pse_tos--; print_stack(LBCSPOP, "-Error ", frm, pc); cpd.error_count++; } return(false); } // check_complex_statements
/** * At the heart of this algorithm are two stacks. * There is the Paren Stack (PS) and the Frame stack. * * The PS (pse in the code) keeps track of braces, parens, * if/else/switch/do/while/etc items -- anything that is nestable. * Complex statements go through stages. * Take this simple if statement as an example: * if ( x ) { x--; } * * The stack would change like so: 'token' stack afterwards * 'if' [IF - 1] * '(' [IF - 1] [PAREN OPEN] * 'x' [IF - 1] [PAREN OPEN] * ')' [IF - 2] <- note that the state was incremented * '{' [IF - 2] [BRACE OPEN] * 'x' [IF - 2] [BRACE OPEN] * '--' [IF - 2] [BRACE OPEN] * ';' [IF - 2] [BRACE OPEN] * '}' [IF - 3] * <- lack of else kills the IF, closes statement * * Virtual braces example: * if ( x ) x--; else x++; * * 'if' [IF - 1] * '(' [IF - 1] [PAREN OPEN] * 'x' [IF - 1] [PAREN OPEN] * ')' [IF - 2] * 'x' [IF - 2] [VBRACE OPEN] <- VBrace open inserted before because '{' was not next * '--' [IF - 2] [VBRACE OPEN] * ';' [IF - 3] <- VBrace close inserted after semicolon * 'else' [ELSE - 0] <- IF changed into ELSE * 'x' [ELSE - 0] [VBRACE OPEN] <- lack of '{' -> VBrace * '++' [ELSE - 0] [VBRACE OPEN] * ';' [ELSE - 0] <- VBrace close inserted after semicolon * <- ELSE removed after statement close * * The pse stack is kept on a frame stack. * The frame stack is need for languages that support preprocessors (C, C++, C#) * that can arbitrarily change code flow. It also isolates #define macros so * that they are indented independently and do not affect the rest of the program. * * When an #if is hit, a copy of the current frame is push on the frame stack. * When an #else/#elif is hit, a copy of the current stack is pushed under the * #if frame and the original (pre-#if) frame is copied to the current frame. * When #endif is hit, the top frame is popped. * This has the following effects: * - a simple #if / #endif does not affect program flow * - #if / #else /#endif - continues from the #if clause * * When a #define is entered, the current frame is pushed and cleared. * When a #define is exited, the frame is popped. */ static void parse_cleanup(parse_frame_t *frm, chunk_t *pc) { LOG_FUNC_ENTRY(); LOG_FMT(LTOK, "%s(%d): orig_line is %zu, type is %s, tos is %zu, TOS.type is %s, TOS.stage is %u\n", __func__, __LINE__, pc->orig_line, get_token_name(pc->type), frm->pse_tos, get_token_name(frm->pse[frm->pse_tos].type), (unsigned int)frm->pse[frm->pse_tos].stage); // Mark statement starts if ( (frm->stmt_count == 0 || frm->expr_count == 0) && !chunk_is_semicolon(pc) && pc->type != CT_BRACE_CLOSE && pc->type != CT_VBRACE_CLOSE && !chunk_is_str(pc, ")", 1) && !chunk_is_str(pc, "]", 1)) { chunk_flags_set(pc, PCF_EXPR_START | ((frm->stmt_count == 0) ? PCF_STMT_START : 0)); LOG_FMT(LSTMT, "%s(%d): orig_line is %zu, 1.marked '%s' as %s, start stmt_count is %d, expr_count is %d\n", __func__, __LINE__, pc->orig_line, pc->text(), (pc->flags & PCF_STMT_START) ? "stmt" : "expr", frm->stmt_count, frm->expr_count); } frm->stmt_count++; frm->expr_count++; if (frm->sparen_count > 0) { chunk_flags_set(pc, PCF_IN_SPAREN); // Mark everything in the for statement for (int tmp = frm->pse_tos - 1; tmp >= 0; tmp--) { if (frm->pse[tmp].type == CT_FOR) { chunk_flags_set(pc, PCF_IN_FOR); break; } } // Mark the parent on semicolons in for() statements if ( pc->type == CT_SEMICOLON && frm->pse_tos > 1 && (frm->pse[frm->pse_tos - 1].type == CT_FOR)) { set_chunk_parent(pc, CT_FOR); } } // Check the progression of complex statements if (frm->pse[frm->pse_tos].stage != brace_stage_e::NONE) { if (check_complex_statements(frm, pc)) { return; } } /* * Check for a virtual brace statement close due to a semicolon. * The virtual brace will get handled the next time through. * The semicolon isn't handled at all. * TODO: may need to float VBRACE past comments until newline? */ if (frm->pse[frm->pse_tos].type == CT_VBRACE_OPEN) { if (chunk_is_semicolon(pc)) { cpd.consumed = true; close_statement(frm, pc); } else if (cpd.lang_flags & LANG_PAWN) { if (pc->type == CT_BRACE_CLOSE) { close_statement(frm, pc); } } } // Handle close parenthesis, vbrace, brace, and square if ( pc->type == CT_PAREN_CLOSE || pc->type == CT_BRACE_CLOSE || pc->type == CT_VBRACE_CLOSE || pc->type == CT_ANGLE_CLOSE || pc->type == CT_MACRO_CLOSE || pc->type == CT_SQUARE_CLOSE) { // Change CT_PAREN_CLOSE into CT_SPAREN_CLOSE or CT_FPAREN_CLOSE if ( pc->type == CT_PAREN_CLOSE && ( (frm->pse[frm->pse_tos].type == CT_FPAREN_OPEN) || (frm->pse[frm->pse_tos].type == CT_SPAREN_OPEN))) { set_chunk_type(pc, (c_token_t)(frm->pse[frm->pse_tos].type + 1)); if (pc->type == CT_SPAREN_CLOSE) { frm->sparen_count--; chunk_flags_clr(pc, PCF_IN_SPAREN); } } // Make sure the open / close match if (pc->type != (frm->pse[frm->pse_tos].type + 1)) { if ( (frm->pse[frm->pse_tos].type != CT_NONE) && (frm->pse[frm->pse_tos].type != CT_PP_DEFINE)) { LOG_FMT(LWARN, "%s(%d): %s, orig_line is %zu, Error: Unexpected '%s' for '%s', which was on line %zu\n", __func__, __LINE__, cpd.filename, pc->orig_line, pc->text(), get_token_name(frm->pse[frm->pse_tos].pc->type), frm->pse[frm->pse_tos].pc->orig_line); print_stack(LBCSPOP, "=Error ", frm, pc); cpd.error_count++; } } else { cpd.consumed = true; // Copy the parent, update the parenthesis/brace levels set_chunk_parent(pc, frm->pse[frm->pse_tos].parent); frm->level--; if ( pc->type == CT_BRACE_CLOSE || pc->type == CT_VBRACE_CLOSE || pc->type == CT_MACRO_CLOSE) { frm->brace_level--; } pc->level = frm->level; pc->brace_level = frm->brace_level; // Pop the entry frm->pse_tos--; print_stack(LBCSPOP, "-Close ", frm, pc); // See if we are in a complex statement if (frm->pse[frm->pse_tos].stage != brace_stage_e::NONE) { handle_complex_close(frm, pc); } } } /* * In this state, we expect a semicolon, but we'll also hit the closing * sparen, so we need to check cpd.consumed to see if the close sparen was * aleady handled. */ if (frm->pse[frm->pse_tos].stage == brace_stage_e::WOD_SEMI) { chunk_t *tmp = pc; if (cpd.consumed) { /* * If consumed, then we are on the close sparen. * PAWN: Check the next chunk for a semicolon. If it isn't, then * add a virtual semicolon, which will get handled on the next pass. */ if (cpd.lang_flags & LANG_PAWN) { tmp = chunk_get_next_ncnl(pc); if (tmp->type != CT_SEMICOLON && tmp->type != CT_VSEMICOLON) { pawn_add_vsemi_after(pc); } } } else { // Complain if this ISN'T a semicolon, but close out WHILE_OF_DO anyway if (pc->type == CT_SEMICOLON || pc->type == CT_VSEMICOLON) { cpd.consumed = true; set_chunk_parent(pc, CT_WHILE_OF_DO); } else { LOG_FMT(LWARN, "%s: %s(%d): %zu: Error: Expected a semicolon for WHILE_OF_DO, but got '%s'\n", cpd.filename, __func__, __LINE__, pc->orig_line, get_token_name(pc->type)); cpd.error_count++; } handle_complex_close(frm, pc); } } // Get the parent type for brace and parenthesis open c_token_t parent = pc->parent_type; if ( pc->type == CT_PAREN_OPEN || pc->type == CT_FPAREN_OPEN || pc->type == CT_SPAREN_OPEN || pc->type == CT_BRACE_OPEN) { chunk_t *prev = chunk_get_prev_ncnl(pc); if (prev != nullptr) { if ( pc->type == CT_PAREN_OPEN || pc->type == CT_FPAREN_OPEN || pc->type == CT_SPAREN_OPEN) { // Set the parent for parenthesis and change parenthesis type if (frm->pse[frm->pse_tos].stage != brace_stage_e::NONE) { set_chunk_type(pc, CT_SPAREN_OPEN); parent = frm->pse[frm->pse_tos].type; frm->sparen_count++; } else if (prev->type == CT_FUNCTION) { set_chunk_type(pc, CT_FPAREN_OPEN); parent = CT_FUNCTION; } // NS_ENUM and NS_OPTIONS are followed by a (type, name) pair else if (prev->type == CT_ENUM && (cpd.lang_flags & LANG_OC)) { // Treat both as CT_ENUM since the syntax is identical set_chunk_type(pc, CT_FPAREN_OPEN); parent = CT_ENUM; } else { // no need to set parent } } else // must be CT_BRACE_OPEN { // Set the parent for open braces if (frm->pse[frm->pse_tos].stage != brace_stage_e::NONE) { parent = frm->pse[frm->pse_tos].type; } else if (prev->type == CT_ASSIGN && (prev->str[0] == '=')) { parent = CT_ASSIGN; } // Carry through CT_ENUM parent in NS_ENUM (type, name) { else if ( prev->type == CT_FPAREN_CLOSE && (cpd.lang_flags & LANG_OC) && prev->parent_type == CT_ENUM) { parent = CT_ENUM; } else if (prev->type == CT_FPAREN_CLOSE) { parent = CT_FUNCTION; } else { // no need to set parent } } } } /* * Adjust the level for opens & create a stack entry * Note that CT_VBRACE_OPEN has already been handled. */ if ( pc->type == CT_BRACE_OPEN || pc->type == CT_PAREN_OPEN || pc->type == CT_FPAREN_OPEN || pc->type == CT_SPAREN_OPEN || pc->type == CT_ANGLE_OPEN || pc->type == CT_MACRO_OPEN || pc->type == CT_SQUARE_OPEN) { frm->level++; if (pc->type == CT_BRACE_OPEN || pc->type == CT_MACRO_OPEN) { frm->brace_level++; } push_fmr_pse(frm, pc, brace_stage_e::NONE, "+Open "); frm->pse[frm->pse_tos].parent = parent; set_chunk_parent(pc, parent); } pattern_class_e patcls = get_token_pattern_class(pc->type); /* * Create a stack entry for complex statements: * if, elseif, switch, for, while, synchronized, using, lock, with, * version, CT_D_SCOPE_IF */ if (patcls == pattern_class_e::BRACED) { push_fmr_pse(frm, pc, (pc->type == CT_DO) ? brace_stage_e::BRACE_DO : brace_stage_e::BRACE2, "+ComplexBraced"); } else if (patcls == pattern_class_e::PBRACED) { brace_stage_e bs = brace_stage_e::PAREN1; if (pc->type == CT_WHILE && maybe_while_of_do(pc)) { set_chunk_type(pc, CT_WHILE_OF_DO); bs = brace_stage_e::WOD_PAREN; } push_fmr_pse(frm, pc, bs, "+ComplexParenBraced"); } else if (patcls == pattern_class_e::OPBRACED) { push_fmr_pse(frm, pc, brace_stage_e::OP_PAREN1, "+ComplexOpParenBraced"); } else if (patcls == pattern_class_e::ELSE) { push_fmr_pse(frm, pc, brace_stage_e::ELSEIF, "+ComplexElse"); } /* * Mark simple statement/expression starts * - after { or } * - after ';', but not if the paren stack top is a paren * - after '(' that has a parent type of CT_FOR */ if ( pc->type == CT_SQUARE_OPEN || (pc->type == CT_BRACE_OPEN && pc->parent_type != CT_ASSIGN) || pc->type == CT_BRACE_CLOSE || pc->type == CT_VBRACE_CLOSE || (pc->type == CT_SPAREN_OPEN && pc->parent_type == CT_FOR) || pc->type == CT_COLON || pc->type == CT_OC_END || ( chunk_is_semicolon(pc) && frm->pse[frm->pse_tos].type != CT_PAREN_OPEN && frm->pse[frm->pse_tos].type != CT_FPAREN_OPEN && frm->pse[frm->pse_tos].type != CT_SPAREN_OPEN)) { LOG_FMT(LSTMT, "%s(%d): orig_line is %zu, reset1 stmt on '%s'\n", __func__, __LINE__, pc->orig_line, pc->text()); frm->stmt_count = 0; frm->expr_count = 0; } // Mark expression starts chunk_t *tmp = chunk_get_next_ncnl(pc); if ( pc->type == CT_ARITH || pc->type == CT_ASSIGN || pc->type == CT_CASE || pc->type == CT_COMPARE || ( pc->type == CT_STAR && tmp != nullptr && tmp->type != CT_STAR) || pc->type == CT_BOOL || pc->type == CT_MINUS || pc->type == CT_PLUS || pc->type == CT_CARET || pc->type == CT_ANGLE_OPEN || pc->type == CT_ANGLE_CLOSE || pc->type == CT_RETURN || pc->type == CT_THROW || pc->type == CT_GOTO || pc->type == CT_CONTINUE || pc->type == CT_PAREN_OPEN || pc->type == CT_FPAREN_OPEN || pc->type == CT_SPAREN_OPEN || pc->type == CT_BRACE_OPEN || chunk_is_semicolon(pc) || pc->type == CT_COMMA || pc->type == CT_NOT || pc->type == CT_INV || pc->type == CT_COLON || pc->type == CT_QUESTION) { frm->expr_count = 0; LOG_FMT(LSTMT, "%s(%d): orig_line is %zu, reset expr on '%s'\n", __func__, __LINE__, pc->orig_line, pc->text()); } else if (pc->type == CT_BRACE_CLOSE) { if (!cpd.consumed) { size_t file_pp_level = ifdef_over_whole_file() ? 1 : 0; if (!cpd.unc_off_used && pc->pp_level == file_pp_level) { // fatal error char *outputMessage; if (cpd.settings[UO_tok_split_gte].b) { outputMessage = make_message("Unmatched BRACE_CLOSE\nat orig_line=%zu, orig_col=%zu\n", pc->orig_line, pc->orig_col); } else { outputMessage = make_message("Unmatched BRACE_CLOSE\nat orig_line=%zu, orig_col=%zu\nTry the option 'tok_split_gte = true'\n", pc->orig_line, pc->orig_col); } fprintf(stderr, "%s", outputMessage); free(outputMessage); log_flush(true); exit(EXIT_FAILURE); } } } } // parse_cleanup
static chunk_t *pawn_process_func_def(chunk_t *pc) { LOG_FUNC_ENTRY(); /* We are on a function definition */ chunk_t *clp; chunk_t *last; chunk_t *next; set_chunk_type(pc, CT_FUNC_DEF); LOG_FMT(LPFUNC, "%s: %lu:%lu %s\n", __func__, pc->orig_line, pc->orig_col, pc->text()); /* If we don't have a brace open right after the close fparen, then * we need to add virtual braces around the function body. */ clp = chunk_get_next_str(pc, ")", 1, 0); last = chunk_get_next_ncnl(clp); if (last != NULL) { LOG_FMT(LPFUNC, "%s: %lu] last is '%s' [%s]\n", __func__, last->orig_line, last->text(), get_token_name(last->type)); } /* See if there is a state clause after the function */ if ((last != NULL) && chunk_is_str(last, "<", 1)) { LOG_FMT(LPFUNC, "%s: %lu] '%s' has state angle open %s\n", __func__, pc->orig_line, pc->text(), get_token_name(last->type)); set_chunk_type(last, CT_ANGLE_OPEN); set_chunk_parent(last, CT_FUNC_DEF); while (((last = chunk_get_next(last)) != NULL) && !chunk_is_str(last, ">", 1)) { } if (last != NULL) { LOG_FMT(LPFUNC, "%s: %lu] '%s' has state angle close %s\n", __func__, pc->orig_line, pc->text(), get_token_name(last->type)); set_chunk_type(last, CT_ANGLE_CLOSE); set_chunk_parent(last, CT_FUNC_DEF); } last = chunk_get_next_ncnl(last); } if (last == NULL) { return(last); } if (last->type == CT_BRACE_OPEN) { set_chunk_parent(last, CT_FUNC_DEF); last = chunk_get_next_type(last, CT_BRACE_CLOSE, last->level); if (last != NULL) { set_chunk_parent(last, CT_FUNC_DEF); } } else { LOG_FMT(LPFUNC, "%s: %lu] '%s' fdef: expected brace open: %s\n", __func__, pc->orig_line, pc->text(), get_token_name(last->type)); /* do not insert a vbrace before a preproc */ if (last->flags & PCF_IN_PREPROC) { return(last); } chunk_t chunk; chunk = *last; chunk.str.clear(); chunk.type = CT_VBRACE_OPEN; chunk.parent_type = CT_FUNC_DEF; chunk_t *prev = chunk_add_before(&chunk, last); last = prev; /* find the next newline at level 0 */ prev = chunk_get_next_ncnl(prev); do { LOG_FMT(LPFUNC, "%s:%lu] check %s, level %lu\n", __func__, prev->orig_line, get_token_name(prev->type), prev->level); if ((prev->type == CT_NEWLINE) && (prev->level == 0)) { next = chunk_get_next_ncnl(prev); if ((next != NULL) && (next->type != CT_ELSE) && (next->type != CT_WHILE_OF_DO)) { break; } } prev->level++; prev->brace_level++; last = prev; } while ((prev = chunk_get_next(prev)) != NULL); if (last != NULL) { LOG_FMT(LPFUNC, "%s:%lu] ended on %s, level %lu\n", __func__, last->orig_line, get_token_name(last->type), last->level); } chunk = *last; chunk.str.clear(); chunk.column += last->len(); chunk.type = CT_VBRACE_CLOSE; chunk.level = 0; chunk.brace_level = 0; chunk.parent_type = CT_FUNC_DEF; last = chunk_add_after(&chunk, last); } return(last); } // pawn_process_func_def