/** * This function parses or tokenizes the whole buffer into a list. * It has to do some tricks to parse preprocessors. * * If output_text() were called immediately after, two things would happen: * - trailing whitespace are removed. * - leading space & tabs are converted to the appropriate format. * * All the tokens are inserted before ref. If ref is NULL, they are inserted * at the end of the list. Line numbers are relative to the start of the data. */ void tokenize(const deque<int>& data, chunk_t *ref) { tok_ctx ctx(data); chunk_t chunk; chunk_t *pc = NULL; chunk_t *rprev = NULL; struct parse_frame frm; bool last_was_tab = false; int prev_sp = 0; memset(&frm, 0, sizeof(frm)); while (ctx.more()) { chunk.reset(); if (!parse_next(ctx, chunk)) { LOG_FMT(LERR, "%s:%d Bailed before the end?\n", cpd.filename, ctx.c.row); cpd.error_count++; break; } /* Don't create an entry for whitespace */ if (chunk.type == CT_WHITESPACE) { last_was_tab = chunk.after_tab; prev_sp = chunk.orig_prev_sp; continue; } chunk.orig_prev_sp = prev_sp; prev_sp = 0; if (chunk.type == CT_NEWLINE) { last_was_tab = chunk.after_tab; chunk.after_tab = false; chunk.str.clear(); } else if (chunk.type == CT_NL_CONT) { last_was_tab = chunk.after_tab; chunk.after_tab = false; chunk.str = "\\\n"; } else { chunk.after_tab = last_was_tab; last_was_tab = false; } /* Strip trailing whitespace (for CPP comments and PP blocks) */ while ((chunk.str.size() > 0) && ((chunk.str[chunk.str.size() - 1] == ' ') || (chunk.str[chunk.str.size() - 1] == '\t'))) { // If comment contains backslash '\' followed by whitespace chars, keep last one; // this will prevent it from turning '\' into line continuation. 
if ((chunk.str.size() > 1) && (chunk.str[chunk.str.size() - 2] == '\\')) { break; } chunk.str.pop_back(); } /* Store off the end column */ chunk.orig_col_end = ctx.c.col; /* Add the chunk to the list */ rprev = pc; if (rprev != NULL) { chunk_flags_set(pc, rprev->flags & PCF_COPY_FLAGS); /* a newline can't be in a preprocessor */ if (pc->type == CT_NEWLINE) { chunk_flags_clr(pc, PCF_IN_PREPROC); } } if (ref != NULL) { chunk.flags |= PCF_INSERTED; } else { chunk.flags &= ~PCF_INSERTED; } pc = chunk_add_before(&chunk, ref); /* A newline marks the end of a preprocessor */ if (pc->type == CT_NEWLINE) // || (pc->type == CT_COMMENT_MULTI)) { cpd.in_preproc = CT_NONE; cpd.preproc_ncnl_count = 0; } /* Special handling for preprocessor stuff */ if (cpd.in_preproc != CT_NONE) { chunk_flags_set(pc, PCF_IN_PREPROC); /* Count words after the preprocessor */ if (!chunk_is_comment(pc) && !chunk_is_newline(pc)) { cpd.preproc_ncnl_count++; } /* Figure out the type of preprocessor for #include parsing */ if (cpd.in_preproc == CT_PREPROC) { if ((pc->type < CT_PP_DEFINE) || (pc->type > CT_PP_OTHER)) { set_chunk_type(pc, CT_PP_OTHER); } cpd.in_preproc = pc->type; } } else { /* Check for a preprocessor start */ if ((pc->type == CT_POUND) && ((rprev == NULL) || (rprev->type == CT_NEWLINE))) { set_chunk_type(pc, CT_PREPROC); pc->flags |= PCF_IN_PREPROC; cpd.in_preproc = CT_PREPROC; } } } /* Set the cpd.newline string for this file */ if ((cpd.settings[UO_newlines].le == LE_LF) || ((cpd.settings[UO_newlines].le == LE_AUTO) && (cpd.le_counts[LE_LF] >= cpd.le_counts[LE_CRLF]) && (cpd.le_counts[LE_LF] >= cpd.le_counts[LE_CR]))) { /* LF line ends */ cpd.newline = "\n"; LOG_FMT(LLINEENDS, "Using LF line endings\n"); } else if ((cpd.settings[UO_newlines].le == LE_CRLF) || ((cpd.settings[UO_newlines].le == LE_AUTO) && (cpd.le_counts[LE_CRLF] >= cpd.le_counts[LE_LF]) && (cpd.le_counts[LE_CRLF] >= cpd.le_counts[LE_CR]))) { /* CRLF line ends */ cpd.newline = "\r\n"; LOG_FMT(LLINEENDS, "Using CRLF 
line endings\n"); } else { /* CR line ends */ cpd.newline = "\r"; LOG_FMT(LLINEENDS, "Using CR line endings\n"); } } // tokenize
void tokenize_cleanup(void) { LOG_FUNC_ENTRY(); chunk_t *pc = chunk_get_head(); chunk_t *prev = NULL; chunk_t *next; chunk_t *tmp; chunk_t *tmp2; bool in_type_cast = false; cpd.unc_stage = US_TOKENIZE_CLEANUP; /* Since [] is expected to be TSQUARE for the 'operator', we need to make * this change in the first pass. */ for (pc = chunk_get_head(); pc != NULL; pc = chunk_get_next_ncnl(pc)) { if (pc->type == CT_SQUARE_OPEN) { next = chunk_get_next_ncnl(pc); if (chunk_is_token(next, CT_SQUARE_CLOSE)) { /* Change '[' + ']' into '[]' */ set_chunk_type(pc, CT_TSQUARE); pc->str = "[]"; // bug # 664 // The original orig_col_end of CT_SQUARE_CLOSE is stored at orig_col_end of CT_TSQUARE. // pc->orig_col_end += 1; pc->orig_col_end = next->orig_col_end; chunk_del(next); } } if ((pc->type == CT_SEMICOLON) && (pc->flags & PCF_IN_PREPROC) && !chunk_get_next_ncnl(pc, CNAV_PREPROC)) { LOG_FMT(LNOTE, "%s:%d Detected a macro that ends with a semicolon. Possible failures if used.\n", cpd.filename, pc->orig_line); } } /* We can handle everything else in the second pass */ pc = chunk_get_head(); next = chunk_get_next_ncnl(pc); while ((pc != NULL) && (next != NULL)) { if ((pc->type == CT_DOT) && (cpd.lang_flags & LANG_ALLC)) { set_chunk_type(pc, CT_MEMBER); } if ((pc->type == CT_NULLCOND) && (cpd.lang_flags & LANG_CS)) { set_chunk_type(pc, CT_MEMBER); } /* Determine the version stuff (D only) */ if (pc->type == CT_D_VERSION) { if (next->type == CT_PAREN_OPEN) { set_chunk_type(pc, CT_D_VERSION_IF); } else { if (next->type != CT_ASSIGN) { LOG_FMT(LERR, "%s:%d %s: version: Unexpected token %s\n", cpd.filename, pc->orig_line, __func__, get_token_name(next->type)); cpd.error_count++; } set_chunk_type(pc, CT_WORD); } } /* Determine the scope stuff (D only) */ if (pc->type == CT_D_SCOPE) { if (next->type == CT_PAREN_OPEN) { set_chunk_type(pc, CT_D_SCOPE_IF); } else { set_chunk_type(pc, CT_TYPE); } } /** * Change CT_BASE before CT_PAREN_OPEN to CT_WORD. 
* public myclass() : base() { * } */ if ((pc->type == CT_BASE) && (next->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_WORD); } if ((pc->type == CT_ENUM) && (next->type == CT_CLASS)) { set_chunk_type(next, CT_ENUM_CLASS); } /** * Change CT_WORD after CT_ENUM, CT_UNION, or CT_STRUCT to CT_TYPE * Change CT_WORD before CT_WORD to CT_TYPE */ if (next->type == CT_WORD) { if ((pc->type == CT_ENUM) || (pc->type == CT_ENUM_CLASS) || (pc->type == CT_UNION) || (pc->type == CT_STRUCT)) { set_chunk_type(next, CT_TYPE); } if (pc->type == CT_WORD) { set_chunk_type(pc, CT_TYPE); } } /* change extern to qualifier if extern isn't followed by a string or * an open paren */ if (pc->type == CT_EXTERN) { if (next->type == CT_STRING) { /* Probably 'extern "C"' */ } else if (next->type == CT_PAREN_OPEN) { /* Probably 'extern (C)' */ } else { /* Something else followed by a open brace */ tmp = chunk_get_next_ncnl(next); if ((tmp == NULL) || (tmp->type != CT_BRACE_OPEN)) { set_chunk_type(pc, CT_QUALIFIER); } } } /** * Change CT_STAR to CT_PTR_TYPE if preceded by CT_TYPE, * CT_QUALIFIER, or CT_PTR_TYPE. */ if ((next->type == CT_STAR) && ((pc->type == CT_TYPE) || (pc->type == CT_QUALIFIER) || (pc->type == CT_PTR_TYPE))) { set_chunk_type(next, CT_PTR_TYPE); } if ((pc->type == CT_TYPE_CAST) && (next->type == CT_ANGLE_OPEN)) { set_chunk_parent(next, CT_TYPE_CAST); in_type_cast = true; } /** * Change angle open/close to CT_COMPARE, if not a template thingy */ if ((pc->type == CT_ANGLE_OPEN) && (pc->parent_type != CT_TYPE_CAST)) { /* pretty much all languages except C use <> for something other than * comparisons. "#include<xxx>" is handled elsewhere. 
*/ if (cpd.lang_flags & (LANG_CPP | LANG_CS | LANG_JAVA | LANG_VALA | LANG_OC)) { // bug #663 check_template(pc); } else { /* convert CT_ANGLE_OPEN to CT_COMPARE */ set_chunk_type(pc, CT_COMPARE); } } if ((pc->type == CT_ANGLE_CLOSE) && (pc->parent_type != CT_TEMPLATE)) { if (in_type_cast) { in_type_cast = false; set_chunk_parent(pc, CT_TYPE_CAST); } else { next = handle_double_angle_close(pc); } } if (cpd.lang_flags & LANG_D) { /* Check for the D string concat symbol '~' */ if ((pc->type == CT_INV) && ((prev->type == CT_STRING) || (prev->type == CT_WORD) || (next->type == CT_STRING))) { set_chunk_type(pc, CT_CONCAT); } /* Check for the D template symbol '!' (word + '!' + word or '(') */ if ((pc->type == CT_NOT) && (prev->type == CT_WORD) && ((next->type == CT_PAREN_OPEN) || (next->type == CT_WORD) || (next->type == CT_TYPE))) { set_chunk_type(pc, CT_D_TEMPLATE); } /* handle "version(unittest) { }" vs "unittest { }" */ if (prev && (pc->type == CT_UNITTEST) && (prev->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_WORD); } /* handle 'static if' and merge the tokens */ if (prev && (pc->type == CT_IF) && chunk_is_str(prev, "static", 6)) { /* delete PREV and merge with IF */ pc->str.insert(0, ' '); pc->str.insert(0, prev->str); pc->orig_col = prev->orig_col; pc->orig_line = prev->orig_line; chunk_t *to_be_deleted = prev; prev = chunk_get_prev_ncnl(prev); chunk_del(to_be_deleted); } } if (cpd.lang_flags & LANG_CPP) { /* Change Word before '::' into a type */ if ((pc->type == CT_WORD) && (next->type == CT_DC_MEMBER)) { set_chunk_type(pc, CT_TYPE); } } /* Change get/set to CT_WORD if not followed by a brace open */ if ((pc->type == CT_GETSET) && (next->type != CT_BRACE_OPEN)) { if ((next->type == CT_SEMICOLON) && ((prev->type == CT_BRACE_CLOSE) || (prev->type == CT_BRACE_OPEN) || (prev->type == CT_SEMICOLON))) { set_chunk_type(pc, CT_GETSET_EMPTY); set_chunk_parent(next, CT_GETSET); } else { set_chunk_type(pc, CT_WORD); } } /* Interface is only a keyword in MS land if 
followed by 'class' or 'struct' * likewise, 'class' may be a member name in Java. */ if ((pc->type == CT_CLASS) && !CharTable::IsKw1(next->str[0])) { set_chunk_type(pc, CT_WORD); } /* Change item after operator (>=, ==, etc) to a CT_OPERATOR_VAL * Usually the next item is part of the operator. * In a few cases the next few tokens are part of it: * operator + - common case * operator >> - need to combine '>' and '>' * operator () * operator [] - already converted to TSQUARE * operator new [] * operator delete [] * operator const char * * operator const B& * operator std::allocator<U> * * In all cases except the last, this will put the entire operator value * in one chunk. */ if (pc->type == CT_OPERATOR) { tmp2 = chunk_get_next(next); /* Handle special case of () operator -- [] already handled */ if (next->type == CT_PAREN_OPEN) { tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->type == CT_PAREN_CLOSE)) { next->str = "()"; set_chunk_type(next, CT_OPERATOR_VAL); chunk_del(tmp); next->orig_col_end += 1; } } else if ((next->type == CT_ANGLE_CLOSE) && tmp2 && (tmp2->type == CT_ANGLE_CLOSE) && (tmp2->orig_col == next->orig_col_end)) { next->str.append('>'); next->orig_col_end++; set_chunk_type(next, CT_OPERATOR_VAL); chunk_del(tmp2); } else if (next->flags & PCF_PUNCTUATOR) { set_chunk_type(next, CT_OPERATOR_VAL); } else { set_chunk_type(next, CT_TYPE); /* Replace next with a collection of all tokens that are part of * the type. 
*/ tmp2 = next; while ((tmp = chunk_get_next(tmp2)) != NULL) { if ((tmp->type != CT_WORD) && (tmp->type != CT_TYPE) && (tmp->type != CT_QUALIFIER) && (tmp->type != CT_STAR) && (tmp->type != CT_CARET) && (tmp->type != CT_AMP) && (tmp->type != CT_TSQUARE)) { break; } /* Change tmp into a type so that space_needed() works right */ make_type(tmp); int num_sp = space_needed(tmp2, tmp); while (num_sp-- > 0) { next->str.append(" "); } next->str.append(tmp->str); tmp2 = tmp; } while ((tmp2 = chunk_get_next(next)) != tmp) { chunk_del(tmp2); } set_chunk_type(next, CT_OPERATOR_VAL); next->orig_col_end = next->orig_col + next->len(); } set_chunk_parent(next, CT_OPERATOR); LOG_FMT(LOPERATOR, "%s: %d:%d operator '%s'\n", __func__, pc->orig_line, pc->orig_col, next->text()); } /* Change private, public, protected into either a qualifier or label */ if (pc->type == CT_PRIVATE) { /* Handle Qt slots - maybe should just check for a CT_WORD? */ if (chunk_is_str(next, "slots", 5) || chunk_is_str(next, "Q_SLOTS", 7)) { tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->type == CT_COLON)) { next = tmp; } } if (next->type == CT_COLON) { set_chunk_type(next, CT_PRIVATE_COLON); if ((tmp = chunk_get_next_ncnl(next)) != NULL) { chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START); } } else { set_chunk_type(pc, (chunk_is_str(pc, "signals", 7) || chunk_is_str(pc, "Q_SIGNALS", 9)) ? 
CT_WORD : CT_QUALIFIER); } } /* Look for <newline> 'EXEC' 'SQL' */ if ((chunk_is_str(pc, "EXEC", 4) && chunk_is_str(next, "SQL", 3)) || ((*pc->str == '$') && (pc->type != CT_SQL_WORD))) { tmp = chunk_get_prev(pc); if (chunk_is_newline(tmp)) { if (*pc->str == '$') { set_chunk_type(pc, CT_SQL_EXEC); if (pc->len() > 1) { /* SPLIT OFF '$' */ chunk_t nc; nc = *pc; pc->str.resize(1); pc->orig_col_end = pc->orig_col + 1; nc.type = CT_SQL_WORD; nc.str.pop_front(); nc.orig_col++; nc.column++; chunk_add_after(&nc, pc); next = chunk_get_next(pc); } } tmp = chunk_get_next(next); if (chunk_is_str_case(tmp, "BEGIN", 5)) { set_chunk_type(pc, CT_SQL_BEGIN); } else if (chunk_is_str_case(tmp, "END", 3)) { set_chunk_type(pc, CT_SQL_END); } else { set_chunk_type(pc, CT_SQL_EXEC); } /* Change words into CT_SQL_WORD until CT_SEMICOLON */ while (tmp != NULL) { if (tmp->type == CT_SEMICOLON) { break; } if ((tmp->len() > 0) && (unc_isalpha(*tmp->str) || (*tmp->str == '$'))) { set_chunk_type(tmp, CT_SQL_WORD); } tmp = chunk_get_next_ncnl(tmp); } } } /* handle MS abomination 'for each' */ if ((pc->type == CT_FOR) && chunk_is_str(next, "each", 4) && (next == chunk_get_next(pc))) { /* merge the two with a space between */ pc->str.append(' '); pc->str += next->str; pc->orig_col_end = next->orig_col_end; chunk_del(next); next = chunk_get_next_ncnl(pc); /* label the 'in' */ if (next && (next->type == CT_PAREN_OPEN)) { tmp = chunk_get_next_ncnl(next); while (tmp && (tmp->type != CT_PAREN_CLOSE)) { if (chunk_is_str(tmp, "in", 2)) { set_chunk_type(tmp, CT_IN); break; } tmp = chunk_get_next_ncnl(tmp); } } } /* ObjectiveC allows keywords to be used as identifiers in some situations * This is a dirty hack to allow some of the more common situations. 
*/ if (cpd.lang_flags & LANG_OC) { if (((pc->type == CT_IF) || (pc->type == CT_FOR) || (pc->type == CT_WHILE)) && !chunk_is_token(next, CT_PAREN_OPEN)) { set_chunk_type(pc, CT_WORD); } if ((pc->type == CT_DO) && (chunk_is_token(prev, CT_MINUS) || chunk_is_token(next, CT_SQUARE_CLOSE))) { set_chunk_type(pc, CT_WORD); } } /* Another hack to clean up more keyword abuse */ if ((pc->type == CT_CLASS) && (chunk_is_token(prev, CT_DOT) || chunk_is_token(next, CT_DOT))) { set_chunk_type(pc, CT_WORD); } /* Detect Objective C class name */ if ((pc->type == CT_OC_IMPL) || (pc->type == CT_OC_INTF) || (pc->type == CT_OC_PROTOCOL)) { if (next->type != CT_PAREN_OPEN) { set_chunk_type(next, CT_OC_CLASS); } set_chunk_parent(next, pc->type); tmp = chunk_get_next_ncnl(next); if (tmp != NULL) { chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START); } tmp = chunk_get_next_type(pc, CT_OC_END, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->type); } } if (pc->type == CT_OC_INTF) { tmp = chunk_get_next_ncnl(pc, CNAV_PREPROC); while ((tmp != NULL) && (tmp->type != CT_OC_END)) { if (get_token_pattern_class(tmp->type) != PATCLS_NONE) { LOG_FMT(LOBJCWORD, "@interface %d:%d change '%s' (%s) to CT_WORD\n", pc->orig_line, pc->orig_col, tmp->text(), get_token_name(tmp->type)); set_chunk_type(tmp, CT_WORD); } tmp = chunk_get_next_ncnl(tmp, CNAV_PREPROC); } } /* Detect Objective-C categories and class extensions */ /* @interface ClassName (CategoryName) */ /* @implementation ClassName (CategoryName) */ /* @interface ClassName () */ /* @implementation ClassName () */ if (((pc->parent_type == CT_OC_IMPL) || (pc->parent_type == CT_OC_INTF) || (pc->type == CT_OC_CLASS)) && (next->type == CT_PAREN_OPEN)) { set_chunk_parent(next, pc->parent_type); tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->next != NULL)) { if (tmp->type == CT_PAREN_CLOSE) { //set_chunk_type(tmp, CT_OC_CLASS_EXT); set_chunk_parent(tmp, pc->parent_type); } else { set_chunk_type(tmp, CT_OC_CATEGORY); 
set_chunk_parent(tmp, pc->parent_type); } } tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->parent_type); } } /* Detect Objective C @property * @property NSString *stringProperty; * @property(nonatomic, retain) NSMutableDictionary *shareWith; */ if (pc->type == CT_OC_PROPERTY) { if (next->type != CT_PAREN_OPEN) { chunk_flags_set(next, PCF_STMT_START | PCF_EXPR_START); } else { set_chunk_parent(next, pc->type); tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->type); tmp = chunk_get_next_ncnl(tmp); if (tmp != NULL) { chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START); tmp = chunk_get_next_type(tmp, CT_SEMICOLON, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->type); } } } } } /* Detect Objective C @selector * @selector(msgNameWithNoArg) * @selector(msgNameWith1Arg:) * @selector(msgNameWith2Args:arg2Name:) */ if ((pc->type == CT_OC_SEL) && (next->type == CT_PAREN_OPEN)) { set_chunk_parent(next, pc->type); tmp = chunk_get_next(next); if (tmp != NULL) { set_chunk_type(tmp, CT_OC_SEL_NAME); set_chunk_parent(tmp, pc->type); while ((tmp = chunk_get_next_ncnl(tmp)) != NULL) { if (tmp->type == CT_PAREN_CLOSE) { set_chunk_parent(tmp, CT_OC_SEL); break; } set_chunk_type(tmp, CT_OC_SEL_NAME); set_chunk_parent(tmp, pc->type); } } } /* Handle special preprocessor junk */ if (pc->type == CT_PREPROC) { set_chunk_parent(pc, next->type); } /* Detect "pragma region" and "pragma endregion" */ if ((pc->type == CT_PP_PRAGMA) && (next->type == CT_PREPROC_BODY)) { if ((memcmp(next->str, "region", 6) == 0) || (memcmp(next->str, "endregion", 9) == 0)) { set_chunk_type(pc, (*next->str == 'r') ? CT_PP_REGION : CT_PP_ENDREGION); set_chunk_parent(prev, pc->type); } } /* Check for C# nullable types '?' 
is in next */ if ((cpd.lang_flags & LANG_CS) && (next->type == CT_QUESTION) && (next->orig_col == (pc->orig_col + pc->len()))) { tmp = chunk_get_next_ncnl(next); if (tmp != NULL) { bool doit = ((tmp->type == CT_PAREN_CLOSE) || (tmp->type == CT_ANGLE_CLOSE)); if (tmp->type == CT_WORD) { tmp2 = chunk_get_next_ncnl(tmp); if ((tmp2 != NULL) && ((tmp2->type == CT_SEMICOLON) || (tmp2->type == CT_ASSIGN) || (tmp2->type == CT_COMMA) || (tmp2->type == CT_BRACE_OPEN))) { doit = true; } } if (doit) { pc->str += next->str; pc->orig_col_end = next->orig_col_end; chunk_del(next); next = tmp; } } } /* Change 'default(' into a sizeof-like statement */ if ((cpd.lang_flags & LANG_CS) && (pc->type == CT_DEFAULT) && (next->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_SIZEOF); } if ((pc->type == CT_UNSAFE) && (next->type != CT_BRACE_OPEN)) { set_chunk_type(pc, CT_QUALIFIER); } if (((pc->type == CT_USING) || ((pc->type == CT_TRY) && (cpd.lang_flags & LANG_JAVA))) && (next->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_USING_STMT); } /* Add minimal support for C++0x rvalue references */ if ((pc->type == CT_BOOL) && chunk_is_str(pc, "&&", 2)) { if (prev->type == CT_TYPE) { set_chunk_type(pc, CT_BYREF); } } /* HACK: treat try followed by a colon as a qualifier to handle this: * A::A(int) try : B() { } catch (...) { } */ if ((pc->type == CT_TRY) && chunk_is_str(pc, "try", 3) && (next != NULL) && (next->type == CT_COLON)) { set_chunk_type(pc, CT_QUALIFIER); } /* If Java's 'synchronized' is in a method declaration, it should be * a qualifier. */ if ((cpd.lang_flags & LANG_JAVA) && (pc->type == CT_SYNCHRONIZED) && (next->type != CT_PAREN_OPEN)) { set_chunk_type(pc, CT_QUALIFIER); } // guy 2015-11-05 // change CT_DC_MEMBER + CT_FOR into CT_DC_MEMBER + CT_FUNC_CALL if ((pc->type == CT_FOR) && (pc->prev->type == CT_DC_MEMBER)) { set_chunk_type(pc, CT_FUNC_CALL); } /* TODO: determine other stuff here */ prev = pc; pc = next; next = chunk_get_next_ncnl(pc); } } // tokenize_cleanup
/**
 * Decides whether the CT_ANGLE_OPEN chunk 'start' opens a template or is a
 * comparison.
 *
 * If there is nothing but CT_WORD and CT_MEMBER, then it's probably a
 * template thingy. Otherwise, it's likely a comparison.
 *
 * On "template": start and the matching close get parent CT_TEMPLATE and
 * every chunk in between is flagged PCF_IN_TEMPLATE.
 * On "not a template": start is re-typed to CT_COMPARE.
 * If there is no previous chunk, nothing is changed.
 *
 * @param start  the '<' chunk to classify
 */
static void check_template(chunk_t *start)
{
   chunk_t *pc;
   chunk_t *end;
   chunk_t *prev;
   chunk_t *next;
   bool    in_if = false;

   LOG_FMT(LTEMPL, "%s: Line %d, col %d:", __func__, start->orig_line, start->orig_col);

   prev = chunk_get_prev_ncnl(start, CNAV_PREPROC);
   if (prev == NULL)
   {
      return;
   }

   if (prev->type == CT_TEMPLATE)
   {
      LOG_FMT(LTEMPL, " CT_TEMPLATE:");

      /* We have: "template< ... >", which is a template declaration */
      int level = 1;
      for (pc = chunk_get_next_ncnl(start, CNAV_PREPROC);
           pc != NULL;
           pc = chunk_get_next_ncnl(pc, CNAV_PREPROC))
      {
         LOG_FMT(LTEMPL, " [%s,%d]", get_token_name(pc->type), level);

         /* a multi-char token starting with '>' (e.g. '>>') gets split */
         if ((pc->str[0] == '>') && (pc->len() > 1))
         {
            LOG_FMT(LTEMPL, " {split '%s' at %d:%d}",
                    pc->text(), pc->orig_line, pc->orig_col);
            split_off_angle_close(pc);
         }

         if (chunk_is_str(pc, "<", 1))
         {
            level++;
         }
         else if (chunk_is_str(pc, ">", 1))
         {
            level--;
            if (level == 0)
            {
               break;
            }
         }
      }
      end = pc;
   }
   else
   {
      /* We may have something like "a< ... >", which is a template where
       * '...' may consist of anything except braces {}, a semicolon, and
       * unbalanced parens.
       * if we are inside an 'if' statement and hit a CT_BOOL, then it isn't a
       * template.
       */

      /* A template requires a word/type right before the open angle */
      if ((prev->type != CT_WORD) &&
          (prev->type != CT_TYPE) &&
          (prev->type != CT_COMMA) &&
          (prev->type != CT_OPERATOR_VAL) &&
          (prev->parent_type != CT_OPERATOR))
      {
         LOG_FMT(LTEMPL, " - after %s + ( - Not a template\n", get_token_name(prev->type));
         set_chunk_type(start, CT_COMPARE);
         return;
      }

      LOG_FMT(LTEMPL, " - prev %s -", get_token_name(prev->type));

      /* Scan back and make sure we aren't inside square parens */
      pc = start;
      while ((pc = chunk_get_prev_ncnl(pc, CNAV_PREPROC)) != NULL)
      {
         if ((pc->type == CT_SEMICOLON) ||
             (pc->type == CT_BRACE_OPEN) ||
             (pc->type == CT_BRACE_CLOSE) ||
             (pc->type == CT_SQUARE_CLOSE))
         {
            break;
         }
         if ((pc->type == CT_IF) || (pc->type == CT_RETURN))
         {
            in_if = true;
            break;
         }
         if (pc->type == CT_SQUARE_OPEN)
         {
            LOG_FMT(LTEMPL, " - Not a template: after a square open\n");
            set_chunk_type(start, CT_COMPARE);
            return;
         }
      }

      /* Scan forward to the angle close
       * If we have a comparison in there, then it can't be a template.
       *
       * 'tokens' is a stack of the currently open '<' and '(' tokens;
       * tokens[0] is the '<' at 'start'.
       */
      c_token_t tokens[1024];
      int       num_tokens = 1;

      tokens[0] = CT_ANGLE_OPEN;
      for (pc = chunk_get_next_ncnl(start, CNAV_PREPROC);
           pc != NULL;
           pc = chunk_get_next_ncnl(pc, CNAV_PREPROC))
      {
         LOG_FMT(LTEMPL, " [%s,%d]", get_token_name(pc->type), num_tokens);

         if ((tokens[num_tokens - 1] == CT_ANGLE_OPEN) &&
             (pc->str[0] == '>') && (pc->len() > 1) &&
             (cpd.settings[UO_tok_split_gte].b ||
              (chunk_is_str(pc, ">>", 2) && (num_tokens >= 2))))
         {
            LOG_FMT(LTEMPL, " {split '%s' at %d:%d}",
                    pc->text(), pc->orig_line, pc->orig_col);
            split_off_angle_close(pc);
         }

         if (chunk_is_str(pc, "<", 1))
         {
            /* Same capacity check as for '(' below: stop scanning instead of
             * writing past the end of the stack on deeply nested input.
             */
            if (num_tokens >= (int)(ARRAY_SIZE(tokens) - 1))
            {
               break;
            }
            tokens[num_tokens++] = CT_ANGLE_OPEN;
         }
         else if (chunk_is_str(pc, ">", 1))
         {
            if ((num_tokens > 0) && (tokens[num_tokens - 1] == CT_PAREN_OPEN))
            {
               /* '>' inside parens is not a template close */
               handle_double_angle_close(pc);
            }
            else if (--num_tokens <= 0)
            {
               /* closed the '<' at 'start' */
               break;
            }
            else if (tokens[num_tokens] != CT_ANGLE_OPEN)
            {
               /* unbalanced parens */
               break;
            }
         }
         else if (in_if &&
                  ((pc->type == CT_BOOL) ||
                   (pc->type == CT_COMPARE)))
         {
            break;
         }
         else if ((pc->type == CT_BRACE_OPEN) ||
                  (pc->type == CT_BRACE_CLOSE) ||
                  (pc->type == CT_SEMICOLON))
         {
            break;
         }
         else if (pc->type == CT_PAREN_OPEN)
         {
            if (num_tokens >= (int)(ARRAY_SIZE(tokens) - 1))
            {
               break;
            }
            tokens[num_tokens++] = CT_PAREN_OPEN;
         }
         else if (pc->type == CT_PAREN_CLOSE)
         {
            num_tokens--;
            if (tokens[num_tokens] != CT_PAREN_OPEN)
            {
               /* unbalanced parens */
               break;
            }
         }
      }
      end = pc;
   }

   if ((end != NULL) && (end->type == CT_ANGLE_CLOSE))
   {
      /* a number right after the '>' means comparison, not template */
      pc = chunk_get_next_ncnl(end, CNAV_PREPROC);
      if ((pc == NULL) || (pc->type != CT_NUMBER))
      {
         LOG_FMT(LTEMPL, " - Template Detected\n");

         set_chunk_parent(start, CT_TEMPLATE);

         pc = start;
         while (pc != end)
         {
            next = chunk_get_next_ncnl(pc, CNAV_PREPROC);
            chunk_flags_set(pc, PCF_IN_TEMPLATE);
            if (next->type != CT_PAREN_OPEN)
            {
               make_type(pc);
            }
            pc = next;
         }
         set_chunk_parent(end, CT_TEMPLATE);
         chunk_flags_set(end, PCF_IN_TEMPLATE);
         return;
      }
   }

   LOG_FMT(LTEMPL, " - Not a template: end = %s\n",
           (end != NULL) ? get_token_name(end->type) : "<null>");
   set_chunk_type(start, CT_COMPARE);
} // check_template
/** * At the heart of this algorithm are two stacks. * There is the Paren Stack (PS) and the Frame stack. * * The PS (pse in the code) keeps track of braces, parens, * if/else/switch/do/while/etc items -- anything that is nestable. * Complex statements go through stages. * Take this simple if statement as an example: * if ( x ) { x--; } * * The stack would change like so: 'token' stack afterwards * 'if' [IF - 1] * '(' [IF - 1] [PAREN OPEN] * 'x' [IF - 1] [PAREN OPEN] * ')' [IF - 2] <- note that the state was incremented * '{' [IF - 2] [BRACE OPEN] * 'x' [IF - 2] [BRACE OPEN] * '--' [IF - 2] [BRACE OPEN] * ';' [IF - 2] [BRACE OPEN] * '}' [IF - 3] * <- lack of else kills the IF, closes statement * * Virtual braces example: * if ( x ) x--; else x++; * * 'if' [IF - 1] * '(' [IF - 1] [PAREN OPEN] * 'x' [IF - 1] [PAREN OPEN] * ')' [IF - 2] * 'x' [IF - 2] [VBRACE OPEN] <- VBrace open inserted before because '{' was not next * '--' [IF - 2] [VBRACE OPEN] * ';' [IF - 3] <- VBrace close inserted after semicolon * 'else' [ELSE - 0] <- IF changed into ELSE * 'x' [ELSE - 0] [VBRACE OPEN] <- lack of '{' -> VBrace * '++' [ELSE - 0] [VBRACE OPEN] * ';' [ELSE - 0] <- VBrace close inserted after semicolon * <- ELSE removed after statement close * * The pse stack is kept on a frame stack. * The frame stack is need for languages that support preprocessors (C, C++, C#) * that can arbitrarily change code flow. It also isolates #define macros so * that they are indented independently and do not affect the rest of the program. * * When an #if is hit, a copy of the current frame is push on the frame stack. * When an #else/#elif is hit, a copy of the current stack is pushed under the * #if frame and the original (pre-#if) frame is copied to the current frame. * When #endif is hit, the top frame is popped. 
* This has the following effects: * - a simple #if / #endif does not affect program flow * - #if / #else /#endif - continues from the #if clause * * When a #define is entered, the current frame is pushed and cleared. * When a #define is exited, the frame is popped. */ static void parse_cleanup(struct parse_frame *frm, chunk_t *pc) { LOG_FUNC_ENTRY(); c_token_t parent = CT_NONE; chunk_t *prev; LOG_FMT(LTOK, "%s:%lu] %16s - tos:%d/%16s stg:%d\n", __func__, pc->orig_line, get_token_name(pc->type), frm->pse_tos, get_token_name(frm->pse[frm->pse_tos].type), frm->pse[frm->pse_tos].stage); /* Mark statement starts */ if (((frm->stmt_count == 0) || (frm->expr_count == 0)) && !chunk_is_semicolon(pc) && (pc->type != CT_BRACE_CLOSE) && (pc->type != CT_VBRACE_CLOSE) && !chunk_is_str(pc, ")", 1) && !chunk_is_str(pc, "]", 1)) { chunk_flags_set(pc, PCF_EXPR_START | ((frm->stmt_count == 0) ? PCF_STMT_START : 0)); LOG_FMT(LSTMT, "%lu] 1.marked %s as %s start st:%d ex:%d\n", pc->orig_line, pc->text(), (pc->flags & PCF_STMT_START) ? "stmt" : "expr", frm->stmt_count, frm->expr_count); } frm->stmt_count++; frm->expr_count++; if (frm->sparen_count > 0) { int tmp; chunk_flags_set(pc, PCF_IN_SPAREN); /* Mark everything in the a for statement */ for (tmp = frm->pse_tos - 1; tmp >= 0; tmp--) { if (frm->pse[tmp].type == CT_FOR) { chunk_flags_set(pc, PCF_IN_FOR); break; } } /* Mark the parent on semicolons in for() stmts */ if ((pc->type == CT_SEMICOLON) && (frm->pse_tos > 1) && (frm->pse[frm->pse_tos - 1].type == CT_FOR)) { set_chunk_parent(pc, CT_FOR); } } /* Check the progression of complex statements */ if (frm->pse[frm->pse_tos].stage != BS_NONE) { if (check_complex_statements(frm, pc)) { return; } } /** * Check for a virtual brace statement close due to a semicolon. * The virtual brace will get handled the next time through. * The semicolon isn't handled at all. * TODO: may need to float VBRACE past comments until newline? 
*/ if (frm->pse[frm->pse_tos].type == CT_VBRACE_OPEN) { if (chunk_is_semicolon(pc)) { cpd.consumed = true; close_statement(frm, pc); } else if (cpd.lang_flags & LANG_PAWN) { if (pc->type == CT_BRACE_CLOSE) { close_statement(frm, pc); } } } /* Handle close paren, vbrace, brace, and square */ if ((pc->type == CT_PAREN_CLOSE) || (pc->type == CT_BRACE_CLOSE) || (pc->type == CT_VBRACE_CLOSE) || (pc->type == CT_ANGLE_CLOSE) || (pc->type == CT_MACRO_CLOSE) || (pc->type == CT_SQUARE_CLOSE)) { /* Change CT_PAREN_CLOSE into CT_SPAREN_CLOSE or CT_FPAREN_CLOSE */ if ((pc->type == CT_PAREN_CLOSE) && ((frm->pse[frm->pse_tos].type == CT_FPAREN_OPEN) || (frm->pse[frm->pse_tos].type == CT_SPAREN_OPEN))) { set_chunk_type(pc, (c_token_t)(frm->pse[frm->pse_tos].type + 1)); if (pc->type == CT_SPAREN_CLOSE) { frm->sparen_count--; chunk_flags_clr(pc, PCF_IN_SPAREN); } } /* Make sure the open / close match */ if (pc->type != (frm->pse[frm->pse_tos].type + 1)) { if ((frm->pse[frm->pse_tos].type != CT_NONE) && (frm->pse[frm->pse_tos].type != CT_PP_DEFINE)) { LOG_FMT(LWARN, "%s: %s:%lu Error: Unexpected '%s' for '%s', which was on line %lu\n", __func__, cpd.filename, pc->orig_line, pc->text(), get_token_name(frm->pse[frm->pse_tos].pc->type), frm->pse[frm->pse_tos].pc->orig_line); print_stack(LBCSPOP, "=Error ", frm, pc); cpd.error_count++; } } else { cpd.consumed = true; /* Copy the parent, update the paren/brace levels */ set_chunk_parent(pc, frm->pse[frm->pse_tos].parent); frm->level--; if ((pc->type == CT_BRACE_CLOSE) || (pc->type == CT_VBRACE_CLOSE) || (pc->type == CT_MACRO_CLOSE)) { frm->brace_level--; } pc->level = frm->level; pc->brace_level = frm->brace_level; /* Pop the entry */ frm->pse_tos--; print_stack(LBCSPOP, "-Close ", frm, pc); /* See if we are in a complex statement */ if (frm->pse[frm->pse_tos].stage != BS_NONE) { handle_complex_close(frm, pc); } } } /* In this state, we expect a semicolon, but we'll also hit the closing * sparen, so we need to check cpd.consumed to see if 
the close sparen was * aleady handled. */ if (frm->pse[frm->pse_tos].stage == BS_WOD_SEMI) { chunk_t *tmp = pc; if (cpd.consumed) { /* If consumed, then we are on the close sparen. * PAWN: Check the next chunk for a semicolon. If it isn't, then * add a virtual semicolon, which will get handled on the next pass. */ if (cpd.lang_flags & LANG_PAWN) { tmp = chunk_get_next_ncnl(pc); if ((tmp->type != CT_SEMICOLON) && (tmp->type != CT_VSEMICOLON)) { pawn_add_vsemi_after(pc); } } } else { /* Complain if this ISN'T a semicolon, but close out WHILE_OF_DO anyway */ if ((pc->type == CT_SEMICOLON) || (pc->type == CT_VSEMICOLON)) { cpd.consumed = true; set_chunk_parent(pc, CT_WHILE_OF_DO); } else { LOG_FMT(LWARN, "%s:%lu: Error: Expected a semicolon for WHILE_OF_DO, but got '%s'\n", cpd.filename, pc->orig_line, get_token_name(pc->type)); cpd.error_count++; } handle_complex_close(frm, pc); } } /* Get the parent type for brace and paren open */ parent = pc->parent_type; if ((pc->type == CT_PAREN_OPEN) || (pc->type == CT_FPAREN_OPEN) || (pc->type == CT_SPAREN_OPEN) || (pc->type == CT_BRACE_OPEN)) { prev = chunk_get_prev_ncnl(pc); if (prev != NULL) { if ((pc->type == CT_PAREN_OPEN) || (pc->type == CT_FPAREN_OPEN) || (pc->type == CT_SPAREN_OPEN)) { /* Set the parent for parens and change paren type */ if (frm->pse[frm->pse_tos].stage != BS_NONE) { set_chunk_type(pc, CT_SPAREN_OPEN); parent = frm->pse[frm->pse_tos].type; frm->sparen_count++; } else if (prev->type == CT_FUNCTION) { set_chunk_type(pc, CT_FPAREN_OPEN); parent = CT_FUNCTION; } /* NS_ENUM and NS_OPTIONS are followed by a (type, name) pair */ else if ((prev->type == CT_ENUM) && (cpd.lang_flags & LANG_OC)) { /* Treat both as CT_ENUM since the syntax is identical */ set_chunk_type(pc, CT_FPAREN_OPEN); parent = CT_ENUM; } else { /* no need to set parent */ } } else /* must be CT_BRACE_OPEN */ { /* Set the parent for open braces */ if (frm->pse[frm->pse_tos].stage != BS_NONE) { parent = frm->pse[frm->pse_tos].type; } else if 
((prev->type == CT_ASSIGN) && (prev->str[0] == '=')) { parent = CT_ASSIGN; } /* Carry through CT_ENUM parent in NS_ENUM (type, name) { */ else if ((prev->type == CT_FPAREN_CLOSE) && (cpd.lang_flags & LANG_OC) && (prev->parent_type == CT_ENUM)) { parent = CT_ENUM; } else if (prev->type == CT_FPAREN_CLOSE) { parent = CT_FUNCTION; } else { /* no need to set parent */ } } } } /** * Adjust the level for opens & create a stack entry * Note that CT_VBRACE_OPEN has already been handled. */ if ((pc->type == CT_BRACE_OPEN) || (pc->type == CT_PAREN_OPEN) || (pc->type == CT_FPAREN_OPEN) || (pc->type == CT_SPAREN_OPEN) || (pc->type == CT_ANGLE_OPEN) || (pc->type == CT_MACRO_OPEN) || (pc->type == CT_SQUARE_OPEN)) { frm->level++; if ((pc->type == CT_BRACE_OPEN) || (pc->type == CT_MACRO_OPEN)) { frm->brace_level++; } push_fmr_pse(frm, pc, BS_NONE, "+Open "); frm->pse[frm->pse_tos].parent = parent; set_chunk_parent(pc, parent); } pattern_class patcls = get_token_pattern_class(pc->type); /** Create a stack entry for complex statements: */ /** if, elseif, switch, for, while, synchronized, using, lock, with, version, CT_D_SCOPE_IF */ if (patcls == PATCLS_BRACED) { push_fmr_pse(frm, pc, (pc->type == CT_DO) ? 
BS_BRACE_DO : BS_BRACE2, "+ComplexBraced"); } else if (patcls == PATCLS_PBRACED) { brstage_e bs = BS_PAREN1; if ((pc->type == CT_WHILE) && maybe_while_of_do(pc)) { set_chunk_type(pc, CT_WHILE_OF_DO); bs = BS_WOD_PAREN; } push_fmr_pse(frm, pc, bs, "+ComplexParenBraced"); } else if (patcls == PATCLS_OPBRACED) { push_fmr_pse(frm, pc, BS_OP_PAREN1, "+ComplexOpParenBraced"); } else if (patcls == PATCLS_ELSE) { push_fmr_pse(frm, pc, BS_ELSEIF, "+ComplexElse"); } /* Mark simple statement/expression starts * - after { or } * - after ';', but not if the paren stack top is a paren * - after '(' that has a parent type of CT_FOR */ if ((pc->type == CT_SQUARE_OPEN) || ((pc->type == CT_BRACE_OPEN) && (pc->parent_type != CT_ASSIGN)) || (pc->type == CT_BRACE_CLOSE) || (pc->type == CT_VBRACE_CLOSE) || ((pc->type == CT_SPAREN_OPEN) && (pc->parent_type == CT_FOR)) || (pc->type == CT_COLON) || (pc->type == CT_OC_END) || (chunk_is_semicolon(pc) && (frm->pse[frm->pse_tos].type != CT_PAREN_OPEN) && (frm->pse[frm->pse_tos].type != CT_FPAREN_OPEN) && (frm->pse[frm->pse_tos].type != CT_SPAREN_OPEN))) { LOG_FMT(LSTMT, "%s: %lu> reset1 stmt on %s\n", __func__, pc->orig_line, pc->text()); frm->stmt_count = 0; frm->expr_count = 0; } /* Mark expression starts */ chunk_t *tmp = chunk_get_next_ncnl(pc); if ((pc->type == CT_ARITH) || (pc->type == CT_ASSIGN) || (pc->type == CT_CASE) || (pc->type == CT_COMPARE) || ((pc->type == CT_STAR) && tmp && (tmp->type != CT_STAR)) || (pc->type == CT_BOOL) || (pc->type == CT_MINUS) || (pc->type == CT_PLUS) || (pc->type == CT_CARET) || (pc->type == CT_ANGLE_OPEN) || (pc->type == CT_ANGLE_CLOSE) || (pc->type == CT_RETURN) || (pc->type == CT_THROW) || (pc->type == CT_GOTO) || (pc->type == CT_CONTINUE) || (pc->type == CT_PAREN_OPEN) || (pc->type == CT_FPAREN_OPEN) || (pc->type == CT_SPAREN_OPEN) || (pc->type == CT_BRACE_OPEN) || chunk_is_semicolon(pc) || (pc->type == CT_COMMA) || (pc->type == CT_NOT) || (pc->type == CT_INV) || (pc->type == CT_COLON) || (pc->type == 
CT_QUESTION)) { frm->expr_count = 0; LOG_FMT(LSTMT, "%s: %lu> reset expr on %s\n", __func__, pc->orig_line, pc->text()); } } // parse_cleanup
/**
 * Makes the bracing of one if / else-if / else chain consistent.
 *
 * Starting at br_start, the chain is walked branch by branch and the opening
 * and closing chunk of every branch body is recorded. While walking, the
 * chain is marked as "must have braces" when
 *  - can_remove_braces() rejects a real brace pair,
 *  - should_add_braces() asks for braces on a non-CT_BRACE_OPEN body, or
 *  - an 'else' follows and UO_mod_full_brace_if_chain_only is set.
 *
 * Afterwards:
 *  - if braces are required, every recorded chunk gets PCF_KEEP_BRACE and the
 *    virtual braces are passed to convert_vbrace();
 *  - otherwise, if UO_mod_full_brace_if_chain is set, the real braces are
 *    passed to convert_brace().
 *
 * A chain with more branches than the fixed-size table can record is logged
 * and left completely untouched instead of writing past the table.
 *
 * @param br_start  chunk that opens the body of the first branch; the loop
 *                  treats anything that is not CT_BRACE_OPEN as a virtual
 *                  brace candidate
 */
static void process_if_chain(chunk_t *br_start)
{
   LOG_FUNC_ENTRY();

   /* Every branch needs two slots: one for the open and one for the close */
   const int max_braces = 256;
   chunk_t   *braces[max_braces];
   int       br_cnt           = 0;
   bool      must_have_braces = false;

   chunk_t *pc = br_start;

   LOG_FMT(LBRCH, "%s: if starts on line %zu\n", __func__, br_start->orig_line);

   while (pc != NULL)
   {
      /* Refuse to overrun the table; nothing has been modified yet at this
       * point, so bailing out leaves the whole chain as it was. */
      if (br_cnt > (max_braces - 2))
      {
         LOG_FMT(LBRCH, "%s: if chain on line %zu has too many branches, leaving it unchanged\n",
                 __func__, br_start->orig_line);
         return;
      }

      if (pc->type == CT_BRACE_OPEN)
      {
         bool tmp = can_remove_braces(pc);
         LOG_FMT(LBRCH, " [%d] line %zu - can%s remove %s\n",
                 br_cnt, pc->orig_line, tmp ? "" : "not",
                 get_token_name(pc->type));
         if (!tmp)
         {
            must_have_braces = true;
         }
      }
      else
      {
         bool tmp = should_add_braces(pc);
         if (tmp)
         {
            must_have_braces = true;
         }
         LOG_FMT(LBRCH, " [%d] line %zu - %s %s\n",
                 br_cnt, pc->orig_line, tmp ? "should add" : "ignore",
                 get_token_name(pc->type));
      }

      braces[br_cnt++] = pc;
      chunk_t *br_close = chunk_skip_to_match(pc, CNAV_PREPROC);
      if (br_close == NULL)
      {
         break;
      }
      braces[br_cnt++] = br_close;

      /* The chain only continues if the close is followed by an 'else' */
      pc = chunk_get_next_ncnl(br_close, CNAV_PREPROC);
      if ((pc == NULL) || (pc->type != CT_ELSE))
      {
         break;
      }

      if (cpd.settings[UO_mod_full_brace_if_chain_only].b)
      {
         // There is an 'else' - we want full braces.
         must_have_braces = true;
      }

      pc = chunk_get_next_ncnl(pc, CNAV_PREPROC);
      if ((pc != NULL) && (pc->type == CT_ELSEIF))
      {
         /* Skip over the 'else if (...)' part to reach the body */
         while ((pc != NULL) &&
                (pc->type != CT_VBRACE_OPEN) &&
                (pc->type != CT_BRACE_OPEN))
         {
            pc = chunk_get_next_ncnl(pc, CNAV_PREPROC);
         }
      }
      if (pc == NULL)
      {
         break;
      }
      if ((pc->type != CT_BRACE_OPEN) && (pc->type != CT_VBRACE_OPEN))
      {
         break;
      }
   }

   if (must_have_braces)
   {
      LOG_FMT(LBRCH, "%s: add braces on lines[%d]:", __func__, br_cnt);
      while (--br_cnt >= 0)
      {
         chunk_flags_set(braces[br_cnt], PCF_KEEP_BRACE);
         if ((braces[br_cnt]->type == CT_VBRACE_OPEN) ||
             (braces[br_cnt]->type == CT_VBRACE_CLOSE))
         {
            LOG_FMT(LBRCH, " %zu", braces[br_cnt]->orig_line);
            convert_vbrace(braces[br_cnt]);
         }
         else
         {
            LOG_FMT(LBRCH, " {%zu}", braces[br_cnt]->orig_line);
         }
         braces[br_cnt] = NULL;
      }
      LOG_FMT(LBRCH, "\n");
   }
   else if (cpd.settings[UO_mod_full_brace_if_chain].b)
   {
      // This might run because either UO_mod_full_brace_if_chain or UO_mod_full_brace_if_chain_only is used.
      // We only want to remove braces if the first one is active.
      LOG_FMT(LBRCH, "%s: remove braces on lines[%d]:", __func__, br_cnt);
      while (--br_cnt >= 0)
      {
         if ((braces[br_cnt]->type == CT_BRACE_OPEN) ||
             (braces[br_cnt]->type == CT_BRACE_CLOSE))
         {
            LOG_FMT(LBRCH, " {%zu}", braces[br_cnt]->orig_line);
            convert_brace(braces[br_cnt]);
         }
         else
         {
            LOG_FMT(LBRCH, " %zu", braces[br_cnt]->orig_line);
         }
         braces[br_cnt] = NULL;
      }
      LOG_FMT(LBRCH, "\n");
   }
} // process_if_chain
void do_braces(void) { LOG_FUNC_ENTRY(); if (cpd.settings[UO_mod_full_brace_if_chain].b || cpd.settings[UO_mod_full_brace_if_chain_only].b) { mod_full_brace_if_chain(); } if ((cpd.settings[UO_mod_full_brace_if].a | cpd.settings[UO_mod_full_brace_do].a | cpd.settings[UO_mod_full_brace_for].a | cpd.settings[UO_mod_full_brace_using].a | cpd.settings[UO_mod_full_brace_while].a) & AV_REMOVE) { examine_braces(); } /* convert vbraces if needed */ if ((cpd.settings[UO_mod_full_brace_if].a | cpd.settings[UO_mod_full_brace_do].a | cpd.settings[UO_mod_full_brace_for].a | cpd.settings[UO_mod_full_brace_function].a | cpd.settings[UO_mod_full_brace_using].a | cpd.settings[UO_mod_full_brace_while].a) & AV_ADD) { convert_vbrace_to_brace(); } /* Mark one-liners */ chunk_t *pc = chunk_get_head(); while ((pc = chunk_get_next_ncnl(pc)) != NULL) { if ((pc->type != CT_BRACE_OPEN) && (pc->type != CT_VBRACE_OPEN)) { continue; } chunk_t *br_open = pc; c_token_t brc_type = c_token_t(pc->type + 1); /* Detect empty bodies */ chunk_t *tmp = chunk_get_next_ncnl(pc); if ((tmp != NULL) && (tmp->type == brc_type)) { chunk_flags_set(br_open, PCF_EMPTY_BODY); chunk_flags_set(tmp, PCF_EMPTY_BODY); } /* Scan for the brace close or a newline */ tmp = br_open; while ((tmp = chunk_get_next_nc(tmp)) != NULL) { if (chunk_is_newline(tmp)) { break; } if ((tmp->type == brc_type) && (br_open->level == tmp->level)) { flag_series(br_open, tmp, PCF_ONE_LINER); break; } } } if (cpd.settings[UO_mod_case_brace].a != AV_IGNORE) { mod_case_brace(); } if (cpd.settings[UO_mod_move_case_break].b) { move_case_break(); } } // do_braces
/** * Aligns all the stuff in m_aligned. * Re-adds 'newer' items in m_skipped. */ void AlignStack::Flush() { int last_seqnum = 0; int idx; int tmp_col; const ChunkStack::Entry *ce = NULL; chunk_t *pc; LOG_FMT(LAS, "%s: m_aligned.Len()=%d\n", __func__, m_aligned.Len()); LOG_FMT(LAS, "Flush (min=%d, max=%d)\n", m_min_col, m_max_col); if (m_aligned.Len() == 1) { // check if we have *one* typedef in the line pc = m_aligned.Get(0)->m_pc; chunk_t *temp = chunk_get_prev_type(pc, CT_TYPEDEF, pc->level); if (temp != NULL) { if (pc->orig_line == temp->orig_line) { // reset the gap only for *this* stack m_gap = 1; } } } m_last_added = 0; m_max_col = 0; /* Recalculate the max_col - it may have shifted since the last Add() */ for (idx = 0; idx < m_aligned.Len(); idx++) { pc = m_aligned.Get(idx)->m_pc; /* Set the column adjust and gap */ int col_adj = 0; int gap = 0; if (pc != pc->align.ref) { gap = pc->column - (pc->align.ref->column + pc->align.ref->len()); } chunk_t *tmp = pc; if (tmp->type == CT_TPAREN_OPEN) { tmp = chunk_get_next(tmp); } if (chunk_is_ptr_operator(tmp) && (m_star_style == SS_DANGLE)) { col_adj = pc->align.start->column - pc->column; gap = pc->align.start->column - (pc->align.ref->column + pc->align.ref->len()); } if (m_right_align) { /* Adjust the width for signed numbers */ int start_len = pc->align.start->len(); if (pc->align.start->type == CT_NEG) { tmp = chunk_get_next(pc->align.start); if ((tmp != NULL) && (tmp->type == CT_NUMBER)) { start_len += tmp->len(); } } col_adj += start_len; } pc->align.col_adj = col_adj; /* See if this pushes out the max_col */ int endcol = pc->column + col_adj; if (gap < m_gap) { endcol += m_gap - gap; } if (endcol > m_max_col) { m_max_col = endcol; } } if (cpd.settings[UO_align_on_tabstop].b && (m_aligned.Len() > 1)) { m_max_col = align_tab_column(m_max_col); } LOG_FMT(LAS, "%s: m_aligned.Len()=%d\n", __func__, m_aligned.Len()); for (idx = 0; idx < m_aligned.Len(); idx++) { ce = m_aligned.Get(idx); pc = ce->m_pc; tmp_col = 
m_max_col - pc->align.col_adj; if (idx == 0) { if (m_skip_first && (pc->column != tmp_col)) { LOG_FMT(LAS, "%s: %lu:%lu dropping first item due to skip_first\n", __func__, pc->orig_line, pc->orig_col); m_skip_first = false; m_aligned.Pop_Front(); Flush(); m_skip_first = true; return; } chunk_flags_set(pc, PCF_ALIGN_START); pc->align.right_align = m_right_align; pc->align.amp_style = (int)m_amp_style; pc->align.star_style = (int)m_star_style; } pc->align.gap = m_gap; pc->align.next = m_aligned.GetChunk(idx + 1); /* Indent the token, taking col_adj into account */ LOG_FMT(LAS, "%s: line %lu: '%s' to col %d (adj=%d)\n", __func__, pc->orig_line, pc->text(), tmp_col, pc->align.col_adj); align_to_column(pc, tmp_col); } if (ce != NULL) { last_seqnum = ce->m_seqnum; m_aligned.Reset(); } m_min_col = 9999; m_max_col = 0; if (m_skipped.Empty()) { /* Nothing was skipped, sync the seqnums */ m_nl_seqnum = m_seqnum; } else { /* Remove all items with seqnum < last_seqnum */ for (idx = 0; idx < m_skipped.Len(); idx++) { if (m_skipped.Get(idx)->m_seqnum < last_seqnum) { m_skipped.Zap(idx); } } m_skipped.Collapse(); /* Add all items from the skipped list */ ReAddSkipped(); } } // AlignStack::Flush
/** * At the heart of this algorithm are two stacks. * There is the Paren Stack (PS) and the Frame stack. * * The PS (pse in the code) keeps track of braces, parens, * if/else/switch/do/while/etc items -- anything that is nestable. * Complex statements go through stages. * Take this simple if statement as an example: * if ( x ) { x--; } * * The stack would change like so: 'token' stack afterwards * 'if' [IF - 1] * '(' [IF - 1] [PAREN OPEN] * 'x' [IF - 1] [PAREN OPEN] * ')' [IF - 2] <- note that the state was incremented * '{' [IF - 2] [BRACE OPEN] * 'x' [IF - 2] [BRACE OPEN] * '--' [IF - 2] [BRACE OPEN] * ';' [IF - 2] [BRACE OPEN] * '}' [IF - 3] * <- lack of else kills the IF, closes statement * * Virtual braces example: * if ( x ) x--; else x++; * * 'if' [IF - 1] * '(' [IF - 1] [PAREN OPEN] * 'x' [IF - 1] [PAREN OPEN] * ')' [IF - 2] * 'x' [IF - 2] [VBRACE OPEN] <- VBrace open inserted before because '{' was not next * '--' [IF - 2] [VBRACE OPEN] * ';' [IF - 3] <- VBrace close inserted after semicolon * 'else' [ELSE - 0] <- IF changed into ELSE * 'x' [ELSE - 0] [VBRACE OPEN] <- lack of '{' -> VBrace * '++' [ELSE - 0] [VBRACE OPEN] * ';' [ELSE - 0] <- VBrace close inserted after semicolon * <- ELSE removed after statement close * * The pse stack is kept on a frame stack. * The frame stack is need for languages that support preprocessors (C, C++, C#) * that can arbitrarily change code flow. It also isolates #define macros so * that they are indented independently and do not affect the rest of the program. * * When an #if is hit, a copy of the current frame is push on the frame stack. * When an #else/#elif is hit, a copy of the current stack is pushed under the * #if frame and the original (pre-#if) frame is copied to the current frame. * When #endif is hit, the top frame is popped. 
* This has the following effects: * - a simple #if / #endif does not affect program flow * - #if / #else /#endif - continues from the #if clause * * When a #define is entered, the current frame is pushed and cleared. * When a #define is exited, the frame is popped. */ static void parse_cleanup(parse_frame_t *frm, chunk_t *pc) { LOG_FUNC_ENTRY(); LOG_FMT(LTOK, "%s(%d): orig_line is %zu, type is %s, tos is %zu, TOS.type is %s, TOS.stage is %u\n", __func__, __LINE__, pc->orig_line, get_token_name(pc->type), frm->pse_tos, get_token_name(frm->pse[frm->pse_tos].type), (unsigned int)frm->pse[frm->pse_tos].stage); // Mark statement starts if ( (frm->stmt_count == 0 || frm->expr_count == 0) && !chunk_is_semicolon(pc) && pc->type != CT_BRACE_CLOSE && pc->type != CT_VBRACE_CLOSE && !chunk_is_str(pc, ")", 1) && !chunk_is_str(pc, "]", 1)) { chunk_flags_set(pc, PCF_EXPR_START | ((frm->stmt_count == 0) ? PCF_STMT_START : 0)); LOG_FMT(LSTMT, "%s(%d): orig_line is %zu, 1.marked '%s' as %s, start stmt_count is %d, expr_count is %d\n", __func__, __LINE__, pc->orig_line, pc->text(), (pc->flags & PCF_STMT_START) ? "stmt" : "expr", frm->stmt_count, frm->expr_count); } frm->stmt_count++; frm->expr_count++; if (frm->sparen_count > 0) { chunk_flags_set(pc, PCF_IN_SPAREN); // Mark everything in the for statement for (int tmp = frm->pse_tos - 1; tmp >= 0; tmp--) { if (frm->pse[tmp].type == CT_FOR) { chunk_flags_set(pc, PCF_IN_FOR); break; } } // Mark the parent on semicolons in for() statements if ( pc->type == CT_SEMICOLON && frm->pse_tos > 1 && (frm->pse[frm->pse_tos - 1].type == CT_FOR)) { set_chunk_parent(pc, CT_FOR); } } // Check the progression of complex statements if (frm->pse[frm->pse_tos].stage != brace_stage_e::NONE) { if (check_complex_statements(frm, pc)) { return; } } /* * Check for a virtual brace statement close due to a semicolon. * The virtual brace will get handled the next time through. * The semicolon isn't handled at all. 
* TODO: may need to float VBRACE past comments until newline? */ if (frm->pse[frm->pse_tos].type == CT_VBRACE_OPEN) { if (chunk_is_semicolon(pc)) { cpd.consumed = true; close_statement(frm, pc); } else if (cpd.lang_flags & LANG_PAWN) { if (pc->type == CT_BRACE_CLOSE) { close_statement(frm, pc); } } } // Handle close parenthesis, vbrace, brace, and square if ( pc->type == CT_PAREN_CLOSE || pc->type == CT_BRACE_CLOSE || pc->type == CT_VBRACE_CLOSE || pc->type == CT_ANGLE_CLOSE || pc->type == CT_MACRO_CLOSE || pc->type == CT_SQUARE_CLOSE) { // Change CT_PAREN_CLOSE into CT_SPAREN_CLOSE or CT_FPAREN_CLOSE if ( pc->type == CT_PAREN_CLOSE && ( (frm->pse[frm->pse_tos].type == CT_FPAREN_OPEN) || (frm->pse[frm->pse_tos].type == CT_SPAREN_OPEN))) { set_chunk_type(pc, (c_token_t)(frm->pse[frm->pse_tos].type + 1)); if (pc->type == CT_SPAREN_CLOSE) { frm->sparen_count--; chunk_flags_clr(pc, PCF_IN_SPAREN); } } // Make sure the open / close match if (pc->type != (frm->pse[frm->pse_tos].type + 1)) { if ( (frm->pse[frm->pse_tos].type != CT_NONE) && (frm->pse[frm->pse_tos].type != CT_PP_DEFINE)) { LOG_FMT(LWARN, "%s(%d): %s, orig_line is %zu, Error: Unexpected '%s' for '%s', which was on line %zu\n", __func__, __LINE__, cpd.filename, pc->orig_line, pc->text(), get_token_name(frm->pse[frm->pse_tos].pc->type), frm->pse[frm->pse_tos].pc->orig_line); print_stack(LBCSPOP, "=Error ", frm, pc); cpd.error_count++; } } else { cpd.consumed = true; // Copy the parent, update the parenthesis/brace levels set_chunk_parent(pc, frm->pse[frm->pse_tos].parent); frm->level--; if ( pc->type == CT_BRACE_CLOSE || pc->type == CT_VBRACE_CLOSE || pc->type == CT_MACRO_CLOSE) { frm->brace_level--; } pc->level = frm->level; pc->brace_level = frm->brace_level; // Pop the entry frm->pse_tos--; print_stack(LBCSPOP, "-Close ", frm, pc); // See if we are in a complex statement if (frm->pse[frm->pse_tos].stage != brace_stage_e::NONE) { handle_complex_close(frm, pc); } } } /* * In this state, we expect a semicolon, 
but we'll also hit the closing * sparen, so we need to check cpd.consumed to see if the close sparen was * aleady handled. */ if (frm->pse[frm->pse_tos].stage == brace_stage_e::WOD_SEMI) { chunk_t *tmp = pc; if (cpd.consumed) { /* * If consumed, then we are on the close sparen. * PAWN: Check the next chunk for a semicolon. If it isn't, then * add a virtual semicolon, which will get handled on the next pass. */ if (cpd.lang_flags & LANG_PAWN) { tmp = chunk_get_next_ncnl(pc); if (tmp->type != CT_SEMICOLON && tmp->type != CT_VSEMICOLON) { pawn_add_vsemi_after(pc); } } } else { // Complain if this ISN'T a semicolon, but close out WHILE_OF_DO anyway if (pc->type == CT_SEMICOLON || pc->type == CT_VSEMICOLON) { cpd.consumed = true; set_chunk_parent(pc, CT_WHILE_OF_DO); } else { LOG_FMT(LWARN, "%s: %s(%d): %zu: Error: Expected a semicolon for WHILE_OF_DO, but got '%s'\n", cpd.filename, __func__, __LINE__, pc->orig_line, get_token_name(pc->type)); cpd.error_count++; } handle_complex_close(frm, pc); } } // Get the parent type for brace and parenthesis open c_token_t parent = pc->parent_type; if ( pc->type == CT_PAREN_OPEN || pc->type == CT_FPAREN_OPEN || pc->type == CT_SPAREN_OPEN || pc->type == CT_BRACE_OPEN) { chunk_t *prev = chunk_get_prev_ncnl(pc); if (prev != nullptr) { if ( pc->type == CT_PAREN_OPEN || pc->type == CT_FPAREN_OPEN || pc->type == CT_SPAREN_OPEN) { // Set the parent for parenthesis and change parenthesis type if (frm->pse[frm->pse_tos].stage != brace_stage_e::NONE) { set_chunk_type(pc, CT_SPAREN_OPEN); parent = frm->pse[frm->pse_tos].type; frm->sparen_count++; } else if (prev->type == CT_FUNCTION) { set_chunk_type(pc, CT_FPAREN_OPEN); parent = CT_FUNCTION; } // NS_ENUM and NS_OPTIONS are followed by a (type, name) pair else if (prev->type == CT_ENUM && (cpd.lang_flags & LANG_OC)) { // Treat both as CT_ENUM since the syntax is identical set_chunk_type(pc, CT_FPAREN_OPEN); parent = CT_ENUM; } else { // no need to set parent } } else // must be CT_BRACE_OPEN 
{ // Set the parent for open braces if (frm->pse[frm->pse_tos].stage != brace_stage_e::NONE) { parent = frm->pse[frm->pse_tos].type; } else if (prev->type == CT_ASSIGN && (prev->str[0] == '=')) { parent = CT_ASSIGN; } // Carry through CT_ENUM parent in NS_ENUM (type, name) { else if ( prev->type == CT_FPAREN_CLOSE && (cpd.lang_flags & LANG_OC) && prev->parent_type == CT_ENUM) { parent = CT_ENUM; } else if (prev->type == CT_FPAREN_CLOSE) { parent = CT_FUNCTION; } else { // no need to set parent } } } } /* * Adjust the level for opens & create a stack entry * Note that CT_VBRACE_OPEN has already been handled. */ if ( pc->type == CT_BRACE_OPEN || pc->type == CT_PAREN_OPEN || pc->type == CT_FPAREN_OPEN || pc->type == CT_SPAREN_OPEN || pc->type == CT_ANGLE_OPEN || pc->type == CT_MACRO_OPEN || pc->type == CT_SQUARE_OPEN) { frm->level++; if (pc->type == CT_BRACE_OPEN || pc->type == CT_MACRO_OPEN) { frm->brace_level++; } push_fmr_pse(frm, pc, brace_stage_e::NONE, "+Open "); frm->pse[frm->pse_tos].parent = parent; set_chunk_parent(pc, parent); } pattern_class_e patcls = get_token_pattern_class(pc->type); /* * Create a stack entry for complex statements: * if, elseif, switch, for, while, synchronized, using, lock, with, * version, CT_D_SCOPE_IF */ if (patcls == pattern_class_e::BRACED) { push_fmr_pse(frm, pc, (pc->type == CT_DO) ? 
brace_stage_e::BRACE_DO : brace_stage_e::BRACE2, "+ComplexBraced"); } else if (patcls == pattern_class_e::PBRACED) { brace_stage_e bs = brace_stage_e::PAREN1; if (pc->type == CT_WHILE && maybe_while_of_do(pc)) { set_chunk_type(pc, CT_WHILE_OF_DO); bs = brace_stage_e::WOD_PAREN; } push_fmr_pse(frm, pc, bs, "+ComplexParenBraced"); } else if (patcls == pattern_class_e::OPBRACED) { push_fmr_pse(frm, pc, brace_stage_e::OP_PAREN1, "+ComplexOpParenBraced"); } else if (patcls == pattern_class_e::ELSE) { push_fmr_pse(frm, pc, brace_stage_e::ELSEIF, "+ComplexElse"); } /* * Mark simple statement/expression starts * - after { or } * - after ';', but not if the paren stack top is a paren * - after '(' that has a parent type of CT_FOR */ if ( pc->type == CT_SQUARE_OPEN || (pc->type == CT_BRACE_OPEN && pc->parent_type != CT_ASSIGN) || pc->type == CT_BRACE_CLOSE || pc->type == CT_VBRACE_CLOSE || (pc->type == CT_SPAREN_OPEN && pc->parent_type == CT_FOR) || pc->type == CT_COLON || pc->type == CT_OC_END || ( chunk_is_semicolon(pc) && frm->pse[frm->pse_tos].type != CT_PAREN_OPEN && frm->pse[frm->pse_tos].type != CT_FPAREN_OPEN && frm->pse[frm->pse_tos].type != CT_SPAREN_OPEN)) { LOG_FMT(LSTMT, "%s(%d): orig_line is %zu, reset1 stmt on '%s'\n", __func__, __LINE__, pc->orig_line, pc->text()); frm->stmt_count = 0; frm->expr_count = 0; } // Mark expression starts chunk_t *tmp = chunk_get_next_ncnl(pc); if ( pc->type == CT_ARITH || pc->type == CT_ASSIGN || pc->type == CT_CASE || pc->type == CT_COMPARE || ( pc->type == CT_STAR && tmp != nullptr && tmp->type != CT_STAR) || pc->type == CT_BOOL || pc->type == CT_MINUS || pc->type == CT_PLUS || pc->type == CT_CARET || pc->type == CT_ANGLE_OPEN || pc->type == CT_ANGLE_CLOSE || pc->type == CT_RETURN || pc->type == CT_THROW || pc->type == CT_GOTO || pc->type == CT_CONTINUE || pc->type == CT_PAREN_OPEN || pc->type == CT_FPAREN_OPEN || pc->type == CT_SPAREN_OPEN || pc->type == CT_BRACE_OPEN || chunk_is_semicolon(pc) || pc->type == CT_COMMA || 
pc->type == CT_NOT || pc->type == CT_INV || pc->type == CT_COLON || pc->type == CT_QUESTION) { frm->expr_count = 0; LOG_FMT(LSTMT, "%s(%d): orig_line is %zu, reset expr on '%s'\n", __func__, __LINE__, pc->orig_line, pc->text()); } else if (pc->type == CT_BRACE_CLOSE) { if (!cpd.consumed) { size_t file_pp_level = ifdef_over_whole_file() ? 1 : 0; if (!cpd.unc_off_used && pc->pp_level == file_pp_level) { // fatal error char *outputMessage; if (cpd.settings[UO_tok_split_gte].b) { outputMessage = make_message("Unmatched BRACE_CLOSE\nat orig_line=%zu, orig_col=%zu\n", pc->orig_line, pc->orig_col); } else { outputMessage = make_message("Unmatched BRACE_CLOSE\nat orig_line=%zu, orig_col=%zu\nTry the option 'tok_split_gte = true'\n", pc->orig_line, pc->orig_col); } fprintf(stderr, "%s", outputMessage); free(outputMessage); log_flush(true); exit(EXIT_FAILURE); } } } } // parse_cleanup