void tokenize_cleanup(void) { chunk_t *pc = chunk_get_head(); chunk_t *prev = NULL; chunk_t *next; chunk_t *tmp; chunk_t *tmp2; bool in_type_cast = false; pc = chunk_get_head(); next = chunk_get_next_ncnl(pc); while ((pc != NULL) && (next != NULL)) { /* Change '[' + ']' into '[]' */ if ((pc->type == CT_SQUARE_OPEN) && (next->type == CT_SQUARE_CLOSE)) { pc->type = CT_TSQUARE; pc->str = "[]"; pc->len = 2; chunk_del(next); pc->orig_col_end += 1; next = chunk_get_next_ncnl(pc); } if ((pc->type == CT_DOT) && ((cpd.lang_flags & LANG_ALLC) != 0)) { pc->type = CT_MEMBER; } /* Determine the version stuff (D only) */ if (pc->type == CT_VERSION) { if (next->type == CT_PAREN_OPEN) { pc->type = CT_IF; } else { if (next->type != CT_ASSIGN) { LOG_FMT(LERR, "%s:%d %s: version: Unexpected token %s\n", cpd.filename, pc->orig_line, __func__, get_token_name(next->type)); cpd.error_count++; } pc->type = CT_WORD; } } /** * Change CT_BASE before CT_PAREN_OPEN to CT_WORD. * public myclass() : base() { * } */ if ((pc->type == CT_BASE) && (next->type == CT_PAREN_OPEN)) { pc->type = CT_WORD; } /** * Change CT_WORD after CT_ENUM, CT_UNION, or CT_STRUCT to CT_TYPE * Change CT_WORD before CT_WORD to CT_TYPE */ if (next->type == CT_WORD) { if ((pc->type == CT_ENUM) || (pc->type == CT_UNION) || (pc->type == CT_STRUCT)) { next->type = CT_TYPE; } if (pc->type == CT_WORD) { pc->type = CT_TYPE; } } /* change extern to qualifier if extern isn't followed by a string or * an open paren */ if (pc->type == CT_EXTERN) { if (next->type == CT_STRING) { /* Probably 'extern "C"' */ } else if (next->type == CT_PAREN_OPEN) { /* Probably 'extern (C)' */ } else { /* Something else followed by a open brace */ tmp = chunk_get_next_ncnl(next); if ((tmp != NULL) || (tmp->type != CT_BRACE_OPEN)) { pc->type = CT_QUALIFIER; } } } /** * Change CT_STAR to CT_PTR_TYPE if preceded by CT_TYPE, * CT_QUALIFIER, or CT_PTR_TYPE. */ if ((next->type == CT_STAR) && ((pc->type == CT_TYPE) || (pc->type == CT_QUALIFIER) || (pc->type == CT_PTR_TYPE))) { next->type = CT_PTR_TYPE; } if ((pc->type == CT_TYPE_CAST) && (next->type == CT_ANGLE_OPEN)) { next->parent_type = CT_TYPE_CAST; in_type_cast = true; } /** * Change angle open/close to CT_COMPARE, if not a template thingy */ if ((pc->type == CT_ANGLE_OPEN) && (pc->parent_type != CT_TYPE_CAST)) { check_template(pc); } if ((pc->type == CT_ANGLE_CLOSE) && (pc->parent_type != CT_TEMPLATE)) { if (in_type_cast) { in_type_cast = false; pc->parent_type = CT_TYPE_CAST; } else { pc->type = CT_COMPARE; } } if ((cpd.lang_flags & LANG_D) != 0) { /* Check for the D string concat symbol '~' */ if ((pc->type == CT_INV) && ((prev->type == CT_STRING) || (prev->type == CT_WORD) || (next->type == CT_STRING))) { pc->type = CT_CONCAT; } /* Check for the D template symbol '!' */ if ((pc->type == CT_NOT) && (prev->type == CT_WORD) && (next->type == CT_PAREN_OPEN)) { pc->type = CT_D_TEMPLATE; } } if ((cpd.lang_flags & LANG_CPP) != 0) { /* Change Word before '::' into a type */ if ((pc->type == CT_WORD) && (next->type == CT_DC_MEMBER)) { pc->type = CT_TYPE; } } /* Change get/set to CT_WORD if not followed by a brace open */ if ((pc->type == CT_GETSET) && (next->type != CT_BRACE_OPEN)) { if ((next->type == CT_SEMICOLON) && ((prev->type == CT_BRACE_CLOSE) || (prev->type == CT_BRACE_OPEN) || (prev->type == CT_SEMICOLON))) { pc->type = CT_GETSET_EMPTY; next->parent_type = CT_GETSET; } else { pc->type = CT_WORD; } } /* Change item after operator (>=, ==, etc) to a CT_OPERATOR_VAL * Usually the next item is part of the operator. * In a few cases the next few tokens are part of it: * operator + - common case * operator () * operator [] - already converted to TSQUARE * operator new [] * operator delete [] * operator const char * * This will put the entire operator value in one chunk. */ if (pc->type == CT_OPERATOR) { /* Handle special case of () operator -- [] already handled */ if (next->type == CT_PAREN_OPEN) { tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->type == CT_PAREN_CLOSE)) { next->str = "()"; next->len = 2; next->type = CT_OPERATOR_VAL; chunk_del(tmp); next->orig_col_end += 1; } } else if (next->flags & PCF_PUNCTUATOR) { next->type = CT_OPERATOR_VAL; } else { next->type = CT_TYPE; /* Replace next with a collection of all tokens that are part of * the type. */ char opbuf[256]; int len; len = snprintf(opbuf, sizeof(opbuf), "%.*s", next->len, next->str); tmp2 = next; while ((tmp = chunk_get_next(tmp2)) != NULL) { if ((tmp->type != CT_WORD) && (tmp->type != CT_TYPE) && (tmp->type != CT_QUALIFIER) && (tmp->type != CT_STAR) && (tmp->type != CT_AMP) && (tmp->type != CT_TSQUARE)) { break; } len += snprintf(opbuf + len, sizeof(opbuf) - len, "%s%.*s", space_needed(tmp2, tmp) ? " " : "", tmp->len, tmp->str); tmp2 = tmp; } while ((tmp2 = chunk_get_next(next)) != tmp) { chunk_del(tmp2); } next->str = strdup(opbuf); next->len = len; next->flags |= PCF_OWN_STR; next->type = CT_OPERATOR_VAL; next->orig_col_end = next->orig_col + next->len; } next->parent_type = CT_OPERATOR; LOG_FMT(LOPERATOR, "%s: %d:%d operator '%.*s'\n", __func__, pc->orig_line, pc->orig_col, next->len, next->str); } /* Change private, public, protected into either a qualifier or label */ if (pc->type == CT_PRIVATE) { /* Handle Qt slots - maybe should just check for a CT_WORD? */ if (chunk_is_str(next, "slots", 5)) { tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->type == CT_COLON)) { next = tmp; } } if (next->type == CT_COLON) { next->type = CT_PRIVATE_COLON; if ((tmp = chunk_get_next_ncnl(next)) != NULL) { tmp->flags |= PCF_STMT_START | PCF_EXPR_START; } } else { pc->type = chunk_is_str(pc, "signals", 7) ? CT_WORD : CT_QUALIFIER; } } /* Look for <newline> 'EXEC' 'SQL' */ if (chunk_is_str(pc, "EXEC", 4) && chunk_is_str(next, "SQL", 3)) { tmp = chunk_get_prev(pc); if (chunk_is_newline(tmp)) { tmp = chunk_get_next(next); if (chunk_is_str_case(tmp, "BEGIN", 5)) { pc->type = CT_SQL_BEGIN; } else if (chunk_is_str_case(tmp, "END", 3)) { pc->type = CT_SQL_END; } else { pc->type = CT_SQL_EXEC; } /* Change words into CT_SQL_WORD until CT_SEMICOLON */ while (tmp != NULL) { if (tmp->type == CT_SEMICOLON) { break; } if ((tmp->len > 0) && isalpha(*tmp->str)) { tmp->type = CT_SQL_WORD; } tmp = chunk_get_next_ncnl(tmp); } } } /* Detect Objective C class name */ if ((pc->type == CT_OC_IMPL) || (pc->type == CT_OC_INTF)) { next->type = CT_OC_CLASS; next->parent_type = pc->type; tmp = chunk_get_next_ncnl(next); if (tmp != NULL) { tmp->flags |= PCF_STMT_START | PCF_EXPR_START; } tmp = chunk_get_next_type(pc, CT_OC_END, pc->level); if (tmp != NULL) { tmp->parent_type = pc->type; } } /* Detect Objective-C categories and class extensions */ /* @interface ClassName (CategoryName) */ /* @implementation ClassName (CategoryName) */ /* @interface ClassName () */ /* @implementation ClassName () */ if (((pc->parent_type == CT_OC_IMPL) || (pc->parent_type == CT_OC_INTF) || (pc->type == CT_OC_CLASS)) && (next->type == CT_PAREN_OPEN)) { next->parent_type = pc->parent_type; tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->next != NULL)) { if (tmp->type == CT_PAREN_CLOSE) { tmp->type = CT_OC_CLASS_EXT; tmp->parent_type = pc->parent_type; } else { tmp->type = CT_OC_CATEGORY; tmp->parent_type = pc->parent_type; } } tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level); if (tmp != NULL) { tmp->parent_type = pc->parent_type; } } /** * Objective C @dynamic and @synthesize * @dynamic xxx, yyy; * @synthesize xxx, yyy; * Just find the semicolon and mark it. */ if (pc->type == CT_OC_DYNAMIC) { tmp = chunk_get_next_type(pc, CT_SEMICOLON, pc->level); if (tmp != NULL) { tmp->parent_type = pc->type; } } /* Detect Objective C @property * @property NSString *stringProperty; * @property(nonatomic, retain) NSMutableDictionary *shareWith; */ if (pc->type == CT_OC_PROPERTY) { if (next->type != CT_PAREN_OPEN) { next->flags |= PCF_STMT_START | PCF_EXPR_START; } else { next->parent_type = pc->type; tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level); if (tmp != NULL) { tmp->parent_type = pc->type; tmp = chunk_get_next_ncnl(tmp); if (tmp != NULL) { tmp->flags |= PCF_STMT_START | PCF_EXPR_START; tmp = chunk_get_next_type(tmp, CT_SEMICOLON, pc->level); if (tmp != NULL) { tmp->parent_type = pc->type; } } } } } /* Detect Objective C @selector * @selector(msgNameWithNoArg) * @selector(msgNameWith1Arg:) * @selector(msgNameWith2Args:arg2Name:) */ if ((pc->type == CT_OC_SEL) && (next->type == CT_PAREN_OPEN)) { next->parent_type = pc->type; tmp = chunk_get_next(next); if (tmp != NULL) { tmp->type = CT_OC_SEL_NAME; tmp->parent_type = pc->type; while ((tmp = chunk_get_next_ncnl(tmp)) != NULL) { if (tmp->type == CT_PAREN_CLOSE) { tmp->parent_type = CT_OC_SEL; break; } tmp->type = CT_OC_SEL_NAME; tmp->parent_type = pc->type; } } } /* Mark Objective-C blocks (aka lambdas or closures) * The syntax and usage is exactly like C function pointers with two exceptions: * Instead of an asterisk they have a caret as pointer symbol. * In method declarations which take a block as parameter, there can be anonymous blocks, e.g.: (^) * 1. block literal: ^{ ... }; * 2. block declaration: return_t (^name) (int arg1, int arg2, ...) NB: return_t is optional and name can be optional if found as param in a method declaration. * 3. block expression: ^ return_t (int arg) { ... }; NB: return_t is optional * * See http://developer.apple.com/mac/library/documentation/Cocoa/Conceptual/Blocks for more info... */ if ((cpd.lang_flags & LANG_OC) && (pc->type == CT_ARITH) && (chunk_is_str(pc, "^", 1)) && (prev->type != CT_NUMBER) && (prev->type != CT_NUMBER_FP)) { /* mark objc blocks caret so that we can process it later*/ pc->type = CT_OC_BLOCK_CARET; if (prev->type == CT_PAREN_OPEN) { /* block declaration */ pc->parent_type = CT_OC_BLOCK_TYPE; } else if ((next->type == CT_PAREN_OPEN) || (next->type == CT_BRACE_OPEN)) { /* block expression without return type */ /* block literal */ pc->parent_type = CT_OC_BLOCK_EXPR; } else { /* block expression with return type (seldomly used) */ if (prev->type == CT_ASSIGN) { /* shortcut to spare the peeking below * the XOR operator wouldn't be used directly * after an assign all by itself */ pc->parent_type = CT_OC_BLOCK_EXPR; } else { /* this ones tricky because we don't know how many * stars the return type has - if there even is one */ tmp = pc; while ((tmp = chunk_get_next(tmp)) != NULL) { /* we just peek ahead and see if the line contains * an open brace somewhere. * FIXME: this check needs to be more thorough. */ if (tmp->type == CT_BRACE_OPEN) { pc->parent_type = CT_OC_BLOCK_EXPR; break; } } } } } /* Handle special preprocessor junk */ if (pc->type == CT_PREPROC) { pc->parent_type = next->type; } /* Detect "pragma region" and "pragma endregion" */ if ((pc->type == CT_PP_PRAGMA) && (next->type == CT_PREPROC_BODY)) { if ((memcmp(next->str, "region", 6) == 0) || (memcmp(next->str, "endregion", 9) == 0)) { pc->type = (*next->str == 'r') ? CT_PP_REGION : CT_PP_ENDREGION; prev->parent_type = pc->type; } } /* Check for C# nullable types '?' is in next */ if ((cpd.lang_flags & LANG_CS) && (next->type == CT_QUESTION) && (next->orig_col == (pc->orig_col + pc->len))) { tmp = chunk_get_next_ncnl(next); if (tmp != NULL) { bool doit = ((tmp->type == CT_PAREN_CLOSE) || (tmp->type == CT_ANGLE_CLOSE)); if (tmp->type == CT_WORD) { tmp2 = chunk_get_next_ncnl(tmp); if ((tmp2 != NULL) && ((tmp2->type == CT_SEMICOLON) || (tmp2->type == CT_ASSIGN) || (tmp2->type == CT_COMMA))) { doit = true; } } if (doit) { pc->len++; chunk_del(next); next = tmp; } } } /* Convert '>' + '>' into '>>' */ if ((cpd.lang_flags & LANG_CS) && (pc->type == CT_ANGLE_CLOSE) && (next->type == CT_ANGLE_CLOSE) && (pc->parent_type == CT_NONE) && ((pc->orig_col + pc->len) == next->orig_col) && (next->parent_type == CT_NONE)) { pc->len++; pc->type = CT_ARITH; tmp = chunk_get_next_ncnl(next); chunk_del(next); next = tmp; } /* Change 'default(' into a sizeof-like statement */ if ((cpd.lang_flags & LANG_CS) && (pc->type == CT_DEFAULT) && (next->type == CT_PAREN_OPEN)) { pc->type = CT_SIZEOF; } if ((pc->type == CT_UNSAFE) && (next->type != CT_BRACE_OPEN)) { pc->type = CT_QUALIFIER; } /* TODO: determine other stuff here */ prev = pc; pc = next; next = chunk_get_next_ncnl(pc); } }
void tokenize_cleanup(void) { LOG_FUNC_ENTRY(); chunk_t *pc = chunk_get_head(); chunk_t *prev = NULL; chunk_t *next; chunk_t *tmp; chunk_t *tmp2; bool in_type_cast = false; cpd.unc_stage = US_TOKENIZE_CLEANUP; /* Since [] is expected to be TSQUARE for the 'operator', we need to make * this change in the first pass. */ for (pc = chunk_get_head(); pc != NULL; pc = chunk_get_next_ncnl(pc)) { if (pc->type == CT_SQUARE_OPEN) { next = chunk_get_next_ncnl(pc); if (chunk_is_token(next, CT_SQUARE_CLOSE)) { /* Change '[' + ']' into '[]' */ set_chunk_type(pc, CT_TSQUARE); pc->str = "[]"; // bug # 664 // The original orig_col_end of CT_SQUARE_CLOSE is stored at orig_col_end of CT_TSQUARE. // pc->orig_col_end += 1; pc->orig_col_end = next->orig_col_end; chunk_del(next); } } if ((pc->type == CT_SEMICOLON) && (pc->flags & PCF_IN_PREPROC) && !chunk_get_next_ncnl(pc, CNAV_PREPROC)) { LOG_FMT(LNOTE, "%s:%d Detected a macro that ends with a semicolon. Possible failures if used.\n", cpd.filename, pc->orig_line); } } /* We can handle everything else in the second pass */ pc = chunk_get_head(); next = chunk_get_next_ncnl(pc); while ((pc != NULL) && (next != NULL)) { if ((pc->type == CT_DOT) && (cpd.lang_flags & LANG_ALLC)) { set_chunk_type(pc, CT_MEMBER); } if ((pc->type == CT_NULLCOND) && (cpd.lang_flags & LANG_CS)) { set_chunk_type(pc, CT_MEMBER); } /* Determine the version stuff (D only) */ if (pc->type == CT_D_VERSION) { if (next->type == CT_PAREN_OPEN) { set_chunk_type(pc, CT_D_VERSION_IF); } else { if (next->type != CT_ASSIGN) { LOG_FMT(LERR, "%s:%d %s: version: Unexpected token %s\n", cpd.filename, pc->orig_line, __func__, get_token_name(next->type)); cpd.error_count++; } set_chunk_type(pc, CT_WORD); } } /* Determine the scope stuff (D only) */ if (pc->type == CT_D_SCOPE) { if (next->type == CT_PAREN_OPEN) { set_chunk_type(pc, CT_D_SCOPE_IF); } else { set_chunk_type(pc, CT_TYPE); } } /** * Change CT_BASE before CT_PAREN_OPEN to CT_WORD. * public myclass() : base() { * } */ if ((pc->type == CT_BASE) && (next->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_WORD); } if ((pc->type == CT_ENUM) && (next->type == CT_CLASS)) { set_chunk_type(next, CT_ENUM_CLASS); } /** * Change CT_WORD after CT_ENUM, CT_UNION, or CT_STRUCT to CT_TYPE * Change CT_WORD before CT_WORD to CT_TYPE */ if (next->type == CT_WORD) { if ((pc->type == CT_ENUM) || (pc->type == CT_ENUM_CLASS) || (pc->type == CT_UNION) || (pc->type == CT_STRUCT)) { set_chunk_type(next, CT_TYPE); } if (pc->type == CT_WORD) { set_chunk_type(pc, CT_TYPE); } } /* change extern to qualifier if extern isn't followed by a string or * an open paren */ if (pc->type == CT_EXTERN) { if (next->type == CT_STRING) { /* Probably 'extern "C"' */ } else if (next->type == CT_PAREN_OPEN) { /* Probably 'extern (C)' */ } else { /* Something else followed by a open brace */ tmp = chunk_get_next_ncnl(next); if ((tmp == NULL) || (tmp->type != CT_BRACE_OPEN)) { set_chunk_type(pc, CT_QUALIFIER); } } } /** * Change CT_STAR to CT_PTR_TYPE if preceded by CT_TYPE, * CT_QUALIFIER, or CT_PTR_TYPE. */ if ((next->type == CT_STAR) && ((pc->type == CT_TYPE) || (pc->type == CT_QUALIFIER) || (pc->type == CT_PTR_TYPE))) { set_chunk_type(next, CT_PTR_TYPE); } if ((pc->type == CT_TYPE_CAST) && (next->type == CT_ANGLE_OPEN)) { set_chunk_parent(next, CT_TYPE_CAST); in_type_cast = true; } /** * Change angle open/close to CT_COMPARE, if not a template thingy */ if ((pc->type == CT_ANGLE_OPEN) && (pc->parent_type != CT_TYPE_CAST)) { /* pretty much all languages except C use <> for something other than * comparisons. "#include<xxx>" is handled elsewhere. */ if (cpd.lang_flags & (LANG_CPP | LANG_CS | LANG_JAVA | LANG_VALA | LANG_OC)) { // bug #663 check_template(pc); } else { /* convert CT_ANGLE_OPEN to CT_COMPARE */ set_chunk_type(pc, CT_COMPARE); } } if ((pc->type == CT_ANGLE_CLOSE) && (pc->parent_type != CT_TEMPLATE)) { if (in_type_cast) { in_type_cast = false; set_chunk_parent(pc, CT_TYPE_CAST); } else { next = handle_double_angle_close(pc); } } if (cpd.lang_flags & LANG_D) { /* Check for the D string concat symbol '~' */ if ((pc->type == CT_INV) && ((prev->type == CT_STRING) || (prev->type == CT_WORD) || (next->type == CT_STRING))) { set_chunk_type(pc, CT_CONCAT); } /* Check for the D template symbol '!' (word + '!' + word or '(') */ if ((pc->type == CT_NOT) && (prev->type == CT_WORD) && ((next->type == CT_PAREN_OPEN) || (next->type == CT_WORD) || (next->type == CT_TYPE))) { set_chunk_type(pc, CT_D_TEMPLATE); } /* handle "version(unittest) { }" vs "unittest { }" */ if (prev && (pc->type == CT_UNITTEST) && (prev->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_WORD); } /* handle 'static if' and merge the tokens */ if (prev && (pc->type == CT_IF) && chunk_is_str(prev, "static", 6)) { /* delete PREV and merge with IF */ pc->str.insert(0, ' '); pc->str.insert(0, prev->str); pc->orig_col = prev->orig_col; pc->orig_line = prev->orig_line; chunk_t *to_be_deleted = prev; prev = chunk_get_prev_ncnl(prev); chunk_del(to_be_deleted); } } if (cpd.lang_flags & LANG_CPP) { /* Change Word before '::' into a type */ if ((pc->type == CT_WORD) && (next->type == CT_DC_MEMBER)) { set_chunk_type(pc, CT_TYPE); } } /* Change get/set to CT_WORD if not followed by a brace open */ if ((pc->type == CT_GETSET) && (next->type != CT_BRACE_OPEN)) { if ((next->type == CT_SEMICOLON) && ((prev->type == CT_BRACE_CLOSE) || (prev->type == CT_BRACE_OPEN) || (prev->type == CT_SEMICOLON))) { set_chunk_type(pc, CT_GETSET_EMPTY); set_chunk_parent(next, CT_GETSET); } else { set_chunk_type(pc, CT_WORD); } } /* Interface is only a keyword in MS land if followed by 'class' or 'struct' * likewise, 'class' may be a member name in Java. */ if ((pc->type == CT_CLASS) && !CharTable::IsKw1(next->str[0])) { set_chunk_type(pc, CT_WORD); } /* Change item after operator (>=, ==, etc) to a CT_OPERATOR_VAL * Usually the next item is part of the operator. * In a few cases the next few tokens are part of it: * operator + - common case * operator >> - need to combine '>' and '>' * operator () * operator [] - already converted to TSQUARE * operator new [] * operator delete [] * operator const char * * operator const B& * operator std::allocator<U> * * In all cases except the last, this will put the entire operator value * in one chunk. */ if (pc->type == CT_OPERATOR) { tmp2 = chunk_get_next(next); /* Handle special case of () operator -- [] already handled */ if (next->type == CT_PAREN_OPEN) { tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->type == CT_PAREN_CLOSE)) { next->str = "()"; set_chunk_type(next, CT_OPERATOR_VAL); chunk_del(tmp); next->orig_col_end += 1; } } else if ((next->type == CT_ANGLE_CLOSE) && tmp2 && (tmp2->type == CT_ANGLE_CLOSE) && (tmp2->orig_col == next->orig_col_end)) { next->str.append('>'); next->orig_col_end++; set_chunk_type(next, CT_OPERATOR_VAL); chunk_del(tmp2); } else if (next->flags & PCF_PUNCTUATOR) { set_chunk_type(next, CT_OPERATOR_VAL); } else { set_chunk_type(next, CT_TYPE); /* Replace next with a collection of all tokens that are part of * the type. */ tmp2 = next; while ((tmp = chunk_get_next(tmp2)) != NULL) { if ((tmp->type != CT_WORD) && (tmp->type != CT_TYPE) && (tmp->type != CT_QUALIFIER) && (tmp->type != CT_STAR) && (tmp->type != CT_CARET) && (tmp->type != CT_AMP) && (tmp->type != CT_TSQUARE)) { break; } /* Change tmp into a type so that space_needed() works right */ make_type(tmp); int num_sp = space_needed(tmp2, tmp); while (num_sp-- > 0) { next->str.append(" "); } next->str.append(tmp->str); tmp2 = tmp; } while ((tmp2 = chunk_get_next(next)) != tmp) { chunk_del(tmp2); } set_chunk_type(next, CT_OPERATOR_VAL); next->orig_col_end = next->orig_col + next->len(); } set_chunk_parent(next, CT_OPERATOR); LOG_FMT(LOPERATOR, "%s: %d:%d operator '%s'\n", __func__, pc->orig_line, pc->orig_col, next->text()); } /* Change private, public, protected into either a qualifier or label */ if (pc->type == CT_PRIVATE) { /* Handle Qt slots - maybe should just check for a CT_WORD? */ if (chunk_is_str(next, "slots", 5) || chunk_is_str(next, "Q_SLOTS", 7)) { tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->type == CT_COLON)) { next = tmp; } } if (next->type == CT_COLON) { set_chunk_type(next, CT_PRIVATE_COLON); if ((tmp = chunk_get_next_ncnl(next)) != NULL) { chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START); } } else { set_chunk_type(pc, (chunk_is_str(pc, "signals", 7) || chunk_is_str(pc, "Q_SIGNALS", 9)) ? CT_WORD : CT_QUALIFIER); } } /* Look for <newline> 'EXEC' 'SQL' */ if ((chunk_is_str(pc, "EXEC", 4) && chunk_is_str(next, "SQL", 3)) || ((*pc->str == '$') && (pc->type != CT_SQL_WORD))) { tmp = chunk_get_prev(pc); if (chunk_is_newline(tmp)) { if (*pc->str == '$') { set_chunk_type(pc, CT_SQL_EXEC); if (pc->len() > 1) { /* SPLIT OFF '$' */ chunk_t nc; nc = *pc; pc->str.resize(1); pc->orig_col_end = pc->orig_col + 1; nc.type = CT_SQL_WORD; nc.str.pop_front(); nc.orig_col++; nc.column++; chunk_add_after(&nc, pc); next = chunk_get_next(pc); } } tmp = chunk_get_next(next); if (chunk_is_str_case(tmp, "BEGIN", 5)) { set_chunk_type(pc, CT_SQL_BEGIN); } else if (chunk_is_str_case(tmp, "END", 3)) { set_chunk_type(pc, CT_SQL_END); } else { set_chunk_type(pc, CT_SQL_EXEC); } /* Change words into CT_SQL_WORD until CT_SEMICOLON */ while (tmp != NULL) { if (tmp->type == CT_SEMICOLON) { break; } if ((tmp->len() > 0) && (unc_isalpha(*tmp->str) || (*tmp->str == '$'))) { set_chunk_type(tmp, CT_SQL_WORD); } tmp = chunk_get_next_ncnl(tmp); } } } /* handle MS abomination 'for each' */ if ((pc->type == CT_FOR) && chunk_is_str(next, "each", 4) && (next == chunk_get_next(pc))) { /* merge the two with a space between */ pc->str.append(' '); pc->str += next->str; pc->orig_col_end = next->orig_col_end; chunk_del(next); next = chunk_get_next_ncnl(pc); /* label the 'in' */ if (next && (next->type == CT_PAREN_OPEN)) { tmp = chunk_get_next_ncnl(next); while (tmp && (tmp->type != CT_PAREN_CLOSE)) { if (chunk_is_str(tmp, "in", 2)) { set_chunk_type(tmp, CT_IN); break; } tmp = chunk_get_next_ncnl(tmp); } } } /* ObjectiveC allows keywords to be used as identifiers in some situations * This is a dirty hack to allow some of the more common situations. */ if (cpd.lang_flags & LANG_OC) { if (((pc->type == CT_IF) || (pc->type == CT_FOR) || (pc->type == CT_WHILE)) && !chunk_is_token(next, CT_PAREN_OPEN)) { set_chunk_type(pc, CT_WORD); } if ((pc->type == CT_DO) && (chunk_is_token(prev, CT_MINUS) || chunk_is_token(next, CT_SQUARE_CLOSE))) { set_chunk_type(pc, CT_WORD); } } /* Another hack to clean up more keyword abuse */ if ((pc->type == CT_CLASS) && (chunk_is_token(prev, CT_DOT) || chunk_is_token(next, CT_DOT))) { set_chunk_type(pc, CT_WORD); } /* Detect Objective C class name */ if ((pc->type == CT_OC_IMPL) || (pc->type == CT_OC_INTF) || (pc->type == CT_OC_PROTOCOL)) { if (next->type != CT_PAREN_OPEN) { set_chunk_type(next, CT_OC_CLASS); } set_chunk_parent(next, pc->type); tmp = chunk_get_next_ncnl(next); if (tmp != NULL) { chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START); } tmp = chunk_get_next_type(pc, CT_OC_END, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->type); } } if (pc->type == CT_OC_INTF) { tmp = chunk_get_next_ncnl(pc, CNAV_PREPROC); while ((tmp != NULL) && (tmp->type != CT_OC_END)) { if (get_token_pattern_class(tmp->type) != PATCLS_NONE) { LOG_FMT(LOBJCWORD, "@interface %d:%d change '%s' (%s) to CT_WORD\n", pc->orig_line, pc->orig_col, tmp->text(), get_token_name(tmp->type)); set_chunk_type(tmp, CT_WORD); } tmp = chunk_get_next_ncnl(tmp, CNAV_PREPROC); } } /* Detect Objective-C categories and class extensions */ /* @interface ClassName (CategoryName) */ /* @implementation ClassName (CategoryName) */ /* @interface ClassName () */ /* @implementation ClassName () */ if (((pc->parent_type == CT_OC_IMPL) || (pc->parent_type == CT_OC_INTF) || (pc->type == CT_OC_CLASS)) && (next->type == CT_PAREN_OPEN)) { set_chunk_parent(next, pc->parent_type); tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->next != NULL)) { if (tmp->type == CT_PAREN_CLOSE) { //set_chunk_type(tmp, CT_OC_CLASS_EXT); set_chunk_parent(tmp, pc->parent_type); } else { set_chunk_type(tmp, CT_OC_CATEGORY); set_chunk_parent(tmp, pc->parent_type); } } tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->parent_type); } } /* Detect Objective C @property * @property NSString *stringProperty; * @property(nonatomic, retain) NSMutableDictionary *shareWith; */ if (pc->type == CT_OC_PROPERTY) { if (next->type != CT_PAREN_OPEN) { chunk_flags_set(next, PCF_STMT_START | PCF_EXPR_START); } else { set_chunk_parent(next, pc->type); tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->type); tmp = chunk_get_next_ncnl(tmp); if (tmp != NULL) { chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START); tmp = chunk_get_next_type(tmp, CT_SEMICOLON, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->type); } } } } } /* Detect Objective C @selector * @selector(msgNameWithNoArg) * @selector(msgNameWith1Arg:) * @selector(msgNameWith2Args:arg2Name:) */ if ((pc->type == CT_OC_SEL) && (next->type == CT_PAREN_OPEN)) { set_chunk_parent(next, pc->type); tmp = chunk_get_next(next); if (tmp != NULL) { set_chunk_type(tmp, CT_OC_SEL_NAME); set_chunk_parent(tmp, pc->type); while ((tmp = chunk_get_next_ncnl(tmp)) != NULL) { if (tmp->type == CT_PAREN_CLOSE) { set_chunk_parent(tmp, CT_OC_SEL); break; } set_chunk_type(tmp, CT_OC_SEL_NAME); set_chunk_parent(tmp, pc->type); } } } /* Handle special preprocessor junk */ if (pc->type == CT_PREPROC) { set_chunk_parent(pc, next->type); } /* Detect "pragma region" and "pragma endregion" */ if ((pc->type == CT_PP_PRAGMA) && (next->type == CT_PREPROC_BODY)) { if ((memcmp(next->str, "region", 6) == 0) || (memcmp(next->str, "endregion", 9) == 0)) { set_chunk_type(pc, (*next->str == 'r') ? CT_PP_REGION : CT_PP_ENDREGION); set_chunk_parent(prev, pc->type); } } /* Check for C# nullable types '?' is in next */ if ((cpd.lang_flags & LANG_CS) && (next->type == CT_QUESTION) && (next->orig_col == (pc->orig_col + pc->len()))) { tmp = chunk_get_next_ncnl(next); if (tmp != NULL) { bool doit = ((tmp->type == CT_PAREN_CLOSE) || (tmp->type == CT_ANGLE_CLOSE)); if (tmp->type == CT_WORD) { tmp2 = chunk_get_next_ncnl(tmp); if ((tmp2 != NULL) && ((tmp2->type == CT_SEMICOLON) || (tmp2->type == CT_ASSIGN) || (tmp2->type == CT_COMMA) || (tmp2->type == CT_BRACE_OPEN))) { doit = true; } } if (doit) { pc->str += next->str; pc->orig_col_end = next->orig_col_end; chunk_del(next); next = tmp; } } } /* Change 'default(' into a sizeof-like statement */ if ((cpd.lang_flags & LANG_CS) && (pc->type == CT_DEFAULT) && (next->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_SIZEOF); } if ((pc->type == CT_UNSAFE) && (next->type != CT_BRACE_OPEN)) { set_chunk_type(pc, CT_QUALIFIER); } if (((pc->type == CT_USING) || ((pc->type == CT_TRY) && (cpd.lang_flags & LANG_JAVA))) && (next->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_USING_STMT); } /* Add minimal support for C++0x rvalue references */ if ((pc->type == CT_BOOL) && chunk_is_str(pc, "&&", 2)) { if (prev->type == CT_TYPE) { set_chunk_type(pc, CT_BYREF); } } /* HACK: treat try followed by a colon as a qualifier to handle this: * A::A(int) try : B() { } catch (...) { } */ if ((pc->type == CT_TRY) && chunk_is_str(pc, "try", 3) && (next != NULL) && (next->type == CT_COLON)) { set_chunk_type(pc, CT_QUALIFIER); } /* If Java's 'synchronized' is in a method declaration, it should be * a qualifier. */ if ((cpd.lang_flags & LANG_JAVA) && (pc->type == CT_SYNCHRONIZED) && (next->type != CT_PAREN_OPEN)) { set_chunk_type(pc, CT_QUALIFIER); } // guy 2015-11-05 // change CT_DC_MEMBER + CT_FOR into CT_DC_MEMBER + CT_FUNC_CALL if ((pc->type == CT_FOR) && (pc->prev->type == CT_DC_MEMBER)) { set_chunk_type(pc, CT_FUNC_CALL); } /* TODO: determine other stuff here */ prev = pc; pc = next; next = chunk_get_next_ncnl(pc); } } // tokenize_cleanup