/** * Parses all legal D string constants. * * Quoted strings: * r"Wysiwyg" # WYSIWYG string * x"hexstring" # Hexadecimal array * `Wysiwyg` # WYSIWYG string * 'char' # single character * "reg_string" # regular string * * Non-quoted strings: * \x12 # 1-byte hex constant * \u1234 # 2-byte hex constant * \U12345678 # 4-byte hex constant * \123 # octal constant * \& # named entity * \n # single character * * @param pc The structure to update, str is an input. * @return Whether a string was parsed */ static bool d_parse_string(tok_ctx& ctx, chunk_t& pc) { int ch = ctx.peek(); if ((ch == '"') || (ch == '\'') || (ch == '`')) { return(parse_string(ctx, pc, 0, true)); } else if (ch == '\\') { ctx.save(); int cnt; pc.str.clear(); while (ctx.peek() == '\\') { pc.str.append(ctx.get()); /* Check for end of file */ switch (ctx.peek()) { case 'x': /* \x HexDigit HexDigit */ cnt = 3; while (cnt--) { pc.str.append(ctx.get()); } break; case 'u': /* \u HexDigit HexDigit HexDigit HexDigit */ cnt = 5; while (cnt--) { pc.str.append(ctx.get()); } break; case 'U': /* \U HexDigit (x8) */ cnt = 9; while (cnt--) { pc.str.append(ctx.get()); } break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': /* handle up to 3 octal digits */ pc.str.append(ctx.get()); ch = ctx.peek(); if ((ch >= '0') && (ch <= '7')) { pc.str.append(ctx.get()); ch = ctx.peek(); if ((ch >= '0') && (ch <= '7')) { pc.str.append(ctx.get()); } } break; case '&': /* \& NamedCharacterEntity ; */ pc.str.append(ctx.get()); while (unc_isalpha(ctx.peek())) { pc.str.append(ctx.get()); } if (ctx.peek() == ';') { pc.str.append(ctx.get()); } break; default: /* Everything else is a single character */ pc.str.append(ctx.get()); break; } // switch } if (pc.str.size() > 1) { pc.type = CT_STRING; return(true); } ctx.restore(); } else if (((ch == 'r') || (ch == 'x')) && (ctx.peek(1) == '"')) { return(parse_string(ctx, pc, 1, false)); } return(false); } // d_parse_string
void tokenize_cleanup(void) { LOG_FUNC_ENTRY(); chunk_t *pc = chunk_get_head(); chunk_t *prev = NULL; chunk_t *next; chunk_t *tmp; chunk_t *tmp2; bool in_type_cast = false; cpd.unc_stage = US_TOKENIZE_CLEANUP; /* Since [] is expected to be TSQUARE for the 'operator', we need to make * this change in the first pass. */ for (pc = chunk_get_head(); pc != NULL; pc = chunk_get_next_ncnl(pc)) { if (pc->type == CT_SQUARE_OPEN) { next = chunk_get_next_ncnl(pc); if (chunk_is_token(next, CT_SQUARE_CLOSE)) { /* Change '[' + ']' into '[]' */ set_chunk_type(pc, CT_TSQUARE); pc->str = "[]"; // bug # 664 // The original orig_col_end of CT_SQUARE_CLOSE is stored at orig_col_end of CT_TSQUARE. // pc->orig_col_end += 1; pc->orig_col_end = next->orig_col_end; chunk_del(next); } } if ((pc->type == CT_SEMICOLON) && (pc->flags & PCF_IN_PREPROC) && !chunk_get_next_ncnl(pc, CNAV_PREPROC)) { LOG_FMT(LNOTE, "%s:%d Detected a macro that ends with a semicolon. Possible failures if used.\n", cpd.filename, pc->orig_line); } } /* We can handle everything else in the second pass */ pc = chunk_get_head(); next = chunk_get_next_ncnl(pc); while ((pc != NULL) && (next != NULL)) { if ((pc->type == CT_DOT) && (cpd.lang_flags & LANG_ALLC)) { set_chunk_type(pc, CT_MEMBER); } if ((pc->type == CT_NULLCOND) && (cpd.lang_flags & LANG_CS)) { set_chunk_type(pc, CT_MEMBER); } /* Determine the version stuff (D only) */ if (pc->type == CT_D_VERSION) { if (next->type == CT_PAREN_OPEN) { set_chunk_type(pc, CT_D_VERSION_IF); } else { if (next->type != CT_ASSIGN) { LOG_FMT(LERR, "%s:%d %s: version: Unexpected token %s\n", cpd.filename, pc->orig_line, __func__, get_token_name(next->type)); cpd.error_count++; } set_chunk_type(pc, CT_WORD); } } /* Determine the scope stuff (D only) */ if (pc->type == CT_D_SCOPE) { if (next->type == CT_PAREN_OPEN) { set_chunk_type(pc, CT_D_SCOPE_IF); } else { set_chunk_type(pc, CT_TYPE); } } /** * Change CT_BASE before CT_PAREN_OPEN to CT_WORD. * public myclass() : base() { * } */ if ((pc->type == CT_BASE) && (next->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_WORD); } if ((pc->type == CT_ENUM) && (next->type == CT_CLASS)) { set_chunk_type(next, CT_ENUM_CLASS); } /** * Change CT_WORD after CT_ENUM, CT_UNION, or CT_STRUCT to CT_TYPE * Change CT_WORD before CT_WORD to CT_TYPE */ if (next->type == CT_WORD) { if ((pc->type == CT_ENUM) || (pc->type == CT_ENUM_CLASS) || (pc->type == CT_UNION) || (pc->type == CT_STRUCT)) { set_chunk_type(next, CT_TYPE); } if (pc->type == CT_WORD) { set_chunk_type(pc, CT_TYPE); } } /* change extern to qualifier if extern isn't followed by a string or * an open paren */ if (pc->type == CT_EXTERN) { if (next->type == CT_STRING) { /* Probably 'extern "C"' */ } else if (next->type == CT_PAREN_OPEN) { /* Probably 'extern (C)' */ } else { /* Something else followed by a open brace */ tmp = chunk_get_next_ncnl(next); if ((tmp == NULL) || (tmp->type != CT_BRACE_OPEN)) { set_chunk_type(pc, CT_QUALIFIER); } } } /** * Change CT_STAR to CT_PTR_TYPE if preceded by CT_TYPE, * CT_QUALIFIER, or CT_PTR_TYPE. */ if ((next->type == CT_STAR) && ((pc->type == CT_TYPE) || (pc->type == CT_QUALIFIER) || (pc->type == CT_PTR_TYPE))) { set_chunk_type(next, CT_PTR_TYPE); } if ((pc->type == CT_TYPE_CAST) && (next->type == CT_ANGLE_OPEN)) { set_chunk_parent(next, CT_TYPE_CAST); in_type_cast = true; } /** * Change angle open/close to CT_COMPARE, if not a template thingy */ if ((pc->type == CT_ANGLE_OPEN) && (pc->parent_type != CT_TYPE_CAST)) { /* pretty much all languages except C use <> for something other than * comparisons. "#include<xxx>" is handled elsewhere. */ if (cpd.lang_flags & (LANG_CPP | LANG_CS | LANG_JAVA | LANG_VALA | LANG_OC)) { // bug #663 check_template(pc); } else { /* convert CT_ANGLE_OPEN to CT_COMPARE */ set_chunk_type(pc, CT_COMPARE); } } if ((pc->type == CT_ANGLE_CLOSE) && (pc->parent_type != CT_TEMPLATE)) { if (in_type_cast) { in_type_cast = false; set_chunk_parent(pc, CT_TYPE_CAST); } else { next = handle_double_angle_close(pc); } } if (cpd.lang_flags & LANG_D) { /* Check for the D string concat symbol '~' */ if ((pc->type == CT_INV) && ((prev->type == CT_STRING) || (prev->type == CT_WORD) || (next->type == CT_STRING))) { set_chunk_type(pc, CT_CONCAT); } /* Check for the D template symbol '!' (word + '!' + word or '(') */ if ((pc->type == CT_NOT) && (prev->type == CT_WORD) && ((next->type == CT_PAREN_OPEN) || (next->type == CT_WORD) || (next->type == CT_TYPE))) { set_chunk_type(pc, CT_D_TEMPLATE); } /* handle "version(unittest) { }" vs "unittest { }" */ if (prev && (pc->type == CT_UNITTEST) && (prev->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_WORD); } /* handle 'static if' and merge the tokens */ if (prev && (pc->type == CT_IF) && chunk_is_str(prev, "static", 6)) { /* delete PREV and merge with IF */ pc->str.insert(0, ' '); pc->str.insert(0, prev->str); pc->orig_col = prev->orig_col; pc->orig_line = prev->orig_line; chunk_t *to_be_deleted = prev; prev = chunk_get_prev_ncnl(prev); chunk_del(to_be_deleted); } } if (cpd.lang_flags & LANG_CPP) { /* Change Word before '::' into a type */ if ((pc->type == CT_WORD) && (next->type == CT_DC_MEMBER)) { set_chunk_type(pc, CT_TYPE); } } /* Change get/set to CT_WORD if not followed by a brace open */ if ((pc->type == CT_GETSET) && (next->type != CT_BRACE_OPEN)) { if ((next->type == CT_SEMICOLON) && ((prev->type == CT_BRACE_CLOSE) || (prev->type == CT_BRACE_OPEN) || (prev->type == CT_SEMICOLON))) { set_chunk_type(pc, CT_GETSET_EMPTY); set_chunk_parent(next, CT_GETSET); } else { set_chunk_type(pc, CT_WORD); } } /* Interface is only a keyword in MS land if followed by 'class' or 'struct' * likewise, 'class' may be a member name in Java. */ if ((pc->type == CT_CLASS) && !CharTable::IsKw1(next->str[0])) { set_chunk_type(pc, CT_WORD); } /* Change item after operator (>=, ==, etc) to a CT_OPERATOR_VAL * Usually the next item is part of the operator. * In a few cases the next few tokens are part of it: * operator + - common case * operator >> - need to combine '>' and '>' * operator () * operator [] - already converted to TSQUARE * operator new [] * operator delete [] * operator const char * * operator const B& * operator std::allocator<U> * * In all cases except the last, this will put the entire operator value * in one chunk. */ if (pc->type == CT_OPERATOR) { tmp2 = chunk_get_next(next); /* Handle special case of () operator -- [] already handled */ if (next->type == CT_PAREN_OPEN) { tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->type == CT_PAREN_CLOSE)) { next->str = "()"; set_chunk_type(next, CT_OPERATOR_VAL); chunk_del(tmp); next->orig_col_end += 1; } } else if ((next->type == CT_ANGLE_CLOSE) && tmp2 && (tmp2->type == CT_ANGLE_CLOSE) && (tmp2->orig_col == next->orig_col_end)) { next->str.append('>'); next->orig_col_end++; set_chunk_type(next, CT_OPERATOR_VAL); chunk_del(tmp2); } else if (next->flags & PCF_PUNCTUATOR) { set_chunk_type(next, CT_OPERATOR_VAL); } else { set_chunk_type(next, CT_TYPE); /* Replace next with a collection of all tokens that are part of * the type. */ tmp2 = next; while ((tmp = chunk_get_next(tmp2)) != NULL) { if ((tmp->type != CT_WORD) && (tmp->type != CT_TYPE) && (tmp->type != CT_QUALIFIER) && (tmp->type != CT_STAR) && (tmp->type != CT_CARET) && (tmp->type != CT_AMP) && (tmp->type != CT_TSQUARE)) { break; } /* Change tmp into a type so that space_needed() works right */ make_type(tmp); int num_sp = space_needed(tmp2, tmp); while (num_sp-- > 0) { next->str.append(" "); } next->str.append(tmp->str); tmp2 = tmp; } while ((tmp2 = chunk_get_next(next)) != tmp) { chunk_del(tmp2); } set_chunk_type(next, CT_OPERATOR_VAL); next->orig_col_end = next->orig_col + next->len(); } set_chunk_parent(next, CT_OPERATOR); LOG_FMT(LOPERATOR, "%s: %d:%d operator '%s'\n", __func__, pc->orig_line, pc->orig_col, next->text()); } /* Change private, public, protected into either a qualifier or label */ if (pc->type == CT_PRIVATE) { /* Handle Qt slots - maybe should just check for a CT_WORD? */ if (chunk_is_str(next, "slots", 5) || chunk_is_str(next, "Q_SLOTS", 7)) { tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->type == CT_COLON)) { next = tmp; } } if (next->type == CT_COLON) { set_chunk_type(next, CT_PRIVATE_COLON); if ((tmp = chunk_get_next_ncnl(next)) != NULL) { chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START); } } else { set_chunk_type(pc, (chunk_is_str(pc, "signals", 7) || chunk_is_str(pc, "Q_SIGNALS", 9)) ? CT_WORD : CT_QUALIFIER); } } /* Look for <newline> 'EXEC' 'SQL' */ if ((chunk_is_str(pc, "EXEC", 4) && chunk_is_str(next, "SQL", 3)) || ((*pc->str == '$') && (pc->type != CT_SQL_WORD))) { tmp = chunk_get_prev(pc); if (chunk_is_newline(tmp)) { if (*pc->str == '$') { set_chunk_type(pc, CT_SQL_EXEC); if (pc->len() > 1) { /* SPLIT OFF '$' */ chunk_t nc; nc = *pc; pc->str.resize(1); pc->orig_col_end = pc->orig_col + 1; nc.type = CT_SQL_WORD; nc.str.pop_front(); nc.orig_col++; nc.column++; chunk_add_after(&nc, pc); next = chunk_get_next(pc); } } tmp = chunk_get_next(next); if (chunk_is_str_case(tmp, "BEGIN", 5)) { set_chunk_type(pc, CT_SQL_BEGIN); } else if (chunk_is_str_case(tmp, "END", 3)) { set_chunk_type(pc, CT_SQL_END); } else { set_chunk_type(pc, CT_SQL_EXEC); } /* Change words into CT_SQL_WORD until CT_SEMICOLON */ while (tmp != NULL) { if (tmp->type == CT_SEMICOLON) { break; } if ((tmp->len() > 0) && (unc_isalpha(*tmp->str) || (*tmp->str == '$'))) { set_chunk_type(tmp, CT_SQL_WORD); } tmp = chunk_get_next_ncnl(tmp); } } } /* handle MS abomination 'for each' */ if ((pc->type == CT_FOR) && chunk_is_str(next, "each", 4) && (next == chunk_get_next(pc))) { /* merge the two with a space between */ pc->str.append(' '); pc->str += next->str; pc->orig_col_end = next->orig_col_end; chunk_del(next); next = chunk_get_next_ncnl(pc); /* label the 'in' */ if (next && (next->type == CT_PAREN_OPEN)) { tmp = chunk_get_next_ncnl(next); while (tmp && (tmp->type != CT_PAREN_CLOSE)) { if (chunk_is_str(tmp, "in", 2)) { set_chunk_type(tmp, CT_IN); break; } tmp = chunk_get_next_ncnl(tmp); } } } /* ObjectiveC allows keywords to be used as identifiers in some situations * This is a dirty hack to allow some of the more common situations. */ if (cpd.lang_flags & LANG_OC) { if (((pc->type == CT_IF) || (pc->type == CT_FOR) || (pc->type == CT_WHILE)) && !chunk_is_token(next, CT_PAREN_OPEN)) { set_chunk_type(pc, CT_WORD); } if ((pc->type == CT_DO) && (chunk_is_token(prev, CT_MINUS) || chunk_is_token(next, CT_SQUARE_CLOSE))) { set_chunk_type(pc, CT_WORD); } } /* Another hack to clean up more keyword abuse */ if ((pc->type == CT_CLASS) && (chunk_is_token(prev, CT_DOT) || chunk_is_token(next, CT_DOT))) { set_chunk_type(pc, CT_WORD); } /* Detect Objective C class name */ if ((pc->type == CT_OC_IMPL) || (pc->type == CT_OC_INTF) || (pc->type == CT_OC_PROTOCOL)) { if (next->type != CT_PAREN_OPEN) { set_chunk_type(next, CT_OC_CLASS); } set_chunk_parent(next, pc->type); tmp = chunk_get_next_ncnl(next); if (tmp != NULL) { chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START); } tmp = chunk_get_next_type(pc, CT_OC_END, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->type); } } if (pc->type == CT_OC_INTF) { tmp = chunk_get_next_ncnl(pc, CNAV_PREPROC); while ((tmp != NULL) && (tmp->type != CT_OC_END)) { if (get_token_pattern_class(tmp->type) != PATCLS_NONE) { LOG_FMT(LOBJCWORD, "@interface %d:%d change '%s' (%s) to CT_WORD\n", pc->orig_line, pc->orig_col, tmp->text(), get_token_name(tmp->type)); set_chunk_type(tmp, CT_WORD); } tmp = chunk_get_next_ncnl(tmp, CNAV_PREPROC); } } /* Detect Objective-C categories and class extensions */ /* @interface ClassName (CategoryName) */ /* @implementation ClassName (CategoryName) */ /* @interface ClassName () */ /* @implementation ClassName () */ if (((pc->parent_type == CT_OC_IMPL) || (pc->parent_type == CT_OC_INTF) || (pc->type == CT_OC_CLASS)) && (next->type == CT_PAREN_OPEN)) { set_chunk_parent(next, pc->parent_type); tmp = chunk_get_next(next); if ((tmp != NULL) && (tmp->next != NULL)) { if (tmp->type == CT_PAREN_CLOSE) { //set_chunk_type(tmp, CT_OC_CLASS_EXT); set_chunk_parent(tmp, pc->parent_type); } else { set_chunk_type(tmp, CT_OC_CATEGORY); set_chunk_parent(tmp, pc->parent_type); } } tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->parent_type); } } /* Detect Objective C @property * @property NSString *stringProperty; * @property(nonatomic, retain) NSMutableDictionary *shareWith; */ if (pc->type == CT_OC_PROPERTY) { if (next->type != CT_PAREN_OPEN) { chunk_flags_set(next, PCF_STMT_START | PCF_EXPR_START); } else { set_chunk_parent(next, pc->type); tmp = chunk_get_next_type(pc, CT_PAREN_CLOSE, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->type); tmp = chunk_get_next_ncnl(tmp); if (tmp != NULL) { chunk_flags_set(tmp, PCF_STMT_START | PCF_EXPR_START); tmp = chunk_get_next_type(tmp, CT_SEMICOLON, pc->level); if (tmp != NULL) { set_chunk_parent(tmp, pc->type); } } } } } /* Detect Objective C @selector * @selector(msgNameWithNoArg) * @selector(msgNameWith1Arg:) * @selector(msgNameWith2Args:arg2Name:) */ if ((pc->type == CT_OC_SEL) && (next->type == CT_PAREN_OPEN)) { set_chunk_parent(next, pc->type); tmp = chunk_get_next(next); if (tmp != NULL) { set_chunk_type(tmp, CT_OC_SEL_NAME); set_chunk_parent(tmp, pc->type); while ((tmp = chunk_get_next_ncnl(tmp)) != NULL) { if (tmp->type == CT_PAREN_CLOSE) { set_chunk_parent(tmp, CT_OC_SEL); break; } set_chunk_type(tmp, CT_OC_SEL_NAME); set_chunk_parent(tmp, pc->type); } } } /* Handle special preprocessor junk */ if (pc->type == CT_PREPROC) { set_chunk_parent(pc, next->type); } /* Detect "pragma region" and "pragma endregion" */ if ((pc->type == CT_PP_PRAGMA) && (next->type == CT_PREPROC_BODY)) { if ((memcmp(next->str, "region", 6) == 0) || (memcmp(next->str, "endregion", 9) == 0)) { set_chunk_type(pc, (*next->str == 'r') ? CT_PP_REGION : CT_PP_ENDREGION); set_chunk_parent(prev, pc->type); } } /* Check for C# nullable types '?' is in next */ if ((cpd.lang_flags & LANG_CS) && (next->type == CT_QUESTION) && (next->orig_col == (pc->orig_col + pc->len()))) { tmp = chunk_get_next_ncnl(next); if (tmp != NULL) { bool doit = ((tmp->type == CT_PAREN_CLOSE) || (tmp->type == CT_ANGLE_CLOSE)); if (tmp->type == CT_WORD) { tmp2 = chunk_get_next_ncnl(tmp); if ((tmp2 != NULL) && ((tmp2->type == CT_SEMICOLON) || (tmp2->type == CT_ASSIGN) || (tmp2->type == CT_COMMA) || (tmp2->type == CT_BRACE_OPEN))) { doit = true; } } if (doit) { pc->str += next->str; pc->orig_col_end = next->orig_col_end; chunk_del(next); next = tmp; } } } /* Change 'default(' into a sizeof-like statement */ if ((cpd.lang_flags & LANG_CS) && (pc->type == CT_DEFAULT) && (next->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_SIZEOF); } if ((pc->type == CT_UNSAFE) && (next->type != CT_BRACE_OPEN)) { set_chunk_type(pc, CT_QUALIFIER); } if (((pc->type == CT_USING) || ((pc->type == CT_TRY) && (cpd.lang_flags & LANG_JAVA))) && (next->type == CT_PAREN_OPEN)) { set_chunk_type(pc, CT_USING_STMT); } /* Add minimal support for C++0x rvalue references */ if ((pc->type == CT_BOOL) && chunk_is_str(pc, "&&", 2)) { if (prev->type == CT_TYPE) { set_chunk_type(pc, CT_BYREF); } } /* HACK: treat try followed by a colon as a qualifier to handle this: * A::A(int) try : B() { } catch (...) { } */ if ((pc->type == CT_TRY) && chunk_is_str(pc, "try", 3) && (next != NULL) && (next->type == CT_COLON)) { set_chunk_type(pc, CT_QUALIFIER); } /* If Java's 'synchronized' is in a method declaration, it should be * a qualifier. */ if ((cpd.lang_flags & LANG_JAVA) && (pc->type == CT_SYNCHRONIZED) && (next->type != CT_PAREN_OPEN)) { set_chunk_type(pc, CT_QUALIFIER); } // guy 2015-11-05 // change CT_DC_MEMBER + CT_FOR into CT_DC_MEMBER + CT_FUNC_CALL if ((pc->type == CT_FOR) && (pc->prev->type == CT_DC_MEMBER)) { set_chunk_type(pc, CT_FUNC_CALL); } /* TODO: determine other stuff here */ prev = pc; pc = next; next = chunk_get_next_ncnl(pc); } } // tokenize_cleanup
/** * Skips the next bit of whatever and returns the type of block. * * pc.str is the input text. * pc.len in the output length. * pc.type is the output type * pc.column is output column * * @param pc The structure to update, str is an input. * @return true/false - whether anything was parsed */ static bool parse_next(tok_ctx& ctx, chunk_t& pc) { const chunk_tag_t *punc; int ch, ch1; if (!ctx.more()) { //fprintf(stderr, "All done!\n"); return(false); } /* Save off the current column */ pc.orig_line = ctx.c.row; pc.column = ctx.c.col; pc.orig_col = ctx.c.col; pc.type = CT_NONE; pc.nl_count = 0; pc.flags = 0; /* If it is turned off, we put everything except newlines into CT_UNKNOWN */ if (cpd.unc_off) { if (parse_ignored(ctx, pc)) { return(true); } } /** * Parse whitespace */ if (parse_whitespace(ctx, pc)) { return(true); } /** * Handle unknown/unhandled preprocessors */ if ((cpd.in_preproc > CT_PP_BODYCHUNK) && (cpd.in_preproc <= CT_PP_OTHER)) { pc.str.clear(); tok_info ss; ctx.save(ss); /* Chunk to a newline or comment */ pc.type = CT_PREPROC_BODY; int last = 0; while (ctx.more()) { int ch = ctx.peek(); if ((ch == '\n') || (ch == '\r')) { /* Back off if this is an escaped newline */ if (last == '\\') { ctx.restore(ss); pc.str.pop_back(); } break; } /* Quit on a C++ comment start */ if ((ch == '/') && (ctx.peek(1) == '/')) { break; } last = ch; ctx.save(ss); pc.str.append(ctx.get()); } if (pc.str.size() > 0) { return(true); } } /** * Detect backslash-newline */ if ((ctx.peek() == '\\') && parse_bs_newline(ctx, pc)) { return(true); } /** * Parse comments */ if (parse_comment(ctx, pc)) { return(true); } /* Parse code placeholders */ if (parse_code_placeholder(ctx, pc)) { return(true); } /* Check for C# literal strings, ie @"hello" and identifiers @for*/ if ((cpd.lang_flags & LANG_CS) && (ctx.peek() == '@')) { if (ctx.peek(1) == '"') { parse_cs_string(ctx, pc); return(true); } /* check for non-keyword identifiers such as @if @switch, etc */ if (CharTable::IsKw1(ctx.peek(1))) { parse_word(ctx, pc, true); return(true); } } /* Check for C# Interpolated strings */ if ((cpd.lang_flags & LANG_CS) && (ctx.peek() == '$') && (ctx.peek(1) == '"')) { parse_cs_interpolated_string(ctx, pc); return(true); } /* handle VALA """ strings """ */ if ((cpd.lang_flags & LANG_VALA) && (ctx.peek() == '"') && (ctx.peek(1) == '"') && (ctx.peek(2) == '"')) { parse_verbatim_string(ctx, pc); return(true); } /* handle C++0x strings u8"x" u"x" U"x" R"x" u8R"XXX(I'm a "raw UTF-8" string.)XXX" */ ch = ctx.peek(); if ((cpd.lang_flags & LANG_CPP) && ((ch == 'u') || (ch == 'U') || (ch == 'R'))) { int idx = 0; bool is_real = false; if ((ch == 'u') && (ctx.peek(1) == '8')) { idx = 2; } else if (unc_tolower(ch) == 'u') { idx++; } if (ctx.peek(idx) == 'R') { idx++; is_real = true; } if (ctx.peek(idx) == '"') { if (is_real) { if (parse_cr_string(ctx, pc, idx)) { return(true); } } else { if (parse_string(ctx, pc, idx, true)) { parse_suffix(ctx, pc, true); return(true); } } } } /* PAWN specific stuff */ if (cpd.lang_flags & LANG_PAWN) { if ((cpd.preproc_ncnl_count == 1) && ((cpd.in_preproc == CT_PP_DEFINE) || (cpd.in_preproc == CT_PP_EMIT))) { parse_pawn_pattern(ctx, pc, CT_MACRO); return(true); } /* Check for PAWN strings: \"hi" or !"hi" or !\"hi" or \!"hi" */ if ((ctx.peek() == '\\') || (ctx.peek() == '!')) { if (ctx.peek(1) == '"') { parse_string(ctx, pc, 1, (ctx.peek() == '!')); return(true); } else if (((ctx.peek(1) == '\\') || (ctx.peek(1) == '!')) && (ctx.peek(2) == '"')) { parse_string(ctx, pc, 2, false); return(true); } } /* handle PAWN preprocessor args %0 .. %9 */ if ((cpd.in_preproc == CT_PP_DEFINE) && (ctx.peek() == '%') && unc_isdigit(ctx.peek(1))) { pc.str.clear(); pc.str.append(ctx.get()); pc.str.append(ctx.get()); pc.type = CT_WORD; return(true); } } /** * Parse strings and character constants */ if (parse_number(ctx, pc)) { return(true); } if (cpd.lang_flags & LANG_D) { /* D specific stuff */ if (d_parse_string(ctx, pc)) { return(true); } } else { /* Not D stuff */ /* Check for L'a', L"abc", 'a', "abc", <abc> strings */ ch = ctx.peek(); ch1 = ctx.peek(1); if ((((ch == 'L') || (ch == 'S')) && ((ch1 == '"') || (ch1 == '\''))) || (ch == '"') || (ch == '\'') || ((ch == '<') && (cpd.in_preproc == CT_PP_INCLUDE))) { parse_string(ctx, pc, unc_isalpha(ch) ? 1 : 0, true); return(true); } if ((ch == '<') && (cpd.in_preproc == CT_PP_DEFINE)) { if (chunk_get_tail()->type == CT_MACRO) { /* We have "#define XXX <", assume '<' starts an include string */ parse_string(ctx, pc, 0, false); return(true); } } } /* Check for Objective C literals and VALA identifiers ('@1', '@if')*/ if ((cpd.lang_flags & (LANG_OC | LANG_VALA)) && (ctx.peek() == '@')) { int nc = ctx.peek(1); if ((nc == '"') || (nc == '\'')) { /* literal string */ parse_string(ctx, pc, 1, true); return(true); } else if ((nc >= '0') && (nc <= '9')) { /* literal number */ pc.str.append(ctx.get()); /* store the '@' */ parse_number(ctx, pc); return(true); } } /* Check for pawn/ObjectiveC/Java and normal identifiers */ if (CharTable::IsKw1(ctx.peek()) || ((ctx.peek() == '@') && CharTable::IsKw1(ctx.peek(1)))) { parse_word(ctx, pc, false); return(true); } /* see if we have a punctuator */ char punc_txt[4]; punc_txt[0] = ctx.peek(); punc_txt[1] = ctx.peek(1); punc_txt[2] = ctx.peek(2); punc_txt[3] = ctx.peek(3); if ((punc = find_punctuator(punc_txt, cpd.lang_flags)) != NULL) { int cnt = strlen(punc->tag); while (cnt--) { pc.str.append(ctx.get()); } pc.type = punc->type; pc.flags |= PCF_PUNCTUATOR; return(true); } /* throw away this character */ pc.type = CT_UNKNOWN; pc.str.append(ctx.get()); LOG_FMT(LWARN, "%s:%d Garbage in col %d: %x\n", cpd.filename, pc.orig_line, (int)ctx.c.col, pc.str[0]); cpd.error_count++; return(true); } // parse_next
/** * A multiline comment -- woopeee! * The only trick here is that we have to trim out whitespace characters * to get the comment to line up. */ static void output_comment_multi(chunk_t *pc) { int cmt_col; int cmt_idx; int ch; unc_text line; int line_count = 0; int ccol; /* the col of subsequent comment lines */ int col_diff = 0; bool nl_end = false; cmt_reflow cmt; //LOG_FMT(LSYS, "%s: line %d\n", __func__, pc->orig_line); output_cmt_start(cmt, pc); cmt.reflow = (cpd.settings[UO_cmt_reflow_mode].n != 1); cmt_col = cmt.base_col; col_diff = pc->orig_col - cmt.base_col; calculate_comment_body_indent(cmt, pc->str); cmt.cont_text = !cpd.settings[UO_cmt_indent_multi].b ? "" : (cpd.settings[UO_cmt_star_cont].b ? "* " : " "); LOG_CONTTEXT(); //LOG_FMT(LSYS, "Indenting1 line %d to col %d (orig=%d) col_diff=%d xtra=%d cont='%s'\n", // pc->orig_line, cmt_col, pc->orig_col, col_diff, cmt.xtra_indent, cmt.cont_text.c_str()); ccol = pc->column; cmt_idx = 0; line.clear(); while (cmt_idx < pc->len()) { ch = pc->str[cmt_idx++]; /* handle the CRLF and CR endings. convert both to LF */ if (ch == '\r') { ch = '\n'; if ((cmt_idx < pc->len()) && (pc->str[cmt_idx] == '\n')) { cmt_idx++; } } /* Find the start column */ if (line.size() == 0) { nl_end = false; if (ch == ' ') { ccol++; continue; } else if (ch == '\t') { ccol = calc_next_tab_column(ccol, cpd.settings[UO_input_tab_size].n); continue; } else { //LOG_FMT(LSYS, "%d] Text starts in col %d\n", line_count, ccol); } } /* * Now see if we need/must fold the next line with the current to enable * full reflow */ if ((cpd.settings[UO_cmt_reflow_mode].n == 2) && (ch == '\n') && (cmt_idx < pc->len())) { int nxt_len = 0; int next_nonempty_line = -1; int prev_nonempty_line = -1; int nwidx = line.size(); bool star_is_bullet = false; /* strip trailing whitespace from the line collected so far */ while (nwidx > 0) { nwidx--; if ((prev_nonempty_line < 0) && !unc_isspace(line[nwidx]) && (line[nwidx] != '*') && // block comment: skip '*' at end of line ((pc->flags & PCF_IN_PREPROC) ? (line[nwidx] != '\\') || ((line[nwidx + 1] != 'r') && (line[nwidx + 1] != '\n')) : true)) { prev_nonempty_line = nwidx; // last nonwhitespace char in the previous line } } int remaining = pc->len() - cmt_idx; for (nxt_len = 0; (nxt_len <= remaining) && (pc->str[nxt_len] != 'r') && (pc->str[nxt_len] != '\n'); nxt_len++) { if ((next_nonempty_line < 0) && !unc_isspace(pc->str[nxt_len]) && (pc->str[nxt_len] != '*') && ((nxt_len == remaining) || ((pc->flags & PCF_IN_PREPROC) ? (pc->str[nxt_len] != '\\') || ((pc->str[nxt_len + 1] != 'r') && (pc->str[nxt_len + 1] != '\n')) : true))) { next_nonempty_line = nxt_len; // first nonwhitespace char in the next line } } /* * see if we should fold up; usually that'd be a YES, but there are a few * situations where folding/reflowing by merging lines is frowned upon: * * - ASCII art in the comments (most often, these are drawings done in +-\/|.,*) * * - Doxygen/JavaDoc/etc. parameters: these often start with \ or @, at least * something clearly non-alphanumeric (you see where we're going with this?) * * - bullet lists that are closely spaced: bullets are always non-alphanumeric * characters, such as '-' or '+' (or, oh horor, '*' - that's bloody ambiguous * to parse :-( ... with or without '*' comment start prefix, that's the * question, then.) * * - semi-HTML formatted code, e.g. <pre>...</pre> comment sections (NDoc, etc.) * * - New lines which form a new paragraph without there having been added an * extra empty line between the last sentence and the new one. * A bit like this, really; so it is opportune to check if the last line ended * in a terminal (that would be the set '.:;!?') and the new line starts with * a capital. * Though new lines starting with comment delimiters, such as '(', should be * pulled up. * * So it bores down to this: the only folding (& reflowing) that's going to happen * is when the next line starts with an alphanumeric character AND the last * line didn't end with an non-alphanumeric character, except: ',' AND the next * line didn't start with a '*' all of a sudden while the previous one didn't * (the ambiguous '*'-for-bullet case!) */ if ((prev_nonempty_line >= 0) && (next_nonempty_line >= 0) && (((unc_isalnum(line[prev_nonempty_line]) || strchr(",)]", line[prev_nonempty_line])) && (unc_isalnum(pc->str[next_nonempty_line]) || strchr("([", pc->str[next_nonempty_line]))) || (('.' == line[prev_nonempty_line]) && // dot followed by non-capital is NOT a new sentence start unc_isupper(pc->str[next_nonempty_line]))) && !star_is_bullet) { // rewind the line to the last non-alpha: line.resize(prev_nonempty_line + 1); // roll the current line forward to the first non-alpha: cmt_idx += next_nonempty_line; // override the NL and make it a single whitespace: ch = ' '; } } line.append(ch); /* If we just hit an end of line OR we just hit end-of-comment... */ if ((ch == '\n') || (cmt_idx == pc->len())) { line_count++; /* strip trailing tabs and spaces before the newline */ if (ch == '\n') { nl_end = true; line.pop_back(); cmt_trim_whitespace(line, pc->flags & PCF_IN_PREPROC); } //LOG_FMT(LSYS, "[%3d]%s\n", ccol, line); if (line_count == 1) { /* this is the first line - add unchanged */ add_comment_text(line, cmt, false); if (nl_end) { add_char('\n'); } } else { /* This is not the first line, so we need to indent to the * correct column. Each line is indented 0 or more spaces. */ ccol -= col_diff; if (ccol < (cmt_col + 3)) { ccol = cmt_col + 3; } if (line.size() == 0) { /* Empty line - just a '\n' */ if (cpd.settings[UO_cmt_star_cont].b) { cmt.column = cmt_col + cpd.settings[UO_cmt_sp_before_star_cont].n; cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column); if (cmt.xtra_indent) { add_char(' '); } add_text(cmt.cont_text); } add_char('\n'); } else { /* If this doesn't start with a '*' or '|'. * '\name' is a common parameter documentation thing. */ if (cpd.settings[UO_cmt_indent_multi].b && (line[0] != '*') && (line[0] != '|') && (line[0] != '#') && ((line[0] != '\\') || unc_isalpha(line[1])) && (line[0] != '+')) { int start_col = cmt_col + cpd.settings[UO_cmt_sp_before_star_cont].n; if (cpd.settings[UO_cmt_star_cont].b) { cmt.column = start_col; cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column); if (cmt.xtra_indent) { add_char(' '); } add_text(cmt.cont_text); output_to_column(ccol + cpd.settings[UO_cmt_sp_after_star_cont].n, false); } else { cmt.column = ccol; cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column); } } else { cmt.column = cmt_col + cpd.settings[UO_cmt_sp_before_star_cont].n; cmt_output_indent(cmt.brace_col, cmt.base_col, cmt.column); if (cmt.xtra_indent) { add_char(' '); } int idx; idx = cmt_parse_lead(line, (cmt_idx == pc->len())); if (idx > 0) { cmt.cont_text.set(line, 0, idx); LOG_CONTTEXT(); if ((line.size() >= 2) && (line[0] == '*') && unc_isalnum(line[1])) { line.insert(1, ' '); } } else { add_text(cmt.cont_text); } } add_comment_text(line, cmt, false); if (nl_end) { add_text("\n"); } } } line.clear(); ccol = 1; } } }