/**
 * Splits the leading character off of a chunk.
 *
 * The text after the first character is looked up with find_punctuator().
 * If nothing is found the chunk is left untouched. Otherwise pc is cut down
 * to its first character and re-typed as CT_ANGLE_CLOSE, and a copy that
 * covers the remaining text (typed per the punctuator lookup) is handed to
 * chunk_add_after() with pc as the reference chunk.
 *
 * @param pc   The chunk to split; modified in place
 */
static void split_off_angle_close(chunk_t *pc)
{
   /* what does the text after the first character tokenize as? */
   const chunk_tag_t *rest_tag = find_punctuator(pc->str + 1, cpd.lang_flags);

   if (rest_tag == NULL)
   {
      return;
   }

   /* snapshot the chunk before it gets trimmed */
   chunk_t tail;
   tail = *pc;

   /* pc keeps only the first character */
   pc->len          = 1;
   pc->orig_col_end = pc->orig_col + 1;
   pc->type         = CT_ANGLE_CLOSE;

   /* the copy drops that first character and starts one column later */
   tail.type      = rest_tag->type;
   tail.str      += 1;
   tail.len      -= 1;
   tail.orig_col += 1;
   tail.column   += 1;

   chunk_add_after(&tail, pc);
}
/**
 * Splits the leading character off of a chunk.
 *
 * The text after the first character is looked up with find_punctuator().
 * If nothing is found the chunk is left untouched. Otherwise pc is cut down
 * to its first character and re-typed (via set_chunk_type) as
 * CT_ANGLE_CLOSE, and a copy that covers the remaining text (typed per the
 * punctuator lookup) is handed to chunk_add_after() with pc as the
 * reference chunk.
 *
 * @param pc   The chunk to split; modified in place
 */
static void split_off_angle_close(chunk_t *pc)
{
   /* snapshot the chunk before anything is changed */
   chunk_t tail;

   tail = *pc;

   /* what does the text after the first character tokenize as? */
   const chunk_tag_t *rest_tag = find_punctuator(pc->text() + 1, cpd.lang_flags);
   if (rest_tag == NULL)
   {
      return;
   }

   /* pc keeps only the first character */
   pc->str.resize(1);
   pc->orig_col_end = pc->orig_col + 1;
   set_chunk_type(pc, CT_ANGLE_CLOSE);

   /* the copy drops that first character and starts one column later */
   tail.type = rest_tag->type;
   tail.str.pop_front();
   tail.orig_col += 1;
   tail.column   += 1;

   chunk_add_after(&tail, pc);
}
/** * Skips the next bit of whatever and returns the type of block. * * pc.str is the input text. * pc.len in the output length. * pc.type is the output type * pc.column is output column * * @param pc The structure to update, str is an input. * @return true/false - whether anything was parsed */ static bool parse_next(tok_ctx& ctx, chunk_t& pc) { const chunk_tag_t *punc; int ch, ch1; if (!ctx.more()) { //fprintf(stderr, "All done!\n"); return(false); } /* Save off the current column */ pc.orig_line = ctx.c.row; pc.column = ctx.c.col; pc.orig_col = ctx.c.col; pc.type = CT_NONE; pc.nl_count = 0; pc.flags = 0; /* If it is turned off, we put everything except newlines into CT_UNKNOWN */ if (cpd.unc_off) { if (parse_ignored(ctx, pc)) { return(true); } } /** * Parse whitespace */ if (parse_whitespace(ctx, pc)) { return(true); } /** * Handle unknown/unhandled preprocessors */ if ((cpd.in_preproc > CT_PP_BODYCHUNK) && (cpd.in_preproc <= CT_PP_OTHER)) { pc.str.clear(); tok_info ss; ctx.save(ss); /* Chunk to a newline or comment */ pc.type = CT_PREPROC_BODY; int last = 0; while (ctx.more()) { int ch = ctx.peek(); if ((ch == '\n') || (ch == '\r')) { /* Back off if this is an escaped newline */ if (last == '\\') { ctx.restore(ss); pc.str.pop_back(); } break; } /* Quit on a C++ comment start */ if ((ch == '/') && (ctx.peek(1) == '/')) { break; } last = ch; ctx.save(ss); pc.str.append(ctx.get()); } if (pc.str.size() > 0) { return(true); } } /** * Detect backslash-newline */ if ((ctx.peek() == '\\') && parse_bs_newline(ctx, pc)) { return(true); } /** * Parse comments */ if (parse_comment(ctx, pc)) { return(true); } /* Parse code placeholders */ if (parse_code_placeholder(ctx, pc)) { return(true); } /* Check for C# literal strings, ie @"hello" and identifiers @for*/ if ((cpd.lang_flags & LANG_CS) && (ctx.peek() == '@')) { if (ctx.peek(1) == '"') { parse_cs_string(ctx, pc); return(true); } /* check for non-keyword identifiers such as @if @switch, etc */ if 
(CharTable::IsKw1(ctx.peek(1))) { parse_word(ctx, pc, true); return(true); } } /* Check for C# Interpolated strings */ if ((cpd.lang_flags & LANG_CS) && (ctx.peek() == '$') && (ctx.peek(1) == '"')) { parse_cs_interpolated_string(ctx, pc); return(true); } /* handle VALA """ strings """ */ if ((cpd.lang_flags & LANG_VALA) && (ctx.peek() == '"') && (ctx.peek(1) == '"') && (ctx.peek(2) == '"')) { parse_verbatim_string(ctx, pc); return(true); } /* handle C++0x strings u8"x" u"x" U"x" R"x" u8R"XXX(I'm a "raw UTF-8" string.)XXX" */ ch = ctx.peek(); if ((cpd.lang_flags & LANG_CPP) && ((ch == 'u') || (ch == 'U') || (ch == 'R'))) { int idx = 0; bool is_real = false; if ((ch == 'u') && (ctx.peek(1) == '8')) { idx = 2; } else if (unc_tolower(ch) == 'u') { idx++; } if (ctx.peek(idx) == 'R') { idx++; is_real = true; } if (ctx.peek(idx) == '"') { if (is_real) { if (parse_cr_string(ctx, pc, idx)) { return(true); } } else { if (parse_string(ctx, pc, idx, true)) { parse_suffix(ctx, pc, true); return(true); } } } } /* PAWN specific stuff */ if (cpd.lang_flags & LANG_PAWN) { if ((cpd.preproc_ncnl_count == 1) && ((cpd.in_preproc == CT_PP_DEFINE) || (cpd.in_preproc == CT_PP_EMIT))) { parse_pawn_pattern(ctx, pc, CT_MACRO); return(true); } /* Check for PAWN strings: \"hi" or !"hi" or !\"hi" or \!"hi" */ if ((ctx.peek() == '\\') || (ctx.peek() == '!')) { if (ctx.peek(1) == '"') { parse_string(ctx, pc, 1, (ctx.peek() == '!')); return(true); } else if (((ctx.peek(1) == '\\') || (ctx.peek(1) == '!')) && (ctx.peek(2) == '"')) { parse_string(ctx, pc, 2, false); return(true); } } /* handle PAWN preprocessor args %0 .. 
%9 */ if ((cpd.in_preproc == CT_PP_DEFINE) && (ctx.peek() == '%') && unc_isdigit(ctx.peek(1))) { pc.str.clear(); pc.str.append(ctx.get()); pc.str.append(ctx.get()); pc.type = CT_WORD; return(true); } } /** * Parse strings and character constants */ if (parse_number(ctx, pc)) { return(true); } if (cpd.lang_flags & LANG_D) { /* D specific stuff */ if (d_parse_string(ctx, pc)) { return(true); } } else { /* Not D stuff */ /* Check for L'a', L"abc", 'a', "abc", <abc> strings */ ch = ctx.peek(); ch1 = ctx.peek(1); if ((((ch == 'L') || (ch == 'S')) && ((ch1 == '"') || (ch1 == '\''))) || (ch == '"') || (ch == '\'') || ((ch == '<') && (cpd.in_preproc == CT_PP_INCLUDE))) { parse_string(ctx, pc, unc_isalpha(ch) ? 1 : 0, true); return(true); } if ((ch == '<') && (cpd.in_preproc == CT_PP_DEFINE)) { if (chunk_get_tail()->type == CT_MACRO) { /* We have "#define XXX <", assume '<' starts an include string */ parse_string(ctx, pc, 0, false); return(true); } } } /* Check for Objective C literals and VALA identifiers ('@1', '@if')*/ if ((cpd.lang_flags & (LANG_OC | LANG_VALA)) && (ctx.peek() == '@')) { int nc = ctx.peek(1); if ((nc == '"') || (nc == '\'')) { /* literal string */ parse_string(ctx, pc, 1, true); return(true); } else if ((nc >= '0') && (nc <= '9')) { /* literal number */ pc.str.append(ctx.get()); /* store the '@' */ parse_number(ctx, pc); return(true); } } /* Check for pawn/ObjectiveC/Java and normal identifiers */ if (CharTable::IsKw1(ctx.peek()) || ((ctx.peek() == '@') && CharTable::IsKw1(ctx.peek(1)))) { parse_word(ctx, pc, false); return(true); } /* see if we have a punctuator */ char punc_txt[4]; punc_txt[0] = ctx.peek(); punc_txt[1] = ctx.peek(1); punc_txt[2] = ctx.peek(2); punc_txt[3] = ctx.peek(3); if ((punc = find_punctuator(punc_txt, cpd.lang_flags)) != NULL) { int cnt = strlen(punc->tag); while (cnt--) { pc.str.append(ctx.get()); } pc.type = punc->type; pc.flags |= PCF_PUNCTUATOR; return(true); } /* throw away this character */ pc.type = CT_UNKNOWN; 
pc.str.append(ctx.get()); LOG_FMT(LWARN, "%s:%d Garbage in col %d: %x\n", cpd.filename, pc.orig_line, (int)ctx.c.col, pc.str[0]); cpd.error_count++; return(true); } // parse_next
/** * Marches through the whole file and checks to see how many spaces should be * between two chunks */ void space_text(void) { chunk_t *pc; chunk_t *next; chunk_t *tmp; int column, prev_column; int delta; pc = chunk_get_head(); if (pc == NULL) { return; } column = pc->column; while (pc != NULL) { next = chunk_get_next(pc); if (next == NULL) { break; } /* If the current chunk contains a newline, do not change the column * of the next item */ if ((pc->type == CT_NEWLINE) || (pc->type == CT_NL_CONT) || (pc->type == CT_COMMENT_MULTI)) { column = next->column; } else { /* Set to the minimum allowed column */ if (pc->nl_count == 0) { column += pc->len; } else { column = pc->orig_col_end; } prev_column = column; /** * Apply a general safety check * If the two chunks combined will tokenize differently, then we * must force a space. * Two chunks -- "()" and "[]" will always tokenize differently. * They are always safe to not have a space after them. */ pc->flags &= ~PCF_FORCE_SPACE; if ((pc->len > 0) && !chunk_is_str(pc, "[]", 2) && !chunk_is_str(pc, "()", 2)) { /* Find the next non-empty chunk on this line */ tmp = next; while ((tmp != NULL) && (tmp->len == 0) && !chunk_is_newline(tmp)) { tmp = chunk_get_next(tmp); } if ((tmp != NULL) && (tmp->len > 0)) { bool kw1 = CharTable::IsKw2(pc->str[pc->len - 1]); bool kw2 = CharTable::IsKw1(next->str[0]); if (kw1 && kw2) { /* back-to-back words need a space */ pc->flags |= PCF_FORCE_SPACE; } else if (!kw1 && !kw2 && (pc->len < 4) && (next->len < 4)) { /* We aren't dealing with keywords. concat and try punctuators */ char buf[9]; memcpy(buf, pc->str, pc->len); memcpy(buf + pc->len, next->str, next->len); buf[pc->len + next->len] = 0; const chunk_tag_t *ct; ct = find_punctuator(buf, cpd.lang_flags); if ((ct != NULL) && ((int)strlen(ct->tag) != pc->len)) { /* punctuator parsed to a different size.. 
*/ pc->flags |= PCF_FORCE_SPACE; } } } } int av = do_space(pc, next, false); if (pc->flags & PCF_FORCE_SPACE) { av |= AV_ADD; } switch (av) { case AV_FORCE: /* add exactly one space */ column++; break; case AV_ADD: delta = 1; if ((next->orig_col >= pc->orig_col_end) && (pc->orig_col_end != 0)) { /* Keep the same relative spacing, minimum 1 */ delta = next->orig_col - pc->orig_col_end; if (delta < 1) { delta = 1; } } column += delta; break; case AV_REMOVE: /* the symbols will be back-to-back "a+3" */ break; default: /* Keep the same relative spacing, if possible */ if ((next->orig_col >= pc->orig_col_end) && (pc->orig_col_end != 0)) { column += next->orig_col - pc->orig_col_end; } break; } if (chunk_is_comment(next) && chunk_is_newline(chunk_get_next(next)) && (column < (int)next->orig_col)) { if ((cpd.settings[UO_sp_endif_cmt].a == AV_IGNORE) || ((pc->type != CT_PP_ELSE) && (pc->type != CT_PP_ENDIF))) { if (cpd.settings[UO_indent_relative_single_line_comments].b) { column = pc->column + (next->orig_col - pc->orig_col_end); } else { column = next->orig_col; } } } next->column = column; LOG_FMT(LSPACE, " = %s @ %d\n", (av == AV_IGNORE) ? "IGNORE" : (av == AV_ADD) ? "ADD" : (av == AV_ADD) ? "REMOVE" : "FORCE", column - prev_column); } pc = next; } }
/** * Marches through the whole file and checks to see how many spaces should be * between two chunks */ void space_text(void) { chunk_t *pc; chunk_t *next; chunk_t *tmp; int column, prev_column; int delta; pc = chunk_get_head(); if (pc == NULL) { return; } column = pc->column; while (pc != NULL) { next = chunk_get_next(pc); if (next == NULL) { break; } /* If the current chunk contains a newline, do not change the column * of the next item */ if ((pc->type == CT_NEWLINE) || (pc->type == CT_NL_CONT) || (pc->type == CT_COMMENT_MULTI)) { column = next->column; } else { /* Set to the minimum allowed column */ if (pc->nl_count == 0) { column += pc->len(); } else { column = pc->orig_col_end; } prev_column = column; /** * Apply a general safety check * If the two chunks combined will tokenize differently, then we * must force a space. * Two chunks -- "()" and "[]" will always tokenize differently. * They are always safe to not have a space after them. */ pc->flags &= ~PCF_FORCE_SPACE; if ((pc->len() > 0) && !chunk_is_str(pc, "[]", 2) && !chunk_is_str(pc, "()", 2)) { /* Find the next non-empty chunk on this line */ tmp = next; while ((tmp != NULL) && (tmp->len() == 0) && !chunk_is_newline(tmp)) { tmp = chunk_get_next(tmp); } if ((tmp != NULL) && (tmp->len() > 0)) { bool kw1 = CharTable::IsKw2(pc->str[pc->len() - 1]); bool kw2 = CharTable::IsKw1(next->str[0]); if (kw1 && kw2) { /* back-to-back words need a space */ pc->flags |= PCF_FORCE_SPACE; } else if (!kw1 && !kw2 && (pc->len() < 4) && (next->len() < 4)) { /* We aren't dealing with keywords. concat and try punctuators */ char buf[9]; memcpy(buf, pc->text(), pc->len()); memcpy(buf + pc->len(), next->text(), next->len()); buf[pc->len() + next->len()] = 0; const chunk_tag_t *ct; ct = find_punctuator(buf, cpd.lang_flags); if ((ct != NULL) && ((int)strlen(ct->tag) != pc->len())) { /* punctuator parsed to a different size.. 
*/ /* C++11 allows '>>' to mean '> >' in templates: * some_func<vector<string>>(); */ if ((cpd.lang_flags & LANG_CPP) && cpd.settings[UO_sp_permit_cpp11_shift].b && (pc->type == CT_ANGLE_CLOSE) && (next->type == CT_ANGLE_CLOSE)) { /* allow '>' and '>' to become '>>' */ } else if (strcmp(ct->tag, "[]") == 0) { /* this is OK */ } else { pc->flags |= PCF_FORCE_SPACE; } } } } } int min_sp; int av = do_space(pc, next, min_sp, false); if (pc->flags & PCF_FORCE_SPACE) { LOG_FMT(LSPACE, " <force between '%s' and '%s'>", pc->str.c_str(), next->str.c_str()); av |= AV_ADD; } min_sp = max(1, min_sp); switch (av) { case AV_FORCE: /* add exactly the specified # of spaces */ column += min_sp; break; case AV_ADD: delta = min_sp; if ((next->orig_col >= pc->orig_col_end) && (pc->orig_col_end != 0)) { /* Keep the same relative spacing, minimum 1 */ delta = next->orig_col - pc->orig_col_end; if (delta < min_sp) { delta = min_sp; } } column += delta; break; case AV_REMOVE: /* the symbols will be back-to-back "a+3" */ break; default: /* Keep the same relative spacing, if possible */ if ((next->orig_col >= pc->orig_col_end) && (pc->orig_col_end != 0)) { column += next->orig_col - pc->orig_col_end; } break; } if (chunk_is_comment(next) && chunk_is_newline(chunk_get_next(next)) && (column < (int)next->orig_col)) { if (((cpd.settings[UO_sp_before_tr_emb_cmt].a == AV_IGNORE) || ((next->parent_type != CT_COMMENT_END) && (next->parent_type != CT_COMMENT_EMBED))) && ((cpd.settings[UO_sp_endif_cmt].a == AV_IGNORE) || ((pc->type != CT_PP_ELSE) && (pc->type != CT_PP_ENDIF)))) { if (cpd.settings[UO_indent_relative_single_line_comments].b) { LOG_FMT(LSPACE, " <relative adj>"); column = pc->column + (next->orig_col - pc->orig_col_end); } else { LOG_FMT(LSPACE, " <relative set>"); column = next->orig_col; } } } next->column = column; LOG_FMT(LSPACE, " = %s @ %d\n", (av == AV_IGNORE) ? "IGNORE" : (av == AV_ADD) ? "ADD" : (av == AV_REMOVE) ? "REMOVE" : "FORCE", column - prev_column); } pc = next; } }