/**
 * Parses a VALA verbatim string, which is delimited by three double quotes
 * on each side.
 *
 * The caller has already verified that the next three chars are quotes.
 * The chunk type becomes CT_STRING, or CT_STRING_MULTI as soon as a CR or LF
 * is seen inside the literal; every CR and every LF bumps pc.nl_count.
 *
 * @param ctx  The tokenizer context to read from
 * @param pc   The chunk to fill in (str, type, nl_count)
 */
static void parse_verbatim_string(tok_ctx& ctx, chunk_t& pc)
{
   pc.type = CT_STRING;

   /* the opening delimiter: first quote replaces str, two more follow */
   pc.str = ctx.get();
   for (int remaining = 2; remaining != 0; remaining--)
   {
      pc.str.append(ctx.get());
   }

   /* copy text until the closing delimiter or the end of the input */
   while (ctx.more())
   {
      const int cur = ctx.get();
      pc.str.append(cur);

      if ((cur == '\n') || (cur == '\r'))
      {
         pc.type = CT_STRING_MULTI;
         pc.nl_count++;
         continue;
      }
      if (cur != '"')
      {
         continue;
      }
      if ((ctx.peek() == '"') && (ctx.peek(1) == '"'))
      {
         /* the first closing quote is already stored; take the other two */
         pc.str.append(ctx.get());
         pc.str.append(ctx.get());
         return;
      }
   }
}
/**
 * Called when the next char is a backslash.
 * If only whitespace separates the backslash from the end of the line, the
 * backslash and the line ending form a line continuation (CT_NL_CONT).
 *
 * On failure the context is restored to where it was on entry.
 *
 * @param ctx  The tokenizer context to read from
 * @param pc   The chunk to fill in on success (str = "\", nl_count = 1)
 * @return     true if a backslash-newline was consumed
 */
static bool parse_bs_newline(tok_ctx& ctx, chunk_t& pc)
{
   ctx.save();
   ctx.get(); /* drop the '\' itself */

   while (ctx.more())
   {
      const int next = ctx.peek();
      if (!unc_isspace(next))
      {
         break;
      }
      ctx.get();

      if ((next != '\r') && (next != '\n'))
      {
         /* plain blank between the backslash and the line ending */
         continue;
      }
      if (next == '\r')
      {
         /* presumably swallows the LF of a CRLF pair - see tok_ctx::expect */
         ctx.expect('\n');
      }
      pc.str      = "\\";
      pc.type     = CT_NL_CONT;
      pc.nl_count = 1;
      return(true);
   }

   ctx.restore();
   return(false);
}
/**
 * PAWN #define is different than C/C++.
 *   #define PATTERN REPLACEMENT_TEXT
 * The PATTERN may not contain a space or '[' or ']'.
 * A generic whitespace check should be good enough.
 * Do not change the pattern.
 *
 * The pattern text is copied verbatim into pc.str. Scanning stops at the
 * first whitespace char, at a backslash that is directly followed by a line
 * ending (escaped newline), or when the input is exhausted.
 *
 * @param ctx  The tokenizer context to read from
 * @param pc   The chunk to fill in (str is cleared first, type is set to tt)
 * @param tt   The token type to assign to the chunk
 */
static void parse_pawn_pattern(tok_ctx &ctx, chunk_t &pc, c_token_t tt)
{
   pc.str.clear();
   pc.type = tt;

   /* ctx.more() guards against a pattern that runs up to the end of the
    * input: the whitespace test alone would not stop there. */
   while (ctx.more() && !unc_isspace(ctx.peek()))
   {
      /* end the pattern on an escaped newline */
      if (ctx.peek() == '\\')
      {
         int ch = ctx.peek(1);
         if ((ch == '\n') || (ch == '\r'))
         {
            break;
         }
      }
      pc.str.append(ctx.get());
   }
}
/**
 * Consumes any run of spaces/tabs followed by exactly one line ending.
 * This differs from parse_whitespace() in that it stops after a single
 * newline.
 *
 * If no line ending follows the blanks, the context is restored and nothing
 * is consumed.
 *
 * @param ctx  The tokenizer context to read from
 * @return     true if a newline was consumed
 */
static bool parse_newline(tok_ctx& ctx)
{
   ctx.save();

   /* skip leading blanks */
   for (int blank = ctx.peek(); (blank == ' ') || (blank == '\t'); blank = ctx.peek())
   {
      ctx.get();
   }

   const int eol = ctx.peek();
   if ((eol != '\r') && (eol != '\n'))
   {
      ctx.restore();
      return(false);
   }

   /* Try a bare LF first; otherwise drop the CR and try for the LF of a
    * CRLF pair (assumes expect() consumes on match - TODO confirm). */
   if (!ctx.expect('\n'))
   {
      ctx.get();
      ctx.expect('\n');
   }
   return(true);
}
/**
 * Parses a literal string that is introduced by a one-char prefix plus a
 * double quote and is terminated by a single double quote.
 * A doubled quote ("") inside the literal does not end it.
 *
 * Every CR and every LF inside the literal bumps pc.nl_count and turns the
 * chunk into CT_STRING_MULTI.
 *
 * @param ctx  The tokenizer context to read from
 * @param pc   The chunk to fill in (str, type, nl_count)
 * @return     Whether a string was parsed (always true)
 */
static bool parse_cs_string(tok_ctx& ctx, chunk_t& pc)
{
   /* prefix char and opening quote */
   pc.str = ctx.get();
   pc.str.append(ctx.get());
   pc.type = CT_STRING;

   /* run until the lone closing quote or the end of the input */
   while (ctx.more())
   {
      const int cur = ctx.get();
      pc.str.append(cur);

      switch (cur)
      {
      case '\n':
      case '\r':
         pc.type = CT_STRING_MULTI;
         pc.nl_count++;
         break;

      case '"':
         if (ctx.peek() != '"')
         {
            /* a lone quote closes the literal */
            return(true);
         }
         /* doubled quote: keep the second one and carry on */
         pc.str.append(ctx.get());
         break;

      default:
         break;
      }
   }
   return(true);
}
/**
 * Parses a literal string that is introduced by a one-char prefix plus a
 * double quote and is terminated by a single double quote.
 * A doubled quote ("") inside the literal does not end it.
 *
 * Every CR and every LF inside the literal bumps pc.nl_count and turns the
 * chunk into CT_STRING_MULTI. Tab chars are kept as they are; when
 * UO_string_replace_tab_chars is set, a one-time diagnostic is logged
 * because the tab cannot be rewritten as an escape in this kind of literal.
 *
 * @param ctx  The tokenizer context to read from
 * @param pc   The chunk to fill in (str, type, nl_count)
 * @return     Whether a string was parsed (always true)
 */
static bool parse_cs_string(tok_ctx &ctx, chunk_t &pc)
{
   /* prefix char and opening quote */
   pc.str = ctx.get();
   pc.str.append(ctx.get());
   pc.type = CT_STRING;

   const bool want_tab_escape = cpd.settings[UO_string_replace_tab_chars].b;

   /* run until the lone closing quote or the end of the input */
   while (ctx.more())
   {
      const int cur = ctx.get();
      pc.str.append(cur);

      if (cur == '"')
      {
         if (ctx.peek() != '"')
         {
            break;
         }
         /* doubled quote: keep the second one and carry on */
         pc.str.append(ctx.get());
         continue;
      }
      if ((cur == '\n') || (cur == '\r'))
      {
         pc.type = CT_STRING_MULTI;
         pc.nl_count++;
         continue;
      }
      if ((cur != '\t') || !want_tab_escape || cpd.warned_unable_string_replace_tab_chars)
      {
         continue;
      }

      /* Escapes have no meaning inside this literal, so the tab cannot be
       * replaced. Report it once per run instead. */
      cpd.warned_unable_string_replace_tab_chars = true;

      log_sev_t warnlevel = (log_sev_t)cpd.settings[UO_warn_level_tabs_found_in_verbatim_string_literals].n;
      LOG_FMT(warnlevel, "%s:%d Detected non-replaceable tab char in literal string\n",
              cpd.filename, pc.orig_line);
      if (warnlevel < LWARN)
      {
         /* severities below LWARN are counted as errors */
         cpd.error_count++;
      }
   }
   return(true);
} // parse_cs_string
/** * Count the number of characters in a quoted string. * The next bit of text starts with a quote char " or ' or <. * Count the number of characters until the matching character. * * @param pc The structure to update, str is an input. * @return Whether a string was parsed */ static bool parse_string(tok_ctx& ctx, chunk_t& pc, int quote_idx, bool allow_escape) { bool escaped = 0; int end_ch; char escape_char = cpd.settings[UO_string_escape_char].n; char escape_char2 = cpd.settings[UO_string_escape_char2].n; bool should_escape_tabs = cpd.settings[UO_string_replace_tab_chars].b && (cpd.lang_flags & LANG_ALLC); pc.str.clear(); while (quote_idx-- > 0) { pc.str.append(ctx.get()); } pc.type = CT_STRING; end_ch = CharTable::Get(ctx.peek()) & 0xff; pc.str.append(ctx.get()); /* store the " */ while (ctx.more()) { int lastcol = ctx.c.col; int ch = ctx.get(); if ((ch == '\t') && should_escape_tabs) { ctx.c.col = lastcol + 2; pc.str.append(escape_char); pc.str.append('t'); continue; } pc.str.append(ch); if (ch == '\n') { pc.nl_count++; pc.type = CT_STRING_MULTI; escaped = 0; continue; } if ((ch == '\r') && (ctx.peek() != '\n')) { pc.str.append(ctx.get()); pc.nl_count++; pc.type = CT_STRING_MULTI; escaped = 0; continue; } if (!escaped) { if (ch == escape_char) { escaped = (escape_char != 0); } else if ((ch == escape_char2) && (ctx.peek() == end_ch)) { escaped = allow_escape; } else if (ch == end_ch) { break; } } else { escaped = false; } } parse_suffix(ctx, pc, true); return(true); } // parse_string
/** * Count the number of characters in the number. * The next bit of text starts with a number (0-9 or '.'), so it is a number. * Count the number of characters in the number. * * This should cover all number formats for all languages. * Note that this is not a strict parser. It will happily parse numbers in * an invalid format. * * For example, only D allows underscores in the numbers, but they are * allowed in all formats. * * @param pc The structure to update, str is an input. * @return Whether a number was parsed */ static bool parse_number(tok_ctx& ctx, chunk_t& pc) { int tmp; bool is_float; bool did_hex = false; /* A number must start with a digit or a dot, followed by a digit */ if (!is_dec(ctx.peek()) && ((ctx.peek() != '.') || !is_dec(ctx.peek(1)))) { return(false); } is_float = (ctx.peek() == '.'); if (is_float && (ctx.peek(1) == '.')) { return(false); } /* Check for Hex, Octal, or Binary * Note that only D and Pawn support binary, but who cares? */ if (ctx.peek() == '0') { pc.str.append(ctx.get()); /* store the '0' */ switch (unc_toupper(ctx.peek())) { case 'X': /* hex */ did_hex = true; do { pc.str.append(ctx.get()); /* store the 'x' and then the rest */ } while (is_hex_(ctx.peek())); break; case 'B': /* binary */ do { pc.str.append(ctx.get()); /* store the 'b' and then the rest */ } while (is_bin_(ctx.peek())); break; case '0': /* octal or decimal */ case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': do { pc.str.append(ctx.get()); } while (is_oct_(ctx.peek())); break; default: /* either just 0 or 0.1 or 0UL, etc */ break; } } else { /* Regular int or float */ while (is_dec_(ctx.peek())) { pc.str.append(ctx.get()); } } /* Check if we stopped on a decimal point & make sure it isn't '..' 
*/ if ((ctx.peek() == '.') && (ctx.peek(1) != '.')) { pc.str.append(ctx.get()); is_float = true; if (did_hex) { while (is_hex_(ctx.peek())) { pc.str.append(ctx.get()); } } else { while (is_dec_(ctx.peek())) { pc.str.append(ctx.get()); } } } /* Check exponent * Valid exponents per language (not that it matters): * C/C++/D/Java: eEpP * C#/Pawn: eE */ tmp = unc_toupper(ctx.peek()); if ((tmp == 'E') || (tmp == 'P')) { is_float = true; pc.str.append(ctx.get()); if ((ctx.peek() == '+') || (ctx.peek() == '-')) { pc.str.append(ctx.get()); } while (is_dec_(ctx.peek())) { pc.str.append(ctx.get()); } } /* Check the suffixes * Valid suffixes per language (not that it matters): * Integer Float * C/C++: uUlL64 lLfF * C#: uUlL fFdDMm * D: uUL ifFL * Java: lL fFdD * Pawn: (none) (none) * * Note that i, f, d, and m only appear in floats. */ while (1) { tmp = unc_toupper(ctx.peek()); if ((tmp == 'I') || (tmp == 'F') || (tmp == 'D') || (tmp == 'M')) { is_float = true; } else if ((tmp != 'L') && (tmp != 'U')) { break; } pc.str.append(ctx.get()); } /* skip the Microsoft-specific '64' suffix */ if ((ctx.peek() == '6') && (ctx.peek(1) == '4')) { pc.str.append(ctx.get()); pc.str.append(ctx.get()); } pc.type = is_float ? CT_NUMBER_FP : CT_NUMBER; /* If there is anything left, then we are probably dealing with garbage or * some sick macro junk. Eat it. */ parse_suffix(ctx, pc); return(true); } // parse_number
/**
 * Parses a comment.
 * The text at ctx must start with a '/'. Three kinds are recognized:
 *  - C comments that start with '/ *' and end with '* /'
 *  - C++ comments that start with '/ /'
 *  - D nestable comments '/ +' ... '+ /' (only when the language is D)
 *
 * A C++ comment is continued onto the next line when it ends in an odd
 * number of backslashes (not in C#). A C comment that is followed, after
 * optional spaces/tabs, by another C comment is joined with it into one
 * chunk.
 *
 * Once the comment is read, its text is searched for the marker that turns
 * processing off (or back on when it is currently off), and cpd.unc_off is
 * flipped if the marker is present.
 *
 * @param ctx  The tokenizer context to read from
 * @param pc   The chunk to fill in (str, type, nl_count)
 * @return     Whether a comment was parsed
 */
static bool parse_comment(tok_ctx& ctx, chunk_t& pc)
{
   const bool lang_is_d  = (cpd.lang_flags & LANG_D) != 0;
   const bool lang_is_cs = (cpd.lang_flags & LANG_CS) != 0;

   /* must open with '/ /' or '/ *' or, for D, '/ +' */
   if (ctx.peek() != '/')
   {
      return(false);
   }
   const int opener = ctx.peek(1);
   if ((opener != '*') && (opener != '/') && !((opener == '+') && lang_is_d))
   {
      return(false);
   }

   ctx.save();

   /* the two opening chars */
   pc.str = ctx.get();
   int cur = ctx.get();
   pc.str.append(cur);

   if (cur == '/')
   {
      pc.type = CT_COMMENT_CPP;
      for ( ; ; )
      {
         /* copy the rest of the line, tracking the trailing backslash run */
         int trailing_bs = 0;
         while (ctx.more())
         {
            cur = ctx.peek();
            if ((cur == '\r') || (cur == '\n'))
            {
               break;
            }
            /* backslashes aren't special in comments in C# */
            trailing_bs = ((cur == '\\') && !lang_is_cs) ? (trailing_bs + 1) : 0;
            pc.str.append(ctx.get());
         }

         /* only an odd number of backslashes right before the newline
          * continues the comment */
         const bool continued = ((trailing_bs & 1) != 0) && ctx.more();
         if (!continued)
         {
            break;
         }

         if (ctx.peek() == '\r')
         {
            pc.str.append(ctx.get());
         }
         if (ctx.peek() == '\n')
         {
            pc.str.append(ctx.get());
         }
         pc.nl_count++;
         cpd.did_newline = true;
      }
   }
   else if (!ctx.more())
   {
      /* the input ended right after the opener */
      ctx.restore();
      return(false);
   }
   else if (cur == '+')
   {
      pc.type = CT_COMMENT;

      int depth = 1;
      while ((depth > 0) && ctx.more())
      {
         if ((ctx.peek() == '/') && (ctx.peek(1) == '+'))
         {
            /* nested opener */
            pc.str.append(ctx.get());
            pc.str.append(ctx.get());
            depth++;
            continue;
         }
         if ((ctx.peek() == '+') && (ctx.peek(1) == '/'))
         {
            /* closer of the current nesting level */
            pc.str.append(ctx.get());
            pc.str.append(ctx.get());
            depth--;
            continue;
         }

         cur = ctx.get();
         pc.str.append(cur);

         if (cur == '\n')
         {
            pc.type = CT_COMMENT_MULTI;
            pc.nl_count++;
            cpd.le_counts[LE_LF]++;
         }
         else if (cur == '\r')
         {
            pc.type = CT_COMMENT_MULTI;
            pc.nl_count++;
            if (ctx.peek() == '\n')
            {
               cpd.le_counts[LE_CRLF]++;
               pc.str.append(ctx.get());  /* keep the LF of the pair */
            }
            else
            {
               cpd.le_counts[LE_CR]++;
            }
         }
      }
   }
   else
   {
      /* what is left is a C comment */
      pc.type = CT_COMMENT;

      while (ctx.more())
      {
         if ((ctx.peek() == '*') && (ctx.peek(1) == '/'))
         {
            pc.str.append(ctx.get());
            pc.str.append(ctx.get());

            /* Look past spaces/tabs for another C comment to merge with */
            tok_info after_close;
            ctx.save(after_close);
            int len_at_close = pc.str.size();

            while ((ctx.peek() == ' ') || (ctx.peek() == '\t'))
            {
               pc.str.append(ctx.get());
            }

            const bool another_follows = (ctx.peek() == '/') && (ctx.peek(1) == '*');
            if (!another_follows)
            {
               /* nothing to merge: give the blanks back and finish */
               ctx.restore(after_close);
               pc.str.resize(len_at_close);
               break;
            }
         }

         cur = ctx.get();
         pc.str.append(cur);

         if (cur == '\n')
         {
            pc.type = CT_COMMENT_MULTI;
            pc.nl_count++;
            cpd.le_counts[LE_LF]++;
         }
         else if (cur == '\r')
         {
            pc.type = CT_COMMENT_MULTI;
            pc.nl_count++;
            if (ctx.peek() == '\n')
            {
               cpd.le_counts[LE_CRLF]++;
               pc.str.append(ctx.get());  /* keep the LF of the pair */
            }
            else
            {
               cpd.le_counts[LE_CR]++;
            }
         }
      }
   }

   /* Look for the marker that toggles processing: the enable text while
    * processing is off, the disable text while it is on. An unset or empty
    * setting falls back to the built-in text. */
   const bool  was_off = cpd.unc_off;
   const char *marker  = was_off ? cpd.settings[UO_enable_processing_cmt].str
                                 : cpd.settings[UO_disable_processing_cmt].str;

   if ((marker == NULL) || !marker[0])
   {
      marker = was_off ? UNCRUSTIFY_ON_TEXT : UNCRUSTIFY_OFF_TEXT;
   }

   if (pc.str.find(marker) >= 0)
   {
      LOG_FMT(LBCTRL, "Found '%s' on line %d\n", marker, pc.orig_line);
      cpd.unc_off = !was_off;
   }
   return(true);
} // parse_comment
/** * Parses all legal D string constants. * * Quoted strings: * r"Wysiwyg" # WYSIWYG string * x"hexstring" # Hexadecimal array * `Wysiwyg` # WYSIWYG string * 'char' # single character * "reg_string" # regular string * * Non-quoted strings: * \x12 # 1-byte hex constant * \u1234 # 2-byte hex constant * \U12345678 # 4-byte hex constant * \123 # octal constant * \& # named entity * \n # single character * * @param pc The structure to update, str is an input. * @return Whether a string was parsed */ static bool d_parse_string(tok_ctx& ctx, chunk_t& pc) { int ch = ctx.peek(); if ((ch == '"') || (ch == '\'') || (ch == '`')) { return(parse_string(ctx, pc, 0, true)); } else if (ch == '\\') { ctx.save(); int cnt; pc.str.clear(); while (ctx.peek() == '\\') { pc.str.append(ctx.get()); /* Check for end of file */ switch (ctx.peek()) { case 'x': /* \x HexDigit HexDigit */ cnt = 3; while (cnt--) { pc.str.append(ctx.get()); } break; case 'u': /* \u HexDigit HexDigit HexDigit HexDigit */ cnt = 5; while (cnt--) { pc.str.append(ctx.get()); } break; case 'U': /* \U HexDigit (x8) */ cnt = 9; while (cnt--) { pc.str.append(ctx.get()); } break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': /* handle up to 3 octal digits */ pc.str.append(ctx.get()); ch = ctx.peek(); if ((ch >= '0') && (ch <= '7')) { pc.str.append(ctx.get()); ch = ctx.peek(); if ((ch >= '0') && (ch <= '7')) { pc.str.append(ctx.get()); } } break; case '&': /* \& NamedCharacterEntity ; */ pc.str.append(ctx.get()); while (unc_isalpha(ctx.peek())) { pc.str.append(ctx.get()); } if (ctx.peek() == ';') { pc.str.append(ctx.get()); } break; default: /* Everything else is a single character */ pc.str.append(ctx.get()); break; } // switch } if (pc.str.size() > 1) { pc.type = CT_STRING; return(true); } ctx.restore(); } else if (((ch == 'r') || (ch == 'x')) && (ctx.peek(1) == '"')) { return(parse_string(ctx, pc, 1, false)); } return(false); } // d_parse_string
/**
 * Reads the next token from ctx into pc and classifies it.
 *
 * The individual parsers are tried in a fixed order; the first one that
 * accepts the text wins, so the order of the checks below matters.
 * If nothing matches, a single char is consumed as CT_UNKNOWN, a warning is
 * logged and cpd.error_count is bumped.
 *
 * On entry the position (orig_line, column, orig_col) is recorded and
 * type, nl_count and flags are reset. pc.str is not cleared here.
 *
 * @param ctx  The tokenizer context to read from
 * @param pc   The chunk to fill in
 * @return     true/false - whether anything was parsed (false only when
 *             there is no more input)
 */
static bool parse_next(tok_ctx& ctx, chunk_t& pc)
{
   const chunk_tag_t *punc;
   int               ch, ch1;

   if (!ctx.more())
   {
      //fprintf(stderr, "All done!\n");
      return(false);
   }

   /* Save off the current column */
   pc.orig_line = ctx.c.row;
   pc.column    = ctx.c.col;
   pc.orig_col  = ctx.c.col;
   pc.type      = CT_NONE;
   pc.nl_count  = 0;
   pc.flags     = 0;

   /* If processing is turned off, parse_ignored() turns everything except
    * newlines into CT_IGNORED chunks. If it declines, fall through to the
    * regular parsers. */
   if (cpd.unc_off)
   {
      if (parse_ignored(ctx, pc))
      {
         return(true);
      }
   }

   /**
    * Parse whitespace
    */
   if (parse_whitespace(ctx, pc))
   {
      return(true);
   }

   /**
    * Handle unknown/unhandled preprocessors:
    * the body is taken as one CT_PREPROC_BODY chunk
    */
   if ((cpd.in_preproc > CT_PP_BODYCHUNK) &&
       (cpd.in_preproc <= CT_PP_OTHER))
   {
      pc.str.clear();
      tok_info ss;
      ctx.save(ss);
      /* Chunk to a newline or comment */
      pc.type = CT_PREPROC_BODY;
      int last = 0;
      while (ctx.more())
      {
         /* note: this local shadows the function-level 'ch' */
         int ch = ctx.peek();

         if ((ch == '\n') || (ch == '\r'))
         {
            /* Back off if this is an escaped newline: 'ss' was saved just
             * before the backslash was consumed, so restoring it and
             * dropping the last stored char un-reads the backslash */
            if (last == '\\')
            {
               ctx.restore(ss);
               pc.str.pop_back();
            }
            break;
         }

         /* Quit on a C++ comment start */
         if ((ch == '/') && (ctx.peek(1) == '/'))
         {
            break;
         }
         last = ch;
         ctx.save(ss);

         pc.str.append(ctx.get());
      }
      /* an empty body falls through to the regular parsers */
      if (pc.str.size() > 0)
      {
         return(true);
      }
   }

   /**
    * Detect backslash-newline
    */
   if ((ctx.peek() == '\\') && parse_bs_newline(ctx, pc))
   {
      return(true);
   }

   /**
    * Parse comments
    */
   if (parse_comment(ctx, pc))
   {
      return(true);
   }

   /* Parse code placeholders */
   if (parse_code_placeholder(ctx, pc))
   {
      return(true);
   }

   /* Check for C# literal strings, ie @"hello" and identifiers @for */
   if ((cpd.lang_flags & LANG_CS) && (ctx.peek() == '@'))
   {
      if (ctx.peek(1) == '"')
      {
         parse_cs_string(ctx, pc);
         return(true);
      }
      /* check for non-keyword identifiers such as @if @switch, etc */
      if (CharTable::IsKw1(ctx.peek(1)))
      {
         /* skipcheck = true: the word is not looked up as a keyword */
         parse_word(ctx, pc, true);
         return(true);
      }
   }

   /* Check for C# Interpolated strings */
   if ((cpd.lang_flags & LANG_CS) && (ctx.peek() == '$') && (ctx.peek(1) == '"'))
   {
      parse_cs_interpolated_string(ctx, pc);
      return(true);
   }

   /* handle VALA """ strings """ */
   if ((cpd.lang_flags & LANG_VALA) &&
       (ctx.peek() == '"') &&
       (ctx.peek(1) == '"') &&
       (ctx.peek(2) == '"'))
   {
      parse_verbatim_string(ctx, pc);
      return(true);
   }

   /* handle C++0x strings u8"x" u"x" U"x" R"x" u8R"XXX(I'm a "raw UTF-8" string.)XXX" */
   ch = ctx.peek();
   if ((cpd.lang_flags & LANG_CPP) &&
       ((ch == 'u') || (ch == 'U') || (ch == 'R')))
   {
      /* idx ends up as the number of prefix chars in front of the quote */
      int  idx     = 0;
      bool is_real = false;   /* set when the prefix contains the raw 'R' */

      if ((ch == 'u') && (ctx.peek(1) == '8'))
      {
         idx = 2;
      }
      else if (unc_tolower(ch) == 'u')
      {
         idx++;
      }

      if (ctx.peek(idx) == 'R')
      {
         idx++;
         is_real = true;
      }
      if (ctx.peek(idx) == '"')
      {
         if (is_real)
         {
            if (parse_cr_string(ctx, pc, idx))
            {
               return(true);
            }
         }
         else
         {
            if (parse_string(ctx, pc, idx, true))
            {
               /* NOTE(review): parse_string() already calls parse_suffix()
                * before returning; this second call looks redundant -
                * confirm before removing */
               parse_suffix(ctx, pc, true);
               return(true);
            }
         }
      }
   }

   /* PAWN specific stuff */
   if (cpd.lang_flags & LANG_PAWN)
   {
      /* the pattern of a #define / #emit is taken verbatim as CT_MACRO */
      if ((cpd.preproc_ncnl_count == 1) &&
          ((cpd.in_preproc == CT_PP_DEFINE) ||
           (cpd.in_preproc == CT_PP_EMIT)))
      {
         parse_pawn_pattern(ctx, pc, CT_MACRO);
         return(true);
      }
      /* Check for PAWN strings: \"hi" or !"hi" or !\"hi" or \!"hi" */
      if ((ctx.peek() == '\\') || (ctx.peek() == '!'))
      {
         if (ctx.peek(1) == '"')
         {
            /* one prefix char; allow_escape only for the '!' prefix */
            parse_string(ctx, pc, 1, (ctx.peek() == '!'));
            return(true);
         }
         else if (((ctx.peek(1) == '\\') || (ctx.peek(1) == '!')) &&
                  (ctx.peek(2) == '"'))
         {
            /* two prefix chars */
            parse_string(ctx, pc, 2, false);
            return(true);
         }
      }

      /* handle PAWN preprocessor args %0 .. %9 */
      if ((cpd.in_preproc == CT_PP_DEFINE) &&
          (ctx.peek() == '%') &&
          unc_isdigit(ctx.peek(1)))
      {
         pc.str.clear();
         pc.str.append(ctx.get());
         pc.str.append(ctx.get());
         pc.type = CT_WORD;
         return(true);
      }
   }

   /**
    * Parse numbers, then strings and character constants
    */
   if (parse_number(ctx, pc))
   {
      return(true);
   }

   if (cpd.lang_flags & LANG_D)
   {
      /* D specific stuff */
      if (d_parse_string(ctx, pc))
      {
         return(true);
      }
   }
   else
   {
      /* Not D stuff */

      /* Check for L'a', L"abc", 'a', "abc", <abc> strings
       * (an 'S' prefix is accepted as well; '<' only inside #include) */
      ch  = ctx.peek();
      ch1 = ctx.peek(1);
      if ((((ch == 'L') || (ch == 'S')) &&
           ((ch1 == '"') || (ch1 == '\''))) ||
          (ch == '"') ||
          (ch == '\'') ||
          ((ch == '<') && (cpd.in_preproc == CT_PP_INCLUDE)))
      {
         /* an alphabetic first char is the one-char prefix */
         parse_string(ctx, pc, unc_isalpha(ch) ? 1 : 0, true);
         return(true);
      }

      if ((ch == '<') && (cpd.in_preproc == CT_PP_DEFINE))
      {
         if (chunk_get_tail()->type == CT_MACRO)
         {
            /* We have "#define XXX <", assume '<' starts an include string */
            parse_string(ctx, pc, 0, false);
            return(true);
         }
      }
   }

   /* Check for Objective C literals and VALA identifiers ('@1', '@if') */
   if ((cpd.lang_flags & (LANG_OC | LANG_VALA)) && (ctx.peek() == '@'))
   {
      int nc = ctx.peek(1);
      if ((nc == '"') || (nc == '\''))
      {
         /* literal string */
         parse_string(ctx, pc, 1, true);
         return(true);
      }
      else if ((nc >= '0') && (nc <= '9'))
      {
         /* literal number: parse_number() appends to pc.str, so the '@'
          * stored here stays in front of the digits */
         pc.str.append(ctx.get());  /* store the '@' */
         parse_number(ctx, pc);
         return(true);
      }
   }

   /* Check for pawn/ObjectiveC/Java and normal identifiers */
   if (CharTable::IsKw1(ctx.peek()) ||
       ((ctx.peek() == '@') && CharTable::IsKw1(ctx.peek(1))))
   {
      parse_word(ctx, pc, false);
      return(true);
   }

   /* see if we have a punctuator: hand the next four chars to the lookup
    * (the buffer is not NUL-terminated) */
   char punc_txt[4];
   punc_txt[0] = ctx.peek();
   punc_txt[1] = ctx.peek(1);
   punc_txt[2] = ctx.peek(2);
   punc_txt[3] = ctx.peek(3);
   if ((punc = find_punctuator(punc_txt, cpd.lang_flags)) != NULL)
   {
      /* consume as many chars as the matched punctuator is long */
      int cnt = strlen(punc->tag);
      while (cnt--)
      {
         pc.str.append(ctx.get());
      }
      pc.type   = punc->type;
      pc.flags |= PCF_PUNCTUATOR;
      return(true);
   }

   /* throw away this character */
   pc.type = CT_UNKNOWN;
   pc.str.append(ctx.get());

   LOG_FMT(LWARN, "%s:%d Garbage in col %d: %x\n",
           cpd.filename, pc.orig_line, (int)ctx.c.col, pc.str[0]);
   cpd.error_count++;
   return(true);
} // parse_next
/**
 * Called while processing is turned off (cpd.unc_off).
 *
 * Blank lines become a CT_NEWLINE chunk. Any other line is passed through
 * untouched as CT_IGNORED, unless it contains the text that turns
 * processing back on. In that case the line is split: leading whitespace
 * is returned as CT_IGNORED and the comment is handed to parse_comment(),
 * which clears cpd.unc_off when the text is found inside it.
 *
 * An unset or empty UO_enable_processing_cmt falls back to the built-in
 * UNCRUSTIFY_ON_TEXT, the same rule parse_comment() applies.
 *
 * @param ctx  The tokenizer context to read from
 * @param pc   The chunk to fill in
 * @return     Whether a chunk was produced
 */
static bool parse_ignored(tok_ctx& ctx, chunk_t& pc)
{
   int nl_count = 0;

   /* Parse off newlines/blank lines */
   while (parse_newline(ctx))
   {
      nl_count++;
   }
   if (nl_count > 0)
   {
      pc.nl_count = nl_count;
      pc.type     = CT_NEWLINE;
      return(true);
   }

   /* See if the UO_enable_processing_cmt text is on this line */
   ctx.save();
   pc.str.clear();
   while (ctx.more() &&
          (ctx.peek() != '\r') &&
          (ctx.peek() != '\n'))
   {
      pc.str.append(ctx.get());
   }
   if (pc.str.size() == 0)
   {
      /* end of file? */
      return(false);
   }

   /* Note that we aren't actually making sure this is in a comment, yet */
   const char *ontext = cpd.settings[UO_enable_processing_cmt].str;
   if ((ontext == NULL) || !ontext[0])
   {
      /* An empty setting counts as unset. Searching for "" would treat
       * every line as carrying the enable text. */
      ontext = UNCRUSTIFY_ON_TEXT;
   }
   if (pc.str.find(ontext) < 0)
   {
      pc.type = CT_IGNORED;
      return(true);
   }

   /* The text is on this line: rewind and take the line apart */
   ctx.restore();

   /* parse off whitespace leading to the comment */
   if (parse_whitespace(ctx, pc))
   {
      pc.type = CT_IGNORED;
      return(true);
   }

   /* Look for the ending comment and let it pass */
   if (parse_comment(ctx, pc) && !cpd.unc_off)
   {
      return(true);
   }

   /* Reset the chunk & scan to until a newline.
    * NOTE(review): if parse_comment() consumed a comment that did not
    * re-enable processing, its text is discarded here - confirm intended. */
   pc.str.clear();
   while (ctx.more() &&
          (ctx.peek() != '\r') &&
          (ctx.peek() != '\n'))
   {
      pc.str.append(ctx.get());
   }
   if (pc.str.size() > 0)
   {
      pc.type = CT_IGNORED;
      return(true);
   }
   return(false);
} // parse_ignored
/**
 * Parses a C++0x raw ('R') string:
 *   R"( xxx )"   R"tag( )tag"   u8R"(x)"   uR"(x)"
 * Newlines may appear inside the string; each LF bumps pc.nl_count and
 * turns the chunk into CT_STRING_MULTI.
 *
 * If no '(' follows the tag, or the closing )tag" is never found, the
 * context is restored and false is returned.
 *
 * @param ctx    The tokenizer context to read from
 * @param pc     The chunk to fill in (str is cleared first)
 * @param q_idx  Offset of the opening quote, i.e. the prefix length
 * @return       Whether a raw string was parsed
 */
static bool parse_cr_string(tok_ctx& ctx, chunk_t& pc, int q_idx)
{
   /* where the tag starts in the raw data: right after the opening quote */
   int tag_idx = ctx.c.idx + q_idx + 1;
   int tag_len = 0;

   ctx.save();

   /* the prefix plus the opening quote */
   pc.str.clear();
   for (int left = q_idx + 1; left != 0; left--)
   {
      pc.str.append(ctx.get());
   }

   /* the tag is everything up to the '(' */
   while (ctx.more() && (ctx.peek() != '('))
   {
      tag_len++;
      pc.str.append(ctx.get());
   }
   if (ctx.peek() != '(')
   {
      ctx.restore();
      return(false);
   }

   pc.type = CT_STRING;
   while (ctx.more())
   {
      /* the literal ends at ')' + tag + '"' */
      const bool at_close = (ctx.peek() == ')') &&
                            (ctx.peek(tag_len + 1) == '"') &&
                            tag_compare(ctx.data, tag_idx, ctx.c.idx + 1, tag_len);
      if (at_close)
      {
         /* tag_len chars plus the ')' and the '"' */
         for (int left = tag_len + 2; left != 0; left--)
         {
            pc.str.append(ctx.get());
         }
         parse_suffix(ctx, pc);
         return(true);
      }

      const int cur = ctx.get();
      pc.str.append(cur);
      if (cur == '\n')
      {
         pc.nl_count++;
         pc.type = CT_STRING_MULTI;
      }
   }

   /* ran out of input before the closing delimiter */
   ctx.restore();
   return(false);
} // parse_cr_string
/** * Count the number of characters in a word. * The first character is already valid for a keyword * * @param pc The structure to update, str is an input. * @return Whether a word was parsed (always true) */ bool parse_word(tok_ctx &ctx, chunk_t &pc, bool skipcheck) { int ch; static unc_text intr_txt("@interface"); /* The first character is already valid */ pc.str.clear(); pc.str.append(ctx.get()); while (ctx.more()) { ch = ctx.peek(); if (CharTable::IsKw2(ch)) { pc.str.append(ctx.get()); } else if ((ch == '\\') && (unc_tolower(ctx.peek(1)) == 'u')) { pc.str.append(ctx.get()); pc.str.append(ctx.get()); skipcheck = true; } else { break; } /* HACK: Non-ASCII character are only allowed in identifiers */ if (ch > 0x7f) { skipcheck = true; } } pc.type = CT_WORD; if (skipcheck) { return(true); } /* Detect pre-processor functions now */ if ((cpd.in_preproc == CT_PP_DEFINE) && (cpd.preproc_ncnl_count == 1)) { if (ctx.peek() == '(') { pc.type = CT_MACRO_FUNC; } else { pc.type = CT_MACRO; } } else { /* '@interface' is reserved, not an interface itself */ if ((cpd.lang_flags & LANG_JAVA) && pc.str.startswith("@") && !pc.str.equals(intr_txt)) { pc.type = CT_ANNOTATION; } else { /* Turn it into a keyword now */ pc.type = find_keyword_type(pc.text(), pc.str.size()); } } return(true); } // parse_word
/**
 * Consumes any run of spaces/tabs followed by exactly one line ending
 * (LF, CR or CRLF).
 * This differs from parse_whitespace() in that it stops after a single
 * newline.
 *
 * If no line ending follows the blanks, the context is restored and nothing
 * is consumed.
 *
 * @param ctx  The tokenizer context to read from
 * @return     true if a newline was consumed
 */
static bool parse_newline(tok_ctx& ctx)
{
   ctx.save();

   /* skip leading blanks */
   for (int blank = ctx.peek(); (blank == ' ') || (blank == '\t'); blank = ctx.peek())
   {
      ctx.get();
   }

   const int eol = ctx.peek();
   if ((eol != '\r') && (eol != '\n'))
   {
      ctx.restore();
      return(false);
   }

   /* take the line ending; a CR may be followed by the LF of a CRLF pair */
   ctx.get();
   if ((eol == '\r') && (ctx.peek() == '\n'))
   {
      ctx.get();
   }
   return(true);
}