/**
 * Reads one word (identifier, keyword, macro name or annotation) from the
 * input. The caller guarantees that the first character is already a valid
 * start of a word, so it is taken without being checked.
 *
 * @param ctx        The tokenizer context to read characters from
 * @param pc         The chunk to fill in; its str is replaced by the word and
 *                   its type is set
 * @param skipcheck  If true, the word stays a plain CT_WORD and no
 *                   macro/annotation/keyword classification is done
 * @return           Whether a word was parsed (always true)
 */
bool parse_word(tok_ctx& ctx, chunk_t& pc, bool skipcheck)
{
   static unc_text reserved_interface("@interface");

   /* Take the first character as-is, it was validated by the caller */
   pc.str.clear();
   pc.str.append(ctx.get());

   /* Collect every following character that may continue a word */
   while (ctx.more() && CharTable::IsKw2(ctx.peek()))
   {
      int next_ch = ctx.get();

      pc.str.append(next_ch);

      /* HACK: Non-ASCII characters are only allowed in identifiers,
       * so such a word is never classified any further */
      if (next_ch > 0x7f)
      {
         skipcheck = true;
      }
   }

   pc.type = CT_WORD;
   if (skipcheck)
   {
      return(true);
   }

   /* The name directly after '#define' is a macro; a '(' right behind it
    * makes it a function-like macro */
   if ((cpd.in_preproc == CT_PP_DEFINE) && (cpd.preproc_ncnl_count == 1))
   {
      pc.type = (ctx.peek() == '(') ? CT_MACRO_FUNC : CT_MACRO;
      return(true);
   }

   /* Java: '@word' is an annotation. '@interface' is reserved, not an
    * annotation itself, so it goes through the keyword lookup instead */
   if ((cpd.lang_flags & LANG_JAVA) &&
       pc.str.startswith("@") &&
       !pc.str.equals(reserved_interface))
   {
      pc.type = CT_ANNOTATION;
      return(true);
   }

   /* Everything else: turn it into a keyword now, if it is one */
   pc.type = find_keyword_type(pc.text(), pc.str.size());
   return(true);
} // parse_word
/**
 * Parses a number and appends its characters to pc.str.
 * The next bit of text must start with a digit (0-9) or with a '.' that is
 * followed by a digit, otherwise nothing is consumed and false is returned.
 *
 * This should cover all number formats for all languages.
 * Note that this is not a strict parser. It will happily parse numbers in
 * an invalid format.
 *
 * For example, only D allows underscores in the numbers, but they are
 * allowed in all formats.
 *
 * @param ctx  The tokenizer context to read characters from
 * @param pc   The structure to update, str is an input. On success the type
 *             is set to CT_NUMBER or CT_NUMBER_FP
 * @return     Whether a number was parsed
 */
static bool parse_number(tok_ctx &ctx, chunk_t &pc)
{
   int  tmp;
   bool is_float;
   bool did_hex = false;

   /* A number must start with a digit or a dot, followed by a digit */
   if (!is_dec(ctx.peek()) &&
       ((ctx.peek() != '.') || !is_dec(ctx.peek(1))))
   {
      return(false);
   }

   is_float = (ctx.peek() == '.');
   if (is_float && (ctx.peek(1) == '.'))
   {
      return(false);
   }

   /* Check for Hex, Octal, or Binary
    * Note that only D and Pawn support binary, but who cares?
    */
   if (ctx.peek() == '0')
   {
      pc.str.append(ctx.get());  /* store the '0' */

      int     ch;
      chunk_t pc_temp;
      size_t  pc_length;

      pc_temp.str.append('0');

      // MS constant might have an "h" at the end. Look for it
      /* Scan ahead over the whole word to find its last character, then
       * rewind so that the real parsing below starts right after the '0' */
      ctx.save();
      while (ctx.more() && CharTable::IsKw2(ctx.peek()))
      {
         ch = ctx.get();
         pc_temp.str.append(ch);
      }
      /* pc_temp always holds at least the '0', so the index is valid */
      pc_length = pc_temp.len();
      ch        = pc_temp.str[pc_length - 1];
      ctx.restore();
      LOG_FMT(LGUY, "%s(%d): pc_temp:%s\n", __func__, __LINE__, pc_temp.text());

      if (ch == 'h')
      {
         // we have an MS hexadecimal number with "h" at the end
         LOG_FMT(LGUY, "%s(%d): MS hexadecimal number\n", __func__, __LINE__);
         did_hex = true;
         do
         {
            pc.str.append(ctx.get()); /* store the rest */
         } while (is_hex_(ctx.peek()));

         /* Only take the terminating 'h' if it is really the next character.
          * For a bare "0h" the loop above has already consumed the 'h';
          * reading unconditionally here would swallow whatever follows the
          * number (e.g. a ';') into the token. */
         if (ctx.peek() == 'h')
         {
            pc.str.append(ctx.get()); /* store the h */
         }
         LOG_FMT(LGUY, "%s(%d): pc:%s\n", __func__, __LINE__, pc.text());
      }
      else
      {
         switch (unc_toupper(ctx.peek()))
         {
         case 'X':               /* hex */
            did_hex = true;
            do
            {
               pc.str.append(ctx.get());  /* store the 'x' and then the rest */
            } while (is_hex_(ctx.peek()));
            break;

         case 'B':               /* binary */
            do
            {
               pc.str.append(ctx.get());  /* store the 'b' and then the rest */
            } while (is_bin_(ctx.peek()));
            break;

         case '0':               /* octal or decimal */
         case '1':
         case '2':
         case '3':
         case '4':
         case '5':
         case '6':
         case '7':
         case '8':
         case '9':
            do
            {
               pc.str.append(ctx.get());
            } while (is_oct_(ctx.peek()));
            break;

         default:
            /* either just 0 or 0.1 or 0UL, etc */
            break;
         }
      }
   }
   else
   {
      /* Regular int or float */
      while (is_dec_(ctx.peek()))
      {
         pc.str.append(ctx.get());
      }
   }

   /* Check if we stopped on a decimal point & make sure it isn't '..' */
   if ((ctx.peek() == '.') && (ctx.peek(1) != '.'))
   {
      pc.str.append(ctx.get());
      is_float = true;
      if (did_hex)
      {
         while (is_hex_(ctx.peek()))
         {
            pc.str.append(ctx.get());
         }
      }
      else
      {
         while (is_dec_(ctx.peek()))
         {
            pc.str.append(ctx.get());
         }
      }
   }

   /* Check exponent
    * Valid exponents per language (not that it matters):
    * C/C++/D/Java: eEpP
    * C#/Pawn:      eE
    */
   tmp = unc_toupper(ctx.peek());
   if ((tmp == 'E') || (tmp == 'P'))
   {
      is_float = true;
      pc.str.append(ctx.get());
      if ((ctx.peek() == '+') || (ctx.peek() == '-'))
      {
         pc.str.append(ctx.get());
      }
      while (is_dec_(ctx.peek()))
      {
         pc.str.append(ctx.get());
      }
   }

   /* Check the suffixes
    * Valid suffixes per language (not that it matters):
    *        Integer       Float
    * C/C++: uUlL64        lLfF
    * C#:    uUlL          fFdDMm
    * D:     uUL           ifFL
    * Java:  lL            fFdD
    * Pawn:  (none)        (none)
    *
    * Note that i, f, d, and m only appear in floats.
    */
   while (1)
   {
      tmp = unc_toupper(ctx.peek());
      if ((tmp == 'I') || (tmp == 'F') || (tmp == 'D') || (tmp == 'M'))
      {
         is_float = true;
      }
      else if ((tmp != 'L') && (tmp != 'U'))
      {
         break;
      }
      pc.str.append(ctx.get());
   }

   /* skip the Microsoft-specific '64' suffix */
   if ((ctx.peek() == '6') && (ctx.peek(1) == '4'))
   {
      pc.str.append(ctx.get());
      pc.str.append(ctx.get());
   }

   pc.type = is_float ? CT_NUMBER_FP : CT_NUMBER;

   /* If there is anything left, then we are probably dealing with garbage or
    * some sick macro junk. Eat it.
    */
   parse_suffix(ctx, pc);

   return(true);
} // parse_number