/**
 * Patches the code following an iget so that the loaded value is brought back
 * to `original_field_type`.
 *
 * - iget-object: a check-cast to `original_field_type` (built by
 *   ModelMethodMerger::make_check_cast) is inserted after the iget, operating
 *   on the destination register of the instruction that follows `it`.
 * - iget-byte / iget-char / iget-short: the original field type is asserted to
 *   match the opcode and a freshly allocated int-to-{byte,char,short}
 *   instruction is handed to patch_iget_for_int_like_types.
 * - any other iget opcode: nothing is done.
 *
 * @param meth                method whose code is being patched
 * @param it                  iterator pointing at the iget instruction
 * @param original_field_type type the field had before it was retyped
 */
void patch_iget(DexMethod* meth,
                IRList::iterator it,
                DexType* original_field_type) {
  auto iget_insn = it->insn;
  const auto op = iget_insn->opcode();
  always_assert(is_iget(op));

  if (op == OPCODE_IGET_OBJECT) {
    // The register to cast is the destination of the instruction right after
    // the iget.
    auto result_reg = std::next(it)->insn->dest();
    auto check_cast =
        ModelMethodMerger::make_check_cast(original_field_type, result_reg);
    meth->get_code()->insert_after(iget_insn, check_cast);
  } else if (op == OPCODE_IGET_BYTE) {
    always_assert(original_field_type == get_byte_type());
    patch_iget_for_int_like_types(
        meth, it, new IRInstruction(OPCODE_INT_TO_BYTE));
  } else if (op == OPCODE_IGET_CHAR) {
    always_assert(original_field_type == get_char_type());
    patch_iget_for_int_like_types(
        meth, it, new IRInstruction(OPCODE_INT_TO_CHAR));
  } else if (op == OPCODE_IGET_SHORT) {
    always_assert(original_field_type == get_short_type());
    patch_iget_for_int_like_types(
        meth, it, new IRInstruction(OPCODE_INT_TO_SHORT));
  }
  // All remaining iget flavours need no patching.
}
/**
 * Determines the type and length of the token at the start of `text`.
 *
 * @param options  options forwarded to word_length
 * @param text     input characters (only the first `textlen` are examined)
 * @param textlen  number of characters available in `text`
 * @param tokenlen receives the length of the recognised token
 * @return the token type; TOKEN_NONE (with *tokenlen == 0) for empty input
 */
voikko_token_type Tokenizer::nextToken(voikko_options_t * options, const wchar_t * text,
                                       size_t textlen, size_t * tokenlen) {
    if (textlen == 0) {
        *tokenlen = 0;
        return TOKEN_NONE;
    }

    const wchar_t first = text[0];
    switch (get_char_type(first)) {
        case CHAR_LETTER:
        case CHAR_DIGIT:
            *tokenlen = word_length(text, textlen, options);
            return TOKEN_WORD;

        case CHAR_WHITESPACE: {
            // Extend the token over the whole run of whitespace characters.
            size_t run = 1;
            while (run < textlen && get_char_type(text[run]) == CHAR_WHITESPACE) {
                ++run;
            }
            *tokenlen = run;
            return TOKEN_WHITESPACE;
        }

        case CHAR_PUNCTUATION: {
            if (wcschr(L"-\u2010\u2011", first)) {
                // A hyphen-like character directly followed by a word is
                // reported as part of that word.
                if (textlen > 1) {
                    const size_t following = word_length(text + 1, textlen - 1, options);
                    if (following != 0) {
                        *tokenlen = following + 1;
                        return TOKEN_WORD;
                    }
                }
                *tokenlen = 1;
                return TOKEN_PUNCTUATION;
            }
            // Three consecutive dots form a single punctuation token.
            const bool ellipsis = textlen >= 3 && first == L'.'
                                  && text[1] == L'.' && text[2] == L'.';
            *tokenlen = ellipsis ? 3 : 1;
            return TOKEN_PUNCTUATION;
        }

        case CHAR_UNKNOWN:
            *tokenlen = 1;
            return TOKEN_UNKNOWN;
    }
    return TOKEN_NONE; // unreachable
}
/**
 * Builds a multi-value block holding the characters of `s` followed by a
 * terminating '\0'.
 *
 * The block's type is an array of the (qualified) char type with bounds
 * IInteger(0) and IInteger(s.length()+1). Each character is added as an
 * expression value block wrapping a char constant; consecutive sub-blocks are
 * placed at offsets that grow by the char type's get_bit_size().
 */
MultiValueBlock* NodeBuilder::new_cstr_value_block(String s) {
    ArrayType* array_type = _type_builder->get_array_type(
        get_qualified_type(get_char_type()),
        IInteger(0),
        IInteger(s.length()+1));
    MultiValueBlock* block = create_multi_value_block(_suif_env, array_type);

    IInteger char_size = get_char_type()->get_bit_size();
    IInteger position(0);

    int idx = 0;
    while (idx < s.length()) {
        block->add_sub_block(
            position,
            create_expression_value_block(_suif_env, char_const(s[idx])));
        position += char_size;
        idx++;
    }

    // Terminating NUL goes right after the last character.
    block->add_sub_block(
        position,
        create_expression_value_block(_suif_env, char_const('\0')));
    return block;
}
std::vector<DexField*> create_merger_fields( const DexType* owner, const std::vector<DexField*>& mergeable_fields) { std::vector<DexField*> res; size_t cnt = 0; for (const auto f : mergeable_fields) { auto type = f->get_type(); std::string name; if (type == get_byte_type() || type == get_char_type() || type == get_short_type() || type == get_int_type()) { type = get_int_type(); name = "i"; } else if (type == get_boolean_type()) { type = get_boolean_type(); name = "z"; } else if (type == get_long_type()) { type = get_long_type(); name = "j"; } else if (type == get_float_type()) { type = get_float_type(); name = "f"; } else if (type == get_double_type()) { type = get_double_type(); name = "d"; } else { static DexType* string_type = DexType::make_type("Ljava/lang/String;"); if (type == string_type) { type = string_type; name = "s"; } else { char t = type_shorty(type); always_assert(t == 'L' || t == '['); type = get_object_type(); name = "l"; } } name = name + std::to_string(cnt); auto field = static_cast<DexField*>( DexField::make_field(owner, DexString::make_string(name), type)); field->make_concrete(ACC_PUBLIC); res.push_back(field); cnt++; } TRACE(TERA, 8, " created merger fields %d \n", res.size()); return res; }
//! Returns all the builtins of the type system ObjectList<Type> Type::get_integer_types() { Type all_integer_types[] = { Type(get_char_type()), Type(get_signed_int_type()), Type(get_signed_short_int_type()), Type(get_signed_long_int_type()), Type(get_signed_long_long_int_type()), Type(get_signed_char_type()), Type(get_unsigned_int_type()), Type(get_unsigned_short_int_type()), Type(get_unsigned_long_int_type()), Type(get_unsigned_long_long_int_type()), Type(get_unsigned_char_type()), }; return ObjectList<Type>(all_integer_types); }
/**
 * Creates an integer constant of the char type holding the value of `c`.
 */
IntConstant* NodeBuilder::char_const(char c) {
    IntConstant* constant =
        create_int_constant(_suif_env, get_char_type(), c);
    return constant;
}
/******************************************************** * next_token -- read the next token in an input stream * * * * Returns * * next token * ********************************************************/ enum TOKEN_TYPE next_token(void) { if (in_comment) return (read_comment()); while (is_char_type(in_cur_char(), C_WHITE)) { in_read_char(); } if (in_cur_char() == EOF) return (T_EOF); switch (get_char_type(in_cur_char())) { case C_NEWLINE: in_read_char(); return (T_NEWLINE); case C_ALPHA: while (is_char_type(in_cur_char(), C_ALPHA_NUMERIC)) in_read_char(); return (T_ID); case C_DIGIT: in_read_char(); if ((in_cur_char() == 'X') || (in_cur_char() == 'x')) { in_read_char(); while (is_char_type(in_cur_char(), C_HEX_DIGIT)) in_read_char(); return (T_NUMBER); } while (is_char_type(in_cur_char(), C_DIGIT)) in_read_char(); return (T_NUMBER); case C_SLASH: /* Check for '/', '*' characters */ if (in_next_char() == '*') { return (read_comment()); } /* Fall through */ case C_OPERATOR: in_read_char(); return (T_OPERATOR); case C_L_PAREN: in_read_char(); return (T_L_PAREN); case C_R_PAREN: in_read_char(); return (T_R_PAREN); case C_L_CURLY: in_read_char(); return (T_L_CURLY); case C_R_CURLY: in_read_char(); return (T_R_CURLY); case C_DOUBLE: while (1) { in_read_char(); /* Check for end of string */ if (in_cur_char() == '"') break; /* Escape character, then skip the next character */ if (in_cur_char() == '\\') in_read_char(); } in_read_char(); return (T_STRING); case C_SINGLE: while (1) { in_read_char(); /* Check for end of character */ if (in_cur_char() == '\'') break; /* Escape character, then skip the next character */ if (in_cur_char() == '\\') in_read_char(); } in_read_char(); return (T_STRING); default: fprintf(stderr, "Internal error: Very strange character\n"); abort(); } fprintf(stderr, "Internal error: We should never get here\n"); abort(); return (T_EOF); /* Should never get here either */ /* But we put in the return to avoid a compiler */ /* warning. */ }
/**
 * Checks whether `text` starts with something that looks like an http(s) URL
 * or an email address.
 *
 * @param text    characters to examine (only the first `textlen` are read)
 * @param textlen number of characters available in `text`
 * @return the length of the URL or email address at the start of `text`,
 *         or 0 if neither was recognised
 */
static size_t findUrlOrEmail(const wchar_t * text, size_t textlen) {
    // 12 is a rough lower bound for a length of a reasonable real world http URL.
    if (textlen < 12 || (wcsncmp(L"http://", text, 7) != 0 && wcsncmp(L"https://", text, 8) != 0)) {
        // try finding an email address instead
        if (textlen < 6) {
            return 0;
        }
        // An address is accepted only once both an '@' and a later '.' were seen.
        bool foundAt = false;
        bool foundDot = false;
        for (size_t i = 0; i < textlen; ++i) {
            switch (get_char_type(text[i])) {
                case CHAR_WHITESPACE:
                    // Whitespace ends the candidate address.
                    if (foundAt && foundDot) {
                        return i;
                    }
                    return 0;
                case CHAR_UNKNOWN:
                    if (text[i] == L'@') {
                        // A second '@' disqualifies the candidate.
                        if (foundAt) {
                            return 0;
                        }
                        foundAt = true;
                        break;
                    }
                    // Any other unknown-class character outside this list ends the candidate.
                    if (wcschr(L"#$%*+=^_`|~", text[i]) == 0) {
                        if (foundAt && foundDot) {
                            return i;
                        }
                        return 0;
                    }
                    break;
                case CHAR_DIGIT:
                case CHAR_LETTER:
                    break;
                case CHAR_PUNCTUATION:
                    if (text[i] == L'.' && foundAt) {
                        // A dot at the very end, or followed by whitespace, is not
                        // counted as part of the address.
                        if (i + 1 == textlen || get_char_type(text[i+1]) == CHAR_WHITESPACE) {
                            if (foundDot) {
                                return i;
                            }
                            return 0;
                        }
                        foundDot = true;
                        break;
                    }
                    // Punctuation outside this list ends the candidate.
                    if (wcschr(L"!&'-/?{}.", text[i]) == 0) {
                        if (foundAt && foundDot) {
                            return i;
                        }
                        return 0;
                    }
                    break;
            }
        }
        // The whole input was consumed without hitting a terminator.
        if (foundAt && foundDot) {
            return textlen;
        }
        return 0;
    }
    // URL case: start scanning right after "http://" (7) or "https://" (8).
    for (size_t i = (text[4] == L's' ? 8 : 7); i < textlen; ++i) {
        switch (get_char_type(text[i])) {
            case CHAR_WHITESPACE:
                return i;
            case CHAR_UNKNOWN:
                // Only '=', '#' and '%' are accepted from the unknown class.
                if (wcschr(L"=#%", text[i]) == 0) {
                    return i;
                }
                continue;
            case CHAR_DIGIT:
            case CHAR_LETTER:
                continue;
            case CHAR_PUNCTUATION:
                // A dot at the very end, or followed by whitespace, is not part
                // of the URL; every other punctuation character is accepted and
                // scanning continues with the next character.
                if (text[i] == L'.' && (i + 1 == textlen || get_char_type(text[i+1]) == CHAR_WHITESPACE)) {
                    return i;
                }
        }
    }
    return textlen;
}
/**
 * Computes the length of the word that starts at the beginning of `text`.
 *
 * If findUrlOrEmail recognises a URL or email address, its length is returned
 * directly. Otherwise letters and digits are consumed, and selected
 * punctuation characters are kept inside the word depending on the character
 * that follows them.
 *
 * @param text    characters to examine (only the first `textlen` are read)
 * @param textlen number of characters available in `text`
 * @param options options; when `ignore_dot` is set, a dot that would end the
 *                word is counted as part of it
 * @return number of characters belonging to the word (may be 0)
 */
static size_t word_length(const wchar_t * text, size_t textlen, voikko_options_t * options) {
    size_t wlen = 0;
    // True while the most recently consumed letter/digit was a digit.
    bool processing_number = false;

    const size_t urlLength = findUrlOrEmail(text, textlen);
    if (urlLength != 0) {
        return urlLength;
    }

    // Extra length added for a word-ending dot: 1 if ignore_dot is set, else 0.
    size_t adot;
    if (options->ignore_dot) {
        adot = 1;
    }
    else adot = 0;
    while (wlen < textlen) {
        switch (get_char_type(text[wlen])) {
            case CHAR_LETTER:
                processing_number = false;
                wlen++;
                break;
            case CHAR_DIGIT:
                processing_number = true;
                wlen++;
                break;
            case CHAR_WHITESPACE:
            case CHAR_UNKNOWN:
                return wlen;
            case CHAR_PUNCTUATION:
                // In the inner switch, `break` means "this punctuation character
                // belongs to the word": control reaches the wlen++ below.
                switch (text[wlen]) {
                    case L'\'':
                    case L'\u2019': /* RIGHT SINGLE QUOTATION MARK */
                    case L':':
                        // Kept only when directly followed by a letter.
                        if (wlen + 1 == textlen) return wlen;
                        if (get_char_type(text[wlen+1]) == CHAR_LETTER) break;
                        return wlen;
                    case L'-':
                    case L'\u00AD': /* SOFT HYPHEN */
                    case L'\u2010': /* HYPHEN */
                    case L'\u2011': /* NON-BREAKING HYPHEN */
                        // A trailing hyphen is included in the word.
                        if (wlen + 1 == textlen) {
                            return wlen + 1;
                        }
                        if (isFinnishQuotationMark(text[wlen+1])) {
                            return wlen + 1;
                        }
                        switch (get_char_type(text[wlen+1])) {
                            case CHAR_LETTER:
                            case CHAR_DIGIT:
                                // The word continues after the hyphen.
                                break;
                            case CHAR_WHITESPACE:
                            case CHAR_UNKNOWN:
                                return wlen + 1;
                            case CHAR_PUNCTUATION:
                                // The hyphen is kept before a comma and dropped
                                // before any other punctuation.
                                if (text[wlen+1] == L',') {
                                    return wlen + 1;
                                }
                                return wlen;
                        }
                        break;
                    case L'.':
                        // A dot stays inside the word only when followed by a
                        // letter or digit; otherwise it ends the word and is
                        // counted only if adot is 1.
                        if (wlen + 1 == textlen) return wlen + adot;
                        switch (get_char_type(text[wlen+1])) {
                            case CHAR_LETTER:
                            case CHAR_DIGIT:
                                break;
                            case CHAR_WHITESPACE:
                            case CHAR_UNKNOWN:
                            case CHAR_PUNCTUATION:
                                return wlen + adot;
                        }
                        break;
                    case L',':
                        // A comma is kept only between digits.
                        if (!processing_number) return wlen;
                        if (wlen + 1 == textlen) return wlen;
                        if (get_char_type(text[wlen+1]) == CHAR_DIGIT) break;
                        return wlen;
                    default:
                        return wlen;
                }
                wlen++;
        }
    }
    return textlen;
}
// Returns the canonical type `char32`. inline char_type& builder::get_char32_type() { return get_char_type(32); }
// Returns the canonical type `char16`. inline char_type& builder::get_char16_type() { return get_char_type(16); }
// Returns the canonical type `char8`. inline char_type& builder::get_char8_type() { return get_char_type(8); }