// Debug helper: writes a one-line dump of `inst` to llvm::outs().
//
// Output format:
//   <opcode name> (<operand count>) <operand>, <operand>, ... \n
// where each operand is rendered as
//   register  : <reg name>(<reg class name or CLS<n>>, <operand type>)
//   immediate : <value>(<operand type>)
//   other     : <UNK>
//
// The register class and operand type come from the instruction descriptor
// looked up in MII. The descriptor only describes its first
// `getNumOperands()` operands; an MCInst may carry more, so operands beyond
// that count are printed with a `<no-desc>` marker instead of reading past
// the end of the descriptor's OpInfo array.
static void printInst(const llvm::MCInst& inst) {
  const unsigned opcode = inst.getOpcode();
  const llvm::MCInstrDesc& desc = MII->get(opcode);
  const unsigned numOperands = inst.getNumOperands();
  const unsigned numDescribed = desc.getNumOperands();

  llvm::outs() << MII->getName(opcode) << " (" << numOperands << ") ";

  for (unsigned iop = 0; iop < numOperands; ++iop) {
    const llvm::MCOperand& op = inst.getOperand(iop);
    // Only the first `numDescribed` entries of desc.OpInfo are valid.
    const bool described = iop < numDescribed;

    if (op.isReg()) {
      llvm::outs() << MRI->getName(op.getReg()) << "(";
      if (described) {
        const auto& info = desc.OpInfo[iop];
        // RegClass is a signed field; negative values are not valid class
        // indices and are printed numerically, as are any other
        // out-of-range values.
        const int regClass = static_cast<int>(info.RegClass);
        if (regClass >= 0 &&
            static_cast<unsigned>(regClass) < MRI->getNumRegClasses()) {
          llvm::outs() << MRI->getRegClass(regClass).getName();
        } else {
          llvm::outs() << "CLS" << regClass;
        }
        // Widen before streaming so the type is printed as a number rather
        // than as a character.
        llvm::outs() << ", " << static_cast<uint64_t>(info.OperandType);
      } else {
        llvm::outs() << "<no-desc>";
      }
      llvm::outs() << ")";
    } else if (op.isImm()) {
      llvm::outs() << op.getImm() << "(";
      if (described) {
        llvm::outs() << static_cast<uint64_t>(desc.OpInfo[iop].OperandType);
      } else {
        llvm::outs() << "<no-desc>";
      }
      llvm::outs() << ")";
    } else {
      llvm::outs() << "<UNK>";
    }
    llvm::outs() << ", ";
  }
  llvm::outs() << "\n";
}
// Decodes the instruction, and returns the number of bytes decoded. size_t ArchDecodeInstruction(const uint8_t *bytes, const uint8_t *bytes_end, uintptr_t va, llvm::MCInst &inst) { size_t total_size = 0; size_t max_size = static_cast<size_t>(bytes_end - bytes); std::unordered_set<unsigned> prefixes; for (; total_size < max_size; ) { llvm::ArrayRef<uint8_t> bytes_to_decode( &(bytes[total_size]), max_size - total_size); uint64_t size = 0; auto decode_status = gDisassembler->getInstruction( inst, size, bytes_to_decode, va, llvm::nulls(), llvm::nulls()); if (llvm::MCDisassembler::Success != decode_status) { return 0; } total_size += size; switch (auto op_code = inst.getOpcode()) { case llvm::X86::CS_PREFIX: case llvm::X86::DATA16_PREFIX: case llvm::X86::DS_PREFIX: case llvm::X86::ES_PREFIX: case llvm::X86::FS_PREFIX: case llvm::X86::GS_PREFIX: case llvm::X86::LOCK_PREFIX: case llvm::X86::REPNE_PREFIX: case llvm::X86::REP_PREFIX: case llvm::X86::REX64_PREFIX: case llvm::X86::SS_PREFIX: case llvm::X86::XACQUIRE_PREFIX: case llvm::X86::XRELEASE_PREFIX: prefixes.insert(op_code); break; default: max_size = 0; // Stop decoding. break; } } FixupInstruction(inst, prefixes); return total_size; }
// Returns the result of asking the instruction descriptor for `mc_inst`'s
// opcode (looked up through m_instr_info_ap) whether the instruction may
// affect control flow, given the register info held by m_reg_info_ap.
bool DisassemblerLLVMC::LLVMCDisassembler::CanBranch (llvm::MCInst &mc_inst)
{
    const auto &instr_desc = m_instr_info_ap->get(mc_inst.getOpcode());
    const auto &reg_info = *m_reg_info_ap.get();
    return instr_desc.mayAffectControlFlow(mc_inst, reg_info);
}
// Convert the given assembly instruction into an inline ASM operation in lieu // of decompiling it. The output instruction will look something like this // (although note that i128 doesn't work properly with LLVM codegen for the // inline ASM instructions, necessitating a vector instead). // %151 = load i128, i128* %XMM0_read // %152 = bitcast i128 %151 to <16 x i8> // %153 = load i128, i128* %XMM1_read // %154 = bitcast i128 %153 to <16 x i8> // %AESDECrr = call <16 x i8> asm "\09aesdec\09%xmm1, %xmm0", "={XMM0},{XMM0},{XMM1}"(<16 x i8> %152, <16 x i8> %154) // %155 = bitcast <16 x i8> %AESDECrr to i128 // store volatile i128 %155, i128* %XMM0_write void ArchBuildInlineAsm(llvm::MCInst &inst, llvm::BasicBlock *block) { auto opcode = inst.getOpcode(); // Use the printer to build the ASM string. We'll need to escape the $ in // register names with $$. std::stringstream asmString; { std::string outS; llvm::raw_string_ostream strOut(outS); gIP->printInst(&inst, strOut, "", *gSTI); for (char c : strOut.str()) { if (c == '$') asmString << "$$"; else asmString << c; } } // Next, find all the registers being used as definitions or uses in the // inline ASM. This will write up the constraints for us, as well as // provide us with a list of types (for the inline ASM output) and a list of // values to pass into the string. llvm::SmallVector<llvm::Value *, 3> operands; llvm::SmallVector<llvm::Type *, 3> resultTypes; std::stringstream constraints; for (unsigned i = 0; i < inst.getNumOperands(); i++) { llvm::MCOperand &op = inst.getOperand(i); if (op.isReg()) { unsigned regSize = ArchRegisterSize(op.getReg()); if (constraints.tellp() > 0) constraints << ","; if (i < gMII->get(opcode).getNumDefs()) { constraints << "="; if (regSize > 64) { // LLVM can't handle register constraints of i128, so we // need to map this to <16 x i8>. 
resultTypes.push_back(llvm::VectorType::get( llvm::Type::getInt8Ty(block->getContext()), regSize / 8)); } else { resultTypes.push_back(llvm::IntegerType::get(block->getContext(), regSize)); } } else { auto readReg = GENERIC_MC_READREG(block, op.getReg(), regSize); if (regSize > 64) { // LLVM can't handle register constraints of i128, so we // need to map this to <16 x i8>. readReg = llvm::CastInst::Create(llvm::Instruction::BitCast, readReg, llvm::VectorType::get(llvm::Type::getInt8Ty(block->getContext()), regSize / 8), "", block); } operands.push_back(readReg); } constraints << "{" << gMRI->getName(op.getReg()) << "}"; } } // With all of these pieces, piece together the actual call to the inline ASM // string. llvm::SmallVector<llvm::Type *, 3> argTypes; for (auto val : operands) argTypes.push_back(val->getType()); llvm::Type *returnTy; if (resultTypes.empty()) returnTy = llvm::Type::getVoidTy(block->getContext()); else if (resultTypes.size() == 1) returnTy = resultTypes[0]; else returnTy = llvm::StructType::get(block->getContext(), resultTypes); auto asmTy = llvm::FunctionType::get(returnTy, argTypes, false); auto callee = llvm::InlineAsm::get(asmTy, asmString.str(), constraints.str(), false); llvm::Value *resultPack = llvm::CallInst::Create(callee, operands, "", block); // Unpack the called registers into the LLVM values. for (unsigned i = 0; i < resultTypes.size(); i++) { llvm::Value *result = resultTypes.size() == 1 ? resultPack : llvm::ExtractValueInst::Create(resultPack, i, "", block); llvm::Type *ty = resultTypes[i]; // Cast vector outputs to iXYZ for R_WRITE. if (ty->isVectorTy()) { ty = llvm::Type::getIntNTy(block->getContext(), ty->getVectorNumElements() * 8); result = llvm::CastInst::Create(llvm::Instruction::BitCast, result, ty, "", block); } unsigned regNo = inst.getOperand(i).getReg(); GENERIC_MC_WRITEREG(block, regNo, result); } }