void doJumpIndexTableViaSwitch(
        BasicBlock *&block,
        InstPtr ip)
{
    Function *F = block->getParent();
    Module *M = F->getParent();
    // we know this conforms to
    // movzx reg32, [base+disp]

    // sanity check
    const MCInst &inst = ip->get_inst();
    const MCOperand& dest = OP(0);
    const MCOperand& base = OP(1);

    TASSERT(base.isReg(), "Conformant jump index tables need base to be a register");
    TASSERT(dest.isReg(), "Conformant jump index tables need to write to a register");

    JumpIndexTablePtr idxptr = ip->get_jump_index_table();

    // to ensure no negative entries
    Value *adjustment = CONST_V<32>(block, idxptr->getInitialEntry());
    Value *reg_val = R_READ<32>(block, base.getReg());
    Value *real_index =
        BinaryOperator::Create(Instruction::Add, adjustment, reg_val, "", block);

    BasicBlock *continueBlock =
        BasicBlock::Create(block->getContext(), "", F, 0);

    // create a default block that just traps
    BasicBlock *defaultBlock =
        BasicBlock::Create(block->getContext(), "", F, 0);
    Function *trapFn = Intrinsic::getDeclaration(M, Intrinsic::trap);
    CallInst::Create(trapFn, "", defaultBlock);
    BranchInst::Create(continueBlock, defaultBlock);
    // end default block

    const std::vector<uint8_t> &idxblocks = idxptr->getJumpIndexTable();

    // create a switch inst
    SwitchInst *theSwitch = SwitchInst::Create(
            real_index,
            defaultBlock,
            idxblocks.size(),
            block);

    // populate switch
    int myindex = 0;
    for(std::vector<uint8_t>::const_iterator itr = idxblocks.begin();
        itr != idxblocks.end();
        itr++)
    {
        BasicBlock *writeBl =
            emitJumpIndexWrite(F, *itr, dest.getReg(), continueBlock);
        theSwitch->addCase(CONST_V<32>(block, myindex), writeBl);
        ++myindex;
    }

    // the new block to write to is the continue block
    block = continueBlock;
}
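// emitJumpIndexWrite is called above but not shown here; a minimal sketch of
// what it might look like follows, assuming R_WRITE and CONST_V behave as in
// the rest of this file (this is an illustration, not the canonical
// implementation). Each case block stores the table byte into the destination
// register and falls through to the shared continuation block.
static BasicBlock *emitJumpIndexWrite(
        Function *F,
        uint8_t idx_entry,
        unsigned dest_reg,
        BasicBlock *contBlock)
{
    // one block per table entry: dest_reg := idx_entry
    BasicBlock *writeBl = BasicBlock::Create(contBlock->getContext(), "", F, 0);
    R_WRITE<32>(writeBl, dest_reg, CONST_V<32>(writeBl, idx_entry));
    BranchInst::Create(contBlock, writeBl);
    return writeBl;
}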
// assume the immediate references code if:
// * we are dealing with a fully linked ELF
// * the immediate is in the range of a valid code or data section
static bool setHeuristicRef(ExecutableContainer *c,
        InstPtr I,
        int opnum,
        stack<VA> &funcs,
        raw_ostream &out,
        const std::string whichInst)
{
    MCOperand op;
    std::string imp_name;
    ElfTarget *elft = dynamic_cast<ElfTarget*>(c);

    op = I->get_inst().getOperand(opnum);
    LASSERT(op.isImm(), "No immediate operand for " + whichInst);
    VA imm = op.getImm();

    if(elft && elft->isLinked()) {
        if (elft->is_in_code(imm)) {
            // this instruction references code
            I->set_call_tgt(imm);
            // make sure we disassemble at this new address
            funcs.push(imm);
            out << "Found new function entry from " << whichInst << ": "
                << to_string<VA>(imm, hex) << "\n";
            return true;
        } else if (elft->is_in_data(imm)) {
            out << "Adding local data ref to: " << to_string<VA>(imm, hex) << "\n";
            I->set_data_offset(imm);
        } else if (c->find_import_name(imm, imp_name)) {
            out << "Import name is: " << imp_name << "\n";
        }
    }

    return false;
}
static InstTransResult translate_CALL32m(NativeModulePtr natM,
        BasicBlock *&block, InstPtr ip, MCInst &inst)
{
    InstTransResult ret;

    // is this an external call?
    if( ip->has_ext_call_target() ) {
        std::string s = ip->get_ext_call_target()->getSymbolName();
        ret = doCallPCExtern(block, s);
    // not an external call, but some weird way of calling a local function?
    } else if( ip->has_call_tgt() ) {
        ret = doCallPC(ip, block, ip->get_call_tgt(0));
    }
    // is this referencing global data?
    else if( ip->is_data_offset() ) {
        doCallM<32>(block, ip, STD_GLOBAL_OP(0));
        ret = ContinueBlock;
    // is this a simple address computation?
    } else {
        doCallM<32>(block, ip, ADDR(0));
        ret = ContinueBlock;
    }

    return ret;
}
// 'width' is the operand width in bits; the body refers to it, so this
// handler must be a template as elsewhere in this file
template <int width>
static InstTransResult translate_MOVoa(NativeModulePtr natM,
        BasicBlock *&block, InstPtr ip, MCInst &inst)
{
    InstTransResult ret;
    Function *F = block->getParent();

    // loading functions is only available if it's a 32-bit offset
    if( ip->has_external_ref() && width == 32) {
        Value *addrInt = getValueForExternal<32>(F->getParent(), ip, block);
        TASSERT(addrInt != 0, "Could not get external data reference");
        R_WRITE<width>(block, X86::EAX, addrInt);
        return ContinueBlock;
        //ret = doRMMov<32>(ip, block, addrInt, MCOperand::CreateReg(X86::EAX));
    } else if( ip->has_call_tgt() && width == 32 ) {
        Value *callback_fn = makeCallbackForLocalFunction(
                block->getParent()->getParent(),
                ip->get_call_tgt(0) );
        Value *addrInt = new PtrToIntInst(
                callback_fn, llvm::Type::getInt32Ty(block->getContext()), "", block);
        ret = doRMMov<32>(ip, block, addrInt, MCOperand::CreateReg(X86::EAX));
    } else if( ip->is_data_offset() ) {
        ret = doRMMov<width>(ip, block,
                GLOBAL_DATA_OFFSET(block, natM, ip),
                MCOperand::CreateReg(X86::EAX) );
    } else {
        Value *addrv = CONST_V<width>(block, OP(0).getImm());
        ret = doRMMov<width>(ip, block, addrv, MCOperand::CreateReg(X86::EAX));
    }

    return ret;
}
template <int width>
static InstTransResult doMOVSrm(NativeModulePtr natM,
        BasicBlock *&block, InstPtr ip, MCInst &inst)
{
    InstTransResult ret;
    Function *F = block->getParent();

    // a MOV from memory to an XMM register will set the unused portion
    // of the XMM register to 0s.
    // Just set the whole thing to zero, and let the subsequent
    // write take care of the rest.
    R_WRITE<128>(block, OP(0).getReg(), CONST_V<128>(block, 0));

    if( ip->has_external_ref()) {
        Value *addrInt = getValueForExternal<width>(F->getParent(), ip, block);
        TASSERT(addrInt != NULL, "Could not get address for external");
        R_WRITE<width>(block, OP(0).getReg(), addrInt);
        return ContinueBlock;
    } else if( ip->is_data_offset() ) {
        ret = doRMMov<width>(ip, block, GLOBAL( block, natM, inst, ip, 1 ), OP(0) );
    } else {
        ret = doRMMov<width>(ip, block, ADDR(1), OP(0));
    }

    return ret;
}
static bool parseJumpIndexTable(ExecutableContainer *c,
        InstPtr index_insn,
        const vector<VA> &jmptable_entries,
        raw_ostream &out)
{
    VA reloc_offset = index_insn->get_reloc_offset();
    if (reloc_offset == 0) {
        out << "Unsupported jump index write; no relocation\n";
        // this jump index probably doesn't use a table
        return false;
    }

    VA addrInInst = index_insn->get_loc() + reloc_offset;
    VA indexTableEntry;
    VA symbolSize;
    if(!c->relocate_addr(addrInInst, indexTableEntry, symbolSize)) {
        out << "Not a jump index table: can't relocate relocation in index insn\n";
        // can't relocate, something bad happened
        return false;
    }

    // assume we always index the start of the index table...
    // this might not be correct.
    // this means we set the initial entry to zero, the first element
    int initial_entry = 0;

    uint8_t b;
    int bindex = 0;
    // loop while all the bytes we read can be table indexes
    vector<uint8_t> index_entries;
    while( (indexTableEntry+bindex) < c->getExtent() ) {
        c->readByte(indexTableEntry+bindex, &b);
        // a byte equal to the table size would index one past the end,
        // so it also terminates the scan
        if (b >= jmptable_entries.size()) {
            break;
        }
        out << "Read index table byte: "
            << to_string<uint32_t>((uint32_t)b, hex) << "\n";
        index_entries.push_back(b);
        bindex++;
    }

    JumpIndexTable *jit = new JumpIndexTable(index_entries, initial_entry);
    index_insn->set_jump_index_table(JumpIndexTablePtr(jit));
    return true;
}
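// A worked example of the scan above (values are illustrative, not from a
// real binary): given jmptable_entries = {0x401000, 0x401020, 0x401040}
// (size 3), bytes 0x00..0x02 are valid indexes and the first byte >= 3 ends
// the table. For raw bytes {01 00 02 02 ff}, index_entries becomes
// {1, 0, 2, 2} and 0xff terminates the scan.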
static InstTransResult translate_MOV32mr(NativeModulePtr natM,
        BasicBlock *&block, InstPtr ip, MCInst &inst)
{
    InstTransResult ret;
    Function *F = block->getParent();

    if( ip->has_external_ref()) {
        Value *addrInt = getValueForExternal<32>(F->getParent(), ip, block);
        TASSERT(addrInt != NULL, "Could not get address for external");
        return doMRMov<32>(ip, block, addrInt, OP(5) );
    } else if( ip->is_data_offset() ) {
        ret = doMRMov<32>(ip, block, GLOBAL( block, natM, inst, ip, 0), OP(5) );
    } else {
        ret = doMRMov<32>(ip, block, ADDR(0), OP(5));
    }

    return ret;
}
bool dataInCodeHeuristic(
        ExecutableContainer *c,
        InstPtr I,
        uint32_t addr,
        list<VA> &funcs,
        uint32_t relocSize)
{
    // detect SEH handler
    if(I->get_inst().getOpcode() == X86::PUSHi32) {
        uint32_t dw1;
        uint8_t *ptr = (uint8_t*)&dw1;
        c->readByte(addr+0, ptr+0);
        c->readByte(addr+1, ptr+1);
        c->readByte(addr+2, ptr+2);
        c->readByte(addr+3, ptr+3);

        if(dw1 == 0xFFFFFFFE) {
            llvm::outs() << "WARNING: Heuristically detected SEH handler at: "
                         << to_string<VA>(addr, hex) << "\n";
            return treatCodeAsData(c, addr, 0x28, funcs);
        }
    } else {
        return treatCodeAsData(c, addr, relocSize, funcs);
    }

    return false;
}
template <int dstWidth, int srcWidth>
static InstTransResult doMovZXRM(InstPtr ip, BasicBlock *&b,
        const MCOperand &dst,
        Value *src)
{
    NASSERT(dst.isReg());
    NASSERT(src != NULL);

    if( dstWidth == 32 && srcWidth == 8 && ip->has_jump_index_table()) {
        doJumpIndexTableViaSwitch(b, ip);
        return ContinueBlock;
    }

    //do a read from src of the appropriate width
    Value *fromSrc = M_READ<srcWidth>(ip, b, src);

    //extend
    Type *toT = Type::getIntNTy(b->getContext(), dstWidth);
    Value *xt = new ZExtInst(fromSrc, toT, "", b);

    //write into dst
    R_WRITE<dstWidth>(b, dst.getReg(), xt);

    return ContinueBlock;
}
static InstTransResult translate_JMP32r(NativeModulePtr natM,
        BasicBlock *&block, InstPtr ip, MCInst &inst)
{
    const MCOperand &tgtOp = inst.getOperand(0);

    TASSERT(inst.getNumOperands() == 1, "");
    TASSERT(tgtOp.isReg(), "");

    //read the register
    Value *fromReg = R_READ<32>(block, tgtOp.getReg());

    if (ip->has_jump_table()) {
        // this is a jump table that got converted
        // into a table in the data section
        llvm::dbgs() << __FUNCTION__ << ": jump table via register: "
                     << to_string<VA>(ip->get_loc(), std::hex) << "\n";

        BasicBlock *defaultb = nullptr;

        doJumpTableViaSwitchReg(block, ip, fromReg, defaultb);
        TASSERT(defaultb != nullptr, "Default block has to exist");

        // in the default case, fall back to an indirect call via doCallV
        doCallV(defaultb, ip, fromReg);
        return doRet(defaultb);
    } else {
        // translate the JMP32r as a call/ret
        llvm::dbgs() << __FUNCTION__ << ": regular jump via register: "
                     << to_string<VA>(ip->get_loc(), std::hex) << "\n";

        doCallV(block, ip, fromReg);
        return doRet(block);
    }
}
void doJumpTableViaSwitch(
        NativeModulePtr natM,
        BasicBlock *&block,
        InstPtr ip,
        MCInst &inst)
{
    llvm::dbgs() << __FUNCTION__ << ": Doing jump table via switch\n";
    Function *F = block->getParent();
    Module *M = F->getParent();
    // we know this conforms to
    // jmp [reg*4+displacement]

    // sanity check
    const MCOperand& scale = OP(1);
    const MCOperand& index = OP(2);

    TASSERT(index.isReg(), "Conformant jump tables need index to be a register");
    TASSERT(scale.isImm() && scale.getImm() == 4, "Conformant jump tables have scale == 4");

    MCSJumpTablePtr jmpptr = ip->get_jump_table();

    // to ensure no negative entries
    Value *adjustment = CONST_V<32>(block, jmpptr->getInitialEntry());
    Value *reg_val = R_READ<32>(block, index.getReg());
    Value *real_index =
        BinaryOperator::Create(Instruction::Add, adjustment, reg_val, "", block);

    // create a default block that just traps
    BasicBlock *defaultBlock =
        BasicBlock::Create(block->getContext(), "", block->getParent(), 0);
    Function *trapFn = Intrinsic::getDeclaration(M, Intrinsic::trap);
    CallInst::Create(trapFn, "", defaultBlock);
    ReturnInst::Create(defaultBlock->getContext(), defaultBlock);
    // end default block

    const std::vector<VA> &jmpblocks = jmpptr->getJumpTable();

    // create a switch inst
    SwitchInst *theSwitch = SwitchInst::Create(
            real_index,
            defaultBlock,
            jmpblocks.size(),
            block);

    // populate switch
    int myindex = 0;
    for(std::vector<VA>::const_iterator itr = jmpblocks.begin();
        itr != jmpblocks.end();
        itr++)
    {
        std::string bbname = "block_0x"+to_string<VA>(*itr, std::hex);
        BasicBlock *toBlock = bbFromStrName(bbname, F);
        TASSERT(toBlock != NULL, "Could not find block: "+bbname);
        theSwitch->addCase(CONST_V<32>(block, myindex), toBlock);
        ++myindex;
    }
}
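// bbFromStrName (used above and in doJumpTableViaSwitchReg below) is assumed
// to resolve an already-created block by its "block_0x..." label. A minimal
// sketch, not necessarily the canonical implementation, is a linear scan over
// the function's basic blocks:
static BasicBlock *bbFromStrName(const std::string &name, Function *F) {
    for (Function::iterator it = F->begin(); it != F->end(); ++it) {
        if (it->getName() == name) {
            return &*it;  // found a block whose label matches
        }
    }
    return NULL;  // callers assert on this
}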
static InstTransResult translate_LEA64_32r(NativeModulePtr natM,
        BasicBlock *&block, InstPtr ip, MCInst &inst)
{
    InstTransResult ret;
    Function *F = block->getParent();

    if( ip->has_call_tgt() ) {
        Value *callback_fn = archMakeCallbackForLocalFunction(
                block->getParent()->getParent(),
                ip->get_call_tgt(0));
        Value *addrInt = new PtrToIntInst(
                callback_fn, llvm::Type::getInt32Ty(block->getContext()), "", block);
        ret = doLeaV<32>(block, OP(0), addrInt);
    } else if( ip->is_data_offset() ) {
        ret = doLea<32>(ip, block, STD_GLOBAL_OP(1), OP(0));
    } else {
        ret = doLea<32>(ip, block, ADDR(1), OP(0));
    }

    return ret;
}
// return true if this instruction
// branches via a memory lookup
static bool isBranchViaMemory(InstPtr inst) {
    switch(inst->get_inst().getOpcode()) {
        case X86::JMP32m:
        case X86::CALL32m:
            return true;
        default:
            return false;
    }
}
template <int width>
static InstTransResult translate_MOVao(NativeModulePtr natM,
        BasicBlock *&block, InstPtr ip, MCInst &inst)
{
    InstTransResult ret;
    Function *F = block->getParent();

    if( ip->is_data_offset() ) {
        ret = doMRMov<width>(ip, block,
                GLOBAL_DATA_OFFSET(block, natM, ip),
                MCOperand::CreateReg(X86::EAX) );
    } else {
        Value *addrv = CONST_V<width>(block, OP(0).getImm());
        ret = doMRMov<width>(ip, block, addrv, MCOperand::CreateReg(X86::EAX));
    }

    return ret;
}
static InstTransResult translate_MOV32ri(NativeModulePtr natM,
        BasicBlock *&block, InstPtr ip, MCInst &inst)
{
    InstTransResult ret;
    Function *F = block->getParent();

    if( ip->has_call_tgt() ) {
        Value *callback_fn = makeCallbackForLocalFunction(
                block->getParent()->getParent(),
                ip->get_call_tgt(0) );
        Value *addrInt = new PtrToIntInst(
                callback_fn, llvm::Type::getInt32Ty(block->getContext()), "", block);
        ret = doRIMovV<32>(ip, block, addrInt, OP(0) );
    } else if( ip->is_data_offset() ) {
        ret = doRIMovV<32>(ip, block,
                GLOBAL_DATA_OFFSET(block, natM, ip),
                OP(0) );
    } else {
        ret = doRIMov<32>(ip, block, OP(1), OP(0));
    }

    return ret;
}
//GENERIC_TRANSLATION_32MI(MOV32mi,
//        doMIMov<32>(ip, block, ADDR(0), OP(5)),
//        doMIMov<32>(ip, block, STD_GLOBAL_OP(0), OP(5)),
//        doMIMovV<32>(ip, block, ADDR_NOREF(0), GLOBAL_DATA_OFFSET(block, natM, ip))
//        )
//
static InstTransResult translate_MOV32mi(NativeModulePtr natM,
        BasicBlock *&block, InstPtr ip, MCInst &inst)
{
    InstTransResult ret;
    Function *F = block->getParent();

    if( ip->has_call_tgt() ) {
        Value *callback_fn = makeCallbackForLocalFunction(
                block->getParent()->getParent(),
                ip->get_call_tgt(0) );
        Value *addrInt = new PtrToIntInst(
                callback_fn, llvm::Type::getInt32Ty(block->getContext()), "", block);
        ret = doMIMovV<32>(ip, block, ADDR(0), addrInt);
    } else if( ip->is_data_offset() ) {
        // decide whether the relocation applies to the memory operand
        // (the destination address) or to the immediate being stored
        if( ip->get_reloc_offset() < OP(5).getOffset() ) {
            doMIMov<32>(ip, block, STD_GLOBAL_OP(0), OP(5));
        } else {
            doMIMovV<32>(ip, block, ADDR_NOREF(0),
                    GLOBAL_DATA_OFFSET(block, natM, ip));
        }
        ret = ContinueBlock;
    } else {
        ret = doMIMov<32>(ip, block, ADDR(0), OP(5));
    }

    return ret;
}
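// A note on the operand layout used above: LLVM MC encodes an x86 memory
// operand as five MCOperands (base register, scale, index register,
// displacement, segment register), so for MOV32mi, OP(0)..OP(4) describe the
// destination address and OP(5) is the stored immediate.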
static bool processJumpIndexTable(ExecutableContainer *c,
        NativeBlockPtr B,
        InstPtr jmpinst,
        const vector<VA> &jmptable_entries,
        raw_ostream &out)
{
    // first, find which operand was the index
    // register in jmpinst
    const MCInst &inst = jmpinst->get_inst();
    int index_reg = regFromInst(inst);
    if(index_reg == -1) {
        out << "JMPINST does not use a register to index\n";
        return false;
    }

    // loop backwards through the block looking for
    // instructions that write to this register
    const std::list<InstPtr> &block_insts = B->get_insts();
    InstPtr write_reg_insn;
    for( std::list<InstPtr>::const_reverse_iterator itr = block_insts.rbegin();
        itr != block_insts.rend();
        itr++)
    {
        // check if this instruction writes to the index register
        if(writesToReg((*itr)->get_inst(), index_reg)) {
            write_reg_insn = *itr;
            break;
        }
    }

    if(write_reg_insn == NULL) {
        out << "No instruction writes the index register in the same basic block\n";
        return false;
    }

    out << "Found register index write instruction:\n";

    if(!parseJumpIndexTable(c, write_reg_insn, jmptable_entries, out)) {
        out << "Could not parse jump index table, aborting\n";
        return false;
    }

    return true;
}
template <int width>
static InstTransResult doFstM(InstPtr ip, BasicBlock *&b, Value *memAddr) {
    NASSERT(memAddr != NULL);

    Value *regVal = FPUR_READ(b, X86::ST0);
    llvm::Type *destType = NULL;
    llvm::Type *ptrType = NULL;
    unsigned addrspace = ip->get_addr_space();

    switch (width) {
        case 32:
            destType = llvm::Type::getFloatTy(b->getContext());
            ptrType = llvm::Type::getFloatPtrTy(b->getContext(), addrspace);
            break;
        case 64:
            destType = llvm::Type::getDoubleTy(b->getContext());
            ptrType = llvm::Type::getDoublePtrTy(b->getContext(), addrspace);
            break;
        case 80:
            // no destType needed: an 80-bit store requires no truncation
            //destType = llvm::Type::getX86_FP80Ty(b->getContext());
            ptrType = llvm::Type::getX86_FP80PtrTy(b->getContext(), addrspace);
            break;
        default:
            throw TErr(__LINE__, __FILE__, "Invalid width specified for FST");
            break;
    }

    // do not truncate 80-bit to 80-bit; an FPTrunc to the
    // same width causes a truncation error
    if(width < 80) {
        Value *trunc = new FPTruncInst(regVal, destType, "", b);
        M_WRITE_T(ip, b, memAddr, trunc, ptrType);
    } else if(width == 80) {
        M_WRITE_T(ip, b, memAddr, regVal, ptrType);
    } else {
        throw TErr(__LINE__, __FILE__, "FPU registers >80 bits not implemented for FST");
    }

    // Next instruction.
    return ContinueBlock;
}
static InstTransResult doCallPC(InstPtr ip, BasicBlock *&b, VA tgtAddr) {
    Module *M = b->getParent()->getParent();
    Function *ourF = b->getParent();

    //insert a call to the target function.
    //this function will be a translated function that we emit, so we should
    //be able to look it up in our module.
    std::string fname = "sub_"+to_string<VA>(tgtAddr, std::hex);
    Function *F = M->getFunction(fname);
    TASSERT( F != NULL, "Could not find function: " + fname );

    writeFakeReturnAddr(b);

    //we need to wrap up our current context
    writeLocalsToContext(b, 32, ABICallStore);

    //make the call; the only argument should be our parent's arguments
    TASSERT(ourF->arg_size() == 1, "");

    std::vector<Value*> subArgs;
    subArgs.push_back(ourF->arg_begin());

    CallInst *c = CallInst::Create(F, subArgs, "", b);
    c->setCallingConv(F->getCallingConv());

    if ( ip->has_local_noreturn() ) {
        // noreturn functions just hit unreachable
        std::cout << __FUNCTION__
                  << ": Adding Unreachable Instruction to local noreturn"
                  << std::endl;
        c->setDoesNotReturn();
        c->setTailCall();
        new UnreachableInst(b->getContext(), b);
        return EndBlock;
    }

    //spill our context back
    writeContextToLocals(b, 32, ABIRetSpill);

    //and we can continue to run the old code
    return ContinueBlock;
}
static bool canInstructionReferenceCode(InstPtr inst) {
    switch(inst->get_inst().getOpcode()) {
        case X86::MOV32mi:   // writes to memory, but uses an immediate, which could be code
        case X86::MOV32o32a: // writes imm32 to eax; probably code
        case X86::MOV32ri:   // writes imm32 to register, could be code
        case X86::PUSHi32:   // push an imm32, which could be code
        // need to check if mem references are valid here
        case X86::MOV32rm:   // writes mem to register, mem could be code?
        case X86::PUSH32rmm: // push mem, which could be/have code
        //case X86::LEA32r:  // write address of mem to reg
            return true;
        default:
            return false;
    }
}
static bool handleJump(ExecutableContainer *c,
        NativeBlockPtr B,
        InstPtr jmpinst,
        VA curAddr,
        stack<VA> &funcs,
        stack<VA> &blockChildren,
        raw_ostream &out)
{
    // this is an internal jmp. probably a jump table.
    out << "Found a possible jump table!\n";
    bool did_jmptable = handlePossibleJumpTable(
            c, B, jmpinst, curAddr, funcs, blockChildren, out);

    if(!did_jmptable) {
        out << "Heuristic jump table processing couldn't parse jump table\n";
        out << "pointing to: 0x" << to_string<VA>(curAddr, hex) << "\n";
        out << jmpinst->printInst() << "\n";
        out << c->hash << "\n";
    }
    return did_jmptable;
}
void doJumpTableViaSwitchReg(
        BasicBlock *&block,
        InstPtr ip,
        Value *regVal,
        BasicBlock *&default_block)
{
    llvm::dbgs() << __FUNCTION__ << ": Doing jump table via switch(reg)\n";

    Function *F = block->getParent();
    Module *M = F->getParent();

    MCSJumpTablePtr jmpptr = ip->get_jump_table();

    // create an empty default block; the caller is responsible for
    // terminating it (e.g., with a fallback indirect call)
    default_block =
        BasicBlock::Create(block->getContext(), "", block->getParent(), 0);
    // end default block

    const std::vector<VA> &jmpblocks = jmpptr->getJumpTable();
    std::unordered_set<VA> uniq_blocks(jmpblocks.begin(), jmpblocks.end());

    // create a switch inst
    SwitchInst *theSwitch = SwitchInst::Create(
            regVal,
            default_block,
            uniq_blocks.size(),
            block);

    // populate switch
    for(auto blockVA : uniq_blocks) {
        std::string bbname = "block_0x"+to_string<VA>(blockVA, std::hex);
        BasicBlock *toBlock = bbFromStrName(bbname, F);
        llvm::dbgs() << __FUNCTION__ << ": Mapping from "
                     << to_string<VA>(blockVA, std::hex) << " => " << bbname << "\n";
        TASSERT(toBlock != NULL, "Could not find block: "+bbname);
        // case values are the block VAs themselves, since the register
        // holds a code address rather than a table index
        theSwitch->addCase(CONST_V<32>(block, blockVA), toBlock);
    }
}
static InstTransResult translate_CALLpcrel32(NativeModulePtr natM,
        BasicBlock *&block, InstPtr ip, MCInst &inst)
{
    InstTransResult ret;

    if( ip->has_ext_call_target() ) {
        std::string s = ip->get_ext_call_target()->getSymbolName();
        ret = doCallPCExtern(block, s);
    } else if (ip->has_call_tgt() ) {
        int64_t off = (int64_t) ip->get_call_tgt(0);
        ret = doCallPC(ip, block, off);
    } else {
        // a pc-relative call targets the address of the next
        // instruction plus the immediate displacement
        int64_t off = (int64_t) OP(0).getImm();
        ret = doCallPC(ip, block, ip->get_loc()+ip->get_len()+off);
    }

    return ret;
}
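// For example (illustrative numbers): a CALLpcrel32 at 0x401000 that is
// 5 bytes long with an immediate of 0x20 targets
// 0x401000 + 5 + 0x20 = 0x401025.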
static InstTransResult translate_JMP32m(NativeModulePtr natM,
        BasicBlock *&block, InstPtr ip, MCInst &inst)
{
    InstTransResult ret;

    // translate JMP mem32 API calls
    // as a call <api>, ret;
    if( ip->has_ext_call_target() ) {
        std::string s = ip->get_ext_call_target()->getSymbolName();
        ret = doCallPCExtern(block, s, true);
        if (ret != EndBlock) {
            return doRet(block);
        } else {
            // noreturn api calls don't need to fix stack
            return ret;
        }
    } else if (ip->has_jump_table() && ip->is_data_offset()) {
        // this is a jump table that got converted
        // into a table in the data section
        doJumpTableViaData(natM, block, ip, inst);
        // return a "ret", since the jmp is simulated
        // as a call/ret pair
        return doRet(block);
    } else if(ip->has_jump_table()) {
        // this is a conformant jump table;
        // emit an llvm switch
        doJumpTableViaSwitch(natM, block, ip, inst);
        return EndBlock;
    } else {
        std::string msg("NIY: JMP32m only supported for external API calls and jump tables: ");
        msg += to_string<VA>(ip->get_loc(), std::hex);
        throw TErr(__LINE__, __FILE__, msg.c_str());
        return EndBlock;
    }
}
template <int width>
static InstTransResult doCmpxchgRM(InstPtr ip, BasicBlock *&b,
        Value *dstAddr,
        const MCOperand &srcReg)
{
    NASSERT(dstAddr != NULL);
    NASSERT(srcReg.isReg());

    Value *acc;
    switch(width) {
        case 8:
            acc = R_READ<width>(b, X86::AL);
            break;
        case 16:
            acc = R_READ<width>(b, X86::AX);
            break;
        case 32:
            acc = R_READ<width>(b, X86::EAX);
            break;
        default:
            throw TErr(__LINE__, __FILE__, "Width not supported");
    }

    //Value *mem_v = M_READ<width>(ip, b, dstAddr);
    Value *m_addr = NULL;
    unsigned addrspace = ip->get_addr_space();

    if( dstAddr->getType()->isPointerTy() == false ) {
        llvm::Type *ptrTy = Type::getIntNPtrTy(b->getContext(), width, addrspace);
        m_addr = new llvm::IntToPtrInst(dstAddr, ptrTy, "", b);
    } else if( dstAddr->getType() != Type::getIntNPtrTy(b->getContext(), width, addrspace) ) {
        //we need to bitcast the pointer value to a pointer type of the appropriate width
        m_addr = CastInst::CreatePointerCast(dstAddr,
                Type::getIntNPtrTy(b->getContext(), width, addrspace), "", b);
    } else {
        m_addr = dstAddr;
    }

    Value *srcReg_v = R_READ<width>(b, srcReg.getReg());

    AtomicCmpXchgInst *cmpx = new AtomicCmpXchgInst(
            m_addr,
            acc,
            srcReg_v,
            llvm::SequentiallyConsistent,
            llvm::SequentiallyConsistent,
            llvm::CrossThread,
            b);
    cmpx->setVolatile(true);

    Value *cmpx_val = ExtractValueInst::Create(cmpx, 0, "cmpxchg_cmpx_val", b);
    Value *was_eq   = ExtractValueInst::Create(cmpx, 1, "cmpxchg_was_eq",   b);

    doCmpVV<width>(ip, b, acc, cmpx_val);

    F_WRITE(b, ZF, was_eq);

    Value *new_acc = SelectInst::Create(was_eq, acc, cmpx_val, "", b);

    switch(width) {
        case 8:
            R_WRITE<width>(b, X86::AL, new_acc);
            break;
        case 16:
            R_WRITE<width>(b, X86::AX, new_acc);
            break;
        case 32:
            R_WRITE<width>(b, X86::EAX, new_acc);
            break;
        default:
            throw TErr(__LINE__, __FILE__, "Width not supported");
    }

    return ContinueBlock;
}
void ModuleBuilder::SetInsertPoint(InstPtr I) {
    CurBB = I->getParent();
    InsertPoint = CurBB->getIterator(I);
}
template <int width>
static InstTransResult doCmpxchgRM(InstPtr ip, BasicBlock *&b,
        Value *dstAddr,
        const MCOperand &srcReg)
{
    NASSERT(dstAddr != NULL);
    NASSERT(srcReg.isReg());

    Function *F = b->getParent();

    BasicBlock *AccEQDest = BasicBlock::Create(b->getContext(), "AccEQDest", F);
    BasicBlock *AccNEDest = BasicBlock::Create(b->getContext(), "AccNEDest", F);
    BasicBlock *done = BasicBlock::Create(b->getContext(), "done", F);

    Value *acc;
    switch(width) {
        case 8:
            acc = R_READ<width>(b, X86::AL);
            break;
        case 16:
            acc = R_READ<width>(b, X86::AX);
            break;
        case 32:
            acc = R_READ<width>(b, X86::EAX);
            break;
        default:
            throw TErr(__LINE__, __FILE__, "Width not supported");
    }

    //Value *mem_v = M_READ<width>(ip, b, dstAddr);
    Value *m_addr = NULL;
    unsigned addrspace = ip->get_addr_space();

    if( dstAddr->getType()->isPointerTy() == false ) {
        llvm::Type *ptrTy = Type::getIntNPtrTy(b->getContext(), width, addrspace);
        m_addr = new llvm::IntToPtrInst(dstAddr, ptrTy, "", b);
    } else if( dstAddr->getType() != Type::getIntNPtrTy(b->getContext(), width, addrspace) ) {
        //we need to bitcast the pointer value to a pointer type of the appropriate width
        m_addr = CastInst::CreatePointerCast(dstAddr,
                Type::getIntNPtrTy(b->getContext(), width, addrspace), "", b);
    } else {
        m_addr = dstAddr;
    }

    Value *srcReg_v = R_READ<width>(b, srcReg.getReg());

    AtomicCmpXchgInst *cmpx = new AtomicCmpXchgInst(
            m_addr,
            acc,
            srcReg_v,
            llvm::SequentiallyConsistent,
            llvm::CrossThread,
            b);
    cmpx->setVolatile(true);

    // needed for flags settings
    doCmpVV<width>(ip, b, acc, cmpx);

    Value *Cmp = new ICmpInst(*b, CmpInst::ICMP_EQ, cmpx, acc);
    BranchInst::Create(AccEQDest, AccNEDest, Cmp, b);

    // Acc == Dst
    F_SET(AccEQDest, "ZF");
    //M_WRITE<width>(ip, AccEQDest, dstAddr, srcReg_v);
    BranchInst::Create(done, AccEQDest);

    // Acc != Dst
    F_CLEAR(AccNEDest, "ZF");
    switch(width) {
        case 8:
            R_WRITE<width>(AccNEDest, X86::AL, cmpx);
            break;
        case 16:
            R_WRITE<width>(AccNEDest, X86::AX, cmpx);
            break;
        case 32:
            R_WRITE<width>(AccNEDest, X86::EAX, cmpx);
            break;
        default:
            throw TErr(__LINE__, __FILE__, "Width not supported");
    }
    BranchInst::Create(done, AccNEDest);

    b = done;

    return ContinueBlock;
}
NativeBlockPtr decodeBlock( ExecutableContainer *c,
        ExternalFunctionMap &f,
        LLVMByteDecoder &d,
        stack<VA> &blockChildren,
        VA e,
        stack<VA> &funcs,
        raw_ostream &out)
{
    NativeBlockPtr B = NativeBlockPtr(new NativeBlock(e, d.getPrinter()));
    VA curAddr = e;
    bool has_follow = true;

    out << "Processing block: " << B->get_name() << "\n";
    do {
        InstPtr I = d.getInstFromBuff(curAddr, c);

        //I, if a terminator, will have true and false targets
        //filled in. I could be an indirect branch of some kind;
        //we will deal with that here. we will also deal with the
        //instruction if it is a data instruction with relocation

        out << to_string<VA>(I->get_loc(), hex) << ":";
        out << I->printInst() << "\n";

        if(I->get_tr() != 0) {
            B->add_follow(I->get_tr());
            has_follow = false;
            out << "Adding block: " << to_string<VA>(I->get_tr(), hex) << "\n";
            blockChildren.push(I->get_tr());
        }

        if(I->get_fa() != 0) {
            B->add_follow(I->get_fa());
            has_follow = false;
            out << "Adding block: " << to_string<VA>(I->get_fa(), hex) << "\n";
            blockChildren.push(I->get_fa());
        }

        if(I->terminator()) {
            has_follow = false;
        }

        //do we need to add a data reference to this instruction?
        //again, because there is no offset information in the
        //instruction decoder, for now we just ask if every addr
        //in the inst is relocated
        for(uint32_t i = 0; i < I->get_len(); i++) {
            VA addrInInst = curAddr+i;
            if(c->is_addr_relocated(addrInInst)) {
                VA addr = 0;
                std::string has_imp;

                // this instruction has a relocation;
                // save the relocation offset for later
                I->set_reloc_offset(i);

                //get the offset for this address
                //add it as a data offset to the instruction
                if (c->find_import_name(addrInInst, has_imp) ) {
                    if(f.is_data(has_imp)) {
                        ExternalDataRefPtr data_p = makeExtDataRefFromString(has_imp, f);
                        out << "Adding external data ref: " << has_imp << "\n";
                        I->set_ext_data_ref(data_p);
                    } else {
                        ExternalCodeRefPtr code_p = makeExtCodeRefFromString(has_imp, f);
                        LASSERT(code_p, "Failed to get ext call from map for symbol: "+has_imp);
                        //maybe this call doesn't return, in which case
                        //we should kill the decoding of this flow
                        if(code_p->getReturnType() == ExternalCodeRef::NoReturn) {
                            has_follow = false;
                        }
                        out << "Adding external code ref: " << has_imp << "\n";
                        I->set_ext_call_target(code_p);
                    }
                } else if(c->relocate_addr(addrInInst, addr)) {
                    bool can_ref_code = canInstructionReferenceCode(I);
                    bool is_reloc_code = isAddrOfType(c, addr, ExecutableContainer::CodeSection);
                    bool is_reloc_data = isAddrOfType(c, addr, ExecutableContainer::DataSection);
                    unsigned opc = I->get_inst().getOpcode();

                    if(isBranchViaMemory(I)) {
                        out << "Detected branch via memory, relocation handled later\n";
                    }
                    // this instruction can reference code and does
                    // reference code,
                    // so we assume the code points to a function
                    else if( can_ref_code && is_reloc_code ) {
                        list<VA> new_funcs;
                        if(dataInCodeHeuristic(c, I, addr, new_funcs)) {
                            // add new functions to our functions list
                            for(list<VA>::const_iterator nfi = new_funcs.begin();
                                nfi != new_funcs.end();
                                nfi++)
                            {
                                funcs.push(*nfi);
                            }
                            I->set_data_offset(addr);
                        } else {
                            I->set_call_tgt(addr);
                            out << "Adding: 0x" << to_string<VA>(addr, hex) << " as target\n";
                            funcs.push(addr);
                        }
                    }
                    // this instruction can't reference code and points to .text,
                    // or it references data. Treat it as a data element.
                    // TODO: extract this from .text and shove into .data?
                    else if(( !can_ref_code && is_reloc_code) || is_reloc_data ) {
                        I->set_data_offset(addr);
                    } else {
                        out << "WARNING: relocation points to neither code nor data:"
                            << to_string<VA>(addr, hex) << "\n";
                    }
                } else {
                    out << "*NOT* Relocating relocatable addr:"
                        << to_string<uint32_t>(addrInInst, hex) << "\n";
                }
                break;
            }
        }

        //is this instruction an external call?
        //in a COFF binary, the pcrel call can refer to an
        //external symbol that has been relocated
        //so, get the string that corresponds, and
        //provide the translation using the function map
        MCOperand op;
        string imp;
        switch(I->get_inst().getOpcode()) {
            case X86::JMP32m:
            {
                string thunkSym;
                bool r = c->find_import_name(curAddr+2, thunkSym);
                if(r) {
                    // this goes to an external API call
                    out << "Adding external code ref via JMP: " << thunkSym << "\n";
                    ExternalCodeRefPtr p = makeExtCodeRefFromString(thunkSym, f);
                    I->set_ext_call_target(p);
                    has_follow = false;
                } else {
                    // this is an internal jmp. probably a jump table.
                    bool did_jmptable = handlePossibleJumpTable(
                            c, B, I, curAddr, funcs, blockChildren, out);
                    LASSERT(did_jmptable, "JMP32m processing aborted: couldn't parse jumptable");
                }
            }
            break;

            case X86::CALLpcrel32:
                //this could be an external call in COFF, or not
                op = I->get_inst().getOperand(0);
                LASSERT(op.isImm(), "Nonsense for CALLpcrel32");

                if(op.getImm() != 0) {
                    VA callTgt = curAddr+op.getImm()+I->get_len();
                    bool foldFunc = false;
                    //speculate about callTgt
                    InstPtr spec = d.getInstFromBuff(callTgt, c);
                    if(spec->terminator() && spec->get_inst().getOpcode() == X86::JMP32m) {
                        string thunkSym;
                        bool r = c->find_import_name(callTgt+2, thunkSym);
                        LASSERT(r, "Need to find thunk import addr");
                        ExternalCodeRefPtr p = makeExtCodeRefFromString(thunkSym, f);
                        I->set_ext_call_target(p);
                        foldFunc = true;
                        if(p->getReturnType() == ExternalCodeRef::NoReturn) {
                            has_follow = false;
                        }
                    }
                    if(foldFunc == false) {
                        //add this to our list of funcs to search
                        funcs.push(callTgt);
                    }
                } else {
                    //check to see if this is an external call...
                    if(I->has_ext_call_target() == false) {
                        // may be a local call
                        VA addr = curAddr+1, relo_addr = 0;
                        out << "Symbol not found, maybe a local call\n";
                        if(c->relocate_addr(addr, relo_addr)) {
                            out << "Found local call to: " << to_string<VA>(relo_addr, hex) << "\n";
                            I->set_call_tgt(relo_addr);
                            out << "Adding: 0x" << to_string<VA>(relo_addr, hex) << " as target\n";
                            funcs.push(relo_addr);
                        } else {
                            out << "Could not relocate addr for local call at: ";
                            out << to_string<VA>(curAddr, hex) << "\n";
                        }
                    } else {
                        out << "External call to: "
                            << I->get_ext_call_target()->getSymbolName() << "\n";
                    }
                }
                break;

            case X86::CALL32m:
                //this should be a call to an external, or we have no idea,
                //so we need to try to look up the symbol we're calling at this address...
                if(c->find_import_name(curAddr+2, imp)) {
                    ExternalCodeRefPtr p = makeExtCodeRefFromString(imp, f);
                    LASSERT(p, "Failed to get ext call from map for symbol"+imp);
                    out << "Calling symbol: " << p->getSymbolName() << "\n";
                    if(p->getReturnType() == ExternalCodeRef::NoReturn) {
                        has_follow = false;
                    }
                    I->set_ext_call_target(p);
                } else {
                    out << "Cannot find symbol at address ";
                    out << to_string<VA>(curAddr, hex) << "\n";
                }
                break;
        }

        B->add_inst(I);
        curAddr += I->get_len();
    } while(has_follow);

    //we have built a basic block. it might contain
    //multiple calls, but it only has one terminator,
    //which is either a ret or a branch
    return B;
}
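// A sketch of how decodeBlock is typically driven (decodeFunction and its
// shape here are illustrative assumptions, not necessarily the real caller):
// blocks discovered via fall-throughs and branch targets go on blockChildren
// for this function, while newly found entry points go on funcs for a
// separate pass. A real driver would also record the returned NativeBlockPtr
// in the function being built.
static void decodeFunction(ExecutableContainer *c, ExternalFunctionMap &f,
        LLVMByteDecoder &d, VA entry, stack<VA> &funcs, raw_ostream &out)
{
    std::set<VA> seen;        // requires <set>
    stack<VA> blockChildren;
    blockChildren.push(entry);

    while(!blockChildren.empty()) {
        VA block_va = blockChildren.top();
        blockChildren.pop();
        if(seen.count(block_va)) continue;   // already decoded this block
        seen.insert(block_va);
        decodeBlock(c, f, d, blockChildren, block_va, funcs, out);
    }
}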
static bool handlePossibleJumpTable(ExecutableContainer *c,
        NativeBlockPtr B,
        InstPtr jmpinst,
        VA curAddr,
        stack<VA> &funcs,
        stack<VA> &blockChildren,
        raw_ostream &out)
{
    LASSERT(jmpinst->get_inst().getOpcode() == X86::JMP32m,
            "handlePossibleJumpTable needs a JMP32m opcode" );

    // is this a jump table, step 0:
    // does this instruction have a relocation?
    VA reloc_offset = jmpinst->get_reloc_offset();
    if (reloc_offset == 0) {
        out << "Not a jump table: no relocation in JMP32m\n";
        // bail, this is not a jump table
        return false;
    }

    // the relocation has to point to a relocation
    VA addrInInst = curAddr + reloc_offset;
    VA jmpTableEntry, someFunction;
    if(!c->relocate_addr(addrInInst, jmpTableEntry)) {
        out << "Not a jump table: can't relocate relocation in JMP32m\n";
        // can't relocate, something bad happened
        return false;
    }

    if(!c->relocate_addr(jmpTableEntry, someFunction)) {
        // could not relocate the default jump table entry.
        // not good
        out << "Not a jump table: can't relocate first jump table entry\n";
        return false;
    }

    bool is_reloc_code = isAddrOfType(c, someFunction, ExecutableContainer::CodeSection);
    if(!is_reloc_code) {
        // the jump table entry does not point to code
        out << "Not a jump table: first entry doesn't point to code\n";
        return false;
    }

    // read jump table entries and add them as new function
    // entry points
    vector<VA> jmptable_entries;
    int new_funs;
    int original_zero;

    // this reads negative jump table indexes, but vectors are not
    // negatively indexed. the negative-most entry, which should be the new
    // index 0, is currently index N. Reverse the vector so it becomes index 0,
    // and save the current size as the original zeroth element.
    new_funs = addJmpTableEntries(c, jmptable_entries, jmpTableEntry, -4, out);
    std::reverse(jmptable_entries.begin(), jmptable_entries.end());
    out << "Added: " << to_string<int>(new_funs, dec) << " functions to jmptable\n";
    original_zero = new_funs;

    // add the original entry at the zero position
    jmptable_entries.push_back(someFunction);
    out << "Added JMPTABLE entry [" << to_string<uint32_t>(jmpTableEntry, hex)
        << "] => " << to_string<uint32_t>(someFunction, hex) << "\n";

    // add the positive table entries
    new_funs = addJmpTableEntries(c, jmptable_entries, jmpTableEntry, 4, out);
    out << "Added: " << to_string<int>(new_funs, dec) << " functions to jmptable\n";

    // associate the instruction with the jump table
    JumpTable *jt = new JumpTable(jmptable_entries, original_zero);
    jmpinst->set_jump_table(JumpTablePtr(jt));

    stack<VA> *toPush = NULL;

    // if this jump table is in the format
    // jmp [reg*4+imm32], then it is conformant
    // and we can turn it into an llvm switch()
    bool is_conformant = isConformantJumpInst(jmpinst);
    if(is_conformant) {
        toPush = &blockChildren;
        out << "GOT A CONFORMANT JUMP INST\n";
    } else {
        toPush = &funcs;
    }

    // add these jump table entries as new entry points
    for(std::vector<VA>::const_iterator itr = jmptable_entries.begin();
        itr != jmptable_entries.end();
        itr++)
    {
        out << "Adding block via jmptable: " << to_string<VA>(*itr, hex) << "\n";
        toPush->push(*itr);
        if(is_conformant) {
            B->add_follow(*itr);
        }
    }

    processJumpIndexTable(c, B, jmpinst, jmptable_entries, out);

    return true;
}
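// addJmpTableEntries is used above but not shown; a plausible sketch follows,
// assuming relocate_addr and isAddrOfType behave as in the rest of this file
// (an illustration, not the canonical implementation). It walks the table in
// 4-byte steps in the given direction, appending entries for as long as they
// relocate to code, and returns how many were added.
static int addJmpTableEntries(ExecutableContainer *c,
        vector<VA> &entries,
        VA tableStart,
        int stride,           // +4 walks forward, -4 walks backward
        raw_ostream &out)
{
    int added = 0;
    VA cur = tableStart + stride;  // tableStart itself is added by the caller
    VA entry;
    while(c->relocate_addr(cur, entry) &&
          isAddrOfType(c, entry, ExecutableContainer::CodeSection))
    {
        out << "Adding JMPTABLE entry [" << to_string<VA>(cur, hex)
            << "] => " << to_string<VA>(entry, hex) << "\n";
        entries.push_back(entry);
        cur += stride;
        added++;
    }
    return added;
}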