/**
 * Build a control-flow widget for the instruction `insn` located at `addr`.
 *
 * The call / indirect / conditional flags are first collected from the
 * instruction's control-flow-target (CFT) records. Afterwards the
 * instruction's target expression is evaluated with the PC bound to `addr`;
 * if no value can be computed for it the widget is marked indirect.
 *
 * @param insn  the control-flow instruction this widget wraps
 * @param addr  address of `insn`, used as the PC value when evaluating
 *              the target expression
 */
CFWidget::CFWidget(InstructionAPI::Instruction::Ptr insn, Address addr) :
   isCall_(false),
   isConditional_(false),
   isIndirect_(false),
   gap_(0),
   insn_(insn),
   addr_(addr),
   origTarget_(0) {

   // HACK to be sure things are parsed...
   insn->format();

   for (Instruction::cftConstIter iter = insn->cft_begin(); iter != insn->cft_end(); ++iter) {
      if (iter->isCall) isCall_ = true;
      if (iter->isIndirect) isIndirect_ = true;
      if (iter->isConditional) isConditional_ = true;
   }

#if 0
   // Old way
   if (insn->getCategory() == c_CallInsn) {
      // Calls have a fallthrough but are not conditional.
      // TODO: conditional calls work how?
      isCall_ = true;
   } else if (insn->allowsFallThrough()) {
      isConditional_ = true;
   }
#endif

   // This whole next section is obsolete, but IAPI's CFT interface doesn't say
   // what a "return" is (aka, they don't include "indirect"). So I'm using it
   // so that things work.

   // TODO: IAPI is recording all PPC64 instructions as PPC32. However, the
   // registers they use are still PPC64. This is a pain to fix, and therefore
   // I'm working around it here and in Movement-adhoc.C by checking _both_
   // 32- and 64-bit.
   Architecture fixme = insn_->getArch();
   if (fixme == Arch_ppc32) fixme = Arch_ppc64;

   Expression::Ptr thePC(new RegisterAST(MachRegister::getPC(insn_->getArch())));
   Expression::Ptr thePCFixme(new RegisterAST(MachRegister::getPC(fixme)));

   Expression::Ptr exp = insn_->getControlFlowTarget();
   if (!exp) {
      // No target expression to evaluate. Handle it like a target that
      // evaluates to "undefined" instead of dereferencing a null pointer.
      isIndirect_ = true;
      return;
   }

   // Bind both PC flavours (see the PPC TODO above) and try to resolve the
   // target to a concrete value.
   exp->bind(thePC.get(), Result(u64, addr_));
   exp->bind(thePCFixme.get(), Result(u64, addr_));
   Result res = exp->eval();
   if (!res.defined) {
      // Target is not computable from the PC alone.
      isIndirect_ = true;
   }
}
/*
 * Scan the instructions of curBlock for a thunk candidate and record it in
 * thunkInsn. The scan stops at the first of:
 *   - a call instruction for which handleCall() reports success,
 *   - an LEA whose second operand evaluates to a defined value once the PC
 *     is bound to the address of the next instruction,
 *   - an ADD for which handleAdd() reports success.
 * If the block's bytes cannot be obtained, or nothing matches, the function
 * returns without touching thunkInsn itself.
 */
void IA_x86Details::findThunkInBlock(Block* curBlock)
{
    const unsigned char* rawBytes =
        (const unsigned char*)(currentBlock->_isrc->getPtrToInstruction(curBlock->start()));
    if( rawBytes == NULL ) {
        parsing_printf("%s[%d]: failed to get pointer to instruction by offset\n", FILE__, __LINE__);
        return;
    }

    // The decoder is given room for one maximum-length instruction past the block.
    InstructionDecoder decoder(rawBytes,
                               curBlock->size() + InstructionDecoder::maxInstructionLength,
                               currentBlock->_isrc->getArch());
    IA_IAPI walker(decoder, curBlock->start(), currentBlock->_obj, currentBlock->_cr,
                   currentBlock->_isrc, curBlock);

    parsing_printf("\tchecking block at 0x%lx for thunk\n", curBlock->start());

    for( ; walker.getAddr() < curBlock->end(); walker.advance())
    {
        // Calls: let handleCall() decide whether this is the thunk.
        if(walker.getInstruction()->getCategory() == c_CallInsn)
        {
            if(handleCall(walker)) return;
            continue;
        }

        // Look for an AMD64 IP-relative LEA. If we find one, it should point
        // to the start of a relative jump table.
        if(walker.getInstruction()->getOperation().getID() == e_lea)
        {
            parsing_printf("\tchecking instruction %s at 0x%lx for IP-relative LEA\n",
                           walker.getInstruction()->format().c_str(), walker.getAddr());
            Expression::Ptr ipRelative = walker.getInstruction()->getOperand(1).getValue();
            ipRelative->bind(currentBlock->thePC[currentBlock->_isrc->getArch()].get(),
                             Result(s64, walker.getNextAddr()));
            Result resolved = ipRelative->eval();
            if(!resolved.defined) continue;

            thunkInsn.addrFromInsn = resolved.convert<Address>();
            parsing_printf("\tsetting thunkOffset to 0x%lx at 0x%lx\n",thunkInsn.addrFromInsn, walker.getAddr());
            thunkInsn.addrOfInsn = walker.getAddr();
            thunkInsn.insn = walker.getInstruction();
            return;
        }

        // ADDs: handleAdd() decides whether this is a thunk candidate.
        if(walker.getInstruction()->getOperation().getID() == e_add)
        {
            if(handleAdd(walker)) {
                parsing_printf("handleAdd found thunk candidate, addr is 0x%lx\n", walker.getAddr());
                return;
            }
        }
    }
}
std::pair<bool, Address> RelocBlock::getJumpTarget() { InstructionAPI::Instruction insn = cfWidget()->insn(); if (!insn.isValid()) return std::make_pair(false, 0); Expression::Ptr cft = insn.getControlFlowTarget(); if (!cft) return std::make_pair(false, 0); Expression::Ptr thePC(new RegisterAST(MachRegister::getPC(insn.getArch()))); cft->bind(thePC.get(), Result(u64, cfWidget()->addr())); Result res = cft->eval(); if (res.defined) { return std::make_pair(true, res.convert<Address>()); } return std::make_pair(false, 0); }
std::pair<bool, Address> parse_block::callTarget() { using namespace InstructionAPI; Offset off = lastInsnOffset(); const unsigned char *ptr = (const unsigned char *)getPtrToInstruction(off); if (ptr == NULL) return std::make_pair(false, 0); InstructionDecoder d(ptr, endOffset() - lastInsnOffset(), obj()->cs()->getArch()); Instruction::Ptr insn = d.decode(); // Bind PC to that insn // We should build a free function to do this... Expression::Ptr cft = insn->getControlFlowTarget(); if (cft) { Expression::Ptr pc(new RegisterAST(MachRegister::getPC(obj()->cs()->getArch()))); cft->bind(pc.get(), Result(u64, lastInsnAddr())); Result res = cft->eval(); if (!res.defined) return std::make_pair(false, 0); return std::make_pair(true, res.convert<Address>()); } return std::make_pair(false, 0); }
// This should only be called on a known indirect branch... bool IA_x86Details::parseJumpTable(Block* currBlk, std::vector<std::pair< Address, EdgeTypeEnum> >& outEdges) { if(currentBlock->isIPRelativeBranch()) { return false; } if(isMovAPSTable(outEdges)) { return true; } bool foundJCCAlongTaken = false; IA_IAPI::allInsns_t::const_iterator tableLoc = findTableInsn(); if(tableLoc == currentBlock->allInsns.end()) { parsing_printf("\tunable to find table insn\n"); return false; } tableInsn.addrOfInsn = tableLoc->first; tableInsn.insn = tableLoc->second; Instruction::Ptr maxSwitchInsn, branchInsn; boost::tie(maxSwitchInsn, branchInsn, foundJCCAlongTaken) = findMaxSwitchInsn(currBlk); if(!maxSwitchInsn || !branchInsn) { parsing_printf("\tunable to fix max switch size\n"); return false; } computeTableAddress(); findThunkAndOffset(currBlk); if(thunkInsn.addrOfInsn != 0) { /* * FIXME * Noticed 2/8/2011 * * Although findThunkAndOffset looks outside of the current block, * this code only looks at the instructions within the current * block. One of these things is the wrong thing to do. * I don't understand what the goal of this code is; clearly thorough * code review is required. 
--nater */ // XXX this is the only place where an actual search // through allInsns is required; as per the previous // comment, I think something is wrong here anyway IA_IAPI::allInsns_t::const_iterator thunkLoc = search_insn_vec(thunkInsn.addrOfInsn, currentBlock->allInsns); if(thunkLoc != currentBlock->allInsns.end()) { if(thunkLoc->second && thunkLoc->second->getOperation().getID() == e_lea) { tableLoc = thunkLoc; tableInsn.addrOfInsn = thunkInsn.addrOfInsn; tableInsn.insn = thunkLoc->second; } } } parsing_printf("\ttableInsn %s at 0x%lx\n",tableInsn.insn->format().c_str(), tableInsn.addrOfInsn); if(thunkInsn.addrFromInsn) { parsing_printf("\tThunk-calculated table base address: 0x%lx\n", thunkInsn.addrFromInsn); } unsigned tableSize = 0, tableStride = 0; bool ok = computeTableBounds(maxSwitchInsn, branchInsn, tableInsn.insn, foundJCCAlongTaken, tableSize, tableStride); if(!ok) { return false; } IA_IAPI::allInsns_t::const_iterator cur = currentBlock->curInsnIter; while(tableLoc != cur) { tableLoc++; if(tableLoc->second->getOperation().getID() == e_lea) { parsing_printf("\tchecking instruction %s at 0x%lx for IP-relative LEA\n", tableLoc->second->format().c_str(), tableLoc->first); Expression::Ptr IPRelAddr = tableLoc->second->getOperand(1).getValue(); IPRelAddr->bind(currentBlock->thePC[currentBlock->_isrc->getArch()].get(), Result(s64, tableLoc->first + tableLoc->second->size())); Result iprel = IPRelAddr->eval(); if(iprel.defined) { parsing_printf("\trevising tableInsn to %s at 0x%lx\n",tableLoc->second->format().c_str(), tableLoc->first); tableInsn.insn = tableLoc->second; tableInsn.addrOfInsn = tableLoc->first; } } else { parsing_printf("\tChecking for sign-extending mov at 0x%lx...\n", tableLoc->first); if(tableLoc->second->getOperation().getID() == e_movsxd || tableLoc->second->getOperation().getID() == e_movsx) { std::set<Expression::Ptr> movsxReadAddr; tableLoc->second->getMemoryReadOperands(movsxReadAddr); if(movsxReadAddr.empty()) { // should be a 
register-register movsx[d] // from either 16- or 32-bit source operand Expression::Ptr op1 = tableLoc->second->getOperand(0).getValue(); Expression::Ptr op2 = tableLoc->second->getOperand(1).getValue(); if(op1 && op2) { int sz1 = op1->eval().size(); int sz2 = op2->eval().size(); parsing_printf("\t\tfound %d-byte to %d-byte move, revised stride to %d\n", sz2,sz1,sz2); tableStride = sz2; } } static Immediate::Ptr four(new Immediate(Result(u8, 4))); static Expression::Ptr dummy4(new DummyExpr()); static Expression::Ptr dummy2(new DummyExpr()); static Immediate::Ptr two(new Immediate(Result(u8, 2))); static BinaryFunction::funcT::Ptr multiplier(new BinaryFunction::multResult()); static BinaryFunction::Ptr scaleCheck4(new BinaryFunction(four, dummy4, (tableStride == 8) ? u64: u32, multiplier)); static BinaryFunction::Ptr scaleCheck2(new BinaryFunction(two, dummy2, (tableStride == 8) ? u64: u32, multiplier)); for(std::set<Expression::Ptr>::const_iterator readExp = movsxReadAddr.begin(); readExp != movsxReadAddr.end(); ++readExp) { if((*readExp)->isUsed(scaleCheck4)) { parsing_printf("\tFound sign-extending mov, revising table stride to scale (4)\n"); tableStride = 4; } else if((*readExp)->isUsed(scaleCheck2)) { parsing_printf("\tFound sign-extending mov, revising table stride to scale (2)\n"); tableStride = 2; } else { parsing_printf("\tFound sign-extending mov insn %s, readExp %s\n", tableLoc->second->format().c_str(), (*readExp)->format().c_str()); parsing_printf("\tcouldn't match stride expression %s or %s--HELP!\n", scaleCheck2->format().c_str(), scaleCheck4->format().c_str()); } } break; } } } // This first compute() should be unnecessary, as we'll already have done the work above. // However, if there turn out to be bugs later (PIC tables where we're revising the table insn // and it matters), then recompute here before revision... 
// computeTableAddress(); reviseTableAddress(); IA_IAPI::allInsns_t::const_iterator findSubtract = search_insn_vec(tableInsn.addrOfInsn,currentBlock->allInsns); int offsetMultiplier = 1; while(findSubtract->first < currentBlock->current) { if(findSubtract->second && findSubtract->second->getOperation().getID() == e_sub) { parsing_printf("\tsuspect table contains negative offsets, revising\n"); offsetMultiplier = -1; break; } findSubtract++; } return fillTableEntries(thunkInsn.addrFromInsn, tableInsn.addrFromInsn, tableSize, tableStride, offsetMultiplier, outEdges); }
AbsRegion AbsRegionConverter::convert(Expression::Ptr exp, Address addr, ParseAPI::Function *func, ParseAPI::Block *block) {
    // Classify the memory location described by `exp` (evaluated at `addr`)
    // as one of:
    //
    //   Stack slot : exp uses a frame or stack pointer whose height is known
    //                and evaluates to a value -> Absloc(offset, 0, func).
    //   Stack      : exp uses a frame/stack pointer but no concrete offset
    //                could be computed -> generic Absloc::Stack.
    //   Memory     : exp evaluates to a value without stack registers
    //                -> Absloc(address).
    //   Heap       : anything else -> generic Absloc::Heap.
    //
    // Registers are substituted by binding every known alias of SP, FP and
    // PC; an Expression has no dereference operator, so only the address
    // computation is examined here.
    //
    // TODO: aliasing relations.

    long stackHeight = 0;
    const bool stackKnown = getCurrentStackHeight(func, block, addr, stackHeight);

    long frameHeight = 0;
    const bool frameKnown = getCurrentFrameHeight(func, block, addr, frameHeight);

    static Expression::Ptr sp32(new RegisterAST(MachRegister::getStackPointer(Arch_x86)));
    static Expression::Ptr sp64(new RegisterAST(MachRegister::getStackPointer(Arch_x86_64)));
    static Expression::Ptr spPPC(new RegisterAST(MachRegister::getStackPointer(Arch_ppc32)));

    static Expression::Ptr fp32(new RegisterAST(MachRegister::getFramePointer(Arch_x86)));
    static Expression::Ptr fp64(new RegisterAST(MachRegister::getFramePointer(Arch_x86_64)));

    static Expression::Ptr pc32(new RegisterAST(MachRegister::getPC(Arch_x86)));
    static Expression::Ptr pc64(new RegisterAST(MachRegister::getPC(Arch_x86_64)));
    static Expression::Ptr pcPPC(new RegisterAST(MachRegister::getPC(Arch_ppc32)));

    // We currently have to try and bind _every_ _single_ _alias_ of the
    // stack pointer; the first alias that binds wins.
    const bool usesStackPtr =
        stackKnown &&
        (exp->bind(sp32.get(), Result(s32, stackHeight)) ||
         exp->bind(sp64.get(), Result(s64, stackHeight)) ||
         exp->bind(spPPC.get(), Result(s32, stackHeight)));

    // Same for the frame pointer aliases.
    const bool usesFramePtr =
        frameKnown &&
        (exp->bind(fp32.get(), Result(s32, frameHeight)) ||
         exp->bind(fp64.get(), Result(s64, frameHeight)));

    // Bind the IP as well so PC-relative expressions can be evaluated.
    exp->bind(pc32.get(), Result(u32, addr));
    exp->bind(pc64.get(), Result(u64, addr));
    exp->bind(pcPPC.get(), Result(u32, addr));

    Result value = exp->eval();

    // Frame-pointer based access.
    if (usesFramePtr && stackAnalysisEnabled_) {
        if (value.defined && frameKnown) {
            return AbsRegion(Absloc(value.convert<Address>(), 0, func));
        }
        return AbsRegion(Absloc::Stack);
    }

    // Stack-pointer based access.
    if (usesStackPtr && stackAnalysisEnabled_) {
        if (value.defined && stackKnown) {
            return AbsRegion(Absloc(value.convert<Address>(), 0, func));
        }
        if (func->obj()->defensiveMode()) {
            // SP could point to the heap, we make the worst-case
            // assumption and will emulate this stack access
            return AbsRegion(Absloc::Heap);
        }
        return AbsRegion(Absloc::Stack);
    }

    // Not a stack access: a known address if evaluation succeeded, the
    // generic heap otherwise.
    return value.defined ? AbsRegion(Absloc(value.convert<Address>()))
                         : AbsRegion(Absloc::Heap);
}
std::pair<bool, Address> IA_IAPI::getCFT() const { if(validCFT) return cachedCFT; Expression::Ptr callTarget = curInsn().getControlFlowTarget(); if (!callTarget) return make_pair(false, 0); // FIXME: templated bind(),dammit! callTarget->bind(thePC[_isrc->getArch()].get(), Result(s64, current)); parsing_printf("%s[%d]: binding PC %s in %s to 0x%x...", FILE__, __LINE__, thePC[_isrc->getArch()]->format(curInsn().getArch()).c_str(), curInsn().format().c_str(), current); Result actualTarget = callTarget->eval(); #if defined(os_vxworks) int reloc_target = current; #if defined(arch_x86) ++reloc_target; #endif if (actualTarget.convert<Address>() == reloc_target) { // We have a zero offset branch. Consider relocation information. SymtabCodeRegion *scr = dynamic_cast<SymtabCodeRegion *>(_cr); SymtabCodeSource *scs = dynamic_cast<SymtabCodeSource *>(_obj->cs()); if (!scr && scs) { set<CodeRegion *> regions; assert( scs->findRegions(reloc_target, regions) == 1 ); scr = dynamic_cast<SymtabCodeRegion *>(*regions.begin()); } SymtabAPI::Symbol *sym = NULL; if (scr) { std::vector<SymtabAPI::relocationEntry> relocs = scr->symRegion()->getRelocations(); for (unsigned i = 0; i < relocs.size(); ++i) { if (relocs[i].rel_addr() == reloc_target) { sym = relocs[i].getDynSym(); if (sym && sym->getOffset()) { parsing_printf(" <reloc hit> "); actualTarget = Result(s64, sym->getOffset()); } break; } } } if (sym && sym->getOffset() == 0) { // VxWorks external call. // Need some external means to find the target. Address found; const std::string &sym_name = sym->getMangledName(); if (wtxFindFunction(sym_name.c_str(), 0x0, found)) { parsing_printf(" <wtx search hit> "); actualTarget = Result(s64, found); // We've effectively found a plt call. Update linkage table. 
_obj->cs()->linkage()[found] = sym_name; } else { parsing_printf(" <wtx fail %s> ", sym_name.c_str()); actualTarget.defined = false; } } } #endif if(actualTarget.defined) { cachedCFT = std::make_pair(true, actualTarget.convert<Address>()); parsing_printf("SUCCESS (CFT=0x%x)\n", cachedCFT.second); } else { cachedCFT = std::make_pair(false, 0); parsing_printf("FAIL (CFT=0x%x), callTarget exp: %s\n", cachedCFT.second,callTarget->format(curInsn().getArch()).c_str()); } validCFT = true; if(isLinkerStub()) { parsing_printf("Linker stub detected: Correcting CFT. (CFT=0x%x)\n", cachedCFT.second); } return cachedCFT; }
/*
 * Locate the function that runs global destructors.
 *
 * First tries a lookup by the mangled name `dtorHandler`. If that fails, the
 * handler is searched for heuristically as the target of a call in the
 * .fini section (or in the region holding _fini):
 *
 * _fini:
 *
 * ... some code ...
 *
 * call dtor_handler
 *
 * ... prologue ...
 *
 * Returns the func_instance at the call target, or NULL (after logging)
 * when any step of the heuristic fails.
 */
func_instance *mapped_object::findGlobalDestructorFunc(const std::string &dtorHandler) {
    using namespace Dyninst::InstructionAPI;

    const pdvector<func_instance *> *byName = findFuncVectorByMangled(dtorHandler);
    if( byName != NULL ) {
        return byName->at(0);
    }

    // Find the region to scan: prefer the .fini section, fall back to the
    // region containing the _fini symbol.
    Symtab *linkedFile = parse_img()->getObject();
    Region *finiRegion = NULL;
    const bool haveFiniSection = linkedFile->findRegion(finiRegion, ".fini");
    if( !haveFiniSection ) {
        vector<Dyninst::SymtabAPI::Function *> finiSyms;
        if( !linkedFile->findFunctionsByName(finiSyms, "_fini") ) {
            logLine("failed to locate .fini Region or _fini function\n");
            return NULL;
        }
        finiRegion = finiSyms[0]->getRegion();
    }

    if( !finiRegion ) {
        logLine("failed to locate .fini Region or _fini function\n");
        return NULL;
    }

    // Decode forward from the start of the region and stop at a call.
    // NOTE(review): the scan stops at the FIRST call it meets, although the
    // heuristic is described as using the last call in .fini -- confirm.
    unsigned bytesSeen = 0;
    const unsigned char *regionBytes =
        reinterpret_cast<const unsigned char *>(finiRegion->getPtrToRawData());

    InstructionDecoder decoder(regionBytes, finiRegion->getDiskSize(),
                               parse_img()->codeObject()->cs()->getArch());

    Instruction::Ptr lastCall;
    for( Instruction::Ptr insn = decoder.decode();
         insn && insn->isValid() && bytesSeen < finiRegion->getDiskSize();
         insn = decoder.decode() )
    {
        if( insn->getCategory() == c_CallInsn ) {
            lastCall = insn;
            break;
        }
        bytesSeen += insn->size();
    }

    if( !lastCall || !lastCall->isValid() ) {
        logLine("heuristic for finding global destructor function failed\n");
        return NULL;
    }

    // bytesSeen is the offset of the call within the region.
    Address callAddress = finiRegion->getMemOffset() + bytesSeen;

    RegisterAST thePC(Dyninst::MachRegister::getPC(parse_img()->codeObject()->cs()->getArch()));

    Expression::Ptr callTarget = lastCall->getControlFlowTarget();
    if( !callTarget ) {
        logLine("failed to find global destructor function\n");
        return NULL;
    }

    // Evaluate the call target with the PC bound to the call's address.
    callTarget->bind(&thePC, Result(s64, callAddress));
    Result actualTarget = callTarget->eval();
    if( !actualTarget.defined ) {
        logLine("failed to find global destructor function\n");
        return NULL;
    }

    const Address dtorAddress = actualTarget.convert<Address>();
    if( !dtorAddress || !parse_img()->codeObject()->cs()->isValidAddress(dtorAddress) ) {
        logLine("invalid address for global destructor function\n");
        return NULL;
    }

    // A targ stub should have been created at the address
    func_instance *handler = findFuncByEntry(dtorAddress);
    if( handler == NULL ) {
        logLine("unable to find global destructor function\n");
        return NULL;
    }

    inst_printf("%s[%d]: set global destructor address to 0x%lx\n", FILE__, __LINE__, dtorAddress);
    return handler;
}
/*
 * Locate the function that runs global constructors.
 *
 * First tries a lookup by the mangled name `ctorHandler`. If that fails, the
 * handler is searched for heuristically as the target of the N-th call in
 * the .init section (or in the region holding _init), where N is
 * CTOR_NUM_CALLS.
 *
 * On Linux, the instruction sequence is:
 * ...
 * some instructions
 * ...
 * call call_gmon_start
 * call frame_dummy
 * call ctor_handler
 *
 * On FreeBSD, the instruction sequence is:
 * ...
 * some instructions
 * ...
 * call frame_dummy
 * call ctor_handler
 *
 * Returns the func_instance at the call target, or NULL (after logging)
 * when any step of the heuristic fails.
 */
func_instance *mapped_object::findGlobalConstructorFunc(const std::string &ctorHandler) {
    using namespace Dyninst::InstructionAPI;

    const pdvector<func_instance *> *byName = findFuncVectorByMangled(ctorHandler);
    if( byName != NULL ) {
        return byName->at(0);
    }

    // Find the region to scan: prefer the .init section, fall back to the
    // region containing the _init symbol.
    Symtab *linkedFile = parse_img()->getObject();
    Region *initRegion = NULL;
    const bool haveInitSection = linkedFile->findRegion(initRegion, ".init");
    if( !haveInitSection ) {
        vector<Dyninst::SymtabAPI::Function *> initSyms;
        if( !linkedFile->findFunctionsByName(initSyms, "_init") ) {
            logLine("failed to locate .init Region or _init function\n");
            return NULL;
        }
        initRegion = initSyms[0]->getRegion();
    }

    if( !initRegion ) {
        logLine("failed to locate .init Region or _init function\n");
        return NULL;
    }

    // Search for last of a fixed number of calls
#if defined(os_freebsd)
    const unsigned CTOR_NUM_CALLS = 2;
#else
    const unsigned CTOR_NUM_CALLS = 3;
#endif

    unsigned bytesSeen = 0;
    unsigned numCalls = 0;
    const unsigned char *regionBytes =
        reinterpret_cast<const unsigned char *>(initRegion->getPtrToRawData());

    InstructionDecoder decoder(regionBytes, initRegion->getDiskSize(),
                               parse_img()->codeObject()->cs()->getArch());

    // Decode forward, counting calls. On the CTOR_NUM_CALLS-th call the loop
    // stops WITHOUT advancing, so curInsn / bytesSeen describe that call.
    Instruction::Ptr curInsn = decoder.decode();
    while( curInsn && curInsn->isValid() && bytesSeen < initRegion->getDiskSize() ) {
        if( curInsn->getCategory() == c_CallInsn ) {
            ++numCalls;
        }
        if( numCalls >= CTOR_NUM_CALLS ) {
            break;
        }
        bytesSeen += curInsn->size();
        curInsn = decoder.decode();
    }

    if( numCalls != CTOR_NUM_CALLS ) {
        logLine("heuristic for finding global constructor function failed\n");
        return NULL;
    }

    // bytesSeen is the offset of the selected call within the region.
    Address callAddress = initRegion->getMemOffset() + bytesSeen;

    RegisterAST thePC(Dyninst::MachRegister::getPC(parse_img()->codeObject()->cs()->getArch()));

    Expression::Ptr callTarget = curInsn->getControlFlowTarget();
    if( !callTarget ) {
        logLine("failed to find global constructor function\n");
        return NULL;
    }

    // Evaluate the call target with the PC bound to the call's address.
    callTarget->bind(&thePC, Result(s64, callAddress));
    Result actualTarget = callTarget->eval();
    if( !actualTarget.defined ) {
        logLine("failed to find global constructor function\n");
        return NULL;
    }

    const Address ctorAddress = actualTarget.convert<Address>();
    if( !ctorAddress || !parse_img()->codeObject()->cs()->isValidAddress(ctorAddress) ) {
        logLine("invalid address for global constructor function\n");
        return NULL;
    }

    func_instance *handler = findFuncByEntry(ctorAddress);
    if( handler == NULL ) {
        logLine("unable to create representation for global constructor function\n");
        return NULL;
    }

    inst_printf("%s[%d]: set global constructor address to 0x%lx\n", FILE__, __LINE__, ctorAddress);
    return handler;
}