bool IA_IAPI::cleansStack() const { Instruction::Ptr ci = curInsn(); if (ci->getCategory() != c_ReturnInsn) return false; std::vector<Operand> ops; ci->getOperands(ops); return (ops.size() > 1); }
/**
 * Find the constant that is added to a thunk's result register right after
 * the call to the thunk.
 *
 * Decodes the single instruction located at afterThunk and accepts it only
 * if it is an add whose first operand is the register `reg` and whose
 * second operand evaluates to a defined value.
 *
 * @param afterThunk address of the instruction following the thunk call
 * @param reg        register that must be the add's first operand
 * @param b          block used to reach the code source (b->obj()->cs())
 *                   and to size the decode buffer
 * @return the second operand converted to Address, or 0 when the bytes are
 *         unavailable, nothing can be decoded, or the instruction does not
 *         have the expected "add reg, constant" shape.
 */
static Address ThunkAdjustment(Address afterThunk, MachRegister reg, ParseAPI::Block *b) {
    // After the call to the thunk, there is usually an add instruction
    // like ADD ebx, OFFSET to adjust the value coming out of the thunk.
    const unsigned char* buf =
        (const unsigned char*) (b->obj()->cs()->getPtrToInstruction(afterThunk));
    // No bytes behind that address: there is nothing to decode.
    if (buf == 0) return 0;

    // NOTE(review): the length handed to the decoder is the size of the whole
    // block although buf starts at afterThunk -- confirm this cannot let the
    // decoder read past the end of the mapped region.
    InstructionDecoder dec(buf, b->end() - b->start(), b->obj()->cs()->getArch());
    Instruction::Ptr nextInsn = dec.decode();
    // The decoder may hand back an empty pointer; do not dereference it.
    if (!nextInsn) return 0;

    // It has to be an add
    if (nextInsn->getOperation().getID() != e_add) return 0;

    vector<Operand> operands;
    nextInsn->getOperands(operands);
    // Both operands are indexed below, so make sure they exist.
    if (operands.size() < 2) return 0;

    // The first operand should be a register, and it must be the one the
    // thunk wrote.
    RegisterAST::Ptr regAST =
        boost::dynamic_pointer_cast<RegisterAST>(operands[0].getValue());
    if (!regAST) return 0;
    if (regAST->getID() != reg) return 0;

    // A not defined result means that the second operand is not an immediate
    Result res = operands[1].getValue()->eval();
    if (!res.defined) return 0;

    return res.convert<Address>();
}
/**
 * Decompose one instruction into a list of abstract assignments.
 *
 * `assignments` is cleared first. If cache() reports a hit for (func, addr)
 * the function returns immediately (cache() is handed `assignments`,
 * presumably to fill it -- confirm against its definition). Otherwise the
 * instruction is handled in one of two ways, selected by its opcode:
 *   1) special cases for instructions with several definitions
 *      (push, call, pop, leave, ret, xchg, and PowerPC stwu);
 *   2) a generic fallback in which every defined region depends on every
 *      used region.
 * When cacheEnabled_ is set, the result is stored in cache_[func][addr].
 *
 * @param I           instruction to decompose
 * @param addr        address of the instruction
 * @param func        function containing the instruction
 * @param block       block containing the instruction
 * @param assignments output list; cleared, then filled
 */
void AssignmentConverter::convert(const Instruction::Ptr I,
                                  const Address &addr,
                                  ParseAPI::Function *func,
                                  ParseAPI::Block *block,
                                  std::vector<Assignment::Ptr> &assignments) {
  assignments.clear();

  if (cache(func, addr, assignments)) {
    return;
  }

  // We don't have a Definition class concept yet, so the hard work is
  // done by hand here.

  // ---- Non-PC handling ----
  switch (I->getOperation().getID()) {
    case e_push: {
      // SP = SP - 4
      // *SP = <argument>
      std::vector<Operand> pushOps;
      I->getOperands(pushOps);

      // InstructionAPI reports the pushed argument first and ESP second.
      assert(pushOps.size() == 2);

      // The argument may be a register (push eax), an immediate
      // (push $deadbeef) or a memory location.
      std::vector<AbsRegion> pushed;
      aConverter.convertAll(pushOps[0].getValue(), addr, func, block, pushed);

      handlePushEquivalent(I, addr, func, block, pushed, assignments);
      break;
    }

    case e_call: {
      // A call can be viewed as a push of the PC...
      std::vector<AbsRegion> pcRegion;
      pcRegion.push_back(Absloc::makePC(func->isrc()->getArch()));
      Absloc sp = Absloc::makeSP(func->isrc()->getArch());

      handlePushEquivalent(I, addr, func, block, pcRegion, assignments);

      // ...followed by a definition of the PC. Assume full
      // intra-dependence of non-flag and non-pc registers.
      std::vector<AbsRegion> used;
      std::vector<AbsRegion> defined;
      aConverter.convertAll(I, addr, func, block, used, defined);

      Assignment::Ptr pcDef(new Assignment(I, addr, func, block, pcRegion[0]));
      if (used.empty()) {
        // Nothing else is read: the new PC depends on the PC alone.
        pcDef->addInputs(pcRegion);
      } else {
        for (std::vector<AbsRegion>::const_iterator u = used.begin();
             u != used.end(); ++u) {
          // Regions covering the PC or the SP are left out of the inputs.
          if (u->contains(pcRegion[0]) || u->contains(sp)) {
            continue;
          }
          pcDef->addInput(*u);
        }
      }
      assignments.push_back(pcDef);
      break;
    }

    case e_pop: {
      // <reg> = *SP
      // SP = SP + 4/8
      //
      // There is no intra-instruction dependence here: both definitions
      // read the incoming SP value, so there are no "sideways" edges.
      // It is still special-cased so that SP does not depend on the
      // incoming stack value.
      std::vector<Operand> popOps;
      I->getOperands(popOps);

      // InstructionAPI reports the explicit register first and ESP second.
      assert(popOps.size() == 2);

      std::vector<AbsRegion> popped;
      aConverter.convertAll(popOps[0].getValue(), addr, func, block, popped);

      handlePopEquivalent(I, addr, func, block, popped, assignments);
      break;
    }

    case e_leave: {
      // leave is equivalent to:
      //   mov ebp, esp
      //   pop ebp
      // From a definition point of view:
      //   SP = BP
      //   BP = *SP
      //
      //   BP    STACK[newSP]
      //    |    |
      //    v    v
      //   SP -> BP
      //
      // This is going to give the stack analysis fits; for now it is
      // believed to just revert the stack depth to 0.
      // TODO FIXME update stack analysis to make this really work.
      AbsRegion sp(Absloc::makeSP(func->isrc()->getArch()));
      AbsRegion fp(Absloc::makeFP(func->isrc()->getArch()));

      // SP is assigned from FP.
      Assignment::Ptr spDef(new Assignment(I, addr, func, block, sp));
      spDef->addInput(fp);

      // FP is assigned from the frame region (ebp = pop ebp); earlier
      // attempts used an explicit stack-slot region instead.
      Assignment::Ptr fpDef(new Assignment(I, addr, func, block, fp));
      fpDef->addInput(aConverter.frame(addr, func, block, false));

      assignments.push_back(spDef);
      assignments.push_back(fpDef);
      break;
    }

    case e_ret_near:
    case e_ret_far: {
      // PC = *SP
      // SP = SP + 4/8
      // Same as pop, except that every operand is implicit.
      AbsRegion pc(Absloc::makePC(func->isrc()->getArch()));
      Assignment::Ptr pcDef(new Assignment(I, addr, func, block, pc));
      pcDef->addInput(aConverter.stack(addr, func, block, false));

      AbsRegion sp(Absloc::makeSP(func->isrc()->getArch()));
      Assignment::Ptr spDef(new Assignment(I, addr, func, block, sp));
      spDef->addInput(sp);

      assignments.push_back(pcDef);
      assignments.push_back(spDef);
      break;
    }

    case e_xchg: {
      // xchg defines two abslocs, each from the other.
      std::vector<Operand> xchgOps;
      I->getOperands(xchgOps);

      // Exactly two operands are expected.
      assert(xchgOps.size() == 2);

      std::vector<AbsRegion> lhs;
      aConverter.convertAll(xchgOps[0].getValue(), addr, func, block, lhs);
      std::vector<AbsRegion> rhs;
      aConverter.convertAll(xchgOps[1].getValue(), addr, func, block, rhs);

      // A memory operand makes its vector hold several entries (the
      // remainder being registers), so everything from one side is used
      // to define the first entry of the other side.
      Assignment::Ptr lhsDef(new Assignment(I, addr, func, block, lhs[0]));
      lhsDef->addInputs(rhs);
      Assignment::Ptr rhsDef(new Assignment(I, addr, func, block, rhs[0]));
      rhsDef->addInputs(lhs);

      assignments.push_back(lhsDef);
      assignments.push_back(rhsDef);
      break;
    }

    case power_op_stwu: {
      // stwu <a>, <b>, <c>
      //   <a> = R1
      //   <b> = -16(R1)
      //   <c> = R1
      // From this, R1 <= R1 - 16; -16(R1) <= R1
      // So a <= b (without a deref) and deref(b) <= c.
      std::vector<Operand> stwuOps;
      I->getOperands(stwuOps);

      // The single memory write gives the effective address.
      std::set<Expression::Ptr> writes;
      I->getMemoryWriteOperands(writes);
      assert(writes.size() == 1);
      Expression::Ptr written = *(writes.begin());
      AbsRegion effAddr = aConverter.convert(written, addr, func, block);

      // Operand 0 is the stored register, operand 2 the updated one.
      std::vector<AbsRegion> rsRegions;
      aConverter.convertAll(stwuOps[0].getValue(), addr, func, block, rsRegions);
      AbsRegion RS = rsRegions[0];

      std::vector<AbsRegion> raRegions;
      aConverter.convertAll(stwuOps[2].getValue(), addr, func, block, raRegions);
      AbsRegion RA = raRegions[0];

      Assignment::Ptr memDef(new Assignment(I, addr, func, block, effAddr));
      memDef->addInput(RS);
      Assignment::Ptr raDef(new Assignment(I, addr, func, block, RA));
      raDef->addInput(RS);

      assignments.push_back(memDef);
      assignments.push_back(raDef);
      break;
    }

    default: {
      // Assume full intra-dependence of non-flag and non-pc registers:
      // every defined region takes every used region as input.
      std::vector<AbsRegion> used;
      std::vector<AbsRegion> defined;
      aConverter.convertAll(I, addr, func, block, used, defined);

      for (std::vector<AbsRegion>::const_iterator d = defined.begin();
           d != defined.end(); ++d) {
        Assignment::Ptr def(new Assignment(I, addr, func, block, *d));
        def->addInputs(used);
        assignments.push_back(def);
      }
      break;
    }
  }

  // Flags: according to Matt, the easiest way to represent flag
  // dependencies on IA-32/AMD-64 is to have them depend on the inputs of
  // the instruction rather than its outputs, so there is no
  // intra-instruction dependence.

  // PC handling: most instructions use the PC to set the PC (calls,
  // relative branches, and the like), so the interesting cases are
  // indirect or absolute branches (are there absolutes on IA-32?), plus
  // conditional branches and the flag registers they use. No code for
  // either section exists here yet.

  if (cacheEnabled_) {
    cache_[func][addr] = assignments;
  }
}
void dyninst_analyze_address_taken(BPatch_addressSpace *handle, DICFG *cfg) { /* XXX: this is the most naive address-taken analysis that can be used by the * lbr_analysis_pass. More sophisticated ones can be (and are) plugged in in the pass. * This naive solution is provided only for comparison with more sophisticated ones. * * This analysis looks for instruction operands that correspond to known function addresses, * and then marks these functions as having their address taken. In particular, we * do /not/ look for function pointers stored in (static) memory, or for function * pointers that are computed at runtime. */ SymtabCodeSource *sts; CodeObject *co; std::vector<BPatch_object*> objs; handle->getImage()->getObjects(objs); assert(objs.size() > 0); const char *bin = objs[0]->pathName().c_str(); // Create a new binary object sts = new SymtabCodeSource((char*)bin); co = new CodeObject(sts); // Parse the binary co->parse(); BPatch_image *image = handle->getImage(); std::vector<BPatch_module *> *mods = image->getModules(); std::vector<BPatch_module *>::iterator mods_iter; for (mods_iter = mods->begin(); mods_iter != mods->end(); mods_iter++) { std::vector<BPatch_function *> *funcs = (*mods_iter)->getProcedures(false); std::vector<BPatch_function *>::iterator funcs_iter = funcs->begin(); for(; funcs_iter != funcs->end(); funcs_iter++) { co->parse((Address)(*funcs_iter)->getBaseAddr(), true); BPatch_flowGraph *fg = (*funcs_iter)->getCFG(); std::set<BPatch_basicBlock*> blocks; fg->getAllBasicBlocks(blocks); std::set<BPatch_basicBlock*>::iterator block_iter; for (block_iter = blocks.begin(); block_iter != blocks.end(); ++block_iter) { BPatch_basicBlock *block = (*block_iter); std::vector<Instruction::Ptr> insns; block->getInstructions(insns); std::vector<Instruction::Ptr>::iterator insn_iter; for (insn_iter = insns.begin(); insn_iter != insns.end(); ++insn_iter) { Instruction::Ptr ins = *insn_iter; std::vector<Operand> ops; ins->getOperands(ops); 
std::vector<Operand>::iterator op_iter; for (op_iter = ops.begin(); op_iter != ops.end(); ++op_iter) { Expression::Ptr expr = (*op_iter).getValue(); struct OperandAnalyzer : public Dyninst::InstructionAPI::Visitor { virtual void visit(BinaryFunction* op) {}; virtual void visit(Dereference* op) {} virtual void visit(Immediate* op) { address_t addr; ArmsFunction *func; switch(op->eval().type) { case s32: addr = op->eval().val.s32val; break; case u32: addr = op->eval().val.u32val; break; case s64: addr = op->eval().val.s64val; break; case u64: addr = op->eval().val.u64val; break; default: return; } func = cfg_->find_function(addr); if(func) { printf("Instruction [%s] references function 0x%jx\n", ins_->format().c_str(), addr); func->set_addr_taken(); } } virtual void visit(RegisterAST* op) {} OperandAnalyzer(DICFG *cfg, Instruction::Ptr ins) { cfg_ = cfg; ins_ = ins; }; DICFG *cfg_; Instruction::Ptr ins_; }; OperandAnalyzer oa(cfg, ins); expr->apply(&oa); } } } } } }