// Decide whether an instruction should be treated as a no-op.
//
// Returns true for:
//   * an explicit e_nop;
//   * an e_lea whose single memory-read operand expression compares equal to
//     its single written register;
//   * an e_lea whose source operand (operand 1) makes nopVisitor set its
//     isNop flag.
// Everything else yields false.
bool isNopInsn(Instruction::Ptr insn)
{
    const auto opcode = insn->getOperation().getID();
    if (opcode == e_nop) {
        return true;
    }
    if (opcode != e_lea) {
        return false;
    }

    // LEA case 1: exactly one address expression and one destination
    // register, and the two compare equal.
    std::set<Expression::Ptr> memReadAddr;
    insn->getMemoryReadOperands(memReadAddr);
    std::set<RegisterAST::Ptr> writtenRegs;
    insn->getWriteSet(writtenRegs);
    const bool oneOfEach = (memReadAddr.size() == 1) && (writtenRegs.size() == 1);
    if (oneOfEach && (**memReadAddr.begin() == **writtenRegs.begin())) {
        return true;
    }

    // LEA case 2: let the visitor inspect the source operand (the original
    // comment describes this as a check for zero displacement).
    nopVisitor visitor;
    insn->getOperand(1).getValue()->apply(&visitor);
    return visitor.isNop ? true : false;
}
bool IA_x86Details::computeTableBounds(Instruction::Ptr maxSwitchInsn, Instruction::Ptr branchInsn, Instruction::Ptr tableInsn, bool foundJCCAlongTaken, unsigned& tableSize, unsigned& tableStride) { assert(maxSwitchInsn && branchInsn); Result compareBound = maxSwitchInsn->getOperand(1).getValue()->eval(); if(!compareBound.defined) return false; tableSize = compareBound.convert<unsigned>(); // Sanity check the bounds; 32k tables would be an oddity, and larger is almost certainly // a misparse static const unsigned int maxTableSize = 32768; if(tableSize > maxTableSize) { parsing_printf("\tmaxSwitch of %d above %d, BAILING OUT\n", tableSize, maxTableSize); return false; } if(foundJCCAlongTaken) { if(branchInsn->getOperation().getID() == e_jbe || branchInsn->getOperation().getID() == e_jle) { tableSize++; } } else { if(branchInsn->getOperation().getID() == e_jnbe || branchInsn->getOperation().getID() == e_jnle) { tableSize++; } } parsing_printf("\tmaxSwitch set to %d\n", tableSize); tableStride = currentBlock->_isrc->getAddressWidth(); std::set<Expression::Ptr> tableInsnReadAddr; tableInsn->getMemoryReadOperands(tableInsnReadAddr); if(tableStride == 8) { static Immediate::Ptr four(new Immediate(Result(u8, 4))); static BinaryFunction::funcT::Ptr multiplier(new BinaryFunction::multResult()); static Expression::Ptr dummy(new DummyExpr()); static BinaryFunction::Ptr scaleCheck(new BinaryFunction(four, dummy, u64, multiplier)); for(std::set<Expression::Ptr>::const_iterator curExpr = tableInsnReadAddr.begin(); curExpr != tableInsnReadAddr.end(); ++curExpr) { if((*curExpr)->isUsed(scaleCheck)) { tableSize = tableSize >> 1; parsing_printf("\tmaxSwitch revised to %d\n",tableSize); } } }
// Report whether instruction `i` is the one that produces the value used as
// the control-flow target of the current block's current instruction.
//
// Accepted forms:
//   * an e_mov that reads memory and writes the jump-target expression;
//   * an e_lea that writes the jump-target expression.
bool IA_x86Details::isTableInsn(Instruction::Ptr i)
{
    Expression::Ptr jumpExpr = currentBlock->curInsn()->getControlFlowTarget();
    parsing_printf("jumpExpr for table insn is %s\n", jumpExpr->format().c_str());

    switch (i->getOperation().getID())
    {
        case e_mov:
            return i->readsMemory() && i->isWritten(jumpExpr);
        case e_lea:
            return i->isWritten(jumpExpr);
        default:
            return false;
    }
}
// Report whether `i` is a frame-setup move: an e_mov that reads the stack
// pointer, writes the frame pointer, and whose operand 0 is exactly as wide
// as the code source's address width.
bool IA_IAPI::isFrameSetupInsn(Instruction::Ptr i) const
{
    if (i->getOperation().getID() != e_mov)
    {
        return false;
    }

    if (i->readsMemory() || i->writesMemory())
    {
        parsing_printf("%s[%d]: discarding insn %s as stack frame preamble, not a reg-reg move\n",
                       FILE__, __LINE__, i->format().c_str());
        // Only logged: the early return here is disabled in the original
        // code, so the checks below still run.
    }

    if (!i->isRead(stackPtr[_isrc->getArch()]) || !i->isWritten(framePtr[_isrc->getArch()]))
    {
        return false;
    }

    const bool widthMatches =
        (unsigned) i->getOperand(0).getValue()->size() == _isrc->getAddressWidth();
    if (widthMatches)
    {
        return true;
    }

    parsing_printf("%s[%d]: discarding insn %s as stack frame preamble, size mismatch for %d-byte addr width\n",
                   FILE__, __LINE__, i->format().c_str(), _isrc->getAddressWidth());
    return false;
}
bool IA_IAPI::isThunk() const { // Before we go a-wandering, check the target bool valid; Address addr; boost::tie(valid, addr) = getCFT(); if (!valid || !_isrc->isValidAddress(addr)) { parsing_printf("... Call to 0x%lx is invalid (outside code or data)\n", addr); return false; } const unsigned char *target = (const unsigned char *)_isrc->getPtrToInstruction(addr); InstructionDecoder targetChecker(target, 2*InstructionDecoder::maxInstructionLength, _isrc->getArch()); Instruction::Ptr thunkFirst = targetChecker.decode(); Instruction::Ptr thunkSecond = targetChecker.decode(); if(thunkFirst && thunkSecond && (thunkFirst->getOperation().getID() == e_mov) && (thunkSecond->getCategory() == c_ReturnInsn)) { if(thunkFirst->isRead(stackPtr[_isrc->getArch()])) { // it is not enough that the stack pointer is read; it must // be a zero-offset read from the stack pointer ThunkVisitor tv; Operand op = thunkFirst->getOperand(1); op.getValue()->apply(&tv); return tv.offset() == 0; } } return false; }
// Report whether the instruction's opcode is one of the conditional jump
// opcodes recognised by this file.
static bool IsConditionalJump(Instruction::Ptr insn)
{
    switch (insn->getOperation().getID())
    {
        case e_jz:
        case e_jnz:
        case e_jb:
        case e_jnb:
        case e_jbe:
        case e_jnbe:
        case e_jb_jnaej_j:
        case e_jnb_jae_j:
        case e_jle:
        case e_jl:
        case e_jnl:
        case e_jnle:
            return true;
        default:
            return false;
    }
}
// Compute the constant added to the register produced by a thunk call.
//
// After the call to a thunk, there is usually an add instruction like
// "ADD ebx, OFFSET" that adjusts the value coming out of the thunk.
//
// afterThunk - address of the instruction following the thunk call.
// reg        - register the thunk is expected to have loaded.
// b          - block supplying the code source and the decode length.
//
// Returns the immediate of that add, or 0 when the next instruction cannot
// be decoded, is not an add, does not have a register first operand equal
// to `reg`, or its second operand does not evaluate to a constant.
static Address ThunkAdjustment(Address afterThunk, MachRegister reg, ParseAPI::Block *b)
{
    const unsigned char* buf =
        (const unsigned char*) (b->obj()->cs()->getPtrToInstruction(afterThunk));
    InstructionDecoder dec(buf, b->end() - b->start(), b->obj()->cs()->getArch());
    Instruction::Ptr nextInsn = dec.decode();

    // Nothing decodable after the thunk: no adjustment.
    if (!nextInsn) return 0;

    // It has to be an add
    if (nextInsn->getOperation().getID() != e_add) return 0;

    vector<Operand> operands;
    nextInsn->getOperands(operands);
    // Both the destination and the source operand are needed below.
    if (operands.size() < 2) return 0;

    // The first operand should be a register
    RegisterAST::Ptr regAST = boost::dynamic_pointer_cast<RegisterAST>(operands[0].getValue());
    if (!regAST) return 0;
    if (regAST->getID() != reg) return 0;

    // A not defined result means that
    // the second operand is not an immediate
    Result res = operands[1].getValue()->eval();
    if (!res.defined) return 0;
    return res.convert<Address>();
}
// Inspect a call that is not a "real" call (a thunk) and, when it is followed
// by an add/lea (optionally preceded by a pop) whose operand evaluates to a
// constant, record the resulting address in thunkInsn.
//
// On success thunkInsn.addrOfInsn is set to the previous address of `block`
// and thunkInsn.addrFromInsn to the address after the thunk plus the
// constant, and true is returned. In every other case thunkInsn is reset
// and false is returned. `block` is advanced as a side effect.
bool IA_x86Details::handleCall(IA_IAPI& block)
{
    parsing_printf("\tchecking call at 0x%lx for thunk\n", block.getAddr());
    if (!block.isRealCall())
    {
        parsing_printf("\tthunk found at 0x%lx, checking for add\n", block.getAddr());
        block.advance();
        thunkInsn.addrFromInsn = block.getAddr();

        Instruction::Ptr addInsn = block.getInstruction();
        if (!addInsn)
        {
            parsing_printf("\tNO INSN after thunk at 0x%lx\n", thunkInsn.addrFromInsn);
        }
        else
        {
            parsing_printf("\tinsn after thunk: %s\n", addInsn->format().c_str());

            // A pop between the thunk and the add defines registers that are
            // bound to zero before evaluating the add's operands.
            std::set<RegisterAST::Ptr> boundRegs;
            if (addInsn->getOperation().getID() == e_pop)
            {
                addInsn->getWriteSet(boundRegs);
                block.advance();
                addInsn = block.getInstruction();
            }

            const bool isAddOrLea =
                addInsn &&
                ((addInsn->getOperation().getID() == e_add) ||
                 (addInsn->getOperation().getID() == e_lea));
            if (isAddOrLea)
            {
                Expression::Ptr op0 = addInsn->getOperand(0).getValue();
                Expression::Ptr op1 = addInsn->getOperand(1).getValue();
                for (std::set<RegisterAST::Ptr>::const_iterator curReg = boundRegs.begin();
                     curReg != boundRegs.end();
                     ++curReg)
                {
                    op0->bind(curReg->get(), Result(u64, 0));
                    op1->bind(curReg->get(), Result(u64, 0));
                }

                // Operand 1 is preferred; operand 0 is the fallback.
                Result imm = addInsn->getOperand(1).getValue()->eval();
                Result imm2 = addInsn->getOperand(0).getValue()->eval();
                if (imm.defined || imm2.defined)
                {
                    Address thunkDiff = imm.defined ? imm.convert<Address>()
                                                    : imm2.convert<Address>();
                    parsing_printf("\tsetting thunkInsn.addrFromInsn to 0x%lx (0x%lx + 0x%lx)\n",
                                   thunkInsn.addrFromInsn+thunkDiff,
                                   thunkInsn.addrFromInsn,
                                   thunkDiff);
                    thunkInsn.addrOfInsn = block.getPrevAddr();
                    thunkInsn.addrFromInsn = thunkInsn.addrFromInsn + thunkDiff;
                    return true;
                }

                parsing_printf("\tadd insn %s found following thunk at 0x%lx, couldn't bind operands!\n",
                               addInsn->format().c_str(), thunkInsn.addrFromInsn);
            }
        }
    }

    // No usable thunk adjustment: clear any partial state.
    thunkInsn.addrFromInsn = 0;
    thunkInsn.addrOfInsn = 0;
    thunkInsn.insn.reset();
    return false;
}
// Decompose instruction I (located at addr in block/func) into abstract
// assignments, which are appended to `assignments` after it is cleared.
//
// When cache() reports a hit for (func, addr) the function returns
// immediately. Otherwise a handful of opcodes with multiple or implicit
// definitions are handled specially; everything else gets one assignment
// per defined region, each depending on every used region. When
// cacheEnabled_ is set, the result is stored in cache_.
void AssignmentConverter::convert(const Instruction::Ptr I,
                                  const Address &addr,
                                  ParseAPI::Function *func,
                                  ParseAPI::Block *block,
                                  std::vector<Assignment::Ptr> &assignments)
{
    assignments.clear();
    if (cache(func, addr, assignments)) {
        return;
    }

    // Builds an assignment of this instruction to the given output region.
    auto newAssignment = [&](const AbsRegion &out) {
        return Assignment::Ptr(new Assignment(I, addr, func, block, out));
    };

    // Converts operand 0 of a two-operand push/pop into abstract regions.
    // According to the InstructionAPI, the first operand is the explicit
    // argument and the second is the stack pointer.
    auto explicitOperandRegions = [&](std::vector<AbsRegion> &regionsOut) {
        std::vector<Operand> ops;
        I->getOperands(ops);
        assert(ops.size() == 2);
        aConverter.convertAll(ops[0].getValue(), addr, func, block, regionsOut);
    };

    // Non-PC handling section
    switch (I->getOperation().getID()) {
    case e_push: {
        // SP = SP - 4
        // *SP = <register>
        // The argument may be a register, an immediate, or a memory location.
        std::vector<AbsRegion> pushed;
        explicitOperandRegions(pushed);
        handlePushEquivalent(I, addr, func, block, pushed, assignments);
        break;
    }

    case e_call: {
        // This can be seen as a push of the PC...
        std::vector<AbsRegion> pcRegion;
        pcRegion.push_back(Absloc::makePC(func->isrc()->getArch()));
        Absloc sp = Absloc::makeSP(func->isrc()->getArch());
        handlePushEquivalent(I, addr, func, block, pcRegion, assignments);

        // Now for the PC definition
        // Assume full intra-dependence of non-flag and non-pc registers.
        std::vector<AbsRegion> used;
        std::vector<AbsRegion> defined;
        aConverter.convertAll(I, addr, func, block, used, defined);

        Assignment::Ptr pcDef = newAssignment(pcRegion[0]);
        if (used.empty()) {
            pcDef->addInputs(pcRegion);
        } else {
            // Inputs are every used region except those containing the PC
            // or the SP.
            for (const AbsRegion &region : used) {
                if (region.contains(pcRegion[0]) || region.contains(sp)) {
                    continue;
                }
                pcDef->addInput(region);
            }
        }
        assignments.push_back(pcDef);
        break;
    }

    case e_pop: {
        // <reg> = *SP
        // SP = SP + 4/8
        // Special-cased so that SP doesn't depend on the incoming stack
        // value. As with push, the stack pointer shows up as operand 1.
        std::vector<AbsRegion> popped;
        explicitOperandRegions(popped);
        handlePopEquivalent(I, addr, func, block, popped, assignments);
        break;
    }

    case e_leave: {
        // a leave is equivalent to:
        //   mov ebp, esp
        //   pop ebp
        // From a definition POV: SP = BP, BP = *SP.
        // TODO FIXME update stack analysis to make this really work.
        AbsRegion sp(Absloc::makeSP(func->isrc()->getArch()));
        AbsRegion fp(Absloc::makeFP(func->isrc()->getArch()));

        // "we assign SP using FP"
        Assignment::Ptr spDef = newAssignment(sp);
        spDef->addInput(fp);

        // FP is defined from the frame region supplied by the converter.
        Assignment::Ptr fpDef = newAssignment(fp);
        fpDef->addInput(aConverter.frame(addr, func, block, false));

        assignments.push_back(spDef);
        assignments.push_back(fpDef);
        break;
    }

    case e_ret_near:
    case e_ret_far: {
        // PC = *SP
        // SP = SP + 4/8
        // Like pop, except it's all implicit.
        AbsRegion pc = AbsRegion(Absloc::makePC(func->isrc()->getArch()));
        Assignment::Ptr pcDef = newAssignment(pc);
        pcDef->addInput(aConverter.stack(addr, func, block, false));

        AbsRegion sp = AbsRegion(Absloc::makeSP(func->isrc()->getArch()));
        Assignment::Ptr spDef = newAssignment(sp);
        spDef->addInput(sp);

        assignments.push_back(pcDef);
        assignments.push_back(spDef);
        break;
    }

    case e_xchg: {
        // xchg defines two abslocs, each from the other.
        std::vector<Operand> ops;
        I->getOperands(ops);
        assert(ops.size() == 2);

        std::vector<AbsRegion> lhs;
        aConverter.convertAll(ops[0].getValue(), addr, func, block, lhs);
        std::vector<AbsRegion> rhs;
        aConverter.convertAll(ops[1].getValue(), addr, func, block, rhs);

        // A memory reference makes one side hold several entries (the
        // remainder being registers), so everything on one side feeds the
        // first entry of the other side.
        Assignment::Ptr defLhs = newAssignment(lhs[0]);
        defLhs->addInputs(rhs);
        Assignment::Ptr defRhs = newAssignment(rhs[0]);
        defRhs->addInputs(lhs);

        assignments.push_back(defLhs);
        assignments.push_back(defRhs);
        break;
    }

    case power_op_stwu: {
        // stwu <a>, <b>, <c>
        //   <a> = R1, <b> = -16(R1), <c> = R1
        // From this, R1 <= R1 - 16; -16(R1) <= R1
        std::vector<Operand> ops;
        I->getOperands(ops);

        std::set<Expression::Ptr> writes;
        I->getMemoryWriteOperands(writes);
        assert(writes.size() == 1);
        Expression::Ptr written = *(writes.begin());
        AbsRegion effAddr = aConverter.convert(written, addr, func, block);

        std::vector<AbsRegion> sourceRegions;
        aConverter.convertAll(ops[0].getValue(), addr, func, block, sourceRegions);
        AbsRegion RS = sourceRegions[0];

        std::vector<AbsRegion> baseRegions;
        aConverter.convertAll(ops[2].getValue(), addr, func, block, baseRegions);
        AbsRegion RA = baseRegions[0];

        // Both the stored-to memory and the updated base register take RS
        // as their input.
        Assignment::Ptr memDef = newAssignment(effAddr);
        memDef->addInput(RS);
        Assignment::Ptr baseDef = newAssignment(RA);
        baseDef->addInput(RS);

        assignments.push_back(memDef);
        assignments.push_back(baseDef);
        break;
    }

    default: {
        // Assume full intra-dependence of non-flag and non-pc registers.
        std::vector<AbsRegion> used;
        std::vector<AbsRegion> defined;
        aConverter.convertAll(I, addr, func, block, used, defined);
        for (const AbsRegion &out : defined) {
            Assignment::Ptr def = newAssignment(out);
            def->addInputs(used);
            assignments.push_back(def);
        }
        break;
    }
    }

    // Flags: on IA-32/AMD-64 they are modelled as depending on the inputs of
    // the instruction, not its outputs, so there is no intra-instruction
    // dependence to add here.
    //
    // PC handling: most instructions use the PC to set the PC. This includes
    // calls, relative branches, and the like, so the cases of interest are
    // indirect or absolute branches, and conditional branches together with
    // the flag registers they use. Nothing is emitted for these here.

    if (cacheEnabled_) {
        cache_[func][addr] = assignments;
    }
}
// Build the read/written register bit sets for one instruction.
//
// curInsn - instruction to analyse.
// blk     - block containing it (used to tell whether a call ends the block
//           and whether a branch leaves an exit block).
// a       - address of the instruction.
//
// The result starts from the explicit read/write sets of the instruction.
// The x86 flags register is expanded into its individual flag bits. Calls,
// returns, tail-call branches, interrupts and system calls then add the
// register sets supplied by the ABI object.
ReadWriteInfo LivenessAnalyzer::calcRWSets(Instruction::Ptr curInsn, Block* blk, Address a)
{
    liveness_cerr << "calcRWSets for " << curInsn->format() << " @ " << hex << a << dec << endl;

    ReadWriteInfo ret;
    ret.read = abi->getBitArray();
    ret.written = abi->getBitArray();
    ret.insnSize = curInsn->size();

    std::set<RegisterAST::Ptr> cur_read, cur_written;
    curInsn->getReadSet(cur_read);
    curInsn->getWriteSet(cur_written);

    // Individual flag bits standing in for the whole flags register, for
    // width == 4 and for every other width respectively.
    const unsigned numFlagBits = 9;
    const MachRegister flagBits32[numFlagBits] = {
        x86::of, x86::cf, x86::pf, x86::af, x86::zf,
        x86::sf, x86::df, x86::tf, x86::nt_
    };
    const MachRegister flagBits64[numFlagBits] = {
        x86_64::of, x86_64::cf, x86_64::pf, x86_64::af, x86_64::zf,
        x86_64::sf, x86_64::df, x86_64::tf, x86_64::nt_
    };

    liveness_printf("Read registers: \n");
    for (std::set<RegisterAST::Ptr>::const_iterator rIt = cur_read.begin();
         rIt != cur_read.end();
         rIt++)
    {
        MachRegister cur = (*rIt)->getID();
        // ppc64 registers are re-tagged as their ppc32 counterparts.
        if (cur.getArchitecture() == Arch_ppc64)
            cur = MachRegister((cur.val() & ~Arch_ppc64) | Arch_ppc32);
        liveness_printf("\t%s \n", cur.name().c_str());
        MachRegister base = cur.getBaseRegister();

        if (cur == x86::flags || cur == x86_64::flags) {
            const MachRegister *flagBits = (width == 4) ? flagBits32 : flagBits64;
            for (unsigned k = 0; k < numFlagBits; ++k) {
                ret.read[getIndex(flagBits[k])] = true;
            }
        }
        else {
            base = changeIfMMX(base);
            ret.read[getIndex(base)] = true;
        }
    }

    liveness_printf("Write Registers: \n");
    for (std::set<RegisterAST::Ptr>::const_iterator wIt = cur_written.begin();
         wIt != cur_written.end();
         wIt++)
    {
        MachRegister cur = (*wIt)->getID();
        if (cur.getArchitecture() == Arch_ppc64)
            cur = MachRegister((cur.val() & ~Arch_ppc64) | Arch_ppc32);
        liveness_printf("\t%s \n", cur.name().c_str());
        MachRegister base = cur.getBaseRegister();

        if (cur == x86::flags || cur == x86_64::flags) {
            const MachRegister *flagBits = (width == 4) ? flagBits32 : flagBits64;
            for (unsigned k = 0; k < numFlagBits; ++k) {
                ret.written[getIndex(flagBits[k])] = true;
            }
        }
        else {
            base = changeIfMMX(base);
            ret.written[getIndex(base)] = true;
            // A write to a sub-register narrower than 4 bytes, or to an MMX
            // register, also counts as a read of the base register.
            if ((cur != base && cur.size() < 4) || isMMX(base))
                ret.read[getIndex(base)] = true;
        }
    }

    InsnCategory category = curInsn->getCategory();
    switch (category)
    {
    case c_CallInsn:
        // Call instructions not at the end of a block are thunks, which are
        // not ABI-compliant. So make conservative assumptions about what
        // they may read (ABI) but don't assume they write anything.
        ret.read |= (abi->getCallReadRegisters());
        if (blk->lastInsnAddr() == a) {
            ret.written |= (abi->getCallWrittenRegisters());
        }
        break;

    case c_ReturnInsn:
        ret.read |= (abi->getReturnReadRegisters());
        // Nothing written implicitly by a return
        break;

    case c_BranchInsn:
        if (!curInsn->allowsFallThrough() && isExitBlock(blk)) {
            // Tail call, union of call and return
            ret.read |= ((abi->getCallReadRegisters()) |
                         (abi->getReturnReadRegisters()));
            ret.written |= (abi->getCallWrittenRegisters());
        }
        break;

    default:
    {
        const auto opcode = curInsn->getOperation().getID();

        const bool isInterrupt = (opcode == e_int) || (opcode == e_int3);

        static RegisterAST::Ptr gs(new RegisterAST(x86::gs));

        // A call whose first operand formats as "16", or one of the explicit
        // system-call opcodes, is treated as a system call.
        bool isSyscall =
            ((opcode == e_call) &&
             /*(curInsn()->getOperation().isRead(gs))) ||*/
             (curInsn->getOperand(0).format(curInsn->getArch()) == "16")) ||
            (opcode == e_syscall) ||
            (opcode == e_int) ||
            (opcode == power_op_sc);
        if (opcode == power_op_svcs) {
            isSyscall = true;
        }

        if (isInterrupt || isSyscall) {
            ret.read |= (abi->getSyscallReadRegisters());
            ret.written |= (abi->getSyscallWrittenRegisters());
        }
    }
    break;
    }

    return ret;
}
/* returns true if the call leads to: * -an invalid instruction (or immediately branches/calls to an invalid insn) * -a block not ending in a return instruction that pops the return address * off of the stack */ bool IA_IAPI::isFakeCall() const { assert(_obj->defensiveMode()); if (isDynamicCall()) { return false; } // get func entry bool tampers = false; bool valid; Address entry; boost::tie(valid, entry) = getCFT(); if (!valid) return false; if (! _cr->contains(entry) ) { return false; } if ( ! _isrc->isCode(entry) ) { mal_printf("WARNING: found function call at %lx " "to invalid address %lx %s[%d]\n", current, entry, FILE__,__LINE__); return false; } // get instruction at func entry const unsigned char* bufPtr = (const unsigned char *)(_cr->getPtrToInstruction(entry)); Offset entryOff = entry - _cr->offset(); InstructionDecoder newdec( bufPtr, _cr->length() - entryOff, _cr->getArch() ); IA_IAPI *ah = new IA_IAPI(newdec, entry, _obj, _cr, _isrc, _curBlk); Instruction::Ptr insn = ah->curInsn(); // follow ctrl transfers until you get a block containing non-ctrl // transfer instructions, or hit a return instruction while (insn->getCategory() == c_CallInsn || insn->getCategory() == c_BranchInsn) { boost::tie(valid, entry) = ah->getCFT(); if ( !valid || ! _cr->contains(entry) || ! 
_isrc->isCode(entry) ) { mal_printf("WARNING: found call to function at %lx that " "leaves to %lx, out of the code region %s[%d]\n", current, entry, FILE__,__LINE__); return false; } bufPtr = (const unsigned char *)(_cr->getPtrToInstruction(entry)); entryOff = entry - _cr->offset(); delete(ah); newdec = InstructionDecoder(bufPtr, _cr->length() - entryOff, _cr->getArch()); ah = new IA_IAPI(newdec, entry, _obj, _cr, _isrc, _curBlk); insn = ah->curInsn(); } // calculate instruction stack deltas for the block, leaving the iterator // at the last ins'n if it's a control transfer, or after calculating the // last instruction's delta if we run off the end of initialized memory int stackDelta = 0; int addrWidth = _isrc->getAddressWidth(); static Expression::Ptr theStackPtr (new RegisterAST(MachRegister::getStackPointer(_isrc->getArch()))); Address curAddr = entry; while(true) { // exit condition 1 if (insn->getCategory() == c_CallInsn || insn->getCategory() == c_ReturnInsn || insn->getCategory() == c_BranchInsn) { break; } // calculate instruction delta if(insn->isWritten(theStackPtr)) { entryID what = insn->getOperation().getID(); int sign = 1; switch(what) { case e_push: sign = -1; //FALLTHROUGH case e_pop: { int size = insn->getOperand(0).getValue()->size(); stackDelta += sign * size; break; } case e_pusha: case e_pushad: sign = -1; //FALLTHROUGH case e_popa: case e_popad: if (1 == sign) { mal_printf("popad ins'n at %lx in func at %lx changes sp " "by %d. %s[%d]\n", ah->getAddr(), entry, 8 * sign * addrWidth, FILE__, __LINE__); } stackDelta += sign * 8 * addrWidth; break; case e_pushf: case e_pushfd: sign = -1; //FALLTHROUGH case e_popf: case e_popfd: stackDelta += sign * 4; if (1 == sign) { mal_printf("popf ins'n at %lx in func at %lx changes sp " "by %d. 
%s[%d]\n", ah->getAddr(), entry, sign * 4, FILE__, __LINE__); } break; case e_enter: //mal_printf("Saw enter instruction at %lx in isFakeCall, " // "quitting early, assuming not fake " // "%s[%d]\n",curAddr, FILE__,__LINE__); // unhandled case, but not essential for correct analysis delete ah; return false; break; case e_leave: mal_printf("WARNING: saw leave instruction " "at %lx that is not handled by isFakeCall %s[%d]\n", curAddr, FILE__,__LINE__); // unhandled, not essential for correct analysis, would // be a red flag if there wasn't an enter ins'n first and // we didn't end in a return instruction break; case e_and: // Rounding off the stack pointer. mal_printf("WARNING: saw and instruction at %lx that is not handled by isFakeCall %s[%d]\n", curAddr, FILE__, __LINE__); delete ah; return false; break; case e_sub: sign = -1; //FALLTHROUGH case e_add: { Operand arg = insn->getOperand(1); Result delta = arg.getValue()->eval(); if(delta.defined) { int delta_int = sign; switch (delta.type) { case u8: case s8: delta_int *= (int)delta.convert<char>(); break; case u16: case s16: delta_int *= (int)delta.convert<short>(); break; case u32: case s32: delta_int *= delta.convert<int>(); break; default: assert(0 && "got add/sub operand of unusual size"); break; } stackDelta += delta_int; } else if (sign == -1) { delete ah; return false; } else { mal_printf("ERROR: in isFakeCall, add ins'n " "at %lx (in first block of function at " "%lx) modifies the sp but failed to evaluate " "its arguments %s[%d]\n", ah->getAddr(), entry, FILE__, __LINE__); delete ah; return true; } break; } default: { fprintf(stderr,"WARNING: in isFakeCall non-push/pop " "ins'n at %lx (in first block of function at " "%lx) modifies the sp by an unknown amount. 
" "%s[%d]\n", ah->getAddr(), entry, FILE__, __LINE__); break; } // end default block } // end switch } if (stackDelta > 0) { tampers=true; } // exit condition 2 ah->advance(); Instruction::Ptr next = ah->curInsn(); if (NULL == next) { break; } curAddr += insn->size(); insn = next; } // not a fake call if it ends w/ a return instruction if (insn->getCategory() == c_ReturnInsn) { delete ah; return false; } // if the stack delta is positive or the return address has been replaced // with an absolute value, it's a fake call, since in both cases // the return address is gone and we cannot return to the caller if ( 0 < stackDelta || tampers ) { delete ah; return true; } delete ah; return false; }
bool IA_IAPI::isTailCall(Function * context, EdgeTypeEnum type, unsigned int, const set<Address>& knownTargets) const { // Collapse down to "branch" or "fallthrough" switch(type) { case COND_TAKEN: case DIRECT: case INDIRECT: type = DIRECT; break; case CALL: case RET: case COND_NOT_TAKEN: case FALLTHROUGH: case CALL_FT: default: return false; } parsing_printf("Checking for Tail Call \n"); context->obj()->cs()->incrementCounter(PARSE_TAILCALL_COUNT); if (tailCalls.find(type) != tailCalls.end()) { parsing_printf("\tReturning cached tail call check result: %d\n", tailCalls[type]); if (tailCalls[type]) { context->obj()->cs()->incrementCounter(PARSE_TAILCALL_FAIL); return true; } return false; } bool valid; Address addr; boost::tie(valid, addr) = getCFT(); Function* callee = _obj->findFuncByEntry(_cr, addr); Block* target = _obj->findBlockByEntry(_cr, addr); // check if addr is in a block if it is not entry. if (target == NULL) { std::set<Block*> blocks; _obj->findCurrentBlocks(_cr, addr, blocks); if (blocks.size() == 1) { target = *blocks.begin(); } else if (blocks.size() == 0) { // This case can happen when the jump target is a function entry, // but we have not parsed the function yet, // or when this is an indirect jump target = NULL; } else { // If this case happens, it means the jump goes into overlapping instruction streams, // it is not likely to be a tail call. 
parsing_printf("\tjumps into overlapping instruction streams\n"); for (auto bit = blocks.begin(); bit != blocks.end(); ++bit) { parsing_printf("\t block [%lx,%lx)\n", (*bit)->start(), (*bit)->end()); } parsing_printf("\tjump to 0x%lx, NOT TAIL CALL\n", addr); tailCalls[type] = false; return false; } } if(curInsn()->getCategory() == c_BranchInsn && valid && callee && callee != context && target && !context->contains(target) ) { parsing_printf("\tjump to 0x%lx, TAIL CALL\n", addr); tailCalls[type] = true; return true; } if (curInsn()->getCategory() == c_BranchInsn && valid && !callee) { if (target) { parsing_printf("\tjump to 0x%lx is known block, but not func entry, NOT TAIL CALL\n", addr); tailCalls[type] = false; return false; } else if (knownTargets.find(addr) != knownTargets.end()) { parsing_printf("\tjump to 0x%lx is known target in this function, NOT TAIL CALL\n", addr); tailCalls[type] = false; return false; } } if(allInsns.size() < 2) { if(context->addr() == _curBlk->start() && curInsn()->getCategory() == c_BranchInsn) { parsing_printf("\tjump as only insn in entry block, TAIL CALL\n"); tailCalls[type] = true; return true; } else { parsing_printf("\ttoo few insns to detect tail call\n"); context->obj()->cs()->incrementCounter(PARSE_TAILCALL_FAIL); tailCalls[type] = false; return false; } } if ((curInsn()->getCategory() == c_BranchInsn)) { //std::map<Address, Instruction::Ptr>::const_iterator prevIter = //allInsns.find(current); // Updated: there may be zero or more nops between leave->jmp allInsns_t::const_iterator prevIter = curInsnIter; --prevIter; Instruction::Ptr prevInsn = prevIter->second; while ( isNopInsn(prevInsn) && (prevIter != allInsns.begin()) ) { --prevIter; prevInsn = prevIter->second; } prevInsn = prevIter->second; if(prevInsn->getOperation().getID() == e_leave) { parsing_printf("\tprev insn was leave, TAIL CALL\n"); tailCalls[type] = true; return true; } else if(prevInsn->getOperation().getID() == e_pop) { 
if(prevInsn->isWritten(framePtr[_isrc->getArch()])) { parsing_printf("\tprev insn was %s, TAIL CALL\n", prevInsn->format().c_str()); tailCalls[type] = true; return true; } } else if(prevInsn->getOperation().getID() == e_add) { if(prevInsn->isWritten(stackPtr[_isrc->getArch()])) { bool call_fallthrough = false; if (_curBlk->start() == prevIter->first) { for (auto eit = _curBlk->sources().begin(); eit != _curBlk->sources().end(); ++eit) { if ((*eit)->type() == CALL_FT) { call_fallthrough = true; break; } } } if (call_fallthrough) { parsing_printf("\tprev insn was %s, but it is the next instruction of a function call, not a tail call %x %x\n", prevInsn->format().c_str()); } else { parsing_printf("\tprev insn was %s, TAIL CALL\n", prevInsn->format().c_str()); tailCalls[type] = true; return true; } } else parsing_printf("\tprev insn was %s, not tail call\n", prevInsn->format().c_str()); } } tailCalls[type] = false; context->obj()->cs()->incrementCounter(PARSE_TAILCALL_FAIL); return false; }