// Substitute s into all members of the set void LocationSet::substitute(Assign& a) { Exp* lhs = a.getLeft(); if (lhs == NULL) return; Exp* rhs = a.getRight(); if (rhs == NULL) return; // ? Will this ever happen? std::set<Exp*, lessExpStar>::iterator it; // Note: it's important not to change the pointer in the set of pointers to expressions, without removing and // inserting again. Otherwise, the set becomes out of order, and operations such as set comparison fail! // To avoid any funny behaviour when iterating the loop, we use the following two sets LocationSet removeSet; // These will be removed after the loop LocationSet removeAndDelete; // These will be removed then deleted LocationSet insertSet; // These will be inserted after the loop bool change; for (it = lset.begin(); it != lset.end(); it++) { Exp* loc = *it; Exp* replace; if (loc->search(lhs, replace)) { if (rhs->isTerminal()) { // This is no longer a location of interest (e.g. %pc) removeSet.insert(loc); continue; } loc = loc->clone()->searchReplaceAll(lhs, rhs, change); if (change) { loc = loc->simplifyArith(); loc = loc->simplify(); // If the result is no longer a register or memory (e.g. // r[28]-4), then delete this expression and insert any // components it uses (in the example, just r[28]) if (!loc->isRegOf() && !loc->isMemOf()) { // Note: can't delete the expression yet, because the // act of insertion into the remove set requires silent // calls to the compare function removeAndDelete.insert(*it); loc->addUsedLocs(insertSet); continue; } // Else we just want to replace it // Regardless of whether the top level expression pointer has // changed, remove and insert it from the set of pointers removeSet.insert(*it); // Note: remove the unmodified ptr insertSet.insert(loc); } } } makeDiff(removeSet); // Remove the items to be removed makeDiff(removeAndDelete); // These are to be removed as well makeUnion(insertSet); // Insert the items to be added // Now delete the expressions that are no longer needed std::set<Exp*, lessExpStar>::iterator dd; for (dd = removeAndDelete.lset.begin(); dd != removeAndDelete.lset.end(); dd++) delete *dd; // Plug that memory leak }
bool DataFlow::renameBlockVars(UserProc* proc, int n, bool clearStacks /* = false */ ) { if (++progress > 200) { std::cerr << 'r' << std::flush; progress = 0; } bool changed = false; // Need to clear the Stacks of old, renamed locations like m[esp-4] (these will be deleted, and will cause compare // failures in the Stacks, so it can't be correctly ordered and hence balanced etc, and will lead to segfaults) if (clearStacks) Stacks.clear(); // For each statement S in block n BasicBlock::rtlit rit; StatementList::iterator sit; PBB bb = BBs[n]; Statement* S; for (S = bb->getFirstStmt(rit, sit); S; S = bb->getNextStmt(rit, sit)) { // if S is not a phi function (per Appel) /* if (!S->isPhi()) */ { // For each use of some variable x in S (not just assignments) LocationSet locs; if (S->isPhi()) { PhiAssign* pa = (PhiAssign*)S; Exp* phiLeft = pa->getLeft(); if (phiLeft->isMemOf() || phiLeft->isRegOf()) phiLeft->getSubExp1()->addUsedLocs(locs); // A phi statement may use a location defined in a childless call, in which case its use collector // needs updating PhiAssign::iterator pp; for (pp = pa->begin(); pp != pa->end(); ++pp) { Statement* def = pp->def; if (def && def->isCall()) ((CallStatement*)def)->useBeforeDefine(phiLeft->clone()); } } else { // Not a phi assignment S->addUsedLocs(locs); } LocationSet::iterator xx; for (xx = locs.begin(); xx != locs.end(); xx++) { Exp* x = *xx; // Don't rename memOfs that are not renamable according to the current policy if (!canRename(x, proc)) continue; Statement* def = NULL; if (x->isSubscript()) { // Already subscripted? // No renaming required, but redo the usage analysis, in case this is a new return, and also because // we may have just removed all call livenesses // Update use information in calls, and in the proc (for parameters) Exp* base = ((RefExp*)x)->getSubExp1(); def = ((RefExp*)x)->getDef(); if (def && def->isCall()) { // Calls have UseCollectors for locations that are used before definition at the call ((CallStatement*)def)->useBeforeDefine(base->clone()); continue; } // Update use collector in the proc (for parameters) if (def == NULL) proc->useBeforeDefine(base->clone()); continue; // Don't re-rename the renamed variable } // Else x is not subscripted yet if (STACKS_EMPTY(x)) { if (!Stacks[defineAll].empty()) def = Stacks[defineAll].top(); else { // If the both stacks are empty, use a NULL definition. This will be changed into a pointer // to an implicit definition at the start of type analysis, but not until all the m[...] // have stopped changing their expressions (complicates implicit assignments considerably). def = NULL; // Update the collector at the start of the UserProc proc->useBeforeDefine(x->clone()); } } else def = Stacks[x].top(); if (def && def->isCall()) // Calls have UseCollectors for locations that are used before definition at the call ((CallStatement*)def)->useBeforeDefine(x->clone()); // Replace the use of x with x{def} in S changed = true; if (S->isPhi()) { Exp* phiLeft = ((PhiAssign*)S)->getLeft(); phiLeft->setSubExp1(phiLeft->getSubExp1()->expSubscriptVar(x, def /*, this*/)); } else { S->subscriptVar(x, def /*, this */); } } } // MVE: Check for Call and Return Statements; these have DefCollector objects that need to be updated // Do before the below, so CallStatements have not yet processed their defines if (S->isCall() || S->isReturn()) { DefCollector* col; if (S->isCall()) col = ((CallStatement*)S)->getDefCollector(); else col = ((ReturnStatement*)S)->getCollector(); col->updateDefs(Stacks, proc); } // For each definition of some variable a in S LocationSet defs; S->getDefinitions(defs); LocationSet::iterator dd; for (dd = defs.begin(); dd != defs.end(); dd++) { Exp* a = *dd; // Don't consider a if it cannot be renamed bool suitable = canRename(a, proc); if (suitable) { // Push i onto Stacks[a] // Note: we clone a because otherwise it could be an expression that gets deleted through various // modifications. This is necessary because we do several passes of this algorithm to sort out the // memory expressions Stacks[a->clone()].push(S); // Replace definition of a with definition of a_i in S (we don't do this) } // FIXME: MVE: do we need this awful hack? if (a->getOper() == opLocal) { Exp *a1 = S->getProc()->expFromSymbol(((Const*)a->getSubExp1())->getStr()); assert(a1); a = a1; // Stacks already has a definition for a (as just the bare local) if (suitable) { Stacks[a->clone()].push(S); } } } // Special processing for define-alls (presently, only childless calls). // But note that only everythings at the current memory level are defined! if (S->isCall() && ((CallStatement*)S)->isChildless() && !Boomerang::get()->assumeABI) { // S is a childless call (and we're not assuming ABI compliance) Stacks[defineAll]; // Ensure that there is an entry for defineAll std::map<Exp*, std::stack<Statement*>, lessExpStar>::iterator dd; for (dd = Stacks.begin(); dd != Stacks.end(); ++dd) { // if (dd->first->isMemDepth(memDepth)) dd->second.push(S); // Add a definition for all vars } } } // For each successor Y of block n std::vector<PBB>& outEdges = bb->getOutEdges(); unsigned numSucc = outEdges.size(); for (unsigned succ = 0; succ < numSucc; succ++) { PBB Ybb = outEdges[succ]; // Suppose n is the jth predecessor of Y int j = Ybb->whichPred(bb); // For each phi-function in Y Statement* S; for (S = Ybb->getFirstStmt(rit, sit); S; S = Ybb->getNextStmt(rit, sit)) { PhiAssign* pa = dynamic_cast<PhiAssign*>(S); // if S is not a phi function, then quit the loop (no more phi's) // Wrong: do not quit the loop: there's an optimisation that turns a PhiAssign into an ordinary Assign. // So continue, not break. if (!pa) continue; // Suppose the jth operand of the phi is a // For now, just get the LHS Exp* a = pa->getLeft(); // Only consider variables that can be renamed if (!canRename(a, proc)) continue; Statement* def; if (STACKS_EMPTY(a)) def = NULL; // No reaching definition else def = Stacks[a].top(); // "Replace jth operand with a_i" pa->putAt(j, def, a); } } // For each child X of n // Note: linear search! unsigned numBB = proc->getCFG()->getNumBBs(); for (unsigned X=0; X < numBB; X++) { if (idom[X] == n) renameBlockVars(proc, X); } // For each statement S in block n // NOTE: Because of the need to pop childless calls from the Stacks, it is important in my algorithm to process the // statments in the BB *backwards*. (It is not important in Appel's algorithm, since he always pushes a definition // for every variable defined on the Stacks). BasicBlock::rtlrit rrit; StatementList::reverse_iterator srit; for (S = bb->getLastStmt(rrit, srit); S; S = bb->getPrevStmt(rrit, srit)) { // For each definition of some variable a in S LocationSet defs; S->getDefinitions(defs); LocationSet::iterator dd; for (dd = defs.begin(); dd != defs.end(); dd++) { if (canRename(*dd, proc)) { // if ((*dd)->getMemDepth() == memDepth) std::map<Exp*, std::stack<Statement*>, lessExpStar>::iterator ss = Stacks.find(*dd); if (ss == Stacks.end()) { std::cerr << "Tried to pop " << *dd << " from Stacks; does not exist\n"; assert(0); } ss->second.pop(); } } // Pop all defs due to childless calls if (S->isCall() && ((CallStatement*)S)->isChildless()) { std::map<Exp*, std::stack<Statement*>, lessExpStar>::iterator sss; for (sss = Stacks.begin(); sss != Stacks.end(); ++sss) { if (!sss->second.empty() && sss->second.top() == S) { sss->second.pop(); } } } } return changed; }
/*============================================================================== * FUNCTION: FrontEnd::processProc * OVERVIEW: Process a procedure, given a native (source machine) address. * PARAMETERS: address - the address at which the procedure starts * pProc - the procedure object * frag - if true, this is just a fragment of a procedure * spec - if true, this is a speculative decode * os - the output stream for .rtl output * NOTE: This is a sort of generic front end. For many processors, this will be overridden * in the FrontEnd derived class, sometimes calling this function to do most of the work * RETURNS: true for a good decode (no illegal instructions) *============================================================================*/ bool FrontEnd::processProc(ADDRESS uAddr, UserProc* pProc, std::ofstream &os, bool frag /* = false */, bool spec /* = false */) { PBB pBB; // Pointer to the current basic block std::cout<<"Entering Processing Proc\n"; // just in case you missed it first_line = true; if (AssProgram) std::cout <<"Name Of Program : " << AssProgram->name << std::endl; Boomerang::get()->alert_new(pProc); // We have a set of CallStatement pointers. These may be disregarded if this is a speculative decode // that fails (i.e. an illegal instruction is found). If not, this set will be used to add to the set of calls // to be analysed in the cfg, and also to call newProc() std::list<CallStatement*> callList; // Indicates whether or not the next instruction to be decoded is the lexical successor of the current one. // Will be true for all NCTs and for CTIs with a fall through branch. bool sequentialDecode = true; Cfg* pCfg = pProc->getCFG(); // If this is a speculative decode, the second time we decode the same address, we get no cfg. Else an error. if (spec && (pCfg == 0)) return false; assert(pCfg); // Initialise the queue of control flow targets that have yet to be decoded. targetQueue.initial(uAddr); // Clear the pointer used by the caller prologue code to access the last call rtl of this procedure //decoder.resetLastCall(); // ADDRESS initAddr = uAddr; int nTotalBytes = 0; ADDRESS startAddr = uAddr; ADDRESS lastAddr = uAddr; ADDRESS address = uAddr; std::cout << "Start at address = " << uAddr << std::endl; //------IMPORTANT------------------------------------------------------------------------ list<AssemblyLabel*>::iterator lbi; list<AssemblyLine*>* temp_lines = new list<AssemblyLine*>(); if (AssProgram){ for(lbi = AssProgram->labelList->begin(); lbi != AssProgram->labelList->end(); ++lbi ){ if((*lbi)->address == uAddr){ temp_lines = (*lbi)->lineList; std::cout << "***DECODE LABEL: " << (*lbi)->name << std::endl; std::cout << "***AT ADDRESS: " << (*lbi)->address << std::endl; std::cout << "***NUMBER OF INSTRUCTION: " << (*lbi)->lineList->size() << std::endl; break; } } } list<AssemblyLine*>::iterator li; if (temp_lines->size()>0) li = temp_lines->begin(); //--------------------------------------------------------------------------------------- while ((uAddr = targetQueue.nextAddress(pCfg)) != NO_ADDRESS) { // The list of RTLs for the current basic block std::list<RTL*>* BB_rtls = new std::list<RTL*>(); // Keep decoding sequentially until a CTI without a fall through branch is decoded //ADDRESS start = uAddr; DecodeResult inst; while (sequentialDecode) { // Decode and classify the current source instruction if (Boomerang::get()->traceDecoder) LOG << "*" << uAddr << "\t"; // Decode the inst at uAddr. if(ASS_FILE){ if(li != temp_lines->end()){ inst = decodeAssemblyInstruction(uAddr,"assemblySets.at(line)", (*li)); } } else inst = decodeInstruction(uAddr); // If invalid and we are speculating, just exit if (spec && !inst.valid) return false; // Need to construct a new list of RTLs if a basic block has just been finished but decoding is // continuing from its lexical successor if (BB_rtls == NULL) BB_rtls = new std::list<RTL*>(); RTL* pRtl = inst.rtl; if (inst.valid == false) { // Alert the watchers to the problem Boomerang::get()->alert_baddecode(uAddr); // An invalid instruction. Most likely because a call did not return (e.g. call _exit()), etc. // Best thing is to emit a INVALID BB, and continue with valid instructions if (VERBOSE) { LOG << "Warning: invalid instruction at " << uAddr << ": "; // Emit the next 4 bytes for debugging for (int ii=0; ii < 4; ii++) LOG << (unsigned)(pBF->readNative1(uAddr + ii) & 0xFF) << " "; LOG << "\n"; } // Emit the RTL anyway, so we have the address and maybe some other clues BB_rtls->push_back(new RTL(uAddr)); pBB = pCfg->newBB(BB_rtls, INVALID, 0); sequentialDecode = false; BB_rtls = NULL; continue; } //pProc->unionDefine = new list<UnionDefine*>(); pProc->bitVar = AssProgram->bitVar; pProc->replacement = AssProgram->replacement; // alert the watchers that we have decoded an instruction Boomerang::get()->alert_decode(uAddr, inst.numBytes); nTotalBytes += inst.numBytes; // Check if this is an already decoded jump instruction (from a previous pass with propagation etc) // If so, we throw away the just decoded RTL (but we still may have needed to calculate the number // of bytes.. ick.) std::map<ADDRESS, RTL*>::iterator ff = previouslyDecoded.find(uAddr); if (ff != previouslyDecoded.end()) pRtl = ff->second; if (pRtl == NULL) { // This can happen if an instruction is "cancelled", e.g. call to __main in a hppa program // Just ignore the whole instruction if (inst.numBytes > 0) uAddr += inst.numBytes; continue; } // Display RTL representation if asked std::cout<<"RTL: "<<std::endl; std::ostringstream st; pRtl->print(st); std::cout << st.str().c_str()<<std::endl; ADDRESS uDest; // For each Statement in the RTL //std::list<Statement*>& sl = pRtl->getList(); std::list<Statement*> sl = pRtl->getList(); // Make a copy (!) of the list. This is needed temporarily to work around the following problem. // We are currently iterating an RTL, which could be a return instruction. The RTL is passed to // createReturnBlock; if this is not the first return statement, it will get cleared, and this will // cause problems with the current iteration. The effects seem to be worse for MSVC/Windows. // This problem will likely be easier to cope with when the RTLs are removed, and there are special // Statements to mark the start of instructions (and their native address). // FIXME: However, this workaround breaks logic below where a GOTO is changed to a CALL followed by a return // if it points to the start of a known procedure std::list<Statement*>::iterator ss; #if 1 for (ss = sl.begin(); ss != sl.end(); ss++) { // } #else // The counter is introduced because ss != sl.end() does not work as it should // FIXME: why? Does this really fix the problem? int counter = sl.size(); for (ss = sl.begin(); counter > 0; ss++, counter--) { #endif Statement* s = *ss; s->setProc(pProc); // let's do this really early! if (refHints.find(pRtl->getAddress()) != refHints.end()) { const char *nam = refHints[pRtl->getAddress()].c_str(); ADDRESS gu = prog->getGlobalAddr((char*)nam); if (gu != NO_ADDRESS) { s->searchAndReplace(new Const((int)gu), new Unary(opAddrOf, Location::global(nam, pProc))); } } s->simplify(); GotoStatement* stmt_jump = static_cast<GotoStatement*>(s); // Check for a call to an already existing procedure (including self recursive jumps), or to the PLT // (note that a LibProc entry for the PLT function may not yet exist) ADDRESS dest; Proc* proc; if (s->getKind() == STMT_GOTO) { dest = stmt_jump->getFixedDest(); if (dest != NO_ADDRESS) { proc = prog->findProc(dest); if (proc == NULL) { if(!ASS_FILE){ if (pBF->IsDynamicLinkedProc(dest)) proc = prog->setNewProc(dest); } } if (proc != NULL && proc != (Proc*)-1) { s = new CallStatement(); CallStatement *call = static_cast<CallStatement*>(s); call->setDest(dest); call->setDestProc(proc); call->setReturnAfterCall(true); // also need to change it in the actual RTL std::list<Statement*>::iterator ss1 = ss; ss1++; assert(ss1 == sl.end()); pRtl->replaceLastStmt(s); *ss = s; } } } switch (s->getKind()) { case STMT_GOTO: { uDest = stmt_jump->getFixedDest(); // Handle one way jumps and computed jumps separately if (uDest != NO_ADDRESS) { BB_rtls->push_back(pRtl); sequentialDecode = false; pBB = pCfg->newBB(BB_rtls,ONEWAY,1); BB_rtls = NULL; // Clear when make new BB // Exit the switch now if the basic block already existed if (pBB == 0) { break; } // Add the out edge if it is to a destination within the // procedure if (uDest < pBF->getLimitTextHigh()) { targetQueue.visit(pCfg, uDest, pBB); pCfg->addOutEdge(pBB, uDest, true); } else { std::cout<<"Entering Processing Proc5\n"; if (!ASS_FILE) LOG << "Error: Instruction at " << uAddr << " branches beyond end of section, to " << uDest << "\n"; else{ targetQueue.visit(pCfg, uDest, pBB); pCfg->addOutEdge(pBB, uDest, true); } } } break; } case STMT_CASE: { Exp* pDest = stmt_jump->getDest(); if (pDest == NULL) { // Happens if already analysed (now redecoding) // SWITCH_INFO* psi = ((CaseStatement*)stmt_jump)->getSwitchInfo(); BB_rtls->push_back(pRtl); pBB = pCfg->newBB(BB_rtls, NWAY, 0); // processSwitch will update num outedges pBB->processSwitch(pProc); // decode arms, set out edges, etc sequentialDecode = false; // Don't decode after the jump BB_rtls = NULL; // New RTLList for next BB break; // Just leave it alone } // Check for indirect calls to library functions, especially in Win32 programs if (pDest && pDest->getOper() == opMemOf && pDest->getSubExp1()->getOper() == opIntConst && pBF->IsDynamicLinkedProcPointer(((Const*)pDest->getSubExp1())->getAddr())) { if (VERBOSE) LOG << "jump to a library function: " << stmt_jump << ", replacing with a call/ret.\n"; // jump to a library function // replace with a call ret // TODO: std::string func = pBF->GetDynamicProcName( ((Const*)stmt_jump->getDest()->getSubExp1())->getAddr()); //------------------------------------ CallStatement *call = new CallStatement; call->setDest(stmt_jump->getDest()->clone()); LibProc *lp = pProc->getProg()->getLibraryProc(func.c_str()); if (lp == NULL) LOG << "getLibraryProc returned NULL, aborting\n"; assert(lp); call->setDestProc(lp); std::list<Statement*>* stmt_list = new std::list<Statement*>; stmt_list->push_back(call); BB_rtls->push_back(new RTL(pRtl->getAddress(), stmt_list)); pBB = pCfg->newBB(BB_rtls, CALL, 1); appendSyntheticReturn(pBB, pProc, pRtl); sequentialDecode = false; BB_rtls = NULL; if (pRtl->getAddress() == pProc->getNativeAddress()) { // it's a thunk // Proc *lp = prog->findProc(func.c_str()); func = std::string("__imp_") + func; pProc->setName(func.c_str()); //lp->setName(func.c_str()); Boomerang::get()->alert_update_signature(pProc); } callList.push_back(call); ss = sl.end(); ss--; // get out of the loop break; } BB_rtls->push_back(pRtl); // We create the BB as a COMPJUMP type, then change to an NWAY if it turns out to be a switch stmt pBB = pCfg->newBB(BB_rtls, COMPJUMP, 0); LOG << "COMPUTED JUMP at " << uAddr << ", pDest = " << pDest << "\n"; if (Boomerang::get()->noDecompile) { // try some hacks if (pDest->isMemOf() && pDest->getSubExp1()->getOper() == opPlus && pDest->getSubExp1()->getSubExp2()->isIntConst()) { // assume subExp2 is a jump table ADDRESS jmptbl = ((Const*)pDest->getSubExp1()->getSubExp2())->getInt(); unsigned int i; for (i = 0; ; i++) { ADDRESS uDest = pBF->readNative4(jmptbl + i * 4); if (pBF->getLimitTextLow() <= uDest && uDest < pBF->getLimitTextHigh()) { LOG << " guessed uDest " << uDest << "\n"; targetQueue.visit(pCfg, uDest, pBB); pCfg->addOutEdge(pBB, uDest, true); } else break; } pBB->updateType(NWAY, i); } } sequentialDecode = false; BB_rtls = NULL; // New RTLList for next BB break; } case STMT_BRANCH: { uDest = stmt_jump->getFixedDest(); BB_rtls->push_back(pRtl); pBB = pCfg->newBB(BB_rtls, TWOWAY, 2); // Stop decoding sequentially if the basic block already existed otherwise complete the basic block if (pBB == 0) sequentialDecode = false; else { // Add the out edge if it is to a destination within the procedure if (!ASS_FILE){ if (uDest < pBF->getLimitTextHigh()) { targetQueue.visit(pCfg, uDest, pBB); pCfg->addOutEdge(pBB, uDest, true); } else LOG << "Error: Instruction at " << uAddr << " branches beyond end of section, to " << uDest << "\n"; } else { targetQueue.visit(pCfg, uDest, pBB); pCfg->addOutEdge(pBB, uDest, true); } // Add the fall-through outedge pCfg->addOutEdge(pBB, uAddr + inst.numBytes); } // Create the list of RTLs for the next basic block and continue with the next instruction. BB_rtls = NULL; break; } case STMT_CALL: { CallStatement* call = static_cast<CallStatement*>(s); // Check for a dynamic linked library function // TODO: solution dont use pBF if (!ASS_FILE){ if (call->getDest()->getOper() == opMemOf && call->getDest()->getSubExp1()->getOper() == opIntConst && pBF->IsDynamicLinkedProcPointer(((Const*)call->getDest()->getSubExp1())->getAddr())) { // Dynamic linked proc pointers are treated as static. const char *nam = pBF->GetDynamicProcName( ((Const*)call->getDest()->getSubExp1())->getAddr()); Proc *p = pProc->getProg()->getLibraryProc(nam); call->setDestProc(p); call->setIsComputed(false); } } else { if (call->getDest()->getOper() == opMemOf && call->getDest()->getSubExp1()->getOper() == opIntConst && funcsType.find(((Const*)call->getDest()->getSubExp1())->getAddr())->second) { // Dynamic linked proc pointers are treated as static. const char *nam = namesList.find(((Const*)call->getDest()->getSubExp1())->getAddr())->second; Proc *p = pProc->getProg()->getLibraryProc(nam); call->setDestProc(p); call->setIsComputed(false); } } // Is the called function a thunk calling a library function? // A "thunk" is a function which only consists of: "GOTO library_function" // Should i modify if (!ASS_FILE){ if( call && call->getFixedDest() != NO_ADDRESS ) { // Get the address of the called function. ADDRESS callAddr=call->getFixedDest(); // It should not be in the PLT either, but getLimitTextHigh() takes this into account if (callAddr < pBF->getLimitTextHigh()) { // Decode it. DecodeResult decoded=decodeInstruction(callAddr); if (decoded.valid) { // is the instruction decoded succesfully? // Yes, it is. Create a Statement from it. RTL *rtl = decoded.rtl; Statement* first_statement = *rtl->getList().begin(); if (first_statement) { first_statement->setProc(pProc); first_statement->simplify(); GotoStatement* stmt_jump = static_cast<GotoStatement*>(first_statement); // In fact it's a computed (looked up) jump, so the jump seems to be a case // statement. //TODO : We dont handle this case if ( first_statement->getKind() == STMT_CASE && stmt_jump->getDest()->getOper() == opMemOf && stmt_jump->getDest()->getSubExp1()->getOper() == opIntConst && pBF->IsDynamicLinkedProcPointer(((Const*)stmt_jump->getDest()->getSubExp1())-> getAddr())) // Is it an "DynamicLinkedProcPointer"? { // Yes, it's a library function. Look up it's name. ADDRESS a = ((Const*)stmt_jump->getDest()->getSubExp1())->getAddr(); // TODO : We dont handle this case const char *nam = pBF->GetDynamicProcName(a); // Assign the proc to the call Proc *p = pProc->getProg()->getLibraryProc(nam); if (call->getDestProc()) { // prevent unnecessary __imp procs prog->removeProc(call->getDestProc()->getName()); } call->setDestProc(p); call->setIsComputed(false); call->setDest(Location::memOf(new Const(a))); } } } } } } // Treat computed and static calls separately if (call->isComputed()) { BB_rtls->push_back(pRtl); pBB = pCfg->newBB(BB_rtls, COMPCALL, 1); // Stop decoding sequentially if the basic block already // existed otherwise complete the basic block if (pBB == 0) sequentialDecode = false; else pCfg->addOutEdge(pBB, uAddr + inst.numBytes); // Add this call to the list of calls to analyse. We won't // be able to analyse it's callee(s), of course. callList.push_back(call); } else { // Static call // Find the address of the callee. ADDRESS uNewAddr = call->getFixedDest(); // Calls with 0 offset (i.e. call the next instruction) are simply pushing the PC to the // stack. Treat these as non-control flow instructions and continue. if (uNewAddr == uAddr + inst.numBytes) break; // Call the virtual helper function. If implemented, will check for machine specific funcion // calls if (helperFunc(uNewAddr, uAddr, BB_rtls)) { // We have already added to BB_rtls pRtl = NULL; // Discard the call semantics break; } BB_rtls->push_back(pRtl); // Add this non computed call site to the set of call sites which need to be analysed later. //pCfg->addCall(call); callList.push_back(call); // Record the called address as the start of a new procedure if it didn't already exist. if (uNewAddr && uNewAddr != NO_ADDRESS && pProc->getProg()->findProc(uNewAddr) == NULL) { callList.push_back(call); //newProc(pProc->getProg(), uNewAddr); if (Boomerang::get()->traceDecoder) LOG << "p" << uNewAddr << "\t"; } // Check if this is the _exit or exit function. May prevent us from attempting to decode // invalid instructions, and getting invalid stack height errors const char* name; if (!ASS_FILE){ name = pBF->SymbolByAddress(uNewAddr); if (name == NULL && call->getDest()->isMemOf() && call->getDest()->getSubExp1()->isIntConst()) { ADDRESS a = ((Const*)call->getDest()->getSubExp1())->getInt(); if (pBF->IsDynamicLinkedProcPointer(a)) name = pBF->GetDynamicProcName(a); } } else { name = namesList.find(uNewAddr)->second; } if (name && noReturnCallDest(name)) { // Make sure it has a return appended (so there is only one exit from the function) //call->setReturnAfterCall(true); // I think only the Sparc frontend cares // Create the new basic block pBB = pCfg->newBB(BB_rtls, CALL, 1); appendSyntheticReturn(pBB, pProc, pRtl); // Stop decoding sequentially sequentialDecode = false; } else { // Create the new basic block pBB = pCfg->newBB(BB_rtls, CALL, 1); if (call->isReturnAfterCall()) { // Constuct the RTLs for the new basic block std::list<RTL*>* rtls = new std::list<RTL*>(); // The only RTL in the basic block is one with a ReturnStatement std::list<Statement*>* sl = new std::list<Statement*>; sl->push_back(new ReturnStatement()); rtls->push_back(new RTL(pRtl->getAddress()+1, sl)); BasicBlock* returnBB = pCfg->newBB(rtls, RET, 0); // Add out edge from call to return pCfg->addOutEdge(pBB, returnBB); // Put a label on the return BB (since it's an orphan); a jump will be reqd pCfg->setLabel(returnBB); pBB->setJumpReqd(); // Mike: do we need to set return locations? // This ends the function sequentialDecode = false; } else { // Add the fall through edge if the block didn't // already exist if (pBB != NULL) pCfg->addOutEdge(pBB, uAddr+inst.numBytes); } } } extraProcessCall(call, BB_rtls); // Create the list of RTLs for the next basic block and continue with the next instruction. BB_rtls = NULL; break; } case STMT_RET: { // Stop decoding sequentially sequentialDecode = false; pBB = createReturnBlock(pProc, BB_rtls, pRtl); // Create the list of RTLs for the next basic block and // continue with the next instruction. BB_rtls = NULL; // New RTLList for next BB } break; case STMT_BOOLASSIGN: // This is just an ordinary instruction; no control transfer // Fall through case STMT_JUNCTION: // FIXME: Do we need to do anything here? case STMT_ASSIGN: case STMT_PHIASSIGN: case STMT_IMPASSIGN: case STMT_IMPREF: // Do nothing break; } // switch (s->getKind()) } if (BB_rtls && pRtl) // If non null, we haven't put this RTL into a the current BB as yet BB_rtls->push_back(pRtl); if (inst.reDecode) // Special case: redecode the last instruction, without advancing uAddr by numBytes continue; uAddr += inst.numBytes; if (uAddr > lastAddr) lastAddr = uAddr; // If sequentially decoding, check if the next address happens to be the start of an existing BB. If so, // finish off the current BB (if any RTLs) as a fallthrough, and no need to decode again (unless it's an // incomplete BB, then we do decode it). // In fact, mustn't decode twice, because it will muck up the coverage, but also will cause subtle problems // like add a call to the list of calls to be processed, then delete the call RTL (e.g. Pentium 134.perl // benchmark) if (sequentialDecode && pCfg->existsBB(uAddr)) { // Create the fallthrough BB, if there are any RTLs at all if (BB_rtls) { PBB pBB = pCfg->newBB(BB_rtls, FALL, 1); // Add an out edge to this address if (pBB) { pCfg->addOutEdge(pBB, uAddr); BB_rtls = NULL; // Need new list of RTLs } } // Pick a new address to decode from, if the BB is complete if (!pCfg->isIncomplete(uAddr)) sequentialDecode = false; } if(AssProgram) ++ li ; } // while sequentialDecode // Add this range to the coverage // pProc->addRange(start, uAddr); // Must set sequentialDecode back to true sequentialDecode = true; } // while nextAddress() != NO_ADDRESS //ProgWatcher *w = prog->getWatcher(); //if (w) // w->alert_done(pProc, initAddr, lastAddr, nTotalBytes); // Add the callees to the set of CallStatements, and also to the Prog object std::list<CallStatement*>::iterator it; for (it = callList.begin(); it != callList.end(); it++) { ADDRESS dest = (*it)->getFixedDest(); // Don't speculatively decode procs that are outside of the main text section, apart from dynamically // linked ones (in the .plt) // TODO: change pBF pointers if (!ASS_FILE){ if (pBF->IsDynamicLinkedProc(dest) || !spec || (dest < pBF->getLimitTextHigh())) { pCfg->addCall(*it); // Don't visit the destination of a register call Proc *np = (*it)->getDestProc(); if (np == NULL && dest != NO_ADDRESS) { //np = newProc(pProc->getProg(), dest); np = pProc->getProg()->setNewProc(dest); } if (np != NULL) { np->setFirstCaller(pProc); pProc->addCallee(np); } } } else{ pCfg->addCall(*it); // Don't visit the destination of a register call Proc *np = (*it)->getDestProc(); if (np == NULL && dest != NO_ADDRESS) { //np = newProc(pProc->getProg(), dest); np = pProc->getProg()->setNewProc(dest); } if (np != NULL) { np->setFirstCaller(pProc); pProc->addCallee(np); } } } Boomerang::get()->alert_decode(pProc, startAddr, lastAddr, nTotalBytes); std::cout<< "finished processing proc " << pProc->getName() << " at address " << pProc->getNativeAddress() << "\n"; if (VERBOSE) LOG << "finished processing proc " << pProc->getName() << " at address " << pProc->getNativeAddress() << "\n"; return true; } /*============================================================================== * FUNCTION: FrontEnd::getInst * OVERVIEW: Fetch the smallest (nop-sized) instruction, in an endianness independent manner * NOTE: Frequently overridden * PARAMETERS: addr - host address to getch from * RETURNS: An integer with the instruction in it *============================================================================*/ int FrontEnd::getInst(int addr) { return (int)(*(unsigned char*)addr); } /*============================================================================== * FUNCTION: TargetQueue::visit * OVERVIEW: Visit a destination as a label, i.e. check whether we need to queue it as a new BB to create later. * Note: at present, it is important to visit an address BEFORE an out edge is added to that address. * This is because adding an out edge enters the address into the Cfg's BB map, and it looks like the * BB has already been visited, and it gets overlooked. It would be better to have a scheme whereby * the order of calling these functions (i.e. visit() and AddOutEdge()) did not matter. * PARAMETERS: pCfg - the enclosing CFG * uNewAddr - the address to be checked * pNewBB - set to the lower part of the BB if the address * already exists as a non explicit label (BB has to be split) * RETURNS: <nothing> *============================================================================*/ void TargetQueue::visit(Cfg* pCfg, ADDRESS uNewAddr, PBB& pNewBB) { // Find out if we've already parsed the destination bool bParsed = pCfg->label(uNewAddr, pNewBB); // Add this address to the back of the local queue, // if not already processed if (!bParsed) { targets.push(uNewAddr); if (Boomerang::get()->traceDecoder) LOG << ">" << uNewAddr << "\t"; } }