// Find the first Assignment with loc on the LHS Assignment * StatementList::findOnLeft(Exp *loc) const { for (const auto &s : slist) { auto as = (Assignment *)s; Exp *left = as->getLeft(); if (*left == *loc) return as; if (left->isLocal()) { auto l = (Location *)left; Exp *e = l->getProc()->expFromSymbol(((Const *)l->getSubExp1())->getStr()); if (e && ((*e == *loc) || (e->isSubscript() && *e->getSubExp1() == *loc))) { return as; } } } return nullptr; }
// Apply this transformer's rule (match / optional where / become) to e.
//
//  e     the expression to test against the match pattern
//  bMod  set to true when the rule fires; not written otherwise
//
// Returns e unchanged when the pattern does not match or the where condition is rejected by checkCond().
// Otherwise returns a clone of become in which, for every entry of the bindings list, the entry's first
// subexpression has been replaced by its second. Asserts if the result still contains a var(wild).
Exp *GenericExpTransformer::applyTo(Exp *e, bool &bMod) {
    Exp *bindings = e->match(match);
    if (!bindings) {
#if 0
        if (e->getOper() == match->getOper())
            LOG << "match failed: " << e << " with " << match << "\n";
#endif
        return e;
    }

    // The condition is evaluated on a clone of where; checkCond may also extend bindings
    if (where && !checkCond(where->clone(), bindings)) {
        return e;
    }

    LOG << "applying generic exp transformer match: " << match;
    if (where) {
        LOG << " where: " << where;
    }
    LOG << " become: " << become;
    LOG << " to: " << e;
    LOG << " bindings: " << bindings << "\n";

    // Instantiate the replacement: walk the bindings list up to its opNil terminator
    bool change;                        // Receives searchReplaceAll's change flag; never read here
    Exp *result = become->clone();
    Exp *node = bindings;
    while (node->getOper() != opNil) {
        result = result->searchReplaceAll(node->getSubExp1()->getSubExp1(), node->getSubExp1()->getSubExp2(),
                                          change);
        node = node->getSubExp2();
    }

    LOG << "calculated result: " << result << "\n";
    bMod = true;

    // A var(wild) left in the result is reported and treated as fatal
    Exp *leftover;
    if (result->search(new Unary(opVar, new Terminal(opWild)), leftover)) {
        LOG << "error: variable " << leftover << " in result!\n";
        assert(false);
    }

    return result;
}
// Apply every registered transformer to p, bottom up, memoising the result.
//
//  p     the expression to transform; it is cloned, never modified directly
//  bMod  OR-ed with true when a subexpression or the expression itself was modified
//
// The cache holds (opEquals input result) pairs. On a cache hit a clone of the stored result is returned
// immediately.
// NOTE(review): on a cache hit bMod is not touched, even if the stored result differs from p - confirm
// that callers do not rely on bMod in that case.
Exp *ExpTransformer::applyAllTo(Exp *p, bool &bMod) {
    // Seen this expression before? Then hand back a copy of the remembered result
    for (std::list<Exp*>::iterator hit = cache.begin(); hit != cache.end(); ++hit) {
        Exp *entry = *hit;
        if (*entry->getSubExp1() == *p) {
            return entry->getSubExp2()->clone();
        }
    }

    Exp *e = p->clone();

    // Transform the (up to three) subexpressions first; all three are fetched before any is replaced
    Exp *subs[3] = { e->getSubExp1(), e->getSubExp2(), e->getSubExp3() };
    for (int i = 0; i < 3; i++) {
        if (!subs[i]) {
            continue;
        }
        bool subMod = false;
        subs[i] = applyAllTo(subs[i], subMod);
        if (subMod) {
            // Only write the subexpression back when it actually changed
            switch (i) {
            case 0: e->setSubExp1(subs[i]); break;
            case 1: e->setSubExp2(subs[i]); break;
            case 2: e->setSubExp3(subs[i]); break;
            }
        }
        bMod |= subMod;
        // if (subMod) i--;
    }

#if 0
    LOG << "applyAllTo called on " << e << "\n";
#endif

    // One pass over all transformers (the repeat-until-stable loop is disabled)
    bool topMod = false;
    //do {
    std::list<ExpTransformer *>::iterator tr = transformers.begin();
    while (tr != transformers.end()) {
        e = (*tr)->applyTo(e, topMod);
        bMod |= topMod;
        ++tr;
    }
    //} while (topMod);

    // Remember input -> result for later calls
    cache.push_back(new Binary(opEquals, p->clone(), e->clone()));
    return e;
}
// Find the first Assignment with loc on the LHS Assignment* StatementList::findOnLeft(Exp* loc) { if (slist.size() == 0) return NULL; for (iterator it = slist.begin(); it != slist.end(); it++) { Exp *left = ((Assignment*)*it)->getLeft(); if (*left == *loc) return (Assignment*)*it; if (left->isLocal()) { Location *l = (Location*)left; Exp *e = l->getProc()->expFromSymbol(((Const*)l->getSubExp1())->getStr()); if (e && ((*e == *loc) || (e->isSubscript() && *e->getSubExp1() == *loc))) { return (Assignment*)*it; } } } return NULL; }
bool GenericExpTransformer::checkCond(Exp *cond, Exp *bindings) { switch (cond->getOper()) { case opAnd: return checkCond(cond->getSubExp1(), bindings) && checkCond(cond->getSubExp2(), bindings); case opEquals: { Exp *lhs = cond->getSubExp1(), *rhs = cond->getSubExp2(); for (Exp *l = bindings; l->getOper() != opNil; l = l->getSubExp2()) { Exp *e = l->getSubExp1(); bool change = false; lhs = lhs->searchReplaceAll(e->getSubExp1()->clone(), e->getSubExp2()->clone(), change); #if 0 if (change) LOG << "replaced " << e->getSubExp1() << " with " << e->getSubExp2() << "\n"; #endif change = false; rhs = rhs->searchReplaceAll(e->getSubExp1()->clone(), e->getSubExp2()->clone(), change); #if 0 if (change) LOG << "replaced " << e->getSubExp1() << " with " << e->getSubExp2() << "\n"; #endif } if (lhs->getOper() == opTypeOf) { #if 0 // ADHOC TA Type *ty = lhs->getSubExp1()->getType(); #else Type *ty = NULL; #endif if (ty == NULL) { #if 0 if (VERBOSE) LOG << "no type for typeof " << lhs << "\n"; #endif return false; } lhs = new TypeVal(ty); #if 0 LOG << "got typeval: " << lhs << "\n"; #endif } if (lhs->getOper() == opKindOf) { OPER op = lhs->getSubExp1()->getOper(); lhs = new Const(operStrings[op]); } rhs = applyFuncs(rhs); #if 0 LOG << "check equals in cond: " << lhs << " == " << rhs << "\n"; #endif if (lhs->getOper() == opVar) { Exp *le; for (le = bindings; le->getOper() != opNil && le->getSubExp2()->getOper() != opNil; le = le->getSubExp2()) ; assert(le->getOper() != opNil); le->setSubExp2(new Binary(opList, new Binary(opEquals, lhs->clone(), rhs->clone()), new Terminal(opNil))); #if 0 LOG << "bindings now: " << bindings << "\n"; #endif return true; } if (*lhs == *rhs) return true; #if 0 // ADHOC TA if (lhs->getOper() == opTypeVal && rhs->getOper() == opTypeVal && lhs->getType()->resolvesToCompound() && rhs->getType()->isCompound()) return true; #endif Exp *new_bindings = lhs->match(rhs); if (new_bindings == NULL) return false; #if 0 LOG << "matched lhs with rhs, bindings: " << 
new_bindings << "\n"; #endif Exp *le; for (le = bindings; le->getOper() != opNil && le->getSubExp2()->getOper() != opNil; le = le->getSubExp2()) ; assert(le->getOper() != opNil); le->setSubExp2(new_bindings); #if 0 LOG << "bindings now: " << bindings << "\n"; #endif return true; } default: LOG << "don't know how to handle oper " << operStrings[cond->getOper()] << " in cond.\n"; } return false; }
// Rename (subscript) the variables used in basic block n, then recurse over the blocks that n immediately
// dominates. The comments in the body refer to Appel's renaming algorithm, with this implementation's own
// handling of phi statements, call/return collectors and "define-all" childless calls.
//
// The function works in four phases:
//  1. Forward over the statements of block n: subscript every renamable use with the definition on top of
//     its stack in Stacks (or the top of the defineAll stack, or NULL), update use/def collectors, and push
//     each statement onto the stacks of the locations it defines.
//  2. For every successor of n: fill in the operand of each phi that corresponds to the edge from n.
//  3. Recurse into every block X with idom[X] == n.
//  4. Backward over the statements of block n: pop what phase 1 pushed.
//
//  proc         the procedure being processed; receives the use-before-define information
//  n            index of the basic block in BBs
//  clearStacks  when true, Stacks is emptied before anything else (defaults to false per the signature)
//
// Returns true if at least one use in block n itself was subscripted.
// NOTE(review): the results of the recursive calls in phase 3 are discarded, so renames that happen only
// in dominated blocks are not reflected in the return value - confirm this is intended.
bool DataFlow::renameBlockVars(UserProc* proc, int n, bool clearStacks /* = false */ ) {
    // Progress indicator: once the call counter exceeds 200, emit an 'r' on stderr and restart the count
    if (++progress > 200) {
        std::cerr << 'r' << std::flush;
        progress = 0;
    }
    bool changed = false;

    // Need to clear the Stacks of old, renamed locations like m[esp-4] (these will be deleted, and will cause compare
    // failures in the Stacks, so it can't be correctly ordered and hence balanced etc, and will lead to segfaults)
    if (clearStacks) Stacks.clear();

    // For each statement S in block n
    BasicBlock::rtlit rit; StatementList::iterator sit;
    PBB bb = BBs[n];
    Statement* S;
    for (S = bb->getFirstStmt(rit, sit); S; S = bb->getNextStmt(rit, sit)) {
        // if S is not a phi function (per Appel)
        // (the test is disabled: phi statements are processed here too, see the isPhi() branches below)
        /* if (!S->isPhi()) */ {
            // For each use of some variable x in S (not just assignments)
            LocationSet locs;
            if (S->isPhi()) {
                // For a phi, only the locations used inside the address expression of an m[...] or r[...]
                // left hand side count as uses
                PhiAssign* pa = (PhiAssign*)S;
                Exp* phiLeft = pa->getLeft();
                if (phiLeft->isMemOf() || phiLeft->isRegOf())
                    phiLeft->getSubExp1()->addUsedLocs(locs);
                // A phi statement may use a location defined in a childless call, in which case its use collector
                // needs updating
                PhiAssign::iterator pp;
                for (pp = pa->begin(); pp != pa->end(); ++pp) {
                    Statement* def = pp->def;
                    if (def && def->isCall())
                        ((CallStatement*)def)->useBeforeDefine(phiLeft->clone());
                }
            }
            else {              // Not a phi assignment
                S->addUsedLocs(locs);
            }
            LocationSet::iterator xx;
            for (xx = locs.begin(); xx != locs.end(); xx++) {
                Exp* x = *xx;
                // Don't rename memOfs that are not renamable according to the current policy
                if (!canRename(x, proc)) continue;
                Statement* def = NULL;
                if (x->isSubscript()) {                 // Already subscripted?
                    // No renaming required, but redo the usage analysis, in case this is a new return, and also because
                    // we may have just removed all call livenesses
                    // Update use information in calls, and in the proc (for parameters)
                    Exp* base = ((RefExp*)x)->getSubExp1();
                    def = ((RefExp*)x)->getDef();
                    if (def && def->isCall()) {
                        // Calls have UseCollectors for locations that are used before definition at the call
                        ((CallStatement*)def)->useBeforeDefine(base->clone());
                        continue;
                    }
                    // Update use collector in the proc (for parameters)
                    if (def == NULL)
                        proc->useBeforeDefine(base->clone());
                    continue;                           // Don't re-rename the renamed variable
                }
                // Else x is not subscripted yet
                if (STACKS_EMPTY(x)) {
                    // No definition of x itself is on the stacks; fall back to the most recent define-all
                    if (!Stacks[defineAll].empty())
                        def = Stacks[defineAll].top();
                    else {
                        // If the both stacks are empty, use a NULL definition. This will be changed into a pointer
                        // to an implicit definition at the start of type analysis, but not until all the m[...]
                        // have stopped changing their expressions (complicates implicit assignments considerably).
                        def = NULL;
                        // Update the collector at the start of the UserProc
                        proc->useBeforeDefine(x->clone());
                    }
                }
                else
                    def = Stacks[x].top();
                if (def && def->isCall())
                    // Calls have UseCollectors for locations that are used before definition at the call
                    ((CallStatement*)def)->useBeforeDefine(x->clone());
                // Replace the use of x with x{def} in S
                changed = true;
                if (S->isPhi()) {
                    // Only the address expression of the phi's left hand side is subscripted
                    Exp* phiLeft = ((PhiAssign*)S)->getLeft();
                    phiLeft->setSubExp1(phiLeft->getSubExp1()->expSubscriptVar(x, def /*, this*/));
                } else {
                    S->subscriptVar(x, def /*, this */);
                }
            }
        }

        // MVE: Check for Call and Return Statements; these have DefCollector objects that need to be updated
        // Do before the below, so CallStatements have not yet processed their defines
        if (S->isCall() || S->isReturn()) {
            DefCollector* col;
            if (S->isCall())
                col = ((CallStatement*)S)->getDefCollector();
            else
                col = ((ReturnStatement*)S)->getCollector();
            col->updateDefs(Stacks, proc);
        }

        // For each definition of some variable a in S
        LocationSet defs;
        S->getDefinitions(defs);
        LocationSet::iterator dd;
        for (dd = defs.begin(); dd != defs.end(); dd++) {
            Exp* a = *dd;
            // Don't consider a if it cannot be renamed
            bool suitable = canRename(a, proc);
            if (suitable) {
                // Push i onto Stacks[a]
                // Note: we clone a because otherwise it could be an expression that gets deleted through various
                // modifications. This is necessary because we do several passes of this algorithm to sort out the
                // memory expressions
                Stacks[a->clone()].push(S);
                // Replace definition of a with definition of a_i in S (we don't do this)
            }
            // FIXME: MVE: do we need this awful hack?
            // For a local, S is additionally pushed on the stack of the expression the local's symbol maps to.
            // NOTE(review): the pop loop at the end of this function pops only Stacks[*dd], not this second
            // entry - confirm that the stacks stay balanced for locals.
            if (a->getOper() == opLocal) {
                Exp *a1 = S->getProc()->expFromSymbol(((Const*)a->getSubExp1())->getStr());
                assert(a1);
                a = a1;
                // Stacks already has a definition for a (as just the bare local)
                if (suitable) {
                    Stacks[a->clone()].push(S);
                }
            }
        }
        // Special processing for define-alls (presently, only childless calls).
        // But note that only everything at the current memory level is defined!
        if (S->isCall() && ((CallStatement*)S)->isChildless() && !Boomerang::get()->assumeABI) {
            // S is a childless call (and we're not assuming ABI compliance)
            Stacks[defineAll];                          // Ensure that there is an entry for defineAll
            std::map<Exp*, std::stack<Statement*>, lessExpStar>::iterator dd;
            for (dd = Stacks.begin(); dd != Stacks.end(); ++dd) {
                // if (dd->first->isMemDepth(memDepth))
                dd->second.push(S);                     // Add a definition for all vars
            }
        }
    }

    // For each successor Y of block n
    std::vector<PBB>& outEdges = bb->getOutEdges();
    unsigned numSucc = outEdges.size();
    for (unsigned succ = 0; succ < numSucc; succ++) {
        PBB Ybb = outEdges[succ];
        // Suppose n is the jth predecessor of Y
        int j = Ybb->whichPred(bb);
        // For each phi-function in Y
        Statement* S;                                   // Shadows the outer S for the duration of this loop body
        for (S = Ybb->getFirstStmt(rit, sit); S; S = Ybb->getNextStmt(rit, sit)) {
            PhiAssign* pa = dynamic_cast<PhiAssign*>(S);
            // if S is not a phi function, then quit the loop (no more phi's)
            // Wrong: do not quit the loop: there's an optimisation that turns a PhiAssign into an ordinary Assign.
            // So continue, not break.
            if (!pa) continue;
            // Suppose the jth operand of the phi is a
            // For now, just get the LHS
            Exp* a = pa->getLeft();
            // Only consider variables that can be renamed
            if (!canRename(a, proc)) continue;
            Statement* def;
            if (STACKS_EMPTY(a))
                def = NULL;                             // No reaching definition
            else
                def = Stacks[a].top();
            // "Replace jth operand with a_i"
            pa->putAt(j, def, a);
        }
    }

    // For each child X of n
    // Note: linear search!
    unsigned numBB = proc->getCFG()->getNumBBs();
    for (unsigned X=0; X < numBB; X++) {
        if (idom[X] == n)
            renameBlockVars(proc, X);                   // Return value deliberately or accidentally unused
    }

    // For each statement S in block n
    // NOTE: Because of the need to pop childless calls from the Stacks, it is important in my algorithm to process the
    // statements in the BB *backwards*. (It is not important in Appel's algorithm, since he always pushes a definition
    // for every variable defined on the Stacks).
    BasicBlock::rtlrit rrit; StatementList::reverse_iterator srit;
    for (S = bb->getLastStmt(rrit, srit); S; S = bb->getPrevStmt(rrit, srit)) {
        // For each definition of some variable a in S
        LocationSet defs;
        S->getDefinitions(defs);
        LocationSet::iterator dd;
        for (dd = defs.begin(); dd != defs.end(); dd++) {
            if (canRename(*dd, proc)) {
                // if ((*dd)->getMemDepth() == memDepth)
                std::map<Exp*, std::stack<Statement*>, lessExpStar>::iterator ss = Stacks.find(*dd);
                if (ss == Stacks.end()) {
                    // A renamable definition must have been pushed in the forward pass; treat a miss as fatal
                    std::cerr << "Tried to pop " << *dd << " from Stacks; does not exist\n";
                    assert(0);
                }
                ss->second.pop();
            }
        }
        // Pop all defs due to childless calls
        // (only stacks whose top entry is S are popped, so this is harmless when nothing was pushed for S)
        if (S->isCall() && ((CallStatement*)S)->isChildless()) {
            std::map<Exp*, std::stack<Statement*>, lessExpStar>::iterator sss;
            for (sss = Stacks.begin(); sss != Stacks.end(); ++sss) {
                if (!sss->second.empty() && sss->second.top() == S) {
                    sss->second.pop();
                }
            }
        }
    }
    return changed;
}
std::list<Statement *> *RTLInstDict::transformPostVars(std::list<Statement *> *rts, bool optimise) { std::list<Statement *>::iterator rt; // Map from var (could be any expression really) to details std::map<Exp *, transPost, lessExpStar> vars; int tmpcount = 1; // For making temp names unique // Exp *matchParam(1, idParam); // ? Was never used anyway #ifdef DEBUG_POSTVAR std::cout << "Transforming from:\n"; for (Exp_CIT p = rts->begin(); p != rts->end(); p++) { std::cout << setw(8) << " "; (*p)->print(std::cout); std::cout << "\n"; } #endif // First pass: Scan for post-variables and usages of their referents for (rt = rts->begin(); rt != rts->end(); rt++) { // ss appears to be a list of expressions to be searched // It is either the LHS and RHS of an assignment, or it's the parameters of a flag call Binary *ss; if ((*rt)->isAssign()) { Exp *lhs = ((Assign *)*rt)->getLeft(); Exp *rhs = ((Assign *)*rt)->getRight(); // Look for assignments to post-variables if (lhs && lhs->isPostVar()) { if (vars.find(lhs) == vars.end()) { // Add a record in the map for this postvar transPost &el = vars[lhs]; el.used = false; el.type = ((Assign *)*rt)->getType(); // Constuct a temporary. We should probably be smarter and actually check that it's not otherwise // used here. std::string tmpname = el.type->getTempName() + (tmpcount++) + "post" ; el.tmp = Location::tempOf(new Const(tmpname.c_str())); // Keep a copy of the referrent. For example, if the lhs is r[0]', base is r[0] el.base = lhs->getSubExp1(); el.post = lhs; // The whole post-var, e.g. 
r[0]' el.isNew = true; // The emulator generator sets optimise false // I think this forces always generating the temps (MVE) if (!optimise) { el.used = true; el.isNew = false; } } } // For an assignment, the two expressions to search are the left and right hand sides (could just put the // whole assignment on, I suppose) ss = new Binary(opList, lhs->clone(), new Binary(opList, rhs->clone(), new Terminal(opNil))); } else if ((*rt)->isFlagAssgn()) { // An opFlagCall is assumed to be a Binary with a string and an opList of parameters ss = (Binary *)((Binary *)*rt)->getSubExp2(); } else ss = NULL; /* Look for usages of post-variables' referents * Trickier than you'd think, as we need to make sure to skip over the post-variables themselves. ie match * r[0] but not r[0]' * Note: back with SemStrs, we could use a match expression which was a wildcard prepended to the base * expression; this would match either the base (r[0]) or the post-var (r[0]'). * Can't really use this with Exps, so we search twice; once for the base, and once for the post, and if we * get more with the former, then we have a use of the base (consider r[0] + r[0]') */ for (std::map<Exp *, transPost, lessExpStar>::iterator sr = vars.begin(); sr != vars.end(); sr++) { if (sr->second.isNew) { // Make sure we don't match a var in its defining statement sr->second.isNew = false; continue; } Binary *cur; for (cur = ss; !cur->isNil(); cur = (Binary *)cur->getSubExp2()) { if (sr->second.used) break; // Don't bother; already know it's used Exp *s = cur->getSubExp1(); if (!s) continue; if (*s == *sr->second.base) { sr->second.used = true; break; } std::list<Exp *> res1, res2; s->searchAll(sr->second.base, res1); s->searchAll(sr->second.post, res2); // Each match of a post will also match the base. 
// But if there is a bare (non-post) use of the base, there will be a result in res1 that is not in res2 if (res1.size() > res2.size()) sr->second.used = true; } } } // Second pass: Replace post-variables with temporaries where needed for (rt = rts->begin(); rt != rts->end(); rt++) { for (std::map<Exp *, transPost, lessExpStar>::iterator sr = vars.begin(); sr != vars.end(); sr++) { if (sr->second.used) { (*rt)->searchAndReplace(sr->first, sr->second.tmp); } else { (*rt)->searchAndReplace(sr->first, sr->second.base); } } } // Finally: Append assignments where needed from temps to base vars // Example: esp' = esp-4; m[esp'] = modrm; FLAG(esp) // all the esp' are replaced with say tmp1, you need a "esp = tmp1" at the end to actually make the change for (std::map<Exp *, transPost, lessExpStar>::iterator sr = vars.begin(); sr != vars.end(); sr++) { if (sr->second.used) { Assign *te = new Assign(sr->second.type, sr->second.base->clone(), sr->second.tmp); rts->push_back(te); } else { // The temp is either used (uncloned) in the assignment, or is deleted here //delete sr->second.tmp; } } #ifdef DEBUG_POSTVAR std::cout << "\nTo =>\n"; for (std::list<Exp *>::iterator p = rts->begin(); p != rts->end(); p++) { std::cout << setw(8) << " "; (*p)->print(std::cout); std::cout << "\n"; } std::cout << "\n"; #endif return rts; }
/*============================================================================== * FUNCTION: FrontEnd::processProc * OVERVIEW: Process a procedure, given a native (source machine) address. * PARAMETERS: address - the address at which the procedure starts * pProc - the procedure object * frag - if true, this is just a fragment of a procedure * spec - if true, this is a speculative decode * os - the output stream for .rtl output * NOTE: This is a sort of generic front end. For many processors, this will be overridden * in the FrontEnd derived class, sometimes calling this function to do most of the work * RETURNS: true for a good decode (no illegal instructions) *============================================================================*/ bool FrontEnd::processProc(ADDRESS uAddr, UserProc* pProc, std::ofstream &os, bool frag /* = false */, bool spec /* = false */) { PBB pBB; // Pointer to the current basic block std::cout<<"Entering Processing Proc\n"; // just in case you missed it first_line = true; if (AssProgram) std::cout <<"Name Of Program : " << AssProgram->name << std::endl; Boomerang::get()->alert_new(pProc); // We have a set of CallStatement pointers. These may be disregarded if this is a speculative decode // that fails (i.e. an illegal instruction is found). If not, this set will be used to add to the set of calls // to be analysed in the cfg, and also to call newProc() std::list<CallStatement*> callList; // Indicates whether or not the next instruction to be decoded is the lexical successor of the current one. // Will be true for all NCTs and for CTIs with a fall through branch. bool sequentialDecode = true; Cfg* pCfg = pProc->getCFG(); // If this is a speculative decode, the second time we decode the same address, we get no cfg. Else an error. if (spec && (pCfg == 0)) return false; assert(pCfg); // Initialise the queue of control flow targets that have yet to be decoded. 
targetQueue.initial(uAddr); // Clear the pointer used by the caller prologue code to access the last call rtl of this procedure //decoder.resetLastCall(); // ADDRESS initAddr = uAddr; int nTotalBytes = 0; ADDRESS startAddr = uAddr; ADDRESS lastAddr = uAddr; ADDRESS address = uAddr; std::cout << "Start at address = " << uAddr << std::endl; //------IMPORTANT------------------------------------------------------------------------ list<AssemblyLabel*>::iterator lbi; list<AssemblyLine*>* temp_lines = new list<AssemblyLine*>(); if (AssProgram){ for(lbi = AssProgram->labelList->begin(); lbi != AssProgram->labelList->end(); ++lbi ){ if((*lbi)->address == uAddr){ temp_lines = (*lbi)->lineList; std::cout << "***DECODE LABEL: " << (*lbi)->name << std::endl; std::cout << "***AT ADDRESS: " << (*lbi)->address << std::endl; std::cout << "***NUMBER OF INSTRUCTION: " << (*lbi)->lineList->size() << std::endl; break; } } } list<AssemblyLine*>::iterator li; if (temp_lines->size()>0) li = temp_lines->begin(); //--------------------------------------------------------------------------------------- while ((uAddr = targetQueue.nextAddress(pCfg)) != NO_ADDRESS) { // The list of RTLs for the current basic block std::list<RTL*>* BB_rtls = new std::list<RTL*>(); // Keep decoding sequentially until a CTI without a fall through branch is decoded //ADDRESS start = uAddr; DecodeResult inst; while (sequentialDecode) { // Decode and classify the current source instruction if (Boomerang::get()->traceDecoder) LOG << "*" << uAddr << "\t"; // Decode the inst at uAddr. 
if(ASS_FILE){ if(li != temp_lines->end()){ inst = decodeAssemblyInstruction(uAddr,"assemblySets.at(line)", (*li)); } } else inst = decodeInstruction(uAddr); // If invalid and we are speculating, just exit if (spec && !inst.valid) return false; // Need to construct a new list of RTLs if a basic block has just been finished but decoding is // continuing from its lexical successor if (BB_rtls == NULL) BB_rtls = new std::list<RTL*>(); RTL* pRtl = inst.rtl; if (inst.valid == false) { // Alert the watchers to the problem Boomerang::get()->alert_baddecode(uAddr); // An invalid instruction. Most likely because a call did not return (e.g. call _exit()), etc. // Best thing is to emit a INVALID BB, and continue with valid instructions if (VERBOSE) { LOG << "Warning: invalid instruction at " << uAddr << ": "; // Emit the next 4 bytes for debugging for (int ii=0; ii < 4; ii++) LOG << (unsigned)(pBF->readNative1(uAddr + ii) & 0xFF) << " "; LOG << "\n"; } // Emit the RTL anyway, so we have the address and maybe some other clues BB_rtls->push_back(new RTL(uAddr)); pBB = pCfg->newBB(BB_rtls, INVALID, 0); sequentialDecode = false; BB_rtls = NULL; continue; } //pProc->unionDefine = new list<UnionDefine*>(); pProc->bitVar = AssProgram->bitVar; pProc->replacement = AssProgram->replacement; // alert the watchers that we have decoded an instruction Boomerang::get()->alert_decode(uAddr, inst.numBytes); nTotalBytes += inst.numBytes; // Check if this is an already decoded jump instruction (from a previous pass with propagation etc) // If so, we throw away the just decoded RTL (but we still may have needed to calculate the number // of bytes.. ick.) std::map<ADDRESS, RTL*>::iterator ff = previouslyDecoded.find(uAddr); if (ff != previouslyDecoded.end()) pRtl = ff->second; if (pRtl == NULL) { // This can happen if an instruction is "cancelled", e.g. 
call to __main in a hppa program // Just ignore the whole instruction if (inst.numBytes > 0) uAddr += inst.numBytes; continue; } // Display RTL representation if asked std::cout<<"RTL: "<<std::endl; std::ostringstream st; pRtl->print(st); std::cout << st.str().c_str()<<std::endl; ADDRESS uDest; // For each Statement in the RTL //std::list<Statement*>& sl = pRtl->getList(); std::list<Statement*> sl = pRtl->getList(); // Make a copy (!) of the list. This is needed temporarily to work around the following problem. // We are currently iterating an RTL, which could be a return instruction. The RTL is passed to // createReturnBlock; if this is not the first return statement, it will get cleared, and this will // cause problems with the current iteration. The effects seem to be worse for MSVC/Windows. // This problem will likely be easier to cope with when the RTLs are removed, and there are special // Statements to mark the start of instructions (and their native address). // FIXME: However, this workaround breaks logic below where a GOTO is changed to a CALL followed by a return // if it points to the start of a known procedure std::list<Statement*>::iterator ss; #if 1 for (ss = sl.begin(); ss != sl.end(); ss++) { // } #else // The counter is introduced because ss != sl.end() does not work as it should // FIXME: why? Does this really fix the problem? int counter = sl.size(); for (ss = sl.begin(); counter > 0; ss++, counter--) { #endif Statement* s = *ss; s->setProc(pProc); // let's do this really early! 
if (refHints.find(pRtl->getAddress()) != refHints.end()) { const char *nam = refHints[pRtl->getAddress()].c_str(); ADDRESS gu = prog->getGlobalAddr((char*)nam); if (gu != NO_ADDRESS) { s->searchAndReplace(new Const((int)gu), new Unary(opAddrOf, Location::global(nam, pProc))); } } s->simplify(); GotoStatement* stmt_jump = static_cast<GotoStatement*>(s); // Check for a call to an already existing procedure (including self recursive jumps), or to the PLT // (note that a LibProc entry for the PLT function may not yet exist) ADDRESS dest; Proc* proc; if (s->getKind() == STMT_GOTO) { dest = stmt_jump->getFixedDest(); if (dest != NO_ADDRESS) { proc = prog->findProc(dest); if (proc == NULL) { if(!ASS_FILE){ if (pBF->IsDynamicLinkedProc(dest)) proc = prog->setNewProc(dest); } } if (proc != NULL && proc != (Proc*)-1) { s = new CallStatement(); CallStatement *call = static_cast<CallStatement*>(s); call->setDest(dest); call->setDestProc(proc); call->setReturnAfterCall(true); // also need to change it in the actual RTL std::list<Statement*>::iterator ss1 = ss; ss1++; assert(ss1 == sl.end()); pRtl->replaceLastStmt(s); *ss = s; } } } switch (s->getKind()) { case STMT_GOTO: { uDest = stmt_jump->getFixedDest(); // Handle one way jumps and computed jumps separately if (uDest != NO_ADDRESS) { BB_rtls->push_back(pRtl); sequentialDecode = false; pBB = pCfg->newBB(BB_rtls,ONEWAY,1); BB_rtls = NULL; // Clear when make new BB // Exit the switch now if the basic block already existed if (pBB == 0) { break; } // Add the out edge if it is to a destination within the // procedure if (uDest < pBF->getLimitTextHigh()) { targetQueue.visit(pCfg, uDest, pBB); pCfg->addOutEdge(pBB, uDest, true); } else { std::cout<<"Entering Processing Proc5\n"; if (!ASS_FILE) LOG << "Error: Instruction at " << uAddr << " branches beyond end of section, to " << uDest << "\n"; else{ targetQueue.visit(pCfg, uDest, pBB); pCfg->addOutEdge(pBB, uDest, true); } } } break; } case STMT_CASE: { Exp* pDest = 
stmt_jump->getDest(); if (pDest == NULL) { // Happens if already analysed (now redecoding) // SWITCH_INFO* psi = ((CaseStatement*)stmt_jump)->getSwitchInfo(); BB_rtls->push_back(pRtl); pBB = pCfg->newBB(BB_rtls, NWAY, 0); // processSwitch will update num outedges pBB->processSwitch(pProc); // decode arms, set out edges, etc sequentialDecode = false; // Don't decode after the jump BB_rtls = NULL; // New RTLList for next BB break; // Just leave it alone } // Check for indirect calls to library functions, especially in Win32 programs if (pDest && pDest->getOper() == opMemOf && pDest->getSubExp1()->getOper() == opIntConst && pBF->IsDynamicLinkedProcPointer(((Const*)pDest->getSubExp1())->getAddr())) { if (VERBOSE) LOG << "jump to a library function: " << stmt_jump << ", replacing with a call/ret.\n"; // jump to a library function // replace with a call ret // TODO: std::string func = pBF->GetDynamicProcName( ((Const*)stmt_jump->getDest()->getSubExp1())->getAddr()); //------------------------------------ CallStatement *call = new CallStatement; call->setDest(stmt_jump->getDest()->clone()); LibProc *lp = pProc->getProg()->getLibraryProc(func.c_str()); if (lp == NULL) LOG << "getLibraryProc returned NULL, aborting\n"; assert(lp); call->setDestProc(lp); std::list<Statement*>* stmt_list = new std::list<Statement*>; stmt_list->push_back(call); BB_rtls->push_back(new RTL(pRtl->getAddress(), stmt_list)); pBB = pCfg->newBB(BB_rtls, CALL, 1); appendSyntheticReturn(pBB, pProc, pRtl); sequentialDecode = false; BB_rtls = NULL; if (pRtl->getAddress() == pProc->getNativeAddress()) { // it's a thunk // Proc *lp = prog->findProc(func.c_str()); func = std::string("__imp_") + func; pProc->setName(func.c_str()); //lp->setName(func.c_str()); Boomerang::get()->alert_update_signature(pProc); } callList.push_back(call); ss = sl.end(); ss--; // get out of the loop break; } BB_rtls->push_back(pRtl); // We create the BB as a COMPJUMP type, then change to an NWAY if it turns out to be a switch 
stmt pBB = pCfg->newBB(BB_rtls, COMPJUMP, 0); LOG << "COMPUTED JUMP at " << uAddr << ", pDest = " << pDest << "\n"; if (Boomerang::get()->noDecompile) { // try some hacks if (pDest->isMemOf() && pDest->getSubExp1()->getOper() == opPlus && pDest->getSubExp1()->getSubExp2()->isIntConst()) { // assume subExp2 is a jump table ADDRESS jmptbl = ((Const*)pDest->getSubExp1()->getSubExp2())->getInt(); unsigned int i; for (i = 0; ; i++) { ADDRESS uDest = pBF->readNative4(jmptbl + i * 4); if (pBF->getLimitTextLow() <= uDest && uDest < pBF->getLimitTextHigh()) { LOG << " guessed uDest " << uDest << "\n"; targetQueue.visit(pCfg, uDest, pBB); pCfg->addOutEdge(pBB, uDest, true); } else break; } pBB->updateType(NWAY, i); } } sequentialDecode = false; BB_rtls = NULL; // New RTLList for next BB break; } case STMT_BRANCH: { uDest = stmt_jump->getFixedDest(); BB_rtls->push_back(pRtl); pBB = pCfg->newBB(BB_rtls, TWOWAY, 2); // Stop decoding sequentially if the basic block already existed otherwise complete the basic block if (pBB == 0) sequentialDecode = false; else { // Add the out edge if it is to a destination within the procedure if (!ASS_FILE){ if (uDest < pBF->getLimitTextHigh()) { targetQueue.visit(pCfg, uDest, pBB); pCfg->addOutEdge(pBB, uDest, true); } else LOG << "Error: Instruction at " << uAddr << " branches beyond end of section, to " << uDest << "\n"; } else { targetQueue.visit(pCfg, uDest, pBB); pCfg->addOutEdge(pBB, uDest, true); } // Add the fall-through outedge pCfg->addOutEdge(pBB, uAddr + inst.numBytes); } // Create the list of RTLs for the next basic block and continue with the next instruction. 
BB_rtls = NULL; break; } case STMT_CALL: { CallStatement* call = static_cast<CallStatement*>(s); // Check for a dynamic linked library function // TODO: solution dont use pBF if (!ASS_FILE){ if (call->getDest()->getOper() == opMemOf && call->getDest()->getSubExp1()->getOper() == opIntConst && pBF->IsDynamicLinkedProcPointer(((Const*)call->getDest()->getSubExp1())->getAddr())) { // Dynamic linked proc pointers are treated as static. const char *nam = pBF->GetDynamicProcName( ((Const*)call->getDest()->getSubExp1())->getAddr()); Proc *p = pProc->getProg()->getLibraryProc(nam); call->setDestProc(p); call->setIsComputed(false); } } else { if (call->getDest()->getOper() == opMemOf && call->getDest()->getSubExp1()->getOper() == opIntConst && funcsType.find(((Const*)call->getDest()->getSubExp1())->getAddr())->second) { // Dynamic linked proc pointers are treated as static. const char *nam = namesList.find(((Const*)call->getDest()->getSubExp1())->getAddr())->second; Proc *p = pProc->getProg()->getLibraryProc(nam); call->setDestProc(p); call->setIsComputed(false); } } // Is the called function a thunk calling a library function? // A "thunk" is a function which only consists of: "GOTO library_function" // Should i modify if (!ASS_FILE){ if( call && call->getFixedDest() != NO_ADDRESS ) { // Get the address of the called function. ADDRESS callAddr=call->getFixedDest(); // It should not be in the PLT either, but getLimitTextHigh() takes this into account if (callAddr < pBF->getLimitTextHigh()) { // Decode it. DecodeResult decoded=decodeInstruction(callAddr); if (decoded.valid) { // is the instruction decoded succesfully? // Yes, it is. Create a Statement from it. 
RTL *rtl = decoded.rtl; Statement* first_statement = *rtl->getList().begin(); if (first_statement) { first_statement->setProc(pProc); first_statement->simplify(); GotoStatement* stmt_jump = static_cast<GotoStatement*>(first_statement); // In fact it's a computed (looked up) jump, so the jump seems to be a case // statement. //TODO : We dont handle this case if ( first_statement->getKind() == STMT_CASE && stmt_jump->getDest()->getOper() == opMemOf && stmt_jump->getDest()->getSubExp1()->getOper() == opIntConst && pBF->IsDynamicLinkedProcPointer(((Const*)stmt_jump->getDest()->getSubExp1())-> getAddr())) // Is it an "DynamicLinkedProcPointer"? { // Yes, it's a library function. Look up it's name. ADDRESS a = ((Const*)stmt_jump->getDest()->getSubExp1())->getAddr(); // TODO : We dont handle this case const char *nam = pBF->GetDynamicProcName(a); // Assign the proc to the call Proc *p = pProc->getProg()->getLibraryProc(nam); if (call->getDestProc()) { // prevent unnecessary __imp procs prog->removeProc(call->getDestProc()->getName()); } call->setDestProc(p); call->setIsComputed(false); call->setDest(Location::memOf(new Const(a))); } } } } } } // Treat computed and static calls separately if (call->isComputed()) { BB_rtls->push_back(pRtl); pBB = pCfg->newBB(BB_rtls, COMPCALL, 1); // Stop decoding sequentially if the basic block already // existed otherwise complete the basic block if (pBB == 0) sequentialDecode = false; else pCfg->addOutEdge(pBB, uAddr + inst.numBytes); // Add this call to the list of calls to analyse. We won't // be able to analyse it's callee(s), of course. callList.push_back(call); } else { // Static call // Find the address of the callee. ADDRESS uNewAddr = call->getFixedDest(); // Calls with 0 offset (i.e. call the next instruction) are simply pushing the PC to the // stack. Treat these as non-control flow instructions and continue. if (uNewAddr == uAddr + inst.numBytes) break; // Call the virtual helper function. 
If implemented, will check for machine specific funcion // calls if (helperFunc(uNewAddr, uAddr, BB_rtls)) { // We have already added to BB_rtls pRtl = NULL; // Discard the call semantics break; } BB_rtls->push_back(pRtl); // Add this non computed call site to the set of call sites which need to be analysed later. //pCfg->addCall(call); callList.push_back(call); // Record the called address as the start of a new procedure if it didn't already exist. if (uNewAddr && uNewAddr != NO_ADDRESS && pProc->getProg()->findProc(uNewAddr) == NULL) { callList.push_back(call); //newProc(pProc->getProg(), uNewAddr); if (Boomerang::get()->traceDecoder) LOG << "p" << uNewAddr << "\t"; } // Check if this is the _exit or exit function. May prevent us from attempting to decode // invalid instructions, and getting invalid stack height errors const char* name; if (!ASS_FILE){ name = pBF->SymbolByAddress(uNewAddr); if (name == NULL && call->getDest()->isMemOf() && call->getDest()->getSubExp1()->isIntConst()) { ADDRESS a = ((Const*)call->getDest()->getSubExp1())->getInt(); if (pBF->IsDynamicLinkedProcPointer(a)) name = pBF->GetDynamicProcName(a); } } else { name = namesList.find(uNewAddr)->second; } if (name && noReturnCallDest(name)) { // Make sure it has a return appended (so there is only one exit from the function) //call->setReturnAfterCall(true); // I think only the Sparc frontend cares // Create the new basic block pBB = pCfg->newBB(BB_rtls, CALL, 1); appendSyntheticReturn(pBB, pProc, pRtl); // Stop decoding sequentially sequentialDecode = false; } else { // Create the new basic block pBB = pCfg->newBB(BB_rtls, CALL, 1); if (call->isReturnAfterCall()) { // Constuct the RTLs for the new basic block std::list<RTL*>* rtls = new std::list<RTL*>(); // The only RTL in the basic block is one with a ReturnStatement std::list<Statement*>* sl = new std::list<Statement*>; sl->push_back(new ReturnStatement()); rtls->push_back(new RTL(pRtl->getAddress()+1, sl)); BasicBlock* returnBB = 
pCfg->newBB(rtls, RET, 0); // Add out edge from call to return pCfg->addOutEdge(pBB, returnBB); // Put a label on the return BB (since it's an orphan); a jump will be reqd pCfg->setLabel(returnBB); pBB->setJumpReqd(); // Mike: do we need to set return locations? // This ends the function sequentialDecode = false; } else { // Add the fall through edge if the block didn't // already exist if (pBB != NULL) pCfg->addOutEdge(pBB, uAddr+inst.numBytes); } } } extraProcessCall(call, BB_rtls); // Create the list of RTLs for the next basic block and continue with the next instruction. BB_rtls = NULL; break; } case STMT_RET: { // Stop decoding sequentially sequentialDecode = false; pBB = createReturnBlock(pProc, BB_rtls, pRtl); // Create the list of RTLs for the next basic block and // continue with the next instruction. BB_rtls = NULL; // New RTLList for next BB } break; case STMT_BOOLASSIGN: // This is just an ordinary instruction; no control transfer // Fall through case STMT_JUNCTION: // FIXME: Do we need to do anything here? case STMT_ASSIGN: case STMT_PHIASSIGN: case STMT_IMPASSIGN: case STMT_IMPREF: // Do nothing break; } // switch (s->getKind()) } if (BB_rtls && pRtl) // If non null, we haven't put this RTL into a the current BB as yet BB_rtls->push_back(pRtl); if (inst.reDecode) // Special case: redecode the last instruction, without advancing uAddr by numBytes continue; uAddr += inst.numBytes; if (uAddr > lastAddr) lastAddr = uAddr; // If sequentially decoding, check if the next address happens to be the start of an existing BB. If so, // finish off the current BB (if any RTLs) as a fallthrough, and no need to decode again (unless it's an // incomplete BB, then we do decode it). // In fact, mustn't decode twice, because it will muck up the coverage, but also will cause subtle problems // like add a call to the list of calls to be processed, then delete the call RTL (e.g. 
Pentium 134.perl // benchmark) if (sequentialDecode && pCfg->existsBB(uAddr)) { // Create the fallthrough BB, if there are any RTLs at all if (BB_rtls) { PBB pBB = pCfg->newBB(BB_rtls, FALL, 1); // Add an out edge to this address if (pBB) { pCfg->addOutEdge(pBB, uAddr); BB_rtls = NULL; // Need new list of RTLs } } // Pick a new address to decode from, if the BB is complete if (!pCfg->isIncomplete(uAddr)) sequentialDecode = false; } if(AssProgram) ++ li ; } // while sequentialDecode // Add this range to the coverage // pProc->addRange(start, uAddr); // Must set sequentialDecode back to true sequentialDecode = true; } // while nextAddress() != NO_ADDRESS //ProgWatcher *w = prog->getWatcher(); //if (w) // w->alert_done(pProc, initAddr, lastAddr, nTotalBytes); // Add the callees to the set of CallStatements, and also to the Prog object std::list<CallStatement*>::iterator it; for (it = callList.begin(); it != callList.end(); it++) { ADDRESS dest = (*it)->getFixedDest(); // Don't speculatively decode procs that are outside of the main text section, apart from dynamically // linked ones (in the .plt) // TODO: change pBF pointers if (!ASS_FILE){ if (pBF->IsDynamicLinkedProc(dest) || !spec || (dest < pBF->getLimitTextHigh())) { pCfg->addCall(*it); // Don't visit the destination of a register call Proc *np = (*it)->getDestProc(); if (np == NULL && dest != NO_ADDRESS) { //np = newProc(pProc->getProg(), dest); np = pProc->getProg()->setNewProc(dest); } if (np != NULL) { np->setFirstCaller(pProc); pProc->addCallee(np); } } } else{ pCfg->addCall(*it); // Don't visit the destination of a register call Proc *np = (*it)->getDestProc(); if (np == NULL && dest != NO_ADDRESS) { //np = newProc(pProc->getProg(), dest); np = pProc->getProg()->setNewProc(dest); } if (np != NULL) { np->setFirstCaller(pProc); pProc->addCallee(np); } } } Boomerang::get()->alert_decode(pProc, startAddr, lastAddr, nTotalBytes); std::cout<< "finished processing proc " << pProc->getName() << " at address " << 
pProc->getNativeAddress() << "\n"; if (VERBOSE) LOG << "finished processing proc " << pProc->getName() << " at address " << pProc->getNativeAddress() << "\n"; return true; } /*============================================================================== * FUNCTION: FrontEnd::getInst * OVERVIEW: Fetch the smallest (nop-sized) instruction, in an endianness independent manner * NOTE: Frequently overridden * PARAMETERS: addr - host address to getch from * RETURNS: An integer with the instruction in it *============================================================================*/ int FrontEnd::getInst(int addr) { return (int)(*(unsigned char*)addr); } /*============================================================================== * FUNCTION: TargetQueue::visit * OVERVIEW: Visit a destination as a label, i.e. check whether we need to queue it as a new BB to create later. * Note: at present, it is important to visit an address BEFORE an out edge is added to that address. * This is because adding an out edge enters the address into the Cfg's BB map, and it looks like the * BB has already been visited, and it gets overlooked. It would be better to have a scheme whereby * the order of calling these functions (i.e. visit() and AddOutEdge()) did not matter. * PARAMETERS: pCfg - the enclosing CFG * uNewAddr - the address to be checked * pNewBB - set to the lower part of the BB if the address * already exists as a non explicit label (BB has to be split) * RETURNS: <nothing> *============================================================================*/ void TargetQueue::visit(Cfg* pCfg, ADDRESS uNewAddr, PBB& pNewBB) { // Find out if we've already parsed the destination bool bParsed = pCfg->label(uNewAddr, pNewBB); // Add this address to the back of the local queue, // if not already processed if (!bParsed) { targets.push(uNewAddr); if (Boomerang::get()->traceDecoder) LOG << ">" << uNewAddr << "\t"; } }