Ejemplo n.º 1
0
/*==============================================================================
 * FUNCTION:	  FrontEnd::processProc
 * OVERVIEW:	  Process a procedure, given a native (source machine) address. Instructions are decoded either
 *					from the binary (decodeInstruction) or, when ASS_FILE is set, from the assembly lines of the
 *					label in AssProgram whose address equals uAddr (decodeAssemblyInstruction). Basic blocks and
 *					out edges are added to the procedure's CFG, and the calls that were found are registered
 *					with the CFG and the Prog object at the end.
 * PARAMETERS:	  uAddr - the address at which the procedure starts
 *				  pProc - the procedure object
 *				  os - the output stream for .rtl output (not referenced by this implementation)
 *				  frag - if true, this is just a fragment of a procedure (not referenced by this implementation)
 *				  spec - if true, this is a speculative decode
 * NOTE:		  This is a sort of generic front end. For many processors, this will be overridden
 *					in the FrontEnd derived class, sometimes calling this function to do most of the work
 * NOTE:		  Unconditional progress/debug output is written to std::cout.
 * RETURNS:		  false if this is a speculative decode and either the procedure has no CFG or an invalid
 *					instruction is met; true otherwise (a non speculative decode emits an INVALID basic block
 *					for an undecodable instruction and carries on)
 *============================================================================*/
bool FrontEnd::processProc(ADDRESS uAddr, UserProc* pProc, std::ofstream &os, bool frag /* = false */,
		bool spec /* = false */) {
	PBB pBB;					// Pointer to the current basic block
	std::cout<<"Entering Processing Proc\n"; 
	// just in case you missed it
	first_line = true;
	if (AssProgram)
		std::cout <<"Name Of Program : " << AssProgram->name << std::endl;
	Boomerang::get()->alert_new(pProc);

	// We have a set of CallStatement pointers. These may be disregarded if this is a speculative decode
	// that fails (i.e. an illegal instruction is found). If not, this set will be used to add to the set of calls
	// to be analysed in the cfg, and also to call newProc()
	std::list<CallStatement*> callList;

	// Indicates whether or not the next instruction to be decoded is the lexical successor of the current one.
	// Will be true for all NCTs and for CTIs with a fall through branch.
	bool sequentialDecode = true;

	Cfg* pCfg = pProc->getCFG();

	// If this is a speculative decode, the second time we decode the same address, we get no cfg. Else an error.
	if (spec && (pCfg == 0))
		return false;
	assert(pCfg);

	// Initialise the queue of control flow targets that have yet to be decoded.
	targetQueue.initial(uAddr);

	// Clear the pointer used by the caller prologue code to access the last call rtl of this procedure
	//decoder.resetLastCall();

	int nTotalBytes = 0;
	ADDRESS startAddr = uAddr;
	ADDRESS lastAddr = uAddr;
	std::cout << "Start at address = " << uAddr << std::endl; 
	//------IMPORTANT------------------------------------------------------------------------
	// Find the assembly lines of the label that starts at uAddr. The fallback is an empty list with automatic
	// storage, so nothing is heap allocated (and leaked) when no label matches.
	list<AssemblyLabel*>::iterator lbi;
	list<AssemblyLine*> noLines;
	list<AssemblyLine*>* temp_lines = &noLines;

	if (AssProgram){
		for(lbi = AssProgram->labelList->begin(); lbi != AssProgram->labelList->end(); ++lbi ){
			if((*lbi)->address == uAddr){
				temp_lines = (*lbi)->lineList;
				std::cout << "***DECODE LABEL: " << (*lbi)->name << std::endl;
				std::cout << "***AT ADDRESS: " << (*lbi)->address << std::endl;
				std::cout << "***NUMBER OF INSTRUCTION: " << (*lbi)->lineList->size() << std::endl;
				break;
			}
		}
	}

	// Always give the line iterator a valid value. For an empty list this is end(), which keeps the comparisons
	// against end() below well defined.
	list<AssemblyLine*>::iterator li = temp_lines->begin();

	//---------------------------------------------------------------------------------------
	while ((uAddr = targetQueue.nextAddress(pCfg)) != NO_ADDRESS) {
		// The list of RTLs for the current basic block
		std::list<RTL*>* BB_rtls = new std::list<RTL*>();

		// Keep decoding sequentially until a CTI without a fall through branch is decoded
		DecodeResult inst;
		while (sequentialDecode) {

			// Decode and classify the current source instruction
			if (Boomerang::get()->traceDecoder)
				LOG << "*" << uAddr << "\t";

			// Decode the inst at uAddr.
			// NOTE(review): with ASS_FILE set and no assembly line left, inst keeps whatever it held before
			// (the previous instruction, or a freshly constructed DecodeResult). Confirm whether that case
			// should be reported as an invalid decode instead.
			if(ASS_FILE){
				if(li != temp_lines->end()){

					inst = decodeAssemblyInstruction(uAddr,"assemblySets.at(line)", (*li));
				}
			}
			else
				inst = decodeInstruction(uAddr);

			// If invalid and we are speculating, just exit
			if (spec && !inst.valid)
				return false;

			// Need to construct a new list of RTLs if a basic block has just been finished but decoding is
			// continuing from its lexical successor
			if (BB_rtls == NULL)
				BB_rtls = new std::list<RTL*>();

			RTL* pRtl = inst.rtl;
			if (inst.valid == false) {

				// Alert the watchers to the problem
				Boomerang::get()->alert_baddecode(uAddr);

				// An invalid instruction. Most likely because a call did not return (e.g. call _exit()), etc.
				// Best thing is to emit a INVALID BB, and continue with valid instructions
				if (VERBOSE) {
					LOG << "Warning: invalid instruction at " << uAddr << ": ";
					// Emit the next 4 bytes for debugging
					for (int ii=0; ii < 4; ii++)
						LOG << (unsigned)(pBF->readNative1(uAddr + ii) & 0xFF) << " ";
					LOG << "\n";
				}
				// Emit the RTL anyway, so we have the address and maybe some other clues
				BB_rtls->push_back(new RTL(uAddr));	 
				pBB = pCfg->newBB(BB_rtls, INVALID, 0);
				sequentialDecode = false; BB_rtls = NULL; continue;
			}
			// Copy the program wide bit variable and replacement tables to the procedure. AssProgram is
			// checked for null everywhere else in this function, so it is checked here as well.
			if (AssProgram) {
				pProc->bitVar = AssProgram->bitVar;
				pProc->replacement = AssProgram->replacement;
			}
			// alert the watchers that we have decoded an instruction
			Boomerang::get()->alert_decode(uAddr, inst.numBytes);
			nTotalBytes += inst.numBytes;			

			// Check if this is an already decoded jump instruction (from a previous pass with propagation etc)
			// If so, we throw away the just decoded RTL (but we still may have needed to calculate the number
			// of bytes.. ick.)
			std::map<ADDRESS, RTL*>::iterator ff = previouslyDecoded.find(uAddr);
			if (ff != previouslyDecoded.end())
				pRtl = ff->second;

			if (pRtl == NULL) {
				// This can happen if an instruction is "cancelled", e.g. call to __main in a hppa program
				// Just ignore the whole instruction
				if (inst.numBytes > 0)
					uAddr += inst.numBytes;
				continue;
			}

			// Display the RTL representation (currently unconditional debug output)
			std::cout<<"RTL: "<<std::endl;
			std::ostringstream st;
			pRtl->print(st);
			std::cout << st.str().c_str()<<std::endl;

			ADDRESS uDest;

			// For each Statement in the RTL
			//std::list<Statement*>& sl = pRtl->getList();
			std::list<Statement*> sl = pRtl->getList();
			// Make a copy (!) of the list. This is needed temporarily to work around the following problem.
			// We are currently iterating an RTL, which could be a return instruction. The RTL is passed to
			// createReturnBlock; if this is not the first return statement, it will get cleared, and this will
			// cause problems with the current iteration. The effects seem to be worse for MSVC/Windows.
			// This problem will likely be easier to cope with when the RTLs are removed, and there are special
			// Statements to mark the start of instructions (and their native address).
			// FIXME: However, this workaround breaks logic below where a GOTO is changed to a CALL followed by a return
			// if it points to the start of a known procedure
			std::list<Statement*>::iterator ss;
			for (ss = sl.begin(); ss != sl.end(); ss++) {
				Statement* s = *ss;
				s->setProc(pProc);		// let's do this really early!
				if (refHints.find(pRtl->getAddress()) != refHints.end()) {
					const char *nam = refHints[pRtl->getAddress()].c_str();
					ADDRESS gu = prog->getGlobalAddr((char*)nam);
					if (gu != NO_ADDRESS) {
						s->searchAndReplace(new Const((int)gu), new Unary(opAddrOf, Location::global(nam, pProc)));
					}
				}
				s->simplify();
				// Only used once the statement kind has been checked (STMT_GOTO, STMT_CASE or STMT_BRANCH)
				GotoStatement* stmt_jump = static_cast<GotoStatement*>(s);

				// Check for a call to an already existing procedure (including self recursive jumps), or to the PLT
				// (note that a LibProc entry for the PLT function may not yet exist)
				ADDRESS dest;
				Proc* proc;

				if (s->getKind() == STMT_GOTO) {
					dest = stmt_jump->getFixedDest();
					if (dest != NO_ADDRESS) {
						proc = prog->findProc(dest);
						if (proc == NULL) {
							if(!ASS_FILE){
								if (pBF->IsDynamicLinkedProc(dest))
									proc = prog->setNewProc(dest);
							}
						}
						if (proc != NULL && proc != (Proc*)-1) {
							s = new CallStatement();
							CallStatement *call = static_cast<CallStatement*>(s);
							call->setDest(dest);
							call->setDestProc(proc);
							call->setReturnAfterCall(true);
							// also need to change it in the actual RTL
							std::list<Statement*>::iterator ss1 = ss;
							ss1++;
							assert(ss1 == sl.end());
							pRtl->replaceLastStmt(s);
							*ss = s;
						}
					}
				}

				switch (s->getKind())
				{

				case STMT_GOTO: {
					uDest = stmt_jump->getFixedDest();

					// Handle one way jumps and computed jumps separately
					if (uDest != NO_ADDRESS) {

						BB_rtls->push_back(pRtl);
						sequentialDecode = false;

						pBB = pCfg->newBB(BB_rtls,ONEWAY,1);
						BB_rtls = NULL;		// Clear when make new BB

						// Exit the switch now if the basic block already existed
						if (pBB == 0) {
							break;
						}

						// Add the out edge if it is to a destination within the
						// procedure

						if (uDest < pBF->getLimitTextHigh()) {
							targetQueue.visit(pCfg, uDest, pBB);
							pCfg->addOutEdge(pBB, uDest, true);
						}
						else {
							std::cout<<"Entering Processing Proc5\n"; 
							if (!ASS_FILE)
								LOG << "Error: Instruction at " << uAddr << " branches beyond end of section, to "
									<< uDest << "\n";
							else{
								// Assembly input: the text section limit is not applied to the destination
								targetQueue.visit(pCfg, uDest, pBB);
								pCfg->addOutEdge(pBB, uDest, true);
							} 

						}
					}
					break;
				}

				case STMT_CASE: {
					Exp* pDest = stmt_jump->getDest();
					if (pDest == NULL) {				// Happens if already analysed (now redecoding)
						// SWITCH_INFO* psi = ((CaseStatement*)stmt_jump)->getSwitchInfo();
						BB_rtls->push_back(pRtl);
						pBB = pCfg->newBB(BB_rtls, NWAY, 0);	// processSwitch will update num outedges
						pBB->processSwitch(pProc);		// decode arms, set out edges, etc
						sequentialDecode = false;		// Don't decode after the jump
						BB_rtls = NULL;					// New RTLList for next BB
						break;							// Just leave it alone
					}
					// Check for indirect calls to library functions, especially in Win32 programs
					if (pDest && pDest->getOper() == opMemOf &&
							pDest->getSubExp1()->getOper() == opIntConst && 
							pBF->IsDynamicLinkedProcPointer(((Const*)pDest->getSubExp1())->getAddr())) {
						if (VERBOSE)
							LOG << "jump to a library function: " << stmt_jump << ", replacing with a call/ret.\n";
						// jump to a library function
						// replace with a call ret
						// TODO: 
						std::string func = pBF->GetDynamicProcName(
							((Const*)stmt_jump->getDest()->getSubExp1())->getAddr());
						//------------------------------------
						CallStatement *call = new CallStatement;
						call->setDest(stmt_jump->getDest()->clone());
						LibProc *lp = pProc->getProg()->getLibraryProc(func.c_str());
						if (lp == NULL)
							LOG << "getLibraryProc returned NULL, aborting\n";
						assert(lp);
						call->setDestProc(lp);
						std::list<Statement*>* stmt_list = new std::list<Statement*>;
						stmt_list->push_back(call);
						BB_rtls->push_back(new RTL(pRtl->getAddress(), stmt_list));
						pBB = pCfg->newBB(BB_rtls, CALL, 1);
						appendSyntheticReturn(pBB, pProc, pRtl);
						sequentialDecode = false;
						BB_rtls = NULL;
						if (pRtl->getAddress() == pProc->getNativeAddress()) {
							// it's a thunk
							// Proc *lp = prog->findProc(func.c_str());
							func = std::string("__imp_") + func;
							pProc->setName(func.c_str());
							//lp->setName(func.c_str());
							Boomerang::get()->alert_update_signature(pProc);
						}
						callList.push_back(call);
						ss = sl.end(); ss--;	// get out of the loop
						break;
					}
					BB_rtls->push_back(pRtl);
					// We create the BB as a COMPJUMP type, then change to an NWAY if it turns out to be a switch stmt
					pBB = pCfg->newBB(BB_rtls, COMPJUMP, 0);
					LOG << "COMPUTED JUMP at " << uAddr << ", pDest = " << pDest << "\n";
					if (Boomerang::get()->noDecompile) {
						// try some hacks
						if (pDest->isMemOf() && pDest->getSubExp1()->getOper() == opPlus &&
								pDest->getSubExp1()->getSubExp2()->isIntConst()) {
							// assume subExp2 is a jump table
							ADDRESS jmptbl = ((Const*)pDest->getSubExp1()->getSubExp2())->getInt();
							unsigned int i;
							for (i = 0; ; i++) {
								// Read 4 byte entries until one falls outside the text section limits
								ADDRESS uTableDest = pBF->readNative4(jmptbl + i * 4);
								if (pBF->getLimitTextLow() <= uTableDest && uTableDest < pBF->getLimitTextHigh()) {
									LOG << "  guessed uDest " << uTableDest << "\n";
									targetQueue.visit(pCfg, uTableDest, pBB);
									pCfg->addOutEdge(pBB, uTableDest, true);
								} else
									break;
							}
							pBB->updateType(NWAY, i);
						}
					}
					sequentialDecode = false;
					BB_rtls = NULL;		// New RTLList for next BB
					break;
				}


				case STMT_BRANCH: {
					uDest = stmt_jump->getFixedDest();
					BB_rtls->push_back(pRtl);
					pBB = pCfg->newBB(BB_rtls, TWOWAY, 2);

					// Stop decoding sequentially if the basic block already existed otherwise complete the basic block
					if (pBB == 0)
						sequentialDecode = false;
					else {

						// Add the out edge if it is to a destination within the procedure
						if (!ASS_FILE){
							if (uDest < pBF->getLimitTextHigh()) {
								targetQueue.visit(pCfg, uDest, pBB);
								pCfg->addOutEdge(pBB, uDest, true);
							}
							else
								LOG << "Error: Instruction at " << uAddr << " branches beyond end of section, to "
									<< uDest << "\n";
						}
						else {
								targetQueue.visit(pCfg, uDest, pBB);
								pCfg->addOutEdge(pBB, uDest, true);
						}

						// Add the fall-through outedge
						pCfg->addOutEdge(pBB, uAddr + inst.numBytes); 
					}

					// Create the list of RTLs for the next basic block and continue with the next instruction.
					BB_rtls = NULL;
					break;
				}

				case STMT_CALL: {
					CallStatement* call = static_cast<CallStatement*>(s);

					// Check for a dynamic linked library function
					// TODO: solution dont use pBF
					if (!ASS_FILE){ 
						if (call->getDest()->getOper() == opMemOf &&
								call->getDest()->getSubExp1()->getOper() == opIntConst &&
								pBF->IsDynamicLinkedProcPointer(((Const*)call->getDest()->getSubExp1())->getAddr())) {
							// Dynamic linked proc pointers are treated as static.
							const char *nam = pBF->GetDynamicProcName( ((Const*)call->getDest()->getSubExp1())->getAddr());
							Proc *p = pProc->getProg()->getLibraryProc(nam);
							call->setDestProc(p);
							call->setIsComputed(false);
						}
					}
					else {
						if (call->getDest()->getOper() == opMemOf &&
								call->getDest()->getSubExp1()->getOper() == opIntConst) {
							ADDRESS procPtr = ((Const*)call->getDest()->getSubExp1())->getAddr();
							// Only dereference the lookup results when the address is present in both tables;
							// dereferencing the end() iterator is undefined behaviour.
							if (funcsType.find(procPtr) != funcsType.end() && funcsType.find(procPtr)->second &&
									namesList.find(procPtr) != namesList.end()) {
								// Dynamic linked proc pointers are treated as static.
								const char *nam = namesList.find(procPtr)->second;
								Proc *p = pProc->getProg()->getLibraryProc(nam);
								call->setDestProc(p);
								call->setIsComputed(false);
							}
						}
					}

					// Is the called function a thunk calling a library function?
					// A "thunk" is a function which only consists of: "GOTO library_function"
					// Should i modify
					if (!ASS_FILE){
						if(	call &&	call->getFixedDest() != NO_ADDRESS ) {
							// Get the address of the called function.
							ADDRESS callAddr=call->getFixedDest();
							// It should not be in the PLT either, but getLimitTextHigh() takes this into account
							if (callAddr < pBF->getLimitTextHigh()) {
								// Decode it.
								DecodeResult decoded=decodeInstruction(callAddr);
								if (decoded.valid) { // is the instruction decoded succesfully?
									// Yes, it is. Create a Statement from it.
									RTL *rtl = decoded.rtl;
									Statement* first_statement = *rtl->getList().begin();
									if (first_statement) {
										first_statement->setProc(pProc);
										first_statement->simplify();
										GotoStatement* thunkJump = static_cast<GotoStatement*>(first_statement);
										// In fact it's a computed (looked up) jump, so the jump seems to be a case
										// statement.
										//TODO : We dont handle this case
										if ( first_statement->getKind() == STMT_CASE &&
											thunkJump->getDest()->getOper() == opMemOf &&
											thunkJump->getDest()->getSubExp1()->getOper() == opIntConst &&
											pBF->IsDynamicLinkedProcPointer(((Const*)thunkJump->getDest()->getSubExp1())->
												getAddr())) // Is it an "DynamicLinkedProcPointer"?
										{
											// Yes, it's a library function. Look up it's name.
											ADDRESS a = ((Const*)thunkJump->getDest()->getSubExp1())->getAddr();
											// TODO : We dont handle this case
											const char *nam = pBF->GetDynamicProcName(a);
											// Assign the proc to the call
											Proc *p = pProc->getProg()->getLibraryProc(nam);
											if (call->getDestProc()) {
												// prevent unnecessary __imp procs
												prog->removeProc(call->getDestProc()->getName());
											}
											call->setDestProc(p);
											call->setIsComputed(false);
											call->setDest(Location::memOf(new Const(a)));
										}
									}
								}
							}
						}
					}
					// Treat computed and static calls separately
					if (call->isComputed()) {
						BB_rtls->push_back(pRtl);
						pBB = pCfg->newBB(BB_rtls, COMPCALL, 1);

						// Stop decoding sequentially if the basic block already
						// existed otherwise complete the basic block
						if (pBB == 0)
							sequentialDecode = false;
						else
							pCfg->addOutEdge(pBB, uAddr + inst.numBytes);
						// Add this call to the list of calls to analyse. We won't
						// be able to analyse it's callee(s), of course.
						callList.push_back(call);
					}
					else {		// Static call
						// Find the address of the callee.
						ADDRESS uNewAddr = call->getFixedDest();

						// Calls with 0 offset (i.e. call the next instruction) are simply pushing the PC to the
						// stack. Treat these as non-control flow instructions and continue.
						if (uNewAddr == uAddr + inst.numBytes)
							break;

						// Call the virtual helper function. If implemented, will check for machine specific funcion
						// calls
						if (helperFunc(uNewAddr, uAddr, BB_rtls)) {
							// We have already added to BB_rtls
							pRtl = NULL;		// Discard the call semantics
							break;
						}

						BB_rtls->push_back(pRtl);

						// Add this non computed call site to the set of call sites which need to be analysed later.
						//pCfg->addCall(call);
						callList.push_back(call);

						// Record the called address as the start of a new procedure if it didn't already exist.
						if (uNewAddr && uNewAddr != NO_ADDRESS && pProc->getProg()->findProc(uNewAddr) == NULL) {
							callList.push_back(call);
							//newProc(pProc->getProg(), uNewAddr);
							if (Boomerang::get()->traceDecoder)
								LOG << "p" << uNewAddr << "\t";
						}

						// Check if this is the _exit or exit function. May prevent us from attempting to decode
						// invalid instructions, and getting invalid stack height errors

						const char* name = NULL;
						if (!ASS_FILE){
							name = pBF->SymbolByAddress(uNewAddr);
							if (name == NULL && call->getDest()->isMemOf() && 
												call->getDest()->getSubExp1()->isIntConst()) {
								ADDRESS a = ((Const*)call->getDest()->getSubExp1())->getInt();
								if (pBF->IsDynamicLinkedProcPointer(a))
									name = pBF->GetDynamicProcName(a);
							}
						}
						else {
							// A callee without an entry in namesList simply has no name (name stays NULL)
							if (namesList.find(uNewAddr) != namesList.end())
								name = namesList.find(uNewAddr)->second;
						}	
						if (name && noReturnCallDest(name)) {
							// Make sure it has a return appended (so there is only one exit from the function)
							//call->setReturnAfterCall(true);		// I think only the Sparc frontend cares
							// Create the new basic block
							pBB = pCfg->newBB(BB_rtls, CALL, 1);
							appendSyntheticReturn(pBB, pProc, pRtl);

							// Stop decoding sequentially
							sequentialDecode = false;
						}
						else {
							// Create the new basic block
							pBB = pCfg->newBB(BB_rtls, CALL, 1);

							if (call->isReturnAfterCall()) {
								// Constuct the RTLs for the new basic block
								std::list<RTL*>* rtls = new std::list<RTL*>();
								// The only RTL in the basic block is one with a ReturnStatement
								std::list<Statement*>* retStmts = new std::list<Statement*>;
								retStmts->push_back(new ReturnStatement());
								rtls->push_back(new RTL(pRtl->getAddress()+1, retStmts));

								BasicBlock* returnBB = pCfg->newBB(rtls, RET, 0);
								// Add out edge from call to return
								pCfg->addOutEdge(pBB, returnBB);
								// Put a label on the return BB (since it's an orphan); a jump will be reqd
								pCfg->setLabel(returnBB);
								pBB->setJumpReqd();
								// Mike: do we need to set return locations?
								// This ends the function
								sequentialDecode = false;
							}
							else {
								// Add the fall through edge if the block didn't
								// already exist
								if (pBB != NULL)
									pCfg->addOutEdge(pBB, uAddr+inst.numBytes);
							}
						}
					}

					extraProcessCall(call, BB_rtls);

					// Create the list of RTLs for the next basic block and continue with the next instruction.
					BB_rtls = NULL;
					break;	
				}

				case STMT_RET: {
					// Stop decoding sequentially
					sequentialDecode = false;

					pBB = createReturnBlock(pProc, BB_rtls, pRtl);

					// Create the list of RTLs for the next basic block and
					// continue with the next instruction.
					BB_rtls = NULL;		// New RTLList for next BB
					break;
				}

				case STMT_BOOLASSIGN:
					// This is just an ordinary instruction; no control transfer
					// Fall through
				case STMT_JUNCTION:
					// FIXME: Do we need to do anything here?
				case STMT_ASSIGN:
				case STMT_PHIASSIGN:
				case STMT_IMPASSIGN:
				case STMT_IMPREF:
					// Do nothing
					break;

				} // switch (s->getKind())
			}
			if (BB_rtls && pRtl)
				// If non null, we haven't put this RTL into a the current BB as yet
				BB_rtls->push_back(pRtl);

			if (inst.reDecode)
				// Special case: redecode the last instruction, without advancing uAddr by numBytes
				continue;
			uAddr += inst.numBytes;

			if (uAddr > lastAddr)
				lastAddr = uAddr;

			// If sequentially decoding, check if the next address happens to be the start of an existing BB. If so,
			// finish off the current BB (if any RTLs) as a fallthrough, and no need to decode again (unless it's an
			// incomplete BB, then we do decode it).
			// In fact, mustn't decode twice, because it will muck up the coverage, but also will cause subtle problems
			// like add a call to the list of calls to be processed, then delete the call RTL (e.g. Pentium 134.perl
			// benchmark)
			if (sequentialDecode && pCfg->existsBB(uAddr)) {
				// Create the fallthrough BB, if there are any RTLs at all
				if (BB_rtls) {
					// Deliberately a separate local: the function level pBB is left untouched here
					PBB pFallBB = pCfg->newBB(BB_rtls, FALL, 1);
					// Add an out edge to this address
					if (pFallBB) {
						pCfg->addOutEdge(pFallBB, uAddr);
						BB_rtls = NULL;			// Need new list of RTLs
					}
				}
				// Pick a new address to decode from, if the BB is complete
				if (!pCfg->isIncomplete(uAddr))
					sequentialDecode = false;
			}
			// Step to the next assembly line, but never past the end of the list
			if (AssProgram && li != temp_lines->end())
				++li;
		}	// while sequentialDecode

		// Add this range to the coverage
//		  pProc->addRange(start, uAddr);

		// Must set sequentialDecode back to true
		sequentialDecode = true;


	}	// while nextAddress() != NO_ADDRESS

	//ProgWatcher *w = prog->getWatcher();
	//if (w)
	//	  w->alert_done(pProc, initAddr, lastAddr, nTotalBytes);

	// Add the callees to the set of CallStatements, and also to the Prog object
	std::list<CallStatement*>::iterator it;
	for (it = callList.begin(); it != callList.end(); it++) {
		ADDRESS dest = (*it)->getFixedDest();
		// Don't speculatively decode procs that are outside of the main text section, apart from dynamically
		// linked ones (in the .plt). With assembly input (ASS_FILE) every call is accepted and pBF is not
		// consulted.
		// TODO: change pBF pointers
		if (ASS_FILE || pBF->IsDynamicLinkedProc(dest) || !spec || (dest < pBF->getLimitTextHigh())) {
			pCfg->addCall(*it);
			// Don't visit the destination of a register call
			Proc *np = (*it)->getDestProc();
			if (np == NULL && dest != NO_ADDRESS) {
				//np = newProc(pProc->getProg(), dest);
				np = pProc->getProg()->setNewProc(dest);
			}
			if (np != NULL) {
				np->setFirstCaller(pProc);
				pProc->addCallee(np);
			}			
		}
	}

	Boomerang::get()->alert_decode(pProc, startAddr, lastAddr, nTotalBytes);
	std::cout<< "finished processing proc " << pProc->getName() << " at address " << pProc->getNativeAddress() << "\n";
	if (VERBOSE)
		LOG << "finished processing proc " << pProc->getName() << " at address " << pProc->getNativeAddress() << "\n";

	return true;
}

/*==============================================================================
 * FUNCTION:	FrontEnd::getInst
 * OVERVIEW:	Fetch the smallest (nop-sized) instruction, in an endianness independent manner. This generic
 *				version reads the single byte found at the given host address.
 * NOTE:		Frequently overridden
 * NOTE:		(review) addr is an int that is converted to a pointer; on hosts where a pointer is wider than an
 *				int it cannot represent every host address - confirm that callers only run where this is safe.
 * PARAMETERS:	addr - host address to fetch from
 * RETURNS:		An integer with the instruction in it (the byte value, zero extended)
 *============================================================================*/
int FrontEnd::getInst(int addr)
{
	// Treat the integer as a host pointer to one unsigned byte, then widen that byte to int
	const unsigned char* pByte = reinterpret_cast<const unsigned char*>(addr);
	return static_cast<int>(*pByte);
}


/*==============================================================================
 * FUNCTION:	TargetQueue::visit
 * OVERVIEW:	Visit a destination as a label: ask the CFG whether the address has already been parsed and, if
 *				it has not, queue it so that a BB can be created for it later.
 *				Note: at present, it is important to visit an address BEFORE an out edge is added to that address.
 *				Adding an out edge enters the address into the Cfg's BB map, which makes the BB look as if it
 *				had already been visited, so it would be overlooked. A scheme where the order of calling visit()
 *				and AddOutEdge() did not matter would be better.
 * PARAMETERS:	pCfg - the enclosing CFG
 *				uNewAddr - the address to be checked
 *				pNewBB - passed by reference to Cfg::label(); set to the lower part of the BB if the address
 *				already exists as a non explicit label (BB has to be split)
 * RETURNS:		<nothing>
 *============================================================================*/
void TargetQueue::visit(Cfg* pCfg, ADDRESS uNewAddr, PBB& pNewBB) {
	// Nothing to queue when the CFG reports the destination as already parsed
	if (pCfg->label(uNewAddr, pNewBB))
		return;

	// Not seen yet: append the address to the back of the local queue
	targets.push(uNewAddr);
	if (Boomerang::get()->traceDecoder)
		LOG << ">" << uNewAddr << "\t";
}
Ejemplo n.º 2
0
/**
 * Decompile \p proc, first descending depth-first into the user procedures it calls
 * (when the decodeChildren setting is on) and detecting recursion groups on the way.
 *
 * The procedure is pushed onto m_callStack for the duration of the call and popped again
 * before returning. A procedure that is not part of a cycle goes through early, middle and
 * late decompilation and ends up as PROC_FINAL. A procedure that is part of a cycle is only
 * finalised (via recursionGroupAnalysis) once it is the first call stack entry that belongs
 * to its recursion group.
 *
 * \param proc the procedure to decompile
 * \returns PROC_FINAL if \p proc was already final on entry, PROC_UNDECODED if it had to be
 *          decoded first and decoding failed, otherwise the status of \p proc on exit.
 */
ProcStatus ProcDecompiler::tryDecompileRecursive(UserProc *proc)
{
    /* Cycle detection logic:
     * *********************
     * cycleGrp is an initially null pointer to a set of procedures, representing the procedures
     * involved in the current recursion group, if any. These procedures have to be analysed
     * together as a group, after individual pre-group analysis. child is a set of procedures,
     * cleared at the top of decompile(), representing the cycles associated with the current
     * procedure and all of its children. If this is empty, the current procedure is not involved in
     * recursion, and can be decompiled up to and including removing unused statements. callStack is
     * an initially empty list of procedures, representing the call stack from the current entry
     * point to the current procedure, inclusive. If (after all children have been processed:
     * important!) the first element in callStack and also cycleGrp is the current procedure, we
     * have the maximal set of distinct cycles, so we can do the recursion group analysis and return
     * an empty set. At the end of the recursion group analysis, the whole group is complete, ready
     * for the global analyses.
     *
     *   cycleSet decompile(ProcList callStack)        // call stack initially empty
     *     child = new ProcSet
     *     push this proc to the call stack
     *     for each child c called by this proc
     *       if c has already been visited but not finished
     *         // have new cycle
     *         if c is in callStack
     *           // this is a completely new cycle
     *           insert every proc from c to the end of callStack into child
     *         else
     *           // this is a new branch of an existing cycle
     *           child = c->cycleGrp
     *           find first element f of callStack that is in cycleGrp
     *           insert every proc after f to the end of callStack into child
     *           for each element e of child
     *         insert e->cycleGrp into child
     *         e->cycleGrp = child
     *       else
     *         // no new cycle
     *         tmp = c->decompile(callStack)
     *         child = union(child, tmp)
     *         set return statement in call to that of c
     *
     *     if (child empty)
     *       earlyDecompile()
     *       child = middleDecompile()
     *       removeUnusedStatements()           // Not involved in recursion
     *     else
     *       // Is involved in recursion
     *       find first element f in callStack that is also in cycleGrp
     *       if (f == this) // The big test: have we got the complete strongly connected component?
     *         recursionGroupAnalysis() // Yes, we have
     *         child = new ProcSet      // Don't add these processed cycles to the parent
     *     remove last element (= this) from callStack
     *     return child
     */

    Project *project = proc->getProg()->getProject();

    LOG_MSG("%1 procedure '%2'", (proc->getStatus() >= PROC_VISITED) ? "Re-visiting" : "Visiting",
            proc->getName());
    project->alertDiscovered(proc);

    // Prevent infinite loops when there are cycles in the call graph (should never happen now)
    if (proc->getStatus() >= PROC_FINAL) {
        LOG_WARN("Proc %1 already has status PROC_FINAL", proc->getName());
        return PROC_FINAL; // Already decompiled
    }

    if (proc->getStatus() < PROC_DECODED) {
        // Can happen e.g. if a callee is visible only after analysing a switch statement
        // Actually decoding for the first time, not REdecoding
        if (!proc->getProg()->reDecode(proc)) {
            return PROC_UNDECODED;
        }
    }

    if (proc->getStatus() < PROC_VISITED) {
        proc->setStatus(PROC_VISITED); // We have at least visited this proc "on the way down"
    }

    // From here on, every exit path must pop proc from the call stack again (see the end)
    m_callStack.push_back(proc);

    if (project->getSettings()->verboseOutput) {
        printCallStack();
    }

    if (project->getSettings()->decodeChildren) {
        // Recurse to callees first, to perform a depth first search
        for (BasicBlock *bb : *proc->getCFG()) {
            if (bb->getType() != BBType::Call) {
                continue;
            }

            // The call Statement will be in the last RTL in this BB
            CallStatement *call = static_cast<CallStatement *>(bb->getRTLs()->back()->getHlStmt());

            // The static_cast above is unchecked, so verify the statement kind before using it
            if (!call->isCall()) {
                LOG_WARN("BB at address %1 is a CALL but last stmt is not a call: %2",
                         bb->getLowAddr(), call);
                continue;
            }

            UserProc *callee = dynamic_cast<UserProc *>(call->getDestProc());

            if (callee == nullptr) { // not an user proc, or missing dest
                continue;
            }

            if (callee->getStatus() == PROC_FINAL) {
                // Already decompiled, but the return statement still needs to be set for this call
                call->setCalleeReturn(callee->getRetStmt());
                continue;
            }

            // check if the callee has already been visited but not done (apart from global
            // analyses). This means that we have found a new cycle or a part of an existing cycle
            if ((callee->getStatus() >= PROC_VISITED) && (callee->getStatus() <= PROC_EARLYDONE)) {
                // if callee is in callStack
                ProcList::iterator calleeIt = std::find(m_callStack.begin(), m_callStack.end(),
                                                        callee);

                if (calleeIt != m_callStack.end()) {
                    // This is a completely new cycle: everything from the callee to the top of
                    // the call stack forms the new group
                    std::shared_ptr<ProcSet> newRecursionGroup(new ProcSet());
                    newRecursionGroup->insert(calleeIt, m_callStack.end());
                    createRecursionGoup(newRecursionGroup);
                }
                else if (callee->getRecursionGroup()) {
                    // This is a new branch of an existing cycle that was visited previously
                    std::shared_ptr<ProcSet> recursionGroup = callee->getRecursionGroup();

                    // Find first element func of callStack that is in callee->recursionGroup
                    ProcList::iterator _pi = std::find_if(
                        m_callStack.begin(), m_callStack.end(),
                        [&recursionGroup](UserProc *func) {
                            return recursionGroup->find(func) != recursionGroup->end();
                        });

                    // Insert every proc after func to the end of path into child
                    assert(_pi != m_callStack.end());
                    for (auto it = std::next(_pi); it != m_callStack.end(); ++it) {
                        addToRecursionGroup(*it, recursionGroup);
                    }
                }

                proc->setStatus(PROC_INCYCLE);
            }
            else {
                // No new cycle
                LOG_VERBOSE("Preparing to decompile callee '%1' of '%2'", callee->getName(),
                            proc->getName());

                callee->promoteSignature();
                tryDecompileRecursive(callee);
                // Child has at least done middleDecompile(), possibly more
                call->setCalleeReturn(callee->getRetStmt());

                // Decompiling the callee may have registered proc in a recursion group;
                // if so, adopt that group and mark proc as being part of a cycle
                if (proc->getStatus() != PROC_INCYCLE) {
                    const auto groupIt = m_recursionGroups.find(proc);

                    if (groupIt != m_recursionGroups.end()) {
                        proc->setStatus(PROC_INCYCLE);
                        proc->setRecursionGroup(groupIt->second);
                    }
                }
            }
        }
    }

    // if no child involved in recursion
    if (proc->getStatus() != PROC_INCYCLE) {
        project->alertDecompiling(proc);
        LOG_MSG("Decompiling procedure '%1'", proc->getName());

        earlyDecompile(proc);
        middleDecompile(proc);

        if (project->getSettings()->verboseOutput) {
            printCallStack();
        }
    }

    // The status is deliberately tested again: it is re-read after the early/middle passes
    if (proc->getStatus() != PROC_INCYCLE) {
        lateDecompile(proc); // Do the whole works
        proc->setStatus(PROC_FINAL);
        project->alertEndDecompile(proc);
    }
    else if (m_recursionGroups.find(proc) != m_recursionGroups.end()) {
        // This proc's callees, and hence this proc, is/are involved in recursion.
        const std::shared_ptr<ProcSet> ownGroup = proc->getRecursionGroup();

        // Without a group there is nothing to search for (and nothing to dereference)
        if (ownGroup) {
            // Find first element f in path that is also in our recursion group
            ProcList::iterator f = std::find_if(
                m_callStack.begin(), m_callStack.end(), [&ownGroup](UserProc *func) {
                    return ownGroup->find(func) != ownGroup->end();
                });

            // The big test: have we found the whole strongly connected component (in the call
            // graph)? find_if returns end() when nothing matches, so check before dereferencing.
            if (f != m_callStack.end() && *f == proc) {
                // Yes, process these procs as a group
                recursionGroupAnalysis(proc->getRecursionGroup());
                proc->setStatus(PROC_FINAL);
                project->alertEndDecompile(proc);
            }
        }
    }

    // Remove last element (= this) from path
    assert(!m_callStack.empty());
    assert(m_callStack.back() == proc);
    m_callStack.pop_back();

    LOG_MSG("Finished decompile of '%1'", proc->getName());

    if (project->getSettings()->verboseOutput) {
        printCallStack();
    }

    return proc->getStatus();
}