Пример #1
0
/*==============================================================================
 * FUNCTION:	  FrontEnd::processProc
 * OVERVIEW:	  Process a procedure, given a native (source machine) address. Instructions are decoded
 *					either from the binary (decodeInstruction) or, when ASS_FILE is set, from the parsed
 *					assembly lines of the label whose address equals uAddr (decodeAssemblyInstruction).
 * PARAMETERS:	  uAddr - the address at which the procedure starts
 *				  pProc - the procedure object
 *				  os - the output stream for .rtl output (not referenced in this function body)
 *				  frag - if true, this is just a fragment of a procedure (not referenced in this function body)
 *				  spec - if true, this is a speculative decode
 * NOTE:		  This is a sort of generic front end. For many processors, this will be overridden
 *					in the FrontEnd derived class, sometimes calling this function to do most of the work
 * RETURNS:		  false only for a speculative decode that either has no CFG or meets an invalid
 *					instruction; true otherwise (a non speculative decode emits an INVALID BB for an
 *					invalid instruction and carries on)
 *============================================================================*/
bool FrontEnd::processProc(ADDRESS uAddr, UserProc* pProc, std::ofstream &os, bool frag /* = false */,
		bool spec /* = false */) {
	PBB pBB;					// Pointer to the current basic block
	std::cout<<"Entering Processing Proc\n"; 
	// just in case you missed it
	first_line = true;
	if (AssProgram)
		std::cout <<"Name Of Program : " << AssProgram->name << std::endl;
	Boomerang::get()->alert_new(pProc);
	 
	// We have a set of CallStatement pointers. These may be disregarded if this is a speculative decode
	// that fails (i.e. an illegal instruction is found). If not, this set will be used to add to the set of calls
	// to be analysed in the cfg, and also to call newProc()
	std::list<CallStatement*> callList;

	// Indicates whether or not the next instruction to be decoded is the lexical successor of the current one.
	// Will be true for all NCTs and for CTIs with a fall through branch.
	bool sequentialDecode = true;

	Cfg* pCfg = pProc->getCFG();

	// If this is a speculative decode, the second time we decode the same address, we get no cfg. Else an error.
	if (spec && (pCfg == 0))
		return false;
	assert(pCfg);

	// Initialise the queue of control flow targets that have yet to be decoded.
	targetQueue.initial(uAddr);

	// Clear the pointer used by the caller prologue code to access the last call rtl of this procedure
	//decoder.resetLastCall();

	// ADDRESS initAddr = uAddr;
	int nTotalBytes = 0;
	ADDRESS startAddr = uAddr;
	ADDRESS lastAddr = uAddr;
	ADDRESS address = uAddr;
	std::cout << "Start at address = " << uAddr << std::endl; 
	//------IMPORTANT------------------------------------------------------------------------
	// Locate the assembly lines belonging to the label that starts at uAddr.
	list<AssemblyLabel*>::iterator lbi;
	// Empty stand-in used when no label matches. It lives on the stack, so nothing is leaked when temp_lines is
	// redirected to a label's own list, and it gives the line iterator below a valid range in every case.
	list<AssemblyLine*> noLines;
	list<AssemblyLine*>* temp_lines = &noLines;
	
	if (AssProgram){
		for(lbi = AssProgram->labelList->begin(); lbi != AssProgram->labelList->end(); ++lbi ){
			if((*lbi)->address == uAddr){
				temp_lines = (*lbi)->lineList;
				std::cout << "***DECODE LABEL: " << (*lbi)->name << std::endl;
				std::cout << "***AT ADDRESS: " << (*lbi)->address << std::endl;
				std::cout << "***NUMBER OF INSTRUCTION: " << (*lbi)->lineList->size() << std::endl;
				break;
			}
		}
	}
	
	// Always initialise the line iterator; for an empty list begin() == end(), which is checked before each use.
	// NOTE(review): li is positioned once, for the label at the entry address, and is not re-positioned when the
	// outer loop below picks a new address from the target queue - confirm that this is intended.
	list<AssemblyLine*>::iterator li = temp_lines->begin();
	
	//---------------------------------------------------------------------------------------
	while ((uAddr = targetQueue.nextAddress(pCfg)) != NO_ADDRESS) {
		// The list of RTLs for the current basic block
		std::list<RTL*>* BB_rtls = new std::list<RTL*>();

		// Keep decoding sequentially until a CTI without a fall through branch is decoded
		//ADDRESS start = uAddr;
		DecodeResult inst;
		while (sequentialDecode) {

			// Decode and classify the current source instruction
			if (Boomerang::get()->traceDecoder)
				LOG << "*" << uAddr << "\t";

			// Decode the inst at uAddr.
			
			if(ASS_FILE){
				if(li != temp_lines->end()){

					inst = decodeAssemblyInstruction(uAddr,"assemblySets.at(line)", (*li));
				}
				else {
					// No assembly line is left for this address. Report an invalid instruction rather than
					// processing a stale (or never initialised) decode result a second time.
					inst.valid = false;
					inst.rtl = NULL;
					inst.numBytes = 0;
					inst.reDecode = false;
				}
			}
			else
				inst = decodeInstruction(uAddr);
			
			// If invalid and we are speculating, just exit
			if (spec && !inst.valid)
				return false;

			// Need to construct a new list of RTLs if a basic block has just been finished but decoding is
			// continuing from its lexical successor
			if (BB_rtls == NULL)
				BB_rtls = new std::list<RTL*>();

			RTL* pRtl = inst.rtl;
			if (inst.valid == false) {
				
				// Alert the watchers to the problem
				Boomerang::get()->alert_baddecode(uAddr);

				// An invalid instruction. Most likely because a call did not return (e.g. call _exit()), etc.
				// Best thing is to emit a INVALID BB, and continue with valid instructions
				if (VERBOSE) {
					LOG << "Warning: invalid instruction at " << uAddr << ": ";
					// Emit the next 4 bytes for debugging
					for (int ii=0; ii < 4; ii++)
						LOG << (unsigned)(pBF->readNative1(uAddr + ii) & 0xFF) << " ";
					LOG << "\n";
				}
				// Emit the RTL anyway, so we have the address and maybe some other clues
				BB_rtls->push_back(new RTL(uAddr));	 
				pBB = pCfg->newBB(BB_rtls, INVALID, 0);
				sequentialDecode = false; BB_rtls = NULL; continue;
			}
			//pProc->unionDefine = new list<UnionDefine*>();
			// AssProgram is optional everywhere else in this function, so guard the dereference here as well
			if (AssProgram) {
				pProc->bitVar = AssProgram->bitVar;
				pProc->replacement = AssProgram->replacement;
			}
			// alert the watchers that we have decoded an instruction
			Boomerang::get()->alert_decode(uAddr, inst.numBytes);
			nTotalBytes += inst.numBytes;			
			
			// Check if this is an already decoded jump instruction (from a previous pass with propagation etc)
			// If so, we throw away the just decoded RTL (but we still may have needed to calculate the number
			// of bytes.. ick.)
			std::map<ADDRESS, RTL*>::iterator ff = previouslyDecoded.find(uAddr);
			if (ff != previouslyDecoded.end())
				pRtl = ff->second;

			if (pRtl == NULL) {
				// This can happen if an instruction is "cancelled", e.g. call to __main in a hppa program
				// Just ignore the whole instruction
				if (inst.numBytes > 0)
					uAddr += inst.numBytes;
				// The assembly line has been consumed too; step past it so that it is not decoded again
				if (AssProgram && li != temp_lines->end())
					++li;
				continue;
			}
			
			// Display RTL representation (printed unconditionally to stdout)
			std::cout<<"RTL: "<<std::endl;
			std::ostringstream st;
			pRtl->print(st);
			std::cout << st.str().c_str()<<std::endl;

	
			ADDRESS uDest;

			// For each Statement in the RTL
			//std::list<Statement*>& sl = pRtl->getList();
			std::list<Statement*> sl = pRtl->getList();
			// Make a copy (!) of the list. This is needed temporarily to work around the following problem.
			// We are currently iterating an RTL, which could be a return instruction. The RTL is passed to
			// createReturnBlock; if this is not the first return statement, it will get cleared, and this will
			// cause problems with the current iteration. The effects seem to be worse for MSVC/Windows.
			// This problem will likely be easier to cope with when the RTLs are removed, and there are special
			// Statements to mark the start of instructions (and their native address).
			// FIXME: However, this workaround breaks logic below where a GOTO is changed to a CALL followed by a return
			// if it points to the start of a known procedure
			std::list<Statement*>::iterator ss;
#if 1
			for (ss = sl.begin(); ss != sl.end(); ss++) { // }
#else
			// The counter is introduced because ss != sl.end() does not work as it should
			// FIXME: why? Does this really fix the problem?
			int counter = sl.size();

			for (ss = sl.begin(); counter > 0; ss++, counter--) {
#endif
				Statement* s = *ss;
				s->setProc(pProc);		// let's do this really early!
				// If there is a reference hint for this address, replace the constant by the address of the
				// named global
				if (refHints.find(pRtl->getAddress()) != refHints.end()) {
					const char *nam = refHints[pRtl->getAddress()].c_str();
					ADDRESS gu = prog->getGlobalAddr((char*)nam);
					if (gu != NO_ADDRESS) {
						s->searchAndReplace(new Const((int)gu), new Unary(opAddrOf, Location::global(nam, pProc)));
					}
				}
				s->simplify();
				// Only meaningful when s is a goto, case or branch; it is dereferenced solely in those cases below
				GotoStatement* stmt_jump = static_cast<GotoStatement*>(s);

				// Check for a call to an already existing procedure (including self recursive jumps), or to the PLT
				// (note that a LibProc entry for the PLT function may not yet exist)
				ADDRESS dest;
				Proc* proc;
				
				if (s->getKind() == STMT_GOTO) {
					dest = stmt_jump->getFixedDest();
					if (dest != NO_ADDRESS) {
						proc = prog->findProc(dest);
						if (proc == NULL) {
							if(!ASS_FILE){
								if (pBF->IsDynamicLinkedProc(dest))
									proc = prog->setNewProc(dest);
							}
						}
						if (proc != NULL && proc != (Proc*)-1) {
							// The jump targets a known procedure: turn it into a call followed by a return
							s = new CallStatement();
							CallStatement *call = static_cast<CallStatement*>(s);
							call->setDest(dest);
							call->setDestProc(proc);
							call->setReturnAfterCall(true);
							// also need to change it in the actual RTL
							std::list<Statement*>::iterator ss1 = ss;
							ss1++;
							assert(ss1 == sl.end());
							pRtl->replaceLastStmt(s);
							*ss = s;
						}
					}
				}

				switch (s->getKind())
				{

				case STMT_GOTO: {
					uDest = stmt_jump->getFixedDest();
	
					// Handle one way jumps and computed jumps separately
					if (uDest != NO_ADDRESS) {

						BB_rtls->push_back(pRtl);
						sequentialDecode = false;

						pBB = pCfg->newBB(BB_rtls,ONEWAY,1);
						BB_rtls = NULL;		// Clear when make new BB

						// Exit the switch now if the basic block already existed
						if (pBB == 0) {
							break;
						}

						// Add the out edge if it is to a destination within the
						// procedure

						if (uDest < pBF->getLimitTextHigh()) {
							targetQueue.visit(pCfg, uDest, pBB);
							pCfg->addOutEdge(pBB, uDest, true);
						}
						else {
							std::cout<<"Entering Processing Proc5\n"; 
							if (!ASS_FILE)
								LOG << "Error: Instruction at " << uAddr << " branches beyond end of section, to "
									<< uDest << "\n";
							else{
								// When decoding from an assembly file the destination is followed regardless of
								// the text section limit
								targetQueue.visit(pCfg, uDest, pBB);
								pCfg->addOutEdge(pBB, uDest, true);
							} 

						}
					}
					break;
				}

				case STMT_CASE: {
					Exp* pDest = stmt_jump->getDest();
					if (pDest == NULL) {				// Happens if already analysed (now redecoding)
						// SWITCH_INFO* psi = ((CaseStatement*)stmt_jump)->getSwitchInfo();
						BB_rtls->push_back(pRtl);
						pBB = pCfg->newBB(BB_rtls, NWAY, 0);	// processSwitch will update num outedges
						pBB->processSwitch(pProc);		// decode arms, set out edges, etc
						sequentialDecode = false;		// Don't decode after the jump
						BB_rtls = NULL;					// New RTLList for next BB
						break;							// Just leave it alone
					}
					// Check for indirect calls to library functions, especially in Win32 programs
					if (pDest && pDest->getOper() == opMemOf &&
							pDest->getSubExp1()->getOper() == opIntConst && 
							pBF->IsDynamicLinkedProcPointer(((Const*)pDest->getSubExp1())->getAddr())) {
						if (VERBOSE)
							LOG << "jump to a library function: " << stmt_jump << ", replacing with a call/ret.\n";
						// jump to a library function
						// replace with a call ret
						// TODO: 
						std::string func = pBF->GetDynamicProcName(
							((Const*)stmt_jump->getDest()->getSubExp1())->getAddr());
						//------------------------------------
						CallStatement *call = new CallStatement;
						call->setDest(stmt_jump->getDest()->clone());
						LibProc *lp = pProc->getProg()->getLibraryProc(func.c_str());
						if (lp == NULL)
							LOG << "getLibraryProc returned NULL, aborting\n";
						assert(lp);
						call->setDestProc(lp);
						std::list<Statement*>* stmt_list = new std::list<Statement*>;
						stmt_list->push_back(call);
						BB_rtls->push_back(new RTL(pRtl->getAddress(), stmt_list));
						pBB = pCfg->newBB(BB_rtls, CALL, 1);
						appendSyntheticReturn(pBB, pProc, pRtl);
						sequentialDecode = false;
						BB_rtls = NULL;
						if (pRtl->getAddress() == pProc->getNativeAddress()) {
							// it's a thunk
							// Proc *lp = prog->findProc(func.c_str());
							func = std::string("__imp_") + func;
							pProc->setName(func.c_str());
							//lp->setName(func.c_str());
							Boomerang::get()->alert_update_signature(pProc);
						}
						callList.push_back(call);
						ss = sl.end(); ss--;	// get out of the loop
						break;
					}
					BB_rtls->push_back(pRtl);
					// We create the BB as a COMPJUMP type, then change to an NWAY if it turns out to be a switch stmt
					pBB = pCfg->newBB(BB_rtls, COMPJUMP, 0);
					LOG << "COMPUTED JUMP at " << uAddr << ", pDest = " << pDest << "\n";
					if (Boomerang::get()->noDecompile) {
						// try some hacks
						if (pDest->isMemOf() && pDest->getSubExp1()->getOper() == opPlus &&
								pDest->getSubExp1()->getSubExp2()->isIntConst()) {
							// assume subExp2 is a jump table
							ADDRESS jmptbl = ((Const*)pDest->getSubExp1()->getSubExp2())->getInt();
							unsigned int i;
							// Follow 4-byte table entries for as long as they point into the text section
							for (i = 0; ; i++) {
								ADDRESS uDest = pBF->readNative4(jmptbl + i * 4);
								if (pBF->getLimitTextLow() <= uDest && uDest < pBF->getLimitTextHigh()) {
									LOG << "  guessed uDest " << uDest << "\n";
									targetQueue.visit(pCfg, uDest, pBB);
									pCfg->addOutEdge(pBB, uDest, true);
								} else
									break;
							}
							pBB->updateType(NWAY, i);
						}
					}
					sequentialDecode = false;
					BB_rtls = NULL;		// New RTLList for next BB
					break;
				}


				case STMT_BRANCH: {
					uDest = stmt_jump->getFixedDest();
					BB_rtls->push_back(pRtl);
					pBB = pCfg->newBB(BB_rtls, TWOWAY, 2);

					// Stop decoding sequentially if the basic block already existed otherwise complete the basic block
					if (pBB == 0)
						sequentialDecode = false;
					else {

						// Add the out edge if it is to a destination within the procedure
						if (!ASS_FILE){
							if (uDest < pBF->getLimitTextHigh()) {
								targetQueue.visit(pCfg, uDest, pBB);
								pCfg->addOutEdge(pBB, uDest, true);
							}
							else
								LOG << "Error: Instruction at " << uAddr << " branches beyond end of section, to "
									<< uDest << "\n";
						}
						else {
								targetQueue.visit(pCfg, uDest, pBB);
								pCfg->addOutEdge(pBB, uDest, true);
						}

						// Add the fall-through outedge
						pCfg->addOutEdge(pBB, uAddr + inst.numBytes); 
					}

					// Create the list of RTLs for the next basic block and continue with the next instruction.
					BB_rtls = NULL;
					break;
				}

				case STMT_CALL: {
					CallStatement* call = static_cast<CallStatement*>(s);
					
					// Check for a dynamic linked library function
					// TODO: solution dont use pBF
					if (!ASS_FILE){ 
						if (call->getDest()->getOper() == opMemOf &&
								call->getDest()->getSubExp1()->getOper() == opIntConst &&
								pBF->IsDynamicLinkedProcPointer(((Const*)call->getDest()->getSubExp1())->getAddr())) {
							// Dynamic linked proc pointers are treated as static.
							const char *nam = pBF->GetDynamicProcName( ((Const*)call->getDest()->getSubExp1())->getAddr());
							Proc *p = pProc->getProg()->getLibraryProc(nam);
							call->setDestProc(p);
							call->setIsComputed(false);
						}
					}
					else {
						if (call->getDest()->getOper() == opMemOf &&
								call->getDest()->getSubExp1()->getOper() == opIntConst) {
							ADDRESS destAddr = ((Const*)call->getDest()->getSubExp1())->getAddr();
							// Only read the lookup results when the address is present in both tables; taking
							// ->second of a failed find() would dereference end()
							if (funcsType.count(destAddr) && funcsType.find(destAddr)->second &&
									namesList.count(destAddr)) {
								// Dynamic linked proc pointers are treated as static.
								const char *nam = namesList.find(destAddr)->second;
								Proc *p = pProc->getProg()->getLibraryProc(nam);
								call->setDestProc(p);
								call->setIsComputed(false);
							}
						}
					}

					// Is the called function a thunk calling a library function?
					// A "thunk" is a function which only consists of: "GOTO library_function"
					// Should i modify
					if (!ASS_FILE){
						if(	call &&	call->getFixedDest() != NO_ADDRESS ) {
							// Get the address of the called function.
							ADDRESS callAddr=call->getFixedDest();
							// It should not be in the PLT either, but getLimitTextHigh() takes this into account
							if (callAddr < pBF->getLimitTextHigh()) {
								// Decode it.
								DecodeResult decoded=decodeInstruction(callAddr);
								if (decoded.valid) { // is the instruction decoded succesfully?
									// Yes, it is. Create a Statement from it.
									RTL *rtl = decoded.rtl;
									Statement* first_statement = *rtl->getList().begin();
									if (first_statement) {
										first_statement->setProc(pProc);
										first_statement->simplify();
										GotoStatement* stmt_jump = static_cast<GotoStatement*>(first_statement);
										// In fact it's a computed (looked up) jump, so the jump seems to be a case
										// statement.
										//TODO : We dont handle this case
										if ( first_statement->getKind() == STMT_CASE &&
											stmt_jump->getDest()->getOper() == opMemOf &&
											stmt_jump->getDest()->getSubExp1()->getOper() == opIntConst &&
											pBF->IsDynamicLinkedProcPointer(((Const*)stmt_jump->getDest()->getSubExp1())->
												getAddr())) // Is it an "DynamicLinkedProcPointer"?
										{
											// Yes, it's a library function. Look up it's name.
											ADDRESS a = ((Const*)stmt_jump->getDest()->getSubExp1())->getAddr();
											// TODO : We dont handle this case
											const char *nam = pBF->GetDynamicProcName(a);
											// Assign the proc to the call
											Proc *p = pProc->getProg()->getLibraryProc(nam);
											if (call->getDestProc()) {
												// prevent unnecessary __imp procs
												prog->removeProc(call->getDestProc()->getName());
											}
											call->setDestProc(p);
											call->setIsComputed(false);
											call->setDest(Location::memOf(new Const(a)));
										}
									}
								}
							}
						}
					}
					// Treat computed and static calls separately
					if (call->isComputed()) {
						BB_rtls->push_back(pRtl);
						pBB = pCfg->newBB(BB_rtls, COMPCALL, 1);

						// Stop decoding sequentially if the basic block already
						// existed otherwise complete the basic block
						if (pBB == 0)
							sequentialDecode = false;
						else
							pCfg->addOutEdge(pBB, uAddr + inst.numBytes);
						// Add this call to the list of calls to analyse. We won't
						// be able to analyse it's callee(s), of course.
						callList.push_back(call);
					}
					else {		// Static call
						// Find the address of the callee.
						ADDRESS uNewAddr = call->getFixedDest();

						// Calls with 0 offset (i.e. call the next instruction) are simply pushing the PC to the
						// stack. Treat these as non-control flow instructions and continue.
						if (uNewAddr == uAddr + inst.numBytes)
							break;

						// Call the virtual helper function. If implemented, will check for machine specific funcion
						// calls
						if (helperFunc(uNewAddr, uAddr, BB_rtls)) {
							// We have already added to BB_rtls
							pRtl = NULL;		// Discard the call semantics
							break;
						}

						BB_rtls->push_back(pRtl);

						// Add this non computed call site to the set of call sites which need to be analysed later.
						//pCfg->addCall(call);
						callList.push_back(call);

						// Record the called address as the start of a new procedure if it didn't already exist.
						// NOTE(review): the call was already queued just above, so it ends up in callList twice
						// on this path - confirm whether that is intended.
						if (uNewAddr && uNewAddr != NO_ADDRESS && pProc->getProg()->findProc(uNewAddr) == NULL) {
							callList.push_back(call);
							//newProc(pProc->getProg(), uNewAddr);
							if (Boomerang::get()->traceDecoder)
								LOG << "p" << uNewAddr << "\t";
						}

						// Check if this is the _exit or exit function. May prevent us from attempting to decode
						// invalid instructions, and getting invalid stack height errors
						
						const char* name;
						if (!ASS_FILE){
							name = pBF->SymbolByAddress(uNewAddr);
							if (name == NULL && call->getDest()->isMemOf() && 
												call->getDest()->getSubExp1()->isIntConst()) {
								ADDRESS a = ((Const*)call->getDest()->getSubExp1())->getInt();
								if (pBF->IsDynamicLinkedProcPointer(a))
									name = pBF->GetDynamicProcName(a);
							}
						}
						else {
							// A callee without an entry in namesList simply has no name; never read ->second
							// from a failed lookup
							name = NULL;
							if (namesList.count(uNewAddr))
								name = namesList.find(uNewAddr)->second;
						}	
						if (name && noReturnCallDest(name)) {
							// Make sure it has a return appended (so there is only one exit from the function)
							//call->setReturnAfterCall(true);		// I think only the Sparc frontend cares
							// Create the new basic block
							pBB = pCfg->newBB(BB_rtls, CALL, 1);
							appendSyntheticReturn(pBB, pProc, pRtl);

							// Stop decoding sequentially
							sequentialDecode = false;
						}
						else {
							// Create the new basic block
							pBB = pCfg->newBB(BB_rtls, CALL, 1);

							if (call->isReturnAfterCall()) {
								// Constuct the RTLs for the new basic block
								std::list<RTL*>* rtls = new std::list<RTL*>();
								// The only RTL in the basic block is one with a ReturnStatement
								std::list<Statement*>* sl = new std::list<Statement*>;
								sl->push_back(new ReturnStatement());
								rtls->push_back(new RTL(pRtl->getAddress()+1, sl));
		
								BasicBlock* returnBB = pCfg->newBB(rtls, RET, 0);
								// Add out edge from call to return
								pCfg->addOutEdge(pBB, returnBB);
								// Put a label on the return BB (since it's an orphan); a jump will be reqd
								pCfg->setLabel(returnBB);
								pBB->setJumpReqd();
								// Mike: do we need to set return locations?
								// This ends the function
								sequentialDecode = false;
							}
							else {
								// Add the fall through edge if the block didn't
								// already exist
								if (pBB != NULL)
									pCfg->addOutEdge(pBB, uAddr+inst.numBytes);
							}
						}
					}

					extraProcessCall(call, BB_rtls);

					// Create the list of RTLs for the next basic block and continue with the next instruction.
					BB_rtls = NULL;
					break;	
				}

				case STMT_RET: {
					// Stop decoding sequentially
					sequentialDecode = false;

					pBB = createReturnBlock(pProc, BB_rtls, pRtl);

					// Create the list of RTLs for the next basic block and
					// continue with the next instruction.
					BB_rtls = NULL;		// New RTLList for next BB
				}
				break;

				case STMT_BOOLASSIGN:
					// This is just an ordinary instruction; no control transfer
					// Fall through
				case STMT_JUNCTION:
					// FIXME: Do we need to do anything here?
				case STMT_ASSIGN:
				case STMT_PHIASSIGN:
				case STMT_IMPASSIGN:
				case STMT_IMPREF:
					// Do nothing
					break;
		
				} // switch (s->getKind())
			}
			if (BB_rtls && pRtl)
				// If non null, we haven't put this RTL into a the current BB as yet
				BB_rtls->push_back(pRtl);

			if (inst.reDecode)
				// Special case: redecode the last instruction, without advancing uAddr by numBytes
				continue;
			uAddr += inst.numBytes;
			
			if (uAddr > lastAddr)
				lastAddr = uAddr;

			// If sequentially decoding, check if the next address happens to be the start of an existing BB. If so,
			// finish off the current BB (if any RTLs) as a fallthrough, and no need to decode again (unless it's an
			// incomplete BB, then we do decode it).
			// In fact, mustn't decode twice, because it will muck up the coverage, but also will cause subtle problems
			// like add a call to the list of calls to be processed, then delete the call RTL (e.g. Pentium 134.perl
			// benchmark)
			if (sequentialDecode && pCfg->existsBB(uAddr)) {
				// Create the fallthrough BB, if there are any RTLs at all
				if (BB_rtls) {
					PBB pBB = pCfg->newBB(BB_rtls, FALL, 1);
					// Add an out edge to this address
					if (pBB) {
						pCfg->addOutEdge(pBB, uAddr);
						BB_rtls = NULL;			// Need new list of RTLs
					}
				}
				// Pick a new address to decode from, if the BB is complete
				if (!pCfg->isIncomplete(uAddr))
					sequentialDecode = false;
			}
			// Move on to the next assembly line, but never step past the end of the list
			if (AssProgram && li != temp_lines->end())
				++li;
		}	// while sequentialDecode
		
		// Add this range to the coverage
//		  pProc->addRange(start, uAddr);

		// Must set sequentialDecode back to true
		sequentialDecode = true;
		

	}	// while nextAddress() != NO_ADDRESS

	//ProgWatcher *w = prog->getWatcher();
	//if (w)
	//	  w->alert_done(pProc, initAddr, lastAddr, nTotalBytes);

	// Add the callees to the set of CallStatements, and also to the Prog object
	std::list<CallStatement*>::iterator it;
	for (it = callList.begin(); it != callList.end(); it++) {
		ADDRESS dest = (*it)->getFixedDest();
		// Don't speculatively decode procs that are outside of the main text section, apart from dynamically
		// linked ones (in the .plt)
		// TODO: change pBF pointers
		if (!ASS_FILE){
			if (pBF->IsDynamicLinkedProc(dest) || !spec || (dest < pBF->getLimitTextHigh())) {
				pCfg->addCall(*it);
				// Don't visit the destination of a register call
				Proc *np = (*it)->getDestProc();
				if (np == NULL && dest != NO_ADDRESS) {
					//np = newProc(pProc->getProg(), dest);
					np = pProc->getProg()->setNewProc(dest);
				}
				if (np != NULL) {
					np->setFirstCaller(pProc);
					pProc->addCallee(np);
				}			
			}
		}
		else{
				// Assembly file input: every recorded call is registered, without the text section filter
				pCfg->addCall(*it);
				// Don't visit the destination of a register call
				Proc *np = (*it)->getDestProc();
				if (np == NULL && dest != NO_ADDRESS) {
					//np = newProc(pProc->getProg(), dest);
					np = pProc->getProg()->setNewProc(dest);
				}
				if (np != NULL) {
					np->setFirstCaller(pProc);
					pProc->addCallee(np);
				}			
		}
	}

	Boomerang::get()->alert_decode(pProc, startAddr, lastAddr, nTotalBytes);
	std::cout<< "finished processing proc " << pProc->getName() << " at address " << pProc->getNativeAddress() << "\n";
	if (VERBOSE)
		LOG << "finished processing proc " << pProc->getName() << " at address " << pProc->getNativeAddress() << "\n";

	return true;
}

/*==============================================================================
 * FUNCTION:	FrontEnd::getInst
 * OVERVIEW:	Fetch the smallest (nop-sized) instruction, in an endianness independent manner.
 *				This implementation reads the single byte found at the given address.
 * NOTE:		Frequently overridden
 * PARAMETERS:	addr - host address to fetch from
 * RETURNS:		An integer with the instruction in it
 *============================================================================*/
int FrontEnd::getInst(int addr)
{
	// Treat the integer as a host pointer to one unsigned byte, then widen that byte to int
	const unsigned char* pByte = reinterpret_cast<unsigned char*>(addr);
	return static_cast<int>(*pByte);
}


/*==============================================================================
 * FUNCTION:	TargetQueue::visit
 * OVERVIEW:	Visit a destination as a label, i.e. check whether we need to queue it as a new BB to create later.
 *				Note: at present, it is important to visit an address BEFORE an out edge is added to that address.
 *				This is because adding an out edge enters the address into the Cfg's BB map, and it looks like the
 *				BB has already been visited, and it gets overlooked. It would be better to have a scheme whereby
 *				the order of calling these functions (i.e. visit() and AddOutEdge()) did not matter.
 * PARAMETERS:	pCfg - the enclosing CFG
 *				uNewAddr - the address to be checked
 *				pNewBB - set to the lower part of the BB if the address
 *				already exists as a non explicit label (BB has to be split)
 * RETURNS:		<nothing>
 *============================================================================*/
void TargetQueue::visit(Cfg* pCfg, ADDRESS uNewAddr, PBB& pNewBB) {
	// Nothing to queue when the CFG reports that the destination has been parsed already
	if (pCfg->label(uNewAddr, pNewBB))
		return;

	// Not processed yet: append the address to the back of the local queue
	targets.push(uNewAddr);
	if (Boomerang::get()->traceDecoder)
		LOG << ">" << uNewAddr << "\t";
}
Пример #2
0
/*==============================================================================
 * FUNCTION:      processProc
 * OVERVIEW:      Process a procedure, given a native (source machine) address.
 *                Instructions are decoded sequentially from each queued target
 *                address; every control transfer instruction closes the current
 *                basic block, which is added to the procedure's Cfg together
 *                with its out edges. Newly discovered branch targets are queued
 *                and decoded in turn. Finally, small gaps in the procedure's
 *                coverage that consist only of NOP words are added to the
 *                coverage as well.
 * PARAMETERS:    uAddr - the address at which the procedure starts
 *                delta - the offset of the above address from the logical
 *                  address at which the procedure starts (i.e. the one
 *                  given by dis)
 *                uUpper - the highest address of the text segment; a branch
 *                  whose fixed destination is not below this address is
 *                  reported as an error and gets no out edge
 *                pProc - the procedure object (supplies the Cfg, receives the
 *                  coverage ranges and, possibly, a dummy callee epilogue)
 *                decoder - NJMCDecoder object used to decode each instruction
 * RETURNS:       <nothing>
 *============================================================================*/
void processProc(ADDRESS uAddr, ptrdiff_t delta, ADDRESS uUpper, UserProc* pProc,
                 NJMCDecoder& decoder)
{
    PBB pBB;                    // Pointer to the current basic block

    // Declare a queue of targets not yet processed yet. This has to be
    // individual to the procedure!
    TARGETS targets;

    // Indicates whether or not the next instruction to be decoded is the
    // lexical successor of the current one. Will be true for all NCTs and for
    // CTIs with a fall through branch.
    bool sequentialDecode = true;

    Cfg* pCfg = pProc->getCFG();

    // Initialise the queue of control flow targets that have yet to be decoded.
    targets.push(uAddr);

    // Clear the pointer used by the caller prologue code to access the last
    // call rtl of this procedure
    //decoder.resetLastCall();

    // nextAddress() returning 0 ends the decode of this procedure
    while ((uAddr = nextAddress(targets, pCfg)) != 0)
    {
        // The list of RTLs for the current basic block
        list<HRTL*>* BB_rtls = new list<HRTL*>();

        // Keep decoding sequentially until a CTI without a fall through branch
        // is decoded
        ADDRESS start = uAddr;
        DecodeResult inst;
        while (sequentialDecode)
        {
            // Decode and classify the current instruction
            if (progOptions.trace)
                cout << "*" << hex << uAddr << "\t" << flush;

            // Decode the inst at uAddr.
            inst = decoder.decodeInstruction(uAddr, delta, pProc);

            // Need to construct a new list of RTLs if a basic block has just
            // been finished but decoding is continuing from its lexical
            // successor
            if (BB_rtls == NULL)
                BB_rtls = new list<HRTL*>();

            HRTL* pRtl = inst.rtl;
            if (inst.numBytes == 0)
            {
                // An invalid instruction. Most likely because a call did
                // not return (e.g. call _exit()), etc. Best thing is to
                // emit a INVALID BB, and continue with valid instructions
                ostrstream ost;
                ost << "invalid instruction at " << hex << uAddr;
                warning(str(ost));
                // Emit the RTL anyway, so we have the address and maybe
                // some other clues
                BB_rtls->push_back(new RTL(uAddr));
                pBB = pCfg->newBB(BB_rtls, INVALID, 0);
                // Leaving the inner loop here means uAddr is not advanced
                // past the invalid instruction
                sequentialDecode = false;
                BB_rtls = NULL;
                continue;
            }

            // Only meaningful for the jump kinds handled below
            HLJump* rtl_jump = static_cast<HLJump*>(pRtl);

            // Display RTL representation if asked
            if (progOptions.rtl) pRtl->print();

            ADDRESS uDest;

            switch (pRtl->getKind())
            {

            case JUMP_HRTL:
            {
                uDest = rtl_jump->getFixedDest();

                // Handle one way jumps and computed jumps separately
                // NOTE(review): when uDest == NO_ADDRESS nothing is done here:
                // the RTL is not added to the BB and sequential decoding
                // simply continues - confirm that this is intended
                if (uDest != NO_ADDRESS)
                {
                    BB_rtls->push_back(pRtl);
                    sequentialDecode = false;

                    pBB = pCfg->newBB(BB_rtls, ONEWAY, 1);

                    // Exit the switch now (decoding sequentially has already
                    // been stopped) if the basic block already existed
                    if (pBB == 0)
                    {
                        BB_rtls = NULL;
                        break;
                    }

                    // Add the out edge if it is to a destination within the
                    // procedure
                    if (uDest < uUpper)
                    {
                        // The target must be visited before the out edge
                        // to it is added
                        visit(pCfg, uDest, targets, pBB);
                        pCfg->addOutEdge(pBB, uDest, true);
                    }
                    else
                    {
                        ostrstream ost;
                        ost << "Error: Instruction at " << hex << uAddr;
                        ost << " branches beyond end of section, to ";
                        ost << uDest;
                        error(str(ost));
                    }
                }
                break;
            }

            case NWAYJUMP_HRTL:
            {
                BB_rtls->push_back(pRtl);
                // We create the BB as a COMPJUMP type, then change
                // to an NWAY if it turns out to be a switch stmt
                // NOTE(review): unlike the other cases, the result of newBB()
                // is not checked for null before being passed on - confirm
                // that isSwitch() copes with a BB that already existed
                pBB = pCfg->newBB(BB_rtls, COMPJUMP, 0);
                if (isSwitch(pBB, rtl_jump->getDest(), pProc, pBF))
                {
                    processSwitch(pBB, delta, pCfg, targets, pBF);
                }
                else   // Computed jump
                {
                    // Not a switch statement. This case only ever sees jump
                    // RTLs (computed calls are handled under CALL_HRTL), so
                    // the warning always names a JUMP.
                    ostrstream ost;
                    ost << "COMPUTED JUMP at "
                        << hex << uAddr << endl;
                    warning(str(ost));
                    BB_rtls = NULL;    // New HRTLList for next BB
                }
                sequentialDecode = false;
                break;
            }

            case JCOND_HRTL:
            {
                uDest = rtl_jump->getFixedDest();
                BB_rtls->push_back(pRtl);
                pBB = pCfg->newBB(BB_rtls, TWOWAY, 2);

                // Stop decoding sequentially if the basic block already existed
                // otherwise complete the basic block
                if (pBB == 0)
                    sequentialDecode = false;
                else
                {
                    // Add the out edge if it is to a destination within the
                    // procedure
                    if (uDest < uUpper)
                    {
                        // The target must be visited before the out edge
                        // to it is added
                        visit(pCfg, uDest, targets, pBB);
                        pCfg->addOutEdge(pBB, uDest, true);
                    }
                    else
                    {
                        ostrstream ost;
                        ost << "Error: Instruction at " << hex << uAddr;
                        ost << " branches beyond end of section, to ";
                        ost << uDest;
                        error(str(ost));
                    }

                    // Add the fall-through outedge
                    pCfg->addOutEdge(pBB, uAddr + inst.numBytes);
                }

                // Create the list of RTLs for the next basic block and continue
                // with the next instruction.
                BB_rtls = NULL;
                break;
            }

            case CALL_HRTL:
            {
                HLCall* call = static_cast<HLCall*>(pRtl);

                // Treat computed and static calls seperately
                if (call->isComputed())
                {
                    BB_rtls->push_back(pRtl);
                    pBB = pCfg->newBB(BB_rtls, COMPCALL, 1);

                    // Stop decoding sequentially if the basic block already
                    // existed otherwise complete the basic block
                    if (pBB == 0)
                        sequentialDecode = false;
                    else
                        pCfg->addOutEdge(pBB, uAddr + inst.numBytes);
                }
                else        // Static call
                {
                    BB_rtls->push_back(pRtl);

                    // Find the address of the callee.
                    ADDRESS uNewAddr = call->getFixedDest();

                    // Add this non computed call site to the set of call
                    // sites which need to be analysed later.
                    pCfg->addCall(call);

                    // Record the called address as the start of a new
                    // procedure if it didn't already exist.
                    if ((uNewAddr != NO_ADDRESS) &&
                            prog.findProc(uNewAddr) == NULL)
                    {
                        prog.visitProc(uNewAddr);
                        if (progOptions.trace)
                            cout << "p" << hex << uNewAddr << "\t" << flush;
                    }

                    // Check if this is the _exit function. May prevent us from
                    // attempting to decode invalid instructions.
                    char* name = prog.pBF->SymbolByAddress(uNewAddr);
                    if (name && strcmp(name, "_exit") == 0)
                    {
                        // Create the new basic block (no out edges)
                        pBB = pCfg->newBB(BB_rtls, CALL, 0);

                        // Stop decoding sequentially
                        sequentialDecode = false;
                    }
                    else
                    {
                        // Create the new basic block
                        pBB = pCfg->newBB(BB_rtls, CALL, 1);

                        if (call->isReturnAfterCall())
                        {
                            // newBB() yields a null pointer when the call BB
                            // already existed; there is then no BB to attach
                            // a return BB to, and dereferencing the pointer
                            // would be an error.
                            if (pBB != NULL)
                            {
                                // Constuct the RTLs for the new basic block
                                list<HRTL*>* rtls = new list<HRTL*>();
                                // The only RTL in the basic block is a high level
                                // return that doesn't have any RTs.
                                rtls->push_back(new HLReturn(0, NULL));

                                BasicBlock* returnBB = pCfg->newBB(rtls, RET, 0);
                                // Add out edge from call to return
                                pCfg->addOutEdge(pBB, returnBB);
                                // Put a label on the return BB (since it's an
                                // orphan); a jump will be reqd
                                pCfg->setLabel(returnBB);
                                pBB->setJumpReqd();
                                // Give the enclosing proc a dummy callee epilogue
                                pProc->setEpilogue(new CalleeEpilogue("__dummy",
                                                                      list<string>()));
                                // Mike: do we need to set return locations?
                            }
                            // This ends the function
                            sequentialDecode = false;
                        }
                        else
                        {
                            // Add the fall through edge if the block didn't
                            // already exist
                            if (pBB != NULL)
                                pCfg->addOutEdge(pBB, uAddr + inst.numBytes);
                        }
                    }
                }

                // Create the list of RTLs for the next basic block and continue
                // with the next instruction.
                BB_rtls = NULL;
                break;
            }

            case RET_HRTL:
                // Stop decoding sequentially
                sequentialDecode = false;

                // Add the RTL to the list
                BB_rtls->push_back(pRtl);
                // Create the basic block
                pBB = pCfg->newBB(BB_rtls, RET, 0);

                // Create the list of RTLs for the next basic block and continue
                // with the next instruction.
                BB_rtls = NULL;    // New HRTLList for next BB
                break;

            case SCOND_HRTL:
                // This is just an ordinary instruction; no control transfer
                // Fall through
            case LOW_LEVEL_HRTL:
                // We must emit empty RTLs for NOPs, because they could be the
                // destinations of jumps (and splitBB won't work)
                // Just emit the current instr to the current BB
                BB_rtls->push_back(pRtl);
                break;

            } // switch (pRtl->getKind())

            uAddr += inst.numBytes;
            // Update the RTL's number of bytes for coverage analysis (only)
            inst.rtl->updateNumBytes(inst.numBytes);

            // If sequentially decoding, check if the next address happens to
            // be the start of an existing BB. If so, finish off the current BB
            // (if any RTLs) as a fallthrough, and  no need to decode again
            // (unless it's an incomplete BB, then we do decode it).
            // In fact, mustn't decode twice, because it will muck up the
            // coverage, but also will cause subtle problems like add a call
            // to the list of calls to be processed, then delete the call RTL
            // (e.g. Pentium 134.perl benchmark)
            if (sequentialDecode && pCfg->existsBB(uAddr))
            {
                // Create the fallthrough BB, if there are any RTLs at all
                if (BB_rtls)
                {
                    // A separate pointer is used so that the function-level
                    // pBB is left untouched, as it always has been here
                    PBB pFallBB = pCfg->newBB(BB_rtls, FALL, 1);
                    // Add an out edge to this address
                    if (pFallBB)
                    {
                        pCfg->addOutEdge(pFallBB, uAddr);
                        BB_rtls = NULL;         // Need new list of RTLs
                    }
                }
                // Pick a new address to decode from, if the BB is complete
                if (!pCfg->isIncomplete(uAddr))
                    sequentialDecode = false;
            }

        }   // while sequentialDecode

        // Add this range to the coverage
        pProc->addRange(start, uAddr);

        // Must set sequentialDecode back to true
        sequentialDecode = true;

    }   // while nextAddress()

    // This pass is to remove up to 3 nops between ranges.
    // These will be assumed to be padding for alignments of BBs
    // Possibly removes a lot of ranges that could otherwise be combined
    ADDRESS a1, a2;
    COV_CIT ii;
    Coverage temp;
    if (pProc->getFirstGap(a1, a2, ii))
    {
        do
        {
            // Only gaps of fewer than 8 bytes are candidates; they are
            // scanned one 2-byte word at a time
            int gap = a2 - a1;
            if (gap < 8)
            {
                bool allNops = true;
                for (int i = 0; i < gap; i += 2)
                {
                    // Beware endianness! getWord will work properly
                    // 0x4e71 is the word treated as a NOP here (presumably
                    // the source machine's NOP encoding - confirm)
                    if (getWord(a1 + i + delta) != 0x4e71)
                    {
                        allNops = false;
                        break;
                    }
                }
                if (allNops)
                    // Remove this gap, by adding a range equal to the gap
                    // Note: it's not safe to add the range now, so we put
                    // the range into a temp Coverage object to be added later
                    temp.addRange(a1, a2);
            }
        }
        while (pProc->getNextGap(a1, a2, ii));
    }
    // Now add the ranges in temp
    pProc->addRanges(temp);

}