/*
 * createVarExprByName: build (or reuse) the BPatch_variableExpr for the
 * global variable `name`.
 *
 * mod  - module whose type collection is consulted for the variable's type.
 * name - symbol name of the variable.
 *
 * Returns the variable expression cached in AddrToVarExpr for the symbol's
 * address, creating and caching a new one if none exists.  Returns NULL when
 * the symbol cannot be found, when its address is 0, or when no type could be
 * determined for it.
 */
BPatch_variableExpr *BPatch_image::createVarExprByName(BPatch_module *mod, const char *name)
{
    Symbol syminfo;
    BPatch_type *type = mod->getModuleTypes()->globalVarsByName[name];

    // The symbol has to be resolved *before* its size is used to guess a
    // type below; previously the size of a default-constructed Symbol was
    // consulted, so the fallback never reflected the real variable.
    if (!proc->llproc->getSymbolInfo(name, syminfo)) {
        return NULL;
    }

    // Error case.
    if (syminfo.addr() == 0)
        return NULL;

    if (!type) {
        // No debug type recorded for this name: fall back to a basic type
        // chosen by the symbol's size.
        switch (syminfo.size()) {
          case 1:
            type = findType("char");
            break;
          case 2:
            type = findType("short");
            break;
          case 8:
            type = findType("integer*8");
            break;
          case 4:
          default:
            type = findType("int");
            break;
        }
    }

    if (!type)
        return NULL;

    // One variable expression per address: reuse a cached one if present.
    BPatch_variableExpr *var = AddrToVarExpr->hash[syminfo.addr()];
    if (!var) {
        var = new BPatch_variableExpr(const_cast<char *>(name), proc,
                                      (void *)syminfo.addr(), type);
        AddrToVarExpr->hash[syminfo.addr()] = var;
    }
    return var;
}
bool process::loadDYNINSTlib() { /* Look for a function we can hijack to forcibly load dyninstapi_rt. This is effectively an inferior RPC with the caveat that we're overwriting code instead of allocating memory from the RT heap. (So 'hijack' doesn't mean quite what you might think.) */ Address codeBase = findFunctionToHijack(this); if( !codeBase ) { return false; } /* glibc 2.3.4 and higher adds a fourth parameter to _dl_open(). While we could probably get away with treating the three and four -argument functions the same, check the version anyway, since we'll probably need to later. */ bool useFourArguments = true; Symbol libcVersionSymbol; if( getSymbolInfo( "__libc_version", libcVersionSymbol ) ) { char libcVersion[ sizeof( int ) * libcVersionSymbol.size() + 1 ]; libcVersion[ sizeof( int ) * libcVersionSymbol.size() ] = '\0'; if( ! readDataSpace( (void *) libcVersionSymbol.addr(), libcVersionSymbol.size(), libcVersion, true ) ) { fprintf( stderr, "%s[%d]: warning, failed to read libc version, assuming 2.3.4+\n", __FILE__, __LINE__ ); } else { startup_printf( "%s[%d]: libcVersion: %s\n", __FILE__, __LINE__, libcVersion ); /* We could potentially add a sanity check here to make sure we're looking at 2.3.x. */ int microVersion = ((int)libcVersion[4]) - ((int)'0'); if( microVersion <= 3 ) { useFourArguments = false; } } /* end if we read the version symbol */ } /* end if we found the version symbol */ if( useFourArguments ) { startup_printf( "%s[%d]: using four arguments.\n", __FILE__, __LINE__ ); } /* Fetch the name of the run-time library. */ const char DyninstEnvVar[]="DYNINSTAPI_RT_LIB"; if( ! 
dyninstRT_name.length() ) { // we didn't get anything on the command line if (getenv(DyninstEnvVar) != NULL) { dyninstRT_name = getenv(DyninstEnvVar); } else { pdstring msg = pdstring( "Environment variable " + pdstring( DyninstEnvVar ) + " has not been defined for process " ) + pdstring( getPid() ); showErrorCallback(101, msg); return false; } /* end if enviromental variable not found */ } /* end enviromental variable extraction */ /* Save the (main thread's) current PC.*/ savedPC = getRepresentativeLWP()->getActiveFrame().getPC(); /* _dl_open() takes three arguments: a pointer to the library name, the DLOPEN_MODE, and the return address of the current frame (that is, the location of the SIGILL-generating bundle we'll use to handleIfDueToDyninstLib()). We construct the first here. */ /* Write the string to entry, and then move the PC to the next bundle. */ codeGen gen(BYTES_TO_SAVE); Address dyninstlib_addr = gen.used() + codeBase; gen.copy(dyninstRT_name.c_str(), dyninstRT_name.length()+1); Address dlopencall_addr = gen.used() + codeBase; /* At this point, we use the generic iRPC headers and trailers around the call to _dl_open. (Note that pre-1.35 versions of this file had a simpler mechanism well-suited to boot- strapping a new port. The current complexity is to handle the attach() case, where we don't know if execution was stopped at the entry the entry point to a function. */ bool ok = theRpcMgr->emitInferiorRPCheader(gen); if( ! ok ) { return false; } /* Generate the call to _dl_open with a large dummy constant as the the third argument to make sure we generate the same size code the second time around, with the correct "return address." (dyninstlib_brk_addr) */ // As a quick note, we want to "return" to the beginning of the restore // segment, not dyninstlib_brk_addr (or we skip all the restores). // Of course, we're not sure what this addr represents.... 
pdvector< AstNode * > dlOpenArguments( 4 ); AstNode * dlOpenCall; dlOpenArguments[ 0 ] = new AstNode( AstNode::Constant, (void *)dyninstlib_addr ); dlOpenArguments[ 1 ] = new AstNode( AstNode::Constant, (void *)DLOPEN_MODE ); dlOpenArguments[ 2 ] = new AstNode( AstNode::Constant, (void *)0xFFFFFFFFFFFFFFFF ); if( useFourArguments ) { /* I derived the -2 as follows: from dlfcn/dlopen.c in the glibc sources, line 59, we find the call to _dl_open(), whose last argument is 'args->file == NULL ? LM_ID_BASE : NS'. Since the filename we pass in is non-null, this means we (would) pass in NS, which is defined to be __LM_ID_CALLER in the same file, line 48. (Since glibc must be shared for us to be calling _dl_open(), we fall into the second case of the #ifdef.) __LM_ID_CALLER is defined in include/dlfcn.h, where it has the value -2. */ dlOpenArguments[ 3 ] = new AstNode( AstNode::Constant, (void *)(long unsigned int)-2 ); } dlOpenCall = new AstNode( "_dl_open", dlOpenArguments ); /* Remember where we originally generated the call. */ codeBufIndex_t index = gen.getIndex(); /* emitInferiorRPCheader() configures (the global) registerSpace for us. */ dlOpenCall->generateCode( this, regSpace, gen, true, true ); // Okay, we're done with the generation, and we know where we'll be. // Go back and regenerate it Address dlopenRet = codeBase + gen.used(); gen.setIndex(index); /* Clean up the reference counts before regenerating. */ removeAst( dlOpenCall ); removeAst( dlOpenArguments[ 2 ] ); dlOpenArguments[ 2 ] = new AstNode( AstNode::Constant, (void *)dlopenRet ); dlOpenCall = new AstNode( "_dl_open", dlOpenArguments ); /* Regenerate the call at the same original location with the correct constants. */ dlOpenCall->generateCode( this, regSpace, gen, true, true ); /* Clean up the reference counting. 
*/ removeAst( dlOpenCall ); removeAst( dlOpenArguments[ 0 ] ); removeAst( dlOpenArguments[ 1 ] ); removeAst( dlOpenArguments[ 2 ] ); if( useFourArguments ) { removeAst( dlOpenArguments[ 3 ] ); } // Okay, that was fun. Now restore. And trap. And stuff. unsigned breakOffset, resultOffset, justAfterResultOffset; ok = theRpcMgr->emitInferiorRPCtrailer(gen, breakOffset, false, resultOffset, justAfterResultOffset ); if( ! ok ) { return false; } /* Let everyone else know that we're expecting a SIGILL. */ dyninstlib_brk_addr = codeBase + breakOffset; assert(gen.used() < BYTES_TO_SAVE); /* Save the function we're going to hijack. */ InsnAddr iAddr = InsnAddr::generateFromAlignedDataAddress( codeBase, this ); /* We need to save the whole buffer, because we don't know how big gen is when we do the restore. This could be made more efficient by storing gen.used() somewhere. */ iAddr.saveBundlesTo( savedCodeBuffer, sizeof( savedCodeBuffer ) / 16 ); /* Write the call into the mutatee. */ InsnAddr jAddr = InsnAddr::generateFromAlignedDataAddress( codeBase, this ); jAddr.writeBundlesFrom( (unsigned char *)gen.start_ptr(), gen.used() / 16 ); /* Now that we know where the code will start, move the (main thread's) PC there. */ getRepresentativeLWP()->changePC( dlopencall_addr, NULL ); /* Let them know we're working on it. */ setBootstrapState( loadingRT_bs ); return true; } /* end dlopenDYNINSTlib() */
// parseStabTypes: parses type and variable info, does some init // does NOT parse file-line info anymore, this is done later, upon request. void BPatch_module::parseStabTypes() { stab_entry *stabptr; const char *next_stabstr; unsigned i; char *modName; pdstring temp; image * imgPtr=NULL; char *ptr, *ptr2, *ptr3; bool parseActive = false; pdstring* currentFunctionName = NULL; Address currentFunctionBase = 0; BPatch_variableExpr *commonBlockVar = NULL; char *commonBlockName; BPatch_typeCommon *commonBlock = NULL; int mostRecentLinenum = 0; #if defined(TIMED_PARSE) struct timeval starttime; gettimeofday(&starttime, NULL); unsigned int pss_count = 0; double pss_dur = 0; unsigned int src_count = 0; double src_dur = 0; unsigned int fun_count = 0; double fun_dur = 0; struct timeval t1, t2; #endif imgPtr = mod->obj()->parse_img(); imgPtr->analyzeIfNeeded(); const Object &objPtr = imgPtr->getObject(); //Using the Object to get the pointers to the .stab and .stabstr // XXX - Elf32 specific needs to be in seperate file -- jkh 3/18/99 stabptr = objPtr.get_stab_info(); next_stabstr = stabptr->getStringBase(); for (i=0; i<stabptr->count(); i++) { switch(stabptr->type(i)){ case N_UNDF: /* start of object file */ /* value contains offset of the next string table for next module */ // assert(stabptr->nameIdx(i) == 1); stabptr->setStringBase(next_stabstr); next_stabstr = stabptr->getStringBase() + stabptr->val(i); //N_UNDF is the start of object file. It is time to //clean source file name at this moment. 
/* if(currentSourceFile){ delete currentSourceFile; currentSourceFile = NULL; delete absoluteDirectory; absoluteDirectory = NULL; delete currentFunctionName; currentFunctionName = NULL; currentFileInfo = NULL; currentFuncInfo = NULL; } */ break; case N_ENDM: /* end of object file */ break; case N_SO: /* compilation source or file name */ /* bperr("Resetting CURRENT FUNCTION NAME FOR NEXT OBJECT FILE\n");*/ #ifdef TIMED_PARSE src_count++; gettimeofday(&t1, NULL); #endif current_func_name = ""; // reset for next object file current_mangled_func_name = ""; // reset for next object file current_func = NULL; modName = const_cast<char*>(stabptr->name(i)); // cerr << "checkpoint B" << endl; ptr = strrchr(modName, '/'); // cerr << "checkpoint C" << endl; if (ptr) { ptr++; modName = ptr; } if (!strcmp(modName, mod->fileName().c_str())) { parseActive = true; moduleTypes->clearNumberedTypes(); BPatch_language lang; // language should be set in the constructor, this is probably redundant switch (stabptr->desc(i)) { case N_SO_FORTRAN: lang = BPatch_fortran; break; case N_SO_F90: lang = BPatch_fortran90; break; case N_SO_AS: lang = BPatch_assembly; break; case N_SO_ANSI_C: case N_SO_C: lang = BPatch_c; break; case N_SO_CC: lang = BPatch_cPlusPlus; break; default: lang = BPatch_unknownLanguage; break; } if (BPatch_f90_demangled_stabstr != getLanguage()) setLanguage(lang); } else { parseActive = false; } #ifdef TIMED_PARSE gettimeofday(&t2, NULL); src_dur += (t2.tv_sec - t1.tv_sec)*1000.0 + (t2.tv_usec - t1.tv_usec)/1000.0; //src_dur += (t2.tv_sec/1000 + t2.tv_usec*1000) - (t1.tv_sec/1000 + t1.tv_usec*1000) ; #endif break; case N_SLINE: mostRecentLinenum = stabptr->desc(i); break; default: break; } if(parseActive || mod->obj()->isSharedLib()) { BPatch_Vector<BPatch_function *> bpfv; switch(stabptr->type(i)){ case N_FUN: #ifdef TIMED_PARSE fun_count++; gettimeofday(&t1, NULL); #endif //all we have to do with function stabs at this point is to assure that we have //properly set the 
var currentFunctionName for the later case of (parseActive) current_func = NULL; int currentEntry = i; int funlen = strlen(stabptr->name(currentEntry)); ptr = new char[funlen+1]; strcpy(ptr, stabptr->name(currentEntry)); while(strlen(ptr) != 0 && ptr[strlen(ptr)-1] == '\\'){ ptr[strlen(ptr)-1] = '\0'; currentEntry++; strcat(ptr,stabptr->name(currentEntry)); } char* colonPtr = NULL; if(currentFunctionName) delete currentFunctionName; if(!ptr || !(colonPtr = strchr(ptr,':'))) currentFunctionName = NULL; else { char* tmp = new char[colonPtr-ptr+1]; strncpy(tmp,ptr,colonPtr-ptr); tmp[colonPtr-ptr] = '\0'; currentFunctionName = new pdstring(tmp); currentFunctionBase = 0; Symbol info; // Shouldn't this be a function name lookup? if (!proc->llproc->getSymbolInfo(*currentFunctionName, info)) { pdstring fortranName = *currentFunctionName + pdstring("_"); if (proc->llproc->getSymbolInfo(fortranName,info)) { delete currentFunctionName; currentFunctionName = new pdstring(fortranName); } } currentFunctionBase = info.addr(); delete[] tmp; // if(currentSourceFile && (currentFunctionBase > 0)){ // lineInformation->insertSourceFileName( // *currentFunctionName, // *currentSourceFile, // ¤tFileInfo,¤tFuncInfo); //} } // used to be a symbol lookup here to find currentFunctionBase, do we need it? 
delete[] ptr; #ifdef TIMED_PARSE gettimeofday(&t2, NULL); fun_dur += (t2.tv_sec - t1.tv_sec)*1000.0 + (t2.tv_usec - t1.tv_usec)/1000.0; //fun_dur += (t2.tv_sec/1000 + t2.tv_usec*1000) - (t1.tv_sec/1000 + t1.tv_usec*1000); #endif break; } if (!parseActive) continue; switch(stabptr->type(i)){ case N_BCOMM: { // begin Fortran named common block commonBlockName = const_cast<char*>(stabptr->name(i)); // find the variable for the common block BPatch_image *progam = (BPatch_image *) getObjParent(); commonBlockVar = progam->findVariable(commonBlockName); if (!commonBlockVar) { bperr("unable to find variable %s\n", commonBlockName); } else { commonBlock = dynamic_cast<BPatch_typeCommon *>(const_cast<BPatch_type *> (commonBlockVar->getType())); if (commonBlock == NULL) { // its still the null type, create a new one for it commonBlock = new BPatch_typeCommon(commonBlockName); commonBlockVar->setType(commonBlock); moduleTypes->addGlobalVariable(commonBlockName, commonBlock); } // reset field list commonBlock->beginCommonBlock(); } break; } case N_ECOMM: { // copy this set of fields assert(currentFunctionName); if (NULL == findFunction(currentFunctionName->c_str(), bpfv) || !bpfv.size()) { bperr("unable to locate current function %s\n", currentFunctionName->c_str()); } else { if (bpfv.size() > 1) { // warn if we find more than one function with this name bperr("%s[%d]: WARNING: found %d funcs matching name %s, using the first\n", __FILE__, __LINE__, bpfv.size(), currentFunctionName->c_str()); } BPatch_function *func = bpfv[0]; commonBlock->endCommonBlock(func, commonBlockVar->getBaseAddr()); } // update size if needed if (commonBlockVar) commonBlockVar->setSize(commonBlock->getSize()); commonBlockVar = NULL; commonBlock = NULL; break; } // case C_BINCL: -- what is the elf version of this jkh 8/21/01 // case C_EINCL: -- what is the elf version of this jkh 8/21/01 case 32: // Global symbols -- N_GYSM case 38: // Global Static -- N_STSYM case N_FUN: case 128: // typedefs and 
variables -- N_LSYM case 160: // parameter variable -- N_PSYM case 0xc6: // position-independant local typedefs -- N_ISYM case 0xc8: // position-independant external typedefs -- N_ESYM #ifdef TIMED_PARSE pss_count++; gettimeofday(&t1, NULL); #endif if (stabptr->type(i) == N_FUN) current_func = NULL; ptr = const_cast<char *>(stabptr->name(i)); while (ptr[strlen(ptr)-1] == '\\') { //ptr[strlen(ptr)-1] = '\0'; ptr2 = const_cast<char *>(stabptr->name(i+1)); ptr3 = (char *) malloc(strlen(ptr) + strlen(ptr2)); strcpy(ptr3, ptr); ptr3[strlen(ptr)-1] = '\0'; strcat(ptr3, ptr2); ptr = ptr3; i++; // XXX - memory leak on multiple cont. lines } // bperr("stab #%d = %s\n", i, ptr); // may be nothing to parse - XXX jdd 5/13/99 if (nativeCompiler) temp = parseStabString(this, mostRecentLinenum, (char *)ptr, stabptr->val(i), commonBlock); else temp = parseStabString(this, stabptr->desc(i), (char *)ptr, stabptr->val(i), commonBlock); if (temp.length()) { //Error parsing the stabstr, return should be \0 bperr( "Stab string parsing ERROR!! 
More to parse: %s\n", temp.c_str()); bperr( " symbol: %s\n", ptr); } #ifdef TIMED_PARSE gettimeofday(&t2, NULL); pss_dur += (t2.tv_sec - t1.tv_sec)*1000.0 + (t2.tv_usec - t1.tv_usec)/1000.0; // pss_dur += (t2.tv_sec/1000 + t2.tv_usec*1000) - (t1.tv_sec/1000 + t1.tv_usec*1000); #endif break; default: break; } } } #if defined(TIMED_PARSE) struct timeval endtime; gettimeofday(&endtime, NULL); unsigned long lstarttime = starttime.tv_sec * 1000 * 1000 + starttime.tv_usec; unsigned long lendtime = endtime.tv_sec * 1000 * 1000 + endtime.tv_usec; unsigned long difftime = lendtime - lstarttime; double dursecs = difftime/(1000 ); cout << __FILE__ << ":" << __LINE__ <<": parseTypes("<< mod->fileName() <<") took "<<dursecs <<" msecs" << endl; cout << "Breakdown:" << endl; cout << " Functions: " << fun_count << " took " << fun_dur << "msec" << endl; cout << " Sources: " << src_count << " took " << src_dur << "msec" << endl; cout << " parseStabString: " << pss_count << " took " << pss_dur << "msec" << endl; cout << " Total: " << pss_dur + fun_dur + src_dur << " msec" << endl; #endif }