// // DacEnumCodeForStackwalk // This is a helper function to enumerate the instructions around a call site to aid heuristics // used by debugger stack walkers. // // Arguments: // taCallEnd - target address of the instruction just after the call instruction for the stack // frame we want to examine(i.e. the return address for the next frame). // // Note that this is shared by our two stackwalks during minidump generation, // code:Thread::EnumMemoryRegionsWorker and code:ClrDataAccess::EnumMemWalkStackHelper. Ideally // we'd only have one stackwalk, but we currently have two different APIs for stackwalking // (CLR StackFrameIterator and IXCLRDataStackWalk), and we must ensure that the memory needed // for either is captured in a minidump. Eventually, all clients should get moved over to the // arrowhead debugging architecture, at which time we can rip out all the IXCLRData APIs, and // so this logic could just be private to the EnumMem code for Thread. // void DacEnumCodeForStackwalk(TADDR taCallEnd) { if (taCallEnd == 0) return; // // x86 stack walkers often end up having to guess // about what's a return address on the stack. // Doing so involves looking at the code at the // possible call site and seeing if it could // reach the callee. Save enough code and around // the call site to allow this with a dump. // // For whatever reason 64-bit platforms require us to save // the instructions around the call sites on the stack as well. // Otherwise we cannnot show the stack in a minidump. // // Note that everything we do here is a heuristic that won't always work in general. // Eg., part of the 2xMAX_INSTRUCTION_LENGTH range might not be mapped (we could be // right on a page boundary). More seriously, X86 is not necessarily parsable in reverse // (eg. there could be a segment-override prefix in front of the call instruction that // we miss). So we'll dump what we can and ignore any failures. 
Ideally we'd better // quantify exactly what debuggers need and why, and try and avoid these ugly heuristics. // It seems like these heuristics are too tightly coupled to the implementation details // of some specific debugger stackwalking algorithm. // DacEnumMemoryRegion(taCallEnd - MAX_INSTRUCTION_LENGTH, MAX_INSTRUCTION_LENGTH * 2, false); #if defined(_TARGET_X86_) // If it was an indirect call we also need to save the data indirected through. // Note that this only handles absolute indirect calls (ModR/M byte of 0x15), all the other forms of // indirect calls are register-relative, and so we'd have to do a much more complicated decoding based // on the register context. Regardless, it seems like this is fundamentally error-prone because it's // aways possible that the call instruction was not 6 bytes long, and we could have some other instructions // that happen to match the pattern we're looking for. PTR_BYTE callCode = PTR_BYTE(taCallEnd - 6); PTR_BYTE callMrm = PTR_BYTE(taCallEnd - 5); PTR_TADDR callInd = PTR_TADDR(taCallEnd - 4); if (callCode.IsValid() && (*callCode == 0xff) && callMrm.IsValid() && (*callMrm == 0x15) && callInd.IsValid()) { DacEnumMemoryRegion(*callInd, sizeof(TADDR), false); } #endif // #ifdef _TARGET_X86_ }
static bool epilogInstrumented() { PTR_BYTE ptr = PTR_BYTE(zeroFtn); if (ptr[0] == 0xe8) // call <helper> (prolog instrumentation) ptr += 5; if (ptr[0] == 0x33 && ptr[1] == 0xc0) // xor eax eax ptr += 2; return (ptr[0] == 0xeb || ptr[0] == 0xe9); // jmp <XXXX> }
// shouldEnterCall
//
// Heuristically decides whether the code at 'ip' looks like one of the
// compiler's special epilog helper functions (e.g. an EH_epilog-style
// helper).  We decode a limited subset of x86 forward from 'ip', counting
// pushes and pops; seeing more pops than pushes is taken as the signature
// of such a helper.  Any instruction outside the modeled subset makes us
// give up and answer false.
//
// Arguments:
//    ip - target address of the first instruction to decode.
//
// Return Value:
//    true  - the code looks like a special epilog helper.
//    false - anything else (including any instruction we don't model).
//
static bool shouldEnterCall(PTR_BYTE ip)
{
    SUPPORTS_DAC;

    int datasize; // helper variable for decoding of address modes
    int mod;      // helper variable for decoding of mod r/m
    int rm;       // helper variable for decoding of mod r/m
    int pushes = 0;

    // We should see unbalanced pops within 48 instructions; if not, it is
    // not a special epilog function.  The only reason we need as many
    // instructions as we have below is because coreclr gets instrumented
    // for profiling, code coverage, BBT etc, and we want these things to
    // just work.
    for (int i = 0; i < 48; i++)
    {
        switch (*ip)
        {
        case 0x68: // push 0xXXXXXXXX
            ip += 5;

            // For office profiler.  They morph tail calls into
            // "push TARGET; jmp helper", so if you see
            //
            //     push XXXX
            //     jmp xxxx
            //
            // and we notice that coreclr has been instrumented and
            // xxxx starts with a JMP [] then do what you would do for jmp XXXX.
            if (*ip == 0xE9 && callsInstrumented()) // jmp helper
            {
                PTR_BYTE tmpIp = ip + 5;
                PTR_BYTE target = tmpIp + (__int32)*((PTR_TADDR)(PTR_TO_TADDR(tmpIp) - 4));
                if (target[0] == 0xFF && target[1] == 0x25)
                {
                    // jmp [xxxx] (to external dll): continue decoding at the
                    // pushed value, which is the real jump target.
                    ip = PTR_BYTE(*((PTR_TADDR)(PTR_TO_TADDR(ip) - 4)));
                }
            }
            else
            {
                pushes++;
            }
            break;

        case 0x50: // push EAX
        case 0x51: // push ECX
        case 0x52: // push EDX
        case 0x53: // push EBX
        case 0x55: // push EBP
        case 0x56: // push ESI
        case 0x57: // push EDI
            pushes++;
            ip++;
            break;

        case 0xE8: // call <disp32>
            ip += 5;
            pushes = 0; // This assumes that all of the previous pushes are arguments to this call
            break;

        case 0xFF:
            if (ip[1] != 0x15) // call [XXXX] is OK (prolog of epilog helper is instrumented)
                return false;  // but everything else is not OK.
            ip += 6;
            pushes = 0; // This assumes that all of the previous pushes are arguments to this call
            break;

        case 0x9C: // pushfd
        case 0x9D: // popfd
            // a pushfd can never be an argument, so we model a pair of
            // these instructions as not changing the stack so that a call
            // that occurs between them does not consume the value of pushfd
            ip++;
            break;

        case 0x5D: // pop EBP
        case 0x5E: // pop ESI
        case 0x5F: // pop EDI
        case 0x5B: // pop EBX
        case 0x58: // pop EAX
        case 0x59: // pop ECX
        case 0x5A: // pop EDX
            if (pushes <= 0)
            {
                // We now have more pops than pushes.  This is our indication
                // that we are in an EH_epilog function so we return true.
                // This is the only way to exit this method with a retval of true.
                return true;
            }
            --pushes;
            ip++;
            break;

        case 0xA1: // MOV EAX, [XXXX]
            ip += 5;
            break;

        case 0xC6: // MOV r/m8, imm8
            datasize = 1;
            goto decodeRM;

        case 0x89: // MOV r/m, reg
            if (ip[1] == 0xE5) // MOV EBP, ESP
                return false;
            if (ip[1] == 0xEC) // MOV ESP, EBP
                return false;
            goto move;

        case 0x8B: // MOV reg, r/m
            if (ip[1] == 0xE5) // MOV ESP, EBP
                return false;
            if (ip[1] == 0xEC) // MOV EBP, ESP
                return false;
            goto move;

        case 0x88: // MOV r/m8, reg8
        case 0x8A: // MOV reg8, r/m8
        case 0x31: // XOR
        case 0x32: // XOR
        case 0x33: // XOR
        move:
            datasize = 0;
        decodeRM:
            // Skip past the ModR/M byte and whatever displacement/SIB bytes
            // it implies, plus 'datasize' bytes of immediate operand.
            // Note that we don't want to read from ip[] after
            // we do ANY incrementing of ip.
            mod = (ip[1] & 0xC0) >> 6;
            if (mod != 3)
            {
                rm = (ip[1] & 0x07);
                if (mod == 0)
                {   // (mod == 0)
                    if (rm == 5)
                        ip += 4;     // disp32
                    else if (rm == 4)
                        ip += 1;     // [reg*K+reg]
                    // otherwise [reg]
                }
                else if (mod == 1)
                {   // (mod == 1)
                    ip += 1;         // for disp8
                    if (rm == 4)
                        ip += 1;     // [reg*K+reg+disp8]
                    // otherwise [reg+disp8]
                }
                else
                {   // (mod == 2)
                    ip += 4;         // for disp32
                    if (rm == 4)
                        ip += 1;     // [reg*K+reg+disp32]
                    // otherwise [reg+disp32]
                }
            }

            ip += 2;        // opcode byte + ModR/M byte
            ip += datasize; // immediate operand, if any
            break;

        case 0x64: // FS: prefix
            ip++;
            break;

        case 0xEB: // jmp <disp8>
            ip += (signed __int8) ip[1] + 2;
            break;

        case 0xE9: // jmp <disp32>
            ip += (__int32)*PTR_DWORD(PTR_TO_TADDR(ip) + 1) + 5;
            break;

        case 0xF7: // test r/m32, imm32
            // Magellan code coverage build
            if ( (ip[1] & 0x38) == 0x00) // reg field 0 selects the TEST form
            {
                datasize = 4;
                goto decodeRM;
            }
            else
            {
                return false;
            }
            break;

        case 0x75: // jnz <target>
            // Magellan code coverage build
            // We always follow forward jump to avoid possible looping.
            {
                PTR_BYTE tmpIp = ip + (TADDR)(signed __int8) ip[1] + 2;
                if (tmpIp > ip)
                {
                    ip = tmpIp;   // follow forwards jump
                }
                else
                {
                    return false; // backwards jump implies not EH_epilog function
                }
            }
            break;

        case 0xC2: // ret n
        case 0xC3: // ret
        default:
            return false;
        }
    }
    return false;
}
/* Has mscorwks been instrumented so that calls are morphed into push XXXX call <helper> */ static bool callsInstrumented() { // Does the recusive function begin with push XXXX call <helper> PTR_BYTE ptr = PTR_BYTE(recursiveFtn); return (ptr[0] == 0x68 && ptr[5] == 0xe8); // PUSH XXXX, call <helper> }
lazyState->_edi = baseState->_edi; lazyState->_esi = baseState->_esi; lazyState->_ebx = baseState->_ebx; lazyState->_ebp = baseState->captureEbp; #ifndef DACCESS_COMPILE lazyState->_pEdi = &baseState->_edi; lazyState->_pEsi = &baseState->_esi; lazyState->_pEbx = &baseState->_ebx; lazyState->_pEbp = &baseState->_ebp; #endif // We have captured the state of the registers as they exist in 'captureState' // we need to simulate execution from the return address captured in 'captureState // until we return from the caller of captureState. PTR_BYTE ip = PTR_BYTE(baseState->captureEip); PTR_TADDR ESP = PTR_TADDR(baseState->captureEsp); ESP++; // pop captureState's return address // VC now has small helper calls that it uses in epilogs. We need to walk into these // helpers if we are to decode the stack properly. After we walk the helper we need // to return and continue walking the epiliog. This varaible remembers were to return to PTR_BYTE epilogCallRet = PTR_BYTE((TADDR)0); // The very first conditional jump that we are going to encounter is // the one testing for the return value of LazyMachStateCaptureState. // The non-zero path is the one directly leading to a return statement. // This variable keeps track of whether we are still looking for that // first conditional jump. BOOL bFirstCondJmp = TRUE;