/* Fill in a VEX AMD64 guest state from a Mach x86_thread_state64_t.

   The guest state is first reset with LibVEX_GuestAMD64_initialise and
   then the instruction pointer and the sixteen general purpose integer
   registers are copied over from |mach|.  Nothing else is transferred:
   the flags register and the CS/FS/GS segment registers are still
   outstanding (see the fixme notes below). */
static void x86_thread_state64_to_vex(const x86_thread_state64_t *mach,
                                      VexGuestAMD64State *vex)
{
   /* Begin from VEX's default guest state. */
   LibVEX_GuestAMD64_initialise(vex);

   /* Program counter. */
   vex->guest_RIP = mach->__rip;

   /* Stack and frame pointers. */
   vex->guest_RSP = mach->__rsp;
   vex->guest_RBP = mach->__rbp;

   /* Legacy general purpose registers. */
   vex->guest_RAX = mach->__rax;
   vex->guest_RBX = mach->__rbx;
   vex->guest_RCX = mach->__rcx;
   vex->guest_RDX = mach->__rdx;
   vex->guest_RSI = mach->__rsi;
   vex->guest_RDI = mach->__rdi;

   /* Extended registers r8..r15. */
   vex->guest_R8  = mach->__r8;
   vex->guest_R9  = mach->__r9;
   vex->guest_R10 = mach->__r10;
   vex->guest_R11 = mach->__r11;
   vex->guest_R12 = mach->__r12;
   vex->guest_R13 = mach->__r13;
   vex->guest_R14 = mach->__r14;
   vex->guest_R15 = mach->__r15;

   // DDD: #warning GrP fixme eflags

   /* GrP fixme
      vex->guest_CS = mach->__cs;
      vex->guest_FS = mach->__fs;
      vex->guest_GS = mach->__gs;
   */
}
/* Just before starting the client, we may need to make final adjustments to its initial image. Also we need to set up the VEX guest state for thread 1 (the root thread) and copy in essential starting values. This is handed the IIFinaliseImageInfo created by VG_(ii_create_image). */ void VG_(ii_finalise_image)( IIFinaliseImageInfo iifii ) { ThreadArchState* arch = &VG_(threads)[1].arch; /* GrP fixme doesn't handle all registers from LC_THREAD or LC_UNIXTHREAD */ # if defined(VGP_x86_darwin) vg_assert(0 == sizeof(VexGuestX86State) % 16); /* Zero out the initial state, and set up the simulated FPU in a sane way. */ LibVEX_GuestX86_initialise(&arch->vex); /* Zero out the shadow areas. */ VG_(memset)(&arch->vex_shadow1, 0, sizeof(VexGuestX86State)); VG_(memset)(&arch->vex_shadow2, 0, sizeof(VexGuestX86State)); /* Put essential stuff into the new state. */ arch->vex.guest_ESP = iifii.initial_client_SP; arch->vex.guest_EIP = iifii.initial_client_IP; # elif defined(VGP_amd64_darwin) vg_assert(0 == sizeof(VexGuestAMD64State) % 16); /* Zero out the initial state, and set up the simulated FPU in a sane way. */ LibVEX_GuestAMD64_initialise(&arch->vex); /* Zero out the shadow areas. */ VG_(memset)(&arch->vex_shadow1, 0, sizeof(VexGuestAMD64State)); VG_(memset)(&arch->vex_shadow2, 0, sizeof(VexGuestAMD64State)); /* Put essential stuff into the new state. */ arch->vex.guest_RSP = iifii.initial_client_SP; arch->vex.guest_RIP = iifii.initial_client_IP; # else # error Unknown platform # endif /* Tell the tool that we just wrote to the registers. */ VG_TRACK( post_reg_write, Vg_CoreStartup, /*tid*/1, /*offset*/0, sizeof(VexGuestArchState)); }
/* wqthread note: The kernel may create or destroy pthreads in the wqthread pool at any time with no userspace interaction, and wqthread_start may be entered at any time with no userspace interaction. To handle this in valgrind, we create and destroy a valgrind thread for every work item. */ void wqthread_hijack(Addr self, Addr kport, Addr stackaddr, Addr workitem, Int reuse, Addr sp) { ThreadState *tst; VexGuestAMD64State *vex; Addr stack; SizeT stacksize; vki_sigset_t blockall; /* When we enter here we hold no lock (!), so we better acquire it pronto. Why do we hold no lock? Because (presumably) the only way to get here is as a result of a SfMayBlock syscall "workq_ops(WQOPS_THREAD_RETURN)", which will have dropped the lock. At least that's clear for the 'reuse' case. The non-reuse case? Dunno, perhaps it's a new thread the kernel pulled out of a hat. In any case we still need to take a lock. */ VG_(acquire_BigLock_LL)("wqthread_hijack"); if (0) VG_(printf)( "wqthread_hijack: self %#lx, kport %#lx, " "stackaddr %#lx, workitem %#lx, reuse/flags %x, sp %#lx\n", self, kport, stackaddr, workitem, reuse, sp); /* Start the thread with all signals blocked. VG_(scheduler) will set the mask correctly when we finally get there. */ VG_(sigfillset)(&blockall); VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, NULL); /* For 10.7 and earlier, |reuse| appeared to be used as a simple boolean. In 10.8 and later its name changed to |flags| and has various other bits OR-d into it too, so it's necessary to fish out just the relevant parts. Hence: */ # if DARWIN_VERS <= DARWIN_10_7 Bool is_reuse = reuse != 0; # elif DARWIN_VERS == DARWIN_10_8 || DARWIN_VERS == DARWIN_10_9 Bool is_reuse = (reuse & 0x20000 /* == WQ_FLAG_THREAD_REUSE */) != 0; # endif if (is_reuse) { /* For whatever reason, tst->os_state.pthread appear to have a constant offset of 96 on 10.7, but zero on 10.6 and 10.5. No idea why. 
*/ # if DARWIN_VERS <= DARWIN_10_6 UWord magic_delta = 0; # elif DARWIN_VERS == DARWIN_10_7 || DARWIN_VERS == DARWIN_10_8 UWord magic_delta = 0x60; # elif DARWIN_VERS == DARWIN_10_9 UWord magic_delta = 0xE0; # else # error "magic_delta: to be computed on new OS version" // magic_delta = tst->os_state.pthread - self # endif // This thread already exists; we're merely re-entering // after leaving via workq_ops(WQOPS_THREAD_RETURN). // Don't allocate any V thread resources. // Do reset thread registers. ThreadId tid = VG_(lwpid_to_vgtid)(kport); vg_assert(VG_(is_valid_tid)(tid)); vg_assert(mach_thread_self() == kport); tst = VG_(get_ThreadState)(tid); if (0) VG_(printf)("wqthread_hijack reuse %s: tid %d, tst %p, " "tst->os_state.pthread %#lx\n", tst->os_state.pthread == self ? "SAME" : "DIFF", tid, tst, tst->os_state.pthread); vex = &tst->arch.vex; vg_assert(tst->os_state.pthread - magic_delta == self); } else { // This is a new thread. tst = VG_(get_ThreadState)(VG_(alloc_ThreadState)()); vex = &tst->arch.vex; allocstack(tst->tid); LibVEX_GuestAMD64_initialise(vex); } // Set thread's registers // Do this FIRST because some code below tries to collect a backtrace, // which requires valid register data. vex->guest_RIP = wqthread_starter; vex->guest_RDI = self; vex->guest_RSI = kport; vex->guest_RDX = stackaddr; vex->guest_RCX = workitem; vex->guest_R8 = reuse; vex->guest_R9 = 0; vex->guest_RSP = sp; stacksize = 512*1024; // wq stacks are always DEFAULT_STACK_SIZE stack = VG_PGROUNDUP(sp) - stacksize; if (is_reuse) { // Continue V's thread back in the scheduler. // The client thread is of course in another location entirely. /* Drop the lock before going into ML_(wqthread_continue_NORETURN). The latter will immediately attempt to reacquire it in non-LL mode, which is a bit wasteful but I don't think is harmful. A better solution would be to not drop the lock but instead "upgrade" it from a LL lock to a full lock, but that's too much like hard work right now. 
*/ VG_(release_BigLock_LL)("wqthread_hijack(1)"); ML_(wqthread_continue_NORETURN)(tst->tid); } else { // Record thread's stack and Mach port and pthread struct tst->os_state.pthread = self; tst->os_state.lwpid = kport; record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "wqthread-%p"); // kernel allocated stack - needs mapping tst->client_stack_highest_word = stack+stacksize; tst->client_stack_szB = stacksize; // GrP fixme scheduler lock?! // pthread structure ML_(notify_core_and_tool_of_mmap)( stack+stacksize, pthread_structsize, VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0); // stack contents // GrP fixme uninitialized! ML_(notify_core_and_tool_of_mmap)( stack, stacksize, VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0); // guard page // GrP fixme ban_mem_stack! ML_(notify_core_and_tool_of_mmap)( stack-VKI_PAGE_SIZE, VKI_PAGE_SIZE, 0, VKI_MAP_PRIVATE, -1, 0); ML_(sync_mappings)("after", "wqthread_hijack", 0); // Go! /* Same comments as the 'release' in the then-clause. start_thread_NORETURN calls run_thread_NORETURN calls thread_wrapper which acquires the lock before continuing. Let's hope nothing non-thread-local happens until that point. DDD: I think this is plain wrong .. if we get to thread_wrapper not holding the lock, and someone has recycled this thread slot in the meantime, we're hosed. Is that possible, though? */ VG_(release_BigLock_LL)("wqthread_hijack(2)"); call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0, start_thread_NORETURN, (Word)tst); } /*NOTREACHED*/ vg_assert(0); }
/* Entry point for a newly created client pthread.

   |func_arg| is reinterpreted as the ThreadState* for the new thread;
   the argument handed on to the client comes from
   tst->os_state.func_arg instead.  The function waits for the parent's
   go-ahead, blocks all signals, initialises the guest registers so the
   client starts at pthread_starter, records the thread's Mach port and
   pthread struct, tells the core about the thread's stack, signals the
   parent that set-up is complete, and finally switches to the Valgrind
   stack to run start_thread_NORETURN.  It does not return. */
void pthread_hijack(Addr self, Addr kport, Addr func, Addr func_arg,
                    Addr stacksize, Addr flags, Addr sp)
{
   vki_sigset_t all_signals;
   ThreadState *tst = (ThreadState *)func_arg;
   VexGuestAMD64State *vex = &tst->arch.vex;

   // VG_(printf)("pthread_hijack pthread %p, machthread %p, func %p, arg %p, stack %p, flags %p, stack %p\n", self, kport, func, func_arg, stacksize, flags, sp);

   /* Block until the parent thread lets us proceed.  The parent is
      holding V's lock on our behalf while we run the code below. */
   semaphore_wait(tst->os_state.child_go);

   /* Run with every signal blocked for now; VG_(scheduler) installs
      the proper mask once we get there. */
   VG_(sigfillset)(&all_signals);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &all_signals, NULL);

   /* Registers come FIRST: code further down may try to collect a
      backtrace, and that needs valid register data. */
   LibVEX_GuestAMD64_initialise(vex);
   vex->guest_RIP = pthread_starter;
   vex->guest_RDI = self;
   vex->guest_RSI = kport;
   vex->guest_RDX = func;
   vex->guest_RCX = tst->os_state.func_arg;
   vex->guest_R8  = stacksize;
   vex->guest_R9  = flags;
   vex->guest_RSP = sp;

   /* Remember the pthread struct and Mach port of this thread. */
   tst->os_state.pthread = self;
   tst->os_state.lwpid   = kport;
   record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "thread-%p");

   /* Bit 0x01000000 of |flags| separates client-allocated stacks from
      kernel-allocated ones (presumably PTHREAD_START_CUSTOM -- confirm
      against the libpthread headers). */
   const Bool client_owns_stack = (flags & 0x01000000) != 0;

   if (client_owns_stack) {
      /* Client allocated stack: just locate the segment holding |sp|. */
      find_stack_segment(tst->tid, sp);
   } else {
      /* Kernel allocated stack: it needs mapping. */
      const Addr stack_lo = VG_PGROUNDUP(sp) - stacksize;
      const Addr stack_hi = stack_lo + stacksize;

      tst->client_stack_highest_word = stack_hi;
      tst->client_stack_szB = stacksize;

      /* pthread structure, located directly above the stack */
      ML_(notify_core_and_tool_of_mmap)(
            stack_hi, pthread_structsize,
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);

      /* the stack itself */
      ML_(notify_core_and_tool_of_mmap)(
            stack_lo, stacksize,
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);

      /* guard page, directly below the stack, with no access rights */
      ML_(notify_core_and_tool_of_mmap)(
            stack_lo - VKI_PAGE_SIZE, VKI_PAGE_SIZE,
            0, VKI_MAP_PRIVATE, -1, 0);
   }

   ML_(sync_mappings)("after", "pthread_hijack", 0);

   // DDD: should this be here rather than in POST(sys_bsdthread_create)?
   // But we don't have ptid here...
   //VG_TRACK ( pre_thread_ll_create, ptid, tst->tid );

   /* Report to the parent's POST(sys_bsdthread_create) that register
      initialisation and memory mapping are finished. */
   semaphore_signal(tst->os_state.child_done);
   // LOCK IS GONE BELOW THIS POINT

   /* Go!  Switch to the Valgrind stack and start the thread. */
   call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0,
                         start_thread_NORETURN, (Word)tst);

   /*NOTREACHED*/
   vg_assert(0);
}