Example #1
0
/*  wqthread note: The kernel may create or destroy pthreads in the 
    wqthread pool at any time with no userspace interaction, 
    and wqthread_start may be entered at any time with no userspace 
    interaction.
    To handle this in valgrind, we create and destroy a valgrind 
    thread for every work item.
*/
void wqthread_hijack(Addr self, Addr kport, Addr stackaddr, Addr workitem, 
                     Int reuse, Addr sp)
{
   ThreadState *tst;
   VexGuestAMD64State *vex;
   Addr stack;
   SizeT stacksize;
   vki_sigset_t blockall;

   /* When we enter here we hold no lock (!), so we better acquire it
      pronto.  Why do we hold no lock?  Because (presumably) the only
      way to get here is as a result of a SfMayBlock syscall
      "workq_ops(WQOPS_THREAD_RETURN)", which will have dropped the
      lock.  At least that's clear for the 'reuse' case.  The
      non-reuse case?  Dunno, perhaps it's a new thread the kernel
      pulled out of a hat.  In any case we still need to take a
      lock. */
   VG_(acquire_BigLock_LL)("wqthread_hijack");

   if (0) VG_(printf)(
             "wqthread_hijack: self %#lx, kport %#lx, "
	     "stackaddr %#lx, workitem %#lx, reuse/flags %x, sp %#lx\n", 
	     self, kport, stackaddr, workitem, reuse, sp);

   /* Start the thread with all signals blocked.  VG_(scheduler) will
      set the mask correctly when we finally get there. */
   VG_(sigfillset)(&blockall);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, NULL);

   /* For 10.7 and earlier, |reuse| appeared to be used as a simple
      boolean.  In 10.8 and later its name changed to |flags| and has
      various other bits OR-d into it too, so it's necessary to fish
      out just the relevant parts.  Hence: */
#  if DARWIN_VERS <= DARWIN_10_7
   Bool is_reuse = reuse != 0;
#  elif DARWIN_VERS == DARWIN_10_8 || DARWIN_VERS == DARWIN_10_9
   Bool is_reuse = (reuse & 0x20000 /* == WQ_FLAG_THREAD_REUSE */) != 0;
#  endif

   if (is_reuse) {

     /* For whatever reason, tst->os_state.pthread appear to have a
        constant offset of 96 on 10.7, but zero on 10.6 and 10.5.  No
        idea why. */
#      if DARWIN_VERS <= DARWIN_10_6
       UWord magic_delta = 0;
#      elif DARWIN_VERS == DARWIN_10_7 || DARWIN_VERS == DARWIN_10_8
       UWord magic_delta = 0x60;
#      elif DARWIN_VERS == DARWIN_10_9
       UWord magic_delta = 0xE0;
#      else
#        error "magic_delta: to be computed on new OS version"
         // magic_delta = tst->os_state.pthread - self
#      endif

       // This thread already exists; we're merely re-entering 
       // after leaving via workq_ops(WQOPS_THREAD_RETURN). 
       // Don't allocate any V thread resources.
       // Do reset thread registers.
       ThreadId tid = VG_(lwpid_to_vgtid)(kport);
       vg_assert(VG_(is_valid_tid)(tid));
       vg_assert(mach_thread_self() == kport);

       tst = VG_(get_ThreadState)(tid);

       if (0) VG_(printf)("wqthread_hijack reuse %s: tid %d, tst %p, "
                          "tst->os_state.pthread %#lx\n",
                          tst->os_state.pthread == self ? "SAME" : "DIFF",
                          tid, tst, tst->os_state.pthread);

       vex = &tst->arch.vex;
       vg_assert(tst->os_state.pthread - magic_delta == self);
   }
   else {
       // This is a new thread.
       tst = VG_(get_ThreadState)(VG_(alloc_ThreadState)());        
       vex = &tst->arch.vex;
       allocstack(tst->tid);
       LibVEX_GuestAMD64_initialise(vex);
   }
       
   // Set thread's registers
   // Do this FIRST because some code below tries to collect a backtrace, 
   // which requires valid register data.
   vex->guest_RIP = wqthread_starter;
   vex->guest_RDI = self;
   vex->guest_RSI = kport;
   vex->guest_RDX = stackaddr;
   vex->guest_RCX = workitem;
   vex->guest_R8  = reuse;
   vex->guest_R9  = 0;
   vex->guest_RSP = sp;

   stacksize = 512*1024;  // wq stacks are always DEFAULT_STACK_SIZE
   stack = VG_PGROUNDUP(sp) - stacksize;

   if (is_reuse) {
      // Continue V's thread back in the scheduler. 
      // The client thread is of course in another location entirely.

      /* Drop the lock before going into
         ML_(wqthread_continue_NORETURN).  The latter will immediately
         attempt to reacquire it in non-LL mode, which is a bit
         wasteful but I don't think is harmful.  A better solution
         would be to not drop the lock but instead "upgrade" it from a
         LL lock to a full lock, but that's too much like hard work
         right now. */
      VG_(release_BigLock_LL)("wqthread_hijack(1)");
      ML_(wqthread_continue_NORETURN)(tst->tid);
   } 
   else {
      // Record thread's stack and Mach port and pthread struct
      tst->os_state.pthread = self;
      tst->os_state.lwpid = kport;
      record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "wqthread-%p");
      
      // kernel allocated stack - needs mapping
      tst->client_stack_highest_word = stack+stacksize;
      tst->client_stack_szB = stacksize;

      // GrP fixme scheduler lock?!
      
      // pthread structure
      ML_(notify_core_and_tool_of_mmap)(
            stack+stacksize, pthread_structsize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // stack contents
      // GrP fixme uninitialized!
      ML_(notify_core_and_tool_of_mmap)(
            stack, stacksize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // guard page
      // GrP fixme ban_mem_stack!
      ML_(notify_core_and_tool_of_mmap)(
            stack-VKI_PAGE_SIZE, VKI_PAGE_SIZE,
            0, VKI_MAP_PRIVATE, -1, 0);

      ML_(sync_mappings)("after", "wqthread_hijack", 0);

      // Go!
      /* Same comments as the 'release' in the then-clause.
         start_thread_NORETURN calls run_thread_NORETURN calls
         thread_wrapper which acquires the lock before continuing.
         Let's hope nothing non-thread-local happens until that point.

         DDD: I think this is plain wrong .. if we get to
         thread_wrapper not holding the lock, and someone has recycled
         this thread slot in the meantime, we're hosed.  Is that
         possible, though? */
      VG_(release_BigLock_LL)("wqthread_hijack(2)");
      call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0, 
                            start_thread_NORETURN, (Word)tst);
   }

   /*NOTREACHED*/
   vg_assert(0);
}
/*  wqthread note: The kernel may create or destroy pthreads in the 
    wqthread pool at any time with no userspace interaction, 
    and wqthread_start may be entered at any time with no userspace 
    interaction.
    To handle this in valgrind, we create and destroy a valgrind 
    thread for every work item.
*/
void wqthread_hijack(Addr self, Addr kport, Addr stackaddr, Addr workitem, 
                     Int reuse, Addr sp)
{
   ThreadState *tst;
   VexGuestX86State *vex;
   Addr stack;
   SizeT stacksize;
   vki_sigset_t blockall;

   /* When we enter here we hold no lock (!), so we better acquire it
      pronto.  Why do we hold no lock?  Because (presumably) the only
      way to get here is as a result of a SfMayBlock syscall
      "workq_ops(WQOPS_THREAD_RETURN)", which will have dropped the
      lock.  At least that's clear for the 'reuse' case.  The
      non-reuse case?  Dunno, perhaps it's a new thread the kernel
      pulled out of a hat.  In any case we still need to take a
      lock. */
   VG_(acquire_BigLock_LL)("wqthread_hijack");

   /* Start the thread with all signals blocked.  VG_(scheduler) will
      set the mask correctly when we finally get there. */
   VG_(sigfillset)(&blockall);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, NULL);

   if (reuse) {
      // This thread already exists; we're merely re-entering 
      // after leaving via workq_ops(WQOPS_THREAD_RETURN). 
      // Don't allocate any V thread resources.
      // Do reset thread registers.
      ThreadId tid = VG_(lwpid_to_vgtid)(kport);
      vg_assert(VG_(is_valid_tid)(tid));
      vg_assert(mach_thread_self() == kport);

      tst = VG_(get_ThreadState)(tid);
      vex = &tst->arch.vex;
      vg_assert(tst->os_state.pthread == self);
   }
   else {
      // This is a new thread.
      tst = VG_(get_ThreadState)(VG_(alloc_ThreadState)());        
      vex = &tst->arch.vex;
      allocstack(tst->tid);
      LibVEX_GuestX86_initialise(vex);
   }
        
   // Set thread's registers
   // Do this FIRST because some code below tries to collect a backtrace, 
   // which requires valid register data.
   vex->guest_EIP = wqthread_starter;
   vex->guest_EAX = self;
   vex->guest_EBX = kport;
   vex->guest_ECX = stackaddr;
   vex->guest_EDX = workitem;
   vex->guest_EDI = reuse;
   vex->guest_ESI = 0;
   vex->guest_ESP = sp;

   stacksize = 512*1024;  // wq stacks are always DEFAULT_STACK_SIZE
   stack = VG_PGROUNDUP(sp) - stacksize;

   if (reuse) {
       // Continue V's thread back in the scheduler. 
       // The client thread is of course in another location entirely.

      /* Drop the lock before going into
         ML_(wqthread_continue_NORETURN).  The latter will immediately
         attempt to reacquire it in non-LL mode, which is a bit
         wasteful but I don't think is harmful.  A better solution
         would be to not drop the lock but instead "upgrade" it from a
         LL lock to a full lock, but that's too much like hard work
         right now. */
       VG_(release_BigLock_LL)("wqthread_hijack(1)");
       ML_(wqthread_continue_NORETURN)(tst->tid);
   } 
   else {
      // Record thread's stack and Mach port and pthread struct
      tst->os_state.pthread = self;
      tst->os_state.lwpid = kport;
      record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "wqthread-%p");
      
      // kernel allocated stack - needs mapping
      tst->client_stack_highest_word = stack+stacksize;
      tst->client_stack_szB = stacksize;

      // GrP fixme scheduler lock?!
      
      // pthread structure
      ML_(notify_core_and_tool_of_mmap)(
            stack+stacksize, pthread_structsize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // stack contents
      // GrP fixme uninitialized!
      ML_(notify_core_and_tool_of_mmap)(
            stack, stacksize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // guard page
      // GrP fixme ban_mem_stack!
      ML_(notify_core_and_tool_of_mmap)(
            stack-VKI_PAGE_SIZE, VKI_PAGE_SIZE, 
            0, VKI_MAP_PRIVATE, -1, 0);

      ML_(sync_mappings)("after", "wqthread_hijack", 0);

      // Go!
      /* Same comments as the 'release' in the then-clause.
         start_thread_NORETURN calls run_thread_NORETURN calls
         thread_wrapper which acquires the lock before continuing.
         Let's hope nothing non-thread-local happens until that point.

         DDD: I think this is plain wrong .. if we get to
         thread_wrapper not holding the lock, and someone has recycled
         this thread slot in the meantime, we're hosed.  Is that
         possible, though? */
      VG_(release_BigLock_LL)("wqthread_hijack(2)");
      call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0, 
                            start_thread_NORETURN, (Word)tst);
   }

   /*NOTREACHED*/
   vg_assert(0);
}
Example #3
0
void pthread_hijack(Addr self, Addr kport, Addr func, Addr func_arg, 
                    Addr stacksize, Addr flags, Addr sp)
{
   vki_sigset_t blockall;
   ThreadState *tst = (ThreadState *)func_arg;
   VexGuestAMD64State *vex = &tst->arch.vex;

   // VG_(printf)("pthread_hijack pthread %p, machthread %p, func %p, arg %p, stack %p, flags %p, stack %p\n", self, kport, func, func_arg, stacksize, flags, sp);

   // Wait for parent thread's permission.
   // The parent thread holds V's lock on our behalf.
   semaphore_wait(tst->os_state.child_go);

   /* Start the thread with all signals blocked.  VG_(scheduler) will
      set the mask correctly when we finally get there. */
   VG_(sigfillset)(&blockall);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, NULL);

   // Set thread's registers
   // Do this FIRST because some code below tries to collect a backtrace, 
   // which requires valid register data.
   LibVEX_GuestAMD64_initialise(vex);
   vex->guest_RIP = pthread_starter;
   vex->guest_RDI = self;
   vex->guest_RSI = kport;
   vex->guest_RDX = func;
   vex->guest_RCX = tst->os_state.func_arg;
   vex->guest_R8  = stacksize;
   vex->guest_R9  = flags;
   vex->guest_RSP = sp;

   // Record thread's stack and Mach port and pthread struct
   tst->os_state.pthread = self;
   tst->os_state.lwpid = kport;
   record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "thread-%p");

   if ((flags & 0x01000000) == 0) {
      // kernel allocated stack - needs mapping
      Addr stack = VG_PGROUNDUP(sp) - stacksize;
      tst->client_stack_highest_word = stack+stacksize;
      tst->client_stack_szB = stacksize;

      // pthread structure
      ML_(notify_core_and_tool_of_mmap)(
            stack+stacksize, pthread_structsize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // stack contents
      ML_(notify_core_and_tool_of_mmap)(
            stack, stacksize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // guard page
      ML_(notify_core_and_tool_of_mmap)(
            stack-VKI_PAGE_SIZE, VKI_PAGE_SIZE,
            0, VKI_MAP_PRIVATE, -1, 0);
   } else {
      // client allocated stack
      find_stack_segment(tst->tid, sp);
   }
   ML_(sync_mappings)("after", "pthread_hijack", 0);

   // DDD: should this be here rather than in POST(sys_bsdthread_create)?
   // But we don't have ptid here...
   //VG_TRACK ( pre_thread_ll_create, ptid, tst->tid );

   // Tell parent thread's POST(sys_bsdthread_create) that we're done 
   // initializing registers and mapping memory.
   semaphore_signal(tst->os_state.child_done);
   // LOCK IS GONE BELOW THIS POINT

   // Go!
   call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0, 
                         start_thread_NORETURN, (Word)tst);

   /*NOTREACHED*/
   vg_assert(0);
}
/*  wqthread note: The kernel may create or destroy pthreads in the 
    wqthread pool at any time with no userspace interaction, 
    and wqthread_start may be entered at any time with no userspace 
    interaction.
    To handle this in valgrind, we create and destroy a valgrind 
    thread for every work item.
*/
void wqthread_hijack(Addr self, Addr kport, Addr stackaddr, Addr workitem, 
                     Int reuse, Addr sp)
{
   ThreadState *tst;
   VexGuestX86State *vex;
   Addr stack;
   SizeT stacksize;
   vki_sigset_t blockall;

   /* When we enter here we hold no lock (!), so we better acquire it
      pronto.  Why do we hold no lock?  Because (presumably) the only
      way to get here is as a result of a SfMayBlock syscall
      "workq_ops(WQOPS_THREAD_RETURN)", which will have dropped the
      lock.  At least that's clear for the 'reuse' case.  The
      non-reuse case?  Dunno, perhaps it's a new thread the kernel
      pulled out of a hat.  In any case we still need to take a
      lock. */
   VG_(acquire_BigLock_LL)("wqthread_hijack");

   /* Start the thread with all signals blocked.  VG_(scheduler) will
      set the mask correctly when we finally get there. */
   VG_(sigfillset)(&blockall);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, NULL);

   if (reuse) {

      /* For whatever reason, tst->os_state.pthread appear to have a
         constant offset of 72 on 10.7, but zero on 10.6 and 10.5.  No
         idea why. */
#     if DARWIN_VERS <= DARWIN_10_6
      UWord magic_delta = 0;
#     elif DARWIN_VERS >= DARWIN_10_7
      UWord magic_delta = 0x48;
#     endif

      // This thread already exists; we're merely re-entering 
      // after leaving via workq_ops(WQOPS_THREAD_RETURN). 
      // Don't allocate any V thread resources.
      // Do reset thread registers.
      ThreadId tid = VG_(lwpid_to_vgtid)(kport);
      vg_assert(VG_(is_valid_tid)(tid));
      vg_assert(mach_thread_self() == kport);

      tst = VG_(get_ThreadState)(tid);

      if (0) VG_(printf)("wqthread_hijack reuse %s: tid %d, tst %p, "
                         "tst->os_state.pthread %#lx, self %#lx\n",
                         tst->os_state.pthread == self ? "SAME" : "DIFF",
                         tid, tst, tst->os_state.pthread, self);

      vex = &tst->arch.vex;
      vg_assert(tst->os_state.pthread - magic_delta == self);
   }
   else {
      // This is a new thread.
      tst = VG_(get_ThreadState)(VG_(alloc_ThreadState)());        
      vex = &tst->arch.vex;
      allocstack(tst->tid);
      LibVEX_GuestX86_initialise(vex);
      /* Tell threading tools the new thread exists.  FIXME: we need
         to know the identity (tid) of the parent thread, in order
         that threading tools can make a dependency edge from it to
         this thread.  But we don't know what the parent thread is.
         Hence pass 1 (the root thread).  This is completely wrong in
         general, and could cause large numbers of false races to be
         reported.  In fact, it's positively dangerous; we don't even
         know if thread 1 is still alive, and the thread checkers are
         likely to assert if it isn't. */
      VG_TRACK(pre_thread_ll_create, 1/*BOGUS*/, tst->tid);
   }
        
   // Set thread's registers
   // Do this FIRST because some code below tries to collect a backtrace, 
   // which requires valid register data.
   vex->guest_EIP = wqthread_starter;
   vex->guest_EAX = self;
   vex->guest_EBX = kport;
   vex->guest_ECX = stackaddr;
   vex->guest_EDX = workitem;
   vex->guest_EDI = reuse;
   vex->guest_ESI = 0;
   vex->guest_ESP = sp;

   stacksize = 512*1024;  // wq stacks are always DEFAULT_STACK_SIZE
   stack = VG_PGROUNDUP(sp) - stacksize;

   VG_TRACK(workq_task_start, tst->tid, workitem);
   if (reuse) {
       // Continue V's thread back in the scheduler. 
       // The client thread is of course in another location entirely.

      /* Drop the lock before going into
         ML_(wqthread_continue_NORETURN).  The latter will immediately
         attempt to reacquire it in non-LL mode, which is a bit
         wasteful but I don't think is harmful.  A better solution
         would be to not drop the lock but instead "upgrade" it from a
         LL lock to a full lock, but that's too much like hard work
         right now. */
       VG_(release_BigLock_LL)("wqthread_hijack(1)");
       ML_(wqthread_continue_NORETURN)(tst->tid);
   } 
   else {
      // Record thread's stack and Mach port and pthread struct
      tst->os_state.pthread = self;
      tst->os_state.lwpid = kport;
      record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "wqthread-%p");
      
      // kernel allocated stack - needs mapping
      tst->client_stack_highest_word = stack+stacksize;
      tst->client_stack_szB = stacksize;

      // tell the tool that we are at a point after the new thread
      // has its registers set up (so we can take a stack snapshot),
      // but before it has executed any instructions (or, really,
      // before it has done any memory references).
      VG_TRACK(pre_thread_first_insn, tst->tid);
      
      // pthread structure
      ML_(notify_core_and_tool_of_mmap)(
            stack+stacksize, pthread_structsize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // stack contents
      // GrP fixme uninitialized!
      ML_(notify_core_and_tool_of_mmap)(
            stack, stacksize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // guard page
      // GrP fixme ban_mem_stack!
      ML_(notify_core_and_tool_of_mmap)(
            stack-VKI_PAGE_SIZE, VKI_PAGE_SIZE, 
            0, VKI_MAP_PRIVATE, -1, 0);

      ML_(sync_mappings)("after", "wqthread_hijack", 0);

      // Go!
      /* Same comments as the 'release' in the then-clause.
         start_thread_NORETURN calls run_thread_NORETURN calls
         thread_wrapper which acquires the lock before continuing.
         Let's hope nothing non-thread-local happens until that point.

         DDD: I think this is plain wrong .. if we get to
         thread_wrapper not holding the lock, and someone has recycled
         this thread slot in the meantime, we're hosed.  Is that
         possible, though? */
      VG_(release_BigLock_LL)("wqthread_hijack(2)");
      call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0, 
                            start_thread_NORETURN, (Word)tst);
   }

   /*NOTREACHED*/
   vg_assert(0);
}