UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   D3UnwindRegs uregs;
   uregs.xip = (Addr)startRegs->r_pc;
   uregs.xsp = (Addr)startRegs->r_sp;
   uregs.xbp = startRegs->misc.X86.r_ebp;
   Addr fp_min = uregs.xsp;

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.xip, uregs.xbp);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/
   // On Darwin, this kicks in for pthread-related stack traces, so they're
   // only 1 entry long, which is wrong.
#  if !defined(VGO_darwin)
   if (fp_min + 512 >= fp_max) {
      /* If the stack limits look bogus, don't poke around ... but
         don't bomb out either. */
      if (sps) sps[0] = uregs.xsp;
      if (fps) fps[0] = uregs.xbp;
      ips[0] = uregs.xip;
      return 1;
   } 
#  endif

   /* fp is %ebp.  sp is %esp.  ip is %eip. */

   if (sps) sps[0] = uregs.xsp;
   if (fps) fps[0] = uregs.xbp;
   ips[0] = uregs.xip;
   i = 1;

   /* Loop unwinding the stack. Note that the IP value we get on
    * each pass (whether from CFI info or a stack frame) is a
    * return address so is actually after the calling instruction
    * in the calling function.
    *
    * Because of this we subtract one from the IP after each pass
    * of the loop so that we find the right CFI block on the next
    * pass - otherwise we can find the wrong CFI info if it happens
    * to change after the calling instruction and that will mean
    * that we will fail to unwind the next step.
    *
    * This most frequently happens at the end of a function when
    * a tail call occurs and we wind up using the CFI info for the
    * next function which is completely wrong.
    */
   while (True) {

      if (i >= max_n_ips)
         break;

      /* Try to derive a new (ip,sp,fp) triple from the current
         set. */

      /* On x86, first try the old-fashioned method of following the
         %ebp-chain.  Code which doesn't use this (that is, compiled
         with -fomit-frame-pointer) is not ABI compliant and so
         relatively rare.  Besides, trying the CFI first almost always
         fails, and is expensive. */
      /* Deal with frames resulting from functions which begin "pushl
         %ebp ; movl %esp, %ebp" which is the ABI-mandated preamble. */
      if (fp_min <= uregs.xbp &&
          uregs.xbp <= fp_max - 1 * sizeof(UWord)/*see comment below*/)
      {
         /* fp looks sane, so use it. */
         uregs.xip = (((UWord*)uregs.xbp)[1]);
         // We stop if we hit a zero (the traditional end-of-stack
         // marker) or a one -- these correspond to recorded IPs of 0 or -1.
         // The latter because r8818 (in this file) changes the meaning of
         // entries [1] and above in a stack trace, by subtracting 1 from
         // them.  Hence stacks that used to end with a zero value now end in
         // -1 and so we must detect that too.
         if (0 == uregs.xip || 1 == uregs.xip) break;
         uregs.xsp = uregs.xbp + sizeof(Addr) /*saved %ebp*/ 
                               + sizeof(Addr) /*ra*/;
         uregs.xbp = (((UWord*)uregs.xbp)[0]);
         if (sps) sps[i] = uregs.xsp;
         if (fps) fps[i] = uregs.xbp;
         ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
         if (debug)
            VG_(printf)("     ipsF[%d]=0x%08lx\n", i-1, ips[i-1]);
         uregs.xip = uregs.xip - 1;
            /* as per comment at the head of this loop */
         continue;
      }

      /* That didn't work out, so see if there is any CF info to hand
         which can be used. */
      if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
         if (0 == uregs.xip || 1 == uregs.xip) break;
         if (sps) sps[i] = uregs.xsp;
         if (fps) fps[i] = uregs.xbp;
         ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
         if (debug)
            VG_(printf)("     ipsC[%d]=0x%08lx\n", i-1, ips[i-1]);
         uregs.xip = uregs.xip - 1;
            /* as per comment at the head of this loop */
         continue;
      }

      /* And, similarly, try for MSVC FPO unwind info. */
      if ( VG_(use_FPO_info)( &uregs.xip, &uregs.xsp, &uregs.xbp,
                              fp_min, fp_max ) ) {
         if (0 == uregs.xip || 1 == uregs.xip) break;
         if (sps) sps[i] = uregs.xsp;
         if (fps) fps[i] = uregs.xbp;
         ips[i++] = uregs.xip;
         if (debug)
            VG_(printf)("     ipsC[%d]=0x%08lx\n", i-1, ips[i-1]);
         uregs.xip = uregs.xip - 1;
         continue;
      }

      /* No luck.  We have to give up. */
      break;
   }

   n_found = i;
   return n_found;
}
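
/* Illustration, not part of Valgrind: a standalone sketch of the
   %ebp-chain walk performed above, using plain C types.  'FrameRec'
   and 'walk_ebp_chain' are invented names for exposition only. */
#include <stdint.h>
#include <stddef.h>

typedef struct {
   uintptr_t saved_ebp;   /* ((UWord*)ebp)[0]: the caller's %ebp */
   uintptr_t ret_addr;    /* ((UWord*)ebp)[1]: return address in caller */
} FrameRec;

static size_t walk_ebp_chain ( uintptr_t ebp, uintptr_t lo, uintptr_t hi,
                               /*OUT*/uintptr_t* ras, size_t max )
{
   size_t n = 0;
   /* same sanity window as the fp_min/fp_max check above */
   while (n < max && lo <= ebp && ebp <= hi - sizeof(uintptr_t)) {
      const FrameRec* f = (const FrameRec*)ebp;
      if (f->ret_addr == 0 || f->ret_addr == 1)
         break;                    /* traditional end-of-stack markers */
      ras[n++] = f->ret_addr - 1;  /* -1: point at the call insn, not the RA */
      ebp = f->saved_ebp;          /* hop outward to the caller's frame */
   }
   return n;
}
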
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  lr_is_first_RA = False;
#  if defined(VG_PLAT_USES_PPCTOC)
   Word redir_stack_size = 0;
   Word redirs_used      = 0;
#  endif

   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   Addr ip = (Addr)startRegs->r_pc;
   Addr sp = (Addr)startRegs->r_sp;
   Addr fp = sp;
#  if defined(VGP_ppc32_linux) || defined(VGP_ppc32_aix5)
   Addr lr = startRegs->misc.PPC32.r_lr;
#  elif defined(VGP_ppc64_linux) || defined(VGP_ppc64_aix5)
   Addr lr = startRegs->misc.PPC64.r_lr;
#  endif
   Addr fp_min = sp;

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
		  max_n_ips, fp_min, fp_max_orig, fp_max, ip, fp);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/
   if (fp_min + 512 >= fp_max) {
      /* If the stack limits look bogus, don't poke around ... but
         don't bomb out either. */
      if (sps) sps[0] = sp;
      if (fps) fps[0] = fp;
      ips[0] = ip;
      return 1;
   } 

   /* fp is %r1.  ip is %cia.  Note, ppc uses r1 as both the stack and
      frame pointers. */

#  if defined(VGP_ppc64_linux) || defined(VGP_ppc64_aix5)
   redir_stack_size = VEX_GUEST_PPC64_REDIR_STACK_SIZE;
   redirs_used      = 0;
#  elif defined(VGP_ppc32_aix5)
   redir_stack_size = VEX_GUEST_PPC32_REDIR_STACK_SIZE;
   redirs_used      = 0;
#  endif

#  if defined(VG_PLAT_USES_PPCTOC)
   /* Deal with bogus LR values caused by function
      interception/wrapping on ppc-TOC platforms; see comment on
      similar code a few lines further down. */
   if (ULong_to_Ptr(lr) == (void*)&VG_(ppctoc_magic_redirect_return_stub)
       && VG_(is_valid_tid)(tid_if_known)) {
      Word hsp = VG_(threads)[tid_if_known].arch.vex.guest_REDIR_SP;
      redirs_used++;
      if (hsp >= 1 && hsp < redir_stack_size)
         lr = VG_(threads)[tid_if_known]
                 .arch.vex.guest_REDIR_STACK[hsp-1];
   }
#  endif

   /* We have to determine whether or not LR currently holds this fn
      (call it F)'s return address.  It might not if F has previously
      called some other function, hence overwriting LR with a pointer
      to some part of F.  Hence if LR and IP point to the same
      function then we conclude LR does not hold this function's
      return address; instead the LR at entry must have been saved in
      the stack by F's prologue and so we must get it from there
      instead.  Note all this guff only applies to the innermost
      frame. */
   lr_is_first_RA = False;
   {
#     define M_VG_ERRTXT 1000
      UChar buf_lr[M_VG_ERRTXT], buf_ip[M_VG_ERRTXT];
      /* The following conditional looks grossly inefficient and
         surely could be majorly improved, with not much effort. */
      if (VG_(get_fnname_raw) (lr, buf_lr, M_VG_ERRTXT))
         if (VG_(get_fnname_raw) (ip, buf_ip, M_VG_ERRTXT))
            if (VG_(strncmp)(buf_lr, buf_ip, M_VG_ERRTXT))
               lr_is_first_RA = True;
#     undef M_VG_ERRTXT
   }

   if (sps) sps[0] = fp; /* NB. not sp */
   if (fps) fps[0] = fp;
   ips[0] = ip;
   i = 1;

   if (fp_min <= fp && fp < fp_max-VG_WORDSIZE+1) {

      /* initial FP is sane; keep going */
      fp = (((UWord*)fp)[0]);

      while (True) {

        /* On ppc64-linux (ppc64-elf, really), and on AIX, the lr save
           slot is 2 words back from sp, whereas on ppc32-elf(?) it's
           only one word back. */
#        if defined(VG_PLAT_USES_PPCTOC)
         const Int lr_offset = 2;
#        else
         const Int lr_offset = 1;
#        endif

         if (i >= max_n_ips)
            break;

         /* Try to derive a new (ip,fp) pair from the current set. */

         if (fp_min <= fp && fp <= fp_max - lr_offset * sizeof(UWord)) {
            /* fp looks sane, so use it. */

            if (i == 1 && lr_is_first_RA)
               ip = lr;
            else
               ip = (((UWord*)fp)[lr_offset]);

#           if defined(VG_PLAT_USES_PPCTOC)
            /* Nasty hack to do with function replacement/wrapping on
               ppc64-linux/ppc64-aix/ppc32-aix.  If LR points to our
               magic return stub, then we are in a wrapped or
               intercepted function, in which LR has been messed with.
               The original LR will have been pushed onto the thread's
               hidden REDIR stack one down from the top (top element
               is the saved R2) and so we should restore the value
               from there instead.  Since nested redirections can and
               do happen, we keep track of the number of nested LRs
               used by the unwinding so far with 'redirs_used'. */
            if (ip == (Addr)&VG_(ppctoc_magic_redirect_return_stub)
                && VG_(is_valid_tid)(tid_if_known)) {
               Word hsp = VG_(threads)[tid_if_known]
                             .arch.vex.guest_REDIR_SP;
               hsp -= 2 * redirs_used;
               redirs_used ++;
               if (hsp >= 1 && hsp < redir_stack_size)
                  ip = VG_(threads)[tid_if_known]
                          .arch.vex.guest_REDIR_STACK[hsp-1];
            }
#           endif

            if (0 == ip || 1 == ip) break;
            if (sps) sps[i] = fp; /* NB. not sp */
            if (fps) fps[i] = fp;
            fp = (((UWord*)fp)[0]);
            ips[i++] = ip - 1; /* -1: refer to calling insn, not the RA */
            if (debug)
               VG_(printf)("     ipsF[%d]=%#08lx\n", i-1, ips[i-1]);
            ip = ip - 1; /* ip is probably dead at this point, but
                            play safe, a la x86/amd64 above.  See
                            extensive comments above. */
            continue;
         }

         /* No luck there.  We have to give up. */
         break;
      }
   }

   n_found = i;
   return n_found;
}
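
/* Illustration, not part of the source: the ppc frame layout the walk
   above depends on.  Each frame begins with a back-chain word pointing
   at the caller's frame, and the caller's LR is saved a fixed number
   of words above the frame base:

      fp[0]   back chain (the caller's r1)
      fp[1]   saved LR on ppc32-elf        (lr_offset == 1)
      fp[2]   saved LR on ppc64-elf / AIX  (lr_offset == 2)

   so one step of the walk is, in effect:

      ip = ((UWord*)fp)[lr_offset];   // return address in the caller
      fp = ((UWord*)fp)[0];           // follow the back chain outward
*/
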
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   D3UnwindRegs uregs;
   uregs.r15 = startRegs->r_pc & 0xFFFFFFFE;
   uregs.r14 = startRegs->misc.ARM.r14;
   uregs.r13 = startRegs->r_sp;
   uregs.r12 = startRegs->misc.ARM.r12;
   uregs.r11 = startRegs->misc.ARM.r11;
   uregs.r7  = startRegs->misc.ARM.r7;
   Addr fp_min = uregs.r13;

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("\nmax_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx r15=0x%lx r13=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.r15, uregs.r13);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/
   // On Darwin, this kicks in for pthread-related stack traces, so they're
   // only 1 entry long, which is wrong.
   if (fp_min + 512 >= fp_max) {
      /* If the stack limits look bogus, don't poke around ... but
         don't bomb out either. */
      if (sps) sps[0] = uregs.r13;
      if (fps) fps[0] = 0;
      ips[0] = uregs.r15;
      return 1;
   } 

   /* ip is r15.  sp is r13.  There is no single frame-pointer register
      (r11 or r7 by convention), so fps[] entries are left as 0. */

   if (sps) sps[0] = uregs.r13;
   if (fps) fps[0] = 0;
   ips[0] = uregs.r15;
   i = 1;

   /* Loop unwinding the stack. */

   while (True) {
      if (debug) {
         VG_(printf)("i: %d, r15: 0x%lx, r13: 0x%lx\n",
                     i, uregs.r15, uregs.r13);
      }

      if (i >= max_n_ips)
         break;

      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
         if (sps) sps[i] = uregs.r13;
         if (fps) fps[i] = 0;
         ips[i++] = (uregs.r15 & 0xFFFFFFFE) - 1;
         if (debug)
            VG_(printf)("USING CFI: r15: 0x%lx, r13: 0x%lx\n",
                        uregs.r15, uregs.r13);
         uregs.r15 = (uregs.r15 & 0xFFFFFFFE) - 1;
         continue;
      }
      /* No luck.  We have to give up. */
      break;
   }

   n_found = i;
   return n_found;
}
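
/* Illustration, not part of the source: the 0xFFFFFFFE masks above
   clear bit 0 of a code address, which on ARM signals Thumb state
   rather than being part of the address.  A hypothetical helper making
   the step explicit: */
#include <stdint.h>

static inline uint32_t arm_code_addr ( uint32_t a )
{
   return a & ~(uint32_t)1;   /* equivalent to 'a & 0xFFFFFFFE' above */
}

/* so 'ips[i++] = (uregs.r15 & 0xFFFFFFFE) - 1' is arm_code_addr(r15) - 1:
   a clean code address, backed up by one byte so it refers to the
   calling instruction rather than the return address. */
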
static 
Addr setup_client_stack( void*  init_sp,
                         HChar** orig_envp, 
                         const ExeInfo* info,
                         Addr   clstack_end,
                         SizeT  clstack_max_size,
                         const VexArchInfo* vex_archinfo )
{
   HChar **cpp;
   HChar *strtab;		/* string table */
   HChar *stringbase;
   Addr *ptr;
   unsigned stringsize;		/* total size of strings in bytes */
   unsigned auxsize;		/* total size of auxv in bytes */
   Int argc;			/* total argc */
   Int envc;			/* total number of env vars */
   unsigned stacksize;		/* total client stack size */
   Addr client_SP;	        /* client stack base (initial SP) */
   Addr clstack_start;
   Int i;

   vg_assert(VG_IS_PAGE_ALIGNED(clstack_end+1));
   vg_assert( VG_(args_for_client) );

   /* ==================== compute sizes ==================== */

   /* first of all, work out how big the client stack will be */
   stringsize   = 0;
   auxsize = 0;

   /* paste on the extra args if the loader needs them (ie, the #! 
      interpreter and its argument) */
   argc = 0;
   if (info->interp_name != NULL) {
      argc++;
      stringsize += VG_(strlen)(info->interp_name) + 1;
   }
   if (info->interp_args != NULL) {
      argc++;
      stringsize += VG_(strlen)(info->interp_args) + 1;
   }

   /* now scan the args we're given... */
   stringsize += VG_(strlen)( VG_(args_the_exename) ) + 1;

   for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
      argc++;
      stringsize += VG_(strlen)( * (HChar**) 
                                   VG_(indexXA)( VG_(args_for_client), i ))
                    + 1;
   }

   /* ...and the environment */
   envc = 0;
   for (cpp = orig_envp; cpp && *cpp; cpp++) {
      envc++;
      stringsize += VG_(strlen)(*cpp) + 1;
   }

   /* Darwin executable_path + NULL */
   auxsize += 2 * sizeof(Word);
   if (info->executable_path) {
       stringsize += 1 + VG_(strlen)(info->executable_path);
   }

   /* Darwin mach_header */
   if (info->dynamic) auxsize += sizeof(Word);

   /* OK, now we know how big the client stack is */
   stacksize =
      sizeof(Word) +                          /* argc */
      sizeof(HChar **) +                      /* argv[0] == exename */
      sizeof(HChar **)*argc +                 /* argv */
      sizeof(HChar **) +                      /* terminal NULL */
      sizeof(HChar **)*envc +                 /* envp */
      sizeof(HChar **) +                      /* terminal NULL */
      auxsize +                               /* auxv */
      VG_ROUNDUP(stringsize, sizeof(Word));   /* strings (aligned) */

   if (0) VG_(printf)("stacksize = %d\n", stacksize);

   /* client_SP is the client's stack pointer */
   client_SP = clstack_end + 1 - stacksize;
   client_SP = VG_ROUNDDN(client_SP, 32); /* make stack 32 byte aligned */

   /* base of the string table (aligned) */
   stringbase = strtab = (HChar *)clstack_end 
                         - VG_ROUNDUP(stringsize, sizeof(int));

   /* The max stack size */
   clstack_max_size = VG_PGROUNDUP(clstack_max_size);

   /* Darwin stack is chosen by the ume loader */
   clstack_start = clstack_end + 1 - clstack_max_size;

   /* Record stack extent -- needed for stack-change code. */
   /* GrP fixme really? */
   VG_(clstk_start_base) = clstack_start;
   VG_(clstk_end)  = clstack_end;

   if (0)
      VG_(printf)("stringsize=%d auxsize=%d stacksize=%d maxsize=0x%x\n"
                  "clstack_start %p\n"
                  "clstack_end   %p\n",
	          stringsize, auxsize, stacksize, (Int)clstack_max_size,
                  (void*)clstack_start, (void*)clstack_end);

   /* ==================== allocate space ==================== */

   /* Stack was allocated by the ume loader. */

   /* ==================== create client stack ==================== */

   ptr = (Addr*)client_SP;

   /* --- mach_header --- */
   if (info->dynamic) *ptr++ = info->text;

   /* --- client argc --- */
   *ptr++ = (Addr)(argc + 1);

   /* --- client argv --- */
   if (info->interp_name) {
      *ptr++ = (Addr)copy_str(&strtab, info->interp_name);
      VG_(free)(info->interp_name);
   }
   if (info->interp_args) {
      *ptr++ = (Addr)copy_str(&strtab, info->interp_args);
      VG_(free)(info->interp_args);
   }

   *ptr++ = (Addr)copy_str(&strtab, VG_(args_the_exename));

   for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
      *ptr++ = (Addr)copy_str(
                       &strtab, 
                       * (HChar**) VG_(indexXA)( VG_(args_for_client), i )
                     );
   }
   *ptr++ = 0;

   /* --- envp --- */
   VG_(client_envp) = (HChar **)ptr;
   for (cpp = orig_envp; cpp && *cpp; ptr++, cpp++)
      *ptr = (Addr)copy_str(&strtab, *cpp);
   *ptr++ = 0;

   /* --- executable_path + NULL --- */
   if (info->executable_path) 
       *ptr++ = (Addr)copy_str(&strtab, info->executable_path);
   else 
       *ptr++ = 0;
   *ptr++ = 0;

   vg_assert((strtab-stringbase) == stringsize);

   /* client_SP is pointing at client's argc/argv */

   if (0) VG_(printf)("startup SP = %#lx\n", client_SP);
   return client_SP;
}
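
/* Illustration, not part of the source: the stack image built above,
   from client_SP (low addresses) upward:

      client_SP  ->  mach_header address       (only if info->dynamic)
                     argc + 1                  (the exename slot is not
                                                counted in 'argc' above)
                     argv: [interp_name] [interp_args] exename args...
                     NULL                      (argv terminator)
                     envp[0] .. envp[envc-1]
                     NULL                      (envp terminator)
                     executable_path (or 0)
                     NULL
      stringbase ->  the strings themselves, NUL-terminated
                     (running up towards clstack_end)
*/
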
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   D3UnwindRegs uregs;
   uregs.xip = startRegs->r_pc;
   uregs.xsp = startRegs->r_sp;
   uregs.xbp = startRegs->misc.AMD64.r_rbp;
   Addr fp_min = uregs.xsp;

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   extern unsigned long nacl_head;

   if (nacl_head && uregs.xip > nacl_head && uregs.xip < nacl_head + (1ULL << 32)) {
     fp_min = nacl_head;
     fp_max = nacl_head + (1ULL << 32) - 1;
   }


   if (debug)
      VG_(printf)("max_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.xip, uregs.xbp);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/
   // On Darwin, this kicks in for pthread-related stack traces, so they're
   // only 1 entry long, which is wrong.
#  if !defined(VGO_darwin)
   if (fp_min + 256 >= fp_max) {
      /* If the stack limits look bogus, don't poke around ... but
         don't bomb out either. */
      if (sps) sps[0] = uregs.xsp;
      if (fps) fps[0] = uregs.xbp;
      ips[0] = uregs.xip;
      return 1;
   } 
#  endif

   /* fp is %rbp.  sp is %rsp.  ip is %rip. */

   ips[0] = uregs.xip;
   if (sps) sps[0] = uregs.xsp;
   if (fps) fps[0] = uregs.xbp;
   i = 1;

   /* Loop unwinding the stack. Note that the IP value we get on
    * each pass (whether from CFI info or a stack frame) is a
    * return address so is actually after the calling instruction
    * in the calling function.
    *
    * Because of this we subtract one from the IP after each pass
    * of the loop so that we find the right CFI block on the next
    * pass - otherwise we can find the wrong CFI info if it happens
    * to change after the calling instruction and that will mean
    * that we will fail to unwind the next step.
    *
    * This most frequently happens at the end of a function when
    * a tail call occurs and we wind up using the CFI info for the
    * next function which is completely wrong.
    */
   while (True) {

      if (i >= max_n_ips)
         break;

      /* Try to derive a new (ip,sp,fp) triple from the current set. */

      /* First off, see if there is any CFI info to hand which can
         be used. */
      if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
         if (0 == uregs.xip || 1 == uregs.xip) break;
         if (sps) sps[i] = uregs.xsp;
         if (fps) fps[i] = uregs.xbp;
         ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
         if (debug)
            VG_(printf)("     ipsC[%d]=%#08lx\n", i-1, ips[i-1]);
         uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
         continue;
      }

      /* If VG_(use_CF_info) fails, it won't modify ip/sp/fp, so
         we can safely try the old-fashioned method. */
      /* This bit is supposed to deal with frames resulting from
         functions which begin "pushq %rbp ; movq %rsp, %rbp".
         Unfortunately, since we can't (easily) look at the insns at
         the start of the fn, like GDB does, there's no reliable way
         to tell.  Hence the hack of first trying out CFI, and if that
         fails, then use this as a fallback. */
      /* Note: re "- 1 * sizeof(UWord)", need to take account of the
         fact that we are prodding at & ((UWord*)fp)[1] and so need to
         adjust the limit check accordingly.  Omitting this has been
         observed to cause segfaults on rare occasions. */
      if (fp_min <= uregs.xbp && uregs.xbp <= fp_max - 1 * sizeof(UWord)) {
         /* fp looks sane, so use it. */
         uregs.xip = (((UWord*)uregs.xbp)[1]);
         if (0 == uregs.xip || 1 == uregs.xip) break;
         uregs.xsp = uregs.xbp + sizeof(Addr) /*saved %rbp*/ 
                               + sizeof(Addr) /*ra*/;
         uregs.xbp = (((UWord*)uregs.xbp)[0]);
         if (sps) sps[i] = uregs.xsp;
         if (fps) fps[i] = uregs.xbp;
         ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
         if (debug)
            VG_(printf)("     ipsF[%d]=%#08lx\n", i-1, ips[i-1]);
         uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
         continue;
      }

      /* Last-ditch hack (evidently GDB does something similar).  We
         are in the middle of nowhere and we have a nonsense value for
         the frame pointer.  If the stack pointer is still valid,
         assume that what it points at is a return address.  Yes,
         desperate measures.  Could do better here:
         - check that the supposed return address is in
           an executable page
         - check that the supposed return address is just after a call insn
         - given those two checks, don't just consider *sp as the return 
           address; instead scan a likely section of stack (eg sp .. sp+256)
           and use suitable values found there.
      */
      if (fp_min <= uregs.xsp && uregs.xsp < fp_max) {
         uregs.xip = ((UWord*)uregs.xsp)[0];
         if (0 == uregs.xip || 1 == uregs.xip) break;
         if (sps) sps[i] = uregs.xsp;
         if (fps) fps[i] = uregs.xbp;
         ips[i++] = uregs.xip == 0 
                    ? 0 /* sp[0] == 0 ==> stuck at the bottom of a
                           thread stack */
                    : uregs.xip - 1;
                        /* -1: refer to calling insn, not the RA */
         if (debug)
            VG_(printf)("     ipsH[%d]=%#08lx\n", i-1, ips[i-1]);
         uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
         uregs.xsp += 8;
         continue;
      }

      /* No luck at all.  We have to give up. */
      break;
   }

   n_found = i;
   return n_found;
}
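
/* Hypothetical sketch, not part of the source: the stronger last-ditch
   heuristic suggested in the comment above -- scan a small window of
   stack words (sp .. sp+256) and take the first value that could
   plausibly be a return address.  'looks_like_code_addr' is an invented
   placeholder; a real version would check for an executable page and a
   preceding call insn. */
#include <stdint.h>
#include <stddef.h>

static int looks_like_code_addr ( uintptr_t w )
{
   return w >= 4096;   /* placeholder: merely rules out the zero page */
}

static int scan_for_return_addr ( uintptr_t sp, uintptr_t fp_max,
                                  /*OUT*/uintptr_t* ra )
{
   uintptr_t a;
   for (a = sp; a < sp + 256 && a + sizeof(uintptr_t) <= fp_max;
        a += sizeof(uintptr_t)) {
      uintptr_t w = *(const uintptr_t*)a;
      if (looks_like_code_addr(w)) { *ra = w; return 1; }
   }
   return 0;
}
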
void test_VG_ROUND_et_al()
{
   CHECK( 0 == VG_ROUNDDN(0, 1) );
   CHECK( 1 == VG_ROUNDDN(1, 1) );
   CHECK( 2 == VG_ROUNDDN(2, 1) );
   CHECK( 3 == VG_ROUNDDN(3, 1) );
   CHECK( 4 == VG_ROUNDDN(4, 1) );
   CHECK( 5 == VG_ROUNDDN(5, 1) );
   CHECK( 6 == VG_ROUNDDN(6, 1) );
   CHECK( 7 == VG_ROUNDDN(7, 1) );

   CHECK( 0 == VG_ROUNDUP(0, 1) );
   CHECK( 1 == VG_ROUNDUP(1, 1) );
   CHECK( 2 == VG_ROUNDUP(2, 1) );
   CHECK( 3 == VG_ROUNDUP(3, 1) );
   CHECK( 4 == VG_ROUNDUP(4, 1) );
   CHECK( 5 == VG_ROUNDUP(5, 1) );
   CHECK( 6 == VG_ROUNDUP(6, 1) );
   CHECK( 7 == VG_ROUNDUP(7, 1) );

   CHECK( 0 == VG_ROUNDDN(0, 2) );
   CHECK( 0 == VG_ROUNDDN(1, 2) );
   CHECK( 2 == VG_ROUNDDN(2, 2) );
   CHECK( 2 == VG_ROUNDDN(3, 2) );
   CHECK( 4 == VG_ROUNDDN(4, 2) );
   CHECK( 4 == VG_ROUNDDN(5, 2) );
   CHECK( 6 == VG_ROUNDDN(6, 2) );
   CHECK( 6 == VG_ROUNDDN(7, 2) );

   CHECK( 0 == VG_ROUNDUP(0, 2) );
   CHECK( 2 == VG_ROUNDUP(1, 2) );
   CHECK( 2 == VG_ROUNDUP(2, 2) );
   CHECK( 4 == VG_ROUNDUP(3, 2) );
   CHECK( 4 == VG_ROUNDUP(4, 2) );
   CHECK( 6 == VG_ROUNDUP(5, 2) );
   CHECK( 6 == VG_ROUNDUP(6, 2) );
   CHECK( 8 == VG_ROUNDUP(7, 2) );

   CHECK( 0 == VG_ROUNDDN(0, 4) );
   CHECK( 0 == VG_ROUNDDN(1, 4) );
   CHECK( 0 == VG_ROUNDDN(2, 4) );
   CHECK( 0 == VG_ROUNDDN(3, 4) );
   CHECK( 4 == VG_ROUNDDN(4, 4) );
   CHECK( 4 == VG_ROUNDDN(5, 4) );
   CHECK( 4 == VG_ROUNDDN(6, 4) );
   CHECK( 4 == VG_ROUNDDN(7, 4) );

   CHECK( 0 == VG_ROUNDUP(0, 4) );
   CHECK( 4 == VG_ROUNDUP(1, 4) );
   CHECK( 4 == VG_ROUNDUP(2, 4) );
   CHECK( 4 == VG_ROUNDUP(3, 4) );
   CHECK( 4 == VG_ROUNDUP(4, 4) );
   CHECK( 8 == VG_ROUNDUP(5, 4) );
   CHECK( 8 == VG_ROUNDUP(6, 4) );
   CHECK( 8 == VG_ROUNDUP(7, 4) );

   CHECK( 0 == VG_ROUNDDN(0, 8) );
   CHECK( 0 == VG_ROUNDDN(1, 8) );
   CHECK( 0 == VG_ROUNDDN(2, 8) );
   CHECK( 0 == VG_ROUNDDN(3, 8) );
   CHECK( 0 == VG_ROUNDDN(4, 8) );
   CHECK( 0 == VG_ROUNDDN(5, 8) );
   CHECK( 0 == VG_ROUNDDN(6, 8) );
   CHECK( 0 == VG_ROUNDDN(7, 8) );

   CHECK( 0 == VG_ROUNDUP(0, 8) );
   CHECK( 8 == VG_ROUNDUP(1, 8) );
   CHECK( 8 == VG_ROUNDUP(2, 8) );
   CHECK( 8 == VG_ROUNDUP(3, 8) );
   CHECK( 8 == VG_ROUNDUP(4, 8) );
   CHECK( 8 == VG_ROUNDUP(5, 8) );
   CHECK( 8 == VG_ROUNDUP(6, 8) );
   CHECK( 8 == VG_ROUNDUP(7, 8) );

   CHECK( 0             == VG_PGROUNDDN(0) );
   CHECK( 0             == VG_PGROUNDDN(1) );
   CHECK( 0             == VG_PGROUNDDN(2) );
   CHECK( 0             == VG_PGROUNDDN(3) );
   CHECK( 0             == VG_PGROUNDDN(4) );
   CHECK( 0             == VG_PGROUNDDN(VKI_PAGE_SIZE-1) );
   CHECK( VKI_PAGE_SIZE == VG_PGROUNDDN(VKI_PAGE_SIZE  ) );
   CHECK( VKI_PAGE_SIZE == VG_PGROUNDDN(VKI_PAGE_SIZE+1) );

   CHECK( 0               == VG_PGROUNDUP(0) );
   CHECK( VKI_PAGE_SIZE   == VG_PGROUNDUP(1) );
   CHECK( VKI_PAGE_SIZE   == VG_PGROUNDUP(2) );
   CHECK( VKI_PAGE_SIZE   == VG_PGROUNDUP(3) );
   CHECK( VKI_PAGE_SIZE   == VG_PGROUNDUP(4) );
   CHECK( VKI_PAGE_SIZE   == VG_PGROUNDUP(VKI_PAGE_SIZE-1) );
   CHECK( VKI_PAGE_SIZE   == VG_PGROUNDUP(VKI_PAGE_SIZE  ) );
   CHECK( VKI_PAGE_SIZE*2 == VG_PGROUNDUP(VKI_PAGE_SIZE+1) );
}
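
/* A minimal sketch (an assumption, not the actual Valgrind
   definitions) of rounding macros consistent with the checks above.
   They require the alignment argument to be a power of two: */
#define MY_ROUNDDN(p, a)  ((p) & ~((a)-1))
#define MY_ROUNDUP(p, a)  MY_ROUNDDN((p) + (a) - 1, (a))
/* e.g. MY_ROUNDUP(5, 4) == 8 and MY_ROUNDDN(7, 8) == 0, matching the
   CHECKs above; page rounding would then fix the alignment argument
   at the page size. */
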
/* 
   When a client clones, we need to keep track of the new thread.  This means:
   1. allocate a ThreadId+ThreadState+stack for the thread

   2. initialize the thread's new VCPU state

   3. create the thread using the same args as the client requested,
   but using the scheduler entrypoint for RIP, and a separate stack
   for RSP.
 */
static SysRes do_clone ( ThreadId ptid, 
                         ULong flags, Addr rsp, 
                         Long* parent_tidptr, 
                         Long* child_tidptr, 
                         Addr tlsaddr )
{
   static const Bool debug = False;

   ThreadId     ctid = VG_(alloc_ThreadState)();
   ThreadState* ptst = VG_(get_ThreadState)(ptid);
   ThreadState* ctst = VG_(get_ThreadState)(ctid);
   UWord*       stack;
   NSegment*    seg;
   SysRes       res;
   Long         rax;
   vki_sigset_t blockall, savedmask;

   VG_(sigfillset)(&blockall);

   vg_assert(VG_(is_running_thread)(ptid));
   vg_assert(VG_(is_valid_tid)(ctid));

   stack = (UWord*)ML_(allocstack)(ctid);
   if (stack == NULL) {
      res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
      goto out;
   }

   /* Copy register state

      Both parent and child return to the same place, and the code
      following the clone syscall works out which is which, so we
      don't need to worry about it.

      The parent gets the child's new tid returned from clone, but the
      child gets 0.

      If the clone call specifies a NULL rsp for the new thread, then
      it actually gets a copy of the parent's rsp.
   */
   setup_child( &ctst->arch, &ptst->arch );

   /* Make sys_clone appear to have returned Success(0) in the
      child. */
   ctst->arch.vex.guest_RAX = 0;

   if (rsp != 0)
      ctst->arch.vex.guest_RSP = rsp;

   ctst->os_state.parent = ptid;

   /* inherit signal mask */
   ctst->sig_mask = ptst->sig_mask;
   ctst->tmp_sig_mask = ptst->sig_mask;

   /* We don't really know where the client stack is, because it's
      allocated by the client.  The best we can do is look at the
      memory mappings and try to derive some useful information.  We
      assume that rsp starts near its highest possible value, and can
      only go down to the start of the mmaped segment. */
   seg = VG_(am_find_nsegment)((Addr)rsp);
   if (seg && seg->kind != SkResvn) {
      ctst->client_stack_highest_word = (Addr)VG_PGROUNDUP(rsp);
      ctst->client_stack_szB = ctst->client_stack_highest_word - seg->start;

      if (debug)
	 VG_(printf)("tid %d: guessed client stack range %p-%p\n",
		     ctid, seg->start, VG_PGROUNDUP(rsp));
   } else {
      VG_(message)(Vg_UserMsg, "!? New thread %d starts with RSP(%p) unmapped\n",
		   ctid, rsp);
      ctst->client_stack_szB  = 0;
   }

   if (flags & VKI_CLONE_SETTLS) {
      if (debug)
	 VG_(printf)("clone child has SETTLS: tls at %p\n", tlsaddr);
      ctst->arch.vex.guest_FS_ZERO = tlsaddr;
   }

   flags &= ~VKI_CLONE_SETTLS;

   /* start the thread with everything blocked */
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, &savedmask);

   /* Create the new thread */
   rax = do_syscall_clone_amd64_linux(
            ML_(start_thread_NORETURN), stack, flags, &VG_(threads)[ctid],
            child_tidptr, parent_tidptr, NULL
         );
   res = VG_(mk_SysRes_amd64_linux)( rax );

   VG_(sigprocmask)(VKI_SIG_SETMASK, &savedmask, NULL);

  out:
   if (res.isError) {
      /* clone failed */
      VG_(cleanup_thread)(&ctst->arch);
      ctst->status = VgTs_Empty;
   }

   return res;
}
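
/* Illustration, not part of the source: parent and child both resume
   at the same point after the raw clone and are told apart only by the
   return value -- which is why guest_RAX is forced to 0 above:

      long r = clone(...);          // raw syscall
      if (r == 0)       ... child, running on 'stack' ...
      else if (r > 0)   ... parent; r is the child's tid ...
      else              ... error, wrapped by VG_(mk_SysRes_amd64_linux) ...
*/
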
/* Take a snapshot of the client's stack, putting up to 'n_ips'
   IPs into 'ips'.  In order to be thread-safe, we pass in the
   thread's IP, SP, FP if that's meaningful, and LR if that's
   meaningful.  Returns the number of IPs put in 'ips'.
*/
UInt VG_(get_StackTrace2) ( Addr* ips, UInt n_ips, 
                            Addr ip, Addr sp, Addr fp, Addr lr,
                            Addr fp_min, Addr fp_max_orig )
{
#if defined(VGP_ppc32_linux)
   Bool  lr_is_first_RA = False; /* ppc only */
#endif
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;

   VGP_PUSHCC(VgpExeContext);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   /* Snaffle IPs from the client's stack into ips[0 .. n_ips-1],
      putting zeroes in when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   for (i = 0; i < n_ips; i++)
      ips[i] = 0;

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("n_ips=%d fp_min=%p fp_max_orig=%p, fp_max=%p ip=%p fp=%p\n",
		  n_ips, fp_min, fp_max_orig, fp_max, ip, fp);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/

   if (fp_min + VG_(clo_max_stackframe) <= fp_max) {
      /* If the stack is ridiculously big, don't poke around ... but
         don't bomb out either.  Needed to make John Regehr's
         user-space threads package work. JRS 20021001 */
      ips[0] = ip;
      VGP_POPCC(VgpExeContext);
      return 1;
   } 

   /* Otherwise unwind the stack in a platform-specific way.  Trying
      to merge the x86, amd64 and ppc32 logic into a single piece of
      code is just too confusing and difficult to performance-tune.  */

#  if defined(VGP_x86_linux)

   /*--------------------- x86 ---------------------*/

   /* fp is %ebp.  sp is %esp.  ip is %eip. */

   ips[0] = ip;
   i = 1;

   /* Loop unwinding the stack. Note that the IP value we get on
    * each pass (whether from CFI info or a stack frame) is a
    * return address so is actually after the calling instruction
    * in the calling function.
    *
    * Because of this we subtract one from the IP after each pass
    * of the loop so that we find the right CFI block on the next
    * pass - otherwise we can find the wrong CFI info if it happens
    * to change after the calling instruction and that will mean
    * that we will fail to unwind the next step.
    *
    * This most frequently happens at the end of a function when
    * a tail call occurs and we wind up using the CFI info for the
    * next function which is completely wrong.
    */
   while (True) {

      if (i >= n_ips)
         break;

      /* Try to derive a new (ip,sp,fp) triple from the current
         set. */

      /* On x86, first try the old-fashioned method of following the
         %ebp-chain.  Code which doesn't use this (that is, compiled
         with -fomit-frame-pointer) is not ABI compliant and so
         relatively rare.  Besides, trying the CFI first almost always
         fails, and is expensive. */
      /* Deal with frames resulting from functions which begin "pushl
         %ebp ; movl %esp, %ebp" which is the ABI-mandated preamble. */
      if (fp_min <= fp && fp <= fp_max) {
         /* fp looks sane, so use it. */
         ip = (((UWord*)fp)[1]);
         sp = fp + sizeof(Addr) /*saved %ebp*/ 
                 + sizeof(Addr) /*ra*/;
         fp = (((UWord*)fp)[0]);
         ips[i++] = ip;
         if (debug)
            VG_(printf)("     ipsF[%d]=%08p\n", i-1, ips[i-1]);
         ip = ip - 1;
         continue;
      }

      /* That didn't work out, so see if there is any CFI info to hand
         which can be used. */
      if ( VG_(use_CFI_info)( &ip, &sp, &fp, fp_min, fp_max ) ) {
         ips[i++] = ip;
         if (debug)
            VG_(printf)("     ipsC[%d]=%08p\n", i-1, ips[i-1]);
         ip = ip - 1;
         continue;
      }

      /* No luck.  We have to give up. */
      break;
   }

#  elif defined(VGP_amd64_linux)

   /*--------------------- amd64 ---------------------*/

   /* fp is %rbp.  sp is %rsp.  ip is %rip. */

   ips[0] = ip;
   i = 1;

   /* Loop unwinding the stack. Note that the IP value we get on
    * each pass (whether from CFI info or a stack frame) is a
    * return address so is actually after the calling instruction
    * in the calling function.
    *
    * Because of this we subtract one from the IP after each pass
    * of the loop so that we find the right CFI block on the next
    * pass - otherwise we can find the wrong CFI info if it happens
    * to change after the calling instruction and that will mean
    * that we will fail to unwind the next step.
    *
    * This most frequently happens at the end of a function when
    * a tail call occurs and we wind up using the CFI info for the
    * next function which is completely wrong.
    */
   while (True) {

      if (i >= n_ips)
         break;

      /* Try to derive a new (ip,sp,fp) triple from the current
         set. */

      /* First off, see if there is any CFI info to hand which can
         be used. */
      if ( VG_(use_CFI_info)( &ip, &sp, &fp, fp_min, fp_max ) ) {
         ips[i++] = ip;
         if (debug)
            VG_(printf)("     ipsC[%d]=%08p\n", i-1, ips[i-1]);
         ip = ip - 1;
         continue;
      }

      /* If VG_(use_CFI_info) fails, it won't modify ip/sp/fp, so
         we can safely try the old-fashioned method. */
      /* This bit is supposed to deal with frames resulting from
         functions which begin "pushq %rbp ; movq %rsp, %rbp".
         Unfortunately, since we can't (easily) look at the insns at
         the start of the fn, like GDB does, there's no reliable way
         to tell.  Hence the hack of first trying out CFI, and if that
         fails, then use this as a fallback. */
      if (fp_min <= fp && fp <= fp_max) {
         /* fp looks sane, so use it. */
         ip = (((UWord*)fp)[1]);
         sp = fp + sizeof(Addr) /*saved %rbp*/ 
                 + sizeof(Addr) /*ra*/;
         fp = (((UWord*)fp)[0]);
         ips[i++] = ip;
         if (debug)
            VG_(printf)("     ipsF[%d]=%08p\n", i-1, ips[i-1]);
         ip = ip - 1;
         continue;
      }

      /* No luck there.  We have to give up. */
      break;
   }

#  elif defined(VGP_ppc32_linux)

   /*--------------------- ppc32 ---------------------*/

   /* fp is %r1.  ip is %cia.  Note, ppc uses r1 as both the stack and
      frame pointers. */

   lr_is_first_RA = False;
   {
#     define M_VG_ERRTXT 1000
      UChar buf_lr[M_VG_ERRTXT], buf_ip[M_VG_ERRTXT];
      if (VG_(get_fnname_nodemangle) (lr, buf_lr, M_VG_ERRTXT))
         if (VG_(get_fnname_nodemangle) (ip, buf_ip, M_VG_ERRTXT))
            if (VG_(strncmp)(buf_lr, buf_ip, M_VG_ERRTXT))
               lr_is_first_RA = True;
#     undef M_VG_ERRTXT
   }

   ips[0] = ip;
   i = 1;

   if (fp_min <= fp && fp < fp_max-4+1) {

      /* initial FP is sane; keep going */
      fp = (((UWord*)fp)[0]);

      while (True) {

         if (i >= n_ips)
            break;

         /* Try to derive a new (ip,fp) pair from the current set. */

         if (fp_min <= fp && fp <= fp_max) {
            /* fp looks sane, so use it. */

            if (i == 1 && lr_is_first_RA)
               ip = lr;
            else
               ip = (((UWord*)fp)[1]);

            fp = (((UWord*)fp)[0]);
            ips[i++] = ip;
            if (debug)
               VG_(printf)("     ipsF[%d]=%08p\n", i-1, ips[i-1]);
            continue;
         }

         /* No luck there.  We have to give up. */
         break;
      }
   }

#  else
#    error "Unknown platform"
#  endif

   n_found = i;
   VGP_POPCC(VgpExeContext);
   return n_found;
}
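
/* Worked illustration (hypothetical addresses) of the 'ip = ip - 1'
   adjustment used throughout the unwinders above.  Suppose a call is
   the final instruction of its function:

      0x1000:  call f        ; 5-byte insn, last insn of the caller
      0x1005:  <first insn of the next function, g>

   The saved return address is 0x1005, which lies inside g.  A CFI or
   symbol lookup at 0x1005 would wrongly attribute the frame to g;
   at 0x1004 (RA minus 1) it correctly lands inside the caller. */
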
void VG_(describe_addr) ( Addr a, /*OUT*/AddrInfo* ai )
{
   VgSectKind sect;

   /* -- Perhaps the variable type/location data describes it? -- */
   ai->Addr.Variable.descr1
      = VG_(newXA)( VG_(malloc), "mc.da.descr1",
                    VG_(free), sizeof(HChar) );
   ai->Addr.Variable.descr2
      = VG_(newXA)( VG_(malloc), "mc.da.descr2",
                    VG_(free), sizeof(HChar) );

   (void) VG_(get_data_description)( ai->Addr.Variable.descr1,
                                     ai->Addr.Variable.descr2, a );
   /* If there's nothing in descr1/2, free them.  Why is it safe to
      do VG_(indexXA) at zero here?  Because VG_(get_data_description)
      guarantees to zero terminate descr1/2 regardless of the outcome
      of the call.  So there's always at least one element in each XA
      after the call.
   */
   if (0 == VG_(strlen)( VG_(indexXA)( ai->Addr.Variable.descr1, 0 ))) {
      VG_(deleteXA)( ai->Addr.Variable.descr1 );
      ai->Addr.Variable.descr1 = NULL;
   }
   if (0 == VG_(strlen)( VG_(indexXA)( ai->Addr.Variable.descr2, 0 ))) {
      VG_(deleteXA)( ai->Addr.Variable.descr2 );
      ai->Addr.Variable.descr2 = NULL;
   }
   /* Assume (assert) that VG_(get_data_description) fills in descr1
      before it fills in descr2 */
   if (ai->Addr.Variable.descr1 == NULL)
      vg_assert(ai->Addr.Variable.descr2 == NULL);
   /* So did we get lucky? */
   if (ai->Addr.Variable.descr1 != NULL) {
      ai->tag = Addr_Variable;
      return;
   }
   /* -- Have a look at the low level data symbols - perhaps it's in
      there. -- */
   const HChar *name;
   if (VG_(get_datasym_and_offset)(
             a, &name,
             &ai->Addr.DataSym.offset )) {
      ai->Addr.DataSym.name = VG_(strdup)("mc.da.dsname", name);
      ai->tag = Addr_DataSym;
      return;
   }
   /* -- Perhaps it's on a thread's stack? -- */
   {
      ThreadId   tid;
      Addr       stack_min, stack_max;
      VG_(thread_stack_reset_iter)(&tid);
      while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
         if (stack_min - VG_STACK_REDZONE_SZB <= a && a <= stack_max) {
            Addr ips[VG_(clo_backtrace_size)],
                 sps[VG_(clo_backtrace_size)];
            UInt n_frames;
            UInt f;

            ai->tag            = Addr_Stack;
            VG_(initThreadInfo)(&ai->Addr.Stack.tinfo);
            ai->Addr.Stack.tinfo.tid = tid;
            ai->Addr.Stack.IP = 0;
            ai->Addr.Stack.frameNo = -1;
            ai->Addr.Stack.stackPos = StackPos_stacked;
            ai->Addr.Stack.spoffset = 0; // Unused.
            /* It is on thread tid stack. Build a stacktrace, and
               find the frame sp[f] .. sp[f+1] where the address is.
               Store the found frameNo and the corresponding IP in
               the description. 
               When description is printed, IP will be translated to
               the function name containing IP. 
               Before accepting to describe addr with sp[f] .. sp[f+1],
               we verify the sp looks sane: reasonably sized frame,
               inside the stack.
               We could check that the ABI-required alignment for sp
               (what is it?) is respected, except for the innermost
               stack pointer? */
            n_frames = VG_(get_StackTrace)( tid, ips, VG_(clo_backtrace_size),
                                            sps, NULL, 0/*first_ip_delta*/ );
            for (f = 0; f < n_frames-1; f++) {
               if (sps[f] <= a && a < sps[f+1]
                   && sps[f+1] - sps[f] <= 0x4000000 // 64 MB, arbitrary
                   && sps[f+1] <= stack_max
                   && sps[f]   >= stack_min - VG_STACK_REDZONE_SZB) {
                  ai->Addr.Stack.frameNo = f;
                  ai->Addr.Stack.IP = ips[f];
                  break;
               }
            }
            return;
         }
      }
   }

   /* -- Maybe it is in one of the m_mallocfree.c arenas. --  */
   {
      AddrArenaInfo aai;
      VG_(describe_arena_addr) ( a, &aai );
      if (aai.name != NULL) {
         ai->tag = Addr_Block;
         if (aai.aid == VG_AR_CLIENT)
            ai->Addr.Block.block_kind 
               = aai.free ? Block_ClientArenaFree : Block_ClientArenaMallocd;
         else
            ai->Addr.Block.block_kind 
               = aai.free 
                  ? Block_ValgrindArenaFree :  Block_ValgrindArenaMallocd;
         ai->Addr.Block.block_desc = aai.name;
         ai->Addr.Block.block_szB = aai.block_szB;
         ai->Addr.Block.rwoffset = aai.rwoffset;
         ai->Addr.Block.allocated_at = VG_(null_ExeContext)();
         VG_(initThreadInfo) (&ai->Addr.Block.alloc_tinfo);
         ai->Addr.Block.freed_at = VG_(null_ExeContext)();
         return;
      }
   }

   /* -- last ditch attempt at classification -- */
   sect = VG_(DebugInfo_sect_kind)( &name, a);
   ai->Addr.SectKind.objname = VG_(strdup)("mc.da.dsname", name);

   if (sect != Vg_SectUnknown) {
      ai->tag = Addr_SectKind;
      ai->Addr.SectKind.kind = sect;
      return;
   }

   /* -- and yet another last ditch attempt at classification -- */
   /* If the address is in a stack between the stack bottom (highest byte)
      and the current stack ptr, it will have been already described above.
      But maybe it is in a stack, but below the stack ptr (typical
      for a 'use after return') or in the stack guard page (thread stack
      too small). */
   {
      ThreadId   tid;
      StackPos stackPos = StackPos_stacked;
      // Default init to StackPos_stacked, to silence gcc warning.
      // We assert this value is overridden if a stack descr is produced.

      // First try to find a tid with stack containing a
      tid = find_tid_with_stack_containing (a);
      if (tid != VG_INVALID_THREADID) {
         /* Should be below stack pointer, as if it is >= SP, it
            will have been described as StackPos_stacked above. */
         stackPos = StackPos_below_stack_ptr;
      } else {
         /* Try to find a stack with guard page containing a.
            For this, check if a is in a page mapped without r, w and x. */
         const NSegment *seg = VG_(am_find_nsegment) (a);
         if (seg != NULL && seg->kind == SkAnonC
             && !seg->hasR && !seg->hasW && !seg->hasX) {
            /* This looks like a plausible guard page. Check if a is close to
               the start of stack (lowest byte). */
            tid = find_tid_with_stack_containing (VG_PGROUNDUP(a+1));
            if (tid != VG_INVALID_THREADID)
               stackPos = StackPos_guard_page;
         }
      }

      if (tid != VG_INVALID_THREADID) {
         ai->tag  = Addr_Stack;
         VG_(initThreadInfo)(&ai->Addr.Stack.tinfo);
         ai->Addr.Stack.tinfo.tid = tid;
         ai->Addr.Stack.IP = 0;
         ai->Addr.Stack.frameNo = -1;
         vg_assert (stackPos != StackPos_stacked);
         ai->Addr.Stack.stackPos = stackPos;
         vg_assert (a < VG_(get_SP)(tid));
         ai->Addr.Stack.spoffset = a - VG_(get_SP)(tid);
         return;
      }
   }

   /* -- and yet another last ditch attempt at classification -- */
   /* Try to find a segment belonging to the client. */
   {
      const NSegment *seg = VG_(am_find_nsegment) (a);

      /* Special case to detect the brk data segment. */
      if (seg != NULL
          && seg->kind == SkAnonC
          && VG_(brk_limit) >= seg->start
          && VG_(brk_limit) <= seg->end+1) {
         /* Address a is in an Anon Client segment which contains
            VG_(brk_limit). So, this segment is the brk data segment
            as initimg-linux.c:setup_client_dataseg maps an anonymous
            segment followed by a reservation, with one reservation
            page that will never be used by syswrap-generic.c:do_brk,
            when increasing VG_(brk_limit).
            So, the brk data segment will never be merged with the
            next segment, and so an address in that area will
            either be in the brk data segment, or in the unmapped
            part of the brk data segment reservation. */
         ai->tag = Addr_BrkSegment;
         ai->Addr.BrkSegment.brk_limit = VG_(brk_limit);
         return;
      }

      if (seg != NULL 
          && (seg->kind == SkAnonC 
              || seg->kind == SkFileC
              || seg->kind == SkShmC)) {
         ai->tag = Addr_SegmentKind;
         ai->Addr.SegmentKind.segkind = seg->kind;
         ai->Addr.SegmentKind.filename = NULL;
         if (seg->kind == SkFileC)
            ai->Addr.SegmentKind.filename
               = VG_(strdup)("mc.da.skfname", VG_(am_get_filename)(seg));
         ai->Addr.SegmentKind.hasR = seg->hasR;
         ai->Addr.SegmentKind.hasW = seg->hasW;
         ai->Addr.SegmentKind.hasX = seg->hasX;
         return;
      }
   }

   /* -- Clueless ... -- */
   ai->tag = Addr_Unknown;
   return;
}
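
/* Hypothetical usage sketch (an assumption about typical callers, not
   from the source): an error-reporting path might classify an address
   like this, using the tags filled in by VG_(describe_addr) above. */
#if 0
static void report_addr ( Addr a )
{
   AddrInfo ai;
   VG_(describe_addr)( a, &ai );
   switch (ai.tag) {
      case Addr_Stack:    /* on thread ai.Addr.Stack.tinfo.tid's stack */
      case Addr_Block:    /* inside a malloc'd or freed arena block */
      case Addr_DataSym:  /* ai.Addr.DataSym.name + offset */
      default:            break;
   }
}
#endif
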
/*  wqthread note: The kernel may create or destroy pthreads in the 
    wqthread pool at any time with no userspace interaction, 
    and wqthread_start may be entered at any time with no userspace 
    interaction.
    To handle this in valgrind, we create and destroy a valgrind 
    thread for every work item.
*/
void wqthread_hijack(Addr self, Addr kport, Addr stackaddr, Addr workitem, 
                     Int reuse, Addr sp)
{
   ThreadState *tst;
   VexGuestAMD64State *vex;
   Addr stack;
   SizeT stacksize;
   vki_sigset_t blockall;

   /* When we enter here we hold no lock (!), so we better acquire it
      pronto.  Why do we hold no lock?  Because (presumably) the only
      way to get here is as a result of a SfMayBlock syscall
      "workq_ops(WQOPS_THREAD_RETURN)", which will have dropped the
      lock.  At least that's clear for the 'reuse' case.  The
      non-reuse case?  Dunno, perhaps it's a new thread the kernel
      pulled out of a hat.  In any case we still need to take a
      lock. */
   VG_(acquire_BigLock_LL)("wqthread_hijack");

   if (0) VG_(printf)(
             "wqthread_hijack: self %#lx, kport %#lx, "
	     "stackaddr %#lx, workitem %#lx, reuse/flags %x, sp %#lx\n", 
	     self, kport, stackaddr, workitem, reuse, sp);

   /* Start the thread with all signals blocked.  VG_(scheduler) will
      set the mask correctly when we finally get there. */
   VG_(sigfillset)(&blockall);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, NULL);

   /* For 10.7 and earlier, |reuse| appeared to be used as a simple
      boolean.  In 10.8 and later its name changed to |flags| and has
      various other bits OR-d into it too, so it's necessary to fish
      out just the relevant parts.  Hence: */
#  if DARWIN_VERS <= DARWIN_10_7
   Bool is_reuse = reuse != 0;
#  elif DARWIN_VERS > DARWIN_10_7
   Bool is_reuse = (reuse & 0x20000 /* == WQ_FLAG_THREAD_REUSE */) != 0;
#  else
#    error "Unsupported Darwin version"
#  endif

   if (is_reuse) {

     /* For whatever reason, tst->os_state.pthread appears to have a
        constant offset of 96 on 10.7, but zero on 10.6 and 10.5.  No
        idea why. */
#      if DARWIN_VERS <= DARWIN_10_6
       UWord magic_delta = 0;
#      elif DARWIN_VERS == DARWIN_10_7 || DARWIN_VERS == DARWIN_10_8
       UWord magic_delta = 0x60;
#      elif DARWIN_VERS == DARWIN_10_9 || DARWIN_VERS == DARWIN_10_10
       UWord magic_delta = 0xE0;
#      elif DARWIN_VERS == DARWIN_10_11
       UWord magic_delta = 0x100;
#      else
#        error "magic_delta: to be computed on new OS version"
         // magic_delta = tst->os_state.pthread - self
#      endif

       // This thread already exists; we're merely re-entering 
       // after leaving via workq_ops(WQOPS_THREAD_RETURN). 
       // Don't allocate any V thread resources.
       // Do reset thread registers.
       ThreadId tid = VG_(lwpid_to_vgtid)(kport);
       vg_assert(VG_(is_valid_tid)(tid));
       vg_assert(mach_thread_self() == kport);

       tst = VG_(get_ThreadState)(tid);

       if (0) VG_(printf)("wqthread_hijack reuse %s: tid %d, tst %p, "
                          "tst->os_state.pthread %#lx, self %#lx\n",
                          tst->os_state.pthread == self ? "SAME" : "DIFF",
                          tid, tst, tst->os_state.pthread, self);

       vex = &tst->arch.vex;
       vg_assert(tst->os_state.pthread - magic_delta == self);
   }
   else {
       // This is a new thread.
       tst = VG_(get_ThreadState)(VG_(alloc_ThreadState)());        
       vex = &tst->arch.vex;
       allocstack(tst->tid);
       LibVEX_GuestAMD64_initialise(vex);
   }
       
   // Set thread's registers
   // Do this FIRST because some code below tries to collect a backtrace, 
   // which requires valid register data.
   vex->guest_RIP = wqthread_starter;
   vex->guest_RDI = self;
   vex->guest_RSI = kport;
   vex->guest_RDX = stackaddr;
   vex->guest_RCX = workitem;
   vex->guest_R8  = reuse;
   vex->guest_R9  = 0;
   vex->guest_RSP = sp;

   stacksize = 512*1024;  // wq stacks are always DEFAULT_STACK_SIZE
   stack = VG_PGROUNDUP(sp) - stacksize;

   if (is_reuse) {
      // Continue V's thread back in the scheduler. 
      // The client thread is of course in another location entirely.

      /* Drop the lock before going into
         ML_(wqthread_continue_NORETURN).  The latter will immediately
         attempt to reacquire it in non-LL mode, which is a bit
         wasteful but I don't think is harmful.  A better solution
         would be to not drop the lock but instead "upgrade" it from a
         LL lock to a full lock, but that's too much like hard work
         right now. */
      VG_(release_BigLock_LL)("wqthread_hijack(1)");
      ML_(wqthread_continue_NORETURN)(tst->tid);
   } 
   else {
      // Record thread's stack and Mach port and pthread struct
      tst->os_state.pthread = self;
      tst->os_state.lwpid = kport;
      record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "wqthread-%p");
      
      // kernel allocated stack - needs mapping
      tst->client_stack_highest_byte = stack+stacksize-1;
      tst->client_stack_szB = stacksize;

      // GrP fixme scheduler lock?!
      
      // pthread structure
      ML_(notify_core_and_tool_of_mmap)(
            stack+stacksize, pthread_structsize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // stack contents
      // GrP fixme uninitialized!
      ML_(notify_core_and_tool_of_mmap)(
            stack, stacksize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // guard page
      // GrP fixme ban_mem_stack!
      ML_(notify_core_and_tool_of_mmap)(
            stack-VKI_PAGE_SIZE, VKI_PAGE_SIZE,
            0, VKI_MAP_PRIVATE, -1, 0);

      ML_(sync_mappings)("after", "wqthread_hijack", 0);

      // Go!
      /* Same comments as the 'release' in the then-clause.
         start_thread_NORETURN calls run_thread_NORETURN calls
         thread_wrapper which acquires the lock before continuing.
         Let's hope nothing non-thread-local happens until that point.

         DDD: I think this is plain wrong .. if we get to
         thread_wrapper not holding the lock, and someone has recycled
         this thread slot in the meantime, we're hosed.  Is that
         possible, though? */
      VG_(release_BigLock_LL)("wqthread_hijack(2)");
      call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0, 
                            start_thread_NORETURN, (Word)tst);
   }

   /*NOTREACHED*/
   vg_assert(0);
}
/* 
   Process an LC_SEGMENT command, mapping it into memory if appropriate.
   fd[offset .. offset+size), i.e. 'size' bytes starting at 'offset', is a
     Mach-O thin file.
   Returns 0 on success, -1 on any failure.
   If this segment contains the executable's Mach headers, their 
     loaded address is returned in *text.
   If this segment is a __UNIXSTACK, its start address is returned in 
     *stack_start.
*/
static int
load_segment(int fd, vki_off_t offset, vki_off_t size, 
             vki_uint8_t **text, vki_uint8_t **stack_start, 
             struct SEGMENT_COMMAND *segcmd, const HChar *filename)
{
   SysRes res;
   Addr addr;
   vki_size_t filesize; // page-aligned 
   vki_size_t vmsize;   // page-aligned
   unsigned int prot;

   // GrP fixme mark __UNIXSTACK as SF_STACK
    
   // Don't honour the client's request to map PAGEZERO.  Why not?
   // Because the kernel will already have mapped pagezero itself
   // when it loaded the valgrind tool executable, so any further
   // attempt to map it when loading the client is guaranteed to fail.
#if VG_WORDSIZE == 4
   if (segcmd->vmaddr == 0 && 0 == VG_(strcmp)(segcmd->segname, SEG_PAGEZERO)) {
      if (segcmd->vmsize != 0x1000) {
         print("bad executable (__PAGEZERO is not 4 KB)\n");
         return -1;
      }
      return 0;
   }
#endif
#if VG_WORDSIZE == 8
   if (segcmd->vmaddr == 0 && 0 == VG_(strcmp)(segcmd->segname, SEG_PAGEZERO)) {
      if (segcmd->vmsize != 0x100000000) {
         print("bad executable (__PAGEZERO is not 4 GB)\n");
         return -1;
      }
      return 0;
   }
#endif

   // Record the segment containing the Mach headers themselves
   if (segcmd->fileoff == 0  &&  segcmd->filesize != 0) {
      if (text) *text = (vki_uint8_t *)segcmd->vmaddr;
   }

   // Record the __UNIXSTACK start
   if (0 == VG_(strcmp)(segcmd->segname, SEG_UNIXSTACK)) {
      if (stack_start) *stack_start = (vki_uint8_t *)segcmd->vmaddr;
   }

   // Sanity-check the segment
   if (segcmd->fileoff + segcmd->filesize > size) {
      print("bad executable (invalid segment command)\n");
      return -1;
   }
   if (segcmd->vmsize == 0) {
      return 0;  // nothing to map - ok
   }

   // Get desired memory protection
   // GrP fixme need maxprot too
   prot = (((segcmd->initprot & VM_PROT_READ) ? VKI_PROT_READ : 0) |
           ((segcmd->initprot & VM_PROT_WRITE) ? VKI_PROT_WRITE : 0) |
           ((segcmd->initprot & VM_PROT_EXECUTE) ? VKI_PROT_EXEC : 0));

   // Map the segment    
   filesize = VG_PGROUNDUP(segcmd->filesize);
   vmsize = VG_PGROUNDUP(segcmd->vmsize);
   if (filesize > 0) {
      addr = (Addr)segcmd->vmaddr;
      VG_(debugLog)(2, "ume", "mmap fixed (file) (%#lx, %lu)\n", addr, filesize);
      res = VG_(am_mmap_named_file_fixed_client)(addr, filesize, prot, fd, 
                                                 offset + segcmd->fileoff, 
                                                 filename);
      check_mmap(res, addr, filesize, "load_segment1");
   }

   // Zero-fill the remainder of the segment, if any
   if (segcmd->filesize != filesize) {
      // non-page-aligned part
      // GrP fixme kernel doesn't do this?
      //bzero(segcmd->filesize+(vki_uint8_t *)addr, filesize-segcmd->filesize);
   }
   if (filesize != vmsize) {
      // page-aligned part
      SizeT length = vmsize - filesize;
      addr = (Addr)(filesize + segcmd->vmaddr);
      VG_(debugLog)(2, "ume", "mmap fixed (anon) (%#lx, %lu)\n", addr, length);
      res = VG_(am_mmap_anon_fixed_client)(addr, length, prot);
      check_mmap(res, addr, length, "load_segment2");
   }

   return 0;
}
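/* Hypothetical sketch, not part of the sources above: how a caller
   might walk a thin file's load commands and hand each segment
   command to load_segment().  Assumes the usual <mach-o/loader.h>
   definitions; with VG_WORDSIZE == 8 the tag to match would be
   LC_SEGMENT_64 rather than LC_SEGMENT. */
static int load_all_segments(int fd, vki_off_t offset, vki_off_t size,
                             struct load_command *cmds, UInt ncmds,
                             vki_uint8_t **text, vki_uint8_t **stack_start,
                             const HChar *filename)
{
   struct load_command *lc = cmds;
   UInt i;
   for (i = 0; i < ncmds; i++) {
      if (lc->cmd == LC_SEGMENT) {
         if (load_segment(fd, offset, size, text, stack_start,
                          (struct SEGMENT_COMMAND *)lc, filename) != 0)
            return -1;   // propagate the first failure
      }
      // each load command records its own size; step over it
      lc = (struct load_command *)((char *)lc + lc->cmdsize);
   }
   return 0;
}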
static void main2(void)
{
    int err, padfile;
    struct exeinfo info;
    extern char _end;
    int *esp;
    char buf[strlen(valgrind_lib) + sizeof(stage2) + 16];
    info.exe_end  = VG_PGROUNDDN(init_sp); // rounding down
    info.exe_base = KICKSTART_BASE;
    printf("info.exe_end = %p\n", info.exe_end);
#ifdef HAVE_PIE
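    // With PIE, stage2 can go anywhere convenient: back off 32MB
    // (0x02000000) from the executable's end, round down to a 256MB
    // (0x10000000) boundary, and put the map area 16MB above that.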
    info.exe_base = VG_ROUNDDN(info.exe_end - 0x02000000, 0x10000000);
    assert(info.exe_base >= VG_PGROUNDUP(&_end));
    info.map_base = info.exe_base + 0x01000000;
#else
    // If this system doesn't have PIE (position-independent executables),
    // we have to choose a hardwired location for stage2.
    //   info.exe_base = VG_PGROUNDUP(&_end);
    printf("info.exe_base = %p\n", info.exe_base);
    info.map_base = KICKSTART_BASE  + 0x01000000 ;
    printf("info.map_base = %p\n", info.map_base);
#endif

    info.argv = NULL;

    snprintf(buf, sizeof(buf), "%s/%s", valgrind_lib, stage2);
    printf("valgrind_lib = %s\n",valgrind_lib);
    err = do_exec(buf, &info);

    if (err != 0) {
        fprintf(stderr, "valgrind: failed to load %s: %s\n",
                buf, strerror(err));
        exit(1);
    }

    /* Make sure stage2's dynamic linker can't tromp on the lower part
       of the address space. */
    padfile = as_openpadfile();
    as_pad(0, (void *)info.map_base, padfile); // map base is the start of our stuff
    printf("init sp : %x\n", init_sp);

    esp = fix_auxv(init_sp, &info, padfile);
    printf("after fix_auxb\n");

    if (1) {
        printf("---------- launch stage 2 ----------\n");
        printf("eip=%p esp=%p\n", (void *)info.init_eip, esp);
        foreach_map(prmap, /*dummy*/NULL);
    }

    VG_(debugLog)(1, "stage1", "main2(): starting stage2\n");
    printf("jumping to stage 2 \n");
    printf("esp : %x \n eip : %x\n",esp, info.init_eip);

    jump_and_switch_stacks(
        (Addr) esp,            /* stack */
        (Addr) info.init_eip   /* Where to jump */
    );

    /*NOTREACHED*/
    assert(0);
}
/* Note: this is VG_, not ML_. */
SysRes VG_(am_do_mmap_NO_NOTIFY)( Addr start, SizeT length, UInt prot, 
                                  UInt flags, Int fd, Off64T offset)
{
#define DEBUG_MYSELF 0
#if defined(VGO_l4re)
   void *val;
#endif
   SysRes res;
   aspacem_assert(VG_IS_PAGE_ALIGNED(offset));
#  if defined(VGP_x86_linux) || defined(VGP_ppc32_linux) \
      || defined(VGP_arm_linux)
   /* mmap2 uses 4096 chunks even if actual page size is bigger. */
   aspacem_assert((offset % 4096) == 0);
   res = VG_(do_syscall6)(__NR_mmap2, (UWord)start, length,
                          prot, flags, fd, offset / 4096);
#  elif defined(VGP_amd64_linux) || defined(VGP_ppc64_linux) \
        || defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5) \
        || defined(VGP_s390x_linux)
   res = VG_(do_syscall6)(__NR_mmap, (UWord)start, length, 
                         prot, flags, fd, offset);
#  elif defined(VGP_x86_darwin)
   if (fd == 0  &&  (flags & VKI_MAP_ANONYMOUS)) {
       fd = -1;  // MAP_ANON with fd==0 is EINVAL
   }
   res = VG_(do_syscall7)(__NR_mmap, (UWord)start, length,
                          prot, flags, fd, offset & 0xffffffff, offset >> 32);
#  elif defined(VGP_amd64_darwin)
   if (fd == 0  &&  (flags & VKI_MAP_ANONYMOUS)) {
       fd = -1;  // MAP_ANON with fd==0 is EINVAL
   }
   res = VG_(do_syscall6)(__NR_mmap, (UWord)start, length,
                          prot, flags, (UInt)fd, offset);
#  elif defined(VGO_l4re)
   #if DEBUG_MYSELF
   VG_(am_show_nsegments)(0,"before mmap");
   l4re_rm_show_lists();
   VG_(debugLog)(1, "aspacem", "\033[34mmmap(start=%p, length=%d (0x%x), fd=%d,\033[0m\n",(void *)start, (int) length, (int)length, fd);
   VG_(debugLog)(1, "aspacem", "\033[34moffset=%ld (0x%lx), prot=%d=%c%c%c, flags=%d=%s%s%s%s)\033[0m\n",
                 (long)offset, (long) offset,
                 prot,
                 prot & VKI_PROT_READ ? 'r' : '-',
                 prot & VKI_PROT_WRITE ? 'w' : '-',
                 prot & VKI_PROT_EXEC ? 'x' : '-',
                 flags,
                 flags & VKI_MAP_SHARED ? "MAP_SHARED " : "",
                 flags & VKI_MAP_PRIVATE ? "MAP_PRIVATE " : "",
                 flags & VKI_MAP_FIXED ? "MAP_FIXED " : "",
                 flags & VKI_MAP_ANONYMOUS ? "MAP_ANONYMOUS " : "");

   #endif

   val = mmap((void *)start, VG_PGROUNDUP(length), prot, flags, fd, offset);

   #if DEBUG_MYSELF
      VG_(debugLog)(1, "aspacem", "\033[34;1mmmap returns %p\n", val);
      VG_(am_show_nsegments)(0,"after mmap");
      l4re_rm_show_lists();
   #endif

   res._isError = (val == (void *)-1);
   if (sr_isError(res)) {
      res._err = - (int)val;
      res._res = -1;
      VG_(debugLog)(1, "aspacem", "mmap failed\n");
      enter_kdebug("ERROR: mmap failed");
   } else {
      res._err = 0;
      res._res = (int)val;
   }

#  else
#    error Unknown platform
#  endif
   return res;
}
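/* Hypothetical usage sketch, not part of the sources above: request
   an anonymous read/write mapping through the wrapper and unpack the
   SysRes with the standard sr_isError/sr_Res accessors.  The 'demo_'
   name is illustrative only. */
static Addr demo_anon_map ( Addr start, SizeT length )
{
   SysRes sres = VG_(am_do_mmap_NO_NOTIFY)(
                    start, length,
                    VKI_PROT_READ | VKI_PROT_WRITE,
                    VKI_MAP_PRIVATE | VKI_MAP_ANONYMOUS,
                    -1, 0 );
   if (sr_isError(sres))
      return 0;             // callers must treat 0 as "no mapping"
   return sr_Res(sres);     // address of the new mapping
}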
/* 
   When a client clones, we need to keep track of the new thread.  This means:
   1. allocate a ThreadId+ThreadState+stack for the thread

   2. initialize the thread's new VCPU state

   3. create the thread using the same args as the client requested,
   but using the scheduler entrypoint for IP, and a separate stack
   for SP.
 */
static SysRes do_clone ( ThreadId ptid, 
                         UInt flags, Addr sp, 
                         Int *parent_tidptr, 
                         Int *child_tidptr, 
                         Addr child_tls)
{
   const Bool debug = False;

   ThreadId     ctid = VG_(alloc_ThreadState)();
   ThreadState* ptst = VG_(get_ThreadState)(ptid);
   ThreadState* ctst = VG_(get_ThreadState)(ctid);
   ULong        word64;
   UWord*       stack;
   NSegment const* seg;
   SysRes       res;
   vki_sigset_t blockall, savedmask;

   VG_(sigfillset)(&blockall);

   vg_assert(VG_(is_running_thread)(ptid));
   vg_assert(VG_(is_valid_tid)(ctid));

   stack = (UWord*)ML_(allocstack)(ctid);
   if (stack == NULL) {
      res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
      goto out;
   }

//?   /* make a stack frame */
//?   stack -= 16;
//?   *(UWord *)stack = 0;


   /* Copy register state

      Both parent and child return to the same place, and the code
      following the clone syscall works out which is which, so we
      don't need to worry about it.

      The parent gets the child's new tid returned from clone, but the
      child gets 0.

      If the clone call specifies a NULL SP for the new thread, then
      it actually gets a copy of the parent's SP.

      The child's TLS register (r13 on ppc64) gets set to the tlsaddr
      argument if the CLONE_SETTLS flag is set.
   */
   setup_child( &ctst->arch, &ptst->arch );

   /* Make sys_clone appear to have returned Success(0) in the
      child. */
   { UInt old_cr = LibVEX_GuestPPC64_get_CR( &ctst->arch.vex );
     /* %r3 = 0 */
     ctst->arch.vex.guest_GPR3 = 0;
     /* %cr0.so = 0 */
     LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), &ctst->arch.vex );
   }

   if (sp != 0)
      ctst->arch.vex.guest_GPR1 = sp;

   ctst->os_state.parent = ptid;

   /* inherit signal mask */
   ctst->sig_mask = ptst->sig_mask;
   ctst->tmp_sig_mask = ptst->sig_mask;

   /* Start the child with its threadgroup being the same as the
      parent's.  This is so that any exit_group calls that happen
      after the child is created but before it sets its
      os_state.threadgroup field for real (in thread_wrapper in
      syswrap-linux.c), really do kill the new thread.  In other
      words, this avoids a race condition in which the thread is
      unkillable (via exit_group) because its threadgroup is not yet
      set.  The race window is probably only a few hundred or a few
      thousand cycles long.
      See #226116. */
   ctst->os_state.threadgroup = ptst->os_state.threadgroup;

   /* We don't really know where the client stack is, because it's
      allocated by the client.  The best we can do is look at the
      memory mappings and try to derive some useful information.  We
      assume that the stack pointer starts near its highest possible
      value, and can only go down to the start of the mmaped segment. */
   seg = VG_(am_find_nsegment)(sp);
   if (seg && seg->kind != SkResvn) {
      ctst->client_stack_highest_word = (Addr)VG_PGROUNDUP(sp);
      ctst->client_stack_szB = ctst->client_stack_highest_word - seg->start;

      VG_(register_stack)(seg->start, ctst->client_stack_highest_word);

      if (debug)
	 VG_(printf)("\ntid %d: guessed client stack range %#lx-%#lx\n",
		     ctid, seg->start, VG_PGROUNDUP(sp));
   } else {
      VG_(message)(Vg_UserMsg,
                   "!? New thread %d starts with R1(%#lx) unmapped\n",
		   ctid, sp);
      ctst->client_stack_szB  = 0;
   }

   /* Assume the clone will succeed, and tell any tool that wants to
      know that this thread has come into existence.  If the clone
      fails, we'll send out a ll_exit notification for it at the out:
      label below, to clean up. */
   vg_assert(VG_(owns_BigLock_LL)(ptid));
   VG_TRACK ( pre_thread_ll_create, ptid, ctid );

   if (flags & VKI_CLONE_SETTLS) {
      if (debug)
         VG_(printf)("clone child has SETTLS: tls at %#lx\n", child_tls);
      ctst->arch.vex.guest_GPR13 = child_tls;
   }

   flags &= ~VKI_CLONE_SETTLS;

   /* start the thread with everything blocked */
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, &savedmask);

   /* Create the new thread */
   word64 = do_syscall_clone_ppc64_linux(
               ML_(start_thread_NORETURN),
               stack, flags, &VG_(threads)[ctid],
               child_tidptr, parent_tidptr, NULL
            );

   /* Low half word64 is syscall return value.  Hi half is
      the entire CR, from which we need to extract CR0.SO. */
   /* VG_(printf)("word64 = 0x%llx\n", word64); */
   res = VG_(mk_SysRes_ppc64_linux)( 
            /*val*/(UInt)(word64 & 0xFFFFFFFFULL), 
            /*errflag*/ (UInt)((word64 >> (32+28)) & 1)
         );

   VG_(sigprocmask)(VKI_SIG_SETMASK, &savedmask, NULL);

  out:
   if (sr_isError(res)) {
      /* clone failed */
      VG_(cleanup_thread)(&ctst->arch);
      ctst->status = VgTs_Empty;
      /* oops.  Better tell the tool the thread exited in a hurry :-) */
      VG_TRACK( pre_thread_ll_exit, ctid );
   }

   return res;
}
void pthread_hijack(Addr self, Addr kport, Addr func, Addr func_arg, 
                    Addr stacksize, Addr flags, Addr sp)
{
   vki_sigset_t blockall;
   ThreadState *tst = (ThreadState *)func_arg;
   VexGuestX86State *vex = &tst->arch.vex;

   // VG_(printf)("pthread_hijack pthread %p, machthread %p, func %p, arg %p, stack %p, flags %p, stack %p\n", self, kport, func, func_arg, stacksize, flags, sp);

   // Wait for parent thread's permission.
   // The parent thread holds V's lock on our behalf.
   semaphore_wait(tst->os_state.child_go);

   /* Start the thread with all signals blocked.  VG_(scheduler) will
      set the mask correctly when we finally get there. */
   VG_(sigfillset)(&blockall);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, NULL);

   // Set thread's registers
   // Do this FIRST because some code below tries to collect a backtrace, 
   // which requires valid register data.
   // DDD: need to do post_reg_write events here?
   LibVEX_GuestX86_initialise(vex);
   vex->guest_EIP = pthread_starter;
   vex->guest_EAX = self;
   vex->guest_EBX = kport;
   vex->guest_ECX = func;
   vex->guest_EDX = tst->os_state.func_arg;
   vex->guest_EDI = stacksize;
   vex->guest_ESI = flags;
   vex->guest_ESP = sp;

   // Record thread's stack and Mach port and pthread struct
   tst->os_state.pthread = self;
   tst->os_state.lwpid = kport;
   record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "thread-%p");

   if ((flags & 0x01000000) == 0) {
      // kernel allocated stack - needs mapping
      Addr stack = VG_PGROUNDUP(sp) - stacksize;
      tst->client_stack_highest_word = stack+stacksize;
      tst->client_stack_szB = stacksize;

      // pthread structure
      ML_(notify_core_and_tool_of_mmap)(
            stack+stacksize, pthread_structsize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // stack contents
      ML_(notify_core_and_tool_of_mmap)(
            stack, stacksize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // guard page
      ML_(notify_core_and_tool_of_mmap)(
            stack-VKI_PAGE_SIZE, VKI_PAGE_SIZE, 
            0, VKI_MAP_PRIVATE, -1, 0);
   } else {
      // client allocated stack
      find_stack_segment(tst->tid, sp);
   }
   ML_(sync_mappings)("after", "pthread_hijack", 0);

   // DDD: should this be here rather than in POST(sys_bsdthread_create)?
   // But we don't have ptid here...
   //VG_TRACK ( pre_thread_ll_create, ptid, tst->tid );

   // Tell parent thread's POST(sys_bsdthread_create) that we're done 
   // initializing registers and mapping memory.
   semaphore_signal(tst->os_state.child_done);
   // LOCK IS GONE BELOW THIS POINT

   // Go!
   call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0, 
                         start_thread_NORETURN, (Word)tst);

   /*NOTREACHED*/
   vg_assert(0);
}
/*  wqthread note: The kernel may create or destroy pthreads in the 
    wqthread pool at any time with no userspace interaction, 
    and wqthread_start may be entered at any time with no userspace 
    interaction.
    To handle this in valgrind, we create and destroy a valgrind 
    thread for every work item.
*/
void wqthread_hijack(Addr self, Addr kport, Addr stackaddr, Addr workitem, 
                     Int reuse, Addr sp)
{
   ThreadState *tst;
   VexGuestX86State *vex;
   Addr stack;
   SizeT stacksize;
   vki_sigset_t blockall;

   /* When we enter here we hold no lock (!), so we better acquire it
      pronto.  Why do we hold no lock?  Because (presumably) the only
      way to get here is as a result of a SfMayBlock syscall
      "workq_ops(WQOPS_THREAD_RETURN)", which will have dropped the
      lock.  At least that's clear for the 'reuse' case.  The
      non-reuse case?  Dunno, perhaps it's a new thread the kernel
      pulled out of a hat.  In any case we still need to take a
      lock. */
   VG_(acquire_BigLock_LL)("wqthread_hijack");

   /* Start the thread with all signals blocked.  VG_(scheduler) will
      set the mask correctly when we finally get there. */
   VG_(sigfillset)(&blockall);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, NULL);

   if (reuse) {

      /* For whatever reason, tst->os_state.pthread appears to have a
         constant offset of 72 on 10.7, but zero on 10.6 and 10.5.  No
         idea why. */
#     if DARWIN_VERS <= DARWIN_10_6
      UWord magic_delta = 0;
#     elif DARWIN_VERS >= DARWIN_10_7
      UWord magic_delta = 0x48;
#     endif

      // This thread already exists; we're merely re-entering 
      // after leaving via workq_ops(WQOPS_THREAD_RETURN). 
      // Don't allocate any V thread resources.
      // Do reset thread registers.
      ThreadId tid = VG_(lwpid_to_vgtid)(kport);
      vg_assert(VG_(is_valid_tid)(tid));
      vg_assert(mach_thread_self() == kport);

      tst = VG_(get_ThreadState)(tid);

      if (0) VG_(printf)("wqthread_hijack reuse %s: tid %d, tst %p, "
                         "tst->os_state.pthread %#lx, self %#lx\n",
                         tst->os_state.pthread == self ? "SAME" : "DIFF",
                         tid, tst, tst->os_state.pthread, self);

      vex = &tst->arch.vex;
      vg_assert(tst->os_state.pthread - magic_delta == self);
   }
   else {
      // This is a new thread.
      tst = VG_(get_ThreadState)(VG_(alloc_ThreadState)());        
      vex = &tst->arch.vex;
      allocstack(tst->tid);
      LibVEX_GuestX86_initialise(vex);
      /* Tell threading tools the new thread exists.  FIXME: we need
         to know the identity (tid) of the parent thread, in order
         that threading tools can make a dependency edge from it to
         this thread.  But we don't know what the parent thread is.
         Hence pass 1 (the root thread).  This is completely wrong in
         general, and could cause large numbers of false races to be
         reported.  In fact, it's positively dangerous; we don't even
         know if thread 1 is still alive, and the thread checkers are
         likely to assert if it isn't. */
      VG_TRACK(pre_thread_ll_create, 1/*BOGUS*/, tst->tid);
   }
        
   // Set thread's registers
   // Do this FIRST because some code below tries to collect a backtrace, 
   // which requires valid register data.
   vex->guest_EIP = wqthread_starter;
   vex->guest_EAX = self;
   vex->guest_EBX = kport;
   vex->guest_ECX = stackaddr;
   vex->guest_EDX = workitem;
   vex->guest_EDI = reuse;
   vex->guest_ESI = 0;
   vex->guest_ESP = sp;

   stacksize = 512*1024;  // wq stacks are always DEFAULT_STACK_SIZE
   stack = VG_PGROUNDUP(sp) - stacksize;

   VG_TRACK(workq_task_start, tst->tid, workitem);
   if (reuse) {
      // Continue V's thread back in the scheduler.
      // The client thread is of course in another location entirely.

      /* Drop the lock before going into
         ML_(wqthread_continue_NORETURN).  The latter will immediately
         attempt to reacquire it in non-LL mode, which is a bit
         wasteful but I don't think is harmful.  A better solution
         would be to not drop the lock but instead "upgrade" it from a
         LL lock to a full lock, but that's too much like hard work
         right now. */
      VG_(release_BigLock_LL)("wqthread_hijack(1)");
      ML_(wqthread_continue_NORETURN)(tst->tid);
   } 
   else {
      // Record thread's stack and Mach port and pthread struct
      tst->os_state.pthread = self;
      tst->os_state.lwpid = kport;
      record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "wqthread-%p");
      
      // kernel allocated stack - needs mapping
      tst->client_stack_highest_word = stack+stacksize;
      tst->client_stack_szB = stacksize;

      // tell the tool that we are at a point after the new thread
      // has its registers set up (so we can take a stack snapshot),
      // but before it has executed any instructions (or, really,
      // before it has done any memory references).
      VG_TRACK(pre_thread_first_insn, tst->tid);
      
      // pthread structure
      ML_(notify_core_and_tool_of_mmap)(
            stack+stacksize, pthread_structsize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // stack contents
      // GrP fixme uninitialized!
      ML_(notify_core_and_tool_of_mmap)(
            stack, stacksize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // guard page
      // GrP fixme ban_mem_stack!
      ML_(notify_core_and_tool_of_mmap)(
            stack-VKI_PAGE_SIZE, VKI_PAGE_SIZE, 
            0, VKI_MAP_PRIVATE, -1, 0);

      ML_(sync_mappings)("after", "wqthread_hijack", 0);

      // Go!
      /* Same comments as the 'release' in the then-clause.
         start_thread_NORETURN calls run_thread_NORETURN calls
         thread_wrapper which acquires the lock before continuing.
         Let's hope nothing non-thread-local happens until that point.

         DDD: I think this is plain wrong .. if we get to
         thread_wrapper not holding the lock, and someone has recycled
         this thread slot in the meantime, we're hosed.  Is that
         possible, though? */
      VG_(release_BigLock_LL)("wqthread_hijack(2)");
      call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0, 
                            start_thread_NORETURN, (Word)tst);
   }

   /*NOTREACHED*/
   vg_assert(0);
}
/*  wqthread note: The kernel may create or destroy pthreads in the 
    wqthread pool at any time with no userspace interaction, 
    and wqthread_start may be entered at any time with no userspace 
    interaction.
    To handle this in valgrind, we create and destroy a valgrind 
    thread for every work item.
*/
void wqthread_hijack(Addr self, Addr kport, Addr stackaddr, Addr workitem, 
                     Int reuse, Addr sp)
{
   ThreadState *tst;
   VexGuestX86State *vex;
   Addr stack;
   SizeT stacksize;
   vki_sigset_t blockall;

   /* When we enter here we hold no lock (!), so we better acquire it
      pronto.  Why do we hold no lock?  Because (presumably) the only
      way to get here is as a result of a SfMayBlock syscall
      "workq_ops(WQOPS_THREAD_RETURN)", which will have dropped the
      lock.  At least that's clear for the 'reuse' case.  The
      non-reuse case?  Dunno, perhaps it's a new thread the kernel
      pulled out of a hat.  In any case we still need to take a
      lock. */
   VG_(acquire_BigLock_LL)("wqthread_hijack");

   /* Start the thread with all signals blocked.  VG_(scheduler) will
      set the mask correctly when we finally get there. */
   VG_(sigfillset)(&blockall);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, NULL);

   if (reuse) {
      // This thread already exists; we're merely re-entering 
      // after leaving via workq_ops(WQOPS_THREAD_RETURN). 
      // Don't allocate any V thread resources.
      // Do reset thread registers.
      ThreadId tid = VG_(lwpid_to_vgtid)(kport);
      vg_assert(VG_(is_valid_tid)(tid));
      vg_assert(mach_thread_self() == kport);

      tst = VG_(get_ThreadState)(tid);
      vex = &tst->arch.vex;
      vg_assert(tst->os_state.pthread == self);
   }
   else {
      // This is a new thread.
      tst = VG_(get_ThreadState)(VG_(alloc_ThreadState)());        
      vex = &tst->arch.vex;
      allocstack(tst->tid);
      LibVEX_GuestX86_initialise(vex);
   }
        
   // Set thread's registers
   // Do this FIRST because some code below tries to collect a backtrace, 
   // which requires valid register data.
   vex->guest_EIP = wqthread_starter;
   vex->guest_EAX = self;
   vex->guest_EBX = kport;
   vex->guest_ECX = stackaddr;
   vex->guest_EDX = workitem;
   vex->guest_EDI = reuse;
   vex->guest_ESI = 0;
   vex->guest_ESP = sp;

   stacksize = 512*1024;  // wq stacks are always DEFAULT_STACK_SIZE
   stack = VG_PGROUNDUP(sp) - stacksize;

   if (reuse) {
      // Continue V's thread back in the scheduler.
      // The client thread is of course in another location entirely.

      /* Drop the lock before going into
         ML_(wqthread_continue_NORETURN).  The latter will immediately
         attempt to reacquire it in non-LL mode, which is a bit
         wasteful but I don't think is harmful.  A better solution
         would be to not drop the lock but instead "upgrade" it from a
         LL lock to a full lock, but that's too much like hard work
         right now. */
      VG_(release_BigLock_LL)("wqthread_hijack(1)");
      ML_(wqthread_continue_NORETURN)(tst->tid);
   } 
   else {
      // Record thread's stack and Mach port and pthread struct
      tst->os_state.pthread = self;
      tst->os_state.lwpid = kport;
      record_named_port(tst->tid, kport, MACH_PORT_RIGHT_SEND, "wqthread-%p");
      
      // kernel allocated stack - needs mapping
      tst->client_stack_highest_word = stack+stacksize;
      tst->client_stack_szB = stacksize;

      // GrP fixme scheduler lock?!
      
      // pthread structure
      ML_(notify_core_and_tool_of_mmap)(
            stack+stacksize, pthread_structsize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // stack contents
      // GrP fixme uninitialized!
      ML_(notify_core_and_tool_of_mmap)(
            stack, stacksize, 
            VKI_PROT_READ|VKI_PROT_WRITE, VKI_MAP_PRIVATE, -1, 0);
      // guard page
      // GrP fixme ban_mem_stack!
      ML_(notify_core_and_tool_of_mmap)(
            stack-VKI_PAGE_SIZE, VKI_PAGE_SIZE, 
            0, VKI_MAP_PRIVATE, -1, 0);

      ML_(sync_mappings)("after", "wqthread_hijack", 0);

      // Go!
      /* Same comments as the 'release' in the then-clause.
         start_thread_NORETURN calls run_thread_NORETURN calls
         thread_wrapper which acquires the lock before continuing.
         Let's hope nothing non-thread-local happens until that point.

         DDD: I think this is plain wrong .. if we get to
         thread_wrapper not holding the lock, and someone has recycled
         this thread slot in the meantime, we're hosed.  Is that
         possible, though? */
      VG_(release_BigLock_LL)("wqthread_hijack(2)");
      call_on_new_stack_0_1(tst->os_state.valgrind_stack_init_SP, 0, 
                            start_thread_NORETURN, (Word)tst);
   }

   /*NOTREACHED*/
   vg_assert(0);
}
/* Take a snapshot of the client's stack, putting up to 'n_ips'
   IPs into 'ips'.  In order to be thread-safe, we pass in the
   thread's IP and SP, plus FP and LR where those are meaningful.
   Returns the number of IPs put in 'ips'.

   If you know what the thread ID for this stack is, send that as the
   first parameter, else send zero.  This helps generate better stack
   traces on ppc64-linux and has no effect on other platforms.
*/
UInt VG_(get_StackTrace2) ( ThreadId tid_if_known,
                            Addr* ips, UInt n_ips, 
                            Addr ip, Addr sp, Addr fp, Addr lr,
                            Addr fp_min, Addr fp_max_orig )
{
#if defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   Bool  lr_is_first_RA = False; /* ppc only */
#endif
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   /* Snaffle IPs from the client's stack into ips[0 .. n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("n_ips=%d fp_min=%p fp_max_orig=%p, fp_max=%p ip=%p fp=%p\n",
		  n_ips, fp_min, fp_max_orig, fp_max, ip, fp);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/

   if (fp_min + VG_(clo_max_stackframe) <= fp_max) {
      /* If the stack is ridiculously big, don't poke around ... but
         don't bomb out either.  Needed to make John Regehr's
         user-space threads package work. JRS 20021001 */
      ips[0] = ip;
      return 1;
   } 

   /* Otherwise unwind the stack in a platform-specific way.  Trying
      to merge the x86, amd64, ppc32 and ppc64 logic into a single
      piece of code is just too confusing and difficult to
      performance-tune.  */

#  if defined(VGP_x86_linux)

   /*--------------------- x86 ---------------------*/

   /* fp is %ebp.  sp is %esp.  ip is %eip. */

   ips[0] = ip;
   i = 1;

   /* Loop unwinding the stack. Note that the IP value we get on
    * each pass (whether from CFI info or a stack frame) is a
    * return address so is actually after the calling instruction
    * in the calling function.
    *
    * Because of this we subtract one from the IP after each pass
    * of the loop so that we find the right CFI block on the next
    * pass - otherwise we can find the wrong CFI info if it happens
    * to change after the calling instruction and that will mean
    * that we will fail to unwind the next step.
    *
    * This most frequently happens at the end of a function when
    * a tail call occurs and we wind up using the CFI info for the
    * next function which is completely wrong.
    */
   while (True) {

      if (i >= n_ips)
         break;

      /* Try to derive a new (ip,sp,fp) triple from the current
         set. */

      /* On x86, first try the old-fashioned method of following the
         %ebp-chain.  Code which doesn't use this (that is, compiled
         with -fomit-frame-pointer) is not ABI compliant and so
         relatively rare.  Besides, trying the CFI first almost always
         fails, and is expensive. */
      /* Deal with frames resulting from functions which begin "pushl
         %ebp ; movl %esp, %ebp" which is the ABI-mandated preamble. */
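      /* Under that preamble the frame is laid out so that fp[0] holds
         the caller's saved %ebp and fp[1] the return address. */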
      if (fp_min <= fp && fp <= fp_max) {
         /* fp looks sane, so use it. */
         ip = (((UWord*)fp)[1]);
         sp = fp + sizeof(Addr) /*saved %ebp*/ 
                 + sizeof(Addr) /*ra*/;
         fp = (((UWord*)fp)[0]);
         ips[i++] = ip;
         if (debug)
            VG_(printf)("     ipsF[%d]=%08p\n", i-1, ips[i-1]);
         ip = ip - 1;
         continue;
      }

      /* That didn't work out, so see if there is any CFI info to hand
         which can be used. */
      if ( VG_(use_CF_info)( &ip, &sp, &fp, fp_min, fp_max ) ) {
         ips[i++] = ip;
         if (debug)
            VG_(printf)("     ipsC[%d]=%08p\n", i-1, ips[i-1]);
         ip = ip - 1;
         continue;
      }

      /* No luck.  We have to give up. */
      break;
   }

#  elif defined(VGP_amd64_linux)

   /*--------------------- amd64 ---------------------*/

   /* fp is %rbp.  sp is %rsp.  ip is %rip. */

   ips[0] = ip;
   i = 1;

   /* Loop unwinding the stack. Note that the IP value we get on
    * each pass (whether from CFI info or a stack frame) is a
    * return address so is actually after the calling instruction
    * in the calling function.
    *
    * Because of this we subtract one from the IP after each pass
    * of the loop so that we find the right CFI block on the next
    * pass - otherwise we can find the wrong CFI info if it happens
    * to change after the calling instruction and that will mean
    * that we will fail to unwind the next step.
    *
    * This most frequently happens at the end of a function when
    * a tail call occurs and we wind up using the CFI info for the
    * next function which is completely wrong.
    */
   while (True) {

      if (i >= n_ips)
         break;

      /* Try to derive a new (ip,sp,fp) triple from the current
         set. */

      /* First off, see if there is any CFI info to hand which can
         be used. */
      if ( VG_(use_CF_info)( &ip, &sp, &fp, fp_min, fp_max ) ) {
         ips[i++] = ip;
         if (debug)
            VG_(printf)("     ipsC[%d]=%08p\n", i-1, ips[i-1]);
         ip = ip - 1;
         continue;
      }

      /* If VG_(use_CF_info) fails, it won't modify ip/sp/fp, so
         we can safely try the old-fashioned method. */
      /* This bit is supposed to deal with frames resulting from
         functions which begin "pushq %rbp ; movq %rsp, %rbp".
         Unfortunately, since we can't (easily) look at the insns at
         the start of the fn, like GDB does, there's no reliable way
         to tell.  Hence the hack of first trying out CFI, and if that
         fails, then use this as a fallback. */
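      /* In such a frame, fp[0] holds the caller's saved %rbp and
         fp[1] the return address. */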
      if (fp_min <= fp && fp <= fp_max) {
         /* fp looks sane, so use it. */
         ip = (((UWord*)fp)[1]);
         sp = fp + sizeof(Addr) /*saved %rbp*/ 
                 + sizeof(Addr) /*ra*/;
         fp = (((UWord*)fp)[0]);
         ips[i++] = ip;
         if (debug)
            VG_(printf)("     ipsF[%d]=%08p\n", i-1, ips[i-1]);
         ip = ip - 1;
         continue;
      }

      /* No luck there.  We have to give up. */
      break;
   }

#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)

   /*--------------------- ppc32/64 ---------------------*/

   /* fp is %r1.  ip is %cia.  Note, ppc uses r1 as both the stack and
      frame pointers. */

#  if defined(VGP_ppc64_linux)
   /* Deal with bogus LR values caused by function
      interception/wrapping; see comment on similar code a few lines
      further down. */
   if (lr == (Addr)&VG_(ppc64_linux_magic_redirect_return_stub)
       && VG_(is_valid_tid)(tid_if_known)) {
      Long hsp = VG_(threads)[tid_if_known].arch.vex.guest_REDIR_SP;
      if (hsp >= 1 && hsp < VEX_GUEST_PPC64_REDIR_STACK_SIZE)
         lr = VG_(threads)[tid_if_known]
                 .arch.vex.guest_REDIR_STACK[hsp-1];
   }
#  endif

   lr_is_first_RA = False;
   {
#     define M_VG_ERRTXT 1000
      UChar buf_lr[M_VG_ERRTXT], buf_ip[M_VG_ERRTXT];
      if (VG_(get_fnname_nodemangle) (lr, buf_lr, M_VG_ERRTXT))
         if (VG_(get_fnname_nodemangle) (ip, buf_ip, M_VG_ERRTXT))
            if (VG_(strncmp)(buf_lr, buf_ip, M_VG_ERRTXT))
               lr_is_first_RA = True;
#     undef M_VG_ERRTXT
   }

   ips[0] = ip;
   i = 1;

   if (fp_min <= fp && fp < fp_max-VG_WORDSIZE+1) {

      /* initial FP is sane; keep going */
      fp = (((UWord*)fp)[0]);

      while (True) {

        /* on ppc64-linux (ppc64-elf, really), the lr save slot is 2
           words back from sp, whereas on ppc32-elf(?) it's only one
           word back. */
#        if defined(VGP_ppc64_linux)
         const Int lr_offset = 2;
#        else
         const Int lr_offset = 1;
#        endif

         if (i >= n_ips)
            break;

         /* Try to derive a new (ip,fp) pair from the current set. */

         if (fp_min <= fp && fp <= fp_max) {
            /* fp looks sane, so use it. */

            if (i == 1 && lr_is_first_RA)
               ip = lr;
            else
               ip = (((UWord*)fp)[lr_offset]);

#           if defined(VGP_ppc64_linux)
            /* Nasty hack to do with function replacement/wrapping on
               ppc64-linux.  If LR points to our magic return stub,
               then we are in a wrapped or intercepted function, in
               which LR has been messed with.  The original LR will
               have been pushed onto the thread's hidden REDIR stack
               one down from the top (top element is the saved R2) and
               so we should restore the value from there instead. */
            if (i == 1 
                && ip == (Addr)&VG_(ppc64_linux_magic_redirect_return_stub)
                && VG_(is_valid_tid)(tid_if_known)) {
               Long hsp = VG_(threads)[tid_if_known].arch.vex.guest_REDIR_SP;
               if (hsp >= 1 && hsp < VEX_GUEST_PPC64_REDIR_STACK_SIZE)
                  ip = VG_(threads)[tid_if_known]
                          .arch.vex.guest_REDIR_STACK[hsp-1];
            }
#           endif

            fp = (((UWord*)fp)[0]);
            ips[i++] = ip;
            if (debug)
               VG_(printf)("     ipsF[%d]=%08p\n", i-1, ips[i-1]);
            continue;
         }

         /* No luck there.  We have to give up. */
         break;
      }
   }

#  else
#    error "Unknown platform"
#  endif

   n_found = i;
   return n_found;
}
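/* Hypothetical usage sketch, not part of the sources above: capture
   up to 8 IPs for a thread whose registers were already read out.
   'stack_hi' (the highest usable stack address) and the 'demo_' name
   are illustrative assumptions. */
static UInt demo_capture_trace ( ThreadId tid, Addr ip, Addr sp,
                                 Addr fp, Addr lr, Addr stack_hi )
{
   Addr ips[8];
   /* Pass the current SP as fp_min: no valid frame can lie below it. */
   return VG_(get_StackTrace2)( tid, ips, 8, ip, sp, fp, lr,
                                sp, stack_hi );
}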