예제 #1
0
/* First pass over a BB to instrument, counting instructions and jumps
 * This is needed for the size of the BB struct to allocate
 *
 * Called from CLG_(get_bb)
 */
void CLG_(collectBlockInfo)(IRBB* bbIn,
                            /*INOUT*/ UInt* instrs,
                            /*INOUT*/ UInt* cjmps,
                            /*INOUT*/ Bool* cjmp_inverted)
{
    Int i;
    IRStmt* st;
    Addr instrAddr =0, jumpDst;
    UInt instrLen = 0;
    Bool toNextInstr = False;

    // Ist_Exit has to be ignored in preamble code, before first IMark:
    // preamble code is added by VEX for self modifying code, and has
    // nothing to do with client code
    Bool inPreamble = True;

    if (!bbIn) return;

    for (i = 0; i < bbIn->stmts_used; i++) {
        st = bbIn->stmts[i];
        if (Ist_IMark == st->tag) {
            inPreamble = False;

            instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
            instrLen  = st->Ist.IMark.len;

            (*instrs)++;
            toNextInstr = False;
        }
        if (inPreamble) continue;
        if (Ist_Exit == st->tag) {
            jumpDst = IRConst2Addr(st->Ist.Exit.dst);
            toNextInstr =  (jumpDst == instrAddr + instrLen);

            (*cjmps)++;
        }
    }

    /* if the last instructions of BB conditionally jumps to next instruction
     * (= first instruction of next BB in memory), this is a inverted by VEX.
     */
    *cjmp_inverted = toNextInstr;
}
예제 #2
0
static
void collectStatementInfo(IRTypeEnv* tyenv, IRBB* bbOut, IRStmt* st,
                          Addr* instrAddr, UInt* instrLen,
                          IRExpr** loadAddrExpr, IRExpr** storeAddrExpr,
                          UInt* dataSize, IRType hWordTy)
{
    CLG_ASSERT(isFlatIRStmt(st));

    switch (st->tag) {
    case Ist_NoOp:
        break;

    case Ist_AbiHint:
        /* ABI hints aren't interesting.  Ignore. */
        break;

    case Ist_IMark:
        /* st->Ist.IMark.addr is a 64-bit int.  ULong_to_Ptr casts this
           to the host's native pointer type; if that is 32 bits then it
           discards the upper 32 bits.  If we are cachegrinding on a
           32-bit host then we are also ensured that the guest word size
           is 32 bits, due to the assertion in cg_instrument that the
           host and guest word sizes must be the same.  Hence
           st->Ist.IMark.addr will have been derived from a 32-bit guest
           code address and truncation of it is safe.  I believe this
           assignment should be correct for both 32- and 64-bit
           machines. */
        *instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
        *instrLen =        st->Ist.IMark.len;
        break;

    case Ist_Tmp: {
        IRExpr* data = st->Ist.Tmp.data;
        if (data->tag == Iex_Load) {
            IRExpr* aexpr = data->Iex.Load.addr;
            CLG_ASSERT( isIRAtom(aexpr) );
            // Note also, endianness info is ignored.  I guess that's not
            // interesting.
            // XXX: repe cmpsb does two loads... the first one is ignored here!
            //tl_assert( NULL == *loadAddrExpr );          // XXX: ???
            *loadAddrExpr = aexpr;
            *dataSize = sizeofIRType(data->Iex.Load.ty);
        }
        break;
    }

    case Ist_Store: {
        IRExpr* data  = st->Ist.Store.data;
        IRExpr* aexpr = st->Ist.Store.addr;
        CLG_ASSERT( isIRAtom(aexpr) );
        if ( NULL == *storeAddrExpr ) {
            /* this is a kludge: ignore all except the first store from
               an instruction. */
            *storeAddrExpr = aexpr;
            *dataSize = sizeofIRType(typeOfIRExpr(tyenv, data));
        }
        break;
    }

    case Ist_Dirty: {
        IRDirty* d = st->Ist.Dirty.details;
        if (d->mFx != Ifx_None) {
            /* This dirty helper accesses memory.  Collect the
               details. */
            CLG_ASSERT(d->mAddr != NULL);
            CLG_ASSERT(d->mSize != 0);
            *dataSize = d->mSize;
            if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                *loadAddrExpr = d->mAddr;
            if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                *storeAddrExpr = d->mAddr;
        } else {
            CLG_ASSERT(d->mAddr == NULL);
            CLG_ASSERT(d->mSize == 0);
        }
        break;
    }

    case Ist_Put:
    case Ist_PutI:
    case Ist_MFence:
    case Ist_Exit:
        break;

    default:
        VG_(printf)("\n");
        ppIRStmt(st);
        VG_(printf)("\n");
        VG_(tool_panic)("Callgrind: unhandled IRStmt");
    }
}
예제 #3
0
static 
Bool handleOneStatement(IRTypeEnv* tyenv, IRBB* bbOut, IRStmt* st, IRStmt* st2,
                        Addr* instrAddr, UInt* instrLen,
                        IRExpr** loadAddrExpr, IRExpr** storeAddrExpr,
                        UInt* dataSize)
{
   tl_assert(isFlatIRStmt(st));

   switch (st->tag) {
   case Ist_NoOp:
   case Ist_AbiHint:
   case Ist_Put:
   case Ist_PutI:
   case Ist_MFence:
      break;

   case Ist_Exit: {
      // This is a conditional jump.  Most of the time, we want to add the
      // instrumentation before it, to ensure it gets executed.  Eg, (1) if
      // this conditional jump is just before an IMark:
      //
      //   t108 = Not1(t107)
      //   [add instrumentation here]
      //   if (t108) goto {Boring} 0x3A96637D:I32
      //   ------ IMark(0x3A966370, 7) ------
      //
      // or (2) if this conditional jump is the last thing before the
      // block-ending unconditional jump:
      //
      //   t111 = Not1(t110)
      //   [add instrumentation here]
      //   if (t111) goto {Boring} 0x3A96637D:I32
      //   goto {Boring} 0x3A966370:I32
      //
      // One case (3) where we want the instrumentation after the conditional
      // jump is when the conditional jump is for an x86 REP instruction:
      //
      //   ------ IMark(0x3A967F13, 2) ------
      //   t1 = GET:I32(4)
      //   t6 = CmpEQ32(t1,0x0:I32) 
      //   if (t6) goto {Boring} 0x3A967F15:I32    # ignore this cond jmp
      //   t7 = Sub32(t1,0x1:I32)
      //   PUT(4) = t7
      //   ...
      //   t56 = Not1(t55)
      //   [add instrumentation here]
      //   if (t56) goto {Boring} 0x3A967F15:I32
      //
      // Therefore, we return true if the next statement is an IMark, or if
      // there is no next statement (which matches case (2), as the final
      // unconditional jump is not represented in the IRStmt list).
      //
      // Note that this approach won't do in the long run for supporting
      // PPC, but it's good enough for x86/AMD64 for the 3.0.X series.
      if (NULL == st2 || Ist_IMark == st2->tag)
         return True;
      else
         return False;
   }

   case Ist_IMark:
      /* st->Ist.IMark.addr is a 64-bit int.  ULong_to_Ptr casts this
         to the host's native pointer type; if that is 32 bits then it
         discards the upper 32 bits.  If we are cachegrinding on a
         32-bit host then we are also ensured that the guest word size
         is 32 bits, due to the assertion in cg_instrument that the
         host and guest word sizes must be the same.  Hence
         st->Ist.IMark.addr will have been derived from a 32-bit guest
         code address and truncation of it is safe.  I believe this
         assignment should be correct for both 32- and 64-bit
         machines. */
      *instrAddr = (Addr)ULong_to_Ptr(st->Ist.IMark.addr);
      *instrLen =        st->Ist.IMark.len;
      break;

   case Ist_Tmp: {
      IRExpr* data = st->Ist.Tmp.data;
      if (data->tag == Iex_Load) {
         IRExpr* aexpr = data->Iex.Load.addr;
         tl_assert( isIRAtom(aexpr) );
         // Note also, endianness info is ignored.  I guess that's not
         // interesting.
         // XXX: repe cmpsb does two loads... the first one is ignored here!
         //tl_assert( NULL == *loadAddrExpr );          // XXX: ???
         *loadAddrExpr = aexpr;
         *dataSize = sizeofIRType(data->Iex.Load.ty);
      }
      break;
   }
      
   case Ist_Store: {
      IRExpr* data  = st->Ist.Store.data;
      IRExpr* aexpr = st->Ist.Store.addr;
      tl_assert( isIRAtom(aexpr) );
      tl_assert( NULL == *storeAddrExpr );          // XXX: ???
      *storeAddrExpr = aexpr;
      *dataSize = sizeofIRType(typeOfIRExpr(tyenv, data));
      break;
   }
   
   case Ist_Dirty: {
      IRDirty* d = st->Ist.Dirty.details;
      if (d->mFx != Ifx_None) {
         /* This dirty helper accesses memory.  Collect the
            details. */
         tl_assert(d->mAddr != NULL);
         tl_assert(d->mSize != 0);
         *dataSize = d->mSize;
         if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
            *loadAddrExpr = d->mAddr;
         if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
            *storeAddrExpr = d->mAddr;
      } else {
         tl_assert(d->mAddr == NULL);
         tl_assert(d->mSize == 0);
      }
      break;
   }

   default:
      VG_(printf)("\n");
      ppIRStmt(st);
      VG_(printf)("\n");
      VG_(tool_panic)("Cachegrind: unhandled IRStmt");
   }

   return False;
}
예제 #4
0
/* Take a snapshot of the client's stack, putting the up to 'n_ips'
   IPs into 'ips'.  In order to be thread-safe, we pass in the
   thread's IP SP, FP if that's meaningful, and LR if that's
   meaningful.  Returns number of IPs put in 'ips'.

   If you know what the thread ID for this stack is, send that as the
   first parameter, else send zero.  This helps generate better stack
   traces on ppc64-linux and has no effect on other platforms.
*/
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               Addr ip, Addr sp, Addr fp, Addr lr,
                               Addr fp_min, Addr fp_max_orig )
{
#  if defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux) \
                               || defined(VGP_ppc32_aix5) \
                               || defined(VGP_ppc64_aix5)
   Bool  lr_is_first_RA = False;
#  endif
#  if defined(VGP_ppc64_linux) || defined(VGP_ppc64_aix5) \
                               || defined(VGP_ppc32_aix5)
   Word redir_stack_size = 0;
   Word redirs_used      = 0;
#  endif

   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   /* Snaffle IPs from the client's stack into ips[0 .. n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("n_ips=%d fp_min=%p fp_max_orig=%p, "
                  "fp_max=%p ip=%p fp=%p\n",
		  n_ips, fp_min, fp_max_orig, fp_max, ip, fp);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/
   if (fp_min + 512 >= fp_max) {
      /* If the stack limits look bogus, don't poke around ... but
         don't bomb out either. */
      ips[0] = ip;
      return 1;
   } 

   /* Otherwise unwind the stack in a platform-specific way.  Trying
      to merge the x86, amd64, ppc32 and ppc64 logic into a single
      piece of code is just too confusing and difficult to
      performance-tune.  */

#  if defined(VGP_x86_linux)

   /*--------------------- x86 ---------------------*/

   /* fp is %ebp.  sp is %esp.  ip is %eip. */

   if (sps) sps[0] = sp;
   if (fps) fps[0] = fp;
   ips[0] = ip;
   i = 1;

   /* Loop unwinding the stack. Note that the IP value we get on
    * each pass (whether from CFI info or a stack frame) is a
    * return address so is actually after the calling instruction
    * in the calling function.
    *
    * Because of this we subtract one from the IP after each pass
    * of the loop so that we find the right CFI block on the next
    * pass - otherwise we can find the wrong CFI info if it happens
    * to change after the calling instruction and that will mean
    * that we will fail to unwind the next step.
    *
    * This most frequently happens at the end of a function when
    * a tail call occurs and we wind up using the CFI info for the
    * next function which is completely wrong.
    *
    * Note that VG_(get_data_description) (in m_debuginfo) has to take
    * this same problem into account when unwinding the stack to
    * examine local variable descriptions (as documented therein in
    * comments).
    */
   while (True) {

      if (i >= n_ips)
         break;

      /* Try to derive a new (ip,sp,fp) triple from the current
         set. */

      /* On x86, first try the old-fashioned method of following the
         %ebp-chain.  Code which doesn't use this (that is, compiled
         with -fomit-frame-pointer) is not ABI compliant and so
         relatively rare.  Besides, trying the CFI first almost always
         fails, and is expensive. */
      /* Deal with frames resulting from functions which begin "pushl%
         ebp ; movl %esp, %ebp" which is the ABI-mandated preamble. */
      if (fp_min <= fp && fp <= fp_max) {
         /* fp looks sane, so use it. */
         ip = (((UWord*)fp)[1]);
         sp = fp + sizeof(Addr) /*saved %ebp*/ 
                 + sizeof(Addr) /*ra*/;
         fp = (((UWord*)fp)[0]);
         if (sps) sps[i] = sp;
         if (fps) fps[i] = fp;
         ips[i++] = ip;
         if (debug)
            VG_(printf)("     ipsF[%d]=0x%08lx\n", i-1, ips[i-1]);
         ip = ip - 1;
         continue;
      }

      /* That didn't work out, so see if there is any CF info to hand
         which can be used. */
      if ( VG_(use_CF_info)( &ip, &sp, &fp, fp_min, fp_max ) ) {
         if (sps) sps[i] = sp;
         if (fps) fps[i] = fp;
         ips[i++] = ip;
         if (debug)
            VG_(printf)("     ipsC[%d]=0x%08lx\n", i-1, ips[i-1]);
         ip = ip - 1;
         continue;
      }

      /* No luck.  We have to give up. */
      break;
   }

#  elif defined(VGP_amd64_linux)

   /*--------------------- amd64 ---------------------*/

   /* fp is %rbp.  sp is %rsp.  ip is %rip. */

   ips[0] = ip;
   if (sps) sps[0] = sp;
   if (fps) fps[0] = fp;
   i = 1;

   /* Loop unwinding the stack. Note that the IP value we get on
    * each pass (whether from CFI info or a stack frame) is a
    * return address so is actually after the calling instruction
    * in the calling function.
    *
    * Because of this we subtract one from the IP after each pass
    * of the loop so that we find the right CFI block on the next
    * pass - otherwise we can find the wrong CFI info if it happens
    * to change after the calling instruction and that will mean
    * that we will fail to unwind the next step.
    *
    * This most frequently happens at the end of a function when
    * a tail call occurs and we wind up using the CFI info for the
    * next function which is completely wrong.
    *
    * Note that VG_(get_data_description) (in m_debuginfo) has to take
    * this same problem into account when unwinding the stack to
    * examine local variable descriptions (as documented therein in
    * comments).
    */
   while (True) {

      if (i >= n_ips)
         break;

      /* Try to derive a new (ip,sp,fp) triple from the current
         set. */

      /* First off, see if there is any CFI info to hand which can
         be used. */
      if ( VG_(use_CF_info)( &ip, &sp, &fp, fp_min, fp_max ) ) {
         if (sps) sps[i] = sp;
         if (fps) fps[i] = fp;
         ips[i++] = ip;
         if (debug)
            VG_(printf)("     ipsC[%d]=%08p\n", i-1, ips[i-1]);
         ip = ip - 1;
         continue;
      }

      /* If VG_(use_CF_info) fails, it won't modify ip/sp/fp, so
         we can safely try the old-fashioned method. */
      /* This bit is supposed to deal with frames resulting from
         functions which begin "pushq %rbp ; movq %rsp, %rbp".
         Unfortunately, since we can't (easily) look at the insns at
         the start of the fn, like GDB does, there's no reliable way
         to tell.  Hence the hack of first trying out CFI, and if that
         fails, then use this as a fallback. */
      if (fp_min <= fp && fp <= fp_max) {
         /* fp looks sane, so use it. */
         ip = (((UWord*)fp)[1]);
         sp = fp + sizeof(Addr) /*saved %rbp*/ 
                 + sizeof(Addr) /*ra*/;
         fp = (((UWord*)fp)[0]);
         if (sps) sps[i] = sp;
         if (fps) fps[i] = fp;
         ips[i++] = ip;
         if (debug)
            VG_(printf)("     ipsF[%d]=%08p\n", i-1, ips[i-1]);
         ip = ip - 1;
         continue;
      }

      /* Last-ditch hack (evidently GDB does something similar).  We
         are in the middle of nowhere and we have a nonsense value for
         the frame pointer.  If the stack pointer is still valid,
         assume that what it points at is a return address.  Yes,
         desperate measures.  Could do better here:
         - check that the supposed return address is in
           an executable page
         - check that the supposed return address is just after a call insn
         - given those two checks, don't just consider *sp as the return 
           address; instead scan a likely section of stack (eg sp .. sp+256)
           and use suitable values found there.
      */
      if (fp_min <= sp && sp < fp_max) {
         ip = ((UWord*)sp)[0];
         if (sps) sps[i] = sp;
         if (fps) fps[i] = fp;
         ips[i++] = ip;
         if (debug)
            VG_(printf)("     ipsH[%d]=%08p\n", i-1, ips[i-1]);
         ip = ip - 1;
         sp += 8;
         continue;
      }

      /* No luck at all.  We have to give up. */
      break;
   }

#  elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux) \
        || defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)

   /*--------------------- ppc32/64 ---------------------*/

   /* fp is %r1.  ip is %cia.  Note, ppc uses r1 as both the stack and
      frame pointers. */

#  if defined(VGP_ppc64_linux) || defined(VGP_ppc64_aix5)
   redir_stack_size = VEX_GUEST_PPC64_REDIR_STACK_SIZE;
   redirs_used      = 0;
#  elif defined(VGP_ppc32_aix5)
   redir_stack_size = VEX_GUEST_PPC32_REDIR_STACK_SIZE;
   redirs_used      = 0;
#  endif

#  if defined(VG_PLAT_USES_PPCTOC)
   /* Deal with bogus LR values caused by function
      interception/wrapping on ppc-TOC platforms; see comment on
      similar code a few lines further down. */
   if (ULong_to_Ptr(lr) == (void*)&VG_(ppctoc_magic_redirect_return_stub)
       && VG_(is_valid_tid)(tid_if_known)) {
      Word hsp = VG_(threads)[tid_if_known].arch.vex.guest_REDIR_SP;
      redirs_used++;
      if (hsp >= 1 && hsp < redir_stack_size)
         lr = VG_(threads)[tid_if_known]
                 .arch.vex.guest_REDIR_STACK[hsp-1];
   }
#  endif

   /* We have to determine whether or not LR currently holds this fn
      (call it F)'s return address.  It might not if F has previously
      called some other function, hence overwriting LR with a pointer
      to some part of F.  Hence if LR and IP point to the same
      function then we conclude LR does not hold this function's
      return address; instead the LR at entry must have been saved in
      the stack by F's prologue and so we must get it from there
      instead.  Note all this guff only applies to the innermost
      frame. */
   lr_is_first_RA = False;
   {
#     define M_VG_ERRTXT 1000
      UChar buf_lr[M_VG_ERRTXT], buf_ip[M_VG_ERRTXT];
      if (VG_(get_fnname_nodemangle) (lr, buf_lr, M_VG_ERRTXT))
         if (VG_(get_fnname_nodemangle) (ip, buf_ip, M_VG_ERRTXT))
            if (VG_(strncmp)(buf_lr, buf_ip, M_VG_ERRTXT))
               lr_is_first_RA = True;
#     undef M_VG_ERRTXT
   }

   if (sps) sps[0] = fp; /* NB. not sp */
   if (fps) fps[0] = fp;
   ips[0] = ip;
   i = 1;

   if (fp_min <= fp && fp < fp_max-VG_WORDSIZE+1) {

      /* initial FP is sane; keep going */
      fp = (((UWord*)fp)[0]);

      while (True) {

        /* On ppc64-linux (ppc64-elf, really), and on AIX, the lr save
           slot is 2 words back from sp, whereas on ppc32-elf(?) it's
           only one word back. */
#        if defined(VGP_ppc64_linux) \
            || defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
         const Int lr_offset = 2;
#        else
         const Int lr_offset = 1;
#        endif

         if (i >= n_ips)
            break;

         /* Try to derive a new (ip,fp) pair from the current set. */

         if (fp_min <= fp && fp <= fp_max) {
            /* fp looks sane, so use it. */

            if (i == 1 && lr_is_first_RA)
               ip = lr;
            else
               ip = (((UWord*)fp)[lr_offset]);

#           if defined(VG_PLAT_USES_PPCTOC)
            /* Nasty hack to do with function replacement/wrapping on
               ppc64-linux/ppc64-aix/ppc32-aix.  If LR points to our
               magic return stub, then we are in a wrapped or
               intercepted function, in which LR has been messed with.
               The original LR will have been pushed onto the thread's
               hidden REDIR stack one down from the top (top element
               is the saved R2) and so we should restore the value
               from there instead.  Since nested redirections can and
               do happen, we keep track of the number of nested LRs
               used by the unwinding so far with 'redirs_used'. */
            if (ip == (Addr)&VG_(ppctoc_magic_redirect_return_stub)
                && VG_(is_valid_tid)(tid_if_known)) {
               Word hsp = VG_(threads)[tid_if_known]
                             .arch.vex.guest_REDIR_SP;
               hsp -= 2 * redirs_used;
               redirs_used ++;
               if (hsp >= 1 && hsp < redir_stack_size)
                  ip = VG_(threads)[tid_if_known]
                          .arch.vex.guest_REDIR_STACK[hsp-1];
            }
#           endif

            fp = (((UWord*)fp)[0]);
            if (sps) sps[i] = fp; /* NB. not sp */
            if (fps) fps[i] = fp;
            ips[i++] = ip;
            if (debug)
               VG_(printf)("     ipsF[%d]=%08p\n", i-1, ips[i-1]);
            continue;
         }

         /* No luck there.  We have to give up. */
         break;
      }
   }

#  else
#    error "Unknown platform"
#  endif

   n_found = i;
   return n_found;
}
예제 #5
0
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  lr_is_first_RA = False;
#  if defined(VG_PLAT_USES_PPCTOC)
   Word redir_stack_size = 0;
   Word redirs_used      = 0;
#  endif

   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   Addr ip = (Addr)startRegs->r_pc;
   Addr sp = (Addr)startRegs->r_sp;
   Addr fp = sp;
#  if defined(VGP_ppc32_linux) || defined(VGP_ppc32_aix5)
   Addr lr = startRegs->misc.PPC32.r_lr;
#  elif defined(VGP_ppc64_linux) || defined(VGP_ppc64_aix5)
   Addr lr = startRegs->misc.PPC64.r_lr;
#  endif
   Addr fp_min = sp;

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
		  max_n_ips, fp_min, fp_max_orig, fp_max, ip, fp);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/
   if (fp_min + 512 >= fp_max) {
      /* If the stack limits look bogus, don't poke around ... but
         don't bomb out either. */
      if (sps) sps[0] = sp;
      if (fps) fps[0] = fp;
      ips[0] = ip;
      return 1;
   } 

   /* fp is %r1.  ip is %cia.  Note, ppc uses r1 as both the stack and
      frame pointers. */

#  if defined(VGP_ppc64_linux) || defined(VGP_ppc64_aix5)
   redir_stack_size = VEX_GUEST_PPC64_REDIR_STACK_SIZE;
   redirs_used      = 0;
#  elif defined(VGP_ppc32_aix5)
   redir_stack_size = VEX_GUEST_PPC32_REDIR_STACK_SIZE;
   redirs_used      = 0;
#  endif

#  if defined(VG_PLAT_USES_PPCTOC)
   /* Deal with bogus LR values caused by function
      interception/wrapping on ppc-TOC platforms; see comment on
      similar code a few lines further down. */
   if (ULong_to_Ptr(lr) == (void*)&VG_(ppctoc_magic_redirect_return_stub)
       && VG_(is_valid_tid)(tid_if_known)) {
      Word hsp = VG_(threads)[tid_if_known].arch.vex.guest_REDIR_SP;
      redirs_used++;
      if (hsp >= 1 && hsp < redir_stack_size)
         lr = VG_(threads)[tid_if_known]
                 .arch.vex.guest_REDIR_STACK[hsp-1];
   }
#  endif

   /* We have to determine whether or not LR currently holds this fn
      (call it F)'s return address.  It might not if F has previously
      called some other function, hence overwriting LR with a pointer
      to some part of F.  Hence if LR and IP point to the same
      function then we conclude LR does not hold this function's
      return address; instead the LR at entry must have been saved in
      the stack by F's prologue and so we must get it from there
      instead.  Note all this guff only applies to the innermost
      frame. */
   lr_is_first_RA = False;
   {
#     define M_VG_ERRTXT 1000
      UChar buf_lr[M_VG_ERRTXT], buf_ip[M_VG_ERRTXT];
      /* The following conditional looks grossly inefficient and
         surely could be majorly improved, with not much effort. */
      if (VG_(get_fnname_raw) (lr, buf_lr, M_VG_ERRTXT))
         if (VG_(get_fnname_raw) (ip, buf_ip, M_VG_ERRTXT))
            if (VG_(strncmp)(buf_lr, buf_ip, M_VG_ERRTXT))
               lr_is_first_RA = True;
#     undef M_VG_ERRTXT
   }

   if (sps) sps[0] = fp; /* NB. not sp */
   if (fps) fps[0] = fp;
   ips[0] = ip;
   i = 1;

   if (fp_min <= fp && fp < fp_max-VG_WORDSIZE+1) {

      /* initial FP is sane; keep going */
      fp = (((UWord*)fp)[0]);

      while (True) {

        /* On ppc64-linux (ppc64-elf, really), and on AIX, the lr save
           slot is 2 words back from sp, whereas on ppc32-elf(?) it's
           only one word back. */
#        if defined(VG_PLAT_USES_PPCTOC)
         const Int lr_offset = 2;
#        else
         const Int lr_offset = 1;
#        endif

         if (i >= max_n_ips)
            break;

         /* Try to derive a new (ip,fp) pair from the current set. */

         if (fp_min <= fp && fp <= fp_max - lr_offset * sizeof(UWord)) {
            /* fp looks sane, so use it. */

            if (i == 1 && lr_is_first_RA)
               ip = lr;
            else
               ip = (((UWord*)fp)[lr_offset]);

#           if defined(VG_PLAT_USES_PPCTOC)
            /* Nasty hack to do with function replacement/wrapping on
               ppc64-linux/ppc64-aix/ppc32-aix.  If LR points to our
               magic return stub, then we are in a wrapped or
               intercepted function, in which LR has been messed with.
               The original LR will have been pushed onto the thread's
               hidden REDIR stack one down from the top (top element
               is the saved R2) and so we should restore the value
               from there instead.  Since nested redirections can and
               do happen, we keep track of the number of nested LRs
               used by the unwinding so far with 'redirs_used'. */
            if (ip == (Addr)&VG_(ppctoc_magic_redirect_return_stub)
                && VG_(is_valid_tid)(tid_if_known)) {
               Word hsp = VG_(threads)[tid_if_known]
                             .arch.vex.guest_REDIR_SP;
               hsp -= 2 * redirs_used;
               redirs_used ++;
               if (hsp >= 1 && hsp < redir_stack_size)
                  ip = VG_(threads)[tid_if_known]
                          .arch.vex.guest_REDIR_STACK[hsp-1];
            }
#           endif

            if (0 == ip || 1 == ip) break;
            if (sps) sps[i] = fp; /* NB. not sp */
            if (fps) fps[i] = fp;
            fp = (((UWord*)fp)[0]);
            ips[i++] = ip - 1; /* -1: refer to calling insn, not the RA */
            if (debug)
               VG_(printf)("     ipsF[%d]=%#08lx\n", i-1, ips[i-1]);
            ip = ip - 1; /* ip is probably dead at this point, but
                            play safe, a la x86/amd64 above.  See
                            extensive comments above. */
            continue;
         }

         /* No luck there.  We have to give up. */
         break;
      }
   }

   n_found = i;
   return n_found;
}