/* _mcount; may be static, inline, etc */
_MCOUNT_DECL(uintfptr_t frompc, uintfptr_t selfpc)
{
#ifdef GUPROF
	int delta;
#endif
	fptrdiff_t frompci;
	u_short *frompcindex;
	struct tostruct *top, *prevtop;
	struct gmonparam *p;
	long toindex;
#if defined(_KERNEL) && !defined(_KERNEL_VIRTUAL)
	MCOUNT_DECL(s)
#endif

	p = &_gmonparam;
#ifndef GUPROF			/* XXX */
	/*
	 * check that we are profiling
	 * and that we aren't recursively invoked.
	 */
	if (p->state != GMON_PROF_ON)
		return;
#endif
#if defined(_KERNEL) && !defined(_KERNEL_VIRTUAL)
	MCOUNT_ENTER(s);
#else
	p->state = GMON_PROF_BUSY;
#endif
	frompci = frompc - p->lowpc;

#if defined(_KERNEL) && !defined(_KERNEL_VIRTUAL)
	/*
	 * When we are called from an exception handler, frompci may be
	 * for a user address.  Convert such frompci's to the index of
	 * user() to merge all user counts.
	 *
	 * XXX doesn't work properly with vkernel
	 */
	if (frompci >= p->textsize) {
		if (frompci + p->lowpc
		    >= (uintfptr_t)(VM_MAX_USER_ADDRESS + UPAGES * PAGE_SIZE))
			goto done;
		frompci = (uintfptr_t)user - p->lowpc;
		if (frompci >= p->textsize)
			goto done;
	}
#endif

#ifdef GUPROF
	if (p->state == GMON_PROF_HIRES) {
		/*
		 * Count the time since cputime() was previously called
		 * against `frompc'.  Compensate for overheads.
		 *
		 * cputime() sets its prev_count variable to the count when
		 * it is called.  This in effect starts a counter for
		 * the next period of execution (normally from now until
		 * the next call to mcount() or mexitcount()).  We set
		 * cputime_bias to compensate for our own overhead.
		 *
		 * We use the usual sampling counters since they can be
		 * located efficiently.  4-byte counters are usually
		 * necessary.  gprof will add up the scattered counts
		 * just like it does for statistical profiling.  All
		 * counts are signed so that underflow in the subtractions
		 * doesn't matter much (negative counts are normally
		 * compensated for by larger counts elsewhere).  Underflow
		 * shouldn't occur, but may be caused by slightly wrong
		 * calibrations or from not clearing cputime_bias.
		 */
		delta = cputime() - cputime_bias - p->mcount_pre_overhead;
		cputime_bias = p->mcount_post_overhead;
		KCOUNT(p, frompci) += delta;
		*p->cputime_count += p->cputime_overhead;
		*p->mcount_count += p->mcount_overhead;
	}
#endif /* GUPROF */

#if defined(_KERNEL) && !defined(_KERNEL_VIRTUAL)
	/*
	 * When we are called from an exception handler, frompc is faked
	 * to be for where the exception occurred.  We've just solidified
	 * the count for there.  Now convert frompci to the index of btrap()
	 * for trap handlers and bintr() for interrupt handlers to make
	 * exceptions appear in the call graph as calls from btrap() and
	 * bintr() instead of calls from all over.
	 */
	if ((uintfptr_t)selfpc >= (uintfptr_t)btrap
	    && (uintfptr_t)selfpc < (uintfptr_t)eintr) {
		if ((uintfptr_t)selfpc >= (uintfptr_t)bintr)
			frompci = (uintfptr_t)bintr - p->lowpc;
		else
			frompci = (uintfptr_t)btrap - p->lowpc;
	}
#endif

	/*
	 * check that frompc is a reasonable pc value.
	 * for example:	signal catchers get called from the stack,
	 *		not from text space.  too bad.
	 */
	if (frompci >= p->textsize)
		goto done;

	frompcindex =
	    &p->froms[frompci / (p->hashfraction * sizeof(*p->froms))];
	toindex = *frompcindex;
	if (toindex == 0) {
		/*
		 * first time traversing this arc
		 */
		toindex = ++p->tos[0].link;
		if (toindex >= p->tolimit)
			/* halt further profiling */
			goto overflow;

		*frompcindex = toindex;
		top = &p->tos[toindex];
		top->selfpc = selfpc;
		top->count = 1;
		top->link = 0;
		goto done;
	}
	top = &p->tos[toindex];
	if (top->selfpc == selfpc) {
		/*
		 * arc at front of chain; usual case.
		 */
		top->count++;
		goto done;
	}
	/*
	 * have to go looking down chain for it.
	 * top points to what we are looking at,
	 * prevtop points to previous top.
	 * we know it is not at the head of the chain.
	 */
	for (; /* goto done */; ) {
		if (top->link == 0) {
			/*
			 * top is end of the chain and none of the chain
			 * had top->selfpc == selfpc.
			 * so we allocate a new tostruct
			 * and link it to the head of the chain.
			 */
			toindex = ++p->tos[0].link;
			if (toindex >= p->tolimit)
				goto overflow;

			top = &p->tos[toindex];
			top->selfpc = selfpc;
			top->count = 1;
			top->link = *frompcindex;
			*frompcindex = toindex;
			goto done;
		}
		/*
		 * otherwise, check the next arc on the chain.
		 */
		prevtop = top;
		top = &p->tos[top->link];
		if (top->selfpc == selfpc) {
			/*
			 * there it is.
			 * increment its count
			 * move it to the head of the chain.
			 */
			top->count++;
			toindex = prevtop->link;
			prevtop->link = top->link;
			top->link = *frompcindex;
			*frompcindex = toindex;
			goto done;
		}
	}
done:
#if defined(_KERNEL) && !defined(_KERNEL_VIRTUAL)
	MCOUNT_EXIT(s);
#else
	p->state = GMON_PROF_ON;
#endif
	return;
overflow:
	p->state = GMON_PROF_ERROR;
#if defined(_KERNEL) && !defined(_KERNEL_VIRTUAL)
	MCOUNT_EXIT(s);
#endif
	return;
}
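/*
 * Illustrative sketch only, not part of mcount.c: a miniature, userland
 * version of the froms[]/tos[] arc table updated above, showing the
 * hash-by-caller-pc lookup, the chained tostruct records, and the
 * move-to-front update.  All toy_ and TOY_ names and sizes are invented
 * for this example; the real code indexes froms[] by (frompc - lowpc)
 * scaled by hashfraction rather than by a simple modulus, and stores the
 * arcs in the gmonparam buffers rather than in static arrays.
 */
#include <stdio.h>

#define TOY_NFROMS	64	/* hash buckets indexed by caller pc */
#define TOY_NTOS	32	/* arc records; tos[0].link is the allocator */

struct toy_tostruct {
	unsigned long	selfpc;	/* callee pc for this arc */
	long		count;	/* number of traversals of this arc */
	unsigned short	link;	/* next record in this bucket's chain */
};

static unsigned short		toy_froms[TOY_NFROMS];
static struct toy_tostruct	toy_tos[TOY_NTOS];

/* Record one caller->callee arc, mirroring the chain walk in _mcount(). */
static int
toy_mcount(unsigned long frompc, unsigned long selfpc)
{
	unsigned short *frompcindex;
	struct toy_tostruct *top, *prevtop;
	long toindex;

	frompcindex = &toy_froms[frompc % TOY_NFROMS];
	toindex = *frompcindex;
	if (toindex == 0) {
		/* first arc out of this caller: allocate a fresh record */
		toindex = ++toy_tos[0].link;
		if (toindex >= TOY_NTOS)
			return (-1);		/* table full */
		*frompcindex = toindex;
		top = &toy_tos[toindex];
		top->selfpc = selfpc;
		top->count = 1;
		top->link = 0;
		return (0);
	}
	top = &toy_tos[toindex];
	if (top->selfpc == selfpc) {
		top->count++;			/* arc already at chain head */
		return (0);
	}
	for (;;) {
		if (top->link == 0) {
			/* not found: allocate and push onto the chain head */
			toindex = ++toy_tos[0].link;
			if (toindex >= TOY_NTOS)
				return (-1);
			top = &toy_tos[toindex];
			top->selfpc = selfpc;
			top->count = 1;
			top->link = *frompcindex;
			*frompcindex = toindex;
			return (0);
		}
		prevtop = top;
		top = &toy_tos[top->link];
		if (top->selfpc == selfpc) {
			/* found mid-chain: bump count, move to front */
			top->count++;
			toindex = prevtop->link;
			prevtop->link = top->link;
			top->link = *frompcindex;
			*frompcindex = toindex;
			return (0);
		}
	}
}

int
main(void)
{
	unsigned short i;

	toy_mcount(0x1000, 0x2000);
	toy_mcount(0x1000, 0x3000);
	toy_mcount(0x1000, 0x2000);
	/* Walk the chain for caller 0x1000 and dump each recorded arc. */
	for (i = toy_froms[0x1000 % TOY_NFROMS]; i != 0; i = toy_tos[i].link)
		printf("0x1000 -> 0x%lx: %ld calls\n",
		    toy_tos[i].selfpc, toy_tos[i].count);
	return (0);
}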
/* _mcount; may be static, inline, etc */
_MCOUNT_DECL(uintfptr_t frompc, uintfptr_t selfpc)
{
#ifdef GUPROF
	u_int delta;
#endif
	fptrdiff_t frompci;
	u_short *frompcindex;
	struct tostruct *top, *prevtop;
	struct gmonparam *p;
	long toindex;
#ifdef _KERNEL
	MCOUNT_DECL(s)
#endif

	p = &_gmonparam;
#ifndef GUPROF			/* XXX */
	/*
	 * check that we are profiling
	 * and that we aren't recursively invoked.
	 */
	if (p->state != GMON_PROF_ON)
		return;
#endif
#ifdef _KERNEL
	MCOUNT_ENTER(s);
#else
	if (!atomic_cmpset_acq_int(&p->state, GMON_PROF_ON, GMON_PROF_BUSY))
		return;
#endif
	frompci = frompc - p->lowpc;

#ifdef _KERNEL
	/*
	 * When we are called from an exception handler, frompci may be
	 * for a user address.  Convert such frompci's to the index of
	 * user() to merge all user counts.
	 */
	if (frompci >= p->textsize) {
		if (frompci + p->lowpc
		    >= (uintfptr_t)(VM_MAXUSER_ADDRESS + UPAGES * PAGE_SIZE))
			goto done;
		frompci = (uintfptr_t)user - p->lowpc;
		if (frompci >= p->textsize)
			goto done;
	}
#endif

#ifdef GUPROF
	if (p->state != GMON_PROF_HIRES)
		goto skip_guprof_stuff;
	/*
	 * Look at the clock and add the count of clock cycles since the
	 * clock was last looked at to a counter for frompc.  This
	 * solidifies the count for the function containing frompc and
	 * effectively starts another clock for the current function.
	 * The count for the new clock will be solidified when another
	 * function call is made or the function returns.
	 *
	 * We use the usual sampling counters since they can be located
	 * efficiently.  4-byte counters are usually necessary.
	 *
	 * There are many complications for subtracting the profiling
	 * overheads from the counts for normal functions and adding
	 * them to the counts for mcount(), mexitcount() and cputime().
	 * We attempt to handle fractional cycles, but the overheads
	 * are usually underestimated because they are calibrated for
	 * a simpler than usual setup.
	 */
	delta = cputime() - p->mcount_overhead;
	p->cputime_overhead_resid += p->cputime_overhead_frac;
	p->mcount_overhead_resid += p->mcount_overhead_frac;
	if ((int)delta < 0)
		*p->mcount_count += delta + p->mcount_overhead
				    - p->cputime_overhead;
	else if (delta != 0) {
		if (p->cputime_overhead_resid >= CALIB_SCALE) {
			p->cputime_overhead_resid -= CALIB_SCALE;
			++*p->cputime_count;
			--delta;
		}
		if (delta != 0) {
			if (p->mcount_overhead_resid >= CALIB_SCALE) {
				p->mcount_overhead_resid -= CALIB_SCALE;
				++*p->mcount_count;
				--delta;
			}
			KCOUNT(p, frompci) += delta;
		}
		*p->mcount_count += p->mcount_overhead_sub;
	}
	*p->cputime_count += p->cputime_overhead;
skip_guprof_stuff:
#endif /* GUPROF */

#ifdef _KERNEL
	/*
	 * When we are called from an exception handler, frompc is faked
	 * to be for where the exception occurred.  We've just solidified
	 * the count for there.  Now convert frompci to the index of btrap()
	 * for trap handlers and bintr() for interrupt handlers to make
	 * exceptions appear in the call graph as calls from btrap() and
	 * bintr() instead of calls from all over.
	 */
	if ((uintfptr_t)selfpc >= (uintfptr_t)btrap
	    && (uintfptr_t)selfpc < (uintfptr_t)eintr) {
		if ((uintfptr_t)selfpc >= (uintfptr_t)bintr)
			frompci = (uintfptr_t)bintr - p->lowpc;
		else
			frompci = (uintfptr_t)btrap - p->lowpc;
	}
#endif

	/*
	 * check that frompc is a reasonable pc value.
	 * for example:	signal catchers get called from the stack,
	 *		not from text space.  too bad.
	 */
	if (frompci >= p->textsize)
		goto done;

	frompcindex =
	    &p->froms[frompci / (p->hashfraction * sizeof(*p->froms))];
	toindex = *frompcindex;
	if (toindex == 0) {
		/*
		 * first time traversing this arc
		 */
		toindex = ++p->tos[0].link;
		if (toindex >= p->tolimit)
			/* halt further profiling */
			goto overflow;

		*frompcindex = toindex;
		top = &p->tos[toindex];
		top->selfpc = selfpc;
		top->count = 1;
		top->link = 0;
		goto done;
	}
	top = &p->tos[toindex];
	if (top->selfpc == selfpc) {
		/*
		 * arc at front of chain; usual case.
		 */
		top->count++;
		goto done;
	}
	/*
	 * have to go looking down chain for it.
	 * top points to what we are looking at,
	 * prevtop points to previous top.
	 * we know it is not at the head of the chain.
	 */
	for (; /* goto done */; ) {
		if (top->link == 0) {
			/*
			 * top is end of the chain and none of the chain
			 * had top->selfpc == selfpc.
			 * so we allocate a new tostruct
			 * and link it to the head of the chain.
			 */
			toindex = ++p->tos[0].link;
			if (toindex >= p->tolimit)
				goto overflow;

			top = &p->tos[toindex];
			top->selfpc = selfpc;
			top->count = 1;
			top->link = *frompcindex;
			*frompcindex = toindex;
			goto done;
		}
		/*
		 * otherwise, check the next arc on the chain.
		 */
		prevtop = top;
		top = &p->tos[top->link];
		if (top->selfpc == selfpc) {
			/*
			 * there it is.
			 * increment its count
			 * move it to the head of the chain.
			 */
			top->count++;
			toindex = prevtop->link;
			prevtop->link = top->link;
			top->link = *frompcindex;
			*frompcindex = toindex;
			goto done;
		}
	}
done:
#ifdef _KERNEL
	MCOUNT_EXIT(s);
#else
	atomic_store_rel_int(&p->state, GMON_PROF_ON);
#endif
	return;
overflow:
	atomic_store_rel_int(&p->state, GMON_PROF_ERROR);
#ifdef _KERNEL
	MCOUNT_EXIT(s);
#endif
	return;
}
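/*
 * Illustrative sketch only, not part of mcount.c: the userland reentrancy
 * guard used above, restated with C11 atomics as a stand-in for the
 * kernel-provided atomic_cmpset_acq_int() and atomic_store_rel_int().
 * The toy_ names and the TOY_PROF_ values are invented for this example.
 */
#include <stdatomic.h>

#define TOY_PROF_ON	0
#define TOY_PROF_BUSY	1
#define TOY_PROF_ERROR	2

static _Atomic int toy_state = TOY_PROF_ON;

static void
toy_profile_event(void)
{
	int expected = TOY_PROF_ON;

	/*
	 * Claim the profiler: only a caller that observes the state ON may
	 * move it to BUSY.  A recursive or concurrent caller loses the race
	 * and simply drops the event, exactly as _mcount() does above.
	 */
	if (!atomic_compare_exchange_strong_explicit(&toy_state, &expected,
	    TOY_PROF_BUSY, memory_order_acquire, memory_order_relaxed))
		return;

	/* ... update the froms[]/tos[] tables here ... */

	/* Release the profiler; an error path would store TOY_PROF_ERROR. */
	atomic_store_explicit(&toy_state, TOY_PROF_ON, memory_order_release);
}

int
main(void)
{
	toy_profile_event();
	return (0);
}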