/*
 * Update the histograms to support extending the text region arbitrarily.
 * This is done slightly naively (no sparse regions), so will waste slight
 * amounts of memory, but will overall work nicely enough to allow profiling
 * of KLDs.
 *
 * nhighpc is the requested new upper bound of the profiled text.  It is
 * rounded up to a multiple of HISTFRACTION * sizeof(HISTCOUNTER); nothing
 * is done when the rounded value does not exceed the current highpc.
 *
 * The new tos, kcount and froms arrays are carved, in that order, out of
 * one allocation.  The old contents are copied over, the remainder is
 * zeroed, and the previous p->tos pointer is then freed.
 */
void
kmupetext(uintfptr_t nhighpc)
{
	struct gmonparam np;	/* slightly large */
	struct gmonparam *p = &_gmonparam;
	char *cp;

	GIANT_REQUIRED;
	/*
	 * np is a scratch copy: it holds the new geometry while it is being
	 * worked out and lets KCOUNT()/PC_TO_I() be applied to the new
	 * buffers.  Only the fields recomputed below are written back to *p.
	 */
	bcopy(p, &np, sizeof(*p));
	np.highpc = ROUNDUP(nhighpc, HISTFRACTION * sizeof(HISTCOUNTER));
	if (np.highpc <= p->highpc)
		return;
	np.textsize = np.highpc - p->lowpc;
	np.kcountsize = np.textsize / HISTFRACTION;
	np.hashfraction = HASHFRACTION;
	np.fromssize = np.textsize / HASHFRACTION;
	np.tolimit = np.textsize * ARCDENSITY / 100;
	if (np.tolimit < MINARCS)
		np.tolimit = MINARCS;
	else if (np.tolimit > MAXARCS)
		np.tolimit = MAXARCS;
	np.tossize = np.tolimit * sizeof(struct tostruct);
	cp = malloc(np.kcountsize + np.fromssize + np.tossize,
	    M_GPROF, M_WAITOK);
	/*
	 * Check for something else extending highpc while we slept.
	 */
	if (np.highpc <= p->highpc) {
		free(cp, M_GPROF);
		return;
	}
	np.tos = (struct tostruct *)cp;
	cp += np.tossize;
	np.kcount = (HISTCOUNTER *)cp;
	cp += np.kcountsize;
	np.froms = (u_short *)cp;
#ifdef GUPROF
	/* Reinitialize pointers to overhead counters. */
	np.cputime_count = &KCOUNT(&np, PC_TO_I(&np, cputime));
	np.mcount_count = &KCOUNT(&np, PC_TO_I(&np, mcount));
	np.mexitcount_count = &KCOUNT(&np, PC_TO_I(&np, mexitcount));
#endif
	critical_enter();
	/* Carry the existing data over; the newly added tails start at zero. */
	bcopy(p->tos, np.tos, p->tossize);
	bzero((char *)np.tos + p->tossize, np.tossize - p->tossize);
	bcopy(p->kcount, np.kcount, p->kcountsize);
	bzero((char *)np.kcount + p->kcountsize,
	    np.kcountsize - p->kcountsize);
	bcopy(p->froms, np.froms, p->fromssize);
	bzero((char *)np.froms + p->fromssize, np.fromssize - p->fromssize);
	/* Remember the old buffer so it can be freed after the switch. */
	cp = (char *)p->tos;
	/*
	 * Commit only the fields recomputed above.  np was snapshotted
	 * before the allocation, which may have slept; copying the whole
	 * structure back would overwrite every other field of *p with its
	 * pre-sleep value and lose any update made in the meantime.
	 */
	p->highpc = np.highpc;
	p->textsize = np.textsize;
	p->kcountsize = np.kcountsize;
	p->hashfraction = np.hashfraction;
	p->fromssize = np.fromssize;
	p->tolimit = np.tolimit;
	p->tossize = np.tossize;
	p->tos = np.tos;
	p->kcount = np.kcount;
	p->froms = np.froms;
#ifdef GUPROF
	p->cputime_count = np.cputime_count;
	p->mcount_count = np.mcount_count;
	p->mexitcount_count = np.mexitcount_count;
#endif
	critical_exit();
	free(cp, M_GPROF);
}
/* * Actual definition of mcount function. Defined in <machine/profile.h>, * which is included by <sys/gmon.h>. */ MCOUNT #ifdef GUPROF void mexitcount(uintfptr_t selfpc) { struct gmonparam *p; uintfptr_t selfpcdiff; p = &_gmonparam; selfpcdiff = selfpc - (uintfptr_t)p->lowpc; if (selfpcdiff < p->textsize) { int delta; /* * Count the time since cputime() was previously called * against `selfpc'. Compensate for overheads. */ delta = cputime() - cputime_bias - p->mexitcount_pre_overhead; cputime_bias = p->mexitcount_post_overhead; KCOUNT(p, selfpcdiff) += delta; *p->cputime_count += p->cputime_overhead; *p->mexitcount_count += p->mexitcount_overhead; } }
/*
 * _mcount; may be static, inline, etc
 *
 * Record one traversal of the call arc frompc -> selfpc.  The froms[]
 * table, indexed by the caller's offset into the profiled text, holds the
 * index of the first tostruct on a chain of arcs; each tostruct carries
 * the callee pc, a traversal count and the index of the next chain entry.
 * tos[0].link holds the index of the most recently allocated tostruct.
 */
_MCOUNT_DECL(uintfptr_t frompc, uintfptr_t selfpc)
{
#ifdef GUPROF
	int elapsed;
#endif
	struct gmonparam *p = &_gmonparam;
	struct tostruct *arc, *prev;
	u_short *bucket;
	fptrdiff_t frompci;
	long arcidx;
#if defined(_KERNEL) && !defined(_KERNEL_VIRTUAL)
	MCOUNT_DECL(s)
#endif

#ifndef GUPROF			/* XXX */
	/*
	 * Do nothing unless profiling is on; this also rejects
	 * recursive invocations.
	 */
	if (p->state != GMON_PROF_ON)
		return;
#endif
#if defined(_KERNEL) && !defined(_KERNEL_VIRTUAL)
	MCOUNT_ENTER(s);
#else
	p->state = GMON_PROF_BUSY;
#endif
	frompci = frompc - p->lowpc;

#if defined(_KERNEL) && !defined(_KERNEL_VIRTUAL)
	/*
	 * A call from an exception handler may carry a user address in
	 * frompc.  Fold all such callers onto the index of user() so
	 * that user counts are merged.
	 *
	 * XXX doesn't work properly with vkernel
	 */
	if (frompci >= p->textsize) {
		if (frompci + p->lowpc
		    >= (uintfptr_t)(VM_MAX_USER_ADDRESS + UPAGES * PAGE_SIZE))
			goto done;
		frompci = (uintfptr_t)user - p->lowpc;
		if (frompci >= p->textsize)
			goto done;
	}
#endif

#ifdef GUPROF
	if (p->state == GMON_PROF_HIRES) {
		/*
		 * Charge the time since cputime() was previously called
		 * to `frompc', compensating for overheads.
		 *
		 * cputime() records the count at which it was called in
		 * its prev_count variable, which in effect starts a
		 * counter for the next period of execution (normally
		 * until the next mcount() or mexitcount() call).
		 * cputime_bias is set to compensate for our own overhead.
		 *
		 * The usual sampling counters are used because they can
		 * be located efficiently; 4-byte counters are usually
		 * necessary.  gprof adds up the scattered counts just as
		 * it does for statistical profiling.  All counts are
		 * signed so that underflow in the subtractions does not
		 * matter much (negative counts are normally compensated
		 * for by larger counts elsewhere).  Underflow shouldn't
		 * occur, but may be caused by slightly wrong calibrations
		 * or by not clearing cputime_bias.
		 */
		elapsed = cputime() - cputime_bias - p->mcount_pre_overhead;
		cputime_bias = p->mcount_post_overhead;
		KCOUNT(p, frompci) += elapsed;
		*p->cputime_count += p->cputime_overhead;
		*p->mcount_count += p->mcount_overhead;
	}
#endif /* GUPROF */

#if defined(_KERNEL) && !defined(_KERNEL_VIRTUAL)
	/*
	 * For a call from an exception handler, frompc was faked to be
	 * the place where the exception occurred, and the count for that
	 * place has just been solidified.  Now attribute the arc to
	 * bintr() for interrupt handlers and to btrap() for trap
	 * handlers, so that exceptions show up in the call graph as
	 * calls from those two instead of calls from all over.
	 */
	if (selfpc >= (uintfptr_t)btrap && selfpc < (uintfptr_t)eintr) {
		uintfptr_t handler;

		handler = (selfpc >= (uintfptr_t)bintr) ?
		    (uintfptr_t)bintr : (uintfptr_t)btrap;
		frompci = handler - p->lowpc;
	}
#endif

	/*
	 * Reject a frompc that is not a reasonable pc value; for
	 * example, signal catchers get called from the stack, not from
	 * text space.  Too bad.
	 */
	if (frompci >= p->textsize)
		goto done;

	bucket = &p->froms[frompci / (p->hashfraction * sizeof(*p->froms))];
	arcidx = *bucket;
	if (arcidx == 0) {
		/*
		 * Empty bucket: this is the first arc seen from here.
		 */
		arcidx = ++p->tos[0].link;
		if (arcidx >= p->tolimit)
			/* halt further profiling */
			goto overflow;

		*bucket = arcidx;
		arc = &p->tos[arcidx];
		arc->selfpc = selfpc;
		arc->count = 1;
		arc->link = 0;
		goto done;
	}

	arc = &p->tos[arcidx];
	if (arc->selfpc == selfpc) {
		/*
		 * Usual case: the arc is at the front of its chain.
		 */
		arc->count++;
		goto done;
	}

	/*
	 * Walk the rest of the chain.  `arc' is the entry being examined
	 * and `prev' its predecessor; the head is already known not to
	 * match.
	 */
	while (arc->link != 0) {
		prev = arc;
		arc = &p->tos[prev->link];
		if (arc->selfpc != selfpc)
			continue;

		/*
		 * Found it: bump the count and move the entry to the
		 * head of the chain.
		 */
		arc->count++;
		arcidx = prev->link;
		prev->link = arc->link;
		arc->link = *bucket;
		*bucket = arcidx;
		goto done;
	}

	/*
	 * The chain ended without a match, so allocate a new tostruct
	 * and link it in at the head of the chain.
	 */
	arcidx = ++p->tos[0].link;
	if (arcidx >= p->tolimit)
		goto overflow;

	arc = &p->tos[arcidx];
	arc->selfpc = selfpc;
	arc->count = 1;
	arc->link = *bucket;
	*bucket = arcidx;

done:
#if defined(_KERNEL) && !defined(_KERNEL_VIRTUAL)
	MCOUNT_EXIT(s);
#else
	p->state = GMON_PROF_ON;
#endif
	return;

overflow:
	/* The arc table is exhausted; flag the error state. */
	p->state = GMON_PROF_ERROR;
#if defined(_KERNEL) && !defined(_KERNEL_VIRTUAL)
	MCOUNT_EXIT(s);
#endif
	return;
}
/*
 * _mcount; may be static, inline, etc
 *
 * Record one traversal of the call arc frompc -> selfpc.  The froms[]
 * table, indexed by the caller's offset into the profiled text, holds the
 * index of the first tostruct on a chain of arcs; each tostruct carries
 * the callee pc, a traversal count and the index of the next chain entry.
 * tos[0].link holds the index of the most recently allocated tostruct.
 */
_MCOUNT_DECL(uintfptr_t frompc, uintfptr_t selfpc)
{
#ifdef GUPROF
	u_int cycles;
#endif
	struct gmonparam *p = &_gmonparam;
	struct tostruct *arc, *prev;
	u_short *bucket;
	fptrdiff_t frompci;
	long arcidx;
#ifdef _KERNEL
	MCOUNT_DECL(s)
#endif

#ifndef GUPROF			/* XXX */
	/*
	 * Do nothing unless profiling is on; this also rejects
	 * recursive invocations.
	 */
	if (p->state != GMON_PROF_ON)
		return;
#endif
#ifdef _KERNEL
	MCOUNT_ENTER(s);
#else
	/* Proceed only if the state could be switched from ON to BUSY. */
	if (!atomic_cmpset_acq_int(&p->state, GMON_PROF_ON, GMON_PROF_BUSY))
		return;
#endif
	frompci = frompc - p->lowpc;

#ifdef _KERNEL
	/*
	 * A call from an exception handler may carry a user address in
	 * frompc.  Fold all such callers onto the index of user() so
	 * that user counts are merged.
	 */
	if (frompci >= p->textsize) {
		if (frompci + p->lowpc
		    >= (uintfptr_t)(VM_MAXUSER_ADDRESS + UPAGES * PAGE_SIZE))
			goto done;
		frompci = (uintfptr_t)user - p->lowpc;
		if (frompci >= p->textsize)
			goto done;
	}
#endif

#ifdef GUPROF
	if (p->state == GMON_PROF_HIRES) {
		/*
		 * Read the clock and add the number of clock cycles
		 * since it was last read to a counter for frompc.  That
		 * solidifies the count for the function containing
		 * frompc and effectively starts another clock for the
		 * current function, whose count is solidified in turn
		 * when another function call is made or the function
		 * returns.
		 *
		 * The usual sampling counters are used because they can
		 * be located efficiently; 4-byte counters are usually
		 * necessary.
		 *
		 * Subtracting the profiling overheads from the counts of
		 * normal functions and adding them to the counts for
		 * mcount(), mexitcount() and cputime() has many
		 * complications.  Fractional cycles are handled on a
		 * best-effort basis, but the overheads are usually
		 * underestimated because they are calibrated for a
		 * simpler than usual setup.
		 */
		cycles = cputime() - p->mcount_overhead;
		p->cputime_overhead_resid += p->cputime_overhead_frac;
		p->mcount_overhead_resid += p->mcount_overhead_frac;
		if ((int)cycles < 0) {
			*p->mcount_count += cycles + p->mcount_overhead
			    - p->cputime_overhead;
		} else if (cycles != 0) {
			/* Hand a whole accumulated cycle to cputime(). */
			if (p->cputime_overhead_resid >= CALIB_SCALE) {
				p->cputime_overhead_resid -= CALIB_SCALE;
				++*p->cputime_count;
				--cycles;
			}
			if (cycles != 0) {
				/* Likewise for mcount() itself. */
				if (p->mcount_overhead_resid >= CALIB_SCALE) {
					p->mcount_overhead_resid -= CALIB_SCALE;
					++*p->mcount_count;
					--cycles;
				}
				KCOUNT(p, frompci) += cycles;
			}
			*p->mcount_count += p->mcount_overhead_sub;
		}
		*p->cputime_count += p->cputime_overhead;
	}
#endif /* GUPROF */

#ifdef _KERNEL
	/*
	 * For a call from an exception handler, frompc was faked to be
	 * the place where the exception occurred, and the count for that
	 * place has just been solidified.  Now attribute the arc to
	 * bintr() for interrupt handlers and to btrap() for trap
	 * handlers, so that exceptions show up in the call graph as
	 * calls from those two instead of calls from all over.
	 */
	if (selfpc >= (uintfptr_t)btrap && selfpc < (uintfptr_t)eintr) {
		uintfptr_t handler;

		handler = (selfpc >= (uintfptr_t)bintr) ?
		    (uintfptr_t)bintr : (uintfptr_t)btrap;
		frompci = handler - p->lowpc;
	}
#endif

	/*
	 * Reject a frompc that is not a reasonable pc value; for
	 * example, signal catchers get called from the stack, not from
	 * text space.  Too bad.
	 */
	if (frompci >= p->textsize)
		goto done;

	bucket = &p->froms[frompci / (p->hashfraction * sizeof(*p->froms))];
	arcidx = *bucket;
	if (arcidx == 0) {
		/*
		 * Empty bucket: this is the first arc seen from here.
		 */
		arcidx = ++p->tos[0].link;
		if (arcidx >= p->tolimit)
			/* halt further profiling */
			goto overflow;

		*bucket = arcidx;
		arc = &p->tos[arcidx];
		arc->selfpc = selfpc;
		arc->count = 1;
		arc->link = 0;
		goto done;
	}

	arc = &p->tos[arcidx];
	if (arc->selfpc == selfpc) {
		/*
		 * Usual case: the arc is at the front of its chain.
		 */
		arc->count++;
		goto done;
	}

	/*
	 * Walk the rest of the chain.  `arc' is the entry being examined
	 * and `prev' its predecessor; the head is already known not to
	 * match.
	 */
	while (arc->link != 0) {
		prev = arc;
		arc = &p->tos[prev->link];
		if (arc->selfpc != selfpc)
			continue;

		/*
		 * Found it: bump the count and move the entry to the
		 * head of the chain.
		 */
		arc->count++;
		arcidx = prev->link;
		prev->link = arc->link;
		arc->link = *bucket;
		*bucket = arcidx;
		goto done;
	}

	/*
	 * The chain ended without a match, so allocate a new tostruct
	 * and link it in at the head of the chain.
	 */
	arcidx = ++p->tos[0].link;
	if (arcidx >= p->tolimit)
		goto overflow;

	arc = &p->tos[arcidx];
	arc->selfpc = selfpc;
	arc->count = 1;
	arc->link = *bucket;
	*bucket = arcidx;

done:
#ifdef _KERNEL
	MCOUNT_EXIT(s);
#else
	atomic_store_rel_int(&p->state, GMON_PROF_ON);
#endif
	return;

overflow:
	/* The arc table is exhausted; flag the error state. */
	atomic_store_rel_int(&p->state, GMON_PROF_ERROR);
#ifdef _KERNEL
	MCOUNT_EXIT(s);
#endif
	return;
}