/* Print some basic latency/rate information to assist in debugging. */
static void
acpi_hpet_test(struct acpi_hpet_softc *sc)
{
	int i;
	uint32_t u1, u2;
	struct timeval b0, b1, b2;
	struct timespec ts;

	microuptime(&b0);
	microuptime(&b0);
	microuptime(&b1);
	u1 = bus_space_read_4(acpi_hpet_bst, acpi_hpet_bsh, HPET_MAIN_COUNTER);
	for (i = 1; i < 1000; i++) {
		u2 = bus_space_read_4(acpi_hpet_bst, acpi_hpet_bsh,
		    HPET_MAIN_COUNTER);
	}
	microuptime(&b2);
	u2 = bus_space_read_4(acpi_hpet_bst, acpi_hpet_bsh, HPET_MAIN_COUNTER);

	timevalsub(&b2, &b1);		/* time spent in the read loop */
	timevalsub(&b1, &b0);		/* overhead of microuptime() itself */
	timevalsub(&b2, &b1);
	TIMEVAL_TO_TIMESPEC(&b2, &ts);

	device_printf(sc->dev, "%ld.%06ld: %u ... %u = %u\n",
	    (long)b2.tv_sec, (long)b2.tv_usec, u1, u2, u2 - u1);

	device_printf(sc->dev, "time per call: %ld ns\n", ts.tv_nsec / 1000);
}
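/*
 * A minimal userspace sketch of the same averaging idea, assuming a POSIX
 * host with clock_gettime(CLOCK_MONOTONIC): time N back-to-back calls of an
 * arbitrary operation, subtract the timer's own overhead, and divide by N to
 * estimate the per-call latency.  The work() function here is a hypothetical
 * placeholder, not part of the driver above.
 */
#include <stdio.h>
#include <time.h>

static volatile unsigned long sink;

static void work(void)
{
	sink++;			/* stand-in for the HPET register read */
}

int main(void)
{
	struct timespec t0, t1, t2;
	long total_ns, overhead_ns;
	int i, n = 1000;

	clock_gettime(CLOCK_MONOTONIC, &t0);
	clock_gettime(CLOCK_MONOTONIC, &t1);
	for (i = 0; i < n; i++)
		work();
	clock_gettime(CLOCK_MONOTONIC, &t2);

	overhead_ns = (t1.tv_sec - t0.tv_sec) * 1000000000L +
	    (t1.tv_nsec - t0.tv_nsec);
	total_ns = (t2.tv_sec - t1.tv_sec) * 1000000000L +
	    (t2.tv_nsec - t1.tv_nsec) - overhead_ns;
	printf("time per call: %ld ns\n", total_ns / n);
	return 0;
}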
static su_time64_t mono64(void)
{
#if HAVE_CLOCK_GETTIME && CLOCK_MONOTONIC
  {
    struct timespec tv;

    if (clock_gettime(CLOCK_MONOTONIC, &tv) == 0)
      return (su_time64_t)tv.tv_sec * E9 + tv.tv_nsec;
  }
#endif

#if HAVE_NANOUPTIME
  {
    struct timespec tv;

    nanouptime(&tv);
    return (su_time64_t)tv.tv_sec * E9 + tv.tv_nsec;
  }
#elif HAVE_MICROUPTIME
  {
    struct timeval tv;

    microuptime(&tv);
    return (su_time64_t)tv.tv_sec * E9 + tv.tv_usec * 1000;
  }
#endif

  return now64();
}
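/*
 * Hedged usage sketch: with a monotonic nanosecond counter like mono64(),
 * durations are plain 64-bit subtractions and are immune to wall-clock
 * steps.  This standalone version assumes a POSIX host; E9 and su_time64_t
 * are reproduced locally only for illustration.
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

#define E9 1000000000ULL
typedef uint64_t su_time64_t;

static su_time64_t mono64_demo(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (su_time64_t)ts.tv_sec * E9 + ts.tv_nsec;
}

int main(void)
{
	su_time64_t start = mono64_demo();

	usleep(10000);		/* ~10 ms of "work" */
	printf("elapsed: %llu ns\n",
	    (unsigned long long)(mono64_demo() - start));
	return 0;
}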
/*
 * netisr_poll is typically scheduled once per tick.
 */
void
netisr_poll(void)
{
	int i, cycles;
	enum poll_cmd arg = POLL_ONLY;

	mtx_lock(&poll_mtx);
	if (!netisr_poll_scheduled) {
		mtx_unlock(&poll_mtx);
		return;
	}
	netisr_poll_scheduled = 0;
	phase = 3;
	if (residual_burst == 0) { /* first call in this tick */
		microuptime(&poll_start_t);
		if (++reg_frac_count == reg_frac) {
			arg = POLL_AND_CHECK_STATUS;
			reg_frac_count = 0;
		}
		residual_burst = poll_burst;
	}
	cycles = (residual_burst < poll_each_burst) ?
		residual_burst : poll_each_burst;
	residual_burst -= cycles;

	for (i = 0 ; i < poll_handlers ; i++)
		pr[i].handler(pr[i].ifp, arg, cycles);

	phase = 4;
	mtx_unlock(&poll_mtx);
}
/*
 * Attach a disk.
 */
void
disk_attach(struct disk *diskp)
{

	if (!ISSET(diskp->dk_flags, DKF_CONSTRUCTED))
		disk_construct(diskp, diskp->dk_name);

	/*
	 * Allocate and initialize the disklabel structures.  Note that
	 * it's not safe to sleep here, since we're probably going to be
	 * called during autoconfiguration.
	 */
	diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, M_NOWAIT);
	diskp->dk_cpulabel = malloc(sizeof(struct cpu_disklabel), M_DEVBUF,
	    M_NOWAIT);
	if ((diskp->dk_label == NULL) || (diskp->dk_cpulabel == NULL))
		panic("disk_attach: can't allocate storage for disklabel");

	bzero(diskp->dk_label, sizeof(struct disklabel));
	bzero(diskp->dk_cpulabel, sizeof(struct cpu_disklabel));

	/*
	 * Set the attached timestamp.
	 */
	microuptime(&diskp->dk_attachtime);

	/*
	 * Link into the disklist.
	 */
	TAILQ_INSERT_TAIL(&disklist, diskp, dk_link);
	++disk_count;
	disk_change = 1;
}
/*
 * Decrement a disk's busy counter, increment the byte count, total busy
 * time, and reset the timestamp.
 */
void
disk_unbusy(struct disk *diskp, long bcount, int read)
{
	struct timeval dv_time, diff_time;

	if (diskp->dk_busy-- == 0)
		printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name);

	microuptime(&dv_time);

	timersub(&dv_time, &diskp->dk_timestamp, &diff_time);
	timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time);

	diskp->dk_timestamp = dv_time;
	if (bcount > 0) {
		if (read) {
			diskp->dk_rbytes += bcount;
			diskp->dk_rxfer++;
		} else {
			diskp->dk_wbytes += bcount;
			diskp->dk_wxfer++;
		}
	} else
		diskp->dk_seek++;

	add_disk_randomness(bcount ^ diff_time.tv_usec);
}
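/*
 * Illustrative sketch of the busy-time bookkeeping used above: keep a
 * running total in a struct timeval, add each (now - timestamp) interval
 * with timersub()/timeradd(), then reset the timestamp.  Assumes a BSD or
 * glibc host where the timersub/timeradd macros are visible from
 * <sys/time.h>; gettimeofday() stands in for the kernel's microuptime().
 */
#include <stdio.h>
#include <sys/time.h>
#include <unistd.h>

int main(void)
{
	struct timeval stamp, now, diff, total = { 0, 0 };
	int i;

	for (i = 0; i < 3; i++) {
		gettimeofday(&stamp, NULL);	/* disk_busy(): start of I/O */
		usleep(5000);			/* the I/O itself */
		gettimeofday(&now, NULL);	/* disk_unbusy(): end of I/O */
		timersub(&now, &stamp, &diff);
		timeradd(&total, &diff, &total);
	}
	printf("total busy time: %ld.%06ld s\n",
	    (long)total.tv_sec, (long)total.tv_usec);
	return 0;
}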
int
procfs_douptime(struct lwp *curl, struct proc *p,
    struct pfsnode *pfs, struct uio *uio)
{
	char *bf;
	int len;
	struct timeval runtime;
	u_int64_t idle;
	int error = 0;

	bf = malloc(LBFSZ, M_TEMP, M_WAITOK);

	microuptime(&runtime);
	idle = curcpu()->ci_schedstate.spc_cp_time[CP_IDLE];
	len = snprintf(bf, LBFSZ,
	    "%lld.%02lu %" PRIu64 ".%02" PRIu64 "\n",
	    (long long)runtime.tv_sec, (long)runtime.tv_usec / 10000,
	    idle / hz, (((idle % hz) * 100) / hz) % 100);

	if (len == 0)
		goto out;

	error = uiomove_frombuf(bf, len, uio);
out:
	free(bf, M_TEMP);
	return error;
}
static __inline void
ifpoll_time_get(union ifpoll_time *t)
{
	if (__predict_true(tsc_present))
		t->tsc = rdtsc();
	else
		microuptime(&t->tv);
}
/* OpenSolaris lbolt is in hz */
uint64_t
zfs_lbolt(void)
{
	struct timeval tv;
	uint64_t lbolt_hz;

	microuptime(&tv);
	/* 10000 us per tick: convert microseconds of uptime to 100 Hz ticks */
	lbolt_hz = ((uint64_t)tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10000;
	return (lbolt_hz);
}
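/*
 * Worked example of the conversion above as a standalone program: at
 * hz = 100 each tick is 10000 us, so dividing microseconds of uptime by
 * 10000 yields the tick count.  The uptime values are made up for
 * illustration.
 */
#include <stdint.h>
#include <stdio.h>

#define USEC_PER_SEC	1000000ULL

int main(void)
{
	uint64_t sec = 3600, usec = 250000;	/* 3600.25 s of uptime */
	uint64_t lbolt_hz = (sec * USEC_PER_SEC + usec) / 10000;

	/* 3600.25 s at 100 ticks/s -> 360025 ticks */
	printf("lbolt = %llu ticks\n", (unsigned long long)lbolt_hz);
	return 0;
}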
void
netisr_pollmore()
{
	struct timeval t;
	int kern_load;

	mtx_lock(&poll_mtx);
	if (!netisr_pollmore_scheduled) {
		mtx_unlock(&poll_mtx);
		return;
	}
	netisr_pollmore_scheduled = 0;
	phase = 5;
	if (residual_burst > 0) {
		netisr_poll_scheduled = 1;
		netisr_pollmore_scheduled = 1;
		netisr_sched_poll();
		mtx_unlock(&poll_mtx);
		/* will run immediately on return, followed by netisrs */
		return;
	}

	/* here we can account time spent in netisr's in this tick */
	microuptime(&t);
	kern_load = (t.tv_usec - poll_start_t.tv_usec) +
		(t.tv_sec - poll_start_t.tv_sec)*1000000;	/* us */
	kern_load = (kern_load * hz) / 10000;			/* 0..100 */
	if (kern_load > (100 - user_frac)) {
		/* try decrease ticks */
		if (poll_burst > 1)
			poll_burst--;
	} else {
		if (poll_burst < poll_burst_max)
			poll_burst++;
	}

	pending_polls--;
	if (pending_polls == 0)	/* we are done */
		phase = 0;
	else {
		/*
		 * Last cycle was long and caused us to miss one or more
		 * hardclock ticks.  Restart processing again, but slightly
		 * reduce the burst size to prevent that this happens again.
		 */
		poll_burst -= (poll_burst / 8);
		if (poll_burst < 1)
			poll_burst = 1;
		netisr_poll_scheduled = 1;
		netisr_pollmore_scheduled = 1;
		netisr_sched_poll();
		phase = 6;
	}
	mtx_unlock(&poll_mtx);
}
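/*
 * Worked example of the kern_load computation above: the time spent polling
 * in this tick (in microseconds) is scaled by hz/10000 to give a 0..100
 * percentage of the tick.  Standalone sketch with made-up numbers.
 */
#include <stdio.h>

int main(void)
{
	int hz = 1000;		/* 1000 ticks/s -> each tick is 1000 us */
	int elapsed_us = 400;	/* time spent in the poll handlers */
	int kern_load = (elapsed_us * hz) / 10000;

	/* 400 us of a 1000 us tick -> 40% */
	printf("kern_load = %d%%\n", kern_load);
	return 0;
}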
/*
 * Increment a disk's busy counter.  If the counter is going from
 * 0 to 1, set the timestamp.
 */
void
disk_busy(struct disk *diskp)
{

	/*
	 * XXX We'd like to use something as accurate as microtime(),
	 * but that doesn't depend on the system TOD clock.
	 */
	if (diskp->dk_busy++ == 0) {
		microuptime(&diskp->dk_timestamp);
	}
}
void
update_last_io_time(mount_t mp)
{
	struct _throttle_io_info_t *info;

	if (mp == NULL)
		info = &_throttle_io_info[LOWPRI_MAX_NUM_DEV - 1];
	else if (mp->mnt_throttle_info == NULL)
		info = &_throttle_io_info[mp->mnt_devbsdunit];
	else
		info = mp->mnt_throttle_info;

	microuptime(&info->last_IO_timestamp);
}
/*
 * The CPU ends up here when it's ready to run.
 * This is called from code in mptramp.s; at this point, we are running
 * in the idle pcb/idle stack of the new cpu.  When this function returns,
 * this processor will enter the idle loop and start looking for work.
 *
 * XXX should share some of this with init386 in machdep.c
 */
void
cpu_hatch(void *v)
{
	struct cpu_info *ci = (struct cpu_info *)v;
	int s;

	cpu_init_msrs(ci);
	cpu_probe_features(ci);
	cpu_feature &= ci->ci_feature_flags;

#ifdef DEBUG
	if (ci->ci_flags & CPUF_PRESENT)
		panic("%s: already running!?", ci->ci_dev->dv_xname);
#endif

	ci->ci_flags |= CPUF_PRESENT;

	lapic_enable();
	lapic_initclocks();

	while ((ci->ci_flags & CPUF_GO) == 0)
		delay(10);
#ifdef DEBUG
	if (ci->ci_flags & CPUF_RUNNING)
		panic("%s: already running!?", ci->ci_dev->dv_xname);
#endif

	lcr0(ci->ci_idle_pcb->pcb_cr0);
	cpu_init_idt();
	lapic_set_lvt();
	gdt_init_cpu(ci);
	fpuinit(ci);

	lldt(GSYSSEL(GLDT_SEL, SEL_KPL));

	cpu_init(ci);

	s = splhigh();
	lcr8(0);
	enable_intr();

	microuptime(&ci->ci_schedstate.spc_runtime);
	splx(s);

	SCHED_LOCK(s);
	cpu_switchto(NULL, sched_chooseproc());
}
static int
throttle_io_will_be_throttled_internal(int lowpri_window_msecs, void *throttle_info)
{
	struct _throttle_io_info_t *info = throttle_info;
	struct timeval elapsed;
	int elapsed_msecs;

	microuptime(&elapsed);
	timevalsub(&elapsed, &info->last_normal_IO_timestamp);
	elapsed_msecs = elapsed.tv_sec * 1000 + elapsed.tv_usec / 1000;

	if (lowpri_window_msecs == -1)	// use the max waiting time
		lowpri_window_msecs = lowpri_max_waiting_msecs;

	return elapsed_msecs < lowpri_window_msecs;
}
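/*
 * Standalone sketch of the window check above: convert an elapsed
 * struct timeval to whole milliseconds and compare it against the
 * throttle window.  The numbers are made up for illustration.
 */
#include <stdio.h>
#include <sys/time.h>

int main(void)
{
	struct timeval elapsed = { 1, 250000 };	/* 1.25 s since last normal I/O */
	int window_msecs = 2000;
	int elapsed_msecs = elapsed.tv_sec * 1000 + elapsed.tv_usec / 1000;

	/* 1250 ms < 2000 ms -> still inside the window, so throttle */
	printf("elapsed %d ms, throttled = %d\n",
	    elapsed_msecs, elapsed_msecs < window_msecs);
	return 0;
}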
/*
 * Reset the metrics counters on the given disk.  Note that we cannot
 * reset the busy counter, as it may cause a panic in disk_unbusy().
 * We also must avoid playing with the timestamp information, as it
 * may skew any pending transfer results.
 */
void
disk_resetstat(struct disk *diskp)
{
	int s = splbio();

	diskp->dk_rxfer = 0;
	diskp->dk_rbytes = 0;
	diskp->dk_wxfer = 0;
	diskp->dk_wbytes = 0;
	diskp->dk_seek = 0;

	microuptime(&diskp->dk_attachtime);

	timerclear(&diskp->dk_time);

	splx(s);
}
static void
racctd(void)
{
	struct thread *td;
	struct proc *p;
	struct timeval wallclock;
	uint64_t runtime;

	for (;;) {
		sx_slock(&allproc_lock);

		FOREACH_PROC_IN_SYSTEM(p) {
			if (p->p_state != PRS_NORMAL)
				continue;
			if (p->p_flag & P_SYSTEM)
				continue;

			microuptime(&wallclock);
			timevalsub(&wallclock, &p->p_stats->p_start);
			PROC_LOCK(p);
			PROC_SLOCK(p);
			FOREACH_THREAD_IN_PROC(p, td) {
				ruxagg(p, td);
				thread_lock(td);
				thread_unlock(td);
			}
			runtime = cputick2usec(p->p_rux.rux_runtime);
			PROC_SUNLOCK(p);
#ifdef notyet
			KASSERT(runtime >= p->p_prev_runtime,
			    ("runtime < p_prev_runtime"));
#else
			if (runtime < p->p_prev_runtime)
				runtime = p->p_prev_runtime;
#endif
			p->p_prev_runtime = runtime;
			mtx_lock(&racct_lock);
			racct_set_locked(p, RACCT_CPU, runtime);
			racct_set_locked(p, RACCT_WALLCLOCK,
			    wallclock.tv_sec * 1000000 + wallclock.tv_usec);
			mtx_unlock(&racct_lock);
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		pause("-", hz);
	}
}
int BTScanInitialize(	const FCB *	btreeFile,
			u_int32_t	startingNode,
			u_int32_t	startingRecord,
			u_int32_t	recordsFound,
			u_int32_t	bufferSize,
			BTScanState	*scanState)
{
	BTreeControlBlock	*btcb;

	//
	//	Make sure this is a valid B-Tree file
	//
	btcb = (BTreeControlBlock *) btreeFile->fcbBTCBPtr;
	if (btcb == NULL)
		return fsBTInvalidFileErr;

	//
	//	Make sure buffer size is big enough, and a multiple of the
	//	B-Tree node size
	//
	if ( bufferSize < btcb->nodeSize )
		return paramErr;
	bufferSize = (bufferSize / btcb->nodeSize) * btcb->nodeSize;

	//
	//	Set up the scanner's state
	//
	scanState->bufferSize		= bufferSize;
	scanState->bufferPtr		= NULL;
	scanState->btcb			= btcb;
	scanState->nodeNum		= startingNode;
	scanState->recordNum		= startingRecord;
	scanState->currentNodePtr	= NULL;
	scanState->nodesLeftInBuffer	= 0;	// no nodes currently in buffer
	scanState->recordsFound		= recordsFound;
	microuptime(&scanState->startTime);	// initialize our throttle

	return noErr;

} /* BTScanInitialize */
static int
zkbd_on(void *v)
{
#if NAPM > 0
	struct zkbd_softc *sc = (struct zkbd_softc *)v;
	int down;

	if (sc->sc_onkey_pin < 0)
		return 1;

	down = pxa2x0_gpio_get_bit(sc->sc_onkey_pin) ? 1 : 0;

	/*
	 * Change run mode depending on how long the key is held down.
	 * Ignore the key if it gets pressed while the lid is closed.
	 *
	 * Keys can bounce and we have to work around missed interrupts.
	 * Only the second edge is detected upon exit from sleep mode.
	 */
	if (down) {
		if (sc->sc_hinge == 3) {
			zkbdondown = 0;
		} else {
			microuptime(&zkbdontv);
			zkbdondown = 1;
		}
	} else if (zkbdondown) {
		if (ratecheck(&zkbdontv, &zkbdhalttv)) {
			if (kbd_reset == 1) {
				kbd_reset = 0;
				psignal(initproc, SIGUSR1);
			}
		} else if (ratecheck(&zkbdontv, &zkbdsleeptv)) {
			apm_suspends++;
		}
		zkbdondown = 0;
	}
#endif
	return 1;
}
/*
 * Hook from hardclock.  Tries to schedule a netisr, but keeps track
 * of lost ticks due to the previous handler taking too long.
 * Normally, this should not happen, because polling handler should
 * run for a short time.  However, in some cases (e.g. when there are
 * changes in link status etc.) the drivers take a very long time
 * (even in the order of milliseconds) to reset and reconfigure the
 * device, causing apparent lost polls.
 *
 * The first part of the code is just for debugging purposes, and tries
 * to count how often hardclock ticks are shorter than they should,
 * meaning either stray interrupts or delayed events.
 */
void
hardclock_device_poll(void)
{
	static struct timeval prev_t, t;
	int delta;

	if (poll_handlers == 0 || poll_shutting_down)
		return;

	microuptime(&t);
	delta = (t.tv_usec - prev_t.tv_usec) +
		(t.tv_sec - prev_t.tv_sec)*1000000;
	if (delta * hz < 500000)
		short_ticks++;
	else
		prev_t = t;

	if (pending_polls > 100) {
		/*
		 * Too much, assume it has stalled (not always true
		 * see comment above).
		 */
		stalled++;
		pending_polls = 0;
		phase = 0;
	}

	if (phase <= 2) {
		if (phase != 0)
			suspect++;
		phase = 1;
		netisr_poll_scheduled = 1;
		netisr_pollmore_scheduled = 1;
		netisr_sched_poll();
		phase = 2;
	}
	if (pending_polls++ > 0)
		lost_polls++;
}
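/*
 * Worked example of the short-tick test above: a tick lasts 1000000/hz
 * microseconds, so "delta * hz < 500000" is true exactly when the measured
 * interval is shorter than half a tick.  Standalone sketch with made-up
 * numbers.
 */
#include <stdio.h>

int main(void)
{
	int hz = 1000;			/* tick length = 1000 us, half tick = 500 us */
	int delta_us[] = { 300, 499, 500, 1000 };
	int i;

	for (i = 0; i < 4; i++)
		printf("delta = %4d us -> short tick: %s\n", delta_us[i],
		    (delta_us[i] * hz < 500000) ? "yes" : "no");
	return 0;
}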
xtime_t
gethrxtime (void)
{
# if HAVE_NANOUPTIME
  {
    struct timespec ts;
    nanouptime (&ts);
    return xtime_make (ts.tv_sec, ts.tv_nsec);
  }
# else

# if defined CLOCK_MONOTONIC && HAVE_CLOCK_GETTIME
  {
    struct timespec ts;
    if (clock_gettime (CLOCK_MONOTONIC, &ts) == 0)
      return xtime_make (ts.tv_sec, ts.tv_nsec);
  }
# endif

# if HAVE_MICROUPTIME
  {
    struct timeval tv;
    microuptime (&tv);
    return xtime_make (tv.tv_sec, 1000 * tv.tv_usec);
  }
# else
  /* No monotonically increasing clocks are available; fall back on a
     clock that might jump backwards, since it's the best we can do.  */
  {
    struct timespec ts;
    gettime (&ts);
    return xtime_make (ts.tv_sec, ts.tv_nsec);
  }
# endif
# endif
}
/* Should this be public? */
prng_error_status
prngForceReseed(PRNG *p, LONGLONG ticks)
{
	int i;
#ifdef WIN_NT
	FILETIME a, b, c, usertime;
#endif
	BYTE buf[64];
	BYTE dig[20];
#if defined(macintosh) || defined(__APPLE__)
#if (defined(TARGET_API_MAC_OSX) || defined(KERNEL_BUILD))
	struct timeval tv;
	int64_t endTime, curTime;
#else	/* TARGET_API_MAC_CARBON */
	UnsignedWide uwide;	/* struct needed for Microseconds() */
	LONGLONG start;
	LONGLONG now;
#endif
#endif

	CHECKSTATE(p);
	POOLCHECK(p);
	ZCHECK(ticks);

	/* Set up start and end times */
#if defined(macintosh) || defined(__APPLE__)
#if (defined(TARGET_API_MAC_OSX) || defined(KERNEL_BUILD))
	/* note we can't loop for more than a million microseconds */
#ifdef KERNEL_BUILD
	microuptime (&tv);
#else
	gettimeofday(&tv, NULL);
#endif
	endTime = (int64_t)tv.tv_sec*1000000LL + (int64_t)tv.tv_usec + ticks;
#else	/* TARGET_API_MAC_OSX */
	Microseconds(&uwide);
	start = UnsignedWideToUInt64(uwide);
#endif	/* TARGET_API_xxx */
#endif	/* macintosh */
	do {
		/* Do a couple of iterations between time checks */
		prngOutput(p, buf, 64);
		YSHA1Update(&p->pool, buf, 64);
		prngOutput(p, buf, 64);
		YSHA1Update(&p->pool, buf, 64);
		prngOutput(p, buf, 64);
		YSHA1Update(&p->pool, buf, 64);
		prngOutput(p, buf, 64);
		YSHA1Update(&p->pool, buf, 64);
		prngOutput(p, buf, 64);
		YSHA1Update(&p->pool, buf, 64);

#if defined(macintosh) || defined(__APPLE__)
#if defined(TARGET_API_MAC_OSX) || defined(KERNEL_BUILD)
#ifdef TARGET_API_MAC_OSX
		gettimeofday(&tv, NULL);
#else
		microuptime (&tv);
#endif
		curTime = (int64_t)tv.tv_sec*1000000LL + (int64_t)tv.tv_usec;
	} while (curTime < endTime);
#else
		Microseconds(&uwide);
		now = UnsignedWideToUInt64(uwide);
	} while ((now - start) < ticks);
/*
 * red support routines
 */
red_t *
red_alloc(struct ifnet *ifp, int weight, int inv_pmax, int th_min, int th_max,
    int flags, int pkttime)
{
	red_t *rp;
	int w, i;
	int npkts_per_sec;

	VERIFY(ifp != NULL);

	rp = zalloc(red_zone);
	if (rp == NULL)
		return (NULL);

	bzero(rp, red_size);
	rp->red_avg = 0;
	rp->red_idle = 1;

	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;

	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;

	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;

	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_ifp = ifp;
	rp->red_flags = (flags & REDF_USERFLAGS);
#if !PF_ECN
	if (rp->red_flags & REDF_ECN) {
		rp->red_flags &= ~REDF_ECN;
		log(LOG_ERR, "%s: RED ECN not available; ignoring "
		    "REDF_ECN flag!\n", if_name(ifp));
	}
#endif /* !PF_ECN */

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		printf("invalid weight value %d for red! use %d\n",
		    rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) *
	    rp->red_inv_pmax) << FP_SHIFT;

	/* allocate weight table */
	rp->red_wtab = wtab_alloc(rp->red_weight);
	if (rp->red_wtab == NULL) {
		red_destroy(rp);
		return (NULL);
	}

	microuptime(&rp->red_last);
	return (rp);
}
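/*
 * Standalone sketch of the wshift computation above: count how many times
 * the weight can be halved to get the shift amount, then round the weight
 * down to that power of two if it was not one already.  The starting weight
 * is made up for illustration.
 */
#include <stdio.h>

int main(void)
{
	int weight = 500;	/* non-power-of-two weight */
	int w, i, wshift;

	w = weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	wshift = i;
	w = 1 << wshift;
	if (w != weight)
		printf("invalid weight value %d for red! use %d\n", weight, w);

	/* 500 -> wshift = 8, weight rounded down to 256 */
	printf("wshift = %d, weight = %d\n", wshift, w);
	return 0;
}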
/*
 * The CPU ends up here when it's ready to run.
 * This is called from code in mptramp.s; at this point, we are running
 * in the idle pcb/idle stack of the new cpu.  When this function returns,
 * this processor will enter the idle loop and start looking for work.
 *
 * XXX should share some of this with init386 in machdep.c
 */
void
cpu_hatch(void *v)
{
	struct cpu_info *ci = (struct cpu_info *)v;
	int s;

	cpu_init_msrs(ci);

#ifdef DEBUG
	if (ci->ci_flags & CPUF_PRESENT)
		panic("%s: already running!?", ci->ci_dev->dv_xname);
#endif

	ci->ci_flags |= CPUF_PRESENT;

	lapic_enable();
	lapic_startclock();

	if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
		/*
		 * We need to wait until we can identify, otherwise dmesg
		 * output will be messy.
		 */
		while ((ci->ci_flags & CPUF_IDENTIFY) == 0)
			delay(10);

		identifycpu(ci);

		/* Signal we're done */
		atomic_clearbits_int(&ci->ci_flags, CPUF_IDENTIFY);
		/* Prevent identifycpu() from running again */
		atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFIED);
	}

	while ((ci->ci_flags & CPUF_GO) == 0)
		delay(10);
#ifdef DEBUG
	if (ci->ci_flags & CPUF_RUNNING)
		panic("%s: already running!?", ci->ci_dev->dv_xname);
#endif

	lcr0(ci->ci_idle_pcb->pcb_cr0);
	cpu_init_idt();
	lapic_set_lvt();
	gdt_init_cpu(ci);
	fpuinit(ci);

	lldt(0);

	cpu_init(ci);

	s = splhigh();
	lcr8(0);
	enable_intr();

	microuptime(&ci->ci_schedstate.spc_runtime);
	splx(s);

	SCHED_LOCK(s);
	cpu_switchto(NULL, sched_chooseproc());
}
int
spec_strategy(struct vnop_strategy_args *ap)
{
	buf_t	bp;
	int	bflags;
	int	policy;
	dev_t	bdev;
	uthread_t ut;
	size_t	devbsdunit;
	mount_t	mp;

	bp = ap->a_bp;
	bdev = buf_device(bp);
	bflags = buf_flags(bp);
	mp = buf_vnode(bp)->v_mount;

	if (kdebug_enable) {
		int code = 0;

		if (bflags & B_READ)
			code |= DKIO_READ;
		if (bflags & B_ASYNC)
			code |= DKIO_ASYNC;

		if (bflags & B_META)
			code |= DKIO_META;
		else if (bflags & B_PAGEIO)
			code |= DKIO_PAGING;

		KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
		    bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0);
	}
	if (((bflags & (B_IOSTREAMING | B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) &&
	    mp && (mp->mnt_kern_flag & MNTK_ROOTDEV))
		hard_throttle_on_root = 1;

	if (mp != NULL)
		devbsdunit = mp->mnt_devbsdunit;
	else
		devbsdunit = LOWPRI_MAX_NUM_DEV - 1;

	throttle_info_update(&_throttle_io_info[devbsdunit], bflags);

	if ((policy = throttle_get_io_policy(&ut)) == IOPOL_THROTTLE) {
		bp->b_flags |= B_THROTTLED_IO;
	}

	if ((bflags & B_READ) == 0) {
		microuptime(&_throttle_io_info[devbsdunit].last_IO_timestamp);
		if (mp) {
			INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_write_size);
		}
	} else if (mp) {
		INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_read_size);
	}

	(*bdevsw[major(bdev)].d_strategy)(bp);

	return (0);
}
void throttle_info_update(void *throttle_info, int flags)
{
	struct _throttle_io_info_t *info = throttle_info;
	struct uthread *ut;
	int policy;
	int is_throttleable_io = 0;
	int is_passive_io = 0;
	SInt32 oldValue;

	if (!lowpri_IO_initial_window_msecs || (info == NULL))
		return;
	policy = throttle_get_io_policy(&ut);

	switch (policy) {
	case IOPOL_DEFAULT:
	case IOPOL_NORMAL:
		break;
	case IOPOL_THROTTLE:
		is_throttleable_io = 1;
		break;
	case IOPOL_PASSIVE:
		is_passive_io = 1;
		break;
	default:
		printf("unknown I/O policy %d", policy);
		break;
	}

	if (!is_throttleable_io && ISSET(flags, B_PASSIVE))
		is_passive_io |= 1;

	if (!is_throttleable_io) {
		if (!is_passive_io) {
			microuptime(&info->last_normal_IO_timestamp);
		}
	} else if (ut) {
		/*
		 * I'd really like to do the IOSleep here, but
		 * we may be holding all kinds of filesystem related locks
		 * and the pages for this I/O marked 'busy'...
		 * we don't want to cause a normal task to block on
		 * one of these locks while we're throttling a task marked
		 * for low priority I/O... we'll mark the uthread and
		 * do the delay just before we return from the system
		 * call that triggered this I/O or from vnode_pagein
		 */
		if (ut->uu_lowpri_window == 0) {
			ut->uu_throttle_info = info;
			throttle_info_ref(ut->uu_throttle_info);
			DEBUG_ALLOC_THROTTLE_INFO("updating info = %p\n", info, info );

			oldValue = OSIncrementAtomic(&info->numthreads_throttling);
			if (oldValue < 0) {
				panic("%s: numthreads negative", __func__);
			}
			ut->uu_lowpri_window = lowpri_IO_initial_window_msecs;
			ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * oldValue;
		} else {
			/* The thread sends I/Os to different devices within the same system call */
			if (ut->uu_throttle_info != info) {
				struct _throttle_io_info_t *old_info = ut->uu_throttle_info;

				// keep track of the numthreads in the right device
				OSDecrementAtomic(&old_info->numthreads_throttling);
				OSIncrementAtomic(&info->numthreads_throttling);

				DEBUG_ALLOC_THROTTLE_INFO("switching from info = %p\n", old_info, old_info );
				DEBUG_ALLOC_THROTTLE_INFO("switching to info = %p\n", info, info );
				/* This thread no longer needs a reference on that throttle info */
				throttle_info_rel(ut->uu_throttle_info);
				ut->uu_throttle_info = info;
				/* Need to take a reference on this throttle info */
				throttle_info_ref(ut->uu_throttle_info);
			}
			int numthreads = MAX(1, info->numthreads_throttling);
			ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * numthreads;
			if (ut->uu_lowpri_window > lowpri_max_window_msecs * numthreads)
				ut->uu_lowpri_window = lowpri_max_window_msecs * numthreads;
		}
	}
}
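/*
 * Worked example of the throttle-window arithmetic above: a newly throttled
 * thread starts at the initial window plus one increment per thread already
 * throttling on the device, and later growth is capped at the maximum
 * window times the thread count.  Standalone sketch; the tunable values
 * below are made up and only mirror the lowpri_* globals in shape.
 */
#include <stdio.h>

int main(void)
{
	int initial_window_msecs = 100;	/* made-up tunables */
	int window_msecs_inc = 50;
	int max_window_msecs = 200;
	int numthreads = 3;		/* threads already throttling on this device */
	int window, i;

	/* first throttled I/O on this device for this thread */
	window = initial_window_msecs + window_msecs_inc * numthreads;

	/* later I/Os to the same device grow the window, capped per thread count */
	for (i = 0; i < 3; i++) {
		window += window_msecs_inc * numthreads;
		if (window > max_window_msecs * numthreads)
			window = max_window_msecs * numthreads;
	}

	/* 250 -> 400 -> 550 -> capped at 600 ms */
	printf("low-priority window = %d ms\n", window);
	return 0;
}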
static void
do_fork(struct thread *td, struct fork_req *fr, struct proc *p2,
    struct thread *td2, struct vmspace *vm2, struct file *fp_procdesc)
{
	struct proc *p1, *pptr;
	int trypid;
	struct filedesc *fd;
	struct filedesc_to_leader *fdtol;
	struct sigacts *newsigacts;

	sx_assert(&proctree_lock, SX_SLOCKED);
	sx_assert(&allproc_lock, SX_XLOCKED);

	p1 = td->td_proc;

	trypid = fork_findpid(fr->fr_flags);

	sx_sunlock(&proctree_lock);

	p2->p_state = PRS_NEW;		/* protect against others */
	p2->p_pid = trypid;
	AUDIT_ARG_PID(p2->p_pid);
	LIST_INSERT_HEAD(&allproc, p2, p_list);
	allproc_gen++;
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
	tidhash_add(td2);
	PROC_LOCK(p2);
	PROC_LOCK(p1);

	sx_xunlock(&allproc_lock);

	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    __rangeof(struct proc, p_startcopy, p_endcopy));
	pargs_hold(p2->p_args);

	PROC_UNLOCK(p1);

	bzero(&p2->p_startzero,
	    __rangeof(struct proc, p_startzero, p_endzero));

	/* Tell the prison that we exist. */
	prison_proc_hold(p2->p_ucred->cr_prison);

	PROC_UNLOCK(p2);

	/*
	 * Malloc things while we don't hold any locks.
	 */
	if (fr->fr_flags & RFSIGSHARE)
		newsigacts = NULL;
	else
		newsigacts = sigacts_alloc();

	/*
	 * Copy filedesc.
	 */
	if (fr->fr_flags & RFCFDG) {
		fd = fdinit(p1->p_fd, false);
		fdtol = NULL;
	} else if (fr->fr_flags & RFFDG) {
		fd = fdcopy(p1->p_fd);
		fdtol = NULL;
	} else {
		fd = fdshare(p1->p_fd);
		if (p1->p_fdtol == NULL)
			p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL,
			    p1->p_leader);
		if ((fr->fr_flags & RFTHREAD) != 0) {
			/*
			 * Shared file descriptor table, and shared
			 * process leaders.
			 */
			fdtol = p1->p_fdtol;
			FILEDESC_XLOCK(p1->p_fd);
			fdtol->fdl_refcount++;
			FILEDESC_XUNLOCK(p1->p_fd);
		} else {
			/*
			 * Shared file descriptor table, and different
			 * process leaders.
			 */
			fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
			    p1->p_fd, p2);
		}
	}
	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */

	PROC_LOCK(p2);
	PROC_LOCK(p1);

	bzero(&td2->td_startzero,
	    __rangeof(struct thread, td_startzero, td_endzero));

	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    __rangeof(struct thread, td_startcopy, td_endcopy));

	bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name));
	td2->td_sigstk = td->td_sigstk;
	td2->td_flags = TDF_INMEM;
	td2->td_lend_user_pri = PRI_MAX;

#ifdef VIMAGE
	td2->td_vnet = NULL;
	td2->td_vnet_lpush = NULL;
#endif

	/*
	 * Allow the scheduler to initialize the child.
	 */
	thread_lock(td);
	sched_fork(td, td2);
	thread_unlock(td);

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 */
	p2->p_flag = P_INMEM;
	p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP);
	p2->p_swtick = ticks;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);

	/*
	 * Whilst the proc lock is held, copy the VM domain data out
	 * using the VM domain method.
	 */
	vm_domain_policy_init(&p2->p_vm_dom_policy);
	vm_domain_policy_localcopy(&p2->p_vm_dom_policy,
	    &p1->p_vm_dom_policy);

	if (fr->fr_flags & RFSIGSHARE) {
		p2->p_sigacts = sigacts_hold(p1->p_sigacts);
	} else {
		sigacts_copy(newsigacts, p1->p_sigacts);
		p2->p_sigacts = newsigacts;
	}

	if (fr->fr_flags & RFTSIGZMB)
		p2->p_sigparent = RFTSIGNUM(fr->fr_flags);
	else if (fr->fr_flags & RFLINUXTHPN)
		p2->p_sigparent = SIGUSR1;
	else
		p2->p_sigparent = SIGCHLD;

	p2->p_textvp = p1->p_textvp;
	p2->p_fd = fd;
	p2->p_fdtol = fdtol;

	if (p1->p_flag2 & P2_INHERIT_PROTECTED) {
		p2->p_flag |= P_PROTECTED;
		p2->p_flag2 |= P2_INHERIT_PROTECTED;
	}

	/*
	 * p_limit is copy-on-write.  Bump its refcount.
	 */
	lim_fork(p1, p2);

	thread_cow_get_proc(td2, p2);

	pstats_fork(p1->p_stats, p2->p_stats);

	PROC_UNLOCK(p1);
	PROC_UNLOCK(p2);

	/* Bump references to the text vnode (for procfs). */
	if (p2->p_textvp)
		vrefact(p2->p_textvp);

	/*
	 * Set up linkage for kernel based threading.
	 */
	if ((fr->fr_flags & RFTHREAD) != 0) {
		mtx_lock(&ppeers_lock);
		p2->p_peers = p1->p_peers;
		p1->p_peers = p2;
		p2->p_leader = p1->p_leader;
		mtx_unlock(&ppeers_lock);
		PROC_LOCK(p1->p_leader);
		if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(p1->p_leader);
			/*
			 * The task leader is exiting, so process p1 is
			 * going to be killed shortly.  Since p1 obviously
			 * isn't dead yet, we know that the leader is either
			 * sending SIGKILL's to all the processes in this
			 * task or is sleeping waiting for all the peers to
			 * exit.  We let p1 complete the fork, but we need
			 * to go ahead and kill the new process p2 since
			 * the task leader may not get a chance to send
			 * SIGKILL to it.  We leave it on the list so that
			 * the task leader will wait for this new process
			 * to commit suicide.
			 */
			PROC_LOCK(p2);
			kern_psignal(p2, SIGKILL);
			PROC_UNLOCK(p2);
		} else
			PROC_UNLOCK(p1->p_leader);
	} else {
		p2->p_peers = NULL;
		p2->p_leader = p2;
	}

	sx_xlock(&proctree_lock);
	PGRP_LOCK(p1->p_pgrp);
	PROC_LOCK(p2);
	PROC_LOCK(p1);

	/*
	 * Preserve some more flags in subprocess.  P_PROFIL has already
	 * been preserved.
	 */
	p2->p_flag |= p1->p_flag & P_SUGID;
	td2->td_pflags |= (td->td_pflags & TDP_ALTSTACK) | TDP_FORKING;
	SESS_LOCK(p1->p_session);
	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		p2->p_flag |= P_CONTROLT;
	SESS_UNLOCK(p1->p_session);
	if (fr->fr_flags & RFPPWAIT)
		p2->p_flag |= P_PPWAIT;

	p2->p_pgrp = p1->p_pgrp;
	LIST_INSERT_AFTER(p1, p2, p_pglist);
	PGRP_UNLOCK(p1->p_pgrp);
	LIST_INIT(&p2->p_children);
	LIST_INIT(&p2->p_orphans);

	callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0);

	/*
	 * If PF_FORK is set, the child process inherits the
	 * procfs ioctl flags from its parent.
	 */
	if (p1->p_pfsflags & PF_FORK) {
		p2->p_stops = p1->p_stops;
		p2->p_pfsflags = p1->p_pfsflags;
	}

	/*
	 * This begins the section where we must prevent the parent
	 * from being swapped.
	 */
	_PHOLD(p1);
	PROC_UNLOCK(p1);

	/*
	 * Attach the new process to its parent.
	 *
	 * If RFNOWAIT is set, the newly created process becomes a child
	 * of init.  This effectively disassociates the child from the
	 * parent.
	 */
	if ((fr->fr_flags & RFNOWAIT) != 0) {
		pptr = p1->p_reaper;
		p2->p_reaper = pptr;
	} else {
		p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ?
		    p1 : p1->p_reaper;
		pptr = p1;
	}
	p2->p_pptr = pptr;
	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
	LIST_INIT(&p2->p_reaplist);
	LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling);
	if (p2->p_reaper == p1)
		p2->p_reapsubtree = p2->p_pid;
	sx_xunlock(&proctree_lock);

	/* Inform accounting that we have forked. */
	p2->p_acflag = AFORK;
	PROC_UNLOCK(p2);

#ifdef KTRACE
	ktrprocfork(p1, p2);
#endif

	/*
	 * Finish creating the child process.  It will return via a different
	 * execution path later.  (ie: directly into user mode)
	 */
	vm_forkproc(td, p2, td2, vm2, fr->fr_flags);

	if (fr->fr_flags == (RFFDG | RFPROC)) {
		VM_CNT_INC(v_forks);
		VM_CNT_ADD(v_forkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else if (fr->fr_flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
		VM_CNT_INC(v_vforks);
		VM_CNT_ADD(v_vforkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else if (p1 == &proc0) {
		VM_CNT_INC(v_kthreads);
		VM_CNT_ADD(v_kthreadpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else {
		VM_CNT_INC(v_rforks);
		VM_CNT_ADD(v_rforkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	}

	/*
	 * Associate the process descriptor with the process before anything
	 * can happen that might cause that process to need the descriptor.
	 * However, don't do this until after fork(2) can no longer fail.
	 */
	if (fr->fr_flags & RFPROCDESC)
		procdesc_new(p2, fr->fr_pd_flags);

	/*
	 * Both processes are set up, now check if any loadable modules want
	 * to adjust anything.
	 */
	EVENTHANDLER_INVOKE(process_fork, p1, p2, fr->fr_flags);

	/*
	 * Set the child start time and mark the process as being complete.
	 */
	PROC_LOCK(p2);
	PROC_LOCK(p1);
	microuptime(&p2->p_stats->p_start);
	PROC_SLOCK(p2);
	p2->p_state = PRS_NORMAL;
	PROC_SUNLOCK(p2);

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the new process so that any
	 * tracepoints inherited from the parent can be removed.  We have to do
	 * this only after p_state is PRS_NORMAL since the fasttrap module will
	 * use pfind() later on.
	 */
	if ((fr->fr_flags & RFMEM) == 0 && dtrace_fasttrap_fork)
		dtrace_fasttrap_fork(p1, p2);
#endif
	/*
	 * Hold the process so that it cannot exit after we make it runnable,
	 * but before we wait for the debugger.
	 */
	_PHOLD(p2);
	if (p1->p_ptevents & PTRACE_FORK) {
		/*
		 * Arrange for debugger to receive the fork event.
		 *
		 * We can report PL_FLAG_FORKED regardless of
		 * P_FOLLOWFORK settings, but it does not make sense
		 * for a runaway child.
		 */
		td->td_dbgflags |= TDB_FORK;
		td->td_dbg_forked = p2->p_pid;
		td2->td_dbgflags |= TDB_STOPATFORK;
	}
	if (fr->fr_flags & RFPPWAIT) {
		td->td_pflags |= TDP_RFPPWAIT;
		td->td_rfppwait_p = p2;
		td->td_dbgflags |= TDB_VFORK;
	}
	PROC_UNLOCK(p2);

	/*
	 * Now can be swapped.
	 */
	_PRELE(p1);
	PROC_UNLOCK(p1);

	/*
	 * Tell any interested parties about the new process.
	 */
	knote_fork(p1->p_klist, p2->p_pid);
	SDT_PROBE3(proc, , , create, p2, p1, fr->fr_flags);

	if (fr->fr_flags & RFPROCDESC) {
		procdesc_finit(p2->p_procdesc, fp_procdesc);
		fdrop(fp_procdesc, td);
	}

	if ((fr->fr_flags & RFSTOPPED) == 0) {
		/*
		 * If RFSTOPPED not requested, make child runnable and
		 * add to run queue.
		 */
		thread_lock(td2);
		TD_SET_CAN_RUN(td2);
		sched_add(td2, SRQ_BORING);
		thread_unlock(td2);
		if (fr->fr_pidp != NULL)
			*fr->fr_pidp = p2->p_pid;
	} else {
		*fr->fr_procp = p2;
	}

	PROC_LOCK(p2);
	/*
	 * Wait until debugger is attached to child.
	 */
	while (td2->td_proc == p2 && (td2->td_dbgflags & TDB_STOPATFORK) != 0)
		cv_wait(&p2->p_dbgwait, &p2->p_mtx);
	_PRELE(p2);
	racct_proc_fork_done(p2);
	PROC_UNLOCK(p2);
}