/*
 * Device ioctl operation.
 */
int
spec_ioctl(struct vnop_ioctl_args *ap)
{
    proc_t p = vfs_context_proc(ap->a_context);
    dev_t dev = ap->a_vp->v_rdev;
    int retval = 0;

    KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 0) | DBG_FUNC_START,
        (unsigned int)dev, (unsigned int)ap->a_command, (unsigned int)ap->a_fflag,
        (unsigned int)ap->a_vp->v_type, 0);

    switch (ap->a_vp->v_type) {
    case VCHR:
        retval = (*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, p);
        break;

    case VBLK:
        retval = (*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, p);
        break;

    default:
        panic("spec_ioctl");
        /* NOTREACHED */
    }

    KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 0) | DBG_FUNC_END,
        (unsigned int)dev, (unsigned int)ap->a_command, (unsigned int)ap->a_fflag,
        retval, 0);

    return (retval);
}
/*
 * Event timer interrupt.
 *
 * XXX a drawback of this implementation is that events serviced earlier must not set deadlines
 *     that occur before the entire chain completes.
 *
 * XXX a better implementation would use a set of generic callouts and iterate over them
 */
void
etimer_intr(
    __unused int        inuser,
    __unused uint64_t   iaddr)
{
    uint64_t                abstime;
    rtclock_timer_t         *mytimer;
    struct per_proc_info    *pp;

    pp = getPerProc();

    mytimer = &pp->rtclock_timer;           /* Point to the event timer */

    abstime = mach_absolute_time();         /* Get the time now */

    /* is it time for power management state change? */
    if (pp->pms.pmsPop <= abstime) {
        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_START, 0, 0, 0, 0, 0);
        pmsStep(1);                         /* Yes, advance step */
        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_END, 0, 0, 0, 0, 0);

        abstime = mach_absolute_time();     /* Get the time again since we ran a bit */
    }

    /* has a pending clock timer expired? */
    if (mytimer->deadline <= abstime) {     /* Have we expired the deadline? */
        mytimer->has_expired = TRUE;        /* Remember that we popped */
        mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
        mytimer->has_expired = FALSE;
    }

    /* schedule our next deadline */
    pp->rtcPop = EndOfAllTime;              /* any real deadline will be earlier */
    etimer_resync_deadlines();
}
/*
 * Call for enabling global system override.
 * This should be called only with the sys_override_lock held.
 */
static void
enable_system_override(uint64_t flags)
{
    if (flags & SYS_OVERRIDE_IO_THROTTLE) {
        if (io_throttle_assert_cnt == 0) {
            /* Disable I/O Throttling */
            printf("Process %s [%d] disabling system-wide I/O Throttling\n",
                current_proc()->p_comm, current_proc()->p_pid);
            KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_THROTTLE, IO_THROTTLE_DISABLE) | DBG_FUNC_START,
                current_proc()->p_pid, 0, 0, 0, 0);
            sys_override_io_throttle(THROTTLE_IO_DISABLE);
        }
        io_throttle_assert_cnt++;
    }

    if (flags & SYS_OVERRIDE_CPU_THROTTLE) {
        if (cpu_throttle_assert_cnt == 0) {
            /* Disable CPU Throttling */
            printf("Process %s [%d] disabling system-wide CPU Throttling\n",
                current_proc()->p_comm, current_proc()->p_pid);
            KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_CPU_THROTTLE_DISABLE) | DBG_FUNC_START,
                current_proc()->p_pid, 0, 0, 0, 0);
            sys_override_cpu_throttle(CPU_THROTTLE_DISABLE);
        }
        cpu_throttle_assert_cnt++;
    }
}
/*
 * lck_rw_clear_promotion: Undo priority promotions when the last RW
 * lock is released by a thread (if a promotion was active)
 */
void
lck_rw_clear_promotion(thread_t thread)
{
    assert(thread->rwlock_count == 0);

    /* Cancel any promotions if the thread had actually blocked while holding a RW lock */
    spl_t s = splsched();

    thread_lock(thread);

    if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
        thread->sched_flags &= ~TH_SFLAG_RW_PROMOTED;

        if (thread->sched_flags & TH_SFLAG_PROMOTED) {
            /* Thread still has a mutex promotion */
        } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
            KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
                thread->sched_pri, DEPRESSPRI, 0, 0, 0);

            set_sched_pri(thread, DEPRESSPRI);
        } else {
            KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
                thread->sched_pri, thread->base_pri, 0, 0, 0);

            thread_recompute_sched_pri(thread, FALSE);
        }
    }

    thread_unlock(thread);
    splx(s);
}
/*
 * Routine: lck_mtx_unlock_wakeup
 *
 * Invoked on unlock when there is contention.
 *
 * Called with the interlock locked.
 */
void
lck_mtx_unlock_wakeup (
    lck_mtx_t   *lck,
    thread_t    holder)
{
    thread_t    thread = current_thread();
    lck_mtx_t   *mutex;

    if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
        mutex = lck;
    else
        mutex = &lck->lck_mtx_ptr->lck_mtx;

    if (thread != holder)
        panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START,
        (int)lck, (int)holder, 0, 0, 0);

    assert(mutex->lck_mtx_waiters > 0);
    thread_wakeup_one((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));

    if (thread->promotions > 0) {
        spl_t   s = splsched();

        thread_lock(thread);
        if (--thread->promotions == 0 &&
            (thread->sched_flags & TH_SFLAG_PROMOTED)) {
            thread->sched_flags &= ~TH_SFLAG_PROMOTED;

            if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
                /* Thread still has a RW lock promotion */
            } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
                KERNEL_DEBUG_CONSTANT(
                    MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
                    thread->sched_pri, DEPRESSPRI, 0, lck, 0);

                set_sched_pri(thread, DEPRESSPRI);
            } else {
                if (thread->priority < thread->sched_pri) {
                    KERNEL_DEBUG_CONSTANT(
                        MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
                        thread->sched_pri, thread->priority, 0, lck, 0);
                }

                SCHED(compute_priority)(thread, FALSE);
            }
        }
        thread_unlock(thread);
        splx(s);
    }

    KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END,
        0, 0, 0, 0, 0);
}
__private_extern__ kern_return_t
chudxnu_cpu_timer_callback_enter(
    chudxnu_cpu_timer_callback_func_t func,
    uint32_t time,
    uint32_t units)
{
    chudcpu_data_t  *chud_proc_info;
    boolean_t       oldlevel;

    oldlevel = ml_set_interrupts_enabled(FALSE);
    chud_proc_info = (chudcpu_data_t *)(current_cpu_datap()->cpu_chud);

    // cancel any existing callback for this cpu
    timer_call_cancel(&(chud_proc_info->cpu_timer_call));

    chud_proc_info->cpu_timer_callback_fn = func;

    clock_interval_to_deadline(time, units, &(chud_proc_info->t_deadline));
    timer_call_setup(&(chud_proc_info->cpu_timer_call),
                     chudxnu_private_cpu_timer_callback, NULL);
    timer_call_enter(&(chud_proc_info->cpu_timer_call),
                     chud_proc_info->t_deadline);

    KERNEL_DEBUG_CONSTANT(
        MACHDBG_CODE(DBG_MACH_CHUD, CHUD_TIMER_CALLBACK_ENTER) | DBG_FUNC_NONE,
        (uint32_t) func, time, units, 0, 0);

    ml_set_interrupts_enabled(oldlevel);
    return KERN_SUCCESS;
}
static uint64_t
rtc_lapic_set_tsc_deadline_timer(uint64_t deadline, uint64_t now)
{
    uint64_t delta;
    uint64_t delta_tsc;
    uint64_t tsc = rdtsc64();
    uint64_t set = 0;

    if (deadline > 0) {
        /*
         * Convert to TSC
         */
        delta = deadline_to_decrementer(deadline, now);
        set = now + delta;
        delta_tsc = tmrCvt(delta, tscFCvtn2t);
        lapic_set_tsc_deadline_timer(tsc + delta_tsc);
    } else {
        lapic_set_tsc_deadline_timer(0);
    }

    KERNEL_DEBUG_CONSTANT(
        DECR_SET_TSC_DEADLINE | DBG_FUNC_NONE,
        now, deadline,
        tsc, lapic_get_tsc_deadline_timer(),
        0);

    return set;
}
/*
 * Callout from context switch if the thread goes
 * off core with a positive rwlock_count
 *
 * Called at splsched with the thread locked
 */
void
lck_rw_set_promotion_locked(thread_t thread)
{
    if (LcksOpts & disLkRWPrio)
        return;

    integer_t priority;

    priority = thread->sched_pri;

    if (priority < thread->base_pri)
        priority = thread->base_pri;
    if (priority < BASEPRI_BACKGROUND)
        priority = BASEPRI_BACKGROUND;

    if ((thread->sched_pri < priority) ||
        !(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
        KERNEL_DEBUG_CONSTANT(
            MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE) | DBG_FUNC_NONE,
            (uintptr_t)thread_tid(thread), thread->sched_pri,
            thread->base_pri, priority, 0);

        thread->sched_flags |= TH_SFLAG_RW_PROMOTED;

        if (thread->sched_pri < priority)
            set_sched_pri(thread, priority);
    }
}
static uint64_t
rtc_lapic_set_timer(uint64_t deadline, uint64_t now)
{
    uint64_t count;
    uint64_t set = 0;

    if (deadline > 0) {
        /*
         * Convert delta to bus ticks
         * - time now is not relevant
         */
        count = deadline_to_decrementer(deadline, now);
        set = now + count;
        lapic_set_timer_fast((uint32_t) tmrCvt(count, busFCvtn2t));
    } else {
        lapic_set_timer(FALSE, one_shot, divide_by_1, 0);
    }

    KERNEL_DEBUG_CONSTANT(
        DECR_SET_APIC_DEADLINE | DBG_FUNC_NONE,
        now, deadline,
        set, LAPIC_READ(TIMER_CURRENT_COUNT),
        0);

    return set;
}
static kern_return_t
chudxnu_private_cpu_signal_handler(int request)
{
    chudxnu_cpusig_callback_func_t fn = cpusig_callback_fn;

    if (fn) {
        x86_thread_state_t state;
        mach_msg_type_number_t count = x86_THREAD_STATE_COUNT;

        if (chudxnu_thread_get_state(current_thread(),
                                     x86_THREAD_STATE,
                                     (thread_state_t) &state, &count,
                                     FALSE) == KERN_SUCCESS) {
            KERNEL_DEBUG_CONSTANT(
                MACHDBG_CODE(DBG_MACH_CHUD, CHUD_CPUSIG_CALLBACK) | DBG_FUNC_NONE,
                (uint32_t)fn, request, 0, 0, 0);
            return (fn)(
                request, x86_THREAD_STATE,
                (thread_state_t) &state, count);
        } else {
            return KERN_FAILURE;
        }
    }
    return KERN_SUCCESS; //ignored
}
/*
 * Log (part of) a pathname using the KERNEL_DEBUG_CONSTANT mechanism, as used
 * by fs_usage.  The path up to and including the current component name are
 * logged.  Up to NUMPARMS*4 bytes of pathname will be logged.  If the path
 * to be logged is longer than that, then the last NUMPARMS*4 bytes are logged.
 * That is, the truncation removes the leading portion of the path.
 *
 * The logging is done via multiple KERNEL_DEBUG_CONSTANT calls.  The first one
 * is marked with DBG_FUNC_START.  The last one is marked with DBG_FUNC_END
 * (in addition to DBG_FUNC_START if it is also the first).  There may be
 * intermediate ones with neither DBG_FUNC_START nor DBG_FUNC_END.
 *
 * The first KERNEL_DEBUG_CONSTANT passes the vnode pointer and 12 bytes of
 * pathname.  The remaining KERNEL_DEBUG_CONSTANT calls add 16 bytes of pathname
 * each.  The minimum number of KERNEL_DEBUG_CONSTANT calls required to pass
 * the path are used.  Any excess padding in the final KERNEL_DEBUG_CONSTANT
 * (because not all of the 12 or 16 bytes are needed for the remainder of the
 * path) is set to zero bytes, or '>' if there is more path beyond the
 * current component name (usually because an intermediate component was not
 * found).
 *
 * NOTE: If the path length is greater than NUMPARMS*4, or is not of the form
 * 12+N*16, there will be no padding.
 *
 * TODO: If there is more path beyond the current component name, should we
 * force some padding?  For example, a lookup for /foo_bar_baz/spam that
 * fails because /foo_bar_baz is not found will only log "/foo_bar_baz", with
 * no '>' padding.  But /foo_bar/spam would log "/foo_bar>>>>".
 */
static void
kdebug_lookup(struct vnode *dp, struct componentname *cnp)
{
    unsigned int i;
    int code;
    int dbg_namelen;
    char *dbg_nameptr;
    long dbg_parms[NUMPARMS];

    /* Collect the pathname for tracing */
    dbg_namelen = (cnp->cn_nameptr - cnp->cn_pnbuf) + cnp->cn_namelen;
    dbg_nameptr = cnp->cn_nameptr + cnp->cn_namelen;

    if (dbg_namelen > (int)sizeof(dbg_parms))
        dbg_namelen = sizeof(dbg_parms);
    dbg_nameptr -= dbg_namelen;

    /* Copy the (possibly truncated) path itself */
    memcpy(dbg_parms, dbg_nameptr, dbg_namelen);

    /* Pad with '\0' or '>' */
    if (dbg_namelen < (int)sizeof(dbg_parms)) {
        memset((char *)dbg_parms + dbg_namelen,
               *(cnp->cn_nameptr + cnp->cn_namelen) ? '>' : 0,
               sizeof(dbg_parms) - dbg_namelen);
    }

    /*
     * In the event that we collect multiple, consecutive pathname
     * entries, we must mark the start of the path's string and the end.
     */
    code = (FSDBG_CODE(DBG_FSRW,36)) | DBG_FUNC_START;

    if (dbg_namelen <= 12)
        code |= DBG_FUNC_END;

    KERNEL_DEBUG_CONSTANT(code, (unsigned int)dp, dbg_parms[0], dbg_parms[1], dbg_parms[2], 0);

    code &= ~DBG_FUNC_START;

    for (i=3, dbg_namelen -= 12; dbg_namelen > 0; i+=4, dbg_namelen -= 16) {
        if (dbg_namelen <= 16)
            code |= DBG_FUNC_END;

        KERNEL_DEBUG_CONSTANT(code, dbg_parms[i], dbg_parms[i+1], dbg_parms[i+2], dbg_parms[i+3], 0);
    }
}
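/*
 * A minimal standalone sketch (not from the kernel sources) illustrating the
 * record layout described in the comment above: the first trace record carries
 * the vnode pointer plus 12 bytes of path, and each additional record carries
 * 16 more bytes, up to NUMPARMS*4 bytes total.  The NUMPARMS value and the
 * helper name below are assumptions made for illustration only.
 */
#include <stdio.h>

#define NUMPARMS 23     /* assumed value, matching the dbg_parms[NUMPARMS] array above */

/* Hypothetical helper: how many KERNEL_DEBUG_CONSTANT records does a path of
 * path_len bytes take under the 12 + N*16 scheme? */
static unsigned int
kdebug_lookup_record_count(unsigned int path_len)
{
    unsigned int max_len = NUMPARMS * 4;    /* longer paths are truncated from the front */

    if (path_len > max_len)
        path_len = max_len;
    if (path_len <= 12)
        return 1;
    /* 12 bytes ride in the first record, then 16 bytes per additional record */
    return 1 + (path_len - 12 + 15) / 16;
}

int
main(void)
{
    /* "/foo_bar/spam" is 13 bytes: 12 in the first record, the rest in a second */
    printf("%u\n", kdebug_lookup_record_count(13));     /* prints 2 */
    return 0;
}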
__private_extern__ kern_return_t
chudxnu_cpusig_send(int otherCPU, uint32_t request_code)
{
    int                         thisCPU;
    kern_return_t               retval = KERN_FAILURE;
    chudcpu_signal_request_t    request;
    uint64_t                    deadline;
    chudcpu_data_t              *target_chudp;
    boolean_t                   old_level;

    disable_preemption();
    // force interrupts on for a cross CPU signal.
    old_level = chudxnu_set_interrupts_enabled(TRUE);
    thisCPU = cpu_number();

    if ((unsigned) otherCPU < real_ncpus &&
        thisCPU != otherCPU &&
        cpu_data_ptr[otherCPU]->cpu_running) {

        target_chudp = (chudcpu_data_t *) cpu_data_ptr[otherCPU]->cpu_chud;

        /* Fill out request */
        request.req_sync = 0xFFFFFFFF;          /* set sync flag */
        //request.req_type = CPRQchud;          /* set request type */
        request.req_code = request_code;        /* set request */

        KERNEL_DEBUG_CONSTANT(
            MACHDBG_CODE(DBG_MACH_CHUD, CHUD_CPUSIG_SEND) | DBG_FUNC_NONE,
            otherCPU, request_code, 0, 0, 0);

        /*
         * Insert the new request in the target cpu's request queue
         * and signal target cpu.
         */
        mpenqueue_tail(&target_chudp->cpu_request_queue,
                       &request.req_entry);
        i386_signal_cpu(otherCPU, MP_CHUD, ASYNC);

        /* Wait for response or timeout */
        deadline = mach_absolute_time() + LockTimeOut;
        while (request.req_sync != 0) {
            if (mach_absolute_time() > deadline) {
                panic("chudxnu_cpusig_send(%d,%d) timed out\n",
                    otherCPU, request_code);
            }
            cpu_pause();
        }
        retval = KERN_SUCCESS;
    } else {
        retval = KERN_INVALID_ARGUMENT;
    }

    chudxnu_set_interrupts_enabled(old_level);
    enable_preemption();
    return retval;
}
void
pmap_pcid_activate(pmap_t tpmap, int ccpu)
{
    pcid_t      new_pcid = tpmap->pmap_pcid_cpus[ccpu];
    pmap_t      last_pmap;
    boolean_t   pcid_conflict = FALSE, pending_flush = FALSE;

    pmap_assert(cpu_datap(ccpu)->cpu_pmap_pcid_enabled);
    if (__improbable(new_pcid == PMAP_PCID_INVALID_PCID)) {
        new_pcid = tpmap->pmap_pcid_cpus[ccpu] = pmap_pcid_allocate_pcid(ccpu);
    }
    pmap_assert(new_pcid != PMAP_PCID_INVALID_PCID);
#ifdef PCID_ASSERT
    cpu_datap(ccpu)->cpu_last_pcid = cpu_datap(ccpu)->cpu_active_pcid;
#endif
    cpu_datap(ccpu)->cpu_active_pcid = new_pcid;

    pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
    if (__probable(pending_flush == FALSE)) {
        last_pmap = cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[new_pcid];
        pcid_conflict = ((last_pmap != NULL) && (tpmap != last_pmap));
    }
    if (__improbable(pending_flush || pcid_conflict)) {
        pmap_pcid_validate_cpu(tpmap, ccpu);
    }
    /* Consider making this a unique id */
    cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[new_pcid] = tpmap;

    pmap_assert(new_pcid < PMAP_PCID_MAX_PCID);
    pmap_assert(((tpmap == kernel_pmap) && new_pcid == 0) ||
        ((new_pcid != PMAP_PCID_INVALID_PCID) && (new_pcid != 0)));
#if PMAP_ASSERT
    pcid_record_array[ccpu % PCID_RECORD_SIZE] = tpmap->pm_cr3 | new_pcid |
        (((uint64_t)(!(pending_flush || pcid_conflict))) << 63);
    pml4_entry_t *pml4 = pmap64_pml4(tpmap, 0ULL);
    /* Diagnostic to detect pagetable anchor corruption */
    if (pml4[KERNEL_PML4_INDEX] != kernel_pmap->pm_pml4[KERNEL_PML4_INDEX])
        __asm__ volatile("int3");
#endif /* PMAP_ASSERT */
    set_cr3_composed(tpmap->pm_cr3, new_pcid, !(pending_flush || pcid_conflict));

    if (!pending_flush) {
        /* We did not previously observe a pending invalidation for this
         * ASID. However, the load from the coherency vector
         * could've been reordered ahead of the store to the
         * active_cr3 field (in the context switch path, our
         * caller). Re-consult the pending invalidation vector
         * after the CR3 write. We rely on MOV CR3's documented
         * serializing property to avoid insertion of an expensive
         * barrier. (DRK)
         */
        pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
        if (__improbable(pending_flush != 0)) {
            pmap_pcid_validate_cpu(tpmap, ccpu);
            set_cr3_composed(tpmap->pm_cr3, new_pcid, FALSE);
        }
    }
    cpu_datap(ccpu)->cpu_pmap_pcid_coherentp = &(tpmap->pmap_pcid_coherency_vector[ccpu]);
#if DEBUG
    KERNEL_DEBUG_CONSTANT(0x9c1d0000, tpmap, new_pcid, pending_flush, pcid_conflict, 0);
#endif
}
void
thread_tell_urgency(int urgency,
    uint64_t rt_period,
    uint64_t rt_deadline,
    thread_t nthread)
{
    uint64_t    urgency_notification_time_start, delta;
    boolean_t   urgency_assert = (urgency_notification_assert_abstime_threshold != 0);
    assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE);
#if DEBUG
    urgency_stats[cpu_number() % 64][urgency]++;
#endif
    if (!pmInitDone
        || pmDispatch == NULL
        || pmDispatch->pmThreadTellUrgency == NULL)
        return;

    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_START,
        urgency, rt_period, rt_deadline, 0, 0);

    if (__improbable((urgency_assert == TRUE)))
        urgency_notification_time_start = mach_absolute_time();

    current_cpu_datap()->cpu_nthread = nthread;
    pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);

    if (__improbable((urgency_assert == TRUE))) {
        delta = mach_absolute_time() - urgency_notification_time_start;

        if (__improbable(delta > urgency_notification_max_recorded)) {
            /* This is not synchronized, but it doesn't matter
             * if we (rarely) miss an event, as it is statistically
             * unlikely that it will never recur.
             */
            urgency_notification_max_recorded = delta;

            if (__improbable((delta > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended()))
                panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units",
                    pmDispatch->pmThreadTellUrgency, delta);
        }
    }

    KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_END,
        urgency, rt_period, rt_deadline, 0, 0);
}
void
unix_syscall_return(int error)
{
    thread_act_t            thread;
    volatile int            *rval;
    struct i386_saved_state *regs;
    struct proc             *p;
    struct proc             *current_proc();
    unsigned short          code;
    vm_offset_t             params;
    struct sysent           *callp;
    extern int              nsysent;

    thread = current_act();
    rval = (int *)get_bsduthreadrval(thread);
    p = current_proc();

    regs = USER_REGS(thread);

    /* reconstruct code for tracing before blasting eax */
    code = regs->eax;
    params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
    callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
    if (callp == sysent) {
        code = fuword(params);
    }

    if (error == ERESTART) {
        regs->eip -= 7;
    }
    else if (error != EJUSTRETURN) {
        if (error) {
            regs->eax = error;
            regs->efl |= EFL_CF;    /* carry bit */
        } else { /* (not error) */
            regs->eax = rval[0];
            regs->edx = rval[1];
            regs->efl &= ~EFL_CF;
        }
    }

    ktrsysret(p, code, error, rval[0], callp->sy_funnel);

    KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
        error, rval[0], rval[1], 0, 0);

    if (callp->sy_funnel != NO_FUNNEL)
        (void) thread_funnel_set(current_thread()->funnel_lock, FALSE);

    thread_exception_return();
    /* NOTREACHED */
}
static kern_return_t
chudxnu_private_chud_ast_callback(
    int     trapno,
    void    *regs,
    int     unused1,
    int     unused2)
{
#pragma unused (trapno)
#pragma unused (regs)
#pragma unused (unused1)
#pragma unused (unused2)
    boolean_t       oldlevel = ml_set_interrupts_enabled(FALSE);
    ast_t           *myast = ast_pending();
    kern_return_t   retval = KERN_FAILURE;
    chudxnu_perfmon_ast_callback_func_t fn = perfmon_ast_callback_fn;

    if (*myast & AST_CHUD_URGENT) {
        *myast &= ~(AST_CHUD_URGENT | AST_CHUD);
        if ((*myast & AST_PREEMPTION) != AST_PREEMPTION)
            *myast &= ~(AST_URGENT);
        retval = KERN_SUCCESS;
    } else if (*myast & AST_CHUD) {
        *myast &= ~(AST_CHUD);
        retval = KERN_SUCCESS;
    }

    if (fn) {
        x86_thread_state_t state;
        mach_msg_type_number_t count;
        count = x86_THREAD_STATE_COUNT;

        if (chudxnu_thread_get_state(
                current_thread(),
                x86_THREAD_STATE,
                (thread_state_t) &state, &count,
                TRUE) == KERN_SUCCESS) {

            KERNEL_DEBUG_CONSTANT(
                MACHDBG_CODE(DBG_MACH_CHUD, CHUD_AST_CALLBACK) | DBG_FUNC_NONE,
                (uint32_t) fn, 0, 0, 0, 0);

            (fn)(
                x86_THREAD_STATE,
                (thread_state_t) &state,
                count);
        }
    }

    ml_set_interrupts_enabled(oldlevel);
    return retval;
}
/*
 * Routine: lck_mtx_lock_acquire
 *
 * Invoked on acquiring the mutex when there is
 * contention.
 *
 * Returns the current number of waiters.
 *
 * Called with the interlock locked.
 */
int
lck_mtx_lock_acquire(
    lck_mtx_t   *lck)
{
    thread_t    thread = current_thread();
    lck_mtx_t   *mutex;

    if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
        mutex = lck;
    else
        mutex = &lck->lck_mtx_ptr->lck_mtx;

    if (thread->pending_promoter[thread->pending_promoter_index] == mutex) {
        thread->pending_promoter[thread->pending_promoter_index] = NULL;
        if (thread->pending_promoter_index > 0)
            thread->pending_promoter_index--;
        mutex->lck_mtx_waiters--;
    }

    if (mutex->lck_mtx_waiters > 0) {
        integer_t   priority = mutex->lck_mtx_pri;
        spl_t       s = splsched();

        thread_lock(thread);
        thread->promotions++;
        thread->sched_flags |= TH_SFLAG_PROMOTED;
        if (thread->sched_pri < priority) {
            KERNEL_DEBUG_CONSTANT(
                MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
                thread->sched_pri, priority, 0, lck, 0);
            /* Do not promote past promotion ceiling */
            assert(priority <= MAXPRI_PROMOTE);
            set_sched_pri(thread, priority);
        }
        thread_unlock(thread);
        splx(s);
    }
    else
        mutex->lck_mtx_pri = 0;

#if CONFIG_DTRACE
    if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) {
        if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
            LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lck, 0);
        } else {
            LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, lck, 0);
        }
    }
#endif
    return (mutex->lck_mtx_waiters);
}
/*
 * Routine: lck_mtx_clear_promoted
 *
 * Handle clearing of TH_SFLAG_PROMOTED,
 * adjusting thread priority as needed.
 *
 * Called with thread lock held
 */
static void
lck_mtx_clear_promoted (
    thread_t                    thread,
    __kdebug_only uintptr_t     trace_lck)
{
    thread->sched_flags &= ~TH_SFLAG_PROMOTED;

    if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
        /* Thread still has a RW lock promotion */
    } else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
        KERNEL_DEBUG_CONSTANT(
            MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
            thread->sched_pri, DEPRESSPRI, 0, trace_lck, 0);
        set_sched_pri(thread, DEPRESSPRI);
    } else {
        if (thread->base_pri < thread->sched_pri) {
            KERNEL_DEBUG_CONSTANT(
                MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
                thread->sched_pri, thread->base_pri, 0, trace_lck, 0);
        }
        thread_recompute_sched_pri(thread, FALSE);
    }
}
/*
 * Call for disabling global system override.
 * This should be called only with the sys_override_lock held.
 */
static void
disable_system_override(uint64_t flags)
{
    if (flags & SYS_OVERRIDE_IO_THROTTLE) {
        assert(io_throttle_assert_cnt > 0);
        io_throttle_assert_cnt--;
        if (io_throttle_assert_cnt == 0) {
            /* Enable I/O Throttling */
            KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_THROTTLE, IO_THROTTLE_DISABLE) | DBG_FUNC_END,
                current_proc()->p_pid, 0, 0, 0, 0);
            sys_override_io_throttle(THROTTLE_IO_ENABLE);
        }
    }

    if (flags & SYS_OVERRIDE_CPU_THROTTLE) {
        assert(cpu_throttle_assert_cnt > 0);
        cpu_throttle_assert_cnt--;
        if (cpu_throttle_assert_cnt == 0) {
            /* Enable CPU Throttling */
            KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_CPU_THROTTLE_DISABLE) | DBG_FUNC_END,
                current_proc()->p_pid, 0, 0, 0, 0);
            sys_override_cpu_throttle(CPU_THROTTLE_ENABLE);
        }
    }
}
__private_extern__ kern_return_t
chudxnu_perfmon_ast_send_urgent(boolean_t urgent)
{
    boolean_t oldlevel = ml_set_interrupts_enabled(FALSE);
    ast_t *myast = ast_pending();

    if (urgent) {
        *myast |= (AST_CHUD_URGENT | AST_URGENT);
    } else {
        *myast |= (AST_CHUD);
    }

    KERNEL_DEBUG_CONSTANT(
        MACHDBG_CODE(DBG_MACH_CHUD, CHUD_AST_SEND) | DBG_FUNC_NONE,
        urgent, 0, 0, 0, 0);

    ml_set_interrupts_enabled(oldlevel);
    return KERN_SUCCESS;
}
/*
 * Routine: lck_mtx_lock_acquire
 *
 * Invoked on acquiring the mutex when there is
 * contention.
 *
 * Returns the current number of waiters.
 *
 * Called with the interlock locked.
 */
int
lck_mtx_lock_acquire(
    lck_mtx_t   *lck)
{
    thread_t    thread = current_thread();
    lck_mtx_t   *mutex;

    if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
        mutex = lck;
    else
        mutex = &lck->lck_mtx_ptr->lck_mtx;

    if (thread->pending_promoter[thread->pending_promoter_index] == mutex) {
        thread->pending_promoter[thread->pending_promoter_index] = NULL;
        if (thread->pending_promoter_index > 0)
            thread->pending_promoter_index--;
        mutex->lck_mtx_waiters--;
    }

    if (mutex->lck_mtx_waiters > 0) {
        integer_t   priority = mutex->lck_mtx_pri;
        spl_t       s = splsched();

        thread_lock(thread);
        thread->promotions++;
        thread->sched_mode |= TH_MODE_PROMOTED;
        if (thread->sched_pri < priority) {
            KERNEL_DEBUG_CONSTANT(
                MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
                thread->sched_pri, priority, 0, (int)lck, 0);
            set_sched_pri(thread, priority);
        }
        thread_unlock(thread);
        splx(s);
    }
    else
        mutex->lck_mtx_pri = 0;

    return (mutex->lck_mtx_waiters);
}
static int
sd_callback3(proc_t p, void * args)
{
    struct sd_iterargs * sd = (struct sd_iterargs *)args;
    vfs_context_t ctx = vfs_context_current();

    int setsdstate = sd->setsdstate;

    proc_lock(p);
    p->p_shutdownstate = setsdstate;
    if (p->p_stat != SZOMB) {
        /*
         * NOTE: following code ignores sig_lock and plays
         * with exit_thread correctly.  This is OK unless we
         * are a multiprocessor, in which case I do not
         * understand the sig_lock.  This needs to be fixed.
         * XXX
         */
        if (p->exit_thread) {   /* someone already doing it */
            proc_unlock(p);
            /* give him a chance */
            thread_block(THREAD_CONTINUE_NULL);
        } else {
            p->exit_thread = current_thread();
            printf(".");

            sd_log(ctx, "%s[%d] had to be forced closed with exit1().\n", p->p_comm, p->p_pid);

            proc_unlock(p);
            KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
                p->p_pid, 0, 1, 0, 0);
            sd->activecount++;
            exit1(p, 1, (int *)NULL);
        }
    } else {
        proc_unlock(p);
    }

    return PROC_RETURNED;
}
static void
thread_suspended(__unused void *parameter, wait_result_t result)
{
    thread_t thread = current_thread();

    thread_mtx_lock(thread);

    if (result == THREAD_INTERRUPTED)
        thread->suspend_parked = FALSE;
    else
        assert(thread->suspend_parked == FALSE);

    if (thread->suspend_count > 0) {
        thread_set_apc_ast(thread);
    } else {
        spl_t s = splsched();

        thread_lock(thread);
        if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
            thread->sched_pri = DEPRESSPRI;
            thread->last_processor->current_pri = thread->sched_pri;
            thread->last_processor->current_perfctl_class = thread_get_perfcontrol_class(thread);

            KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY),
                                  (uintptr_t)thread_tid(thread),
                                  thread->base_pri,
                                  thread->sched_pri,
                                  0, /* eventually, 'reason' */
                                  0);
        }
        thread_unlock(thread);
        splx(s);
    }

    thread_mtx_unlock(thread);

    thread_exception_return();
    /*NOTREACHED*/
}
/*
 * Re-evaluate the outstanding deadlines and select the most proximate.
 *
 * Should be called at splclock.
 */
void
etimer_resync_deadlines(void)
{
    uint64_t                deadline;
    rtclock_timer_t         *mytimer;
    spl_t                   s = splclock();     /* No interruptions please */
    struct per_proc_info    *pp;

    pp = getPerProc();

    deadline = ~0ULL;

    /* if we have a clock timer set sooner, pop on that */
    mytimer = &pp->rtclock_timer;               /* Point to the timer itself */
    if (!mytimer->has_expired && mytimer->deadline > 0)
        deadline = mytimer->deadline;

    /* if we have a power management event coming up, how about that? */
    if (pp->pms.pmsPop > 0 && pp->pms.pmsPop < deadline)
        deadline = pp->pms.pmsPop;

    if (deadline > 0 && deadline <= pp->rtcPop) {
        int         decr;
        uint64_t    now;

        now = mach_absolute_time();
        decr = setPop(deadline);

        if (deadline < now)
            pp->rtcPop = now + decr;
        else
            pp->rtcPop = deadline;

        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | DBG_FUNC_NONE,
            decr, 2, 0, 0, 0);
    }
    splx(s);
}
static void
chudxnu_private_cpu_timer_callback(
    timer_call_param_t param0,
    timer_call_param_t param1)
{
#pragma unused (param0)
#pragma unused (param1)
    chudcpu_data_t                  *chud_proc_info;
    boolean_t                       oldlevel;
    x86_thread_state_t              state;
    mach_msg_type_number_t          count;
    chudxnu_cpu_timer_callback_func_t fn;

    oldlevel = ml_set_interrupts_enabled(FALSE);
    chud_proc_info = (chudcpu_data_t *)(current_cpu_datap()->cpu_chud);

    count = x86_THREAD_STATE_COUNT;
    if (chudxnu_thread_get_state(current_thread(),
                                 x86_THREAD_STATE,
                                 (thread_state_t)&state,
                                 &count,
                                 FALSE) == KERN_SUCCESS) {
        fn = chud_proc_info->cpu_timer_callback_fn;
        if (fn) {
            KERNEL_DEBUG_CONSTANT(
                MACHDBG_CODE(DBG_MACH_CHUD, CHUD_TIMER_CALLBACK) | DBG_FUNC_NONE,
                (uint32_t)fn, 0, 0, 0, 0);
                //state.eip, state.cs, 0, 0);
            (fn)(
                x86_THREAD_STATE,
                (thread_state_t)&state,
                count);
        }
    }

    ml_set_interrupts_enabled(oldlevel);
}
__private_extern__ kern_return_t
chudxnu_cpu_timer_callback_cancel(void)
{
    chudcpu_data_t  *chud_proc_info;
    boolean_t       oldlevel;

    oldlevel = ml_set_interrupts_enabled(FALSE);
    chud_proc_info = (chudcpu_data_t *)(current_cpu_datap()->cpu_chud);

    timer_call_cancel(&(chud_proc_info->cpu_timer_call));

    KERNEL_DEBUG_CONSTANT(
        MACHDBG_CODE(DBG_MACH_CHUD, CHUD_TIMER_CALLBACK_CANCEL) | DBG_FUNC_NONE,
        0, 0, 0, 0, 0);

    // set to max value:
    chud_proc_info->t_deadline |= ~(chud_proc_info->t_deadline);
    chud_proc_info->cpu_timer_callback_fn = NULL;

    ml_set_interrupts_enabled(oldlevel);
    return KERN_SUCCESS;
}
/*
 * Create a new thread.
 * Doesn't start the thread running.
 */
static kern_return_t
thread_create_internal(
    task_t              parent_task,
    integer_t           priority,
    thread_continue_t   continuation,
    thread_t            *out_thread)
{
    thread_t            new_thread;
    static thread_t     first_thread;

    /*
     * Allocate a thread and initialize static fields
     */
    if (first_thread == NULL)
        new_thread = first_thread = current_thread();
    else
        new_thread = (thread_t)zalloc(thread_zone);
    if (new_thread == NULL)
        return (KERN_RESOURCE_SHORTAGE);

    if (new_thread != first_thread)
        *new_thread = thread_template;

#ifdef MACH_BSD
    {
        new_thread->uthread = uthread_alloc(parent_task, new_thread);
        if (new_thread->uthread == NULL) {
            zfree(thread_zone, new_thread);
            return (KERN_RESOURCE_SHORTAGE);
        }
    }
#endif  /* MACH_BSD */

    if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
#ifdef MACH_BSD
        {
            void *ut = new_thread->uthread;

            new_thread->uthread = NULL;
            /* cred free may not be necessary */
            uthread_cleanup(parent_task, ut, parent_task->bsd_info);
            uthread_cred_free(ut);
            uthread_zone_free(ut);
        }
#endif  /* MACH_BSD */
        zfree(thread_zone, new_thread);
        return (KERN_FAILURE);
    }

    new_thread->task = parent_task;

    thread_lock_init(new_thread);
    wake_lock_init(new_thread);

    mutex_init(&new_thread->mutex, 0);

    ipc_thread_init(new_thread);
    queue_init(&new_thread->held_ulocks);

    new_thread->continuation = continuation;

    mutex_lock(&tasks_threads_lock);
    task_lock(parent_task);

    if (!parent_task->active ||
        (parent_task->thread_count >= THREAD_MAX && parent_task != kernel_task)) {
        task_unlock(parent_task);
        mutex_unlock(&tasks_threads_lock);

#ifdef MACH_BSD
        {
            void *ut = new_thread->uthread;

            new_thread->uthread = NULL;
            uthread_cleanup(parent_task, ut, parent_task->bsd_info);
            /* cred free may not be necessary */
            uthread_cred_free(ut);
            uthread_zone_free(ut);
        }
#endif  /* MACH_BSD */
        ipc_thread_disable(new_thread);
        ipc_thread_terminate(new_thread);
        machine_thread_destroy(new_thread);
        zfree(thread_zone, new_thread);
        return (KERN_FAILURE);
    }

    task_reference_internal(parent_task);

    /* Cache the task's map */
    new_thread->map = parent_task->map;

    /* Chain the thread onto the task's list */
    queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
    parent_task->thread_count++;

    /* So terminating threads don't need to take the task lock to decrement */
    hw_atomic_add(&parent_task->active_thread_count, 1);

    queue_enter(&threads, new_thread, thread_t, threads);
    threads_count++;

    timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
    timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);

    /* Set the thread's scheduling parameters */
    if (parent_task != kernel_task)
        new_thread->sched_mode |= TH_MODE_TIMESHARE;
    new_thread->max_priority = parent_task->max_priority;
    new_thread->task_priority = parent_task->priority;
    new_thread->priority = (priority < 0)? parent_task->priority: priority;
    if (new_thread->priority > new_thread->max_priority)
        new_thread->priority = new_thread->max_priority;
    new_thread->importance = new_thread->priority - new_thread->task_priority;
    new_thread->sched_stamp = sched_tick;
    new_thread->pri_shift = sched_pri_shift;
    compute_priority(new_thread, FALSE);

    new_thread->active = TRUE;

    *out_thread = new_thread;

    {
        long    dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;

        kdbg_trace_data(parent_task->bsd_info, &dbg_arg2);

        KERNEL_DEBUG_CONSTANT(
            TRACEDBG_CODE(DBG_TRACE_DATA, 1) | DBG_FUNC_NONE,
            (vm_address_t)new_thread, dbg_arg2, 0, 0, 0);

        kdbg_trace_string(parent_task->bsd_info,
            &dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);

        KERNEL_DEBUG_CONSTANT(
            TRACEDBG_CODE(DBG_TRACE_STRING, 1) | DBG_FUNC_NONE,
            dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
    }

    DTRACE_PROC1(lwp__create, thread_t, *out_thread);

    return (KERN_SUCCESS);
}
/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
    /*
     *  Map in special device (must be SHARED) or file
     */
    struct fileproc     *fp;
    register struct vnode *vp;
    int                 flags;
    int                 prot, file_prot;
    int                 err = 0;
    vm_map_t            user_map;
    kern_return_t       result;
    mach_vm_offset_t    user_addr;
    mach_vm_size_t      user_size;
    vm_object_offset_t  pageoff;
    vm_object_offset_t  file_pos;
    int                 alloc_flags = 0;
    boolean_t           docow;
    vm_prot_t           maxprot;
    void                *handle;
    vm_pager_t          pager;
    int                 mapanon = 0;
    int                 fpref = 0;
    int                 error = 0;
    int                 fd = uap->fd;

    user_addr = (mach_vm_offset_t)uap->addr;
    user_size = (mach_vm_size_t) uap->len;

    AUDIT_ARG(addr, user_addr);
    AUDIT_ARG(len, user_size);
    AUDIT_ARG(fd, uap->fd);

    prot = (uap->prot & VM_PROT_ALL);
#if 3777787
    /*
     * Since the hardware currently does not support writing without
     * read-before-write, or execution-without-read, if the request is
     * for write or execute access, we must imply read access as well;
     * otherwise programs expecting this to work will fail to operate.
     */
    if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
        prot |= VM_PROT_READ;
#endif  /* radar 3777787 */

    flags = uap->flags;
    vp = NULLVP;

    /*
     * The vm code does not have prototypes & the compiler doesn't do
     * the right thing when you cast a 64bit value and pass it in a
     * function call. So here it is.
     */
    file_pos = (vm_object_offset_t)uap->pos;

    /* make sure mapping fits into numeric range etc */
    if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
        return (EINVAL);

    /*
     * Align the file position to a page boundary,
     * and save its page offset component.
     */
    pageoff = (file_pos & PAGE_MASK);
    file_pos -= (vm_object_offset_t)pageoff;

    /* Adjust size for rounding (on both ends). */
    user_size += pageoff;                           /* low end... */
    user_size = mach_vm_round_page(user_size);      /* hi end */

    /*
     * Check for illegal addresses.  Watch out for address wrap... Note
     * that VM_*_ADDRESS are not constants due to casts (argh).
     */
    if (flags & MAP_FIXED) {
        /*
         * The specified address must have the same remainder
         * as the file offset taken modulo PAGE_SIZE, so it
         * should be aligned after adjustment by pageoff.
         */
        user_addr -= pageoff;
        if (user_addr & PAGE_MASK)
            return (EINVAL);
    }
#ifdef notyet
    /* DO not have apis to get this info, need to wait till then*/
    /*
     * XXX for non-fixed mappings where no hint is provided or
     * the hint would fall in the potential heap space,
     * place it after the end of the largest possible heap.
     *
     * There should really be a pmap call to determine a reasonable
     * location.
     */
    else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
        addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
#endif

    alloc_flags = 0;

    if (flags & MAP_ANON) {
        /*
         * Mapping blank space is trivial.  Use positive fds as the alias
         * value for memory tracking.
         */
        if (fd != -1) {
            /*
             * Use "fd" to pass (some) Mach VM allocation flags,
             * (see the VM_FLAGS_* definitions).
             */
            alloc_flags = fd & (VM_FLAGS_ALIAS_MASK | VM_FLAGS_PURGABLE);
            if (alloc_flags != fd) {
                /* reject if there are any extra flags */
                return EINVAL;
            }
        }

        handle = NULL;
        maxprot = VM_PROT_ALL;
        file_pos = 0;
        mapanon = 1;
    } else {
        struct vnode_attr va;
        vfs_context_t ctx = vfs_context_current();

        /*
         * Mapping file, get fp for validation. Obtain vnode and make
         * sure it is of appropriate type.
         */
        err = fp_lookup(p, fd, &fp, 0);
        if (err)
            return(err);
        fpref = 1;
        if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
            uap->addr = (user_addr_t)user_addr;
            uap->len = (user_size_t)user_size;
            uap->prot = prot;
            uap->flags = flags;
            uap->pos = file_pos;
            error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
            goto bad;
        }

        if (fp->f_fglob->fg_type != DTYPE_VNODE) {
            error = EINVAL;
            goto bad;
        }
        vp = (struct vnode *)fp->f_fglob->fg_data;
        error = vnode_getwithref(vp);
        if (error != 0)
            goto bad;

        if (vp->v_type != VREG && vp->v_type != VCHR) {
            (void)vnode_put(vp);
            error = EINVAL;
            goto bad;
        }

        AUDIT_ARG(vnpath, vp, ARG_VNODE1);

        /*
         * POSIX: mmap needs to update access time for mapped files
         */
        if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
            VATTR_INIT(&va);
            nanotime(&va.va_access_time);
            VATTR_SET_ACTIVE(&va, va_access_time);
            vnode_setattr(vp, &va, ctx);
        }

        /*
         * XXX hack to handle use of /dev/zero to map anon memory (ala
         * SunOS).
         */
        if (vp->v_type == VCHR || vp->v_type == VSTR) {
            (void)vnode_put(vp);
            error = ENODEV;
            goto bad;
        } else {
            /*
             * Ensure that file and memory protections are
             * compatible.  Note that we only worry about
             * writability if mapping is shared; in this case,
             * current and max prot are dictated by the open file.
             * XXX use the vnode instead?  Problem is: what
             * credentials do we use for determination? What if
             * proc does a setuid?
             */
            maxprot = VM_PROT_EXECUTE;      /* ??? */
            if (fp->f_fglob->fg_flag & FREAD)
                maxprot |= VM_PROT_READ;
            else if (prot & PROT_READ) {
                (void)vnode_put(vp);
                error = EACCES;
                goto bad;
            }
            /*
             * If we are sharing potential changes (either via
             * MAP_SHARED or via the implicit sharing of character
             * device mappings), and we are trying to get write
             * permission although we opened it without asking
             * for it, bail out.
             */
            if ((flags & MAP_SHARED) != 0) {
                if ((fp->f_fglob->fg_flag & FWRITE) != 0) {
                    /*
                     * check for write access
                     *
                     * Note that we already made this check when granting FWRITE
                     * against the file, so it seems redundant here.
                     */
                    error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

                    /* if not granted for any reason, but we wanted it, bad */
                    if ((prot & PROT_WRITE) && (error != 0)) {
                        vnode_put(vp);
                        goto bad;
                    }

                    /* if writable, remember */
                    if (error == 0)
                        maxprot |= VM_PROT_WRITE;

                } else if ((prot & PROT_WRITE) != 0) {
                    (void)vnode_put(vp);
                    error = EACCES;
                    goto bad;
                }
            } else
                maxprot |= VM_PROT_WRITE;

            handle = (void *)vp;
#if CONFIG_MACF
            error = mac_file_check_mmap(vfs_context_ucred(ctx),
                fp->f_fglob, prot, flags, &maxprot);
            if (error) {
                (void)vnode_put(vp);
                goto bad;
            }
#endif /* MAC */
        }
    }

    if (user_size == 0) {
        if (!mapanon)
            (void)vnode_put(vp);
        error = 0;
        goto bad;
    }

    /*
     * We bend a little - round the start and end addresses
     * to the nearest page boundary.
     */
    user_size = mach_vm_round_page(user_size);

    if (file_pos & PAGE_MASK_64) {
        if (!mapanon)
            (void)vnode_put(vp);
        error = EINVAL;
        goto bad;
    }

    user_map = current_map();

    if ((flags & MAP_FIXED) == 0) {
        alloc_flags |= VM_FLAGS_ANYWHERE;
        user_addr = mach_vm_round_page(user_addr);
    } else {
        if (user_addr != mach_vm_trunc_page(user_addr)) {
            if (!mapanon)
                (void)vnode_put(vp);
            error = EINVAL;
            goto bad;
        }
        /*
         * mmap(MAP_FIXED) will replace any existing mappings in the
         * specified range, if the new mapping is successful.
         * If we just deallocate the specified address range here,
         * another thread might jump in and allocate memory in that
         * range before we get a chance to establish the new mapping,
         * and we won't have a chance to restore the old mappings.
         * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
         * has to deallocate the existing mappings and establish the
         * new ones atomically.
         */
        alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
    }

    if (flags & MAP_NOCACHE)
        alloc_flags |= VM_FLAGS_NO_CACHE;

    /*
     * Lookup/allocate object.
     */
    if (handle == NULL) {
        pager = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
        if (prot & VM_PROT_READ)
            prot |= VM_PROT_EXECUTE;
        if (maxprot & VM_PROT_READ)
            maxprot |= VM_PROT_EXECUTE;
#endif
#endif

#if 3777787
        if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
            prot |= VM_PROT_READ;
        if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
            maxprot |= VM_PROT_READ;
#endif  /* radar 3777787 */

        result = vm_map_enter_mem_object(user_map,
                                         &user_addr, user_size,
                                         0, alloc_flags,
                                         IPC_PORT_NULL, 0, FALSE,
                                         prot, maxprot,
                                         (flags & MAP_SHARED) ?
                                         VM_INHERIT_SHARE :
                                         VM_INHERIT_DEFAULT);
        if (result != KERN_SUCCESS)
            goto out;
    } else {
        pager = (vm_pager_t)ubc_getpager(vp);

        if (pager == NULL) {
            (void)vnode_put(vp);
            error = ENOMEM;
            goto bad;
        }

        /*
         *  Set credentials:
         *      FIXME: if we're writing the file we need a way to
         *      ensure that someone doesn't replace our R/W creds
         *      with ones that only work for read.
         */
        ubc_setthreadcred(vp, p, current_thread());
        docow = FALSE;
        if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
            docow = TRUE;
        }

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
        if (prot & VM_PROT_READ)
            prot |= VM_PROT_EXECUTE;
        if (maxprot & VM_PROT_READ)
            maxprot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3777787
        if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
            prot |= VM_PROT_READ;
        if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
            maxprot |= VM_PROT_READ;
#endif  /* radar 3777787 */

        result = vm_map_enter_mem_object(user_map,
                                         &user_addr, user_size,
                                         0, alloc_flags,
                                         (ipc_port_t)pager, file_pos,
                                         docow, prot, maxprot,
                                         (flags & MAP_SHARED) ?
                                         VM_INHERIT_SHARE :
                                         VM_INHERIT_DEFAULT);

        if (result != KERN_SUCCESS) {
            (void)vnode_put(vp);
            goto out;
        }

        file_prot = prot & (PROT_READ | PROT_WRITE | PROT_EXEC);
        if (docow) {
            /* private mapping: won't write to the file */
            file_prot &= ~PROT_WRITE;
        }
        (void) ubc_map(vp, file_prot);
    }

    if (!mapanon)
        (void)vnode_put(vp);

out:
    switch (result) {
    case KERN_SUCCESS:
        *retval = user_addr + pageoff;
        error = 0;
        break;
    case KERN_INVALID_ADDRESS:
    case KERN_NO_SPACE:
        error = ENOMEM;
        break;
    case KERN_PROTECTION_FAILURE:
        error = EACCES;
        break;
    default:
        error = EINVAL;
        break;
    }
bad:
    if (fpref)
        fp_drop(p, fd, fp, 0);

    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE),
        fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
    KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE),
        (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
        (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);

    return(error);
}
int
spec_strategy(struct vnop_strategy_args *ap)
{
    buf_t       bp;
    int         bflags;
    int         policy;
    dev_t       bdev;
    uthread_t   ut;
    size_t      devbsdunit;
    mount_t     mp;

    bp = ap->a_bp;
    bdev = buf_device(bp);
    bflags = buf_flags(bp);
    mp = buf_vnode(bp)->v_mount;

    if (kdebug_enable) {
        int code = 0;

        if (bflags & B_READ)
            code |= DKIO_READ;
        if (bflags & B_ASYNC)
            code |= DKIO_ASYNC;

        if (bflags & B_META)
            code |= DKIO_META;
        else if (bflags & B_PAGEIO)
            code |= DKIO_PAGING;

        KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
            bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0);
    }
    if (((bflags & (B_IOSTREAMING | B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) &&
        mp && (mp->mnt_kern_flag & MNTK_ROOTDEV))
        hard_throttle_on_root = 1;

    if (mp != NULL)
        devbsdunit = mp->mnt_devbsdunit;
    else
        devbsdunit = LOWPRI_MAX_NUM_DEV - 1;

    throttle_info_update(&_throttle_io_info[devbsdunit], bflags);
    if ((policy = throttle_get_io_policy(&ut)) == IOPOL_THROTTLE) {
        bp->b_flags |= B_THROTTLED_IO;
    }

    if ((bflags & B_READ) == 0) {
        microuptime(&_throttle_io_info[devbsdunit].last_IO_timestamp);
        if (mp) {
            INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_write_size);
        }
    } else if (mp) {
        INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_read_size);
    }

    (*bdevsw[major(bdev)].d_strategy)(bp);

    return (0);
}
/*
 * thread_call_thread:
 */
static void
thread_call_thread(
    thread_call_group_t group,
    wait_result_t       wres)
{
    thread_t    self = current_thread();
    boolean_t   canwait;

    /*
     * A wakeup with THREAD_INTERRUPTED indicates that
     * we should terminate.
     */
    if (wres == THREAD_INTERRUPTED) {
        thread_terminate(self);

        /* NOTREACHED */
        panic("thread_terminate() returned?");
    }

    (void)disable_ints_and_lock();

    thread_sched_call(self, group->sched_call);

    while (group->pending_count > 0) {
        thread_call_t       call;
        thread_call_func_t  func;
        thread_call_param_t param0, param1;

        call = TC(dequeue_head(&group->pending_queue));
        group->pending_count--;

        func = call->tc_call.func;
        param0 = call->tc_call.param0;
        param1 = call->tc_call.param1;

        call->tc_call.queue = NULL;

        _internal_call_release(call);

        /*
         * Can only do wakeups for thread calls whose storage
         * we control.
         */
        if ((call->tc_flags & THREAD_CALL_ALLOC) != 0) {
            canwait = TRUE;
            call->tc_refs++;    /* Delay free until we're done */
        } else
            canwait = FALSE;

        enable_ints_and_unlock();

        KERNEL_DEBUG_CONSTANT(
            MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_NONE,
            VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0);

        (*func)(param0, param1);

        if (get_preemption_level() != 0) {
            int pl = get_preemption_level();
            panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
                pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
        }

        (void)thread_funnel_set(self->funnel_lock, FALSE);      /* XXX */

        (void) disable_ints_and_lock();

        if (canwait) {
            /* Frees if so desired */
            thread_call_finish(call);
        }
    }

    thread_sched_call(self, NULL);
    group->active_count--;

    if (group_isparallel(group)) {
        /*
         * For new style of thread group, thread always blocks.
         * If we have more than the target number of threads,
         * and this is the first to block, and it isn't active
         * already, set a timer for deallocating a thread if we
         * continue to have a surplus.
         */
        group->idle_count++;

        if (group->idle_count == 1) {
            group->idle_timestamp = mach_absolute_time();
        }

        if (((group->flags & TCG_DEALLOC_ACTIVE) == 0) &&
            ((group->active_count + group->idle_count) > group->target_thread_count)) {
            group->flags |= TCG_DEALLOC_ACTIVE;
            thread_call_start_deallocate_timer(group);
        }

        /* Wait for more work (or termination) */
        wres = wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_INTERRUPTIBLE, 0);
        if (wres != THREAD_WAITING) {
            panic("kcall worker unable to assert wait?");
        }

        enable_ints_and_unlock();

        thread_block_parameter((thread_continue_t)thread_call_thread, group);
    } else {
        if (group->idle_count < group->target_thread_count) {
            group->idle_count++;

            wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_UNINT, 0); /* Interrupted means to exit */

            enable_ints_and_unlock();

            thread_block_parameter((thread_continue_t)thread_call_thread, group);
            /* NOTREACHED */
        }
    }

    enable_ints_and_unlock();

    thread_terminate(self);
    /* NOTREACHED */
}