Example #1
/*
 * Device ioctl operation.
 */
int
spec_ioctl(struct vnop_ioctl_args *ap)
{
	proc_t p = vfs_context_proc(ap->a_context);
	dev_t dev = ap->a_vp->v_rdev;
	int	retval = 0;

	KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 0) | DBG_FUNC_START,
			      (unsigned int)dev, (unsigned int)ap->a_command, (unsigned int)ap->a_fflag, (unsigned int)ap->a_vp->v_type, 0);

	switch (ap->a_vp->v_type) {

	case VCHR:
		retval = (*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
						       ap->a_fflag, p);
		break;

	case VBLK:
		retval = (*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command, ap->a_data,
						       ap->a_fflag, p);
		break;

	default:
		panic("spec_ioctl");
		/* NOTREACHED */
	}
	KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_IOCTL, 0) | DBG_FUNC_END,
			      (unsigned int)dev, (unsigned int)ap->a_command, (unsigned int)ap->a_fflag, retval, 0);

	return (retval);
}
Example #2
/*
 * 	Event timer interrupt.
 *
 * XXX a drawback of this implementation is that events serviced earlier must not set deadlines
 *     that occur before the entire chain completes.
 *
 * XXX a better implementation would use a set of generic callouts and iterate over them
 */
void
etimer_intr(
__unused int inuser,
__unused uint64_t iaddr)
{
	uint64_t		abstime;
	rtclock_timer_t		*mytimer;
	struct per_proc_info	*pp;

	pp = getPerProc();

	mytimer = &pp->rtclock_timer;				/* Point to the event timer */

	abstime = mach_absolute_time();				/* Get the time now */

	/* is it time for power management state change? */	
	if (pp->pms.pmsPop <= abstime) {
	        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_START, 0, 0, 0, 0, 0);
		pmsStep(1);					/* Yes, advance step */
	        KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 3) | DBG_FUNC_END, 0, 0, 0, 0, 0);

		abstime = mach_absolute_time();			/* Get the time again since we ran a bit */
	}

	/* has a pending clock timer expired? */
	if (mytimer->deadline <= abstime) {			/* Have we expired the deadline? */
		mytimer->has_expired = TRUE;			/* Remember that we popped */
		mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
		mytimer->has_expired = FALSE;
	}

	/* schedule our next deadline */
	pp->rtcPop = EndOfAllTime;				/* any real deadline will be earlier */
	etimer_resync_deadlines();
}
Example #3
/*
 * Call for enabling global system override.
 * This should be called only with the sys_override_lock held.
 */
static void
enable_system_override(uint64_t flags)
{
	
	if (flags & SYS_OVERRIDE_IO_THROTTLE) {
		if (io_throttle_assert_cnt == 0) {
			/* Disable I/O Throttling */
			printf("Process %s [%d] disabling system-wide I/O Throttling\n", current_proc()->p_comm, current_proc()->p_pid);
			KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_THROTTLE, IO_THROTTLE_DISABLE) | DBG_FUNC_START, current_proc()->p_pid, 0, 0, 0, 0);
			sys_override_io_throttle(THROTTLE_IO_DISABLE);
		}
		io_throttle_assert_cnt++;
	}
	
	if (flags & SYS_OVERRIDE_CPU_THROTTLE) {
		if (cpu_throttle_assert_cnt == 0) {
			/* Disable CPU Throttling */
			printf("Process %s [%d] disabling system-wide CPU Throttling\n", current_proc()->p_comm, current_proc()->p_pid);
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_CPU_THROTTLE_DISABLE) | DBG_FUNC_START, current_proc()->p_pid, 0, 0, 0, 0);
			sys_override_cpu_throttle(CPU_THROTTLE_DISABLE);
		}
		cpu_throttle_assert_cnt++;
	}

}
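/*
 * Hypothetical usage sketch (not from the original sources): the override is
 * reference counted, so a caller is expected to pair the enable and disable
 * calls while holding sys_override_lock.  The caller function and the lock
 * type are assumptions for illustration only.
 */
static void
example_run_with_io_throttle_disabled(void (*work)(void))
{
	lck_mtx_lock(&sys_override_lock);			/* assumed to be a lck_mtx_t */
	enable_system_override(SYS_OVERRIDE_IO_THROTTLE);	/* first caller disables throttling */
	lck_mtx_unlock(&sys_override_lock);

	work();							/* runs with system-wide I/O throttling off */

	lck_mtx_lock(&sys_override_lock);
	disable_system_override(SYS_OVERRIDE_IO_THROTTLE);	/* last caller re-enables throttling */
	lck_mtx_unlock(&sys_override_lock);
}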
Example #4
/*
 * lck_rw_clear_promotion: Undo priority promotions when the last RW
 * lock is released by a thread (if a promotion was active)
 */
void lck_rw_clear_promotion(thread_t thread)
{
	assert(thread->rwlock_count == 0);

	/* Cancel any promotions if the thread had actually blocked while holding a RW lock */
	spl_t s = splsched();

	thread_lock(thread);

	if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
		thread->sched_flags &= ~TH_SFLAG_RW_PROMOTED;

		if (thread->sched_flags & TH_SFLAG_PROMOTED) {
			/* Thread still has a mutex promotion */
		} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
							      thread->sched_pri, DEPRESSPRI, 0, 0, 0);
			
			set_sched_pri(thread, DEPRESSPRI);
		} else {
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_DEMOTE) | DBG_FUNC_NONE,
								  thread->sched_pri, thread->base_pri, 0, 0, 0);
			
			thread_recompute_sched_pri(thread, FALSE);
		}
	}

	thread_unlock(thread);
	splx(s);
}
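/*
 * Hypothetical caller sketch (not from the original sources): an RW unlock
 * path decrements the per-thread rwlock_count and, once the last lock is
 * dropped, undoes any outstanding promotion via the routine above.  The
 * surrounding unlock helper is assumed for illustration.
 */
static void
example_rw_unlock_epilogue(thread_t thread)
{
	if (thread->rwlock_count-- == 1) {
		/* sched_flags is read without the thread lock here; it is
		 * re-checked under the lock inside lck_rw_clear_promotion() */
		if (thread->sched_flags & TH_SFLAG_RW_PROMOTED)
			lck_rw_clear_promotion(thread);
	}
}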
Example #5
/*
 * Routine: 	lck_mtx_unlock_wakeup
 *
 * Invoked on unlock when there is contention.
 *
 * Called with the interlock locked.
 */
void
lck_mtx_unlock_wakeup (
	lck_mtx_t			*lck,
	thread_t			holder)
{
	thread_t		thread = current_thread();
	lck_mtx_t		*mutex;

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	if (thread != holder)
		panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START, (int)lck, (int)holder, 0, 0, 0);

	assert(mutex->lck_mtx_waiters > 0);
	thread_wakeup_one((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));

	if (thread->promotions > 0) {
		spl_t		s = splsched();

		thread_lock(thread);
		if (	--thread->promotions == 0				&&
				(thread->sched_flags & TH_SFLAG_PROMOTED)		) {
			thread->sched_flags &= ~TH_SFLAG_PROMOTED;

			if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
				/* Thread still has a RW lock promotion */
			} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
				KERNEL_DEBUG_CONSTANT(
					MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
						  thread->sched_pri, DEPRESSPRI, 0, lck, 0);

				set_sched_pri(thread, DEPRESSPRI);
			}
			else {
				if (thread->priority < thread->sched_pri) {
					KERNEL_DEBUG_CONSTANT(
						MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) |
															DBG_FUNC_NONE,
							thread->sched_pri, thread->priority,
									0, lck, 0);
				}

				SCHED(compute_priority)(thread, FALSE);
			}
		}
		thread_unlock(thread);
		splx(s);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
Example #6
__private_extern__ kern_return_t
chudxnu_cpu_timer_callback_enter(
	chudxnu_cpu_timer_callback_func_t	func,
	uint32_t				time,
	uint32_t				units)
{
	chudcpu_data_t	*chud_proc_info;
	boolean_t	oldlevel;

	oldlevel = ml_set_interrupts_enabled(FALSE);
	chud_proc_info = (chudcpu_data_t *)(current_cpu_datap()->cpu_chud);

	// cancel any existing callback for this cpu
	timer_call_cancel(&(chud_proc_info->cpu_timer_call));

	chud_proc_info->cpu_timer_callback_fn = func;

	clock_interval_to_deadline(time, units, &(chud_proc_info->t_deadline));
	timer_call_setup(&(chud_proc_info->cpu_timer_call),
			 chudxnu_private_cpu_timer_callback, NULL);
	timer_call_enter(&(chud_proc_info->cpu_timer_call),
			 chud_proc_info->t_deadline);

	KERNEL_DEBUG_CONSTANT(
		MACHDBG_CODE(DBG_MACH_CHUD,
			     CHUD_TIMER_CALLBACK_ENTER) | DBG_FUNC_NONE,
		(uint32_t) func, time, units, 0, 0);

	ml_set_interrupts_enabled(oldlevel);
	return KERN_SUCCESS;
}
Example #7
static uint64_t
rtc_lapic_set_tsc_deadline_timer(uint64_t deadline, uint64_t now)
{
	uint64_t delta;
	uint64_t delta_tsc;
	uint64_t tsc = rdtsc64();
	uint64_t set = 0;

	if (deadline > 0) {
		/*
		 * Convert to TSC
		 */
		delta = deadline_to_decrementer(deadline, now);
		set = now + delta;
		delta_tsc = tmrCvt(delta, tscFCvtn2t);
		lapic_set_tsc_deadline_timer(tsc + delta_tsc);
	} else {
		lapic_set_tsc_deadline_timer(0);
	}
	
	KERNEL_DEBUG_CONSTANT(
		DECR_SET_TSC_DEADLINE | DBG_FUNC_NONE,
		now, deadline,
		tsc, lapic_get_tsc_deadline_timer(),
		0);

	return set;
} 
Example #8
/*
 * Callout from context switch if the thread goes
 * off core with a positive rwlock_count
 *
 * Called at splsched with the thread locked
 */
void
lck_rw_set_promotion_locked(thread_t thread)
{
	if (LcksOpts & disLkRWPrio)
		return;

	integer_t priority;

	priority = thread->sched_pri;

	if (priority < thread->base_pri)
		priority = thread->base_pri;
	if (priority < BASEPRI_BACKGROUND)
		priority = BASEPRI_BACKGROUND;

	if ((thread->sched_pri < priority) ||
	    !(thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
		KERNEL_DEBUG_CONSTANT(
		        MACHDBG_CODE(DBG_MACH_SCHED, MACH_RW_PROMOTE) | DBG_FUNC_NONE,
		        (uintptr_t)thread_tid(thread), thread->sched_pri,
		        thread->base_pri, priority, 0);

		thread->sched_flags |= TH_SFLAG_RW_PROMOTED;

		if (thread->sched_pri < priority)
			set_sched_pri(thread, priority);
	}
}
Example #9
static uint64_t
rtc_lapic_set_timer(uint64_t deadline, uint64_t now)
{
	uint64_t count;
	uint64_t set = 0;

	if (deadline > 0) {
		/*
		 * Convert delta to bus ticks
		 * - time now is not relevant
		 */
		count = deadline_to_decrementer(deadline, now);
		set = now + count;
		lapic_set_timer_fast((uint32_t) tmrCvt(count, busFCvtn2t));
	} else {
		lapic_set_timer(FALSE, one_shot, divide_by_1, 0);
	}

	KERNEL_DEBUG_CONSTANT(
		DECR_SET_APIC_DEADLINE | DBG_FUNC_NONE,
		now, deadline,
		set, LAPIC_READ(TIMER_CURRENT_COUNT),
		0);

	return set;
}
Example #10
static kern_return_t
chudxnu_private_cpu_signal_handler(int request)
{
	chudxnu_cpusig_callback_func_t fn = cpusig_callback_fn;
	
	if (fn) {
		x86_thread_state_t state;
		mach_msg_type_number_t count = x86_THREAD_STATE_COUNT;

		if (chudxnu_thread_get_state(current_thread(),
					     x86_THREAD_STATE,
					     (thread_state_t) &state, &count,
					     FALSE) == KERN_SUCCESS) {
			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_CHUD,
					CHUD_CPUSIG_CALLBACK) | DBG_FUNC_NONE,
				(uint32_t)fn, request, 0, 0, 0);
			return (fn)(
					request, x86_THREAD_STATE,
					(thread_state_t) &state, count);
		} else {
			return KERN_FAILURE;
		}
	}
	return KERN_SUCCESS; //ignored
}
Example #11
/*
 * Log (part of) a pathname using the KERNEL_DEBUG_CONSTANT mechanism, as used
 * by fs_usage.  The path up to and including the current component name are
 * logged.  Up to NUMPARMS*4 bytes of pathname will be logged.  If the path
 * to be logged is longer than that, then the last NUMPARMS*4 bytes are logged.
 * That is, the truncation removes the leading portion of the path.
 *
 * The logging is done via multiple KERNEL_DEBUG_CONSTANT calls.  The first one
 * is marked with DBG_FUNC_START.  The last one is marked with DBG_FUNC_END
 * (in addition to DBG_FUNC_START if it is also the first).  There may be
 * intermediate ones with neither DBG_FUNC_START nor DBG_FUNC_END.
 *
 * The first KERNEL_DEBUG_CONSTANT passes the vnode pointer and 12 bytes of
 * pathname.  The remaining KERNEL_DEBUG_CONSTANT calls add 16 bytes of pathname
 * each.  The minimum number of KERNEL_DEBUG_CONSTANT calls required to pass
 * the path are used.  Any excess padding in the final KERNEL_DEBUG_CONSTANT
 * (because not all of the 12 or 16 bytes are needed for the remainder of the
 * path) is set to zero bytes, or '>' if there is more path beyond the
 * current component name (usually because an intermediate component was not
 * found).
 *
 * NOTE: If the path length is greater than NUMPARMS*4, or is not of the form
 * 12+N*16, there will be no padding.
 *
 * TODO: If there is more path beyond the current component name, should we
 * force some padding?  For example, a lookup for /foo_bar_baz/spam that
 * fails because /foo_bar_baz is not found will only log "/foo_bar_baz", with
 * no '>' padding.  But /foo_bar/spam would log "/foo_bar>>>>".
 */
static void
kdebug_lookup(struct vnode *dp, struct componentname *cnp)
{
	unsigned int i;
	int code;
	int dbg_namelen;
	char *dbg_nameptr;
	long dbg_parms[NUMPARMS];

	/* Collect the pathname for tracing */
	dbg_namelen = (cnp->cn_nameptr - cnp->cn_pnbuf) + cnp->cn_namelen;
	dbg_nameptr = cnp->cn_nameptr + cnp->cn_namelen;

	if (dbg_namelen > (int)sizeof(dbg_parms))
		dbg_namelen = sizeof(dbg_parms);
	dbg_nameptr -= dbg_namelen;
	
	/* Copy the (possibly truncated) path itself */
	memcpy(dbg_parms, dbg_nameptr, dbg_namelen);
	
	/* Pad with '\0' or '>' */
	if (dbg_namelen < (int)sizeof(dbg_parms)) {
		memset((char *)dbg_parms + dbg_namelen,
		       *(cnp->cn_nameptr + cnp->cn_namelen) ? '>' : 0,
		       sizeof(dbg_parms) - dbg_namelen);
	}
	
	/*
	 * In the event that we collect multiple, consecutive pathname
	 * entries, we must mark the start of the path's string and the end.
	 */
	code = (FSDBG_CODE(DBG_FSRW,36)) | DBG_FUNC_START;

	if (dbg_namelen <= 12)
		code |= DBG_FUNC_END;

	KERNEL_DEBUG_CONSTANT(code, (unsigned int)dp, dbg_parms[0], dbg_parms[1], dbg_parms[2], 0);

	code &= ~DBG_FUNC_START;

	for (i=3, dbg_namelen -= 12; dbg_namelen > 0; i+=4, dbg_namelen -= 16) {
		if (dbg_namelen <= 16)
			code |= DBG_FUNC_END;

		KERNEL_DEBUG_CONSTANT(code, dbg_parms[i], dbg_parms[i+1], dbg_parms[i+2], dbg_parms[i+3], 0);
	}
}
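/*
 * Worked example (illustrative, not part of the original sources): with the
 * record layout described above -- the vnode pointer plus 12 bytes of path in
 * the first KERNEL_DEBUG_CONSTANT record and 16 bytes in each later one,
 * capped at NUMPARMS*4 bytes -- the number of trace records for a path of
 * 'len' bytes can be computed as below.  NUMPARMS is assumed to be 23 here,
 * i.e. dbg_parms holds 92 bytes with 32-bit longs (12 + 5*16).
 */
#define EXAMPLE_NUMPARMS	23	/* assumed value of NUMPARMS */

static int
example_lookup_trace_records(int len)
{
	if (len > EXAMPLE_NUMPARMS * 4)
		len = EXAMPLE_NUMPARMS * 4;	/* only the trailing bytes of the path are kept */
	if (len <= 12)
		return 1;			/* everything fits in the first record */
	return 1 + (len - 12 + 15) / 16;	/* one extra record per started 16-byte chunk */
}
/*
 * For instance, a 13-byte path such as "/foo_bar/spam" needs two records:
 * the first carries "/foo_bar/spa", the second carries "m" plus padding.
 */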
Example #12
__private_extern__ kern_return_t
chudxnu_cpusig_send(int otherCPU, uint32_t request_code)
{
	int				thisCPU;
	kern_return_t			retval = KERN_FAILURE;
	chudcpu_signal_request_t	request;
	uint64_t			deadline;
	chudcpu_data_t			*target_chudp;
	boolean_t old_level;

	disable_preemption();
	// force interrupts on for a cross CPU signal.
	old_level = chudxnu_set_interrupts_enabled(TRUE);
	thisCPU = cpu_number();

	if ((unsigned) otherCPU < real_ncpus &&
	    thisCPU != otherCPU &&
	    cpu_data_ptr[otherCPU]->cpu_running) {

		target_chudp = (chudcpu_data_t *)
					cpu_data_ptr[otherCPU]->cpu_chud;

		/* Fill out request */
		request.req_sync = 0xFFFFFFFF;		/* set sync flag */
		//request.req_type = CPRQchud;		/* set request type */
		request.req_code = request_code;	/* set request */

		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_CHUD,
				     CHUD_CPUSIG_SEND) | DBG_FUNC_NONE,
			otherCPU, request_code, 0, 0, 0);

		/*
		 * Insert the new request in the target cpu's request queue
		 * and signal target cpu.
		 */
		mpenqueue_tail(&target_chudp->cpu_request_queue,
			       &request.req_entry);
		i386_signal_cpu(otherCPU, MP_CHUD, ASYNC);

		/* Wait for response or timeout */
		deadline = mach_absolute_time() + LockTimeOut;
		while (request.req_sync != 0) {
			if (mach_absolute_time() > deadline) {
				panic("chudxnu_cpusig_send(%d,%d) timed out\n",
					otherCPU, request_code);
			}
			cpu_pause();
		}
		retval = KERN_SUCCESS;
	} else {
		retval = KERN_INVALID_ARGUMENT;
	}

	chudxnu_set_interrupts_enabled(old_level);
	enable_preemption();
	return retval;
}
Example #13
void	pmap_pcid_activate(pmap_t tpmap, int ccpu) {
	pcid_t		new_pcid = tpmap->pmap_pcid_cpus[ccpu];
	pmap_t		last_pmap;
	boolean_t	pcid_conflict = FALSE, pending_flush = FALSE;

	pmap_assert(cpu_datap(ccpu)->cpu_pmap_pcid_enabled);
	if (__improbable(new_pcid == PMAP_PCID_INVALID_PCID)) {
		new_pcid = tpmap->pmap_pcid_cpus[ccpu] = pmap_pcid_allocate_pcid(ccpu);
	}
	pmap_assert(new_pcid != PMAP_PCID_INVALID_PCID);
#ifdef	PCID_ASSERT	
	cpu_datap(ccpu)->cpu_last_pcid = cpu_datap(ccpu)->cpu_active_pcid;
#endif
	cpu_datap(ccpu)->cpu_active_pcid = new_pcid;

	pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
	if (__probable(pending_flush == FALSE)) {
		last_pmap = cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[new_pcid];
		pcid_conflict = ((last_pmap != NULL) &&(tpmap != last_pmap));
	}
	if (__improbable(pending_flush || pcid_conflict)) {
		pmap_pcid_validate_cpu(tpmap, ccpu);
	}
	/* Consider making this a unique id */
	cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[new_pcid] = tpmap;

	pmap_assert(new_pcid < PMAP_PCID_MAX_PCID);
	pmap_assert(((tpmap ==  kernel_pmap) && new_pcid == 0) || ((new_pcid != PMAP_PCID_INVALID_PCID) && (new_pcid != 0)));
#if	PMAP_ASSERT
	pcid_record_array[ccpu % PCID_RECORD_SIZE] = tpmap->pm_cr3 | new_pcid | (((uint64_t)(!(pending_flush || pcid_conflict))) <<63);
	pml4_entry_t *pml4 = pmap64_pml4(tpmap, 0ULL);
	/* Diagnostic to detect pagetable anchor corruption */
	if (pml4[KERNEL_PML4_INDEX] != kernel_pmap->pm_pml4[KERNEL_PML4_INDEX])
		__asm__ volatile("int3");
#endif	/* PMAP_ASSERT */
	set_cr3_composed(tpmap->pm_cr3, new_pcid, !(pending_flush || pcid_conflict));

	if (!pending_flush) {
		/* We did not previously observe a pending invalidation for this
		 * ASID. However, the load from the coherency vector
		 * could've been reordered ahead of the store to the
		 * active_cr3 field (in the context switch path, our
		 * caller). Re-consult the pending invalidation vector
		 * after the CR3 write. We rely on MOV CR3's documented
		 * serializing property to avoid insertion of an expensive
		 * barrier. (DRK)
		 */
		pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
		if (__improbable(pending_flush != 0)) {
			pmap_pcid_validate_cpu(tpmap, ccpu);
			set_cr3_composed(tpmap->pm_cr3, new_pcid, FALSE);
		}
	}
	cpu_datap(ccpu)->cpu_pmap_pcid_coherentp = &(tpmap->pmap_pcid_coherency_vector[ccpu]);
#if	DEBUG	
	KERNEL_DEBUG_CONSTANT(0x9c1d0000, tpmap, new_pcid, pending_flush, pcid_conflict, 0);
#endif
}
Example #14
void
thread_tell_urgency(int urgency,
    uint64_t rt_period,
    uint64_t rt_deadline,
    thread_t nthread)
{
	uint64_t	urgency_notification_time_start, delta;
	boolean_t	urgency_assert = (urgency_notification_assert_abstime_threshold != 0);
	assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE);
#if	DEBUG
	urgency_stats[cpu_number() % 64][urgency]++;
#endif
	if (!pmInitDone
	    || pmDispatch == NULL
	    || pmDispatch->pmThreadTellUrgency == NULL)
		return;

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, 0, 0);

	if (__improbable((urgency_assert == TRUE)))
		urgency_notification_time_start = mach_absolute_time();

	current_cpu_datap()->cpu_nthread = nthread;
	pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);

	if (__improbable((urgency_assert == TRUE))) {
		delta = mach_absolute_time() - urgency_notification_time_start;

		if (__improbable(delta > urgency_notification_max_recorded)) {
			/* This is not synchronized, but it doesn't matter
			 * if we (rarely) miss an event, as it is statistically
			 * unlikely that it will never recur.
			 */
			urgency_notification_max_recorded = delta;

			if (__improbable((delta > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended()))
				panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch->pmThreadTellUrgency, delta);
		}
	}

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
}
Example #15
void
unix_syscall_return(int error)
{
	thread_act_t		thread;
	volatile int *rval;
	struct i386_saved_state *regs;
	struct proc *p;
	struct proc *current_proc();
	unsigned short code;
	vm_offset_t params;
	struct sysent *callp;
	extern int nsysent;

	thread = current_act();
	rval = (int *)get_bsduthreadrval(thread);
	p = current_proc();

	regs = USER_REGS(thread);

	/* reconstruct code for tracing before blasting eax */
	code = regs->eax;
	params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
	callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
	if (callp == sysent) {
	  code = fuword(params);
	}

	if (error == ERESTART) {
		regs->eip -= 7;
	}
	else if (error != EJUSTRETURN) {
		if (error) {
		    regs->eax = error;
		    regs->efl |= EFL_CF;	/* carry bit */
		} else { /* (not error) */
		    regs->eax = rval[0];
		    regs->edx = rval[1];
		    regs->efl &= ~EFL_CF;
		} 
	}

	ktrsysret(p, code, error, rval[0], callp->sy_funnel);

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
		error, rval[0], rval[1], 0, 0);

	if (callp->sy_funnel != NO_FUNNEL)
		(void) thread_funnel_set(current_thread()->funnel_lock, FALSE);

	thread_exception_return();
	/* NOTREACHED */
}
Example #16
static kern_return_t
chudxnu_private_chud_ast_callback(
	int			trapno,
	void			*regs,
	int			unused1,
	int			unused2)
{
#pragma unused (trapno)
#pragma unused (regs)
#pragma unused (unused1)
#pragma unused (unused2)
	boolean_t	oldlevel = ml_set_interrupts_enabled(FALSE);
	ast_t		*myast = ast_pending();
	kern_return_t	retval = KERN_FAILURE;
	chudxnu_perfmon_ast_callback_func_t fn = perfmon_ast_callback_fn;
    
	if (*myast & AST_CHUD_URGENT) {
		*myast &= ~(AST_CHUD_URGENT | AST_CHUD);
		if ((*myast & AST_PREEMPTION) != AST_PREEMPTION)
			*myast &= ~(AST_URGENT);
		retval = KERN_SUCCESS;
	} else if (*myast & AST_CHUD) {
		*myast &= ~(AST_CHUD);
		retval = KERN_SUCCESS;
	}

	if (fn) {
		x86_thread_state_t state;
		mach_msg_type_number_t count;
		count = x86_THREAD_STATE_COUNT;

		if (chudxnu_thread_get_state(
			current_thread(),
			x86_THREAD_STATE,
			(thread_state_t) &state, &count,
			TRUE) == KERN_SUCCESS) {

			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_CHUD,
				    CHUD_AST_CALLBACK) | DBG_FUNC_NONE,
				(uint32_t) fn, 0, 0, 0, 0);

			(fn)(
				x86_THREAD_STATE,
				(thread_state_t) &state,
				count);
		}
	}
    
	ml_set_interrupts_enabled(oldlevel);
	return retval;
}
Example #17
/*
 * Routine: 	lck_mtx_lock_acquire
 *
 * Invoked on acquiring the mutex when there is
 * contention.
 *
 * Returns the current number of waiters.
 *
 * Called with the interlock locked.
 */
int
lck_mtx_lock_acquire(
	lck_mtx_t		*lck)
{
	thread_t		thread = current_thread();
	lck_mtx_t		*mutex;

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	if (thread->pending_promoter[thread->pending_promoter_index] == mutex) {
		thread->pending_promoter[thread->pending_promoter_index] = NULL;
		if (thread->pending_promoter_index > 0)
			thread->pending_promoter_index--;
		mutex->lck_mtx_waiters--;
	}

	if (mutex->lck_mtx_waiters > 0) {
		integer_t		priority = mutex->lck_mtx_pri;
		spl_t			s = splsched();

		thread_lock(thread);
		thread->promotions++;
		thread->sched_flags |= TH_SFLAG_PROMOTED;
		if (thread->sched_pri < priority) {
			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
						thread->sched_pri, priority, 0, lck, 0);
			/* Do not promote past promotion ceiling */
			assert(priority <= MAXPRI_PROMOTE);
			set_sched_pri(thread, priority);
		}
		thread_unlock(thread);
		splx(s);
	}
	else
		mutex->lck_mtx_pri = 0;

#if CONFIG_DTRACE
	if (lockstat_probemap[LS_LCK_MTX_LOCK_ACQUIRE] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_ACQUIRE]) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_ACQUIRE, lck, 0);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_ACQUIRE, lck, 0);
		}
	}
#endif	
	return (mutex->lck_mtx_waiters);
}
Example #18
/*
 * Routine:	lck_mtx_clear_promoted
 *
 * Handle clearing of TH_SFLAG_PROMOTED,
 * adjusting thread priority as needed.
 *
 * Called with thread lock held
 */
static void
lck_mtx_clear_promoted (
	thread_t 			thread,
	__kdebug_only uintptr_t		trace_lck)
{
	thread->sched_flags &= ~TH_SFLAG_PROMOTED;

	if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
		/* Thread still has a RW lock promotion */
	} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
				thread->sched_pri, DEPRESSPRI, 0, trace_lck, 0);
		set_sched_pri(thread, DEPRESSPRI);
	} else {
		if (thread->base_pri < thread->sched_pri) {
			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
					thread->sched_pri, thread->base_pri, 0, trace_lck, 0);
		}
		thread_recompute_sched_pri(thread, FALSE);
	}
}
Example #19
/*
 * Call for disabling global system override.
 * This should be called only with the sys_override_lock held.
 */
static void
disable_system_override(uint64_t flags)
{

	if (flags & SYS_OVERRIDE_IO_THROTTLE) {
		assert(io_throttle_assert_cnt > 0);
		io_throttle_assert_cnt--;
		if (io_throttle_assert_cnt == 0) {
			/* Enable I/O Throttling */
			KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_THROTTLE, IO_THROTTLE_DISABLE) | DBG_FUNC_END, current_proc()->p_pid, 0, 0, 0, 0);
			sys_override_io_throttle(THROTTLE_IO_ENABLE);
		}
	}

	if (flags & SYS_OVERRIDE_CPU_THROTTLE) {
		assert(cpu_throttle_assert_cnt > 0);
		cpu_throttle_assert_cnt--;
		if (cpu_throttle_assert_cnt == 0) {
			/* Enable CPU Throttling */
			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_CPU_THROTTLE_DISABLE) | DBG_FUNC_END, current_proc()->p_pid, 0, 0, 0, 0);
			sys_override_cpu_throttle(CPU_THROTTLE_ENABLE);
		}
	}
}
Example #20
__private_extern__ kern_return_t
chudxnu_perfmon_ast_send_urgent(boolean_t urgent)
{
	boolean_t oldlevel = ml_set_interrupts_enabled(FALSE);
	ast_t *myast = ast_pending();

	if (urgent) {
		*myast |= (AST_CHUD_URGENT | AST_URGENT);
	} else {
		*myast |= (AST_CHUD);
	}

	KERNEL_DEBUG_CONSTANT(
		MACHDBG_CODE(DBG_MACH_CHUD, CHUD_AST_SEND) | DBG_FUNC_NONE,
		urgent, 0, 0, 0, 0);

	ml_set_interrupts_enabled(oldlevel);
	return KERN_SUCCESS;
}
Example #21
/*
 * Routine: 	lck_mtx_lock_acquire
 *
 * Invoked on acquiring the mutex when there is
 * contention.
 *
 * Returns the current number of waiters.
 *
 * Called with the interlock locked.
 */
int
lck_mtx_lock_acquire(
	lck_mtx_t		*lck)
{
	thread_t		thread = current_thread();
	lck_mtx_t		*mutex;

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	if (thread->pending_promoter[thread->pending_promoter_index] == mutex) {
		thread->pending_promoter[thread->pending_promoter_index] = NULL;
		if (thread->pending_promoter_index > 0)
			thread->pending_promoter_index--;
		mutex->lck_mtx_waiters--;
	}

	if (mutex->lck_mtx_waiters > 0) {
		integer_t		priority = mutex->lck_mtx_pri;
		spl_t			s = splsched();

		thread_lock(thread);
		thread->promotions++;
		thread->sched_mode |= TH_MODE_PROMOTED;
		if (thread->sched_pri < priority) {
			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
						thread->sched_pri, priority, 0, (int)lck, 0);

			set_sched_pri(thread, priority);
		}
		thread_unlock(thread);
		splx(s);
	}
	else
		mutex->lck_mtx_pri = 0;

	return (mutex->lck_mtx_waiters);
}
Example #22
static int
sd_callback3(proc_t p, void * args)
{
	struct sd_iterargs * sd = (struct sd_iterargs *)args;
	vfs_context_t ctx = vfs_context_current();

	int setsdstate = sd->setsdstate;

	proc_lock(p);
	p->p_shutdownstate = setsdstate;
	if (p->p_stat != SZOMB) {
	       /*
		* NOTE: following code ignores sig_lock and plays
		* with exit_thread correctly.  This is OK unless we
		* are a multiprocessor, in which case I do not
		* understand the sig_lock.  This needs to be fixed.
		* XXX
		*/
		if (p->exit_thread) {	/* someone already doing it */
			proc_unlock(p);
			/* give him a chance */
			thread_block(THREAD_CONTINUE_NULL);
		} else {
			p->exit_thread = current_thread();
			printf(".");

			sd_log(ctx, "%s[%d] had to be forced closed with exit1().\n", p->p_comm, p->p_pid);

			proc_unlock(p);
			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
					      p->p_pid, 0, 1, 0, 0);
			sd->activecount++;
			exit1(p, 1, (int *)NULL);
		}
	} else {
		proc_unlock(p);
	}

	return PROC_RETURNED;
}
Example #23
static void
thread_suspended(__unused void *parameter, wait_result_t result)
{
	thread_t thread = current_thread();

	thread_mtx_lock(thread);

	if (result == THREAD_INTERRUPTED)
		thread->suspend_parked = FALSE;
	else
		assert(thread->suspend_parked == FALSE);

	if (thread->suspend_count > 0) {
		thread_set_apc_ast(thread);
	} else {
		spl_t s = splsched();

		thread_lock(thread);
		if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
			thread->sched_pri = DEPRESSPRI;
			thread->last_processor->current_pri = thread->sched_pri;
			thread->last_processor->current_perfctl_class = thread_get_perfcontrol_class(thread);

			KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_CHANGE_PRIORITY),
			                      (uintptr_t)thread_tid(thread),
			                      thread->base_pri,
			                      thread->sched_pri,
			                      0, /* eventually, 'reason' */
			                      0);
		}
		thread_unlock(thread);
		splx(s);
	}

	thread_mtx_unlock(thread);

	thread_exception_return();
	/*NOTREACHED*/
}
Example #24
/*
 * Re-evaluate the outstanding deadlines and select the most proximate.
 *
 * Should be called at splclock.
 */
void
etimer_resync_deadlines(void)
{
	uint64_t		deadline;
	rtclock_timer_t		*mytimer;
	spl_t			s = splclock();		/* No interruptions please */
	struct per_proc_info	*pp;

	pp = getPerProc();

	deadline = ~0ULL;

	/* if we have a clock timer set sooner, pop on that */
	mytimer = &pp->rtclock_timer;			/* Point to the timer itself */
	if (!mytimer->has_expired && mytimer->deadline > 0)
		deadline = mytimer->deadline;

	/* if we have a power management event coming up, how about that? */
	if (pp->pms.pmsPop > 0 && pp->pms.pmsPop < deadline)
		deadline = pp->pms.pmsPop;
	

	if (deadline > 0 && deadline <= pp->rtcPop) {
		int     decr;
		uint64_t now;

		now = mach_absolute_time();
		decr = setPop(deadline);

		if (deadline < now)
		        pp->rtcPop = now + decr;
		else
		        pp->rtcPop = deadline;

		KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_DECI, 1) | DBG_FUNC_NONE, decr, 2, 0, 0, 0);
	}
	splx(s);
}
Example #25
static void
chudxnu_private_cpu_timer_callback(
	timer_call_param_t param0,
	timer_call_param_t param1)
{
#pragma unused (param0)
#pragma unused (param1)
	chudcpu_data_t			*chud_proc_info;
	boolean_t			oldlevel;
	x86_thread_state_t 		state;
	mach_msg_type_number_t		count;
	chudxnu_cpu_timer_callback_func_t fn;

	oldlevel = ml_set_interrupts_enabled(FALSE);
	chud_proc_info = (chudcpu_data_t *)(current_cpu_datap()->cpu_chud);

	count = x86_THREAD_STATE_COUNT;
	if (chudxnu_thread_get_state(current_thread(),
				     x86_THREAD_STATE,
				     (thread_state_t)&state,
				     &count,
				     FALSE) == KERN_SUCCESS) {
		fn = chud_proc_info->cpu_timer_callback_fn;
		if (fn) {
			KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_CHUD,
					CHUD_TIMER_CALLBACK) | DBG_FUNC_NONE,
				(uint32_t)fn, 0, 0, 0, 0);
				//state.eip, state.cs, 0, 0);
			(fn)(
				x86_THREAD_STATE,
				(thread_state_t)&state,
				count);
		}
	}
	
	ml_set_interrupts_enabled(oldlevel);
}
Example #26
__private_extern__ kern_return_t
chudxnu_cpu_timer_callback_cancel(void)
{
	chudcpu_data_t	*chud_proc_info;
	boolean_t	oldlevel;

	oldlevel = ml_set_interrupts_enabled(FALSE);
	chud_proc_info = (chudcpu_data_t *)(current_cpu_datap()->cpu_chud);

	timer_call_cancel(&(chud_proc_info->cpu_timer_call));

	KERNEL_DEBUG_CONSTANT(
		MACHDBG_CODE(DBG_MACH_CHUD,
			     CHUD_TIMER_CALLBACK_CANCEL) | DBG_FUNC_NONE,
		0, 0, 0, 0, 0);

	// set to max value:
	chud_proc_info->t_deadline |= ~(chud_proc_info->t_deadline);
	chud_proc_info->cpu_timer_callback_fn = NULL;

	ml_set_interrupts_enabled(oldlevel);
 	return KERN_SUCCESS;
}
Example #27
/*
 * Create a new thread.
 * Doesn't start the thread running.
 */
static kern_return_t
thread_create_internal(
	task_t					parent_task,
	integer_t				priority,
	thread_continue_t		continuation,
	thread_t				*out_thread)
{
	thread_t				new_thread;
	static thread_t			first_thread;

	/*
	 *	Allocate a thread and initialize static fields
	 */
	if (first_thread == NULL)
		new_thread = first_thread = current_thread();
	else
		new_thread = (thread_t)zalloc(thread_zone);
	if (new_thread == NULL)
		return (KERN_RESOURCE_SHORTAGE);

	if (new_thread != first_thread)
		*new_thread = thread_template;

#ifdef MACH_BSD
    {
		new_thread->uthread = uthread_alloc(parent_task, new_thread);
		if (new_thread->uthread == NULL) {
			zfree(thread_zone, new_thread);
			return (KERN_RESOURCE_SHORTAGE);
		}
	}
#endif  /* MACH_BSD */

	if (machine_thread_create(new_thread, parent_task) != KERN_SUCCESS) {
#ifdef MACH_BSD
		{
			void *ut = new_thread->uthread;

			new_thread->uthread = NULL;
			/* cred free may not be necessary */
			uthread_cleanup(parent_task, ut, parent_task->bsd_info);
			uthread_cred_free(ut);
			uthread_zone_free(ut);
		}
#endif  /* MACH_BSD */
		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}

    new_thread->task = parent_task;

	thread_lock_init(new_thread);
	wake_lock_init(new_thread);

	mutex_init(&new_thread->mutex, 0);

	ipc_thread_init(new_thread);
	queue_init(&new_thread->held_ulocks);

	new_thread->continuation = continuation;

	mutex_lock(&tasks_threads_lock);
	task_lock(parent_task);

	if (	!parent_task->active							||
			(parent_task->thread_count >= THREAD_MAX	&&
			 parent_task != kernel_task)) {
		task_unlock(parent_task);
		mutex_unlock(&tasks_threads_lock);

#ifdef MACH_BSD
		{
			void *ut = new_thread->uthread;

			new_thread->uthread = NULL;
			uthread_cleanup(parent_task, ut, parent_task->bsd_info);
			/* cred free may not be necessary */
			uthread_cred_free(ut);
			uthread_zone_free(ut);
		}
#endif  /* MACH_BSD */
		ipc_thread_disable(new_thread);
		ipc_thread_terminate(new_thread);
		machine_thread_destroy(new_thread);
		zfree(thread_zone, new_thread);
		return (KERN_FAILURE);
	}

	task_reference_internal(parent_task);

	/* Cache the task's map */
	new_thread->map = parent_task->map;

	/* Chain the thread onto the task's list */
	queue_enter(&parent_task->threads, new_thread, thread_t, task_threads);
	parent_task->thread_count++;
	
	/* So terminating threads don't need to take the task lock to decrement */
	hw_atomic_add(&parent_task->active_thread_count, 1);

	queue_enter(&threads, new_thread, thread_t, threads);
	threads_count++;

	timer_call_setup(&new_thread->wait_timer, thread_timer_expire, new_thread);
	timer_call_setup(&new_thread->depress_timer, thread_depress_expire, new_thread);

	/* Set the thread's scheduling parameters */
	if (parent_task != kernel_task)
		new_thread->sched_mode |= TH_MODE_TIMESHARE;
	new_thread->max_priority = parent_task->max_priority;
	new_thread->task_priority = parent_task->priority;
	new_thread->priority = (priority < 0)? parent_task->priority: priority;
	if (new_thread->priority > new_thread->max_priority)
		new_thread->priority = new_thread->max_priority;
	new_thread->importance =
					new_thread->priority - new_thread->task_priority;
	new_thread->sched_stamp = sched_tick;
	new_thread->pri_shift = sched_pri_shift;
	compute_priority(new_thread, FALSE);

	new_thread->active = TRUE;

	*out_thread = new_thread;

	{
		long	dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4;

		kdbg_trace_data(parent_task->bsd_info, &dbg_arg2);

		KERNEL_DEBUG_CONSTANT(
					TRACEDBG_CODE(DBG_TRACE_DATA, 1) | DBG_FUNC_NONE,
							(vm_address_t)new_thread, dbg_arg2, 0, 0, 0);

		kdbg_trace_string(parent_task->bsd_info,
							&dbg_arg1, &dbg_arg2, &dbg_arg3, &dbg_arg4);

		KERNEL_DEBUG_CONSTANT(
					TRACEDBG_CODE(DBG_TRACE_STRING, 1) | DBG_FUNC_NONE,
							dbg_arg1, dbg_arg2, dbg_arg3, dbg_arg4, 0);
	}

	DTRACE_PROC1(lwp__create, thread_t, *out_thread);

	return (KERN_SUCCESS);
}
Example #28
/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc *fp;
	register struct		vnode *vp;
	int			flags;
	int			prot, file_prot;
	int			err=0;
	vm_map_t		user_map;
	kern_return_t		result;
	mach_vm_offset_t	user_addr;
	mach_vm_size_t		user_size;
	vm_object_offset_t	pageoff;
	vm_object_offset_t	file_pos;
	int			alloc_flags=0;
	boolean_t		docow;
	vm_prot_t		maxprot;
	void 			*handle;
	vm_pager_t		pager;
	int 			mapanon=0;
	int 			fpref=0;
	int error =0;
	int fd = uap->fd;

	user_addr = (mach_vm_offset_t)uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);
	AUDIT_ARG(fd, uap->fd);

	prot = (uap->prot & VM_PROT_ALL);
#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif	/* radar 3777787 */

	flags = uap->flags;
	vp = NULLVP;

	/*
	 * The vm code does not have prototypes & the compiler doesn't do
	 * the right thing when you cast a 64bit value and pass it in a
	 * function call. So here it is.
	 */
	file_pos = (vm_object_offset_t)uap->pos;


	/* make sure mapping fits into numeric range etc */
	if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (file_pos & PAGE_MASK);
	file_pos -= (vm_object_offset_t)pageoff;


	/* Adjust size for rounding (on both ends). */
	user_size += pageoff;			/* low end... */
	user_size = mach_vm_round_page(user_size);	/* hi end */


	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		user_addr -= pageoff;
		if (user_addr & PAGE_MASK)
			return (EINVAL);
	}
#ifdef notyet
	/* DO not have apis to get this info, need to wait till then*/
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
		addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ);

#endif

	alloc_flags = 0;

	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.  Use positive fds as the alias
		 * value for memory tracking. 
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			alloc_flags = fd & (VM_FLAGS_ALIAS_MASK |
					    VM_FLAGS_PURGABLE);
			if (alloc_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}
		}
			
		handle = NULL;
		maxprot = VM_PROT_ALL;
		file_pos = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err)
			return(err);
		fpref = 1;
		if(fp->f_fglob->fg_type == DTYPE_PSXSHM) {
			uap->addr = (user_addr_t)user_addr;
			uap->len = (user_size_t)user_size;
			uap->prot = prot;
			uap->flags = flags;
			uap->pos = file_pos;
			error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
			goto bad;
		}

		if (fp->f_fglob->fg_type != DTYPE_VNODE) {
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp->f_fglob->fg_data;
		error = vnode_getwithref(vp);
		if(error != 0)
			goto bad;

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);
		
		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}
		
		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_fglob->fg_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out. 
			 */

			if ((flags & MAP_SHARED) != 0) {
				if ((fp->f_fglob->fg_flag & FWRITE) != 0) {
 					/*
 					 * check for write access
 					 *
 					 * Note that we already made this check when granting FWRITE
 					 * against the file, so it seems redundant here.
 					 */
 					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);
 
 					/* if not granted for any reason, but we wanted it, bad */
 					if ((prot & PROT_WRITE) && (error != 0)) {
 						vnode_put(vp);
  						goto bad;
  					}
 
 					/* if writable, remember */
 					if (error == 0)
  						maxprot |= VM_PROT_WRITE;

				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else
				maxprot |= VM_PROT_WRITE;

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->f_fglob, prot, flags, &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */
		}
	}

	if (user_size == 0)  {
		if (!mapanon)
			(void)vnode_put(vp);
		error = 0;
		goto bad;
	}

	/*
	 *	We bend a little - round the start and end addresses
	 *	to the nearest page boundary.
	 */
	user_size = mach_vm_round_page(user_size);

	if (file_pos & PAGE_MASK_64) {
		if (!mapanon)
			(void)vnode_put(vp);
		error = EINVAL;
		goto bad;
	}

	user_map = current_map();

	if ((flags & MAP_FIXED) == 0) {
		alloc_flags |= VM_FLAGS_ANYWHERE;
		user_addr = mach_vm_round_page(user_addr);
	} else {
		if (user_addr != mach_vm_trunc_page(user_addr)) {
		        if (!mapanon)
			        (void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
	}

	if (flags & MAP_NOCACHE)
		alloc_flags |= VM_FLAGS_NO_CACHE;

	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		pager = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 IPC_PORT_NULL, 0, FALSE,
						 prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE : 
						 VM_INHERIT_DEFAULT);
		if (result != KERN_SUCCESS)
			goto out;
	} else {
		pager = (vm_pager_t)ubc_getpager(vp);
		
		if (pager == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

		/*
		 *  Set credentials:
		 *	FIXME: if we're writing the file we need a way to
		 *      ensure that someone doesn't replace our R/W creds
		 * 	with ones that only work for read.
		 */

		ubc_setthreadcred(vp, p, current_thread());
		docow = FALSE;
		if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 (ipc_port_t)pager, file_pos,
						 docow, prot, maxprot, 
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE : 
						 VM_INHERIT_DEFAULT);

		if (result != KERN_SUCCESS)  {
				(void)vnode_put(vp);
				goto out;
		}

		file_prot = prot & (PROT_READ | PROT_WRITE | PROT_EXEC);
		if (docow) {
			/* private mapping: won't write to the file */
			file_prot &= ~PROT_WRITE;
		}
		(void) ubc_map(vp, file_prot);
	}

	if (!mapanon)
		(void)vnode_put(vp);

out:
	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + pageoff;
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error =  ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error =  EACCES;
		break;
	default:
		error =  EINVAL;
		break;
	}
bad:
	if (fpref)
		fp_drop(p, fd, fp, 0);

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
			      (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);

	return(error);
}
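/*
 * Illustrative user-space sketch (not from the original sources): the syscall
 * above is what services an ordinary mmap(2) call such as the read-only
 * shared file mapping below.  PROT_READ and MAP_SHARED end up as VM_PROT_READ
 * and VM_INHERIT_SHARE in the vm_map_enter_mem_object() call shown above.
 */
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

static void *
example_map_file_readonly(const char *path, size_t *lenp)
{
	struct stat st;
	void *p;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return NULL;
	if (fstat(fd, &st) < 0 || st.st_size == 0) {
		close(fd);
		return NULL;
	}
	p = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_SHARED, fd, 0);
	close(fd);				/* the mapping holds its own reference */
	if (p == MAP_FAILED)
		return NULL;
	*lenp = (size_t)st.st_size;
	return p;
}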
Example #29
int
spec_strategy(struct vnop_strategy_args *ap)
{
        buf_t	bp;
	int	bflags;
	int 	policy;
	dev_t	bdev;
	uthread_t ut;
	size_t devbsdunit;
	mount_t mp;

        bp = ap->a_bp;
	bdev = buf_device(bp);
	bflags = buf_flags(bp);
	mp = buf_vnode(bp)->v_mount;

        if (kdebug_enable) {
	        int    code = 0;

		if (bflags & B_READ)
		        code |= DKIO_READ;
		if (bflags & B_ASYNC)
		        code |= DKIO_ASYNC;

		if (bflags & B_META)
		        code |= DKIO_META;
		else if (bflags & B_PAGEIO)
		        code |= DKIO_PAGING;

		KERNEL_DEBUG_CONSTANT(FSDBG_CODE(DBG_DKRW, code) | DBG_FUNC_NONE,
				      bp, bdev, (int)buf_blkno(bp), buf_count(bp), 0);
        }
	if (((bflags & (B_IOSTREAMING | B_PAGEIO | B_READ)) == (B_PAGEIO | B_READ)) &&
	    mp && (mp->mnt_kern_flag & MNTK_ROOTDEV))
	        hard_throttle_on_root = 1;


	if (mp != NULL)
		devbsdunit = mp->mnt_devbsdunit;
	else
		devbsdunit = LOWPRI_MAX_NUM_DEV - 1;

	throttle_info_update(&_throttle_io_info[devbsdunit], bflags);
	if ((policy = throttle_get_io_policy(&ut)) == IOPOL_THROTTLE) {
		bp->b_flags |= B_THROTTLED_IO;
	}


	if ((bflags & B_READ) == 0) {
		microuptime(&_throttle_io_info[devbsdunit].last_IO_timestamp);
		if (mp) {
			INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_write_size);
		}
	} else if (mp) {
		INCR_PENDING_IO(buf_count(bp), mp->mnt_pending_read_size);
	}

	(*bdevsw[major(bdev)].d_strategy)(bp);
	
	return (0);
}
Example #30
/*
 *	thread_call_thread:
 */
static void
thread_call_thread(
		thread_call_group_t		group,
		wait_result_t			wres)
{
	thread_t	self = current_thread();
	boolean_t	canwait;

	/*
	 * A wakeup with THREAD_INTERRUPTED indicates that 
	 * we should terminate.
	 */
	if (wres == THREAD_INTERRUPTED) {
		thread_terminate(self);

		/* NOTREACHED */
		panic("thread_terminate() returned?");
	}

	(void)disable_ints_and_lock();

	thread_sched_call(self, group->sched_call);

	while (group->pending_count > 0) {
		thread_call_t			call;
		thread_call_func_t		func;
		thread_call_param_t		param0, param1;

		call = TC(dequeue_head(&group->pending_queue));
		group->pending_count--;

		func = call->tc_call.func;
		param0 = call->tc_call.param0;
		param1 = call->tc_call.param1;

		call->tc_call.queue = NULL;

		_internal_call_release(call);

		/*
		 * Can only do wakeups for thread calls whose storage
		 * we control.
		 */
		if ((call->tc_flags & THREAD_CALL_ALLOC) != 0) {
			canwait = TRUE;
			call->tc_refs++;	/* Delay free until we're done */
		} else
			canwait = FALSE;

		enable_ints_and_unlock();

		KERNEL_DEBUG_CONSTANT(
				MACHDBG_CODE(DBG_MACH_SCHED,MACH_CALLOUT) | DBG_FUNC_NONE,
				VM_KERNEL_UNSLIDE(func), param0, param1, 0, 0);

		(*func)(param0, param1);

		if (get_preemption_level() != 0) {
			int pl = get_preemption_level();
			panic("thread_call_thread: preemption_level %d, last callout %p(%p, %p)",
					pl, (void *)VM_KERNEL_UNSLIDE(func), param0, param1);
		}

		(void)thread_funnel_set(self->funnel_lock, FALSE);		/* XXX */

		(void) disable_ints_and_lock();
		
		if (canwait) {
			/* Frees if so desired */
			thread_call_finish(call);
		}
	}

	thread_sched_call(self, NULL);
	group->active_count--;

	if (group_isparallel(group)) {
		/*
		 * For new style of thread group, thread always blocks. 
		 * If we have more than the target number of threads,
		 * and this is the first to block, and it isn't active 
		 * already, set a timer for deallocating a thread if we 
		 * continue to have a surplus.
		 */
		group->idle_count++;

		if (group->idle_count == 1) {
			group->idle_timestamp = mach_absolute_time();
		}   

		if (((group->flags & TCG_DEALLOC_ACTIVE) == 0) &&
				((group->active_count + group->idle_count) > group->target_thread_count)) {
			group->flags |= TCG_DEALLOC_ACTIVE;
			thread_call_start_deallocate_timer(group);
		}   

		/* Wait for more work (or termination) */
		wres = wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_INTERRUPTIBLE, 0); 
		if (wres != THREAD_WAITING) {
			panic("kcall worker unable to assert wait?");
		}   

		enable_ints_and_unlock();

		thread_block_parameter((thread_continue_t)thread_call_thread, group);
	} else {
		if (group->idle_count < group->target_thread_count) {
			group->idle_count++;

			wait_queue_assert_wait(&group->idle_wqueue, NO_EVENT, THREAD_UNINT, 0); /* Interrupted means to exit */

			enable_ints_and_unlock();

			thread_block_parameter((thread_continue_t)thread_call_thread, group);
			/* NOTREACHED */
		}
	}

	enable_ints_and_unlock();

	thread_terminate(self);
	/* NOTREACHED */
}