Example #1
0
/*
 * Routine:	lck_mtx_sleep_deadline
 */
wait_result_t
lck_mtx_sleep_deadline(
        lck_mtx_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t   res;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
		     (int)lck, (int)lck_sleep_action, (int)event, (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK))
			lck_mtx_lock(lck);
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_mtx_unlock(lck);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}
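
For context, here is a minimal caller sketch (not part of the source above) showing how a client might wait up to one second for a condition using lck_mtx_sleep_deadline(). The lock, flag, and function names (my_lock, my_flag, wait_for_flag) are hypothetical; the sketch assumes the standard <kern/locks.h> and <kern/clock.h> KPIs.

/*
 * Hypothetical caller sketch: wait up to one second for *my_flag to be
 * set.  Assumes my_lock protects my_flag and that the setter calls
 * thread_wakeup((event_t)my_flag) after publishing the change.
 */
static boolean_t
wait_for_flag(lck_mtx_t *my_lock, volatile int *my_flag)
{
	uint64_t	deadline;
	wait_result_t	wr;
	boolean_t	ok;

	clock_interval_to_deadline(1, NSEC_PER_SEC, &deadline);

	lck_mtx_lock(my_lock);
	while (*my_flag == 0) {
		wr = lck_mtx_sleep_deadline(my_lock, LCK_SLEEP_DEFAULT,
		    (event_t)my_flag, THREAD_UNINT, deadline);
		if (wr == THREAD_TIMED_OUT)
			break;		/* deadline expired; give up */
	}
	ok = (*my_flag != 0);
	lck_mtx_unlock(my_lock);

	return ok;
}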
Example #2
0
/*
 * Routine: 	lck_mtx_unlock_wakeup
 *
 * Invoked on unlock when there is contention.
 *
 * Called with the interlock locked.
 */
void
lck_mtx_unlock_wakeup (
	lck_mtx_t			*lck,
	thread_t			holder)
{
	thread_t		thread = current_thread();
	lck_mtx_t		*mutex;

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	if (thread != holder)
		panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START, (int)lck, (int)holder, 0, 0, 0);

	assert(mutex->lck_mtx_waiters > 0);
	thread_wakeup_one((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));

	if (thread->promotions > 0) {
		spl_t		s = splsched();

		thread_lock(thread);
		if (	--thread->promotions == 0				&&
				(thread->sched_flags & TH_SFLAG_PROMOTED)		) {
			thread->sched_flags &= ~TH_SFLAG_PROMOTED;

			if (thread->sched_flags & TH_SFLAG_RW_PROMOTED) {
				/* Thread still has a RW lock promotion */
			} else if (thread->sched_flags & TH_SFLAG_DEPRESSED_MASK) {
				KERNEL_DEBUG_CONSTANT(
					MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) | DBG_FUNC_NONE,
						  thread->sched_pri, DEPRESSPRI, 0, lck, 0);

				set_sched_pri(thread, DEPRESSPRI);
			}
			else {
				if (thread->priority < thread->sched_pri) {
					KERNEL_DEBUG_CONSTANT(
						MACHDBG_CODE(DBG_MACH_SCHED,MACH_DEMOTE) |
															DBG_FUNC_NONE,
							thread->sched_pri, thread->priority,
									0, lck, 0);
				}

				SCHED(compute_priority)(thread, FALSE);
			}
		}
		thread_unlock(thread);
		splx(s);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
Example #3
0
/*
 * Routine:	lck_mtx_sleep
 */
wait_result_t
lck_mtx_sleep(
        lck_mtx_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible)
{
	wait_result_t	res;
	thread_t		thread = current_thread();
 
	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_START,
		     VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * We overload the RW lock promotion to give us a priority ceiling
		 * during the time that this thread is asleep, so that when it
		 * is re-awakened (and not yet contending on the mutex), it is
		 * runnable at a reasonably high priority.
		 */
		thread->rwlock_count++;
	}

	res = assert_wait(event, interruptible);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN))
				lck_mtx_lock_spin(lck);
			else if ((lck_sleep_action & LCK_SLEEP_SPIN_ALWAYS))
				lck_mtx_lock_spin_always(lck);
			else
				lck_mtx_lock(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_mtx_unlock(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */
			lck_rw_clear_promotion(thread);
		}
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}
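
As a complement to the sketch after Example #1, here is the hypothetical wakeup side of the same pattern (not from the source): the producer publishes the state change under the mutex and then wakes any thread blocked in lck_mtx_sleep()/lck_mtx_sleep_deadline() on that event. Names are again made up.

/*
 * Hypothetical producer sketch: set the flag under the mutex and wake
 * sleepers.  thread_wakeup() posts to the same event_t that the
 * sleepers passed to lck_mtx_sleep().
 */
static void
post_flag(lck_mtx_t *my_lock, volatile int *my_flag)
{
	lck_mtx_lock(my_lock);
	*my_flag = 1;
	thread_wakeup((event_t)my_flag);
	lck_mtx_unlock(my_lock);
}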
Example #4
0
/*
 * Routine:	lck_mtx_sleep_deadline
 */
wait_result_t
lck_mtx_sleep_deadline(
        lck_mtx_t		*lck,
	lck_sleep_action_t	lck_sleep_action,
	event_t			event,
	wait_interrupt_t	interruptible,
	uint64_t		deadline)
{
	wait_result_t   res;
	thread_t		thread = current_thread();

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_START,
		     VM_KERNEL_UNSLIDE_OR_PERM(lck), (int)lck_sleep_action, VM_KERNEL_UNSLIDE_OR_PERM(event), (int)interruptible, 0);

	if ((lck_sleep_action & ~LCK_SLEEP_MASK) != 0)
		panic("Invalid lock sleep action %x\n", lck_sleep_action);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		/*
		 * See lck_mtx_sleep().
		 */
		thread->rwlock_count++;
	}

	res = assert_wait_deadline(event, interruptible, deadline);
	if (res == THREAD_WAITING) {
		lck_mtx_unlock(lck);
		res = thread_block(THREAD_CONTINUE_NULL);
		if (!(lck_sleep_action & LCK_SLEEP_UNLOCK)) {
			if ((lck_sleep_action & LCK_SLEEP_SPIN))
				lck_mtx_lock_spin(lck);
			else
				lck_mtx_lock(lck);
		}
	}
	else
	if (lck_sleep_action & LCK_SLEEP_UNLOCK)
		lck_mtx_unlock(lck);

	if (lck_sleep_action & LCK_SLEEP_PROMOTED_PRI) {
		if ((thread->rwlock_count-- == 1 /* field now 0 */) && (thread->sched_flags & TH_SFLAG_RW_PROMOTED)) {
			/* sched_flags checked without lock, but will be rechecked while clearing */
			lck_rw_clear_promotion(thread);
		}
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_SLEEP_DEADLINE_CODE) | DBG_FUNC_END, (int)res, 0, 0, 0, 0);

	return res;
}
Example #5
0
boolean_t
thread_funnel_set(
        funnel_t *	fnl,
	boolean_t	funneled)
{
	thread_t	cur_thread;
	boolean_t	funnel_state_prev;
	boolean_t	intr;
        
	cur_thread = current_thread();
	funnel_state_prev = ((cur_thread->funnel_state & TH_FN_OWNED) == TH_FN_OWNED);

	if (funnel_state_prev != funneled) {
		intr = ml_set_interrupts_enabled(FALSE);

		if (funneled == TRUE) {
			if (cur_thread->funnel_lock)
				panic("Funnel lock called when holding one %p", cur_thread->funnel_lock);
			KERNEL_DEBUG(0x6032428 | DBG_FUNC_NONE,
											fnl, 1, 0, 0, 0);
			funnel_lock(fnl);
			KERNEL_DEBUG(0x6032434 | DBG_FUNC_NONE,
											fnl, 1, 0, 0, 0);
			cur_thread->funnel_state |= TH_FN_OWNED;
			cur_thread->funnel_lock = fnl;
		} else {
			if (cur_thread->funnel_lock->fnl_mutex != fnl->fnl_mutex)
				panic("Funnel unlock when not holding funnel");
			cur_thread->funnel_state &= ~TH_FN_OWNED;
			KERNEL_DEBUG(0x603242c | DBG_FUNC_NONE,
								fnl, 1, 0, 0, 0);

			cur_thread->funnel_lock = THR_FUNNEL_NULL;
			funnel_unlock(fnl);
		}
		(void)ml_set_interrupts_enabled(intr);
	} else {
		/*
		 * If we are trying to acquire the funnel recursively,
		 * make sure the funnel we already hold is the same one.
		 */
		if (funneled && (fnl->fnl_mutex != cur_thread->funnel_lock->fnl_mutex)) {
				panic("thread_funnel_set: already holding a different funnel");
		}
	}
	return(funnel_state_prev);
}
/*
 * Routine: 	lck_mtx_lock_spinwait
 *
 * Invoked trying to acquire a mutex when there is contention but
 * the holder is running on another processor. We spin for up to a maximum
 * time waiting for the lock to be released.
 *
 * Called with the interlock unlocked.
 */
void
lck_mtx_lock_spinwait(
	lck_mtx_t		*lck)
{
	thread_t		holder;
	volatile lck_mtx_t	*mutex;
	uint64_t		deadline;

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	KERNEL_DEBUG(
		MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_SPIN) | DBG_FUNC_NONE,
		(int)lck, (int)mutex->lck_mtx_locked, 0, 0, 0);

	deadline = mach_absolute_time() + MutexSpin;
	/*
	 * Spin while:
	 *   - the mutex is locked, and either
	 *   - it's locked as a spin lock, or
	 *   - owner is running on another processor, and
	 *   - owner (processor) is not idling, and
	 *   - we haven't spun for long enough.
	 */
	while ((holder = (thread_t) mutex->lck_mtx_locked) != NULL) {
	        if ((holder == (thread_t)MUTEX_LOCKED_AS_SPIN) ||
		    ((holder->machine.specFlags & OnProc) != 0 &&
		     (holder->state & TH_IDLE) == 0 &&
		     mach_absolute_time() < deadline)) {
		        cpu_pause();
			continue;
		}
		break;
	}
#if	CONFIG_DTRACE
	/*
	 * The deadline computed above lets us recover how long we spun:
	 * if dtrace is active, we work backwards from it, since
	 * (deadline - MutexSpin) marks when spinning began.
	 *
	 * Note that we record a different probe id depending on whether
	 * this is a direct or indirect mutex.  This allows dtrace
	 * processing, if desired, to penalize only lock groups that
	 * have debug/stats enabled.
	 */
	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
		LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_SPIN, lck,
		    mach_absolute_time() - (deadline - MutexSpin));
	} else {
		LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_SPIN, lck,
		    mach_absolute_time() - (deadline - MutexSpin));
	}
	/* The lockstat acquire event is recorded by the assembly code beneath us. */
#endif
}
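
MutexSpin above is an absolute-time spin budget. As a rough illustration of where such a budget comes from (a sketch, not this source's initialization; the constant USEC_SPIN_BOUND and the init function are made up), it would typically be derived once from a wall-clock bound at lock-module initialization:

/*
 * Hypothetical initialization sketch: convert a microsecond bound into
 * mach absolute-time units for use as the adaptive-spin deadline.
 */
#define USEC_SPIN_BOUND	10	/* illustrative: spin for at most ~10us */

extern uint64_t MutexSpin;	/* spin budget consumed by lck_mtx_lock_spinwait() */

static void
mutex_spin_init(void)
{
	uint64_t	abstime;

	nanoseconds_to_absolutetime(USEC_SPIN_BOUND * NSEC_PER_USEC, &abstime);
	MutexSpin = abstime;
}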
/*
 *      Routine:        lck_rw_lock_exclusive_to_shared
 */
void
lck_rw_lock_exclusive_to_shared(
	lck_rw_t	*lck)
{
	boolean_t	wakeup_readers = FALSE;
	boolean_t	wakeup_writers = FALSE;
	boolean_t	istate;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, 0, 0);

	istate = lck_interlock_lock(lck);

	lck->lck_rw_shared_count++;
	if (lck->lck_rw_want_upgrade)
		lck->lck_rw_want_upgrade = FALSE;
	else
	 	lck->lck_rw_want_write = FALSE;

	if (lck->lck_w_waiting) {
		lck->lck_w_waiting = FALSE;
		wakeup_writers = TRUE;
	} 
	if (!(lck->lck_rw_priv_excl && wakeup_writers == TRUE) && 
			lck->lck_r_waiting) {
		lck->lck_r_waiting = FALSE;
		wakeup_readers = TRUE;
	}

	lck_interlock_unlock(lck, istate);

	if (wakeup_readers)
		thread_wakeup(RW_LOCK_READER_EVENT(lck));
	if (wakeup_writers)
		thread_wakeup(RW_LOCK_WRITER_EVENT(lck));

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EX_TO_SH_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, lck->lck_rw_shared_count, 0);

#if CONFIG_DTRACE
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_TO_SHARED_DOWNGRADE, lck, 0);
#endif
}
Example #8
0
void
lck_rw_lock_shared_gen(lck_rw_t	*lck)
{
	int		i;
	wait_result_t      res;
    
	lck_rw_ilk_lock(lck);

	while ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
           ((lck->lck_rw_shared_count == 0) || (lck->lck_rw_priv_excl))) {
		i = lock_wait_time[1];
        
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START,
                     (int)lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, i, 0);
        
		if (i != 0) {
			lck_rw_ilk_unlock(lck);
			while (--i != 0 &&
			       (lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
			       ((lck->lck_rw_shared_count == 0) || (lck->lck_rw_priv_excl)))
				continue;
			lck_rw_ilk_lock(lck);
		}
        
		if ((lck->lck_rw_want_excl || lck->lck_rw_want_upgrade) &&
		    ((lck->lck_rw_shared_count == 0) || (lck->lck_rw_priv_excl))) {
			lck->lck_rw_waiting = TRUE;
			res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
			if (res == THREAD_WAITING) {
				lck_rw_ilk_unlock(lck);
				res = thread_block(THREAD_CONTINUE_NULL);
				lck_rw_ilk_lock(lck);
			}
		}
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_END,
                     (int)lck, lck->lck_rw_want_excl, lck->lck_rw_want_upgrade, res, 0);
	}
    
	lck->lck_rw_shared_count++;
    
	lck_rw_ilk_unlock(lck);
}
Example #9
0
void throttle_lowpri_io(boolean_t ok_to_sleep)
{
	int i;
	int max_try_num;
	struct uthread *ut;
	struct _throttle_io_info_t *info;

	ut = get_bsdthread_info(current_thread());

	if ((ut->uu_lowpri_window == 0) || (ut->uu_throttle_info == NULL))
		goto done;

	info = ut->uu_throttle_info;
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_START,
		     ut->uu_lowpri_window, ok_to_sleep, 0, 0, 0);

	if (ok_to_sleep == TRUE) {
		max_try_num = lowpri_max_waiting_msecs / LOWPRI_SLEEP_INTERVAL * MAX(1, info->numthreads_throttling);

		for (i=0; i<max_try_num; i++) {
			if (throttle_io_will_be_throttled_internal(ut->uu_lowpri_window, info)) {
				IOSleep(LOWPRI_SLEEP_INTERVAL);
    				DEBUG_ALLOC_THROTTLE_INFO("sleeping because of info = %p\n", info, info );
			} else {
				break;
			}
		}
	}
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_END,
		     ut->uu_lowpri_window, i*5, 0, 0, 0);
	SInt32 oldValue;
	oldValue = OSDecrementAtomic(&info->numthreads_throttling);

	if (oldValue <= 0) {
		panic("%s: numthreads negative", __func__);
	}
done:
	ut->uu_lowpri_window = 0;
	if (ut->uu_throttle_info)
		throttle_info_rel(ut->uu_throttle_info);
	ut->uu_throttle_info = NULL;
}
Example #10
0
/*
 * Routine: 	lck_mtx_unlock_wakeup
 *
 * Invoked on unlock when there is contention.
 *
 * Called with the interlock locked.
 */
void
lck_mtx_unlock_wakeup (
	lck_mtx_t			*lck,
	thread_t			holder)
{
	thread_t		thread = current_thread();
	lck_mtx_t		*mutex;
	__kdebug_only uintptr_t trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	if (thread != holder)
		panic("lck_mtx_unlock_wakeup: mutex %p holder %p\n", mutex, holder);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_START, trace_lck, VM_KERNEL_UNSLIDE_OR_PERM(holder), 0, 0, 0);

	assert(mutex->lck_mtx_waiters > 0);
	if (mutex->lck_mtx_waiters > 1)
		thread_wakeup_one_with_pri(LCK_MTX_EVENT(lck), lck->lck_mtx_pri);
	else
		thread_wakeup_one(LCK_MTX_EVENT(lck));

	if (thread->promotions > 0) {
		spl_t		s = splsched();

		thread_lock(thread);
		if (--thread->promotions == 0 && (thread->sched_flags & TH_SFLAG_PROMOTED))
			lck_mtx_clear_promoted(thread, trace_lck);
		thread_unlock(thread);
		splx(s);
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
Example #11
0
void
lck_mtx_unlockspin_wakeup (
	lck_mtx_t			*lck)
{
	assert(lck->lck_mtx_waiters > 0);
	thread_wakeup_one((event_t)(((unsigned int*)lck)+(sizeof(lck_mtx_t)-1)/sizeof(unsigned int)));

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_NONE, (int)lck, 0, 0, 1, 0);
#if CONFIG_DTRACE
	/*
	 * When there are waiters, we skip the hot-patch spot in the
	 * fastpath, so we record it here.
	 */
	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lck, 0);
#endif
}
Example #12
0
void
lck_mtx_unlockspin_wakeup (
	lck_mtx_t			*lck)
{
	assert(lck->lck_mtx_waiters > 0);
	thread_wakeup_one(LCK_MTX_EVENT(lck));

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_UNLCK_WAKEUP_CODE) | DBG_FUNC_NONE, VM_KERNEL_UNSLIDE_OR_PERM(lck), 0, 0, 1, 0);
#if CONFIG_DTRACE
	/*
	 * When there are waiters, we skip the hot-patch spot in the
	 * fastpath, so we record it here.
	 */
	LOCKSTAT_RECORD(LS_LCK_MTX_UNLOCK_RELEASE, lck, 0);
#endif
}
Example #13
0
static int
pmThreadGetUrgency(uint64_t *rt_period, uint64_t *rt_deadline)
{
	int             urgency;
	uint64_t        arg1, arg2;

	urgency = thread_get_urgency(current_processor()->next_thread, &arg1, &arg2);

	if (urgency == THREAD_URGENCY_REAL_TIME) {
		if (rt_period != NULL)
			*rt_period = arg1;
		
		if (rt_deadline != NULL)
			*rt_deadline = arg2;
	}

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_SCHED_GET_URGENCY), urgency, arg1, arg2, 0, 0);

	return(urgency);
}
Example #14
0
/*
 * Routine: 	lck_mtx_lock_wait
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns it unlocked.
 */
void
lck_mtx_lock_wait (
	lck_mtx_t			*lck,
	thread_t			holder)
{
	thread_t		self = current_thread();
	lck_mtx_t		*mutex;
	integer_t		priority;
	spl_t			s = splsched();
#if	CONFIG_DTRACE
	uint64_t		sleep_start = 0;

	if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
		sleep_start = mach_absolute_time();
	}
#endif

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, (int)lck, (int)holder, 0, 0, 0);

	priority = self->sched_pri;
	if (priority < self->priority)
		priority = self->priority;
	if (priority < BASEPRI_DEFAULT)
		priority = BASEPRI_DEFAULT;

	thread_lock(holder);
	if (mutex->lck_mtx_pri == 0)
		holder->promotions++;
	holder->sched_mode |= TH_MODE_PROMOTED;
	if (		mutex->lck_mtx_pri < priority	&&
				holder->sched_pri < priority		) {
		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
					holder->sched_pri, priority, (int)holder, (int)lck, 0);

		set_sched_pri(holder, priority);
	}
	thread_unlock(holder);
	splx(s);

	if (mutex->lck_mtx_pri < priority)
		mutex->lck_mtx_pri = priority;
	if (self->pending_promoter[self->pending_promoter_index] == NULL) {
		self->pending_promoter[self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}
	else
	if (self->pending_promoter[self->pending_promoter_index] != mutex) {
		self->pending_promoter[++self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}

	assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_mtx_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
	lck_mtx_ilk_unlock(mutex);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
#if	CONFIG_DTRACE
	/*
	 * Record the DTrace lockstat blocking probe; block time is
	 * measured from when this routine was entered.
	 */
	if (sleep_start) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		}
	}
#endif
}
boolean_t
lck_rw_lock_shared_to_exclusive(
	lck_rw_t	*lck)
{
	int	    i;
	boolean_t	    do_wakeup = FALSE;
	wait_result_t      res;
#if	MACH_LDEBUG
	int		   decrementer;
#endif	/* MACH_LDEBUG */
	boolean_t	istate;
#if	CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int slept = 0;
	int readers_at_sleep = 0;
#endif

	istate = lck_interlock_lock(lck);

	lck->lck_rw_shared_count--;	

	if (lck->lck_rw_want_upgrade) {
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

		/*
		 *	Someone else has requested upgrade.
		 *	Since we've released a read lock, wake
		 *	him up.
		 */
		if (lck->lck_w_waiting && (lck->lck_rw_shared_count == 0)) {
			lck->lck_w_waiting = FALSE;
			do_wakeup = TRUE;
		}

		lck_interlock_unlock(lck, istate);

		if (do_wakeup) 
			thread_wakeup(RW_LOCK_WRITER_EVENT(lck));

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);

		return (FALSE);
	}

	lck->lck_rw_want_upgrade = TRUE;

#if	MACH_LDEBUG
	decrementer = DECREMENTER_TIMEOUT;
#endif	/* MACH_LDEBUG */
	while (lck->lck_rw_shared_count != 0) {
#if	CONFIG_DTRACE
		if (lockstat_probemap[LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN] && wait_interval == 0) {
			wait_interval = mach_absolute_time();
			readers_at_sleep = lck->lck_rw_shared_count;
		} else {
			wait_interval = -1;
		}
#endif
		i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_shared_count, i, 0, 0);

		if (i != 0) {
			lck_interlock_unlock(lck, istate);
#if	MACH_LDEBUG
			if (!--decrementer)
				Debugger("timeout - lck_rw_shared_count");
#endif	/* MACH_LDEBUG */
			while (--i != 0 && lck->lck_rw_shared_count != 0)
				lck_rw_lock_pause(istate);
			istate = lck_interlock_lock(lck);
		}

		if (lck->lck_rw_can_sleep && lck->lck_rw_shared_count != 0) {
			lck->lck_w_waiting = TRUE;
			res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
			if (res == THREAD_WAITING) {
				lck_interlock_unlock(lck, istate);
				res = thread_block(THREAD_CONTINUE_NULL);
#if	CONFIG_DTRACE
				slept = 1;
#endif
				istate = lck_interlock_lock(lck);
			}
		}
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
	}

	lck_interlock_unlock(lck, istate);
#if	CONFIG_DTRACE
	/*
	 * We infer whether we took the sleep/spin path above by checking readers_at_sleep.
	 */
	if (wait_interval != 0 && wait_interval != (unsigned) -1 && readers_at_sleep) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_TO_EXCL_SPIN, lck, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_TO_EXCL_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}

	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_TO_EXCL_UPGRADE, lck, 1);
#endif
	return (TRUE);
}
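
For reference, a hedged caller sketch (not from the source) of the upgrade pattern this routine supports: when lck_rw_lock_shared_to_exclusive() returns FALSE the shared hold has already been dropped, so the caller must take the lock exclusively again and re-validate anything it inspected under the shared hold. The function and variable names here are hypothetical.

/*
 * Hypothetical upgrade pattern: inspect under a shared hold, then try
 * to upgrade in place before modifying.
 */
static void
update_table(lck_rw_t *table_lock)
{
	lck_rw_lock_shared(table_lock);
	/* ... read-only inspection ... */

	if (!lck_rw_lock_shared_to_exclusive(table_lock)) {
		/* Upgrade lost the race: the shared hold is already gone. */
		lck_rw_lock_exclusive(table_lock);
		/* ... re-validate what was inspected above ... */
	}
	/* ... modification under the exclusive hold ... */
	lck_rw_unlock_exclusive(table_lock);
}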
/*
 *	Routine:	lck_rw_lock_shared_gen
 */
void
lck_rw_lock_shared_gen(
	lck_rw_t	*lck)
{
	int		i;
	wait_result_t      res;
#if	MACH_LDEBUG
	int		decrementer;
#endif	/* MACH_LDEBUG */
	boolean_t	istate;
#if	CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int slept = 0;
	int readers_at_sleep;
#endif

	istate = lck_interlock_lock(lck);
#if	CONFIG_DTRACE
	readers_at_sleep = lck->lck_rw_shared_count;
#endif

#if	MACH_LDEBUG
	decrementer = DECREMENTER_TIMEOUT;
#endif	/* MACH_LDEBUG */
	while ((lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
	    ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {

		i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, i, 0);
#if	CONFIG_DTRACE
		if ((lockstat_probemap[LS_LCK_RW_LOCK_SHARED_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_SHARED_BLOCK]) && wait_interval == 0) {
			wait_interval = mach_absolute_time();
		} else {
			wait_interval = -1;
		}
#endif

		if (i != 0) {
			lck_interlock_unlock(lck, istate);
#if	MACH_LDEBUG
			if (!--decrementer)
				Debugger("timeout - wait no writers");
#endif	/* MACH_LDEBUG */
			while (--i != 0 &&
			    (lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
			       ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl))
				lck_rw_lock_pause(istate);
			istate = lck_interlock_lock(lck);
		}

		if (lck->lck_rw_can_sleep &&
		    (lck->lck_rw_want_write || lck->lck_rw_want_upgrade) &&
		    ((lck->lck_rw_shared_count == 0) || lck->lck_rw_priv_excl)) {
			lck->lck_r_waiting = TRUE;
			res = assert_wait(RW_LOCK_READER_EVENT(lck), THREAD_UNINT);
			if (res == THREAD_WAITING) {
				lck_interlock_unlock(lck, istate);
				res = thread_block(THREAD_CONTINUE_NULL);
#if	CONFIG_DTRACE
				slept = 1;
#endif
				istate = lck_interlock_lock(lck);
			}
		}
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SHARED_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_want_write, lck->lck_rw_want_upgrade, res, 0);
	}

	lck->lck_rw_shared_count++;

	lck_interlock_unlock(lck, istate);
#if	CONFIG_DTRACE
	if (wait_interval != 0 && wait_interval != (unsigned) -1) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_SHARED_SPIN, lck, mach_absolute_time() - wait_interval, 0);
		} else {
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_SHARED_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 0,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_SHARED_ACQUIRE, lck, 0);
#endif
}
Example #17
0
u_int16_t
inet_cksum(struct mbuf *m, unsigned int nxt, unsigned int skip,
    unsigned int len)
{
	u_short *w;
	u_int32_t sum = 0;
	int mlen = 0;
	int byte_swapped = 0;
	union s_util s_util;
	union l_util l_util;

	KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_START, len,0,0,0,0);

	/* sanity check */
	if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.len < skip + len) {
		panic("inet_cksum: mbuf len (%d) < off+len (%d+%d)\n",
		    m->m_pkthdr.len, skip, len);
	}

	/* include pseudo header checksum? */
	if (nxt != 0) {
		struct ip *iph;

		if (m->m_len < sizeof (struct ip))
			panic("inet_cksum: bad mbuf chain");

		iph = mtod(m, struct ip *);
		sum = in_pseudo(iph->ip_src.s_addr, iph->ip_dst.s_addr,
		    htonl(len + nxt));
	}

	if (skip != 0) {
		for (; skip && m; m = m->m_next) {
			if (m->m_len > skip) {
				mlen = m->m_len - skip;
				w = (u_short *)(m->m_data+skip);
				goto skip_start;
			} else {
				skip -= m->m_len;
			}
		}
	}
	for (;m && len; m = m->m_next) {
		if (m->m_len == 0)
			continue;
		w = mtod(m, u_short *);

		if (mlen == -1) {
			/*
			 * The first byte of this mbuf is the continuation
			 * of a word spanning between this mbuf and the
			 * last mbuf.
			 *
			 * s_util.c[0] is already saved when scanning previous
			 * mbuf.
			 */
			s_util.c[1] = *(char *)w;
			sum += s_util.s;
			w = (u_short *)((char *)w + 1);
			mlen = m->m_len - 1;
			len--;
		} else {
			mlen = m->m_len;
		}
skip_start:
		if (len < mlen)
			mlen = len;

		len -= mlen;
		/*
		 * Force to even boundary.
		 */
		if ((1 & (uintptr_t) w) && (mlen > 0)) {
			REDUCE;
			sum <<= 8;
			s_util.c[0] = *(u_char *)w;
			w = (u_short *)((char *)w + 1);
			mlen--;
			byte_swapped = 1;
		}
		/*
		 * Unroll the loop to make overhead from
		 * branches &c small.
		 */
		while ((mlen -= 32) >= 0) {
			sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
			sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7];
			sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11];
			sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15];
			w += 16;
		}
		mlen += 32;
		while ((mlen -= 8) >= 0) {
			sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
			w += 4;
		}
		mlen += 8;
		if (mlen == 0 && byte_swapped == 0)
			continue;
		REDUCE;
		while ((mlen -= 2) >= 0) {
			sum += *w++;
		}
		if (byte_swapped) {
			REDUCE;
			sum <<= 8;
			byte_swapped = 0;
			if (mlen == -1) {
				s_util.c[1] = *(char *)w;
				sum += s_util.s;
				mlen = 0;
			} else
				mlen = -1;
		} else if (mlen == -1)
			s_util.c[0] = *(char *)w;
	}
	if (len)
		printf("cksum: out of data by %d\n", len);
	if (mlen == -1) {
		/*
		 * The last mbuf has an odd number of bytes. Follow the
		 * standard (the odd byte may or may not be shifted left
		 * by 8 bits, as determined by the endianness of the machine).
		 */
		s_util.c[1] = 0;
		sum += s_util.s;
	}
	REDUCE;
	KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_END, 0,0,0,0,0);
	return (~sum & 0xffff);
}
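
The REDUCE macro and the s_util/l_util unions used above are not shown in this excerpt. In BSD-derived in_cksum implementations they are typically defined roughly as below (a reference sketch, not copied from this file), folding the 32-bit accumulator back into 16 bits with an end-around carry:

/* Typical helper definitions assumed by inet_cksum() above. */
union l_util {
	u_int16_t	s[2];
	u_int32_t	l;
};
union s_util {
	char		c[2];
	u_short		s;
};

#define ADDCARRY(x)	do { if ((x) > 65535) (x) -= 65535; } while (0)
#define REDUCE		{ l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum); }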
Example #18
0
boolean_t
lck_rw_lock_shared_to_exclusive_gen(lck_rw_t	*lck)
{
	int	    i;
	boolean_t	    do_wakeup = FALSE;
	wait_result_t      res;

	lck_rw_ilk_lock(lck);
    
	lck->lck_rw_shared_count--;
    
	if (lck->lck_rw_want_upgrade) {
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_START,
                     (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
        
		/*
		 *	Someone else has requested upgrade.
		 *	Since we've released a read lock, wake
		 *	him up.
		 */
		if (lck->lck_rw_waiting && (lck->lck_rw_shared_count == 0)) {
			lck->lck_rw_waiting = FALSE;
			do_wakeup = TRUE;
		}
        
		lck_rw_ilk_unlock(lck);
        
		if (do_wakeup)
			thread_wakeup((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))));
        
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX_CODE) | DBG_FUNC_END,
                     (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, 0, 0);
        
		return (FALSE);
	}
    
	lck->lck_rw_want_upgrade = TRUE;
    
	while (lck->lck_rw_shared_count != 0) {
		i = lock_wait_time[1];
        
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_START,
                     (int)lck, lck->lck_rw_shared_count, i, 0, 0);
		if (i != 0) {
			lck_rw_ilk_unlock(lck);
			while (--i != 0 && lck->lck_rw_shared_count != 0)
				continue;
			lck_rw_ilk_lock(lck);
		}
        
		if (lck->lck_rw_shared_count != 0) {
			lck->lck_rw_waiting = TRUE;
			res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
			if (res == THREAD_WAITING) {
				lck_rw_ilk_unlock(lck);
				res = thread_block(THREAD_CONTINUE_NULL);
				lck_rw_ilk_lock(lck);
			}
		}
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_SH_TO_EX1_CODE) | DBG_FUNC_END,
                     (int)lck, lck->lck_rw_shared_count, 0, 0, 0);
	}
    
	lck_rw_ilk_unlock(lck);
    

	return (TRUE);
}
Example #19
0
void
lck_rw_lock_exclusive_gen(lck_rw_t	*lck)
{
	int             i;
	wait_result_t   res;
    
	lck_rw_ilk_lock(lck);
	/*
	 *	Try to acquire the lck_rw_want_excl bit.
	 */

	while (lck->lck_rw_want_excl) {
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);

		i = lock_wait_time[1];
		if (i != 0) {
			lck_rw_ilk_unlock(lck);
			while (--i != 0 && lck->lck_rw_want_excl)
				continue;
			lck_rw_ilk_lock(lck);
		}
        
		if (lck->lck_rw_want_excl) {
			lck->lck_rw_waiting = TRUE;
			res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);
			if (res == THREAD_WAITING) {
				lck_rw_ilk_unlock(lck);
				res = thread_block(THREAD_CONTINUE_NULL);
				lck_rw_ilk_lock(lck);
			}
		}
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_END, (int)lck, res, 0, 0, 0);
	}
	lck->lck_rw_want_excl = TRUE;
    
	/* Wait for readers (and upgrades) to finish */
    
	while ((lck->lck_rw_shared_count != 0) || lck->lck_rw_want_upgrade) {
        
		i = lock_wait_time[1];
        
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_START,
                     (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, i, 0);

        
		if (i != 0) {
			lck_rw_ilk_unlock(lck);
			while (--i != 0 && (lck->lck_rw_shared_count != 0 ||
                                lck->lck_rw_want_upgrade))
				continue;
			lck_rw_ilk_lock(lck);
		}
        
		if (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade) {
			lck->lck_rw_waiting = TRUE;
			res = assert_wait((event_t)(((unsigned int*)lck)+((sizeof(lck_rw_t)-1)/sizeof(unsigned int))), THREAD_UNINT);

			if (res == THREAD_WAITING) {
				lck_rw_ilk_unlock(lck);
				res = thread_block(THREAD_CONTINUE_NULL);
				lck_rw_ilk_lock(lck);
			}
		}
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_END,
                     (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, res, 0);
	}
    
	lck_rw_ilk_unlock(lck);
}
Example #20
0
/*
 * Routine: 	lck_mtx_lock_wait
 *
 * Invoked in order to wait on contention.
 *
 * Called with the interlock locked and
 * returns it unlocked.
 */
void
lck_mtx_lock_wait (
	lck_mtx_t			*lck,
	thread_t			holder)
{
	thread_t		self = current_thread();
	lck_mtx_t		*mutex;
	__kdebug_only uintptr_t	trace_lck = VM_KERNEL_UNSLIDE_OR_PERM(lck);
	__kdebug_only uintptr_t	trace_holder = VM_KERNEL_UNSLIDE_OR_PERM(holder);
	integer_t		priority;
	spl_t			s = splsched();
#if	CONFIG_DTRACE
	uint64_t		sleep_start = 0;

	if (lockstat_probemap[LS_LCK_MTX_LOCK_BLOCK] || lockstat_probemap[LS_LCK_MTX_EXT_LOCK_BLOCK]) {
		sleep_start = mach_absolute_time();
	}
#endif

	if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT)
		mutex = lck;
	else
		mutex = &lck->lck_mtx_ptr->lck_mtx;

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_START, trace_lck, trace_holder, 0, 0, 0);

	priority = self->sched_pri;
	if (priority < self->base_pri)
		priority = self->base_pri;
	if (priority < BASEPRI_DEFAULT)
		priority = BASEPRI_DEFAULT;

	/* Do not promote past promotion ceiling */
	priority = MIN(priority, MAXPRI_PROMOTE);

	thread_lock(holder);
	if (mutex->lck_mtx_pri == 0)
		holder->promotions++;
	holder->sched_flags |= TH_SFLAG_PROMOTED;
	if (mutex->lck_mtx_pri < priority && holder->sched_pri < priority) {
		KERNEL_DEBUG_CONSTANT(
			MACHDBG_CODE(DBG_MACH_SCHED,MACH_PROMOTE) | DBG_FUNC_NONE,
					holder->sched_pri, priority, trace_holder, trace_lck, 0);
		set_sched_pri(holder, priority);
	}
	thread_unlock(holder);
	splx(s);

	if (mutex->lck_mtx_pri < priority)
		mutex->lck_mtx_pri = priority;
	if (self->pending_promoter[self->pending_promoter_index] == NULL) {
		self->pending_promoter[self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}
	else
	if (self->pending_promoter[self->pending_promoter_index] != mutex) {
		self->pending_promoter[++self->pending_promoter_index] = mutex;
		mutex->lck_mtx_waiters++;
	}

	assert_wait(LCK_MTX_EVENT(mutex), THREAD_UNINT);
	lck_mtx_ilk_unlock(mutex);

	thread_block(THREAD_CONTINUE_NULL);

	KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_MTX_LCK_WAIT_CODE) | DBG_FUNC_END, 0, 0, 0, 0, 0);
#if	CONFIG_DTRACE
	/*
	 * Record the DTrace lockstat blocking probe; block time is
	 * measured from when this routine was entered.
	 */
	if (sleep_start) {
		if (lck->lck_mtx_tag != LCK_MTX_TAG_INDIRECT) {
			LOCKSTAT_RECORD(LS_LCK_MTX_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		} else {
			LOCKSTAT_RECORD(LS_LCK_MTX_EXT_LOCK_BLOCK, lck,
			    mach_absolute_time() - sleep_start);
		}
	}
#endif
}
Example #21
0
kern_return_t
copypv(addr64_t src64, addr64_t snk64, unsigned int size, int which)
{
	unsigned int lop, csize;
	int bothphys = 0;
	
	KERNEL_DEBUG(0xeff7004c | DBG_FUNC_START, (unsigned)src64,
		     (unsigned)snk64, size, which, 0);

	if ((which & (cppvPsrc | cppvPsnk)) == 0 )				/* Make sure that only one is virtual */
		panic("copypv: no more than 1 parameter may be virtual\n");	/* Not allowed */

	if ((which & (cppvPsrc | cppvPsnk)) == (cppvPsrc | cppvPsnk))
	        bothphys = 1;							/* both are physical */

	while (size) {
	  
	        if (bothphys) {
		        lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1)));		/* Assume sink smallest */

			if (lop > (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1))))
			        lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)));	/* No, source is smaller */
		} else {
		        /*
			 * only need to compute the resid for the physical page
			 * address... we don't care about where we start/finish in
			 * the virtual since we just call the normal copyin/copyout
			 */
		        if (which & cppvPsrc)
			        lop = (unsigned int)(PAGE_SIZE - (src64 & (PAGE_SIZE - 1)));
			else
			        lop = (unsigned int)(PAGE_SIZE - (snk64 & (PAGE_SIZE - 1)));
		}
		csize = size;						/* Assume we can copy it all */
		if (lop < size)
		        csize = lop;					/* Nope, we can't do it all */
#if 0		
		/*
		 * flush_dcache64 is currently a nop on the i386... 
		 * it's used when copying to non-system memory such
		 * as video capture cards... on PPC there was a need
		 * to flush due to how we mapped this memory... not
		 * sure if it's needed on i386.
		 */
		if (which & cppvFsrc)
		        flush_dcache64(src64, csize, 1);		/* If requested, flush source before move */
		if (which & cppvFsnk)
		        flush_dcache64(snk64, csize, 1);		/* If requested, flush sink before move */
#endif
		if (bothphys)
		        bcopy_phys(src64, snk64, csize);		/* Do a physical copy, virtually */
		else {
		        if (copyio_phys(src64, snk64, csize, which))
			        return (KERN_FAILURE);
		}
#if 0
		if (which & cppvFsrc)
		        flush_dcache64(src64, csize, 1);	/* If requested, flush source after move */
		if (which & cppvFsnk)
		        flush_dcache64(snk64, csize, 1);	/* If requested, flush sink after move */
#endif
		size   -= csize;					/* Calculate what is left */
		snk64 += csize;					/* Bump sink to next physical address */
		src64 += csize;					/* Bump source to next physical address */
	}
	KERNEL_DEBUG(0xeff7004c | DBG_FUNC_END, (unsigned)src64,
		     (unsigned)snk64, size, which, 0);

	return KERN_SUCCESS;
}
Example #22
0
static int
copyio(int copy_type, user_addr_t user_addr, char *kernel_addr,
       vm_size_t nbytes, vm_size_t *lencopied, int use_kernel_map)
{
        thread_t	thread;
	pmap_t		pmap;
	vm_size_t	bytes_copied;
	int		error = 0;
	boolean_t	istate = FALSE;
	boolean_t	recursive_CopyIOActive;
#if KDEBUG
	int		debug_type = 0xeff70010;
	debug_type += (copy_type << 2);
#endif

	thread = current_thread();

	KERNEL_DEBUG(debug_type | DBG_FUNC_START,
		     (unsigned)(user_addr >> 32), (unsigned)user_addr,
		     nbytes, thread->machine.copyio_state, 0);

	if (nbytes == 0)
		goto out;

        pmap = thread->map->pmap;

	if ((copy_type != COPYINPHYS) && (copy_type != COPYOUTPHYS) && ((vm_offset_t)kernel_addr < VM_MIN_KERNEL_AND_KEXT_ADDRESS)) {
		panic("Invalid copy parameter, copy type: %d, kernel address: %p", copy_type, kernel_addr);
	}

	/* Sanity and security check for addresses to/from a user */

	if (((pmap != kernel_pmap) && (use_kernel_map == 0)) &&
	    ((nbytes && (user_addr+nbytes <= user_addr)) || ((user_addr + nbytes) > vm_map_max(thread->map)))) {
		error = EFAULT;
		goto out;
	}

	/*
	 * If the no_shared_cr3 boot-arg is set (true), the kernel runs on 
	 * its own pmap and cr3 rather than the user's -- so that wild accesses
	 * from kernel or kexts can be trapped. So, during copyin and copyout,
	 * we need to switch back to the user's map/cr3. The thread is flagged
	 * "CopyIOActive" at this time so that if the thread is pre-empted,
	 * we will later restore the correct cr3.
	 */
	recursive_CopyIOActive = thread->machine.specFlags & CopyIOActive;
	thread->machine.specFlags |= CopyIOActive;
	user_access_enable();
	if (no_shared_cr3) {
		istate = ml_set_interrupts_enabled(FALSE);
 		if (get_cr3_base() != pmap->pm_cr3)
			set_cr3_raw(pmap->pm_cr3);
	}

	/*
	 * Ensure that we're running on the target thread's cr3.
	 */
	if ((pmap != kernel_pmap) && !use_kernel_map &&
	    (get_cr3_base() != pmap->pm_cr3)) {
		panic("copyio(%d,%p,%p,%ld,%p,%d) cr3 is %p expects %p",
			copy_type, (void *)user_addr, kernel_addr, nbytes, lencopied, use_kernel_map,
			(void *) get_cr3_raw(), (void *) pmap->pm_cr3);
	}
	if (no_shared_cr3)
		(void) ml_set_interrupts_enabled(istate);

	KERNEL_DEBUG(0xeff70044 | DBG_FUNC_NONE, (unsigned)user_addr,
		     (unsigned)kernel_addr, nbytes, 0, 0);

        switch (copy_type) {

	case COPYIN:
	        error = _bcopy((const void *) user_addr,
				kernel_addr,
				nbytes);
		break;
			
	case COPYOUT:
	        error = _bcopy(kernel_addr,
				(void *) user_addr,
				nbytes);
		break;

	case COPYINPHYS:
	        error = _bcopy((const void *) user_addr,
				PHYSMAP_PTOV(kernel_addr),
				nbytes);
		break;

	case COPYOUTPHYS:
	        error = _bcopy((const void *) PHYSMAP_PTOV(kernel_addr),
				(void *) user_addr,
				nbytes);
		break;

	case COPYINSTR:
	        error = _bcopystr((const void *) user_addr,
				kernel_addr,
				(int) nbytes,
				&bytes_copied);

		/*
		 * lencopied should be updated on success
		 * or ENAMETOOLONG...  but not EFAULT
		 */
		if (error != EFAULT)
		        *lencopied = bytes_copied;

		if (error) {
#if KDEBUG
		        nbytes = *lencopied;
#endif
		        break;
		}
		if (*(kernel_addr + bytes_copied - 1) == 0) {
		        /*
			 * we found a NULL terminator... we're done
			 */
#if KDEBUG
		        nbytes = *lencopied;
#endif
			break;
		} else {
		        /*
			 * no more room in the buffer and we haven't
			 * yet come across a NULL terminator
			 */
#if KDEBUG
		        nbytes = *lencopied;
#endif
		        error = ENAMETOOLONG;
			break;
		}
		break;
	}

	user_access_disable();
	if (!recursive_CopyIOActive) {
		thread->machine.specFlags &= ~CopyIOActive;
	}
	if (no_shared_cr3) {
		istate = ml_set_interrupts_enabled(FALSE);
		if  (get_cr3_raw() != kernel_pmap->pm_cr3)
			set_cr3_raw(kernel_pmap->pm_cr3);
		(void) ml_set_interrupts_enabled(istate);
	}

out:
	KERNEL_DEBUG(debug_type | DBG_FUNC_END, (unsigned)user_addr,
		     (unsigned)kernel_addr, (unsigned)nbytes, error, 0);

	return (error);
}
/*
 *      Routine:        lck_rw_lock_exclusive
 */
void
lck_rw_lock_exclusive(
	lck_rw_t	*lck)
{
	int	   i;
	wait_result_t	res;
#if	MACH_LDEBUG
	int				decrementer;
#endif	/* MACH_LDEBUG */
	boolean_t	istate;
#if	CONFIG_DTRACE
	uint64_t wait_interval = 0;
	int slept = 0;
	int readers_at_sleep;
#endif

	istate = lck_interlock_lock(lck);
#if	CONFIG_DTRACE
	readers_at_sleep = lck->lck_rw_shared_count;
#endif

#if	MACH_LDEBUG
	decrementer = DECREMENTER_TIMEOUT;
#endif	/* MACH_LDEBUG */

	/*
	 *	Try to acquire the lck_rw_want_write bit.
	 */
	while (lck->lck_rw_want_write) {

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_START, (int)lck, 0, 0, 0, 0);
		/*
		 * Either sleeping or spinning is about to happen;
		 * start timing our delay interval now.
		 */
#if	CONFIG_DTRACE
		if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
			wait_interval = mach_absolute_time();
		} else {
			wait_interval = -1;
		}
#endif


		i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];
		if (i != 0) {
			lck_interlock_unlock(lck, istate);
#if	MACH_LDEBUG
			if (!--decrementer)
				Debugger("timeout - lck_rw_want_write");
#endif	/* MACH_LDEBUG */
			while (--i != 0 && lck->lck_rw_want_write)
				lck_rw_lock_pause(istate);
			istate = lck_interlock_lock(lck);
		}

		if (lck->lck_rw_can_sleep && lck->lck_rw_want_write) {
			lck->lck_w_waiting = TRUE;
			res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
			if (res == THREAD_WAITING) {
				lck_interlock_unlock(lck, istate);
				res = thread_block(THREAD_CONTINUE_NULL);
#if	CONFIG_DTRACE
				slept = 1;
#endif
				istate = lck_interlock_lock(lck);
			}
		}
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE_CODE) | DBG_FUNC_END, (int)lck, res, 0, 0, 0);
	}
	lck->lck_rw_want_write = TRUE;

	/* Wait for readers (and upgrades) to finish */

#if	MACH_LDEBUG
	decrementer = DECREMENTER_TIMEOUT;
#endif	/* MACH_LDEBUG */
	while ((lck->lck_rw_shared_count != 0) || lck->lck_rw_want_upgrade) {

		i = lock_wait_time[lck->lck_rw_can_sleep ? 1 : 0];

		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_START,
			     (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, i, 0);

#if	CONFIG_DTRACE
		/*
		 * Either sleeping or spinning is about to happen;
		 * start timing our delay interval now.  If we set it
		 * to -1 we don't have accurate data, so we cannot later
		 * decide to record a dtrace spin or sleep event.
		 */
		if ((lockstat_probemap[LS_LCK_RW_LOCK_EXCL_SPIN] || lockstat_probemap[LS_LCK_RW_LOCK_EXCL_BLOCK]) && wait_interval == 0) {
			wait_interval = mach_absolute_time();
		} else {
			wait_interval = (unsigned) -1;
		}
#endif

		if (i != 0) {
			lck_interlock_unlock(lck, istate);
#if	MACH_LDEBUG
			if (!--decrementer)
				Debugger("timeout - wait for readers");
#endif	/* MACH_LDEBUG */
			while (--i != 0 && (lck->lck_rw_shared_count != 0 ||
					    lck->lck_rw_want_upgrade))
				lck_rw_lock_pause(istate);
			istate = lck_interlock_lock(lck);
		}

		if (lck->lck_rw_can_sleep && (lck->lck_rw_shared_count != 0 || lck->lck_rw_want_upgrade)) {
			lck->lck_w_waiting = TRUE;
			res = assert_wait(RW_LOCK_WRITER_EVENT(lck), THREAD_UNINT);
			if (res == THREAD_WAITING) {
				lck_interlock_unlock(lck, istate);
				res = thread_block(THREAD_CONTINUE_NULL);
#if	CONFIG_DTRACE
				slept = 1;
#endif
				istate = lck_interlock_lock(lck);
			}
		}
		KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_LOCKS, LCK_RW_LCK_EXCLUSIVE1_CODE) | DBG_FUNC_END,
			     (int)lck, lck->lck_rw_shared_count, lck->lck_rw_want_upgrade, res, 0);
	}

	lck_interlock_unlock(lck, istate);
#if	CONFIG_DTRACE
	/*
	 * Decide what latencies we suffered that are Dtrace events.
	 * If we have set wait_interval, then we either spun or slept.
	 * At least we get out from under the interlock before we record
	 * which is the best we can do here to minimize the impact
	 * of the tracing.
	 * If we have set wait_interval to -1, then dtrace was not enabled when we
	 * started sleeping/spinning so we don't record this event.
	 */
	if (wait_interval != 0 && wait_interval != (unsigned) -1) {
		if (slept == 0) {
			LOCKSTAT_RECORD2(LS_LCK_RW_LOCK_EXCL_SPIN, lck,
			    mach_absolute_time() - wait_interval, 1);
		} else {
			/*
			 * For the blocking case, we also record whether the lock was
			 * held for read or write when we blocked, and how many
			 * readers there were.
			 * Notice that above we recorded this before we dropped
			 * the interlock so the count is accurate.
			 */
			LOCKSTAT_RECORD4(LS_LCK_RW_LOCK_EXCL_BLOCK, lck,
			    mach_absolute_time() - wait_interval, 1,
			    (readers_at_sleep == 0 ? 1 : 0), readers_at_sleep);
		}
	}
	LOCKSTAT_RECORD(LS_LCK_RW_LOCK_EXCL_ACQUIRE, lck, 1);
#endif
}
Example #24
0
/*
 * Modify the packet so that the payload is encrypted.
 * The mbuf (m) must start with IPv4 or IPv6 header.
 * On failure, free the given mbuf and return NULL.
 *
 * on invocation:
 *	m   nexthdrp md
 *	v   v        v
 *	IP ......... payload
 * during the encryption:
 *	m   nexthdrp mprev md
 *	v   v        v     v
 *	IP ............... esp iv payload pad padlen nxthdr
 *	                   <--><-><------><--------------->
 *	                   esplen plen    extendsiz
 *	                       ivlen
 *	                   <-----> esphlen
 *	<-> hlen
 *	<-----------------> espoff
 */
static int
esp_output(
	struct mbuf *m,
	u_char *nexthdrp,
	struct mbuf *md,
	int af,
	struct secasvar *sav)
{
	struct mbuf *n;
	struct mbuf *mprev;
	struct esp *esp;
	struct esptail *esptail;
	const struct esp_algorithm *algo;
	u_int32_t spi;
	u_int8_t nxt = 0;
	size_t plen;	/*payload length to be encrypted*/
	size_t espoff;
	size_t esphlen;	/* sizeof(struct esp/newesp) + ivlen */
	int ivlen;
	int afnumber;
	size_t extendsiz;
	int error = 0;
	struct ipsecstat *stat;
	struct udphdr *udp = NULL;
	int	udp_encapsulate = (sav->flags & SADB_X_EXT_NATT && (af == AF_INET || af == AF_INET6) &&
			(esp_udp_encap_port & 0xFFFF) != 0);

	KERNEL_DEBUG(DBG_FNC_ESPOUT | DBG_FUNC_START, sav->ivlen,0,0,0,0);
	switch (af) {
#if INET
	case AF_INET:
		afnumber = 4;
		stat = &ipsecstat;
		break;
#endif
#if INET6
	case AF_INET6:
		afnumber = 6;
		stat = &ipsec6stat;
		break;
#endif
	default:
		ipseclog((LOG_ERR, "esp_output: unsupported af %d\n", af));
		KERNEL_DEBUG(DBG_FNC_ESPOUT | DBG_FUNC_END, 1,0,0,0,0);
		return 0;	/* no change at all */
	}

	/* some sanity check */
	if ((sav->flags & SADB_X_EXT_OLD) == 0 && !sav->replay) {
		switch (af) {
#if INET
		case AF_INET:
		    {
			struct ip *ip;

			ip = mtod(m, struct ip *);
			ipseclog((LOG_DEBUG, "esp4_output: internal error: "
				"sav->replay is null: %x->%x, SPI=%u\n",
				(u_int32_t)ntohl(ip->ip_src.s_addr),
				(u_int32_t)ntohl(ip->ip_dst.s_addr),
				(u_int32_t)ntohl(sav->spi)));
			IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
			break;
		    }
#endif /*INET*/
#if INET6
		case AF_INET6:
			ipseclog((LOG_DEBUG, "esp6_output: internal error: "
				"sav->replay is null: SPI=%u\n",
				(u_int32_t)ntohl(sav->spi)));
			IPSEC_STAT_INCREMENT(ipsec6stat.out_inval);
			break;
#endif /*INET6*/
		default:
			panic("esp_output: should not reach here");
		}
		m_freem(m);
		KERNEL_DEBUG(DBG_FNC_ESPOUT | DBG_FUNC_END, 2,0,0,0,0);
		return EINVAL;
	}

	algo = esp_algorithm_lookup(sav->alg_enc);
	if (!algo) {
		ipseclog((LOG_ERR, "esp_output: unsupported algorithm: "
		    "SPI=%u\n", (u_int32_t)ntohl(sav->spi)));
		m_freem(m);
		KERNEL_DEBUG(DBG_FNC_ESPOUT | DBG_FUNC_END, 3,0,0,0,0);
		return EINVAL;
	}
	spi = sav->spi;
	ivlen = sav->ivlen;
	/* should be okay */
	if (ivlen < 0) {
		panic("invalid ivlen");
	}

    {
	/*
	 * insert ESP header.
	 * XXX inserts ESP header right after IPv4 header.  should
	 * chase the header chain.
	 * XXX sequential number
	 */
#if INET
	struct ip *ip = NULL;
#endif
#if INET6
	struct ip6_hdr *ip6 = NULL;
#endif
	size_t esplen;	/* sizeof(struct esp/newesp) */
	size_t hlen = 0;	/* ip header len */

	if (sav->flags & SADB_X_EXT_OLD) {
		/* RFC 1827 */
		esplen = sizeof(struct esp);
	} else {
		/* RFC 2406 */
		if (sav->flags & SADB_X_EXT_DERIV)
			esplen = sizeof(struct esp);
		else
			esplen = sizeof(struct newesp);
	}
	esphlen = esplen + ivlen;

	for (mprev = m; mprev && mprev->m_next != md; mprev = mprev->m_next)
		;
	if (mprev == NULL || mprev->m_next != md) {
		ipseclog((LOG_DEBUG, "esp%d_output: md is not in chain\n",
		    afnumber));
		m_freem(m);
		KERNEL_DEBUG(DBG_FNC_ESPOUT | DBG_FUNC_END, 4,0,0,0,0);
		return EINVAL;
	}

	plen = 0;
	for (n = md; n; n = n->m_next)
		plen += n->m_len;

	switch (af) {
#if INET
	case AF_INET:
		ip = mtod(m, struct ip *);
#ifdef _IP_VHL
		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
#else
		hlen = ip->ip_hl << 2;
#endif
		break;
#endif
#if INET6
	case AF_INET6:
		ip6 = mtod(m, struct ip6_hdr *);
		hlen = sizeof(*ip6);
		break;
#endif
	}

	/* make the packet over-writable */
	mprev->m_next = NULL;
	if ((md = ipsec_copypkt(md)) == NULL) {
		m_freem(m);
		error = ENOBUFS;
		goto fail;
	}
	mprev->m_next = md;
	
	/* 
	 * Translate UDP source port back to its original value.
	 * SADB_X_EXT_NATT_MULTIPLEUSERS is only set for transport mode.
	 */
	if ((sav->flags & SADB_X_EXT_NATT_MULTIPLEUSERS) != 0) {
		/* if not UDP - drop it */
		if (ip->ip_p != IPPROTO_UDP)	{
			IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
			m_freem(m);
			error = EINVAL;
			goto fail;
		}			
		
		udp = mtod(md, struct udphdr *);

		/* if src port not set in sav - find it */
		if (sav->natt_encapsulated_src_port == 0)
			if (key_natt_get_translated_port(sav) == 0) {
				m_freem(m);
				error = EINVAL;
				goto fail;
			}
		if (sav->remote_ike_port == htons(udp->uh_dport)) {
			/* translate UDP port */
			udp->uh_dport = sav->natt_encapsulated_src_port;
			udp->uh_sum = 0;	/* don't need checksum with ESP auth */
		} else {
			/* drop the packet - can't translate the port */
			IPSEC_STAT_INCREMENT(ipsecstat.out_inval);
			m_freem(m);
			error = EINVAL;
			goto fail;
		}
	}
		

	espoff = m->m_pkthdr.len - plen;
	
	if (udp_encapsulate) {
		esphlen += sizeof(struct udphdr);
		espoff += sizeof(struct udphdr);
	}

	/*
	 * grow the mbuf to accommodate the ESP header.
	 * before: IP ... payload
	 * after:  IP ... [UDP] ESP IV payload
	 */
	if (M_LEADINGSPACE(md) < esphlen || (md->m_flags & M_EXT) != 0) {
		MGET(n, M_DONTWAIT, MT_DATA);
		if (!n) {
			m_freem(m);
			error = ENOBUFS;
			goto fail;
		}
		n->m_len = esphlen;
		mprev->m_next = n;
		n->m_next = md;
		m->m_pkthdr.len += esphlen;
		if (udp_encapsulate) {
			udp = mtod(n, struct udphdr *);
			esp = (struct esp *)(void *)((caddr_t)udp + sizeof(struct udphdr));
		} else {
			esp = mtod(n, struct esp *);
		}
	} else {
Example #25
0
	// LP64todo - fix this! 'n' should be int64_t?
int
uiomove64(const addr64_t c_cp, int n, struct uio *uio)
{
	addr64_t cp = c_cp;
#if LP64KERN
	uint64_t acnt;
#else
	u_int acnt;
#endif
	int error = 0;

#if DIAGNOSTIC
	if (uio->uio_rw != UIO_READ && uio->uio_rw != UIO_WRITE)
		panic("uiomove: mode");
#endif

#if LP64_DEBUG
	if (IS_VALID_UIO_SEGFLG(uio->uio_segflg) == 0) {
		panic("%s :%d - invalid uio_segflg\n", __FILE__, __LINE__); 
	}
#endif /* LP64_DEBUG */

	while (n > 0 && uio_resid(uio)) {
		acnt = uio_iov_len(uio);
		if (acnt == 0) {
			uio_next_iov(uio);
			uio->uio_iovcnt--;
			continue;
		}
		if (n > 0 && acnt > (uint64_t)n)
			acnt = n;

		switch (uio->uio_segflg) {

		case UIO_USERSPACE64:
		case UIO_USERISPACE64:
			// LP64 - 3rd argument in debug code is 64 bit, expected to be 32 bit
			if (uio->uio_rw == UIO_READ)
			  {
			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_START,
					 (int)cp, (int)uio->uio_iovs.iov64p->iov_base, acnt, 0,0);

					error = copyout( CAST_DOWN(caddr_t, cp), uio->uio_iovs.iov64p->iov_base, acnt );

			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_END,
					 (int)cp, (int)uio->uio_iovs.iov64p->iov_base, acnt, 0,0);
			  }
			else
			  {
			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_START,
					 (int)uio->uio_iovs.iov64p->iov_base, (int)cp, acnt, 0,0);

			        error = copyin(uio->uio_iovs.iov64p->iov_base, CAST_DOWN(caddr_t, cp), acnt);

			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_END,
					 (int)uio->uio_iovs.iov64p->iov_base, (int)cp, acnt, 0,0);
			  }
			if (error)
				return (error);
			break;

		case UIO_USERSPACE32:
		case UIO_USERISPACE32:
		case UIO_USERSPACE:
		case UIO_USERISPACE:
			if (uio->uio_rw == UIO_READ)
			  {
			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_START,
					 (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 0,0);

					error = copyout( CAST_DOWN(caddr_t, cp), CAST_USER_ADDR_T(uio->uio_iovs.iov32p->iov_base), acnt );

			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_END,
					 (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 0,0);
			  }
			else
			  {
			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_START,
					 (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 0,0);

			        error = copyin(CAST_USER_ADDR_T(uio->uio_iovs.iov32p->iov_base), CAST_DOWN(caddr_t, cp), acnt);

			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_END,
					 (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 0,0);
			  }
			if (error)
				return (error);
			break;

		case UIO_SYSSPACE32:
		case UIO_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
				error = copywithin(CAST_DOWN(caddr_t, cp), (caddr_t)uio->uio_iovs.iov32p->iov_base,
						   acnt);
			else
				error = copywithin((caddr_t)uio->uio_iovs.iov32p->iov_base, CAST_DOWN(caddr_t, cp),
						   acnt);
			break;

		case UIO_PHYS_USERSPACE64:
			if (uio->uio_rw == UIO_READ)
			  {
			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_START,
					 (int)cp, (int)uio->uio_iovs.iov64p->iov_base, acnt, 1,0);

				error = copypv((addr64_t)cp, uio->uio_iovs.iov64p->iov_base, acnt, cppvPsrc | cppvNoRefSrc);
				if (error) 	/* Copy physical to virtual */
				        error = EFAULT;

			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_END,
					 (int)cp, (int)uio->uio_iovs.iov64p->iov_base, acnt, 1,0);
			  }
			else
			  {
			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_START,
					 (int)uio->uio_iovs.iov64p->iov_base, (int)cp, acnt, 1,0);

				error = copypv(uio->uio_iovs.iov64p->iov_base, (addr64_t)cp, acnt, cppvPsnk | cppvNoRefSrc | cppvNoModSnk);
				if (error)	/* Copy virtual to physical */
				        error = EFAULT;

			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_END,
					 (int)uio->uio_iovs.iov64p->iov_base, (int)cp, acnt, 1,0);
			  }
			if (error)
				return (error);
			break;

		case UIO_PHYS_USERSPACE32:
		case UIO_PHYS_USERSPACE:
			if (uio->uio_rw == UIO_READ)
			  {
			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_START,
					 (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 1,0);

				error = copypv((addr64_t)cp, (addr64_t)uio->uio_iovs.iov32p->iov_base, acnt, cppvPsrc | cppvNoRefSrc);
				if (error) 	/* Copy physical to virtual */
				        error = EFAULT;

			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_END,
					 (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 1,0);
			  }
			else
			  {
			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_START,
					 (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 1,0);

				error = copypv((addr64_t)uio->uio_iovs.iov32p->iov_base, (addr64_t)cp, acnt, cppvPsnk | cppvNoRefSrc | cppvNoModSnk);
				if (error)	/* Copy virtual to physical */
				        error = EFAULT;

			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_END,
					 (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 1,0);
			  }
			if (error)
				return (error);
			break;

		case UIO_PHYS_SYSSPACE32:
		case UIO_PHYS_SYSSPACE:
			if (uio->uio_rw == UIO_READ)
			  {
			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_START,
					 (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 2,0);

				error = copypv((addr64_t)cp, uio->uio_iovs.iov32p->iov_base, acnt, cppvKmap | cppvPsrc | cppvNoRefSrc);
				if (error) 	/* Copy physical to virtual */
				        error = EFAULT;

			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYOUT)) | DBG_FUNC_END,
					 (int)cp, (int)uio->uio_iovs.iov32p->iov_base, acnt, 2,0);
			  }
			else
			  {
			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_START,
					 (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 2,0);

				error = copypv(uio->uio_iovs.iov32p->iov_base, (addr64_t)cp, acnt, cppvKmap | cppvPsnk | cppvNoRefSrc | cppvNoModSnk);
				if (error)	/* Copy virtual to physical */
				        error = EFAULT;

			        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, DBG_UIO_COPYIN)) | DBG_FUNC_END,
					 (int)uio->uio_iovs.iov32p->iov_base, (int)cp, acnt, 2,0);
			  }
			if (error)
				return (error);
			break;

		default:
			break;
		}
		uio_iov_base_add(uio, acnt);
#if LP64KERN
		uio_iov_len_add(uio, -((int64_t)acnt));
		uio_setresid(uio, (uio_resid(uio) - ((int64_t)acnt)));
#else
		uio_iov_len_add(uio, -((int)acnt));
		uio_setresid(uio, (uio_resid(uio) - ((int)acnt)));
#endif
		uio->uio_offset += acnt;
		cp += acnt;
		n -= acnt;
	}
	return (error);
}
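
Finally, a hedged sketch (not from the source) of how a driver's read path might feed a kernel buffer into the uio machinery shown above. The buffer and function names are hypothetical; uiomove() is the plain front end, which in this codebase forwards into uiomove64() and advances the iovecs, resid, and offset exactly as shown.

/*
 * Hypothetical read-handler sketch: copy up to uio_resid() bytes of a
 * kernel buffer out to the caller's uio.
 */
static int
return_buffer(const void *kbuf, size_t klen, struct uio *uio)
{
	user_ssize_t	resid = uio_resid(uio);
	int		n;

	n = (int)MIN(klen, (size_t)resid);

	/* Returns 0 on success or an errno such as EFAULT. */
	return uiomove((const char *)kbuf, n, uio);
}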