Exemplo n.º 1
0
/*
 * Activate tpmap's address space on CPU ccpu using a process-context ID.
 *
 * Looks up the PCID tpmap uses on this CPU (allocating one on first use),
 * records it as the CPU's active PCID and loads CR3 via set_cr3_composed().
 * The last argument to set_cr3_composed() is TRUE only when no invalidation
 * was pending for this pmap on this CPU and the PCID was not last dispatched
 * on behalf of a different pmap; otherwise pmap_pcid_validate_cpu() is
 * called before the CR3 load.
 * NOTE(review): the last argument presumably means "preserve cached
 * translations for this PCID" -- confirm against set_cr3_composed().
 *
 * tpmap: pmap being activated.
 * ccpu:  index of the CPU on which the activation happens.
 */
void	pmap_pcid_activate(pmap_t tpmap, int ccpu) {
	pcid_t		new_pcid = tpmap->pmap_pcid_cpus[ccpu];
	pmap_t		last_pmap;
	boolean_t	pcid_conflict = FALSE, pending_flush = FALSE;

	pmap_assert(cpu_datap(ccpu)->cpu_pmap_pcid_enabled);
	/* No PCID yet for this pmap on this CPU: allocate one and cache it. */
	if (__improbable(new_pcid == PMAP_PCID_INVALID_PCID)) {
		new_pcid = tpmap->pmap_pcid_cpus[ccpu] = pmap_pcid_allocate_pcid(ccpu);
	}
	pmap_assert(new_pcid != PMAP_PCID_INVALID_PCID);
#ifdef	PCID_ASSERT	
	cpu_datap(ccpu)->cpu_last_pcid = cpu_datap(ccpu)->cpu_active_pcid;
#endif
	cpu_datap(ccpu)->cpu_active_pcid = new_pcid;

	/* A non-zero coherency vector entry is treated as a pending flush. */
	pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
	if (__probable(pending_flush == FALSE)) {
		/* Conflict: this PCID was last dispatched here for another pmap. */
		last_pmap = cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[new_pcid];
		pcid_conflict = ((last_pmap != NULL) &&(tpmap != last_pmap));
	}
	if (__improbable(pending_flush || pcid_conflict)) {
		pmap_pcid_validate_cpu(tpmap, ccpu);
	}
	/* Consider making this a unique id */
	cpu_datap(ccpu)->cpu_pcid_last_pmap_dispatched[new_pcid] = tpmap;

	pmap_assert(new_pcid < PMAP_PCID_MAX_PCID);
	/* PCID 0 must belong to the kernel pmap and to no other pmap. */
	pmap_assert(((tpmap ==  kernel_pmap) && new_pcid == 0) || ((new_pcid != PMAP_PCID_INVALID_PCID) && (new_pcid != 0)));
#if	PMAP_ASSERT
	/* Record the composed value; bit 63 mirrors the flag passed to set_cr3_composed() below. */
	pcid_record_array[ccpu % PCID_RECORD_SIZE] = tpmap->pm_cr3 | new_pcid | (((uint64_t)(!(pending_flush || pcid_conflict))) <<63);
	pml4_entry_t *pml4 = pmap64_pml4(tpmap, 0ULL);
	/* Diagnostic to detect pagetable anchor corruption */
	if (pml4[KERNEL_PML4_INDEX] != kernel_pmap->pm_pml4[KERNEL_PML4_INDEX])
		__asm__ volatile("int3");
#endif	/* PMAP_ASSERT */
	set_cr3_composed(tpmap->pm_cr3, new_pcid, !(pending_flush || pcid_conflict));

	if (!pending_flush) {
		/* We did not previously observe a pending invalidation for this
		 * ASID. However, the load from the coherency vector
		 * could've been reordered ahead of the store to the
		 * active_cr3 field (in the context switch path, our
		 * caller). Re-consult the pending invalidation vector
		 * after the CR3 write. We rely on MOV CR3's documented
		 * serializing property to avoid insertion of an expensive
		 * barrier. (DRK)
		 */
		pending_flush = (tpmap->pmap_pcid_coherency_vector[ccpu] != 0);
		if (__improbable(pending_flush != 0)) {
			/* An invalidation raced with us: validate and reload CR3 with the flag cleared. */
			pmap_pcid_validate_cpu(tpmap, ccpu);
			set_cr3_composed(tpmap->pm_cr3, new_pcid, FALSE);
		}
	}
	/* Remember where this CPU's coherency flag for the active pmap lives. */
	cpu_datap(ccpu)->cpu_pmap_pcid_coherentp = &(tpmap->pmap_pcid_coherency_vector[ccpu]);
#if	DEBUG	
	KERNEL_DEBUG_CONSTANT(0x9c1d0000, tpmap, new_pcid, pending_flush, pcid_conflict, 0);
#endif
}
Exemplo n.º 2
0
/*
 * Slow path for acquiring a hardware spin lock that is currently held.
 *
 * Spins in bursts of LOCK_SNOOP_SPINS attempts, trying to swing the lock
 * word from 0 to `data`.  The timeout clock is started after the first
 * unsuccessful burst; a timeout of 0 selects LOCK_PANIC_TIMEOUT.
 *
 * Returns 1 once the lock has been taken.  When the timeout expires the
 * function panics if do_panic is set and otherwise returns 0.
 */
static unsigned int NOINLINE
hw_lock_lock_contended(hw_lock_t lock, uintptr_t data, uint64_t timeout, boolean_t do_panic)
{
	uint64_t	deadline = 0;
	uintptr_t	owner = lock->lock_data;
	int		attempt;

	if (timeout == 0) {
		timeout = LOCK_PANIC_TIMEOUT;
	}
#if CONFIG_DTRACE
	uint64_t spin_start;
	boolean_t probe_armed = lockstat_probemap[LS_LCK_SPIN_LOCK_SPIN] != 0;
	if (__improbable(probe_armed)) {
		spin_start = mach_absolute_time();
	}
#endif
	while (1) {
		for (attempt = 0; attempt < LOCK_SNOOP_SPINS; attempt++) {
			cpu_pause();
#if (!__ARM_ENABLE_WFE_) || (LOCK_PRETEST)
			/* Peek at the lock word first; skip the exchange while it is held. */
			owner = ordered_load_hw(lock);
			if (owner != 0) {
				continue;
			}
#endif
			if (!atomic_compare_exchange(&lock->lock_data, 0, data,
			    memory_order_acquire_smp, TRUE)) {
				continue;
			}
#if CONFIG_DTRACE
			if (__improbable(probe_armed)) {
				uint64_t spun_for = mach_absolute_time() - spin_start;
				if (spun_for > dtrace_spin_threshold) {
					LOCKSTAT_RECORD2(LS_LCK_SPIN_LOCK_SPIN, lock, spun_for, dtrace_spin_threshold);
				}
			}
#endif
			return 1;
		}

		/* The first failed burst arms the deadline; later ones test it. */
		if (deadline == 0) {
			deadline = ml_get_timebase() + timeout;
			continue;
		}
		if (ml_get_timebase() >= deadline) {
			break;
		}
	}

	if (do_panic) {
		/*
		 * Report the time actually spent waiting; it can exceed the
		 * timeout if something else consumed this CPU in the meantime.
		 */
		panic("Spinlock timeout after %llu ticks, %p = %lx",
			(ml_get_timebase() - deadline + timeout), lock, owner);
	}
	return 0;
}
Exemplo n.º 3
0
/*
 * Checked strlcpy(): panics when the requested bound `s` exceeds the
 * destination object size `chk_size`, otherwise forwards to strlcpy()
 * and returns its result.
 */
size_t
__strlcpy_chk(char *dst, char const *src, size_t s, size_t chk_size)
{
    if (__improbable(s > chk_size)) {
        panic("__strlcpy_chk object size check failed: dst %p, src %p, (%zu < %zu)", dst, src, chk_size, s);
    }

    return strlcpy(dst, src, s);
}
Exemplo n.º 4
0
/*
 * Checked memset(): panics when the fill length `s` exceeds the
 * destination object size `chk_size`, otherwise forwards to memset()
 * and returns its result.
 */
void *
__memset_chk(void *dst, int c, size_t s, size_t chk_size)
{
    if (__improbable(s > chk_size)) {
        panic("__memset_chk object size check failed: dst %p, c %c, (%zu < %zu)", dst, c, chk_size, s);
    }

    return memset(dst, c, s);
}
Exemplo n.º 5
0
/*
 * Checked memmove(): panics when the move length `s` exceeds the
 * destination object size `chk_size`, otherwise forwards to memmove()
 * and returns its result.
 */
void *
__memmove_chk(void *dst, void const *src, size_t s, size_t chk_size)
{
    if (__improbable(s > chk_size)) {
        panic("__memmove_chk object size check failed: dst %p, src %p, (%zu < %zu)", dst, src, chk_size, s);
    }

    return memmove(dst, src, s);
}
Exemplo n.º 6
0
/*
 * Insert every entry of `copy` into `map` after `after_where`.
 *
 * Optionally emits a DTrace event per entry, updates the first-free
 * bookkeeping for each entry when the map has its hole list enabled, then
 * hands the copy to the linked-list store and, when compiled in and
 * supported by the map header, to the red-black tree store.
 */
void	vm_map_store_copy_insert( vm_map_t map, vm_map_entry_t after_where, vm_map_copy_t copy)
{
	vm_map_entry_t cur;

	if (__improbable(vm_debug_events)) {
		cur = vm_map_copy_first_entry(copy);
		while (cur != vm_map_copy_to_entry(copy)) {
			DTRACE_VM4(map_entry_link_copy, vm_map_t, map, vm_map_entry_t, cur, vm_address_t, cur->links.start, vm_address_t, cur->links.end);
			cur = cur->vme_next;
		}
	}

	if (map->holelistenabled) {
		for (cur = vm_map_copy_first_entry(copy);
		    cur != vm_map_copy_to_entry(copy);
		    cur = cur->vme_next) {
			vm_map_store_update_first_free(map, cur, TRUE);
		}
	}

	/* The linked list is always maintained; the RB tree only when supported. */
	vm_map_store_copy_insert_ll(map, after_where, copy);
#ifdef VM_MAP_STORE_USE_RB
	if (vm_map_store_has_RB_support(&map->hdr)) {
		vm_map_store_copy_insert_rb(map, after_where, copy);
	}
#endif
}
Exemplo n.º 7
0
/*
 * Forward a thread urgency change to the power-management dispatch table.
 *
 * Does nothing when power management is not initialised or provides no
 * pmThreadTellUrgency callout.  When
 * urgency_notification_assert_abstime_threshold is non-zero the callout is
 * timed: a new maximum duration is recorded, and the function panics if
 * that duration exceeds the threshold while machine timeouts are not
 * suspended.
 *
 * Must be called with preemption or interrupts disabled (asserted).
 */
void
thread_tell_urgency(int urgency,
    uint64_t rt_period,
    uint64_t rt_deadline,
    thread_t nthread)
{
	uint64_t	callout_start, elapsed;
	boolean_t	timing_enabled = (urgency_notification_assert_abstime_threshold != 0);

	assert(get_preemption_level() > 0 || ml_get_interrupts_enabled() == FALSE);
#if	DEBUG
	urgency_stats[cpu_number() % 64][urgency]++;
#endif
	if (!pmInitDone || pmDispatch == NULL || pmDispatch->pmThreadTellUrgency == NULL) {
		return;
	}

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_START, urgency, rt_period, rt_deadline, 0, 0);

	if (__improbable(timing_enabled)) {
		callout_start = mach_absolute_time();
	}

	current_cpu_datap()->cpu_nthread = nthread;
	pmDispatch->pmThreadTellUrgency(urgency, rt_period, rt_deadline);

	if (__improbable(timing_enabled)) {
		elapsed = mach_absolute_time() - callout_start;

		if (__improbable(elapsed > urgency_notification_max_recorded)) {
			/*
			 * The maximum is updated without synchronization.  A lost
			 * update is acceptable: a long callout is likely to recur.
			 */
			urgency_notification_max_recorded = elapsed;

			if (__improbable((elapsed > urgency_notification_assert_abstime_threshold) && !machine_timeout_suspended())) {
				panic("Urgency notification callout %p exceeded threshold, 0x%llx abstime units", pmDispatch->pmThreadTellUrgency, elapsed);
			}
		}
	}

	KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_URGENCY) | DBG_FUNC_END, urgency, rt_period, rt_deadline, 0, 0);
}
Exemplo n.º 8
0
/*
 * Return the PCID in effect for (cpmap, cthread) on CPU ccpu.
 *
 * The kernel pmap's PCID is returned when cpmap has pagezero_accessible
 * set and the thread does not have CopyIOActive in its specFlags;
 * otherwise cpmap's own PCID for that CPU is returned.
 */
pcid_t	pcid_for_pmap_cpu_tuple(pmap_t cpmap, thread_t cthread, int ccpu) {
	if (__improbable(cpmap->pagezero_accessible) &&
	    (cthread->machine.specFlags & CopyIOActive) == 0) {
		return kernel_pmap->pmap_pcid_cpus[ccpu];
	}

	return cpmap->pmap_pcid_cpus[ccpu];
}
Exemplo n.º 9
0
/*
 * Remove `entry` from the map store described by `mapHdr`.
 *
 * Optionally emits a DTrace event, then unlinks the entry from the linked
 * list and, when compiled in and supported by the header, from the
 * red-black tree.
 */
void
_vm_map_store_entry_unlink( struct vm_map_header * mapHdr, vm_map_entry_t entry)
{
	if (__improbable(vm_debug_events)) {
		/* The map address is derived from the header by stepping back over the lock. */
		DTRACE_VM4(map_entry_unlink, vm_map_t, (char *)mapHdr - sizeof (lck_rw_t), vm_map_entry_t, entry, vm_address_t, entry->links.start, vm_address_t, entry->links.end);
	}

	vm_map_store_entry_unlink_ll(mapHdr, entry);
#ifdef VM_MAP_STORE_USE_RB
	if (vm_map_store_has_RB_support(mapHdr)) {
		vm_map_store_entry_unlink_rb(mapHdr, entry);
	}
#endif
}
Exemplo n.º 10
0
/*
 * Arm `call` to fire at `deadline` with callout parameter `param1`.
 *
 * Unless TIMER_CALL_CRITICAL is set in `flags` (or coalescing is disabled),
 * the deadline is pushed out by timer_call_slop().  On x86, a deadline that
 * is already in the past is accounted in the past_deadline_* statistics and
 * replaced by "now + past_deadline_timer_adjustment".
 *
 * Runs at splclock.  Returns TRUE when the enqueue helper reports a
 * previous queue for the call (old_queue != NULL), FALSE otherwise.
 */
static boolean_t 
timer_call_enter_internal(
	timer_call_t 		call,
	timer_call_param_t	param1,
	uint64_t 		deadline,
	uint32_t 		flags)
{
	mpqueue_head_t		*queue;
	mpqueue_head_t		*old_queue;
	spl_t			s;
	uint64_t 		slop = 0;

	s = splclock();

	call->soft_deadline = deadline;
	call->flags = flags;

	if ((flags & TIMER_CALL_CRITICAL) == 0 &&
	     mach_timer_coalescing_enabled) {
		slop = timer_call_slop(deadline);
		deadline += slop;
	}

#if	defined(__i386__) || defined(__x86_64__)
	uint64_t ctime = mach_absolute_time();
	if (__improbable(deadline < ctime)) {
		uint64_t delta = (ctime - deadline);

		past_deadline_timers++;
		past_deadline_deltas += delta;
		/*
		 * Track the largest and smallest lateness seen.  Both
		 * statistics hold a delta; storing the absolute deadline in
		 * "longest" would make every later comparison meaningless.
		 */
		if (delta > past_deadline_longest)
			past_deadline_longest = delta;
		if (delta < past_deadline_shortest)
			past_deadline_shortest = delta;

		/* Re-target the timer slightly into the future. */
		deadline = ctime + past_deadline_timer_adjustment;
		call->soft_deadline = deadline;
	}
#endif
	queue = timer_queue_assign(deadline);

	old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline);

	CE(call)->param1 = param1;

	splx(s);

	return (old_queue != NULL);
}
Exemplo n.º 11
0
/*
 * Link `entry` into the map store described by `mapHdr`, after
 * `after_where`.
 *
 * The entry must span a non-empty range (asserted).  Optionally emits a
 * DTrace event, links into the linked list and, when compiled in and
 * supported by the header, into the red-black tree.  With
 * MAP_ENTRY_INSERTION_DEBUG a backtrace of the insertion is captured into
 * the entry.
 */
void
_vm_map_store_entry_link( struct vm_map_header * mapHdr, vm_map_entry_t after_where, vm_map_entry_t entry)
{
	assert(entry->vme_start < entry->vme_end);

	if (__improbable(vm_debug_events)) {
		/* The map address is derived from the header by stepping back over the lock. */
		DTRACE_VM4(map_entry_link, vm_map_t, (char *)mapHdr - sizeof (lck_rw_t), vm_map_entry_t, entry, vm_address_t, entry->links.start, vm_address_t, entry->links.end);
	}

	vm_map_store_entry_link_ll(mapHdr, after_where, entry);
#ifdef VM_MAP_STORE_USE_RB
	if (vm_map_store_has_RB_support(mapHdr)) {
		vm_map_store_entry_link_rb(mapHdr, after_where, entry);
	}
#endif
#if MAP_ENTRY_INSERTION_DEBUG
	fastbacktrace(&entry->vme_insertion_bt[0],
	    (sizeof (entry->vme_insertion_bt) / sizeof (uintptr_t)));
#endif
}
Exemplo n.º 12
0
/*
 * VNOP lookup entry point for HFS: resolve the name in ap->a_cnp within
 * directory ap->a_dvp and return the result through *ap->a_vpp.
 *
 * The name cache is consulted first.  A cache hit is returned directly
 * unless the vnode is one of the private hardlink directories (ENOENT), or
 * the cached cnode must be re-validated against the catalog (hard links on
 * the last path component, or a forced case-sensitive lookup on a
 * case-insensitive volume).  Cache misses and stale hits fall through to
 * hfs_lookup().
 *
 * Before returning, the thread is throttled if uu_lowpri_window is set.
 *
 * Returns 0 on success or an errno value.
 */
int
hfs_vnop_lookup(struct vnop_lookup_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode *vp;
	struct cnode *cp;
	struct cnode *dcp;
	struct hfsmount *hfsmp;
	int error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct proc *p = vfs_context_proc(ap->a_context);
	int flags = cnp->cn_flags;
	int force_casesensitive_lookup = proc_is_forcing_hfs_case_sensitivity(p);
	int cnode_locked;

	*vpp = NULL;
	dcp = VTOC(dvp);
	
	hfsmp = VTOHFS(dvp);

	/*
	 * Lookup an entry in the cache
	 *
	 * If the lookup succeeds, the vnode is returned in *vpp,
	 * and a status of -1 is returned.
	 *
	 * If the lookup determines that the name does not exist
	 * (negative caching), a status of ENOENT is returned.
	 *
	 * If the lookup fails, a status of zero is returned.
	 */
	error = cache_lookup(dvp, vpp, cnp);
	if (error != -1) {
		/* A negative entry is ignored for CREATE so the real lookup runs. */
		if ((error == ENOENT) && (cnp->cn_nameiop != CREATE))		
			goto exit;	/* found a negative cache entry */
		goto lookup;		/* did not find it in the cache */
	}
	/*
	 * We have a name that matched
	 * cache_lookup returns the vp with an iocount reference already taken
	 */
	error = 0;
	vp = *vpp;
	cp = VTOC(vp);
	
	/* We aren't allowed to vend out vp's via lookup to the hidden directory */
	if (cp->c_cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
		cp->c_cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
		/* Drop the iocount from cache_lookup */
		vnode_put (vp);
		error = ENOENT;
		goto exit;
	}
	
	
	/*
	 * If this is a hard-link vnode then we need to update
	 * the name (of the link), the parent ID, the cnid, the
	 * text encoding and the catalog hint.  This enables
	 * getattrlist calls to return the correct link info.
	 */

	/*
	 * Alternatively, if we are forcing a case-sensitive lookup
	 * on a case-insensitive volume, the namecache entry
	 * may have been for an incorrect case. Since we cannot
	 * determine case vs. normalization, redrive the catalog
	 * lookup based on any byte mismatch.
	 */
	if (((flags & ISLASTCN) && (cp->c_flag & C_HARDLINK))
		|| (force_casesensitive_lookup && !(hfsmp->hfs_flags & HFS_CASE_SENSITIVE))) {
		int stale_link = 0;

		hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);	
		/* Re-validate only when the cnode's parent or name differs from the request. */
		if ((cp->c_parentcnid != dcp->c_cnid) ||
		    (cnp->cn_namelen != cp->c_desc.cd_namelen) ||
		    (bcmp(cnp->cn_nameptr, cp->c_desc.cd_nameptr, cp->c_desc.cd_namelen) != 0)) {
			struct cat_desc desc;
			struct cat_attr lookup_attr;
			int lockflags;

			if (force_casesensitive_lookup && !(hfsmp->hfs_flags & HFS_CASE_SENSITIVE)) {
				/*
				 * Since the name in the cnode doesn't match our lookup
				 * string exactly, do a full lookup.
				 */
				hfs_unlock (cp);

				vnode_put(vp);
				goto lookup;
			}

			/*
			 * Get an updated descriptor
			 */
			desc.cd_nameptr = (const u_int8_t *)cnp->cn_nameptr;
			desc.cd_namelen = cnp->cn_namelen;
			desc.cd_parentcnid = dcp->c_fileid;
			desc.cd_hint = dcp->c_childhint;
			desc.cd_encoding = 0;
			desc.cd_cnid = 0;
			desc.cd_flags = S_ISDIR(cp->c_mode) ? CD_ISDIR : 0;

			/*
			 * Because lookups call replace_desc to put a new descriptor in
			 * the cnode we are modifying it is possible that this cnode's 
			 * descriptor is out of date for the parent ID / name that
			 * we are trying to look up. (It may point to a different hardlink).
			 *
			 * We need to be cautious that when re-supplying the 
			 * descriptor below that the results of the catalog lookup
			 * still point to the same raw inode for the hardlink.  This would 
			 * not be the case if we found something in the cache above but 
			 * the vnode it returned no longer has a valid hardlink for the 
			 * parent ID/filename combo we are requesting.  (This is because 
			 * hfs_unlink does not directly trigger namecache removal). 
			 *
			 * As a result, before vending out the vnode (and replacing
			 * its descriptor) verify that the fileID is the same by comparing
			 * the in-cnode attributes vs. the one returned from the lookup call
			 * below.  If they do not match, treat this lookup as if we never hit
			 * in the cache at all.
			 */

			lockflags = hfs_systemfile_lock(VTOHFS(dvp), SFL_CATALOG, HFS_SHARED_LOCK);		
		
			error = cat_lookup(VTOHFS(vp), &desc, 0, 0, &desc, &lookup_attr, NULL, NULL);	
			
			hfs_systemfile_unlock(VTOHFS(dvp), lockflags);

			/* 
			 * Note that cat_lookup may fail to find something with the name provided in the
			 * stack-based descriptor above. In that case, an ENOENT is a legitimate errno
			 * to be placed in error, which will get returned in the fastpath below.
			 */
			if (error == 0) {
				if (lookup_attr.ca_fileid == cp->c_attr.ca_fileid) {
					/* It still points to the right raw inode.  Replacing the descriptor is fine */
					replace_desc (cp, &desc);

					/* 
					 * Save the origin info for file and directory hardlinks.  Directory hardlinks 
					 * need the origin for '..' lookups, and file hardlinks need it to ensure that 
					 * competing lookups do not cause us to vend different hardlinks than the ones requested.
					 * We want to restrict saving the cache entries to LOOKUP namei operations, since
					 * we're really doing this to protect getattr.
					 */
					if (cnp->cn_nameiop == LOOKUP) {
						hfs_savelinkorigin(cp, dcp->c_fileid);
					}
				}
				else {
					/* If the fileID does not match then do NOT replace the descriptor! */
					stale_link = 1;
				}	
			}
		}
		hfs_unlock (cp);
		
		if (stale_link) {
			/* 
			 * If we had a stale_link, then we need to pretend as though
			 * we never found this vnode and force a lookup through the 
			 * traditional path.  Drop the iocount acquired through 
			 * cache_lookup above and force a cat lookup / getnewvnode
			 */
			vnode_put(vp);
			goto lookup;
		}
		
		if (error) {
			/* 
			 * If the cat_lookup failed then the caller will not expect 
			 * a vnode with an iocount on it.
			 */
			vnode_put(vp);
		}

	}	
	goto exit;
	
lookup:
	/*
	 * The vnode was not in the name cache or it was stale.
	 *
	 * So we need to do a real lookup.
	 */
	cnode_locked = 0;

	error = hfs_lookup(dvp, vpp, cnp, &cnode_locked, force_casesensitive_lookup);
	
	/* hfs_lookup reports through cnode_locked whether the result's cnode is still locked. */
	if (cnode_locked)
		hfs_unlock(VTOC(*vpp));
exit:
	{
	uthread_t ut = (struct uthread *)get_bsdthread_info(current_thread());

	/*
	 * check to see if we issued any I/O while completing this lookup and
	 * this thread/task is throttleable... if so, throttle now
	 *
	 * this allows us to throttle in between multiple meta data reads that
	 * might result due to looking up a long pathname (since we'll have to
	 * re-enter hfs_vnop_lookup for each component of the pathname not in
	 * the VFS cache), instead of waiting until the entire path lookup has
	 * completed and throttling at the systemcall return
	 */
	if (__improbable(ut->uu_lowpri_window)) {
		throttle_lowpri_io(1);
	}
	}

	return (error);
}
Exemplo n.º 13
0
/*
 * Dispatch a Mach trap issued by a 64-bit user thread.
 *
 * The trap number is taken from rax (masked with SYSCALL_NUMBER_MASK).  Up
 * to six arguments are copied from the saved registers starting at rdi;
 * any further arguments (at most nine in total) are copied in from the
 * user stack just above the word at rsp.  The handler's return value is
 * stored in rax.
 *
 * An out-of-range trap number, or one whose handler is kern_invalid,
 * raises EXC_SYSCALL.  A failed argument copyin returns
 * KERN_INVALID_ARGUMENT to the caller.
 *
 * Does not return: every path ends in thread_exception_return() or
 * i386_exception().
 */
void
mach_call_munger64(x86_saved_state_t *state)
{
	int call_number;
	int argc;
	mach_call_t mach_call;
	struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
	x86_saved_state64_t	*regs;

#if PROC_REF_DEBUG
	struct uthread *ut = get_bsdthread_info(current_thread());

	uthread_reset_proc_refcount(ut);
#endif

	assert(is_saved_state64(state));
	regs = saved_state64(state);

	call_number = (int)(regs->rax & SYSCALL_NUMBER_MASK);

	/*
	 * call_number has not been validated yet, so only index the name
	 * table when it is within the range checked below.
	 */
	DEBUG_KPRINT_SYSCALL_MACH(
		"mach_call_munger64: code=%d(%s)\n",
		call_number,
		((call_number >= 0 && call_number < mach_trap_count) ?
		    mach_syscall_name_table[call_number] : "invalid"));

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_START,
		regs->rdi, regs->rsi, regs->rdx, regs->r10, 0);

	if (call_number < 0 || call_number >= mach_trap_count) {
		i386_exception(EXC_SYSCALL, regs->rax, 1);
		/* NOTREACHED */
	}
	mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;

	if (mach_call == (mach_call_t)kern_invalid) {
		i386_exception(EXC_SYSCALL, regs->rax, 1);
		/* NOTREACHED */
	}
	argc = mach_trap_table[call_number].mach_trap_arg_count;
	if (argc) {
		int args_in_regs = MIN(6, argc);

		memcpy(&args.arg1, &regs->rdi, args_in_regs * sizeof(syscall_arg_t));

		if (argc > 6) {
			int copyin_count;

			assert(argc <= 9);
			copyin_count = (argc - 6) * (int)sizeof(syscall_arg_t);

			/* Remaining arguments live on the user stack, one word above rsp. */
			if (copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&args.arg7, copyin_count)) {
				regs->rax = KERN_INVALID_ARGUMENT;

				thread_exception_return();
				/* NOTREACHED */
			}
		}
	}

#ifdef MACH_BSD
	mach_kauth_cred_uthread_update();
#endif

	regs->rax = (uint64_t)mach_call((void *)&args);

	DEBUG_KPRINT_SYSCALL_MACH( "mach_call_munger64: retval=0x%llx\n", regs->rax);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END,
		regs->rax, 0, 0, 0, 0);

	throttle_lowpri_io(1);

#if PROC_REF_DEBUG
	if (__improbable(uthread_get_proc_refcount(ut) != 0)) {
		panic("system call returned with uu_proc_refcount != 0");
	}
#endif

	thread_exception_return();
	/* NOTREACHED */
}
Exemplo n.º 14
0
/*
 * Dispatch a Mach trap issued by a 32-bit user thread.
 *
 * The trap number is the negation of eax.  Arguments are fetched from the
 * user stack by mach_call_arg_munger32(); the handler's return value is
 * stored in eax.
 *
 * An out-of-range trap number, or one whose handler is kern_invalid,
 * raises EXC_SYSCALL.  A failed argument fetch returns its error code to
 * the caller in eax.
 *
 * Does not return: every path ends in thread_exception_return() or
 * i386_exception().
 */
void
mach_call_munger(x86_saved_state_t *state)
{
	int argc;
	int call_number;
	mach_call_t mach_call;
	kern_return_t retval;
	struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
	x86_saved_state32_t	*regs;

#if PROC_REF_DEBUG
	struct uthread *ut = get_bsdthread_info(current_thread());

	uthread_reset_proc_refcount(ut);
#endif

	assert(is_saved_state32(state));
	regs = saved_state32(state);

	call_number = -(regs->eax);

	/*
	 * call_number is user-controlled and may be negative or too large
	 * here, so only index the name table when it is within the range
	 * checked below.
	 */
	DEBUG_KPRINT_SYSCALL_MACH(
		"mach_call_munger: code=%d(%s)\n",
		call_number,
		((call_number >= 0 && call_number < mach_trap_count) ?
		    mach_syscall_name_table[call_number] : "invalid"));
#if DEBUG_TRACE
	/* regs is a pointer, so it must be printed with %p. */
	kprintf("mach_call_munger(%p) code=%d\n", regs, call_number);
#endif

	if (call_number < 0 || call_number >= mach_trap_count) {
		i386_exception(EXC_SYSCALL, call_number, 1);
		/* NOTREACHED */
	}
	mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function;

	if (mach_call == (mach_call_t)kern_invalid) {
		DEBUG_KPRINT_SYSCALL_MACH(
			"mach_call_munger: kern_invalid 0x%x\n", regs->eax);
		i386_exception(EXC_SYSCALL, call_number, 1);
		/* NOTREACHED */
	}

	argc = mach_trap_table[call_number].mach_trap_arg_count;
	if (argc) {
		retval = mach_call_arg_munger32(regs->uesp, &args,  &mach_trap_table[call_number]);
		if (retval != KERN_SUCCESS) {
			regs->eax = retval;

			DEBUG_KPRINT_SYSCALL_MACH(
				"mach_call_munger: retval=0x%x\n", retval);

			thread_exception_return();
			/* NOTREACHED */
		}
	}

#ifdef MACH_BSD
	mach_kauth_cred_uthread_update();
#endif

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START,
		args.arg1, args.arg2, args.arg3, args.arg4, 0);

	retval = mach_call(&args);

	DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger: retval=0x%x\n", retval);

	KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
		MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END,
		retval, 0, 0, 0, 0);

	regs->eax = retval;

	throttle_lowpri_io(1);

#if PROC_REF_DEBUG
	if (__improbable(uthread_get_proc_refcount(ut) != 0)) {
		panic("system call returned with uu_proc_refcount != 0");
	}
#endif

	thread_exception_return();
	/* NOTREACHED */
}
Exemplo n.º 15
0
/*
 * Dispatch a BSD system call issued by a 64-bit user thread.
 *
 * The syscall number is taken from rax (masked with SYSCALL_NUMBER_MASK);
 * numbers at or beyond NUM_SYSENT are redirected to sysent[63].  Arguments
 * are gathered into uthread->uu_arg from the saved registers, with any
 * overflow copied in from the user stack.  On return, rax/rdx and the
 * carry flag in rflags are set from the handler's error code and return
 * values according to the entry's sy_return_type.
 *
 * Does not return: every path ends in thread_exception_return().
 */
void
unix_syscall64(x86_saved_state_t *state)
{
	thread_t	thread;
	void			*vt;
	unsigned int	code;
	struct sysent	*callp;
	int		args_in_regs;
	boolean_t	args_start_at_rdi;
	int		error;
	struct proc	*p;
	struct uthread	*uthread;
	x86_saved_state64_t *regs;

	assert(is_saved_state64(state));
	regs = saved_state64(state);
#if	DEBUG
	/* NOTE(review): debug-only immediate return for this one rax value; purpose not visible here. */
	if (regs->rax == 0x2000800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the appropriate proc; may be different from task's for vfork() */
	if (__probable(!(uthread->uu_flag & UT_VFORK)))
		p = (struct proc *)get_bsdtask_info(current_task());
	else 
		p = current_proc();

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->rax = EPERM;
		regs->isf.rflags |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->rax & SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: code=%d(%s) rip=%llx\n",
		code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip);
	/* Out-of-range syscall numbers are routed to entry 63. */
	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

	vt = (void *)uthread->uu_arg;

	/* callp == sysent means entry 0 was selected. */
	if (__improbable(callp == sysent)) {
	        /*
		 * indirect system call... system call number
		 * passed as 'arg0'
		 */
		code = regs->rdi;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
		/* rdi held the real syscall number, so arguments start at rsi. */
		args_start_at_rdi = FALSE;
		args_in_regs = 5;
	} else {
		args_start_at_rdi = TRUE;
		args_in_regs = 6;
	}

	if (callp->sy_narg != 0) {
		assert(callp->sy_narg <= 8); /* size of uu_arg */

		args_in_regs = MIN(args_in_regs, callp->sy_narg);
		memcpy(vt, args_start_at_rdi ? &regs->rdi : &regs->rsi, args_in_regs * sizeof(syscall_arg_t));


		/* NOTE(review): 180 is presumably the kdebug trace syscall, excluded from tracing itself -- confirm. */
		if (code != 180) {
			uint64_t *ip = (uint64_t *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
				BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				(int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
		}

		/* Arguments that did not fit in registers are fetched from the user stack. */
		if (__improbable(callp->sy_narg > args_in_regs)) {
			int copyin_count;

			copyin_count = (callp->sy_narg - args_in_regs) * sizeof(syscall_arg_t);

			error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&uthread->uu_arg[args_in_regs], copyin_count);
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;
				thread_exception_return();
				/* NOTREACHED */
			}
		}
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	uthread->uu_flag |= UT_NOTCANCELPT;
	uthread->syscall_code = code;

#ifdef JOE_DEBUG
        uthread->uu_iocount = 0;
        uthread->uu_vpindex = 0;
#endif

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
        if (uthread->uu_iocount)
               printf("system call returned with uu_iocount != 0\n");
#endif

#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */
	
	if (__improbable(error == ERESTART)) {
		/*
		 * all system calls come through via the syscall instruction
		 * in 64 bit mode... its 2 bytes in length
		 * move the user's pc back to repeat the syscall:
		 */
		pal_syscall_restart( thread, state );
	}
	else if (error != EJUSTRETURN) {
		/* EJUSTRETURN leaves the saved register state untouched. */
		if (__improbable(error)) {
			regs->rax = error;
			regs->isf.rflags |= EFL_CF;	/* carry bit */
		} else { /* (not error) */

			/* Widen the return value(s) according to the declared return type. */
			switch (callp->sy_return_type) {
			case _SYSCALL_RET_INT_T:
				regs->rax = uthread->uu_rval[0];
				regs->rdx = uthread->uu_rval[1];
				break;
			case _SYSCALL_RET_UINT_T:
				regs->rax = ((u_int)uthread->uu_rval[0]);
				regs->rdx = ((u_int)uthread->uu_rval[1]);
				break;
			case _SYSCALL_RET_OFF_T:
			case _SYSCALL_RET_ADDR_T:
			case _SYSCALL_RET_SIZE_T:
			case _SYSCALL_RET_SSIZE_T:
			case _SYSCALL_RET_UINT64_T:
			        regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
				regs->rdx = 0;
				break;
			case _SYSCALL_RET_NONE:
				break;
			default:
				panic("unix_syscall: unknown return type");
				break;
			}
			regs->isf.rflags &= ~EFL_CF;
		} 
	}

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: error=%d retval=(%llu,%llu)\n",
		error, regs->rax, regs->rdx);
	
	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
	        /*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	if (__probable(code != 180))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	thread_exception_return();
	/* NOTREACHED */
}
Exemplo n.º 16
0
/*
 * 	Event timer interrupt.
 *
 * XXX a drawback of this implementation is that events serviced earlier must not set deadlines
 *     that occur before the entire chain completes.
 *
 * XXX a better implementation would use a set of generic callouts and iterate over them
 */
void
timer_intr(int		user_mode,
           uint64_t	rip)
{
    uint64_t		abstime;
    rtclock_timer_t		*mytimer;
    cpu_data_t		*pp;
    int64_t			latency;
    uint64_t		pmdeadline;
    boolean_t		timer_processed = FALSE;

    pp = current_cpu_datap();

    SCHED_STATS_TIMER_POP(current_processor());

    abstime = mach_absolute_time();		/* Get the time now */

    /* has a pending clock timer expired? */
    mytimer = &pp->rtclock_timer;		/* Point to the event timer */

    if ((timer_processed = ((mytimer->deadline <= abstime) ||
                            (abstime >= (mytimer->queue.earliest_soft_deadline))))) {
        /*
         * Log interrupt service latency (-ve value expected by tool)
         * a non-PM event is expected next.
         * The requested deadline may be earlier than when it was set
         * - use MAX to avoid reporting bogus latencies.
         */
        latency = (int64_t) (abstime - MAX(mytimer->deadline,
                                           mytimer->when_set));
        /* Log zero timer latencies when opportunistically processing
         * coalesced timers.
         */
        if (latency < 0) {
            TCOAL_DEBUG(0xEEEE0000, abstime, mytimer->queue.earliest_soft_deadline, abstime - mytimer->queue.earliest_soft_deadline, 0, 0);
            latency = 0;
        }

        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  DECR_TRAP_LATENCY | DBG_FUNC_NONE,
                                  -latency,
                                  ((user_mode != 0) ? rip : VM_KERNEL_UNSLIDE(rip)),
                                  user_mode, 0, 0);

        mytimer->has_expired = TRUE;	/* Remember that we popped */
        mytimer->deadline = timer_queue_expire(&mytimer->queue, abstime);
        mytimer->has_expired = FALSE;

        /* Get the time again since we ran a bit */
        abstime = mach_absolute_time();
        mytimer->when_set = abstime;
    }

    /* is it time for power management state change? */
    if ((pmdeadline = pmCPUGetDeadline(pp)) && (pmdeadline <= abstime)) {
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  DECR_PM_DEADLINE | DBG_FUNC_START,
                                  0, 0, 0, 0, 0);
        pmCPUDeadline(pp);
        KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
                                  DECR_PM_DEADLINE | DBG_FUNC_END,
                                  0, 0, 0, 0, 0);
        timer_processed = TRUE;
    }

    /* schedule our next deadline */
    x86_lcpu()->rtcDeadline = EndOfAllTime;
    timer_resync_deadlines();

    if (__improbable(timer_processed == FALSE))
        spurious_timers++;
}
Exemplo n.º 17
0
/*
 * Arm `call` to fire at `deadline` with callout parameter `param1`.
 *
 * The hard deadline is the requested deadline plus slop, where slop comes
 * from timer_call_slop() or, when TIMER_CALL_LEEWAY is set and `leeway` is
 * larger, from `leeway`; the sum saturates at UINT64_MAX.  A hard deadline
 * already in the past is accounted in the past_deadline_* statistics and
 * replaced (together with the soft deadline) by
 * "now + past_deadline_timer_adjustment".
 *
 * Calls that are not rate limited are first offered to the long-term
 * queue; if that returns no queue, or the call is rate limited, the call
 * is enqueued on the queue chosen by timer_queue_assign().
 *
 * Runs at splclock.  Returns TRUE when the enqueue helper reports a
 * previous queue for the call (old_queue != NULL), FALSE otherwise.
 */
static boolean_t 
timer_call_enter_internal(
	timer_call_t 		call,
	timer_call_param_t	param1,
	uint64_t 		deadline,
	uint64_t 		leeway,
	uint32_t 		flags,
	boolean_t		ratelimited)
{
	mpqueue_head_t		*queue = NULL;
	mpqueue_head_t		*old_queue;
	spl_t			s;
	uint64_t 		slop;
	uint32_t		urgency;
	uint64_t		sdeadline, ttd;

	s = splclock();

	/* sdeadline keeps the soft (requested) deadline; deadline becomes the hard one. */
	sdeadline = deadline;
	uint64_t ctime = mach_absolute_time();

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
        	DECR_TIMER_ENTER | DBG_FUNC_START,
	    VM_KERNEL_UNSLIDE_OR_PERM(call),
	    VM_KERNEL_UNSLIDE_OR_PERM(param1), deadline, flags, 0);

	urgency = (flags & TIMER_CALL_URGENCY_MASK);

	boolean_t slop_ratelimited = FALSE;
	slop = timer_call_slop(deadline, ctime, urgency, current_thread(), &slop_ratelimited);

	if ((flags & TIMER_CALL_LEEWAY) != 0 && leeway > slop)
		slop = leeway;

	/* Saturate rather than wrap when adding the slop. */
	if (UINT64_MAX - deadline <= slop) {
		deadline = UINT64_MAX;
	} else {
		deadline += slop;
	}

	if (__improbable(deadline < ctime)) {
		uint64_t delta = (ctime - deadline);

		past_deadline_timers++;
		past_deadline_deltas += delta;
		/*
		 * Track the largest and smallest lateness seen.  Both
		 * statistics hold a delta; storing the absolute deadline in
		 * "longest" would make every later comparison meaningless.
		 */
		if (delta > past_deadline_longest)
			past_deadline_longest = delta;
		if (delta < past_deadline_shortest)
			past_deadline_shortest = delta;

		/* Re-target the timer slightly into the future. */
		deadline = ctime + past_deadline_timer_adjustment;
		sdeadline = deadline;
	}

	if (ratelimited || slop_ratelimited) {
		flags |= TIMER_CALL_RATELIMITED;
	} else {
		flags &= ~TIMER_CALL_RATELIMITED;
	}

	/* Time-to-deadline, measured from now to the soft deadline. */
	ttd =  sdeadline - ctime;
#if CONFIG_DTRACE
	DTRACE_TMR7(callout__create, timer_call_func_t, TCE(call)->func,
	timer_call_param_t, TCE(call)->param0, uint32_t, flags,
	    (deadline - sdeadline),
	    (ttd >> 32), (unsigned) (ttd & 0xFFFFFFFF), call);
#endif

	/* Program timer callout parameters under the appropriate per-CPU or
	 * longterm queue lock. The callout may have been previously enqueued
	 * and in-flight on this or another timer queue.
	 */
	if (!ratelimited && !slop_ratelimited) {
		queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue, sdeadline, ttd, param1, flags);
	}

	/* Not taken by the long-term queue: use the regular deadline queue. */
	if (queue == NULL) {
		queue = timer_queue_assign(deadline);
		old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline, sdeadline, ttd, param1, flags);
	}

#if TIMER_TRACE
	TCE(call)->entry_time = ctime;
#endif

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
        	DECR_TIMER_ENTER | DBG_FUNC_END,
		VM_KERNEL_UNSLIDE_OR_PERM(call),
		(old_queue != NULL), deadline, queue->count, 0);

	splx(s);

	return (old_queue != NULL);
}
Exemplo n.º 18
0
/*
 * Function:	unix_syscall
 *
 * Purpose:	Dispatch a BSD system call issued by a 32-bit user thread:
 *		decode the syscall number from eax, copy the arguments in
 *		from the user stack, invoke the matching sysent handler and
 *		write the result (or the error, with the carry flag set)
 *		back into the saved register state.
 *
 * Inputs:	state	- pointer to i386 save area; asserted to hold
 *			  32-bit state
 *
 * Outputs:	none; every path through the function ends in
 *		thread_exception_return() (see the NOTREACHED markers)
 */
void
unix_syscall(x86_saved_state_t *state)
{
	thread_t		thread;
	void			*vt;
	unsigned int		code;
	struct sysent		*callp;

	int			error;
	vm_offset_t		params;
	struct proc		*p;
	struct uthread		*uthread;
	x86_saved_state32_t	*regs;
	boolean_t		is_vfork;

	assert(is_saved_state32(state));
	regs = saved_state32(state);
#if DEBUG
	/* Debug builds only: syscall number 0x800 returns straight to user space */
	if (regs->eax == 0x800)
		thread_exception_return();
#endif
	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the appropriate proc; may be different from task's for vfork() */
	is_vfork = uthread->uu_flag & UT_VFORK;
	if (__improbable(is_vfork != 0))
		p = current_proc();
	else 
		p = (struct proc *)get_bsdtask_info(current_task());

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->eax = EPERM;
		regs->efl |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	/* The syscall number is carried in eax; mask off any non-number bits */
	code = regs->eax & I386_SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
							  code, syscallnames[code >= NUM_SYSENT ? 63 : code], (uint32_t)regs->eip);
	/*
	 * User arguments begin one int above the user stack pointer
	 * (presumably skipping the return address -- confirm against the
	 * user-side syscall stubs).
	 */
	params = (vm_offset_t) (regs->uesp + sizeof (int));

	/* Start with carry clear; it is set again below on any error return */
	regs->efl &= ~(EFL_CF);

	/*
	 * Out-of-range syscall numbers are redirected to sysent[63]
	 * (NOTE(review): presumably an invalid/nosys slot -- confirm).
	 */
	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];

	/*
	 * Entry 0 is the indirect syscall: the real syscall number is the
	 * first user argument, and the remaining arguments follow it.
	 */
	if (__improbable(callp == sysent)) {
		code = fuword(params);
		params += sizeof(int);
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
	}

	/* Arguments are staged in the per-uthread argument buffer */
	vt = (void *)uthread->uu_arg;

	if (callp->sy_arg_bytes != 0) {
#if CONFIG_REQUIRES_U32_MUNGING
		sy_munge_t	*mungerp;
#else
#error U32 syscalls on x86_64 kernel requires munging
#endif
		uint32_t	 nargs;

		/* nargs is a byte count (sy_arg_bytes), not an argument count */
		assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
		nargs = callp->sy_arg_bytes;
		error = copyin((user_addr_t) params, (char *) vt, nargs);
		if (error) {
			/* Faulted fetching arguments: report the copyin error to the caller */
			regs->eax = error;
			regs->efl |= EFL_CF;
			thread_exception_return();
			/* NOTREACHED */
		}

		/*
		 * Syscall 180 is excluded from tracing here and at exit
		 * (NOTE(review): presumably the kdebug trace syscall itself
		 * -- confirm).
		 */
		if (__probable(code != 180)) {
	        	int *ip = (int *)vt;

			KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE,
				BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				*ip, *(ip+1), *(ip+2), *(ip+3), 0);
		}

#if CONFIG_REQUIRES_U32_MUNGING
		/* Let the per-syscall munger, if any, transform the copied-in arguments in place */
		mungerp = callp->sy_arg_munge32;

		if (mungerp != NULL)
			(*mungerp)(vt);
#endif
	} else
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);

	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);

	/* Reset the return-value slots and record the syscall in flight */
	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	uthread->uu_flag |= UT_NOTCANCELPT;
	uthread->syscall_code = code;

#ifdef JOE_DEBUG
        uthread->uu_iocount = 0;
        uthread->uu_vpindex = 0;
#endif

	/* Invoke the handler, bracketed by the audit entry/exit hooks */
	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

#ifdef JOE_DEBUG
        if (uthread->uu_iocount)
                printf("system call returned with uu_iocount != 0\n");
#endif
#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */

	if (__improbable(error == ERESTART)) {
		/*
		 * Move the user's pc back to repeat the syscall:
		 * 5 bytes for a sysenter, or 2 for an int 8x.
		 * The SYSENTER_TF_CS covers single-stepping over a sysenter
		 * - see debug trap handler in idt.s/idt64.s
		 */

		pal_syscall_restart(thread, state);
	}
	else if (error != EJUSTRETURN) {
		/* EJUSTRETURN leaves the saved registers exactly as the handler left them */
		if (__improbable(error)) {
		    regs->eax = error;
		    regs->efl |= EFL_CF;	/* carry bit */
		} else { /* (not error) */
			/*
			 * We split retval across two registers, in case the
			 * syscall had a 64-bit return value, in which case
			 * eax/edx matches the function call ABI.
			 */
		    regs->eax = uthread->uu_rval[0];
		    regs->edx = uthread->uu_rval[1];
		} 
	}

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall: error=%d retval=(%u,%u)\n",
		error, regs->eax, regs->edx);

	uthread->uu_flag &= ~UT_NOTCANCELPT;

	if (__improbable(uthread->uu_lowpri_window)) {
	        /*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(1);
	}
	/* Matching trace end event; code 180 is skipped just as on entry */
	if (__probable(code != 180))
		KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 
			BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	/* A successful execve outside of vfork gets the platform-layer execve return hook */
	if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
		pal_execve_return(thread);
	}

	thread_exception_return();
	/* NOTREACHED */
}
Exemplo n.º 19
0
/*
 * gzalloc_alloc: guard-mode allocation of one element for "zone".
 *
 * Only zones whose element size lies within [gzalloc_min, gzalloc_max] and
 * that are not marked gzalloc_exempt are handled, and only while
 * gzalloc_mode is set. Each element is placed in its own page-rounded
 * region with one extra page, is filled with gzalloc_fill_pattern, and
 * carries a gzhdr_t recording the signature, the element size and the
 * owning zone.
 *
 * zone     - zone the element is allocated on behalf of
 * canblock - when the preemption level is non-zero, the allocation proceeds
 *            only if this is TRUE (and is counted in pdzalloc_count)
 *
 * Returns the element address, or 0 when the zone is not handled by the
 * guard allocator or when the preemption level is non-zero and canblock is
 * not TRUE.
 */
vm_offset_t
gzalloc_alloc(zone_t zone, boolean_t canblock) {
	vm_offset_t addr = 0;

	if (__improbable(gzalloc_mode &&
		(((zone->elem_size >= gzalloc_min) &&
		    (zone->elem_size <= gzalloc_max))) &&
		(zone->gzalloc_exempt == 0))) {

		if (get_preemption_level() != 0) {
			if (canblock == TRUE) {
				pdzalloc_count++;
			} else {
				return 0;
			}
		}

		vm_offset_t rounded_size = round_page(zone->elem_size + GZHEADER_SIZE);
		vm_offset_t residue = rounded_size - zone->elem_size;
		vm_offset_t gzaddr = 0;
		gzhdr_t *gzh;

		if (!kmem_ready || (vm_page_zone == ZONE_NULL)) {
			/* Early allocations are supplied directly from the
			 * reserve. No guard page for these early allocations,
			 * just waste an additional page.
			 */
			vm_offset_t reserve_needed = rounded_size + PAGE_SIZE;

			/* Check against everything consumed below, including
			 * the wasted page; checking rounded_size alone would
			 * let gzalloc_reserve_size wrap around and hand out
			 * memory beyond the end of the reserve.
			 */
			if (gzalloc_reserve_size < reserve_needed) {
				panic("gzalloc reserve exhausted");
			}
			gzaddr = gzalloc_reserve;
			gzalloc_reserve += reserve_needed;
			gzalloc_reserve_size -= reserve_needed;
			OSAddAtomic64((SInt32) (rounded_size), &gzalloc_early_alloc);
		} else {
			kern_return_t kr = kernel_memory_allocate(gzalloc_map,
			    &gzaddr, rounded_size + (1*PAGE_SIZE),
			    0, KMA_KOBJECT | gzalloc_guard);
			if (kr != KERN_SUCCESS) {
				panic("gzalloc: kernel_memory_allocate for size 0x%llx failed with %d", (uint64_t)rounded_size, kr);
			}
		}

		if (gzalloc_uf_mode) {
			/* Underflow mode: skip the leading page so the element
			 * starts on a page boundary.
			 */
			gzaddr += PAGE_SIZE;
			/* The "header" becomes a "footer" in underflow
			 * mode.
			 */
			gzh = (gzhdr_t *) (gzaddr + zone->elem_size);
			addr = gzaddr;
		} else {
			/* Default mode: the element ends at the end of the
			 * rounded region and the header sits directly in
			 * front of it.
			 */
			gzh = (gzhdr_t *) (gzaddr + residue - GZHEADER_SIZE);
			addr = (gzaddr + residue);
		}

		/* Fill with a pattern on allocation to trap uninitialized
		 * data use. Since the element size may be "rounded up"
		 * by higher layers such as the kalloc layer, this may
		 * also identify overruns between the originally requested
		 * size and the rounded size via visual inspection.
		 * TBD: plumb through the originally requested size,
		 * prior to rounding by kalloc/IOMalloc etc.
		 * We also add a signature and the zone of origin in a header
		 * prefixed to the allocation.
		 */
		memset((void *)gzaddr, gzalloc_fill_pattern, rounded_size);

		/* Elements carved from the early reserve are tagged with
		 * GZDEADZONE instead of their zone.
		 */
		gzh->gzone = (kmem_ready && vm_page_zone) ? zone : GZDEADZONE;
		gzh->gzsize = (uint32_t) zone->elem_size;
		gzh->gzsig = GZALLOC_SIGNATURE;

		/* Zone accounting is updated under the zone lock */
		lock_zone(zone);
		zone->count++;
		zone->sum_count++;
		zone->cur_size += rounded_size;
		unlock_zone(zone);

		OSAddAtomic64((SInt32) rounded_size, &gzalloc_allocated);
		OSAddAtomic64((SInt32) (rounded_size - zone->elem_size), &gzalloc_wasted);
	}
	return addr;
}
Exemplo n.º 20
0
/*
 * timer_call_enter_internal: arm (or re-arm) a timer call.
 *
 * The requested deadline is padded with slop (from timer_call_slop(), or
 * the caller's leeway when TIMER_CALL_LEEWAY is set and the leeway is
 * larger), saturating at UINT64_MAX. A padded deadline that is already in
 * the past is recorded in the past_deadline_* statistics and replaced by
 * "now + past_deadline_timer_adjustment". The call is then placed on the
 * longterm queue when that path accepts it, otherwise on the queue chosen
 * by timer_queue_assign().
 *
 * call        - timer call to enqueue
 * param1      - stored as the call entry's param1
 * deadline    - requested deadline, on the mach_absolute_time() timebase
 * leeway      - caller-supplied slop, honoured only with TIMER_CALL_LEEWAY
 * flags       - stored in call->flags; the urgency bits feed the slop
 *               computation
 * ratelimited - forces the rate-limit marker (bit 0 of the soft deadline)
 *               and bypasses the longterm queue
 *
 * Returns TRUE when the enqueue helper reported a previous queue for the
 * call (old_queue != NULL), FALSE otherwise.
 */
static boolean_t 
timer_call_enter_internal(
	timer_call_t 		call,
	timer_call_param_t	param1,
	uint64_t 		deadline,
	uint64_t 		leeway,
	uint32_t 		flags,
	boolean_t		ratelimited)
{
	mpqueue_head_t		*queue = NULL;
	mpqueue_head_t		*old_queue;
	spl_t			s;
	uint64_t 		slop;
	uint32_t		urgency;

	s = splclock();

	call->soft_deadline = deadline;
	call->flags = flags;

	uint64_t ctime = mach_absolute_time();

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_ENTER | DBG_FUNC_START,
		call,
		param1, deadline, flags, 0);

	urgency = (flags & TIMER_CALL_URGENCY_MASK);

	boolean_t slop_ratelimited = FALSE;
	slop = timer_call_slop(deadline, ctime, urgency, current_thread(), &slop_ratelimited);

	if ((flags & TIMER_CALL_LEEWAY) != 0 && leeway > slop) {
		slop = leeway;
	}

	/* Saturate instead of wrapping when the padding would overflow */
	if (UINT64_MAX - deadline <= slop) {
		deadline = UINT64_MAX;
	} else {
		deadline += slop;
	}

	if (__improbable(deadline < ctime)) {
		uint64_t delta = (ctime - deadline);

		past_deadline_timers++;
		past_deadline_deltas += delta;
		/* Track the largest and smallest lateness seen. Both
		 * statistics hold a delta; the longest one used to be
		 * overwritten with the absolute deadline by mistake.
		 */
		if (delta > past_deadline_longest) {
			past_deadline_longest = delta;
		}
		if (delta < past_deadline_shortest) {
			past_deadline_shortest = delta;
		}

		deadline = ctime + past_deadline_timer_adjustment;
		call->soft_deadline = deadline;
	}

	/* Bit 0 of the "soft" deadline indicates that
	 * this particular timer call requires rate-limiting
	 * behaviour. Maintain the invariant deadline >= soft_deadline by
	 * setting bit 0 of "deadline".
	 */

	deadline |= 1;
	if (ratelimited || slop_ratelimited) {
		call->soft_deadline |= 1ULL;
	} else {
		call->soft_deadline &= ~0x1ULL;
	}

	/* Time-to-deadline, measured from the soft deadline */
	call->ttd =  call->soft_deadline - ctime;

#if CONFIG_DTRACE
	DTRACE_TMR7(callout__create, timer_call_func_t, CE(call)->func,
	timer_call_param_t, CE(call)->param0, uint32_t, call->flags,
	    (deadline - call->soft_deadline),
	    (call->ttd >> 32), (unsigned) (call->ttd & 0xFFFFFFFF), call);
#endif

	/* Rate-limited timers never go to the longterm queue */
	if (!ratelimited && !slop_ratelimited) {
		queue = timer_longterm_enqueue_unlocked(call, ctime, deadline, &old_queue);
	}

	/* Not taken by the longterm path: use the regular deadline queue */
	if (queue == NULL) {
		queue = timer_queue_assign(deadline);
		old_queue = timer_call_enqueue_deadline_unlocked(call, queue, deadline);
	}

	CE(call)->param1 = param1;
#if TIMER_TRACE
	CE(call)->entry_time = ctime;
#endif

	TIMER_KDEBUG_TRACE(KDEBUG_TRACE,
		DECR_TIMER_ENTER | DBG_FUNC_END,
		call,
		(old_queue != NULL), call->soft_deadline, queue->count, 0);

	splx(s);

	return (old_queue != NULL);
}
Exemplo n.º 21
0
/*
 * gzalloc_free: guard-mode counterpart of a zone free.
 *
 * Returns FALSE when the zone is not handled by the guard allocator
 * (gzalloc_mode off, element size outside [gzalloc_min, gzalloc_max], or
 * zone marked gzalloc_exempt). Returns TRUE once the element has been
 * handled here: leaked (early allocation), parked in the zone's protected
 * free-element cache, or unmapped.
 */
boolean_t gzalloc_free(zone_t zone, void *addr) {
	kern_return_t kr;

	/* Guard clause: nothing to do for zones the guard allocator ignores */
	if (!__improbable(gzalloc_mode &&
		(zone->elem_size >= gzalloc_min) &&
		(zone->elem_size <= gzalloc_max) &&
		(zone->gzalloc_exempt == 0))) {
		return FALSE;
	}

	vm_offset_t span = round_page(zone->elem_size + GZHEADER_SIZE);
	vm_offset_t slack = span - zone->elem_size;
	vm_offset_t elem = (vm_offset_t)addr;
	vm_offset_t base;
	vm_offset_t victim = 0;
	gzhdr_t *hdr;

	/* Locate the header and the start of the region; the layout depends
	 * on whether underflow mode placed the header after the element.
	 */
	if (gzalloc_uf_mode) {
		hdr = (gzhdr_t *)(elem + zone->elem_size);
		base = elem - PAGE_SIZE;
	} else {
		hdr = (gzhdr_t *)(elem - GZHEADER_SIZE);
		base = elem - slack;
	}

	assert((base & PAGE_MASK) == 0);

	if (gzalloc_consistency_checks) {
		if (hdr->gzsig != GZALLOC_SIGNATURE) {
			panic("GZALLOC signature mismatch for element %p, expected 0x%x, found 0x%x", addr, GZALLOC_SIGNATURE, hdr->gzsig);
		}

		if ((hdr->gzone != zone) && (hdr->gzone != GZDEADZONE)) {
			panic("%s: Mismatched zone or under/overflow, current zone: %p, recorded zone: %p, address: %p", __FUNCTION__, zone, hdr->gzone, (void *)addr);
		}

		/* Overlaps with the zone check above, but can still catch a
		 * corrupted header.
		 */
		if (hdr->gzsize != zone->elem_size) {
			panic("Mismatched zfree or under/overflow for zone %p, recorded size: 0x%x, element size: 0x%x, address: %p\n", zone, hdr->gzsize, (uint32_t) zone->elem_size, (void *)addr);
		}
	}

	if (!kmem_ready || hdr->gzone == GZDEADZONE) {
		/* Frees of early allocations (made before kmem was fully
		 * configured) are simply leaked for now; they do not appear
		 * to be freed in practice. ml_static_mfree is a possible
		 * future improvement.
		 */
		OSAddAtomic64((SInt32) (span), &gzalloc_early_free);
		return TRUE;
	}

	/* Count frees performed with a non-zero preemption level */
	if (get_preemption_level() != 0) {
		pdzfree_count++;
	}

	if (!gzfc_size) {
		/* No free-element cache: release this element right away */
		victim = base;
	} else {
		/* With a cache configured the freed region stays mapped for
		 * now; change its protection to gzalloc_prot.
		 */
		kr = vm_map_protect(
			gzalloc_map,
			base,
			base + span + (1 * PAGE_SIZE),
			gzalloc_prot,
			FALSE);
		if (kr != KERN_SUCCESS) {
			panic("%s: vm_map_protect: %p, 0x%x", __FUNCTION__, (void *)base, kr);
		}
	}

	lock_zone(zone);

	/* Park the freed region in the zone's protected free-element cache
	 * and take whatever previously occupied that slot as the element to
	 * release.
	 */
	if (gzfc_size) {
		if (zone->gz.gzfc_index >= gzfc_size) {
			zone->gz.gzfc_index = 0;
		}
		victim = zone->gz.gzfc[zone->gz.gzfc_index];
		zone->gz.gzfc[zone->gz.gzfc_index] = base;
		zone->gz.gzfc_index++;
	}

	/* Zone accounting only changes when a region is really released */
	if (victim) {
		zone->count--;
		zone->cur_size -= span;
	}

	unlock_zone(zone);

	if (!victim) {
		return TRUE;
	}

	kr = vm_map_remove(
		gzalloc_map,
		victim,
		victim + span + (1 * PAGE_SIZE),
		VM_MAP_REMOVE_KUNWIRE);
	if (kr != KERN_SUCCESS) {
		panic("gzfree: vm_map_remove: %p, 0x%x", (void *)victim, kr);
	}

	OSAddAtomic64((SInt32)span, &gzalloc_freed);
	OSAddAtomic64(-((SInt32) (span - zone->elem_size)), &gzalloc_wasted);

	return TRUE;
}