Code Example #1
File: radeon_gem.c Project: ele7enxxh/dtrace-pf
int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct drm_radeon_gem_create *args = data;
	struct drm_gem_object *gobj;
	uint32_t handle;
	int r;

	sx_slock(&rdev->exclusive_lock);
	/* create a gem object to contain this object in */
	args->size = roundup(args->size, PAGE_SIZE);
	r = radeon_gem_object_create(rdev, args->size, args->alignment,
					args->initial_domain, false,
					false, &gobj);
	if (r) {
		sx_sunlock(&rdev->exclusive_lock);
		r = radeon_gem_handle_lockup(rdev, r);
		return r;
	}
	handle = 0;
	r = drm_gem_handle_create(filp, gobj, &handle);
	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference_unlocked(gobj);
	if (r) {
		sx_sunlock(&rdev->exclusive_lock);
		r = radeon_gem_handle_lockup(rdev, r);
		return r;
	}
	args->handle = handle;
	sx_sunlock(&rdev->exclusive_lock);
	return 0;
}
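This first example shows the recurring shape of almost every excerpt below: take rdev->exclusive_lock shared on entry and release it on every exit path. For reference, here is a minimal, self-contained sketch of the sx(9) shared/exclusive API these examples rely on. The lock name "demo" and the demo_* helpers are hypothetical; sx_init(), sx_slock(), sx_sunlock(), sx_xlock(), sx_xunlock() and sx_destroy() are the standard FreeBSD calls.

/*
 * Hedged sketch of the basic sx(9) pattern (not from any of the
 * projects above).  Readers take the lock shared and may run
 * concurrently; writers take it exclusive.
 */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/sx.h>

static struct sx demo_lock;
static int demo_value;

static void
demo_init(void)
{

	sx_init(&demo_lock, "demo");
}

static int
demo_read(void)
{
	int v;

	sx_slock(&demo_lock);		/* shared: other readers may enter */
	v = demo_value;
	sx_sunlock(&demo_lock);
	return (v);
}

static void
demo_write(int v)
{

	sx_xlock(&demo_lock);		/* exclusive: blocks all other holders */
	demo_value = v;
	sx_xunlock(&demo_lock);
}

static void
demo_fini(void)
{

	sx_destroy(&demo_lock);
}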
Code Example #2
File: radeon_gem.c Project: ele7enxxh/dtrace-pf
int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
				struct drm_file *filp)
{
	/* transition the BO to a domain -
	 * just validate the BO into a certain domain */
	struct radeon_device *rdev = dev->dev_private;
	struct drm_radeon_gem_set_domain *args = data;
	struct drm_gem_object *gobj;
	struct radeon_bo *robj;
	int r;

	/* for now if someone requests domain CPU -
	 * just make sure the buffer is finished with */
	sx_slock(&rdev->exclusive_lock);

	/* just do a BO wait for now */
	gobj = drm_gem_object_lookup(dev, filp, args->handle);
	if (gobj == NULL) {
		sx_sunlock(&rdev->exclusive_lock);
		return -ENOENT;
	}
	robj = gem_to_radeon_bo(gobj);

	r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain);

	drm_gem_object_unreference_unlocked(gobj);
	sx_sunlock(&rdev->exclusive_lock);
	r = radeon_gem_handle_lockup(robj->rdev, r);
	return r;
}
Code Example #3
File: tty_tty.c Project: AhmadTux/freebsd
static void
ctty_clone(void *arg, struct ucred *cred, char *name, int namelen,
    struct cdev **dev)
{

	if (*dev != NULL)
		return;
	if (strcmp(name, "tty"))
		return;
	sx_sunlock(&clone_drain_lock);
	sx_slock(&proctree_lock);
	sx_slock(&clone_drain_lock);
	dev_lock();
	if (!(curthread->td_proc->p_flag & P_CONTROLT))
		*dev = ctty;
	else if (curthread->td_proc->p_session->s_ttyvp == NULL)
		*dev = ctty;
	else if (curthread->td_proc->p_session->s_ttyvp->v_type == VBAD ||
	    curthread->td_proc->p_session->s_ttyvp->v_rdev == NULL) {
		/* e.g. s_ttyvp was revoked */
		*dev = ctty;
	} else
		*dev = curthread->td_proc->p_session->s_ttyvp->v_rdev;
	dev_refl(*dev);
	dev_unlock();
	sx_sunlock(&proctree_lock);
}
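Note how ctty_clone() above must briefly drop clone_drain_lock so that proctree_lock, which is ordered before it, can be acquired first. A hedged sketch of that reordering idiom follows; lock_a and lock_b are hypothetical stand-ins for proctree_lock and clone_drain_lock.

/*
 * Sketch only: lock_a is ordered before lock_b, but the caller already
 * holds lock_b, so it is dropped and retaken in the correct order.
 * Any state protected by lock_b may change while it is released, so
 * the caller must be prepared to re-validate it.
 */
static void
retake_in_order(struct sx *lock_a, struct sx *lock_b)
{

	sx_sunlock(lock_b);	/* give up the out-of-order lock */
	sx_slock(lock_a);	/* take the earlier lock first */
	sx_slock(lock_b);	/* then reacquire the later one */
}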
Code Example #4
File: ibcs2_sysvec.c Project: edgar-pek/PerspicuOS
/*
 * Create an "ibcs2" module that does nothing but allow checking for
 * the presence of the subsystem.
 */
static int
ibcs2_modevent(module_t mod, int type, void *unused)
{
	struct proc *p = NULL;
	int rval = 0;

	switch(type) {
	case MOD_LOAD:
		break;
	case MOD_UNLOAD:
		/* if this was an ELF module we'd use elf_brand_inuse()... */
		sx_slock(&allproc_lock);
		FOREACH_PROC_IN_SYSTEM(p) {
			if (p->p_sysent == &ibcs2_svr3_sysvec) {
				rval = EBUSY;
				break;
			}
		}
		sx_sunlock(&allproc_lock);
		break;
	default:
		rval = EOPNOTSUPP;
		break;
	}
	return (rval);
}
Code Example #5
File: opensolaris_zone.c Project: 151706061/osv
int
zone_dataset_attach(struct ucred *cred, const char *dataset, int jailid)
{
	struct zone_dataset_head *head;
	zone_dataset_t *zd, *zd2;
	struct prison *pr;
	int dofree, error;

	if ((error = priv_check_cred(cred, PRIV_ZFS_JAIL, 0)) != 0)
		return (error);

	/* Allocate memory before we grab prison's mutex. */
	zd = malloc(sizeof(*zd) + strlen(dataset) + 1, M_ZONES, M_WAITOK);

	sx_slock(&allprison_lock);
	pr = prison_find(jailid);	/* Locks &pr->pr_mtx. */
	sx_sunlock(&allprison_lock);
	if (pr == NULL) {
		free(zd, M_ZONES);
		return (ENOENT);
	}

	head = osd_jail_get(pr, zone_slot);
	if (head != NULL) {
		dofree = 0;
		LIST_FOREACH(zd2, head, zd_next) {
			if (strcmp(dataset, zd2->zd_dataset) == 0) {
				free(zd, M_ZONES);
				error = EEXIST;
				goto end;
			}
		}
	} else {
Code Example #6
File: ntfs_usnjrnl.c Project: icaleo/ntfs
/**
 * ntfs_usnjrnl_stamp - stamp the transaction log ($UsnJrnl) on an ntfs volume
 * @vol:	ntfs volume on which to stamp the transaction log
 *
 * Stamp the transaction log ($UsnJrnl) on the ntfs volume @vol and return 0
 * on success and errno on error.
 *
 * This function assumes that the transaction log has already been loaded and
 * consistency checked by a call to ntfs_vfsops.c::ntfs_usnjrnl_load().
 */
errno_t ntfs_usnjrnl_stamp(ntfs_volume *vol)
{
	ntfs_debug("Entering.");
	if (!NVolUsnJrnlStamped(vol)) {
		sle64 j_size, stamp;
		upl_t upl;
		upl_page_info_array_t pl;
		USN_HEADER *uh;
		ntfs_inode *max_ni;
		errno_t err;

		mtx_lock_spin(&vol->usnjrnl_j_ni->size_lock);
		j_size = vol->usnjrnl_j_ni->data_size;
		mtx_unlock_spin(&vol->usnjrnl_j_ni->size_lock);
		max_ni = vol->usnjrnl_max_ni;
		/*
		 * FIXME: Next If statement always false because of
		 * replacing vnode_get() with vhold()
		 */
		vhold(max_ni->vn);
		if (0) {
			ntfs_error(vol->mp, "Failed to get vnode for "
					"$UsnJrnl/$DATA/$Max.");
			return err;
		}
		sx_slock(&max_ni->lock);
		err = ntfs_page_map(max_ni, 0, &upl, &pl, (u8**)&uh, TRUE);
		if (err) {
			ntfs_error(vol->mp, "Failed to read from "
					"$UsnJrnl/$DATA/$Max attribute.");
			vdrop(max_ni->vn);
			return err;
		}
		stamp = ntfs_current_time();
		ntfs_debug("Stamping transaction log ($UsnJrnl): old "
				"journal_id 0x%llx, old lowest_valid_usn "
				"0x%llx, new journal_id 0x%llx, new "
				"lowest_valid_usn 0x%llx.",
				(unsigned long long)
				sle64_to_cpu(uh->journal_id),
				(unsigned long long)
				sle64_to_cpu(uh->lowest_valid_usn),
				(unsigned long long)sle64_to_cpu(stamp),
				(unsigned long long)j_size);
		uh->lowest_valid_usn = cpu_to_sle64(j_size);
		uh->journal_id = stamp;
		ntfs_page_unmap(max_ni, upl, pl, TRUE);
		sx_sunlock(&max_ni->lock);
		vdrop(max_ni->vn);
		/* Set the flag so we do not have to do it again on remount. */
		NVolSetUsnJrnlStamped(vol);
		// TODO: Should we mark any times on the base inode $UsnJrnl
		// for update here?
	}
	ntfs_debug("Done.");
	return 0;
}
Code Example #7
File: uipc_shm.c Project: BillTheBest/libuinet
void
shm_path(struct shmfd *shmfd, char *path, size_t size)
{

	if (shmfd->shm_path == NULL)
		return;
	sx_slock(&shm_dict_lock);
	if (shmfd->shm_path != NULL)
		strlcpy(path, shmfd->shm_path, size);
	sx_sunlock(&shm_dict_lock);
}
Code Example #8
File: uipc_sem.c Project: rchander/freebsd
static void
ksem_info_impl(struct ksem *ks, char *path, size_t size, uint32_t *value)
{

	if (ks->ks_path == NULL)
		return;
	sx_slock(&ksem_dict_lock);
	if (ks->ks_path != NULL)
		strlcpy(path, ks->ks_path, size);
	if (value != NULL)
		*value = ks->ks_value;
	sx_sunlock(&ksem_dict_lock);
}
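Examples #7 and #8 (and #14 below) share the same idiom: test the pointer once without the lock as a cheap fast path, then re-test it under the dictionary lock before dereferencing, because it may have been cleared in between. A minimal sketch, with a hypothetical struct obj and obj_dict_lock:

/*
 * Hedged sketch of the check/lock/re-check idiom; strlcpy() is the
 * usual kernel routine, everything else here is made up.
 */
static struct sx obj_dict_lock;		/* assumed initialized elsewhere */

struct obj {
	char	*o_path;		/* NULL until the object is named */
};

static void
obj_path(struct obj *o, char *path, size_t size)
{

	if (o->o_path == NULL)		/* unlocked fast path */
		return;
	sx_slock(&obj_dict_lock);
	if (o->o_path != NULL)		/* re-check under the lock */
		strlcpy(path, o->o_path, size);
	sx_sunlock(&obj_dict_lock);
}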
Code Example #9
File: kern_sx.c Project: jmgurney/freebsd
uintptr_t
unlock_sx(struct lock_object *lock)
{
	struct sx *sx;

	sx = (struct sx *)lock;
	sx_assert(sx, SA_LOCKED | SA_NOTRECURSED);
	if (sx_xlocked(sx)) {
		sx_xunlock(sx);
		return (0);
	} else {
		sx_sunlock(sx);
		return (1);
	}
}
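unlock_sx() above also shows that the API can report how a lock is held (sx_xlocked()). Its non-blocking counterpart on the acquisition side, sx_try_slock(), appears in Code Example #22 further down; here is a hedged sketch of its typical use, with hypothetical names throughout.

/*
 * Sketch: try to take the lock shared without sleeping and let the
 * caller decide whether to retry.  sx_try_slock() returns non-zero
 * on success.
 */
static int
try_read(struct sx *lock, int *shared_value, int *out)
{

	if (!sx_try_slock(lock))
		return (EWOULDBLOCK);	/* contended; caller may retry */
	*out = *shared_value;
	sx_sunlock(lock);
	return (0);
}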
Code Example #10
File: kern_mib.c Project: ChristosKa/freebsd
static int
sysctl_hostname(SYSCTL_HANDLER_ARGS)
{
	struct prison *pr, *cpr;
	size_t pr_offset;
	char tmpname[MAXHOSTNAMELEN];
	int descend, error, len;

	/*
	 * This function can set: hostname domainname hostuuid.
	 * Keep that in mind when comments say "hostname".
	 */
	pr_offset = (size_t)arg1;
	len = arg2;
	KASSERT(len <= sizeof(tmpname),
	    ("length %d too long for %s", len, __func__));

	pr = req->td->td_ucred->cr_prison;
	if (!(pr->pr_allow & PR_ALLOW_SET_HOSTNAME) && req->newptr)
		return (EPERM);
	/*
	 * Make a local copy of hostname to get/set so we don't have to hold
	 * the jail mutex during the sysctl copyin/copyout activities.
	 */
	mtx_lock(&pr->pr_mtx);
	bcopy((char *)pr + pr_offset, tmpname, len);
	mtx_unlock(&pr->pr_mtx);

	error = sysctl_handle_string(oidp, tmpname, len, req);

	if (req->newptr != NULL && error == 0) {
		/*
		 * Copy the locally set hostname to all jails that share
		 * this host info.
		 */
		sx_slock(&allprison_lock);
		while (!(pr->pr_flags & PR_HOST))
			pr = pr->pr_parent;
		mtx_lock(&pr->pr_mtx);
		bcopy(tmpname, (char *)pr + pr_offset, len);
		FOREACH_PRISON_DESCENDANT_LOCKED(pr, cpr, descend)
			if (cpr->pr_flags & PR_HOST)
				descend = 0;
			else
				bcopy(tmpname, (char *)cpr + pr_offset, len);
		mtx_unlock(&pr->pr_mtx);
		sx_sunlock(&allprison_lock);
	}
Code Example #11
File: radeon_ttm.c Project: Alkzndr/freebsd
static int radeon_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct ttm_buffer_object *bo;
	struct radeon_device *rdev;
	int r;

	bo = (struct ttm_buffer_object *)vma->vm_private_data;	
	if (bo == NULL) {
		return VM_FAULT_NOPAGE;
	}
	rdev = radeon_get_rdev(bo->bdev);
	sx_slock(&rdev->pm.mclk_lock);
	r = ttm_vm_ops->fault(vma, vmf);
	sx_sunlock(&rdev->pm.mclk_lock);
	return r;
}
Code Example #12
static void
racctd(void)
{
	struct thread *td;
	struct proc *p;
	struct timeval wallclock;
	uint64_t runtime;

	for (;;) {
		sx_slock(&allproc_lock);

		FOREACH_PROC_IN_SYSTEM(p) {
			if (p->p_state != PRS_NORMAL)
				continue;
			if (p->p_flag & P_SYSTEM)
				continue;

			microuptime(&wallclock);
			timevalsub(&wallclock, &p->p_stats->p_start);
			PROC_LOCK(p);
			PROC_SLOCK(p);
			FOREACH_THREAD_IN_PROC(p, td) {
				ruxagg(p, td);
				thread_lock(td);
				thread_unlock(td);
			}
			runtime = cputick2usec(p->p_rux.rux_runtime);
			PROC_SUNLOCK(p);
#ifdef notyet
			KASSERT(runtime >= p->p_prev_runtime,
			    ("runtime < p_prev_runtime"));
#else
			if (runtime < p->p_prev_runtime)
				runtime = p->p_prev_runtime;
#endif
			p->p_prev_runtime = runtime;
			mtx_lock(&racct_lock);
			racct_set_locked(p, RACCT_CPU, runtime);
			racct_set_locked(p, RACCT_WALLCLOCK,
			    wallclock.tv_sec * 1000000 + wallclock.tv_usec);
			mtx_unlock(&racct_lock);
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);
		pause("-", hz);
	}
Code Example #13
/*
 * Sysctl to read out the capture buffer from userspace.  We require
 * privilege as sensitive process/memory information may be accessed.
 */
static int
sysctl_debug_ddb_capture_data(SYSCTL_HANDLER_ARGS)
{
    int error;
    char ch;

    error = priv_check(req->td, PRIV_DDB_CAPTURE);
    if (error)
        return (error);

    sx_slock(&db_capture_sx);
    error = SYSCTL_OUT(req, db_capture_buf, db_capture_bufoff);
    sx_sunlock(&db_capture_sx);
    if (error)
        return (error);
    ch = '\0';
    return (SYSCTL_OUT(req, &ch, sizeof(ch)));
}
Code Example #14
File: uipc_sem.c Project: outbackdingo/uBSD
static int
ksem_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
{
	struct ksem *ks;

	kif->kf_type = KF_TYPE_SEM;
	ks = fp->f_data;
	mtx_lock(&sem_lock);
	kif->kf_un.kf_sem.kf_sem_value = ks->ks_value;
	kif->kf_un.kf_sem.kf_sem_mode = S_IFREG | ks->ks_mode;	/* XXX */
	mtx_unlock(&sem_lock);
	if (ks->ks_path != NULL) {
		sx_slock(&ksem_dict_lock);
		if (ks->ks_path != NULL)
			strlcpy(kif->kf_path, ks->ks_path, sizeof(kif->kf_path));
		sx_sunlock(&ksem_dict_lock);
	}
	return (0);
}
Code Example #15
File: filemon.c Project: 2trill2spill/freebsd
/*
 * Invalidate the passed filemon in all processes.
 */
static void
filemon_untrack_processes(struct filemon *filemon)
{
	struct proc *p;

	sx_assert(&filemon->lock, SA_XLOCKED);

	/* Avoid allproc loop if there is no need. */
	if (filemon->proccnt == 0)
		return;

	/*
	 * Processes in this list won't go away while here since
	 * filemon_event_process_exit() will lock on filemon->lock
	 * which we hold.
	 */
	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		/*
		 * No PROC_LOCK is needed to compare here since it is
		 * guaranteed to not change since we have its filemon
		 * locked.  Everything that changes this p_filemon will
		 * be locked on it.
		 */
		if (p->p_filemon == filemon)
			filemon_proc_drop(p);
	}
	sx_sunlock(&allproc_lock);

	/*
	 * It's possible some references were acquired but will be
	 * dropped shortly as they are restricted from being
	 * inherited.  There is at least the reference in cdevpriv remaining.
	 */
	KASSERT(filemon->refcnt > 0, ("%s: filemon %p should have "
	    "references still.", __func__, filemon));
	KASSERT(filemon->proccnt == 0, ("%s: filemon %p should not have "
	    "attached procs still.", __func__, filemon));
}
Code Example #16
File: kern_osd.c Project: dcui/FreeBSD-9.3_kernel
int
osd_call(u_int type, u_int method, void *obj, void *data)
{
	osd_method_t methodfun;
	int error, i;

	KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
	KASSERT(method < osd_nmethods[type], ("Invalid method."));

	/*
	 * Call this method for every slot that defines it, stopping if an
	 * error is encountered.
	 */
	error = 0;
	sx_slock(&osd_module_lock[type]);
	for (i = 0; i < osd_nslots[type]; i++) {
		methodfun =
		    osd_methods[type][i * osd_nmethods[type] + method];
		if (methodfun != NULL && (error = methodfun(obj, data)) != 0)
			break;
	}
	sx_sunlock(&osd_module_lock[type]);
	return (error);
}
Code Example #17
void debugger_getprocs_callback(struct allocation_t* ref)
{
	if (!ref)
		return;

	struct message_t* message = __get(ref);
	if (!message)
		return;

	// Only handle requests
	if (message->header.request != 1)
		goto cleanup;

	int(*_sx_slock)(struct sx *sx, int opts, const char *file, int line) = kdlsym(_sx_slock);
	void(*_sx_sunlock)(struct sx *sx, const char *file, int line) = kdlsym(_sx_sunlock);
	void(*_mtx_lock_flags)(struct mtx *m, int opts, const char *file, int line) = kdlsym(_mtx_lock_flags);
	void(*_mtx_unlock_flags)(struct mtx *m, int opts, const char *file, int line) = kdlsym(_mtx_unlock_flags);
	struct sx* allproclock = (struct sx*)kdlsym(allproc_lock);
	struct proclist* allproc = (struct proclist*)*(uint64_t*)kdlsym(allproc);

	void(*vmspace_free)(struct vmspace *) = kdlsym(vmspace_free);
	struct vmspace* (*vmspace_acquire_ref)(struct proc *) = kdlsym(vmspace_acquire_ref);
	void(*_vm_map_lock_read)(vm_map_t map, const char *file, int line) = kdlsym(_vm_map_lock_read);
	void(*_vm_map_unlock_read)(vm_map_t map, const char *file, int line) = kdlsym(_vm_map_unlock_read);

	uint64_t procCount = 0;
	struct proc* p = NULL;
	struct debugger_getprocs_t getproc = { 0 };

	sx_slock(allproclock);
	FOREACH_PROC_IN_SYSTEM(p)
	{
		PROC_LOCK(p);
		// Zero out our process information
		kmemset(&getproc, 0, sizeof(getproc));

		// Get the vm map
		struct vmspace* vm = vmspace_acquire_ref(p);
		if (!vm) {
			// Exiting process with no vmspace; skip it
			PROC_UNLOCK(p);
			continue;
		}
		vm_map_t map = &vm->vm_map; // use the referenced vmspace, not p->p_vmspace
		vm_map_lock_read(map);

		struct vm_map_entry* entry = map->header.next;

		// Copy over all of the address information
		getproc.process_id = p->p_pid;
		getproc.text_address = (uint64_t)entry->start;
		getproc.text_size = (uint64_t)entry->end - entry->start;
		getproc.data_address = (uint64_t)p->p_vmspace->vm_daddr;
		getproc.data_size = p->p_vmspace->vm_dsize;
		// Copy over the name and path
		kmemcpy(getproc.process_name, p->p_comm, sizeof(getproc.process_name));
		kmemcpy(getproc.path, p->p_elfpath, sizeof(getproc.path));
		// Write it back to the PC
		kwrite(message->socket, &getproc, sizeof(getproc));
		procCount++;

		// Free the vmmap
		vm_map_unlock_read(map);
		vmspace_free(vm);

		PROC_UNLOCK(p);
	}
	sx_sunlock(allproclock);
	// Send a 0xDD-filled terminator record so the PC side knows the list is complete
	kmemset(&getproc, 0xDD, sizeof(getproc));
	kwrite(message->socket, &getproc, sizeof(getproc));

cleanup:
	__dec(ref);
}
Code Example #18
File: linux_file.c Project: jaredmcneill/freebsd
static int
linux_common_open(struct thread *td, int dirfd, char *path, int l_flags, int mode)
{
	cap_rights_t rights;
	struct proc *p = td->td_proc;
	struct file *fp;
	int fd;
	int bsd_flags, error;

	bsd_flags = 0;
	switch (l_flags & LINUX_O_ACCMODE) {
	case LINUX_O_WRONLY:
		bsd_flags |= O_WRONLY;
		break;
	case LINUX_O_RDWR:
		bsd_flags |= O_RDWR;
		break;
	default:
		bsd_flags |= O_RDONLY;
	}
	if (l_flags & LINUX_O_NDELAY)
		bsd_flags |= O_NONBLOCK;
	if (l_flags & LINUX_O_APPEND)
		bsd_flags |= O_APPEND;
	if (l_flags & LINUX_O_SYNC)
		bsd_flags |= O_FSYNC;
	if (l_flags & LINUX_O_NONBLOCK)
		bsd_flags |= O_NONBLOCK;
	if (l_flags & LINUX_FASYNC)
		bsd_flags |= O_ASYNC;
	if (l_flags & LINUX_O_CREAT)
		bsd_flags |= O_CREAT;
	if (l_flags & LINUX_O_TRUNC)
		bsd_flags |= O_TRUNC;
	if (l_flags & LINUX_O_EXCL)
		bsd_flags |= O_EXCL;
	if (l_flags & LINUX_O_NOCTTY)
		bsd_flags |= O_NOCTTY;
	if (l_flags & LINUX_O_DIRECT)
		bsd_flags |= O_DIRECT;
	if (l_flags & LINUX_O_NOFOLLOW)
		bsd_flags |= O_NOFOLLOW;
	if (l_flags & LINUX_O_DIRECTORY)
		bsd_flags |= O_DIRECTORY;
	/* XXX LINUX_O_NOATIME: unable to be easily implemented. */

	error = kern_openat(td, dirfd, path, UIO_SYSSPACE, bsd_flags, mode);
	if (error != 0)
		goto done;
	if (bsd_flags & O_NOCTTY)
		goto done;

	/*
	 * XXX In between kern_open() and fget(), another process
	 * having the same filedesc could use that fd without
	 * checking below.
	*/
	fd = td->td_retval[0];
	if (fget(td, fd, cap_rights_init(&rights, CAP_IOCTL), &fp) == 0) {
		if (fp->f_type != DTYPE_VNODE) {
			fdrop(fp, td);
			goto done;
		}
		sx_slock(&proctree_lock);
		PROC_LOCK(p);
		if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
			PROC_UNLOCK(p);
			sx_sunlock(&proctree_lock);
			/* XXXPJD: Verify if TIOCSCTTY is allowed. */
			(void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
			    td->td_ucred, td);
		} else {
			PROC_UNLOCK(p);
			sx_sunlock(&proctree_lock);
		}
		fdrop(fp, td);
	}

done:
#ifdef DEBUG
	if (ldebug(open))
		printf(LMSG("open returns error %d"), error);
#endif
	LFREEPATH(path);
	return (error);
}
Code Example #19
/*
 * Resolve the 'which' parameter of several cpuset apis.
 *
 * For WHICH_PID and WHICH_TID return a locked proc and valid proc/tid.  Also
 * checks for permission via p_cansched().
 *
 * For WHICH_SET returns a valid set with a new reference.
 *
 * -1 may be supplied for any argument to mean the current proc/thread or
 * the base set of the current thread.  May fail with ESRCH/EPERM.
 */
static int
cpuset_which(cpuwhich_t which, id_t id, struct proc **pp, struct thread **tdp,
    struct cpuset **setp)
{
	struct cpuset *set;
	struct thread *td;
	struct proc *p;
	int error;

	*pp = p = NULL;
	*tdp = td = NULL;
	*setp = set = NULL;
	switch (which) {
	case CPU_WHICH_PID:
		if (id == -1) {
			PROC_LOCK(curproc);
			p = curproc;
			break;
		}
		if ((p = pfind(id)) == NULL)
			return (ESRCH);
		break;
	case CPU_WHICH_TID:
		if (id == -1) {
			PROC_LOCK(curproc);
			p = curproc;
			td = curthread;
			break;
		}
		td = tdfind(id, -1);
		if (td == NULL)
			return (ESRCH);
		p = td->td_proc;
		break;
	case CPU_WHICH_CPUSET:
		if (id == -1) {
			thread_lock(curthread);
			set = cpuset_refbase(curthread->td_cpuset);
			thread_unlock(curthread);
		} else
			set = cpuset_lookup(id, curthread);
		if (set) {
			*setp = set;
			return (0);
		}
		return (ESRCH);
	case CPU_WHICH_JAIL:
	{
		/* Find `set' for prison with given id. */
		struct prison *pr;

		sx_slock(&allprison_lock);
		pr = prison_find_child(curthread->td_ucred->cr_prison, id);
		sx_sunlock(&allprison_lock);
		if (pr == NULL)
			return (ESRCH);
		cpuset_ref(pr->pr_cpuset);
		*setp = pr->pr_cpuset;
		mtx_unlock(&pr->pr_mtx);
		return (0);
	}
	case CPU_WHICH_IRQ:
		return (0);
	default:
		return (EINVAL);
	}
	error = p_cansched(curthread, p);
	if (error) {
		PROC_UNLOCK(p);
		return (error);
	}
	if (td == NULL)
		td = FIRST_THREAD_IN_PROC(p);
	*pp = p;
	*tdp = td;
	return (0);
}
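Per the comment block above, a successful CPU_WHICH_PID/CPU_WHICH_TID lookup hands back a locked proc that the caller must unlock. A hedged usage sketch follows; the caller and its error handling are made up.

/*
 * Sketch of calling cpuset_which() for a pid and releasing what it
 * returns.  Only illustrative; real callers also deal with *setp.
 */
static int
example_query_pid(id_t pid)
{
	struct proc *p;
	struct thread *td;
	struct cpuset *set;
	int error;

	error = cpuset_which(CPU_WHICH_PID, pid, &p, &td, &set);
	if (error != 0)
		return (error);
	/* ... inspect p or td here ... */
	PROC_UNLOCK(p);		/* cpuset_which() returned it locked */
	return (0);
}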
Code Example #20
/*
 * Return directory entries.
 */
static int
pfs_readdir(struct vop_readdir_args *va)
{
	struct vnode *vn = va->a_vp;
	struct pfs_vdata *pvd = vn->v_data;
	struct pfs_node *pd = pvd->pvd_pn;
	pid_t pid = pvd->pvd_pid;
	struct proc *p, *proc;
	struct pfs_node *pn;
	struct dirent *entry;
	struct uio *uio;
	off_t offset;
	int error, i, resid;
	char *buf, *ent;

	KASSERT(pd->pn_info == vn->v_mount->mnt_data,
	    ("%s(): pn_info does not match mountpoint", __func__));
	PFS_TRACE(("%s pid %lu", pd->pn_name, (unsigned long)pid));
	pfs_assert_not_owned(pd);

	if (vn->v_type != VDIR)
		PFS_RETURN (ENOTDIR);
	KASSERT_PN_IS_DIR(pd);
	uio = va->a_uio;

	/* only allow reading entire entries */
	offset = uio->uio_offset;
	resid = uio->uio_resid;
	if (offset < 0 || offset % PFS_DELEN != 0 ||
	    (resid && resid < PFS_DELEN))
		PFS_RETURN (EINVAL);
	if (resid == 0)
		PFS_RETURN (0);

	/* can't do this while holding the proc lock... */
	buf = malloc(resid, M_IOV, M_WAITOK | M_ZERO);
	sx_slock(&allproc_lock);
	pfs_lock(pd);

	/* check if the directory is visible to the caller */
	if (!pfs_visible(curthread, pd, pid, &proc)) {
		sx_sunlock(&allproc_lock);
		pfs_unlock(pd);
		free(buf, M_IOV);
		PFS_RETURN (ENOENT);
	}
	KASSERT(pid == NO_PID || proc != NULL,
	    ("%s(): no process for pid %lu", __func__, (unsigned long)pid));

	/* skip unwanted entries */
	for (pn = NULL, p = NULL; offset > 0; offset -= PFS_DELEN) {
		if (pfs_iterate(curthread, proc, pd, &pn, &p) == -1) {
			/* nothing left... */
			if (proc != NULL)
				PROC_UNLOCK(proc);
			pfs_unlock(pd);
			sx_sunlock(&allproc_lock);
			free(buf, M_IOV);
			PFS_RETURN (0);
		}
	}

	/* fill in entries */
	ent = buf;
	while (pfs_iterate(curthread, proc, pd, &pn, &p) != -1 &&
	    resid >= PFS_DELEN) {
		entry = (struct dirent *)ent;
		entry->d_reclen = PFS_DELEN;
		entry->d_fileno = pn_fileno(pn, pid);
		/* PFS_DELEN was picked to fit PFS_NAMELEN */
		for (i = 0; i < PFS_NAMELEN - 1 && pn->pn_name[i] != '\0'; ++i)
			entry->d_name[i] = pn->pn_name[i];
		entry->d_name[i] = 0;
		entry->d_namlen = i;
		switch (pn->pn_type) {
		case pfstype_procdir:
			KASSERT(p != NULL,
			    ("reached procdir node with p == NULL"));
			entry->d_namlen = snprintf(entry->d_name,
			    PFS_NAMELEN, "%d", p->p_pid);
			/* fall through */
		case pfstype_root:
		case pfstype_dir:
		case pfstype_this:
		case pfstype_parent:
			entry->d_type = DT_DIR;
			break;
		case pfstype_file:
			entry->d_type = DT_REG;
			break;
		case pfstype_symlink:
			entry->d_type = DT_LNK;
			break;
		default:
			panic("%s has unexpected node type: %d", pn->pn_name, pn->pn_type);
		}
		PFS_TRACE(("%s", entry->d_name));
		offset += PFS_DELEN;
		resid -= PFS_DELEN;
		ent += PFS_DELEN;
	}
	if (proc != NULL)
		PROC_UNLOCK(proc);
	pfs_unlock(pd);
	sx_sunlock(&allproc_lock);
	PFS_TRACE(("%zd bytes", ent - buf));
	error = uiomove(buf, ent - buf, uio);
	free(buf, M_IOV);
	PFS_RETURN (error);
}
Code Example #21
File: fasttrap_isa.c Project: 2asoft/freebsd
int
fasttrap_pid_probe(struct reg *rp)
{
	proc_t *p = curproc;
#ifndef illumos
	struct rm_priotracker tracker;
	proc_t *pp;
#endif
	uintptr_t pc = rp->r_rip - 1;
	uintptr_t new_pc = 0;
	fasttrap_bucket_t *bucket;
#ifdef illumos
	kmutex_t *pid_mtx;
#endif
	fasttrap_tracepoint_t *tp, tp_local;
	pid_t pid;
	dtrace_icookie_t cookie;
	uint_t is_enabled = 0;

	/*
	 * It's possible that a user (in a veritable orgy of bad planning)
	 * could redirect this thread's flow of control before it reached the
	 * return probe fasttrap. In this case we need to kill the process
	 * since it's in an unrecoverable state.
	 */
	if (curthread->t_dtrace_step) {
		ASSERT(curthread->t_dtrace_on);
		fasttrap_sigtrap(p, curthread, pc);
		return (0);
	}

	/*
	 * Clear all user tracing flags.
	 */
	curthread->t_dtrace_ft = 0;
	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;
#ifdef __amd64
	curthread->t_dtrace_regv = 0;
#endif

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
#ifdef illumos
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	pid = p->p_pid;
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
#else
	pp = p;
	sx_slock(&proctree_lock);
	while (pp->p_vmspace == pp->p_pptr->p_vmspace)
		pp = pp->p_pptr;
	pid = pp->p_pid;
	sx_sunlock(&proctree_lock);
	pp = NULL;

	rm_rlock(&fasttrap_tp_lock, &tracker);
#endif

	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/*
	 * Lookup the tracepoint that the process just hit.
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * If we couldn't find a matching tracepoint, either a tracepoint has
	 * been inserted without using the pid<pid> ioctl interface (see
	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
	 */
	if (tp == NULL) {
#ifdef illumos
		mutex_exit(pid_mtx);
#else
		rm_runlock(&fasttrap_tp_lock, &tracker);
#endif
		return (-1);
	}

	/*
	 * Set the program counter to the address of the traced instruction
	 * so that it looks right in ustack() output.
	 */
	rp->r_rip = pc;

	if (tp->ftt_ids != NULL) {
		fasttrap_id_t *id;

#ifdef __amd64
		if (p->p_model == DATAMODEL_LP64) {
			for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
				fasttrap_probe_t *probe = id->fti_probe;

				if (id->fti_ptype == DTFTP_ENTRY) {
					/*
					 * We note that this was an entry
					 * probe to help ustack() find the
					 * first caller.
					 */
					cookie = dtrace_interrupt_disable();
					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
					dtrace_probe(probe->ftp_id, rp->r_rdi,
					    rp->r_rsi, rp->r_rdx, rp->r_rcx,
					    rp->r_r8);
					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
					dtrace_interrupt_enable(cookie);
				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
					/*
					 * Note that in this case, we don't
					 * call dtrace_probe() since it's only
					 * an artificial probe meant to change
					 * the flow of control so that it
					 * encounters the true probe.
					 */
					is_enabled = 1;
				} else if (probe->ftp_argmap == NULL) {
					dtrace_probe(probe->ftp_id, rp->r_rdi,
					    rp->r_rsi, rp->r_rdx, rp->r_rcx,
					    rp->r_r8);
				} else {
					uintptr_t t[5];

					fasttrap_usdt_args64(probe, rp,
					    sizeof (t) / sizeof (t[0]), t);

					dtrace_probe(probe->ftp_id, t[0], t[1],
					    t[2], t[3], t[4]);
				}
			}
		} else {
#else /* __amd64 */
			uintptr_t s0, s1, s2, s3, s4, s5;
			uint32_t *stack = (uint32_t *)rp->r_esp;

			/*
			 * In 32-bit mode, all arguments are passed on the
			 * stack. If this is a function entry probe, we need
			 * to skip the first entry on the stack as it
			 * represents the return address rather than a
			 * parameter to the function.
			 */
			s0 = fasttrap_fuword32_noerr(&stack[0]);
			s1 = fasttrap_fuword32_noerr(&stack[1]);
			s2 = fasttrap_fuword32_noerr(&stack[2]);
			s3 = fasttrap_fuword32_noerr(&stack[3]);
			s4 = fasttrap_fuword32_noerr(&stack[4]);
			s5 = fasttrap_fuword32_noerr(&stack[5]);

			for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
				fasttrap_probe_t *probe = id->fti_probe;

				if (id->fti_ptype == DTFTP_ENTRY) {
					/*
					 * We note that this was an entry
					 * probe to help ustack() find the
					 * first caller.
					 */
					cookie = dtrace_interrupt_disable();
					DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
					dtrace_probe(probe->ftp_id, s1, s2,
					    s3, s4, s5);
					DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
					dtrace_interrupt_enable(cookie);
				} else if (id->fti_ptype == DTFTP_IS_ENABLED) {
					/*
					 * Note that in this case, we don't
					 * call dtrace_probe() since it's only
					 * an artificial probe meant to change
					 * the flow of control so that it
					 * encounters the true probe.
					 */
					is_enabled = 1;
				} else if (probe->ftp_argmap == NULL) {
					dtrace_probe(probe->ftp_id, s0, s1,
					    s2, s3, s4);
				} else {
					uint32_t t[5];

					fasttrap_usdt_args32(probe, rp,
					    sizeof (t) / sizeof (t[0]), t);

					dtrace_probe(probe->ftp_id, t[0], t[1],
					    t[2], t[3], t[4]);
				}
			}
#endif /* __amd64 */
#ifdef __amd64
		}
#endif
	}

	/*
	 * We're about to do a bunch of work so we cache a local copy of
	 * the tracepoint to emulate the instruction, and then find the
	 * tracepoint again later if we need to light up any return probes.
	 */
	tp_local = *tp;
#ifdef illumos
	mutex_exit(pid_mtx);
#else
	rm_runlock(&fasttrap_tp_lock, &tracker);
#endif
	tp = &tp_local;

	/*
	 * Set the program counter to appear as though the traced instruction
	 * had completely executed. This ensures that fasttrap_getreg() will
	 * report the expected value for REG_RIP.
	 */
	rp->r_rip = pc + tp->ftt_size;

	/*
	 * If there's an is-enabled probe connected to this tracepoint it
	 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
	 * instruction that was placed there by DTrace when the binary was
	 * linked. As this probe is, in fact, enabled, we need to stuff 1
	 * into %eax or %rax. Accordingly, we can bypass all the instruction
	 * emulation logic since we know the inevitable result. It's possible
	 * that a user could construct a scenario where the 'is-enabled'
	 * probe was on some other instruction, but that would be a rather
	 * exotic way to shoot oneself in the foot.
	 */
	if (is_enabled) {
		rp->r_rax = 1;
		new_pc = rp->r_rip;
		goto done;
	}

	/*
	 * We emulate certain types of instructions to ensure correctness
	 * (in the case of position dependent instructions) or optimize
	 * common cases. The rest we have the thread execute back in user-
	 * land.
	 */
	switch (tp->ftt_type) {
	case FASTTRAP_T_RET:
	case FASTTRAP_T_RET16:
	{
		uintptr_t dst = 0;
		uintptr_t addr = 0;
		int ret = 0;

		/*
		 * We have to emulate _every_ facet of the behavior of a ret
		 * instruction including what happens if the load from %esp
		 * fails; in that case, we send a SIGSEGV.
		 */
#ifdef __amd64
		if (p->p_model == DATAMODEL_NATIVE) {
			ret = dst = fasttrap_fulword((void *)rp->r_rsp);
			addr = rp->r_rsp + sizeof (uintptr_t);
		} else {
#endif
#ifdef __i386__
			uint32_t dst32;
			ret = dst32 = fasttrap_fuword32((void *)rp->r_esp);
			dst = dst32;
			addr = rp->r_esp + sizeof (uint32_t);
#endif
#ifdef __amd64
		}
#endif

		if (ret == -1) {
			fasttrap_sigsegv(p, curthread, rp->r_rsp);
			new_pc = pc;
			break;
		}

		if (tp->ftt_type == FASTTRAP_T_RET16)
			addr += tp->ftt_dest;

		rp->r_rsp = addr;
		new_pc = dst;
		break;
	}

	case FASTTRAP_T_JCC:
	{
		uint_t taken = 0;

		switch (tp->ftt_code) {
		case FASTTRAP_JO:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) != 0;
			break;
		case FASTTRAP_JNO:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0;
			break;
		case FASTTRAP_JB:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0;
			break;
		case FASTTRAP_JAE:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0;
			break;
		case FASTTRAP_JE:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0;
			break;
		case FASTTRAP_JNE:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0;
			break;
		case FASTTRAP_JBE:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) != 0 ||
			    (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0;
			break;
		case FASTTRAP_JA:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_CF) == 0 &&
			    (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0;
			break;
		case FASTTRAP_JS:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) != 0;
			break;
		case FASTTRAP_JNS:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0;
			break;
		case FASTTRAP_JP:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) != 0;
			break;
		case FASTTRAP_JNP:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_PF) == 0;
			break;
		case FASTTRAP_JL:
			taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) !=
			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
			break;
		case FASTTRAP_JGE:
			taken = ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) ==
			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
			break;
		case FASTTRAP_JLE:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 ||
			    ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) !=
			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
			break;
		case FASTTRAP_JG:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 &&
			    ((rp->r_rflags & FASTTRAP_EFLAGS_SF) == 0) ==
			    ((rp->r_rflags & FASTTRAP_EFLAGS_OF) == 0);
			break;

		}

		if (taken)
			new_pc = tp->ftt_dest;
		else
			new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_LOOP:
	{
		uint_t taken = 0;
#ifdef __amd64
		greg_t cx = rp->r_rcx--;
#else
		greg_t cx = rp->r_ecx--;
#endif

		switch (tp->ftt_code) {
		case FASTTRAP_LOOPNZ:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) == 0 &&
			    cx != 0;
			break;
		case FASTTRAP_LOOPZ:
			taken = (rp->r_rflags & FASTTRAP_EFLAGS_ZF) != 0 &&
			    cx != 0;
			break;
		case FASTTRAP_LOOP:
			taken = (cx != 0);
			break;
		}

		if (taken)
			new_pc = tp->ftt_dest;
		else
			new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_JCXZ:
	{
#ifdef __amd64
		greg_t cx = rp->r_rcx;
#else
		greg_t cx = rp->r_ecx;
#endif

		if (cx == 0)
			new_pc = tp->ftt_dest;
		else
			new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_PUSHL_EBP:
	{
		int ret = 0;

#ifdef __amd64
		if (p->p_model == DATAMODEL_NATIVE) {
			rp->r_rsp -= sizeof (uintptr_t);
			ret = fasttrap_sulword((void *)rp->r_rsp, rp->r_rbp);
		} else {
#endif
#ifdef __i386__
			rp->r_rsp -= sizeof (uint32_t);
			ret = fasttrap_suword32((void *)rp->r_rsp, rp->r_rbp);
#endif
#ifdef __amd64
		}
#endif

		if (ret == -1) {
			fasttrap_sigsegv(p, curthread, rp->r_rsp);
			new_pc = pc;
			break;
		}

		new_pc = pc + tp->ftt_size;
		break;
	}

	case FASTTRAP_T_NOP:
		new_pc = pc + tp->ftt_size;
		break;

	case FASTTRAP_T_JMP:
	case FASTTRAP_T_CALL:
		if (tp->ftt_code == 0) {
			new_pc = tp->ftt_dest;
		} else {
			uintptr_t value, addr = tp->ftt_dest;

			if (tp->ftt_base != FASTTRAP_NOREG)
				addr += fasttrap_getreg(rp, tp->ftt_base);
			if (tp->ftt_index != FASTTRAP_NOREG)
				addr += fasttrap_getreg(rp, tp->ftt_index) <<
				    tp->ftt_scale;

			if (tp->ftt_code == 1) {
				/*
				 * If there's a segment prefix for this
				 * instruction, we'll need to check permissions
				 * and bounds on the given selector, and adjust
				 * the address accordingly.
				 */
				if (tp->ftt_segment != FASTTRAP_SEG_NONE &&
				    fasttrap_do_seg(tp, rp, &addr) != 0) {
					fasttrap_sigsegv(p, curthread, addr);
					new_pc = pc;
					break;
				}

#ifdef __amd64
				if (p->p_model == DATAMODEL_NATIVE) {
#endif
					if ((value = fasttrap_fulword((void *)addr))
					     == -1) {
						fasttrap_sigsegv(p, curthread,
						    addr);
						new_pc = pc;
						break;
					}
					new_pc = value;
#ifdef __amd64
				} else {
					uint32_t value32;
					addr = (uintptr_t)(uint32_t)addr;
					if ((value32 = fasttrap_fuword32((void *)addr))
					    == -1) {
						fasttrap_sigsegv(p, curthread,
						    addr);
						new_pc = pc;
						break;
					}
					new_pc = value32;
				}
#endif
			} else {
				new_pc = addr;
			}
		}

		/*
		 * If this is a call instruction, we need to push the return
		 * address onto the stack. If this fails, we send the process
		 * a SIGSEGV and reset the pc to emulate what would happen if
		 * this instruction weren't traced.
		 */
		if (tp->ftt_type == FASTTRAP_T_CALL) {
			int ret = 0;
			uintptr_t addr = 0, pcps;
#ifdef __amd64
			if (p->p_model == DATAMODEL_NATIVE) {
				addr = rp->r_rsp - sizeof (uintptr_t);
				pcps = pc + tp->ftt_size;
				ret = fasttrap_sulword((void *)addr, pcps);
			} else {
#endif
				addr = rp->r_rsp - sizeof (uint32_t);
				pcps = (uint32_t)(pc + tp->ftt_size);
				ret = fasttrap_suword32((void *)addr, pcps);
#ifdef __amd64
			}
#endif

			if (ret == -1) {
				fasttrap_sigsegv(p, curthread, addr);
				new_pc = pc;
				break;
			}

			rp->r_rsp = addr;
		}

		break;

	case FASTTRAP_T_COMMON:
	{
		uintptr_t addr;
#if defined(__amd64)
		uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 22];
#else
		uint8_t scratch[2 * FASTTRAP_MAX_INSTR_SIZE + 7];
#endif
		uint_t i = 0;
#ifdef illumos
		klwp_t *lwp = ttolwp(curthread);

		/*
		 * Compute the address of the ulwp_t and step over the
		 * ul_self pointer. The method used to store the user-land
		 * thread pointer is very different on 32- and 64-bit
		 * kernels.
		 */
#if defined(__amd64)
		if (p->p_model == DATAMODEL_LP64) {
			addr = lwp->lwp_pcb.pcb_fsbase;
			addr += sizeof (void *);
		} else {
			addr = lwp->lwp_pcb.pcb_gsbase;
			addr += sizeof (caddr32_t);
		}
#else
		addr = USD_GETBASE(&lwp->lwp_pcb.pcb_gsdesc);
		addr += sizeof (void *);
#endif
#else	/* !illumos */
		fasttrap_scrspace_t *scrspace;
		scrspace = fasttrap_scraddr(curthread, tp->ftt_proc);
		if (scrspace == NULL) {
			/*
			 * We failed to allocate scratch space for this thread.
			 * Try to write the original instruction back out and
			 * reset the pc.
			 */
			if (fasttrap_copyout(tp->ftt_instr, (void *)pc,
			    tp->ftt_size))
				fasttrap_sigtrap(p, curthread, pc);
			new_pc = pc;
			break;
		}
		addr = scrspace->ftss_addr;
#endif /* illumos */

		/*
		 * Generic Instruction Tracing
		 * ---------------------------
		 *
		 * This is the layout of the scratch space in the user-land
		 * thread structure for our generated instructions.
		 *
		 *	32-bit mode			bytes
		 *	------------------------	-----
		 * a:	<original instruction>		<= 15
		 *	jmp	<pc + tp->ftt_size>	    5
		 * b:	<original instruction>		<= 15
		 *	int	T_DTRACE_RET		    2
		 *					-----
		 *					<= 37
		 *
		 *	64-bit mode			bytes
		 *	------------------------	-----
		 * a:	<original instruction>		<= 15
		 *	jmp	0(%rip)			    6
		 *	<pc + tp->ftt_size>		    8
		 * b:	<original instruction>		<= 15
		 * 	int	T_DTRACE_RET		    2
		 * 					-----
		 * 					<= 46
		 *
		 * The %pc is set to a, and curthread->t_dtrace_astpc is set
		 * to b. If we encounter a signal on the way out of the
		 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
		 * so that we execute the original instruction and re-enter
		 * the kernel rather than redirecting to the next instruction.
		 *
		 * If there are return probes (so we know that we're going to
		 * need to reenter the kernel after executing the original
		 * instruction), the scratch space will just contain the
		 * original instruction followed by an interrupt -- the same
		 * data as at b.
		 *
		 * %rip-relative Addressing
		 * ------------------------
		 *
		 * There's a further complication in 64-bit mode due to %rip-
		 * relative addressing. While this is clearly a beneficial
		 * architectural decision for position independent code, it's
		 * hard not to see it as a personal attack against the pid
		 * provider since before there was a relatively small set of
		 * instructions to emulate; with %rip-relative addressing,
		 * almost every instruction can potentially depend on the
		 * address at which it's executed. Rather than emulating
		 * the broad spectrum of instructions that can now be
		 * position dependent, we emulate jumps and others as in
		 * 32-bit mode, and take a different tack for instructions
		 * using %rip-relative addressing.
		 *
		 * For every instruction that uses the ModRM byte, the
		 * in-kernel disassembler reports its location. We use the
		 * ModRM byte to identify that an instruction uses
		 * %rip-relative addressing and to see what other registers
		 * the instruction uses. To emulate those instructions,
		 * we modify the instruction to be %rax-relative rather than
		 * %rip-relative (or %rcx-relative if the instruction uses
		 * %rax; or %r8- or %r9-relative if the REX.B is present so
		 * we don't have to rewrite the REX prefix). We then load
		 * the value that %rip would have been into the scratch
		 * register and generate an instruction to reset the scratch
		 * register back to its original value. The instruction
		 * sequence looks like this:
		 *
		 *	64-mode %rip-relative		bytes
		 *	------------------------	-----
		 * a:	<modified instruction>		<= 15
		 *	movq	$<value>, %<scratch>	    6
		 *	jmp	0(%rip)			    6
		 *	<pc + tp->ftt_size>		    8
		 * b:	<modified instruction>  	<= 15
		 * 	int	T_DTRACE_RET		    2
		 * 					-----
		 *					   52
		 *
		 * We set curthread->t_dtrace_regv so that upon receiving
		 * a signal we can reset the value of the scratch register.
		 */

		ASSERT(tp->ftt_size < FASTTRAP_MAX_INSTR_SIZE);

		curthread->t_dtrace_scrpc = addr;
		bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
		i += tp->ftt_size;

#ifdef __amd64
		if (tp->ftt_ripmode != 0) {
			greg_t *reg = NULL;

			ASSERT(p->p_model == DATAMODEL_LP64);
			ASSERT(tp->ftt_ripmode &
			    (FASTTRAP_RIP_1 | FASTTRAP_RIP_2));

			/*
			 * If this was a %rip-relative instruction, we change
			 * it to be either a %rax- or %rcx-relative
			 * instruction (depending on whether those registers
			 * are used as another operand; or %r8- or %r9-
			 * relative depending on the value of REX.B). We then
			 * set that register and generate a movq instruction
			 * to reset the value.
			 */
			if (tp->ftt_ripmode & FASTTRAP_RIP_X)
				scratch[i++] = FASTTRAP_REX(1, 0, 0, 1);
			else
				scratch[i++] = FASTTRAP_REX(1, 0, 0, 0);

			if (tp->ftt_ripmode & FASTTRAP_RIP_1)
				scratch[i++] = FASTTRAP_MOV_EAX;
			else
				scratch[i++] = FASTTRAP_MOV_ECX;

			switch (tp->ftt_ripmode) {
			case FASTTRAP_RIP_1:
				reg = &rp->r_rax;
				curthread->t_dtrace_reg = REG_RAX;
				break;
			case FASTTRAP_RIP_2:
				reg = &rp->r_rcx;
				curthread->t_dtrace_reg = REG_RCX;
				break;
			case FASTTRAP_RIP_1 | FASTTRAP_RIP_X:
				reg = &rp->r_r8;
				curthread->t_dtrace_reg = REG_R8;
				break;
			case FASTTRAP_RIP_2 | FASTTRAP_RIP_X:
				reg = &rp->r_r9;
				curthread->t_dtrace_reg = REG_R9;
				break;
			}

			/* LINTED - alignment */
			*(uint64_t *)&scratch[i] = *reg;
			curthread->t_dtrace_regv = *reg;
			*reg = pc + tp->ftt_size;
			i += sizeof (uint64_t);
		}
#endif

		/*
		 * Generate the branch instruction to what would have
		 * normally been the subsequent instruction. In 32-bit mode,
		 * this is just a relative branch; in 64-bit mode this is a
		 * %rip-relative branch that loads the 64-bit pc value
		 * immediately after the jmp instruction.
		 */
#ifdef __amd64
		if (p->p_model == DATAMODEL_LP64) {
			scratch[i++] = FASTTRAP_GROUP5_OP;
			scratch[i++] = FASTTRAP_MODRM(0, 4, 5);
			/* LINTED - alignment */
			*(uint32_t *)&scratch[i] = 0;
			i += sizeof (uint32_t);
			/* LINTED - alignment */
			*(uint64_t *)&scratch[i] = pc + tp->ftt_size;
			i += sizeof (uint64_t);
		} else {
#endif
#ifdef __i386__
			/*
			 * Set up the jmp to the next instruction; note that
			 * the size of the traced instruction cancels out.
			 */
			scratch[i++] = FASTTRAP_JMP32;
			/* LINTED - alignment */
			*(uint32_t *)&scratch[i] = pc - addr - 5;
			i += sizeof (uint32_t);
#endif
#ifdef __amd64
		}
#endif

		curthread->t_dtrace_astpc = addr + i;
		bcopy(tp->ftt_instr, &scratch[i], tp->ftt_size);
		i += tp->ftt_size;
		scratch[i++] = FASTTRAP_INT;
		scratch[i++] = T_DTRACE_RET;

		ASSERT(i <= sizeof (scratch));

#ifdef illumos
		if (fasttrap_copyout(scratch, (char *)addr, i)) {
#else
		if (uwrite(p, scratch, i, addr)) {
#endif
			fasttrap_sigtrap(p, curthread, pc);
			new_pc = pc;
			break;
		}
		if (tp->ftt_retids != NULL) {
			curthread->t_dtrace_step = 1;
			curthread->t_dtrace_ret = 1;
			new_pc = curthread->t_dtrace_astpc;
		} else {
			new_pc = curthread->t_dtrace_scrpc;
		}

		curthread->t_dtrace_pc = pc;
		curthread->t_dtrace_npc = pc + tp->ftt_size;
		curthread->t_dtrace_on = 1;
		break;
	}

	default:
		panic("fasttrap: mishandled an instruction");
	}

done:
	/*
	 * If there were no return probes when we first found the tracepoint,
	 * we should feel no obligation to honor any return probes that were
	 * subsequently enabled -- they'll just have to wait until the next
	 * time around.
	 */
	if (tp->ftt_retids != NULL) {
		/*
		 * We need to wait until the results of the instruction are
		 * apparent before invoking any return probes. If this
		 * instruction was emulated we can just call
		 * fasttrap_return_common(); if it needs to be executed, we
		 * need to wait until the user thread returns to the kernel.
		 */
		if (tp->ftt_type != FASTTRAP_T_COMMON) {
			/*
			 * Set the program counter to the address of the traced
			 * instruction so that it looks right in ustack()
			 * output. We had previously set it to the end of the
			 * instruction to simplify %rip-relative addressing.
			 */
			rp->r_rip = pc;

			fasttrap_return_common(rp, pc, pid, new_pc);
		} else {
			ASSERT(curthread->t_dtrace_ret != 0);
			ASSERT(curthread->t_dtrace_pc == pc);
			ASSERT(curthread->t_dtrace_scrpc != 0);
			ASSERT(new_pc == curthread->t_dtrace_astpc);
		}
	}

	rp->r_rip = new_pc;

#ifndef illumos
	PROC_LOCK(p);
	proc_write_regs(curthread, rp);
	PROC_UNLOCK(p);
#endif

	return (0);
}

int
fasttrap_return_probe(struct reg *rp)
{
	proc_t *p = curproc;
	uintptr_t pc = curthread->t_dtrace_pc;
	uintptr_t npc = curthread->t_dtrace_npc;

	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

#ifdef illumos
	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}
#endif

	/*
	 * We set rp->r_rip to the address of the traced instruction so
	 * that it appears to dtrace_probe() that we're on the original
	 * instruction, and so that the user can't easily detect our
	 * complex web of lies. dtrace_return_probe() (our caller)
	 * will correctly set %pc after we return.
	 */
	rp->r_rip = pc;

	fasttrap_return_common(rp, pc, p->p_pid, npc);

	return (0);
}
Code Example #22
static void
deadlkres(void)
{
	struct proc *p;
	struct thread *td;
	void *wchan;
	int blkticks, i, slpticks, slptype, tryl, tticks;

	tryl = 0;
	for (;;) {
		blkticks = blktime_threshold * hz;
		slpticks = slptime_threshold * hz;

		/*
		 * Avoid sleeping on the sx lock in order to prevent a
		 * possible priority inversion leading to starvation.
		 * If the lock can't be acquired after 100 tries, panic.
		 */
		if (!sx_try_slock(&allproc_lock)) {
			if (tryl > 100)
		panic("%s: possible deadlock detected on allproc_lock\n",
				    __func__);
			tryl++;
			pause("allproc", sleepfreq * hz);
			continue;
		}
		tryl = 0;
		FOREACH_PROC_IN_SYSTEM(p) {
			PROC_LOCK(p);
			if (p->p_state == PRS_NEW) {
				PROC_UNLOCK(p);
				continue;
			}
			FOREACH_THREAD_IN_PROC(p, td) {

				/*
				 * Once a thread is found in "interesting"
				 * state a possible ticks wrap-up needs to be
				 * checked.
				 */
				thread_lock(td);
				if (TD_ON_LOCK(td) && ticks < td->td_blktick) {

					/*
					 * The thread should be blocked on a
					 * turnstile, simply check if the
					 * turnstile channel is in good state.
					 */
					MPASS(td->td_blocked != NULL);

					tticks = ticks - td->td_blktick;
					thread_unlock(td);
					if (tticks > blkticks) {

					/*
					 * According to the provided
					 * thresholds, this thread is
					 * stuck for too long on a
					 * turnstile.
					 */
						PROC_UNLOCK(p);
						sx_sunlock(&allproc_lock);
	panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
						    __func__, td, tticks);
					}
				} else if (TD_IS_SLEEPING(td) &&
				    TD_ON_SLEEPQ(td) &&
				    ticks < td->td_blktick) {

					/*
					 * Check if the thread is sleeping on a
					 * lock, otherwise skip the check.
					 * Drop the thread lock in order to
					 * avoid a LOR with the sleepqueue
					 * spinlock.
					 */
					wchan = td->td_wchan;
					tticks = ticks - td->td_slptick;
					thread_unlock(td);
					slptype = sleepq_type(wchan);
					if ((slptype == SLEEPQ_SX ||
					    slptype == SLEEPQ_LK) &&
					    tticks > slpticks) {

						/*
						 * According to the provided
						 * thresholds, this thread is
						 * stuck for too long on a
						 * sleepqueue.  However,
						 * being on a sleepqueue we
						 * might still need to check
						 * the blessed list.
						 */
						tryl = 0;
						for (i = 0; blessed[i] != NULL;
						    i++) {
							if (!strcmp(blessed[i],
							    td->td_wmesg)) {
								tryl = 1;
								break;
							}
						}
						if (tryl != 0) {
							tryl = 0;
							continue;
						}
						PROC_UNLOCK(p);
						sx_sunlock(&allproc_lock);
	panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
						    __func__, td, tticks);
					}
				} else
					thread_unlock(td);
			}
			PROC_UNLOCK(p);
		}
		sx_sunlock(&allproc_lock);

		/* Sleep for sleepfreq seconds. */
		pause("-", sleepfreq * hz);
	}
Code Example #23
File: kern_fork.c Project: mulichao/freebsd
int
fork1(struct thread *td, struct fork_req *fr)
{
	struct proc *p1, *newproc;
	struct thread *td2;
	struct vmspace *vm2;
	struct file *fp_procdesc;
	vm_ooffset_t mem_charged;
	int error, nprocs_new, ok;
	static int curfail;
	static struct timeval lastfail;
	int flags, pages;

	flags = fr->fr_flags;
	pages = fr->fr_pages;

	if ((flags & RFSTOPPED) != 0)
		MPASS(fr->fr_procp != NULL && fr->fr_pidp == NULL);
	else
		MPASS(fr->fr_procp == NULL);

	/* Check for the undefined or unimplemented flags. */
	if ((flags & ~(RFFLAGS | RFTSIGFLAGS(RFTSIGMASK))) != 0)
		return (EINVAL);

	/* Signal value requires RFTSIGZMB. */
	if ((flags & RFTSIGFLAGS(RFTSIGMASK)) != 0 && (flags & RFTSIGZMB) == 0)
		return (EINVAL);

	/* Can't copy and clear. */
	if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
		return (EINVAL);

	/* Check the validity of the signal number. */
	if ((flags & RFTSIGZMB) != 0 && (u_int)RFTSIGNUM(flags) > _SIG_MAXSIG)
		return (EINVAL);

	if ((flags & RFPROCDESC) != 0) {
		/* Can't get a process descriptor without also creating a process. */
		if ((flags & RFPROC) == 0)
			return (EINVAL);

		/* Must provide a place to put a procdesc if creating one. */
		if (fr->fr_pd_fd == NULL)
			return (EINVAL);

		/* Check if we are using supported flags. */
		if ((fr->fr_pd_flags & ~PD_ALLOWED_AT_FORK) != 0)
			return (EINVAL);
	}

	p1 = td->td_proc;

	/*
	 * Here we don't create a new process, but we divorce
	 * certain parts of a process from itself.
	 */
	if ((flags & RFPROC) == 0) {
		if (fr->fr_procp != NULL)
			*fr->fr_procp = NULL;
		else if (fr->fr_pidp != NULL)
			*fr->fr_pidp = 0;
		return (fork_norfproc(td, flags));
	}

	fp_procdesc = NULL;
	newproc = NULL;
	vm2 = NULL;

	/*
	 * Increment the nprocs resource before allocations occur.
	 * Although process entries are dynamically created, we still
	 * keep a global limit on the maximum number we will
	 * create. There are hard-limits as to the number of processes
	 * that can run, established by the KVA and memory usage for
	 * the process data.
	 *
	 * Don't allow a nonprivileged user to use the last ten
	 * processes; don't let root exceed the limit.
	 */
	nprocs_new = atomic_fetchadd_int(&nprocs, 1) + 1;
	if ((nprocs_new >= maxproc - 10 && priv_check_cred(td->td_ucred,
	    PRIV_MAXPROC, 0) != 0) || nprocs_new >= maxproc) {
		error = EAGAIN;
		sx_xlock(&allproc_lock);
		if (ppsratecheck(&lastfail, &curfail, 1)) {
			printf("maxproc limit exceeded by uid %u (pid %d); "
			    "see tuning(7) and login.conf(5)\n",
			    td->td_ucred->cr_ruid, p1->p_pid);
		}
		sx_xunlock(&allproc_lock);
		goto fail2;
	}

	/*
	 * If required, create a process descriptor in the parent first; we
	 * will abandon it if something goes wrong. We don't finit() until
	 * later.
	 */
	if (flags & RFPROCDESC) {
		error = procdesc_falloc(td, &fp_procdesc, fr->fr_pd_fd,
		    fr->fr_pd_flags, fr->fr_pd_fcaps);
		if (error != 0)
			goto fail2;
	}

	mem_charged = 0;
	if (pages == 0)
		pages = kstack_pages;
	/* Allocate new proc. */
	newproc = uma_zalloc(proc_zone, M_WAITOK);
	td2 = FIRST_THREAD_IN_PROC(newproc);
	if (td2 == NULL) {
		td2 = thread_alloc(pages);
		if (td2 == NULL) {
			error = ENOMEM;
			goto fail2;
		}
		proc_linkup(newproc, td2);
	} else {
		if (td2->td_kstack == 0 || td2->td_kstack_pages != pages) {
			if (td2->td_kstack != 0)
				vm_thread_dispose(td2);
			if (!thread_alloc_stack(td2, pages)) {
				error = ENOMEM;
				goto fail2;
			}
		}
	}

	if ((flags & RFMEM) == 0) {
		vm2 = vmspace_fork(p1->p_vmspace, &mem_charged);
		if (vm2 == NULL) {
			error = ENOMEM;
			goto fail2;
		}
		if (!swap_reserve(mem_charged)) {
			/*
			 * The swap reservation failed. The accounting
			 * from the entries of the copied vm2 will be
			 * subtracted in vmspace_free(), so force the
			 * reservation there.
			 */
			swap_reserve_force(mem_charged);
			error = ENOMEM;
			goto fail2;
		}
	} else
		vm2 = NULL;

	/*
	 * XXX: This is ugly; when we copy resource usage, we need to bump
	 *      per-cred resource counters.
	 */
	proc_set_cred_init(newproc, crhold(td->td_ucred));

	/*
	 * Initialize resource accounting for the child process.
	 */
	error = racct_proc_fork(p1, newproc);
	if (error != 0) {
		error = EAGAIN;
		goto fail1;
	}

#ifdef MAC
	mac_proc_init(newproc);
#endif
	newproc->p_klist = knlist_alloc(&newproc->p_mtx);
	STAILQ_INIT(&newproc->p_ktr);

	/* We have to lock the process tree while we look for a pid. */
	sx_slock(&proctree_lock);
	sx_xlock(&allproc_lock);

	/*
	 * Increment the count of procs running with this uid. Don't allow
	 * a nonprivileged user to exceed their current limit.
	 *
	 * XXXRW: Can we avoid privilege here if it's not needed?
	 */
	error = priv_check_cred(td->td_ucred, PRIV_PROC_LIMIT, 0);
	if (error == 0)
		ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1, 0);
	else {
		ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1,
		    lim_cur(td, RLIMIT_NPROC));
	}
	if (ok) {
		do_fork(td, fr, newproc, td2, vm2, fp_procdesc);
		return (0);
	}

	error = EAGAIN;
	sx_sunlock(&proctree_lock);
	sx_xunlock(&allproc_lock);
#ifdef MAC
	mac_proc_destroy(newproc);
#endif
	racct_proc_exit(newproc);
fail1:
	crfree(newproc->p_ucred);
	newproc->p_ucred = NULL;
fail2:
	if (vm2 != NULL)
		vmspace_free(vm2);
	uma_zfree(proc_zone, newproc);
	if ((flags & RFPROCDESC) != 0 && fp_procdesc != NULL) {
		fdclose(td, fp_procdesc, *fr->fr_pd_fd);
		fdrop(fp_procdesc, td);
	}
	atomic_add_int(&nprocs, -1);
	pause("fork", hz / 2);
	return (error);
}
Code Example #24
File: kern_fork.c Project: mulichao/freebsd
static void
do_fork(struct thread *td, struct fork_req *fr, struct proc *p2, struct thread *td2,
    struct vmspace *vm2, struct file *fp_procdesc)
{
	struct proc *p1, *pptr;
	int trypid;
	struct filedesc *fd;
	struct filedesc_to_leader *fdtol;
	struct sigacts *newsigacts;

	sx_assert(&proctree_lock, SX_SLOCKED);
	sx_assert(&allproc_lock, SX_XLOCKED);

	p1 = td->td_proc;

	trypid = fork_findpid(fr->fr_flags);

	sx_sunlock(&proctree_lock);

	p2->p_state = PRS_NEW;		/* protect against others */
	p2->p_pid = trypid;
	AUDIT_ARG_PID(p2->p_pid);
	LIST_INSERT_HEAD(&allproc, p2, p_list);
	allproc_gen++;
	LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
	tidhash_add(td2);
	PROC_LOCK(p2);
	PROC_LOCK(p1);

	sx_xunlock(&allproc_lock);

	bcopy(&p1->p_startcopy, &p2->p_startcopy,
	    __rangeof(struct proc, p_startcopy, p_endcopy));
	pargs_hold(p2->p_args);

	PROC_UNLOCK(p1);

	bzero(&p2->p_startzero,
	    __rangeof(struct proc, p_startzero, p_endzero));

	/* Tell the prison that we exist. */
	prison_proc_hold(p2->p_ucred->cr_prison);

	PROC_UNLOCK(p2);

	/*
	 * Malloc things while we don't hold any locks.
	 */
	if (fr->fr_flags & RFSIGSHARE)
		newsigacts = NULL;
	else
		newsigacts = sigacts_alloc();

	/*
	 * Copy filedesc.
	 */
	if (fr->fr_flags & RFCFDG) {
		fd = fdinit(p1->p_fd, false);
		fdtol = NULL;
	} else if (fr->fr_flags & RFFDG) {
		fd = fdcopy(p1->p_fd);
		fdtol = NULL;
	} else {
		fd = fdshare(p1->p_fd);
		if (p1->p_fdtol == NULL)
			p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL,
			    p1->p_leader);
		if ((fr->fr_flags & RFTHREAD) != 0) {
			/*
			 * Shared file descriptor table, and shared
			 * process leaders.
			 */
			fdtol = p1->p_fdtol;
			FILEDESC_XLOCK(p1->p_fd);
			fdtol->fdl_refcount++;
			FILEDESC_XUNLOCK(p1->p_fd);
		} else {
			/* 
			 * Shared file descriptor table, and different
			 * process leaders.
			 */
			fdtol = filedesc_to_leader_alloc(p1->p_fdtol,
			    p1->p_fd, p2);
		}
	}
	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */

	PROC_LOCK(p2);
	PROC_LOCK(p1);

	bzero(&td2->td_startzero,
	    __rangeof(struct thread, td_startzero, td_endzero));

	bcopy(&td->td_startcopy, &td2->td_startcopy,
	    __rangeof(struct thread, td_startcopy, td_endcopy));

	bcopy(&p2->p_comm, &td2->td_name, sizeof(td2->td_name));
	td2->td_sigstk = td->td_sigstk;
	td2->td_flags = TDF_INMEM;
	td2->td_lend_user_pri = PRI_MAX;

#ifdef VIMAGE
	td2->td_vnet = NULL;
	td2->td_vnet_lpush = NULL;
#endif

	/*
	 * Allow the scheduler to initialize the child.
	 */
	thread_lock(td);
	sched_fork(td, td2);
	thread_unlock(td);

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 */
	p2->p_flag = P_INMEM;
	p2->p_flag2 = p1->p_flag2 & (P2_NOTRACE | P2_NOTRACE_EXEC | P2_TRAPCAP);
	p2->p_swtick = ticks;
	if (p1->p_flag & P_PROFIL)
		startprofclock(p2);

	/*
	 * While the proc lock is held, copy the parent's VM domain
	 * policy into the child.
	 */
	vm_domain_policy_init(&p2->p_vm_dom_policy);
	vm_domain_policy_localcopy(&p2->p_vm_dom_policy,
	    &p1->p_vm_dom_policy);

	if (fr->fr_flags & RFSIGSHARE) {
		p2->p_sigacts = sigacts_hold(p1->p_sigacts);
	} else {
		sigacts_copy(newsigacts, p1->p_sigacts);
		p2->p_sigacts = newsigacts;
	}

	if (fr->fr_flags & RFTSIGZMB)
		p2->p_sigparent = RFTSIGNUM(fr->fr_flags);
	else if (fr->fr_flags & RFLINUXTHPN)
		p2->p_sigparent = SIGUSR1;
	else
		p2->p_sigparent = SIGCHLD;

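	/* Inherit the text vnode and install the descriptor tables chosen above. */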
	p2->p_textvp = p1->p_textvp;
	p2->p_fd = fd;
	p2->p_fdtol = fdtol;

	if (p1->p_flag2 & P2_INHERIT_PROTECTED) {
		p2->p_flag |= P_PROTECTED;
		p2->p_flag2 |= P2_INHERIT_PROTECTED;
	}

	/*
	 * p_limit is copy-on-write.  Bump its refcount.
	 */
	lim_fork(p1, p2);

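	/* Take copy-on-write references (e.g. ucred, limits) for the new thread. */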
	thread_cow_get_proc(td2, p2);

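	/* Seed the child's statistics, copying the inheritable part from the parent. */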
	pstats_fork(p1->p_stats, p2->p_stats);

	PROC_UNLOCK(p1);
	PROC_UNLOCK(p2);

	/* Bump references to the text vnode (for procfs). */
	if (p2->p_textvp)
		vrefact(p2->p_textvp);

	/*
	 * Set up linkage for kernel based threading.
	 */
	if ((fr->fr_flags & RFTHREAD) != 0) {
		mtx_lock(&ppeers_lock);
		p2->p_peers = p1->p_peers;
		p1->p_peers = p2;
		p2->p_leader = p1->p_leader;
		mtx_unlock(&ppeers_lock);
		PROC_LOCK(p1->p_leader);
		if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
			PROC_UNLOCK(p1->p_leader);
			/*
			 * The task leader is exiting, so process p1 is
			 * going to be killed shortly.  Since p1 obviously
			 * isn't dead yet, we know that the leader is either
			 * sending SIGKILL's to all the processes in this
			 * task or is sleeping waiting for all the peers to
			 * exit.  We let p1 complete the fork, but we need
			 * to go ahead and kill the new process p2 since
			 * the task leader may not get a chance to send
			 * SIGKILL to it.  We leave it on the list so that
			 * the task leader will wait for this new process
			 * to commit suicide.
			 */
			PROC_LOCK(p2);
			kern_psignal(p2, SIGKILL);
			PROC_UNLOCK(p2);
		} else
			PROC_UNLOCK(p1->p_leader);
	} else {
		p2->p_peers = NULL;
		p2->p_leader = p2;
	}

	sx_xlock(&proctree_lock);
	PGRP_LOCK(p1->p_pgrp);
	PROC_LOCK(p2);
	PROC_LOCK(p1);

	/*
	 * Preserve some more flags in subprocess.  P_PROFIL has already
	 * been preserved.
	 */
	p2->p_flag |= p1->p_flag & P_SUGID;
	td2->td_pflags |= (td->td_pflags & TDP_ALTSTACK) | TDP_FORKING;
	SESS_LOCK(p1->p_session);
	if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
		p2->p_flag |= P_CONTROLT;
	SESS_UNLOCK(p1->p_session);
	if (fr->fr_flags & RFPPWAIT)
		p2->p_flag |= P_PPWAIT;

	p2->p_pgrp = p1->p_pgrp;
	LIST_INSERT_AFTER(p1, p2, p_pglist);
	PGRP_UNLOCK(p1->p_pgrp);
	LIST_INIT(&p2->p_children);
	LIST_INIT(&p2->p_orphans);

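	/* Set up the interval-timer callout, protected by the child's proc mutex. */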
	callout_init_mtx(&p2->p_itcallout, &p2->p_mtx, 0);

	/*
	 * If PF_FORK is set, the child process inherits the
	 * procfs ioctl flags from its parent.
	 */
	if (p1->p_pfsflags & PF_FORK) {
		p2->p_stops = p1->p_stops;
		p2->p_pfsflags = p1->p_pfsflags;
	}

	/*
	 * This begins the section where we must prevent the parent
	 * from being swapped.
	 */
	_PHOLD(p1);
	PROC_UNLOCK(p1);

	/*
	 * Attach the new process to its parent.
	 *
	 * If RFNOWAIT is set, the newly created process is reparented
	 * to p1's reaper (traditionally init).  This effectively
	 * disassociates the child from the parent.
	 */
	if ((fr->fr_flags & RFNOWAIT) != 0) {
		pptr = p1->p_reaper;
		p2->p_reaper = pptr;
	} else {
		p2->p_reaper = (p1->p_treeflag & P_TREE_REAPER) != 0 ?
		    p1 : p1->p_reaper;
		pptr = p1;
	}
	p2->p_pptr = pptr;
	LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
	LIST_INIT(&p2->p_reaplist);
	LIST_INSERT_HEAD(&p2->p_reaper->p_reaplist, p2, p_reapsibling);
	if (p2->p_reaper == p1)
		p2->p_reapsubtree = p2->p_pid;
	sx_xunlock(&proctree_lock);

	/* Inform accounting that we have forked. */
	p2->p_acflag = AFORK;
	PROC_UNLOCK(p2);

#ifdef KTRACE
	ktrprocfork(p1, p2);
#endif

	/*
	 * Finish creating the child process.  It will return via a different
	 * execution path later (i.e., directly into user mode).
	 */
	vm_forkproc(td, p2, td2, vm2, fr->fr_flags);

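	/*
	 * Account for the fork in the VM statistics; the exact flag
	 * combination distinguishes fork, vfork, kernel threads and rfork.
	 */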
	if (fr->fr_flags == (RFFDG | RFPROC)) {
		VM_CNT_INC(v_forks);
		VM_CNT_ADD(v_forkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else if (fr->fr_flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
		VM_CNT_INC(v_vforks);
		VM_CNT_ADD(v_vforkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else if (p1 == &proc0) {
		VM_CNT_INC(v_kthreads);
		VM_CNT_ADD(v_kthreadpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	} else {
		VM_CNT_INC(v_rforks);
		VM_CNT_ADD(v_rforkpages, p2->p_vmspace->vm_dsize +
		    p2->p_vmspace->vm_ssize);
	}

	/*
	 * Associate the process descriptor with the process before anything
	 * can happen that might cause that process to need the descriptor.
	 * However, don't do this until after fork(2) can no longer fail.
	 */
	if (fr->fr_flags & RFPROCDESC)
		procdesc_new(p2, fr->fr_pd_flags);

	/*
	 * Both processes are set up; now check whether any loadable
	 * modules want to adjust anything.
	 */
	EVENTHANDLER_INVOKE(process_fork, p1, p2, fr->fr_flags);

	/*
	 * Set the child start time and mark the process as being complete.
	 */
	PROC_LOCK(p2);
	PROC_LOCK(p1);
	microuptime(&p2->p_stats->p_start);
	PROC_SLOCK(p2);
	p2->p_state = PRS_NORMAL;
	PROC_SUNLOCK(p2);

#ifdef KDTRACE_HOOKS
	/*
	 * Tell the DTrace fasttrap provider about the new process so that any
	 * tracepoints inherited from the parent can be removed. We have to do
	 * this only after p_state is PRS_NORMAL since the fasttrap module will
	 * use pfind() later on.
	 */
	if ((fr->fr_flags & RFMEM) == 0 && dtrace_fasttrap_fork)
		dtrace_fasttrap_fork(p1, p2);
#endif
	/*
	 * Hold the process so that it cannot exit after we make it runnable,
	 * but before we wait for the debugger.
	 */
	_PHOLD(p2);
	if (p1->p_ptevents & PTRACE_FORK) {
		/*
		 * Arrange for debugger to receive the fork event.
		 *
		 * We can report PL_FLAG_FORKED regardless of
	 * P_FOLLOWFORK settings, but it does not make sense
	 * for a runaway child.
		 */
		td->td_dbgflags |= TDB_FORK;
		td->td_dbg_forked = p2->p_pid;
		td2->td_dbgflags |= TDB_STOPATFORK;
	}
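	/*
	 * For vfork()-style requests, remember the child so the parent
	 * sleeps until it execs or exits before returning to user mode.
	 */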
	if (fr->fr_flags & RFPPWAIT) {
		td->td_pflags |= TDP_RFPPWAIT;
		td->td_rfppwait_p = p2;
		td->td_dbgflags |= TDB_VFORK;
	}
	PROC_UNLOCK(p2);

	/*
	 * Now can be swapped.
	 */
	_PRELE(p1);
	PROC_UNLOCK(p1);

	/*
	 * Tell any interested parties about the new process.
	 */
	knote_fork(p1->p_klist, p2->p_pid);
	SDT_PROBE3(proc, , , create, p2, p1, fr->fr_flags);

	if (fr->fr_flags & RFPROCDESC) {
		procdesc_finit(p2->p_procdesc, fp_procdesc);
		fdrop(fp_procdesc, td);
	}

	if ((fr->fr_flags & RFSTOPPED) == 0) {
		/*
		 * If RFSTOPPED not requested, make child runnable and
		 * add to run queue.
		 */
		thread_lock(td2);
		TD_SET_CAN_RUN(td2);
		sched_add(td2, SRQ_BORING);
		thread_unlock(td2);
		if (fr->fr_pidp != NULL)
			*fr->fr_pidp = p2->p_pid;
	} else {
		*fr->fr_procp = p2;
	}

	PROC_LOCK(p2);
	/*
	 * Wait until the debugger has attached to the child.
	 */
	while (td2->td_proc == p2 && (td2->td_dbgflags & TDB_STOPATFORK) != 0)
		cv_wait(&p2->p_dbgwait, &p2->p_mtx);
	_PRELE(p2);
	racct_proc_fork_done(p2);
	PROC_UNLOCK(p2);
}
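
For context, here is a minimal sketch of how a caller drives the fork path above through struct fork_req. It is modeled on sys_fork() in the same file; the wrapper name example_fork() is hypothetical, and it assumes the enclosing function of the first fragment above is fork1(), as in stock FreeBSD. The fr_flags and fr_pidp fields are the ones consumed by the code above.

static int
example_fork(struct thread *td)
{
	struct fork_req fr;
	int error, pid;

	/* Ask for a classic fork: a new process with a copied
	 * (not shared) file descriptor table. */
	bzero(&fr, sizeof(fr));
	fr.fr_flags = RFFDG | RFPROC;
	fr.fr_pidp = &pid;		/* do_fork() stores the child PID here */
	error = fork1(td, &fr);		/* assumed entry point, per stock FreeBSD */
	if (error == 0) {
		td->td_retval[0] = pid;	/* parent's return value: the child PID */
		td->td_retval[1] = 0;
	}
	return (error);
}

Because RFSTOPPED is clear in this request, do_fork() makes the child runnable and fills in *fr_pidp, matching the tail of do_fork() above.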