Seven NetBSD kernel call sites that pair uvm_lwp_hold() with uvm_lwp_rele(), pinning an LWP (its uarea and kernel stack) in memory across an operation that may sleep or that another context may touch.

Example 1: percpu_cpu_enlarge(), growing each CPU's private per-CPU data area via a cross-call.
static void
percpu_cpu_enlarge(size_t size)
{
    CPU_INFO_ITERATOR cii;
    struct cpu_info *ci;

    for (CPU_INFO_FOREACH(cii, ci)) {
        percpu_cpu_t pcc;

        pcc.pcc_data = kmem_alloc(size, KM_SLEEP); /* XXX cacheline */
        pcc.pcc_size = size;
        if (!mp_online) {
            percpu_cpu_swap(ci, &pcc);
        } else {
            uint64_t where;

            uvm_lwp_hold(curlwp); /* don't swap out pcc */
            where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci);
            xc_wait(where);
            uvm_lwp_rele(curlwp);
        }
        KASSERT(pcc.pcc_size < size);
        if (pcc.pcc_data != NULL) {
            kmem_free(pcc.pcc_data, pcc.pcc_size);
        }
    }
}
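The pattern shared by all seven examples is visible here in its simplest form: uvm_lwp_hold() pins the calling LWP so that its kernel stack, and therefore the on-stack pcc the cross-call handler dereferences, cannot be swapped out while xc_wait() sleeps. A minimal sketch of the idiom, with hypothetical op_args and queue_op_and_wait names standing in for the real work:

struct op_args {
	void	*a_data;	/* handed to another context */
	size_t	 a_size;
};

static int
run_op_with_stack_args(void *data, size_t size)
{
	struct op_args args;	/* lives on this LWP's kernel stack */
	int error;

	args.a_data = data;
	args.a_size = size;

	uvm_lwp_hold(curlwp);	/* stack must stay resident... */
	error = queue_op_and_wait(&args);	/* ...because this sleeps */
	uvm_lwp_rele(curlwp);	/* drop the hold */

	return error;
}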
Example 2: process_dofpregs(), the ptrace back end for reading and writing a traced LWP's floating-point registers.
int
process_dofpregs(struct lwp *curl /*tracer*/,
    struct lwp *l /*traced*/,
    struct uio *uio)
{
#if defined(PT_GETFPREGS) || defined(PT_SETFPREGS)
	int error;
	struct fpreg r;
	char *kv;
	int kl;

	if (uio->uio_offset < 0 || uio->uio_offset > (off_t)sizeof(r))
		return EINVAL;

	kl = sizeof(r);
	kv = (char *)&r;

	kv += uio->uio_offset;
	kl -= uio->uio_offset;
	if ((size_t)kl > uio->uio_resid)
		kl = uio->uio_resid;

	uvm_lwp_hold(l);

	error = process_read_fpregs(l, &r);
	if (error == 0)
		error = uiomove(kv, kl, uio);
	if (error == 0 && uio->uio_rw == UIO_WRITE) {
		if (l->l_stat != LSSTOP)
			error = EBUSY;
		else
			error = process_write_fpregs(l, &r);
	}

	uvm_lwp_rele(l);

	uio->uio_offset = 0;
	return (error);
#else
	return (EINVAL);
#endif
}
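The kv/kl arithmetic above turns struct fpreg into a seekable window: uio_offset selects the starting byte and kl is clamped to both the bytes left in the structure and uio_resid. With hypothetical values sizeof(r) == 512, uio_offset == 128 and uio_resid == 1024, kv points 128 bytes into r, kl becomes 512 - 128 = 384, and uiomove() copies exactly the tail of the register block.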
Example 3: process_machdep_dovecregs(), a machine-dependent analogue of Example 2 for vector registers.
int
process_machdep_dovecregs(struct lwp *curl, struct lwp *l, struct uio *uio)
{
	struct vreg r;
	int error;
	char *kv;
	int kl;

	/*
	 * Reject out-of-range offsets before deriving kv/kl; a
	 * negative kl would compare as a huge unsigned value against
	 * uio_resid below and defeat any later sign check.
	 */
	if (uio->uio_offset < 0 || uio->uio_offset > (off_t)sizeof(r))
		return EINVAL;

	kl = sizeof(r);
	kv = (char *) &r;

	kv += uio->uio_offset;
	kl -= uio->uio_offset;
	if ((size_t)kl > uio->uio_resid)
		kl = uio->uio_resid;

	uvm_lwp_hold(l);

	error = process_machdep_read_vecregs(l, &r);
	if (error == 0)
		error = uiomove(kv, kl, uio);
	if (error == 0 && uio->uio_rw == UIO_WRITE) {
		if (l->l_proc->p_stat != SSTOP)
			error = EBUSY;
		else
			error = process_machdep_write_vecregs(l, &r);
	}

	uvm_lwp_rele(l);

	uio->uio_offset = 0;
	return (error);
}
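This is the same shape as process_dofpregs() in Example 2, with struct vreg in place of struct fpreg, and with the stopped check made against the process (p_stat != SSTOP) rather than the LWP (l_stat != LSSTOP).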
Example 4: workqueue_finiqueue(), tearing down a workqueue worker by queuing an exit message on the caller's stack and waiting for the worker to disappear.
static void
workqueue_finiqueue(struct workqueue *wq, struct workqueue_queue *q)
{
	struct workqueue_exitargs wqe;
	lwp_t *l;

	KASSERT(wq->wq_func == workqueue_exit);

	wqe.wqe_q = q;
	KASSERT(SIMPLEQ_EMPTY(&q->q_queue));
	KASSERT(q->q_worker != NULL);
	l = curlwp;
	uvm_lwp_hold(l);
	mutex_enter(&q->q_mutex);
	SIMPLEQ_INSERT_TAIL(&q->q_queue, &wqe.wqe_wk, wk_entry);
	cv_signal(&q->q_cv);
	while (q->q_worker != NULL) {
		cv_wait(&q->q_cv, &q->q_mutex);
	}
	mutex_exit(&q->q_mutex);
	uvm_lwp_rele(l);
	mutex_destroy(&q->q_mutex);
	cv_destroy(&q->q_cv);
}
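The wqe message queued above lives on the caller's stack, and the exiting worker dereferences it after the caller has gone to sleep in cv_wait(); the uvm_lwp_hold() keeps that stack resident for the handshake. A hedged sketch of what the matching worker-side workqueue_exit() does (details may differ from the real implementation):

static void
workqueue_exit(struct work *wk, void *arg)
{
	struct workqueue_exitargs *wqe = (void *)wk;
	struct workqueue_queue *q = wqe->wqe_q;

	KASSERT(q->q_worker == curlwp);

	mutex_enter(&q->q_mutex);
	q->q_worker = NULL;	/* tells workqueue_finiqueue() we are gone */
	cv_signal(&q->q_cv);
	mutex_exit(&q->q_mutex);

	kthread_exit(0);
}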
Example 5: sys_ptrace(), the process debugging system call.
/*
 * Process debugging system call.
 */
int
sys_ptrace(struct lwp *l, const struct sys_ptrace_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) req;
		syscallarg(pid_t) pid;
		syscallarg(void *) addr;
		syscallarg(int) data;
	} */
	struct proc *p = l->l_proc;
	struct lwp *lt;
	struct proc *t;				/* target process */
	struct uio uio;
	struct iovec iov;
	struct ptrace_io_desc piod;
	struct ptrace_lwpinfo pl;
	struct vmspace *vm;
	int error, write, tmp, req, pheld;
	int signo;
	ksiginfo_t ksi;
#ifdef COREDUMP
	char *path;
#endif

	error = 0;
	req = SCARG(uap, req);

	/*
	 * If attaching or detaching, we need to get a write hold on the
	 * proclist lock so that we can re-parent the target process.
	 */
	mutex_enter(proc_lock);

	/* "A foolish consistency..." XXX */
	if (req == PT_TRACE_ME) {
		t = p;
		mutex_enter(t->p_lock);
	} else {
		/* Find the process we're supposed to be operating on. */
		if ((t = p_find(SCARG(uap, pid), PFIND_LOCKED)) == NULL) {
			mutex_exit(proc_lock);
			return (ESRCH);
		}

		/* XXX-elad */
		mutex_enter(t->p_lock);
		error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE,
		    t, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
		if (error) {
			mutex_exit(proc_lock);
			mutex_exit(t->p_lock);
			return (ESRCH);
		}
	}

	/*
	 * Grab a reference on the process to prevent it from execing or
	 * exiting.
	 */
	if (!rw_tryenter(&t->p_reflock, RW_READER)) {
		mutex_exit(proc_lock);
		mutex_exit(t->p_lock);
		return EBUSY;
	}

	/* Make sure we can operate on it. */
	switch (req) {
	case  PT_TRACE_ME:
		/* Saying that you're being traced is always legal. */
		break;

	case  PT_ATTACH:
		/*
		 * You can't attach to a process if:
		 *	(1) it's the process that's doing the attaching,
		 */
		if (t->p_pid == p->p_pid) {
			error = EINVAL;
			break;
		}

		/*
		 *  (2) it's a system process
		 */
		if (t->p_flag & PK_SYSTEM) {
			error = EPERM;
			break;
		}

		/*
		 *	(3) it's already being traced, or
		 */
		if (ISSET(t->p_slflag, PSL_TRACED)) {
			error = EBUSY;
			break;
		}

		/*
		 * 	(4) the tracer is chrooted, and its root directory is
		 * 	    not at or above the root directory of the tracee
		 */
		mutex_exit(t->p_lock);	/* XXXSMP */
		tmp = proc_isunder(t, l);
		mutex_enter(t->p_lock);	/* XXXSMP */
		if (!tmp) {
			error = EPERM;
			break;
		}
		break;

	case  PT_READ_I:
	case  PT_READ_D:
	case  PT_WRITE_I:
	case  PT_WRITE_D:
	case  PT_IO:
#ifdef PT_GETREGS
	case  PT_GETREGS:
#endif
#ifdef PT_SETREGS
	case  PT_SETREGS:
#endif
#ifdef PT_GETFPREGS
	case  PT_GETFPREGS:
#endif
#ifdef PT_SETFPREGS
	case  PT_SETFPREGS:
#endif
#ifdef __HAVE_PTRACE_MACHDEP
	PTRACE_MACHDEP_REQUEST_CASES
#endif
		/*
		 * You can't read/write the memory or registers of a process
		 * if the tracer is chrooted, and its root directory is not at
		 * or above the root directory of the tracee.
		 */
		mutex_exit(t->p_lock);	/* XXXSMP */
		tmp = proc_isunder(t, l);
		mutex_enter(t->p_lock);	/* XXXSMP */
		if (!tmp) {
			error = EPERM;
			break;
		}
		/*FALLTHROUGH*/

	case  PT_CONTINUE:
	case  PT_KILL:
	case  PT_DETACH:
	case  PT_LWPINFO:
	case  PT_SYSCALL:
#ifdef COREDUMP
	case  PT_DUMPCORE:
#endif
#ifdef PT_STEP
	case  PT_STEP:
#endif
		/*
		 * You can't do what you want to the process if:
		 *	(1) It's not being traced at all,
		 */
		if (!ISSET(t->p_slflag, PSL_TRACED)) {
			error = EPERM;
			break;
		}

		/*
		 *	(2) it's being traced by procfs (which has
		 *	    different signal delivery semantics),
		 */
		if (ISSET(t->p_slflag, PSL_FSTRACE)) {
			uprintf("file system traced\n");
			error = EBUSY;
			break;
		}

		/*
		 *	(3) it's not being traced by _you_, or
		 */
		if (t->p_pptr != p) {
			uprintf("parent %d != %d\n", t->p_pptr->p_pid, p->p_pid);
			error = EBUSY;
			break;
		}

		/*
		 *	(4) it's not currently stopped.
		 */
		if (t->p_stat != SSTOP || !t->p_waited /* XXXSMP */) {
			uprintf("stat %d flag %d\n", t->p_stat,
			    !t->p_waited);
			error = EBUSY;
			break;
		}
		break;

	default:			/* It was not a legal request. */
		error = EINVAL;
		break;
	}

	if (error == 0)
		error = kauth_authorize_process(l->l_cred,
		    KAUTH_PROCESS_PTRACE, t, KAUTH_ARG(req),
		    NULL, NULL);

	if (error != 0) {
		mutex_exit(proc_lock);
		mutex_exit(t->p_lock);
		rw_exit(&t->p_reflock);
		return error;
	}

	/* Do single-step fixup if needed. */
	FIX_SSTEP(t);

	/*
	 * XXX NJWLWP
	 *
	 * The entire ptrace interface needs work to be useful to a
	 * process with multiple LWPs. For the moment, we'll kluge
	 * this; memory access will be fine, but register access will
	 * be weird.
	 */
	lt = LIST_FIRST(&t->p_lwps);
	KASSERT(lt != NULL);
	lwp_addref(lt);

	/*
	 * Which locks do we need held? XXX Ugly.
	 */
	switch (req) {
#ifdef PT_STEP
	case PT_STEP:
#endif
	case PT_CONTINUE:
	case PT_DETACH:
	case PT_KILL:
	case PT_SYSCALL:
	case PT_ATTACH:
	case PT_TRACE_ME:
		pheld = 1;
		break;
	default:
		mutex_exit(proc_lock);
		mutex_exit(t->p_lock);
		pheld = 0;
		break;
	}

	/* Now do the operation. */
	write = 0;
	*retval = 0;
	tmp = 0;

	switch (req) {
	case  PT_TRACE_ME:
		/* Just set the trace flag. */
		SET(t->p_slflag, PSL_TRACED);
		t->p_opptr = t->p_pptr;
		break;

	case  PT_WRITE_I:		/* XXX no separate I and D spaces */
	case  PT_WRITE_D:
#if defined(__HAVE_RAS)
		/*
		 * Can't write to a RAS
		 */
		if (ras_lookup(t, SCARG(uap, addr)) != (void *)-1) {
			error = EACCES;
			break;
		}
#endif
		write = 1;
		tmp = SCARG(uap, data);
		/* FALLTHROUGH */

	case  PT_READ_I:		/* XXX no separate I and D spaces */
	case  PT_READ_D:
		/* write = 0 done above. */
		iov.iov_base = (void *)&tmp;
		iov.iov_len = sizeof(tmp);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(unsigned long)SCARG(uap, addr);
		uio.uio_resid = sizeof(tmp);
		uio.uio_rw = write ? UIO_WRITE : UIO_READ;
		UIO_SETUP_SYSSPACE(&uio);

		error = process_domem(l, lt, &uio);
		if (!write)
			*retval = tmp;
		break;

	case  PT_IO:
		error = copyin(SCARG(uap, addr), &piod, sizeof(piod));
		if (error)
			break;
		switch (piod.piod_op) {
		case PIOD_READ_D:
		case PIOD_READ_I:
			uio.uio_rw = UIO_READ;
			break;
		case PIOD_WRITE_D:
		case PIOD_WRITE_I:
			/*
			 * Can't write to a RAS; the target address in
			 * the tracee is piod_offs.  Fail through the
			 * common error path so lt and p_reflock are
			 * released.
			 */
			if (ras_lookup(t, piod.piod_offs) != (void *)-1) {
				error = EACCES;
				break;
			}
			uio.uio_rw = UIO_WRITE;
			break;
		default:
			error = EINVAL;
			break;
		}
		if (error)
			break;
		error = proc_vmspace_getref(l->l_proc, &vm);
		if (error)
			break;
		iov.iov_base = piod.piod_addr;
		iov.iov_len = piod.piod_len;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(unsigned long)piod.piod_offs;
		uio.uio_resid = piod.piod_len;
		uio.uio_vmspace = vm;

		error = process_domem(l, lt, &uio);
		piod.piod_len -= uio.uio_resid;
		(void) copyout(&piod, SCARG(uap, addr), sizeof(piod));
		uvmspace_free(vm);
		break;

#ifdef COREDUMP
	case  PT_DUMPCORE:
		if ((path = SCARG(uap, addr)) != NULL) {
			char *dst;
			int len = SCARG(uap, data);
			if (len < 0 || len >= MAXPATHLEN) {
				error = EINVAL;
				break;
			}
			dst = malloc(len + 1, M_TEMP, M_WAITOK);
			if ((error = copyin(path, dst, len)) != 0) {
				free(dst, M_TEMP);
				break;
			}
			path = dst;
			path[len] = '\0';
		}
		error = coredump(lt, path);
		if (path)
			free(path, M_TEMP);
		break;
#endif

#ifdef PT_STEP
	case  PT_STEP:
		/*
		 * From the 4.4BSD PRM:
		 * "Execution continues as in request PT_CONTINUE; however
		 * as soon as possible after execution of at least one
		 * instruction, execution stops again. [ ... ]"
		 */
#endif
	case  PT_CONTINUE:
	case  PT_SYSCALL:
	case  PT_DETACH:
		if (req == PT_SYSCALL) {
			if (!ISSET(t->p_slflag, PSL_SYSCALL)) {
				SET(t->p_slflag, PSL_SYSCALL);
#ifdef __HAVE_SYSCALL_INTERN
				(*t->p_emul->e_syscall_intern)(t);
#endif
			}
		} else {
			if (ISSET(t->p_slflag, PSL_SYSCALL)) {
				CLR(t->p_slflag, PSL_SYSCALL);
#ifdef __HAVE_SYSCALL_INTERN
				(*t->p_emul->e_syscall_intern)(t);
#endif
			}
		}
		p->p_trace_enabled = trace_is_enabled(p);

		/*
		 * From the 4.4BSD PRM:
		 * "The data argument is taken as a signal number and the
		 * child's execution continues at location addr as if it
		 * incurred that signal.  Normally the signal number will
		 * be either 0 to indicate that the signal that caused the
		 * stop should be ignored, or that value fetched out of
		 * the process's image indicating which signal caused
		 * the stop.  If addr is (int *)1 then execution continues
		 * from where it stopped."
		 */

		/* Check that the data is a valid signal number or zero. */
		if (SCARG(uap, data) < 0 || SCARG(uap, data) >= NSIG) {
			error = EINVAL;
			break;
		}

		uvm_lwp_hold(lt);

		/* If the address parameter is not (int *)1, set the pc. */
		if ((int *)SCARG(uap, addr) != (int *)1)
			if ((error = process_set_pc(lt, SCARG(uap, addr))) != 0) {
				uvm_lwp_rele(lt);
				break;
			}

#ifdef PT_STEP
		/*
		 * Arrange for a single-step, if that's requested and possible.
		 */
		error = process_sstep(lt, req == PT_STEP);
		if (error) {
			uvm_lwp_rele(lt);
			break;
		}
#endif

		uvm_lwp_rele(lt);

		if (req == PT_DETACH) {
			CLR(t->p_slflag, PSL_TRACED|PSL_FSTRACE|PSL_SYSCALL);

			/* give process back to original parent or init */
			if (t->p_opptr != t->p_pptr) {
				struct proc *pp = t->p_opptr;
				proc_reparent(t, pp ? pp : initproc);
			}

			/* not being traced any more */
			t->p_opptr = NULL;
		}

		signo = SCARG(uap, data);
	sendsig:
		/* Finally, deliver the requested signal (or none). */
		if (t->p_stat == SSTOP) {
			/*
			 * Unstop the process.  If it needs to take a
			 * signal, make all efforts to ensure that at
			 * an LWP runs to see it.
			 */
			t->p_xstat = signo;
			proc_unstop(t);
		} else if (signo != 0) {
			KSI_INIT_EMPTY(&ksi);
			ksi.ksi_signo = signo;
			kpsignal2(t, &ksi);
		}
		break;

	case  PT_KILL:
		/* just send the process a KILL signal. */
		signo = SIGKILL;
		goto sendsig;	/* in PT_CONTINUE, above. */

	case  PT_ATTACH:
		/*
		 * Go ahead and set the trace flag.
		 * Save the old parent (it's reset in
		 *   _DETACH, and also in kern_exit.c:wait4()
		 * Reparent the process so that the tracing
		 *   proc gets to see all the action.
		 * Stop the target.
		 */
		t->p_opptr = t->p_pptr;
		if (t->p_pptr != p) {
			struct proc *parent = t->p_pptr;

			if (parent->p_lock < t->p_lock) {
				if (!mutex_tryenter(parent->p_lock)) {
					mutex_exit(t->p_lock);
					mutex_enter(parent->p_lock);
				}
			} else if (parent->p_lock > t->p_lock) {
				mutex_enter(parent->p_lock);
			}
			parent->p_slflag |= PSL_CHTRACED;
			proc_reparent(t, p);
			if (parent->p_lock != t->p_lock)
				mutex_exit(parent->p_lock);
		}
		SET(t->p_slflag, PSL_TRACED);
		signo = SIGSTOP;
		goto sendsig;

	case PT_LWPINFO:
		if (SCARG(uap, data) != sizeof(pl)) {
			error = EINVAL;
			break;
		}
		error = copyin(SCARG(uap, addr), &pl, sizeof(pl));
		if (error)
			break;
		tmp = pl.pl_lwpid;
		lwp_delref(lt);
		mutex_enter(t->p_lock);
		if (tmp == 0)
			lt = LIST_FIRST(&t->p_lwps);
		else {
			lt = lwp_find(t, tmp);
			if (lt == NULL) {
				mutex_exit(t->p_lock);
				error = ESRCH;
				break;
			}
			lt = LIST_NEXT(lt, l_sibling);
		}
		while (lt != NULL && lt->l_stat == LSZOMB)
			lt = LIST_NEXT(lt, l_sibling);
		pl.pl_lwpid = 0;
		pl.pl_event = 0;
		if (lt) {
			lwp_addref(lt);
			pl.pl_lwpid = lt->l_lid;
			if (lt->l_lid == t->p_sigctx.ps_lwp)
				pl.pl_event = PL_EVENT_SIGNAL;
		}
		mutex_exit(t->p_lock);

		error = copyout(&pl, SCARG(uap, addr), sizeof(pl));
		break;

#ifdef PT_SETREGS
	case  PT_SETREGS:
		write = 1;
#endif
#ifdef PT_GETREGS
	case  PT_GETREGS:
		/* write = 0 done above. */
#endif
#if defined(PT_SETREGS) || defined(PT_GETREGS)
		tmp = SCARG(uap, data);
		if (tmp != 0 && t->p_nlwps > 1) {
			lwp_delref(lt);
			mutex_enter(t->p_lock);
			lt = lwp_find(t, tmp);
			if (lt == NULL) {
				mutex_exit(t->p_lock);
				error = ESRCH;
				break;
			}
			lwp_addref(lt);
			mutex_exit(t->p_lock);
		}
		if (!process_validregs(lt))
			error = EINVAL;
		else {
			error = proc_vmspace_getref(l->l_proc, &vm);
			if (error)
				break;
			iov.iov_base = SCARG(uap, addr);
			iov.iov_len = sizeof(struct reg);
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			uio.uio_offset = 0;
			uio.uio_resid = sizeof(struct reg);
			uio.uio_rw = write ? UIO_WRITE : UIO_READ;
			uio.uio_vmspace = vm;

			error = process_doregs(l, lt, &uio);
			uvmspace_free(vm);
		}
		break;
#endif

#ifdef PT_SETFPREGS
	case  PT_SETFPREGS:
		write = 1;
#endif
#ifdef PT_GETFPREGS
	case  PT_GETFPREGS:
		/* write = 0 done above. */
#endif
#if defined(PT_SETFPREGS) || defined(PT_GETFPREGS)
		tmp = SCARG(uap, data);
		if (tmp != 0 && t->p_nlwps > 1) {
			lwp_delref(lt);
			mutex_enter(t->p_lock);
			lt = lwp_find(t, tmp);
			if (lt == NULL) {
				mutex_exit(t->p_lock);
				error = ESRCH;
				break;
			}
			lwp_addref(lt);
			mutex_exit(t->p_lock);
		}
		if (!process_validfpregs(lt))
			error = EINVAL;
		else {
			error = proc_vmspace_getref(l->l_proc, &vm);
			if (error)
				break;
			iov.iov_base = SCARG(uap, addr);
			iov.iov_len = sizeof(struct fpreg);
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			uio.uio_offset = 0;
			uio.uio_resid = sizeof(struct fpreg);
			uio.uio_rw = write ? UIO_WRITE : UIO_READ;
			uio.uio_vmspace = vm;

			error = process_dofpregs(l, lt, &uio);
			uvmspace_free(vm);
		}
		break;
#endif

#ifdef __HAVE_PTRACE_MACHDEP
	PTRACE_MACHDEP_REQUEST_CASES
		error = ptrace_machdep_dorequest(l, lt,
		    req, SCARG(uap, addr), SCARG(uap, data));
		break;
#endif
	}

	if (pheld) {
		mutex_exit(t->p_lock);
		mutex_exit(proc_lock);
	}
	if (lt != NULL)
		lwp_delref(lt);
	rw_exit(&t->p_reflock);

	return error;
}
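A userland sketch exercising the PT_ATTACH, PT_READ_D and PT_DETACH paths above; peek_word is a hypothetical helper, and on detach the (void *)1 address with data 0 means "continue from where stopped, no signal", matching the PT_CONTINUE handling in the code:

#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <err.h>

static int
peek_word(pid_t pid, void *addr)
{
	int status, word;

	if (ptrace(PT_ATTACH, pid, NULL, 0) == -1)
		err(1, "PT_ATTACH");
	if (waitpid(pid, &status, 0) == -1)	/* target stops with SIGSTOP */
		err(1, "waitpid");

	word = ptrace(PT_READ_D, pid, addr, 0);	/* the *retval = tmp path */

	if (ptrace(PT_DETACH, pid, (void *)1, 0) == -1)
		err(1, "PT_DETACH");
	return word;
}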
Example 6: kthread_create(), forking a kernel thread.
/*
 * Fork a kernel thread.  Any process can request this to be done.
 */
int
kthread_create(pri_t pri, int flag, struct cpu_info *ci,
	       void (*func)(void *), void *arg,
	       lwp_t **lp, const char *fmt, ...)
{
	lwp_t *l;
	vaddr_t uaddr;
	bool inmem;
	int error;
	va_list ap;
	int lc;

	inmem = uvm_uarea_alloc(&uaddr);
	if (uaddr == 0)
		return ENOMEM;
	if ((flag & KTHREAD_TS) != 0) {
		lc = SCHED_OTHER;
	} else {
		lc = SCHED_RR;
	}
	error = lwp_create(&lwp0, &proc0, uaddr, inmem, LWP_DETACHED, NULL,
	    0, func, arg, &l, lc);
	if (error) {
		uvm_uarea_free(uaddr, curcpu());
		return error;
	}
	uvm_lwp_hold(l);
	if (fmt != NULL) {
		l->l_name = kmem_alloc(MAXCOMLEN, KM_SLEEP);
		if (l->l_name == NULL) {
			lwp_exit(l);
			return ENOMEM;
		}
		va_start(ap, fmt);
		vsnprintf(l->l_name, MAXCOMLEN, fmt, ap);
		va_end(ap);
	}

	/*
	 * Set parameters.
	 */
	if ((flag & KTHREAD_INTR) != 0) {
		KASSERT((flag & KTHREAD_MPSAFE) != 0);
	}

	if (pri == PRI_NONE) {
		if ((flag & KTHREAD_TS) != 0) {
			/* Maximum user priority level. */
			pri = MAXPRI_USER;
		} else {
			/* Minimum kernel priority level. */
			pri = PRI_KTHREAD;
		}
	}
	mutex_enter(proc0.p_lock);
	lwp_lock(l);
	l->l_priority = pri;
	if (ci != NULL) {
		if (ci != l->l_cpu) {
			lwp_unlock_to(l, ci->ci_schedstate.spc_mutex);
			lwp_lock(l);
		}
		l->l_pflag |= LP_BOUND;
		l->l_cpu = ci;
	}
	if ((flag & KTHREAD_INTR) != 0)
		l->l_pflag |= LP_INTR;
	if ((flag & KTHREAD_MPSAFE) == 0)
		l->l_pflag &= ~LP_MPSAFE;

	/*
	 * Set the new LWP running, unless the caller has requested
	 * otherwise.
	 */
	if ((flag & KTHREAD_IDLE) == 0) {
		l->l_stat = LSRUN;
		sched_enqueue(l, false);
		lwp_unlock(l);
	} else
		lwp_unlock_to(l, ci->ci_schedstate.spc_lwplock);

	/*
	 * The LWP is not created suspended or stopped and cannot be set
	 * into those states later, so must be considered runnable.
	 */
	proc0.p_nrlwps++;
	mutex_exit(proc0.p_lock);

	/* All done! */
	if (lp != NULL)
		*lp = l;

	return (0);
}
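A hedged usage sketch for the interface above; example_softc, example_thread and the "example" thread name are hypothetical. With KTHREAD_TS and PRI_NONE the code above picks SCHED_OTHER at MAXPRI_USER:

struct example_softc {
	lwp_t	*sc_lwp;
	/* ... driver state ... */
};

static void
example_thread(void *arg)
{
	struct example_softc *sc = arg;

	for (;;) {
		/* ... periodic work on sc ... */
		kpause("example", false, hz, NULL);	/* sleep about 1s */
	}
}

static int
example_attach_thread(struct example_softc *sc)
{
	/* timeshared priority, MP-safe, not bound to any CPU */
	return kthread_create(PRI_NONE, KTHREAD_TS | KTHREAD_MPSAFE, NULL,
	    example_thread, sc, &sc->sc_lwp, "example");
}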
Example 7: physio(), raw I/O moved directly between the device and user buffers, bypassing the buffer cache.
/*
 * Do "physical I/O" on behalf of a user.  "Physical I/O" is I/O directly
 * from the raw device to user buffers, and bypasses the buffer cache.
 *
 * Comments in brackets are from Leffler, et al.'s pseudo-code implementation.
 */
int
physio(void (*strategy)(struct buf *), struct buf *obp, dev_t dev, int flags,
    void (*min_phys)(struct buf *), struct uio *uio)
{
	struct iovec *iovp;
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;
	int i, error;
	struct buf *bp = NULL;
	struct physio_stat *ps;
	int concurrency = PHYSIO_CONCURRENCY - 1;

	error = RUN_ONCE(&physio_initialized, physio_init);
	if (__predict_false(error != 0)) {
		return error;
	}

	DPRINTF(("%s: called: off=%" PRIu64 ", resid=%zu\n",
	    __func__, uio->uio_offset, uio->uio_resid));

	flags &= B_READ | B_WRITE;

	if ((ps = kmem_zalloc(sizeof(*ps), KM_SLEEP)) == NULL)
		return ENOMEM;
	/* ps->ps_running = 0; */
	/* ps->ps_error = 0; */
	/* ps->ps_failed = 0; */
	ps->ps_orig_bp = obp;
	ps->ps_endoffset = -1;
	mutex_init(&ps->ps_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&ps->ps_cv, "physio");

	/* Make sure we have a buffer, creating one if necessary. */
	if (obp != NULL) {
		/* [raise the processor priority level to splbio;] */
		mutex_enter(&bufcache_lock);
		/* Mark it busy, so nobody else will use it. */
		while (bbusy(obp, false, 0, NULL) == EPASSTHROUGH)
			;
		mutex_exit(&bufcache_lock);
		concurrency = 0; /* see "XXXkludge" comment below */
	}

	uvm_lwp_hold(l);

	for (i = 0; i < uio->uio_iovcnt; i++) {
		bool sync = true;

		iovp = &uio->uio_iov[i];
		while (iovp->iov_len > 0) {
			size_t todo;
			vaddr_t endp;

			mutex_enter(&ps->ps_lock);
			if (ps->ps_failed != 0) {
				goto done_locked;
			}
			physio_wait(ps, sync ? 0 : concurrency);
			mutex_exit(&ps->ps_lock);
			if (obp != NULL) {
				/*
				 * XXXkludge
				 * some drivers use "obp" as an identifier.
				 */
				bp = obp;
			} else {
				bp = getiobuf(NULL, true);
				bp->b_cflags = BC_BUSY;
			}
			bp->b_dev = dev;
			bp->b_proc = p;
			bp->b_private = ps;

			/*
			 * [mark the buffer busy for physical I/O]
			 * (i.e. set B_PHYS (because it's an I/O to user
			 * memory, and B_RAW, because B_RAW is to be
			 * "Set by physio for raw transfers.", in addition
			 * to the "busy" and read/write flag.)
			 */
			bp->b_oflags = 0;
			bp->b_cflags = BC_BUSY;
			bp->b_flags = flags | B_PHYS | B_RAW;
			bp->b_iodone = physio_biodone;

			/* [set up the buffer for a maximum-sized transfer] */
			bp->b_blkno = btodb(uio->uio_offset);
			if (dbtob(bp->b_blkno) != uio->uio_offset) {
				error = EINVAL;
				goto done;
			}
			bp->b_bcount = MIN(MAXPHYS, iovp->iov_len);
			bp->b_data = iovp->iov_base;

			/*
			 * [call minphys to bound the transfer size]
			 * and remember the amount of data to transfer,
			 * for later comparison.
			 */
			(*min_phys)(bp);
			todo = bp->b_bufsize = bp->b_bcount;
#if defined(DIAGNOSTIC)
			if (todo > MAXPHYS)
				panic("todo(%zu) > MAXPHYS; minphys broken",
				    todo);
#endif /* defined(DIAGNOSTIC) */

			sync = false;
			endp = (vaddr_t)bp->b_data + todo;
			if (trunc_page(endp) != endp) {
				/*
				 * following requests can overlap.
				 * note that uvm_vslock does round_page.
				 */
				sync = true;
			}

			/*
			 * [lock the part of the user address space involved
			 *    in the transfer]
			 * Beware vmapbuf(); it clobbers b_data and
			 * saves it in b_saveaddr.  However, vunmapbuf()
			 * restores it.
			 */
			error = uvm_vslock(p->p_vmspace, bp->b_data, todo,
			    (flags & B_READ) ?  VM_PROT_WRITE : VM_PROT_READ);
			if (error) {
				goto done;
			}
			vmapbuf(bp, todo);

			BIO_SETPRIO(bp, BPRIO_TIMECRITICAL);

			mutex_enter(&ps->ps_lock);
			ps->ps_running++;
			mutex_exit(&ps->ps_lock);

			/* [call strategy to start the transfer] */
			(*strategy)(bp);
			bp = NULL;

			iovp->iov_len -= todo;
			iovp->iov_base = (char *)iovp->iov_base + todo;
			uio->uio_offset += todo;
			uio->uio_resid -= todo;
		}
	}

done:
	mutex_enter(&ps->ps_lock);
done_locked:
	physio_wait(ps, 0);
	mutex_exit(&ps->ps_lock);

	if (ps->ps_failed != 0) {
		off_t delta;

		delta = uio->uio_offset - ps->ps_endoffset;
		KASSERT(delta > 0);
		uio->uio_resid += delta;
		/* uio->uio_offset = ps->ps_endoffset; */
	} else {
		KASSERT(ps->ps_endoffset == -1);
	}
	if (bp != NULL && bp != obp) {
		putiobuf(bp);
	}
	if (error == 0) {
		error = ps->ps_error;
	}
	mutex_destroy(&ps->ps_lock);
	cv_destroy(&ps->ps_cv);
	kmem_free(ps, sizeof(*ps));

	/*
	 * [clean up the state of the buffer]
	 * Remember if somebody wants it, so we can wake them up below.
	 * Also, if we had to steal it, give it back.
	 */
	if (obp != NULL) {
		KASSERT((obp->b_cflags & BC_BUSY) != 0);

		/*
		 * [if another process is waiting for the raw I/O buffer,
		 *    wake up processes waiting to do physical I/O;
		 */
		mutex_enter(&bufcache_lock);
		obp->b_cflags &= ~(BC_BUSY | BC_WANTED);
		obp->b_flags &= ~(B_PHYS | B_RAW);
		obp->b_iodone = NULL;
		cv_broadcast(&obp->b_busy);
		mutex_exit(&bufcache_lock);
	}
	uvm_lwp_rele(l);

	DPRINTF(("%s: done: off=%" PRIu64 ", resid=%zu\n",
	    __func__, uio->uio_offset, uio->uio_resid));

	return error;
}
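physio() is typically reached from the read/write entry points of a raw character device. A hedged sketch with hypothetical xxstrategy/xxread/xxwrite names, using the stock minphys() to bound transfer sizes as the bracketed comments above describe:

static int
xxread(dev_t dev, struct uio *uio, int ioflag)
{
	/* obp == NULL: physio allocates a buffer per chunk */
	return physio(xxstrategy, NULL, dev, B_READ, minphys, uio);
}

static int
xxwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return physio(xxstrategy, NULL, dev, B_WRITE, minphys, uio);
}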