static void percpu_cpu_enlarge(size_t size) { CPU_INFO_ITERATOR cii; struct cpu_info *ci; for (CPU_INFO_FOREACH(cii, ci)) { percpu_cpu_t pcc; pcc.pcc_data = kmem_alloc(size, KM_SLEEP); /* XXX cacheline */ pcc.pcc_size = size; if (!mp_online) { percpu_cpu_swap(ci, &pcc); } else { uint64_t where; uvm_lwp_hold(curlwp); /* don't swap out pcc */ where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci); xc_wait(where); uvm_lwp_rele(curlwp); } KASSERT(pcc.pcc_size < size); if (pcc.pcc_data != NULL) { kmem_free(pcc.pcc_data, pcc.pcc_size); } } }
int process_dofpregs(struct lwp *curl /*tracer*/, struct lwp *l /*traced*/, struct uio *uio) { #if defined(PT_GETFPREGS) || defined(PT_SETFPREGS) int error; struct fpreg r; char *kv; int kl; if (uio->uio_offset < 0 || uio->uio_offset > (off_t)sizeof(r)) return EINVAL; kl = sizeof(r); kv = (char *)&r; kv += uio->uio_offset; kl -= uio->uio_offset; if ((size_t)kl > uio->uio_resid) kl = uio->uio_resid; uvm_lwp_hold(l); error = process_read_fpregs(l, &r); if (error == 0) error = uiomove(kv, kl, uio); if (error == 0 && uio->uio_rw == UIO_WRITE) { if (l->l_stat != LSSTOP) error = EBUSY; else error = process_write_fpregs(l, &r); } uvm_lwp_rele(l); uio->uio_offset = 0; return (error); #else return (EINVAL); #endif }
int process_machdep_dovecregs(struct lwp *curl, struct lwp *l, struct uio *uio) { struct vreg r; int error; char *kv; int kl; kl = sizeof(r); kv = (char *) &r; kv += uio->uio_offset; kl -= uio->uio_offset; if (kl > uio->uio_resid) kl = uio->uio_resid; uvm_lwp_hold(l); if (kl < 0) error = EINVAL; else error = process_machdep_read_vecregs(l, &r); if (error == 0) error = uiomove(kv, kl, uio); if (error == 0 && uio->uio_rw == UIO_WRITE) { if (l->l_proc->p_stat != SSTOP) error = EBUSY; else error = process_machdep_write_vecregs(l, &r); } uvm_lwp_rele(l); uio->uio_offset = 0; return (error); }
static void workqueue_finiqueue(struct workqueue *wq, struct workqueue_queue *q) { struct workqueue_exitargs wqe; lwp_t *l; KASSERT(wq->wq_func == workqueue_exit); wqe.wqe_q = q; KASSERT(SIMPLEQ_EMPTY(&q->q_queue)); KASSERT(q->q_worker != NULL); l = curlwp; uvm_lwp_hold(l); mutex_enter(&q->q_mutex); SIMPLEQ_INSERT_TAIL(&q->q_queue, &wqe.wqe_wk, wk_entry); cv_signal(&q->q_cv); while (q->q_worker != NULL) { cv_wait(&q->q_cv, &q->q_mutex); } mutex_exit(&q->q_mutex); uvm_lwp_rele(l); mutex_destroy(&q->q_mutex); cv_destroy(&q->q_cv); }
/* * Process debugging system call. */ int sys_ptrace(struct lwp *l, const struct sys_ptrace_args *uap, register_t *retval) { /* { syscallarg(int) req; syscallarg(pid_t) pid; syscallarg(void *) addr; syscallarg(int) data; } */ struct proc *p = l->l_proc; struct lwp *lt; struct proc *t; /* target process */ struct uio uio; struct iovec iov; struct ptrace_io_desc piod; struct ptrace_lwpinfo pl; struct vmspace *vm; int error, write, tmp, req, pheld; int signo; ksiginfo_t ksi; #ifdef COREDUMP char *path; #endif error = 0; req = SCARG(uap, req); /* * If attaching or detaching, we need to get a write hold on the * proclist lock so that we can re-parent the target process. */ mutex_enter(proc_lock); /* "A foolish consistency..." XXX */ if (req == PT_TRACE_ME) { t = p; mutex_enter(t->p_lock); } else { /* Find the process we're supposed to be operating on. */ if ((t = p_find(SCARG(uap, pid), PFIND_LOCKED)) == NULL) { mutex_exit(proc_lock); return (ESRCH); } /* XXX-elad */ mutex_enter(t->p_lock); error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_CANSEE, t, KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL); if (error) { mutex_exit(proc_lock); mutex_exit(t->p_lock); return (ESRCH); } } /* * Grab a reference on the process to prevent it from execing or * exiting. */ if (!rw_tryenter(&t->p_reflock, RW_READER)) { mutex_exit(proc_lock); mutex_exit(t->p_lock); return EBUSY; } /* Make sure we can operate on it. */ switch (req) { case PT_TRACE_ME: /* Saying that you're being traced is always legal. */ break; case PT_ATTACH: /* * You can't attach to a process if: * (1) it's the process that's doing the attaching, */ if (t->p_pid == p->p_pid) { error = EINVAL; break; } /* * (2) it's a system process */ if (t->p_flag & PK_SYSTEM) { error = EPERM; break; } /* * (3) it's already being traced, or */ if (ISSET(t->p_slflag, PSL_TRACED)) { error = EBUSY; break; } /* * (4) the tracer is chrooted, and its root directory is * not at or above the root directory of the tracee */ mutex_exit(t->p_lock); /* XXXSMP */ tmp = proc_isunder(t, l); mutex_enter(t->p_lock); /* XXXSMP */ if (!tmp) { error = EPERM; break; } break; case PT_READ_I: case PT_READ_D: case PT_WRITE_I: case PT_WRITE_D: case PT_IO: #ifdef PT_GETREGS case PT_GETREGS: #endif #ifdef PT_SETREGS case PT_SETREGS: #endif #ifdef PT_GETFPREGS case PT_GETFPREGS: #endif #ifdef PT_SETFPREGS case PT_SETFPREGS: #endif #ifdef __HAVE_PTRACE_MACHDEP PTRACE_MACHDEP_REQUEST_CASES #endif /* * You can't read/write the memory or registers of a process * if the tracer is chrooted, and its root directory is not at * or above the root directory of the tracee. */ mutex_exit(t->p_lock); /* XXXSMP */ tmp = proc_isunder(t, l); mutex_enter(t->p_lock); /* XXXSMP */ if (!tmp) { error = EPERM; break; } /*FALLTHROUGH*/ case PT_CONTINUE: case PT_KILL: case PT_DETACH: case PT_LWPINFO: case PT_SYSCALL: #ifdef COREDUMP case PT_DUMPCORE: #endif #ifdef PT_STEP case PT_STEP: #endif /* * You can't do what you want to the process if: * (1) It's not being traced at all, */ if (!ISSET(t->p_slflag, PSL_TRACED)) { error = EPERM; break; } /* * (2) it's being traced by procfs (which has * different signal delivery semantics), */ if (ISSET(t->p_slflag, PSL_FSTRACE)) { uprintf("file system traced\n"); error = EBUSY; break; } /* * (3) it's not being traced by _you_, or */ if (t->p_pptr != p) { uprintf("parent %d != %d\n", t->p_pptr->p_pid, p->p_pid); error = EBUSY; break; } /* * (4) it's not currently stopped. */ if (t->p_stat != SSTOP || !t->p_waited /* XXXSMP */) { uprintf("stat %d flag %d\n", t->p_stat, !t->p_waited); error = EBUSY; break; } break; default: /* It was not a legal request. */ error = EINVAL; break; } if (error == 0) error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_PTRACE, t, KAUTH_ARG(req), NULL, NULL); if (error != 0) { mutex_exit(proc_lock); mutex_exit(t->p_lock); rw_exit(&t->p_reflock); return error; } /* Do single-step fixup if needed. */ FIX_SSTEP(t); /* * XXX NJWLWP * * The entire ptrace interface needs work to be useful to a * process with multiple LWPs. For the moment, we'll kluge * this; memory access will be fine, but register access will * be weird. */ lt = LIST_FIRST(&t->p_lwps); KASSERT(lt != NULL); lwp_addref(lt); /* * Which locks do we need held? XXX Ugly. */ switch (req) { #ifdef PT_STEP case PT_STEP: #endif case PT_CONTINUE: case PT_DETACH: case PT_KILL: case PT_SYSCALL: case PT_ATTACH: case PT_TRACE_ME: pheld = 1; break; default: mutex_exit(proc_lock); mutex_exit(t->p_lock); pheld = 0; break; } /* Now do the operation. */ write = 0; *retval = 0; tmp = 0; switch (req) { case PT_TRACE_ME: /* Just set the trace flag. */ SET(t->p_slflag, PSL_TRACED); t->p_opptr = t->p_pptr; break; case PT_WRITE_I: /* XXX no separate I and D spaces */ case PT_WRITE_D: #if defined(__HAVE_RAS) /* * Can't write to a RAS */ if (ras_lookup(t, SCARG(uap, addr)) != (void *)-1) { error = EACCES; break; } #endif write = 1; tmp = SCARG(uap, data); /* FALLTHROUGH */ case PT_READ_I: /* XXX no separate I and D spaces */ case PT_READ_D: /* write = 0 done above. */ iov.iov_base = (void *)&tmp; iov.iov_len = sizeof(tmp); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)(unsigned long)SCARG(uap, addr); uio.uio_resid = sizeof(tmp); uio.uio_rw = write ? UIO_WRITE : UIO_READ; UIO_SETUP_SYSSPACE(&uio); error = process_domem(l, lt, &uio); if (!write) *retval = tmp; break; case PT_IO: error = copyin(SCARG(uap, addr), &piod, sizeof(piod)); if (error) break; switch (piod.piod_op) { case PIOD_READ_D: case PIOD_READ_I: uio.uio_rw = UIO_READ; break; case PIOD_WRITE_D: case PIOD_WRITE_I: /* * Can't write to a RAS */ if (ras_lookup(t, SCARG(uap, addr)) != (void *)-1) { return (EACCES); } uio.uio_rw = UIO_WRITE; break; default: error = EINVAL; break; } if (error) break; error = proc_vmspace_getref(l->l_proc, &vm); if (error) break; iov.iov_base = piod.piod_addr; iov.iov_len = piod.piod_len; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)(unsigned long)piod.piod_offs; uio.uio_resid = piod.piod_len; uio.uio_vmspace = vm; error = process_domem(l, lt, &uio); piod.piod_len -= uio.uio_resid; (void) copyout(&piod, SCARG(uap, addr), sizeof(piod)); uvmspace_free(vm); break; #ifdef COREDUMP case PT_DUMPCORE: if ((path = SCARG(uap, addr)) != NULL) { char *dst; int len = SCARG(uap, data); if (len < 0 || len >= MAXPATHLEN) { error = EINVAL; break; } dst = malloc(len + 1, M_TEMP, M_WAITOK); if ((error = copyin(path, dst, len)) != 0) { free(dst, M_TEMP); break; } path = dst; path[len] = '\0'; } error = coredump(lt, path); if (path) free(path, M_TEMP); break; #endif #ifdef PT_STEP case PT_STEP: /* * From the 4.4BSD PRM: * "Execution continues as in request PT_CONTINUE; however * as soon as possible after execution of at least one * instruction, execution stops again. [ ... ]" */ #endif case PT_CONTINUE: case PT_SYSCALL: case PT_DETACH: if (req == PT_SYSCALL) { if (!ISSET(t->p_slflag, PSL_SYSCALL)) { SET(t->p_slflag, PSL_SYSCALL); #ifdef __HAVE_SYSCALL_INTERN (*t->p_emul->e_syscall_intern)(t); #endif } } else { if (ISSET(t->p_slflag, PSL_SYSCALL)) { CLR(t->p_slflag, PSL_SYSCALL); #ifdef __HAVE_SYSCALL_INTERN (*t->p_emul->e_syscall_intern)(t); #endif } } p->p_trace_enabled = trace_is_enabled(p); /* * From the 4.4BSD PRM: * "The data argument is taken as a signal number and the * child's execution continues at location addr as if it * incurred that signal. Normally the signal number will * be either 0 to indicate that the signal that caused the * stop should be ignored, or that value fetched out of * the process's image indicating which signal caused * the stop. If addr is (int *)1 then execution continues * from where it stopped." */ /* Check that the data is a valid signal number or zero. */ if (SCARG(uap, data) < 0 || SCARG(uap, data) >= NSIG) { error = EINVAL; break; } uvm_lwp_hold(lt); /* If the address parameter is not (int *)1, set the pc. */ if ((int *)SCARG(uap, addr) != (int *)1) if ((error = process_set_pc(lt, SCARG(uap, addr))) != 0) { uvm_lwp_rele(lt); break; } #ifdef PT_STEP /* * Arrange for a single-step, if that's requested and possible. */ error = process_sstep(lt, req == PT_STEP); if (error) { uvm_lwp_rele(lt); break; } #endif uvm_lwp_rele(lt); if (req == PT_DETACH) { CLR(t->p_slflag, PSL_TRACED|PSL_FSTRACE|PSL_SYSCALL); /* give process back to original parent or init */ if (t->p_opptr != t->p_pptr) { struct proc *pp = t->p_opptr; proc_reparent(t, pp ? pp : initproc); } /* not being traced any more */ t->p_opptr = NULL; } signo = SCARG(uap, data); sendsig: /* Finally, deliver the requested signal (or none). */ if (t->p_stat == SSTOP) { /* * Unstop the process. If it needs to take a * signal, make all efforts to ensure that at * an LWP runs to see it. */ t->p_xstat = signo; proc_unstop(t); } else if (signo != 0) { KSI_INIT_EMPTY(&ksi); ksi.ksi_signo = signo; kpsignal2(t, &ksi); } break; case PT_KILL: /* just send the process a KILL signal. */ signo = SIGKILL; goto sendsig; /* in PT_CONTINUE, above. */ case PT_ATTACH: /* * Go ahead and set the trace flag. * Save the old parent (it's reset in * _DETACH, and also in kern_exit.c:wait4() * Reparent the process so that the tracing * proc gets to see all the action. * Stop the target. */ t->p_opptr = t->p_pptr; if (t->p_pptr != p) { struct proc *parent = t->p_pptr; if (parent->p_lock < t->p_lock) { if (!mutex_tryenter(parent->p_lock)) { mutex_exit(t->p_lock); mutex_enter(parent->p_lock); } } else if (parent->p_lock > t->p_lock) { mutex_enter(parent->p_lock); } parent->p_slflag |= PSL_CHTRACED; proc_reparent(t, p); if (parent->p_lock != t->p_lock) mutex_exit(parent->p_lock); } SET(t->p_slflag, PSL_TRACED); signo = SIGSTOP; goto sendsig; case PT_LWPINFO: if (SCARG(uap, data) != sizeof(pl)) { error = EINVAL; break; } error = copyin(SCARG(uap, addr), &pl, sizeof(pl)); if (error) break; tmp = pl.pl_lwpid; lwp_delref(lt); mutex_enter(t->p_lock); if (tmp == 0) lt = LIST_FIRST(&t->p_lwps); else { lt = lwp_find(t, tmp); if (lt == NULL) { mutex_exit(t->p_lock); error = ESRCH; break; } lt = LIST_NEXT(lt, l_sibling); } while (lt != NULL && lt->l_stat == LSZOMB) lt = LIST_NEXT(lt, l_sibling); pl.pl_lwpid = 0; pl.pl_event = 0; if (lt) { lwp_addref(lt); pl.pl_lwpid = lt->l_lid; if (lt->l_lid == t->p_sigctx.ps_lwp) pl.pl_event = PL_EVENT_SIGNAL; } mutex_exit(t->p_lock); error = copyout(&pl, SCARG(uap, addr), sizeof(pl)); break; #ifdef PT_SETREGS case PT_SETREGS: write = 1; #endif #ifdef PT_GETREGS case PT_GETREGS: /* write = 0 done above. */ #endif #if defined(PT_SETREGS) || defined(PT_GETREGS) tmp = SCARG(uap, data); if (tmp != 0 && t->p_nlwps > 1) { lwp_delref(lt); mutex_enter(t->p_lock); lt = lwp_find(t, tmp); if (lt == NULL) { mutex_exit(t->p_lock); error = ESRCH; break; } lwp_addref(lt); mutex_exit(t->p_lock); } if (!process_validregs(lt)) error = EINVAL; else { error = proc_vmspace_getref(l->l_proc, &vm); if (error) break; iov.iov_base = SCARG(uap, addr); iov.iov_len = sizeof(struct reg); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_resid = sizeof(struct reg); uio.uio_rw = write ? UIO_WRITE : UIO_READ; uio.uio_vmspace = vm; error = process_doregs(l, lt, &uio); uvmspace_free(vm); } break; #endif #ifdef PT_SETFPREGS case PT_SETFPREGS: write = 1; #endif #ifdef PT_GETFPREGS case PT_GETFPREGS: /* write = 0 done above. */ #endif #if defined(PT_SETFPREGS) || defined(PT_GETFPREGS) tmp = SCARG(uap, data); if (tmp != 0 && t->p_nlwps > 1) { lwp_delref(lt); mutex_enter(t->p_lock); lt = lwp_find(t, tmp); if (lt == NULL) { mutex_exit(t->p_lock); error = ESRCH; break; } lwp_addref(lt); mutex_exit(t->p_lock); } if (!process_validfpregs(lt)) error = EINVAL; else { error = proc_vmspace_getref(l->l_proc, &vm); if (error) break; iov.iov_base = SCARG(uap, addr); iov.iov_len = sizeof(struct fpreg); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_resid = sizeof(struct fpreg); uio.uio_rw = write ? UIO_WRITE : UIO_READ; uio.uio_vmspace = vm; error = process_dofpregs(l, lt, &uio); uvmspace_free(vm); } break; #endif #ifdef __HAVE_PTRACE_MACHDEP PTRACE_MACHDEP_REQUEST_CASES error = ptrace_machdep_dorequest(l, lt, req, SCARG(uap, addr), SCARG(uap, data)); break; #endif } if (pheld) { mutex_exit(t->p_lock); mutex_exit(proc_lock); } if (lt != NULL) lwp_delref(lt); rw_exit(&t->p_reflock); return error; }
/* * Do "physical I/O" on behalf of a user. "Physical I/O" is I/O directly * from the raw device to user buffers, and bypasses the buffer cache. * * Comments in brackets are from Leffler, et al.'s pseudo-code implementation. */ int physio(void (*strategy)(struct buf *), struct buf *obp, dev_t dev, int flags, void (*min_phys)(struct buf *), struct uio *uio) { struct iovec *iovp; struct lwp *l = curlwp; struct proc *p = l->l_proc; int i, error; struct buf *bp = NULL; struct physio_stat *ps; int concurrency = PHYSIO_CONCURRENCY - 1; error = RUN_ONCE(&physio_initialized, physio_init); if (__predict_false(error != 0)) { return error; } DPRINTF(("%s: called: off=%" PRIu64 ", resid=%zu\n", __func__, uio->uio_offset, uio->uio_resid)); flags &= B_READ | B_WRITE; if ((ps = kmem_zalloc(sizeof(*ps), KM_SLEEP)) == NULL) return ENOMEM; /* ps->ps_running = 0; */ /* ps->ps_error = 0; */ /* ps->ps_failed = 0; */ ps->ps_orig_bp = obp; ps->ps_endoffset = -1; mutex_init(&ps->ps_lock, MUTEX_DEFAULT, IPL_NONE); cv_init(&ps->ps_cv, "physio"); /* Make sure we have a buffer, creating one if necessary. */ if (obp != NULL) { /* [raise the processor priority level to splbio;] */ mutex_enter(&bufcache_lock); /* Mark it busy, so nobody else will use it. */ while (bbusy(obp, false, 0, NULL) == EPASSTHROUGH) ; mutex_exit(&bufcache_lock); concurrency = 0; /* see "XXXkludge" comment below */ } uvm_lwp_hold(l); for (i = 0; i < uio->uio_iovcnt; i++) { bool sync = true; iovp = &uio->uio_iov[i]; while (iovp->iov_len > 0) { size_t todo; vaddr_t endp; mutex_enter(&ps->ps_lock); if (ps->ps_failed != 0) { goto done_locked; } physio_wait(ps, sync ? 0 : concurrency); mutex_exit(&ps->ps_lock); if (obp != NULL) { /* * XXXkludge * some drivers use "obp" as an identifier. */ bp = obp; } else { bp = getiobuf(NULL, true); bp->b_cflags = BC_BUSY; } bp->b_dev = dev; bp->b_proc = p; bp->b_private = ps; /* * [mark the buffer busy for physical I/O] * (i.e. set B_PHYS (because it's an I/O to user * memory, and B_RAW, because B_RAW is to be * "Set by physio for raw transfers.", in addition * to the "busy" and read/write flag.) */ bp->b_oflags = 0; bp->b_cflags = BC_BUSY; bp->b_flags = flags | B_PHYS | B_RAW; bp->b_iodone = physio_biodone; /* [set up the buffer for a maximum-sized transfer] */ bp->b_blkno = btodb(uio->uio_offset); if (dbtob(bp->b_blkno) != uio->uio_offset) { error = EINVAL; goto done; } bp->b_bcount = MIN(MAXPHYS, iovp->iov_len); bp->b_data = iovp->iov_base; /* * [call minphys to bound the transfer size] * and remember the amount of data to transfer, * for later comparison. */ (*min_phys)(bp); todo = bp->b_bufsize = bp->b_bcount; #if defined(DIAGNOSTIC) if (todo > MAXPHYS) panic("todo(%zu) > MAXPHYS; minphys broken", todo); #endif /* defined(DIAGNOSTIC) */ sync = false; endp = (vaddr_t)bp->b_data + todo; if (trunc_page(endp) != endp) { /* * following requests can overlap. * note that uvm_vslock does round_page. */ sync = true; } /* * [lock the part of the user address space involved * in the transfer] * Beware vmapbuf(); it clobbers b_data and * saves it in b_saveaddr. However, vunmapbuf() * restores it. */ error = uvm_vslock(p->p_vmspace, bp->b_data, todo, (flags & B_READ) ? VM_PROT_WRITE : VM_PROT_READ); if (error) { goto done; } vmapbuf(bp, todo); BIO_SETPRIO(bp, BPRIO_TIMECRITICAL); mutex_enter(&ps->ps_lock); ps->ps_running++; mutex_exit(&ps->ps_lock); /* [call strategy to start the transfer] */ (*strategy)(bp); bp = NULL; iovp->iov_len -= todo; iovp->iov_base = (char *)iovp->iov_base + todo; uio->uio_offset += todo; uio->uio_resid -= todo; } } done: mutex_enter(&ps->ps_lock); done_locked: physio_wait(ps, 0); mutex_exit(&ps->ps_lock); if (ps->ps_failed != 0) { off_t delta; delta = uio->uio_offset - ps->ps_endoffset; KASSERT(delta > 0); uio->uio_resid += delta; /* uio->uio_offset = ps->ps_endoffset; */ } else { KASSERT(ps->ps_endoffset == -1); } if (bp != NULL && bp != obp) { putiobuf(bp); } if (error == 0) { error = ps->ps_error; } mutex_destroy(&ps->ps_lock); cv_destroy(&ps->ps_cv); kmem_free(ps, sizeof(*ps)); /* * [clean up the state of the buffer] * Remember if somebody wants it, so we can wake them up below. * Also, if we had to steal it, give it back. */ if (obp != NULL) { KASSERT((obp->b_cflags & BC_BUSY) != 0); /* * [if another process is waiting for the raw I/O buffer, * wake up processes waiting to do physical I/O; */ mutex_enter(&bufcache_lock); obp->b_cflags &= ~(BC_BUSY | BC_WANTED); obp->b_flags &= ~(B_PHYS | B_RAW); obp->b_iodone = NULL; cv_broadcast(&obp->b_busy); mutex_exit(&bufcache_lock); } uvm_lwp_rele(l); DPRINTF(("%s: done: off=%" PRIu64 ", resid=%zu\n", __func__, uio->uio_offset, uio->uio_resid)); return error; }