/*
 * set process group (setpgid/old setpgrp)
 *
 * caller does setpgid(targpid, targpgid)
 *
 * pid must be caller or child of caller (ESRCH)
 * if a child
 *	pid must be in same session (EPERM)
 *	pid can't have done an exec (EACCES)
 * if pgid != pid
 *	there must exist some pid in same session having pgid (EPERM)
 * pid must not be session leader (EPERM)
 */
int
sys_setpgid(struct setpgid_args *uap)
{
	struct proc *curp = curproc;
	struct proc *targp;		/* target process */
	struct pgrp *pgrp = NULL;	/* target pgrp */
	int error;

	if (uap->pgid < 0)
		return (EINVAL);

	if (uap->pid != 0 && uap->pid != curp->p_pid) {
		/*
		 * Operating on another pid: it must exist and be a
		 * descendant of the caller.  pfind() returns the proc
		 * with its hold count bumped, so the reference must be
		 * dropped on the failure path too.
		 */
		if ((targp = pfind(uap->pid)) == NULL || !inferior(targp)) {
			if (targp)
				PRELE(targp);
			error = ESRCH;
			targp = NULL;	/* suppress token/ref release at done: */
			goto done;
		}
		lwkt_gettoken(&targp->p_token);
		/* targp now referenced and its token is held */

		if (targp->p_pgrp == NULL ||
		    targp->p_session != curp->p_session) {
			error = EPERM;
			goto done;
		}
		/* A child that has exec'd may no longer be re-grouped */
		if (targp->p_flags & P_EXEC) {
			error = EACCES;
			goto done;
		}
	} else {
		/* Operating on ourselves; take hold + token symmetrically */
		targp = curp;
		PHOLD(targp);
		lwkt_gettoken(&targp->p_token);
	}
	if (SESS_LEADER(targp)) {
		error = EPERM;
		goto done;
	}
	if (uap->pgid == 0) {
		/* pgid 0 means "use the target's own pid as the pgid" */
		uap->pgid = targp->p_pid;
	} else if (uap->pgid != targp->p_pid) {
		/*
		 * Joining an existing group: it must exist and belong to
		 * the caller's session.  pgfind() returns a referenced
		 * pgrp, released at done:.
		 */
		if ((pgrp = pgfind(uap->pgid)) == NULL ||
		    pgrp->pg_session != curp->p_session) {
			error = EPERM;
			goto done;
		}
	}
	error = enterpgrp(targp, uap->pgid, 0);
done:
	if (pgrp)
		pgrel(pgrp);
	if (targp) {
		/* release token before dropping the hold */
		lwkt_reltoken(&targp->p_token);
		PRELE(targp);
	}
	return (error);
}
/*
 * Write to a file
 *
 * Validates that the node is a writable file, re-checks visibility of
 * the associated process (privileges may have changed since open()),
 * then hands the data to the node's fill routine either raw (PFS_RAWWR)
 * or via an sbuf built from the uio.
 */
static int
pfs_write(struct vop_write_args *va)
{
	struct vnode *vn = va->a_vp;
	struct pfs_vdata *pvd = vn->v_data;
	struct pfs_node *pn = pvd->pvd_pn;
	struct uio *uio = va->a_uio;
	struct proc *proc;
	struct sbuf sb;
	int error;

	PFS_TRACE(("%s", pn->pn_name));
	pfs_assert_not_owned(pn);

	if (vn->v_type != VREG)
		PFS_RETURN (EINVAL);
	KASSERT_PN_IS_FILE(pn);

	if (!(pn->pn_flags & PFS_WR))
		PFS_RETURN (EBADF);

	/* A node without a fill routine cannot accept data */
	if (pn->pn_fill == NULL)
		PFS_RETURN (EIO);

	/*
	 * This is necessary because either process' privileges may
	 * have changed since the open() call.
	 */
	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
		PFS_RETURN (EIO);
	if (proc != NULL) {
		/* hold the target so it can't exit mid-write; unlock it */
		_PHOLD(proc);
		PROC_UNLOCK(proc);
	}

	if (pn->pn_flags & PFS_RAWWR) {
		/* raw mode: fill routine consumes the uio directly */
		error = pn_fill(curthread, proc, pn, NULL, uio);
		if (proc != NULL)
			PRELE(proc);
		PFS_RETURN (error);
	}

	/* copy the user data into an sbuf for the fill routine */
	sbuf_uionew(&sb, uio, &error);
	if (error) {
		if (proc != NULL)
			PRELE(proc);
		PFS_RETURN (error);
	}

	error = pn_fill(curthread, proc, pn, &sb, uio);

	sbuf_delete(&sb);
	if (proc != NULL)
		PRELE(proc);
	PFS_RETURN (error);
}
/*
 * readlink reads the link of `curproc' or `file'
 *
 * Pcurproc resolves to the caller's pid as a decimal string; Pfile
 * resolves to the target process' executable path via the namecache.
 * Either way the result is copied out through the uio.
 */
static int
procfs_readlink(struct vop_readlink_args *ap)
{
	char buf[16];		/* should be enough */
	struct proc *procp;
	struct vnode *vp = ap->a_vp;
	struct pfsnode *pfs = VTOPFS(vp);
	char *fullpath, *freepath;
	int error, len;

	switch (pfs->pfs_type) {
	case Pcurproc:
		if (pfs->pfs_fileno != PROCFS_FILENO(0, Pcurproc))
			return (EINVAL);

		len = ksnprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);

		return (uiomove(buf, len, ap->a_uio));
	/*
	 * There _should_ be no way for an entire process to disappear
	 * from under us...
	 */
	case Pfile:
		/* pfs_pfind returns a held proc; PRELE on every exit path */
		procp = pfs_pfind(pfs->pfs_pid);
		if (procp == NULL || procp->p_ucred == NULL) {
			kprintf("procfs_readlink: pid %d disappeared\n",
			    pfs->pfs_pid);
			if (procp)
				PRELE(procp);
			/* degrade gracefully rather than erroring out */
			return (uiomove("unknown", sizeof("unknown") - 1,
			    ap->a_uio));
		}
		error = cache_fullpath(procp, &procp->p_textnch, &fullpath,
				       &freepath, 0);
		if (error != 0) {
			if (procp)
				PRELE(procp);
			return (uiomove("unknown", sizeof("unknown") - 1,
			    ap->a_uio));
		}
		error = uiomove(fullpath, strlen(fullpath), ap->a_uio);
		/* cache_fullpath allocated freepath; caller must free it */
		kfree(freepath, M_TEMP);
		if (procp)
			PRELE(procp);
		return (error);
	default:
		return (EINVAL);
	}
}
/*
 * close the pfsnode (vp) after doing i/o.
 * (vp) is not locked on entry or exit.
 *
 * nothing to do for procfs other than undo
 * any exclusive open flag (see _open above).
 */
static int
linprocfs_close(struct vop_close_args *ap)
{
	struct pfsnode *pfs = VTOPFS(ap->a_vp);
	struct proc *p;

	switch (pfs->pfs_type) {
	case Pmem:
		/* undo an exclusive write open (see _open) */
		if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
			pfs->pfs_flags &= ~(FWRITE|O_EXCL);
		/*
		 * If this is the last close, then it checks to see if
		 * the target process has PF_LINGER set in p_pfsflags,
		 * if this is *not* the case, then the process' stop flags
		 * are cleared, and the process is woken up.  This is
		 * to help prevent the case where a process has been
		 * told to stop on an event, but then the requesting process
		 * has gone away or forgotten about it.
		 */
		p = NULL;
		/* pfind() returns a held proc; released below */
		if ((ap->a_vp->v_opencount < 2)
		    && (p = pfind(pfs->pfs_pid))
		    && !(p->p_pfsflags & PF_LINGER)) {
			p->p_stops = 0;
			p->p_step = 0;
			wakeup(&p->p_step);
		}
		if (p)
			PRELE(p);
		break;
	default:
		break;
	}
	return (vop_stdclose(ap));
}
/* * Get an arbitrary pid's process group id */ int sys_getpgid(struct getpgid_args *uap) { struct proc *p = curproc; struct proc *pt; int error; error = 0; if (uap->pid == 0) { pt = p; PHOLD(pt); } else { pt = pfind(uap->pid); if (pt == NULL) error = ESRCH; } if (error == 0) { lwkt_gettoken_shared(&pt->p_token); uap->sysmsg_result = pt->p_pgrp->pg_id; lwkt_reltoken(&pt->p_token); } if (pt) PRELE(pt); return (error); }
/*
 * Read a symbolic link
 *
 * The link target is produced by the node's fill routine into a
 * stack-backed sbuf, then copied out through the uio.  The vnode lock
 * is dropped around the fill call and reacquired afterwards.
 */
static int
pfs_readlink(struct vop_readlink_args *va)
{
	struct vnode *vn = va->a_vp;
	struct pfs_vdata *pvd = vn->v_data;
	struct pfs_node *pn = pvd->pvd_pn;
	struct uio *uio = va->a_uio;
	struct proc *proc = NULL;
	struct thread *td = curthread;
	char buf[PATH_MAX];
	struct sbuf sb;
	int error, locked;

	PFS_TRACE(("%s", pn->pn_name));
	pfs_assert_not_owned(pn);

	if (vn->v_type != VLNK)
		PFS_RETURN (EINVAL);
	KASSERT_PN_IS_LINK(pn);

	if (pn->pn_fill == NULL)
		PFS_RETURN (EIO);

	if (pvd->pvd_pid != NO_PID) {
		/* look up and hold the process this link refers to */
		if ((proc = pfind(pvd->pvd_pid)) == NULL)
			PFS_RETURN (EIO);
		if (proc->p_flag & P_WEXIT) {
			/* target is exiting; don't bother holding it */
			PROC_UNLOCK(proc);
			PFS_RETURN (EIO);
		}
		_PHOLD(proc);
		PROC_UNLOCK(proc);
	}
	/*
	 * Keep the vnode from being recycled but drop its lock while
	 * the fill routine runs; restore the previous lock state after.
	 */
	vhold(vn);
	locked = VOP_ISLOCKED(vn, td);
	VOP_UNLOCK(vn, 0, td);

	/* sbuf_new() can't fail with a static buffer */
	sbuf_new(&sb, buf, sizeof buf, 0);

	error = pn_fill(td, proc, pn, &sb, NULL);

	if (proc != NULL)
		PRELE(proc);
	vn_lock(vn, locked | LK_RETRY, td);
	vdrop(vn);

	if (error) {
		sbuf_delete(&sb);
		PFS_RETURN (error);
	}

	sbuf_finish(&sb);
	error = uiomove_frombuf(sbuf_data(&sb), sbuf_len(&sb), uio);
	sbuf_delete(&sb);
	PFS_RETURN (error);
}
/*
 * Swap-in scheduler thread.  Repeatedly scans allproc for the best
 * swapped-out candidate (scheduler_callback fills info.pp with a held
 * proc) and faults it back in, sleeping when there is nothing to do or
 * memory is tight.  Never returns.
 */
static void
scheduler(void *dummy)
{
	struct scheduler_info info;
	struct proc *p;

	KKASSERT(!IN_CRITICAL_SECT(curthread));
loop:
	scheduler_notify = 0;
	/*
	 * Don't try to swap anything in if we are low on memory.
	 */
	if (vm_page_count_severe()) {
		vm_wait(0);
		goto loop;
	}

	/*
	 * Look for a good candidate to wake up
	 *
	 * XXX we should make the schedule thread pcpu and then use a
	 * segmented allproc scan.
	 */
	info.pp = NULL;
	info.ppri = INT_MIN;
	allproc_scan(scheduler_callback, &info, 0);

	/*
	 * Nothing to do, back to sleep for at least 1/10 of a second.  If
	 * we are woken up, immediately process the next request.  If
	 * multiple requests have built up the first is processed
	 * immediately and the rest are staggered.
	 */
	if ((p = info.pp) == NULL) {
		tsleep(&proc0, 0, "nowork", hz / 10);
		if (scheduler_notify == 0)
			tsleep(&scheduler_notify, 0, "nowork", 0);
		goto loop;
	}

	/*
	 * Fault the selected process in, then wait for a short period of
	 * time and loop up.
	 *
	 * XXX we need a heuristic to get a measure of system stress and
	 * then adjust our stagger wakeup delay accordingly.
	 */
	lwkt_gettoken(&p->p_token);
	faultin(p);
	p->p_swtime = 0;
	lwkt_reltoken(&p->p_token);
	/* drop the hold taken by scheduler_callback when it selected p */
	PRELE(p);
	tsleep(&proc0, 0, "swapin", hz / 10);
	goto loop;
}
/*
 * Copy len bytes from kernel buffer kaddr into process p's address
 * space at uaddr.  The target is held across the copy so it cannot be
 * reaped.  Returns 0 on success or ENOMEM on a short/failed write.
 */
static int
uwrite(proc_t *p, void *kaddr, size_t len, uintptr_t uaddr)
{
	ssize_t written;

	PHOLD(p);
	written = proc_writemem(curthread, p, uaddr, kaddr, len);
	PRELE(p);

	return (written != len ? ENOMEM : 0);
}
/*
 * Perform a read or write (op) of len bytes between kernel buffer
 * kaddr and offset uaddr in process p's address space, via a
 * system-space uio.  Returns 0 on success, -1 on failure.
 */
static int
proc_ops(int op, proc_t *p, void *kaddr, off_t uaddr, size_t len)
{
	struct uio auio;
	struct iovec aiov;
	int rc;

	aiov.iov_base = kaddr;
	aiov.iov_len = len;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = uaddr;
	auio.uio_resid = len;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = op;
	auio.uio_td = curthread;

	/* hold the target across the transfer */
	PHOLD(p);
	rc = (proc_rwmem(p, &auio) < 0) ? -1 : 0;
	PRELE(p);

	return (rc);
}
/*
 * set things up for doing i/o on
 * the pfsnode (vp). (vp) is locked
 * on entry, and should be left locked
 * on exit.
 *
 * for procfs we don't need to do anything
 * in particular for i/o. all that is done
 * is to support exclusive open on process
 * memory images.
 *
 * procfs_open(struct vnode *a_vp, int a_mode, struct ucred *a_cred,
 *	       struct file *a_fp)
 */
static int
procfs_open(struct vop_open_args *ap)
{
	struct pfsnode *pfs = VTOPFS(ap->a_vp);
	struct proc *p1, *p2;
	int error;

	/* pfs_pfind returns a held proc; released at done: */
	p2 = pfs_pfind(pfs->pfs_pid);
	if (p2 == NULL)
		return (ENOENT);
	if (pfs->pfs_pid && !PRISON_CHECK(ap->a_cred, p2->p_ucred)) {
		error = ENOENT;
		goto done;
	}

	switch (pfs->pfs_type) {
	case Pmem:
		/*
		 * Enforce mutual exclusion between an exclusive open and
		 * any write open of the process memory image.
		 */
		if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
		    ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) {
			error = EBUSY;
			goto done;
		}

		p1 = curproc;
		KKASSERT(p1);
		/* Can't trace a process that's currently exec'ing. */
		if ((p2->p_flag & P_INEXEC) != 0) {
			error = EAGAIN;
			goto done;
		}
		/* caller must have debug rights over the target */
		if (!CHECKIO(p1, p2) || p_trespass(ap->a_cred, p2->p_ucred)) {
			error = EPERM;
			goto done;
		}

		/* remember a write open so a later O_EXCL open fails */
		if (ap->a_mode & FWRITE)
			pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
		break;
	default:
		break;
	}
	error = vop_stdopen(ap);
done:
	PRELE(p2);
	return error;
}
/*
 * Emit directory entries for a per-process procfs directory.
 * uio_offset is interpreted as an index into proc_targets[]; entries
 * whose pt_valid callback rejects the (first) lwp are skipped.
 */
static int
procfs_readdir_proc(struct vop_readdir_args *ap)
{
	struct pfsnode *pfs;
	int error, i, retval;
	struct proc *p;
	struct lwp *lp;
	struct proc_target *pt;
	struct uio *uio = ap->a_uio;

	pfs = VTOPFS(ap->a_vp);
	/* pfs_pfind returns a held proc; released at done: */
	p = pfs_pfind(pfs->pfs_pid);
	if (p == NULL)
		return(0);
	if (!PRISON_CHECK(ap->a_cred, p->p_ucred)) {
		error = 0;
		goto done;
	}
	/* XXX lwp, not MPSAFE */
	lp = FIRST_LWP_IN_PROC(p);

	error = 0;
	i = (int)uio->uio_offset;
	if (i < 0) {
		error = EINVAL;
		goto done;
	}

	for (pt = &proc_targets[i];
	     !error && uio->uio_resid > 0 && i < nproc_targets; pt++, i++) {
		if (pt->pt_valid && (*pt->pt_valid)(lp) == 0)
			continue;

		retval = vop_write_dirent(&error, uio,
		    PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype), pt->pt_type,
		    pt->pt_namlen, pt->pt_name);
		if (retval)
			break;
	}

	uio->uio_offset = (off_t)i;
	/*
	 * NOTE(review): this unconditionally discards any error set by
	 * vop_write_dirent above — presumably intentional so a partial
	 * directory read still succeeds, but worth confirming.
	 */
	error = 0;

done:
	PRELE(p);
	return error;
}
static int scheduler_callback(struct proc *p, void *data) { struct scheduler_info *info = data; struct lwp *lp; segsz_t pgs; int pri; if (p->p_flags & P_SWAPWAIT) { pri = 0; FOREACH_LWP_IN_PROC(lp, p) { /* XXX lwp might need a different metric */ pri += lp->lwp_slptime; } pri += p->p_swtime - p->p_nice * 8; /* * The more pages paged out while we were swapped, * the more work we have to do to get up and running * again and the lower our wakeup priority. * * Each second of sleep time is worth ~1MB */ lwkt_gettoken(&p->p_vmspace->vm_map.token); pgs = vmspace_resident_count(p->p_vmspace); if (pgs < p->p_vmspace->vm_swrss) { pri -= (p->p_vmspace->vm_swrss - pgs) / (1024 * 1024 / PAGE_SIZE); } lwkt_reltoken(&p->p_vmspace->vm_map.token); /* * If this process is higher priority and there is * enough space, then select this process instead of * the previous selection. */ if (pri > info->ppri) { if (info->pp) PRELE(info->pp); PHOLD(p); info->pp = p; info->ppri = pri; } }
/* * Get an arbitrary pid's session id. */ int sys_getsid(struct getsid_args *uap) { struct proc *p = curproc; struct proc *pt; int error; error = 0; if (uap->pid == 0) { pt = p; PHOLD(pt); } else { pt = pfind(uap->pid); if (pt == NULL) error = ESRCH; } if (error == 0) uap->sysmsg_result = pt->p_session->s_sid; if (pt) PRELE(pt); return (error); }
/*
 * Get an arbitrary pid's process group id
 *
 * pid 0 means the calling process.  The target is held (PHOLD/pfind)
 * across the access; note that unlike the tokenized variant of this
 * syscall, p_pgrp is read here without the target's token.
 */
int
sys_getpgid(struct getpgid_args *uap)
{
	struct proc *p = curproc;
	struct proc *pt;
	int error;

	error = 0;

	if (uap->pid == 0) {
		pt = p;
		PHOLD(pt);
	} else {
		/* pfind returns the proc held, or NULL */
		pt = pfind(uap->pid);
		if (pt == NULL)
			error = ESRCH;
	}
	/* XXX MPSAFE on pgrp? */
	if (error == 0)
		uap->sysmsg_result = pt->p_pgrp->pg_id;
	if (pt)
		PRELE(pt);
	return (error);
}
/*
 * Handle a page fault at virtual address eva taken from the trapframe.
 *
 * Kernel-space addresses fault against kernel_map (user-mode faults
 * there are fatal); user-space addresses fault against the lwp's
 * vmspace.  Returns 0 on successful resolution, a signal number
 * (SIGBUS/SIGSEGV) to deliver, -1 on a fatal kernel fault, or -2 for
 * the F00F-bug illegal-instruction rewrite.
 */
int
trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map = 0;
	int rv = 0;
	int fault_flags;
	vm_prot_t ftype;
	thread_t td = curthread;
	struct lwp *lp = td->td_lwp;

	va = trunc_page(eva);
	if (va >= KERNBASE) {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 * An exception:  if the faulting address is the invalid
		 * instruction entry in the IDT, then the Intel Pentium
		 * F00F bug workaround was triggered, and we need to
		 * treat it is as an illegal instruction, and not a page
		 * fault.
		 */
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
		if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) {
			frame->tf_trapno = T_PRIVINFLT;
			return -2;
		}
#endif
		if (usermode)
			goto nogo;

		map = &kernel_map;
	} else {
		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		if (lp != NULL)
			vm = lp->lwp_vmspace;

		if (vm == NULL)
			goto nogo;

		map = &vm->vm_map;
	}

	/* derive the required access type from the hardware error code */
	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_WRITE;
	else
		ftype = VM_PROT_READ;

	if (map != &kernel_map) {
		/*
		 * Keep swapout from messing with us during this
		 *	critical time.
		 */
		PHOLD(lp->lwp_proc);

		/*
		 * Issue fault
		 */
		fault_flags = 0;
		if (usermode)
			fault_flags |= VM_FAULT_BURST;
		if (ftype & VM_PROT_WRITE)
			fault_flags |= VM_FAULT_DIRTY;
		else
			fault_flags |= VM_FAULT_NORMAL;
		rv = vm_fault(map, va, ftype, fault_flags);
		PRELE(lp->lwp_proc);
	} else {
		/*
		 * Don't have to worry about process locking or stacks in the
		 * kernel.
		 */
		rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
	}
	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		/*
		 * NOTE: cpu doesn't push esp on kernel trap
		 */
		if (td->td_gd->gd_intr_nesting_level == 0 &&
		    td->td_pcb->pcb_onfault &&
		    td->td_pcb->pcb_onfault_sp == (int)&frame->tf_esp) {
			/* recoverable via the onfault handler (copyin etc) */
			frame->tf_eip = (register_t)td->td_pcb->pcb_onfault;
			return (0);
		}
		if (td->td_gd->gd_intr_nesting_level == 0 &&
		    td->td_pcb->pcb_onfault) {
			kprintf("ESP mismatch %p %08x\n", &frame->tf_esp,
				td->td_pcb->pcb_onfault_sp);
		}
		trap_fatal(frame, eva);
		return (-1);
	}

	/* kludge to pass faulting virtual address to sendsig */
	frame->tf_xflags = frame->tf_err;
	frame->tf_err = eva;

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}
/*
 * Invent attributes for pfsnode (vp) and store
 * them in (vap).
 * Directories lengths are returned as zero since
 * any real length would require the genuine size
 * to be computed, and nothing cares anyway.
 *
 * this is relatively minimal for procfs.
 *
 * procfs_getattr(struct vnode *a_vp, struct vattr *a_vap)
 */
static int
procfs_getattr(struct vop_getattr_args *ap)
{
	struct pfsnode *pfs = VTOPFS(ap->a_vp);
	struct vattr *vap = ap->a_vap;
	struct proc *procp;
	int error;

	/*
	 * First make sure that the process and its credentials
	 * still exist.
	 */
	switch (pfs->pfs_type) {
	case Proot:
	case Pcurproc:
		/* these node types have no backing process */
		procp = NULL;
		break;
	default:
		/* pfs_pfind returns a held proc; released at done: */
		procp = pfs_pfind(pfs->pfs_pid);
		if (procp == NULL || procp->p_ucred == NULL) {
			error = ENOENT;
			goto done;
		}
	}

	error = 0;

	/* start by zeroing out the attributes */
	VATTR_NULL(vap);

	/* next do all the common fields */
	vap->va_type = ap->a_vp->v_type;
	vap->va_mode = pfs->pfs_mode;
	vap->va_fileid = pfs->pfs_fileno;
	vap->va_flags = 0;
	vap->va_blocksize = PAGE_SIZE;
	vap->va_bytes = vap->va_size = 0;
	vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];

	/*
	 * Make all times be current TOD.
	 * It would be possible to get the process start
	 * time from the p_stat structure, but there's
	 * no "file creation" time stamp anyway, and the
	 * p_stat structure is not addressible if u. gets
	 * swapped out for that process.
	 */
	nanotime(&vap->va_ctime);
	vap->va_atime = vap->va_mtime = vap->va_ctime;

	/*
	 * If the process has exercised some setuid or setgid
	 * privilege, then rip away read/write permission so
	 * that only root can gain access.
	 */
	switch (pfs->pfs_type) {
	case Pctl:
	case Pregs:
	case Pfpregs:
	case Pdbregs:
	case Pmem:
		/* procp is non-NULL for all of these node types */
		if (procp->p_flag & P_SUGID)
			vap->va_mode &= ~((VREAD|VWRITE)|
					  ((VREAD|VWRITE)>>3)|
					  ((VREAD|VWRITE)>>6));
		break;
	default:
		break;
	}

	/*
	 * now do the object specific fields
	 *
	 * The size could be set from struct reg, but it's hardly
	 * worth the trouble, and it puts some (potentially) machine
	 * dependent data into this machine-independent code.  If it
	 * becomes important then this function should break out into
	 * a per-file stat function in the corresponding .c file.
	 */

	vap->va_nlink = 1;
	if (procp) {
		vap->va_uid = procp->p_ucred->cr_uid;
		vap->va_gid = procp->p_ucred->cr_gid;
	}

	switch (pfs->pfs_type) {
	case Proot:
		/*
		 * Set nlink to 1 to tell fts(3) we don't actually know.
		 */
		vap->va_nlink = 1;
		vap->va_uid = 0;
		vap->va_gid = 0;
		vap->va_size = vap->va_bytes = DEV_BSIZE;
		break;

	case Pcurproc: {
		char buf[16];		/* should be enough */

		vap->va_uid = 0;
		vap->va_gid = 0;
		/* size of the "curproc" symlink target (the pid string) */
		vap->va_size = vap->va_bytes =
		    ksnprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
		break;
	}

	case Pproc:
		vap->va_nlink = nproc_targets;
		vap->va_size = vap->va_bytes = DEV_BSIZE;
		break;

	case Pfile: {
		char *fullpath, *freepath;

		/* advertise the executable path length, if resolvable */
		error = cache_fullpath(procp, &procp->p_textnch,
				       &fullpath, &freepath, 0);
		if (error == 0) {
			vap->va_size = strlen(fullpath);
			kfree(freepath, M_TEMP);
		} else {
			vap->va_size = sizeof("unknown") - 1;
			error = 0;
		}
		vap->va_bytes = vap->va_size;
		break;
	}

	case Pmem:
		/*
		 * If we denied owner access earlier, then we have to
		 * change the owner to root - otherwise 'ps' and friends
		 * will break even though they are setgid kmem. *SIGH*
		 */
		if (procp->p_flag & P_SUGID)
			vap->va_uid = 0;
		else
			vap->va_uid = procp->p_ucred->cr_uid;
		break;

	case Pregs:
		vap->va_bytes = vap->va_size = sizeof(struct reg);
		break;

	case Pfpregs:
		vap->va_bytes = vap->va_size = sizeof(struct fpreg);
		break;

	case Pdbregs:
		vap->va_bytes = vap->va_size = sizeof(struct dbreg);
		break;

	case Ptype:
	case Pmap:
	case Pctl:
	case Pstatus:
	case Pnote:
	case Pnotepg:
	case Pcmdline:
	case Prlimit:
		break;

	default:
		panic("procfs_getattr");
	}

done:
	if (procp)
		PRELE(procp);
	return (error);
}
/* * do an ioctl operation on a pfsnode (vp). * (vp) is not locked on entry or exit. */ static int procfs_ioctl(struct vop_ioctl_args *ap) { struct pfsnode *pfs = VTOPFS(ap->a_vp); struct proc *procp; struct proc *p; int error; int signo; struct procfs_status *psp; unsigned char flags; procp = pfind(pfs->pfs_pid); if (procp == NULL) return ENOTTY; p = curproc; if (p == NULL) { error = EINVAL; goto done; } /* Can't trace a process that's currently exec'ing. */ if ((procp->p_flag & P_INEXEC) != 0) { error = EAGAIN; goto done; } if (!CHECKIO(p, procp) || p_trespass(ap->a_cred, procp->p_ucred)) { error = EPERM; goto done; } switch (ap->a_command) { case PIOCBIS: procp->p_stops |= *(unsigned int*)ap->a_data; break; case PIOCBIC: procp->p_stops &= ~*(unsigned int*)ap->a_data; break; case PIOCSFL: /* * NFLAGS is "non-suser_xxx flags" -- currently, only * PFS_ISUGID ("ignore set u/g id"); */ #define NFLAGS (PF_ISUGID) flags = (unsigned char)*(unsigned int*)ap->a_data; if (flags & NFLAGS && (error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0))) goto done; procp->p_pfsflags = flags; break; case PIOCGFL: *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags; break; case PIOCSTATUS: /* * NOTE: syscall entry deals with stopevents and may run without * the MP lock. */ psp = (struct procfs_status *)ap->a_data; psp->flags = procp->p_pfsflags; psp->events = procp->p_stops; spin_lock(&procp->p_spin); if (procp->p_step) { psp->state = 0; psp->why = procp->p_stype; psp->val = procp->p_xstat; spin_unlock(&procp->p_spin); } else { psp->state = 1; spin_unlock(&procp->p_spin); psp->why = 0; /* Not defined values */ psp->val = 0; /* Not defined values */ } break; case PIOCWAIT: /* * NOTE: syscall entry deals with stopevents and may run without * the MP lock. 
*/ psp = (struct procfs_status *)ap->a_data; spin_lock(&procp->p_spin); while (procp->p_step == 0) { tsleep_interlock(&procp->p_stype, PCATCH); spin_unlock(&procp->p_spin); error = tsleep(&procp->p_stype, PCATCH | PINTERLOCKED, "piocwait", 0); if (error) goto done; spin_lock(&procp->p_spin); } spin_unlock(&procp->p_spin); psp->state = 1; /* It stopped */ psp->flags = procp->p_pfsflags; psp->events = procp->p_stops; psp->why = procp->p_stype; /* why it stopped */ psp->val = procp->p_xstat; /* any extra info */ break; case PIOCCONT: /* Restart a proc */ /* * NOTE: syscall entry deals with stopevents and may run without * the MP lock. However, the caller is presumably interlocked * by having waited. */ if (procp->p_step == 0) { error = EINVAL; /* Can only start a stopped process */ goto done; } if ((signo = *(int*)ap->a_data) != 0) { if (signo >= NSIG || signo <= 0) { error = EINVAL; goto done; } ksignal(procp, signo); } procp->p_step = 0; wakeup(&procp->p_step); break; default: error = ENOTTY; goto done; } error = 0; done: PRELE(procp); return 0; }
/*
 * lookup.  this is incredibly complicated in the general case, however
 * for most pseudo-filesystems very little needs to be done.
 *
 * procfs_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
 *		 struct componentname *a_cnp)
 */
static int
procfs_lookup(struct vop_old_lookup_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	struct vnode **vpp = ap->a_vpp;
	struct vnode *dvp = ap->a_dvp;
	char *pname = cnp->cn_nameptr;
	/* struct proc *curp = cnp->cn_proc; */
	struct proc_target *pt;
	pid_t pid;
	struct pfsnode *pfs;
	struct proc *p;
	struct lwp *lp;
	int i;
	int error;

	*vpp = NULL;

	/* procfs is read-only: no delete or rename */
	if (cnp->cn_nameiop == NAMEI_DELETE ||
	    cnp->cn_nameiop == NAMEI_RENAME) {
		return (EROFS);
	}

	p = NULL;
	error = 0;
	if (cnp->cn_namelen == 1 && *pname == '.') {
		/* "." resolves to the directory itself */
		*vpp = dvp;
		vref(*vpp);
		goto out;
	}

	pfs = VTOPFS(dvp);
	switch (pfs->pfs_type) {
	case Proot:
		if (cnp->cn_flags & CNP_ISDOTDOT)
			return (EIO);

		if (CNEQ(cnp, "curproc", 7)) {
			error = procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc);
			goto out;
		}

		/* otherwise the name must be a pid */
		pid = atopid(pname, cnp->cn_namelen);
		if (pid == NO_PID)
			break;

		/* pfs_pfind returns a held proc; released at out: */
		p = pfs_pfind(pid);
		if (p == NULL)
			break;

		if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred))
			break;

		/* honor kern.ps_showallprocs visibility policy */
		if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 &&
		    ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid)
			break;

		error = procfs_allocvp(dvp->v_mount, vpp, pid, Pproc);
		goto out;

	case Pproc:
		if (cnp->cn_flags & CNP_ISDOTDOT) {
			error = procfs_root(dvp->v_mount, vpp);
			goto out;
		}

		p = pfs_pfind(pfs->pfs_pid);
		if (p == NULL)
			break;
		/* XXX lwp */
		lp = FIRST_LWP_IN_PROC(p);

		if (!PRISON_CHECK(ap->a_cnp->cn_cred, p->p_ucred))
			break;

		if (ps_showallprocs == 0 && ap->a_cnp->cn_cred->cr_uid != 0 &&
		    ap->a_cnp->cn_cred->cr_uid != p->p_ucred->cr_uid)
			break;

		/* match the name against the per-process target table */
		for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
			if (cnp->cn_namelen == pt->pt_namlen &&
			    bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
			    (pt->pt_valid == NULL || (*pt->pt_valid)(lp)))
				goto found;
		}
		break;
	found:
		error = procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
				       pt->pt_pfstype);
		goto out;

	default:
		error = ENOTDIR;
		goto out;
	}
	/* fell through: name did not resolve */
	if (cnp->cn_nameiop == NAMEI_LOOKUP)
		error = ENOENT;
	else
		error = EROFS;

	/*
	 * If no error occured *vpp will hold a referenced locked vnode.
	 * dvp was passed to us locked and *vpp must be returned locked.
	 * If *vpp != dvp then we should unlock dvp if (1) this is not the
	 * last component or (2) CNP_LOCKPARENT is not set.
	 */
out:
	if (error == 0 && *vpp != dvp) {
		if ((cnp->cn_flags & CNP_LOCKPARENT) == 0) {
			cnp->cn_flags |= CNP_PDIRUNLOCK;
			vn_unlock(dvp);
		}
	}
	if (p)
		PRELE(p);
	return (error);
}
/* * do an ioctl operation on a pfsnode (vp). * (vp) is not locked on entry or exit. */ static int linprocfs_ioctl(struct vop_ioctl_args *ap) { struct pfsnode *pfs = VTOPFS(ap->a_vp); struct proc *procp; int error; int signo; struct procfs_status *psp; unsigned char flags; procp = pfind(pfs->pfs_pid); if (procp == NULL) { return ENOTTY; } if (p_trespass(ap->a_cred, procp->p_ucred)) { error = EPERM; goto done; } switch (ap->a_command) { case PIOCBIS: procp->p_stops |= *(unsigned int*)ap->a_data; break; case PIOCBIC: procp->p_stops &= ~*(unsigned int*)ap->a_data; break; case PIOCSFL: /* * NFLAGS is "non-suser_xxx flags" -- currently, only * PFS_ISUGID ("ignore set u/g id"); */ #define NFLAGS (PF_ISUGID) flags = (unsigned char)*(unsigned int*)ap->a_data; if (flags & NFLAGS && (error = priv_check_cred(ap->a_cred, PRIV_ROOT, 0))) goto done; procp->p_pfsflags = flags; break; case PIOCGFL: *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags; case PIOCSTATUS: psp = (struct procfs_status *)ap->a_data; psp->state = (procp->p_step == 0); psp->flags = procp->p_pfsflags; psp->events = procp->p_stops; if (procp->p_step) { psp->why = procp->p_stype; psp->val = procp->p_xstat; } else { psp->why = psp->val = 0; /* Not defined values */ } break; case PIOCWAIT: psp = (struct procfs_status *)ap->a_data; if (procp->p_step == 0) { error = tsleep(&procp->p_stype, PCATCH, "piocwait", 0); if (error) goto done; } psp->state = 1; /* It stopped */ psp->flags = procp->p_pfsflags; psp->events = procp->p_stops; psp->why = procp->p_stype; /* why it stopped */ psp->val = procp->p_xstat; /* any extra info */ break; case PIOCCONT: /* Restart a proc */ if (procp->p_step == 0) { error = EINVAL; /* Can only start a stopped process */ goto done; } if ((signo = *(int*)ap->a_data) != 0) { if (signo >= NSIG || signo <= 0) { error = EINVAL; goto done; } ksignal(procp, signo); } procp->p_step = 0; wakeup(&procp->p_step); break; default: error = ENOTTY; goto done; } error = 0; done: if (procp) 
PRELE(procp); return error; }
/*
 * Perform raw (unbuffered) I/O directly between the device and the
 * caller's iovecs, chunking each iovec to the driver's iosize limit and
 * the pbuf's mapping capacity.  User-space buffers are validated with
 * useracc() and wired via vmapbuf() for the duration of each transfer.
 */
int
physio(dev_t dev, struct uio *uio, int ioflag)
{
	int i;
	int error;
	int spl;
	caddr_t sa;
	off_t blockno;
	u_int iolen;
	struct buf *bp;

	/* Keep the process UPAGES from being swapped. XXX: why ? */
	PHOLD(curproc);

	bp = getpbuf(NULL);
	sa = bp->b_data;
	error = bp->b_error = 0;

	/* XXX: sanity check */
	if(dev->si_iosize_max < PAGE_SIZE) {
		printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n",
		    devtoname(dev), dev->si_iosize_max);
		dev->si_iosize_max = DFLTPHYS;
	}

	for (i = 0; i < uio->uio_iovcnt; i++) {
		while (uio->uio_iov[i].iov_len) {
			/* set up the pbuf for this chunk of the iovec */
			if (uio->uio_rw == UIO_READ)
				bp->b_flags = B_PHYS | B_CALL | B_READ;
			else 
				bp->b_flags = B_PHYS | B_CALL | B_WRITE;
			bp->b_dev = dev;
			bp->b_iodone = physwakeup;
			bp->b_data = uio->uio_iov[i].iov_base;
			bp->b_bcount = uio->uio_iov[i].iov_len;
			bp->b_offset = uio->uio_offset;
			bp->b_saveaddr = sa;

			/* Don't exceed drivers iosize limit */
			if (bp->b_bcount > dev->si_iosize_max)
				bp->b_bcount = dev->si_iosize_max;

			/* 
			 * Make sure the pbuf can map the request
			 * XXX: The pbuf has kvasize = MAXPHYS so a request
			 * XXX: larger than MAXPHYS - PAGE_SIZE must be
			 * XXX: page aligned or it will be fragmented.
			 */
			iolen = ((vm_offset_t) bp->b_data) & PAGE_MASK;
			if ((bp->b_bcount + iolen) > bp->b_kvasize) {
				bp->b_bcount = bp->b_kvasize;
				if (iolen != 0)
					bp->b_bcount -= PAGE_SIZE;
			}
			bp->b_bufsize = bp->b_bcount;

			blockno = bp->b_offset >> DEV_BSHIFT;
			if ((daddr_t)blockno != blockno) {
				error = EINVAL; /* blockno overflow */
				goto doerror;
			}
			bp->b_blkno = blockno;

			if (uio->uio_segflg == UIO_USERSPACE) {
				/*
				 * Verify accessibility: reads need the user
				 * buffer writable, writes need it readable.
				 */
				if (!useracc(bp->b_data, bp->b_bufsize,
					bp->b_flags & B_READ ?
					VM_PROT_WRITE : VM_PROT_READ)) {
					error = EFAULT;
					goto doerror;
				}
				vmapbuf(bp);
			}

			BUF_STRATEGY(bp, 0);
			/* wait for the transfer to complete */
			spl = splbio();
			while ((bp->b_flags & B_DONE) == 0)
				tsleep((caddr_t)bp, PRIBIO, "physstr", 0);
			splx(spl);

			if (uio->uio_segflg == UIO_USERSPACE)
				vunmapbuf(bp);

			/* advance the uio by what was actually transferred */
			iolen = bp->b_bcount - bp->b_resid;
			if (iolen == 0 && !(bp->b_flags & B_ERROR))
				goto doerror;	/* EOF */
			uio->uio_iov[i].iov_len -= iolen;
			uio->uio_iov[i].iov_base += iolen;
			uio->uio_resid -= iolen;
			uio->uio_offset += iolen;
			if( bp->b_flags & B_ERROR) {
				error = bp->b_error;
				goto doerror;
			}
		}
	}
doerror:
	relpbuf(bp, NULL);
	PRELE(curproc);
	return (error);
}
/*
 * Perform raw (unbuffered) I/O between the character device and the
 * caller's iovecs.  Each iovec is chunked to the driver's iosize limit
 * and the pbuf's mapping capacity; user-space buffers are wired with
 * vmapbuf() around each transfer.  Resource usage (inblock/oublock) is
 * charged to the current thread.
 */
int
physio(struct cdev *dev, struct uio *uio, int ioflag)
{
	int i;
	int error;
	caddr_t sa;
	u_int iolen;
	struct buf *bp;

	/* Keep the process UPAGES from being swapped. XXX: why ? */
	PHOLD(curproc);

	bp = getpbuf(NULL);
	sa = bp->b_data;
	error = 0;

	/* XXX: sanity check */
	if(dev->si_iosize_max < PAGE_SIZE) {
		printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n",
		    devtoname(dev), dev->si_iosize_max);
		dev->si_iosize_max = DFLTPHYS;
	}

	for (i = 0; i < uio->uio_iovcnt; i++) {
		while (uio->uio_iov[i].iov_len) {
			bp->b_flags = 0;
			if (uio->uio_rw == UIO_READ) {
				bp->b_iocmd = BIO_READ;
				curthread->td_ru.ru_inblock++;
			} else {
				bp->b_iocmd = BIO_WRITE;
				curthread->td_ru.ru_oublock++;
			}
			bp->b_iodone = bdone;
			bp->b_data = uio->uio_iov[i].iov_base;
			bp->b_bcount = uio->uio_iov[i].iov_len;
			bp->b_offset = uio->uio_offset;
			bp->b_iooffset = uio->uio_offset;
			bp->b_saveaddr = sa;

			/* Don't exceed drivers iosize limit */
			if (bp->b_bcount > dev->si_iosize_max)
				bp->b_bcount = dev->si_iosize_max;

			/*
			 * Make sure the pbuf can map the request
			 * XXX: The pbuf has kvasize = MAXPHYS so a request
			 * XXX: larger than MAXPHYS - PAGE_SIZE must be
			 * XXX: page aligned or it will be fragmented.
			 */
			iolen = ((vm_offset_t) bp->b_data) & PAGE_MASK;
			if ((bp->b_bcount + iolen) > bp->b_kvasize) {
				bp->b_bcount = bp->b_kvasize;
				if (iolen != 0)
					bp->b_bcount -= PAGE_SIZE;
			}
			bp->b_bufsize = bp->b_bcount;

			bp->b_blkno = btodb(bp->b_offset);

			if (uio->uio_segflg == UIO_USERSPACE)
				if (vmapbuf(bp) < 0) {
					error = EFAULT;
					goto doerror;
				}

			dev_strategy(dev, bp);
			/* sleep until the driver signals completion */
			if (uio->uio_rw == UIO_READ)
				bwait(bp, PRIBIO, "physrd");
			else
				bwait(bp, PRIBIO, "physwr");

			if (uio->uio_segflg == UIO_USERSPACE)
				vunmapbuf(bp);

			/* advance the uio by what was actually transferred */
			iolen = bp->b_bcount - bp->b_resid;
			if (iolen == 0 && !(bp->b_ioflags & BIO_ERROR))
				goto doerror;	/* EOF */
			uio->uio_iov[i].iov_len -= iolen;
			uio->uio_iov[i].iov_base =
			    (char *)uio->uio_iov[i].iov_base + iolen;
			uio->uio_resid -= iolen;
			uio->uio_offset += iolen;
			if( bp->b_ioflags & BIO_ERROR) {
				error = bp->b_error;
				goto doerror;
			}
		}
	}
doerror:
	relpbuf(bp, NULL);
	PRELE(curproc);
	return (error);
}
/*
 * Process only has its hold count bumped, we need the token
 * to safely scan the LWPs
 *
 * allproc_scan callback: compute a swap-in priority for processes in
 * P_SWAPWAIT and remember the best candidate in info->pp (held).
 * Always returns 0 so the scan continues over all processes.
 */
static int
scheduler_callback(struct proc *p, void *data)
{
	struct scheduler_info *info = data;
	struct vmspace *vm;
	struct lwp *lp;
	segsz_t pgs;
	int pri;

	/*
	 * We only care about processes in swap-wait.  Interlock test with
	 * token if the flag is found set.
	 */
	if ((p->p_flags & P_SWAPWAIT) == 0)
		return 0;
	lwkt_gettoken_shared(&p->p_token);
	/* re-test under the token: the flag may have cleared racily */
	if ((p->p_flags & P_SWAPWAIT) == 0) {
		lwkt_reltoken(&p->p_token);
		return 0;
	}

	/*
	 * Calculate priority for swap-in
	 */
	pri = 0;
	FOREACH_LWP_IN_PROC(lp, p) {
		/* XXX lwp might need a different metric */
		pri += lp->lwp_slptime;
	}
	pri += p->p_swtime - p->p_nice * 8;

	/*
	 * The more pages paged out while we were swapped,
	 * the more work we have to do to get up and running
	 * again and the lower our wakeup priority.
	 *
	 * Each second of sleep time is worth ~1MB
	 */
	if ((vm = p->p_vmspace) != NULL) {
		vmspace_hold(vm);
		pgs = vmspace_resident_count(vm);
		if (pgs < vm->vm_swrss) {
			pri -= (vm->vm_swrss - pgs) /
			       (1024 * 1024 / PAGE_SIZE);
		}
		vmspace_drop(vm);
	}
	lwkt_reltoken(&p->p_token);

	/*
	 * If this process is higher priority and there is
	 * enough space, then select this process instead of
	 * the previous selection.
	 */
	if (pri > info->ppri) {
		/* swap the held reference from the old candidate to p */
		if (info->pp)
			PRELE(info->pp);
		PHOLD(p);
		info->pp = p;
		info->ppri = pri;
	}
	return(0);
}
/* * Read from a file */ static int pfs_read(struct vop_read_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; struct uio *uio = va->a_uio; struct proc *proc; struct sbuf *sb = NULL; int error, locked; unsigned int buflen, offset, resid; PFS_TRACE(("%s", pn->pn_name)); pfs_assert_not_owned(pn); if (vn->v_type != VREG) PFS_RETURN (EINVAL); KASSERT_PN_IS_FILE(pn); if (!(pn->pn_flags & PFS_RD)) PFS_RETURN (EBADF); if (pn->pn_fill == NULL) PFS_RETURN (EIO); /* * This is necessary because either process' privileges may * have changed since the open() call. */ if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) PFS_RETURN (EIO); if (proc != NULL) { _PHOLD(proc); PROC_UNLOCK(proc); } vhold(vn); locked = VOP_ISLOCKED(vn, curthread); VOP_UNLOCK(vn, 0, curthread); if (pn->pn_flags & PFS_RAWRD) { PFS_TRACE(("%lu resid", (unsigned long)uio->uio_resid)); error = pn_fill(curthread, proc, pn, NULL, uio); PFS_TRACE(("%lu resid", (unsigned long)uio->uio_resid)); goto ret; } /* beaucoup sanity checks so we don't ask for bogus allocation */ if (uio->uio_offset < 0 || uio->uio_resid < 0 || (offset = uio->uio_offset) != uio->uio_offset || (resid = uio->uio_resid) != uio->uio_resid || (buflen = offset + resid + 1) < offset || buflen > INT_MAX) { if (proc != NULL) PRELE(proc); error = EINVAL; goto ret; } if (buflen > MAXPHYS + 1) { error = EIO; goto ret; } sb = sbuf_new(sb, NULL, buflen, 0); if (sb == NULL) { error = EIO; goto ret; } error = pn_fill(curthread, proc, pn, sb, uio); if (error) { sbuf_delete(sb); goto ret; } sbuf_finish(sb); error = uiomove_frombuf(sbuf_data(sb), sbuf_len(sb), uio); sbuf_delete(sb); ret: vn_lock(vn, locked | LK_RETRY, curthread); vdrop(vn); if (proc != NULL) PRELE(proc); PFS_RETURN (error); }
/*
 * Raw character-device I/O: carve the user's uio into device-sized bio
 * requests and issue them synchronously via the driver's d_strategy.
 *
 * Three mapping strategies are used depending on the request origin and
 * device capabilities: no mapping for kernel-space uios, an unmapped page
 * array when the device supports SI_UNMAPPED, or a pbuf KVA window
 * otherwise.  Returns 0 or an errno.
 */
int
physio(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct cdevsw *csw;
	struct buf *pbuf;
	struct bio *bp;
	struct vm_page **pages;
	caddr_t sa;
	u_int iolen, poff;
	int error, i, npages, maxpages;
	vm_prot_t prot;

	csw = dev->si_devsw;
	npages = 0;
	sa = NULL;
	/* check if character device is being destroyed */
	if (csw == NULL)
		return (ENXIO);

	/* XXX: sanity check */
	if(dev->si_iosize_max < PAGE_SIZE) {
		printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n",
		    devtoname(dev), dev->si_iosize_max);
		dev->si_iosize_max = DFLTPHYS;
	}

	/*
	 * If the driver does not want I/O to be split, that means that we
	 * need to reject any requests that will not fit into one buffer.
	 */
	if (dev->si_flags & SI_NOSPLIT &&
	    (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > MAXPHYS ||
	    uio->uio_iovcnt > 1)) {
		/*
		 * Tell the user why his I/O was rejected.
		 */
		if (uio->uio_resid > dev->si_iosize_max)
			uprintf("%s: request size=%zd > si_iosize_max=%d; "
			    "cannot split request\n", devtoname(dev),
			    uio->uio_resid, dev->si_iosize_max);
		if (uio->uio_resid > MAXPHYS)
			uprintf("%s: request size=%zd > MAXPHYS=%d; "
			    "cannot split request\n", devtoname(dev),
			    uio->uio_resid, MAXPHYS);
		if (uio->uio_iovcnt > 1)
			uprintf("%s: request vectors=%d > 1; "
			    "cannot split request\n", devtoname(dev),
			    uio->uio_iovcnt);
		return (EFBIG);
	}

	/*
	 * Keep the process UPAGES from being swapped.  Processes swapped
	 * out while holding pbufs, used by swapper, may lead to deadlock.
	 */
	PHOLD(curproc);

	bp = g_alloc_bio();
	if (uio->uio_segflg != UIO_USERSPACE) {
		/* kernel-space buffers need no page wiring/mapping */
		pbuf = NULL;
		pages = NULL;
	} else if ((dev->si_flags & SI_UNMAPPED) && unmapped_buf_allowed) {
		/* driver accepts unmapped bios: hold pages, skip KVA map */
		pbuf = NULL;
		maxpages = btoc(MIN(uio->uio_resid, MAXPHYS)) + 1;
		pages = malloc(sizeof(*pages) * maxpages, M_DEVBUF, M_WAITOK);
	} else {
		/* classic path: map held pages into the pbuf's KVA window */
		pbuf = uma_zalloc(pbuf_zone, M_WAITOK);
		sa = pbuf->b_data;
		maxpages = btoc(MAXPHYS);
		pages = pbuf->b_pages;
	}
	/*
	 * A device read writes into user memory, hence the inverted
	 * protection request below.
	 */
	prot = VM_PROT_READ;
	if (uio->uio_rw == UIO_READ)
		prot |= VM_PROT_WRITE;	/* Less backwards than it looks */
	error = 0;
	for (i = 0; i < uio->uio_iovcnt; i++) {
#ifdef RACCT
		if (racct_enable) {
			PROC_LOCK(curproc);
			if (uio->uio_rw == UIO_READ) {
				racct_add_force(curproc, RACCT_READBPS,
				    uio->uio_iov[i].iov_len);
				racct_add_force(curproc, RACCT_READIOPS, 1);
			} else {
				racct_add_force(curproc, RACCT_WRITEBPS,
				    uio->uio_iov[i].iov_len);
				racct_add_force(curproc, RACCT_WRITEIOPS, 1);
			}
			PROC_UNLOCK(curproc);
		}
#endif /* RACCT */

		/* issue as many bios as needed to consume this iovec */
		while (uio->uio_iov[i].iov_len) {
			g_reset_bio(bp);
			if (uio->uio_rw == UIO_READ) {
				bp->bio_cmd = BIO_READ;
				curthread->td_ru.ru_inblock++;
			} else {
				bp->bio_cmd = BIO_WRITE;
				curthread->td_ru.ru_oublock++;
			}
			bp->bio_offset = uio->uio_offset;
			bp->bio_data = uio->uio_iov[i].iov_base;
			bp->bio_length = uio->uio_iov[i].iov_len;
			if (bp->bio_length > dev->si_iosize_max)
				bp->bio_length = dev->si_iosize_max;
			if (bp->bio_length > MAXPHYS)
				bp->bio_length = MAXPHYS;

			/*
			 * Make sure the pbuf can map the request.
			 * The pbuf has kvasize = MAXPHYS, so a request
			 * larger than MAXPHYS - PAGE_SIZE must be
			 * page aligned or it will be fragmented.
			 */
			poff = (vm_offset_t)bp->bio_data & PAGE_MASK;
			if (pbuf && bp->bio_length + poff > pbuf->b_kvasize) {
				if (dev->si_flags & SI_NOSPLIT) {
					uprintf("%s: request ptr %p is not "
					    "on a page boundary; cannot split "
					    "request\n", devtoname(dev),
					    bp->bio_data);
					error = EFBIG;
					goto doerror;
				}
				bp->bio_length = pbuf->b_kvasize;
				if (poff != 0)
					bp->bio_length -= PAGE_SIZE;
			}

			bp->bio_bcount = bp->bio_length;
			bp->bio_dev = dev;

			if (pages) {
				/* wire the user pages for the duration */
				if ((npages = vm_fault_quick_hold_pages(
				    &curproc->p_vmspace->vm_map,
				    (vm_offset_t)bp->bio_data, bp->bio_length,
				    prot, pages, maxpages)) < 0) {
					error = EFAULT;
					goto doerror;
				}
				if (pbuf && sa) {
					/* map into the pbuf KVA window */
					pmap_qenter((vm_offset_t)sa,
					    pages, npages);
					bp->bio_data = sa + poff;
				} else {
					/* hand the raw page list to the bio */
					bp->bio_ma = pages;
					bp->bio_ma_n = npages;
					bp->bio_ma_offset = poff;
					bp->bio_data = unmapped_buf;
					bp->bio_flags |= BIO_UNMAPPED;
				}
			}

			csw->d_strategy(bp);
			if (uio->uio_rw == UIO_READ)
				biowait(bp, "physrd");
			else
				biowait(bp, "physwr");

			if (pages) {
				if (pbuf)
					pmap_qremove((vm_offset_t)sa, npages);
				vm_page_unhold_pages(pages, npages);
			}

			iolen = bp->bio_length - bp->bio_resid;
			if (iolen == 0 && !(bp->bio_flags & BIO_ERROR))
				goto doerror;	/* EOF */
			uio->uio_iov[i].iov_len -= iolen;
			uio->uio_iov[i].iov_base =
			    (char *)uio->uio_iov[i].iov_base + iolen;
			uio->uio_resid -= iolen;
			uio->uio_offset += iolen;
			if (bp->bio_flags & BIO_ERROR) {
				error = bp->bio_error;
				goto doerror;
			}
		}
	}
doerror:
	if (pbuf)
		uma_zfree(pbuf_zone, pbuf);
	else if (pages)
		free(pages, M_DEVBUF);
	g_destroy_bio(bp);
	PRELE(curproc);
	return (error);
}
/* * MPALMOSTSAFE */ int sys_linux_ptrace(struct linux_ptrace_args *uap) { struct thread *td = curthread; struct proc *curp = td->td_proc; union { struct linux_pt_reg reg; struct linux_pt_fpreg fpreg; struct linux_pt_fpxreg fpxreg; } r; union { struct reg bsd_reg; struct fpreg bsd_fpreg; struct dbreg bsd_dbreg; } u; void *addr; pid_t pid; int error, req; struct proc *p = NULL; /* held process */ error = 0; /* by default, just copy data intact */ req = uap->req; pid = (pid_t)uap->pid; addr = (void *)uap->addr; switch (req) { case PTRACE_TRACEME: case PTRACE_POKETEXT: case PTRACE_POKEDATA: case PTRACE_KILL: error = kern_ptrace(curp, req, pid, addr, uap->data, &uap->sysmsg_iresult); break; case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: { /* need to preserve return value, use dummy */ l_int rval = 0; error = kern_ptrace(curp, req, pid, addr, 0, &rval); if (error == 0) { error = copyout(&rval, (caddr_t)uap->data, sizeof(l_int)); } break; } case PTRACE_DETACH: error = kern_ptrace(curp, PT_DETACH, pid, (void *)1, map_signum(uap->data), &uap->sysmsg_iresult); break; case PTRACE_SINGLESTEP: case PTRACE_CONT: error = kern_ptrace(curp, req, pid, (void *)1, map_signum(uap->data), &uap->sysmsg_iresult); break; case PTRACE_ATTACH: error = kern_ptrace(curp, PT_ATTACH, pid, addr, uap->data, &uap->sysmsg_iresult); break; case PTRACE_GETREGS: /* Linux is using data where FreeBSD is using addr */ error = kern_ptrace(curp, PT_GETREGS, pid, &u.bsd_reg, 0, &uap->sysmsg_iresult); if (error == 0) { map_regs_to_linux(&u.bsd_reg, &r.reg); error = copyout(&r.reg, (void *)uap->data, sizeof(r.reg)); } break; case PTRACE_SETREGS: /* Linux is using data where FreeBSD is using addr */ error = copyin((caddr_t)uap->data, &r.reg, sizeof(r.reg)); if (error == 0) { map_regs_from_linux(&u.bsd_reg, &r.reg); error = kern_ptrace(curp, PT_SETREGS, pid, &u.bsd_reg, 0, &uap->sysmsg_iresult); } break; case PTRACE_GETFPREGS: /* Linux is using data where FreeBSD is using addr */ error = kern_ptrace(curp, 
PT_GETFPREGS, pid, &u.bsd_fpreg, 0, &uap->sysmsg_iresult); if (error == 0) { map_fpregs_to_linux(&u.bsd_fpreg, &r.fpreg); error = copyout(&r.fpreg, (caddr_t)uap->data, sizeof(r.fpreg)); } break; case PTRACE_SETFPREGS: /* Linux is using data where FreeBSD is using addr */ error = copyin((caddr_t)uap->data, &r.fpreg, sizeof(r.fpreg)); if (error == 0) { map_fpregs_from_linux(&u.bsd_fpreg, &r.fpreg); error = kern_ptrace(curp, PT_SETFPREGS, pid, &u.bsd_fpreg, 0, &uap->sysmsg_iresult); } break; case PTRACE_SETFPXREGS: #ifndef CPU_DISABLE_SSE error = copyin((caddr_t)uap->data, &r.fpxreg, sizeof(r.fpxreg)); if (error) break; #endif /* FALL THROUGH */ case PTRACE_GETFPXREGS: { #ifndef CPU_DISABLE_SSE struct proc *p; struct lwp *lp; if (sizeof(struct linux_pt_fpxreg) != sizeof(struct savexmm)) { static int once = 0; if (!once) { kprintf("linux: savexmm != linux_pt_fpxreg\n"); once = 1; } error = EIO; break; } if ((p = pfind(uap->pid)) == NULL) { error = ESRCH; break; } if (!PRISON_CHECK(curp->p_ucred, p->p_ucred)) { error = ESRCH; goto fail; } /* System processes can't be debugged. */ if ((p->p_flag & P_SYSTEM) != 0) { error = EINVAL; goto fail; } /* not being traced... 
*/ if ((p->p_flag & P_TRACED) == 0) { error = EPERM; goto fail; } /* not being traced by YOU */ if (p->p_pptr != curp) { error = EBUSY; goto fail; } /* not currently stopped */ if ((p->p_flag & (P_TRACED|P_WAITED)) == 0) { error = EBUSY; goto fail; } /* XXX lwp */ lp = FIRST_LWP_IN_PROC(p); if (req == PTRACE_GETFPXREGS) { LWPHOLD(lp); error = linux_proc_read_fpxregs(lp, &r.fpxreg); LWPRELE(lp); if (error == 0) error = copyout(&r.fpxreg, (caddr_t)uap->data, sizeof(r.fpxreg)); } else { /* clear dangerous bits exactly as Linux does*/ r.fpxreg.mxcsr &= 0xffbf; LWPHOLD(lp); error = linux_proc_write_fpxregs(lp, &r.fpxreg); LWPRELE(lp); } break; fail: #else error = EIO; #endif break; } case PTRACE_PEEKUSR: case PTRACE_POKEUSR: { error = EIO; /* check addr for alignment */ if (uap->addr < 0 || uap->addr & (sizeof(l_int) - 1)) break; /* * Allow linux programs to access register values in * user struct. We simulate this through PT_GET/SETREGS * as necessary. */ if (uap->addr < sizeof(struct linux_pt_reg)) { error = kern_ptrace(curp, PT_GETREGS, pid, &u.bsd_reg, 0, &uap->sysmsg_iresult); if (error != 0) break; map_regs_to_linux(&u.bsd_reg, &r.reg); if (req == PTRACE_PEEKUSR) { error = copyout((char *)&r.reg + uap->addr, (caddr_t)uap->data, sizeof(l_int)); break; } *(l_int *)((char *)&r.reg + uap->addr) = (l_int)uap->data; map_regs_from_linux(&u.bsd_reg, &r.reg); error = kern_ptrace(curp, PT_SETREGS, pid, &u.bsd_reg, 0, &uap->sysmsg_iresult); } /* * Simulate debug registers access */ if (uap->addr >= LINUX_DBREG_OFFSET && uap->addr <= LINUX_DBREG_OFFSET + LINUX_DBREG_SIZE) { error = kern_ptrace(curp, PT_GETDBREGS, pid, &u.bsd_dbreg, 0, &uap->sysmsg_iresult); if (error != 0) break; uap->addr -= LINUX_DBREG_OFFSET; if (req == PTRACE_PEEKUSR) { error = copyout((char *)&u.bsd_dbreg + uap->addr, (caddr_t)uap->data, sizeof(l_int)); break; } *(l_int *)((char *)&u.bsd_dbreg + uap->addr) = uap->data; error = kern_ptrace(curp, PT_SETDBREGS, pid, &u.bsd_dbreg, 0, &uap->sysmsg_iresult); } 
break; } case PTRACE_SYSCALL: /* fall through */ default: kprintf("linux: ptrace(%u, ...) not implemented\n", (unsigned int)uap->req); error = EINVAL; break; } /* * Release held proces (if any) before returning. */ if (p) PRELE(p); return (error); }
/*
 * Raw character-device I/O (buf-based variant): carve the user's uio into
 * device-sized requests, map user pages into a pbuf as needed, and issue
 * them synchronously through the driver's strategy routine.
 *
 * Returns 0 or an errno.
 */
int
physio(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct buf *bp;
	struct cdevsw *csw;
	caddr_t sa;
	u_int iolen;
	int error, i, mapped;

	/* Keep the process UPAGES from being swapped. XXX: why ? */
	PHOLD(curproc);

	bp = getpbuf(NULL);
	sa = bp->b_data;
	error = 0;

	/* XXX: sanity check */
	if(dev->si_iosize_max < PAGE_SIZE) {
		printf("WARNING: %s si_iosize_max=%d, using DFLTPHYS.\n",
		    devtoname(dev), dev->si_iosize_max);
		dev->si_iosize_max = DFLTPHYS;
	}

	/*
	 * If the driver does not want I/O to be split, that means that we
	 * need to reject any requests that will not fit into one buffer.
	 */
	if (dev->si_flags & SI_NOSPLIT &&
	    (uio->uio_resid > dev->si_iosize_max || uio->uio_resid > MAXPHYS ||
	    uio->uio_iovcnt > 1)) {
		/*
		 * Tell the user why his I/O was rejected.
		 */
		if (uio->uio_resid > dev->si_iosize_max)
			uprintf("%s: request size=%zd > si_iosize_max=%d; "
			    "cannot split request\n", devtoname(dev),
			    uio->uio_resid, dev->si_iosize_max);
		if (uio->uio_resid > MAXPHYS)
			uprintf("%s: request size=%zd > MAXPHYS=%d; "
			    "cannot split request\n", devtoname(dev),
			    uio->uio_resid, MAXPHYS);
		if (uio->uio_iovcnt > 1)
			uprintf("%s: request vectors=%d > 1; "
			    "cannot split request\n", devtoname(dev),
			    uio->uio_iovcnt);

		error = EFBIG;
		goto doerror;
	}

	for (i = 0; i < uio->uio_iovcnt; i++) {
		/* issue as many bufs as needed to consume this iovec */
		while (uio->uio_iov[i].iov_len) {
			bp->b_flags = 0;
			if (uio->uio_rw == UIO_READ) {
				bp->b_iocmd = BIO_READ;
				curthread->td_ru.ru_inblock++;
			} else {
				bp->b_iocmd = BIO_WRITE;
				curthread->td_ru.ru_oublock++;
			}
			bp->b_iodone = bdone;
			bp->b_data = uio->uio_iov[i].iov_base;
			bp->b_bcount = uio->uio_iov[i].iov_len;
			bp->b_offset = uio->uio_offset;
			bp->b_iooffset = uio->uio_offset;
			bp->b_saveaddr = sa;

			/* Don't exceed drivers iosize limit */
			if (bp->b_bcount > dev->si_iosize_max)
				bp->b_bcount = dev->si_iosize_max;

			/*
			 * Make sure the pbuf can map the request
			 * XXX: The pbuf has kvasize = MAXPHYS so a request
			 * XXX: larger than MAXPHYS - PAGE_SIZE must be
			 * XXX: page aligned or it will be fragmented.
			 */
			iolen = ((vm_offset_t) bp->b_data) & PAGE_MASK;
			if ((bp->b_bcount + iolen) > bp->b_kvasize) {
				/*
				 * This device does not want I/O to be split.
				 */
				if (dev->si_flags & SI_NOSPLIT) {
					uprintf("%s: request ptr %p is not "
					    "on a page boundary; cannot split "
					    "request\n", devtoname(dev),
					    bp->b_data);
					error = EFBIG;
					goto doerror;
				}
				bp->b_bcount = bp->b_kvasize;
				if (iolen != 0)
					bp->b_bcount -= PAGE_SIZE;
			}
			bp->b_bufsize = bp->b_bcount;

			bp->b_blkno = btodb(bp->b_offset);
			csw = dev->si_devsw;
			if (uio->uio_segflg == UIO_USERSPACE) {
				/*
				 * Wire (and, unless the device supports
				 * unmapped I/O, map) the user pages.
				 */
				if (dev->si_flags & SI_UNMAPPED)
					mapped = 0;
				else
					mapped = 1;
				if (vmapbuf(bp, mapped) < 0) {
					error = EFAULT;
					goto doerror;
				}
			}

			dev_strategy_csw(dev, csw, bp);
			if (uio->uio_rw == UIO_READ)
				bwait(bp, PRIBIO, "physrd");
			else
				bwait(bp, PRIBIO, "physwr");

			if (uio->uio_segflg == UIO_USERSPACE)
				vunmapbuf(bp);
			iolen = bp->b_bcount - bp->b_resid;
			if (iolen == 0 && !(bp->b_ioflags & BIO_ERROR))
				goto doerror;	/* EOF */
			uio->uio_iov[i].iov_len -= iolen;
			uio->uio_iov[i].iov_base =
			    (char *)uio->uio_iov[i].iov_base + iolen;
			uio->uio_resid -= iolen;
			uio->uio_offset += iolen;
			if( bp->b_ioflags & BIO_ERROR) {
				error = bp->b_error;
				goto doerror;
			}
		}
	}
doerror:
	relpbuf(bp, NULL);
	PRELE(curproc);
	return (error);
}
/*
 * Raw I/O entry point for an NVMe namespace character device.
 *
 * Each iovec is submitted as its own single-element uio and waited on
 * synchronously; the msleep/wakeup handshake uses the address of the
 * on-stack uio_tmp as the wait channel.  Returns 0 or EFAULT if any
 * sub-request left residual data.
 */
int
nvme_ns_physio(struct cdev *dev, struct uio *uio, int ioflag)
{
	struct uio		uio_tmp;
	struct iovec		uio_iov_tmp;
	struct nvme_namespace	*ns;
	struct mtx		*mtx;
	int			i, nvme_err, physio_err = 0;
#if __FreeBSD_version > 900017
	int			ref;
#endif

	/* keep the process from being swapped while I/O is in flight */
	PHOLD(curproc);

	ns = dev->si_drv1;
	/* pool mutex keyed on the stack uio; pairs with the msleep below */
	mtx = mtx_pool_find(mtxpool_sleep, &uio_tmp);

#if __FreeBSD_version > 900017
	dev_refthread(dev, &ref);
#else
	dev_refthread(dev);
#endif

	/*
	 * NVM Express doesn't really support true SGLs.  All SG elements
	 * must be PAGE_SIZE, except for the first and last element.
	 * Because of this, we need to break up each iovec into a separate
	 * NVMe command - otherwise we could end up with sub-PAGE_SIZE
	 * elements in the middle of an SGL which is not allowed.
	 */
	uio_tmp.uio_iov = &uio_iov_tmp;
	uio_tmp.uio_iovcnt = 1;
	uio_tmp.uio_offset = uio->uio_offset;
	uio_tmp.uio_segflg = uio->uio_segflg;
	uio_tmp.uio_rw = uio->uio_rw;
	uio_tmp.uio_td = uio->uio_td;

	for (i = 0; i < uio->uio_iovcnt; i++) {
		/* one NVMe command per original iovec */
		uio_iov_tmp.iov_base = uio->uio_iov[i].iov_base;
		uio_iov_tmp.iov_len = uio->uio_iov[i].iov_len;
		uio_tmp.uio_resid = uio_iov_tmp.iov_len;

		mtx_lock(mtx);

		if (uio->uio_rw == UIO_READ)
			nvme_err = nvme_read_uio(ns, &uio_tmp);
		else
			nvme_err = nvme_write_uio(ns, &uio_tmp);

		/* on successful submission, sleep until completion wakeup */
		if (nvme_err == 0)
			msleep(&uio_tmp, mtx, PRIBIO, "nvme_physio", 0);

		mtx_unlock(mtx);

		if (uio_tmp.uio_resid == 0) {
			/* fully transferred: account it in the caller's uio */
			uio->uio_resid -= uio_iov_tmp.iov_len;
			uio->uio_offset += uio_iov_tmp.iov_len;
		} else {
			/* partial/failed transfer aborts the whole request */
			physio_err = EFAULT;
			break;
		}

		uio_tmp.uio_offset += uio_iov_tmp.iov_len;
	}

#if __FreeBSD_version > 900017
	dev_relthread(dev, ref);
#else
	dev_relthread(dev);
#endif

	PRELE(curproc);
	return (physio_err);
}
/*
 * Resolve a page fault trap (x86_64).
 *
 * Classifies the faulting address as kernel or user space, derives the
 * required protection from the page-fault error code, and calls vm_fault().
 * The faulting process is PHOLD'd across user-map faults to keep swapout
 * away.  Returns 0 when resolved, -1 on an unrecoverable kernel fault
 * (after trap_fatal), or a signal number (SIGBUS/SIGSEGV) for user faults.
 */
static int
trap_pfault(struct trapframe *frame, int usermode)
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map;
	int rv = 0;
	int fault_flags;
	vm_prot_t ftype;
	thread_t td = curthread;
	struct lwp *lp = td->td_lwp;
	struct proc *p;

	va = trunc_page(frame->tf_addr);
	if (va >= VM_MIN_KERNEL_ADDRESS) {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode) {
			fault_flags = -1;
			ftype = -1;
			goto nogo;
		}

		map = &kernel_map;
	} else {
		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		if (lp != NULL)
			vm = lp->lwp_vmspace;

		if (vm == NULL) {
			fault_flags = -1;
			ftype = -1;
			goto nogo;
		}

		/*
		 * Debugging, try to catch kernel faults on the user address
		 * space when not inside on onfault (e.g. copyin/copyout)
		 * routine.
		 */
		if (usermode == 0 && (td->td_pcb == NULL ||
		    td->td_pcb->pcb_onfault == NULL)) {
#ifdef DDB
			if (freeze_on_seg_fault) {
				kprintf("trap_pfault: user address fault from kernel mode "
					"%016lx\n", (long)frame->tf_addr);
				while (freeze_on_seg_fault)
					    tsleep(&freeze_on_seg_fault, 0, "frzseg", hz * 20);
			}
#endif
		}
		map = &vm->vm_map;
	}

	/*
	 * PGEX_I is defined only if the execute disable bit capability is
	 * supported and enabled.
	 */
	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_WRITE;
#if JG
	else if ((frame->tf_err & PGEX_I) && pg_nx != 0)
		ftype = VM_PROT_EXECUTE;
#endif
	else
		ftype = VM_PROT_READ;

	if (map != &kernel_map) {
		/*
		 * Keep swapout from messing with us during this
		 *	critical time.
		 */
		PHOLD(lp->lwp_proc);

		/*
		 * Issue fault
		 */
		fault_flags = 0;
		if (usermode)
			fault_flags |= VM_FAULT_BURST;
		if (ftype & VM_PROT_WRITE)
			fault_flags |= VM_FAULT_DIRTY;
		else
			fault_flags |= VM_FAULT_NORMAL;
		rv = vm_fault(map, va, ftype, fault_flags);

		PRELE(lp->lwp_proc);
	} else {
		/*
		 * Don't have to worry about process locking or stacks in the
		 * kernel.
		 */
		fault_flags = VM_FAULT_NORMAL;
		rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
	}

	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		/*
		 * Kernel-mode fault: recover through the pcb_onfault
		 * trampoline (copyin/copyout) when possible, otherwise
		 * panic via trap_fatal().
		 */
		if (td->td_gd->gd_intr_nesting_level == 0 &&
		    td->td_pcb->pcb_onfault) {
			frame->tf_rip = (register_t)td->td_pcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame, frame->tf_addr);
		return (-1);
	}

	/*
	 * NOTE: on x86_64 we have a tf_addr field in the trapframe, no
	 * kludge is needed to pass the fault address to signal handlers.
	 */
	p = td->td_proc;
	if (td->td_lwp->lwp_vkernel == NULL) {
#ifdef DDB
		if (bootverbose || freeze_on_seg_fault || ddb_on_seg_fault) {
#else
		if (bootverbose) {
#endif
			kprintf("seg-fault ft=%04x ff=%04x addr=%p rip=%p "
			    "pid=%d cpu=%d p_comm=%s\n",
			    ftype, fault_flags,
			    (void *)frame->tf_addr,
			    (void *)frame->tf_rip,
			    p->p_pid, mycpu->gd_cpuid, p->p_comm);
		}
#ifdef DDB
		while (freeze_on_seg_fault) {
			tsleep(p, 0, "freeze", hz * 20);
		}
		if (ddb_on_seg_fault)
			Debugger("ddb_on_seg_fault");
#endif
	}

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}

/*
 * Print a detailed description of an unrecoverable trap and panic
 * (or drop into the kernel debugger when configured).  Never returns
 * unless the debugger handles the trap.
 */
static void
trap_fatal(struct trapframe *frame, vm_offset_t eva)
{
	int code, ss;
	u_int type;
	long rsp;
	struct soft_segment_descriptor softseg;
	char *msg;

	code = frame->tf_err;
	type = frame->tf_trapno;
	sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)], &softseg);

	if (type <= MAX_TRAP_MSG)
		msg = trap_msg[type];
	else
		msg = "UNKNOWN";
	kprintf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
	    ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
#ifdef SMP
	/* three separate prints in case of a trap on an unmapped page */
	kprintf("cpuid = %d; ", mycpu->gd_cpuid);
	kprintf("lapic->id = %08x\n", lapic->id);
#endif
	if (type == T_PAGEFLT) {
		kprintf("fault virtual address	= 0x%lx\n", eva);
		kprintf("fault code		= %s %s %s, %s\n",
			code & PGEX_U ? "user" : "supervisor",
			code & PGEX_W ? "write" : "read",
			code & PGEX_I ? "instruction" : "data",
			code & PGEX_P ? "protection violation" : "page not present");
	}
	kprintf("instruction pointer	= 0x%lx:0x%lx\n",
	       frame->tf_cs & 0xffff, frame->tf_rip);
	if (ISPL(frame->tf_cs) == SEL_UPL) {
		ss = frame->tf_ss & 0xffff;
		rsp = frame->tf_rsp;
	} else {
		/* no ring transition: rsp is the trapframe's own location */
		ss = GSEL(GDATA_SEL, SEL_KPL);
		rsp = (long)&frame->tf_rsp;
	}
	kprintf("stack pointer	        = 0x%x:0x%lx\n", ss, rsp);
	kprintf("frame pointer	        = 0x%x:0x%lx\n", ss, frame->tf_rbp);
	kprintf("code segment		= base 0x%lx, limit 0x%lx, type 0x%x\n",
	       softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type);
	kprintf("			= DPL %d, pres %d, long %d, def32 %d, gran %d\n",
	       softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_long, softseg.ssd_def32,
	       softseg.ssd_gran);
	kprintf("processor eflags	= ");
	if (frame->tf_rflags & PSL_T)
		kprintf("trace trap, ");
	if (frame->tf_rflags & PSL_I)
		kprintf("interrupt enabled, ");
	if (frame->tf_rflags & PSL_NT)
		kprintf("nested task, ");
	if (frame->tf_rflags & PSL_RF)
		kprintf("resume, ");
	kprintf("IOPL = %ld\n", (frame->tf_rflags & PSL_IOPL) >> 12);
	kprintf("current process		= ");
	if (curproc) {
		kprintf("%lu\n",
		    (u_long)curproc->p_pid);
	} else {
		kprintf("Idle\n");
	}
	kprintf("current thread          = pri %d ", curthread->td_pri);
	if (curthread->td_critcount)
		kprintf("(CRIT)");
	kprintf("\n");

#ifdef DDB
	if ((debugger_on_panic || db_active) && kdb_trap(type, code, frame))
		return;
#endif
	kprintf("trap number		= %d\n", type);
	if (type <= MAX_TRAP_MSG)
		panic("%s", trap_msg[type]);
	else
		panic("unknown/reserved trap");
}
/*
 * syscons mouse ioctl handler (CONS_MOUSECTL).
 *
 * Dispatches on mouse->operation: configuring signal delivery (MOUSE_MODE,
 * holding the registering process via PHOLD), showing/hiding the arrow,
 * moving it, and injecting motion/button events which may be forwarded to
 * /dev/sysmouse, signalled to a controlling process, or used for
 * cut-and-paste.  Returns 0/errno, or ENOIOCTL for unrecognized commands.
 */
int
sc_mouse_ioctl(struct tty *tp, u_long cmd, caddr_t data, int flag)
{
    mouse_info_t *mouse;
    scr_stat *cur_scp;
    scr_stat *scp;
    struct proc *oproc;
    int f;

    scp = SC_STAT(tp->t_dev);

    switch (cmd) {

    case CONS_MOUSECTL:		/* control mouse arrow */
	mouse = (mouse_info_t*)data;
	cur_scp = scp->sc->cur_scp;

	switch (mouse->operation) {
	case MOUSE_MODE:
	    /*
	     * Register (or deregister) the caller as the process to be
	     * signalled on mouse events; hold it so the proc pointer
	     * stays valid, and drop any previously registered process.
	     */
	    if (ISSIGVALID(mouse->u.mode.signal)) {
		oproc = scp->mouse_proc;
		scp->mouse_signal = mouse->u.mode.signal;
		scp->mouse_proc = curproc;
		scp->mouse_pid = curproc->p_pid;
		PHOLD(curproc);
	    } else {
		oproc = scp->mouse_proc;
		scp->mouse_signal = 0;
		scp->mouse_proc = NULL;
		scp->mouse_pid = 0;
	    }
	    if (oproc) {
		PRELE(oproc);
		oproc = NULL;
	    }
	    return 0;

	case MOUSE_SHOW:
	    crit_enter();
	    if (!(scp->sc->flags & SC_MOUSE_ENABLED)) {
		scp->sc->flags |= SC_MOUSE_ENABLED;
		cur_scp->status &= ~MOUSE_HIDDEN;
		if (!ISGRAPHSC(cur_scp))
		    mark_all(cur_scp);
		crit_exit();
		return 0;
	    } else {
		crit_exit();
		return EINVAL;
	    }
	    break;

	case MOUSE_HIDE:
	    crit_enter();
	    if (scp->sc->flags & SC_MOUSE_ENABLED) {
		scp->sc->flags &= ~SC_MOUSE_ENABLED;
		sc_remove_all_mouse(scp->sc);
		crit_exit();
		return 0;
	    } else {
		crit_exit();
		return EINVAL;
	    }
	    break;

	case MOUSE_MOVEABS:
	    crit_enter();
	    scp->mouse_xpos = mouse->u.data.x;
	    scp->mouse_ypos = mouse->u.data.y;
	    set_mouse_pos(scp);
	    crit_exit();
	    break;

	case MOUSE_MOVEREL:
	    crit_enter();
	    scp->mouse_xpos += mouse->u.data.x;
	    scp->mouse_ypos += mouse->u.data.y;
	    set_mouse_pos(scp);
	    crit_exit();
	    break;

	case MOUSE_GETINFO:
	    mouse->u.data.x = scp->mouse_xpos;
	    mouse->u.data.y = scp->mouse_ypos;
	    mouse->u.data.z = 0;
	    mouse->u.data.buttons = scp->mouse_buttons;
	    return 0;

	case MOUSE_ACTION:
	case MOUSE_MOTION_EVENT:
	    /* send out mouse event on /dev/sysmouse */
#if 0
	    /* this should maybe only be settable from /dev/consolectl SOS */
	    if (SC_VTY(tp->t_dev) != SC_CONSOLECTL)
		return ENOTTY;
#endif
	    crit_enter();
	    if (mouse->u.data.x != 0 || mouse->u.data.y != 0) {
		cur_scp->mouse_xpos += mouse->u.data.x;
		cur_scp->mouse_ypos += mouse->u.data.y;
		set_mouse_pos(cur_scp);
	    }
	    f = 0;
	    if (mouse->operation == MOUSE_ACTION) {
		/* f records which buttons changed state */
		f = cur_scp->mouse_buttons ^ mouse->u.data.buttons;
		cur_scp->mouse_buttons = mouse->u.data.buttons;
	    }
	    crit_exit();

	    if (sysmouse_event(mouse) == 0)
		return 0;

	    /*
	     * If any buttons are down or the mouse has moved a lot,
	     * stop the screen saver.
	     */
	    if (((mouse->operation == MOUSE_ACTION) && mouse->u.data.buttons)
		|| (mouse->u.data.x*mouse->u.data.x
			+ mouse->u.data.y*mouse->u.data.y
			>= SC_WAKEUP_DELTA*SC_WAKEUP_DELTA)) {
		sc_touch_scrn_saver();
	    }

	    cur_scp->status &= ~MOUSE_HIDDEN;

	    if (cur_scp->mouse_signal) {
		/* has controlling process died? */
		if (cur_scp->mouse_proc &&
		    (cur_scp->mouse_proc != pfindn(cur_scp->mouse_pid))){
			/* stale registration: clear it and drop the hold */
			oproc = cur_scp->mouse_proc;
			cur_scp->mouse_signal = 0;
			cur_scp->mouse_proc = NULL;
			cur_scp->mouse_pid = 0;
			if (oproc)
				PRELE(oproc);
		} else {
		    ksignal(cur_scp->mouse_proc, cur_scp->mouse_signal);
		    break;
		}
	    }

	    if (ISGRAPHSC(cur_scp) || (cut_buffer == NULL))
		break;

#ifndef SC_NO_CUTPASTE
	    if ((mouse->operation == MOUSE_ACTION) && f) {
		/* process button presses */
		if (cur_scp->mouse_buttons & MOUSE_BUTTON1DOWN)
		    mouse_cut_start(cur_scp);
		else
		    mouse_cut_end(cur_scp);
		if (cur_scp->mouse_buttons & MOUSE_BUTTON2DOWN ||
		    cur_scp->mouse_buttons & MOUSE_BUTTON3DOWN)
		    mouse_paste(cur_scp);
	    }
#endif /* SC_NO_CUTPASTE */
	    break;

	case MOUSE_BUTTON_EVENT:
	    if ((mouse->u.event.id & MOUSE_BUTTONS) == 0)
		return EINVAL;
	    if (mouse->u.event.value < 0)
		return EINVAL;
#if 0
	    /* this should maybe only be settable from /dev/consolectl SOS */
	    if (SC_VTY(tp->t_dev) != SC_CONSOLECTL)
		return ENOTTY;
#endif
	    if (mouse->u.event.value > 0)
		cur_scp->mouse_buttons |= mouse->u.event.id;
	    else
		cur_scp->mouse_buttons &= ~mouse->u.event.id;

	    if (sysmouse_event(mouse) == 0)
		return 0;

	    /* if a button is held down, stop the screen saver */
	    if (mouse->u.event.value > 0)
		sc_touch_scrn_saver();

	    cur_scp->status &= ~MOUSE_HIDDEN;

	    if (cur_scp->mouse_signal) {
		/* same stale-registration check as MOUSE_ACTION above */
		if (cur_scp->mouse_proc &&
		    (cur_scp->mouse_proc != pfindn(cur_scp->mouse_pid))){
			oproc = cur_scp->mouse_proc;
		    	cur_scp->mouse_signal = 0;
			cur_scp->mouse_proc = NULL;
			cur_scp->mouse_pid = 0;
			if (oproc)
				PRELE(oproc);
		} else {
		    ksignal(cur_scp->mouse_proc, cur_scp->mouse_signal);
		    break;
		}
	    }

	    if (ISGRAPHSC(cur_scp) || (cut_buffer == NULL))
		break;

#ifndef SC_NO_CUTPASTE
	    switch (mouse->u.event.id) {
	    case MOUSE_BUTTON1DOWN:
	        switch (mouse->u.event.value % 4) {
		case 0:	/* up */
		    mouse_cut_end(cur_scp);
		    break;
		case 1: /* single click: start cut operation */
		    mouse_cut_start(cur_scp);
		    break;
		case 2:	/* double click: cut a word */
		    mouse_cut_word(cur_scp);
		    mouse_cut_end(cur_scp);
		    break;
		case 3:	/* triple click: cut a line */
		    mouse_cut_line(cur_scp);
		    mouse_cut_end(cur_scp);
		    break;
		}
		break;
	    case SC_MOUSE_PASTEBUTTON:
	        switch (mouse->u.event.value) {
		case 0:	/* up */
		    break;
		default:
		    mouse_paste(cur_scp);
		    break;
		}
		break;
	    case SC_MOUSE_EXTENDBUTTON:
	        switch (mouse->u.event.value) {
		case 0:	/* up */
		    if (!(cur_scp->mouse_buttons & MOUSE_BUTTON1DOWN))
		        mouse_cut_end(cur_scp);
		    break;
		default:
		    mouse_cut_extend(cur_scp);
		    break;
		}
		break;
	    }
#endif /* SC_NO_CUTPASTE */
	    break;

	case MOUSE_MOUSECHAR:
	    /* negative value queries the current mouse character base */
	    if (mouse->u.mouse_char < 0) {
		mouse->u.mouse_char = scp->sc->mouse_char;
	    } else {
		if (mouse->u.mouse_char >= (unsigned char)-1 - 4)
		    return EINVAL;
		crit_enter();
		sc_remove_all_mouse(scp->sc);
#ifndef SC_NO_FONT_LOADING
		if (ISTEXTSC(cur_scp) && (cur_scp->font != NULL))
		    sc_load_font(cur_scp, 0, cur_scp->font_size,
				 cur_scp->font,
				 cur_scp->sc->mouse_char, 4);
#endif
		scp->sc->mouse_char = mouse->u.mouse_char;
		crit_exit();
	    }
	    break;

	default:
	    return EINVAL;
	}

	return 0;
    }

    return ENOIOCTL;
}
/*
 * Resolve a page fault trap (x86_64, later variant).
 *
 * Classifies the faulting address as kernel or user space, derives the
 * required protection from the page-fault error code (including NX via
 * PGEX_I), and calls vm_fault().  The faulting process is PHOLD'd across
 * user-map faults to keep swapout away.  Returns 0 when resolved, -1 on
 * an unrecoverable kernel fault (after trap_fatal), or a signal number
 * (SIGBUS/SIGSEGV) for user faults.
 */
static int
trap_pfault(struct trapframe *frame, int usermode)
{
	vm_offset_t va;
	struct vmspace *vm = NULL;
	vm_map_t map;
	int rv = 0;
	int fault_flags;
	vm_prot_t ftype;
	thread_t td = curthread;
	struct lwp *lp = td->td_lwp;
	struct proc *p;

	va = trunc_page(frame->tf_addr);
	if (va >= VM_MIN_KERNEL_ADDRESS) {
		/*
		 * Don't allow user-mode faults in kernel address space.
		 */
		if (usermode) {
			fault_flags = -1;
			ftype = -1;
			goto nogo;
		}

		map = &kernel_map;
	} else {
		/*
		 * This is a fault on non-kernel virtual memory.
		 * vm is initialized above to NULL. If curproc is NULL
		 * or curproc->p_vmspace is NULL the fault is fatal.
		 */
		if (lp != NULL)
			vm = lp->lwp_vmspace;

		if (vm == NULL) {
			fault_flags = -1;
			ftype = -1;
			goto nogo;
		}

		/*
		 * Debugging, try to catch kernel faults on the user address
		 * space when not inside on onfault (e.g. copyin/copyout)
		 * routine.
		 */
		if (usermode == 0 && (td->td_pcb == NULL ||
		    td->td_pcb->pcb_onfault == NULL)) {
#ifdef DDB
			if (freeze_on_seg_fault) {
				kprintf("trap_pfault: user address fault from kernel mode "
					"%016lx\n", (long)frame->tf_addr);
				while (freeze_on_seg_fault)
					    tsleep(&freeze_on_seg_fault, 0, "frzseg", hz * 20);
			}
#endif
		}
		map = &vm->vm_map;
	}

	/*
	 * PGEX_I is defined only if the execute disable bit capability is
	 * supported and enabled.
	 */
	if (frame->tf_err & PGEX_W)
		ftype = VM_PROT_WRITE;
	else if (frame->tf_err & PGEX_I)
		ftype = VM_PROT_EXECUTE;
	else
		ftype = VM_PROT_READ;

	if (map != &kernel_map) {
		/*
		 * Keep swapout from messing with us during this
		 *	critical time.
		 */
		PHOLD(lp->lwp_proc);

		/*
		 * Issue fault
		 */
		fault_flags = 0;
		if (usermode)
			fault_flags |= VM_FAULT_BURST | VM_FAULT_USERMODE;
		if (ftype & VM_PROT_WRITE)
			fault_flags |= VM_FAULT_DIRTY;
		else
			fault_flags |= VM_FAULT_NORMAL;
		rv = vm_fault(map, va, ftype, fault_flags);

		PRELE(lp->lwp_proc);
	} else {
		/*
		 * Don't have to worry about process locking or stacks in the
		 * kernel.
		 */
		fault_flags = VM_FAULT_NORMAL;
		rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
	}
	if (rv == KERN_SUCCESS)
		return (0);
nogo:
	if (!usermode) {
		/*
		 * NOTE: in 64-bit mode traps push rsp/ss
		 *	 even if no ring change occurs.
		 *
		 * Only take the onfault recovery path when the saved stack
		 * pointer matches, so a nested fault on a different stack
		 * does not falsely recover.
		 */
		if (td->td_pcb->pcb_onfault &&
		    td->td_pcb->pcb_onfault_sp == frame->tf_rsp &&
		    td->td_gd->gd_intr_nesting_level == 0) {
			frame->tf_rip = (register_t)td->td_pcb->pcb_onfault;
			return (0);
		}
		trap_fatal(frame, frame->tf_addr);
		return (-1);
	}

	/*
	 * NOTE: on x86_64 we have a tf_addr field in the trapframe, no
	 * kludge is needed to pass the fault address to signal handlers.
	 */
	p = td->td_proc;
#ifdef DDB
	if (td->td_lwp->lwp_vkernel == NULL) {
		while (freeze_on_seg_fault) {
			tsleep(p, 0, "freeze", hz * 20);
		}
		if (ddb_on_seg_fault)
			Debugger("ddb_on_seg_fault");
	}
#endif

	return((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
}