/*
 * Update disk usage, and take corrective action.
 */
int
lfs_chkdq1(struct inode *ip, int64_t change, kauth_cred_t cred, int flags)
{
	struct dquot *dq;
	int i;
	int ncurblocks, error;

	if ((error = lfs_getinoquota(ip)) != 0)
		return error;
	if (change == 0)
		return (0);
	if (change < 0) {
		for (i = 0; i < ULFS_MAXQUOTAS; i++) {
			if ((dq = ip->i_dquot[i]) == NODQUOT)
				continue;
			mutex_enter(&dq->dq_interlock);
			ncurblocks = dq->dq_curblocks + change;
			if (ncurblocks >= 0)
				dq->dq_curblocks = ncurblocks;
			else
				dq->dq_curblocks = 0;
			dq->dq_flags &= ~DQ_WARN(QL_BLOCK);
			dq->dq_flags |= DQ_MOD;
			mutex_exit(&dq->dq_interlock);
		}
		return (0);
	}
	for (i = 0; i < ULFS_MAXQUOTAS; i++) {
		if ((dq = ip->i_dquot[i]) == NODQUOT)
			continue;
		if ((flags & FORCE) == 0 &&
		    kauth_authorize_system(cred, KAUTH_SYSTEM_FS_QUOTA,
		    KAUTH_REQ_SYSTEM_FS_QUOTA_NOLIMIT, KAUTH_ARG(i),
		    KAUTH_ARG(QL_BLOCK), NULL) != 0) {
			mutex_enter(&dq->dq_interlock);
			error = chkdqchg(ip, change, cred, i);
			mutex_exit(&dq->dq_interlock);
			if (error != 0)
				return (error);
		}
	}
	for (i = 0; i < ULFS_MAXQUOTAS; i++) {
		if ((dq = ip->i_dquot[i]) == NODQUOT)
			continue;
		mutex_enter(&dq->dq_interlock);
		dq->dq_curblocks += change;
		dq->dq_flags |= DQ_MOD;
		mutex_exit(&dq->dq_interlock);
	}
	return (0);
}
/* This function is used by clock_settime and settimeofday */
static int
settime1(struct proc *p, const struct timespec *ts, bool check_kauth)
{
	struct timespec delta, now;
	int s;

	/* WHAT DO WE DO ABOUT PENDING REAL-TIME TIMEOUTS??? */
	s = splclock();
	nanotime(&now);
	timespecsub(ts, &now, &delta);

	if (check_kauth && kauth_authorize_system(kauth_cred_get(),
	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_SYSTEM, __UNCONST(ts),
	    &delta, KAUTH_ARG(check_kauth ? false : true)) != 0) {
		splx(s);
		return (EPERM);
	}

#ifdef notyet
	if ((delta.tv_sec < 86400) && securelevel > 0) { /* XXX elad - notyet */
		splx(s);
		return (EPERM);
	}
#endif

	tc_setclock(ts);

	timespecadd(&boottime, &delta, &boottime);

	resettodr();
	splx(s);

	return (0);
}
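/*
 * Illustrative sketch, not from the NetBSD tree: the delta arithmetic
 * settime1() performs via the timespecsub()/timespecadd() macros,
 * spelled out as plain struct timespec operations.  The helper name
 * ts_sub() is hypothetical; only the standard C library is assumed.
 */
#include <stdio.h>
#include <time.h>

/* delta = a - b, normalizing tv_nsec into [0, 1e9) */
static void
ts_sub(const struct timespec *a, const struct timespec *b,
    struct timespec *delta)
{
	delta->tv_sec = a->tv_sec - b->tv_sec;
	delta->tv_nsec = a->tv_nsec - b->tv_nsec;
	if (delta->tv_nsec < 0) {
		delta->tv_sec--;
		delta->tv_nsec += 1000000000L;
	}
}

int
main(void)
{
	/* "now" and the requested time, as settime1() would see them */
	struct timespec now = { .tv_sec = 1000, .tv_nsec = 900000000L };
	struct timespec ts = { .tv_sec = 1002, .tv_nsec = 100000000L };
	struct timespec delta;

	ts_sub(&ts, &now, &delta);	/* the step settime1() would apply */
	printf("delta = %lld.%09ld\n",
	    (long long)delta.tv_sec, delta.tv_nsec);	/* 1.200000000 */
	return 0;
}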
/* XXX shouldn't all this be in kauth ? */
static int
quota_get_auth(struct mount *mp, struct lwp *l, uid_t id)
{
	/* The user can always query about his own quota. */
	if (id == kauth_cred_getuid(l->l_cred))
		return 0;
	return kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_FS_QUOTA,
	    KAUTH_REQ_SYSTEM_FS_QUOTA_GET, mp, KAUTH_ARG(id), NULL);
}
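/*
 * Illustrative sketch, not from the NetBSD tree: how a quotactl GET
 * handler might use quota_get_auth() -- authorize first, then read.
 * example_quota_get() and example_read_quota() are hypothetical names.
 */
static int example_read_quota(struct mount *, uid_t, void *);	/* hypothetical */

static int
example_quota_get(struct mount *mp, struct lwp *l, uid_t id, void *result)
{
	int error;

	/* Self-queries pass; anything else is decided by the kauth listener. */
	error = quota_get_auth(mp, l, id);
	if (error != 0)
		return error;
	return example_read_quota(mp, id, result);
}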
static int
kill1(struct lwp *l, pid_t pid, ksiginfo_t *ksi, register_t *retval)
{
	int error;
	struct proc *p;

	if ((u_int)ksi->ksi_signo >= NSIG)
		return EINVAL;

	if (pid != l->l_proc->p_pid) {
		if (ksi->ksi_pid != l->l_proc->p_pid)
			return EPERM;

		if (ksi->ksi_uid != kauth_cred_geteuid(l->l_cred))
			return EPERM;

		switch (ksi->ksi_code) {
		case SI_USER:
		case SI_QUEUE:
			break;
		default:
			return EPERM;
		}
	}

	if (pid > 0) {
		/* kill single process */
		mutex_enter(proc_lock);
		p = proc_find_raw(pid);
		if (p == NULL || (p->p_stat != SACTIVE && p->p_stat != SSTOP)) {
			mutex_exit(proc_lock);
			/* IEEE Std 1003.1-2001: return success for zombies */
			return p ? 0 : ESRCH;
		}
		mutex_enter(p->p_lock);
		error = kauth_authorize_process(l->l_cred,
		    KAUTH_PROCESS_SIGNAL, p, KAUTH_ARG(ksi->ksi_signo),
		    NULL, NULL);
		if (!error && ksi->ksi_signo) {
			kpsignal2(p, ksi);
		}
		mutex_exit(p->p_lock);
		mutex_exit(proc_lock);
		return error;
	}

	switch (pid) {
	case -1:		/* broadcast signal */
		return killpg1(l, ksi, 0, 1);
	case 0:			/* signal own process group */
		return killpg1(l, ksi, 0, 0);
	default:		/* negative explicit process group */
		return killpg1(l, ksi, -pid, 0);
	}
	/* NOTREACHED */
}
int
ipcperm(kauth_cred_t cred, struct ipc_perm *perm, int mode)
{
	int error;

	error = kauth_authorize_system(cred, KAUTH_SYSTEM_SYSVIPC,
	    KAUTH_REQ_SYSTEM_SYSVIPC_BYPASS, perm, KAUTH_ARG(mode), NULL);
	if (error == 0)
		return (0);

	/* Adjust EPERM and EACCES errors until there's a better way to do this. */
	if (mode != IPC_M)
		error = EACCES;

	return error;
}
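/*
 * Illustrative sketch, not from the NetBSD tree: the classic SysV IPC
 * mode-bit test that a secmodel listener backing ipcperm() typically
 * resolves when no privilege bypass applies.  example_ipc_mode_check()
 * is a hypothetical name; the struct ipc_perm fields used are the
 * POSIX-mandated ones.  `mode' carries the owner-position request
 * bits, i.e. IPC_R (0400), IPC_W (0200), or a combination.
 */
#include <sys/ipc.h>
#include <errno.h>

static int
example_ipc_mode_check(uid_t euid, gid_t egid, const struct ipc_perm *perm,
    int mode)
{
	mode_t mask;

	/* Pick the matching triplet, shifted up into the owner position. */
	if (euid == perm->uid || euid == perm->cuid)
		mask = perm->mode & 0700;
	else if (egid == perm->gid || egid == perm->cgid)
		mask = (perm->mode & 0070) << 3;
	else
		mask = (perm->mode & 0007) << 6;

	/* Grant only if every requested bit is present in the mask. */
	return ((mode & mask) == mode) ? 0 : EACCES;
}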
static int
linux_do_tkill(struct lwp *l, int tgid, int tid, int signum)
{
	struct proc *p;
	struct lwp *t;
	ksiginfo_t ksi;
	int error;

	if (signum < 0 || signum >= LINUX__NSIG)
		return EINVAL;
	signum = linux_to_native_signo[signum];

	if (tgid == -1) {
		tgid = tid;
	}

	KSI_INIT(&ksi);
	ksi.ksi_signo = signum;
	ksi.ksi_code = SI_LWP;
	ksi.ksi_pid = l->l_proc->p_pid;
	ksi.ksi_uid = kauth_cred_geteuid(l->l_cred);
	ksi.ksi_lid = tid;

	mutex_enter(proc_lock);
	p = proc_find(tgid);
	if (p == NULL) {
		mutex_exit(proc_lock);
		return ESRCH;
	}
	mutex_enter(p->p_lock);
	error = kauth_authorize_process(l->l_cred, KAUTH_PROCESS_SIGNAL, p,
	    KAUTH_ARG(signum), NULL, NULL);
	if ((t = lwp_find(p, ksi.ksi_lid)) == NULL)
		error = ESRCH;
	else if (error == 0 && signum != 0)	/* deliver only if authorized */
		kpsignal2(p, &ksi);
	mutex_exit(p->p_lock);
	mutex_exit(proc_lock);

	return error;
}
static int
linux_clone_nptl(struct lwp *l, const struct linux_sys_clone_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int) flags;
		syscallarg(void *) stack;
		syscallarg(void *) parent_tidptr;
		syscallarg(void *) tls;
		syscallarg(void *) child_tidptr;
	} */
	struct proc *p;
	struct lwp *l2;
	struct linux_emuldata *led;
	void *parent_tidptr, *tls, *child_tidptr;
	struct schedstate_percpu *spc;
	vaddr_t uaddr;
	lwpid_t lid;
	int flags, tnprocs, error;

	p = l->l_proc;
	flags = SCARG(uap, flags);
	parent_tidptr = SCARG(uap, parent_tidptr);
	tls = SCARG(uap, tls);
	child_tidptr = SCARG(uap, child_tidptr);

	tnprocs = atomic_inc_uint_nv(&nprocs);
	if (__predict_false(tnprocs >= maxproc) ||
	    kauth_authorize_process(l->l_cred, KAUTH_PROCESS_FORK, p,
	    KAUTH_ARG(tnprocs), NULL, NULL) != 0) {
		atomic_dec_uint(&nprocs);
		return EAGAIN;
	}

	uaddr = uvm_uarea_alloc();
	if (__predict_false(uaddr == 0)) {
		atomic_dec_uint(&nprocs);
		return ENOMEM;
	}

	error = lwp_create(l, p, uaddr, LWP_DETACHED | LWP_PIDLID,
	    SCARG(uap, stack), 0, child_return, NULL, &l2, l->l_class);
	if (__predict_false(error)) {
		DPRINTF(("%s: lwp_create error=%d\n", __func__, error));
		atomic_dec_uint(&nprocs);
		uvm_uarea_free(uaddr);
		return error;
	}
	lid = l2->l_lid;

	/* LINUX_CLONE_CHILD_CLEARTID: clear TID in child's memory on exit() */
	if (flags & LINUX_CLONE_CHILD_CLEARTID) {
		led = l2->l_emuldata;
		led->led_clear_tid = child_tidptr;
	}

	/* LINUX_CLONE_PARENT_SETTID: store child's TID in parent's memory */
	if (flags & LINUX_CLONE_PARENT_SETTID) {
		if ((error = copyout(&lid, parent_tidptr, sizeof(lid))) != 0)
			printf("%s: LINUX_CLONE_PARENT_SETTID "
			    "failed (parent_tidptr = %p tid = %d error=%d)\n",
			    __func__, parent_tidptr, lid, error);
	}

	/* LINUX_CLONE_CHILD_SETTID: store child's TID in child's memory */
	if (flags & LINUX_CLONE_CHILD_SETTID) {
		if ((error = copyout(&lid, child_tidptr, sizeof(lid))) != 0)
			printf("%s: LINUX_CLONE_CHILD_SETTID "
			    "failed (child_tidptr = %p, tid = %d error=%d)\n",
			    __func__, child_tidptr, lid, error);
	}

	if (flags & LINUX_CLONE_SETTLS) {
		error = LINUX_LWP_SETPRIVATE(l2, tls);
		if (error) {
			DPRINTF(("%s: LINUX_LWP_SETPRIVATE %d\n", __func__,
			    error));
			lwp_exit(l2);
			return error;
		}
	}

	/*
	 * Set the new LWP running, unless the process is stopping,
	 * then the LWP is created stopped.
	 */
	mutex_enter(p->p_lock);
	lwp_lock(l2);
	spc = &l2->l_cpu->ci_schedstate;
	if ((l->l_flag & (LW_WREBOOT | LW_WSUSPEND | LW_WEXIT)) == 0) {
		if (p->p_stat == SSTOP || (p->p_sflag & PS_STOPPING) != 0) {
			KASSERT(l2->l_wchan == NULL);
			l2->l_stat = LSSTOP;
			p->p_nrlwps--;
			lwp_unlock_to(l2, spc->spc_lwplock);
		} else {
			KASSERT(lwp_locked(l2, spc->spc_mutex));
			l2->l_stat = LSRUN;
			sched_enqueue(l2, false);
			lwp_unlock(l2);
		}
	} else {
		l2->l_stat = LSSUSPENDED;
		p->p_nrlwps--;
		lwp_unlock_to(l2, spc->spc_lwplock);
	}
	mutex_exit(p->p_lock);

	retval[0] = lid;
	retval[1] = 0;
	return 0;
}
int
adosfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
{
	struct lwp *l = curlwp;
	struct vnode *devvp;
	struct adosfs_args *args = data;
	struct adosfsmount *amp;
	int error;
	mode_t accessmode;

	if (*data_len < sizeof *args)
		return EINVAL;

	if (mp->mnt_flag & MNT_GETARGS) {
		amp = VFSTOADOSFS(mp);
		if (amp == NULL)
			return EIO;
		args->uid = amp->uid;
		args->gid = amp->gid;
		args->mask = amp->mask;
		args->fspec = NULL;
		*data_len = sizeof *args;
		return 0;
	}

	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		return (EROFS);

	if ((mp->mnt_flag & MNT_UPDATE) && args->fspec == NULL)
		return EOPNOTSUPP;

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible block device.
	 */
	error = namei_simple_user(args->fspec, NSM_FOLLOW_NOEMULROOT, &devvp);
	if (error != 0)
		return (error);

	if (devvp->v_type != VBLK) {
		vrele(devvp);
		return (ENOTBLK);
	}
	if (bdevsw_lookup(devvp->v_rdev) == NULL) {
		vrele(devvp);
		return (ENXIO);
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	accessmode = VREAD;
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		accessmode |= VWRITE;
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp, KAUTH_ARG(accessmode));
	VOP_UNLOCK(devvp);
	if (error) {
		vrele(devvp);
		return (error);
	}

	/* MNT_UPDATE? */
	if ((error = adosfs_mountfs(devvp, mp, l)) != 0) {
		vrele(devvp);
		return (error);
	}
	amp = VFSTOADOSFS(mp);
	amp->uid = args->uid;
	amp->gid = args->gid;
	amp->mask = args->mask;
	return set_statvfs_info(path, UIO_USERSPACE, args->fspec,
	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
}
/*
 * mount syscall vfsop.
 *
 * Returns 0 on success.
 */
static int
efs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
{
	struct lwp *l = curlwp;
	struct efs_args *args = data;
	struct pathbuf *pb;
	struct nameidata devnd;
	struct efs_mount *emp;
	struct vnode *devvp;
	int err, mode;

	if (args == NULL)
		return EINVAL;
	if (*data_len < sizeof *args)
		return EINVAL;

	if (mp->mnt_flag & MNT_GETARGS) {
		if ((emp = VFSTOEFS(mp)) == NULL)
			return (EIO);
		args->fspec = NULL;
		args->version = EFS_MNT_VERSION;
		*data_len = sizeof *args;
		return 0;
	}

	if (mp->mnt_flag & MNT_UPDATE)
		return (EOPNOTSUPP);	/* XXX read-only */

	/* look up our device's vnode. it is returned locked */
	err = pathbuf_copyin(args->fspec, &pb);
	if (err) {
		return err;
	}
	NDINIT(&devnd, LOOKUP, FOLLOW | LOCKLEAF, pb);
	if ((err = namei(&devnd))) {
		pathbuf_destroy(pb);
		return (err);
	}

	devvp = devnd.ni_vp;
	pathbuf_destroy(pb);

	if (devvp->v_type != VBLK) {
		vput(devvp);
		return (ENOTBLK);
	}

	/* XXX - rdonly */
	mode = FREAD;

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	err = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp, KAUTH_ARG(VREAD));
	if (err) {
		vput(devvp);
		return (err);
	}

	if ((err = VOP_OPEN(devvp, mode, l->l_cred))) {
		vput(devvp);
		return (err);
	}

	err = efs_mount_common(mp, path, devvp, args);
	if (err) {
		VOP_CLOSE(devvp, mode, l->l_cred);
		vput(devvp);
		return (err);
	}

	VOP_UNLOCK(devvp);

	return (0);
}
static int
quota_handle_cmd_clear(struct mount *mp, struct lwp *l,
    prop_dictionary_t cmddict, int type, prop_array_t datas)
{
	prop_array_t replies;
	prop_object_iterator_t iter;
	prop_dictionary_t data;
	uint32_t id;
	struct ufsmount *ump = VFSTOUFS(mp);
	int error, defaultq = 0;
	const char *idstr;

	if ((ump->um_flags & UFS_QUOTA2) == 0)
		return EOPNOTSUPP;

	replies = prop_array_create();
	if (replies == NULL)
		return ENOMEM;

	iter = prop_array_iterator(datas);
	if (iter == NULL) {
		prop_object_release(replies);
		return ENOMEM;
	}
	while ((data = prop_object_iterator_next(iter)) != NULL) {
		if (!prop_dictionary_get_uint32(data, "id", &id)) {
			if (!prop_dictionary_get_cstring_nocopy(data, "id",
			    &idstr))
				continue;
			if (strcmp(idstr, "default"))
				continue;
			id = 0;
			defaultq = 1;
		} else {
			defaultq = 0;
		}
		error = kauth_authorize_system(l->l_cred,
		    KAUTH_SYSTEM_FS_QUOTA, KAUTH_REQ_SYSTEM_FS_QUOTA_MANAGE,
		    mp, KAUTH_ARG(id), NULL);
		if (error != 0)
			goto err;
#ifdef QUOTA2
		if (ump->um_flags & UFS_QUOTA2) {
			error = quota2_handle_cmd_clear(ump, type, id,
			    defaultq, data);
		} else
#endif
			panic("quota_handle_cmd_clear: no support ?");
		if (error && error != ENOENT)
			goto err;
	}
	prop_object_iterator_release(iter);
	if (!prop_dictionary_set_and_rel(cmddict, "data", replies)) {
		error = ENOMEM;
	} else {
		error = 0;
	}
	return error;
err:
	prop_object_iterator_release(iter);
	prop_object_release(replies);
	return error;
}
int
hfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
{
	struct lwp *l = curlwp;
	struct hfs_args *args = data;
	struct vnode *devvp;
	struct hfsmount *hmp;
	int error = 0;
	int update;
	mode_t accessmode;

	if (args == NULL)
		return EINVAL;
	if (*data_len < sizeof *args)
		return EINVAL;

#ifdef HFS_DEBUG
	printf("vfsop = hfs_mount()\n");
#endif /* HFS_DEBUG */

	if (mp->mnt_flag & MNT_GETARGS) {
		hmp = VFSTOHFS(mp);
		if (hmp == NULL)
			return EIO;
		args->fspec = NULL;
		*data_len = sizeof *args;
		return 0;
	}

	if (data == NULL)
		return EINVAL;

	/* FIXME: For development ONLY - disallow remounting for now */
#if 0
	update = mp->mnt_flag & MNT_UPDATE;
#else
	update = 0;
#endif

	/* Check arguments */
	if (args->fspec != NULL) {
		/*
		 * Look up the name and verify that it's sane.
		 */
		error = namei_simple_user(args->fspec,
		    NSM_FOLLOW_NOEMULROOT, &devvp);
		if (error != 0)
			return error;

		if (!update) {
			/*
			 * Be sure this is a valid block device
			 */
			if (devvp->v_type != VBLK)
				error = ENOTBLK;
			else if (bdevsw_lookup(devvp->v_rdev) == NULL)
				error = ENXIO;
		} else {
			/*
			 * Be sure we're still naming the same device
			 * used for our initial mount
			 */
			hmp = VFSTOHFS(mp);
			if (devvp != hmp->hm_devvp)
				error = EINVAL;
		}
	} else {
		if (update) {
			/* Use the extant mount */
			hmp = VFSTOHFS(mp);
			devvp = hmp->hm_devvp;
			vref(devvp);
		} else {
			/* New mounts must have a filename for the device */
			return EINVAL;
		}
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 *
	 * Permission to update a mount is checked higher, so here we presume
	 * updating the mount is okay (for example, as far as securelevel goes)
	 * which leaves us with the normal check.
	 */
	if (error == 0) {
		accessmode = VREAD;
		if (update ?
		    (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
		    (mp->mnt_flag & MNT_RDONLY) == 0)
			accessmode |= VWRITE;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
		    KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp,
		    KAUTH_ARG(accessmode));
		VOP_UNLOCK(devvp);
	}

	if (error != 0)
		goto error;

	if (update) {
		printf("HFS: live remounting not yet supported!\n");
		error = EINVAL;
		goto error;
	}

	if ((error = hfs_mountfs(devvp, mp, l, args->fspec)) != 0)
		goto error;

	error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);

#ifdef HFS_DEBUG
	if (!update) {
		char *volname;

		hmp = VFSTOHFS(mp);
		volname = malloc(hmp->hm_vol.name.length + 1, M_TEMP, M_WAITOK);
		if (volname == NULL)
			printf("could not allocate volname; ignored\n");
		else {
			if (hfs_unicode_to_ascii(hmp->hm_vol.name.unicode,
			    hmp->hm_vol.name.length, volname) == NULL)
				printf("could not convert volume name to ascii; ignored\n");
			else
				printf("mounted volume \"%s\"\n", volname);
			free(volname, M_TEMP);
		}
	}
#endif /* HFS_DEBUG */

	return error;

error:
	vrele(devvp);
	return error;
}
int
v7fs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
{
	struct lwp *l = curlwp;
	struct v7fs_args *args = data;
	struct v7fs_mount *v7fsmount = (void *)mp->mnt_data;
	struct vnode *devvp = NULL;
	int error = 0;
	bool update = mp->mnt_flag & MNT_UPDATE;

	DPRINTF("mnt_flag=%x %s\n", mp->mnt_flag, update ? "update" : "");

	if (*data_len < sizeof(*args))
		return EINVAL;

	if (mp->mnt_flag & MNT_GETARGS) {
		if (!v7fsmount)
			return EIO;
		args->fspec = NULL;
		args->endian = v7fsmount->core->endian;
		*data_len = sizeof(*args);
		return 0;
	}

	DPRINTF("args->fspec=%s endian=%d\n", args->fspec, args->endian);
	if (args->fspec == NULL) {
		/* nothing to do. */
		return EINVAL;
	}

	if (args->fspec != NULL) {
		/* Look up the name and verify that it's sane. */
		error = namei_simple_user(args->fspec,
		    NSM_FOLLOW_NOEMULROOT, &devvp);
		if (error != 0)
			return (error);
		DPRINTF("mount device=%lx\n", (long)devvp->v_rdev);

		if (!update) {
			/*
			 * Be sure this is a valid block device
			 */
			if (devvp->v_type != VBLK)
				error = ENOTBLK;
			else if (bdevsw_lookup(devvp->v_rdev) == NULL)
				error = ENXIO;
		} else {
			KDASSERT(v7fsmount);
			/*
			 * Be sure we're still naming the same device
			 * used for our initial mount
			 */
			if (devvp != v7fsmount->devvp) {
				DPRINTF("devvp %p != %p rootvp=%p\n",
				    devvp, v7fsmount->devvp, rootvp);
				if (rootvp == v7fsmount->devvp) {
					vrele(devvp);
					devvp = rootvp;
					vref(devvp);
				} else {
					error = EINVAL;
				}
			}
		}
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 *
	 * Permission to update a mount is checked higher, so here we presume
	 * updating the mount is okay (for example, as far as securelevel goes)
	 * which leaves us with the normal check.
	 */
	if (error == 0) {
		int accessmode = VREAD;

		if (update ?
		    (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
		    (mp->mnt_flag & MNT_RDONLY) == 0)
			accessmode |= VWRITE;
		error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
		    KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp,
		    KAUTH_ARG(accessmode));
	}

	if (error) {
		vrele(devvp);
		return error;
	}

	if (!update) {
		if ((error = v7fs_openfs(devvp, mp, l))) {
			vrele(devvp);
			return error;
		}

		if ((error = v7fs_mountfs(devvp, mp, args->endian))) {
			v7fs_closefs(devvp, mp);
			VOP_UNLOCK(devvp);
			vrele(devvp);
			return error;
		}
		VOP_UNLOCK(devvp);
	} else if (mp->mnt_flag & MNT_RDONLY) {
		/* XXX: r/w -> read only */
	}

	return set_statvfs_info(path, UIO_USERSPACE, args->fspec,
	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
}
/*
 * VFS Operations.
 *
 * mount system call
 */
int
cd9660_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
{
	struct lwp *l = curlwp;
	struct vnode *devvp;
	struct iso_args *args = data;
	int error;
	struct iso_mnt *imp = VFSTOISOFS(mp);

	if (*data_len < sizeof *args)
		return EINVAL;

	if (mp->mnt_flag & MNT_GETARGS) {
		if (imp == NULL)
			return EIO;
		args->fspec = NULL;
		args->flags = imp->im_flags;
		*data_len = sizeof(*args);
		return 0;
	}

	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		return (EROFS);

	if ((mp->mnt_flag & MNT_UPDATE) && args->fspec == NULL)
		return EINVAL;

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible block device.
	 */
	error = namei_simple_user(args->fspec, NSM_FOLLOW_NOEMULROOT, &devvp);
	if (error != 0)
		return (error);

	if (devvp->v_type != VBLK) {
		vrele(devvp);
		return ENOTBLK;
	}
	if (bdevsw_lookup(devvp->v_rdev) == NULL) {
		vrele(devvp);
		return ENXIO;
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp, KAUTH_ARG(VREAD));
	VOP_UNLOCK(devvp);
	if (error) {
		vrele(devvp);
		return (error);
	}
	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(devvp, FREAD, FSCRED);
		VOP_UNLOCK(devvp);
		if (error)
			goto fail;
		error = iso_mountfs(devvp, mp, l, args);
		if (error) {
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			(void)VOP_CLOSE(devvp, FREAD, NOCRED);
			VOP_UNLOCK(devvp);
			goto fail;
		}
	} else {
		vrele(devvp);
		if (devvp != imp->im_devvp &&
		    devvp->v_rdev != imp->im_devvp->v_rdev)
			return (EINVAL);	/* needs translation */
	}
	return set_statvfs_info(path, UIO_USERSPACE, args->fspec,
	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);

fail:
	vrele(devvp);
	return (error);
}
/*
 * Process debugging system call.
 */
int
sys_ptrace(struct lwp *l, const struct sys_ptrace_args *uap, register_t *retval)
{
	/* {
		syscallarg(int) req;
		syscallarg(pid_t) pid;
		syscallarg(void *) addr;
		syscallarg(int) data;
	} */
	struct proc *p = l->l_proc;
	struct lwp *lt;
	struct proc *t;				/* target process */
	struct uio uio;
	struct iovec iov;
	struct ptrace_io_desc piod;
	struct ptrace_lwpinfo pl;
	struct vmspace *vm;
	int error, write, tmp, req, pheld;
	int signo;
	ksiginfo_t ksi;
#ifdef COREDUMP
	char *path;
#endif

	error = 0;
	req = SCARG(uap, req);

	/*
	 * If attaching or detaching, we need to get a write hold on the
	 * proclist lock so that we can re-parent the target process.
	 */
	mutex_enter(proc_lock);

	/* "A foolish consistency..." XXX */
	if (req == PT_TRACE_ME) {
		t = p;
		mutex_enter(t->p_lock);
	} else {
		/* Find the process we're supposed to be operating on. */
		if ((t = p_find(SCARG(uap, pid), PFIND_LOCKED)) == NULL) {
			mutex_exit(proc_lock);
			return (ESRCH);
		}

		/* XXX-elad */
		mutex_enter(t->p_lock);
		error = kauth_authorize_process(l->l_cred,
		    KAUTH_PROCESS_CANSEE, t,
		    KAUTH_ARG(KAUTH_REQ_PROCESS_CANSEE_ENTRY), NULL, NULL);
		if (error) {
			mutex_exit(proc_lock);
			mutex_exit(t->p_lock);
			return (ESRCH);
		}
	}

	/*
	 * Grab a reference on the process to prevent it from execing or
	 * exiting.
	 */
	if (!rw_tryenter(&t->p_reflock, RW_READER)) {
		mutex_exit(proc_lock);
		mutex_exit(t->p_lock);
		return EBUSY;
	}

	/* Make sure we can operate on it. */
	switch (req) {
	case PT_TRACE_ME:
		/* Saying that you're being traced is always legal. */
		break;

	case PT_ATTACH:
		/*
		 * You can't attach to a process if:
		 *	(1) it's the process that's doing the attaching,
		 */
		if (t->p_pid == p->p_pid) {
			error = EINVAL;
			break;
		}

		/*
		 *	(2) it's a system process
		 */
		if (t->p_flag & PK_SYSTEM) {
			error = EPERM;
			break;
		}

		/*
		 *	(3) it's already being traced, or
		 */
		if (ISSET(t->p_slflag, PSL_TRACED)) {
			error = EBUSY;
			break;
		}

		/*
		 *	(4) the tracer is chrooted, and its root directory is
		 *	    not at or above the root directory of the tracee
		 */
		mutex_exit(t->p_lock);	/* XXXSMP */
		tmp = proc_isunder(t, l);
		mutex_enter(t->p_lock);	/* XXXSMP */
		if (!tmp) {
			error = EPERM;
			break;
		}
		break;

	case PT_READ_I:
	case PT_READ_D:
	case PT_WRITE_I:
	case PT_WRITE_D:
	case PT_IO:
#ifdef PT_GETREGS
	case PT_GETREGS:
#endif
#ifdef PT_SETREGS
	case PT_SETREGS:
#endif
#ifdef PT_GETFPREGS
	case PT_GETFPREGS:
#endif
#ifdef PT_SETFPREGS
	case PT_SETFPREGS:
#endif
#ifdef __HAVE_PTRACE_MACHDEP
	PTRACE_MACHDEP_REQUEST_CASES
#endif
		/*
		 * You can't read/write the memory or registers of a process
		 * if the tracer is chrooted, and its root directory is not at
		 * or above the root directory of the tracee.
		 */
		mutex_exit(t->p_lock);	/* XXXSMP */
		tmp = proc_isunder(t, l);
		mutex_enter(t->p_lock);	/* XXXSMP */
		if (!tmp) {
			error = EPERM;
			break;
		}
		/*FALLTHROUGH*/

	case PT_CONTINUE:
	case PT_KILL:
	case PT_DETACH:
	case PT_LWPINFO:
	case PT_SYSCALL:
#ifdef COREDUMP
	case PT_DUMPCORE:
#endif
#ifdef PT_STEP
	case PT_STEP:
#endif
		/*
		 * You can't do what you want to the process if:
		 *	(1) It's not being traced at all,
		 */
		if (!ISSET(t->p_slflag, PSL_TRACED)) {
			error = EPERM;
			break;
		}

		/*
		 *	(2) it's being traced by procfs (which has
		 *	    different signal delivery semantics),
		 */
		if (ISSET(t->p_slflag, PSL_FSTRACE)) {
			uprintf("file system traced\n");
			error = EBUSY;
			break;
		}

		/*
		 *	(3) it's not being traced by _you_, or
		 */
		if (t->p_pptr != p) {
			uprintf("parent %d != %d\n", t->p_pptr->p_pid,
			    p->p_pid);
			error = EBUSY;
			break;
		}

		/*
		 *	(4) it's not currently stopped.
		 */
		if (t->p_stat != SSTOP || !t->p_waited /* XXXSMP */) {
			uprintf("stat %d flag %d\n", t->p_stat, !t->p_waited);
			error = EBUSY;
			break;
		}
		break;

	default:
		/* It was not a legal request. */
		error = EINVAL;
		break;
	}

	if (error == 0)
		error = kauth_authorize_process(l->l_cred,
		    KAUTH_PROCESS_PTRACE, t, KAUTH_ARG(req),
		    NULL, NULL);

	if (error != 0) {
		mutex_exit(proc_lock);
		mutex_exit(t->p_lock);
		rw_exit(&t->p_reflock);
		return error;
	}

	/* Do single-step fixup if needed. */
	FIX_SSTEP(t);

	/*
	 * XXX NJWLWP
	 *
	 * The entire ptrace interface needs work to be useful to a
	 * process with multiple LWPs. For the moment, we'll kluge
	 * this; memory access will be fine, but register access will
	 * be weird.
	 */
	lt = LIST_FIRST(&t->p_lwps);
	KASSERT(lt != NULL);
	lwp_addref(lt);

	/*
	 * Which locks do we need held? XXX Ugly.
	 */
	switch (req) {
#ifdef PT_STEP
	case PT_STEP:
#endif
	case PT_CONTINUE:
	case PT_DETACH:
	case PT_KILL:
	case PT_SYSCALL:
	case PT_ATTACH:
	case PT_TRACE_ME:
		pheld = 1;
		break;
	default:
		mutex_exit(proc_lock);
		mutex_exit(t->p_lock);
		pheld = 0;
		break;
	}

	/* Now do the operation. */
	write = 0;
	*retval = 0;
	tmp = 0;

	switch (req) {
	case PT_TRACE_ME:
		/* Just set the trace flag. */
		SET(t->p_slflag, PSL_TRACED);
		t->p_opptr = t->p_pptr;
		break;

	case PT_WRITE_I:		/* XXX no separate I and D spaces */
	case PT_WRITE_D:
#if defined(__HAVE_RAS)
		/*
		 * Can't write to a RAS
		 */
		if (ras_lookup(t, SCARG(uap, addr)) != (void *)-1) {
			error = EACCES;
			break;
		}
#endif
		write = 1;
		tmp = SCARG(uap, data);
		/* FALLTHROUGH */

	case PT_READ_I:			/* XXX no separate I and D spaces */
	case PT_READ_D:
		/* write = 0 done above. */
		iov.iov_base = (void *)&tmp;
		iov.iov_len = sizeof(tmp);
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(unsigned long)SCARG(uap, addr);
		uio.uio_resid = sizeof(tmp);
		uio.uio_rw = write ? UIO_WRITE : UIO_READ;
		UIO_SETUP_SYSSPACE(&uio);

		error = process_domem(l, lt, &uio);
		if (!write)
			*retval = tmp;
		break;

	case PT_IO:
		error = copyin(SCARG(uap, addr), &piod, sizeof(piod));
		if (error)
			break;
		switch (piod.piod_op) {
		case PIOD_READ_D:
		case PIOD_READ_I:
			uio.uio_rw = UIO_READ;
			break;
		case PIOD_WRITE_D:
		case PIOD_WRITE_I:
			/*
			 * Can't write to a RAS
			 */
			if (ras_lookup(t, SCARG(uap, addr)) != (void *)-1) {
				/* take the common cleanup path below */
				error = EACCES;
				break;
			}
			uio.uio_rw = UIO_WRITE;
			break;
		default:
			error = EINVAL;
			break;
		}
		if (error)
			break;
		error = proc_vmspace_getref(l->l_proc, &vm);
		if (error)
			break;
		iov.iov_base = piod.piod_addr;
		iov.iov_len = piod.piod_len;
		uio.uio_iov = &iov;
		uio.uio_iovcnt = 1;
		uio.uio_offset = (off_t)(unsigned long)piod.piod_offs;
		uio.uio_resid = piod.piod_len;
		uio.uio_vmspace = vm;

		error = process_domem(l, lt, &uio);
		piod.piod_len -= uio.uio_resid;
		(void) copyout(&piod, SCARG(uap, addr), sizeof(piod));
		uvmspace_free(vm);
		break;

#ifdef COREDUMP
	case PT_DUMPCORE:
		if ((path = SCARG(uap, addr)) != NULL) {
			char *dst;
			int len = SCARG(uap, data);

			if (len < 0 || len >= MAXPATHLEN) {
				error = EINVAL;
				break;
			}
			dst = malloc(len + 1, M_TEMP, M_WAITOK);
			if ((error = copyin(path, dst, len)) != 0) {
				free(dst, M_TEMP);
				break;
			}
			path = dst;
			path[len] = '\0';
		}
		error = coredump(lt, path);
		if (path)
			free(path, M_TEMP);
		break;
#endif

#ifdef PT_STEP
	case PT_STEP:
		/*
		 * From the 4.4BSD PRM:
		 * "Execution continues as in request PT_CONTINUE; however
		 * as soon as possible after execution of at least one
		 * instruction, execution stops again. [ ... ]"
		 */
#endif
	case PT_CONTINUE:
	case PT_SYSCALL:
	case PT_DETACH:
		if (req == PT_SYSCALL) {
			if (!ISSET(t->p_slflag, PSL_SYSCALL)) {
				SET(t->p_slflag, PSL_SYSCALL);
#ifdef __HAVE_SYSCALL_INTERN
				(*t->p_emul->e_syscall_intern)(t);
#endif
			}
		} else {
			if (ISSET(t->p_slflag, PSL_SYSCALL)) {
				CLR(t->p_slflag, PSL_SYSCALL);
#ifdef __HAVE_SYSCALL_INTERN
				(*t->p_emul->e_syscall_intern)(t);
#endif
			}
		}
		p->p_trace_enabled = trace_is_enabled(p);

		/*
		 * From the 4.4BSD PRM:
		 * "The data argument is taken as a signal number and the
		 * child's execution continues at location addr as if it
		 * incurred that signal. Normally the signal number will
		 * be either 0 to indicate that the signal that caused the
		 * stop should be ignored, or that value fetched out of
		 * the process's image indicating which signal caused
		 * the stop. If addr is (int *)1 then execution continues
		 * from where it stopped."
		 */

		/* Check that the data is a valid signal number or zero. */
		if (SCARG(uap, data) < 0 || SCARG(uap, data) >= NSIG) {
			error = EINVAL;
			break;
		}

		uvm_lwp_hold(lt);

		/* If the address parameter is not (int *)1, set the pc. */
		if ((int *)SCARG(uap, addr) != (int *)1)
			if ((error = process_set_pc(lt, SCARG(uap, addr))) != 0) {
				uvm_lwp_rele(lt);
				break;
			}

#ifdef PT_STEP
		/*
		 * Arrange for a single-step, if that's requested and possible.
		 */
		error = process_sstep(lt, req == PT_STEP);
		if (error) {
			uvm_lwp_rele(lt);
			break;
		}
#endif

		uvm_lwp_rele(lt);

		if (req == PT_DETACH) {
			CLR(t->p_slflag, PSL_TRACED|PSL_FSTRACE|PSL_SYSCALL);

			/* give process back to original parent or init */
			if (t->p_opptr != t->p_pptr) {
				struct proc *pp = t->p_opptr;
				proc_reparent(t, pp ? pp : initproc);
			}

			/* not being traced any more */
			t->p_opptr = NULL;
		}

		signo = SCARG(uap, data);
	sendsig:
		/* Finally, deliver the requested signal (or none). */
		if (t->p_stat == SSTOP) {
			/*
			 * Unstop the process.  If it needs to take a
			 * signal, make all efforts to ensure that at
			 * an LWP runs to see it.
			 */
			t->p_xstat = signo;
			proc_unstop(t);
		} else if (signo != 0) {
			KSI_INIT_EMPTY(&ksi);
			ksi.ksi_signo = signo;
			kpsignal2(t, &ksi);
		}
		break;

	case PT_KILL:
		/* just send the process a KILL signal. */
		signo = SIGKILL;
		goto sendsig;	/* in PT_CONTINUE, above. */

	case PT_ATTACH:
		/*
		 * Go ahead and set the trace flag.
		 * Save the old parent (it's reset in
		 *   _DETACH, and also in kern_exit.c:wait4()
		 * Reparent the process so that the tracing
		 *   proc gets to see all the action.
		 * Stop the target.
		 */
		t->p_opptr = t->p_pptr;
		if (t->p_pptr != p) {
			struct proc *parent = t->p_pptr;

			if (parent->p_lock < t->p_lock) {
				if (!mutex_tryenter(parent->p_lock)) {
					mutex_exit(t->p_lock);
					mutex_enter(parent->p_lock);
				}
			} else if (parent->p_lock > t->p_lock) {
				mutex_enter(parent->p_lock);
			}
			parent->p_slflag |= PSL_CHTRACED;
			proc_reparent(t, p);
			if (parent->p_lock != t->p_lock)
				mutex_exit(parent->p_lock);
		}
		SET(t->p_slflag, PSL_TRACED);
		signo = SIGSTOP;
		goto sendsig;

	case PT_LWPINFO:
		if (SCARG(uap, data) != sizeof(pl)) {
			error = EINVAL;
			break;
		}
		error = copyin(SCARG(uap, addr), &pl, sizeof(pl));
		if (error)
			break;
		tmp = pl.pl_lwpid;
		lwp_delref(lt);
		mutex_enter(t->p_lock);
		if (tmp == 0)
			lt = LIST_FIRST(&t->p_lwps);
		else {
			lt = lwp_find(t, tmp);
			if (lt == NULL) {
				mutex_exit(t->p_lock);
				error = ESRCH;
				break;
			}
			lt = LIST_NEXT(lt, l_sibling);
		}
		while (lt != NULL && lt->l_stat == LSZOMB)
			lt = LIST_NEXT(lt, l_sibling);
		pl.pl_lwpid = 0;
		pl.pl_event = 0;
		if (lt) {
			lwp_addref(lt);
			pl.pl_lwpid = lt->l_lid;
			if (lt->l_lid == t->p_sigctx.ps_lwp)
				pl.pl_event = PL_EVENT_SIGNAL;
		}
		mutex_exit(t->p_lock);

		error = copyout(&pl, SCARG(uap, addr), sizeof(pl));
		break;

#ifdef PT_SETREGS
	case PT_SETREGS:
		write = 1;
#endif
#ifdef PT_GETREGS
	case PT_GETREGS:
		/* write = 0 done above. */
#endif
#if defined(PT_SETREGS) || defined(PT_GETREGS)
		tmp = SCARG(uap, data);
		if (tmp != 0 && t->p_nlwps > 1) {
			lwp_delref(lt);
			mutex_enter(t->p_lock);
			lt = lwp_find(t, tmp);
			if (lt == NULL) {
				mutex_exit(t->p_lock);
				error = ESRCH;
				break;
			}
			lwp_addref(lt);
			mutex_exit(t->p_lock);
		}
		if (!process_validregs(lt))
			error = EINVAL;
		else {
			error = proc_vmspace_getref(l->l_proc, &vm);
			if (error)
				break;
			iov.iov_base = SCARG(uap, addr);
			iov.iov_len = sizeof(struct reg);
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			uio.uio_offset = 0;
			uio.uio_resid = sizeof(struct reg);
			uio.uio_rw = write ? UIO_WRITE : UIO_READ;
			uio.uio_vmspace = vm;

			error = process_doregs(l, lt, &uio);
			uvmspace_free(vm);
		}
		break;
#endif

#ifdef PT_SETFPREGS
	case PT_SETFPREGS:
		write = 1;
#endif
#ifdef PT_GETFPREGS
	case PT_GETFPREGS:
		/* write = 0 done above. */
#endif
#if defined(PT_SETFPREGS) || defined(PT_GETFPREGS)
		tmp = SCARG(uap, data);
		if (tmp != 0 && t->p_nlwps > 1) {
			lwp_delref(lt);
			mutex_enter(t->p_lock);
			lt = lwp_find(t, tmp);
			if (lt == NULL) {
				mutex_exit(t->p_lock);
				error = ESRCH;
				break;
			}
			lwp_addref(lt);
			mutex_exit(t->p_lock);
		}
		if (!process_validfpregs(lt))
			error = EINVAL;
		else {
			error = proc_vmspace_getref(l->l_proc, &vm);
			if (error)
				break;
			iov.iov_base = SCARG(uap, addr);
			iov.iov_len = sizeof(struct fpreg);
			uio.uio_iov = &iov;
			uio.uio_iovcnt = 1;
			uio.uio_offset = 0;
			uio.uio_resid = sizeof(struct fpreg);
			uio.uio_rw = write ? UIO_WRITE : UIO_READ;
			uio.uio_vmspace = vm;

			error = process_dofpregs(l, lt, &uio);
			uvmspace_free(vm);
		}
		break;
#endif

#ifdef __HAVE_PTRACE_MACHDEP
	PTRACE_MACHDEP_REQUEST_CASES
		error = ptrace_machdep_dorequest(l, lt,
		    req, SCARG(uap, addr), SCARG(uap, data));
		break;
#endif
	}

	if (pheld) {
		mutex_exit(t->p_lock);
		mutex_exit(proc_lock);
	}
	if (lt != NULL)
		lwp_delref(lt);
	rw_exit(&t->p_reflock);

	return error;
}
int
msgctl1(struct lwp *l, int msqid, int cmd, struct msqid_ds *msqbuf)
{
	kauth_cred_t cred = l->l_cred;
	struct msqid_ds *msqptr;
	kmsq_t *msq;
	int error = 0, ix;

	MSG_PRINTF(("call to msgctl1(%d, %d)\n", msqid, cmd));

	ix = IPCID_TO_IX(msqid);

	mutex_enter(&msgmutex);

	if (ix < 0 || ix >= msginfo.msgmni) {
		MSG_PRINTF(("msqid (%d) out of range (0<=msqid<%d)\n", ix,
		    msginfo.msgmni));
		error = EINVAL;
		goto unlock;
	}

	msq = &msqs[ix];
	msqptr = &msq->msq_u;

	if (msqptr->msg_qbytes == 0) {
		MSG_PRINTF(("no such msqid\n"));
		error = EINVAL;
		goto unlock;
	}
	if (msqptr->msg_perm._seq != IPCID_TO_SEQ(msqid)) {
		MSG_PRINTF(("wrong sequence number\n"));
		error = EINVAL;
		goto unlock;
	}

	switch (cmd) {
	case IPC_RMID:
	{
		struct __msg *msghdr;

		if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_M)) != 0)
			break;

		/* Free the message headers */
		msghdr = msqptr->_msg_first;
		while (msghdr != NULL) {
			struct __msg *msghdr_tmp;

			/* Free the segments of each message */
			msqptr->_msg_cbytes -= msghdr->msg_ts;
			msqptr->msg_qnum--;
			msghdr_tmp = msghdr;
			msghdr = msghdr->msg_next;
			msg_freehdr(msghdr_tmp);
		}
		KASSERT(msqptr->_msg_cbytes == 0);
		KASSERT(msqptr->msg_qnum == 0);

		/* Mark it as free */
		msqptr->msg_qbytes = 0;
		cv_broadcast(&msq->msq_cv);
	}
		break;

	case IPC_SET:
		if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_M)))
			break;
		if (msqbuf->msg_qbytes > msqptr->msg_qbytes &&
		    kauth_authorize_system(cred, KAUTH_SYSTEM_SYSVIPC,
		    KAUTH_REQ_SYSTEM_SYSVIPC_MSGQ_OVERSIZE,
		    KAUTH_ARG(msqbuf->msg_qbytes),
		    KAUTH_ARG(msqptr->msg_qbytes), NULL) != 0) {
			error = EPERM;
			break;
		}
		if (msqbuf->msg_qbytes > msginfo.msgmnb) {
			MSG_PRINTF(("can't increase msg_qbytes beyond %d "
			    "(truncating)\n", msginfo.msgmnb));
			/* silently restrict qbytes to system limit */
			msqbuf->msg_qbytes = msginfo.msgmnb;
		}
		if (msqbuf->msg_qbytes == 0) {
			MSG_PRINTF(("can't reduce msg_qbytes to 0\n"));
			error = EINVAL;		/* XXX non-standard errno! */
			break;
		}
		msqptr->msg_perm.uid = msqbuf->msg_perm.uid;
		msqptr->msg_perm.gid = msqbuf->msg_perm.gid;
		msqptr->msg_perm.mode = (msqptr->msg_perm.mode & ~0777) |
		    (msqbuf->msg_perm.mode & 0777);
		msqptr->msg_qbytes = msqbuf->msg_qbytes;
		msqptr->msg_ctime = time_second;
		break;

	case IPC_STAT:
		if ((error = ipcperm(cred, &msqptr->msg_perm, IPC_R))) {
			MSG_PRINTF(("requester doesn't have read access\n"));
			break;
		}
		memcpy(msqbuf, msqptr, sizeof(struct msqid_ds));
		break;

	default:
		MSG_PRINTF(("invalid command %d\n", cmd));
		error = EINVAL;
		break;
	}

unlock:
	mutex_exit(&msgmutex);
	return (error);
}
/*
 * mp - path - addr in user space of mount point (ie /usr or whatever)
 * data - addr in user space of mount params including the name of the block
 * special file to treat as a filesystem.
 */
int
msdosfs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
{
	struct lwp *l = curlwp;
	struct vnode *devvp;		  /* vnode for blk device to mount */
	struct msdosfs_args *args = data; /* holds data from mount request */
	/* msdosfs specific mount control block */
	struct msdosfsmount *pmp = NULL;
	int error, flags;
	mode_t accessmode;

	if (*data_len < sizeof *args)
		return EINVAL;

	if (mp->mnt_flag & MNT_GETARGS) {
		pmp = VFSTOMSDOSFS(mp);
		if (pmp == NULL)
			return EIO;
		args->fspec = NULL;
		args->uid = pmp->pm_uid;
		args->gid = pmp->pm_gid;
		args->mask = pmp->pm_mask;
		args->flags = pmp->pm_flags;
		args->version = MSDOSFSMNT_VERSION;
		args->dirmask = pmp->pm_dirmask;
		args->gmtoff = pmp->pm_gmtoff;
		*data_len = sizeof *args;
		return 0;
	}

	/*
	 * If not versioned (i.e. using old mount_msdos(8)), fill in
	 * the additional structure items with suitable defaults.
	 */
	if ((args->flags & MSDOSFSMNT_VERSIONED) == 0) {
		args->version = 1;
		args->dirmask = args->mask;
	}

	/*
	 * Reset GMT offset for pre-v3 mount structure args.
	 */
	if (args->version < 3)
		args->gmtoff = 0;

	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		pmp = VFSTOMSDOSFS(mp);
		error = 0;
		if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) &&
		    (mp->mnt_flag & MNT_RDONLY)) {
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			error = vflush(mp, NULLVP, flags);
		}
		if (!error && (mp->mnt_flag & MNT_RELOAD))
			/* not yet implemented */
			error = EOPNOTSUPP;
		if (error) {
			DPRINTF(("vflush %d\n", error));
			return (error);
		}
		if ((pmp->pm_flags & MSDOSFSMNT_RONLY) &&
		    (mp->mnt_iflag & IMNT_WANTRDWR)) {
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 *
			 * Permission to update a mount is checked higher, so
			 * here we presume updating the mount is okay (for
			 * example, as far as securelevel goes) which leaves
			 * us with the normal check.
			 */
			devvp = pmp->pm_devvp;
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			error = kauth_authorize_system(l->l_cred,
			    KAUTH_SYSTEM_MOUNT, KAUTH_REQ_SYSTEM_MOUNT_DEVICE,
			    mp, devvp, KAUTH_ARG(VREAD | VWRITE));
			VOP_UNLOCK(devvp);
			DPRINTF(("KAUTH_REQ_SYSTEM_MOUNT_DEVICE %d\n", error));
			if (error)
				return (error);
			pmp->pm_flags &= ~MSDOSFSMNT_RONLY;
		}
		if (args->fspec == NULL) {
			DPRINTF(("missing fspec\n"));
			return EINVAL;
		}
	}

	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible block device.
	 */
	error = namei_simple_user(args->fspec, NSM_FOLLOW_NOEMULROOT, &devvp);
	if (error != 0) {
		DPRINTF(("namei %d\n", error));
		return (error);
	}

	if (devvp->v_type != VBLK) {
		DPRINTF(("not block\n"));
		vrele(devvp);
		return (ENOTBLK);
	}
	if (bdevsw_lookup(devvp->v_rdev) == NULL) {
		DPRINTF(("no block switch\n"));
		vrele(devvp);
		return (ENXIO);
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 */
	accessmode = VREAD;
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		accessmode |= VWRITE;
	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp, KAUTH_ARG(accessmode));
	VOP_UNLOCK(devvp);
	if (error) {
		DPRINTF(("KAUTH_REQ_SYSTEM_MOUNT_DEVICE %d\n", error));
		vrele(devvp);
		return (error);
	}

	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
		int xflags;

		if (mp->mnt_flag & MNT_RDONLY)
			xflags = FREAD;
		else
			xflags = FREAD|FWRITE;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(devvp, xflags, FSCRED);
		VOP_UNLOCK(devvp);
		if (error) {
			DPRINTF(("VOP_OPEN %d\n", error));
			goto fail;
		}
		error = msdosfs_mountfs(devvp, mp, l, args);
		if (error) {
			DPRINTF(("msdosfs_mountfs %d\n", error));
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			(void) VOP_CLOSE(devvp, xflags, NOCRED);
			VOP_UNLOCK(devvp);
			goto fail;
		}
#ifdef MSDOSFS_DEBUG		/* only needed for the printf below */
		pmp = VFSTOMSDOSFS(mp);
#endif
	} else {
		vrele(devvp);
		if (devvp != pmp->pm_devvp) {
			DPRINTF(("devvp %p pmp %p\n", devvp, pmp->pm_devvp));
			return (EINVAL);	/* needs translation */
		}
	}
	if ((error = update_mp(mp, args)) != 0) {
		msdosfs_unmount(mp, MNT_FORCE);
		DPRINTF(("update_mp %d\n", error));
		return error;
	}

#ifdef MSDOSFS_DEBUG
	printf("msdosfs_mount(): mp %p, pmp %p, inusemap %p\n", mp, pmp,
	    pmp->pm_inusemap);
#endif
	return set_statvfs_info(path, UIO_USERSPACE, args->fspec,
	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);

fail:
	vrele(devvp);
	return (error);
}
/*
 * VFS Operations.
 *
 * mount system call
 */
int
ext2fs_mount(struct mount *mp, const char *path, void *data, size_t *data_len)
{
	struct lwp *l = curlwp;
	struct vnode *devvp;
	struct ufs_args *args = data;
	struct ufsmount *ump = NULL;
	struct m_ext2fs *fs;
	int error = 0, flags, update;
	mode_t accessmode;

	if (args == NULL)
		return EINVAL;
	if (*data_len < sizeof *args)
		return EINVAL;

	if (mp->mnt_flag & MNT_GETARGS) {
		ump = VFSTOUFS(mp);
		if (ump == NULL)
			return EIO;
		memset(args, 0, sizeof *args);
		args->fspec = NULL;
		*data_len = sizeof *args;
		return 0;
	}

	update = mp->mnt_flag & MNT_UPDATE;

	/* Check arguments */
	if (args->fspec != NULL) {
		/*
		 * Look up the name and verify that it's sane.
		 */
		error = namei_simple_user(args->fspec,
		    NSM_FOLLOW_NOEMULROOT, &devvp);
		if (error != 0)
			return error;

		if (!update) {
			/*
			 * Be sure this is a valid block device
			 */
			if (devvp->v_type != VBLK)
				error = ENOTBLK;
			else if (bdevsw_lookup(devvp->v_rdev) == NULL)
				error = ENXIO;
		} else {
			/*
			 * Be sure we're still naming the same device
			 * used for our initial mount
			 */
			ump = VFSTOUFS(mp);
			if (devvp != ump->um_devvp) {
				if (devvp->v_rdev != ump->um_devvp->v_rdev)
					error = EINVAL;
				else {
					vrele(devvp);
					devvp = ump->um_devvp;
					vref(devvp);
				}
			}
		}
	} else {
		if (!update) {
			/* New mounts must have a filename for the device */
			return EINVAL;
		} else {
			ump = VFSTOUFS(mp);
			devvp = ump->um_devvp;
			vref(devvp);
		}
	}

	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
	 *
	 * Permission to update a mount is checked higher, so here we presume
	 * updating the mount is okay (for example, as far as securelevel goes)
	 * which leaves us with the normal check.
	 */
	if (error == 0) {
		accessmode = VREAD;
		if (update ?
		    (mp->mnt_iflag & IMNT_WANTRDWR) != 0 :
		    (mp->mnt_flag & MNT_RDONLY) == 0)
			accessmode |= VWRITE;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
		    KAUTH_REQ_SYSTEM_MOUNT_DEVICE, mp, devvp,
		    KAUTH_ARG(accessmode));
		VOP_UNLOCK(devvp);
	}

	if (error) {
		vrele(devvp);
		return error;
	}

	if (!update) {
		int xflags;

		if (mp->mnt_flag & MNT_RDONLY)
			xflags = FREAD;
		else
			xflags = FREAD|FWRITE;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(devvp, xflags, FSCRED);
		VOP_UNLOCK(devvp);
		if (error)
			goto fail;
		error = ext2fs_mountfs(devvp, mp);
		if (error) {
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			(void)VOP_CLOSE(devvp, xflags, NOCRED);
			VOP_UNLOCK(devvp);
			goto fail;
		}

		ump = VFSTOUFS(mp);
		fs = ump->um_e2fs;
	} else {
		/*
		 * Update the mount.
		 */

		/*
		 * The initial mount got a reference on this
		 * device, so drop the one obtained via
		 * namei(), above.
		 */
		vrele(devvp);

		ump = VFSTOUFS(mp);
		fs = ump->um_e2fs;
		if (fs->e2fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) {
			/*
			 * Changing from r/w to r/o
			 */
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			error = ext2fs_flushfiles(mp, flags);
			if (error == 0 &&
			    ext2fs_cgupdate(ump, MNT_WAIT) == 0 &&
			    (fs->e2fs.e2fs_state & E2FS_ERRORS) == 0) {
				fs->e2fs.e2fs_state = E2FS_ISCLEAN;
				(void) ext2fs_sbupdate(ump, MNT_WAIT);
			}
			if (error)
				return error;
			fs->e2fs_ronly = 1;
		}

		if (mp->mnt_flag & MNT_RELOAD) {
			error = ext2fs_reload(mp, l->l_cred, l);
			if (error)
				return error;
		}

		if (fs->e2fs_ronly && (mp->mnt_iflag & IMNT_WANTRDWR)) {
			/*
			 * Changing from read-only to read/write
			 */
			fs->e2fs_ronly = 0;
			if (fs->e2fs.e2fs_state == E2FS_ISCLEAN)
				fs->e2fs.e2fs_state = 0;
			else
				fs->e2fs.e2fs_state = E2FS_ERRORS;
			fs->e2fs_fmod = 1;
		}
		if (args->fspec == NULL)
			return 0;
	}

	error = set_statvfs_info(path, UIO_USERSPACE, args->fspec,
	    UIO_USERSPACE, mp->mnt_op->vfs_name, mp, l);
	if (error == 0)
		ext2fs_sb_setmountinfo(fs, mp);

	if (fs->e2fs_fmod != 0) {	/* XXX */
		fs->e2fs_fmod = 0;
		if (fs->e2fs.e2fs_state == 0)
			fs->e2fs.e2fs_wtime = time_second;
		else
			printf("%s: file system not clean; please fsck(8)\n",
			    mp->mnt_stat.f_mntfromname);
		(void) ext2fs_cgupdate(ump, MNT_WAIT);
	}
	return error;

fail:
	vrele(devvp);
	return error;
}
/*
 * General fork call.  Note that another LWP in the process may call exec()
 * or exit() while we are forking.  It's safe to continue here, because
 * neither operation will complete until all LWPs have exited the process.
 */
int
fork1(struct lwp *l1, int flags, int exitsig, void *stack, size_t stacksize,
    void (*func)(void *), void *arg, register_t *retval,
    struct proc **rnewprocp)
{
	struct proc *p1, *p2, *parent;
	struct plimit *p1_lim;
	uid_t uid;
	struct lwp *l2;
	int count;
	vaddr_t uaddr;
	int tnprocs;
	int tracefork;
	int error = 0;

	p1 = l1->l_proc;
	uid = kauth_cred_getuid(l1->l_cred);
	tnprocs = atomic_inc_uint_nv(&nprocs);

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.
	 */
	if (__predict_false(tnprocs >= maxproc))
		error = -1;
	else
		error = kauth_authorize_process(l1->l_cred,
		    KAUTH_PROCESS_FORK, p1, KAUTH_ARG(tnprocs), NULL, NULL);

	if (error) {
		static struct timeval lasttfm;
		atomic_dec_uint(&nprocs);
		if (ratecheck(&lasttfm, &fork_tfmrate))
			tablefull("proc", "increase kern.maxproc or NPROC");
		if (forkfsleep)
			kpause("forkmx", false, forkfsleep, NULL);
		return EAGAIN;
	}

	/*
	 * Enforce limits.
	 */
	count = chgproccnt(uid, 1);
	if (__predict_false(count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur)) {
		if (kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_RLIMIT,
		    p1, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
		    &p1->p_rlimit[RLIMIT_NPROC], KAUTH_ARG(RLIMIT_NPROC))
		    != 0) {
			(void)chgproccnt(uid, -1);
			atomic_dec_uint(&nprocs);
			if (forkfsleep)
				kpause("forkulim", false, forkfsleep, NULL);
			return EAGAIN;
		}
	}

	/*
	 * Allocate virtual address space for the U-area now, while it
	 * is still easy to abort the fork operation if we're out of
	 * kernel virtual address space.
	 */
	uaddr = uvm_uarea_alloc();
	if (__predict_false(uaddr == 0)) {
		(void)chgproccnt(uid, -1);
		atomic_dec_uint(&nprocs);
		return ENOMEM;
	}

	/*
	 * We are now committed to the fork.  From here on, we may
	 * block on resources, but resource allocation may NOT fail.
	 */

	/* Allocate new proc. */
	p2 = proc_alloc();

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	memset(&p2->p_startzero, 0,
	    (unsigned) ((char *)&p2->p_endzero - (char *)&p2->p_startzero));
	memcpy(&p2->p_startcopy, &p1->p_startcopy,
	    (unsigned) ((char *)&p2->p_endcopy - (char *)&p2->p_startcopy));

	TAILQ_INIT(&p2->p_sigpend.sp_info);

	LIST_INIT(&p2->p_lwps);
	LIST_INIT(&p2->p_sigwaiters);

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * Inherit flags we want to keep.  The flags related to SIGCHLD
	 * handling are important in order to keep a consistent behaviour
	 * for the child after the fork.  If we are a 32-bit process, the
	 * child will be too.
	 */
	p2->p_flag =
	    p1->p_flag & (PK_SUGID | PK_NOCLDWAIT | PK_CLDSIGIGN | PK_32);
	p2->p_emul = p1->p_emul;
	p2->p_execsw = p1->p_execsw;

	if (flags & FORK_SYSTEM) {
		/*
		 * Mark it as a system process.  Set P_NOCLDWAIT so that
		 * children are reparented to init(8) when they exit.
		 * init(8) can easily wait them out for us.
		 */
		p2->p_flag |= (PK_SYSTEM | PK_NOCLDWAIT);
	}

	mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
	mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
	rw_init(&p2->p_reflock);
	cv_init(&p2->p_waitcv, "wait");
	cv_init(&p2->p_lwpcv, "lwpwait");

	/*
	 * Share a lock between the processes if they are to share signal
	 * state: we must synchronize access to it.
	 */
	if (flags & FORK_SHARESIGS) {
		p2->p_lock = p1->p_lock;
		mutex_obj_hold(p1->p_lock);
	} else
		p2->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	kauth_proc_fork(p1, p2);

	p2->p_raslist = NULL;
#if defined(__HAVE_RAS)
	ras_fork(p1, p2);
#endif

	/* bump references to the text vnode (for procfs) */
	p2->p_textvp = p1->p_textvp;
	if (p2->p_textvp)
		vref(p2->p_textvp);

	if (flags & FORK_SHAREFILES)
		fd_share(p2);
	else if (flags & FORK_CLEANFILES)
		p2->p_fd = fd_init(NULL);
	else
		p2->p_fd = fd_copy();

	/* XXX racy */
	p2->p_mqueue_cnt = p1->p_mqueue_cnt;

	if (flags & FORK_SHARECWD)
		cwdshare(p2);
	else
		p2->p_cwdi = cwdinit();

	/*
	 * Note: p_limit (rlimit stuff) is copy-on-write, so normally
	 * we just need increase pl_refcnt.
	 */
	p1_lim = p1->p_limit;
	if (!p1_lim->pl_writeable) {
		lim_addref(p1_lim);
		p2->p_limit = p1_lim;
	} else {
		p2->p_limit = lim_copy(p1_lim);
	}

	if (flags & FORK_PPWAIT) {
		/* Mark ourselves as waiting for a child. */
		l1->l_pflag |= LP_VFORKWAIT;
		p2->p_lflag = PL_PPWAIT;
		p2->p_vforklwp = l1;
	} else {
		p2->p_lflag = 0;
	}
	p2->p_sflag = 0;
	p2->p_slflag = 0;
	parent = (flags & FORK_NOWAIT) ? initproc : p1;
	p2->p_pptr = parent;
	p2->p_ppid = parent->p_pid;
	LIST_INIT(&p2->p_children);

	p2->p_aio = NULL;

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		mutex_enter(&ktrace_lock);
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			ktradref(p2);
		mutex_exit(&ktrace_lock);
	}
#endif

	/*
	 * Create signal actions for the child process.
	 */
	p2->p_sigacts = sigactsinit(p1, flags & FORK_SHARESIGS);
	mutex_enter(p1->p_lock);
	p2->p_sflag |=
	    (p1->p_sflag & (PS_STOPFORK | PS_STOPEXEC | PS_NOCLDSTOP));
	sched_proc_fork(p1, p2);
	mutex_exit(p1->p_lock);

	p2->p_stflag = p1->p_stflag;

	/*
	 * p_stats.
	 * Copy parts of p_stats, and zero out the rest.
	 */
	p2->p_stats = pstatscopy(p1->p_stats);

	/*
	 * Set up the new process address space.
	 */
	uvm_proc_fork(p1, p2, (flags & FORK_SHAREVM) ? true : false);

	/*
	 * Finish creating the child process.
	 * It will return through a different path later.
	 */
	lwp_create(l1, p2, uaddr, (flags & FORK_PPWAIT) ? LWP_VFORK : 0,
	    stack, stacksize, (func != NULL) ? func : child_return, arg, &l2,
	    l1->l_class);

	/*
	 * Inherit l_private from the parent.
	 * Note that we cannot use lwp_setprivate() here since that
	 * also sets the CPU TLS register, which is incorrect if the
	 * process has changed that without letting the kernel know.
	 */
	l2->l_private = l1->l_private;

	/*
	 * If emulation has a process fork hook, call it now.
	 */
	if (p2->p_emul->e_proc_fork)
		(*p2->p_emul->e_proc_fork)(p2, l1, flags);

	/*
	 * ...and finally, any other random fork hooks that subsystems
	 * might have registered.
	 */
	doforkhooks(p2, p1);

	SDT_PROBE(proc,,,create, p2, p1, flags, 0, 0);

	/*
	 * It's now safe for the scheduler and other processes to see the
	 * child process.
	 */
	mutex_enter(proc_lock);

	if (p1->p_session->s_ttyvp != NULL && p1->p_lflag & PL_CONTROLT)
		p2->p_lflag |= PL_CONTROLT;

	LIST_INSERT_HEAD(&parent->p_children, p2, p_sibling);
	p2->p_exitsig = exitsig;		/* signal for parent on exit */

	/*
	 * We don't want to tracefork vfork()ed processes because they
	 * will not receive the SIGTRAP until it is too late.
	 */
	tracefork = (p1->p_slflag & (PSL_TRACEFORK|PSL_TRACED)) ==
	    (PSL_TRACEFORK|PSL_TRACED) && (flags & FORK_PPWAIT) == 0;
	if (tracefork) {
		p2->p_slflag |= PSL_TRACED;
		p2->p_opptr = p2->p_pptr;
		if (p2->p_pptr != p1->p_pptr) {
			struct proc *parent1 = p2->p_pptr;

			if (parent1->p_lock < p2->p_lock) {
				if (!mutex_tryenter(parent1->p_lock)) {
					mutex_exit(p2->p_lock);
					mutex_enter(parent1->p_lock);
				}
			} else if (parent1->p_lock > p2->p_lock) {
				mutex_enter(parent1->p_lock);
			}
			parent1->p_slflag |= PSL_CHTRACED;
			proc_reparent(p2, p1->p_pptr);
			if (parent1->p_lock != p2->p_lock)
				mutex_exit(parent1->p_lock);
		}

		/*
		 * Set ptrace status.
		 */
		p1->p_fpid = p2->p_pid;
		p2->p_fpid = p1->p_pid;
	}

	LIST_INSERT_AFTER(p1, p2, p_pglist);
	LIST_INSERT_HEAD(&allproc, p2, p_list);

	p2->p_trace_enabled = trace_is_enabled(p2);
#ifdef __HAVE_SYSCALL_INTERN
	(*p2->p_emul->e_syscall_intern)(p2);
#endif

	/*
	 * Update stats now that we know the fork was successful.
	 */
	uvmexp.forks++;
	if (flags & FORK_PPWAIT)
		uvmexp.forks_ppwait++;
	if (flags & FORK_SHAREVM)
		uvmexp.forks_sharevm++;

	/*
	 * Pass a pointer to the new process to the caller.
	 */
	if (rnewprocp != NULL)
		*rnewprocp = p2;

	if (ktrpoint(KTR_EMUL))
		p2->p_traceflag |= KTRFAC_TRC_EMUL;

	/*
	 * Notify any interested parties about the new process.
	 */
	if (!SLIST_EMPTY(&p1->p_klist)) {
		mutex_exit(proc_lock);
		KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);
		mutex_enter(proc_lock);
	}

	/*
	 * Make child runnable, set start time, and add to run queue except
	 * if the parent requested the child to start in SSTOP state.
	 */
	mutex_enter(p2->p_lock);

	/*
	 * Start profiling.
	 */
	if ((p2->p_stflag & PST_PROFIL) != 0) {
		mutex_spin_enter(&p2->p_stmutex);
		startprofclock(p2);
		mutex_spin_exit(&p2->p_stmutex);
	}

	getmicrotime(&p2->p_stats->p_start);
	p2->p_acflag = AFORK;
	lwp_lock(l2);
	KASSERT(p2->p_nrlwps == 1);
	if (p2->p_sflag & PS_STOPFORK) {
		struct schedstate_percpu *spc = &l2->l_cpu->ci_schedstate;
		p2->p_nrlwps = 0;
		p2->p_stat = SSTOP;
		p2->p_waited = 0;
		p1->p_nstopchild++;
		l2->l_stat = LSSTOP;
		KASSERT(l2->l_wchan == NULL);
		lwp_unlock_to(l2, spc->spc_lwplock);
	} else {
		p2->p_nrlwps = 1;
		p2->p_stat = SACTIVE;
		l2->l_stat = LSRUN;
		sched_enqueue(l2, false);
		lwp_unlock(l2);
	}

	/*
	 * Return child pid to parent process,
	 * marking us as parent via retval[1].
	 */
	if (retval != NULL) {
		retval[0] = p2->p_pid;
		retval[1] = 0;
	}
	mutex_exit(p2->p_lock);

	/*
	 * Preserve synchronization semantics of vfork.  If waiting for
	 * child to exec or exit, sleep until it clears LP_VFORKWAIT.
	 */
#if 0
	while (l1->l_pflag & LP_VFORKWAIT) {
		cv_wait(&l1->l_waitcv, proc_lock);
	}
#else
	while (p2->p_lflag & PL_PPWAIT)
		cv_wait(&p1->p_waitcv, proc_lock);
#endif

	/*
	 * Let the parent know that we are tracing its child.
	 */
	if (tracefork) {
		ksiginfo_t ksi;

		KSI_INIT_EMPTY(&ksi);
		ksi.ksi_signo = SIGTRAP;
		ksi.ksi_lid = l1->l_lid;
		kpsignal(p1, &ksi, NULL);
	}

	mutex_exit(proc_lock);

	return 0;
}