/*
 * mac_vnop_setxattr:
 *	Write a MAC-framework extended attribute on a vnode from a
 *	kernel (SYSSPACE) buffer.
 *
 *	vp	vnode to tag (must have a valid iocount held by the caller)
 *	name	extended attribute name
 *	buf	attribute value (kernel address)
 *	len	length of the value in bytes
 *
 *	Returns 0 on success, EROFS for read-only mounts, otherwise an
 *	errno from vn_setxattr().
 */
int mac_vnop_setxattr (struct vnode *vp, const char *name, char *buf, size_t len)
{
	vfs_context_t ctx;
	int options = XATTR_NOSECURITY;	/* bypass security-checking inside the xattr layer */
	char uio_buf[ UIO_SIZEOF(1) ];	/* stack backing for a single-iovec uio */
	uio_t auio;
	int error;

	/* Cannot set an xattr on a read-only filesystem. */
	if (vfs_isrdonly(vp->v_mount))
		return (EROFS);

	ctx = vfs_context_current();
	/* Build a kernel-space WRITE uio describing buf/len. */
	auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_WRITE,
	    &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, CAST_USER_ADDR_T(buf), len);

	error = vn_setxattr(vp, name, auio, options, ctx);
#if CONFIG_FSE
	/* Tell fsevents listeners that an xattr was modified. */
	if (error == 0) {
		add_fsevent(FSE_XATTR_MODIFIED, ctx,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
	}
#endif
	return (error);
}
RTDECL(int) RTFileOpen(PRTFILE phFile, const char *pszFilename, uint64_t fOpen) { RTFILEINT *pThis = (RTFILEINT *)RTMemAllocZ(sizeof(*pThis)); if (!pThis) return VERR_NO_MEMORY; errno_t rc; pThis->u32Magic = RTFILE_MAGIC; pThis->fOpen = fOpen; pThis->hVfsCtx = vfs_context_current(); if (pThis->hVfsCtx != NULL) { int fCMode = (fOpen & RTFILE_O_CREATE_MODE_MASK) ? (fOpen & RTFILE_O_CREATE_MODE_MASK) >> RTFILE_O_CREATE_MODE_SHIFT : RT_FILE_PERMISSION; int fVnFlags = 0; /* VNODE_LOOKUP_XXX */ int fOpenMode = 0; if (fOpen & RTFILE_O_NON_BLOCK) fOpenMode |= O_NONBLOCK; if (fOpen & RTFILE_O_WRITE_THROUGH) fOpenMode |= O_SYNC; /* create/truncate file */ switch (fOpen & RTFILE_O_ACTION_MASK) { case RTFILE_O_OPEN: break; case RTFILE_O_OPEN_CREATE: fOpenMode |= O_CREAT; break; case RTFILE_O_CREATE: fOpenMode |= O_CREAT | O_EXCL; break; case RTFILE_O_CREATE_REPLACE: fOpenMode |= O_CREAT | O_TRUNC; break; /** @todo replacing needs fixing, this is *not* a 1:1 mapping! */ } if (fOpen & RTFILE_O_TRUNCATE) fOpenMode |= O_TRUNC; switch (fOpen & RTFILE_O_ACCESS_MASK) { case RTFILE_O_READ: fOpenMode |= FREAD; break; case RTFILE_O_WRITE: fOpenMode |= fOpen & RTFILE_O_APPEND ? O_APPEND | FWRITE : FWRITE; break; case RTFILE_O_READWRITE: fOpenMode |= fOpen & RTFILE_O_APPEND ? O_APPEND | FWRITE | FREAD : FWRITE | FREAD; break; default: AssertMsgFailed(("RTFileOpen received an invalid RW value, fOpen=%#x\n", fOpen)); return VERR_INVALID_PARAMETER; } pThis->fOpenMode = fOpenMode; rc = vnode_open(pszFilename, fOpenMode, fCMode, fVnFlags, &pThis->hVnode, pThis->hVfsCtx); if (rc == 0) { *phFile = pThis; return VINF_SUCCESS; } rc = RTErrConvertFromErrno(rc); }
/*
 * vdev_lookup_by_path:
 *	Recursively search the vdev tree rooted at 'vd' for a vdev whose
 *	(symlink-resolved) vdev_path matches 'name'.  A leading "/dev/"
 *	on the resolved path is ignored for comparison purposes.
 *
 *	Returns the matching vdev, or NULL if none is found.
 */
static vdev_t *
vdev_lookup_by_path(vdev_t *vd, const char *name)
{
	vdev_t *mvd;
	int c;
	char pathbuf[MAXPATHLEN];
	char *lookup_name;
	int err = 0;

	if (!vd)
		return NULL;

	// Check both strings are valid
	if (name && *name && vd->vdev_path && vd->vdev_path[0]) {
		int off;
		struct vnode *vp;

		lookup_name = vd->vdev_path;

		// We need to resolve symlinks here to get the final source name
		dprintf("ZFS: Looking up '%s'\n", vd->vdev_path);
		if ((err = vnode_lookup(vd->vdev_path, 0, &vp,
		    vfs_context_current())) == 0) {
			int len = MAXPATHLEN;
			if ((err = vn_getpath(vp, pathbuf, &len)) == 0) {
				dprintf("ZFS: '%s' resolved name is '%s'\n",
				    vd->vdev_path, pathbuf);
				lookup_name = pathbuf;
			}
			vnode_put(vp);
		}
		if (err)
			dprintf("ZFS: Lookup failed %d\n", err);

		/*
		 * Skip a leading "/dev/" prefix, if present.
		 *
		 * Fixed: the original `cond ? off=5 : off=0;` is ill-formed C --
		 * the third operand of ?: must be a conditional-expression, so
		 * the statement parses as `(cond ? off=5 : off) = 0`, assigning
		 * to a non-lvalue (C11 6.5.15).  Use a plain conditional
		 * expression instead.
		 */
		off = (strncmp("/dev/", lookup_name, 5) == 0) ? 5 : 0;

		dprintf("ZFS: vdev '%s' == '%s' ?\n", name, &lookup_name[off]);

		if (!strcmp(name, &lookup_name[off]))
			return vd;
	}

	/* Not this vdev; recurse into the children. */
	for (c = 0; c < vd->vdev_children; c++)
		if ((mvd = vdev_lookup_by_path(vd->vdev_child[c], name)) != NULL)
			return (mvd);

	return (NULL);
}
/*
 * vm_swapfile_open:
 *	Create (or truncate) and open the swap file at 'path'.
 *	On success *vp refers to the opened vnode; on failure *vp is NULL
 *	and the error is logged.
 */
void
vm_swapfile_open(const char *path, vnode_t *vp)
{
	int error = 0;
	vfs_context_t	ctx = vfs_context_current();

	if ((error = vnode_open(path, (O_CREAT | O_TRUNC | FREAD | FWRITE),
	    S_IRUSR | S_IWUSR, 0, vp, ctx))) {
		printf("Failed to open swap file %d\n", error);
		*vp = NULL;
		return;
	}

	/*
	 * Drop the iocount acquired by vnode_open(); the vnode remains
	 * valid for later use.  NOTE(review): assumes vnode_open() also
	 * leaves a longer-term reference on the vnode -- confirm against
	 * the vnode KPI before relying on *vp after this point.
	 */
	vnode_put(*vp);
}
/*
 * vniocattach_shadow:
 *	Attach a shadow (write-redirect) file to a vn pseudo-device.
 *	Opens the file named by vniop->vn_file read/write, validates that
 *	it is a regular file of determinable size, builds the shadow map,
 *	and records it in the softc.  On success the device becomes
 *	read/write.
 *
 *	Returns 0 on success or an errno.
 */
static int
vniocattach_shadow(struct vn_softc *vn, struct vn_ioctl_64 *vniop,
		   __unused dev_t dev, int in_kernel, proc_t p)
{
	vfs_context_t ctx = vfs_context_current();
	struct nameidata nd;
	int error, flags;
	shadow_map_t * map;
	off_t file_size;

	flags = FREAD|FWRITE;
	/* The pathname lives in kernel space for in-kernel callers. */
	if (in_kernel) {
		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
		       vniop->vn_file, ctx);
	}
	else {
		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW,
		       (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
		       vniop->vn_file, ctx);
	}
	/* vn_open gives both long- and short-term references */
	error = vn_open(&nd, flags, 0);
	if (error) {
		/* shadow MUST be writable! */
		return (error);
	}
	/* Only regular files whose size we can determine are acceptable. */
	if (nd.ni_vp->v_type != VREG
	    || (error = vnode_size(nd.ni_vp, &file_size, ctx))) {
		(void)vn_close(nd.ni_vp, flags, ctx);
		vnode_put(nd.ni_vp);
		return (error ? error : EINVAL);
	}
	/* Build the shadow block map covering the backing store. */
	map = shadow_map_create(vn->sc_fsize, file_size,
				0, vn->sc_secsize);
	if (map == NULL) {
		(void)vn_close(nd.ni_vp, flags, ctx);
		vnode_put(nd.ni_vp);
		vn->sc_shadow_vp = NULL;
		return (ENOMEM);
	}
	vn->sc_shadow_vp = nd.ni_vp;
	/* remember the vid so the vnode can be revalidated later */
	vn->sc_shadow_vid = vnode_vid(nd.ni_vp);
	vn->sc_shadow_vp->v_flag |= VNOCACHE_DATA;	/* bypass the cache for shadow I/O */
	vn->sc_shadow_map = map;
	vn->sc_flags &= ~VNF_READONLY; /* we're now read/write */

	/* lose the short-term reference */
	vnode_put(nd.ni_vp);
	return(0);
}
/*
 * vm_swapfile_close:
 *	Close and unlink a swap file.  'path_addr' is the kernel-space
 *	address of the file's pathname; 'vp' is its vnode.
 */
void
vm_swapfile_close(uint64_t path_addr, vnode_t vp)
{
	struct nameidata nd;
	vfs_context_t context = vfs_context_current();
	int error = 0;

	/* Take an iocount so vnode_close() has a reference to drop. */
	vnode_getwithref(vp);
	vnode_close(vp, 0, context);

	NDINIT(&nd, DELETE, OP_UNLINK, AUDITVNPATH1, UIO_SYSSPACE,
	       path_addr, context);

	/* NOTE(review): the unlink result is deliberately ignored here. */
	error = unlink1(context, &nd, 0);
}
/*
 * vm_swapfile_close:
 *	Close and unlink a swap file.  'path_addr' is the kernel-space
 *	address of the file's pathname; 'vp' is its vnode.  Unlink
 *	failures are only reported on DEVELOPMENT/DEBUG kernels.
 */
void
vm_swapfile_close(uint64_t path_addr, vnode_t vp)
{
	vfs_context_t context = vfs_context_current();
	int error;

	/* Take an iocount so vnode_close() has a reference to drop. */
	vnode_getwithref(vp);
	vnode_close(vp, 0, context);

	error = unlink1(context, NULLVP, CAST_USER_ADDR_T(path_addr),
	    UIO_SYSSPACE, 0);

#if DEVELOPMENT || DEBUG
	if (error)
		printf("%s : unlink of %s failed with error %d", __FUNCTION__,
		    (char *)path_addr, error);
#endif
}
/*
 * mac_vnop_getxattr:
 *	Read a MAC-framework extended attribute from a vnode into a
 *	kernel buffer.
 *
 *	vp		vnode to query
 *	name		extended attribute name
 *	buf/len		destination buffer (kernel address) and capacity
 *	attrlen		out: number of bytes transferred into buf
 *
 *	Returns 0 on success or an errno from vn_getxattr().
 */
int mac_vnop_getxattr (struct vnode *vp, const char *name, char *buf, size_t len,
		       size_t *attrlen)
{
	vfs_context_t ctx = vfs_context_current();
	int options = XATTR_NOSECURITY;	/* bypass security-checking inside the xattr layer */
	char uio_buf[ UIO_SIZEOF(1) ];	/* stack backing for a single-iovec uio */
	uio_t auio;
	int error;

	/* Build a kernel-space READ uio describing buf/len. */
	auio = uio_createwithbuffer(1, 0, UIO_SYSSPACE, UIO_READ,
	    &uio_buf[0], sizeof(uio_buf));
	uio_addiov(auio, CAST_USER_ADDR_T(buf), len);

	error = vn_getxattr(vp, name, auio, attrlen, options, ctx);
	/*
	 * Report the byte count actually transferred.
	 * NOTE(review): this overwrites *attrlen even when vn_getxattr()
	 * failed -- confirm callers only consume it on success.
	 */
	*attrlen = len - uio_resid(auio);

	return (error);
}
/*
 * vm_swapfile_preallocate:
 *	Grow a freshly created swap file to *size bytes (without
 *	zero-filling) and, if requested, ask the filesystem to pin it via
 *	FIOPINSWAP.  A pin failure is non-fatal: *pin is cleared and the
 *	file is used where it landed.  On success the vnode is marked
 *	VSWAP.
 *
 *	Returns 0 on success or an errno.
 */
int
vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin)
{
	int		error = 0;
	uint64_t	file_size = 0;
	vfs_context_t	ctx = NULL;

	ctx = vfs_context_current();

	/* Grow the file to the requested size without zero-filling. */
	error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);
	if (error) {
		printf("vnode_setsize for swap files failed: %d\n", error);
		goto done;
	}

	/* Read the size back to verify the grow took effect. */
	error = vnode_size(vp, (off_t*) &file_size, ctx);
	if (error) {
		printf("vnode_size (new file) for swap file failed: %d\n", error);
		goto done;
	}
	assert(file_size == *size);

	if (pin != NULL && *pin != FALSE) {
		/* Ask the filesystem to pin the swap file. */
		error = VNOP_IOCTL(vp, FIOPINSWAP, NULL, 0, ctx);
		if (error) {
			printf("pin for swap files failed: %d, file_size = %lld\n", error, file_size);
			/* this is not fatal, carry on with files wherever they landed */
			*pin = FALSE;
			error = 0;
		}
	}

	/* Mark the vnode as a swap file. */
	vnode_lock_spin(vp);
	SET(vp->v_flag, VSWAP);
	vnode_unlock(vp);
done:
	return error;
}
/*
 * mac_vnop_removexattr:
 *	Remove a MAC-framework extended attribute from a vnode.
 *
 *	vp	vnode to strip
 *	name	extended attribute name
 *
 *	Returns 0 on success, EROFS for read-only mounts, otherwise an
 *	errno from vn_removexattr().
 */
int mac_vnop_removexattr (struct vnode *vp, const char *name)
{
	vfs_context_t context;
	int rv;

	/* Removal is impossible on a read-only filesystem. */
	if (vfs_isrdonly(vp->v_mount))
		return (EROFS);

	context = vfs_context_current();
	rv = vn_removexattr(vp, name, XATTR_NOSECURITY, context);
#if CONFIG_FSE
	/* On success, let fsevents listeners know the xattr went away. */
	if (rv == 0)
		add_fsevent(FSE_XATTR_REMOVED, context,
		    FSE_ARG_VNODE, vp,
		    FSE_ARG_DONE);
#endif
	return (rv);
}
/*
 * sd_callback3:
 *	proc_rebootscan() callback for the final stage of shutdown:
 *	force-exit any non-zombie process that is still alive, via
 *	exit1().  'args' is a struct sd_iterargs describing the stage.
 *
 *	Always returns PROC_RETURNED so the scan continues.
 */
static int
sd_callback3(proc_t p, void * args)
{
	struct sd_iterargs * sd = (struct sd_iterargs *)args;
	vfs_context_t ctx = vfs_context_current();

	int setsdstate = sd->setsdstate;

	proc_lock(p);
	/* Record the shutdown stage on the process. */
	p->p_shutdownstate = setsdstate;
	if (p->p_stat != SZOMB) {
		/*
		 * NOTE: following code ignores sig_lock and plays
		 * with exit_thread correctly.  This is OK unless we
		 * are a multiprocessor, in which case I do not
		 * understand the sig_lock.  This needs to be fixed.
		 * XXX
		 */
		if (p->exit_thread) {	/* someone already doing it */
			proc_unlock(p);
			/* give him a chance */
			thread_block(THREAD_CONTINUE_NULL);
		} else {
			/* claim the exit and force it ourselves */
			p->exit_thread = current_thread();
			printf(".");

			sd_log(ctx, "%s[%d] had to be forced closed with exit1().\n", p->p_comm, p->p_pid);

			proc_unlock(p);
			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_PROC, BSD_PROC_FRCEXIT) | DBG_FUNC_NONE,
					      p->p_pid, 0, 1, 0, 0);
			sd->activecount++;
			exit1(p, 1, (int *)NULL);
		}
	} else {
		proc_unlock(p);
	}

	return PROC_RETURNED;
}
static int nsmb_dev_open_nolock(dev_t dev, int oflags, int devtype, struct proc *p) { #pragma unused(oflags, devtype, p) struct smb_dev *sdp; kauth_cred_t cred = vfs_context_ucred(vfs_context_current()); sdp = SMB_GETDEV(dev); if (sdp && (sdp->sd_flags & NSMBFL_OPEN)) return (EBUSY); if (!sdp || minor(dev) == 0) { int avail_minor; for (avail_minor = 1; avail_minor < SMBMINORS; avail_minor++) if (!SMB_GETDEV(avail_minor)) break; if (avail_minor >= SMBMINORS) { SMBERROR("Too many minor devices, %d >= %d !", avail_minor, SMBMINORS); return (ENOMEM); } SMB_MALLOC(sdp, struct smb_dev *, sizeof(*sdp), M_NSMBDEV, M_WAITOK); bzero(sdp, sizeof(*sdp)); dev = makedev(smb_major, avail_minor); sdp->sd_devfs = devfs_make_node(dev, DEVFS_CHAR, kauth_cred_getuid(cred), kauth_cred_getgid(cred), 0700, "nsmb%x", avail_minor); if (!sdp->sd_devfs) { SMBERROR("devfs_make_node failed %d\n", avail_minor); SMB_FREE(sdp, M_NSMBDEV); return (ENOMEM); } if (avail_minor > smb_minor_hiwat) smb_minor_hiwat = avail_minor; SMB_GETDEV(dev) = sdp; return (EBUSY); }
/*
 * vniocattach_file:
 *	Attach a backing file to a vn pseudo-disk.  The file named by
 *	vniop->vn_file is opened read/write; if that fails with a
 *	permission-style error we retry read-only and mark the device
 *	VNF_READONLY.  The file must be a regular file of determinable
 *	size.  On success the softc is populated and the character
 *	device node is created.
 *
 *	Returns 0 on success or an errno.
 */
static int
vniocattach_file(struct vn_softc *vn,
		 struct vn_ioctl_64 *vniop,
		 dev_t dev,
		 int in_kernel,
		 proc_t p)
{
	dev_t	cdev;
	vfs_context_t ctx = vfs_context_current();
	kauth_cred_t cred;
	struct nameidata nd;
	off_t file_size;
	int error, flags;

	flags = FREAD|FWRITE;
	/* Pathname lives in kernel space for in-kernel callers. */
	if (in_kernel) {
		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx);
	}
	else {
		NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW,
		       (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
		       vniop->vn_file, ctx);
	}
	/* vn_open gives both long- and short-term references */
	error = vn_open(&nd, flags, 0);
	if (error) {
		/*
		 * R/W open failed; for permission-style errors retry
		 * read-only, otherwise give up.
		 */
		if (error != EACCES && error != EPERM && error != EROFS) {
			return (error);
		}
		flags &= ~FWRITE;
		if (in_kernel) {
			NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE,
			       vniop->vn_file, ctx);
		}
		else {
			NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW,
			       (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
			       vniop->vn_file, ctx);
		}
		error = vn_open(&nd, flags, 0);
		if (error) {
			return (error);
		}
	}
	/* Only regular files of determinable size can back a vn device. */
	if (nd.ni_vp->v_type != VREG) {
		error = EINVAL;
	}
	else {
		error = vnode_size(nd.ni_vp, &file_size, ctx);
	}
	if (error != 0) {
		(void) vn_close(nd.ni_vp, flags, ctx);
		vnode_put(nd.ni_vp);
		return (error);
	}
	cred = kauth_cred_proc_ref(p);
	nd.ni_vp->v_flag |= VNOCACHE_DATA;	/* bypass the cache for device I/O */
	error = setcred(nd.ni_vp, cred);
	if (error) {
		(void)vn_close(nd.ni_vp, flags, ctx);
		vnode_put(nd.ni_vp);
		kauth_cred_unref(&cred);
		return(error);
	}
	vn->sc_secsize = DEV_BSIZE;
	vn->sc_fsize = file_size;
	vn->sc_size = file_size / vn->sc_secsize;	/* size in sectors */
	vn->sc_vp = nd.ni_vp;
	/* remember the vid so the vnode can be revalidated later */
	vn->sc_vid = vnode_vid(nd.ni_vp);
	vn->sc_open_flags = flags;
	vn->sc_cred = cred;
	cdev = makedev(vndevice_cdev_major, minor(dev));
	vn->sc_cdev = devfs_make_node(cdev, DEVFS_CHAR,
				      UID_ROOT, GID_OPERATOR,
				      0600, "rvn%d",
				      minor(dev));
	vn->sc_flags |= VNF_INITED;
	if (flags == FREAD)
		vn->sc_flags |= VNF_READONLY;	/* we fell back to a read-only open */
	/* lose the short-term reference */
	vnode_put(nd.ni_vp);
	return(0);
}
/*
 * Routine:	macx_swapoff
 * Function:
 *	Syscall interface to remove a file from backing store.
 *	Looks the file up, verifies it is a registered swap file, asks
 *	the default pager to delete the backing store, then clears the
 *	VSWAP marking and drops the long-term reference taken by
 *	macx_swapon().
 */
int
macx_swapoff(
	struct macx_swapoff_args *args)
{
	__unused int	flags = args->flags;
	kern_return_t	kr;
	mach_port_t	backing_store;

	struct vnode		*vp = 0;
	struct nameidata	nd, *ndp;
	struct proc		*p = current_proc();
	int			i;
	int			error;
	boolean_t		funnel_state;
	vfs_context_t ctx = vfs_context_current();

	AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPOFF);

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	backing_store = NULL;
	ndp = &nd;

	/* Only the superuser may disable paging to a file. */
	if ((error = suser(kauth_cred_get(), 0)))
		goto swapoff_bailout;

	/*
	 * Get the vnode for the paging area.
	 */
	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	       ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32),
	       (user_addr_t) args->filename, ctx);

	if ((error = namei(ndp)))
		goto swapoff_bailout;
	nameidone(ndp);
	vp = ndp->ni_vp;

	/* Swap files are always regular files. */
	if (vp->v_type != VREG) {
		error = EINVAL;
		goto swapoff_bailout;
	}
#if CONFIG_MACF
	vnode_lock(vp);
	error = mac_system_check_swapoff(vfs_context_ucred(ctx), vp);
	vnode_unlock(vp);
	if (error)
		goto swapoff_bailout;
#endif

	/* Find this vnode in the backing-store table. */
	for(i = 0; i < MAX_BACKING_STORE; i++) {
		if(bs_port_table[i].vp == vp) {
			break;
		}
	}
	if (i == MAX_BACKING_STORE) {
		error = EINVAL;
		goto swapoff_bailout;
	}
	backing_store = (mach_port_t)bs_port_table[i].bs;

	kr = default_pager_backing_store_delete(backing_store);
	switch (kr) {
		case KERN_SUCCESS:
			error = 0;
			bs_port_table[i].vp = 0;
			/* This vnode is no longer used for swapfile */
			vnode_lock_spin(vp);
			CLR(vp->v_flag, VSWAP);
			vnode_unlock(vp);

			/* get rid of macx_swapon() "long term" reference */
			vnode_rele(vp);

			break;
		case KERN_FAILURE:
			error = EAGAIN;
			break;
		default:
			error = EAGAIN;
			break;
	}

swapoff_bailout:
	/* get rid of macx_swapoff() namei() reference */
	if (vp)
		vnode_put(vp);

	(void) thread_funnel_set(kernel_flock, FALSE);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
/*
 * Routine:	macx_swapon
 * Function:
 *	Syscall interface to add a file to backing store.
 *	Looks the file up, grows it if needed, registers it with the
 *	default pager, marks the vnode VSWAP, and takes a long-term
 *	reference so vnode reclaim leaves it alone.
 */
int
macx_swapon(
	struct macx_swapon_args *args)
{
	int	size = args->size;
	vnode_t	vp = (vnode_t)NULL;
	struct nameidata	nd, *ndp;
	register int	error;
	kern_return_t	kr;
	mach_port_t	backing_store;
	memory_object_default_t	default_pager;
	int	i;
	boolean_t	funnel_state;
	off_t	file_size;
	vfs_context_t	ctx = vfs_context_current();
	struct proc	*p = current_proc();
	int	dp_cluster_size;

	AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPON);
	AUDIT_ARG(value32, args->priority);

	funnel_state = thread_funnel_set(kernel_flock, TRUE);
	ndp = &nd;

	/* Only the superuser may add paging files. */
	if ((error = suser(kauth_cred_get(), 0)))
		goto swapon_bailout;

	/*
	 * Get a vnode for the paging area.
	 */
	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	       ((IS_64BIT_PROCESS(p)) ? UIO_USERSPACE64 : UIO_USERSPACE32),
	       (user_addr_t) args->filename, ctx);

	if ((error = namei(ndp)))
		goto swapon_bailout;
	nameidone(ndp);
	vp = ndp->ni_vp;

	/* Swap files must be regular files. */
	if (vp->v_type != VREG) {
		error = EINVAL;
		goto swapon_bailout;
	}

	/* get file size */
	if ((error = vnode_size(vp, &file_size, ctx)) != 0)
		goto swapon_bailout;
#if CONFIG_MACF
	vnode_lock(vp);
	error = mac_system_check_swapon(vfs_context_ucred(ctx), vp);
	vnode_unlock(vp);
	if (error)
		goto swapon_bailout;
#endif

	/* resize to desired size if it's too small */
	if ((file_size < (off_t)size) && ((error = vnode_setsize(vp, (off_t)size, 0, ctx)) != 0))
		goto swapon_bailout;

	/* Lazily start the default pager on first use. */
	if (default_pager_init_flag == 0) {
		start_def_pager(NULL);
		default_pager_init_flag = 1;
	}

	/* add new backing store to list */
	i = 0;
	while(bs_port_table[i].vp != 0) {
		if(i == MAX_BACKING_STORE)
			break;
		i++;
	}
	if(i == MAX_BACKING_STORE) {
		error = ENOMEM;
		goto swapon_bailout;
	}

	/* remember the vnode. This vnode has namei() reference */
	bs_port_table[i].vp = vp;

	/*
	 * Look to see if we are already paging to this file.
	 */
	/* make certain the copy send of kernel call will work */
	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(), &default_pager, 0);
	if(kr != KERN_SUCCESS) {
		error = EAGAIN;
		bs_port_table[i].vp = 0;
		goto swapon_bailout;
	}

	if (vp->v_mount->mnt_kern_flag & MNTK_SSD) {
		/*
		 * keep the cluster size small since the
		 * seek cost is effectively 0 which means
		 * we don't care much about fragmentation
		 */
		/* NOTE(review): dp_isssd is presumably a file-scope global -- not declared here */
		dp_isssd = TRUE;
		dp_cluster_size = 2 * PAGE_SIZE;
	} else {
		/*
		 * use the default cluster size
		 */
		dp_isssd = FALSE;
		dp_cluster_size = 0;
	}
	kr = default_pager_backing_store_create(default_pager,
					-1, /* default priority */
					dp_cluster_size,
					&backing_store);
	memory_object_default_deallocate(default_pager);

	if(kr != KERN_SUCCESS) {
		error = ENOMEM;
		bs_port_table[i].vp = 0;
		goto swapon_bailout;
	}

	/* Mark this vnode as being used for swapfile */
	vnode_lock_spin(vp);
	SET(vp->v_flag, VSWAP);
	vnode_unlock(vp);

	/*
	 * NOTE: we are able to supply PAGE_SIZE here instead of
	 *	an actual record size or block number because:
	 *	a: we do not support offsets from the beginning of the
	 *		file (allowing for non page size/record modulo offsets.
	 *	b: because allow paging will be done modulo page size
	 */
	kr = default_pager_add_file(backing_store, (vnode_ptr_t) vp,
				PAGE_SIZE, (int)(file_size/PAGE_SIZE));
	if(kr != KERN_SUCCESS) {
		bs_port_table[i].vp = 0;
		if(kr == KERN_INVALID_ARGUMENT)
			error = EINVAL;
		else
			error = ENOMEM;

		/* This vnode is not to be used for swapfile */
		vnode_lock_spin(vp);
		CLR(vp->v_flag, VSWAP);
		vnode_unlock(vp);

		goto swapon_bailout;
	}
	bs_port_table[i].bs = (void *)backing_store;
	error = 0;

	ubc_setthreadcred(vp, p, current_thread());

	/*
	 * take a long term reference on the vnode to keep
	 * vnreclaim() away from this vnode.
	 */
	vnode_ref(vp);

swapon_bailout:
	if (vp) {
		vnode_put(vp);	/* drop the namei() reference */
	}
	(void) thread_funnel_set(kernel_flock, FALSE);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}
/*
 * proc_shutdown:
 *	Terminate every process on the system (except ourselves) in
 *	escalating stages: SIGTERM with a 30s grace period, SIGKILL with
 *	a 60s grace period, then forced exit1() via sd_callback3.  The
 *	whole sequence runs twice: once for ordinary processes and once
 *	for processes marked for delayed termination.
 */
static void
proc_shutdown(void)
{
	vfs_context_t ctx = vfs_context_current();
	struct proc	*p, *self;
	int		delayterm = 0;
	struct sd_filterargs sfargs;
	struct sd_iterargs sdargs;
	int error = 0;
	struct timespec ts;

	/*
	 * Kill as many procs as we can.  (Except ourself...)
	 */
	self = (struct proc *)current_proc();

	/*
	 * Signal the init with SIGTERM so that he does not launch
	 * new processes
	 */
	p = proc_find(1);
	if (p && p != self) {
		psignal(p, SIGTERM);
	}
	/* NOTE(review): proc_rele() runs even if proc_find() returned NULL -- confirm safety */
	proc_rele(p);

	printf("Killing all processes ");

sigterm_loop:
	/*
	 * send SIGTERM to those procs interested in catching one
	 */
	sfargs.delayterm = delayterm;
	sfargs.shutdownstate = 0;
	sdargs.signo = SIGTERM;
	sdargs.setsdstate = 1;
	sdargs.countproc = 1;
	sdargs.activecount = 0;

	error = 0;
	/* post a SIGTERM to all that catch SIGTERM and not marked for delay */
	proc_rebootscan(sd_callback1, (void *)&sdargs, sd_filt1, (void *)&sfargs);

	if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
		proc_list_lock();
		if (proc_shutdown_exitcount != 0) {
			/*
			 * now wait for up to 30 seconds to allow those procs catching SIGTERM
			 * to digest it
			 * as soon as these procs have exited, we'll continue on to the next step
			 */
			ts.tv_sec = 30;
			ts.tv_nsec = 0;
			error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
			if (error != 0) {
				/* timed out: clear the exit-count marking on everyone still flagged */
				for (p = allproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
				for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
			}
		}
		proc_list_unlock();
	}
	if (error == ETIMEDOUT) {
		/*
		 * log the names of the unresponsive tasks
		 */
		proc_list_lock();
		for (p = allproc.lh_first; p; p = p->p_list.le_next) {
			if (p->p_shutdownstate == 1) {
				printf("%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
				sd_log(ctx, "%s[%d]: didn't act on SIGTERM\n", p->p_comm, p->p_pid);
			}
		}
		proc_list_unlock();

		/* give stragglers a little extra time before escalating */
		delay_for_interval(1000 * 5, 1000 * 1000);
	}

	/*
	 * send a SIGKILL to all the procs still hanging around
	 */
	sfargs.delayterm = delayterm;
	sfargs.shutdownstate = 2;
	sdargs.signo = SIGKILL;
	sdargs.setsdstate = 2;
	sdargs.countproc = 1;
	sdargs.activecount = 0;

	/* post a SIGKILL to all that catch SIGTERM and not marked for delay */
	proc_rebootscan(sd_callback2, (void *)&sdargs, sd_filt2, (void *)&sfargs);

	if (sdargs.activecount != 0 && proc_shutdown_exitcount!= 0) {
		proc_list_lock();
		if (proc_shutdown_exitcount != 0) {
			/*
			 * wait for up to 60 seconds to allow these procs to exit normally
			 *
			 * History:	The delay interval was changed from 100 to 200
			 *		for NFS requests in particular.
			 */
			ts.tv_sec = 60;
			ts.tv_nsec = 0;
			error = msleep(&proc_shutdown_exitcount, proc_list_mlock, PWAIT, "shutdownwait", &ts);
			if (error != 0) {
				for (p = allproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
				for (p = zombproc.lh_first; p; p = p->p_list.le_next) {
					if ((p->p_listflag & P_LIST_EXITCOUNT) == P_LIST_EXITCOUNT)
						p->p_listflag &= ~P_LIST_EXITCOUNT;
				}
			}
		}
		proc_list_unlock();
	}

	/*
	 * if we still have procs that haven't exited, then brute force 'em
	 */
	sfargs.delayterm = delayterm;
	sfargs.shutdownstate = 3;
	sdargs.signo = 0;
	sdargs.setsdstate = 3;
	sdargs.countproc = 0;
	sdargs.activecount = 0;

	/* post a SIGTERM to all that catch SIGTERM and not marked for delay */
	proc_rebootscan(sd_callback3, (void *)&sdargs, sd_filt2, (void *)&sfargs);
	printf("\n");

	/* Now start the termination of processes that are marked for delayed termn */
	if (delayterm == 0) {
		delayterm = 1;
		goto sigterm_loop;
	}

	sd_closelog(ctx);

	/* drop the ref on initproc */
	proc_rele(initproc);
	printf("continuing\n");
}
/* ARGSUSED */
/*
 * pipe:
 *	The pipe(2) system call.  Creates two connected half-duplex pipe
 *	endpoints sharing one mutex, allocates a file descriptor for
 *	each, and returns them in retval[0] (read side) and retval[1]
 *	(write side).
 */
int
pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval)
{
	struct fileproc *rf, *wf;
	struct pipe *rpipe, *wpipe;
	lck_mtx_t   *pmtx;
	int fd, error;

	/* One mutex will be shared by both endpoints. */
	if ((pmtx = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr)) == NULL)
		return (ENOMEM);

	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
		error = ENFILE;
		goto freepipes;
	}
	/*
	 * allocate the space for the normal I/O direction up
	 * front... we'll delay the allocation for the other
	 * direction until a write actually occurs (most likely it won't)...
	 */
	error = pipespace(rpipe, choose_pipespace(rpipe->pipe_buffer.size, 0));
	if (error)
		goto freepipes;

	TAILQ_INIT(&rpipe->pipe_evlist);
	TAILQ_INIT(&wpipe->pipe_evlist);

	/* Allocate the read-side descriptor. */
	error = falloc(p, &rf, &fd, vfs_context_current());
	if (error) {
		goto freepipes;
	}
	retval[0] = fd;

	/*
	 * for now we'll create half-duplex pipes(refer returns section above).
	 * this is what we've always supported..
	 */
	rf->f_flag = FREAD;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;

	/* Allocate the write-side descriptor. */
	error = falloc(p, &wf, &fd, vfs_context_current());
	if (error) {
		/* undo the read-side allocation */
		fp_free(p, retval[0], rf);
		goto freepipes;
	}
	wf->f_flag = FWRITE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;
	/* both structures share the same mutex */
	rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;

	retval[1] = fd;
#if CONFIG_MACF
	/*
	 * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX
	 *
	 * struct pipe represents a pipe endpoint.  The MAC label is shared
	 * between the connected endpoints.  As a result mac_pipe_label_init() and
	 * mac_pipe_label_associate() should only be called on one of the endpoints
	 * after they have been connected.
	 */
	mac_pipe_label_init(rpipe);
	mac_pipe_label_associate(kauth_cred_get(), rpipe);
	wpipe->pipe_label = rpipe->pipe_label;
#endif
	/* Publish both descriptors atomically with respect to the fd table. */
	proc_fdlock_spin(p);
	procfdtbl_releasefd(p, retval[0], NULL);
	procfdtbl_releasefd(p, retval[1], NULL);
	fp_drop(p, retval[0], rf, 1);
	fp_drop(p, retval[1], wf, 1);
	proc_fdunlock(p);

	return (0);

freepipes:
	pipeclose(rpipe);
	pipeclose(wpipe);
	lck_mtx_free(pmtx, pipe_mtx_grp);

	return (error);
}
/*
 * vm_swapfile_preallocate:
 *	Initialize content-protection keys (when CONFIG_PROTECT), grow
 *	the swap file to *size bytes without zero-filling, and optionally
 *	pin it on HFS via hfs_pin_vnode().  A pin failure is non-fatal:
 *	*pin is cleared and the file is used where it landed.  On success
 *	the vnode is marked VSWAP.
 *
 *	Returns 0 on success or an errno.
 */
int
vm_swapfile_preallocate(vnode_t vp, uint64_t *size, boolean_t *pin)
{
	int		error = 0;
	uint64_t	file_size = 0;
	vfs_context_t	ctx = NULL;

	ctx = vfs_context_current();

#if CONFIG_PROTECT
	{
#if 0	// <rdar://11771612>

		if ((error = cp_vnode_setclass(vp, PROTECTION_CLASS_F))) {
			if(config_protect_bug) {
				printf("swap protection class set failed with %d\n", error);
			} else {
				panic("swap protection class set failed with %d\n", error);
			}
		}
#endif
		/* initialize content protection keys manually */
		if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
			printf("Content Protection key failure on swap: %d\n", error);
			vnode_put(vp);
			vp = NULL;
			goto done;
		}
	}
#endif
	/* Grow the file to the requested size without zero-filling. */
	error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);
	if (error) {
		printf("vnode_setsize for swap files failed: %d\n", error);
		goto done;
	}

	/* Read the size back to verify the grow took effect. */
	error = vnode_size(vp, (off_t*) &file_size, ctx);
	if (error) {
		printf("vnode_size (new file) for swap file failed: %d\n", error);
		goto done;
	}
	assert(file_size == *size);

	if (pin != NULL && *pin != FALSE) {
		/* Pinning is implemented by HFS only. */
		assert(vnode_tag(vp) == VT_HFS);
		error = hfs_pin_vnode(VTOHFS(vp), vp, HFS_PIN_IT | HFS_DATALESS_PIN, NULL, ctx);
		if (error) {
			printf("hfs_pin_vnode for swap files failed: %d\n", error);
			/* this is not fatal, carry on with files wherever they landed */
			*pin = FALSE;
			error = 0;
		}
	}

	/* Mark the vnode as a swap file. */
	vnode_lock_spin(vp);
	SET(vp->v_flag, VSWAP);
	vnode_unlock(vp);
done:
	return error;
}
/*
 * FileNVRAM::start
 *	IOService start routine.  Caches the current VFS context, joins
 *	the power-management tree, replaces the property table with an
 *	empty dictionary, and either adopts NVRAM variables handed over
 *	by the bootloader or schedules a timer / immediate registration
 *	to load them from disk.  Returns true on success.
 */
bool FileNVRAM::start(IOService *provider)
{
	bool earlyInit = false;

	LOG(NOTICE, "start() called (%d)\n", mInitComplete);

	//start is called upon wake for some reason.
	if (mInitComplete)
	{
		return true;
	}

	if (!super::start(provider))
	{
		return false;
	}

	LOG(NOTICE, "start() called (%d)\n", mInitComplete);

	// mFilePath		= NULL;			// no know file
	mLoggingLevel = NOTICE; // start with logging disabled, can be update for debug
	mInitComplete = false;  // Don't resync anything that's already in the file system.
	mSafeToSync = false;    // Don't sync untill later

	// We should be root right now... cache this for later.
	mCtx = vfs_context_current();

	// Register Power modes
	PMinit();
	registerPowerDriver(this, sPowerStates, sizeof(sPowerStates) / sizeof(IOPMPowerState));
	provider->joinPMtree(this);

	IORegistryEntry* bootnvram = IORegistryEntry::fromPath(NVRAM_FILE_DT_LOCATION, gIODTPlane);
	IORegistryEntry* root = IORegistryEntry::fromPath("/", gIODTPlane);

	// Create the command gate.
	mCommandGate = IOCommandGate::commandGate( this, dispatchCommand);
	getWorkLoop()->addEventSource( mCommandGate );

	// Replace the IOService dicionary with an empty one, clean out variables we don't want.
	OSDictionary* dict = OSDictionary::withCapacity(1);

	if (!dict)
	{
		return false;
	}

	setPropertyTable(dict);

	if (bootnvram)
	{
		// Adopt the variables the bootloader handed over, then detach its node.
		copyEntryProperties(NULL, bootnvram);
		bootnvram->detachFromParent(root, gIODTPlane);
	}
	else
	{
		// NOTE(review): this local shadows what appears to be a member
		// named mTimer, so the member is presumably never set -- confirm.
		IOTimerEventSource* mTimer = IOTimerEventSource::timerEventSource(this, timeoutOccurred);

		if (mTimer)
		{
			getWorkLoop()->addEventSource( mTimer);
			mTimer->setTimeoutMS(50); // callback isn't being setup right, causes a panic
			mSafeToSync = false;
		}
		else
		{
			earlyInit = true;
		}
	}

	// We don't have initial NVRAM data from the bootloader, or we couldn't schedule a
	// timer to read in /Extra/NVRAM/nvram.plist, so start up immediately.
	if (earlyInit == true)
	{
		mSafeToSync = true;
		registerNVRAM();
	}

	mInitComplete = true;

	return true;
}
/*
 * Accounting system call.  Written based on the specification and
 * previous implementation done by Mark Tinguely.
 *
 * acct(2): enable process accounting to the regular file named by
 * uap->path, or disable it when path is USER_ADDR_NULL.  Caller must
 * be root.  Any previously active accounting file is closed first.
 */
int
acct(proc_t p, struct acct_args *uap, __unused int *retval)
{
	struct nameidata nd;
	int error;
	struct vfs_context *ctx;

	ctx = vfs_context_current();

	/* Make sure that the caller is root. */
	if ((error = suser(vfs_context_ucred(ctx), &p->p_acflag)))
		return (error);

	/*
	 * If accounting is to be started to a file, open that file for
	 * writing and make sure it's a 'normal'.
	 */
	if (uap->path != USER_ADDR_NULL) {
		NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, ctx);
		if ((error = vn_open(&nd, FWRITE, 0)))
			return (error);
#if CONFIG_MACF
		error = mac_system_check_acct(vfs_context_ucred(ctx), nd.ni_vp);
		if (error) {
			vnode_put(nd.ni_vp);
			vn_close(nd.ni_vp, FWRITE, ctx);
			return (error);
		}
#endif
		/* Drop the iocount from vn_open(); vn_close() takes its own references. */
		vnode_put(nd.ni_vp);

		if (nd.ni_vp->v_type != VREG) {
			vn_close(nd.ni_vp, FWRITE, ctx);
			return (EACCES);
		}
	}
#if CONFIG_MACF
	else {
		error = mac_system_check_acct(vfs_context_ucred(ctx), NULL);
		if (error)
			return (error);
	}
#endif

	/*
	 * If accounting was previously enabled, kill the old space-watcher,
	 * close the file, and (if no new file was specified, leave).
	 */
	if (acctp != NULLVP || suspend_acctp != NULLVP) {
		untimeout(acctwatch_funnel, NULL);
		error = vn_close((acctp != NULLVP ? acctp : suspend_acctp),
				FWRITE, vfs_context_current());

		acctp = suspend_acctp = NULLVP;
	}
	if (uap->path == USER_ADDR_NULL)
		return (error);

	/*
	 * Save the new accounting file vnode, and schedule the new
	 * free space watcher.
	 */
	acctp = nd.ni_vp;
	acctwatch(NULL);
	return (error);
}
struct kern_direct_file_io_ref_t * kern_open_file_for_direct_io(const char * name, kern_get_file_extents_callback_t callback, void * callback_ref, dev_t * partition_device_result, dev_t * image_device_result, uint64_t * partitionbase_result, uint64_t * maxiocount_result, uint32_t * oflags, off_t offset, caddr_t addr, vm_size_t len) { struct kern_direct_file_io_ref_t * ref; proc_t p; struct vnode_attr va; int error; off_t f_offset; uint64_t fileblk; size_t filechunk; uint64_t physoffset; dev_t device; dev_t target = 0; int isssd = 0; uint32_t flags = 0; uint32_t blksize; off_t maxiocount, count; boolean_t locked = FALSE; int (*do_ioctl)(void * p1, void * p2, u_long theIoctl, caddr_t result); void * p1 = NULL; void * p2 = NULL; error = EFAULT; ref = (struct kern_direct_file_io_ref_t *) kalloc(sizeof(struct kern_direct_file_io_ref_t)); if (!ref) { error = EFAULT; goto out; } bzero(ref, sizeof(*ref)); p = kernproc; ref->ctx = vfs_context_create(vfs_context_current()); if ((error = vnode_open(name, (O_CREAT | FWRITE), (0), 0, &ref->vp, ref->ctx))) goto out; if (addr && len) { if ((error = kern_write_file(ref, offset, addr, len))) goto out; } VATTR_INIT(&va); VATTR_WANTED(&va, va_rdev); VATTR_WANTED(&va, va_fsid); VATTR_WANTED(&va, va_data_size); VATTR_WANTED(&va, va_nlink); error = EFAULT; if (vnode_getattr(ref->vp, &va, ref->ctx)) goto out; kprintf("vp va_rdev major %d minor %d\n", major(va.va_rdev), minor(va.va_rdev)); kprintf("vp va_fsid major %d minor %d\n", major(va.va_fsid), minor(va.va_fsid)); kprintf("vp size %qd\n", va.va_data_size); if (ref->vp->v_type == VREG) { /* Don't dump files with links. */ if (va.va_nlink != 1) goto out; device = va.va_fsid; p1 = &device; p2 = p; do_ioctl = &file_ioctl; } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) { /* Partition. */ device = va.va_rdev; p1 = ref->vp; p2 = ref->ctx; do_ioctl = &device_ioctl; } else { /* Don't dump to non-regular files. 
*/ error = EFAULT; goto out; } ref->device = device; // get block size error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &ref->blksize); if (error) goto out; if (ref->vp->v_type == VREG) ref->filelength = va.va_data_size; else { error = do_ioctl(p1, p2, DKIOCGETBLOCKCOUNT, (caddr_t) &fileblk); if (error) goto out; ref->filelength = fileblk * ref->blksize; } // pin logical extents error = kern_ioctl_file_extents(ref, _DKIOCCSPINEXTENT, 0, ref->filelength); if (error && (ENOTTY != error)) goto out; ref->pinned = (error == 0); // generate the block list error = do_ioctl(p1, p2, DKIOCLOCKPHYSICALEXTENTS, NULL); if (error) goto out; locked = TRUE; f_offset = 0; while (f_offset < ref->filelength) { if (ref->vp->v_type == VREG) { filechunk = 1*1024*1024*1024; daddr64_t blkno; error = VNOP_BLOCKMAP(ref->vp, f_offset, filechunk, &blkno, &filechunk, NULL, 0, NULL); if (error) goto out; fileblk = blkno * ref->blksize; } else if ((ref->vp->v_type == VBLK) || (ref->vp->v_type == VCHR)) { fileblk = f_offset; filechunk = f_offset ? 
0 : ref->filelength; } physoffset = 0; while (physoffset < filechunk) { dk_physical_extent_t getphysreq; bzero(&getphysreq, sizeof(getphysreq)); getphysreq.offset = fileblk + physoffset; getphysreq.length = (filechunk - physoffset); error = do_ioctl(p1, p2, DKIOCGETPHYSICALEXTENT, (caddr_t) &getphysreq); if (error) goto out; if (!target) { target = getphysreq.dev; } else if (target != getphysreq.dev) { error = ENOTSUP; goto out; } callback(callback_ref, getphysreq.offset, getphysreq.length); physoffset += getphysreq.length; } f_offset += filechunk; } callback(callback_ref, 0ULL, 0ULL); if (ref->vp->v_type == VREG) p1 = ⌖ // get partition base error = do_ioctl(p1, p2, DKIOCGETBASE, (caddr_t) partitionbase_result); if (error) goto out; // get block size & constraints error = do_ioctl(p1, p2, DKIOCGETBLOCKSIZE, (caddr_t) &blksize); if (error) goto out; maxiocount = 1*1024*1024*1024; error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTREAD, (caddr_t) &count); if (error) count = 0; count *= blksize; if (count && (count < maxiocount)) maxiocount = count; error = do_ioctl(p1, p2, DKIOCGETMAXBLOCKCOUNTWRITE, (caddr_t) &count); if (error) count = 0; count *= blksize; if (count && (count < maxiocount)) maxiocount = count; error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTREAD, (caddr_t) &count); if (error) count = 0; if (count && (count < maxiocount)) maxiocount = count; error = do_ioctl(p1, p2, DKIOCGETMAXBYTECOUNTWRITE, (caddr_t) &count); if (error) count = 0; if (count && (count < maxiocount)) maxiocount = count; error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTREAD, (caddr_t) &count); if (error) count = 0; if (count && (count < maxiocount)) maxiocount = count; error = do_ioctl(p1, p2, DKIOCGETMAXSEGMENTBYTECOUNTWRITE, (caddr_t) &count); if (error) count = 0; if (count && (count < maxiocount)) maxiocount = count; kprintf("max io 0x%qx bytes\n", maxiocount); if (maxiocount_result) *maxiocount_result = maxiocount; error = do_ioctl(p1, p2, DKIOCISSOLIDSTATE, (caddr_t)&isssd); if 
(!error && isssd) flags |= kIOHibernateOptionSSD; if (partition_device_result) *partition_device_result = device; if (image_device_result) *image_device_result = target; if (flags) *oflags = flags; out: kprintf("kern_open_file_for_direct_io(%d)\n", error); if (error && locked) { p1 = &device; (void) do_ioctl(p1, p2, DKIOCUNLOCKPHYSICALEXTENTS, NULL); } if (error && ref) { if (ref->vp) { vnode_close(ref->vp, FWRITE, ref->ctx); ref->vp = NULLVP; } vfs_context_rele(ref->ctx); kfree(ref, sizeof(struct kern_direct_file_io_ref_t)); ref = NULL; } return(ref); }
/*
 * vm_swapfile_preallocate
 *
 * Prepare an already-opened swapfile vnode for use: optionally set up
 * content protection, ensure the file has a backing size, and mark the
 * vnode as a swap vnode (VSWAP).
 *
 * Parameters:
 *	vp	vnode of the swapfile (caller holds an iocount)
 *	size	in:  desired swapfile size when the file is empty
 *		out: the file's existing size when it is non-empty
 *
 * Returns: 0 on success, or an errno-style error from the content
 * protection setup, vnode_size(), or vnode_setsize().
 */
int
vm_swapfile_preallocate(vnode_t vp, uint64_t *size)
{
	int		error = 0;
	uint64_t	file_size = 0;
	vfs_context_t	ctx = NULL;

	ctx = vfs_context_current();

#if CONFIG_PROTECT
	{
#if 0	/* <rdar://11771612> disabled: class change on the swapfile */
		if ((error = cp_vnode_setclass(vp, PROTECTION_CLASS_F))) {
			if (config_protect_bug) {
				printf("swap protection class set failed with %d\n", error);
			} else {
				panic("swap protection class set failed with %d\n", error);
			}
		}
#endif

		/* initialize content protection keys manually */
		if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
			/*
			 * NOTE(review): this drops the vnode iocount on
			 * failure here, while the success path leaves it for
			 * the caller — confirm callers expect this asymmetry.
			 */
			printf("Content Protection key failure on swap: %d\n", error);
			vnode_put(vp);
			vp = NULL;
			goto done;
		}
	}
#endif

	/*
	 * This check exists because dynamic_pager creates the 1st swapfile,
	 * swapfile0, for us from user-space in a supported manner (with
	 * IO_NOZEROFILL etc).
	 *
	 * If dynamic_pager, in the future, discontinues creating that file,
	 * then we need to change this check to a panic / assert or return an
	 * error. That's because we can't be sure if the file has been
	 * created correctly.
	 */
	if ((error = vnode_size(vp, (off_t *) &file_size, ctx)) != 0) {
		printf("vnode_size (existing files) for swap files failed: %d\n", error);
		goto done;
	} else {
		if (file_size == 0) {
			/* empty file: grow it to the requested size */
			error = vnode_setsize(vp, *size, IO_NOZEROFILL, ctx);
			if (error) {
				printf("vnode_setsize for swap files failed: %d\n", error);
				goto done;
			}
		} else {
			/* pre-existing file: report its size back to the caller */
			*size = file_size;
		}
	}

	/* Tag the vnode as a swapfile so the VFS layer treats it specially. */
	vnode_lock_spin(vp);
	SET(vp->v_flag, VSWAP);
	vnode_unlock(vp);
done:
	return error;
}
/*
 * spl_vfs_root
 *
 * Thin wrapper around the VFS_ROOT operation: look up the root vnode of
 * the given mount using the calling thread's VFS context.
 *
 * Returns whatever VFS_ROOT returns (0 on success, errno otherwise);
 * on success *vp holds the mount's root vnode.
 */
int
spl_vfs_root(mount_t mount, struct vnode **vp)
{
	vfs_context_t ctx = vfs_context_current();

	return VFS_ROOT(mount, vp, ctx);
}
/*
 * map_fd_funneled
 *
 * Map the regular file referenced by descriptor 'fd' into the current
 * task's address space at offset 'offset' for 'size' bytes.
 *
 * If 'findspace' is true, the kernel picks the address and copies it out
 * to user memory at 'va'; otherwise the destination address is copied in
 * from user memory at 'va' and the mapped pages are copy-overwritten there.
 *
 * NOTE(review): despite the kern_return_t return type, this routine mixes
 * errno values (from fp_lookup) with KERN_* codes in 'err' — callers must
 * tolerate both. Also 'va' is a user-space address smuggled through a
 * kernel pointer parameter (see the CAST_USER_ADDR_T uses) — confirm all
 * callers pass a user address here.
 */
kern_return_t
map_fd_funneled(
	int			fd,
	vm_object_offset_t	offset,
	vm_offset_t		*va,
	boolean_t		findspace,
	vm_size_t		size)
{
	kern_return_t	result;
	struct fileproc	*fp;
	struct vnode	*vp;
	void		*pager;
	vm_offset_t	map_addr = 0;
	vm_size_t	map_size;
	int		err = 0;
	vm_map_t	my_map;
	proc_t		p = current_proc();
	struct vnode_attr vattr;

	/*
	 * Find the inode; verify that it's a regular file.
	 */
	err = fp_lookup(p, fd, &fp, 0);
	if (err)
		return(err);
	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}
	if (!(fp->f_fglob->fg_flag & FREAD)) {
		err = KERN_PROTECTION_FAILURE;
		goto bad;
	}
	vp = (struct vnode *)fp->f_fglob->fg_data;
	err = vnode_getwithref(vp);
	if (err != 0)
		goto bad;

	if (vp->v_type != VREG) {
		(void)vnode_put(vp);
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}

	AUDIT_ARG(vnpath, vp, ARG_VNODE1);

	/*
	 * POSIX: mmap needs to update access time for mapped files
	 */
	if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
		VATTR_INIT(&vattr);
		nanotime(&vattr.va_access_time);
		VATTR_SET_ACTIVE(&vattr, va_access_time);
		vnode_setattr(vp, &vattr, vfs_context_current());
	}

	/* File offsets must be page aligned — there is no pageoff fixup here. */
	if (offset & PAGE_MASK_64) {
		printf("map_fd: file offset not page aligned(%d : %s)\n",p->p_pid, p->p_comm);
		(void)vnode_put(vp);
		err = KERN_INVALID_ARGUMENT;
		goto bad;
	}
	map_size = round_page(size);

	/*
	 * Allow user to map in a zero length file.
	 */
	if (size == 0) {
		(void)vnode_put(vp);
		err = KERN_SUCCESS;
		goto bad;
	}
	/*
	 * Map in the file.
	 */
	pager = (void *)ubc_getpager(vp);
	if (pager == NULL) {
		(void)vnode_put(vp);
		err = KERN_FAILURE;
		goto bad;
	}

	my_map = current_map();

	result = vm_map_64(
			my_map,
			&map_addr, map_size, (vm_offset_t)0,
			VM_FLAGS_ANYWHERE, pager, offset, TRUE,
			VM_PROT_DEFAULT, VM_PROT_ALL,
			VM_INHERIT_DEFAULT);
	if (result != KERN_SUCCESS) {
		(void)vnode_put(vp);
		err = result;
		goto bad;
	}

	if (!findspace) {
		/* Caller chose the destination: copy the mapping over it. */
		vm_offset_t	dst_addr;
		vm_map_copy_t	tmp;

		if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr))	||
					trunc_page_32(dst_addr) != dst_addr) {
			(void) vm_map_remove(
					my_map,
					map_addr, map_addr + map_size,
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = KERN_INVALID_ADDRESS;
			goto bad;
		}

		result = vm_map_copyin(my_map, (vm_map_address_t)map_addr,
				       (vm_map_size_t)map_size, TRUE, &tmp);
		if (result != KERN_SUCCESS) {
			(void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
					vm_map_round_page(map_addr + map_size),
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = result;
			goto bad;
		}

		result = vm_map_copy_overwrite(my_map,
					(vm_map_address_t)dst_addr, tmp, FALSE);
		if (result != KERN_SUCCESS) {
			vm_map_copy_discard(tmp);
			(void)vnode_put(vp);
			err = result;
			goto bad;
		}
	} else {
		/* Kernel chose the address: report it back to the caller. */
		if (copyout(&map_addr, CAST_USER_ADDR_T(va), sizeof (map_addr))) {
			(void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
					vm_map_round_page(map_addr + map_size),
					VM_MAP_NO_FLAGS);
			(void)vnode_put(vp);
			err = KERN_INVALID_ADDRESS;
			goto bad;
		}
	}

	/* Record the mapping with the UBC so the vnode stays mappable. */
	ubc_setthreadcred(vp, current_proc(), current_thread());
	(void)ubc_map(vp, (PROT_READ | PROT_EXEC));
	(void)vnode_put(vp);
	err = 0;
bad:
	fp_drop(p, fd, fp, 0);
	return (err);
}
/*
 * This routine exists to support the load_dylinker().
 *
 * This routine has its own, separate, understanding of the FAT file format,
 * which is terrifically unfortunate.
 *
 * Looks up 'path', validates it as an executable regular file, opens it,
 * and reads its Mach-O header (digging into a fat file if necessary).
 * On success the Mach-O header, file offset and size are returned, and
 * *vpp holds the vnode still open (FREAD) with an iocount — the caller
 * is responsible for closing and releasing it. On failure the vnode is
 * fully released before returning.
 *
 * Returns LOAD_SUCCESS or a LOAD_* error code.
 */
static load_return_t
get_macho_vnode(
	char			*path,
	integer_t		archbits,
	struct mach_header	*mach_header,
	off_t			*file_offset,
	off_t			*macho_size,
	struct vnode		**vpp
)
{
	struct vnode		*vp;
	vfs_context_t		ctx = vfs_context_current();
	proc_t			p = vfs_context_proc(ctx);
	kauth_cred_t		kerncred;
	struct nameidata	nid, *ndp;
	boolean_t		is_fat;
	struct fat_arch		fat_arch;
	int			error = LOAD_SUCCESS;
	int resid;
	union {
		struct mach_header	mach_header;
		struct fat_header	fat_header;
		char	pad[512];
	} header;
	off_t fsize = (off_t)0;
	int err2;

	/*
	 * Capture the kernel credential for use in the actual read of the
	 * file, since the user doing the execution may have execute rights
	 * but not read rights, but to exec something, we have to either map
	 * or read it into the new process address space, which requires
	 * read rights.  This is to deal with lack of common credential
	 * serialization code which would treat NOCRED as "serialize 'root'".
	 */
	kerncred = vfs_context_ucred(vfs_context_kernel());

	ndp = &nid;

	/* init the namei data to point the file user's program name */
	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE32,
	    CAST_USER_ADDR_T(path), ctx);

	if ((error = namei(ndp)) != 0) {
		if (error == ENOENT) {
			error = LOAD_ENOENT;
		} else {
			error = LOAD_FAILURE;
		}
		return(error);
	}
	nameidone(ndp);
	vp = ndp->ni_vp;

	/* check for regular file */
	if (vp->v_type != VREG) {
		error = LOAD_PROTECT;
		goto bad1;
	}

	/* get size */
	if ((error = vnode_size(vp, &fsize, ctx)) != 0) {
		error = LOAD_FAILURE;
		goto bad1;
	}

	/* Check mount point */
	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
		error = LOAD_PROTECT;
		goto bad1;
	}

	/* check access */
	if ((error = vnode_authorize(vp, NULL, KAUTH_VNODE_EXECUTE, ctx)) != 0) {
		error = LOAD_PROTECT;
		goto bad1;
	}

	/* try to open it */
	if ((error = VNOP_OPEN(vp, FREAD, ctx)) != 0) {
		error = LOAD_PROTECT;
		goto bad1;
	}

	if ((error = vn_rdwr(UIO_READ, vp, (caddr_t)&header, sizeof(header), 0,
	    UIO_SYSSPACE32, IO_NODELOCKED, kerncred, &resid, p)) != 0) {
		error = LOAD_IOERROR;
		goto bad2;
	}

	if (header.mach_header.magic == MH_MAGIC ||
	    header.mach_header.magic == MH_MAGIC_64)
		is_fat = FALSE;
	else if (header.fat_header.magic == FAT_MAGIC ||
		 header.fat_header.magic == FAT_CIGAM)
		is_fat = TRUE;
	else {
		error = LOAD_BADMACHO;
		goto bad2;
	}

	if (is_fat) {
		/* Look up our architecture in the fat file. */
		error = fatfile_getarch_with_bits(vp, archbits,
		    (vm_offset_t)(&header.fat_header), &fat_arch);
		if (error != LOAD_SUCCESS)
			goto bad2;

		/* Read the Mach-O header out of it */
		error = vn_rdwr(UIO_READ, vp, (caddr_t)&header.mach_header,
		    sizeof(header.mach_header), fat_arch.offset,
		    UIO_SYSSPACE32, IO_NODELOCKED, kerncred, &resid, p);
		if (error) {
			error = LOAD_IOERROR;
			goto bad2;
		}

		/* Is this really a Mach-O? */
		if (header.mach_header.magic != MH_MAGIC &&
		    header.mach_header.magic != MH_MAGIC_64) {
			error = LOAD_BADMACHO;
			goto bad2;
		}

		*file_offset = fat_arch.offset;
		*macho_size = fat_arch.size;
	} else {
		/*
		 * Force get_macho_vnode() to fail if the architecture bits
		 * do not match the expected architecture bits.  This in
		 * turn causes load_dylinker() to fail for the same reason,
		 * so it ensures the dynamic linker and the binary are in
		 * lock-step.  This is potentially bad, if we ever add to
		 * the CPU_ARCH_* bits any bits that are desirable but not
		 * required, since the dynamic linker might work, but we will
		 * refuse to load it because of this check.
		 */
		if ((cpu_type_t)(header.mach_header.cputype & CPU_ARCH_MASK) != archbits) {
			/*
			 * Fix: previously returned LOAD_BADARCH directly,
			 * leaking the open count from VNOP_OPEN and the
			 * iocount from namei.  Route through bad2 so the
			 * vnode is closed and released like every other
			 * post-open failure.
			 */
			error = LOAD_BADARCH;
			goto bad2;
		}

		*file_offset = 0;
		*macho_size = fsize;
	}

	*mach_header = header.mach_header;
	*vpp = vp;

	ubc_setsize(vp, fsize);

	/* Success: vnode stays open and referenced for the caller. */
	return (error);

bad2:
	err2 = VNOP_CLOSE(vp, FREAD, ctx);
	vnode_put(vp);
	return (error);

bad1:
	vnode_put(vp);
	return(error);
}
/*
 * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
 * XXX usage is PROT_* from an interface perspective.  Thus the values of
 * XXX VM_PROT_* and PROT_* need to correspond.
 */
/*
 * mmap system call.
 *
 * Maps anonymous memory (MAP_ANON), a POSIX shared memory object
 * (DTYPE_PSXSHM, delegated to pshm_mmap), or a file-backed vnode into the
 * calling process's address space.  On success *retval receives the mapped
 * user address (including the sub-page offset of uap->pos).
 *
 * Returns 0 on success, or an errno value.
 */
int
mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
{
	/*
	 *	Map in special device (must be SHARED) or file
	 */
	struct fileproc *fp;
	register struct		vnode *vp;
	int			flags;
	int			prot, file_prot;
	int			err = 0;
	vm_map_t		user_map;
	kern_return_t		result;
	mach_vm_offset_t	user_addr;
	mach_vm_size_t		user_size;
	vm_object_offset_t	pageoff;
	vm_object_offset_t	file_pos;
	int			alloc_flags = 0;
	boolean_t		docow;
	vm_prot_t		maxprot;
	void			*handle;
	vm_pager_t		pager;
	int			mapanon = 0;
	int			fpref = 0;
	int			error = 0;
	int fd = uap->fd;

	user_addr = (mach_vm_offset_t)uap->addr;
	user_size = (mach_vm_size_t) uap->len;

	AUDIT_ARG(addr, user_addr);
	AUDIT_ARG(len, user_size);
	AUDIT_ARG(fd, uap->fd);

	prot = (uap->prot & VM_PROT_ALL);
#if 3777787
	/*
	 * Since the hardware currently does not support writing without
	 * read-before-write, or execution-without-read, if the request is
	 * for write or execute access, we must imply read access as well;
	 * otherwise programs expecting this to work will fail to operate.
	 */
	if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
		prot |= VM_PROT_READ;
#endif	/* radar 3777787 */

	flags = uap->flags;
	vp = NULLVP;

	/*
	 * The vm code does not have prototypes & the compiler doesn't do
	 * the right thing when you cast a 64-bit value and pass it in a
	 * function call.  So here it is.
	 */
	file_pos = (vm_object_offset_t)uap->pos;

	/* make sure mapping fits into numeric range etc */
	if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
		return (EINVAL);

	/*
	 * Align the file position to a page boundary,
	 * and save its page offset component.
	 */
	pageoff = (file_pos & PAGE_MASK);
	file_pos -= (vm_object_offset_t)pageoff;

	/* Adjust size for rounding (on both ends). */
	user_size += pageoff;			/* low end... */
	user_size = mach_vm_round_page(user_size); /* hi end */

	/*
	 * Check for illegal addresses.  Watch out for address wrap... Note
	 * that VM_*_ADDRESS are not constants due to casts (argh).
	 */
	if (flags & MAP_FIXED) {
		/*
		 * The specified address must have the same remainder
		 * as the file offset taken modulo PAGE_SIZE, so it
		 * should be aligned after adjustment by pageoff.
		 */
		user_addr -= pageoff;
		if (user_addr & PAGE_MASK)
			return (EINVAL);
	}
#ifdef notyet
	/* DO not have apis to get this info, need to wait till then*/
	/*
	 * XXX for non-fixed mappings where no hint is provided or
	 * the hint would fall in the potential heap space,
	 * place it after the end of the largest possible heap.
	 *
	 * There should really be a pmap call to determine a reasonable
	 * location.
	 */
	else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
		addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
#endif

	alloc_flags = 0;

	if (flags & MAP_ANON) {
		/*
		 * Mapping blank space is trivial.  Use positive fds as the
		 * alias value for memory tracking.
		 */
		if (fd != -1) {
			/*
			 * Use "fd" to pass (some) Mach VM allocation flags,
			 * (see the VM_FLAGS_* definitions).
			 */
			alloc_flags = fd & (VM_FLAGS_ALIAS_MASK | VM_FLAGS_PURGABLE);
			if (alloc_flags != fd) {
				/* reject if there are any extra flags */
				return EINVAL;
			}
		}

		handle = NULL;
		maxprot = VM_PROT_ALL;
		file_pos = 0;
		mapanon = 1;
	} else {
		struct vnode_attr va;
		vfs_context_t ctx = vfs_context_current();

		/*
		 * Mapping file, get fp for validation. Obtain vnode and make
		 * sure it is of appropriate type.
		 */
		err = fp_lookup(p, fd, &fp, 0);
		if (err)
			return(err);
		fpref = 1;
		if (fp->f_fglob->fg_type == DTYPE_PSXSHM) {
			/* POSIX shared memory: hand the (rewritten) args to pshm. */
			uap->addr = (user_addr_t)user_addr;
			uap->len = (user_size_t)user_size;
			uap->prot = prot;
			uap->flags = flags;
			uap->pos = file_pos;
			error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
			goto bad;
		}

		if (fp->f_fglob->fg_type != DTYPE_VNODE) {
			error = EINVAL;
			goto bad;
		}
		vp = (struct vnode *)fp->f_fglob->fg_data;
		error = vnode_getwithref(vp);
		if (error != 0)
			goto bad;

		if (vp->v_type != VREG && vp->v_type != VCHR) {
			(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}

		AUDIT_ARG(vnpath, vp, ARG_VNODE1);

		/*
		 * POSIX: mmap needs to update access time for mapped files
		 */
		if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
			VATTR_INIT(&va);
			nanotime(&va.va_access_time);
			VATTR_SET_ACTIVE(&va, va_access_time);
			vnode_setattr(vp, &va, ctx);
		}

		/*
		 * XXX hack to handle use of /dev/zero to map anon memory (ala
		 * SunOS).
		 */
		if (vp->v_type == VCHR || vp->v_type == VSTR) {
			(void)vnode_put(vp);
			error = ENODEV;
			goto bad;
		} else {
			/*
			 * Ensure that file and memory protections are
			 * compatible.  Note that we only worry about
			 * writability if mapping is shared; in this case,
			 * current and max prot are dictated by the open file.
			 * XXX use the vnode instead?  Problem is: what
			 * credentials do we use for determination? What if
			 * proc does a setuid?
			 */
			maxprot = VM_PROT_EXECUTE;	/* ??? */
			if (fp->f_fglob->fg_flag & FREAD)
				maxprot |= VM_PROT_READ;
			else if (prot & PROT_READ) {
				(void)vnode_put(vp);
				error = EACCES;
				goto bad;
			}
			/*
			 * If we are sharing potential changes (either via
			 * MAP_SHARED or via the implicit sharing of character
			 * device mappings), and we are trying to get write
			 * permission although we opened it without asking
			 * for it, bail out.
			 */
			if ((flags & MAP_SHARED) != 0) {
				if ((fp->f_fglob->fg_flag & FWRITE) != 0) {
					/*
					 * check for write access
					 *
					 * Note that we already made this
					 * check when granting FWRITE against
					 * the file, so it seems redundant
					 * here.
					 */
					error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);

					/* if not granted for any reason, but we wanted it, bad */
					if ((prot & PROT_WRITE) && (error != 0)) {
						vnode_put(vp);
						goto bad;
					}

					/* if writable, remember */
					if (error == 0)
						maxprot |= VM_PROT_WRITE;

				} else if ((prot & PROT_WRITE) != 0) {
					(void)vnode_put(vp);
					error = EACCES;
					goto bad;
				}
			} else
				maxprot |= VM_PROT_WRITE;

			handle = (void *)vp;
#if CONFIG_MACF
			error = mac_file_check_mmap(vfs_context_ucred(ctx),
			    fp->f_fglob, prot, flags, &maxprot);
			if (error) {
				(void)vnode_put(vp);
				goto bad;
			}
#endif /* MAC */
		}
	}

	if (user_size == 0) {
		if (!mapanon)
			(void)vnode_put(vp);
		error = 0;
		goto bad;
	}

	/*
	 * We bend a little - round the start and end addresses
	 * to the nearest page boundary.
	 */
	user_size = mach_vm_round_page(user_size);

	if (file_pos & PAGE_MASK_64) {
		if (!mapanon)
			(void)vnode_put(vp);
		error = EINVAL;
		goto bad;
	}

	user_map = current_map();

	if ((flags & MAP_FIXED) == 0) {
		alloc_flags |= VM_FLAGS_ANYWHERE;
		user_addr = mach_vm_round_page(user_addr);
	} else {
		if (user_addr != mach_vm_trunc_page(user_addr)) {
			if (!mapanon)
				(void)vnode_put(vp);
			error = EINVAL;
			goto bad;
		}
		/*
		 * mmap(MAP_FIXED) will replace any existing mappings in the
		 * specified range, if the new mapping is successful.
		 * If we just deallocate the specified address range here,
		 * another thread might jump in and allocate memory in that
		 * range before we get a chance to establish the new mapping,
		 * and we won't have a chance to restore the old mappings.
		 * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
		 * has to deallocate the existing mappings and establish the
		 * new ones atomically.
		 */
		alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
	}

	if (flags & MAP_NOCACHE)
		alloc_flags |= VM_FLAGS_NO_CACHE;

	/*
	 * Lookup/allocate object.
	 */
	if (handle == NULL) {
		/* Anonymous mapping: no pager, memory object created by VM. */
		pager = NULL;
#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 IPC_PORT_NULL, 0, FALSE,
						 prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE :
						 VM_INHERIT_DEFAULT);
		if (result != KERN_SUCCESS)
				goto out;
	} else {
		/* File-backed mapping: get the vnode's UBC pager. */
		pager = (vm_pager_t)ubc_getpager(vp);

		if (pager == NULL) {
			(void)vnode_put(vp);
			error = ENOMEM;
			goto bad;
		}

		/*
		 *  Set credentials:
		 *	FIXME: if we're writing the file we need a way to
		 *      ensure that someone doesn't replace our R/W creds
		 * 	with ones that only work for read.
		 */
		ubc_setthreadcred(vp, p, current_thread());
		docow = FALSE;
		if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
			/* private mapping: copy-on-write */
			docow = TRUE;
		}

#ifdef notyet
/* Hmm .. */
#if defined(VM_PROT_READ_IS_EXEC)
		if (prot & VM_PROT_READ)
			prot |= VM_PROT_EXECUTE;
		if (maxprot & VM_PROT_READ)
			maxprot |= VM_PROT_EXECUTE;
#endif
#endif /* notyet */

#if 3777787
		if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			prot |= VM_PROT_READ;
		if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
			maxprot |= VM_PROT_READ;
#endif	/* radar 3777787 */

		result = vm_map_enter_mem_object(user_map,
						 &user_addr, user_size,
						 0, alloc_flags,
						 (ipc_port_t)pager, file_pos,
						 docow, prot, maxprot,
						 (flags & MAP_SHARED) ?
						 VM_INHERIT_SHARE :
						 VM_INHERIT_DEFAULT);

		if (result != KERN_SUCCESS) {
				(void)vnode_put(vp);
				goto out;
		}

		file_prot = prot & (PROT_READ | PROT_WRITE | PROT_EXEC);
		if (docow) {
			/* private mapping: won't write to the file */
			file_prot &= ~PROT_WRITE;
		}
		(void) ubc_map(vp, file_prot);
	}

	if (!mapanon)
		(void)vnode_put(vp);

out:
	/* Translate the Mach result into an errno for the syscall return. */
	switch (result) {
	case KERN_SUCCESS:
		*retval = user_addr + pageoff;
		error = 0;
		break;
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		error = ENOMEM;
		break;
	case KERN_PROTECTION_FAILURE:
		error = EACCES;
		break;
	default:
		error = EINVAL;
		break;
	}
bad:
	if (fpref)
		fp_drop(p, fd, fp, 0);

	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
	KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
			      (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);

	return(error);
}
/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 *
 * The file must be a regular file, on the process's root volume, owned by
 * root, and readable via 'fd'.  Up to SFM_MAX_STACK mappings are copied in
 * from uap->mappings and established in the process's shared region.
 *
 * Returns 0 on success, or an errno value.
 */
int
shared_region_map_np(
	struct proc				*p,
	struct shared_region_map_np_args	*uap,
	__unused int				*retvalp)
{
	int				error;
	kern_return_t			kr;
	int				fd;
	struct fileproc			*fp;
	struct vnode			*vp, *root_vp;
	struct vnode_attr		va;
	off_t				fs;
	memory_object_size_t		file_size;
	user_addr_t			user_mappings;
	struct shared_file_mapping_np	*mappings;
#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];
	unsigned int			mappings_count;
	vm_size_t			mappings_size;
	memory_object_control_t		file_control;
	struct vm_shared_region		*shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 current_thread(), p->p_pid, p->p_comm));

	shared_region = NULL;
	mappings_count = 0;
	mappings_size = 0;
	mappings = NULL;
	fp = NULL;
	vp = NULL;

	/* get file descriptor for shared region cache file */
	fd = uap->fd;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 fd, fp->f_fglob->fg_type));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (! (fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 current_thread(), p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		root_vp = rootvnode;
	}
	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EINVAL;
		goto done;
	}

	/* get the list of mappings the caller wants us to establish */
	mappings_count = uap->count;	/* number of mappings */
	mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no mappings\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = 0;	/* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "too many mappings (%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, mappings_count));
		error = EINVAL;
		goto done;
	}

	user_mappings = uap->mappings;	/* the mappings, in user space */
	error = copyin(user_mappings,
		       mappings,
		       mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error));
		goto done;
	}

	/* get the process's shared region (setup in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		/*
		 * Fix: previously fell through to "done" with error still 0
		 * (from the successful copyin), so the syscall reported
		 * success even though nothing was mapped.  Report the
		 * failure explicitly.
		 */
		error = EINVAL;
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
				       mappings_count,
				       mappings,
				       file_control,
				       file_size,
				       (void *) p->p_fd->fd_rdir);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, kr));
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	/*
	 * The mapping was successful.  Let the buffer cache know
	 * that we've mapped that file with these protections.  This
	 * prevents the vnode from getting recycled while it's mapped.
	 */
	(void) ubc_map(vp, VM_PROT_READ);
	error = 0;

	/* update the vnode's access time */
	if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), (UInt32 *)&p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 current_thread(), p->p_pid, p->p_comm));

	return error;
}
/*
 * auditctl: redirect the audit trail to a new log file.
 *
 * The caller must be superuser.  The path is opened, validated (MAC check
 * plus a regular-file check), audit is un-suspended, and the vnode is
 * handed to audit_rotate_vnode() together with a fresh credential
 * reference (both released later by the audit subsystem).
 */
/* ARGSUSED */
int
auditctl(proc_t p, struct auditctl_args *uap, __unused int32_t *retval)
{
	struct nameidata	aud_nd;
	kauth_cred_t		aud_cred = NULL;
	struct vnode		*aud_vp = NULL;
	int			rv;

	/* only the superuser may redirect the audit trail */
	rv = suser(kauth_cred_get(), &p->p_acflag);
	if (rv)
		return (rv);

	/*
	 * If a path is specified, open the replacement vnode, perform
	 * validity checks, and grab another reference to the current
	 * credential.
	 *
	 * XXX Changes API slightly.  NULL path no longer disables audit but
	 * returns EINVAL.
	 */
	if (uap->path == USER_ADDR_NULL)
		return (EINVAL);

	NDINIT(&aud_nd, LOOKUP, OP_OPEN, FOLLOW | LOCKLEAF | AUDITVNPATH1,
	    (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
	    uap->path, vfs_context_current());
	rv = vn_open(&aud_nd, AUDIT_OPEN_FLAGS, 0);
	if (rv)
		return (rv);
	aud_vp = aud_nd.ni_vp;

#if CONFIG_MACF
	/*
	 * Accessibility of the vnode was determined in vn_open; the
	 * mac_system_check_auditctl should only determine whether that vnode
	 * is appropriate for storing audit data, or that the caller was
	 * permitted to control the auditing system at all.  For example, a
	 * confidentiality policy may want to ensure that audit files are
	 * always high sensitivity.
	 */
	rv = mac_system_check_auditctl(kauth_cred_get(), aud_vp);
	if (rv) {
		vn_close(aud_vp, AUDIT_CLOSE_FLAGS, vfs_context_current());
		vnode_put(aud_vp);
		return (rv);
	}
#endif

	/* the audit trail must live in a regular file */
	if (aud_vp->v_type != VREG) {
		vn_close(aud_vp, AUDIT_CLOSE_FLAGS, vfs_context_current());
		vnode_put(aud_vp);
		return (EINVAL);
	}

	mtx_lock(&audit_mtx);
	/*
	 * XXXAUDIT: Should audit_suspended actually be cleared by
	 * audit_worker?
	 */
	audit_suspended = 0;
	mtx_unlock(&audit_mtx);

	/*
	 * The following gets unreferenced in audit_rotate_vnode()
	 * after the rotation and it is no longer needed.
	 */
	aud_cred = kauth_cred_get_with_ref();
	audit_rotate_vnode(aud_cred, aud_vp);
	vnode_put(aud_vp);

	return (0);
}