/*
 * port_associate_ksource() registers the kernel event source `source'
 * with the event port referenced by the file descriptor `port'.
 *
 * Each port keeps its sources in a hash table (portq_scache) whose
 * buckets are doubly linked lists; port_dissociate_ksource() unlinks
 * entries through portsrc_prev/portsrc_next.  If the source is already
 * registered only its reference count is bumped, otherwise a new
 * port_source_t is allocated (KM_NOSLEEP) and put at the head of its
 * bucket.
 *
 * port_src_close and arg are stored in a newly created entry;
 * port_src_associate is not used here.
 *
 * Returns:
 *	0	success; *portsrc (when portsrc != NULL) is set to the entry
 *	EBADF	`port' is not an open file descriptor
 *	EBADFD	`port' does not refer to an event port
 *	ENOMEM	the source entry could not be allocated
 */
/* ARGSUSED */
int
port_associate_ksource(int port, int source, port_source_t **portsrc,
    void (*port_src_close)(void *, int, pid_t, int), void *arg,
    int (*port_src_associate)(port_kevent_t *, int, int, uintptr_t, void *))
{
	port_t		*pp;
	file_t		*fp;
	port_source_t	**ps;
	port_source_t	*pse;

	if ((fp = getf(port)) == NULL)
		return (EBADF);

	if (fp->f_vnode->v_type != VPORT) {
		releasef(port);
		return (EBADFD);
	}
	pp = VTOEP(fp->f_vnode);

	mutex_enter(&pp->port_queue.portq_source_mutex);
	ps = &pp->port_queue.portq_scache[PORT_SHASH(source)];
	for (pse = *ps; pse != NULL; pse = pse->portsrc_next) {
		if (pse->portsrc_source == source)
			break;
	}

	if (pse == NULL) {
		/* Create association of the event source with the port */
		pse = kmem_zalloc(sizeof (port_source_t), KM_NOSLEEP);
		if (pse == NULL) {
			mutex_exit(&pp->port_queue.portq_source_mutex);
			releasef(port);
			return (ENOMEM);
		}
		pse->portsrc_source = source;
		pse->portsrc_close = port_src_close;
		pse->portsrc_closearg = arg;
		pse->portsrc_cnt = 1;

		/*
		 * Insert at the head of the bucket.  The previous head must
		 * stay on the chain (linking to its successor instead would
		 * silently drop it), and its back pointer must refer to the
		 * new entry so that port_dissociate_ksource() can unlink
		 * either of them later.  The new entry's portsrc_prev stays
		 * NULL (kmem_zalloc), which marks it as first in the bucket.
		 */
		if (*ps != NULL) {
			pse->portsrc_next = *ps;
			(*ps)->portsrc_prev = pse;
		}
		*ps = pse;
	} else {
		/* entry already available, source is only requesting count */
		pse->portsrc_cnt++;
	}
	mutex_exit(&pp->port_queue.portq_source_mutex);
	releasef(port);
	if (portsrc)
		*portsrc = pse;
	return (0);
}
/*
 * check_promisc() logs a CE_WARN message when `promiscon' is a
 * DL_PROMISCON_REQ asking for DL_PROMISC_PHYS.  The name reported is
 * the driver name derived from the major number of the device behind
 * `fildes', or "unknown" when no name is found.  Nothing is logged if
 * the request does not match or `fildes' is not an open descriptor.
 */
void
check_promisc(int fildes, dl_promiscon_req_t *promiscon)
{
	file_t	*filep;
	cred_t	*crp;
	char	*drvname;
	dev_t	devno;

	/* Only physical-level promiscuous-on requests are of interest. */
	if (promiscon->dl_primitive != DL_PROMISCON_REQ ||
	    promiscon->dl_level != DL_PROMISC_PHYS)
		return;

	filep = getf(fildes);
	if (filep == NULL)
		return;

	devno = filep->f_vnode->v_rdev;
	drvname = ddi_major_to_name(getmajor(devno));
	crp = ddi_get_cred();	/* fetched but not consulted below */

	log_msg(CE_WARN, "Promiscuous mode enabled on interface %s",
	    drvname ? drvname : "unknown");

	releasef(fildes);
}
/*
 * smmaplf32() is the mmap entry point taking a 64-bit file offset
 * split into two 32-bit halves (uap->offhi / uap->offlo).  Which half
 * supplies the upper 32 bits depends on _BIG_ENDIAN, exactly as coded
 * below.  The mapping is always requested with _MAP_LOW32; a caller
 * passing _MAP_LOW32 itself is rejected with EINVAL.
 *
 * On success the mapped address is stored in rvp->r_val1 and 0 is
 * returned; otherwise an errno value is returned (EBADF when uap->fd
 * is not an open descriptor and the request is not anonymous).
 */
int
smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
{
	struct file	*filep;
	caddr_t		va = uap->addr;
	int		mflags = (int)uap->flags;
	int		mfd = (int)uap->fd;
	int		rc;
#ifdef _BIG_ENDIAN
	offset_t	pos = ((u_offset_t)uap->offhi << 32) |
	    (u_offset_t)uap->offlo;
#else
	offset_t	pos = ((u_offset_t)uap->offlo << 32) |
	    (u_offset_t)uap->offhi;
#endif

	if (mflags & _MAP_LOW32)
		return (EINVAL);

	if (mfd == -1 && (mflags & MAP_ANON) != 0) {
		/* anonymous mapping: no file involved */
		rc = smmap_common(&va, uap->len, (int)uap->prot,
		    mflags | _MAP_LOW32, NULL, pos);
	} else {
		filep = getf(mfd);
		if (filep == NULL)
			return (EBADF);
		rc = smmap_common(&va, uap->len, (int)uap->prot,
		    mflags | _MAP_LOW32, filep, pos);
		releasef(mfd);
	}

	if (rc == 0)
		rvp->r_val1 = (uintptr_t)va;
	return (rc);
}
/*
 * getdents64() reads directory entries from the directory open on `fd'
 * into the user buffer `buf' of `count' bytes.
 *
 * `count' must be able to hold at least one struct dirent64 (EINVAL
 * otherwise) and is silently capped at MAXGETDENTS_SIZE.  The read
 * starts at the file's current offset, which is advanced on success.
 *
 * Returns the number of bytes placed in `buf', or the result of
 * set_errno() with EINVAL, EBADF, ENOTDIR or the VOP_READDIR() error.
 */
int
getdents64(int fd, void *buf, size_t count)
{
	struct uio	uio;
	struct iovec	iov;
	file_t		*filep;
	vnode_t		*dvp;
	int		discard;	/* VOP_READDIR out-value, not used */
	int		rc;

	if (count < sizeof (struct dirent64))
		return (set_errno(EINVAL));

	/* Don't let the user overcommit kernel resources. */
	if (count > MAXGETDENTS_SIZE)
		count = MAXGETDENTS_SIZE;

	filep = getf(fd);
	if (filep == NULL)
		return (set_errno(EBADF));

	dvp = filep->f_vnode;
	if (dvp->v_type != VDIR) {
		rc = ENOTDIR;
		goto fail;
	}

	/* Describe the single user-space destination buffer. */
	iov.iov_base = buf;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_loffset = filep->f_offset;
	uio.uio_segflg = UIO_USERSPACE;
	uio.uio_resid = count;
	uio.uio_fmode = 0;
	uio.uio_extflg = UIO_COPY_CACHED;

	(void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
	rc = VOP_READDIR(dvp, &uio, filep->f_cred, &discard, NULL, 0);
	VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
	if (rc != 0)
		goto fail;

	/* bytes transferred = requested - left over */
	count -= uio.uio_resid;
	filep->f_offset = uio.uio_loffset;
	releasef(fd);
	return (count);

fail:
	releasef(fd);
	return (set_errno(rc));
}
/*
 * port_dissociate_fd() removes the association between the file
 * descriptor `object' and the event port `pp', including events that
 * have already fired for it.
 *
 * A file descriptor may be shared between processes; all of them have
 * the same rights to re-associate the fd and to retrieve its events.
 * The process that first associated the fd with the port owns the
 * association, and only that owner may dissociate it.
 *
 * Returns:
 *	0	association removed, or there was nothing to remove
 *	EBADFD	`object' is larger than INT_MAX or is not an open fd
 *	EACCES	the calling process does not own the association
 */
int
port_dissociate_fd(port_t *pp, uintptr_t object)
{
	port_fdcache_t	*cache;
	portfd_t	*pfdp;
	file_t		*filep;
	int		fd;
	int		rv = 0;

	if (object > (uintptr_t)INT_MAX)
		return (EBADFD);
	fd = object;

	cache = pp->port_queue.portq_pcp;
	mutex_enter(&cache->pc_lock);

	/* no file descriptor cache available */
	if (cache->pc_hash == NULL)
		goto unlock;

	filep = getf(fd);
	if (filep == NULL) {
		rv = EBADFD;
		goto unlock;
	}

	pfdp = port_cache_lookup_fp(cache, fd, filep);
	if (pfdp == NULL) {
		/* fd is not associated with this port */
		releasef(fd);
		goto unlock;
	}

	/* only association owner is allowed to remove the association */
	if (curproc->p_pid != PFTOD(pfdp)->pd_portev->portkev_pid) {
		releasef(fd);
		rv = EACCES;
		goto unlock;
	}

	/* remove port from the file descriptor interested list */
	delfd_port(fd, pfdp);
	releasef(fd);

	/* remove polldat & port event structure */
	port_remove_fd_object(pfdp, pp, cache);

unlock:
	mutex_exit(&cache->pc_lock);
	return (rv);
}
/*
 * port_alloc_event() must be used by every event source to reserve a
 * slot (a port_kevent_t) for event notification.
 *
 * The reservation can be refused for lack of resources, so an event
 * source should allocate its slot as early as possible and be prepared
 * to receive an error code instead of an event pointer.  All current
 * event sources allocate the slot on system call entry and hand the
 * error back to the application if the reservation fails.  It is also
 * recommended to associate the event source with the port before any
 * other port function is used.
 *
 * `port' is the file descriptor the application obtained from
 * port_create().  `flags' is one of:
 *
 *	PORT_ALLOC_DEFAULT
 *		Standard event.  port_get(n) frees the event structure
 *		as soon as the event has been delivered.
 *	PORT_ALLOC_PRIVATE
 *		Event for private use of the event source.  port_get(n)
 *		delivers the event but does not free the structure; the
 *		event source must call port_free_event().
 *	PORT_ALLOC_CACHED
 *		Event belonging to a source that keeps its own cache.
 *		port_get(n) delivers the event but does not free the
 *		structure; the event source must call port_free_event().
 *
 * Returns:
 *	0	success; *pkevpp points to the zeroed, initialized event
 *	EBADF	`port' is not an open file descriptor
 *	EBADFD	`port' does not refer to an event port
 *	ENOMEM	no event structure could be allocated (KM_NOSLEEP)
 *	EAGAIN	the port already holds port_max_events events
 */
int
port_alloc_event(int port, int flags, int source, port_kevent_t **pkevpp)
{
	port_t		*portp;
	file_t		*filep;
	port_kevent_t	*kev;
	int		rv;

	filep = getf(port);
	if (filep == NULL)
		return (EBADF);

	if (filep->f_vnode->v_type != VPORT) {
		rv = EBADFD;
		goto done;
	}

	kev = kmem_cache_alloc(port_control.pc_cache, KM_NOSLEEP);
	if (kev == NULL) {
		rv = ENOMEM;
		goto done;
	}

	/*
	 * port_max_events is controlled by the resource control
	 * process.port-max-events
	 */
	portp = VTOEP(filep->f_vnode);
	mutex_enter(&portp->port_queue.portq_mutex);
	if (portp->port_curr >= portp->port_max_events) {
		mutex_exit(&portp->port_queue.portq_mutex);
		kmem_cache_free(port_control.pc_cache, kev);
		rv = EAGAIN;
		goto done;
	}
	portp->port_curr++;
	mutex_exit(&portp->port_queue.portq_mutex);

	/* Start from a clean structure, then fill in the identity fields. */
	bzero(kev, sizeof (port_kevent_t));
	mutex_init(&kev->portkev_lock, NULL, MUTEX_DEFAULT, NULL);
	kev->portkev_source = source;
	kev->portkev_flags = flags;
	kev->portkev_pid = curproc->p_pid;
	kev->portkev_port = portp;
	*pkevpp = kev;
	rv = 0;

done:
	releasef(port);
	return (rv);
}
/*
 * port_fd_callback() is the callback attached to file descriptor
 * events.  `arg' is the portfd_t of the association, `flag' selects
 * the action:
 *
 *	PORT_CALLBACK_DEFAULT
 *		Report the current events through *events.  If the
 *		caller is not the process identified by `pid', delivery
 *		is only allowed when the caller's descriptor pd_fd
 *		refers to the same file_t as the association; otherwise
 *		EACCES is returned.
 *	PORT_CALLBACK_DISSOCIATE
 *		Nothing to do.
 *	PORT_CALLBACK_CLOSE
 *		Detach the polldat from its event and pollhead and
 *		remove the portfd from the fd cache.
 *
 * Any other flag yields EINVAL.  `evp' is not used.
 */
/* ARGSUSED */
static int
port_fd_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
{
	portfd_t	*pfdp = (portfd_t *)arg;
	polldat_t	*pdp = PFTOD(pfdp);
	port_fdcache_t	*cache;
	file_t		*filep;

	ASSERT((pdp != NULL) && (events != NULL));

	switch (flag) {
	case PORT_CALLBACK_DEFAULT:
		if (curproc->p_pid != pid) {
			/*
			 * Check if current process is allowed to retrieve
			 * events from this fd.
			 */
			filep = getf(pdp->pd_fd);
			if (filep == NULL)
				return (EACCES);	/* deny delivery */
			releasef(pdp->pd_fd);
			/* only the pointer value is compared from here on */
			if (filep != pdp->pd_fp)
				return (EACCES);	/* deny delivery */
		}
		*events = pdp->pd_portev->portkev_events; /* update events */
		return (0);

	case PORT_CALLBACK_DISSOCIATE:
		return (0);

	case PORT_CALLBACK_CLOSE:
		/* remove polldat/portfd struct */
		pdp->pd_portev = NULL;
		cache = (port_fdcache_t *)pdp->pd_pcache;
		mutex_enter(&cache->pc_lock);
		pdp->pd_fp = NULL;
		pdp->pd_events = 0;
		if (pdp->pd_php != NULL) {
			pollhead_delete(pdp->pd_php, pdp);
			pdp->pd_php = NULL;
		}
		port_pcache_remove_fd(cache, pfdp);
		mutex_exit(&cache->pc_lock);
		return (0);

	default:
		return (EINVAL);
	}
}
/*
 * port_dissociate_ksource() drops one reference of the event source
 * entry `ps' on the port referenced by the descriptor `port'.  When the
 * last reference goes away the entry is unlinked from its hash bucket
 * (selected by `source') and freed.
 *
 * Returns:
 *	0	success
 *	EINVAL	`ps' is NULL
 *	EBADF	`port' is not an open file descriptor
 *	EBADFD	`port' does not refer to an event port
 */
int
port_dissociate_ksource(int port, int source, port_source_t *ps)
{
	port_t		*portp;
	file_t		*filep;
	port_source_t	*before;
	port_source_t	*after;

	if (ps == NULL)
		return (EINVAL);

	filep = getf(port);
	if (filep == NULL)
		return (EBADF);

	if (filep->f_vnode->v_type != VPORT) {
		releasef(port);
		return (EBADFD);
	}
	portp = VTOEP(filep->f_vnode);

	mutex_enter(&portp->port_queue.portq_source_mutex);
	ps->portsrc_cnt--;
	if (ps->portsrc_cnt == 0) {
		/* last association removed -> free source structure */
		before = ps->portsrc_prev;
		after = ps->portsrc_next;

		if (before == NULL) {
			/* first entry: the bucket head moves to the successor */
			portp->port_queue.portq_scache[PORT_SHASH(source)] =
			    after;
		} else {
			before->portsrc_next = after;
		}
		if (after != NULL)
			after->portsrc_prev = before;

		kmem_free(ps, sizeof (port_source_t));
	}
	mutex_exit(&portp->port_queue.portq_source_mutex);
	releasef(port);
	return (0);
}
/*
 * Helper for SMBIOC_DUP_DEV
 *
 * Copies the VC, share and level from the device instance open on the
 * file descriptor found at user address `arg' (the "from" side) onto
 * `sdp', taking a hold on each non-NULL VC and share.
 *
 * Returns:
 *	0	 state duplicated
 *	EISCONN	 `sdp' already has a VC
 *	EFAULT	 the descriptor could not be copied in
 *	EBADF	 the descriptor is not open
 *	EINVAL	 the descriptor is not an nsmb device, or has no soft state
 */
int
smb_usr_dup_dev(smb_dev_t *sdp, intptr_t arg, int flags)
{
	file_t		*filep;
	smb_dev_t	*src = NULL;
	dev_t		rdev;
	int32_t		ufd;
	int		rc = EINVAL;

	/* Should be no VC */
	if (sdp->sd_vc != NULL)
		return (EISCONN);

	/* Fetch the descriptor naming the instance to duplicate. */
	if (ddi_copyin((void *) arg, &ufd, sizeof (ufd), flags))
		return (EFAULT);

	filep = getf(ufd);
	if (filep == NULL)
		return (EBADF);

	/* The descriptor must be one of our own device nodes. */
	rdev = filep->f_vnode->v_rdev;
	if (rdev != 0 && rdev != NODEV && getmajor(rdev) == nsmb_major)
		src = ddi_get_soft_state(statep, getminor(rdev));

	if (src != NULL) {
		/*
		 * Duplicate VC and share references onto this FD.
		 */
		sdp->sd_vc = src->sd_vc;
		if (sdp->sd_vc != NULL)
			smb_vc_hold(sdp->sd_vc);

		sdp->sd_share = src->sd_share;
		if (sdp->sd_share != NULL)
			smb_share_hold(sdp->sd_share);

		sdp->sd_level = src->sd_level;
		rc = 0;
	}

	releasef(ufd);
	return (rc);
}
/*
 * fchdir() is the file-descriptor based version of chdir: the vnode
 * behind `fd' is handed to chdirec().
 *
 * The vnode is held before the descriptor is released so that it stays
 * valid for chdirec().  NOTE(review): this function never drops that
 * hold itself, so chdirec() presumably consumes it -- confirm.
 *
 * Returns 0 on success, otherwise the result of set_errno() with EBADF
 * or the error reported by chdirec().
 */
int
fchdir(int fd)
{
	file_t	*filep;
	vnode_t	*dvp;
	int	rc;

	filep = getf(fd);
	if (filep == NULL)
		return (set_errno(EBADF));

	dvp = filep->f_vnode;
	VN_HOLD(dvp);
	releasef(fd);

	rc = chdirec(dvp, 0, 0);
	return ((rc != 0) ? set_errno(rc) : 0);
}
/* * Helper used by smbfs_mount */ int smb_dev2share(int fd, struct smb_share **sspp) { file_t *fp = NULL; vnode_t *vp; smb_dev_t *sdp; smb_share_t *ssp; dev_t dev; int err; if ((fp = getf(fd)) == NULL) return (EBADF); /* rele fp below */ vp = fp->f_vnode; dev = vp->v_rdev; if (dev == 0 || dev == NODEV || getmajor(dev) != nsmb_major) { err = EINVAL; goto out; } sdp = ddi_get_soft_state(statep, getminor(dev)); if (sdp == NULL) { err = EINVAL; goto out; } ssp = sdp->sd_share; if (ssp == NULL) { err = ENOTCONN; goto out; } /* * Our caller gains a ref. to this share. */ *sspp = ssp; smb_share_hold(ssp); err = 0; out: if (fp) releasef(fd); return (err); }
/*
 * fgetlabel() fetches the label of the file open on `fd' into *label_p
 * by calling cgetlabel() on the file's vnode.
 *
 * Returns 0 on success, otherwise the result of set_errno() with EBADF
 * or the error reported by cgetlabel().
 */
int
fgetlabel(int fd, bslabel_t *label_p)
{
	file_t	*filep;
	int	rc;

	filep = getf(fd);
	if (filep == NULL)
		return (set_errno(EBADF));

	rc = cgetlabel(label_p, filep->f_vnode);
	releasef(fd);

	return ((rc != 0) ? set_errno(rc) : 0);
}
/*
 * port_remove_portfd() dissociates the port from the file descriptor
 * described by `pdp' and vice versa.  The caller must hold
 * pcp->pc_lock.
 */
static void
port_remove_portfd(polldat_t *pdp, port_fdcache_t *pcp)
{
	port_t	*portp;

	ASSERT(MUTEX_HELD(&pcp->pc_lock));
	portp = pdp->pd_portev->portkev_port;

	/*
	 * If we did not get the fp for pd_fd but its portfd_t
	 * still exist in the cache, it means the pd_fd is being
	 * closed by some other thread which will also free the portfd_t.
	 */
	if (getf(pdp->pd_fd) == NULL)
		return;

	delfd_port(pdp->pd_fd, PDTOF(pdp));
	releasef(pdp->pd_fd);
	port_remove_fd_object(PDTOF(pdp), portp, pcp);
}
/*
 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
 *
 * The 64-bit libc also maps the "large file" routine mmap64(2) onto
 * this entry point.  Eventually this should be the only version, with
 * smmap_common() folded back into it.
 *
 * Returns the mapped address on success.  On failure the result of
 * set_errno() is returned, cast to caddr_t; the error is EINVAL when
 * the caller passes _MAP_LOW32, EBADF when `fd' is not open (and the
 * request is not anonymous), or whatever smmap_common() reports.
 */
caddr_t
smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
{
	struct file	*filep;
	int		rc;

	if ((flags & _MAP_LOW32) != 0) {
		rc = EINVAL;
	} else if (fd == -1 && (flags & MAP_ANON) != 0) {
		/* anonymous mapping: no file involved */
		rc = smmap_common(&addr, len, prot, flags,
		    NULL, (offset_t)pos);
	} else {
		filep = getf(fd);
		if (filep == NULL) {
			rc = EBADF;
		} else {
			rc = smmap_common(&addr, len, prot, flags,
			    filep, (offset_t)pos);
			releasef(fd);
		}
	}

	if (rc != 0)
		return ((caddr_t)(uintptr_t)set_errno(rc));
	return (addr);
}
/*
 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
 *
 * The mapping is always requested with _MAP_LOW32; a caller passing
 * _MAP_LOW32 itself gets EINVAL.  On success the mapped address is
 * returned (and asserted to end below UINT32_MAX).  On failure the
 * result of set_errno() is returned, cast to caddr_t.
 */
caddr_t
smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
{
	struct file	*filep;
	caddr_t		a = (caddr_t)(uintptr_t)addr;
	int		error;

	if ((flags & _MAP_LOW32) != 0) {
		error = EINVAL;
	} else if (fd == -1 && (flags & MAP_ANON) != 0) {
		/* anonymous mapping: no file involved */
		error = smmap_common(&a, (size_t)len, prot,
		    flags | _MAP_LOW32, NULL, (offset_t)pos);
	} else {
		filep = getf(fd);
		if (filep == NULL) {
			error = EBADF;
		} else {
			error = smmap_common(&a, (size_t)len, prot,
			    flags | _MAP_LOW32, filep, (offset_t)pos);
			releasef(fd);
		}
	}

	ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);

	if (error != 0)
		return ((caddr_t)(uintptr_t)set_errno(error));
	return (a);
}
/* * Flush output pending for file. */ int fdsync(int fd, int flag) { file_t *fp; register int error; int syncflag; if ((fp = getf(fd)) != NULL) { /* * This flag will determine the file sync * or data sync. * FSYNC : file sync * FDSYNC : data sync */ syncflag = flag & (FSYNC|FDSYNC); if (error = VOP_FSYNC(fp->f_vnode, syncflag, fp->f_cred, NULL)) (void) set_errno(error); releasef(fd); } else error = set_errno(EBADF); return (error); }
/*
 * wput(9E) is symmetric for master and slave sides, so this handles both
 * without splitting the codepath.  (The only exception to this is the
 * processing of zcons ioctls, which is restricted to the master side.)
 *
 * Master-side M_IOCTL messages carrying ZC_HOLDSLAVE or ZC_RELEASESLAVE
 * are handled and acknowledged here and never travel to the other side.
 *
 * For everything else zc_wput() looks at the other side.  If there is no
 * process holding that side open, the message is disposed of right away:
 * M_FLUSH goes to handle_mflush(), M_IOCTL is nak'ed and anything else is
 * freed.  This prevents processes from hanging if no one is holding open
 * the console.  Otherwise, it putnext's high priority messages (except
 * M_READ, which is dropped, and M_FLUSH, which goes to handle_mflush()),
 * putnext's normal messages if possible, and otherwise enqueues the
 * messages; in the case that something is enqueued, wsrv(9E) will take
 * care of eventually shuttling I/O to the other side.
 */
static void
zc_wput(queue_t *qp, mblk_t *mp)
{
	unsigned char type = mp->b_datap->db_type;
	zc_state_t *zcs;
	struct iocblk *iocbp;
	file_t *slave_filep;
	struct snode *slave_snodep;
	int slave_fd;

	ASSERT(qp->q_ptr);

	DBG1("entering zc_wput, %s side", zc_side(qp));

	/*
	 * Process zcons ioctl messages if qp is the master console's write
	 * queue.
	 */
	zcs = (zc_state_t *)qp->q_ptr;
	if (zcs->zc_master_rdq != NULL && qp == WR(zcs->zc_master_rdq) &&
	    type == M_IOCTL) {
		iocbp = (struct iocblk *)(void *)mp->b_rptr;
		switch (iocbp->ioc_cmd) {
		case ZC_HOLDSLAVE:
			/*
			 * Hold the slave's vnode and increment the refcount
			 * of the snode.  If the vnode is already held, then
			 * indicate success.
			 */
			if (iocbp->ioc_count != TRANSPARENT) {
				miocack(qp, mp, 0, EINVAL);
				return;
			}
			if (zcs->zc_slave_vnode != NULL) {
				miocack(qp, mp, 0, 0);
				return;
			}

			/*
			 * The process that passed the ioctl must be running in
			 * the global zone.
			 */
			if (curzone != global_zone) {
				miocack(qp, mp, 0, EINVAL);
				return;
			}

			/*
			 * The calling process must pass a file descriptor for
			 * the slave device.  The descriptor is taken from the
			 * start of the continuation block.
			 * NOTE(review): mp->b_cont is dereferenced without a
			 * NULL check; presumably a TRANSPARENT ioctl always
			 * carries one -- confirm.
			 */
			slave_fd =
			    (int)(intptr_t)*(caddr_t *)(void *)mp->b_cont->
			    b_rptr;
			slave_filep = getf(slave_fd);
			if (slave_filep == NULL) {
				miocack(qp, mp, 0, EINVAL);
				return;
			}
			/* The descriptor must name this console's slave. */
			if (ZC_STATE_TO_SLAVEDEV(zcs) !=
			    slave_filep->f_vnode->v_rdev) {
				releasef(slave_fd);
				miocack(qp, mp, 0, EINVAL);
				return;
			}

			/*
			 * Get a reference to the slave's vnode.  Also bump the
			 * reference count on the associated snode.
			 */
			ASSERT(vn_matchops(slave_filep->f_vnode,
			    spec_getvnodeops()));
			zcs->zc_slave_vnode = slave_filep->f_vnode;
			VN_HOLD(zcs->zc_slave_vnode);
			slave_snodep = VTOCS(zcs->zc_slave_vnode);
			mutex_enter(&slave_snodep->s_lock);
			++slave_snodep->s_count;
			mutex_exit(&slave_snodep->s_lock);
			releasef(slave_fd);
			miocack(qp, mp, 0, 0);
			return;
		case ZC_RELEASESLAVE:
			/*
			 * Release the master's handle on the slave's vnode.
			 * If there isn't a handle for the vnode, then indicate
			 * success.
			 */
			if (iocbp->ioc_count != TRANSPARENT) {
				miocack(qp, mp, 0, EINVAL);
				return;
			}
			if (zcs->zc_slave_vnode == NULL) {
				miocack(qp, mp, 0, 0);
				return;
			}

			/*
			 * The process that passed the ioctl must be running in
			 * the global zone.
			 */
			if (curzone != global_zone) {
				miocack(qp, mp, 0, EINVAL);
				return;
			}

			/*
			 * The process that passed the ioctl must have provided
			 * a file descriptor for the slave device.  Make sure
			 * this is correct.
			 */
			slave_fd =
			    (int)(intptr_t)*(caddr_t *)(void *)mp->b_cont->
			    b_rptr;
			slave_filep = getf(slave_fd);
			if (slave_filep == NULL) {
				miocack(qp, mp, 0, EINVAL);
				return;
			}
			/* Must be the same device as the vnode being held. */
			if (zcs->zc_slave_vnode->v_rdev !=
			    slave_filep->f_vnode->v_rdev) {
				releasef(slave_fd);
				miocack(qp, mp, 0, EINVAL);
				return;
			}

			/*
			 * Decrement the snode's reference count and release the
			 * vnode.
			 */
			ASSERT(vn_matchops(slave_filep->f_vnode,
			    spec_getvnodeops()));
			slave_snodep = VTOCS(zcs->zc_slave_vnode);
			mutex_enter(&slave_snodep->s_lock);
			--slave_snodep->s_count;
			mutex_exit(&slave_snodep->s_lock);
			VN_RELE(zcs->zc_slave_vnode);
			zcs->zc_slave_vnode = NULL;
			releasef(slave_fd);
			miocack(qp, mp, 0, 0);
			return;
		default:
			/* not a zcons ioctl: handle like any other message */
			break;
		}
	}

	/*
	 * Nobody has the other side open: dispose of the message here
	 * instead of forwarding or queueing it.
	 */
	if (zc_switch(RD(qp)) == NULL) {
		DBG1("wput to %s side (no one listening)", zc_side(qp));
		switch (type) {
		case M_FLUSH:
			handle_mflush(qp, mp);
			break;
		case M_IOCTL:
			miocnak(qp, mp, 0, 0);
			break;
		default:
			freemsg(mp);
			break;
		}
		return;
	}

	/* High priority messages are never queued. */
	if (type >= QPCTL) {
		DBG1("(hipri) wput, %s side", zc_side(qp));
		switch (type) {
		case M_READ:		/* supposedly from ldterm? */
			DBG("zc_wput: tossing M_READ\n");
			freemsg(mp);
			break;
		case M_FLUSH:
			handle_mflush(qp, mp);
			break;
		default:
			/*
			 * Put this to the other side.
			 */
			ASSERT(zc_switch(RD(qp)) != NULL);
			putnext(zc_switch(RD(qp)), mp);
			break;
		}
		DBG1("done (hipri) wput, %s side", zc_side(qp));
		return;
	}

	/*
	 * Only putnext if there isn't already something in the queue.
	 * otherwise things would wind up out of order.
	 */
	if (qp->q_first == NULL && bcanputnext(RD(zc_switch(qp)), mp->b_band)) {
		DBG("wput: putting message to other side\n");
		putnext(RD(zc_switch(qp)), mp);
	} else {
		DBG("wput: putting msg onto queue\n");
		(void) putq(qp, mp);
	}
	DBG1("done wput, %s side", zc_side(qp));
}
static int copen(int startfd, char *fname, int filemode, int createmode) { struct pathname pn; vnode_t *vp, *sdvp; file_t *fp, *startfp; enum vtype type; int error; int fd, dupfd; vnode_t *startvp; proc_t *p = curproc; if (startfd == AT_FDCWD) { /* * Regular open() */ startvp = NULL; } else { /* * We're here via openat() */ char startchar; if (copyin(fname, &startchar, sizeof (char))) return (set_errno(EFAULT)); /* * if startchar is / then startfd is ignored */ if (startchar == '/') startvp = NULL; else { if ((startfp = getf(startfd)) == NULL) return (set_errno(EBADF)); startvp = startfp->f_vnode; VN_HOLD(startvp); releasef(startfd); } } if (filemode & FXATTR) { /* * Make sure we have a valid request. * We must either have a real fd or AT_FDCWD */ if (startfd != AT_FDCWD && startvp == NULL) { error = EINVAL; goto out; } if (error = pn_get(fname, UIO_USERSPACE, &pn)) { goto out; } if (startfd == AT_FDCWD) { mutex_enter(&p->p_lock); startvp = PTOU(p)->u_cdir; VN_HOLD(startvp); mutex_exit(&p->p_lock); } /* * Verify permission to put attributes on file */ if ((VOP_ACCESS(startvp, VREAD, 0, CRED()) != 0) && (VOP_ACCESS(startvp, VWRITE, 0, CRED()) != 0) && (VOP_ACCESS(startvp, VEXEC, 0, CRED()) != 0)) { error = EACCES; pn_free(&pn); goto out; } if ((startvp->v_vfsp->vfs_flag & VFS_XATTR) != 0) { error = VOP_LOOKUP(startvp, "", &sdvp, &pn, LOOKUP_XATTR|CREATE_XATTR_DIR, rootvp, CRED()); } else { error = EINVAL; } pn_free(&pn); if (error != 0) goto out; VN_RELE(startvp); startvp = sdvp; } if ((filemode & (FREAD|FWRITE)) != 0) { if ((filemode & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY)) filemode &= ~FNDELAY; error = falloc((vnode_t *)NULL, filemode, &fp, &fd); if (error == 0) { #ifdef C2_AUDIT if (audit_active) audit_setfsat_path(1); #endif /* C2_AUDIT */ /* * Last arg is a don't-care term if * !(filemode & FCREAT). 
*/ error = vn_openat(fname, UIO_USERSPACE, filemode, (int)(createmode & MODEMASK), &vp, CRCREAT, u.u_cmask, startvp); if (startvp != NULL) VN_RELE(startvp); if (error == 0) { #ifdef C2_AUDIT if (audit_active) audit_copen(fd, fp, vp); #endif /* C2_AUDIT */ if ((vp->v_flag & VDUP) == 0) { fp->f_vnode = vp; mutex_exit(&fp->f_tlock); /* * We must now fill in the slot * falloc reserved. */ setf(fd, fp); return (fd); } else { /* * Special handling for /dev/fd. * Give up the file pointer * and dup the indicated file descriptor * (in v_rdev). This is ugly, but I've * seen worse. */ unfalloc(fp); dupfd = getminor(vp->v_rdev); type = vp->v_type; mutex_enter(&vp->v_lock); vp->v_flag &= ~VDUP; mutex_exit(&vp->v_lock); VN_RELE(vp); if (type != VCHR) return (set_errno(EINVAL)); if ((fp = getf(dupfd)) == NULL) { setf(fd, NULL); return (set_errno(EBADF)); } mutex_enter(&fp->f_tlock); fp->f_count++; mutex_exit(&fp->f_tlock); setf(fd, fp); releasef(dupfd); } return (fd); } else { setf(fd, NULL); unfalloc(fp); return (set_errno(error)); } } } else { error = EINVAL; } out: if (startvp != NULL) VN_RELE(startvp); return (set_errno(error)); }
/*
 * zfs_onexit_fd_rele() releases the file descriptor `fd' by calling
 * releasef().  NOTE(review): presumably the counterpart of a routine
 * that obtained the descriptor with getf() -- confirm against callers.
 */
void
zfs_onexit_fd_rele(int fd)
{
	releasef(fd);
}
/*
 * s10_brandsys() is the brand system call handler of the s10 brand.
 *
 * *rval is cleared first.  B_S10_NATIVE is passed to s10_native();
 * every other command is offered to brand_solaris_cmd(), and a
 * non-negative result from it is returned as is.  The remaining
 * commands are handled here:
 *
 *	B_S10_PIDINFO		copy out the pid of the current process
 *				(arg1) and of the zone's init (arg2)
 *	B_S10_ISFDXATTRDIR	set *rval to whether fd arg1 refers to
 *				an extended attribute directory
 *	B_S10_FSREGCORRECTION	(amd64 only) correct the %fs register of
 *				the current thread
 *
 * Returns 0 on success, EFAULT or EBADF on failure, and EINVAL for an
 * unknown command.  arg4, arg5 and arg6 are not used.
 */
/*ARGSUSED*/
int
s10_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
    uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
{
	proc_t	*curp = curproc;
	int	rc;

	*rval = 0;

	if (cmd == B_S10_NATIVE)
		return (s10_native((void *)arg1, (void *)arg2));

	rc = brand_solaris_cmd(cmd, arg1, arg2, arg3, &s10_brand,
	    S10_VERSION);
	if (rc >= 0)
		return (rc);

	if (cmd == B_S10_PIDINFO) {
		/*
		 * The s10 brand needs to be able to get the pid of the
		 * current process and the pid of the zone's init, and it
		 * needs to do this on every process startup.  Early in
		 * brand startup, we can't call getpid() because calls to
		 * getpid() represent a magical signal to some old-skool
		 * debuggers.  By merging all of this into one call, we
		 * make this quite a bit cheaper and easier to handle in
		 * the brand module.
		 */
		if (copyout(&curp->p_pid, (void *)arg1,
		    sizeof (pid_t)) != 0 ||
		    copyout(&curp->p_zone->zone_proc_initpid, (void *)arg2,
		    sizeof (pid_t)) != 0)
			return (EFAULT);
		return (0);
	}

	if (cmd == B_S10_ISFDXATTRDIR) {
		/*
		 * This subcommand enables the userland brand emulation
		 * library to determine whether a file descriptor refers to
		 * an extended file attributes directory.  There is no
		 * standard syscall or libc function that can make such a
		 * determination.
		 */
		file_t *dir_filep = getf((int)arg1);

		if (dir_filep == NULL)
			return (EBADF);
		ASSERT(dir_filep->f_vnode != NULL);
		*rval = IS_XATTRDIR(dir_filep->f_vnode);
		releasef((int)arg1);
		return (0);
	}

#ifdef	__amd64
	if (cmd == B_S10_FSREGCORRECTION) {
		/*
		 * This subcommand exists so that the SYS_lwp_private and
		 * SYS_lwp_create syscalls can manually set the current
		 * thread's %fs register to the legacy S10 selector value
		 * for 64-bit x86 processes.
		 */
		s10_amd64_correct_fsreg(ttolwp(curthread));
		return (0);
	}
#endif	/* __amd64 */

	return (EINVAL);
}
static int copen(int startfd, char *fname, int filemode, int createmode) { struct pathname pn; vnode_t *vp, *sdvp; file_t *fp, *startfp; enum vtype type; int error; int fd, dupfd; vnode_t *startvp; proc_t *p = curproc; uio_seg_t seg = UIO_USERSPACE; char *open_filename = fname; uint32_t auditing = AU_AUDITING(); char startchar; if (filemode & (FSEARCH|FEXEC)) { /* * Must be one or the other and neither FREAD nor FWRITE * Must not be any of FAPPEND FCREAT FTRUNC FXATTR FXATTRDIROPEN * XXX: Should these just be silently ignored? */ if ((filemode & (FREAD|FWRITE)) || (filemode & (FSEARCH|FEXEC)) == (FSEARCH|FEXEC) || (filemode & (FAPPEND|FCREAT|FTRUNC|FXATTR|FXATTRDIROPEN))) return (set_errno(EINVAL)); } if (startfd == AT_FDCWD) { /* * Regular open() */ startvp = NULL; } else { /* * We're here via openat() */ if (copyin(fname, &startchar, sizeof (char))) return (set_errno(EFAULT)); /* * if startchar is / then startfd is ignored */ if (startchar == '/') startvp = NULL; else { if ((startfp = getf(startfd)) == NULL) return (set_errno(EBADF)); startvp = startfp->f_vnode; VN_HOLD(startvp); releasef(startfd); } } /* * Handle __openattrdirat() requests */ if (filemode & FXATTRDIROPEN) { if (auditing && startvp != NULL) audit_setfsat_path(1); if (error = lookupnameat(fname, seg, FOLLOW, NULLVPP, &vp, startvp)) return (set_errno(error)); if (startvp != NULL) VN_RELE(startvp); startvp = vp; } /* * Do we need to go into extended attribute space? */ if (filemode & FXATTR) { if (startfd == AT_FDCWD) { if (copyin(fname, &startchar, sizeof (char))) return (set_errno(EFAULT)); /* * If startchar == '/' then no extended attributes * are looked up. */ if (startchar == '/') { startvp = NULL; } else { mutex_enter(&p->p_lock); startvp = PTOU(p)->u_cdir; VN_HOLD(startvp); mutex_exit(&p->p_lock); } } /* * Make sure we have a valid extended attribute request. * We must either have a real fd or AT_FDCWD and a relative * pathname. 
*/ if (startvp == NULL) { goto noxattr; } } if (filemode & (FXATTR|FXATTRDIROPEN)) { vattr_t vattr; if (error = pn_get(fname, UIO_USERSPACE, &pn)) { goto out; } /* * In order to access hidden attribute directory the * user must be able to stat() the file */ vattr.va_mask = AT_ALL; if (error = VOP_GETATTR(startvp, &vattr, 0, CRED(), NULL)) { pn_free(&pn); goto out; } if ((startvp->v_vfsp->vfs_flag & VFS_XATTR) != 0 || vfs_has_feature(startvp->v_vfsp, VFSFT_SYSATTR_VIEWS)) { error = VOP_LOOKUP(startvp, "", &sdvp, &pn, (filemode & FXATTRDIROPEN) ? LOOKUP_XATTR : LOOKUP_XATTR|CREATE_XATTR_DIR, rootvp, CRED(), NULL, NULL, NULL); } else { error = EINVAL; } /* * For __openattrdirat() use "." as filename to open * as part of vn_openat() */ if (error == 0 && (filemode & FXATTRDIROPEN)) { open_filename = "."; seg = UIO_SYSSPACE; } pn_free(&pn); if (error != 0) goto out; VN_RELE(startvp); startvp = sdvp; } noxattr: if ((filemode & (FREAD|FWRITE|FSEARCH|FEXEC|FXATTRDIROPEN)) != 0) { if ((filemode & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY)) filemode &= ~FNDELAY; error = falloc((vnode_t *)NULL, filemode, &fp, &fd); if (error == 0) { if (auditing && startvp != NULL) audit_setfsat_path(1); /* * Last arg is a don't-care term if * !(filemode & FCREAT). */ error = vn_openat(open_filename, seg, filemode, (int)(createmode & MODEMASK), &vp, CRCREAT, PTOU(curproc)->u_cmask, startvp, fd); if (startvp != NULL) VN_RELE(startvp); if (error == 0) { if ((vp->v_flag & VDUP) == 0) { fp->f_vnode = vp; mutex_exit(&fp->f_tlock); /* * We must now fill in the slot * falloc reserved. */ setf(fd, fp); return (fd); } else { /* * Special handling for /dev/fd. * Give up the file pointer * and dup the indicated file descriptor * (in v_rdev). This is ugly, but I've * seen worse. 
*/ unfalloc(fp); dupfd = getminor(vp->v_rdev); type = vp->v_type; mutex_enter(&vp->v_lock); vp->v_flag &= ~VDUP; mutex_exit(&vp->v_lock); VN_RELE(vp); if (type != VCHR) return (set_errno(EINVAL)); if ((fp = getf(dupfd)) == NULL) { setf(fd, NULL); return (set_errno(EBADF)); } mutex_enter(&fp->f_tlock); fp->f_count++; mutex_exit(&fp->f_tlock); setf(fd, fp); releasef(dupfd); } return (fd); } else { setf(fd, NULL); unfalloc(fp); return (set_errno(error)); } } } else { error = EINVAL; } out: if (startvp != NULL) VN_RELE(startvp); return (set_errno(error)); }
/* * File control. */ int fcntl(int fdes, int cmd, intptr_t arg) { int iarg; int error = 0; int retval; proc_t *p; file_t *fp; vnode_t *vp; u_offset_t offset; u_offset_t start; struct vattr vattr; int in_crit; int flag; struct flock sbf; struct flock64 bf; struct o_flock obf; struct flock64_32 bf64_32; struct fshare fsh; struct shrlock shr; struct shr_locowner shr_own; offset_t maxoffset; model_t datamodel; int fdres; #if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32) ASSERT(sizeof (struct flock) == sizeof (struct flock32)); ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32)); #endif #if defined(_LP64) && !defined(lint) && defined(_SYSCALL32) ASSERT(sizeof (struct flock) == sizeof (struct flock64_64)); ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64)); #endif /* * First, for speed, deal with the subset of cases * that do not require getf() / releasef(). */ switch (cmd) { case F_GETFD: if ((error = f_getfd_error(fdes, &flag)) == 0) retval = flag; goto out; case F_SETFD: error = f_setfd_error(fdes, (int)arg); retval = 0; goto out; case F_GETFL: if ((error = f_getfl(fdes, &flag)) == 0) retval = (flag & (FMASK | FASYNC)) + FOPEN; goto out; case F_GETXFL: if ((error = f_getfl(fdes, &flag)) == 0) retval = flag + FOPEN; goto out; case F_BADFD: if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0) retval = fdres; goto out; } /* * Second, for speed, deal with the subset of cases that * require getf() / releasef() but do not require copyin. 
*/ if ((fp = getf(fdes)) == NULL) { error = EBADF; goto out; } iarg = (int)arg; switch (cmd) { /* ONC_PLUS EXTRACT END */ case F_DUPFD: p = curproc; if ((uint_t)iarg >= p->p_fno_ctl) { if (iarg >= 0) fd_too_big(p); error = EINVAL; } else if ((retval = ufalloc_file(iarg, fp)) == -1) { error = EMFILE; } else { mutex_enter(&fp->f_tlock); fp->f_count++; mutex_exit(&fp->f_tlock); } goto done; case F_DUP2FD: p = curproc; if (fdes == iarg) { retval = iarg; } else if ((uint_t)iarg >= p->p_fno_ctl) { if (iarg >= 0) fd_too_big(p); error = EBADF; } else { /* * We can't hold our getf(fdes) across the call to * closeandsetf() because it creates a window for * deadlock: if one thread is doing dup2(a, b) while * another is doing dup2(b, a), each one will block * waiting for the other to call releasef(). The * solution is to increment the file reference count * (which we have to do anyway), then releasef(fdes), * then closeandsetf(). Incrementing f_count ensures * that fp won't disappear after we call releasef(). * When closeandsetf() fails, we try avoid calling * closef() because of all the side effects. */ mutex_enter(&fp->f_tlock); fp->f_count++; mutex_exit(&fp->f_tlock); releasef(fdes); if ((error = closeandsetf(iarg, fp)) == 0) { retval = iarg; } else { mutex_enter(&fp->f_tlock); if (fp->f_count > 1) { fp->f_count--; mutex_exit(&fp->f_tlock); } else { mutex_exit(&fp->f_tlock); (void) closef(fp); } } goto out; } goto done; case F_SETFL: vp = fp->f_vnode; flag = fp->f_flag; if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY)) iarg &= ~FNDELAY; if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred)) == 0) { iarg &= FMASK; mutex_enter(&fp->f_tlock); fp->f_flag &= ~FMASK | (FREAD|FWRITE); fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE); mutex_exit(&fp->f_tlock); } retval = 0; goto done; } /* * Finally, deal with the expensive cases. 
*/ retval = 0; in_crit = 0; maxoffset = MAXOFF_T; datamodel = DATAMODEL_NATIVE; #if defined(_SYSCALL32_IMPL) if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32) maxoffset = MAXOFF32_T; #endif vp = fp->f_vnode; flag = fp->f_flag; offset = fp->f_offset; switch (cmd) { /* ONC_PLUS EXTRACT START */ /* * The file system and vnode layers understand and implement * locking with flock64 structures. So here once we pass through * the test for compatibility as defined by LFS API, (for F_SETLK, * F_SETLKW, F_GETLK, F_GETLKW, F_FREESP) we transform * the flock structure to a flock64 structure and send it to the * lower layers. Similarly in case of GETLK the returned flock64 * structure is transformed to a flock structure if everything fits * in nicely, otherwise we return EOVERFLOW. */ case F_GETLK: case F_O_GETLK: case F_SETLK: case F_SETLKW: case F_SETLK_NBMAND: /* * Copy in input fields only. */ if (cmd == F_O_GETLK) { if (datamodel != DATAMODEL_ILP32) { error = EINVAL; break; } if (copyin((void *)arg, &obf, sizeof (obf))) { error = EFAULT; break; } bf.l_type = obf.l_type; bf.l_whence = obf.l_whence; bf.l_start = (off64_t)obf.l_start; bf.l_len = (off64_t)obf.l_len; bf.l_sysid = (int)obf.l_sysid; bf.l_pid = obf.l_pid; } else if (datamodel == DATAMODEL_NATIVE) { if (copyin((void *)arg, &sbf, sizeof (sbf))) { error = EFAULT; break; } /* * XXX In an LP64 kernel with an LP64 application * there's no need to do a structure copy here * struct flock == struct flock64. However, * we did it this way to avoid more conditional * compilation. 
*/ bf.l_type = sbf.l_type; bf.l_whence = sbf.l_whence; bf.l_start = (off64_t)sbf.l_start; bf.l_len = (off64_t)sbf.l_len; bf.l_sysid = sbf.l_sysid; bf.l_pid = sbf.l_pid; } #if defined(_SYSCALL32_IMPL) else { struct flock32 sbf32; if (copyin((void *)arg, &sbf32, sizeof (sbf32))) { error = EFAULT; break; } bf.l_type = sbf32.l_type; bf.l_whence = sbf32.l_whence; bf.l_start = (off64_t)sbf32.l_start; bf.l_len = (off64_t)sbf32.l_len; bf.l_sysid = sbf32.l_sysid; bf.l_pid = sbf32.l_pid; } #endif /* _SYSCALL32_IMPL */ /* * 64-bit support: check for overflow for 32-bit lock ops */ if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0) break; /* * Not all of the filesystems understand F_O_GETLK, and * there's no need for them to know. Map it to F_GETLK. */ if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd, &bf, flag, offset, NULL, fp->f_cred)) != 0) break; /* * If command is GETLK and no lock is found, only * the type field is changed. */ if ((cmd == F_O_GETLK || cmd == F_GETLK) && bf.l_type == F_UNLCK) { /* l_type always first entry, always a short */ if (copyout(&bf.l_type, &((struct flock *)arg)->l_type, sizeof (bf.l_type))) error = EFAULT; break; } if (cmd == F_O_GETLK) { /* * Return an SVR3 flock structure to the user. */ obf.l_type = (int16_t)bf.l_type; obf.l_whence = (int16_t)bf.l_whence; obf.l_start = (int32_t)bf.l_start; obf.l_len = (int32_t)bf.l_len; if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) { /* * One or both values for the above fields * is too large to store in an SVR3 flock * structure. */ error = EOVERFLOW; break; } obf.l_sysid = (int16_t)bf.l_sysid; obf.l_pid = (int16_t)bf.l_pid; if (copyout(&obf, (void *)arg, sizeof (obf))) error = EFAULT; } else if (cmd == F_GETLK) { /* * Copy out SVR4 flock. 
*/ int i; if (bf.l_start > maxoffset || bf.l_len > maxoffset) { error = EOVERFLOW; break; } if (datamodel == DATAMODEL_NATIVE) { for (i = 0; i < 4; i++) sbf.l_pad[i] = 0; /* * XXX In an LP64 kernel with an LP64 * application there's no need to do a * structure copy here as currently * struct flock == struct flock64. * We did it this way to avoid more * conditional compilation. */ sbf.l_type = bf.l_type; sbf.l_whence = bf.l_whence; sbf.l_start = (off_t)bf.l_start; sbf.l_len = (off_t)bf.l_len; sbf.l_sysid = bf.l_sysid; sbf.l_pid = bf.l_pid; if (copyout(&sbf, (void *)arg, sizeof (sbf))) error = EFAULT; } #if defined(_SYSCALL32_IMPL) else { struct flock32 sbf32; if (bf.l_start > MAXOFF32_T || bf.l_len > MAXOFF32_T) { error = EOVERFLOW; break; } for (i = 0; i < 4; i++) sbf32.l_pad[i] = 0; sbf32.l_type = (int16_t)bf.l_type; sbf32.l_whence = (int16_t)bf.l_whence; sbf32.l_start = (off32_t)bf.l_start; sbf32.l_len = (off32_t)bf.l_len; sbf32.l_sysid = (int32_t)bf.l_sysid; sbf32.l_pid = (pid32_t)bf.l_pid; if (copyout(&sbf32, (void *)arg, sizeof (sbf32))) error = EFAULT; } #endif } break; /* ONC_PLUS EXTRACT END */ case F_CHKFL: /* * This is for internal use only, to allow the vnode layer * to validate a flags setting before applying it. User * programs can't issue it. */ error = EINVAL; break; case F_ALLOCSP: case F_FREESP: case F_ALLOCSP64: case F_FREESP64: if ((flag & FWRITE) == 0) { error = EBADF; break; } if (vp->v_type != VREG) { error = EINVAL; break; } if (datamodel != DATAMODEL_ILP32 && (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) { error = EINVAL; break; } #if defined(_ILP32) || defined(_SYSCALL32_IMPL) if (datamodel == DATAMODEL_ILP32 && (cmd == F_ALLOCSP || cmd == F_FREESP)) { struct flock32 sbf32; /* * For compatibility we overlay an SVR3 flock on an SVR4 * flock. This works because the input field offsets * in "struct flock" were preserved. 
*/ if (copyin((void *)arg, &sbf32, sizeof (sbf32))) { error = EFAULT; break; } else { bf.l_type = sbf32.l_type; bf.l_whence = sbf32.l_whence; bf.l_start = (off64_t)sbf32.l_start; bf.l_len = (off64_t)sbf32.l_len; bf.l_sysid = sbf32.l_sysid; bf.l_pid = sbf32.l_pid; } } #endif /* _ILP32 || _SYSCALL32_IMPL */ #if defined(_LP64) if (datamodel == DATAMODEL_LP64 && (cmd == F_ALLOCSP || cmd == F_FREESP)) { if (copyin((void *)arg, &bf, sizeof (bf))) { error = EFAULT; break; } } #endif /* defined(_LP64) */ #if !defined(_LP64) || defined(_SYSCALL32_IMPL) if (datamodel == DATAMODEL_ILP32 && (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) { if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) { error = EFAULT; break; } else { /* * Note that the size of flock64 is different in * the ILP32 and LP64 models, due to the l_pad * field. We do not want to assume that the * flock64 structure is laid out the same in * ILP32 and LP64 environments, so we will * copy in the ILP32 version of flock64 * explicitly and copy it to the native * flock64 structure. */ bf.l_type = (short)bf64_32.l_type; bf.l_whence = (short)bf64_32.l_whence; bf.l_start = bf64_32.l_start; bf.l_len = bf64_32.l_len; bf.l_sysid = (int)bf64_32.l_sysid; bf.l_pid = (pid_t)bf64_32.l_pid; } } #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */ if (cmd == F_ALLOCSP || cmd == F_FREESP) error = flock_check(vp, &bf, offset, maxoffset); else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64) error = flock_check(vp, &bf, offset, MAXOFFSET_T); if (error) break; if (vp->v_type == VREG && bf.l_len == 0 && bf.l_start > OFFSET_MAX(fp)) { error = EFBIG; break; } /* * Make sure that there are no conflicting non-blocking * mandatory locks in the region being manipulated. If * there are such locks then return EACCES. 
*/ if ((error = flock_get_start(vp, &bf, offset, &start)) != 0) break; if (nbl_need_check(vp)) { u_offset_t begin; ssize_t length; nbl_start_crit(vp, RW_READER); in_crit = 1; vattr.va_mask = AT_SIZE; if ((error = VOP_GETATTR(vp, &vattr, 0, CRED())) != 0) break; begin = start > vattr.va_size ? vattr.va_size : start; length = vattr.va_size > start ? vattr.va_size - start : start - vattr.va_size; if (nbl_conflict(vp, NBL_WRITE, begin, length, 0)) { error = EACCES; break; } } if (cmd == F_ALLOCSP64) cmd = F_ALLOCSP; else if (cmd == F_FREESP64) cmd = F_FREESP; error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL); break; #if !defined(_LP64) || defined(_SYSCALL32_IMPL) /* ONC_PLUS EXTRACT START */ case F_GETLK64: case F_SETLK64: case F_SETLKW64: case F_SETLK64_NBMAND: /* * Large Files: Here we set cmd as *LK and send it to * lower layers. *LK64 is only for the user land. * Most of the comments described above for F_SETLK * applies here too. * Large File support is only needed for ILP32 apps! */ if (datamodel != DATAMODEL_ILP32) { error = EINVAL; break; } if (cmd == F_GETLK64) cmd = F_GETLK; else if (cmd == F_SETLK64) cmd = F_SETLK; else if (cmd == F_SETLKW64) cmd = F_SETLKW; else if (cmd == F_SETLK64_NBMAND) cmd = F_SETLK_NBMAND; /* * Note that the size of flock64 is different in the ILP32 * and LP64 models, due to the sucking l_pad field. * We do not want to assume that the flock64 structure is * laid out in the same in ILP32 and LP64 environments, so * we will copy in the ILP32 version of flock64 explicitly * and copy it to the native flock64 structure. 
*/ if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) { error = EFAULT; break; } bf.l_type = (short)bf64_32.l_type; bf.l_whence = (short)bf64_32.l_whence; bf.l_start = bf64_32.l_start; bf.l_len = bf64_32.l_len; bf.l_sysid = (int)bf64_32.l_sysid; bf.l_pid = (pid_t)bf64_32.l_pid; if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0) break; if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset, NULL, fp->f_cred)) != 0) break; if ((cmd == F_GETLK) && bf.l_type == F_UNLCK) { if (copyout(&bf.l_type, &((struct flock *)arg)->l_type, sizeof (bf.l_type))) error = EFAULT; break; } if (cmd == F_GETLK) { int i; /* * We do not want to assume that the flock64 structure * is laid out in the same in ILP32 and LP64 * environments, so we will copy out the ILP32 version * of flock64 explicitly after copying the native * flock64 structure to it. */ for (i = 0; i < 4; i++) bf64_32.l_pad[i] = 0; bf64_32.l_type = (int16_t)bf.l_type; bf64_32.l_whence = (int16_t)bf.l_whence; bf64_32.l_start = bf.l_start; bf64_32.l_len = bf.l_len; bf64_32.l_sysid = (int32_t)bf.l_sysid; bf64_32.l_pid = (pid32_t)bf.l_pid; if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32))) error = EFAULT; } break; /* ONC_PLUS EXTRACT END */ #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */ /* ONC_PLUS EXTRACT START */ case F_SHARE: case F_SHARE_NBMAND: case F_UNSHARE: /* * Copy in input fields only. */ if (copyin((void *)arg, &fsh, sizeof (fsh))) { error = EFAULT; break; } /* * Local share reservations always have this simple form */ shr.s_access = fsh.f_access; shr.s_deny = fsh.f_deny; shr.s_sysid = 0; shr.s_pid = ttoproc(curthread)->p_pid; shr_own.sl_pid = shr.s_pid; shr_own.sl_id = fsh.f_id; shr.s_own_len = sizeof (shr_own); shr.s_owner = (caddr_t)&shr_own; error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred); /* ONC_PLUS EXTRACT END */ break; default: error = EINVAL; break; } if (in_crit) nbl_end_crit(vp); done: releasef(fdes); out: if (error) return (set_errno(error)); return (retval); }
/*
 * Mount a file descriptor onto the node in the file system.
 * Create a new vnode, update the attributes with info from the
 * file descriptor and the mount point.  The mask, mode, uid, gid,
 * atime, mtime and ctime are taken from the mountpt.  Link count is
 * set to one, the file system id is namedev and nodeid is unique
 * for each mounted object.  Other attributes are taken from mount point.
 * Make sure user is owner (or root) with write permissions on mount point.
 * Hash the new vnode and return 0.
 * Upon entry to this routine, the file descriptor is in the
 * fd field of a struct namefd.  Copy that structure from user
 * space and retrieve the file descriptor.
 *
 * Arguments:
 *	vfsp	vfs structure that is filled in for this mount
 *	mvp	vnode of the mount point
 *	uap	mount arguments; dataptr/datalen must describe exactly one
 *		struct namefd in user space
 *	crp	credentials used for the attribute and privilege checks
 *
 * Returns 0 on success, otherwise an errno value:
 *	EINVAL	 datalen is not sizeof (struct namefd), the fd refers to a
 *		 directory or an event port, or the fd is neither a door nor
 *		 a stream and secpolicy_fs_mount() refused the caller
 *	EFAULT	 the namefd structure could not be copied in
 *	EBADF	 the descriptor is not open
 *	EBUSY	 the mount point is already a root / namefs node, or the
 *		 fd's vnode is the root of a file system
 *	ENOTSUP	 the mount point lives in /dev/pts or /dev/vt
 *	EACCES	 no write permission on the mount point, or the file has
 *		 file/record locks
 *	or whatever VOP_GETATTR() / secpolicy_vnode_owner() returned.
 *
 * On every failure path the getf() hold on the descriptor is dropped; on
 * success an extra f_count reference on the file is kept in nm_filep.
 */
static int
nm_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *crp)
{
	struct namefd namefdp;
	struct vnode *filevp;		/* file descriptor vnode */
	struct file *fp;
	struct vnode *newvp;		/* vnode representing this mount */
	struct vnode *rvp;		/* realvp (if any) for the mountpt */
	struct namenode *nodep;		/* namenode for this mount */
	struct vattr filevattr;		/* attributes of file desc. */
	struct vattr *vattrp;		/* attributes of this mount */
	char *resource_name;
	char *resource_nodetype;
	statvfs64_t *svfsp;
	int error = 0;

	/*
	 * Get the file descriptor from user space.
	 * Make sure the file descriptor is valid and has an
	 * associated file pointer.
	 * If so, extract the vnode from the file pointer.
	 */
	if (uap->datalen != sizeof (struct namefd))
		return (EINVAL);

	if (copyin(uap->dataptr, &namefdp, uap->datalen))
		return (EFAULT);

	if ((fp = getf(namefdp.fd)) == NULL)
		return (EBADF);

	/*
	 * If the mount point already has something mounted
	 * on it, disallow this mount.  (This restriction may
	 * be removed in a later release).
	 * Or unmount has completed but the namefs ROOT vnode
	 * count has not decremented to zero, disallow this mount.
	 */
	mutex_enter(&mvp->v_lock);
	if ((mvp->v_flag & VROOT) ||
	    vfs_matchops(mvp->v_vfsp, namefs_vfsops)) {
		mutex_exit(&mvp->v_lock);
		releasef(namefdp.fd);
		return (EBUSY);
	}
	mutex_exit(&mvp->v_lock);

	/*
	 * Cannot allow users to fattach() in /dev/pts.
	 * First, there is no need for doing so and secondly
	 * we cannot allow arbitrary users to park on a node in
	 * /dev/pts or /dev/vt.
	 */
	rvp = NULLVP;
	if (vn_matchops(mvp, spec_getvnodeops()) &&
	    VOP_REALVP(mvp, &rvp, NULL) == 0 && rvp &&
	    (vn_matchops(rvp, devpts_getvnodeops()) ||
	    vn_matchops(rvp, devvt_getvnodeops()))) {
		releasef(namefdp.fd);
		return (ENOTSUP);
	}

	filevp = fp->f_vnode;
	if (filevp->v_type == VDIR || filevp->v_type == VPORT) {
		releasef(namefdp.fd);
		return (EINVAL);
	}

	/*
	 * If the fd being mounted refers to neither a door nor a stream,
	 * make sure the caller is privileged.
	 */
	if (filevp->v_type != VDOOR && filevp->v_stream == NULL) {
		if (secpolicy_fs_mount(crp, filevp, vfsp) != 0) {
			/* fd is neither a stream nor a door */
			releasef(namefdp.fd);
			return (EINVAL);
		}
	}

	/*
	 * Make sure the file descriptor is not the root of some
	 * file system.
	 * If it's not, create a reference and allocate a namenode
	 * to represent this mount request.
	 */
	if (filevp->v_flag & VROOT) {
		releasef(namefdp.fd);
		return (EBUSY);
	}

	nodep = kmem_zalloc(sizeof (struct namenode), KM_SLEEP);

	/*
	 * From here on, failures must go through "out" so that the lock
	 * initialized below is destroyed and the namenode is freed.
	 */
	mutex_init(&nodep->nm_lock, NULL, MUTEX_DEFAULT, NULL);
	vattrp = &nodep->nm_vattr;
	vattrp->va_mask = AT_ALL;
	if ((error = VOP_GETATTR(mvp, vattrp, 0, crp, NULL)) != 0)
		goto out;

	filevattr.va_mask = AT_ALL;
	if ((error = VOP_GETATTR(filevp, &filevattr, 0, crp, NULL)) != 0)
		goto out;

	/*
	 * Make sure the user is the owner of the mount point
	 * or has sufficient privileges.
	 */
	if ((error = secpolicy_vnode_owner(crp, vattrp->va_uid)) != 0)
		goto out;

	/*
	 * Make sure the user has write permissions on the
	 * mount point (or has sufficient privileges).
	 */
	if (!(vattrp->va_mode & VWRITE) &&
	    secpolicy_vnode_access(crp, mvp, vattrp->va_uid, VWRITE) != 0) {
		error = EACCES;
		goto out;
	}

	/*
	 * If the file descriptor has file/record locking, don't
	 * allow the mount to succeed.
	 */
	if (vn_has_flocks(filevp)) {
		error = EACCES;
		goto out;
	}

	/*
	 * Initialize the namenode.
	 */
	if (filevp->v_stream) {
		struct stdata *stp = filevp->v_stream;

		mutex_enter(&stp->sd_lock);
		stp->sd_flag |= STRMOUNT;
		mutex_exit(&stp->sd_lock);
	}
	nodep->nm_filevp = filevp;

	/*
	 * Take our own reference on the file before dropping the getf()
	 * hold, so that fp stays valid while it is recorded in nm_filep.
	 */
	mutex_enter(&fp->f_tlock);
	fp->f_count++;
	mutex_exit(&fp->f_tlock);

	releasef(namefdp.fd);
	nodep->nm_filep = fp;
	nodep->nm_mountpt = mvp;

	/*
	 * The attributes for the mounted file descriptor were initialized
	 * above by applying VOP_GETATTR to the mount point.  Some of
	 * the fields of the attributes structure will be overwritten
	 * by the attributes from the file descriptor.
	 */
	vattrp->va_type    = filevattr.va_type;
	vattrp->va_fsid    = namedev;
	vattrp->va_nodeid  = namenodeno_alloc();
	vattrp->va_nlink   = 1;
	vattrp->va_size    = filevattr.va_size;
	vattrp->va_rdev    = filevattr.va_rdev;
	vattrp->va_blksize = filevattr.va_blksize;
	vattrp->va_nblocks = filevattr.va_nblocks;
	vattrp->va_seq	   = 0;

	/*
	 * Initialize new vnode structure for the mounted file descriptor.
	 */
	nodep->nm_vnode = vn_alloc(KM_SLEEP);
	newvp = NMTOV(nodep);

	newvp->v_flag = filevp->v_flag | VROOT | VNOMAP | VNOSWAP;
	vn_setops(newvp, nm_vnodeops);
	newvp->v_vfsp = vfsp;
	newvp->v_stream = filevp->v_stream;
	newvp->v_type = filevp->v_type;
	newvp->v_rdev = filevp->v_rdev;
	newvp->v_data = (caddr_t)nodep;
	VFS_HOLD(vfsp);
	vn_exists(newvp);

	/*
	 * Initialize the vfs structure.
	 */
	vfsp->vfs_vnodecovered = NULL;
	vfsp->vfs_flag |= VFS_UNLINKABLE;
	vfsp->vfs_bsize = 1024;
	vfsp->vfs_fstype = namefstype;
	vfs_make_fsid(&vfsp->vfs_fsid, namedev, namefstype);
	vfsp->vfs_data = (caddr_t)nodep;
	vfsp->vfs_dev = namedev;
	vfsp->vfs_bcount = 0;

	/*
	 * Set the name we mounted from.
	 */
	switch (filevp->v_type) {
	case VPROC:	/* VOP_GETATTR() translates this to VREG */
	case VREG:
		resource_nodetype = "file";
		break;
	case VDIR:
		/* not reached: VDIR was rejected with EINVAL above */
		resource_nodetype = "directory";
		break;
	case VBLK:
		resource_nodetype = "device";
		break;
	case VCHR:
		resource_nodetype = "device";
		break;
	case VLNK:
		resource_nodetype = "link";
		break;
	case VFIFO:
		resource_nodetype = "fifo";
		break;
	case VDOOR:
		resource_nodetype = "door";
		break;
	case VSOCK:
		resource_nodetype = "socket";
		break;
	default:
		resource_nodetype = "resource";
		break;
	}

#define	RESOURCE_NAME_SZ 128 /* Maximum length of the resource name */
	resource_name = kmem_alloc(RESOURCE_NAME_SZ, KM_SLEEP);
	svfsp = kmem_alloc(sizeof (statvfs64_t), KM_SLEEP);

	/*
	 * A VFS_STATVFS() failure is not fatal: it only selects the
	 * shorter form of the resource name.
	 */
	error = VFS_STATVFS(filevp->v_vfsp, svfsp);
	if (error == 0) {
		(void) snprintf(resource_name, RESOURCE_NAME_SZ,
		    "unspecified_%s_%s", svfsp->f_basetype, resource_nodetype);
	} else {
		(void) snprintf(resource_name, RESOURCE_NAME_SZ,
		    "unspecified_%s", resource_nodetype);
	}

	vfs_setresource(vfsp, resource_name);

	kmem_free(svfsp, sizeof (statvfs64_t));
	kmem_free(resource_name, RESOURCE_NAME_SZ);
#undef RESOURCE_NAME_SZ

	/*
	 * Insert the namenode.
	 */
	mutex_enter(&ntable_lock);
	nameinsert(nodep);
	mutex_exit(&ntable_lock);
	return (0);

out:
	releasef(namefdp.fd);
	/* nm_lock was mutex_init()ed above; destroy it before freeing. */
	mutex_destroy(&nodep->nm_lock);
	kmem_free(nodep, sizeof (struct namenode));
	return (error);
}
/*
 * Native 32-bit system call for non-large-file applications.
 *
 * Reads directory entries for fd with VOP_READDIR() into a kernel buffer of
 * dirent64 records, converts each one to a dirent32 record in a second
 * kernel buffer, and copies the converted records out to buf.
 *
 * Returns the number of bytes of dirent32 data produced, or sets errno:
 * EINVAL when count cannot hold a single struct dirent32, EBADF when fd is
 * not open, ENOTDIR when fd is not a directory, EOVERFLOW when an entry's
 * d_ino or d_off does not fit in 32 bits, EFAULT when the copyout fails,
 * or whatever VOP_READDIR() reported.
 */
int
getdents32(int fd, void *buf, size_t count)
{
	file_t *fp;
	vnode_t *vp;
	struct iovec iov;
	struct uio uio;
	struct dirent64 *dp;
	struct dirent32 *op;
	char *newbuf;
	char *obuf;
	int bufsize;
	int osize = 0;		/* bytes of dirent32 data produced */
	int consumed = 0;	/* bytes of dirent64 data processed */
	int eofflag;		/* filled in by VOP_READDIR(), not used */
	int err;

	if (count < sizeof (struct dirent32))
		return (set_errno(EINVAL));

	fp = getf(fd);
	if (fp == NULL)
		return (set_errno(EBADF));

	vp = fp->f_vnode;
	if (vp->v_type != VDIR) {
		releasef(fd);
		return (set_errno(ENOTDIR));
	}

	/*
	 * Cap the request so a caller cannot make the kernel allocate
	 * arbitrarily large buffers.
	 */
	if (count > MAXGETDENTS_SIZE)
		count = MAXGETDENTS_SIZE;

	bufsize = count;
	newbuf = kmem_alloc(bufsize, KM_SLEEP);
	obuf = kmem_alloc(bufsize, KM_SLEEP);

	/* Describe newbuf as the single kernel-space target of the read. */
	iov.iov_base = newbuf;
	iov.iov_len = count;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_loffset = fp->f_offset;
	uio.uio_resid = count;
	uio.uio_fmode = 0;
	uio.uio_extflg = UIO_COPY_CACHED;

	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
	err = VOP_READDIR(vp, &uio, fp->f_cred, &eofflag, NULL, 0);
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
	if (err)
		goto done;

	/* From here on, count is the number of bytes actually read. */
	count = count - uio.uio_resid;
	fp->f_offset = uio.uio_loffset;

	dp = (struct dirent64 *)newbuf;
	op = (struct dirent32 *)obuf;

	while (consumed < count) {
		uint32_t out_reclen, name_len;

		/*
		 * The 64-bit d_ino and d_off must fit in their 32-bit
		 * counterparts.
		 *
		 * d_off is signed, yet it is compared against the whole
		 * unsigned 32-bit range since some file systems (NFS, for
		 * example) use all 32 bits for directory cookies.  The
		 * comparison is done in uint64_t because off64_t has no
		 * exact unsigned analog.
		 */
		if (dp->d_ino > (ino64_t)UINT32_MAX ||
		    dp->d_off > (uint64_t)UINT32_MAX) {
			err = EOVERFLOW;
			goto done;
		}

		op->d_ino = (ino32_t)dp->d_ino;
		op->d_off = (off32_t)dp->d_off;

		name_len = strlen(dp->d_name);
		out_reclen = DIRENT32_RECLEN(name_len);
		op->d_reclen = (uint16_t)out_reclen;

		/* strncpy(9f) also zero-fills the unused tail of d_name */
		(void) strncpy(op->d_name, dp->d_name,
		    DIRENT32_NAMELEN(out_reclen));

		consumed += (uint_t)dp->d_reclen;
		osize += (uint_t)op->d_reclen;

		/* step both cursors to their next record */
		dp = (struct dirent64 *)((char *)dp + (uint_t)dp->d_reclen);
		op = (struct dirent32 *)((char *)op + (uint_t)op->d_reclen);
	}

	ASSERT(osize <= count);
	ASSERT((char *)op <= (char *)obuf + bufsize);
	ASSERT((char *)dp <= (char *)newbuf + bufsize);

	err = copyout(obuf, buf, osize);
	if (err < 0)
		err = EFAULT;

done:
	kmem_free(newbuf, bufsize);
	kmem_free(obuf, bufsize);
	releasef(fd);

	if (err)
		return (set_errno(err));
	return (osize);
}
/*
 * port_associate_fd()
 * This function associates new file descriptors with a port or
 * reactivate already associated file descriptors.
 * The reactivation also updates the events types to be checked and the
 * attached user pointer.
 * Per port a cache is used to store associated file descriptors.
 * Internally the VOP_POLL interface is used to poll for existing events.
 * The VOP_POLL interface can also deliver a pointer to a pollhead_t structure
 * which is used to enqueue polldat_t structures with pending events.
 * If VOP_POLL immediately returns valid events (revents) then those events
 * will be submitted to the event port with port_send_event().
 * Otherwise VOP_POLL does not return events but it delivers a pointer to a
 * pollhead_t structure. In such a case the corresponding file system behind
 * VOP_POLL will use the pollwakeup() function to notify about existing
 * events.
 *
 * Arguments:
 *	pp	the port the descriptor is associated with
 *	source	event source, passed on to port_alloc_event_local()
 *	object	the file descriptor, carried in a uintptr_t
 *	events	poll events of interest; stored in pd_events and passed to
 *		VOP_POLL()
 *	user	user pointer stored in the event (portkev_user)
 *
 * Returns 0 on success.  Returns EBADFD when object is larger than INT_MAX
 * or is not an open descriptor; otherwise returns the error reported by
 * port_associate_ksource(), port_alloc_event_local(), VOP_POLL() or
 * port_bind_pollhead().
 *
 * Locking: pc_lock of the port's fd cache is taken right after getf() and is
 * held until return; every return path after that point calls both
 * releasef(fd) and mutex_exit(&pcp->pc_lock).
 */
int
port_associate_fd(port_t *pp, int source, uintptr_t object, int events,
    void *user)
{
	port_fdcache_t	*pcp;
	int		fd;
	struct pollhead	*php = NULL;
	portfd_t	*pfd;
	polldat_t	*pdp;
	file_t		*fp;
	port_kevent_t	*pkevp;
	short		revents;
	int		error = 0;

	pcp = pp->port_queue.portq_pcp;

	/* The object must be representable as an int file descriptor. */
	if (object > (uintptr_t)INT_MAX)
		return (EBADFD);

	fd = object;

	if ((fp = getf(fd)) == NULL)
		return (EBADFD);

	mutex_enter(&pcp->pc_lock);

	if (pcp->pc_hash == NULL) {
		/*
		 * This is the first time that a fd is being associated with
		 * the current port:
		 * - create PORT_SOURCE_FD cache
		 * - associate PORT_SOURCE_FD source with the port
		 */
		error = port_associate_ksource(pp->port_fd, PORT_SOURCE_FD,
		    NULL, port_close_sourcefd, pp, NULL);
		if (error) {
			mutex_exit(&pcp->pc_lock);
			releasef(fd);
			return (error);
		}

		/* create polldat cache */
		pcp->pc_hashsize = PORTHASH_START;
		pcp->pc_hash = kmem_zalloc(pcp->pc_hashsize *
		    sizeof (portfd_t *), KM_SLEEP);
		/* the cache was just created, so the fd cannot be in it */
		pfd = NULL;
	} else {
		/* Check if the fd/fp is already associated with the port */
		pfd = port_cache_lookup_fp(pcp, fd, fp);
	}

	if (pfd == NULL) {
		/*
		 * new entry
		 * Allocate a polldat_t structure per fd
		 * The use of the polldat_t structure to cache file descriptors
		 * is required to be able to share the pollwakeup() function
		 * with poll(2) and devpoll(7d).
		 */
		pfd = kmem_zalloc(sizeof (portfd_t), KM_SLEEP);
		pdp = PFTOD(pfd);
		pdp->pd_fd = fd;
		pdp->pd_fp = fp;
		pdp->pd_pcache = (void *)pcp;

		/* Allocate a port event structure per fd */
		error = port_alloc_event_local(pp, source, PORT_ALLOC_CACHED,
		    &pdp->pd_portev);
		if (error) {
			/* pfd has not been inserted into the cache yet */
			kmem_free(pfd, sizeof (portfd_t));
			releasef(fd);
			mutex_exit(&pcp->pc_lock);
			return (error);
		}
		pkevp = pdp->pd_portev;
		pkevp->portkev_callback = port_fd_callback;
		pkevp->portkev_arg = pfd;

		/* add portfd_t entry to the cache */
		port_cache_insert_fd(pcp, pdp);
		pkevp->portkev_object = fd;
		pkevp->portkev_user = user;

		/*
		 * Add current port to the file descriptor interested list
		 * The members of the list are notified when the file descriptor
		 * is closed.
		 */
		addfd_port(fd, pfd);
	} else {
		/*
		 * The file descriptor is already associated with the port
		 */
		pdp = PFTOD(pfd);
		pkevp = pdp->pd_portev;

		/*
		 * Check if the re-association happens before the last
		 * submitted event of the file descriptor was retrieved.
		 * Clear the PORT_KEV_VALID flag if set. No new events
		 * should get submitted after this flag is cleared.
		 */
		mutex_enter(&pkevp->portkev_lock);
		if (pkevp->portkev_flags & PORT_KEV_VALID) {
			pkevp->portkev_flags &= ~PORT_KEV_VALID;
		}
		if (pkevp->portkev_flags & PORT_KEV_DONEQ) {
			/* portkev_lock is dropped before the removal call */
			mutex_exit(&pkevp->portkev_lock);
			/*
			 * Remove any events that were already fired
			 * for this fd and are still in the port queue.
			 */
			port_remove_done_event(pkevp);
		} else {
			mutex_exit(&pkevp->portkev_lock);
		}
		pkevp->portkev_user = user;
	}

	/* Common to new and re-activated associations: (re)arm the event. */
	mutex_enter(&pkevp->portkev_lock);
	pkevp->portkev_events = 0;	/* no fired events */
	pdp->pd_events = events;	/* events associated */
	/*
	 * allow new events.
	 */
	pkevp->portkev_flags |= PORT_KEV_VALID;
	mutex_exit(&pkevp->portkev_lock);

	/*
	 * do VOP_POLL and cache this poll fd.
	 *
	 * XXX - pollrelock() logic needs to know
	 * which pollcache lock to grab. It'd be a
	 * cleaner solution if we could pass pcp as
	 * an argument in VOP_POLL interface instead
	 * of implicitly passing it using thread_t
	 * struct. On the other hand, changing VOP_POLL
	 * interface will require all driver/file system
	 * poll routine to change.
	 */
	curthread->t_pollcache = (pollcache_t *)pcp;
	error = VOP_POLL(fp->f_vnode, events, 0, &revents, &php);
	curthread->t_pollcache = NULL;

	/*
	 * To keep synchronization between VOP_POLL above and
	 * pollhead_insert below, it is necessary to
	 * call VOP_POLL() again (see port_bind_pollhead()).
	 */
	if (error) {
		/* dissociate the fd from the port */
		delfd_port(fd, pfd);
		port_remove_fd_local(pfd, pcp);
		releasef(fd);
		mutex_exit(&pcp->pc_lock);
		return (error);
	}

	if (php != NULL) {
		/*
		 * No events delivered yet.
		 * Bind pollhead pointer with current polldat_t structure.
		 * Sub-system will call pollwakeup() later with php as
		 * argument.
		 */
		error = port_bind_pollhead(&php, pdp, &revents);
		if (error) {
			/* same undo sequence as for a VOP_POLL() failure */
			delfd_port(fd, pfd);
			port_remove_fd_local(pfd, pcp);
			releasef(fd);
			mutex_exit(&pcp->pc_lock);
			return (error);
		}
	}

	/*
	 * Check if new events were detected and no events have been
	 * delivered. The revents was already set after the VOP_POLL
	 * above or it was updated in port_bind_pollhead().
	 */
	mutex_enter(&pkevp->portkev_lock);
	if (revents && (pkevp->portkev_flags & PORT_KEV_VALID)) {
		ASSERT((pkevp->portkev_flags & PORT_KEV_DONEQ) == 0);
		pkevp->portkev_flags &= ~PORT_KEV_VALID;
		/* report only requested events plus POLLHUP and POLLERR */
		revents = revents & (pdp->pd_events | POLLHUP | POLLERR);
		/* send events to the event port */
		pkevp->portkev_events = revents;
		/*
		 * port_send_event will release the portkev_lock mutex.
		 */
		port_send_event(pkevp);
	} else {
		mutex_exit(&pkevp->portkev_lock);
	}

	releasef(fd);
	mutex_exit(&pcp->pc_lock);
	return (error);
}
/*
 * Apply the attributes in *vap to the file identified by (fd, fname).
 *
 * nmflag selects how the target vnode is found:
 *	1 - always look up fname
 *	2 - look up fname if it is not NULL, otherwise use fd's vnode
 *	any other value - use fd's vnode directly, no lookup
 *
 * A relative fname is resolved from fd's vnode unless fd is AT_FDCWD; an
 * fname starting with '/' never needs fd.  "follow" is handed through to
 * lookupnameat() and "flags" to VOP_SETATTR().
 *
 * Returns 0 on success.  On failure errno is set to EFAULT (fd is AT_FDCWD
 * with a NULL fname, or fname cannot be read), EBADF (fd is not open),
 * EROFS (target is read-only), or the error from lookupnameat() /
 * VOP_SETATTR().
 */
static int
cfutimesat(int fd, char *fname, int nmflag, vattr_t *vap, int flags,
    int follow)
{
	file_t *filep;
	vnode_t *startvp = NULL;	/* held base vnode for the lookup */
	vnode_t *vp;			/* held target vnode */
	char firstch = '\0';
	int by_name;
	int rc;

	if (fd == AT_FDCWD && fname == NULL)
		return (set_errno(EFAULT));

	/* Does this request resolve a path name at all? */
	by_name = (nmflag == 1 || (nmflag == 2 && fname != NULL));

	/* Peek at the first character to tell absolute from relative. */
	if (by_name && copyin(fname, &firstch, sizeof (char)))
		return (set_errno(EFAULT));

	/*
	 * The descriptor only matters when it is a real fd and the path
	 * is not absolute.
	 */
	if (fd != AT_FDCWD && firstch != '/') {
		filep = getf(fd);
		if (filep == NULL)
			return (set_errno(EBADF));
		startvp = filep->f_vnode;
		VN_HOLD(startvp);
		releasef(fd);
	}

	if (by_name) {
		if (AU_AUDITING() && startvp != NULL)
			audit_setfsat_path(1);

		rc = lookupnameat(fname, UIO_USERSPACE, follow, NULLVPP, &vp,
		    startvp);
		if (rc != 0) {
			if (startvp != NULL) {
				VN_RELE(startvp);
			}
			return (set_errno(rc));
		}
	} else {
		/* operate on the descriptor's own vnode */
		vp = startvp;
		VN_HOLD(vp);
	}

	/* The base vnode is no longer needed once the target is held. */
	if (startvp != NULL) {
		VN_RELE(startvp);
	}

	if (vn_is_readonly(vp)) {
		rc = EROFS;
	} else {
		rc = VOP_SETATTR(vp, vap, flags, CRED(), NULL);
	}

	VN_RELE(vp);

	if (rc != 0)
		return (set_errno(rc));
	return (0);
}
/*
 * Common code for changing the owner and/or group of a file.
 *
 * nmflag has the following values
 *
 * 1 - Always do lookup.  i.e. chown, lchown.
 * 2 - Name is optional i.e. fchownat
 * 0 - Don't lookup name, vp is in file_p. i.e. fchown
 *
 * A uid or gid of -1 leaves that id unchanged (its bit is not set in
 * va_mask).  With flags equal to AT_SYMLINK_NOFOLLOW the lookup does not
 * follow a trailing symbolic link.
 *
 * Returns 0 on success.  On failure errno is set to EINVAL (uid or gid out
 * of range), EFAULT (fd is AT_FDCWD with a NULL name, or name cannot be
 * read), EBADF (fd is not open), EROFS (target is read-only), or the error
 * from lookupnameat() / VOP_SETATTR().
 */
int
cfchownat(int fd, char *name, int nmflag, uid_t uid, gid_t gid, int flags)
{
	vnode_t *dirvp = NULL;		/* held base vnode for the lookup */
	vnode_t *vp;			/* held target vnode */
	file_t *fp;
	struct vattr attrs;
	char first = '\0';
	int by_name;
	int rc = 0;

	if (uid < -1 || uid > MAXUID || gid < -1 || gid > MAXUID)
		return (set_errno(EINVAL));

	/* Only ids that are not -1 are marked for change. */
	attrs.va_uid = uid;
	attrs.va_gid = gid;
	attrs.va_mask = 0;
	if (attrs.va_uid != -1)
		attrs.va_mask |= AT_UID;
	if (attrs.va_gid != -1)
		attrs.va_mask |= AT_GID;

	if (fd == AT_FDCWD && name == NULL)
		return (set_errno(EFAULT));

	/* Does this request resolve a path name at all? */
	by_name = (nmflag == 1 || (nmflag == 2 && name != NULL));

	/* Peek at the first character to tell absolute from relative. */
	if (by_name && copyin(name, &first, sizeof (char)))
		return (set_errno(EFAULT));

	/*
	 * only get fd if not doing absolute lookup
	 */
	if (fd != AT_FDCWD && (first != '/' || nmflag == 0)) {
		fp = getf(fd);
		if (fp == NULL)
			return (set_errno(EBADF));
		dirvp = fp->f_vnode;
		VN_HOLD(dirvp);
		releasef(fd);
	}

#if C2_AUDIT
	if ((nmflag == 2) && audit_active)
		audit_setfsat_path(1);
#endif /* C2_AUDIT */

	if (by_name) {
		/*
		 * Do lookups for chown, lchown and fchownat when name
		 * not NULL
		 */
		rc = lookupnameat(name, UIO_USERSPACE,
		    (flags == AT_SYMLINK_NOFOLLOW) ? NO_FOLLOW : FOLLOW,
		    NULLVPP, &vp, dirvp);
		if (rc != 0) {
			if (dirvp != NULL) {
				VN_RELE(dirvp);
			}
			return (set_errno(rc));
		}
	} else {
		/* operate on the descriptor's own vnode */
		vp = dirvp;
		ASSERT(vp);
		VN_HOLD(vp);
	}

	if (vn_is_readonly(vp)) {
		rc = EROFS;
	} else {
		rc = VOP_SETATTR(vp, &attrs, 0, CRED(), NULL);
	}

	/* Drop the holds taken above. */
	if (dirvp != NULL) {
		VN_RELE(dirvp);
	}
	if (vp != NULL) {
		VN_RELE(vp);
	}

	if (rc != 0)
		return (set_errno(rc));
	return (0);
}