/*
 * smb_vop_read
 *
 * Reads from vp into the caller-supplied uio.  The vnode rwlock is taken
 * with V_WRITELOCK_FALSE around the VOP_READ call and dropped again before
 * returning.  The VOP_READ result is handed back unchanged.
 */
int
smb_vop_read(vnode_t *vp, uio_t *uiop, cred_t *cr)
{
	int rc;

	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &smb_ct);
	rc = VOP_READ(vp, uiop, 0, cr, &smb_ct);
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &smb_ct);

	return (rc);
}
/*
 * acl3_setacl
 *
 * Service routine that applies args->acl to the file named by args->fh.
 *
 * The vnode is looked up with nfs3_fhtovp(), locked with
 * VOP_RWLOCK(V_WRITELOCK_TRUE), and the ACL is installed with
 * VOP_SETSECATTR.  Attributes are sampled both before and after the
 * operation so that a post-op attribute can be returned on either outcome.
 *
 * Results are reported through resp:
 *   - success:  status = NFS3_OK, resok.attr filled from the sampled attrs
 *   - failure:  status = NFS3ERR_ROFS when rdonly(ro, vp) is true,
 *               NFS3ERR_JUKEBOX when the thread has T_WOULDBLOCK set,
 *               otherwise puterrno3(error); resfail.attr is filled from
 *               whatever attributes were obtained (vap may be NULL).
 */
/* ARGSUSED */
void
acl3_setacl(SETACL3args *args, SETACL3res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	int error;
	vnode_t *vp;
	vattr_t *vap;
	vattr_t va;

	vap = NULL;

	vp = nfs3_fhtovp(&args->fh, exi);
	if (vp == NULL) {
		/*
		 * Must leave through "out", not "out1": "out" is the only
		 * place where resp->status is derived from error.  Jumping
		 * to "out1" here would return a reply whose status was
		 * never written.  "out1" already copes with vp == NULL.
		 */
		error = ESTALE;
		goto out;
	}

	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);

	/* Pre-operation attributes; vap stays NULL if they can't be read. */
	va.va_mask = AT_ALL;
	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;

	if (rdonly(ro, vp)) {
		/* Status is set explicitly, so skip the errno translation. */
		resp->status = NFS3ERR_ROFS;
		goto out1;
	}

	error = VOP_SETSECATTR(vp, &args->acl, 0, cr, NULL);

	/* Re-sample so the reply reflects the state after the attempt. */
	va.va_mask = AT_ALL;
	vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;

	if (error)
		goto out;

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
	VN_RELE(vp);

	resp->status = NFS3_OK;
	vattr_to_post_op_attr(vap, &resp->resok.attr);
	return;

out:
	/* Translate error into an NFSv3 status. */
	if (curthread->t_flag & T_WOULDBLOCK) {
		curthread->t_flag &= ~T_WOULDBLOCK;
		resp->status = NFS3ERR_JUKEBOX;
	} else
		resp->status = puterrno3(error);
out1:
	/* Common failure cleanup; vp is NULL only on the stale-handle path. */
	if (vp != NULL) {
		VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
		VN_RELE(vp);
	}
	vattr_to_post_op_attr(vap, &resp->resfail.attr);
}
static void walk_dir(struct vnode *dvp, void *arg, int (*callback)(char *, void *)) { char *nm; int eof, error; struct iovec iov; struct uio uio; struct dirent64 *dp; dirent64_t *dbuf; size_t dbuflen, dlen; ASSERT(dvp); dlen = 4096; dbuf = kmem_zalloc(dlen, KM_SLEEP); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_segflg = UIO_SYSSPACE; uio.uio_fmode = 0; uio.uio_extflg = UIO_COPY_CACHED; uio.uio_loffset = 0; uio.uio_llimit = MAXOFFSET_T; eof = 0; error = 0; while (!error && !eof) { uio.uio_resid = dlen; iov.iov_base = (char *)dbuf; iov.iov_len = dlen; (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL); error = VOP_READDIR(dvp, &uio, kcred, &eof, NULL, 0); VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL); dbuflen = dlen - uio.uio_resid; if (error || dbuflen == 0) break; for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { nm = dp->d_name; if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) continue; if (callback(nm, arg) == WALK_DIR_TERMINATE) goto end; } } end: kmem_free(dbuf, dlen); }
/* * Set security attributes (acl's) * * Note that the dv_contents lock has already been acquired * by the caller's VOP_RWLOCK. */ static int devfs_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, struct cred *cr) { dvnode_t *dv = VTODV(vp); struct vnode *avp; int error; dcmn_err2(("devfs_setsecattr %s\n", dv->dv_name)); ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); ASSERT(RW_LOCK_HELD(&dv->dv_contents)); /* * Not a supported operation on drivers not providing * file system based permissions. */ if (dv->dv_flags & DV_NO_FSPERM) return (ENOTSUP); /* * To complete, the setsecattr requires an underlying attribute node. */ if (dv->dv_attrvp == NULL) { ASSERT(vp->v_type == VCHR || vp->v_type == VBLK); dv_shadow_node(DVTOV(dv->dv_dotdot), dv->dv_name, vp, NULL, NULLVP, cr, DV_SHADOW_CREATE | DV_SHADOW_WRITE_HELD); } if ((avp = dv->dv_attrvp) == NULL) { dcmn_err2(("devfs_setsecattr %s: " "cannot construct attribute node\n", dv->dv_name)); return (fs_nosys()); } /* * The acl(2) system call issues a VOP_RWLOCK before setting an ACL. * Since backing file systems expect the lock to be held before seeing * a VOP_SETSECATTR ACL, we need to issue the VOP_RWLOCK to the backing * store before forwarding the ACL. */ (void) VOP_RWLOCK(avp, V_WRITELOCK_TRUE, NULL); error = VOP_SETSECATTR(avp, vsap, flags, cr); dsysdebug(error, ("vop_setsecattr %s %d\n", VTODV(vp)->dv_name, error)); VOP_RWUNLOCK(avp, V_WRITELOCK_TRUE, NULL); /* * Set DV_ACL if we have a non-trivial set of ACLs. It is not * necessary to hold VOP_RWLOCK since fs_acl_nontrivial only does * VOP_GETSECATTR calls. */ if (fs_acl_nontrivial(avp, cr)) dv->dv_flags |= DV_ACL; return (error); }
/*
 * vn_rdwr
 *
 * Kernel-internal read or write of a vnode through a single-iovec uio.
 *
 * rw selects the direction; base/len/offset describe the transfer and seg
 * names the address space of base.  ulimit is copied into uio_llimit.
 *
 * Returns EROFS for a write to a vnode for which ISROFILE() is true, EIO
 * for a negative len, otherwise the VOP_READ/VOP_WRITE result.  If residp
 * is non-NULL the residual count is stored there; if it is NULL, any
 * residual turns the result into EIO.
 */
int
vn_rdwr(
	enum uio_rw rw,
	struct vnode *vp,
	caddr_t base,
	ssize_t len,
	offset_t offset,
	enum uio_seg seg,
	int ioflag,
	rlim64_t ulimit,	/* meaningful only if rw is UIO_WRITE */
	cred_t *cr,
	ssize_t *residp)
{
	struct iovec	iov;
	struct uio	uio;
	int		writing = (rw == UIO_WRITE);
	int		lockmode;
	int		error;

	if (writing && ISROFILE(vp))
		return (EROFS);

	if (len < 0)
		return (EIO);

	/* Describe the whole transfer with one iovec. */
	iov.iov_base = base;
	iov.iov_len = len;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_loffset = offset;
	uio.uio_segflg = (short)seg;
	uio.uio_resid = len;
	uio.uio_llimit = ulimit;

	/* The same lock mode is used to lock and to unlock. */
	lockmode = writing ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE;

	(void) VOP_RWLOCK(vp, lockmode, NULL);
	if (writing) {
		uio.uio_fmode = FWRITE;
		uio.uio_extflg = UIO_COPY_DEFAULT;
		error = VOP_WRITE(vp, &uio, ioflag, cr, NULL, NULL, NULL);
	} else {
		uio.uio_fmode = FREAD;
		uio.uio_extflg = UIO_COPY_CACHED;
		error = VOP_READ(vp, &uio, ioflag, cr, NULL);
	}
	VOP_RWUNLOCK(vp, lockmode, NULL);

	if (residp != NULL) {
		*residp = uio.uio_resid;
	} else if (uio.uio_resid != 0) {
		/* Caller can't learn about a short transfer; fail it. */
		error = EIO;
	}

	return (error);
}
/*
 * getdents64
 *
 * Reads directory entries from the directory open on fd into the user
 * buffer buf, starting at the file's current offset.
 *
 * count must be at least sizeof (struct dirent64) and is silently capped
 * at MAXGETDENTS_SIZE.  On success the file offset is advanced and the
 * number of bytes placed in buf is returned.  Failures are reported through
 * set_errno(): EINVAL (buffer too small), EBADF (bad descriptor), ENOTDIR
 * (not a directory), or the error from VOP_READDIR.
 */
int
getdents64(int fd, void *buf, size_t count)
{
	struct iovec	aiov;
	struct uio	auio;
	file_t		*fp;
	vnode_t		*vp;
	int		error;
	int		sink;

	if (count < sizeof (struct dirent64))
		return (set_errno(EINVAL));

	/*
	 * Don't let the user overcommit kernel resources.
	 */
	if (count > MAXGETDENTS_SIZE)
		count = MAXGETDENTS_SIZE;

	fp = getf(fd);
	if (fp == NULL)
		return (set_errno(EBADF));

	vp = fp->f_vnode;
	if (vp->v_type != VDIR) {
		releasef(fd);
		return (set_errno(ENOTDIR));
	}

	/* Single user-space iovec covering the caller's buffer. */
	aiov.iov_base = buf;
	aiov.iov_len = count;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_loffset = fp->f_offset;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_resid = count;
	auio.uio_fmode = 0;
	auio.uio_extflg = UIO_COPY_CACHED;

	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
	error = VOP_READDIR(vp, &auio, fp->f_cred, &sink, NULL, 0);
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

	if (error == 0) {
		/* Bytes delivered, and where the next read continues. */
		count = count - auio.uio_resid;
		fp->f_offset = auio.uio_loffset;
	}
	releasef(fd);

	if (error != 0)
		return (set_errno(error));
	return (count);
}
/*
 * acl2_setacl
 *
 * Service routine that applies args->acl to the file named by args->fh and
 * returns the file's attributes in resp->resok.attr.
 *
 * resp->status is NFSERR_STALE when the file handle cannot be resolved,
 * NFSERR_ROFS when rdonly(ro, vp) is true, puterrno(error) when setting the
 * ACL, fetching the attributes, or converting them fails, and NFS_OK
 * otherwise.
 */
/* ARGSUSED */
void
acl2_setacl(SETACL2args *args, SETACL2res *resp, struct exportinfo *exi,
    struct svc_req *req, cred_t *cr, bool_t ro)
{
	vnode_t	*vp;
	vattr_t	va;
	int	error;
	int	acl_applied;

	vp = nfs_fhtovp(&args->fh, exi);
	if (vp == NULL) {
		resp->status = NFSERR_STALE;
		return;
	}

	if (rdonly(ro, vp)) {
		VN_RELE(vp);
		resp->status = NFSERR_ROFS;
		return;
	}

	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);

	error = VOP_SETSECATTR(vp, &args->acl, 0, cr, NULL);
	acl_applied = (error == 0);
	if (acl_applied) {
		/* Fetch the attributes while the lock is still held. */
		va.va_mask = AT_ALL;
		error = rfs4_delegated_getattr(vp, &va, 0, cr);
	}

	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
	VN_RELE(vp);

	/* check for overflowed values */
	if (acl_applied && error == 0)
		error = vattr_to_nattr(&va, &resp->resok.attr);

	if (error != 0) {
		resp->status = puterrno(error);
		return;
	}

	resp->status = NFS_OK;
}
/*
 * smb_vop_write
 *
 * Writes the data described by uiop to vp.  uio_llimit is forced to
 * MAXOFFSET_T and the vnode rwlock is held with V_WRITELOCK_TRUE around
 * the VOP_WRITE call.
 *
 * On return *lcount holds the number of bytes consumed from the uio
 * (residual before minus residual after), regardless of the outcome.
 * The VOP_WRITE result is returned unchanged.
 */
int
smb_vop_write(vnode_t *vp, uio_t *uiop, int ioflag, uint32_t *lcount,
    cred_t *cr)
{
	int rc;

	/* Start with the requested size; the leftover is subtracted below. */
	*lcount = uiop->uio_resid;

	uiop->uio_llimit = MAXOFFSET_T;

	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &smb_ct);
	rc = VOP_WRITE(vp, uiop, ioflag, cr, &smb_ct);
	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &smb_ct);

	*lcount -= uiop->uio_resid;

	return (rc);
}
/* * smb_vop_readdir() * * Collects an SMB_MINLEN_RDDIR_BUF "page" of directory entries. * The directory entries are returned in an fs-independent format by the * underlying file system. That is, the "page" of information returned is * not literally stored on-disk in the format returned. * If the file system supports extended directory entries (has features * VFSFT_DIRENTFLAGS), set V_RDDIR_ENTFLAGS to cause the buffer to be * filled with edirent_t structures, instead of dirent64_t structures. * If the file system supports access based enumeration (abe), set * V_RDDIR_ACCFILTER to filter directory entries based on user cred. */ int smb_vop_readdir(vnode_t *vp, uint32_t offset, void *buf, int *count, int *eof, uint32_t rddir_flag, cred_t *cr) { int error = 0; int flags = 0; int rdirent_size; struct uio auio; struct iovec aiov; if (vp->v_type != VDIR) return (ENOTDIR); if (vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS)) { flags |= V_RDDIR_ENTFLAGS; rdirent_size = sizeof (edirent_t); } else { rdirent_size = sizeof (dirent64_t); } if (*count < rdirent_size) return (EINVAL); if (rddir_flag & SMB_ABE) flags |= V_RDDIR_ACCFILTER; aiov.iov_base = buf; aiov.iov_len = *count; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_loffset = (uint64_t)offset; auio.uio_segflg = UIO_SYSSPACE; auio.uio_resid = *count; auio.uio_fmode = 0; (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &smb_ct); error = VOP_READDIR(vp, &auio, cr, eof, &smb_ct, flags); VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &smb_ct); if (error == 0) *count = *count - auio.uio_resid; return (error); }
/*
 * smb_vop_acl_write
 *
 * Installs the ACL described by aclp on vp.
 *
 * The ACL is first converted to a vsecattr_t with smb_fsacl_to_vsa(); if
 * that succeeds it is applied with VOP_SETSECATTR while the vnode rwlock is
 * held with V_WRITELOCK_TRUE.  Any entry buffer reported by the conversion
 * (non-zero size and non-NULL vsa_aclentp) is freed before returning.
 *
 * Returns the conversion error, or else the VOP_SETSECATTR result.
 */
int
smb_vop_acl_write(vnode_t *vp, acl_t *aclp, int flags, cred_t *cr)
{
	vsecattr_t	vsa;
	int		acl_bytes;
	int		rc;

	ASSERT(vp);
	ASSERT(aclp);

	rc = smb_fsacl_to_vsa(aclp, &vsa, &acl_bytes);
	if (rc != 0)
		goto cleanup;

	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &smb_ct);
	rc = VOP_SETSECATTR(vp, &vsa, flags, cr, &smb_ct);
	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &smb_ct);

cleanup:
	if (acl_bytes && vsa.vsa_aclentp)
		kmem_free(vsa.vsa_aclentp, acl_bytes);

	return (rc);
}
/*
 * afs_UFSRead
 *
 * Read file data for vcache entry avc into the caller's uio, one dcache
 * chunk at a time, pulling the bytes from the cache file that backs each
 * chunk (opened with osi_UFSOpen / osi_UFSOpen_fh).
 *
 * Parameters:
 *   avc     - vcache entry being read
 *   auio    - destination uio; advanced by the amount transferred
 *   acred   - caller's credentials
 *   albn    - not referenced in this function
 *   abpp    - not referenced in this function
 *   noLock  - when non-zero: skip afs_VerifyVCache, do not take avc->lock,
 *             look chunks up with afs_FindDCache only (no fetching), and
 *             skip the prefetch at the end
 *
 * Returns EIO immediately if avc->vc_error is set, the afs_InitReq error if
 * request setup fails, and otherwise the accumulated error passed through
 * afs_CheckCode().
 *
 * Locking: AFS_DISCON_LOCK is held from just after the vc_error check until
 * return; every exit path after that point must call AFS_DISCON_UNLOCK.
 */
int
afs_UFSRead(register struct vcache *avc, struct uio *auio,
            struct AFS_UCRED *acred, daddr_t albn, struct buf **abpp,
            int noLock)
{
    afs_size_t totalLength;
    afs_size_t transferLength;
    afs_size_t filePos;
    afs_size_t offset, tlen;
    /* Initialised because the read-past-EOF check below tests len before
     * anything has assigned it. */
    afs_size_t len = 0;
    afs_int32 trimlen;
    struct dcache *tdc = 0;
    afs_int32 error;
#ifdef AFS_DARWIN80_ENV
    uio_t tuiop=NULL;
#else
    struct uio tuio;
    struct uio *tuiop = &tuio;
    struct iovec *tvec;
#endif
    struct osi_file *tfile;
    afs_int32 code;
    int trybusy = 1;
    struct vrequest treq;

    AFS_STATCNT(afs_UFSRead);
    if (avc && avc->vc_error)
        return EIO;

    AFS_DISCON_LOCK();

    /* check that we have the latest status info in the vnode cache */
    if ((code = afs_InitReq(&treq, acred))) {
        /* Drop the lock taken above, as every other exit path does. */
        AFS_DISCON_UNLOCK();
        return code;
    }
    if (!noLock) {
        if (!avc)
            osi_Panic("null avc in afs_UFSRead");
        else {
            code = afs_VerifyVCache(avc, &treq);
            if (code) {
                code = afs_CheckCode(code, &treq, 11);  /* failed to get it */
                AFS_DISCON_UNLOCK();
                return code;
            }
        }
    }
#ifndef AFS_VM_RDWR_ENV
    if (AFS_NFSXLATORREQ(acred)) {
        if (!afs_AccessOK
            (avc, PRSFS_READ, &treq,
             CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) {
            AFS_DISCON_UNLOCK();
            return afs_CheckCode(EACCES, &treq, 12);
        }
    }
#endif

#ifndef AFS_DARWIN80_ENV
    tvec = (struct iovec *)osi_AllocSmallSpace(sizeof(struct iovec));
#endif
    totalLength = AFS_UIO_RESID(auio);
    filePos = AFS_UIO_OFFSET(auio);
    afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc,
               ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32,
               totalLength, ICL_TYPE_OFFSET,
               ICL_HANDLE_OFFSET(avc->f.m.Length));
    error = 0;
    transferLength = 0;
    if (!noLock)
        ObtainReadLock(&avc->lock);
#if defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV)
    if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) {
        hset(avc->flushDV, avc->f.m.DataVersion);
    }
#endif

    /* Read starting at or beyond EOF.  len is forced to 0 here, so the
     * zero-fill below moves no bytes. */
    if (filePos >= avc->f.m.Length) {
        if (len > AFS_ZEROS)
            len = sizeof(afs_zeros);    /* and in 0 buffer */
        len = 0;
#ifdef AFS_DARWIN80_ENV
        trimlen = len;
        tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
        afsio_copy(auio, &tuio, tvec);
        trimlen = len;
        afsio_trim(&tuio, trimlen);
#endif
        AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
    }

    while (avc->f.m.Length > 0 && totalLength > 0) {
        /* read all of the cached info */
        if (filePos >= avc->f.m.Length)
            break;              /* all done */
        if (noLock) {
            if (tdc) {
                ReleaseReadLock(&tdc->lock);
                afs_PutDCache(tdc);
            }
            tdc = afs_FindDCache(avc, filePos);
            if (tdc) {
                ObtainReadLock(&tdc->lock);
                offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
                len = tdc->validPos - filePos;
            }
        } else {
            /* a tricky question: does the presence of the DFFetching flag
             * mean that we're fetching the latest version of the file?  No.
             * The server could update the file as soon as the fetch responsible
             * for the setting of the DFFetching flag completes.
             *
             * However, the presence of the DFFetching flag (visible under
             * a dcache read lock since it is set and cleared only under a
             * dcache write lock) means that we're fetching as good a version
             * as was known to this client at the time of the last call to
             * afs_VerifyVCache, since the latter updates the stat cache's
             * m.DataVersion field under a vcache write lock, and from the
             * time that the DFFetching flag goes on in afs_GetDCache (before
             * the fetch starts), to the time it goes off (after the fetch
             * completes), afs_GetDCache keeps at least a read lock on the
             * vcache entry.
             *
             * This means that if the DFFetching flag is set, we can use that
             * data for any reads that must come from the current version of
             * the file (current == m.DataVersion).
             *
             * Another way of looking at this same point is this: if we're
             * fetching some data and then try do an afs_VerifyVCache, the
             * VerifyVCache operation will not complete until after the
             * DFFetching flag is turned off and the dcache entry's f.versionNo
             * field is updated.
             *
             * Note, by the way, that if DFFetching is set,
             * m.DataVersion > f.versionNo (the latter is not updated until
             * after the fetch completes).
             */
            if (tdc) {
                ReleaseReadLock(&tdc->lock);
                afs_PutDCache(tdc);     /* before reusing tdc */
            }
            tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 2);
#ifdef AFS_DISCON_ENV
            if (!tdc) {
                printf("Network down in afs_read");
                error = ENETDOWN;
                break;
            }
#endif /* AFS_DISCON_ENV */

            ObtainReadLock(&tdc->lock);
            /* now, first try to start transfer, if we'll need the data.  If
             * data already coming, we don't need to do this, obviously.  Type
             * 2 requests never return a null dcache entry, btw.
             */
            if (!(tdc->dflags & DFFetching)
                && !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) {
                /* have cache entry, it is not coming in now, and we'll need new data */
              tagain:
                if (trybusy && !afs_BBusy()) {
                    struct brequest *bp;
                    /* daemon is not busy */
                    ObtainSharedLock(&tdc->mflock, 667);
                    if (!(tdc->mflags & DFFetchReq)) {
                        UpgradeSToWLock(&tdc->mflock, 668);
                        tdc->mflags |= DFFetchReq;
                        bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred,
                                        (afs_size_t) filePos, (afs_size_t) 0,
                                        tdc);
                        if (!bp) {
                            /* Bkg table full; retry deadlocks */
                            tdc->mflags &= ~DFFetchReq;
                            trybusy = 0;        /* Avoid bkg daemon since they're too busy */
                            ReleaseWriteLock(&tdc->mflock);
                            goto tagain;
                        }
                        ConvertWToSLock(&tdc->mflock);
                    }
                    code = 0;
                    ConvertSToRLock(&tdc->mflock);
                    while (!code && tdc->mflags & DFFetchReq) {
                        afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT,
                                   ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32,
                                   __LINE__, ICL_TYPE_POINTER, tdc,
                                   ICL_TYPE_INT32, tdc->dflags);
                        /* don't need waiting flag on this one */
                        ReleaseReadLock(&tdc->mflock);
                        ReleaseReadLock(&tdc->lock);
                        ReleaseReadLock(&avc->lock);
                        code = afs_osi_SleepSig(&tdc->validPos);
                        ObtainReadLock(&avc->lock);
                        ObtainReadLock(&tdc->lock);
                        ObtainReadLock(&tdc->mflock);
                    }
                    ReleaseReadLock(&tdc->mflock);
                    if (code) {
                        error = code;
                        break;
                    }
                }
            }
            /* now data may have started flowing in (if DFFetching is on).  If
             * data is now streaming in, then wait for some interesting stuff.
             */
            code = 0;
            while (!code && (tdc->dflags & DFFetching)
                   && tdc->validPos <= filePos) {
                /* too early: wait for DFFetching flag to vanish,
                 * or data to appear */
                afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING,
                           __FILE__, ICL_TYPE_INT32, __LINE__,
                           ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32,
                           tdc->dflags);
                ReleaseReadLock(&tdc->lock);
                ReleaseReadLock(&avc->lock);
                code = afs_osi_SleepSig(&tdc->validPos);
                ObtainReadLock(&avc->lock);
                ObtainReadLock(&tdc->lock);
            }
            if (code) {
                error = code;
                break;
            }
            /* fetching flag gone, data is here, or we never tried
             * (BBusy for instance) */
            if (tdc->dflags & DFFetching) {
                /* still fetching, some new data is here:
                 * compute length and offset */
                offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
                len = tdc->validPos - filePos;
            } else {
                /* no longer fetching, verify data version (avoid new
                 * GetDCache call) */
                if (hsame(avc->f.m.DataVersion, tdc->f.versionNo)
                    && ((len = tdc->validPos - filePos) > 0)) {
                    offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
                } else {
                    /* don't have current data, so get it below */
                    afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO,
                               ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos),
                               ICL_TYPE_HYPER, &avc->f.m.DataVersion,
                               ICL_TYPE_HYPER, &tdc->f.versionNo);
                    ReleaseReadLock(&tdc->lock);
                    afs_PutDCache(tdc);
                    tdc = NULL;
                }
            }

            if (!tdc) {
                /* If we get, it was not possible to start the
                 * background daemon. With flag == 1 afs_GetDCache
                 * does the FetchData rpc synchronously.
                 */
                ReleaseReadLock(&avc->lock);
                tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 1);
                ObtainReadLock(&avc->lock);
                if (tdc)
                    ObtainReadLock(&tdc->lock);
            }
        }

        if (!tdc) {
            error = EIO;
            break;
        }

        len = tdc->validPos - filePos;
        afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc,
                   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset),
                   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len));
        if (len > totalLength)
            len = totalLength;  /* will read len bytes */
        if (len <= 0) {         /* shouldn't get here if DFFetching is on */
            afs_Trace4(afs_iclSetp, CM_TRACE_VNODEREAD2, ICL_TYPE_POINTER,
                       tdc, ICL_TYPE_OFFSET,
                       ICL_HANDLE_OFFSET(tdc->validPos), ICL_TYPE_INT32,
                       tdc->f.chunkBytes, ICL_TYPE_INT32, tdc->dflags);
            /* read past the end of a chunk, may not be at next chunk yet, and yet
             * also not at eof, so may have to supply fake zeros */
            len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset;       /* bytes left in chunk addr space */
            if (len > totalLength)
                len = totalLength;      /* and still within xfr request */
            tlen = avc->f.m.Length - offset;    /* and still within file */
            if (len > tlen)
                len = tlen;
            if (len > AFS_ZEROS)
                len = sizeof(afs_zeros);        /* and in 0 buffer */
#ifdef AFS_DARWIN80_ENV
            trimlen = len;
            tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
            afsio_copy(auio, &tuio, tvec);
            trimlen = len;
            afsio_trim(&tuio, trimlen);
#endif
            AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
            if (code) {
                error = code;
                break;
            }
        } else {
            /* get the data from the file */
#ifdef IHINT
            if (tfile = tdc->ihint) {
                if (tdc->f.inode != tfile->inum) {
                    afs_warn("afs_UFSRead: %x hint mismatch tdc %d inum %d\n",
                             tdc, tdc->f.inode, tfile->inum);
                    osi_UFSClose(tfile);
                    tdc->ihint = tfile = 0;
                    nihints--;
                }
            }
            if (tfile != 0) {
                usedihint++;
            } else
#endif /* IHINT */
#if defined(LINUX_USE_FH)
                tfile = (struct osi_file *)osi_UFSOpen_fh(&tdc->f.fh, tdc->f.fh_type);
#else
                tfile = (struct osi_file *)osi_UFSOpen(tdc->f.inode);
#endif
#ifdef AFS_DARWIN80_ENV
            trimlen = len;
            tuiop = afsio_darwin_partialcopy(auio, trimlen);
            uio_setoffset(tuiop, offset);
#else
            /* mung uio structure to be right for this transfer */
            afsio_copy(auio, &tuio, tvec);
            trimlen = len;
            afsio_trim(&tuio, trimlen);
            tuio.afsio_offset = offset;
#endif

            /* Platform-specific read of the backing cache file. */
#if defined(AFS_AIX41_ENV)
            AFS_GUNLOCK();
            code =
                VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL,
                          NULL, afs_osi_credp);
            AFS_GLOCK();
#elif defined(AFS_AIX32_ENV)
            code =
                VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL);
            /* Flush all JFS pages now for big performance gain in big file cases
             * If we do something like this, must check to be sure that AFS file
             * isn't mmapped... see afs_gn_map() for why.
             */
/*
            if (tfile->vnode->v_gnode && tfile->vnode->v_gnode->gn_seg) {
 many different ways to do similar things:
   so far, the best performing one is #2, but #1 might match it if we
   straighten out the confusion regarding which pages to flush.  It
   really does matter.
   1.       vm_flushp(tfile->vnode->v_gnode->gn_seg, 0, len/PAGESIZE - 1);
   2.       vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE,
                        (len + PAGESIZE-1)/PAGESIZE);
   3.       vms_inactive(tfile->vnode->v_gnode->gn_seg) Doesn't work correctly
   4.       vms_delete(tfile->vnode->v_gnode->gn_seg) probably also fails
            tfile->vnode->v_gnode->gn_seg = NULL;
   5.       deletep
   6.       ipgrlse
   7.       ifreeseg
          Unfortunately, this seems to cause frequent "cache corruption" episodes.
            vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE,
                        (len + PAGESIZE-1)/PAGESIZE);
            }
*/
#elif defined(AFS_AIX_ENV)
            code =
                VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, (off_t) & offset,
                          &tuio, NULL, NULL, -1);
#elif defined(AFS_SUN5_ENV)
            AFS_GUNLOCK();
#ifdef AFS_SUN510_ENV
            {
                caller_context_t ct;

                VOP_RWLOCK(tfile->vnode, 0, &ct);
                code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp, &ct);
                VOP_RWUNLOCK(tfile->vnode, 0, &ct);
            }
#else
            VOP_RWLOCK(tfile->vnode, 0);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_RWUNLOCK(tfile->vnode, 0);
#endif
            AFS_GLOCK();
#elif defined(AFS_SGI_ENV)
            AFS_GUNLOCK();
            AFS_VOP_RWLOCK(tfile->vnode, VRWLOCK_READ);
            AFS_VOP_READ(tfile->vnode, &tuio, IO_ISLOCKED, afs_osi_credp,
                         code);
            AFS_VOP_RWUNLOCK(tfile->vnode, VRWLOCK_READ);
            AFS_GLOCK();
#elif defined(AFS_OSF_ENV)
            tuio.uio_rw = UIO_READ;
            AFS_GUNLOCK();
            VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp, code);
            AFS_GLOCK();
#elif defined(AFS_HPUX100_ENV)
            AFS_GUNLOCK();
            code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp);
            AFS_GLOCK();
#elif defined(AFS_LINUX20_ENV)
            AFS_GUNLOCK();
            code = osi_rdwr(tfile, &tuio, UIO_READ);
            AFS_GLOCK();
#elif defined(AFS_DARWIN80_ENV)
            AFS_GUNLOCK();
            code = VNOP_READ(tfile->vnode, tuiop, 0, afs_osi_ctxtp);
            AFS_GLOCK();
#elif defined(AFS_DARWIN_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, current_proc());
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0, current_proc());
            AFS_GLOCK();
#elif defined(AFS_FBSD80_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0);
            AFS_GLOCK();
#elif defined(AFS_FBSD50_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curthread);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0, curthread);
            AFS_GLOCK();
#elif defined(AFS_XBSD_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curproc);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0, curproc);
            AFS_GLOCK();
#else
            code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp);
#endif

#ifdef IHINT
            if (!tdc->ihint && nihints < maxIHint) {
                tdc->ihint = tfile;
                nihints++;
            } else
#endif /* IHINT */
                osi_UFSClose(tfile);

            if (code) {
                error = code;
                break;
            }
        }
        /* otherwise we've read some, fixup length, etc and continue with next seg */
        len = len - AFS_UIO_RESID(tuiop);       /* compute amount really transferred */
        trimlen = len;
        afsio_skip(auio, trimlen);      /* update input uio structure */
        totalLength -= len;
        transferLength += len;
        filePos += len;
        if (len <= 0)
            break;              /* surprise eof */
#ifdef AFS_DARWIN80_ENV
        if (tuiop) {
            uio_free(tuiop);
            tuiop = 0;
        }
#endif
    }

    /* if we make it here with tdc non-zero, then it is the last chunk we
     * dealt with, and we have to release it when we're done.  We hold on
     * to it in case we need to do a prefetch, obviously.
     */
    if (tdc) {
        ReleaseReadLock(&tdc->lock);
#if !defined(AFS_VM_RDWR_ENV)
        /* try to queue prefetch, if needed */
        if (!noLock) {
            if (!(tdc->mflags & DFNextStarted))
                afs_PrefetchChunk(avc, tdc, acred, &treq);
        }
#endif
        afs_PutDCache(tdc);
    }
    if (!noLock)
        ReleaseReadLock(&avc->lock);

#ifdef AFS_DARWIN80_ENV
    if (tuiop)
        uio_free(tuiop);
#else
    osi_FreeSmallSpace(tvec);
#endif
    AFS_DISCON_UNLOCK();
    error = afs_CheckCode(error, &treq, 13);
    return error;
}
/*
 * afs_MemRead
 *
 * Read file data for vcache entry avc into the caller's uio, one dcache
 * chunk at a time, copying the bytes out with afs_MemReadUIO().
 *
 * Parameters:
 *   avc     - vcache entry being read (dereferenced unconditionally)
 *   auio    - destination uio; advanced by the amount transferred
 *   acred   - caller's credentials
 *   albn    - not referenced in this function
 *   abpp    - not referenced in this function
 *   noLock  - when non-zero: skip afs_VerifyVCache, do not take avc->lock,
 *             look chunks up with afs_FindDCache only (no fetching), and
 *             skip the prefetch at the end
 *
 * Returns EIO immediately if avc->vc_error is set, the afs_CreateReq error
 * if request setup fails, and otherwise the accumulated error passed
 * through afs_CheckCode().  The vrequest obtained from afs_CreateReq is
 * released with afs_DestroyReq on every later exit path.
 */
int
afs_MemRead(struct vcache *avc, struct uio *auio,
            afs_ucred_t *acred, daddr_t albn, struct buf **abpp,
            int noLock)
{
    afs_size_t totalLength;
    afs_size_t transferLength;
    afs_size_t filePos;
    afs_size_t offset, tlen;
    afs_size_t len = 0;
    afs_int32 trimlen;
    struct dcache *tdc = 0;
    afs_int32 error, trybusy = 1;
    afs_int32 code;
    struct vrequest *treq = NULL;
#ifdef AFS_DARWIN80_ENV
    uio_t tuiop = NULL;
#else
    struct uio tuio;
    struct uio *tuiop = &tuio;
    struct iovec *tvec;
    memset(&tuio, 0, sizeof(tuio));
#endif

    AFS_STATCNT(afs_MemRead);
    if (avc->vc_error)
        return EIO;

    /* check that we have the latest status info in the vnode cache */
    if ((code = afs_CreateReq(&treq, acred)))
        return code;
    if (!noLock) {
        code = afs_VerifyVCache(avc, treq);
        if (code) {
            code = afs_CheckCode(code, treq, 8);        /* failed to get it */
            afs_DestroyReq(treq);
            return code;
        }
    }
#ifndef AFS_VM_RDWR_ENV
    if (AFS_NFSXLATORREQ(acred)) {
        if (!afs_AccessOK
            (avc, PRSFS_READ, treq,
             CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) {
            code = afs_CheckCode(EACCES, treq, 9);
            afs_DestroyReq(treq);
            return code;
        }
    }
#endif

#ifndef AFS_DARWIN80_ENV
    tvec = (struct iovec *)osi_AllocSmallSpace(sizeof(struct iovec));
    memset(tvec, 0, sizeof(struct iovec));
#endif
    totalLength = AFS_UIO_RESID(auio);
    filePos = AFS_UIO_OFFSET(auio);
    afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc,
               ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32,
               totalLength, ICL_TYPE_OFFSET,
               ICL_HANDLE_OFFSET(avc->f.m.Length));
    error = 0;
    transferLength = 0;
    if (!noLock)
        ObtainReadLock(&avc->lock);
#if defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV)
    if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) {
        hset(avc->flushDV, avc->f.m.DataVersion);
    }
#endif

    /*
     * Locks held:
     * avc->lock(R)
     */

    /* This bit is bogus. We're checking to see if the read goes past the
     * end of the file. If so, we should be zeroing out all of the buffers
     * that the client has passed into us (there is a danger that we may leak
     * kernel memory if we do not). However, this behaviour is disabled by
     * not setting len before this segment runs, and by setting len to 0
     * immediately we enter it. In addition, we also need to check for a read
     * which partially goes off the end of the file in the while loop below.
     */

    if (filePos >= avc->f.m.Length) {
        if (len > AFS_ZEROS)
            len = sizeof(afs_zeros);    /* and in 0 buffer */
        len = 0;
#ifdef AFS_DARWIN80_ENV
        trimlen = len;
        tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
        afsio_copy(auio, &tuio, tvec);
        trimlen = len;
        afsio_trim(&tuio, trimlen);
#endif
        AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
    }

    /* Main transfer loop: each pass handles the chunk containing filePos. */
    while (avc->f.m.Length > 0 && totalLength > 0) {
        /* read all of the cached info */
        if (filePos >= avc->f.m.Length)
            break;              /* all done */
        if (noLock) {
            /* Cache-only lookup; tdc may come back NULL, in which case
             * offset is not assigned on this pass. */
            if (tdc) {
                ReleaseReadLock(&tdc->lock);
                afs_PutDCache(tdc);
            }
            tdc = afs_FindDCache(avc, filePos);
            if (tdc) {
                ObtainReadLock(&tdc->lock);
                offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
                len = tdc->f.chunkBytes - offset;
            }
        } else {
            int versionOk;
            /* a tricky question: does the presence of the DFFetching flag
             * mean that we're fetching the latest version of the file?  No.
             * The server could update the file as soon as the fetch responsible
             * for the setting of the DFFetching flag completes.
             *
             * However, the presence of the DFFetching flag (visible under
             * a dcache read lock since it is set and cleared only under a
             * dcache write lock) means that we're fetching as good a version
             * as was known to this client at the time of the last call to
             * afs_VerifyVCache, since the latter updates the stat cache's
             * m.DataVersion field under a vcache write lock, and from the
             * time that the DFFetching flag goes on in afs_GetDCache (before
             * the fetch starts), to the time it goes off (after the fetch
             * completes), afs_GetDCache keeps at least a read lock on the
             * vcache entry.
             *
             * This means that if the DFFetching flag is set, we can use that
             * data for any reads that must come from the current version of
             * the file (current == m.DataVersion).
             *
             * Another way of looking at this same point is this: if we're
             * fetching some data and then try do an afs_VerifyVCache, the
             * VerifyVCache operation will not complete until after the
             * DFFetching flag is turned off and the dcache entry's f.versionNo
             * field is updated.
             *
             * Note, by the way, that if DFFetching is set,
             * m.DataVersion > f.versionNo (the latter is not updated until
             * after the fetch completes).
             */
            if (tdc) {
                ReleaseReadLock(&tdc->lock);
                afs_PutDCache(tdc);     /* before reusing tdc */
            }
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
          try_background:
#endif
            tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 2);
            ObtainReadLock(&tdc->lock);
            /* now, first try to start transfer, if we'll need the data.  If
             * data already coming, we don't need to do this, obviously.  Type
             * 2 requests never return a null dcache entry, btw.
             */
            if (!(tdc->dflags & DFFetching)
                && !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) {
                /* have cache entry, it is not coming in now,
                 * and we'll need new data */
              tagain:
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
                if (trybusy && (!afs_BBusy() || (afs_protocols & VICEP_ACCESS))) {
#else
                if (trybusy && !afs_BBusy()) {
#endif
                    struct brequest *bp;
                    /* daemon is not busy */
                    ObtainSharedLock(&tdc->mflock, 665);
                    if (!(tdc->mflags & DFFetchReq)) {
                        int dontwait = B_DONTWAIT;

                        /* start the daemon (may already be running, however) */
                        UpgradeSToWLock(&tdc->mflock, 666);
                        tdc->mflags |= DFFetchReq;
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
                        if (afs_protocols & VICEP_ACCESS)
                            dontwait = 0;
#endif
                        bp = afs_BQueue(BOP_FETCH, avc, dontwait, 0, acred,
                                        (afs_size_t) filePos, (afs_size_t) 0,
                                        tdc, (void *)0, (void *)0);
                        if (!bp) {
                            tdc->mflags &= ~DFFetchReq;
                            trybusy = 0;        /* Avoid bkg daemon since they're too busy */
                            ReleaseWriteLock(&tdc->mflock);
                            goto tagain;
                        }
                        ConvertWToSLock(&tdc->mflock);
                        /* don't use bp pointer! */
                    }
                    code = 0;
                    ConvertSToRLock(&tdc->mflock);
                    /* Sleep with all three read locks dropped, then retake
                     * them in the order avc, tdc, mflock. */
                    while (!code && tdc->mflags & DFFetchReq) {
                        afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT,
                                   ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32,
                                   __LINE__, ICL_TYPE_POINTER, tdc,
                                   ICL_TYPE_INT32, tdc->dflags);
                        /* don't need waiting flag on this one */
                        ReleaseReadLock(&tdc->mflock);
                        ReleaseReadLock(&tdc->lock);
                        ReleaseReadLock(&avc->lock);
                        code = afs_osi_SleepSig(&tdc->validPos);
                        ObtainReadLock(&avc->lock);
                        ObtainReadLock(&tdc->lock);
                        ObtainReadLock(&tdc->mflock);
                    }
                    ReleaseReadLock(&tdc->mflock);
                    if (code) {
                        error = code;
                        break;
                    }
                }
            }
            /* now data may have started flowing in (if DFFetching is on).  If
             * data is now streaming in, then wait for some interesting stuff.
             */
            code = 0;
            while (!code && (tdc->dflags & DFFetching)
                   && tdc->validPos <= filePos) {
                /* too early: wait for DFFetching flag to vanish,
                 * or data to appear */
                afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING,
                           __FILE__, ICL_TYPE_INT32, __LINE__,
                           ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32,
                           tdc->dflags);
                ReleaseReadLock(&tdc->lock);
                ReleaseReadLock(&avc->lock);
                code = afs_osi_SleepSig(&tdc->validPos);
                ObtainReadLock(&avc->lock);
                ObtainReadLock(&tdc->lock);
            }
            if (code) {
                error = code;
                break;
            }
            /* fetching flag gone, data is here, or we never tried
             * (BBusy for instance) */
            len = tdc->validPos - filePos;
            versionOk = hsame(avc->f.m.DataVersion, tdc->f.versionNo) ? 1 : 0;
            if (tdc->dflags & DFFetching) {
                /* still fetching, some new data is here:
                 * compute length and offset */
                offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
            } else {
                /* no longer fetching, verify data version
                 * (avoid new GetDCache call) */
                if (versionOk && len > 0) {
                    offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
                } else {
                    /* don't have current data, so get it below */
                    afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO,
                               ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos),
                               ICL_TYPE_HYPER, &avc->f.m.DataVersion,
                               ICL_TYPE_HYPER, &tdc->f.versionNo);
#if 0
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
                    if (afs_protocols & VICEP_ACCESS) {
                        printf("afs_read: DV mismatch? %d instead of %d for %u.%u.%u\n",
                               tdc->f.versionNo.low, avc->f.m.DataVersion.low,
                               avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode,
                               avc->f.fid.Fid.Unique);
                        printf("afs_read: validPos %llu filePos %llu totalLength %lld m.Length %llu noLock %d\n",
                               tdc->validPos, filePos, totalLength,
                               avc->f.m.Length, noLock);
                        printf("afs_read: or len too low? %lld for %u.%u.%u\n",
                               len, avc->f.fid.Fid.Volume,
                               avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique);
                    }
#endif
#endif
                    ReleaseReadLock(&tdc->lock);
                    afs_PutDCache(tdc);
                    tdc = NULL;
                }
            }

            if (!tdc) {
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
                if (afs_protocols & VICEP_ACCESS) {     /* avoid foreground fetch */
                    if (!versionOk) {
                        printf("afs_read: avoid forground %u.%u.%u\n",
                               avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode,
                               avc->f.fid.Fid.Unique);
                        goto try_background;
                    }
#if 0
                    printf("afs_read: forground %u.%u.%u\n",
                           avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode,
                           avc->f.fid.Fid.Unique);
#endif
                }
#endif
                /* If we get here, it was not possible to start the
                 * background daemon. With flag == 1 afs_GetDCache
                 * does the FetchData rpc synchronously.
                 */
                ReleaseReadLock(&avc->lock);
                tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 1);
                ObtainReadLock(&avc->lock);
                if (tdc)
                    ObtainReadLock(&tdc->lock);
            }
        }

        /* NOTE(review): on the noLock path with afs_FindDCache returning
         * NULL on the first pass, offset has not been assigned when it is
         * traced here -- confirm whether that matters for the trace log. */
        afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc,
                   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset),
                   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len));
        if (!tdc) {
            error = EIO;
            break;
        }

        /*
         * Locks held:
         * avc->lock(R)
         * tdc->lock(R)
         */

        if (len > totalLength)
            len = totalLength;  /* will read len bytes */
        if (len <= 0) {         /* shouldn't get here if DFFetching is on */
            /* read past the end of a chunk, may not be at next chunk yet, and yet
             * also not at eof, so may have to supply fake zeros */
            len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset;       /* bytes left in chunk addr space */
            if (len > totalLength)
                len = totalLength;      /* and still within xfr request */
            tlen = avc->f.m.Length - offset;    /* and still within file */
            if (len > tlen)
                len = tlen;
            if (len > AFS_ZEROS)
                len = sizeof(afs_zeros);        /* and in 0 buffer */
#ifdef AFS_DARWIN80_ENV
            trimlen = len;
            tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
            afsio_copy(auio, &tuio, tvec);
            trimlen = len;
            afsio_trim(&tuio, trimlen);
#endif
            AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
            if (code) {
                error = code;
                break;
            }
        } else {
            /* get the data from the mem cache */

            /* mung uio structure to be right for this transfer */
#ifdef AFS_DARWIN80_ENV
            trimlen = len;
            tuiop = afsio_darwin_partialcopy(auio, trimlen);
            uio_setoffset(tuiop, offset);
#else
            afsio_copy(auio, &tuio, tvec);
            trimlen = len;
            afsio_trim(&tuio, trimlen);
            tuio.afsio_offset = offset;
#endif

            code = afs_MemReadUIO(&tdc->f.inode, tuiop);

            if (code) {
                error = code;
                break;
            }
        }
        /* otherwise we've read some, fixup length, etc and continue with next seg */
        len = len - AFS_UIO_RESID(tuiop);       /* compute amount really transferred */
        trimlen = len;
        afsio_skip(auio, trimlen);      /* update input uio structure */
        totalLength -= len;
        transferLength += len;
        filePos += len;

        if (len <= 0)
            break;              /* surprise eof */
#ifdef AFS_DARWIN80_ENV
        if (tuiop) {
            uio_free(tuiop);
            tuiop = 0;
        }
#endif
    }                           /* the whole while loop */

    /*
     * Locks held:
     * avc->lock(R)
     * tdc->lock(R) if tdc
     */

    /* if we make it here with tdc non-zero, then it is the last chunk we
     * dealt with, and we have to release it when we're done.  We hold on
     * to it in case we need to do a prefetch.
     */
    if (tdc) {
        ReleaseReadLock(&tdc->lock);
        /*
         * try to queue prefetch, if needed. If DataVersion is zero there
         * should not be any more: files with DV 0 never have been stored
         * on the fileserver, symbolic links and directories never require
         * more than a single chunk.
         */
        if (!noLock && !(hiszero(avc->f.m.DataVersion)) &&
#ifndef AFS_VM_RDWR_ENV
            afs_preCache
#else
            1
#endif
            ) {
            afs_PrefetchChunk(avc, tdc, acred, treq);
        }
        afs_PutDCache(tdc);
    }
    if (!noLock)
        ReleaseReadLock(&avc->lock);

    /* Release the per-call scratch uio/iovec and the vrequest. */
#ifdef AFS_DARWIN80_ENV
    if (tuiop)
        uio_free(tuiop);
#else
    osi_FreeSmallSpace(tvec);
#endif
    error = afs_CheckCode(error, treq, 10);
    afs_DestroyReq(treq);
    return error;
}

/* called with the dcache entry triggering the fetch, the vcache entry involved,
 * and a vrequest for the read call.  Marks the dcache entry as having already
 * triggered a prefetch, starts the prefetch going and sets the DFFetchReq
 * flag in the prefetched block, so that the next call to read knows to wait
 * for the daemon to start doing things.
 *
 * This function must be called with the vnode at least read-locked, and
 * no locks on the dcache, because it plays around with dcache entries.
*/
void
afs_PrefetchChunk(struct vcache *avc, struct dcache *adc,
                  afs_ucred_t *acred, struct vrequest *areq)
{
    struct dcache *tdc;
    afs_size_t offset;
    afs_size_t j1, j2;          /* junk vbls for GetDCache to trash */

    offset = adc->f.chunk + 1;  /* next chunk we'll need */
    offset = AFS_CHUNKTOBASE(offset);   /* base of next chunk */
    ObtainReadLock(&adc->lock);
    ObtainSharedLock(&adc->mflock, 662);
    if (offset < avc->f.m.Length && !(adc->mflags & DFNextStarted)
        && !afs_BBusy()) {
        struct brequest *bp;

        UpgradeSToWLock(&adc->mflock, 663);
        adc->mflags |= DFNextStarted;   /* we've tried to prefetch for this guy */
        ReleaseWriteLock(&adc->mflock);
        ReleaseReadLock(&adc->lock);

        tdc = afs_GetDCache(avc, offset, areq, &j1, &j2, 2);    /* type 2 never returns 0 */
        /*
         * In disconnected mode, type 2 can return 0 because it doesn't
         * make any sense to allocate a dcache we can never fill
         */
        if (tdc == NULL)
            return;

        ObtainSharedLock(&tdc->mflock, 651);
        if (!(tdc->mflags & DFFetchReq)) {
            /* ask the daemon to do the work */
            UpgradeSToWLock(&tdc->mflock, 652);
            tdc->mflags |= DFFetchReq;  /* guaranteed to be cleared by BKG or GetDCache */
            /* last parm (1) tells bkg daemon to do an afs_PutDCache when it is done,
             * since we don't want to wait for it to finish before doing so ourselves.
             */
            bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred,
                            (afs_size_t) offset, (afs_size_t) 1, tdc,
                            (void *)0, (void *)0);
            if (!bp) {
                /* Bkg table full; just abort non-important prefetching to avoid deadlocks */
                tdc->mflags &= ~DFFetchReq;
                ReleaseWriteLock(&tdc->mflock);
                afs_PutDCache(tdc);

                /*
                 * DCLOCKXXX: This is a little sketchy, since someone else
                 * could have already started a prefetch.. In practice,
                 * this probably doesn't matter; at most it would cause an
                 * extra slot in the BKG table to be used up when someone
                 * prefetches this for the second time.
                 */
                ObtainReadLock(&adc->lock);
                ObtainWriteLock(&adc->mflock, 664);
                adc->mflags &= ~DFNextStarted;
                ReleaseWriteLock(&adc->mflock);
                ReleaseReadLock(&adc->lock);
            } else {
                ReleaseWriteLock(&tdc->mflock);
            }
        } else {
            ReleaseSharedLock(&tdc->mflock);
            afs_PutDCache(tdc);
        }
    } else {
        ReleaseSharedLock(&adc->mflock);
        ReleaseReadLock(&adc->lock);
    }
}

/*
 * Read from an AFS file whose chunks are cached in local cache files.
 *
 * Starting at the offset in auio, walks the file chunk by chunk: obtains the
 * dcache entry for the current position, waits for (or triggers) a fetch when
 * the cached chunk is not current, then copies data out of the cache file
 * opened with osi_UFSOpen() into the caller's uio.  Ranges of a chunk that
 * lie beyond validPos but inside the file are satisfied from afs_zeros.
 *
 * avc    - vcache of the file being read
 * auio   - caller's uio; advanced by the number of bytes transferred
 * acred  - credentials used for the vrequest and any background fetch
 * albn, abpp - not referenced by this function
 * noLock - when non-zero the vcache is neither verified nor read-locked
 *          here, only afs_FindDCache() is used to locate chunks, and no
 *          prefetch is queued
 *
 * Returns 0 on success, otherwise an error code (passed through
 * afs_CheckCode() on every path where a vrequest exists).
 *
 * AFS_DISCON_LOCK() is held from just after the vc_error check until return;
 * every exit path past that point must release it.
 */
int
afs_UFSRead(struct vcache *avc, struct uio *auio, afs_ucred_t *acred,
            daddr_t albn, struct buf **abpp, int noLock)
{
    afs_size_t totalLength;
    afs_size_t transferLength;
    afs_size_t filePos;
    afs_size_t offset, tlen;
    afs_size_t len = 0;
    afs_int32 trimlen;
    struct dcache *tdc = 0;
    afs_int32 error;
    struct osi_file *tfile;
    afs_int32 code;
    int trybusy = 1;
    struct vrequest *treq = NULL;
#ifdef AFS_DARWIN80_ENV
    uio_t tuiop=NULL;
#else
    struct uio tuio;
    struct uio *tuiop = &tuio;
    struct iovec *tvec;
    memset(&tuio, 0, sizeof(tuio));
#endif

    AFS_STATCNT(afs_UFSRead);
    if (avc && avc->vc_error)
        return EIO;

    AFS_DISCON_LOCK();

    /* check that we have the latest status info in the vnode cache */
    code = afs_CreateReq(&treq, acred);
    if (code) {
        /* Don't leak the disconnected-mode lock taken just above. */
        AFS_DISCON_UNLOCK();
        return code;
    }
    if (!noLock) {
        if (!avc)
            osi_Panic("null avc in afs_UFSRead");
        else {
            code = afs_VerifyVCache(avc, treq);
            if (code) {
                code = afs_CheckCode(code, treq, 11);   /* failed to get it */
                afs_DestroyReq(treq);
                AFS_DISCON_UNLOCK();
                return code;
            }
        }
    }
#ifndef AFS_VM_RDWR_ENV
    if (AFS_NFSXLATORREQ(acred)) {
        if (!afs_AccessOK
            (avc, PRSFS_READ, treq,
             CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) {
            AFS_DISCON_UNLOCK();
            code = afs_CheckCode(EACCES, treq, 12);
            afs_DestroyReq(treq);
            return code;
        }
    }
#endif

#ifndef AFS_DARWIN80_ENV
    tvec = (struct iovec *)osi_AllocSmallSpace(sizeof(struct iovec));
    memset(tvec, 0, sizeof(struct iovec));
#endif

    totalLength = AFS_UIO_RESID(auio);
    filePos = AFS_UIO_OFFSET(auio);
    afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc,
               ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32,
               totalLength, ICL_TYPE_OFFSET,
               ICL_HANDLE_OFFSET(avc->f.m.Length));
    error = 0;
    transferLength = 0;
    if (!noLock)
        ObtainReadLock(&avc->lock);
#if defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV)
    if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) {
        hset(avc->flushDV, avc->f.m.DataVersion);
    }
#endif

    /* This bit is bogus. We're checking to see if the read goes past the
     * end of the file. If so, we should be zeroing out all of the buffers
     * that the client has passed into us (there is a danger that we may leak
     * kernel memory if we do not). However, this behaviour is disabled by
     * not setting len before this segment runs, and by setting len to 0
     * immediately we enter it. In addition, we also need to check for a read
     * which partially goes off the end of the file in the while loop below.
     */
    if (filePos >= avc->f.m.Length) {
        if (len > AFS_ZEROS)
            len = sizeof(afs_zeros);    /* and in 0 buffer */
        len = 0;
#ifdef AFS_DARWIN80_ENV
        trimlen = len;
        tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
        afsio_copy(auio, &tuio, tvec);
        trimlen = len;
        afsio_trim(&tuio, trimlen);
#endif
        AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
    }

    while (avc->f.m.Length > 0 && totalLength > 0) {
        /* read all of the cached info */
        if (filePos >= avc->f.m.Length)
            break;              /* all done */
        if (noLock) {
            if (tdc) {
                ReleaseReadLock(&tdc->lock);
                afs_PutDCache(tdc);
            }
            tdc = afs_FindDCache(avc, filePos);
            if (tdc) {
                ObtainReadLock(&tdc->lock);
                offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
                len = tdc->validPos - filePos;
            }
        } else {
            int versionOk;
            /* a tricky question: does the presence of the DFFetching flag
             * mean that we're fetching the latest version of the file?  No.
             * The server could update the file as soon as the fetch responsible
             * for the setting of the DFFetching flag completes.
             *
             * However, the presence of the DFFetching flag (visible under
             * a dcache read lock since it is set and cleared only under a
             * dcache write lock) means that we're fetching as good a version
             * as was known to this client at the time of the last call to
             * afs_VerifyVCache, since the latter updates the stat cache's
             * m.DataVersion field under a vcache write lock, and from the
             * time that the DFFetching flag goes on in afs_GetDCache (before
             * the fetch starts), to the time it goes off (after the fetch
             * completes), afs_GetDCache keeps at least a read lock on the
             * vcache entry.
             *
             * This means that if the DFFetching flag is set, we can use that
             * data for any reads that must come from the current version of
             * the file (current == m.DataVersion).
             *
             * Another way of looking at this same point is this: if we're
             * fetching some data and then try do an afs_VerifyVCache, the
             * VerifyVCache operation will not complete until after the
             * DFFetching flag is turned off and the dcache entry's f.versionNo
             * field is updated.
             *
             * Note, by the way, that if DFFetching is set,
             * m.DataVersion > f.versionNo (the latter is not updated until
             * after the fetch completes).
             */
            if (tdc) {
                ReleaseReadLock(&tdc->lock);
                afs_PutDCache(tdc);     /* before reusing tdc */
            }
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
          try_background:
#endif
            tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 2);
            if (!tdc) {
                error = ENETDOWN;
                break;
            }

            ObtainReadLock(&tdc->lock);
            /* now, first try to start transfer, if we'll need the data.  If
             * data already coming, we don't need to do this, obviously.  Type
             * 2 requests never return a null dcache entry, btw.
             */
            if (!(tdc->dflags & DFFetching)
                && !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) {
                /* have cache entry, it is not coming in now, and we'll need new data */
              tagain:
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
                if (trybusy && (!afs_BBusy() || (afs_protocols & VICEP_ACCESS))) {
#else
                if (trybusy && !afs_BBusy()) {
#endif
                    struct brequest *bp;
                    /* daemon is not busy */
                    ObtainSharedLock(&tdc->mflock, 667);
                    if (!(tdc->mflags & DFFetchReq)) {
                        int dontwait = B_DONTWAIT;

                        UpgradeSToWLock(&tdc->mflock, 668);
                        tdc->mflags |= DFFetchReq;
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
                        if (afs_protocols & VICEP_ACCESS)
                            dontwait = 0;
#endif
                        bp = afs_BQueue(BOP_FETCH, avc, dontwait, 0, acred,
                                        (afs_size_t) filePos, (afs_size_t) 0,
                                        tdc, (void *)0, (void *)0);
                        if (!bp) {
                            /* Bkg table full; retry deadlocks */
                            tdc->mflags &= ~DFFetchReq;
                            trybusy = 0;        /* Avoid bkg daemon since they're too busy */
                            ReleaseWriteLock(&tdc->mflock);
                            goto tagain;
                        }
                        ConvertWToSLock(&tdc->mflock);
                    }
                    code = 0;
                    ConvertSToRLock(&tdc->mflock);
                    while (!code && tdc->mflags & DFFetchReq) {
                        afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT,
                                   ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32,
                                   __LINE__, ICL_TYPE_POINTER, tdc,
                                   ICL_TYPE_INT32, tdc->dflags);
                        /* don't need waiting flag on this one */
                        ReleaseReadLock(&tdc->mflock);
                        ReleaseReadLock(&tdc->lock);
                        ReleaseReadLock(&avc->lock);
                        code = afs_osi_SleepSig(&tdc->validPos);
                        ObtainReadLock(&avc->lock);
                        ObtainReadLock(&tdc->lock);
                        ObtainReadLock(&tdc->mflock);
                    }
                    ReleaseReadLock(&tdc->mflock);
                    if (code) {
                        error = code;
                        break;
                    }
                }
            }
            /* now data may have started flowing in (if DFFetching is on).  If
             * data is now streaming in, then wait for some interesting stuff.
             */
            code = 0;
            while (!code && (tdc->dflags & DFFetching)
                   && tdc->validPos <= filePos) {
                /* too early: wait for DFFetching flag to vanish,
                 * or data to appear */
                afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING,
                           __FILE__, ICL_TYPE_INT32, __LINE__,
                           ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32,
                           tdc->dflags);
                ReleaseReadLock(&tdc->lock);
                ReleaseReadLock(&avc->lock);
                code = afs_osi_SleepSig(&tdc->validPos);
                ObtainReadLock(&avc->lock);
                ObtainReadLock(&tdc->lock);
            }
            if (code) {
                error = code;
                break;
            }
            /* fetching flag gone, data is here, or we never tried
             * (BBusy for instance) */
            len = tdc->validPos - filePos;
            versionOk = hsame(avc->f.m.DataVersion, tdc->f.versionNo) ? 1 : 0;
            if (tdc->dflags & DFFetching) {
                /* still fetching, some new data is here:
                 * compute length and offset */
                offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
            } else {
                /* no longer fetching, verify data version (avoid new
                 * GetDCache call) */
                if (versionOk && len > 0) {
                    offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
                } else {
                    /* don't have current data, so get it below */
                    afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO,
                               ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos),
                               ICL_TYPE_HYPER, &avc->f.m.DataVersion,
                               ICL_TYPE_HYPER, &tdc->f.versionNo);
                    ReleaseReadLock(&tdc->lock);
                    afs_PutDCache(tdc);
                    tdc = NULL;
                }
            }

            if (!tdc) {
#ifdef STRUCT_TASK_STRUCT_HAS_CRED
                if (afs_protocols & VICEP_ACCESS) {     /* avoid foreground fetch */
                    if (!versionOk) {
                        printf("afs_read: avoid forground %u.%u.%u\n",
                               avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode,
                               avc->f.fid.Fid.Unique);
                        goto try_background;
                    }
                }
#endif
                /* If we get here, it was not possible to start the
                 * background daemon. With flag == 1 afs_GetDCache
                 * does the FetchData rpc synchronously.
                 */
                ReleaseReadLock(&avc->lock);
                tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 1);
                ObtainReadLock(&avc->lock);
                if (tdc)
                    ObtainReadLock(&tdc->lock);
            }
        }

        if (!tdc) {
            error = EIO;
            break;
        }
        len = tdc->validPos - filePos;
        afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc,
                   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset),
                   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len));
        if (len > totalLength)
            len = totalLength;  /* will read len bytes */
        if (len <= 0) {         /* shouldn't get here if DFFetching is on */
            afs_Trace4(afs_iclSetp, CM_TRACE_VNODEREAD2, ICL_TYPE_POINTER,
                       tdc, ICL_TYPE_OFFSET,
                       ICL_HANDLE_OFFSET(tdc->validPos), ICL_TYPE_INT32,
                       tdc->f.chunkBytes, ICL_TYPE_INT32, tdc->dflags);
            /* read past the end of a chunk, may not be at next chunk yet, and yet
             * also not at eof, so may have to supply fake zeros */
            len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset;       /* bytes left in chunk addr space */
            if (len > totalLength)
                len = totalLength;      /* and still within xfr request */
            tlen = avc->f.m.Length - offset;    /* and still within file */
            if (len > tlen)
                len = tlen;
            if (len > AFS_ZEROS)
                len = sizeof(afs_zeros);        /* and in 0 buffer */
#ifdef AFS_DARWIN80_ENV
            trimlen = len;
            tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
            afsio_copy(auio, &tuio, tvec);
            trimlen = len;
            afsio_trim(&tuio, trimlen);
#endif
            AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
            if (code) {
                error = code;
                break;
            }
        } else {
            /* get the data from the file */
            tfile = (struct osi_file *)osi_UFSOpen(&tdc->f.inode);
#ifdef AFS_DARWIN80_ENV
            trimlen = len;
            tuiop = afsio_darwin_partialcopy(auio, trimlen);
            uio_setoffset(tuiop, offset);
#else
            /* mung uio structure to be right for this transfer */
            afsio_copy(auio, &tuio, tvec);
            trimlen = len;
            afsio_trim(&tuio, trimlen);
            tuio.afsio_offset = offset;
#endif

            /* Platform-specific read of the cache file into the trimmed uio. */
#if defined(AFS_AIX41_ENV)
            AFS_GUNLOCK();
            code =
                VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL,
                          NULL, afs_osi_credp);
            AFS_GLOCK();
#elif defined(AFS_AIX32_ENV)
            code =
                VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL);
            /* Flush all JFS pages now for big performance gain in big file cases
             * If we do something like this, must check to be sure that AFS file
             * isn't mmapped... see afs_gn_map() for why.
             */
            /*
             * if (tfile->vnode->v_gnode && tfile->vnode->v_gnode->gn_seg) {
             *   many different ways to do similar things: so far, the best
             *   performing one is #2, but #1 might match it if we straighten
             *   out the confusion regarding which pages to flush.  It really
             *   does matter.
             *   1. vm_flushp(tfile->vnode->v_gnode->gn_seg, 0, len/PAGESIZE - 1);
             *   2. vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE,
             *                  (len + PAGESIZE-1)/PAGESIZE);
             *   3. vms_inactive(tfile->vnode->v_gnode->gn_seg) Doesn't work correctly
             *   4. vms_delete(tfile->vnode->v_gnode->gn_seg) probably also fails
             *      tfile->vnode->v_gnode->gn_seg = NULL;
             *   5. deletep
             *   6. ipgrlse
             *   7. ifreeseg
             *   Unfortunately, this seems to cause frequent "cache corruption"
             *   episodes.
             *   vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE,
             *               (len + PAGESIZE-1)/PAGESIZE);
             * }
             */
#elif defined(AFS_AIX_ENV)
            code =
                VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, (off_t) & offset,
                          &tuio, NULL, NULL, -1);
#elif defined(AFS_SUN5_ENV)
            AFS_GUNLOCK();
#ifdef AFS_SUN510_ENV
            VOP_RWLOCK(tfile->vnode, 0, NULL);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp, NULL);
            VOP_RWUNLOCK(tfile->vnode, 0, NULL);
#else
            VOP_RWLOCK(tfile->vnode, 0);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_RWUNLOCK(tfile->vnode, 0);
#endif
            AFS_GLOCK();
#elif defined(AFS_SGI_ENV)
            AFS_GUNLOCK();
            AFS_VOP_RWLOCK(tfile->vnode, VRWLOCK_READ);
            AFS_VOP_READ(tfile->vnode, &tuio, IO_ISLOCKED, afs_osi_credp,
                         code);
            AFS_VOP_RWUNLOCK(tfile->vnode, VRWLOCK_READ);
            AFS_GLOCK();
#elif defined(AFS_HPUX100_ENV)
            AFS_GUNLOCK();
            code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp);
            AFS_GLOCK();
#elif defined(AFS_LINUX20_ENV)
            AFS_GUNLOCK();
            code = osi_rdwr(tfile, &tuio, UIO_READ);
            AFS_GLOCK();
#elif defined(AFS_DARWIN80_ENV)
            AFS_GUNLOCK();
            code = VNOP_READ(tfile->vnode, tuiop, 0, afs_osi_ctxtp);
            AFS_GLOCK();
#elif defined(AFS_DARWIN_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, current_proc());
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0, current_proc());
            AFS_GLOCK();
#elif defined(AFS_FBSD80_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0);
            AFS_GLOCK();
#elif defined(AFS_FBSD_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curthread);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0, curthread);
            AFS_GLOCK();
#elif defined(AFS_NBSD_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0);
            AFS_GLOCK();
#elif defined(AFS_XBSD_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curproc);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0, curproc);
            AFS_GLOCK();
#else
            code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp);
#endif
            osi_UFSClose(tfile);

            if (code) {
                error = code;
                break;
            }
        }
        /* otherwise we've read some, fixup length, etc and continue with next seg */
        len = len - AFS_UIO_RESID(tuiop);       /* compute amount really transferred */
        trimlen = len;
        afsio_skip(auio, trimlen);      /* update input uio structure */
        totalLength -= len;
        transferLength += len;
        filePos += len;
        if (len <= 0)
            break;              /* surprise eof */
#ifdef AFS_DARWIN80_ENV
        if (tuiop) {
            uio_free(tuiop);
            tuiop = 0;
        }
#endif
    }

    /* if we make it here with tdc non-zero, then it is the last chunk we
     * dealt with, and we have to release it when we're done.  We hold on
     * to it in case we need to do a prefetch, obviously.
     */
    if (tdc) {
        ReleaseReadLock(&tdc->lock);
#if !defined(AFS_VM_RDWR_ENV)
        /*
         * try to queue prefetch, if needed. If DataVersion is zero there
         * should not be any more: files with DV 0 never have been stored
         * on the fileserver, symbolic links and directories never require
         * more than a single chunk.
         */
        if (!noLock && !(hiszero(avc->f.m.DataVersion))) {
            if (!(tdc->mflags & DFNextStarted))
                afs_PrefetchChunk(avc, tdc, acred, treq);
        }
#endif
        afs_PutDCache(tdc);
    }
    if (!noLock)
        ReleaseReadLock(&avc->lock);

#ifdef AFS_DARWIN80_ENV
    if (tuiop)
        uio_free(tuiop);
#else
    osi_FreeSmallSpace(tvec);
#endif
    AFS_DISCON_UNLOCK();
    error = afs_CheckCode(error, treq, 13);
    afs_DestroyReq(treq);
    return error;
}
/*
 * readdir for the GFS extended-attribute directory.
 *
 * A listing that starts at offset 0 first emits the static GFS entries
 * (all of them must fit in that first call, otherwise EINVAL is returned),
 * then, when a real xattr directory exists underneath, continues with
 * VOP_READDIR on that directory.  A listing that starts at a non-zero offset
 * goes straight to the underlying directory, or reports EOF if there is none.
 *
 * eofp may be NULL; a local is substituted in that case.
 */
static int
xattr_dir_readdir(vnode_t *dvp, uio_t *uiop, cred_t *cr, int *eofp,
    caller_context_t *ct, int flags)
{
	vnode_t *realvp;
	int eof_scratch;
	int rewind = 0;
	int have_real;
	int error;

	if (eofp == NULL)
		eofp = &eof_scratch;
	*eofp = 0;

	/* Is there a real extended attribute directory behind this one? */
	error = xattr_dir_realdir(dvp, &realvp, LOOKUP_XATTR, cr, ct);
	have_real = (error == 0) ? 1 : 0;

	if (uiop->uio_loffset == 0) {
		gfs_readdir_state_t gstate;
		gfs_dir_t *gdir = dvp->v_data;
		ino64_t parent_ino, self_ino;
		offset_t idx;

		/*
		 * With a real xattr dir, "." and ".." come from the underlying
		 * fs further down, so skip them in the GFS dir.
		 */
		if (have_real)
			uiop->uio_loffset = GFS_STATIC_ENTRY_OFFSET;

		error = gfs_get_parent_ino(dvp, cr, ct, &parent_ino, &self_ino);
		if (error != 0)
			return (error);
		error = gfs_readdir_init(&gstate, gdir->gfsd_maxlen, 1, uiop,
		    parent_ino, self_ino, flags);
		if (error != 0)
			return (error);

		for (;;) {
			int eflags = 0;

			error = gfs_readdir_pred(&gstate, uiop, &idx);
			if (error != 0 || *eofp)
				break;

			if (idx < 0 || idx >= gdir->gfsd_nstatic) {
				/* Ran off the static table. */
				*eofp = 1;
				continue;
			}

			/*
			 * Flag a case-insensitive clash between this sysattr
			 * set name and a real xattr name, when asked to.
			 */
			if ((flags & V_RDDIR_ENTFLAGS) && have_real) {
				error = readdir_xattr_casecmp(realvp,
				    gdir->gfsd_static[idx].gfse_name, cr, ct,
				    &eflags);
				if (error != 0)
					break;
			}

			self_ino = gdir->gfsd_inode(dvp, idx);
			error = gfs_readdir_emit(&gstate, uiop, idx, self_ino,
			    gdir->gfsd_static[idx].gfse_name, eflags);
			if (error != 0)
				break;
		}

		error = gfs_readdir_fini(&gstate, error, eofp, *eofp);
		if (error != 0)
			return (error);

		/*
		 * All static entries must be consumed in the first call;
		 * otherwise a later uio_loffset could not be attributed to
		 * either the static entries or the underlying fs.
		 */
		if (*eofp == 0)
			return (EINVAL);

		rewind = 1;
	}

	if (!have_real) {
		*eofp = 1;
		return (0);
	}

	*eofp = 0;
	if (rewind)
		uiop->uio_loffset = 0;

	(void) VOP_RWLOCK(realvp, V_WRITELOCK_FALSE, NULL);
	error = VOP_READDIR(realvp, uiop, cr, eofp, ct, flags);
	VOP_RWUNLOCK(realvp, V_WRITELOCK_FALSE, NULL);

	return (error);
}
/*
 * Native 32-bit system call for non-large-file applications.
 *
 * Reads dirent64 records from the directory open on fd into a kernel
 * buffer, rewrites each one as a dirent32 record, and copies the result out
 * to buf.  Returns the number of bytes copied out, or sets errno
 * (EINVAL, EBADF, ENOTDIR, EOVERFLOW, EFAULT, or whatever VOP_READDIR
 * reported).
 */
int
getdents32(int fd, void *buf, size_t count)
{
	vnode_t *vp;
	file_t *fp;
	struct uio auio;
	struct iovec aiov;
	register int error;
	int eof_ignored;
	char *buf64;
	char *buf32;
	int bufsize;
	int osize, nsize;
	struct dirent64 *in;
	struct dirent32 *out;

	if (count < sizeof (struct dirent32))
		return (set_errno(EINVAL));

	fp = getf(fd);
	if (fp == NULL)
		return (set_errno(EBADF));

	vp = fp->f_vnode;
	if (vp->v_type != VDIR) {
		releasef(fd);
		return (set_errno(ENOTDIR));
	}

	/* Don't let the user overcommit kernel resources. */
	if (count > MAXGETDENTS_SIZE)
		count = MAXGETDENTS_SIZE;
	bufsize = count;

	buf64 = kmem_alloc(bufsize, KM_SLEEP);
	buf32 = kmem_alloc(bufsize, KM_SLEEP);

	aiov.iov_base = buf64;
	aiov.iov_len = count;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_loffset = fp->f_offset;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_resid = count;
	auio.uio_fmode = 0;
	auio.uio_extflg = UIO_COPY_CACHED;

	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
	error = VOP_READDIR(vp, &auio, fp->f_cred, &eof_ignored, NULL, 0);
	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
	if (error)
		goto out;

	/* From here on, count is the number of bytes readdir produced. */
	count = count - auio.uio_resid;
	fp->f_offset = auio.uio_loffset;

	in = (struct dirent64 *)buf64;
	out = (struct dirent32 *)buf32;

	for (osize = 0, nsize = 0; nsize < count; ) {
		uint32_t reclen, namlen;
		uint_t in_step, out_step;

		/*
		 * The 64 bit d_ino and d_off fields must fit into their
		 * 32 bit equivalents.
		 *
		 * Although d_off is a signed value, the check is done
		 * against the full 32 bits because certain file systems,
		 * NFS for one, allow directory cookies to use the full
		 * 32 bits.  We use uint64_t because there is no exact
		 * unsigned analog to the off64_t type of in->d_off.
		 */
		if (in->d_ino > (ino64_t)UINT32_MAX ||
		    in->d_off > (uint64_t)UINT32_MAX) {
			error = EOVERFLOW;
			goto out;
		}

		out->d_ino = (ino32_t)in->d_ino;
		out->d_off = (off32_t)in->d_off;

		namlen = strlen(in->d_name);
		reclen = DIRENT32_RECLEN(namlen);
		out->d_reclen = (uint16_t)reclen;

		/* use strncpy(9f) to zero out uninitialized bytes */
		(void) strncpy(out->d_name, in->d_name,
		    DIRENT32_NAMELEN(reclen));

		in_step = (uint_t)in->d_reclen;
		out_step = (uint_t)out->d_reclen;
		nsize += in_step;
		osize += out_step;
		in = (struct dirent64 *)((char *)in + in_step);
		out = (struct dirent32 *)((char *)out + out_step);
	}

	ASSERT(osize <= count);
	ASSERT((char *)out <= (char *)buf32 + bufsize);
	ASSERT((char *)in <= (char *)buf64 + bufsize);

	error = copyout(buf32, buf, osize);
	if (error < 0)
		error = EFAULT;

out:
	kmem_free(buf64, bufsize);
	kmem_free(buf32, bufsize);
	releasef(fd);

	if (error)
		return (set_errno(error));
	return (osize);
}
/* * Find the entry in the directory corresponding to the target vnode. */ int dirfindvp(vnode_t *vrootp, vnode_t *dvp, vnode_t *tvp, cred_t *cr, char *dbuf, size_t dlen, dirent64_t **rdp) { size_t dbuflen; struct iovec iov; struct uio uio; int error; int eof; vnode_t *cmpvp; struct dirent64 *dp; pathname_t pnp; ASSERT(dvp->v_type == VDIR); /* * This is necessary because of the strange semantics of VOP_LOOKUP(). */ bzero(&pnp, sizeof (pnp)); eof = 0; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_segflg = UIO_SYSSPACE; uio.uio_fmode = 0; uio.uio_extflg = UIO_COPY_CACHED; uio.uio_loffset = 0; if ((error = VOP_ACCESS(dvp, VREAD, 0, cr, NULL)) != 0) return (error); while (!eof) { uio.uio_resid = dlen; iov.iov_base = dbuf; iov.iov_len = dlen; (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL); error = VOP_READDIR(dvp, &uio, cr, &eof, NULL, 0); VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL); dbuflen = dlen - uio.uio_resid; if (error || dbuflen == 0) break; dp = (dirent64_t *)dbuf; while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) { /* * Ignore '.' and '..' entries */ if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0) { dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen); continue; } error = VOP_LOOKUP(dvp, dp->d_name, &cmpvp, &pnp, 0, vrootp, cr, NULL, NULL, NULL); /* * We only want to bail out if there was an error other * than ENOENT. Otherwise, it could be that someone * just removed an entry since the readdir() call, and * the entry we want is further on in the directory. */ if (error == 0) { if (vnode_match(tvp, cmpvp, cr)) { VN_RELE(cmpvp); *rdp = dp; return (0); } VN_RELE(cmpvp); } else if (error != ENOENT) { return (error); } dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen); } } /* * Something strange has happened, this directory does not contain the * specified vnode. This should never happen in the normal case, since * we ensured that dvp is the parent of vp. This is possible in some * rare conditions (races and the special .zfs directory). 
*/ if (error == 0) { error = VOP_LOOKUP(dvp, ".zfs", &cmpvp, &pnp, 0, vrootp, cr, NULL, NULL, NULL); if (error == 0) { if (vnode_match(tvp, cmpvp, cr)) { (void) strcpy(dp->d_name, ".zfs"); dp->d_reclen = strlen(".zfs"); dp->d_off = 2; dp->d_ino = 1; *rdp = dp; } else { error = ENOENT; } VN_RELE(cmpvp); } } return (error); }
/*
 * Forward the rwlock request to the vnode stored in the namenode's
 * nm_filevp and return its result.
 */
static int
nm_rwlock(vnode_t *vp, int write, caller_context_t *ctp)
{
	vnode_t *filevp = VTONM(vp)->nm_filevp;

	return (VOP_RWLOCK(filevp, write, ctp));
}
/*
 * Update the attributes cached in the namenode (nm_vattr).
 *
 * Requests touching AT_NOSET or AT_SIZE are rejected with EINVAL.  Otherwise
 * the request is vetted by secpolicy_vnode_setattr() and, if it passes, the
 * mode, uid, gid, atime and mtime selected in vap->va_mask are copied into
 * the namenode's attribute structure.  The nm_filevp rwlock (write) and
 * nm_lock are held across the update.
 *
 * Returns 0 on success, EINVAL, or the error from secpolicy_vnode_setattr().
 */
/* ARGSUSED */
static int
nm_setattr(
	vnode_t *vp,
	vattr_t *vap,
	int flags,
	cred_t *crp,
	caller_context_t *ctp)
{
	struct namenode *nm = VTONM(vp);
	struct vattr *cur = &nm->nm_vattr;
	long wanted = vap->va_mask;
	int error = 0;

	/* These attributes cannot be set here. */
	if (wanted & (AT_NOSET|AT_SIZE))
		return (EINVAL);

	(void) VOP_RWLOCK(nm->nm_filevp, V_WRITELOCK_TRUE, ctp);
	mutex_enter(&nm->nm_lock);

	error = secpolicy_vnode_setattr(crp, vp, vap, cur, flags,
	    nm_access_unlocked, nm);
	if (error == 0) {
		/* Re-read the mask after the policy check has seen vap. */
		wanted = vap->va_mask;

		/* New mode is stored with VSVTX masked off. */
		if (wanted & AT_MODE)
			cur->va_mode = vap->va_mode & ~VSVTX;

		if (wanted & AT_UID)
			cur->va_uid = vap->va_uid;
		if (wanted & AT_GID)
			cur->va_gid = vap->va_gid;

		if (wanted & AT_ATIME)
			cur->va_atime = vap->va_atime;
		if (wanted & AT_MTIME) {
			cur->va_mtime = vap->va_mtime;
			/* An mtime change also stamps ctime with "now". */
			gethrestime(&cur->va_ctime);
		}
	}

	mutex_exit(&nm->nm_lock);
	VOP_RWUNLOCK(nm->nm_filevp, V_WRITELOCK_TRUE, ctp);
	return (error);
}