static int
unionfs_lock(void *v)
{
    struct vop_lock_args *ap = v;
    int error;
    int flags;
    struct vnode *lvp;
    struct vnode *uvp;
    struct unionfs_node *unp;

    unp = VTOUNIONFS(ap->a_vp);
    lvp = unp->un_lowervp;
    uvp = unp->un_uppervp;
    flags = ap->a_flags;
    error = 0;

    if (lvp != NULLVP) {
        error = VOP_LOCK(lvp, flags);
    }
    if (error == 0 && uvp != NULLVP) {
        error = VOP_LOCK(uvp, flags);
        if (error != 0 && lvp != NULLVP) {
            /*
             * Roll back: don't leave the lower layer locked, and
             * don't unlock a nonexistent lower vnode (upper-only
             * nodes have un_lowervp == NULLVP).
             */
            VOP_UNLOCK(lvp);
        }
    }

    return error;
}
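unionfs_lock() acquires the lower layer first and the upper layer second, rolling the first lock back when the second acquisition fails. A minimal hedged sketch of that acquire-two, roll-back-one idiom in isolation (lock_pair() is a hypothetical helper, not part of unionfs):

static int
lock_pair(struct vnode *a, struct vnode *b, int flags)
{
    int error;

    error = VOP_LOCK(a, flags);     /* first lock */
    if (error != 0)
        return error;
    error = VOP_LOCK(b, flags);     /* second lock */
    if (error != 0)
        VOP_UNLOCK(a);              /* never leak the first on failure */
    return error;
}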
/*
 * Check that the vnode is still valid, and if so
 * acquire requested lock.
 */
int
vn_lock(struct vnode *vp, int flags)
{
    int error;

#if 0
    KASSERT(vp->v_usecount > 0 || (vp->v_iflag & VI_ONWORKLST) != 0);
#endif
    KASSERT((flags & ~(LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT|LK_RETRY)) == 0);
    KASSERT(!mutex_owned(vp->v_interlock));

#ifdef DIAGNOSTIC
    if (wapbl_vphaswapbl(vp))
        WAPBL_JUNLOCK_ASSERT(wapbl_vptomp(vp));
#endif

    error = VOP_LOCK(vp, flags);
    if ((flags & LK_RETRY) != 0 && error == ENOENT)
        error = VOP_LOCK(vp, flags);

    KASSERT((flags & LK_RETRY) == 0 || (flags & LK_NOWAIT) != 0 ||
        error == 0);

    return error;
}
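A hedged caller sketch for the vn_lock() flavour above (example_with_vnode() is hypothetical; it assumes the caller holds a reference on vp): with LK_RETRY and without LK_NOWAIT the final KASSERT guarantees success, so explicit error handling is only needed when LK_RETRY is absent and a revoked vnode may report ENOENT.

static int
example_with_vnode(struct vnode *vp)
{
    int error;

    /* No LK_RETRY: a dying vnode can fail the lock with ENOENT. */
    error = vn_lock(vp, LK_SHARED);
    if (error != 0)
        return error;

    /* ... examine the vnode while it is locked ... */

    VOP_UNLOCK(vp);
    return 0;
}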
int
nandfs_get_node_entry(struct nandfsmount *nmp, struct nandfs_inode **inode,
    uint64_t ino, struct buf **bp)
{
    struct nandfs_alloc_request req;
    struct nandfs_mdt *mdt;
    struct nandfs_node *ifile;
    struct vnode *vp;
    uint32_t index;
    int error = 0;

    req.entrynum = ino;
    mdt = &nmp->nm_nandfsdev->nd_ifile_mdt;
    ifile = nmp->nm_ifile_node;
    vp = NTOV(ifile);

    VOP_LOCK(vp, LK_EXCLUSIVE);
    error = nandfs_get_entry_block(mdt, ifile, &req, &index, 0);
    if (error) {
        VOP_UNLOCK(vp, 0);
        return (error);
    }

    *inode = ((struct nandfs_inode *)req.bp_entry->b_data) + index;
    *bp = req.bp_entry;
    VOP_UNLOCK(vp, 0);
    return (0);
}
/* Generic write interface */
int
afs_osi_Write(struct osi_file *afile, afs_int32 offset, void *aptr,
    afs_int32 asize)
{
    unsigned int resid;
    afs_int32 code;

    AFS_STATCNT(osi_Write);
    if (!afile)
        osi_Panic("afs_osi_Write called with null afile");
    if (offset != -1)
        afile->offset = offset;

    AFS_GUNLOCK();
    VOP_LOCK(afile->vnode, LK_EXCLUSIVE | LK_RETRY);
    code = vn_rdwr(UIO_WRITE, afile->vnode, (caddr_t) aptr, asize,
        afile->offset, AFS_UIOSYS, IO_UNIT, afs_osi_credp,
        &resid, osi_curproc());
    VOP_UNLOCK(afile->vnode, 0);
    AFS_GLOCK();

    if (code == 0) {
        code = asize - resid;
        afile->offset += code;
        if (afile->offset > afile->size)
            afile->size = afile->offset;
    } else
        code = -1;

    if (afile->proc)
        (*afile->proc) (afile, code);
    return code;
}
/*
 * Purge the cache of dead entries
 *
 * This is extremely inefficient due to the fact that vgone() not only
 * indirectly modifies the vnode cache, but may also sleep.  We can
 * neither hold pfs_vncache_mutex across a vgone() call, nor make any
 * assumptions about the state of the cache after vgone() returns.  In
 * consequence, we must start over after every vgone() call, and keep
 * trying until we manage to traverse the entire cache.
 *
 * The only way to improve this situation is to change the data structure
 * used to implement the cache.
 */
static void
pfs_purge_locked(struct pfs_node *pn, bool force)
{
    struct pfs_vdata *pvd;
    struct vnode *vnp;

    mtx_assert(&pfs_vncache_mutex, MA_OWNED);
    pvd = pfs_vncache;
    while (pvd != NULL) {
        if (force || pvd->pvd_dead ||
            (pn != NULL && pvd->pvd_pn == pn)) {
            vnp = pvd->pvd_vnode;
            vhold(vnp);
            mtx_unlock(&pfs_vncache_mutex);
            VOP_LOCK(vnp, LK_EXCLUSIVE);
            vgone(vnp);
            VOP_UNLOCK(vnp, 0);
            mtx_lock(&pfs_vncache_mutex);
            vdrop(vnp);
            pvd = pfs_vncache;
        } else {
            pvd = pvd->pvd_next;
        }
    }
}
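The pattern above (hold the element, drop the list mutex, do the sleeping work, retake the mutex, restart from the head) is the generic way to run a blocking operation on members of a mutex-protected list. A standalone hedged sketch, with every name (struct item, example_mtx, item_needs_work(), hold_item(), do_blocking_work()) a hypothetical stand-in:

struct item {
    LIST_ENTRY(item) link;
    /* ... payload ... */
};
static LIST_HEAD(, item) example_list;
static struct mtx example_mtx;

static void
example_purge(void)
{
    struct item *it;

    mtx_lock(&example_mtx);
    it = LIST_FIRST(&example_list);
    while (it != NULL) {
        if (item_needs_work(it)) {
            hold_item(it);          /* keep *it alive, like vhold() */
            mtx_unlock(&example_mtx);
            do_blocking_work(it);   /* may sleep, may mutate the list */
            mtx_lock(&example_mtx);
            release_item(it);       /* like vdrop() */
            /* The list may have changed while asleep: restart. */
            it = LIST_FIRST(&example_list);
        } else
            it = LIST_NEXT(it, link);
    }
    mtx_unlock(&example_mtx);
}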
static int
_xfs_mkdir(struct vop_mkdir_args
    /* {
        struct vnode *a_dvp;
        struct vnode **a_vpp;
        struct componentname *a_cnp;
        struct vattr *a_vap;
    } */ *ap)
{
    struct vnode *dvp = ap->a_dvp;
    struct vattr *vap = ap->a_vap;
    struct thread *td = curthread;
    struct ucred *credp = td->td_ucred;
    struct componentname *cnp = ap->a_cnp;
    xfs_vnode_t *xvp;
    xfs_vattr_t va;
    int error;

    memset(&va, 0, sizeof(va));
    va.va_mask |= XFS_AT_MODE;
    va.va_mode = vap->va_mode | S_IFDIR;
    va.va_mask |= XFS_AT_TYPE;

    xvp = NULL;
    XVOP_MKDIR(VPTOXFSVP(dvp), cnp, &va, &xvp, credp, error);
    if (error == 0) {
        *ap->a_vpp = xvp->v_vnode;
        VOP_LOCK(xvp->v_vnode, LK_EXCLUSIVE);
    }

    return (error);
}
int
nandfs_get_dat_bdescs(struct nandfs_device *nffsdev, struct nandfs_bdesc *bd,
    uint32_t nmembs)
{
    struct nandfs_node *dat_node;
    uint64_t map;
    uint32_t i;
    int error = 0;

    dat_node = nffsdev->nd_dat_node;
    VOP_LOCK(NTOV(dat_node), LK_EXCLUSIVE);

    for (i = 0; i < nmembs; i++) {
        DPRINTF(CLEAN,
            ("%s: bd ino:%#jx oblk:%#jx blocknr:%#jx off:%#jx\n",
            __func__, (uintmax_t)bd[i].bd_ino,
            (uintmax_t)bd[i].bd_oblocknr, (uintmax_t)bd[i].bd_blocknr,
            (uintmax_t)bd[i].bd_offset));

        error = nandfs_bmap_lookup(dat_node, bd[i].bd_offset, &map);
        if (error)
            break;
        bd[i].bd_blocknr = map;
    }

    VOP_UNLOCK(NTOV(dat_node), 0);
    return (error);
}
int
nandfs_markgc_segment(struct nandfs_device *fsdev, uint64_t seg)
{
    struct nandfs_node *su_node;
    struct nandfs_segment_usage *su_usage;
    struct buf *bp;
    uint64_t blk, offset;
    int error;

    su_node = fsdev->nd_su_node;
    VOP_LOCK(NTOV(su_node), LK_EXCLUSIVE);

    nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset);

    error = nandfs_bread(su_node, blk, NOCRED, 0, &bp);
    if (error) {
        brelse(bp);
        VOP_UNLOCK(NTOV(su_node), 0);
        return (error);
    }

    su_usage = SU_USAGE_OFF(bp, offset);
    MPASS((su_usage->su_flags & NANDFS_SEGMENT_USAGE_GC) == 0);
    su_usage->su_flags |= NANDFS_SEGMENT_USAGE_GC;

    brelse(bp);
    VOP_UNLOCK(NTOV(su_node), 0);

    DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));

    return (0);
}
int
osi_UFSTruncate(struct osi_file *afile, afs_int32 asize)
{
    struct vattr tvattr;
    afs_int32 code;
    struct osi_stat tstat;

    AFS_STATCNT(osi_Truncate);

    /*
     * This routine only shrinks files, and most systems
     * have very slow truncates, even when the file is already
     * small enough.  Check now and save some time.
     */
    code = afs_osi_Stat(afile, &tstat);
    if (code || tstat.size <= asize)
        return code;

    ObtainWriteLock(&afs_xosi, 321);
    VATTR_NULL(&tvattr);
    tvattr.va_size = asize;

    AFS_GUNLOCK();
    VOP_LOCK(afile->vnode, LK_EXCLUSIVE | LK_RETRY, curproc);
    code = VOP_SETATTR(afile->vnode, &tvattr, afs_osi_credp, curproc);
    VOP_UNLOCK(afile->vnode, 0, curproc);
    AFS_GLOCK();

    if (code == 0)
        afile->size = asize;
    ReleaseWriteLock(&afs_xosi);
    return code;
}
static int
_xfs_symlink(struct vop_symlink_args
    /* {
        struct vnode *a_dvp;
        struct vnode **a_vpp;
        struct componentname *a_cnp;
        struct vattr *a_vap;
        char *a_target;
    } */ *ap)
{
    struct thread *td = curthread;
    struct ucred *credp = td->td_ucred;
    xfs_vnode_t *xvp;
    xfs_vattr_t va;
    int error;

    memset(&va, 0, sizeof(va));
    va.va_mask |= XFS_AT_MODE;
    va.va_mode = ap->a_vap->va_mode | S_IFLNK;
    va.va_mask |= XFS_AT_TYPE;

    XVOP_SYMLINK(VPTOXFSVP(ap->a_dvp), ap->a_cnp, &va, ap->a_target,
        &xvp, credp, error);
    if (error == 0) {
        *ap->a_vpp = xvp->v_vnode;
        VOP_LOCK(xvp->v_vnode, LK_EXCLUSIVE);
    }

    return (error);
}
int
RUMP_VOP_LOCK(struct vnode *vp, int flags)
{
    int error;

    rump_schedule();
    error = VOP_LOCK(vp, flags);
    rump_unschedule();

    return error;
}
int
nandfs_node_create(struct nandfsmount *nmp, struct nandfs_node **node,
    uint16_t mode)
{
    struct nandfs_alloc_request req;
    struct nandfs_device *nandfsdev;
    struct nandfs_mdt *mdt;
    struct nandfs_node *ifile;
    struct nandfs_inode *inode;
    struct vnode *vp;
    uint32_t entry;
    int error = 0;

    nandfsdev = nmp->nm_nandfsdev;
    mdt = &nandfsdev->nd_ifile_mdt;
    ifile = nmp->nm_ifile_node;
    vp = NTOV(ifile);

    VOP_LOCK(vp, LK_EXCLUSIVE);

    /* Allocate new inode in ifile */
    req.entrynum = nandfsdev->nd_last_ino + 1;
    error = nandfs_find_free_entry(mdt, ifile, &req);
    if (error) {
        VOP_UNLOCK(vp, 0);
        return (error);
    }

    error = nandfs_get_entry_block(mdt, ifile, &req, &entry, 1);
    if (error) {
        VOP_UNLOCK(vp, 0);
        return (error);
    }

    /* Inode initialization */
    inode = ((struct nandfs_inode *)req.bp_entry->b_data) + entry;
    nandfs_inode_init(inode, mode);

    error = nandfs_alloc_entry(mdt, &req);
    if (error) {
        VOP_UNLOCK(vp, 0);
        return (error);
    }

    VOP_UNLOCK(vp, 0);

    nandfsdev->nd_last_ino = req.entrynum;
    error = nandfs_get_node(nmp, req.entrynum, node);
    DPRINTF(IFILE, ("%s: node: %p ino: %#jx\n",
        __func__, node, (uintmax_t)((*node)->nn_ino)));

    return (error);
}
static int
nandfs_process_bdesc(struct nandfs_device *nffsdev, struct nandfs_bdesc *bd,
    uint64_t nmembs)
{
    struct nandfs_node *dat_node;
    struct buf *bp;
    uint64_t i;
    int error;

    dat_node = nffsdev->nd_dat_node;
    VOP_LOCK(NTOV(dat_node), LK_EXCLUSIVE);

    for (i = 0; i < nmembs; i++) {
        if (!bd[i].bd_alive)
            continue;
        DPRINTF(CLEAN, ("%s: idx %jx offset %jx\n",
            __func__, i, bd[i].bd_offset));
        if (bd[i].bd_level) {
            error = nandfs_bread_meta(dat_node, bd[i].bd_offset,
                NULL, 0, &bp);
            if (error) {
                nandfs_error("%s: cannot read dat node "
                    "level:%d\n", __func__, bd[i].bd_level);
                brelse(bp);
                VOP_UNLOCK(NTOV(dat_node), 0);
                return (error);
            }
            nandfs_dirty_buf_meta(bp, 1);
            nandfs_bmap_dirty_blocks(VTON(bp->b_vp), bp, 1);
        } else {
            error = nandfs_bread(dat_node, bd[i].bd_offset,
                NULL, 0, &bp);
            if (error) {
                nandfs_error("%s: cannot read dat node\n",
                    __func__);
                brelse(bp);
                VOP_UNLOCK(NTOV(dat_node), 0);
                return (error);
            }
            nandfs_dirty_buf(bp, 1);
        }
        DPRINTF(CLEAN, ("%s: bp: %p\n", __func__, bp));
    }

    VOP_UNLOCK(NTOV(dat_node), 0);
    return (0);
}
/*
 * Insert the inode into the hash table, and return it locked.
 */
void
efs_ihashins(struct efs_inode *eip)
{
    struct ihashhead *ipp;

    KASSERT(mutex_owned(&efs_hashlock));

    /* lock the inode, then put it on the appropriate hash list */
    VOP_LOCK(EFS_ITOV(eip), LK_EXCLUSIVE);

    mutex_enter(&efs_ihash_lock);
    ipp = &ihashtbl[INOHASH(eip->ei_dev, eip->ei_number)];
    LIST_INSERT_HEAD(ipp, eip, ei_hash);
    mutex_exit(&efs_ihash_lock);
}
/*
 * Insert the inode into the hash table, and return it locked.
 */
void
ulfs_ihashins(struct inode *ip)
{
    struct ihashhead *ipp;
    int error __diagused;

    KASSERT(mutex_owned(&ulfs_hashlock));

    /* lock the inode, then put it on the appropriate hash list */
    error = VOP_LOCK(ITOV(ip), LK_EXCLUSIVE);
    KASSERT(error == 0);

    mutex_enter(&ulfs_ihash_lock);
    ipp = &ihashtbl[INOHASH(ip->i_dev, ip->i_number)];
    LIST_INSERT_HEAD(ipp, ip, i_hash);
    mutex_exit(&ulfs_ihash_lock);
}
int
nandfs_node_destroy(struct nandfs_node *node)
{
    struct nandfs_alloc_request req;
    struct nandfsmount *nmp;
    struct nandfs_mdt *mdt;
    struct nandfs_node *ifile;
    struct vnode *vp;
    int error = 0;

    nmp = node->nn_nmp;
    req.entrynum = node->nn_ino;
    mdt = &nmp->nm_nandfsdev->nd_ifile_mdt;
    ifile = nmp->nm_ifile_node;
    vp = NTOV(ifile);

    DPRINTF(IFILE, ("%s: destroy node: %p ino: %#jx\n",
        __func__, node, (uintmax_t)node->nn_ino));
    VOP_LOCK(vp, LK_EXCLUSIVE);

    error = nandfs_find_entry(mdt, ifile, &req);
    if (error) {
        nandfs_error("%s: finding entry error:%d node %p(%jx)",
            __func__, error, node, node->nn_ino);
        VOP_UNLOCK(vp, 0);
        return (error);
    }

    nandfs_inode_destroy(&node->nn_inode);

    error = nandfs_free_entry(mdt, &req);
    if (error) {
        nandfs_error("%s: freeing entry error:%d node %p(%jx)",
            __func__, error, node, node->nn_ino);
        VOP_UNLOCK(vp, 0);
        return (error);
    }

    VOP_UNLOCK(vp, 0);

    DPRINTF(IFILE, ("%s: freed node %p ino %#jx\n",
        __func__, node, (uintmax_t)node->nn_ino));
    return (error);
}
/*
 * Check that the vnode is still valid, and if so
 * acquire requested lock.
 */
int
vn_lock(struct vnode *vp, int flags, struct proc *p)
{
    int error;

    if ((flags & LK_RECURSEFAIL) == 0)
        flags |= LK_CANRECURSE;

    do {
        if (vp->v_flag & VXLOCK) {
            vp->v_flag |= VXWANT;
            tsleep(vp, PINOD, "vn_lock", 0);
            error = ENOENT;
        } else {
            error = VOP_LOCK(vp, flags, p);
            if (error == 0)
                return (error);
        }
    } while (flags & LK_RETRY);

    return (error);
}
int
nandfs_get_dat_vinfo(struct nandfs_device *nandfsdev,
    struct nandfs_vinfo *vinfo, uint32_t nmembs)
{
    struct nandfs_node *dat;
    struct nandfs_mdt *mdt;
    struct nandfs_alloc_request req;
    struct nandfs_dat_entry *dat_entry;
    uint32_t i, idx;
    int error = 0;

    dat = nandfsdev->nd_dat_node;
    mdt = &nandfsdev->nd_dat_mdt;

    DPRINTF(DAT, ("%s: nmembs %#x\n", __func__, nmembs));

    VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);

    for (i = 0; i < nmembs; i++) {
        req.entrynum = vinfo[i].nvi_vblocknr;

        error = nandfs_get_entry_block(mdt, dat, &req, &idx, 0);
        if (error)
            break;

        dat_entry = ((struct nandfs_dat_entry *)req.bp_entry->b_data);
        vinfo[i].nvi_start = dat_entry[idx].de_start;
        vinfo[i].nvi_end = dat_entry[idx].de_end;
        vinfo[i].nvi_blocknr = dat_entry[idx].de_blocknr;

        DPRINTF(DAT, ("%s: vinfo: %jx[%jx-%jx]->%jx\n",
            __func__, vinfo[i].nvi_vblocknr, vinfo[i].nvi_start,
            vinfo[i].nvi_end, vinfo[i].nvi_blocknr));

        brelse(req.bp_entry);
    }

    VOP_UNLOCK(NTOV(dat), 0);
    return (error);
}
/*
 * Check that the vnode is still valid, and if so
 * acquire requested lock.
 */
int
vn_lock(struct vnode *vp, int flags)
{
    int error;

#if 0
    KASSERT(vp->v_usecount > 0 || (vp->v_iflag & VI_ONWORKLST) != 0);
#endif
    KASSERT((flags & ~(LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT|LK_RETRY)) == 0);
    KASSERT(!mutex_owned(vp->v_interlock));

#ifdef DIAGNOSTIC
    if (wapbl_vphaswapbl(vp))
        WAPBL_JUNLOCK_ASSERT(wapbl_vptomp(vp));
#endif

    do {
        /*
         * XXX PR 37706: forced unmount of file systems is unsafe.
         * A race between vclean() and this function is the
         * remaining problem.
         */
        mutex_enter(vp->v_interlock);
        if (vp->v_iflag & VI_XLOCK) {
            if (flags & LK_NOWAIT) {
                mutex_exit(vp->v_interlock);
                return EBUSY;
            }
            vwait(vp, VI_XLOCK);
            mutex_exit(vp->v_interlock);
            error = ENOENT;
        } else {
            mutex_exit(vp->v_interlock);
            error = VOP_LOCK(vp, (flags & ~LK_RETRY));
            if (error == 0 || error == EDEADLK || error == EBUSY)
                return (error);
        }
    } while (flags & LK_RETRY);

    return (error);
}
int
nandfs_vblock_end(struct nandfs_device *nandfsdev, nandfs_daddr_t vblock)
{
    struct nandfs_node *dat;
    struct nandfs_mdt *mdt;
    struct nandfs_alloc_request req;
    struct nandfs_dat_entry *dat_entry;
    uint64_t end;
    uint32_t entry;
    int locked, error;

    dat = nandfsdev->nd_dat_node;
    mdt = &nandfsdev->nd_dat_mdt;
    end = nandfsdev->nd_last_cno;

    locked = NANDFS_VOP_ISLOCKED(NTOV(dat));
    if (!locked)
        VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
    req.entrynum = vblock;

    error = nandfs_get_entry_block(mdt, dat, &req, &entry, 0);
    if (!error) {
        dat_entry = (struct nandfs_dat_entry *)req.bp_entry->b_data;
        dat_entry[entry].de_end = end;
        DPRINTF(DAT, ("%s: end vblock %#jx at checkpoint %#jx\n",
            __func__, (uintmax_t)vblock, (uintmax_t)end));

        /*
         * It is mostly called from syncer() so
         * we want to force making buf dirty.
         */
        error = nandfs_dirty_buf(req.bp_entry, 1);
    }

    if (!locked)
        VOP_UNLOCK(NTOV(dat), 0);

    return (error);
}
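nandfs_vblock_end() (and nandfs_vblock_alloc() later in this section) take the DAT vnode lock only when the caller has not locked it already, so the routine works in both contexts. A hedged sketch of the idiom in isolation (example_dat_op() is hypothetical; like the nandfs code, it assumes any already-held lock belongs to the current caller):

static int
example_dat_op(struct vnode *vp)
{
    int locked, error;

    /* nandfs wraps this query as NANDFS_VOP_ISLOCKED(). */
    locked = VOP_ISLOCKED(vp);
    if (!locked)
        VOP_LOCK(vp, LK_EXCLUSIVE);

    error = 0;              /* ... work on the locked vnode ... */

    if (!locked)
        VOP_UNLOCK(vp, 0);
    return (error);
}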
int
nandfs_vblock_free(struct nandfs_device *nandfsdev, nandfs_daddr_t vblock)
{
    struct nandfs_node *dat;
    struct nandfs_mdt *mdt;
    struct nandfs_alloc_request req;
    int error;

    dat = nandfsdev->nd_dat_node;
    mdt = &nandfsdev->nd_dat_mdt;

    VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
    req.entrynum = vblock;

    error = nandfs_find_entry(mdt, dat, &req);
    if (!error) {
        DPRINTF(DAT, ("%s: vblk %#jx\n", __func__,
            (uintmax_t)vblock));
        nandfs_free_entry(mdt, &req);
    }

    VOP_UNLOCK(NTOV(dat), 0);
    return (error);
}
int
nandfs_get_seg_stat(struct nandfs_device *nandfsdev,
    struct nandfs_seg_stat *nss)
{
    struct nandfs_sufile_header *suhdr;
    struct nandfs_node *su_node;
    struct buf *bp;
    int err;

    su_node = nandfsdev->nd_su_node;

    NANDFS_WRITELOCK(nandfsdev);
    VOP_LOCK(NTOV(su_node), LK_SHARED);
    err = nandfs_bread(nandfsdev->nd_su_node, 0, NOCRED, 0, &bp);
    if (err) {
        brelse(bp);
        VOP_UNLOCK(NTOV(su_node), 0);
        NANDFS_WRITEUNLOCK(nandfsdev);
        return (-1);
    }

    suhdr = (struct nandfs_sufile_header *)bp->b_data;
    nss->nss_nsegs = nandfsdev->nd_fsdata.f_nsegments;
    nss->nss_ncleansegs = suhdr->sh_ncleansegs;
    nss->nss_ndirtysegs = suhdr->sh_ndirtysegs;
    nss->nss_ctime = 0;
    nss->nss_nongc_ctime = nandfsdev->nd_ts.tv_sec;
    nss->nss_prot_seq = nandfsdev->nd_seg_sequence;

    brelse(bp);
    VOP_UNLOCK(NTOV(su_node), 0);
    NANDFS_WRITEUNLOCK(nandfsdev);

    return (0);
}
/*
 * We need to process our own vnode lock and then clear the
 * interlock flag as it applies only to our vnode, not the
 * vnodes below us on the stack.
 */
static int
null_lock(struct vop_lock1_args *ap)
{
    struct vnode *vp = ap->a_vp;
    int flags = ap->a_flags;
    struct null_node *nn;
    struct vnode *lvp;
    int error;

    if ((flags & LK_INTERLOCK) == 0) {
        VI_LOCK(vp);
        ap->a_flags = flags |= LK_INTERLOCK;
    }
    nn = VTONULL(vp);
    /*
     * If we're still active we must ask the lower layer to
     * lock as ffs has special lock considerations in its
     * vop lock.
     */
    if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
        VI_LOCK_FLAGS(lvp, MTX_DUPOK);
        VI_UNLOCK(vp);
        /*
         * We have to hold the vnode here to solve a potential
         * reclaim race.  If we're forcibly vgone'd while we
         * still have refs, a thread could be sleeping inside
         * the lowervp's vop_lock routine.  When we vgone we will
         * drop our last ref to the lowervp, which would allow it
         * to be reclaimed.  The lowervp could then be recycled,
         * in which case it is not legal to be sleeping in its VOP.
         * We prevent it from being recycled by holding the vnode
         * here.
         */
        vholdl(lvp);
        error = VOP_LOCK(lvp, flags);

        /*
         * We might have slept to get the lock and someone might
         * have cleaned our vnode already, switching the vnode lock
         * from the one in lowervp to v_lock in our own vnode
         * structure.  Handle this case by reacquiring the correct
         * lock in the requested mode.
         */
        if (VTONULL(vp) == NULL && error == 0) {
            ap->a_flags &= ~(LK_TYPE_MASK | LK_INTERLOCK);
            switch (flags & LK_TYPE_MASK) {
            case LK_SHARED:
                ap->a_flags |= LK_SHARED;
                break;
            case LK_UPGRADE:
            case LK_EXCLUSIVE:
                ap->a_flags |= LK_EXCLUSIVE;
                break;
            default:
                panic("Unsupported lock request %d\n",
                    ap->a_flags);
            }
            VOP_UNLOCK(lvp, 0);
            error = vop_stdlock(ap);
        }
        vdrop(lvp);
    } else
        error = vop_stdlock(ap);

    return (error);
}
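The vhold()/vdrop() bracket in null_lock() is the general defence for sleeping in an operation of a vnode that may lose its last reference meanwhile. A hedged minimal sketch (lower_lock_sketch() is hypothetical; as in null_lock() above, the interlock of lvp is held on entry and is consumed by VOP_LOCK() via LK_INTERLOCK):

static int
lower_lock_sketch(struct vnode *lvp, int flags)
{
    int error;

    vholdl(lvp);                    /* hold keeps lvp from being recycled */
    error = VOP_LOCK(lvp, flags | LK_INTERLOCK);    /* may sleep */
    /* ... revalidate anything that may have changed while asleep ... */
    vdrop(lvp);
    return (error);
}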
int afs_MemRead(struct vcache *avc, struct uio *auio, afs_ucred_t *acred, daddr_t albn, struct buf **abpp, int noLock) { afs_size_t totalLength; afs_size_t transferLength; afs_size_t filePos; afs_size_t offset, tlen; afs_size_t len = 0; afs_int32 trimlen; struct dcache *tdc = 0; afs_int32 error, trybusy = 1; afs_int32 code; struct vrequest *treq = NULL; #ifdef AFS_DARWIN80_ENV uio_t tuiop = NULL; #else struct uio tuio; struct uio *tuiop = &tuio; struct iovec *tvec; memset(&tuio, 0, sizeof(tuio)); #endif AFS_STATCNT(afs_MemRead); if (avc->vc_error) return EIO; /* check that we have the latest status info in the vnode cache */ if ((code = afs_CreateReq(&treq, acred))) return code; if (!noLock) { code = afs_VerifyVCache(avc, treq); if (code) { code = afs_CheckCode(code, treq, 8); /* failed to get it */ afs_DestroyReq(treq); return code; } } #ifndef AFS_VM_RDWR_ENV if (AFS_NFSXLATORREQ(acred)) { if (!afs_AccessOK (avc, PRSFS_READ, treq, CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) { code = afs_CheckCode(EACCES, treq, 9); afs_DestroyReq(treq); return code; } } #endif #ifndef AFS_DARWIN80_ENV tvec = (struct iovec *)osi_AllocSmallSpace(sizeof(struct iovec)); memset(tvec, 0, sizeof(struct iovec)); #endif totalLength = AFS_UIO_RESID(auio); filePos = AFS_UIO_OFFSET(auio); afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32, totalLength, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length)); error = 0; transferLength = 0; if (!noLock) ObtainReadLock(&avc->lock); #if defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV) if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) { hset(avc->flushDV, avc->f.m.DataVersion); } #endif /* * Locks held: * avc->lock(R) */ /* This bit is bogus. We're checking to see if the read goes past the * end of the file. If so, we should be zeroing out all of the buffers * that the client has passed into us (there is a danger that we may leak * kernel memory if we do not). However, this behaviour is disabled by * not setting len before this segment runs, and by setting len to 0 * immediately we enter it. In addition, we also need to check for a read * which partially goes off the end of the file in the while loop below. */ if (filePos >= avc->f.m.Length) { if (len > AFS_ZEROS) len = sizeof(afs_zeros); /* and in 0 buffer */ len = 0; #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); #else afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); #endif AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code); } while (avc->f.m.Length > 0 && totalLength > 0) { /* read all of the cached info */ if (filePos >= avc->f.m.Length) break; /* all done */ if (noLock) { if (tdc) { ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); } tdc = afs_FindDCache(avc, filePos); if (tdc) { ObtainReadLock(&tdc->lock); offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); len = tdc->f.chunkBytes - offset; } } else { int versionOk; /* a tricky question: does the presence of the DFFetching flag * mean that we're fetching the latest version of the file? No. * The server could update the file as soon as the fetch responsible * for the setting of the DFFetching flag completes. 
* * However, the presence of the DFFetching flag (visible under * a dcache read lock since it is set and cleared only under a * dcache write lock) means that we're fetching as good a version * as was known to this client at the time of the last call to * afs_VerifyVCache, since the latter updates the stat cache's * m.DataVersion field under a vcache write lock, and from the * time that the DFFetching flag goes on in afs_GetDCache (before * the fetch starts), to the time it goes off (after the fetch * completes), afs_GetDCache keeps at least a read lock on the * vcache entry. * * This means that if the DFFetching flag is set, we can use that * data for any reads that must come from the current version of * the file (current == m.DataVersion). * * Another way of looking at this same point is this: if we're * fetching some data and then try do an afs_VerifyVCache, the * VerifyVCache operation will not complete until after the * DFFetching flag is turned off and the dcache entry's f.versionNo * field is updated. * * Note, by the way, that if DFFetching is set, * m.DataVersion > f.versionNo (the latter is not updated until * after the fetch completes). */ if (tdc) { ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); /* before reusing tdc */ } #ifdef STRUCT_TASK_STRUCT_HAS_CRED try_background: #endif tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 2); ObtainReadLock(&tdc->lock); /* now, first try to start transfer, if we'll need the data. If * data already coming, we don't need to do this, obviously. Type * 2 requests never return a null dcache entry, btw. */ if (!(tdc->dflags & DFFetching) && !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) { /* have cache entry, it is not coming in now, * and we'll need new data */ tagain: #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (trybusy && (!afs_BBusy() || (afs_protocols & VICEP_ACCESS))) { #else if (trybusy && !afs_BBusy()) { #endif struct brequest *bp; /* daemon is not busy */ ObtainSharedLock(&tdc->mflock, 665); if (!(tdc->mflags & DFFetchReq)) { int dontwait = B_DONTWAIT; /* start the daemon (may already be running, however) */ UpgradeSToWLock(&tdc->mflock, 666); tdc->mflags |= DFFetchReq; #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (afs_protocols & VICEP_ACCESS) dontwait = 0; #endif bp = afs_BQueue(BOP_FETCH, avc, dontwait, 0, acred, (afs_size_t) filePos, (afs_size_t) 0, tdc, (void *)0, (void *)0); if (!bp) { tdc->mflags &= ~DFFetchReq; trybusy = 0; /* Avoid bkg daemon since they're too busy */ ReleaseWriteLock(&tdc->mflock); goto tagain; } ConvertWToSLock(&tdc->mflock); /* don't use bp pointer! */ } code = 0; ConvertSToRLock(&tdc->mflock); while (!code && tdc->mflags & DFFetchReq) { afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32, tdc->dflags); /* don't need waiting flag on this one */ ReleaseReadLock(&tdc->mflock); ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); code = afs_osi_SleepSig(&tdc->validPos); ObtainReadLock(&avc->lock); ObtainReadLock(&tdc->lock); ObtainReadLock(&tdc->mflock); } ReleaseReadLock(&tdc->mflock); if (code) { error = code; break; } } } /* now data may have started flowing in (if DFFetching is on). If * data is now streaming in, then wait for some interesting stuff. 
*/ code = 0; while (!code && (tdc->dflags & DFFetching) && tdc->validPos <= filePos) { /* too early: wait for DFFetching flag to vanish, * or data to appear */ afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32, tdc->dflags); ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); code = afs_osi_SleepSig(&tdc->validPos); ObtainReadLock(&avc->lock); ObtainReadLock(&tdc->lock); } if (code) { error = code; break; } /* fetching flag gone, data is here, or we never tried * (BBusy for instance) */ len = tdc->validPos - filePos; versionOk = hsame(avc->f.m.DataVersion, tdc->f.versionNo) ? 1 : 0; if (tdc->dflags & DFFetching) { /* still fetching, some new data is here: * compute length and offset */ offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); } else { /* no longer fetching, verify data version * (avoid new GetDCache call) */ if (versionOk && len > 0) { offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); } else { /* don't have current data, so get it below */ afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO, ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_HYPER, &avc->f.m.DataVersion, ICL_TYPE_HYPER, &tdc->f.versionNo); #if 0 #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (afs_protocols & VICEP_ACCESS) { printf("afs_read: DV mismatch? %d instead of %d for %u.%u.%u\n", tdc->f.versionNo.low, avc->f.m.DataVersion.low, avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique); printf("afs_read: validPos %llu filePos %llu totalLength %lld m.Length %llu noLock %d\n", tdc->validPos, filePos, totalLength, avc->f.m.Length, noLock); printf("afs_read: or len too low? %lld for %u.%u.%u\n", len, avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique); } #endif #endif ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); tdc = NULL; } } if (!tdc) { #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (afs_protocols & VICEP_ACCESS) { /* avoid foreground fetch */ if (!versionOk) { printf("afs_read: avoid forground %u.%u.%u\n", avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique); goto try_background; } #if 0 printf("afs_read: forground %u.%u.%u\n", avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique); #endif } #endif /* If we get here, it was not possible to start the * background daemon. With flag == 1 afs_GetDCache * does the FetchData rpc synchronously. 
*/ ReleaseReadLock(&avc->lock); tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 1); ObtainReadLock(&avc->lock); if (tdc) ObtainReadLock(&tdc->lock); } } afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset), ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len)); if (!tdc) { error = EIO; break; } /* * Locks held: * avc->lock(R) * tdc->lock(R) */ if (len > totalLength) len = totalLength; /* will read len bytes */ if (len <= 0) { /* shouldn't get here if DFFetching is on */ /* read past the end of a chunk, may not be at next chunk yet, and yet * also not at eof, so may have to supply fake zeros */ len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset; /* bytes left in chunk addr space */ if (len > totalLength) len = totalLength; /* and still within xfr request */ tlen = avc->f.m.Length - offset; /* and still within file */ if (len > tlen) len = tlen; if (len > AFS_ZEROS) len = sizeof(afs_zeros); /* and in 0 buffer */ #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); #else afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); #endif AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code); if (code) { error = code; break; } } else { /* get the data from the mem cache */ /* mung uio structure to be right for this transfer */ #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); uio_setoffset(tuiop, offset); #else afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); tuio.afsio_offset = offset; #endif code = afs_MemReadUIO(&tdc->f.inode, tuiop); if (code) { error = code; break; } } /* otherwise we've read some, fixup length, etc and continue with next seg */ len = len - AFS_UIO_RESID(tuiop); /* compute amount really transferred */ trimlen = len; afsio_skip(auio, trimlen); /* update input uio structure */ totalLength -= len; transferLength += len; filePos += len; if (len <= 0) break; /* surprise eof */ #ifdef AFS_DARWIN80_ENV if (tuiop) { uio_free(tuiop); tuiop = 0; } #endif } /* the whole while loop */ /* * Locks held: * avc->lock(R) * tdc->lock(R) if tdc */ /* if we make it here with tdc non-zero, then it is the last chunk we * dealt with, and we have to release it when we're done. We hold on * to it in case we need to do a prefetch. */ if (tdc) { ReleaseReadLock(&tdc->lock); /* * try to queue prefetch, if needed. If DataVersion is zero there * should not be any more: files with DV 0 never have been stored * on the fileserver, symbolic links and directories never require * more than a single chunk. */ if (!noLock && !(hiszero(avc->f.m.DataVersion)) && #ifndef AFS_VM_RDWR_ENV afs_preCache #else 1 #endif ) { afs_PrefetchChunk(avc, tdc, acred, treq); } afs_PutDCache(tdc); } if (!noLock) ReleaseReadLock(&avc->lock); #ifdef AFS_DARWIN80_ENV if (tuiop) uio_free(tuiop); #else osi_FreeSmallSpace(tvec); #endif error = afs_CheckCode(error, treq, 10); afs_DestroyReq(treq); return error; } /* called with the dcache entry triggering the fetch, the vcache entry involved, * and a vrequest for the read call. Marks the dcache entry as having already * triggered a prefetch, starts the prefetch going and sets the DFFetchReq * flag in the prefetched block, so that the next call to read knows to wait * for the daemon to start doing things. * * This function must be called with the vnode at least read-locked, and * no locks on the dcache, because it plays around with dcache entries. 
*/ void afs_PrefetchChunk(struct vcache *avc, struct dcache *adc, afs_ucred_t *acred, struct vrequest *areq) { struct dcache *tdc; afs_size_t offset; afs_size_t j1, j2; /* junk vbls for GetDCache to trash */ offset = adc->f.chunk + 1; /* next chunk we'll need */ offset = AFS_CHUNKTOBASE(offset); /* base of next chunk */ ObtainReadLock(&adc->lock); ObtainSharedLock(&adc->mflock, 662); if (offset < avc->f.m.Length && !(adc->mflags & DFNextStarted) && !afs_BBusy()) { struct brequest *bp; UpgradeSToWLock(&adc->mflock, 663); adc->mflags |= DFNextStarted; /* we've tried to prefetch for this guy */ ReleaseWriteLock(&adc->mflock); ReleaseReadLock(&adc->lock); tdc = afs_GetDCache(avc, offset, areq, &j1, &j2, 2); /* type 2 never returns 0 */ /* * In disconnected mode, type 2 can return 0 because it doesn't * make any sense to allocate a dcache we can never fill */ if (tdc == NULL) return; ObtainSharedLock(&tdc->mflock, 651); if (!(tdc->mflags & DFFetchReq)) { /* ask the daemon to do the work */ UpgradeSToWLock(&tdc->mflock, 652); tdc->mflags |= DFFetchReq; /* guaranteed to be cleared by BKG or GetDCache */ /* last parm (1) tells bkg daemon to do an afs_PutDCache when it is done, * since we don't want to wait for it to finish before doing so ourselves. */ bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred, (afs_size_t) offset, (afs_size_t) 1, tdc, (void *)0, (void *)0); if (!bp) { /* Bkg table full; just abort non-important prefetching to avoid deadlocks */ tdc->mflags &= ~DFFetchReq; ReleaseWriteLock(&tdc->mflock); afs_PutDCache(tdc); /* * DCLOCKXXX: This is a little sketchy, since someone else * could have already started a prefetch.. In practice, * this probably doesn't matter; at most it would cause an * extra slot in the BKG table to be used up when someone * prefetches this for the second time. 
*/ ObtainReadLock(&adc->lock); ObtainWriteLock(&adc->mflock, 664); adc->mflags &= ~DFNextStarted; ReleaseWriteLock(&adc->mflock); ReleaseReadLock(&adc->lock); } else { ReleaseWriteLock(&tdc->mflock); } } else { ReleaseSharedLock(&tdc->mflock); afs_PutDCache(tdc); } } else { ReleaseSharedLock(&adc->mflock); ReleaseReadLock(&adc->lock); } } int afs_UFSRead(struct vcache *avc, struct uio *auio, afs_ucred_t *acred, daddr_t albn, struct buf **abpp, int noLock) { afs_size_t totalLength; afs_size_t transferLength; afs_size_t filePos; afs_size_t offset, tlen; afs_size_t len = 0; afs_int32 trimlen; struct dcache *tdc = 0; afs_int32 error; struct osi_file *tfile; afs_int32 code; int trybusy = 1; struct vrequest *treq = NULL; #ifdef AFS_DARWIN80_ENV uio_t tuiop=NULL; #else struct uio tuio; struct uio *tuiop = &tuio; struct iovec *tvec; memset(&tuio, 0, sizeof(tuio)); #endif AFS_STATCNT(afs_UFSRead); if (avc && avc->vc_error) return EIO; AFS_DISCON_LOCK(); /* check that we have the latest status info in the vnode cache */ if ((code = afs_CreateReq(&treq, acred))) return code; if (!noLock) { if (!avc) osi_Panic("null avc in afs_UFSRead"); else { code = afs_VerifyVCache(avc, treq); if (code) { code = afs_CheckCode(code, treq, 11); /* failed to get it */ afs_DestroyReq(treq); AFS_DISCON_UNLOCK(); return code; } } } #ifndef AFS_VM_RDWR_ENV if (AFS_NFSXLATORREQ(acred)) { if (!afs_AccessOK (avc, PRSFS_READ, treq, CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) { AFS_DISCON_UNLOCK(); code = afs_CheckCode(EACCES, treq, 12); afs_DestroyReq(treq); return code; } } #endif #ifndef AFS_DARWIN80_ENV tvec = (struct iovec *)osi_AllocSmallSpace(sizeof(struct iovec)); memset(tvec, 0, sizeof(struct iovec)); #endif totalLength = AFS_UIO_RESID(auio); filePos = AFS_UIO_OFFSET(auio); afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32, totalLength, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(avc->f.m.Length)); error = 0; transferLength = 0; if (!noLock) ObtainReadLock(&avc->lock); #if defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV) if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) { hset(avc->flushDV, avc->f.m.DataVersion); } #endif /* This bit is bogus. We're checking to see if the read goes past the * end of the file. If so, we should be zeroing out all of the buffers * that the client has passed into us (there is a danger that we may leak * kernel memory if we do not). However, this behaviour is disabled by * not setting len before this segment runs, and by setting len to 0 * immediately we enter it. In addition, we also need to check for a read * which partially goes off the end of the file in the while loop below. 
*/ if (filePos >= avc->f.m.Length) { if (len > AFS_ZEROS) len = sizeof(afs_zeros); /* and in 0 buffer */ len = 0; #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); #else afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); #endif AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code); } while (avc->f.m.Length > 0 && totalLength > 0) { /* read all of the cached info */ if (filePos >= avc->f.m.Length) break; /* all done */ if (noLock) { if (tdc) { ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); } tdc = afs_FindDCache(avc, filePos); if (tdc) { ObtainReadLock(&tdc->lock); offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); len = tdc->validPos - filePos; } } else { int versionOk; /* a tricky question: does the presence of the DFFetching flag * mean that we're fetching the latest version of the file? No. * The server could update the file as soon as the fetch responsible * for the setting of the DFFetching flag completes. * * However, the presence of the DFFetching flag (visible under * a dcache read lock since it is set and cleared only under a * dcache write lock) means that we're fetching as good a version * as was known to this client at the time of the last call to * afs_VerifyVCache, since the latter updates the stat cache's * m.DataVersion field under a vcache write lock, and from the * time that the DFFetching flag goes on in afs_GetDCache (before * the fetch starts), to the time it goes off (after the fetch * completes), afs_GetDCache keeps at least a read lock on the * vcache entry. * * This means that if the DFFetching flag is set, we can use that * data for any reads that must come from the current version of * the file (current == m.DataVersion). * * Another way of looking at this same point is this: if we're * fetching some data and then try do an afs_VerifyVCache, the * VerifyVCache operation will not complete until after the * DFFetching flag is turned off and the dcache entry's f.versionNo * field is updated. * * Note, by the way, that if DFFetching is set, * m.DataVersion > f.versionNo (the latter is not updated until * after the fetch completes). */ if (tdc) { ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); /* before reusing tdc */ } #ifdef STRUCT_TASK_STRUCT_HAS_CRED try_background: #endif tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 2); if (!tdc) { error = ENETDOWN; break; } ObtainReadLock(&tdc->lock); /* now, first try to start transfer, if we'll need the data. If * data already coming, we don't need to do this, obviously. Type * 2 requests never return a null dcache entry, btw. 
*/ if (!(tdc->dflags & DFFetching) && !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) { /* have cache entry, it is not coming in now, and we'll need new data */ tagain: #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (trybusy && (!afs_BBusy() || (afs_protocols & VICEP_ACCESS))) { #else if (trybusy && !afs_BBusy()) { #endif struct brequest *bp; /* daemon is not busy */ ObtainSharedLock(&tdc->mflock, 667); if (!(tdc->mflags & DFFetchReq)) { int dontwait = B_DONTWAIT; UpgradeSToWLock(&tdc->mflock, 668); tdc->mflags |= DFFetchReq; #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (afs_protocols & VICEP_ACCESS) dontwait = 0; #endif bp = afs_BQueue(BOP_FETCH, avc, dontwait, 0, acred, (afs_size_t) filePos, (afs_size_t) 0, tdc, (void *)0, (void *)0); if (!bp) { /* Bkg table full; retry deadlocks */ tdc->mflags &= ~DFFetchReq; trybusy = 0; /* Avoid bkg daemon since they're too busy */ ReleaseWriteLock(&tdc->mflock); goto tagain; } ConvertWToSLock(&tdc->mflock); } code = 0; ConvertSToRLock(&tdc->mflock); while (!code && tdc->mflags & DFFetchReq) { afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32, tdc->dflags); /* don't need waiting flag on this one */ ReleaseReadLock(&tdc->mflock); ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); code = afs_osi_SleepSig(&tdc->validPos); ObtainReadLock(&avc->lock); ObtainReadLock(&tdc->lock); ObtainReadLock(&tdc->mflock); } ReleaseReadLock(&tdc->mflock); if (code) { error = code; break; } } } /* now data may have started flowing in (if DFFetching is on). If * data is now streaming in, then wait for some interesting stuff. */ code = 0; while (!code && (tdc->dflags & DFFetching) && tdc->validPos <= filePos) { /* too early: wait for DFFetching flag to vanish, * or data to appear */ afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32, __LINE__, ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32, tdc->dflags); ReleaseReadLock(&tdc->lock); ReleaseReadLock(&avc->lock); code = afs_osi_SleepSig(&tdc->validPos); ObtainReadLock(&avc->lock); ObtainReadLock(&tdc->lock); } if (code) { error = code; break; } /* fetching flag gone, data is here, or we never tried * (BBusy for instance) */ len = tdc->validPos - filePos; versionOk = hsame(avc->f.m.DataVersion, tdc->f.versionNo) ? 1 : 0; if (tdc->dflags & DFFetching) { /* still fetching, some new data is here: * compute length and offset */ offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); } else { /* no longer fetching, verify data version (avoid new * GetDCache call) */ if (versionOk && len > 0) { offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk); } else { /* don't have current data, so get it below */ afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO, ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_HYPER, &avc->f.m.DataVersion, ICL_TYPE_HYPER, &tdc->f.versionNo); #if 0 #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (afs_protocols & VICEP_ACCESS) { printf("afs_read: DV mismatch? %d instead of %d for %u.%u.%u\n", tdc->f.versionNo.low, avc->f.m.DataVersion.low, avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique); printf("afs_read: validPos %llu filePos %llu totalLength %d m.Length %llu noLock %d\n", tdc->validPos, filePos, totalLength, avc->f.m.Length, noLock); printf("afs_read: or len too low? 
%lld for %u.%u.%u\n", len, avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique); } #endif #endif ReleaseReadLock(&tdc->lock); afs_PutDCache(tdc); tdc = NULL; } } if (!tdc) { #ifdef STRUCT_TASK_STRUCT_HAS_CRED if (afs_protocols & VICEP_ACCESS) { /* avoid foreground fetch */ if (!versionOk) { printf("afs_read: avoid forground %u.%u.%u\n", avc->f.fid.Fid.Volume, avc->f.fid.Fid.Vnode, avc->f.fid.Fid.Unique); goto try_background; } } #endif /* If we get here, it was not possible to start the * background daemon. With flag == 1 afs_GetDCache * does the FetchData rpc synchronously. */ ReleaseReadLock(&avc->lock); tdc = afs_GetDCache(avc, filePos, treq, &offset, &len, 1); ObtainReadLock(&avc->lock); if (tdc) ObtainReadLock(&tdc->lock); } } if (!tdc) { error = EIO; break; } len = tdc->validPos - filePos; afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset), ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len)); if (len > totalLength) len = totalLength; /* will read len bytes */ if (len <= 0) { /* shouldn't get here if DFFetching is on */ afs_Trace4(afs_iclSetp, CM_TRACE_VNODEREAD2, ICL_TYPE_POINTER, tdc, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(tdc->validPos), ICL_TYPE_INT32, tdc->f.chunkBytes, ICL_TYPE_INT32, tdc->dflags); /* read past the end of a chunk, may not be at next chunk yet, and yet * also not at eof, so may have to supply fake zeros */ len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset; /* bytes left in chunk addr space */ if (len > totalLength) len = totalLength; /* and still within xfr request */ tlen = avc->f.m.Length - offset; /* and still within file */ if (len > tlen) len = tlen; if (len > AFS_ZEROS) len = sizeof(afs_zeros); /* and in 0 buffer */ #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); #else afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); #endif AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code); if (code) { error = code; break; } } else { /* get the data from the file */ tfile = (struct osi_file *)osi_UFSOpen(&tdc->f.inode); #ifdef AFS_DARWIN80_ENV trimlen = len; tuiop = afsio_darwin_partialcopy(auio, trimlen); uio_setoffset(tuiop, offset); #else /* mung uio structure to be right for this transfer */ afsio_copy(auio, &tuio, tvec); trimlen = len; afsio_trim(&tuio, trimlen); tuio.afsio_offset = offset; #endif #if defined(AFS_AIX41_ENV) AFS_GUNLOCK(); code = VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL, NULL, afs_osi_credp); AFS_GLOCK(); #elif defined(AFS_AIX32_ENV) code = VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL); /* Flush all JFS pages now for big performance gain in big file cases * If we do something like this, must check to be sure that AFS file * isn't mmapped... see afs_gn_map() for why. */ /* if (tfile->vnode->v_gnode && tfile->vnode->v_gnode->gn_seg) { many different ways to do similar things: so far, the best performing one is #2, but #1 might match it if we straighten out the confusion regarding which pages to flush. It really does matter. 1. vm_flushp(tfile->vnode->v_gnode->gn_seg, 0, len/PAGESIZE - 1); 2. vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE, (len + PAGESIZE-1)/PAGESIZE); 3. vms_inactive(tfile->vnode->v_gnode->gn_seg) Doesn't work correctly 4. vms_delete(tfile->vnode->v_gnode->gn_seg) probably also fails tfile->vnode->v_gnode->gn_seg = NULL; 5. deletep 6. ipgrlse 7. ifreeseg Unfortunately, this seems to cause frequent "cache corruption" episodes. 
vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE, (len + PAGESIZE-1)/PAGESIZE); } */ #elif defined(AFS_AIX_ENV) code = VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, (off_t) & offset, &tuio, NULL, NULL, -1); #elif defined(AFS_SUN5_ENV) AFS_GUNLOCK(); #ifdef AFS_SUN510_ENV VOP_RWLOCK(tfile->vnode, 0, NULL); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp, NULL); VOP_RWUNLOCK(tfile->vnode, 0, NULL); #else VOP_RWLOCK(tfile->vnode, 0); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_RWUNLOCK(tfile->vnode, 0); #endif AFS_GLOCK(); #elif defined(AFS_SGI_ENV) AFS_GUNLOCK(); AFS_VOP_RWLOCK(tfile->vnode, VRWLOCK_READ); AFS_VOP_READ(tfile->vnode, &tuio, IO_ISLOCKED, afs_osi_credp, code); AFS_VOP_RWUNLOCK(tfile->vnode, VRWLOCK_READ); AFS_GLOCK(); #elif defined(AFS_HPUX100_ENV) AFS_GUNLOCK(); code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp); AFS_GLOCK(); #elif defined(AFS_LINUX20_ENV) AFS_GUNLOCK(); code = osi_rdwr(tfile, &tuio, UIO_READ); AFS_GLOCK(); #elif defined(AFS_DARWIN80_ENV) AFS_GUNLOCK(); code = VNOP_READ(tfile->vnode, tuiop, 0, afs_osi_ctxtp); AFS_GLOCK(); #elif defined(AFS_DARWIN_ENV) AFS_GUNLOCK(); VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, current_proc()); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_UNLOCK(tfile->vnode, 0, current_proc()); AFS_GLOCK(); #elif defined(AFS_FBSD80_ENV) AFS_GUNLOCK(); VOP_LOCK(tfile->vnode, LK_EXCLUSIVE); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_UNLOCK(tfile->vnode, 0); AFS_GLOCK(); #elif defined(AFS_FBSD_ENV) AFS_GUNLOCK(); VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curthread); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_UNLOCK(tfile->vnode, 0, curthread); AFS_GLOCK(); #elif defined(AFS_NBSD_ENV) AFS_GUNLOCK(); VOP_LOCK(tfile->vnode, LK_EXCLUSIVE); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_UNLOCK(tfile->vnode, 0); AFS_GLOCK(); #elif defined(AFS_XBSD_ENV) AFS_GUNLOCK(); VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curproc); code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp); VOP_UNLOCK(tfile->vnode, 0, curproc); AFS_GLOCK(); #else code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp); #endif osi_UFSClose(tfile); if (code) { error = code; break; } } /* otherwise we've read some, fixup length, etc and continue with next seg */ len = len - AFS_UIO_RESID(tuiop); /* compute amount really transferred */ trimlen = len; afsio_skip(auio, trimlen); /* update input uio structure */ totalLength -= len; transferLength += len; filePos += len; if (len <= 0) break; /* surprise eof */ #ifdef AFS_DARWIN80_ENV if (tuiop) { uio_free(tuiop); tuiop = 0; } #endif } /* if we make it here with tdc non-zero, then it is the last chunk we * dealt with, and we have to release it when we're done. We hold on * to it in case we need to do a prefetch, obviously. */ if (tdc) { ReleaseReadLock(&tdc->lock); #if !defined(AFS_VM_RDWR_ENV) /* * try to queue prefetch, if needed. If DataVersion is zero there * should not be any more: files with DV 0 never have been stored * on the fileserver, symbolic links and directories never require * more than a single chunk. */ if (!noLock && !(hiszero(avc->f.m.DataVersion))) { if (!(tdc->mflags & DFNextStarted)) afs_PrefetchChunk(avc, tdc, acred, treq); } #endif afs_PutDCache(tdc); } if (!noLock) ReleaseReadLock(&avc->lock); #ifdef AFS_DARWIN80_ENV if (tuiop) uio_free(tuiop); #else osi_FreeSmallSpace(tvec); #endif AFS_DISCON_UNLOCK(); error = afs_CheckCode(error, treq, 13); afs_DestroyReq(treq); return error; }
int
nandfs_get_segment_info_filter(struct nandfs_device *fsdev,
    struct nandfs_suinfo *nsi, uint32_t nmembs, uint64_t segment,
    uint64_t *nsegs, uint32_t filter, uint32_t nfilter)
{
    struct nandfs_segment_usage *su;
    struct nandfs_node *su_node;
    struct buf *bp;
    uint64_t curr, blocknr, blockoff, i;
    uint32_t flags;
    int err = 0;

    curr = ~(0);
    lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL);
    su_node = fsdev->nd_su_node;

    VOP_LOCK(NTOV(su_node), LK_SHARED);

    bp = NULL;
    if (nsegs != NULL)
        *nsegs = 0;
    for (i = 0; i < nmembs; segment++) {
        if (segment == fsdev->nd_fsdata.f_nsegments)
            break;

        nandfs_seg_usage_blk_offset(fsdev, segment, &blocknr,
            &blockoff);

        if (i == 0 || curr != blocknr) {
            if (bp != NULL)
                brelse(bp);
            err = nandfs_bread(su_node, blocknr, NOCRED, 0, &bp);
            if (err) {
                goto out;
            }
            curr = blocknr;
        }

        su = SU_USAGE_OFF(bp, blockoff);
        flags = su->su_flags;
        if (segment == fsdev->nd_seg_num ||
            segment == fsdev->nd_next_seg_num)
            flags |= NANDFS_SEGMENT_USAGE_ACTIVE;

        if (nfilter != 0 && (flags & nfilter) != 0)
            continue;
        if (filter != 0 && (flags & filter) == 0)
            continue;

        nsi->nsi_num = segment;
        nsi->nsi_lastmod = su->su_lastmod;
        nsi->nsi_blocks = su->su_nblocks;
        nsi->nsi_flags = flags;
        nsi++;
        i++;
        if (nsegs != NULL)
            (*nsegs)++;
    }

out:
    if (bp != NULL)
        brelse(bp);
    VOP_UNLOCK(NTOV(su_node), 0);
    lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL);

    return (err);
}
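The two masks work in opposite directions: filter keeps only segments with at least one of its bits set, while nfilter rejects any segment with one of its bits set. A hedged usage sketch, assuming the NANDFS_SEGMENT_USAGE_DIRTY and NANDFS_SEGMENT_USAGE_ERROR flag names from nandfs_fs.h:

/* Hypothetical call: collect up to 64 dirty, non-error segments,
 * scanning from segment 0. */
struct nandfs_suinfo info[64];
uint64_t nsegs;
int err;

err = nandfs_get_segment_info_filter(fsdev, info, 64, 0, &nsegs,
    NANDFS_SEGMENT_USAGE_DIRTY, NANDFS_SEGMENT_USAGE_ERROR);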
/*
 * Look up a vnode/nfsnode by file handle.
 * Callers must check for mount points!!
 * In all cases, a pointer to a
 * nfsnode structure is returned.
 */
int
nfs_nget1(struct mount *mntp, nfsfh_t *fhp, int fhsize, struct nfsnode **npp,
    int lkflags)
{
    struct nfsnode *np;
    struct vnode *vp;
    struct nfsmount *nmp = VFSTONFS(mntp);
    int error;
    struct fh_match fhm;

    fhm.fhm_fhp = fhp;
    fhm.fhm_fhsize = fhsize;

loop:
    rw_enter(&nmp->nm_rbtlock, RW_READER);
    np = rb_tree_find_node(&nmp->nm_rbtree, &fhm);
    if (np != NULL) {
        vp = NFSTOV(np);
        mutex_enter(vp->v_interlock);
        rw_exit(&nmp->nm_rbtlock);
        error = vget(vp, LK_EXCLUSIVE | lkflags);
        if (error == EBUSY)
            return error;
        if (error)
            goto loop;
        *npp = np;
        return (0);
    }
    rw_exit(&nmp->nm_rbtlock);

    error = getnewvnode(VT_NFS, mntp, nfsv2_vnodeop_p, NULL, &vp);
    if (error) {
        *npp = 0;
        return (error);
    }
    np = pool_get(&nfs_node_pool, PR_WAITOK);
    memset(np, 0, sizeof *np);
    np->n_vnode = vp;

    /*
     * Insert the nfsnode in the hash queue for its new file handle
     */
    if (fhsize > NFS_SMALLFH) {
        np->n_fhp = kmem_alloc(fhsize, KM_SLEEP);
    } else
        np->n_fhp = &np->n_fh;
    memcpy(np->n_fhp, fhp, fhsize);
    np->n_fhsize = fhsize;
    np->n_accstamp = -1;
    np->n_vattr = pool_get(&nfs_vattr_pool, PR_WAITOK);

    rw_enter(&nmp->nm_rbtlock, RW_WRITER);
    if (NULL != rb_tree_find_node(&nmp->nm_rbtree, &fhm)) {
        rw_exit(&nmp->nm_rbtlock);
        if (fhsize > NFS_SMALLFH) {
            kmem_free(np->n_fhp, fhsize);
        }
        pool_put(&nfs_vattr_pool, np->n_vattr);
        pool_put(&nfs_node_pool, np);
        ungetnewvnode(vp);
        goto loop;
    }
    vp->v_data = np;
    genfs_node_init(vp, &nfs_genfsops);
    /*
     * Initialize read/write creds to useful values. VOP_OPEN will
     * overwrite these.
     */
    np->n_rcred = curlwp->l_cred;
    kauth_cred_hold(np->n_rcred);
    np->n_wcred = curlwp->l_cred;
    kauth_cred_hold(np->n_wcred);
    VOP_LOCK(vp, LK_EXCLUSIVE);
    NFS_INVALIDATE_ATTRCACHE(np);
    uvm_vnp_setsize(vp, 0);
    (void)rb_tree_insert_node(&nmp->nm_rbtree, np);
    rw_exit(&nmp->nm_rbtlock);

    *npp = np;
    return (0);
}
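nfs_nget1() is the classic optimistic lookup-or-create: search under a read lock, build a candidate with no lock held, then re-check under the write lock and discard the candidate if another thread won the race. A hedged sketch of just that skeleton (example_tree, example_lock, tree_find(), node_alloc(), node_free() are all hypothetical stand-ins):

static struct node *
example_get(uint64_t key)
{
    struct node *np, *new;

loop:
    rw_enter(&example_lock, RW_READER);
    np = tree_find(&example_tree, key);
    rw_exit(&example_lock);
    if (np != NULL)
        return np;              /* fast path: already cached */

    new = node_alloc(key);      /* may sleep; no lock held */

    rw_enter(&example_lock, RW_WRITER);
    if (tree_find(&example_tree, key) != NULL) {
        /* Raced: someone else inserted first.  Undo and retry. */
        rw_exit(&example_lock);
        node_free(new);
        goto loop;
    }
    tree_insert(&example_tree, new);
    rw_exit(&example_lock);
    return new;
}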
int
nandfs_vblock_alloc(struct nandfs_device *nandfsdev, nandfs_daddr_t *vblock)
{
    struct nandfs_node *dat;
    struct nandfs_mdt *mdt;
    struct nandfs_alloc_request req;
    struct nandfs_dat_entry *dat_entry;
    uint64_t start;
    uint32_t entry;
    int locked, error;

    dat = nandfsdev->nd_dat_node;
    mdt = &nandfsdev->nd_dat_mdt;
    start = nandfsdev->nd_last_cno + 1;

    locked = NANDFS_VOP_ISLOCKED(NTOV(dat));
    if (!locked)
        VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
    req.entrynum = 0;

    /* Alloc vblock number */
    error = nandfs_find_free_entry(mdt, dat, &req);
    if (error) {
        nandfs_error("%s: cannot find free vblk entry\n", __func__);
        if (!locked)
            VOP_UNLOCK(NTOV(dat), 0);
        return (error);
    }

    /* Read/create buffer */
    error = nandfs_get_entry_block(mdt, dat, &req, &entry, 1);
    if (error) {
        nandfs_error("%s: cannot get free vblk entry\n", __func__);
        nandfs_abort_entry(&req);
        if (!locked)
            VOP_UNLOCK(NTOV(dat), 0);
        return (error);
    }

    /* Fill out vblock data */
    dat_entry = (struct nandfs_dat_entry *)req.bp_entry->b_data;
    dat_entry[entry].de_start = start;
    dat_entry[entry].de_end = UINTMAX_MAX;
    dat_entry[entry].de_blocknr = 0;

    /* Commit allocation */
    error = nandfs_alloc_entry(mdt, &req);
    if (error) {
        nandfs_error("%s: cannot get free vblk entry\n", __func__);
        if (!locked)
            VOP_UNLOCK(NTOV(dat), 0);
        return (error);
    }

    /* Return allocated vblock */
    *vblock = req.entrynum;
    DPRINTF(DAT, ("%s: allocated vblock %#jx\n",
        __func__, (uintmax_t)*vblock));

    if (!locked)
        VOP_UNLOCK(NTOV(dat), 0);
    return (error);
}
errno_t
sc_open(thread_t *p, syscall_result_t *r, sc_open_args *arg)
{
    int fd;
    int flags = arg->flags;
    int error = 0;
    vnode_t *node;
    proc_t *proc = p->thr_proc;
    char fname[PATH_MAX+1];

    if ((error = copyinstr(fname, arg->fname, PATH_MAX)))
        return error;
    KASSERT(proc->p_rootdir != NULL);

    error = vfs_lookup(proc->p_curdir, &node, fname, p, LKP_NORMAL);
    if (error) {
        if (!(flags & O_CREAT))
            return error;
        vnode_t *parent;
        error = vfs_lookup_parent(proc->p_curdir, &parent, fname, p);
        if (error)
            return error;
        if ((error = VOP_ACCESS(parent, W_OK, proc->p_cred))) {
            vrele(parent);
            return error;
        }
        vattr_t attr;
        attr.va_mode = arg->mode & ~(proc->p_umask) & 0777;
        attr.va_type = VNODE_TYPE_REG;
        attr.va_uid = proc->p_cred->p_uid;
        attr.va_gid = proc->p_cred->p_gid;
        attr.va_size = 0;
        attr.va_dev = NULL;
        error = VOP_CREATE(parent, &node, _get_last_cmpt(fname), &attr);
        VOP_UNLOCK(parent);
        if (error)
            return error;
        VOP_LOCK(node);
    } else {
        /* The file exists. */
        if ((flags & O_CREAT) && (flags & O_EXCL)) {
            vrele(node);
            return -EEXIST;
        }
        if ((node->v_type == VNODE_TYPE_DIR) &&
            (flags & (O_RDWR | O_WRONLY))) {
            vrele(node);
            return -EISDIR;
        }
    }

    /* Note: on systems where O_RDONLY is defined as 0, the middle
     * arm of this conditional can never be selected. */
    int wmode = (flags & O_RDWR) ? (W_OK | R_OK) :
        (flags & O_RDONLY) ? (R_OK) : (W_OK);
    if ((error = VOP_ACCESS(node, wmode, proc->p_cred))) {
        vrele(node);
        return error;
    }
    if ((error = VOP_OPEN(node, flags, arg->mode))) {
        vrele(node);
        return error;
    }
    if (ISSET(flags, O_RDWR | O_WRONLY) && ISSET(flags, O_TRUNC) &&
        node->v_type == VNODE_TYPE_REG)
        VOP_TRUNCATE(node, 0);
    if ((error = f_alloc(proc, node, flags, &fd))) {
        vrele(node); /* <- this belongs here; added because it was missing. */
        return error;
    }
    VOP_UNLOCK(node);
    r->result = fd;
    return 0;
}
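Seen from user space, the O_CREAT/O_EXCL and O_TRUNC handling in sc_open() gives roughly the usual POSIX open() semantics. A small illustrative caller in standard C (the path is hypothetical):

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
    /* With O_CREAT|O_EXCL, open() fails with EEXIST if the file exists. */
    int fd = open("/tmp/example.lock", O_CREAT | O_EXCL | O_WRONLY, 0600);

    if (fd < 0) {
        if (errno == EEXIST)
            fprintf(stderr, "file already exists\n");
        return 1;
    }
    close(fd);
    return 0;
}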
int
afs_UFSRead(register struct vcache *avc, struct uio *auio,
            struct AFS_UCRED *acred, daddr_t albn, struct buf **abpp,
            int noLock)
{
    afs_size_t totalLength;
    afs_size_t transferLength;
    afs_size_t filePos;
    afs_size_t offset, len, tlen;
    afs_int32 trimlen;
    struct dcache *tdc = 0;
    afs_int32 error;
#ifdef AFS_DARWIN80_ENV
    uio_t tuiop = NULL;
#else
    struct uio tuio;
    struct uio *tuiop = &tuio;
    struct iovec *tvec;
#endif
    struct osi_file *tfile;
    afs_int32 code;
    int trybusy = 1;
    struct vrequest treq;

    AFS_STATCNT(afs_UFSRead);
    if (avc && avc->vc_error)
        return EIO;

    AFS_DISCON_LOCK();

    /* check that we have the latest status info in the vnode cache */
    if ((code = afs_InitReq(&treq, acred))) {
        AFS_DISCON_UNLOCK();    /* was missing on this early return */
        return code;
    }
    if (!noLock) {
        if (!avc)
            osi_Panic("null avc in afs_UFSRead");
        else {
            code = afs_VerifyVCache(avc, &treq);
            if (code) {
                code = afs_CheckCode(code, &treq, 11);  /* failed to get it */
                AFS_DISCON_UNLOCK();
                return code;
            }
        }
    }
#ifndef AFS_VM_RDWR_ENV
    if (AFS_NFSXLATORREQ(acred)) {
        if (!afs_AccessOK
            (avc, PRSFS_READ, &treq,
             CHECK_MODE_BITS | CMB_ALLOW_EXEC_AS_READ)) {
            AFS_DISCON_UNLOCK();
            return afs_CheckCode(EACCES, &treq, 12);
        }
    }
#endif

#ifndef AFS_DARWIN80_ENV
    tvec = (struct iovec *)osi_AllocSmallSpace(sizeof(struct iovec));
#endif
    totalLength = AFS_UIO_RESID(auio);
    filePos = AFS_UIO_OFFSET(auio);
    afs_Trace4(afs_iclSetp, CM_TRACE_READ, ICL_TYPE_POINTER, avc,
               ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(filePos), ICL_TYPE_INT32,
               totalLength, ICL_TYPE_OFFSET,
               ICL_HANDLE_OFFSET(avc->f.m.Length));
    error = 0;
    transferLength = 0;
    if (!noLock)
        ObtainReadLock(&avc->lock);
#if defined(AFS_TEXT_ENV) && !defined(AFS_VM_RDWR_ENV)
    if (avc->flushDV.high == AFS_MAXDV && avc->flushDV.low == AFS_MAXDV) {
        hset(avc->flushDV, avc->f.m.DataVersion);
    }
#endif

    if (filePos >= avc->f.m.Length) {
        /* read starts at or past EOF: transfer zero bytes.  (A stray
         * comparison of the still-uninitialized len against AFS_ZEROS
         * used to sit here; it was immediately overwritten by len = 0
         * and has been dropped.) */
        len = 0;
#ifdef AFS_DARWIN80_ENV
        trimlen = len;
        tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
        afsio_copy(auio, &tuio, tvec);
        trimlen = len;
        afsio_trim(&tuio, trimlen);
#endif
        AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
    }

    while (avc->f.m.Length > 0 && totalLength > 0) {
        /* read all of the cached info */
        if (filePos >= avc->f.m.Length)
            break;              /* all done */
        if (noLock) {
            if (tdc) {
                ReleaseReadLock(&tdc->lock);
                afs_PutDCache(tdc);
            }
            tdc = afs_FindDCache(avc, filePos);
            if (tdc) {
                ObtainReadLock(&tdc->lock);
                offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
                len = tdc->validPos - filePos;
            }
        } else {
            /* a tricky question: does the presence of the DFFetching flag
             * mean that we're fetching the latest version of the file?  No.
             * The server could update the file as soon as the fetch responsible
             * for the setting of the DFFetching flag completes.
             *
             * However, the presence of the DFFetching flag (visible under
             * a dcache read lock since it is set and cleared only under a
             * dcache write lock) means that we're fetching as good a version
             * as was known to this client at the time of the last call to
             * afs_VerifyVCache, since the latter updates the stat cache's
             * m.DataVersion field under a vcache write lock, and from the
             * time that the DFFetching flag goes on in afs_GetDCache (before
             * the fetch starts), to the time it goes off (after the fetch
             * completes), afs_GetDCache keeps at least a read lock on the
             * vcache entry.
             *
             * This means that if the DFFetching flag is set, we can use that
             * data for any reads that must come from the current version of
             * the file (current == m.DataVersion).
             *
             * Another way of looking at this same point is this: if we're
             * fetching some data and then try to do an afs_VerifyVCache, the
             * VerifyVCache operation will not complete until after the
             * DFFetching flag is turned off and the dcache entry's f.versionNo
             * field is updated.
             *
             * Note, by the way, that if DFFetching is set,
             * m.DataVersion > f.versionNo (the latter is not updated until
             * after the fetch completes).
             */
            if (tdc) {
                ReleaseReadLock(&tdc->lock);
                afs_PutDCache(tdc);     /* before reusing tdc */
            }
            tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 2);
#ifdef AFS_DISCON_ENV
            if (!tdc) {
                printf("Network down in afs_read\n");
                error = ENETDOWN;
                break;
            }
#endif /* AFS_DISCON_ENV */
            ObtainReadLock(&tdc->lock);
            /* now, first try to start transfer, if we'll need the data.  If
             * data already coming, we don't need to do this, obviously.  Type
             * 2 requests never return a null dcache entry, btw. */
            if (!(tdc->dflags & DFFetching)
                && !hsame(avc->f.m.DataVersion, tdc->f.versionNo)) {
                /* have cache entry, it is not coming in now, and we'll need new data */
              tagain:
                if (trybusy && !afs_BBusy()) {
                    struct brequest *bp;
                    /* daemon is not busy */
                    ObtainSharedLock(&tdc->mflock, 667);
                    if (!(tdc->mflags & DFFetchReq)) {
                        UpgradeSToWLock(&tdc->mflock, 668);
                        tdc->mflags |= DFFetchReq;
                        bp = afs_BQueue(BOP_FETCH, avc, B_DONTWAIT, 0, acred,
                                        (afs_size_t) filePos, (afs_size_t) 0,
                                        tdc);
                        if (!bp) {
                            /* Bkg table full; retry deadlocks */
                            tdc->mflags &= ~DFFetchReq;
                            trybusy = 0;        /* Avoid bkg daemon since they're too busy */
                            ReleaseWriteLock(&tdc->mflock);
                            goto tagain;
                        }
                        ConvertWToSLock(&tdc->mflock);
                    }
                    code = 0;
                    ConvertSToRLock(&tdc->mflock);
                    while (!code && (tdc->mflags & DFFetchReq)) {
                        afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT,
                                   ICL_TYPE_STRING, __FILE__, ICL_TYPE_INT32,
                                   __LINE__, ICL_TYPE_POINTER, tdc,
                                   ICL_TYPE_INT32, tdc->dflags);
                        /* don't need waiting flag on this one */
                        ReleaseReadLock(&tdc->mflock);
                        ReleaseReadLock(&tdc->lock);
                        ReleaseReadLock(&avc->lock);
                        code = afs_osi_SleepSig(&tdc->validPos);
                        ObtainReadLock(&avc->lock);
                        ObtainReadLock(&tdc->lock);
                        ObtainReadLock(&tdc->mflock);
                    }
                    ReleaseReadLock(&tdc->mflock);
                    if (code) {
                        error = code;
                        break;
                    }
                }
            }
            /* now data may have started flowing in (if DFFetching is on).  If
             * data is now streaming in, then wait for some interesting stuff.
             */
            code = 0;
            while (!code && (tdc->dflags & DFFetching)
                   && tdc->validPos <= filePos) {
                /* too early: wait for DFFetching flag to vanish,
                 * or data to appear */
                afs_Trace4(afs_iclSetp, CM_TRACE_DCACHEWAIT, ICL_TYPE_STRING,
                           __FILE__, ICL_TYPE_INT32, __LINE__,
                           ICL_TYPE_POINTER, tdc, ICL_TYPE_INT32,
                           tdc->dflags);
                ReleaseReadLock(&tdc->lock);
                ReleaseReadLock(&avc->lock);
                code = afs_osi_SleepSig(&tdc->validPos);
                ObtainReadLock(&avc->lock);
                ObtainReadLock(&tdc->lock);
            }
            if (code) {
                error = code;
                break;
            }
            /* fetching flag gone, data is here, or we never tried
             * (BBusy for instance) */
            if (tdc->dflags & DFFetching) {
                /* still fetching, some new data is here:
                 * compute length and offset */
                offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
                len = tdc->validPos - filePos;
            } else {
                /* no longer fetching, verify data version (avoid new
                 * GetDCache call) */
                if (hsame(avc->f.m.DataVersion, tdc->f.versionNo)
                    && ((len = tdc->validPos - filePos) > 0)) {
                    offset = filePos - AFS_CHUNKTOBASE(tdc->f.chunk);
                } else {
                    /* don't have current data, so get it below */
                    afs_Trace3(afs_iclSetp, CM_TRACE_VERSIONNO,
                               ICL_TYPE_INT64, ICL_HANDLE_OFFSET(filePos),
                               ICL_TYPE_HYPER, &avc->f.m.DataVersion,
                               ICL_TYPE_HYPER, &tdc->f.versionNo);
                    ReleaseReadLock(&tdc->lock);
                    afs_PutDCache(tdc);
                    tdc = NULL;
                }
            }
            if (!tdc) {
                /* If we get here, it was not possible to start the
                 * background daemon.  With flag == 1 afs_GetDCache
                 * does the FetchData rpc synchronously.
                 */
                ReleaseReadLock(&avc->lock);
                tdc = afs_GetDCache(avc, filePos, &treq, &offset, &len, 1);
                ObtainReadLock(&avc->lock);
                if (tdc)
                    ObtainReadLock(&tdc->lock);
            }
        }

        if (!tdc) {
            error = EIO;
            break;
        }
        len = tdc->validPos - filePos;
        afs_Trace3(afs_iclSetp, CM_TRACE_VNODEREAD, ICL_TYPE_POINTER, tdc,
                   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(offset),
                   ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(len));
        if (len > totalLength)
            len = totalLength;  /* will read len bytes */
        if (len <= 0) {         /* shouldn't get here if DFFetching is on */
            afs_Trace4(afs_iclSetp, CM_TRACE_VNODEREAD2, ICL_TYPE_POINTER,
                       tdc, ICL_TYPE_OFFSET,
                       ICL_HANDLE_OFFSET(tdc->validPos), ICL_TYPE_INT32,
                       tdc->f.chunkBytes, ICL_TYPE_INT32, tdc->dflags);
            /* read past the end of a chunk, may not be at next chunk yet, and yet
             * also not at eof, so may have to supply fake zeros */
            len = AFS_CHUNKTOSIZE(tdc->f.chunk) - offset;       /* bytes left in chunk addr space */
            if (len > totalLength)
                len = totalLength;      /* and still within xfr request */
            tlen = avc->f.m.Length - offset;    /* and still within file */
            if (len > tlen)
                len = tlen;
            if (len > AFS_ZEROS)
                len = sizeof(afs_zeros);        /* and in 0 buffer */
#ifdef AFS_DARWIN80_ENV
            trimlen = len;
            tuiop = afsio_darwin_partialcopy(auio, trimlen);
#else
            afsio_copy(auio, &tuio, tvec);
            trimlen = len;
            afsio_trim(&tuio, trimlen);
#endif
            AFS_UIOMOVE(afs_zeros, trimlen, UIO_READ, tuiop, code);
            if (code) {
                error = code;
                break;
            }
        } else {
            /* get the data from the file */
#ifdef IHINT
            if ((tfile = tdc->ihint) != 0) {
                if (tdc->f.inode != tfile->inum) {
                    afs_warn("afs_UFSRead: %x hint mismatch tdc %d inum %d\n",
                             tdc, tdc->f.inode, tfile->inum);
                    osi_UFSClose(tfile);
                    tdc->ihint = tfile = 0;
                    nihints--;
                }
            }
            if (tfile != 0) {
                usedihint++;
            } else
#endif /* IHINT */
#if defined(LINUX_USE_FH)
                tfile = (struct osi_file *)osi_UFSOpen_fh(&tdc->f.fh,
                                                          tdc->f.fh_type);
#else
                tfile = (struct osi_file *)osi_UFSOpen(tdc->f.inode);
#endif
#ifdef AFS_DARWIN80_ENV
            trimlen = len;
            tuiop = afsio_darwin_partialcopy(auio, trimlen);
            uio_setoffset(tuiop, offset);
#else
            /* mung uio structure to be right for this transfer */
            afsio_copy(auio, &tuio, tvec);
            trimlen = len;
            afsio_trim(&tuio, trimlen);
            tuio.afsio_offset = offset;
#endif

#if defined(AFS_AIX41_ENV)
            AFS_GUNLOCK();
            code =
                VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL,
                          NULL, afs_osi_credp);
            AFS_GLOCK();
#elif defined(AFS_AIX32_ENV)
            code =
                VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, &tuio, NULL, NULL);
            /* Flush all JFS pages now for big performance gain in big file cases
             * If we do something like this, must check to be sure that AFS file
             * isn't mmapped... see afs_gn_map() for why.
             */
            /*
             * if (tfile->vnode->v_gnode && tfile->vnode->v_gnode->gn_seg) {
             *   many different ways to do similar things:
             *   so far, the best performing one is #2, but #1 might match it
             *   if we straighten out the confusion regarding which pages to
             *   flush.  It really does matter.
             *   1. vm_flushp(tfile->vnode->v_gnode->gn_seg, 0,
             *                len/PAGESIZE - 1);
             *   2. vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE,
             *                  (len + PAGESIZE-1)/PAGESIZE);
             *   3. vms_inactive(tfile->vnode->v_gnode->gn_seg)
             *      Doesn't work correctly
             *   4. vms_delete(tfile->vnode->v_gnode->gn_seg)
             *      probably also fails
             *      tfile->vnode->v_gnode->gn_seg = NULL;
             *   5. deletep
             *   6. ipgrlse
             *   7. ifreeseg
             *   Unfortunately, this seems to cause frequent "cache corruption"
             *   episodes.
             *   vm_releasep(tfile->vnode->v_gnode->gn_seg, offset/PAGESIZE,
             *               (len + PAGESIZE-1)/PAGESIZE);
             * }
             */
#elif defined(AFS_AIX_ENV)
            code =
                VNOP_RDWR(tfile->vnode, UIO_READ, FREAD, (off_t) & offset,
                          &tuio, NULL, NULL, -1);
#elif defined(AFS_SUN5_ENV)
            AFS_GUNLOCK();
#ifdef AFS_SUN510_ENV
            {
                caller_context_t ct;

                VOP_RWLOCK(tfile->vnode, 0, &ct);
                code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp, &ct);
                VOP_RWUNLOCK(tfile->vnode, 0, &ct);
            }
#else
            VOP_RWLOCK(tfile->vnode, 0);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_RWUNLOCK(tfile->vnode, 0);
#endif
            AFS_GLOCK();
#elif defined(AFS_SGI_ENV)
            AFS_GUNLOCK();
            AFS_VOP_RWLOCK(tfile->vnode, VRWLOCK_READ);
            AFS_VOP_READ(tfile->vnode, &tuio, IO_ISLOCKED, afs_osi_credp,
                         code);
            AFS_VOP_RWUNLOCK(tfile->vnode, VRWLOCK_READ);
            AFS_GLOCK();
#elif defined(AFS_OSF_ENV)
            tuio.uio_rw = UIO_READ;
            AFS_GUNLOCK();
            VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp, code);
            AFS_GLOCK();
#elif defined(AFS_HPUX100_ENV)
            AFS_GUNLOCK();
            code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp);
            AFS_GLOCK();
#elif defined(AFS_LINUX20_ENV)
            AFS_GUNLOCK();
            code = osi_rdwr(tfile, &tuio, UIO_READ);
            AFS_GLOCK();
#elif defined(AFS_DARWIN80_ENV)
            AFS_GUNLOCK();
            code = VNOP_READ(tfile->vnode, tuiop, 0, afs_osi_ctxtp);
            AFS_GLOCK();
#elif defined(AFS_DARWIN_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, current_proc());
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0, current_proc());
            AFS_GLOCK();
#elif defined(AFS_FBSD80_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0);
            AFS_GLOCK();
#elif defined(AFS_FBSD50_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curthread);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0, curthread);
            AFS_GLOCK();
#elif defined(AFS_XBSD_ENV)
            AFS_GUNLOCK();
            VOP_LOCK(tfile->vnode, LK_EXCLUSIVE, curproc);
            code = VOP_READ(tfile->vnode, &tuio, 0, afs_osi_credp);
            VOP_UNLOCK(tfile->vnode, 0, curproc);
            AFS_GLOCK();
#else
            code = VOP_RDWR(tfile->vnode, &tuio, UIO_READ, 0, afs_osi_credp);
#endif

#ifdef IHINT
            if (!tdc->ihint && nihints < maxIHint) {
                tdc->ihint = tfile;
                nihints++;
            } else
#endif /* IHINT */
                osi_UFSClose(tfile);

            if (code) {
                error = code;
                break;
            }
        }
        /* otherwise we've read some, fixup length, etc and continue with
         * next seg */
        len = len - AFS_UIO_RESID(tuiop);       /* compute amount really transferred */
        trimlen = len;
        afsio_skip(auio, trimlen);      /* update input uio structure */
        totalLength -= len;
        transferLength += len;
        filePos += len;
        if (len <= 0)
            break;              /* surprise eof */
#ifdef AFS_DARWIN80_ENV
        if (tuiop) {
            uio_free(tuiop);
            tuiop = 0;
        }
#endif
    }

    /* if we make it here with tdc non-zero, then it is the last chunk we
     * dealt with, and we have to release it when we're done.  We hold on
     * to it in case we need to do a prefetch, obviously.
     */
    if (tdc) {
        ReleaseReadLock(&tdc->lock);
#if !defined(AFS_VM_RDWR_ENV)
        /* try to queue prefetch, if needed */
        if (!noLock) {
            if (!(tdc->mflags & DFNextStarted))
                afs_PrefetchChunk(avc, tdc, acred, &treq);
        }
#endif
        afs_PutDCache(tdc);
    }
    if (!noLock)
        ReleaseReadLock(&avc->lock);

#ifdef AFS_DARWIN80_ENV
    if (tuiop)
        uio_free(tuiop);
#else
    osi_FreeSmallSpace(tvec);
#endif
    AFS_DISCON_UNLOCK();
    error = afs_CheckCode(error, &treq, 13);
    return error;
}
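/*
 * Each platform branch in the #ifdef ladder above repeats one pattern:
 * drop the AFS global lock, lock the cache file's backing vnode, read,
 * unlock, retake the global lock.  Below is a condensed sketch of that
 * pattern using the FreeBSD 8-era VOP signatures already visible in the
 * AFS_FBSD80_ENV branch; cache_vnode_read is an illustrative name, not
 * an OpenAFS or FreeBSD function, and the real code cannot be collapsed
 * this way precisely because each kernel spells these calls differently.
 */
static int
cache_vnode_read(struct vnode *vp, struct uio *uiop, struct ucred *cred)
{
    int code;

    AFS_GUNLOCK();              /* never hold the AFS global lock across a
                                 * potentially sleeping VOP_READ */
    VOP_LOCK(vp, LK_EXCLUSIVE); /* serialize with writers on the cache file */
    code = VOP_READ(vp, uiop, 0, cred);
    VOP_UNLOCK(vp, 0);
    AFS_GLOCK();
    return (code);
}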
static int
nandfs_cleaner_clean_segments(struct nandfs_device *nffsdev,
    struct nandfs_vinfo *vinfo, uint32_t nvinfo,
    struct nandfs_period *pd, uint32_t npd,
    struct nandfs_bdesc *bdesc, uint32_t nbdesc,
    uint64_t *segments, uint32_t nsegs)
{
    struct nandfs_node *gc;
    struct buf *bp;
    uint32_t i;
    int error = 0;

    gc = nffsdev->nd_gc_node;

    DPRINTF(CLEAN, ("%s: enter\n", __func__));

    VOP_LOCK(NTOV(gc), LK_EXCLUSIVE);
    for (i = 0; i < nvinfo; i++) {
        if (!vinfo[i].nvi_alive)
            continue;
        DPRINTF(CLEAN, ("%s: read vblknr:%#jx blk:%#jx\n",
            __func__, (uintmax_t)vinfo[i].nvi_vblocknr,
            (uintmax_t)vinfo[i].nvi_blocknr));
        error = nandfs_bread(nffsdev->nd_gc_node, vinfo[i].nvi_blocknr,
            NULL, 0, &bp);
        if (error) {
            nandfs_error("%s:%d", __FILE__, __LINE__);
            VOP_UNLOCK(NTOV(gc), 0);
            goto out;
        }
        nandfs_vblk_set(bp, vinfo[i].nvi_vblocknr);
        nandfs_buf_set(bp, NANDFS_VBLK_ASSIGNED);
        nandfs_dirty_buf(bp, 1);
    }
    VOP_UNLOCK(NTOV(gc), 0);

    /* Delete checkpoints */
    for (i = 0; i < npd; i++) {
        DPRINTF(CLEAN, ("delete checkpoint: %jx\n",
            (uintmax_t)pd[i].p_start));
        error = nandfs_delete_cp(nffsdev->nd_cp_node, pd[i].p_start,
            pd[i].p_end);
        if (error) {
            nandfs_error("%s:%d", __FILE__, __LINE__);
            goto out;
        }
    }

    /* Update vblocks */
    for (i = 0; i < nvinfo; i++) {
        if (vinfo[i].nvi_alive)
            continue;
        DPRINTF(CLEAN, ("freeing vblknr: %jx\n",
            (uintmax_t)vinfo[i].nvi_vblocknr));
        error = nandfs_vblock_free(nffsdev, vinfo[i].nvi_vblocknr);
        if (error) {
            nandfs_error("%s:%d", __FILE__, __LINE__);
            goto out;
        }
    }

    error = nandfs_process_bdesc(nffsdev, bdesc, nbdesc);
    if (error) {
        nandfs_error("%s:%d", __FILE__, __LINE__);
        goto out;
    }

    /* Add segments to clean */
    if (nffsdev->nd_free_count) {
        nffsdev->nd_free_base = realloc(nffsdev->nd_free_base,
            (nffsdev->nd_free_count + nsegs) * sizeof(uint64_t),
            M_NANDFSTEMP, M_WAITOK | M_ZERO);
        memcpy(&nffsdev->nd_free_base[nffsdev->nd_free_count], segments,
            nsegs * sizeof(uint64_t));
        nffsdev->nd_free_count += nsegs;
    } else {
        nffsdev->nd_free_base = malloc(nsegs * sizeof(uint64_t),
            M_NANDFSTEMP, M_WAITOK | M_ZERO);
        memcpy(nffsdev->nd_free_base, segments,
            nsegs * sizeof(uint64_t));
        nffsdev->nd_free_count = nsegs;
    }

out:
    DPRINTF(CLEAN, ("%s: exit error %d\n", __func__, error));

    return (error);
}
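/*
 * The free-segment bookkeeping at the end of nandfs_cleaner_clean_segments
 * special-cases the first allocation.  With an allocator whose realloc
 * accepts a NULL pointer (as libc realloc does), both branches collapse
 * into one append, provided the count starts at zero.  This is a
 * user-space sketch; seglist_append and struct seglist are invented for
 * illustration.  Unlike the kernel path, where M_WAITOK means the
 * allocation cannot fail, a fallible realloc must not overwrite the old
 * pointer until it succeeds, or the original array would leak.
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct seglist {
    uint64_t *base;     /* NULL when empty */
    uint32_t count;
};

static int
seglist_append(struct seglist *sl, const uint64_t *segs, uint32_t nsegs)
{
    uint64_t *nbase;

    /* realloc(NULL, size) behaves like malloc, so no first-time branch */
    nbase = realloc(sl->base, (sl->count + nsegs) * sizeof(uint64_t));
    if (nbase == NULL)
        return (-1);    /* sl->base is still valid; caller may retry */
    memcpy(nbase + sl->count, segs, nsegs * sizeof(uint64_t));
    sl->base = nbase;
    sl->count += nsegs;
    return (0);
}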