/*
 * Truncate the on-disk quota files selected by 'flags'.
 *
 * mp    -- mount whose quota inodes are to be truncated
 * flags -- XFS_DQ_USER selects the user quota inode (sb_uquotino);
 *          XFS_DQ_GROUP or XFS_DQ_PROJ selects the group/project quota
 *          inode (sb_gquotino)
 *
 * Returns EPERM if the caller lacks CAP_SYS_ADMIN, EINVAL if the
 * superblock has no quota support or no flags were given.  Otherwise
 * both selected quota files are attempted and the first failure seen
 * (inode lookup or truncation) is returned; 0 means everything that was
 * asked for succeeded.
 */
STATIC int
xfs_qm_scall_trunc_qfiles(
	xfs_mount_t	*mp,
	uint		flags)
{
	int		error = 0;
	int		error2 = 0;
	xfs_inode_t	*qip;

	if (!capable(CAP_SYS_ADMIN))
		return XFS_ERROR(EPERM);

	if (!XFS_SB_VERSION_HASQUOTA(&mp->m_sb) || flags == 0) {
		qdprintk("qtrunc flags=%x m_qflags=%x\n", flags, mp->m_qflags);
		return XFS_ERROR(EINVAL);
	}

	if ((flags & XFS_DQ_USER) && mp->m_sb.sb_uquotino != NULLFSINO) {
		error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, 0, 0, &qip, 0);
		if (!error) {
			error = xfs_truncate_file(mp, qip);
			VN_RELE(XFS_ITOV(qip));
		}
	}

	/*
	 * Use a separate error variable here so that a failure on the user
	 * quota file is not masked by a successful group/project pass.
	 */
	if ((flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) &&
	    mp->m_sb.sb_gquotino != NULLFSINO) {
		error2 = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, 0, 0, &qip, 0);
		if (!error2) {
			error2 = xfs_truncate_file(mp, qip);
			VN_RELE(XFS_ITOV(qip));
		}
	}

	return error ? error : error2;
}
void xfs_refcache_resize(int xfs_refcache_new_size) { int i; xfs_inode_t *ip; int iplist_index = 0; xfs_inode_t **iplist; int error; /* * If the new size is smaller than the current size, * purge entries to create smaller cache, and * reposition index if necessary. * Don't bother if no refcache yet. */ if (xfs_refcache && (xfs_refcache_new_size < xfs_refcache_size)) { iplist = (xfs_inode_t **)kmem_zalloc(XFS_REFCACHE_SIZE_MAX * sizeof(xfs_inode_t *), KM_SLEEP); spin_lock(&xfs_refcache_lock); for (i = xfs_refcache_new_size; i < xfs_refcache_size; i++) { ip = xfs_refcache[i]; if (ip != NULL) { xfs_refcache[i] = NULL; ip->i_refcache = NULL; xfs_refcache_count--; ASSERT(xfs_refcache_count >= 0); iplist[iplist_index] = ip; iplist_index++; } } xfs_refcache_size = xfs_refcache_new_size; /* * Move index to beginning of cache if it's now past the end */ if (xfs_refcache_index >= xfs_refcache_new_size) xfs_refcache_index = 0; spin_unlock(&xfs_refcache_lock); /* * Now drop the inodes we collected. */ for (i = 0; i < iplist_index; i++) { VOP_RELEASE(XFS_ITOV(iplist[i]), error); VN_RELE(XFS_ITOV(iplist[i])); } kmem_free(iplist, XFS_REFCACHE_SIZE_MAX * sizeof(xfs_inode_t *)); } else { spin_lock(&xfs_refcache_lock); xfs_refcache_size = xfs_refcache_new_size; spin_unlock(&xfs_refcache_lock); } }
/*
 * This is called from the XFS sync code to ensure that the refcache
 * is emptied out over time.  We purge a small number of entries with
 * each call.
 *
 * mp -- unused by this function.
 *
 * xfs_refcache_purge_count is sampled exactly once per call.  The same
 * value sizes the scratch array, bounds the scan and is handed back to
 * kmem_free(), so a concurrent change of the tunable cannot make the scan
 * run past the end of the array or free it with a mismatched size.
 */
void
xfs_refcache_purge_some(xfs_mount_t *mp)
{
	int		error, i;
	int		purge_count;
	xfs_inode_t	*ip;
	int		iplist_index;
	xfs_inode_t	**iplist;

	if ((xfs_refcache == NULL) || (xfs_refcache_count == 0)) {
		return;
	}

	purge_count = xfs_refcache_purge_count;
	if (purge_count <= 0) {
		/* Nothing would be scanned; skip the allocation. */
		return;
	}

	iplist_index = 0;
	iplist = (xfs_inode_t **)kmem_zalloc(purge_count *
					     sizeof(xfs_inode_t *), KM_SLEEP);

	spin_lock(&xfs_refcache_lock);

	/*
	 * Store any inodes we find in the next several entries
	 * into the iplist array to be released after dropping
	 * the spinlock.  We always start looking from the currently
	 * oldest place in the cache.  We move the refcache index
	 * forward as we go so that we are sure to eventually clear
	 * out the entire cache when the system goes idle.
	 */
	for (i = 0; i < purge_count; i++) {
		ip = xfs_refcache[xfs_refcache_index];
		if (ip != NULL) {
			xfs_refcache[xfs_refcache_index] = NULL;
			ip->i_refcache = NULL;
			xfs_refcache_count--;
			ASSERT(xfs_refcache_count >= 0);
			iplist[iplist_index] = ip;
			iplist_index++;
		}
		xfs_refcache_index++;
		if (xfs_refcache_index == xfs_refcache_size) {
			xfs_refcache_index = 0;
		}
	}

	spin_unlock(&xfs_refcache_lock);

	/*
	 * Now drop the inodes we collected.
	 */
	for (i = 0; i < iplist_index; i++) {
		VOP_RELEASE(XFS_ITOV(iplist[i]), error);
		VN_RELE(XFS_ITOV(iplist[i]));
	}

	kmem_free(iplist, purge_count * sizeof(xfs_inode_t *));
}
/*
 * Look up an XFS inode by number and hand it back unlocked.  Returning it
 * unlocked is fine because the caller keeps a reference on the vnode that
 * holds the inode.
 *
 * On success *ipp is set and 0 is returned.  On failure *ipp is NULL and
 * the result is the xfs_getinode() error, the getattr error, or ENOENT
 * when the inode has no links or is not a regular file; in the latter two
 * cases the vnode reference is dropped before returning.
 */
int
xfs_igetinode(struct vfs *vfsp, dev_t dev, ino_t inode,
	      struct xfs_inode **ipp)
{
	struct xfs_inode *xip;
	vnode_t *vnp;
	vattr_t attrs;
	int code;

	AFS_STATCNT(igetinode);

	*ipp = NULL;

	code = xfs_getinode(vfsp, dev, inode, &xip);
	if (code)
		return code;

	xfs_iunlock(xip, XFS_ILOCK_SHARED);
	vnp = XFS_ITOV(xip);

	attrs.va_mask = AT_STAT;
	AFS_VOP_GETATTR(vnp, &attrs, 0, OSI_GET_CURRENT_CRED(), code);
	if (code) {
		SET_XFS_ERROR(4, vnp->v_vfsp->vfs_dev, inode);
		VN_RELE(vnp);
		return code;
	}

	/* Only linked, regular files are acceptable. */
	if (attrs.va_nlink == 0 || attrs.va_type != VREG) {
		SET_XFS_ERROR(5, vnp->v_vfsp->vfs_dev, inode);
		VN_RELE(vnp);
		return ENOENT;
	}

	*ipp = xip;
	return 0;
}
/*
 * Unlock 'ip' and then release the inode, if any, that was parked in
 * ip->i_release.  The parked pointer is detached before the unlock and
 * the release itself happens only after the inode lock has been dropped.
 */
void
xfs_refcache_iunlock(
	xfs_inode_t	*ip,
	uint		lock_flags)
{
	xfs_inode_t	*pending = ip->i_release;
	int		error;

	ip->i_release = NULL;
	xfs_iunlock(ip, lock_flags);

	if (pending == NULL)
		return;

	VOP_RELEASE(XFS_ITOV(pending), error);
	VN_RELE(XFS_ITOV(pending));
}
/*
 * xfs_root hands back the root vnode of a mounted filesystem.
 *
 * bdp -- behavior descriptor from which the XFS mount is derived
 * vpp -- address of the caller's vnode pointer, set to the root vnode
 *
 * A hold is taken on the vnode before it is returned.  Always returns 0.
 */
STATIC int
xfs_root(
	bhv_desc_t	*bdp,
	vnode_t		**vpp)
{
	vnode_t		*rootvp;

	rootvp = XFS_ITOV((XFS_BHVTOM(bdp))->m_rootip);
	VN_HOLD(rootvp);

	*vpp = rootvp;
	return 0;
}
/*
 * Return the vnode for inode number 'ainode' on the AFS cache filesystem
 * (afs_cacheVfsp / cacheDev).  A lookup failure is treated as fatal and
 * reported through osi_Panic().
 */
vnode_t *
afs_XFSIGetVnode(ino_t ainode)
{
	struct xfs_inode *xip;
	int code;

	code = xfs_igetinode(afs_cacheVfsp, (dev_t) cacheDev.dev, ainode, &xip);
	if (code) {
		osi_Panic("afs_XFSIGetVnode: xfs_igetinode failed, error=%d", code);
	}

	return XFS_ITOV(xip);
}
/*
 * Splice data from a file into a pipe.
 *
 * The shared I/O lock is held across the transfer.  If DMAPI read events
 * are enabled on the inode and the caller did not ask for invisible I/O,
 * the event is sent first and a failure aborts the read.
 *
 * Returns the result of generic_file_splice_read(), -EIO if the
 * filesystem has been shut down, or the negated DMAPI error.
 */
ssize_t
xfs_splice_read(
	xfs_inode_t		*ip,
	struct file		*infilp,
	loff_t			*ppos,
	struct pipe_inode_info	*pipe,
	size_t			count,
	int			flags,
	int			ioflags)
{
	bhv_vnode_t		*vp = XFS_ITOV(ip);
	xfs_mount_t		*mp = ip->i_mount;
	ssize_t			nread;

	XFS_STATS_INC(xs_read_calls);

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	xfs_ilock(ip, XFS_IOLOCK_SHARED);

	if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
		bhv_vrwlock_t	locktype = VRWLOCK_READ;
		int		error;

		error = XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *ppos, count,
				      FILP_DELAY_FLAG(infilp), &locktype);
		if (error) {
			nread = -error;
			goto out_unlock;
		}
	}

	xfs_rw_enter_trace(XFS_SPLICE_READ_ENTER, ip,
			   pipe, count, *ppos, ioflags);

	nread = generic_file_splice_read(infilp, ppos, pipe, count, flags);
	if (nread > 0)
		XFS_STATS_ADD(xs_read_bytes, nread);

out_unlock:
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
	return nread;
}
/*
 * Special iput for brand-new inodes that are still locked.
 *
 * Drops the inode locks named in lock_flags (if any) and then vput()s the
 * underlying vnode, which is asserted to be locked at that point.
 */
void
xfs_iput_new(xfs_inode_t *ip, uint lock_flags)
{
	xfs_vnode_t	*xvp = XFS_ITOV(ip);

	vn_trace_entry(xvp, "xfs_iput_new", (inst_t *)__return_address);

	printf("xfs_iput_new: ip %p\n",ip);

	if (ip->i_d.di_mode == 0) {
		ASSERT(!(ip->i_flags & XFS_IRECLAIMABLE));
		/*
		 * The vnode is not marked bad here (the vn_mark_bad() call
		 * is disabled).  Maybe call vgone here?  RMC
		 */
		printf("xfs_iput_new: ip %p di_mode == 0\n",ip);
	}

	if (lock_flags != 0)
		xfs_iunlock(ip, lock_flags);

	ASSERT_VOP_LOCKED(xvp->v_vnode, "xfs_iput_new");
	vput(xvp->v_vnode);
}
/*
 * Stamp the current time into the timestamps selected by 'flags'
 * (XFS_ICHGTIME_MOD, XFS_ICHGTIME_ACC, XFS_ICHGTIME_CHG), updating both
 * the Linux inode and the in-core XFS inode.
 * We don't lock across timestamp updates, and we don't log them, but
 * we do record the fact that there is dirty information in core.
 *
 * NOTE -- callers MUST combine XFS_ICHGTIME_MOD or XFS_ICHGTIME_CHG
 *		with XFS_ICHGTIME_ACC to be sure that access time
 *		update will take.  Calling first with XFS_ICHGTIME_ACC
 *		and then XFS_ICHGTIME_MOD may fail to modify the access
 *		timestamp if the filesystem is mounted noacctm.
 */
void
xfs_ichgtime(
	xfs_inode_t	*ip,
	int		flags)
{
	struct inode	*inode = vn_to_inode(XFS_ITOV(ip));
	timespec_t	now;
	__int32_t	sec;
	__int32_t	nsec;

	nanotime(&now);
	sec = (__int32_t)now.tv_sec;
	nsec = (__int32_t)now.tv_nsec;

	if (flags & XFS_ICHGTIME_MOD) {
		inode->i_mtime = now;
		ip->i_d.di_mtime.t_sec = sec;
		ip->i_d.di_mtime.t_nsec = nsec;
	}
	if (flags & XFS_ICHGTIME_ACC) {
		inode->i_atime = now;
		ip->i_d.di_atime.t_sec = sec;
		ip->i_d.di_atime.t_nsec = nsec;
	}
	if (flags & XFS_ICHGTIME_CHG) {
		inode->i_ctime = now;
		ip->i_d.di_ctime.t_sec = sec;
		ip->i_d.di_ctime.t_nsec = nsec;
	}

	/*
	 * i_update_core is set only _after_ the timestamps have been
	 * written, to coordinate with xfs_iflush() so that timestamp
	 * updates are not lost without having to hold the inode lock.
	 * SYNCHRONIZE() keeps the compiler from hoisting the flag update
	 * above the timestamp stores.
	 */
	SYNCHRONIZE();
	ip->i_update_core = 1;

	if (!(inode->i_state & I_NEW))
		mark_inode_dirty_sync(inode);
}
/*
 * Called from the XFS unmount code to throw every refcache entry that
 * belongs to mount 'mp' out of the cache.  The refcache busy counter is
 * raised for the duration so that no new entries are added while the
 * purge is in progress.
 */
void
xfs_refcache_purge_mp(
	xfs_mount_t	*mp)
{
	xfs_inode_t	*ip;
	vnode_t		*vp;
	int		slot;
	int		error;

	if (xfs_refcache == NULL)
		return;

	spin_lock(&xfs_refcache_lock);

	/*
	 * A counter rather than a flag, since several unmounts may be in
	 * here at the same time.
	 */
	xfs_refcache_busy++;

	for (slot = 0; slot < xfs_refcache_size; slot++) {
		ip = xfs_refcache[slot];
		if (ip == NULL || ip->i_mount != mp)
			continue;

		xfs_refcache[slot] = NULL;
		ip->i_refcache = NULL;
		xfs_refcache_count--;
		ASSERT(xfs_refcache_count >= 0);

		/* The release is done with the spinlock dropped. */
		spin_unlock(&xfs_refcache_lock);
		vp = XFS_ITOV(ip);
		VOP_RELEASE(vp, error);
		VN_RELE(vp);
		spin_lock(&xfs_refcache_lock);
	}

	xfs_refcache_busy--;
	ASSERT(xfs_refcache_busy >= 0);

	spin_unlock(&xfs_refcache_lock);
}
/*
 * If the given inode is in the reference cache, remove its entry and
 * release the reference the cache held on the vnode.
 */
void
xfs_refcache_purge_ip(
	xfs_inode_t	*ip)
{
	vnode_t		*vp;
	int		error;

	/*
	 * No back pointer into the cache means we are not in it.  This
	 * first look is taken without the lock.
	 */
	if (ip->i_refcache == NULL)
		return;

	spin_lock(&xfs_refcache_lock);

	/* Look again now that the lock is held. */
	if (ip->i_refcache == NULL) {
		spin_unlock(&xfs_refcache_lock);
		return;
	}

	/*
	 * Sever the link in both directions: the cache slot's pointer to
	 * us and our pointer to the slot.
	 */
	ASSERT(*(ip->i_refcache) == ip);
	*(ip->i_refcache) = NULL;
	ip->i_refcache = NULL;

	xfs_refcache_count--;
	ASSERT(xfs_refcache_count >= 0);

	spin_unlock(&xfs_refcache_lock);

	vp = XFS_ITOV(ip);
	/* ASSERT(vp->v_count > 1); */
	VOP_RELEASE(vp, error);
	VN_RELE(vp);
}
/*
 * Copy file data into the caller's uio through the buffer cache, one
 * filesystem block per pass.
 *
 * Each pass reads the block containing uio->uio_offset (plain read at end
 * of file, clustered read when the mount allows it, read plus one block
 * of readahead when access looks sequential, plain read otherwise) and
 * then moves the wanted bytes out with uiomove().
 *
 * Returns 0 on success, including when nothing was requested or the
 * offset is at or past the inode size, or the error from the buffer read
 * or from uiomove().
 */
int
xfs_read_file(xfs_mount_t *mp, xfs_inode_t *ip, struct uio *uio, int ioflag)
{
	xfs_fileoff_t	blkno, next_blkno;
	xfs_fsize_t	left_in_file;
	long		bufsize, chunk, blkoff;
	struct buf	*bp;
	struct vnode	*vp;
	int		error, orig_resid;
	int		seqcount;

	seqcount = ioflag >> IO_SEQSHIFT;

	orig_resid = uio->uio_resid;
	if (orig_resid <= 0)
		return (0);

	vp = XFS_ITOV(ip)->v_vnode;

	/*
	 * Work through the request a block at a time.  bp is NULL at the
	 * top of every pass; it is only left non-NULL when a pass is
	 * abandoned with a buffer still held.
	 */
	error = 0;
	bp = NULL;
	while (uio->uio_resid > 0) {
		left_in_file = ip->i_d.di_size - uio->uio_offset;
		if (left_in_file <= 0)
			break;

		blkno = XFS_B_TO_FSBT(mp, uio->uio_offset);
		next_blkno = blkno + 1;

		/* Buffers are always one filesystem block in size. */
		bufsize = mp->m_sb.sb_blocksize;
		blkoff = XFS_B_FSB_OFFSET(mp, uio->uio_offset);

		/*
		 * Transfer at most the rest of this block, clipped to what
		 * the caller still wants and to what the file still has.
		 */
		chunk = mp->m_sb.sb_blocksize - blkoff;
		if (uio->uio_resid < chunk)
			chunk = uio->uio_resid;
		if (left_in_file < chunk)
			chunk = left_in_file;

		if (XFS_FSB_TO_B(mp, next_blkno) >= ip->i_d.di_size ) {
			/* Last block of the file: no readahead. */
			error = bread(vp, blkno, bufsize, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			/*
			 * Clustering is allowed, so grab as much as we can.
			 *
			 * XXX This may not be a win if we are not
			 * doing sequential access.
			 */
			error = cluster_read(vp, ip->i_d.di_size, blkno,
				bufsize, NOCRED, uio->uio_resid, seqcount, &bp);
		} else if (seqcount > 1) {
			/*
			 * No clustering, but access looks sequential: read
			 * this block and start readahead on the next one.
			 * The 4th and 5th arguments point to arrays whose
			 * length is given by the 6th argument.
			 */
			int next_bufsize = mp->m_sb.sb_blocksize;

			error = breadn(vp, blkno, bufsize, &next_blkno,
				&next_bufsize, 1, NOCRED, &bp);
		} else {
			/*
			 * None of the above: read just the block asked for,
			 * exactly as in the end-of-file case.
			 */
			error = bread(vp, blkno, bufsize, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			bp = NULL;
			break;
		}

		/*
		 * For IO_DIRECT mark the buffer B_DIRECT so that it is
		 * released later on and the buffer cache tries to free the
		 * underlying pages.
		 */
		if (ioflag & IO_DIRECT)
			bp->b_flags |= B_DIRECT;

		/*
		 * A non-zero b_resid should only accompany an I/O error,
		 * which is handled above.  If a short read nevertheless
		 * arrives without an error, make sure we never uiomove
		 * bad or uninitialized data.
		 */
		bufsize -= bp->b_resid;
		if (bufsize < chunk) {
			if (bufsize == 0)
				break;
			chunk = bufsize;
		}

		error = uiomove((char *)bp->b_data + blkoff, (int)chunk, uio);
		if (error)
			break;

		if (ioflag & (IO_VMIO|IO_DIRECT) ) {
			/*
			 * The VM has the data, so the buf is not needed;
			 * mark it available for freeing.
			 */
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else {
			/*
			 * Otherwise queue it onto another list and let
			 * whoever made the request take care of freeing it.
			 */
			bqrelse(bp);
		}
		bp = NULL;
	}

	/*
	 * A buffer can only still be held here if the loop was left early
	 * through one of the 'break' statements.
	 */
	if (bp != NULL) {
		if (ioflag & (IO_VMIO|IO_DIRECT)) {
			bp->b_flags |= B_RELBUF;
			brelse(bp);
		} else
			bqrelse(bp);
	}

	return (error);
}
/*
 * Insert the given inode into the reference cache.
 *
 * The caller must hold the inode's I/O lock in update mode.  A hold is
 * taken on the vnode for as long as the inode stays in the cache.  If the
 * slot being filled was occupied, the displaced inode is parked in
 * ip->i_release rather than released here.
 */
void
xfs_refcache_insert(
	xfs_inode_t	*ip)
{
	vnode_t		*vp;
	xfs_inode_t	*displaced;
	xfs_inode_t	**spare = NULL;

	ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE));

	/*
	 * Nothing to do if an unmount is busy blowing entries out of the
	 * cache, if the cache has been tuned down to zero, or if this
	 * inode is already cached.
	 */
	if (xfs_refcache_busy || !xfs_refcache_size || ip->i_refcache != NULL)
		return;

	vp = XFS_ITOV(ip);
	/* ASSERT(vp->v_count > 0); */
	VN_HOLD(vp);

	/*
	 * The cache array is allocated on first use so that no memory is
	 * wasted on systems not being used as NFS servers.  The allocation
	 * is done before taking the spinlock.
	 */
	if (xfs_refcache == NULL) {
		spare = (xfs_inode_t **)kmem_zalloc(XFS_REFCACHE_SIZE_MAX *
						    sizeof(xfs_inode_t *),
						    KM_SLEEP);
	}

	spin_lock(&xfs_refcache_lock);

	/*
	 * Install our array if the cache still needs one.  Otherwise
	 * 'spare' stays set and is freed on the way out.
	 */
	if (spare != NULL && xfs_refcache == NULL) {
		xfs_refcache = spare;
		spare = NULL;
	}

	/*
	 * Check again under the lock: an unmount clearing out the cache
	 * means no new entries may be added.
	 */
	if (xfs_refcache_busy) {
		spin_unlock(&xfs_refcache_lock);
		VN_RELE(vp);
		goto out_free_spare;
	}

	/* Push out whatever currently occupies the target slot. */
	displaced = xfs_refcache[xfs_refcache_index];
	if (displaced != NULL) {
		displaced->i_refcache = NULL;
		xfs_refcache_count--;
		ASSERT(xfs_refcache_count >= 0);
	}

	xfs_refcache[xfs_refcache_index] = ip;
	ASSERT(ip->i_refcache == NULL);
	ip->i_refcache = &(xfs_refcache[xfs_refcache_index]);
	xfs_refcache_count++;
	ASSERT(xfs_refcache_count <= xfs_refcache_size);

	xfs_refcache_index++;
	if (xfs_refcache_index == xfs_refcache_size)
		xfs_refcache_index = 0;

	spin_unlock(&xfs_refcache_lock);

	/*
	 * Remember the displaced inode so that it can be VN_RELE'd once
	 * our inode locks have been dropped in xfs_rwunlock().  The
	 * pointer may be NULL, but that's OK.
	 */
	ip->i_release = displaced;

out_free_spare:
	/* Someone else beat us to installing the cache array. */
	if (spare != NULL) {
		kmem_free(spare, XFS_REFCACHE_SIZE_MAX *
				 sizeof(xfs_inode_t *));
	}
}
/*
 * xfs sync routine for internal use
 *
 * This routine supports all of the flags defined for the generic VFS_SYNC
 * interface as explained above under xfs_sync.  In the interests of not
 * changing interfaces within the 6.5 family, additional internally-
 * required functions are specified within a separate xflags parameter,
 * only available by calling this routine.
 *
 * The routine walks the circular per-mount inode list (mp->m_inodes,
 * linked through i_mnext/i_mprev) with the mount inode lock held.
 * Whenever that lock has to be dropped, a marker record (ipointer) is
 * spliced into the list after the current inode so that the walk can be
 * resumed from the same position once the lock is retaken.
 *
 * mp       -- mount whose inodes are to be synced
 * flags    -- SYNC_* bits; this function tests SYNC_BDFLUSH, SYNC_WAIT,
 *             SYNC_DELWRI, SYNC_CLOSE and SYNC_ATTR
 * xflags   -- not referenced by the code in this function
 * bypassed -- optional counter; zeroed on entry and incremented each time
 *             an inode is skipped because its flush lock could not be
 *             taken without sleeping (non-BDFLUSH, non-WAIT case)
 *
 * Returns 0 immediately for a read-only vfs, and 0 if a forced shutdown
 * is seen without SYNC_CLOSE or if xfs_itobp() fails in the SYNC_BDFLUSH
 * path.  Returns EFSCORRUPTED as soon as it is encountered; otherwise the
 * last error recorded during the walk.
 */
STATIC int
xfs_sync_inodes(
	xfs_mount_t	*mp,
	int		flags,
	int		xflags,
	int		*bypassed)
{
	xfs_inode_t	*ip = NULL;
	xfs_inode_t	*ip_next;
	xfs_buf_t	*bp;
	vnode_t		*vp = NULL;
	vmap_t		vmap;
	int		error;
	int		last_error;
	uint64_t	fflag;
	uint		lock_flags;
	uint		base_lock_flags;
	boolean_t	mount_locked;
	boolean_t	vnode_refed;
	int		preempt;
	xfs_dinode_t	*dip;
	xfs_iptr_t	*ipointer;
#ifdef DEBUG
	boolean_t	ipointer_in = B_FALSE;

#define IPOINTER_SET	ipointer_in = B_TRUE
#define IPOINTER_CLR	ipointer_in = B_FALSE
#else
#define IPOINTER_SET
#define IPOINTER_CLR
#endif


/* Insert a marker record into the inode list after inode ip. The list
 * must be locked when this is called. After the call the list will no
 * longer be locked.
 */
#define IPOINTER_INSERT(ip, mp)	{ \
		ASSERT(ipointer_in == B_FALSE); \
		ipointer->ip_mnext = ip->i_mnext; \
		ipointer->ip_mprev = ip; \
		ip->i_mnext = (xfs_inode_t *)ipointer; \
		ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \
		preempt = 0; \
		XFS_MOUNT_IUNLOCK(mp); \
		mount_locked = B_FALSE; \
		IPOINTER_SET; \
	}

/* Remove the marker from the inode list. If the marker was the only item
 * in the list then there are no remaining inodes and we should zero out
 * the whole list. If we are the current head of the list then move the head
 * past us.  The first macro argument is assigned the inode that follows
 * the marker (or NULL if the list is now empty).
 */
#define IPOINTER_REMOVE(ip, mp)	{ \
		ASSERT(ipointer_in == B_TRUE); \
		if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \
			ip = ipointer->ip_mnext; \
			ip->i_mprev = ipointer->ip_mprev; \
			ipointer->ip_mprev->i_mnext = ip; \
			if (mp->m_inodes == (xfs_inode_t *)ipointer) { \
				mp->m_inodes = ip; \
			} \
		} else { \
			ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \
			mp->m_inodes = NULL; \
			ip = NULL; \
		} \
		IPOINTER_CLR; \
	}

#define XFS_PREEMPT_MASK	0x7f

	if (bypassed)
		*bypassed = 0;
	if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
		return 0;
	error = 0;
	last_error = 0;
	preempt = 0;

	/* Allocate a reference marker */
	ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP);

	fflag = XFS_B_ASYNC;		/* default is don't wait */
	if (flags & SYNC_BDFLUSH)
		fflag = XFS_B_DELWRI;
	if (flags & SYNC_WAIT)
		fflag = 0;		/* synchronous overrides all */

	base_lock_flags = XFS_ILOCK_SHARED;
	if (flags & (SYNC_DELWRI | SYNC_CLOSE)) {
		/*
		 * We need the I/O lock if we're going to call any of
		 * the flush/inval routines.
		 */
		base_lock_flags |= XFS_IOLOCK_SHARED;
	}

	XFS_MOUNT_ILOCK(mp);

	ip = mp->m_inodes;

	mount_locked = B_TRUE;
	vnode_refed  = B_FALSE;

	IPOINTER_CLR;

	do {
		ASSERT(ipointer_in == B_FALSE);
		ASSERT(vnode_refed == B_FALSE);

		lock_flags = base_lock_flags;

		/*
		 * There were no inodes in the list, just break out
		 * of the loop.
		 */
		if (ip == NULL) {
			break;
		}

		/*
		 * We found another sync thread marker - skip it
		 */
		if (ip->i_mount == NULL) {
			ip = ip->i_mnext;
			continue;
		}

		vp = XFS_ITOV_NULL(ip);

		/*
		 * If the vnode is gone then this is being torn down,
		 * call reclaim if it is flushed, else let regular flush
		 * code deal with it later in the loop.
		 */

		if (vp == NULL) {
			/* Skip ones already in reclaim */
			if (ip->i_flags & XFS_IRECLAIM) {
				ip = ip->i_mnext;
				continue;
			}
			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
				ip = ip->i_mnext;
			} else if ((xfs_ipincount(ip) == 0) &&
				    xfs_iflock_nowait(ip)) {
				IPOINTER_INSERT(ip, mp);

				xfs_finish_reclaim(ip, 1,
						XFS_IFLUSH_DELWRI_ELSE_ASYNC);

				XFS_MOUNT_ILOCK(mp);
				mount_locked = B_TRUE;
				IPOINTER_REMOVE(ip, mp);
			} else {
				xfs_iunlock(ip, XFS_ILOCK_EXCL);
				ip = ip->i_mnext;
			}
			continue;
		}

		if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
			XFS_MOUNT_IUNLOCK(mp);
			kmem_free(ipointer, sizeof(xfs_iptr_t));
			return 0;
		}

		/*
		 * If this is just vfs_sync() or pflushd() calling
		 * then we can skip inodes for which it looks like
		 * there is nothing to do.  Since we don't have the
		 * inode locked this is racey, but these are periodic
		 * calls so it doesn't matter.  For the others we want
		 * to know for sure, so we at least try to lock them.
		 */
		if (flags & SYNC_BDFLUSH) {
			if (((ip->i_itemp == NULL) ||
			     !(ip->i_itemp->ili_format.ilf_fields &
			       XFS_ILOG_ALL)) &&
			    (ip->i_update_core == 0)) {
				ip = ip->i_mnext;
				continue;
			}
		}

		/*
		 * Try to lock without sleeping.  We're out of order with
		 * the inode list lock here, so if we fail we need to drop
		 * the mount lock and try again.  If we're called from
		 * bdflush() here, then don't bother.
		 *
		 * The inode lock here actually coordinates with the
		 * almost spurious inode lock in xfs_ireclaim() to prevent
		 * the vnode we handle here without a reference from
		 * being freed while we reference it.  If we lock the inode
		 * while it's on the mount list here, then the spurious inode
		 * lock in xfs_ireclaim() after the inode is pulled from
		 * the mount list will sleep until we release it here.
		 * This keeps the vnode from being freed while we reference
		 * it.  It is also cheaper and simpler than actually doing
		 * a vn_get() for every inode we touch here.
		 */
		if (xfs_ilock_nowait(ip, lock_flags) == 0) {

			if ((flags & SYNC_BDFLUSH) || (vp == NULL)) {
				ip = ip->i_mnext;
				continue;
			}

			/*
			 * We need to unlock the inode list lock in order
			 * to lock the inode. Insert a marker record into
			 * the inode list to remember our position, dropping
			 * the lock is now done inside the IPOINTER_INSERT
			 * macro.
			 *
			 * We also use the inode list lock to protect us
			 * in taking a snapshot of the vnode version number
			 * for use in calling vn_get().
			 */
			VMAP(vp, vmap);
			IPOINTER_INSERT(ip, mp);

			vp = vn_get(vp, &vmap);
			if (vp == NULL) {
				/*
				 * The vnode was reclaimed once we let go
				 * of the inode list lock.  Skip to the
				 * next list entry. Remove the marker.
				 */

				XFS_MOUNT_ILOCK(mp);

				mount_locked = B_TRUE;
				vnode_refed  = B_FALSE;

				IPOINTER_REMOVE(ip, mp);

				continue;
			}

			xfs_ilock(ip, lock_flags);

			ASSERT(vp == XFS_ITOV(ip));
			ASSERT(ip->i_mount == mp);

			vnode_refed = B_TRUE;
		}

		/* From here on in the loop we may have a marker record
		 * in the inode list.
		 */

		if ((flags & SYNC_CLOSE)  && (vp != NULL)) {
			/*
			 * This is the shutdown case.  We just need to
			 * flush and invalidate all the pages associated
			 * with the inode.  Drop the inode lock since
			 * we can't hold it across calls to the buffer
			 * cache.
			 *
			 * We don't set the VREMAPPING bit in the vnode
			 * here, because we don't hold the vnode lock
			 * exclusively.  It doesn't really matter, though,
			 * because we only come here when we're shutting
			 * down anyway.
			 */
			xfs_iunlock(ip, XFS_ILOCK_SHARED);

			if (XFS_FORCED_SHUTDOWN(mp)) {
				VOP_TOSS_PAGES(vp, 0, -1, FI_REMAPF);
			} else {
				VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_REMAPF);
			}

			xfs_ilock(ip, XFS_ILOCK_SHARED);

		} else if ((flags & SYNC_DELWRI) && (vp != NULL)) {
			if (VN_DIRTY(vp)) {
				/* We need to have dropped the lock here,
				 * so insert a marker if we have not already
				 * done so.
				 */
				if (mount_locked) {
					IPOINTER_INSERT(ip, mp);
				}

				/*
				 * Drop the inode lock since we can't hold it
				 * across calls to the buffer cache.
				 */
				xfs_iunlock(ip, XFS_ILOCK_SHARED);
				VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1,
							fflag, FI_NONE, error);
				xfs_ilock(ip, XFS_ILOCK_SHARED);
			}

		}

		if (flags & SYNC_BDFLUSH) {
			if ((flags & SYNC_ATTR) &&
			    ((ip->i_update_core) ||
			     ((ip->i_itemp != NULL) &&
			      (ip->i_itemp->ili_format.ilf_fields != 0)))) {

				/* Insert marker and drop lock if not already
				 * done.
				 */
				if (mount_locked) {
					IPOINTER_INSERT(ip, mp);
				}

				/*
				 * We don't want the periodic flushing of the
				 * inodes by vfs_sync() to interfere with
				 * I/O to the file, especially read I/O
				 * where it is only the access time stamp
				 * that is being flushed out.  To prevent
				 * long periods where we have both inode
				 * locks held shared here while reading the
				 * inode's buffer in from disk, we drop the
				 * inode lock while reading in the inode
				 * buffer.  We have to release the buffer
				 * and reacquire the inode lock so that they
				 * are acquired in the proper order (inode
				 * locks first).  The buffer will go at the
				 * end of the lru chain, though, so we can
				 * expect it to still be there when we go
				 * for it again in xfs_iflush().
				 */
				if ((xfs_ipincount(ip) == 0) &&
				    xfs_iflock_nowait(ip)) {

					xfs_ifunlock(ip);
					xfs_iunlock(ip, XFS_ILOCK_SHARED);

					error = xfs_itobp(mp, NULL, ip,
							  &dip, &bp, 0);
					if (!error) {
						xfs_buf_relse(bp);
					} else {
						/* Bailing out, remove the
						 * marker and free it.
						 *
						 * NOTE(review): the xfs_itobp()
						 * error is not propagated;
						 * 0 is returned here.
						 */
						XFS_MOUNT_ILOCK(mp);

						IPOINTER_REMOVE(ip, mp);

						XFS_MOUNT_IUNLOCK(mp);

						ASSERT(!(lock_flags &
							XFS_IOLOCK_SHARED));

						kmem_free(ipointer,
							sizeof(xfs_iptr_t));
						return (0);
					}

					/*
					 * Since we dropped the inode lock,
					 * the inode may have been reclaimed.
					 * Therefore, we reacquire the mount
					 * lock and check to see if we were the
					 * inode reclaimed. If this happened
					 * then the ipointer marker will no
					 * longer point back at us. In this
					 * case, move ip along to the inode
					 * after the marker, remove the marker
					 * and continue.
					 */
					XFS_MOUNT_ILOCK(mp);
					mount_locked = B_TRUE;

					if (ip != ipointer->ip_mprev) {
						IPOINTER_REMOVE(ip, mp);

						ASSERT(!vnode_refed);
						ASSERT(!(lock_flags &
							XFS_IOLOCK_SHARED));
						continue;
					}

					ASSERT(ip->i_mount == mp);

					if (xfs_ilock_nowait(ip,
						    XFS_ILOCK_SHARED) == 0) {
						ASSERT(ip->i_mount == mp);
						/*
						 * We failed to reacquire
						 * the inode lock without
						 * sleeping, so just skip
						 * the inode for now.  We
						 * clear the ILOCK bit from
						 * the lock_flags so that we
						 * won't try to drop a lock
						 * we don't hold below.
						 */
						lock_flags &= ~XFS_ILOCK_SHARED;
						IPOINTER_REMOVE(ip_next, mp);
					} else if ((xfs_ipincount(ip) == 0) &&
						   xfs_iflock_nowait(ip)) {
						ASSERT(ip->i_mount == mp);
						/*
						 * Since this is vfs_sync()
						 * calling we only flush the
						 * inode out if we can lock
						 * it without sleeping and
						 * it is not pinned.  Drop
						 * the mount lock here so
						 * that we don't hold it for
						 * too long. We already have
						 * a marker in the list here.
						 */
						XFS_MOUNT_IUNLOCK(mp);
						mount_locked = B_FALSE;
						error = xfs_iflush(ip,
							   XFS_IFLUSH_DELWRI);
					} else {
						ASSERT(ip->i_mount == mp);
						IPOINTER_REMOVE(ip_next, mp);
					}
				}

			}

		} else {
			if ((flags & SYNC_ATTR) &&
			    ((ip->i_update_core) ||
			     ((ip->i_itemp != NULL) &&
			      (ip->i_itemp->ili_format.ilf_fields != 0)))) {
				if (mount_locked) {
					IPOINTER_INSERT(ip, mp);
				}

				if (flags & SYNC_WAIT) {
					xfs_iflock(ip);
					error = xfs_iflush(ip,
							   XFS_IFLUSH_SYNC);
				} else {
					/*
					 * If we can't acquire the flush
					 * lock, then the inode is already
					 * being flushed so don't bother
					 * waiting.  If we can lock it then
					 * do a delwri flush so we can
					 * combine multiple inode flushes
					 * in each disk write.
					 */
					if (xfs_iflock_nowait(ip)) {
						error = xfs_iflush(ip,
							   XFS_IFLUSH_DELWRI);
					}
					else if (bypassed)
						(*bypassed)++;
				}
			}
		}

		if (lock_flags != 0) {
			xfs_iunlock(ip, lock_flags);
		}

		if (vnode_refed) {
			/*
			 * If we had to take a reference on the vnode
			 * above, then wait until after we've unlocked
			 * the inode to release the reference.  This is
			 * because we can be already holding the inode
			 * lock when VN_RELE() calls xfs_inactive().
			 *
			 * Make sure to drop the mount lock before calling
			 * VN_RELE() so that we don't trip over ourselves if
			 * we have to go for the mount lock again in the
			 * inactive code.
			 */
			if (mount_locked) {
				IPOINTER_INSERT(ip, mp);
			}

			VN_RELE(vp);

			vnode_refed = B_FALSE;
		}

		if (error) {
			last_error = error;
		}

		/*
		 * bail out if the filesystem is corrupted.
		 */
		if (error == EFSCORRUPTED)  {
			if (!mount_locked) {
				XFS_MOUNT_ILOCK(mp);
				IPOINTER_REMOVE(ip, mp);
			}
			XFS_MOUNT_IUNLOCK(mp);
			ASSERT(ipointer_in == B_FALSE);
			kmem_free(ipointer, sizeof(xfs_iptr_t));
			return XFS_ERROR(error);
		}

		/* Let other threads have a chance at the mount lock
		 * if we have looped many times without dropping the
		 * lock.
		 */
		if ((++preempt & XFS_PREEMPT_MASK) == 0) {
			if (mount_locked) {
				IPOINTER_INSERT(ip, mp);
			}
		}

		/*
		 * If the mount lock was dropped, a marker is in the list:
		 * retake the lock and resume from the inode after it.
		 */
		if (mount_locked == B_FALSE) {
			XFS_MOUNT_ILOCK(mp);
			mount_locked = B_TRUE;
			IPOINTER_REMOVE(ip, mp);
			continue;
		}

		ASSERT(ipointer_in == B_FALSE);
		ip = ip->i_mnext;

	} while (ip != mp->m_inodes);

	XFS_MOUNT_IUNLOCK(mp);

	ASSERT(ipointer_in == B_FALSE);

	kmem_free(ipointer, sizeof(xfs_iptr_t));
	return XFS_ERROR(last_error);
}
ssize_t /* bytes read, or (-) error */ xfs_read( xfs_inode_t *ip, struct kiocb *iocb, const struct iovec *iovp, unsigned int segs, loff_t *offset, int ioflags) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; bhv_vnode_t *vp = XFS_ITOV(ip); xfs_mount_t *mp = ip->i_mount; size_t size = 0; ssize_t ret = 0; xfs_fsize_t n; unsigned long seg; XFS_STATS_INC(xs_read_calls); /* START copy & waste from filemap.c */ for (seg = 0; seg < segs; seg++) { const struct iovec *iv = &iovp[seg]; /* * If any segment has a negative length, or the cumulative * length ever wraps negative then return -EINVAL. */ size += iv->iov_len; if (unlikely((ssize_t)(size|iv->iov_len) < 0)) return XFS_ERROR(-EINVAL); } /* END copy & waste from filemap.c */ if (unlikely(ioflags & IO_ISDIRECT)) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((*offset & target->bt_smask) || (size & target->bt_smask)) { if (*offset == ip->i_size) { return (0); } return -XFS_ERROR(EINVAL); } } n = XFS_MAXIOFFSET(mp) - *offset; if ((n <= 0) || (size == 0)) return 0; if (n < size) size = n; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; if (unlikely(ioflags & IO_ISDIRECT)) mutex_lock(&inode->i_mutex); xfs_ilock(ip, XFS_IOLOCK_SHARED); if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { bhv_vrwlock_t locktype = VRWLOCK_READ; int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, vp, *offset, size, dmflags, &locktype); if (ret) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); if (unlikely(ioflags & IO_ISDIRECT)) mutex_unlock(&inode->i_mutex); return ret; } } if (unlikely(ioflags & IO_ISDIRECT)) { if (VN_CACHED(vp)) ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); mutex_unlock(&inode->i_mutex); if (ret) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); return ret; } } xfs_rw_enter_trace(XFS_READ_ENTER, ip, (void *)iovp, segs, *offset, ioflags); iocb->ki_pos = *offset; ret = 
generic_file_aio_read(iocb, iovp, segs, *offset); if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) ret = wait_on_sync_kiocb(iocb); if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); xfs_iunlock(ip, XFS_IOLOCK_SHARED); return ret; }
/*
 * Splice data from a pipe into a file.
 *
 * The exclusive I/O lock is held across the transfer.  Before the write,
 * i_new_size is set if the write would extend the file; afterwards the
 * in-core size is raised to the final position if needed, i_new_size is
 * cleared, and di_size is clamped down to i_size.
 *
 * Returns the result of generic_file_splice_write(), -EIO if the
 * filesystem has been shut down, or the negated DMAPI error.
 */
ssize_t
xfs_splice_write(
	xfs_inode_t		*ip,
	struct pipe_inode_info	*pipe,
	struct file		*outfilp,
	loff_t			*ppos,
	size_t			count,
	int			flags,
	int			ioflags)
{
	bhv_vnode_t		*vp = XFS_ITOV(ip);
	xfs_mount_t		*mp = ip->i_mount;
	struct inode		*inode = outfilp->f_mapping->host;
	xfs_fsize_t		vfs_size, end_pos;
	ssize_t			nwritten;

	XFS_STATS_INC(xs_write_calls);

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	xfs_ilock(ip, XFS_IOLOCK_EXCL);

	if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
		bhv_vrwlock_t	locktype = VRWLOCK_WRITE;
		int		error;

		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, vp, *ppos, count,
				      FILP_DELAY_FLAG(outfilp), &locktype);
		if (error) {
			nwritten = -error;
			goto out_unlock;
		}
	}

	/* Record the size the file will have if the whole write lands. */
	end_pos = *ppos + count;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (end_pos > ip->i_size)
		ip->i_new_size = end_pos;
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, ip,
			   pipe, count, *ppos, ioflags);

	nwritten = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
	if (nwritten > 0)
		XFS_STATS_ADD(xs_write_bytes, nwritten);

	/* On failure (other than -EFAULT) pull the position back to EOF. */
	vfs_size = i_size_read(inode);
	if (unlikely(nwritten < 0 && nwritten != -EFAULT && *ppos > vfs_size))
		*ppos = vfs_size;

	if (*ppos > ip->i_size) {
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		/* Recheck under the lock before growing the size. */
		if (*ppos > ip->i_size)
			ip->i_size = *ppos;
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}

	if (ip->i_new_size) {
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		ip->i_new_size = 0;
		if (ip->i_d.di_size > ip->i_size)
			ip->i_d.di_size = ip->i_size;
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}

out_unlock:
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return nwritten;
}
/* * xfs_unmount_flush implements a set of flush operation on special * inodes, which are needed as a separate set of operations so that * they can be called as part of relocation process. */ int xfs_unmount_flush( xfs_mount_t *mp, /* Mount structure we are getting rid of. */ int relocation) /* Called from vfs relocation. */ { xfs_inode_t *rip = mp->m_rootip; xfs_inode_t *rbmip; xfs_inode_t *rsumip = NULL; vnode_t *rvp = XFS_ITOV(rip); int error; xfs_ilock(rip, XFS_ILOCK_EXCL); xfs_iflock(rip); /* * Flush out the real time inodes. */ if ((rbmip = mp->m_rbmip) != NULL) { xfs_ilock(rbmip, XFS_ILOCK_EXCL); xfs_iflock(rbmip); error = xfs_iflush(rbmip, XFS_IFLUSH_SYNC); xfs_iunlock(rbmip, XFS_ILOCK_EXCL); if (error == EFSCORRUPTED) goto fscorrupt_out; ASSERT(vn_count(XFS_ITOV(rbmip)) == 1); rsumip = mp->m_rsumip; xfs_ilock(rsumip, XFS_ILOCK_EXCL); xfs_iflock(rsumip); error = xfs_iflush(rsumip, XFS_IFLUSH_SYNC); xfs_iunlock(rsumip, XFS_ILOCK_EXCL); if (error == EFSCORRUPTED) goto fscorrupt_out; ASSERT(vn_count(XFS_ITOV(rsumip)) == 1); } /* * Synchronously flush root inode to disk */ error = xfs_iflush(rip, XFS_IFLUSH_SYNC); if (error == EFSCORRUPTED) goto fscorrupt_out2; if (vn_count(rvp) != 1 && !relocation) { xfs_iunlock(rip, XFS_ILOCK_EXCL); return XFS_ERROR(EBUSY); } /* * Release dquot that rootinode, rbmino and rsumino might be holding, * flush and purge the quota inodes. */ error = XFS_QM_UNMOUNT(mp); if (error == EFSCORRUPTED) goto fscorrupt_out2; if (rbmip) { VN_RELE(XFS_ITOV(rbmip)); VN_RELE(XFS_ITOV(rsumip)); } xfs_iunlock(rip, XFS_ILOCK_EXCL); return 0; fscorrupt_out: xfs_ifunlock(rip); fscorrupt_out2: xfs_iunlock(rip, XFS_ILOCK_EXCL); return XFS_ERROR(EFSCORRUPTED); }
/*
 * xfs_vget_fsop_handlereq
 *
 * Translate the userspace file handle described by hreq into a vnode and
 * its Linux inode.  Every fsop_handlereq ioctl argument starts with an
 * xfs_fsop_handlereq_t, which is why one shared routine can serve them all.
 *
 *   mp       -- mount to look the inode up in.
 *   parinode -- inode the ioctl was issued on; must be a directory.
 *   hreq     -- handle request, already copied in by the caller; only
 *               ihandle and ihandlen are read here.
 *   vp       -- out: vnode of the inode the handle names.
 *   inode    -- out: Linux inode for that vnode.
 *
 * Returns 0 on success or a positive errno (ENOTDIR, EINVAL, EFAULT, EIO,
 * ENOENT, or whatever xfs_iget() reports).  On success the caller must
 * always VN_RELE the returned vp.
 */
STATIC int
xfs_vget_fsop_handlereq(
	xfs_mount_t		*mp,
	struct inode		*parinode,	/* parent inode pointer    */
	xfs_fsop_handlereq_t	*hreq,
	vnode_t			**vp,
	struct inode		**inode)
{
	void			__user *hanp;
	size_t			hlen;
	xfs_fid_t		*xfid;
	xfs_handle_t		*handlep;
	xfs_handle_t		handle;
	xfs_inode_t		*ip;
	struct inode		*inodep;
	vnode_t			*vpp;
	xfs_ino_t		ino;
	__u32			igen;
	int			error;

	/* Handle opens are only permitted relative to a directory. */
	if (!S_ISDIR(parinode->i_mode))
		return XFS_ERROR(ENOTDIR);

	hanp = hreq->ihandle;
	hlen = hreq->ihandlen;
	handlep = &handle;

	/* The handle must hold at least an fsid and fit in our buffer. */
	if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep))
		return XFS_ERROR(EINVAL);

	if (copy_from_user(handlep, hanp, hlen))
		return XFS_ERROR(EFAULT);

	/* Zero whatever part of the local handle the user did not supply. */
	if (hlen < sizeof(*handlep))
		memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);

	/*
	 * If more than the fsid was supplied, the embedded fid length must
	 * account for exactly the remaining bytes and the pad must be zero.
	 */
	if (hlen > sizeof(handlep->ha_fsid) &&
	    (handlep->ha_fid.xfs_fid_len !=
			(hlen - sizeof(handlep->ha_fsid) -
			 sizeof(handlep->ha_fid.xfs_fid_len)) ||
	     handlep->ha_fid.xfs_fid_pad))
		return XFS_ERROR(EINVAL);

	/*
	 * Crack the handle: only a full-sized fid carries an inode number
	 * and generation we can use.
	 */
	xfid = (struct xfs_fid *)&handlep->ha_fid;
	if (xfid->xfs_fid_len != sizeof(*xfid) - sizeof(xfid->xfs_fid_len))
		return XFS_ERROR(EINVAL);

	ino  = xfid->xfs_fid_ino;
	igen = xfid->xfs_fid_gen;

	/* Get the XFS inode (shared-locked), building a vnode to go with it. */
	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
	if (error)
		return error;

	if (ip == NULL)
		return XFS_ERROR(EIO);

	/* A zero mode or a generation mismatch means the handle is stale. */
	if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) {
		xfs_iput_new(ip, XFS_ILOCK_SHARED);
		return XFS_ERROR(ENOENT);
	}

	vpp = XFS_ITOV(ip);
	inodep = vn_to_inode(vpp);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	*vp = vpp;
	*inode = inodep;
	return 0;
}
/*
 * xfs_write
 *
 * Write the data described by iovp/nsegs to the file behind iocb, starting
 * at *offset, as direct or buffered I/O depending on IO_ISDIRECT in ioflags.
 *
 * Locking: buffered writes take i_mutex and the I/O lock exclusively;
 * direct writes start with a shared I/O lock and no i_mutex, and upgrade to
 * the buffered locking when cached pages exist or the write starts beyond
 * EOF.  need_i_mutex, iolock and locktype always describe what is currently
 * held, and every exit taken after the locks are acquired goes through the
 * out_unlock_* labels so that they are released.
 *
 * Error convention: "error" holds a positive errno (results of Linux
 * helpers are negated when assigned) and the function returns -error.  On
 * success error is set to -ret so that the byte count is what is returned.
 *
 * Returns the number of bytes written, or a negative errno.
 */
ssize_t				/* bytes written, or (-) error */
xfs_write(
	struct xfs_inode	*xip,
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned int		nsegs,
	loff_t			*offset,
	int			ioflags)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	bhv_vnode_t		*vp = XFS_ITOV(xip);
	unsigned long		segs = nsegs;
	xfs_mount_t		*mp;
	ssize_t			ret = 0, error = 0;
	xfs_fsize_t		isize, new_size;
	int			iolock;
	int			eventsent = 0;
	bhv_vrwlock_t		locktype;
	size_t			ocount = 0, count;
	loff_t			pos;
	int			need_i_mutex;

	XFS_STATS_INC(xs_write_calls);

	error = generic_segment_checks(iovp, &segs, &ocount, VERIFY_READ);
	if (error)
		return error;

	count = ocount;
	pos = *offset;

	if (count == 0)
		return 0;

	mp = xip->i_mount;

	xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

relock:
	if (ioflags & IO_ISDIRECT) {
		iolock = XFS_IOLOCK_SHARED;
		locktype = VRWLOCK_WRITE_DIRECT;
		need_i_mutex = 0;
	} else {
		iolock = XFS_IOLOCK_EXCL;
		locktype = VRWLOCK_WRITE;
		need_i_mutex = 1;
		mutex_lock(&inode->i_mutex);
	}

	xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);

start:
	error = -generic_write_checks(file, &pos, &count,
					S_ISBLK(inode->i_mode));
	if (error) {
		xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
		goto out_unlock_mutex;
	}

	if ((DM_EVENT_ENABLED(xip, DM_EVENT_WRITE) &&
	    !(ioflags & IO_INVIS) && !eventsent)) {
		int		dmflags = FILP_DELAY_FLAG(file);

		if (need_i_mutex)
			dmflags |= DM_FLAGS_IMUX;

		xfs_iunlock(xip, XFS_ILOCK_EXCL);
		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
				      pos, count, dmflags, &locktype);
		if (error) {
			goto out_unlock_internal;
		}
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		eventsent = 1;

		/*
		 * The iolock was dropped and reacquired in XFS_SEND_DATA
		 * so we have to recheck the size when appending.
		 * We will only "goto start;" once, since having sent the
		 * event prevents another call to XFS_SEND_DATA, which is
		 * what allows the size to change in the first place.
		 */
		if ((file->f_flags & O_APPEND) && pos != xip->i_size)
			goto start;
	}

	if (ioflags & IO_ISDIRECT) {
		xfs_buftarg_t	*target =
			XFS_IS_REALTIME_INODE(xip) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;

		if ((pos & target->bt_smask) || (count & target->bt_smask)) {
			/*
			 * Misaligned direct I/O.  This check is re-run
			 * after the lock upgrade below, at which point
			 * i_mutex is held, so leave through the common
			 * exit instead of returning directly; otherwise
			 * i_mutex would be leaked.
			 */
			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
			error = XFS_ERROR(EINVAL);
			goto out_unlock_mutex;
		}

		if (!need_i_mutex && (VN_CACHED(vp) || pos > xip->i_size)) {
			/*
			 * Cached pages or a write beyond EOF: trade the
			 * shared locks for i_mutex plus the exclusive I/O
			 * lock and redo the checks, since everything was
			 * unlocked in between.
			 */
			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
			iolock = XFS_IOLOCK_EXCL;
			locktype = VRWLOCK_WRITE;
			need_i_mutex = 1;
			mutex_lock(&inode->i_mutex);
			xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
			goto start;
		}
	}

	new_size = pos + count;
	if (new_size > xip->i_size)
		xip->i_new_size = new_size;

	if (likely(!(ioflags & IO_INVIS))) {
		file_update_time(file);
		xfs_ichgtime_fast(xip, inode,
				  XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	}

	/*
	 * If the offset is beyond the size of the file, we have a couple
	 * of things to do. First, if there is already space allocated
	 * we need to either create holes or zero the disk or ...
	 *
	 * If there is a page where the previous size lands, we need
	 * to zero it out up to the new size.
	 */
	if (pos > xip->i_size) {
		error = xfs_zero_eof(xip, pos, xip->i_size);
		if (error) {
			xfs_iunlock(xip, XFS_ILOCK_EXCL);
			goto out_unlock_internal;
		}
	}
	xfs_iunlock(xip, XFS_ILOCK_EXCL);

	/*
	 * If we're writing the file then make sure to clear the
	 * setuid and setgid bits if the process is not being run
	 * by root.  This keeps people from modifying setuid and
	 * setgid binaries.
	 */
	if (((xip->i_d.di_mode & S_ISUID) ||
	    ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) ==
		(S_ISGID | S_IXGRP))) &&
	     !capable(CAP_FSETID)) {
		error = xfs_write_clear_setuid(xip);
		if (likely(!error))
			error = -remove_suid(file->f_path.dentry);
		if (unlikely(error)) {
			goto out_unlock_internal;
		}
	}

retry:
	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	if ((ioflags & IO_ISDIRECT)) {
		if (VN_CACHED(vp)) {
			WARN_ON(need_i_mutex == 0);
			xfs_inval_cached_trace(xip, pos, -1,
					(pos & PAGE_CACHE_MASK), -1);
			error = xfs_flushinval_pages(xip,
					(pos & PAGE_CACHE_MASK),
					-1, FI_REMAPF_LOCKED);
			if (error) {
				/*
				 * Do not leave the task pointing at this
				 * mapping's backing device on the way out.
				 */
				current->backing_dev_info = NULL;
				goto out_unlock_internal;
			}
		}

		if (need_i_mutex) {
			/* demote the lock now the cached pages are gone */
			xfs_ilock_demote(xip, XFS_IOLOCK_EXCL);
			mutex_unlock(&inode->i_mutex);

			iolock = XFS_IOLOCK_SHARED;
			locktype = VRWLOCK_WRITE_DIRECT;
			need_i_mutex = 0;
		}

		xfs_rw_enter_trace(XFS_DIOWR_ENTER, xip, (void *)iovp, segs,
				*offset, ioflags);
		ret = generic_file_direct_write(iocb, iovp,
				&segs, pos, offset, count, ocount);

		/*
		 * direct-io write to a hole: fall through to buffered I/O
		 * for completing the rest of the request.
		 */
		if (ret >= 0 && ret != count) {
			XFS_STATS_ADD(xs_write_bytes, ret);

			pos += ret;
			count -= ret;

			ioflags &= ~IO_ISDIRECT;
			xfs_iunlock(xip, iolock);
			goto relock;
		}
	} else {
		xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs,
				*offset, ioflags);
		ret = generic_file_buffered_write(iocb, iovp, segs,
				pos, offset, count, ret);
	}

	current->backing_dev_info = NULL;

	if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
		ret = wait_on_sync_kiocb(iocb);

	if (ret == -ENOSPC &&
	    DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
		/* Send the out-of-space event with all locks dropped, then retry. */
		xfs_rwunlock(xip, locktype);
		if (need_i_mutex)
			mutex_unlock(&inode->i_mutex);
		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
				DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
				0, 0, 0); /* Delay flag intentionally  unused */
		if (need_i_mutex)
			mutex_lock(&inode->i_mutex);
		xfs_rwlock(xip, locktype);
		if (error)
			goto out_unlock_internal;
		pos = xip->i_size;
		ret = 0;
		goto retry;
	}

	isize = i_size_read(inode);
	if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
		*offset = isize;

	if (*offset > xip->i_size) {
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		if (*offset > xip->i_size)
			xip->i_size = *offset;
		xfs_iunlock(xip, XFS_ILOCK_EXCL);
	}

	error = -ret;
	if (ret <= 0)
		goto out_unlock_internal;

	XFS_STATS_ADD(xs_write_bytes, ret);

	/*
	 * Handle various SYNC-type writes.
	 *
	 * Here error == -ret < 0, meaning "success, ret bytes written".
	 * A sync failure therefore has to be detected from error2 itself:
	 * a test of !error could never be true at this point and would
	 * discard every sync failure.  Only the first failure is kept.
	 */
	if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
		int error2;

		xfs_rwunlock(xip, locktype);
		if (need_i_mutex)
			mutex_unlock(&inode->i_mutex);
		/* Generic Linux helper: negative errno on failure, so negate. */
		error2 = sync_page_range(inode, mapping, pos, ret);
		if (error2)
			error = -error2;
		if (need_i_mutex)
			mutex_lock(&inode->i_mutex);
		xfs_rwlock(xip, locktype);
		/*
		 * XFS helper: taken un-negated, the same way the results of
		 * xfs_zero_eof() and xfs_write_clear_setuid() are used above.
		 */
		error2 = xfs_write_sync_logforce(mp, xip);
		if (error2 && error < 0)
			error = error2;
	}

 out_unlock_internal:
	if (xip->i_new_size) {
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		xip->i_new_size = 0;
		/*
		 * If this was a direct or synchronous I/O that failed (such
		 * as ENOSPC) then part of the I/O may have been written to
		 * disk before the error occurred.  In this case the on-disk
		 * file size may have been adjusted beyond the in-memory file
		 * size and now needs to be truncated back.
		 */
		if (xip->i_d.di_size > xip->i_size)
			xip->i_d.di_size = xip->i_size;
		xfs_iunlock(xip, XFS_ILOCK_EXCL);
	}
	xfs_rwunlock(xip, locktype);
 out_unlock_mutex:
	if (need_i_mutex)
		mutex_unlock(&inode->i_mutex);
	return -error;
}
/*
 * xfs_vget_fsop_handlereq
 *
 * Copy an fsop_handlereq ioctl argument in from userspace and translate
 * the file handle it carries into a vnode and its Linux inode.  Every
 * fsop_handlereq ioctl argument starts with an xfs_fsop_handlereq_t, so
 * "size" bytes of whatever structure the caller expects are copied
 * straight into hreq; the caller supplies storage of that size.
 *
 *   mp       -- mount to look the inode up in.
 *   parinode -- inode the ioctl was issued on; must be a directory.
 *   cap      -- capability the calling process must hold.
 *   arg      -- userspace address of the ioctl argument.
 *   size     -- number of bytes to copy from arg into hreq.
 *   hreq     -- out: the copied-in request.
 *   vp       -- out: vnode of the inode the handle names.
 *   inode    -- out: Linux inode for that vnode.
 *
 * Returns 0 on success or a positive errno (EPERM, ENOTDIR, EFAULT,
 * EINVAL, EIO, ENOENT, or whatever xfs_iget() reports).  On success the
 * caller must always VN_RELE the returned vp.
 */
STATIC int
xfs_vget_fsop_handlereq(
	xfs_mount_t		*mp,
	struct inode		*parinode,	/* parent inode pointer    */
	int			cap,		/* capability level for op */
	unsigned long		arg,		/* userspace data pointer  */
	unsigned long		size,		/* size of expected struct */
	/* output arguments */
	xfs_fsop_handlereq_t	*hreq,
	vnode_t			**vp,
	struct inode		**inode)
{
	void			*hanp;
	size_t			hlen;
	xfs_fid_t		*xfid;
	xfs_handle_t		*handlep;
	xfs_handle_t		handle;
	xfs_inode_t		*ip;
	struct inode		*inodep;
	vnode_t			*vpp;
	xfs_ino_t		ino;
	__u32			igen;
	int			error;

	if (!capable(cap))
		return XFS_ERROR(EPERM);

	/* Handle opens are only permitted relative to a directory. */
	if (!S_ISDIR(parinode->i_mode))
		return XFS_ERROR(ENOTDIR);

	/* Bring the request itself down from the user. */
	if (copy_from_user(hreq, (struct xfs_fsop_handlereq *)arg, size))
		return XFS_ERROR(EFAULT);

	hanp = hreq->ihandle;
	hlen = hreq->ihandlen;
	handlep = &handle;

	/* The handle must hold at least an fsid and fit in our buffer. */
	if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep))
		return XFS_ERROR(EINVAL);

	if (copy_from_user(handlep, hanp, hlen))
		return XFS_ERROR(EFAULT);

	/* Zero whatever part of the local handle the user did not supply. */
	if (hlen < sizeof(*handlep))
		memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);

	/*
	 * If more than the fsid was supplied, the embedded fid length must
	 * account for exactly the remaining bytes and the pad must be zero.
	 */
	if (hlen > sizeof(handlep->ha_fsid) &&
	    (handlep->ha_fid.xfs_fid_len !=
			(hlen - sizeof(handlep->ha_fsid) -
			 sizeof(handlep->ha_fid.xfs_fid_len)) ||
	     handlep->ha_fid.xfs_fid_pad))
		return XFS_ERROR(EINVAL);

	/*
	 * Crack the handle: only a full-sized fid carries an inode number
	 * and generation we can use.
	 */
	xfid = (struct xfs_fid *)&handlep->ha_fid;
	if (xfid->xfs_fid_len != sizeof(*xfid) - sizeof(xfid->xfs_fid_len))
		return XFS_ERROR(EINVAL);

	ino  = xfid->xfs_fid_ino;
	igen = xfid->xfs_fid_gen;

	/* Get the XFS inode (shared-locked), building a vnode to go with it. */
	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
	if (error)
		return error;

	if (ip == NULL)
		return XFS_ERROR(EIO);

	/* A zero mode or a generation mismatch means the handle is stale. */
	if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) {
		xfs_iput_new(ip, XFS_ILOCK_SHARED);
		return XFS_ERROR(ENOENT);
	}

	vpp = XFS_ITOV(ip);
	inodep = LINVFS_GET_IP(vpp);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	*vp = vpp;
	*inode = inodep;
	return 0;
}
/* * Allocate a block and fill it with dquots. * This is called when the bmapi finds a hole. */ STATIC int xfs_qm_dqalloc( xfs_trans_t **tpp, xfs_mount_t *mp, xfs_dquot_t *dqp, xfs_inode_t *quotip, xfs_fileoff_t offset_fsb, xfs_buf_t **O_bpp) { xfs_fsblock_t firstblock; xfs_bmap_free_t flist; xfs_bmbt_irec_t map; int nmaps, error, committed; xfs_buf_t *bp; xfs_trans_t *tp = *tpp; ASSERT(tp != NULL); xfs_dqtrace_entry(dqp, "DQALLOC"); /* * Initialize the bmap freelist prior to calling bmapi code. */ XFS_BMAP_INIT(&flist, &firstblock); xfs_ilock(quotip, XFS_ILOCK_EXCL); /* * Return if this type of quotas is turned off while we didn't * have an inode lock */ if (XFS_IS_THIS_QUOTA_OFF(dqp)) { xfs_iunlock(quotip, XFS_ILOCK_EXCL); return (ESRCH); } /* * xfs_trans_commit normally decrements the vnode ref count * when it unlocks the inode. Since we want to keep the quota * inode around, we bump the vnode ref count now. */ VN_HOLD(XFS_ITOV(quotip)); xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); nmaps = 1; if ((error = xfs_bmapi(tp, quotip, offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp), &map, &nmaps, &flist))) { goto error0; } ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); ASSERT(nmaps == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && (map.br_startblock != HOLESTARTBLOCK)); /* * Keep track of the blkno to save a lookup later */ dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); /* now we can just get the buffer (there's nothing to read yet) */ bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno, XFS_QI_DQCHUNKLEN(mp), 0); if (!bp || (error = XFS_BUF_GETERROR(bp))) goto error1; /* * Make a chunk of dquots out of this buffer and log * the entire thing. */ xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id), dqp->dq_flags & XFS_DQ_ALLTYPES, bp); /* * xfs_bmap_finish() may commit the current transaction and * start a second transaction if the freelist is not empty. 
* * Since we still want to modify this buffer, we need to * ensure that the buffer is not released on commit of * the first transaction and ensure the buffer is added to the * second transaction. * * If there is only one transaction then don't stop the buffer * from being released when it commits later on. */ xfs_trans_bhold(tp, bp); if ((error = xfs_bmap_finish(tpp, &flist, firstblock, &committed))) { goto error1; } if (committed) { tp = *tpp; xfs_trans_bjoin(tp, bp); } else { xfs_trans_bhold_release(tp, bp); } *O_bpp = bp; return 0; error1: xfs_bmap_cancel(&flist); error0: xfs_iunlock(quotip, XFS_ILOCK_EXCL); return (error); }
/* * Look up an inode by number in the given file system. * The inode is looked up in the hash table for the file system * represented by the mount point parameter mp. Each bucket of * the hash table is guarded by an individual semaphore. * * If the inode is found in the hash table, its corresponding vnode * is obtained with a call to vn_get(). This call takes care of * coordination with the reclamation of the inode and vnode. Note * that the vmap structure is filled in while holding the hash lock. * This gives us the state of the inode/vnode when we found it and * is used for coordination in vn_get(). * * If it is not in core, read it in from the file system's device and * add the inode into the hash table. * * The inode is locked according to the value of the lock_flags parameter. * This flag parameter indicates how and if the inode's IO lock and inode lock * should be taken. * * mp -- the mount point structure for the current file system. It points * to the inode hash table. * tp -- a pointer to the current transaction if there is one. This is * simply passed through to the xfs_iread() call. * ino -- the number of the inode desired. This is the unique identifier * within the file system for the inode being requested. * lock_flags -- flags indicating how to lock the inode. See the comment * for xfs_ilock() for a list of valid values. * bno -- the block number starting the buffer containing the inode, * if known (as by bulkstat), else 0. 
*/ int xfs_iget( xfs_mount_t *mp, xfs_trans_t *tp, xfs_ino_t ino, uint flags, uint lock_flags, xfs_inode_t **ipp, xfs_daddr_t bno) { xfs_ihash_t *ih; xfs_inode_t *ip; xfs_inode_t *iq; xfs_vnode_t *vp; ulong version; int error; /* REFERENCED */ int newnode; xfs_chash_t *ch; xfs_chashlist_t *chl, *chlnew; vmap_t vmap; SPLDECL(s); XFS_STATS_INC(xs_ig_attempts); ih = XFS_IHASH(mp, ino); again: read_lock(&ih->ih_lock); for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) { if (ip->i_ino == ino) { vp = XFS_ITOV(ip); VMAP(vp, vmap); /* * Inode cache hit: if ip is not at the front of * its hash chain, move it there now. * Do this with the lock held for update, but * do statistics after releasing the lock. */ if (ip->i_prevp != &ih->ih_next && rwlock_trypromote(&ih->ih_lock)) { if ((iq = ip->i_next)) { iq->i_prevp = ip->i_prevp; } *ip->i_prevp = iq; iq = ih->ih_next; iq->i_prevp = &ip->i_next; ip->i_next = iq; ip->i_prevp = &ih->ih_next; ih->ih_next = ip; write_unlock(&ih->ih_lock); } else { read_unlock(&ih->ih_lock); } XFS_STATS_INC(xs_ig_found); /* * Get a reference to the vnode/inode. * vn_get() takes care of coordination with * the file system inode release and reclaim * functions. If it returns NULL, the inode * has been reclaimed so just start the search * over again. We probably won't find it, * but we could be racing with another cpu * looking for the same inode so we have to at * least look. */ if (!(vp = vn_get(vp, &vmap))) { XFS_STATS_INC(xs_ig_frecycle); goto again; } if (lock_flags != 0) { ip->i_flags &= ~XFS_IRECLAIM; xfs_ilock(ip, lock_flags); } newnode = (ip->i_d.di_mode == 0); if (newnode) { xfs_iocore_inode_reinit(ip); } ip->i_flags &= ~XFS_ISTALE; vn_trace_exit(vp, "xfs_iget.found", (inst_t *)__return_address); goto return_ip; } } /* * Inode cache miss: save the hash chain version stamp and unlock * the chain, so we don't deadlock in vn_alloc. 
*/ XFS_STATS_INC(xs_ig_missed); version = ih->ih_version; read_unlock(&ih->ih_lock); /* * Read the disk inode attributes into a new inode structure and get * a new vnode for it. This should also initialize i_ino and i_mount. */ error = xfs_iread(mp, tp, ino, &ip, bno); if (error) { return error; } error = xfs_vn_allocate(mp, ip, &vp); if (error) { return error; } vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address); xfs_inode_lock_init(ip, vp); xfs_iocore_inode_init(ip); if (lock_flags != 0) { xfs_ilock(ip, lock_flags); } /* * Put ip on its hash chain, unless someone else hashed a duplicate * after we released the hash lock. */ write_lock(&ih->ih_lock); if (ih->ih_version != version) { for (iq = ih->ih_next; iq != NULL; iq = iq->i_next) { if (iq->i_ino == ino) { write_unlock(&ih->ih_lock); xfs_idestroy(ip); XFS_STATS_INC(xs_ig_dup); goto again; } } } /* * These values _must_ be set before releasing ihlock! */ ip->i_hash = ih; if ((iq = ih->ih_next)) { iq->i_prevp = &ip->i_next; } ip->i_next = iq; ip->i_prevp = &ih->ih_next; ih->ih_next = ip; ip->i_udquot = ip->i_gdquot = NULL; ih->ih_version++; write_unlock(&ih->ih_lock); /* * put ip on its cluster's hash chain */ ASSERT(ip->i_chash == NULL && ip->i_cprev == NULL && ip->i_cnext == NULL); chlnew = NULL; ch = XFS_CHASH(mp, ip->i_blkno); chlredo: s = mutex_spinlock(&ch->ch_lock); for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) { if (chl->chl_blkno == ip->i_blkno) { /* insert this inode into the doubly-linked list * where chl points */ if ((iq = chl->chl_ip)) { ip->i_cprev = iq->i_cprev; iq->i_cprev->i_cnext = ip; iq->i_cprev = ip; ip->i_cnext = iq; } else { ip->i_cnext = ip; ip->i_cprev = ip; } chl->chl_ip = ip; ip->i_chash = chl; break; } } /* no hash list found for this block; add a new hash list */ if (chl == NULL) { if (chlnew == NULL) { mutex_spinunlock(&ch->ch_lock, s); ASSERT(xfs_chashlist_zone != NULL); chlnew = (xfs_chashlist_t *) kmem_zone_alloc(xfs_chashlist_zone, KM_SLEEP); 
ASSERT(chlnew != NULL); goto chlredo; } else { ip->i_cnext = ip; ip->i_cprev = ip; ip->i_chash = chlnew; chlnew->chl_ip = ip; chlnew->chl_blkno = ip->i_blkno; chlnew->chl_next = ch->ch_list; ch->ch_list = chlnew; chlnew = NULL; } } else { if (chlnew != NULL) { kmem_zone_free(xfs_chashlist_zone, chlnew); } } mutex_spinunlock(&ch->ch_lock, s); /* * Link ip to its mount and thread it on the mount's inode list. */ XFS_MOUNT_ILOCK(mp); if ((iq = mp->m_inodes)) { ASSERT(iq->i_mprev->i_mnext == iq); ip->i_mprev = iq->i_mprev; iq->i_mprev->i_mnext = ip; iq->i_mprev = ip; ip->i_mnext = iq; } else { ip->i_mnext = ip; ip->i_mprev = ip; } mp->m_inodes = ip; XFS_MOUNT_IUNLOCK(mp); newnode = 1; return_ip: ASSERT(ip->i_df.if_ext_max == XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t)); ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) == ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0)); *ipp = ip; /* * If we have a real type for an on-disk inode, we can set ops(&unlock) * now. If it's a new inode being created, xfs_ialloc will handle it. */ XVFS_INIT_VNODE(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1); return 0; }
/* * Allocate a block and fill it with dquots. * This is called when the bmapi finds a hole. */ STATIC int xfs_qm_dqalloc( xfs_trans_t *tp, xfs_mount_t *mp, xfs_dquot_t *dqp, xfs_inode_t *quotip, xfs_fileoff_t offset_fsb, xfs_buf_t **O_bpp) { xfs_fsblock_t firstblock; xfs_bmap_free_t flist; xfs_bmbt_irec_t map; int nmaps, error, committed; xfs_buf_t *bp; ASSERT(tp != NULL); xfs_dqtrace_entry(dqp, "DQALLOC"); /* * Initialize the bmap freelist prior to calling bmapi code. */ XFS_BMAP_INIT(&flist, &firstblock); xfs_ilock(quotip, XFS_ILOCK_EXCL); /* * Return if this type of quotas is turned off while we didn't * have an inode lock */ if (XFS_IS_THIS_QUOTA_OFF(dqp)) { xfs_iunlock(quotip, XFS_ILOCK_EXCL); return (ESRCH); } /* * xfs_trans_commit normally decrements the vnode ref count * when it unlocks the inode. Since we want to keep the quota * inode around, we bump the vnode ref count now. */ VN_HOLD(XFS_ITOV(quotip)); xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); nmaps = 1; if ((error = xfs_bmapi(tp, quotip, offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, &firstblock, XFS_QM_DQALLOC_SPACE_RES(mp), &map, &nmaps, &flist))) { goto error0; } ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); ASSERT(nmaps == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && (map.br_startblock != HOLESTARTBLOCK)); /* * Keep track of the blkno to save a lookup later */ dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); /* now we can just get the buffer (there's nothing to read yet) */ bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno, XFS_QI_DQCHUNKLEN(mp), 0); if (!bp || (error = XFS_BUF_GETERROR(bp))) goto error1; /* * Make a chunk of dquots out of this buffer and log * the entire thing. 
*/ xfs_qm_init_dquot_blk(tp, mp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT), dqp->dq_flags & (XFS_DQ_USER|XFS_DQ_GROUP), bp); if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed))) { goto error1; } *O_bpp = bp; return 0; error1: xfs_bmap_cancel(&flist); error0: xfs_iunlock(quotip, XFS_ILOCK_EXCL); return (error); }
/* * xfs_rename */ int xfs_rename( bhv_desc_t *src_dir_bdp, bhv_vname_t *src_vname, bhv_vnode_t *target_dir_vp, bhv_vname_t *target_vname, cred_t *credp) { xfs_trans_t *tp; xfs_inode_t *src_dp, *target_dp, *src_ip, *target_ip; xfs_mount_t *mp; int new_parent; /* moving to a new dir */ int src_is_directory; /* src_name is a directory */ int error; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; int cancel_flags; int committed; xfs_inode_t *inodes[4]; int target_ip_dropped = 0; /* dropped target_ip link? */ bhv_vnode_t *src_dir_vp; int spaceres; int target_link_zero = 0; int num_inodes; char *src_name = VNAME(src_vname); char *target_name = VNAME(target_vname); int src_namelen = VNAMELEN(src_vname); int target_namelen = VNAMELEN(target_vname); src_dir_vp = BHV_TO_VNODE(src_dir_bdp); vn_trace_entry(src_dir_vp, "xfs_rename", (inst_t *)__return_address); vn_trace_entry(target_dir_vp, "xfs_rename", (inst_t *)__return_address); /* * Find the XFS behavior descriptor for the target directory * vnode since it was not handed to us. */ target_dp = xfs_vtoi(target_dir_vp); if (target_dp == NULL) { return XFS_ERROR(EXDEV); } src_dp = XFS_BHVTOI(src_dir_bdp); mp = src_dp->i_mount; if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) || DM_EVENT_ENABLED(target_dir_vp->v_vfsp, target_dp, DM_EVENT_RENAME)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME, src_dir_vp, DM_RIGHT_NULL, target_dir_vp, DM_RIGHT_NULL, src_name, target_name, 0, 0, 0); if (error) { return error; } } /* Return through std_return after this point. */ /* * Lock all the participating inodes. Depending upon whether * the target_name exists in the target directory, and * whether the target directory is the same as the source * directory, we can lock from 2 to 4 inodes. * xfs_lock_for_rename() will return ENOENT if src_name * does not exist in the source directory. 
*/ tp = NULL; error = xfs_lock_for_rename(src_dp, target_dp, src_vname, target_vname, &src_ip, &target_ip, inodes, &num_inodes); if (error) { /* * We have nothing locked, no inode references, and * no transaction, so just get out. */ goto std_return; } ASSERT(src_ip != NULL); if ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { /* * Check for link count overflow on target_dp */ if (target_ip == NULL && (src_dp != target_dp) && target_dp->i_d.di_nlink >= XFS_MAXLINK) { error = XFS_ERROR(EMLINK); xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); goto rele_return; } } /* * If we are using project inheritance, we only allow renames * into our tree when the project IDs are the same; else the * tree quota mechanism would be circumvented. */ if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { error = XFS_ERROR(EXDEV); xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); goto rele_return; } new_parent = (src_dp != target_dp); src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); /* * Drop the locks on our inodes so that we can start the transaction. */ xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); XFS_BMAP_INIT(&free_list, &first_block); tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); cancel_flags = XFS_TRANS_RELEASE_LOG_RES; spaceres = XFS_RENAME_SPACE_RES(mp, target_namelen); error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); if (error == ENOSPC) { spaceres = 0; error = xfs_trans_reserve(tp, 0, XFS_RENAME_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); } if (error) { xfs_trans_cancel(tp, 0); goto rele_return; } /* * Attach the dquots to the inodes */ if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { xfs_trans_cancel(tp, cancel_flags); goto rele_return; } /* * Reacquire the inode locks we dropped above. */ xfs_lock_inodes(inodes, num_inodes, 0, XFS_ILOCK_EXCL); /* * Join all the inodes to the transaction. 
From this point on, * we can rely on either trans_commit or trans_cancel to unlock * them. Note that we need to add a vnode reference to the * directories since trans_commit & trans_cancel will decrement * them when they unlock the inodes. Also, we need to be careful * not to add an inode to the transaction more than once. */ VN_HOLD(src_dir_vp); xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); if (new_parent) { VN_HOLD(target_dir_vp); xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); } if ((src_ip != src_dp) && (src_ip != target_dp)) { xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); } if ((target_ip != NULL) && (target_ip != src_ip) && (target_ip != src_dp) && (target_ip != target_dp)) { xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); } /* * Set up the target. */ if (target_ip == NULL) { /* * If there's no space reservation, check the entry will * fit before actually inserting it. */ if (spaceres == 0 && (error = xfs_dir_canenter(tp, target_dp, target_name, target_namelen))) goto error_return; /* * If target does not exist and the rename crosses * directories, adjust the target directory link count * to account for the ".." reference from the new entry. */ error = xfs_dir_createname(tp, target_dp, target_name, target_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error == ENOSPC) goto error_return; if (error) goto abort_return; xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); if (new_parent && src_is_directory) { error = xfs_bumplink(tp, target_dp); if (error) goto abort_return; } } else { /* target_ip != NULL */ /* * If target exists and it's a directory, check that both * target and source are directories and that target can be * destroyed, or that neither is a directory. */ if ((target_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { /* * Make sure target dir is empty. */ if (!(xfs_dir_isempty(target_ip)) || (target_ip->i_d.di_nlink > 2)) { error = XFS_ERROR(EEXIST); goto error_return; } } /* * Link the source inode under the target name. 
* If the source inode is a directory and we are moving * it across directories, its ".." entry will be * inconsistent until we replace that down below. * * In case there is already an entry with the same * name at the destination directory, remove it first. */ error = xfs_dir_replace(tp, target_dp, target_name, target_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) goto abort_return; xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * Decrement the link count on the target since the target * dir no longer points to it. */ error = xfs_droplink(tp, target_ip); if (error) goto abort_return; target_ip_dropped = 1; if (src_is_directory) { /* * Drop the link from the old "." entry. */ error = xfs_droplink(tp, target_ip); if (error) goto abort_return; } /* Do this test while we still hold the locks */ target_link_zero = (target_ip)->i_d.di_nlink==0; } /* target_ip != NULL */ /* * Remove the source. */ if (new_parent && src_is_directory) { /* * Rewrite the ".." entry to point to the new * directory. */ error = xfs_dir_replace(tp, src_ip, "..", 2, target_dp->i_ino, &first_block, &free_list, spaceres); ASSERT(error != EEXIST); if (error) goto abort_return; xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } else { /* * We always want to hit the ctime on the source inode. * We do it in the if clause above for the 'new_parent && * src_is_directory' case, and here we get all the other * cases. This isn't strictly required by the standards * since the source inode isn't really being changed, * but old unix file systems did it and some incremental * backup programs won't work without it. */ xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG); } /* * Adjust the link count on src_dp. This is necessary when * renaming a directory, either within one parent when * the target existed, or across two parent directories. 
*/ if (src_is_directory && (new_parent || target_ip != NULL)) { /* * Decrement link count on src_directory since the * entry that's moved no longer points to it. */ error = xfs_droplink(tp, src_dp); if (error) goto abort_return; } error = xfs_dir_removename(tp, src_dp, src_name, src_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) goto abort_return; xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * Update the generation counts on all the directory inodes * that we're modifying. */ src_dp->i_gen++; xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); if (new_parent) { target_dp->i_gen++; xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); } /* * If there was a target inode, take an extra reference on * it here so that it doesn't go to xfs_inactive() from * within the commit. */ if (target_ip != NULL) { IHOLD(target_ip); } /* * If this is a synchronous mount, make sure that the * rename transaction goes to disk before returning to * the user. */ if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { xfs_trans_set_sync(tp); } /* * Take refs. for vop_link_removed calls below. No need to worry * about directory refs. because the caller holds them. * * Do holds before the xfs_bmap_finish since it might rele them down * to zero. */ if (target_ip_dropped) IHOLD(target_ip); IHOLD(src_ip); error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); if (target_ip != NULL) { IRELE(target_ip); } if (target_ip_dropped) { IRELE(target_ip); } IRELE(src_ip); goto std_return; } /* * trans_commit will unlock src_ip, target_ip & decrement * the vnode references. */ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (target_ip != NULL) { xfs_refcache_purge_ip(target_ip); IRELE(target_ip); } /* * Let interposed file systems know about removed links. 
*/ if (target_ip_dropped) { bhv_vop_link_removed(XFS_ITOV(target_ip), target_dir_vp, target_link_zero); IRELE(target_ip); } IRELE(src_ip); /* Fall through to std_return with error = 0 or errno from * xfs_trans_commit */ std_return: if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_POSTRENAME) || DM_EVENT_ENABLED(target_dir_vp->v_vfsp, target_dp, DM_EVENT_POSTRENAME)) { (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME, src_dir_vp, DM_RIGHT_NULL, target_dir_vp, DM_RIGHT_NULL, src_name, target_name, 0, error, 0); } return error; abort_return: cancel_flags |= XFS_TRANS_ABORT; /* FALLTHROUGH */ error_return: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, cancel_flags); goto std_return; rele_return: IRELE(src_ip); if (target_ip != NULL) { IRELE(target_ip); } goto std_return; }
/*
 * Copy the data described by 'uio' into the file behind 'xip', one
 * filesystem block at a time, through the buffer cache.
 *
 * xip    - inode being written
 * uio    - source data; uio_offset/uio_resid give the file position and
 *          the number of bytes still to transfer
 * ioflag - IO_* flags from the caller; IO_SYNC is forced on for every
 *          block, so each buffer goes out through bwrite()
 *
 * Returns 0 on success, otherwise the error from XVOP_BMAP, EINVAL when
 * getblk() hands back no buffer, or the error from uiomove().  The
 * result of bwrite() is not examined.  When a failure occurs and IO_UNIT
 * is set, uio_offset and uio_resid are put back to the values they had
 * on entry.
 */
int
xfs_write_file(xfs_inode_t *xip, struct uio *uio, int ioflag)
{
	xfs_vnode_t	*xvp = XFS_ITOV(xip);
	struct vnode	*vp = xvp->v_vnode;
	xfs_mount_t	*mp = (&xip->i_iocore)->io_mount;
	struct buf	*blk;
	//struct thread *td;
	xfs_iomap_t	iomap;
	daddr_t		fsbno;
	off_t		osize = 0;
	off_t		pos = 0;
	int		maps = 1;
	int		seqcount = ioflag >> IO_SEQSHIFT;
	int		bsize, boff, nbytes;
	int		resid;
	int		error;

	memset(&iomap, 0, sizeof(iomap));

	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
#if 0
	td = uio->uio_td;
	if (vp->v_type == VREG && td != NULL) {
		PROC_LOCK(td->td_proc);
		if (uio->uio_offset + uio->uio_resid >
		    lim_cur(td->td_proc, RLIMIT_FSIZE)) {
			psignal(td->td_proc, SIGXFSZ);
			PROC_UNLOCK(td->td_proc);
			return (EFBIG);
		}
		PROC_UNLOCK(td->td_proc);
	}
#endif

	/* Remember where we started so a failed write can be rewound. */
	osize = xip->i_d.di_size;
	pos = uio->uio_offset;
	resid = uio->uio_resid;

	/* xfs bmap wants bytes for both offset and size */
	XVOP_BMAP(xvp, uio->uio_offset, uio->uio_resid,
	    BMAPI_WRITE|BMAPI_DIRECT, &iomap, &maps, error);
	if (error) {
		printf("XVOP_BMAP failed\n");
		return error;
	}

	error = 0;
	while (uio->uio_resid > 0) {
		fsbno = XFS_B_TO_FSBT(mp, pos);
		boff = XFS_B_FSB_OFFSET(mp, pos);

		/*
		 * Transfer up to the end of the current filesystem block,
		 * or less if the request runs out first.
		 */
		nbytes = mp->m_sb.sb_blocksize - boff;
		bsize = mp->m_sb.sb_blocksize;
		if (uio->uio_resid < nbytes)
			nbytes = uio->uio_resid;

		/*
		 * getblk sets buf by blkno * bo->bo_bsize
		 * bo_bsize is set from the mnt point fsize
		 * so we call getblk in the case using fsblocks
		 * not basic blocks
		 */
		blk = getblk(vp, fsbno, bsize, 0, 0, 0);
		if (blk == NULL) {
			printf("getblk failed\n");
			error = EINVAL;
			break;
		}

		/* Partial write into a buffer that was not cached: clear it first. */
		if ((blk->b_flags & B_CACHE) == 0 && nbytes < bsize)
			vfs_bio_clrbuf(blk);

		/* Grow the in-core file size before the data is copied in. */
		if (pos + nbytes > xip->i_d.di_size) {
			xip->i_d.di_size = pos + nbytes;
			vnode_pager_setsize(vp, pos + bsize);
		}

		/* move the offset for the next iteration of the loop */
		pos += nbytes;

		error = uiomove((char *)blk->b_data + boff, nbytes, uio);

		if ((ioflag & IO_VMIO) &&
		    (LIST_FIRST(&blk->b_dep) == NULL)) /* in ext2fs? */
			blk->b_flags |= B_RELBUF;

		/* force to full direct for now */
		blk->b_flags |= B_DIRECT;
		/* and sync ... the delay path is not pushing data out */
		ioflag |= IO_SYNC;

		if (ioflag & IO_SYNC) {
			(void)bwrite(blk);
		} else if (0 /* RMC xfersize + blkoffset == fs->s_frag_size */) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
				blk->b_flags |= B_CLUSTEROK;
				cluster_write(vp, blk, osize, seqcount);
			} else {
				bawrite(blk);
			}
		} else {
			blk->b_flags |= B_CLUSTEROK;
			bdwrite(blk);
		}

		if (error || nbytes == 0)
			break;
	}

	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
#if 0
	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
		ip->i_mode &= ~(ISUID | ISGID);
#endif

	if (error && (ioflag & IO_UNIT)) {
#if 0
		(void)ext2_truncate(vp, osize,
		    ioflag & IO_SYNC, ap->a_cred, uio->uio_td);
#endif
		/* Undo the partial transfer as far as the uio is concerned. */
		uio->uio_offset -= resid - uio->uio_resid;
		uio->uio_resid = resid;
	}
	/*
	 * Nothing further is done on the success path, even for a
	 * synchronous write that moved data.  Update the vnode here?
	 */

	return error;
}
/*
 * Function form of the XFS_ITOV() macro: hands back whatever
 * XFS_ITOV() yields for the inode 'ip'.
 */
vnode_t *
xfs_itov(xfs_inode_t *ip)
{
	vnode_t	*vp = XFS_ITOV(ip);

	return vp;
}
/*
 * Syssgi interface for swapext
 *
 * Exchanges the data forks of two inodes named by file descriptors in
 * the user-supplied xfs_swapext_t: sx_fdtarget (the "target", ip) and
 * sx_fdtmp (the "tmp" file, tip).
 *
 * sxp - user-space pointer to the request; copied in with
 *       copy_from_user().
 *
 * Returns 0 on success or an error code.  Failures detected here:
 *   EFAULT - the request could not be copied in, or the request does
 *            not cover the whole of both files (sx_offset != 0 or
 *            sx_length differs from either di_size)
 *   EINVAL - a descriptor could not be resolved to a vnode, the inodes
 *            are on different mounts or are the same inode, the file
 *            types / realtime flags / attribute-fork presence differ,
 *            either data fork is in local format, or the tmp file still
 *            has cached pages after invalidation
 *   EBADF  - a vnode has no xfs_vnodeops behavior
 *   EIO    - the filesystem has been forcibly shut down
 *   EBUSY  - the target's ctime/mtime no longer match sx_stat, or the
 *            target is memory mapped
 * plus whatever xfs_iaccess(), xfs_trans_reserve(),
 * xfs_bmap_count_blocks() or xfs_trans_commit() report.
 *
 * Every exit taken after the fget() calls goes through error0 (or the
 * explicit fput() pair after the commit) so that the file references
 * obtained with fget() are always dropped.
 */
int
xfs_swapext(
	xfs_swapext_t	__user *sxp)
{
	xfs_swapext_t	sx;
	xfs_inode_t	*ip=NULL, *tip=NULL, *ips[2];
	xfs_trans_t	*tp;
	xfs_mount_t	*mp;
	xfs_bstat_t	*sbp;
	struct file	*fp = NULL, *tfp = NULL;
	vnode_t		*vp, *tvp;
	bhv_desc_t	*bdp, *tbdp;
	vn_bhv_head_t	*bhp, *tbhp;
	uint		lock_flags=0;
	int		ilf_fields, tilf_fields;
	int		error = 0;
	xfs_ifork_t	tempif, *ifp, *tifp;
	__uint64_t	tmp;
	int		aforkblks = 0;
	int		taforkblks = 0;
	int		locked = 0;	/* non-zero while error0 must unlock */

	if (copy_from_user(&sx, sxp, sizeof(sx)))
		return XFS_ERROR(EFAULT);

	/* Pull information for the target fd */
	if (((fp = fget((int)sx.sx_fdtarget)) == NULL) ||
	    ((vp = LINVFS_GET_VP(fp->f_dentry->d_inode)) == NULL))  {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	bhp = VN_BHV_HEAD(vp);
	bdp = vn_bhv_lookup(bhp, &xfs_vnodeops);
	if (bdp == NULL) {
		error = XFS_ERROR(EBADF);
		goto error0;
	} else {
		ip = XFS_BHVTOI(bdp);
	}

	/* Same again for the tmp fd */
	if (((tfp = fget((int)sx.sx_fdtmp)) == NULL) ||
	    ((tvp = LINVFS_GET_VP(tfp->f_dentry->d_inode)) == NULL)) {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	tbhp = VN_BHV_HEAD(tvp);
	tbdp = vn_bhv_lookup(tbhp, &xfs_vnodeops);
	if (tbdp == NULL) {
		error = XFS_ERROR(EBADF);
		goto error0;
	} else {
		tip = XFS_BHVTOI(tbdp);
	}

	/* Both inodes must live on the same mount and be distinct */
	if (ip->i_mount != tip->i_mount) {
		error =  XFS_ERROR(EINVAL);
		goto error0;
	}

	if (ip->i_ino == tip->i_ino) {
		error =  XFS_ERROR(EINVAL);
		goto error0;
	}

	mp = ip->i_mount;

	sbp = &sx.sx_stat;

	if (XFS_FORCED_SHUTDOWN(mp)) {
		error =  XFS_ERROR(EIO);
		goto error0;
	}

	/* From here on error0 has to drop the inode locks as well */
	locked = 1;

	/* Lock in i_ino order */
	if (ip->i_ino < tip->i_ino) {
		ips[0] = ip;
		ips[1] = tip;
	} else {
		ips[0] = tip;
		ips[1] = ip;
	}
	lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
	xfs_lock_inodes(ips, 2, 0, lock_flags);

	/* Check permissions */
	error = xfs_iaccess(ip, S_IWUSR, NULL);
	if (error)
		goto error0;

	error = xfs_iaccess(tip, S_IWUSR, NULL);
	if (error)
		goto error0;

	/* Verify that both files have the same format */
	if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	/* Verify both files are either real-time or non-realtime */
	if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) !=
	    (tip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	/* Should never get a local format */
	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
	    tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	if (VN_CACHED(tvp) != 0)
		xfs_inval_cached_pages(XFS_ITOV(tip), &(tip->i_iocore),
						(xfs_off_t)0, 0, 0);

	/* Verify O_DIRECT for ftmp */
	if (VN_CACHED(tvp) != 0) {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	/* Verify all data are being swapped */
	if (sx.sx_offset != 0 ||
	    sx.sx_length != ip->i_d.di_size ||
	    sx.sx_length != tip->i_d.di_size) {
		error = XFS_ERROR(EFAULT);
		goto error0;
	}

	/*
	 * If the target has extended attributes, the tmp file
	 * must also in order to ensure the correct data fork
	 * format.
	 */
	if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	/*
	 * Compare the current change & modify times with that
	 * passed in.  If they differ, we abort this swap.
	 * This is the mechanism used to ensure the calling
	 * process that the file was not changed out from
	 * under it.
	 */
	if ((sbp->bs_ctime.tv_sec != ip->i_d.di_ctime.t_sec) ||
	    (sbp->bs_ctime.tv_nsec != ip->i_d.di_ctime.t_nsec) ||
	    (sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) ||
	    (sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) {
		error = XFS_ERROR(EBUSY);
		goto error0;
	}

	/* We need to fail if the file is memory mapped.  Once we have tossed
	 * all existing pages, the page fault will have no option
	 * but to go to the filesystem for pages. By making the page fault call
	 * VOP_READ (or write in the case of autogrow) they block on the iolock
	 * until we have switched the extents.
	 */
	if (VN_MAPPED(vp)) {
		error = XFS_ERROR(EBUSY);
		goto error0;
	}

	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	xfs_iunlock(tip, XFS_ILOCK_EXCL);

	/*
	 * There is a race condition here since we gave up the
	 * ilock.  However, the data fork will not change since
	 * we have the iolock (locked for truncation too) so we
	 * are safe.  We don't really care if non-io related
	 * fields change.
	 */

	VOP_TOSS_PAGES(vp, 0, -1, FI_REMAPF);

	tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
	if ((error = xfs_trans_reserve(tp, 0,
				     XFS_ICHANGE_LOG_RES(mp), 0,
				     0, 0))) {
		/*
		 * Only the iolocks are held at this point.  Drop them
		 * here and leave through error0 with locked cleared so
		 * that the file references are still released.
		 */
		xfs_iunlock(ip,  XFS_IOLOCK_EXCL);
		xfs_iunlock(tip, XFS_IOLOCK_EXCL);
		xfs_trans_cancel(tp, 0);
		locked = 0;
		goto error0;
	}
	xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);

	/*
	 * Count the number of extended attribute blocks
	 */
	if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks);
		if (error) {
			/* Unlock and cancel, then drop the files via error0 */
			xfs_iunlock(ip,  lock_flags);
			xfs_iunlock(tip, lock_flags);
			xfs_trans_cancel(tp, 0);
			locked = 0;
			goto error0;
		}
	}
	if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
	     (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
			&taforkblks);
		if (error) {
			/* Unlock and cancel, then drop the files via error0 */
			xfs_iunlock(ip,  lock_flags);
			xfs_iunlock(tip, lock_flags);
			xfs_trans_cancel(tp, 0);
			locked = 0;
			goto error0;
		}
	}

	/*
	 * Swap the data forks of the inodes
	 */
	ifp = &ip->i_df;
	tifp = &tip->i_df;
	tempif = *ifp;	/* struct copy */
	*ifp = *tifp;	/* struct copy */
	*tifp = tempif;	/* struct copy */

	/*
	 * Fix the on-disk inode values.  The attribute fork blocks stay
	 * with their original inode, so they are excluded from the
	 * block counts that change hands.
	 */
	tmp = (__uint64_t)ip->i_d.di_nblocks;
	ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
	tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;

	tmp = (__uint64_t) ip->i_d.di_nextents;
	ip->i_d.di_nextents = tip->i_d.di_nextents;
	tip->i_d.di_nextents = tmp;

	tmp = (__uint64_t) ip->i_d.di_format;
	ip->i_d.di_format = tip->i_d.di_format;
	tip->i_d.di_format = tmp;

	ilf_fields = XFS_ILOG_CORE;

	switch(ip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		/* If the extents fit in the inode, fix the
		 * pointer.  Otherwise it's already NULL or
		 * pointing to the extent.
		 */
		if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			ifp->if_u1.if_extents =
				ifp->if_u2.if_inline_ext;
		}
		ilf_fields |= XFS_ILOG_DEXT;
		break;
	case XFS_DINODE_FMT_BTREE:
		ilf_fields |= XFS_ILOG_DBROOT;
		break;
	}

	tilf_fields = XFS_ILOG_CORE;

	switch(tip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		/* If the extents fit in the inode, fix the
		 * pointer.  Otherwise it's already NULL or
		 * pointing to the extent.
		 */
		if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			tifp->if_u1.if_extents =
				tifp->if_u2.if_inline_ext;
		}
		tilf_fields |= XFS_ILOG_DEXT;
		break;
	case XFS_DINODE_FMT_BTREE:
		tilf_fields |= XFS_ILOG_DBROOT;
		break;
	}

	/*
	 * Increment vnode ref counts since xfs_trans_commit &
	 * xfs_trans_cancel will both unlock the inodes and
	 * decrement the associated ref counts.
	 */
	VN_HOLD(vp);
	VN_HOLD(tvp);

	xfs_trans_ijoin(tp, ip, lock_flags);
	xfs_trans_ijoin(tp, tip, lock_flags);

	xfs_trans_log_inode(tp, ip,  ilf_fields);
	xfs_trans_log_inode(tp, tip, tilf_fields);

	/*
	 * If this is a synchronous mount, make sure that the
	 * transaction goes to disk before returning to the user.
	 */
	if (mp->m_flags & XFS_MOUNT_WSYNC) {
		xfs_trans_set_sync(tp);
	}

	error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT, NULL);

	fput(fp);
	fput(tfp);

	return error;

 error0:
	if (locked) {
		xfs_iunlock(ip,  lock_flags);
		xfs_iunlock(tip, lock_flags);
	}

	if (fp != NULL)
		fput(fp);
	if (tfp != NULL)
		fput(tfp);

	return error;
}
/*
 * Return quota status information, such as uquota-off, enforcements, etc.
 *
 * 'out' is zeroed and then filled in from the mount 'mp'.  The quota
 * inodes come from mp->m_quotainfo when that is set up; otherwise any
 * quota inode named in the superblock is fetched with xfs_iget() just
 * long enough to read its block and extent counts, and is released
 * again.  Always returns 0.
 */
STATIC int
xfs_qm_scall_getqstat(
	xfs_mount_t	*mp,
	fs_quota_stat_t *out)
{
	xfs_inode_t	*user_qip = NULL;
	xfs_inode_t	*group_qip = NULL;
	boolean_t	rele_user_qip = B_FALSE;
	boolean_t	rele_group_qip = B_FALSE;

	memset(out, 0, sizeof(*out));
	out->qs_version = FS_QSTAT_VERSION;

	/* No quota support in the superblock: report "no quota inodes". */
	if (! XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
		out->qs_uquota.qfs_ino = NULLFSINO;
		out->qs_gquota.qfs_ino = NULLFSINO;
		return 0;
	}

	out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
							(XFS_ALL_QUOTA_ACCT|
							 XFS_ALL_QUOTA_ENFD));
	out->qs_pad = 0;
	out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
	out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;

	/* Prefer the quota inodes already held in the quotainfo. */
	if (mp->m_quotainfo) {
		user_qip = mp->m_quotainfo->qi_uquotaip;
		group_qip = mp->m_quotainfo->qi_gquotaip;
	}

	/*
	 * Otherwise take temporary references on the inodes named in
	 * the superblock; a failed lookup just leaves the pointer as
	 * it was.
	 */
	if (user_qip == NULL &&
	    mp->m_sb.sb_uquotino != NULLFSINO &&
	    xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
		     0, 0, &user_qip, 0) == 0)
		rele_user_qip = B_TRUE;

	if (group_qip == NULL &&
	    mp->m_sb.sb_gquotino != NULLFSINO &&
	    xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
		     0, 0, &group_qip, 0) == 0)
		rele_group_qip = B_TRUE;

	if (user_qip != NULL) {
		out->qs_uquota.qfs_nblks = user_qip->i_d.di_nblocks;
		out->qs_uquota.qfs_nextents = user_qip->i_d.di_nextents;
		if (rele_user_qip)
			VN_RELE(XFS_ITOV(user_qip));
	}

	if (group_qip != NULL) {
		out->qs_gquota.qfs_nblks = group_qip->i_d.di_nblocks;
		out->qs_gquota.qfs_nextents = group_qip->i_d.di_nextents;
		if (rele_group_qip)
			VN_RELE(XFS_ITOV(group_qip));
	}

	/* Limits and the in-core dquot count only exist with a quotainfo. */
	if (mp->m_quotainfo) {
		out->qs_incoredqs = XFS_QI_MPLNDQUOTS(mp);
		out->qs_btimelimit = XFS_QI_BTIMELIMIT(mp);
		out->qs_itimelimit = XFS_QI_ITIMELIMIT(mp);
		out->qs_rtbtimelimit = XFS_QI_RTBTIMELIMIT(mp);
		out->qs_bwarnlimit = XFS_QI_BWARNLIMIT(mp);
		out->qs_iwarnlimit = XFS_QI_IWARNLIMIT(mp);
	}

	return 0;
}
/*
 * Unmount the filesystem whose vfs behavior is 'bdp'.
 *
 * bdp   - behavior descriptor; the vfs and the xfs mount are derived
 *         from it
 * flags - not examined here
 * credp - credentials handed on to xfs_sync() and xfs_unmountfs()
 *
 * When the vfs has VFS_DMI set, a DMAPI pre-unmount event is sent first
 * and a failure there is returned at once.  After that the reference
 * cache is purged and the filesystem flushed.  If the flush succeeds the
 * root vnode reference is dropped and xfs_unmountfs() is called; if it
 * fails, xfs_unmountfs() is skipped.  In both cases the DMAPI unmount
 * event (when wanted) is sent with the resulting error before returning.
 */
STATIC int
xfs_unmount(
	bhv_desc_t	*bdp,
	int		flags,
	cred_t		*credp)
{
	struct vfs	*vfsp = bhvtovfs(bdp);
	xfs_mount_t	*mp = XFS_BHVTOM(bdp);
	xfs_inode_t	*root_ip = mp->m_rootip;
	vnode_t		*root_vp = XFS_ITOV(root_ip);
	int		send_unmount_event = 0;
	int		unmount_dm_flags = 0;
	int		call_unmountfs = 0;
	int		error;

	if (vfsp->vfs_flag & VFS_DMI) {
		error = XFS_SEND_PREUNMOUNT(mp, vfsp,
				root_vp, DM_RIGHT_NULL, root_vp, DM_RIGHT_NULL,
				NULL, NULL, 0, 0,
				(mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))?
					0:DM_FLAGS_UNWANTED);
		if (error)
			return XFS_ERROR(error);

		send_unmount_event = 1;
		if (mp->m_dmevmask & (1<<DM_EVENT_UNMOUNT))
			unmount_dm_flags = 0;
		else
			unmount_dm_flags = DM_FLAGS_UNWANTED;
	}

	/*
	 * First blow any referenced inode from this file system
	 * out of the reference cache, and delete the timer.
	 */
	xfs_refcache_purge_mp(mp);

	XFS_bflush(mp->m_ddev_targp);

	error = xfs_unmount_flush(mp, 0);
	if (error == 0) {
		ASSERT(vn_count(root_vp) == 1);

		/*
		 * Drop the reference count
		 */
		VN_RELE(root_vp);

		/*
		 * If we're forcing a shutdown, typically because of a media
		 * error, we want to make sure we invalidate dirty pages that
		 * belong to referenced vnodes as well.
		 */
		if (XFS_FORCED_SHUTDOWN(mp)) {
			error = xfs_sync(&mp->m_bhv,
				 (SYNC_WAIT | SYNC_CLOSE), credp);
			ASSERT(error != EFSCORRUPTED);
		}

		call_unmountfs = 1;
	}

	/*
	 * Send DMAPI event, if required.
	 * Then do xfs_unmountfs() if needed.
	 * Then return error (or zero).
	 */
	if (send_unmount_event) {
		/*
		 * Note: mp structure must still exist for
		 * XFS_SEND_UNMOUNT() call.
		 */
		XFS_SEND_UNMOUNT(mp, vfsp, error == 0 ? root_vp : NULL,
			DM_RIGHT_NULL, 0, error, unmount_dm_flags);
	}

	if (call_unmountfs) {
		/*
		 * Call common unmount function to flush to disk
		 * and free the super block buffer & mount structures.
		 */
		xfs_unmountfs(mp, credp);
	}

	return XFS_ERROR(error);
}