/* * Clean a vnode of filesystem-specific data and prepare it for reuse. */ STATIC int vn_reclaim( struct vnode *vp) { int error; XFS_STATS_INC(vn_reclaim); vn_trace_entry(vp, "vn_reclaim", (inst_t *)__return_address); /* * Only make the VOP_RECLAIM call if there are behaviors * to call. */ if (vp->v_fbhv) { VOP_RECLAIM(vp, error); if (error) return -error; } ASSERT(vp->v_fbhv == NULL); VN_LOCK(vp); vp->v_flag &= (VRECLM|VWAIT); VN_UNLOCK(vp, 0); vp->v_type = VNON; vp->v_fbhv = NULL; #ifdef XFS_VNODE_TRACE ktrace_free(vp->v_trace); vp->v_trace = NULL; #endif return 0; }
/* * purge a vnode from the cache * At this point the vnode is guaranteed to have no references (vn_count == 0) * The caller has to make sure that there are no ways someone could * get a handle (via vn_get) on the vnode (usually done via a mount/vfs lock). */ void vn_purge( struct vnode *vp, vmap_t *vmap) { vn_trace_entry(vp, "vn_purge", (inst_t *)__return_address); again: /* * Check whether vp has already been reclaimed since our caller * sampled its version while holding a filesystem cache lock that * its VOP_RECLAIM function acquires. */ VN_LOCK(vp); if (vp->v_number != vmap->v_number) { VN_UNLOCK(vp, 0); return; } /* * If vp is being reclaimed or inactivated, wait until it is inert, * then proceed. Can't assume that vnode is actually reclaimed * just because the reclaimed flag is asserted -- a vn_alloc * reclaim can fail. */ if (vp->v_flag & (VINACT | VRECLM)) { ASSERT(vn_count(vp) == 0); vp->v_flag |= VWAIT; sv_wait(vptosync(vp), PINOD, &vp->v_lock, 0); goto again; } /* * Another process could have raced in and gotten this vnode... */ if (vn_count(vp) > 0) { VN_UNLOCK(vp, 0); return; } XFS_STATS_DEC(vn_active); vp->v_flag |= VRECLM; VN_UNLOCK(vp, 0); /* * Call VOP_RECLAIM and clean vp. The FSYNC_INVAL flag tells * vp's filesystem to flush and invalidate all cached resources. * When vn_reclaim returns, vp should have no private data, * either in a system cache or attached to v_data. */ if (vn_reclaim(vp) != 0) panic("vn_purge: cannot reclaim"); /* * Wakeup anyone waiting for vp to be reclaimed. */ vn_wakeup(vp); }
ssize_t xfs_sendfile( bhv_desc_t *bdp, struct file *filp, loff_t *offset, int ioflags, size_t count, read_actor_t actor, void *target, cred_t *credp) { ssize_t ret; xfs_fsize_t n; xfs_inode_t *ip; xfs_mount_t *mp; vnode_t *vp; ip = XFS_BHVTOI(bdp); vp = BHV_TO_VNODE(bdp); mp = ip->i_mount; vn_trace_entry(vp, "xfs_sendfile", (inst_t *)__return_address); XFS_STATS_INC(xs_read_calls); n = XFS_MAXIOFFSET(mp) - *offset; if ((n <= 0) || (count == 0)) return 0; if (n < count) count = n; if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; xfs_ilock(ip, XFS_IOLOCK_SHARED); if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && (!(ioflags & IO_INVIS))) { vrwlock_t locktype = VRWLOCK_READ; int error; error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, count, FILP_DELAY_FLAG(filp), &locktype); if (error) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); return -error; } } ret = generic_file_sendfile(filp, offset, count, actor, target); xfs_iunlock(ip, XFS_IOLOCK_SHARED); XFS_STATS_ADD(xs_read_bytes, ret); xfs_ichgtime(ip, XFS_ICHGTIME_ACC); return ret; }
STATIC void linvfs_clear_inode( struct inode *inode) { vnode_t *vp = LINVFS_GET_VP(inode); if (vp) { vn_rele(vp); vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); /* * Do all our cleanup, and remove this vnode. */ vn_remove(vp); } }
/* * Attempt to flush the inode, this will actually fail * if the inode is pinned, but we dirty the inode again * at the point when it is unpinned after a log write, * since this is when the inode itself becomes flushable. */ STATIC void linvfs_write_inode( struct inode *inode, int sync) { vnode_t *vp = LINVFS_GET_VP(inode); int error, flags = FLUSH_INODE; if (vp) { vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); if (sync) flags |= FLUSH_SYNC; VOP_IFLUSH(vp, flags, error); } }
int xfs_dir_lookup_int( bhv_desc_t *dir_bdp, uint lock_mode, vname_t *dentry, xfs_ino_t *inum, xfs_inode_t **ipp) { vnode_t *dir_vp; xfs_inode_t *dp; int error; dir_vp = BHV_TO_VNODE(dir_bdp); vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address); dp = XFS_BHVTOI(dir_bdp); error = XFS_DIR_LOOKUP(dp->i_mount, NULL, dp, VNAME(dentry), VNAMELEN(dentry), inum); if (!error) { /* * Unlock the directory. We do this because we can't * hold the directory lock while doing the vn_get() * in xfs_iget(). Doing so could cause us to hold * a lock while waiting for the inode to finish * being inactive while it's waiting for a log * reservation in the inactive routine. */ xfs_iunlock(dp, lock_mode); error = xfs_iget(dp->i_mount, NULL, *inum, 0, 0, ipp, 0); xfs_ilock(dp, lock_mode); if (error) { *ipp = NULL; } else if ((*ipp)->i_d.di_mode == 0) { /* * The inode has been freed. Something is * wrong so just get out of here. */ xfs_iunlock(dp, lock_mode); xfs_iput_new(*ipp, 0); *ipp = NULL; xfs_ilock(dp, lock_mode); error = XFS_ERROR(ENOENT); } } return error; }
/* * Call VOP_INACTIVE on last reference. */ void vn_rele( struct vnode *vp) { int vcnt; int cache; XFS_STATS_INC(vn_rele); VN_LOCK(vp); vn_trace_entry(vp, "vn_rele", (inst_t *)__return_address); vcnt = vn_count(vp); /* * Since we always get called from put_inode we know * that i_count won't be decremented after we * return. */ if (!vcnt) { /* * As soon as we turn this on, noone can find us in vn_get * until we turn off VINACT or VRECLM */ vp->v_flag |= VINACT; VN_UNLOCK(vp, 0); /* * Do not make the VOP_INACTIVE call if there * are no behaviors attached to the vnode to call. */ if (vp->v_fbhv) VOP_INACTIVE(vp, NULL, cache); VN_LOCK(vp); if (vp->v_flag & VWAIT) sv_broadcast(vptosync(vp)); vp->v_flag &= ~(VINACT|VWAIT|VRECLM|VMODIFIED); } VN_UNLOCK(vp, 0); vn_trace_exit(vp, "vn_rele", (inst_t *)__return_address); }
/* * Revalidate the Linux inode from the vnode. */ int vn_revalidate( struct vnode *vp) { vattr_t va; int error; vn_trace_entry(vp, "vn_revalidate", (inst_t *)__return_address); ASSERT(vp->v_fbhv != NULL); va.va_mask = XFS_AT_STAT|XFS_AT_XFLAGS; VOP_GETATTR(vp, &va, 0, NULL, error); if (!error) { vn_revalidate_core(vp, &va); VUNMODIFY(vp); } return -error; }
/* * Revalidate the Linux inode from the vnode. */ int vn_revalidate( struct vnode *vp) { struct inode *inode; vattr_t va; int error; vn_trace_entry(vp, "vn_revalidate", (inst_t *)__return_address); ASSERT(vp->v_fbhv != NULL); va.va_mask = XFS_AT_STAT|XFS_AT_XFLAGS; VOP_GETATTR(vp, &va, 0, NULL, error); if (!error) { inode = LINVFS_GET_IP(vp); inode->i_mode = VTTOIF(va.va_type) | va.va_mode; inode->i_nlink = va.va_nlink; inode->i_uid = va.va_uid; inode->i_gid = va.va_gid; inode->i_blocks = va.va_nblocks; inode->i_mtime = va.va_mtime; inode->i_ctime = va.va_ctime; inode->i_atime = va.va_atime; if (va.va_xflags & XFS_XFLAG_IMMUTABLE) inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; if (va.va_xflags & XFS_XFLAG_APPEND) inode->i_flags |= S_APPEND; else inode->i_flags &= ~S_APPEND; if (va.va_xflags & XFS_XFLAG_SYNC) inode->i_flags |= S_SYNC; else inode->i_flags &= ~S_SYNC; if (va.va_xflags & XFS_XFLAG_NOATIME) inode->i_flags |= S_NOATIME; else inode->i_flags &= ~S_NOATIME; VUNMODIFY(vp); } return -error; }
/* * Special iput for brand-new inodes that are still locked */ void xfs_iput_new(xfs_inode_t *ip, uint lock_flags) { xfs_vnode_t *vp = XFS_ITOV(ip); vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address); printf("xfs_iput_new: ip %p\n",ip); if ((ip->i_d.di_mode == 0)) { ASSERT(!(ip->i_flags & XFS_IRECLAIMABLE)); //vn_mark_bad(vp); printf("xfs_iput_new: ip %p di_mode == 0\n",ip); /* mabe call vgone here? RMC */ } if (lock_flags) xfs_iunlock(ip, lock_flags); ASSERT_VOP_LOCKED(vp->v_vnode, "xfs_iput_new"); vput(vp->v_vnode); }
int xfs_ioctl( bhv_desc_t *bdp, struct inode *inode, struct file *filp, int ioflags, unsigned int cmd, unsigned long arg) { int error; vnode_t *vp; xfs_inode_t *ip; xfs_mount_t *mp; vp = LINVFS_GET_VP(inode); vn_trace_entry(vp, "xfs_ioctl", (inst_t *)__return_address); ip = XFS_BHVTOI(bdp); mp = ip->i_mount; switch (cmd) { case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: case XFS_IOC_RESVSP: case XFS_IOC_UNRESVSP: case XFS_IOC_ALLOCSP64: case XFS_IOC_FREESP64: case XFS_IOC_RESVSP64: case XFS_IOC_UNRESVSP64: /* * Only allow the sys admin to reserve space unless * unwritten extents are enabled. */ if (!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) && !capable(CAP_SYS_ADMIN)) return -EPERM; return xfs_ioc_space(bdp, vp, filp, ioflags, cmd, arg); case XFS_IOC_DIOINFO: { struct dioattr da; da.d_miniosz = mp->m_sb.sb_blocksize; da.d_mem = mp->m_sb.sb_blocksize; /* * this only really needs to be BBSIZE. * it is set to the file system block size to * avoid having to do block zeroing on short writes. */ da.d_maxiosz = XFS_FSB_TO_B(mp, XFS_B_TO_FSBT(mp, KIO_MAX_ATOMIC_IO << 10)); if (copy_to_user((struct dioattr *)arg, &da, sizeof(da))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_FSBULKSTAT_SINGLE: case XFS_IOC_FSBULKSTAT: case XFS_IOC_FSINUMBERS: return xfs_ioc_bulkstat(mp, cmd, arg); case XFS_IOC_FSGEOMETRY_V1: return xfs_ioc_fsgeometry_v1(mp, arg); case XFS_IOC_FSGEOMETRY: return xfs_ioc_fsgeometry(mp, arg); case XFS_IOC_GETVERSION: case XFS_IOC_GETXFLAGS: case XFS_IOC_SETXFLAGS: case XFS_IOC_FSGETXATTR: case XFS_IOC_FSSETXATTR: case XFS_IOC_FSGETXATTRA: return xfs_ioc_xattr(vp, ip, filp, cmd, arg); case XFS_IOC_FSSETDM: { struct fsdmidata dmi; if (copy_from_user(&dmi, (struct fsdmidata *)arg, sizeof(dmi))) return -XFS_ERROR(EFAULT); error = xfs_set_dmattrs(bdp, dmi.fsd_dmevmask, dmi.fsd_dmstate, NULL); return -error; } case XFS_IOC_GETBMAP: case XFS_IOC_GETBMAPA: return xfs_ioc_getbmap(bdp, filp, ioflags, cmd, arg); case XFS_IOC_GETBMAPX: return xfs_ioc_getbmapx(bdp, arg); case XFS_IOC_FD_TO_HANDLE: case XFS_IOC_PATH_TO_HANDLE: case XFS_IOC_PATH_TO_FSHANDLE: return xfs_find_handle(cmd, arg); case XFS_IOC_OPEN_BY_HANDLE: return xfs_open_by_handle(mp, arg, filp, inode); case XFS_IOC_FSSETDM_BY_HANDLE: return xfs_fssetdm_by_handle(mp, arg, filp, inode); case XFS_IOC_READLINK_BY_HANDLE: return xfs_readlink_by_handle(mp, arg, filp, inode); case XFS_IOC_ATTRLIST_BY_HANDLE: return xfs_attrlist_by_handle(mp, arg, filp, inode); case XFS_IOC_ATTRMULTI_BY_HANDLE: return xfs_attrmulti_by_handle(mp, arg, filp, inode); case XFS_IOC_SWAPEXT: { error = xfs_swapext((struct xfs_swapext *)arg); return -error; } case XFS_IOC_FSCOUNTS: { xfs_fsop_counts_t out; error = xfs_fs_counts(mp, &out); if (error) return -error; if (copy_to_user((char *)arg, &out, sizeof(out))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_SET_RESBLKS: { xfs_fsop_resblks_t inout; __uint64_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&inout, (char *)arg, sizeof(inout))) return -XFS_ERROR(EFAULT); /* input parameter is passed in resblks field of structure */ in = inout.resblks; error = xfs_reserve_blocks(mp, &in, &inout); if (error) return -error; if (copy_to_user((char *)arg, &inout, sizeof(inout))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_GET_RESBLKS: { xfs_fsop_resblks_t out; if (!capable(CAP_SYS_ADMIN)) return -EPERM; error = xfs_reserve_blocks(mp, NULL, &out); if (error) return -error; if (copy_to_user((char *)arg, &out, sizeof(out))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_FSGROWFSDATA: { xfs_growfs_data_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&in, (char *)arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_growfs_data(mp, &in); return -error; } case XFS_IOC_FSGROWFSLOG: { xfs_growfs_log_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&in, (char *)arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_growfs_log(mp, &in); return -error; } case XFS_IOC_FSGROWFSRT: { xfs_growfs_rt_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&in, (char *)arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_growfs_rt(mp, &in); return -error; } case XFS_IOC_FREEZE: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (vp->v_vfsp->vfs_frozen == SB_UNFROZEN) { freeze_bdev(mp->m_ddev_targp->pbr_bdev); if (mp->m_rtdev_targp) freeze_bdev(mp->m_rtdev_targp->pbr_bdev); } return 0; case XFS_IOC_THAW: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (vp->v_vfsp->vfs_frozen != SB_UNFROZEN) { thaw_bdev(mp->m_ddev_targp->pbr_bdev, inode->i_sb); if (mp->m_rtdev_targp) thaw_bdev(mp->m_ddev_targp->pbr_bdev, NULL); } return 0; case XFS_IOC_GOINGDOWN: { __uint32_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(in, (__uint32_t *)arg)) return -XFS_ERROR(EFAULT); error = xfs_fs_goingdown(mp, in); return -error; } case XFS_IOC_ERROR_INJECTION: { xfs_error_injection_t in; if (!capable(CAP_SYS_ADMIN)) return EPERM; if (copy_from_user(&in, (char *)arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_errortag_add(in.errtag, mp); return -error; } case XFS_IOC_ERROR_CLEARALL: if (!capable(CAP_SYS_ADMIN)) return -EPERM; error = xfs_errortag_clearall(mp); return -error; default: return -ENOTTY; } }
/* * xfs_rename */ int xfs_rename( bhv_desc_t *src_dir_bdp, bhv_vname_t *src_vname, bhv_vnode_t *target_dir_vp, bhv_vname_t *target_vname, cred_t *credp) { xfs_trans_t *tp; xfs_inode_t *src_dp, *target_dp, *src_ip, *target_ip; xfs_mount_t *mp; int new_parent; /* moving to a new dir */ int src_is_directory; /* src_name is a directory */ int error; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; int cancel_flags; int committed; xfs_inode_t *inodes[4]; int target_ip_dropped = 0; /* dropped target_ip link? */ bhv_vnode_t *src_dir_vp; int spaceres; int target_link_zero = 0; int num_inodes; char *src_name = VNAME(src_vname); char *target_name = VNAME(target_vname); int src_namelen = VNAMELEN(src_vname); int target_namelen = VNAMELEN(target_vname); src_dir_vp = BHV_TO_VNODE(src_dir_bdp); vn_trace_entry(src_dir_vp, "xfs_rename", (inst_t *)__return_address); vn_trace_entry(target_dir_vp, "xfs_rename", (inst_t *)__return_address); /* * Find the XFS behavior descriptor for the target directory * vnode since it was not handed to us. */ target_dp = xfs_vtoi(target_dir_vp); if (target_dp == NULL) { return XFS_ERROR(EXDEV); } src_dp = XFS_BHVTOI(src_dir_bdp); mp = src_dp->i_mount; if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) || DM_EVENT_ENABLED(target_dir_vp->v_vfsp, target_dp, DM_EVENT_RENAME)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME, src_dir_vp, DM_RIGHT_NULL, target_dir_vp, DM_RIGHT_NULL, src_name, target_name, 0, 0, 0); if (error) { return error; } } /* Return through std_return after this point. */ /* * Lock all the participating inodes. Depending upon whether * the target_name exists in the target directory, and * whether the target directory is the same as the source * directory, we can lock from 2 to 4 inodes. * xfs_lock_for_rename() will return ENOENT if src_name * does not exist in the source directory. */ tp = NULL; error = xfs_lock_for_rename(src_dp, target_dp, src_vname, target_vname, &src_ip, &target_ip, inodes, &num_inodes); if (error) { /* * We have nothing locked, no inode references, and * no transaction, so just get out. */ goto std_return; } ASSERT(src_ip != NULL); if ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { /* * Check for link count overflow on target_dp */ if (target_ip == NULL && (src_dp != target_dp) && target_dp->i_d.di_nlink >= XFS_MAXLINK) { error = XFS_ERROR(EMLINK); xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); goto rele_return; } } /* * If we are using project inheritance, we only allow renames * into our tree when the project IDs are the same; else the * tree quota mechanism would be circumvented. */ if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { error = XFS_ERROR(EXDEV); xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); goto rele_return; } new_parent = (src_dp != target_dp); src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); /* * Drop the locks on our inodes so that we can start the transaction. */ xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); XFS_BMAP_INIT(&free_list, &first_block); tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); cancel_flags = XFS_TRANS_RELEASE_LOG_RES; spaceres = XFS_RENAME_SPACE_RES(mp, target_namelen); error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); if (error == ENOSPC) { spaceres = 0; error = xfs_trans_reserve(tp, 0, XFS_RENAME_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); } if (error) { xfs_trans_cancel(tp, 0); goto rele_return; } /* * Attach the dquots to the inodes */ if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { xfs_trans_cancel(tp, cancel_flags); goto rele_return; } /* * Reacquire the inode locks we dropped above. */ xfs_lock_inodes(inodes, num_inodes, 0, XFS_ILOCK_EXCL); /* * Join all the inodes to the transaction. From this point on, * we can rely on either trans_commit or trans_cancel to unlock * them. Note that we need to add a vnode reference to the * directories since trans_commit & trans_cancel will decrement * them when they unlock the inodes. Also, we need to be careful * not to add an inode to the transaction more than once. */ VN_HOLD(src_dir_vp); xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); if (new_parent) { VN_HOLD(target_dir_vp); xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); } if ((src_ip != src_dp) && (src_ip != target_dp)) { xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); } if ((target_ip != NULL) && (target_ip != src_ip) && (target_ip != src_dp) && (target_ip != target_dp)) { xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); } /* * Set up the target. */ if (target_ip == NULL) { /* * If there's no space reservation, check the entry will * fit before actually inserting it. */ if (spaceres == 0 && (error = xfs_dir_canenter(tp, target_dp, target_name, target_namelen))) goto error_return; /* * If target does not exist and the rename crosses * directories, adjust the target directory link count * to account for the ".." reference from the new entry. */ error = xfs_dir_createname(tp, target_dp, target_name, target_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error == ENOSPC) goto error_return; if (error) goto abort_return; xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); if (new_parent && src_is_directory) { error = xfs_bumplink(tp, target_dp); if (error) goto abort_return; } } else { /* target_ip != NULL */ /* * If target exists and it's a directory, check that both * target and source are directories and that target can be * destroyed, or that neither is a directory. */ if ((target_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { /* * Make sure target dir is empty. */ if (!(xfs_dir_isempty(target_ip)) || (target_ip->i_d.di_nlink > 2)) { error = XFS_ERROR(EEXIST); goto error_return; } } /* * Link the source inode under the target name. * If the source inode is a directory and we are moving * it across directories, its ".." entry will be * inconsistent until we replace that down below. * * In case there is already an entry with the same * name at the destination directory, remove it first. */ error = xfs_dir_replace(tp, target_dp, target_name, target_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) goto abort_return; xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * Decrement the link count on the target since the target * dir no longer points to it. */ error = xfs_droplink(tp, target_ip); if (error) goto abort_return; target_ip_dropped = 1; if (src_is_directory) { /* * Drop the link from the old "." entry. */ error = xfs_droplink(tp, target_ip); if (error) goto abort_return; } /* Do this test while we still hold the locks */ target_link_zero = (target_ip)->i_d.di_nlink==0; } /* target_ip != NULL */ /* * Remove the source. */ if (new_parent && src_is_directory) { /* * Rewrite the ".." entry to point to the new * directory. */ error = xfs_dir_replace(tp, src_ip, "..", 2, target_dp->i_ino, &first_block, &free_list, spaceres); ASSERT(error != EEXIST); if (error) goto abort_return; xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } else { /* * We always want to hit the ctime on the source inode. * We do it in the if clause above for the 'new_parent && * src_is_directory' case, and here we get all the other * cases. This isn't strictly required by the standards * since the source inode isn't really being changed, * but old unix file systems did it and some incremental * backup programs won't work without it. */ xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG); } /* * Adjust the link count on src_dp. This is necessary when * renaming a directory, either within one parent when * the target existed, or across two parent directories. */ if (src_is_directory && (new_parent || target_ip != NULL)) { /* * Decrement link count on src_directory since the * entry that's moved no longer points to it. */ error = xfs_droplink(tp, src_dp); if (error) goto abort_return; } error = xfs_dir_removename(tp, src_dp, src_name, src_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) goto abort_return; xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * Update the generation counts on all the directory inodes * that we're modifying. */ src_dp->i_gen++; xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); if (new_parent) { target_dp->i_gen++; xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); } /* * If there was a target inode, take an extra reference on * it here so that it doesn't go to xfs_inactive() from * within the commit. */ if (target_ip != NULL) { IHOLD(target_ip); } /* * If this is a synchronous mount, make sure that the * rename transaction goes to disk before returning to * the user. */ if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { xfs_trans_set_sync(tp); } /* * Take refs. for vop_link_removed calls below. No need to worry * about directory refs. because the caller holds them. * * Do holds before the xfs_bmap_finish since it might rele them down * to zero. */ if (target_ip_dropped) IHOLD(target_ip); IHOLD(src_ip); error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); if (target_ip != NULL) { IRELE(target_ip); } if (target_ip_dropped) { IRELE(target_ip); } IRELE(src_ip); goto std_return; } /* * trans_commit will unlock src_ip, target_ip & decrement * the vnode references. */ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (target_ip != NULL) { xfs_refcache_purge_ip(target_ip); IRELE(target_ip); } /* * Let interposed file systems know about removed links. */ if (target_ip_dropped) { bhv_vop_link_removed(XFS_ITOV(target_ip), target_dir_vp, target_link_zero); IRELE(target_ip); } IRELE(src_ip); /* Fall through to std_return with error = 0 or errno from * xfs_trans_commit */ std_return: if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_POSTRENAME) || DM_EVENT_ENABLED(target_dir_vp->v_vfsp, target_dp, DM_EVENT_POSTRENAME)) { (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME, src_dir_vp, DM_RIGHT_NULL, target_dir_vp, DM_RIGHT_NULL, src_name, target_name, 0, error, 0); } return error; abort_return: cancel_flags |= XFS_TRANS_ABORT; /* FALLTHROUGH */ error_return: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, cancel_flags); goto std_return; rele_return: IRELE(src_ip); if (target_ip != NULL) { IRELE(target_ip); } goto std_return; }
ssize_t /* bytes written, or (-) error */ xfs_write( bhv_desc_t *bdp, struct kiocb *iocb, const struct iovec *iovp, unsigned int segs, loff_t *offset, int ioflags, cred_t *credp) { struct file *file = iocb->ki_filp; size_t size = 0; xfs_inode_t *xip; xfs_mount_t *mp; ssize_t ret; int error = 0; xfs_fsize_t isize, new_size; xfs_fsize_t n, limit; xfs_iocore_t *io; vnode_t *vp; unsigned long seg; int iolock; int eventsent = 0; vrwlock_t locktype; XFS_STATS_INC(xs_write_calls); vp = BHV_TO_VNODE(bdp); vn_trace_entry(vp, "xfs_write", (inst_t *)__return_address); xip = XFS_BHVTOI(bdp); /* START copy & waste from filemap.c */ for (seg = 0; seg < segs; seg++) { const struct iovec *iv = &iovp[seg]; /* * If any segment has a negative length, or the cumulative * length ever wraps negative then return -EINVAL. */ size += iv->iov_len; if (unlikely((ssize_t)(size|iv->iov_len) < 0)) return XFS_ERROR(-EINVAL); } /* END copy & waste from filemap.c */ if (size == 0) return 0; io = &(xip->i_iocore); mp = io->io_mount; xfs_check_frozen(mp, bdp, XFS_FREEZE_WRITE); if (XFS_FORCED_SHUTDOWN(mp)) { return -EIO; } if (ioflags & IO_ISDIRECT) { pb_target_t *target = (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((*offset & target->pbr_smask) || (size & target->pbr_smask)) { return XFS_ERROR(-EINVAL); } iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; } else { iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; } xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); isize = xip->i_d.di_size; limit = XFS_MAXIOFFSET(mp); if (file->f_flags & O_APPEND) *offset = isize; start: n = limit - *offset; if (n <= 0) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return -EFBIG; } if (n < size) size = n; new_size = *offset + size; if (new_size > isize) { io->io_new_size = new_size; } if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { loff_t savedsize = *offset; xfs_iunlock(xip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, *offset, size, FILP_DELAY_FLAG(file), &locktype); if (error) { xfs_iunlock(xip, iolock); return -error; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; /* * The iolock was dropped and reaquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ if ((file->f_flags & O_APPEND) && savedsize != xip->i_d.di_size) { *offset = isize = xip->i_d.di_size; goto start; } } /* * On Linux, generic_file_write updates the times even if * no data is copied in so long as the write had a size. * * We must update xfs' times since revalidate will overcopy xfs. */ if (size && !(ioflags & IO_INVIS)) xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * If the offset is beyond the size of the file, we have a couple * of things to do. First, if there is already space allocated * we need to either create holes or zero the disk or ... * * If there is a page where the previous size lands, we need * to zero it out up to the new size. */ if (!(ioflags & IO_ISDIRECT) && (*offset > isize && isize)) { error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset, isize, *offset + size); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return(-error); } } xfs_iunlock(xip, XFS_ILOCK_EXCL); /* * If we're writing the file then make sure to clear the * setuid and setgid bits if the process is not being run * by root. This keeps people from modifying setuid and * setgid binaries. */ if (((xip->i_d.di_mode & S_ISUID) || ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (error) { xfs_iunlock(xip, iolock); return -error; } } retry: if (ioflags & IO_ISDIRECT) { xfs_inval_cached_pages(vp, &xip->i_iocore, *offset, 1, 1); } ret = generic_file_aio_write_nolock(iocb, iovp, segs, offset); if ((ret == -ENOSPC) && DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { xfs_rwunlock(bdp, locktype); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (error) return -error; xfs_rwlock(bdp, locktype); *offset = xip->i_d.di_size; goto retry; } if (*offset > xip->i_d.di_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); if (*offset > xip->i_d.di_size) { struct inode *inode = LINVFS_GET_IP(vp); xip->i_d.di_size = *offset; i_size_write(inode, *offset); xip->i_update_core = 1; xip->i_update_size = 1; } xfs_iunlock(xip, XFS_ILOCK_EXCL); } if (ret <= 0) { xfs_rwunlock(bdp, locktype); return ret; } XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) { /* * If we're treating this as O_DSYNC and we have not updated the * size, force the log. */ if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && !(xip->i_update_size)) { /* * If an allocation transaction occurred * without extending the size, then we have to force * the log up the proper point to ensure that the * allocation is permanent. We can't count on * the fact that buffered writes lock out direct I/O * writes - the direct I/O write could have extended * the size nontransactionally, then finished before * we started. xfs_write_file will think that the file * didn't grow but the update isn't safe unless the * size change is logged. * * Force the log if we've committed a transaction * against the inode or if someone else has and * the commit record hasn't gone to disk (e.g. * the inode is pinned). This guarantees that * all changes affecting the inode are permanent * when we return. */ xfs_inode_log_item_t *iip; xfs_lsn_t lsn; iip = xip->i_itemp; if (iip && iip->ili_last_lsn) { lsn = iip->ili_last_lsn; xfs_log_force(mp, lsn, XFS_LOG_FORCE | XFS_LOG_SYNC); } else if (xfs_ipincount(xip) > 0) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); } } else { xfs_trans_t *tp; /* * O_SYNC or O_DSYNC _with_ a size update are handled * the same way. * * If the write was synchronous then we need to make * sure that the inode modification time is permanent. * We'll have updated the timestamp above, so here * we use a synchronous transaction to log the inode. * It's not fast, but it's necessary. * * If this a dsync write and the size got changed * non-transactionally, then we need to ensure that * the size change gets logged in a synchronous * transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); if ((error = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0))) { /* Transaction reserve failed */ xfs_trans_cancel(tp, 0); } else { /* Transaction reserve successful */ xfs_ilock(xip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, xip); xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0, (xfs_lsn_t)0); xfs_iunlock(xip, XFS_ILOCK_EXCL); } } } /* (ioflags & O_SYNC) */ xfs_rwunlock(bdp, locktype); return(ret); }
ssize_t /* bytes read, or (-) error */ xfs_read( bhv_desc_t *bdp, struct kiocb *iocb, const struct iovec *iovp, unsigned int segs, loff_t *offset, int ioflags, cred_t *credp) { struct file *file = iocb->ki_filp; size_t size = 0; ssize_t ret; xfs_fsize_t n; xfs_inode_t *ip; xfs_mount_t *mp; vnode_t *vp; unsigned long seg; ip = XFS_BHVTOI(bdp); vp = BHV_TO_VNODE(bdp); mp = ip->i_mount; vn_trace_entry(vp, "xfs_read", (inst_t *)__return_address); XFS_STATS_INC(xs_read_calls); /* START copy & waste from filemap.c */ for (seg = 0; seg < segs; seg++) { const struct iovec *iv = &iovp[seg]; /* * If any segment has a negative length, or the cumulative * length ever wraps negative then return -EINVAL. */ size += iv->iov_len; if (unlikely((ssize_t)(size|iv->iov_len) < 0)) return XFS_ERROR(-EINVAL); } /* END copy & waste from filemap.c */ if (ioflags & IO_ISDIRECT) { pb_target_t *target = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((*offset & target->pbr_smask) || (size & target->pbr_smask)) { if (*offset == ip->i_d.di_size) { return (0); } return -XFS_ERROR(EINVAL); } } n = XFS_MAXIOFFSET(mp) - *offset; if ((n <= 0) || (size == 0)) return 0; if (n < size) size = n; if (XFS_FORCED_SHUTDOWN(mp)) { return -EIO; } /* OK so we are holding the I/O lock for the duration * of the submission, then what happens if the I/O * does not really happen here, but is scheduled * later? */ xfs_ilock(ip, XFS_IOLOCK_SHARED); if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { int error; vrwlock_t locktype = VRWLOCK_READ; error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, size, FILP_DELAY_FLAG(file), &locktype); if (error) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); return -error; } } ret = __generic_file_aio_read(iocb, iovp, segs, offset); xfs_iunlock(ip, XFS_IOLOCK_SHARED); if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); if (likely(!(ioflags & IO_INVIS))) xfs_ichgtime(ip, XFS_ICHGTIME_ACC); return ret; }