/*
 * Initialize the vnode/behavior glue for an XFS inode: link the
 * per-inode behavior descriptor into the vnode's behavior chain
 * (first time only), derive the vnode type from the on-disk mode,
 * and revalidate the Linux inode fields.
 *
 * bdp       - behavior descriptor of the mount, used to locate the vfs
 * vp        - vnode being initialized
 * inode_bhv - per-inode behavior descriptor to insert into vp's chain
 * unlock    - nonzero when called for a newly created/read inode that
 *             must be taken out of the I_NEW state once ops are set
 */
void
xfs_initialize_vnode(
	bhv_desc_t	*bdp,
	vnode_t		*vp,
	bhv_desc_t	*inode_bhv,
	int		unlock)
{
	xfs_inode_t	*ip = XFS_BHVTOI(inode_bhv);
	struct inode	*inode = LINVFS_GET_IP(vp);

	/*
	 * Only wire the behavior descriptor into the vnode's chain the
	 * first time through (bd_vobj still unset); a vnode that already
	 * carries this behavior keeps its existing chain.
	 */
	if (!inode_bhv->bd_vobj) {
		vp->v_vfsp = bhvtovfs(bdp);
		bhv_desc_init(inode_bhv, ip, vp, &xfs_vnodeops);
		bhv_insert(VN_BHV_HEAD(vp), inode_bhv);
	}

	vp->v_type = IFTOVT(ip->i_d.di_mode);

	/* Have we been called during the new inode create process,
	 * in which case we are too early to fill in the Linux inode.
	 * A di_mode of zero maps to VNON.
	 */
	if (vp->v_type == VNON)
		return;

	xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);

	/* For new inodes we need to set the ops vectors,
	 * and unlock the inode.
	 */
	if (unlock && (inode->i_state & I_NEW)) {
		xfs_set_inodeops(inode);
		unlock_new_inode(inode);
	}
}
/*
 * Splice data from a pipe into an XFS file.
 *
 * Takes the iolock exclusive around the whole operation, issues the
 * DMAPI write event (unless this is invisible I/O), delegates the data
 * movement to generic_file_splice_write(), and then pushes any file
 * size extension into the XFS incore inode so di_size stays in sync
 * with the VFS i_size.
 *
 * Returns bytes written, or a negative errno.
 */
ssize_t
xfs_splice_write(
	bhv_desc_t		*bdp,
	struct pipe_inode_info	*pipe,
	struct file		*outfilp,
	loff_t			*ppos,
	size_t			count,
	int			flags,
	int			ioflags,
	cred_t			*credp)
{
	xfs_inode_t		*ip = XFS_BHVTOI(bdp);
	xfs_mount_t		*mp = ip->i_mount;
	ssize_t			ret;
	struct inode		*inode = outfilp->f_mapping->host;
	xfs_fsize_t		isize;

	XFS_STATS_INC(xs_write_calls);
	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	xfs_ilock(ip, XFS_IOLOCK_EXCL);

	/* Notify DMAPI of the impending write, unless invisible I/O. */
	if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_WRITE) &&
	    (!(ioflags & IO_INVIS))) {
		bhv_vrwlock_t	locktype = VRWLOCK_WRITE;
		int		error;

		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, BHV_TO_VNODE(bdp),
					*ppos, count,
					FILP_DELAY_FLAG(outfilp), &locktype);
		if (error) {
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return -error;
		}
	}
	xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore,
			   pipe, count, *ppos, ioflags);
	ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
	if (ret > 0)
		XFS_STATS_ADD(xs_write_bytes, ret);

	/*
	 * On error (other than a fault) don't leave *ppos advanced past
	 * the current end of file.
	 */
	isize = i_size_read(inode);
	if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
		*ppos = isize;

	/*
	 * If the splice extended the file, record the new size in the
	 * incore XFS inode (double-checked under the ilock) and mark
	 * the core/size fields dirty so the update gets logged.
	 */
	if (*ppos > ip->i_d.di_size) {
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		if (*ppos > ip->i_d.di_size) {
			ip->i_d.di_size = *ppos;
			i_size_write(inode, *ppos);
			ip->i_update_core = 1;
			ip->i_update_size = 1;
		}
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return ret;
}
/*
 * xfs_sendfile -- send file data directly to the supplied actor
 * (typically a socket) via generic_file_sendfile().
 *
 * Clamps the request at the maximum XFS file offset, takes the iolock
 * shared, issues the DMAPI read event (unless invisible I/O), and
 * touches atime afterwards.
 *
 * Returns bytes transferred, or a negative errno.
 *
 * Fix: only fold ret into the xs_read_bytes statistic when it is an
 * actual byte count; previously a negative errno from
 * generic_file_sendfile() was added to the counter as well (the other
 * xfs_sendfile variant in this file already guards with ret > 0).
 */
ssize_t
xfs_sendfile(
	bhv_desc_t	*bdp,
	struct file	*filp,
	loff_t		*offset,
	int		ioflags,
	size_t		count,
	read_actor_t	actor,
	void		*target,
	cred_t		*credp)
{
	ssize_t		ret;
	xfs_fsize_t	n;
	xfs_inode_t	*ip;
	xfs_mount_t	*mp;
	vnode_t		*vp;

	ip = XFS_BHVTOI(bdp);
	vp = BHV_TO_VNODE(bdp);
	mp = ip->i_mount;
	vn_trace_entry(vp, "xfs_sendfile", (inst_t *)__return_address);

	XFS_STATS_INC(xs_read_calls);

	/* Clamp the transfer at the largest offset XFS supports. */
	n = XFS_MAXIOFFSET(mp) - *offset;
	if ((n <= 0) || (count == 0))
		return 0;

	if (n < count)
		count = n;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	xfs_ilock(ip, XFS_IOLOCK_SHARED);

	/* Notify DMAPI of the read, unless this is invisible I/O. */
	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
	    (!(ioflags & IO_INVIS))) {
		vrwlock_t locktype = VRWLOCK_READ;
		int error;

		error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
				      *offset, count,
				      FILP_DELAY_FLAG(filp), &locktype);
		if (error) {
			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
			return -error;
		}
	}
	ret = generic_file_sendfile(filp, offset, count, actor, target);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	/* Only account real bytes; ret may be a negative errno. */
	if (ret > 0)
		XFS_STATS_ADD(xs_read_bytes, ret);

	xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
	return ret;
}
/*
 * Resolve the XFS inode attached to a vnode by finding the XFS
 * behavior descriptor in the vnode's behavior chain.  Returns NULL
 * when the vnode carries no XFS behavior.
 */
xfs_inode_t *
xfs_vtoi(
	struct vnode	*vp)
{
	bhv_desc_t	*desc = bhv_lookup_range(VN_BHV_HEAD(vp),
						 VNODE_POSITION_XFS,
						 VNODE_POSITION_XFS);

	if (likely(desc != NULL))
		return XFS_BHVTOI(desc);
	return NULL;
}
/*
 * Look up a name in a directory and return the referenced inode.
 *
 * dir_bdp   - behavior descriptor of the directory being searched
 * lock_mode - the lock mode the caller holds on the directory; it is
 *             temporarily dropped around xfs_iget() (see below) and
 *             reacquired before returning
 * dentry    - name to look up
 * inum      - out: inode number found for the name
 * ipp       - out: referenced inode, or NULL on error
 *
 * Returns 0 on success or an XFS error code (e.g. ENOENT).
 */
int
xfs_dir_lookup_int(
	bhv_desc_t	*dir_bdp,
	uint		lock_mode,
	vname_t		*dentry,
	xfs_ino_t	*inum,
	xfs_inode_t	**ipp)
{
	vnode_t		*dir_vp;
	xfs_inode_t	*dp;
	int		error;

	dir_vp = BHV_TO_VNODE(dir_bdp);
	vn_trace_entry(dir_vp, __FUNCTION__, (inst_t *)__return_address);

	dp = XFS_BHVTOI(dir_bdp);

	error = XFS_DIR_LOOKUP(dp->i_mount, NULL, dp,
				VNAME(dentry), VNAMELEN(dentry), inum);
	if (!error) {
		/*
		 * Unlock the directory. We do this because we can't
		 * hold the directory lock while doing the vn_get()
		 * in xfs_iget().  Doing so could cause us to hold
		 * a lock while waiting for the inode to finish
		 * being inactive while it's waiting for a log
		 * reservation in the inactive routine.
		 */
		xfs_iunlock(dp, lock_mode);
		error = xfs_iget(dp->i_mount, NULL, *inum, 0, 0, ipp, 0);
		xfs_ilock(dp, lock_mode);

		if (error) {
			*ipp = NULL;
		} else if ((*ipp)->i_d.di_mode == 0) {
			/*
			 * The inode has been freed.  Something is
			 * wrong so just get out of here.
			 */
			xfs_iunlock(dp, lock_mode);
			/* drop the reference without marking accessed */
			xfs_iput_new(*ipp, 0);
			*ipp = NULL;
			xfs_ilock(dp, lock_mode);
			error = XFS_ERROR(ENOENT);
		}
	}
	return error;
}
/*
 * Map a byte range of a regular file to its on-disk extents by
 * delegating to xfs_iomap().  The sanity asserts check that the inode
 * is a regular file and that its realtime flag agrees with the iocore
 * realtime flag.
 */
int
xfs_bmap(bhv_desc_t	*bdp,
	 xfs_off_t	offset,
	 ssize_t	count,
	 int		flags,
	 xfs_iomap_t	*iomapp,
	 int		*niomaps)
{
	xfs_inode_t	*xip = XFS_BHVTOI(bdp);

	ASSERT((xip->i_d.di_mode & S_IFMT) == S_IFREG);
	ASSERT(((xip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
	       ((xip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));

	return xfs_iomap(&xip->i_iocore, offset, count, flags,
			 iomapp, niomaps);
}
/*
 * xfs_get_dir_entry - resolve a vname to its referenced XFS inode.
 *
 * Finds the XFS behavior on the vname's vnode, takes a hold on the
 * vnode, and hands back the inode in *ipp.  The child inode is not
 * locked.  Returns 0 on success, ENOENT when the vnode carries no
 * XFS behavior (in which case *ipp is NULL).
 */
int
xfs_get_dir_entry(
	vname_t		*dentry,
	xfs_inode_t	**ipp)
{
	vnode_t		*vnode = VNAME_TO_VNODE(dentry);
	bhv_desc_t	*desc = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vnode),
						       &xfs_vnodeops);

	if (desc == NULL) {
		*ipp = NULL;
		return XFS_ERROR(ENOENT);
	}

	/* hold the vnode on behalf of the caller's new reference */
	VN_HOLD(vnode);
	*ipp = XFS_BHVTOI(desc);
	return 0;
}
/*
 * Send file data to the supplied actor via generic_file_sendfile(),
 * holding the iolock shared across the transfer and issuing the DMAPI
 * read event first (unless this is invisible I/O).
 *
 * Returns bytes transferred, or a negative errno.
 */
ssize_t
xfs_sendfile(
	bhv_desc_t		*bdp,
	struct file		*filp,
	loff_t			*offset,
	int			ioflags,
	size_t			count,
	read_actor_t		actor,
	void			*target,
	cred_t			*credp)
{
	xfs_inode_t		*ip = XFS_BHVTOI(bdp);
	xfs_mount_t		*mp = ip->i_mount;
	ssize_t			ret;

	XFS_STATS_INC(xs_read_calls);
	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	xfs_ilock(ip, XFS_IOLOCK_SHARED);

	/* Notify DMAPI of the read, unless this is invisible I/O. */
	if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
	    (!(ioflags & IO_INVIS))) {
		vrwlock_t locktype = VRWLOCK_READ;
		int error;

		error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
				      *offset, count,
				      FILP_DELAY_FLAG(filp), &locktype);
		if (error) {
			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
			return -error;
		}
	}
	xfs_rw_enter_trace(XFS_SENDFILE_ENTER, &ip->i_iocore,
			   (void *)(unsigned long)target, count, *offset,
			   ioflags);
	ret = generic_file_sendfile(filp, offset, count, actor, target);
	if (ret > 0)
		XFS_STATS_ADD(xs_read_bytes, ret);

	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
	return ret;
}
/*
 * Splice data from a pipe into an XFS file.
 *
 * Holds the iolock exclusive across the transfer and issues the DMAPI
 * write event first (unless this is invisible I/O); the data movement
 * itself is delegated to generic_file_splice_write().
 *
 * Returns bytes written, or a negative errno.
 */
ssize_t
xfs_splice_write(
	bhv_desc_t		*bdp,
	struct pipe_inode_info	*pipe,
	struct file		*outfilp,
	loff_t			*ppos,
	size_t			count,
	int			flags,
	int			ioflags,
	cred_t			*credp)
{
	xfs_inode_t		*ip = XFS_BHVTOI(bdp);
	xfs_mount_t		*mp = ip->i_mount;
	ssize_t			ret;

	XFS_STATS_INC(xs_write_calls);
	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	xfs_ilock(ip, XFS_IOLOCK_EXCL);

	/* Notify DMAPI of the impending write, unless invisible I/O. */
	if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_WRITE) &&
	    (!(ioflags & IO_INVIS))) {
		vrwlock_t locktype = VRWLOCK_WRITE;
		int error;

		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, BHV_TO_VNODE(bdp),
				      *ppos, count,
				      FILP_DELAY_FLAG(outfilp), &locktype);
		if (error) {
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return -error;
		}
	}
	xfs_rw_enter_trace(XFS_SPLICE_WRITE_ENTER, &ip->i_iocore,
			   pipe, count, *ppos, ioflags);
	ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
	if (ret > 0)
		XFS_STATS_ADD(xs_write_bytes, ret);

	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return ret;
}
void xfs_page_trace( int tag, struct inode *inode, struct page *page, int mask) { xfs_inode_t *ip; bhv_desc_t *bdp; vnode_t *vp = LINVFS_GET_VP(inode); loff_t isize = i_size_read(inode); loff_t offset = page->index << PAGE_CACHE_SHIFT; int delalloc = -1, unmapped = -1, unwritten = -1; if (page_has_buffers(page)) xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); bdp = vn_bhv_lookup(VN_BHV_HEAD(vp), &xfs_vnodeops); ip = XFS_BHVTOI(bdp); if (!ip->i_rwtrace) return; ktrace_enter(ip->i_rwtrace, (void *)((unsigned long)tag), (void *)ip, (void *)inode, (void *)page, (void *)((unsigned long)mask), (void *)((unsigned long)((ip->i_d.di_size >> 32) & 0xffffffff)), (void *)((unsigned long)(ip->i_d.di_size & 0xffffffff)), (void *)((unsigned long)((isize >> 32) & 0xffffffff)), (void *)((unsigned long)(isize & 0xffffffff)), (void *)((unsigned long)((offset >> 32) & 0xffffffff)), (void *)((unsigned long)(offset & 0xffffffff)), (void *)((unsigned long)delalloc), (void *)((unsigned long)unmapped), (void *)((unsigned long)unwritten), (void *)NULL, (void *)NULL); }
/*
 * Read from an XFS file, either via the buffered path
 * (generic_file_read) or, for IO_ISDIRECT, the direct-I/O path.
 * Direct reads must be block-aligned in buffer address, offset and
 * size.  Issues the DMAPI read event unless the I/O is invisible, and
 * for invisible I/O restores atime afterwards so the read leaves no
 * trace.
 */
ssize_t			/* bytes read, or (-) error */
xfs_read(
	bhv_desc_t	*bdp,
	struct file	*file,
	char		*buf,
	size_t		size,
	loff_t		*offset,
	int		ioflags,
	cred_t		*credp)
{
	ssize_t		ret;
	xfs_fsize_t	n;
	xfs_inode_t	*ip;
	xfs_mount_t	*mp;

	ip = XFS_BHVTOI(bdp);
	mp = ip->i_mount;

	XFS_STATS_INC(xs_read_calls);

	if (unlikely(ioflags & IO_ISDIRECT)) {
		if ((ssize_t)size < 0)
			return -XFS_ERROR(EINVAL);

		/*
		 * Direct I/O requires the user buffer, file offset and
		 * length to be block aligned; a misaligned request at
		 * or past EOF simply reads zero bytes.
		 */
		if (((__psint_t)buf & BBMASK) ||
		    (*offset & mp->m_blockmask) ||
		    (size & mp->m_blockmask)) {
			if (*offset >= ip->i_d.di_size) {
				return (0);
			}
			return -XFS_ERROR(EINVAL);
		}
	}

	/* Clamp the read at the maximum supported file offset. */
	n = XFS_MAXIOFFSET(mp) - *offset;
	if ((n <= 0) || (size == 0))
		return 0;

	if (n < size)
		size = n;

	if (XFS_FORCED_SHUTDOWN(mp)) {
		return -EIO;
	}

	/* The caller may already hold the iolock (IO_ISLOCKED). */
	if (!(ioflags & IO_ISLOCKED))
		xfs_ilock(ip, XFS_IOLOCK_SHARED);

	/* Notify DMAPI of the read, unless this is invisible I/O. */
	if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
	    !(ioflags & IO_INVIS)) {
		int error;
		vrwlock_t locktype = VRWLOCK_READ;
		int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);

		error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
				      *offset, size, dmflags, &locktype);
		if (error) {
			if (!(ioflags & IO_ISLOCKED))
				xfs_iunlock(ip, XFS_IOLOCK_SHARED);
			return -error;
		}
	}

	if (unlikely(ioflags & IO_ISDIRECT)) {
		xfs_rw_enter_trace(XFS_DIORD_ENTER, &ip->i_iocore,
				   buf, size, *offset, ioflags);
		/* don't direct-read past EOF; that just returns 0 */
		ret = (*offset < ip->i_d.di_size) ?
			do_generic_direct_read(file, buf, size, offset) : 0;
		UPDATE_ATIME(file->f_dentry->d_inode);
	} else {
		xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
				   buf, size, *offset, ioflags);
		ret = generic_file_read(file, buf, size, offset);
	}

	if (ret > 0)
		XFS_STATS_ADD(xs_read_bytes, ret);

	if (!(ioflags & IO_ISLOCKED))
		xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	if (unlikely(ioflags & IO_INVIS)) {
		/* generic_file_read updates the atime but we need to
		 * undo that because this I/O was supposed to be invisible.
		 */
		struct inode *inode = LINVFS_GET_IP(BHV_TO_VNODE(bdp));
		inode->i_atime = ip->i_d.di_atime.t_sec;
	} else {
		xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
	}

	return ret;
}
/*
 * Out-of-line wrapper around the XFS_BHVTOI() macro, for callers that
 * need a real function (e.g. to take its address).
 */
xfs_inode_t *
xfs_bhvtoi(bhv_desc_t *bhvp)
{
	xfs_inode_t	*ip = XFS_BHVTOI(bhvp);

	return ip;
}
/* * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to * a file or fs handle. * * XFS_IOC_PATH_TO_FSHANDLE * returns fs handle for a mount point or path within that mount point * XFS_IOC_FD_TO_HANDLE * returns full handle for a FD opened in user space * XFS_IOC_PATH_TO_HANDLE * returns full handle for a path */ STATIC int xfs_find_handle( unsigned int cmd, unsigned long arg) { int hsize; xfs_handle_t handle; xfs_fsop_handlereq_t hreq; struct inode *inode; struct vnode *vp; if (copy_from_user(&hreq, (xfs_fsop_handlereq_t *)arg, sizeof(hreq))) return -XFS_ERROR(EFAULT); memset((char *)&handle, 0, sizeof(handle)); switch (cmd) { case XFS_IOC_PATH_TO_FSHANDLE: case XFS_IOC_PATH_TO_HANDLE: { struct nameidata nd; int error; error = user_path_walk_link(hreq.path, &nd); if (error) return error; ASSERT(nd.dentry); ASSERT(nd.dentry->d_inode); inode = igrab(nd.dentry->d_inode); path_release(&nd); break; } case XFS_IOC_FD_TO_HANDLE: { struct file *file; file = fget(hreq.fd); if (!file) return -EBADF; ASSERT(file->f_dentry); ASSERT(file->f_dentry->d_inode); inode = igrab(file->f_dentry->d_inode); fput(file); break; } default: ASSERT(0); return -XFS_ERROR(EINVAL); } if (inode->i_sb->s_magic != XFS_SB_MAGIC) { /* we're not in XFS anymore, Toto */ iput(inode); return -XFS_ERROR(EINVAL); } /* we need the vnode */ vp = LINVFS_GET_VP(inode); if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { iput(inode); return -XFS_ERROR(EBADF); } /* now we can grab the fsid */ memcpy(&handle.ha_fsid, vp->v_vfsp->vfs_altfsid, sizeof(xfs_fsid_t)); hsize = sizeof(xfs_fsid_t); if (cmd != XFS_IOC_PATH_TO_FSHANDLE) { xfs_inode_t *ip; bhv_desc_t *bhv; int lock_mode; /* need to get access to the xfs_inode to read the generation */ bhv = vn_bhv_lookup_unlocked(VN_BHV_HEAD(vp), &xfs_vnodeops); ASSERT(bhv); ip = XFS_BHVTOI(bhv); ASSERT(ip); lock_mode = xfs_ilock_map_shared(ip); /* fill in fid section of handle from inode */ handle.ha_fid.xfs_fid_len = sizeof(xfs_fid_t) - 
sizeof(handle.ha_fid.xfs_fid_len); handle.ha_fid.xfs_fid_pad = 0; handle.ha_fid.xfs_fid_gen = ip->i_d.di_gen; handle.ha_fid.xfs_fid_ino = ip->i_ino; xfs_iunlock_map_shared(ip, lock_mode); hsize = XFS_HSIZE(handle); } /* now copy our handle into the user buffer & write out the size */ if (copy_to_user((xfs_handle_t *)hreq.ohandle, &handle, hsize) || copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) { iput(inode); return -XFS_ERROR(EFAULT); } iput(inode); return 0; }
/*
 * Main XFS ioctl dispatcher.  Decodes cmd, performs any capability
 * checks and user copies, and hands off to the appropriate helper.
 * Returns 0 or a negative errno (helpers returning positive XFS error
 * codes are negated here).
 *
 * Fixes:
 *  - XFS_IOC_THAW thawed the data device twice and never thawed the
 *    realtime device (copy-paste of m_ddev_targp); now thaws
 *    mp->m_rtdev_targp->pbr_bdev to mirror the FREEZE case.
 *  - XFS_IOC_ERROR_INJECTION returned positive EPERM on a failed
 *    capability check; every other case returns -EPERM.
 */
int
xfs_ioctl(
	bhv_desc_t		*bdp,
	struct inode		*inode,
	struct file		*filp,
	int			ioflags,
	unsigned int		cmd,
	unsigned long		arg)
{
	int			error;
	vnode_t			*vp;
	xfs_inode_t		*ip;
	xfs_mount_t		*mp;

	vp = LINVFS_GET_VP(inode);

	vn_trace_entry(vp, "xfs_ioctl", (inst_t *)__return_address);

	ip = XFS_BHVTOI(bdp);
	mp = ip->i_mount;

	switch (cmd) {

	case XFS_IOC_ALLOCSP:
	case XFS_IOC_FREESP:
	case XFS_IOC_RESVSP:
	case XFS_IOC_UNRESVSP:
	case XFS_IOC_ALLOCSP64:
	case XFS_IOC_FREESP64:
	case XFS_IOC_RESVSP64:
	case XFS_IOC_UNRESVSP64:
		/*
		 * Only allow the sys admin to reserve space unless
		 * unwritten extents are enabled.
		 */
		if (!XFS_SB_VERSION_HASEXTFLGBIT(&mp->m_sb) &&
		    !capable(CAP_SYS_ADMIN))
			return -EPERM;

		return xfs_ioc_space(bdp, vp, filp, ioflags, cmd, arg);

	case XFS_IOC_DIOINFO: {
		struct dioattr	da;

		da.d_miniosz = mp->m_sb.sb_blocksize;
		da.d_mem = mp->m_sb.sb_blocksize;

		/*
		 * this only really needs to be BBSIZE.
		 * it is set to the file system block size to
		 * avoid having to do block zeroing on short writes.
		 */
		da.d_maxiosz = XFS_FSB_TO_B(mp,
				XFS_B_TO_FSBT(mp, KIO_MAX_ATOMIC_IO << 10));

		if (copy_to_user((struct dioattr *)arg, &da, sizeof(da)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_FSBULKSTAT_SINGLE:
	case XFS_IOC_FSBULKSTAT:
	case XFS_IOC_FSINUMBERS:
		return xfs_ioc_bulkstat(mp, cmd, arg);

	case XFS_IOC_FSGEOMETRY_V1:
		return xfs_ioc_fsgeometry_v1(mp, arg);

	case XFS_IOC_FSGEOMETRY:
		return xfs_ioc_fsgeometry(mp, arg);

	case XFS_IOC_GETVERSION:
	case XFS_IOC_GETXFLAGS:
	case XFS_IOC_SETXFLAGS:
	case XFS_IOC_FSGETXATTR:
	case XFS_IOC_FSSETXATTR:
	case XFS_IOC_FSGETXATTRA:
		return xfs_ioc_xattr(vp, ip, filp, cmd, arg);

	case XFS_IOC_FSSETDM: {
		struct fsdmidata	dmi;

		if (copy_from_user(&dmi, (struct fsdmidata *)arg, sizeof(dmi)))
			return -XFS_ERROR(EFAULT);

		error = xfs_set_dmattrs(bdp, dmi.fsd_dmevmask, dmi.fsd_dmstate,
							NULL);
		return -error;
	}

	case XFS_IOC_GETBMAP:
	case XFS_IOC_GETBMAPA:
		return xfs_ioc_getbmap(bdp, filp, ioflags, cmd, arg);

	case XFS_IOC_GETBMAPX:
		return xfs_ioc_getbmapx(bdp, arg);

	case XFS_IOC_FD_TO_HANDLE:
	case XFS_IOC_PATH_TO_HANDLE:
	case XFS_IOC_PATH_TO_FSHANDLE:
		return xfs_find_handle(cmd, arg);

	case XFS_IOC_OPEN_BY_HANDLE:
		return xfs_open_by_handle(mp, arg, filp, inode);

	case XFS_IOC_FSSETDM_BY_HANDLE:
		return xfs_fssetdm_by_handle(mp, arg, filp, inode);

	case XFS_IOC_READLINK_BY_HANDLE:
		return xfs_readlink_by_handle(mp, arg, filp, inode);

	case XFS_IOC_ATTRLIST_BY_HANDLE:
		return xfs_attrlist_by_handle(mp, arg, filp, inode);

	case XFS_IOC_ATTRMULTI_BY_HANDLE:
		return xfs_attrmulti_by_handle(mp, arg, filp, inode);

	case XFS_IOC_SWAPEXT: {
		error = xfs_swapext((struct xfs_swapext *)arg);
		return -error;
	}

	case XFS_IOC_FSCOUNTS: {
		xfs_fsop_counts_t out;

		error = xfs_fs_counts(mp, &out);
		if (error)
			return -error;

		if (copy_to_user((char *)arg, &out, sizeof(out)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_SET_RESBLKS: {
		xfs_fsop_resblks_t inout;
		__uint64_t	   in;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (copy_from_user(&inout, (char *)arg, sizeof(inout)))
			return -XFS_ERROR(EFAULT);

		/* input parameter is passed in resblks field of structure */
		in = inout.resblks;
		error = xfs_reserve_blocks(mp, &in, &inout);
		if (error)
			return -error;

		if (copy_to_user((char *)arg, &inout, sizeof(inout)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_GET_RESBLKS: {
		xfs_fsop_resblks_t out;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		error = xfs_reserve_blocks(mp, NULL, &out);
		if (error)
			return -error;

		if (copy_to_user((char *)arg, &out, sizeof(out)))
			return -XFS_ERROR(EFAULT);

		return 0;
	}

	case XFS_IOC_FSGROWFSDATA: {
		xfs_growfs_data_t in;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (copy_from_user(&in, (char *)arg, sizeof(in)))
			return -XFS_ERROR(EFAULT);

		error = xfs_growfs_data(mp, &in);
		return -error;
	}

	case XFS_IOC_FSGROWFSLOG: {
		xfs_growfs_log_t in;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (copy_from_user(&in, (char *)arg, sizeof(in)))
			return -XFS_ERROR(EFAULT);

		error = xfs_growfs_log(mp, &in);
		return -error;
	}

	case XFS_IOC_FSGROWFSRT: {
		xfs_growfs_rt_t in;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (copy_from_user(&in, (char *)arg, sizeof(in)))
			return -XFS_ERROR(EFAULT);

		error = xfs_growfs_rt(mp, &in);
		return -error;
	}

	case XFS_IOC_FREEZE:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (vp->v_vfsp->vfs_frozen == SB_UNFROZEN) {
			freeze_bdev(mp->m_ddev_targp->pbr_bdev);
			if (mp->m_rtdev_targp)
				freeze_bdev(mp->m_rtdev_targp->pbr_bdev);
		}
		return 0;

	case XFS_IOC_THAW:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		if (vp->v_vfsp->vfs_frozen != SB_UNFROZEN) {
			thaw_bdev(mp->m_ddev_targp->pbr_bdev, inode->i_sb);
			/* thaw the realtime device too, mirroring FREEZE */
			if (mp->m_rtdev_targp)
				thaw_bdev(mp->m_rtdev_targp->pbr_bdev, NULL);
		}
		return 0;

	case XFS_IOC_GOINGDOWN: {
		__uint32_t in;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (get_user(in, (__uint32_t *)arg))
			return -XFS_ERROR(EFAULT);

		error = xfs_fs_goingdown(mp, in);
		return -error;
	}

	case XFS_IOC_ERROR_INJECTION: {
		xfs_error_injection_t in;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (copy_from_user(&in, (char *)arg, sizeof(in)))
			return -XFS_ERROR(EFAULT);

		error = xfs_errortag_add(in.errtag, mp);
		return -error;
	}

	case XFS_IOC_ERROR_CLEARALL:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		error = xfs_errortag_clearall(mp);
		return -error;

	default:
		return -ENOTTY;
	}
}
/*
 * xfs_rename - rename src_vname in src_dir to target_vname in the
 * directory given by target_dir_vp, which may be the same directory
 * or a different one (i.e. a move).
 *
 * Overall shape: issue the DMAPI rename event; lock 2-4 participating
 * inodes via xfs_lock_for_rename(); drop the locks to reserve the
 * transaction; reacquire the locks and join the inodes; create or
 * replace the target entry; fix up link counts and the ".." entry of
 * a moved directory; remove the source entry; commit.  All failure
 * paths funnel through abort_return/error_return/rele_return and then
 * std_return, which sends the DMAPI post-rename event.
 *
 * Returns 0 or a positive XFS error code.
 */
int
xfs_rename(
	bhv_desc_t	*src_dir_bdp,
	bhv_vname_t	*src_vname,
	bhv_vnode_t	*target_dir_vp,
	bhv_vname_t	*target_vname,
	cred_t		*credp)
{
	xfs_trans_t	*tp;
	xfs_inode_t	*src_dp, *target_dp, *src_ip, *target_ip;
	xfs_mount_t	*mp;
	int		new_parent;		/* moving to a new dir */
	int		src_is_directory;	/* src_name is a directory */
	int		error;
	xfs_bmap_free_t free_list;
	xfs_fsblock_t   first_block;
	int		cancel_flags;
	int		committed;
	xfs_inode_t	*inodes[4];
	int		target_ip_dropped = 0;	/* dropped target_ip link? */
	bhv_vnode_t	*src_dir_vp;
	int		spaceres;
	int		target_link_zero = 0;
	int		num_inodes;
	char		*src_name = VNAME(src_vname);
	char		*target_name = VNAME(target_vname);
	int		src_namelen = VNAMELEN(src_vname);
	int		target_namelen = VNAMELEN(target_vname);

	src_dir_vp = BHV_TO_VNODE(src_dir_bdp);
	vn_trace_entry(src_dir_vp, "xfs_rename", (inst_t *)__return_address);
	vn_trace_entry(target_dir_vp, "xfs_rename", (inst_t *)__return_address);

	/*
	 * Find the XFS behavior descriptor for the target directory
	 * vnode since it was not handed to us.  A non-XFS target
	 * means a cross-device rename.
	 */
	target_dp = xfs_vtoi(target_dir_vp);
	if (target_dp == NULL) {
		return XFS_ERROR(EXDEV);
	}

	src_dp = XFS_BHVTOI(src_dir_bdp);
	mp = src_dp->i_mount;

	if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) ||
	    DM_EVENT_ENABLED(target_dir_vp->v_vfsp,
			     target_dp, DM_EVENT_RENAME)) {
		error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME,
					src_dir_vp, DM_RIGHT_NULL,
					target_dir_vp, DM_RIGHT_NULL,
					src_name, target_name,
					0, 0, 0);
		if (error) {
			return error;
		}
	}
	/* Return through std_return after this point. */

	/*
	 * Lock all the participating inodes. Depending upon whether
	 * the target_name exists in the target directory, and
	 * whether the target directory is the same as the source
	 * directory, we can lock from 2 to 4 inodes.
	 * xfs_lock_for_rename() will return ENOENT if src_name
	 * does not exist in the source directory.
	 */
	tp = NULL;
	error = xfs_lock_for_rename(src_dp, target_dp, src_vname,
			target_vname, &src_ip, &target_ip, inodes,
			&num_inodes);

	if (error) {
		/*
		 * We have nothing locked, no inode references, and
		 * no transaction, so just get out.
		 */
		goto std_return;
	}

	ASSERT(src_ip != NULL);

	if ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
		/*
		 * Check for link count overflow on target_dp
		 */
		if (target_ip == NULL && (src_dp != target_dp) &&
		    target_dp->i_d.di_nlink >= XFS_MAXLINK) {
			error = XFS_ERROR(EMLINK);
			xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
			goto rele_return;
		}
	}

	/*
	 * If we are using project inheritance, we only allow renames
	 * into our tree when the project IDs are the same; else the
	 * tree quota mechanism would be circumvented.
	 */
	if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
		     (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
		error = XFS_ERROR(EXDEV);
		xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
		goto rele_return;
	}

	new_parent = (src_dp != target_dp);
	src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);

	/*
	 * Drop the locks on our inodes so that we can start the transaction.
	 */
	xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);

	XFS_BMAP_INIT(&free_list, &first_block);
	tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	spaceres = XFS_RENAME_SPACE_RES(mp, target_namelen);
	error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0,
			XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT);
	if (error == ENOSPC) {
		/* retry without a space reservation; the entry may fit */
		spaceres = 0;
		error = xfs_trans_reserve(tp, 0, XFS_RENAME_LOG_RES(mp), 0,
				XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT);
	}
	if (error) {
		xfs_trans_cancel(tp, 0);
		goto rele_return;
	}

	/*
	 * Attach the dquots to the inodes
	 */
	if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
		xfs_trans_cancel(tp, cancel_flags);
		goto rele_return;
	}

	/*
	 * Reacquire the inode locks we dropped above.
	 */
	xfs_lock_inodes(inodes, num_inodes, 0, XFS_ILOCK_EXCL);

	/*
	 * Join all the inodes to the transaction.  From this point on,
	 * we can rely on either trans_commit or trans_cancel to unlock
	 * them.  Note that we need to add a vnode reference to the
	 * directories since trans_commit & trans_cancel will decrement
	 * them when they unlock the inodes.  Also, we need to be careful
	 * not to add an inode to the transaction more than once.
	 */
	VN_HOLD(src_dir_vp);
	xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
	if (new_parent) {
		VN_HOLD(target_dir_vp);
		xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
	}
	if ((src_ip != src_dp) && (src_ip != target_dp)) {
		xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
	}
	if ((target_ip != NULL) &&
	    (target_ip != src_ip) &&
	    (target_ip != src_dp) &&
	    (target_ip != target_dp)) {
		xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
	}

	/*
	 * Set up the target.
	 */
	if (target_ip == NULL) {
		/*
		 * If there's no space reservation, check the entry will
		 * fit before actually inserting it.
		 */
		if (spaceres == 0 &&
		    (error = xfs_dir_canenter(tp, target_dp, target_name,
						target_namelen)))
			goto error_return;
		/*
		 * If target does not exist and the rename crosses
		 * directories, adjust the target directory link count
		 * to account for the ".." reference from the new entry.
		 */
		error = xfs_dir_createname(tp, target_dp, target_name,
					   target_namelen, src_ip->i_ino,
					   &first_block, &free_list, spaceres);
		if (error == ENOSPC)
			goto error_return;
		if (error)
			goto abort_return;
		xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

		if (new_parent && src_is_directory) {
			error = xfs_bumplink(tp, target_dp);
			if (error)
				goto abort_return;
		}
	} else { /* target_ip != NULL */
		/*
		 * If target exists and it's a directory, check that both
		 * target and source are directories and that target can be
		 * destroyed, or that neither is a directory.
		 */
		if ((target_ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
			/*
			 * Make sure target dir is empty.
			 */
			if (!(xfs_dir_isempty(target_ip)) ||
			    (target_ip->i_d.di_nlink > 2)) {
				error = XFS_ERROR(EEXIST);
				goto error_return;
			}
		}

		/*
		 * Link the source inode under the target name.
		 * If the source inode is a directory and we are moving
		 * it across directories, its ".." entry will be
		 * inconsistent until we replace that down below.
		 *
		 * In case there is already an entry with the same
		 * name at the destination directory, remove it first.
		 */
		error = xfs_dir_replace(tp, target_dp, target_name,
					target_namelen, src_ip->i_ino,
					&first_block, &free_list, spaceres);
		if (error)
			goto abort_return;
		xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

		/*
		 * Decrement the link count on the target since the target
		 * dir no longer points to it.
		 */
		error = xfs_droplink(tp, target_ip);
		if (error)
			goto abort_return;
		target_ip_dropped = 1;

		if (src_is_directory) {
			/*
			 * Drop the link from the old "." entry.
			 */
			error = xfs_droplink(tp, target_ip);
			if (error)
				goto abort_return;
		}

		/* Do this test while we still hold the locks */
		target_link_zero = (target_ip)->i_d.di_nlink==0;

	} /* target_ip != NULL */

	/*
	 * Remove the source.
	 */
	if (new_parent && src_is_directory) {
		/*
		 * Rewrite the ".." entry to point to the new
		 * directory.
		 */
		error = xfs_dir_replace(tp, src_ip, "..", 2,
					target_dp->i_ino, &first_block,
					&free_list, spaceres);
		ASSERT(error != EEXIST);
		if (error)
			goto abort_return;
		xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

	} else {
		/*
		 * We always want to hit the ctime on the source inode.
		 * We do it in the if clause above for the 'new_parent &&
		 * src_is_directory' case, and here we get all the other
		 * cases.  This isn't strictly required by the standards
		 * since the source inode isn't really being changed,
		 * but old unix file systems did it and some incremental
		 * backup programs won't work without it.
		 */
		xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
	}

	/*
	 * Adjust the link count on src_dp.  This is necessary when
	 * renaming a directory, either within one parent when
	 * the target existed, or across two parent directories.
	 */
	if (src_is_directory && (new_parent || target_ip != NULL)) {

		/*
		 * Decrement link count on src_directory since the
		 * entry that's moved no longer points to it.
		 */
		error = xfs_droplink(tp, src_dp);
		if (error)
			goto abort_return;
	}

	error = xfs_dir_removename(tp, src_dp, src_name, src_namelen,
			src_ip->i_ino, &first_block, &free_list, spaceres);
	if (error)
		goto abort_return;
	xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

	/*
	 * Update the generation counts on all the directory inodes
	 * that we're modifying.
	 */
	src_dp->i_gen++;
	xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);

	if (new_parent) {
		target_dp->i_gen++;
		xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
	}

	/*
	 * If there was a target inode, take an extra reference on
	 * it here so that it doesn't go to xfs_inactive() from
	 * within the commit.
	 */
	if (target_ip != NULL) {
		IHOLD(target_ip);
	}

	/*
	 * If this is a synchronous mount, make sure that the
	 * rename transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	/*
	 * Take refs. for vop_link_removed calls below.  No need to worry
	 * about directory refs. because the caller holds them.
	 *
	 * Do holds before the xfs_bmap_finish since it might rele them down
	 * to zero.
	 */

	if (target_ip_dropped)
		IHOLD(target_ip);
	IHOLD(src_ip);

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error) {
		xfs_bmap_cancel(&free_list);
		xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
				 XFS_TRANS_ABORT));
		if (target_ip != NULL) {
			IRELE(target_ip);
		}
		if (target_ip_dropped) {
			IRELE(target_ip);
		}
		IRELE(src_ip);
		goto std_return;
	}

	/*
	 * trans_commit will unlock src_ip, target_ip & decrement
	 * the vnode references.
	 */
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	if (target_ip != NULL) {
		xfs_refcache_purge_ip(target_ip);
		IRELE(target_ip);
	}

	/*
	 * Let interposed file systems know about removed links.
	 */
	if (target_ip_dropped) {
		bhv_vop_link_removed(XFS_ITOV(target_ip), target_dir_vp,
					target_link_zero);
		IRELE(target_ip);
	}

	IRELE(src_ip);

	/* Fall through to std_return with error = 0 or errno from
	 * xfs_trans_commit	 */
std_return:
	if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_POSTRENAME) ||
	    DM_EVENT_ENABLED(target_dir_vp->v_vfsp,
			     target_dp, DM_EVENT_POSTRENAME)) {
		(void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME,
					src_dir_vp, DM_RIGHT_NULL,
					target_dir_vp, DM_RIGHT_NULL,
					src_name, target_name,
					0, error, 0);
	}
	return error;

 abort_return:
	cancel_flags |= XFS_TRANS_ABORT;
	/* FALLTHROUGH */
 error_return:
	xfs_bmap_cancel(&free_list);
	xfs_trans_cancel(tp, cancel_flags);
	goto std_return;

 rele_return:
	IRELE(src_ip);
	if (target_ip != NULL) {
		IRELE(target_ip);
	}
	goto std_return;
}
/*
 * Write data to an XFS file from an iovec array, handling both the
 * buffered and direct-I/O (IO_ISDIRECT) paths.
 *
 * Overall shape: validate/trim the iovec; pick the lock strategy
 * (exclusive iolock + i_sem for buffered or cached direct writes,
 * shared iolock for pure direct writes); send the DMAPI write event;
 * zero any gap between EOF and the write start; clear setuid/setgid;
 * perform the write (retrying after an ENOSPC DMAPI event, and
 * falling back from direct to buffered for writes into holes); update
 * di_size; and handle O_SYNC/O_DSYNC semantics via a log force or a
 * synchronous inode-core transaction.
 *
 * Returns bytes written, or a negative errno.
 */
ssize_t				/* bytes written, or (-) error */
xfs_write(
	bhv_desc_t		*bdp,
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned int		nsegs,
	loff_t			*offset,
	int			ioflags,
	cred_t			*credp)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	unsigned long		segs = nsegs;
	xfs_inode_t		*xip;
	xfs_mount_t		*mp;
	ssize_t			ret = 0, error = 0;
	xfs_fsize_t		isize, new_size;
	xfs_iocore_t		*io;
	vnode_t			*vp;
	unsigned long		seg;
	int			iolock;
	int			eventsent = 0;
	vrwlock_t		locktype;
	size_t			ocount = 0, count;
	loff_t			pos;
	int			need_isem = 1, need_flush = 0;

	XFS_STATS_INC(xs_write_calls);

	vp = BHV_TO_VNODE(bdp);
	xip = XFS_BHVTOI(bdp);

	for (seg = 0; seg < segs; seg++) {
		const struct iovec *iv = &iovp[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		ocount += iv->iov_len;
		if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
			return -EINVAL;
		if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
			continue;
		if (seg == 0)
			return -EFAULT;
		/* truncate the iovec at the first inaccessible segment */
		segs = seg;
		ocount -= iv->iov_len;	/* This segment is no good */
		break;
	}

	count = ocount;
	pos = *offset;

	if (count == 0)
		return 0;

	io = &xip->i_iocore;
	mp = io->io_mount;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	if (ioflags & IO_ISDIRECT) {
		xfs_buftarg_t	*target =
			(xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;

		/* direct I/O must be sector aligned in offset and size */
		/*
		 * NOTE(review): XFS_ERROR(-EINVAL) tags a negative
		 * value; elsewhere the convention is -XFS_ERROR(EINVAL).
		 * The returned value is the same (-EINVAL) but the error
		 * tag won't match for error injection -- confirm intent.
		 */
		if ((pos & target->pbr_smask) || (count & target->pbr_smask))
			return XFS_ERROR(-EINVAL);

		/*
		 * A direct write with no cached pages that doesn't
		 * extend the file can run with just the shared iolock;
		 * cached pages must be flushed first.
		 */
		if (!VN_CACHED(vp) && pos < i_size_read(inode))
			need_isem = 0;

		if (VN_CACHED(vp))
			need_flush = 1;
	}

relock:
	if (need_isem) {
		iolock = XFS_IOLOCK_EXCL;
		locktype = VRWLOCK_WRITE;

		down(&inode->i_sem);
	} else {
		iolock = XFS_IOLOCK_SHARED;
		locktype = VRWLOCK_WRITE_DIRECT;
	}

	xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);

	isize = i_size_read(inode);

	if (file->f_flags & O_APPEND)
		*offset = isize;

start:
	error = -generic_write_checks(file, &pos, &count,
					S_ISBLK(inode->i_mode));
	if (error) {
		xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
		goto out_unlock_isem;
	}

	new_size = pos + count;
	if (new_size > isize)
		io->io_new_size = new_size;

	/* Send the DMAPI write event once, unless invisible I/O. */
	if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
	    !(ioflags & IO_INVIS) && !eventsent)) {
		loff_t		savedsize = pos;
		int		dmflags = FILP_DELAY_FLAG(file);

		if (need_isem)
			dmflags |= DM_FLAGS_ISEM;

		xfs_iunlock(xip, XFS_ILOCK_EXCL);
		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
				      pos, count,
				      dmflags, &locktype);
		if (error) {
			xfs_iunlock(xip, iolock);
			goto out_unlock_isem;
		}
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		eventsent = 1;

		/*
		 * The iolock was dropped and reaquired in XFS_SEND_DATA
		 * so we have to recheck the size when appending.
		 * We will only "goto start;" once, since having sent the
		 * event prevents another call to XFS_SEND_DATA, which is
		 * what allows the size to change in the first place.
		 */
		if ((file->f_flags & O_APPEND) && savedsize != isize) {
			pos = isize = xip->i_d.di_size;
			goto start;
		}
	}

	/*
	 * On Linux, generic_file_write updates the times even if
	 * no data is copied in so long as the write had a size.
	 *
	 * We must update xfs' times since revalidate will overcopy xfs.
	 */
	if (!(ioflags & IO_INVIS)) {
		xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
		inode_update_time(inode, 1);
	}

	/*
	 * If the offset is beyond the size of the file, we have a couple
	 * of things to do. First, if there is already space allocated
	 * we need to either create holes or zero the disk or ...
	 *
	 * If there is a page where the previous size lands, we need
	 * to zero it out up to the new size.
	 */
	if (pos > isize) {
		error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos,
				     isize, pos + count);
		if (error) {
			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
			goto out_unlock_isem;
		}
	}
	xfs_iunlock(xip, XFS_ILOCK_EXCL);

	/*
	 * If we're writing the file then make sure to clear the
	 * setuid and setgid bits if the process is not being run
	 * by root.  This keeps people from modifying setuid and
	 * setgid binaries.
	 */

	if (((xip->i_d.di_mode & S_ISUID) ||
	    ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) ==
		(S_ISGID | S_IXGRP))) &&
	     !capable(CAP_FSETID)) {
		error = xfs_write_clear_setuid(xip);
		if (likely(!error))
			error = -remove_suid(file->f_dentry);
		if (unlikely(error)) {
			xfs_iunlock(xip, iolock);
			goto out_unlock_isem;
		}
	}

retry:
	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	if ((ioflags & IO_ISDIRECT)) {
		/* flush and invalidate any cached pages first */
		if (need_flush) {
			xfs_inval_cached_trace(io, pos, -1,
					ctooff(offtoct(pos)), -1);
			VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)),
					-1, FI_REMAPF_LOCKED);
		}

		if (need_isem) {
			/* demote the lock now the cached pages are gone */
			XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
			up(&inode->i_sem);

			iolock = XFS_IOLOCK_SHARED;
			locktype = VRWLOCK_WRITE_DIRECT;
			need_isem = 0;
		}

 		xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs,
				*offset, ioflags);
		ret = generic_file_direct_write(iocb, iovp,
				&segs, pos, offset, count, ocount);

		/*
		 * direct-io write to a hole: fall through to buffered I/O
		 * for completing the rest of the request.
		 */
		if (ret >= 0 && ret != count) {
			XFS_STATS_ADD(xs_write_bytes, ret);

			pos += ret;
			count -= ret;

			need_isem = 1;
			ioflags &= ~IO_ISDIRECT;
			xfs_iunlock(xip, iolock);
			goto relock;
		}
	} else {
		xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs,
				*offset, ioflags);
		ret = generic_file_buffered_write(iocb, iovp, segs,
				pos, offset, count, ret);
	}

	current->backing_dev_info = NULL;

	if (ret == -EIOCBQUEUED)
		ret = wait_on_sync_kiocb(iocb);

	/*
	 * On ENOSPC, give DMAPI a chance to free space and retry the
	 * write from the (possibly updated) end of file.
	 */
	if ((ret == -ENOSPC) &&
	    DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&
	    !(ioflags & IO_INVIS)) {

		xfs_rwunlock(bdp, locktype);
		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
				DM_RIGHT_NULL, vp, DM_RIGHT_NULL,
				NULL, NULL, 0, 0, 0); /* Delay flag intentionally  unused */
		if (error)
			goto out_unlock_isem;
		xfs_rwlock(bdp, locktype);
		pos = xip->i_d.di_size;
		goto retry;
	}

	/*
	 * If the write extended the file, push the new size into the
	 * XFS incore inode (double-checked under the ilock) and mark
	 * the core/size fields dirty so the update gets logged.
	 */
	if (*offset > xip->i_d.di_size) {
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		if (*offset > xip->i_d.di_size) {
			xip->i_d.di_size = *offset;
			i_size_write(inode, *offset);
			xip->i_update_core = 1;
			xip->i_update_size = 1;
		}
		xfs_iunlock(xip, XFS_ILOCK_EXCL);
	}

	error = -ret;
	if (ret <= 0)
		goto out_unlock_internal;

	XFS_STATS_ADD(xs_write_bytes, ret);

	/* Handle various SYNC-type writes */
	if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
		/*
		 * If we're treating this as O_DSYNC and we have not updated the
		 * size, force the log.
		 */
		if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
		    !(xip->i_update_size)) {
			xfs_inode_log_item_t	*iip = xip->i_itemp;

			/*
			 * If an allocation transaction occurred
			 * without extending the size, then we have to force
			 * the log up the proper point to ensure that the
			 * allocation is permanent.  We can't count on
			 * the fact that buffered writes lock out direct I/O
			 * writes - the direct I/O write could have extended
			 * the size nontransactionally, then finished before
			 * we started.  xfs_write_file will think that the file
			 * didn't grow but the update isn't safe unless the
			 * size change is logged.
			 *
			 * Force the log if we've committed a transaction
			 * against the inode or if someone else has and
			 * the commit record hasn't gone to disk (e.g.
			 * the inode is pinned).  This guarantees that
			 * all changes affecting the inode are permanent
			 * when we return.
			 */
			if (iip && iip->ili_last_lsn) {
				xfs_log_force(mp, iip->ili_last_lsn,
						XFS_LOG_FORCE | XFS_LOG_SYNC);
			} else if (xfs_ipincount(xip) > 0) {
				xfs_log_force(mp, (xfs_lsn_t)0,
						XFS_LOG_FORCE | XFS_LOG_SYNC);
			}

		} else {
			xfs_trans_t	*tp;

			/*
			 * O_SYNC or O_DSYNC _with_ a size update are handled
			 * the same way.
			 *
			 * If the write was synchronous then we need to make
			 * sure that the inode modification time is permanent.
			 * We'll have updated the timestamp above, so here
			 * we use a synchronous transaction to log the inode.
			 * It's not fast, but it's necessary.
			 *
			 * If this a dsync write and the size got changed
			 * non-transactionally, then we need to ensure that
			 * the size change gets logged in a synchronous
			 * transaction.
			 */

			tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
			if ((error = xfs_trans_reserve(tp, 0,
						      XFS_SWRITE_LOG_RES(mp),
						      0, 0, 0))) {
				/* Transaction reserve failed */
				xfs_trans_cancel(tp, 0);
			} else {
				/* Transaction reserve successful */
				xfs_ilock(xip, XFS_ILOCK_EXCL);
				xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL);
				xfs_trans_ihold(tp, xip);
				xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE);
				xfs_trans_set_sync(tp);
				error = xfs_trans_commit(tp, 0, NULL);
				xfs_iunlock(xip, XFS_ILOCK_EXCL);
				if (error)
					goto out_unlock_internal;
			}
		}

		xfs_rwunlock(bdp, locktype);
		if (need_isem)
			up(&inode->i_sem);

		/* flush the dirtied page range for O_SYNC semantics */
		error = sync_page_range(inode, mapping, pos, ret);
		if (!error)
			error = ret;
		return error;
	}

 out_unlock_internal:
	xfs_rwunlock(bdp, locktype);
 out_unlock_isem:
	if (need_isem)
		up(&inode->i_sem);
	return -error;
}
/*
 * xfs_write - behavior-layer write entry point (Linux 2.6 aio/vectored path).
 *
 * Validates the iovec array, chooses buffered vs. direct-I/O locking
 * (i_mutex + exclusive iolock for buffered, shared iolock for direct),
 * issues DMAPI write events, zeroes any gap between EOF and the write
 * offset, then dispatches to generic_file_direct_write() or
 * generic_file_buffered_write().  A short direct write into a hole falls
 * back to buffered I/O via "goto relock"; ENOSPC may be retried once
 * after a DMAPI NOSPACE event.
 *
 * Returns bytes written, or a negative errno.
 */
ssize_t				/* bytes written, or (-) error */
xfs_write(
	bhv_desc_t		*bdp,
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned int		nsegs,
	loff_t			*offset,
	int			ioflags,
	cred_t			*credp)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	unsigned long		segs = nsegs;
	xfs_inode_t		*xip;
	xfs_mount_t		*mp;
	ssize_t			ret = 0, error = 0;
	xfs_fsize_t		isize, new_size;
	xfs_iocore_t		*io;
	bhv_vnode_t		*vp;
	unsigned long		seg;
	int			iolock;
	int			eventsent = 0;
	bhv_vrwlock_t		locktype;
	size_t			ocount = 0, count;
	loff_t			pos;
	int			need_i_mutex = 1, need_flush = 0;

	XFS_STATS_INC(xs_write_calls);

	vp = BHV_TO_VNODE(bdp);
	xip = XFS_BHVTOI(bdp);

	/* Validate the iovec and compute the total byte count (ocount). */
	for (seg = 0; seg < segs; seg++) {
		const struct iovec *iv = &iovp[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		ocount += iv->iov_len;
		if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
			return -EINVAL;
		if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
			continue;
		if (seg == 0)
			return -EFAULT;
		/* Truncate the request at the first inaccessible segment. */
		segs = seg;
		ocount -= iv->iov_len;	/* This segment is no good */
		break;
	}

	count = ocount;
	pos = *offset;

	if (count == 0)
		return 0;

	io = &xip->i_iocore;
	mp = io->io_mount;

	vfs_wait_for_freeze(vp->v_vfsp, SB_FREEZE_WRITE);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	if (ioflags & IO_ISDIRECT) {
		xfs_buftarg_t	*target =
			(xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;

		/* Direct I/O must be sector aligned in offset and length. */
		if ((pos & target->bt_smask) || (count & target->bt_smask))
			return XFS_ERROR(-EINVAL);

		/*
		 * Uncached direct writes inside EOF can use a shared iolock
		 * and skip i_mutex; anything else keeps the exclusive path.
		 */
		if (!VN_CACHED(vp) && pos < i_size_read(inode))
			need_i_mutex = 0;

		if (VN_CACHED(vp))
			need_flush = 1;
	}

relock:
	if (need_i_mutex) {
		iolock = XFS_IOLOCK_EXCL;
		locktype = VRWLOCK_WRITE;

		mutex_lock(&inode->i_mutex);
	} else {
		iolock = XFS_IOLOCK_SHARED;
		locktype = VRWLOCK_WRITE_DIRECT;
	}

	xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);

	isize = i_size_read(inode);

	if (file->f_flags & O_APPEND)
		*offset = isize;

start:
	error = -generic_write_checks(file, &pos, &count,
					S_ISBLK(inode->i_mode));
	if (error) {
		xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
		goto out_unlock_mutex;
	}

	new_size = pos + count;
	if (new_size > isize)
		io->io_new_size = new_size;

	/* Send the DMAPI write event once, before any data is moved. */
	if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
	    !(ioflags & IO_INVIS) && !eventsent)) {
		loff_t		savedsize = pos;
		int		dmflags = FILP_DELAY_FLAG(file);

		if (need_i_mutex)
			dmflags |= DM_FLAGS_IMUX;

		/* ilock is dropped across the event; iolock handling is
		 * delegated to the DMAPI code via &locktype. */
		xfs_iunlock(xip, XFS_ILOCK_EXCL);
		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
				      pos, count,
				      dmflags, &locktype);
		if (error) {
			xfs_iunlock(xip, iolock);
			goto out_unlock_mutex;
		}
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		eventsent = 1;

		/*
		 * The iolock was dropped and reacquired in XFS_SEND_DATA
		 * so we have to recheck the size when appending.
		 * We will only "goto start;" once, since having sent the
		 * event prevents another call to XFS_SEND_DATA, which is
		 * what allows the size to change in the first place.
		 */
		if ((file->f_flags & O_APPEND) && savedsize != isize) {
			pos = isize = xip->i_d.di_size;
			goto start;
		}
	}

	/* Update timestamps unless this is an "invisible" (DMAPI) write. */
	if (likely(!(ioflags & IO_INVIS))) {
		file_update_time(file);
		xfs_ichgtime_fast(xip, inode,
				  XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	}

	/*
	 * If the offset is beyond the size of the file, we have a couple
	 * of things to do. First, if there is already space allocated
	 * we need to either create holes or zero the disk or ...
	 *
	 * If there is a page where the previous size lands, we need
	 * to zero it out up to the new size.
	 */

	if (pos > isize) {
		error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, isize);
		if (error) {
			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
			goto out_unlock_mutex;
		}
	}
	xfs_iunlock(xip, XFS_ILOCK_EXCL);

	/*
	 * If we're writing the file then make sure to clear the
	 * setuid and setgid bits if the process is not being run
	 * by root.  This keeps people from modifying setuid and
	 * setgid binaries.
	 */
	if (((xip->i_d.di_mode & S_ISUID) ||
	    ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) ==
		(S_ISGID | S_IXGRP))) &&
	     !capable(CAP_FSETID)) {
		error = xfs_write_clear_setuid(xip);
		if (likely(!error))
			error = -remove_suid(file->f_path.dentry);
		if (unlikely(error)) {
			xfs_iunlock(xip, iolock);
			goto out_unlock_mutex;
		}
	}

retry:
	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	if ((ioflags & IO_ISDIRECT)) {
		if (need_flush) {
			/* Flush/invalidate cached pages overlapping the
			 * direct write so cache and disk stay coherent. */
			xfs_inval_cached_trace(io, pos, -1,
					ctooff(offtoct(pos)), -1);
			bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)),
					-1, FI_REMAPF_LOCKED);
		}

		if (need_i_mutex) {
			/* demote the lock now the cached pages are gone */
			XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
			mutex_unlock(&inode->i_mutex);

			iolock = XFS_IOLOCK_SHARED;
			locktype = VRWLOCK_WRITE_DIRECT;
			need_i_mutex = 0;
		}

 		xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs,
				*offset, ioflags);
		ret = generic_file_direct_write(iocb, iovp,
				&segs, pos, offset, count, ocount);

		/*
		 * direct-io write to a hole: fall through to buffered I/O
		 * for completing the rest of the request.
		 */
		if (ret >= 0 && ret != count) {
			XFS_STATS_ADD(xs_write_bytes, ret);

			pos += ret;
			count -= ret;

			need_i_mutex = 1;
			ioflags &= ~IO_ISDIRECT;
			xfs_iunlock(xip, iolock);
			goto relock;
		}
	} else {
		xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs,
				*offset, ioflags);
		ret = generic_file_buffered_write(iocb, iovp, segs,
				pos, offset, count, ret);
	}

	current->backing_dev_info = NULL;

	if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO))
		ret = wait_on_sync_kiocb(iocb);

	/* Give DMAPI a chance to free space, then retry the write once. */
	if ((ret == -ENOSPC) &&
	    DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&
	    !(ioflags & IO_INVIS)) {

		xfs_rwunlock(bdp, locktype);
		if (need_i_mutex)
			mutex_unlock(&inode->i_mutex);
		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
				DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
				0, 0, 0); /* Delay flag intentionally  unused */
		if (error)
			goto out_nounlocks;
		if (need_i_mutex)
			mutex_lock(&inode->i_mutex);
		xfs_rwlock(bdp, locktype);
		pos = xip->i_d.di_size;
		ret = 0;
		goto retry;
	}

	/* A failed write must not leave *offset past the current EOF. */
	isize = i_size_read(inode);
	if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize))
		*offset = isize;

	/* Double-checked update of the on-disk file size. */
	if (*offset > xip->i_d.di_size) {
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		if (*offset > xip->i_d.di_size) {
			xip->i_d.di_size = *offset;
			i_size_write(inode, *offset);
			xip->i_update_core = 1;
			xip->i_update_size = 1;
		}
		xfs_iunlock(xip, XFS_ILOCK_EXCL);
	}

	error = -ret;
	if (ret <= 0)
		goto out_unlock_internal;

	XFS_STATS_ADD(xs_write_bytes, ret);

	/* Handle various SYNC-type writes */
	if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
		error = xfs_write_sync_logforce(mp, xip);
		if (error)
			goto out_unlock_internal;

		xfs_rwunlock(bdp, locktype);
		if (need_i_mutex)
			mutex_unlock(&inode->i_mutex);

		error = sync_page_range(inode, mapping, pos, ret);
		if (!error)
			error = ret;
		return error;
	}

 out_unlock_internal:
	xfs_rwunlock(bdp, locktype);
 out_unlock_mutex:
	if (need_i_mutex)
		mutex_unlock(&inode->i_mutex);
 out_nounlocks:
	/* Note: on success error == -ret, so this returns ret. */
	return -error;
}
/*
 * xfs_write - behavior-layer write entry point (FreeBSD uio variant).
 *
 * Port-in-progress: the DMAPI event, setuid clearing, and direct-I/O
 * submission paths are disabled via #ifdef RMC / #if 0, and the
 * corresponding labels are commented out.  The live path clamps the
 * request to XFS_MAXIOFFSET, zeroes any gap past EOF, writes through
 * xfs_write_file(), then updates timestamps, on-disk size, and handles
 * IO_SYNC log forcing.
 *
 * Returns bytes written, or an error.
 * NOTE(review): error conventions are mixed here — early exits return
 * positive errnos (EIO/EINVAL/EFBIG) while the tail returns -error;
 * verify against the VOP caller's expectations.
 */
ssize_t				/* bytes written, or (-) error */
xfs_write(
	bhv_desc_t	*bdp,
	uio_t		*uio,
	int		ioflag,
	cred_t		*credp)
{
	xfs_inode_t	*xip;
	xfs_mount_t	*mp;
	ssize_t		ret = 0;
	int		error = 0;
	xfs_fsize_t	isize, new_size;
	xfs_fsize_t	n, limit;
	xfs_fsize_t	size;
	xfs_iocore_t	*io;
	xfs_vnode_t	*vp;
	int		iolock;
	//int		eventsent = 0;
	vrwlock_t	locktype;
	xfs_off_t	offset_c;
	xfs_off_t	*offset;
	xfs_off_t	pos;

	XFS_STATS_INC(xs_write_calls);

	vp = BHV_TO_VNODE(bdp);
	xip = XFS_BHVTOI(bdp);

	io = &xip->i_iocore;
	mp = io->io_mount;

	if (XFS_FORCED_SHUTDOWN(xip->i_mount)) {
		return EIO;
	}

	/* Work on a local copy of the offset; uio_offset stays untouched. */
	size = uio->uio_resid;
	pos = offset_c = uio->uio_offset;
	offset = &offset_c;

	if (unlikely(ioflag & IO_ISDIRECT)) {
		/*
		 * NOTE(review): `buf` is not declared in this function
		 * (it belongs to the Linux buffer-based variant) — this
		 * branch cannot compile as-is; confirm IO_ISDIRECT is
		 * never set on this path or fix the alignment check.
		 */
		if (((__psint_t)buf & BBMASK) ||
		    (*offset & mp->m_blockmask) ||
		    (size & mp->m_blockmask)) {
			return EINVAL;
		}
		iolock = XFS_IOLOCK_SHARED;
		locktype = VRWLOCK_WRITE_DIRECT;
	} else {
		if (io->io_flags & XFS_IOCORE_RT)
			return EINVAL;
		iolock = XFS_IOLOCK_EXCL;
		locktype = VRWLOCK_WRITE;
	}

	/*
	 * NOTE(review): these assignments override the if/else above,
	 * forcing the exclusive iolock even for direct I/O — presumably
	 * a deliberate port-time simplification; confirm before removing.
	 */
	iolock = XFS_IOLOCK_EXCL;
	locktype = VRWLOCK_WRITE;

	xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);

	isize = xip->i_d.di_size;
	limit = XFS_MAXIOFFSET(mp);

	if (ioflag & O_APPEND)
		*offset = isize;

//start:
	/* Clamp the request so it cannot extend past the maximum offset. */
	n = limit - *offset;
	if (n <= 0) {
		xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
		return EFBIG;
	}
	if (n < size)
		size = n;

	new_size = *offset + size;
	if (new_size > isize) {
		io->io_new_size = new_size;
	}

#ifdef RMC
	/* probably be a long time before if ever that we do dmapi */
	if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
	    !(ioflags & IO_INVIS) && !eventsent)) {
		loff_t		savedsize = *offset;
		int		dmflags = FILP_DELAY_FLAG(file)
				  | DM_SEM_FLAG_RD(ioflags);

		xfs_iunlock(xip, XFS_ILOCK_EXCL);
		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
				      *offset, size,
				      dmflags, &locktype);
		if (error) {
			if (iolock)
				xfs_iunlock(xip, iolock);
			return -error;
		}
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		eventsent = 1;

		/*
		 * The iolock was dropped and reacquired in XFS_SEND_DATA
		 * so we have to recheck the size when appending.
		 * We will only "goto start;" once, since having sent the
		 * event prevents another call to XFS_SEND_DATA, which is
		 * what allows the size to change in the first place.
		 */
		if ((file->f_flags & O_APPEND) &&
		    savedsize != xip->i_d.di_size) {
			*offset = isize = xip->i_d.di_size;
			goto start;
		}
	}
#endif

	/*
	 * If the offset is beyond the size of the file, we have a couple
	 * of things to do. First, if there is already space allocated
	 * we need to either create holes or zero the disk or ...
	 *
	 * If there is a page where the previous size lands, we need
	 * to zero it out up to the new size.
	 */
	if (!(ioflag & IO_ISDIRECT) && (*offset > isize && isize)) {
		error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset,
			isize, *offset + size);
		if (error) {
			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
			return(-error);
		}
	}
	xfs_iunlock(xip, XFS_ILOCK_EXCL);

#if 0
	/*
	 * If we're writing the file then make sure to clear the
	 * setuid and setgid bits if the process is not being run
	 * by root.  This keeps people from modifying setuid and
	 * setgid binaries.
	 */
	if (((xip->i_d.di_mode & S_ISUID) ||
	    ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) ==
		(S_ISGID | S_IXGRP))) &&
	     !capable(CAP_FSETID)) {
		error = xfs_write_clear_setuid(xip);
		if (likely(!error))
			error = -remove_suid(file->f_dentry);
		if (unlikely(error)) {
			xfs_iunlock(xip, iolock);
			goto out_unlock_mutex;
		}
	}
#endif

//retry:
	if (unlikely(ioflag & IO_ISDIRECT)) {
#ifdef RMC
		xfs_off_t	pos = *offset;
		struct address_space *mapping =
			file->f_dentry->d_inode->i_mapping;
		struct inode	*inode = mapping->host;

		ret = precheck_file_write(file, inode, &size, &pos);
		if (ret || size == 0)
			goto error;

		xfs_inval_cached_pages(vp, io, pos, 1, 1);
		inode->i_ctime = inode->i_mtime = CURRENT_TIME;
		/* mark_inode_dirty_sync(inode); - we do this later */

		xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, buf, size, pos,
				ioflags);
		ret = generic_file_direct_IO(WRITE, file, (char *)buf,
				size, pos);
		xfs_inval_cached_pages(vp, io, pos, 1, 1);
		if (ret > 0)
			*offset += ret;
#endif
	} else {
		/*
		 * NOTE(review): `buf` and `ioflags` are undeclared here
		 * (cf. `ioflag` parameter); this trace call presumably
		 * only compiles with tracing stubbed out — verify.
		 */
		xfs_rw_enter_trace(XFS_WRITE_ENTER, io, buf, size, *offset,
				ioflags);
		ret = xfs_write_file(xip,uio,ioflag);
	}

	xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

//error:
	if (ret <= 0) {
		if (iolock)
			xfs_rwunlock(bdp, locktype);
		return ret;
	}

	XFS_STATS_ADD(xs_write_bytes, ret);

	/* Double-checked update of the on-disk file size. */
	if (*offset > xip->i_d.di_size) {
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		if (*offset > xip->i_d.di_size) {
			printf("xfs_write look at doing more here %s:%d\n",
				__FILE__,__LINE__);
#ifdef RMC
			struct inode	*inode = LINVFS_GET_IP(vp);
			i_size_write(inode, *offset);
			mark_inode_dirty_sync(inode);
#endif
			xip->i_d.di_size = *offset;
			xip->i_update_core = 1;
			xip->i_update_size = 1;
		}
		xfs_iunlock(xip, XFS_ILOCK_EXCL);
	}

	/* Handle various SYNC-type writes */
#if 0
//	if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) {
#endif
	if (ioflag & IO_SYNC) {
		/*
		 * If we're treating this as O_DSYNC and we have not updated the
		 * size, force the log.
		 */
		if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
		    !(xip->i_update_size)) {
			xfs_inode_log_item_t	*iip = xip->i_itemp;

			/*
			 * If an allocation transaction occurred
			 * without extending the size, then we have to force
			 * the log up the proper point to ensure that the
			 * allocation is permanent.  We can't count on
			 * the fact that buffered writes lock out direct I/O
			 * writes - the direct I/O write could have extended
			 * the size nontransactionally, then finished before
			 * we started.  xfs_write_file will think that the file
			 * didn't grow but the update isn't safe unless the
			 * size change is logged.
			 *
			 * Force the log if we've committed a transaction
			 * against the inode or if someone else has and
			 * the commit record hasn't gone to disk (e.g.
			 * the inode is pinned).  This guarantees that
			 * all changes affecting the inode are permanent
			 * when we return.
			 */
			if (iip && iip->ili_last_lsn) {
				xfs_log_force(mp, iip->ili_last_lsn,
						XFS_LOG_FORCE | XFS_LOG_SYNC);
			} else if (xfs_ipincount(xip) > 0) {
				xfs_log_force(mp, (xfs_lsn_t)0,
						XFS_LOG_FORCE | XFS_LOG_SYNC);
			}

		} else {
			xfs_trans_t	*tp;

			/*
			 * O_SYNC or O_DSYNC _with_ a size update are handled
			 * the same way.
			 *
			 * If the write was synchronous then we need to make
			 * sure that the inode modification time is permanent.
			 * We'll have updated the timestamp above, so here
			 * we use a synchronous transaction to log the inode.
			 * It's not fast, but it's necessary.
			 *
			 * If this a dsync write and the size got changed
			 * non-transactionally, then we need to ensure that
			 * the size change gets logged in a synchronous
			 * transaction.
			 */
			tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
			if ((error = xfs_trans_reserve(tp, 0,
						      XFS_SWRITE_LOG_RES(mp),
						      0, 0, 0))) {
				/* Transaction reserve failed */
				xfs_trans_cancel(tp, 0);
			} else {
				/* Transaction reserve successful */
				xfs_ilock(xip, XFS_ILOCK_EXCL);
				xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL);
				xfs_trans_ihold(tp, xip);
				xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE);
				xfs_trans_set_sync(tp);
				error = xfs_trans_commit(tp, 0, NULL);
				xfs_iunlock(xip, XFS_ILOCK_EXCL);
			}
			if (error)
				goto out_unlock_internal;
		}

		xfs_rwunlock(bdp, locktype);
		return ret;

	} /* (ioflags & O_SYNC) */

 out_unlock_internal:
	xfs_rwunlock(bdp, locktype);
#if 0
 out_unlock_mutex:
	if (need_i_mutex)
		mutex_unlock(&inode->i_mutex);
#endif
 //out_nounlocks:
	return -error;
}
ssize_t /* bytes read, or (-) error */ xfs_read( bhv_desc_t *bdp, struct kiocb *iocb, const struct iovec *iovp, unsigned int segs, loff_t *offset, int ioflags, cred_t *credp) { struct file *file = iocb->ki_filp; size_t size = 0; ssize_t ret; xfs_fsize_t n; xfs_inode_t *ip; xfs_mount_t *mp; vnode_t *vp; unsigned long seg; ip = XFS_BHVTOI(bdp); vp = BHV_TO_VNODE(bdp); mp = ip->i_mount; vn_trace_entry(vp, "xfs_read", (inst_t *)__return_address); XFS_STATS_INC(xs_read_calls); /* START copy & waste from filemap.c */ for (seg = 0; seg < segs; seg++) { const struct iovec *iv = &iovp[seg]; /* * If any segment has a negative length, or the cumulative * length ever wraps negative then return -EINVAL. */ size += iv->iov_len; if (unlikely((ssize_t)(size|iv->iov_len) < 0)) return XFS_ERROR(-EINVAL); } /* END copy & waste from filemap.c */ if (ioflags & IO_ISDIRECT) { pb_target_t *target = (ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((*offset & target->pbr_smask) || (size & target->pbr_smask)) { if (*offset == ip->i_d.di_size) { return (0); } return -XFS_ERROR(EINVAL); } } n = XFS_MAXIOFFSET(mp) - *offset; if ((n <= 0) || (size == 0)) return 0; if (n < size) size = n; if (XFS_FORCED_SHUTDOWN(mp)) { return -EIO; } /* OK so we are holding the I/O lock for the duration * of the submission, then what happens if the I/O * does not really happen here, but is scheduled * later? */ xfs_ilock(ip, XFS_IOLOCK_SHARED); if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { int error; vrwlock_t locktype = VRWLOCK_READ; error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), *offset, size, FILP_DELAY_FLAG(file), &locktype); if (error) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); return -error; } } ret = __generic_file_aio_read(iocb, iovp, segs, offset); xfs_iunlock(ip, XFS_IOLOCK_SHARED); if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); if (likely(!(ioflags & IO_INVIS))) xfs_ichgtime(ip, XFS_ICHGTIME_ACC); return ret; }
/*
 * Syssgi interface for swapext
 *
 * Atomically swaps the data forks of the two inodes named by the fds in
 * the user's xfs_swapext_t (used by the defragmenter: the target file's
 * extents are exchanged with a freshly laid-out tmp file).  Both inodes
 * are locked in i_ino order to avoid ABBA deadlock; the caller-supplied
 * ctime/mtime must still match the target or the swap aborts with EBUSY.
 */
int
xfs_swapext(
	xfs_swapext_t	__user *sxp)
{
	xfs_swapext_t	sx;
	xfs_inode_t     *ip=NULL, *tip=NULL, *ips[2];
	xfs_trans_t     *tp;
	xfs_mount_t     *mp;
	xfs_bstat_t	*sbp;
	struct file	*fp = NULL, *tfp = NULL;
	vnode_t		*vp, *tvp;
	bhv_desc_t      *bdp, *tbdp;
	vn_bhv_head_t   *bhp, *tbhp;
	uint		lock_flags=0;
	int		ilf_fields, tilf_fields;
	int		error = 0;
	xfs_ifork_t	tempif, *ifp, *tifp;
	__uint64_t	tmp;
	int		aforkblks = 0;
	int		taforkblks = 0;
	int		locked = 0;

	if (copy_from_user(&sx, sxp, sizeof(sx)))
		return XFS_ERROR(EFAULT);

	/* Pull information for the target fd */
	if (((fp = fget((int)sx.sx_fdtarget)) == NULL) ||
	    ((vp = LINVFS_GET_VP(fp->f_dentry->d_inode)) == NULL)) {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	bhp = VN_BHV_HEAD(vp);
	bdp = vn_bhv_lookup(bhp, &xfs_vnodeops);
	if (bdp == NULL) {
		/* Not an XFS vnode */
		error = XFS_ERROR(EBADF);
		goto error0;
	} else {
		ip = XFS_BHVTOI(bdp);
	}

	/* Pull information for the tmp fd */
	if (((tfp = fget((int)sx.sx_fdtmp)) == NULL) ||
	    ((tvp = LINVFS_GET_VP(tfp->f_dentry->d_inode)) == NULL)) {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	tbhp = VN_BHV_HEAD(tvp);
	tbdp = vn_bhv_lookup(tbhp, &xfs_vnodeops);
	if (tbdp == NULL) {
		error = XFS_ERROR(EBADF);
		goto error0;
	} else {
		tip = XFS_BHVTOI(tbdp);
	}

	/* Both files must be on the same mount, and distinct inodes. */
	if (ip->i_mount != tip->i_mount) {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	if (ip->i_ino == tip->i_ino) {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	mp = ip->i_mount;

	sbp = &sx.sx_stat;

	if (XFS_FORCED_SHUTDOWN(mp)) {
		error = XFS_ERROR(EIO);
		goto error0;
	}

	locked = 1;

	/* Lock in i_ino order */
	if (ip->i_ino < tip->i_ino) {
		ips[0] = ip;
		ips[1] = tip;
	} else {
		ips[0] = tip;
		ips[1] = ip;
	}
	lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL;
	xfs_lock_inodes(ips, 2, 0, lock_flags);

	/* Check permissions */
	error = xfs_iaccess(ip, S_IWUSR, NULL);
	if (error)
		goto error0;

	error = xfs_iaccess(tip, S_IWUSR, NULL);
	if (error)
		goto error0;

	/* Verify that both files have the same format */
	if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	/* Verify both files are either real-time or non-realtime */
	if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) !=
	    (tip->i_d.di_flags & XFS_DIFLAG_REALTIME)) {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	/* Should never get a local format */
	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
	    tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	if (VN_CACHED(tvp) != 0)
		xfs_inval_cached_pages(XFS_ITOV(tip), &(tip->i_iocore),
						(xfs_off_t)0, 0, 0);

	/* Verify O_DIRECT for ftmp */
	if (VN_CACHED(tvp) != 0) {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	/* Verify all data are being swapped */
	if (sx.sx_offset != 0 ||
	    sx.sx_length != ip->i_d.di_size ||
	    sx.sx_length != tip->i_d.di_size) {
		error = XFS_ERROR(EFAULT);
		goto error0;
	}

	/*
	 * If the target has extended attributes, the tmp file
	 * must also in order to ensure the correct data fork
	 * format.
	 */
	if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) {
		error = XFS_ERROR(EINVAL);
		goto error0;
	}

	/*
	 * Compare the current change & modify times with that
	 * passed in.  If they differ, we abort this swap.
	 * This is the mechanism used to ensure the calling
	 * process that the file was not changed out from
	 * under it.
	 */
	if ((sbp->bs_ctime.tv_sec != ip->i_d.di_ctime.t_sec) ||
	    (sbp->bs_ctime.tv_nsec != ip->i_d.di_ctime.t_nsec) ||
	    (sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) ||
	    (sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) {
		error = XFS_ERROR(EBUSY);
		goto error0;
	}

	/* We need to fail if the file is memory mapped.  Once we have tossed
	 * all existing pages, the page fault will have no option
	 * but to go to the filesystem for pages.  By making the page fault call
	 * VOP_READ (or write in the case of autogrow) they block on the iolock
	 * until we have switched the extents.
	 */
	if (VN_MAPPED(vp)) {
		error = XFS_ERROR(EBUSY);
		goto error0;
	}

	/* Drop the ilocks (iolocks stay held) before the page toss. */
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	xfs_iunlock(tip, XFS_ILOCK_EXCL);

	/*
	 * There is a race condition here since we gave up the
	 * ilock.  However, the data fork will not change since
	 * we have the iolock (locked for truncation too) so we
	 * are safe.  We don't really care if non-io related
	 * fields change.
	 */
	VOP_TOSS_PAGES(vp, 0, -1, FI_REMAPF);

	tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
	if ((error = xfs_trans_reserve(tp, 0,
				     XFS_ICHANGE_LOG_RES(mp), 0,
				     0, 0))) {
		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
		xfs_iunlock(tip, XFS_IOLOCK_EXCL);
		xfs_trans_cancel(tp, 0);
		return error;
	}
	/* Retake both ilocks (same i_ino ordering). */
	xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);

	/*
	 * Count the number of extended attribute blocks
	 * (attr fork blocks do not move with the data-fork swap, so
	 * di_nblocks must be adjusted by the difference below).
	 */
	if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK,
			&aforkblks);
		if (error) {
			xfs_iunlock(ip, lock_flags);
			xfs_iunlock(tip, lock_flags);
			xfs_trans_cancel(tp, 0);
			return error;
		}
	}
	if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
	     (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
			&taforkblks);
		if (error) {
			xfs_iunlock(ip, lock_flags);
			xfs_iunlock(tip, lock_flags);
			xfs_trans_cancel(tp, 0);
			return error;
		}
	}

	/*
	 * Swap the data forks of the inodes
	 */
	ifp = &ip->i_df;
	tifp = &tip->i_df;
	tempif = *ifp;	/* struct copy */
	*ifp = *tifp;	/* struct copy */
	*tifp = tempif;	/* struct copy */

	/*
	 * Fix the on-disk inode values
	 */
	tmp = (__uint64_t)ip->i_d.di_nblocks;
	ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
	tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;

	tmp = (__uint64_t) ip->i_d.di_nextents;
	ip->i_d.di_nextents = tip->i_d.di_nextents;
	tip->i_d.di_nextents = tmp;

	tmp = (__uint64_t) ip->i_d.di_format;
	ip->i_d.di_format = tip->i_d.di_format;
	tip->i_d.di_format = tmp;

	ilf_fields = XFS_ILOG_CORE;

	switch(ip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		/* If the extents fit in the inode, fix the
		 * pointer.  Otherwise it's already NULL or
		 * pointing to the extent.
		 */
		if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			ifp->if_u1.if_extents =
				ifp->if_u2.if_inline_ext;
		}
		ilf_fields |= XFS_ILOG_DEXT;
		break;
	case XFS_DINODE_FMT_BTREE:
		ilf_fields |= XFS_ILOG_DBROOT;
		break;
	}

	tilf_fields = XFS_ILOG_CORE;

	switch(tip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		/* If the extents fit in the inode, fix the
		 * pointer.  Otherwise it's already NULL or
		 * pointing to the extent.
		 */
		if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			tifp->if_u1.if_extents =
				tifp->if_u2.if_inline_ext;
		}
		tilf_fields |= XFS_ILOG_DEXT;
		break;
	case XFS_DINODE_FMT_BTREE:
		tilf_fields |= XFS_ILOG_DBROOT;
		break;
	}

	/*
	 * Increment vnode ref counts since xfs_trans_commit &
	 * xfs_trans_cancel will both unlock the inodes and
	 * decrement the associated ref counts.
	 */
	VN_HOLD(vp);
	VN_HOLD(tvp);

	xfs_trans_ijoin(tp, ip, lock_flags);
	xfs_trans_ijoin(tp, tip, lock_flags);

	xfs_trans_log_inode(tp, ip, ilf_fields);
	xfs_trans_log_inode(tp, tip, tilf_fields);

	/*
	 * If this is a synchronous mount, make sure that the
	 * transaction goes to disk before returning to the user.
	 */
	if (mp->m_flags & XFS_MOUNT_WSYNC) {
		xfs_trans_set_sync(tp);
	}

	error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT, NULL);

	fput(fp);
	fput(tfp);

	return error;

 error0:
	if (locked) {
		xfs_iunlock(ip, lock_flags);
		xfs_iunlock(tip, lock_flags);
	}

	if (fp != NULL)
		fput(fp);
	if (tfp != NULL)
		fput(tfp);

	return error;
}
struct xfs_inode * xfs_vtoi(struct xfs_vnode *xvp) { return(XFS_BHVTOI(xvp->v_fbhv)); }
/*
 * xfs_compat_ioctl - 32-bit compat entry point for XFS ioctls.
 *
 * Commands whose structures have identical 32/64-bit layout fall through
 * to the native handler unchanged.  Commands with layout differences
 * (flagged per-arch by BROKEN_X86_ALIGNMENT) have their argument
 * thunked to the native layout and their command number rewritten with
 * _NATIVE_IOC before dispatch.  Unknown commands get -ENOIOCTLCMD so the
 * VFS can fall back or fail cleanly.
 */
STATIC long
xfs_compat_ioctl(
	int		mode,
	struct file	*file,
	unsigned	cmd,
	unsigned long	arg)
{
	struct inode	*inode = file->f_path.dentry->d_inode;
	bhv_vnode_t	*vp = vn_from_inode(inode);
	int		error;

	switch (cmd) {
	/* These commands are layout-compatible between 32 and 64 bit. */
	case XFS_IOC_DIOINFO:
	case XFS_IOC_FSGEOMETRY:
	case XFS_IOC_GETVERSION:
	case XFS_IOC_GETXFLAGS:
	case XFS_IOC_SETXFLAGS:
	case XFS_IOC_FSGETXATTR:
	case XFS_IOC_FSSETXATTR:
	case XFS_IOC_FSGETXATTRA:
	case XFS_IOC_FSSETDM:
	case XFS_IOC_GETBMAP:
	case XFS_IOC_GETBMAPA:
	case XFS_IOC_GETBMAPX:
/* not handled
	case XFS_IOC_FSSETDM_BY_HANDLE:
	case XFS_IOC_ATTRLIST_BY_HANDLE:
	case XFS_IOC_ATTRMULTI_BY_HANDLE:
*/
	case XFS_IOC_FSCOUNTS:
	case XFS_IOC_SET_RESBLKS:
	case XFS_IOC_GET_RESBLKS:
	case XFS_IOC_FSGROWFSDATA:
	case XFS_IOC_FSGROWFSLOG:
	case XFS_IOC_FSGROWFSRT:
	case XFS_IOC_FREEZE:
	case XFS_IOC_THAW:
	case XFS_IOC_GOINGDOWN:
	case XFS_IOC_ERROR_INJECTION:
	case XFS_IOC_ERROR_CLEARALL:
		break;

#ifdef BROKEN_X86_ALIGNMENT
	/* xfs_flock_t has wrong u32 vs u64 alignment */
	case XFS_IOC_ALLOCSP_32:
	case XFS_IOC_FREESP_32:
	case XFS_IOC_ALLOCSP64_32:
	case XFS_IOC_FREESP64_32:
	case XFS_IOC_RESVSP_32:
	case XFS_IOC_UNRESVSP_32:
	case XFS_IOC_RESVSP64_32:
	case XFS_IOC_UNRESVSP64_32:
		/* Thunk the 32-bit flock layout to native, renumber cmd. */
		arg = xfs_ioctl32_flock(arg);
		cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
		break;

	case XFS_IOC_FSGEOMETRY_V1_32:
		arg = xfs_ioctl32_geom_v1(arg);
		cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1);
		break;

#else /* These are handled fine if no alignment issues */
	case XFS_IOC_ALLOCSP:
	case XFS_IOC_FREESP:
	case XFS_IOC_RESVSP:
	case XFS_IOC_UNRESVSP:
	case XFS_IOC_ALLOCSP64:
	case XFS_IOC_FREESP64:
	case XFS_IOC_RESVSP64:
	case XFS_IOC_UNRESVSP64:
	case XFS_IOC_FSGEOMETRY_V1:
		break;

	/* xfs_bstat_t still has wrong u32 vs u64 alignment */
	case XFS_IOC_SWAPEXT:
		break;

#endif
	case XFS_IOC_FSBULKSTAT_32:
	case XFS_IOC_FSBULKSTAT_SINGLE_32:
	case XFS_IOC_FSINUMBERS_32:
		/* Bulkstat needs full per-record translation; handled
		 * entirely by the compat helper, bypassing bhv_vop_ioctl. */
		cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq);
		return xfs_ioc_bulkstat_compat(XFS_BHVTOI(VNHEAD(vp))->i_mount,
				cmd, (void __user*)arg);
	case XFS_IOC_FD_TO_HANDLE_32:
	case XFS_IOC_PATH_TO_HANDLE_32:
	case XFS_IOC_PATH_TO_FSHANDLE_32:
	case XFS_IOC_OPEN_BY_HANDLE_32:
	case XFS_IOC_READLINK_BY_HANDLE_32:
		arg = xfs_ioctl32_fshandle(arg);
		cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
		break;
	default:
		return -ENOIOCTLCMD;
	}

	error = bhv_vop_ioctl(vp, inode, file, mode, cmd, (void __user *)arg);
	/* NOTE(review): VMODIFY is applied unconditionally, even for
	 * read-only commands — presumably intentional; confirm. */
	VMODIFY(vp);

	return error;
}
/*
 * xfs_write - behavior-layer write entry point (Linux 2.4 buffer path).
 *
 * Takes the iolock (shared for aligned direct I/O, exclusive otherwise,
 * none at all under IO_ISLOCKED), clamps the request to XFS_MAXIOFFSET,
 * issues DMAPI write events, zeroes any gap past EOF, clears setuid/
 * setgid bits, then writes via do_generic_direct_write() or
 * do_generic_file_write().  Handles the DMAPI NOSPACE retry, on-disk
 * size update, O_SYNC log forcing, and NFS reference-cache insertion.
 *
 * Returns bytes written, or a negative errno.
 */
ssize_t			/* bytes written, or (-) error */
xfs_write(
	bhv_desc_t	*bdp,
	struct file	*file,
	const char	*buf,
	size_t		size,
	loff_t		*offset,
	int		ioflags,
	cred_t		*credp)
{
	xfs_inode_t	*xip;
	xfs_mount_t	*mp;
	ssize_t		ret;
	int		error = 0;
	xfs_fsize_t	isize, new_size;
	xfs_fsize_t	n, limit;
	xfs_iocore_t	*io;
	vnode_t		*vp;
	int		iolock;
	int		eventsent = 0;
	vrwlock_t	locktype;

	XFS_STATS_INC(xs_write_calls);

	vp = BHV_TO_VNODE(bdp);
	xip = XFS_BHVTOI(bdp);

	if (size == 0)
		return 0;

	io = &xip->i_iocore;
	mp = io->io_mount;

	fs_check_frozen(vp->v_vfsp, SB_FREEZE_WRITE);

	if (XFS_FORCED_SHUTDOWN(xip->i_mount)) {
		return -EIO;
	}

	if (unlikely(ioflags & IO_ISDIRECT)) {
		/* Direct I/O: buffer, offset and length must all be
		 * block aligned; a shared iolock then suffices. */
		if (((__psint_t)buf & BBMASK) ||
		    (*offset & mp->m_blockmask) ||
		    (size & mp->m_blockmask)) {
			return XFS_ERROR(-EINVAL);
		}
		iolock = XFS_IOLOCK_SHARED;
		locktype = VRWLOCK_WRITE_DIRECT;
	} else {
		iolock = XFS_IOLOCK_EXCL;
		locktype = VRWLOCK_WRITE;
	}

	/* Caller already holds the iolock; don't take or drop it here. */
	if (ioflags & IO_ISLOCKED)
		iolock = 0;

	xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);

	isize = xip->i_d.di_size;
	limit = XFS_MAXIOFFSET(mp);

	if (file->f_flags & O_APPEND)
		*offset = isize;

start:
	/* Clamp the request so it cannot extend past the maximum offset. */
	n = limit - *offset;
	if (n <= 0) {
		xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
		return -EFBIG;
	}
	if (n < size)
		size = n;

	new_size = *offset + size;
	if (new_size > isize) {
		io->io_new_size = new_size;
	}

	/* Send the DMAPI write event once, before any data is moved. */
	if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
	    !(ioflags & IO_INVIS) && !eventsent)) {
		loff_t		savedsize = *offset;
		int		dmflags = FILP_DELAY_FLAG(file)
				  | DM_SEM_FLAG_RD(ioflags);

		xfs_iunlock(xip, XFS_ILOCK_EXCL);
		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
				      *offset, size,
				      dmflags, &locktype);
		if (error) {
			if (iolock)
				xfs_iunlock(xip, iolock);
			return -error;
		}
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		eventsent = 1;

		/*
		 * The iolock was dropped and reacquired in XFS_SEND_DATA
		 * so we have to recheck the size when appending.
		 * We will only "goto start;" once, since having sent the
		 * event prevents another call to XFS_SEND_DATA, which is
		 * what allows the size to change in the first place.
		 */
		if ((file->f_flags & O_APPEND) &&
		    savedsize != xip->i_d.di_size) {
			*offset = isize = xip->i_d.di_size;
			goto start;
		}
	}

	/*
	 * If the offset is beyond the size of the file, we have a couple
	 * of things to do. First, if there is already space allocated
	 * we need to either create holes or zero the disk or ...
	 *
	 * If there is a page where the previous size lands, we need
	 * to zero it out up to the new size.
	 */

	if (!(ioflags & IO_ISDIRECT) && (*offset > isize && isize)) {
		error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset,
			isize, *offset + size);
		if (error) {
			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
			return(-error);
		}
	}
	xfs_iunlock(xip, XFS_ILOCK_EXCL);

	/*
	 * If we're writing the file then make sure to clear the
	 * setuid and setgid bits if the process is not being run
	 * by root.  This keeps people from modifying setuid and
	 * setgid binaries.
	 */
	if (((xip->i_d.di_mode & S_ISUID) ||
	    ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) ==
		(S_ISGID | S_IXGRP))) &&
	     !capable(CAP_FSETID)) {
		error = xfs_write_clear_setuid(xip);
		if (error) {
			xfs_iunlock(xip, iolock);
			return -error;
		}
	}

	/* Validate the user buffer before handing it to the copy loop. */
	if ((ssize_t) size < 0) {
		ret = -EINVAL;
		goto error;
	}

	if (!access_ok(VERIFY_READ, buf, size)) {
		ret = -EINVAL;
		goto error;
	}

retry:
	if (unlikely(ioflags & IO_ISDIRECT)) {
		xfs_inval_cached_pages(vp, io, *offset, 1, 1);
		xfs_rw_enter_trace(XFS_DIOWR_ENTER,
				io, buf, size, *offset, ioflags);
		ret = do_generic_direct_write(file, buf, size, offset);
	} else {
		xfs_rw_enter_trace(XFS_WRITE_ENTER,
				io, buf, size, *offset, ioflags);
		ret = do_generic_file_write(file, buf, size, offset);
	}

	if (unlikely(ioflags & IO_INVIS)) {
		/* generic_file_write updates the mtime/ctime but we need
		 * to undo that because this I/O was supposed to be
		 * invisible.
		 */
		struct inode	*inode = LINVFS_GET_IP(vp);
		inode->i_mtime = xip->i_d.di_mtime.t_sec;
		inode->i_ctime = xip->i_d.di_ctime.t_sec;
	} else {
		xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	}

	/* Give DMAPI a chance to free space, then retry the write. */
	if ((ret == -ENOSPC) &&
	    DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&
	    !(ioflags & IO_INVIS)) {

		xfs_rwunlock(bdp, locktype);
		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
				DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
				0, 0, 0); /* Delay flag intentionally  unused */
		if (error)
			return -error;
		xfs_rwlock(bdp, locktype);
		*offset = xip->i_d.di_size;
		goto retry;
	}

error:
	if (ret <= 0) {
		if (iolock)
			xfs_rwunlock(bdp, locktype);
		return ret;
	}

	XFS_STATS_ADD(xs_write_bytes, ret);

	/* Double-checked update of the on-disk and in-core file size. */
	if (*offset > xip->i_d.di_size) {
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		if (*offset > xip->i_d.di_size) {
			struct inode	*inode = LINVFS_GET_IP(vp);
			xip->i_d.di_size = *offset;
			i_size_write(inode, *offset);
			xip->i_update_core = 1;
			xip->i_update_size = 1;
			mark_inode_dirty_sync(inode);
		}
		xfs_iunlock(xip, XFS_ILOCK_EXCL);
	}

	/* Handle various SYNC-type writes */
	if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) {
		/*
		 * If we're treating this as O_DSYNC and we have not updated the
		 * size, force the log.
		 */
		if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
		    !(xip->i_update_size)) {
			/*
			 * If an allocation transaction occurred
			 * without extending the size, then we have to force
			 * the log up the proper point to ensure that the
			 * allocation is permanent.  We can't count on
			 * the fact that buffered writes lock out direct I/O
			 * writes - the direct I/O write could have extended
			 * the size nontransactionally, then finished before
			 * we started.  xfs_write_file will think that the file
			 * didn't grow but the update isn't safe unless the
			 * size change is logged.
			 *
			 * Force the log if we've committed a transaction
			 * against the inode or if someone else has and
			 * the commit record hasn't gone to disk (e.g.
			 * the inode is pinned).  This guarantees that
			 * all changes affecting the inode are permanent
			 * when we return.
			 */
			xfs_inode_log_item_t *iip;
			xfs_lsn_t lsn;

			iip = xip->i_itemp;
			if (iip && iip->ili_last_lsn) {
				lsn = iip->ili_last_lsn;
				xfs_log_force(mp, lsn,
						XFS_LOG_FORCE | XFS_LOG_SYNC);
			} else if (xfs_ipincount(xip) > 0) {
				xfs_log_force(mp, (xfs_lsn_t)0,
						XFS_LOG_FORCE | XFS_LOG_SYNC);
			}
		} else {
			xfs_trans_t	*tp;

			/*
			 * O_SYNC or O_DSYNC _with_ a size update are handled
			 * the same way.
			 *
			 * If the write was synchronous then we need to make
			 * sure that the inode modification time is permanent.
			 * We'll have updated the timestamp above, so here
			 * we use a synchronous transaction to log the inode.
			 * It's not fast, but it's necessary.
			 *
			 * If this a dsync write and the size got changed
			 * non-transactionally, then we need to ensure that
			 * the size change gets logged in a synchronous
			 * transaction.
			 */
			tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
			if ((error = xfs_trans_reserve(tp, 0,
						      XFS_SWRITE_LOG_RES(mp),
						      0, 0, 0))) {
				/* Transaction reserve failed */
				xfs_trans_cancel(tp, 0);
			} else {
				/* Transaction reserve successful */
				xfs_ilock(xip, XFS_ILOCK_EXCL);
				xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL);
				xfs_trans_ihold(tp, xip);
				xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE);
				xfs_trans_set_sync(tp);
				error = xfs_trans_commit(tp, 0, NULL);
				xfs_iunlock(xip, XFS_ILOCK_EXCL);
			}
		}
	} /* (ioflags & O_SYNC) */

	/*
	 * If we are coming from an nfsd thread then insert into the
	 * reference cache.
	 */

	if (!strcmp(current->comm, "nfsd"))
		xfs_refcache_insert(xip);

	/* Drop lock this way - the old refcache release is in here */
	if (iolock)
		xfs_rwunlock(bdp, locktype);

	return(ret);
}
/*
 * Vectored read path for XFS (kiocb/iovec variant).
 *
 * bdp     - behavior descriptor resolving to the XFS inode
 * iocb    - kiocb carrying the file pointer; may complete asynchronously
 * iovp    - array of user iovecs to read into
 * segs    - number of iovecs
 * offset  - in/out file position
 * ioflags - IO_ISDIRECT / IO_INVIS modifier flags
 * credp   - credentials (unused here; part of the vnode-op signature)
 *
 * Returns bytes read, or a negative errno.
 *
 * Locking: for direct I/O, i_sem is taken BEFORE XFS_IOLOCK_SHARED;
 * both are released on every exit path via the unlock_isem tail.
 */
ssize_t			/* bytes read, or (-)  error */
xfs_read(
	bhv_desc_t	*bdp,
	struct kiocb	*iocb,
	const struct iovec *iovp,
	unsigned int	segs,
	loff_t		*offset,
	int		ioflags,
	cred_t		*credp)
{
	struct file	*file = iocb->ki_filp;
	struct inode	*inode = file->f_mapping->host;
	size_t		size = 0;
	ssize_t		ret;
	xfs_fsize_t	n;
	xfs_inode_t	*ip;
	xfs_mount_t	*mp;
	vnode_t		*vp;
	unsigned long	seg;

	ip = XFS_BHVTOI(bdp);
	vp = BHV_TO_VNODE(bdp);
	mp = ip->i_mount;

	XFS_STATS_INC(xs_read_calls);

	/* START copy & waste from filemap.c */
	for (seg = 0; seg < segs; seg++) {
		const struct iovec *iv = &iovp[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		size += iv->iov_len;
		if (unlikely((ssize_t)(size|iv->iov_len) < 0))
			return XFS_ERROR(-EINVAL);
	}
	/* END copy & waste from filemap.c */

	if (unlikely(ioflags & IO_ISDIRECT)) {
		/*
		 * Direct I/O alignment is checked against the sector mask
		 * of the target device (realtime or data, per inode flag).
		 */
		xfs_buftarg_t	*target =
			(ip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;
		if ((*offset & target->pbr_smask) ||
		    (size & target->pbr_smask)) {
			/* A misaligned read exactly at EOF reads nothing. */
			if (*offset == ip->i_d.di_size) {
				return (0);
			}
			return -XFS_ERROR(EINVAL);
		}
	}

	/* Clamp the request so it cannot read past the maximum offset. */
	n = XFS_MAXIOFFSET(mp) - *offset;
	if ((n <= 0) || (size == 0))
		return 0;

	if (n < size)
		size = n;

	if (XFS_FORCED_SHUTDOWN(mp)) {
		return -EIO;
	}

	/* Lock order for direct I/O: i_sem first, then the XFS iolock. */
	if (unlikely(ioflags & IO_ISDIRECT))
		down(&inode->i_sem);
	xfs_ilock(ip, XFS_IOLOCK_SHARED);

	if (DM_EVENT_ENABLED(vp->v_vfsp, ip, DM_EVENT_READ) &&
	    !(ioflags & IO_INVIS)) {
		vrwlock_t locktype = VRWLOCK_READ;

		/* Notify the DMAPI application; a non-zero result aborts. */
		ret = -XFS_SEND_DATA(mp, DM_EVENT_READ,
					BHV_TO_VNODE(bdp), *offset, size,
					FILP_DELAY_FLAG(file), &locktype);
		if (ret) {
			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
			goto unlock_isem;
		}
	}

	xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
				(void *)iovp, segs, *offset, ioflags);
	ret = __generic_file_aio_read(iocb, iovp, segs, offset);
	/* If the read was queued asynchronously, wait for it here. */
	if (ret == -EIOCBQUEUED)
		ret = wait_on_sync_kiocb(iocb);
	if (ret > 0)
		XFS_STATS_ADD(xs_read_bytes, ret);

	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	/* Invisible I/O must not bump the access time. */
	if (likely(!(ioflags & IO_INVIS)))
		xfs_ichgtime(ip, XFS_ICHGTIME_ACC);

unlock_isem:
	if (unlikely(ioflags & IO_ISDIRECT))
		up(&inode->i_sem);
	return ret;
}
ssize_t /* bytes read, or (-) error */ xfs_read( bhv_desc_t *bdp, uio_t *uio, int ioflags, cred_t *credp) { ssize_t ret, size; xfs_fsize_t n; xfs_inode_t *ip; xfs_mount_t *mp; ip = XFS_BHVTOI(bdp); mp = ip->i_mount; XFS_STATS_INC(xs_read_calls); if (unlikely(ioflags & IO_ISDIRECT)) { if (((__psint_t)buf & BBMASK) || (uio->uio_offset & mp->m_blockmask) || (uio->uio_resid & mp->m_blockmask)) { if (uio->uio_offset >= ip->i_d.di_size) { return (0); } return EINVAL; } } if (uio->uio_resid == 0) return 0; n = XFS_MAXIOFFSET(mp) - uio->uio_offset; if (n <= 0) return EFBIG; size = (n < uio->uio_resid)? n : uio->uio_resid; if (XFS_FORCED_SHUTDOWN(mp)) { return EIO; } xfs_ilock(ip, XFS_IOLOCK_SHARED); #ifdef XXX if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { int error; vrwlock_t locktype = VRWLOCK_READ; int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp), uio->uio_offset, size, dmflags, &locktype); if (error) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); return (error); } } #endif ret = xfs_read_file(mp, ip, uio, ioflags); xfs_iunlock(ip, XFS_IOLOCK_SHARED); XFS_STATS_ADD(xs_read_bytes, ret); if (likely((ioflags & IO_INVIS) == 0)) { xfs_ichgtime(ip, XFS_ICHGTIME_ACC); } return ret; }
/*
 * Vectored write path for XFS (kiocb/iovec variant).
 *
 * bdp     - behavior descriptor resolving to the XFS inode being written
 * iocb    - kiocb carrying the file pointer
 * iovp    - array of user iovecs to copy from
 * segs    - number of iovecs
 * offset  - in/out file position; updated by the generic write helper
 * ioflags - IO_ISDIRECT / IO_INVIS modifier flags
 * credp   - credentials (unused here; part of the vnode-op signature)
 *
 * Returns bytes written, or a negative errno.
 *
 * Locking: takes XFS_ILOCK_EXCL plus either XFS_IOLOCK_SHARED (direct)
 * or XFS_IOLOCK_EXCL (buffered); the ILOCK is dropped around the DMAPI
 * event send and around the actual data copy.
 */
ssize_t				/* bytes written, or (-) error */
xfs_write(
	bhv_desc_t	*bdp,
	struct kiocb	*iocb,
	const struct iovec *iovp,
	unsigned int	segs,
	loff_t		*offset,
	int		ioflags,
	cred_t		*credp)
{
	struct file	*file = iocb->ki_filp;
	size_t		size = 0;
	xfs_inode_t	*xip;
	xfs_mount_t	*mp;
	ssize_t		ret;
	int		error = 0;
	xfs_fsize_t	isize, new_size;
	xfs_fsize_t	n, limit;
	xfs_iocore_t	*io;
	vnode_t		*vp;
	unsigned long	seg;
	int		iolock;
	int		eventsent = 0;	/* DMAPI write event sent at most once */
	vrwlock_t	locktype;

	XFS_STATS_INC(xs_write_calls);

	vp = BHV_TO_VNODE(bdp);
	vn_trace_entry(vp, "xfs_write", (inst_t *)__return_address);
	xip = XFS_BHVTOI(bdp);

	/* START copy & waste from filemap.c */
	for (seg = 0; seg < segs; seg++) {
		const struct iovec *iv = &iovp[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		size += iv->iov_len;
		if (unlikely((ssize_t)(size|iv->iov_len) < 0))
			return XFS_ERROR(-EINVAL);
	}
	/* END copy & waste from filemap.c */

	/* Zero-length writes succeed trivially. */
	if (size == 0)
		return 0;

	io = &(xip->i_iocore);
	mp = io->io_mount;

	/* Block here if the filesystem is frozen for snapshot. */
	xfs_check_frozen(mp, bdp, XFS_FREEZE_WRITE);

	if (XFS_FORCED_SHUTDOWN(mp)) {
		return -EIO;
	}

	if (ioflags & IO_ISDIRECT) {
		/*
		 * Direct I/O alignment is checked against the sector mask
		 * of the target device (realtime or data, per inode flag).
		 */
		pb_target_t	*target =
			(xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;

		if ((*offset & target->pbr_smask) ||
		    (size & target->pbr_smask)) {
			return XFS_ERROR(-EINVAL);
		}
		/* Direct writers can share the iolock. */
		iolock = XFS_IOLOCK_SHARED;
		locktype = VRWLOCK_WRITE_DIRECT;
	} else {
		iolock = XFS_IOLOCK_EXCL;
		locktype = VRWLOCK_WRITE;
	}

	xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);

	isize = xip->i_d.di_size;
	limit = XFS_MAXIOFFSET(mp);

	if (file->f_flags & O_APPEND)
		*offset = isize;

start:
	/* Clamp the write so it cannot extend past the maximum offset. */
	n = limit - *offset;
	if (n <= 0) {
		xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
		return -EFBIG;
	}
	if (n < size)
		size = n;

	new_size = *offset + size;
	if (new_size > isize) {
		/* Advertise the anticipated new EOF to the I/O core. */
		io->io_new_size = new_size;
	}

	if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
	    !(ioflags & IO_INVIS) && !eventsent)) {
		loff_t		savedsize = *offset;

		/* Drop the ILOCK (not the iolock) around the DMAPI send. */
		xfs_iunlock(xip, XFS_ILOCK_EXCL);
		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
				      *offset, size,
				      FILP_DELAY_FLAG(file), &locktype);
		if (error) {
			xfs_iunlock(xip, iolock);
			return -error;
		}
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		eventsent = 1;

		/*
		 * The iolock was dropped and reacquired in XFS_SEND_DATA
		 * so we have to recheck the size when appending.
		 * We will only "goto start;" once, since having sent the
		 * event prevents another call to XFS_SEND_DATA, which is
		 * what allows the size to change in the first place.
		 */
		if ((file->f_flags & O_APPEND) &&
		    savedsize != xip->i_d.di_size) {
			*offset = isize = xip->i_d.di_size;
			goto start;
		}
	}

	/*
	 * On Linux, generic_file_write updates the times even if
	 * no data is copied in so long as the write had a size.
	 *
	 * We must update xfs' times since revalidate will overcopy xfs.
	 */
	if (size && !(ioflags & IO_INVIS))
		xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);

	/*
	 * If the offset is beyond the size of the file, we have a couple
	 * of things to do. First, if there is already space allocated
	 * we need to either create holes or zero the disk or ...
	 *
	 * If there is a page where the previous size lands, we need
	 * to zero it out up to the new size.
	 */
	if (!(ioflags & IO_ISDIRECT) && (*offset > isize && isize)) {
		error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset,
			isize, *offset + size);
		if (error) {
			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
			return(-error);
		}
	}
	xfs_iunlock(xip, XFS_ILOCK_EXCL);

	/*
	 * If we're writing the file then make sure to clear the
	 * setuid and setgid bits if the process is not being run
	 * by root.  This keeps people from modifying setuid and
	 * setgid binaries.
	 */
	if (((xip->i_d.di_mode & S_ISUID) ||
	    ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) ==
		(S_ISGID | S_IXGRP))) &&
	     !capable(CAP_FSETID)) {
		error = xfs_write_clear_setuid(xip);
		if (error) {
			xfs_iunlock(xip, iolock);
			return -error;
		}
	}

retry:
	if (ioflags & IO_ISDIRECT) {
		/* Shoot down cached pages so direct I/O sees fresh disk. */
		xfs_inval_cached_pages(vp, &xip->i_iocore, *offset, 1, 1);
	}

	ret = generic_file_aio_write_nolock(iocb, iovp, segs, offset);

	/*
	 * On ENOSPC, give the DMAPI space-management application a chance
	 * to free space, then retry the write once from the current EOF.
	 */
	if ((ret == -ENOSPC) &&
	    DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&
	    !(ioflags & IO_INVIS)) {
		xfs_rwunlock(bdp, locktype);
		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
				DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
				0, 0, 0); /* Delay flag intentionally unused */
		if (error)
			return -error;
		xfs_rwlock(bdp, locktype);
		*offset = xip->i_d.di_size;
		goto retry;
	}

	/*
	 * If the write extended the file, update both the on-disk size in
	 * the XFS inode and the Linux inode's i_size.  Re-check under
	 * ILOCK_EXCL since another writer may have raced us here.
	 */
	if (*offset > xip->i_d.di_size) {
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		if (*offset > xip->i_d.di_size) {
			struct inode	*inode = LINVFS_GET_IP(vp);
			xip->i_d.di_size = *offset;
			i_size_write(inode, *offset);
			xip->i_update_core = 1;
			xip->i_update_size = 1;
		}
		xfs_iunlock(xip, XFS_ILOCK_EXCL);
	}

	if (ret <= 0) {
		xfs_rwunlock(bdp, locktype);
		return ret;
	}

	XFS_STATS_ADD(xs_write_bytes, ret);

	/* Handle various SYNC-type writes */
	if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) {
		/*
		 * If we're treating this as O_DSYNC and we have not updated the
		 * size, force the log.
		 */
		if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
		    !(xip->i_update_size)) {
			/*
			 * If an allocation transaction occurred
			 * without extending the size, then we have to force
			 * the log up the proper point to ensure that the
			 * allocation is permanent.  We can't count on
			 * the fact that buffered writes lock out direct I/O
			 * writes - the direct I/O write could have extended
			 * the size nontransactionally, then finished before
			 * we started.  xfs_write_file will think that the file
			 * didn't grow but the update isn't safe unless the
			 * size change is logged.
			 *
			 * Force the log if we've committed a transaction
			 * against the inode or if someone else has and
			 * the commit record hasn't gone to disk (e.g.
			 * the inode is pinned).  This guarantees that
			 * all changes affecting the inode are permanent
			 * when we return.
			 */
			xfs_inode_log_item_t *iip;
			xfs_lsn_t lsn;

			iip = xip->i_itemp;
			if (iip && iip->ili_last_lsn) {
				lsn = iip->ili_last_lsn;
				xfs_log_force(mp, lsn,
						XFS_LOG_FORCE | XFS_LOG_SYNC);
			} else if (xfs_ipincount(xip) > 0) {
				xfs_log_force(mp, (xfs_lsn_t)0,
						XFS_LOG_FORCE | XFS_LOG_SYNC);
			}
		} else {
			xfs_trans_t	*tp;

			/*
			 * O_SYNC or O_DSYNC _with_ a size update are handled
			 * the same way.
			 *
			 * If the write was synchronous then we need to make
			 * sure that the inode modification time is permanent.
			 * We'll have updated the timestamp above, so here
			 * we use a synchronous transaction to log the inode.
			 * It's not fast, but it's necessary.
			 *
			 * If this a dsync write and the size got changed
			 * non-transactionally, then we need to ensure that
			 * the size change gets logged in a synchronous
			 * transaction.
			 */
			tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
			if ((error = xfs_trans_reserve(tp, 0,
						      XFS_SWRITE_LOG_RES(mp),
						      0, 0, 0))) {
				/* Transaction reserve failed */
				xfs_trans_cancel(tp, 0);
			} else {
				/* Transaction reserve successful */
				xfs_ilock(xip, XFS_ILOCK_EXCL);
				xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL);
				xfs_trans_ihold(tp, xip);
				xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE);
				xfs_trans_set_sync(tp);
				error = xfs_trans_commit(tp, 0, (xfs_lsn_t)0);
				xfs_iunlock(xip, XFS_ILOCK_EXCL);
			}
		}
	} /* (ioflags & O_SYNC) */

	xfs_rwunlock(bdp, locktype);
	return(ret);
}