void
xfs_initialize_vnode(
	bhv_desc_t	*bdp,
	vnode_t		*vp,
	bhv_desc_t	*inode_bhv,
	int		unlock)
{
	xfs_inode_t	*ip = XFS_BHVTOI(inode_bhv);
	struct inode	*inode = LINVFS_GET_IP(vp);

	if (!inode_bhv->bd_vobj) {
		vp->v_vfsp = bhvtovfs(bdp);
		bhv_desc_init(inode_bhv, ip, vp, &xfs_vnodeops);
		bhv_insert(VN_BHV_HEAD(vp), inode_bhv);
	}

	vp->v_type = IFTOVT(ip->i_d.di_mode);

	/* Have we been called during the new inode create process,
	 * in which case we are too early to fill in the Linux inode.
	 */
	if (vp->v_type == VNON)
		return;

	xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);

	/* For new inodes we need to set the ops vectors,
	 * and unlock the inode.
	 */
	if (unlock && (inode->i_state & I_NEW)) {
		xfs_set_inodeops(inode);
		unlock_new_inode(inode);
	}
}
STATIC struct dentry *
linvfs_get_parent(
	struct dentry	*child)
{
	int		error;
	vnode_t		*vp, *cvp;
	struct dentry	*parent;
	struct inode	*ip = NULL;
	struct dentry	dotdot;

	dotdot.d_name.name = "..";
	dotdot.d_name.len = 2;
	dotdot.d_inode = NULL;

	cvp = NULL;
	vp = LINVFS_GET_VP(child->d_inode);
	VOP_LOOKUP(vp, &dotdot, &cvp, 0, NULL, NULL, error);
	if (!error) {
		ASSERT(cvp);
		ip = LINVFS_GET_IP(cvp);
		if (!ip) {
			VN_RELE(cvp);
			return ERR_PTR(-EACCES);
		}
	}
	if (error)
		return ERR_PTR(-error);

	parent = d_alloc_anon(ip);
	if (!parent) {
		VN_RELE(cvp);
		parent = ERR_PTR(-ENOMEM);
	}
	return parent;
}
/*
 * Revalidate the Linux inode from the vattr.
 * Note: i_size is _not_ updated; we must hold the inode
 * semaphore when doing that - it is the caller's responsibility.
 */
void
vn_revalidate_core(
	struct vnode	*vp,
	vattr_t		*vap)
{
	struct inode	*inode = LINVFS_GET_IP(vp);

	inode->i_mode	= VTTOIF(vap->va_type) | vap->va_mode;
	inode->i_nlink	= vap->va_nlink;
	inode->i_uid	= vap->va_uid;
	inode->i_gid	= vap->va_gid;
	inode->i_blocks	= vap->va_nblocks;
	inode->i_mtime	= vap->va_mtime;
	inode->i_ctime	= vap->va_ctime;
	inode->i_atime	= vap->va_atime;

	if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
		inode->i_flags |= S_IMMUTABLE;
	else
		inode->i_flags &= ~S_IMMUTABLE;
	if (vap->va_xflags & XFS_XFLAG_APPEND)
		inode->i_flags |= S_APPEND;
	else
		inode->i_flags &= ~S_APPEND;
	if (vap->va_xflags & XFS_XFLAG_SYNC)
		inode->i_flags |= S_SYNC;
	else
		inode->i_flags &= ~S_SYNC;
	if (vap->va_xflags & XFS_XFLAG_NOATIME)
		inode->i_flags |= S_NOATIME;
	else
		inode->i_flags &= ~S_NOATIME;
}
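/*
 * Illustrative sketch (not from the original source): how a caller
 * might pair vn_revalidate_core() with an i_size update.  Per the
 * comment above, i_size is deliberately left alone, so a caller that
 * also wants the size refreshed must take the inode semaphore around
 * i_size_write() itself.  The helper name below is hypothetical.
 */
STATIC void
example_revalidate_with_size(struct vnode *vp, vattr_t *vap)
{
	struct inode	*inode = LINVFS_GET_IP(vp);

	vn_revalidate_core(vp, vap);	/* everything except i_size */

	down(&inode->i_sem);		/* serialize size updates */
	i_size_write(inode, vap->va_size);
	up(&inode->i_sem);
}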
/*
 * Get a reference on a vnode.
 */
vnode_t *
vn_get(
	struct vnode	*vp,
	vmap_t		*vmap)
{
	struct inode	*inode;

	XFS_STATS_INC(vn_get);
	inode = LINVFS_GET_IP(vp);
	if (inode->i_state & I_FREEING)
		return NULL;

	inode = VFS_GET_INODE(vmap->v_vfsp, vmap->v_ino, IGET_NOALLOC);
	if (!inode)	/* Inode not present */
		return NULL;

	/* We do not want to create new inodes via vn_get,
	 * returning NULL here is OK.
	 */
	if (inode->i_state & I_NEW) {
		vn_mark_bad(vp);
		unlock_new_inode(inode);
		iput(inode);
		return NULL;
	}

	vn_trace_exit(vp, "vn_get", (inst_t *)__return_address);
	return vp;
}
STATIC struct dentry *
linvfs_get_dentry(
	struct super_block	*sb,
	void			*data)
{
	vnode_t			*vp;
	struct inode		*inode;
	struct dentry		*result;
	xfs_fid2_t		xfid;
	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
	int			error;

	xfid.fid_len = sizeof(xfs_fid2_t) - sizeof(xfid.fid_len);
	xfid.fid_pad = 0;
	xfid.fid_gen = ((__u32 *)data)[1];
	xfid.fid_ino = ((__u32 *)data)[0];

	VFS_VGET(vfsp, &vp, (fid_t *)&xfid, error);
	if (error || vp == NULL)
		return ERR_PTR(-ESTALE);

	inode = LINVFS_GET_IP(vp);
	result = d_alloc_anon(inode);
	if (!result) {
		iput(inode);
		return ERR_PTR(-ENOMEM);
	}
	return result;
}
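/*
 * Illustrative note (an assumption, not from the original source): the
 * void *data decoded above is treated as two __u32 words in the order
 * { inode number, generation }, i.e. the fileid layout below.  The
 * struct name is hypothetical and exists only to make that layout
 * explicit.
 */
struct example_xfs_fileid {
	__u32	ino;	/* ((__u32 *)data)[0] */
	__u32	gen;	/* ((__u32 *)data)[1] */
};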
STATIC int
linvfs_symlink(
	struct inode	*dir,
	struct dentry	*dentry,
	const char	*symname)
{
	struct inode	*ip;
	vattr_t		va;
	vnode_t		*dvp;	/* directory containing name of symlink */
	vnode_t		*cvp;	/* used to lookup symlink to put in dentry */
	int		error;

	dvp = LINVFS_GET_VP(dir);
	cvp = NULL;

	memset(&va, 0, sizeof(va));
	va.va_mode = S_IFLNK |
		(irix_symlink_mode ? 0777 & ~current->fs->umask : S_IRWXUGO);
	va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;

	error = 0;
	VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
	if (likely(!error && cvp)) {
		error = linvfs_init_security(cvp, dir);
		if (likely(!error)) {
			ip = LINVFS_GET_IP(cvp);
			d_instantiate(dentry, ip);
			validate_fields(dir);
			validate_fields(ip);
		}
	}
	return -error;
}
/*
 * Hook in SELinux.  This is not quite correct yet, what we really need
 * here (as we do for default ACLs) is a mechanism by which creation of
 * these attrs can be journalled at inode creation time (along with the
 * inode, of course, such that log replay can't cause these to be lost).
 */
STATIC int
linvfs_init_security(
	struct vnode	*vp,
	struct inode	*dir)
{
	struct inode	*ip = LINVFS_GET_IP(vp);
	size_t		length;
	void		*value;
	char		*name;
	int		error;

	error = security_inode_init_security(ip, dir, &name, &value, &length);
	if (error) {
		if (error == -EOPNOTSUPP)
			return 0;
		return -error;
	}

	VOP_ATTR_SET(vp, name, value, length, ATTR_SECURE, NULL, error);
	if (!error)
		VMODIFY(vp);

	kfree(name);
	kfree(value);
	return error;
}
STATIC struct dentry *
linvfs_lookup(
	struct inode	*dir,
	struct dentry	*dentry)
{
	struct inode	*ip = NULL;
	vnode_t		*vp, *cvp = NULL;
	int		error;

	if (dentry->d_name.len >= MAXNAMELEN)
		return ERR_PTR(-ENAMETOOLONG);

	vp = LINVFS_GET_VP(dir);
	VOP_LOOKUP(vp, dentry, &cvp, 0, NULL, NULL, error);
	if (!error) {
		ASSERT(cvp);
		ip = LINVFS_GET_IP(cvp);
		if (!ip) {
			VN_RELE(cvp);
			return ERR_PTR(-EACCES);
		}
	}
	if (error && (error != ENOENT))
		return ERR_PTR(-error);
	d_add(dentry, ip);	/* Negative entry goes in if ip is NULL */
	return NULL;
}
STATIC int
linvfs_symlink(
	struct inode	*dir,
	struct dentry	*dentry,
	const char	*symname)
{
	struct inode	*ip;
	vattr_t		va;
	vnode_t		*dvp;	/* directory containing name of symlink */
	vnode_t		*cvp;	/* used to lookup symlink to put in dentry */
	int		error;

	dvp = LINVFS_GET_VP(dir);
	cvp = NULL;

	memset(&va, 0, sizeof(va));
	va.va_type = VLNK;
	va.va_mode = irix_symlink_mode ?
		0777 & ~current->fs->umask : S_IRWXUGO;
	va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;

	error = 0;
	VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
	if (!error && cvp) {
		ASSERT(cvp->v_type == VLNK);
		ip = LINVFS_GET_IP(cvp);
		d_instantiate(dentry, ip);
		validate_fields(dir);
		validate_fields(ip);	/* size needs update */
	}
	return -error;
}
STATIC void
init_once(
	void		*data,
	kmem_cache_t	*cachep,
	unsigned long	flags)
{
	vnode_t		*vp = (vnode_t *)data;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR)
		inode_init_once(LINVFS_GET_IP(vp));
}
STATIC void
linvfs_unfreeze_fs(
	struct super_block	*sb)
{
	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
	vnode_t			*vp;
	int			error;

	VFS_ROOT(vfsp, &vp, error);
	VOP_IOCTL(vp, LINVFS_GET_IP(vp), NULL, 0, XFS_IOC_THAW, 0, error);
	VN_RELE(vp);
}
STATIC struct inode *
linvfs_alloc_inode(
	struct super_block	*sb)
{
	vnode_t			*vp;

	vp = (vnode_t *)kmem_cache_alloc(linvfs_inode_cachep,
				kmem_flags_convert(KM_SLEEP));
	if (!vp)
		return NULL;
	return LINVFS_GET_IP(vp);
}
/*
 * vnode pcache layer for vnode_tosspages.
 * 'last' parameter unused but left in for IRIX compatibility
 */
void
fs_tosspages(
	bhv_desc_t	*bdp,
	xfs_off_t	first,
	xfs_off_t	last,
	int		fiopt)
{
	vnode_t		*vp = BHV_TO_VNODE(bdp);
	struct inode	*ip = LINVFS_GET_IP(vp);

	if (VN_CACHED(vp))
		truncate_inode_pages(ip->i_mapping, first);
}
STATIC void
linvfs_freeze_fs(
	struct super_block	*sb)
{
	vfs_t			*vfsp = LINVFS_GET_VFS(sb);
	vnode_t			*vp;
	int			error;

	if (sb->s_flags & MS_RDONLY)
		return;
	VFS_ROOT(vfsp, &vp, error);
	VOP_IOCTL(vp, LINVFS_GET_IP(vp), NULL, 0, XFS_IOC_FREEZE, 0, error);
	VN_RELE(vp);
}
/*
 * Add a reference to a referenced vnode.
 */
struct vnode *
vn_hold(
	struct vnode	*vp)
{
	struct inode	*inode;

	XFS_STATS_INC(vn_hold);

	VN_LOCK(vp);
	inode = igrab(LINVFS_GET_IP(vp));
	ASSERT(inode);
	VN_UNLOCK(vp, 0);

	return vp;
}
STATIC __inline__ void
xfs_revalidate_inode(
	xfs_mount_t	*mp,
	vnode_t		*vp,
	xfs_inode_t	*ip)
{
	struct inode	*inode = LINVFS_GET_IP(vp);

	inode->i_mode	= (ip->i_d.di_mode & MODEMASK) | VTTOIF(vp->v_type);
	inode->i_nlink	= ip->i_d.di_nlink;
	inode->i_uid	= ip->i_d.di_uid;
	inode->i_gid	= ip->i_d.di_gid;
	if (((1 << vp->v_type) & ((1<<VBLK) | (1<<VCHR))) == 0) {
		inode->i_rdev = 0;
	} else {
		xfs_dev_t dev = ip->i_df.if_u2.if_rdev;
		inode->i_rdev = MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev));
	}
	inode->i_blksize = PAGE_CACHE_SIZE;
	inode->i_generation = ip->i_d.di_gen;
	i_size_write(inode, ip->i_d.di_size);
	inode->i_blocks =
		XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
	inode->i_atime.tv_sec	= ip->i_d.di_atime.t_sec;
	inode->i_atime.tv_nsec	= ip->i_d.di_atime.t_nsec;
	inode->i_mtime.tv_sec	= ip->i_d.di_mtime.t_sec;
	inode->i_mtime.tv_nsec	= ip->i_d.di_mtime.t_nsec;
	inode->i_ctime.tv_sec	= ip->i_d.di_ctime.t_sec;
	inode->i_ctime.tv_nsec	= ip->i_d.di_ctime.t_nsec;
	if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
		inode->i_flags |= S_IMMUTABLE;
	else
		inode->i_flags &= ~S_IMMUTABLE;
	if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
		inode->i_flags |= S_APPEND;
	else
		inode->i_flags &= ~S_APPEND;
	if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
		inode->i_flags |= S_SYNC;
	else
		inode->i_flags &= ~S_SYNC;
	if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
		inode->i_flags |= S_NOATIME;
	else
		inode->i_flags &= ~S_NOATIME;
	vp->v_flag &= ~VMODIFIED;
}
/*
 * vnode pcache layer for vnode_flushinval_pages.
 * 'last' parameter unused but left in for IRIX compatibility
 */
void
fs_flushinval_pages(
	bhv_desc_t	*bdp,
	xfs_off_t	first,
	xfs_off_t	last,
	int		fiopt)
{
	vnode_t		*vp = BHV_TO_VNODE(bdp);
	struct inode	*ip = LINVFS_GET_IP(vp);

	if (VN_CACHED(vp)) {
		filemap_fdatasync(ip->i_mapping);
		fsync_inode_data_buffers(ip);
		filemap_fdatawait(ip->i_mapping);
		truncate_inode_pages(ip->i_mapping, first);
	}
}
/*
 * Revalidate the Linux inode from the vnode.
 */
int
vn_revalidate(
	struct vnode	*vp)
{
	struct inode	*inode;
	vattr_t		va;
	int		error;

	vn_trace_entry(vp, "vn_revalidate", (inst_t *)__return_address);
	ASSERT(vp->v_fbhv != NULL);

	va.va_mask = XFS_AT_STAT|XFS_AT_XFLAGS;
	VOP_GETATTR(vp, &va, 0, NULL, error);
	if (!error) {
		inode = LINVFS_GET_IP(vp);
		inode->i_mode	= VTTOIF(va.va_type) | va.va_mode;
		inode->i_nlink	= va.va_nlink;
		inode->i_uid	= va.va_uid;
		inode->i_gid	= va.va_gid;
		inode->i_blocks	= va.va_nblocks;
		inode->i_mtime	= va.va_mtime;
		inode->i_ctime	= va.va_ctime;
		inode->i_atime	= va.va_atime;
		if (va.va_xflags & XFS_XFLAG_IMMUTABLE)
			inode->i_flags |= S_IMMUTABLE;
		else
			inode->i_flags &= ~S_IMMUTABLE;
		if (va.va_xflags & XFS_XFLAG_APPEND)
			inode->i_flags |= S_APPEND;
		else
			inode->i_flags &= ~S_APPEND;
		if (va.va_xflags & XFS_XFLAG_SYNC)
			inode->i_flags |= S_SYNC;
		else
			inode->i_flags &= ~S_SYNC;
		if (va.va_xflags & XFS_XFLAG_NOATIME)
			inode->i_flags |= S_NOATIME;
		else
			inode->i_flags &= ~S_NOATIME;
		VUNMODIFY(vp);
	}
	return -error;
}
/*
 * vnode pcache layer for vnode_flush_pages.
 * 'last' parameter unused but left in for IRIX compatibility
 */
int
fs_flush_pages(
	bhv_desc_t	*bdp,
	xfs_off_t	first,
	xfs_off_t	last,
	uint64_t	flags,
	int		fiopt)
{
	vnode_t		*vp = BHV_TO_VNODE(bdp);
	struct inode	*ip = LINVFS_GET_IP(vp);

	if (VN_CACHED(vp)) {
		filemap_fdatasync(ip->i_mapping);
		fsync_inode_data_buffers(ip);
		filemap_fdatawait(ip->i_mapping);
	}
	return 0;
}
/*
 * Get a reference on a vnode.
 */
vnode_t *
vn_get(
	struct vnode	*vp,
	vmap_t		*vmap)
{
	struct inode	*inode;

	XFS_STATS_INC(vn_get);
	inode = LINVFS_GET_IP(vp);
	if (inode->i_state & I_FREEING)
		return NULL;

	inode = ilookup(vmap->v_vfsp->vfs_super, vmap->v_ino);
	if (!inode)	/* Inode not present */
		return NULL;

	vn_trace_exit(vp, "vn_get", (inst_t *)__return_address);
	return vp;
}
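/*
 * Illustrative sketch (an assumption about typical usage, not from the
 * original source): vn_get() revalidates a vnode from a vmap_t snapshot
 * of its identity, taken while a reference was still held - e.g. across
 * a region that drops all references and locks.  The function below is
 * hypothetical; it fills only the vmap fields vn_get() consults.
 */
STATIC vnode_t *
example_vn_get_usage(vnode_t *vp)
{
	vmap_t		vmap;

	/* Snapshot the identity before letting go of the vnode. */
	vmap.v_vfsp = vp->v_vfsp;
	vmap.v_ino = LINVFS_GET_IP(vp)->i_ino;
	VN_RELE(vp);		/* vnode may now be reclaimed */

	/* Later: try to reattach; NULL means the inode went away. */
	return vn_get(vp, &vmap);
}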
STATIC int
linvfs_symlink(
	struct inode	*dir,
	struct dentry	*dentry,
	const char	*symname)
{
	int		error;
	vnode_t		*dvp;	/* directory containing name of symlink */
	vnode_t		*cvp;	/* used to lookup symlink to put in dentry */
	vattr_t		va;
	struct inode	*ip = NULL;

	dvp = LINVFS_GET_VP(dir);

	bzero(&va, sizeof(va));
	va.va_type = VLNK;
	va.va_mode = irix_symlink_mode ?
		0777 & ~current->fs->umask : S_IRWXUGO;
	va.va_mask = AT_TYPE|AT_MODE;

	error = 0;
	VOP_SYMLINK(dvp, dentry, &va, (char *)symname, &cvp, NULL, error);
	if (!error) {
		ASSERT(cvp);
		ASSERT(cvp->v_type == VLNK);
		ip = LINVFS_GET_IP(cvp);
		if (!ip) {
			error = ENOMEM;
			VN_RELE(cvp);
		} else {
			/* linvfs_revalidate_core returns (-) errors */
			error = -linvfs_revalidate_core(ip, ATTR_COMM);
			d_instantiate(dentry, ip);
			validate_fields(dir);
			validate_fields(ip);	/* size needs update */
			mark_inode_dirty_sync(ip);
			mark_inode_dirty_sync(dir);
		}
	}
	return -error;
}
/*
 * Change the requested timestamp in the given inode.
 * We don't lock across timestamp updates, and we don't log them but
 * we do record the fact that there is dirty information in core.
 *
 * NOTE -- callers MUST combine XFS_ICHGTIME_MOD or XFS_ICHGTIME_CHG
 *		with XFS_ICHGTIME_ACC to be sure that access time
 *		update will take.  Calling first with XFS_ICHGTIME_ACC
 *		and then XFS_ICHGTIME_MOD may fail to modify the access
 *		timestamp if the filesystem is mounted noacctm.
 */
void
xfs_ichgtime(
	xfs_inode_t	*ip,
	int		flags)
{
	struct inode	*inode = LINVFS_GET_IP(XFS_ITOV(ip));
	timespec_t	tv;

	nanotime(&tv);
	if (flags & XFS_ICHGTIME_MOD) {
		inode->i_mtime = tv;
		ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
		ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
	}
	if (flags & XFS_ICHGTIME_ACC) {
		inode->i_atime = tv;
		ip->i_d.di_atime.t_sec = (__int32_t)tv.tv_sec;
		ip->i_d.di_atime.t_nsec = (__int32_t)tv.tv_nsec;
	}
	if (flags & XFS_ICHGTIME_CHG) {
		inode->i_ctime = tv;
		ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec;
		ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec;
	}

	/*
	 * We update the i_update_core field _after_ changing
	 * the timestamps in order to coordinate properly with
	 * xfs_iflush() so that we don't lose timestamp updates.
	 * This keeps us from having to hold the inode lock
	 * while doing this.  We use the SYNCHRONIZE macro to
	 * ensure that the compiler does not reorder the update
	 * of i_update_core above the timestamp updates above.
	 */
	SYNCHRONIZE();
	ip->i_update_core = 1;
	if (!(inode->i_state & I_LOCK))
		mark_inode_dirty_sync(inode);
}
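/*
 * Illustrative sketch (not from the original source): per the NOTE
 * above, a caller that wants both a modification time and an access
 * time update should pass the flags combined in a single call.  The
 * function name is hypothetical.
 */
STATIC void
example_touch_mod_and_acc(xfs_inode_t *ip)
{
	/* Correct: one call with the flags OR'd together. */
	xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_ACC);

	/*
	 * Risky alternative: calling with XFS_ICHGTIME_ACC first and
	 * then XFS_ICHGTIME_MOD may fail to update the access
	 * timestamp (see the comment above xfs_ichgtime).
	 */
}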
STATIC struct dentry *
linvfs_lookup(
	struct inode	*dir,
	struct dentry	*dentry,
	struct nameidata *nd)
{
	struct vnode	*vp = LINVFS_GET_VP(dir), *cvp;
	int		error;

	if (dentry->d_name.len >= MAXNAMELEN)
		return ERR_PTR(-ENAMETOOLONG);

	VOP_LOOKUP(vp, dentry, &cvp, 0, NULL, NULL, error);
	if (error) {
		if (unlikely(error != ENOENT))
			return ERR_PTR(-error);
		d_add(dentry, NULL);
		return NULL;
	}

	return d_splice_alias(LINVFS_GET_IP(cvp), dentry);
}
STATIC inline void
cleanup_inode(
	vnode_t		*dvp,
	vnode_t		*vp,
	struct dentry	*dentry,
	int		mode)
{
	struct dentry	teardown = {};
	int		err2;

	/* Oh, the horror.
	 * If we can't add the ACL or we fail in
	 * linvfs_init_security we must back out.
	 * ENOSPC can hit here, among other things.
	 */
	teardown.d_inode = LINVFS_GET_IP(vp);
	teardown.d_name = dentry->d_name;

	if (S_ISDIR(mode))
		VOP_RMDIR(dvp, &teardown, NULL, err2);
	else
		VOP_REMOVE(dvp, &teardown, NULL, err2);
	VN_RELE(vp);
}
STATIC int
linvfs_mknod(
	struct inode	*dir,
	struct dentry	*dentry,
	int		mode,
	int		rdev)
{
	struct inode	*ip;
	vattr_t		va;
	vnode_t		*vp = NULL, *dvp = LINVFS_GET_VP(dir);
	xfs_acl_t	*default_acl = NULL;
	attrexists_t	test_default_acl = _ACL_DEFAULT_EXISTS;
	int		error;

	if (test_default_acl && test_default_acl(dvp)) {
		if (!_ACL_ALLOC(default_acl))
			return -ENOMEM;
		if (!_ACL_GET_DEFAULT(dvp, default_acl)) {
			_ACL_FREE(default_acl);
			default_acl = NULL;
		}
	}

#ifdef CONFIG_XFS_POSIX_ACL
	/*
	 * Conditionally compiled so that the ACL base kernel changes can be
	 * split out into separate patches - remove this once MS_POSIXACL is
	 * accepted, or some other way to implement this exists.
	 */
	if (IS_POSIXACL(dir) && !default_acl && has_fs_struct(current))
		mode &= ~current->fs->umask;
#endif

	memset(&va, 0, sizeof(va));
	va.va_mask = XFS_AT_TYPE|XFS_AT_MODE;
	va.va_type = IFTOVT(mode);
	va.va_mode = mode;

	switch (mode & S_IFMT) {
	case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
		va.va_rdev = XFS_MKDEV(MAJOR(rdev), MINOR(rdev));
		va.va_mask |= XFS_AT_RDEV;
		/*FALLTHROUGH*/
	case S_IFREG:
		VOP_CREATE(dvp, dentry, &va, &vp, NULL, error);
		break;
	case S_IFDIR:
		VOP_MKDIR(dvp, dentry, &va, &vp, NULL, error);
		break;
	default:
		error = EINVAL;
		break;
	}

	if (default_acl) {
		if (!error) {
			error = _ACL_INHERIT(vp, &va, default_acl);
			if (!error) {
				VMODIFY(vp);
			} else {
				struct dentry	teardown = {};
				int		err2;

				/* Oh, the horror.
				 * If we can't add the ACL we must back out.
				 * ENOSPC can hit here, among other things.
				 */
				teardown.d_inode = ip = LINVFS_GET_IP(vp);
				teardown.d_name = dentry->d_name;
				vn_mark_bad(vp);

				if (S_ISDIR(mode))
					VOP_RMDIR(dvp, &teardown, NULL, err2);
				else
					VOP_REMOVE(dvp, &teardown, NULL, err2);
				VN_RELE(vp);
			}
		}
		_ACL_FREE(default_acl);
	}

	if (!error) {
		ASSERT(vp);
		ip = LINVFS_GET_IP(vp);

		if (S_ISCHR(mode) || S_ISBLK(mode))
			ip->i_rdev = to_kdev_t(rdev);
		else if (S_ISDIR(mode))
			validate_fields(ip);
		d_instantiate(dentry, ip);
		validate_fields(dir);
	}
	return -error;
}
ssize_t				/* bytes written, or (-) error */
xfs_write(
	bhv_desc_t	*bdp,
	struct file	*file,
	const char	*buf,
	size_t		size,
	loff_t		*offset,
	int		ioflags,
	cred_t		*credp)
{
	xfs_inode_t	*xip;
	xfs_mount_t	*mp;
	ssize_t		ret;
	int		error = 0;
	xfs_fsize_t	isize, new_size;
	xfs_fsize_t	n, limit;
	xfs_iocore_t	*io;
	vnode_t		*vp;
	int		iolock;
	int		eventsent = 0;
	vrwlock_t	locktype;

	XFS_STATS_INC(xs_write_calls);

	vp = BHV_TO_VNODE(bdp);
	xip = XFS_BHVTOI(bdp);

	if (size == 0)
		return 0;

	io = &xip->i_iocore;
	mp = io->io_mount;

	fs_check_frozen(vp->v_vfsp, SB_FREEZE_WRITE);

	if (XFS_FORCED_SHUTDOWN(xip->i_mount))
		return -EIO;

	if (unlikely(ioflags & IO_ISDIRECT)) {
		if (((__psint_t)buf & BBMASK) ||
		    (*offset & mp->m_blockmask) ||
		    (size & mp->m_blockmask))
			return XFS_ERROR(-EINVAL);
		iolock = XFS_IOLOCK_SHARED;
		locktype = VRWLOCK_WRITE_DIRECT;
	} else {
		iolock = XFS_IOLOCK_EXCL;
		locktype = VRWLOCK_WRITE;
	}

	if (ioflags & IO_ISLOCKED)
		iolock = 0;

	xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);

	isize = xip->i_d.di_size;
	limit = XFS_MAXIOFFSET(mp);

	if (file->f_flags & O_APPEND)
		*offset = isize;

start:
	n = limit - *offset;
	if (n <= 0) {
		xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
		return -EFBIG;
	}
	if (n < size)
		size = n;

	new_size = *offset + size;
	if (new_size > isize)
		io->io_new_size = new_size;

	if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
	    !(ioflags & IO_INVIS) && !eventsent)) {
		loff_t		savedsize = *offset;
		int		dmflags = FILP_DELAY_FLAG(file) |
					  DM_SEM_FLAG_RD(ioflags);

		xfs_iunlock(xip, XFS_ILOCK_EXCL);
		error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
				      *offset, size, dmflags, &locktype);
		if (error) {
			if (iolock)
				xfs_iunlock(xip, iolock);
			return -error;
		}
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		eventsent = 1;

		/*
		 * The iolock was dropped and reacquired in XFS_SEND_DATA
		 * so we have to recheck the size when appending.
		 * We will only "goto start;" once, since having sent the
		 * event prevents another call to XFS_SEND_DATA, which is
		 * what allows the size to change in the first place.
		 */
		if ((file->f_flags & O_APPEND) &&
		    savedsize != xip->i_d.di_size) {
			*offset = isize = xip->i_d.di_size;
			goto start;
		}
	}

	/*
	 * If the offset is beyond the size of the file, we have a couple
	 * of things to do. First, if there is already space allocated
	 * we need to either create holes or zero the disk or ...
	 *
	 * If there is a page where the previous size lands, we need
	 * to zero it out up to the new size.
	 */
	if (!(ioflags & IO_ISDIRECT) && (*offset > isize && isize)) {
		error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset,
			isize, *offset + size);
		if (error) {
			xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
			return -error;
		}
	}
	xfs_iunlock(xip, XFS_ILOCK_EXCL);

	/*
	 * If we're writing the file then make sure to clear the
	 * setuid and setgid bits if the process is not being run
	 * by root.  This keeps people from modifying setuid and
	 * setgid binaries.
	 */
	if (((xip->i_d.di_mode & S_ISUID) ||
	    ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) ==
		(S_ISGID | S_IXGRP))) &&
	     !capable(CAP_FSETID)) {
		error = xfs_write_clear_setuid(xip);
		if (error) {
			xfs_iunlock(xip, iolock);
			return -error;
		}
	}

	if ((ssize_t) size < 0) {
		ret = -EINVAL;
		goto error;
	}

	if (!access_ok(VERIFY_READ, buf, size)) {
		ret = -EINVAL;
		goto error;
	}

retry:
	if (unlikely(ioflags & IO_ISDIRECT)) {
		xfs_inval_cached_pages(vp, io, *offset, 1, 1);
		xfs_rw_enter_trace(XFS_DIOWR_ENTER,
				   io, buf, size, *offset, ioflags);
		ret = do_generic_direct_write(file, buf, size, offset);
	} else {
		xfs_rw_enter_trace(XFS_WRITE_ENTER,
				   io, buf, size, *offset, ioflags);
		ret = do_generic_file_write(file, buf, size, offset);
	}

	if (unlikely(ioflags & IO_INVIS)) {
		/* generic_file_write updates the mtime/ctime but we need
		 * to undo that because this I/O was supposed to be
		 * invisible.
		 */
		struct inode	*inode = LINVFS_GET_IP(vp);

		inode->i_mtime = xip->i_d.di_mtime.t_sec;
		inode->i_ctime = xip->i_d.di_ctime.t_sec;
	} else {
		xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	}

	if ((ret == -ENOSPC) &&
	    DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&
	    !(ioflags & IO_INVIS)) {

		xfs_rwunlock(bdp, locktype);
		error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp,
				DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
				0, 0, 0); /* Delay flag intentionally unused */
		if (error)
			return -error;
		xfs_rwlock(bdp, locktype);
		*offset = xip->i_d.di_size;
		goto retry;
	}

error:
	if (ret <= 0) {
		if (iolock)
			xfs_rwunlock(bdp, locktype);
		return ret;
	}

	XFS_STATS_ADD(xs_write_bytes, ret);

	if (*offset > xip->i_d.di_size) {
		xfs_ilock(xip, XFS_ILOCK_EXCL);
		if (*offset > xip->i_d.di_size) {
			struct inode	*inode = LINVFS_GET_IP(vp);

			xip->i_d.di_size = *offset;
			i_size_write(inode, *offset);
			xip->i_update_core = 1;
			xip->i_update_size = 1;
			mark_inode_dirty_sync(inode);
		}
		xfs_iunlock(xip, XFS_ILOCK_EXCL);
	}

	/* Handle various SYNC-type writes */
	if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) {
		/*
		 * If we're treating this as O_DSYNC and we have not updated
		 * the size, force the log.
		 */
		if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) &&
		    !(xip->i_update_size)) {
			/*
			 * If an allocation transaction occurred
			 * without extending the size, then we have to force
			 * the log up the proper point to ensure that the
			 * allocation is permanent.  We can't count on
			 * the fact that buffered writes lock out direct I/O
			 * writes - the direct I/O write could have extended
			 * the size nontransactionally, then finished before
			 * we started.  xfs_write_file will think that the file
			 * didn't grow but the update isn't safe unless the
			 * size change is logged.
			 *
			 * Force the log if we've committed a transaction
			 * against the inode or if someone else has and
			 * the commit record hasn't gone to disk (e.g.
			 * the inode is pinned).  This guarantees that
			 * all changes affecting the inode are permanent
			 * when we return.
			 */
			xfs_inode_log_item_t	*iip;
			xfs_lsn_t		lsn;

			iip = xip->i_itemp;
			if (iip && iip->ili_last_lsn) {
				lsn = iip->ili_last_lsn;
				xfs_log_force(mp, lsn,
						XFS_LOG_FORCE | XFS_LOG_SYNC);
			} else if (xfs_ipincount(xip) > 0) {
				xfs_log_force(mp, (xfs_lsn_t)0,
						XFS_LOG_FORCE | XFS_LOG_SYNC);
			}
		} else {
			xfs_trans_t	*tp;

			/*
			 * O_SYNC or O_DSYNC _with_ a size update are handled
			 * the same way.
			 *
			 * If the write was synchronous then we need to make
			 * sure that the inode modification time is permanent.
			 * We'll have updated the timestamp above, so here
			 * we use a synchronous transaction to log the inode.
			 * It's not fast, but it's necessary.
			 *
			 * If this is a dsync write and the size got changed
			 * non-transactionally, then we need to ensure that
			 * the size change gets logged in a synchronous
			 * transaction.
			 */
			tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC);
			if ((error = xfs_trans_reserve(tp, 0,
						      XFS_SWRITE_LOG_RES(mp),
						      0, 0, 0))) {
				/* Transaction reserve failed */
				xfs_trans_cancel(tp, 0);
			} else {
				/* Transaction reserve successful */
				xfs_ilock(xip, XFS_ILOCK_EXCL);
				xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL);
				xfs_trans_ihold(tp, xip);
				xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE);
				xfs_trans_set_sync(tp);
				error = xfs_trans_commit(tp, 0, NULL);
				xfs_iunlock(xip, XFS_ILOCK_EXCL);
			}
		}
	} /* (ioflags & O_SYNC) */

	/*
	 * If we are coming from an nfsd thread then insert into the
	 * reference cache.
	 */
	if (!strcmp(current->comm, "nfsd"))
		xfs_refcache_insert(xip);

	/* Drop lock this way - the old refcache release is in here */
	if (iolock)
		xfs_rwunlock(bdp, locktype);

	return ret;
}
int					/* error (positive) */
xfs_zero_eof(
	vnode_t		*vp,
	xfs_iocore_t	*io,
	xfs_off_t	offset,		/* starting I/O offset */
	xfs_fsize_t	isize,		/* current inode size */
	xfs_fsize_t	end_size)	/* terminal inode size */
{
	struct inode	*ip = LINVFS_GET_IP(vp);
	xfs_fileoff_t	start_zero_fsb;
	xfs_fileoff_t	end_zero_fsb;
	xfs_fileoff_t	prev_zero_fsb;
	xfs_fileoff_t	zero_count_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_extlen_t	buf_len_fsb;
	xfs_extlen_t	prev_zero_count;
	xfs_mount_t	*mp;
	int		nimaps;
	int		error = 0;
	xfs_bmbt_irec_t	imap;
	loff_t		loff;
	size_t		lsize;

	ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
	ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));

	mp = io->io_mount;

	/*
	 * First handle zeroing the block on which isize resides.
	 * We only zero a part of that block so it is handled specially.
	 */
	error = xfs_zero_last_block(ip, io, offset, isize, end_size);
	if (error) {
		ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
		ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
		return error;
	}

	/*
	 * Calculate the range between the new size and the old
	 * where blocks needing to be zeroed may exist.  To get the
	 * block where the last byte in the file currently resides,
	 * we need to subtract one from the size and truncate back
	 * to a block boundary.  We subtract 1 in case the size is
	 * exactly on a block boundary.
	 */
	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
	end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
	ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
	if (last_fsb == end_zero_fsb) {
		/*
		 * The size was only incremented on its last block.
		 * We took care of that above, so just return.
		 */
		return 0;
	}

	ASSERT(start_zero_fsb <= end_zero_fsb);
	prev_zero_fsb = NULLFILEOFF;
	prev_zero_count = 0;
	while (start_zero_fsb <= end_zero_fsb) {
		nimaps = 1;
		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
		error = XFS_BMAPI(mp, NULL, io, start_zero_fsb,
				  zero_count_fsb, 0, NULL, 0, &imap,
				  &nimaps, NULL);
		if (error) {
			ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
			ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
			return error;
		}
		ASSERT(nimaps > 0);

		if (imap.br_state == XFS_EXT_UNWRITTEN ||
		    imap.br_startblock == HOLESTARTBLOCK) {
			/*
			 * This loop handles initializing pages that were
			 * partially initialized by the code below this
			 * loop.  It basically zeroes the part of the page
			 * that sits on a hole and sets the page as P_HOLE
			 * and calls remapf if it is a mapped file.
			 */
			prev_zero_fsb = NULLFILEOFF;
			prev_zero_count = 0;
			start_zero_fsb = imap.br_startoff +
					 imap.br_blockcount;
			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
			continue;
		}

		/*
		 * There are blocks in the range requested.
		 * Zero them a single write at a time.  We actually
		 * don't zero the entire range returned if it is
		 * too big and simply loop around to get the rest.
		 * That is not the most efficient thing to do, but it
		 * is simple and this path should not be exercised often.
		 */
		buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount,
					      mp->m_writeio_blocks << 8);

		/*
		 * Drop the inode lock while we're doing the I/O.
		 * We'll still have the iolock to protect us.
		 */
		XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);

		loff = XFS_FSB_TO_B(mp, start_zero_fsb);
		lsize = XFS_FSB_TO_B(mp, buf_len_fsb);

		error = xfs_iozero(ip, loff, lsize, end_size);
		if (error)
			goto out_lock;

		prev_zero_fsb = start_zero_fsb;
		prev_zero_count = buf_len_fsb;
		start_zero_fsb = imap.br_startoff + buf_len_fsb;
		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));

		XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
	}

	return 0;

out_lock:
	XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
	ASSERT(error >= 0);
	return error;
}
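/*
 * Worked example (illustrative only, assuming a hypothetical 4096-byte
 * filesystem block size) of the range computation in xfs_zero_eof()
 * above, for isize = 10000 and offset = 50000:
 *
 *	last_fsb       = (10000 - 1) / 4096          = 2
 *	start_zero_fsb = roundup(10000, 4096) / 4096 = 3
 *	end_zero_fsb   = (50000 - 1) / 4096          = 12
 *
 * so blocks 3..12 are the candidates for zeroing, while the partial
 * block 2 (holding the current last byte) was already handled by
 * xfs_zero_last_block().
 */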
ssize_t				/* bytes read, or (-) error */
xfs_read(
	bhv_desc_t	*bdp,
	struct file	*file,
	char		*buf,
	size_t		size,
	loff_t		*offset,
	int		ioflags,
	cred_t		*credp)
{
	ssize_t		ret;
	xfs_fsize_t	n;
	xfs_inode_t	*ip;
	xfs_mount_t	*mp;

	ip = XFS_BHVTOI(bdp);
	mp = ip->i_mount;

	XFS_STATS_INC(xs_read_calls);

	if (unlikely(ioflags & IO_ISDIRECT)) {
		if ((ssize_t)size < 0)
			return -XFS_ERROR(EINVAL);
		if (((__psint_t)buf & BBMASK) ||
		    (*offset & mp->m_blockmask) ||
		    (size & mp->m_blockmask)) {
			if (*offset >= ip->i_d.di_size)
				return 0;
			return -XFS_ERROR(EINVAL);
		}
	}

	n = XFS_MAXIOFFSET(mp) - *offset;
	if ((n <= 0) || (size == 0))
		return 0;

	if (n < size)
		size = n;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	if (!(ioflags & IO_ISLOCKED))
		xfs_ilock(ip, XFS_IOLOCK_SHARED);

	if (DM_EVENT_ENABLED(BHV_TO_VNODE(bdp)->v_vfsp, ip, DM_EVENT_READ) &&
	    !(ioflags & IO_INVIS)) {
		int		error;
		vrwlock_t	locktype = VRWLOCK_READ;
		int		dmflags = FILP_DELAY_FLAG(file) |
					  DM_SEM_FLAG_RD(ioflags);

		error = XFS_SEND_DATA(mp, DM_EVENT_READ, BHV_TO_VNODE(bdp),
				      *offset, size, dmflags, &locktype);
		if (error) {
			if (!(ioflags & IO_ISLOCKED))
				xfs_iunlock(ip, XFS_IOLOCK_SHARED);
			return -error;
		}
	}

	if (unlikely(ioflags & IO_ISDIRECT)) {
		xfs_rw_enter_trace(XFS_DIORD_ENTER, &ip->i_iocore,
				   buf, size, *offset, ioflags);
		ret = (*offset < ip->i_d.di_size) ?
			do_generic_direct_read(file, buf, size, offset) : 0;
		UPDATE_ATIME(file->f_dentry->d_inode);
	} else {
		xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore,
				   buf, size, *offset, ioflags);
		ret = generic_file_read(file, buf, size, offset);
	}

	if (ret > 0)
		XFS_STATS_ADD(xs_read_bytes, ret);

	if (!(ioflags & IO_ISLOCKED))
		xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	if (unlikely(ioflags & IO_INVIS)) {
		/* generic_file_read updates the atime but we need to
		 * undo that because this I/O was supposed to be invisible.
		 */
		struct inode	*inode = LINVFS_GET_IP(BHV_TO_VNODE(bdp));

		inode->i_atime = ip->i_d.di_atime.t_sec;
	} else {
		xfs_ichgtime(ip, XFS_ICHGTIME_ACC);
	}

	return ret;
}
/*
 * Convert userspace handle data into vnode (and inode).
 * We [ab]use the fact that all the fsop_handlereq ioctl calls
 * have a data structure argument whose first component is always
 * a xfs_fsop_handlereq_t, so we can cast to and from this type.
 * This allows us to optimise the copy_from_user calls and gives
 * a handy, shared routine.
 *
 * If no error, caller must always VN_RELE the returned vp.
 */
STATIC int
xfs_vget_fsop_handlereq(
	xfs_mount_t		*mp,
	struct inode		*parinode,	/* parent inode pointer    */
	int			cap,		/* capability level for op */
	unsigned long		arg,		/* userspace data pointer  */
	unsigned long		size,		/* size of expected struct */
	/* output arguments */
	xfs_fsop_handlereq_t	*hreq,
	vnode_t			**vp,
	struct inode		**inode)
{
	void			*hanp;
	size_t			hlen;
	xfs_fid_t		*xfid;
	xfs_handle_t		*handlep;
	xfs_handle_t		handle;
	xfs_inode_t		*ip;
	struct inode		*inodep;
	vnode_t			*vpp;
	xfs_ino_t		ino;
	__u32			igen;
	int			error;

	if (!capable(cap))
		return XFS_ERROR(EPERM);

	/*
	 * Only allow handle opens under a directory.
	 */
	if (!S_ISDIR(parinode->i_mode))
		return XFS_ERROR(ENOTDIR);

	/*
	 * Copy the handle down from the user and validate
	 * that it looks to be in the correct format.
	 */
	if (copy_from_user(hreq, (struct xfs_fsop_handlereq *)arg, size))
		return XFS_ERROR(EFAULT);

	hanp = hreq->ihandle;
	hlen = hreq->ihandlen;
	handlep = &handle;

	if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep))
		return XFS_ERROR(EINVAL);
	if (copy_from_user(handlep, hanp, hlen))
		return XFS_ERROR(EFAULT);
	if (hlen < sizeof(*handlep))
		memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen);
	if (hlen > sizeof(handlep->ha_fsid)) {
		if (handlep->ha_fid.xfs_fid_len !=
		    (hlen - sizeof(handlep->ha_fsid) -
			    sizeof(handlep->ha_fid.xfs_fid_len)) ||
		    handlep->ha_fid.xfs_fid_pad)
			return XFS_ERROR(EINVAL);
	}

	/*
	 * Crack the handle, obtain the inode # & generation #
	 */
	xfid = (struct xfs_fid *)&handlep->ha_fid;
	if (xfid->xfs_fid_len == sizeof(*xfid) - sizeof(xfid->xfs_fid_len)) {
		ino  = xfid->xfs_fid_ino;
		igen = xfid->xfs_fid_gen;
	} else {
		return XFS_ERROR(EINVAL);
	}

	/*
	 * Get the XFS inode, building a vnode to go with it.
	 */
	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0);
	if (error)
		return error;
	if (ip == NULL)
		return XFS_ERROR(EIO);
	if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) {
		xfs_iput_new(ip, XFS_ILOCK_SHARED);
		return XFS_ERROR(ENOENT);
	}

	vpp = XFS_ITOV(ip);
	inodep = LINVFS_GET_IP(vpp);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	*vp = vpp;
	*inode = inodep;
	return 0;
}
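/*
 * Illustrative sketch (distilled from the validation above; the helper
 * name is hypothetical): a handle passes the format checks when its
 * length covers at least the fsid, fits within xfs_handle_t, and - if
 * a fid is present - the fid length accounts for exactly the bytes
 * following ha_fsid and the fid_len field itself, with the pad zeroed.
 */
STATIC int
example_handle_format_ok(xfs_handle_t *handlep, size_t hlen)
{
	if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep))
		return 0;	/* too short or too long: EINVAL above */
	if (hlen > sizeof(handlep->ha_fsid) &&
	    (handlep->ha_fid.xfs_fid_len !=
		(hlen - sizeof(handlep->ha_fsid) -
		 sizeof(handlep->ha_fid.xfs_fid_len)) ||
	     handlep->ha_fid.xfs_fid_pad))
		return 0;	/* malformed fid: EINVAL above */
	return 1;		/* would pass the format checks */
}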
STATIC int
xfs_ioc_xattr(
	vnode_t			*vp,
	xfs_inode_t		*ip,
	struct file		*filp,
	unsigned int		cmd,
	unsigned long		arg)
{
	struct fsxattr		fa;
	vattr_t			va;
	int			error;
	int			attr_flags;
	unsigned int		flags;

	switch (cmd) {
	case XFS_IOC_FSGETXATTR: {
		va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS;
		VOP_GETATTR(vp, &va, 0, NULL, error);
		if (error)
			return -error;

		fa.fsx_xflags	= va.va_xflags;
		fa.fsx_extsize	= va.va_extsize;
		fa.fsx_nextents	= va.va_nextents;

		if (copy_to_user((struct fsxattr *)arg, &fa, sizeof(fa)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_FSSETXATTR: {
		if (copy_from_user(&fa, (struct fsxattr *)arg, sizeof(fa)))
			return -XFS_ERROR(EFAULT);

		attr_flags = 0;
		if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
			attr_flags |= ATTR_NONBLOCK;

		va.va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE;
		va.va_xflags  = fa.fsx_xflags;
		va.va_extsize = fa.fsx_extsize;

		VOP_SETATTR(vp, &va, attr_flags, NULL, error);
		if (!error)
			vn_revalidate(vp);	/* update Linux inode flags */
		return -error;
	}

	case XFS_IOC_FSGETXATTRA: {
		va.va_mask = XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_ANEXTENTS;
		VOP_GETATTR(vp, &va, 0, NULL, error);
		if (error)
			return -error;

		fa.fsx_xflags	= va.va_xflags;
		fa.fsx_extsize	= va.va_extsize;
		fa.fsx_nextents	= va.va_anextents;

		if (copy_to_user((struct fsxattr *)arg, &fa, sizeof(fa)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_GETXFLAGS: {
		flags = xfs_di2lxflags(ip->i_d.di_flags);
		if (copy_to_user((unsigned int *)arg, &flags, sizeof(flags)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_SETXFLAGS: {
		if (copy_from_user(&flags, (unsigned int *)arg, sizeof(flags)))
			return -XFS_ERROR(EFAULT);

		if (flags & ~(LINUX_XFLAG_IMMUTABLE | LINUX_XFLAG_APPEND |
			      LINUX_XFLAG_NOATIME | LINUX_XFLAG_NODUMP |
			      LINUX_XFLAG_SYNC))
			return -XFS_ERROR(EOPNOTSUPP);

		attr_flags = 0;
		if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
			attr_flags |= ATTR_NONBLOCK;

		va.va_mask = XFS_AT_XFLAGS;
		va.va_xflags = xfs_merge_ioc_xflags(flags,
				xfs_dic2xflags(&ip->i_d, ARCH_NOCONVERT));

		VOP_SETATTR(vp, &va, attr_flags, NULL, error);
		if (!error)
			vn_revalidate(vp);	/* update Linux inode flags */
		return -error;
	}

	case XFS_IOC_GETVERSION: {
		flags = LINVFS_GET_IP(vp)->i_generation;
		if (copy_to_user((unsigned int *)arg, &flags, sizeof(flags)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	default:
		return -ENOTTY;
	}
}