STATIC loff_t xfs_seek_hole( struct file *file, loff_t start) { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; loff_t uninitialized_var(offset); xfs_fsize_t isize; xfs_fileoff_t fsbno; xfs_filblks_t end; uint lock; int error; if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); lock = xfs_ilock_data_map_shared(ip); isize = i_size_read(inode); if (start >= isize) { error = ENXIO; goto out_unlock; } fsbno = XFS_B_TO_FSBT(mp, start); end = XFS_B_TO_FSB(mp, isize); for (;;) { struct xfs_bmbt_irec map[2]; int nmap = 2; unsigned int i; error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, XFS_BMAPI_ENTIRE); if (error) goto out_unlock; /* No extents at given offset, must be beyond EOF */ if (nmap == 0) { error = ENXIO; goto out_unlock; } for (i = 0; i < nmap; i++) { offset = max_t(loff_t, start, XFS_FSB_TO_B(mp, map[i].br_startoff)); /* Landed in a hole */ if (map[i].br_startblock == HOLESTARTBLOCK) goto out; /* * Landed in an unwritten extent, try to search hole * from page cache. */ if (map[i].br_state == XFS_EXT_UNWRITTEN) { if (xfs_find_get_desired_pgoff(inode, &map[i], HOLE_OFF, &offset)) goto out; } } /* * map[0] contains data or its unwritten but contains * data in page cache, probably means that we are * reading after EOF. We should fix offset to point * to the end of the file(i.e., there is an implicit * hole at the end of any file). */ if (nmap == 1) { offset = isize; break; } ASSERT(i > 1); /* * Both mappings contains data, proceed to the next round of * search if the current reading offset not beyond or hit EOF. */ fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; start = XFS_FSB_TO_B(mp, fsbno); if (start >= isize) { offset = isize; break; } } out: /* * At this point, we must have found a hole. However, the returned * offset may be bigger than the file size as it may be aligned to * page boundary for unwritten extents, we need to deal with this * situation in particular. */ offset = min_t(loff_t, offset, isize); offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out_unlock: xfs_iunlock(ip, lock); if (error) return -error; return offset; }
STATIC ssize_t xfs_file_aio_read( struct kiocb *iocb, const struct iovec *iovp, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; size_t size = 0; ssize_t ret = 0; int ioflags = 0; xfs_fsize_t n; XFS_STATS_INC(xs_read_calls); BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; if (file->f_mode & FMODE_NOCMTIME) ioflags |= IO_INVIS; ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE); if (ret < 0) return ret; if (unlikely(ioflags & IO_ISDIRECT)) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((iocb->ki_pos & target->bt_smask) || (size & target->bt_smask)) { if (iocb->ki_pos == i_size_read(inode)) return 0; return -XFS_ERROR(EINVAL); } } n = mp->m_super->s_maxbytes - iocb->ki_pos; if (n <= 0 || size == 0) return 0; if (n < size) size = n; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; /* * Locking is a bit tricky here. If we take an exclusive lock * for direct IO, we effectively serialise all new concurrent * read IO to this file and block it behind IO that is currently in * progress because IO in progress holds the IO lock shared. We only * need to hold the lock exclusive to blow away the page cache, so * only take lock exclusively if the page cache needs invalidation. * This allows the normal direct IO case of no page cache pages to * proceeed concurrently without serialisation. */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) { xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); if (inode->i_mapping->nrpages) { ret = -xfs_flushinval_pages(ip, (iocb->ki_pos & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); if (ret) { xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } } xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); } trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos); if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); return ret; }
STATIC loff_t xfs_seek_data( struct file *file, loff_t start, u32 type) { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; struct xfs_bmbt_irec map[2]; int nmap = 2; loff_t uninitialized_var(offset); xfs_fsize_t isize; xfs_fileoff_t fsbno; xfs_filblks_t end; uint lock; int error; lock = xfs_ilock_map_shared(ip); isize = i_size_read(inode); if (start >= isize) { error = ENXIO; goto out_unlock; } fsbno = XFS_B_TO_FSBT(mp, start); /* * Try to read extents from the first block indicated * by fsbno to the end block of the file. */ end = XFS_B_TO_FSB(mp, isize); error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, XFS_BMAPI_ENTIRE); if (error) goto out_unlock; /* * Treat unwritten extent as data extent since it might * contains dirty data in page cache. */ if (map[0].br_startblock != HOLESTARTBLOCK) { offset = max_t(loff_t, start, XFS_FSB_TO_B(mp, map[0].br_startoff)); } else { if (nmap == 1) { error = ENXIO; goto out_unlock; } offset = max_t(loff_t, start, XFS_FSB_TO_B(mp, map[1].br_startoff)); } if (offset != file->f_pos) file->f_pos = offset; out_unlock: xfs_iunlock_map_shared(ip, lock); if (error) return -error; return offset; }
STATIC int xfs_file_fsync( struct file *file, struct dentry *dentry, int datasync) { struct xfs_inode *ip = XFS_I(dentry->d_inode); struct xfs_trans *tp; int error = 0; int log_flushed = 0; xfs_itrace_entry(ip); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -XFS_ERROR(EIO); xfs_iflags_clear(ip, XFS_ITRUNCATED); /* * We always need to make sure that the required inode state is safe on * disk. The inode might be clean but we still might need to force the * log because of committed transactions that haven't hit the disk yet. * Likewise, there could be unflushed non-transactional changes to the * inode core that have to go to disk and this requires us to issue * a synchronous transaction to capture these changes correctly. * * This code relies on the assumption that if the i_update_core field * of the inode is clear and the inode is unpinned then it is clean * and no action is required. */ xfs_ilock(ip, XFS_ILOCK_SHARED); /* * First check if the VFS inode is marked dirty. All the dirtying * of non-transactional updates no goes through mark_inode_dirty*, * which allows us to distinguish beteeen pure timestamp updates * and i_size updates which need to be caught for fdatasync. * After that also theck for the dirty state in the XFS inode, which * might gets cleared when the inode gets written out via the AIL * or xfs_iflush_cluster. */ if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) || ((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) && ip->i_update_core) { /* * Kick off a transaction to log the inode core to get the * updates. The sync transaction will also force the log. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); if (error) { xfs_trans_cancel(tp, 0); return -error; } xfs_ilock(ip, XFS_ILOCK_EXCL); /* * Note - it's possible that we might have pushed ourselves out * of the way during trans_reserve which would flush the inode. * But there's no guarantee that the inode buffer has actually * gone out yet (it's delwri). Plus the buffer could be pinned * anyway if it's part of an inode in another recent * transaction. So we play it safe and fire off the * transaction anyway. */ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = _xfs_trans_commit(tp, 0, &log_flushed); xfs_iunlock(ip, XFS_ILOCK_EXCL); } else { /* * Timestamps/size haven't changed since last inode flush or * inode transaction commit. That means either nothing got * written or a transaction committed which caught the updates. * If the latter happened and the transaction hasn't hit the * disk yet, the inode will be still be pinned. If it is, * force the log. */ if (xfs_ipincount(ip)) { error = _xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, XFS_LOG_SYNC, &log_flushed); } xfs_iunlock(ip, XFS_ILOCK_SHARED); } if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) { /* * If the log write didn't issue an ordered tag we need * to flush the disk cache for the data device now. */ if (!log_flushed) xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); /* * If this inode is on the RT dev we need to flush that * cache as well. */ if (XFS_IS_REALTIME_INODE(ip)) xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); } return -error; }
STATIC int xfs_map_blocks( struct inode *inode, loff_t offset, struct xfs_bmbt_irec *imap, int type, int nonblocking) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; ssize_t count = 1 << inode->i_blkbits; xfs_fileoff_t offset_fsb, end_fsb; int error = 0; int bmapi_flags = XFS_BMAPI_ENTIRE; int nimaps = 1; if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); if (type == IO_UNWRITTEN) bmapi_flags |= XFS_BMAPI_IGSTATE; if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { if (nonblocking) return -XFS_ERROR(EAGAIN); xfs_ilock(ip, XFS_ILOCK_SHARED); } ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || (ip->i_df.if_flags & XFS_IFEXTENTS)); ASSERT(offset <= mp->m_maxioffset); if (offset + count > mp->m_maxioffset) count = mp->m_maxioffset - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, imap, &nimaps, bmapi_flags); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (error) return -XFS_ERROR(error); if (type == IO_DELALLOC && (!nimaps || isnullstartblock(imap->br_startblock))) { error = xfs_iomap_write_allocate(ip, offset, count, imap); if (!error) trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); return -XFS_ERROR(error); } #ifdef DEBUG if (type == IO_UNWRITTEN) { ASSERT(nimaps); ASSERT(imap->br_startblock != HOLESTARTBLOCK); ASSERT(imap->br_startblock != DELAYSTARTBLOCK); } #endif if (nimaps) trace_xfs_map_blocks_found(ip, offset, count, type, imap); return 0; }
STATIC ssize_t xfs_file_aio_write( struct kiocb *iocb, const struct iovec *iovp, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct xfs_inode *ip = XFS_I(inode); ssize_t ret; int iolock; size_t ocount = 0; XFS_STATS_INC(xs_write_calls); BUG_ON(iocb->ki_pos != pos); ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); if (ret) return ret; if (ocount == 0) return 0; xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; if (unlikely(file->f_flags & O_DIRECT)) ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount, &iolock); else ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, ocount, &iolock); xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); if (ret <= 0) goto out_unlock; /* Handle various SYNC-type writes */ if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { loff_t end = pos + ret - 1; int error, error2; xfs_rw_iunlock(ip, iolock); error = filemap_write_and_wait_range(mapping, pos, end); xfs_rw_ilock(ip, iolock); error2 = -xfs_file_fsync(file, (file->f_flags & __O_SYNC) ? 0 : 1); if (error) ret = error; else if (error2) ret = error2; } out_unlock: xfs_aio_write_newsize_update(ip); xfs_rw_iunlock(ip, iolock); return ret; }
STATIC ssize_t xfs_file_splice_write( struct pipe_inode_info *pipe, struct file *outfilp, loff_t *ppos, size_t count, unsigned int flags) { struct inode *inode = outfilp->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; xfs_fsize_t isize, new_size; int ioflags = 0; ssize_t ret; XFS_STATS_INC(xs_write_calls); if (outfilp->f_mode & FMODE_NOCMTIME) ioflags |= IO_INVIS; if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -EIO; xfs_ilock(ip, XFS_IOLOCK_EXCL); if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) { int iolock = XFS_IOLOCK_EXCL; int error; error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count, FILP_DELAY_FLAG(outfilp), &iolock); if (error) { xfs_iunlock(ip, XFS_IOLOCK_EXCL); return -error; } } new_size = *ppos + count; xfs_ilock(ip, XFS_ILOCK_EXCL); if (new_size > ip->i_size) ip->i_new_size = new_size; xfs_iunlock(ip, XFS_ILOCK_EXCL); trace_xfs_file_splice_write(ip, count, *ppos, ioflags); ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); if (ret > 0) XFS_STATS_ADD(xs_write_bytes, ret); isize = i_size_read(inode); if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) *ppos = isize; if (*ppos > ip->i_size) { xfs_ilock(ip, XFS_ILOCK_EXCL); if (*ppos > ip->i_size) ip->i_size = *ppos; xfs_iunlock(ip, XFS_ILOCK_EXCL); } if (ip->i_new_size) { xfs_ilock(ip, XFS_ILOCK_EXCL); ip->i_new_size = 0; if (ip->i_d.di_size > ip->i_size) ip->i_d.di_size = ip->i_size; xfs_iunlock(ip, XFS_ILOCK_EXCL); } xfs_iunlock(ip, XFS_IOLOCK_EXCL); return ret; }
STATIC int xfs_file_readdir( struct file *filp, void *dirent, filldir_t filldir) { struct inode *inode = filp->f_path.dentry->d_inode; xfs_inode_t *ip = XFS_I(inode); struct hack_callback buf; struct hack_dirent *de; int error; loff_t size; int eof = 0; xfs_off_t start_offset, curr_offset, offset; /* * Try fairly hard to get memory */ buf.len = PAGE_CACHE_SIZE; do { buf.dirent = kmalloc(buf.len, GFP_KERNEL); if (buf.dirent) break; buf.len >>= 1; } while (buf.len >= 1024); if (!buf.dirent) return -ENOMEM; curr_offset = filp->f_pos; if (curr_offset == 0x7fffffff) offset = 0xffffffff; else offset = filp->f_pos; while (!eof) { unsigned int reclen; start_offset = offset; buf.used = 0; error = -xfs_readdir(ip, &buf, buf.len, &offset, xfs_hack_filldir); if (error || offset == start_offset) { size = 0; break; } size = buf.used; de = (struct hack_dirent *)buf.dirent; curr_offset = de->offset /* & 0x7fffffff */; while (size > 0) { if (filldir(dirent, de->name, de->namlen, curr_offset & 0x7fffffff, de->ino, de->d_type)) { goto done; } reclen = ALIGN(sizeof(struct hack_dirent) + de->namlen, sizeof(u64)); size -= reclen; de = (struct hack_dirent *)((char *)de + reclen); curr_offset = de->offset /* & 0x7fffffff */; } } done: if (!error) { if (size == 0) filp->f_pos = offset & 0x7fffffff; else if (de) filp->f_pos = curr_offset; } kfree(buf.dirent); return error; }
/* * ioctl interface for swapext */ int xfs_swapext( xfs_swapext_t *sxp) { xfs_inode_t *ip, *tip; struct file *file, *tmp_file; int error = 0; /* Pull information for the target fd */ file = fget((int)sxp->sx_fdtarget); if (!file) { error = XFS_ERROR(EINVAL); goto out; } if (!(file->f_mode & FMODE_WRITE) || !(file->f_mode & FMODE_READ) || (file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); goto out_put_file; } tmp_file = fget((int)sxp->sx_fdtmp); if (!tmp_file) { error = XFS_ERROR(EINVAL); goto out_put_file; } if (!(tmp_file->f_mode & FMODE_WRITE) || !(tmp_file->f_mode & FMODE_READ) || (tmp_file->f_flags & O_APPEND)) { error = XFS_ERROR(EBADF); goto out_put_tmp_file; } if (IS_SWAPFILE(file->f_path.dentry->d_inode) || IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) { error = XFS_ERROR(EINVAL); goto out_put_tmp_file; } ip = XFS_I(file->f_path.dentry->d_inode); tip = XFS_I(tmp_file->f_path.dentry->d_inode); if (ip->i_mount != tip->i_mount) { error = XFS_ERROR(EINVAL); goto out_put_tmp_file; } if (ip->i_ino == tip->i_ino) { error = XFS_ERROR(EINVAL); goto out_put_tmp_file; } if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { error = XFS_ERROR(EIO); goto out_put_tmp_file; } error = xfs_swap_extents(ip, tip, sxp); out_put_tmp_file: fput(tmp_file); out_put_file: fput(file); out: return error; }
STATIC int xfs_vn_mknod( struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) { struct inode *ip; bhv_vnode_t *vp = NULL, *dvp = vn_from_inode(dir); xfs_acl_t *default_acl = NULL; attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; int error; /* * Irix uses Missed'em'V split, but doesn't want to see * the upper 5 bits of (14bit) major. */ if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) return -EINVAL; if (unlikely(test_default_acl && test_default_acl(dvp))) { if (!_ACL_ALLOC(default_acl)) { return -ENOMEM; } if (!_ACL_GET_DEFAULT(dvp, default_acl)) { _ACL_FREE(default_acl); default_acl = NULL; } } if (IS_POSIXACL(dir) && !default_acl && xfs_has_fs_struct(current)) mode &= ~current->fs->umask; switch (mode & S_IFMT) { case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: rdev = sysv_encode_dev(rdev); case S_IFREG: error = xfs_create(XFS_I(dir), dentry, mode, rdev, &vp, NULL); break; case S_IFDIR: error = xfs_mkdir(XFS_I(dir), dentry, mode, &vp, NULL); break; default: error = EINVAL; break; } if (unlikely(!error)) { error = xfs_init_security(vp, dir); if (error) xfs_cleanup_inode(dir, vp, dentry, mode); } if (unlikely(default_acl)) { if (!error) { error = _ACL_INHERIT(vp, mode, default_acl); if (!error) xfs_iflags_set(XFS_I(vp), XFS_IMODIFIED); else xfs_cleanup_inode(dir, vp, dentry, mode); } _ACL_FREE(default_acl); } if (likely(!error)) { ASSERT(vp); ip = vn_to_inode(vp); if (S_ISDIR(mode)) xfs_validate_fields(ip); d_instantiate(dentry, ip); xfs_validate_fields(dir); } return -error; }
struct posix_acl * xfs_get_acl(struct inode *inode, int type) { struct xfs_inode *ip = XFS_I(inode); struct posix_acl *acl; struct xfs_acl *xfs_acl; unsigned char *ea_name; int error; int len; acl = get_cached_acl(inode, type); if (acl != ACL_NOT_CACHED) return acl; trace_xfs_get_acl(ip); switch (type) { case ACL_TYPE_ACCESS: ea_name = SGI_ACL_FILE; break; case ACL_TYPE_DEFAULT: ea_name = SGI_ACL_DEFAULT; break; default: BUG(); } /* * If we have a cached ACLs value just return it, not need to * go out to the disk. */ len = XFS_ACL_MAX_SIZE(ip->i_mount); xfs_acl = kzalloc(len, GFP_KERNEL); if (!xfs_acl) return ERR_PTR(-ENOMEM); error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl, &len, ATTR_ROOT); if (error) { /* * If the attribute doesn't exist make sure we have a negative * cache entry, for any other error assume it is transient and * leave the cache entry as ACL_NOT_CACHED. */ if (error == -ENOATTR) { acl = NULL; goto out_update_cache; } goto out; } acl = xfs_acl_from_disk(xfs_acl, XFS_ACL_MAX_ENTRIES(ip->i_mount)); if (IS_ERR(acl)) goto out; out_update_cache: set_cached_acl(inode, type, acl); out: kfree(xfs_acl); return acl; }
STATIC ssize_t xfs_file_aio_read( struct kiocb *iocb, const struct iovec *iovp, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; size_t size = 0; ssize_t ret = 0; int ioflags = 0; xfs_fsize_t n; XFS_STATS_INC(xs_read_calls); BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; if (file->f_mode & FMODE_NOCMTIME) ioflags |= IO_INVIS; ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE); if (ret < 0) return ret; if (unlikely(ioflags & IO_ISDIRECT)) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; /* DIO must be aligned to device logical sector size */ if ((pos | size) & target->bt_logical_sectormask) { if (pos == i_size_read(inode)) return 0; return -XFS_ERROR(EINVAL); } } n = mp->m_super->s_maxbytes - pos; if (n <= 0 || size == 0) return 0; if (n < size) size = n; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; /* * Locking is a bit tricky here. If we take an exclusive lock * for direct IO, we effectively serialise all new concurrent * read IO to this file and block it behind IO that is currently in * progress because IO in progress holds the IO lock shared. We only * need to hold the lock exclusive to blow away the page cache, so * only take lock exclusively if the page cache needs invalidation. * This allows the normal direct IO case of no page cache pages to * proceeed concurrently without serialisation. */ xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) { xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); if (inode->i_mapping->nrpages) { ret = filemap_write_and_wait_range( VFS_I(ip)->i_mapping, pos, pos + size - 1); if (ret) { xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } /* * Invalidate whole pages. This can return an error if * we fail to invalidate a page, but this should never * happen on XFS. Warn if it does fail. */ ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping, pos >> PAGE_CACHE_SHIFT, (pos + size - 1) >> PAGE_CACHE_SHIFT); WARN_ON_ONCE(ret); ret = 0; }
int xfs_ioc_space( struct file *filp, unsigned int cmd, xfs_flock64_t *bf) { struct inode *inode = file_inode(filp); struct xfs_inode *ip = XFS_I(inode); struct iattr iattr; enum xfs_prealloc_flags flags = 0; uint iolock = XFS_IOLOCK_EXCL; int error; /* * Only allow the sys admin to reserve space unless * unwritten extents are enabled. */ if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) && !capable(CAP_SYS_ADMIN)) return -EPERM; if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) return -EPERM; if (!(filp->f_mode & FMODE_WRITE)) return -EBADF; if (!S_ISREG(inode->i_mode)) return -EINVAL; if (filp->f_flags & O_DSYNC) flags |= XFS_PREALLOC_SYNC; if (filp->f_mode & FMODE_NOCMTIME) flags |= XFS_PREALLOC_INVISIBLE; error = mnt_want_write_file(filp); if (error) return error; xfs_ilock(ip, iolock); error = xfs_break_layouts(inode, &iolock); if (error) goto out_unlock; xfs_ilock(ip, XFS_MMAPLOCK_EXCL); iolock |= XFS_MMAPLOCK_EXCL; switch (bf->l_whence) { case 0: /*SEEK_SET*/ break; case 1: /*SEEK_CUR*/ bf->l_start += filp->f_pos; break; case 2: /*SEEK_END*/ bf->l_start += XFS_ISIZE(ip); break; default: error = -EINVAL; goto out_unlock; } /* * length of <= 0 for resv/unresv/zero is invalid. length for * alloc/free is ignored completely and we have no idea what userspace * might have set it to, so set it to zero to allow range * checks to pass. */ switch (cmd) { case XFS_IOC_ZERO_RANGE: case XFS_IOC_RESVSP: case XFS_IOC_RESVSP64: case XFS_IOC_UNRESVSP: case XFS_IOC_UNRESVSP64: if (bf->l_len <= 0) { error = -EINVAL; goto out_unlock; } break; default: bf->l_len = 0; break; } if (bf->l_start < 0 || bf->l_start > inode->i_sb->s_maxbytes || bf->l_start + bf->l_len < 0 || bf->l_start + bf->l_len >= inode->i_sb->s_maxbytes) { error = -EINVAL; goto out_unlock; } switch (cmd) { case XFS_IOC_ZERO_RANGE: flags |= XFS_PREALLOC_SET; error = xfs_zero_file_space(ip, bf->l_start, bf->l_len); break; case XFS_IOC_RESVSP: case XFS_IOC_RESVSP64: flags |= XFS_PREALLOC_SET; error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len, XFS_BMAPI_PREALLOC); break; case XFS_IOC_UNRESVSP: case XFS_IOC_UNRESVSP64: error = xfs_free_file_space(ip, bf->l_start, bf->l_len); break; case XFS_IOC_ALLOCSP: case XFS_IOC_ALLOCSP64: case XFS_IOC_FREESP: case XFS_IOC_FREESP64: flags |= XFS_PREALLOC_CLEAR; if (bf->l_start > XFS_ISIZE(ip)) { error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), bf->l_start - XFS_ISIZE(ip), 0); if (error) goto out_unlock; } iattr.ia_valid = ATTR_SIZE; iattr.ia_size = bf->l_start; error = xfs_vn_setattr_size(file_dentry(filp), &iattr); break; default: ASSERT(0); error = -EINVAL; } if (error) goto out_unlock; error = xfs_update_prealloc_flags(ip, flags); out_unlock: xfs_iunlock(ip, iolock); mnt_drop_write_file(filp); return error; }
STATIC long xfs_file_fallocate( struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct xfs_inode *ip = XFS_I(inode); struct xfs_trans *tp; long error; loff_t new_size = 0; if (!S_ISREG(inode->i_mode)) return -EINVAL; if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) return -EOPNOTSUPP; xfs_ilock(ip, XFS_IOLOCK_EXCL); if (mode & FALLOC_FL_PUNCH_HOLE) { error = xfs_free_file_space(ip, offset, len); if (error) goto out_unlock; } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { unsigned blksize_mask = (1 << inode->i_blkbits) - 1; if (offset & blksize_mask || len & blksize_mask) { error = -EINVAL; goto out_unlock; } ASSERT(offset + len < i_size_read(inode)); new_size = i_size_read(inode) - len; error = xfs_collapse_file_space(ip, offset, len); if (error) goto out_unlock; } else { if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) { new_size = offset + len; error = -inode_newsize_ok(inode, new_size); if (error) goto out_unlock; } if (mode & FALLOC_FL_ZERO_RANGE) error = xfs_zero_file_space(ip, offset, len); else error = xfs_alloc_file_space(ip, offset, len, XFS_BMAPI_PREALLOC); if (error) goto out_unlock; } tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); if (error) { xfs_trans_cancel(tp, 0); goto out_unlock; } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); ip->i_d.di_mode &= ~S_ISUID; if (ip->i_d.di_mode & S_IXGRP) ip->i_d.di_mode &= ~S_ISGID; if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE))) ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (file->f_flags & O_DSYNC) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); if (error) goto out_unlock; /* Change file size if needed */ if (new_size) { struct iattr iattr; iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; error = xfs_setattr_size(ip, &iattr); } out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); return -error; }
/* * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to * a file or fs handle. * * XFS_IOC_PATH_TO_FSHANDLE * returns fs handle for a mount point or path within that mount point * XFS_IOC_FD_TO_HANDLE * returns full handle for a FD opened in user space * XFS_IOC_PATH_TO_HANDLE * returns full handle for a path */ STATIC int xfs_find_handle( unsigned int cmd, void __user *arg) { int hsize; xfs_handle_t handle; xfs_fsop_handlereq_t hreq; struct inode *inode; if (copy_from_user(&hreq, arg, sizeof(hreq))) return -XFS_ERROR(EFAULT); memset((char *)&handle, 0, sizeof(handle)); switch (cmd) { case XFS_IOC_PATH_TO_FSHANDLE: case XFS_IOC_PATH_TO_HANDLE: { struct nameidata nd; int error; error = user_path_walk_link((const char __user *)hreq.path, &nd); if (error) return error; ASSERT(nd.path.dentry); ASSERT(nd.path.dentry->d_inode); inode = igrab(nd.path.dentry->d_inode); path_put(&nd.path); break; } case XFS_IOC_FD_TO_HANDLE: { struct file *file; file = fget(hreq.fd); if (!file) return -EBADF; ASSERT(file->f_path.dentry); ASSERT(file->f_path.dentry->d_inode); inode = igrab(file->f_path.dentry->d_inode); fput(file); break; } default: ASSERT(0); return -XFS_ERROR(EINVAL); } if (inode->i_sb->s_magic != XFS_SB_MAGIC) { /* we're not in XFS anymore, Toto */ iput(inode); return -XFS_ERROR(EINVAL); } switch (inode->i_mode & S_IFMT) { case S_IFREG: case S_IFDIR: case S_IFLNK: break; default: iput(inode); return -XFS_ERROR(EBADF); } /* now we can grab the fsid */ memcpy(&handle.ha_fsid, XFS_I(inode)->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); hsize = sizeof(xfs_fsid_t); if (cmd != XFS_IOC_PATH_TO_FSHANDLE) { xfs_inode_t *ip = XFS_I(inode); int lock_mode; /* need to get access to the xfs_inode to read the generation */ lock_mode = xfs_ilock_map_shared(ip); /* fill in fid section of handle from inode */ handle.ha_fid.fid_len = sizeof(xfs_fid_t) - sizeof(handle.ha_fid.fid_len); handle.ha_fid.fid_pad = 0; handle.ha_fid.fid_gen = ip->i_d.di_gen; handle.ha_fid.fid_ino = ip->i_ino; xfs_iunlock_map_shared(ip, lock_mode); hsize = XFS_HSIZE(handle); } /* now copy our handle into the user buffer & write out the size */ if (copy_to_user(hreq.ohandle, &handle, hsize) || copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) { iput(inode); return -XFS_ERROR(EFAULT); } iput(inode); return 0; }
/* * Link a range of blocks from one file to another. */ int xfs_reflink_remap_range( struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe) { struct inode *inode_in = file_inode(file_in); struct xfs_inode *src = XFS_I(inode_in); struct inode *inode_out = file_inode(file_out); struct xfs_inode *dest = XFS_I(inode_out); struct xfs_mount *mp = src->i_mount; bool same_inode = (inode_in == inode_out); xfs_fileoff_t sfsbno, dfsbno; xfs_filblks_t fsblen; xfs_extlen_t cowextsize; ssize_t ret; if (!xfs_sb_version_hasreflink(&mp->m_sb)) return -EOPNOTSUPP; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; /* Lock both files against IO */ lock_two_nondirectories(inode_in, inode_out); if (same_inode) xfs_ilock(src, XFS_MMAPLOCK_EXCL); else xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL); /* Check file eligibility and prepare for block sharing. */ ret = -EINVAL; /* Don't reflink realtime inodes */ if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest)) goto out_unlock; /* Don't share DAX file data for now. */ if (IS_DAX(inode_in) || IS_DAX(inode_out)) goto out_unlock; ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out, &len, is_dedupe); if (ret <= 0) goto out_unlock; trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); /* Set flags and remap blocks. */ ret = xfs_reflink_set_inode_flag(src, dest); if (ret) goto out_unlock; dfsbno = XFS_B_TO_FSBT(mp, pos_out); sfsbno = XFS_B_TO_FSBT(mp, pos_in); fsblen = XFS_B_TO_FSB(mp, len); ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen, pos_out + len); if (ret) goto out_unlock; /* Zap any page cache for the destination file's range. */ truncate_inode_pages_range(&inode_out->i_data, pos_out, PAGE_ALIGN(pos_out + len) - 1); /* * Carry the cowextsize hint from src to dest if we're sharing the * entire source file to the entire destination file, the source file * has a cowextsize hint, and the destination file does not. */ cowextsize = 0; if (pos_in == 0 && len == i_size_read(inode_in) && (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) && pos_out == 0 && len >= i_size_read(inode_out) && !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE)) cowextsize = src->i_d.di_cowextsize; ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize, is_dedupe); out_unlock: xfs_iunlock(src, XFS_MMAPLOCK_EXCL); if (!same_inode) xfs_iunlock(dest, XFS_MMAPLOCK_EXCL); unlock_two_nondirectories(inode_in, inode_out); if (ret) trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_); return ret; }
/* * xfs_file_dio_aio_write - handle direct IO writes * * Lock the inode appropriately to prepare for and issue a direct IO write. * By separating it from the buffered write path we remove all the tricky to * follow locking changes and looping. * * If there are cached pages or we're extending the file, we need IOLOCK_EXCL * until we're sure the bytes at the new EOF have been zeroed and/or the cached * pages are flushed out. * * In most cases the direct IO writes will be done holding IOLOCK_SHARED * allowing them to be done in parallel with reads and other direct IO writes. * However, if the IO is not aligned to filesystem blocks, the direct IO layer * needs to do sub-block zeroing and that requires serialisation against other * direct IOs to the same block. In this case we need to serialise the * submission of the unaligned IOs so that we don't get racing block zeroing in * the dio layer. To avoid the problem with aio, we also need to wait for * outstanding IOs to complete so that unwritten extent conversion is completed * before we try to map the overlapping block. This is currently implemented by * hitting it with a big hammer (i.e. xfs_ioend_wait()). * * Returns with locks held indicated by @iolock and errors indicated by * negative return values. */ STATIC ssize_t xfs_file_dio_aio_write( struct kiocb *iocb, const struct iovec *iovp, unsigned long nr_segs, loff_t pos, size_t ocount, int *iolock) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; ssize_t ret = 0; size_t count = ocount; int unaligned_io = 0; struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; *iolock = 0; if ((pos & target->bt_smask) || (count & target->bt_smask)) return -XFS_ERROR(EINVAL); if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) unaligned_io = 1; if (unaligned_io || mapping->nrpages || pos > ip->i_size) *iolock = XFS_IOLOCK_EXCL; else *iolock = XFS_IOLOCK_SHARED; xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); if (ret) return ret; if (mapping->nrpages) { WARN_ON(*iolock != XFS_IOLOCK_EXCL); ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); if (ret) return ret; } /* * If we are doing unaligned IO, wait for all other IO to drain, * otherwise demote the lock if we had to flush cached pages */ if (unaligned_io) xfs_ioend_wait(ip); else if (*iolock == XFS_IOLOCK_EXCL) { xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); *iolock = XFS_IOLOCK_SHARED; } trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); ret = generic_file_direct_write(iocb, iovp, &nr_segs, pos, &iocb->ki_pos, count, ocount); /* No fallback to buffered IO on errors for XFS. */ ASSERT(ret < 0 || ret == count); return ret; }
STATIC int xfs_file_fsync( struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; int error = 0; int log_flushed = 0; trace_xfs_file_fsync(ip); error = filemap_write_and_wait_range(inode->i_mapping, start, end); if (error) return error; if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); xfs_iflags_clear(ip, XFS_ITRUNCATED); xfs_ilock(ip, XFS_IOLOCK_SHARED); xfs_ioend_wait(ip); xfs_iunlock(ip, XFS_IOLOCK_SHARED); if (mp->m_flags & XFS_MOUNT_BARRIER) { /* * If we have an RT and/or log subvolume we need to make sure * to flush the write cache the device used for file data * first. This is to ensure newly written file data make * it to disk before logging the new inode size in case of * an extending write. */ if (XFS_IS_REALTIME_INODE(ip)) xfs_blkdev_issue_flush(mp->m_rtdev_targp); else if (mp->m_logdev_targp != mp->m_ddev_targp) xfs_blkdev_issue_flush(mp->m_ddev_targp); } /* * We always need to make sure that the required inode state is safe on * disk. The inode might be clean but we still might need to force the * log because of committed transactions that haven't hit the disk yet. * Likewise, there could be unflushed non-transactional changes to the * inode core that have to go to disk and this requires us to issue * a synchronous transaction to capture these changes correctly. * * This code relies on the assumption that if the i_update_core field * of the inode is clear and the inode is unpinned then it is clean * and no action is required. */ xfs_ilock(ip, XFS_ILOCK_SHARED); /* * First check if the VFS inode is marked dirty. All the dirtying * of non-transactional updates no goes through mark_inode_dirty*, * which allows us to distinguish beteeen pure timestamp updates * and i_size updates which need to be caught for fdatasync. * After that also theck for the dirty state in the XFS inode, which * might gets cleared when the inode gets written out via the AIL * or xfs_iflush_cluster. */ if (((inode->i_state & I_DIRTY_DATASYNC) || ((inode->i_state & I_DIRTY_SYNC) && !datasync)) && ip->i_update_core) { /* * Kick off a transaction to log the inode core to get the * updates. The sync transaction will also force the log. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); if (error) { xfs_trans_cancel(tp, 0); return -error; } xfs_ilock(ip, XFS_ILOCK_EXCL); /* * Note - it's possible that we might have pushed ourselves out * of the way during trans_reserve which would flush the inode. * But there's no guarantee that the inode buffer has actually * gone out yet (it's delwri). Plus the buffer could be pinned * anyway if it's part of an inode in another recent * transaction. So we play it safe and fire off the * transaction anyway. */ xfs_trans_ijoin(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = _xfs_trans_commit(tp, 0, &log_flushed); xfs_iunlock(ip, XFS_ILOCK_EXCL); } else { /* * Timestamps/size haven't changed since last inode flush or * inode transaction commit. That means either nothing got * written or a transaction committed which caught the updates. * If the latter happened and the transaction hasn't hit the * disk yet, the inode will be still be pinned. If it is, * force the log. */ if (xfs_ipincount(ip)) { error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn, XFS_LOG_SYNC, &log_flushed); } xfs_iunlock(ip, XFS_ILOCK_SHARED); } /* * If we only have a single device, and the log force about was * a no-op we might have to flush the data device cache here. * This can only happen for fdatasync/O_DSYNC if we were overwriting * an already allocated file and thus do not have any metadata to * commit. */ if ((mp->m_flags & XFS_MOUNT_BARRIER) && mp->m_logdev_targp == mp->m_ddev_targp && !XFS_IS_REALTIME_INODE(ip) && !log_flushed) xfs_blkdev_issue_flush(mp->m_ddev_targp); return -error; }
STATIC ssize_t xfs_file_aio_read( struct kiocb *iocb, const struct iovec *iovp, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; size_t size = 0; ssize_t ret = 0; int ioflags = 0; xfs_fsize_t n; unsigned long seg; XFS_STATS_INC(xs_read_calls); BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; if (file->f_mode & FMODE_NOCMTIME) ioflags |= IO_INVIS; /* START copy & waste from filemap.c */ for (seg = 0; seg < nr_segs; seg++) { const struct iovec *iv = &iovp[seg]; /* * If any segment has a negative length, or the cumulative * length ever wraps negative then return -EINVAL. */ size += iv->iov_len; if (unlikely((ssize_t)(size|iv->iov_len) < 0)) return XFS_ERROR(-EINVAL); } /* END copy & waste from filemap.c */ if (unlikely(ioflags & IO_ISDIRECT)) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((iocb->ki_pos & target->bt_smask) || (size & target->bt_smask)) { if (iocb->ki_pos == ip->i_size) return 0; return -XFS_ERROR(EINVAL); } } n = XFS_MAXIOFFSET(mp) - iocb->ki_pos; if (n <= 0 || size == 0) return 0; if (n < size) size = n; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; if (unlikely(ioflags & IO_ISDIRECT)) mutex_lock(&inode->i_mutex); xfs_ilock(ip, XFS_IOLOCK_SHARED); if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) { int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); int iolock = XFS_IOLOCK_SHARED; ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, iocb->ki_pos, size, dmflags, &iolock); if (ret) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); if (unlikely(ioflags & IO_ISDIRECT)) mutex_unlock(&inode->i_mutex); return ret; } } if (unlikely(ioflags & IO_ISDIRECT)) { if (inode->i_mapping->nrpages) { ret = -xfs_flushinval_pages(ip, (iocb->ki_pos & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); } mutex_unlock(&inode->i_mutex); if (ret) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); return ret; } } trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos); if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); xfs_iunlock(ip, XFS_IOLOCK_SHARED); return ret; }
/* * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to * a file or fs handle. * * XFS_IOC_PATH_TO_FSHANDLE * returns fs handle for a mount point or path within that mount point * XFS_IOC_FD_TO_HANDLE * returns full handle for a FD opened in user space * XFS_IOC_PATH_TO_HANDLE * returns full handle for a path */ int xfs_find_handle( unsigned int cmd, xfs_fsop_handlereq_t *hreq) { int hsize; xfs_handle_t handle; struct inode *inode; struct fd f = {0}; struct path path; int error; struct xfs_inode *ip; if (cmd == XFS_IOC_FD_TO_HANDLE) { f = fdget(hreq->fd); if (!f.file) return -EBADF; inode = file_inode(f.file); } else { error = user_lpath((const char __user *)hreq->path, &path); if (error) return error; inode = path.dentry->d_inode; } ip = XFS_I(inode); /* * We can only generate handles for inodes residing on a XFS filesystem, * and only for regular files, directories or symbolic links. */ error = -EINVAL; if (inode->i_sb->s_magic != XFS_SB_MAGIC) goto out_put; error = -EBADF; if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && !S_ISLNK(inode->i_mode)) goto out_put; memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); if (cmd == XFS_IOC_PATH_TO_FSHANDLE) { /* * This handle only contains an fsid, zero the rest. */ memset(&handle.ha_fid, 0, sizeof(handle.ha_fid)); hsize = sizeof(xfs_fsid_t); } else { int lock_mode; lock_mode = xfs_ilock_map_shared(ip); handle.ha_fid.fid_len = sizeof(xfs_fid_t) - sizeof(handle.ha_fid.fid_len); handle.ha_fid.fid_pad = 0; handle.ha_fid.fid_gen = ip->i_d.di_gen; handle.ha_fid.fid_ino = ip->i_ino; xfs_iunlock_map_shared(ip, lock_mode); hsize = XFS_HSIZE(handle); } error = -EFAULT; if (hsize > sizeof handle || copy_to_user(hreq->ohandle, &handle, hsize) || copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) goto out_put; error = 0; out_put: if (cmd == XFS_IOC_FD_TO_HANDLE) fdput(f); else path_put(&path); return error; }
STATIC ssize_t xfs_file_aio_write( struct kiocb *iocb, const struct iovec *iovp, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; ssize_t ret = 0, error = 0; int ioflags = 0; xfs_fsize_t isize, new_size; int iolock; int eventsent = 0; size_t ocount = 0, count; int need_i_mutex; XFS_STATS_INC(xs_write_calls); BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; if (file->f_mode & FMODE_NOCMTIME) ioflags |= IO_INVIS; error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); if (error) return error; count = ocount; if (count == 0) return 0; xfs_wait_for_freeze(mp, SB_FREEZE_WRITE); if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; relock: if (ioflags & IO_ISDIRECT) { iolock = XFS_IOLOCK_SHARED; need_i_mutex = 0; } else { iolock = XFS_IOLOCK_EXCL; need_i_mutex = 1; mutex_lock(&inode->i_mutex); } xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); start: error = -generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (error) { xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); goto out_unlock_mutex; } if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { int dmflags = FILP_DELAY_FLAG(file); if (need_i_mutex) dmflags |= DM_FLAGS_IMUX; xfs_iunlock(ip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip, pos, count, dmflags, &iolock); if (error) { goto out_unlock_internal; } xfs_ilock(ip, XFS_ILOCK_EXCL); eventsent = 1; /* * The iolock was dropped and reacquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ if ((file->f_flags & O_APPEND) && pos != ip->i_size) goto start; } if (ioflags & IO_ISDIRECT) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((pos & target->bt_smask) || (count & target->bt_smask)) { xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); return XFS_ERROR(-EINVAL); } if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) { xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock); iolock = XFS_IOLOCK_EXCL; need_i_mutex = 1; mutex_lock(&inode->i_mutex); xfs_ilock(ip, XFS_ILOCK_EXCL|iolock); goto start; } } new_size = pos + count; if (new_size > ip->i_size) ip->i_new_size = new_size; if (likely(!(ioflags & IO_INVIS))) file_update_time(file); /* * If the offset is beyond the size of the file, we have a couple * of things to do. First, if there is already space allocated * we need to either create holes or zero the disk or ... * * If there is a page where the previous size lands, we need * to zero it out up to the new size. */ if (pos > ip->i_size) { error = xfs_zero_eof(ip, pos, ip->i_size); if (error) { xfs_iunlock(ip, XFS_ILOCK_EXCL); goto out_unlock_internal; } } xfs_iunlock(ip, XFS_ILOCK_EXCL); /* * If we're writing the file then make sure to clear the * setuid and setgid bits if the process is not being run * by root. This keeps people from modifying setuid and * setgid binaries. */ error = -file_remove_suid(file); if (unlikely(error)) goto out_unlock_internal; /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; if ((ioflags & IO_ISDIRECT)) { if (mapping->nrpages) { WARN_ON(need_i_mutex == 0); error = xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); if (error) goto out_unlock_internal; } if (need_i_mutex) { /* demote the lock now the cached pages are gone */ xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); mutex_unlock(&inode->i_mutex); iolock = XFS_IOLOCK_SHARED; need_i_mutex = 0; } trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags); ret = generic_file_direct_write(iocb, iovp, &nr_segs, pos, &iocb->ki_pos, count, ocount); /* * direct-io write to a hole: fall through to buffered I/O * for completing the rest of the request. */ if (ret >= 0 && ret != count) { XFS_STATS_ADD(xs_write_bytes, ret); pos += ret; count -= ret; ioflags &= ~IO_ISDIRECT; xfs_iunlock(ip, iolock); goto relock; } } else { int enospc = 0; ssize_t ret2 = 0; write_retry: trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags); ret2 = generic_file_buffered_write(iocb, iovp, nr_segs, pos, &iocb->ki_pos, count, ret); /* * if we just got an ENOSPC, flush the inode now we * aren't holding any page locks and retry *once* */ if (ret2 == -ENOSPC && !enospc) { error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE); if (error) goto out_unlock_internal; enospc = 1; goto write_retry; } ret = ret2; } current->backing_dev_info = NULL; isize = i_size_read(inode); if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize)) iocb->ki_pos = isize; if (iocb->ki_pos > ip->i_size) { xfs_ilock(ip, XFS_ILOCK_EXCL); if (iocb->ki_pos > ip->i_size) ip->i_size = iocb->ki_pos; xfs_iunlock(ip, XFS_ILOCK_EXCL); } if (ret == -ENOSPC && DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { xfs_iunlock(ip, iolock); if (need_i_mutex) mutex_unlock(&inode->i_mutex); error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip, DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (need_i_mutex) mutex_lock(&inode->i_mutex); xfs_ilock(ip, iolock); if (error) goto out_unlock_internal; goto start; } error = -ret; if (ret <= 0) goto out_unlock_internal; XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { loff_t end = pos + ret - 1; int error2; xfs_iunlock(ip, iolock); if (need_i_mutex) mutex_unlock(&inode->i_mutex); error2 = filemap_write_and_wait_range(mapping, pos, end); if (!error) error = error2; if (need_i_mutex) mutex_lock(&inode->i_mutex); xfs_ilock(ip, iolock); error2 = -xfs_file_fsync(file, file->f_path.dentry, (file->f_flags & __O_SYNC) ? 0 : 1); if (!error) error = error2; } out_unlock_internal: if (ip->i_new_size) { xfs_ilock(ip, XFS_ILOCK_EXCL); ip->i_new_size = 0; /* * If this was a direct or synchronous I/O that failed (such * as ENOSPC) then part of the I/O may have been written to * disk before the error occured. In this case the on-disk * file size may have been adjusted beyond the in-memory file * size and now needs to be truncated back. */ if (ip->i_d.di_size > ip->i_size) ip->i_d.di_size = ip->i_size; xfs_iunlock(ip, XFS_ILOCK_EXCL); } xfs_iunlock(ip, iolock); out_unlock_mutex: if (need_i_mutex) mutex_unlock(&inode->i_mutex); return -error; }
STATIC int xfs_generic_create( struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev, bool tmpfile) /* unnamed file */ { struct inode *inode; struct xfs_inode *ip = NULL; struct posix_acl *default_acl, *acl; struct xfs_name name; int error; /* * Irix uses Missed'em'V split, but doesn't want to see * the upper 5 bits of (14bit) major. */ if (S_ISCHR(mode) || S_ISBLK(mode)) { if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) return -EINVAL; rdev = sysv_encode_dev(rdev); } else { rdev = 0; } error = posix_acl_create(dir, &mode, &default_acl, &acl); if (error) return error; if (!tmpfile) { xfs_dentry_to_name(&name, dentry, mode); error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); } else { error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip); } if (unlikely(error)) goto out_free_acl; inode = VFS_I(ip); error = xfs_init_security(inode, dir, &dentry->d_name); if (unlikely(error)) goto out_cleanup_inode; #ifdef CONFIG_XFS_POSIX_ACL if (default_acl) { error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); if (error) goto out_cleanup_inode; } if (acl) { error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); if (error) goto out_cleanup_inode; } #endif xfs_setup_iops(ip); if (tmpfile) d_tmpfile(dentry, inode); else d_instantiate(dentry, inode); xfs_finish_inode_setup(ip); out_free_acl: if (default_acl) posix_acl_release(default_acl); if (acl) posix_acl_release(acl); return error; out_cleanup_inode: xfs_finish_inode_setup(ip); if (!tmpfile) xfs_cleanup_inode(dir, inode, dentry); iput(inode); goto out_free_acl; }
/* * Fast and loose check if this write could update the on-disk inode size. */ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) { return ioend->io_offset + ioend->io_size > XFS_I(ioend->io_inode)->i_d.di_size; }
STATIC int xfs_vn_mknod( struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) { struct inode *inode; struct xfs_inode *ip = NULL; struct posix_acl *default_acl = NULL; struct xfs_name name; int error; /* * Irix uses Missed'em'V split, but doesn't want to see * the upper 5 bits of (14bit) major. */ if (S_ISCHR(mode) || S_ISBLK(mode)) { if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) return -EINVAL; rdev = sysv_encode_dev(rdev); } else { rdev = 0; } if (IS_POSIXACL(dir)) { default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); if (IS_ERR(default_acl)) return PTR_ERR(default_acl); if (!default_acl) mode &= ~current_umask(); } xfs_dentry_to_name(&name, dentry); error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); if (unlikely(error)) goto out_free_acl; inode = VFS_I(ip); error = xfs_init_security(inode, dir, &dentry->d_name); if (unlikely(error)) goto out_cleanup_inode; if (default_acl) { error = -xfs_inherit_acl(inode, default_acl); if (unlikely(error)) goto out_cleanup_inode; posix_acl_release(default_acl); } d_instantiate(dentry, inode); return -error; out_cleanup_inode: xfs_cleanup_inode(dir, inode, dentry); out_free_acl: posix_acl_release(default_acl); return -error; }
STATIC int xfs_file_fsync( struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; int error = 0; int log_flushed = 0; xfs_lsn_t lsn = 0; trace_xfs_file_fsync(ip); error = filemap_write_and_wait_range(inode->i_mapping, start, end); if (error) return error; if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); xfs_iflags_clear(ip, XFS_ITRUNCATED); if (mp->m_flags & XFS_MOUNT_BARRIER) { /* * If we have an RT and/or log subvolume we need to make sure * to flush the write cache the device used for file data * first. This is to ensure newly written file data make * it to disk before logging the new inode size in case of * an extending write. */ if (XFS_IS_REALTIME_INODE(ip)) xfs_blkdev_issue_flush(mp->m_rtdev_targp); else if (mp->m_logdev_targp != mp->m_ddev_targp) xfs_blkdev_issue_flush(mp->m_ddev_targp); } /* * All metadata updates are logged, which means that we just have * to flush the log up to the latest LSN that touched the inode. */ xfs_ilock(ip, XFS_ILOCK_SHARED); if (xfs_ipincount(ip)) { if (!datasync || (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP)) lsn = ip->i_itemp->ili_last_lsn; } xfs_iunlock(ip, XFS_ILOCK_SHARED); if (lsn) error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); /* * If we only have a single device, and the log force about was * a no-op we might have to flush the data device cache here. * This can only happen for fdatasync/O_DSYNC if we were overwriting * an already allocated file and thus do not have any metadata to * commit. */ if ((mp->m_flags & XFS_MOUNT_BARRIER) && mp->m_logdev_targp == mp->m_ddev_targp && !XFS_IS_REALTIME_INODE(ip) && !log_flushed) xfs_blkdev_issue_flush(mp->m_ddev_targp); return -error; }
STATIC int xfs_fs_encode_fh( struct dentry *dentry, __u32 *fh, int *max_len, int connectable) { struct fid *fid = (struct fid *)fh; struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh; struct inode *inode = dentry->d_inode; int fileid_type; int len; /* Directories don't need their parent encoded, they have ".." */ if (S_ISDIR(inode->i_mode) || !connectable) fileid_type = FILEID_INO32_GEN; else fileid_type = FILEID_INO32_GEN_PARENT; /* * If the the filesystem may contain 64bit inode numbers, we need * to use larger file handles that can represent them. * * While we only allocate inodes that do not fit into 32 bits any * large enough filesystem may contain them, thus the slightly * confusing looking conditional below. */ if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) || (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES)) fileid_type |= XFS_FILEID_TYPE_64FLAG; /* * Only encode if there is enough space given. In practice * this means we can't export a filesystem with 64bit inodes * over NFSv2 with the subtree_check export option; the other * seven combinations work. The real answer is "don't use v2". */ len = xfs_fileid_length(fileid_type); if (*max_len < len) { *max_len = len; return 255; } *max_len = len; switch (fileid_type) { case FILEID_INO32_GEN_PARENT: spin_lock(&dentry->d_lock); fid->i32.parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino; fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation; spin_unlock(&dentry->d_lock); /*FALLTHRU*/ case FILEID_INO32_GEN: fid->i32.ino = XFS_I(inode)->i_ino; fid->i32.gen = inode->i_generation; break; case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: spin_lock(&dentry->d_lock); fid64->parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino; fid64->parent_gen = dentry->d_parent->d_inode->i_generation; spin_unlock(&dentry->d_lock); /*FALLTHRU*/ case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: fid64->ino = XFS_I(inode)->i_ino; fid64->gen = inode->i_generation; break; } return fileid_type; }
/* * xfs_file_dio_aio_write - handle direct IO writes * * Lock the inode appropriately to prepare for and issue a direct IO write. * By separating it from the buffered write path we remove all the tricky to * follow locking changes and looping. * * If there are cached pages or we're extending the file, we need IOLOCK_EXCL * until we're sure the bytes at the new EOF have been zeroed and/or the cached * pages are flushed out. * * In most cases the direct IO writes will be done holding IOLOCK_SHARED * allowing them to be done in parallel with reads and other direct IO writes. * However, if the IO is not aligned to filesystem blocks, the direct IO layer * needs to do sub-block zeroing and that requires serialisation against other * direct IOs to the same block. In this case we need to serialise the * submission of the unaligned IOs so that we don't get racing block zeroing in * the dio layer. To avoid the problem with aio, we also need to wait for * outstanding IOs to complete so that unwritten extent conversion is completed * before we try to map the overlapping block. This is currently implemented by * hitting it with a big hammer (i.e. inode_dio_wait()). * * Returns with locks held indicated by @iolock and errors indicated by * negative return values. */ STATIC ssize_t xfs_file_dio_aio_write( struct kiocb *iocb, const struct iovec *iovp, unsigned long nr_segs, loff_t pos, size_t ocount) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; ssize_t ret = 0; size_t count = ocount; int unaligned_io = 0; int iolock; struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((pos & target->bt_smask) || (count & target->bt_smask)) return -XFS_ERROR(EINVAL); if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) unaligned_io = 1; /* * We don't need to take an exclusive lock unless there page cache needs * to be invalidated or unaligned IO is being executed. We don't need to * consider the EOF extension case here because * xfs_file_aio_write_checks() will relock the inode as necessary for * EOF zeroing cases and fill out the new inode size as appropriate. */ if (unaligned_io || mapping->nrpages) iolock = XFS_IOLOCK_EXCL; else iolock = XFS_IOLOCK_SHARED; xfs_rw_ilock(ip, iolock); /* * Recheck if there are cached pages that need invalidate after we got * the iolock to protect against other threads adding new pages while * we were waiting for the iolock. */ if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) { xfs_rw_iunlock(ip, iolock); iolock = XFS_IOLOCK_EXCL; xfs_rw_ilock(ip, iolock); } ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); if (ret) goto out; if (mapping->nrpages) { ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); if (ret) goto out; } /* * If we are doing unaligned IO, wait for all other IO to drain, * otherwise demote the lock if we had to flush cached pages */ if (unaligned_io) inode_dio_wait(inode); else if (iolock == XFS_IOLOCK_EXCL) { xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); iolock = XFS_IOLOCK_SHARED; } trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); ret = generic_file_direct_write(iocb, iovp, &nr_segs, pos, &iocb->ki_pos, count, ocount); out: xfs_rw_iunlock(ip, iolock); /* No fallback to buffered IO on errors for XFS. */ ASSERT(ret < 0 || ret == count); return ret; }
int xfs_ioctl( xfs_inode_t *ip, struct file *filp, int ioflags, unsigned int cmd, void __user *arg) { struct inode *inode = filp->f_path.dentry->d_inode; xfs_mount_t *mp = ip->i_mount; int error; xfs_itrace_entry(XFS_I(inode)); switch (cmd) { case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: case XFS_IOC_RESVSP: case XFS_IOC_UNRESVSP: case XFS_IOC_ALLOCSP64: case XFS_IOC_FREESP64: case XFS_IOC_RESVSP64: case XFS_IOC_UNRESVSP64: /* * Only allow the sys admin to reserve space unless * unwritten extents are enabled. */ if (!xfs_sb_version_hasextflgbit(&mp->m_sb) && !capable(CAP_SYS_ADMIN)) return -EPERM; return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg); case XFS_IOC_DIOINFO: { struct dioattr da; xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; da.d_mem = da.d_miniosz = 1 << target->bt_sshift; da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); if (copy_to_user(arg, &da, sizeof(da))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_FSBULKSTAT_SINGLE: case XFS_IOC_FSBULKSTAT: case XFS_IOC_FSINUMBERS: return xfs_ioc_bulkstat(mp, cmd, arg); case XFS_IOC_FSGEOMETRY_V1: return xfs_ioc_fsgeometry_v1(mp, arg); case XFS_IOC_FSGEOMETRY: return xfs_ioc_fsgeometry(mp, arg); case XFS_IOC_GETVERSION: return put_user(inode->i_generation, (int __user *)arg); case XFS_IOC_FSGETXATTR: return xfs_ioc_fsgetxattr(ip, 0, arg); case XFS_IOC_FSGETXATTRA: return xfs_ioc_fsgetxattr(ip, 1, arg); case XFS_IOC_FSSETXATTR: return xfs_ioc_fssetxattr(ip, filp, arg); case XFS_IOC_GETXFLAGS: return xfs_ioc_getxflags(ip, arg); case XFS_IOC_SETXFLAGS: return xfs_ioc_setxflags(ip, filp, arg); case XFS_IOC_FSSETDM: { struct fsdmidata dmi; if (copy_from_user(&dmi, arg, sizeof(dmi))) return -XFS_ERROR(EFAULT); error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, dmi.fsd_dmstate); return -error; } case XFS_IOC_GETBMAP: case XFS_IOC_GETBMAPA: return xfs_ioc_getbmap(ip, ioflags, cmd, arg); case XFS_IOC_GETBMAPX: return xfs_ioc_getbmapx(ip, arg); case XFS_IOC_FD_TO_HANDLE: case XFS_IOC_PATH_TO_HANDLE: case XFS_IOC_PATH_TO_FSHANDLE: return xfs_find_handle(cmd, arg); case XFS_IOC_OPEN_BY_HANDLE: return xfs_open_by_handle(mp, arg, filp, inode); case XFS_IOC_FSSETDM_BY_HANDLE: return xfs_fssetdm_by_handle(mp, arg, inode); case XFS_IOC_READLINK_BY_HANDLE: return xfs_readlink_by_handle(mp, arg, inode); case XFS_IOC_ATTRLIST_BY_HANDLE: return xfs_attrlist_by_handle(mp, arg, inode); case XFS_IOC_ATTRMULTI_BY_HANDLE: return xfs_attrmulti_by_handle(mp, arg, inode); case XFS_IOC_SWAPEXT: { error = xfs_swapext((struct xfs_swapext __user *)arg); return -error; } case XFS_IOC_FSCOUNTS: { xfs_fsop_counts_t out; error = xfs_fs_counts(mp, &out); if (error) return -error; if (copy_to_user(arg, &out, sizeof(out))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_SET_RESBLKS: { xfs_fsop_resblks_t inout; __uint64_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&inout, arg, sizeof(inout))) return -XFS_ERROR(EFAULT); /* input parameter is passed in resblks field of structure */ in = inout.resblks; error = xfs_reserve_blocks(mp, &in, &inout); if (error) return -error; if (copy_to_user(arg, &inout, sizeof(inout))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_GET_RESBLKS: { xfs_fsop_resblks_t out; if (!capable(CAP_SYS_ADMIN)) return -EPERM; error = xfs_reserve_blocks(mp, NULL, &out); if (error) return -error; if (copy_to_user(arg, &out, sizeof(out))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_FSGROWFSDATA: { xfs_growfs_data_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_growfs_data(mp, &in); return -error; } case XFS_IOC_FSGROWFSLOG: { xfs_growfs_log_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_growfs_log(mp, &in); return -error; } case XFS_IOC_FSGROWFSRT: { xfs_growfs_rt_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_growfs_rt(mp, &in); return -error; } case XFS_IOC_FREEZE: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (inode->i_sb->s_frozen == SB_UNFROZEN) freeze_bdev(inode->i_sb->s_bdev); return 0; case XFS_IOC_THAW: if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (inode->i_sb->s_frozen != SB_UNFROZEN) thaw_bdev(inode->i_sb->s_bdev, inode->i_sb); return 0; case XFS_IOC_GOINGDOWN: { __uint32_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(in, (__uint32_t __user *)arg)) return -XFS_ERROR(EFAULT); error = xfs_fs_goingdown(mp, in); return -error; } case XFS_IOC_ERROR_INJECTION: { xfs_error_injection_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_errortag_add(in.errtag, mp); return -error; } case XFS_IOC_ERROR_CLEARALL: if (!capable(CAP_SYS_ADMIN)) return -EPERM; error = xfs_errortag_clearall(mp, 1); return -error; default: return -ENOTTY; } }
/* * Note: some of the ioctl's return positive numbers as a * byte count indicating success, such as readlink_by_handle. * So we don't "sign flip" like most other routines. This means * true errors need to be returned as a negative value. */ long xfs_file_ioctl( struct file *filp, unsigned int cmd, unsigned long p) { struct inode *inode = filp->f_path.dentry->d_inode; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; void __user *arg = (void __user *)p; int ioflags = 0; int error; if (filp->f_mode & FMODE_NOCMTIME) ioflags |= IO_INVIS; trace_xfs_file_ioctl(ip); switch (cmd) { case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: case XFS_IOC_RESVSP: case XFS_IOC_UNRESVSP: case XFS_IOC_ALLOCSP64: case XFS_IOC_FREESP64: case XFS_IOC_RESVSP64: case XFS_IOC_UNRESVSP64: { xfs_flock64_t bf; if (copy_from_user(&bf, arg, sizeof(bf))) return -XFS_ERROR(EFAULT); return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); } case XFS_IOC_DIOINFO: { struct dioattr da; xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; da.d_mem = da.d_miniosz = 1 << target->bt_sshift; da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); if (copy_to_user(arg, &da, sizeof(da))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_FSBULKSTAT_SINGLE: case XFS_IOC_FSBULKSTAT: case XFS_IOC_FSINUMBERS: return xfs_ioc_bulkstat(mp, cmd, arg); case XFS_IOC_FSGEOMETRY_V1: return xfs_ioc_fsgeometry_v1(mp, arg); case XFS_IOC_FSGEOMETRY: return xfs_ioc_fsgeometry(mp, arg); case XFS_IOC_GETVERSION: return put_user(inode->i_generation, (int __user *)arg); case XFS_IOC_FSGETXATTR: return xfs_ioc_fsgetxattr(ip, 0, arg); case XFS_IOC_FSGETXATTRA: return xfs_ioc_fsgetxattr(ip, 1, arg); case XFS_IOC_FSSETXATTR: return xfs_ioc_fssetxattr(ip, filp, arg); case XFS_IOC_GETXFLAGS: return xfs_ioc_getxflags(ip, arg); case XFS_IOC_SETXFLAGS: return xfs_ioc_setxflags(ip, filp, arg); case XFS_IOC_FSSETDM: { struct fsdmidata dmi; if (copy_from_user(&dmi, arg, sizeof(dmi))) return -XFS_ERROR(EFAULT); error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, dmi.fsd_dmstate); return -error; } case XFS_IOC_GETBMAP: case XFS_IOC_GETBMAPA: return xfs_ioc_getbmap(ip, ioflags, cmd, arg); case XFS_IOC_GETBMAPX: return xfs_ioc_getbmapx(ip, arg); case XFS_IOC_FD_TO_HANDLE: case XFS_IOC_PATH_TO_HANDLE: case XFS_IOC_PATH_TO_FSHANDLE: { xfs_fsop_handlereq_t hreq; if (copy_from_user(&hreq, arg, sizeof(hreq))) return -XFS_ERROR(EFAULT); return xfs_find_handle(cmd, &hreq); } case XFS_IOC_OPEN_BY_HANDLE: { xfs_fsop_handlereq_t hreq; if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) return -XFS_ERROR(EFAULT); return xfs_open_by_handle(filp, &hreq); } case XFS_IOC_FSSETDM_BY_HANDLE: return xfs_fssetdm_by_handle(filp, arg); case XFS_IOC_READLINK_BY_HANDLE: { xfs_fsop_handlereq_t hreq; if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) return -XFS_ERROR(EFAULT); return xfs_readlink_by_handle(filp, &hreq); } case XFS_IOC_ATTRLIST_BY_HANDLE: return xfs_attrlist_by_handle(filp, arg); case XFS_IOC_ATTRMULTI_BY_HANDLE: return xfs_attrmulti_by_handle(filp, arg); case XFS_IOC_SWAPEXT: { struct xfs_swapext sxp; if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) return -XFS_ERROR(EFAULT); error = xfs_swapext(&sxp); return -error; } case XFS_IOC_FSCOUNTS: { xfs_fsop_counts_t out; error = xfs_fs_counts(mp, &out); if (error) return -error; if (copy_to_user(arg, &out, sizeof(out))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_SET_RESBLKS: { xfs_fsop_resblks_t inout; __uint64_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (mp->m_flags & XFS_MOUNT_RDONLY) return -XFS_ERROR(EROFS); if (copy_from_user(&inout, arg, sizeof(inout))) return -XFS_ERROR(EFAULT); /* input parameter is passed in resblks field of structure */ in = inout.resblks; error = xfs_reserve_blocks(mp, &in, &inout); if (error) return -error; if (copy_to_user(arg, &inout, sizeof(inout))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_GET_RESBLKS: { xfs_fsop_resblks_t out; if (!capable(CAP_SYS_ADMIN)) return -EPERM; error = xfs_reserve_blocks(mp, NULL, &out); if (error) return -error; if (copy_to_user(arg, &out, sizeof(out))) return -XFS_ERROR(EFAULT); return 0; } case XFS_IOC_FSGROWFSDATA: { xfs_growfs_data_t in; if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_growfs_data(mp, &in); return -error; } case XFS_IOC_FSGROWFSLOG: { xfs_growfs_log_t in; if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_growfs_log(mp, &in); return -error; } case XFS_IOC_FSGROWFSRT: { xfs_growfs_rt_t in; if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_growfs_rt(mp, &in); return -error; } case XFS_IOC_GOINGDOWN: { __uint32_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (get_user(in, (__uint32_t __user *)arg)) return -XFS_ERROR(EFAULT); error = xfs_fs_goingdown(mp, in); return -error; } case XFS_IOC_ERROR_INJECTION: { xfs_error_injection_t in; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); error = xfs_errortag_add(in.errtag, mp); return -error; } case XFS_IOC_ERROR_CLEARALL: if (!capable(CAP_SYS_ADMIN)) return -EPERM; error = xfs_errortag_clearall(mp, 1); return -error; default: return -ENOTTY; } }
STATIC loff_t xfs_seek_data( struct file *file, loff_t start) { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; loff_t uninitialized_var(offset); xfs_fsize_t isize; xfs_fileoff_t fsbno; xfs_filblks_t end; uint lock; int error; lock = xfs_ilock_data_map_shared(ip); isize = i_size_read(inode); if (start >= isize) { error = ENXIO; goto out_unlock; } /* * Try to read extents from the first block indicated * by fsbno to the end block of the file. */ fsbno = XFS_B_TO_FSBT(mp, start); end = XFS_B_TO_FSB(mp, isize); for (;;) { struct xfs_bmbt_irec map[2]; int nmap = 2; unsigned int i; error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, XFS_BMAPI_ENTIRE); if (error) goto out_unlock; /* No extents at given offset, must be beyond EOF */ if (nmap == 0) { error = ENXIO; goto out_unlock; } for (i = 0; i < nmap; i++) { offset = max_t(loff_t, start, XFS_FSB_TO_B(mp, map[i].br_startoff)); /* Landed in a data extent */ if (map[i].br_startblock == DELAYSTARTBLOCK || (map[i].br_state == XFS_EXT_NORM && !isnullstartblock(map[i].br_startblock))) goto out; /* * Landed in an unwritten extent, try to search data * from page cache. */ if (map[i].br_state == XFS_EXT_UNWRITTEN) { if (xfs_find_get_desired_pgoff(inode, &map[i], DATA_OFF, &offset)) goto out; } } /* * map[0] is hole or its an unwritten extent but * without data in page cache. Probably means that * we are reading after EOF if nothing in map[1]. */ if (nmap == 1) { error = ENXIO; goto out_unlock; } ASSERT(i > 1); /* * Nothing was found, proceed to the next round of search * if reading offset not beyond or hit EOF. */ fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; start = XFS_FSB_TO_B(mp, fsbno); if (start >= isize) { error = ENXIO; goto out_unlock; } } out: offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out_unlock: xfs_iunlock(ip, lock); if (error) return -error; return offset; }