示例#1
0
STATIC loff_t
xfs_seek_hole(
	struct file		*file,
	loff_t			start)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	loff_t			uninitialized_var(offset);
	xfs_fsize_t		isize;
	xfs_fileoff_t		fsbno;
	xfs_filblks_t		end;
	uint			lock;
	int			error;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -XFS_ERROR(EIO);

	lock = xfs_ilock_data_map_shared(ip);

	isize = i_size_read(inode);
	if (start >= isize) {
		error = ENXIO;
		goto out_unlock;
	}

	fsbno = XFS_B_TO_FSBT(mp, start);
	end = XFS_B_TO_FSB(mp, isize);

	for (;;) {
		struct xfs_bmbt_irec	map[2];
		int			nmap = 2;
		unsigned int		i;

		error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
				       XFS_BMAPI_ENTIRE);
		if (error)
			goto out_unlock;

		/* No extents at given offset, must be beyond EOF */
		if (nmap == 0) {
			error = ENXIO;
			goto out_unlock;
		}

		for (i = 0; i < nmap; i++) {
			offset = max_t(loff_t, start,
				       XFS_FSB_TO_B(mp, map[i].br_startoff));

			/* Landed in a hole */
			if (map[i].br_startblock == HOLESTARTBLOCK)
				goto out;

			/*
			 * Landed in an unwritten extent, try to search hole
			 * from page cache.
			 */
			if (map[i].br_state == XFS_EXT_UNWRITTEN) {
				if (xfs_find_get_desired_pgoff(inode, &map[i],
							HOLE_OFF, &offset))
					goto out;
			}
		}

		/*
		 * map[0] contains data or its unwritten but contains
		 * data in page cache, probably means that we are
		 * reading after EOF.  We should fix offset to point
		 * to the end of the file(i.e., there is an implicit
		 * hole at the end of any file).
		 */
		if (nmap == 1) {
			offset = isize;
			break;
		}

		ASSERT(i > 1);

		/*
		 * Both mappings contains data, proceed to the next round of
		 * search if the current reading offset not beyond or hit EOF.
		 */
		fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
		start = XFS_FSB_TO_B(mp, fsbno);
		if (start >= isize) {
			offset = isize;
			break;
		}
	}

out:
	/*
	 * At this point, we must have found a hole.  However, the returned
	 * offset may be bigger than the file size as it may be aligned to
	 * page boundary for unwritten extents, we need to deal with this
	 * situation in particular.
	 */
	offset = min_t(loff_t, offset, isize);
	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);

out_unlock:
	xfs_iunlock(ip, lock);

	if (error)
		return -error;
	return offset;
}
示例#2
0
STATIC ssize_t
xfs_file_aio_read(
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned long		nr_segs,
	loff_t			pos)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	size_t			size = 0;
	ssize_t			ret = 0;
	int			ioflags = 0;
	xfs_fsize_t		n;

	XFS_STATS_INC(xs_read_calls);

	BUG_ON(iocb->ki_pos != pos);

	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;
	if (file->f_mode & FMODE_NOCMTIME)
		ioflags |= IO_INVIS;

	ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE);
	if (ret < 0)
		return ret;

	if (unlikely(ioflags & IO_ISDIRECT)) {
		xfs_buftarg_t	*target =
			XFS_IS_REALTIME_INODE(ip) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;
		if ((iocb->ki_pos & target->bt_smask) ||
		    (size & target->bt_smask)) {
			if (iocb->ki_pos == i_size_read(inode))
				return 0;
			return -XFS_ERROR(EINVAL);
		}
	}

	n = mp->m_super->s_maxbytes - iocb->ki_pos;
	if (n <= 0 || size == 0)
		return 0;

	if (n < size)
		size = n;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * Locking is a bit tricky here. If we take an exclusive lock
	 * for direct IO, we effectively serialise all new concurrent
	 * read IO to this file and block it behind IO that is currently in
	 * progress because IO in progress holds the IO lock shared. We only
	 * need to hold the lock exclusive to blow away the page cache, so
	 * only take lock exclusively if the page cache needs invalidation.
	 * This allows the normal direct IO case of no page cache pages to
	 * proceeed concurrently without serialisation.
	 */
	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);

		if (inode->i_mapping->nrpages) {
			ret = -xfs_flushinval_pages(ip,
					(iocb->ki_pos & PAGE_CACHE_MASK),
					-1, FI_REMAPF_LOCKED);
			if (ret) {
				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
				return ret;
			}
		}
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
	}

	trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);

	ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
	if (ret > 0)
		XFS_STATS_ADD(xs_read_bytes, ret);

	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
	return ret;
}
示例#3
0
STATIC loff_t
xfs_seek_data(
	struct file		*file,
	loff_t			start,
	u32			type)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_bmbt_irec	map[2];
	int			nmap = 2;
	loff_t			uninitialized_var(offset);
	xfs_fsize_t		isize;
	xfs_fileoff_t		fsbno;
	xfs_filblks_t		end;
	uint			lock;
	int			error;

	lock = xfs_ilock_map_shared(ip);

	isize = i_size_read(inode);
	if (start >= isize) {
		error = ENXIO;
		goto out_unlock;
	}

	fsbno = XFS_B_TO_FSBT(mp, start);

	/*
	 * Try to read extents from the first block indicated
	 * by fsbno to the end block of the file.
	 */
	end = XFS_B_TO_FSB(mp, isize);

	error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
			       XFS_BMAPI_ENTIRE);
	if (error)
		goto out_unlock;

	/*
	 * Treat unwritten extent as data extent since it might
	 * contains dirty data in page cache.
	 */
	if (map[0].br_startblock != HOLESTARTBLOCK) {
		offset = max_t(loff_t, start,
			       XFS_FSB_TO_B(mp, map[0].br_startoff));
	} else {
		if (nmap == 1) {
			error = ENXIO;
			goto out_unlock;
		}

		offset = max_t(loff_t, start,
			       XFS_FSB_TO_B(mp, map[1].br_startoff));
	}

	if (offset != file->f_pos)
		file->f_pos = offset;

out_unlock:
	xfs_iunlock_map_shared(ip, lock);

	if (error)
		return -error;
	return offset;
}
示例#4
0
STATIC int
xfs_file_fsync(
	struct file		*file,
	struct dentry		*dentry,
	int			datasync)
{
	struct xfs_inode	*ip = XFS_I(dentry->d_inode);
	struct xfs_trans	*tp;
	int			error = 0;
	int			log_flushed = 0;

	xfs_itrace_entry(ip);

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -XFS_ERROR(EIO);

	xfs_iflags_clear(ip, XFS_ITRUNCATED);

	/*
	 * We always need to make sure that the required inode state is safe on
	 * disk.  The inode might be clean but we still might need to force the
	 * log because of committed transactions that haven't hit the disk yet.
	 * Likewise, there could be unflushed non-transactional changes to the
	 * inode core that have to go to disk and this requires us to issue
	 * a synchronous transaction to capture these changes correctly.
	 *
	 * This code relies on the assumption that if the i_update_core field
	 * of the inode is clear and the inode is unpinned then it is clean
	 * and no action is required.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);

	/*
	 * First check if the VFS inode is marked dirty.  All the dirtying
	 * of non-transactional updates no goes through mark_inode_dirty*,
	 * which allows us to distinguish beteeen pure timestamp updates
	 * and i_size updates which need to be caught for fdatasync.
	 * After that also theck for the dirty state in the XFS inode, which
	 * might gets cleared when the inode gets written out via the AIL
	 * or xfs_iflush_cluster.
	 */
	if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) ||
	    ((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
	    ip->i_update_core) {
		/*
		 * Kick off a transaction to log the inode core to get the
		 * updates.  The sync transaction will also force the log.
		 */
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
		error = xfs_trans_reserve(tp, 0,
				XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
		if (error) {
			xfs_trans_cancel(tp, 0);
			return -error;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);

		/*
		 * Note - it's possible that we might have pushed ourselves out
		 * of the way during trans_reserve which would flush the inode.
		 * But there's no guarantee that the inode buffer has actually
		 * gone out yet (it's delwri).	Plus the buffer could be pinned
		 * anyway if it's part of an inode in another recent
		 * transaction.	 So we play it safe and fire off the
		 * transaction anyway.
		 */
		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		xfs_trans_set_sync(tp);
		error = _xfs_trans_commit(tp, 0, &log_flushed);

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	} else {
		/*
		 * Timestamps/size haven't changed since last inode flush or
		 * inode transaction commit.  That means either nothing got
		 * written or a transaction committed which caught the updates.
		 * If the latter happened and the transaction hasn't hit the
		 * disk yet, the inode will be still be pinned.  If it is,
		 * force the log.
		 */
		if (xfs_ipincount(ip)) {
			error = _xfs_log_force_lsn(ip->i_mount,
					ip->i_itemp->ili_last_lsn,
					XFS_LOG_SYNC, &log_flushed);
		}
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
	}

	if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
		/*
		 * If the log write didn't issue an ordered tag we need
		 * to flush the disk cache for the data device now.
		 */
		if (!log_flushed)
			xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);

		/*
		 * If this inode is on the RT dev we need to flush that
		 * cache as well.
		 */
		if (XFS_IS_REALTIME_INODE(ip))
			xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
	}

	return -error;
}
示例#5
0
STATIC int
xfs_map_blocks(
	struct inode		*inode,
	loff_t			offset,
	struct xfs_bmbt_irec	*imap,
	int			type,
	int			nonblocking)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			count = 1 << inode->i_blkbits;
	xfs_fileoff_t		offset_fsb, end_fsb;
	int			error = 0;
	int			bmapi_flags = XFS_BMAPI_ENTIRE;
	int			nimaps = 1;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -XFS_ERROR(EIO);

	if (type == IO_UNWRITTEN)
		bmapi_flags |= XFS_BMAPI_IGSTATE;

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
		if (nonblocking)
			return -XFS_ERROR(EAGAIN);
		xfs_ilock(ip, XFS_ILOCK_SHARED);
	}

	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	       (ip->i_df.if_flags & XFS_IFEXTENTS));
	ASSERT(offset <= mp->m_maxioffset);

	if (offset + count > mp->m_maxioffset)
		count = mp->m_maxioffset - offset;
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
				imap, &nimaps, bmapi_flags);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (error)
		return -XFS_ERROR(error);

	if (type == IO_DELALLOC &&
	    (!nimaps || isnullstartblock(imap->br_startblock))) {
		error = xfs_iomap_write_allocate(ip, offset, count, imap);
		if (!error)
			trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
		return -XFS_ERROR(error);
	}

#ifdef DEBUG
	if (type == IO_UNWRITTEN) {
		ASSERT(nimaps);
		ASSERT(imap->br_startblock != HOLESTARTBLOCK);
		ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
	}
#endif
	if (nimaps)
		trace_xfs_map_blocks_found(ip, offset, count, type, imap);
	return 0;
}
示例#6
0
STATIC ssize_t
xfs_file_aio_write(
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned long		nr_segs,
	loff_t			pos)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	int			iolock;
	size_t			ocount = 0;

	XFS_STATS_INC(xs_write_calls);

	BUG_ON(iocb->ki_pos != pos);

	ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
	if (ret)
		return ret;

	if (ocount == 0)
		return 0;

	xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	if (unlikely(file->f_flags & O_DIRECT))
		ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
						ocount, &iolock);
	else
		ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
						ocount, &iolock);

	xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);

	if (ret <= 0)
		goto out_unlock;

	/* Handle various SYNC-type writes */
	if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
		loff_t end = pos + ret - 1;
		int error, error2;

		xfs_rw_iunlock(ip, iolock);
		error = filemap_write_and_wait_range(mapping, pos, end);
		xfs_rw_ilock(ip, iolock);

		error2 = -xfs_file_fsync(file,
					 (file->f_flags & __O_SYNC) ? 0 : 1);
		if (error)
			ret = error;
		else if (error2)
			ret = error2;
	}

out_unlock:
	xfs_aio_write_newsize_update(ip);
	xfs_rw_iunlock(ip, iolock);
	return ret;
}
示例#7
0
STATIC ssize_t
xfs_file_splice_write(
	struct pipe_inode_info	*pipe,
	struct file		*outfilp,
	loff_t			*ppos,
	size_t			count,
	unsigned int		flags)
{
	struct inode		*inode = outfilp->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fsize_t		isize, new_size;
	int			ioflags = 0;
	ssize_t			ret;

	XFS_STATS_INC(xs_write_calls);

	if (outfilp->f_mode & FMODE_NOCMTIME)
		ioflags |= IO_INVIS;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	xfs_ilock(ip, XFS_IOLOCK_EXCL);

	if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
		int iolock = XFS_IOLOCK_EXCL;
		int error;

		error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
					FILP_DELAY_FLAG(outfilp), &iolock);
		if (error) {
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return -error;
		}
	}

	new_size = *ppos + count;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (new_size > ip->i_size)
		ip->i_new_size = new_size;
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	trace_xfs_file_splice_write(ip, count, *ppos, ioflags);

	ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
	if (ret > 0)
		XFS_STATS_ADD(xs_write_bytes, ret);

	isize = i_size_read(inode);
	if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize))
		*ppos = isize;

	if (*ppos > ip->i_size) {
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		if (*ppos > ip->i_size)
			ip->i_size = *ppos;
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}

	if (ip->i_new_size) {
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		ip->i_new_size = 0;
		if (ip->i_d.di_size > ip->i_size)
			ip->i_d.di_size = ip->i_size;
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return ret;
}
示例#8
0
STATIC int
xfs_file_readdir(
	struct file	*filp,
	void		*dirent,
	filldir_t	filldir)
{
	struct inode	*inode = filp->f_path.dentry->d_inode;
	xfs_inode_t	*ip = XFS_I(inode);
	struct hack_callback buf;
	struct hack_dirent *de;
	int		error;
	loff_t		size;
	int		eof = 0;
	xfs_off_t       start_offset, curr_offset, offset;

	/*
	 * Try fairly hard to get memory
	 */
	buf.len = PAGE_CACHE_SIZE;
	do {
		buf.dirent = kmalloc(buf.len, GFP_KERNEL);
		if (buf.dirent)
			break;
		buf.len >>= 1;
	} while (buf.len >= 1024);

	if (!buf.dirent)
		return -ENOMEM;

	curr_offset = filp->f_pos;
	if (curr_offset == 0x7fffffff)
		offset = 0xffffffff;
	else
		offset = filp->f_pos;

	while (!eof) {
		unsigned int reclen;

		start_offset = offset;

		buf.used = 0;
		error = -xfs_readdir(ip, &buf, buf.len, &offset,
				     xfs_hack_filldir);
		if (error || offset == start_offset) {
			size = 0;
			break;
		}

		size = buf.used;
		de = (struct hack_dirent *)buf.dirent;
		curr_offset = de->offset /* & 0x7fffffff */;
		while (size > 0) {
			if (filldir(dirent, de->name, de->namlen,
					curr_offset & 0x7fffffff,
					de->ino, de->d_type)) {
				goto done;
			}

			reclen = ALIGN(sizeof(struct hack_dirent) + de->namlen,
				       sizeof(u64));
			size -= reclen;
			de = (struct hack_dirent *)((char *)de + reclen);
			curr_offset = de->offset /* & 0x7fffffff */;
		}
	}

 done:
	if (!error) {
		if (size == 0)
			filp->f_pos = offset & 0x7fffffff;
		else if (de)
			filp->f_pos = curr_offset;
	}

	kfree(buf.dirent);
	return error;
}
示例#9
0
/*
 * ioctl interface for swapext
 */
int
xfs_swapext(
	xfs_swapext_t	*sxp)
{
	xfs_inode_t     *ip, *tip;
	struct file	*file, *tmp_file;
	int		error = 0;

	/* Pull information for the target fd */
	file = fget((int)sxp->sx_fdtarget);
	if (!file) {
		error = XFS_ERROR(EINVAL);
		goto out;
	}

	if (!(file->f_mode & FMODE_WRITE) ||
	    !(file->f_mode & FMODE_READ) ||
	    (file->f_flags & O_APPEND)) {
		error = XFS_ERROR(EBADF);
		goto out_put_file;
	}

	tmp_file = fget((int)sxp->sx_fdtmp);
	if (!tmp_file) {
		error = XFS_ERROR(EINVAL);
		goto out_put_file;
	}

	if (!(tmp_file->f_mode & FMODE_WRITE) ||
	    !(tmp_file->f_mode & FMODE_READ) ||
	    (tmp_file->f_flags & O_APPEND)) {
		error = XFS_ERROR(EBADF);
		goto out_put_tmp_file;
	}

	if (IS_SWAPFILE(file->f_path.dentry->d_inode) ||
	    IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) {
		error = XFS_ERROR(EINVAL);
		goto out_put_tmp_file;
	}

	ip = XFS_I(file->f_path.dentry->d_inode);
	tip = XFS_I(tmp_file->f_path.dentry->d_inode);

	if (ip->i_mount != tip->i_mount) {
		error = XFS_ERROR(EINVAL);
		goto out_put_tmp_file;
	}

	if (ip->i_ino == tip->i_ino) {
		error = XFS_ERROR(EINVAL);
		goto out_put_tmp_file;
	}

	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		error = XFS_ERROR(EIO);
		goto out_put_tmp_file;
	}

	error = xfs_swap_extents(ip, tip, sxp);

 out_put_tmp_file:
	fput(tmp_file);
 out_put_file:
	fput(file);
 out:
	return error;
}
示例#10
0
STATIC int
xfs_vn_mknod(
	struct inode	*dir,
	struct dentry	*dentry,
	int		mode,
	dev_t		rdev)
{
	struct inode	*ip;
	bhv_vnode_t	*vp = NULL, *dvp = vn_from_inode(dir);
	xfs_acl_t	*default_acl = NULL;
	attrexists_t	test_default_acl = _ACL_DEFAULT_EXISTS;
	int		error;

	/*
	 * Irix uses Missed'em'V split, but doesn't want to see
	 * the upper 5 bits of (14bit) major.
	 */
	if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
		return -EINVAL;

	if (unlikely(test_default_acl && test_default_acl(dvp))) {
		if (!_ACL_ALLOC(default_acl)) {
			return -ENOMEM;
		}
		if (!_ACL_GET_DEFAULT(dvp, default_acl)) {
			_ACL_FREE(default_acl);
			default_acl = NULL;
		}
	}

	if (IS_POSIXACL(dir) && !default_acl && xfs_has_fs_struct(current))
		mode &= ~current->fs->umask;

	switch (mode & S_IFMT) {
	case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
		rdev = sysv_encode_dev(rdev);
	case S_IFREG:
		error = xfs_create(XFS_I(dir), dentry, mode, rdev, &vp, NULL);
		break;
	case S_IFDIR:
		error = xfs_mkdir(XFS_I(dir), dentry, mode, &vp, NULL);
		break;
	default:
		error = EINVAL;
		break;
	}

	if (unlikely(!error)) {
		error = xfs_init_security(vp, dir);
		if (error)
			xfs_cleanup_inode(dir, vp, dentry, mode);
	}

	if (unlikely(default_acl)) {
		if (!error) {
			error = _ACL_INHERIT(vp, mode, default_acl);
			if (!error)
				xfs_iflags_set(XFS_I(vp), XFS_IMODIFIED);
			else
				xfs_cleanup_inode(dir, vp, dentry, mode);
		}
		_ACL_FREE(default_acl);
	}

	if (likely(!error)) {
		ASSERT(vp);
		ip = vn_to_inode(vp);

		if (S_ISDIR(mode))
			xfs_validate_fields(ip);
		d_instantiate(dentry, ip);
		xfs_validate_fields(dir);
	}
	return -error;
}
示例#11
0
struct posix_acl *
xfs_get_acl(struct inode *inode, int type)
{
	struct xfs_inode *ip = XFS_I(inode);
	struct posix_acl *acl;
	struct xfs_acl *xfs_acl;
	unsigned char *ea_name;
	int error;
	int len;

	acl = get_cached_acl(inode, type);
	if (acl != ACL_NOT_CACHED)
		return acl;

	trace_xfs_get_acl(ip);

	switch (type) {
	case ACL_TYPE_ACCESS:
		ea_name = SGI_ACL_FILE;
		break;
	case ACL_TYPE_DEFAULT:
		ea_name = SGI_ACL_DEFAULT;
		break;
	default:
		BUG();
	}

	/*
	 * If we have a cached ACLs value just return it, not need to
	 * go out to the disk.
	 */
	len = XFS_ACL_MAX_SIZE(ip->i_mount);
	xfs_acl = kzalloc(len, GFP_KERNEL);
	if (!xfs_acl)
		return ERR_PTR(-ENOMEM);

	error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
							&len, ATTR_ROOT);
	if (error) {
		/*
		 * If the attribute doesn't exist make sure we have a negative
		 * cache entry, for any other error assume it is transient and
		 * leave the cache entry as ACL_NOT_CACHED.
		 */
		if (error == -ENOATTR) {
			acl = NULL;
			goto out_update_cache;
		}
		goto out;
	}

	acl = xfs_acl_from_disk(xfs_acl, XFS_ACL_MAX_ENTRIES(ip->i_mount));
	if (IS_ERR(acl))
		goto out;

 out_update_cache:
	set_cached_acl(inode, type, acl);
 out:
	kfree(xfs_acl);
	return acl;
}
示例#12
0
STATIC ssize_t
xfs_file_aio_read(
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned long		nr_segs,
	loff_t			pos)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	size_t			size = 0;
	ssize_t			ret = 0;
	int			ioflags = 0;
	xfs_fsize_t		n;

	XFS_STATS_INC(xs_read_calls);

	BUG_ON(iocb->ki_pos != pos);

	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;
	if (file->f_mode & FMODE_NOCMTIME)
		ioflags |= IO_INVIS;

	ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE);
	if (ret < 0)
		return ret;

	if (unlikely(ioflags & IO_ISDIRECT)) {
		xfs_buftarg_t	*target =
			XFS_IS_REALTIME_INODE(ip) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;
		/* DIO must be aligned to device logical sector size */
		if ((pos | size) & target->bt_logical_sectormask) {
			if (pos == i_size_read(inode))
				return 0;
			return -XFS_ERROR(EINVAL);
		}
	}

	n = mp->m_super->s_maxbytes - pos;
	if (n <= 0 || size == 0)
		return 0;

	if (n < size)
		size = n;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * Locking is a bit tricky here. If we take an exclusive lock
	 * for direct IO, we effectively serialise all new concurrent
	 * read IO to this file and block it behind IO that is currently in
	 * progress because IO in progress holds the IO lock shared. We only
	 * need to hold the lock exclusive to blow away the page cache, so
	 * only take lock exclusively if the page cache needs invalidation.
	 * This allows the normal direct IO case of no page cache pages to
	 * proceeed concurrently without serialisation.
	 */
	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);

		if (inode->i_mapping->nrpages) {
			ret = filemap_write_and_wait_range(
							VFS_I(ip)->i_mapping,
							pos, pos + size - 1);
			if (ret) {
				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
				return ret;
			}

			/*
			 * Invalidate whole pages. This can return an error if
			 * we fail to invalidate a page, but this should never
			 * happen on XFS. Warn if it does fail.
			 */
			ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
					pos >> PAGE_CACHE_SHIFT,
					(pos + size - 1) >> PAGE_CACHE_SHIFT);
			WARN_ON_ONCE(ret);
			ret = 0;
		}
示例#13
0
int
xfs_ioc_space(
	struct file		*filp,
	unsigned int		cmd,
	xfs_flock64_t		*bf)
{
	struct inode		*inode = file_inode(filp);
	struct xfs_inode	*ip = XFS_I(inode);
	struct iattr		iattr;
	enum xfs_prealloc_flags	flags = 0;
	uint			iolock = XFS_IOLOCK_EXCL;
	int			error;

	/*
	 * Only allow the sys admin to reserve space unless
	 * unwritten extents are enabled.
	 */
	if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
	    !capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
		return -EPERM;

	if (!(filp->f_mode & FMODE_WRITE))
		return -EBADF;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (filp->f_flags & O_DSYNC)
		flags |= XFS_PREALLOC_SYNC;
	if (filp->f_mode & FMODE_NOCMTIME)
		flags |= XFS_PREALLOC_INVISIBLE;

	error = mnt_want_write_file(filp);
	if (error)
		return error;

	xfs_ilock(ip, iolock);
	error = xfs_break_layouts(inode, &iolock);
	if (error)
		goto out_unlock;

	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
	iolock |= XFS_MMAPLOCK_EXCL;

	switch (bf->l_whence) {
	case 0: /*SEEK_SET*/
		break;
	case 1: /*SEEK_CUR*/
		bf->l_start += filp->f_pos;
		break;
	case 2: /*SEEK_END*/
		bf->l_start += XFS_ISIZE(ip);
		break;
	default:
		error = -EINVAL;
		goto out_unlock;
	}

	/*
	 * length of <= 0 for resv/unresv/zero is invalid.  length for
	 * alloc/free is ignored completely and we have no idea what userspace
	 * might have set it to, so set it to zero to allow range
	 * checks to pass.
	 */
	switch (cmd) {
	case XFS_IOC_ZERO_RANGE:
	case XFS_IOC_RESVSP:
	case XFS_IOC_RESVSP64:
	case XFS_IOC_UNRESVSP:
	case XFS_IOC_UNRESVSP64:
		if (bf->l_len <= 0) {
			error = -EINVAL;
			goto out_unlock;
		}
		break;
	default:
		bf->l_len = 0;
		break;
	}

	if (bf->l_start < 0 ||
	    bf->l_start > inode->i_sb->s_maxbytes ||
	    bf->l_start + bf->l_len < 0 ||
	    bf->l_start + bf->l_len >= inode->i_sb->s_maxbytes) {
		error = -EINVAL;
		goto out_unlock;
	}

	switch (cmd) {
	case XFS_IOC_ZERO_RANGE:
		flags |= XFS_PREALLOC_SET;
		error = xfs_zero_file_space(ip, bf->l_start, bf->l_len);
		break;
	case XFS_IOC_RESVSP:
	case XFS_IOC_RESVSP64:
		flags |= XFS_PREALLOC_SET;
		error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len,
						XFS_BMAPI_PREALLOC);
		break;
	case XFS_IOC_UNRESVSP:
	case XFS_IOC_UNRESVSP64:
		error = xfs_free_file_space(ip, bf->l_start, bf->l_len);
		break;
	case XFS_IOC_ALLOCSP:
	case XFS_IOC_ALLOCSP64:
	case XFS_IOC_FREESP:
	case XFS_IOC_FREESP64:
		flags |= XFS_PREALLOC_CLEAR;
		if (bf->l_start > XFS_ISIZE(ip)) {
			error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
					bf->l_start - XFS_ISIZE(ip), 0);
			if (error)
				goto out_unlock;
		}

		iattr.ia_valid = ATTR_SIZE;
		iattr.ia_size = bf->l_start;

		error = xfs_vn_setattr_size(file_dentry(filp), &iattr);
		break;
	default:
		ASSERT(0);
		error = -EINVAL;
	}

	if (error)
		goto out_unlock;

	error = xfs_update_prealloc_flags(ip, flags);

out_unlock:
	xfs_iunlock(ip, iolock);
	mnt_drop_write_file(filp);
	return error;
}
示例#14
0
STATIC long
xfs_file_fallocate(
	struct file		*file,
	int			mode,
	loff_t			offset,
	loff_t			len)
{
	struct inode		*inode = file_inode(file);
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_trans	*tp;
	long			error;
	loff_t			new_size = 0;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;
	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
		return -EOPNOTSUPP;

	xfs_ilock(ip, XFS_IOLOCK_EXCL);
	if (mode & FALLOC_FL_PUNCH_HOLE) {
		error = xfs_free_file_space(ip, offset, len);
		if (error)
			goto out_unlock;
	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
		unsigned blksize_mask = (1 << inode->i_blkbits) - 1;

		if (offset & blksize_mask || len & blksize_mask) {
			error = -EINVAL;
			goto out_unlock;
		}

		ASSERT(offset + len < i_size_read(inode));
		new_size = i_size_read(inode) - len;

		error = xfs_collapse_file_space(ip, offset, len);
		if (error)
			goto out_unlock;
	} else {
		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    offset + len > i_size_read(inode)) {
			new_size = offset + len;
			error = -inode_newsize_ok(inode, new_size);
			if (error)
				goto out_unlock;
		}

		if (mode & FALLOC_FL_ZERO_RANGE)
			error = xfs_zero_file_space(ip, offset, len);
		else
			error = xfs_alloc_file_space(ip, offset, len,
						     XFS_BMAPI_PREALLOC);
		if (error)
			goto out_unlock;
	}

	tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
	error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		goto out_unlock;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	ip->i_d.di_mode &= ~S_ISUID;
	if (ip->i_d.di_mode & S_IXGRP)
		ip->i_d.di_mode &= ~S_ISGID;

	if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;

	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	if (file->f_flags & O_DSYNC)
		xfs_trans_set_sync(tp);
	error = xfs_trans_commit(tp, 0);
	if (error)
		goto out_unlock;

	/* Change file size if needed */
	if (new_size) {
		struct iattr iattr;

		iattr.ia_valid = ATTR_SIZE;
		iattr.ia_size = new_size;
		error = xfs_setattr_size(ip, &iattr);
	}

out_unlock:
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return -error;
}
示例#15
0
/*
 * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
 * a file or fs handle.
 *
 * XFS_IOC_PATH_TO_FSHANDLE
 *    returns fs handle for a mount point or path within that mount point
 * XFS_IOC_FD_TO_HANDLE
 *    returns full handle for a FD opened in user space
 * XFS_IOC_PATH_TO_HANDLE
 *    returns full handle for a path
 */
STATIC int
xfs_find_handle(
	unsigned int		cmd,
	void			__user *arg)
{
	int			hsize;
	xfs_handle_t		handle;
	xfs_fsop_handlereq_t	hreq;
	struct inode		*inode;

	if (copy_from_user(&hreq, arg, sizeof(hreq)))
		return -XFS_ERROR(EFAULT);

	memset((char *)&handle, 0, sizeof(handle));

	switch (cmd) {
	case XFS_IOC_PATH_TO_FSHANDLE:
	case XFS_IOC_PATH_TO_HANDLE: {
		struct nameidata	nd;
		int			error;

		error = user_path_walk_link((const char __user *)hreq.path, &nd);
		if (error)
			return error;

		ASSERT(nd.path.dentry);
		ASSERT(nd.path.dentry->d_inode);
		inode = igrab(nd.path.dentry->d_inode);
		path_put(&nd.path);
		break;
	}

	case XFS_IOC_FD_TO_HANDLE: {
		struct file	*file;

		file = fget(hreq.fd);
		if (!file)
		    return -EBADF;

		ASSERT(file->f_path.dentry);
		ASSERT(file->f_path.dentry->d_inode);
		inode = igrab(file->f_path.dentry->d_inode);
		fput(file);
		break;
	}

	default:
		ASSERT(0);
		return -XFS_ERROR(EINVAL);
	}

	if (inode->i_sb->s_magic != XFS_SB_MAGIC) {
		/* we're not in XFS anymore, Toto */
		iput(inode);
		return -XFS_ERROR(EINVAL);
	}

	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		break;
	default:
		iput(inode);
		return -XFS_ERROR(EBADF);
	}

	/* now we can grab the fsid */
	memcpy(&handle.ha_fsid, XFS_I(inode)->i_mount->m_fixedfsid,
			sizeof(xfs_fsid_t));
	hsize = sizeof(xfs_fsid_t);

	if (cmd != XFS_IOC_PATH_TO_FSHANDLE) {
		xfs_inode_t	*ip = XFS_I(inode);
		int		lock_mode;

		/* need to get access to the xfs_inode to read the generation */
		lock_mode = xfs_ilock_map_shared(ip);

		/* fill in fid section of handle from inode */
		handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
					sizeof(handle.ha_fid.fid_len);
		handle.ha_fid.fid_pad = 0;
		handle.ha_fid.fid_gen = ip->i_d.di_gen;
		handle.ha_fid.fid_ino = ip->i_ino;

		xfs_iunlock_map_shared(ip, lock_mode);

		hsize = XFS_HSIZE(handle);
	}

	/* now copy our handle into the user buffer & write out the size */
	if (copy_to_user(hreq.ohandle, &handle, hsize) ||
	    copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) {
		iput(inode);
		return -XFS_ERROR(EFAULT);
	}

	iput(inode);
	return 0;
}
示例#16
0
/*
 * Link a range of blocks from one file to another.
 */
int
xfs_reflink_remap_range(
	struct file		*file_in,
	loff_t			pos_in,
	struct file		*file_out,
	loff_t			pos_out,
	u64			len,
	bool			is_dedupe)
{
	struct inode		*inode_in = file_inode(file_in);
	struct xfs_inode	*src = XFS_I(inode_in);
	struct inode		*inode_out = file_inode(file_out);
	struct xfs_inode	*dest = XFS_I(inode_out);
	struct xfs_mount	*mp = src->i_mount;
	bool			same_inode = (inode_in == inode_out);
	xfs_fileoff_t		sfsbno, dfsbno;
	xfs_filblks_t		fsblen;
	xfs_extlen_t		cowextsize;
	ssize_t			ret;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return -EOPNOTSUPP;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/* Lock both files against IO */
	lock_two_nondirectories(inode_in, inode_out);
	if (same_inode)
		xfs_ilock(src, XFS_MMAPLOCK_EXCL);
	else
		xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);

	/* Check file eligibility and prepare for block sharing. */
	ret = -EINVAL;
	/* Don't reflink realtime inodes */
	if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
		goto out_unlock;

	/* Don't share DAX file data for now. */
	if (IS_DAX(inode_in) || IS_DAX(inode_out))
		goto out_unlock;

	ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
			&len, is_dedupe);
	if (ret <= 0)
		goto out_unlock;

	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);

	/* Set flags and remap blocks. */
	ret = xfs_reflink_set_inode_flag(src, dest);
	if (ret)
		goto out_unlock;

	dfsbno = XFS_B_TO_FSBT(mp, pos_out);
	sfsbno = XFS_B_TO_FSBT(mp, pos_in);
	fsblen = XFS_B_TO_FSB(mp, len);
	ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
			pos_out + len);
	if (ret)
		goto out_unlock;

	/* Zap any page cache for the destination file's range. */
	truncate_inode_pages_range(&inode_out->i_data, pos_out,
				   PAGE_ALIGN(pos_out + len) - 1);

	/*
	 * Carry the cowextsize hint from src to dest if we're sharing the
	 * entire source file to the entire destination file, the source file
	 * has a cowextsize hint, and the destination file does not.
	 */
	cowextsize = 0;
	if (pos_in == 0 && len == i_size_read(inode_in) &&
	    (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
	    pos_out == 0 && len >= i_size_read(inode_out) &&
	    !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
		cowextsize = src->i_d.di_cowextsize;

	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
			is_dedupe);

out_unlock:
	xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
	if (!same_inode)
		xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
	unlock_two_nondirectories(inode_in, inode_out);
	if (ret)
		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
	return ret;
}
示例#17
0
/*
 * xfs_file_dio_aio_write - handle direct IO writes
 *
 * Lock the inode appropriately to prepare for and issue a direct IO write.
 * By separating it from the buffered write path we remove all the tricky to
 * follow locking changes and looping.
 *
 * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
 * until we're sure the bytes at the new EOF have been zeroed and/or the cached
 * pages are flushed out.
 *
 * In most cases the direct IO writes will be done holding IOLOCK_SHARED
 * allowing them to be done in parallel with reads and other direct IO writes.
 * However, if the IO is not aligned to filesystem blocks, the direct IO layer
 * needs to do sub-block zeroing and that requires serialisation against other
 * direct IOs to the same block. In this case we need to serialise the
 * submission of the unaligned IOs so that we don't get racing block zeroing in
 * the dio layer.  To avoid the problem with aio, we also need to wait for
 * outstanding IOs to complete so that unwritten extent conversion is completed
 * before we try to map the overlapping block. This is currently implemented by
 * hitting it with a big hammer (i.e. xfs_ioend_wait()).
 *
 * Returns with locks held indicated by @iolock and errors indicated by
 * negative return values.
 */
STATIC ssize_t
xfs_file_dio_aio_write(
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned long		nr_segs,
	loff_t			pos,
	size_t			ocount,
	int			*iolock)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0;
	size_t			count = ocount;
	int			unaligned_io = 0;
	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
					mp->m_rtdev_targp : mp->m_ddev_targp;

	*iolock = 0;
	if ((pos & target->bt_smask) || (count & target->bt_smask))
		return -XFS_ERROR(EINVAL);

	if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
		unaligned_io = 1;

	if (unaligned_io || mapping->nrpages || pos > ip->i_size)
		*iolock = XFS_IOLOCK_EXCL;
	else
		*iolock = XFS_IOLOCK_SHARED;
	xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);

	ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
	if (ret)
		return ret;

	if (mapping->nrpages) {
		WARN_ON(*iolock != XFS_IOLOCK_EXCL);
		ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
							FI_REMAPF_LOCKED);
		if (ret)
			return ret;
	}

	/*
	 * If we are doing unaligned IO, wait for all other IO to drain,
	 * otherwise demote the lock if we had to flush cached pages
	 */
	if (unaligned_io)
		xfs_ioend_wait(ip);
	else if (*iolock == XFS_IOLOCK_EXCL) {
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
		*iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
	ret = generic_file_direct_write(iocb, iovp,
			&nr_segs, pos, &iocb->ki_pos, count, ocount);

	/* No fallback to buffered IO on errors for XFS. */
	ASSERT(ret < 0 || ret == count);
	return ret;
}
示例#18
0
STATIC int
xfs_file_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error = 0;
	int			log_flushed = 0;

	trace_xfs_file_fsync(ip);

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -XFS_ERROR(EIO);

	xfs_iflags_clear(ip, XFS_ITRUNCATED);

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	xfs_ioend_wait(ip);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	if (mp->m_flags & XFS_MOUNT_BARRIER) {
		/*
		 * If we have an RT and/or log subvolume we need to make sure
		 * to flush the write cache the device used for file data
		 * first.  This is to ensure newly written file data make
		 * it to disk before logging the new inode size in case of
		 * an extending write.
		 */
		if (XFS_IS_REALTIME_INODE(ip))
			xfs_blkdev_issue_flush(mp->m_rtdev_targp);
		else if (mp->m_logdev_targp != mp->m_ddev_targp)
			xfs_blkdev_issue_flush(mp->m_ddev_targp);
	}

	/*
	 * We always need to make sure that the required inode state is safe on
	 * disk.  The inode might be clean but we still might need to force the
	 * log because of committed transactions that haven't hit the disk yet.
	 * Likewise, there could be unflushed non-transactional changes to the
	 * inode core that have to go to disk and this requires us to issue
	 * a synchronous transaction to capture these changes correctly.
	 *
	 * This code relies on the assumption that if the i_update_core field
	 * of the inode is clear and the inode is unpinned then it is clean
	 * and no action is required.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);

	/*
	 * First check if the VFS inode is marked dirty.  All the dirtying
	 * of non-transactional updates no goes through mark_inode_dirty*,
	 * which allows us to distinguish beteeen pure timestamp updates
	 * and i_size updates which need to be caught for fdatasync.
	 * After that also theck for the dirty state in the XFS inode, which
	 * might gets cleared when the inode gets written out via the AIL
	 * or xfs_iflush_cluster.
	 */
	if (((inode->i_state & I_DIRTY_DATASYNC) ||
	    ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
	    ip->i_update_core) {
		/*
		 * Kick off a transaction to log the inode core to get the
		 * updates.  The sync transaction will also force the log.
		 */
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
		error = xfs_trans_reserve(tp, 0,
				XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
		if (error) {
			xfs_trans_cancel(tp, 0);
			return -error;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);

		/*
		 * Note - it's possible that we might have pushed ourselves out
		 * of the way during trans_reserve which would flush the inode.
		 * But there's no guarantee that the inode buffer has actually
		 * gone out yet (it's delwri).	Plus the buffer could be pinned
		 * anyway if it's part of an inode in another recent
		 * transaction.	 So we play it safe and fire off the
		 * transaction anyway.
		 */
		xfs_trans_ijoin(tp, ip);
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		xfs_trans_set_sync(tp);
		error = _xfs_trans_commit(tp, 0, &log_flushed);

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	} else {
		/*
		 * Timestamps/size haven't changed since last inode flush or
		 * inode transaction commit.  That means either nothing got
		 * written or a transaction committed which caught the updates.
		 * If the latter happened and the transaction hasn't hit the
		 * disk yet, the inode will be still be pinned.  If it is,
		 * force the log.
		 */
		if (xfs_ipincount(ip)) {
			error = _xfs_log_force_lsn(mp,
					ip->i_itemp->ili_last_lsn,
					XFS_LOG_SYNC, &log_flushed);
		}
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
	}

	/*
	 * If we only have a single device, and the log force about was
	 * a no-op we might have to flush the data device cache here.
	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
	 * an already allocated file and thus do not have any metadata to
	 * commit.
	 */
	if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
	    mp->m_logdev_targp == mp->m_ddev_targp &&
	    !XFS_IS_REALTIME_INODE(ip) &&
	    !log_flushed)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	return -error;
}
示例#19
0
STATIC ssize_t
xfs_file_aio_read(
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned long		nr_segs,
	loff_t			pos)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	size_t			size = 0;
	ssize_t			ret = 0;
	int			ioflags = 0;
	xfs_fsize_t		n;
	unsigned long		seg;

	XFS_STATS_INC(xs_read_calls);

	BUG_ON(iocb->ki_pos != pos);

	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;
	if (file->f_mode & FMODE_NOCMTIME)
		ioflags |= IO_INVIS;

	/* START copy & waste from filemap.c */
	for (seg = 0; seg < nr_segs; seg++) {
		const struct iovec *iv = &iovp[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		size += iv->iov_len;
		if (unlikely((ssize_t)(size|iv->iov_len) < 0))
			return XFS_ERROR(-EINVAL);
	}
	/* END copy & waste from filemap.c */

	if (unlikely(ioflags & IO_ISDIRECT)) {
		xfs_buftarg_t	*target =
			XFS_IS_REALTIME_INODE(ip) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;
		if ((iocb->ki_pos & target->bt_smask) ||
		    (size & target->bt_smask)) {
			if (iocb->ki_pos == ip->i_size)
				return 0;
			return -XFS_ERROR(EINVAL);
		}
	}

	n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
	if (n <= 0 || size == 0)
		return 0;

	if (n < size)
		size = n;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	if (unlikely(ioflags & IO_ISDIRECT))
		mutex_lock(&inode->i_mutex);
	xfs_ilock(ip, XFS_IOLOCK_SHARED);

	if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
		int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
		int iolock = XFS_IOLOCK_SHARED;

		ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, iocb->ki_pos, size,
					dmflags, &iolock);
		if (ret) {
			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
			if (unlikely(ioflags & IO_ISDIRECT))
				mutex_unlock(&inode->i_mutex);
			return ret;
		}
	}

	if (unlikely(ioflags & IO_ISDIRECT)) {
		if (inode->i_mapping->nrpages) {
			ret = -xfs_flushinval_pages(ip,
					(iocb->ki_pos & PAGE_CACHE_MASK),
					-1, FI_REMAPF_LOCKED);
		}
		mutex_unlock(&inode->i_mutex);
		if (ret) {
			xfs_iunlock(ip, XFS_IOLOCK_SHARED);
			return ret;
		}
	}

	trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);

	ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
	if (ret > 0)
		XFS_STATS_ADD(xs_read_bytes, ret);

	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
	return ret;
}
示例#20
0
/*
 * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
 * a file or fs handle.
 *
 * XFS_IOC_PATH_TO_FSHANDLE
 *    returns fs handle for a mount point or path within that mount point
 * XFS_IOC_FD_TO_HANDLE
 *    returns full handle for a FD opened in user space
 * XFS_IOC_PATH_TO_HANDLE
 *    returns full handle for a path
 */
int
xfs_find_handle(
	unsigned int		cmd,
	xfs_fsop_handlereq_t	*hreq)
{
	int			hsize;
	xfs_handle_t		handle;
	struct inode		*inode;
	struct fd		f = {0};
	struct path		path;
	int			error;
	struct xfs_inode	*ip;

	if (cmd == XFS_IOC_FD_TO_HANDLE) {
		f = fdget(hreq->fd);
		if (!f.file)
			return -EBADF;
		inode = file_inode(f.file);
	} else {
		error = user_lpath((const char __user *)hreq->path, &path);
		if (error)
			return error;
		inode = path.dentry->d_inode;
	}
	ip = XFS_I(inode);

	/*
	 * We can only generate handles for inodes residing on a XFS filesystem,
	 * and only for regular files, directories or symbolic links.
	 */
	error = -EINVAL;
	if (inode->i_sb->s_magic != XFS_SB_MAGIC)
		goto out_put;

	error = -EBADF;
	if (!S_ISREG(inode->i_mode) &&
	    !S_ISDIR(inode->i_mode) &&
	    !S_ISLNK(inode->i_mode))
		goto out_put;


	memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));

	if (cmd == XFS_IOC_PATH_TO_FSHANDLE) {
		/*
		 * This handle only contains an fsid, zero the rest.
		 */
		memset(&handle.ha_fid, 0, sizeof(handle.ha_fid));
		hsize = sizeof(xfs_fsid_t);
	} else {
		int		lock_mode;

		lock_mode = xfs_ilock_map_shared(ip);
		handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
					sizeof(handle.ha_fid.fid_len);
		handle.ha_fid.fid_pad = 0;
		handle.ha_fid.fid_gen = ip->i_d.di_gen;
		handle.ha_fid.fid_ino = ip->i_ino;
		xfs_iunlock_map_shared(ip, lock_mode);

		hsize = XFS_HSIZE(handle);
	}

	error = -EFAULT;
	if (hsize > sizeof handle || copy_to_user(hreq->ohandle, &handle, hsize) ||
	    copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32)))
		goto out_put;

	error = 0;

 out_put:
	if (cmd == XFS_IOC_FD_TO_HANDLE)
		fdput(f);
	else
		path_put(&path);
	return error;
}
示例#21
0
STATIC ssize_t
xfs_file_aio_write(
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned long		nr_segs,
	loff_t			pos)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0, error = 0;
	int			ioflags = 0;
	xfs_fsize_t		isize, new_size;
	int			iolock;
	int			eventsent = 0;
	size_t			ocount = 0, count;
	int			need_i_mutex;

	XFS_STATS_INC(xs_write_calls);

	BUG_ON(iocb->ki_pos != pos);

	if (unlikely(file->f_flags & O_DIRECT))
		ioflags |= IO_ISDIRECT;
	if (file->f_mode & FMODE_NOCMTIME)
		ioflags |= IO_INVIS;

	error = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
	if (error)
		return error;

	count = ocount;
	if (count == 0)
		return 0;

	xfs_wait_for_freeze(mp, SB_FREEZE_WRITE);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

relock:
	if (ioflags & IO_ISDIRECT) {
		iolock = XFS_IOLOCK_SHARED;
		need_i_mutex = 0;
	} else {
		iolock = XFS_IOLOCK_EXCL;
		need_i_mutex = 1;
		mutex_lock(&inode->i_mutex);
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);

start:
	error = -generic_write_checks(file, &pos, &count,
					S_ISBLK(inode->i_mode));
	if (error) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
		goto out_unlock_mutex;
	}

	if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) &&
	    !(ioflags & IO_INVIS) && !eventsent)) {
		int		dmflags = FILP_DELAY_FLAG(file);

		if (need_i_mutex)
			dmflags |= DM_FLAGS_IMUX;

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip,
				      pos, count, dmflags, &iolock);
		if (error) {
			goto out_unlock_internal;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		eventsent = 1;

		/*
		 * The iolock was dropped and reacquired in XFS_SEND_DATA
		 * so we have to recheck the size when appending.
		 * We will only "goto start;" once, since having sent the
		 * event prevents another call to XFS_SEND_DATA, which is
		 * what allows the size to change in the first place.
		 */
		if ((file->f_flags & O_APPEND) && pos != ip->i_size)
			goto start;
	}

	if (ioflags & IO_ISDIRECT) {
		xfs_buftarg_t	*target =
			XFS_IS_REALTIME_INODE(ip) ?
				mp->m_rtdev_targp : mp->m_ddev_targp;

		if ((pos & target->bt_smask) || (count & target->bt_smask)) {
			xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
			return XFS_ERROR(-EINVAL);
		}

		if (!need_i_mutex && (mapping->nrpages || pos > ip->i_size)) {
			xfs_iunlock(ip, XFS_ILOCK_EXCL|iolock);
			iolock = XFS_IOLOCK_EXCL;
			need_i_mutex = 1;
			mutex_lock(&inode->i_mutex);
			xfs_ilock(ip, XFS_ILOCK_EXCL|iolock);
			goto start;
		}
	}

	new_size = pos + count;
	if (new_size > ip->i_size)
		ip->i_new_size = new_size;

	if (likely(!(ioflags & IO_INVIS)))
		file_update_time(file);

	/*
	 * If the offset is beyond the size of the file, we have a couple
	 * of things to do. First, if there is already space allocated
	 * we need to either create holes or zero the disk or ...
	 *
	 * If there is a page where the previous size lands, we need
	 * to zero it out up to the new size.
	 */

	if (pos > ip->i_size) {
		error = xfs_zero_eof(ip, pos, ip->i_size);
		if (error) {
			xfs_iunlock(ip, XFS_ILOCK_EXCL);
			goto out_unlock_internal;
		}
	}
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	/*
	 * If we're writing the file then make sure to clear the
	 * setuid and setgid bits if the process is not being run
	 * by root.  This keeps people from modifying setuid and
	 * setgid binaries.
	 */
	error = -file_remove_suid(file);
	if (unlikely(error))
		goto out_unlock_internal;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = mapping->backing_dev_info;

	if ((ioflags & IO_ISDIRECT)) {
		if (mapping->nrpages) {
			WARN_ON(need_i_mutex == 0);
			error = xfs_flushinval_pages(ip,
					(pos & PAGE_CACHE_MASK),
					-1, FI_REMAPF_LOCKED);
			if (error)
				goto out_unlock_internal;
		}

		if (need_i_mutex) {
			/* demote the lock now the cached pages are gone */
			xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
			mutex_unlock(&inode->i_mutex);

			iolock = XFS_IOLOCK_SHARED;
			need_i_mutex = 0;
		}

		trace_xfs_file_direct_write(ip, count, iocb->ki_pos, ioflags);
		ret = generic_file_direct_write(iocb, iovp,
				&nr_segs, pos, &iocb->ki_pos, count, ocount);

		/*
		 * direct-io write to a hole: fall through to buffered I/O
		 * for completing the rest of the request.
		 */
		if (ret >= 0 && ret != count) {
			XFS_STATS_ADD(xs_write_bytes, ret);

			pos += ret;
			count -= ret;

			ioflags &= ~IO_ISDIRECT;
			xfs_iunlock(ip, iolock);
			goto relock;
		}
	} else {
		int enospc = 0;
		ssize_t ret2 = 0;

write_retry:
		trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, ioflags);
		ret2 = generic_file_buffered_write(iocb, iovp, nr_segs,
				pos, &iocb->ki_pos, count, ret);
		/*
		 * if we just got an ENOSPC, flush the inode now we
		 * aren't holding any page locks and retry *once*
		 */
		if (ret2 == -ENOSPC && !enospc) {
			error = xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
			if (error)
				goto out_unlock_internal;
			enospc = 1;
			goto write_retry;
		}
		ret = ret2;
	}

	current->backing_dev_info = NULL;

	isize = i_size_read(inode);
	if (unlikely(ret < 0 && ret != -EFAULT && iocb->ki_pos > isize))
		iocb->ki_pos = isize;

	if (iocb->ki_pos > ip->i_size) {
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		if (iocb->ki_pos > ip->i_size)
			ip->i_size = iocb->ki_pos;
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}

	if (ret == -ENOSPC &&
	    DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
		xfs_iunlock(ip, iolock);
		if (need_i_mutex)
			mutex_unlock(&inode->i_mutex);
		error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip,
				DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL,
				0, 0, 0); /* Delay flag intentionally  unused */
		if (need_i_mutex)
			mutex_lock(&inode->i_mutex);
		xfs_ilock(ip, iolock);
		if (error)
			goto out_unlock_internal;
		goto start;
	}

	error = -ret;
	if (ret <= 0)
		goto out_unlock_internal;

	XFS_STATS_ADD(xs_write_bytes, ret);

	/* Handle various SYNC-type writes */
	if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
		loff_t end = pos + ret - 1;
		int error2;

		xfs_iunlock(ip, iolock);
		if (need_i_mutex)
			mutex_unlock(&inode->i_mutex);

		error2 = filemap_write_and_wait_range(mapping, pos, end);
		if (!error)
			error = error2;
		if (need_i_mutex)
			mutex_lock(&inode->i_mutex);
		xfs_ilock(ip, iolock);

		error2 = -xfs_file_fsync(file, file->f_path.dentry,
					 (file->f_flags & __O_SYNC) ? 0 : 1);
		if (!error)
			error = error2;
	}

 out_unlock_internal:
	if (ip->i_new_size) {
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		ip->i_new_size = 0;
		/*
		 * If this was a direct or synchronous I/O that failed (such
		 * as ENOSPC) then part of the I/O may have been written to
		 * disk before the error occured.  In this case the on-disk
		 * file size may have been adjusted beyond the in-memory file
		 * size and now needs to be truncated back.
		 */
		if (ip->i_d.di_size > ip->i_size)
			ip->i_d.di_size = ip->i_size;
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}
	xfs_iunlock(ip, iolock);
 out_unlock_mutex:
	if (need_i_mutex)
		mutex_unlock(&inode->i_mutex);
	return -error;
}
示例#22
0
文件: xfs_iops.c 项目: gxt/linux
STATIC int
xfs_generic_create(
	struct inode	*dir,
	struct dentry	*dentry,
	umode_t		mode,
	dev_t		rdev,
	bool		tmpfile)	/* unnamed file */
{
	struct inode	*inode;
	struct xfs_inode *ip = NULL;
	struct posix_acl *default_acl, *acl;
	struct xfs_name	name;
	int		error;

	/*
	 * Irix uses Missed'em'V split, but doesn't want to see
	 * the upper 5 bits of (14bit) major.
	 */
	if (S_ISCHR(mode) || S_ISBLK(mode)) {
		if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
			return -EINVAL;
		rdev = sysv_encode_dev(rdev);
	} else {
		rdev = 0;
	}

	error = posix_acl_create(dir, &mode, &default_acl, &acl);
	if (error)
		return error;

	if (!tmpfile) {
		xfs_dentry_to_name(&name, dentry, mode);
		error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
	} else {
		error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);
	}
	if (unlikely(error))
		goto out_free_acl;

	inode = VFS_I(ip);

	error = xfs_init_security(inode, dir, &dentry->d_name);
	if (unlikely(error))
		goto out_cleanup_inode;

#ifdef CONFIG_XFS_POSIX_ACL
	if (default_acl) {
		error = xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
		if (error)
			goto out_cleanup_inode;
	}
	if (acl) {
		error = xfs_set_acl(inode, acl, ACL_TYPE_ACCESS);
		if (error)
			goto out_cleanup_inode;
	}
#endif

	xfs_setup_iops(ip);

	if (tmpfile)
		d_tmpfile(dentry, inode);
	else
		d_instantiate(dentry, inode);

	xfs_finish_inode_setup(ip);

 out_free_acl:
	if (default_acl)
		posix_acl_release(default_acl);
	if (acl)
		posix_acl_release(acl);
	return error;

 out_cleanup_inode:
	xfs_finish_inode_setup(ip);
	if (!tmpfile)
		xfs_cleanup_inode(dir, inode, dentry);
	iput(inode);
	goto out_free_acl;
}
示例#23
0
/*
 * Fast and loose check if this write could update the on-disk inode size.
 */
static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
{
	return ioend->io_offset + ioend->io_size >
		XFS_I(ioend->io_inode)->i_d.di_size;
}
示例#24
0
STATIC int
xfs_vn_mknod(
	struct inode	*dir,
	struct dentry	*dentry,
	int		mode,
	dev_t		rdev)
{
	struct inode	*inode;
	struct xfs_inode *ip = NULL;
	struct posix_acl *default_acl = NULL;
	struct xfs_name	name;
	int		error;

	/*
	 * Irix uses Missed'em'V split, but doesn't want to see
	 * the upper 5 bits of (14bit) major.
	 */
	if (S_ISCHR(mode) || S_ISBLK(mode)) {
		if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
			return -EINVAL;
		rdev = sysv_encode_dev(rdev);
	} else {
		rdev = 0;
	}

	if (IS_POSIXACL(dir)) {
		default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
		if (IS_ERR(default_acl))
			return PTR_ERR(default_acl);

		if (!default_acl)
			mode &= ~current_umask();
	}

	xfs_dentry_to_name(&name, dentry);
	error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
	if (unlikely(error))
		goto out_free_acl;

	inode = VFS_I(ip);

	error = xfs_init_security(inode, dir, &dentry->d_name);
	if (unlikely(error))
		goto out_cleanup_inode;

	if (default_acl) {
		error = -xfs_inherit_acl(inode, default_acl);
		if (unlikely(error))
			goto out_cleanup_inode;
		posix_acl_release(default_acl);
	}

	d_instantiate(dentry, inode);
	return -error;

 out_cleanup_inode:
	xfs_cleanup_inode(dir, inode, dentry);
 out_free_acl:
	posix_acl_release(default_acl);
	return -error;
}
示例#25
0
STATIC int
xfs_file_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	int			error = 0;
	int			log_flushed = 0;
	xfs_lsn_t		lsn = 0;

	trace_xfs_file_fsync(ip);

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -XFS_ERROR(EIO);

	xfs_iflags_clear(ip, XFS_ITRUNCATED);

	if (mp->m_flags & XFS_MOUNT_BARRIER) {
		/*
		 * If we have an RT and/or log subvolume we need to make sure
		 * to flush the write cache the device used for file data
		 * first.  This is to ensure newly written file data make
		 * it to disk before logging the new inode size in case of
		 * an extending write.
		 */
		if (XFS_IS_REALTIME_INODE(ip))
			xfs_blkdev_issue_flush(mp->m_rtdev_targp);
		else if (mp->m_logdev_targp != mp->m_ddev_targp)
			xfs_blkdev_issue_flush(mp->m_ddev_targp);
	}

	/*
	 * All metadata updates are logged, which means that we just have
	 * to flush the log up to the latest LSN that touched the inode.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_ipincount(ip)) {
		if (!datasync ||
		    (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP))
			lsn = ip->i_itemp->ili_last_lsn;
	}
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (lsn)
		error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);

	/*
	 * If we only have a single device, and the log force about was
	 * a no-op we might have to flush the data device cache here.
	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
	 * an already allocated file and thus do not have any metadata to
	 * commit.
	 */
	if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
	    mp->m_logdev_targp == mp->m_ddev_targp &&
	    !XFS_IS_REALTIME_INODE(ip) &&
	    !log_flushed)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	return -error;
}
示例#26
0
STATIC int
xfs_fs_encode_fh(
	struct dentry		*dentry,
	__u32			*fh,
	int			*max_len,
	int			connectable)
{
	struct fid		*fid = (struct fid *)fh;
	struct xfs_fid64	*fid64 = (struct xfs_fid64 *)fh;
	struct inode		*inode = dentry->d_inode;
	int			fileid_type;
	int			len;

	/* Directories don't need their parent encoded, they have ".." */
	if (S_ISDIR(inode->i_mode) || !connectable)
		fileid_type = FILEID_INO32_GEN;
	else
		fileid_type = FILEID_INO32_GEN_PARENT;

	/*
	 * If the the filesystem may contain 64bit inode numbers, we need
	 * to use larger file handles that can represent them.
	 *
	 * While we only allocate inodes that do not fit into 32 bits any
	 * large enough filesystem may contain them, thus the slightly
	 * confusing looking conditional below.
	 */
	if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
	    (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
		fileid_type |= XFS_FILEID_TYPE_64FLAG;

	/*
	 * Only encode if there is enough space given.  In practice
	 * this means we can't export a filesystem with 64bit inodes
	 * over NFSv2 with the subtree_check export option; the other
	 * seven combinations work.  The real answer is "don't use v2".
	 */
	len = xfs_fileid_length(fileid_type);
	if (*max_len < len) {
		*max_len = len;
		return 255;
	}
	*max_len = len;

	switch (fileid_type) {
	case FILEID_INO32_GEN_PARENT:
		spin_lock(&dentry->d_lock);
		fid->i32.parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino;
		fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation;
		spin_unlock(&dentry->d_lock);
		/*FALLTHRU*/
	case FILEID_INO32_GEN:
		fid->i32.ino = XFS_I(inode)->i_ino;
		fid->i32.gen = inode->i_generation;
		break;
	case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
		spin_lock(&dentry->d_lock);
		fid64->parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino;
		fid64->parent_gen = dentry->d_parent->d_inode->i_generation;
		spin_unlock(&dentry->d_lock);
		/*FALLTHRU*/
	case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
		fid64->ino = XFS_I(inode)->i_ino;
		fid64->gen = inode->i_generation;
		break;
	}

	return fileid_type;
}
示例#27
0
/*
 * xfs_file_dio_aio_write - handle direct IO writes
 *
 * Lock the inode appropriately to prepare for and issue a direct IO write.
 * By separating it from the buffered write path we remove all the tricky to
 * follow locking changes and looping.
 *
 * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
 * until we're sure the bytes at the new EOF have been zeroed and/or the cached
 * pages are flushed out.
 *
 * In most cases the direct IO writes will be done holding IOLOCK_SHARED
 * allowing them to be done in parallel with reads and other direct IO writes.
 * However, if the IO is not aligned to filesystem blocks, the direct IO layer
 * needs to do sub-block zeroing and that requires serialisation against other
 * direct IOs to the same block. In this case we need to serialise the
 * submission of the unaligned IOs so that we don't get racing block zeroing in
 * the dio layer.  To avoid the problem with aio, we also need to wait for
 * outstanding IOs to complete so that unwritten extent conversion is completed
 * before we try to map the overlapping block. This is currently implemented by
 * hitting it with a big hammer (i.e. inode_dio_wait()).
 *
 * Returns with locks held indicated by @iolock and errors indicated by
 * negative return values.
 */
STATIC ssize_t
xfs_file_dio_aio_write(
	struct kiocb		*iocb,
	const struct iovec	*iovp,
	unsigned long		nr_segs,
	loff_t			pos,
	size_t			ocount)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0;
	size_t			count = ocount;
	int			unaligned_io = 0;
	int			iolock;
	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?
					mp->m_rtdev_targp : mp->m_ddev_targp;

	if ((pos & target->bt_smask) || (count & target->bt_smask))
		return -XFS_ERROR(EINVAL);

	if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
		unaligned_io = 1;

	/*
	 * We don't need to take an exclusive lock unless there page cache needs
	 * to be invalidated or unaligned IO is being executed. We don't need to
	 * consider the EOF extension case here because
	 * xfs_file_aio_write_checks() will relock the inode as necessary for
	 * EOF zeroing cases and fill out the new inode size as appropriate.
	 */
	if (unaligned_io || mapping->nrpages)
		iolock = XFS_IOLOCK_EXCL;
	else
		iolock = XFS_IOLOCK_SHARED;
	xfs_rw_ilock(ip, iolock);

	/*
	 * Recheck if there are cached pages that need invalidate after we got
	 * the iolock to protect against other threads adding new pages while
	 * we were waiting for the iolock.
	 */
	if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
		xfs_rw_iunlock(ip, iolock);
		iolock = XFS_IOLOCK_EXCL;
		xfs_rw_ilock(ip, iolock);
	}

	ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
	if (ret)
		goto out;

	if (mapping->nrpages) {
		ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
							FI_REMAPF_LOCKED);
		if (ret)
			goto out;
	}

	/*
	 * If we are doing unaligned IO, wait for all other IO to drain,
	 * otherwise demote the lock if we had to flush cached pages
	 */
	if (unaligned_io)
		inode_dio_wait(inode);
	else if (iolock == XFS_IOLOCK_EXCL) {
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
		iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
	ret = generic_file_direct_write(iocb, iovp,
			&nr_segs, pos, &iocb->ki_pos, count, ocount);

out:
	xfs_rw_iunlock(ip, iolock);

	/* No fallback to buffered IO on errors for XFS. */
	ASSERT(ret < 0 || ret == count);
	return ret;
}
示例#28
0
int
xfs_ioctl(
	xfs_inode_t		*ip,
	struct file		*filp,
	int			ioflags,
	unsigned int		cmd,
	void			__user *arg)
{
	struct inode		*inode = filp->f_path.dentry->d_inode;
	xfs_mount_t		*mp = ip->i_mount;
	int			error;

	xfs_itrace_entry(XFS_I(inode));
	switch (cmd) {

	case XFS_IOC_ALLOCSP:
	case XFS_IOC_FREESP:
	case XFS_IOC_RESVSP:
	case XFS_IOC_UNRESVSP:
	case XFS_IOC_ALLOCSP64:
	case XFS_IOC_FREESP64:
	case XFS_IOC_RESVSP64:
	case XFS_IOC_UNRESVSP64:
		/*
		 * Only allow the sys admin to reserve space unless
		 * unwritten extents are enabled.
		 */
		if (!xfs_sb_version_hasextflgbit(&mp->m_sb) &&
		    !capable(CAP_SYS_ADMIN))
			return -EPERM;

		return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg);

	case XFS_IOC_DIOINFO: {
		struct dioattr	da;
		xfs_buftarg_t	*target =
			XFS_IS_REALTIME_INODE(ip) ?
			mp->m_rtdev_targp : mp->m_ddev_targp;

		da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
		da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);

		if (copy_to_user(arg, &da, sizeof(da)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_FSBULKSTAT_SINGLE:
	case XFS_IOC_FSBULKSTAT:
	case XFS_IOC_FSINUMBERS:
		return xfs_ioc_bulkstat(mp, cmd, arg);

	case XFS_IOC_FSGEOMETRY_V1:
		return xfs_ioc_fsgeometry_v1(mp, arg);

	case XFS_IOC_FSGEOMETRY:
		return xfs_ioc_fsgeometry(mp, arg);

	case XFS_IOC_GETVERSION:
		return put_user(inode->i_generation, (int __user *)arg);

	case XFS_IOC_FSGETXATTR:
		return xfs_ioc_fsgetxattr(ip, 0, arg);
	case XFS_IOC_FSGETXATTRA:
		return xfs_ioc_fsgetxattr(ip, 1, arg);
	case XFS_IOC_FSSETXATTR:
		return xfs_ioc_fssetxattr(ip, filp, arg);
	case XFS_IOC_GETXFLAGS:
		return xfs_ioc_getxflags(ip, arg);
	case XFS_IOC_SETXFLAGS:
		return xfs_ioc_setxflags(ip, filp, arg);

	case XFS_IOC_FSSETDM: {
		struct fsdmidata	dmi;

		if (copy_from_user(&dmi, arg, sizeof(dmi)))
			return -XFS_ERROR(EFAULT);

		error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
				dmi.fsd_dmstate);
		return -error;
	}

	case XFS_IOC_GETBMAP:
	case XFS_IOC_GETBMAPA:
		return xfs_ioc_getbmap(ip, ioflags, cmd, arg);

	case XFS_IOC_GETBMAPX:
		return xfs_ioc_getbmapx(ip, arg);

	case XFS_IOC_FD_TO_HANDLE:
	case XFS_IOC_PATH_TO_HANDLE:
	case XFS_IOC_PATH_TO_FSHANDLE:
		return xfs_find_handle(cmd, arg);

	case XFS_IOC_OPEN_BY_HANDLE:
		return xfs_open_by_handle(mp, arg, filp, inode);

	case XFS_IOC_FSSETDM_BY_HANDLE:
		return xfs_fssetdm_by_handle(mp, arg, inode);

	case XFS_IOC_READLINK_BY_HANDLE:
		return xfs_readlink_by_handle(mp, arg, inode);

	case XFS_IOC_ATTRLIST_BY_HANDLE:
		return xfs_attrlist_by_handle(mp, arg, inode);

	case XFS_IOC_ATTRMULTI_BY_HANDLE:
		return xfs_attrmulti_by_handle(mp, arg, inode);

	case XFS_IOC_SWAPEXT: {
		error = xfs_swapext((struct xfs_swapext __user *)arg);
		return -error;
	}

	case XFS_IOC_FSCOUNTS: {
		xfs_fsop_counts_t out;

		error = xfs_fs_counts(mp, &out);
		if (error)
			return -error;

		if (copy_to_user(arg, &out, sizeof(out)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_SET_RESBLKS: {
		xfs_fsop_resblks_t inout;
		__uint64_t	   in;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (copy_from_user(&inout, arg, sizeof(inout)))
			return -XFS_ERROR(EFAULT);

		/* input parameter is passed in resblks field of structure */
		in = inout.resblks;
		error = xfs_reserve_blocks(mp, &in, &inout);
		if (error)
			return -error;

		if (copy_to_user(arg, &inout, sizeof(inout)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_GET_RESBLKS: {
		xfs_fsop_resblks_t out;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		error = xfs_reserve_blocks(mp, NULL, &out);
		if (error)
			return -error;

		if (copy_to_user(arg, &out, sizeof(out)))
			return -XFS_ERROR(EFAULT);

		return 0;
	}

	case XFS_IOC_FSGROWFSDATA: {
		xfs_growfs_data_t in;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (copy_from_user(&in, arg, sizeof(in)))
			return -XFS_ERROR(EFAULT);

		error = xfs_growfs_data(mp, &in);
		return -error;
	}

	case XFS_IOC_FSGROWFSLOG: {
		xfs_growfs_log_t in;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (copy_from_user(&in, arg, sizeof(in)))
			return -XFS_ERROR(EFAULT);

		error = xfs_growfs_log(mp, &in);
		return -error;
	}

	case XFS_IOC_FSGROWFSRT: {
		xfs_growfs_rt_t in;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (copy_from_user(&in, arg, sizeof(in)))
			return -XFS_ERROR(EFAULT);

		error = xfs_growfs_rt(mp, &in);
		return -error;
	}

	case XFS_IOC_FREEZE:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (inode->i_sb->s_frozen == SB_UNFROZEN)
			freeze_bdev(inode->i_sb->s_bdev);
		return 0;

	case XFS_IOC_THAW:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		if (inode->i_sb->s_frozen != SB_UNFROZEN)
			thaw_bdev(inode->i_sb->s_bdev, inode->i_sb);
		return 0;

	case XFS_IOC_GOINGDOWN: {
		__uint32_t in;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (get_user(in, (__uint32_t __user *)arg))
			return -XFS_ERROR(EFAULT);

		error = xfs_fs_goingdown(mp, in);
		return -error;
	}

	case XFS_IOC_ERROR_INJECTION: {
		xfs_error_injection_t in;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (copy_from_user(&in, arg, sizeof(in)))
			return -XFS_ERROR(EFAULT);

		error = xfs_errortag_add(in.errtag, mp);
		return -error;
	}

	case XFS_IOC_ERROR_CLEARALL:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		error = xfs_errortag_clearall(mp, 1);
		return -error;

	default:
		return -ENOTTY;
	}
}
示例#29
0
/*
 * Note: some of the ioctl's return positive numbers as a
 * byte count indicating success, such as readlink_by_handle.
 * So we don't "sign flip" like most other routines.  This means
 * true errors need to be returned as a negative value.
 */
long
xfs_file_ioctl(
	struct file		*filp,
	unsigned int		cmd,
	unsigned long		p)
{
	struct inode		*inode = filp->f_path.dentry->d_inode;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	void			__user *arg = (void __user *)p;
	int			ioflags = 0;
	int			error;

	if (filp->f_mode & FMODE_NOCMTIME)
		ioflags |= IO_INVIS;

	trace_xfs_file_ioctl(ip);

	switch (cmd) {
	case XFS_IOC_ALLOCSP:
	case XFS_IOC_FREESP:
	case XFS_IOC_RESVSP:
	case XFS_IOC_UNRESVSP:
	case XFS_IOC_ALLOCSP64:
	case XFS_IOC_FREESP64:
	case XFS_IOC_RESVSP64:
	case XFS_IOC_UNRESVSP64: {
		xfs_flock64_t		bf;

		if (copy_from_user(&bf, arg, sizeof(bf)))
			return -XFS_ERROR(EFAULT);
		return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
	}
	case XFS_IOC_DIOINFO: {
		struct dioattr	da;
		xfs_buftarg_t	*target =
			XFS_IS_REALTIME_INODE(ip) ?
			mp->m_rtdev_targp : mp->m_ddev_targp;

		da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
		da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);

		if (copy_to_user(arg, &da, sizeof(da)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_FSBULKSTAT_SINGLE:
	case XFS_IOC_FSBULKSTAT:
	case XFS_IOC_FSINUMBERS:
		return xfs_ioc_bulkstat(mp, cmd, arg);

	case XFS_IOC_FSGEOMETRY_V1:
		return xfs_ioc_fsgeometry_v1(mp, arg);

	case XFS_IOC_FSGEOMETRY:
		return xfs_ioc_fsgeometry(mp, arg);

	case XFS_IOC_GETVERSION:
		return put_user(inode->i_generation, (int __user *)arg);

	case XFS_IOC_FSGETXATTR:
		return xfs_ioc_fsgetxattr(ip, 0, arg);
	case XFS_IOC_FSGETXATTRA:
		return xfs_ioc_fsgetxattr(ip, 1, arg);
	case XFS_IOC_FSSETXATTR:
		return xfs_ioc_fssetxattr(ip, filp, arg);
	case XFS_IOC_GETXFLAGS:
		return xfs_ioc_getxflags(ip, arg);
	case XFS_IOC_SETXFLAGS:
		return xfs_ioc_setxflags(ip, filp, arg);

	case XFS_IOC_FSSETDM: {
		struct fsdmidata	dmi;

		if (copy_from_user(&dmi, arg, sizeof(dmi)))
			return -XFS_ERROR(EFAULT);

		error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
				dmi.fsd_dmstate);
		return -error;
	}

	case XFS_IOC_GETBMAP:
	case XFS_IOC_GETBMAPA:
		return xfs_ioc_getbmap(ip, ioflags, cmd, arg);

	case XFS_IOC_GETBMAPX:
		return xfs_ioc_getbmapx(ip, arg);

	case XFS_IOC_FD_TO_HANDLE:
	case XFS_IOC_PATH_TO_HANDLE:
	case XFS_IOC_PATH_TO_FSHANDLE: {
		xfs_fsop_handlereq_t	hreq;

		if (copy_from_user(&hreq, arg, sizeof(hreq)))
			return -XFS_ERROR(EFAULT);
		return xfs_find_handle(cmd, &hreq);
	}
	case XFS_IOC_OPEN_BY_HANDLE: {
		xfs_fsop_handlereq_t	hreq;

		if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
			return -XFS_ERROR(EFAULT);
		return xfs_open_by_handle(filp, &hreq);
	}
	case XFS_IOC_FSSETDM_BY_HANDLE:
		return xfs_fssetdm_by_handle(filp, arg);

	case XFS_IOC_READLINK_BY_HANDLE: {
		xfs_fsop_handlereq_t	hreq;

		if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
			return -XFS_ERROR(EFAULT);
		return xfs_readlink_by_handle(filp, &hreq);
	}
	case XFS_IOC_ATTRLIST_BY_HANDLE:
		return xfs_attrlist_by_handle(filp, arg);

	case XFS_IOC_ATTRMULTI_BY_HANDLE:
		return xfs_attrmulti_by_handle(filp, arg);

	case XFS_IOC_SWAPEXT: {
		struct xfs_swapext	sxp;

		if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
			return -XFS_ERROR(EFAULT);
		error = xfs_swapext(&sxp);
		return -error;
	}

	case XFS_IOC_FSCOUNTS: {
		xfs_fsop_counts_t out;

		error = xfs_fs_counts(mp, &out);
		if (error)
			return -error;

		if (copy_to_user(arg, &out, sizeof(out)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_SET_RESBLKS: {
		xfs_fsop_resblks_t inout;
		__uint64_t	   in;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (mp->m_flags & XFS_MOUNT_RDONLY)
			return -XFS_ERROR(EROFS);

		if (copy_from_user(&inout, arg, sizeof(inout)))
			return -XFS_ERROR(EFAULT);

		/* input parameter is passed in resblks field of structure */
		in = inout.resblks;
		error = xfs_reserve_blocks(mp, &in, &inout);
		if (error)
			return -error;

		if (copy_to_user(arg, &inout, sizeof(inout)))
			return -XFS_ERROR(EFAULT);
		return 0;
	}

	case XFS_IOC_GET_RESBLKS: {
		xfs_fsop_resblks_t out;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		error = xfs_reserve_blocks(mp, NULL, &out);
		if (error)
			return -error;

		if (copy_to_user(arg, &out, sizeof(out)))
			return -XFS_ERROR(EFAULT);

		return 0;
	}

	case XFS_IOC_FSGROWFSDATA: {
		xfs_growfs_data_t in;

		if (copy_from_user(&in, arg, sizeof(in)))
			return -XFS_ERROR(EFAULT);

		error = xfs_growfs_data(mp, &in);
		return -error;
	}

	case XFS_IOC_FSGROWFSLOG: {
		xfs_growfs_log_t in;

		if (copy_from_user(&in, arg, sizeof(in)))
			return -XFS_ERROR(EFAULT);

		error = xfs_growfs_log(mp, &in);
		return -error;
	}

	case XFS_IOC_FSGROWFSRT: {
		xfs_growfs_rt_t in;

		if (copy_from_user(&in, arg, sizeof(in)))
			return -XFS_ERROR(EFAULT);

		error = xfs_growfs_rt(mp, &in);
		return -error;
	}

	case XFS_IOC_GOINGDOWN: {
		__uint32_t in;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (get_user(in, (__uint32_t __user *)arg))
			return -XFS_ERROR(EFAULT);

		error = xfs_fs_goingdown(mp, in);
		return -error;
	}

	case XFS_IOC_ERROR_INJECTION: {
		xfs_error_injection_t in;

		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		if (copy_from_user(&in, arg, sizeof(in)))
			return -XFS_ERROR(EFAULT);

		error = xfs_errortag_add(in.errtag, mp);
		return -error;
	}

	case XFS_IOC_ERROR_CLEARALL:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		error = xfs_errortag_clearall(mp, 1);
		return -error;

	default:
		return -ENOTTY;
	}
}
示例#30
0
STATIC loff_t
xfs_seek_data(
	struct file		*file,
	loff_t			start)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	loff_t			uninitialized_var(offset);
	xfs_fsize_t		isize;
	xfs_fileoff_t		fsbno;
	xfs_filblks_t		end;
	uint			lock;
	int			error;

	lock = xfs_ilock_data_map_shared(ip);

	isize = i_size_read(inode);
	if (start >= isize) {
		error = ENXIO;
		goto out_unlock;
	}

	/*
	 * Try to read extents from the first block indicated
	 * by fsbno to the end block of the file.
	 */
	fsbno = XFS_B_TO_FSBT(mp, start);
	end = XFS_B_TO_FSB(mp, isize);
	for (;;) {
		struct xfs_bmbt_irec	map[2];
		int			nmap = 2;
		unsigned int		i;

		error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
				       XFS_BMAPI_ENTIRE);
		if (error)
			goto out_unlock;

		/* No extents at given offset, must be beyond EOF */
		if (nmap == 0) {
			error = ENXIO;
			goto out_unlock;
		}

		for (i = 0; i < nmap; i++) {
			offset = max_t(loff_t, start,
				       XFS_FSB_TO_B(mp, map[i].br_startoff));

			/* Landed in a data extent */
			if (map[i].br_startblock == DELAYSTARTBLOCK ||
			    (map[i].br_state == XFS_EXT_NORM &&
			     !isnullstartblock(map[i].br_startblock)))
				goto out;

			/*
			 * Landed in an unwritten extent, try to search data
			 * from page cache.
			 */
			if (map[i].br_state == XFS_EXT_UNWRITTEN) {
				if (xfs_find_get_desired_pgoff(inode, &map[i],
							DATA_OFF, &offset))
					goto out;
			}
		}

		/*
		 * map[0] is hole or its an unwritten extent but
		 * without data in page cache.  Probably means that
		 * we are reading after EOF if nothing in map[1].
		 */
		if (nmap == 1) {
			error = ENXIO;
			goto out_unlock;
		}

		ASSERT(i > 1);

		/*
		 * Nothing was found, proceed to the next round of search
		 * if reading offset not beyond or hit EOF.
		 */
		fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
		start = XFS_FSB_TO_B(mp, fsbno);
		if (start >= isize) {
			error = ENXIO;
			goto out_unlock;
		}
	}

out:
	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);

out_unlock:
	xfs_iunlock(ip, lock);

	if (error)
		return -error;
	return offset;
}