STATIC int
xfs_sync_inode_data(
	struct xfs_inode *ip,
	struct xfs_perag *pag,
	int flags)
{
	struct inode *inode = VFS_I(ip);
	struct address_space *mapping = inode->i_mapping;
	int error = 0;

	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		goto out_wait;

	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
		if (flags & SYNC_TRYLOCK)
			goto out_wait;
		xfs_ilock(ip, XFS_IOLOCK_SHARED);
	}

	error = xfs_flush_pages(ip, 0, -1,
			(flags & SYNC_WAIT) ? 0 : XBF_ASYNC, FI_NONE);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

out_wait:
	if (flags & SYNC_WAIT)
		xfs_ioend_wait(ip);
	return error;
}
static noinline ssize_t
xfs_file_dax_read(
	struct kiocb *iocb,
	struct iov_iter *to)
{
	struct xfs_inode *ip = XFS_I(iocb->ki_filp->f_mapping->host);
	size_t count = iov_iter_count(to);
	ssize_t ret = 0;

	trace_xfs_file_dax_read(ip, count, iocb->ki_pos);

	if (!count)
		return 0; /* skip atime */

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, XFS_IOLOCK_SHARED);
	}

	ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	file_accessed(iocb->ki_filp);
	return ret;
}
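/*
 * The IOCB_NOWAIT branch above is the try-or-block shape that every read and
 * write path in this collection uses: attempt the shared lock without
 * sleeping, fail with -EAGAIN if the caller asked not to wait, otherwise fall
 * back to the blocking lock. Below is a minimal user-space sketch of that
 * shape using POSIX rwlocks in place of the XFS iolock; all names are
 * illustrative and not part of XFS.
 */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_rwlock_t iolock = PTHREAD_RWLOCK_INITIALIZER;

/* Analogue of the IOCB_NOWAIT branch: trylock or fail, else block. */
static int do_read(bool nowait)
{
	if (nowait) {
		if (pthread_rwlock_tryrdlock(&iolock) != 0)
			return -EAGAIN;	/* caller retries from a context that may sleep */
	} else {
		pthread_rwlock_rdlock(&iolock);
	}

	/* ... issue the read while holding the shared lock ... */

	pthread_rwlock_unlock(&iolock);
	return 0;
}

int main(void)
{
	printf("nowait read: %d\n", do_read(true));
	printf("blocking read: %d\n", do_read(false));
	return 0;
}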
/*
 * This is called to attempt to lock the inode associated with this
 * inode log item, in preparation for the push routine which does the actual
 * iflush. Don't sleep on the inode lock or the flush lock.
 *
 * If the flush lock is already held, indicating that the inode has
 * been or is in the process of being flushed, then (ideally) we'd like to
 * see if the inode's buffer is still incore, and if so give it a nudge.
 * We delay doing so until the pushbuf routine, though, to avoid holding
 * the AIL lock across a call to the blackhole which is the buffer cache.
 * Also we don't want to sleep in any device strategy routines, which can happen
 * if we do the subsequent bawrite in here.
 */
STATIC uint
xfs_inode_item_trylock(
	xfs_inode_log_item_t *iip)
{
	register xfs_inode_t *ip;

	ip = iip->ili_inode;

	if (xfs_ipincount(ip) > 0) {
		return XFS_ITEM_PINNED;
	}

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
		return XFS_ITEM_LOCKED;
	}

	if (!xfs_iflock_nowait(ip)) {
		/*
		 * If someone else isn't already trying to push the inode
		 * buffer, we get to do it.
		 */
		if (iip->ili_pushbuf_flag == 0) {
			iip->ili_pushbuf_flag = 1;
#ifdef DEBUG
			iip->ili_push_owner = current_pid();
#endif
			/*
			 * Inode is left locked in shared mode.
			 * Pushbuf routine gets to unlock it.
			 */
			return XFS_ITEM_PUSHBUF;
		} else {
			/*
			 * We hold the AIL lock, so we must specify the
			 * NONOTIFY flag so that we won't double trip.
			 */
			xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
			return XFS_ITEM_FLUSHING;
		}
		/* NOTREACHED */
	}

	/* Stale items should force out the iclog */
	if (ip->i_flags & XFS_ISTALE) {
		xfs_ifunlock(ip);
		xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
		return XFS_ITEM_PINNED;
	}

#ifdef DEBUG
	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		ASSERT(iip->ili_format.ilf_fields != 0);
		ASSERT(iip->ili_logged == 0);
		ASSERT(iip->ili_item.li_flags & XFS_LI_IN_AIL);
	}
#endif
	return XFS_ITEM_SUCCESS;
}
STATIC int
xfs_inode_free_eofblocks(
	struct xfs_inode *ip,
	int flags,
	void *args)
{
	int ret = 0;
	struct xfs_eofblocks *eofb = args;
	int match;

	if (!xfs_can_free_eofblocks(ip, false)) {
		/* inode could be preallocated or append-only */
		trace_xfs_inode_free_eofblocks_invalid(ip);
		xfs_inode_clear_eofblocks_tag(ip);
		return 0;
	}

	/*
	 * If the mapping is dirty the operation can block and wait for some
	 * time. Unless we are waiting, skip it.
	 */
	if (!(flags & SYNC_WAIT) &&
	    mapping_tagged(VFS_I(ip)->i_mapping, PAGECACHE_TAG_DIRTY))
		return 0;

	if (eofb) {
		if (eofb->eof_flags & XFS_EOF_FLAGS_UNION)
			match = xfs_inode_match_id_union(ip, eofb);
		else
			match = xfs_inode_match_id(ip, eofb);
		if (!match)
			return 0;

		/* skip the inode if the file size is too small */
		if (eofb->eof_flags & XFS_EOF_FLAGS_MINFILESIZE &&
		    XFS_ISIZE(ip) < eofb->eof_min_file_size)
			return 0;
	}

	/*
	 * If the caller is waiting, return -EAGAIN to keep the background
	 * scanner moving and revisit the inode in a subsequent pass.
	 */
	if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
		if (flags & SYNC_WAIT)
			ret = -EAGAIN;
		return ret;
	}
	ret = xfs_free_eofblocks(ip);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);

	return ret;
}
static noinline ssize_t
xfs_file_dax_write(
	struct kiocb *iocb,
	struct iov_iter *from)
{
	struct inode *inode = iocb->ki_filp->f_mapping->host;
	struct xfs_inode *ip = XFS_I(inode);
	int iolock = XFS_IOLOCK_EXCL;
	ssize_t ret, error = 0;
	size_t count;
	loff_t pos;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, iolock))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, iolock);
	}

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;

	pos = iocb->ki_pos;
	count = iov_iter_count(from);

	trace_xfs_file_dax_write(ip, count, pos);
	ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
		i_size_write(inode, iocb->ki_pos);
		error = xfs_setfilesize(ip, pos, ret);
	}
out:
	xfs_iunlock(ip, iolock);
	if (error)
		return error;

	if (ret > 0) {
		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);

		/* Handle various SYNC-type writes */
		ret = generic_write_sync(iocb, ret);
	}
	return ret;
}
/*
 * This is called to attempt to lock the inode associated with this
 * inode log item, in preparation for the push routine which does the actual
 * iflush. Don't sleep on the inode lock or the flush lock.
 *
 * If the flush lock is already held, indicating that the inode has
 * been or is in the process of being flushed, then (ideally) we'd like to
 * see if the inode's buffer is still incore, and if so give it a nudge.
 * We delay doing so until the pushbuf routine, though, to avoid holding
 * the AIL lock across a call to the blackhole which is the buffer cache.
 * Also we don't want to sleep in any device strategy routines, which can happen
 * if we do the subsequent bawrite in here.
 */
STATIC uint
xfs_inode_item_trylock(
	struct xfs_log_item *lip)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode *ip = iip->ili_inode;

	if (xfs_ipincount(ip) > 0)
		return XFS_ITEM_PINNED;

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
		return XFS_ITEM_LOCKED;

	if (!xfs_iflock_nowait(ip)) {
		/*
		 * inode has already been flushed to the backing buffer,
		 * leave it locked in shared mode, pushbuf routine will
		 * unlock it.
		 */
		return XFS_ITEM_PUSHBUF;
	}

	/* Stale items should force out the iclog */
	if (ip->i_flags & XFS_ISTALE) {
		xfs_ifunlock(ip);
		/*
		 * we hold the AIL lock - notify the unlock routine of this
		 * so it doesn't try to get the lock again.
		 */
		xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
		return XFS_ITEM_PINNED;
	}

#ifdef DEBUG
	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		ASSERT(iip->ili_format.ilf_fields != 0);
		ASSERT(iip->ili_logged == 0);
		ASSERT(lip->li_flags & XFS_LI_IN_AIL);
	}
#endif
	return XFS_ITEM_SUCCESS;
}
/*
 * Update on-disk file size now that data has been written to disk. The
 * current in-memory file size is i_size. If a write is beyond eof i_new_size
 * will be the intended file size until i_size is updated. If this write does
 * not extend all the way to the valid file size then restrict this update to
 * the end of the write.
 *
 * This function does not block as blocking on the inode lock in IO completion
 * can lead to IO completion order dependency deadlocks. If it can't get the
 * inode ilock it will return EAGAIN. Callers must handle this.
 */
STATIC int
xfs_setfilesize(
	xfs_ioend_t *ioend)
{
	xfs_inode_t *ip = XFS_I(ioend->io_inode);
	xfs_fsize_t isize;

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
		return EAGAIN;

	isize = xfs_ioend_new_eof(ioend);
	if (isize) {
		trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
		ip->i_d.di_size = isize;
		xfs_mark_inode_dirty(ip);
	}

	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return 0;
}
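/*
 * The comment above is the whole point of the trylock: I/O completion runs in
 * a context where blocking on the ilock can create completion-ordering
 * deadlocks, so the size update either happens immediately or reports EAGAIN
 * and the caller retries later. A hedged user-space sketch of that
 * retry-on-EAGAIN contract follows; the names and the "requeue" step are made
 * up for illustration and are not XFS interfaces.
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ilock = PTHREAD_MUTEX_INITIALIZER;
static long long on_disk_size;

/* Non-blocking size update, mirroring the EAGAIN contract above. */
static int setfilesize_nowait(long long new_size)
{
	if (pthread_mutex_trylock(&ilock) != 0)
		return EAGAIN;		/* completion context must not sleep here */
	if (new_size > on_disk_size)
		on_disk_size = new_size;
	pthread_mutex_unlock(&ilock);
	return 0;
}

/* Caller side: on EAGAIN, defer the work instead of blocking in completion context. */
static void complete_io(long long new_size)
{
	if (setfilesize_nowait(new_size) == EAGAIN)
		printf("requeue completion for %lld\n", new_size);	/* e.g. hand off to a worker */
	else
		printf("size updated to %lld\n", on_disk_size);
}

int main(void)
{
	complete_io(4096);
	return 0;
}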
STATIC ssize_t
xfs_file_buffered_aio_read(
	struct kiocb *iocb,
	struct iov_iter *to)
{
	struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
	ssize_t ret;

	trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, XFS_IOLOCK_SHARED);
	}
	ret = generic_file_read_iter(iocb, to);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	return ret;
}
STATIC uint
xfs_inode_item_trylock(
	struct xfs_log_item *lip)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode *ip = iip->ili_inode;

	if (xfs_ipincount(ip) > 0)
		return XFS_ITEM_PINNED;

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
		return XFS_ITEM_LOCKED;

	if (!xfs_iflock_nowait(ip)) {
		/*
		 * inode has already been flushed to the backing buffer,
		 * leave it locked in shared mode, pushbuf routine will
		 * unlock it.
		 */
		return XFS_ITEM_PUSHBUF;
	}

	/* Stale items should force out the iclog */
	if (ip->i_flags & XFS_ISTALE) {
		xfs_ifunlock(ip);
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return XFS_ITEM_PINNED;
	}

#ifdef DEBUG
	if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		ASSERT(iip->ili_fields != 0);
		ASSERT(iip->ili_logged == 0);
		ASSERT(lip->li_flags & XFS_LI_IN_AIL);
	}
#endif
	return XFS_ITEM_SUCCESS;
}
int
xfs_filestream_associate(
	xfs_inode_t *pip,
	xfs_inode_t *ip)
{
	xfs_mount_t *mp;
	xfs_mru_cache_t *cache;
	fstrm_item_t *item;
	xfs_agnumber_t ag, rotorstep, startag;
	int err = 0;

	ASSERT(pip->i_d.di_mode & S_IFDIR);
	ASSERT(ip->i_d.di_mode & S_IFREG);
	if (!(pip->i_d.di_mode & S_IFDIR) || !(ip->i_d.di_mode & S_IFREG))
		return -EINVAL;

	mp = pip->i_mount;
	cache = mp->m_filestream;

	/*
	 * We have a problem, Houston.
	 *
	 * Taking the iolock here violates inode locking order - we already
	 * hold the ilock. Hence if we block getting this lock we may never
	 * wake. Unfortunately, that means if we can't get the lock, we're
	 * screwed in terms of getting a stream association - we can't spin
	 * waiting for the lock because someone else is waiting on the lock we
	 * hold and we cannot drop that as we are in a transaction here.
	 *
	 * Lucky for us, this inversion is not a problem because it's a
	 * directory inode that we are trying to lock here.
	 *
	 * So, if we can't get the iolock without sleeping then just give up
	 */
	if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL))
		return 1;

	/* If the parent directory is already in the cache, use its AG. */
	item = xfs_mru_cache_lookup(cache, pip->i_ino);
	if (item) {
		ASSERT(item->ip == pip);
		ag = item->ag;
		xfs_mru_cache_done(cache);

		TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag));
		err = _xfs_filestream_update_ag(ip, pip, ag);

		goto exit;
	}

	/*
	 * Set the starting AG using the rotor for inode32, otherwise
	 * use the directory inode's AG.
	 */
	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
		rotorstep = xfs_rotorstep;
		startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
		mp->m_agfrotor = (mp->m_agfrotor + 1) %
				 (mp->m_sb.sb_agcount * rotorstep);
	} else
		startag = XFS_INO_TO_AGNO(mp, pip->i_ino);

	/* Pick a new AG for the parent inode starting at startag. */
	err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0);
	if (err || ag == NULLAGNUMBER)
		goto exit_did_pick;

	/* Associate the parent inode with the AG. */
	err = _xfs_filestream_update_ag(pip, NULL, ag);
	if (err)
		goto exit_did_pick;

	/* Associate the file inode with the AG. */
	err = _xfs_filestream_update_ag(ip, pip, ag);
	if (err)
		goto exit_did_pick;

	TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag));

exit_did_pick:
	/*
	 * If _xfs_filestream_pick_ag() returned a valid AG, remove the
	 * reference it took on it, since the file and directory will have taken
	 * their own now if they were successfully cached.
	 */
	if (ag != NULLAGNUMBER)
		xfs_filestream_put_ag(mp, ag);

exit:
	xfs_iunlock(pip, XFS_IOLOCK_EXCL);
	return -err;
}
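/*
 * The block comment in xfs_filestream_associate() explains why the trylock is
 * mandatory: the caller already holds the ilock, so blocking on the iolock
 * would invert the usual lock order and could deadlock. The standard way out
 * of such an inversion is exactly what the code does - try the out-of-order
 * lock and give up if it is contended. A small self-contained sketch of that
 * rule with two POSIX mutexes; the names are illustrative only.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Normal order is iolock then ilock; this path already holds ilock. */
static pthread_mutex_t iolock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t ilock  = PTHREAD_MUTEX_INITIALIZER;

static bool associate_stream(void)
{
	/* Out-of-order acquisition: never block here, or we may never wake. */
	if (pthread_mutex_trylock(&iolock) != 0)
		return false;		/* give up the association rather than deadlock */

	/* ... pick an AG and record the association ... */

	pthread_mutex_unlock(&iolock);
	return true;
}

int main(void)
{
	pthread_mutex_lock(&ilock);	/* what the transaction already holds */
	printf("associated: %s\n", associate_stream() ? "yes" : "no");
	pthread_mutex_unlock(&ilock);
	return 0;
}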
/*
 * Go thru all the inodes in the file system, releasing their dquots.
 * Note that the mount structure gets modified to indicate that quotas are off
 * AFTER this, in the case of quotaoff. This also gets called from
 * xfs_rootumount.
 */
void
xfs_qm_dqrele_all_inodes(
	struct xfs_mount *mp,
	uint flags)
{
	xfs_inode_t *ip, *topino;
	uint ireclaims;
	bhv_vnode_t *vp;
	boolean_t vnode_refd;

	ASSERT(mp->m_quotainfo);

	XFS_MOUNT_ILOCK(mp);
again:
	ip = mp->m_inodes;
	if (ip == NULL) {
		XFS_MOUNT_IUNLOCK(mp);
		return;
	}
	do {
		/* Skip markers inserted by xfs_sync */
		if (ip->i_mount == NULL) {
			ip = ip->i_mnext;
			continue;
		}
		/* Root inode, rbmip and rsumip have associated blocks */
		if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
			ASSERT(ip->i_udquot == NULL);
			ASSERT(ip->i_gdquot == NULL);
			ip = ip->i_mnext;
			continue;
		}
		vp = XFS_ITOV_NULL(ip);
		if (!vp) {
			ASSERT(ip->i_udquot == NULL);
			ASSERT(ip->i_gdquot == NULL);
			ip = ip->i_mnext;
			continue;
		}
		vnode_refd = B_FALSE;
		if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
			ireclaims = mp->m_ireclaims;
			topino = mp->m_inodes;
			vp = vn_grab(vp);
			if (!vp)
				goto again;

			XFS_MOUNT_IUNLOCK(mp);
			/* XXX restart limit ? */
			xfs_ilock(ip, XFS_ILOCK_EXCL);
			vnode_refd = B_TRUE;
		} else {
			ireclaims = mp->m_ireclaims;
			topino = mp->m_inodes;
			XFS_MOUNT_IUNLOCK(mp);
		}
		/*
		 * We don't keep the mountlock across the dqrele() call,
		 * since it can take a while..
		 */
		if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
			xfs_qm_dqrele(ip->i_udquot);
			ip->i_udquot = NULL;
		}
		if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
			xfs_qm_dqrele(ip->i_gdquot);
			ip->i_gdquot = NULL;
		}
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		/*
		 * Wait until we've dropped the ilock and mountlock to
		 * do the vn_rele. Or be condemned to an eternity in the
		 * inactive code in hell.
		 */
		if (vnode_refd)
			VN_RELE(vp);
		XFS_MOUNT_ILOCK(mp);
		/*
		 * If an inode was inserted or removed, we gotta
		 * start over again.
		 */
		if (topino != mp->m_inodes || mp->m_ireclaims != ireclaims) {
			/* XXX use a sentinel */
			goto again;
		}
		ip = ip->i_mnext;
	} while (ip != mp->m_inodes);
	XFS_MOUNT_IUNLOCK(mp);
}
STATIC int
xfs_map_blocks(
	struct inode *inode,
	loff_t offset,
	struct xfs_bmbt_irec *imap,
	int type,
	int nonblocking)
{
	struct xfs_inode *ip = XFS_I(inode);
	struct xfs_mount *mp = ip->i_mount;
	ssize_t count = 1 << inode->i_blkbits;
	xfs_fileoff_t offset_fsb, end_fsb;
	int error = 0;
	int bmapi_flags = XFS_BMAPI_ENTIRE;
	int nimaps = 1;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -XFS_ERROR(EIO);

	if (type == IO_UNWRITTEN)
		bmapi_flags |= XFS_BMAPI_IGSTATE;

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
		if (nonblocking)
			return -XFS_ERROR(EAGAIN);
		xfs_ilock(ip, XFS_ILOCK_SHARED);
	}

	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	       (ip->i_df.if_flags & XFS_IFEXTENTS));
	ASSERT(offset <= mp->m_maxioffset);

	if (offset + count > mp->m_maxioffset)
		count = mp->m_maxioffset - offset;
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
				imap, &nimaps, bmapi_flags);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (error)
		return -XFS_ERROR(error);

	if (type == IO_DELALLOC &&
	    (!nimaps || isnullstartblock(imap->br_startblock))) {
		error = xfs_iomap_write_allocate(ip, offset, count, imap);
		if (!error)
			trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
		return -XFS_ERROR(error);
	}

#ifdef DEBUG
	if (type == IO_UNWRITTEN) {
		ASSERT(nimaps);
		ASSERT(imap->br_startblock != HOLESTARTBLOCK);
		ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
	}
#endif
	if (nimaps)
		trace_xfs_map_blocks_found(ip, offset, count, type, imap);
	return 0;
}
/*
 * This is called by xfs_inactive to free any blocks beyond eof
 * when the link count isn't zero and by xfs_dm_punch_hole() when
 * punching a hole to EOF.
 */
int
xfs_free_eofblocks(
	xfs_mount_t *mp,
	xfs_inode_t *ip,
	bool need_iolock)
{
	xfs_trans_t *tp;
	int error;
	xfs_fileoff_t end_fsb;
	xfs_fileoff_t last_fsb;
	xfs_filblks_t map_len;
	int nimaps;
	xfs_bmbt_irec_t imap;

	/*
	 * Figure out if there are any blocks beyond the end
	 * of the file. If not, then there is nothing to do.
	 */
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	if (last_fsb <= end_fsb)
		return 0;
	map_len = last_fsb - end_fsb;

	nimaps = 1;
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (!error && (nimaps != 0) &&
	    (imap.br_startblock != HOLESTARTBLOCK ||
	     ip->i_delayed_blks)) {
		/*
		 * Attach the dquots to the inode up front.
		 */
		error = xfs_qm_dqattach(ip, 0);
		if (error)
			return error;

		/*
		 * There are blocks after the end of file.
		 * Free them up now by truncating the file to
		 * its current size.
		 */
		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);

		if (need_iolock) {
			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
				xfs_trans_cancel(tp, 0);
				return EAGAIN;
			}
		}

		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			if (need_iolock)
				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return error;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip, 0);

		/*
		 * Do not update the on-disk file size. If we update the
		 * on-disk file size and then the system crashes before the
		 * contents of the file are flushed to disk then the files
		 * may be full of holes (ie NULL files bug).
		 */
		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
					      XFS_ISIZE(ip));
		if (error) {
			/*
			 * If we get an error at this point we simply don't
			 * bother truncating the file.
			 */
			xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
					      XFS_TRANS_ABORT));
		} else {
			error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
			if (!error)
				xfs_inode_clear_eofblocks_tag(ip);
		}

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (need_iolock)
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	}
	return error;
}
static int
xfs_iget_cache_miss(
	struct xfs_mount *mp,
	struct xfs_perag *pag,
	xfs_trans_t *tp,
	xfs_ino_t ino,
	struct xfs_inode **ipp,
	int flags,
	int lock_flags)
{
	struct xfs_inode *ip;
	int error;
	xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
	int iflags;

	ip = xfs_inode_alloc(mp, ino);
	if (!ip)
		return ENOMEM;

	error = xfs_iread(mp, tp, ip, flags);
	if (error)
		goto out_destroy;

	trace_xfs_iget_miss(ip);

	if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
		error = ENOENT;
		goto out_destroy;
	}

	/*
	 * Preload the radix tree so we can insert safely under the
	 * write spinlock. Note that we cannot sleep inside the preload
	 * region.
	 */
	if (radix_tree_preload(GFP_KERNEL)) {
		error = EAGAIN;
		goto out_destroy;
	}

	/*
	 * Because the inode hasn't been added to the radix-tree yet it can't
	 * be found by another thread, so we can do the non-sleeping lock here.
	 */
	if (lock_flags) {
		if (!xfs_ilock_nowait(ip, lock_flags))
			BUG();
	}

	/*
	 * These values must be set before inserting the inode into the radix
	 * tree as the moment it is inserted a concurrent lookup (allowed by the
	 * RCU locking mechanism) can find it and that lookup must see that this
	 * is an inode currently under construction (i.e. that XFS_INEW is set).
	 * The ip->i_flags_lock that protects the XFS_INEW flag forms the
	 * memory barrier that ensures this detection works correctly at lookup
	 * time.
	 */
	iflags = XFS_INEW;
	if (flags & XFS_IGET_DONTCACHE)
		iflags |= XFS_IDONTCACHE;
	ip->i_udquot = ip->i_gdquot = NULL;
	xfs_iflags_set(ip, iflags);

	/* insert the new inode */
	spin_lock(&pag->pag_ici_lock);
	error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
	if (unlikely(error)) {
		WARN_ON(error != -EEXIST);
		XFS_STATS_INC(xs_ig_dup);
		error = EAGAIN;
		goto out_preload_end;
	}
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();

	*ipp = ip;
	return 0;

out_preload_end:
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
out_destroy:
	__destroy_inode(VFS_I(ip));
	xfs_inode_free(ip);
	return error;
}
int
xfs_filestream_associate(
	xfs_inode_t *pip,
	xfs_inode_t *ip)
{
	xfs_mount_t *mp;
	xfs_mru_cache_t *cache;
	fstrm_item_t *item;
	xfs_agnumber_t ag, rotorstep, startag;
	int err = 0;

	ASSERT(S_ISDIR(pip->i_d.di_mode));
	ASSERT(S_ISREG(ip->i_d.di_mode));
	if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode))
		return -EINVAL;

	mp = pip->i_mount;
	cache = mp->m_filestream;

	/*
	 * Taking the iolock here violates inode locking order - we already
	 * hold the ilock - so only try the lock and give up if it is
	 * contended.
	 */
	if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL))
		return 1;

	/* If the parent directory is already in the cache, use its AG. */
	item = xfs_mru_cache_lookup(cache, pip->i_ino);
	if (item) {
		ASSERT(item->ip == pip);
		ag = item->ag;
		xfs_mru_cache_done(cache);

		TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag));
		err = _xfs_filestream_update_ag(ip, pip, ag);

		goto exit;
	}

	/*
	 * Set the starting AG using the rotor for inode32, otherwise
	 * use the directory inode's AG.
	 */
	if (mp->m_flags & XFS_MOUNT_32BITINODES) {
		rotorstep = xfs_rotorstep;
		startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;
		mp->m_agfrotor = (mp->m_agfrotor + 1) %
				 (mp->m_sb.sb_agcount * rotorstep);
	} else
		startag = XFS_INO_TO_AGNO(mp, pip->i_ino);

	/* Pick a new AG for the parent inode starting at startag. */
	err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0);
	if (err || ag == NULLAGNUMBER)
		goto exit_did_pick;

	/* Associate the parent inode, then the file inode, with the AG. */
	err = _xfs_filestream_update_ag(pip, NULL, ag);
	if (err)
		goto exit_did_pick;

	err = _xfs_filestream_update_ag(ip, pip, ag);
	if (err)
		goto exit_did_pick;

	TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag));

exit_did_pick:
	/* Drop the reference _xfs_filestream_pick_ag() took on the AG. */
	if (ag != NULLAGNUMBER)
		xfs_filestream_put_ag(mp, ag);

exit:
	xfs_iunlock(pip, XFS_IOLOCK_EXCL);
	return -err;
}
STATIC uint
xfs_inode_item_push(
	struct xfs_log_item *lip,
	struct list_head *buffer_list)
{
	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
	struct xfs_inode *ip = iip->ili_inode;
	struct xfs_buf *bp = NULL;
	uint rval = XFS_ITEM_SUCCESS;
	int error;

	if (xfs_ipincount(ip) > 0)
		return XFS_ITEM_PINNED;

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
		return XFS_ITEM_LOCKED;

	/*
	 * Re-check the pincount now that we stabilized the value by
	 * taking the ilock.
	 */
	if (xfs_ipincount(ip) > 0) {
		rval = XFS_ITEM_PINNED;
		goto out_unlock;
	}

	/*
	 * Stale inode items should force out the iclog.
	 */
	if (ip->i_flags & XFS_ISTALE) {
		rval = XFS_ITEM_PINNED;
		goto out_unlock;
	}

	/*
	 * Someone else is already flushing the inode. Nothing we can do
	 * here but wait for the flush to finish and remove the item from
	 * the AIL.
	 */
	if (!xfs_iflock_nowait(ip)) {
		rval = XFS_ITEM_FLUSHING;
		goto out_unlock;
	}

	ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));
	ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount));

	spin_unlock(&lip->li_ailp->xa_lock);

	error = xfs_iflush(ip, &bp);
	if (!error) {
		if (!xfs_buf_delwri_queue(bp, buffer_list))
			rval = XFS_ITEM_FLUSHING;
		xfs_buf_relse(bp);
	}

	spin_lock(&lip->li_ailp->xa_lock);
out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return rval;
}
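/*
 * xfs_inode_item_push() never sleeps; it reports one of a small set of status
 * codes and lets the AIL push loop decide what to do next. A condensed sketch
 * of how a non-blocking push loop can consume such codes; this dispatcher is
 * illustrative only and is not the actual xfsaild loop.
 */
#include <stdio.h>

enum push_status { ITEM_SUCCESS, ITEM_PINNED, ITEM_LOCKED, ITEM_FLUSHING };

/* Stand-in for a log item's non-blocking push callback. */
static enum push_status push_item(int *item)
{
	return (*item % 2) ? ITEM_PINNED : ITEM_SUCCESS;
}

/* Walk the items without ever sleeping on any one of them. */
static void push_all(int *items, int n)
{
	int pinned = 0;

	for (int i = 0; i < n; i++) {
		switch (push_item(&items[i])) {
		case ITEM_SUCCESS:	/* flushed and queued for writeback */
			break;
		case ITEM_PINNED:	/* needs a log force before it can move */
			pinned++;
			break;
		case ITEM_LOCKED:	/* lock held elsewhere, retry next pass */
		case ITEM_FLUSHING:	/* flush already in flight, wait it out */
			break;
		}
	}
	if (pinned)
		printf("%d items pinned, force the log\n", pinned);
}

int main(void)
{
	int items[] = { 1, 2, 3, 4 };
	push_all(items, 4);
	return 0;
}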
static int
xfs_iget_cache_miss(
	struct xfs_mount *mp,
	struct xfs_perag *pag,
	xfs_trans_t *tp,
	xfs_ino_t ino,
	struct xfs_inode **ipp,
	int flags,
	int lock_flags)
{
	struct xfs_inode *ip;
	int error;
	xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);

	ip = xfs_inode_alloc(mp, ino);
	if (!ip)
		return ENOMEM;

	error = xfs_iread(mp, tp, ip, flags);
	if (error)
		goto out_destroy;

	trace_xfs_iget_miss(ip);

	if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
		error = ENOENT;
		goto out_destroy;
	}

	/*
	 * Preload the radix tree so we can insert safely under the
	 * write spinlock. Note that we cannot sleep inside the preload
	 * region.
	 */
	if (radix_tree_preload(GFP_KERNEL)) {
		error = EAGAIN;
		goto out_destroy;
	}

	/*
	 * Because the inode hasn't been added to the radix-tree yet it can't
	 * be found by another thread, so we can do the non-sleeping lock here.
	 */
	if (lock_flags) {
		if (!xfs_ilock_nowait(ip, lock_flags))
			BUG();
	}

	spin_lock(&pag->pag_ici_lock);

	/* insert the new inode */
	error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
	if (unlikely(error)) {
		WARN_ON(error != -EEXIST);
		XFS_STATS_INC(xs_ig_dup);
		error = EAGAIN;
		goto out_preload_end;
	}

	/* These values _must_ be set before releasing the radix tree lock! */
	ip->i_udquot = ip->i_gdquot = NULL;
	xfs_iflags_set(ip, XFS_INEW);

	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();

	*ipp = ip;
	return 0;

out_preload_end:
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
out_destroy:
	__destroy_inode(VFS_I(ip));
	xfs_inode_free(ip);
	return error;
}
/*
 * xfs sync routine for internal use
 *
 * This routine supports all of the flags defined for the generic VFS_SYNC
 * interface as explained above under xfs_sync. In the interests of not
 * changing interfaces within the 6.5 family, additional internally-
 * required functions are specified within a separate xflags parameter,
 * only available by calling this routine.
 */
STATIC int
xfs_sync_inodes(
	xfs_mount_t *mp,
	int flags,
	int xflags,
	int *bypassed)
{
	xfs_inode_t *ip = NULL;
	xfs_inode_t *ip_next;
	xfs_buf_t *bp;
	vnode_t *vp = NULL;
	vmap_t vmap;
	int error;
	int last_error;
	uint64_t fflag;
	uint lock_flags;
	uint base_lock_flags;
	boolean_t mount_locked;
	boolean_t vnode_refed;
	int preempt;
	xfs_dinode_t *dip;
	xfs_iptr_t *ipointer;
#ifdef DEBUG
	boolean_t ipointer_in = B_FALSE;

#define IPOINTER_SET	ipointer_in = B_TRUE
#define IPOINTER_CLR	ipointer_in = B_FALSE
#else
#define IPOINTER_SET
#define IPOINTER_CLR
#endif

/* Insert a marker record into the inode list after inode ip. The list
 * must be locked when this is called. After the call the list will no
 * longer be locked.
 */
#define IPOINTER_INSERT(ip, mp)	{ \
		ASSERT(ipointer_in == B_FALSE); \
		ipointer->ip_mnext = ip->i_mnext; \
		ipointer->ip_mprev = ip; \
		ip->i_mnext = (xfs_inode_t *)ipointer; \
		ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \
		preempt = 0; \
		XFS_MOUNT_IUNLOCK(mp); \
		mount_locked = B_FALSE; \
		IPOINTER_SET; \
	}

/* Remove the marker from the inode list. If the marker was the only item
 * in the list then there are no remaining inodes and we should zero out
 * the whole list. If we are the current head of the list then move the head
 * past us.
 */
#define IPOINTER_REMOVE(ip, mp)	{ \
		ASSERT(ipointer_in == B_TRUE); \
		if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \
			ip = ipointer->ip_mnext; \
			ip->i_mprev = ipointer->ip_mprev; \
			ipointer->ip_mprev->i_mnext = ip; \
			if (mp->m_inodes == (xfs_inode_t *)ipointer) { \
				mp->m_inodes = ip; \
			} \
		} else { \
			ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \
			mp->m_inodes = NULL; \
			ip = NULL; \
		} \
		IPOINTER_CLR; \
	}

#define XFS_PREEMPT_MASK	0x7f

	if (bypassed)
		*bypassed = 0;
	if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY)
		return 0;
	error = 0;
	last_error = 0;
	preempt = 0;

	/* Allocate a reference marker */
	ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP);

	fflag = XFS_B_ASYNC;		/* default is don't wait */
	if (flags & SYNC_BDFLUSH)
		fflag = XFS_B_DELWRI;
	if (flags & SYNC_WAIT)
		fflag = 0;		/* synchronous overrides all */

	base_lock_flags = XFS_ILOCK_SHARED;
	if (flags & (SYNC_DELWRI | SYNC_CLOSE)) {
		/*
		 * We need the I/O lock if we're going to call any of
		 * the flush/inval routines.
		 */
		base_lock_flags |= XFS_IOLOCK_SHARED;
	}

	XFS_MOUNT_ILOCK(mp);

	ip = mp->m_inodes;

	mount_locked = B_TRUE;
	vnode_refed = B_FALSE;

	IPOINTER_CLR;

	do {
		ASSERT(ipointer_in == B_FALSE);
		ASSERT(vnode_refed == B_FALSE);

		lock_flags = base_lock_flags;

		/*
		 * There were no inodes in the list, just break out
		 * of the loop.
		 */
		if (ip == NULL) {
			break;
		}

		/*
		 * We found another sync thread marker - skip it
		 */
		if (ip->i_mount == NULL) {
			ip = ip->i_mnext;
			continue;
		}

		vp = XFS_ITOV_NULL(ip);

		/*
		 * If the vnode is gone then this is being torn down,
		 * call reclaim if it is flushed, else let regular flush
		 * code deal with it later in the loop.
		 */
		if (vp == NULL) {
			/* Skip ones already in reclaim */
			if (ip->i_flags & XFS_IRECLAIM) {
				ip = ip->i_mnext;
				continue;
			}
			if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) {
				ip = ip->i_mnext;
			} else if ((xfs_ipincount(ip) == 0) &&
				   xfs_iflock_nowait(ip)) {
				IPOINTER_INSERT(ip, mp);

				xfs_finish_reclaim(ip, 1,
						XFS_IFLUSH_DELWRI_ELSE_ASYNC);

				XFS_MOUNT_ILOCK(mp);
				mount_locked = B_TRUE;
				IPOINTER_REMOVE(ip, mp);
			} else {
				xfs_iunlock(ip, XFS_ILOCK_EXCL);
				ip = ip->i_mnext;
			}
			continue;
		}

		if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
			XFS_MOUNT_IUNLOCK(mp);
			kmem_free(ipointer, sizeof(xfs_iptr_t));
			return 0;
		}

		/*
		 * If this is just vfs_sync() or pflushd() calling
		 * then we can skip inodes for which it looks like
		 * there is nothing to do. Since we don't have the
		 * inode locked this is racey, but these are periodic
		 * calls so it doesn't matter. For the others we want
		 * to know for sure, so we at least try to lock them.
		 */
		if (flags & SYNC_BDFLUSH) {
			if (((ip->i_itemp == NULL) ||
			     !(ip->i_itemp->ili_format.ilf_fields &
			       XFS_ILOG_ALL)) &&
			    (ip->i_update_core == 0)) {
				ip = ip->i_mnext;
				continue;
			}
		}

		/*
		 * Try to lock without sleeping. We're out of order with
		 * the inode list lock here, so if we fail we need to drop
		 * the mount lock and try again. If we're called from
		 * bdflush() here, then don't bother.
		 *
		 * The inode lock here actually coordinates with the
		 * almost spurious inode lock in xfs_ireclaim() to prevent
		 * the vnode we handle here without a reference from
		 * being freed while we reference it. If we lock the inode
		 * while it's on the mount list here, then the spurious inode
		 * lock in xfs_ireclaim() after the inode is pulled from
		 * the mount list will sleep until we release it here.
		 * This keeps the vnode from being freed while we reference
		 * it. It is also cheaper and simpler than actually doing
		 * a vn_get() for every inode we touch here.
		 */
		if (xfs_ilock_nowait(ip, lock_flags) == 0) {
			if ((flags & SYNC_BDFLUSH) || (vp == NULL)) {
				ip = ip->i_mnext;
				continue;
			}

			/*
			 * We need to unlock the inode list lock in order
			 * to lock the inode. Insert a marker record into
			 * the inode list to remember our position, dropping
			 * the lock is now done inside the IPOINTER_INSERT
			 * macro.
			 *
			 * We also use the inode list lock to protect us
			 * in taking a snapshot of the vnode version number
			 * for use in calling vn_get().
			 */
			VMAP(vp, vmap);
			IPOINTER_INSERT(ip, mp);

			vp = vn_get(vp, &vmap);
			if (vp == NULL) {
				/*
				 * The vnode was reclaimed once we let go
				 * of the inode list lock. Skip to the
				 * next list entry. Remove the marker.
				 */
				XFS_MOUNT_ILOCK(mp);
				mount_locked = B_TRUE;
				vnode_refed = B_FALSE;
				IPOINTER_REMOVE(ip, mp);
				continue;
			}

			xfs_ilock(ip, lock_flags);

			ASSERT(vp == XFS_ITOV(ip));
			ASSERT(ip->i_mount == mp);

			vnode_refed = B_TRUE;
		}

		/* From here on in the loop we may have a marker record
		 * in the inode list.
		 */

		if ((flags & SYNC_CLOSE) && (vp != NULL)) {
			/*
			 * This is the shutdown case. We just need to
			 * flush and invalidate all the pages associated
			 * with the inode. Drop the inode lock since
			 * we can't hold it across calls to the buffer
			 * cache.
			 *
			 * We don't set the VREMAPPING bit in the vnode
			 * here, because we don't hold the vnode lock
			 * exclusively. It doesn't really matter, though,
			 * because we only come here when we're shutting
			 * down anyway.
			 */
			xfs_iunlock(ip, XFS_ILOCK_SHARED);

			if (XFS_FORCED_SHUTDOWN(mp)) {
				VOP_TOSS_PAGES(vp, 0, -1, FI_REMAPF);
			} else {
				VOP_FLUSHINVAL_PAGES(vp, 0, -1, FI_REMAPF);
			}

			xfs_ilock(ip, XFS_ILOCK_SHARED);

		} else if ((flags & SYNC_DELWRI) && (vp != NULL)) {
			if (VN_DIRTY(vp)) {
				/* We need to have dropped the lock here,
				 * so insert a marker if we have not already
				 * done so.
				 */
				if (mount_locked) {
					IPOINTER_INSERT(ip, mp);
				}

				/*
				 * Drop the inode lock since we can't hold it
				 * across calls to the buffer cache.
				 */
				xfs_iunlock(ip, XFS_ILOCK_SHARED);
				VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1,
						fflag, FI_NONE, error);
				xfs_ilock(ip, XFS_ILOCK_SHARED);
			}

		}

		if (flags & SYNC_BDFLUSH) {
			if ((flags & SYNC_ATTR) &&
			    ((ip->i_update_core) ||
			     ((ip->i_itemp != NULL) &&
			      (ip->i_itemp->ili_format.ilf_fields != 0)))) {

				/* Insert marker and drop lock if not already
				 * done.
				 */
				if (mount_locked) {
					IPOINTER_INSERT(ip, mp);
				}

				/*
				 * We don't want the periodic flushing of the
				 * inodes by vfs_sync() to interfere with
				 * I/O to the file, especially read I/O
				 * where it is only the access time stamp
				 * that is being flushed out. To prevent
				 * long periods where we have both inode
				 * locks held shared here while reading the
				 * inode's buffer in from disk, we drop the
				 * inode lock while reading in the inode
				 * buffer. We have to release the buffer
				 * and reacquire the inode lock so that they
				 * are acquired in the proper order (inode
				 * locks first). The buffer will go at the
				 * end of the lru chain, though, so we can
				 * expect it to still be there when we go
				 * for it again in xfs_iflush().
				 */
				if ((xfs_ipincount(ip) == 0) &&
				    xfs_iflock_nowait(ip)) {

					xfs_ifunlock(ip);
					xfs_iunlock(ip, XFS_ILOCK_SHARED);

					error = xfs_itobp(mp, NULL, ip,
							  &dip, &bp, 0);
					if (!error) {
						xfs_buf_relse(bp);
					} else {
						/* Bailing out, remove the
						 * marker and free it.
						 */
						XFS_MOUNT_ILOCK(mp);
						IPOINTER_REMOVE(ip, mp);
						XFS_MOUNT_IUNLOCK(mp);

						ASSERT(!(lock_flags &
							XFS_IOLOCK_SHARED));

						kmem_free(ipointer,
							sizeof(xfs_iptr_t));
						return (0);
					}

					/*
					 * Since we dropped the inode lock,
					 * the inode may have been reclaimed.
					 * Therefore, we reacquire the mount
					 * lock and check to see if we were the
					 * inode reclaimed. If this happened
					 * then the ipointer marker will no
					 * longer point back at us. In this
					 * case, move ip along to the inode
					 * after the marker, remove the marker
					 * and continue.
					 */
					XFS_MOUNT_ILOCK(mp);
					mount_locked = B_TRUE;

					if (ip != ipointer->ip_mprev) {
						IPOINTER_REMOVE(ip, mp);

						ASSERT(!vnode_refed);
						ASSERT(!(lock_flags &
							XFS_IOLOCK_SHARED));
						continue;
					}

					ASSERT(ip->i_mount == mp);

					if (xfs_ilock_nowait(ip,
						    XFS_ILOCK_SHARED) == 0) {
						ASSERT(ip->i_mount == mp);
						/*
						 * We failed to reacquire
						 * the inode lock without
						 * sleeping, so just skip
						 * the inode for now. We
						 * clear the ILOCK bit from
						 * the lock_flags so that we
						 * won't try to drop a lock
						 * we don't hold below.
						 */
						lock_flags &= ~XFS_ILOCK_SHARED;
						IPOINTER_REMOVE(ip_next, mp);
					} else if ((xfs_ipincount(ip) == 0) &&
						   xfs_iflock_nowait(ip)) {
						ASSERT(ip->i_mount == mp);
						/*
						 * Since this is vfs_sync()
						 * calling we only flush the
						 * inode out if we can lock
						 * it without sleeping and
						 * it is not pinned. Drop
						 * the mount lock here so
						 * that we don't hold it for
						 * too long. We already have
						 * a marker in the list here.
						 */
						XFS_MOUNT_IUNLOCK(mp);
						mount_locked = B_FALSE;
						error = xfs_iflush(ip,
							   XFS_IFLUSH_DELWRI);
					} else {
						ASSERT(ip->i_mount == mp);
						IPOINTER_REMOVE(ip_next, mp);
					}
				}

			}

		} else {
			if ((flags & SYNC_ATTR) &&
			    ((ip->i_update_core) ||
			     ((ip->i_itemp != NULL) &&
			      (ip->i_itemp->ili_format.ilf_fields != 0)))) {
				if (mount_locked) {
					IPOINTER_INSERT(ip, mp);
				}

				if (flags & SYNC_WAIT) {
					xfs_iflock(ip);
					error = xfs_iflush(ip,
							   XFS_IFLUSH_SYNC);
				} else {
					/*
					 * If we can't acquire the flush
					 * lock, then the inode is already
					 * being flushed so don't bother
					 * waiting. If we can lock it then
					 * do a delwri flush so we can
					 * combine multiple inode flushes
					 * in each disk write.
					 */
					if (xfs_iflock_nowait(ip)) {
						error = xfs_iflush(ip,
							   XFS_IFLUSH_DELWRI);
					}
					else if (bypassed)
						(*bypassed)++;
				}
			}
		}

		if (lock_flags != 0) {
			xfs_iunlock(ip, lock_flags);
		}

		if (vnode_refed) {
			/*
			 * If we had to take a reference on the vnode
			 * above, then wait until after we've unlocked
			 * the inode to release the reference. This is
			 * because we can be already holding the inode
			 * lock when VN_RELE() calls xfs_inactive().
			 *
			 * Make sure to drop the mount lock before calling
			 * VN_RELE() so that we don't trip over ourselves if
			 * we have to go for the mount lock again in the
			 * inactive code.
			 */
			if (mount_locked) {
				IPOINTER_INSERT(ip, mp);
			}

			VN_RELE(vp);
			vnode_refed = B_FALSE;
		}

		if (error) {
			last_error = error;
		}

		/*
		 * bail out if the filesystem is corrupted.
		 */
		if (error == EFSCORRUPTED) {
			if (!mount_locked) {
				XFS_MOUNT_ILOCK(mp);
				IPOINTER_REMOVE(ip, mp);
			}
			XFS_MOUNT_IUNLOCK(mp);
			ASSERT(ipointer_in == B_FALSE);
			kmem_free(ipointer, sizeof(xfs_iptr_t));
			return XFS_ERROR(error);
		}

		/* Let other threads have a chance at the mount lock
		 * if we have looped many times without dropping the
		 * lock.
		 */
		if ((++preempt & XFS_PREEMPT_MASK) == 0) {
			if (mount_locked) {
				IPOINTER_INSERT(ip, mp);
			}
		}

		if (mount_locked == B_FALSE) {
			XFS_MOUNT_ILOCK(mp);
			mount_locked = B_TRUE;
			IPOINTER_REMOVE(ip, mp);
			continue;
		}

		ASSERT(ipointer_in == B_FALSE);
		ip = ip->i_mnext;

	} while (ip != mp->m_inodes);

	XFS_MOUNT_IUNLOCK(mp);

	ASSERT(ipointer_in == B_FALSE);

	kmem_free(ipointer, sizeof(xfs_iptr_t));

	return XFS_ERROR(last_error);
}
int
xfs_iomap(
	xfs_inode_t *ip,
	xfs_off_t offset,
	ssize_t count,
	int flags,
	xfs_iomap_t *iomapp,
	int *niomaps)
{
	xfs_mount_t *mp = ip->i_mount;
	xfs_fileoff_t offset_fsb, end_fsb;
	int error = 0;
	int lockmode = 0;
	xfs_bmbt_irec_t imap;
	int nimaps = 1;
	int bmapi_flags = 0;
	int iomap_flags = 0;

	ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
	case BMAPI_READ:
		xfs_iomap_enter_trace(XFS_IOMAP_READ_ENTER, ip, offset, count);
		lockmode = xfs_ilock_map_shared(ip);
		bmapi_flags = XFS_BMAPI_ENTIRE;
		break;
	case BMAPI_WRITE:
		xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count);
		lockmode = XFS_ILOCK_EXCL;
		if (flags & BMAPI_IGNSTATE)
			bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
		xfs_ilock(ip, lockmode);
		break;
	case BMAPI_ALLOCATE:
		xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count);
		lockmode = XFS_ILOCK_SHARED;
		bmapi_flags = XFS_BMAPI_ENTIRE;

		/* Attempt non-blocking lock */
		if (flags & BMAPI_TRYLOCK) {
			if (!xfs_ilock_nowait(ip, lockmode))
				return XFS_ERROR(EAGAIN);
		} else {
			xfs_ilock(ip, lockmode);
		}
		break;
	default:
		BUG();
	}

	ASSERT(offset <= mp->m_maxioffset);
	if ((xfs_fsize_t)offset + count > mp->m_maxioffset)
		count = mp->m_maxioffset - offset;
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
	offset_fsb = XFS_B_TO_FSBT(mp, offset);

	error = xfs_bmapi(NULL, ip, offset_fsb,
			(xfs_filblks_t)(end_fsb - offset_fsb),
			bmapi_flags, NULL, 0, &imap,
			&nimaps, NULL, NULL);

	if (error)
		goto out;

	switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
	case BMAPI_WRITE:
		/* If we found an extent, return it */
		if (nimaps &&
		    (imap.br_startblock != HOLESTARTBLOCK) &&
		    (imap.br_startblock != DELAYSTARTBLOCK)) {
			xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip,
					offset, count, iomapp, &imap, flags);
			break;
		}

		if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) {
			error = xfs_iomap_write_direct(ip, offset, count, flags,
						       &imap, &nimaps, nimaps);
		} else {
			error = xfs_iomap_write_delay(ip, offset, count, flags,
						      &imap, &nimaps);
		}
		if (!error) {
			xfs_iomap_map_trace(XFS_IOMAP_ALLOC_MAP, ip,
					offset, count, iomapp, &imap, flags);
		}
		iomap_flags = IOMAP_NEW;
		break;
	case BMAPI_ALLOCATE:
		/* If we found an extent, return it */
		xfs_iunlock(ip, lockmode);
		lockmode = 0;

		if (nimaps && !isnullstartblock(imap.br_startblock)) {
			xfs_iomap_map_trace(XFS_IOMAP_WRITE_MAP, ip,
					offset, count, iomapp, &imap, flags);
			break;
		}

		error = xfs_iomap_write_allocate(ip, offset, count,
						 &imap, &nimaps);
		break;
	}

	if (nimaps) {
		*niomaps = xfs_imap_to_bmap(ip, offset, &imap,
					    iomapp, nimaps, *niomaps,
					    iomap_flags);
	} else if (niomaps) {
		*niomaps = 0;
	}

out:
	if (lockmode)
		xfs_iunlock(ip, lockmode);
	return XFS_ERROR(error);
}
/*
 * xfs_file_dio_aio_write - handle direct IO writes
 *
 * Lock the inode appropriately to prepare for and issue a direct IO write.
 * By separating it from the buffered write path we remove all the tricky to
 * follow locking changes and looping.
 *
 * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
 * until we're sure the bytes at the new EOF have been zeroed and/or the cached
 * pages are flushed out.
 *
 * In most cases the direct IO writes will be done holding IOLOCK_SHARED
 * allowing them to be done in parallel with reads and other direct IO writes.
 * However, if the IO is not aligned to filesystem blocks, the direct IO layer
 * needs to do sub-block zeroing and that requires serialisation against other
 * direct IOs to the same block. In this case we need to serialise the
 * submission of the unaligned IOs so that we don't get racing block zeroing in
 * the dio layer. To avoid the problem with aio, we also need to wait for
 * outstanding IOs to complete so that unwritten extent conversion is completed
 * before we try to map the overlapping block. This is currently implemented by
 * hitting it with a big hammer (i.e. inode_dio_wait()).
 *
 * Returns with locks held indicated by @iolock and errors indicated by
 * negative return values.
 */
STATIC ssize_t
xfs_file_dio_aio_write(
	struct kiocb *iocb,
	struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct xfs_inode *ip = XFS_I(inode);
	struct xfs_mount *mp = ip->i_mount;
	ssize_t ret = 0;
	int unaligned_io = 0;
	int iolock;
	size_t count = iov_iter_count(from);
	struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
					mp->m_rtdev_targp : mp->m_ddev_targp;

	/* DIO must be aligned to device logical sector size */
	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
		return -EINVAL;

	/*
	 * Don't take the exclusive iolock here unless the I/O is unaligned to
	 * the file system block size. We don't need to consider the EOF
	 * extension case here because xfs_file_aio_write_checks() will relock
	 * the inode as necessary for EOF zeroing cases and fill out the new
	 * inode size as appropriate.
	 */
	if ((iocb->ki_pos & mp->m_blockmask) ||
	    ((iocb->ki_pos + count) & mp->m_blockmask)) {
		unaligned_io = 1;

		/*
		 * We can't properly handle unaligned direct I/O to reflink
		 * files yet, as we can't unshare a partial block.
		 */
		if (xfs_is_reflink_inode(ip)) {
			trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
			return -EREMCHG;
		}
		iolock = XFS_IOLOCK_EXCL;
	} else {
		iolock = XFS_IOLOCK_SHARED;
	}

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, iolock))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, iolock);
	}

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;
	count = iov_iter_count(from);

	/*
	 * If we are doing unaligned IO, wait for all other IO to drain,
	 * otherwise demote the lock if we had to take the exclusive lock
	 * for other reasons in xfs_file_aio_write_checks.
	 */
	if (unaligned_io) {
		/* If we are going to wait for other DIO to finish, bail */
		if (iocb->ki_flags & IOCB_NOWAIT) {
			if (atomic_read(&inode->i_dio_count))
				return -EAGAIN;
		} else {
			inode_dio_wait(inode);
		}
	} else if (iolock == XFS_IOLOCK_EXCL) {
		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
		iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
	ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
out:
	xfs_iunlock(ip, iolock);

	/*
	 * No fallback to buffered IO on errors for XFS, direct IO will either
	 * complete fully or fail.
	 */
	ASSERT(ret < 0 || ret == count);
	return ret;
}
static int
xfs_iget_cache_miss(
	struct xfs_mount *mp,
	struct xfs_perag *pag,
	xfs_trans_t *tp,
	xfs_ino_t ino,
	struct xfs_inode **ipp,
	int flags,
	int lock_flags)
{
	struct xfs_inode *ip;
	int error;
	xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ino);
	int iflags;

	ip = xfs_inode_alloc(mp, ino);
	if (!ip)
		return ENOMEM;

	error = xfs_iread(mp, tp, ip, flags);
	if (error)
		goto out_destroy;

	trace_xfs_iget_miss(ip);

	if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
		error = ENOENT;
		goto out_destroy;
	}

	/* Preload the radix tree; we cannot sleep inside the preload region. */
	if (radix_tree_preload(GFP_KERNEL)) {
		error = EAGAIN;
		goto out_destroy;
	}

	/*
	 * The inode is not yet visible to other threads, so the non-sleeping
	 * lock cannot fail here.
	 */
	if (lock_flags) {
		if (!xfs_ilock_nowait(ip, lock_flags))
			BUG();
	}

	/*
	 * Set XFS_INEW (and XFS_IDONTCACHE if requested) before the inode
	 * becomes visible to concurrent lookups.
	 */
	iflags = XFS_INEW;
	if (flags & XFS_IGET_DONTCACHE)
		iflags |= XFS_IDONTCACHE;
	ip->i_udquot = ip->i_gdquot = NULL;
	xfs_iflags_set(ip, iflags);

	/* insert the new inode */
	spin_lock(&pag->pag_ici_lock);
	error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
	if (unlikely(error)) {
		WARN_ON(error != -EEXIST);
		XFS_STATS_INC(xs_ig_dup);
		error = EAGAIN;
		goto out_preload_end;
	}
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();

	*ipp = ip;
	return 0;

out_preload_end:
	spin_unlock(&pag->pag_ici_lock);
	radix_tree_preload_end();
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
out_destroy:
	__destroy_inode(VFS_I(ip));
	xfs_inode_free(ip);
	return error;
}