STATIC void xfs_inode_item_push( struct xfs_log_item *lip) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); ASSERT(xfs_isiflocked(ip)); /* */ ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || iip->ili_fields != 0); /* */ (void) xfs_iflush(ip, SYNC_TRYLOCK); xfs_iunlock(ip, XFS_ILOCK_SHARED); }
STATIC bool xfs_inode_item_pushbuf( struct xfs_log_item *lip) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; struct xfs_buf *bp; bool ret = true; ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); /* */ if (!xfs_isiflocked(ip) || !(lip->li_flags & XFS_LI_IN_AIL)) { xfs_iunlock(ip, XFS_ILOCK_SHARED); return true; } bp = xfs_incore(ip->i_mount->m_ddev_targp, iip->ili_format.ilf_blkno, iip->ili_format.ilf_len, XBF_TRYLOCK); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (!bp) return true; if (XFS_BUF_ISDELAYWRITE(bp)) xfs_buf_delwri_promote(bp); if (xfs_buf_ispinned(bp)) ret = false; xfs_buf_relse(bp); return ret; }
void xfs_inode_free( struct xfs_inode *ip) { switch (ip->i_d.di_mode & S_IFMT) { case S_IFREG: case S_IFDIR: case S_IFLNK: xfs_idestroy_fork(ip, XFS_DATA_FORK); break; } if (ip->i_afp) xfs_idestroy_fork(ip, XFS_ATTR_FORK); if (ip->i_itemp) { /* * Only if we are shutting down the fs will we see an * inode still in the AIL. If it is there, we should remove * it to prevent a use-after-free from occurring. */ xfs_log_item_t *lip = &ip->i_itemp->ili_item; struct xfs_ail *ailp = lip->li_ailp; ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || XFS_FORCED_SHUTDOWN(ip->i_mount)); if (lip->li_flags & XFS_LI_IN_AIL) { spin_lock(&ailp->xa_lock); if (lip->li_flags & XFS_LI_IN_AIL) xfs_trans_ail_delete(ailp, lip); else spin_unlock(&ailp->xa_lock); } xfs_inode_item_destroy(ip); ip->i_itemp = NULL; } /* asserts to verify all state is correct here */ ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!xfs_isiflocked(ip)); /* * Because we use RCU freeing we need to ensure the inode always * appears to be reclaimed with an invalid inode number when in the * free state. The ip->i_flags_lock provides the barrier against lookup * races. */ spin_lock(&ip->i_flags_lock); ip->i_flags = XFS_IRECLAIM; ip->i_ino = 0; spin_unlock(&ip->i_flags_lock); call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); }
static void __xfs_inode_free( struct xfs_inode *ip) { /* asserts to verify all state is correct here */ ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!xfs_isiflocked(ip)); XFS_STATS_DEC(ip->i_mount, vn_active); call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); }
void __xfs_iflock( struct xfs_inode *ip) { wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT); DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT); do { prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE); if (xfs_isiflocked(ip)) io_schedule(); } while (!xfs_iflock_nowait(ip)); finish_wait(wq, &wait.wait); }
/* * Allocate and initialise an xfs_inode. */ struct xfs_inode * xfs_inode_alloc( struct xfs_mount *mp, xfs_ino_t ino) { struct xfs_inode *ip; /* * if this didn't occur in transactions, we could use * KM_MAYFAIL and return NULL here on ENOMEM. Set the * code up to do this anyway. */ ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); if (!ip) return NULL; if (inode_init_always(mp->m_super, VFS_I(ip))) { kmem_zone_free(xfs_inode_zone, ip); return NULL; } /* VFS doesn't initialise i_mode! */ VFS_I(ip)->i_mode = 0; XFS_STATS_INC(mp, vn_active); ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!xfs_isiflocked(ip)); ASSERT(ip->i_ino == 0); mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); /* initialise the xfs inode */ ip->i_ino = ino; ip->i_mount = mp; memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); ip->i_afp = NULL; ip->i_cowfp = NULL; ip->i_cnextents = 0; ip->i_cformat = XFS_DINODE_FMT_EXTENTS; memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); ip->i_flags = 0; ip->i_delayed_blks = 0; memset(&ip->i_d, 0, sizeof(ip->i_d)); return ip; }
void xfs_inode_free( struct xfs_inode *ip) { switch (ip->i_d.di_mode & S_IFMT) { case S_IFREG: case S_IFDIR: case S_IFLNK: xfs_idestroy_fork(ip, XFS_DATA_FORK); break; } if (ip->i_afp) xfs_idestroy_fork(ip, XFS_ATTR_FORK); if (ip->i_itemp) { xfs_log_item_t *lip = &ip->i_itemp->ili_item; struct xfs_ail *ailp = lip->li_ailp; ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) || XFS_FORCED_SHUTDOWN(ip->i_mount)); if (lip->li_flags & XFS_LI_IN_AIL) { spin_lock(&ailp->xa_lock); if (lip->li_flags & XFS_LI_IN_AIL) xfs_trans_ail_delete(ailp, lip); else spin_unlock(&ailp->xa_lock); } xfs_inode_item_destroy(ip); ip->i_itemp = NULL; } ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!xfs_isiflocked(ip)); spin_lock(&ip->i_flags_lock); ip->i_flags = XFS_IRECLAIM; ip->i_ino = 0; spin_unlock(&ip->i_flags_lock); call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); }
void xfs_inode_free( struct xfs_inode *ip) { ASSERT(!xfs_isiflocked(ip)); /* * Because we use RCU freeing we need to ensure the inode always * appears to be reclaimed with an invalid inode number when in the * free state. The ip->i_flags_lock provides the barrier against lookup * races. */ spin_lock(&ip->i_flags_lock); ip->i_flags = XFS_IRECLAIM; ip->i_ino = 0; spin_unlock(&ip->i_flags_lock); __xfs_inode_free(ip); }
/* * Allocate and initialise an xfs_inode. */ STATIC struct xfs_inode * xfs_inode_alloc( struct xfs_mount *mp, xfs_ino_t ino) { struct xfs_inode *ip; /* * if this didn't occur in transactions, we could use * KM_MAYFAIL and return NULL here on ENOMEM. Set the * code up to do this anyway. */ ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); if (!ip) return NULL; if (inode_init_always(mp->m_super, VFS_I(ip))) { kmem_zone_free(xfs_inode_zone, ip); return NULL; } ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!xfs_isiflocked(ip)); ASSERT(ip->i_ino == 0); mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); lockdep_set_class_and_name(&ip->i_iolock.mr_lock, &xfs_iolock_active, "xfs_iolock_active"); /* initialise the xfs inode */ ip->i_ino = ino; ip->i_mount = mp; memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); ip->i_afp = NULL; memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); ip->i_flags = 0; ip->i_update_core = 0; ip->i_delayed_blks = 0; memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); return ip; }
void xfs_inode_free( struct xfs_inode *ip) { switch (ip->i_d.di_mode & S_IFMT) { case S_IFREG: case S_IFDIR: case S_IFLNK: xfs_idestroy_fork(ip, XFS_DATA_FORK); break; } if (ip->i_afp) xfs_idestroy_fork(ip, XFS_ATTR_FORK); if (ip->i_itemp) { ASSERT(!(ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL)); xfs_inode_item_destroy(ip); ip->i_itemp = NULL; } /* * Because we use RCU freeing we need to ensure the inode always * appears to be reclaimed with an invalid inode number when in the * free state. The ip->i_flags_lock provides the barrier against lookup * races. */ spin_lock(&ip->i_flags_lock); ip->i_flags = XFS_IRECLAIM; ip->i_ino = 0; spin_unlock(&ip->i_flags_lock); /* asserts to verify all state is correct here */ ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!xfs_isiflocked(ip)); XFS_STATS_DEC(vn_active); call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); }
STATIC struct xfs_inode * xfs_inode_alloc( struct xfs_mount *mp, xfs_ino_t ino) { struct xfs_inode *ip; ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); if (!ip) return NULL; if (inode_init_always(mp->m_super, VFS_I(ip))) { kmem_zone_free(xfs_inode_zone, ip); return NULL; } ASSERT(atomic_read(&ip->i_pincount) == 0); ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(!xfs_isiflocked(ip)); ASSERT(ip->i_ino == 0); mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); lockdep_set_class_and_name(&ip->i_iolock.mr_lock, &xfs_iolock_active, "xfs_iolock_active"); ip->i_ino = ino; ip->i_mount = mp; memset(&ip->i_imap, 0, sizeof(struct xfs_imap)); ip->i_afp = NULL; memset(&ip->i_df, 0, sizeof(xfs_ifork_t)); ip->i_flags = 0; ip->i_delayed_blks = 0; memset(&ip->i_d, 0, sizeof(xfs_icdinode_t)); return ip; }
/* * Inodes in different states need to be treated differently. The following * table lists the inode states and the reclaim actions necessary: * * inode state iflush ret required action * --------------- ---------- --------------- * bad - reclaim * shutdown EIO unpin and reclaim * clean, unpinned 0 reclaim * stale, unpinned 0 reclaim * clean, pinned(*) 0 requeue * stale, pinned EAGAIN requeue * dirty, async - requeue * dirty, sync 0 reclaim * * (*) dgc: I don't think the clean, pinned state is possible but it gets * handled anyway given the order of checks implemented. * * Also, because we get the flush lock first, we know that any inode that has * been flushed delwri has had the flush completed by the time we check that * the inode is clean. * * Note that because the inode is flushed delayed write by AIL pushing, the * flush lock may already be held here and waiting on it can result in very * long latencies. Hence for sync reclaims, where we wait on the flush lock, * the caller should push the AIL first before trying to reclaim inodes to * minimise the amount of time spent waiting. For background relaim, we only * bother to reclaim clean inodes anyway. * * Hence the order of actions after gaining the locks should be: * bad => reclaim * shutdown => unpin and reclaim * pinned, async => requeue * pinned, sync => unpin * stale => reclaim * clean => reclaim * dirty, async => requeue * dirty, sync => flush, wait and reclaim */ STATIC int xfs_reclaim_inode( struct xfs_inode *ip, struct xfs_perag *pag, int sync_mode) { struct xfs_buf *bp = NULL; xfs_ino_t ino = ip->i_ino; /* for radix_tree_delete */ int error; restart: error = 0; xfs_ilock(ip, XFS_ILOCK_EXCL); if (!xfs_iflock_nowait(ip)) { if (!(sync_mode & SYNC_WAIT)) goto out; xfs_iflock(ip); } if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { xfs_iunpin_wait(ip); /* xfs_iflush_abort() drops the flush lock */ xfs_iflush_abort(ip, false); goto reclaim; } if (xfs_ipincount(ip)) { if (!(sync_mode & SYNC_WAIT)) goto out_ifunlock; xfs_iunpin_wait(ip); } if (xfs_iflags_test(ip, XFS_ISTALE) || xfs_inode_clean(ip)) { xfs_ifunlock(ip); goto reclaim; } /* * Never flush out dirty data during non-blocking reclaim, as it would * just contend with AIL pushing trying to do the same job. */ if (!(sync_mode & SYNC_WAIT)) goto out_ifunlock; /* * Now we have an inode that needs flushing. * * Note that xfs_iflush will never block on the inode buffer lock, as * xfs_ifree_cluster() can lock the inode buffer before it locks the * ip->i_lock, and we are doing the exact opposite here. As a result, * doing a blocking xfs_imap_to_bp() to get the cluster buffer would * result in an ABBA deadlock with xfs_ifree_cluster(). * * As xfs_ifree_cluser() must gather all inodes that are active in the * cache to mark them stale, if we hit this case we don't actually want * to do IO here - we want the inode marked stale so we can simply * reclaim it. Hence if we get an EAGAIN error here, just unlock the * inode, back off and try again. Hopefully the next pass through will * see the stale flag set on the inode. */ error = xfs_iflush(ip, &bp); if (error == -EAGAIN) { xfs_iunlock(ip, XFS_ILOCK_EXCL); /* backoff longer than in xfs_ifree_cluster */ delay(2); goto restart; } if (!error) { error = xfs_bwrite(bp); xfs_buf_relse(bp); } reclaim: ASSERT(!xfs_isiflocked(ip)); /* * Because we use RCU freeing we need to ensure the inode always appears * to be reclaimed with an invalid inode number when in the free state. * We do this as early as possible under the ILOCK so that * xfs_iflush_cluster() can be guaranteed to detect races with us here. * By doing this, we guarantee that once xfs_iflush_cluster has locked * XFS_ILOCK that it will see either a valid, flushable inode that will * serialise correctly, or it will see a clean (and invalid) inode that * it can skip. */ spin_lock(&ip->i_flags_lock); ip->i_flags = XFS_IRECLAIM; ip->i_ino = 0; spin_unlock(&ip->i_flags_lock); xfs_iunlock(ip, XFS_ILOCK_EXCL); XFS_STATS_INC(ip->i_mount, xs_ig_reclaims); /* * Remove the inode from the per-AG radix tree. * * Because radix_tree_delete won't complain even if the item was never * added to the tree assert that it's been there before to catch * problems with the inode life time early on. */ spin_lock(&pag->pag_ici_lock); if (!radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(ip->i_mount, ino))) ASSERT(0); xfs_perag_clear_reclaim_tag(pag); spin_unlock(&pag->pag_ici_lock); /* * Here we do an (almost) spurious inode lock in order to coordinate * with inode cache radix tree lookups. This is because the lookup * can reference the inodes in the cache without taking references. * * We make that OK here by ensuring that we wait until the inode is * unlocked after the lookup before we go ahead and free it. */ xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_qm_dqdetach(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); __xfs_inode_free(ip); return error; out_ifunlock: xfs_ifunlock(ip); out: xfs_iflags_clear(ip, XFS_IRECLAIM); xfs_iunlock(ip, XFS_ILOCK_EXCL); /* * We could return -EAGAIN here to make reclaim rescan the inode tree in * a short while. However, this just burns CPU time scanning the tree * waiting for IO to complete and the reclaim work never goes back to * the idle state. Instead, return 0 to let the next scheduled * background reclaim attempt to reclaim the inode again. */ return 0; }