/*
 * This is called by the efd item code below to release references to
 * the given efi item.  Each efd calls this with the number of
 * extents that it has logged, and when the sum of these reaches
 * the total number of extents logged by this efi item we can free
 * the efi item.
 *
 * Freeing the efi item requires that we remove it from the AIL.
 * We'll use the AIL lock to protect our counters as well as
 * the removal from the AIL.
 */
void
xfs_efi_release(xfs_efi_log_item_t	*efip,
		uint			nextents)
{
	xfs_mount_t	*mp;
	int		extents_left;
	SPLDECL(s);

	mp = efip->efi_item.li_mountp;
	ASSERT(efip->efi_next_extent > 0);
	ASSERT(efip->efi_flags & XFS_EFI_COMMITTED);

	AIL_LOCK(mp, s);
	ASSERT(efip->efi_next_extent >= nextents);
	efip->efi_next_extent -= nextents;
	extents_left = efip->efi_next_extent;
	if (extents_left == 0) {
		/*
		 * xfs_trans_delete_ail() drops the AIL lock.
		 */
		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
		xfs_efi_item_free(efip);
	} else {
		AIL_UNLOCK(mp, s);
	}
}
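/*
 * Illustration only, not part of the source above: the EFD "committed"
 * path described in the comment on xfs_efi_release() hands back the
 * number of extents that particular EFD logged.  A hypothetical caller
 * might look like the sketch below; the efd_efip and efd_format fields
 * are assumed from the surrounding XFS code and are not defined in this
 * excerpt.
 */
STATIC void
example_efd_committed(xfs_efd_log_item_t *efdp)
{
	/*
	 * Release this EFD's share of the EFI's extent references; once
	 * every EFD has done so, xfs_efi_release() frees the EFI itself.
	 */
	xfs_efi_release(efdp->efd_efip, efdp->efd_format.efd_nextents);
}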
/*
 * This is called when the transaction that should be committing the
 * EFD corresponding to the given EFI is aborted.  The committed and
 * canceled flags are used to coordinate the freeing of the EFI and
 * the references by the transaction that committed it.
 */
STATIC void
xfs_efi_cancel(
	xfs_efi_log_item_t	*efip)
{
	int		nexts;
	int		size;
	xfs_mount_t	*mp;
	SPLDECL(s);

	mp = efip->efi_item.li_mountp;
	AIL_LOCK(mp, s);
	if (efip->efi_flags & XFS_EFI_COMMITTED) {
		/*
		 * xfs_trans_delete_ail() drops the AIL lock.
		 */
		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);

		nexts = efip->efi_format.efi_nextents;
		if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
			size = sizeof(xfs_efi_log_item_t);
			size += (nexts - 1) * sizeof(xfs_extent_t);
			kmem_free(efip, size);
		} else {
			kmem_zone_free(xfs_efi_zone, efip);
		}
	} else {
		efip->efi_flags |= XFS_EFI_CANCELED;
		AIL_UNLOCK(mp, s);
	}

	return;
}
/*
 * like unpin only we have to also clear the xaction descriptor
 * pointing to the log item if we free the item.  This routine duplicates
 * unpin because efi_flags is protected by the AIL lock.  Freeing
 * the descriptor and then calling unpin would force us to drop the AIL
 * lock which would open up a race condition.
 */
STATIC void
xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
{
	xfs_mount_t		*mp;
	xfs_log_item_desc_t	*lidp;
	SPLDECL(s);

	mp = efip->efi_item.li_mountp;
	AIL_LOCK(mp, s);
	if (efip->efi_flags & XFS_EFI_CANCELED) {
		/*
		 * free the xaction descriptor pointing to this item
		 */
		lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip);
		xfs_trans_free_item(tp, lidp);
		/*
		 * pull the item off the AIL.
		 * xfs_trans_delete_ail() drops the AIL lock.
		 */
		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
		xfs_efi_item_free(efip);
	} else {
		efip->efi_flags |= XFS_EFI_COMMITTED;
		AIL_UNLOCK(mp, s);
	}
}
/*ARGSUSED*/
void
xfs_iflush_done(
	xfs_buf_t		*bp,
	xfs_inode_log_item_t	*iip)
{
	xfs_inode_t	*ip;
	SPLDECL(s);

	ip = iip->ili_inode;

	/*
	 * We only want to pull the item from the AIL if it is
	 * actually there and its location in the log has not
	 * changed since we started the flush.  Thus, we only bother
	 * if the ili_logged flag is set and the inode's lsn has not
	 * changed.  First we check the lsn outside
	 * the lock since it's cheaper, and then we recheck while
	 * holding the lock before removing the inode from the AIL.
	 */
	if (iip->ili_logged &&
	    (iip->ili_item.li_lsn == iip->ili_flush_lsn)) {
		AIL_LOCK(ip->i_mount, s);
		if (iip->ili_item.li_lsn == iip->ili_flush_lsn) {
			/*
			 * xfs_trans_delete_ail() drops the AIL lock.
			 */
			xfs_trans_delete_ail(ip->i_mount,
					     (xfs_log_item_t *)iip, s);
		} else {
			AIL_UNLOCK(ip->i_mount, s);
		}
	}

	iip->ili_logged = 0;

	/*
	 * Clear the ili_last_fields bits now that we know that the
	 * data corresponding to them is safely on disk.
	 */
	iip->ili_last_fields = 0;

	/*
	 * Release the inode's flush lock since we're done with it.
	 */
	xfs_ifunlock(ip);

	return;
}
/*
 * This is called to unpin the buffer associated with the buf log
 * item which was previously pinned with a call to xfs_buf_item_pin().
 * Just call bunpin() on the buffer to do this.
 *
 * Also drop the reference to the buf item for the current transaction.
 * If the XFS_BLI_STALE flag is set and we are the last reference,
 * then free up the buf log item and unlock the buffer.
 */
void
xfs_buf_item_unpin(
	xfs_buf_log_item_t	*bip,
	int			stale)
{
	xfs_mount_t	*mp;
	xfs_buf_t	*bp;
	int		freed;
	SPLDECL(s);

	bp = bip->bli_buf;
	ASSERT(bp != NULL);
	ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);
	xfs_buf_item_trace("UNPIN", bip);
	xfs_buftrace("XFS_UNPIN", bp);

	freed = atomic_dec_and_test(&bip->bli_refcount);
	mp = bip->bli_item.li_mountp;
	xfs_bunpin(bp);
	if (freed && stale) {
		ASSERT(bip->bli_flags & XFS_BLI_STALE);
		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
		ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
		ASSERT(XFS_BUF_ISSTALE(bp));
		ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
		xfs_buf_item_trace("UNPIN STALE", bip);
		xfs_buftrace("XFS_UNPIN STALE", bp);
		/*
		 * If we get called here because of an IO error, we may
		 * or may not have the item on the AIL.  xfs_trans_delete_ail()
		 * will take care of that situation.
		 * xfs_trans_delete_ail() drops the AIL lock.
		 */
		if (bip->bli_flags & XFS_BLI_STALE_INODE) {
			xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip);
			XFS_BUF_SET_FSPRIVATE(bp, NULL);
			XFS_BUF_CLR_IODONE_FUNC(bp);
		} else {
			AIL_LOCK(mp, s);
			xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip, s);
			xfs_buf_item_relse(bp);
			ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL);
		}
		xfs_buf_relse(bp);
	}
}
/*ARGSUSED*/
STATIC void
xfs_efi_item_unpin(xfs_efi_log_item_t *efip, int stale)
{
	xfs_mount_t	*mp;
	SPLDECL(s);

	mp = efip->efi_item.li_mountp;
	AIL_LOCK(mp, s);
	if (efip->efi_flags & XFS_EFI_CANCELED) {
		/*
		 * xfs_trans_delete_ail() drops the AIL lock.
		 */
		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
		xfs_efi_item_free(efip);
	} else {
		efip->efi_flags |= XFS_EFI_COMMITTED;
		AIL_UNLOCK(mp, s);
	}
}
/*ARGSUSED*/
STATIC xfs_lsn_t
xfs_qm_qoffend_logitem_committed(
	xfs_qoff_logitem_t *qfe,
	xfs_lsn_t lsn)
{
	xfs_qoff_logitem_t	*qfs;
	SPLDECL(s);

	qfs = qfe->qql_start_lip;
	AIL_LOCK(qfs->qql_item.li_mountp, s);

	/*
	 * Delete the qoff-start logitem from the AIL.
	 * xfs_trans_delete_ail() drops the AIL lock.
	 */
	xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs, s);
	kmem_free(qfs, sizeof(xfs_qoff_logitem_t));
	kmem_free(qfe, sizeof(xfs_qoff_logitem_t));
	return (xfs_lsn_t)-1;
}
/*
 * This is called by the log manager code to determine the LSN
 * of the tail of the log.  This is exactly the LSN of the first
 * item in the AIL.  If the AIL is empty, then this function
 * returns 0.
 *
 * We need the AIL lock in order to get a coherent read of the
 * lsn of the first item in the AIL.
 */
xfs_lsn_t
xfs_trans_tail_ail(
	xfs_mount_t	*mp)
{
	xfs_lsn_t	lsn;
	xfs_log_item_t	*lip;
	SPLDECL(s);

	AIL_LOCK(mp, s);
	lip = xfs_ail_min(&(mp->m_ail));
	if (lip == NULL) {
		lsn = (xfs_lsn_t)0;
	} else {
		lsn = lip->li_lsn;
	}
	AIL_UNLOCK(mp, s);

	return lsn;
}
/*
 * like unpin only we have to also clear the xaction descriptor
 * pointing to the log item if we free the item.  This routine duplicates
 * unpin because efi_flags is protected by the AIL lock.  Freeing
 * the descriptor and then calling unpin would force us to drop the AIL
 * lock which would open up a race condition.
 */
STATIC void
xfs_efi_item_unpin_remove(xfs_efi_log_item_t *efip, xfs_trans_t *tp)
{
	int			nexts;
	int			size;
	xfs_mount_t		*mp;
	xfs_log_item_desc_t	*lidp;
	SPLDECL(s);

	mp = efip->efi_item.li_mountp;
	AIL_LOCK(mp, s);
	if (efip->efi_flags & XFS_EFI_CANCELED) {
		/*
		 * free the xaction descriptor pointing to this item
		 */
		lidp = xfs_trans_find_item(tp, (xfs_log_item_t *) efip);
		xfs_trans_free_item(tp, lidp);
		/*
		 * pull the item off the AIL.
		 * xfs_trans_delete_ail() drops the AIL lock.
		 */
		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
		/*
		 * now free the item itself
		 */
		nexts = efip->efi_format.efi_nextents;
		if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
			size = sizeof(xfs_efi_log_item_t);
			size += (nexts - 1) * sizeof(xfs_extent_t);
			kmem_free(efip, size);
		} else {
			kmem_zone_free(xfs_efi_zone, efip);
		}
	} else {
		efip->efi_flags |= XFS_EFI_COMMITTED;
		AIL_UNLOCK(mp, s);
	}

	return;
}
/*
 * This is the inode flushing abort routine.  It is called from
 * xfs_iflush when the filesystem is shutting down to clean up the
 * inode state.  It is responsible for removing the inode item from
 * the AIL if it has not been re-logged, and unlocking the inode's
 * flush lock.
 */
void
xfs_iflush_abort(
	xfs_inode_t		*ip)
{
	xfs_inode_log_item_t	*iip;
	xfs_mount_t		*mp;
	SPLDECL(s);

	iip = ip->i_itemp;
	mp = ip->i_mount;
	if (iip) {
		if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
			AIL_LOCK(mp, s);
			if (iip->ili_item.li_flags & XFS_LI_IN_AIL) {
				/*
				 * xfs_trans_delete_ail() drops the AIL lock.
				 */
				xfs_trans_delete_ail(mp, (xfs_log_item_t *)iip,
					s);
			} else
				AIL_UNLOCK(mp, s);
		}
		iip->ili_logged = 0;
		/*
		 * Clear the ili_last_fields bits now that we know that the
		 * data corresponding to them is safely on disk.
		 */
		iip->ili_last_fields = 0;
		/*
		 * Clear the inode logging fields so no more flushes are
		 * attempted.
		 */
		iip->ili_format.ilf_fields = 0;
	}
	/*
	 * Release the inode's flush lock since we're done with it.
	 */
	xfs_ifunlock(ip);
}
/*
 * This is called by the efd item code below to release references to
 * the given efi item.  Each efd calls this with the number of
 * extents that it has logged, and when the sum of these reaches
 * the total number of extents logged by this efi item we can free
 * the efi item.
 *
 * Freeing the efi item requires that we remove it from the AIL.
 * We'll use the AIL lock to protect our counters as well as
 * the removal from the AIL.
 */
void
xfs_efi_release(xfs_efi_log_item_t	*efip,
		uint			nextents)
{
	xfs_mount_t	*mp;
	int		extents_left;
	uint		size;
	int		nexts;
	SPLDECL(s);

	mp = efip->efi_item.li_mountp;
	ASSERT(efip->efi_next_extent > 0);
	ASSERT(efip->efi_flags & XFS_EFI_COMMITTED);

	AIL_LOCK(mp, s);
	ASSERT(efip->efi_next_extent >= nextents);
	efip->efi_next_extent -= nextents;
	extents_left = efip->efi_next_extent;
	if (extents_left == 0) {
		/*
		 * xfs_trans_delete_ail() drops the AIL lock.
		 */
		xfs_trans_delete_ail(mp, (xfs_log_item_t *)efip, s);
	} else {
		AIL_UNLOCK(mp, s);
	}

	if (extents_left == 0) {
		nexts = efip->efi_format.efi_nextents;
		if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
			size = sizeof(xfs_efi_log_item_t);
			size += (nexts - 1) * sizeof(xfs_extent_t);
			kmem_free(efip, size);
		} else {
			kmem_zone_free(xfs_efi_zone, efip);
		}
	}
}
/*
 * This is called to wait for the given dquot to be unpinned.
 * Most of these pin/unpin routines are plagiarized from inode code.
 */
void
xfs_qm_dqunpin_wait(
	xfs_dquot_t	*dqp)
{
	SPLDECL(s);

	ASSERT(XFS_DQ_IS_LOCKED(dqp));
	if (dqp->q_pincount == 0) {
		return;
	}

	/*
	 * Give the log a push so we don't wait here too long.
	 */
	xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE);
	s = XFS_DQ_PINLOCK(dqp);
	if (dqp->q_pincount == 0) {
		XFS_DQ_PINUNLOCK(dqp, s);
		return;
	}
	sv_wait(&(dqp->q_pinwait), PINOD,
		&(XFS_DQ_TO_QINF(dqp)->qi_pinlock), s);
}
/*ARGSUSED*/
STATIC void
xfs_qm_dqflush_done(
	xfs_buf_t		*bp,
	xfs_dq_logitem_t	*qip)
{
	xfs_dquot_t	*dqp;
	SPLDECL(s);

	dqp = qip->qli_dquot;

	/*
	 * We only want to pull the item from the AIL if its
	 * location in the log has not changed since we started the flush.
	 * Thus, we only bother if the dquot's lsn has
	 * not changed.  First we check the lsn outside the lock
	 * since it's cheaper, and then we recheck while
	 * holding the lock before removing the dquot from the AIL.
	 */
	if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
	    qip->qli_item.li_lsn == qip->qli_flush_lsn) {
		AIL_LOCK(dqp->q_mount, s);
		/*
		 * xfs_trans_delete_ail() drops the AIL lock.
		 */
		if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
			xfs_trans_delete_ail(dqp->q_mount,
					     (xfs_log_item_t *)qip, s);
		else
			AIL_UNLOCK(dqp->q_mount, s);
	}

	/*
	 * Release the dq's flush lock since we're done with it.
	 */
	xfs_dqfunlock(dqp);
}
/*
 * Write a modified dquot to disk.
 * The dquot must be locked and the flush lock too taken by caller.
 * The flush lock will not be unlocked until the dquot reaches the disk,
 * but the dquot is free to be unlocked and modified by the caller
 * in the interim.  Dquot is still locked on return.  This behavior is
 * identical to that of inodes.
 */
int
xfs_qm_dqflush(
	xfs_dquot_t		*dqp,
	uint			flags)
{
	xfs_mount_t		*mp;
	xfs_buf_t		*bp;
	xfs_disk_dquot_t	*ddqp;
	int			error;
	SPLDECL(s);

	ASSERT(XFS_DQ_IS_LOCKED(dqp));
	ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp));
	xfs_dqtrace_entry(dqp, "DQFLUSH");

	/*
	 * If not dirty, nada.
	 */
	if (!XFS_DQ_IS_DIRTY(dqp)) {
		xfs_dqfunlock(dqp);
		return (0);
	}

	/*
	 * Can't flush a pinned dquot. Wait for it.
	 */
	xfs_qm_dqunpin_wait(dqp);

	/*
	 * This may have been unpinned because the filesystem is shutting
	 * down forcibly. If that's the case we must not write this dquot
	 * to disk, because the log record didn't make it to disk!
	 */
	if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) {
		dqp->dq_flags &= ~(XFS_DQ_DIRTY);
		xfs_dqfunlock(dqp);
		return XFS_ERROR(EIO);
	}

	/*
	 * Get the buffer containing the on-disk dquot.
	 * We don't need a transaction envelope because we know that the
	 * ondisk-dquot has already been allocated for.
	 */
	if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) {
		xfs_dqtrace_entry(dqp, "DQTOBP FAIL");
		ASSERT(error != ENOENT);
		/*
		 * Quotas could have gotten turned off (ESRCH)
		 */
		xfs_dqfunlock(dqp);
		return (error);
	}

	if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
			   XFS_QMOPT_DOWARN, "dqflush (incore copy)")) {
		xfs_force_shutdown(dqp->q_mount, XFS_CORRUPT_INCORE);
		return XFS_ERROR(EIO);
	}

	/* This is the only portion of data that needs to persist */
	memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t));

	/*
	 * Clear the dirty field and remember the flush lsn for later use.
	 */
	dqp->dq_flags &= ~(XFS_DQ_DIRTY);
	mp = dqp->q_mount;

	/* lsn is 64 bits */
	AIL_LOCK(mp, s);
	dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn;
	AIL_UNLOCK(mp, s);

	/*
	 * Attach an iodone routine so that we can remove this dquot from the
	 * AIL and release the flush lock once the dquot is synced to disk.
	 */
	xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t *, xfs_log_item_t *))
			      xfs_qm_dqflush_done, &(dqp->q_logitem.qli_item));

	/*
	 * If the buffer is pinned then push on the log so we won't
	 * get stuck waiting in the write for too long.
	 */
	if (XFS_BUF_ISPINNED(bp)) {
		xfs_dqtrace_entry(dqp, "DQFLUSH LOG FORCE");
		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
	}

	if (flags & XFS_QMOPT_DELWRI) {
		xfs_bdwrite(mp, bp);
	} else if (flags & XFS_QMOPT_ASYNC) {
		xfs_bawrite(mp, bp);
	} else {
		error = xfs_bwrite(mp, bp);
	}
	xfs_dqtrace_entry(dqp, "DQFLUSH END");
	/*
	 * dqp is still locked, but caller is free to unlock it now.
	 */
	return (error);
}
/*
 * xfs_trans_push_ail
 *
 * This routine is called to move the tail of the AIL
 * forward.  It does this by trying to flush items in the AIL
 * whose lsns are below the given threshold_lsn.
 *
 * The routine returns the lsn of the tail of the log.
 */
xfs_lsn_t
xfs_trans_push_ail(
	xfs_mount_t		*mp,
	xfs_lsn_t		threshold_lsn)
{
	xfs_lsn_t		lsn;
	xfs_log_item_t		*lip;
	int			gen;
	int			restarts;
	int			lock_result;
	int			flush_log;
	SPLDECL(s);

#define	XFS_TRANS_PUSH_AIL_RESTARTS	10

	AIL_LOCK(mp, s);
	lip = xfs_trans_first_ail(mp, &gen);
	if (lip == NULL || XFS_FORCED_SHUTDOWN(mp)) {
		/*
		 * Just return if the AIL is empty.
		 */
		AIL_UNLOCK(mp, s);
		return (xfs_lsn_t)0;
	}

	XFS_STATS_INC(xs_push_ail);

	/*
	 * While the item we are looking at is below the given threshold
	 * try to flush it out.  Make sure to limit the number of times
	 * we allow xfs_trans_next_ail() to restart scanning from the
	 * beginning of the list.  We'd like not to stop until we've at least
	 * tried to push on everything in the AIL with an LSN less than
	 * the given threshold.  However, we may give up before that if
	 * we realize that we've been holding the AIL_LOCK for 'too long',
	 * blocking interrupts.  Currently, too long is < 500us roughly.
	 */
	flush_log = 0;
	restarts = 0;
	while (((restarts < XFS_TRANS_PUSH_AIL_RESTARTS) &&
		(XFS_LSN_CMP(lip->li_lsn, threshold_lsn) < 0))) {
		/*
		 * If we can lock the item without sleeping, unlock
		 * the AIL lock and flush the item.  Then re-grab the
		 * AIL lock so we can look for the next item on the
		 * AIL.  Since we unlock the AIL while we flush the
		 * item, the next routine may start over again at the
		 * beginning of the list if anything has changed.
		 * That is what the generation count is for.
		 *
		 * If we can't lock the item, either its holder will flush
		 * it or it is already being flushed or it is being relogged.
		 * In any of these cases it is being taken care of and we
		 * can just skip to the next item in the list.
		 */
		lock_result = IOP_TRYLOCK(lip);
		switch (lock_result) {
		case XFS_ITEM_SUCCESS:
			AIL_UNLOCK(mp, s);
			XFS_STATS_INC(xs_push_ail_success);
			IOP_PUSH(lip);
			AIL_LOCK(mp, s);
			break;

		case XFS_ITEM_PUSHBUF:
			AIL_UNLOCK(mp, s);
			XFS_STATS_INC(xs_push_ail_pushbuf);
#ifdef XFSRACEDEBUG
			delay_for_intr();
			delay(300);
#endif
			ASSERT(lip->li_ops->iop_pushbuf);
			ASSERT(lip);
			IOP_PUSHBUF(lip);
			AIL_LOCK(mp, s);
			break;

		case XFS_ITEM_PINNED:
			XFS_STATS_INC(xs_push_ail_pinned);
			flush_log = 1;
			break;

		case XFS_ITEM_LOCKED:
			XFS_STATS_INC(xs_push_ail_locked);
			break;

		case XFS_ITEM_FLUSHING:
			XFS_STATS_INC(xs_push_ail_flushing);
			break;

		default:
			ASSERT(0);
			break;
		}

		lip = xfs_trans_next_ail(mp, lip, &gen, &restarts);
		if (lip == NULL) {
			break;
		}
		if (XFS_FORCED_SHUTDOWN(mp)) {
			/*
			 * Just return if we shut down during the last try.
			 */
			AIL_UNLOCK(mp, s);
			return (xfs_lsn_t)0;
		}
	}

	if (flush_log) {
		/*
		 * If something we need to push out was pinned, then
		 * push out the log so it will become unpinned and
		 * move forward in the AIL.
		 */
		AIL_UNLOCK(mp, s);
		XFS_STATS_INC(xs_push_ail_flush);
		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
		AIL_LOCK(mp, s);
	}

	lip = xfs_ail_min(&(mp->m_ail));
	if (lip == NULL) {
		lsn = (xfs_lsn_t)0;
	} else {
		lsn = lip->li_lsn;
	}

	AIL_UNLOCK(mp, s);
	return lsn;
}	/* xfs_trans_push_ail */
/*
 * This is called to perform the commit processing for each
 * item described by the given chunk.
 *
 * The commit processing consists of unlocking items which were
 * held locked with the SYNC_UNLOCK attribute, calling the committed
 * routine of each logged item, updating the item's position in the AIL
 * if necessary, and unpinning each item.  If the committed routine
 * returns -1, then do nothing further with the item because it
 * may have been freed.
 *
 * Since items are unlocked when they are copied to the incore
 * log, it is possible for two transactions to be completing
 * and manipulating the same item simultaneously.  The AIL lock
 * will protect the lsn field of each item.  The value of this
 * field can never go backwards.
 *
 * We unpin the items after repositioning them in the AIL, because
 * otherwise they could be immediately flushed and we'd have to race
 * with the flusher trying to pull the item from the AIL as we add it.
 */
STATIC void
xfs_trans_chunk_committed(
	xfs_log_item_chunk_t	*licp,
	xfs_lsn_t		lsn,
	int			aborted)
{
	xfs_log_item_desc_t	*lidp;
	xfs_log_item_t		*lip;
	xfs_lsn_t		item_lsn;
	struct xfs_mount	*mp;
	int			i;
	SPLDECL(s);

	lidp = licp->lic_descs;
	for (i = 0; i < licp->lic_unused; i++, lidp++) {
		if (XFS_LIC_ISFREE(licp, i)) {
			continue;
		}

		lip = lidp->lid_item;
		if (aborted)
			lip->li_flags |= XFS_LI_ABORTED;

		/*
		 * Send in the ABORTED flag to the COMMITTED routine
		 * so that it knows whether the transaction was aborted
		 * or not.
		 */
		item_lsn = IOP_COMMITTED(lip, lsn);

		/*
		 * If the committed routine returns -1, make
		 * no more references to the item.
		 */
		if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) {
			continue;
		}

		/*
		 * If the returned lsn is greater than what it
		 * contained before, update the location of the
		 * item in the AIL.  If it is not, then do nothing.
		 * Items can never move backwards in the AIL.
		 *
		 * While the new lsn should usually be greater, it
		 * is possible that a later transaction completing
		 * simultaneously with an earlier one using the
		 * same item could complete first with a higher lsn.
		 * This would cause the earlier transaction to fail
		 * the test below.
		 */
		mp = lip->li_mountp;
		AIL_LOCK(mp, s);
		if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) {
			/*
			 * This will set the item's lsn to item_lsn
			 * and update the position of the item in
			 * the AIL.
			 *
			 * xfs_trans_update_ail() drops the AIL lock.
			 */
			xfs_trans_update_ail(mp, lip, item_lsn, s);
		} else {
			AIL_UNLOCK(mp, s);
		}

		/*
		 * Now that we've repositioned the item in the AIL,
		 * unpin it so it can be flushed.  Pass information
		 * about buffer stale state down from the log item
		 * flags, if anyone else stales the buffer we do not
		 * want to pay any attention to it.
		 */
		IOP_UNPIN(lip, lidp->lid_flags & XFS_LID_BUF_STALE);
	}
}
/*
 * Look up an inode by number in the given file system.
 * The inode is looked up in the hash table for the file system
 * represented by the mount point parameter mp.  Each bucket of
 * the hash table is guarded by an individual semaphore.
 *
 * If the inode is found in the hash table, its corresponding vnode
 * is obtained with a call to vn_get().  This call takes care of
 * coordination with the reclamation of the inode and vnode.  Note
 * that the vmap structure is filled in while holding the hash lock.
 * This gives us the state of the inode/vnode when we found it and
 * is used for coordination in vn_get().
 *
 * If it is not in core, read it in from the file system's device and
 * add the inode into the hash table.
 *
 * The inode is locked according to the value of the lock_flags parameter.
 * This flag parameter indicates how and if the inode's IO lock and inode lock
 * should be taken.
 *
 * mp -- the mount point structure for the current file system.  It points
 *       to the inode hash table.
 * tp -- a pointer to the current transaction if there is one.  This is
 *       simply passed through to the xfs_iread() call.
 * ino -- the number of the inode desired.  This is the unique identifier
 *        within the file system for the inode being requested.
 * lock_flags -- flags indicating how to lock the inode.  See the comment
 *               for xfs_ilock() for a list of valid values.
 * bno -- the block number starting the buffer containing the inode,
 *        if known (as by bulkstat), else 0.
 */
int
xfs_iget(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_ino_t	ino,
	uint		flags,
	uint		lock_flags,
	xfs_inode_t	**ipp,
	xfs_daddr_t	bno)
{
	xfs_ihash_t	*ih;
	xfs_inode_t	*ip;
	xfs_inode_t	*iq;
	xfs_vnode_t	*vp;
	ulong		version;
	int		error;
	/* REFERENCED */
	int		newnode;
	xfs_chash_t	*ch;
	xfs_chashlist_t	*chl, *chlnew;
	vmap_t		vmap;
	SPLDECL(s);

	XFS_STATS_INC(xs_ig_attempts);

	ih = XFS_IHASH(mp, ino);

again:
	read_lock(&ih->ih_lock);

	for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
		if (ip->i_ino == ino) {
			vp = XFS_ITOV(ip);
			VMAP(vp, vmap);
			/*
			 * Inode cache hit: if ip is not at the front of
			 * its hash chain, move it there now.
			 * Do this with the lock held for update, but
			 * do statistics after releasing the lock.
			 */
			if (ip->i_prevp != &ih->ih_next &&
			    rwlock_trypromote(&ih->ih_lock)) {
				if ((iq = ip->i_next)) {
					iq->i_prevp = ip->i_prevp;
				}
				*ip->i_prevp = iq;
				iq = ih->ih_next;
				iq->i_prevp = &ip->i_next;
				ip->i_next = iq;
				ip->i_prevp = &ih->ih_next;
				ih->ih_next = ip;
				write_unlock(&ih->ih_lock);
			} else {
				read_unlock(&ih->ih_lock);
			}

			XFS_STATS_INC(xs_ig_found);

			/*
			 * Get a reference to the vnode/inode.
			 * vn_get() takes care of coordination with
			 * the file system inode release and reclaim
			 * functions.  If it returns NULL, the inode
			 * has been reclaimed so just start the search
			 * over again.  We probably won't find it,
			 * but we could be racing with another cpu
			 * looking for the same inode so we have to at
			 * least look.
			 */
			if (!(vp = vn_get(vp, &vmap))) {
				XFS_STATS_INC(xs_ig_frecycle);
				goto again;
			}

			if (lock_flags != 0) {
				ip->i_flags &= ~XFS_IRECLAIM;
				xfs_ilock(ip, lock_flags);
			}

			newnode = (ip->i_d.di_mode == 0);
			if (newnode) {
				xfs_iocore_inode_reinit(ip);
			}
			ip->i_flags &= ~XFS_ISTALE;

			vn_trace_exit(vp, "xfs_iget.found",
				      (inst_t *)__return_address);
			goto return_ip;
		}
	}

	/*
	 * Inode cache miss: save the hash chain version stamp and unlock
	 * the chain, so we don't deadlock in vn_alloc.
	 */
	XFS_STATS_INC(xs_ig_missed);

	version = ih->ih_version;

	read_unlock(&ih->ih_lock);

	/*
	 * Read the disk inode attributes into a new inode structure and get
	 * a new vnode for it.  This should also initialize i_ino and i_mount.
	 */
	error = xfs_iread(mp, tp, ino, &ip, bno);
	if (error) {
		return error;
	}

	error = xfs_vn_allocate(mp, ip, &vp);
	if (error) {
		return error;
	}
	vn_trace_exit(vp, "xfs_iget.alloc", (inst_t *)__return_address);

	xfs_inode_lock_init(ip, vp);
	xfs_iocore_inode_init(ip);

	if (lock_flags != 0) {
		xfs_ilock(ip, lock_flags);
	}

	/*
	 * Put ip on its hash chain, unless someone else hashed a duplicate
	 * after we released the hash lock.
	 */
	write_lock(&ih->ih_lock);

	if (ih->ih_version != version) {
		for (iq = ih->ih_next; iq != NULL; iq = iq->i_next) {
			if (iq->i_ino == ino) {
				write_unlock(&ih->ih_lock);
				xfs_idestroy(ip);

				XFS_STATS_INC(xs_ig_dup);
				goto again;
			}
		}
	}

	/*
	 * These values _must_ be set before releasing ihlock!
	 */
	ip->i_hash = ih;
	if ((iq = ih->ih_next)) {
		iq->i_prevp = &ip->i_next;
	}
	ip->i_next = iq;
	ip->i_prevp = &ih->ih_next;
	ih->ih_next = ip;
	ip->i_udquot = ip->i_gdquot = NULL;
	ih->ih_version++;

	write_unlock(&ih->ih_lock);

	/*
	 * put ip on its cluster's hash chain
	 */
	ASSERT(ip->i_chash == NULL && ip->i_cprev == NULL &&
	       ip->i_cnext == NULL);

	chlnew = NULL;
	ch = XFS_CHASH(mp, ip->i_blkno);
chlredo:
	s = mutex_spinlock(&ch->ch_lock);
	for (chl = ch->ch_list; chl != NULL; chl = chl->chl_next) {
		if (chl->chl_blkno == ip->i_blkno) {

			/* insert this inode into the doubly-linked list
			 * where chl points */
			if ((iq = chl->chl_ip)) {
				ip->i_cprev = iq->i_cprev;
				iq->i_cprev->i_cnext = ip;
				iq->i_cprev = ip;
				ip->i_cnext = iq;
			} else {
				ip->i_cnext = ip;
				ip->i_cprev = ip;
			}
			chl->chl_ip = ip;
			ip->i_chash = chl;
			break;
		}
	}

	/* no hash list found for this block; add a new hash list */
	if (chl == NULL) {
		if (chlnew == NULL) {
			mutex_spinunlock(&ch->ch_lock, s);
			ASSERT(xfs_chashlist_zone != NULL);
			chlnew = (xfs_chashlist_t *)
					kmem_zone_alloc(xfs_chashlist_zone,
						KM_SLEEP);
			ASSERT(chlnew != NULL);
			goto chlredo;
		} else {
			ip->i_cnext = ip;
			ip->i_cprev = ip;
			ip->i_chash = chlnew;
			chlnew->chl_ip = ip;
			chlnew->chl_blkno = ip->i_blkno;
			chlnew->chl_next = ch->ch_list;
			ch->ch_list = chlnew;
			chlnew = NULL;
		}
	} else {
		if (chlnew != NULL) {
			kmem_zone_free(xfs_chashlist_zone, chlnew);
		}
	}

	mutex_spinunlock(&ch->ch_lock, s);

	/*
	 * Link ip to its mount and thread it on the mount's inode list.
	 */
	XFS_MOUNT_ILOCK(mp);
	if ((iq = mp->m_inodes)) {
		ASSERT(iq->i_mprev->i_mnext == iq);
		ip->i_mprev = iq->i_mprev;
		iq->i_mprev->i_mnext = ip;
		iq->i_mprev = ip;
		ip->i_mnext = iq;
	} else {
		ip->i_mnext = ip;
		ip->i_mprev = ip;
	}
	mp->m_inodes = ip;

	XFS_MOUNT_IUNLOCK(mp);

	newnode = 1;

return_ip:
	ASSERT(ip->i_df.if_ext_max ==
	       XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));

	ASSERT(((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != 0) ==
	       ((ip->i_iocore.io_flags & XFS_IOCORE_RT) != 0));

	*ipp = ip;

	/*
	 * If we have a real type for an on-disk inode, we can set ops(&unlock)
	 * now.  If it's a new inode being created, xfs_ialloc will handle it.
	 */
	XVFS_INIT_VNODE(XFS_MTOVFS(mp), vp, XFS_ITOBHV(ip), 1);

	return 0;
}
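/*
 * Hypothetical usage sketch, not part of the source above: a caller of
 * this era's xfs_iget() might look up an inode with the inode lock held
 * exclusively and later drop the lock and the vnode reference.  The
 * XFS_ILOCK_EXCL flag and the xfs_iput() helper are assumed from the
 * surrounding XFS code; they are not defined in this excerpt.
 */
STATIC int
example_lookup(xfs_mount_t *mp, xfs_ino_t ino)
{
	xfs_inode_t	*ip;
	int		error;

	/* Look up the inode and take its inode lock exclusively. */
	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, 0);
	if (error)
		return error;

	/* ... operate on the locked inode here ... */

	/* Drop the lock and the reference obtained by xfs_iget(). */
	xfs_iput(ip, XFS_ILOCK_EXCL);
	return 0;
}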