STATIC int xfs_qm_flush_one( struct xfs_dquot *dqp, void *data) { struct list_head *buffer_list = data; struct xfs_buf *bp = NULL; int error = 0; xfs_dqlock(dqp); if (dqp->dq_flags & XFS_DQ_FREEING) goto out_unlock; if (!XFS_DQ_IS_DIRTY(dqp)) goto out_unlock; xfs_dqflock(dqp); error = xfs_qm_dqflush(dqp, &bp); if (error) goto out_unlock; xfs_buf_delwri_queue(bp, buffer_list); xfs_buf_relse(bp); out_unlock: xfs_dqunlock(dqp); return error; }
STATIC uint xfs_buf_item_push( struct xfs_log_item *lip, struct list_head *buffer_list) { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; uint rval = XFS_ITEM_SUCCESS; if (xfs_buf_ispinned(bp)) return XFS_ITEM_PINNED; if (!xfs_buf_trylock(bp)) { /* * If we have just raced with a buffer being pinned and it has * been marked stale, we could end up stalling until someone else * issues a log force to unpin the stale buffer. Check for the * race condition here so xfsaild recognizes the buffer is pinned * and queues a log force to move it along. */ if (xfs_buf_ispinned(bp)) return XFS_ITEM_PINNED; return XFS_ITEM_LOCKED; } ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); trace_xfs_buf_item_push(bip); if (!xfs_buf_delwri_queue(bp, buffer_list)) rval = XFS_ITEM_FLUSHING; xfs_buf_unlock(bp); return rval; }
STATIC int xfs_qm_dqiter_bufs( struct xfs_mount *mp, xfs_dqid_t firstid, xfs_fsblock_t bno, xfs_filblks_t blkcnt, uint flags, struct list_head *buffer_list) { struct xfs_buf *bp; int error; int type; ASSERT(blkcnt > 0); type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); error = 0; /* * Blkcnt arg can be a very big number, and might even be * larger than the log itself. So, we have to break it up into * manageable-sized transactions. * Note that we don't start a permanent transaction here; we might * not be able to get a log reservation for the whole thing up front, * and we don't really care to either, because we just discard * everything if we were to crash in the middle of this loop. */ while (blkcnt--) { error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, bno), mp->m_quotainfo->qi_dqchunklen, 0, &bp, &xfs_dquot_buf_ops); if (error) break; xfs_qm_reset_dqcounts(mp, bp, firstid, type); xfs_buf_delwri_queue(bp, buffer_list); xfs_buf_relse(bp); /* * goto the next block. */ bno++; firstid += mp->m_quotainfo->qi_dqperchunk; } return error; }
static int xfs_ag_init_hdr( struct xfs_mount *mp, struct aghdr_init_data *id, aghdr_init_work_f work, const struct xfs_buf_ops *ops) { struct xfs_buf *bp; bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, 0, ops); if (!bp) return -ENOMEM; (*work)(mp, bp, id); xfs_buf_delwri_queue(bp, &id->buffer_list); xfs_buf_relse(bp); return 0; }
STATIC uint xfs_buf_item_push( struct xfs_log_item *lip, struct list_head *buffer_list) { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; uint rval = XFS_ITEM_SUCCESS; if (xfs_buf_ispinned(bp)) return XFS_ITEM_PINNED; if (!xfs_buf_trylock(bp)) { /* * If we have just raced with a buffer being pinned and it has * been marked stale, we could end up stalling until someone else * issues a log force to unpin the stale buffer. Check for the * race condition here so xfsaild recognizes the buffer is pinned * and queues a log force to move it along. */ if (xfs_buf_ispinned(bp)) return XFS_ITEM_PINNED; return XFS_ITEM_LOCKED; } ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); trace_xfs_buf_item_push(bip); /* has a previous flush failed due to IO errors? */ if ((bp->b_flags & XBF_WRITE_FAIL) && ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) { xfs_warn(bp->b_target->bt_mount, "Detected failing async write on buffer block 0x%llx. Retrying async write.\n", (long long)bp->b_bn); } if (!xfs_buf_delwri_queue(bp, buffer_list)) rval = XFS_ITEM_FLUSHING; xfs_buf_unlock(bp); return rval; }
STATIC uint xfs_buf_item_push( struct xfs_log_item *lip, struct list_head *buffer_list) { struct xfs_buf_log_item *bip = BUF_ITEM(lip); struct xfs_buf *bp = bip->bli_buf; uint rval = XFS_ITEM_SUCCESS; if (xfs_buf_ispinned(bp)) return XFS_ITEM_PINNED; if (!xfs_buf_trylock(bp)) return XFS_ITEM_LOCKED; ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); trace_xfs_buf_item_push(bip); if (!xfs_buf_delwri_queue(bp, buffer_list)) rval = XFS_ITEM_FLUSHING; xfs_buf_unlock(bp); return rval; }
STATIC void xfs_qm_dqreclaim_one( struct xfs_dquot *dqp, struct list_head *buffer_list, struct list_head *dispose_list) { struct xfs_mount *mp = dqp->q_mount; struct xfs_quotainfo *qi = mp->m_quotainfo; int error; if (!xfs_dqlock_nowait(dqp)) goto out_move_tail; /* * This dquot has acquired a reference in the meantime remove it from * the freelist and try again. */ if (dqp->q_nrefs) { xfs_dqunlock(dqp); trace_xfs_dqreclaim_want(dqp); XFS_STATS_INC(xs_qm_dqwants); list_del_init(&dqp->q_lru); qi->qi_lru_count--; XFS_STATS_DEC(xs_qm_dquot_unused); return; } /* * Try to grab the flush lock. If this dquot is in the process of * getting flushed to disk, we don't want to reclaim it. */ if (!xfs_dqflock_nowait(dqp)) goto out_unlock_move_tail; if (XFS_DQ_IS_DIRTY(dqp)) { struct xfs_buf *bp = NULL; trace_xfs_dqreclaim_dirty(dqp); error = xfs_qm_dqflush(dqp, &bp); if (error) { xfs_warn(mp, "%s: dquot %p flush failed", __func__, dqp); goto out_unlock_move_tail; } xfs_buf_delwri_queue(bp, buffer_list); xfs_buf_relse(bp); /* * Give the dquot another try on the freelist, as the * flushing will take some time. */ goto out_unlock_move_tail; } xfs_dqfunlock(dqp); /* * Prevent lookups now that we are past the point of no return. */ dqp->dq_flags |= XFS_DQ_FREEING; xfs_dqunlock(dqp); ASSERT(dqp->q_nrefs == 0); list_move_tail(&dqp->q_lru, dispose_list); qi->qi_lru_count--; XFS_STATS_DEC(xs_qm_dquot_unused); trace_xfs_dqreclaim_done(dqp); XFS_STATS_INC(xs_qm_dqreclaims); return; /* * Move the dquot to the tail of the list so that we don't spin on it. */ out_unlock_move_tail: xfs_dqunlock(dqp); out_move_tail: list_move_tail(&dqp->q_lru, &qi->qi_lru_list); trace_xfs_dqreclaim_busy(dqp); XFS_STATS_INC(xs_qm_dqreclaim_misses); }
/* * Initialise a new set of inodes. When called without a transaction context * (e.g. from recovery) we initiate a delayed write of the inode buffers rather * than logging them (which in a transaction context puts them into the AIL * for writeback rather than the xfsbufd queue). */ int xfs_ialloc_inode_init( struct xfs_mount *mp, struct xfs_trans *tp, struct list_head *buffer_list, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_agblock_t length, unsigned int gen) { struct xfs_buf *fbuf; struct xfs_dinode *free; int blks_per_cluster, nbufs, ninodes; int version; int i, j; xfs_daddr_t d; xfs_ino_t ino = 0; /* * Loop over the new block(s), filling in the inodes. * For small block sizes, manipulate the inodes in buffers * which are multiples of the blocks size. */ if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { blks_per_cluster = 1; nbufs = length; ninodes = mp->m_sb.sb_inopblock; } else { blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / mp->m_sb.sb_blocksize; nbufs = length / blks_per_cluster; ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; } /* * Figure out what version number to use in the inodes we create. If * the superblock version has caught up to the one that supports the new * inode format, then use the new inode version. Otherwise use the old * version so that old kernels will continue to be able to use the file * system. * * For v3 inodes, we also need to write the inode number into the inode, * so calculate the first inode number of the chunk here as * XFS_OFFBNO_TO_AGINO() only works within a filesystem block, not * across multiple filesystem blocks (such as a cluster) and so cannot * be used in the cluster buffer loop below. * * Further, because we are writing the inode directly into the buffer * and calculating a CRC on the entire inode, we have ot log the entire * inode so that the entire range the CRC covers is present in the log. * That means for v3 inode we log the entire buffer rather than just the * inode cores. */ if (xfs_sb_version_hascrc(&mp->m_sb)) { version = 3; ino = XFS_AGINO_TO_INO(mp, agno, XFS_OFFBNO_TO_AGINO(mp, agbno, 0)); /* * log the initialisation that is about to take place as an * logical operation. This means the transaction does not * need to log the physical changes to the inode buffers as log * recovery will know what initialisation is actually needed. * Hence we only need to log the buffers as "ordered" buffers so * they track in the AIL as if they were physically logged. */ if (tp) xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp), mp->m_sb.sb_inodesize, length, gen); } else if (xfs_sb_version_hasnlink(&mp->m_sb)) version = 2; else version = 1; for (j = 0; j < nbufs; j++) { /* * Get the block. */ d = XFS_AGB_TO_DADDR(mp, agno, agbno + (j * blks_per_cluster)); fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize * blks_per_cluster, XBF_UNMAPPED); if (!fbuf) return ENOMEM; /* Initialize the inode buffers and log them appropriately. */ fbuf->b_ops = &xfs_inode_buf_ops; xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); for (i = 0; i < ninodes; i++) { int ioffset = i << mp->m_sb.sb_inodelog; uint isize = xfs_dinode_size(version); free = xfs_make_iptr(mp, fbuf, i); free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); free->di_version = version; free->di_gen = cpu_to_be32(gen); free->di_next_unlinked = cpu_to_be32(NULLAGINO); if (version == 3) { free->di_ino = cpu_to_be64(ino); ino++; uuid_copy(&free->di_uuid, &mp->m_sb.sb_uuid); xfs_dinode_calc_crc(mp, free); } else if (tp) { /* just log the inode core */ xfs_trans_log_buf(tp, fbuf, ioffset, ioffset + isize - 1); } } if (tp) { /* * Mark the buffer as an inode allocation buffer so it * sticks in AIL at the point of this allocation * transaction. This ensures the they are on disk before * the tail of the log can be moved past this * transaction (i.e. by preventing relogging from moving * it forward in the log). */ xfs_trans_inode_alloc_buf(tp, fbuf); if (version == 3) { /* * Mark the buffer as ordered so that they are * not physically logged in the transaction but * still tracked in the AIL as part of the * transaction and pin the log appropriately. */ xfs_trans_ordered_buf(tp, fbuf); xfs_trans_log_buf(tp, fbuf, 0, BBTOB(fbuf->b_length) - 1); } } else { fbuf->b_flags |= XBF_DONE; xfs_buf_delwri_queue(fbuf, buffer_list); xfs_buf_relse(fbuf); } } return 0; }
STATIC int xfs_qm_dqiter_bufs( struct xfs_mount *mp, xfs_dqid_t firstid, xfs_fsblock_t bno, xfs_filblks_t blkcnt, uint flags, struct list_head *buffer_list) { struct xfs_buf *bp; int error; int type; ASSERT(blkcnt > 0); type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); error = 0; /* * Blkcnt arg can be a very big number, and might even be * larger than the log itself. So, we have to break it up into * manageable-sized transactions. * Note that we don't start a permanent transaction here; we might * not be able to get a log reservation for the whole thing up front, * and we don't really care to either, because we just discard * everything if we were to crash in the middle of this loop. */ while (blkcnt--) { error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, bno), mp->m_quotainfo->qi_dqchunklen, 0, &bp, &xfs_dquot_buf_ops); /* * CRC and validation errors will return a EFSCORRUPTED here. If * this occurs, re-read without CRC validation so that we can * repair the damage via xfs_qm_reset_dqcounts(). This process * will leave a trace in the log indicating corruption has * been detected. */ if (error == -EFSCORRUPTED) { error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, bno), mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL); } if (error) break; /* * A corrupt buffer might not have a verifier attached, so * make sure we have the correct one attached before writeback * occurs. */ bp->b_ops = &xfs_dquot_buf_ops; xfs_qm_reset_dqcounts(mp, bp, firstid, type); xfs_buf_delwri_queue(bp, buffer_list); xfs_buf_relse(bp); /* goto the next block. */ bno++; firstid += mp->m_quotainfo->qi_dqperchunk; } return error; }
/* * Write a modified dquot to disk. * The dquot must be locked and the flush lock too taken by caller. * The flush lock will not be unlocked until the dquot reaches the disk, * but the dquot is free to be unlocked and modified by the caller * in the interim. Dquot is still locked on return. This behavior is * identical to that of inodes. */ int xfs_qm_dqflush( xfs_dquot_t *dqp, uint flags) { struct xfs_mount *mp = dqp->q_mount; struct xfs_buf *bp; struct xfs_disk_dquot *ddqp; int error; ASSERT(XFS_DQ_IS_LOCKED(dqp)); ASSERT(!completion_done(&dqp->q_flush)); trace_xfs_dqflush(dqp); /* * If not dirty, or it's pinned and we are not supposed to block, nada. */ if (!XFS_DQ_IS_DIRTY(dqp) || ((flags & SYNC_TRYLOCK) && atomic_read(&dqp->q_pincount) > 0)) { xfs_dqfunlock(dqp); return 0; } xfs_qm_dqunpin_wait(dqp); /* * This may have been unpinned because the filesystem is shutting * down forcibly. If that's the case we must not write this dquot * to disk, because the log record didn't make it to disk! */ if (XFS_FORCED_SHUTDOWN(mp)) { dqp->dq_flags &= ~XFS_DQ_DIRTY; xfs_dqfunlock(dqp); return XFS_ERROR(EIO); } /* * Get the buffer containing the on-disk dquot */ error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, mp->m_quotainfo->qi_dqchunklen, 0, &bp); if (error) { ASSERT(error != ENOENT); xfs_dqfunlock(dqp); return error; } /* * Calculate the location of the dquot inside the buffer. */ ddqp = bp->b_addr + dqp->q_bufoffset; /* * A simple sanity check in case we got a corrupted dquot.. */ error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)"); if (error) { xfs_buf_relse(bp); xfs_dqfunlock(dqp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); return XFS_ERROR(EIO); } /* This is the only portion of data that needs to persist */ memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t)); /* * Clear the dirty field and remember the flush lsn for later use. */ dqp->dq_flags &= ~XFS_DQ_DIRTY; xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, &dqp->q_logitem.qli_item.li_lsn); /* * Attach an iodone routine so that we can remove this dquot from the * AIL and release the flush lock once the dquot is synced to disk. */ xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done, &dqp->q_logitem.qli_item); /* * If the buffer is pinned then push on the log so we won't * get stuck waiting in the write for too long. */ if (xfs_buf_ispinned(bp)) { trace_xfs_dqflush_force(dqp); xfs_log_force(mp, 0); } if (flags & SYNC_WAIT) error = xfs_bwrite(bp); else xfs_buf_delwri_queue(bp); xfs_buf_relse(bp); trace_xfs_dqflush_done(dqp); /* * dqp is still locked, but caller is free to unlock it now. */ return error; }
STATIC uint xfs_inode_item_push( struct xfs_log_item *lip, struct list_head *buffer_list) { struct xfs_inode_log_item *iip = INODE_ITEM(lip); struct xfs_inode *ip = iip->ili_inode; struct xfs_buf *bp = NULL; uint rval = XFS_ITEM_SUCCESS; int error; if (xfs_ipincount(ip) > 0) return XFS_ITEM_PINNED; if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) return XFS_ITEM_LOCKED; /* * Re-check the pincount now that we stabilized the value by * taking the ilock. */ if (xfs_ipincount(ip) > 0) { rval = XFS_ITEM_PINNED; goto out_unlock; } /* * Stale inode items should force out the iclog. */ if (ip->i_flags & XFS_ISTALE) { rval = XFS_ITEM_PINNED; goto out_unlock; } /* * Someone else is already flushing the inode. Nothing we can do * here but wait for the flush to finish and remove the item from * the AIL. */ if (!xfs_iflock_nowait(ip)) { rval = XFS_ITEM_FLUSHING; goto out_unlock; } ASSERT(iip->ili_fields != 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); ASSERT(iip->ili_logged == 0 || XFS_FORCED_SHUTDOWN(ip->i_mount)); spin_unlock(&lip->li_ailp->xa_lock); error = xfs_iflush(ip, &bp); if (!error) { if (!xfs_buf_delwri_queue(bp, buffer_list)) rval = XFS_ITEM_FLUSHING; xfs_buf_relse(bp); } spin_lock(&lip->li_ailp->xa_lock); out_unlock: xfs_iunlock(ip, XFS_ILOCK_SHARED); return rval; }