Example #1
0
static void
xfs_dir3_block_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;

	if (xfs_sb_version_hascrc(&mp->m_sb) &&
	     !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
		xfs_buf_ioerror(bp, -EFSBADCRC);
	else if (!xfs_dir3_block_verify(bp))
		xfs_buf_ioerror(bp, -EFSCORRUPTED);

	if (bp->b_error)
		xfs_verifier_error(bp);
}
Example #2
0
/*
 * If the superblock has the CRC feature bit set or the CRC field is non-null,
 * check that the CRC is valid.  We check the CRC field is non-null because a
 * single bit error could clear the feature bit and unused parts of the
 * superblock are supposed to be zero. Hence a non-null crc field indicates that
 * we've potentially lost a feature bit and we should check it anyway.
 *
 * However, past bugs (i.e. in growfs) left non-zeroed regions beyond the
 * last field in V4 secondary superblocks.  So for secondary superblocks,
 * we are more forgiving, and ignore CRC failures if the primary doesn't
 * indicate that the fs version is V5.
 */
static void
xfs_sb_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_dsb	*dsb = XFS_BUF_TO_SBP(bp);
	int		error;

	/*
	 * open code the version check to avoid needing to convert the entire
	 * superblock from disk order just to check the version number
	 */
	if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC) &&
	    (((be16_to_cpu(dsb->sb_versionnum) & XFS_SB_VERSION_NUMBITS) ==
						XFS_SB_VERSION_5) ||
	     dsb->sb_crc != 0)) {

		if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) {
			/* Only fail bad secondaries on a known V5 filesystem */
			if (bp->b_bn == XFS_SB_DADDR ||
			    xfs_sb_version_hascrc(&mp->m_sb)) {
				error = -EFSBADCRC;
				goto out_error;
			}
		}
	}
	error = xfs_sb_verify(bp, true);

out_error:
	if (error) {
		xfs_buf_ioerror(bp, error);
		if (error == -EFSCORRUPTED || error == -EFSBADCRC)
			xfs_verifier_error(bp);
	}
}
Example #3
0
static void
xfs_dquot_buf_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_dqblk	*d = (struct xfs_dqblk *)bp->b_addr;
	struct xfs_disk_dquot	*ddq;
	xfs_dqid_t		id = 0;
	int			i;

	/*
	 * On the first read of the buffer, verify that each dquot is valid.
	 * We don't know what the id of the dquot is supposed to be, just that
	 * they should be increasing monotonically within the buffer. If the
	 * first id is corrupt, then it will fail on the second dquot in the
	 * buffer so corruptions could point to the wrong dquot in this case.
	 */
	for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) {
		int	error;

		ddq = &d[i].dd_diskdq;

		if (i == 0)
			id = be32_to_cpu(ddq->d_id);

		error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
					"xfs_dquot_read_verify");
		if (error) {
			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, d);
			xfs_buf_ioerror(bp, EFSCORRUPTED);
			break;
		}
	}
}
Example #4
0
static void
xfs_attr3_rmt_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	int		blksize = mp->m_attr_geo->blksize;
	char		*ptr;
	int		len;
	xfs_daddr_t	bno;

	/* no verification of non-crc buffers */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	ptr = bp->b_addr;
	bno = bp->b_bn;
	len = BBTOB(bp->b_length);
	ASSERT(len >= blksize);

	while (len > 0) {
		struct xfs_attr3_rmt_hdr *rmt = (struct xfs_attr3_rmt_hdr *)ptr;

		if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
			xfs_buf_ioerror(bp, -EFSCORRUPTED);
			xfs_verifier_error(bp);
			return;
		}

		/*
		 * Ensure we aren't writing bogus LSNs to disk. See
		 * xfs_attr3_rmt_hdr_set() for the explanation.
		 */
		if (rmt->rm_lsn != cpu_to_be64(NULLCOMMITLSN)) {
			xfs_buf_ioerror(bp, -EFSCORRUPTED);
			xfs_verifier_error(bp);
			return;
		}
		xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);

		len -= blksize;
		ptr += blksize;
		bno += BTOBB(blksize);
	}
	ASSERT(len == 0);
}
Example #5
0
static void
xfs_allocbt_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
	struct xfs_perag	*pag = bp->b_pag;
	unsigned int		level;
	int			sblock_ok; /* block passes checks */

	/*
	 * magic number and level verification
	 *
	 * During growfs operations, we can't verify the exact level as the
	 * perag is not fully initialised and hence not attached to the buffer.
	 * In this case, check against the maximum tree depth.
	 */
	level = be16_to_cpu(block->bb_level);
	switch (block->bb_magic) {
	case cpu_to_be32(XFS_ABTB_MAGIC):
		if (pag)
			sblock_ok = level < pag->pagf_levels[XFS_BTNUM_BNOi];
		else
			sblock_ok = level < mp->m_ag_maxlevels;
		break;
	case cpu_to_be32(XFS_ABTC_MAGIC):
		if (pag)
			sblock_ok = level < pag->pagf_levels[XFS_BTNUM_CNTi];
		else
			sblock_ok = level < mp->m_ag_maxlevels;
		break;
	default:
		sblock_ok = 0;
		break;
	}

	/* numrecs verification */
	sblock_ok = sblock_ok &&
		be16_to_cpu(block->bb_numrecs) <= mp->m_alloc_mxr[level != 0];

	/* sibling pointer verification */
	sblock_ok = sblock_ok &&
		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
		 be32_to_cpu(block->bb_u.s.bb_leftsib) < mp->m_sb.sb_agblocks) &&
		block->bb_u.s.bb_leftsib &&
		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
		 be32_to_cpu(block->bb_u.s.bb_rightsib) < mp->m_sb.sb_agblocks) &&
		block->bb_u.s.bb_rightsib;

	if (!sblock_ok) {
		trace_xfs_btree_corrupt(bp, _RET_IP_);
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, block);
		xfs_buf_ioerror(bp, EFSCORRUPTED);
	}
}
static void
xfs_dquot_buf_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;

	if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
		xfs_buf_ioerror(bp, EFSCORRUPTED);
	}
}
Example #7
0
static void
xfs_attr3_rmt_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	char		*ptr;
	int		len;
	xfs_daddr_t	bno;
	int		blksize = mp->m_attr_geo->blksize;

	/* no verification of non-crc buffers */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	ptr = bp->b_addr;
	bno = bp->b_bn;
	len = BBTOB(bp->b_length);
	ASSERT(len >= blksize);

	while (len > 0) {
		if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) {
			xfs_buf_ioerror(bp, -EFSBADCRC);
			break;
		}
		if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
			xfs_buf_ioerror(bp, -EFSCORRUPTED);
			break;
		}
		len -= blksize;
		ptr += blksize;
		bno += BTOBB(blksize);
	}

	if (bp->b_error)
		xfs_verifier_error(bp);
	else
		ASSERT(len == 0);
}
static void
xfs_dir3_block_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;

	if ((xfs_sb_version_hascrc(&mp->m_sb) &&
	     !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
					  XFS_DIR3_DATA_CRC_OFF)) ||
	    !xfs_dir3_block_verify(bp)) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
		xfs_buf_ioerror(bp, EFSCORRUPTED);
	}
}
Example #9
0
/*
 * We may be probed for a filesystem match, so we may not want to emit
 * messages when the superblock buffer is not actually an XFS superblock.
 * If we find an XFS superblock, then run a normal, noisy mount because we are
 * really going to mount it and want to know about errors.
 */
static void
xfs_sb_quiet_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_dsb	*dsb = XFS_BUF_TO_SBP(bp);

	if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) {
		/* XFS filesystem, verify noisily! */
		xfs_sb_read_verify(bp);
		return;
	}
	/* quietly fail */
	xfs_buf_ioerror(bp, -EWRONGFS);
}
Example #10
0
static void
xfs_dir2_block_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_dir2_data_hdr *hdr = bp->b_addr;
	int			block_ok = 0;

	block_ok = hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
	block_ok = block_ok && __xfs_dir2_data_check(NULL, bp) == 0;

	if (!block_ok) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
		xfs_buf_ioerror(bp, EFSCORRUPTED);
	}
}
Example #11
0
/*
 * If we are doing readahead on an inode buffer, we might be in log recovery
 * reading an inode allocation buffer that hasn't yet been replayed, and hence
 * has not had the inode cores stamped into it. Hence for readahead, the buffer
 * may be potentially invalid.
 *
 * If the readahead buffer is invalid, we need to mark it with an error and
 * clear the DONE status of the buffer so that a followup read will re-read it
 * from disk. We don't report the error otherwise to avoid warnings during log
 * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
 * because all we want to do is say readahead failed; there is no-one to report
 * the error to, so this will distinguish it from a non-ra verifier failure.
 * Changes to this readahead error behavour also need to be reflected in
 * xfs_dquot_buf_readahead_verify().
 */
static void
xfs_inode_buf_verify(
	struct xfs_buf	*bp,
	bool		readahead)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	xfs_agnumber_t	agno;
	int		i;
	int		ni;

	/*
	 * Validate the magic number and version of every inode in the buffer
	 */
	agno = xfs_daddr_to_agno(mp, XFS_BUF_ADDR(bp));
	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
	for (i = 0; i < ni; i++) {
		int		di_ok;
		xfs_dinode_t	*dip;
		xfs_agino_t	unlinked_ino;

		dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
		unlinked_ino = be32_to_cpu(dip->di_next_unlinked);
		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
			xfs_dinode_good_version(mp, dip->di_version) &&
			(unlinked_ino == NULLAGINO ||
			 xfs_verify_agino(mp, agno, unlinked_ino));
		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
						XFS_ERRTAG_ITOBP_INOTOBP))) {
			if (readahead) {
				bp->b_flags &= ~XBF_DONE;
				xfs_buf_ioerror(bp, -EIO);
				return;
			}

#ifdef DEBUG
			xfs_alert(mp,
				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
				(unsigned long long)bp->b_bn, i,
				be16_to_cpu(dip->di_magic));
#endif
			xfs_buf_verifier_error(bp, -EFSCORRUPTED,
					__func__, dip, sizeof(*dip),
					NULL);
			return;
		}
	}
}
Example #12
0
/*
 * If we are doing readahead on an inode buffer, we might be in log recovery
 * reading an inode allocation buffer that hasn't yet been replayed, and hence
 * has not had the inode cores stamped into it. Hence for readahead, the buffer
 * may be potentially invalid.
 *
 * If the readahead buffer is invalid, we don't want to mark it with an error,
 * but we do want to clear the DONE status of the buffer so that a followup read
 * will re-read it from disk. This will ensure that we don't get an unnecessary
 * warnings during log recovery and we don't get unnecssary panics on debug
 * kernels.
 */
static void
xfs_inode_buf_verify(
	struct xfs_buf	*bp,
	bool		readahead)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	int		i;
	int		ni;

	/*
	 * Validate the magic number and version of every inode in the buffer
	 */
	ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock;
	for (i = 0; i < ni; i++) {
		int		di_ok;
		xfs_dinode_t	*dip;

		dip = (struct xfs_dinode *)xfs_buf_offset(bp,
					(i << mp->m_sb.sb_inodelog));
		di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
			    XFS_DINODE_GOOD_VERSION(dip->di_version);
		if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
						XFS_ERRTAG_ITOBP_INOTOBP,
						XFS_RANDOM_ITOBP_INOTOBP))) {
			if (readahead) {
				bp->b_flags &= ~XBF_DONE;
				return;
			}

			xfs_buf_ioerror(bp, -EFSCORRUPTED);
			xfs_verifier_error(bp);
#ifdef DEBUG
			xfs_alert(mp,
				"bad inode magic/vsn daddr %lld #%d (magic=%x)",
				(unsigned long long)bp->b_bn, i,
				be16_to_cpu(dip->di_magic));
#endif
		}
	}
	xfs_inobp_check(mp, bp);
}
Example #13
0
static void
xfs_attr3_rmt_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item	*bip = bp->b_fspriv;
	char		*ptr;
	int		len;
	xfs_daddr_t	bno;

	/* no verification of non-crc buffers */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	ptr = bp->b_addr;
	bno = bp->b_bn;
	len = BBTOB(bp->b_length);
	ASSERT(len >= XFS_LBSIZE(mp));

	while (len > 0) {
		if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
			XFS_CORRUPTION_ERROR(__func__,
					    XFS_ERRLEVEL_LOW, mp, bp->b_addr);
			xfs_buf_ioerror(bp, EFSCORRUPTED);
			return;
		}
		if (bip) {
			struct xfs_attr3_rmt_hdr *rmt;

			rmt = (struct xfs_attr3_rmt_hdr *)ptr;
			rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
		}
		xfs_update_cksum(ptr, XFS_LBSIZE(mp), XFS_ATTR3_RMT_CRC_OFF);

		len -= XFS_LBSIZE(mp);
		ptr += XFS_LBSIZE(mp);
		bno += mp->m_bsize;
	}
	ASSERT(len == 0);
}
Example #14
0
static void
xfs_attr3_rmt_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item	*bip = bp->b_fspriv;
	char		*ptr;
	int		len;
	xfs_daddr_t	bno;
	int		blksize = mp->m_attr_geo->blksize;

	/* no verification of non-crc buffers */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	ptr = bp->b_addr;
	bno = bp->b_bn;
	len = BBTOB(bp->b_length);
	ASSERT(len >= blksize);

	while (len > 0) {
		if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {
			xfs_buf_ioerror(bp, -EFSCORRUPTED);
			xfs_verifier_error(bp);
			return;
		}
		if (bip) {
			struct xfs_attr3_rmt_hdr *rmt;

			rmt = (struct xfs_attr3_rmt_hdr *)ptr;
			rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);
		}
		xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF);

		len -= blksize;
		ptr += blksize;
		bno += BTOBB(blksize);
	}
	ASSERT(len == 0);
}
Example #15
0
static void
xfs_attr3_rmt_read_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_target->bt_mount;
	char		*ptr;
	int		len;
	bool		corrupt = false;
	xfs_daddr_t	bno;

	/* no verification of non-crc buffers */
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	ptr = bp->b_addr;
	bno = bp->b_bn;
	len = BBTOB(bp->b_length);
	ASSERT(len >= XFS_LBSIZE(mp));

	while (len > 0) {
		if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp),
				      XFS_ATTR3_RMT_CRC_OFF)) {
			corrupt = true;
			break;
		}
		if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
			corrupt = true;
			break;
		}
		len -= XFS_LBSIZE(mp);
		ptr += XFS_LBSIZE(mp);
		bno += mp->m_bsize;
	}

	if (corrupt) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
		xfs_buf_ioerror(bp, EFSCORRUPTED);
	} else
		ASSERT(len == 0);
}
Example #16
0
static void
xfs_dir3_block_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item	*bip = bp->b_fspriv;
	struct xfs_dir3_blk_hdr	*hdr3 = bp->b_addr;

	if (!xfs_dir3_block_verify(bp)) {
		xfs_buf_ioerror(bp, -EFSCORRUPTED);
		xfs_verifier_error(bp);
		return;
	}

	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (bip)
		hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
}
static void
xfs_dir3_block_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item	*bip = bp->b_fspriv;
	struct xfs_dir3_blk_hdr	*hdr3 = bp->b_addr;

	if (!xfs_dir3_block_verify(bp)) {
		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
		xfs_buf_ioerror(bp, EFSCORRUPTED);
		return;
	}

	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (bip)
		hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF);
}
Example #18
0
static void
xfs_sb_write_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_target->bt_mount;
	struct xfs_buf_log_item	*bip = bp->b_fspriv;
	int			error;

	error = xfs_sb_verify(bp, false);
	if (error) {
		xfs_buf_ioerror(bp, error);
		xfs_verifier_error(bp);
		return;
	}

	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return;

	if (bip)
		XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
}
/*
 * Readahead of the first block of the directory when it is opened is completely
 * oblivious to the format of the directory. Hence we can either get a block
 * format buffer or a data format buffer on readahead.
 */
static void
xfs_dir3_data_reada_verify(
	struct xfs_buf		*bp)
{
	struct xfs_dir2_data_hdr *hdr = bp->b_addr;

	switch (hdr->magic) {
	case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
	case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
		bp->b_ops = &xfs_dir3_block_buf_ops;
		bp->b_ops->verify_read(bp);
		return;
	case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
	case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
		bp->b_ops = &xfs_dir3_data_buf_ops;
		bp->b_ops->verify_read(bp);
		return;
	default:
		xfs_buf_ioerror(bp, -EFSCORRUPTED);
		xfs_verifier_error(bp);
		break;
	}
}
Example #20
0
/*
 * This is called to unpin the buffer associated with the buf log
 * item which was previously pinned with a call to xfs_buf_item_pin().
 *
 * Also drop the reference to the buf item for the current transaction.
 * If the XFS_BLI_STALE flag is set and we are the last reference,
 * then free up the buf log item and unlock the buffer.
 *
 * If the remove flag is set we are called from uncommit in the
 * forced-shutdown path.  If that is true and the reference count on
 * the log item is going to drop to zero we need to free the item's
 * descriptor in the transaction.
 */
STATIC void
xfs_buf_item_unpin(
	struct xfs_log_item	*lip,
	int			remove)
{
	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
	xfs_buf_t	*bp = bip->bli_buf;
	struct xfs_ail	*ailp = lip->li_ailp;
	int		stale = bip->bli_flags & XFS_BLI_STALE;
	int		freed;

	ASSERT(bp->b_fspriv == bip);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	trace_xfs_buf_item_unpin(bip);

	freed = atomic_dec_and_test(&bip->bli_refcount);

	if (atomic_dec_and_test(&bp->b_pin_count))
		wake_up_all(&bp->b_waiters);

	if (freed && stale) {
		ASSERT(bip->bli_flags & XFS_BLI_STALE);
		ASSERT(xfs_buf_islocked(bp));
		ASSERT(XFS_BUF_ISSTALE(bp));
		ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);

		trace_xfs_buf_item_unpin_stale(bip);

		if (remove) {
			/*
			 * If we are in a transaction context, we have to
			 * remove the log item from the transaction as we are
			 * about to release our reference to the buffer.  If we
			 * don't, the unlock that occurs later in
			 * xfs_trans_uncommit() will try to reference the
			 * buffer which we no longer have a hold on.
			 */
			if (lip->li_desc)
				xfs_trans_del_item(lip);

			/*
			 * Since the transaction no longer refers to the buffer,
			 * the buffer should no longer refer to the transaction.
			 */
			bp->b_transp = NULL;
		}

		/*
		 * If we get called here because of an IO error, we may
		 * or may not have the item on the AIL. xfs_trans_ail_delete()
		 * will take care of that situation.
		 * xfs_trans_ail_delete() drops the AIL lock.
		 */
		if (bip->bli_flags & XFS_BLI_STALE_INODE) {
			xfs_buf_do_callbacks(bp);
			bp->b_fspriv = NULL;
			bp->b_iodone = NULL;
		} else {
			spin_lock(&ailp->xa_lock);
			xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR);
			xfs_buf_item_relse(bp);
			ASSERT(bp->b_fspriv == NULL);
		}
		xfs_buf_relse(bp);
	} else if (freed && remove) {
		/*
		 * There are currently two references to the buffer - the active
		 * LRU reference and the buf log item. What we are about to do
		 * here - simulate a failed IO completion - requires 3
		 * references.
		 *
		 * The LRU reference is removed by the xfs_buf_stale() call. The
		 * buf item reference is removed by the xfs_buf_iodone()
		 * callback that is run by xfs_buf_do_callbacks() during ioend
		 * processing (via the bp->b_iodone callback), and then finally
		 * the ioend processing will drop the IO reference if the buffer
		 * is marked XBF_ASYNC.
		 *
		 * Hence we need to take an additional reference here so that IO
		 * completion processing doesn't free the buffer prematurely.
		 */
		xfs_buf_lock(bp);
		xfs_buf_hold(bp);
		bp->b_flags |= XBF_ASYNC;
		xfs_buf_ioerror(bp, EIO);
		XFS_BUF_UNDONE(bp);
		xfs_buf_stale(bp);
		xfs_buf_ioend(bp, 0);
	}
}
Example #21
0
/*
 * This is the iodone() function for buffers which have had callbacks
 * attached to them by xfs_buf_attach_iodone().  It should remove each
 * log item from the buffer's list and call the callback of each in turn.
 * When done, the buffer's fsprivate field is set to NULL and the buffer
 * is unlocked with a call to iodone().
 */
void
xfs_buf_iodone_callbacks(
	struct xfs_buf		*bp)
{
	struct xfs_log_item	*lip = bp->b_fspriv;
	struct xfs_mount	*mp = lip->li_mountp;
	static ulong		lasttime;
	static xfs_buftarg_t	*lasttarg;

	if (likely(!xfs_buf_geterror(bp)))
		goto do_callbacks;

	/*
	 * If we've already decided to shutdown the filesystem because of
	 * I/O errors, there's no point in giving this a retry.
	 */
	if (XFS_FORCED_SHUTDOWN(mp)) {
		xfs_buf_stale(bp);
		XFS_BUF_DONE(bp);
		trace_xfs_buf_item_iodone(bp, _RET_IP_);
		goto do_callbacks;
	}

	if (bp->b_target != lasttarg ||
	    time_after(jiffies, (lasttime + 5*HZ))) {
		lasttime = jiffies;
		xfs_buf_ioerror_alert(bp, __func__);
	}
	lasttarg = bp->b_target;

	/*
	 * If the write was asynchronous then no one will be looking for the
	 * error.  Clear the error state and write the buffer out again.
	 *
	 * XXX: This helps against transient write errors, but we need to find
	 * a way to shut the filesystem down if the writes keep failing.
	 *
	 * In practice we'll shut the filesystem down soon as non-transient
	 * erorrs tend to affect the whole device and a failing log write
	 * will make us give up.  But we really ought to do better here.
	 */
	if (XFS_BUF_ISASYNC(bp)) {
		ASSERT(bp->b_iodone != NULL);

		trace_xfs_buf_item_iodone_async(bp, _RET_IP_);

		xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */

		if (!XFS_BUF_ISSTALE(bp)) {
			bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
			xfs_buf_iorequest(bp);
		} else {
			xfs_buf_relse(bp);
		}

		return;
	}

	/*
	 * If the write of the buffer was synchronous, we want to make
	 * sure to return the error to the caller of xfs_bwrite().
	 */
	xfs_buf_stale(bp);
	XFS_BUF_DONE(bp);

	trace_xfs_buf_error_relse(bp, _RET_IP_);

do_callbacks:
	xfs_buf_do_callbacks(bp);
	bp->b_fspriv = NULL;
	bp->b_iodone = NULL;
	xfs_buf_ioend(bp, 0);
}