/* * Get and lock the buffer for the caller if it is not already * locked within the given transaction. If it is already locked * within the transaction, just increment its lock recursion count * and return a pointer to it. * * Use the fast path function xfs_trans_buf_item_match() or the buffer * cache routine incore_match() to find the buffer * if it is already owned by this transaction. * * If we don't already own the buffer, use get_buf() to get it. * If it doesn't yet have an associated xfs_buf_log_item structure, * then allocate one and add the item to this transaction. * * If the transaction pointer is NULL, make this just a normal * get_buf() call. */ xfs_buf_t * xfs_trans_get_buf(xfs_trans_t *tp, xfs_buftarg_t *target_dev, xfs_daddr_t blkno, int len, uint flags) { xfs_buf_t *bp; xfs_buf_log_item_t *bip; if (flags == 0) flags = XFS_BUF_LOCK | XFS_BUF_MAPPED; /* * Default to a normal get_buf() call if the tp is NULL. */ if (tp == NULL) { bp = xfs_buf_get_flags(target_dev, blkno, len, flags | BUF_BUSY); return(bp); } /* * If we find the buffer in the cache with this transaction * pointer in its b_fsprivate2 field, then we know we already * have it locked. In this case we just increment the lock * recursion count and return the buffer to the caller. */ if (tp->t_items.lic_next == NULL) { bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len); } else { bp = xfs_trans_buf_item_match_all(tp, target_dev, blkno, len); } if (bp != NULL) { ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) { xfs_buftrace("TRANS GET RECUR SHUT", bp); XFS_BUF_SUPER_STALE(bp); } /* * If the buffer is stale then it was binval'ed * since last read. This doesn't matter since the * caller isn't allowed to use the data anyway. 
*/ else if (XFS_BUF_ISSTALE(bp)) { xfs_buftrace("TRANS GET RECUR STALE", bp); ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); } ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); ASSERT(bip != NULL); ASSERT(atomic_read(&bip->bli_refcount) > 0); bip->bli_recur++; xfs_buftrace("TRANS GET RECUR", bp); xfs_buf_item_trace("GET RECUR", bip); return (bp); }
/*
 * Abort pending I/O on a buffer.
 *
 * The buffer is tagged with EIO, its read/delayed-write/done state is
 * cleared, it is marked stale, its bdstrat function is removed, and it is
 * then passed to xfs_biodone() so that the iodone callbacks attached to it
 * are still invoked.
 *
 * Always returns EIO.
 */
int
xfs_bioerror(
	xfs_buf_t	*bp)
{
#ifdef XFSERRORDEBUG
	ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
#endif

	/*
	 * The buffer is not being flushed here, so there is no reason to
	 * wait for it to become unpinned first.
	 */
	xfs_buftrace("XFS IOERROR", bp);
	XFS_BUF_ERROR(bp, EIO);

	/*
	 * Completion is reported through xfs_biodone(), so the "done" state
	 * must be cleared before the call.  The iodone callback has to run
	 * one way or another; going through biodone was chosen because
	 * (per the original author's note) it also takes care of GRIO.
	 */
	XFS_BUF_UNREAD(bp);
	XFS_BUF_UNDELAYWRITE(bp);
	XFS_BUF_UNDONE(bp);
	XFS_BUF_STALE(bp);

	XFS_BUF_CLR_BDSTRAT_FUNC(bp);
	xfs_biodone(bp);

	return EIO;
}
/*
 * Unpin the buffer attached to a buf log item that was previously pinned
 * with xfs_buf_item_pin(), and drop this transaction's reference to the
 * buf log item.
 *
 * bip:   the buf log item whose buffer is to be unpinned.
 * stale: non-zero when the caller is unpinning a stale item.
 *
 * When the dropped reference was the last one and 'stale' is set, the item
 * is torn down here: it is removed from the AIL (or, for stale inode
 * buffers, its callbacks are run and detached), and the buffer is released.
 */
void
xfs_buf_item_unpin(
	xfs_buf_log_item_t	*bip,
	int			stale)
{
	xfs_mount_t	*mp;
	xfs_buf_t	*bp;
	int		freed;
	SPLDECL(s);

	bp = bip->bli_buf;
	ASSERT(bp != NULL);
	ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip);
	ASSERT(atomic_read(&bip->bli_refcount) > 0);
	xfs_buf_item_trace("UNPIN", bip);
	xfs_buftrace("XFS_UNPIN", bp);

	/* 'freed' is true only when this call dropped the last reference. */
	freed = atomic_dec_and_test(&bip->bli_refcount);
	mp = bip->bli_item.li_mountp;
	xfs_bunpin(bp);

	if (freed && stale) {
		ASSERT(bip->bli_flags & XFS_BLI_STALE);
		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
		ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
		ASSERT(XFS_BUF_ISSTALE(bp));
		ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
		xfs_buf_item_trace("UNPIN STALE", bip);
		xfs_buftrace("XFS_UNPIN STALE", bp);

		/*
		 * If we get called here because of an IO error, we may
		 * or may not have the item on the AIL. xfs_trans_delete_ail()
		 * will take care of that situation.
		 * xfs_trans_delete_ail() drops the AIL lock.
		 */
		if (bip->bli_flags & XFS_BLI_STALE_INODE) {
			/*
			 * Stale inode buffer: run the attached callbacks,
			 * then detach the log item and the iodone function
			 * from the buffer.
			 */
			xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip);
			XFS_BUF_SET_FSPRIVATE(bp, NULL);
			XFS_BUF_CLR_IODONE_FUNC(bp);
		} else {
			AIL_LOCK(mp,s);
			xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip, s);
			xfs_buf_item_relse(bp);
			ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL);
		}
		xfs_buf_relse(bp);
	}
}
/*
 * Issue I/O for a buffer unless the filesystem has been forcibly shut down.
 *
 * On a shut-down filesystem the buffer is not sent to disk; it is failed
 * and released through xfs_bioerror_relse() instead.  According to the
 * original note, user data typically travels through this path, the
 * superblock being one of the exceptions.
 */
void
xfsbdstrat(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp)
{
	ASSERT(mp);

	if (XFS_FORCED_SHUTDOWN(mp)) {
		/* Shutting down: refuse the I/O and release the buffer. */
		xfs_buftrace("XFSBDSTRAT IOERROR", bp);
		xfs_bioerror_relse(bp);
		return;
	}

	xfs_buf_iorequest(bp);
}
/*
 * Pin the buffer belonging to a buf log item in memory so that it cannot
 * be written out.  The work itself is done by xfs_bpin(); this routine
 * only sanity-checks the item's state and emits trace records first.
 */
void
xfs_buf_item_pin(
	xfs_buf_log_item_t	*bip)
{
	xfs_buf_t	*bp = bip->bli_buf;

	/* The buffer must be busy and the item must still be referenced. */
	ASSERT(XFS_BUF_ISBUSY(bp));
	ASSERT(atomic_read(&bip->bli_refcount) > 0);

	/* Only items that were logged or marked stale are expected here. */
	ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
	       (bip->bli_flags & XFS_BLI_STALE));

	xfs_buf_item_trace("PIN", bip);
	xfs_buftrace("XFS_PIN", bp);

	xfs_bpin(bp);
}
/*
 * Issue I/O for a buffer unless the filesystem has been forcibly shut down.
 *
 * Returns 0 once the request has been handed to pagebuf_iorequest().  On a
 * shut-down filesystem nothing is sent to disk; the buffer is failed and
 * released via xfs_bioerror_relse() and that function's result is returned.
 *
 * According to the original note, user data typically travels through this
 * path, the superblock being one of the exceptions.
 */
int
xfsbdstrat(
	struct xfs_mount	*mp,
	struct xfs_buf		*bp)
{
	ASSERT(mp);

	if (XFS_FORCED_SHUTDOWN(mp)) {
		/* Shutting down: refuse the I/O and release the buffer. */
		xfs_buftrace("XFSBDSTRAT IOERROR", bp);
		return xfs_bioerror_relse(bp);
	}

	/*
	 * Placeholder left by the original author: GRIO redirection, keyed
	 * on XFS_BUF_IS_GRIO(bp), would be inserted at this point.
	 */
	pagebuf_iorequest(bp);
	return 0;
}
/*
 * b_bdstrat callback attached to all XFS metadata buffers except the log
 * state machine buffers (per the original note).  It exists so that a
 * buffer can be intercepted on its way to disk after the filesystem has
 * been forcibly shut down.
 *
 * Returns 0 when the I/O request was issued normally, otherwise the result
 * of whichever error routine handled the buffer.
 */
int
xfs_bdstrat_cb(struct xfs_buf *bp)
{
	/* Normal case: filesystem is alive, just issue the request. */
	if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) {
		xfs_buf_iorequest(bp);
		return 0;
	}

	xfs_buftrace("XFS__BDSTRAT IOERROR", bp);

	/*
	 * Reads, and any buffer carrying an iodone function, are failed
	 * through xfs_bioerror().
	 */
	if (XFS_BUF_IODONE_FUNC(bp) != NULL || XFS_BUF_ISREAD(bp))
		return xfs_bioerror(bp);

	/*
	 * What remains is a metadata write that was never logged but was
	 * written delayed anyway.  Such writes are not associated with a
	 * transaction and can be ignored, so the buffer is failed and
	 * released directly.
	 */
	return xfs_bioerror_relse(bp);
}
/*
 * Variant of xfs_bioerror() that disposes of the buffer itself and does
 * not go through biodone.
 *
 * Intended for user data errors; metadata buffers carry iodone functions
 * so that their errors can be tracked, and must not be passed here (the
 * assertions below reject the two known metadata iodone handlers).
 *
 * Always returns EIO.
 */
int
xfs_bioerror_relse(
	xfs_buf_t	*bp)
{
	int64_t		orig_flags;

	ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks);
	ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone);

	xfs_buftrace("XFS IOERRELSE", bp);

	/* Snapshot the flags before they are rewritten below. */
	orig_flags = XFS_BUF_BFLAGS(bp);

	/*
	 * The buffer is not being flushed, so there is no need to wait for
	 * it to be unpinned.
	 *
	 * The done flag is set regardless of whether the I/O really
	 * finished: chunkhold expects it, and that interface is
	 * deliberately left unchanged.
	 */
	XFS_BUF_UNREAD(bp);
	XFS_BUF_UNDELAYWRITE(bp);
	XFS_BUF_DONE(bp);
	XFS_BUF_STALE(bp);
	XFS_BUF_CLR_IODONE_FUNC(bp);
	XFS_BUF_CLR_BDSTRAT_FUNC(bp);

	if (orig_flags & XFS_B_ASYNC) {
		/*
		 * Asynchronous buffers are simply released; there is no
		 * reason to record an error on them.
		 */
		xfs_buf_relse(bp);
		return EIO;
	}

	/*
	 * Synchronous buffer: set both b_error and the error flag, since
	 * a lot of chunkcache code assumes the two go together, then
	 * signal the iodone semaphore.
	 */
	XFS_BUF_ERROR(bp, EIO);
	XFS_BUF_V_IODONESEMA(bp);

	return EIO;
}
/*
 * Try to expedite the flush of an inode whose backing buffer is sitting
 * in memory marked delayed-write.
 *
 * Per the original note this is invoked from xfs_trans_push_ail() after
 * IOP_TRYLOCK failed to obtain the inode flush lock but did obtain the
 * inode lock in SHARED mode.  The inode buffer is looked up in core with a
 * trylock; if it is found and is delayed-write, an asynchronous write is
 * started on it.
 *
 * Neither the AIL lock nor the flush lock is held here, so every check
 * below is inherently racy.
 *
 * On every path the pushbuf flag is cleared and the shared inode lock is
 * dropped before returning.
 */
STATIC void
xfs_inode_item_pushbuf(
	xfs_inode_log_item_t	*iip)
{
	xfs_inode_t	*ip = iip->ili_inode;
	xfs_mount_t	*mp;
	xfs_buf_t	*bp;
	uint		dopush;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));

	/*
	 * ili_pushbuf_flag stops other threads from duplicating this work;
	 * it must already be set, and set by us.
	 */
	ASSERT(iip->ili_pushbuf_flag != 0);
	ASSERT(iip->ili_push_owner == current_pid());

	/*
	 * No flush in progress any more, or the item is no longer in the
	 * AIL: most likely the inode has already been dealt with, so
	 * there is nothing to push.
	 */
	if (completion_done(&ip->i_flush) ||
	    ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
		iip->ili_pushbuf_flag = 0;
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return;
	}

	mp = ip->i_mount;
	bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno,
			iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK);

	if (bp == NULL) {
		/*
		 * The pushbuf flag is deliberately not reset any earlier
		 * than this.  In theory it could be cleared as soon as the
		 * buffer lock is held, but doing so would let other
		 * threads come here believing the push is theirs to do,
		 * only to find the buffer still locked by someone doing
		 * the very same thing.
		 */
		iip->ili_pushbuf_flag = 0;
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return;
	}

	if (!XFS_BUF_ISDELAYWRITE(bp)) {
		/* Buffer is in core but not delayed-write: nothing to write. */
		iip->ili_pushbuf_flag = 0;
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		xfs_buf_relse(bp);
		return;
	}

	/*
	 * We hold the buffer and it is dirty.  Because neither the AIL
	 * lock nor the flush lock is held, iflush may have raced with us
	 * and already removed the item from the AIL, so re-check before
	 * deciding to write it asynchronously.
	 */
	dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
		  !completion_done(&ip->i_flush));
	iip->ili_pushbuf_flag = 0;
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	xfs_buftrace("INODE ITEM PUSH", bp);

	/* A pinned buffer gets a log force before any write is attempted. */
	if (XFS_BUF_ISPINNED(bp))
		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);

	if (!dopush) {
		xfs_buf_relse(bp);
		return;
	}

	{
		int	error = xfs_bawrite(mp, bp);

		if (error)
			xfs_fs_cmn_err(CE_WARN, mp,
				"xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p",
				error, iip, bp);
	}
}
/*
 * Try to expedite the flush of an inode whose backing buffer is sitting
 * in memory marked delayed-write.
 *
 * Per the original note this is invoked from xfs_trans_push_ail() after
 * IOP_TRYLOCK failed to obtain the inode flush lock but did obtain the
 * inode lock in SHARED mode.  The inode buffer is looked up in core with a
 * trylock; if it is found and is delayed-write, an asynchronous write
 * (bawrite) is started on it.
 *
 * Neither the AIL_LOCK nor the flush lock is held here, so every check
 * below is inherently racy.
 *
 * On every path the pushbuf flag is cleared and the shared inode lock is
 * dropped before returning.
 */
STATIC void
xfs_inode_item_pushbuf(
	xfs_inode_log_item_t	*iip)
{
	xfs_inode_t	*ip = iip->ili_inode;
	xfs_mount_t	*mp;
	xfs_buf_t	*bp;
	uint		dopush;

	ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS));

	/*
	 * ili_pushbuf_flag stops other threads from duplicating this work;
	 * it must already be set, and set by us.
	 */
	ASSERT(iip->ili_pushbuf_flag != 0);
	ASSERT(iip->ili_push_owner == get_thread_id());

	/*
	 * The flush lock is no longer held, or the item is no longer in
	 * the AIL: most likely the inode flush has completed, so there is
	 * nothing to push.
	 */
	if ((valusema(&(ip->i_flock)) > 0) ||
	    ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
		iip->ili_pushbuf_flag = 0;
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return;
	}

	mp = ip->i_mount;
	bp = xfs_incore(mp->m_ddev_targ, iip->ili_format.ilf_blkno,
			iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK);

	if (bp == NULL) {
		/*
		 * The pushbuf flag is deliberately not reset any earlier
		 * than this.  Even though in theory it could be cleared as
		 * soon as the buffer lock is held, doing so would let
		 * other threads come here believing the push is their
		 * responsibility, only to find the buffer still locked by
		 * someone doing the very same thing.
		 */
		iip->ili_pushbuf_flag = 0;
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return;
	}

	if (!XFS_BUF_ISDELAYWRITE(bp)) {
		/* Buffer is in core but not delayed-write: nothing to write. */
		iip->ili_pushbuf_flag = 0;
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		xfs_buf_relse(bp);
		return;
	}

	/*
	 * We hold the buffer and it is dirty.  Because neither the
	 * AIL_LOCK nor the flush lock is held, iflush may have raced with
	 * us and already removed the item from the AIL, so re-check before
	 * deciding to write it asynchronously.
	 */
	dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
		  (valusema(&(ip->i_flock)) <= 0));
	iip->ili_pushbuf_flag = 0;
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	xfs_buftrace("INODE ITEM PUSH", bp);

	/* A pinned buffer gets a log force before any write is attempted. */
	if (XFS_BUF_ISPINNED(bp))
		xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);

	if (!dopush) {
		xfs_buf_relse(bp);
		return;
	}

	xfs_bawrite(mp, bp);
}