int xfs_dir_ino_validate( xfs_mount_t *mp, xfs_ino_t ino) { xfs_agblock_t agblkno; xfs_agino_t agino; xfs_agnumber_t agno; int ino_ok; int ioff; agno = XFS_INO_TO_AGNO(mp, ino); agblkno = XFS_INO_TO_AGBNO(mp, ino); ioff = XFS_INO_TO_OFFSET(mp, ino); agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff); ino_ok = agno < mp->m_sb.sb_agcount && agblkno < mp->m_sb.sb_agblocks && agblkno != 0 && ioff < (1 << mp->m_sb.sb_inopblog) && XFS_AGINO_TO_INO(mp, agno, agino) == ino; if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE, XFS_RANDOM_DIR_INO_VALIDATE))) { xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx", (unsigned long long) ino); XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp); return XFS_ERROR(EFSCORRUPTED); } return 0; }
/* * Give the buffer a little push if it is incore and * wait on the flush lock. */ void xfs_qm_dqflock_pushbuf_wait( xfs_dquot_t *dqp) { xfs_buf_t *bp; /* * Check to see if the dquot has been flushed delayed * write. If so, grab its buffer and send it * out immediately. We'll be able to acquire * the flush lock when the I/O completes. */ bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno, XFS_QI_DQCHUNKLEN(dqp->q_mount), XFS_INCORE_TRYLOCK); if (bp != NULL) { if (XFS_BUF_ISDELAYWRITE(bp)) { int error; if (XFS_BUF_ISPINNED(bp)) { xfs_log_force(dqp->q_mount, (xfs_lsn_t)0, XFS_LOG_FORCE); } error = xfs_bawrite(dqp->q_mount, bp); if (error) xfs_fs_cmn_err(CE_WARN, dqp->q_mount, "xfs_qm_dqflock_pushbuf_wait: " "pushbuf error %d on dqp %p, bp %p", error, dqp, bp); } else { xfs_buf_relse(bp); } } xfs_dqflock(dqp); }
/* * Clear the quotaflags in memory and in the superblock. */ int xfs_mount_reset_sbqflags(xfs_mount_t *mp) { int error; xfs_trans_t *tp; unsigned long s; mp->m_qflags = 0; /* * It is OK to look at sb_qflags here in mount path, * without SB_LOCK. */ if (mp->m_sb.sb_qflags == 0) return 0; s = XFS_SB_LOCK(mp); mp->m_sb.sb_qflags = 0; XFS_SB_UNLOCK(mp, s); /* * if the fs is readonly, let the incore superblock run * with quotas off but don't flush the update out to disk */ if (XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY) return 0; #ifdef QUOTADEBUG xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes"); #endif tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, XFS_DEFAULT_LOG_COUNT))) { xfs_trans_cancel(tp, 0); xfs_fs_cmn_err(CE_ALERT, mp, "xfs_mount_reset_sbqflags: Superblock update failed!"); return error; } xfs_mod_sb(tp, XFS_SB_QFLAGS); error = xfs_trans_commit(tp, 0, NULL); return error; }
/* * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that * the dquot is locked by us, but the flush lock isn't. So, here we are * going to see if the relevant dquot buffer is incore, waiting on DELWRI. * If so, we want to push it out to help us take this item off the AIL as soon * as possible. * * We must not be holding the AIL lock at this point. Calling incore() to * search the buffer cache can be a time consuming thing, and AIL lock is a * spinlock. */ STATIC void xfs_qm_dquot_logitem_pushbuf( xfs_dq_logitem_t *qip) { xfs_dquot_t *dqp; xfs_mount_t *mp; xfs_buf_t *bp; uint dopush; dqp = qip->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); /* * The qli_pushbuf_flag keeps others from * trying to duplicate our effort. */ ASSERT(qip->qli_pushbuf_flag != 0); ASSERT(qip->qli_push_owner == current_pid()); /* * If flushlock isn't locked anymore, chances are that the * inode flush completed and the inode was taken off the AIL. * So, just get out. */ if (!issemalocked(&(dqp->q_flock)) || ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); return; } mp = dqp->q_mount; bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno, XFS_QI_DQCHUNKLEN(mp), XFS_INCORE_TRYLOCK); if (bp != NULL) { if (XFS_BUF_ISDELAYWRITE(bp)) { dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && issemalocked(&(dqp->q_flock))); qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); if (XFS_BUF_ISPINNED(bp)) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); } if (dopush) { int error; #ifdef XFSRACEDEBUG delay_for_intr(); delay(300); #endif error = xfs_bawrite(mp, bp); if (error) xfs_fs_cmn_err(CE_WARN, mp, "xfs_qm_dquot_logitem_pushbuf: pushbuf error %d on qip %p, bp %p", error, qip, bp); } else { xfs_buf_relse(bp); } } else { qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); xfs_buf_relse(bp); } return; } qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); }
/* * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK * failed to get the inode flush lock but did get the inode locked SHARED. * Here we're trying to see if the inode buffer is incore, and if so whether it's * marked delayed write. If that's the case, we'll initiate a bawrite on that * buffer to expedite the process. * * We aren't holding the AIL lock (or the flush lock) when this gets called, * so it is inherently race-y. */ STATIC void xfs_inode_item_pushbuf( xfs_inode_log_item_t *iip) { xfs_inode_t *ip; xfs_mount_t *mp; xfs_buf_t *bp; uint dopush; ip = iip->ili_inode; ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); /* * The ili_pushbuf_flag keeps others from * trying to duplicate our effort. */ ASSERT(iip->ili_pushbuf_flag != 0); ASSERT(iip->ili_push_owner == current_pid()); /* * If a flush is not in progress anymore, chances are that the * inode was taken off the AIL. So, just get out. */ if (completion_done(&ip->i_flush) || ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); return; } mp = ip->i_mount; bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno, iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK); if (bp != NULL) { if (XFS_BUF_ISDELAYWRITE(bp)) { /* * We were racing with iflush because we don't hold * the AIL lock or the flush lock. However, at this point, * we have the buffer, and we know that it's dirty. * So, it's possible that iflush raced with us, and * this item is already taken off the AIL. * If not, we can flush it async. */ dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) && !completion_done(&ip->i_flush)); iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_buftrace("INODE ITEM PUSH", bp); if (XFS_BUF_ISPINNED(bp)) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); } if (dopush) { int error; error = xfs_bawrite(mp, bp); if (error) xfs_fs_cmn_err(CE_WARN, mp, "xfs_inode_item_pushbuf: pushbuf error %d on iip %p, bp %p", error, iip, bp); } else { xfs_buf_relse(bp); } } else { iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_buf_relse(bp); } return; } /* * We have to be careful about resetting pushbuf flag too early (above). * Even though in theory we can do it as soon as we have the buflock, * we don't want others to be doing work needlessly. They'll come to * this function thinking that pushing the buffer is their * responsibility only to find that the buffer is still locked by * another doing the same thing */ iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); return; }
int xfs_swap_extents( xfs_inode_t *ip, /* target inode */ xfs_inode_t *tip, /* tmp inode */ xfs_swapext_t *sxp) { xfs_mount_t *mp; xfs_trans_t *tp; xfs_bstat_t *sbp = &sxp->sx_stat; xfs_ifork_t *tempifp, *ifp, *tifp; int ilf_fields, tilf_fields; static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; int error = 0; int aforkblks = 0; int taforkblks = 0; __uint64_t tmp; char locked = 0; mp = ip->i_mount; tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); if (!tempifp) { error = XFS_ERROR(ENOMEM); goto error0; } sbp = &sxp->sx_stat; /* * we have to do two separate lock calls here to keep lockdep * happy. If we try to get all the locks in one call, lock will * report false positives when we drop the ILOCK and regain them * below. */ xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); locked = 1; /* Verify that both files have the same format */ if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { error = XFS_ERROR(EINVAL); goto error0; } /* Verify both files are either real-time or non-realtime */ if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { error = XFS_ERROR(EINVAL); goto error0; } if (VN_CACHED(VFS_I(tip)) != 0) { xfs_inval_cached_trace(tip, 0, -1, 0, -1); error = xfs_flushinval_pages(tip, 0, -1, FI_REMAPF_LOCKED); if (error) goto error0; } /* Verify O_DIRECT for ftmp */ if (VN_CACHED(VFS_I(tip)) != 0) { error = XFS_ERROR(EINVAL); goto error0; } /* Verify all data are being swapped */ if (sxp->sx_offset != 0 || sxp->sx_length != ip->i_d.di_size || sxp->sx_length != tip->i_d.di_size) { error = XFS_ERROR(EFAULT); goto error0; } /* check inode formats now that data is flushed */ error = xfs_swap_extents_check_format(ip, tip); if (error) { xfs_fs_cmn_err(CE_NOTE, mp, "%s: inode 0x%llx format is incompatible for exchanging.", __FILE__, ip->i_ino); goto error0; } /* * Compare the current change & modify times with that * passed in. If they differ, we abort this swap. * This is the mechanism used to ensure the calling * process that the file was not changed out from * under it. */ if ((sbp->bs_ctime.tv_sec != ip->i_d.di_ctime.t_sec) || (sbp->bs_ctime.tv_nsec != ip->i_d.di_ctime.t_nsec) || (sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) || (sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) { error = XFS_ERROR(EBUSY); goto error0; } /* We need to fail if the file is memory mapped. Once we have tossed * all existing pages, the page fault will have no option * but to go to the filesystem for pages. By making the page fault call * vop_read (or write in the case of autogrow) they block on the iolock * until we have switched the extents. */ if (VN_MAPPED(VFS_I(ip))) { error = XFS_ERROR(EBUSY); goto error0; } xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(tip, XFS_ILOCK_EXCL); /* * There is a race condition here since we gave up the * ilock. However, the data fork will not change since * we have the iolock (locked for truncation too) so we * are safe. We don't really care if non-io related * fields change. */ xfs_tosspages(ip, 0, -1, FI_REMAPF); tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); if ((error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0))) { xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_iunlock(tip, XFS_IOLOCK_EXCL); xfs_trans_cancel(tp, 0); locked = 0; goto error0; } xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); /* * Count the number of extended attribute blocks */ if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); if (error) { xfs_trans_cancel(tp, 0); goto error0; } } if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &taforkblks); if (error) { xfs_trans_cancel(tp, 0); goto error0; } } /* * Swap the data forks of the inodes */ ifp = &ip->i_df; tifp = &tip->i_df; *tempifp = *ifp; /* struct copy */ *ifp = *tifp; /* struct copy */ *tifp = *tempifp; /* struct copy */ /* * Fix the in-memory data fork values that are dependent on the fork * offset in the inode. We can't assume they remain the same as attr2 * has dynamic fork offsets. */ ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) / (uint)sizeof(xfs_bmbt_rec_t); tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) / (uint)sizeof(xfs_bmbt_rec_t); /* * Fix the on-disk inode values */ tmp = (__uint64_t)ip->i_d.di_nblocks; ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; tmp = (__uint64_t) ip->i_d.di_nextents; ip->i_d.di_nextents = tip->i_d.di_nextents; tip->i_d.di_nextents = tmp; tmp = (__uint64_t) ip->i_d.di_format; ip->i_d.di_format = tip->i_d.di_format; tip->i_d.di_format = tmp; ilf_fields = XFS_ILOG_CORE; switch(ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. Otherwise it's already NULL or * pointing to the extent. */ if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; } ilf_fields |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: ilf_fields |= XFS_ILOG_DBROOT; break; } tilf_fields = XFS_ILOG_CORE; switch(tip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. Otherwise it's already NULL or * pointing to the extent. */ if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; } tilf_fields |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: tilf_fields |= XFS_ILOG_DBROOT; break; } IHOLD(ip); xfs_trans_ijoin(tp, ip, lock_flags); IHOLD(tip); xfs_trans_ijoin(tp, tip, lock_flags); xfs_trans_log_inode(tp, ip, ilf_fields); xfs_trans_log_inode(tp, tip, tilf_fields); /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) { xfs_trans_set_sync(tp); } error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT); locked = 0; error0: if (locked) { xfs_iunlock(ip, lock_flags); xfs_iunlock(tip, lock_flags); } if (tempifp != NULL) kmem_free(tempifp); return error; }
/* ARGSUSED */ int xfs_qm_dqpurge( xfs_dquot_t *dqp) { xfs_dqhash_t *thishash; xfs_mount_t *mp = dqp->q_mount; ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp)); ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock)); xfs_dqlock(dqp); /* * We really can't afford to purge a dquot that is * referenced, because these are hard refs. * It shouldn't happen in general because we went thru _all_ inodes in * dqrele_all_inodes before calling this and didn't let the mountlock go. * However it is possible that we have dquots with temporary * references that are not attached to an inode. e.g. see xfs_setattr(). */ if (dqp->q_nrefs != 0) { xfs_dqunlock(dqp); mutex_unlock(&dqp->q_hash->qh_lock); return (1); } ASSERT(XFS_DQ_IS_ON_FREELIST(dqp)); /* * If we're turning off quotas, we have to make sure that, for * example, we don't delete quota disk blocks while dquots are * in the process of getting written to those disk blocks. * This dquot might well be on AIL, and we can't leave it there * if we're turning off quotas. Basically, we need this flush * lock, and are willing to block on it. */ if (!xfs_dqflock_nowait(dqp)) { /* * Block on the flush lock after nudging dquot buffer, * if it is incore. */ xfs_qm_dqflock_pushbuf_wait(dqp); } /* * XXXIf we're turning this type of quotas off, we don't care * about the dirty metadata sitting in this dquot. OTOH, if * we're unmounting, we do care, so we flush it and wait. */ if (XFS_DQ_IS_DIRTY(dqp)) { int error; xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY"); /* dqflush unlocks dqflock */ /* * Given that dqpurge is a very rare occurrence, it is OK * that we're holding the hashlist and mplist locks * across the disk write. But, ... XXXsup * * We don't care about getting disk errors here. We need * to purge this dquot anyway, so we go ahead regardless. */ error = xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC); if (error) xfs_fs_cmn_err(CE_WARN, mp, "xfs_qm_dqpurge: dquot %p flush failed", dqp); xfs_dqflock(dqp); } ASSERT(atomic_read(&dqp->q_pincount) == 0); ASSERT(XFS_FORCED_SHUTDOWN(mp) || !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); thishash = dqp->q_hash; XQM_HASHLIST_REMOVE(thishash, dqp); XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(mp)), dqp); /* * XXX Move this to the front of the freelist, if we can get the * freelist lock. */ ASSERT(XFS_DQ_IS_ON_FREELIST(dqp)); dqp->q_mount = NULL; dqp->q_hash = NULL; dqp->dq_flags = XFS_DQ_INACTIVE; memset(&dqp->q_core, 0, sizeof(dqp->q_core)); xfs_dqfunlock(dqp); xfs_dqunlock(dqp); mutex_unlock(&thishash->qh_lock); return (0); }