/* * Read in the in-core dquot's on-disk metadata and return the buffer. * Returns ENOENT to signal a hole. */ STATIC int xfs_dquot_disk_read( struct xfs_mount *mp, struct xfs_dquot *dqp, struct xfs_buf **bpp) { struct xfs_bmbt_irec map; struct xfs_buf *bp; struct xfs_inode *quotip = xfs_quota_inode(mp, dqp->dq_flags); uint lock_mode; int nmaps = 1; int error; lock_mode = xfs_ilock_data_map_shared(quotip); if (!xfs_this_quota_on(mp, dqp->dq_flags)) { /* * Return if this type of quotas is turned off while we * didn't have the quota inode lock. */ xfs_iunlock(quotip, lock_mode); return -ESRCH; } /* * Find the block map; no allocations yet */ error = xfs_bmapi_read(quotip, dqp->q_fileoffset, XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0); xfs_iunlock(quotip, lock_mode); if (error) return error; ASSERT(nmaps == 1); ASSERT(map.br_blockcount >= 1); ASSERT(map.br_startblock != DELAYSTARTBLOCK); if (map.br_startblock == HOLESTARTBLOCK) return -ENOENT; trace_xfs_dqtobp_read(dqp); /* * store the blkno etc so that we don't have to do the * mapping all the time */ dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, mp->m_quotainfo->qi_dqchunklen, 0, &bp, &xfs_dquot_buf_ops); if (error) { ASSERT(bp == NULL); return error; } ASSERT(xfs_buf_islocked(bp)); xfs_buf_set_ref(bp, XFS_DQUOT_REF); *bpp = bp; return 0; }
/*
 * Populate the in-core inode from its on-disk counterpart.
 *
 * On version 5 superblocks, a brand new inode can be set up without touching
 * the disk as long as XFS_MOUNT_IKEEP inode cluster mode is not in use: the
 * core is simply zeroed and given a random generation number.  When inodes
 * are kept around, the existing generation number has to be read back from
 * the inode cluster.  On version 4 superblocks (i.e. v1/v2 inode format) log
 * recovery depends on di_flushiter being initialised from the current
 * on-disk value, so the inode has to be read from disk in that case too.
 *
 * Returns 0 on success, -EFSCORRUPTED if the on-disk inode fails
 * verification, or the error from the mapping, buffer read or fork setup.
 */
int
xfs_iread(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_inode_t	*ip,
	uint		iget_flags)
{
	xfs_buf_t	*ibp;
	xfs_dinode_t	*dino;
	xfs_failaddr_t	failaddr;
	int		error;

	/* Work out where the inode lives and record it in the in-core inode. */
	error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags);
	if (error)
		return error;

	/* Inode allocation can skip the read entirely when permitted. */
	if ((iget_flags & XFS_IGET_CREATE) &&
	    xfs_sb_version_hascrc(&mp->m_sb) &&
	    !(mp->m_flags & XFS_MOUNT_IKEEP)) {
		/* start from a zeroed inode core */
		memset(&ip->i_d, 0, sizeof(ip->i_d));
		VFS_I(ip)->i_generation = prandom_u32();
		ip->i_d.di_version = 3;
		return 0;
	}

	/* Fetch the on-disk inode together with the buffer that holds it. */
	error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dino, &ibp, 0, iget_flags);
	if (error)
		return error;

	/* Verification applies to unallocated inodes as well. */
	failaddr = xfs_dinode_verify(mp, ip->i_ino, dino);
	if (failaddr) {
		xfs_inode_verifier_error(ip, -EFSCORRUPTED, "dinode", dino,
				sizeof(*dino), failaddr);
		error = -EFSCORRUPTED;
		goto out_brelse;
	}

	if (!dino->di_mode) {
		/*
		 * Only a partial initialisation is done here: just the
		 * fields that xfs_ialloc either leaves alone or relies on
		 * being correct.
		 */
		ip->i_d.di_version = dino->di_version;
		VFS_I(ip)->i_generation = be32_to_cpu(dino->di_gen);
		ip->i_d.di_flushiter = be16_to_cpu(dino->di_flushiter);

		/*
		 * Set the mode too, in case the inode is released without
		 * ever being used.  That way xfs_inactive() sees an inode
		 * that is already free and leaves the uninitialised parts of
		 * it alone.
		 */
		VFS_I(ip)->i_mode = 0;
	} else {
		/*
		 * The on-disk inode is already linked to a directory entry:
		 * copy all of it in-core, and let xfs_iformat_fork() pull in
		 * the format-specific fork information.
		 */
		xfs_inode_from_disk(ip, dino);
		error = xfs_iformat_fork(ip, dino);
		if (error) {
#ifdef DEBUG
			xfs_alert(mp, "%s: xfs_iformat() returned error %d",
				__func__, error);
#endif /* DEBUG */
			goto out_brelse;
		}
	}

	ASSERT(ip->i_d.di_version >= 2);
	ip->i_delayed_blks = 0;

	/*
	 * Ask for the inode buffer to be kept around for a while, so that
	 * recently accessed metadata stays in-core longer.
	 */
	xfs_buf_set_ref(ibp, XFS_INO_REF);

	/*
	 * The buffer came from xfs_trans_read_buf() by way of
	 * xfs_imap_to_bp(), so it is given back with xfs_trans_brelse().
	 * With a NULL tp that is a plain brelse(); inside a transaction the
	 * buffer is only released if the transaction has not dirtied it.
	 * Letting go here is safe: on-disk inodes are never destroyed, and
	 * the new in-core inode gets locked before it is put in the cache
	 * where other processes can find it, so the inode cannot change
	 * under us merely because the buffer was released.
	 */
 out_brelse:
	xfs_trans_brelse(tp, ibp);
	return error;
}
/* * Read the disk inode attributes into the in-core inode structure. * * For version 5 superblocks, if we are initialising a new inode and we are not * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new * inode core with a random generation number. If we are keeping inodes around, * we need to read the inode cluster to get the existing generation number off * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode * format) then log recovery is dependent on the di_flushiter field being * initialised from the current on-disk value and hence we must also read the * inode off disk. */ int xfs_iread( xfs_mount_t *mp, xfs_trans_t *tp, xfs_inode_t *ip, uint iget_flags) { xfs_buf_t *bp; xfs_dinode_t *dip; int error; /* * Fill in the location information in the in-core inode. */ error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, iget_flags); if (error) return error; /* shortcut IO on inode allocation if possible */ if ((iget_flags & XFS_IGET_CREATE) && xfs_sb_version_hascrc(&mp->m_sb) && !(mp->m_flags & XFS_MOUNT_IKEEP)) { /* initialise the on-disk inode core */ memset(&ip->i_d, 0, sizeof(ip->i_d)); ip->i_d.di_magic = XFS_DINODE_MAGIC; ip->i_d.di_gen = prandom_u32(); if (xfs_sb_version_hascrc(&mp->m_sb)) { ip->i_d.di_version = 3; ip->i_d.di_ino = ip->i_ino; uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_uuid); } else ip->i_d.di_version = 2; return 0; } /* * Get pointers to the on-disk inode and the buffer containing it. */ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags); if (error) return error; /* even unallocated inodes are verified */ if (!xfs_dinode_verify(mp, ip, dip)) { xfs_alert(mp, "%s: validation failed for inode %lld failed", __func__, ip->i_ino); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, dip); error = -EFSCORRUPTED; goto out_brelse; } /* * If the on-disk inode is already linked to a directory * entry, copy all of the inode into the in-core inode. 
* xfs_iformat_fork() handles copying in the inode format * specific information. * Otherwise, just get the truly permanent information. */ if (dip->di_mode) { xfs_dinode_from_disk(&ip->i_d, dip); error = xfs_iformat_fork(ip, dip); if (error) { #ifdef DEBUG xfs_alert(mp, "%s: xfs_iformat() returned error %d", __func__, error); #endif /* DEBUG */ goto out_brelse; } } else { /* * Partial initialisation of the in-core inode. Just the bits * that xfs_ialloc won't overwrite or relies on being correct. */ ip->i_d.di_magic = be16_to_cpu(dip->di_magic); ip->i_d.di_version = dip->di_version; ip->i_d.di_gen = be32_to_cpu(dip->di_gen); ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter); if (dip->di_version == 3) { ip->i_d.di_ino = be64_to_cpu(dip->di_ino); uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid); } /* * Make sure to pull in the mode here as well in * case the inode is released without being used. * This ensures that xfs_inactive() will see that * the inode is already free and not try to mess * with the uninitialized part of it. */ ip->i_d.di_mode = 0; } /* * Automatically convert version 1 inode formats in memory to version 2 * inode format. If the inode is modified, it will get logged and * rewritten as a version 2 inode. We can do this because we set the * superblock feature bit for v2 inodes unconditionally during mount * and it means the reast of the code can assume the inode version is 2 * or higher. */ if (ip->i_d.di_version == 1) { ip->i_d.di_version = 2; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); ip->i_d.di_nlink = ip->i_d.di_onlink; ip->i_d.di_onlink = 0; xfs_set_projid(ip, 0); } ip->i_delayed_blks = 0; /* * Mark the buffer containing the inode as something to keep * around for a while. This helps to keep recently accessed * meta-data in-core longer. */ xfs_buf_set_ref(bp, XFS_INO_REF); /* * Use xfs_trans_brelse() to release the buffer containing the on-disk * inode, because it was acquired with xfs_trans_read_buf() in * xfs_imap_to_bp() above. 
If tp is NULL, this is just a normal * brelse(). If we're within a transaction, then xfs_trans_brelse() * will only release the buffer if it is not dirty within the * transaction. It will be OK to release the buffer in this case, * because inodes on disk are never destroyed and we will be locking the * new in-core inode before putting it in the cache where other * processes can find it. Thus we don't have to worry about the inode * being changed just because we released the buffer. */ out_brelse: xfs_trans_brelse(tp, bp); return error; }
/*
 * Allocate an in-core dquot, fill it from the on-disk dquot located through
 * xfs_qm_dqtobp(), and let go of the buffer straight away.
 *
 * When XFS_QMOPT_DQALLOC is set in @flags, a transaction is set up first so
 * that the dquot can be allocated on disk if that is needed.
 *
 * Returns 0 with the new dquot in *O_dqpp.  On failure the dquot is destroyed,
 * *O_dqpp is set to NULL and the error is returned.
 */
int
xfs_qm_dqread(
	struct xfs_mount	*mp,
	xfs_dqid_t		id,
	uint			type,
	uint			flags,
	struct xfs_dquot	**O_dqpp)
{
	struct xfs_trans	*tp = NULL;
	struct xfs_dquot	*dqp;
	struct xfs_disk_dquot	*ddqp;
	struct xfs_buf		*bp;
	int			cancel_flags = 0;
	int			error;

	dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);

	dqp->dq_flags = type;
	dqp->q_core.d_id = cpu_to_be32(id);
	dqp->q_mount = mp;
	INIT_LIST_HEAD(&dqp->q_lru);
	mutex_init(&dqp->q_qlock);
	init_waitqueue_head(&dqp->q_pinwait);

	/*
	 * The flush completion is used as a counting completion, so complete
	 * it once up front; that lets a single user through without blocking.
	 */
	init_completion(&dqp->q_flush);
	complete(&dqp->q_flush);

	/*
	 * Group and project quotas each get a lock class of their own; user
	 * quotas stay on the default lock class.
	 */
	if (type == XFS_DQ_GROUP) {
		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class);
	} else if (type == XFS_DQ_PROJ) {
		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class);
	} else if (type != XFS_DQ_USER) {
		ASSERT(0);
	}

	XFS_STATS_INC(xs_qm_dquot);

	trace_xfs_dqread(dqp);

	if (flags & XFS_QMOPT_DQALLOC) {
		tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc,
					  XFS_QM_DQALLOC_SPACE_RES(mp), 0);
		if (error)
			goto out_cancel;
		cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	}

	/*
	 * Find the on-disk dquot and the buffer it sits in.  The dquot
	 * already carries its own type (GROUP/USER).
	 */
	error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags);
	if (error) {
		/*
		 * Either quotas were turned off (ESRCH), or the dquot did
		 * not exist on disk and we asked to allocate it (ENOENT).
		 */
		trace_xfs_dqread_fail(dqp);
		cancel_flags |= XFS_TRANS_ABORT;
		goto out_cancel;
	}

	/* the whole disk dquot is copied into the incore dquot */
	memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
	xfs_qm_dquot_logitem_init(dqp);

	/*
	 * The reservation counters hold reservation plus current usage, so
	 * that the two do not have to be added up on every use.
	 */
	dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
	dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
	dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);

	/* set up the speculative preallocation thresholds for this dquot */
	xfs_dquot_set_prealloc_limits(dqp);

	/* Ask for the buffer to stay incore a little longer. */
	xfs_buf_set_ref(bp, XFS_DQUOT_REF);

	/*
	 * The buffer was obtained with xfs_trans_read_buf() (in dqtobp()),
	 * so xfs_trans_brelse() is what gives it back.  This mirrors the
	 * inode strategy: the dquot is locked in xfs_qm_dqget() before anyone
	 * else can reach it, because dquots, like inodes, need a good level
	 * of concurrency and we do not want to hold locks on entire buffers
	 * for dquot accesses.  The buffer may even be dirty at this point if
	 * this particular dquot was repaired; releasing it is still fine
	 * because the changes are incore.
	 */
	ASSERT(xfs_buf_islocked(bp));
	xfs_trans_brelse(tp, bp);

	if (tp) {
		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
		if (error)
			goto out_destroy;
	}

	*O_dqpp = dqp;
	return error;

out_cancel:
	if (tp)
		xfs_trans_cancel(tp, cancel_flags);
out_destroy:
	xfs_qm_dqdestroy(dqp);
	*O_dqpp = NULL;
	return error;
}
/* * Ensure that the given in-core dquot has a buffer on disk backing it, and * return the buffer locked and held. This is called when the bmapi finds a * hole. */ STATIC int xfs_dquot_disk_alloc( struct xfs_trans **tpp, struct xfs_dquot *dqp, struct xfs_buf **bpp) { struct xfs_bmbt_irec map; struct xfs_trans *tp = *tpp; struct xfs_mount *mp = tp->t_mountp; struct xfs_buf *bp; struct xfs_inode *quotip = xfs_quota_inode(mp, dqp->dq_flags); int nmaps = 1; int error; trace_xfs_dqalloc(dqp); xfs_ilock(quotip, XFS_ILOCK_EXCL); if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { /* * Return if this type of quotas is turned off while we didn't * have an inode lock */ xfs_iunlock(quotip, XFS_ILOCK_EXCL); return -ESRCH; } /* Create the block mapping. */ xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset, XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, XFS_QM_DQALLOC_SPACE_RES(mp), &map, &nmaps); if (error) return error; ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); ASSERT(nmaps == 1); ASSERT((map.br_startblock != DELAYSTARTBLOCK) && (map.br_startblock != HOLESTARTBLOCK)); /* * Keep track of the blkno to save a lookup later */ dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); /* now we can just get the buffer (there's nothing to read yet) */ bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno, mp->m_quotainfo->qi_dqchunklen, 0); if (!bp) return -ENOMEM; bp->b_ops = &xfs_dquot_buf_ops; /* * Make a chunk of dquots out of this buffer and log * the entire thing. */ xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id), dqp->dq_flags & XFS_DQ_ALLTYPES, bp); xfs_buf_set_ref(bp, XFS_DQUOT_REF); /* * Hold the buffer and join it to the dfops so that we'll still own * the buffer when we return to the caller. 
The buffer disposal on * error must be paid attention to very carefully, as it has been * broken since commit efa092f3d4c6 "[XFS] Fixes a bug in the quota * code when allocating a new dquot record" in 2005, and the later * conversion to xfs_defer_ops in commit 310a75a3c6c747 failed to keep * the buffer locked across the _defer_finish call. We can now do * this correctly with xfs_defer_bjoin. * * Above, we allocated a disk block for the dquot information and used * get_buf to initialize the dquot. If the _defer_finish fails, the old * transaction is gone but the new buffer is not joined or held to any * transaction, so we must _buf_relse it. * * If everything succeeds, the caller of this function is returned a * buffer that is locked and held to the transaction. The caller * is responsible for unlocking any buffer passed back, either * manually or by committing the transaction. On error, the buffer is * released and not passed back. */ xfs_trans_bhold(tp, bp); error = xfs_defer_finish(tpp); if (error) { xfs_trans_bhold_release(*tpp, bp); xfs_trans_brelse(*tpp, bp); return error; } *bpp = bp; return 0; }