/*
 * This gets called when the inode's version needs to be changed from 1 to 2.
 * Currently this happens when the nlink field overflows the old 16-bit value
 * or when chproj is called to change the project for the first time.
 * As a side effect the superblock version will also get rev'd
 * to contain the NLINK bit.
 */
void
xfs_bump_ino_vers2(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip)
{
	xfs_mount_t	*mp;
	unsigned long	s;

	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
	ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1);

	ip->i_d.di_version = XFS_DINODE_VERSION_2;
	ip->i_d.di_onlink = 0;
	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
	mp = tp->t_mountp;
	if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
		s = XFS_SB_LOCK(mp);
		if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) {
			XFS_SB_VERSION_ADDNLINK(&mp->m_sb);
			XFS_SB_UNLOCK(mp, s);
			xfs_mod_sb(tp, XFS_SB_VERSIONNUM);
		} else {
			XFS_SB_UNLOCK(mp, s);
		}
	}
	/* Caller must log the inode */
}
/*
 * This is called to mark the fields indicated in fieldmask as needing
 * to be logged when the transaction is committed.  The inode must
 * already be associated with the given transaction.
 *
 * The values for fieldmask are defined in xfs_inode_item.h.  We always
 * log all of the core inode if any of it has changed, and we always log
 * all of the inline data/extents/b-tree root if any of them has changed.
 */
void
xfs_trans_log_inode(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip,
	uint		flags)
{
	xfs_log_item_desc_t	*lidp;

	ASSERT(ip->i_transp == tp);
	ASSERT(ip->i_itemp != NULL);
	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));

	lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp));
	ASSERT(lidp != NULL);

	tp->t_flags |= XFS_TRANS_DIRTY;
	lidp->lid_flags |= XFS_LID_DIRTY;

	/*
	 * Always OR in the bits from the ili_last_fields field.
	 * This is to coordinate with the xfs_iflush() and xfs_iflush_done()
	 * routines in the eventual clearing of the ilf_fields bits.
	 * See the big comment in xfs_iflush() for an explanation of
	 * this coordination mechanism.
	 */
	flags |= ip->i_itemp->ili_last_fields;
	ip->i_itemp->ili_format.ilf_fields |= flags;
}
/*
 * This is called to pin the inode associated with the inode log
 * item in memory so it cannot be written out.  Do this by calling
 * xfs_ipin() to bump the pin count in the inode while holding the
 * inode pin lock.
 */
STATIC void
xfs_inode_item_pin(
	xfs_inode_log_item_t	*iip)
{
	ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE));
	xfs_ipin(iip->ili_inode);
}
/*
 * This is called to asynchronously write the inode associated with this
 * inode log item out to disk.  The inode will already have been locked by
 * a successful call to xfs_inode_item_trylock().
 */
STATIC void
xfs_inode_item_push(
	xfs_inode_log_item_t	*iip)
{
	xfs_inode_t	*ip;

	ip = iip->ili_inode;

	ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS));
	ASSERT(valusema(&(ip->i_flock)) <= 0);
	/*
	 * Since we were able to lock the inode's flush lock and
	 * we found it on the AIL, the inode must be dirty.  This
	 * is because the inode is removed from the AIL while still
	 * holding the flush lock in xfs_iflush_done().  Thus, if
	 * we found it in the AIL and were able to obtain the flush
	 * lock without sleeping, then there must not have been
	 * anyone in the process of flushing the inode.
	 */
	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) ||
	       iip->ili_format.ilf_fields != 0);

	/*
	 * Write out the inode.  The completion routine ('iflush_done') will
	 * pull it from the AIL, mark it clean, unlock the flush lock.
	 */
	(void) xfs_iflush(ip, XFS_IFLUSH_ASYNC);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	return;
}
/*
 * This gets called when the inode's version needs to be changed from 1 to 2.
 * Currently this happens when the nlink field overflows the old 16-bit value
 * or when chproj is called to change the project for the first time.
 * As a side effect the superblock version will also get rev'd
 * to contain the NLINK bit.
 */
void
xfs_bump_ino_vers2(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip)
{
	xfs_mount_t	*mp;

	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
	ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1);

	ip->i_d.di_version = XFS_DINODE_VERSION_2;
	ip->i_d.di_onlink = 0;
	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
	mp = tp->t_mountp;
	if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
		spin_lock(&mp->m_sb_lock);
		if (!xfs_sb_version_hasnlink(&mp->m_sb)) {
			xfs_sb_version_addnlink(&mp->m_sb);
			spin_unlock(&mp->m_sb_lock);
			xfs_mod_sb(tp, XFS_SB_VERSIONNUM);
		} else {
			spin_unlock(&mp->m_sb_lock);
		}
	}
	/* Caller must log the inode */
}
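/*
 * Illustrative sketch (not XFS code): the unlocked test of the NLINK
 * feature bit followed by a re-test under the superblock lock above is a
 * check/lock/re-check pattern.  The names below (feature_flags,
 * feature_lock, FEATURE_NLINK) are hypothetical; this only demonstrates
 * the idiom of upgrading a shared flag exactly once.
 */
#include <pthread.h>

#define FEATURE_NLINK	0x1

static unsigned int	feature_flags;
static pthread_mutex_t	feature_lock = PTHREAD_MUTEX_INITIALIZER;

static int
feature_add_nlink(void)
{
	int	added = 0;

	/* Cheap unlocked test first: the common case is "already set". */
	if (!(feature_flags & FEATURE_NLINK)) {
		pthread_mutex_lock(&feature_lock);
		/* Re-test under the lock; another thread may have won the race. */
		if (!(feature_flags & FEATURE_NLINK)) {
			feature_flags |= FEATURE_NLINK;
			added = 1;
		}
		pthread_mutex_unlock(&feature_lock);
	}
	/* Only the winner returns 1 and, by analogy, logs the superblock change. */
	return added;
}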
/*
 * This routine is called to handle zeroing any space in the last
 * block of the file that is beyond the EOF.  We do this since the
 * size is being increased without writing anything to that block
 * and we don't want anyone to read the garbage on the disk.
 */
STATIC int				/* error (positive) */
xfs_zero_last_block(
	xfs_vnode_t	*vp,
	xfs_iocore_t	*io,
	xfs_fsize_t	isize,
	xfs_fsize_t	end_size)
{
	xfs_fileoff_t	last_fsb;
	xfs_mount_t	*mp;
	int		nimaps;
	int		zero_offset;
	int		zero_len;
	int		error = 0;
	xfs_bmbt_irec_t	imap;
	xfs_off_t	loff;

	ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);

	mp = io->io_mount;

	zero_offset = XFS_B_FSB_OFFSET(mp, isize);
	if (zero_offset == 0) {
		/*
		 * There are no extra bytes in the last block on disk to
		 * zero, so return.
		 */
		return 0;
	}

	last_fsb = XFS_B_TO_FSBT(mp, isize);
	nimaps = 1;
	error = XFS_BMAPI(mp, NULL, io, last_fsb, 1, 0, NULL, 0, &imap,
			  &nimaps, NULL, NULL);
	if (error) {
		return error;
	}
	ASSERT(nimaps > 0);
	/*
	 * If the block underlying isize is just a hole, then there
	 * is nothing to zero.
	 */
	if (imap.br_startblock == HOLESTARTBLOCK) {
		return 0;
	}
	/*
	 * Zero the part of the last block beyond the EOF, and write it
	 * out sync.  We need to drop the ilock while we do this so we
	 * don't deadlock when the buffer cache calls back to us.
	 */
	XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
	loff = XFS_FSB_TO_B(mp, last_fsb);

	zero_len = mp->m_sb.sb_blocksize - zero_offset;

	error = xfs_iozero(vp, loff + zero_offset, zero_len, end_size);

	XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
	ASSERT(error >= 0);
	return error;
}
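/*
 * Illustrative sketch (not XFS code): the zero_offset/zero_len arithmetic
 * above reduces to "byte offset of isize within its block" and "bytes from
 * there to the end of the block".  The block size is assumed to be a power
 * of two, as it is on XFS; the values below are only sample inputs.
 */
#include <stdio.h>

int
main(void)
{
	unsigned long long	isize = 10000;		/* current file size */
	unsigned int		blocksize = 4096;	/* filesystem block size */
	unsigned int		zero_offset;
	unsigned int		zero_len;

	zero_offset = isize & (blocksize - 1);	/* offset within the last block */
	if (zero_offset == 0) {
		printf("size is block aligned, nothing to zero\n");
		return 0;
	}
	zero_len = blocksize - zero_offset;	/* bytes to zero beyond EOF */
	printf("zero %u bytes starting at file offset %llu\n", zero_len, isize);
	return 0;
}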
/*
 * Mark the inode so that it is not unlocked when the inode log item's
 * unlock routine is called at transaction commit (see the XFS_ILI_HOLD
 * test in xfs_inode_item_unlock()).  The inode must already be associated
 * with the given transaction.
 */
/*ARGSUSED*/
void
xfs_trans_ihold(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip)
{
	ASSERT(ip->i_transp == tp);
	ASSERT(ip->i_itemp != NULL);
	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));

	ip->i_itemp->ili_flags |= XFS_ILI_HOLD;
}
/*
 * Add the locked inode to the transaction.
 * The inode must be locked, and it cannot be associated with any
 * transaction.  The caller must specify the locks already held
 * on the inode.
 */
void
xfs_trans_ijoin(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip,
	uint		lock_flags)
{
	xfs_inode_log_item_t	*iip;

	ASSERT(ip->i_transp == NULL);
	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
	ASSERT(lock_flags & XFS_ILOCK_EXCL);
	if (ip->i_itemp == NULL)
		xfs_inode_item_init(ip, ip->i_mount);
	iip = ip->i_itemp;
	ASSERT(iip->ili_flags == 0);
	ASSERT(iip->ili_ilock_recur == 0);
	ASSERT(iip->ili_iolock_recur == 0);

	/*
	 * Get a log_item_desc to point at the new item.
	 */
	(void) xfs_trans_add_item(tp, (xfs_log_item_t*)(iip));

	xfs_trans_inode_broot_debug(ip);

	/*
	 * If the IO lock is already held, mark that in the inode log item.
	 */
	if (lock_flags & XFS_IOLOCK_EXCL) {
		iip->ili_flags |= XFS_ILI_IOLOCKED_EXCL;
	} else if (lock_flags & XFS_IOLOCK_SHARED) {
		iip->ili_flags |= XFS_ILI_IOLOCKED_SHARED;
	}

	/*
	 * Initialize i_transp so we can find it with xfs_inode_incore()
	 * in xfs_trans_iget() above.
	 */
	ip->i_transp = tp;
}
int					/* error (positive) */
xfs_zero_eof(
	vnode_t		*vp,
	xfs_iocore_t	*io,
	xfs_off_t	offset,		/* starting I/O offset */
	xfs_fsize_t	isize,		/* current inode size */
	xfs_fsize_t	end_size)	/* terminal inode size */
{
	struct inode	*ip = LINVFS_GET_IP(vp);
	xfs_fileoff_t	start_zero_fsb;
	xfs_fileoff_t	end_zero_fsb;
	xfs_fileoff_t	prev_zero_fsb;
	xfs_fileoff_t	zero_count_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_extlen_t	buf_len_fsb;
	xfs_extlen_t	prev_zero_count;
	xfs_mount_t	*mp;
	int		nimaps;
	int		error = 0;
	xfs_bmbt_irec_t	imap;
	loff_t		loff;
	size_t		lsize;

	ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
	ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));

	mp = io->io_mount;

	/*
	 * First handle zeroing the block on which isize resides.
	 * We only zero a part of that block so it is handled specially.
	 */
	error = xfs_zero_last_block(ip, io, offset, isize, end_size);
	if (error) {
		ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
		ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
		return error;
	}

	/*
	 * Calculate the range between the new size and the old
	 * where blocks needing to be zeroed may exist.  To get the
	 * block where the last byte in the file currently resides,
	 * we need to subtract one from the size and truncate back
	 * to a block boundary.  We subtract 1 in case the size is
	 * exactly on a block boundary.
	 */
	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
	end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
	ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
	if (last_fsb == end_zero_fsb) {
		/*
		 * The size was only incremented on its last block.
		 * We took care of that above, so just return.
		 */
		return 0;
	}

	ASSERT(start_zero_fsb <= end_zero_fsb);
	prev_zero_fsb = NULLFILEOFF;
	prev_zero_count = 0;
	while (start_zero_fsb <= end_zero_fsb) {
		nimaps = 1;
		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
		error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb,
				  0, NULL, 0, &imap, &nimaps, NULL);
		if (error) {
			ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
			ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
			return error;
		}
		ASSERT(nimaps > 0);

		if (imap.br_state == XFS_EXT_UNWRITTEN ||
		    imap.br_startblock == HOLESTARTBLOCK) {
			/*
			 * This loop handles initializing pages that were
			 * partially initialized by the code below this
			 * loop. It basically zeroes the part of the page
			 * that sits on a hole and sets the page as P_HOLE
			 * and calls remapf if it is a mapped file.
			 */
			prev_zero_fsb = NULLFILEOFF;
			prev_zero_count = 0;
			start_zero_fsb = imap.br_startoff +
					 imap.br_blockcount;
			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
			continue;
		}

		/*
		 * There are blocks in the range requested.
		 * Zero them a single write at a time.  We actually
		 * don't zero the entire range returned if it is
		 * too big and simply loop around to get the rest.
		 * That is not the most efficient thing to do, but it
		 * is simple and this path should not be exercised often.
		 */
		buf_len_fsb = XFS_FILBLKS_MIN(imap.br_blockcount,
					      mp->m_writeio_blocks << 8);

		/*
		 * Drop the inode lock while we're doing the I/O.
		 * We'll still have the iolock to protect us.
		 */
		XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);

		loff = XFS_FSB_TO_B(mp, start_zero_fsb);
		lsize = XFS_FSB_TO_B(mp, buf_len_fsb);

		error = xfs_iozero(ip, loff, lsize, end_size);

		if (error) {
			goto out_lock;
		}

		prev_zero_fsb = start_zero_fsb;
		prev_zero_count = buf_len_fsb;
		start_zero_fsb = imap.br_startoff + buf_len_fsb;
		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));

		XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
	}

	return 0;

out_lock:

	XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
	ASSERT(error >= 0);
	return error;
}
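/*
 * Illustrative sketch (not XFS code): the block-range arithmetic used in
 * xfs_zero_eof() above.  XFS_B_TO_FSBT truncates a byte offset down to a
 * block number and XFS_B_TO_FSB rounds it up; the plain divisions below
 * stand in for those macros, with sample inputs only.
 */
#include <stdio.h>

int
main(void)
{
	unsigned long long	isize = 10000;	/* old EOF */
	unsigned long long	offset = 50000;	/* start of the new write */
	unsigned int		bsize = 4096;	/* filesystem block size */
	unsigned long long	last_fsb, start_zero_fsb, end_zero_fsb;

	/* block holding the last existing byte (size - 1, truncated down) */
	last_fsb = (isize - 1) / bsize;
	/* first block wholly beyond the old EOF (size rounded up) */
	start_zero_fsb = (isize + bsize - 1) / bsize;
	/* block holding the byte just before the new write starts */
	end_zero_fsb = (offset - 1) / bsize;

	if (last_fsb == end_zero_fsb)
		printf("only the old last block is affected\n");
	else
		printf("zero blocks %llu..%llu\n", start_zero_fsb, end_zero_fsb);
	return 0;
}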
/*
 * Insert the given inode into the reference cache.
 */
void
xfs_refcache_insert(
	xfs_inode_t	*ip)
{
	vnode_t		*vp;
	xfs_inode_t	*release_ip;
	xfs_inode_t	**refcache;

	ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE));

	/*
	 * If an unmount is busy blowing entries out of the cache,
	 * then don't bother.
	 */
	if (xfs_refcache_busy) {
		return;
	}

	/*
	 * If we tuned the refcache down to zero, don't do anything.
	 */
	if (!xfs_refcache_size) {
		return;
	}

	/*
	 * The inode is already in the refcache, so don't bother
	 * with it.
	 */
	if (ip->i_refcache != NULL) {
		return;
	}

	vp = XFS_ITOV(ip);
	/* ASSERT(vp->v_count > 0); */
	VN_HOLD(vp);

	/*
	 * We allocate the reference cache on use so that we don't
	 * waste the memory on systems not being used as NFS servers.
	 */
	if (xfs_refcache == NULL) {
		refcache = (xfs_inode_t **)kmem_zalloc(XFS_REFCACHE_SIZE_MAX *
						       sizeof(xfs_inode_t *),
						       KM_SLEEP);
	} else {
		refcache = NULL;
	}

	spin_lock(&xfs_refcache_lock);

	/*
	 * If we allocated memory for the refcache above and it still
	 * needs it, then use the memory we allocated.  Otherwise we'll
	 * free the memory below.
	 */
	if (refcache != NULL) {
		if (xfs_refcache == NULL) {
			xfs_refcache = refcache;
			refcache = NULL;
		}
	}

	/*
	 * If an unmount is busy clearing out the cache, don't add new
	 * entries to it.
	 */
	if (xfs_refcache_busy) {
		spin_unlock(&xfs_refcache_lock);
		VN_RELE(vp);
		/*
		 * If we allocated memory for the refcache above but someone
		 * else beat us to using it, then free the memory now.
		 */
		if (refcache != NULL) {
			kmem_free(refcache,
				  XFS_REFCACHE_SIZE_MAX * sizeof(xfs_inode_t *));
		}
		return;
	}
	release_ip = xfs_refcache[xfs_refcache_index];
	if (release_ip != NULL) {
		release_ip->i_refcache = NULL;
		xfs_refcache_count--;
		ASSERT(xfs_refcache_count >= 0);
	}
	xfs_refcache[xfs_refcache_index] = ip;
	ASSERT(ip->i_refcache == NULL);
	ip->i_refcache = &(xfs_refcache[xfs_refcache_index]);
	xfs_refcache_count++;
	ASSERT(xfs_refcache_count <= xfs_refcache_size);
	xfs_refcache_index++;
	if (xfs_refcache_index == xfs_refcache_size) {
		xfs_refcache_index = 0;
	}
	spin_unlock(&xfs_refcache_lock);

	/*
	 * Save the pointer to the inode to be released so that we can
	 * VN_RELE it once we've dropped our inode locks in xfs_rwunlock().
	 * The pointer may be NULL, but that's OK.
	 */
	ip->i_release = release_ip;

	/*
	 * If we allocated memory for the refcache above but someone
	 * else beat us to using it, then free the memory now.
	 */
	if (refcache != NULL) {
		kmem_free(refcache,
			  XFS_REFCACHE_SIZE_MAX * sizeof(xfs_inode_t *));
	}
}
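/*
 * Illustrative sketch (not XFS code): the reference cache above is a
 * fixed-size array with a rotating insertion index, so inserting a new
 * entry evicts whatever previously occupied that slot.  The structure and
 * names below are hypothetical and only show that replacement policy; the
 * evicted object is handed back to the caller, mirroring how release_ip is
 * saved for a later VN_RELE.
 */
#include <stddef.h>

#define CACHE_SLOTS	8

struct obj {
	struct obj	**cache_slot;	/* back pointer into the cache, or NULL */
};

static struct obj	*cache[CACHE_SLOTS];
static int		cache_index;

/* Insert op; return the evicted object (if any) so the caller can release it. */
static struct obj *
cache_insert(struct obj *op)
{
	struct obj	*victim;

	victim = cache[cache_index];
	if (victim != NULL)
		victim->cache_slot = NULL;	/* victim is no longer cached */

	cache[cache_index] = op;
	op->cache_slot = &cache[cache_index];

	if (++cache_index == CACHE_SLOTS)	/* rotate the insertion point */
		cache_index = 0;
	return victim;
}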
/*
 * Get and lock the inode for the caller if it is not already
 * locked within the given transaction.  If it is already locked
 * within the transaction, just increment its lock recursion count
 * and return a pointer to it.
 *
 * For an inode to be locked in a transaction, the inode lock, as
 * opposed to the io lock, must be taken exclusively.  This ensures
 * that the inode can be involved in only 1 transaction at a time.
 * Lock recursion is handled on the io lock, but only for lock modes
 * of equal or lesser strength.  That is, you can recur on the io lock
 * held EXCL with a SHARED request but not vice versa.  Also, if
 * the inode is already a part of the transaction then you cannot
 * go from not holding the io lock to having it EXCL or SHARED.
 *
 * Use the inode cache routine xfs_inode_incore() to find the inode
 * if it is already owned by this transaction.
 *
 * If we don't already own the inode, use xfs_iget() to get it.
 * Since the inode log item structure is embedded in the incore
 * inode structure and is initialized when the inode is brought
 * into memory, there is nothing to do with it here.
 *
 * If the given transaction pointer is NULL, just call xfs_iget().
 * This simplifies code which must handle both cases.
 */
int
xfs_trans_iget(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_ino_t	ino,
	uint		lock_flags,
	xfs_inode_t	**ipp)
{
	int			error;
	xfs_inode_t		*ip;
	xfs_inode_log_item_t	*iip;

	/*
	 * If the transaction pointer is NULL, just call the normal
	 * xfs_iget().
	 */
	if (tp == NULL) {
		return (xfs_iget(mp, NULL, ino, lock_flags, ipp, 0));
	}

	/*
	 * If we find the inode in core with this transaction
	 * pointer in its i_transp field, then we know we already
	 * have it locked.  In this case we just increment the lock
	 * recursion count and return the inode to the caller.
	 * Assert that the inode is already locked in the mode requested
	 * by the caller.  We cannot do lock promotions yet, so
	 * die if someone gets this wrong.
	 */
	if ((ip = xfs_inode_incore(tp->t_mountp, ino, tp)) != NULL) {
		/*
		 * Make sure that the inode lock is held EXCL and
		 * that the io lock is never upgraded when the inode
		 * is already a part of the transaction.
		 */
		ASSERT(ip->i_itemp != NULL);
		ASSERT(lock_flags & XFS_ILOCK_EXCL);
		ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
		ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
		       ismrlocked(&ip->i_iolock, MR_UPDATE));
		ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
		       (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL));
		ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
		       ismrlocked(&ip->i_iolock, (MR_UPDATE | MR_ACCESS)));
		ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
		       (ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY));

		if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) {
			ip->i_itemp->ili_iolock_recur++;
		}
		if (lock_flags & XFS_ILOCK_EXCL) {
			ip->i_itemp->ili_ilock_recur++;
		}
		*ipp = ip;
		return 0;
	}

	ASSERT(lock_flags & XFS_ILOCK_EXCL);
	error = xfs_iget(tp->t_mountp, tp, ino, lock_flags, &ip, 0);
	if (error) {
		return error;
	}
	ASSERT(ip != NULL);

	/*
	 * Get a log_item_desc to point at the new item.
	 */
	if (ip->i_itemp == NULL)
		xfs_inode_item_init(ip, mp);
	iip = ip->i_itemp;
	(void) xfs_trans_add_item(tp, (xfs_log_item_t *)(iip));

	xfs_trans_inode_broot_debug(ip);

	/*
	 * If the IO lock has been acquired, mark that in
	 * the inode log item so we'll know to unlock it
	 * when the transaction commits.
	 */
	ASSERT(iip->ili_flags == 0);
	if (lock_flags & XFS_IOLOCK_EXCL) {
		iip->ili_flags |= XFS_ILI_IOLOCKED_EXCL;
	} else if (lock_flags & XFS_IOLOCK_SHARED) {
		iip->ili_flags |= XFS_ILI_IOLOCKED_SHARED;
	}

	/*
	 * Initialize i_transp so we can find it with xfs_inode_incore()
	 * above.
	 */
	ip->i_transp = tp;

	*ipp = ip;
	return 0;
}
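/*
 * Illustrative sketch (not XFS code): the ili_ilock_recur/ili_iolock_recur
 * counters above implement simple lock recursion within one transaction:
 * the first acquisition takes the real lock, later acquisitions of the
 * same object only bump a counter, and an unlock only drops the real lock
 * once the counter has drained back to zero.  The names below are
 * hypothetical.
 */
struct titem {
	int	locked;		/* 1 while the underlying lock is held */
	int	recur;		/* extra acquisitions within the transaction */
};

static void
titem_lock(struct titem *ti)
{
	if (ti->locked) {
		ti->recur++;		/* already ours: just count it */
		return;
	}
	/* take_real_lock(ti); -- placeholder for the real lock primitive */
	ti->locked = 1;
}

static void
titem_unlock(struct titem *ti)
{
	if (ti->recur > 0) {
		ti->recur--;		/* matched a recursive acquisition */
		return;
	}
	ti->locked = 0;
	/* drop_real_lock(ti); -- placeholder for the real unlock primitive */
}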
/*
 * This gets called by xfs_trans_push_ail(), when IOP_TRYLOCK
 * failed to get the inode flush lock but did get the inode locked SHARED.
 * Here we're trying to see if the inode buffer is incore, and if so whether
 * it's marked delayed write.  If that's the case, we'll initiate a bawrite
 * on that buffer to expedite the process.
 *
 * We aren't holding the AIL_LOCK (or the flush lock) when this gets called,
 * so it is inherently race-y.
 */
STATIC void
xfs_inode_item_pushbuf(
	xfs_inode_log_item_t	*iip)
{
	xfs_inode_t	*ip;
	xfs_mount_t	*mp;
	xfs_buf_t	*bp;
	uint		dopush;

	ip = iip->ili_inode;

	ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS));

	/*
	 * The ili_pushbuf_flag keeps others from
	 * trying to duplicate our effort.
	 */
	ASSERT(iip->ili_pushbuf_flag != 0);
	ASSERT(iip->ili_push_owner == get_thread_id());

	/*
	 * If flushlock isn't locked anymore, chances are that the
	 * inode flush completed and the inode was taken off the AIL.
	 * So, just get out.
	 */
	if ((valusema(&(ip->i_flock)) > 0) ||
	    ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) {
		iip->ili_pushbuf_flag = 0;
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return;
	}

	mp = ip->i_mount;
	bp = xfs_incore(mp->m_ddev_targp, iip->ili_format.ilf_blkno,
			iip->ili_format.ilf_len, XFS_INCORE_TRYLOCK);

	if (bp != NULL) {
		if (XFS_BUF_ISDELAYWRITE(bp)) {
			/*
			 * We were racing with iflush because we don't hold
			 * the AIL_LOCK or the flush lock.  However, at this
			 * point, we have the buffer, and we know that it's
			 * dirty.  So, it's possible that iflush raced with
			 * us, and this item is already taken off the AIL.
			 * If not, we can flush it async.
			 */
			dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) &&
				  (valusema(&(ip->i_flock)) <= 0));
			iip->ili_pushbuf_flag = 0;
			xfs_iunlock(ip, XFS_ILOCK_SHARED);
			xfs_buftrace("INODE ITEM PUSH", bp);
			if (XFS_BUF_ISPINNED(bp)) {
				xfs_log_force(mp, (xfs_lsn_t)0,
					      XFS_LOG_FORCE);
			}
			if (dopush) {
				xfs_bawrite(mp, bp);
			} else {
				xfs_buf_relse(bp);
			}
		} else {
			iip->ili_pushbuf_flag = 0;
			xfs_iunlock(ip, XFS_ILOCK_SHARED);
			xfs_buf_relse(bp);
		}
		return;
	}
	/*
	 * We have to be careful about resetting pushbuf flag too early (above).
	 * Even though in theory we can do it as soon as we have the buflock,
	 * we don't want others to be doing work needlessly.  They'll come to
	 * this function thinking that pushing the buffer is their
	 * responsibility only to find that the buffer is still locked by
	 * another doing the same thing.
	 */
	iip->ili_pushbuf_flag = 0;
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return;
}
/*
 * Unlock the inode associated with the inode log item.
 * Clear the fields of the inode and inode log item that
 * are specific to the current transaction.  If the
 * hold flag is set, do not unlock the inode.
 */
STATIC void
xfs_inode_item_unlock(
	xfs_inode_log_item_t	*iip)
{
	uint		hold;
	uint		iolocked;
	uint		lock_flags;
	xfs_inode_t	*ip;

	ASSERT(iip != NULL);
	ASSERT(iip->ili_inode->i_itemp != NULL);
	ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE));
	ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
		  XFS_ILI_IOLOCKED_EXCL)) ||
	       ismrlocked(&(iip->ili_inode->i_iolock), MR_UPDATE));
	ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
		  XFS_ILI_IOLOCKED_SHARED)) ||
	       ismrlocked(&(iip->ili_inode->i_iolock), MR_ACCESS));
	/*
	 * Clear the transaction pointer in the inode.
	 */
	ip = iip->ili_inode;
	ip->i_transp = NULL;

	/*
	 * If the inode needed a separate buffer with which to log
	 * its extents, then free it now.
	 */
	if (iip->ili_extents_buf != NULL) {
		ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS);
		ASSERT(ip->i_d.di_nextents > 0);
		ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT);
		ASSERT(ip->i_df.if_bytes > 0);
		kmem_free(iip->ili_extents_buf, ip->i_df.if_bytes);
		iip->ili_extents_buf = NULL;
	}
	if (iip->ili_aextents_buf != NULL) {
		ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS);
		ASSERT(ip->i_d.di_anextents > 0);
		ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT);
		ASSERT(ip->i_afp->if_bytes > 0);
		kmem_free(iip->ili_aextents_buf, ip->i_afp->if_bytes);
		iip->ili_aextents_buf = NULL;
	}

	/*
	 * Figure out if we should unlock the inode or not.
	 */
	hold = iip->ili_flags & XFS_ILI_HOLD;

	/*
	 * Before clearing out the flags, remember whether we
	 * are holding the inode's IO lock.
	 */
	iolocked = iip->ili_flags & XFS_ILI_IOLOCKED_ANY;

	/*
	 * Clear out the fields of the inode log item particular
	 * to the current transaction.
	 */
	iip->ili_ilock_recur = 0;
	iip->ili_iolock_recur = 0;
	iip->ili_flags = 0;

	/*
	 * Unlock the inode if XFS_ILI_HOLD was not set.
	 */
	if (!hold) {
		lock_flags = XFS_ILOCK_EXCL;
		if (iolocked & XFS_ILI_IOLOCKED_EXCL) {
			lock_flags |= XFS_IOLOCK_EXCL;
		} else if (iolocked & XFS_ILI_IOLOCKED_SHARED) {
			lock_flags |= XFS_IOLOCK_SHARED;
		}
		xfs_iput(iip->ili_inode, lock_flags);
	}
}
int
xfs_iomap_write_delay(
	xfs_inode_t	*ip,
	xfs_off_t	offset,
	size_t		count,
	int		ioflag,
	xfs_bmbt_irec_t	*ret_imap,
	int		*nmaps)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_iocore_t	*io = &ip->i_iocore;
	xfs_fileoff_t	offset_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_off_t	aligned_offset;
	xfs_fileoff_t	ioalign;
	xfs_fsblock_t	firstblock;
	xfs_extlen_t	extsz;
	xfs_fsize_t	isize;
	int		nimaps;
	xfs_bmbt_irec_t	imap[XFS_WRITE_IMAPS];
	int		prealloc, fsynced = 0;
	int		error;

	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);

	/*
	 * Make sure that the dquots are there.  This doesn't hold
	 * the ilock across a disk read.
	 */
	error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
	if (error)
		return XFS_ERROR(error);

	if (XFS_IS_REALTIME_INODE(ip)) {
		if (!(extsz = ip->i_d.di_extsize))
			extsz = mp->m_sb.sb_rextsize;
	} else {
		extsz = ip->i_d.di_extsize;
	}

	offset_fsb = XFS_B_TO_FSBT(mp, offset);

retry:
	isize = ip->i_d.di_size;
	if (io->io_new_size > isize)
		isize = io->io_new_size;

	error = xfs_iomap_eof_want_preallocate(mp, io, isize, offset, count,
				ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
	if (error)
		return error;

	if (prealloc) {
		aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
		ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
		last_fsb = ioalign + mp->m_writeio_blocks;
	} else {
		last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
	}

	if (prealloc || extsz) {
		error = xfs_iomap_eof_align_last_fsb(mp, io, isize, extsz,
							&last_fsb);
		if (error)
			return error;
	}

	nimaps = XFS_WRITE_IMAPS;
	firstblock = NULLFSBLOCK;
	error = XFS_BMAPI(mp, NULL, io, offset_fsb,
			  (xfs_filblks_t)(last_fsb - offset_fsb),
			  XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
			  XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
			  &nimaps, NULL, NULL);
	if (error && (error != ENOSPC))
		return XFS_ERROR(error);

	/*
	 * If bmapi returned us nothing, and if we didn't get back EDQUOT,
	 * then we must have run out of space - flush delalloc, and retry..
	 */
	if (nimaps == 0) {
		xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
					io, offset, count);
		if (xfs_flush_space(ip, &fsynced, &ioflag))
			return XFS_ERROR(ENOSPC);

		error = 0;
		goto retry;
	}

	if (unlikely(!imap[0].br_startblock && !(io->io_flags & XFS_IOCORE_RT)))
		return xfs_cmn_err_fsblock_zero(ip, &imap[0]);

	*ret_imap = imap[0];
	*nmaps = 1;

	return 0;
}
int					/* error (positive) */
xfs_zero_eof(
	bhv_vnode_t	*vp,
	xfs_iocore_t	*io,
	xfs_off_t	offset,		/* starting I/O offset */
	xfs_fsize_t	isize)		/* current inode size */
{
	struct inode	*ip = vn_to_inode(vp);
	xfs_fileoff_t	start_zero_fsb;
	xfs_fileoff_t	end_zero_fsb;
	xfs_fileoff_t	zero_count_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_fileoff_t	zero_off;
	xfs_fsize_t	zero_len;
	xfs_mount_t	*mp = io->io_mount;
	int		nimaps;
	int		error = 0;
	xfs_bmbt_irec_t	imap;

	ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
	ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
	ASSERT(offset > isize);

	/*
	 * First handle zeroing the block on which isize resides.
	 * We only zero a part of that block so it is handled specially.
	 */
	error = xfs_zero_last_block(ip, io, offset, isize);
	if (error) {
		ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
		ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
		return error;
	}

	/*
	 * Calculate the range between the new size and the old
	 * where blocks needing to be zeroed may exist.  To get the
	 * block where the last byte in the file currently resides,
	 * we need to subtract one from the size and truncate back
	 * to a block boundary.  We subtract 1 in case the size is
	 * exactly on a block boundary.
	 */
	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
	end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
	ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
	if (last_fsb == end_zero_fsb) {
		/*
		 * The size was only incremented on its last block.
		 * We took care of that above, so just return.
		 */
		return 0;
	}

	ASSERT(start_zero_fsb <= end_zero_fsb);
	while (start_zero_fsb <= end_zero_fsb) {
		nimaps = 1;
		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
		error = XFS_BMAPI(mp, NULL, io, start_zero_fsb, zero_count_fsb,
				  0, NULL, 0, &imap, &nimaps, NULL, NULL);
		if (error) {
			ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
			ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
			return error;
		}
		ASSERT(nimaps > 0);

		if (imap.br_state == XFS_EXT_UNWRITTEN ||
		    imap.br_startblock == HOLESTARTBLOCK) {
			/*
			 * This loop handles initializing pages that were
			 * partially initialized by the code below this
			 * loop. It basically zeroes the part of the page
			 * that sits on a hole and sets the page as P_HOLE
			 * and calls remapf if it is a mapped file.
			 */
			start_zero_fsb = imap.br_startoff + imap.br_blockcount;
			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
			continue;
		}

		/*
		 * There are blocks we need to zero.
		 * Drop the inode lock while we're doing the I/O.
		 * We'll still have the iolock to protect us.
		 */
		XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);

		zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
		zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);

		if ((zero_off + zero_len) > offset)
			zero_len = offset - zero_off;

		error = xfs_iozero(ip, zero_off, zero_len);
		if (error) {
			goto out_lock;
		}

		start_zero_fsb = imap.br_startoff + imap.br_blockcount;
		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));

		XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
	}

	return 0;

out_lock:
	XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
	ASSERT(error >= 0);
	return error;
}
int
xfs_iomap_write_delay(
	xfs_inode_t	*ip,
	xfs_off_t	offset,
	size_t		count,
	int		ioflag,
	xfs_bmbt_irec_t	*ret_imap,
	int		*nmaps)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_iocore_t	*io = &ip->i_iocore;
	xfs_fileoff_t	offset_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_fsize_t	isize;
	xfs_fsblock_t	firstblock;
	int		nimaps;
	int		error;
	xfs_bmbt_irec_t	imap[XFS_WRITE_IMAPS];
	int		aeof;
	int		fsynced = 0;

	ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);

	/*
	 * Make sure that the dquots are there.  This doesn't hold
	 * the ilock across a disk read.
	 */
	error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
	if (error)
		return XFS_ERROR(error);

retry:
	isize = ip->i_d.di_size;
	if (io->io_new_size > isize) {
		isize = io->io_new_size;
	}

	aeof = 0;
	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
	/*
	 * If the caller is doing a write at the end of the file,
	 * then extend the allocation (and the buffer used for the write)
	 * out to the file system's write iosize.  We clean up any extra
	 * space left over when the file is closed in xfs_inactive().
	 *
	 * For sync writes, we are flushing delayed allocate space to
	 * try to make additional space available for allocation near
	 * the filesystem full boundary - preallocation hurts in that
	 * situation, of course.
	 */
	if (!(ioflag & BMAPI_SYNC) && ((offset + count) > ip->i_d.di_size)) {
		xfs_off_t	aligned_offset;
		xfs_filblks_t	count_fsb;
		unsigned int	iosize;
		xfs_fileoff_t	ioalign;
		int		n;
		xfs_fileoff_t	start_fsb;

		/*
		 * If there are any real blocks past eof, then don't
		 * do any speculative allocation.
		 */
		start_fsb = XFS_B_TO_FSBT(mp,
					((xfs_ufsize_t)(offset + count - 1)));
		count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
		while (count_fsb > 0) {
			nimaps = XFS_WRITE_IMAPS;
			error = XFS_BMAPI(mp, NULL, io, start_fsb, count_fsb,
					0, &firstblock, 0, imap, &nimaps, NULL);
			if (error) {
				return error;
			}
			for (n = 0; n < nimaps; n++) {
				if (!(io->io_flags & XFS_IOCORE_RT) &&
				    !imap[n].br_startblock) {
					cmn_err(CE_PANIC,"Access to block "
						"zero:  fs <%s> inode: %lld "
						"start_block : %llx start_off "
						": %llx blkcnt : %llx "
						"extent-state : %x \n",
						(ip->i_mount)->m_fsname,
						(long long)ip->i_ino,
						imap[n].br_startblock,
						imap[n].br_startoff,
						imap[n].br_blockcount,
						imap[n].br_state);
				}
				if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
				    (imap[n].br_startblock != DELAYSTARTBLOCK)) {
					goto write_map;
				}
				start_fsb += imap[n].br_blockcount;
				count_fsb -= imap[n].br_blockcount;
			}
		}
		iosize = mp->m_writeio_blocks;
		aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
		ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
		last_fsb = ioalign + iosize;
		aeof = 1;
	}
write_map:
	nimaps = XFS_WRITE_IMAPS;
	firstblock = NULLFSBLOCK;

	/*
	 * If mounted with the "-o swalloc" option, roundup the allocation
	 * request to a stripe width boundary if the file size is >=
	 * stripe width and we are allocating past the allocation eof.
	 */
	if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_swidth
	    && (mp->m_flags & XFS_MOUNT_SWALLOC)
	    && (isize >= XFS_FSB_TO_B(mp, mp->m_swidth)) && aeof) {
		int		eof;
		xfs_fileoff_t	new_last_fsb;

		new_last_fsb = roundup_64(last_fsb, mp->m_swidth);
		error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
		if (error) {
			return error;
		}
		if (eof) {
			last_fsb = new_last_fsb;
		}
	/*
	 * Roundup the allocation request to a stripe unit (m_dalign) boundary
	 * if the file size is >= stripe unit size, and we are allocating past
	 * the allocation eof.
	 */
	} else if (!(io->io_flags & XFS_IOCORE_RT) && mp->m_dalign &&
		   (isize >= XFS_FSB_TO_B(mp, mp->m_dalign)) && aeof) {
		int		eof;
		xfs_fileoff_t	new_last_fsb;

		new_last_fsb = roundup_64(last_fsb, mp->m_dalign);
		error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
		if (error) {
			return error;
		}
		if (eof) {
			last_fsb = new_last_fsb;
		}
	/*
	 * Round up the allocation request to a real-time extent boundary
	 * if the file is on the real-time subvolume.
	 */
	} else if (io->io_flags & XFS_IOCORE_RT && aeof) {
		int		eof;
		xfs_fileoff_t	new_last_fsb;

		new_last_fsb = roundup_64(last_fsb, mp->m_sb.sb_rextsize);
		error = XFS_BMAP_EOF(mp, io, new_last_fsb, XFS_DATA_FORK, &eof);
		if (error) {
			return error;
		}
		if (eof)
			last_fsb = new_last_fsb;
	}
	error = xfs_bmapi(NULL, ip, offset_fsb,
			  (xfs_filblks_t)(last_fsb - offset_fsb),
			  XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
			  XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
			  &nimaps, NULL);
	/*
	 * This can be EDQUOT, if nimaps == 0
	 */
	if (error && (error != ENOSPC)) {
		return XFS_ERROR(error);
	}
	/*
	 * If bmapi returned us nothing, and if we didn't get back EDQUOT,
	 * then we must have run out of space.
	 */
	if (nimaps == 0) {
		xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE,
					io, offset, count);
		if (xfs_flush_space(ip, &fsynced, &ioflag))
			return XFS_ERROR(ENOSPC);

		error = 0;
		goto retry;
	}

	*ret_imap = imap[0];
	*nmaps = 1;
	if (!(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) {
		cmn_err(CE_PANIC,"Access to block zero:  fs <%s> inode: %lld "
			"start_block : %llx start_off : %llx blkcnt : %llx "
			"extent-state : %x \n",
			(ip->i_mount)->m_fsname,
			(long long)ip->i_ino,
			ret_imap->br_startblock, ret_imap->br_startoff,
			ret_imap->br_blockcount, ret_imap->br_state);
	}
	return 0;
}
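/*
 * Illustrative sketch (not XFS code): roundup_64() as used above simply
 * rounds a block number up to the next multiple of the stripe width,
 * stripe unit, or real-time extent size.  A minimal equivalent with
 * sample numbers, not assuming a power-of-two alignment:
 */
#include <stdio.h>

static unsigned long long
roundup_u64(unsigned long long x, unsigned long long align)
{
	return ((x + align - 1) / align) * align;
}

int
main(void)
{
	unsigned long long	last_fsb = 1000;	/* requested end block */
	unsigned long long	swidth = 384;		/* stripe width in blocks */

	/* 1000 rounded up to a multiple of 384 is 1152 */
	printf("%llu\n", roundup_u64(last_fsb, swidth));
	return 0;
}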