STATIC int
xfs_qm_scall_trunc_qfile(
	struct xfs_mount	*mp,
	xfs_ino_t		ino)
{
	struct xfs_inode	*ip;
	struct xfs_trans	*tp;
	int			error;

	if (ino == NULLFSINO)
		return 0;

	error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
	if (error)
		return error;

	xfs_ilock(ip, XFS_IOLOCK_EXCL);

	tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
		goto out_put;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	ip->i_d.di_size = 0;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
	if (error) {
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
				     XFS_TRANS_ABORT);
		goto out_unlock;
	}

	ASSERT(ip->i_d.di_nextents == 0);

	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
out_put:
	IRELE(ip);
	return error;
}
void
libxfs_trans_ijoin_ref(
	xfs_trans_t	*tp,
	xfs_inode_t	*ip,
	int		lock_flags)
{
	ASSERT(ip->i_transp == tp);
	ASSERT(ip->i_itemp != NULL);

	xfs_trans_ijoin(tp, ip, lock_flags);

#ifdef XACT_DEBUG
	fprintf(stderr, "ijoin_ref'd inode %llu, transaction %p\n",
		ip->i_ino, tp);
#endif
}
/*
 * Cancel CoW reservations for some byte range of an inode.
 *
 * If cancel_real is true this function cancels all COW fork extents for the
 * inode; if cancel_real is false, real extents are not cleared.
 */
int
xfs_reflink_cancel_cow_range(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		count,
	bool			cancel_real)
{
	struct xfs_trans	*tp;
	xfs_fileoff_t		offset_fsb;
	xfs_fileoff_t		end_fsb;
	int			error;

	trace_xfs_reflink_cancel_cow_range(ip, offset, count);
	ASSERT(xfs_is_reflink_inode(ip));

	offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
	if (count == NULLFILEOFF)
		end_fsb = NULLFILEOFF;
	else
		end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);

	/* Start a rolling transaction to remove the mappings */
	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
			0, 0, 0, &tp);
	if (error)
		goto out;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	/* Scrape out the old CoW reservations */
	error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb,
			cancel_real);
	if (error)
		goto out_cancel;

	error = xfs_trans_commit(tp);

	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
	trace_xfs_reflink_cancel_cow_range_error(ip, error, _RET_IP_);
	return error;
}
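/*
 * Hypothetical call site for xfs_reflink_cancel_cow_range() above, shown
 * for illustration only: dropping every CoW reservation on an inode, e.g.
 * during inactivation. Passing offset 0 and count NULLFILEOFF covers the
 * whole file; cancel_real = true also removes real (already written) COW
 * fork extents, per the comment on the function. The wrapper name and
 * simplified error handling are assumptions, not taken from any
 * particular kernel version.
 */
STATIC int
example_drop_all_cow(
	struct xfs_inode	*ip)
{
	int			error = 0;

	if (xfs_is_reflink_inode(ip))
		error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF,
				true);
	return error;
}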
/*
 * Update destination inode size & cowextsize hint, if necessary.
 */
STATIC int
xfs_reflink_update_dest(
	struct xfs_inode	*dest,
	xfs_off_t		newlen,
	xfs_extlen_t		cowextsize,
	bool			is_dedupe)
{
	struct xfs_mount	*mp = dest->i_mount;
	struct xfs_trans	*tp;
	int			error;

	if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
		return 0;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
	if (error)
		goto out_error;

	xfs_ilock(dest, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, dest, XFS_ILOCK_EXCL);

	if (newlen > i_size_read(VFS_I(dest))) {
		trace_xfs_reflink_update_inode_size(dest, newlen);
		i_size_write(VFS_I(dest), newlen);
		dest->i_d.di_size = newlen;
	}

	if (cowextsize) {
		dest->i_d.di_cowextsize = cowextsize;
		dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
	}

	if (!is_dedupe) {
		xfs_trans_ichgtime(tp, dest,
				   XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	}
	xfs_trans_log_inode(tp, dest, XFS_ILOG_CORE);

	error = xfs_trans_commit(tp);
	if (error)
		goto out_error;
	return error;

out_error:
	trace_xfs_reflink_update_inode_size_error(dest, error, _RET_IP_);
	return error;
}
/*
 * Set up the transaction structure for the setattr operation, checking that we
 * have permission to do so. On success, return a clean transaction and the
 * inode locked exclusively ready for further operation specific checks. On
 * failure, return an error without modifying or locking the inode.
 *
 * The inode might already be IO locked on call. If this is the case, it is
 * indicated in @join_flags and we take full responsibility for ensuring they
 * are unlocked from now on. Hence if we have an error here, we still have to
 * unlock them. Otherwise, once they are joined to the transaction, they will
 * be unlocked on commit/cancel.
 */
static struct xfs_trans *
xfs_ioctl_setattr_get_trans(
	struct xfs_inode	*ip,
	int			join_flags)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error = -EROFS;

	if (mp->m_flags & XFS_MOUNT_RDONLY)
		goto out_unlock;
	error = -EIO;
	if (XFS_FORCED_SHUTDOWN(mp))
		goto out_unlock;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
	if (error)
		goto out_cancel;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags);
	join_flags = 0;

	/*
	 * CAP_FOWNER overrides the following restrictions:
	 *
	 * The user ID of the calling process must be equal to the file owner
	 * ID, except in cases where the CAP_FSETID capability is applicable.
	 */
	if (!inode_owner_or_capable(VFS_I(ip))) {
		error = -EPERM;
		goto out_cancel;
	}

	if (mp->m_flags & XFS_MOUNT_WSYNC)
		xfs_trans_set_sync(tp);

	return tp;

out_cancel:
	xfs_trans_cancel(tp);
out_unlock:
	if (join_flags)
		xfs_iunlock(ip, join_flags);
	return ERR_PTR(error);
}
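/*
 * Sketch of how a caller consumes xfs_ioctl_setattr_get_trans(): on
 * success the inode comes back locked and joined to a clean transaction,
 * so the caller only performs its operation-specific checks and commits;
 * on failure everything has already been unwound and only the ERR_PTR
 * needs decoding. The function name and body are illustrative, not from
 * the kernel; the one-argument xfs_trans_commit() follows the newer
 * functions in this collection.
 */
static int
example_setattr_caller(
	struct xfs_inode	*ip,
	int			join_flags)
{
	struct xfs_trans	*tp;

	tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
	if (IS_ERR(tp))
		return PTR_ERR(tp);	/* inode already unlocked for us */

	/* ... operation-specific checks and attribute changes here ... */

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	return xfs_trans_commit(tp);	/* commit unlocks the joined inode */
}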
STATIC int
xfs_qm_scall_trunc_qfile(
	struct xfs_mount	*mp,
	xfs_ino_t		ino)
{
	struct xfs_inode	*ip;
	struct xfs_trans	*tp;
	int			error;

	if (ino == NULLFSINO)
		return 0;

	error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
	if (error)
		return error;

	xfs_ilock(ip, XFS_IOLOCK_EXCL);

	tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
	error = xfs_trans_reserve(tp, 0,
				  XFS_ITRUNCATE_LOG_RES(mp),
				  0, XFS_TRANS_PERM_LOG_RES,
				  XFS_ITRUNCATE_LOG_COUNT);
	if (error) {
		xfs_trans_cancel(tp, 0);
		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
		goto out_put;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip);

	error = xfs_itruncate_data(&tp, ip, 0);
	if (error) {
		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
				     XFS_TRANS_ABORT);
		goto out_unlock;
	}

	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
out_put:
	IRELE(ip);
	return error;
}
/*
 * Get an inode and join it to the transaction.
 */
int
xfs_trans_iget(
	xfs_mount_t	*mp,
	xfs_trans_t	*tp,
	xfs_ino_t	ino,
	uint		flags,
	uint		lock_flags,
	xfs_inode_t	**ipp)
{
	int		error;

	error = xfs_iget(mp, tp, ino, flags, lock_flags, ipp);
	if (!error && tp) {
		xfs_trans_ijoin(tp, *ipp);
		(*ipp)->i_itemp->ili_lock_flags = lock_flags;
	}
	return error;
}
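/*
 * Illustrative caller for xfs_trans_iget() in this era of the API: the
 * helper both locks the inode and joins it to the transaction, recording
 * lock_flags so the unlock happens automatically at commit/cancel time.
 * The reservation values follow the old-style xfs_trans_reserve() calls
 * seen elsewhere in this collection; the function itself is hypothetical.
 */
STATIC int
example_dirty_inode(
	xfs_mount_t	*mp,
	xfs_ino_t	ino)
{
	xfs_trans_t	*tp;
	xfs_inode_t	*ip;
	int		error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	error = xfs_trans_iget(mp, tp, ino, 0, XFS_ILOCK_EXCL, &ip);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	/* the inode is now locked, joined, and unlocked again at commit */
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	return xfs_trans_commit(tp, 0);
}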
STATIC int
xfs_vn_update_time(
	struct inode		*inode,
	struct timespec		*now,
	int			flags)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;

	trace_xfs_update_time(ip);

	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return -error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (flags & S_CTIME) {
		inode->i_ctime = *now;
		ip->i_d.di_ctime.t_sec = (__int32_t)now->tv_sec;
		ip->i_d.di_ctime.t_nsec = (__int32_t)now->tv_nsec;
	}
	if (flags & S_MTIME) {
		inode->i_mtime = *now;
		ip->i_d.di_mtime.t_sec = (__int32_t)now->tv_sec;
		ip->i_d.di_mtime.t_nsec = (__int32_t)now->tv_nsec;
	}
	if (flags & S_ATIME) {
		inode->i_atime = *now;
		ip->i_d.di_atime.t_sec = (__int32_t)now->tv_sec;
		ip->i_d.di_atime.t_nsec = (__int32_t)now->tv_nsec;
	}
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
	return -xfs_trans_commit(tp, 0);
}
/*
 * This is a subroutine for xfs_write() and other writers (xfs_ioctl)
 * which clears the setuid and setgid bits when a file is written.
 */
int
xfs_write_clear_setuid(
	xfs_inode_t	*ip)
{
	xfs_mount_t	*mp;
	xfs_trans_t	*tp;
	int		error;

	mp = ip->i_mount;
	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
	if ((error = xfs_trans_reserve(tp, 0,
				      XFS_WRITEID_LOG_RES(mp),
				      0, 0, 0))) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	ip->i_d.di_mode &= ~S_ISUID;

	/*
	 * Note that we don't have to worry about mandatory
	 * file locking being disabled here because we only
	 * clear the S_ISGID bit if the Group execute bit is
	 * on, but if it was on then mandatory locking wouldn't
	 * have been enabled.
	 */
	if (ip->i_d.di_mode & S_IXGRP) {
		ip->i_d.di_mode &= ~S_ISGID;
	}
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	xfs_trans_set_sync(tp);
	error = xfs_trans_commit(tp, 0, NULL);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return 0;
}
STATIC int
xfs_commit_dummy_trans(
	struct xfs_mount	*mp,
	uint			flags)
{
	struct xfs_inode	*ip = mp->m_rootip;
	struct xfs_trans	*tp;
	int			error;
	int			log_flags = XFS_LOG_FORCE;

	if (flags & SYNC_WAIT)
		log_flags |= XFS_LOG_SYNC;

	/*
	 * Put a dummy transaction in the log to tell recovery
	 * that all others are OK.
	 */
	tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
	error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);

	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_ihold(tp, ip);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	error = xfs_trans_commit(tp, 0);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);

	/* the log force ensures this transaction is pushed to disk */
	xfs_log_force(mp, 0, log_flags);
	return error;
}
/*
 * This is called by xfs_inactive to free any blocks beyond eof
 * when the link count isn't zero and by xfs_dm_punch_hole() when
 * punching a hole to EOF.
 */
int
xfs_free_eofblocks(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,
	bool		need_iolock)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_fileoff_t	end_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_filblks_t	map_len;
	int		nimaps;
	xfs_bmbt_irec_t	imap;

	/*
	 * Figure out if there are any blocks beyond the end
	 * of the file.  If not, then there is nothing to do.
	 */
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	if (last_fsb <= end_fsb)
		return 0;
	map_len = last_fsb - end_fsb;

	nimaps = 1;
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (!error && (nimaps != 0) &&
	    (imap.br_startblock != HOLESTARTBLOCK ||
	     ip->i_delayed_blks)) {
		/*
		 * Attach the dquots to the inode up front.
		 */
		error = xfs_qm_dqattach(ip, 0);
		if (error)
			return error;

		/*
		 * There are blocks after the end of file.
		 * Free them up now by truncating the file to
		 * its current size.
		 */
		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);

		if (need_iolock) {
			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
				xfs_trans_cancel(tp, 0);
				return EAGAIN;
			}
		}

		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			if (need_iolock)
				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return error;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip, 0);

		/*
		 * Do not update the on-disk file size.  If we update the
		 * on-disk file size and then the system crashes before the
		 * contents of the file are flushed to disk then the files
		 * may be full of holes (ie NULL files bug).
		 */
		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
					      XFS_ISIZE(ip));
		if (error) {
			/*
			 * If we get an error at this point we simply don't
			 * bother truncating the file.
			 */
			xfs_trans_cancel(tp,
					 (XFS_TRANS_RELEASE_LOG_RES |
					  XFS_TRANS_ABORT));
		} else {
			error = xfs_trans_commit(tp,
						XFS_TRANS_RELEASE_LOG_RES);
			if (!error)
				xfs_inode_clear_eofblocks_tag(ip);
		}

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (need_iolock)
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	}
	return error;
}
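/*
 * Sketch of a call site for xfs_free_eofblocks() above, e.g. trimming
 * speculative preallocation when a write-dirtied file is closed. With
 * need_iolock = true the function takes the IOLOCK itself via a trylock
 * and returns EAGAIN (a positive errno in this era) on contention, so a
 * background scan can simply retry later. The surrounding context is
 * assumed for illustration.
 */
	error = xfs_free_eofblocks(mp, ip, true);
	if (error == EAGAIN)
		error = 0;	/* lock was contended; retry on a later scan */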
int
xfs_swap_extents(
	xfs_inode_t	*ip,	/* target inode */
	xfs_inode_t	*tip,	/* tmp inode */
	xfs_swapext_t	*sxp)
{
	xfs_mount_t	*mp = ip->i_mount;
	xfs_trans_t	*tp;
	xfs_bstat_t	*sbp = &sxp->sx_stat;
	xfs_ifork_t	*tempifp, *ifp, *tifp;
	int		src_log_flags, target_log_flags;
	int		error = 0;
	int		aforkblks = 0;
	int		taforkblks = 0;
	__uint64_t	tmp;

	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
	if (!tempifp) {
		error = XFS_ERROR(ENOMEM);
		goto out;
	}

	/*
	 * we have to do two separate lock calls here to keep lockdep
	 * happy. If we try to get all the locks in one call, lock will
	 * report false positives when we drop the ILOCK and regain them
	 * below.
	 */
	xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);

	/* Verify that both files have the same format */
	if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
		error = XFS_ERROR(EINVAL);
		goto out_unlock;
	}

	/* Verify both files are either real-time or non-realtime */
	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
		error = XFS_ERROR(EINVAL);
		goto out_unlock;
	}

	error = -filemap_write_and_wait(VFS_I(tip)->i_mapping);
	if (error)
		goto out_unlock;
	truncate_pagecache_range(VFS_I(tip), 0, -1);

	/* Verify O_DIRECT for ftmp */
	if (VN_CACHED(VFS_I(tip)) != 0) {
		error = XFS_ERROR(EINVAL);
		goto out_unlock;
	}

	/* Verify all data are being swapped */
	if (sxp->sx_offset != 0 ||
	    sxp->sx_length != ip->i_d.di_size ||
	    sxp->sx_length != tip->i_d.di_size) {
		error = XFS_ERROR(EFAULT);
		goto out_unlock;
	}

	trace_xfs_swap_extent_before(ip, 0);
	trace_xfs_swap_extent_before(tip, 1);

	/* check inode formats now that data is flushed */
	error = xfs_swap_extents_check_format(ip, tip);
	if (error) {
		xfs_notice(mp,
		    "%s: inode 0x%llx format is incompatible for exchanging.",
				__func__, ip->i_ino);
		goto out_unlock;
	}

	/*
	 * Compare the current change & modify times with that
	 * passed in.  If they differ, we abort this swap.
	 * This is the mechanism used to ensure the calling
	 * process that the file was not changed out from
	 * under it.
	 */
	if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
	    (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
	    (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
	    (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
		error = XFS_ERROR(EBUSY);
		goto out_unlock;
	}

	/* We need to fail if the file is memory mapped.  Once we have tossed
	 * all existing pages, the page fault will have no option
	 * but to go to the filesystem for pages. By making the page fault call
	 * vop_read (or write in the case of autogrow) they block on the iolock
	 * until we have switched the extents.
	 */
	if (VN_MAPPED(VFS_I(ip))) {
		error = XFS_ERROR(EBUSY);
		goto out_unlock;
	}

	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	xfs_iunlock(tip, XFS_ILOCK_EXCL);

	/*
	 * There is a race condition here since we gave up the
	 * ilock.  However, the data fork will not change since
	 * we have the iolock (locked for truncation too) so we
	 * are safe.  We don't really care if non-io related
	 * fields change.
	 */
	truncate_pagecache_range(VFS_I(ip), 0, -1);

	tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
	if (error) {
		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
		xfs_iunlock(tip, XFS_IOLOCK_EXCL);
		xfs_trans_cancel(tp, 0);
		goto out;
	}
	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);

	/*
	 * Count the number of extended attribute blocks
	 */
	if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK,
					      &aforkblks);
		if (error)
			goto out_trans_cancel;
	}
	if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
	     (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
					      &taforkblks);
		if (error)
			goto out_trans_cancel;
	}

	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);

	/*
	 * Before we've swapped the forks, let's set the owners of the forks
	 * appropriately. We have to do this as we are demand paging the btree
	 * buffers, and so the validation done on read will expect the owner
	 * field to be correctly set. Once we change the owners, we can swap
	 * the inode forks.
	 *
	 * Note the trickiness in setting the log flags - we set the owner log
	 * flag on the opposite inode (i.e. the inode we are setting the new
	 * owner to be) because once we swap the forks and log that, log
	 * recovery is going to see the fork as owned by the swapped inode,
	 * not the pre-swapped inodes.
	 */
	src_log_flags = XFS_ILOG_CORE;
	target_log_flags = XFS_ILOG_CORE;

	if (ip->i_d.di_version == 3 &&
	    ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
		target_log_flags |= XFS_ILOG_DOWNER;
		error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK,
					      tip->i_ino, NULL);
		if (error)
			goto out_trans_cancel;
	}

	if (tip->i_d.di_version == 3 &&
	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
		src_log_flags |= XFS_ILOG_DOWNER;
		error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK,
					      ip->i_ino, NULL);
		if (error)
			goto out_trans_cancel;
	}

	/*
	 * Swap the data forks of the inodes
	 */
	ifp = &ip->i_df;
	tifp = &tip->i_df;
	*tempifp = *ifp;	/* struct copy */
	*ifp = *tifp;		/* struct copy */
	*tifp = *tempifp;	/* struct copy */

	/*
	 * Fix the on-disk inode values
	 */
	tmp = (__uint64_t)ip->i_d.di_nblocks;
	ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
	tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;

	tmp = (__uint64_t) ip->i_d.di_nextents;
	ip->i_d.di_nextents = tip->i_d.di_nextents;
	tip->i_d.di_nextents = tmp;

	tmp = (__uint64_t) ip->i_d.di_format;
	ip->i_d.di_format = tip->i_d.di_format;
	tip->i_d.di_format = tmp;

	/*
	 * The extents in the source inode could still contain speculative
	 * preallocation beyond EOF (e.g. the file is open but not modified
	 * while defrag is in progress). In that case, we need to copy over the
	 * number of delalloc blocks the data fork in the source inode is
	 * tracking beyond EOF so that when the fork is truncated away when the
	 * temporary inode is unlinked we don't underrun the i_delayed_blks
	 * counter on that inode.
	 */
	ASSERT(tip->i_delayed_blks == 0);
	tip->i_delayed_blks = ip->i_delayed_blks;
	ip->i_delayed_blks = 0;

	switch (ip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		/* If the extents fit in the inode, fix the
		 * pointer.  Otherwise it's already NULL or
		 * pointing to the extent.
		 */
		if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			ifp->if_u1.if_extents =
				ifp->if_u2.if_inline_ext;
		}
		src_log_flags |= XFS_ILOG_DEXT;
		break;
	case XFS_DINODE_FMT_BTREE:
		ASSERT(ip->i_d.di_version < 3 ||
		       (src_log_flags & XFS_ILOG_DOWNER));
		src_log_flags |= XFS_ILOG_DBROOT;
		break;
	}

	switch (tip->i_d.di_format) {
	case XFS_DINODE_FMT_EXTENTS:
		/* If the extents fit in the inode, fix the
		 * pointer.  Otherwise it's already NULL or
		 * pointing to the extent.
		 */
		if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
			tifp->if_u1.if_extents =
				tifp->if_u2.if_inline_ext;
		}
		target_log_flags |= XFS_ILOG_DEXT;
		break;
	case XFS_DINODE_FMT_BTREE:
		target_log_flags |= XFS_ILOG_DBROOT;
		ASSERT(tip->i_d.di_version < 3 ||
		       (target_log_flags & XFS_ILOG_DOWNER));
		break;
	}

	xfs_trans_log_inode(tp, ip,  src_log_flags);
	xfs_trans_log_inode(tp, tip, target_log_flags);

	/*
	 * If this is a synchronous mount, make sure that the
	 * transaction goes to disk before returning to the user.
	 */
	if (mp->m_flags & XFS_MOUNT_WSYNC)
		xfs_trans_set_sync(tp);

	error = xfs_trans_commit(tp, 0);

	trace_xfs_swap_extent_after(ip, 0);
	trace_xfs_swap_extent_after(tip, 1);
out:
	kmem_free(tempifp);
	return error;

out_unlock:
	xfs_iunlock(ip,  XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
	goto out;

out_trans_cancel:
	xfs_trans_cancel(tp, 0);
	goto out_unlock;
}
int
xfs_bmap_rtalloc(
	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
{
	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
	int		error;		/* error return value */
	xfs_mount_t	*mp;		/* mount point structure */
	xfs_extlen_t	prod = 0;	/* product factor for allocators */
	xfs_extlen_t	ralen = 0;	/* realtime allocation length */
	xfs_extlen_t	align;		/* minimum allocation alignment */
	xfs_rtblock_t	rtb;

	mp = ap->ip->i_mount;
	align = xfs_get_extsz_hint(ap->ip);
	prod = align / mp->m_sb.sb_rextsize;
	error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
					align, 1, ap->eof, 0,
					ap->conv, &ap->offset, &ap->length);
	if (error)
		return error;
	ASSERT(ap->length);
	ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);

	/*
	 * If the offset & length are not perfectly aligned
	 * then kill prod, it will just get us in trouble.
	 */
	if (do_mod(ap->offset, align) || ap->length % align)
		prod = 1;
	/*
	 * Set ralen to be the actual requested length in rtextents.
	 */
	ralen = ap->length / mp->m_sb.sb_rextsize;
	/*
	 * If the old value was close enough to MAXEXTLEN that
	 * we rounded up to it, cut it back so it's valid again.
	 * Note that if it's a really large request (bigger than
	 * MAXEXTLEN), we don't hear about that number, and can't
	 * adjust the starting point to match it.
	 */
	if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
		ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;

	/*
	 * Lock out other modifications to the RT bitmap inode.
	 */
	xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);

	/*
	 * If it's an allocation to an empty file at offset 0,
	 * pick an extent that will space things out in the rt area.
	 */
	if (ap->eof && ap->offset == 0) {
		xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */

		error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
		if (error)
			return error;
		ap->blkno = rtx * mp->m_sb.sb_rextsize;
	} else {
		ap->blkno = 0;
	}

	xfs_bmap_adjacent(ap);

	/*
	 * Realtime allocation, done through xfs_rtallocate_extent.
	 */
	atype = ap->blkno == 0 ?  XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
	do_div(ap->blkno, mp->m_sb.sb_rextsize);
	rtb = ap->blkno;
	ap->length = ralen;
	if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
				&ralen, atype, ap->wasdel, prod, &rtb)))
		return error;
	if (rtb == NULLFSBLOCK && prod > 1 &&
	    (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1,
					   ap->length, &ralen, atype,
					   ap->wasdel, 1, &rtb)))
		return error;
	ap->blkno = rtb;
	if (ap->blkno != NULLFSBLOCK) {
		ap->blkno *= mp->m_sb.sb_rextsize;
		ralen *= mp->m_sb.sb_rextsize;
		ap->length = ralen;
		ap->ip->i_d.di_nblocks += ralen;
		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
		if (ap->wasdel)
			ap->ip->i_delayed_blks -= ralen;
		/*
		 * Adjust the disk quota also. This was reserved
		 * earlier.
		 */
		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
			ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
					XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
	} else {
		ap->length = 0;
	}
	return 0;
}
int
xfs_free_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	int			committed;
	int			done;
	xfs_fileoff_t		endoffset_fsb;
	int			error;
	xfs_fsblock_t		firstfsb;
	xfs_bmap_free_t		free_list;
	xfs_bmbt_irec_t		imap;
	xfs_off_t		ioffset;
	xfs_extlen_t		mod = 0;
	xfs_mount_t		*mp;
	int			nimap;
	uint			resblks;
	xfs_off_t		rounding;
	int			rt;
	xfs_fileoff_t		startoffset_fsb;
	xfs_trans_t		*tp;

	mp = ip->i_mount;

	trace_xfs_free_file_space(ip);

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return error;

	error = 0;
	if (len <= 0)	/* if nothing being freed */
		return error;
	rt = XFS_IS_REALTIME_INODE(ip);
	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);

	/* wait for the completion of any pending DIOs */
	inode_dio_wait(VFS_I(ip));

	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
	ioffset = offset & ~(rounding - 1);
	error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
					      ioffset, -1);
	if (error)
		goto out;
	truncate_pagecache_range(VFS_I(ip), ioffset, -1);

	/*
	 * Need to zero the stuff we're not freeing, on disk.
	 * If it's a realtime file & can't use unwritten extents then we
	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
	 * will take care of it for us.
	 */
	if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
		nimap = 1;
		error = xfs_bmapi_read(ip, startoffset_fsb, 1,
					&imap, &nimap, 0);
		if (error)
			goto out;
		ASSERT(nimap == 0 || nimap == 1);
		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
			xfs_daddr_t	block;

			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
			block = imap.br_startblock;
			mod = do_div(block, mp->m_sb.sb_rextsize);
			if (mod)
				startoffset_fsb += mp->m_sb.sb_rextsize - mod;
		}
		nimap = 1;
		error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
					&imap, &nimap, 0);
		if (error)
			goto out;
		ASSERT(nimap == 0 || nimap == 1);
		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
			mod++;
			if (mod && (mod != mp->m_sb.sb_rextsize))
				endoffset_fsb -= mod;
		}
	}
	if ((done = (endoffset_fsb <= startoffset_fsb)))
		/*
		 * One contiguous piece to clear
		 */
		error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
	else {
		/*
		 * Some full blocks, possibly two pieces to clear
		 */
		if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
			error = xfs_zero_remaining_bytes(ip, offset,
				XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
		if (!error &&
		    XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
			error = xfs_zero_remaining_bytes(ip,
				XFS_FSB_TO_B(mp, endoffset_fsb),
				offset + len - 1);
	}

	/*
	 * free file space until done or until there is an error
	 */
	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
	while (!error && !done) {
		/*
		 * allocate and setup the transaction. Allow this
		 * transaction to dip into the reserve blocks to ensure
		 * the freeing of the space succeeds at ENOSPC.
		 */
		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
		tp->t_flags |= XFS_TRANS_RESERVE;
		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);

		/*
		 * check for running out of space
		 */
		if (error) {
			/*
			 * Free the transaction structure.
			 */
			ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
			xfs_trans_cancel(tp, 0);
			break;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xfs_trans_reserve_quota(tp, mp,
				ip->i_udquot, ip->i_gdquot, ip->i_pdquot,
				resblks, 0, XFS_QMOPT_RES_REGBLKS);
		if (error)
			goto error1;

		xfs_trans_ijoin(tp, ip, 0);

		/*
		 * issue the bunmapi() call to free the blocks
		 */
		xfs_bmap_init(&free_list, &firstfsb);
		error = xfs_bunmapi(tp, ip, startoffset_fsb,
				  endoffset_fsb - startoffset_fsb,
				  0, 2, &firstfsb, &free_list, &done);
		if (error) {
			goto error0;
		}

		/*
		 * complete the transaction
		 */
		error = xfs_bmap_finish(&tp, &free_list, &committed);
		if (error) {
			goto error0;
		}

		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}

 out:
	return error;

 error0:
	xfs_bmap_cancel(&free_list);
 error1:
	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	goto out;
}
STATIC int
xfs_ioctl_setattr(
	xfs_inode_t		*ip,
	struct fsxattr		*fa,
	int			mask)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	unsigned int		lock_flags = 0;
	struct xfs_dquot	*udqp = NULL;
	struct xfs_dquot	*gdqp = NULL;
	struct xfs_dquot	*olddquot = NULL;
	int			code;

	trace_xfs_ioctl_setattr(ip);

	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return XFS_ERROR(EROFS);
	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/*
	 * Disallow 32bit project ids when projid32bit feature is not enabled.
	 */
	if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
			!xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
		return XFS_ERROR(EINVAL);

	/*
	 * If disk quotas are on, we make sure that the dquots do exist on
	 * disk, before we start any other transactions. Trying to do this
	 * later is messy. We don't care to take a readlock to look at the
	 * ids in inode here, because we can't hold it across the
	 * trans_reserve. If the IDs do change before we take the ilock,
	 * we're covered because the i_*dquot fields will get updated anyway.
	 */
	if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
		code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
					 ip->i_d.di_gid, fa->fsx_projid,
					 XFS_QMOPT_PQUOTA, &udqp, &gdqp);
		if (code)
			return code;
	}

	/*
	 * For the other attributes, we acquire the inode lock and
	 * first do an error checking pass.
	 */
	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
	code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
	if (code)
		goto error_return;

	lock_flags = XFS_ILOCK_EXCL;
	xfs_ilock(ip, lock_flags);

	/*
	 * CAP_FOWNER overrides the following restrictions:
	 *
	 * The user ID of the calling process must be equal
	 * to the file owner ID, except in cases where the
	 * CAP_FSETID capability is applicable.
	 */
	if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
		code = XFS_ERROR(EPERM);
		goto error_return;
	}

	/*
	 * Do a quota reservation only if projid is actually going to change.
	 */
	if (mask & FSX_PROJID) {
		if (XFS_IS_QUOTA_RUNNING(mp) &&
		    XFS_IS_PQUOTA_ON(mp) &&
		    xfs_get_projid(ip) != fa->fsx_projid) {
			ASSERT(tp);
			code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
						capable(CAP_FOWNER) ?
						XFS_QMOPT_FORCE_RES : 0);
			if (code)	/* out of quota */
				goto error_return;
		}
	}

	if (mask & FSX_EXTSIZE) {
		/*
		 * Can't change extent size if any extents are allocated.
		 */
		if (ip->i_d.di_nextents &&
		    ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
		     fa->fsx_extsize)) {
			code = XFS_ERROR(EINVAL);	/* EFBIG? */
			goto error_return;
		}

		/*
		 * Extent size must be a multiple of the appropriate block
		 * size, if set at all. It must also be smaller than the
		 * maximum extent size supported by the filesystem.
		 *
		 * Also, for non-realtime files, limit the extent size hint to
		 * half the size of the AGs in the filesystem so alignment
		 * doesn't result in extents larger than an AG.
		 */
		if (fa->fsx_extsize != 0) {
			xfs_extlen_t	size;
			xfs_fsblock_t	extsize_fsb;

			extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
			if (extsize_fsb > MAXEXTLEN) {
				code = XFS_ERROR(EINVAL);
				goto error_return;
			}

			if (XFS_IS_REALTIME_INODE(ip) ||
			    ((mask & FSX_XFLAGS) &&
			    (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
				size = mp->m_sb.sb_rextsize <<
				       mp->m_sb.sb_blocklog;
			} else {
				size = mp->m_sb.sb_blocksize;
				if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
					code = XFS_ERROR(EINVAL);
					goto error_return;
				}
			}

			if (fa->fsx_extsize % size) {
				code = XFS_ERROR(EINVAL);
				goto error_return;
			}
		}
	}

	if (mask & FSX_XFLAGS) {
		/*
		 * Can't change realtime flag if any extents are allocated.
		 */
		if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
		    (XFS_IS_REALTIME_INODE(ip)) !=
		    (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
			code = XFS_ERROR(EINVAL);	/* EFBIG? */
			goto error_return;
		}

		/*
		 * If realtime flag is set then must have realtime data.
		 */
		if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
			if ((mp->m_sb.sb_rblocks == 0) ||
			    (mp->m_sb.sb_rextsize == 0) ||
			    (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
				code = XFS_ERROR(EINVAL);
				goto error_return;
			}
		}

		/*
		 * Can't modify an immutable/append-only file unless
		 * we have appropriate permission.
		 */
		if ((ip->i_d.di_flags &
				(XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
		     (fa->fsx_xflags &
				(XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
		    !capable(CAP_LINUX_IMMUTABLE)) {
			code = XFS_ERROR(EPERM);
			goto error_return;
		}
	}

	xfs_trans_ijoin(tp, ip, 0);

	/*
	 * Change file ownership.  Must be the owner or privileged.
	 */
	if (mask & FSX_PROJID) {
		/*
		 * CAP_FSETID overrides the following restrictions:
		 *
		 * The set-user-ID and set-group-ID bits of a file will be
		 * cleared upon successful return from chown()
		 */
		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
		    !capable(CAP_FSETID))
			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);

		/*
		 * Change the ownerships and register quota modifications
		 * in the transaction.
		 */
		if (xfs_get_projid(ip) != fa->fsx_projid) {
			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
				olddquot = xfs_qm_vop_chown(tp, ip,
							&ip->i_gdquot, gdqp);
			}
			xfs_set_projid(ip, fa->fsx_projid);

			/*
			 * We may have to rev the inode as well as
			 * the superblock version number since projids didn't
			 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
			 */
			if (ip->i_d.di_version == 1)
				xfs_bump_ino_vers2(tp, ip);
		}
	}

	if (mask & FSX_EXTSIZE)
		ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
	if (mask & FSX_XFLAGS) {
		xfs_set_diflags(ip, fa->fsx_xflags);
		xfs_diflags_to_linux(ip);
	}

	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	XFS_STATS_INC(xs_ig_attrchg);

	/*
	 * If this is a synchronous mount, make sure that the
	 * transaction goes to disk before returning to the user.
	 * This is slightly sub-optimal in that truncates require
	 * two sync transactions instead of one for wsync filesystems.
	 * One for the truncate and one for the timestamps since we
	 * don't want to change the timestamps unless we're sure the
	 * truncate worked.  Truncates are less than 1% of the laddis
	 * mix so this probably isn't worth the trouble to optimize.
	 */
	if (mp->m_flags & XFS_MOUNT_WSYNC)
		xfs_trans_set_sync(tp);
	code = xfs_trans_commit(tp, 0);
	xfs_iunlock(ip, lock_flags);

	/*
	 * Release any dquot(s) the inode had kept before chown.
	 */
	xfs_qm_dqrele(olddquot);
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);

	return code;

 error_return:
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	xfs_trans_cancel(tp, 0);
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
	return code;
}
int
xfs_symlink(
	struct xfs_inode	*dp,
	struct xfs_name		*link_name,
	const char		*target_path,
	umode_t			mode,
	struct xfs_inode	**ipp)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_trans	*tp = NULL;
	struct xfs_inode	*ip = NULL;
	int			error = 0;
	int			pathlen;
	struct xfs_bmap_free	free_list;
	xfs_fsblock_t		first_block;
	bool			unlock_dp_on_error = false;
	uint			cancel_flags;
	int			committed;
	xfs_fileoff_t		first_fsb;
	xfs_filblks_t		fs_blocks;
	int			nmaps;
	struct xfs_bmbt_irec	mval[XFS_SYMLINK_MAPS];
	xfs_daddr_t		d;
	const char		*cur_chunk;
	int			byte_cnt;
	int			n;
	xfs_buf_t		*bp;
	prid_t			prid;
	struct xfs_dquot	*udqp = NULL;
	struct xfs_dquot	*gdqp = NULL;
	struct xfs_dquot	*pdqp = NULL;
	uint			resblks;

	*ipp = NULL;

	trace_xfs_symlink(dp, link_name);

	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	/*
	 * Check component lengths of the target path name.
	 */
	pathlen = strlen(target_path);
	if (pathlen >= MAXPATHLEN)	/* total string too long */
		return XFS_ERROR(ENAMETOOLONG);

	udqp = gdqp = NULL;
	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
		prid = xfs_get_projid(dp);
	else
		prid = XFS_PROJID_DEFAULT;

	/*
	 * Make sure that we have allocated dquot(s) on disk.
	 */
	error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
			xfs_kgid_to_gid(current_fsgid()), prid,
			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
			&udqp, &gdqp, &pdqp);
	if (error)
		goto std_return;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SYMLINK);
	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
	/*
	 * The symlink will fit into the inode data fork?
	 * There can't be any attributes so we get the whole variable part.
	 */
	if (pathlen <= XFS_LITINO(mp, dp->i_d.di_version))
		fs_blocks = 0;
	else
		fs_blocks = xfs_symlink_blocks(mp, pathlen);
	resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, resblks, 0);
	if (error == ENOSPC && fs_blocks == 0) {
		resblks = 0;
		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_symlink, 0, 0);
	}
	if (error) {
		cancel_flags = 0;
		goto error_return;
	}

	xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
	unlock_dp_on_error = true;

	/*
	 * Check whether the directory allows new symlinks or not.
	 */
	if (dp->i_d.di_flags & XFS_DIFLAG_NOSYMLINKS) {
		error = XFS_ERROR(EPERM);
		goto error_return;
	}

	/*
	 * Reserve disk quota : blocks and inode.
	 */
	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
						pdqp, resblks, 1, 0);
	if (error)
		goto error_return;

	/*
	 * Check for ability to enter directory entry, if no space reserved.
	 */
	error = xfs_dir_canenter(tp, dp, link_name, resblks);
	if (error)
		goto error_return;
	/*
	 * Initialize the bmap freelist prior to calling either
	 * bmapi or the directory create code.
	 */
	xfs_bmap_init(&free_list, &first_block);

	/*
	 * Allocate an inode for the symlink.
	 */
	error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
			       prid, resblks > 0, &ip, NULL);
	if (error) {
		if (error == ENOSPC)
			goto error_return;
		goto error1;
	}

	/*
	 * An error after we've joined dp to the transaction will result in the
	 * transaction cancel unlocking dp so don't do it explicitly in the
	 * error path.
	 */
	xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
	unlock_dp_on_error = false;

	/*
	 * Also attach the dquot(s) to it, if applicable.
	 */
	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);

	if (resblks)
		resblks -= XFS_IALLOC_SPACE_RES(mp);
	/*
	 * If the symlink will fit into the inode, write it inline.
	 */
	if (pathlen <= XFS_IFORK_DSIZE(ip)) {
		xfs_idata_realloc(ip, pathlen, XFS_DATA_FORK);
		memcpy(ip->i_df.if_u1.if_data, target_path, pathlen);
		ip->i_d.di_size = pathlen;

		/*
		 * The inode was initially created in extent format.
		 */
		ip->i_df.if_flags &= ~(XFS_IFEXTENTS | XFS_IFBROOT);
		ip->i_df.if_flags |= XFS_IFINLINE;

		ip->i_d.di_format = XFS_DINODE_FMT_LOCAL;
		xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE);
	} else {
		int	offset;

		first_fsb = 0;
		nmaps = XFS_SYMLINK_MAPS;

		error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
				  XFS_BMAPI_METADATA, &first_block, resblks,
				  mval, &nmaps, &free_list);
		if (error)
			goto error2;

		if (resblks)
			resblks -= fs_blocks;
		ip->i_d.di_size = pathlen;
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

		cur_chunk = target_path;
		offset = 0;
		for (n = 0; n < nmaps; n++) {
			char	*buf;

			d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
			byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
					       BTOBB(byte_cnt), 0);
			if (!bp) {
				error = ENOMEM;
				goto error2;
			}
			bp->b_ops = &xfs_symlink_buf_ops;

			byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
			byte_cnt = min(byte_cnt, pathlen);

			buf = bp->b_addr;
			buf += xfs_symlink_hdr_set(mp, ip->i_ino, offset,
						   byte_cnt, bp);

			memcpy(buf, cur_chunk, byte_cnt);

			cur_chunk += byte_cnt;
			pathlen -= byte_cnt;
			offset += byte_cnt;

			xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SYMLINK_BUF);
			xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) -
							(char *)bp->b_addr);
		}
		ASSERT(pathlen == 0);
	}

	/*
	 * Create the directory entry for the symlink.
	 */
	error = xfs_dir_createname(tp, dp, link_name, ip->i_ino,
					&first_block, &free_list, resblks);
	if (error)
		goto error2;
	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);

	/*
	 * If this is a synchronous mount, make sure that the
	 * symlink transaction goes to disk before returning to
	 * the user.
	 */
	if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
		xfs_trans_set_sync(tp);
	}

	error = xfs_bmap_finish(&tp, &free_list, &committed);
	if (error) {
		goto error2;
	}
	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	xfs_qm_dqrele(pdqp);

	*ipp = ip;
	return 0;

 error2:
	IRELE(ip);
 error1:
	xfs_bmap_cancel(&free_list);
	cancel_flags |= XFS_TRANS_ABORT;
 error_return:
	xfs_trans_cancel(tp, cancel_flags);
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	xfs_qm_dqrele(pdqp);

	if (unlock_dp_on_error)
		xfs_iunlock(dp, XFS_ILOCK_EXCL);
 std_return:
	return error;
}
/*
 * Truncate file.  Must have write permission and not be a directory.
 */
int
xfs_setattr_size(
	struct xfs_inode	*ip,
	struct iattr		*iattr)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct inode		*inode = VFS_I(ip);
	xfs_off_t		oldsize, newsize;
	struct xfs_trans	*tp;
	int			error;
	uint			lock_flags = 0;
	uint			commit_flags = 0;

	trace_xfs_setattr(ip);

	if (mp->m_flags & XFS_MOUNT_RDONLY)
		return XFS_ERROR(EROFS);
	if (XFS_FORCED_SHUTDOWN(mp))
		return XFS_ERROR(EIO);

	error = -inode_change_ok(inode, iattr);
	if (error)
		return XFS_ERROR(error);

	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
	ASSERT(S_ISREG(ip->i_d.di_mode));
	ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
			ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);

	oldsize = inode->i_size;
	newsize = iattr->ia_size;

	/*
	 * Short circuit the truncate case for zero length files.
	 */
	if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) {
		if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME)))
			return 0;

		/*
		 * Use the regular setattr path to update the timestamps.
		 */
		iattr->ia_valid &= ~ATTR_SIZE;
		return xfs_setattr_nonsize(ip, iattr, 0);
	}

	/*
	 * Make sure that the dquots are attached to the inode.
	 */
	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return error;

	/*
	 * Now we can make the changes.  Before we join the inode to the
	 * transaction, take care of the part of the truncation that must be
	 * done without the inode lock.  This needs to be done before joining
	 * the inode to the transaction, because the inode cannot be unlocked
	 * once it is a part of the transaction.
	 */
	if (newsize > oldsize) {
		/*
		 * Do the first part of growing a file: zero any data in the
		 * last block that is beyond the old EOF.  We need to do this
		 * before the inode is joined to the transaction to modify
		 * i_size.
		 */
		error = xfs_zero_eof(ip, newsize, oldsize);
		if (error)
			return error;
	}

	/*
	 * We are going to log the inode size change in this transaction so
	 * any previous writes that are beyond the on disk EOF and the new
	 * EOF that have not been written out need to be written here.  If we
	 * do not write the data out, we expose ourselves to the null files
	 * problem.
	 *
	 * Only flush from the on disk size to the smaller of the in memory
	 * file size or the new size as that's the range we really care about
	 * here and prevents waiting for other data not within the range we
	 * care about here.
	 */
	if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
		error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
						      ip->i_d.di_size, newsize);
		if (error)
			return error;
	}

	/*
	 * Wait for all direct I/O to complete.
	 */
	inode_dio_wait(inode);

	error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
	if (error)
		return error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
	if (error)
		goto out_trans_cancel;

	truncate_setsize(inode, newsize);

	commit_flags = XFS_TRANS_RELEASE_LOG_RES;
	lock_flags |= XFS_ILOCK_EXCL;

	xfs_ilock(ip, XFS_ILOCK_EXCL);

	xfs_trans_ijoin(tp, ip, 0);

	/*
	 * Only change the c/mtime if we are changing the size or we are
	 * explicitly asked to change it.  This handles the semantic difference
	 * between truncate() and ftruncate() as implemented in the VFS.
	 *
	 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
	 * special case where we need to update the times despite not having
	 * these flags set.  For all other operations the VFS set these flags
	 * explicitly if it wants a timestamp update.
	 */
	if (newsize != oldsize &&
	    !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
		iattr->ia_ctime = iattr->ia_mtime =
			current_fs_time(inode->i_sb);
		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
	}

	/*
	 * The first thing we do is set the size to new_size permanently on
	 * disk.  This way we don't have to worry about anyone ever being able
	 * to look at the data being freed even in the face of a crash.
	 * What we're getting around here is the case where we free a block, it
	 * is allocated to another file, it is written to, and then we crash.
	 * If the new data gets written to the file but the log buffers
	 * containing the free and reallocation don't, then we'd end up with
	 * garbage in the blocks being freed.  As long as we make the new size
	 * permanent before actually freeing any blocks it doesn't matter if
	 * they get written to.
	 */
	ip->i_d.di_size = newsize;
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	if (newsize <= oldsize) {
		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
		if (error)
			goto out_trans_abort;

		/*
		 * Truncated "down", so we're removing references to old data
		 * here - if we delay flushing for a long time, we expose
		 * ourselves unduly to the notorious NULL files problem.  So,
		 * we mark this inode and flush it when the file is closed,
		 * and do not wait the usual (long) time for writeout.
		 */
		xfs_iflags_set(ip, XFS_ITRUNCATED);

		/* A truncate down always removes post-EOF blocks. */
		xfs_inode_clear_eofblocks_tag(ip);
	}

	if (iattr->ia_valid & ATTR_MODE)
		xfs_setattr_mode(ip, iattr);
	if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
		xfs_setattr_time(ip, iattr);

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	XFS_STATS_INC(xs_ig_attrchg);

	if (mp->m_flags & XFS_MOUNT_WSYNC)
		xfs_trans_set_sync(tp);

	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
out_unlock:
	if (lock_flags)
		xfs_iunlock(ip, lock_flags);
	return error;

out_trans_abort:
	commit_flags |= XFS_TRANS_ABORT;
out_trans_cancel:
	xfs_trans_cancel(tp, commit_flags);
	goto out_unlock;
}
/*
 * Generic handler routine to remove a name from an attribute list.
 * Transitions attribute list from Btree to shortform as necessary.
 */
int
xfs_attr_remove(
	struct xfs_inode	*dp,
	const unsigned char	*name,
	int			flags)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_da_args	args;
	int			error;

	XFS_STATS_INC(mp, xs_attr_remove);

	if (XFS_FORCED_SHUTDOWN(dp->i_mount))
		return -EIO;

	error = xfs_attr_args_init(&args, dp, name, flags);
	if (error)
		return error;

	/*
	 * we have no control over the attribute names that userspace passes us
	 * to remove, so we have to allow the name lookup prior to attribute
	 * removal to fail.
	 */
	args.op_flags = XFS_DA_OP_OKNOENT;

	error = xfs_qm_dqattach(dp);
	if (error)
		return error;

	/*
	 * Root fork attributes can use reserved data blocks for this
	 * operation if necessary
	 */
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_attrrm,
			XFS_ATTRRM_SPACE_RES(mp), 0,
			(flags & ATTR_ROOT) ? XFS_TRANS_RESERVE : 0,
			&args.trans);
	if (error)
		return error;

	xfs_ilock(dp, XFS_ILOCK_EXCL);
	/*
	 * No need to make quota reservations here. We expect to release some
	 * blocks not allocate in the common case.
	 */
	xfs_trans_ijoin(args.trans, dp, 0);

	if (!xfs_inode_hasattr(dp)) {
		error = -ENOATTR;
	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
		ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
		error = xfs_attr_shortform_remove(&args);
	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
		error = xfs_attr_leaf_removename(&args);
	} else {
		error = xfs_attr_node_removename(&args);
	}

	if (error)
		goto out;

	/*
	 * If this is a synchronous mount, make sure that the
	 * transaction goes to disk before returning to the user.
	 */
	if (mp->m_flags & XFS_MOUNT_WSYNC)
		xfs_trans_set_sync(args.trans);

	if ((flags & ATTR_KERNOTIME) == 0)
		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);

	/*
	 * Commit the last in the sequence of transactions.
	 */
	xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
	error = xfs_trans_commit(args.trans);
	xfs_iunlock(dp, XFS_ILOCK_EXCL);

	return error;

out:
	if (args.trans)
		xfs_trans_cancel(args.trans);
	xfs_iunlock(dp, XFS_ILOCK_EXCL);
	return error;
}
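/*
 * Hypothetical caller of xfs_attr_remove() above: removing an attribute
 * by name where absence is not an error. The attribute name is made up
 * for illustration; this era of the code returns negative errnos, hence
 * the -ENOATTR check mirroring the assignment inside the function.
 */
	error = xfs_attr_remove(dp, (const unsigned char *)"user.comment", 0);
	if (error == -ENOATTR)
		error = 0;	/* nothing to remove; treat as success */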
/*
 * Allocate a block and fill it with dquots.
 * This is called when the bmapi finds a hole.
 */
STATIC int
xfs_qm_dqalloc(
	xfs_trans_t	**tpp,
	xfs_mount_t	*mp,
	xfs_dquot_t	*dqp,
	xfs_inode_t	*quotip,
	xfs_fileoff_t	offset_fsb,
	xfs_buf_t	**O_bpp)
{
	xfs_fsblock_t	firstblock;
	xfs_bmap_free_t	flist;
	xfs_bmbt_irec_t	map;
	int		nmaps, error, committed;
	xfs_buf_t	*bp;
	xfs_trans_t	*tp = *tpp;

	ASSERT(tp != NULL);
	xfs_dqtrace_entry(dqp, "DQALLOC");

	/*
	 * Initialize the bmap freelist prior to calling bmapi code.
	 */
	XFS_BMAP_INIT(&flist, &firstblock);
	xfs_ilock(quotip, XFS_ILOCK_EXCL);
	/*
	 * Return if this type of quotas is turned off while we didn't
	 * have an inode lock
	 */
	if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
		xfs_iunlock(quotip, XFS_ILOCK_EXCL);
		return (ESRCH);
	}

	/*
	 * xfs_trans_commit normally decrements the vnode ref count
	 * when it unlocks the inode. Since we want to keep the quota
	 * inode around, we bump the vnode ref count now.
	 */
	VN_HOLD(XFS_ITOV(quotip));

	xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
	nmaps = 1;
	if ((error = xfs_bmapi(tp, quotip,
			      offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
			      XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
			      &firstblock,
			      XFS_QM_DQALLOC_SPACE_RES(mp),
			      &map, &nmaps, &flist))) {
		goto error0;
	}
	ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
	ASSERT(nmaps == 1);
	ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
	       (map.br_startblock != HOLESTARTBLOCK));

	/*
	 * Keep track of the blkno to save a lookup later
	 */
	dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);

	/* now we can just get the buffer (there's nothing to read yet) */
	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
			       dqp->q_blkno,
			       XFS_QI_DQCHUNKLEN(mp),
			       0);
	if (!bp || (error = XFS_BUF_GETERROR(bp)))
		goto error1;
	/*
	 * Make a chunk of dquots out of this buffer and log
	 * the entire thing.
	 */
	xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
			      dqp->dq_flags & XFS_DQ_ALLTYPES, bp);

	/*
	 * xfs_bmap_finish() may commit the current transaction and
	 * start a second transaction if the freelist is not empty.
	 *
	 * Since we still want to modify this buffer, we need to
	 * ensure that the buffer is not released on commit of
	 * the first transaction and ensure the buffer is added to the
	 * second transaction.
	 *
	 * If there is only one transaction then don't stop the buffer
	 * from being released when it commits later on.
	 */
	xfs_trans_bhold(tp, bp);

	if ((error = xfs_bmap_finish(tpp, &flist, firstblock, &committed))) {
		goto error1;
	}

	if (committed) {
		tp = *tpp;
		xfs_trans_bjoin(tp, bp);
	} else {
		xfs_trans_bhold_release(tp, bp);
	}

	*O_bpp = bp;
	return 0;

error1:
	xfs_bmap_cancel(&flist);
error0:
	xfs_iunlock(quotip, XFS_ILOCK_EXCL);

	return (error);
}
STATIC int
xfs_file_fsync(
	struct file		*file,
	struct dentry		*dentry,
	int			datasync)
{
	struct xfs_inode	*ip = XFS_I(dentry->d_inode);
	struct xfs_trans	*tp;
	int			error = 0;
	int			log_flushed = 0;

	xfs_itrace_entry(ip);

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -XFS_ERROR(EIO);

	xfs_iflags_clear(ip, XFS_ITRUNCATED);

	/*
	 * We always need to make sure that the required inode state is safe on
	 * disk.  The inode might be clean but we still might need to force the
	 * log because of committed transactions that haven't hit the disk yet.
	 * Likewise, there could be unflushed non-transactional changes to the
	 * inode core that have to go to disk and this requires us to issue
	 * a synchronous transaction to capture these changes correctly.
	 *
	 * This code relies on the assumption that if the i_update_core field
	 * of the inode is clear and the inode is unpinned then it is clean
	 * and no action is required.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);

	/*
	 * First check if the VFS inode is marked dirty.  All the dirtying
	 * of non-transactional updates now goes through mark_inode_dirty*,
	 * which allows us to distinguish between pure timestamp updates
	 * and i_size updates which need to be caught for fdatasync.
	 * After that also check for the dirty state in the XFS inode, which
	 * might get cleared when the inode gets written out via the AIL
	 * or xfs_iflush_cluster.
	 */
	if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) ||
	    ((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
	    ip->i_update_core) {
		/*
		 * Kick off a transaction to log the inode core to get the
		 * updates.  The sync transaction will also force the log.
		 */
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
		error = xfs_trans_reserve(tp, 0,
				XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
		if (error) {
			xfs_trans_cancel(tp, 0);
			return -error;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);

		/*
		 * Note - it's possible that we might have pushed ourselves out
		 * of the way during trans_reserve which would flush the inode.
		 * But there's no guarantee that the inode buffer has actually
		 * gone out yet (it's delwri).  Plus the buffer could be pinned
		 * anyway if it's part of an inode in another recent
		 * transaction.  So we play it safe and fire off the
		 * transaction anyway.
		 */
		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
		xfs_trans_ihold(tp, ip);
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
		xfs_trans_set_sync(tp);
		error = _xfs_trans_commit(tp, 0, &log_flushed);

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	} else {
		/*
		 * Timestamps/size haven't changed since last inode flush or
		 * inode transaction commit.  That means either nothing got
		 * written or a transaction committed which caught the updates.
		 * If the latter happened and the transaction hasn't hit the
		 * disk yet, the inode will still be pinned.  If it is,
		 * force the log.
		 */
		if (xfs_ipincount(ip)) {
			error = _xfs_log_force_lsn(ip->i_mount,
					ip->i_itemp->ili_last_lsn,
					XFS_LOG_SYNC, &log_flushed);
		}
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
	}

	if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
		/*
		 * If the log write didn't issue an ordered tag we need
		 * to flush the disk cache for the data device now.
		 */
		if (!log_flushed)
			xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);

		/*
		 * If this inode is on the RT dev we need to flush that
		 * cache as well.
		 */
		if (XFS_IS_REALTIME_INODE(ip))
			xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
	}

	return -error;
}
ssize_t /* bytes written, or (-) error */ xfs_write( bhv_desc_t *bdp, struct kiocb *iocb, const struct iovec *iovp, unsigned int nsegs, loff_t *offset, int ioflags, cred_t *credp) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; unsigned long segs = nsegs; xfs_inode_t *xip; xfs_mount_t *mp; ssize_t ret = 0, error = 0; xfs_fsize_t isize, new_size; xfs_iocore_t *io; vnode_t *vp; unsigned long seg; int iolock; int eventsent = 0; vrwlock_t locktype; size_t ocount = 0, count; loff_t pos; int need_isem = 1, need_flush = 0; XFS_STATS_INC(xs_write_calls); vp = BHV_TO_VNODE(bdp); xip = XFS_BHVTOI(bdp); for (seg = 0; seg < segs; seg++) { const struct iovec *iv = &iovp[seg]; /* * If any segment has a negative length, or the cumulative * length ever wraps negative then return -EINVAL. */ ocount += iv->iov_len; if (unlikely((ssize_t)(ocount|iv->iov_len) < 0)) return -EINVAL; if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len)) continue; if (seg == 0) return -EFAULT; segs = seg; ocount -= iv->iov_len; /* This segment is no good */ break; } count = ocount; pos = *offset; if (count == 0) return 0; io = &xip->i_iocore; mp = io->io_mount; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; if (ioflags & IO_ISDIRECT) { xfs_buftarg_t *target = (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((pos & target->pbr_smask) || (count & target->pbr_smask)) return XFS_ERROR(-EINVAL); if (!VN_CACHED(vp) && pos < i_size_read(inode)) need_isem = 0; if (VN_CACHED(vp)) need_flush = 1; } relock: if (need_isem) { iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; down(&inode->i_sem); } else { iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; } xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); isize = i_size_read(inode); if (file->f_flags & O_APPEND) *offset = isize; start: error = -generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); goto out_unlock_isem; } new_size = pos + count; if (new_size > isize) io->io_new_size = new_size; if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { loff_t savedsize = pos; int dmflags = FILP_DELAY_FLAG(file); if (need_isem) dmflags |= DM_FLAGS_ISEM; xfs_iunlock(xip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, pos, count, dmflags, &locktype); if (error) { xfs_iunlock(xip, iolock); goto out_unlock_isem; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; /* * The iolock was dropped and reaquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ if ((file->f_flags & O_APPEND) && savedsize != isize) { pos = isize = xip->i_d.di_size; goto start; } } /* * On Linux, generic_file_write updates the times even if * no data is copied in so long as the write had a size. * * We must update xfs' times since revalidate will overcopy xfs. */ if (!(ioflags & IO_INVIS)) { xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); inode_update_time(inode, 1); } /* * If the offset is beyond the size of the file, we have a couple * of things to do. First, if there is already space allocated * we need to either create holes or zero the disk or ... * * If there is a page where the previous size lands, we need * to zero it out up to the new size. 
*/ if (pos > isize) { error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, isize, pos + count); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); goto out_unlock_isem; } } xfs_iunlock(xip, XFS_ILOCK_EXCL); /* * If we're writing the file then make sure to clear the * setuid and setgid bits if the process is not being run * by root. This keeps people from modifying setuid and * setgid binaries. */ if (((xip->i_d.di_mode & S_ISUID) || ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (likely(!error)) error = -remove_suid(file->f_dentry); if (unlikely(error)) { xfs_iunlock(xip, iolock); goto out_unlock_isem; } } retry: /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; if ((ioflags & IO_ISDIRECT)) { if (need_flush) { xfs_inval_cached_trace(io, pos, -1, ctooff(offtoct(pos)), -1); VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)), -1, FI_REMAPF_LOCKED); } if (need_isem) { /* demote the lock now the cached pages are gone */ XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL); up(&inode->i_sem); iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; need_isem = 0; } xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs, *offset, ioflags); ret = generic_file_direct_write(iocb, iovp, &segs, pos, offset, count, ocount); /* * direct-io write to a hole: fall through to buffered I/O * for completing the rest of the request. */ if (ret >= 0 && ret != count) { XFS_STATS_ADD(xs_write_bytes, ret); pos += ret; count -= ret; need_isem = 1; ioflags &= ~IO_ISDIRECT; xfs_iunlock(xip, iolock); goto relock; } } else { xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs, *offset, ioflags); ret = generic_file_buffered_write(iocb, iovp, segs, pos, offset, count, ret); } current->backing_dev_info = NULL; if (ret == -EIOCBQUEUED) ret = wait_on_sync_kiocb(iocb); if ((ret == -ENOSPC) && DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { xfs_rwunlock(bdp, locktype); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (error) goto out_unlock_isem; xfs_rwlock(bdp, locktype); pos = xip->i_d.di_size; goto retry; } if (*offset > xip->i_d.di_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); if (*offset > xip->i_d.di_size) { xip->i_d.di_size = *offset; i_size_write(inode, *offset); xip->i_update_core = 1; xip->i_update_size = 1; } xfs_iunlock(xip, XFS_ILOCK_EXCL); } error = -ret; if (ret <= 0) goto out_unlock_internal; XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { /* * If we're treating this as O_DSYNC and we have not updated the * size, force the log. */ if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && !(xip->i_update_size)) { xfs_inode_log_item_t *iip = xip->i_itemp; /* * If an allocation transaction occurred * without extending the size, then we have to force * the log up the proper point to ensure that the * allocation is permanent. We can't count on * the fact that buffered writes lock out direct I/O * writes - the direct I/O write could have extended * the size nontransactionally, then finished before * we started. xfs_write_file will think that the file * didn't grow but the update isn't safe unless the * size change is logged. * * Force the log if we've committed a transaction * against the inode or if someone else has and * the commit record hasn't gone to disk (e.g. 
* the inode is pinned). This guarantees that * all changes affecting the inode are permanent * when we return. */ if (iip && iip->ili_last_lsn) { xfs_log_force(mp, iip->ili_last_lsn, XFS_LOG_FORCE | XFS_LOG_SYNC); } else if (xfs_ipincount(xip) > 0) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); } } else { xfs_trans_t *tp; /* * O_SYNC or O_DSYNC _with_ a size update are handled * the same way. * * If the write was synchronous then we need to make * sure that the inode modification time is permanent. * We'll have updated the timestamp above, so here * we use a synchronous transaction to log the inode. * It's not fast, but it's necessary. * * If this is a dsync write and the size got changed * non-transactionally, then we need to ensure that * the size change gets logged in a synchronous * transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); if ((error = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0))) { /* Transaction reserve failed */ xfs_trans_cancel(tp, 0); } else { /* Transaction reserve successful */ xfs_ilock(xip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, xip); xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0, NULL); xfs_iunlock(xip, XFS_ILOCK_EXCL); if (error) goto out_unlock_internal; } } xfs_rwunlock(bdp, locktype); if (need_isem) up(&inode->i_sem); error = sync_page_range(inode, mapping, pos, ret); if (!error) error = ret; return error; } out_unlock_internal: xfs_rwunlock(bdp, locktype); out_unlock_isem: if (need_isem) up(&inode->i_sem); return -error; }
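/*
 * A minimal userspace sketch (not the kernel code above) of the
 * segment-validation idiom at the top of this xfs_write() variant.
 * The helper name check_iovec() is hypothetical.  OR-ing the running
 * total with the current segment length and casting to ssize_t goes
 * negative as soon as either value has its sign bit set, so one
 * comparison catches both a single oversized segment and cumulative
 * wraparound.
 */
#include <errno.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/uio.h>

static ssize_t check_iovec(const struct iovec *iov, unsigned long nsegs)
{
	size_t total = 0;
	unsigned long seg;

	for (seg = 0; seg < nsegs; seg++) {
		total += iov[seg].iov_len;
		/* sign bit set in either value => invalid */
		if ((ssize_t)(total | iov[seg].iov_len) < 0)
			return -EINVAL;
	}
	return (ssize_t)total;
}

int main(void)
{
	char buf[8];
	struct iovec ok[2] = { { buf, 4 }, { buf + 4, 4 } };
	struct iovec bad[2] = { { buf, (size_t)-1 }, { buf, 8 } };

	printf("ok:  %zd\n", check_iovec(ok, 2));	/* prints 8 */
	printf("bad: %zd\n", check_iovec(bad, 2));	/* prints -22 */
	return 0;
}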
/* * Free a symlink that has blocks associated with it. */ STATIC int xfs_inactive_symlink_rmt( struct xfs_inode *ip) { xfs_buf_t *bp; int committed; int done; int error; xfs_fsblock_t first_block; xfs_bmap_free_t free_list; int i; xfs_mount_t *mp; xfs_bmbt_irec_t mval[XFS_SYMLINK_MAPS]; int nmaps; int size; xfs_trans_t *tp; mp = ip->i_mount; ASSERT(ip->i_df.if_flags & XFS_IFEXTENTS); /* * We're freeing a symlink that has some * blocks allocated to it. Free the * blocks here. We know that we've got * either 1 or 2 extents and that we can * free them all in one bunmapi call. */ ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2); tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error) { xfs_trans_cancel(tp, 0); return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); /* * Lock the inode, fix the size, and join it to the transaction. * Hold it so in the normal path, we still have it locked for * the second transaction. In the error paths we need it * held so the cancel won't rele it, see below. */ size = (int)ip->i_d.di_size; ip->i_d.di_size = 0; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); /* * Find the block(s) so we can inval and unmap them. */ done = 0; xfs_bmap_init(&free_list, &first_block); nmaps = ARRAY_SIZE(mval); error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size), mval, &nmaps, 0); if (error) goto error_trans_cancel; /* * Invalidate the block(s). No validation is done. */ for (i = 0; i < nmaps; i++) { bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, mval[i].br_startblock), XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); if (!bp) { error = ENOMEM; goto error_bmap_cancel; } xfs_trans_binval(tp, bp); } /* * Unmap the dead block(s) to the free_list. */ error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, &first_block, &free_list, &done); if (error) goto error_bmap_cancel; ASSERT(done); /* * Commit the first transaction. This logs the EFI and the inode. */ error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) goto error_bmap_cancel; /* * The transaction must have been committed, since there were * actually extents freed by xfs_bunmapi. See xfs_bmap_finish. * The new tp has the extent freeing and EFDs. */ ASSERT(committed); /* * The first xact was committed, so add the inode to the new one. * Mark it dirty so it will be logged and moved forward in the log as * part of every commit. */ xfs_trans_ijoin(tp, ip, 0); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); /* * Commit the transaction containing extent freeing and EFDs. */ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); goto error_unlock; } /* * Remove the memory for extent descriptions (just bookkeeping). */ if (ip->i_df.if_bytes) xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK); ASSERT(ip->i_df.if_bytes == 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); return 0; error_bmap_cancel: xfs_bmap_cancel(&free_list); error_trans_cancel: xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); error_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; }
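/*
 * xfs_inactive_symlink_rmt() above unwinds its failure cases through a
 * ladder of goto labels (error_bmap_cancel -> error_trans_cancel ->
 * error_unlock), releasing state in the reverse order it was acquired.
 * A generic userspace sketch of that idiom follows; the names
 * (ladder_example, do_work) are hypothetical and the resources are
 * stand-ins for the bmap free list, transaction, and inode lock.
 */
#include <stdio.h>
#include <stdlib.h>

static int do_work(void *a, void *b, void *c)
{
	(void)a; (void)b; (void)c;
	return -1;			/* simulate a failure */
}

static int ladder_example(void)
{
	void *a = NULL, *b = NULL, *c = NULL;
	int error = -1;

	a = malloc(16);			/* resource 1 */
	if (!a)
		goto out;
	b = malloc(16);			/* resource 2, depends on 1 */
	if (!b)
		goto out_free_a;
	c = malloc(16);			/* resource 3, depends on 2 */
	if (!c)
		goto out_free_b;

	error = do_work(a, b, c);
	/* success and failure release in the same reverse order */
	free(c);
out_free_b:
	free(b);
out_free_a:
	free(a);
out:
	return error;
}

int main(void)
{
	printf("ladder_example() = %d\n", ladder_example());
	return 0;
}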
int xfs_alloc_file_space( struct xfs_inode *ip, xfs_off_t offset, xfs_off_t len, int alloc_type) { xfs_mount_t *mp = ip->i_mount; xfs_off_t count; xfs_filblks_t allocated_fsb; xfs_filblks_t allocatesize_fsb; xfs_extlen_t extsz, temp; xfs_fileoff_t startoffset_fsb; xfs_fsblock_t firstfsb; int nimaps; int quota_flag; int rt; xfs_trans_t *tp; xfs_bmbt_irec_t imaps[1], *imapp; xfs_bmap_free_t free_list; uint qblocks, resblks, resrtextents; int committed; int error; trace_xfs_alloc_file_space(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); error = xfs_qm_dqattach(ip, 0); if (error) return error; if (len <= 0) return XFS_ERROR(EINVAL); rt = XFS_IS_REALTIME_INODE(ip); extsz = xfs_get_extsz_hint(ip); count = len; imapp = &imaps[0]; nimaps = 1; startoffset_fsb = XFS_B_TO_FSBT(mp, offset); allocatesize_fsb = XFS_B_TO_FSB(mp, count); /* * Allocate file space until done or until there is an error */ while (allocatesize_fsb && !error) { xfs_fileoff_t s, e; /* * Determine space reservations for data/realtime. */ if (unlikely(extsz)) { s = startoffset_fsb; do_div(s, extsz); s *= extsz; e = startoffset_fsb + allocatesize_fsb; if ((temp = do_mod(startoffset_fsb, extsz))) e += temp; if ((temp = do_mod(e, extsz))) e += extsz - temp; } else { s = 0; e = allocatesize_fsb; } /* * The transaction reservation is limited to a 32-bit block * count, hence we need to limit the number of blocks we are * trying to reserve to avoid an overflow. We can't allocate * more than @nimaps extents, and an extent is limited on disk * to MAXEXTLEN (21 bits), so use that to enforce the limit. */ resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps)); if (unlikely(rt)) { resrtextents = qblocks = resblks; resrtextents /= mp->m_sb.sb_rextsize; resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); quota_flag = XFS_QMOPT_RES_RTBLKS; } else { resrtextents = 0; resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks); quota_flag = XFS_QMOPT_RES_REGBLKS; } /* * Allocate and setup the transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, resrtextents); /* * Check for running out of space */ if (error) { /* * Free the transaction structure. */ ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); xfs_trans_cancel(tp, 0); break; } xfs_ilock(ip, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); if (error) goto error1; xfs_trans_ijoin(tp, ip, 0); xfs_bmap_init(&free_list, &firstfsb); error = xfs_bmapi_write(tp, ip, startoffset_fsb, allocatesize_fsb, alloc_type, &firstfsb, 0, imapp, &nimaps, &free_list); if (error) { goto error0; } /* * Complete the transaction */ error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { goto error0; } error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) { break; } allocated_fsb = imapp->br_blockcount; if (nimaps == 0) { error = XFS_ERROR(ENOSPC); break; } startoffset_fsb += allocated_fsb; allocatesize_fsb -= allocated_fsb; } return error; error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag); error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; }
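/*
 * A userspace model (hypothetical helper name align_to_extsz) of the
 * extent-size rounding done with do_div()/do_mod() in
 * xfs_alloc_file_space() above.  The start is rounded down to an
 * extsz boundary and the end rounded up, so the aligned range [s, e)
 * always covers the requested [start, start + count).
 */
#include <inttypes.h>
#include <stdio.h>

static void align_to_extsz(uint64_t start, uint64_t count, uint32_t extsz,
			   uint64_t *s, uint64_t *e)
{
	uint64_t temp;

	*s = start - (start % extsz);		/* round start down */
	*e = start + count;
	if ((temp = start % extsz))		/* mirror the kernel's */
		*e += temp;			/* two-step round-up */
	if ((temp = *e % extsz))
		*e += extsz - temp;
}

int main(void)
{
	uint64_t s, e;

	/* request blocks [10, 25) with an 8-block extent size hint */
	align_to_extsz(10, 15, 8, &s, &e);
	printf("aligned range: [%" PRIu64 ", %" PRIu64 ")\n", s, e);
	/* prints [8, 32): both ends are multiples of 8 */
	return 0;
}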
/* * xfs_rename */ int xfs_rename( xfs_inode_t *src_dp, struct xfs_name *src_name, xfs_inode_t *src_ip, xfs_inode_t *target_dp, struct xfs_name *target_name, xfs_inode_t *target_ip) { xfs_trans_t *tp = NULL; xfs_mount_t *mp = src_dp->i_mount; int new_parent; /* moving to a new dir */ int src_is_directory; /* src_name is a directory */ int error; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; int cancel_flags; int committed; xfs_inode_t *inodes[4]; int spaceres; int num_inodes; xfs_itrace_entry(src_dp); xfs_itrace_entry(target_dp); if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) || DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME, src_dp, DM_RIGHT_NULL, target_dp, DM_RIGHT_NULL, src_name->name, target_name->name, 0, 0, 0); if (error) return error; } /* Return through std_return after this point. */ new_parent = (src_dp != target_dp); src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); if (src_is_directory) { /* * Check for link count overflow on target_dp */ if (target_ip == NULL && new_parent && target_dp->i_d.di_nlink >= XFS_MAXLINK) { error = XFS_ERROR(EMLINK); goto std_return; } } xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, inodes, &num_inodes); xfs_bmap_init(&free_list, &first_block); tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); cancel_flags = XFS_TRANS_RELEASE_LOG_RES; spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); if (error == ENOSPC) { spaceres = 0; error = xfs_trans_reserve(tp, 0, XFS_RENAME_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); } if (error) { xfs_trans_cancel(tp, 0); goto std_return; } /* * Attach the dquots to the inodes */ if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { xfs_trans_cancel(tp, cancel_flags); goto std_return; } /* * Lock all the participating inodes. Depending upon whether * the target_name exists in the target directory, and * whether the target directory is the same as the source * directory, we can lock from 2 to 4 inodes. */ xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); /* * Join all the inodes to the transaction. From this point on, * we can rely on either trans_commit or trans_cancel to unlock * them. Note that we need to add a vnode reference to the * directories since trans_commit & trans_cancel will decrement * them when they unlock the inodes. Also, we need to be careful * not to add an inode to the transaction more than once. */ IHOLD(src_dp); xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); if (new_parent) { IHOLD(target_dp); xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); } IHOLD(src_ip); xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); if (target_ip) { IHOLD(target_ip); xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); } /* * If we are using project inheritance, we only allow renames * into our tree when the project IDs are the same; else the * tree quota mechanism would be circumvented. */ if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { error = XFS_ERROR(EXDEV); goto error_return; } /* * Set up the target. */ if (target_ip == NULL) { /* * If there's no space reservation, check the entry will * fit before actually inserting it. */ error = xfs_dir_canenter(tp, target_dp, target_name, spaceres); if (error) goto error_return; /* * If target does not exist and the rename crosses * directories, adjust the target directory link count * to account for the ".." 
reference from the new entry. */ error = xfs_dir_createname(tp, target_dp, target_name, src_ip->i_ino, &first_block, &free_list, spaceres); if (error == ENOSPC) goto error_return; if (error) goto abort_return; xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); if (new_parent && src_is_directory) { error = xfs_bumplink(tp, target_dp); if (error) goto abort_return; } } else { /* target_ip != NULL */ /* * If target exists and it's a directory, check that both * target and source are directories and that target can be * destroyed, or that neither is a directory. */ if ((target_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { /* * Make sure target dir is empty. */ if (!(xfs_dir_isempty(target_ip)) || (target_ip->i_d.di_nlink > 2)) { error = XFS_ERROR(EEXIST); goto error_return; } } /* * Link the source inode under the target name. * If the source inode is a directory and we are moving * it across directories, its ".." entry will be * inconsistent until we replace that down below. * * In case there is already an entry with the same * name at the destination directory, remove it first. */ error = xfs_dir_replace(tp, target_dp, target_name, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) goto abort_return; xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * Decrement the link count on the target since the target * dir no longer points to it. */ error = xfs_droplink(tp, target_ip); if (error) goto abort_return; if (src_is_directory) { /* * Drop the link from the old "." entry. */ error = xfs_droplink(tp, target_ip); if (error) goto abort_return; } } /* target_ip != NULL */ /* * Remove the source. */ if (new_parent && src_is_directory) { /* * Rewrite the ".." entry to point to the new * directory. */ error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, target_dp->i_ino, &first_block, &free_list, spaceres); ASSERT(error != EEXIST); if (error) goto abort_return; } /* * We always want to hit the ctime on the source inode. * * This isn't strictly required by the standards since the source * inode isn't really being changed, but old unix file systems did * it and some incremental backup programs won't work without it. */ xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG); /* * Adjust the link count on src_dp. This is necessary when * renaming a directory, either within one parent when * the target existed, or across two parent directories. */ if (src_is_directory && (new_parent || target_ip != NULL)) { /* * Decrement link count on src_directory since the * entry that's moved no longer points to it. */ error = xfs_droplink(tp, src_dp); if (error) goto abort_return; } error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) goto abort_return; xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); if (new_parent) xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); /* * If this is a synchronous mount, make sure that the * rename transaction goes to disk before returning to * the user. */ if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { xfs_trans_set_sync(tp); } error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); goto std_return; } /* * trans_commit will unlock src_ip, target_ip & decrement * the vnode references. 
*/ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); /* Fall through to std_return with error = 0 or errno from * xfs_trans_commit */ std_return: if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) || DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) { (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME, src_dp, DM_RIGHT_NULL, target_dp, DM_RIGHT_NULL, src_name->name, target_name->name, 0, error, 0); } return error; abort_return: cancel_flags |= XFS_TRANS_ABORT; /* FALLTHROUGH */ error_return: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, cancel_flags); goto std_return; }
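/*
 * A tiny model of the link-count arithmetic that the rename above
 * performs via xfs_bumplink()/xfs_droplink().  It only prints the
 * deltas applied in each case; the function name and boolean flags
 * are hypothetical stand-ins for the kernel's state.
 */
#include <stdbool.h>
#include <stdio.h>

static void rename_nlink_deltas(bool src_is_dir, bool new_parent,
				bool target_exists)
{
	/* the new entry's ".." will reference the target directory */
	if (!target_exists && new_parent && src_is_dir)
		printf("target_dp: +1\n");

	if (target_exists) {
		printf("target_ip: -1 (unlinked name)\n");
		if (src_is_dir)
			printf("target_ip: -1 (victim dir's \".\")\n");
	}

	/* the moved directory's ".." no longer points at src_dp */
	if (src_is_dir && (new_parent || target_exists))
		printf("src_dp: -1\n");
}

int main(void)
{
	puts("dir rename across parents, target absent:");
	rename_nlink_deltas(true, true, false);
	puts("dir rename within one parent, target dir exists:");
	rename_nlink_deltas(true, false, true);
	return 0;
}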
int xfs_setattr_nonsize( struct xfs_inode *ip, struct iattr *iattr, int flags) { xfs_mount_t *mp = ip->i_mount; struct inode *inode = VFS_I(ip); int mask = iattr->ia_valid; xfs_trans_t *tp; int error; kuid_t uid = GLOBAL_ROOT_UID, iuid = GLOBAL_ROOT_UID; kgid_t gid = GLOBAL_ROOT_GID, igid = GLOBAL_ROOT_GID; struct xfs_dquot *udqp = NULL, *gdqp = NULL; struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL; trace_xfs_setattr(ip); /* If ACLs are being inherited, we already have this checked */ if (!(flags & XFS_ATTR_NOACL)) { if (mp->m_flags & XFS_MOUNT_RDONLY) return XFS_ERROR(EROFS); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); error = -inode_change_ok(inode, iattr); if (error) return XFS_ERROR(error); } ASSERT((mask & ATTR_SIZE) == 0); /* * If disk quotas are on, we make sure that the dquots do exist on disk, * before we start any other transactions. Trying to do this later * is messy. We don't care to take a readlock to look at the IDs * in the inode here, because we can't hold it across the trans_reserve. * If the IDs do change before we take the ilock, we're covered * because the i_*dquot fields will get updated anyway. */ if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { uint qflags = 0; if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { uid = iattr->ia_uid; qflags |= XFS_QMOPT_UQUOTA; } else { uid = inode->i_uid; } if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { gid = iattr->ia_gid; qflags |= XFS_QMOPT_GQUOTA; } else { gid = inode->i_gid; } /* * We take a reference when we initialize udqp and gdqp, * so it is important that we never blindly double trip on * the same variable. See xfs_create() for an example. */ ASSERT(udqp == NULL); ASSERT(gdqp == NULL); error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid), xfs_kgid_to_gid(gid), xfs_get_projid(ip), qflags, &udqp, &gdqp, NULL); if (error) return error; } tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); if (error) goto out_dqrele; xfs_ilock(ip, XFS_ILOCK_EXCL); /* * Change file ownership. Must be the owner or privileged. */ if (mask & (ATTR_UID|ATTR_GID)) { /* * These IDs could have changed since we last looked at them. * But, we're assured that if the ownership did change * while we didn't have the inode locked, inode's dquot(s) * would have changed also. */ iuid = inode->i_uid; igid = inode->i_gid; gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; /* * Do a quota reservation only if uid/gid is actually * going to change. */ if (XFS_IS_QUOTA_RUNNING(mp) && ((XFS_IS_UQUOTA_ON(mp) && !uid_eq(iuid, uid)) || (XFS_IS_GQUOTA_ON(mp) && !gid_eq(igid, gid)))) { ASSERT(tp); error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, NULL, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (error) /* out of quota */ goto out_trans_cancel; } } xfs_trans_ijoin(tp, ip, 0); /* * Change file ownership. Must be the owner or privileged. */ if (mask & (ATTR_UID|ATTR_GID)) { /* * CAP_FSETID overrides the following restrictions: * * The set-user-ID and set-group-ID bits of a file will be * cleared upon successful return from chown() */ if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && !capable(CAP_FSETID)) ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); /* * Change the ownerships and register quota modifications * in the transaction. 
*/ if (!uid_eq(iuid, uid)) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { ASSERT(mask & ATTR_UID); ASSERT(udqp); olddquot1 = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp); } ip->i_d.di_uid = xfs_kuid_to_uid(uid); inode->i_uid = uid; } if (!gid_eq(igid, gid)) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) || !XFS_IS_PQUOTA_ON(mp)); ASSERT(mask & ATTR_GID); ASSERT(gdqp); olddquot2 = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } ip->i_d.di_gid = xfs_kgid_to_gid(gid); inode->i_gid = gid; } } if (mask & ATTR_MODE) xfs_setattr_mode(ip, iattr); if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) xfs_setattr_time(ip, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(xs_ig_attrchg); if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); /* * Release any dquot(s) the inode had kept before chown. */ xfs_qm_dqrele(olddquot1); xfs_qm_dqrele(olddquot2); xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); if (error) return XFS_ERROR(error); /* * XXX(hch): Updating the ACL entries is not atomic vs the i_mode * update. We could avoid this with linked transactions * and passing down the transaction pointer all the way * to attr_set. No previous user of the generic * Posix ACL code seems to care about this issue either. */ if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { error = -posix_acl_chmod(inode, inode->i_mode); if (error) return XFS_ERROR(error); } return 0; out_trans_cancel: xfs_trans_cancel(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); out_dqrele: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); return error; }
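/*
 * A userspace sketch of the CAP_FSETID rule applied in
 * xfs_setattr_nonsize() above: an unprivileged chown strips the
 * set-user-ID and set-group-ID bits.  chown_mode() is a hypothetical
 * name, and the kernel's capable() check is modeled as a plain flag.
 */
#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>

static mode_t chown_mode(mode_t mode, bool has_cap_fsetid)
{
	if ((mode & (S_ISUID | S_ISGID)) && !has_cap_fsetid)
		mode &= ~(S_ISUID | S_ISGID);
	return mode;
}

int main(void)
{
	mode_t m = S_ISUID | S_ISGID | 0755;

	printf("unprivileged: %04o\n", (unsigned)chown_mode(m, false));
	printf("privileged:   %04o\n", (unsigned)chown_mode(m, true));
	/* prints 0755 and 6755 respectively */
	return 0;
}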
ssize_t /* bytes written, or (-) error */ xfs_write( bhv_desc_t *bdp, struct file *file, const char *buf, size_t size, loff_t *offset, int ioflags, cred_t *credp) { xfs_inode_t *xip; xfs_mount_t *mp; ssize_t ret; int error = 0; xfs_fsize_t isize, new_size; xfs_fsize_t n, limit; xfs_iocore_t *io; vnode_t *vp; int iolock; int eventsent = 0; vrwlock_t locktype; XFS_STATS_INC(xs_write_calls); vp = BHV_TO_VNODE(bdp); xip = XFS_BHVTOI(bdp); if (size == 0) return 0; io = &xip->i_iocore; mp = io->io_mount; fs_check_frozen(vp->v_vfsp, SB_FREEZE_WRITE); if (XFS_FORCED_SHUTDOWN(xip->i_mount)) { return -EIO; } if (unlikely(ioflags & IO_ISDIRECT)) { if (((__psint_t)buf & BBMASK) || (*offset & mp->m_blockmask) || (size & mp->m_blockmask)) { return XFS_ERROR(-EINVAL); } iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; } else { iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; } if (ioflags & IO_ISLOCKED) iolock = 0; xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); isize = xip->i_d.di_size; limit = XFS_MAXIOFFSET(mp); if (file->f_flags & O_APPEND) *offset = isize; start: n = limit - *offset; if (n <= 0) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return -EFBIG; } if (n < size) size = n; new_size = *offset + size; if (new_size > isize) { io->io_new_size = new_size; } if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { loff_t savedsize = *offset; int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); xfs_iunlock(xip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, *offset, size, dmflags, &locktype); if (error) { if (iolock) xfs_iunlock(xip, iolock); return -error; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; /* * The iolock was dropped and reacquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ if ((file->f_flags & O_APPEND) && savedsize != xip->i_d.di_size) { *offset = isize = xip->i_d.di_size; goto start; } } /* * If the offset is beyond the size of the file, we have a couple * of things to do. First, if there is already space allocated * we need to either create holes or zero the disk or ... * * If there is a page where the previous size lands, we need * to zero it out up to the new size. */ if (!(ioflags & IO_ISDIRECT) && (*offset > isize && isize)) { error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset, isize, *offset + size); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return(-error); } } xfs_iunlock(xip, XFS_ILOCK_EXCL); /* * If we're writing the file then make sure to clear the * setuid and setgid bits if the process is not being run * by root. This keeps people from modifying setuid and * setgid binaries. 
*/ if (((xip->i_d.di_mode & S_ISUID) || ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (error) { xfs_iunlock(xip, iolock); return -error; } } if ((ssize_t) size < 0) { ret = -EINVAL; goto error; } if (!access_ok(VERIFY_READ, buf, size)) { ret = -EFAULT; goto error; } retry: if (unlikely(ioflags & IO_ISDIRECT)) { xfs_inval_cached_pages(vp, io, *offset, 1, 1); xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, buf, size, *offset, ioflags); ret = do_generic_direct_write(file, buf, size, offset); } else { xfs_rw_enter_trace(XFS_WRITE_ENTER, io, buf, size, *offset, ioflags); ret = do_generic_file_write(file, buf, size, offset); } if (unlikely(ioflags & IO_INVIS)) { /* generic_file_write updates the mtime/ctime but we need * to undo that because this I/O was supposed to be * invisible. */ struct inode *inode = LINVFS_GET_IP(vp); inode->i_mtime = xip->i_d.di_mtime.t_sec; inode->i_ctime = xip->i_d.di_ctime.t_sec; } else { xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } if ((ret == -ENOSPC) && DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { xfs_rwunlock(bdp, locktype); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (error) return -error; xfs_rwlock(bdp, locktype); *offset = xip->i_d.di_size; goto retry; } error: if (ret <= 0) { if (iolock) xfs_rwunlock(bdp, locktype); return ret; } XFS_STATS_ADD(xs_write_bytes, ret); if (*offset > xip->i_d.di_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); if (*offset > xip->i_d.di_size) { struct inode *inode = LINVFS_GET_IP(vp); xip->i_d.di_size = *offset; i_size_write(inode, *offset); xip->i_update_core = 1; xip->i_update_size = 1; mark_inode_dirty_sync(inode); } xfs_iunlock(xip, XFS_ILOCK_EXCL); } /* Handle various SYNC-type writes */ if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) { /* * If we're treating this as O_DSYNC and we have not updated the * size, force the log. */ if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && !(xip->i_update_size)) { /* * If an allocation transaction occurred * without extending the size, then we have to force * the log up the proper point to ensure that the * allocation is permanent. We can't count on * the fact that buffered writes lock out direct I/O * writes - the direct I/O write could have extended * the size nontransactionally, then finished before * we started. xfs_write_file will think that the file * didn't grow but the update isn't safe unless the * size change is logged. * * Force the log if we've committed a transaction * against the inode or if someone else has and * the commit record hasn't gone to disk (e.g. * the inode is pinned). This guarantees that * all changes affecting the inode are permanent * when we return. */ xfs_inode_log_item_t *iip; xfs_lsn_t lsn; iip = xip->i_itemp; if (iip && iip->ili_last_lsn) { lsn = iip->ili_last_lsn; xfs_log_force(mp, lsn, XFS_LOG_FORCE | XFS_LOG_SYNC); } else if (xfs_ipincount(xip) > 0) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); } } else { xfs_trans_t *tp; /* * O_SYNC or O_DSYNC _with_ a size update are handled * the same way. * * If the write was synchronous then we need to make * sure that the inode modification time is permanent. * We'll have updated the timestamp above, so here * we use a synchronous transaction to log the inode. * It's not fast, but it's necessary. 
* * If this is a dsync write and the size got changed * non-transactionally, then we need to ensure that * the size change gets logged in a synchronous * transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); if ((error = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0))) { /* Transaction reserve failed */ xfs_trans_cancel(tp, 0); } else { /* Transaction reserve successful */ xfs_ilock(xip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, xip); xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0, NULL); xfs_iunlock(xip, XFS_ILOCK_EXCL); } } } /* (ioflags & O_SYNC) */ /* * If we are coming from an nfsd thread then insert into the * reference cache. */ if (!strcmp(current->comm, "nfsd")) xfs_refcache_insert(xip); /* Drop lock this way - the old refcache release is in here */ if (iolock) xfs_rwunlock(bdp, locktype); return(ret); }
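/*
 * A userspace sketch of the direct-I/O alignment test near the top of
 * this xfs_write() variant: buffer address, file offset, and length
 * must all be multiples of the device block (or sector) size, which a
 * single mask test checks cheaply.  dio_aligned() is a hypothetical
 * name, and 512 stands in for the real device geometry.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool dio_aligned(const void *buf, uint64_t offset, uint64_t len,
			uint64_t blocksize)
{
	/* blocksize must be a power of two for the mask trick to work */
	uint64_t mask = blocksize - 1;

	return (((uintptr_t)buf | offset | len) & mask) == 0;
}

int main(void)
{
	static char buf[1024] __attribute__((aligned(512)));

	printf("%d\n", dio_aligned(buf, 0, 512, 512));		/* 1 */
	printf("%d\n", dio_aligned(buf + 1, 0, 512, 512));	/* 0 */
	printf("%d\n", dio_aligned(buf, 3, 512, 512));		/* 0 */
	return 0;
}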
/* * xfs_rename */ int xfs_rename( bhv_desc_t *src_dir_bdp, bhv_vname_t *src_vname, bhv_vnode_t *target_dir_vp, bhv_vname_t *target_vname, cred_t *credp) { xfs_trans_t *tp; xfs_inode_t *src_dp, *target_dp, *src_ip, *target_ip; xfs_mount_t *mp; int new_parent; /* moving to a new dir */ int src_is_directory; /* src_name is a directory */ int error; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; int cancel_flags; int committed; xfs_inode_t *inodes[4]; int target_ip_dropped = 0; /* dropped target_ip link? */ bhv_vnode_t *src_dir_vp; int spaceres; int target_link_zero = 0; int num_inodes; char *src_name = VNAME(src_vname); char *target_name = VNAME(target_vname); int src_namelen = VNAMELEN(src_vname); int target_namelen = VNAMELEN(target_vname); src_dir_vp = BHV_TO_VNODE(src_dir_bdp); vn_trace_entry(src_dir_vp, "xfs_rename", (inst_t *)__return_address); vn_trace_entry(target_dir_vp, "xfs_rename", (inst_t *)__return_address); /* * Find the XFS behavior descriptor for the target directory * vnode since it was not handed to us. */ target_dp = xfs_vtoi(target_dir_vp); if (target_dp == NULL) { return XFS_ERROR(EXDEV); } src_dp = XFS_BHVTOI(src_dir_bdp); mp = src_dp->i_mount; if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) || DM_EVENT_ENABLED(target_dir_vp->v_vfsp, target_dp, DM_EVENT_RENAME)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME, src_dir_vp, DM_RIGHT_NULL, target_dir_vp, DM_RIGHT_NULL, src_name, target_name, 0, 0, 0); if (error) { return error; } } /* Return through std_return after this point. */ /* * Lock all the participating inodes. Depending upon whether * the target_name exists in the target directory, and * whether the target directory is the same as the source * directory, we can lock from 2 to 4 inodes. * xfs_lock_for_rename() will return ENOENT if src_name * does not exist in the source directory. */ tp = NULL; error = xfs_lock_for_rename(src_dp, target_dp, src_vname, target_vname, &src_ip, &target_ip, inodes, &num_inodes); if (error) { /* * We have nothing locked, no inode references, and * no transaction, so just get out. */ goto std_return; } ASSERT(src_ip != NULL); if ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { /* * Check for link count overflow on target_dp */ if (target_ip == NULL && (src_dp != target_dp) && target_dp->i_d.di_nlink >= XFS_MAXLINK) { error = XFS_ERROR(EMLINK); xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); goto rele_return; } } /* * If we are using project inheritance, we only allow renames * into our tree when the project IDs are the same; else the * tree quota mechanism would be circumvented. */ if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { error = XFS_ERROR(EXDEV); xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); goto rele_return; } new_parent = (src_dp != target_dp); src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); /* * Drop the locks on our inodes so that we can start the transaction. 
*/ xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); XFS_BMAP_INIT(&free_list, &first_block); tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); cancel_flags = XFS_TRANS_RELEASE_LOG_RES; spaceres = XFS_RENAME_SPACE_RES(mp, target_namelen); error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); if (error == ENOSPC) { spaceres = 0; error = xfs_trans_reserve(tp, 0, XFS_RENAME_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); } if (error) { xfs_trans_cancel(tp, 0); goto rele_return; } /* * Attach the dquots to the inodes */ if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { xfs_trans_cancel(tp, cancel_flags); goto rele_return; } /* * Reacquire the inode locks we dropped above. */ xfs_lock_inodes(inodes, num_inodes, 0, XFS_ILOCK_EXCL); /* * Join all the inodes to the transaction. From this point on, * we can rely on either trans_commit or trans_cancel to unlock * them. Note that we need to add a vnode reference to the * directories since trans_commit & trans_cancel will decrement * them when they unlock the inodes. Also, we need to be careful * not to add an inode to the transaction more than once. */ VN_HOLD(src_dir_vp); xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); if (new_parent) { VN_HOLD(target_dir_vp); xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); } if ((src_ip != src_dp) && (src_ip != target_dp)) { xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); } if ((target_ip != NULL) && (target_ip != src_ip) && (target_ip != src_dp) && (target_ip != target_dp)) { xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); } /* * Set up the target. */ if (target_ip == NULL) { /* * If there's no space reservation, check the entry will * fit before actually inserting it. */ if (spaceres == 0 && (error = xfs_dir_canenter(tp, target_dp, target_name, target_namelen))) goto error_return; /* * If target does not exist and the rename crosses * directories, adjust the target directory link count * to account for the ".." reference from the new entry. */ error = xfs_dir_createname(tp, target_dp, target_name, target_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error == ENOSPC) goto error_return; if (error) goto abort_return; xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); if (new_parent && src_is_directory) { error = xfs_bumplink(tp, target_dp); if (error) goto abort_return; } } else { /* target_ip != NULL */ /* * If target exists and it's a directory, check that both * target and source are directories and that target can be * destroyed, or that neither is a directory. */ if ((target_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { /* * Make sure target dir is empty. */ if (!(xfs_dir_isempty(target_ip)) || (target_ip->i_d.di_nlink > 2)) { error = XFS_ERROR(EEXIST); goto error_return; } } /* * Link the source inode under the target name. * If the source inode is a directory and we are moving * it across directories, its ".." entry will be * inconsistent until we replace that down below. * * In case there is already an entry with the same * name at the destination directory, remove it first. */ error = xfs_dir_replace(tp, target_dp, target_name, target_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) goto abort_return; xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * Decrement the link count on the target since the target * dir no longer points to it. */ error = xfs_droplink(tp, target_ip); if (error) goto abort_return; target_ip_dropped = 1; if (src_is_directory) { /* * Drop the link from the old "." 
entry. */ error = xfs_droplink(tp, target_ip); if (error) goto abort_return; } /* Do this test while we still hold the locks */ target_link_zero = (target_ip)->i_d.di_nlink==0; } /* target_ip != NULL */ /* * Remove the source. */ if (new_parent && src_is_directory) { /* * Rewrite the ".." entry to point to the new * directory. */ error = xfs_dir_replace(tp, src_ip, "..", 2, target_dp->i_ino, &first_block, &free_list, spaceres); ASSERT(error != EEXIST); if (error) goto abort_return; xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } else { /* * We always want to hit the ctime on the source inode. * We do it in the if clause above for the 'new_parent && * src_is_directory' case, and here we get all the other * cases. This isn't strictly required by the standards * since the source inode isn't really being changed, * but old unix file systems did it and some incremental * backup programs won't work without it. */ xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG); } /* * Adjust the link count on src_dp. This is necessary when * renaming a directory, either within one parent when * the target existed, or across two parent directories. */ if (src_is_directory && (new_parent || target_ip != NULL)) { /* * Decrement link count on src_directory since the * entry that's moved no longer points to it. */ error = xfs_droplink(tp, src_dp); if (error) goto abort_return; } error = xfs_dir_removename(tp, src_dp, src_name, src_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) goto abort_return; xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * Update the generation counts on all the directory inodes * that we're modifying. */ src_dp->i_gen++; xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); if (new_parent) { target_dp->i_gen++; xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); } /* * If there was a target inode, take an extra reference on * it here so that it doesn't go to xfs_inactive() from * within the commit. */ if (target_ip != NULL) { IHOLD(target_ip); } /* * If this is a synchronous mount, make sure that the * rename transaction goes to disk before returning to * the user. */ if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { xfs_trans_set_sync(tp); } /* * Take refs. for vop_link_removed calls below. No need to worry * about directory refs. because the caller holds them. * * Do holds before the xfs_bmap_finish since it might rele them down * to zero. */ if (target_ip_dropped) IHOLD(target_ip); IHOLD(src_ip); error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); if (target_ip != NULL) { IRELE(target_ip); } if (target_ip_dropped) { IRELE(target_ip); } IRELE(src_ip); goto std_return; } /* * trans_commit will unlock src_ip, target_ip & decrement * the vnode references. */ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (target_ip != NULL) { xfs_refcache_purge_ip(target_ip); IRELE(target_ip); } /* * Let interposed file systems know about removed links. 
*/ if (target_ip_dropped) { bhv_vop_link_removed(XFS_ITOV(target_ip), target_dir_vp, target_link_zero); IRELE(target_ip); } IRELE(src_ip); /* Fall through to std_return with error = 0 or errno from * xfs_trans_commit */ std_return: if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_POSTRENAME) || DM_EVENT_ENABLED(target_dir_vp->v_vfsp, target_dp, DM_EVENT_POSTRENAME)) { (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME, src_dir_vp, DM_RIGHT_NULL, target_dir_vp, DM_RIGHT_NULL, src_name, target_name, 0, error, 0); } return error; abort_return: cancel_flags |= XFS_TRANS_ABORT; /* FALLTHROUGH */ error_return: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, cancel_flags); goto std_return; rele_return: IRELE(src_ip); if (target_ip != NULL) { IRELE(target_ip); } goto std_return; }
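/*
 * Both rename variants lock up to four inodes, and
 * xfs_lock_for_rename()/xfs_lock_inodes() avoid ABBA deadlocks by
 * always acquiring locks in a single global order (ascending inode
 * number).  A minimal pthread sketch of that discipline follows; the
 * node type and sort-by-id rule are hypothetical stand-ins, and the
 * inodes are assumed distinct (the kernel deduplicates first).
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	unsigned long	id;		/* stand-in for an inode number */
	pthread_mutex_t	lock;
};

static int cmp_node(const void *a, const void *b)
{
	const struct node *na = *(struct node *const *)a;
	const struct node *nb = *(struct node *const *)b;

	return (na->id > nb->id) - (na->id < nb->id);
}

/* Lock every node in ascending-id order, regardless of caller order. */
static void lock_nodes(struct node **nodes, int count)
{
	int i;

	qsort(nodes, count, sizeof(*nodes), cmp_node);
	for (i = 0; i < count; i++)
		pthread_mutex_lock(&nodes[i]->lock);
}

int main(void)
{
	struct node a = { 7, PTHREAD_MUTEX_INITIALIZER };
	struct node b = { 3, PTHREAD_MUTEX_INITIALIZER };
	struct node *set[2] = { &a, &b };

	lock_nodes(set, 2);		/* locks b (id 3) before a (id 7) */
	printf("locked %lu then %lu\n", set[0]->id, set[1]->id);
	pthread_mutex_unlock(&a.lock);
	pthread_mutex_unlock(&b.lock);
	return 0;
}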
/* * Add a name to a Btree-format attribute list. * * This will involve walking down the Btree, and may involve splitting * leaf nodes and even splitting intermediate nodes up to and including * the root node (a special case of an intermediate node). * * "Remote" attribute values confuse the issue and atomic rename operations * add a whole extra layer of confusion on top of that. */ STATIC int xfs_attr_node_addname(xfs_da_args_t *args) { xfs_da_state_t *state; xfs_da_state_blk_t *blk; xfs_inode_t *dp; xfs_mount_t *mp; int committed, retval, error; trace_xfs_attr_node_addname(args); /* * Fill in bucket of arguments/results/context to carry around. */ dp = args->dp; mp = dp->i_mount; restart: state = xfs_da_state_alloc(); state->args = args; state->mp = mp; /* * Search to see if name already exists, and get back a pointer * to where it should go. */ error = xfs_da3_node_lookup_int(state, &retval); if (error) goto out; blk = &state->path.blk[ state->path.active-1 ]; ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); if ((args->flags & ATTR_REPLACE) && (retval == -ENOATTR)) { goto out; } else if (retval == -EEXIST) { if (args->flags & ATTR_CREATE) goto out; trace_xfs_attr_node_replace(args); /* save the attribute state for later removal */ args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */ args->blkno2 = args->blkno; /* set 2nd entry info */ args->index2 = args->index; args->rmtblkno2 = args->rmtblkno; args->rmtblkcnt2 = args->rmtblkcnt; args->rmtvaluelen2 = args->rmtvaluelen; /* * clear the remote attr state now that it is saved so that the * values reflect the state of the attribute we are about to * add, not the attribute we just found and will remove later. */ args->rmtblkno = 0; args->rmtblkcnt = 0; args->rmtvaluelen = 0; } retval = xfs_attr3_leaf_add(blk->bp, state->args); if (retval == -ENOSPC) { if (state->path.active == 1) { /* * It's really a single leaf node, but it had * out-of-line values so it looked like it *might* * have been a b-tree. */ xfs_da_state_free(state); state = NULL; xfs_bmap_init(args->flist, args->firstblock); error = xfs_attr3_leaf_to_node(args); if (!error) { error = xfs_bmap_finish(&args->trans, args->flist, &committed); } if (error) { ASSERT(committed); args->trans = NULL; xfs_bmap_cancel(args->flist); goto out; } /* * bmap_finish() may have committed the last trans * and started a new one. We need the inode to be * in all transactions. */ if (committed) xfs_trans_ijoin(args->trans, dp, 0); /* * Commit the node conversion and start the next * trans in the chain. */ error = xfs_trans_roll(&args->trans, dp); if (error) goto out; goto restart; } /* * Split as many Btree elements as required. * This code tracks the new and old attr's location * in the index/blkno/rmtblkno/rmtblkcnt fields and * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields. */ xfs_bmap_init(args->flist, args->firstblock); error = xfs_da3_split(state); if (!error) { error = xfs_bmap_finish(&args->trans, args->flist, &committed); } if (error) { ASSERT(committed); args->trans = NULL; xfs_bmap_cancel(args->flist); goto out; } /* * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. */ if (committed) xfs_trans_ijoin(args->trans, dp, 0); } else { /* * Addition succeeded, update Btree hashvals. */ xfs_da3_fixhashpath(state, &state->path); } /* * Kill the state structure, we're done with it and need to * allow the buffers to come back later. 
*/ xfs_da_state_free(state); state = NULL; /* * Commit the leaf addition or btree split and start the next * trans in the chain. */ error = xfs_trans_roll(&args->trans, dp); if (error) goto out; /* * If there was an out-of-line value, allocate the blocks we * identified for its storage and copy the value. This is done * after we create the attribute so that we don't overflow the * maximum size of a transaction and/or hit a deadlock. */ if (args->rmtblkno > 0) { error = xfs_attr_rmtval_set(args); if (error) return error; } /* * If this is an atomic rename operation, we must "flip" the * incomplete flags on the "new" and "old" attribute/value pairs * so that one disappears and one appears atomically. Then we * must remove the "old" attribute/value pair. */ if (args->op_flags & XFS_DA_OP_RENAME) { /* * In a separate transaction, set the incomplete flag on the * "old" attr and clear the incomplete flag on the "new" attr. */ error = xfs_attr3_leaf_flipflags(args); if (error) goto out; /* * Dismantle the "old" attribute/value pair by removing * a "remote" value (if it exists). */ args->index = args->index2; args->blkno = args->blkno2; args->rmtblkno = args->rmtblkno2; args->rmtblkcnt = args->rmtblkcnt2; args->rmtvaluelen = args->rmtvaluelen2; if (args->rmtblkno) { error = xfs_attr_rmtval_remove(args); if (error) return error; } /* * Re-find the "old" attribute entry after any split ops. * The INCOMPLETE flag means that we will find the "old" * attr, not the "new" one. */ args->flags |= XFS_ATTR_INCOMPLETE; state = xfs_da_state_alloc(); state->args = args; state->mp = mp; state->inleaf = 0; error = xfs_da3_node_lookup_int(state, &retval); if (error) goto out; /* * Remove the name and update the hashvals in the tree. */ blk = &state->path.blk[ state->path.active-1 ]; ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC); error = xfs_attr3_leaf_remove(blk->bp, args); xfs_da3_fixhashpath(state, &state->path); /* * Check to see if the tree needs to be collapsed. */ if (retval && (state->path.active > 1)) { xfs_bmap_init(args->flist, args->firstblock); error = xfs_da3_join(state); if (!error) { error = xfs_bmap_finish(&args->trans, args->flist, &committed); } if (error) { ASSERT(committed); args->trans = NULL; xfs_bmap_cancel(args->flist); goto out; } /* * bmap_finish() may have committed the last trans * and started a new one. We need the inode to be * in all transactions. */ if (committed) xfs_trans_ijoin(args->trans, dp, 0); } /* * Commit and start the next trans in the chain. */ error = xfs_trans_roll(&args->trans, dp); if (error) goto out; } else if (args->rmtblkno > 0) { /* * Added a "remote" value, just clear the incomplete flag. */ error = xfs_attr3_leaf_clearflag(args); if (error) goto out; } retval = error = 0; out: if (state) xfs_da_state_free(state); if (error) return error; return retval; }
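/*
 * A userspace model of the "atomic rename" trick used by
 * xfs_attr_node_addname() above.  Two copies of the attribute coexist
 * on disk, each carrying an INCOMPLETE flag that lookups ignore; one
 * transaction flips both flags so that exactly one copy is ever
 * visible.  The types and the flip helper below are hypothetical.
 */
#include <stdbool.h>
#include <stdio.h>

#define ATTR_INCOMPLETE	0x1

struct attr_entry {
	const char	*value;
	unsigned	flags;
};

static const char *lookup(struct attr_entry *e, int count)
{
	for (int i = 0; i < count; i++)
		if (!(e[i].flags & ATTR_INCOMPLETE))
			return e[i].value;
	return NULL;
}

/* Model of xfs_attr3_leaf_flipflags(): one atomic visibility swap. */
static void flipflags(struct attr_entry *olde, struct attr_entry *newe)
{
	olde->flags |= ATTR_INCOMPLETE;
	newe->flags &= ~ATTR_INCOMPLETE;
}

int main(void)
{
	struct attr_entry e[2] = {
		{ "old-value", 0 },			/* visible */
		{ "new-value", ATTR_INCOMPLETE },	/* staged  */
	};

	printf("before flip: %s\n", lookup(e, 2));	/* old-value */
	flipflags(&e[0], &e[1]);
	printf("after flip:  %s\n", lookup(e, 2));	/* new-value */
	return 0;
}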
/* * Handle logging requirements of various synchronous types of write. */ int xfs_write_sync_logforce( xfs_mount_t *mp, xfs_inode_t *ip) { int error = 0; /* * If we're treating this as O_DSYNC and we have not updated the * size, force the log. */ if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && !(ip->i_update_size)) { xfs_inode_log_item_t *iip = ip->i_itemp; /* * If an allocation transaction occurred * without extending the size, then we have to force * the log up the proper point to ensure that the * allocation is permanent. We can't count on * the fact that buffered writes lock out direct I/O * writes - the direct I/O write could have extended * the size nontransactionally, then finished before * we started. xfs_write_file will think that the file * didn't grow but the update isn't safe unless the * size change is logged. * * Force the log if we've committed a transaction * against the inode or if someone else has and * the commit record hasn't gone to disk (e.g. * the inode is pinned). This guarantees that * all changes affecting the inode are permanent * when we return. */ if (iip && iip->ili_last_lsn) { xfs_log_force(mp, iip->ili_last_lsn, XFS_LOG_FORCE | XFS_LOG_SYNC); } else if (xfs_ipincount(ip) > 0) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); } } else { xfs_trans_t *tp; /* * O_SYNC or O_DSYNC _with_ a size update are handled * the same way. * * If the write was synchronous then we need to make * sure that the inode modification time is permanent. * We'll have updated the timestamp above, so here * we use a synchronous transaction to log the inode. * It's not fast, but it's necessary. * * If this is a dsync write and the size got changed * non-transactionally, then we need to ensure that * the size change gets logged in a synchronous * transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); if ((error = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0))) { /* Transaction reserve failed */ xfs_trans_cancel(tp, 0); } else { /* Transaction reserve successful */ xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0, NULL); xfs_iunlock(ip, XFS_ILOCK_EXCL); } } return error; }
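/*
 * A compact restatement of the decision xfs_write_sync_logforce()
 * makes, as a standalone sketch.  If a synchronous write did not
 * change the file size, forcing the log up to the inode's last LSN
 * (or unconditionally while the inode is pinned) is sufficient; a
 * size update instead needs a synchronous inode transaction.  All
 * names below are hypothetical.
 */
#include <stdbool.h>
#include <stdio.h>

enum sync_action {
	SYNC_FORCE_LOG_TO_LSN,	/* push log to the inode's last LSN */
	SYNC_FORCE_LOG_ALL,	/* inode pinned: push the whole log */
	SYNC_NOTHING,		/* nothing in flight for this inode */
	SYNC_LOG_INODE,		/* size changed: log inode synchronously */
};

static enum sync_action pick_sync_action(bool osync_is_osync,
					 bool size_updated,
					 bool has_last_lsn,
					 bool inode_pinned)
{
	if (!osync_is_osync && !size_updated) {
		if (has_last_lsn)
			return SYNC_FORCE_LOG_TO_LSN;
		if (inode_pinned)
			return SYNC_FORCE_LOG_ALL;
		return SYNC_NOTHING;
	}
	return SYNC_LOG_INODE;
}

int main(void)
{
	printf("%d\n", pick_sync_action(false, false, true, false)); /* 0 */
	printf("%d\n", pick_sync_action(false, true, false, false)); /* 3 */
	return 0;
}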
int xfs_attr_set( struct xfs_inode *dp, const unsigned char *name, unsigned char *value, int valuelen, int flags) { struct xfs_mount *mp = dp->i_mount; struct xfs_buf *leaf_bp = NULL; struct xfs_da_args args; struct xfs_trans_res tres; int rsvd = (flags & ATTR_ROOT) != 0; int error, err2, local; XFS_STATS_INC(mp, xs_attr_set); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; error = xfs_attr_args_init(&args, dp, name, flags); if (error) return error; args.value = value; args.valuelen = valuelen; args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; args.total = xfs_attr_calc_size(&args, &local); error = xfs_qm_dqattach(dp); if (error) return error; /* * If the inode doesn't have an attribute fork, add one. * (inode must not be locked when we call this routine) */ if (XFS_IFORK_Q(dp) == 0) { int sf_size = sizeof(xfs_attr_sf_hdr_t) + XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen); error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); if (error) return error; } tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres + M_RES(mp)->tr_attrsetrt.tr_logres * args.total; tres.tr_logcount = XFS_ATTRSET_LOG_COUNT; tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; /* * Root fork attributes can use reserved data blocks for this * operation if necessary */ error = xfs_trans_alloc(mp, &tres, args.total, 0, rsvd ? XFS_TRANS_RESERVE : 0, &args.trans); if (error) return error; xfs_ilock(dp, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); if (error) { xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_trans_cancel(args.trans); return error; } xfs_trans_ijoin(args.trans, dp, 0); /* * If the attribute list is non-existent or a shortform list, * upgrade it to a single-leaf-block attribute list. */ if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL || (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && dp->i_d.di_anextents == 0)) { /* * Build initial attribute list (if required). */ if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) xfs_attr_shortform_create(&args); /* * Try to add the attr to the attribute list in * the inode. */ error = xfs_attr_shortform_addname(&args); if (error != -ENOSPC) { /* * Commit the shortform mods, and we're done. * NOTE: this is also the error path (EEXIST, etc). */ ASSERT(args.trans != NULL); /* * If this is a synchronous mount, make sure that * the transaction goes to disk before returning * to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); if (!error && (flags & ATTR_KERNOTIME) == 0) { xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); } err2 = xfs_trans_commit(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error ? error : err2; } /* * It won't fit in the shortform, transform to a leaf block. * GROT: another possible req'mt for a double-split btree op. */ error = xfs_attr_shortform_to_leaf(&args, &leaf_bp); if (error) goto out; /* * Prevent the leaf buffer from being unlocked so that a * concurrent AIL push cannot grab the half-baked leaf * buffer and run into problems with the write verifier. */ xfs_trans_bhold(args.trans, leaf_bp); error = xfs_defer_finish(&args.trans); if (error) goto out; /* * Commit the leaf transformation. We'll need another (linked) * transaction to add the new attribute to the leaf, which * means that we have to hold & join the leaf buffer here too. 
*/ error = xfs_trans_roll_inode(&args.trans, dp); if (error) goto out; xfs_trans_bjoin(args.trans, leaf_bp); leaf_bp = NULL; } if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) error = xfs_attr_leaf_addname(&args); else error = xfs_attr_node_addname(&args); if (error) goto out; /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); if ((flags & ATTR_KERNOTIME) == 0) xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); /* * Commit the last in the sequence of transactions. */ xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); error = xfs_trans_commit(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; out: if (leaf_bp) xfs_trans_brelse(args.trans, leaf_bp); if (args.trans) xfs_trans_cancel(args.trans); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; }
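/*
 * xfs_attr_set() above sizes a would-be shortform entry before
 * deciding whether the attribute can live inline in the inode's attr
 * fork or must be converted to a leaf block.  A rough userspace model
 * follows; the header size, per-entry overhead, and fork-size limit
 * are illustrative numbers only, not the XFS on-disk format.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SF_HDR_SIZE	4	/* illustrative header size */
#define SF_ENTRY_FIXED	3	/* illustrative per-entry overhead */

static uint32_t sf_entsize(uint32_t namelen, uint32_t valuelen)
{
	return SF_ENTRY_FIXED + namelen + valuelen;
}

/* Can a first attribute of this size live inline in @forkavail bytes? */
static bool attr_fits_inline(uint32_t namelen, uint32_t valuelen,
			     uint32_t forkavail)
{
	return SF_HDR_SIZE + sf_entsize(namelen, valuelen) <= forkavail;
}

int main(void)
{
	printf("small attr fits: %d\n",
	       attr_fits_inline(8, 16, 128));	/* 1 */
	printf("large attr fits: %d\n",
	       attr_fits_inline(8, 4096, 128));	/* 0 */
	return 0;
}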