/* * xfs_rename */ int xfs_rename( bhv_desc_t *src_dir_bdp, vname_t *src_vname, vnode_t *target_dir_vp, vname_t *target_vname, cred_t *credp) { xfs_trans_t *tp; xfs_inode_t *src_dp, *target_dp, *src_ip, *target_ip; xfs_mount_t *mp; int new_parent; /* moving to a new dir */ int src_is_directory; /* src_name is a directory */ int error; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; int cancel_flags; int committed; xfs_inode_t *inodes[4]; int target_ip_dropped = 0; /* dropped target_ip link? */ vnode_t *src_dir_vp; bhv_desc_t *target_dir_bdp; int spaceres; int target_link_zero = 0; int num_inodes; char *src_name = VNAME(src_vname); char *target_name = VNAME(target_vname); int src_namelen = VNAMELEN(src_vname); int target_namelen = VNAMELEN(target_vname); src_dir_vp = BHV_TO_VNODE(src_dir_bdp); vn_trace_entry(src_dir_vp, "xfs_rename", (inst_t *)__return_address); vn_trace_entry(target_dir_vp, "xfs_rename", (inst_t *)__return_address); /* * Find the XFS behavior descriptor for the target directory * vnode since it was not handed to us. */ target_dir_bdp = vn_bhv_lookup_unlocked(VN_BHV_HEAD(target_dir_vp), &xfs_vnodeops); if (target_dir_bdp == NULL) { return XFS_ERROR(EXDEV); } src_dp = XFS_BHVTOI(src_dir_bdp); target_dp = XFS_BHVTOI(target_dir_bdp); mp = src_dp->i_mount; if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_RENAME) || DM_EVENT_ENABLED(target_dir_vp->v_vfsp, target_dp, DM_EVENT_RENAME)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME, src_dir_vp, DM_RIGHT_NULL, target_dir_vp, DM_RIGHT_NULL, src_name, target_name, 0, 0, 0); if (error) { return error; } } /* Return through std_return after this point. */ /* * Lock all the participating inodes. Depending upon whether * the target_name exists in the target directory, and * whether the target directory is the same as the source * directory, we can lock from 2 to 4 inodes. * xfs_lock_for_rename() will return ENOENT if src_name * does not exist in the source directory. */ tp = NULL; error = xfs_lock_for_rename(src_dp, target_dp, src_vname, target_vname, &src_ip, &target_ip, inodes, &num_inodes); if (error) { /* * We have nothing locked, no inode references, and * no transaction, so just get out. */ goto std_return; } ASSERT(src_ip != NULL); if ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { /* * Check for link count overflow on target_dp */ if (target_ip == NULL && (src_dp != target_dp) && target_dp->i_d.di_nlink >= XFS_MAXLINK) { error = XFS_ERROR(EMLINK); xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); goto rele_return; } } new_parent = (src_dp != target_dp); src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); /* * Drop the locks on our inodes so that we can start the transaction. */ xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); XFS_BMAP_INIT(&free_list, &first_block); tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); cancel_flags = XFS_TRANS_RELEASE_LOG_RES; spaceres = XFS_RENAME_SPACE_RES(mp, target_namelen); error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); if (error == ENOSPC) { spaceres = 0; error = xfs_trans_reserve(tp, 0, XFS_RENAME_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); } if (error) { xfs_trans_cancel(tp, 0); goto rele_return; } /* * Attach the dquots to the inodes */ if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { xfs_trans_cancel(tp, cancel_flags); goto rele_return; } /* * Reacquire the inode locks we dropped above. */ xfs_lock_inodes(inodes, num_inodes, 0, XFS_ILOCK_EXCL); /* * Join all the inodes to the transaction. From this point on, * we can rely on either trans_commit or trans_cancel to unlock * them. Note that we need to add a vnode reference to the * directories since trans_commit & trans_cancel will decrement * them when they unlock the inodes. Also, we need to be careful * not to add an inode to the transaction more than once. */ VN_HOLD(src_dir_vp); xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); if (new_parent) { VN_HOLD(target_dir_vp); xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); } if ((src_ip != src_dp) && (src_ip != target_dp)) { xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); } if ((target_ip != NULL) && (target_ip != src_ip) && (target_ip != src_dp) && (target_ip != target_dp)) { xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); } /* * Set up the target. */ if (target_ip == NULL) { /* * If there's no space reservation, check the entry will * fit before actually inserting it. */ if (spaceres == 0 && (error = XFS_DIR_CANENTER(mp, tp, target_dp, target_name, target_namelen))) { goto error_return; } /* * If target does not exist and the rename crosses * directories, adjust the target directory link count * to account for the ".." reference from the new entry. */ error = XFS_DIR_CREATENAME(mp, tp, target_dp, target_name, target_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error == ENOSPC) { goto error_return; } if (error) { goto abort_return; } xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); if (new_parent && src_is_directory) { error = xfs_bumplink(tp, target_dp); if (error) { goto abort_return; } } } else { /* target_ip != NULL */ /* * If target exists and it's a directory, check that both * target and source are directories and that target can be * destroyed, or that neither is a directory. */ if ((target_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { /* * Make sure target dir is empty. */ if (!(XFS_DIR_ISEMPTY(target_ip->i_mount, target_ip)) || (target_ip->i_d.di_nlink > 2)) { error = XFS_ERROR(EEXIST); goto error_return; } } /* * Link the source inode under the target name. * If the source inode is a directory and we are moving * it across directories, its ".." entry will be * inconsistent until we replace that down below. * * In case there is already an entry with the same * name at the destination directory, remove it first. */ error = XFS_DIR_REPLACE(mp, tp, target_dp, target_name, target_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) { goto abort_return; } xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * Decrement the link count on the target since the target * dir no longer points to it. */ error = xfs_droplink(tp, target_ip); if (error) { goto abort_return; } target_ip_dropped = 1; if (src_is_directory) { /* * Drop the link from the old "." entry. */ error = xfs_droplink(tp, target_ip); if (error) { goto abort_return; } } /* Do this test while we still hold the locks */ target_link_zero = (target_ip)->i_d.di_nlink==0; } /* target_ip != NULL */ /* * Remove the source. */ if (new_parent && src_is_directory) { /* * Rewrite the ".." entry to point to the new * directory. */ error = XFS_DIR_REPLACE(mp, tp, src_ip, "..", 2, target_dp->i_ino, &first_block, &free_list, spaceres); ASSERT(error != EEXIST); if (error) { goto abort_return; } xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } else { /* * We always want to hit the ctime on the source inode. * We do it in the if clause above for the 'new_parent && * src_is_directory' case, and here we get all the other * cases. This isn't strictly required by the standards * since the source inode isn't really being changed, * but old unix file systems did it and some incremental * backup programs won't work without it. */ xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG); } /* * Adjust the link count on src_dp. This is necessary when * renaming a directory, either within one parent when * the target existed, or across two parent directories. */ if (src_is_directory && (new_parent || target_ip != NULL)) { /* * Decrement link count on src_directory since the * entry that's moved no longer points to it. */ error = xfs_droplink(tp, src_dp); if (error) { goto abort_return; } } error = XFS_DIR_REMOVENAME(mp, tp, src_dp, src_name, src_namelen, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) { goto abort_return; } xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * Update the generation counts on all the directory inodes * that we're modifying. */ src_dp->i_gen++; xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); if (new_parent) { target_dp->i_gen++; xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); } /* * If there was a target inode, take an extra reference on * it here so that it doesn't go to xfs_inactive() from * within the commit. */ if (target_ip != NULL) { IHOLD(target_ip); } /* * If this is a synchronous mount, make sure that the * rename transaction goes to disk before returning to * the user. */ if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { xfs_trans_set_sync(tp); } /* * Take refs. for vop_link_removed calls below. No need to worry * about directory refs. because the caller holds them. * * Do holds before the xfs_bmap_finish since it might rele them down * to zero. */ if (target_ip_dropped) IHOLD(target_ip); IHOLD(src_ip); error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); if (error) { xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); if (target_ip != NULL) { IRELE(target_ip); } if (target_ip_dropped) { IRELE(target_ip); } IRELE(src_ip); goto std_return; } /* * trans_commit will unlock src_ip, target_ip & decrement * the vnode references. */ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); if (target_ip != NULL) { xfs_refcache_purge_ip(target_ip); IRELE(target_ip); } /* * Let interposed file systems know about removed links. */ if (target_ip_dropped) { VOP_LINK_REMOVED(XFS_ITOV(target_ip), target_dir_vp, target_link_zero); IRELE(target_ip); } FSC_NOTIFY_NAME_CHANGED(XFS_ITOV(src_ip)); IRELE(src_ip); /* Fall through to std_return with error = 0 or errno from * xfs_trans_commit */ std_return: if (DM_EVENT_ENABLED(src_dir_vp->v_vfsp, src_dp, DM_EVENT_POSTRENAME) || DM_EVENT_ENABLED(target_dir_vp->v_vfsp, target_dp, DM_EVENT_POSTRENAME)) { (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME, src_dir_vp, DM_RIGHT_NULL, target_dir_vp, DM_RIGHT_NULL, src_name, target_name, 0, error, 0); } return error; abort_return: cancel_flags |= XFS_TRANS_ABORT; /* FALLTHROUGH */ error_return: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, cancel_flags); goto std_return; rele_return: IRELE(src_ip); if (target_ip != NULL) { IRELE(target_ip); } goto std_return; }
int xfs_attr_set( struct xfs_inode *dp, const unsigned char *name, unsigned char *value, int valuelen, int flags) { struct xfs_mount *mp = dp->i_mount; struct xfs_da_args args; struct xfs_bmap_free flist; struct xfs_trans_res tres; xfs_fsblock_t firstblock; int rsvd = (flags & ATTR_ROOT) != 0; int error, err2, committed, local; XFS_STATS_INC(xs_attr_set); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; error = xfs_attr_args_init(&args, dp, name, flags); if (error) return error; args.value = value; args.valuelen = valuelen; args.firstblock = &firstblock; args.flist = &flist; args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; args.total = xfs_attr_calc_size(&args, &local); error = xfs_qm_dqattach(dp, 0); if (error) return error; /* * If the inode doesn't have an attribute fork, add one. * (inode must not be locked when we call this routine) */ if (XFS_IFORK_Q(dp) == 0) { int sf_size = sizeof(xfs_attr_sf_hdr_t) + XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen); error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); if (error) return error; } /* * Start our first transaction of the day. * * All future transactions during this code must be "chained" off * this one via the trans_dup() call. All transactions will contain * the inode, and the inode will always be marked with trans_ihold(). * Since the inode will be locked in all transactions, we must log * the inode in every transaction to let it float upward through * the log. */ args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET); /* * Root fork attributes can use reserved data blocks for this * operation if necessary */ if (rsvd) args.trans->t_flags |= XFS_TRANS_RESERVE; tres.tr_logres = M_RES(mp)->tr_attrsetm.tr_logres + M_RES(mp)->tr_attrsetrt.tr_logres * args.total; tres.tr_logcount = XFS_ATTRSET_LOG_COUNT; tres.tr_logflags = XFS_TRANS_PERM_LOG_RES; error = xfs_trans_reserve(args.trans, &tres, args.total, 0); if (error) { xfs_trans_cancel(args.trans, 0); return error; } xfs_ilock(dp, XFS_ILOCK_EXCL); error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0, rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : XFS_QMOPT_RES_REGBLKS); if (error) { xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); return error; } xfs_trans_ijoin(args.trans, dp, 0); /* * If the attribute list is non-existent or a shortform list, * upgrade it to a single-leaf-block attribute list. */ if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL || (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && dp->i_d.di_anextents == 0)) { /* * Build initial attribute list (if required). */ if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) xfs_attr_shortform_create(&args); /* * Try to add the attr to the attribute list in * the inode. */ error = xfs_attr_shortform_addname(&args); if (error != -ENOSPC) { /* * Commit the shortform mods, and we're done. * NOTE: this is also the error path (EEXIST, etc). */ ASSERT(args.trans != NULL); /* * If this is a synchronous mount, make sure that * the transaction goes to disk before returning * to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); if (!error && (flags & ATTR_KERNOTIME) == 0) { xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); } err2 = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error ? error : err2; } /* * It won't fit in the shortform, transform to a leaf block. * GROT: another possible req'mt for a double-split btree op. */ xfs_bmap_init(args.flist, args.firstblock); error = xfs_attr_shortform_to_leaf(&args); if (!error) { error = xfs_bmap_finish(&args.trans, args.flist, &committed); } if (error) { ASSERT(committed); args.trans = NULL; xfs_bmap_cancel(&flist); goto out; } /* * bmap_finish() may have committed the last trans and started * a new one. We need the inode to be in all transactions. */ if (committed) xfs_trans_ijoin(args.trans, dp, 0); /* * Commit the leaf transformation. We'll need another (linked) * transaction to add the new attribute to the leaf. */ error = xfs_trans_roll(&args.trans, dp); if (error) goto out; } if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) error = xfs_attr_leaf_addname(&args); else error = xfs_attr_node_addname(&args); if (error) goto out; /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); if ((flags & ATTR_KERNOTIME) == 0) xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); /* * Commit the last in the sequence of transactions. */ xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; out: if (args.trans) { xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); } xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; }
/* * Allocates a new inode from disk and return a pointer to the * incore copy. This routine will internally commit the current * transaction and allocate a new one if the Space Manager needed * to do an allocation to replenish the inode free-list. * * This routine is designed to be called from xfs_create and * xfs_create_dir. * */ int xfs_dir_ialloc( xfs_trans_t **tpp, /* input: current transaction; output: may be a new transaction. */ xfs_inode_t *dp, /* directory within whose allocate the inode. */ umode_t mode, xfs_nlink_t nlink, xfs_dev_t rdev, prid_t prid, /* project id */ int okalloc, /* ok to allocate new space */ xfs_inode_t **ipp, /* pointer to inode; it will be locked. */ int *committed) { xfs_trans_t *tp; xfs_trans_t *ntp; xfs_inode_t *ip; xfs_buf_t *ialloc_context = NULL; int code; uint log_res; uint log_count; void *dqinfo; uint tflags; tp = *tpp; ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); /* * xfs_ialloc will return a pointer to an incore inode if * the Space Manager has an available inode on the free * list. Otherwise, it will do an allocation and replenish * the freelist. Since we can only do one allocation per * transaction without deadlocks, we will need to commit the * current transaction and start a new one. We will then * need to call xfs_ialloc again to get the inode. * * If xfs_ialloc did an allocation to replenish the freelist, * it returns the bp containing the head of the freelist as * ialloc_context. We will hold a lock on it across the * transaction commit so that no other process can steal * the inode(s) that we've just allocated. */ code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, &ialloc_context, &ip); /* * Return an error if we were unable to allocate a new inode. * This should only happen if we run out of space on disk or * encounter a disk error. */ if (code) { *ipp = NULL; return code; } if (!ialloc_context && !ip) { *ipp = NULL; return XFS_ERROR(ENOSPC); } /* * If the AGI buffer is non-NULL, then we were unable to get an * inode in one operation. We need to commit the current * transaction and call xfs_ialloc() again. It is guaranteed * to succeed the second time. */ if (ialloc_context) { /* * Normally, xfs_trans_commit releases all the locks. * We call bhold to hang on to the ialloc_context across * the commit. Holding this buffer prevents any other * processes from doing any allocations in this * allocation group. */ xfs_trans_bhold(tp, ialloc_context); /* * Save the log reservation so we can use * them in the next transaction. */ log_res = xfs_trans_get_log_res(tp); log_count = xfs_trans_get_log_count(tp); /* * We want the quota changes to be associated with the next * transaction, NOT this one. So, detach the dqinfo from this * and attach it to the next transaction. */ dqinfo = NULL; tflags = 0; if (tp->t_dqinfo) { dqinfo = (void *)tp->t_dqinfo; tp->t_dqinfo = NULL; tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY; tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY); } ntp = xfs_trans_dup(tp); code = xfs_trans_commit(tp, 0); tp = ntp; if (committed != NULL) { *committed = 1; } /* * If we get an error during the commit processing, * release the buffer that is still held and return * to the caller. */ if (code) { xfs_buf_relse(ialloc_context); if (dqinfo) { tp->t_dqinfo = dqinfo; xfs_trans_free_dqinfo(tp); } *tpp = ntp; *ipp = NULL; return code; } /* * transaction commit worked ok so we can drop the extra ticket * reference that we gained in xfs_trans_dup() */ xfs_log_ticket_put(tp->t_ticket); code = xfs_trans_reserve(tp, 0, log_res, 0, XFS_TRANS_PERM_LOG_RES, log_count); /* * Re-attach the quota info that we detached from prev trx. */ if (dqinfo) { tp->t_dqinfo = dqinfo; tp->t_flags |= tflags; } if (code) { xfs_buf_relse(ialloc_context); *tpp = ntp; *ipp = NULL; return code; } xfs_trans_bjoin(tp, ialloc_context); /* * Call ialloc again. Since we've locked out all * other allocations in this allocation group, * this call should always succeed. */ code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, &ialloc_context, &ip); /* * If we get an error at this point, return to the caller * so that the current transaction can be aborted. */ if (code) { *tpp = tp; *ipp = NULL; return code; } ASSERT(!ialloc_context && ip); } else { if (committed != NULL) *committed = 0; } *ipp = ip; *tpp = tp; return 0; }
/* * Try to truncate the given file to 0 length. Currently called * only out of xfs_remove when it has to truncate a file to free * up space for the remove to proceed. */ int xfs_truncate_file( xfs_mount_t *mp, xfs_inode_t *ip) { xfs_trans_t *tp; int error; #ifdef QUOTADEBUG /* * This is called to truncate the quotainodes too. */ if (XFS_IS_UQUOTA_ON(mp)) { if (ip->i_ino != mp->m_sb.sb_uquotino) ASSERT(ip->i_udquot); } if (XFS_IS_OQUOTA_ON(mp)) { if (ip->i_ino != mp->m_sb.sb_gquotino) ASSERT(ip->i_gdquot); } #endif /* * Make the call to xfs_itruncate_start before starting the * transaction, because we cannot make the call while we're * in a transaction. */ xfs_ilock(ip, XFS_IOLOCK_EXCL); xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, (xfs_fsize_t)0); tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) { xfs_trans_cancel(tp, 0); xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; } /* * Follow the normal truncate locking protocol. Since we * hold the inode in the transaction, we know that it's number * of references will stay constant. */ xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_trans_ihold(tp, ip); /* * Signal a sync xaction. The only case where that isn't * the case is if we're truncating an already unlinked file * on a wsync fs. In that case, we know the blocks can't * reappear in the file because the links to file are * permanently toast. Currently, we're always going to * want a sync transaction because this code is being * called from places where nlink is guaranteed to be 1 * but I'm leaving the tests in to protect against future * changes -- rcc. */ error = xfs_itruncate_finish(&tp, ip, (xfs_fsize_t)0, XFS_DATA_FORK, ((ip->i_d.di_nlink != 0 || !(mp->m_flags & XFS_MOUNT_WSYNC)) ? 1 : 0)); if (error) { xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); } else { xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); } xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); return error; }
STATIC int xfs_file_fsync( struct file *file, int datasync) { struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; int error = 0; int log_flushed = 0; trace_xfs_file_fsync(ip); if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); xfs_iflags_clear(ip, XFS_ITRUNCATED); xfs_ioend_wait(ip); if (mp->m_flags & XFS_MOUNT_BARRIER) { /* * If we have an RT and/or log subvolume we need to make sure * to flush the write cache the device used for file data * first. This is to ensure newly written file data make * it to disk before logging the new inode size in case of * an extending write. */ if (XFS_IS_REALTIME_INODE(ip)) xfs_blkdev_issue_flush(mp->m_rtdev_targp); else if (mp->m_logdev_targp != mp->m_ddev_targp) xfs_blkdev_issue_flush(mp->m_ddev_targp); } /* * We always need to make sure that the required inode state is safe on * disk. The inode might be clean but we still might need to force the * log because of committed transactions that haven't hit the disk yet. * Likewise, there could be unflushed non-transactional changes to the * inode core that have to go to disk and this requires us to issue * a synchronous transaction to capture these changes correctly. * * This code relies on the assumption that if the i_update_core field * of the inode is clear and the inode is unpinned then it is clean * and no action is required. */ xfs_ilock(ip, XFS_ILOCK_SHARED); /* * First check if the VFS inode is marked dirty. All the dirtying * of non-transactional updates no goes through mark_inode_dirty*, * which allows us to distinguish beteeen pure timestamp updates * and i_size updates which need to be caught for fdatasync. * After that also theck for the dirty state in the XFS inode, which * might gets cleared when the inode gets written out via the AIL * or xfs_iflush_cluster. */ if (((inode->i_state & I_DIRTY_DATASYNC) || ((inode->i_state & I_DIRTY_SYNC) && !datasync)) && ip->i_update_core) { /* * Kick off a transaction to log the inode core to get the * updates. The sync transaction will also force the log. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); if (error) { xfs_trans_cancel(tp, 0); return -error; } xfs_ilock(ip, XFS_ILOCK_EXCL); /* * Note - it's possible that we might have pushed ourselves out * of the way during trans_reserve which would flush the inode. * But there's no guarantee that the inode buffer has actually * gone out yet (it's delwri). Plus the buffer could be pinned * anyway if it's part of an inode in another recent * transaction. So we play it safe and fire off the * transaction anyway. */ xfs_trans_ijoin(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = _xfs_trans_commit(tp, 0, &log_flushed); xfs_iunlock(ip, XFS_ILOCK_EXCL); } else { /* * Timestamps/size haven't changed since last inode flush or * inode transaction commit. That means either nothing got * written or a transaction committed which caught the updates. * If the latter happened and the transaction hasn't hit the * disk yet, the inode will be still be pinned. If it is, * force the log. */ if (xfs_ipincount(ip)) { error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn, XFS_LOG_SYNC, &log_flushed); } xfs_iunlock(ip, XFS_ILOCK_SHARED); } /* * If we only have a single device, and the log force about was * a no-op we might have to flush the data device cache here. * This can only happen for fdatasync/O_DSYNC if we were overwriting * an already allocated file and thus do not have any metadata to * commit. */ if ((mp->m_flags & XFS_MOUNT_BARRIER) && mp->m_logdev_targp == mp->m_ddev_targp && !XFS_IS_REALTIME_INODE(ip) && !log_flushed) xfs_blkdev_issue_flush(mp->m_ddev_targp); return -error; }
int xfs_setattr_nonsize( struct xfs_inode *ip, struct iattr *iattr, int flags) { xfs_mount_t *mp = ip->i_mount; struct inode *inode = VFS_I(ip); int mask = iattr->ia_valid; xfs_trans_t *tp; int error; uid_t uid = 0, iuid = 0; gid_t gid = 0, igid = 0; struct xfs_dquot *udqp = NULL, *gdqp = NULL; struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL; trace_xfs_setattr(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) return XFS_ERROR(EROFS); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); error = -inode_change_ok(inode, iattr); if (error) return XFS_ERROR(error); ASSERT((mask & ATTR_SIZE) == 0); /* * If disk quotas is on, we make sure that the dquots do exist on disk, * before we start any other transactions. Trying to do this later * is messy. We don't care to take a readlock to look at the ids * in inode here, because we can't hold it across the trans_reserve. * If the IDs do change before we take the ilock, we're covered * because the i_*dquot fields will get updated anyway. */ if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { uint qflags = 0; if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { uid = iattr->ia_uid; qflags |= XFS_QMOPT_UQUOTA; } else { uid = ip->i_d.di_uid; } if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { gid = iattr->ia_gid; qflags |= XFS_QMOPT_GQUOTA; } else { gid = ip->i_d.di_gid; } /* * We take a reference when we initialize udqp and gdqp, * so it is important that we never blindly double trip on * the same variable. See xfs_create() for an example. */ ASSERT(udqp == NULL); ASSERT(gdqp == NULL); error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), qflags, &udqp, &gdqp); if (error) return error; } tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); if (error) goto out_dqrele; xfs_ilock(ip, XFS_ILOCK_EXCL); /* * Change file ownership. Must be the owner or privileged. */ if (mask & (ATTR_UID|ATTR_GID)) { /* * These IDs could have changed since we last looked at them. * But, we're assured that if the ownership did change * while we didn't have the inode locked, inode's dquot(s) * would have changed also. */ iuid = ip->i_d.di_uid; igid = ip->i_d.di_gid; gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; /* * Do a quota reservation only if uid/gid is actually * going to change. */ if (XFS_IS_QUOTA_RUNNING(mp) && ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { ASSERT(tp); error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (error) /* out of quota */ goto out_trans_cancel; } } xfs_trans_ijoin(tp, ip); /* * Change file ownership. Must be the owner or privileged. */ if (mask & (ATTR_UID|ATTR_GID)) { /* * CAP_FSETID overrides the following restrictions: * * The set-user-ID and set-group-ID bits of a file will be * cleared upon successful return from chown() */ if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && !capable(CAP_FSETID)) ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); /* * Change the ownerships and register quota modifications * in the transaction. */ if (iuid != uid) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { ASSERT(mask & ATTR_UID); ASSERT(udqp); olddquot1 = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp); } ip->i_d.di_uid = uid; inode->i_uid = uid; } if (igid != gid) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { ASSERT(!XFS_IS_PQUOTA_ON(mp)); ASSERT(mask & ATTR_GID); ASSERT(gdqp); olddquot2 = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } ip->i_d.di_gid = gid; inode->i_gid = gid; } } /* * Change file access modes. */ if (mask & ATTR_MODE) { umode_t mode = iattr->ia_mode; if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) mode &= ~S_ISGID; ip->i_d.di_mode &= S_IFMT; ip->i_d.di_mode |= mode & ~S_IFMT; inode->i_mode &= S_IFMT; inode->i_mode |= mode & ~S_IFMT; } /* * Change file access or modified times. */ if (mask & ATTR_ATIME) { inode->i_atime = iattr->ia_atime; ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; ip->i_update_core = 1; } if (mask & ATTR_CTIME) { inode->i_ctime = iattr->ia_ctime; ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; ip->i_update_core = 1; } if (mask & ATTR_MTIME) { inode->i_mtime = iattr->ia_mtime; ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; ip->i_update_core = 1; } xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(xs_ig_attrchg); if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); /* * Release any dquot(s) the inode had kept before chown. */ xfs_qm_dqrele(olddquot1); xfs_qm_dqrele(olddquot2); xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); if (error) return XFS_ERROR(error); /* * XXX(hch): Updating the ACL entries is not atomic vs the i_mode * update. We could avoid this with linked transactions * and passing down the transaction pointer all the way * to attr_set. No previous user of the generic * Posix ACL code seems to care about this issue either. */ if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { error = -xfs_acl_chmod(inode); if (error) return XFS_ERROR(error); } return 0; out_trans_cancel: xfs_trans_cancel(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); out_dqrele: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); return error; }
/* * Syssgi interface for swapext */ int xfs_swapext( xfs_swapext_t __user *sxp) { xfs_swapext_t sx; xfs_inode_t *ip=NULL, *tip=NULL, *ips[2]; xfs_trans_t *tp; xfs_mount_t *mp; xfs_bstat_t *sbp; struct file *fp = NULL, *tfp = NULL; vnode_t *vp, *tvp; bhv_desc_t *bdp, *tbdp; vn_bhv_head_t *bhp, *tbhp; uint lock_flags=0; int ilf_fields, tilf_fields; int error = 0; xfs_ifork_t tempif, *ifp, *tifp; __uint64_t tmp; int aforkblks = 0; int taforkblks = 0; int locked = 0; if (copy_from_user(&sx, sxp, sizeof(sx))) return XFS_ERROR(EFAULT); /* Pull information for the target fd */ if (((fp = fget((int)sx.sx_fdtarget)) == NULL) || ((vp = LINVFS_GET_VP(fp->f_dentry->d_inode)) == NULL)) { error = XFS_ERROR(EINVAL); goto error0; } bhp = VN_BHV_HEAD(vp); bdp = vn_bhv_lookup(bhp, &xfs_vnodeops); if (bdp == NULL) { error = XFS_ERROR(EBADF); goto error0; } else { ip = XFS_BHVTOI(bdp); } if (((tfp = fget((int)sx.sx_fdtmp)) == NULL) || ((tvp = LINVFS_GET_VP(tfp->f_dentry->d_inode)) == NULL)) { error = XFS_ERROR(EINVAL); goto error0; } tbhp = VN_BHV_HEAD(tvp); tbdp = vn_bhv_lookup(tbhp, &xfs_vnodeops); if (tbdp == NULL) { error = XFS_ERROR(EBADF); goto error0; } else { tip = XFS_BHVTOI(tbdp); } if (ip->i_mount != tip->i_mount) { error = XFS_ERROR(EINVAL); goto error0; } if (ip->i_ino == tip->i_ino) { error = XFS_ERROR(EINVAL); goto error0; } mp = ip->i_mount; sbp = &sx.sx_stat; if (XFS_FORCED_SHUTDOWN(mp)) { error = XFS_ERROR(EIO); goto error0; } locked = 1; /* Lock in i_ino order */ if (ip->i_ino < tip->i_ino) { ips[0] = ip; ips[1] = tip; } else { ips[0] = tip; ips[1] = ip; } lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; xfs_lock_inodes(ips, 2, 0, lock_flags); /* Check permissions */ error = xfs_iaccess(ip, S_IWUSR, NULL); if (error) goto error0; error = xfs_iaccess(tip, S_IWUSR, NULL); if (error) goto error0; /* Verify that both files have the same format */ if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { error = XFS_ERROR(EINVAL); goto error0; } /* Verify both files are either real-time or non-realtime */ if ((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) != (tip->i_d.di_flags & XFS_DIFLAG_REALTIME)) { error = XFS_ERROR(EINVAL); goto error0; } /* Should never get a local format */ if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { error = XFS_ERROR(EINVAL); goto error0; } if (VN_CACHED(tvp) != 0) xfs_inval_cached_pages(XFS_ITOV(tip), &(tip->i_iocore), (xfs_off_t)0, 0, 0); /* Verify O_DIRECT for ftmp */ if (VN_CACHED(tvp) != 0) { error = XFS_ERROR(EINVAL); goto error0; } /* Verify all data are being swapped */ if (sx.sx_offset != 0 || sx.sx_length != ip->i_d.di_size || sx.sx_length != tip->i_d.di_size) { error = XFS_ERROR(EFAULT); goto error0; } /* * If the target has extended attributes, the tmp file * must also in order to ensure the correct data fork * format. */ if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { error = XFS_ERROR(EINVAL); goto error0; } /* * Compare the current change & modify times with that * passed in. If they differ, we abort this swap. * This is the mechanism used to ensure the calling * process that the file was not changed out from * under it. */ if ((sbp->bs_ctime.tv_sec != ip->i_d.di_ctime.t_sec) || (sbp->bs_ctime.tv_nsec != ip->i_d.di_ctime.t_nsec) || (sbp->bs_mtime.tv_sec != ip->i_d.di_mtime.t_sec) || (sbp->bs_mtime.tv_nsec != ip->i_d.di_mtime.t_nsec)) { error = XFS_ERROR(EBUSY); goto error0; } /* We need to fail if the file is memory mapped. Once we have tossed * all existing pages, the page fault will have no option * but to go to the filesystem for pages. By making the page fault call * VOP_READ (or write in the case of autogrow) they block on the iolock * until we have switched the extents. */ if (VN_MAPPED(vp)) { error = XFS_ERROR(EBUSY); goto error0; } xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(tip, XFS_ILOCK_EXCL); /* * There is a race condition here since we gave up the * ilock. However, the data fork will not change since * we have the iolock (locked for truncation too) so we * are safe. We don't really care if non-io related * fields change. */ VOP_TOSS_PAGES(vp, 0, -1, FI_REMAPF); tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); if ((error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0))) { xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_iunlock(tip, XFS_IOLOCK_EXCL); xfs_trans_cancel(tp, 0); return error; } xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL); /* * Count the number of extended attribute blocks */ if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); if (error) { xfs_iunlock(ip, lock_flags); xfs_iunlock(tip, lock_flags); xfs_trans_cancel(tp, 0); return error; } } if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &taforkblks); if (error) { xfs_iunlock(ip, lock_flags); xfs_iunlock(tip, lock_flags); xfs_trans_cancel(tp, 0); return error; } } /* * Swap the data forks of the inodes */ ifp = &ip->i_df; tifp = &tip->i_df; tempif = *ifp; /* struct copy */ *ifp = *tifp; /* struct copy */ *tifp = tempif; /* struct copy */ /* * Fix the on-disk inode values */ tmp = (__uint64_t)ip->i_d.di_nblocks; ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; tmp = (__uint64_t) ip->i_d.di_nextents; ip->i_d.di_nextents = tip->i_d.di_nextents; tip->i_d.di_nextents = tmp; tmp = (__uint64_t) ip->i_d.di_format; ip->i_d.di_format = tip->i_d.di_format; tip->i_d.di_format = tmp; ilf_fields = XFS_ILOG_CORE; switch(ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. Otherwise it's already NULL or * pointing to the extent. */ if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; } ilf_fields |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: ilf_fields |= XFS_ILOG_DBROOT; break; } tilf_fields = XFS_ILOG_CORE; switch(tip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. Otherwise it's already NULL or * pointing to the extent. */ if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; } tilf_fields |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: tilf_fields |= XFS_ILOG_DBROOT; break; } /* * Increment vnode ref counts since xfs_trans_commit & * xfs_trans_cancel will both unlock the inodes and * decrement the associated ref counts. */ VN_HOLD(vp); VN_HOLD(tvp); xfs_trans_ijoin(tp, ip, lock_flags); xfs_trans_ijoin(tp, tip, lock_flags); xfs_trans_log_inode(tp, ip, ilf_fields); xfs_trans_log_inode(tp, tip, tilf_fields); /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) { xfs_trans_set_sync(tp); } error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT, NULL); fput(fp); fput(tfp); return error; error0: if (locked) { xfs_iunlock(ip, lock_flags); xfs_iunlock(tip, lock_flags); } if (fp != NULL) fput(fp); if (tfp != NULL) fput(tfp); return error; }
int xfs_iomap_write_direct( xfs_inode_t *ip, xfs_off_t offset, size_t count, int flags, xfs_bmbt_irec_t *ret_imap, int *nmaps, int found) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb; xfs_fileoff_t last_fsb; xfs_filblks_t count_fsb, resaligned; xfs_fsblock_t firstfsb; xfs_extlen_t extsz, temp; int nimaps; int bmapi_flag; int quota_flag; int rt; xfs_trans_t *tp; xfs_bmbt_irec_t imap; xfs_bmap_free_t free_list; uint qblocks, resblks, resrtextents; int committed; int error; /* * Make sure that the dquots are there. This doesn't hold * the ilock across a disk read. */ error = xfs_qm_dqattach_locked(ip, 0); if (error) return XFS_ERROR(error); rt = XFS_IS_REALTIME_INODE(ip); extsz = xfs_get_extsz_hint(ip); offset_fsb = XFS_B_TO_FSBT(mp, offset); last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); if ((offset + count) > ip->i_size) { error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); if (error) goto error_out; } else { if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) last_fsb = MIN(last_fsb, (xfs_fileoff_t) ret_imap->br_blockcount + ret_imap->br_startoff); } count_fsb = last_fsb - offset_fsb; ASSERT(count_fsb > 0); resaligned = count_fsb; if (unlikely(extsz)) { if ((temp = do_mod(offset_fsb, extsz))) resaligned += temp; if ((temp = do_mod(resaligned, extsz))) resaligned += extsz - temp; } if (unlikely(rt)) { resrtextents = qblocks = resaligned; resrtextents /= mp->m_sb.sb_rextsize; resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); quota_flag = XFS_QMOPT_RES_RTBLKS; } else { resrtextents = 0; resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); quota_flag = XFS_QMOPT_RES_REGBLKS; } /* * Allocate and setup the transaction */ xfs_iunlock(ip, XFS_ILOCK_EXCL); tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), resrtextents, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); /* * Check for running out of space, note: need lock to return */ if (error) xfs_trans_cancel(tp, 0); xfs_ilock(ip, XFS_ILOCK_EXCL); if (error) goto error_out; error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); if (error) goto error1; xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); bmapi_flag = XFS_BMAPI_WRITE; if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz)) bmapi_flag |= XFS_BMAPI_PREALLOC; /* * Issue the xfs_bmapi() call to allocate the blocks */ xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag, &firstfsb, 0, &imap, &nimaps, &free_list, NULL); if (error) goto error0; /* * Complete the transaction */ error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) goto error0; error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) goto error_out; /* * Copy any maps to caller's array and return any error. */ if (nimaps == 0) { error = ENOSPC; goto error_out; } if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) { error = xfs_cmn_err_fsblock_zero(ip, &imap); goto error_out; } *ret_imap = imap; *nmaps = 1; return 0; error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); *nmaps = 0; /* nothing set-up here */ error_out: return XFS_ERROR(error); }
/* * Pass in a delayed allocate extent, convert it to real extents; * return to the caller the extent we create which maps on top of * the originating callers request. * * Called without a lock on the inode. * * We no longer bother to look at the incoming map - all we have to * guarantee is that whatever we allocate fills the required range. */ int xfs_iomap_write_allocate( xfs_inode_t *ip, xfs_off_t offset, size_t count, xfs_bmbt_irec_t *map, int *retmap) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb, last_block; xfs_fileoff_t end_fsb, map_start_fsb; xfs_fsblock_t first_block; xfs_bmap_free_t free_list; xfs_filblks_t count_fsb; xfs_bmbt_irec_t imap; xfs_trans_t *tp; int nimaps, committed; int error = 0; int nres; *retmap = 0; /* * Make sure that the dquots are there. */ error = xfs_qm_dqattach(ip, 0); if (error) return XFS_ERROR(error); offset_fsb = XFS_B_TO_FSBT(mp, offset); count_fsb = map->br_blockcount; map_start_fsb = map->br_startoff; XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); while (count_fsb != 0) { /* * Set up a transaction with which to allocate the * backing store for the file. Do allocations in a * loop until we get some space in the range we are * interested in. The other space that might be allocated * is in the delayed allocation extent on which we sit * but before our buffer starts. */ nimaps = 0; while (nimaps == 0) { tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); tp->t_flags |= XFS_TRANS_RESERVE; nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); error = xfs_trans_reserve(tp, nres, XFS_WRITE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); if (error) { xfs_trans_cancel(tp, 0); return XFS_ERROR(error); } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); xfs_bmap_init(&free_list, &first_block); /* * it is possible that the extents have changed since * we did the read call as we dropped the ilock for a * while. We have to be careful about truncates or hole * punchs here - we are not allowed to allocate * non-delalloc blocks here. * * The only protection against truncation is the pages * for the range we are being asked to convert are * locked and hence a truncate will block on them * first. * * As a result, if we go beyond the range we really * need and hit an delalloc extent boundary followed by * a hole while we have excess blocks in the map, we * will fill the hole incorrectly and overrun the * transaction reservation. * * Using a single map prevents this as we are forced to * check each map we look for overlap with the desired * range and abort as soon as we find it. Also, given * that we only return a single map, having one beyond * what we can return is probably a bit silly. * * We also need to check that we don't go beyond EOF; * this is a truncate optimisation as a truncate sets * the new file size before block on the pages we * currently have locked under writeback. Because they * are about to be tossed, we don't need to write them * back.... */ nimaps = 1; end_fsb = XFS_B_TO_FSB(mp, ip->i_size); error = xfs_bmap_last_offset(NULL, ip, &last_block, XFS_DATA_FORK); if (error) goto trans_cancel; last_block = XFS_FILEOFF_MAX(last_block, end_fsb); if ((map_start_fsb + count_fsb) > last_block) { count_fsb = last_block - map_start_fsb; if (count_fsb == 0) { error = EAGAIN; goto trans_cancel; } } /* Go get the actual blocks */ error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, XFS_BMAPI_WRITE, &first_block, 1, &imap, &nimaps, &free_list, NULL); if (error) goto trans_cancel; error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) goto trans_cancel; error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) goto error0; xfs_iunlock(ip, XFS_ILOCK_EXCL); } /* * See if we were able to allocate an extent that * covers at least part of the callers request */ if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) return xfs_cmn_err_fsblock_zero(ip, &imap); if ((offset_fsb >= imap.br_startoff) && (offset_fsb < (imap.br_startoff + imap.br_blockcount))) { *map = imap; *retmap = 1; XFS_STATS_INC(xs_xstrat_quick); return 0; } /* * So far we have not mapped the requested part of the * file, just surrounding data, try again. */ count_fsb -= imap.br_blockcount; map_start_fsb = imap.br_startoff + imap.br_blockcount; } trans_cancel: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); error0: xfs_iunlock(ip, XFS_ILOCK_EXCL); return XFS_ERROR(error); }
/* * allocate an incore dquot from the kernel heap, * and fill its core with quota information kept on disk. * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk * if it wasn't already allocated. */ STATIC int xfs_qm_idtodq( xfs_mount_t *mp, xfs_dqid_t id, /* gid or uid, depending on type */ uint type, /* UDQUOT or GDQUOT */ uint flags, /* DQALLOC, DQREPAIR */ xfs_dquot_t **O_dqpp)/* OUT : incore dquot, not locked */ { xfs_dquot_t *dqp; int error; xfs_trans_t *tp; int cancelflags=0; dqp = xfs_qm_dqinit(mp, id, type); tp = NULL; if (flags & XFS_QMOPT_DQALLOC) { tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); if ((error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), XFS_WRITE_LOG_RES(mp) + BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1 + 128, 0, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT))) { cancelflags = 0; goto error0; } cancelflags = XFS_TRANS_RELEASE_LOG_RES; } /* * Read it from disk; xfs_dqread() takes care of * all the necessary initialization of dquot's fields (locks, etc) */ if ((error = xfs_qm_dqread(tp, id, dqp, flags))) { /* * This can happen if quotas got turned off (ESRCH), * or if the dquot didn't exist on disk and we ask to * allocate (ENOENT). */ xfs_dqtrace_entry(dqp, "DQREAD FAIL"); cancelflags |= XFS_TRANS_ABORT; goto error0; } if (tp) { if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL))) goto error1; } *O_dqpp = dqp; return (0); error0: ASSERT(error); if (tp) xfs_trans_cancel(tp, cancelflags); error1: xfs_qm_dqdestroy(dqp); *O_dqpp = NULL; return (error); }
/* * Adjust quota limits, and start/stop timers accordingly. */ int xfs_qm_scall_setqlim( xfs_mount_t *mp, xfs_dqid_t id, uint type, fs_disk_quota_t *newlim) { struct xfs_quotainfo *q = mp->m_quotainfo; xfs_disk_dquot_t *ddq; xfs_dquot_t *dqp; xfs_trans_t *tp; int error; xfs_qcnt_t hard, soft; if (newlim->d_fieldmask & ~XFS_DQ_MASK) return EINVAL; if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) return 0; tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, 0, 0, XFS_DEFAULT_LOG_COUNT))) { xfs_trans_cancel(tp, 0); return (error); } /* * We don't want to race with a quotaoff so take the quotaoff lock. * (We don't hold an inode lock, so there's nothing else to stop * a quotaoff from happening). (XXXThis doesn't currently happen * because we take the vfslock before calling xfs_qm_sysent). */ mutex_lock(&q->qi_quotaofflock); /* * Get the dquot (locked), and join it to the transaction. * Allocate the dquot if this doesn't exist. */ if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) { xfs_trans_cancel(tp, XFS_TRANS_ABORT); ASSERT(error != ENOENT); goto out_unlock; } xfs_trans_dqjoin(tp, dqp); ddq = &dqp->q_core; /* * Make sure that hardlimits are >= soft limits before changing. */ hard = (newlim->d_fieldmask & FS_DQ_BHARD) ? (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) : be64_to_cpu(ddq->d_blk_hardlimit); soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ? (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) : be64_to_cpu(ddq->d_blk_softlimit); if (hard == 0 || hard >= soft) { ddq->d_blk_hardlimit = cpu_to_be64(hard); ddq->d_blk_softlimit = cpu_to_be64(soft); if (id == 0) { q->qi_bhardlimit = hard; q->qi_bsoftlimit = soft; } } else { xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft); } hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : be64_to_cpu(ddq->d_rtb_hardlimit); soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ? (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) : be64_to_cpu(ddq->d_rtb_softlimit); if (hard == 0 || hard >= soft) { ddq->d_rtb_hardlimit = cpu_to_be64(hard); ddq->d_rtb_softlimit = cpu_to_be64(soft); if (id == 0) { q->qi_rtbhardlimit = hard; q->qi_rtbsoftlimit = soft; } } else { xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft); } hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? (xfs_qcnt_t) newlim->d_ino_hardlimit : be64_to_cpu(ddq->d_ino_hardlimit); soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ? (xfs_qcnt_t) newlim->d_ino_softlimit : be64_to_cpu(ddq->d_ino_softlimit); if (hard == 0 || hard >= soft) { ddq->d_ino_hardlimit = cpu_to_be64(hard); ddq->d_ino_softlimit = cpu_to_be64(soft); if (id == 0) { q->qi_ihardlimit = hard; q->qi_isoftlimit = soft; } } else { xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft); } /* * Update warnings counter(s) if requested */ if (newlim->d_fieldmask & FS_DQ_BWARNS) ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns); if (newlim->d_fieldmask & FS_DQ_IWARNS) ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns); if (newlim->d_fieldmask & FS_DQ_RTBWARNS) ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns); if (id == 0) { /* * Timelimits for the super user set the relative time * the other users can be over quota for this file system. * If it is zero a default is used. Ditto for the default * soft and hard limit values (already done, above), and * for warnings. */ if (newlim->d_fieldmask & FS_DQ_BTIMER) { q->qi_btimelimit = newlim->d_btimer; ddq->d_btimer = cpu_to_be32(newlim->d_btimer); } if (newlim->d_fieldmask & FS_DQ_ITIMER) { q->qi_itimelimit = newlim->d_itimer; ddq->d_itimer = cpu_to_be32(newlim->d_itimer); } if (newlim->d_fieldmask & FS_DQ_RTBTIMER) { q->qi_rtbtimelimit = newlim->d_rtbtimer; ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer); } if (newlim->d_fieldmask & FS_DQ_BWARNS) q->qi_bwarnlimit = newlim->d_bwarns; if (newlim->d_fieldmask & FS_DQ_IWARNS) q->qi_iwarnlimit = newlim->d_iwarns; if (newlim->d_fieldmask & FS_DQ_RTBWARNS) q->qi_rtbwarnlimit = newlim->d_rtbwarns; } else { /* * If the user is now over quota, start the timelimit. * The user will not be 'warned'. * Note that we keep the timers ticking, whether enforcement * is on or off. We don't really want to bother with iterating * over all ondisk dquots and turning the timers on/off. */ xfs_qm_adjust_dqtimers(mp, ddq); } dqp->dq_flags |= XFS_DQ_DIRTY; xfs_trans_log_dquot(tp, dqp); error = xfs_trans_commit(tp, 0); xfs_qm_dqrele(dqp); out_unlock: mutex_unlock(&q->qi_quotaofflock); return error; }
/* * Truncate file. Must have write permission and not be a directory. */ int xfs_setattr_size( struct xfs_inode *ip, struct iattr *iattr) { struct xfs_mount *mp = ip->i_mount; struct inode *inode = VFS_I(ip); xfs_off_t oldsize, newsize; struct xfs_trans *tp; int error; uint lock_flags = 0; bool did_zeroing = false; trace_xfs_setattr(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) return -EROFS; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; error = inode_change_ok(inode, iattr); if (error) return error; ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL)); ASSERT(S_ISREG(ip->i_d.di_mode)); ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); oldsize = inode->i_size; newsize = iattr->ia_size; /* * Short circuit the truncate case for zero length files. */ if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME))) return 0; /* * Use the regular setattr path to update the timestamps. */ iattr->ia_valid &= ~ATTR_SIZE; return xfs_setattr_nonsize(ip, iattr, 0); } /* * Make sure that the dquots are attached to the inode. */ error = xfs_qm_dqattach(ip, 0); if (error) return error; /* * File data changes must be complete before we start the transaction to * modify the inode. This needs to be done before joining the inode to * the transaction because the inode cannot be unlocked once it is a * part of the transaction. * * Start with zeroing any data block beyond EOF that we may expose on * file extension. */ if (newsize > oldsize) { error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing); if (error) return error; } /* * We are going to log the inode size change in this transaction so * any previous writes that are beyond the on disk EOF and the new * EOF that have not been written out need to be written here. If we * do not write the data out, we expose ourselves to the null files * problem. Note that this includes any block zeroing we did above; * otherwise those blocks may not be zeroed after a crash. */ if (newsize > ip->i_d.di_size && (oldsize != ip->i_d.di_size || did_zeroing)) { error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ip->i_d.di_size, newsize); if (error) return error; } /* Now wait for all direct I/O to complete. */ inode_dio_wait(inode); /* * We've already locked out new page faults, so now we can safely remove * pages from the page cache knowing they won't get refaulted until we * drop the XFS_MMAP_EXCL lock after the extent manipulations are * complete. The truncate_setsize() call also cleans partial EOF page * PTEs on extending truncates and hence ensures sub-page block size * filesystems are correctly handled, too. * * We have to do all the page cache truncate work outside the * transaction context as the "lock" order is page lock->log space * reservation as defined by extent allocation in the writeback path. * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but * having already truncated the in-memory version of the file (i.e. made * user visible changes). There's not much we can do about this, except * to hope that the caller sees ENOMEM and retries the truncate * operation. */ if (IS_DAX(inode)) error = dax_truncate_page(inode, newsize, xfs_get_blocks_direct); else error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); if (error) return error; truncate_setsize(inode, newsize); tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error) goto out_trans_cancel; lock_flags |= XFS_ILOCK_EXCL; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); /* * Only change the c/mtime if we are changing the size or we are * explicitly asked to change it. This handles the semantic difference * between truncate() and ftruncate() as implemented in the VFS. * * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a * special case where we need to update the times despite not having * these flags set. For all other operations the VFS set these flags * explicitly if it wants a timestamp update. */ if (newsize != oldsize && !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) { iattr->ia_ctime = iattr->ia_mtime = current_fs_time(inode->i_sb); iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; } /* * The first thing we do is set the size to new_size permanently on * disk. This way we don't have to worry about anyone ever being able * to look at the data being freed even in the face of a crash. * What we're getting around here is the case where we free a block, it * is allocated to another file, it is written to, and then we crash. * If the new data gets written to the file but the log buffers * containing the free and reallocation don't, then we'd end up with * garbage in the blocks being freed. As long as we make the new size * permanent before actually freeing any blocks it doesn't matter if * they get written to. */ ip->i_d.di_size = newsize; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (newsize <= oldsize) { error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize); if (error) goto out_trans_cancel; /* * Truncated "down", so we're removing references to old data * here - if we delay flushing for a long time, we expose * ourselves unduly to the notorious NULL files problem. So, * we mark this inode and flush it when the file is closed, * and do not wait the usual (long) time for writeout. */ xfs_iflags_set(ip, XFS_ITRUNCATED); /* A truncate down always removes post-EOF blocks. */ xfs_inode_clear_eofblocks_tag(ip); } if (iattr->ia_valid & ATTR_MODE) xfs_setattr_mode(ip, iattr); if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) xfs_setattr_time(ip, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); out_unlock: if (lock_flags) xfs_iunlock(ip, lock_flags); return error; out_trans_cancel: xfs_trans_cancel(tp); goto out_unlock; }
int xfs_iomap_write_unwritten( xfs_inode_t *ip, xfs_off_t offset, size_t count) { xfs_mount_t *mp = ip->i_mount; xfs_iocore_t *io = &ip->i_iocore; xfs_fileoff_t offset_fsb; xfs_filblks_t count_fsb; xfs_filblks_t numblks_fsb; xfs_fsblock_t firstfsb; int nimaps; xfs_trans_t *tp; xfs_bmbt_irec_t imap; xfs_bmap_free_t free_list; uint resblks; int committed; int error; xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, &ip->i_iocore, offset, count); offset_fsb = XFS_B_TO_FSBT(mp, offset); count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb); resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; do { /* * set up a transaction to convert the range of extents * from unwritten to real. Do allocations in a loop until * we have covered the range passed in. */ tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); if (error) { xfs_trans_cancel(tp, 0); return XFS_ERROR(error); } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); /* * Modify the unwritten extent state of the buffer. */ XFS_BMAP_INIT(&free_list, &firstfsb); nimaps = 1; error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb, XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb, 1, &imap, &nimaps, &free_list, NULL); if (error) goto error_on_bmapi_transaction; error = xfs_bmap_finish(&(tp), &(free_list), firstfsb, &committed); if (error) goto error_on_bmapi_transaction; error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) return XFS_ERROR(error); if (unlikely(!imap.br_startblock && !(io->io_flags & XFS_IOCORE_RT))) return xfs_cmn_err_fsblock_zero(ip, &imap); if ((numblks_fsb = imap.br_blockcount) == 0) { /* * The numblks_fsb value should always get * smaller, otherwise the loop is stuck. */ ASSERT(imap.br_blockcount); break; } offset_fsb += numblks_fsb; count_fsb -= numblks_fsb; } while (count_fsb > 0); return 0; error_on_bmapi_transaction: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); xfs_iunlock(ip, XFS_ILOCK_EXCL); return XFS_ERROR(error); }
/* * Pass in a delayed allocate extent, convert it to real extents; * return to the caller the extent we create which maps on top of * the originating callers request. * * Called without a lock on the inode. */ int xfs_iomap_write_allocate( xfs_inode_t *ip, xfs_off_t offset, size_t count, xfs_bmbt_irec_t *map, int *retmap) { xfs_mount_t *mp = ip->i_mount; xfs_iocore_t *io = &ip->i_iocore; xfs_fileoff_t offset_fsb, last_block; xfs_fileoff_t end_fsb, map_start_fsb; xfs_fsblock_t first_block; xfs_bmap_free_t free_list; xfs_filblks_t count_fsb; xfs_bmbt_irec_t imap[XFS_STRAT_WRITE_IMAPS]; xfs_trans_t *tp; int i, nimaps, committed; int error = 0; int nres; *retmap = 0; /* * Make sure that the dquots are there. */ if ((error = XFS_QM_DQATTACH(mp, ip, 0))) return XFS_ERROR(error); offset_fsb = XFS_B_TO_FSBT(mp, offset); count_fsb = map->br_blockcount; map_start_fsb = map->br_startoff; XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); while (count_fsb != 0) { /* * Set up a transaction with which to allocate the * backing store for the file. Do allocations in a * loop until we get some space in the range we are * interested in. The other space that might be allocated * is in the delayed allocation extent on which we sit * but before our buffer starts. */ nimaps = 0; while (nimaps == 0) { tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); error = xfs_trans_reserve(tp, nres, XFS_WRITE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); if (error == ENOSPC) { error = xfs_trans_reserve(tp, 0, XFS_WRITE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); } if (error) { xfs_trans_cancel(tp, 0); return XFS_ERROR(error); } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); XFS_BMAP_INIT(&free_list, &first_block); nimaps = XFS_STRAT_WRITE_IMAPS; /* * Ensure we don't go beyond eof - it is possible * the extents changed since we did the read call, * we dropped the ilock in the interim. */ end_fsb = XFS_B_TO_FSB(mp, ip->i_d.di_size); xfs_bmap_last_offset(NULL, ip, &last_block, XFS_DATA_FORK); last_block = XFS_FILEOFF_MAX(last_block, end_fsb); if ((map_start_fsb + count_fsb) > last_block) { count_fsb = last_block - map_start_fsb; if (count_fsb == 0) { error = EAGAIN; goto trans_cancel; } } /* Go get the actual blocks */ error = XFS_BMAPI(mp, tp, io, map_start_fsb, count_fsb, XFS_BMAPI_WRITE, &first_block, 1, imap, &nimaps, &free_list, NULL); if (error) goto trans_cancel; error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); if (error) goto trans_cancel; error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); if (error) goto error0; xfs_iunlock(ip, XFS_ILOCK_EXCL); } /* * See if we were able to allocate an extent that * covers at least part of the callers request */ for (i = 0; i < nimaps; i++) { if (unlikely(!imap[i].br_startblock && !(io->io_flags & XFS_IOCORE_RT))) return xfs_cmn_err_fsblock_zero(ip, &imap[i]); if ((offset_fsb >= imap[i].br_startoff) && (offset_fsb < (imap[i].br_startoff + imap[i].br_blockcount))) { *map = imap[i]; *retmap = 1; XFS_STATS_INC(xs_xstrat_quick); return 0; } count_fsb -= imap[i].br_blockcount; } /* So far we have not mapped the requested part of the * file, just surrounding data, try again. */ nimaps--; map_start_fsb = imap[nimaps].br_startoff + imap[nimaps].br_blockcount; } trans_cancel: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); error0: xfs_iunlock(ip, XFS_ILOCK_EXCL); return XFS_ERROR(error); }
/* * Read in the ondisk dquot using dqtobp() then copy it to an incore version, * and release the buffer immediately. * * If XFS_QMOPT_DQALLOC is set, allocate a dquot on disk if it needed. */ int xfs_qm_dqread( struct xfs_mount *mp, xfs_dqid_t id, uint type, uint flags, struct xfs_dquot **O_dqpp) { struct xfs_dquot *dqp; struct xfs_disk_dquot *ddqp; struct xfs_buf *bp; struct xfs_trans *tp = NULL; int error; int cancelflags = 0; dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP); dqp->dq_flags = type; dqp->q_core.d_id = cpu_to_be32(id); dqp->q_mount = mp; INIT_LIST_HEAD(&dqp->q_lru); mutex_init(&dqp->q_qlock); init_waitqueue_head(&dqp->q_pinwait); /* * Because we want to use a counting completion, complete * the flush completion once to allow a single access to * the flush completion without blocking. */ init_completion(&dqp->q_flush); complete(&dqp->q_flush); /* * Make sure group quotas have a different lock class than user * quotas. */ switch (type) { case XFS_DQ_USER: /* uses the default lock class */ break; case XFS_DQ_GROUP: lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class); break; case XFS_DQ_PROJ: lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class); break; default: ASSERT(0); break; } XFS_STATS_INC(xs_qm_dquot); trace_xfs_dqread(dqp); if (flags & XFS_QMOPT_DQALLOC) { tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc, XFS_QM_DQALLOC_SPACE_RES(mp), 0); if (error) goto error1; cancelflags = XFS_TRANS_RELEASE_LOG_RES; } /* * get a pointer to the on-disk dquot and the buffer containing it * dqp already knows its own type (GROUP/USER). */ error = xfs_qm_dqtobp(&tp, dqp, &ddqp, &bp, flags); if (error) { /* * This can happen if quotas got turned off (ESRCH), * or if the dquot didn't exist on disk and we ask to * allocate (ENOENT). */ trace_xfs_dqread_fail(dqp); cancelflags |= XFS_TRANS_ABORT; goto error1; } /* copy everything from disk dquot to the incore dquot */ memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); xfs_qm_dquot_logitem_init(dqp); /* * Reservation counters are defined as reservation plus current usage * to avoid having to add every time. */ dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount); /* initialize the dquot speculative prealloc thresholds */ xfs_dquot_set_prealloc_limits(dqp); /* Mark the buf so that this will stay incore a little longer */ xfs_buf_set_ref(bp, XFS_DQUOT_REF); /* * We got the buffer with a xfs_trans_read_buf() (in dqtobp()) * So we need to release with xfs_trans_brelse(). * The strategy here is identical to that of inodes; we lock * the dquot in xfs_qm_dqget() before making it accessible to * others. This is because dquots, like inodes, need a good level of * concurrency, and we don't want to take locks on the entire buffers * for dquot accesses. * Note also that the dquot buffer may even be dirty at this point, if * this particular dquot was repaired. We still aren't afraid to * brelse it because we have the changes incore. */ ASSERT(xfs_buf_islocked(bp)); xfs_trans_brelse(tp, bp); if (tp) { error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) goto error0; } *O_dqpp = dqp; return error; error1: if (tp) xfs_trans_cancel(tp, cancelflags); error0: xfs_qm_dqdestroy(dqp); *O_dqpp = NULL; return error; }
int xfs_iomap_write_unwritten( xfs_inode_t *ip, xfs_off_t offset, size_t count) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb; xfs_filblks_t count_fsb; xfs_filblks_t numblks_fsb; xfs_fsblock_t firstfsb; int nimaps; xfs_trans_t *tp; xfs_bmbt_irec_t imap; xfs_bmap_free_t free_list; uint resblks; int committed; int error; xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, ip, offset, count); offset_fsb = XFS_B_TO_FSBT(mp, offset); count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb); /* * Reserve enough blocks in this transaction for two complete extent * btree splits. We may be converting the middle part of an unwritten * extent and in this case we will insert two new extents in the btree * each of which could cause a full split. * * This reservation amount will be used in the first call to * xfs_bmbt_split() to select an AG with enough space to satisfy the * rest of the operation. */ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; do { /* * set up a transaction to convert the range of extents * from unwritten to real. Do allocations in a loop until * we have covered the range passed in. * * Note that we open code the transaction allocation here * to pass KM_NOFS--we can't risk to recursing back into * the filesystem here as we might be asked to write out * the same inode that we complete here and might deadlock * on the iolock. */ xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); if (error) { xfs_trans_cancel(tp, 0); return XFS_ERROR(error); } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); /* * Modify the unwritten extent state of the buffer. */ xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb, 1, &imap, &nimaps, &free_list, NULL); if (error) goto error_on_bmapi_transaction; error = xfs_bmap_finish(&(tp), &(free_list), &committed); if (error) goto error_on_bmapi_transaction; error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) return XFS_ERROR(error); if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) return xfs_cmn_err_fsblock_zero(ip, &imap); if ((numblks_fsb = imap.br_blockcount) == 0) { /* * The numblks_fsb value should always get * smaller, otherwise the loop is stuck. */ ASSERT(imap.br_blockcount); break; } offset_fsb += numblks_fsb; count_fsb -= numblks_fsb; } while (count_fsb > 0); return 0; error_on_bmapi_transaction: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); xfs_iunlock(ip, XFS_ILOCK_EXCL); return XFS_ERROR(error); }
/* * xfs_rename */ int xfs_rename( xfs_inode_t *src_dp, struct xfs_name *src_name, xfs_inode_t *src_ip, xfs_inode_t *target_dp, struct xfs_name *target_name, xfs_inode_t *target_ip) { xfs_trans_t *tp = NULL; xfs_mount_t *mp = src_dp->i_mount; int new_parent; /* moving to a new dir */ int src_is_directory; /* src_name is a directory */ int error; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; int cancel_flags; int committed; xfs_inode_t *inodes[4]; int spaceres; int num_inodes; xfs_itrace_entry(src_dp); xfs_itrace_entry(target_dp); if (DM_EVENT_ENABLED(src_dp, DM_EVENT_RENAME) || DM_EVENT_ENABLED(target_dp, DM_EVENT_RENAME)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_RENAME, src_dp, DM_RIGHT_NULL, target_dp, DM_RIGHT_NULL, src_name->name, target_name->name, 0, 0, 0); if (error) return error; } /* Return through std_return after this point. */ new_parent = (src_dp != target_dp); src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR); if (src_is_directory) { /* * Check for link count overflow on target_dp */ if (target_ip == NULL && new_parent && target_dp->i_d.di_nlink >= XFS_MAXLINK) { error = XFS_ERROR(EMLINK); goto std_return; } } xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, inodes, &num_inodes); xfs_bmap_init(&free_list, &first_block); tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); cancel_flags = XFS_TRANS_RELEASE_LOG_RES; spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); error = xfs_trans_reserve(tp, spaceres, XFS_RENAME_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); if (error == ENOSPC) { spaceres = 0; error = xfs_trans_reserve(tp, 0, XFS_RENAME_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_RENAME_LOG_COUNT); } if (error) { xfs_trans_cancel(tp, 0); goto std_return; } /* * Attach the dquots to the inodes */ if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) { xfs_trans_cancel(tp, cancel_flags); goto std_return; } /* * Lock all the participating inodes. Depending upon whether * the target_name exists in the target directory, and * whether the target directory is the same as the source * directory, we can lock from 2 to 4 inodes. */ xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL); /* * Join all the inodes to the transaction. From this point on, * we can rely on either trans_commit or trans_cancel to unlock * them. Note that we need to add a vnode reference to the * directories since trans_commit & trans_cancel will decrement * them when they unlock the inodes. Also, we need to be careful * not to add an inode to the transaction more than once. */ IHOLD(src_dp); xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL); if (new_parent) { IHOLD(target_dp); xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL); } IHOLD(src_ip); xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); if (target_ip) { IHOLD(target_ip); xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); } /* * If we are using project inheritance, we only allow renames * into our tree when the project IDs are the same; else the * tree quota mechanism would be circumvented. */ if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { error = XFS_ERROR(EXDEV); goto error_return; } /* * Set up the target. */ if (target_ip == NULL) { /* * If there's no space reservation, check the entry will * fit before actually inserting it. */ error = xfs_dir_canenter(tp, target_dp, target_name, spaceres); if (error) goto error_return; /* * If target does not exist and the rename crosses * directories, adjust the target directory link count * to account for the ".." reference from the new entry. */ error = xfs_dir_createname(tp, target_dp, target_name, src_ip->i_ino, &first_block, &free_list, spaceres); if (error == ENOSPC) goto error_return; if (error) goto abort_return; xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); if (new_parent && src_is_directory) { error = xfs_bumplink(tp, target_dp); if (error) goto abort_return; } } else { /* target_ip != NULL */ /* * If target exists and it's a directory, check that both * target and source are directories and that target can be * destroyed, or that neither is a directory. */ if ((target_ip->i_d.di_mode & S_IFMT) == S_IFDIR) { /* * Make sure target dir is empty. */ if (!(xfs_dir_isempty(target_ip)) || (target_ip->i_d.di_nlink > 2)) { error = XFS_ERROR(EEXIST); goto error_return; } } /* * Link the source inode under the target name. * If the source inode is a directory and we are moving * it across directories, its ".." entry will be * inconsistent until we replace that down below. * * In case there is already an entry with the same * name at the destination directory, remove it first. */ error = xfs_dir_replace(tp, target_dp, target_name, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) goto abort_return; xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * Decrement the link count on the target since the target * dir no longer points to it. */ error = xfs_droplink(tp, target_ip); if (error) goto abort_return; if (src_is_directory) { /* * Drop the link from the old "." entry. */ error = xfs_droplink(tp, target_ip); if (error) goto abort_return; } } /* target_ip != NULL */ /* * Remove the source. */ if (new_parent && src_is_directory) { /* * Rewrite the ".." entry to point to the new * directory. */ error = xfs_dir_replace(tp, src_ip, &xfs_name_dotdot, target_dp->i_ino, &first_block, &free_list, spaceres); ASSERT(error != EEXIST); if (error) goto abort_return; } /* * We always want to hit the ctime on the source inode. * * This isn't strictly required by the standards since the source * inode isn't really being changed, but old unix file systems did * it and some incremental backup programs won't work without it. */ xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG); /* * Adjust the link count on src_dp. This is necessary when * renaming a directory, either within one parent when * the target existed, or across two parent directories. */ if (src_is_directory && (new_parent || target_ip != NULL)) { /* * Decrement link count on src_directory since the * entry that's moved no longer points to it. */ error = xfs_droplink(tp, src_dp); if (error) goto abort_return; } error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, &first_block, &free_list, spaceres); if (error) goto abort_return; xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); if (new_parent) xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); /* * If this is a synchronous mount, make sure that the * rename transaction goes to disk before returning to * the user. */ if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { xfs_trans_set_sync(tp); } error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) { xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); goto std_return; } /* * trans_commit will unlock src_ip, target_ip & decrement * the vnode references. */ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); /* Fall through to std_return with error = 0 or errno from * xfs_trans_commit */ std_return: if (DM_EVENT_ENABLED(src_dp, DM_EVENT_POSTRENAME) || DM_EVENT_ENABLED(target_dp, DM_EVENT_POSTRENAME)) { (void) XFS_SEND_NAMESP (mp, DM_EVENT_POSTRENAME, src_dp, DM_RIGHT_NULL, target_dp, DM_RIGHT_NULL, src_name->name, target_name->name, 0, error, 0); } return error; abort_return: cancel_flags |= XFS_TRANS_ABORT; /* FALLTHROUGH */ error_return: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, cancel_flags); goto std_return; }
/* * Create an inode and return with a reference already taken, but unlocked * This is how we create quota inodes */ STATIC int xfs_qm_qino_alloc( xfs_mount_t *mp, xfs_inode_t **ip, uint flags) { xfs_trans_t *tp; int error; int committed; bool need_alloc = true; *ip = NULL; /* * With superblock that doesn't have separate pquotino, we * share an inode between gquota and pquota. If the on-disk * superblock has GQUOTA and the filesystem is now mounted * with PQUOTA, just use sb_gquotino for sb_pquotino and * vice-versa. */ if (!xfs_sb_version_has_pquotino(&mp->m_sb) && (flags & (XFS_QMOPT_PQUOTA|XFS_QMOPT_GQUOTA))) { xfs_ino_t ino = NULLFSINO; if ((flags & XFS_QMOPT_PQUOTA) && (mp->m_sb.sb_gquotino != NULLFSINO)) { ino = mp->m_sb.sb_gquotino; ASSERT(mp->m_sb.sb_pquotino == NULLFSINO); } else if ((flags & XFS_QMOPT_GQUOTA) && (mp->m_sb.sb_pquotino != NULLFSINO)) { ino = mp->m_sb.sb_pquotino; ASSERT(mp->m_sb.sb_gquotino == NULLFSINO); } if (ino != NULLFSINO) { error = xfs_iget(mp, NULL, ino, 0, 0, ip); if (error) return error; mp->m_sb.sb_gquotino = NULLFSINO; mp->m_sb.sb_pquotino = NULLFSINO; need_alloc = false; } } tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create, XFS_QM_QINOCREATE_SPACE_RES(mp), 0); if (error) { xfs_trans_cancel(tp, 0); return error; } if (need_alloc) { error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed); if (error) { xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); return error; } } /* * Make the changes in the superblock, and log those too. * sbfields arg may contain fields other than *QUOTINO; * VERSIONNUM for example. */ spin_lock(&mp->m_sb_lock); if (flags & XFS_QMOPT_SBVERSION) { ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); xfs_sb_version_addquota(&mp->m_sb); mp->m_sb.sb_uquotino = NULLFSINO; mp->m_sb.sb_gquotino = NULLFSINO; mp->m_sb.sb_pquotino = NULLFSINO; /* qflags will get updated fully _after_ quotacheck */ mp->m_sb.sb_qflags = mp->m_qflags & XFS_ALL_QUOTA_ACCT; } if (flags & XFS_QMOPT_UQUOTA) mp->m_sb.sb_uquotino = (*ip)->i_ino; else if (flags & XFS_QMOPT_GQUOTA) mp->m_sb.sb_gquotino = (*ip)->i_ino; else mp->m_sb.sb_pquotino = (*ip)->i_ino; spin_unlock(&mp->m_sb_lock); xfs_log_sb(tp); error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); xfs_alert(mp, "%s failed (error %d)!", __func__, error); } if (need_alloc) xfs_finish_inode_setup(*ip); return error; }
/* * Truncate file. Must have write permission and not be a directory. */ int xfs_setattr_size( struct xfs_inode *ip, struct iattr *iattr, int flags) { struct xfs_mount *mp = ip->i_mount; struct inode *inode = VFS_I(ip); int mask = iattr->ia_valid; struct xfs_trans *tp; int error; uint lock_flags; uint commit_flags = 0; trace_xfs_setattr(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) return XFS_ERROR(EROFS); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); error = -inode_change_ok(inode, iattr); if (error) return XFS_ERROR(error); ASSERT(S_ISREG(ip->i_d.di_mode)); ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); lock_flags = XFS_ILOCK_EXCL; if (!(flags & XFS_ATTR_NOLOCK)) lock_flags |= XFS_IOLOCK_EXCL; xfs_ilock(ip, lock_flags); /* * Short circuit the truncate case for zero length files. */ if (iattr->ia_size == 0 && ip->i_size == 0 && ip->i_d.di_nextents == 0) { if (!(mask & (ATTR_CTIME|ATTR_MTIME))) goto out_unlock; /* * Use the regular setattr path to update the timestamps. */ xfs_iunlock(ip, lock_flags); iattr->ia_valid &= ~ATTR_SIZE; return xfs_setattr_nonsize(ip, iattr, 0); } /* * Make sure that the dquots are attached to the inode. */ error = xfs_qm_dqattach_locked(ip, 0); if (error) goto out_unlock; /* * Now we can make the changes. Before we join the inode to the * transaction, take care of the part of the truncation that must be * done without the inode lock. This needs to be done before joining * the inode to the transaction, because the inode cannot be unlocked * once it is a part of the transaction. */ if (iattr->ia_size > ip->i_size) { /* * Do the first part of growing a file: zero any data in the * last block that is beyond the old EOF. We need to do this * before the inode is joined to the transaction to modify * i_size. */ error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); if (error) goto out_unlock; } xfs_iunlock(ip, XFS_ILOCK_EXCL); lock_flags &= ~XFS_ILOCK_EXCL; /* * We are going to log the inode size change in this transaction so * any previous writes that are beyond the on disk EOF and the new * EOF that have not been written out need to be written here. If we * do not write the data out, we expose ourselves to the null files * problem. * * Only flush from the on disk size to the smaller of the in memory * file size or the new size as that's the range we really care about * here and prevents waiting for other data not within the range we * care about here. */ if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, XBF_ASYNC, FI_NONE); if (error) goto out_unlock; } /* * Wait for all I/O to complete. */ xfs_ioend_wait(ip); error = -block_truncate_page(inode->i_mapping, iattr->ia_size, xfs_get_blocks); if (error) goto out_unlock; tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT); if (error) goto out_trans_cancel; truncate_setsize(inode, iattr->ia_size); commit_flags = XFS_TRANS_RELEASE_LOG_RES; lock_flags |= XFS_ILOCK_EXCL; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip); /* * Only change the c/mtime if we are changing the size or we are * explicitly asked to change it. This handles the semantic difference * between truncate() and ftruncate() as implemented in the VFS. * * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a * special case where we need to update the times despite not having * these flags set. For all other operations the VFS set these flags * explicitly if it wants a timestamp update. */ if (iattr->ia_size != ip->i_size && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { iattr->ia_ctime = iattr->ia_mtime = current_fs_time(inode->i_sb); mask |= ATTR_CTIME | ATTR_MTIME; } if (iattr->ia_size > ip->i_size) { ip->i_d.di_size = iattr->ia_size; ip->i_size = iattr->ia_size; } else if (iattr->ia_size <= ip->i_size || (iattr->ia_size == 0 && ip->i_d.di_nextents)) { error = xfs_itruncate_data(&tp, ip, iattr->ia_size); if (error) goto out_trans_abort; /* * Truncated "down", so we're removing references to old data * here - if we delay flushing for a long time, we expose * ourselves unduly to the notorious NULL files problem. So, * we mark this inode and flush it when the file is closed, * and do not wait the usual (long) time for writeout. */ xfs_iflags_set(ip, XFS_ITRUNCATED); } if (mask & ATTR_CTIME) { inode->i_ctime = iattr->ia_ctime; ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; ip->i_update_core = 1; } if (mask & ATTR_MTIME) { inode->i_mtime = iattr->ia_mtime; ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; ip->i_update_core = 1; } xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(xs_ig_attrchg); if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); out_unlock: if (lock_flags) xfs_iunlock(ip, lock_flags); return error; out_trans_abort: commit_flags |= XFS_TRANS_ABORT; out_trans_cancel: xfs_trans_cancel(tp, commit_flags); goto out_unlock; }
int xfs_attr_inactive(xfs_inode_t *dp) { xfs_trans_t *trans; xfs_mount_t *mp; int error; mp = dp->i_mount; ASSERT(! XFS_NOT_DQATTACHED(mp, dp)); xfs_ilock(dp, XFS_ILOCK_SHARED); if (!xfs_inode_hasattr(dp) || dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { xfs_iunlock(dp, XFS_ILOCK_SHARED); return 0; } xfs_iunlock(dp, XFS_ILOCK_SHARED); /* * Start our first transaction of the day. * * All future transactions during this code must be "chained" off * this one via the trans_dup() call. All transactions will contain * the inode, and the inode will always be marked with trans_ihold(). * Since the inode will be locked in all transactions, we must log * the inode in every transaction to let it float upward through * the log. */ trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL); error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0); if (error) { xfs_trans_cancel(trans, 0); return error; } xfs_ilock(dp, XFS_ILOCK_EXCL); /* * No need to make quota reservations here. We expect to release some * blocks, not allocate, in the common case. */ xfs_trans_ijoin(trans, dp, 0); /* * Decide on what work routines to call based on the inode size. */ if (!xfs_inode_hasattr(dp) || dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { error = 0; goto out; } error = xfs_attr3_root_inactive(&trans, dp); if (error) goto out; error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); if (error) goto out; error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; out: xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; }
static int xfs_swap_extents( xfs_inode_t *ip, /* target inode */ xfs_inode_t *tip, /* tmp inode */ xfs_swapext_t *sxp) { xfs_mount_t *mp = ip->i_mount; xfs_trans_t *tp; xfs_bstat_t *sbp = &sxp->sx_stat; xfs_ifork_t *tempifp, *ifp, *tifp; int src_log_flags, target_log_flags; int error = 0; int aforkblks = 0; int taforkblks = 0; __uint64_t tmp; tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); if (!tempifp) { error = XFS_ERROR(ENOMEM); goto out; } /* * we have to do two separate lock calls here to keep lockdep * happy. If we try to get all the locks in one call, lock will * report false positives when we drop the ILOCK and regain them * below. */ xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); /* Verify that both files have the same format */ if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { error = XFS_ERROR(EINVAL); goto out_unlock; } /* Verify both files are either real-time or non-realtime */ if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) { error = XFS_ERROR(EINVAL); goto out_unlock; } if (VN_CACHED(VFS_I(tip)) != 0) { error = xfs_flushinval_pages(tip, 0, -1, FI_REMAPF_LOCKED); if (error) goto out_unlock; } /* Verify O_DIRECT for ftmp */ if (VN_CACHED(VFS_I(tip)) != 0) { error = XFS_ERROR(EINVAL); goto out_unlock; } /* Verify all data are being swapped */ if (sxp->sx_offset != 0 || sxp->sx_length != ip->i_d.di_size || sxp->sx_length != tip->i_d.di_size) { error = XFS_ERROR(EFAULT); goto out_unlock; } trace_xfs_swap_extent_before(ip, 0); trace_xfs_swap_extent_before(tip, 1); /* check inode formats now that data is flushed */ error = xfs_swap_extents_check_format(ip, tip); if (error) { xfs_notice(mp, "%s: inode 0x%llx format is incompatible for exchanging.", __func__, ip->i_ino); goto out_unlock; } /* * Compare the current change & modify times with that * passed in. If they differ, we abort this swap. * This is the mechanism used to ensure the calling * process that the file was not changed out from * under it. */ if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) || (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) || (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) || (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) { error = XFS_ERROR(EBUSY); goto out_unlock; } /* We need to fail if the file is memory mapped. Once we have tossed * all existing pages, the page fault will have no option * but to go to the filesystem for pages. By making the page fault call * vop_read (or write in the case of autogrow) they block on the iolock * until we have switched the extents. */ if (VN_MAPPED(VFS_I(ip))) { error = XFS_ERROR(EBUSY); goto out_unlock; } xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(tip, XFS_ILOCK_EXCL); /* * There is a race condition here since we gave up the * ilock. However, the data fork will not change since * we have the iolock (locked for truncation too) so we * are safe. We don't really care if non-io related * fields change. */ xfs_tosspages(ip, 0, -1, FI_REMAPF); tp = xfs_trans_alloc(mp, XFS_TRANS_SWAPEXT); if ((error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0))) { xfs_iunlock(ip, XFS_IOLOCK_EXCL); xfs_iunlock(tip, XFS_IOLOCK_EXCL); xfs_trans_cancel(tp, 0); goto out; } xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); /* * Count the number of extended attribute blocks */ if ( ((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) && (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks); if (error) goto out_trans_cancel; } if ( ((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) && (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) { error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &taforkblks); if (error) goto out_trans_cancel; } /* * Swap the data forks of the inodes */ ifp = &ip->i_df; tifp = &tip->i_df; *tempifp = *ifp; /* struct copy */ *ifp = *tifp; /* struct copy */ *tifp = *tempifp; /* struct copy */ /* * Fix the on-disk inode values */ tmp = (__uint64_t)ip->i_d.di_nblocks; ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks; tip->i_d.di_nblocks = tmp + taforkblks - aforkblks; tmp = (__uint64_t) ip->i_d.di_nextents; ip->i_d.di_nextents = tip->i_d.di_nextents; tip->i_d.di_nextents = tmp; tmp = (__uint64_t) ip->i_d.di_format; ip->i_d.di_format = tip->i_d.di_format; tip->i_d.di_format = tmp; /* * The extents in the source inode could still contain speculative * preallocation beyond EOF (e.g. the file is open but not modified * while defrag is in progress). In that case, we need to copy over the * number of delalloc blocks the data fork in the source inode is * tracking beyond EOF so that when the fork is truncated away when the * temporary inode is unlinked we don't underrun the i_delayed_blks * counter on that inode. */ ASSERT(tip->i_delayed_blks == 0); tip->i_delayed_blks = ip->i_delayed_blks; ip->i_delayed_blks = 0; src_log_flags = XFS_ILOG_CORE; switch (ip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. Otherwise it's already NULL or * pointing to the extent. */ if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) { ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; } src_log_flags |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: src_log_flags |= XFS_ILOG_DBROOT; break; } target_log_flags = XFS_ILOG_CORE; switch (tip->i_d.di_format) { case XFS_DINODE_FMT_EXTENTS: /* If the extents fit in the inode, fix the * pointer. Otherwise it's already NULL or * pointing to the extent. */ if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) { tifp->if_u1.if_extents = tifp->if_u2.if_inline_ext; } target_log_flags |= XFS_ILOG_DEXT; break; case XFS_DINODE_FMT_BTREE: target_log_flags |= XFS_ILOG_DBROOT; break; } xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_trans_log_inode(tp, ip, src_log_flags); xfs_trans_log_inode(tp, tip, target_log_flags); /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); trace_xfs_swap_extent_after(ip, 0); trace_xfs_swap_extent_after(tip, 1); out: kmem_free(tempifp); return error; out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_iunlock(tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); goto out; out_trans_cancel: xfs_trans_cancel(tp, 0); goto out_unlock; }
int xfs_setattr_nonsize( struct xfs_inode *ip, struct iattr *iattr, int flags) { xfs_mount_t *mp = ip->i_mount; struct inode *inode = VFS_I(ip); int mask = iattr->ia_valid; xfs_trans_t *tp; int error; uid_t uid = 0, iuid = 0; gid_t gid = 0, igid = 0; struct xfs_dquot *udqp = NULL, *gdqp = NULL; struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL; trace_xfs_setattr(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) return XFS_ERROR(EROFS); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); error = -inode_change_ok(inode, iattr); if (error) return XFS_ERROR(error); ASSERT((mask & ATTR_SIZE) == 0); if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { uint qflags = 0; if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { uid = iattr->ia_uid; qflags |= XFS_QMOPT_UQUOTA; } else { uid = ip->i_d.di_uid; } if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { gid = iattr->ia_gid; qflags |= XFS_QMOPT_GQUOTA; } else { gid = ip->i_d.di_gid; } ASSERT(udqp == NULL); ASSERT(gdqp == NULL); error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), qflags, &udqp, &gdqp); if (error) return error; } tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); if (error) goto out_dqrele; xfs_ilock(ip, XFS_ILOCK_EXCL); if (mask & (ATTR_UID|ATTR_GID)) { iuid = ip->i_d.di_uid; igid = ip->i_d.di_gid; gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; if (XFS_IS_QUOTA_RUNNING(mp) && ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { ASSERT(tp); error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (error) goto out_trans_cancel; } } xfs_trans_ijoin(tp, ip, 0); if (mask & (ATTR_UID|ATTR_GID)) { if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && !capable(CAP_FSETID)) ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); if (iuid != uid) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { ASSERT(mask & ATTR_UID); ASSERT(udqp); olddquot1 = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp); } ip->i_d.di_uid = uid; inode->i_uid = uid; } if (igid != gid) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { ASSERT(!XFS_IS_PQUOTA_ON(mp)); ASSERT(mask & ATTR_GID); ASSERT(gdqp); olddquot2 = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } ip->i_d.di_gid = gid; inode->i_gid = gid; } } if (mask & ATTR_MODE) { umode_t mode = iattr->ia_mode; if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) mode &= ~S_ISGID; ip->i_d.di_mode &= S_IFMT; ip->i_d.di_mode |= mode & ~S_IFMT; inode->i_mode &= S_IFMT; inode->i_mode |= mode & ~S_IFMT; } if (mask & ATTR_ATIME) { inode->i_atime = iattr->ia_atime; ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; } if (mask & ATTR_CTIME) { inode->i_ctime = iattr->ia_ctime; ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; } if (mask & ATTR_MTIME) { inode->i_mtime = iattr->ia_mtime; ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; } xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(xs_ig_attrchg); if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_qm_dqrele(olddquot1); xfs_qm_dqrele(olddquot2); xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); if (error) return XFS_ERROR(error); if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { error = -xfs_acl_chmod(inode); if (error) return XFS_ERROR(error); } return 0; out_trans_cancel: xfs_trans_cancel(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); out_dqrele: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); return error; }
/* * Create an inode and return with a reference already taken, but unlocked * This is how we create quota inodes */ STATIC int xfs_qm_qino_alloc( xfs_mount_t *mp, xfs_inode_t **ip, __int64_t sbfields, uint flags) { xfs_trans_t *tp; int error; int committed; tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); if ((error = xfs_trans_reserve(tp, XFS_QM_QINOCREATE_SPACE_RES(mp), XFS_CREATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT))) { xfs_trans_cancel(tp, 0); return error; } error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed); if (error) { xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); return error; } /* * Make the changes in the superblock, and log those too. * sbfields arg may contain fields other than *QUOTINO; * VERSIONNUM for example. */ spin_lock(&mp->m_sb_lock); if (flags & XFS_QMOPT_SBVERSION) { ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) == (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | XFS_SB_QFLAGS)); xfs_sb_version_addquota(&mp->m_sb); mp->m_sb.sb_uquotino = NULLFSINO; mp->m_sb.sb_gquotino = NULLFSINO; /* qflags will get updated _after_ quotacheck */ mp->m_sb.sb_qflags = 0; } if (flags & XFS_QMOPT_UQUOTA) mp->m_sb.sb_uquotino = (*ip)->i_ino; else mp->m_sb.sb_gquotino = (*ip)->i_ino; spin_unlock(&mp->m_sb_lock); xfs_mod_sb(tp, sbfields); if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { xfs_alert(mp, "%s failed (error %d)!", __func__, error); return error; } return 0; }
int xfs_setattr_size( struct xfs_inode *ip, struct iattr *iattr, int flags) { struct xfs_mount *mp = ip->i_mount; struct inode *inode = VFS_I(ip); int mask = iattr->ia_valid; xfs_off_t oldsize, newsize; struct xfs_trans *tp; int error; uint lock_flags; uint commit_flags = 0; trace_xfs_setattr(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) return XFS_ERROR(EROFS); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); error = -inode_change_ok(inode, iattr); if (error) return XFS_ERROR(error); ASSERT(S_ISREG(ip->i_d.di_mode)); ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); lock_flags = XFS_ILOCK_EXCL; if (!(flags & XFS_ATTR_NOLOCK)) lock_flags |= XFS_IOLOCK_EXCL; xfs_ilock(ip, lock_flags); oldsize = inode->i_size; newsize = iattr->ia_size; if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { if (!(mask & (ATTR_CTIME|ATTR_MTIME))) goto out_unlock; xfs_iunlock(ip, lock_flags); iattr->ia_valid &= ~ATTR_SIZE; return xfs_setattr_nonsize(ip, iattr, 0); } error = xfs_qm_dqattach_locked(ip, 0); if (error) goto out_unlock; if (newsize > oldsize) { error = xfs_zero_eof(ip, newsize, oldsize); if (error) goto out_unlock; } xfs_iunlock(ip, XFS_ILOCK_EXCL); lock_flags &= ~XFS_ILOCK_EXCL; /* * We are going to log the inode size change in this transaction so * any previous writes that are beyond the on disk EOF and the new * EOF that have not been written out need to be written here. If we * do not write the data out, we expose ourselves to the null files * problem. * * Only flush from the on disk size to the smaller of the in memory * file size or the new size as that's the range we really care about * here and prevents waiting for other data not within the range we * care about here. */ if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) { error = xfs_flush_pages(ip, ip->i_d.di_size, newsize, 0, FI_NONE); if (error) goto out_unlock; } inode_dio_wait(inode); error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); if (error) goto out_unlock; tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT); if (error) goto out_trans_cancel; truncate_setsize(inode, newsize); commit_flags = XFS_TRANS_RELEASE_LOG_RES; lock_flags |= XFS_ILOCK_EXCL; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { iattr->ia_ctime = iattr->ia_mtime = current_fs_time(inode->i_sb); mask |= ATTR_CTIME | ATTR_MTIME; } /* * The first thing we do is set the size to new_size permanently on * disk. This way we don't have to worry about anyone ever being able * to look at the data being freed even in the face of a crash. * What we're getting around here is the case where we free a block, it * is allocated to another file, it is written to, and then we crash. * If the new data gets written to the file but the log buffers * containing the free and reallocation don't, then we'd end up with * garbage in the blocks being freed. As long as we make the new size * permanent before actually freeing any blocks it doesn't matter if * they get written to. */ ip->i_d.di_size = newsize; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (newsize <= oldsize) { error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize); if (error) goto out_trans_abort; xfs_iflags_set(ip, XFS_ITRUNCATED); } if (mask & ATTR_CTIME) { inode->i_ctime = iattr->ia_ctime; ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; } if (mask & ATTR_MTIME) { inode->i_mtime = iattr->ia_mtime; ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; } xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(xs_ig_attrchg); if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); out_unlock: if (lock_flags) xfs_iunlock(ip, lock_flags); return error; out_trans_abort: commit_flags |= XFS_TRANS_ABORT; out_trans_cancel: xfs_trans_cancel(tp, commit_flags); goto out_unlock; }
/* * Adjust quota limits, and start/stop timers accordingly. */ STATIC int xfs_qm_scall_setqlim( xfs_mount_t *mp, xfs_dqid_t id, uint type, fs_disk_quota_t *newlim) { xfs_disk_dquot_t *ddq; xfs_dquot_t *dqp; xfs_trans_t *tp; int error; xfs_qcnt_t hard, soft; if (!capable(CAP_SYS_ADMIN)) return XFS_ERROR(EPERM); if ((newlim->d_fieldmask & (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK)) == 0) return (0); tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, 0, 0, XFS_DEFAULT_LOG_COUNT))) { xfs_trans_cancel(tp, 0); return (error); } /* * We don't want to race with a quotaoff so take the quotaoff lock. * (We don't hold an inode lock, so there's nothing else to stop * a quotaoff from happening). (XXXThis doesn't currently happen * because we take the vfslock before calling xfs_qm_sysent). */ mutex_lock(&(XFS_QI_QOFFLOCK(mp)), PINOD); /* * Get the dquot (locked), and join it to the transaction. * Allocate the dquot if this doesn't exist. */ if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) { xfs_trans_cancel(tp, XFS_TRANS_ABORT); mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); ASSERT(error != ENOENT); return (error); } xfs_dqtrace_entry(dqp, "Q_SETQLIM: AFT DQGET"); xfs_trans_dqjoin(tp, dqp); ddq = &dqp->q_core; /* * Make sure that hardlimits are >= soft limits before changing. */ hard = (newlim->d_fieldmask & FS_DQ_BHARD) ? (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) : INT_GET(ddq->d_blk_hardlimit, ARCH_CONVERT); soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ? (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) : INT_GET(ddq->d_blk_softlimit, ARCH_CONVERT); if (hard == 0 || hard >= soft) { INT_SET(ddq->d_blk_hardlimit, ARCH_CONVERT, hard); INT_SET(ddq->d_blk_softlimit, ARCH_CONVERT, soft); if (id == 0) { mp->m_quotainfo->qi_bhardlimit = hard; mp->m_quotainfo->qi_bsoftlimit = soft; } } else { qdprintk("blkhard %Ld < blksoft %Ld\n", hard, soft); } hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : INT_GET(ddq->d_rtb_hardlimit, ARCH_CONVERT); soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ? (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) : INT_GET(ddq->d_rtb_softlimit, ARCH_CONVERT); if (hard == 0 || hard >= soft) { INT_SET(ddq->d_rtb_hardlimit, ARCH_CONVERT, hard); INT_SET(ddq->d_rtb_softlimit, ARCH_CONVERT, soft); if (id == 0) { mp->m_quotainfo->qi_rtbhardlimit = hard; mp->m_quotainfo->qi_rtbsoftlimit = soft; } } else { qdprintk("rtbhard %Ld < rtbsoft %Ld\n", hard, soft); } hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? (xfs_qcnt_t) newlim->d_ino_hardlimit : INT_GET(ddq->d_ino_hardlimit, ARCH_CONVERT); soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ? (xfs_qcnt_t) newlim->d_ino_softlimit : INT_GET(ddq->d_ino_softlimit, ARCH_CONVERT); if (hard == 0 || hard >= soft) { INT_SET(ddq->d_ino_hardlimit, ARCH_CONVERT, hard); INT_SET(ddq->d_ino_softlimit, ARCH_CONVERT, soft); if (id == 0) { mp->m_quotainfo->qi_ihardlimit = hard; mp->m_quotainfo->qi_isoftlimit = soft; } } else { qdprintk("ihard %Ld < isoft %Ld\n", hard, soft); } if (id == 0) { /* * Timelimits for the super user set the relative time * the other users can be over quota for this file system. * If it is zero a default is used. Ditto for the default * soft and hard limit values (already done, above). */ if (newlim->d_fieldmask & FS_DQ_BTIMER) { mp->m_quotainfo->qi_btimelimit = newlim->d_btimer; INT_SET(ddq->d_btimer, ARCH_CONVERT, newlim->d_btimer); } if (newlim->d_fieldmask & FS_DQ_ITIMER) { mp->m_quotainfo->qi_itimelimit = newlim->d_itimer; INT_SET(ddq->d_itimer, ARCH_CONVERT, newlim->d_itimer); } if (newlim->d_fieldmask & FS_DQ_RTBTIMER) { mp->m_quotainfo->qi_rtbtimelimit = newlim->d_rtbtimer; INT_SET(ddq->d_rtbtimer, ARCH_CONVERT, newlim->d_rtbtimer); } } else /* if (XFS_IS_QUOTA_ENFORCED(mp)) */ { /* * If the user is now over quota, start the timelimit. * The user will not be 'warned'. * Note that we keep the timers ticking, whether enforcement * is on or off. We don't really want to bother with iterating * over all ondisk dquots and turning the timers on/off. */ xfs_qm_adjust_dqtimers(mp, ddq); } dqp->dq_flags |= XFS_DQ_DIRTY; xfs_trans_log_dquot(tp, dqp); xfs_dqtrace_entry(dqp, "Q_SETQLIM: COMMIT"); xfs_trans_commit(tp, 0, NULL); xfs_qm_dqprint(dqp); xfs_qm_dqrele(dqp); mutex_unlock(&(XFS_QI_QOFFLOCK(mp))); return (0); }
STATIC long xfs_file_fallocate( struct file *file, int mode, loff_t offset, loff_t len) { struct inode *inode = file_inode(file); struct xfs_inode *ip = XFS_I(inode); struct xfs_trans *tp; long error; loff_t new_size = 0; if (!S_ISREG(inode->i_mode)) return -EINVAL; if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) return -EOPNOTSUPP; xfs_ilock(ip, XFS_IOLOCK_EXCL); if (mode & FALLOC_FL_PUNCH_HOLE) { error = xfs_free_file_space(ip, offset, len); if (error) goto out_unlock; } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { unsigned blksize_mask = (1 << inode->i_blkbits) - 1; if (offset & blksize_mask || len & blksize_mask) { error = -EINVAL; goto out_unlock; } ASSERT(offset + len < i_size_read(inode)); new_size = i_size_read(inode) - len; error = xfs_collapse_file_space(ip, offset, len); if (error) goto out_unlock; } else { if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) { new_size = offset + len; error = -inode_newsize_ok(inode, new_size); if (error) goto out_unlock; } if (mode & FALLOC_FL_ZERO_RANGE) error = xfs_zero_file_space(ip, offset, len); else error = xfs_alloc_file_space(ip, offset, len, XFS_BMAPI_PREALLOC); if (error) goto out_unlock; } tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); if (error) { xfs_trans_cancel(tp, 0); goto out_unlock; } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); ip->i_d.di_mode &= ~S_ISUID; if (ip->i_d.di_mode & S_IXGRP) ip->i_d.di_mode &= ~S_ISGID; if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE))) ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (file->f_flags & O_DSYNC) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); if (error) goto out_unlock; /* Change file size if needed */ if (new_size) { struct iattr iattr; iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; error = xfs_setattr_size(ip, &iattr); } out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); return -error; }
/* * Generic handler routine to remove a name from an attribute list. * Transitions attribute list from Btree to shortform as necessary. */ int xfs_attr_remove( struct xfs_inode *dp, const unsigned char *name, int flags) { struct xfs_mount *mp = dp->i_mount; struct xfs_da_args args; struct xfs_bmap_free flist; xfs_fsblock_t firstblock; int error; XFS_STATS_INC(xs_attr_remove); if (XFS_FORCED_SHUTDOWN(dp->i_mount)) return -EIO; if (!xfs_inode_hasattr(dp)) return -ENOATTR; error = xfs_attr_args_init(&args, dp, name, flags); if (error) return error; args.firstblock = &firstblock; args.flist = &flist; /* * we have no control over the attribute names that userspace passes us * to remove, so we have to allow the name lookup prior to attribute * removal to fail. */ args.op_flags = XFS_DA_OP_OKNOENT; error = xfs_qm_dqattach(dp, 0); if (error) return error; /* * Start our first transaction of the day. * * All future transactions during this code must be "chained" off * this one via the trans_dup() call. All transactions will contain * the inode, and the inode will always be marked with trans_ihold(). * Since the inode will be locked in all transactions, we must log * the inode in every transaction to let it float upward through * the log. */ args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM); /* * Root fork attributes can use reserved data blocks for this * operation if necessary */ if (flags & ATTR_ROOT) args.trans->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(args.trans, &M_RES(mp)->tr_attrrm, XFS_ATTRRM_SPACE_RES(mp), 0); if (error) { xfs_trans_cancel(args.trans, 0); return error; } xfs_ilock(dp, XFS_ILOCK_EXCL); /* * No need to make quota reservations here. We expect to release some * blocks not allocate in the common case. */ xfs_trans_ijoin(args.trans, dp, 0); if (!xfs_inode_hasattr(dp)) { error = -ENOATTR; } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { ASSERT(dp->i_afp->if_flags & XFS_IFINLINE); error = xfs_attr_shortform_remove(&args); } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { error = xfs_attr_leaf_removename(&args); } else { error = xfs_attr_node_removename(&args); } if (error) goto out; /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(args.trans); if ((flags & ATTR_KERNOTIME) == 0) xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); /* * Commit the last in the sequence of transactions. */ xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE); error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; out: if (args.trans) { xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); } xfs_iunlock(dp, XFS_ILOCK_EXCL); return error; }
int xfs_ioc_space( struct xfs_inode *ip, struct inode *inode, struct file *filp, int ioflags, unsigned int cmd, xfs_flock64_t *bf) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; struct iattr iattr; bool setprealloc = false; bool clrprealloc = false; int error; /* * Only allow the sys admin to reserve space unless * unwritten extents are enabled. */ if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) && !capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) return -XFS_ERROR(EPERM); if (!(filp->f_mode & FMODE_WRITE)) return -XFS_ERROR(EBADF); if (!S_ISREG(inode->i_mode)) return -XFS_ERROR(EINVAL); error = mnt_want_write_file(filp); if (error) return error; xfs_ilock(ip, XFS_IOLOCK_EXCL); switch (bf->l_whence) { case 0: /*SEEK_SET*/ break; case 1: /*SEEK_CUR*/ bf->l_start += filp->f_pos; break; case 2: /*SEEK_END*/ bf->l_start += XFS_ISIZE(ip); break; default: error = XFS_ERROR(EINVAL); goto out_unlock; } /* * length of <= 0 for resv/unresv/zero is invalid. length for * alloc/free is ignored completely and we have no idea what userspace * might have set it to, so set it to zero to allow range * checks to pass. */ switch (cmd) { case XFS_IOC_ZERO_RANGE: case XFS_IOC_RESVSP: case XFS_IOC_RESVSP64: case XFS_IOC_UNRESVSP: case XFS_IOC_UNRESVSP64: if (bf->l_len <= 0) { error = XFS_ERROR(EINVAL); goto out_unlock; } break; default: bf->l_len = 0; break; } if (bf->l_start < 0 || bf->l_start > mp->m_super->s_maxbytes || bf->l_start + bf->l_len < 0 || bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) { error = XFS_ERROR(EINVAL); goto out_unlock; } switch (cmd) { case XFS_IOC_ZERO_RANGE: error = xfs_zero_file_space(ip, bf->l_start, bf->l_len); if (!error) setprealloc = true; break; case XFS_IOC_RESVSP: case XFS_IOC_RESVSP64: error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len, XFS_BMAPI_PREALLOC); if (!error) setprealloc = true; break; case XFS_IOC_UNRESVSP: case XFS_IOC_UNRESVSP64: error = xfs_free_file_space(ip, bf->l_start, bf->l_len); break; case XFS_IOC_ALLOCSP: case XFS_IOC_ALLOCSP64: case XFS_IOC_FREESP: case XFS_IOC_FREESP64: if (bf->l_start > XFS_ISIZE(ip)) { error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), bf->l_start - XFS_ISIZE(ip), 0); if (error) goto out_unlock; } iattr.ia_valid = ATTR_SIZE; iattr.ia_size = bf->l_start; error = xfs_setattr_size(ip, &iattr); if (!error) clrprealloc = true; break; default: ASSERT(0); error = XFS_ERROR(EINVAL); } if (error) goto out_unlock; tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0); if (error) { xfs_trans_cancel(tp, 0); goto out_unlock; } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); if (!(ioflags & IO_INVIS)) { ip->i_d.di_mode &= ~S_ISUID; if (ip->i_d.di_mode & S_IXGRP) ip->i_d.di_mode &= ~S_ISGID; xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } if (setprealloc) ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; else if (clrprealloc) ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); if (filp->f_flags & O_DSYNC) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0); out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); mnt_drop_write_file(filp); return -error; }
STATIC int xfs_ioctl_setattr( xfs_inode_t *ip, struct fsxattr *fa, int mask) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; unsigned int lock_flags = 0; struct xfs_dquot *udqp = NULL; struct xfs_dquot *gdqp = NULL; struct xfs_dquot *olddquot = NULL; int code; trace_xfs_ioctl_setattr(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) return XFS_ERROR(EROFS); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); /* * Disallow 32bit project ids when projid32bit feature is not enabled. */ if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) return XFS_ERROR(EINVAL); /* * If disk quotas is on, we make sure that the dquots do exist on disk, * before we start any other transactions. Trying to do this later * is messy. We don't care to take a readlock to look at the ids * in inode here, because we can't hold it across the trans_reserve. * If the IDs do change before we take the ilock, we're covered * because the i_*dquot fields will get updated anyway. */ if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, ip->i_d.di_gid, fa->fsx_projid, XFS_QMOPT_PQUOTA, &udqp, &gdqp); if (code) return code; } /* * For the other attributes, we acquire the inode lock and * first do an error checking pass. */ tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); if (code) goto error_return; lock_flags = XFS_ILOCK_EXCL; xfs_ilock(ip, lock_flags); /* * CAP_FOWNER overrides the following restrictions: * * The user ID of the calling process must be equal * to the file owner ID, except in cases where the * CAP_FSETID capability is applicable. */ if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) { code = XFS_ERROR(EPERM); goto error_return; } /* * Do a quota reservation only if projid is actually going to change. */ if (mask & FSX_PROJID) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) && xfs_get_projid(ip) != fa->fsx_projid) { ASSERT(tp); code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (code) /* out of quota */ goto error_return; } } if (mask & FSX_EXTSIZE) { /* * Can't change extent size if any extents are allocated. */ if (ip->i_d.di_nextents && ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) { code = XFS_ERROR(EINVAL); /* EFBIG? */ goto error_return; } /* * Extent size must be a multiple of the appropriate block * size, if set at all. It must also be smaller than the * maximum extent size supported by the filesystem. * * Also, for non-realtime files, limit the extent size hint to * half the size of the AGs in the filesystem so alignment * doesn't result in extents larger than an AG. */ if (fa->fsx_extsize != 0) { xfs_extlen_t size; xfs_fsblock_t extsize_fsb; extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); if (extsize_fsb > MAXEXTLEN) { code = XFS_ERROR(EINVAL); goto error_return; } if (XFS_IS_REALTIME_INODE(ip) || ((mask & FSX_XFLAGS) && (fa->fsx_xflags & XFS_XFLAG_REALTIME))) { size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; } else { size = mp->m_sb.sb_blocksize; if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { code = XFS_ERROR(EINVAL); goto error_return; } } if (fa->fsx_extsize % size) { code = XFS_ERROR(EINVAL); goto error_return; } } } if (mask & FSX_XFLAGS) { /* * Can't change realtime flag if any extents are allocated. */ if ((ip->i_d.di_nextents || ip->i_delayed_blks) && (XFS_IS_REALTIME_INODE(ip)) != (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { code = XFS_ERROR(EINVAL); /* EFBIG? */ goto error_return; } /* * If realtime flag is set then must have realtime data. */ if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) { if ((mp->m_sb.sb_rblocks == 0) || (mp->m_sb.sb_rextsize == 0) || (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { code = XFS_ERROR(EINVAL); goto error_return; } } /* * Can't modify an immutable/append-only file unless * we have appropriate permission. */ if ((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && !capable(CAP_LINUX_IMMUTABLE)) { code = XFS_ERROR(EPERM); goto error_return; } } xfs_trans_ijoin(tp, ip); /* * Change file ownership. Must be the owner or privileged. */ if (mask & FSX_PROJID) { /* * CAP_FSETID overrides the following restrictions: * * The set-user-ID and set-group-ID bits of a file will be * cleared upon successful return from chown() */ if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && !capable(CAP_FSETID)) ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); /* * Change the ownerships and register quota modifications * in the transaction. */ if (xfs_get_projid(ip) != fa->fsx_projid) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { olddquot = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } xfs_set_projid(ip, fa->fsx_projid); /* * We may have to rev the inode as well as * the superblock version number since projids didn't * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. */ if (ip->i_d.di_version == 1) xfs_bump_ino_vers2(tp, ip); } } if (mask & FSX_EXTSIZE) ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; if (mask & FSX_XFLAGS) { xfs_set_diflags(ip, fa->fsx_xflags); xfs_diflags_to_linux(ip); } xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(xs_ig_attrchg); /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. * This is slightly sub-optimal in that truncates require * two sync transactions instead of one for wsync filesystems. * One for the truncate and one for the timestamps since we * don't want to change the timestamps unless we're sure the * truncate worked. Truncates are less than 1% of the laddis * mix so this probably isn't worth the trouble to optimize. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); code = xfs_trans_commit(tp, 0); xfs_iunlock(ip, lock_flags); /* * Release any dquot(s) the inode had kept before chown. */ xfs_qm_dqrele(olddquot); xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); return code; error_return: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); xfs_trans_cancel(tp, 0); if (lock_flags) xfs_iunlock(ip, lock_flags); return code; }
/* * Free a symlink that has blocks associated with it. */ STATIC int xfs_inactive_symlink_rmt( xfs_inode_t *ip, xfs_trans_t **tpp) { xfs_buf_t *bp; int committed; int done; int error; xfs_fsblock_t first_block; xfs_bmap_free_t free_list; int i; xfs_mount_t *mp; xfs_bmbt_irec_t mval[XFS_SYMLINK_MAPS]; int nmaps; xfs_trans_t *ntp; int size; xfs_trans_t *tp; tp = *tpp; mp = ip->i_mount; ASSERT(ip->i_df.if_flags & XFS_IFEXTENTS); /* * We're freeing a symlink that has some * blocks allocated to it. Free the * blocks here. We know that we've got * either 1 or 2 extents and that we can * free them all in one bunmapi call. */ ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2); /* * Lock the inode, fix the size, and join it to the transaction. * Hold it so in the normal path, we still have it locked for * the second transaction. In the error paths we need it * held so the cancel won't rele it, see below. */ size = (int)ip->i_d.di_size; ip->i_d.di_size = 0; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); /* * Find the block(s) so we can inval and unmap them. */ done = 0; xfs_bmap_init(&free_list, &first_block); nmaps = ARRAY_SIZE(mval); error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size), mval, &nmaps, 0); if (error) goto error0; /* * Invalidate the block(s). No validation is done. */ for (i = 0; i < nmaps; i++) { bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, mval[i].br_startblock), XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0); if (!bp) { error = ENOMEM; goto error1; } xfs_trans_binval(tp, bp); } /* * Unmap the dead block(s) to the free_list. */ if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, &first_block, &free_list, &done))) goto error1; ASSERT(done); /* * Commit the first transaction. This logs the EFI and the inode. */ if ((error = xfs_bmap_finish(&tp, &free_list, &committed))) goto error1; /* * The transaction must have been committed, since there were * actually extents freed by xfs_bunmapi. See xfs_bmap_finish. * The new tp has the extent freeing and EFDs. */ ASSERT(committed); /* * The first xact was committed, so add the inode to the new one. * Mark it dirty so it will be logged and moved forward in the log as * part of every commit. */ xfs_trans_ijoin(tp, ip, 0); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); /* * Get a new, empty transaction to return to our caller. */ ntp = xfs_trans_dup(tp); /* * Commit the transaction containing extent freeing and EFDs. * If we get an error on the commit here or on the reserve below, * we need to unlock the inode since the new transaction doesn't * have the inode attached. */ error = xfs_trans_commit(tp, 0); tp = ntp; if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); goto error0; } /* * transaction commit worked ok so we can drop the extra ticket * reference that we gained in xfs_trans_dup() */ xfs_log_ticket_put(tp->t_ticket); /* * Remove the memory for extent descriptions (just bookkeeping). */ if (ip->i_df.if_bytes) xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK); ASSERT(ip->i_df.if_bytes == 0); /* * Put an itruncate log reservation in the new transaction * for our caller. */ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); if (error) { ASSERT(XFS_FORCED_SHUTDOWN(mp)); goto error0; } xfs_trans_ijoin(tp, ip, 0); *tpp = tp; return 0; error1: xfs_bmap_cancel(&free_list); error0: return error; }