STATIC int xfs_commit_dummy_trans( struct xfs_mount *mp, uint flags) { struct xfs_inode *ip = mp->m_rootip; struct xfs_trans *tp; int error; int log_flags = XFS_LOG_FORCE; if (flags & SYNC_WAIT) log_flags |= XFS_LOG_SYNC; /* * Put a dummy transaction in the log to tell recovery * that all others are OK. */ tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); if (error) { xfs_trans_cancel(tp, 0); return error; } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); error = xfs_trans_commit(tp, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); /* the log force ensures this transaction is pushed to disk */ xfs_log_force(mp, 0, log_flags); return error; }
ssize_t /* bytes written, or (-) error */ xfs_write( bhv_desc_t *bdp, struct file *file, const char *buf, size_t size, loff_t *offset, int ioflags, cred_t *credp) { xfs_inode_t *xip; xfs_mount_t *mp; ssize_t ret; int error = 0; xfs_fsize_t isize, new_size; xfs_fsize_t n, limit; xfs_iocore_t *io; vnode_t *vp; int iolock; int eventsent = 0; vrwlock_t locktype; XFS_STATS_INC(xs_write_calls); vp = BHV_TO_VNODE(bdp); xip = XFS_BHVTOI(bdp); if (size == 0) return 0; io = &xip->i_iocore; mp = io->io_mount; fs_check_frozen(vp->v_vfsp, SB_FREEZE_WRITE); if (XFS_FORCED_SHUTDOWN(xip->i_mount)) { return -EIO; } if (unlikely(ioflags & IO_ISDIRECT)) { if (((__psint_t)buf & BBMASK) || (*offset & mp->m_blockmask) || (size & mp->m_blockmask)) { return XFS_ERROR(-EINVAL); } iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; } else { iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; } if (ioflags & IO_ISLOCKED) iolock = 0; xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); isize = xip->i_d.di_size; limit = XFS_MAXIOFFSET(mp); if (file->f_flags & O_APPEND) *offset = isize; start: n = limit - *offset; if (n <= 0) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return -EFBIG; } if (n < size) size = n; new_size = *offset + size; if (new_size > isize) { io->io_new_size = new_size; } if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { loff_t savedsize = *offset; int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); xfs_iunlock(xip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, *offset, size, dmflags, &locktype); if (error) { if (iolock) xfs_iunlock(xip, iolock); return -error; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; /* * The iolock was dropped and reaquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ if ((file->f_flags & O_APPEND) && savedsize != xip->i_d.di_size) { *offset = isize = xip->i_d.di_size; goto start; } } /* * If the offset is beyond the size of the file, we have a couple * of things to do. First, if there is already space allocated * we need to either create holes or zero the disk or ... * * If there is a page where the previous size lands, we need * to zero it out up to the new size. */ if (!(ioflags & IO_ISDIRECT) && (*offset > isize && isize)) { error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset, isize, *offset + size); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return(-error); } } xfs_iunlock(xip, XFS_ILOCK_EXCL); /* * If we're writing the file then make sure to clear the * setuid and setgid bits if the process is not being run * by root. This keeps people from modifying setuid and * setgid binaries. */ if (((xip->i_d.di_mode & S_ISUID) || ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (error) { xfs_iunlock(xip, iolock); return -error; } } if ((ssize_t) size < 0) { ret = -EINVAL; goto error; } if (!access_ok(VERIFY_READ, buf, size)) { ret = -EINVAL; goto error; } retry: if (unlikely(ioflags & IO_ISDIRECT)) { xfs_inval_cached_pages(vp, io, *offset, 1, 1); xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, buf, size, *offset, ioflags); ret = do_generic_direct_write(file, buf, size, offset); } else { xfs_rw_enter_trace(XFS_WRITE_ENTER, io, buf, size, *offset, ioflags); ret = do_generic_file_write(file, buf, size, offset); } if (unlikely(ioflags & IO_INVIS)) { /* generic_file_write updates the mtime/ctime but we need * to undo that because this I/O was supposed to be * invisible. */ struct inode *inode = LINVFS_GET_IP(vp); inode->i_mtime = xip->i_d.di_mtime.t_sec; inode->i_ctime = xip->i_d.di_ctime.t_sec; } else { xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); } if ((ret == -ENOSPC) && DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { xfs_rwunlock(bdp, locktype); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (error) return -error; xfs_rwlock(bdp, locktype); *offset = xip->i_d.di_size; goto retry; } error: if (ret <= 0) { if (iolock) xfs_rwunlock(bdp, locktype); return ret; } XFS_STATS_ADD(xs_write_bytes, ret); if (*offset > xip->i_d.di_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); if (*offset > xip->i_d.di_size) { struct inode *inode = LINVFS_GET_IP(vp); xip->i_d.di_size = *offset; i_size_write(inode, *offset); xip->i_update_core = 1; xip->i_update_size = 1; mark_inode_dirty_sync(inode); } xfs_iunlock(xip, XFS_ILOCK_EXCL); } /* Handle various SYNC-type writes */ if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) { /* * If we're treating this as O_DSYNC and we have not updated the * size, force the log. */ if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && !(xip->i_update_size)) { /* * If an allocation transaction occurred * without extending the size, then we have to force * the log up the proper point to ensure that the * allocation is permanent. We can't count on * the fact that buffered writes lock out direct I/O * writes - the direct I/O write could have extended * the size nontransactionally, then finished before * we started. xfs_write_file will think that the file * didn't grow but the update isn't safe unless the * size change is logged. * * Force the log if we've committed a transaction * against the inode or if someone else has and * the commit record hasn't gone to disk (e.g. * the inode is pinned). This guarantees that * all changes affecting the inode are permanent * when we return. */ xfs_inode_log_item_t *iip; xfs_lsn_t lsn; iip = xip->i_itemp; if (iip && iip->ili_last_lsn) { lsn = iip->ili_last_lsn; xfs_log_force(mp, lsn, XFS_LOG_FORCE | XFS_LOG_SYNC); } else if (xfs_ipincount(xip) > 0) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); } } else { xfs_trans_t *tp; /* * O_SYNC or O_DSYNC _with_ a size update are handled * the same way. * * If the write was synchronous then we need to make * sure that the inode modification time is permanent. * We'll have updated the timestamp above, so here * we use a synchronous transaction to log the inode. * It's not fast, but it's necessary. * * If this a dsync write and the size got changed * non-transactionally, then we need to ensure that * the size change gets logged in a synchronous * transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); if ((error = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0))) { /* Transaction reserve failed */ xfs_trans_cancel(tp, 0); } else { /* Transaction reserve successful */ xfs_ilock(xip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, xip); xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0, NULL); xfs_iunlock(xip, XFS_ILOCK_EXCL); } } } /* (ioflags & O_SYNC) */ /* * If we are coming from an nfsd thread then insert into the * reference cache. */ if (!strcmp(current->comm, "nfsd")) xfs_refcache_insert(xip); /* Drop lock this way - the old refcache release is in here */ if (iolock) xfs_rwunlock(bdp, locktype); return(ret); }
STATIC int xfs_file_fsync( struct file *file, struct dentry *dentry, int datasync) { struct xfs_inode *ip = XFS_I(dentry->d_inode); struct xfs_trans *tp; int error = 0; int log_flushed = 0; xfs_itrace_entry(ip); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return -XFS_ERROR(EIO); xfs_iflags_clear(ip, XFS_ITRUNCATED); /* * We always need to make sure that the required inode state is safe on * disk. The inode might be clean but we still might need to force the * log because of committed transactions that haven't hit the disk yet. * Likewise, there could be unflushed non-transactional changes to the * inode core that have to go to disk and this requires us to issue * a synchronous transaction to capture these changes correctly. * * This code relies on the assumption that if the i_update_core field * of the inode is clear and the inode is unpinned then it is clean * and no action is required. */ xfs_ilock(ip, XFS_ILOCK_SHARED); /* * First check if the VFS inode is marked dirty. All the dirtying * of non-transactional updates no goes through mark_inode_dirty*, * which allows us to distinguish beteeen pure timestamp updates * and i_size updates which need to be caught for fdatasync. * After that also theck for the dirty state in the XFS inode, which * might gets cleared when the inode gets written out via the AIL * or xfs_iflush_cluster. */ if (((dentry->d_inode->i_state & I_DIRTY_DATASYNC) || ((dentry->d_inode->i_state & I_DIRTY_SYNC) && !datasync)) && ip->i_update_core) { /* * Kick off a transaction to log the inode core to get the * updates. The sync transaction will also force the log. */ xfs_iunlock(ip, XFS_ILOCK_SHARED); tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); if (error) { xfs_trans_cancel(tp, 0); return -error; } xfs_ilock(ip, XFS_ILOCK_EXCL); /* * Note - it's possible that we might have pushed ourselves out * of the way during trans_reserve which would flush the inode. * But there's no guarantee that the inode buffer has actually * gone out yet (it's delwri). Plus the buffer could be pinned * anyway if it's part of an inode in another recent * transaction. So we play it safe and fire off the * transaction anyway. */ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = _xfs_trans_commit(tp, 0, &log_flushed); xfs_iunlock(ip, XFS_ILOCK_EXCL); } else { /* * Timestamps/size haven't changed since last inode flush or * inode transaction commit. That means either nothing got * written or a transaction committed which caught the updates. * If the latter happened and the transaction hasn't hit the * disk yet, the inode will be still be pinned. If it is, * force the log. */ if (xfs_ipincount(ip)) { error = _xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, XFS_LOG_SYNC, &log_flushed); } xfs_iunlock(ip, XFS_ILOCK_SHARED); } if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) { /* * If the log write didn't issue an ordered tag we need * to flush the disk cache for the data device now. */ if (!log_flushed) xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); /* * If this inode is on the RT dev we need to flush that * cache as well. */ if (XFS_IS_REALTIME_INODE(ip)) xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp); } return -error; }
ssize_t /* bytes written, or (-) error */ xfs_write( bhv_desc_t *bdp, struct kiocb *iocb, const struct iovec *iovp, unsigned int nsegs, loff_t *offset, int ioflags, cred_t *credp) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; unsigned long segs = nsegs; xfs_inode_t *xip; xfs_mount_t *mp; ssize_t ret = 0, error = 0; xfs_fsize_t isize, new_size; xfs_iocore_t *io; vnode_t *vp; unsigned long seg; int iolock; int eventsent = 0; vrwlock_t locktype; size_t ocount = 0, count; loff_t pos; int need_isem = 1, need_flush = 0; XFS_STATS_INC(xs_write_calls); vp = BHV_TO_VNODE(bdp); xip = XFS_BHVTOI(bdp); for (seg = 0; seg < segs; seg++) { const struct iovec *iv = &iovp[seg]; /* * If any segment has a negative length, or the cumulative * length ever wraps negative then return -EINVAL. */ ocount += iv->iov_len; if (unlikely((ssize_t)(ocount|iv->iov_len) < 0)) return -EINVAL; if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len)) continue; if (seg == 0) return -EFAULT; segs = seg; ocount -= iv->iov_len; /* This segment is no good */ break; } count = ocount; pos = *offset; if (count == 0) return 0; io = &xip->i_iocore; mp = io->io_mount; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; if (ioflags & IO_ISDIRECT) { xfs_buftarg_t *target = (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((pos & target->pbr_smask) || (count & target->pbr_smask)) return XFS_ERROR(-EINVAL); if (!VN_CACHED(vp) && pos < i_size_read(inode)) need_isem = 0; if (VN_CACHED(vp)) need_flush = 1; } relock: if (need_isem) { iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; down(&inode->i_sem); } else { iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; } xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); isize = i_size_read(inode); if (file->f_flags & O_APPEND) *offset = isize; start: error = -generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); goto out_unlock_isem; } new_size = pos + count; if (new_size > isize) io->io_new_size = new_size; if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { loff_t savedsize = pos; int dmflags = FILP_DELAY_FLAG(file); if (need_isem) dmflags |= DM_FLAGS_ISEM; xfs_iunlock(xip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, pos, count, dmflags, &locktype); if (error) { xfs_iunlock(xip, iolock); goto out_unlock_isem; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; /* * The iolock was dropped and reaquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ if ((file->f_flags & O_APPEND) && savedsize != isize) { pos = isize = xip->i_d.di_size; goto start; } } /* * On Linux, generic_file_write updates the times even if * no data is copied in so long as the write had a size. * * We must update xfs' times since revalidate will overcopy xfs. */ if (!(ioflags & IO_INVIS)) { xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); inode_update_time(inode, 1); } /* * If the offset is beyond the size of the file, we have a couple * of things to do. First, if there is already space allocated * we need to either create holes or zero the disk or ... * * If there is a page where the previous size lands, we need * to zero it out up to the new size. */ if (pos > isize) { error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, isize, pos + count); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); goto out_unlock_isem; } } xfs_iunlock(xip, XFS_ILOCK_EXCL); /* * If we're writing the file then make sure to clear the * setuid and setgid bits if the process is not being run * by root. This keeps people from modifying setuid and * setgid binaries. */ if (((xip->i_d.di_mode & S_ISUID) || ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (likely(!error)) error = -remove_suid(file->f_dentry); if (unlikely(error)) { xfs_iunlock(xip, iolock); goto out_unlock_isem; } } retry: /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; if ((ioflags & IO_ISDIRECT)) { if (need_flush) { xfs_inval_cached_trace(io, pos, -1, ctooff(offtoct(pos)), -1); VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)), -1, FI_REMAPF_LOCKED); } if (need_isem) { /* demote the lock now the cached pages are gone */ XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL); up(&inode->i_sem); iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; need_isem = 0; } xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs, *offset, ioflags); ret = generic_file_direct_write(iocb, iovp, &segs, pos, offset, count, ocount); /* * direct-io write to a hole: fall through to buffered I/O * for completing the rest of the request. */ if (ret >= 0 && ret != count) { XFS_STATS_ADD(xs_write_bytes, ret); pos += ret; count -= ret; need_isem = 1; ioflags &= ~IO_ISDIRECT; xfs_iunlock(xip, iolock); goto relock; } } else { xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs, *offset, ioflags); ret = generic_file_buffered_write(iocb, iovp, segs, pos, offset, count, ret); } current->backing_dev_info = NULL; if (ret == -EIOCBQUEUED) ret = wait_on_sync_kiocb(iocb); if ((ret == -ENOSPC) && DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { xfs_rwunlock(bdp, locktype); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (error) goto out_unlock_isem; xfs_rwlock(bdp, locktype); pos = xip->i_d.di_size; goto retry; } if (*offset > xip->i_d.di_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); if (*offset > xip->i_d.di_size) { xip->i_d.di_size = *offset; i_size_write(inode, *offset); xip->i_update_core = 1; xip->i_update_size = 1; } xfs_iunlock(xip, XFS_ILOCK_EXCL); } error = -ret; if (ret <= 0) goto out_unlock_internal; XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { /* * If we're treating this as O_DSYNC and we have not updated the * size, force the log. */ if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && !(xip->i_update_size)) { xfs_inode_log_item_t *iip = xip->i_itemp; /* * If an allocation transaction occurred * without extending the size, then we have to force * the log up the proper point to ensure that the * allocation is permanent. We can't count on * the fact that buffered writes lock out direct I/O * writes - the direct I/O write could have extended * the size nontransactionally, then finished before * we started. xfs_write_file will think that the file * didn't grow but the update isn't safe unless the * size change is logged. * * Force the log if we've committed a transaction * against the inode or if someone else has and * the commit record hasn't gone to disk (e.g. * the inode is pinned). This guarantees that * all changes affecting the inode are permanent * when we return. */ if (iip && iip->ili_last_lsn) { xfs_log_force(mp, iip->ili_last_lsn, XFS_LOG_FORCE | XFS_LOG_SYNC); } else if (xfs_ipincount(xip) > 0) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); } } else { xfs_trans_t *tp; /* * O_SYNC or O_DSYNC _with_ a size update are handled * the same way. * * If the write was synchronous then we need to make * sure that the inode modification time is permanent. * We'll have updated the timestamp above, so here * we use a synchronous transaction to log the inode. * It's not fast, but it's necessary. * * If this a dsync write and the size got changed * non-transactionally, then we need to ensure that * the size change gets logged in a synchronous * transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); if ((error = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0))) { /* Transaction reserve failed */ xfs_trans_cancel(tp, 0); } else { /* Transaction reserve successful */ xfs_ilock(xip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, xip); xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0, NULL); xfs_iunlock(xip, XFS_ILOCK_EXCL); if (error) goto out_unlock_internal; } } xfs_rwunlock(bdp, locktype); if (need_isem) up(&inode->i_sem); error = sync_page_range(inode, mapping, pos, ret); if (!error) error = ret; return error; } out_unlock_internal: xfs_rwunlock(bdp, locktype); out_unlock_isem: if (need_isem) up(&inode->i_sem); return -error; }
/* * Try to truncate the given file to 0 length. Currently called * only out of xfs_remove when it has to truncate a file to free * up space for the remove to proceed. */ int xfs_truncate_file( xfs_mount_t *mp, xfs_inode_t *ip) { xfs_trans_t *tp; int error; #ifdef QUOTADEBUG /* * This is called to truncate the quotainodes too. */ if (XFS_IS_UQUOTA_ON(mp)) { if (ip->i_ino != mp->m_sb.sb_uquotino) ASSERT(ip->i_udquot); } if (XFS_IS_OQUOTA_ON(mp)) { if (ip->i_ino != mp->m_sb.sb_gquotino) ASSERT(ip->i_gdquot); } #endif /* * Make the call to xfs_itruncate_start before starting the * transaction, because we cannot make the call while we're * in a transaction. */ xfs_ilock(ip, XFS_IOLOCK_EXCL); xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, (xfs_fsize_t)0); tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) { xfs_trans_cancel(tp, 0); xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; } /* * Follow the normal truncate locking protocol. Since we * hold the inode in the transaction, we know that it's number * of references will stay constant. */ xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); xfs_trans_ihold(tp, ip); /* * Signal a sync xaction. The only case where that isn't * the case is if we're truncating an already unlinked file * on a wsync fs. In that case, we know the blocks can't * reappear in the file because the links to file are * permanently toast. Currently, we're always going to * want a sync transaction because this code is being * called from places where nlink is guaranteed to be 1 * but I'm leaving the tests in to protect against future * changes -- rcc. */ error = xfs_itruncate_finish(&tp, ip, (xfs_fsize_t)0, XFS_DATA_FORK, ((ip->i_d.di_nlink != 0 || !(mp->m_flags & XFS_MOUNT_WSYNC)) ? 1 : 0)); if (error) { xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); } else { xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); } xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); return error; }
int xfs_iomap_write_unwritten( xfs_inode_t *ip, xfs_off_t offset, size_t count) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb; xfs_filblks_t count_fsb; xfs_filblks_t numblks_fsb; xfs_fsblock_t firstfsb; int nimaps; xfs_trans_t *tp; xfs_bmbt_irec_t imap; xfs_bmap_free_t free_list; uint resblks; int committed; int error; xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, ip, offset, count); offset_fsb = XFS_B_TO_FSBT(mp, offset); count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb); /* * Reserve enough blocks in this transaction for two complete extent * btree splits. We may be converting the middle part of an unwritten * extent and in this case we will insert two new extents in the btree * each of which could cause a full split. * * This reservation amount will be used in the first call to * xfs_bmbt_split() to select an AG with enough space to satisfy the * rest of the operation. */ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; do { /* * set up a transaction to convert the range of extents * from unwritten to real. Do allocations in a loop until * we have covered the range passed in. * * Note that we open code the transaction allocation here * to pass KM_NOFS--we can't risk to recursing back into * the filesystem here as we might be asked to write out * the same inode that we complete here and might deadlock * on the iolock. */ xfs_wait_for_freeze(mp, SB_FREEZE_TRANS); tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS); tp->t_flags |= XFS_TRANS_RESERVE; error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); if (error) { xfs_trans_cancel(tp, 0); return XFS_ERROR(error); } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); /* * Modify the unwritten extent state of the buffer. */ xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb, 1, &imap, &nimaps, &free_list, NULL); if (error) goto error_on_bmapi_transaction; error = xfs_bmap_finish(&(tp), &(free_list), &committed); if (error) goto error_on_bmapi_transaction; error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) return XFS_ERROR(error); if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) return xfs_cmn_err_fsblock_zero(ip, &imap); if ((numblks_fsb = imap.br_blockcount) == 0) { /* * The numblks_fsb value should always get * smaller, otherwise the loop is stuck. */ ASSERT(imap.br_blockcount); break; } offset_fsb += numblks_fsb; count_fsb -= numblks_fsb; } while (count_fsb > 0); return 0; error_on_bmapi_transaction: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); xfs_iunlock(ip, XFS_ILOCK_EXCL); return XFS_ERROR(error); }
/* * Pass in a delayed allocate extent, convert it to real extents; * return to the caller the extent we create which maps on top of * the originating callers request. * * Called without a lock on the inode. * * We no longer bother to look at the incoming map - all we have to * guarantee is that whatever we allocate fills the required range. */ int xfs_iomap_write_allocate( xfs_inode_t *ip, xfs_off_t offset, size_t count, xfs_bmbt_irec_t *map, int *retmap) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb, last_block; xfs_fileoff_t end_fsb, map_start_fsb; xfs_fsblock_t first_block; xfs_bmap_free_t free_list; xfs_filblks_t count_fsb; xfs_bmbt_irec_t imap; xfs_trans_t *tp; int nimaps, committed; int error = 0; int nres; *retmap = 0; /* * Make sure that the dquots are there. */ error = xfs_qm_dqattach(ip, 0); if (error) return XFS_ERROR(error); offset_fsb = XFS_B_TO_FSBT(mp, offset); count_fsb = map->br_blockcount; map_start_fsb = map->br_startoff; XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); while (count_fsb != 0) { /* * Set up a transaction with which to allocate the * backing store for the file. Do allocations in a * loop until we get some space in the range we are * interested in. The other space that might be allocated * is in the delayed allocation extent on which we sit * but before our buffer starts. */ nimaps = 0; while (nimaps == 0) { tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); tp->t_flags |= XFS_TRANS_RESERVE; nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); error = xfs_trans_reserve(tp, nres, XFS_WRITE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); if (error) { xfs_trans_cancel(tp, 0); return XFS_ERROR(error); } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); xfs_bmap_init(&free_list, &first_block); /* * it is possible that the extents have changed since * we did the read call as we dropped the ilock for a * while. We have to be careful about truncates or hole * punchs here - we are not allowed to allocate * non-delalloc blocks here. * * The only protection against truncation is the pages * for the range we are being asked to convert are * locked and hence a truncate will block on them * first. * * As a result, if we go beyond the range we really * need and hit an delalloc extent boundary followed by * a hole while we have excess blocks in the map, we * will fill the hole incorrectly and overrun the * transaction reservation. * * Using a single map prevents this as we are forced to * check each map we look for overlap with the desired * range and abort as soon as we find it. Also, given * that we only return a single map, having one beyond * what we can return is probably a bit silly. * * We also need to check that we don't go beyond EOF; * this is a truncate optimisation as a truncate sets * the new file size before block on the pages we * currently have locked under writeback. Because they * are about to be tossed, we don't need to write them * back.... */ nimaps = 1; end_fsb = XFS_B_TO_FSB(mp, ip->i_size); error = xfs_bmap_last_offset(NULL, ip, &last_block, XFS_DATA_FORK); if (error) goto trans_cancel; last_block = XFS_FILEOFF_MAX(last_block, end_fsb); if ((map_start_fsb + count_fsb) > last_block) { count_fsb = last_block - map_start_fsb; if (count_fsb == 0) { error = EAGAIN; goto trans_cancel; } } /* Go get the actual blocks */ error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb, XFS_BMAPI_WRITE, &first_block, 1, &imap, &nimaps, &free_list, NULL); if (error) goto trans_cancel; error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) goto trans_cancel; error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) goto error0; xfs_iunlock(ip, XFS_ILOCK_EXCL); } /* * See if we were able to allocate an extent that * covers at least part of the callers request */ if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) return xfs_cmn_err_fsblock_zero(ip, &imap); if ((offset_fsb >= imap.br_startoff) && (offset_fsb < (imap.br_startoff + imap.br_blockcount))) { *map = imap; *retmap = 1; XFS_STATS_INC(xs_xstrat_quick); return 0; } /* * So far we have not mapped the requested part of the * file, just surrounding data, try again. */ count_fsb -= imap.br_blockcount; map_start_fsb = imap.br_startoff + imap.br_blockcount; } trans_cancel: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); error0: xfs_iunlock(ip, XFS_ILOCK_EXCL); return XFS_ERROR(error); }
int xfs_iomap_write_unwritten( xfs_inode_t *ip, xfs_off_t offset, size_t count) { xfs_mount_t *mp = ip->i_mount; xfs_iocore_t *io = &ip->i_iocore; xfs_trans_t *tp; xfs_fileoff_t offset_fsb; xfs_filblks_t count_fsb; xfs_filblks_t numblks_fsb; xfs_bmbt_irec_t imap; int committed; int error; int nres; int nimaps; xfs_fsblock_t firstfsb; xfs_bmap_free_t free_list; xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, &ip->i_iocore, offset, count); offset_fsb = XFS_B_TO_FSBT(mp, offset); count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb); do { nres = XFS_DIOSTRAT_SPACE_RES(mp, 0); /* * set up a transaction to convert the range of extents * from unwritten to real. Do allocations in a loop until * we have covered the range passed in. */ tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); error = xfs_trans_reserve(tp, nres, XFS_WRITE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); if (error) { xfs_trans_cancel(tp, 0); goto error0; } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); /* * Modify the unwritten extent state of the buffer. */ XFS_BMAP_INIT(&free_list, &firstfsb); nimaps = 1; error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, XFS_BMAPI_WRITE, &firstfsb, 1, &imap, &nimaps, &free_list); if (error) goto error_on_bmapi_transaction; error = xfs_bmap_finish(&(tp), &(free_list), firstfsb, &committed); if (error) goto error_on_bmapi_transaction; error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) goto error0; if ( !(io->io_flags & XFS_IOCORE_RT) && !imap.br_startblock) { cmn_err(CE_PANIC,"Access to block zero: fs <%s> " "inode: %lld start_block : %llx start_off : " "%llx blkcnt : %llx extent-state : %x \n", (ip->i_mount)->m_fsname, (long long)ip->i_ino, imap.br_startblock,imap.br_startoff, imap.br_blockcount,imap.br_state); } if ((numblks_fsb = imap.br_blockcount) == 0) { /* * The numblks_fsb value should always get * smaller, otherwise the loop is stuck. */ ASSERT(imap.br_blockcount); break; } offset_fsb += numblks_fsb; count_fsb -= numblks_fsb; } while (count_fsb > 0); return 0; error_on_bmapi_transaction: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); xfs_iunlock(ip, XFS_ILOCK_EXCL); error0: return XFS_ERROR(error); }
int xfs_iomap_write_unwritten( xfs_inode_t *ip, xfs_off_t offset, size_t count) { xfs_mount_t *mp = ip->i_mount; xfs_iocore_t *io = &ip->i_iocore; xfs_fileoff_t offset_fsb; xfs_filblks_t count_fsb; xfs_filblks_t numblks_fsb; xfs_fsblock_t firstfsb; int nimaps; xfs_trans_t *tp; xfs_bmbt_irec_t imap; xfs_bmap_free_t free_list; uint resblks; int committed; int error; xfs_iomap_enter_trace(XFS_IOMAP_UNWRITTEN, &ip->i_iocore, offset, count); offset_fsb = XFS_B_TO_FSBT(mp, offset); count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb); resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; do { /* * set up a transaction to convert the range of extents * from unwritten to real. Do allocations in a loop until * we have covered the range passed in. */ tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); if (error) { xfs_trans_cancel(tp, 0); return XFS_ERROR(error); } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); /* * Modify the unwritten extent state of the buffer. */ XFS_BMAP_INIT(&free_list, &firstfsb); nimaps = 1; error = XFS_BMAPI(mp, tp, io, offset_fsb, count_fsb, XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb, 1, &imap, &nimaps, &free_list, NULL); if (error) goto error_on_bmapi_transaction; error = xfs_bmap_finish(&(tp), &(free_list), firstfsb, &committed); if (error) goto error_on_bmapi_transaction; error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) return XFS_ERROR(error); if (unlikely(!imap.br_startblock && !(io->io_flags & XFS_IOCORE_RT))) return xfs_cmn_err_fsblock_zero(ip, &imap); if ((numblks_fsb = imap.br_blockcount) == 0) { /* * The numblks_fsb value should always get * smaller, otherwise the loop is stuck. */ ASSERT(imap.br_blockcount); break; } offset_fsb += numblks_fsb; count_fsb -= numblks_fsb; } while (count_fsb > 0); return 0; error_on_bmapi_transaction: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT)); xfs_iunlock(ip, XFS_ILOCK_EXCL); return XFS_ERROR(error); }
/* * Pass in a delayed allocate extent, convert it to real extents; * return to the caller the extent we create which maps on top of * the originating callers request. * * Called without a lock on the inode. */ int xfs_iomap_write_allocate( xfs_inode_t *ip, xfs_off_t offset, size_t count, xfs_bmbt_irec_t *map, int *retmap) { xfs_mount_t *mp = ip->i_mount; xfs_iocore_t *io = &ip->i_iocore; xfs_fileoff_t offset_fsb, last_block; xfs_fileoff_t end_fsb, map_start_fsb; xfs_fsblock_t first_block; xfs_bmap_free_t free_list; xfs_filblks_t count_fsb; xfs_bmbt_irec_t imap[XFS_STRAT_WRITE_IMAPS]; xfs_trans_t *tp; int i, nimaps, committed; int error = 0; int nres; *retmap = 0; /* * Make sure that the dquots are there. */ if ((error = XFS_QM_DQATTACH(mp, ip, 0))) return XFS_ERROR(error); offset_fsb = XFS_B_TO_FSBT(mp, offset); count_fsb = map->br_blockcount; map_start_fsb = map->br_startoff; XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); while (count_fsb != 0) { /* * Set up a transaction with which to allocate the * backing store for the file. Do allocations in a * loop until we get some space in the range we are * interested in. The other space that might be allocated * is in the delayed allocation extent on which we sit * but before our buffer starts. */ nimaps = 0; while (nimaps == 0) { tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE); nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); error = xfs_trans_reserve(tp, nres, XFS_WRITE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); if (error == ENOSPC) { error = xfs_trans_reserve(tp, 0, XFS_WRITE_LOG_RES(mp), 0, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); } if (error) { xfs_trans_cancel(tp, 0); return XFS_ERROR(error); } xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); XFS_BMAP_INIT(&free_list, &first_block); nimaps = XFS_STRAT_WRITE_IMAPS; /* * Ensure we don't go beyond eof - it is possible * the extents changed since we did the read call, * we dropped the ilock in the interim. */ end_fsb = XFS_B_TO_FSB(mp, ip->i_d.di_size); xfs_bmap_last_offset(NULL, ip, &last_block, XFS_DATA_FORK); last_block = XFS_FILEOFF_MAX(last_block, end_fsb); if ((map_start_fsb + count_fsb) > last_block) { count_fsb = last_block - map_start_fsb; if (count_fsb == 0) { error = EAGAIN; goto trans_cancel; } } /* Go get the actual blocks */ error = XFS_BMAPI(mp, tp, io, map_start_fsb, count_fsb, XFS_BMAPI_WRITE, &first_block, 1, imap, &nimaps, &free_list, NULL); if (error) goto trans_cancel; error = xfs_bmap_finish(&tp, &free_list, first_block, &committed); if (error) goto trans_cancel; error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); if (error) goto error0; xfs_iunlock(ip, XFS_ILOCK_EXCL); } /* * See if we were able to allocate an extent that * covers at least part of the callers request */ for (i = 0; i < nimaps; i++) { if (unlikely(!imap[i].br_startblock && !(io->io_flags & XFS_IOCORE_RT))) return xfs_cmn_err_fsblock_zero(ip, &imap[i]); if ((offset_fsb >= imap[i].br_startoff) && (offset_fsb < (imap[i].br_startoff + imap[i].br_blockcount))) { *map = imap[i]; *retmap = 1; XFS_STATS_INC(xs_xstrat_quick); return 0; } count_fsb -= imap[i].br_blockcount; } /* So far we have not mapped the requested part of the * file, just surrounding data, try again. */ nimaps--; map_start_fsb = imap[nimaps].br_startoff + imap[nimaps].br_blockcount; } trans_cancel: xfs_bmap_cancel(&free_list); xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); error0: xfs_iunlock(ip, XFS_ILOCK_EXCL); return XFS_ERROR(error); }
ssize_t /* bytes written, or (-) error */ xfs_write( bhv_desc_t *bdp, uio_t *uio, int ioflag, cred_t *credp) { xfs_inode_t *xip; xfs_mount_t *mp; ssize_t ret = 0; int error = 0; xfs_fsize_t isize, new_size; xfs_fsize_t n, limit; xfs_fsize_t size; xfs_iocore_t *io; xfs_vnode_t *vp; int iolock; //int eventsent = 0; vrwlock_t locktype; xfs_off_t offset_c; xfs_off_t *offset; xfs_off_t pos; XFS_STATS_INC(xs_write_calls); vp = BHV_TO_VNODE(bdp); xip = XFS_BHVTOI(bdp); io = &xip->i_iocore; mp = io->io_mount; if (XFS_FORCED_SHUTDOWN(xip->i_mount)) { return EIO; } size = uio->uio_resid; pos = offset_c = uio->uio_offset; offset = &offset_c; if (unlikely(ioflag & IO_ISDIRECT)) { if (((__psint_t)buf & BBMASK) || (*offset & mp->m_blockmask) || (size & mp->m_blockmask)) { return EINVAL; } iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; } else { if (io->io_flags & XFS_IOCORE_RT) return EINVAL; iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; } iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); isize = xip->i_d.di_size; limit = XFS_MAXIOFFSET(mp); if (ioflag & O_APPEND) *offset = isize; //start: n = limit - *offset; if (n <= 0) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return EFBIG; } if (n < size) size = n; new_size = *offset + size; if (new_size > isize) { io->io_new_size = new_size; } #ifdef RMC /* probably be a long time before if ever that we do dmapi */ if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { loff_t savedsize = *offset; int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); xfs_iunlock(xip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, *offset, size, dmflags, &locktype); if (error) { if (iolock) xfs_iunlock(xip, iolock); return -error; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; /* * The iolock was dropped and reaquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ if ((file->f_flags & O_APPEND) && savedsize != xip->i_d.di_size) { *offset = isize = xip->i_d.di_size; goto start; } } #endif /* * If the offset is beyond the size of the file, we have a couple * of things to do. First, if there is already space allocated * we need to either create holes or zero the disk or ... * * If there is a page where the previous size lands, we need * to zero it out up to the new size. */ if (!(ioflag & IO_ISDIRECT) && (*offset > isize && isize)) { error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset, isize, *offset + size); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return(-error); } } xfs_iunlock(xip, XFS_ILOCK_EXCL); #if 0 /* * If we're writing the file then make sure to clear the * setuid and setgid bits if the process is not being run * by root. This keeps people from modifying setuid and * setgid binaries. */ if (((xip->i_d.di_mode & S_ISUID) || ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (likely(!error)) error = -remove_suid(file->f_dentry); if (unlikely(error)) { xfs_iunlock(xip, iolock); goto out_unlock_mutex; } } #endif //retry: if (unlikely(ioflag & IO_ISDIRECT)) { #ifdef RMC xfs_off_t pos = *offset; struct address_space *mapping = file->f_dentry->d_inode->i_mapping; struct inode *inode = mapping->host; ret = precheck_file_write(file, inode, &size, &pos); if (ret || size == 0) goto error; xfs_inval_cached_pages(vp, io, pos, 1, 1); inode->i_ctime = inode->i_mtime = CURRENT_TIME; /* mark_inode_dirty_sync(inode); - we do this later */ xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, buf, size, pos, ioflags); ret = generic_file_direct_IO(WRITE, file, (char *)buf, size, pos); xfs_inval_cached_pages(vp, io, pos, 1, 1); if (ret > 0) *offset += ret; #endif } else { xfs_rw_enter_trace(XFS_WRITE_ENTER, io, buf, size, *offset, ioflags); ret = xfs_write_file(xip,uio,ioflag); } xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); //error: if (ret <= 0) { if (iolock) xfs_rwunlock(bdp, locktype); return ret; } XFS_STATS_ADD(xs_write_bytes, ret); if (*offset > xip->i_d.di_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); if (*offset > xip->i_d.di_size) { printf("xfs_write look at doing more here %s:%d\n",__FILE__,__LINE__); #ifdef RMC struct inode *inode = LINVFS_GET_IP(vp); i_size_write(inode, *offset); mark_inode_dirty_sync(inode); #endif xip->i_d.di_size = *offset; xip->i_update_core = 1; xip->i_update_size = 1; } xfs_iunlock(xip, XFS_ILOCK_EXCL); } /* Handle various SYNC-type writes */ #if 0 // if ((file->f_flags & O_SYNC) || IS_SYNC(inode)) { #endif if (ioflag & IO_SYNC) { /* * If we're treating this as O_DSYNC and we have not updated the * size, force the log. */ if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && !(xip->i_update_size)) { xfs_inode_log_item_t *iip = xip->i_itemp; /* * If an allocation transaction occurred * without extending the size, then we have to force * the log up the proper point to ensure that the * allocation is permanent. We can't count on * the fact that buffered writes lock out direct I/O * writes - the direct I/O write could have extended * the size nontransactionally, then finished before * we started. xfs_write_file will think that the file * didn't grow but the update isn't safe unless the * size change is logged. * * Force the log if we've committed a transaction * against the inode or if someone else has and * the commit record hasn't gone to disk (e.g. * the inode is pinned). This guarantees that * all changes affecting the inode are permanent * when we return. */ if (iip && iip->ili_last_lsn) { xfs_log_force(mp, iip->ili_last_lsn, XFS_LOG_FORCE | XFS_LOG_SYNC); } else if (xfs_ipincount(xip) > 0) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); } } else { xfs_trans_t *tp; /* * O_SYNC or O_DSYNC _with_ a size update are handled * the same way. * * If the write was synchronous then we need to make * sure that the inode modification time is permanent. * We'll have updated the timestamp above, so here * we use a synchronous transaction to log the inode. * It's not fast, but it's necessary. * * If this a dsync write and the size got changed * non-transactionally, then we need to ensure that * the size change gets logged in a synchronous * transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); if ((error = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0))) { /* Transaction reserve failed */ xfs_trans_cancel(tp, 0); } else { /* Transaction reserve successful */ xfs_ilock(xip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, xip); xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0, NULL); xfs_iunlock(xip, XFS_ILOCK_EXCL); } if (error) goto out_unlock_internal; } xfs_rwunlock(bdp, locktype); return ret; } /* (ioflags & O_SYNC) */ out_unlock_internal: xfs_rwunlock(bdp, locktype); #if 0 out_unlock_mutex: if (need_i_mutex) mutex_unlock(&inode->i_mutex); #endif //out_nounlocks: return -error; }
/* * This routine allocates disk space for the given file. * Originally derived from xfs_alloc_file_space(). */ int libxfs_alloc_file_space( xfs_inode_t *ip, xfs_off_t offset, xfs_off_t len, int alloc_type, int attr_flags) { xfs_mount_t *mp; xfs_off_t count; xfs_filblks_t datablocks; xfs_filblks_t allocated_fsb; xfs_filblks_t allocatesize_fsb; xfs_fsblock_t firstfsb; xfs_bmap_free_t free_list; xfs_bmbt_irec_t *imapp; xfs_bmbt_irec_t imaps[1]; int reccount; uint resblks; xfs_fileoff_t startoffset_fsb; xfs_trans_t *tp; int xfs_bmapi_flags; int committed; int error; if (len <= 0) return EINVAL; count = len; error = 0; imapp = &imaps[0]; reccount = 1; xfs_bmapi_flags = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); mp = ip->i_mount; startoffset_fsb = XFS_B_TO_FSBT(mp, offset); allocatesize_fsb = XFS_B_TO_FSB(mp, count); /* allocate file space until done or until there is an error */ while (allocatesize_fsb && !error) { datablocks = allocatesize_fsb; tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); resblks = (uint)XFS_DIOSTRAT_SPACE_RES(mp, datablocks); error = xfs_trans_reserve(tp, resblks, 0, 0, 0, 0); if (error) break; xfs_trans_ijoin(tp, ip, 0); xfs_trans_ihold(tp, ip); XFS_BMAP_INIT(&free_list, &firstfsb); error = xfs_bmapi(tp, ip, startoffset_fsb, allocatesize_fsb, xfs_bmapi_flags, &firstfsb, 0, imapp, &reccount, &free_list); if (error) break; /* complete the transaction */ error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); if (error) break; error = xfs_trans_commit(tp, 0, NULL); if (error) break; allocated_fsb = imapp->br_blockcount; if (reccount == 0) return ENOSPC; startoffset_fsb += allocated_fsb; allocatesize_fsb -= allocated_fsb; } return error; }
/* * Roll from one trans in the sequence of PERMANENT transactions to * the next: permanent transactions are only flushed out when * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon * as possible to let chunks of it go to the log. So we commit the * chunk we've been working on and get a new transaction to continue. */ int xfs_trans_roll( struct xfs_trans **tpp, struct xfs_inode *dp) { struct xfs_trans *trans; unsigned int logres, count; int error; /* * Ensure that the inode is always logged. */ trans = *tpp; xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); /* * Copy the critical parameters from one trans to the next. */ logres = trans->t_log_res; count = trans->t_log_count; *tpp = xfs_trans_dup(trans); /* * Commit the current transaction. * If this commit failed, then it'd just unlock those items that * are not marked ihold. That also means that a filesystem shutdown * is in progress. The caller takes the responsibility to cancel * the duplicate transaction that gets returned. */ error = xfs_trans_commit(trans, 0); if (error) return (error); trans = *tpp; /* * transaction commit worked ok so we can drop the extra ticket * reference that we gained in xfs_trans_dup() */ xfs_log_ticket_put(trans->t_ticket); /* * Reserve space in the log for th next transaction. * This also pushes items in the "AIL", the list of logged items, * out to disk if they are taking up space at the tail of the log * that we want to use. This requires that either nothing be locked * across this call, or that anything that is locked be logged in * the prior and the next transactions. */ error = xfs_trans_reserve(trans, 0, logres, 0, XFS_TRANS_PERM_LOG_RES, count); /* * Ensure that the inode is in the new transaction and locked. */ if (error) return error; xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); xfs_trans_ihold(trans, dp); return 0; }
ssize_t /* bytes written, or (-) error */ xfs_write( bhv_desc_t *bdp, struct kiocb *iocb, const struct iovec *iovp, unsigned int segs, loff_t *offset, int ioflags, cred_t *credp) { struct file *file = iocb->ki_filp; size_t size = 0; xfs_inode_t *xip; xfs_mount_t *mp; ssize_t ret; int error = 0; xfs_fsize_t isize, new_size; xfs_fsize_t n, limit; xfs_iocore_t *io; vnode_t *vp; unsigned long seg; int iolock; int eventsent = 0; vrwlock_t locktype; XFS_STATS_INC(xs_write_calls); vp = BHV_TO_VNODE(bdp); vn_trace_entry(vp, "xfs_write", (inst_t *)__return_address); xip = XFS_BHVTOI(bdp); /* START copy & waste from filemap.c */ for (seg = 0; seg < segs; seg++) { const struct iovec *iv = &iovp[seg]; /* * If any segment has a negative length, or the cumulative * length ever wraps negative then return -EINVAL. */ size += iv->iov_len; if (unlikely((ssize_t)(size|iv->iov_len) < 0)) return XFS_ERROR(-EINVAL); } /* END copy & waste from filemap.c */ if (size == 0) return 0; io = &(xip->i_iocore); mp = io->io_mount; xfs_check_frozen(mp, bdp, XFS_FREEZE_WRITE); if (XFS_FORCED_SHUTDOWN(mp)) { return -EIO; } if (ioflags & IO_ISDIRECT) { pb_target_t *target = (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((*offset & target->pbr_smask) || (size & target->pbr_smask)) { return XFS_ERROR(-EINVAL); } iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; } else { iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; } xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); isize = xip->i_d.di_size; limit = XFS_MAXIOFFSET(mp); if (file->f_flags & O_APPEND) *offset = isize; start: n = limit - *offset; if (n <= 0) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return -EFBIG; } if (n < size) size = n; new_size = *offset + size; if (new_size > isize) { io->io_new_size = new_size; } if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { loff_t savedsize = *offset; xfs_iunlock(xip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, *offset, size, FILP_DELAY_FLAG(file), &locktype); if (error) { xfs_iunlock(xip, iolock); return -error; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; /* * The iolock was dropped and reaquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ if ((file->f_flags & O_APPEND) && savedsize != xip->i_d.di_size) { *offset = isize = xip->i_d.di_size; goto start; } } /* * On Linux, generic_file_write updates the times even if * no data is copied in so long as the write had a size. * * We must update xfs' times since revalidate will overcopy xfs. */ if (size && !(ioflags & IO_INVIS)) xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * If the offset is beyond the size of the file, we have a couple * of things to do. First, if there is already space allocated * we need to either create holes or zero the disk or ... * * If there is a page where the previous size lands, we need * to zero it out up to the new size. */ if (!(ioflags & IO_ISDIRECT) && (*offset > isize && isize)) { error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset, isize, *offset + size); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return(-error); } } xfs_iunlock(xip, XFS_ILOCK_EXCL); /* * If we're writing the file then make sure to clear the * setuid and setgid bits if the process is not being run * by root. This keeps people from modifying setuid and * setgid binaries. */ if (((xip->i_d.di_mode & S_ISUID) || ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (error) { xfs_iunlock(xip, iolock); return -error; } } retry: if (ioflags & IO_ISDIRECT) { xfs_inval_cached_pages(vp, &xip->i_iocore, *offset, 1, 1); } ret = generic_file_aio_write_nolock(iocb, iovp, segs, offset); if ((ret == -ENOSPC) && DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { xfs_rwunlock(bdp, locktype); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (error) return -error; xfs_rwlock(bdp, locktype); *offset = xip->i_d.di_size; goto retry; } if (*offset > xip->i_d.di_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); if (*offset > xip->i_d.di_size) { struct inode *inode = LINVFS_GET_IP(vp); xip->i_d.di_size = *offset; i_size_write(inode, *offset); xip->i_update_core = 1; xip->i_update_size = 1; } xfs_iunlock(xip, XFS_ILOCK_EXCL); } if (ret <= 0) { xfs_rwunlock(bdp, locktype); return ret; } XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) { /* * If we're treating this as O_DSYNC and we have not updated the * size, force the log. */ if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && !(xip->i_update_size)) { /* * If an allocation transaction occurred * without extending the size, then we have to force * the log up the proper point to ensure that the * allocation is permanent. We can't count on * the fact that buffered writes lock out direct I/O * writes - the direct I/O write could have extended * the size nontransactionally, then finished before * we started. xfs_write_file will think that the file * didn't grow but the update isn't safe unless the * size change is logged. * * Force the log if we've committed a transaction * against the inode or if someone else has and * the commit record hasn't gone to disk (e.g. * the inode is pinned). This guarantees that * all changes affecting the inode are permanent * when we return. */ xfs_inode_log_item_t *iip; xfs_lsn_t lsn; iip = xip->i_itemp; if (iip && iip->ili_last_lsn) { lsn = iip->ili_last_lsn; xfs_log_force(mp, lsn, XFS_LOG_FORCE | XFS_LOG_SYNC); } else if (xfs_ipincount(xip) > 0) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); } } else { xfs_trans_t *tp; /* * O_SYNC or O_DSYNC _with_ a size update are handled * the same way. * * If the write was synchronous then we need to make * sure that the inode modification time is permanent. * We'll have updated the timestamp above, so here * we use a synchronous transaction to log the inode. * It's not fast, but it's necessary. * * If this a dsync write and the size got changed * non-transactionally, then we need to ensure that * the size change gets logged in a synchronous * transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); if ((error = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0))) { /* Transaction reserve failed */ xfs_trans_cancel(tp, 0); } else { /* Transaction reserve successful */ xfs_ilock(xip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, xip); xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0, (xfs_lsn_t)0); xfs_iunlock(xip, XFS_ILOCK_EXCL); } } } /* (ioflags & O_SYNC) */ xfs_rwunlock(bdp, locktype); return(ret); }
STATIC int xfs_ioctl_setattr( xfs_inode_t *ip, struct fsxattr *fa, int mask) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; unsigned int lock_flags = 0; struct xfs_dquot *udqp = NULL; struct xfs_dquot *gdqp = NULL; struct xfs_dquot *olddquot = NULL; int code; xfs_itrace_entry(ip); if (mp->m_flags & XFS_MOUNT_RDONLY) return XFS_ERROR(EROFS); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); /* * If disk quotas is on, we make sure that the dquots do exist on disk, * before we start any other transactions. Trying to do this later * is messy. We don't care to take a readlock to look at the ids * in inode here, because we can't hold it across the trans_reserve. * If the IDs do change before we take the ilock, we're covered * because the i_*dquot fields will get updated anyway. */ if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, ip->i_d.di_gid, fa->fsx_projid, XFS_QMOPT_PQUOTA, &udqp, &gdqp); if (code) return code; } /* * For the other attributes, we acquire the inode lock and * first do an error checking pass. */ tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); if (code) goto error_return; lock_flags = XFS_ILOCK_EXCL; xfs_ilock(ip, lock_flags); /* * CAP_FOWNER overrides the following restrictions: * * The user ID of the calling process must be equal * to the file owner ID, except in cases where the * CAP_FSETID capability is applicable. */ if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) { code = XFS_ERROR(EPERM); goto error_return; } /* * Do a quota reservation only if projid is actually going to change. */ if (mask & FSX_PROJID) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) && ip->i_d.di_projid != fa->fsx_projid) { ASSERT(tp); code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0); if (code) /* out of quota */ goto error_return; } } if (mask & FSX_EXTSIZE) { /* * Can't change extent size if any extents are allocated. */ if (ip->i_d.di_nextents && ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize)) { code = XFS_ERROR(EINVAL); /* EFBIG? */ goto error_return; } /* * Extent size must be a multiple of the appropriate block * size, if set at all. */ if (fa->fsx_extsize != 0) { xfs_extlen_t size; if (XFS_IS_REALTIME_INODE(ip) || ((mask & FSX_XFLAGS) && (fa->fsx_xflags & XFS_XFLAG_REALTIME))) { size = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; } else { size = mp->m_sb.sb_blocksize; } if (fa->fsx_extsize % size) { code = XFS_ERROR(EINVAL); goto error_return; } } } if (mask & FSX_XFLAGS) { /* * Can't change realtime flag if any extents are allocated. */ if ((ip->i_d.di_nextents || ip->i_delayed_blks) && (XFS_IS_REALTIME_INODE(ip)) != (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { code = XFS_ERROR(EINVAL); /* EFBIG? */ goto error_return; } /* * If realtime flag is set then must have realtime data. */ if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) { if ((mp->m_sb.sb_rblocks == 0) || (mp->m_sb.sb_rextsize == 0) || (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { code = XFS_ERROR(EINVAL); goto error_return; } } /* * Can't modify an immutable/append-only file unless * we have appropriate permission. */ if ((ip->i_d.di_flags & (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || (fa->fsx_xflags & (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && !capable(CAP_LINUX_IMMUTABLE)) { code = XFS_ERROR(EPERM); goto error_return; } } xfs_trans_ijoin(tp, ip, lock_flags); xfs_trans_ihold(tp, ip); /* * Change file ownership. Must be the owner or privileged. */ if (mask & FSX_PROJID) { /* * CAP_FSETID overrides the following restrictions: * * The set-user-ID and set-group-ID bits of a file will be * cleared upon successful return from chown() */ if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && !capable(CAP_FSETID)) ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); /* * Change the ownerships and register quota modifications * in the transaction. */ if (ip->i_d.di_projid != fa->fsx_projid) { if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { olddquot = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } ip->i_d.di_projid = fa->fsx_projid; /* * We may have to rev the inode as well as * the superblock version number since projids didn't * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. */ if (ip->i_d.di_version == 1) xfs_bump_ino_vers2(tp, ip); } } if (mask & FSX_EXTSIZE) ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; if (mask & FSX_XFLAGS) { xfs_set_diflags(ip, fa->fsx_xflags); xfs_diflags_to_linux(ip); } xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_ichgtime(ip, XFS_ICHGTIME_CHG); XFS_STATS_INC(xs_ig_attrchg); /* * If this is a synchronous mount, make sure that the * transaction goes to disk before returning to the user. * This is slightly sub-optimal in that truncates require * two sync transactions instead of one for wsync filesystems. * One for the truncate and one for the timestamps since we * don't want to change the timestamps unless we're sure the * truncate worked. Truncates are less than 1% of the laddis * mix so this probably isn't worth the trouble to optimize. */ if (mp->m_flags & XFS_MOUNT_WSYNC) xfs_trans_set_sync(tp); code = xfs_trans_commit(tp, 0); xfs_iunlock(ip, lock_flags); /* * Release any dquot(s) the inode had kept before chown. */ xfs_qm_dqrele(olddquot); xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); if (code) return code; if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) { XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0, (mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0); } return 0; error_return: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); xfs_trans_cancel(tp, 0); if (lock_flags) xfs_iunlock(ip, lock_flags); return code; }
int xfs_iomap_write_direct( xfs_inode_t *ip, xfs_off_t offset, size_t count, int flags, xfs_bmbt_irec_t *ret_imap, int *nmaps, int found) { xfs_mount_t *mp = ip->i_mount; xfs_fileoff_t offset_fsb; xfs_fileoff_t last_fsb; xfs_filblks_t count_fsb, resaligned; xfs_fsblock_t firstfsb; xfs_extlen_t extsz, temp; int nimaps; int bmapi_flag; int quota_flag; int rt; xfs_trans_t *tp; xfs_bmbt_irec_t imap; xfs_bmap_free_t free_list; uint qblocks, resblks, resrtextents; int committed; int error; /* * Make sure that the dquots are there. This doesn't hold * the ilock across a disk read. */ error = xfs_qm_dqattach_locked(ip, 0); if (error) return XFS_ERROR(error); rt = XFS_IS_REALTIME_INODE(ip); extsz = xfs_get_extsz_hint(ip); offset_fsb = XFS_B_TO_FSBT(mp, offset); last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); if ((offset + count) > ip->i_size) { error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); if (error) goto error_out; } else { if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) last_fsb = MIN(last_fsb, (xfs_fileoff_t) ret_imap->br_blockcount + ret_imap->br_startoff); } count_fsb = last_fsb - offset_fsb; ASSERT(count_fsb > 0); resaligned = count_fsb; if (unlikely(extsz)) { if ((temp = do_mod(offset_fsb, extsz))) resaligned += temp; if ((temp = do_mod(resaligned, extsz))) resaligned += extsz - temp; } if (unlikely(rt)) { resrtextents = qblocks = resaligned; resrtextents /= mp->m_sb.sb_rextsize; resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); quota_flag = XFS_QMOPT_RES_RTBLKS; } else { resrtextents = 0; resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned); quota_flag = XFS_QMOPT_RES_REGBLKS; } /* * Allocate and setup the transaction */ xfs_iunlock(ip, XFS_ILOCK_EXCL); tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), resrtextents, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); /* * Check for running out of space, note: need lock to return */ if (error) xfs_trans_cancel(tp, 0); xfs_ilock(ip, XFS_ILOCK_EXCL); if (error) goto error_out; error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); if (error) goto error1; xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); bmapi_flag = XFS_BMAPI_WRITE; if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz)) bmapi_flag |= XFS_BMAPI_PREALLOC; /* * Issue the xfs_bmapi() call to allocate the blocks */ xfs_bmap_init(&free_list, &firstfsb); nimaps = 1; error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag, &firstfsb, 0, &imap, &nimaps, &free_list, NULL); if (error) goto error0; /* * Complete the transaction */ error = xfs_bmap_finish(&tp, &free_list, &committed); if (error) goto error0; error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) goto error_out; /* * Copy any maps to caller's array and return any error. */ if (nimaps == 0) { error = ENOSPC; goto error_out; } if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) { error = xfs_cmn_err_fsblock_zero(ip, &imap); goto error_out; } *ret_imap = imap; *nmaps = 1; return 0; error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); *nmaps = 0; /* nothing set-up here */ error_out: return XFS_ERROR(error); }
/* * Handle logging requirements of various synchronous types of write. */ int xfs_write_sync_logforce( xfs_mount_t *mp, xfs_inode_t *ip) { int error = 0; /* * If we're treating this as O_DSYNC and we have not updated the * size, force the log. */ if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && !(ip->i_update_size)) { xfs_inode_log_item_t *iip = ip->i_itemp; /* * If an allocation transaction occurred * without extending the size, then we have to force * the log up the proper point to ensure that the * allocation is permanent. We can't count on * the fact that buffered writes lock out direct I/O * writes - the direct I/O write could have extended * the size nontransactionally, then finished before * we started. xfs_write_file will think that the file * didn't grow but the update isn't safe unless the * size change is logged. * * Force the log if we've committed a transaction * against the inode or if someone else has and * the commit record hasn't gone to disk (e.g. * the inode is pinned). This guarantees that * all changes affecting the inode are permanent * when we return. */ if (iip && iip->ili_last_lsn) { xfs_log_force(mp, iip->ili_last_lsn, XFS_LOG_FORCE | XFS_LOG_SYNC); } else if (xfs_ipincount(ip) > 0) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); } } else { xfs_trans_t *tp; /* * O_SYNC or O_DSYNC _with_ a size update are handled * the same way. * * If the write was synchronous then we need to make * sure that the inode modification time is permanent. * We'll have updated the timestamp above, so here * we use a synchronous transaction to log the inode. * It's not fast, but it's necessary. * * If this a dsync write and the size got changed * non-transactionally, then we need to ensure that * the size change gets logged in a synchronous * transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); if ((error = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0))) { /* Transaction reserve failed */ xfs_trans_cancel(tp, 0); } else { /* Transaction reserve successful */ xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0, NULL); xfs_iunlock(ip, XFS_ILOCK_EXCL); } } return error; }
int xfs_iomap_write_direct( xfs_inode_t *ip, xfs_off_t offset, size_t count, int flags, xfs_bmbt_irec_t *ret_imap, int *nmaps, int found) { xfs_mount_t *mp = ip->i_mount; xfs_iocore_t *io = &ip->i_iocore; xfs_fileoff_t offset_fsb; xfs_fileoff_t last_fsb; xfs_filblks_t count_fsb; xfs_fsblock_t firstfsb; int nimaps; int error; int bmapi_flag; int quota_flag; int rt; xfs_trans_t *tp; xfs_bmbt_irec_t imap; xfs_bmap_free_t free_list; xfs_filblks_t qblocks, resblks; int committed; int resrtextents; /* * Make sure that the dquots are there. This doesn't hold * the ilock across a disk read. */ error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED); if (error) return XFS_ERROR(error); offset_fsb = XFS_B_TO_FSBT(mp, offset); last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); count_fsb = last_fsb - offset_fsb; if (found && (ret_imap->br_startblock == HOLESTARTBLOCK)) { xfs_fileoff_t map_last_fsb; map_last_fsb = ret_imap->br_blockcount + ret_imap->br_startoff; if (map_last_fsb < last_fsb) { last_fsb = map_last_fsb; count_fsb = last_fsb - offset_fsb; } ASSERT(count_fsb > 0); } /* * Determine if reserving space on the data or realtime partition. */ if ((rt = XFS_IS_REALTIME_INODE(ip))) { xfs_extlen_t extsz; if (!(extsz = ip->i_d.di_extsize)) extsz = mp->m_sb.sb_rextsize; resrtextents = qblocks = (count_fsb + extsz - 1); do_div(resrtextents, mp->m_sb.sb_rextsize); resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0); quota_flag = XFS_QMOPT_RES_RTBLKS; } else { resrtextents = 0; resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, count_fsb); quota_flag = XFS_QMOPT_RES_REGBLKS; } /* * Allocate and setup the transaction */ xfs_iunlock(ip, XFS_ILOCK_EXCL); tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); error = xfs_trans_reserve(tp, resblks, XFS_WRITE_LOG_RES(mp), resrtextents, XFS_TRANS_PERM_LOG_RES, XFS_WRITE_LOG_COUNT); /* * Check for running out of space, note: need lock to return */ if (error) xfs_trans_cancel(tp, 0); xfs_ilock(ip, XFS_ILOCK_EXCL); if (error) goto error_out; if (XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag)) { error = (EDQUOT); goto error1; } bmapi_flag = XFS_BMAPI_WRITE; xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); if (!(flags & BMAPI_MMAP) && (offset < ip->i_d.di_size || rt)) bmapi_flag |= XFS_BMAPI_PREALLOC; /* * Issue the bmapi() call to allocate the blocks */ XFS_BMAP_INIT(&free_list, &firstfsb); nimaps = 1; error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag, &firstfsb, 0, &imap, &nimaps, &free_list); if (error) goto error0; /* * Complete the transaction */ error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed); if (error) goto error0; error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL); if (error) goto error_out; /* * Copy any maps to caller's array and return any error. */ if (nimaps == 0) { error = (ENOSPC); goto error_out; } *ret_imap = imap; *nmaps = 1; if ( !(io->io_flags & XFS_IOCORE_RT) && !ret_imap->br_startblock) { cmn_err(CE_PANIC,"Access to block zero: fs <%s> inode: %lld " "start_block : %llx start_off : %llx blkcnt : %llx " "extent-state : %x \n", (ip->i_mount)->m_fsname, (long long)ip->i_ino, ret_imap->br_startblock, ret_imap->br_startoff, ret_imap->br_blockcount,ret_imap->br_state); } return 0; error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */ xfs_bmap_cancel(&free_list); XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag); error1: /* Just cancel transaction */ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); *nmaps = 0; /* nothing set-up here */ error_out: return XFS_ERROR(error); }