ssize_t /* bytes written, or (-) error */ xfs_write( bhv_desc_t *bdp, struct kiocb *iocb, const struct iovec *iovp, unsigned int segs, loff_t *offset, int ioflags, cred_t *credp) { struct file *file = iocb->ki_filp; size_t size = 0; xfs_inode_t *xip; xfs_mount_t *mp; ssize_t ret; int error = 0; xfs_fsize_t isize, new_size; xfs_fsize_t n, limit; xfs_iocore_t *io; vnode_t *vp; unsigned long seg; int iolock; int eventsent = 0; vrwlock_t locktype; XFS_STATS_INC(xs_write_calls); vp = BHV_TO_VNODE(bdp); vn_trace_entry(vp, "xfs_write", (inst_t *)__return_address); xip = XFS_BHVTOI(bdp); /* START copy & waste from filemap.c */ for (seg = 0; seg < segs; seg++) { const struct iovec *iv = &iovp[seg]; /* * If any segment has a negative length, or the cumulative * length ever wraps negative then return -EINVAL. */ size += iv->iov_len; if (unlikely((ssize_t)(size|iv->iov_len) < 0)) return XFS_ERROR(-EINVAL); } /* END copy & waste from filemap.c */ if (size == 0) return 0; io = &(xip->i_iocore); mp = io->io_mount; xfs_check_frozen(mp, bdp, XFS_FREEZE_WRITE); if (XFS_FORCED_SHUTDOWN(mp)) { return -EIO; } if (ioflags & IO_ISDIRECT) { pb_target_t *target = (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? mp->m_rtdev_targp : mp->m_ddev_targp; if ((*offset & target->pbr_smask) || (size & target->pbr_smask)) { return XFS_ERROR(-EINVAL); } iolock = XFS_IOLOCK_SHARED; locktype = VRWLOCK_WRITE_DIRECT; } else { iolock = XFS_IOLOCK_EXCL; locktype = VRWLOCK_WRITE; } xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); isize = xip->i_d.di_size; limit = XFS_MAXIOFFSET(mp); if (file->f_flags & O_APPEND) *offset = isize; start: n = limit - *offset; if (n <= 0) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return -EFBIG; } if (n < size) size = n; new_size = *offset + size; if (new_size > isize) { io->io_new_size = new_size; } if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS) && !eventsent)) { loff_t savedsize = *offset; xfs_iunlock(xip, XFS_ILOCK_EXCL); error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, *offset, size, FILP_DELAY_FLAG(file), &locktype); if (error) { xfs_iunlock(xip, iolock); return -error; } xfs_ilock(xip, XFS_ILOCK_EXCL); eventsent = 1; /* * The iolock was dropped and reaquired in XFS_SEND_DATA * so we have to recheck the size when appending. * We will only "goto start;" once, since having sent the * event prevents another call to XFS_SEND_DATA, which is * what allows the size to change in the first place. */ if ((file->f_flags & O_APPEND) && savedsize != xip->i_d.di_size) { *offset = isize = xip->i_d.di_size; goto start; } } /* * On Linux, generic_file_write updates the times even if * no data is copied in so long as the write had a size. * * We must update xfs' times since revalidate will overcopy xfs. */ if (size && !(ioflags & IO_INVIS)) xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); /* * If the offset is beyond the size of the file, we have a couple * of things to do. First, if there is already space allocated * we need to either create holes or zero the disk or ... * * If there is a page where the previous size lands, we need * to zero it out up to the new size. */ if (!(ioflags & IO_ISDIRECT) && (*offset > isize && isize)) { error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset, isize, *offset + size); if (error) { xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); return(-error); } } xfs_iunlock(xip, XFS_ILOCK_EXCL); /* * If we're writing the file then make sure to clear the * setuid and setgid bits if the process is not being run * by root. This keeps people from modifying setuid and * setgid binaries. */ if (((xip->i_d.di_mode & S_ISUID) || ((xip->i_d.di_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))) && !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (error) { xfs_iunlock(xip, iolock); return -error; } } retry: if (ioflags & IO_ISDIRECT) { xfs_inval_cached_pages(vp, &xip->i_iocore, *offset, 1, 1); } ret = generic_file_aio_write_nolock(iocb, iovp, segs, offset); if ((ret == -ENOSPC) && DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { xfs_rwunlock(bdp, locktype); error = XFS_SEND_NAMESP(xip->i_mount, DM_EVENT_NOSPACE, vp, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, 0, 0, 0); /* Delay flag intentionally unused */ if (error) return -error; xfs_rwlock(bdp, locktype); *offset = xip->i_d.di_size; goto retry; } if (*offset > xip->i_d.di_size) { xfs_ilock(xip, XFS_ILOCK_EXCL); if (*offset > xip->i_d.di_size) { struct inode *inode = LINVFS_GET_IP(vp); xip->i_d.di_size = *offset; i_size_write(inode, *offset); xip->i_update_core = 1; xip->i_update_size = 1; } xfs_iunlock(xip, XFS_ILOCK_EXCL); } if (ret <= 0) { xfs_rwunlock(bdp, locktype); return ret; } XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ if ((file->f_flags & O_SYNC) || IS_SYNC(file->f_dentry->d_inode)) { /* * If we're treating this as O_DSYNC and we have not updated the * size, force the log. */ if (!(mp->m_flags & XFS_MOUNT_OSYNCISOSYNC) && !(xip->i_update_size)) { /* * If an allocation transaction occurred * without extending the size, then we have to force * the log up the proper point to ensure that the * allocation is permanent. We can't count on * the fact that buffered writes lock out direct I/O * writes - the direct I/O write could have extended * the size nontransactionally, then finished before * we started. xfs_write_file will think that the file * didn't grow but the update isn't safe unless the * size change is logged. * * Force the log if we've committed a transaction * against the inode or if someone else has and * the commit record hasn't gone to disk (e.g. * the inode is pinned). This guarantees that * all changes affecting the inode are permanent * when we return. */ xfs_inode_log_item_t *iip; xfs_lsn_t lsn; iip = xip->i_itemp; if (iip && iip->ili_last_lsn) { lsn = iip->ili_last_lsn; xfs_log_force(mp, lsn, XFS_LOG_FORCE | XFS_LOG_SYNC); } else if (xfs_ipincount(xip) > 0) { xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); } } else { xfs_trans_t *tp; /* * O_SYNC or O_DSYNC _with_ a size update are handled * the same way. * * If the write was synchronous then we need to make * sure that the inode modification time is permanent. * We'll have updated the timestamp above, so here * we use a synchronous transaction to log the inode. * It's not fast, but it's necessary. * * If this a dsync write and the size got changed * non-transactionally, then we need to ensure that * the size change gets logged in a synchronous * transaction. */ tp = xfs_trans_alloc(mp, XFS_TRANS_WRITE_SYNC); if ((error = xfs_trans_reserve(tp, 0, XFS_SWRITE_LOG_RES(mp), 0, 0, 0))) { /* Transaction reserve failed */ xfs_trans_cancel(tp, 0); } else { /* Transaction reserve successful */ xfs_ilock(xip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, xip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, xip); xfs_trans_log_inode(tp, xip, XFS_ILOG_CORE); xfs_trans_set_sync(tp); error = xfs_trans_commit(tp, 0, (xfs_lsn_t)0); xfs_iunlock(xip, XFS_ILOCK_EXCL); } } } /* (ioflags & O_SYNC) */ xfs_rwunlock(bdp, locktype); return(ret); }
static ssize_t raw_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) { struct iovec local_iov = { .iov_base = (void *)buf, .iov_len = count }; return generic_file_write_nolock(file, &local_iov, 1, ppos); } static ssize_t raw_file_aio_write(struct kiocb *iocb, const char *buf, size_t count, loff_t pos) { struct iovec local_iov = { .iov_base = (void *)buf, .iov_len = count }; return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); } static struct file_operations raw_fops = { .read = generic_file_read, .aio_read = generic_file_aio_read, .write = raw_file_write, .aio_write = raw_file_aio_write, .open = raw_open, .release= raw_release, .ioctl = raw_ioctl, .readv = generic_file_readv, .writev = generic_file_writev, .owner = THIS_MODULE, }; static struct file_operations raw_ctl_fops = { .ioctl = raw_ctl_ioctl, .open = raw_open, .owner = THIS_MODULE, }; static int __init raw_init(void) { int i; register_chrdev(RAW_MAJOR, "raw", &raw_fops); devfs_mk_cdev(MKDEV(RAW_MAJOR, 0), S_IFCHR | S_IRUGO | S_IWUGO, "raw/rawctl"); for (i = 1; i < MAX_RAW_MINORS; i++) devfs_mk_cdev(MKDEV(RAW_MAJOR, i), S_IFCHR | S_IRUGO | S_IWUGO, "raw/raw%d", i); return 0; } static void __exit raw_exit(void) { int i; for (i = 1; i < MAX_RAW_MINORS; i++) devfs_remove("raw/raw%d", i); devfs_remove("raw/rawctl"); devfs_remove("raw"); unregister_chrdev(RAW_MAJOR, "raw"); }
static ssize_t raw_file_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct iovec local_iov = { .iov_base = (char __user *)buf, .iov_len = count }; return generic_file_write_nolock(file, &local_iov, 1, ppos); } static ssize_t raw_file_aio_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) { struct iovec local_iov = { .iov_base = (char __user *)buf, .iov_len = count }; return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); } static struct file_operations raw_fops = { .read = generic_file_read, .aio_read = generic_file_aio_read, .write = raw_file_write, .aio_write = raw_file_aio_write, .open = raw_open, .release= raw_release, .ioctl = raw_ioctl, .readv = generic_file_readv, .writev = generic_file_writev, .owner = THIS_MODULE, }; static struct file_operations raw_ctl_fops = { .ioctl = raw_ctl_ioctl, .open = raw_open, .owner = THIS_MODULE, }; static struct cdev raw_cdev = { .kobj = {.name = "raw", }, .owner = THIS_MODULE, }; static int __init raw_init(void) { int i; dev_t dev = MKDEV(RAW_MAJOR, 0); if (register_chrdev_region(dev, MAX_RAW_MINORS, "raw")) goto error; cdev_init(&raw_cdev, &raw_fops); if (cdev_add(&raw_cdev, dev, MAX_RAW_MINORS)) { kobject_put(&raw_cdev.kobj); unregister_chrdev_region(dev, MAX_RAW_MINORS); goto error; } devfs_mk_cdev(MKDEV(RAW_MAJOR, 0), S_IFCHR | S_IRUGO | S_IWUGO, "raw/rawctl"); for (i = 1; i < MAX_RAW_MINORS; i++) devfs_mk_cdev(MKDEV(RAW_MAJOR, i), S_IFCHR | S_IRUGO | S_IWUGO, "raw/raw%d", i); return 0; error: printk(KERN_ERR "error register raw device\n"); return 1; }