/*
 * sys_read - read from an open file descriptor at its current position.
 *
 * Returns whatever the file's ->read() method returns, -EBADF when fd does
 * not resolve to a file or the file is not open for reading, -EINVAL when
 * the file has no ->read() method, or the non-zero result of
 * locks_verify_area().  A positive result raises DN_ACCESS on the parent
 * directory's inode.
 */
asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count)
{
	ssize_t (*read_op)(struct file *, char *, size_t, loff_t *);
	struct file *filp;
	ssize_t nread = -EBADF;

	filp = fget(fd);
	if (!filp)
		goto done;
	if (!(filp->f_mode & FMODE_READ))
		goto release;

	nread = locks_verify_area(FLOCK_VERIFY_READ, filp->f_dentry->d_inode,
				  filp, filp->f_pos, count);
	if (nread)
		goto notify;

	/* Fetch the method once; a missing f_op or ->read yields -EINVAL. */
	read_op = filp->f_op ? filp->f_op->read : NULL;
	nread = read_op ? read_op(filp, buf, count, &filp->f_pos) : -EINVAL;

notify:
	if (nread > 0)
		inode_dir_notify(filp->f_dentry->d_parent->d_inode, DN_ACCESS);
release:
	fput(filp);
done:
	return nread;
}
/*
 * sys_pread - read from an open file descriptor at an explicit offset.
 *
 * The offset is passed to ->read() through a local copy, so the file's own
 * f_pos is not handed to the method.
 *
 * Returns the ->read() result, -EBADF when fd does not resolve to a file or
 * the file is not open for reading, -EINVAL when there is no ->read()
 * method or pos is negative, or the non-zero result of locks_verify_area().
 */
asmlinkage ssize_t sys_pread(unsigned int fd, char * buf, size_t count, loff_t pos)
{
	ssize_t (*read_op)(struct file *, char *, size_t, loff_t *);
	struct file *filp = fget(fd);
	ssize_t result = -EBADF;

	if (filp == NULL)
		return result;

	if (filp->f_mode & FMODE_READ) {
		result = locks_verify_area(FLOCK_VERIFY_READ,
					   filp->f_dentry->d_inode,
					   filp, pos, count);
		if (result == 0) {
			result = -EINVAL;
			read_op = filp->f_op ? filp->f_op->read : NULL;
			if (read_op != NULL && pos >= 0) {
				result = read_op(filp, buf, count, &pos);
				if (result > 0)
					dnotify_parent(filp->f_dentry, DN_ACCESS);
			}
		}
	}

	fput(filp);
	return result;
}
asmlinkage int sys_read(unsigned int fd,char * buf,int count) { int error; struct file * file; struct inode * inode; error = -EBADF; file = fget(fd); if (!file) goto bad_file; inode = file->f_inode; if (!inode) goto out; error = -EBADF; if (!(file->f_mode & 1)) goto out; error = -EINVAL; if (!file->f_op || !file->f_op->read) goto out; error = 0; if (count <= 0) goto out; error = locks_verify_area(FLOCK_VERIFY_READ,inode,file,file->f_pos,count); if (error) goto out; error = verify_area(VERIFY_WRITE,buf,count); if (error) goto out; error = file->f_op->read(inode,file,buf,count); out: fput(file, inode); bad_file: return error; }
/*
 * vfs_read - read from an already-resolved struct file.
 *
 * @file:  file to read from
 * @buf:   user-space destination buffer
 * @count: number of bytes requested
 * @pos:   in/out file offset handed to the read method
 *
 * Uses ->read() when the file provides it and do_sync_read() otherwise.
 * Returns the result of that call, -EBADF when the file is not open for
 * reading, -EINVAL when neither ->read() nor ->aio_read() exists, or the
 * non-zero result of locks_verify_area() / security_file_permission().
 * A positive result raises DN_ACCESS via dnotify_parent().
 */
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
	struct inode *ino = file->f_dentry->d_inode;
	ssize_t res;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!file->f_op)
		return -EINVAL;
	if (!file->f_op->read && !file->f_op->aio_read)
		return -EINVAL;

	res = locks_verify_area(FLOCK_VERIFY_READ, ino, file, *pos, count);
	if (res)
		return res;

	res = security_file_permission (file, MAY_READ);
	if (res)
		return res;

	if (file->f_op->read)
		res = file->f_op->read(file, buf, count, pos);
	else
		res = do_sync_read(file, buf, count, pos);

	if (res > 0)
		dnotify_parent(file->f_dentry, DN_ACCESS);
	return res;
}
/*
 * vfs_write - write to an already-resolved struct file.
 *
 * @file:  file to write to
 * @buf:   user-space source buffer
 * @count: number of bytes to write
 * @pos:   in/out file offset handed to the write method
 *
 * Uses ->write() when the file provides it and do_sync_write() otherwise.
 * Returns the result of that call, -EBADF when the file is not open for
 * writing, -EINVAL when neither ->write() nor ->aio_write() exists, or the
 * non-zero result of locks_verify_area() / security_file_permission().
 * A positive result raises DN_MODIFY via dnotify_parent().
 */
ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
	struct inode *ino = file->f_dentry->d_inode;
	ssize_t res;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!file->f_op)
		return -EINVAL;
	if (!file->f_op->write && !file->f_op->aio_write)
		return -EINVAL;

	res = locks_verify_area(FLOCK_VERIFY_WRITE, ino, file, *pos, count);
	if (res)
		return res;

	res = security_file_permission (file, MAY_WRITE);
	if (res)
		return res;

	if (file->f_op->write)
		res = file->f_op->write(file, buf, count, pos);
	else
		res = do_sync_write(file, buf, count, pos);

	if (res > 0)
		dnotify_parent(file->f_dentry, DN_MODIFY);
	return res;
}
/*
 * sys_write - write to an open file descriptor at its current position.
 *
 * Returns whatever the file's ->write() method returns, -EBADF when fd does
 * not resolve to a file or the file is not open for writing, -EINVAL when
 * the file has no ->write() method, or the non-zero result of
 * locks_verify_area().  A positive result raises DN_MODIFY via
 * dnotify_parent().
 */
asmlinkage ssize_t sys_write(unsigned int fd, const char * buf, size_t count)
{
	ssize_t (*write_op)(struct file *, const char *, size_t, loff_t *);
	struct file *filp;
	ssize_t nwritten = -EBADF;

	filp = fget(fd);
	if (!filp)
		goto done;
	if (!(filp->f_mode & FMODE_WRITE))
		goto release;

	nwritten = locks_verify_area(FLOCK_VERIFY_WRITE,
				     filp->f_dentry->d_inode, filp,
				     filp->f_pos, count);
	if (nwritten)
		goto notify;

	/* Fetch the method once; a missing f_op or ->write yields -EINVAL. */
	write_op = filp->f_op ? filp->f_op->write : NULL;
	nwritten = write_op ? write_op(filp, buf, count, &filp->f_pos) : -EINVAL;

notify:
	if (nwritten > 0)
		dnotify_parent(filp->f_dentry, DN_MODIFY);
release:
	fput(filp);
done:
	return nwritten;
}
/*
 * sys_pwrite - write to an open file descriptor at an explicit offset.
 *
 * The offset is passed to ->write() through a local copy, so the file's own
 * f_pos is not handed to the method.
 *
 * Returns the ->write() result, -EBADF when fd does not resolve to a file
 * or the file is not open for writing, -EINVAL when there is no ->write()
 * method or pos is negative, or the non-zero result of locks_verify_area().
 */
asmlinkage ssize_t sys_pwrite(unsigned int fd, const char * buf, size_t count, loff_t pos)
{
	ssize_t (*write_op)(struct file *, const char *, size_t, loff_t *);
	struct file *filp = fget(fd);
	ssize_t result = -EBADF;

	if (filp == NULL)
		return result;

	if (filp->f_mode & FMODE_WRITE) {
		result = locks_verify_area(FLOCK_VERIFY_WRITE,
					   filp->f_dentry->d_inode,
					   filp, pos, count);
		if (result == 0) {
			result = -EINVAL;
			write_op = filp->f_op ? filp->f_op->write : NULL;
			if (write_op != NULL && pos >= 0) {
				result = write_op(filp, buf, count, &pos);
				if (result > 0)
					dnotify_parent(filp->f_dentry, DN_MODIFY);
			}
		}
	}

	fput(filp);
	return result;
}
asmlinkage int sys_write(unsigned int fd,char * buf,unsigned int count) { int error; struct file * file; struct inode * inode; error = -EBADF; file = fget(fd); if (!file) goto bad_file; inode = file->f_inode; if (!inode) goto out; if (!(file->f_mode & 2)) goto out; error = -EINVAL; if (!file->f_op || !file->f_op->write) goto out; error = 0; if (!count) goto out; error = locks_verify_area(FLOCK_VERIFY_WRITE,inode,file,file->f_pos,count); if (error) goto out; error = verify_area(VERIFY_READ,buf,count); if (error) goto out; /* * If data has been written to the file, remove the setuid and * the setgid bits. We do it anyway otherwise there is an * extremely exploitable race - does your OS get it right |-> * * Set ATTR_FORCE so it will always be changed. */ if (!suser() && (inode->i_mode & (S_ISUID | S_ISGID))) { struct iattr newattrs; /* * Don't turn off setgid if no group execute. This special * case marks candidates for mandatory locking. */ newattrs.ia_mode = inode->i_mode & ~(S_ISUID | ((inode->i_mode & S_IXGRP) ? S_ISGID : 0)); newattrs.ia_valid = ATTR_CTIME | ATTR_MODE | ATTR_FORCE; notify_change(inode, &newattrs); } down(&inode->i_sem); error = file->f_op->write(inode,file,buf,count); up(&inode->i_sem); #ifdef CONFIG_OSFMACH3 if (inode->i_mem_object && inode->i_mem_object->imo_cacheable) inode_pager_uncache(inode); #endif /* CONFIG_OSFMACH3 */ out: fput(file, inode); bad_file: return error; }
asmlinkage int sys_write(unsigned int fd,char * buf,unsigned int count) { int error; struct file * file; struct inode * inode; int written; if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode)) return -EBADF; if (!(file->f_mode & 2)) return -EBADF; if (!file->f_op || !file->f_op->write) return -EINVAL; if (!count) return 0; error = locks_verify_area(FLOCK_VERIFY_WRITE,inode,file,file->f_pos,count); if (error) return error; error = verify_area(VERIFY_READ,buf,count); if (error) return error; /* * If data has been written to the file, remove the setuid and * the setgid bits. We do it anyway otherwise there is an * extremely exploitable race - does your OS get it right |-> * * Set ATTR_FORCE so it will always be changed. */ if (!suser() && (inode->i_mode & (S_ISUID | S_ISGID))) { struct iattr newattrs; /* * Don't turn off setgid if no group execute. This special * case marks candidates for mandatory locking. */ newattrs.ia_mode = inode->i_mode & ~(S_ISUID | ((inode->i_mode & S_IXGRP) ? S_ISGID : 0)); newattrs.ia_valid = ATTR_CTIME | ATTR_MODE | ATTR_FORCE; notify_change(inode, &newattrs); } down(&inode->i_sem); written = file->f_op->write(inode,file,buf,count); up(&inode->i_sem); return written; }
asmlinkage int sys_truncate(const char * path, unsigned long length) { struct inode * inode; int error; error = namei(path,&inode); if (error) return error; error = -EACCES; if (S_ISDIR(inode->i_mode)) goto out; error = permission(inode,MAY_WRITE); if (error) goto out; error = -EROFS; if (IS_RDONLY(inode)) goto out; error = -EPERM; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) goto out; error = get_write_access(inode); if (error) goto out; error = locks_verify_area(FLOCK_VERIFY_WRITE, inode, NULL, length < inode->i_size ? length : inode->i_size, abs(inode->i_size - length)); if (!error) { if (inode->i_sb && inode->i_sb->dq_op) inode->i_sb->dq_op->initialize(inode, -1); error = do_truncate(inode, length); } put_write_access(inode); out: iput(inode); return error; }
asmlinkage int sys_ftruncate(unsigned int fd, unsigned long length) { struct inode * inode; struct file * file; int error; if (fd >= NR_OPEN || !(file = current->files->fd[fd])) return -EBADF; if (!(inode = file->f_inode)) return -ENOENT; if (S_ISDIR(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) return -EACCES; if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; error = locks_verify_area(FLOCK_VERIFY_WRITE, inode, file, length < inode->i_size ? length : inode->i_size, abs(inode->i_size - length)); if (!error) error = do_truncate(inode, length); return error; }
asmlinkage int sys_read(unsigned int fd,char * buf,int count) { int error; struct file * file; struct inode * inode; if (fd>=NR_OPEN || !(file=current->files->fd[fd]) || !(inode=file->f_inode)) return -EBADF; if (!(file->f_mode & 1)) return -EBADF; if (!file->f_op || !file->f_op->read) return -EINVAL; if (count <= 0) return 0; error = locks_verify_area(FLOCK_VERIFY_READ,inode,file,file->f_pos,count); if (error) return error; error = verify_area(VERIFY_WRITE,buf,count); if (error) return error; return file->f_op->read(inode,file,buf,count); }
static int do_readv_writev(int type, struct inode * inode, struct file * file, const struct iovec * vector, unsigned long count) { size_t tot_len; struct iovec iov[UIO_MAXIOV]; int retval, i; IO_fn_t fn; /* * First get the "struct iovec" from user memory and * verify all the pointers */ if (!count) return 0; if (count > UIO_MAXIOV) return -EINVAL; retval = verify_area(VERIFY_READ, vector, count*sizeof(*vector)); if (retval) return retval; memcpy_fromfs(iov, vector, count*sizeof(*vector)); tot_len = 0; for (i = 0 ; i < count ; i++) { tot_len += iov[i].iov_len; retval = verify_area(type, iov[i].iov_base, iov[i].iov_len); if (retval) return retval; } retval = locks_verify_area(type == VERIFY_READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, inode, file, file->f_pos, tot_len); if (retval) return retval; /* * Then do the actual IO. Note that sockets need to be handled * specially as they have atomicity guarantees and can handle * iovec's natively */ if (inode->i_sock) return sock_readv_writev(type, inode, file, iov, count, tot_len); if (!file->f_op) return -EINVAL; /* VERIFY_WRITE actually means a read, as we write to user space */ fn = file->f_op->read; if (type == VERIFY_READ) fn = (IO_fn_t) file->f_op->write; vector = iov; while (count > 0) { void * base; int len, nr; base = vector->iov_base; len = vector->iov_len; vector++; count--; nr = fn(inode, file, base, len); if (nr < 0) { if (retval) break; retval = nr; break; } retval += nr; if (nr != len) break; } return retval; }
/*
 * do_sendfile - transfer up to count bytes from in_fd to out_fd.
 *
 * @out_fd: descriptor written to; its file must provide ->sendpage()
 * @in_fd:  descriptor read from; its file must provide ->sendfile()
 * @ppos:   offset to read from, or NULL to use the input file's f_pos
 * @count:  number of bytes requested
 * @max:    upper bound for the offset; 0 selects the smaller s_maxbytes
 *          of the two superblocks
 *
 * Returns the result of the input file's ->sendfile() method, or -EBADF,
 * -EINVAL, -EOVERFLOW, or the non-zero result of locks_verify_area() /
 * security_file_permission() when a check fails.
 */
static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, loff_t max)
{
	struct file *src, *dst;
	struct inode *src_inode, *dst_inode;
	int src_needs_put, dst_needs_put;
	loff_t start;
	ssize_t status = -EBADF;

	/* Resolve and validate the input side. */
	src = fget_light(in_fd, &src_needs_put);
	if (!src)
		return status;
	if (!(src->f_mode & FMODE_READ))
		goto put_src;

	status = -EINVAL;
	src_inode = src->f_dentry->d_inode;
	if (!src_inode)
		goto put_src;
	if (!src->f_op || !src->f_op->sendfile)
		goto put_src;

	if (!ppos)
		ppos = &src->f_pos;

	status = locks_verify_area(FLOCK_VERIFY_READ, src_inode, src, *ppos, count);
	if (status)
		goto put_src;
	status = security_file_permission (src, MAY_READ);
	if (status)
		goto put_src;

	/* Resolve and validate the output side. */
	status = -EBADF;
	dst = fget_light(out_fd, &dst_needs_put);
	if (!dst)
		goto put_src;
	if (!(dst->f_mode & FMODE_WRITE))
		goto put_dst;

	status = -EINVAL;
	if (!dst->f_op || !dst->f_op->sendpage)
		goto put_dst;

	dst_inode = dst->f_dentry->d_inode;
	status = locks_verify_area(FLOCK_VERIFY_WRITE, dst_inode, dst, dst->f_pos, count);
	if (status)
		goto put_dst;
	status = security_file_permission (dst, MAY_WRITE);
	if (status)
		goto put_dst;

	if (!max)
		max = min(src_inode->i_sb->s_maxbytes, dst_inode->i_sb->s_maxbytes);

	start = *ppos;
	if (unlikely(start < 0)) {
		status = -EINVAL;
		goto put_dst;
	}

	/* Clamp the request so that it does not run past max. */
	if (unlikely(start + count > max)) {
		if (start >= max) {
			status = -EOVERFLOW;
			goto put_dst;
		}
		count = max - start;
	}

	status = src->f_op->sendfile(src, ppos, count, file_send_actor, dst);

	if (*ppos > max)
		status = -EOVERFLOW;

put_dst:
	fput_light(dst, dst_needs_put);
put_src:
	fput_light(src, src_needs_put);
	return status;
}
/*
 * do_readv_writev - common implementation of vectored read and write.
 *
 * @type:    READ, or anything else for a write
 * @file:    file to operate on
 * @uvector: user-space array of struct iovec
 * @nr_segs: number of entries in @uvector
 * @pos:     in/out file offset handed to the I/O methods
 *
 * Uses the file's ->readv()/->writev() when present; otherwise calls
 * ->read()/->write() once per segment.  Returns the number of bytes
 * transferred, 0 for zero segments or a zero total length, -EINVAL,
 * -ENOMEM, -EFAULT, or the non-zero result of locks_verify_area().
 */
static ssize_t do_readv_writev(int type, struct file *file,
			       const struct iovec __user * uvector,
			       unsigned long nr_segs, loff_t *pos)
{
	typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
	typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *);

	struct iovec on_stack[UIO_FASTIOV];
	struct iovec *segs = on_stack;
	struct iovec *cur;
	struct inode *ino;
	size_t total = 0;
	io_fn_t seg_fn;
	iov_fn_t vec_fn = NULL;
	ssize_t result = 0;
	int idx;

	/*
	 * SuS allows readv() to fail for iovcnt <= 0, but Linux has
	 * traditionally returned zero for zero segments.
	 */
	if (nr_segs == 0)
		goto finish;

	result = -EINVAL;
	if (nr_segs > UIO_MAXIOV)
		goto finish;
	if (!file->f_op)
		goto finish;

	/* Larger vectors do not fit the on-stack array. */
	if (nr_segs > UIO_FASTIOV) {
		result = -ENOMEM;
		segs = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
		if (!segs)
			goto finish;
	}

	result = -EFAULT;
	if (copy_from_user(segs, uvector, nr_segs*sizeof(*uvector)))
		goto finish;

	/*
	 * Single unix specification: -EINVAL if an element length does not
	 * fit an ssize_t, or if the total does not.  iov_len is a size_t,
	 * so the sign is checked after conversion.
	 */
	result = -EINVAL;
	for (idx = 0; idx < nr_segs; idx++) {
		ssize_t seg_len = (ssize_t)segs[idx].iov_len;

		if (seg_len < 0)
			goto finish;
		total += seg_len;
		if ((ssize_t)total < 0)
			goto finish;
	}
	if (total == 0) {
		result = 0;
		goto finish;
	}

	ino = file->f_dentry->d_inode;
	result = locks_verify_area((type == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE),
				   ino, file, *pos, total);
	if (result)
		goto finish;

	if (type == READ) {
		seg_fn = file->f_op->read;
		vec_fn = file->f_op->readv;
	} else {
		seg_fn = (io_fn_t)file->f_op->write;
		vec_fn = file->f_op->writev;
	}

	if (vec_fn) {
		result = vec_fn(file, segs, nr_segs, pos);
		goto finish;
	}

	/* No vectored method: walk the segments one at a time. */
	result = 0;
	for (cur = segs; nr_segs > 0; cur++, nr_segs--) {
		void __user *base = cur->iov_base;
		size_t len = cur->iov_len;
		ssize_t nr = seg_fn(file, base, len, pos);

		if (nr < 0) {
			/* Report the error only if nothing was transferred. */
			if (!result)
				result = nr;
			break;
		}
		result += nr;
		/* A short transfer ends the operation. */
		if (nr != len)
			break;
	}

finish:
	if (segs != on_stack)
		kfree(segs);
	/* For reads the test also passes when result is 0. */
	if ((result + (type == READ)) > 0)
		dnotify_parent(file->f_dentry,
			       (type == READ) ? DN_ACCESS : DN_MODIFY);
	return result;
}
static ssize_t do_readv_writev(int type, struct file *file, const struct iovec * vector, unsigned long count) { typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); size_t tot_len; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov=iovstack; ssize_t ret, i; iov_fn_t fnv; struct inode *inode; /* * First get the "struct iovec" from user memory and * verify all the pointers */ ret = 0; if (!count) goto out_nofree; ret = -EINVAL; if (count > UIO_MAXIOV) goto out_nofree; if (!file->f_op) goto out_nofree; if (count > UIO_FASTIOV) { ret = -ENOMEM; iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); if (!iov) goto out_nofree; } ret = -EFAULT; if (copy_from_user(iov, vector, count*sizeof(*vector))) goto out; /* * Single unix specification: * We should -EINVAL if an element length is not >= 0 and fitting an ssize_t * The total length is fitting an ssize_t * * Be careful here because iov_len is a size_t not an ssize_t */ tot_len = 0; ret = -EINVAL; for (i = 0 ; i < count ; i++) { ssize_t len = (ssize_t) iov[i].iov_len; if (len < 0) /* size_t not fitting an ssize_t .. */ goto out; tot_len += len; /* We must do this work unsigned - signed overflow is undefined and gcc 3.2 now uses that fact sometimes... FIXME: put in a proper limits.h for each platform */ #if BITS_PER_LONG==64 if (tot_len > 0x7FFFFFFFFFFFFFFFUL) #else if (tot_len > 0x7FFFFFFFUL) #endif goto out; } inode = file->f_dentry->d_inode; /* VERIFY_WRITE actually means a read, as we write to user space */ ret = locks_verify_area((type == VERIFY_WRITE ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE), inode, file, file->f_pos, tot_len); if (ret) goto out; fnv = (type == VERIFY_WRITE ? file->f_op->readv : file->f_op->writev); if (fnv) { ret = fnv(file, iov, count, &file->f_pos); goto out; } ret = fallback_readv_writev(type == VERIFY_WRITE ? 
READ : WRITE, file, iov, count, &file->f_pos); out: if (iov != iovstack) kfree(iov); out_nofree: /* VERIFY_WRITE actually means a read, as we write to user space */ if ((ret + (type == VERIFY_WRITE)) > 0) dnotify_parent(file->f_dentry, (type == VERIFY_WRITE) ? DN_ACCESS : DN_MODIFY); return ret; }
static ssize_t do_readv_writev(int type, struct file *file, const struct iovec * vector, unsigned long count) { typedef ssize_t (*io_fn_t)(struct file *, char *, size_t, loff_t *); typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); size_t tot_len; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov=iovstack; ssize_t ret, i; io_fn_t fn; iov_fn_t fnv; struct inode *inode; /* * First get the "struct iovec" from user memory and * verify all the pointers */ ret = 0; if (!count) goto out_nofree; ret = -EINVAL; if (count > UIO_MAXIOV) goto out_nofree; if (!file->f_op) goto out_nofree; if (count > UIO_FASTIOV) { ret = -ENOMEM; iov = kmalloc(count*sizeof(struct iovec), GFP_KERNEL); if (!iov) goto out_nofree; } ret = -EFAULT; if (copy_from_user(iov, vector, count*sizeof(*vector))) goto out; /* BSD readv/writev returns EINVAL if one of the iov_len values < 0 or tot_len overflowed a 32-bit integer. -ink */ tot_len = 0; ret = -EINVAL; for (i = 0 ; i < count ; i++) { size_t tmp = tot_len; int len = iov[i].iov_len; if (len < 0) goto out; (u32)tot_len += len; if (tot_len < tmp || tot_len < (u32)len) goto out; } inode = file->f_dentry->d_inode; /* VERIFY_WRITE actually means a read, as we write to user space */ ret = locks_verify_area((type == VERIFY_WRITE ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE), inode, file, file->f_pos, tot_len); if (ret) goto out; fnv = (type == VERIFY_WRITE ? file->f_op->readv : file->f_op->writev); if (fnv) { ret = fnv(file, iov, count, &file->f_pos); goto out; } /* VERIFY_WRITE actually means a read, as we write to user space */ fn = (type == VERIFY_WRITE ? 
file->f_op->read : (io_fn_t) file->f_op->write); ret = 0; vector = iov; while (count > 0) { void * base; size_t len; ssize_t nr; base = vector->iov_base; len = vector->iov_len; vector++; count--; nr = fn(file, base, len, &file->f_pos); if (nr < 0) { if (!ret) ret = nr; break; } ret += nr; if (nr != len) break; } out: if (iov != iovstack) kfree(iov); out_nofree: /* VERIFY_WRITE actually means a read, as we write to user space */ if ((ret + (type == VERIFY_WRITE)) > 0) inode_dir_notify(file->f_dentry->d_parent->d_inode, (type == VERIFY_WRITE) ? DN_MODIFY : DN_ACCESS); return ret; }