static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);

	if (!(file->f_flags & O_NOATIME) &&
	    !IS_NOATIME(&ip->i_inode)) {
		struct gfs2_holder i_gh;
		int error;

		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
		error = gfs2_glock_nq(&i_gh);
		if (error == 0) {
			file_accessed(file);
			gfs2_glock_dq(&i_gh);
		}
		gfs2_holder_uninit(&i_gh);
		if (error)
			return error;
	}
	vma->vm_ops = &gfs2_vm_ops;

	return 0;
}
static ssize_t hypfs_aio_read(struct kiocb *iocb, const struct iovec *iov,
			      unsigned long nr_segs, loff_t offset)
{
	char *data;
	ssize_t ret;
	struct file *filp = iocb->ki_filp;
	/* XXX: temporary */
	char __user *buf = iov[0].iov_base;
	size_t count = iov[0].iov_len;

	if (nr_segs != 1)
		return -EINVAL;

	data = filp->private_data;
	ret = simple_read_from_buffer(buf, count, &offset, data, strlen(data));
	if (ret <= 0)
		return ret;

	iocb->ki_pos += ret;
	file_accessed(filp);

	return ret;
}
static ssize_t hypfs_aio_read(struct kiocb *iocb, char __user *buf,
			      size_t count, loff_t offset)
{
	char *data;
	size_t len;
	struct file *filp = iocb->ki_filp;

	data = filp->private_data;
	len = strlen(data);
	if (offset > len) {
		count = 0;
		goto out;
	}
	if (count > len - offset)
		count = len - offset;
	if (copy_to_user(buf, data + offset, count)) {
		count = -EFAULT;
		goto out;
	}
	iocb->ki_pos += count;
	file_accessed(filp);
out:
	return count;
}
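/*
 * The clamp-and-copy logic above mirrors what simple_read_from_buffer() does
 * for the iovec variant. A minimal userspace sketch of the same semantics,
 * with a hypothetical helper name and plain memcpy standing in for
 * copy_to_user():
 */
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

/* Copy at most @count bytes from @from (length @available) starting at
 * *@ppos, advance *@ppos, and return the number of bytes copied. */
static ssize_t read_from_buffer(void *to, size_t count, off_t *ppos,
				const void *from, size_t available)
{
	off_t pos = *ppos;

	if (pos < 0)
		return -1;		/* -EINVAL in the kernel */
	if ((size_t)pos >= available)
		return 0;		/* read past EOF returns 0 */
	if (count > available - pos)
		count = available - pos;
	memcpy(to, (const char *)from + pos, count);	/* copy_to_user() stand-in */
	*ppos = pos + count;
	return count;
}

int main(void)
{
	const char data[] = "hypfs sample\n";
	char out[8];
	off_t pos = 0;
	ssize_t n;

	/* Drain the buffer in short reads, as a read(2) loop would. */
	while ((n = read_from_buffer(out, sizeof(out), &pos,
				     data, strlen(data))) > 0)
		fwrite(out, 1, n, stdout);
	return 0;
}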
/* file operations for directories */
static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
{
	struct coda_file_info *cfi;
	struct file *host_file;
	int ret;

	cfi = CODA_FTOC(coda_file);
	BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
	host_file = cfi->cfi_container;

	if (host_file->f_op->iterate) {
		struct inode *host_inode = file_inode(host_file);

		mutex_lock(&host_inode->i_mutex);
		ret = -ENOENT;
		if (!IS_DEADDIR(host_inode)) {
			ret = host_file->f_op->iterate(host_file, ctx);
			file_accessed(host_file);
		}
		mutex_unlock(&host_inode->i_mutex);
		return ret;
	}
	/* Venus: we must read Venus dirents from a file */
	return coda_venus_readdir(coda_file, ctx);
}
int vfs_readdir(struct file *file, filldir_t filler, void *buf)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	int res = -ENOTDIR;

	if (!file->f_op || !file->f_op->readdir)
		goto out;

	res = security_file_permission(file, MAY_READ);
	if (res)
		goto out;

	res = mutex_lock_killable(&inode->i_mutex);
	if (res)
		goto out;

	res = -ENOENT;
	if (!IS_DEADDIR(inode)) {
		res = file->f_op->readdir(file, buf, filler);
		file_accessed(file);
	}
	mutex_unlock(&inode->i_mutex);
out:
	return res;
}
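/*
 * vfs_readdir() is what ultimately backs the getdents64(2) syscall: the
 * filldir callback packs entries into the user buffer. A minimal userspace
 * sketch of the consumer side, using the raw syscall as in the getdents(2)
 * man page:
 */
#define _GNU_SOURCE
#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	int fd = open(".", O_RDONLY | O_DIRECTORY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	for (;;) {
		/* Each call fills buf with packed linux_dirent64 records,
		 * the same records a filldir-style callback emits in-kernel. */
		long nread = syscall(SYS_getdents64, fd, buf, sizeof(buf));

		if (nread < 0) {
			perror("getdents64");
			return 1;
		}
		if (nread == 0)
			break;		/* end of directory */
		for (long off = 0; off < nread; ) {
			struct dirent64 *d = (struct dirent64 *)(buf + off);

			printf("%s\n", d->d_name);
			off += d->d_reclen;
		}
	}
	close(fd);
	return 0;
}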
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	nid_t ino = inode->i_ino;
	int ret = 0;
	bool need_cp = false;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};

	if (unlikely(f2fs_readonly(inode->i_sb)))
		return 0;

	trace_f2fs_sync_file_enter(inode);

	/* if fdatasync is triggered, let's do in-place-update */
	if (get_dirty_pages(inode) <= SM_I(sbi)->min_fsync_blocks)
		set_inode_flag(fi, FI_NEED_IPU);
	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
	clear_inode_flag(fi, FI_NEED_IPU);

	if (ret) {
		trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
		return ret;
	}

	/* if the inode is dirty, let's recover all the time */
	if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) {
		update_inode_page(inode);
		goto go_write;
	}

	/*
	 * if there is no written data, don't waste time to write recovery info.
	 */
	if (!is_inode_flag_set(fi, FI_APPEND_WRITE) &&
			!exist_written_data(sbi, ino, APPEND_INO)) {

		/* it may call write_inode just prior to fsync */
		if (need_inode_page_update(sbi, ino))
			goto go_write;

		if (is_inode_flag_set(fi, FI_UPDATE_WRITE) ||
				exist_written_data(sbi, ino, UPDATE_INO))
			goto flush_out;
		goto out;
	}
go_write:
	/* guarantee free sections for fsync */
	f2fs_balance_fs(sbi);

	/*
	 * Both of fdatasync() and fsync() are able to be recovered from
	 * sudden-power-off.
	 */
	down_read(&fi->i_sem);
	need_cp = need_do_checkpoint(inode);
	up_read(&fi->i_sem);

	if (need_cp) {
		/* all the dirty node pages should be flushed for POR */
		ret = f2fs_sync_fs(inode->i_sb, 1);

		/*
		 * We've secured consistency through sync_fs. Following pino
		 * will be used only for fsynced inodes after checkpoint.
		 */
		try_to_fix_pino(inode);
		clear_inode_flag(fi, FI_APPEND_WRITE);
		clear_inode_flag(fi, FI_UPDATE_WRITE);
		goto out;
	}
sync_nodes:
	sync_node_pages(sbi, ino, &wbc);

	/* if cp_error was enabled, we should avoid infinite loop */
	if (unlikely(f2fs_cp_error(sbi)))
		goto out;

	if (need_inode_block_update(sbi, ino)) {
		mark_inode_dirty_sync(inode);
		f2fs_write_inode(inode, NULL);
		goto sync_nodes;
	}

	ret = wait_on_node_pages_writeback(sbi, ino);
	if (ret)
		goto out;

	/* once recovery info is written, don't need to track this */
	remove_dirty_inode(sbi, ino, APPEND_INO);
	clear_inode_flag(fi, FI_APPEND_WRITE);
flush_out:
	remove_dirty_inode(sbi, ino, UPDATE_INO);
	clear_inode_flag(fi, FI_UPDATE_WRITE);
	ret = f2fs_issue_flush(sbi);
out:
	trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
	f2fs_trace_ios(NULL, 1);
	return ret;
}

static pgoff_t __get_first_dirty_index(struct address_space *mapping,
						pgoff_t pgofs, int whence)
{
	struct pagevec pvec;
	int nr_pages;

	if (whence != SEEK_DATA)
		return 0;

	/* find first dirty page index */
	pagevec_init(&pvec, 0);
	nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs,
					PAGECACHE_TAG_DIRTY, 1);
	pgofs = nr_pages ? pvec.pages[0]->index : LONG_MAX;
	pagevec_release(&pvec);
	return pgofs;
}

static bool __found_offset(block_t blkaddr, pgoff_t dirty, pgoff_t pgofs,
							int whence)
{
	switch (whence) {
	case SEEK_DATA:
		if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
			(blkaddr != NEW_ADDR && blkaddr != NULL_ADDR))
			return true;
		break;
	case SEEK_HOLE:
		if (blkaddr == NULL_ADDR)
			return true;
		break;
	}
	return false;
}

static inline int unsigned_offsets(struct file *file)
{
	return file->f_mode & FMODE_UNSIGNED_OFFSET;
}

static loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
{
	if (offset < 0 && !unsigned_offsets(file))
		return -EINVAL;
	if (offset > maxsize)
		return -EINVAL;

	if (offset != file->f_pos) {
		file->f_pos = offset;
		file->f_version = 0;
	}
	return offset;
}

static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes = inode->i_sb->s_maxbytes;
	struct dnode_of_data dn;
	pgoff_t pgofs, end_offset, dirty;
	loff_t data_ofs = offset;
	loff_t isize;
	int err = 0;

	mutex_lock(&inode->i_mutex);

	isize = i_size_read(inode);
	if (offset >= isize)
		goto fail;

	/* handle inline data case */
	if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
		if (whence == SEEK_HOLE)
			data_ofs = isize;
		goto found;
	}

	pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT);

	dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);

	for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
		if (err && err != -ENOENT) {
			goto fail;
		} else if (err == -ENOENT) {
			/* direct node does not exist */
			if (whence == SEEK_DATA) {
				pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
							F2FS_I(inode));
				continue;
			} else {
				goto found;
			}
		}

		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));

		/* find data/hole in dnode block */
		for (; dn.ofs_in_node < end_offset;
				dn.ofs_in_node++, pgofs++,
				data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
			block_t blkaddr;

			blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);

			if (__found_offset(blkaddr, dirty, pgofs, whence)) {
				f2fs_put_dnode(&dn);
				goto found;
			}
		}
		f2fs_put_dnode(&dn);
	}

	if (whence == SEEK_DATA)
		goto fail;
found:
	if (whence == SEEK_HOLE && data_ofs > isize)
		data_ofs = isize;
	mutex_unlock(&inode->i_mutex);
	return vfs_setpos(file, data_ofs, maxbytes);
fail:
	mutex_unlock(&inode->i_mutex);
	return -ENXIO;
}

static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes = inode->i_sb->s_maxbytes;

	switch (whence) {
	case SEEK_SET:
	case SEEK_CUR:
	case SEEK_END:
		return generic_file_llseek_size(file, offset, whence,
						maxbytes);
	case SEEK_DATA:
	case SEEK_HOLE:
		if (offset < 0)
			return -ENXIO;
		return f2fs_seek_block(file, offset, whence);
	}

	return -EINVAL;
}

static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file_inode(file);

	if (f2fs_encrypted_inode(inode)) {
		int err = f2fs_get_encryption_info(inode);
		if (err)
			return 0;
	}

	/* we don't need to use inline_data strictly */
	if (f2fs_has_inline_data(inode)) {
		int err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}

	file_accessed(file);
	vma->vm_ops = &f2fs_file_vm_ops;
	return 0;
}

static int f2fs_file_open(struct inode *inode, struct file *filp)
{
	int ret = generic_file_open(inode, filp);

	if (!ret && f2fs_encrypted_inode(inode)) {
		ret = f2fs_get_encryption_info(inode);
		if (ret)
			ret = -EACCES;
	}
	return ret;
}
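/*
 * f2fs_seek_block() walks dnode blocks to answer SEEK_DATA/SEEK_HOLE
 * queries. From userspace the same interface is exercised with plain
 * lseek(2); a minimal sketch that enumerates the data extents of a
 * sparse file (the file name is just a placeholder):
 */
#define _GNU_SOURCE		/* for SEEK_DATA / SEEK_HOLE */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	int fd = open(argc > 1 ? argv[1] : "sparse.img", O_RDONLY);
	off_t end, data, hole = 0;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	end = lseek(fd, 0, SEEK_END);

	/* Alternate SEEK_DATA/SEEK_HOLE to walk extents; the filesystem
	 * returns -ENXIO once the offset passes the last data, which
	 * terminates the loop. */
	while ((data = lseek(fd, hole, SEEK_DATA)) >= 0) {
		hole = lseek(fd, data, SEEK_HOLE);
		if (hole < 0)
			hole = end;
		printf("data: [%lld, %lld)\n",
		       (long long)data, (long long)hole);
		if (hole >= end)
			break;
	}
	close(fd);
	return 0;
}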
static ssize_t pipe_readv(struct file *filp, const struct iovec *_iov,
			  unsigned long nr_segs, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	int do_wakeup;
	ssize_t ret;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;

	total_len = iov_length(iov, nr_segs);
	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	down(PIPE_SEM(*inode));
	for (;;) {
		int size = PIPE_LEN(*inode);
		if (size) {
			char *pipebuf = PIPE_BASE(*inode) + PIPE_START(*inode);
			ssize_t chars = PIPE_MAX_RCHUNK(*inode);

			if (chars > total_len)
				chars = total_len;
			if (chars > size)
				chars = size;

			if (pipe_iov_copy_to_user(iov, pipebuf, chars)) {
				if (!ret)
					ret = -EFAULT;
				break;
			}
			ret += chars;

			PIPE_START(*inode) += chars;
			PIPE_START(*inode) &= (PIPE_SIZE - 1);
			PIPE_LEN(*inode) -= chars;
			total_len -= chars;
			do_wakeup = 1;
			if (!total_len)
				break;	/* common path: read succeeded */
		}
		if (PIPE_LEN(*inode)) /* test for cyclic buffers */
			continue;
		if (!PIPE_WRITERS(*inode))
			break;
		if (!PIPE_WAITING_WRITERS(*inode)) {
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(PIPE_WAIT(*inode));
			kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO,
				    POLL_OUT);
		}
		pipe_wait(inode);
	}
	up(PIPE_SEM(*inode));

	/* Signal writers asynchronously that there is more room. */
	if (do_wakeup) {
		wake_up_interruptible(PIPE_WAIT(*inode));
		kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
	}

	/*
	 * Hack: we turn off atime updates for -RT kernels.
	 * Who uses them on pipes anyway?
	 */
#ifndef CONFIG_PREEMPT_RT
	if (ret > 0)
		file_accessed(filp);
#endif
	return ret;
}
/*
 * Common entry point for read/write/readv/writev
 * This function will dispatch it to either the direct I/O
 * or buffered I/O path depending on the mount options and/or
 * augmented/extended metadata attached to the file.
 * Note: File extended attributes override any mount options.
 */
static ssize_t do_readv_writev(enum ORANGEFS_io_type type, struct file *file,
		loff_t *offset, struct iov_iter *iter)
{
	struct inode *inode = file->f_mapping->host;
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
	size_t count = iov_iter_count(iter);
	ssize_t total_count = 0;
	ssize_t ret = -EINVAL;

	gossip_debug(GOSSIP_FILE_DEBUG,
		"%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
		__func__, handle, (int)count);

	if (type == ORANGEFS_IO_WRITE) {
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): proceeding with offset : %llu, "
			     "size %d\n",
			     __func__, handle, llu(*offset), (int)count);
	}

	if (count == 0) {
		ret = 0;
		goto out;
	}

	while (iov_iter_count(iter)) {
		size_t each_count = iov_iter_count(iter);
		size_t amt_complete;

		/* how much to transfer in this loop iteration */
		if (each_count > orangefs_bufmap_size_query())
			each_count = orangefs_bufmap_size_query();

		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): size of each_count(%d)\n",
			     __func__, handle, (int)each_count);
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): BEFORE wait_for_io: offset is %d\n",
			     __func__, handle, (int)*offset);

		ret = wait_for_direct_io(type, inode, offset, iter,
				each_count, 0);
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): return from wait_for_io:%d\n",
			     __func__, handle, (int)ret);

		if (ret < 0)
			goto out;

		*offset += ret;
		total_count += ret;
		amt_complete = ret;

		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): AFTER wait_for_io: offset is %d\n",
			     __func__, handle, (int)*offset);

		/*
		 * if we got a short I/O operation,
		 * fall out and return what we got so far
		 */
		if (amt_complete < each_count)
			break;
	} /* end while */

out:
	if (total_count > 0)
		ret = total_count;
	if (ret > 0) {
		if (type == ORANGEFS_IO_READ) {
			file_accessed(file);
		} else {
			file_update_time(file);
			/*
			 * Must invalidate to ensure write loop doesn't
			 * prevent kernel from reading updated
			 * attribute. Size probably changed because of
			 * the write, and other clients could update
			 * any other attribute.
			 */
			orangefs_inode->getattr_time = jiffies - 1;
		}
	}

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): Value(%d) returned.\n",
		     __func__, handle, (int)ret);

	return ret;
}
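/*
 * The loop above is the classic bounded-chunk transfer pattern: cap each
 * request at the bufmap size, advance the offset by what actually
 * completed, and stop early on a short transfer. A userspace sketch of
 * the same pattern around write(2); CHUNK_MAX is an arbitrary stand-in
 * for orangefs_bufmap_size_query():
 */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>

#define CHUNK_MAX 4096	/* stand-in for orangefs_bufmap_size_query() */

/* Write @len bytes in bounded chunks; return bytes written or -errno.
 * Stops early on a short write, like the kernel loop above. */
static ssize_t write_in_chunks(int fd, const char *buf, size_t len)
{
	ssize_t total = 0;

	while (len) {
		size_t each = len > CHUNK_MAX ? CHUNK_MAX : len;
		ssize_t n = write(fd, buf + total, each);

		if (n < 0)
			return total ? total : -errno;
		total += n;
		len -= n;
		if ((size_t)n < each)	/* short write: return what we got */
			break;
	}
	return total;
}

int main(void)
{
	const char msg[] = "hello from the chunked writer\n";
	ssize_t n = write_in_chunks(STDOUT_FILENO, msg, sizeof(msg) - 1);

	return n < 0 ? 1 : 0;
}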
static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;
	struct inode *inode = file_inode(vma->vm_file);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_alloc_parms ap = { .aflags = 0, };
	unsigned long last_index;
	u64 pos = (u64)page->index << PAGE_CACHE_SHIFT;
	unsigned int data_blocks, ind_blocks, rblocks;
	struct gfs2_holder gh;
	loff_t size;
	int ret;

	sb_start_pagefault(inode->i_sb);

	/* Update file times before taking page lock */
	file_update_time(vma->vm_file);

	ret = get_write_access(inode);
	if (ret)
		goto out;

	ret = gfs2_rs_alloc(ip);
	if (ret)
		goto out_write_access;

	gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	ret = gfs2_glock_nq(&gh);
	if (ret)
		goto out_uninit;

	set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
	set_bit(GIF_SW_PAGED, &ip->i_flags);

	if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) {
		lock_page(page);
		if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
			ret = -EAGAIN;
			unlock_page(page);
		}
		goto out_unlock;
	}

	ret = gfs2_rindex_update(sdp);
	if (ret)
		goto out_unlock;

	ret = gfs2_quota_lock_check(ip);
	if (ret)
		goto out_unlock;
	gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
	ap.target = data_blocks + ind_blocks;
	ret = gfs2_inplace_reserve(ip, &ap);
	if (ret)
		goto out_quota_unlock;

	rblocks = RES_DINODE + ind_blocks;
	if (gfs2_is_jdata(ip))
		rblocks += data_blocks ? data_blocks : 1;
	if (ind_blocks || data_blocks) {
		rblocks += RES_STATFS + RES_QUOTA;
		rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks);
	}
	ret = gfs2_trans_begin(sdp, rblocks, 0);
	if (ret)
		goto out_trans_fail;

	lock_page(page);
	ret = -EINVAL;
	size = i_size_read(inode);
	last_index = (size - 1) >> PAGE_CACHE_SHIFT;
	/* Check page index against inode size */
	if (size == 0 || (page->index > last_index))
		goto out_trans_end;

	ret = -EAGAIN;
	/* If truncated, we must retry the operation, we may have raced
	 * with the glock demotion code.
	 */
	if (!PageUptodate(page) || page->mapping != inode->i_mapping)
		goto out_trans_end;

	/* Unstuff, if required, and allocate backing blocks for page */
	ret = 0;
	if (gfs2_is_stuffed(ip))
		ret = gfs2_unstuff_dinode(ip, page);
	if (ret == 0)
		ret = gfs2_allocate_page_backing(page);

out_trans_end:
	if (ret)
		unlock_page(page);
	gfs2_trans_end(sdp);
out_trans_fail:
	gfs2_inplace_release(ip);
out_quota_unlock:
	gfs2_quota_unlock(ip);
out_unlock:
	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	if (ret == 0) {
		set_page_dirty(page);
		wait_for_stable_page(page);
	}
out_write_access:
	put_write_access(inode);
out:
	sb_end_pagefault(inode->i_sb);
	return block_page_mkwrite_return(ret);
}

static const struct vm_operations_struct gfs2_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = gfs2_page_mkwrite,
	.remap_pages = generic_file_remap_pages,
};

/**
 * gfs2_mmap -
 * @file: The file to map
 * @vma: The VMA which describes the mapping
 *
 * There is no need to get a lock here unless we should be updating
 * atime. We ignore any locking errors since the only consequence is
 * a missed atime update (which will just be deferred until later).
 *
 * Returns: 0
 */

static int gfs2_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);

	if (!(file->f_flags & O_NOATIME) &&
	    !IS_NOATIME(&ip->i_inode)) {
		struct gfs2_holder i_gh;
		int error;

		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
					   &i_gh);
		if (error)
			return error;
		/* grab lock to update inode */
		gfs2_glock_dq_uninit(&i_gh);
		file_accessed(file);
	}
	vma->vm_ops = &gfs2_vm_ops;

	return 0;
}

/**
 * gfs2_open_common - This is common to open and atomic_open
 * @inode: The inode being opened
 * @file: The file being opened
 *
 * This may be called under a glock or not depending upon how it has
 * been called. We must always be called under a glock for regular
 * files, however. For other file types, it does not matter whether
 * we hold the glock or not.
 *
 * Returns: Error code or 0 for success
 */

int gfs2_open_common(struct inode *inode, struct file *file)
{
	struct gfs2_file *fp;
	int ret;

	if (S_ISREG(inode->i_mode)) {
		ret = generic_file_open(inode, file);
		if (ret)
			return ret;
	}

	fp = kzalloc(sizeof(struct gfs2_file), GFP_NOFS);
	if (!fp)
		return -ENOMEM;

	mutex_init(&fp->f_fl_mutex);

	gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
	file->private_data = fp;
	return 0;
}

/**
 * gfs2_open - open a file
 * @inode: the inode to open
 * @file: the struct file for this opening
 *
 * After atomic_open, this function is only used for opening files
 * which are already cached. We must still get the glock for regular
 * files to ensure that we have the file size uptodate for the large
 * file check which is in the common code. That is only an issue for
 * regular files though.
 *
 * Returns: errno
 */

static int gfs2_open(struct inode *inode, struct file *file)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_holder i_gh;
	int error;
	bool need_unlock = false;

	if (S_ISREG(ip->i_inode.i_mode)) {
		error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
					   &i_gh);
		if (error)
			return error;
		need_unlock = true;
	}

	error = gfs2_open_common(inode, file);

	if (need_unlock)
		gfs2_glock_dq_uninit(&i_gh);

	return error;
}

/**
 * gfs2_release - called to close a struct file
 * @inode: the inode the struct file belongs to
 * @file: the struct file being closed
 *
 * Returns: errno
 */

static int gfs2_release(struct inode *inode, struct file *file)
{
	struct gfs2_inode *ip = GFS2_I(inode);

	kfree(file->private_data);
	file->private_data = NULL;

	if (!(file->f_mode & FMODE_WRITE))
		return 0;

	gfs2_rs_delete(ip, &inode->i_writecount);
	return 0;
}

/**
 * gfs2_fsync - sync the dirty data for a file (across the cluster)
 * @file: the file that points to the dentry
 * @start: the start position in the file to sync
 * @end: the end position in the file to sync
 * @datasync: set if we can ignore timestamp changes
 *
 * We split the data flushing here so that we don't wait for the data
 * until after we've also sent the metadata to disk. Note that for
 * data=ordered, we will write & wait for the data at the log flush
 * stage anyway, so this is unlikely to make much of a difference
 * except in the data=writeback case.
 *
 * If the fdatawrite fails due to any reason except -EIO, we will
 * continue the remainder of the fsync, although we'll still report
 * the error at the end. This is to match filemap_write_and_wait_range()
 * behaviour.
 *
 * Returns: errno
 */

static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	int sync_state = inode->i_state & I_DIRTY;
	struct gfs2_inode *ip = GFS2_I(inode);
	int ret = 0, ret1 = 0;

	if (mapping->nrpages) {
		ret1 = filemap_fdatawrite_range(mapping, start, end);
		if (ret1 == -EIO)
			return ret1;
	}

	if (!gfs2_is_jdata(ip))
		sync_state &= ~I_DIRTY_PAGES;
	if (datasync)
		sync_state &= ~I_DIRTY_SYNC;

	if (sync_state) {
		ret = sync_inode_metadata(inode, 1);
		if (ret)
			return ret;
		if (gfs2_is_jdata(ip))
			filemap_write_and_wait(mapping);
		gfs2_ail_flush(ip->i_gl, 1);
	}

	if (mapping->nrpages)
		ret = filemap_fdatawait_range(mapping, start, end);

	return ret ? ret : ret1;
}

/**
 * gfs2_file_aio_write - Perform a write to a file
 * @iocb: The io context
 * @iov: The data to write
 * @nr_segs: Number of @iov segments
 * @pos: The file position
 *
 * We have to do a lock/unlock here to refresh the inode size for
 * O_APPEND writes, otherwise we can land up writing at the wrong
 * offset. There is still a race, but provided the app is using its
 * own file locking, this will make O_APPEND work as expected.
 *
 */

static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
				   unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	size_t writesize = iov_length(iov, nr_segs);
	struct gfs2_inode *ip = GFS2_I(file_inode(file));
	int ret;

	ret = gfs2_rs_alloc(ip);
	if (ret)
		return ret;

	gfs2_size_hint(file, pos, writesize);

	if (file->f_flags & O_APPEND) {
		struct gfs2_holder gh;

		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
		if (ret)
			return ret;
		gfs2_glock_dq_uninit(&gh);
	}

	return generic_file_aio_write(iocb, iov, nr_segs, pos);
}

static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
			   int mode)
{
	struct gfs2_inode *ip = GFS2_I(inode);
	struct buffer_head *dibh;
	int error;
	loff_t size = len;
	unsigned int nr_blks;
	sector_t lblock = offset >> inode->i_blkbits;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (unlikely(error))
		return error;

	gfs2_trans_add_meta(ip->i_gl, dibh);

	if (gfs2_is_stuffed(ip)) {
		error = gfs2_unstuff_dinode(ip, NULL);
		if (unlikely(error))
			goto out;
	}

	while (len) {
		struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
		bh_map.b_size = len;
		set_buffer_zeronew(&bh_map);

		error = gfs2_block_map(inode, lblock, &bh_map, 1);
		if (unlikely(error))
			goto out;
		len -= bh_map.b_size;
		nr_blks = bh_map.b_size >> inode->i_blkbits;
		lblock += nr_blks;
		if (!buffer_new(&bh_map))
			continue;
		if (unlikely(!buffer_zeronew(&bh_map))) {
			error = -EIO;
			goto out;
		}
	}
	if (offset + size > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE))
		i_size_write(inode, offset + size);

	mark_inode_dirty(inode);
out:
	brelse(dibh);
	return error;
}

static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
			    unsigned int *data_blocks, unsigned int *ind_blocks)
{
	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	unsigned int max_blocks = ip->i_rgd->rd_free_clone;
	unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);

	for (tmp = max_data; tmp > sdp->sd_diptrs;) {
		tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
		max_data -= tmp;
	}
	/* This calculation isn't the exact reverse of gfs2_write_calc_reserv,
	   so it might end up with fewer data blocks */
	if (max_data <= *data_blocks)
		return;
	*data_blocks = max_data;
	*ind_blocks = max_blocks - max_data;
	*len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
	if (*len > max) {
		*len = max;
		gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
	}
}

static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
			   loff_t len)
{
	struct inode *inode = file_inode(file);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_alloc_parms ap = { .aflags = 0, };
	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
	loff_t bytes, max_bytes;
	int error;
	const loff_t pos = offset;
	const loff_t count = len;
	loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
	loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
	loff_t max_chunk_size = UINT_MAX & bsize_mask;
	struct gfs2_holder gh;

	next = (next + 1) << sdp->sd_sb.sb_bsize_shift;

	/* We only support the FALLOC_FL_KEEP_SIZE mode */
	if (mode & ~FALLOC_FL_KEEP_SIZE)
		return -EOPNOTSUPP;

	offset &= bsize_mask;

	len = next - offset;
	bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
	if (!bytes)
		bytes = UINT_MAX;
	bytes &= bsize_mask;
	if (bytes == 0)
		bytes = sdp->sd_sb.sb_bsize;

	error = gfs2_rs_alloc(ip);
	if (error)
		return error;

	mutex_lock(&inode->i_mutex);

	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
	error = gfs2_glock_nq(&gh);
	if (unlikely(error))
		goto out_uninit;

	gfs2_size_hint(file, offset, len);

	while (len > 0) {
		if (len < bytes)
			bytes = len;
		if (!gfs2_write_alloc_required(ip, offset, bytes)) {
			len -= bytes;
			offset += bytes;
			continue;
		}
		error = gfs2_quota_lock_check(ip);
		if (error)
			goto out_unlock;

retry:
		gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);

		ap.target = data_blocks + ind_blocks;
		error = gfs2_inplace_reserve(ip, &ap);
		if (error) {
			if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
				bytes >>= 1;
				bytes &= bsize_mask;
				if (bytes == 0)
					bytes = sdp->sd_sb.sb_bsize;
				goto retry;
			}
			goto out_qunlock;
		}
		max_bytes = bytes;
		calc_max_reserv(ip,
				(len > max_chunk_size) ? max_chunk_size : len,
				&max_bytes, &data_blocks, &ind_blocks);

		rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
			  RES_RG_HDR +
			  gfs2_rg_blocks(ip, data_blocks + ind_blocks);
		if (gfs2_is_jdata(ip))
			rblocks += data_blocks ? data_blocks : 1;

		error = gfs2_trans_begin(sdp, rblocks,
					 PAGE_CACHE_SIZE / sdp->sd_sb.sb_bsize);
		if (error)
			goto out_trans_fail;

		error = fallocate_chunk(inode, offset, max_bytes, mode);
		gfs2_trans_end(sdp);

		if (error)
			goto out_trans_fail;

		len -= max_bytes;
		offset += max_bytes;
		gfs2_inplace_release(ip);
		gfs2_quota_unlock(ip);
	}

	if (error == 0)
		error = generic_write_sync(file, pos, count);
	goto out_unlock;

out_trans_fail:
	gfs2_inplace_release(ip);
out_qunlock:
	gfs2_quota_unlock(ip);
out_unlock:
	gfs2_glock_dq(&gh);
out_uninit:
	gfs2_holder_uninit(&gh);
	mutex_unlock(&inode->i_mutex);
	return error;
}

#ifdef CONFIG_GFS2_FS_LOCKING_DLM

/**
 * gfs2_setlease - acquire/release a file lease
 * @file: the file pointer
 * @arg: lease type
 * @fl: file lock
 *
 * We don't currently have a way to enforce a lease across the whole
 * cluster; until we do, disable leases (by just returning -EINVAL),
 * unless the administrator has requested purely local locking.
 *
 * Locking: called under i_lock
 *
 * Returns: errno
 */

static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl)
{
	return -EINVAL;
}

/**
 * gfs2_lock - acquire/release a posix lock on a file
 * @file: the file pointer
 * @cmd: either modify or retrieve lock state, possibly wait
 * @fl: type and range of lock
 *
 * Returns: errno
 */

static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
{
	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
	struct gfs2_sbd *sdp = GFS2_SB(file->f_mapping->host);
	struct lm_lockstruct *ls = &sdp->sd_lockstruct;

	if (!(fl->fl_flags & FL_POSIX))
		return -ENOLCK;
	if (__mandatory_lock(&ip->i_inode) && fl->fl_type != F_UNLCK)
		return -ENOLCK;

	if (cmd == F_CANCELLK) {
		/* Hack: */
		cmd = F_SETLK;
		fl->fl_type = F_UNLCK;
	}
	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) {
		if (fl->fl_type == F_UNLCK)
			posix_lock_file_wait(file, fl);
		return -EIO;
	}
	if (IS_GETLK(cmd))
		return dlm_posix_get(ls->ls_dlm, ip->i_no_addr, file, fl);
	else if (fl->fl_type == F_UNLCK)
		return dlm_posix_unlock(ls->ls_dlm, ip->i_no_addr, file, fl);
	else
		return dlm_posix_lock(ls->ls_dlm, ip->i_no_addr, file, cmd, fl);
}

static int do_flock(struct file *file, int cmd, struct file_lock *fl)
{
	struct gfs2_file *fp = file->private_data;
	struct gfs2_holder *fl_gh = &fp->f_fl_gh;
	struct gfs2_inode *ip = GFS2_I(file_inode(file));
	struct gfs2_glock *gl;
	unsigned int state;
	int flags;
	int error = 0;

	state = (fl->fl_type == F_WRLCK) ? LM_ST_EXCLUSIVE : LM_ST_SHARED;
	flags = (IS_SETLKW(cmd) ? 0 : LM_FLAG_TRY) | GL_EXACT | GL_NOCACHE;

	mutex_lock(&fp->f_fl_mutex);

	gl = fl_gh->gh_gl;
	if (gl) {
		if (fl_gh->gh_state == state)
			goto out;
		flock_lock_file_wait(file,
				     &(struct file_lock){.fl_type = F_UNLCK});
		gfs2_glock_dq_wait(fl_gh);
		gfs2_holder_reinit(state, flags, fl_gh);
	} else {
		error = gfs2_glock_get(GFS2_SB(&ip->i_inode), ip->i_no_addr,
				       &gfs2_flock_glops, CREATE, &gl);
		if (error)
			goto out;
		gfs2_holder_init(gl, state, flags, fl_gh);
		gfs2_glock_put(gl);
	}
	error = gfs2_glock_nq(fl_gh);
	if (error) {
		gfs2_holder_uninit(fl_gh);
		if (error == GLR_TRYFAILED)
			error = -EAGAIN;
	} else {
		error = flock_lock_file_wait(file, fl);
		gfs2_assert_warn(GFS2_SB(&ip->i_inode), !error);
	}

out:
	mutex_unlock(&fp->f_fl_mutex);
	return error;
}
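/*
 * gfs2_lock() routes fcntl(2) POSIX locks through the DLM, and do_flock()
 * maps flock(2) requests onto glock states. A minimal userspace sketch of
 * the fcntl side that both paths ultimately serve (the file name is a
 * placeholder):
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fd = open("lockfile", O_RDWR | O_CREAT, 0644);
	struct flock fl;

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Take a write (exclusive) lock on the first 100 bytes;
	 * F_SETLKW blocks, like the IS_SETLKW(cmd) case in the kernel. */
	memset(&fl, 0, sizeof(fl));
	fl.l_type = F_WRLCK;
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 100;
	if (fcntl(fd, F_SETLKW, &fl) < 0) {
		perror("F_SETLKW");
		return 1;
	}
	puts("locked bytes [0, 100)");

	/* Release: same range with F_UNLCK, mirroring dlm_posix_unlock(). */
	fl.l_type = F_UNLCK;
	if (fcntl(fd, F_SETLK, &fl) < 0)
		perror("F_UNLCK");
	close(fd);
	return 0;
}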
STATIC ssize_t
xfs_file_dio_aio_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct address_space	*mapping = iocb->ki_filp->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	loff_t			isize = i_size_read(inode);
	size_t			count = iov_iter_count(to);
	struct iov_iter		data;
	struct xfs_buftarg	*target;
	ssize_t			ret = 0;

	trace_xfs_file_direct_read(ip, count, iocb->ki_pos);

	if (!count)
		return 0; /* skip atime */

	if (XFS_IS_REALTIME_INODE(ip))
		target = ip->i_mount->m_rtdev_targp;
	else
		target = ip->i_mount->m_ddev_targp;

	/* DIO must be aligned to device logical sector size */
	if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
		if (iocb->ki_pos == isize)
			return 0;
		return -EINVAL;
	}

	file_accessed(iocb->ki_filp);

	/*
	 * Locking is a bit tricky here. If we take an exclusive lock for
	 * direct IO, we effectively serialise all new concurrent read IO to
	 * this file and block it behind IO that is currently in progress
	 * because IO in progress holds the IO lock shared. We only need to
	 * hold the lock exclusive to blow away the page cache, so only take
	 * lock exclusively if the page cache needs invalidation. This allows
	 * the normal direct IO case of no page cache pages to proceed
	 * concurrently without serialisation.
	 */
	xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
	if (mapping->nrpages) {
		xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);

		/*
		 * The generic dio code only flushes the range of the
		 * particular I/O. Because we take an exclusive lock here,
		 * this whole sequence is considerably more expensive for us.
		 * This has a noticeable performance impact for any file with
		 * cached pages, even when outside of the range of the
		 * particular I/O.
		 *
		 * Hence, amortize the cost of the lock against a full file
		 * flush and reduce the chances of repeated iolock cycles
		 * going forward.
		 */
		if (mapping->nrpages) {
			ret = filemap_write_and_wait(mapping);
			if (ret) {
				xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
				return ret;
			}

			/*
			 * Invalidate whole pages. This can return an error if
			 * we fail to invalidate a page, but this should never
			 * happen on XFS. Warn if it does fail.
			 */
			ret = invalidate_inode_pages2(mapping);
			WARN_ON_ONCE(ret);
			ret = 0;
		}
		xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
	}

	data = *to;
	ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
			xfs_get_blocks_direct, NULL, NULL, 0);
	if (ret >= 0) {
		iocb->ki_pos += ret;
		iov_iter_advance(to, ret);
	}
	xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);

	return ret;
}
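/*
 * The sector-alignment check above is what makes unaligned O_DIRECT reads
 * fail with EINVAL. A userspace sketch of a correctly aligned direct read;
 * 4096 is an assumed logical block size here, and a real program would
 * query it (e.g. with ioctl(BLKSSZGET) on the block device):
 */
#define _GNU_SOURCE		/* for O_DIRECT */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define ALIGN_SIZE 4096		/* assumed logical sector size */

int main(int argc, char **argv)
{
	void *buf;
	int fd = open(argc > 1 ? argv[1] : "data.bin", O_RDONLY | O_DIRECT);
	ssize_t n;

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Buffer address, file offset and length must all be sector-aligned,
	 * or the filesystem returns -EINVAL as in the check above. */
	if (posix_memalign(&buf, ALIGN_SIZE, ALIGN_SIZE)) {
		perror("posix_memalign");
		return 1;
	}
	n = pread(fd, buf, ALIGN_SIZE, 0);
	if (n < 0)
		perror("pread");
	else
		printf("read %zd bytes via O_DIRECT\n", n);
	free(buf);
	close(fd);
	return 0;
}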
static int vvp_io_read_start(const struct lu_env *env,
			     const struct cl_io_slice *ios)
{
	struct vvp_io *vio = cl2vvp_io(env, ios);
	struct ccc_io *cio = cl2ccc_io(env, ios);
	struct cl_io *io = ios->cis_io;
	struct cl_object *obj = io->ci_obj;
	struct inode *inode = ccc_object_inode(obj);
	struct ll_ra_read *bead = &vio->cui_bead;
	struct file *file = cio->cui_fd->fd_file;

	int result;
	loff_t pos = io->u.ci_rd.rd.crw_pos;
	long cnt = io->u.ci_rd.rd.crw_count;
	long tot = cio->cui_tot_count;
	int exceed = 0;

	CLOBINVRNT(env, obj, ccc_object_invariant(obj));

	CDEBUG(D_VFSTRACE, "read: -> [%lli, %lli)\n", pos, pos + cnt);

	if (!can_populate_pages(env, io, inode))
		return 0;

	result = ccc_prep_size(env, obj, io, pos, tot, &exceed);
	if (result != 0)
		return result;
	else if (exceed != 0)
		goto out;

	LU_OBJECT_HEADER(D_INODE, env, &obj->co_lu,
			"Read ino %lu, %lu bytes, offset %lld, size %llu\n",
			inode->i_ino, cnt, pos, i_size_read(inode));

	/* turn off the kernel's read-ahead */
	cio->cui_fd->fd_file->f_ra.ra_pages = 0;

	/* initialize read-ahead window once per syscall */
	if (!vio->cui_ra_window_set) {
		vio->cui_ra_window_set = 1;
		bead->lrr_start = cl_index(obj, pos);
		bead->lrr_count = cl_index(obj, tot + PAGE_CACHE_SIZE - 1);
		ll_ra_read_in(file, bead);
	}

	/* BUG: 5972 */
	file_accessed(file);
	switch (vio->cui_io_subtype) {
	case IO_NORMAL:
		result = lustre_generic_file_read(file, cio, &pos);
		break;
	case IO_SPLICE:
		result = generic_file_splice_read(file, &pos,
						  vio->u.splice.cui_pipe, cnt,
						  vio->u.splice.cui_flags);
		/* LU-1109: do splice read stripe by stripe, otherwise it may
		 * make nfsd stuck if this read occupied all internal pipe
		 * buffers. */
		io->ci_continue = 0;
		break;
	default:
		CERROR("Wrong IO type %u\n", vio->cui_io_subtype);
		LBUG();
	}

out:
	if (result >= 0) {
		if (result < cnt)
			io->ci_continue = 0;
		io->ci_nob += result;
		ll_rw_stats_tally(ll_i2sbi(inode), current->pid, cio->cui_fd,
				  pos, result, READ);
		result = 0;
	}
	return result;
}
static ssize_t ncp_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	size_t already_read = 0;
	off_t pos = iocb->ki_pos;
	size_t bufsize;
	int error;
	void *freepage;
	size_t freelen;

	ncp_dbg(1, "enter %pD2\n", file);

	if (!iov_iter_count(to))
		return 0;
	if (pos > inode->i_sb->s_maxbytes)
		return 0;
	iov_iter_truncate(to, inode->i_sb->s_maxbytes - pos);

	error = ncp_make_open(inode, O_RDONLY);
	if (error) {
		ncp_dbg(1, "open failed, error=%d\n", error);
		return error;
	}

	bufsize = NCP_SERVER(inode)->buffer_size;

	error = -EIO;
	freelen = ncp_read_bounce_size(bufsize);
	freepage = vmalloc(freelen);
	if (!freepage)
		goto outrel;
	error = 0;
	/* First read in as much as possible for each bufsize. */
	while (iov_iter_count(to)) {
		int read_this_time;
		size_t to_read = min_t(size_t,
				       bufsize - (pos % bufsize),
				       iov_iter_count(to));

		error = ncp_read_bounce(NCP_SERVER(inode),
					NCP_FINFO(inode)->file_handle,
					pos, to_read, to, &read_this_time,
					freepage, freelen);
		if (error) {
			error = -EIO;	/* NW errno -> Linux errno */
			break;
		}
		pos += read_this_time;
		already_read += read_this_time;

		if (read_this_time != to_read)
			break;
	}
	vfree(freepage);

	iocb->ki_pos = pos;

	file_accessed(file);

	ncp_dbg(1, "exit %pD2\n", file);
outrel:
	ncp_inode_close(inode);
	return already_read ? already_read : error;
}
static int wrapfs_mmap(struct file *file, struct vm_area_struct *vma)
{
	int err = 0;
	bool willwrite;
	struct file *lower_file;
	const struct vm_operations_struct *saved_vm_ops = NULL;

#ifdef DEBUG_SUPPORT
	if (debug_support(wrapfs_lower_file(file)->f_dentry->d_sb, "file"))
		UDBG;
#endif

	/* this might be deferred to mmap's writepage */
	willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);

	/*
	 * File systems which do not implement ->writepage may use
	 * generic_file_readonly_mmap as their ->mmap op.  If you call
	 * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
	 * But we cannot call the lower ->mmap op, so we can't tell that
	 * writeable mappings won't work.  Therefore, our only choice is to
	 * check if the lower file system supports the ->writepage, and if
	 * not, return EINVAL (the same error that
	 * generic_file_readonly_mmap returns in that case).
	 */
	lower_file = wrapfs_lower_file(file);
	if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
		err = -EINVAL;
		printk(KERN_ERR "wrapfs: lower file system does not "
		       "support writeable mmap\n");
		goto out;
	}

	/*
	 * find and save lower vm_ops.
	 *
	 * XXX: the VFS should have a cleaner way of finding the lower vm_ops
	 */
	if (!WRAPFS_F(file)->lower_vm_ops) {
		err = lower_file->f_op->mmap(lower_file, vma);
		if (err) {
			printk(KERN_ERR "wrapfs: lower mmap failed %d\n", err);
			goto out;
		}
		saved_vm_ops = vma->vm_ops; /* save: came from lower ->mmap */
		err = do_munmap(current->mm, vma->vm_start,
				vma->vm_end - vma->vm_start);
		if (err) {
			printk(KERN_ERR "wrapfs: do_munmap failed %d\n", err);
			goto out;
		}
	}

	/*
	 * Next 3 lines are all I need from generic_file_mmap.  I definitely
	 * don't want its test for ->readpage which returns -ENOEXEC.
	 */
	file_accessed(file);
	vma->vm_ops = &wrapfs_vm_ops;
	vma->vm_flags |= VM_CAN_NONLINEAR;

	file->f_mapping->a_ops = &wrapfs_aops; /* set our aops */
	if (!WRAPFS_F(file)->lower_vm_ops) /* save for our ->fault */
		WRAPFS_F(file)->lower_vm_ops = saved_vm_ops;

out:
#ifdef DEBUG_SUPPORT
	if (debug_support(lower_file->f_dentry->d_sb, "file"))
		UDBGE(err);
#endif
	return err;
}
static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	vma->vm_ops = &nilfs_file_vm_ops;
	return 0;
}
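/*
 * All of the ->mmap implementations here end the same way: mark the file
 * accessed and install the vm_ops that will service faults. The userspace
 * side is just mmap(2); a minimal sketch that maps a file and reads it
 * through the mapping (the default path is only an example):
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	int fd = open(argc > 1 ? argv[1] : "/etc/hostname", O_RDONLY);
	struct stat st;
	char *p;

	if (fd < 0 || fstat(fd, &st) < 0 || st.st_size == 0) {
		perror("open/fstat");
		return 1;
	}
	/* Each first touch of a page goes through the installed
	 * vm_ops->fault handler (filemap_fault for the filesystems above). */
	p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	fwrite(p, 1, st.st_size, stdout);
	munmap(p, st.st_size);
	close(fd);
	return 0;
}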
static int sdcardfs_mmap(struct file *file, struct vm_area_struct *vma)
{
	int err = 0;
	bool willwrite;
	struct file *lower_file;
	const struct vm_operations_struct *saved_vm_ops = NULL;

#ifdef CONFIG_TIMA_IOMMU_OPT
	if (vma->vm_end - vma->vm_start) {
		/* iommu optimization - needs to be turned ON from
		 * the tz side.
		 */
		cpu_v7_tima_iommu_opt(vma->vm_start, vma->vm_end,
				      (unsigned long)vma->vm_mm->pgd);
	}
#endif /* CONFIG_TIMA_IOMMU_OPT */

	/* this might be deferred to mmap's writepage */
	willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);

	/*
	 * File systems which do not implement ->writepage may use
	 * generic_file_readonly_mmap as their ->mmap op.  If you call
	 * generic_file_readonly_mmap with VM_WRITE, you'd get an -EINVAL.
	 * But we cannot call the lower ->mmap op, so we can't tell that
	 * writeable mappings won't work.  Therefore, our only choice is to
	 * check if the lower file system supports the ->writepage, and if
	 * not, return EINVAL (the same error that
	 * generic_file_readonly_mmap returns in that case).
	 */
	lower_file = sdcardfs_lower_file(file);
	if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
		err = -EINVAL;
		printk(KERN_ERR "sdcardfs: lower file system does not "
		       "support writeable mmap\n");
		goto out;
	}

	/*
	 * find and save lower vm_ops.
	 *
	 * XXX: the VFS should have a cleaner way of finding the lower vm_ops
	 */
	if (!SDCARDFS_F(file)->lower_vm_ops) {
		err = lower_file->f_op->mmap(lower_file, vma);
		if (err) {
			printk(KERN_ERR "sdcardfs: lower mmap failed %d\n",
			       err);
			goto out;
		}
		saved_vm_ops = vma->vm_ops; /* save: came from lower ->mmap */
		err = do_munmap(current->mm, vma->vm_start,
				vma->vm_end - vma->vm_start);
		if (err) {
			printk(KERN_ERR "sdcardfs: do_munmap failed %d\n",
			       err);
			goto out;
		}
	}

	/*
	 * Next 3 lines are all I need from generic_file_mmap.  I definitely
	 * don't want its test for ->readpage which returns -ENOEXEC.
	 */
	file_accessed(file);
	vma->vm_ops = &sdcardfs_vm_ops;

	file->f_mapping->a_ops = &sdcardfs_aops; /* set our aops */
	if (!SDCARDFS_F(file)->lower_vm_ops) /* save for our ->fault */
		SDCARDFS_F(file)->lower_vm_ops = saved_vm_ops;

out:
	return err;
}
ssize_t hmfs_xip_file_read(struct file *filp, char __user *buf, size_t len,
			   loff_t *ppos)
{
	/* from do_XIP_mapping_read */
	struct inode *inode = filp->f_inode;
	pgoff_t index, end_index;
	unsigned long offset;
	loff_t isize, pos;
	size_t copied = 0;
	ssize_t error = 0;

	pos = *ppos;
	index = pos >> HMFS_PAGE_SIZE_BITS;
	offset = pos & ~HMFS_PAGE_MASK;

	mutex_lock(&inode->i_mutex);
	isize = i_size_read(inode);
	if (!isize)
		goto out;

	end_index = (isize - 1) >> HMFS_PAGE_SIZE_BITS;
	/*
	 * nr     : read length for this loop
	 * offset : start inner-blk offset this loop
	 * index  : start inner-file blk number this loop
	 * copied : read length so far
	 * FIXME: pending for write_lock? anything like access_ok()
	 */
	do {
		unsigned long nr, left;
		void *xip_mem[1];
		int zero = 0;
		int size;

		/* nr is the maximum number of bytes to copy from this page */
		nr = HMFS_PAGE_SIZE;
		if (index >= end_index) {
			if (index > end_index)
				goto out;
			nr = ((isize - 1) & ~HMFS_PAGE_MASK) + 1;
			if (nr <= offset)
				goto out;
		}
		nr = nr - offset;
		if (nr > len - copied)
			nr = len - copied;

		BUG_ON(nr > HMFS_PAGE_SIZE);

		error = get_data_blocks(inode, index, index + 1, xip_mem,
					&size, RA_END);

		if (unlikely(error || size != 1)) {
			if (error == -ENODATA) {
				/* sparse */
				zero = 1;
			} else {
				goto out;
			}
		}

		/* copy to user space */
		if (!zero)
			left = __copy_to_user(buf + copied,
					      xip_mem[0] + offset, nr);
		else
			left = __clear_user(buf + copied, nr);

		if (left) {
			error = -EFAULT;
			goto out;
		}

		copied += (nr - left);
		offset += (nr - left);
		index += offset >> HMFS_PAGE_SIZE_BITS;
		offset &= ~HMFS_PAGE_MASK;
	} while (copied < len);

out:
	mutex_unlock(&inode->i_mutex);
	*ppos = pos + copied;
	if (filp)
		file_accessed(filp);

	return copied ? copied : error;
}
ssize_t rawfs_reg_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
				 unsigned long nr_segs, loff_t pos)
{
	struct file *filp = iocb->ki_filp;
	struct super_block *sb = filp->f_path.dentry->d_sb;
	struct rawfs_sb_info *rawfs_sb = RAWFS_SB(sb);
	struct inode *inode = filp->f_mapping->host;
	struct rawfs_inode_info *inode_info = RAWFS_I(inode);
	ssize_t retval;
	loff_t *ppos = &iocb->ki_pos;
	struct rawfs_file_info *fi = NULL;
	unsigned int curr_file_pos = pos;
	unsigned int curr_buf_pos = 0;
	int starting_page, total_pages;
	int remain_buf_size;
	int result;
	struct rawfs_file_list_entry *entry;
	const struct iovec *iv = &iov[0];  // TODO: Process all io vectors

	// Always use direct I/O

	// Get Lock
	mutex_lock(&rawfs_sb->rawfs_lock);

	retval = iov_length(iov, nr_segs);

	RAWFS_PRINT(RAWFS_DBG_FILE, "rawfs_reg_file_aio_write %s, pos %lld, "
		    "len %d\n", inode_info->i_name, pos, retval);

	fi = kzalloc(sizeof(struct rawfs_file_info), GFP_NOFS);
	if (!fi) {
		retval = -ENOMEM;
		goto out;
	}

	/* rawfs_fill_file_info(inode , fi); */
	rawfs_fill_fileinfo_by_dentry(filp->f_path.dentry, fi);

	/* Update file_info */
	if ((pos + retval) > fi->i_size)
		fi->i_size = pos + retval;

	fi->i_chunk_index = 1;
	fi->i_chunk_total = S_ISDIR(fi->i_mode) ? 1 :
		CEILING((unsigned)fi->i_size, rawfs_sb->page_data_size);

	/* do GC, if the required space is not enough */
	result = rawfs_reserve_space(sb, fi->i_chunk_total);
	if (result < 0) {
		retval = result;
		goto out;
	}

	// get entry from file list
	entry = rawfs_file_list_get(sb, fi->i_name, fi->i_parent_folder_id);
	if (!entry) {
		RAWFS_PRINT(RAWFS_DBG_FILE, "rawfs_reg_file_aio_write: %s "
			    "file list entry missing\n", fi->i_name);
		dump_stack();
		retval = -EIO;
		goto out;
	}

	// Write File
	{
		int preceding_pages, rear_pages;
		struct rawfs_page *page_buf = NULL;
		int i;

		// Prepare page buffer
		page_buf = kzalloc(rawfs_sb->page_size, GFP_NOFS);
		if (page_buf == NULL) {
			retval = -ENOMEM;
			goto out;
		}

		starting_page = rawfs_sb->data_block_free_page_index;
		preceding_pages = FLOOR((unsigned)pos,
					rawfs_sb->page_data_size);
		total_pages = CEILING((unsigned)fi->i_size,
				      rawfs_sb->page_data_size);
		rear_pages = CEILING((unsigned)pos + iv->iov_len,
				     rawfs_sb->page_data_size);
		remain_buf_size = iv->iov_len;

		RAWFS_PRINT(RAWFS_DBG_FILE, "rawfs_reg_file_aio_write %s, "
			    "preceding_pages %d, rear_pages %d, "
			    "total_pages %d, remain_buf_size %d\n",
			    inode_info->i_name, preceding_pages, rear_pages,
			    total_pages, remain_buf_size);

		// Step 1: Copy preceding pages, if starting pos is not 0.
		for (i = 0; i < total_pages; i++) {
			// Read, if necessary (no need for new files)
			if ((i <= preceding_pages) || (i >= rear_pages))
				rawfs_sb->dev.read_page(sb,
					entry->i_location_block,
					entry->i_location_page + i, page_buf);

			// Update info
			memcpy(&page_buf->i_info.i_file_info, fi,
			       sizeof(struct rawfs_file_info));

			// Within Modify Range: Copy & Modify
			if ((i >= preceding_pages) && (i <= rear_pages)) {
				int start_in_buf;
				int copy_len;

				start_in_buf = (curr_file_pos %
						rawfs_sb->page_data_size);
				copy_len = ((start_in_buf + remain_buf_size) >
					    rawfs_sb->page_data_size) ?
					(rawfs_sb->page_data_size -
					 start_in_buf) : remain_buf_size;

				if (copy_from_user(&page_buf->i_data[0] +
						   start_in_buf,
						   (char *)iv->iov_base +
						   curr_buf_pos, copy_len)) {
					retval = -EFAULT;
					goto out2;
				}

				curr_file_pos += copy_len;
				curr_buf_pos += copy_len;
				remain_buf_size -= copy_len;

				RAWFS_PRINT(RAWFS_DBG_FILE,
					"rawfs_reg_file_aio_write %s, %d, "
					"curr_buf_pos %d, remain_buf_size %d "
					"start_in_buf %d copy_len %d "
					"starting pattern %X\n",
					inode_info->i_name, i, curr_buf_pos,
					remain_buf_size, start_in_buf,
					copy_len,
					*(unsigned int *)(&page_buf->i_data[0] +
							  start_in_buf));
			}

			rawfs_page_signature(sb, page_buf);

			// Write
			rawfs_sb->dev.write_page(sb, rawfs_sb->data_block,
				rawfs_sb->data_block_free_page_index,
				page_buf);

			rawfs_sb->data_block_free_page_index++;
			fi->i_chunk_index++;
		}
out2:
		if (page_buf)
			kfree(page_buf);
	}

	if (retval > 0)
		*ppos = pos + retval;

	file_accessed(filp);

	// Update Inode: file size, block, page
	i_size_write(inode, fi->i_size);  // TODO: Get inode lock when update
	inode_info->i_location_block = rawfs_sb->data_block;
	inode_info->i_location_page = starting_page;
	inode_info->i_location_page_count = total_pages;

	// update location
	entry->i_location_block = rawfs_sb->data_block;
	entry->i_location_page = starting_page;
	entry->i_location_page_count = total_pages;

out:
	if (fi)
		kfree(fi);

	// Release Lock
	mutex_unlock(&rawfs_sb->rawfs_lock);

	return retval;
}
//-----------------------------------------------------------------------------
// Regular File Operation
//-----------------------------------------------------------------------------
ssize_t rawfs_reg_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
				unsigned long nr_segs, loff_t pos)
{
	struct file *filp = iocb->ki_filp;
	struct super_block *sb = filp->f_path.dentry->d_sb;
	struct rawfs_sb_info *rawfs_sb = RAWFS_SB(sb);
	struct inode *inode = filp->f_mapping->host;
	struct rawfs_inode_info *inode_info = RAWFS_I(inode);
	ssize_t retval;
	size_t count;
	loff_t *ppos = &iocb->ki_pos;
	loff_t size;
	unsigned int curr_file_pos = pos;
	unsigned int curr_buf_pos = 0;
	int remain_buf_size;
	const struct iovec *iv = &iov[0];  // TODO: Process all io vectors

	retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE);

	RAWFS_PRINT(RAWFS_DBG_FILE, "rawfs_reg_file_aio_read %s, segment "
		    "check result %d\n", inode_info->i_name, retval);

	if (retval)
		return retval;

	mutex_lock(&rawfs_sb->rawfs_lock);

	retval = iov_length(iov, nr_segs);
	size = i_size_read(inode);

	RAWFS_PRINT(RAWFS_DBG_FILE, "rawfs_reg_file_aio_read %s, pos %lld, "
		    "len %d, filesize: %lld\n", inode_info->i_name, pos,
		    retval, size);

	if ((retval + pos) >= size)
		retval = size - pos;

	if (pos < size) {
		// Read File
		{
			int preceding_pages, rear_pages;
			struct rawfs_page *page_buf = NULL;
			int i;

			// Prepare page buffer
			page_buf = kzalloc(rawfs_sb->page_size, GFP_NOFS);
			if (page_buf == NULL) {
				retval = -ENOMEM;
				goto out;
			}

			preceding_pages = FLOOR((unsigned)pos,
						rawfs_sb->page_data_size);
			rear_pages = CEILING((unsigned)pos + retval,
					     rawfs_sb->page_data_size);
			remain_buf_size = retval;

			RAWFS_PRINT(RAWFS_DBG_FILE, "rawfs_reg_file_aio_read "
				    "%s, preceding_pages %d, rear_pages %d, "
				    "remain_buf_size %d\n",
				    inode_info->i_name, preceding_pages,
				    rear_pages, remain_buf_size);

			// Step 1: Copy preceding pages, if starting pos is not 0.
			for (i = preceding_pages; i < rear_pages; i++) {
				__u32 crc;

				// Read page
				rawfs_sb->dev.read_page(sb,
					inode_info->i_location_block,
					inode_info->i_location_page + i,
					page_buf);

				// TODO: skip this page, if unrecoverable
				// error occurs

				/* CRC error should not happen, since we have
				   already checked them at bootup */
				crc = rawfs_page_crc_data(sb, page_buf);

				if (crc != page_buf->i_crc)
					RAWFS_PRINT(RAWFS_DBG_FILE,
						"rawfs_reg_file_aio_read: "
						"%s @ %X, crc fail %X, "
						"expected %X\n",
						page_buf->i_info.i_file_info.i_name,
						page_buf->i_info.i_file_info.i_parent_folder_id,
						crc, page_buf->i_crc);
				else
					RAWFS_PRINT(RAWFS_DBG_FILE,
						"rawfs_reg_file_aio_read: "
						"%s @ %X, crc %X\n",
						page_buf->i_info.i_file_info.i_name,
						page_buf->i_info.i_file_info.i_parent_folder_id,
						page_buf->i_crc);

				/* Copy required parts */
				{
					int start_in_buf;
					int copy_len;

					start_in_buf = (curr_file_pos %
						rawfs_sb->page_data_size);
					copy_len = ((start_in_buf +
						     remain_buf_size) >
						    rawfs_sb->page_data_size) ?
						(rawfs_sb->page_data_size -
						 start_in_buf) :
						remain_buf_size;

					if (copy_to_user((char *)iv->iov_base +
							 curr_buf_pos,
							 &page_buf->i_data[0] +
							 start_in_buf,
							 copy_len)) {
						retval = -EFAULT;
						goto out2;
					}

					RAWFS_PRINT(RAWFS_DBG_FILE,
						"rawfs_reg_file_aio_read %s, "
						"%d, curr_buf_pos %d, "
						"remain_buf_size %d "
						"start_in_buf %d copy_len %d "
						"starting pattern %X\n",
						inode_info->i_name, i,
						curr_buf_pos, remain_buf_size,
						start_in_buf, copy_len,
						*(unsigned int *)(&page_buf->i_data[0] +
								  start_in_buf));

					curr_file_pos += copy_len;
					curr_buf_pos += copy_len;
					remain_buf_size -= copy_len;
				}
			}
out2:
			if (page_buf)
				kfree(page_buf);
		}

		if (retval > 0)
			*ppos = pos + retval;

		if (retval < 0 || *ppos >= size) {
			file_accessed(filp);
			goto out;
		}
	} else {
		retval = 0;
	}

out:
	// Release Lock
	mutex_unlock(&rawfs_sb->rawfs_lock);
	return retval;
}
/*
 * Common entry point for read/write/readv/writev
 * This function will dispatch it to either the direct I/O
 * or buffered I/O path depending on the mount options and/or
 * augmented/extended metadata attached to the file.
 * Note: File extended attributes override any mount options.
 */
static ssize_t do_readv_writev(enum ORANGEFS_io_type type, struct file *file,
		loff_t *offset, struct iov_iter *iter)
{
	struct inode *inode = file->f_mapping->host;
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
	size_t count = iov_iter_count(iter);
	ssize_t total_count = 0;
	ssize_t ret = -EINVAL;

	gossip_debug(GOSSIP_FILE_DEBUG,
		"%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
		__func__, handle, (int)count);

	if (type == ORANGEFS_IO_WRITE) {
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): proceeding with offset : %llu, "
			     "size %d\n",
			     __func__, handle, llu(*offset), (int)count);
	}

	if (count == 0) {
		ret = 0;
		goto out;
	}

	while (iov_iter_count(iter)) {
		size_t each_count = iov_iter_count(iter);
		size_t amt_complete;

		/* how much to transfer in this loop iteration */
		if (each_count > orangefs_bufmap_size_query())
			each_count = orangefs_bufmap_size_query();

		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): size of each_count(%d)\n",
			     __func__, handle, (int)each_count);
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): BEFORE wait_for_io: offset is %d\n",
			     __func__, handle, (int)*offset);

		ret = wait_for_direct_io(type, inode, offset, iter,
				each_count, 0);
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): return from wait_for_io:%d\n",
			     __func__, handle, (int)ret);

		if (ret < 0)
			goto out;

		*offset += ret;
		total_count += ret;
		amt_complete = ret;

		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): AFTER wait_for_io: offset is %d\n",
			     __func__, handle, (int)*offset);

		/*
		 * if we got a short I/O operation,
		 * fall out and return what we got so far
		 */
		if (amt_complete < each_count)
			break;
	} /* end while */

out:
	if (total_count > 0)
		ret = total_count;
	if (ret > 0) {
		if (type == ORANGEFS_IO_READ) {
			file_accessed(file);
		} else {
			SetMtimeFlag(orangefs_inode);
			inode->i_mtime = CURRENT_TIME;
			mark_inode_dirty_sync(inode);
		}
	}

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): Value(%d) returned.\n",
		     __func__, handle, (int)ret);

	return ret;
}
static ssize_t pipe_read(struct kiocb *iocb, struct iov_iter *to)
{
	size_t total_len = iov_iter_count(to);
	struct file *filp = iocb->ki_filp;
	struct pipe_inode_info *pipe = filp->private_data;
	int do_wakeup;
	ssize_t ret;

	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	__pipe_lock(pipe);
	for (;;) {
		int bufs = pipe->nrbufs;
		if (bufs) {
			int curbuf = pipe->curbuf;
			struct pipe_buffer *buf = pipe->bufs + curbuf;
			const struct pipe_buf_operations *ops = buf->ops;
			size_t chars = buf->len;
			size_t written;
			int error;

			if (chars > total_len)
				chars = total_len;

			error = ops->confirm(pipe, buf);
			if (error) {
				if (!ret)
					ret = error;
				break;
			}

			written = copy_page_to_iter(buf->page, buf->offset,
						    chars, to);
			if (unlikely(written < chars)) {
				if (!ret)
					ret = -EFAULT;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;

			/* Was it a packet buffer? Clean up and exit */
			if (buf->flags & PIPE_BUF_FLAG_PACKET) {
				total_len = chars;
				buf->len = 0;
			}

			if (!buf->len) {
				buf->ops = NULL;
				ops->release(pipe, buf);
				curbuf = (curbuf + 1) & (pipe->buffers - 1);
				pipe->curbuf = curbuf;
				pipe->nrbufs = --bufs;
				do_wakeup = 1;
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
		}
		if (bufs)	/* More to do? */
			continue;
		if (!atomic_read(&pipe->writers))
			break;
		if (!atomic_read(&pipe->waiting_writers)) {
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync_poll(&pipe->wait,
							POLLOUT | POLLWRNORM);
			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
		}
		pipe_wait(pipe);
	}
	__pipe_unlock(pipe);

	/* Signal writers asynchronously that there is more room. */
	if (do_wakeup) {
		wake_up_interruptible_sync_poll(&pipe->wait,
						POLLOUT | POLLWRNORM);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	if (ret > 0)
		file_accessed(filp);
	return ret;
}
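/*
 * The O_NONBLOCK branch above is directly observable from userspace: a
 * nonblocking read of an empty pipe fails with EAGAIN, while a zero-length
 * read succeeds immediately ("Null read succeeds"). A small sketch:
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	int fds[2];
	char c;
	ssize_t n;

	if (pipe(fds) < 0) {
		perror("pipe");
		return 1;
	}
	/* Make the read end nonblocking. */
	fcntl(fds[0], F_SETFL, O_NONBLOCK);

	/* Null read succeeds even on an empty pipe. */
	n = read(fds[0], &c, 0);
	printf("zero-length read: %zd\n", n);

	/* Empty pipe with a live writer: -1/EAGAIN, as in the kernel loop. */
	n = read(fds[0], &c, 1);
	printf("empty read: %zd (%s)\n", n, n < 0 ? strerror(errno) : "ok");

	/* After data arrives, the read returns it. */
	write(fds[1], "x", 1);
	n = read(fds[0], &c, 1);
	printf("read %zd byte: %c\n", n, c);
	return 0;
}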
static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_alloc_parms ap = { .aflags = 0, }; unsigned long last_index; u64 pos = page->index << PAGE_CACHE_SHIFT; unsigned int data_blocks, ind_blocks, rblocks; int alloc_required = 0; struct gfs2_holder gh; loff_t size; int ret; sb_start_pagefault(inode->i_sb); /* Update file times before taking page lock */ file_update_time(vma->vm_file); ret = get_write_access(inode); if (ret) goto out; ret = gfs2_rs_alloc(ip); if (ret) goto out_write_access; gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); ret = gfs2_glock_nq(&gh); if (ret) goto out_uninit; set_bit(GLF_DIRTY, &ip->i_gl->gl_flags); set_bit(GIF_SW_PAGED, &ip->i_flags); gfs2_size_hint(inode, pos, PAGE_CACHE_SIZE); ret = gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE, &alloc_required); if (ret) goto out_unlock; if (!alloc_required) { lock_page(page); if (!PageUptodate(page) || page->mapping != inode->i_mapping) { ret = -EAGAIN; unlock_page(page); } goto out_unlock; } ret = gfs2_rindex_update(sdp); if (ret) goto out_unlock; ret = gfs2_quota_lock_check(ip); if (ret) goto out_unlock; gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks); ap.target = data_blocks + ind_blocks; ret = gfs2_inplace_reserve(ip, &ap); if (ret) goto out_quota_unlock; rblocks = RES_DINODE + ind_blocks; if (gfs2_is_jdata(ip)) rblocks += data_blocks ? data_blocks : 1; if (ind_blocks || data_blocks) { rblocks += RES_STATFS + RES_QUOTA; rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); } ret = gfs2_trans_begin(sdp, rblocks, 0); if (ret) goto out_trans_fail; lock_page(page); ret = -EINVAL; size = i_size_read(inode); last_index = (size - 1) >> PAGE_CACHE_SHIFT; /* Check page index against inode size */ if (size == 0 || (page->index > last_index)) goto out_trans_end; ret = -EAGAIN; /* If truncated, we must retry the operation, we may have raced * with the glock demotion code. */ if (!PageUptodate(page) || page->mapping != inode->i_mapping) goto out_trans_end; /* Unstuff, if required, and allocate backing blocks for page */ ret = 0; if (gfs2_is_stuffed(ip)) ret = gfs2_unstuff_dinode(ip, page); if (ret == 0) ret = gfs2_allocate_page_backing(page); out_trans_end: if (ret) unlock_page(page); gfs2_trans_end(sdp); out_trans_fail: gfs2_inplace_release(ip); out_quota_unlock: gfs2_quota_unlock(ip); out_unlock: gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); if (ret == 0) { set_page_dirty(page); wait_for_stable_page(page); } out_write_access: put_write_access(inode); out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(ret); } static const struct vm_operations_struct gfs2_vm_ops = { .fault = filemap_fault, .page_mkwrite = gfs2_page_mkwrite, }; /** * gfs2_mmap - * @file: The file to map * @vma: The VMA which described the mapping * * There is no need to get a lock here unless we should be updating * atime. We ignore any locking errors since the only consequence is * a missed atime update (which will just be deferred until later). 
* * Returns: 0 */ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) { struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); if (!(file->f_flags & O_NOATIME) && !IS_NOATIME(&ip->i_inode)) { struct gfs2_holder i_gh; int error; error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) return error; /* grab lock to update inode */ gfs2_glock_dq_uninit(&i_gh); file_accessed(file); } vma->vm_ops = &gfs2_vm_ops; vma->vm_flags |= VM_CAN_NONLINEAR; return 0; } /** * gfs2_open - open a file * @inode: the inode to open * @file: the struct file for this opening * * Returns: errno */ static int gfs2_open(struct inode *inode, struct file *file) { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder i_gh; struct gfs2_file *fp; int error; fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL); if (!fp) return -ENOMEM; mutex_init(&fp->f_fl_mutex); gfs2_assert_warn(GFS2_SB(inode), !file->private_data); file->private_data = fp; if (S_ISREG(ip->i_inode.i_mode)) { error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); if (error) goto fail; if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) { error = -EOVERFLOW; goto fail_gunlock; } gfs2_glock_dq_uninit(&i_gh); } return 0; fail_gunlock: gfs2_glock_dq_uninit(&i_gh); fail: file->private_data = NULL; kfree(fp); return error; } /** * gfs2_release - called to close a struct file * @inode: the inode the struct file belongs to * @file: the struct file being closed * * Returns: errno */ static int gfs2_release(struct inode *inode, struct file *file) { struct gfs2_inode *ip = GFS2_I(inode); kfree(file->private_data); file->private_data = NULL; if (!(file->f_mode & FMODE_WRITE)) return 0; gfs2_rs_delete(ip); return 0; } /** * gfs2_fsync - sync the dirty data for a file (across the cluster) * @file: the file that points to the dentry * @start: the start position in the file to sync * @end: the end position in the file to sync * @datasync: set if we can ignore timestamp changes * * The VFS will flush data for us. We only need to worry * about metadata here. * * Returns: errno */ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; int sync_state = inode->i_state & I_DIRTY; struct gfs2_inode *ip = GFS2_I(inode); int ret; if (!gfs2_is_jdata(ip)) sync_state &= ~I_DIRTY_PAGES; if (datasync) sync_state &= ~I_DIRTY_SYNC; if (sync_state) { ret = sync_inode_metadata(inode, 1); if (ret) return ret; if (gfs2_is_jdata(ip)) filemap_write_and_wait(inode->i_mapping); gfs2_ail_flush(ip->i_gl, 1); } return 0; } /** * gfs2_file_aio_write - Perform a write to a file * @iocb: The io context * @iov: The data to write * @nr_segs: Number of @iov segments * @pos: The file position * * We have to do a lock/unlock here to refresh the inode size for * O_APPEND writes, otherwise we can land up writing at the wrong * offset. There is still a race, but provided the app is using its * own file locking, this will make O_APPEND work as expected. 
 *
 */

static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
				   unsigned long nr_segs, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	size_t writesize = iov_length(iov, nr_segs);
	struct dentry *dentry = file->f_dentry;
	struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
	int ret;

	ret = gfs2_rs_alloc(ip);
	if (ret)
		return ret;

	gfs2_size_hint(dentry->d_inode, pos, writesize);

	if (file->f_flags & O_APPEND) {
		struct gfs2_holder gh;

		/* Take and drop a shared glock so the cached inode size is
		 * revalidated before generic_file_aio_write() computes the
		 * append offset. */
		ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
		if (ret)
			return ret;
		gfs2_glock_dq_uninit(&gh);
	}

	return generic_file_aio_write(iocb, iov, nr_segs, pos);
}

static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
				      struct file *out, loff_t *ppos,
				      size_t len, unsigned int flags)
{
	int error;
	struct inode *inode = out->f_mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);

	error = gfs2_rs_alloc(ip);
	if (error)
		return (ssize_t)error;

	gfs2_size_hint(inode, *ppos, len);

	return generic_file_splice_write(pipe, out, ppos, len, flags);
}
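/*
 * Userspace illustration of the O_APPEND point made above; this is a
 * minimal sketch, not part of the kernel sources. Two descriptors on the
 * same file show that each write(2) must land at the then-current end of
 * file, which is exactly the size the shared-glock bounce revalidates
 * before generic_file_aio_write() runs. The path "/tmp/append-demo" is
 * hypothetical.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd1 = open("/tmp/append-demo",
		       O_CREAT | O_TRUNC | O_WRONLY | O_APPEND, 0644);
	int fd2 = open("/tmp/append-demo", O_WRONLY | O_APPEND);

	if (fd1 < 0 || fd2 < 0)
		return 1;

	/* fd2 extends the file first; fd1 must still append after it. */
	write(fd2, "first", 5);
	write(fd1, "second", 6);

	/* Prints 11: neither write clobbered the other. */
	printf("size: %lld\n", (long long)lseek(fd1, 0, SEEK_END));
	close(fd1);
	close(fd2);
	return 0;
}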
static ssize_t ncp_file_read(struct file *file, char __user *buf,
			     size_t count, loff_t *ppos)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	size_t already_read = 0;
	off_t pos;
	size_t bufsize;
	int error;
	void *freepage;
	size_t freelen;

	DPRINTK("ncp_file_read: enter %s/%s\n",
		dentry->d_parent->d_name.name, dentry->d_name.name);

	pos = *ppos;

	if ((ssize_t) count < 0)
		return -EINVAL;
	if (!count)
		return 0;
	if (pos > inode->i_sb->s_maxbytes)
		return 0;
	if (pos + count > inode->i_sb->s_maxbytes)
		count = inode->i_sb->s_maxbytes - pos;

	error = ncp_make_open(inode, O_RDONLY);
	if (error) {
		DPRINTK(KERN_ERR "ncp_file_read: open failed, error=%d\n",
			error);
		return error;
	}

	bufsize = NCP_SERVER(inode)->buffer_size;

	error = -EIO;
	freelen = ncp_read_bounce_size(bufsize);
	freepage = vmalloc(freelen);
	if (!freepage)
		goto outrel;
	error = 0;
	/* First read in as much as possible for each bufsize. */
	while (already_read < count) {
		int read_this_time;
		size_t to_read = min_t(unsigned int,
				       bufsize - (pos % bufsize),
				       count - already_read);

		error = ncp_read_bounce(NCP_SERVER(inode),
					NCP_FINFO(inode)->file_handle,
					pos, to_read, buf, &read_this_time,
					freepage, freelen);
		if (error) {
			error = -EIO;	/* NW errno -> Linux errno */
			break;
		}
		pos += read_this_time;
		buf += read_this_time;
		already_read += read_this_time;

		if (read_this_time != to_read)
			break;
	}
	vfree(freepage);

	*ppos = pos;

	file_accessed(file);

	DPRINTK("ncp_file_read: exit %s/%s\n",
		dentry->d_parent->d_name.name, dentry->d_name.name);
outrel:
	ncp_inode_close(inode);
	return already_read ? already_read : error;
}
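/*
 * Standalone sketch of the chunking rule in ncp_file_read() above: a
 * single request never crosses a buffer_size-aligned boundary and never
 * reads past the caller's count. chunk_len() is a hypothetical helper
 * invented for this illustration, not an ncpfs function. Starting at
 * offset 1000 with a 4096-byte server buffer, a 5000-byte read splits
 * into requests of 3096 and 1904 bytes.
 */
#include <stdio.h>
#include <sys/types.h>

static size_t chunk_len(size_t bufsize, off_t pos, size_t count,
			size_t already_read)
{
	size_t to_boundary = bufsize - (pos % bufsize);
	size_t remaining = count - already_read;

	return to_boundary < remaining ? to_boundary : remaining;
}

int main(void)
{
	off_t pos = 1000;
	size_t already_read = 0, count = 5000, bufsize = 4096;

	while (already_read < count) {
		size_t n = chunk_len(bufsize, pos, count, already_read);

		printf("read %zu bytes at offset %lld\n", n, (long long)pos);
		pos += n;
		already_read += n;
	}
	return 0;
}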
static ssize_t pipe_read(struct kiocb *iocb, const struct iovec *_iov,
			 unsigned long nr_segs, loff_t pos)
{
	struct file *filp = iocb->ki_filp;
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct pipe_inode_info *pipe;
	int do_wakeup;
	ssize_t ret;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;

	total_len = iov_length(iov, nr_segs);
	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;
	for (;;) {
		int bufs = pipe->nrbufs;
		if (bufs) {
			int curbuf = pipe->curbuf;
			struct pipe_buffer *buf = pipe->bufs + curbuf;
			const struct pipe_buf_operations *ops = buf->ops;
			void *addr;
			size_t chars = buf->len;
			int error, atomic;

			if (chars > total_len)
				chars = total_len;

			error = ops->confirm(pipe, buf);
			if (error) {
				if (!ret)
					ret = error;
				break;
			}

			atomic = !iov_fault_in_pages_write(iov, chars);
redo:
			addr = ops->map(pipe, buf, atomic);
			error = pipe_iov_copy_to_user(iov, addr + buf->offset,
						      chars, atomic);
			ops->unmap(pipe, buf, addr);
			if (unlikely(error)) {
				/*
				 * Just retry with the slow path if we failed.
				 */
				if (atomic) {
					atomic = 0;
					goto redo;
				}
				if (!ret)
					ret = error;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;
			if (!buf->len) {
				buf->ops = NULL;
				ops->release(pipe, buf);
				curbuf = (curbuf + 1) & (pipe->buffers - 1);
				pipe->curbuf = curbuf;
				pipe->nrbufs = --bufs;
				do_wakeup = 1;
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
		}
		if (bufs)	/* More to do? */
			continue;
		if (!pipe->writers)
			break;
		if (!pipe->waiting_writers) {
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(&pipe->wait);
			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
		}
		pipe_wait(pipe);
	}
	mutex_unlock(&inode->i_mutex);

	/* Signal writers asynchronously that there is more room. */
	if (do_wakeup) {
		wake_up_interruptible_sync(&pipe->wait);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	if (ret > 0)
		file_accessed(filp);
	return ret;
}
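/*
 * The return-value rules of pipe_read() can be exercised from userspace;
 * this is a minimal sketch using only POSIX calls, not part of the
 * kernel sources. An empty pipe with a live writer and O_NONBLOCK set
 * yields EAGAIN (the -EAGAIN break above); once the last writer closes,
 * read(2) returns 0 via the !pipe->writers break.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fds[2];
	char c;

	if (pipe(fds) < 0)
		return 1;
	fcntl(fds[0], F_SETFL, O_NONBLOCK);

	/* Empty pipe, write end still open: expect EAGAIN. */
	if (read(fds[0], &c, 1) < 0 && errno == EAGAIN)
		puts("writer alive, no data: EAGAIN");

	/* Close the only write end: expect EOF (a 0 return). */
	close(fds[1]);
	if (read(fds[0], &c, 1) == 0)
		puts("no writers left: EOF");
	return 0;
}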
static ssize_t pipe_readv(struct file *filp, const struct iovec *_iov,
			  unsigned long nr_segs, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *info;
	int do_wakeup;
	ssize_t ret;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;

	total_len = iov_length(iov, nr_segs);
	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	down(PIPE_SEM(*inode));
	info = inode->i_pipe;
	for (;;) {
		int bufs = info->nrbufs;
		if (bufs) {
			int curbuf = info->curbuf;
			struct pipe_buffer *buf = info->bufs + curbuf;
			struct pipe_buf_operations *ops = buf->ops;
			void *addr;
			size_t chars = buf->len;
			int error;

			if (chars > total_len)
				chars = total_len;

			addr = ops->map(filp, info, buf);
			error = pipe_iov_copy_to_user(iov, addr + buf->offset,
						      chars);
			ops->unmap(info, buf);
			if (unlikely(error)) {
				if (!ret)
					ret = -EFAULT;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;
			if (!buf->len) {
				buf->ops = NULL;
				ops->release(info, buf);
				curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
				info->curbuf = curbuf;
				info->nrbufs = --bufs;
				do_wakeup = 1;
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
		}
		if (bufs)	/* More to do? */
			continue;
		if (!PIPE_WRITERS(*inode))
			break;
		if (!PIPE_WAITING_WRITERS(*inode)) {
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(PIPE_WAIT(*inode));
			kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
		}
		pipe_wait(inode);
	}
	up(PIPE_SEM(*inode));

	/* Signal writers asynchronously that there is more room. */
	if (do_wakeup) {
		wake_up_interruptible(PIPE_WAIT(*inode));
		kill_fasync(PIPE_FASYNC_WRITERS(*inode), SIGIO, POLL_OUT);
	}
	if (ret > 0)
		file_accessed(filp);
	return ret;
}
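/*
 * Both pipe readers above bail out with -ERESTARTSYS when a signal is
 * pending and nothing has been copied yet. From userspace that shows up
 * as EINTR when the handler is installed without SA_RESTART; a minimal
 * sketch, not part of the kernel sources:
 */
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static void on_alarm(int sig)
{
	(void)sig;	/* the handler only has to interrupt the read */
}

int main(void)
{
	struct sigaction sa = { 0 };
	int fds[2];
	char c;

	if (pipe(fds) < 0)
		return 1;

	sa.sa_handler = on_alarm;
	sigemptyset(&sa.sa_mask);
	/* sa_flags deliberately lacks SA_RESTART, so read(2) fails with EINTR. */
	sigaction(SIGALRM, &sa, NULL);
	alarm(1);

	/* Blocks in pipe_wait(); SIGALRM forces the -ERESTARTSYS path. */
	if (read(fds[0], &c, 1) < 0 && errno == EINTR)
		puts("blocked read interrupted: EINTR");
	return 0;
}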