int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) { int ret = 0, lock_level = 0; struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb); /* * Only support shared writeable mmap for local mounts which * don't know about holes. */ if ((!ocfs2_mount_local(osb) || ocfs2_sparse_alloc(osb)) && ((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) && ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) { mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags); /* This is -EINVAL because generic_file_readonly_mmap * returns it in a similar situation. */ return -EINVAL; } ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode, file->f_vfsmnt, &lock_level); if (ret < 0) { mlog_errno(ret); goto out; } ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level); out: vma->vm_ops = &ocfs2_file_vm_ops; vma->vm_flags |= VM_CAN_NONLINEAR; return 0; }
static int ocfs2_readpage(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT; int ret, unlock = 1; mlog_entry("(0x%p, %lu)\n", file, (page ? page->index : 0)); ret = ocfs2_meta_lock_with_page(inode, NULL, 0, page); if (ret != 0) { if (ret == AOP_TRUNCATED_PAGE) unlock = 0; mlog_errno(ret); goto out; } if (down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem) == 0) { ret = AOP_TRUNCATED_PAGE; goto out_meta_unlock; } /* * i_size might have just been updated as we grabed the meta lock. We * might now be discovering a truncate that hit on another node. * block_read_full_page->get_block freaks out if it is asked to read * beyond the end of a file, so we check here. Callers * (generic_file_read, vm_ops->fault) are clever enough to check i_size * and notice that the page they just read isn't needed. * * XXX sys_readahead() seems to get that wrong? */ if (start >= i_size_read(inode)) { zero_user_page(page, 0, PAGE_SIZE, KM_USER0); SetPageUptodate(page); ret = 0; goto out_alloc; } ret = ocfs2_data_lock_with_page(inode, 0, page); if (ret != 0) { if (ret == AOP_TRUNCATED_PAGE) unlock = 0; mlog_errno(ret); goto out_alloc; } ret = block_read_full_page(page, ocfs2_get_block); unlock = 0; ocfs2_data_unlock(inode, 0); out_alloc: up_read(&OCFS2_I(inode)->ip_alloc_sem); out_meta_unlock: ocfs2_meta_unlock(inode, 0); out: if (unlock) unlock_page(page); mlog_exit(ret); return ret; }
static ssize_t ocfs2_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) { int ret = 0; struct inode *inode = in->f_path.dentry->d_inode; mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe, (unsigned int)len, in->f_path.dentry->d_name.len, in->f_path.dentry->d_name.name); /* * See the comment in ocfs2_file_aio_read() */ ret = ocfs2_meta_lock(inode, NULL, 0); if (ret < 0) { mlog_errno(ret); goto bail; } ocfs2_meta_unlock(inode, 0); ret = generic_file_splice_read(in, ppos, pipe, len, flags); bail: mlog_exit(ret); return ret; }
static struct dentry *ocfs2_get_parent(struct dentry *child) { int status; u64 blkno; struct dentry *parent; struct inode *inode; struct inode *dir = child->d_inode; struct buffer_head *dirent_bh = NULL; struct ocfs2_dir_entry *dirent; mlog_entry("(0x%p, '%.*s')\n", child, child->d_name.len, child->d_name.name); mlog(0, "find parent of directory %llu\n", (unsigned long long)OCFS2_I(dir)->ip_blkno); status = ocfs2_meta_lock(dir, NULL, 0); if (status < 0) { if (status != -ENOENT) mlog_errno(status); parent = ERR_PTR(status); goto bail; } status = ocfs2_find_files_on_disk("..", 2, &blkno, dir, &dirent_bh, &dirent); if (status < 0) { parent = ERR_PTR(-ENOENT); goto bail_unlock; } inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0); if (IS_ERR(inode)) { mlog(ML_ERROR, "Unable to create inode %llu\n", (unsigned long long)blkno); parent = ERR_PTR(-EACCES); goto bail_unlock; } parent = d_alloc_anon(inode); if (!parent) { iput(inode); parent = ERR_PTR(-ENOMEM); } parent->d_op = &ocfs2_dentry_ops; bail_unlock: ocfs2_meta_unlock(dir, 0); if (dirent_bh) brelse(dirent_bh); bail: mlog_exit_ptr(parent); return parent; }
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) { struct inode *inode = ac->ac_inode; if (inode) { if (ac->ac_which != OCFS2_AC_USE_LOCAL) ocfs2_meta_unlock(inode, 1); mutex_unlock(&inode->i_mutex); iput(inode); } if (ac->ac_bh) brelse(ac->ac_bh); kfree(ac); }
static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block) { sector_t status; u64 p_blkno = 0; int err = 0; struct inode *inode = mapping->host; mlog_entry("(block = %llu)\n", (unsigned long long)block); /* We don't need to lock journal system files, since they aren't * accessed concurrently from multiple nodes. */ if (!INODE_JOURNAL(inode)) { err = ocfs2_meta_lock(inode, NULL, 0); if (err) { if (err != -ENOENT) mlog_errno(err); goto bail; } down_read(&OCFS2_I(inode)->ip_alloc_sem); } err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, NULL); if (!INODE_JOURNAL(inode)) { up_read(&OCFS2_I(inode)->ip_alloc_sem); ocfs2_meta_unlock(inode, 0); } if (err) { mlog(ML_ERROR, "get_blocks() failed, block = %llu\n", (unsigned long long)block); mlog_errno(err); goto bail; } bail: status = err ? 0 : p_blkno; mlog_exit((int)status); return status; }
int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd) { int ret; mlog_entry_void(); ret = ocfs2_meta_lock(inode, NULL, 0); if (ret) { if (ret != -ENOENT) mlog_errno(ret); goto out; } ret = generic_permission(inode, mask, NULL); ocfs2_meta_unlock(inode, 0); out: mlog_exit(ret); return ret; }
/* * This is called from our getattr. */ int ocfs2_inode_revalidate(struct dentry *dentry) { struct inode *inode = dentry->d_inode; int status = 0; mlog_entry("(inode = 0x%p, ino = %llu)\n", inode, inode ? (unsigned long long)OCFS2_I(inode)->ip_blkno : 0ULL); if (!inode) { mlog(0, "eep, no inode!\n"); status = -ENOENT; goto bail; } spin_lock(&OCFS2_I(inode)->ip_lock); if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { spin_unlock(&OCFS2_I(inode)->ip_lock); mlog(0, "inode deleted!\n"); status = -ENOENT; goto bail; } spin_unlock(&OCFS2_I(inode)->ip_lock); /* Let ocfs2_meta_lock do the work of updating our struct * inode for us. */ status = ocfs2_meta_lock(inode, NULL, 0); if (status < 0) { if (status != -ENOENT) mlog_errno(status); goto bail; } ocfs2_meta_unlock(inode, 0); bail: mlog_exit(status); return status; }
void ocfs2_delete_inode(struct inode *inode) { int wipe, status; sigset_t blocked, oldset; struct buffer_head *di_bh = NULL; mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); if (is_bad_inode(inode)) { mlog(0, "Skipping delete of bad inode\n"); goto bail; } if (!ocfs2_inode_is_valid_to_delete(inode)) { /* It's probably not necessary to truncate_inode_pages * here but we do it for safety anyway (it will most * likely be a no-op anyway) */ ocfs2_cleanup_delete_inode(inode, 0); goto bail; } /* We want to block signals in delete_inode as the lock and * messaging paths may return us -ERESTARTSYS. Which would * cause us to exit early, resulting in inodes being orphaned * forever. */ sigfillset(&blocked); status = sigprocmask(SIG_BLOCK, &blocked, &oldset); if (status < 0) { mlog_errno(status); ocfs2_cleanup_delete_inode(inode, 1); goto bail; } /* Lock down the inode. This gives us an up to date view of * it's metadata (for verification), and allows us to * serialize delete_inode votes. * * Even though we might be doing a truncate, we don't take the * allocation lock here as it won't be needed - nobody will * have the file open. */ status = ocfs2_meta_lock(inode, &di_bh, 1); if (status < 0) { if (status != -ENOENT) mlog_errno(status); ocfs2_cleanup_delete_inode(inode, 0); goto bail_unblock; } /* Query the cluster. This will be the final decision made * before we go ahead and wipe the inode. */ status = ocfs2_query_inode_wipe(inode, di_bh, &wipe); if (!wipe || status < 0) { /* Error and inode busy vote both mean we won't be * removing the inode, so they take almost the same * path. */ if (status < 0) mlog_errno(status); /* Someone in the cluster has voted to not wipe this * inode, or it was never completely orphaned. Write * out the pages and exit now. */ ocfs2_cleanup_delete_inode(inode, 1); goto bail_unlock_inode; } ocfs2_cleanup_delete_inode(inode, 0); status = ocfs2_wipe_inode(inode, di_bh); if (status < 0) { if (status != -EDEADLK) mlog_errno(status); goto bail_unlock_inode; } /* * Mark the inode as successfully deleted. * * This is important for ocfs2_clear_inode() as it will check * this flag and skip any checkpointing work * * ocfs2_stuff_meta_lvb() also uses this flag to invalidate * the LVB for other nodes. */ OCFS2_I(inode)->ip_flags |= OCFS2_INODE_DELETED; bail_unlock_inode: ocfs2_meta_unlock(inode, 1); brelse(di_bh); bail_unblock: status = sigprocmask(SIG_SETMASK, &oldset, NULL); if (status < 0) mlog_errno(status); bail: clear_inode(inode); mlog_exit_void(); }
static int ocfs2_wipe_inode(struct inode *inode, struct buffer_head *di_bh) { int status, orphaned_slot; struct inode *orphan_dir_inode = NULL; struct buffer_head *orphan_dir_bh = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di; di = (struct ocfs2_dinode *) di_bh->b_data; orphaned_slot = le16_to_cpu(di->i_orphaned_slot); status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); if (status) return status; orphan_dir_inode = ocfs2_get_system_file_inode(osb, ORPHAN_DIR_SYSTEM_INODE, orphaned_slot); if (!orphan_dir_inode) { status = -EEXIST; mlog_errno(status); goto bail; } /* Lock the orphan dir. The lock will be held for the entire * delete_inode operation. We do this now to avoid races with * recovery completion on other nodes. */ mutex_lock(&orphan_dir_inode->i_mutex); status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1); if (status < 0) { mutex_unlock(&orphan_dir_inode->i_mutex); mlog_errno(status); goto bail; } /* we do this while holding the orphan dir lock because we * don't want recovery being run from another node to vote for * an inode delete on us -- this will result in two nodes * truncating the same file! */ status = ocfs2_truncate_for_delete(osb, inode, di_bh); if (status < 0) { mlog_errno(status); goto bail_unlock_dir; } status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, orphan_dir_bh); if (status < 0) mlog_errno(status); bail_unlock_dir: ocfs2_meta_unlock(orphan_dir_inode, 1); mutex_unlock(&orphan_dir_inode->i_mutex); brelse(orphan_dir_bh); bail: iput(orphan_dir_inode); ocfs2_signal_wipe_completion(osb, orphaned_slot); return status; }
static int ocfs2_remove_inode(struct inode *inode, struct buffer_head *di_bh, struct inode *orphan_dir_inode, struct buffer_head *orphan_dir_bh) { int status; struct inode *inode_alloc_inode = NULL; struct buffer_head *inode_alloc_bh = NULL; handle_t *handle; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; inode_alloc_inode = ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, le16_to_cpu(di->i_suballoc_slot)); if (!inode_alloc_inode) { status = -EEXIST; mlog_errno(status); goto bail; } mutex_lock(&inode_alloc_inode->i_mutex); status = ocfs2_meta_lock(inode_alloc_inode, &inode_alloc_bh, 1); if (status < 0) { mutex_unlock(&inode_alloc_inode->i_mutex); mlog_errno(status); goto bail; } handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); goto bail_unlock; } status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, orphan_dir_bh); if (status < 0) { mlog_errno(status); goto bail_commit; } /* set the inodes dtime */ status = ocfs2_journal_access(handle, inode, di_bh, OCFS2_JOURNAL_ACCESS_WRITE); if (status < 0) { mlog_errno(status); goto bail_commit; } di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec); le32_and_cpu(&di->i_flags, ~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL)); status = ocfs2_journal_dirty(handle, di_bh); if (status < 0) { mlog_errno(status); goto bail_commit; } ocfs2_remove_from_cache(inode, di_bh); status = ocfs2_free_dinode(handle, inode_alloc_inode, inode_alloc_bh, di); if (status < 0) mlog_errno(status); bail_commit: ocfs2_commit_trans(osb, handle); bail_unlock: ocfs2_meta_unlock(inode_alloc_inode, 1); mutex_unlock(&inode_alloc_inode->i_mutex); brelse(inode_alloc_bh); bail: iput(inode_alloc_inode); return status; }
static int ocfs2_read_locked_inode(struct inode *inode, struct ocfs2_find_inode_args *args) { struct super_block *sb; struct ocfs2_super *osb; struct ocfs2_dinode *fe; struct buffer_head *bh = NULL; int status, can_lock; u32 generation = 0; mlog_entry("(0x%p, 0x%p)\n", inode, args); status = -EINVAL; if (inode == NULL || inode->i_sb == NULL) { mlog(ML_ERROR, "bad inode\n"); return status; } sb = inode->i_sb; osb = OCFS2_SB(sb); if (!args) { mlog(ML_ERROR, "bad inode args\n"); make_bad_inode(inode); return status; } /* * To improve performance of cold-cache inode stats, we take * the cluster lock here if possible. * * Generally, OCFS2 never trusts the contents of an inode * unless it's holding a cluster lock, so taking it here isn't * a correctness issue as much as it is a performance * improvement. * * There are three times when taking the lock is not a good idea: * * 1) During startup, before we have initialized the DLM. * * 2) If we are reading certain system files which never get * cluster locks (local alloc, truncate log). * * 3) If the process doing the iget() is responsible for * orphan dir recovery. We're holding the orphan dir lock and * can get into a deadlock with another process on another * node in ->delete_inode(). * * #1 and #2 can be simply solved by never taking the lock * here for system files (which are the only type we read * during mount). It's a heavier approach, but our main * concern is user-accesible files anyway. * * #3 works itself out because we'll eventually take the * cluster lock before trusting anything anyway. */ can_lock = !(args->fi_flags & OCFS2_FI_FLAG_SYSFILE) && !(args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) && !ocfs2_mount_local(osb); /* * To maintain backwards compatibility with older versions of * ocfs2-tools, we still store the generation value for system * files. The only ones that actually matter to userspace are * the journals, but it's easier and inexpensive to just flag * all system files similarly. */ if (args->fi_flags & OCFS2_FI_FLAG_SYSFILE) generation = osb->fs_generation; ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_meta_lockres, OCFS2_LOCK_TYPE_META, generation, inode); ocfs2_inode_lock_res_init(&OCFS2_I(inode)->ip_open_lockres, OCFS2_LOCK_TYPE_OPEN, 0, inode); if (can_lock) { status = ocfs2_open_lock(inode); if (status) { make_bad_inode(inode); mlog_errno(status); return status; } status = ocfs2_meta_lock(inode, NULL, 0); if (status) { make_bad_inode(inode); mlog_errno(status); return status; } } if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) { status = ocfs2_try_open_lock(inode, 0); if (status) { make_bad_inode(inode); return status; } } status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, can_lock ? inode : NULL); if (status < 0) { mlog_errno(status); goto bail; } status = -EINVAL; fe = (struct ocfs2_dinode *) bh->b_data; if (!OCFS2_IS_VALID_DINODE(fe)) { mlog(0, "Invalid dinode #%llu: signature = %.*s\n", (unsigned long long)args->fi_blkno, 7, fe->i_signature); goto bail; } /* * This is a code bug. Right now the caller needs to * understand whether it is asking for a system file inode or * not so the proper lock names can be built. */ mlog_bug_on_msg(!!(fe->i_flags & cpu_to_le32(OCFS2_SYSTEM_FL)) != !!(args->fi_flags & OCFS2_FI_FLAG_SYSFILE), "Inode %llu: system file state is ambigous\n", (unsigned long long)args->fi_blkno); if (S_ISCHR(le16_to_cpu(fe->i_mode)) || S_ISBLK(le16_to_cpu(fe->i_mode))) inode->i_rdev = huge_decode_dev(le64_to_cpu(fe->id1.dev1.i_rdev)); if (ocfs2_populate_inode(inode, fe, 0) < 0) goto bail; BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno)); status = 0; bail: if (can_lock) ocfs2_meta_unlock(inode, 0); if (status < 0) make_bad_inode(inode); if (args && bh) brelse(bh); mlog_exit(status); return status; }
static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0; struct file *filp = iocb->ki_filp; struct inode *inode = filp->f_path.dentry->d_inode; mlog_entry("(0x%p, %u, '%.*s')\n", filp, (unsigned int)nr_segs, filp->f_path.dentry->d_name.len, filp->f_path.dentry->d_name.name); if (!inode) { ret = -EINVAL; mlog_errno(ret); goto bail; } /* * buffered reads protect themselves in ->readpage(). O_DIRECT reads * need locks to protect pending reads from racing with truncate. */ if (filp->f_flags & O_DIRECT) { down_read(&inode->i_alloc_sem); have_alloc_sem = 1; ret = ocfs2_rw_lock(inode, 0); if (ret < 0) { mlog_errno(ret); goto bail; } rw_level = 0; /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); } /* * We're fine letting folks race truncates and extending * writes with read across the cluster, just like they can * locally. Hence no rw_lock during read. * * Take and drop the meta data lock to update inode fields * like i_size. This allows the checks down below * generic_file_aio_read() a chance of actually working. */ ret = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level); if (ret < 0) { mlog_errno(ret); goto bail; } ocfs2_meta_unlock(inode, lock_level); ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); if (ret == -EINVAL) mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n"); /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); /* see ocfs2_file_aio_write */ if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { rw_level = -1; have_alloc_sem = 0; } bail: if (have_alloc_sem) up_read(&inode->i_alloc_sem); if (rw_level != -1) ocfs2_rw_unlock(inode, rw_level); mlog_exit(ret); return ret; }
static int ocfs2_prepare_inode_for_write(struct dentry *dentry, loff_t *ppos, size_t count, int appending, int *direct_io) { int ret = 0, meta_level = appending; struct inode *inode = dentry->d_inode; u32 clusters; loff_t newsize, saved_pos; /* * We sample i_size under a read level meta lock to see if our write * is extending the file, if it is we back off and get a write level * meta lock. */ for(;;) { ret = ocfs2_meta_lock(inode, NULL, meta_level); if (ret < 0) { meta_level = -1; mlog_errno(ret); goto out; } /* Clear suid / sgid if necessary. We do this here * instead of later in the write path because * remove_suid() calls ->setattr without any hint that * we may have already done our cluster locking. Since * ocfs2_setattr() *must* take cluster locks to * proceeed, this will lead us to recursively lock the * inode. There's also the dinode i_size state which * can be lost via setattr during extending writes (we * set inode->i_size at the end of a write. */ if (should_remove_suid(dentry)) { if (meta_level == 0) { ocfs2_meta_unlock(inode, meta_level); meta_level = 1; continue; } ret = ocfs2_write_remove_suid(inode); if (ret < 0) { mlog_errno(ret); goto out_unlock; } } /* work on a copy of ppos until we're sure that we won't have * to recalculate it due to relocking. */ if (appending) { saved_pos = i_size_read(inode); mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos); } else { saved_pos = *ppos; } if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { loff_t end = saved_pos + count; /* * Skip the O_DIRECT checks if we don't need * them. */ if (!direct_io || !(*direct_io)) break; /* * Allowing concurrent direct writes means * i_size changes wouldn't be synchronized, so * one node could wind up truncating another * nodes writes. */ if (end > i_size_read(inode)) { *direct_io = 0; break; } /* * We don't fill holes during direct io, so * check for them here. If any are found, the * caller will have to retake some cluster * locks and initiate the io as buffered. */ ret = ocfs2_check_range_for_holes(inode, saved_pos, count); if (ret == 1) { *direct_io = 0; ret = 0; } else if (ret < 0) mlog_errno(ret); break; } /* * The rest of this loop is concerned with legacy file * systems which don't support sparse files. */ newsize = count + saved_pos; mlog(0, "pos=%lld newsize=%lld cursize=%lld\n", (long long) saved_pos, (long long) newsize, (long long) i_size_read(inode)); /* No need for a higher level metadata lock if we're * never going past i_size. */ if (newsize <= i_size_read(inode)) break; if (meta_level == 0) { ocfs2_meta_unlock(inode, meta_level); meta_level = 1; continue; } spin_lock(&OCFS2_I(inode)->ip_lock); clusters = ocfs2_clusters_for_bytes(inode->i_sb, newsize) - OCFS2_I(inode)->ip_clusters; spin_unlock(&OCFS2_I(inode)->ip_lock); mlog(0, "Writing at EOF, may need more allocation: " "i_size = %lld, newsize = %lld, need %u clusters\n", (long long) i_size_read(inode), (long long) newsize, clusters); /* We only want to continue the rest of this loop if * our extend will actually require more * allocation. */ if (!clusters) break; ret = ocfs2_extend_file(inode, NULL, newsize, count); if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); goto out_unlock; } break; } if (appending) *ppos = saved_pos; out_unlock: ocfs2_meta_unlock(inode, meta_level); out: return ret; }
/* * ocfs2_readdir() * */ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) { int error = 0; unsigned long offset, blk, last_ra_blk = 0; int i, stored; struct buffer_head * bh, * tmp; struct ocfs2_dir_entry * de; int err; struct inode *inode = filp->f_path.dentry->d_inode; struct super_block * sb = inode->i_sb; unsigned int ra_sectors = 16; int lock_level = 0; mlog_entry("dirino=%llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno); stored = 0; bh = NULL; error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level); if (lock_level && error >= 0) { /* We release EX lock which used to update atime * and get PR lock again to reduce contention * on commonly accessed directories. */ ocfs2_meta_unlock(inode, 1); lock_level = 0; error = ocfs2_meta_lock(inode, NULL, 0); } if (error < 0) { if (error != -ENOENT) mlog_errno(error); /* we haven't got any yet, so propagate the error. */ stored = error; goto bail_nolock; } offset = filp->f_pos & (sb->s_blocksize - 1); while (!error && !stored && filp->f_pos < i_size_read(inode)) { blk = (filp->f_pos) >> sb->s_blocksize_bits; bh = ocfs2_bread(inode, blk, &err, 0); if (!bh) { mlog(ML_ERROR, "directory #%llu contains a hole at offset %lld\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, filp->f_pos); filp->f_pos += sb->s_blocksize - offset; continue; } /* The idea here is to begin with 8k read-ahead and to stay * 4k ahead of our current position. * * TODO: Use the pagecache for this. We just need to * make sure it's cluster-safe... */ if (!last_ra_blk || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) { for (i = ra_sectors >> (sb->s_blocksize_bits - 9); i > 0; i--) { tmp = ocfs2_bread(inode, ++blk, &err, 1); if (tmp) brelse(tmp); } last_ra_blk = blk; ra_sectors = 8; } revalidate: /* If the dir block has changed since the last call to * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the block * to make sure. */ if (filp->f_version != inode->i_version) { for (i = 0; i < sb->s_blocksize && i < offset; ) { de = (struct ocfs2_dir_entry *) (bh->b_data + i); /* It's too expensive to do a full * dirent test each time round this * loop, but we do have to test at * least that it is non-zero. A * failure will be detected in the * dirent test below. */ if (le16_to_cpu(de->rec_len) < OCFS2_DIR_REC_LEN(1)) break; i += le16_to_cpu(de->rec_len); } offset = i; filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) | offset; filp->f_version = inode->i_version; } while (!error && filp->f_pos < i_size_read(inode) && offset < sb->s_blocksize) { de = (struct ocfs2_dir_entry *) (bh->b_data + offset); if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { /* On error, skip the f_pos to the next block. */ filp->f_pos = (filp->f_pos | (sb->s_blocksize - 1)) + 1; brelse(bh); goto bail; } offset += le16_to_cpu(de->rec_len); if (le64_to_cpu(de->inode)) { /* We might block in the next section * if the data destination is * currently swapped out. So, use a * version stamp to detect whether or * not the directory has been modified * during the copy operation. */ unsigned long version = filp->f_version; unsigned char d_type = DT_UNKNOWN; if (de->file_type < OCFS2_FT_MAX) d_type = ocfs2_filetype_table[de->file_type]; error = filldir(dirent, de->name, de->name_len, filp->f_pos, ino_from_blkno(sb, le64_to_cpu(de->inode)), d_type); if (error) break; if (version != filp->f_version) goto revalidate; stored ++; } filp->f_pos += le16_to_cpu(de->rec_len); } offset = 0; brelse(bh); } stored = 0; bail: ocfs2_meta_unlock(inode, lock_level); bail_nolock: mlog_exit(stored); return stored; }
/* * ocfs2_readdir() * */ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) { int error = 0; unsigned long offset, blk; int i, num, stored; struct buffer_head * bh, * tmp; struct ocfs2_dir_entry * de; int err; struct inode *inode = filp->f_dentry->d_inode; struct super_block * sb = inode->i_sb; int have_disk_lock = 0; mlog_entry("dirino=%"MLFu64"\n", OCFS2_I(inode)->ip_blkno); stored = 0; bh = NULL; error = ocfs2_meta_lock(inode, NULL, NULL, 0); if (error < 0) { if (error != -ENOENT) mlog_errno(error); /* we haven't got any yet, so propagate the error. */ stored = error; goto bail; } have_disk_lock = 1; offset = filp->f_pos & (sb->s_blocksize - 1); while (!error && !stored && filp->f_pos < i_size_read(inode)) { blk = (filp->f_pos) >> sb->s_blocksize_bits; bh = ocfs2_bread(inode, blk, &err, 0); if (!bh) { mlog(ML_ERROR, "directory #%"MLFu64" contains a hole " "at offset %lld\n", OCFS2_I(inode)->ip_blkno, filp->f_pos); filp->f_pos += sb->s_blocksize - offset; continue; } /* * Do the readahead (8k) */ if (!offset) { for (i = 16 >> (sb->s_blocksize_bits - 9), num = 0; i > 0; i--) { tmp = ocfs2_bread(inode, ++blk, &err, 1); if (tmp) brelse(tmp); } } revalidate: /* If the dir block has changed since the last call to * readdir(2), then we might be pointing to an invalid * dirent right now. Scan from the start of the block * to make sure. */ if (filp->f_version != inode->i_version) { for (i = 0; i < sb->s_blocksize && i < offset; ) { de = (struct ocfs2_dir_entry *) (bh->b_data + i); /* It's too expensive to do a full * dirent test each time round this * loop, but we do have to test at * least that it is non-zero. A * failure will be detected in the * dirent test below. */ if (le16_to_cpu(de->rec_len) < OCFS2_DIR_REC_LEN(1)) break; i += le16_to_cpu(de->rec_len); } offset = i; filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) | offset; filp->f_version = inode->i_version; } while (!error && filp->f_pos < i_size_read(inode) && offset < sb->s_blocksize) { de = (struct ocfs2_dir_entry *) (bh->b_data + offset); if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { /* On error, skip the f_pos to the next block. */ filp->f_pos = (filp->f_pos | (sb->s_blocksize - 1)) + 1; brelse(bh); goto bail; } offset += le16_to_cpu(de->rec_len); if (le64_to_cpu(de->inode)) { /* We might block in the next section * if the data destination is * currently swapped out. So, use a * version stamp to detect whether or * not the directory has been modified * during the copy operation. */ unsigned long version = filp->f_version; unsigned char d_type = DT_UNKNOWN; if (de->file_type < OCFS2_FT_MAX) d_type = ocfs2_filetype_table[de->file_type]; error = filldir(dirent, de->name, de->name_len, filp->f_pos, ino_from_blkno(sb, le64_to_cpu(de->inode)), d_type); if (error) break; if (version != filp->f_version) goto revalidate; stored ++; } filp->f_pos += le16_to_cpu(de->rec_len); } offset = 0; brelse(bh); } stored = 0; bail: if (have_disk_lock) ocfs2_meta_unlock(inode, 0); mlog_exit(stored); return stored; }
int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) { int status = 0, size_change; struct inode *inode = dentry->d_inode; struct super_block *sb = inode->i_sb; struct ocfs2_super *osb = OCFS2_SB(sb); struct buffer_head *bh = NULL; handle_t *handle = NULL; mlog_entry("(0x%p, '%.*s')\n", dentry, dentry->d_name.len, dentry->d_name.name); if (attr->ia_valid & ATTR_MODE) mlog(0, "mode change: %d\n", attr->ia_mode); if (attr->ia_valid & ATTR_UID) mlog(0, "uid change: %d\n", attr->ia_uid); if (attr->ia_valid & ATTR_GID) mlog(0, "gid change: %d\n", attr->ia_gid); if (attr->ia_valid & ATTR_SIZE) mlog(0, "size change...\n"); if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME)) mlog(0, "time change...\n"); #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \ | ATTR_GID | ATTR_UID | ATTR_MODE) if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) { mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid); return 0; } status = inode_change_ok(inode, attr); if (status) return status; size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; if (size_change) { status = ocfs2_rw_lock(inode, 1); if (status < 0) { mlog_errno(status); goto bail; } } status = ocfs2_meta_lock(inode, &bh, 1); if (status < 0) { if (status != -ENOENT) mlog_errno(status); goto bail_unlock_rw; } if (size_change && attr->ia_size != i_size_read(inode)) { if (i_size_read(inode) > attr->ia_size) status = ocfs2_truncate_file(inode, bh, attr->ia_size); else status = ocfs2_extend_file(inode, bh, attr->ia_size, 0); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); status = -ENOSPC; goto bail_unlock; } } handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); goto bail_unlock; } status = inode_setattr(inode, attr); if (status < 0) { mlog_errno(status); goto bail_commit; } status = ocfs2_mark_inode_dirty(handle, inode, bh); if (status < 0) mlog_errno(status); bail_commit: ocfs2_commit_trans(osb, handle); bail_unlock: ocfs2_meta_unlock(inode, 1); bail_unlock_rw: if (size_change) ocfs2_rw_unlock(inode, 1); bail: if (bh) brelse(bh); mlog_exit(status); return status; }