/* * Called to set up an inode from iget5_locked. */ static int orangefs_set_inode(struct inode *inode, void *data) { struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data; ORANGEFS_I(inode)->refn.fs_id = ref->fs_id; ORANGEFS_I(inode)->refn.khandle = ref->khandle; return 0; }
static int flush_racache(struct inode *inode) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct orangefs_kernel_op_s *new_op; int ret; gossip_debug(GOSSIP_UTILS_DEBUG, "%s: %pU: Handle is %pU | fs_id %d\n", __func__, get_khandle_from_ino(inode), &orangefs_inode->refn.khandle, orangefs_inode->refn.fs_id); new_op = op_alloc(ORANGEFS_VFS_OP_RA_FLUSH); if (!new_op) return -ENOMEM; new_op->upcall.req.ra_cache_flush.refn = orangefs_inode->refn; ret = service_operation(new_op, "orangefs_flush_racache", get_interruptible_flag(inode)); gossip_debug(GOSSIP_UTILS_DEBUG, "%s: got return value of %d\n", __func__, ret); op_release(new_op); return ret; }
/*
 * Push all data for a specific file onto permanent storage.
 *
 * The page-cache range [start, end] is written back and waited on first.
 * If that writeback reports an error it is returned to the caller right
 * away instead of being dropped, and no fsync upcall is issued.
 * Otherwise an ORANGEFS_VFS_OP_FSYNC upcall is serviced for the file's
 * object reference and the inode is flushed with orangefs_flush_inode().
 *
 * @datasync is not consulted.
 *
 * Returns a negative errno from the writeback, -ENOMEM when no op can be
 * allocated, or the result of service_operation().
 */
static int orangefs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_kernel_op_s *new_op;
	int ret;

	/* required call; do not lose a writeback failure */
	ret = filemap_write_and_wait_range(file->f_mapping, start, end);
	if (ret < 0)
		return ret;

	new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC);
	if (!new_op)
		return -ENOMEM;
	new_op->upcall.req.fsync.refn = orangefs_inode->refn;

	ret = service_operation(new_op,
				"orangefs_fsync",
				get_interruptible_flag(inode));

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "orangefs_fsync got return value of %d\n",
		     ret);

	op_release(new_op);

	orangefs_flush_inode(inode);
	return ret;
}
int orangefs_init_acl(struct inode *inode, struct inode *dir) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct posix_acl *default_acl, *acl; umode_t mode = inode->i_mode; int error = 0; ClearModeFlag(orangefs_inode); error = posix_acl_create(dir, &mode, &default_acl, &acl); if (error) return error; if (default_acl) { error = orangefs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); posix_acl_release(default_acl); } if (acl) { if (!error) error = orangefs_set_acl(inode, acl, ACL_TYPE_ACCESS); posix_acl_release(acl); } /* If mode of the inode was changed, then do a forcible ->setattr */ if (mode != inode->i_mode) { SetModeFlag(orangefs_inode); inode->i_mode = mode; orangefs_flush_inode(inode); } return error; }
/*
 * Obtain attributes of an object given a path.
 *
 * Refreshes the inode through orangefs_inode_getattr() and, on success,
 * fills @stat from the inode, overriding the reported block size with the
 * OrangeFS one.  STATX_SIZE is only advertised in result_mask when the
 * caller asked for it in @request_mask.
 *
 * Returns 0 on success or the error from orangefs_inode_getattr().
 */
int orangefs_getattr(const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags)
{
	struct inode *inode = path->dentry->d_inode;
	int rc;

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "orangefs_getattr: called on %pd\n",
		     path->dentry);

	rc = orangefs_inode_getattr(inode, 0, 0, request_mask);
	if (rc != 0)
		return rc;

	generic_fillattr(inode, stat);

	/* override block size reported to stat */
	stat->blksize = ORANGEFS_I(inode)->blksize;

	stat->result_mask = STATX_BASIC_STATS;
	if (!(request_mask & STATX_SIZE))
		stat->result_mask &= ~STATX_SIZE;

	return 0;
}
/*
 * Push all data for a specific file onto permanent storage.
 *
 * Services an ORANGEFS_VFS_OP_FSYNC upcall for the file's object
 * reference and afterwards flushes the inode with orangefs_flush_inode().
 * @start, @end and @datasync are not consulted.
 *
 * Returns -ENOMEM when no op can be allocated, otherwise the result of
 * service_operation().
 */
static int orangefs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct orangefs_inode_s *oi = ORANGEFS_I(file_inode(file));
	struct orangefs_kernel_op_s *op;
	int rc;

	op = op_alloc(ORANGEFS_VFS_OP_FSYNC);
	if (op == NULL)
		return -ENOMEM;

	op->upcall.req.fsync.refn = oi->refn;

	rc = service_operation(op, "orangefs_fsync",
			       get_interruptible_flag(file_inode(file)));

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "orangefs_fsync got return value of %d\n",
		     rc);

	op_release(op);

	orangefs_flush_inode(file_inode(file));
	return rc;
}
/*
 * Called whenever the VFS dirties the inode in response to atime updates.
 * Logs the inode's khandle and sets the atime flag on the OrangeFS inode;
 * @flags is not consulted.
 */
static void orangefs_dirty_inode(struct inode *inode, int flags)
{
	struct orangefs_inode_s *oi = ORANGEFS_I(inode);

	gossip_debug(GOSSIP_SUPER_DEBUG,
		     "orangefs_dirty_inode: %pU\n",
		     get_khandle_from_ino(inode));

	SetAtimeFlag(oi);
}
/*
 * Called to determine if handles match.
 *
 * Returns 1 when both the khandle and the fs_id stored in @inode equal
 * those of the object reference passed in @data, 0 otherwise.
 */
static int orangefs_test_inode(struct inode *inode, void *data)
{
	struct orangefs_object_kref *wanted = data;
	struct orangefs_inode_s *oi = ORANGEFS_I(inode);

	/* a non-zero compare result means the handles differ */
	if (ORANGEFS_khandle_cmp(&(oi->refn.khandle), &(wanted->khandle)))
		return 0;

	return oi->refn.fs_id == wanted->fs_id;
}
static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct orangefs_kernel_op_s *new_op; loff_t orig_size; int ret = -EINVAL; gossip_debug(GOSSIP_INODE_DEBUG, "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n", __func__, get_khandle_from_ino(inode), &orangefs_inode->refn.khandle, orangefs_inode->refn.fs_id, iattr->ia_size); /* Ensure that we have a up to date size, so we know if it changed. */ ret = orangefs_inode_getattr(inode, 0, 1); if (ret == -ESTALE) ret = -EIO; if (ret) { gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n", __func__, ret); return ret; } orig_size = i_size_read(inode); truncate_setsize(inode, iattr->ia_size); new_op = op_alloc(ORANGEFS_VFS_OP_TRUNCATE); if (!new_op) return -ENOMEM; new_op->upcall.req.truncate.refn = orangefs_inode->refn; new_op->upcall.req.truncate.size = (__s64) iattr->ia_size; ret = service_operation(new_op, __func__, get_interruptible_flag(inode)); /* * the truncate has no downcall members to retrieve, but * the status value tells us if it went through ok or not */ gossip_debug(GOSSIP_INODE_DEBUG, "orangefs: orangefs_truncate got return value of %d\n", ret); op_release(new_op); if (ret != 0) return ret; if (orig_size != i_size_read(inode)) iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME; return ret; }
/*
 * Tear down an inode: log it, then hand the actual release to
 * orangefs_i_callback() through call_rcu() on the inode's i_rcu head.
 */
static void orangefs_destroy_inode(struct inode *inode)
{
	struct orangefs_inode_s *oi = ORANGEFS_I(inode);

	gossip_debug(GOSSIP_SUPER_DEBUG,
		     "%s: deallocated %p destroying inode %pU\n",
		     __func__, oi, get_khandle_from_ino(inode));

	call_rcu(&inode->i_rcu, orangefs_i_callback);
}
/*
 * Tear down an inode: log it, then return the enclosing OrangeFS inode
 * object to orangefs_inode_cache.
 */
static void orangefs_destroy_inode(struct inode *inode)
{
	struct orangefs_inode_s *oi = ORANGEFS_I(inode);

	gossip_debug(GOSSIP_SUPER_DEBUG,
		     "%s: deallocated %p destroying inode %pU\n",
		     __func__, oi, get_khandle_from_ino(inode));

	kmem_cache_free(orangefs_inode_cache, oi);
}
/*
 * Encode a file handle for @inode (and optionally @parent) into @fh.
 *
 * Layout written, in 32-bit words: words 0-3 hold the inode's khandle
 * (16 bytes), word 4 its fs_id; when @parent is given, the parent's
 * khandle starts at byte 20 and word 9 holds the parent's fs_id.
 *
 * *max_len is the capacity of @fh in words on entry and is set to the
 * number of words required (5, or 10 with a parent) on return.
 *
 * Returns 1 for an inode-only handle, 2 when the parent is included, and
 * 255 when the buffer is too small.
 */
static int orangefs_encode_fh(struct inode *inode, __u32 *fh, int *max_len, struct inode *parent)
{
	struct orangefs_object_kref refn;
	int needed = 5;
	int fh_type = 1;

	if (parent)
		needed = 10;

	if (*max_len < needed) {
		gossip_err("fh buffer is too small for encoding\n");
		*max_len = needed;
		return 255;
	}

	refn = ORANGEFS_I(inode)->refn;
	ORANGEFS_khandle_to(&refn.khandle, fh, 16);
	fh[4] = refn.fs_id;

	gossip_debug(GOSSIP_SUPER_DEBUG,
		     "Encoding fh: handle %pU, fsid %u\n",
		     &refn.khandle,
		     refn.fs_id);

	if (parent) {
		refn = ORANGEFS_I(parent)->refn;
		ORANGEFS_khandle_to(&refn.khandle, (char *) fh + 20, 16);
		fh[9] = refn.fs_id;

		fh_type = 2;
		gossip_debug(GOSSIP_SUPER_DEBUG,
			     "Encoding parent: handle %pU, fsid %u\n",
			     &refn.khandle,
			     refn.fs_id);
	}

	*max_len = needed;
	return fh_type;
}
static int orangefs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { struct orangefs_kernel_op_s *new_op; int ret; gossip_debug(GOSSIP_NAME_DEBUG, "orangefs_rename: called (%pd2 => %pd2) ct=%d\n", old_dentry, new_dentry, d_count(new_dentry)); ORANGEFS_I(new_dentry->d_parent->d_inode)->getattr_time = jiffies - 1; new_op = op_alloc(ORANGEFS_VFS_OP_RENAME); if (!new_op) return -EINVAL; new_op->upcall.req.rename.old_parent_refn = ORANGEFS_I(old_dir)->refn; new_op->upcall.req.rename.new_parent_refn = ORANGEFS_I(new_dir)->refn; strncpy(new_op->upcall.req.rename.d_old_name, old_dentry->d_name.name, ORANGEFS_NAME_MAX); strncpy(new_op->upcall.req.rename.d_new_name, new_dentry->d_name.name, ORANGEFS_NAME_MAX); ret = service_operation(new_op, "orangefs_rename", get_interruptible_flag(old_dentry->d_inode)); gossip_debug(GOSSIP_NAME_DEBUG, "orangefs_rename: got downcall status %d\n", ret); if (new_dentry->d_inode) new_dentry->d_inode->i_ctime = CURRENT_TIME; op_release(new_op); return ret; }
static int orangefs_inode_removexattr(struct inode *inode, const char *prefix, const char *name, int flags) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct orangefs_kernel_op_s *new_op = NULL; int ret = -ENOMEM; down_write(&orangefs_inode->xattr_sem); new_op = op_alloc(ORANGEFS_VFS_OP_REMOVEXATTR); if (!new_op) goto out_unlock; new_op->upcall.req.removexattr.refn = orangefs_inode->refn; /* * NOTE: Although keys are meant to be NULL terminated * textual strings, I am going to explicitly pass the * length just in case we change this later on... */ ret = snprintf((char *)new_op->upcall.req.removexattr.key, ORANGEFS_MAX_XATTR_NAMELEN, "%s%s", (prefix ? prefix : ""), name); new_op->upcall.req.removexattr.key_sz = ret + 1; gossip_debug(GOSSIP_XATTR_DEBUG, "orangefs_inode_removexattr: key %s, key_sz %d\n", (char *)new_op->upcall.req.removexattr.key, (int)new_op->upcall.req.removexattr.key_sz); ret = service_operation(new_op, "orangefs_inode_removexattr", get_interruptible_flag(inode)); if (ret == -ENOENT) { /* * Request to replace a non-existent attribute is an error. */ if (flags & XATTR_REPLACE) ret = -ENODATA; else ret = 0; } gossip_debug(GOSSIP_XATTR_DEBUG, "orangefs_inode_removexattr: returning %d\n", ret); op_release(new_op); out_unlock: up_write(&orangefs_inode->xattr_sem); return ret; }
/* return 0 on success; non-zero otherwise */ static int orangefs_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; struct orangefs_inode_s *parent = ORANGEFS_I(dir); struct orangefs_kernel_op_s *new_op; int ret; gossip_debug(GOSSIP_NAME_DEBUG, "%s: called on %s\n" " (inode %pU): Parent is %pU | fs_id %d\n", __func__, dentry->d_name.name, get_khandle_from_ino(inode), &parent->refn.khandle, parent->refn.fs_id); new_op = op_alloc(ORANGEFS_VFS_OP_REMOVE); if (!new_op) return -ENOMEM; new_op->upcall.req.remove.parent_refn = parent->refn; strncpy(new_op->upcall.req.remove.d_name, dentry->d_name.name, ORANGEFS_NAME_MAX); ret = service_operation(new_op, "orangefs_unlink", get_interruptible_flag(inode)); gossip_debug(GOSSIP_NAME_DEBUG, "%s: service_operation returned:%d:\n", __func__, ret); op_release(new_op); if (!ret) { drop_nlink(inode); SetMtimeFlag(parent); dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb); mark_inode_dirty_sync(dir); } return ret; }
/*
 * Read data from a specified offset in a file (referenced by inode).
 * Data may be placed either in a user or kernel buffer.
 *
 * The request must fit in one bufmap buffer; a larger iov_iter count is
 * rejected with -EINVAL.  On a positive result *offset is advanced by the
 * number of bytes returned by wait_for_direct_io().
 *
 * Returns the value of wait_for_direct_io(), or -EINVAL for an oversized
 * request.
 */
ssize_t orangefs_inode_read(struct inode *inode, struct iov_iter *iter, loff_t *offset, loff_t readahead_size)
{
	struct orangefs_inode_s *oi = ORANGEFS_I(inode);
	size_t want = iov_iter_count(iter);
	size_t limit;
	ssize_t got;

	g_orangefs_stats.reads++;

	limit = orangefs_bufmap_size_query();
	if (want > limit) {
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s: count is too large (%zd/%zd)!\n",
			     __func__, want, limit);
		return -EINVAL;
	}

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU) %zd@%llu\n",
		     __func__,
		     &oi->refn.khandle,
		     want,
		     llu(*offset));

	got = wait_for_direct_io(ORANGEFS_IO_READ, inode, offset, iter,
				 want, readahead_size);
	if (got > 0)
		*offset += got;

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): Value(%zd) returned.\n",
		     __func__,
		     &oi->refn.khandle,
		     got);

	return got;
}
/* * Obtain attributes of an object given a dentry */ int orangefs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *kstat) { int ret = -ENOENT; struct inode *inode = dentry->d_inode; struct orangefs_inode_s *orangefs_inode = NULL; gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_getattr: called on %pd\n", dentry); ret = orangefs_inode_getattr(inode, 0, 0); if (ret == 0) { generic_fillattr(inode, kstat); /* override block size reported to stat */ orangefs_inode = ORANGEFS_I(inode); kstat->blksize = orangefs_inode->blksize; } return ret; }
static int orangefs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { struct orangefs_inode_s *parent = ORANGEFS_I(dir); struct orangefs_kernel_op_s *new_op; struct inode *inode; int mode = 755; int ret; gossip_debug(GOSSIP_NAME_DEBUG, "%s: called\n", __func__); if (!symname) return -EINVAL; if (strlen(symname)+1 > ORANGEFS_NAME_MAX) return -ENAMETOOLONG; new_op = op_alloc(ORANGEFS_VFS_OP_SYMLINK); if (!new_op) return -ENOMEM; new_op->upcall.req.sym.parent_refn = parent->refn; fill_default_sys_attrs(new_op->upcall.req.sym.attributes, ORANGEFS_TYPE_SYMLINK, mode); strncpy(new_op->upcall.req.sym.entry_name, dentry->d_name.name, ORANGEFS_NAME_MAX); strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); gossip_debug(GOSSIP_NAME_DEBUG, "Symlink Got ORANGEFS handle %pU on fsid %d (ret=%d)\n", &new_op->downcall.resp.sym.refn.khandle, new_op->downcall.resp.sym.refn.fs_id, ret); if (ret < 0) { gossip_debug(GOSSIP_NAME_DEBUG, "%s: failed with error code %d\n", __func__, ret); goto out; } inode = orangefs_new_inode(dir->i_sb, dir, S_IFLNK | mode, 0, &new_op->downcall.resp.sym.refn); if (IS_ERR(inode)) { gossip_err ("*** Failed to allocate orangefs symlink inode\n"); ret = PTR_ERR(inode); goto out; } gossip_debug(GOSSIP_NAME_DEBUG, "Assigned symlink inode new number of %pU\n", get_khandle_from_ino(inode)); d_instantiate(dentry, inode); unlock_new_inode(inode); dentry->d_time = jiffies + dcache_timeout_msecs*HZ/1000; ORANGEFS_I(inode)->getattr_time = jiffies - 1; gossip_debug(GOSSIP_NAME_DEBUG, "Inode (Symlink) %pU -> %s\n", get_khandle_from_ino(inode), dentry->d_name.name); SetMtimeFlag(parent); dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb); mark_inode_dirty_sync(dir); ret = 0; out: op_release(new_op); return ret; }
/*
 * Get a newly allocated inode to go with a negative dentry.
 *
 * Services an ORANGEFS_VFS_OP_CREATE upcall with the parent's object
 * reference, default attributes for a metafile with @mode, and the entry
 * name.  On success a regular-file inode is built from the reference in
 * the downcall and attached to @dentry; the parent's mtime/ctime are
 * updated and marked dirty.  @exclusive is not consulted.
 *
 * Returns 0 on success, -ENOMEM when no op can be allocated, or the error
 * from service_operation() / orangefs_new_inode().
 */
static int orangefs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool exclusive)
{
	struct orangefs_inode_s *parent = ORANGEFS_I(dir);
	struct orangefs_kernel_op_s *op;
	struct inode *created;
	int rc;

	gossip_debug(GOSSIP_NAME_DEBUG, "%s: %s\n",
		     __func__,
		     dentry->d_name.name);

	op = op_alloc(ORANGEFS_VFS_OP_CREATE);
	if (op == NULL)
		return -ENOMEM;

	op->upcall.req.create.parent_refn = parent->refn;

	fill_default_sys_attrs(op->upcall.req.create.attributes,
			       ORANGEFS_TYPE_METAFILE, mode);

	strncpy(op->upcall.req.create.d_name,
		dentry->d_name.name, ORANGEFS_NAME_MAX);

	rc = service_operation(op, __func__, get_interruptible_flag(dir));

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "%s: %s: handle:%pU: fsid:%d: new_op:%p: ret:%d:\n",
		     __func__,
		     dentry->d_name.name,
		     &op->downcall.resp.create.refn.khandle,
		     op->downcall.resp.create.refn.fs_id,
		     op,
		     rc);

	if (rc < 0)
		goto release;

	created = orangefs_new_inode(dir->i_sb, dir, S_IFREG | mode, 0,
				     &op->downcall.resp.create.refn);
	if (IS_ERR(created)) {
		gossip_err("%s: Failed to allocate inode for file :%s:\n",
			   __func__,
			   dentry->d_name.name);
		rc = PTR_ERR(created);
		goto release;
	}

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "%s: Assigned inode :%pU: for file :%s:\n",
		     __func__,
		     get_khandle_from_ino(created),
		     dentry->d_name.name);

	d_instantiate(dentry, created);
	unlock_new_inode(created);
	dentry->d_time = jiffies + dcache_timeout_msecs*HZ/1000;
	ORANGEFS_I(created)->getattr_time = jiffies - 1;

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "%s: dentry instantiated for %s\n",
		     __func__,
		     dentry->d_name.name);

	SetMtimeFlag(parent);
	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
	mark_inode_dirty_sync(dir);
	rc = 0;

release:
	op_release(op);
	gossip_debug(GOSSIP_NAME_DEBUG,
		     "%s: %s: returning %d\n",
		     __func__,
		     dentry->d_name.name,
		     rc);
	return rc;
}
/* * Attempt to resolve an object name (dentry->d_name), parent handle, and * fsid into a handle for the object. */ static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct orangefs_inode_s *parent = ORANGEFS_I(dir); struct orangefs_kernel_op_s *new_op; struct inode *inode; struct dentry *res; int ret = -EINVAL; /* * in theory we could skip a lookup here (if the intent is to * create) in order to avoid a potentially failed lookup, but * leaving it in can skip a valid lookup and try to create a file * that already exists (e.g. the vfs already handles checking for * -EEXIST on O_EXCL opens, which is broken if we skip this lookup * in the create path) */ gossip_debug(GOSSIP_NAME_DEBUG, "%s called on %s\n", __func__, dentry->d_name.name); if (dentry->d_name.len > (ORANGEFS_NAME_MAX - 1)) return ERR_PTR(-ENAMETOOLONG); new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP); if (!new_op) return ERR_PTR(-ENOMEM); new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW; gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d using parent %pU\n", __FILE__, __func__, __LINE__, &parent->refn.khandle); new_op->upcall.req.lookup.parent_refn = parent->refn; strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name, ORANGEFS_NAME_MAX); gossip_debug(GOSSIP_NAME_DEBUG, "%s: doing lookup on %s under %pU,%d\n", __func__, new_op->upcall.req.lookup.d_name, &new_op->upcall.req.lookup.parent_refn.khandle, new_op->upcall.req.lookup.parent_refn.fs_id); ret = service_operation(new_op, __func__, get_interruptible_flag(dir)); gossip_debug(GOSSIP_NAME_DEBUG, "Lookup Got %pU, fsid %d (ret=%d)\n", &new_op->downcall.resp.lookup.refn.khandle, new_op->downcall.resp.lookup.refn.fs_id, ret); if (ret < 0) { if (ret == -ENOENT) { /* * if no inode was found, add a negative dentry to * dcache anyway; if we don't, we don't hold expected * lookup semantics and we most noticeably break * during directory renames. 
* * however, if the operation failed or exited, do not * add the dentry (e.g. in the case that a touch is * issued on a file that already exists that was * interrupted during this lookup -- no need to add * another negative dentry for an existing file) */ gossip_debug(GOSSIP_NAME_DEBUG, "orangefs_lookup: Adding *negative* dentry " "%p for %s\n", dentry, dentry->d_name.name); d_add(dentry, NULL); res = NULL; goto out; } /* must be a non-recoverable error */ res = ERR_PTR(ret); goto out; } dentry->d_time = jiffies + dcache_timeout_msecs*HZ/1000; inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn); if (IS_ERR(inode)) { gossip_debug(GOSSIP_NAME_DEBUG, "error %ld from iget\n", PTR_ERR(inode)); res = ERR_CAST(inode); goto out; } ORANGEFS_I(inode)->getattr_time = jiffies - 1; gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d " "Found good inode [%lu] with count [%d]\n", __FILE__, __func__, __LINE__, inode->i_ino, (int)atomic_read(&inode->i_count)); /* update dentry/inode pair into dcache */ res = d_splice_alias(inode, dentry); gossip_debug(GOSSIP_NAME_DEBUG, "Lookup success (inode ct = %d)\n", (int)atomic_read(&inode->i_count)); out: op_release(new_op); return res; }
/*
 * Post and wait for the I/O upcall to finish
 *
 * Performs one synchronous transfer of at most @total_size bytes between
 * @iter and the file object behind @inode, starting at *@offset, through
 * a shared bufmap buffer:
 *
 *   1. acquire a buffer slot with orangefs_bufmap_get();
 *   2. for writes, copy the caller's data into the slot;
 *   3. service an ORANGEFS_VFS_OP_FILE_IO upcall;
 *   4. for reads, copy the completed data out of the slot.
 *
 * @type:            ORANGEFS_IO_READ or ORANGEFS_IO_WRITE
 * @inode:           inode whose object reference is sent in the upcall
 * @offset:          file position of the transfer; read, never updated here
 * @iter:            source (write) or destination (read) of the data
 * @total_size:      number of bytes requested
 * @readahead_size:  passed through to the upcall unchanged
 *
 * Returns the amt_complete value reported in the downcall on success,
 * -ENOMEM when no op can be allocated, or a negative error.  An
 * interrupted request (-EINTR) is translated depending on the op state;
 * see the comments in the body.
 */
static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
		loff_t *offset, struct iov_iter *iter,
		size_t total_size, loff_t readahead_size)
{
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
	struct orangefs_kernel_op_s *new_op = NULL;
	/* snapshot of the iterator so a restarted write can replay its data */
	struct iov_iter saved = *iter;
	/* -1 means "no buffer slot currently held" (checked at out:) */
	int buffer_index = -1;
	ssize_t ret;

	new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO);
	if (!new_op)
		return -ENOMEM;

	/* synchronous I/O */
	new_op->upcall.req.io.readahead_size = readahead_size;
	new_op->upcall.req.io.io_type = type;
	new_op->upcall.req.io.refn = orangefs_inode->refn;

populate_shared_memory:
	/* get a shared buffer index */
	buffer_index = orangefs_bufmap_get();
	if (buffer_index < 0) {
		/* a negative index is the error code itself */
		ret = buffer_index;
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s: orangefs_bufmap_get failure (%zd)\n",
			     __func__, ret);
		goto out;
	}
	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): GET op %p -> buffer_index %d\n",
		     __func__,
		     handle,
		     new_op,
		     buffer_index);

	new_op->uses_shared_memory = 1;
	new_op->upcall.req.io.buf_index = buffer_index;
	new_op->upcall.req.io.count = total_size;
	new_op->upcall.req.io.offset = *offset;

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): offset: %llu total_size: %zd\n",
		     __func__,
		     handle,
		     llu(*offset),
		     total_size);
	/*
	 * Stage 1: copy the buffers into client-core's address space
	 * precopy_buffers only pertains to writes.
	 */
	if (type == ORANGEFS_IO_WRITE) {
		ret = precopy_buffers(buffer_index,
				      iter,
				      total_size);
		if (ret < 0)
			goto out;
	}

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): Calling post_io_request with tag (%llu)\n",
		     __func__,
		     handle,
		     llu(new_op->tag));

	/* Stage 2: Service the I/O operation */
	ret = service_operation(new_op,
				type == ORANGEFS_IO_WRITE ?
					"file_write" :
					"file_read",
				get_interruptible_flag(inode));

	/*
	 * If service_operation() returns -EAGAIN #and# the operation was
	 * purged from orangefs_request_list or htable_ops_in_progress, then
	 * we know that the client was restarted, causing the shared memory
	 * area to be wiped clean.  To restart a write operation in this
	 * case, we must re-copy the data from the user's iovec to a NEW
	 * shared memory location. To restart a read operation, we must get
	 * a new shared memory location.
	 */
	if (ret == -EAGAIN && op_state_purged(new_op)) {
		/* give the stale slot back before asking for a fresh one */
		orangefs_bufmap_put(buffer_index);
		buffer_index = -1;
		/* rewind the iterator so precopy_buffers() sees the data again */
		if (type == ORANGEFS_IO_WRITE)
			*iter = saved;
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s:going to repopulate_shared_memory.\n",
			     __func__);
		goto populate_shared_memory;
	}

	if (ret < 0) {
		if (ret == -EINTR) {
			/*
			 * We can't return EINTR if any data was written,
			 * it's not POSIX. It is minimally acceptable
			 * to give a partial write, the way NFS does.
			 *
			 * It would be optimal to return all or nothing,
			 * but if a userspace write is bigger than
			 * an IO buffer, and the interrupt occurs
			 * between buffer writes, that would not be
			 * possible.
			 */
			/*
			 * NOTE(review): subtracting OP_VFS_STATE_GIVEN_UP
			 * presumably recovers the state the op was in before
			 * it was given up -- confirm against the op state
			 * definitions.
			 */
			switch (new_op->op_state - OP_VFS_STATE_GIVEN_UP) {
			/*
			 * If the op was waiting when the interrupt
			 * occurred, then the client-core did not
			 * trigger the write.
			 */
			case OP_VFS_STATE_WAITING:
				if (*offset == 0)
					ret = -EINTR;
				else
					ret = 0;
				break;
			/*
			 * If the op was in progress when the interrupt
			 * occurred, then the client-core was able to
			 * trigger the write.
			 */
			case OP_VFS_STATE_INPROGR:
				ret = total_size;
				break;
			default:
				gossip_err("%s: unexpected op state :%d:.\n",
					   __func__,
					   new_op->op_state);
				ret = 0;
				break;
			}
			gossip_debug(GOSSIP_FILE_DEBUG,
				     "%s: got EINTR, state:%d: %p\n",
				     __func__,
				     new_op->op_state,
				     new_op);
		} else {
			gossip_err("%s: error in %s handle %pU, returning %zd\n",
				__func__,
				type == ORANGEFS_IO_READ ?
					"read from" : "write to",
				handle, ret);
		}
		/*
		 * NOTE(review): when orangefs_cancel_op_in_progress() returns
		 * non-zero we return without releasing the buffer slot or the
		 * op -- presumably ownership of both has passed to the cancel
		 * path; confirm before changing this.
		 */
		if (orangefs_cancel_op_in_progress(new_op))
			return ret;

		goto out;
	}

	/*
	 * Stage 3: Post copy buffers from client-core's address space
	 * postcopy_buffers only pertains to reads.
	 */
	if (type == ORANGEFS_IO_READ) {
		ret = postcopy_buffers(buffer_index,
				       iter,
				       new_op->downcall.resp.io.amt_complete);
		if (ret < 0)
			goto out;
	}
	gossip_debug(GOSSIP_FILE_DEBUG,
	    "%s(%pU): Amount %s, returned by the sys-io call:%d\n",
	    __func__,
	    handle,
	    type == ORANGEFS_IO_READ ?  "read" : "written",
	    (int)new_op->downcall.resp.io.amt_complete);

	ret = new_op->downcall.resp.io.amt_complete;

out:
	/* release the buffer slot only if one is still held */
	if (buffer_index >= 0) {
		orangefs_bufmap_put(buffer_index);
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): PUT buffer_index %d\n",
			     __func__, handle, buffer_index);
		buffer_index = -1;
	}
	op_release(new_op);
	return ret;
}
int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); int error = 0; void *value = NULL; size_t size = 0; const char *name = NULL; switch (type) { case ACL_TYPE_ACCESS: name = XATTR_NAME_POSIX_ACL_ACCESS; if (acl) { umode_t mode; error = posix_acl_update_mode(inode, &mode, &acl); if (error) { gossip_err("%s: posix_acl_update_mode err: %d\n", __func__, error); return error; } if (inode->i_mode != mode) SetModeFlag(orangefs_inode); inode->i_mode = mode; mark_inode_dirty_sync(inode); } break; case ACL_TYPE_DEFAULT: name = XATTR_NAME_POSIX_ACL_DEFAULT; break; default: gossip_err("%s: invalid type %d!\n", __func__, type); return -EINVAL; } gossip_debug(GOSSIP_ACL_DEBUG, "%s: inode %pU, key %s type %d\n", __func__, get_khandle_from_ino(inode), name, type); if (acl) { size = posix_acl_xattr_size(acl->a_count); value = kmalloc(size, GFP_KERNEL); if (!value) return -ENOMEM; error = posix_acl_to_xattr(&init_user_ns, acl, value, size); if (error < 0) goto out; } gossip_debug(GOSSIP_ACL_DEBUG, "%s: name %s, value %p, size %zd, acl %p\n", __func__, name, value, size, acl); /* * Go ahead and set the extended attribute now. NOTE: Suppose acl * was NULL, then value will be NULL and size will be 0 and that * will xlate to a removexattr. However, we don't want removexattr * complain if attributes does not exist. */ error = orangefs_inode_setxattr(inode, name, value, size, 0); out: kfree(value); if (!error) set_cached_acl(inode, type, acl); return error; }
/*
 * Tries to get a specified object's keys into a user-specified buffer of a
 * given size.  Note that like the previous instances of xattr routines, this
 * also allows you to pass in a NULL pointer and 0 size to probe the size for
 * subsequent memory allocations. Thus our return value is always the size of
 * all the keys unless there were errors in fetching the keys!
 *
 * @dentry: dentry whose inode's keys are listed
 * @buffer: destination for the keys, or NULL together with @size == 0
 * @size:   capacity of @buffer in bytes; 0 requests a size estimate only
 *
 * The keys are fetched in batches: each upcall carries a continuation
 * token, and the loop repeats until the server returns
 * ORANGEFS_ITERATE_END or a key no longer fits in @buffer.
 *
 * Returns the number of bytes stored in @buffer (or, for @size == 0, an
 * upper bound computed from the first batch), -EINVAL for a NULL buffer
 * with non-zero size, -ENOMEM when no op can be allocated, -EIO when the
 * downcall contains out-of-range counts or lengths, or the error from
 * service_operation().  The inode's xattr_sem is held for reading while
 * the keys are fetched.
 */
ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
	struct inode *inode = dentry->d_inode;
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_kernel_op_s *new_op;
	/* continuation token; carried from one batch to the next */
	__u64 token = ORANGEFS_ITERATE_START;
	ssize_t ret = -ENOMEM;
	/* bytes copied into buffer so far (or the estimate when size == 0) */
	ssize_t total = 0;
	int count_keys = 0;
	/* byte offset of the current key inside the downcall's key area */
	int key_size;
	int i = 0;
	int returned_count = 0;

	if (size > 0 && !buffer) {
		gossip_err("%s: bogus NULL pointers\n", __func__);
		return -EINVAL;
	}

	down_read(&orangefs_inode->xattr_sem);
	new_op = op_alloc(ORANGEFS_VFS_OP_LISTXATTR);
	if (!new_op)
		goto out_unlock;

	if (buffer && size > 0)
		memset(buffer, 0, size);

try_again:
	/* each batch starts at the beginning of the downcall's key area */
	key_size = 0;
	new_op->upcall.req.listxattr.refn = orangefs_inode->refn;
	new_op->upcall.req.listxattr.token = token;
	/* a requested_count of 0 is used for the size-probing call */
	new_op->upcall.req.listxattr.requested_count =
	    (size == 0) ? 0 : ORANGEFS_MAX_XATTR_LISTLEN;
	ret = service_operation(new_op, __func__,
				get_interruptible_flag(inode));
	if (ret != 0)
		goto done;

	if (size == 0) {
		/*
		 * This is a bit of a big upper limit, but I did not want to
		 * spend too much time getting this correct, since users end
		 * up allocating memory rather than us...
		 */
		total = new_op->downcall.resp.listxattr.returned_count *
			ORANGEFS_MAX_XATTR_NAMELEN;
		goto done;
	}

	/* sanity-check the count reported by the downcall before using it */
	returned_count = new_op->downcall.resp.listxattr.returned_count;
	if (returned_count < 0 ||
	    returned_count > ORANGEFS_MAX_XATTR_LISTLEN) {
		gossip_err("%s: impossible value for returned_count:%d:\n",
		__func__,
		returned_count);
		ret = -EIO;
		goto done;
	}

	/*
	 * Check to see how much can be fit in the buffer. Fit only whole keys.
	 */
	for (i = 0; i < returned_count; i++) {
		if (new_op->downcall.resp.listxattr.lengths[i] < 0 ||
		    new_op->downcall.resp.listxattr.lengths[i] >
		    ORANGEFS_MAX_XATTR_NAMELEN) {
			/*
			 * NOTE(review): the value printed in the "[%d]" slot
			 * is the offending length, not the index i.
			 */
			gossip_err("%s: impossible value for lengths[%d]\n",
			    __func__,
			    new_op->downcall.resp.listxattr.lengths[i]);
			ret = -EIO;
			goto done;
		}
		/* stop (with ret still 0) as soon as a whole key would not fit */
		if (total + new_op->downcall.resp.listxattr.lengths[i] > size)
			goto done;

		/*
		 * Since many dumb programs try to setxattr() on our reserved
		 * xattrs this is a feeble attempt at defeating those by not
		 * listing them in the output of listxattr.. sigh
		 */
		/*
		 * NOTE(review): keys are copied when is_reserved_key() returns
		 * non-zero and skipped (logged as "[RESERVED]") when it
		 * returns zero.  The helper is not visible here; confirm that
		 * a non-zero return really means "not reserved".
		 */
		if (is_reserved_key(new_op->downcall.resp.listxattr.key +
				    key_size,
				    new_op->downcall.resp.
					listxattr.lengths[i])) {
			gossip_debug(GOSSIP_XATTR_DEBUG, "Copying key %d -> %s\n",
					i, new_op->downcall.resp.listxattr.key +
						key_size);
			memcpy(buffer + total,
				new_op->downcall.resp.listxattr.key + key_size,
				new_op->downcall.resp.listxattr.lengths[i]);
			total += new_op->downcall.resp.listxattr.lengths[i];
			count_keys++;
		} else {
			gossip_debug(GOSSIP_XATTR_DEBUG, "[RESERVED] key %d -> %s\n",
					i, new_op->downcall.resp.listxattr.key +
						key_size);
		}
		/* advance to the next key whether or not this one was copied */
		key_size += new_op->downcall.resp.listxattr.lengths[i];
	}

	/*
	 * Since the buffer was large enough, we might have to continue
	 * fetching more keys!
	 */
	token = new_op->downcall.resp.listxattr.token;
	if (token != ORANGEFS_ITERATE_END)
		goto try_again;

done:
	gossip_debug(GOSSIP_XATTR_DEBUG, "%s: returning %d"
		     " [size of buffer %ld] (filled in %d keys)\n",
		     __func__,
		     ret ? (int)ret : (int)total,
		     (long)size,
		     count_keys);
	op_release(new_op);
	/* success: report the byte count instead of 0 */
	if (ret == 0)
		ret = total;
out_unlock:
	up_read(&orangefs_inode->xattr_sem);
	return ret;
}
/*
 * Common entry point for read/write/readv/writev
 * This function will dispatch it to either the direct I/O
 * or buffered I/O path depending on the mount options and/or
 * augmented/extended metadata attached to the file.
 * Note: File extended attributes override any mount options.
 *
 * The request in @iter is split into pieces no larger than the bufmap
 * buffer size and each piece is handed to wait_for_direct_io(); *offset
 * advances by what each piece transferred.  A short transfer ends the
 * loop.  After a transfer of more than zero bytes, reads mark the file
 * accessed and writes set the mtime flag, stamp i_mtime and dirty the
 * inode.
 *
 * Returns the total number of bytes transferred if any, 0 for an empty
 * request, otherwise the error from wait_for_direct_io().
 */
static ssize_t do_readv_writev(enum ORANGEFS_io_type type, struct file *file, loff_t *offset, struct iov_iter *iter)
{
	struct inode *inode = file->f_mapping->host;
	struct orangefs_inode_s *oi = ORANGEFS_I(inode);
	struct orangefs_khandle *handle = &oi->refn.khandle;
	size_t requested = iov_iter_count(iter);
	ssize_t transferred = 0;
	ssize_t rc = -EINVAL;
	size_t chunk;

	gossip_debug(GOSSIP_FILE_DEBUG,
		"%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
		__func__,
		handle,
		(int)requested);

	if (type == ORANGEFS_IO_WRITE) {
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): proceeding with offset : %llu, "
			     "size %d\n",
			     __func__,
			     handle,
			     llu(*offset),
			     (int)requested);
	}

	if (requested == 0) {
		rc = 0;
		goto finish;
	}

	while ((chunk = iov_iter_count(iter)) != 0) {
		/* how much to transfer in this loop iteration */
		if (chunk > orangefs_bufmap_size_query())
			chunk = orangefs_bufmap_size_query();

		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): size of each_count(%d)\n",
			     __func__,
			     handle,
			     (int)chunk);
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): BEFORE wait_for_io: offset is %d\n",
			     __func__,
			     handle,
			     (int)*offset);

		rc = wait_for_direct_io(type, inode, offset, iter,
					chunk, 0);
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): return from wait_for_io:%d\n",
			     __func__,
			     handle,
			     (int)rc);

		if (rc < 0)
			goto finish;

		*offset += rc;
		transferred += rc;

		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): AFTER wait_for_io: offset is %d\n",
			     __func__,
			     handle,
			     (int)*offset);

		/*
		 * if we got a short I/O operations,
		 * fall out and return what we got so far
		 */
		if ((size_t)rc < chunk)
			break;
	}

finish:
	if (transferred > 0)
		rc = transferred;

	if (rc > 0) {
		if (type == ORANGEFS_IO_READ) {
			file_accessed(file);
		} else {
			SetMtimeFlag(oi);
			inode->i_mtime = CURRENT_TIME;
			mark_inode_dirty_sync(inode);
		}
	}

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): Value(%d) returned.\n",
		     __func__,
		     handle,
		     (int)rc);

	return rc;
}
static void orangefs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); kmem_cache_free(orangefs_inode_cache, orangefs_inode); }
/*
 * Map an OrangeFS object type to the matching S_IF* file-type constant:
 * metafile -> S_IFREG, directory -> S_IFDIR, symlink -> S_IFLNK.
 * Returns -1 for any other object type.
 */
static int orangefs_inode_type(enum orangefs_ds_type objtype)
{
	if (objtype == ORANGEFS_TYPE_METAFILE)
		return S_IFREG;
	else if (objtype == ORANGEFS_TYPE_DIRECTORY)
		return S_IFDIR;
	else if (objtype == ORANGEFS_TYPE_SYMLINK)
		return S_IFLNK;
	else
		return -1;
}

static int orangefs_inode_is_stale(struct inode *inode, int new,
    struct ORANGEFS_sys_attr_s *attrs, char *link_target)
{
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	int type = orangefs_inode_type(attrs->objtype);
	if (!new) {
		/*
		 * If the inode type or symlink target have changed then this
		 * inode is stale.
		 */
		if (type == -1 || !(inode->i_mode & type)) {
			orangefs_make_bad_inode(inode);
			return 1;
		}
		if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
		    link_target, ORANGEFS_NAME_MAX)) {
			orangefs_make_bad_inode(inode);
			return 1;
		}
/*
 * Common entry point for read/write/readv/writev
 * This function will dispatch it to either the direct I/O
 * or buffered I/O path depending on the mount options and/or
 * augmented/extended metadata attached to the file.
 * Note: File extended attributes override any mount options.
 *
 * The request in @iter is split into pieces no larger than the bufmap
 * buffer size and each piece is handed to wait_for_direct_io(); *offset
 * advances by what each piece transferred.  A short transfer ends the
 * loop.  After a transfer of more than zero bytes, reads mark the file
 * accessed; writes update the file times and invalidate the cached
 * attributes.
 *
 * Returns the total number of bytes transferred if any, 0 for an empty
 * request, otherwise the error from wait_for_direct_io().
 */
static ssize_t do_readv_writev(enum ORANGEFS_io_type type, struct file *file, loff_t *offset, struct iov_iter *iter)
{
	struct inode *inode = file->f_mapping->host;
	struct orangefs_inode_s *oi = ORANGEFS_I(inode);
	struct orangefs_khandle *handle = &oi->refn.khandle;
	size_t requested = iov_iter_count(iter);
	ssize_t transferred = 0;
	ssize_t rc = -EINVAL;
	size_t chunk;

	gossip_debug(GOSSIP_FILE_DEBUG,
		"%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
		__func__,
		handle,
		(int)requested);

	if (type == ORANGEFS_IO_WRITE) {
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): proceeding with offset : %llu, "
			     "size %d\n",
			     __func__,
			     handle,
			     llu(*offset),
			     (int)requested);
	}

	if (requested == 0) {
		rc = 0;
		goto finish;
	}

	while ((chunk = iov_iter_count(iter)) != 0) {
		/* how much to transfer in this loop iteration */
		if (chunk > orangefs_bufmap_size_query())
			chunk = orangefs_bufmap_size_query();

		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): size of each_count(%d)\n",
			     __func__,
			     handle,
			     (int)chunk);
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): BEFORE wait_for_io: offset is %d\n",
			     __func__,
			     handle,
			     (int)*offset);

		rc = wait_for_direct_io(type, inode, offset, iter,
					chunk, 0);
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): return from wait_for_io:%d\n",
			     __func__,
			     handle,
			     (int)rc);

		if (rc < 0)
			goto finish;

		*offset += rc;
		transferred += rc;

		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): AFTER wait_for_io: offset is %d\n",
			     __func__,
			     handle,
			     (int)*offset);

		/*
		 * if we got a short I/O operations,
		 * fall out and return what we got so far
		 */
		if ((size_t)rc < chunk)
			break;
	}

finish:
	if (transferred > 0)
		rc = transferred;

	if (rc > 0) {
		if (type == ORANGEFS_IO_READ) {
			file_accessed(file);
		} else {
			file_update_time(file);
			/*
			 * Must invalidate to ensure write loop doesn't
			 * prevent kernel from reading updated
			 * attribute.  Size probably changed because of
			 * the write, and other clients could update
			 * any other attribute.
			 */
			oi->getattr_time = jiffies - 1;
		}
	}

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): Value(%d) returned.\n",
		     __func__,
		     handle,
		     (int)rc);

	return rc;
}
/*
 * Create the directory named by @dentry inside @dir.
 *
 * Sends an ORANGEFS_VFS_OP_MKDIR upcall carrying the parent's object
 * reference, default attributes for a directory with @mode, and the new
 * name. On success a new inode is built from the reference in the
 * downcall, attached to @dentry, and the parent's mtime/ctime are updated.
 *
 * Returns -ENOMEM if the op cannot be allocated, the negative value from
 * service_operation() if the upcall fails, PTR_ERR() of the inode if
 * orangefs_new_inode() fails, and otherwise the (non-negative) result of
 * service_operation().
 */
static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	struct orangefs_inode_s *parent = ORANGEFS_I(dir);
	struct orangefs_kernel_op_s *new_op;
	struct inode *inode;
	int ret;

	new_op = op_alloc(ORANGEFS_VFS_OP_MKDIR);
	if (!new_op)
		return -ENOMEM;

	new_op->upcall.req.mkdir.parent_refn = parent->refn;

	fill_default_sys_attrs(new_op->upcall.req.mkdir.attributes,
			       ORANGEFS_TYPE_DIRECTORY, mode);

	/*
	 * strncpy() does not terminate the destination when the source is
	 * at least as long as the bound, so copy one byte less than the
	 * limit and terminate explicitly. The name handed to the upcall is
	 * then always a valid C string.
	 */
	strncpy(new_op->upcall.req.mkdir.d_name,
		dentry->d_name.name, ORANGEFS_NAME_MAX - 1);
	new_op->upcall.req.mkdir.d_name[ORANGEFS_NAME_MAX - 1] = '\0';

	ret = service_operation(new_op, __func__, get_interruptible_flag(dir));

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "Mkdir Got ORANGEFS handle %pU on fsid %d\n",
		     &new_op->downcall.resp.mkdir.refn.khandle,
		     new_op->downcall.resp.mkdir.refn.fs_id);

	if (ret < 0) {
		gossip_debug(GOSSIP_NAME_DEBUG,
			     "%s: failed with error code %d\n",
			     __func__, ret);
		goto out;
	}

	inode = orangefs_new_inode(dir->i_sb, dir, S_IFDIR | mode, 0,
				   &new_op->downcall.resp.mkdir.refn);
	if (IS_ERR(inode)) {
		gossip_err("*** Failed to allocate orangefs dir inode\n");
		ret = PTR_ERR(inode);
		goto out;
	}

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "Assigned dir inode new number of %pU\n",
		     get_khandle_from_ino(inode));

	d_instantiate(dentry, inode);
	unlock_new_inode(inode);

	/* Set when the dentry expires and mark the inode attributes stale. */
	dentry->d_time = jiffies + dcache_timeout_msecs*HZ/1000;
	ORANGEFS_I(inode)->getattr_time = jiffies - 1;

	gossip_debug(GOSSIP_NAME_DEBUG,
		     "Inode (Directory) %pU -> %s\n",
		     get_khandle_from_ino(inode),
		     dentry->d_name.name);

	/*
	 * NOTE: we have no good way to keep nlink consistent for directories
	 * across clients; keep constant at 1.
	 */
	SetMtimeFlag(parent);
	dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
	mark_inode_dirty_sync(dir);
out:
	op_release(new_op);
	return ret;
}
/* * Tries to get a specified key's attributes of a given * file into a user-specified buffer. Note that the getxattr * interface allows for the users to probe the size of an * extended attribute by passing in a value of 0 to size. * Thus our return value is always the size of the attribute * unless the key does not exist for the file and/or if * there were errors in fetching the attribute value. */ ssize_t orangefs_inode_getxattr(struct inode *inode, const char *name, void *buffer, size_t size) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct orangefs_kernel_op_s *new_op = NULL; ssize_t ret = -ENOMEM; ssize_t length = 0; int fsuid; int fsgid; gossip_debug(GOSSIP_XATTR_DEBUG, "%s: name %s, buffer_size %zd\n", __func__, name, size); if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) return -EINVAL; fsuid = from_kuid(&init_user_ns, current_fsuid()); fsgid = from_kgid(&init_user_ns, current_fsgid()); gossip_debug(GOSSIP_XATTR_DEBUG, "getxattr on inode %pU, name %s " "(uid %o, gid %o)\n", get_khandle_from_ino(inode), name, fsuid, fsgid); down_read(&orangefs_inode->xattr_sem); new_op = op_alloc(ORANGEFS_VFS_OP_GETXATTR); if (!new_op) goto out_unlock; new_op->upcall.req.getxattr.refn = orangefs_inode->refn; strcpy(new_op->upcall.req.getxattr.key, name); /* * NOTE: Although keys are meant to be NULL terminated textual * strings, I am going to explicitly pass the length just in case * we change this later on... */ new_op->upcall.req.getxattr.key_sz = strlen(name) + 1; ret = service_operation(new_op, "orangefs_inode_getxattr", get_interruptible_flag(inode)); if (ret != 0) { if (ret == -ENOENT) { ret = -ENODATA; gossip_debug(GOSSIP_XATTR_DEBUG, "orangefs_inode_getxattr: inode %pU key %s" " does not exist!\n", get_khandle_from_ino(inode), (char *)new_op->upcall.req.getxattr.key); } goto out_release_op; } /* * Length returned includes null terminator. 
*/ length = new_op->downcall.resp.getxattr.val_sz; /* * Just return the length of the queried attribute. */ if (size == 0) { ret = length; goto out_release_op; } /* * Check to see if key length is > provided buffer size. */ if (length > size) { ret = -ERANGE; goto out_release_op; } memcpy(buffer, new_op->downcall.resp.getxattr.val, length); memset(buffer + length, 0, size - length); gossip_debug(GOSSIP_XATTR_DEBUG, "orangefs_inode_getxattr: inode %pU " "key %s key_sz %d, val_len %d\n", get_khandle_from_ino(inode), (char *)new_op-> upcall.req.getxattr.key, (int)new_op-> upcall.req.getxattr.key_sz, (int)ret); ret = length; out_release_op: op_release(new_op); out_unlock: up_read(&orangefs_inode->xattr_sem); return ret; }
/* * Tries to set an attribute for a given key on a file. * * Returns a -ve number on error and 0 on success. Key is text, but value * can be binary! */ int orangefs_inode_setxattr(struct inode *inode, const char *name, const void *value, size_t size, int flags) { struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode); struct orangefs_kernel_op_s *new_op; int internal_flag = 0; int ret = -ENOMEM; gossip_debug(GOSSIP_XATTR_DEBUG, "%s: name %s, buffer_size %zd\n", __func__, name, size); if (size > ORANGEFS_MAX_XATTR_VALUELEN) return -EINVAL; if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) return -EINVAL; internal_flag = convert_to_internal_xattr_flags(flags); /* This is equivalent to a removexattr */ if (size == 0 && !value) { gossip_debug(GOSSIP_XATTR_DEBUG, "removing xattr (%s)\n", name); return orangefs_inode_removexattr(inode, name, flags); } gossip_debug(GOSSIP_XATTR_DEBUG, "setxattr on inode %pU, name %s\n", get_khandle_from_ino(inode), name); down_write(&orangefs_inode->xattr_sem); new_op = op_alloc(ORANGEFS_VFS_OP_SETXATTR); if (!new_op) goto out_unlock; new_op->upcall.req.setxattr.refn = orangefs_inode->refn; new_op->upcall.req.setxattr.flags = internal_flag; /* * NOTE: Although keys are meant to be NULL terminated textual * strings, I am going to explicitly pass the length just in * case we change this later on... 
*/ strcpy(new_op->upcall.req.setxattr.keyval.key, name); new_op->upcall.req.setxattr.keyval.key_sz = strlen(name) + 1; memcpy(new_op->upcall.req.setxattr.keyval.val, value, size); new_op->upcall.req.setxattr.keyval.val_sz = size; gossip_debug(GOSSIP_XATTR_DEBUG, "orangefs_inode_setxattr: key %s, key_sz %d " " value size %zd\n", (char *)new_op->upcall.req.setxattr.keyval.key, (int)new_op->upcall.req.setxattr.keyval.key_sz, size); ret = service_operation(new_op, "orangefs_inode_setxattr", get_interruptible_flag(inode)); gossip_debug(GOSSIP_XATTR_DEBUG, "orangefs_inode_setxattr: returning %d\n", ret); /* when request is serviced properly, free req op struct */ op_release(new_op); out_unlock: up_write(&orangefs_inode->xattr_sem); return ret; }