/* Modify refcount in buffer, then release buffer */
static int update_refcount(struct sb *sb, struct buffer_head *buffer,
			   unsigned offset, u16 val)
{
	unsigned delta = tux3_get_current_delta();
	struct buffer_head *clone;
	__be16 *refcount;

	/*
	 * The atable is protected by i_mutex for now.
	 * blockdirty() should never return -EAGAIN.
	 * FIXME: need finer granularity locking
	 */
	clone = blockdirty(buffer, delta);
	if (IS_ERR(clone)) {
		assert(PTR_ERR(clone) != -EAGAIN);
		blockput(buffer);
		return PTR_ERR(clone);
	}

	refcount = bufdata(clone);
	refcount[offset] = cpu_to_be16(val);
	mark_buffer_dirty_non(clone);
	blockput(clone);

	return 0;
}
static loff_t unatom_dict_write(struct inode *atable, atom_t atom,
				loff_t where)
{
	unsigned delta = tux3_get_current_delta();
	struct buffer_head *buffer, *clone;
	loff_t old;
	unsigned offset;

	buffer = blockread_unatom(atable, atom, &offset);
	if (!buffer)
		return -EIO;

	/*
	 * The atable is protected by i_mutex for now.
	 * blockdirty() should never return -EAGAIN.
	 * FIXME: need finer granularity locking
	 */
	clone = blockdirty(buffer, delta);
	if (IS_ERR(clone)) {
		assert(PTR_ERR(clone) != -EAGAIN);
		blockput(buffer);
		return PTR_ERR(clone);
	}

	__be64 *unatom_dict = bufdata(clone);
	old = be64_to_cpu(unatom_dict[offset]);
	unatom_dict[offset] = cpu_to_be64(where);
	mark_buffer_dirty_non(clone);
	blockput(clone);

	return old;
}
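/*
 * update_refcount() and unatom_dict_write() above share one copy-on-write
 * discipline: blockdirty() may return a forked clone of the buffer for the
 * current delta, and all stores must go through that clone. A minimal
 * sketch of the shared pattern, assuming only the blockdirty()/bufdata()/
 * mark_buffer_dirty_non()/blockput() interfaces used above (the helper
 * name and callback are hypothetical, not tux3 API):
 */
static int with_dirty_block(struct buffer_head *buffer, unsigned delta,
			    void (*modify)(void *data, void *arg), void *arg)
{
	struct buffer_head *clone = blockdirty(buffer, delta);

	if (IS_ERR(clone)) {
		/* On error, the original reference is still ours to drop */
		blockput(buffer);
		return PTR_ERR(clone);
	}
	/* On success, our reference now lives on the (possibly forked) clone */
	modify(bufdata(clone), arg);
	mark_buffer_dirty_non(clone);
	blockput(clone);
	return 0;
}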
static int tux3_set_page_dirty_buffers(struct page *page)
{
#if 0
	struct address_space *mapping = page->mapping;
	int newly_dirty;

	spin_lock(&mapping->private_lock);
	if (page_has_buffers(page)) {
		struct buffer_head *head = page_buffers(page);
		struct buffer_head *bh = head;

		do {
			set_buffer_dirty(bh);
			bh = bh->b_this_page;
		} while (bh != head);
	}
	newly_dirty = !TestSetPageDirty(page);
	spin_unlock(&mapping->private_lock);

	if (newly_dirty)
		__set_page_dirty(page, mapping, 1);

	return newly_dirty;
#else
	struct address_space *mapping = page->mapping;
	unsigned delta = tux3_get_current_delta();
	struct buffer_head *head, *buffer;
	int newly_dirty;

	/* This should be a tux3 page, and it must be locked */
	assert(mapping);
	assert(PageLocked(page));
	/* This page should have buffers (caller should allocate them) */
	assert(page_has_buffers(page));

	/*
	 * FIXME: we dirty all buffers on this page, so we could optimize
	 * this by not checking page-dirty/inode-dirty multiple times.
	 */
	newly_dirty = 0;
	if (!TestSetPageDirty(page)) {
		__tux3_set_page_dirty(page, mapping, 1);
		newly_dirty = 1;
	}

	buffer = head = page_buffers(page);
	do {
		__tux3_mark_buffer_dirty(buffer, delta);
		buffer = buffer->b_this_page;
	} while (buffer != head);
#endif
	return newly_dirty;
}
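/*
 * The do/while walk above relies on a page's buffer_heads forming a
 * circular singly linked list through b_this_page, anchored at
 * page_buffers(page). A minimal sketch of the same traversal, assuming
 * only that invariant and that the page already has buffers (the helper
 * name is hypothetical):
 */
static unsigned count_page_buffers(struct page *page)
{
	struct buffer_head *head = page_buffers(page);
	struct buffer_head *bh = head;
	unsigned count = 0;

	do {
		count++;
		bh = bh->b_this_page;	/* next buffer in the ring */
	} while (bh != head);		/* stop once we wrap back to the head */

	return count;
}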
static void add_maps(struct inode *inode, block_t index,
		     struct block_segment *seg, int nr_segs)
{
	unsigned delta = tux3_get_current_delta();

	for (int i = 0; i < nr_segs; i++) {
		struct block_segment *s = &seg[i];

		for (unsigned j = 0; j < s->count; j++) {
			struct buffer_head *buf;

			buf = blockget(inode->map, index + j);
			buf = blockdirty(buf, delta);
			memset(buf->data, 0, inode->i_sb->blocksize);
			*(block_t *)buf->data = s->block + j;
			mark_buffer_dirty_non(buf);
			blockput(buf);
		}
		index += s->count;
	}
}
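/*
 * A hedged usage sketch for add_maps(): the .block/.count fields match
 * the accesses above; the segment values, the starting index, and the
 * function name are purely illustrative.
 */
static void add_maps_example(struct inode *inode)
{
	struct block_segment seg[] = {
		{ .block = 100, .count = 4 },	/* physical blocks 100..103 */
		{ .block = 200, .count = 2 },	/* physical blocks 200..201 */
	};

	/* Map six logical blocks starting at logical index 8 */
	add_maps(inode, 8, seg, 2);
}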
int tux_delete_entry(struct inode *dir, struct buffer_head *buffer,
		     tux_dirent *entry)
{
	unsigned delta = tux3_get_current_delta();
	tux_dirent *prev = NULL, *this = bufdata(buffer);
	struct buffer_head *clone;
	void *olddata;

	while ((char *)this < (char *)entry) {
		if (this->rec_len == 0) {
			blockput(buffer);
			tux_zero_len_error(dir, bufindex(buffer));
			return -EIO;
		}
		prev = this;
		this = next_entry(this);
	}

	/*
	 * The directory is protected by i_mutex.
	 * blockdirty() should never return -EAGAIN.
	 */
	olddata = bufdata(buffer);
	clone = blockdirty(buffer, delta);
	if (IS_ERR(clone)) {
		assert(PTR_ERR(clone) != -EAGAIN);
		blockput(buffer);
		return PTR_ERR(clone);
	}
	entry = ptr_redirect(entry, olddata, bufdata(clone));
	prev = ptr_redirect(prev, olddata, bufdata(clone));

	if (prev)
		prev->rec_len = tux_rec_len_to_disk((void *)next_entry(entry) -
						    (void *)prev);
	memset(entry->name, 0, entry->name_len);
	entry->name_len = entry->type = 0;
	entry->inum = 0;
	mark_buffer_dirty_non(clone);
	blockput(clone);

	return 0;
}
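/*
 * ptr_redirect() is used throughout these paths to carry a pointer into
 * the old buffer data over to the forked clone. A minimal sketch of what
 * such a helper has to do, assuming it simply preserves the offset within
 * the block (the exact tux3 definition may differ; the name below is
 * suffixed to avoid implying it is the real one):
 */
static inline void *ptr_redirect_sketch(void *ptr, void *olddata,
					void *newdata)
{
	/* NULL must survive the redirect: see prev in tux_delete_entry() */
	if (!ptr)
		return NULL;
	/* Same offset within the block, relative to the new base */
	return newdata + (ptr - olddata);
}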
static int tux3_symlink(struct inode *dir, struct dentry *dentry,
			const char *symname)
{
	struct tux_iattr iattr = {
		.uid	= current_fsuid(),
		.gid	= current_fsgid(),
		.mode	= S_IFLNK | S_IRWXUGO,
	};

	return __tux3_symlink(dir, dentry, &iattr, symname);
}
#endif /* !__KERNEL__ */

static int tux3_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;
	struct sb *sb = tux_sb(inode->i_sb);

	change_begin(sb);
	int err = tux_del_dirent(dir, dentry);
	if (!err) {
		tux3_iattrdirty(inode);
		inode->i_ctime = dir->i_ctime;
		/* FIXME: we shouldn't write inode for i_nlink = 0? */
		inode_dec_link_count(inode);
	}
	change_end(sb);

	return err;
}

static int tux3_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct sb *sb = tux_sb(dir->i_sb);
	struct inode *inode = dentry->d_inode;
	int err = tux_dir_is_empty(inode);

	if (!err) {
		change_begin(sb);
		err = tux_del_dirent(dir, dentry);
		if (!err) {
			tux3_iattrdirty(inode);
			inode->i_ctime = dir->i_ctime;
			/* FIXME: do we need to do this for POSIX? */
			/* inode->i_size = 0; */
			clear_nlink(inode);
			tux3_mark_inode_dirty_sync(inode);

			inode_dec_link_count(dir);
		}
		change_end(sb);
	}

	return err;
}

static int tux3_rename(struct inode *old_dir, struct dentry *old_dentry,
		       struct inode *new_dir, struct dentry *new_dentry)
{
	struct inode *old_inode = old_dentry->d_inode;
	struct inode *new_inode = new_dentry->d_inode;
	struct sb *sb = tux_sb(old_inode->i_sb);
	struct buffer_head *old_buffer, *new_buffer, *clone;
	tux_dirent *old_entry, *new_entry;
	void *olddata;
	int err, new_subdir = 0;
	unsigned delta;

	old_entry = tux_find_dirent(old_dir, &old_dentry->d_name, &old_buffer);
	if (IS_ERR(old_entry))
		return PTR_ERR(old_entry);
	/* FIXME: is this needed? */
	assert(be64_to_cpu(old_entry->inum) == tux_inode(old_inode)->inum);

	change_begin(sb);
	delta = tux3_get_current_delta();

	if (new_inode) {
		int old_is_dir = S_ISDIR(old_inode->i_mode);
		if (old_is_dir) {
			err = tux_dir_is_empty(new_inode);
			if (err)
				goto error;
		}

		new_entry = tux_find_dirent(new_dir, &new_dentry->d_name,
					    &new_buffer);
		if (IS_ERR(new_entry)) {
			assert(PTR_ERR(new_entry) != -ENOENT);
			err = PTR_ERR(new_entry);
			goto error;
		}

		/*
		 * The directory is protected by i_mutex.
		 * blockdirty() should never return -EAGAIN.
		 */
		olddata = bufdata(new_buffer);
		clone = blockdirty(new_buffer, delta);
		if (IS_ERR(clone)) {
			assert(PTR_ERR(clone) != -EAGAIN);
			blockput(new_buffer);
			err = PTR_ERR(clone);
			goto error;
		}
		new_entry = ptr_redirect(new_entry, olddata, bufdata(clone));

		/* this releases new_buffer */
		tux_update_dirent(new_dir, clone, new_entry, old_inode);
		tux3_iattrdirty(new_inode);
		new_inode->i_ctime = new_dir->i_ctime;
		if (old_is_dir)
			drop_nlink(new_inode);
		inode_dec_link_count(new_inode);
	} else {
		new_subdir = S_ISDIR(old_inode->i_mode) && new_dir != old_dir;
		if (new_subdir) {
			if (new_dir->i_nlink >= TUX_LINK_MAX) {
				err = -EMLINK;
				goto error;
			}
		}
		err = tux_create_dirent(new_dir, &new_dentry->d_name,
					old_inode);
		if (err)
			goto error;

		if (new_subdir)
			inode_inc_link_count(new_dir);
	}
	tux3_iattrdirty(old_inode);
	old_inode->i_ctime = new_dir->i_ctime;
	tux3_mark_inode_dirty(old_inode);

	/*
	 * The new entry can be in the same buffer as old_buffer, and the
	 * path above may have forked that buffer. So if old_buffer is now
	 * a forked buffer, re-fetch and update old_buffer here.
	 */
	if (buffer_forked(old_buffer)) {
		clone = blockget(mapping(old_dir), bufindex(old_buffer));
		assert(clone);
		old_entry = ptr_redirect(old_entry, bufdata(old_buffer),
					 bufdata(clone));
		blockput(old_buffer);
		old_buffer = clone;
	}

	err = tux_delete_dirent(old_dir, old_buffer, old_entry);
	if (err) {
		tux3_fs_error(sb, "couldn't delete old entry (%Lu)",
			      tux_inode(old_inode)->inum);
		/* FIXME: now we have a hard link even if it's a dir. */
		inode_inc_link_count(old_inode);
	}
	if (!err && new_subdir)
		inode_dec_link_count(old_dir);

	change_end(sb);
	return err;

error:
	change_end(sb);
	blockput(old_buffer);
	return err;
}

#ifdef __KERNEL__
const struct file_operations tux_dir_fops = {
	.llseek		= generic_file_llseek,
	.read		= generic_read_dir,
	.readdir	= tux_readdir,
	.fsync		= tux3_sync_file,
};

const struct inode_operations tux_dir_iops = {
	.create		= tux3_create,
	.lookup		= tux3_lookup,
	.link		= tux3_link,
	.unlink		= tux3_unlink,
	.symlink	= tux3_symlink,
	.mkdir		= tux3_mkdir,
	.rmdir		= tux3_rmdir,
	.mknod		= tux3_mknod,
	.rename		= tux3_rename,
	.setattr	= tux3_setattr,
	.getattr	= tux3_getattr,
//	.setxattr	= generic_setxattr,
//	.getxattr	= generic_getxattr,
//	.listxattr	= ext3_listxattr,
//	.removexattr	= generic_removexattr,
//	.permission	= ext3_permission,
	/* FIXME: why doesn't ext4 support this for directory? */
//	.fallocate	= ext4_fallocate,
//	.fiemap		= ext4_fiemap,
};
loff_t tux_create_entry(struct inode *dir, const char *name, unsigned len,
			inum_t inum, umode_t mode, loff_t *size)
{
	unsigned delta = tux3_get_current_delta();
	struct sb *sb = tux_sb(dir->i_sb);
	tux_dirent *entry;
	struct buffer_head *buffer, *clone;
	unsigned reclen = TUX_REC_LEN(len), rec_len, name_len, offset;
	unsigned blocksize = sb->blocksize;
	block_t block, blocks = *size >> sb->blockbits;
	void *olddata;

	for (block = 0; block < blocks; block++) {
		buffer = blockread(mapping(dir), block);
		if (!buffer)
			return -EIO;

		entry = bufdata(buffer);
		tux_dirent *limit = bufdata(buffer) + blocksize - reclen;
		while (entry <= limit) {
			if (entry->rec_len == 0) {
				blockput(buffer);
				tux_zero_len_error(dir, block);
				return -EIO;
			}
			name_len = TUX_REC_LEN(entry->name_len);
			rec_len = tux_rec_len_from_disk(entry->rec_len);
			/* A deleted entry big enough for the new name? */
			if (is_deleted(entry) && rec_len >= reclen)
				goto create;
			/* A live entry with enough slack after its name? */
			if (rec_len >= name_len + reclen)
				goto create;
			entry = (void *)entry + rec_len;
		}
		blockput(buffer);
	}
	entry = NULL;
	buffer = blockget(mapping(dir), block);
	assert(!buffer_dirty(buffer));

create:
	/*
	 * The directory is protected by i_mutex.
	 * blockdirty() should never return -EAGAIN.
	 */
	olddata = bufdata(buffer);
	clone = blockdirty(buffer, delta);
	if (IS_ERR(clone)) {
		assert(PTR_ERR(clone) != -EAGAIN);
		blockput(buffer);
		return PTR_ERR(clone);
	}

	if (!entry) {
		/* Expanding the directory size. Initialize block. */
		entry = bufdata(clone);
		memset(entry, 0, blocksize);
		entry->rec_len = tux_rec_len_to_disk(blocksize);
		assert(is_deleted(entry));

		*size += blocksize;
	} else {
		entry = ptr_redirect(entry, olddata, bufdata(clone));

		if (!is_deleted(entry)) {
			/* Split: carve the slack off the live entry's tail */
			tux_dirent *newent = (void *)entry + name_len;
			unsigned rest_rec_len = rec_len - name_len;
			newent->rec_len = tux_rec_len_to_disk(rest_rec_len);
			entry->rec_len = tux_rec_len_to_disk(name_len);
			entry = newent;
		}
	}

	entry->name_len = len;
	memcpy(entry->name, name, len);
	offset = (void *)entry - bufdata(clone);
	/* this releases buffer */
	tux_update_entry(clone, entry, inum, mode);

	return (block << sb->blockbits) + offset; /* only for xattr create */
}
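/*
 * A hedged worked example of the split case above, assuming (for
 * illustration only) that TUX_REC_LEN(n) rounds a 12-byte tux_dirent
 * header plus n name bytes up to a 4-byte boundary:
 *
 *   A live entry with name_len = 2 owning rec_len = 64 really needs
 *   TUX_REC_LEN(2) = 16 bytes, leaving 48 bytes of slack. Creating a
 *   name of length 5 needs reclen = TUX_REC_LEN(5) = 20 <= 48, so the
 *   entry is split: the live entry's rec_len shrinks to 16, and the new
 *   entry starts 16 bytes after it with rec_len = 48.
 */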
static int tux3_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vma->vm_file);
	struct sb *sb = tux_sb(inode->i_sb);
	struct page *clone, *page = vmf->page;
	void *ptr;
	int ret;

	sb_start_pagefault(inode->i_sb);

retry:
	down_read(&tux_inode(inode)->truncate_lock);

	lock_page(page);
	if (page->mapping != mapping(inode)) {
		unlock_page(page);
		ret = VM_FAULT_NOPAGE;
		goto out;
	}

	/*
	 * A page fault can happen while holding change_begin/end()
	 * (e.g. copying user data between ->write_begin and
	 * ->write_end for write(2)).
	 *
	 * So we use the nested version here.
	 */
	change_begin_atomic_nested(sb, &ptr);

	/*
	 * FIXME: the caller releases vmf->page (old_page) unconditionally,
	 * so take an additional refcount to work around that.
	 */
	if (vmf->page == page)
		page_cache_get(page);

	clone = pagefork_for_blockdirty(page, tux3_get_current_delta());
	if (IS_ERR(clone)) {
		/* Someone did page fork */
		pgoff_t index = page->index;

		change_end_atomic_nested(sb, ptr);
		unlock_page(page);
		page_cache_release(page);
		up_read(&tux_inode(inode)->truncate_lock);

		switch (PTR_ERR(clone)) {
		case -EAGAIN:
			page = find_get_page(inode->i_mapping, index);
			assert(page);
			goto retry;
		case -ENOMEM:
			ret = VM_FAULT_OOM;
			break;
		default:
			ret = VM_FAULT_SIGBUS;
			break;
		}
		/* truncate_lock was already released above */
		goto out_fault;
	}

	file_update_time(vma->vm_file);

	/* Assign buffers to dirty */
	if (!page_has_buffers(clone))
		create_empty_buffers(clone, sb->blocksize, 0);

	/*
	 * We mark the page dirty already here so that when freeze is in
	 * progress, we are guaranteed that writeback during freezing will
	 * see the dirty page and writeprotect it again.
	 */
	tux3_set_page_dirty(clone);
#if 1
	/* FIXME: the caller doesn't see the changed vmf->page */
	vmf->page = clone;

	change_end_atomic_nested(sb, ptr);
	/* FIXME: the caller doesn't know about pagefork */
	unlock_page(clone);
	page_cache_release(clone);
	ret = 0;
//	ret = VM_FAULT_LOCKED;
#endif
out:
	up_read(&tux_inode(inode)->truncate_lock);
out_fault:
	sb_end_pagefault(inode->i_sb);

	return ret;
}
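/*
 * A hedged sketch of how a handler like this is typically wired up, for
 * context on the control flow above: the VM calls ->page_mkwrite when a
 * shared writable mapping takes a write fault on a write-protected page.
 * The struct and ->mmap hook below are illustrative, not tux3's exact
 * code; filemap_fault() and file_accessed() are standard kernel helpers.
 */
static const struct vm_operations_struct tux3_file_vm_ops = {
	.fault		= filemap_fault,
	.page_mkwrite	= tux3_page_mkwrite,
};

static int tux3_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	file_accessed(file);
	/* Route write faults on this mapping to tux3_page_mkwrite() */
	vma->vm_ops = &tux3_file_vm_ops;
	return 0;
}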