/* * return number of bytes that on-wire representation of @inode's identity * consumes. */ static int encode_inode_size(struct inode *inode) { assert("nikita-3514", inode != NULL); assert("nikita-3515", inode_file_plugin(inode) != NULL); assert("nikita-3516", inode_file_plugin(inode)->wire.size != NULL); return inode_file_plugin(inode)->wire.size(inode) + sizeof(d16); }
static int reserve_tail2extent_iteration(struct inode *inode) { reiser4_block_nr unformatted_nodes; reiser4_tree *tree; tree = reiser4_tree_by_inode(inode); /* number of unformatted nodes which will be created */ unformatted_nodes = TAIL2EXTENT_PAGE_NUM; /* * space required for one iteration of extent->tail conversion: * * 1. kill N tail items * * 2. insert TAIL2EXTENT_PAGE_NUM unformatted nodes * * 3. insert TAIL2EXTENT_PAGE_NUM (worst-case single-block * extents) extent units. * * 4. drilling to the leaf level by coord_by_key() * * 5. possible update of stat-data * */ grab_space_enable(); return reiser4_grab_space (2 * tree->height + TAIL2EXTENT_PAGE_NUM + TAIL2EXTENT_PAGE_NUM * estimate_one_insert_into_item(tree) + 1 + estimate_one_insert_item(tree) + inode_file_plugin(inode)->estimate.update(inode), BA_CAN_COMMIT); }
/*
 * Check whether @node is a possible virtual root (vroot) of @object,
 * and remember it in the inode if so.
 */
static void handle_vroot(struct inode *object, znode * node)
{
	coord_t coord;
	file_plugin *fplug = inode_file_plugin(object);

	assert("nikita-3353", fplug != NULL);
	assert("nikita-3354", fplug->owns_item != NULL);

	if (unlikely(node_is_empty(node)))
		return;

	/*
	 * If the leftmost item of @node belongs to @object, we cannot be
	 * sure that @node is vroot of @object, because some items of
	 * @object are probably in the sub-tree rooted at the left
	 * neighbor of @node.
	 */
	coord_init_first_unit(&coord, node);
	if (fplug->owns_item(object, &coord))
		return;

	/* mutatis mutandis for the rightmost item */
	coord_init_last_unit(&coord, node);
	if (fplug->owns_item(object, &coord))
		return;

	/* otherwise, @node is possible vroot of @object */
	inode_set_vroot(object, node);
}
/* * store on-wire representation of @inode's identity at the area beginning at * @start. */ static char *encode_inode(struct inode *inode, char *start) { assert("nikita-3517", inode != NULL); assert("nikita-3518", inode_file_plugin(inode) != NULL); assert("nikita-3519", inode_file_plugin(inode)->wire.write != NULL); /* * first, store two-byte identifier of object plugin, then */ save_plugin_id(file_plugin_to_plugin(inode_file_plugin(inode)), (d16 *) start); start += sizeof(d16); /* * call plugin to serialize object's identity */ return inode_file_plugin(inode)->wire.write(inode, start); }
/* this is helper for plugin->write_begin() */ int do_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { int result; file_plugin *fplug; struct inode *inode; assert("umka-3099", file != NULL); assert("umka-3100", page != NULL); assert("umka-3095", PageLocked(page)); if (to - from == PAGE_CACHE_SIZE || PageUptodate(page)) return 0; inode = page->mapping->host; fplug = inode_file_plugin(inode); if (page->mapping->a_ops->readpage == NULL) return RETERR(-EINVAL); result = page->mapping->a_ops->readpage(file, page); if (result != 0) { SetPageError(page); ClearPageUptodate(page); /* All reiser4 readpage() implementations should return the * page locked in case of error. */ assert("nikita-3472", PageLocked(page)); } else { /* * ->readpage() either: * * 1. starts IO against @page. @page is locked for IO in * this case. * * 2. doesn't start IO. @page is unlocked. * * In either case, page should be locked. */ lock_page(page); /* * IO (if any) is completed at this point. Check for IO * errors. */ if (!PageUptodate(page)) result = RETERR(-EIO); } assert("umka-3098", PageLocked(page)); return result; }
/*
 * Clear the stat-data flag (REISER4_PART_MIXED) indicating that a
 * tail<->extent conversion is in progress, and write the stat-data
 * back. Failure is reported via warning() only; 0 is always returned
 * (best-effort by design).
 */
static int complete_conversion(struct inode *inode)
{
	int ret;

	grab_space_enable();
	ret = reiser4_grab_space(inode_file_plugin(inode)->estimate.update(inode),
				 BA_CAN_COMMIT);
	if (ret == 0) {
		reiser4_inode_clr_flag(inode, REISER4_PART_MIXED);
		ret = reiser4_update_sd(inode);
	}
	if (ret != 0)
		warning("vs-1696",
			"Failed to clear converting bit of %llu: %i",
			(unsigned long long)get_inode_oid(inode), ret);
	return 0;
}
/* Audited by: green(2002.06.15) */
/*
 * Remove @count bytes of formatting (tail) items of @inode starting at
 * byte @offset by cutting the corresponding key range out of the tree.
 */
static int cut_formatting_items(struct inode *inode, loff_t offset, int count)
{
	reiser4_key from;
	reiser4_key to;

	/* AUDIT: How about putting an assertion here, what would check
	   all provided range is covered by tail items only? */
	/* key of the first byte in the range to be cut */
	inode_file_plugin(inode)->key_by_inode(inode, offset, &from);

	/* key of the last byte of that range */
	to = from;
	set_key_offset(&to, (__u64) (offset + count - 1));

	/* cut everything between those keys */
	return reiser4_cut_tree(reiser4_tree_by_inode(inode), &from, &to,
				inode, 0);
}
/*
 * Plugin-set handler: change the cluster plugin of @inode to @plugin.
 *
 * Returns 0 if nothing needs changing, -EINVAL for existing regular
 * files (their cluster plugin is fixed), otherwise the result of
 * aset_set_unsafe().
 *
 * Fix vs. previous version: the "already matches" check compared
 * inode_hash_plugin() against a CLUSTER plugin id — a cross-type
 * comparison that could silently skip a real cluster-plugin change
 * whenever the ids happened to coincide. It now compares the inode's
 * cluster plugin.
 */
static int change_cluster(struct inode *inode, reiser4_plugin * plugin,
			  pset_member memb)
{
	assert("edward-1324", inode != NULL);
	assert("edward-1325", plugin != NULL);
	assert("edward-1326", is_reiser4_inode(inode));
	assert("edward-1327",
	       plugin->h.type_id == REISER4_CLUSTER_PLUGIN_TYPE);

	/* Can't change the cluster plugin for already existent regular
	   files */
	if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE))
		return RETERR(-EINVAL);

	/* If matches, nothing to change. */
	if (inode_cluster_plugin(inode) != NULL &&
	    inode_cluster_plugin(inode)->h.id == plugin->h.id)
		return 0;

	return aset_set_unsafe(&reiser4_inode_data(inode)->pset,
			       PSET_CLUSTER, plugin);
}
/**
 * find_start
 * @inode: inode being converted
 * @id: item plugin id to search for (FORMATTING_ID or
 *      EXTENT_POINTER_ID)
 * @offset: in: offset to start scanning from; out: offset of the first
 *          byte stored in an item of type @id
 *
 * Used by tail2extent and extent2tail to detect where a previous,
 * uncompleted conversion stopped: walks the file's items forward from
 * *@offset until one with plugin id @id is found.
 *
 * Returns 0 and updates *@offset on success; -ENOENT when the lookup
 * lands between units (no more items of this file); other negative
 * codes on tree-lookup/zload errors.
 *
 * Fix vs. previous version: removed the unused local `ufo`
 * (unix_file_inode_data() result was never read) and the redundant
 * pre-loop init_lh()/result=0 — the loop body reinitializes both
 * before use.
 */
static int find_start(struct inode *inode, reiser4_plugin_id id, __u64 *offset)
{
	int result;
	lock_handle lh;
	coord_t coord;
	int found;
	reiser4_key key;

	found = 0;
	inode_file_plugin(inode)->key_by_inode(inode, *offset, &key);
	do {
		init_lh(&lh);
		result = find_file_item_nohint(&coord, &lh, &key,
					       ZNODE_READ_LOCK, inode);
		if (result == CBK_COORD_FOUND) {
			if (coord.between == AT_UNIT) {
				result = zload(coord.node);
				if (result == 0) {
					if (item_id_by_coord(&coord) == id)
						found = 1;
					else
						/* skip past this item and
						   keep scanning */
						item_plugin_by_coord(&coord)->s.
						    file.append_key(&coord,
								    &key);
					zrelse(coord.node);
				}
			} else
				result = RETERR(-ENOENT);
		}
		done_lh(&lh);
	} while (result == 0 && !found);
	*offset = get_key_offset(&key);
	return result;
}
/*
 * Plugin-set handler: change the compression plugin of @inode to
 * @plugin.
 *
 * Returns 0 if nothing needs changing, -EINVAL for existing regular
 * objects (their compression plugin is fixed), otherwise the result of
 * aset_set_unsafe().
 *
 * Fix vs. previous version: the "already matches" check compared
 * inode_hash_plugin() against a COMPRESSION plugin id — a cross-type
 * comparison that could silently skip a real compression-plugin change
 * whenever the ids happened to coincide. It now compares the inode's
 * compression plugin.
 */
static int change_compression(struct inode *inode, reiser4_plugin * plugin,
			      pset_member memb)
{
	assert("edward-1316", inode != NULL);
	assert("edward-1317", plugin != NULL);
	assert("edward-1318", is_reiser4_inode(inode));
	assert("edward-1319",
	       plugin->h.type_id == REISER4_COMPRESSION_PLUGIN_TYPE);

	/* cannot change compression plugin of already existing regular
	   object */
	if (!plugin_of_group(inode_file_plugin(inode), REISER4_DIRECTORY_FILE))
		return RETERR(-EINVAL);

	/* If matches, nothing to change. */
	if (inode_compression_plugin(inode) != NULL &&
	    inode_compression_plugin(inode)->h.id == plugin->h.id)
		return 0;

	return aset_set_unsafe(&reiser4_inode_data(inode)->pset,
			       PSET_COMPRESSION, plugin);
}
static int reserve_extent2tail_iteration(struct inode *inode) { reiser4_tree *tree; tree = reiser4_tree_by_inode(inode); /* * reserve blocks for (in this order): * * 1. removal of extent item * * 2. insertion of tail by insert_flow() * * 3. drilling to the leaf level by coord_by_key() * * 4. possible update of stat-data */ grab_space_enable(); return reiser4_grab_space (estimate_one_item_removal(tree) + estimate_insert_flow(tree->height) + 1 + estimate_one_insert_item(tree) + inode_file_plugin(inode)->estimate.update(inode), BA_CAN_COMMIT); }
	/*
	 * NOTE(review): this is the tail of a symlink-creation routine
	 * whose header lies outside this chunk (presumably something like
	 * create_symlink(); `symlink`, `data` and `result` are declared
	 * there) — confirm against the full file.
	 */
	assert("nikita-686", data->id == SYMLINK_FILE_PLUGIN_ID);
	/*
	 * stat data of symlink has symlink extension in which we store
	 * symlink content, that is, path symlink is pointing to.
	 */
	reiser4_inode_data(symlink)->extmask |= (1 << SYMLINK_STAT);
	assert("vs-838", symlink->i_private == NULL);
	/* stash the target path in ->i_private until stat-data is written */
	symlink->i_private = (void *)data->name;
	assert("vs-843", symlink->i_size == 0);
	INODE_SET_FIELD(symlink, i_size, strlen(data->name));
	/* insert stat data appended with data->name */
	result = inode_file_plugin(symlink)->write_sd_by_inode(symlink);
	if (result) {
		/* roll back the size on failure */
		/* FIXME-VS: Make sure that symlink->i_private is not attached
		   to kmalloced data */
		INODE_SET_FIELD(symlink, i_size, 0);
	} else {
		assert("vs-849", symlink->i_private
		       && reiser4_inode_get_flag(symlink,
						 REISER4_GENERIC_PTR_USED));
		/* stored content must match the requested target,
		   terminator included */
		assert("vs-850",
		       !memcmp((char *)symlink->i_private, data->name,
			       (size_t) symlink->i_size + 1));
	}
	return result;
}
/* for every page of file: read page, cut part of extent pointing to this page, put data of page tree by tail item */ int extent2tail(struct file * file, struct unix_file_info *uf_info) { int result; struct inode *inode; struct page *page; unsigned long num_pages, i; unsigned long start_page; reiser4_key from; reiser4_key to; unsigned count; __u64 offset; assert("nikita-3362", ea_obtained(uf_info)); inode = unix_file_info_to_inode(uf_info); assert("nikita-3412", !IS_RDONLY(inode)); assert("vs-1649", uf_info->container != UF_CONTAINER_TAILS); assert("", !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV)); offset = 0; if (reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) { /* * file is marked on disk as there was a conversion which did * not complete due to either crash or some error. Find which * offset tail conversion stopped at */ result = find_start(inode, EXTENT_POINTER_ID, &offset); if (result == -ENOENT) { /* no extent found, everything is converted */ uf_info->container = UF_CONTAINER_TAILS; complete_conversion(inode); return 0; } else if (result != 0) /* some other error */ return result; } reiser4_inode_set_flag(inode, REISER4_PART_IN_CONV); /* number of pages in the file */ num_pages = (inode->i_size + - offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; start_page = offset >> PAGE_CACHE_SHIFT; inode_file_plugin(inode)->key_by_inode(inode, offset, &from); to = from; result = 0; for (i = 0; i < num_pages; i++) { __u64 start_byte; result = reserve_extent2tail_iteration(inode); if (result != 0) break; if (i == 0 && offset == 0) { reiser4_inode_set_flag(inode, REISER4_PART_MIXED); reiser4_update_sd(inode); } page = read_mapping_page(inode->i_mapping, (unsigned)(i + start_page), NULL); if (IS_ERR(page)) { result = PTR_ERR(page); break; } wait_on_page_locked(page); if (!PageUptodate(page)) { page_cache_release(page); result = RETERR(-EIO); break; } /* cut part of file we have read */ start_byte = (__u64) ((i + start_page) << PAGE_CACHE_SHIFT); 
set_key_offset(&from, start_byte); set_key_offset(&to, start_byte + PAGE_CACHE_SIZE - 1); /* * reiser4_cut_tree_object() returns -E_REPEAT to allow atom * commits during over-long truncates. But * extent->tail conversion should be performed in one * transaction. */ result = reiser4_cut_tree(reiser4_tree_by_inode(inode), &from, &to, inode, 0); if (result) { page_cache_release(page); break; } /* put page data into tree via tail_write */ count = PAGE_CACHE_SIZE; if ((i == (num_pages - 1)) && (inode->i_size & ~PAGE_CACHE_MASK)) /* last page can be incompleted */ count = (inode->i_size & ~PAGE_CACHE_MASK); while (count) { loff_t pos = start_byte; assert("edward-1537", file != NULL && file->f_dentry != NULL); assert("edward-1538", file->f_dentry->d_inode == inode); result = reiser4_write_tail(file, inode, (char __user *)kmap(page), count, &pos); reiser4_free_file_fsdata(file); if (result <= 0) { warning("", "reiser4_write_tail failed"); page_cache_release(page); reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV); return result; } count -= result; } /* release page */ lock_page(page); /* page is already detached from jnode and mapping. */ assert("vs-1086", page->mapping == NULL); assert("nikita-2690", (!PagePrivate(page) && jprivate(page) == 0)); /* waiting for writeback completion with page lock held is * perfectly valid. */ wait_on_page_writeback(page); reiser4_drop_page(page); /* release reference taken by read_cache_page() above */ page_cache_release(page); drop_exclusive_access(uf_info); /* * throttle the conversion. 
* FIXME-EDWARD: Calculate and pass the precise number * of pages that was dirtied */ reiser4_throttle_write(inode, 1); get_exclusive_access(uf_info); /* * nobody is allowed to complete conversion but a process which * started it */ assert("", reiser4_inode_get_flag(inode, REISER4_PART_MIXED)); } reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV); if (i == num_pages) { /* file is converted to formatted items */ assert("vs-1698", reiser4_inode_get_flag(inode, REISER4_PART_MIXED)); assert("vs-1260", inode_has_no_jnodes(reiser4_inode_data(inode))); uf_info->container = UF_CONTAINER_TAILS; complete_conversion(inode); return 0; } /* * conversion is not complete. Inode was already marked as * REISER4_PART_MIXED and stat-data were updated at the first * iteration of the loop above. */ warning("nikita-2282", "Partial conversion of %llu: %lu of %lu: %i", (unsigned long long)get_inode_oid(inode), i, num_pages, result); /* this flag should be cleared, otherwise get_exclusive_access_careful() will fall into infinite loop */ assert("edward-1550", !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV)); return result; }
/**
 * tail2extent
 * @uf_info: unix-file specific part of the inode being converted
 *
 * Convert the body of a file from tail (formatting) items to extent
 * items: copy tail item contents into freshly allocated pages,
 * TAIL2EXTENT_PAGE_NUM pages per iteration, then replace() the tail
 * items with extents pointing at those pages. Progress is recorded in
 * stat-data (REISER4_PART_MIXED) so an interrupted conversion can be
 * resumed via find_start().
 *
 * Called with exclusive access (EA) held; EA is temporarily dropped
 * around reiser4_throttle_write() after each iteration.
 *
 * Returns 0 on complete conversion, negative error code otherwise.
 */
int tail2extent(struct unix_file_info *uf_info)
{
	int result;
	reiser4_key key;	/* key of next byte to be moved to page */
	char *p_data;		/* data of page */
	unsigned page_off = 0,	/* offset within the page where to copy data */
	    count;		/* number of bytes of item which can be
				 * copied to page */
	struct page *pages[TAIL2EXTENT_PAGE_NUM];
	struct page *page;
	int done;		/* set to 1 when all file is read */
	char *item;
	int i;
	struct inode *inode;
	int first_iteration;
	int bytes;
	__u64 offset;

	assert("nikita-3362", ea_obtained(uf_info));
	inode = unix_file_info_to_inode(uf_info);
	assert("nikita-3412", !IS_RDONLY(inode));
	assert("vs-1649", uf_info->container != UF_CONTAINER_EXTENTS);
	assert("", !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV));

	offset = 0;
	first_iteration = 1;
	result = 0;
	if (reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
		/*
		 * file is marked on disk as there was a conversion which did
		 * not complete due to either crash or some error. Find which
		 * offset tail conversion stopped at
		 */
		result = find_start(inode, FORMATTING_ID, &offset);
		if (result == -ENOENT) {
			/* no tail items found, everything is converted */
			uf_info->container = UF_CONTAINER_EXTENTS;
			complete_conversion(inode);
			return 0;
		} else if (result != 0)
			/* some other error */
			return result;
		first_iteration = 0;
	}

	reiser4_inode_set_flag(inode, REISER4_PART_IN_CONV);

	/* get key of first byte of a file */
	inode_file_plugin(inode)->key_by_inode(inode, offset, &key);

	done = 0;
	while (done == 0) {
		memset(pages, 0, sizeof(pages));
		result = reserve_tail2extent_iteration(inode);
		if (result != 0) {
			reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
			goto out;
		}
		if (first_iteration) {
			/* mark on-disk state as "conversion in progress" */
			reiser4_inode_set_flag(inode, REISER4_PART_MIXED);
			reiser4_update_sd(inode);
			first_iteration = 0;
		}
		bytes = 0;
		/* fill up to TAIL2EXTENT_PAGE_NUM pages from tail items */
		for (i = 0; i < sizeof_array(pages) && done == 0; i++) {
			assert("vs-598",
			       (get_key_offset(&key) & ~PAGE_CACHE_MASK) == 0);
			page = alloc_page(reiser4_ctx_gfp_mask_get());
			if (!page) {
				result = RETERR(-ENOMEM);
				goto error;
			}

			page->index =
			    (unsigned long)(get_key_offset(&key) >>
					    PAGE_CACHE_SHIFT);
			/*
			 * usually when one is going to longterm lock znode (as
			 * find_file_item does, for instance) he must not hold
			 * locked pages. However, there is an exception for
			 * case tail2extent. Pages appearing here are not
			 * reachable to everyone else, they are clean, they do
			 * not have jnodes attached so keeping them locked do
			 * not risk deadlock appearance
			 */
			assert("vs-983", !PagePrivate(page));
			reiser4_invalidate_pages(inode->i_mapping, page->index,
						 1, 0);

			for (page_off = 0; page_off < PAGE_CACHE_SIZE;) {
				coord_t coord;
				lock_handle lh;

				/* get next item */
				/* FIXME: we might want to readahead here */
				init_lh(&lh);
				result =
				    find_file_item_nohint(&coord, &lh, &key,
							  ZNODE_READ_LOCK,
							  inode);
				if (result != CBK_COORD_FOUND) {
					/*
					 * error happened or no items of the
					 * file were found
					 */
					done_lh(&lh);
					page_cache_release(page);
					goto error;
				}

				if (coord.between == AFTER_UNIT) {
					/*
					 * end of file is reached. Pad the
					 * page with zeros
					 */
					done_lh(&lh);
					done = 1;
					p_data = kmap_atomic(page, KM_USER0);
					memset(p_data + page_off, 0,
					       PAGE_CACHE_SIZE - page_off);
					kunmap_atomic(p_data, KM_USER0);
					break;
				}

				result = zload(coord.node);
				if (result) {
					page_cache_release(page);
					done_lh(&lh);
					goto error;
				}
				assert("vs-856", coord.between == AT_UNIT);
				item = ((char *)item_body_by_coord(&coord)) +
				    coord.unit_pos;

				/* how many bytes to copy */
				count =
				    item_length_by_coord(&coord) -
				    coord.unit_pos;
				/* limit length of copy to end of page */
				if (count > PAGE_CACHE_SIZE - page_off)
					count = PAGE_CACHE_SIZE - page_off;

				/*
				 * copy item (as much as will fit starting from
				 * the beginning of the item) into the page
				 */
				p_data = kmap_atomic(page, KM_USER0);
				memcpy(p_data + page_off, item, count);
				kunmap_atomic(p_data, KM_USER0);

				page_off += count;
				bytes += count;
				set_key_offset(&key,
					       get_key_offset(&key) + count);

				zrelse(coord.node);
				done_lh(&lh);
			}	/* end of loop which fills one page by content
				 * of formatting items */

			if (page_off) {
				/* something was copied into page */
				pages[i] = page;
			} else {
				page_cache_release(page);
				assert("vs-1648", done == 1);
				break;
			}
		}	/* end of loop through pages of one conversion
			 * iteration */

		if (i > 0) {
			/* swap the filled pages in for the tail items;
			 * release_all_pages() NULLs the slots, so a later
			 * goto error does not double-release */
			result = replace(inode, pages, i, bytes);
			release_all_pages(pages, sizeof_array(pages));
			if (result)
				goto error;
			/*
			 * We have to drop exclusive access to avoid deadlock
			 * which may happen because called by
			 * reiser4_writepages capture_unix_file requires to
			 * get non-exclusive access to a file. It is safe to
			 * drop EA in the middle of tail2extent conversion
			 * because write_unix_file, setattr_unix_file
			 * (truncate), mmap_unix_file, release_unix_file
			 * (extent2tail) checks if conversion is not in
			 * progress (see comments before
			 * get_exclusive_access_careful().
			 * Other processes that acquire non-exclusive access
			 * (read_unix_file, reiser4_writepages, etc) should
			 * work on partially converted files.
			 */
			drop_exclusive_access(uf_info);
			/* throttle the conversion
			   FIXME-EDWARD: Pass the precise number of pages
			   that was dirtied */
			reiser4_throttle_write(inode, 1);
			get_exclusive_access(uf_info);
			/*
			 * nobody is allowed to complete conversion but a
			 * process which started it
			 */
			assert("", reiser4_inode_get_flag(inode,
							  REISER4_PART_MIXED));
		}
	}
	if (result == 0) {
		/* file is converted to extent items */
		reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
		assert("vs-1697", reiser4_inode_get_flag(inode,
							 REISER4_PART_MIXED));
		uf_info->container = UF_CONTAINER_EXTENTS;
		complete_conversion(inode);
	} else {
		/*
		 * conversion is not complete. Inode was already marked as
		 * REISER4_PART_MIXED and stat-data were updated at the first
		 * iteration of the loop above.
		 */
	      error:
		release_all_pages(pages, sizeof_array(pages));
		reiser4_inode_clr_flag(inode, REISER4_PART_IN_CONV);
		warning("edward-1548", "Partial conversion of %llu: %i",
			(unsigned long long)get_inode_oid(inode), result);
	}
      out:
	/* this flag should be cleared, otherwise
	   get_exclusive_access_careful() will fall into infinite loop */
	assert("edward-1549", !reiser4_inode_get_flag(inode,
						      REISER4_PART_IN_CONV));
	return result;
}