static bool nova_can_skip_full_scan(struct super_block *sb) { struct nova_inode *pi = nova_get_inode_by_ino(sb, NOVA_BLOCKNODE_INO); int ret; if (pi->log_head == 0 || pi->log_tail == 0) return false; ret = nova_init_blockmap_from_inode(sb); if (ret) { nova_err(sb, "init blockmap failed, " "fall back to failure recovery\n"); return false; } ret = nova_init_inode_list_from_inode(sb); if (ret) { nova_err(sb, "init inode list failed, " "fall back to failure recovery\n"); nova_destroy_blocknode_trees(sb); return false; } return true; }
static int nova_traverse_dir_inode_log(struct super_block *sb, struct nova_inode *pi, struct scan_bitmap *bm) { struct nova_inode_log_page *curr_page; u64 curr_p; u64 next; curr_p = pi->log_head; if (curr_p == 0) { nova_err(sb, "Dir %llu log is NULL!\n", pi->nova_ino); BUG(); } nova_dbg_verbose("Log head 0x%llx, tail 0x%llx\n", curr_p, pi->log_tail); BUG_ON(curr_p & (PAGE_SIZE - 1)); set_bm(curr_p >> PAGE_SHIFT, bm, BM_4K); curr_page = (struct nova_inode_log_page *)nova_get_block(sb, curr_p); while ((next = curr_page->page_tail.next_page) != 0) { curr_p = next; BUG_ON(curr_p & (PAGE_SIZE - 1)); set_bm(curr_p >> PAGE_SHIFT, bm, BM_4K); curr_page = (struct nova_inode_log_page *) nova_get_block(sb, curr_p); } return 0; }
static int nova_insert_blocknode_map(struct super_block *sb, int cpuid, unsigned long low, unsigned long high) { struct nova_sb_info *sbi = NOVA_SB(sb); struct free_list *free_list; struct rb_root *tree; struct nova_range_node *blknode = NULL; unsigned long num_blocks = 0; int ret; num_blocks = high - low + 1; nova_dbgv("%s: cpu %d, low %lu, high %lu, num %lu\n", __func__, cpuid, low, high, num_blocks); free_list = nova_get_free_list(sb, cpuid); tree = &(free_list->block_free_tree); blknode = nova_alloc_blocknode(sb); if (blknode == NULL) return -ENOMEM; blknode->range_low = low; blknode->range_high = high; ret = nova_insert_blocktree(sbi, tree, blknode); if (ret) { nova_err(sb, "%s failed\n", __func__); nova_free_blocknode(sb, blknode); goto out; } if (!free_list->first_node) free_list->first_node = blknode; free_list->num_blocknode++; free_list->num_free_blocks += num_blocks; out: return ret; }
static int nova_get_block_info(struct super_block *sb, struct nova_sb_info *sbi) { void *virt_addr = NULL; #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0) pfn_t __pfn_t; #else unsigned long pfn; #endif long size; if (!sb->s_bdev->bd_disk->fops->direct_access) { nova_err(sb, "device does not support DAX\n"); return -EINVAL; } sbi->s_bdev = sb->s_bdev; #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0) size = sb->s_bdev->bd_disk->fops->direct_access(sb->s_bdev, 0, &virt_addr, &__pfn_t); #else size = sb->s_bdev->bd_disk->fops->direct_access(sb->s_bdev, 0, &virt_addr, &pfn); #endif if (size <= 0) { nova_err(sb, "direct_access failed\n"); return -EINVAL; } sbi->virt_addr = virt_addr; #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0) sbi->phys_addr = pfn_t_to_pfn(__pfn_t) << PAGE_SHIFT; #else sbi->phys_addr = pfn << PAGE_SHIFT; #endif sbi->initsize = size; nova_dbg("%s: dev %s, phys_addr 0x%llx, virt_addr %p, size %ld\n", __func__, sbi->s_bdev->bd_disk->disk_name, sbi->phys_addr, sbi->virt_addr, sbi->initsize); return 0; }
void nova_init_blockmap(struct super_block *sb, int recovery) { struct nova_sb_info *sbi = NOVA_SB(sb); struct rb_root *tree; unsigned long num_used_block; struct nova_range_node *blknode; struct free_list *free_list; unsigned long per_list_blocks; int i; int ret; num_used_block = sbi->reserved_blocks; /* Divide the block range among per-CPU free lists */ per_list_blocks = sbi->num_blocks / sbi->cpus; sbi->per_list_blocks = per_list_blocks; for (i = 0; i < sbi->cpus; i++) { free_list = nova_get_free_list(sb, i); tree = &(free_list->block_free_tree); free_list->block_start = per_list_blocks * i; free_list->block_end = free_list->block_start + per_list_blocks - 1; /* For recovery, update these fields later */ if (recovery == 0) { free_list->num_free_blocks = per_list_blocks; if (i == 0) { free_list->block_start += num_used_block; free_list->num_free_blocks -= num_used_block; } blknode = nova_alloc_blocknode(sb); if (blknode == NULL) NOVA_ASSERT(0); blknode->range_low = free_list->block_start; blknode->range_high = free_list->block_end; ret = nova_insert_blocktree(sbi, tree, blknode); if (ret) { nova_err(sb, "%s failed\n", __func__); nova_free_blocknode(sb, blknode); return; } free_list->first_node = blknode; free_list->num_blocknode = 1; } } free_list = nova_get_free_list(sb, (sbi->cpus - 1)); if (free_list->block_end + 1 < sbi->num_blocks) { /* Shared free list gets any remaining blocks */ sbi->shared_free_list.block_start = free_list->block_end + 1; sbi->shared_free_list.block_end = sbi->num_blocks - 1; } }
/*
 * Create the log of a new directory and append its "." and ".." entries.
 *
 * One log page is allocated and becomes both log head and log tail of
 * pi.  The "." entry (pointing at self_ino) is written at the start of
 * the page and the ".." entry (pointing at parent_ino) immediately after
 * it; each entry is flushed after it is filled in, and finally the inode
 * tail is advanced past both entries.
 *
 * Returns 0 on success, -EINVAL if pi already has a log head, -ENOMEM if
 * the log page could not be allocated.
 */
int nova_append_dir_init_entries(struct super_block *sb,
	struct nova_inode *pi, u64 self_ino, u64 parent_ino)
{
	struct nova_dentry *dot;
	struct nova_dentry *dotdot;
	u64 log_block;
	int nr_pages;

	if (pi->log_head) {
		nova_dbg("%s: log head exists @ 0x%llx!\n",
				__func__, pi->log_head);
		return -EINVAL;
	}

	nr_pages = nova_allocate_inode_log_pages(sb, pi, 1, &log_block);
	if (nr_pages != 1) {
		nova_err(sb, "ERROR: no inode log page available\n");
		return -ENOMEM;
	}

	pi->log_tail = pi->log_head = log_block;
	pi->i_blocks = 1;
	nova_flush_buffer(&pi->log_head, CACHELINE_SIZE, 0);

	/* "." lives at the very start of the new log page. */
	dot = (struct nova_dentry *)nova_get_block(sb, log_block);
	dot->entry_type = DIR_LOG;
	dot->ino = cpu_to_le64(self_ino);
	dot->name_len = 1;
	dot->de_len = cpu_to_le16(NOVA_DIR_LOG_REC_LEN(1));
	dot->mtime = CURRENT_TIME_SEC.tv_sec;
	dot->size = sb->s_blocksize;
	dot->links_count = 1;
	strncpy(dot->name, ".\0", 2);
	nova_flush_buffer(dot, NOVA_DIR_LOG_REC_LEN(1), 0);

	/* ".." follows directly, de_len bytes after ".". */
	dotdot = (struct nova_dentry *)((char *)dot +
					le16_to_cpu(dot->de_len));
	dotdot->entry_type = DIR_LOG;
	dotdot->ino = cpu_to_le64(parent_ino);
	dotdot->name_len = 2;
	dotdot->de_len = cpu_to_le16(NOVA_DIR_LOG_REC_LEN(2));
	dotdot->mtime = CURRENT_TIME_SEC.tv_sec;
	dotdot->size = sb->s_blocksize;
	dotdot->links_count = 2;
	strncpy(dotdot->name, "..\0", 3);
	nova_flush_buffer(dotdot, NOVA_DIR_LOG_REC_LEN(2), 0);

	/* New tail: just past both records. */
	nova_update_tail(pi, log_block + NOVA_DIR_LOG_REC_LEN(1) +
				NOVA_DIR_LOG_REC_LEN(2));

	return 0;
}
void nova_delete_dir_tree(struct super_block *sb, struct nova_inode_info_header *sih) { struct nova_dentry *direntry; unsigned long pos = 0; struct nova_dentry *entries[FREE_BATCH]; timing_t delete_time; int nr_entries; int i; void *ret; NOVA_START_TIMING(delete_dir_tree_t, delete_time); do { nr_entries = radix_tree_gang_lookup(&sih->tree, (void **)entries, pos, FREE_BATCH); for (i = 0; i < nr_entries; i++) { direntry = entries[i]; BUG_ON(!direntry); pos = BKDRHash(direntry->name, direntry->name_len); ret = radix_tree_delete(&sih->tree, pos); if (!ret || ret != direntry) { nova_err(sb, "dentry: type %d, inode %llu, " "name %s, namelen %u, rec len %u\n", direntry->entry_type, le64_to_cpu(direntry->ino), direntry->name, direntry->name_len, le16_to_cpu(direntry->de_len)); if (!ret) nova_dbg("ret is NULL\n"); } } pos++; } while (nr_entries == FREE_BATCH); NOVA_END_TIMING(delete_dir_tree_t, delete_time); return; }
static int nova_failure_insert_inodetree(struct super_block *sb, unsigned long ino_low, unsigned long ino_high) { struct nova_sb_info *sbi = NOVA_SB(sb); struct inode_map *inode_map; struct nova_range_node *prev = NULL, *next = NULL; struct nova_range_node *new_node; unsigned long internal_low, internal_high; int cpu; struct rb_root *tree; int ret; if (ino_low > ino_high) { nova_err(sb, "%s: ino low %lu, ino high %lu\n", __func__, ino_low, ino_high); BUG(); } cpu = ino_low % sbi->cpus; if (ino_high % sbi->cpus != cpu) { nova_err(sb, "%s: ino low %lu, ino high %lu\n", __func__, ino_low, ino_high); BUG(); } internal_low = ino_low / sbi->cpus; internal_high = ino_high / sbi->cpus; inode_map = &sbi->inode_maps[cpu]; tree = &inode_map->inode_inuse_tree; mutex_lock(&inode_map->inode_table_mutex); ret = nova_find_free_slot(sbi, tree, internal_low, internal_high, &prev, &next); if (ret) { nova_dbg("%s: ino %lu - %lu already exists!: %d\n", __func__, ino_low, ino_high, ret); mutex_unlock(&inode_map->inode_table_mutex); return ret; } if (prev && next && (internal_low == prev->range_high + 1) && (internal_high + 1 == next->range_low)) { /* fits the hole */ rb_erase(&next->node, tree); inode_map->num_range_node_inode--; prev->range_high = next->range_high; nova_free_inode_node(sb, next); goto finish; } if (prev && (internal_low == prev->range_high + 1)) { /* Aligns left */ prev->range_high += internal_high - internal_low + 1; goto finish; } if (next && (internal_high + 1 == next->range_low)) { /* Aligns right */ next->range_low -= internal_high - internal_low + 1; goto finish; } /* Aligns somewhere in the middle */ new_node = nova_alloc_inode_node(sb); NOVA_ASSERT(new_node); new_node->range_low = internal_low; new_node->range_high = internal_high; ret = nova_insert_inodetree(sbi, new_node, cpu); if (ret) { nova_err(sb, "%s failed\n", __func__); nova_free_inode_node(sb, new_node); goto finish; } inode_map->num_range_node_inode++; finish: 
mutex_unlock(&inode_map->inode_table_mutex); return ret; }
static int nova_init_inode_list_from_inode(struct super_block *sb) { struct nova_sb_info *sbi = NOVA_SB(sb); struct nova_inode *pi = nova_get_inode_by_ino(sb, NOVA_INODELIST1_INO); struct nova_range_node_lowhigh *entry; struct nova_range_node *range_node; struct inode_map *inode_map; size_t size = sizeof(struct nova_range_node_lowhigh); unsigned long num_inode_node = 0; u64 curr_p; unsigned long cpuid; int ret; sbi->s_inodes_used_count = 0; curr_p = pi->log_head; if (curr_p == 0) { nova_dbg("%s: pi head is 0!\n", __func__); return -EINVAL; } while (curr_p != pi->log_tail) { if (is_last_entry(curr_p, size)) { curr_p = next_log_page(sb, curr_p); } if (curr_p == 0) { nova_dbg("%s: curr_p is NULL!\n", __func__); NOVA_ASSERT(0); } entry = (struct nova_range_node_lowhigh *)nova_get_block(sb, curr_p); range_node = nova_alloc_inode_node(sb); if (range_node == NULL) NOVA_ASSERT(0); cpuid = (entry->range_low & CPUID_MASK) >> 56; if (cpuid >= sbi->cpus) { nova_err(sb, "Invalid cpuid %lu\n", cpuid); nova_free_inode_node(sb, range_node); NOVA_ASSERT(0); nova_destroy_inode_trees(sb); goto out; } range_node->range_low = entry->range_low & ~CPUID_MASK; range_node->range_high = entry->range_high; ret = nova_insert_inodetree(sbi, range_node, cpuid); if (ret) { nova_err(sb, "%s failed, %d\n", __func__, cpuid); nova_free_inode_node(sb, range_node); NOVA_ASSERT(0); nova_destroy_inode_trees(sb); goto out; } sbi->s_inodes_used_count += range_node->range_high - range_node->range_low + 1; num_inode_node++; inode_map = &sbi->inode_maps[cpuid]; inode_map->num_range_node_inode++; if (!inode_map->first_inode_range) inode_map->first_inode_range = range_node; curr_p += sizeof(struct nova_range_node_lowhigh); } nova_dbg("%s: %lu inode nodes\n", __func__, num_inode_node); out: nova_free_inode_log(sb, pi); return ret; }
static int nova_init_blockmap_from_inode(struct super_block *sb) { struct nova_sb_info *sbi = NOVA_SB(sb); struct nova_inode *pi = nova_get_inode_by_ino(sb, NOVA_BLOCKNODE_INO); struct free_list *free_list; struct nova_range_node_lowhigh *entry; struct nova_range_node *blknode; size_t size = sizeof(struct nova_range_node_lowhigh); u64 curr_p; u64 cpuid; int ret = 0; curr_p = pi->log_head; if (curr_p == 0) { nova_dbg("%s: pi head is 0!\n", __func__); return -EINVAL; } while (curr_p != pi->log_tail) { if (is_last_entry(curr_p, size)) { curr_p = next_log_page(sb, curr_p); } if (curr_p == 0) { nova_dbg("%s: curr_p is NULL!\n", __func__); NOVA_ASSERT(0); ret = -EINVAL; break; } entry = (struct nova_range_node_lowhigh *)nova_get_block(sb, curr_p); blknode = nova_alloc_blocknode(sb); if (blknode == NULL) NOVA_ASSERT(0); blknode->range_low = le64_to_cpu(entry->range_low); blknode->range_high = le64_to_cpu(entry->range_high); cpuid = get_cpuid(sbi, blknode->range_low); /* FIXME: Assume NR_CPUS not change */ free_list = nova_get_free_list(sb, cpuid); ret = nova_insert_blocktree(sbi, &free_list->block_free_tree, blknode); if (ret) { nova_err(sb, "%s failed\n", __func__); nova_free_blocknode(sb, blknode); NOVA_ASSERT(0); nova_destroy_blocknode_trees(sb); goto out; } free_list->num_blocknode++; if (free_list->num_blocknode == 1) free_list->first_node = blknode; free_list->num_free_blocks += blknode->range_high - blknode->range_low + 1; curr_p += sizeof(struct nova_range_node_lowhigh); } out: nova_free_inode_log(sb, pi); return ret; }
static int nova_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct nova_inode *pidir; struct nova_inode_info *si = NOVA_I(inode); struct nova_inode_info_header *sih = &si->header; struct nova_inode *child_pi; struct nova_inode *prev_child_pi = NULL; struct nova_dentry *entry = NULL; struct nova_dentry *prev_entry = NULL; unsigned short de_len; u64 pi_addr; unsigned long pos = 0; ino_t ino; void *addr; u64 curr_p; u8 type; int ret; timing_t readdir_time; NOVA_START_TIMING(readdir_t, readdir_time); pidir = nova_get_inode(sb, inode); nova_dbgv("%s: ino %llu, size %llu, pos 0x%llx\n", __func__, (u64)inode->i_ino, pidir->i_size, ctx->pos); if (pidir->log_head == 0) { nova_err(sb, "Dir %lu log is NULL!\n", inode->i_ino); BUG(); return -EINVAL; } pos = ctx->pos; if (pos == 0) { curr_p = pidir->log_head; } else if (pos == READDIR_END) { goto out; } else { curr_p = nova_find_next_dentry_addr(sb, sih, pos); if (curr_p == 0) goto out; } while (curr_p != pidir->log_tail) { if (goto_next_page(sb, curr_p)) { curr_p = next_log_page(sb, curr_p); } if (curr_p == 0) { nova_err(sb, "Dir %lu log is NULL!\n", inode->i_ino); BUG(); return -EINVAL; } addr = (void *)nova_get_block(sb, curr_p); type = nova_get_entry_type(addr); switch (type) { case SET_ATTR: curr_p += sizeof(struct nova_setattr_logentry); continue; case LINK_CHANGE: curr_p += sizeof(struct nova_link_change_entry); continue; case DIR_LOG: break; default: nova_dbg("%s: unknown type %d, 0x%llx\n", __func__, type, curr_p); BUG(); return -EINVAL; } entry = (struct nova_dentry *)nova_get_block(sb, curr_p); nova_dbgv("curr_p: 0x%llx, type %d, ino %llu, " "name %s, namelen %u, rec len %u\n", curr_p, entry->entry_type, le64_to_cpu(entry->ino), entry->name, entry->name_len, le16_to_cpu(entry->de_len)); de_len = le16_to_cpu(entry->de_len); if (entry->ino > 0 && entry->invalid == 0) { ino = __le64_to_cpu(entry->ino); pos = BKDRHash(entry->name, 
entry->name_len); ret = nova_get_inode_address(sb, ino, &pi_addr, 0); if (ret) { nova_dbg("%s: get child inode %lu address " "failed %d\n", __func__, ino, ret); ctx->pos = READDIR_END; return ret; } child_pi = nova_get_block(sb, pi_addr); nova_dbgv("ctx: ino %llu, name %s, " "name_len %u, de_len %u\n", (u64)ino, entry->name, entry->name_len, entry->de_len); if (prev_entry && !dir_emit(ctx, prev_entry->name, prev_entry->name_len, ino, IF2DT(le16_to_cpu(prev_child_pi->i_mode)))) { nova_dbgv("Here: pos %llu\n", ctx->pos); return 0; } prev_entry = entry; prev_child_pi = child_pi; } ctx->pos = pos; curr_p += de_len; } if (prev_entry && !dir_emit(ctx, prev_entry->name, prev_entry->name_len, ino, IF2DT(le16_to_cpu(prev_child_pi->i_mode)))) return 0; ctx->pos = READDIR_END; out: NOVA_END_TIMING(readdir_t, readdir_time); nova_dbgv("%s return\n", __func__); return 0; }
/*
 * Rebuild the in-memory state of a directory by replaying its log from
 * log_head to log_tail.
 *
 * SET_ATTR and LINK_CHANGE records are applied to the inode and their
 * log addresses remembered in sih.  For DIR_LOG records, a non-zero ino
 * that is not marked invalid is replayed as an add, a zero ino as a
 * remove, and an invalid entry with a non-zero ino is replayed as
 * neither; in every case the directory time and size are then updated
 * from the record.  A replay error is logged and stops the replay.
 *
 * Afterwards sih->i_size and sih->i_mode are copied from the inode, the
 * inode is flushed, and the remaining log pages past the tail are
 * counted so that sih->log_pages and pi->i_blocks cover the whole chain.
 *
 * Always returns 0, including after a replay error.  A NULL log hits
 * BUG().
 *
 * NOTE(review): an unknown record type only triggers NOVA_ASSERT(0) and
 * is then parsed as a dentry; confirm that is intended.
 */
int nova_rebuild_dir_inode_tree(struct super_block *sb,
	struct nova_inode *pi, u64 pi_addr,
	struct nova_inode_info_header *sih)
{
	struct nova_dentry *entry = NULL;
	struct nova_setattr_logentry *attr_entry = NULL;
	struct nova_link_change_entry *link_change_entry = NULL;
	struct nova_inode_log_page *curr_page;
	u64 ino = pi->nova_ino;
	unsigned short de_len;
	timing_t rebuild_time;
	void *addr;
	u64 curr_p;
	u64 next;
	u8 type;
	/*
	 * Must start at 0: an invalid entry with a non-zero ino assigns
	 * nothing to ret before it is tested below.
	 */
	int ret = 0;

	NOVA_START_TIMING(rebuild_dir_t, rebuild_time);
	nova_dbg_verbose("Rebuild dir %llu tree\n", ino);

	sih->pi_addr = pi_addr;

	curr_p = pi->log_head;
	if (curr_p == 0) {
		nova_err(sb, "Dir %llu log is NULL!\n", ino);
		BUG();
	}

	nova_dbg_verbose("Log head 0x%llx, tail 0x%llx\n",
				curr_p, pi->log_tail);

	sih->log_pages = 1;
	while (curr_p != pi->log_tail) {
		if (goto_next_page(sb, curr_p)) {
			sih->log_pages++;
			curr_p = next_log_page(sb, curr_p);
		}

		if (curr_p == 0) {
			nova_err(sb, "Dir %llu log is NULL!\n", ino);
			BUG();
		}

		addr = (void *)nova_get_block(sb, curr_p);
		type = nova_get_entry_type(addr);
		switch (type) {
		case SET_ATTR:
			attr_entry = (struct nova_setattr_logentry *)addr;
			nova_apply_setattr_entry(sb, pi, sih, attr_entry);
			sih->last_setattr = curr_p;
			curr_p += sizeof(struct nova_setattr_logentry);
			continue;
		case LINK_CHANGE:
			link_change_entry =
				(struct nova_link_change_entry *)addr;
			nova_apply_link_change_entry(pi, link_change_entry);
			sih->last_link_change = curr_p;
			curr_p += sizeof(struct nova_link_change_entry);
			continue;
		case DIR_LOG:
			break;
		default:
			nova_dbg("%s: unknown type %d, 0x%llx\n",
					__func__, type, curr_p);
			NOVA_ASSERT(0);
		}

		entry = (struct nova_dentry *)nova_get_block(sb, curr_p);
		nova_dbgv("curr_p: 0x%llx, type %d, ino %llu, "
			"name %s, namelen %u, rec len %u\n",
			curr_p, entry->entry_type,
			le64_to_cpu(entry->ino),
			entry->name, entry->name_len,
			le16_to_cpu(entry->de_len));

		if (entry->ino > 0) {
			if (entry->invalid == 0) {
				/* A valid entry to add */
				ret = nova_replay_add_dentry(sb, sih, entry);
			}
		} else {
			/* Delete the entry */
			ret = nova_replay_remove_dentry(sb, sih, entry);
		}

		if (ret) {
			nova_err(sb, "%s ERROR %d\n", __func__, ret);
			break;
		}

		nova_rebuild_dir_time_and_size(sb, pi, entry);

		de_len = le16_to_cpu(entry->de_len);
		curr_p += de_len;
	}

	sih->i_size = le64_to_cpu(pi->i_size);
	/* i_mode is read as a 16-bit little-endian field elsewhere. */
	sih->i_mode = le16_to_cpu(pi->i_mode);
	nova_flush_buffer(pi, sizeof(struct nova_inode), 0);

	/* Keep traversing until log ends */
	curr_p &= PAGE_MASK;
	curr_page = (struct nova_inode_log_page *)nova_get_block(sb, curr_p);
	while ((next = curr_page->page_tail.next_page) != 0) {
		sih->log_pages++;
		curr_p = next;
		curr_page = (struct nova_inode_log_page *)
			nova_get_block(sb, curr_p);
	}

	pi->i_blocks = sih->log_pages;

	NOVA_END_TIMING(rebuild_dir_t, rebuild_time);
	return 0;
}