struct btrfs_free_space_info * search_free_space_info(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *block_group, struct btrfs_path *path, int cow) { struct btrfs_root *root = fs_info->free_space_root; struct btrfs_key key; int ret; key.objectid = block_group->key.objectid; key.type = BTRFS_FREE_SPACE_INFO_KEY; key.offset = block_group->key.offset; ret = btrfs_search_slot(trans, root, &key, path, 0, cow); if (ret < 0) return ERR_PTR(ret); if (ret != 0) { btrfs_warn(fs_info, "missing free space info for %llu\n", block_group->key.objectid); ASSERT(0); return ERR_PTR(-ENOENT); } return btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_free_space_info); }
/* return -ENOENT for !found, < 0 for errors, or 0 if an item was found */ static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid, u8 type, u64 subid) { int ret; struct btrfs_path *path = NULL; struct extent_buffer *eb; int slot; u32 item_size; unsigned long offset; struct btrfs_key key; if (WARN_ON_ONCE(!uuid_root)) { ret = -ENOENT; goto out; } path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto out; } btrfs_uuid_to_key(uuid, type, &key); ret = btrfs_search_slot(NULL, uuid_root, &key, path, 0, 0); if (ret < 0) { goto out; } else if (ret > 0) { ret = -ENOENT; goto out; } eb = path->nodes[0]; slot = path->slots[0]; item_size = btrfs_item_size_nr(eb, slot); offset = btrfs_item_ptr_offset(eb, slot); ret = -ENOENT; if (!IS_ALIGNED(item_size, sizeof(u64))) { btrfs_warn(uuid_root->fs_info, "uuid item with illegal size %lu!", (unsigned long)item_size); goto out; } while (item_size) { __le64 data; read_extent_buffer(eb, &data, offset, sizeof(data)); if (le64_to_cpu(data) == subid) { ret = 0; break; } offset += sizeof(data); item_size -= sizeof(data); } out: btrfs_free_path(path); return ret; }
int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type, u64 subid_cpu) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *uuid_root = fs_info->uuid_root; int ret; struct btrfs_path *path = NULL; struct btrfs_key key; struct extent_buffer *eb; int slot; unsigned long offset; __le64 subid_le; ret = btrfs_uuid_tree_lookup(uuid_root, uuid, type, subid_cpu); if (ret != -ENOENT) return ret; if (WARN_ON_ONCE(!uuid_root)) { ret = -EINVAL; goto out; } btrfs_uuid_to_key(uuid, type, &key); path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto out; } ret = btrfs_insert_empty_item(trans, uuid_root, path, &key, sizeof(subid_le)); if (ret >= 0) { /* Add an item for the type for the first time */ eb = path->nodes[0]; slot = path->slots[0]; offset = btrfs_item_ptr_offset(eb, slot); } else if (ret == -EEXIST) { /* * An item with that type already exists. * Extend the item and store the new subid at the end. */ btrfs_extend_item(path, sizeof(subid_le)); eb = path->nodes[0]; slot = path->slots[0]; offset = btrfs_item_ptr_offset(eb, slot); offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le); } else { btrfs_warn(fs_info, "insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!", ret, (unsigned long long)key.objectid, (unsigned long long)key.offset, type); goto out; } ret = 0; subid_le = cpu_to_le64(subid_cpu); write_extent_buffer(eb, &subid_le, offset, sizeof(subid_le)); btrfs_mark_buffer_dirty(eb); out: btrfs_free_path(path); return ret; }
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info, int (*check_func)(struct btrfs_fs_info *, u8 *, u8, u64)) { struct btrfs_root *root = fs_info->uuid_root; struct btrfs_key key; struct btrfs_path *path; int ret = 0; struct extent_buffer *leaf; int slot; u32 item_size; unsigned long offset; path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto out; } key.objectid = 0; key.type = 0; key.offset = 0; again_search_slot: ret = btrfs_search_forward(root, &key, path, BTRFS_OLDEST_GENERATION); if (ret) { if (ret > 0) ret = 0; goto out; } while (1) { cond_resched(); leaf = path->nodes[0]; slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &key, slot); if (key.type != BTRFS_UUID_KEY_SUBVOL && key.type != BTRFS_UUID_KEY_RECEIVED_SUBVOL) goto skip; offset = btrfs_item_ptr_offset(leaf, slot); item_size = btrfs_item_size_nr(leaf, slot); if (!IS_ALIGNED(item_size, sizeof(u64))) { btrfs_warn(fs_info, "uuid item with illegal size %lu!", (unsigned long)item_size); goto skip; } while (item_size) { u8 uuid[BTRFS_UUID_SIZE]; __le64 subid_le; u64 subid_cpu; put_unaligned_le64(key.objectid, uuid); put_unaligned_le64(key.offset, uuid + sizeof(u64)); read_extent_buffer(leaf, &subid_le, offset, sizeof(subid_le)); subid_cpu = le64_to_cpu(subid_le); ret = check_func(fs_info, uuid, key.type, subid_cpu); if (ret < 0) goto out; if (ret > 0) { btrfs_release_path(path); ret = btrfs_uuid_iter_rem(root, uuid, key.type, subid_cpu); if (ret == 0) { /* * this might look inefficient, but the * justification is that it is an * exception that check_func returns 1, * and that in the regular case only one * entry per UUID exists. */ goto again_search_slot; } if (ret < 0 && ret != -ENOENT) goto out; } item_size -= sizeof(subid_le); offset += sizeof(subid_le); } skip: ret = btrfs_next_item(root, path); if (ret == 0) continue; else if (ret > 0) ret = 0; break; } out: btrfs_free_path(path); return ret; }
int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type, u64 subid) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_root *uuid_root = fs_info->uuid_root; int ret; struct btrfs_path *path = NULL; struct btrfs_key key; struct extent_buffer *eb; int slot; unsigned long offset; u32 item_size; unsigned long move_dst; unsigned long move_src; unsigned long move_len; if (WARN_ON_ONCE(!uuid_root)) { ret = -EINVAL; goto out; } btrfs_uuid_to_key(uuid, type, &key); path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto out; } ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1); if (ret < 0) { btrfs_warn(fs_info, "error %d while searching for uuid item!", ret); goto out; } if (ret > 0) { ret = -ENOENT; goto out; } eb = path->nodes[0]; slot = path->slots[0]; offset = btrfs_item_ptr_offset(eb, slot); item_size = btrfs_item_size_nr(eb, slot); if (!IS_ALIGNED(item_size, sizeof(u64))) { btrfs_warn(fs_info, "uuid item with illegal size %lu!", (unsigned long)item_size); ret = -ENOENT; goto out; } while (item_size) { __le64 read_subid; read_extent_buffer(eb, &read_subid, offset, sizeof(read_subid)); if (le64_to_cpu(read_subid) == subid) break; offset += sizeof(read_subid); item_size -= sizeof(read_subid); } if (!item_size) { ret = -ENOENT; goto out; } item_size = btrfs_item_size_nr(eb, slot); if (item_size == sizeof(subid)) { ret = btrfs_del_item(trans, uuid_root, path); goto out; } move_dst = offset; move_src = offset + sizeof(subid); move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot)); memmove_extent_buffer(eb, move_dst, move_src, move_len); btrfs_truncate_item(path, item_size - sizeof(subid), 1); out: btrfs_free_path(path); return ret; }
int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) { struct btrfs_key key; struct btrfs_root *dev_root = fs_info->dev_root; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; struct extent_buffer *eb; int slot; int ret = 0; struct btrfs_path *path = NULL; int item_size; struct btrfs_dev_replace_item *ptr; u64 src_devid; path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto out; } key.objectid = 0; key.type = BTRFS_DEV_REPLACE_KEY; key.offset = 0; ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); if (ret) { no_valid_dev_replace_entry_found: ret = 0; dev_replace->replace_state = BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED; dev_replace->cont_reading_from_srcdev_mode = BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS; dev_replace->time_started = 0; dev_replace->time_stopped = 0; atomic64_set(&dev_replace->num_write_errors, 0); atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0); dev_replace->cursor_left = 0; dev_replace->committed_cursor_left = 0; dev_replace->cursor_left_last_write_of_item = 0; dev_replace->cursor_right = 0; dev_replace->srcdev = NULL; dev_replace->tgtdev = NULL; dev_replace->is_valid = 0; dev_replace->item_needs_writeback = 0; goto out; } slot = path->slots[0]; eb = path->nodes[0]; item_size = btrfs_item_size_nr(eb, slot); ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_replace_item); if (item_size != sizeof(struct btrfs_dev_replace_item)) { btrfs_warn(fs_info, "dev_replace entry found has unexpected size, ignore entry"); goto no_valid_dev_replace_entry_found; } src_devid = btrfs_dev_replace_src_devid(eb, ptr); dev_replace->cont_reading_from_srcdev_mode = btrfs_dev_replace_cont_reading_from_srcdev_mode(eb, ptr); dev_replace->replace_state = btrfs_dev_replace_replace_state(eb, ptr); dev_replace->time_started = btrfs_dev_replace_time_started(eb, ptr); dev_replace->time_stopped = btrfs_dev_replace_time_stopped(eb, ptr); atomic64_set(&dev_replace->num_write_errors, btrfs_dev_replace_num_write_errors(eb, ptr)); atomic64_set(&dev_replace->num_uncorrectable_read_errors, btrfs_dev_replace_num_uncorrectable_read_errors(eb, ptr)); dev_replace->cursor_left = btrfs_dev_replace_cursor_left(eb, ptr); dev_replace->committed_cursor_left = dev_replace->cursor_left; dev_replace->cursor_left_last_write_of_item = dev_replace->cursor_left; dev_replace->cursor_right = btrfs_dev_replace_cursor_right(eb, ptr); dev_replace->is_valid = 1; dev_replace->item_needs_writeback = 0; switch (dev_replace->replace_state) { case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: dev_replace->srcdev = NULL; dev_replace->tgtdev = NULL; break; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: dev_replace->srcdev = btrfs_find_device(fs_info->fs_devices, src_devid, NULL, NULL, true); dev_replace->tgtdev = btrfs_find_device(fs_info->fs_devices, BTRFS_DEV_REPLACE_DEVID, NULL, NULL, true); /* * allow 'btrfs dev replace_cancel' if src/tgt device is * missing */ if (!dev_replace->srcdev && !btrfs_test_opt(fs_info, DEGRADED)) { ret = -EIO; btrfs_warn(fs_info, "cannot mount because device replace operation is ongoing and"); btrfs_warn(fs_info, "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?", src_devid); } if (!dev_replace->tgtdev && !btrfs_test_opt(fs_info, DEGRADED)) { ret = -EIO; btrfs_warn(fs_info, "cannot mount because device replace operation is ongoing and"); btrfs_warn(fs_info, "tgtdev (devid %llu) is missing, need to run 'btrfs dev scan'?", BTRFS_DEV_REPLACE_DEVID); } if (dev_replace->tgtdev) { if (dev_replace->srcdev) { dev_replace->tgtdev->total_bytes = dev_replace->srcdev->total_bytes; dev_replace->tgtdev->disk_total_bytes = dev_replace->srcdev->disk_total_bytes; dev_replace->tgtdev->commit_total_bytes = dev_replace->srcdev->commit_total_bytes; dev_replace->tgtdev->bytes_used = dev_replace->srcdev->bytes_used; dev_replace->tgtdev->commit_bytes_used = dev_replace->srcdev->commit_bytes_used; } set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &dev_replace->tgtdev->dev_state); WARN_ON(fs_info->fs_devices->rw_devices == 0); dev_replace->tgtdev->io_width = fs_info->sectorsize; dev_replace->tgtdev->io_align = fs_info->sectorsize; dev_replace->tgtdev->sector_size = fs_info->sectorsize; dev_replace->tgtdev->fs_info = fs_info; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev_replace->tgtdev->dev_state); } break; } out: btrfs_free_path(path); return ret; }
/* * called from commit_transaction. Writes changed device replace state to * disk. */ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info) { int ret; struct btrfs_root *dev_root = fs_info->dev_root; struct btrfs_path *path; struct btrfs_key key; struct extent_buffer *eb; struct btrfs_dev_replace_item *ptr; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; down_read(&dev_replace->rwsem); if (!dev_replace->is_valid || !dev_replace->item_needs_writeback) { up_read(&dev_replace->rwsem); return 0; } up_read(&dev_replace->rwsem); key.objectid = 0; key.type = BTRFS_DEV_REPLACE_KEY; key.offset = 0; path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto out; } ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); if (ret < 0) { btrfs_warn(fs_info, "error %d while searching for dev_replace item!", ret); goto out; } if (ret == 0 && btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) { /* * need to delete old one and insert a new one. * Since no attempt is made to recover any old state, if the * dev_replace state is 'running', the data on the target * drive is lost. * It would be possible to recover the state: just make sure * that the beginning of the item is never changed and always * contains all the essential information. Then read this * minimal set of information and use it as a base for the * new state. */ ret = btrfs_del_item(trans, dev_root, path); if (ret != 0) { btrfs_warn(fs_info, "delete too small dev_replace item failed %d!", ret); goto out; } ret = 1; } if (ret == 1) { /* need to insert a new item */ btrfs_release_path(path); ret = btrfs_insert_empty_item(trans, dev_root, path, &key, sizeof(*ptr)); if (ret < 0) { btrfs_warn(fs_info, "insert dev_replace item failed %d!", ret); goto out; } } eb = path->nodes[0]; ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_replace_item); down_write(&dev_replace->rwsem); if (dev_replace->srcdev) btrfs_set_dev_replace_src_devid(eb, ptr, dev_replace->srcdev->devid); else btrfs_set_dev_replace_src_devid(eb, ptr, (u64)-1); btrfs_set_dev_replace_cont_reading_from_srcdev_mode(eb, ptr, dev_replace->cont_reading_from_srcdev_mode); btrfs_set_dev_replace_replace_state(eb, ptr, dev_replace->replace_state); btrfs_set_dev_replace_time_started(eb, ptr, dev_replace->time_started); btrfs_set_dev_replace_time_stopped(eb, ptr, dev_replace->time_stopped); btrfs_set_dev_replace_num_write_errors(eb, ptr, atomic64_read(&dev_replace->num_write_errors)); btrfs_set_dev_replace_num_uncorrectable_read_errors(eb, ptr, atomic64_read(&dev_replace->num_uncorrectable_read_errors)); dev_replace->cursor_left_last_write_of_item = dev_replace->cursor_left; btrfs_set_dev_replace_cursor_left(eb, ptr, dev_replace->cursor_left_last_write_of_item); btrfs_set_dev_replace_cursor_right(eb, ptr, dev_replace->cursor_right); dev_replace->item_needs_writeback = 0; up_write(&dev_replace->rwsem); btrfs_mark_buffer_dirty(eb); out: btrfs_free_path(path); return ret; }
int btrfs_dev_replace_start(struct btrfs_root *root, struct btrfs_ioctl_dev_replace_args *args) { struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; int ret; struct btrfs_device *tgt_device = NULL; struct btrfs_device *src_device = NULL; if (btrfs_fs_incompat(fs_info, RAID56)) { btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6"); return -EINVAL; } switch (args->start.cont_reading_from_srcdev_mode) { case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: break; default: return -EINVAL; } if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') || args->start.tgtdev_name[0] == '\0') return -EINVAL; mutex_lock(&fs_info->volume_mutex); ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name, &tgt_device); if (ret) { btrfs_err(fs_info, "target device %s is invalid!", args->start.tgtdev_name); mutex_unlock(&fs_info->volume_mutex); return -EINVAL; } ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid, args->start.srcdev_name, &src_device); mutex_unlock(&fs_info->volume_mutex); if (ret) { ret = -EINVAL; goto leave_no_lock; } if (tgt_device->total_bytes < src_device->total_bytes) { btrfs_err(fs_info, "target device is smaller than source device!"); ret = -EINVAL; goto leave_no_lock; } btrfs_dev_replace_lock(dev_replace); switch (dev_replace->replace_state) { case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: break; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED; goto leave; } dev_replace->cont_reading_from_srcdev_mode = args->start.cont_reading_from_srcdev_mode; WARN_ON(!src_device); dev_replace->srcdev = src_device; WARN_ON(!tgt_device); dev_replace->tgtdev = tgt_device; printk_in_rcu(KERN_INFO "BTRFS: dev_replace from %s (devid %llu) to %s started\n", src_device->missing ? "<missing disk>" : rcu_str_deref(src_device->name), src_device->devid, rcu_str_deref(tgt_device->name)); tgt_device->total_bytes = src_device->total_bytes; tgt_device->disk_total_bytes = src_device->disk_total_bytes; tgt_device->bytes_used = src_device->bytes_used; /* * from now on, the writes to the srcdev are all duplicated to * go to the tgtdev as well (refer to btrfs_map_block()). */ dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED; dev_replace->time_started = get_seconds(); dev_replace->cursor_left = 0; dev_replace->committed_cursor_left = 0; dev_replace->cursor_left_last_write_of_item = 0; dev_replace->cursor_right = 0; dev_replace->is_valid = 1; dev_replace->item_needs_writeback = 1; args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; btrfs_dev_replace_unlock(dev_replace); btrfs_wait_ordered_roots(root->fs_info, -1); /* force writing the updated state information to disk */ trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); btrfs_dev_replace_lock(dev_replace); goto leave; } ret = btrfs_commit_transaction(trans, root); WARN_ON(ret); /* the disk copy procedure reuses the scrub code */ ret = btrfs_scrub_dev(fs_info, src_device->devid, 0, src_device->total_bytes, &dev_replace->scrub_progress, 0, 1); ret = btrfs_dev_replace_finishing(root->fs_info, ret); WARN_ON(ret); return 0; leave: dev_replace->srcdev = NULL; dev_replace->tgtdev = NULL; btrfs_dev_replace_unlock(dev_replace); leave_no_lock: if (tgt_device) btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); return ret; }