/*
 * Allocate and set up the in-memory dedup state hanging off @fs_info.
 *
 * @fs_info:   filesystem whose ->dedup_info is (re)assigned
 * @type:      hash type recorded in the new structure
 * @backend:   backend identifier recorded in the new structure
 * @blocksize: dedup block size recorded in the new structure
 * @limit:     stored as the entry limit (limit_nr)
 *
 * The hash driver is always "sha256", whatever @type says.
 *
 * Return: 0 on success, -ENOMEM if the structure cannot be allocated, or the
 * error encoded by crypto_alloc_shash().  On failure ->dedup_info is NULL.
 */
static int init_dedup_info(struct btrfs_fs_info *fs_info, u16 type,
			   u16 backend, u64 blocksize, u64 limit)
{
	struct btrfs_dedup_info *info;
	int err;

	info = kzalloc(sizeof(*info), GFP_NOFS);
	fs_info->dedup_info = info;
	if (!info)
		return -ENOMEM;

	/* Parameters handed in by the caller. */
	info->hash_type = type;
	info->backend = backend;
	info->blocksize = blocksize;
	info->limit_nr = limit;

	/* Only support SHA256 yet */
	info->dedup_driver = crypto_alloc_shash("sha256", 0, 0);
	if (IS_ERR(info->dedup_driver))
		goto fail_driver;

	/* Empty lookup trees, empty LRU, nothing cached yet. */
	info->hash_root = RB_ROOT;
	info->bytenr_root = RB_ROOT;
	info->current_nr = 0;
	INIT_LIST_HEAD(&info->lru_list);
	mutex_init(&info->lock);

	return 0;

fail_driver:
	btrfs_err(fs_info, "failed to init sha256 driver");
	err = PTR_ERR(info->dedup_driver);
	kfree(info);
	fs_info->dedup_info = NULL;
	return err;
}
int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *block_group, struct btrfs_path *path) { struct btrfs_root *root = fs_info->free_space_root; struct btrfs_free_space_info *info; struct btrfs_key key, found_key; struct extent_buffer *leaf; unsigned long *bitmap; char *bitmap_cursor; u64 start, end; u64 bitmap_range, i; u32 bitmap_size, flags, expected_extent_count; u32 extent_count = 0; int done = 0, nr; int ret; bitmap_size = free_space_bitmap_size(block_group->key.offset, block_group->sectorsize); bitmap = alloc_bitmap(bitmap_size); if (!bitmap) { ret = -ENOMEM; goto out; } start = block_group->key.objectid; end = block_group->key.objectid + block_group->key.offset; key.objectid = end - 1; key.type = (u8)-1; key.offset = (u64)-1; while (!done) { ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); if (ret) goto out; leaf = path->nodes[0]; nr = 0; path->slots[0]++; while (path->slots[0] > 0) { btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { ASSERT(found_key.objectid == block_group->key.objectid); ASSERT(found_key.offset == block_group->key.offset); done = 1; break; } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) { u64 first, last; ASSERT(found_key.objectid >= start); ASSERT(found_key.objectid < end); ASSERT(found_key.objectid + found_key.offset <= end); first = div_u64(found_key.objectid - start, block_group->sectorsize); last = div_u64(found_key.objectid + found_key.offset - start, block_group->sectorsize); bitmap_set(bitmap, first, last - first); extent_count++; nr++; path->slots[0]--; } else { ASSERT(0); } } ret = btrfs_del_items(trans, root, path, path->slots[0], nr); if (ret) goto out; btrfs_release_path(path); } info = search_free_space_info(trans, fs_info, block_group, path, 1); if (IS_ERR(info)) { ret = PTR_ERR(info); goto out; } leaf = path->nodes[0]; flags = btrfs_free_space_flags(leaf, info); 
flags |= BTRFS_FREE_SPACE_USING_BITMAPS; btrfs_set_free_space_flags(leaf, info, flags); expected_extent_count = btrfs_free_space_extent_count(leaf, info); btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); if (extent_count != expected_extent_count) { btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u", block_group->key.objectid, extent_count, expected_extent_count); ASSERT(0); ret = -EIO; goto out; } bitmap_cursor = (char *)bitmap; bitmap_range = block_group->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; i = start; while (i < end) { unsigned long ptr; u64 extent_size; u32 data_size; extent_size = min(end - i, bitmap_range); data_size = free_space_bitmap_size(extent_size, block_group->sectorsize); key.objectid = i; key.type = BTRFS_FREE_SPACE_BITMAP_KEY; key.offset = extent_size; ret = btrfs_insert_empty_item(trans, root, path, &key, data_size); if (ret) goto out; leaf = path->nodes[0]; ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); write_extent_buffer(leaf, bitmap_cursor, ptr, data_size); btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); i += extent_size; bitmap_cursor += data_size; } ret = 0; out: kvfree(bitmap); if (ret) btrfs_abort_transaction(trans, ret); return ret; }
int convert_free_space_to_extents(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, struct btrfs_block_group_cache *block_group, struct btrfs_path *path) { struct btrfs_root *root = fs_info->free_space_root; struct btrfs_free_space_info *info; struct btrfs_key key, found_key; struct extent_buffer *leaf; unsigned long *bitmap; u64 start, end; /* Initialize to silence GCC. */ u64 extent_start = 0; u64 offset; u32 bitmap_size, flags, expected_extent_count; int prev_bit = 0, bit, bitnr; u32 extent_count = 0; int done = 0, nr; int ret; bitmap_size = free_space_bitmap_size(block_group->key.offset, block_group->sectorsize); bitmap = alloc_bitmap(bitmap_size); if (!bitmap) { ret = -ENOMEM; goto out; } start = block_group->key.objectid; end = block_group->key.objectid + block_group->key.offset; key.objectid = end - 1; key.type = (u8)-1; key.offset = (u64)-1; while (!done) { ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); if (ret) goto out; leaf = path->nodes[0]; nr = 0; path->slots[0]++; while (path->slots[0] > 0) { btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { ASSERT(found_key.objectid == block_group->key.objectid); ASSERT(found_key.offset == block_group->key.offset); done = 1; break; } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) { unsigned long ptr; char *bitmap_cursor; u32 bitmap_pos, data_size; ASSERT(found_key.objectid >= start); ASSERT(found_key.objectid < end); ASSERT(found_key.objectid + found_key.offset <= end); bitmap_pos = div_u64(found_key.objectid - start, block_group->sectorsize * BITS_PER_BYTE); bitmap_cursor = ((char *)bitmap) + bitmap_pos; data_size = free_space_bitmap_size(found_key.offset, block_group->sectorsize); ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1); read_extent_buffer(leaf, bitmap_cursor, ptr, data_size); nr++; path->slots[0]--; } else { ASSERT(0); } } ret = btrfs_del_items(trans, root, path, path->slots[0], nr); if (ret) goto 
out; btrfs_release_path(path); } info = search_free_space_info(trans, fs_info, block_group, path, 1); if (IS_ERR(info)) { ret = PTR_ERR(info); goto out; } leaf = path->nodes[0]; flags = btrfs_free_space_flags(leaf, info); flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS; btrfs_set_free_space_flags(leaf, info, flags); expected_extent_count = btrfs_free_space_extent_count(leaf, info); btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); offset = start; bitnr = 0; while (offset < end) { bit = !!test_bit(bitnr, bitmap); if (prev_bit == 0 && bit == 1) { extent_start = offset; } else if (prev_bit == 1 && bit == 0) { key.objectid = extent_start; key.type = BTRFS_FREE_SPACE_EXTENT_KEY; key.offset = offset - extent_start; ret = btrfs_insert_empty_item(trans, root, path, &key, 0); if (ret) goto out; btrfs_release_path(path); extent_count++; } prev_bit = bit; offset += block_group->sectorsize; bitnr++; } if (prev_bit == 1) { key.objectid = extent_start; key.type = BTRFS_FREE_SPACE_EXTENT_KEY; key.offset = end - extent_start; ret = btrfs_insert_empty_item(trans, root, path, &key, 0); if (ret) goto out; btrfs_release_path(path); extent_count++; } if (extent_count != expected_extent_count) { btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u", block_group->key.objectid, extent_count, expected_extent_count); ASSERT(0); ret = -EIO; goto out; } ret = 0; out: kvfree(bitmap); if (ret) btrfs_abort_transaction(trans, ret); return ret; }
/*
 * Feed the free space described by a block group's bitmap items into the
 * in-memory free space cache.
 *
 * Starting with the item after the one @path currently points at, every item
 * is scanned bit by bit until a BTRFS_FREE_SPACE_INFO_KEY item or the end of
 * the tree is reached.  Each run of set bits is handed to
 * add_new_free_space(); waiters on @caching_ctl are woken whenever more than
 * CACHING_CTL_WAKE_UP bytes have been added since the last wake-up.
 *
 * @caching_ctl:           caching control of the block group being loaded
 * @path:                  path positioned by the caller
 * @expected_extent_count: number of free extents the scan must find
 *
 * Return: 0 on success, a negative error from btrfs_next_item(), or -EIO if
 * the number of extents found differs from @expected_extent_count.
 */
static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
				   struct btrfs_path *path,
				   u32 expected_extent_count)
{
	struct btrfs_block_group_cache *bg = caching_ctl->block_group;
	struct btrfs_fs_info *fs_info = bg->fs_info;
	struct btrfs_root *fst_root = fs_info->free_space_root;
	u64 bg_end = bg->key.objectid + bg->key.offset;
	struct btrfs_key key;
	/* Initialize to silence GCC. */
	u64 run_start = 0;
	u64 bytes_since_wakeup = 0;
	u32 nr_extents = 0;
	int last_bit = 0;
	int ret;

	for (;;) {
		u64 cur;
		u64 item_end;

		ret = btrfs_next_item(fst_root, path);
		if (ret < 0)
			return ret;
		if (ret != 0)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		caching_ctl->progress = key.objectid;

		item_end = key.objectid + key.offset;
		for (cur = key.objectid; cur < item_end;
		     cur += bg->sectorsize) {
			int this_bit = free_space_test_bit(bg, path, cur);

			if (last_bit == 0 && this_bit == 1) {
				/* A free run begins at this sector. */
				run_start = cur;
			} else if (last_bit == 1 && this_bit == 0) {
				/* The run ended just before this sector. */
				bytes_since_wakeup += add_new_free_space(bg,
								fs_info,
								run_start,
								cur);
				if (bytes_since_wakeup > CACHING_CTL_WAKE_UP) {
					bytes_since_wakeup = 0;
					wake_up(&caching_ctl->wait);
				}
				nr_extents++;
			}
			last_bit = this_bit;
		}
	}

	/* A run that is still open extends to the end of the block group. */
	if (last_bit == 1) {
		bytes_since_wakeup += add_new_free_space(bg, fs_info,
							 run_start, bg_end);
		nr_extents++;
	}

	if (nr_extents != expected_extent_count) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  bg->key.objectid, nr_extents,
			  expected_extent_count);
		ASSERT(0);
		return -EIO;
	}

	caching_ctl->progress = (u64)-1;
	return 0;
}
/*
 * Feed the free space described by a block group's extent items into the
 * in-memory free space cache.
 *
 * Starting with the item after the one @path currently points at, every item
 * is passed to add_new_free_space() until a BTRFS_FREE_SPACE_INFO_KEY item or
 * the end of the tree is reached.  Waiters on @caching_ctl are woken whenever
 * more than CACHING_CTL_WAKE_UP bytes have been added since the last wake-up.
 *
 * @caching_ctl:           caching control of the block group being loaded
 * @path:                  path positioned by the caller
 * @expected_extent_count: number of extent items the scan must find
 *
 * Return: 0 on success, a negative error from btrfs_next_item(), or -EIO if
 * the number of extents found differs from @expected_extent_count.
 */
static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
				   struct btrfs_path *path,
				   u32 expected_extent_count)
{
	struct btrfs_block_group_cache *block_group = caching_ctl->block_group;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_root *root = fs_info->free_space_root;
	u64 end = block_group->key.objectid + block_group->key.offset;
	struct btrfs_key key;
	u64 bytes_since_wakeup = 0;
	u32 nr_extents = 0;
	int ret;

	for (;;) {
		ret = btrfs_next_item(root, path);
		if (ret < 0)
			return ret;
		if (ret != 0)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		caching_ctl->progress = key.objectid;

		/* The item key itself encodes [objectid, objectid + offset). */
		bytes_since_wakeup += add_new_free_space(block_group, fs_info,
							 key.objectid,
							 key.objectid +
							 key.offset);
		if (bytes_since_wakeup > CACHING_CTL_WAKE_UP) {
			bytes_since_wakeup = 0;
			wake_up(&caching_ctl->wait);
		}
		nr_extents++;
	}

	if (nr_extents != expected_extent_count) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->key.objectid, nr_extents,
			  expected_extent_count);
		ASSERT(0);
		return -EIO;
	}

	caching_ctl->progress = (u64)-1;
	return 0;
}
/*
 * Start replacing a source device with a new target device.
 *
 * @fs_info:     filesystem the replace runs on
 * @tgtdev_name: path of the target block device
 * @srcdevid:    devid of the source device, passed to
 *               btrfs_find_device_by_devspec()
 * @srcdev_name: path of the source device, passed to
 *               btrfs_find_device_by_devspec()
 * @read_src:    stored as dev_replace->cont_reading_from_srcdev_mode
 *
 * The target device is created, the replace state is switched to STARTED
 * under dev_replace->rwsem, the new state is committed, and the copy is then
 * run through btrfs_scrub_dev() followed by btrfs_dev_replace_finishing().
 *
 * Every failure after the target device has been created destroys it again
 * before returning.
 *
 * Return: 0 on success, a negative errno on failure,
 * BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED if a replace is already
 * started or suspended, or BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS if
 * finishing reported -EINPROGRESS.
 */
static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
		const char *tgtdev_name, u64 srcdevid, const char *srcdev_name,
		int read_src)
{
	struct btrfs_root *root = fs_info->dev_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	int ret;
	struct btrfs_device *tgt_device = NULL;
	struct btrfs_device *src_device = NULL;
	/* Tells the leave path whether dev_replace->rwsem is write-held. */
	bool need_unlock = false;

	src_device = btrfs_find_device_by_devspec(fs_info, srcdevid,
						  srcdev_name);
	if (IS_ERR(src_device))
		return PTR_ERR(src_device);

	if (btrfs_pinned_by_swapfile(fs_info, src_device)) {
		btrfs_warn_in_rcu(fs_info,
	  "cannot replace device %s (devid %llu) due to active swapfile",
			btrfs_dev_name(src_device), src_device->devid);
		return -ETXTBSY;
	}

	ret = btrfs_init_dev_replace_tgtdev(fs_info, tgtdev_name,
					    src_device, &tgt_device);
	if (ret)
		return ret;

	/*
	 * Here we commit the transaction to make sure commit_total_bytes
	 * of all the devices are updated.
	 *
	 * The target device already exists at this point, so failures must
	 * go through the leave path to tear it down again.
	 */
	trans = btrfs_attach_transaction(root);
	if (!IS_ERR(trans)) {
		ret = btrfs_commit_transaction(trans);
		if (ret)
			goto leave;
	} else if (PTR_ERR(trans) != -ENOENT) {
		ret = PTR_ERR(trans);
		goto leave;
	}

	need_unlock = true;
	down_write(&dev_replace->rwsem);
	switch (dev_replace->replace_state) {
	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
		break;
	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
		ASSERT(0);
		ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED;
		goto leave;
	}

	dev_replace->cont_reading_from_srcdev_mode = read_src;
	WARN_ON(!src_device);
	dev_replace->srcdev = src_device;
	dev_replace->tgtdev = tgt_device;

	btrfs_info_in_rcu(fs_info,
		      "dev_replace from %s (devid %llu) to %s started",
		      btrfs_dev_name(src_device),
		      src_device->devid,
		      rcu_str_deref(tgt_device->name));

	/*
	 * from now on, the writes to the srcdev are all duplicated to
	 * go to the tgtdev as well (refer to btrfs_map_block()).
	 */
	dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
	dev_replace->time_started = ktime_get_real_seconds();
	dev_replace->cursor_left = 0;
	dev_replace->committed_cursor_left = 0;
	dev_replace->cursor_left_last_write_of_item = 0;
	dev_replace->cursor_right = 0;
	dev_replace->is_valid = 1;
	dev_replace->item_needs_writeback = 1;
	atomic64_set(&dev_replace->num_write_errors, 0);
	atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
	up_write(&dev_replace->rwsem);
	need_unlock = false;

	/* A missing sysfs link is reported but does not stop the replace. */
	ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
	if (ret)
		btrfs_err(fs_info, "kobj add dev failed %d", ret);

	btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);

	/* force writing the updated state information to disk */
	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		need_unlock = true;
		down_write(&dev_replace->rwsem);
		/* Undo the state change made above before tearing down. */
		dev_replace->replace_state =
			BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED;
		dev_replace->srcdev = NULL;
		dev_replace->tgtdev = NULL;
		goto leave;
	}

	ret = btrfs_commit_transaction(trans);
	WARN_ON(ret);

	/* the disk copy procedure reuses the scrub code */
	ret = btrfs_scrub_dev(fs_info, src_device->devid, 0,
			      btrfs_device_get_total_bytes(src_device),
			      &dev_replace->scrub_progress, 0, 1);

	ret = btrfs_dev_replace_finishing(fs_info, ret);
	if (ret == -EINPROGRESS) {
		ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS;
	} else if (ret != -ECANCELED) {
		WARN_ON(ret);
	}

	return ret;

leave:
	if (need_unlock)
		up_write(&dev_replace->rwsem);
	btrfs_destroy_dev_replace_tgtdev(tgt_device);
	return ret;
}
/* * Initialize a new device for device replace target from a given source dev * and path. * * Return 0 and new device in @device_out, otherwise return < 0 */ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, const char *device_path, struct btrfs_device *srcdev, struct btrfs_device **device_out) { struct btrfs_device *device; struct block_device *bdev; struct list_head *devices; struct rcu_string *name; u64 devid = BTRFS_DEV_REPLACE_DEVID; int ret = 0; *device_out = NULL; if (fs_info->fs_devices->seeding) { btrfs_err(fs_info, "the filesystem is a seed filesystem!"); return -EINVAL; } bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, fs_info->bdev_holder); if (IS_ERR(bdev)) { btrfs_err(fs_info, "target device %s is invalid!", device_path); return PTR_ERR(bdev); } filemap_write_and_wait(bdev->bd_inode->i_mapping); devices = &fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { if (device->bdev == bdev) { btrfs_err(fs_info, "target device is in the filesystem!"); ret = -EEXIST; goto error; } } if (i_size_read(bdev->bd_inode) < btrfs_device_get_total_bytes(srcdev)) { btrfs_err(fs_info, "target device is smaller than source device!"); ret = -EINVAL; goto error; } device = btrfs_alloc_device(NULL, &devid, NULL); if (IS_ERR(device)) { ret = PTR_ERR(device); goto error; } name = rcu_string_strdup(device_path, GFP_KERNEL); if (!name) { btrfs_free_device(device); ret = -ENOMEM; goto error; } rcu_assign_pointer(device->name, name); mutex_lock(&fs_info->fs_devices->device_list_mutex); set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); device->generation = 0; device->io_width = fs_info->sectorsize; device->io_align = fs_info->sectorsize; device->sector_size = fs_info->sectorsize; device->total_bytes = btrfs_device_get_total_bytes(srcdev); device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev); device->bytes_used = btrfs_device_get_bytes_used(srcdev); device->commit_total_bytes = 
srcdev->commit_total_bytes; device->commit_bytes_used = device->bytes_used; device->fs_info = fs_info; device->bdev = bdev; set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state); set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); device->mode = FMODE_EXCL; device->dev_stats_valid = 1; set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE); device->fs_devices = fs_info->fs_devices; list_add(&device->dev_list, &fs_info->fs_devices->devices); fs_info->fs_devices->num_devices++; fs_info->fs_devices->open_devices++; mutex_unlock(&fs_info->fs_devices->device_list_mutex); *device_out = device; return 0; error: blkdev_put(bdev, FMODE_EXCL); return ret; }
/*
 * Start replacing a source device with a new target device.
 *
 * @root:        root used for the transactions; supplies fs_info
 * @tgtdev_name: path of the target block device
 * @srcdevid:    devid of the source device, passed to
 *               btrfs_find_device_by_devspec()
 * @srcdev_name: path of the source device, passed to
 *               btrfs_find_device_by_devspec()
 * @read_src:    stored as dev_replace->cont_reading_from_srcdev_mode
 *
 * The target device is created under volume_mutex, the replace state is
 * switched to STARTED under the dev_replace lock, the new state is committed,
 * and the copy is then run through btrfs_scrub_dev() followed by
 * btrfs_dev_replace_finishing().
 *
 * Every failure after the target device has been created destroys it again.
 * dev_replace->srcdev/tgtdev and the replace state are only reset when this
 * call itself set them, so a replace that is already running is left alone.
 *
 * Return: 0 on success, a negative errno on failure,
 * BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED if a replace is already
 * started or suspended, or BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS if
 * finishing reported -EINPROGRESS.
 */
int btrfs_dev_replace_start(struct btrfs_root *root, char *tgtdev_name,
		u64 srcdevid, char *srcdev_name, int read_src)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	int ret;
	struct btrfs_device *tgt_device = NULL;
	struct btrfs_device *src_device = NULL;

	/* the disk copy procedure reuses the scrub code */
	mutex_lock(&fs_info->volume_mutex);
	ret = btrfs_find_device_by_devspec(root, srcdevid,
					   srcdev_name, &src_device);
	if (ret) {
		mutex_unlock(&fs_info->volume_mutex);
		return ret;
	}

	ret = btrfs_init_dev_replace_tgtdev(root, tgtdev_name,
					    src_device, &tgt_device);
	mutex_unlock(&fs_info->volume_mutex);
	if (ret)
		return ret;

	/*
	 * Here we commit the transaction to make sure commit_total_bytes
	 * of all the devices are updated.
	 *
	 * The target device already exists at this point, so failures must
	 * tear it down again instead of returning directly.
	 */
	trans = btrfs_attach_transaction(root);
	if (!IS_ERR(trans)) {
		ret = btrfs_commit_transaction(trans, root);
		if (ret)
			goto leave_no_lock;
	} else if (PTR_ERR(trans) != -ENOENT) {
		ret = PTR_ERR(trans);
		goto leave_no_lock;
	}

	btrfs_dev_replace_lock(dev_replace, 1);
	switch (dev_replace->replace_state) {
	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
		break;
	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
		/* srcdev/tgtdev belong to the running replace: keep them. */
		ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED;
		goto leave;
	}

	dev_replace->cont_reading_from_srcdev_mode = read_src;
	WARN_ON(!src_device);
	dev_replace->srcdev = src_device;
	WARN_ON(!tgt_device);
	dev_replace->tgtdev = tgt_device;

	btrfs_info_in_rcu(fs_info,
		      "dev_replace from %s (devid %llu) to %s started",
		      src_device->missing ? "<missing disk>" :
		        rcu_str_deref(src_device->name),
		      src_device->devid,
		      rcu_str_deref(tgt_device->name));

	/*
	 * from now on, the writes to the srcdev are all duplicated to
	 * go to the tgtdev as well (refer to btrfs_map_block()).
	 */
	dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
	dev_replace->time_started = get_seconds();
	dev_replace->cursor_left = 0;
	dev_replace->committed_cursor_left = 0;
	dev_replace->cursor_left_last_write_of_item = 0;
	dev_replace->cursor_right = 0;
	dev_replace->is_valid = 1;
	dev_replace->item_needs_writeback = 1;
	atomic64_set(&dev_replace->num_write_errors, 0);
	atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
	btrfs_dev_replace_unlock(dev_replace, 1);

	/* A missing sysfs link is reported but does not stop the replace. */
	ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
	if (ret)
		btrfs_err(fs_info, "kobj add dev failed %d\n", ret);

	btrfs_wait_ordered_roots(root->fs_info, -1, 0, (u64)-1);

	/* force writing the updated state information to disk */
	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		btrfs_dev_replace_lock(dev_replace, 1);
		/*
		 * Undo the state change made above; leaving the state at
		 * STARTED with NULL devices would mislead later users.
		 */
		dev_replace->replace_state =
			BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED;
		dev_replace->srcdev = NULL;
		dev_replace->tgtdev = NULL;
		goto leave;
	}

	ret = btrfs_commit_transaction(trans, root);
	WARN_ON(ret);

	/* the disk copy procedure reuses the scrub code */
	ret = btrfs_scrub_dev(fs_info, src_device->devid, 0,
			      btrfs_device_get_total_bytes(src_device),
			      &dev_replace->scrub_progress, 0, 1);

	ret = btrfs_dev_replace_finishing(fs_info, ret);
	if (ret == -EINPROGRESS) {
		ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS;
	} else {
		WARN_ON(ret);
	}

	return ret;

leave:
	btrfs_dev_replace_unlock(dev_replace, 1);
leave_no_lock:
	btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
	return ret;
}
int btrfs_dev_replace_start(struct btrfs_root *root, struct btrfs_ioctl_dev_replace_args *args) { struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; int ret; struct btrfs_device *tgt_device = NULL; struct btrfs_device *src_device = NULL; if (btrfs_fs_incompat(fs_info, RAID56)) { btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6"); return -EINVAL; } switch (args->start.cont_reading_from_srcdev_mode) { case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: break; default: return -EINVAL; } if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') || args->start.tgtdev_name[0] == '\0') return -EINVAL; mutex_lock(&fs_info->volume_mutex); ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name, &tgt_device); if (ret) { btrfs_err(fs_info, "target device %s is invalid!", args->start.tgtdev_name); mutex_unlock(&fs_info->volume_mutex); return -EINVAL; } ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid, args->start.srcdev_name, &src_device); mutex_unlock(&fs_info->volume_mutex); if (ret) { ret = -EINVAL; goto leave_no_lock; } if (tgt_device->total_bytes < src_device->total_bytes) { btrfs_err(fs_info, "target device is smaller than source device!"); ret = -EINVAL; goto leave_no_lock; } btrfs_dev_replace_lock(dev_replace); switch (dev_replace->replace_state) { case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: break; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED; goto leave; } dev_replace->cont_reading_from_srcdev_mode = args->start.cont_reading_from_srcdev_mode; WARN_ON(!src_device); dev_replace->srcdev = src_device; WARN_ON(!tgt_device); dev_replace->tgtdev = 
tgt_device; printk_in_rcu(KERN_INFO "BTRFS: dev_replace from %s (devid %llu) to %s started\n", src_device->missing ? "<missing disk>" : rcu_str_deref(src_device->name), src_device->devid, rcu_str_deref(tgt_device->name)); tgt_device->total_bytes = src_device->total_bytes; tgt_device->disk_total_bytes = src_device->disk_total_bytes; tgt_device->bytes_used = src_device->bytes_used; /* * from now on, the writes to the srcdev are all duplicated to * go to the tgtdev as well (refer to btrfs_map_block()). */ dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED; dev_replace->time_started = get_seconds(); dev_replace->cursor_left = 0; dev_replace->committed_cursor_left = 0; dev_replace->cursor_left_last_write_of_item = 0; dev_replace->cursor_right = 0; dev_replace->is_valid = 1; dev_replace->item_needs_writeback = 1; args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; btrfs_dev_replace_unlock(dev_replace); btrfs_wait_ordered_roots(root->fs_info, -1); /* force writing the updated state information to disk */ trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); btrfs_dev_replace_lock(dev_replace); goto leave; } ret = btrfs_commit_transaction(trans, root); WARN_ON(ret); /* the disk copy procedure reuses the scrub code */ ret = btrfs_scrub_dev(fs_info, src_device->devid, 0, src_device->total_bytes, &dev_replace->scrub_progress, 0, 1); ret = btrfs_dev_replace_finishing(root->fs_info, ret); WARN_ON(ret); return 0; leave: dev_replace->srcdev = NULL; dev_replace->tgtdev = NULL; btrfs_dev_replace_unlock(dev_replace); leave_no_lock: if (tgt_device) btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); return ret; }
int btrfs_dev_replace_start(struct btrfs_root *root, struct btrfs_ioctl_dev_replace_args *args) { struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; int ret; struct btrfs_device *tgt_device = NULL; struct btrfs_device *src_device = NULL; switch (args->start.cont_reading_from_srcdev_mode) { case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS: case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID: break; default: return -EINVAL; } if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') || args->start.tgtdev_name[0] == '\0') return -EINVAL; /* * Here we commit the transaction to make sure commit_total_bytes * of all the devices are updated. */ trans = btrfs_attach_transaction(root); if (!IS_ERR(trans)) { ret = btrfs_commit_transaction(trans, root); if (ret) return ret; } else if (PTR_ERR(trans) != -ENOENT) { return PTR_ERR(trans); } /* the disk copy procedure reuses the scrub code */ mutex_lock(&fs_info->volume_mutex); ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid, args->start.srcdev_name, &src_device); if (ret) { mutex_unlock(&fs_info->volume_mutex); return ret; } ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name, src_device, &tgt_device); mutex_unlock(&fs_info->volume_mutex); if (ret) return ret; btrfs_dev_replace_lock(dev_replace); switch (dev_replace->replace_state) { case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: break; case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED; goto leave; } dev_replace->cont_reading_from_srcdev_mode = args->start.cont_reading_from_srcdev_mode; WARN_ON(!src_device); dev_replace->srcdev = src_device; WARN_ON(!tgt_device); dev_replace->tgtdev = tgt_device; ret = 
btrfs_kobj_add_device(tgt_device->fs_devices, tgt_device); if (ret) btrfs_err(root->fs_info, "kobj add dev failed %d\n", ret); printk_in_rcu(KERN_INFO "BTRFS: dev_replace from %s (devid %llu) to %s started\n", src_device->missing ? "<missing disk>" : rcu_str_deref(src_device->name), src_device->devid, rcu_str_deref(tgt_device->name)); /* * from now on, the writes to the srcdev are all duplicated to * go to the tgtdev as well (refer to btrfs_map_block()). */ dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED; dev_replace->time_started = get_seconds(); dev_replace->cursor_left = 0; dev_replace->committed_cursor_left = 0; dev_replace->cursor_left_last_write_of_item = 0; dev_replace->cursor_right = 0; dev_replace->is_valid = 1; dev_replace->item_needs_writeback = 1; args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; btrfs_dev_replace_unlock(dev_replace); btrfs_wait_ordered_roots(root->fs_info, -1); /* force writing the updated state information to disk */ trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); btrfs_dev_replace_lock(dev_replace); goto leave; } ret = btrfs_commit_transaction(trans, root); WARN_ON(ret); /* the disk copy procedure reuses the scrub code */ ret = btrfs_scrub_dev(fs_info, src_device->devid, 0, btrfs_device_get_total_bytes(src_device), &dev_replace->scrub_progress, 0, 1); ret = btrfs_dev_replace_finishing(root->fs_info, ret); /* don't warn if EINPROGRESS, someone else might be running scrub */ if (ret == -EINPROGRESS) { args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS; ret = 0; } else { WARN_ON(ret); } return ret; leave: dev_replace->srcdev = NULL; dev_replace->tgtdev = NULL; btrfs_dev_replace_unlock(dev_replace); btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); return ret; }