/*
 * Destroy the objset (and its dataset) named by 'name'.
 *
 * Returns 0 on success, or an errno from dmu_objset_open() /
 * dsl_dataset_destroy().
 */
int dmu_objset_destroy(const char *name) {
	objset_t *os;
	int error;

	/*
	 * If it looks like we'll be able to destroy it, and there's
	 * an unplayed replay log sitting around, destroy the log.
	 * It would be nicer to do this in dsl_dataset_destroy_sync(),
	 * but the replay log objset is modified in open context.
	 */
	error = dmu_objset_open(name, DMU_OST_ANY,
	    DS_MODE_OWNER|DS_MODE_READONLY|DS_MODE_INCONSISTENT, &os);
	if (error == 0) {
		dsl_dataset_t *ds = os->os->os_dsl_dataset;
		zil_destroy(dmu_objset_zil(os), B_FALSE);

		error = dsl_dataset_destroy(ds, os);
		/*
		 * dsl_dataset_destroy() closes the ds; we only free the
		 * objset_t wrapper here (even if the destroy failed).
		 */
		kmem_free(os, sizeof (objset_t));
	}

	return (error);
}
/* ARGSUSED */ int zil_clear_log_chain(char *osname, void *txarg) { zilog_t *zilog; zil_header_t *zh; objset_t *os; dmu_tx_t *tx; int error; error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os); if (error) { cmn_err(CE_WARN, "can't open objset for %s", osname); return (0); } zilog = dmu_objset_zil(os); tx = dmu_tx_create(zilog->zl_os); (void) dmu_tx_assign(tx, TXG_WAIT); zh = zil_header_in_syncing_context(zilog); BP_ZERO(&zh->zh_log); dsl_dataset_dirty(dmu_objset_ds(os), tx); dmu_tx_commit(tx); dmu_objset_close(os); return (0); }
/*
 * Open an intent log: install the caller's get_data callback and create
 * the per-log cleaning taskq.  Returns the objset's zilog.
 */
zilog_t *
zil_open(objset_t *os, zil_get_data_t *get_data)
{
	zilog_t *zl;

	zl = dmu_objset_zil(os);
	zl->zl_get_data = get_data;
	zl->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri,
	    2, 2, TASKQ_PREPOPULATE);

	return (zl);
}
/*
 * Snapshot 'fsname'@'snapname', optionally recursing over all child
 * datasets.  On failure, the name of the dataset that failed is copied
 * back into 'fsname' so the caller can report it.
 *
 * Returns 0 on success or an errno.
 */
int
dmu_objset_snapshot(char *fsname, char *snapname, boolean_t recursive)
{
	dsl_sync_task_t *dst;
	struct osnode *osn;
	struct snaparg sn = { 0 };
	spa_t *spa;
	int err;

	(void) strcpy(sn.failed, fsname);

	err = spa_open(fsname, &spa, FTAG);
	if (err)
		return (err);

	sn.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
	sn.snapname = snapname;
	list_create(&sn.objsets, sizeof (struct osnode),
	    offsetof(struct osnode, node));

	if (recursive) {
		sn.checkperms = B_TRUE;
		err = dmu_objset_find(fsname,
		    dmu_objset_snapshot_one, &sn, DS_FIND_CHILDREN);
	} else {
		sn.checkperms = B_FALSE;
		err = dmu_objset_snapshot_one(fsname, &sn);
	}

	if (err)
		goto out;

	err = dsl_sync_task_group_wait(sn.dstg);

	/* Record the name of any dataset whose snapshot task failed. */
	for (dst = list_head(&sn.dstg->dstg_tasks); dst;
	    dst = list_next(&sn.dstg->dstg_tasks, dst)) {
		dsl_dataset_t *ds = dst->dst_arg1;
		if (dst->dst_err)
			dsl_dataset_name(ds, sn.failed);
	}

out:
	/*
	 * Resume the intent logs and close every objset we suspended in
	 * dmu_objset_snapshot_one(), whether or not the snapshots
	 * succeeded.  (Fixed: explicit != NULL comparison instead of a
	 * bare assignment-in-condition.)
	 */
	while ((osn = list_head(&sn.objsets)) != NULL) {
		list_remove(&sn.objsets, osn);
		zil_resume(dmu_objset_zil(osn->os));
		dmu_objset_close(osn->os);
		kmem_free(osn, sizeof (struct osnode));
	}
	list_destroy(&sn.objsets);

	if (err)
		(void) strcpy(fsname, sn.failed);

	dsl_sync_task_group_destroy(sn.dstg);
	spa_close(spa, FTAG);
	return (err);
}
/*
 * Claim the intent log blocks of 'osname' during pool import, so they
 * are not reused before they can be replayed.  Errors opening the
 * objset are logged and swallowed; always returns 0.
 */
int zil_claim(char *osname, void *txarg) {
	dmu_tx_t *tx = txarg;
	uint64_t first_txg = dmu_tx_get_txg(tx);
	zilog_t *zilog;
	zil_header_t *zh;
	objset_t *os;
	int error;

	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
	if (error) {
		cmn_err(CE_WARN, "can't open objset for %s", osname);
		return (0);
	}

	zilog = dmu_objset_zil(os);
	zh = zil_header_in_syncing_context(zilog);

	/*
	 * Record here whether the zil has any records to replay.
	 * If the header block pointer is null or the block points
	 * to the stubby then we know there are no valid log records.
	 * We use the header to store this state as the zilog gets
	 * freed later in dmu_objset_close().
	 * The flags (and the rest of the header fields) are cleared in
	 * zil_sync() as a result of a zil_destroy(), after replaying the log.
	 *
	 * Note, the intent log can be empty but still need the
	 * stubby to be claimed.
	 */
	if (!zil_empty(zilog))
		zh->zh_flags |= ZIL_REPLAY_NEEDED;

	/*
	 * Claim all log blocks if we haven't already done so, and remember
	 * the highest claimed sequence number.  This ensures that if we can
	 * read only part of the log now (e.g. due to a missing device),
	 * but we can read the entire log later, we will not try to replay
	 * or destroy beyond the last block we successfully claimed.
	 */
	ASSERT3U(zh->zh_claim_txg, <=, first_txg);
	if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) {
		zh->zh_claim_txg = first_txg;
		zh->zh_claim_seq = zil_parse(zilog, zil_claim_log_block,
		    zil_claim_log_record, tx, first_txg);
		dsl_dataset_dirty(dmu_objset_ds(os), tx);
	}

	/* Claiming must happen in the first txg after the last sync. */
	ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
	dmu_objset_close(os);
	return (0);
}
/*
 * Snapshot 'fsname'@'snapname' with the given properties, optionally
 * recursing over child datasets.  The snaparg is heap-allocated to keep
 * this frame small.  On failure the offending dataset name is copied
 * back into 'fsname'.  Returns 0 on success or an errno.
 */
int dmu_objset_snapshot(char *fsname, char *snapname,
    nvlist_t *props, boolean_t recursive) {
	dsl_sync_task_t *dst;
	struct snaparg *sn;
	spa_t *spa;
	int err;

	sn = kmem_alloc(sizeof (struct snaparg), KM_SLEEP);
	(void) strcpy(sn->failed, fsname);

	err = spa_open(fsname, &spa, FTAG);
	if (err) {
		kmem_free(sn, sizeof (struct snaparg));
		return (err);
	}

	sn->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
	sn->snapname = snapname;
	sn->props = props;

	if (recursive) {
		sn->checkperms = B_TRUE;
		err = dmu_objset_find(fsname,
		    dmu_objset_snapshot_one, sn, DS_FIND_CHILDREN);
	} else {
		sn->checkperms = B_FALSE;
		err = dmu_objset_snapshot_one(fsname, sn);
	}

	if (err == 0)
		err = dsl_sync_task_group_wait(sn->dstg);

	/*
	 * For every objset that dmu_objset_snapshot_one() opened and
	 * suspended, record the name if its task failed, then resume the
	 * zil and close the objset.
	 */
	for (dst = list_head(&sn->dstg->dstg_tasks); dst;
	    dst = list_next(&sn->dstg->dstg_tasks, dst)) {
		objset_t *os = dst->dst_arg1;
		dsl_dataset_t *ds = os->os->os_dsl_dataset;
		if (dst->dst_err)
			dsl_dataset_name(ds, sn->failed);
		zil_resume(dmu_objset_zil(os));
		dmu_objset_close(os);
	}

	if (err)
		(void) strcpy(fsname, sn->failed);
	dsl_sync_task_group_destroy(sn->dstg);
	spa_close(spa, FTAG);
	kmem_free(sn, sizeof (struct snaparg));
	return (err);
}
static int dmu_objset_snapshot_one(char *name, void *arg) { struct snaparg *sn = arg; objset_t *os; int err; (void) strcpy(sn->failed, name); /* * Check permissions only when requested. This only applies when * doing a recursive snapshot. The permission checks for the starting * dataset have already been performed in zfs_secpolicy_snapshot() */ if (sn->checkperms == B_TRUE && (err = zfs_secpolicy_snapshot_perms(name, CRED()))) return (err); err = dmu_objset_open(name, DMU_OST_ANY, DS_MODE_USER, &os); if (err != 0) return (err); /* If the objset is in an inconsistent state, return busy */ if (os->os->os_dsl_dataset->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) { dmu_objset_close(os); return (EBUSY); } /* * NB: we need to wait for all in-flight changes to get to disk, * so that we snapshot those changes. zil_suspend does this as * a side effect. */ err = zil_suspend(dmu_objset_zil(os)); if (err == 0) { struct osnode *osn; dsl_sync_task_create(sn->dstg, dsl_dataset_snapshot_check, dsl_dataset_snapshot_sync, os->os->os_dsl_dataset, sn->snapname, 3); osn = kmem_alloc(sizeof (struct osnode), KM_SLEEP); osn->os = os; list_insert_tail(&sn->objsets, osn); } else { dmu_objset_close(os); } return (err); }
/*
 * Check whether the intent log for 'osname' can be quiesced (e.g. before
 * taking a vdev offline).  Returns 0 if the log was suspended and
 * resumed cleanly, EEXIST if it could not be suspended, or the errno
 * from dmu_objset_open().
 */
/* ARGSUSED */
int
zil_vdev_offline(char *osname, void *arg)
{
	objset_t *os;
	zilog_t *zl;
	int err;

	err = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
	if (err != 0)
		return (err);

	zl = dmu_objset_zil(os);
	if (zil_suspend(zl) == 0) {
		zil_resume(zl);
	} else {
		err = EEXIST;
	}

	dmu_objset_close(os);
	return (err);
}
/*
 * Walk the intent-log block chain for 'osname' to verify it is readable
 * end to end.  A checksum error is the normal end-of-chain marker and is
 * not reported.  Errors opening the objset are logged and swallowed.
 * Returns 0 if the chain is absent or fully readable, else an errno.
 */
/* ARGSUSED */
int zil_check_log_chain(char *osname, void *txarg) {
	zilog_t *zilog;
	zil_header_t *zh;
	blkptr_t blk;
	arc_buf_t *abuf;
	objset_t *os;
	char *lrbuf;
	zil_trailer_t *ztp;
	int error;

	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
	if (error) {
		cmn_err(CE_WARN, "can't open objset for %s", osname);
		return (0);
	}

	zilog = dmu_objset_zil(os);
	zh = zil_header_in_syncing_context(zilog);
	blk = zh->zh_log;
	if (BP_IS_HOLE(&blk)) {
		dmu_objset_close(os);
		return (0); /* no chain */
	}

	/*
	 * Follow the chain: each log block's trailer holds the block
	 * pointer of the next block.  The ARC buffer ref is dropped
	 * after the next pointer has been copied out.
	 */
	for (;;) {
		error = zil_read_log_block(zilog, &blk, &abuf);
		if (error)
			break;
		lrbuf = abuf->b_data;
		ztp = (zil_trailer_t *)(lrbuf + BP_GET_LSIZE(&blk)) - 1;
		blk = ztp->zit_next_blk;
		VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
	}
	dmu_objset_close(os);
	if (error == ECKSUM)
		return (0); /* normal end of chain */
	return (error);
}
/*
 * If this dataset has a non-empty intent log, replay it and destroy it.
 */
void
zil_replay(objset_t *os, void *arg,
    zil_replay_func_t *replay_func[TX_MAX_TYPE])
{
	zilog_t *zilog = dmu_objset_zil(os);
	const zil_header_t *zh = zilog->zl_header;
	zil_replay_arg_t zr;

	/* Nothing to replay: just destroy any stubby log that remains. */
	if ((zh->zh_flags & ZIL_REPLAY_NEEDED) == 0) {
		zil_destroy(zilog, B_TRUE);
		return;
	}

	zr.zr_os = os;
	zr.zr_replay = replay_func;
	zr.zr_arg = arg;
	zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zh->zh_log);
	/*
	 * XXX: Changed to use vmem_alloc instead of kmem_alloc for
	 * large allocation size (I think this is safe here).
	 */
	zr.zr_lrbuf = vmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP);

	/*
	 * Wait for in-progress removes to sync before starting replay.
	 */
	txg_wait_synced(zilog->zl_dmu_pool, 0);

	zilog->zl_replay = B_TRUE;
	zilog->zl_replay_time = lbolt;
	ASSERT(zilog->zl_replay_blks == 0);
	(void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record,
	    &zr, zh->zh_claim_txg);
	vmem_free(zr.zr_lrbuf, 2 * SPA_MAXBLOCKSIZE);

	/* The log has been replayed; destroy it and wait for the destroy. */
	zil_destroy(zilog, B_FALSE);
	txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);
	zilog->zl_replay = B_FALSE;
}
/*
 * If this dataset has a non-empty intent log, replay it and destroy it.
 */
void
zil_replay(objset_t *os, void *arg,
    zil_replay_func_t *replay_func[TX_MAX_TYPE])
{
	zilog_t *zilog = dmu_objset_zil(os);
	const zil_header_t *zh = zilog->zl_header;
	zil_replay_arg_t zr;

	/* Nothing to replay: just destroy any stubby log that remains. */
	if ((zh->zh_flags & ZIL_REPLAY_NEEDED) == 0) {
		zil_destroy(zilog, B_TRUE);
		return;
	}

	zr.zr_os = os;
	zr.zr_replay = replay_func;
	zr.zr_arg = arg;
	zr.zr_byteswap = BP_SHOULD_BYTESWAP(&zh->zh_log);
	zr.zr_lrbuf = kmem_alloc(2 * SPA_MAXBLOCKSIZE, KM_SLEEP);

	/*
	 * Wait for in-progress removes to sync before starting replay.
	 */
	txg_wait_synced(zilog->zl_dmu_pool, 0);

	zilog->zl_replay = B_TRUE;
	zilog->zl_replay_time = LBOLT;
	ASSERT(zilog->zl_replay_blks == 0);
	(void) zil_parse(zilog, zil_incr_blks, zil_replay_log_record,
	    &zr, zh->zh_claim_txg);
	kmem_free(zr.zr_lrbuf, 2 * SPA_MAXBLOCKSIZE);

	/* The log has been replayed; destroy it and wait for the destroy. */
	zil_destroy(zilog, B_FALSE);
	txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);
	zilog->zl_replay = B_FALSE;
}
/*
 * Create a block device minor node and setup the linkage between it
 * and the specified volume. Once this function returns the block
 * device is live and ready for use.
 */
static int
zvol_create_minor_impl(const char *name)
{
	zvol_state_t *zv;
	objset_t *os;
	dmu_object_info_t *doi;
	uint64_t volsize;
	uint64_t len;
	unsigned minor = 0;
	int error = 0;

	mutex_enter(&zvol_state_lock);

	zv = zvol_find_by_name(name);
	if (zv) {
		error = SET_ERROR(EEXIST);
		goto out;
	}

	doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);

	/* Own the objset for the duration of setup; disowned on exit. */
	error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os);
	if (error)
		goto out_doi;

	error = dmu_object_info(os, ZVOL_OBJ, doi);
	if (error)
		goto out_dmu_objset_disown;

	error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
	if (error)
		goto out_dmu_objset_disown;

	error = zvol_find_minor(&minor);
	if (error)
		goto out_dmu_objset_disown;

	zv = zvol_alloc(MKDEV(zvol_major, minor), name);
	if (zv == NULL) {
		error = SET_ERROR(EAGAIN);
		goto out_dmu_objset_disown;
	}

	if (dmu_objset_is_snapshot(os))
		zv->zv_flags |= ZVOL_RDONLY;

	zv->zv_volblocksize = doi->doi_data_block_size;
	zv->zv_volsize = volsize;
	zv->zv_objset = os;

	set_capacity(zv->zv_disk, zv->zv_volsize >> 9);

	/* Configure the block-layer request queue limits and flags. */
	blk_queue_max_hw_sectors(zv->zv_queue, (DMU_MAX_ACCESS / 4) >> 9);
	blk_queue_max_segments(zv->zv_queue, UINT16_MAX);
	blk_queue_max_segment_size(zv->zv_queue, UINT_MAX);
	blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize);
	blk_queue_io_opt(zv->zv_queue, zv->zv_volblocksize);
	blk_queue_max_discard_sectors(zv->zv_queue,
	    (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9);
	blk_queue_discard_granularity(zv->zv_queue, zv->zv_volblocksize);
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zv->zv_queue);
#ifdef QUEUE_FLAG_NONROT
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zv->zv_queue);
#endif
#ifdef QUEUE_FLAG_ADD_RANDOM
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zv->zv_queue);
#endif

	/* Replay (or discard) the intent log before the device goes live. */
	if (spa_writeable(dmu_objset_spa(os))) {
		if (zil_replay_disable)
			zil_destroy(dmu_objset_zil(os), B_FALSE);
		else
			zil_replay(os, zv, zvol_replay_vector);
	}

	/*
	 * When udev detects the addition of the device it will immediately
	 * invoke blkid(8) to determine the type of content on the device.
	 * Prefetching the blocks commonly scanned by blkid(8) will speed
	 * up this process.
	 */
	len = MIN(MAX(zvol_prefetch_bytes, 0), SPA_MAXBLOCKSIZE);
	if (len > 0) {
		dmu_prefetch(os, ZVOL_OBJ, 0, 0, len,
		    ZIO_PRIORITY_SYNC_READ);
		dmu_prefetch(os, ZVOL_OBJ, 0, volsize - len, len,
		    ZIO_PRIORITY_SYNC_READ);
	}

	zv->zv_objset = NULL;
out_dmu_objset_disown:
	dmu_objset_disown(os, zvol_tag);
out_doi:
	kmem_free(doi, sizeof (dmu_object_info_t));
out:
	if (error == 0) {
		zvol_insert(zv);
		/*
		 * Drop the lock to prevent deadlock with sys_open() ->
		 * zvol_open(), which first takes bd_disk->bd_mutex and then
		 * takes zvol_state_lock, whereas this code path first takes
		 * zvol_state_lock, and then takes bd_disk->bd_mutex.
		 */
		mutex_exit(&zvol_state_lock);
		add_disk(zv->zv_disk);
	} else {
		mutex_exit(&zvol_state_lock);
	}

	return (SET_ERROR(error));
}
static int __zvol_create_minor(const char *name) { zvol_state_t *zv; objset_t *os; dmu_object_info_t *doi; uint64_t volsize; unsigned minor = 0; int error = 0; ASSERT(MUTEX_HELD(&zvol_state_lock)); zv = zvol_find_by_name(name); if (zv) { error = EEXIST; goto out; } doi = kmem_alloc(sizeof(dmu_object_info_t), KM_SLEEP); error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os); if (error) goto out_doi; /* Make sure we have the key loaded if we need one. */ error = dsl_crypto_key_inherit(name); if (error != 0 && error != EEXIST) goto out_dmu_objset_disown; error = dmu_object_info(os, ZVOL_OBJ, doi); if (error) goto out_dmu_objset_disown; error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); if (error) goto out_dmu_objset_disown; error = zvol_find_minor(&minor); if (error) goto out_dmu_objset_disown; zv = zvol_alloc(MKDEV(zvol_major, minor), name); if (zv == NULL) { error = EAGAIN; goto out_dmu_objset_disown; } if (dmu_objset_is_snapshot(os)) zv->zv_flags |= ZVOL_RDONLY; zv->zv_volblocksize = doi->doi_data_block_size; zv->zv_volsize = volsize; zv->zv_objset = os; set_capacity(zv->zv_disk, zv->zv_volsize >> 9); blk_queue_max_hw_sectors(zv->zv_queue, UINT_MAX); blk_queue_max_segments(zv->zv_queue, UINT16_MAX); blk_queue_max_segment_size(zv->zv_queue, UINT_MAX); blk_queue_physical_block_size(zv->zv_queue, zv->zv_volblocksize); blk_queue_io_opt(zv->zv_queue, zv->zv_volblocksize); #ifdef HAVE_BLK_QUEUE_DISCARD blk_queue_max_discard_sectors(zv->zv_queue, (zvol_max_discard_blocks * zv->zv_volblocksize) >> 9); blk_queue_discard_granularity(zv->zv_queue, zv->zv_volblocksize); queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zv->zv_queue); #endif #ifdef HAVE_BLK_QUEUE_NONROT queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zv->zv_queue); #endif if (zil_replay_disable) zil_destroy(dmu_objset_zil(os), B_FALSE); else zil_replay(os, zv, zvol_replay_vector); out_dmu_objset_disown: dmu_objset_disown(os, zvol_tag); zv->zv_objset = NULL; out_doi: kmem_free(doi, 
sizeof(dmu_object_info_t)); out: if (error == 0) { zvol_insert(zv); add_disk(zv->zv_disk); } return (error); }
static int __zvol_create_minor(const char *name) { zvol_state_t *zv; objset_t *os; dmu_object_info_t *doi; uint64_t volsize; unsigned minor = 0; int error = 0; ASSERT(MUTEX_HELD(&zvol_state_lock)); zv = zvol_find_by_name(name); if (zv) { error = EEXIST; goto out; } doi = kmem_alloc(sizeof(dmu_object_info_t), KM_SLEEP); error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os); if (error) goto out_doi; error = dmu_object_info(os, ZVOL_OBJ, doi); if (error) goto out_dmu_objset_disown; error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize); if (error) goto out_dmu_objset_disown; error = zvol_find_minor(&minor); if (error) goto out_dmu_objset_disown; zv = zvol_alloc(MKDEV(zvol_major, minor), name); if (zv == NULL) { error = EAGAIN; goto out_dmu_objset_disown; } if (dmu_objset_is_snapshot(os)) zv->zv_flags |= ZVOL_RDONLY; zv->zv_volblocksize = doi->doi_data_block_size; zv->zv_volsize = volsize; zv->zv_objset = os; set_capacity(zv->zv_disk, zv->zv_volsize >> 9); if (zil_replay_disable) zil_destroy(dmu_objset_zil(os), B_FALSE); else zil_replay(os, zv, zvol_replay_vector); out_dmu_objset_disown: dmu_objset_disown(os, zvol_tag); zv->zv_objset = NULL; out_doi: kmem_free(doi, sizeof(dmu_object_info_t)); out: if (error == 0) { zvol_insert(zv); add_disk(zv->zv_disk); } return (error); }