dsl_pool_t * dsl_pool_create(spa_t *spa, nvlist_t *zplprops, dsl_crypto_params_t *dcp, uint64_t txg) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); dsl_dataset_t *ds; uint64_t obj; rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); /* create and open the MOS (meta-objset) */ dp->dp_meta_objset = dmu_objset_create_impl(spa, NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); spa->spa_meta_objset = dp->dp_meta_objset; /* create the pool directory */ err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx); ASSERT0(err); /* Initialize scan structures */ VERIFY0(dsl_scan_init(dp, txg)); /* create and open the root dir */ dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir)); /* create and open the meta-objset dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir)); if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { /* create and open the free dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* create and open the free_bplist */ obj = bpobj_alloc(dp->dp_meta_objset, SPA_OLD_MAXBLOCKSIZE, tx); VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0); VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) dsl_pool_create_origin(dp, tx); /* * Some features may be needed when creating the root dataset, so we * create the feature objects here. */ if (spa_version(spa) >= SPA_VERSION_FEATURES) spa_feature_create_zap_objects(spa, tx); if (dcp != NULL && dcp->cp_crypt != ZIO_CRYPT_OFF && dcp->cp_crypt != ZIO_CRYPT_INHERIT) spa_feature_enable(spa, SPA_FEATURE_ENCRYPTION, tx); /* create the root dataset */ obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, dcp, 0, tx); /* create the root objset */ VERIFY0(dsl_dataset_hold_obj_flags(dp, obj, DS_HOLD_FLAG_DECRYPT, FTAG, &ds)); #ifdef _KERNEL { objset_t *os; rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG); os = dmu_objset_create_impl(dp->dp_spa, ds, dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx); rrw_exit(&ds->ds_bp_rwlock, FTAG); zfs_create_fs(os, kcred, zplprops, tx); } #endif dsl_dataset_rele_flags(ds, DS_HOLD_FLAG_DECRYPT, FTAG); dmu_tx_commit(tx); rrw_exit(&dp->dp_config_rwlock, FTAG); return (dp); }
dsl_pool_t * dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg) { int err; dsl_pool_t *dp = dsl_pool_open_impl(spa, txg); dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg); objset_t *os; dsl_dataset_t *ds; uint64_t obj; rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); /* create and open the MOS (meta-objset) */ dp->dp_meta_objset = dmu_objset_create_impl(spa, NULL, &dp->dp_meta_rootbp, DMU_OST_META, tx); /* create the pool directory */ err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx); ASSERT0(err); /* Initialize scan structures */ VERIFY0(dsl_scan_init(dp, txg)); /* create and open the root dir */ dp->dp_root_dir_obj = dsl_dir_create_sync(dp, NULL, NULL, tx); VERIFY0(dsl_dir_hold_obj(dp, dp->dp_root_dir_obj, NULL, dp, &dp->dp_root_dir)); /* create and open the meta-objset dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, MOS_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, MOS_DIR_NAME, &dp->dp_mos_dir)); if (spa_version(spa) >= SPA_VERSION_DEADLISTS) { /* create and open the free dir */ (void) dsl_dir_create_sync(dp, dp->dp_root_dir, FREE_DIR_NAME, tx); VERIFY0(dsl_pool_open_special_dir(dp, FREE_DIR_NAME, &dp->dp_free_dir)); /* create and open the free_bplist */ obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx); VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0); VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj)); } if (spa_version(spa) >= SPA_VERSION_DSL_SCRUB) dsl_pool_create_origin(dp, tx); /* create the root dataset */ obj = dsl_dataset_create_sync_dd(dp->dp_root_dir, NULL, 0, tx); /* create the root objset */ VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds)); os = dmu_objset_create_impl(dp->dp_spa, ds, dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx); #ifdef _KERNEL zfs_create_fs(os, kcred, zplprops, tx); #endif dsl_dataset_rele(ds, FTAG); dmu_tx_commit(tx); rrw_exit(&dp->dp_config_rwlock, FTAG); return (dp); }
/* * Start a log block write and advance to the next log block. * Calls are serialized. */ static lwb_t * zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) { lwb_t *nlwb; zil_trailer_t *ztp = (zil_trailer_t *)(lwb->lwb_buf + lwb->lwb_sz) - 1; spa_t *spa = zilog->zl_spa; blkptr_t *bp = &ztp->zit_next_blk; uint64_t txg; uint64_t zil_blksz; int error; ASSERT(lwb->lwb_nused <= ZIL_BLK_DATA_SZ(lwb)); /* * Allocate the next block and save its address in this block * before writing it in order to establish the log chain. * Note that if the allocation of nlwb synced before we wrote * the block that points at it (lwb), we'd leak it if we crashed. * Therefore, we don't do txg_rele_to_sync() until zil_lwb_write_done(). */ txg = txg_hold_open(zilog->zl_dmu_pool, &lwb->lwb_txgh); txg_rele_to_quiesce(&lwb->lwb_txgh); /* * Pick a ZIL blocksize. We request a size that is the * maximum of the previous used size, the current used size and * the amount waiting in the queue. */ zil_blksz = MAX(zilog->zl_prev_used, zilog->zl_cur_used + sizeof (*ztp)); zil_blksz = MAX(zil_blksz, zilog->zl_itx_list_sz + sizeof (*ztp)); zil_blksz = P2ROUNDUP_TYPED(zil_blksz, ZIL_MIN_BLKSZ, uint64_t); if (zil_blksz > ZIL_MAX_BLKSZ) zil_blksz = ZIL_MAX_BLKSZ; BP_ZERO(bp); /* pass the old blkptr in order to spread log blocks across devs */ error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, txg); if (error) { dmu_tx_t *tx = dmu_tx_create_assigned(zilog->zl_dmu_pool, txg); /* * We dirty the dataset to ensure that zil_sync() will * be called to remove this lwb from our zl_lwb_list. * Failing to do so, may leave an lwb with a NULL lwb_buf * hanging around on the zl_lwb_list. */ dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); dmu_tx_commit(tx); /* * Since we've just experienced an allocation failure so we * terminate the current lwb and send it on its way. */ ztp->zit_pad = 0; ztp->zit_nused = lwb->lwb_nused; ztp->zit_bt.zbt_cksum = lwb->lwb_blk.blk_cksum; zio_nowait(lwb->lwb_zio); /* * By returning NULL the caller will call tx_wait_synced() */ return (NULL); } ASSERT3U(bp->blk_birth, ==, txg); ztp->zit_pad = 0; ztp->zit_nused = lwb->lwb_nused; ztp->zit_bt.zbt_cksum = lwb->lwb_blk.blk_cksum; bp->blk_cksum = lwb->lwb_blk.blk_cksum; bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++; /* * Allocate a new log write buffer (lwb). */ nlwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP); nlwb->lwb_zilog = zilog; nlwb->lwb_blk = *bp; nlwb->lwb_nused = 0; nlwb->lwb_sz = BP_GET_LSIZE(&nlwb->lwb_blk); nlwb->lwb_buf = zio_buf_alloc(nlwb->lwb_sz); nlwb->lwb_max_txg = txg; nlwb->lwb_zio = NULL; /* * Put new lwb at the end of the log chain */ mutex_enter(&zilog->zl_lock); list_insert_tail(&zilog->zl_lwb_list, nlwb); mutex_exit(&zilog->zl_lock); /* Record the block for later vdev flushing */ zil_add_block(zilog, &lwb->lwb_blk); /* * kick off the write for the old log block */ dprintf_bp(&lwb->lwb_blk, "lwb %p txg %llu: ", lwb, txg); ASSERT(lwb->lwb_zio); zio_nowait(lwb->lwb_zio); return (nlwb); }