/* called from dsl for meta-objset */ objset_t * dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx) { objset_t *os; dnode_t *mdn; ASSERT(dmu_tx_is_syncing(tx)); if (ds != NULL) VERIFY0(dmu_objset_from_ds(ds, &os)); else VERIFY0(dmu_objset_open_impl(spa, NULL, bp, &os)); mdn = DMU_META_DNODE(os); dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); /* * We don't want to have to increase the meta-dnode's nlevels * later, because then we could do it in quescing context while * we are also accessing it in open context. * * This precaution is not necessary for the MOS (ds == NULL), * because the MOS is only updated in syncing context. * This is most fortunate: the MOS is the only objset that * needs to be synced multiple times as spa_sync() iterates * to convergence, so minimizing its dn_nlevels matters. */ if (ds != NULL) { int levels = 1; /* * Determine the number of levels necessary for the meta-dnode * to contain DN_MAX_OBJECT dnodes. */ while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift + (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < DN_MAX_OBJECT * sizeof (dnode_phys_t)) levels++; mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = mdn->dn_nlevels = levels; } ASSERT(type != DMU_OST_NONE); ASSERT(type != DMU_OST_ANY); ASSERT(type < DMU_OST_NUMTYPES); os->os_phys->os_type = type; if (dmu_objset_userused_enabled(os)) { os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; os->os_flags = os->os_phys->os_flags; } dsl_dataset_dirty(ds, tx); return (os); }
int dmu_objset_userspace_upgrade(objset_t *os) { uint64_t obj; int err = 0; if (dmu_objset_userspace_present(os)) return (0); if (!dmu_objset_userused_enabled(os->os)) return (ENOTSUP); if (dmu_objset_is_snapshot(os)) return (EINVAL); /* * We simply need to mark every object dirty, so that it will be * synced out and now accounted. If this is called * concurrently, or if we already did some work before crashing, * that's fine, since we track each object's accounted state * independently. */ for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE, 0)) { dmu_tx_t *tx; dmu_buf_t *db; int objerr; if (issig(JUSTLOOKING) && issig(FORREAL)) return (EINTR); objerr = dmu_bonus_hold(os, obj, FTAG, &db); if (objerr) continue; tx = dmu_tx_create(os); dmu_tx_hold_bonus(tx, obj); objerr = dmu_tx_assign(tx, TXG_WAIT); if (objerr) { dmu_tx_abort(tx); continue; } dmu_buf_will_dirty(db, tx); dmu_buf_rele(db, FTAG); dmu_tx_commit(tx); } os->os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; txg_wait_synced(dmu_objset_pool(os), 0); return (0); }
/* called from dsl for meta-objset */ objset_t * dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, dmu_objset_type_t type, dsl_crypto_ctx_t *dcc, dmu_tx_t *tx) { objset_t *os; dnode_t *mdn; ASSERT(dmu_tx_is_syncing(tx)); if (ds != NULL) VERIFY(0 == dmu_objset_from_ds(ds, &os)); else VERIFY(0 == dmu_objset_open_impl(spa, NULL, bp, &os)); mdn = DMU_META_DNODE(os); dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT, DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx); /* * We don't want to have to increase the meta-dnode's nlevels * later, because then we could do it in quescing context while * we are also accessing it in open context. * * This precaution is not necessary for the MOS (ds == NULL), * because the MOS is only updated in syncing context. * This is most fortunate: the MOS is the only objset that * needs to be synced multiple times as spa_sync() iterates * to convergence, so minimizing its dn_nlevels matters. */ if (ds != NULL) { int levels = 1; /* * Determine the number of levels necessary for the meta-dnode * to contain DN_MAX_OBJECT dnodes. */ while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift + (levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) < DN_MAX_OBJECT * sizeof (dnode_phys_t)) levels++; mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] = mdn->dn_nlevels = levels; } ASSERT(type != DMU_OST_NONE); ASSERT(type != DMU_OST_ANY); ASSERT(type < DMU_OST_NUMTYPES); /* * Note: although we should not be dirtying the objset outside of * sync context, the os_type is used directly from the os_phys * in dsl_scan so it may not be save to put os_type in the in-core * objset_t and set it in the phys in sync context (as with os_flags) */ dmu_objset_dirty(os); os->os_phys->os_type = type; if (dmu_objset_userused_enabled(os)) { os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE; // FIXME - removed in newer. // os->os_flags = os->os_phys->os_flags; } if (dcc != NULL && dcc->dcc_crypt != ZIO_CRYPT_INHERIT) { os->os_crypt = zio_crypt_select(dcc->dcc_crypt, ZIO_CRYPT_ON_VALUE); } dsl_dataset_dirty(ds, tx); return (os); }
/* called from dsl */ void dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) { int txgoff; zbookmark_t zb; zio_prop_t zp; zio_t *zio; list_t *list; list_t *newlist = NULL; dbuf_dirty_record_t *dr; dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); ASSERT(dmu_tx_is_syncing(tx)); /* XXX the write_done callback should really give us the tx... */ os->os_synctx = tx; if (os->os_dsl_dataset == NULL) { /* * This is the MOS. If we have upgraded, * spa_max_replication() could change, so reset * os_copies here. */ os->os_copies = spa_max_replication(os->os_spa); } /* * Create the root block IO */ SET_BOOKMARK(&zb, os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : DMU_META_OBJSET, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); arc_release(os->os_phys_buf, &os->os_phys_buf); dmu_write_policy(os, NULL, 0, 0, &zp); zio = arc_write(pio, os->os_spa, tx->tx_txg, os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp, dmu_objset_write_ready, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); /* * Sync special dnodes - the parent IO for the sync is the root block */ DMU_META_DNODE(os)->dn_zio = zio; dnode_sync(DMU_META_DNODE(os), tx); os->os_phys->os_flags = os->os_flags; if (DMU_USERUSED_DNODE(os) && DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) { DMU_USERUSED_DNODE(os)->dn_zio = zio; dnode_sync(DMU_USERUSED_DNODE(os), tx); DMU_GROUPUSED_DNODE(os)->dn_zio = zio; dnode_sync(DMU_GROUPUSED_DNODE(os), tx); } txgoff = tx->tx_txg & TXG_MASK; if (dmu_objset_userused_enabled(os)) { newlist = &os->os_synced_dnodes; /* * We must create the list here because it uses the * dn_dirty_link[] of this txg. */ list_create(newlist, sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[txgoff])); } dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx); dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx); list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff]; while (dr = list_head(list)) { ASSERT0(dr->dr_dbuf->db_level); list_remove(list, dr); if (dr->dr_zio) zio_nowait(dr->dr_zio); } /* * Free intent log blocks up to this tx. */ zil_sync(os->os_zil, tx); os->os_phys->os_zil_header = os->os_zil_header; zio_nowait(zio); }
/* called from dsl */ void dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx) { int txgoff; zbookmark_t zb; writeprops_t wp = { 0 }; zio_t *zio; list_t *list; list_t *newlist = NULL; dbuf_dirty_record_t *dr; dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); ASSERT(dmu_tx_is_syncing(tx)); /* XXX the write_done callback should really give us the tx... */ os->os_synctx = tx; if (os->os_dsl_dataset == NULL) { /* * This is the MOS. If we have upgraded, * spa_max_replication() could change, so reset * os_copies here. */ os->os_copies = spa_max_replication(os->os_spa); } /* * Create the root block IO */ zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0; zb.zb_object = 0; zb.zb_level = -1; /* for block ordering; it's level 0 on disk */ zb.zb_blkid = 0; wp.wp_type = DMU_OT_OBJSET; wp.wp_level = 0; /* on-disk BP level; see above */ wp.wp_copies = os->os_copies; wp.wp_oschecksum = os->os_checksum; wp.wp_oscompress = os->os_compress; if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) { (void) dsl_dataset_block_kill(os->os_dsl_dataset, os->os_rootbp, pio, tx); } arc_release(os->os_phys_buf, &os->os_phys_buf); zio = arc_write(pio, os->os_spa, &wp, DMU_OS_IS_L2CACHEABLE(os), tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, NULL, os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); /* * Sync special dnodes - the parent IO for the sync is the root block */ os->os_meta_dnode->dn_zio = zio; dnode_sync(os->os_meta_dnode, tx); os->os_phys->os_flags = os->os_flags; if (os->os_userused_dnode && os->os_userused_dnode->dn_type != DMU_OT_NONE) { os->os_userused_dnode->dn_zio = zio; dnode_sync(os->os_userused_dnode, tx); os->os_groupused_dnode->dn_zio = zio; dnode_sync(os->os_groupused_dnode, tx); } txgoff = tx->tx_txg & TXG_MASK; if (dmu_objset_userused_enabled(os)) { newlist = &os->os_synced_dnodes; /* * We must create the list here because it uses the * dn_dirty_link[] of this txg. */ list_create(newlist, sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[txgoff])); } dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx); dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx); list = &os->os_meta_dnode->dn_dirty_records[txgoff]; while ((dr = list_head(list))) { ASSERT(dr->dr_dbuf->db_level == 0); list_remove(list, dr); if (dr->dr_zio) zio_nowait(dr->dr_zio); } /* * Free intent log blocks up to this tx. */ zil_sync(os->os_zil, tx); os->os_phys->os_zil_header = os->os_zil_header; zio_nowait(zio); }
void dmu_objset_do_userquota_callbacks(objset_impl_t *os, dmu_tx_t *tx) { dnode_t *dn; list_t *list = &os->os_synced_dnodes; ASSERTV(static const char zerobuf[DN_MAX_BONUSLEN] = {0}); ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os)); while ((dn = list_head(list))) { dmu_object_type_t bonustype; ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object)); ASSERT(dn->dn_oldphys); ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE || dn->dn_phys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED); /* Allocate the user/groupused objects if necessary. */ if (os->os_userused_dnode->dn_type == DMU_OT_NONE) { VERIFY(0 == zap_create_claim(&os->os, DMU_USERUSED_OBJECT, DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); VERIFY(0 == zap_create_claim(&os->os, DMU_GROUPUSED_OBJECT, DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx)); } /* * If the object was not previously * accounted, pretend that it was free. */ if (!(dn->dn_oldphys->dn_flags & DNODE_FLAG_USERUSED_ACCOUNTED)) { bzero(dn->dn_oldphys, sizeof (dnode_phys_t)); } /* * If the object was freed, use the previous bonustype. */ bonustype = dn->dn_phys->dn_bonustype ? dn->dn_phys->dn_bonustype : dn->dn_oldphys->dn_bonustype; ASSERT(dn->dn_phys->dn_type != 0 || (bcmp(DN_BONUS(dn->dn_phys), zerobuf, DN_MAX_BONUSLEN) == 0 && DN_USED_BYTES(dn->dn_phys) == 0)); ASSERT(dn->dn_oldphys->dn_type != 0 || (bcmp(DN_BONUS(dn->dn_oldphys), zerobuf, DN_MAX_BONUSLEN) == 0 && DN_USED_BYTES(dn->dn_oldphys) == 0)); used_cbs[os->os_phys->os_type](&os->os, bonustype, DN_BONUS(dn->dn_oldphys), DN_BONUS(dn->dn_phys), DN_USED_BYTES(dn->dn_oldphys), DN_USED_BYTES(dn->dn_phys), tx); /* * The mutex is needed here for interlock with dnode_allocate. */ mutex_enter(&dn->dn_mtx); zio_buf_free(dn->dn_oldphys, sizeof (dnode_phys_t)); dn->dn_oldphys = NULL; mutex_exit(&dn->dn_mtx); list_remove(list, dn); dnode_rele(dn, list); } }