static void copies_changed_cb(void *arg, uint64_t newval) { objset_t *os = arg; /* * Inheritance and range checking should have been done by now. */ ASSERT(newval > 0); ASSERT(newval <= spa_max_replication(os->os_spa)); os->os_copies = newval; }
int dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, objset_t **osp) { objset_t *os; int i, err; ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); os = kmem_zalloc(sizeof (objset_t), KM_SLEEP); os->os_dsl_dataset = ds; os->os_spa = spa; os->os_rootbp = bp; if (!BP_IS_HOLE(os->os_rootbp)) { arc_flags_t aflags = ARC_FLAG_WAIT; zbookmark_phys_t zb; SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); if (DMU_OS_IS_L2CACHEABLE(os)) aflags |= ARC_FLAG_L2CACHE; if (DMU_OS_IS_L2COMPRESSIBLE(os)) aflags |= ARC_FLAG_L2COMPRESS; dprintf_bp(os->os_rootbp, "reading %s", ""); err = arc_read(NULL, spa, os->os_rootbp, arc_getbuf_func, &os->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); if (err != 0) { kmem_free(os, sizeof (objset_t)); /* convert checksum errors into IO errors */ if (err == ECKSUM) err = SET_ERROR(EIO); return (err); } /* Increase the blocksize if we are permitted. */ if (spa_version(spa) >= SPA_VERSION_USERSPACE && arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) { arc_buf_t *buf = arc_buf_alloc(spa, sizeof (objset_phys_t), &os->os_phys_buf, ARC_BUFC_METADATA); bzero(buf->b_data, sizeof (objset_phys_t)); bcopy(os->os_phys_buf->b_data, buf->b_data, arc_buf_size(os->os_phys_buf)); (void) arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf); os->os_phys_buf = buf; } os->os_phys = os->os_phys_buf->b_data; os->os_flags = os->os_phys->os_flags; } else { int size = spa_version(spa) >= SPA_VERSION_USERSPACE ? sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE; os->os_phys_buf = arc_buf_alloc(spa, size, &os->os_phys_buf, ARC_BUFC_METADATA); os->os_phys = os->os_phys_buf->b_data; bzero(os->os_phys, size); } /* * Note: the changed_cb will be called once before the register * func returns, thus changing the checksum/compression from the * default (fletcher2/off). Snapshots don't need to know about * checksum/compression/copies. */ if (ds != NULL) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE), primary_cache_changed_cb, os); if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE), secondary_cache_changed_cb, os); } if (!ds->ds_is_snapshot) { if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_COMPRESSION), compression_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_COPIES), copies_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_DEDUP), dedup_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_LOGBIAS), logbias_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_SYNC), sync_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name( ZFS_PROP_REDUNDANT_METADATA), redundant_metadata_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE), recordsize_changed_cb, os); } } if (err != 0) { VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf)); kmem_free(os, sizeof (objset_t)); return (err); } } else { /* It's the meta-objset. */ os->os_checksum = ZIO_CHECKSUM_FLETCHER_4; os->os_compress = ZIO_COMPRESS_ON; os->os_copies = spa_max_replication(spa); os->os_dedup_checksum = ZIO_CHECKSUM_OFF; os->os_dedup_verify = B_FALSE; os->os_logbias = ZFS_LOGBIAS_LATENCY; os->os_sync = ZFS_SYNC_STANDARD; os->os_primary_cache = ZFS_CACHE_ALL; os->os_secondary_cache = ZFS_CACHE_ALL; } if (ds == NULL || !ds->ds_is_snapshot) os->os_zil_header = os->os_phys->os_zil_header; os->os_zil = zil_alloc(os, &os->os_zil_header); for (i = 0; i < TXG_SIZE; i++) { list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); list_create(&os->os_free_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); } list_create(&os->os_dnodes, sizeof (dnode_t), offsetof(dnode_t, dn_link)); list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); dnode_special_open(os, &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT, &os->os_meta_dnode); if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) { dnode_special_open(os, &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT, &os->os_userused_dnode); dnode_special_open(os, &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode); } *osp = os; return (0); }
/* called from dsl */ void dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx) { int txgoff; zbookmark_t zb; zio_prop_t zp; zio_t *zio; list_t *list; list_t *newlist = NULL; dbuf_dirty_record_t *dr; dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); ASSERT(dmu_tx_is_syncing(tx)); /* XXX the write_done callback should really give us the tx... */ os->os_synctx = tx; if (os->os_dsl_dataset == NULL) { /* * This is the MOS. If we have upgraded, * spa_max_replication() could change, so reset * os_copies here. */ os->os_copies = spa_max_replication(os->os_spa); } /* * Create the root block IO */ SET_BOOKMARK(&zb, os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : DMU_META_OBJSET, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); arc_release(os->os_phys_buf, &os->os_phys_buf); dmu_write_policy(os, NULL, 0, 0, &zp); zio = arc_write(pio, os->os_spa, tx->tx_txg, os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os), &zp, dmu_objset_write_ready, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); /* * Sync special dnodes - the parent IO for the sync is the root block */ DMU_META_DNODE(os)->dn_zio = zio; dnode_sync(DMU_META_DNODE(os), tx); os->os_phys->os_flags = os->os_flags; if (DMU_USERUSED_DNODE(os) && DMU_USERUSED_DNODE(os)->dn_type != DMU_OT_NONE) { DMU_USERUSED_DNODE(os)->dn_zio = zio; dnode_sync(DMU_USERUSED_DNODE(os), tx); DMU_GROUPUSED_DNODE(os)->dn_zio = zio; dnode_sync(DMU_GROUPUSED_DNODE(os), tx); } txgoff = tx->tx_txg & TXG_MASK; if (dmu_objset_userused_enabled(os)) { newlist = &os->os_synced_dnodes; /* * We must create the list here because it uses the * dn_dirty_link[] of this txg. */ list_create(newlist, sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[txgoff])); } dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx); dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx); list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff]; while (dr = list_head(list)) { ASSERT0(dr->dr_dbuf->db_level); list_remove(list, dr); if (dr->dr_zio) zio_nowait(dr->dr_zio); } /* * Free intent log blocks up to this tx. */ zil_sync(os->os_zil, tx); os->os_phys->os_zil_header = os->os_zil_header; zio_nowait(zio); }
int dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, objset_t **osp) { objset_t *os; int i, err = 0; ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); os = kmem_zalloc(sizeof (objset_t), KM_PUSHPAGE); os->os_dsl_dataset = ds; os->os_spa = spa; os->os_rootbp = bp; if (!BP_IS_HOLE(os->os_rootbp)) { uint32_t aflags = ARC_WAIT; zbookmark_t zb; SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); if (DMU_OS_IS_L2CACHEABLE(os)) aflags |= ARC_L2CACHE; dprintf_bp(os->os_rootbp, "reading %s", ""); /* * XXX when bprewrite scrub can change the bp, * and this is called from dmu_objset_open_ds_os, the bp * could change, and we'll need a lock. */ err = dsl_read_nolock(NULL, spa, os->os_rootbp, arc_getbuf_func, &os->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); if (err) { kmem_free(os, sizeof (objset_t)); /* convert checksum errors into IO errors */ if (err == ECKSUM) err = EIO; return (err); } /* Increase the blocksize if we are permitted. */ if (spa_version(spa) >= SPA_VERSION_USERSPACE && arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) { arc_buf_t *buf = arc_buf_alloc(spa, sizeof (objset_phys_t), &os->os_phys_buf, ARC_BUFC_METADATA); bzero(buf->b_data, sizeof (objset_phys_t)); bcopy(os->os_phys_buf->b_data, buf->b_data, arc_buf_size(os->os_phys_buf)); (void) arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf); os->os_phys_buf = buf; } os->os_phys = os->os_phys_buf->b_data; os->os_flags = os->os_phys->os_flags; } else { int size = spa_version(spa) >= SPA_VERSION_USERSPACE ? sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE; os->os_phys_buf = arc_buf_alloc(spa, size, &os->os_phys_buf, ARC_BUFC_METADATA); os->os_phys = os->os_phys_buf->b_data; bzero(os->os_phys, size); } /* * Note: the changed_cb will be called once before the register * func returns, thus changing the checksum/compression from the * default (fletcher2/off). Snapshots don't need to know about * checksum/compression/copies. But they do need to know about * encryption so that clones from the snaphost inherit the * same encryption property regardless of where in the namespace * they get created. */ if (ds) { err = dsl_prop_register(ds, "primarycache", primary_cache_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "secondarycache", secondary_cache_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "encryption", crypt_changed_cb, os); if (!dsl_dataset_is_snapshot(ds)) { if (err == 0) err = dsl_prop_register(ds, "checksum", checksum_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "compression", compression_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "copies", copies_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "dedup", dedup_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "logbias", logbias_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "sync", sync_changed_cb, os); } if (err) { VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1); kmem_free(os, sizeof (objset_t)); return (err); } } else if (ds == NULL) { /* * It's the meta-objset. * Encryption is off for ZFS metadata but on for ZPL metadata * and file/zvol contents. */ os->os_checksum = ZIO_CHECKSUM_FLETCHER_4; os->os_compress = ZIO_COMPRESS_LZJB; os->os_copies = spa_max_replication(spa); os->os_dedup_checksum = ZIO_CHECKSUM_OFF; os->os_dedup_verify = 0; os->os_logbias = 0; os->os_sync = 0; os->os_primary_cache = ZFS_CACHE_ALL; os->os_secondary_cache = ZFS_CACHE_ALL; os->os_crypt = ZIO_CRYPT_OFF; } if (ds == NULL || !dsl_dataset_is_snapshot(ds)) os->os_zil_header = os->os_phys->os_zil_header; os->os_zil = zil_alloc(os, &os->os_zil_header); for (i = 0; i < TXG_SIZE; i++) { list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); list_create(&os->os_free_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); } list_create(&os->os_dnodes, sizeof (dnode_t), offsetof(dnode_t, dn_link)); list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); DMU_META_DNODE(os) = dnode_special_open(os, &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT, &os->os_meta_dnode); if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) { DMU_USERUSED_DNODE(os) = dnode_special_open(os, &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT, &os->os_userused_dnode); DMU_GROUPUSED_DNODE(os) = dnode_special_open(os, &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode); } /* * We should be the only thread trying to do this because we * have ds_opening_lock */ if (ds) { mutex_enter(&ds->ds_lock); ASSERT(ds->ds_objset == NULL); ds->ds_objset = os; mutex_exit(&ds->ds_lock); } *osp = os; return (0); }
int dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, objset_impl_t **osip) { objset_impl_t *winner, *osi; int i, err, checksum; osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); osi->os.os = osi; osi->os_dsl_dataset = ds; osi->os_spa = spa; osi->os_rootbp = bp; if (!BP_IS_HOLE(osi->os_rootbp)) { uint32_t aflags = ARC_WAIT; zbookmark_t zb; zb.zb_objset = ds ? ds->ds_object : 0; zb.zb_object = 0; zb.zb_level = -1; zb.zb_blkid = 0; dprintf_bp(osi->os_rootbp, "reading %s", ""); err = arc_read(NULL, spa, osi->os_rootbp, dmu_ot[DMU_OT_OBJSET].ot_byteswap, arc_getbuf_func, &osi->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); if (err) { kmem_free(osi, sizeof (objset_impl_t)); return (err); } osi->os_phys = osi->os_phys_buf->b_data; arc_release(osi->os_phys_buf, &osi->os_phys_buf); } else { osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t), &osi->os_phys_buf, ARC_BUFC_METADATA); osi->os_phys = osi->os_phys_buf->b_data; bzero(osi->os_phys, sizeof (objset_phys_t)); } /* * Note: the changed_cb will be called once before the register * func returns, thus changing the checksum/compression from the * default (fletcher2/off). Snapshots don't need to know, and * registering would complicate clone promotion. */ if (ds && ds->ds_phys->ds_num_children == 0) { err = dsl_prop_register(ds, "checksum", checksum_changed_cb, osi); if (err == 0) err = dsl_prop_register(ds, "compression", compression_changed_cb, osi); if (err == 0) err = dsl_prop_register(ds, "copies", copies_changed_cb, osi); if (err) { VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1); kmem_free(osi, sizeof (objset_impl_t)); return (err); } } else if (ds == NULL) { /* It's the meta-objset. */ osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; osi->os_compress = ZIO_COMPRESS_LZJB; osi->os_copies = spa_max_replication(spa); } osi->os_zil = zil_alloc(&osi->os, &osi->os_phys->os_zil_header); /* * Metadata always gets compressed and checksummed. * If the data checksum is multi-bit correctable, and it's not * a ZBT-style checksum, then it's suitable for metadata as well. * Otherwise, the metadata checksum defaults to fletcher4. */ checksum = osi->os_checksum; if (zio_checksum_table[checksum].ci_correctable && !zio_checksum_table[checksum].ci_zbt) osi->os_md_checksum = checksum; else osi->os_md_checksum = ZIO_CHECKSUM_FLETCHER_4; osi->os_md_compress = ZIO_COMPRESS_LZJB; for (i = 0; i < TXG_SIZE; i++) { list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); } list_create(&osi->os_dnodes, sizeof (dnode_t), offsetof(dnode_t, dn_link)); list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); osi->os_meta_dnode = dnode_special_open(osi, &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); if (ds != NULL) { winner = dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict); if (winner) { dmu_objset_evict(ds, osi); osi = winner; } } *osip = osi; return (0); }
/* called from dsl */ void dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx) { int txgoff; zbookmark_t zb; writeprops_t wp = { 0 }; zio_t *zio; list_t *list; dbuf_dirty_record_t *dr; dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); ASSERT(dmu_tx_is_syncing(tx)); /* XXX the write_done callback should really give us the tx... */ os->os_synctx = tx; if (os->os_dsl_dataset == NULL) { /* * This is the MOS. If we have upgraded, * spa_max_replication() could change, so reset * os_copies here. */ os->os_copies = spa_max_replication(os->os_spa); } /* * Create the root block IO */ zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0; zb.zb_object = 0; zb.zb_level = -1; zb.zb_blkid = 0; if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) { (void) dsl_dataset_block_kill(os->os_dsl_dataset, os->os_rootbp, pio, tx); } wp.wp_type = DMU_OT_OBJSET; wp.wp_copies = os->os_copies; wp.wp_level = (uint8_t)-1; wp.wp_oschecksum = os->os_checksum; wp.wp_oscompress = os->os_compress; arc_release(os->os_phys_buf, &os->os_phys_buf); zio = arc_write(pio, os->os_spa, &wp, tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, NULL, os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_METADATA, &zb); /* * Sync meta-dnode - the parent IO for the sync is the root block */ os->os_meta_dnode->dn_zio = zio; dnode_sync(os->os_meta_dnode, tx); txgoff = tx->tx_txg & TXG_MASK; dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], tx); dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], tx); list = &os->os_meta_dnode->dn_dirty_records[txgoff]; while (dr = list_head(list)) { ASSERT(dr->dr_dbuf->db_level == 0); list_remove(list, dr); if (dr->dr_zio) zio_nowait(dr->dr_zio); } /* * Free intent log blocks up to this tx. */ zil_sync(os->os_zil, tx); os->os_phys->os_zil_header = os->os_zil_header; zio_nowait(zio); }
int dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, objset_impl_t **osip) { objset_impl_t *osi; int i, err; ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); osi->os.os = osi; osi->os_dsl_dataset = ds; osi->os_spa = spa; osi->os_rootbp = bp; if (!BP_IS_HOLE(osi->os_rootbp)) { uint32_t aflags = ARC_WAIT; zbookmark_t zb; zb.zb_objset = ds ? ds->ds_object : 0; zb.zb_object = 0; zb.zb_level = -1; zb.zb_blkid = 0; dprintf_bp(osi->os_rootbp, "reading %s", ""); /* * NB: when bprewrite scrub can change the bp, * and this is called from dmu_objset_open_ds_os, the bp * could change, and we'll need a lock. */ err = arc_read_nolock(NULL, spa, osi->os_rootbp, arc_getbuf_func, &osi->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); if (err) { kmem_free(osi, sizeof (objset_impl_t)); return (err); } osi->os_phys = osi->os_phys_buf->b_data; } else { #ifdef __APPLE_KERNEL__ osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t), &osi->os_phys_buf, ARC_BUFC_METADATA, TRUE/*alloc_buf*/); #else osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t), &osi->os_phys_buf, ARC_BUFC_METADATA); #endif osi->os_phys = osi->os_phys_buf->b_data; bzero(osi->os_phys, sizeof (objset_phys_t)); } /* * Note: the changed_cb will be called once before the register * func returns, thus changing the checksum/compression from the * default (fletcher2/off). Snapshots don't need to know, and * registering would complicate clone promotion. */ if (ds && ds->ds_phys->ds_num_children == 0) { err = dsl_prop_register(ds, "checksum", checksum_changed_cb, osi); if (err == 0) err = dsl_prop_register(ds, "compression", compression_changed_cb, osi); if (err == 0) err = dsl_prop_register(ds, "copies", copies_changed_cb, osi); if (err) { VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1); kmem_free(osi, sizeof (objset_impl_t)); return (err); } } else if (ds == NULL) { /* It's the meta-objset. */ osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; osi->os_compress = ZIO_COMPRESS_LZJB; osi->os_copies = spa_max_replication(spa); } osi->os_zil_header = osi->os_phys->os_zil_header; osi->os_zil = zil_alloc(&osi->os, &osi->os_zil_header); for (i = 0; i < TXG_SIZE; i++) { list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); } list_create(&osi->os_dnodes, sizeof (dnode_t), offsetof(dnode_t, dn_link)); list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&osi->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); osi->os_meta_dnode = dnode_special_open(osi, &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); /* * We should be the only thread trying to do this because we * have ds_opening_lock */ if (ds) { VERIFY(NULL == dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict)); } *osip = osi; return (0); }
/* called from dsl */ void dmu_objset_sync(objset_impl_t *os, zio_t *pio, dmu_tx_t *tx) { int txgoff; zbookmark_t zb; writeprops_t wp = { 0 }; zio_t *zio; list_t *list; list_t *newlist = NULL; dbuf_dirty_record_t *dr; dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg); ASSERT(dmu_tx_is_syncing(tx)); /* XXX the write_done callback should really give us the tx... */ os->os_synctx = tx; if (os->os_dsl_dataset == NULL) { /* * This is the MOS. If we have upgraded, * spa_max_replication() could change, so reset * os_copies here. */ os->os_copies = spa_max_replication(os->os_spa); } /* * Create the root block IO */ zb.zb_objset = os->os_dsl_dataset ? os->os_dsl_dataset->ds_object : 0; zb.zb_object = 0; zb.zb_level = -1; /* for block ordering; it's level 0 on disk */ zb.zb_blkid = 0; wp.wp_type = DMU_OT_OBJSET; wp.wp_level = 0; /* on-disk BP level; see above */ wp.wp_copies = os->os_copies; wp.wp_oschecksum = os->os_checksum; wp.wp_oscompress = os->os_compress; if (BP_IS_OLDER(os->os_rootbp, tx->tx_txg)) { (void) dsl_dataset_block_kill(os->os_dsl_dataset, os->os_rootbp, pio, tx); } arc_release(os->os_phys_buf, &os->os_phys_buf); zio = arc_write(pio, os->os_spa, &wp, DMU_OS_IS_L2CACHEABLE(os), tx->tx_txg, os->os_rootbp, os->os_phys_buf, ready, NULL, os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb); /* * Sync special dnodes - the parent IO for the sync is the root block */ os->os_meta_dnode->dn_zio = zio; dnode_sync(os->os_meta_dnode, tx); os->os_phys->os_flags = os->os_flags; if (os->os_userused_dnode && os->os_userused_dnode->dn_type != DMU_OT_NONE) { os->os_userused_dnode->dn_zio = zio; dnode_sync(os->os_userused_dnode, tx); os->os_groupused_dnode->dn_zio = zio; dnode_sync(os->os_groupused_dnode, tx); } txgoff = tx->tx_txg & TXG_MASK; if (dmu_objset_userused_enabled(os)) { newlist = &os->os_synced_dnodes; /* * We must create the list here because it uses the * dn_dirty_link[] of this txg. */ list_create(newlist, sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[txgoff])); } dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx); dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx); list = &os->os_meta_dnode->dn_dirty_records[txgoff]; while ((dr = list_head(list))) { ASSERT(dr->dr_dbuf->db_level == 0); list_remove(list, dr); if (dr->dr_zio) zio_nowait(dr->dr_zio); } /* * Free intent log blocks up to this tx. */ zil_sync(os->os_zil, tx); os->os_phys->os_zil_header = os->os_zil_header; zio_nowait(zio); }
int dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, objset_impl_t **osip) { objset_impl_t *osi; int i, err; ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); osi->os.os = osi; osi->os_dsl_dataset = ds; osi->os_spa = spa; osi->os_rootbp = bp; if (!BP_IS_HOLE(osi->os_rootbp)) { uint32_t aflags = ARC_WAIT; zbookmark_t zb; zb.zb_objset = ds ? ds->ds_object : 0; zb.zb_object = 0; zb.zb_level = -1; zb.zb_blkid = 0; if (DMU_OS_IS_L2CACHEABLE(osi)) aflags |= ARC_L2CACHE; dprintf_bp(osi->os_rootbp, "reading %s", ""); /* * NB: when bprewrite scrub can change the bp, * and this is called from dmu_objset_open_ds_os, the bp * could change, and we'll need a lock. */ err = arc_read_nolock(NULL, spa, osi->os_rootbp, arc_getbuf_func, &osi->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); if (err) { kmem_free(osi, sizeof (objset_impl_t)); /* convert checksum errors into IO errors */ if (err == ECKSUM) err = EIO; return (err); } /* Increase the blocksize if we are permitted. */ if (spa_version(spa) >= SPA_VERSION_USERSPACE && arc_buf_size(osi->os_phys_buf) < sizeof (objset_phys_t)) { arc_buf_t *buf = arc_buf_alloc(spa, sizeof (objset_phys_t), &osi->os_phys_buf, ARC_BUFC_METADATA); bzero(buf->b_data, sizeof (objset_phys_t)); bcopy(osi->os_phys_buf->b_data, buf->b_data, arc_buf_size(osi->os_phys_buf)); (void) arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf); osi->os_phys_buf = buf; } osi->os_phys = osi->os_phys_buf->b_data; osi->os_flags = osi->os_phys->os_flags; } else { int size = spa_version(spa) >= SPA_VERSION_USERSPACE ? sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE; osi->os_phys_buf = arc_buf_alloc(spa, size, &osi->os_phys_buf, ARC_BUFC_METADATA); osi->os_phys = osi->os_phys_buf->b_data; bzero(osi->os_phys, size); } /* * Note: the changed_cb will be called once before the register * func returns, thus changing the checksum/compression from the * default (fletcher2/off). Snapshots don't need to know about * checksum/compression/copies. */ if (ds) { err = dsl_prop_register(ds, "primarycache", primary_cache_changed_cb, osi); if (err == 0) err = dsl_prop_register(ds, "secondarycache", secondary_cache_changed_cb, osi); if (!dsl_dataset_is_snapshot(ds)) { if (err == 0) err = dsl_prop_register(ds, "checksum", checksum_changed_cb, osi); if (err == 0) err = dsl_prop_register(ds, "compression", compression_changed_cb, osi); if (err == 0) err = dsl_prop_register(ds, "copies", copies_changed_cb, osi); } if (err) { VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1); kmem_free(osi, sizeof (objset_impl_t)); return (err); } } else if (ds == NULL) { /* It's the meta-objset. */ osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; osi->os_compress = ZIO_COMPRESS_LZJB; osi->os_copies = spa_max_replication(spa); osi->os_primary_cache = ZFS_CACHE_ALL; osi->os_secondary_cache = ZFS_CACHE_ALL; } osi->os_zil_header = osi->os_phys->os_zil_header; osi->os_zil = zil_alloc(&osi->os, &osi->os_zil_header); for (i = 0; i < TXG_SIZE; i++) { list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); } list_create(&osi->os_dnodes, sizeof (dnode_t), offsetof(dnode_t, dn_link)); list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&osi->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); osi->os_meta_dnode = dnode_special_open(osi, &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); if (arc_buf_size(osi->os_phys_buf) >= sizeof (objset_phys_t)) { osi->os_userused_dnode = dnode_special_open(osi, &osi->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT); osi->os_groupused_dnode = dnode_special_open(osi, &osi->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT); } /* * We should be the only thread trying to do this because we * have ds_opening_lock */ if (ds) { VERIFY(NULL == dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict)); } *osip = osi; return (0); }