/*
 * Build the in-core objset described by the root block pointer 'bp' and
 * return it through 'osp'.
 *
 * 'ds' is the owning dataset, or NULL when the meta-objset is being
 * opened; a non-NULL 'ds' is asserted to have ds_opening_lock held.
 *
 * Returns 0 on success.  On failure the partially built objset is freed,
 * '*osp' is left untouched, and the error from arc_read() (with ECKSUM
 * reported as EIO) or from dsl_prop_register() is returned.
 */
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_t **osp)
{
	objset_t *os;
	int err;
	int t;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
	os->os_dsl_dataset = ds;
	os->os_spa = spa;
	os->os_rootbp = bp;

	if (BP_IS_HOLE(os->os_rootbp)) {
		/* No root block to read: start from a zeroed phys buffer. */
		int phys_size;

		if (spa_version(spa) >= SPA_VERSION_USERSPACE)
			phys_size = sizeof (objset_phys_t);
		else
			phys_size = OBJSET_OLD_PHYS_SIZE;

		os->os_phys_buf = arc_buf_alloc(spa, phys_size,
		    &os->os_phys_buf, ARC_BUFC_METADATA);
		os->os_phys = os->os_phys_buf->b_data;
		bzero(os->os_phys, phys_size);
	} else {
		zbookmark_phys_t bookmark;
		arc_flags_t read_flags = ARC_FLAG_WAIT;

		SET_BOOKMARK(&bookmark,
		    ds ? ds->ds_object : DMU_META_OBJSET,
		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

		if (DMU_OS_IS_L2CACHEABLE(os)) {
			read_flags |= ARC_FLAG_L2CACHE;
		}
		if (DMU_OS_IS_L2COMPRESSIBLE(os)) {
			read_flags |= ARC_FLAG_L2COMPRESS;
		}

		dprintf_bp(os->os_rootbp, "reading %s", "");
		err = arc_read(NULL, spa, os->os_rootbp,
		    arc_getbuf_func, &os->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL,
		    &read_flags, &bookmark);
		if (err != 0) {
			kmem_free(os, sizeof (objset_t));
			/* Callers see a checksum failure as an I/O error. */
			if (err == ECKSUM) {
				err = SET_ERROR(EIO);
			}
			return (err);
		}

		/*
		 * If the pool version allows the larger objset_phys_t and
		 * the buffer we read is smaller, copy it into a zero-padded
		 * full-size buffer and drop the reference on the small one.
		 */
		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
			arc_buf_t *grown;

			grown = arc_buf_alloc(spa, sizeof (objset_phys_t),
			    &os->os_phys_buf, ARC_BUFC_METADATA);
			bzero(grown->b_data, sizeof (objset_phys_t));
			bcopy(os->os_phys_buf->b_data, grown->b_data,
			    arc_buf_size(os->os_phys_buf));
			(void) arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf);
			os->os_phys_buf = grown;
		}

		os->os_phys = os->os_phys_buf->b_data;
		os->os_flags = os->os_phys->os_flags;
	}

	if (ds == NULL) {
		/* The meta-objset has no properties; use fixed settings. */
		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		os->os_compress = ZIO_COMPRESS_ON;
		os->os_copies = spa_max_replication(spa);
		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
		os->os_dedup_verify = B_FALSE;
		os->os_logbias = ZFS_LOGBIAS_LATENCY;
		os->os_sync = ZFS_SYNC_STANDARD;
		os->os_primary_cache = ZFS_CACHE_ALL;
		os->os_secondary_cache = ZFS_CACHE_ALL;
	} else {
		/*
		 * Each changed_cb is invoked once before its register call
		 * returns, which replaces the default checksum/compression
		 * (fletcher2/off).  The first failing registration stops
		 * the sequence and tears the objset down.
		 */
		err = dsl_prop_register(ds,
		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
		    primary_cache_changed_cb, os);
		if (err != 0)
			goto prop_fail;

		err = dsl_prop_register(ds,
		    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
		    secondary_cache_changed_cb, os);
		if (err != 0)
			goto prop_fail;

		/*
		 * Snapshots don't need to know about
		 * checksum/compression/copies and the remaining properties.
		 */
		if (!ds->ds_is_snapshot) {
			err = dsl_prop_register(ds,
			    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
			    checksum_changed_cb, os);
			if (err != 0)
				goto prop_fail;

			err = dsl_prop_register(ds,
			    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
			    compression_changed_cb, os);
			if (err != 0)
				goto prop_fail;

			err = dsl_prop_register(ds,
			    zfs_prop_to_name(ZFS_PROP_COPIES),
			    copies_changed_cb, os);
			if (err != 0)
				goto prop_fail;

			err = dsl_prop_register(ds,
			    zfs_prop_to_name(ZFS_PROP_DEDUP),
			    dedup_changed_cb, os);
			if (err != 0)
				goto prop_fail;

			err = dsl_prop_register(ds,
			    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
			    logbias_changed_cb, os);
			if (err != 0)
				goto prop_fail;

			err = dsl_prop_register(ds,
			    zfs_prop_to_name(ZFS_PROP_SYNC),
			    sync_changed_cb, os);
			if (err != 0)
				goto prop_fail;

			err = dsl_prop_register(ds,
			    zfs_prop_to_name(ZFS_PROP_REDUNDANT_METADATA),
			    redundant_metadata_changed_cb, os);
			if (err != 0)
				goto prop_fail;

			err = dsl_prop_register(ds,
			    zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
			    recordsize_changed_cb, os);
			if (err != 0)
				goto prop_fail;
		}
	}

	/* Only the meta-objset and non-snapshots copy the ZIL header. */
	if (ds == NULL || !ds->ds_is_snapshot) {
		os->os_zil_header = os->os_phys->os_zil_header;
	}
	os->os_zil = zil_alloc(os, &os->os_zil_header);

	/* One dirty list and one free list per open transaction group. */
	for (t = 0; t < TXG_SIZE; t++) {
		list_create(&os->os_dirty_dnodes[t], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[t]));
		list_create(&os->os_free_dnodes[t], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[t]));
	}
	list_create(&os->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	dnode_special_open(os, &os->os_phys->os_meta_dnode,
	    DMU_META_DNODE_OBJECT, &os->os_meta_dnode);

	/* The accounting dnodes exist only in a full-size phys buffer. */
	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
		dnode_special_open(os, &os->os_phys->os_userused_dnode,
		    DMU_USERUSED_OBJECT, &os->os_userused_dnode);
		dnode_special_open(os, &os->os_phys->os_groupused_dnode,
		    DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
	}

	*osp = os;
	return (0);

prop_fail:
	/* A property registration failed: release the buffer and objset. */
	VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));
	kmem_free(os, sizeof (objset_t));
	return (err);
}
/*
 * Build the in-core objset described by the root block pointer 'bp',
 * return it through 'osp', and (for a real dataset) publish it in
 * ds->ds_objset.
 *
 * 'ds' is the owning dataset, or NULL when the meta-objset is being
 * opened; a non-NULL 'ds' is asserted to have ds_opening_lock held.
 *
 * Returns 0 on success.  On failure the partially built objset is freed,
 * '*osp' is left untouched, and the error from dsl_read_nolock() (with
 * ECKSUM reported as EIO) or from dsl_prop_register() is returned.
 */
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_t **osp)
{
	objset_t *os;
	int err = 0;
	int t;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	os = kmem_zalloc(sizeof (objset_t), KM_PUSHPAGE);
	os->os_dsl_dataset = ds;
	os->os_spa = spa;
	os->os_rootbp = bp;

	if (BP_IS_HOLE(os->os_rootbp)) {
		/* No root block to read: start from a zeroed phys buffer. */
		int phys_size;

		if (spa_version(spa) >= SPA_VERSION_USERSPACE)
			phys_size = sizeof (objset_phys_t);
		else
			phys_size = OBJSET_OLD_PHYS_SIZE;

		os->os_phys_buf = arc_buf_alloc(spa, phys_size,
		    &os->os_phys_buf, ARC_BUFC_METADATA);
		os->os_phys = os->os_phys_buf->b_data;
		bzero(os->os_phys, phys_size);
	} else {
		zbookmark_t bookmark;
		uint32_t read_flags = ARC_WAIT;

		SET_BOOKMARK(&bookmark,
		    ds ? ds->ds_object : DMU_META_OBJSET,
		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

		if (DMU_OS_IS_L2CACHEABLE(os)) {
			read_flags |= ARC_L2CACHE;
		}

		dprintf_bp(os->os_rootbp, "reading %s", "");
		/*
		 * XXX once bprewrite scrub is able to change the bp, a call
		 * arriving via dmu_objset_open_ds_os could see the bp change
		 * underneath it, and a lock will be needed here.
		 */
		err = dsl_read_nolock(NULL, spa, os->os_rootbp,
		    arc_getbuf_func, &os->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL,
		    &read_flags, &bookmark);
		if (err != 0) {
			kmem_free(os, sizeof (objset_t));
			/* Callers see a checksum failure as an I/O error. */
			if (err == ECKSUM) {
				err = EIO;
			}
			return (err);
		}

		/*
		 * If the pool version allows the larger objset_phys_t and
		 * the buffer we read is smaller, copy it into a zero-padded
		 * full-size buffer and drop the reference on the small one.
		 */
		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
			arc_buf_t *grown;

			grown = arc_buf_alloc(spa, sizeof (objset_phys_t),
			    &os->os_phys_buf, ARC_BUFC_METADATA);
			bzero(grown->b_data, sizeof (objset_phys_t));
			bcopy(os->os_phys_buf->b_data, grown->b_data,
			    arc_buf_size(os->os_phys_buf));
			(void) arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf);
			os->os_phys_buf = grown;
		}

		os->os_phys = os->os_phys_buf->b_data;
		os->os_flags = os->os_phys->os_flags;
	}

	if (ds == NULL) {
		/*
		 * The meta-objset has no properties; use fixed settings.
		 * Encryption is off for ZFS metadata but on for ZPL
		 * metadata and file/zvol contents.
		 */
		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		os->os_compress = ZIO_COMPRESS_LZJB;
		os->os_copies = spa_max_replication(spa);
		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
		os->os_dedup_verify = 0;
		os->os_logbias = 0;
		os->os_sync = 0;
		os->os_primary_cache = ZFS_CACHE_ALL;
		os->os_secondary_cache = ZFS_CACHE_ALL;
		os->os_crypt = ZIO_CRYPT_OFF;
	} else {
		/*
		 * Each changed_cb is invoked once before its register call
		 * returns, which replaces the default checksum/compression
		 * (fletcher2/off).  Once one registration fails, every
		 * later one is skipped.
		 *
		 * Snapshots don't need to know about
		 * checksum/compression/copies, but they do need the
		 * encryption property so that clones of the snapshot
		 * inherit the same encryption setting regardless of where
		 * in the namespace they are created.
		 */
		err = dsl_prop_register(ds, "primarycache",
		    primary_cache_changed_cb, os);
		if (err == 0) {
			err = dsl_prop_register(ds, "secondarycache",
			    secondary_cache_changed_cb, os);
		}
		if (err == 0) {
			err = dsl_prop_register(ds, "encryption",
			    crypt_changed_cb, os);
		}

		if (!dsl_dataset_is_snapshot(ds)) {
			if (err == 0) {
				err = dsl_prop_register(ds, "checksum",
				    checksum_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds, "compression",
				    compression_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds, "copies",
				    copies_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds, "dedup",
				    dedup_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds, "logbias",
				    logbias_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds, "sync",
				    sync_changed_cb, os);
			}
		}

		if (err != 0) {
			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf) == 1);
			kmem_free(os, sizeof (objset_t));
			return (err);
		}
	}

	/* Only the meta-objset and non-snapshots copy the ZIL header. */
	if (ds == NULL || !dsl_dataset_is_snapshot(ds)) {
		os->os_zil_header = os->os_phys->os_zil_header;
	}
	os->os_zil = zil_alloc(os, &os->os_zil_header);

	/* One dirty list and one free list per open transaction group. */
	for (t = 0; t < TXG_SIZE; t++) {
		list_create(&os->os_dirty_dnodes[t], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[t]));
		list_create(&os->os_free_dnodes[t], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[t]));
	}
	list_create(&os->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	DMU_META_DNODE(os) = dnode_special_open(os,
	    &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT,
	    &os->os_meta_dnode);

	/* The accounting dnodes exist only in a full-size phys buffer. */
	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
		DMU_USERUSED_DNODE(os) = dnode_special_open(os,
		    &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT,
		    &os->os_userused_dnode);
		DMU_GROUPUSED_DNODE(os) = dnode_special_open(os,
		    &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT,
		    &os->os_groupused_dnode);
	}

	/*
	 * Holding ds_opening_lock means no other thread should be
	 * installing an objset on this dataset at the same time.
	 */
	if (ds != NULL) {
		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_objset == NULL);
		ds->ds_objset = os;
		mutex_exit(&ds->ds_lock);
	}

	*osp = os;
	return (0);
}
/*
 * Build the in-core objset implementation described by the root block
 * pointer 'bp' and return it through 'osip'.
 *
 * 'ds' is the owning dataset, or NULL when the meta-objset is being
 * opened.  For a real dataset the new objset is installed as the
 * dataset's user pointer; if another objset is already installed, the
 * one built here is evicted and the installed one is returned instead.
 *
 * Returns 0 on success.  On failure the partially built objset is freed,
 * '*osip' is left untouched, and the error from arc_read() or
 * dsl_prop_register() is returned.
 */
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_impl_t **osip)
{
	objset_impl_t *osi;
	objset_impl_t *existing;
	int data_cksum;
	int err;
	int t;

	osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP);
	osi->os.os = osi;
	osi->os_dsl_dataset = ds;
	osi->os_spa = spa;
	osi->os_rootbp = bp;

	if (BP_IS_HOLE(osi->os_rootbp)) {
		/* No root block to read: start from a zeroed phys buffer. */
		osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t),
		    &osi->os_phys_buf, ARC_BUFC_METADATA);
		osi->os_phys = osi->os_phys_buf->b_data;
		bzero(osi->os_phys, sizeof (objset_phys_t));
	} else {
		zbookmark_t bookmark;
		uint32_t read_flags = ARC_WAIT;

		if (ds != NULL)
			bookmark.zb_objset = ds->ds_object;
		else
			bookmark.zb_objset = 0;
		bookmark.zb_object = 0;
		bookmark.zb_level = -1;
		bookmark.zb_blkid = 0;

		dprintf_bp(osi->os_rootbp, "reading %s", "");
		err = arc_read(NULL, spa, osi->os_rootbp,
		    dmu_ot[DMU_OT_OBJSET].ot_byteswap,
		    arc_getbuf_func, &osi->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL,
		    &read_flags, &bookmark);
		if (err != 0) {
			kmem_free(osi, sizeof (objset_impl_t));
			return (err);
		}
		osi->os_phys = osi->os_phys_buf->b_data;
		arc_release(osi->os_phys_buf, &osi->os_phys_buf);
	}

	if (ds == NULL) {
		/* The meta-objset has no properties; use fixed settings. */
		osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		osi->os_compress = ZIO_COMPRESS_LZJB;
		osi->os_copies = spa_max_replication(spa);
	} else if (ds->ds_phys->ds_num_children == 0) {
		/*
		 * Each changed_cb is invoked once before its register call
		 * returns, which replaces the default checksum/compression
		 * (fletcher2/off).  Snapshots don't need to know, and
		 * registering for them would complicate clone promotion, so
		 * only datasets with no children register.
		 */
		err = dsl_prop_register(ds, "checksum",
		    checksum_changed_cb, osi);
		if (err == 0) {
			err = dsl_prop_register(ds, "compression",
			    compression_changed_cb, osi);
		}
		if (err == 0) {
			err = dsl_prop_register(ds, "copies",
			    copies_changed_cb, osi);
		}
		if (err != 0) {
			VERIFY(arc_buf_remove_ref(osi->os_phys_buf,
			    &osi->os_phys_buf) == 1);
			kmem_free(osi, sizeof (objset_impl_t));
			return (err);
		}
	}

	osi->os_zil = zil_alloc(&osi->os, &osi->os_phys->os_zil_header);

	/*
	 * Metadata always gets compressed and checksummed.  The data
	 * checksum is reused for metadata when it is multi-bit correctable
	 * and not a ZBT-style checksum; otherwise metadata falls back to
	 * fletcher4.
	 */
	data_cksum = osi->os_checksum;
	osi->os_md_checksum =
	    (zio_checksum_table[data_cksum].ci_correctable &&
	    !zio_checksum_table[data_cksum].ci_zbt) ?
	    data_cksum : ZIO_CHECKSUM_FLETCHER_4;
	osi->os_md_compress = ZIO_COMPRESS_LZJB;

	/* One dirty list and one free list per open transaction group. */
	for (t = 0; t < TXG_SIZE; t++) {
		list_create(&osi->os_dirty_dnodes[t], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[t]));
		list_create(&osi->os_free_dnodes[t], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[t]));
	}
	list_create(&osi->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);

	osi->os_meta_dnode = dnode_special_open(osi,
	    &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);

	if (ds != NULL) {
		/*
		 * A non-NULL return means an objset was already installed
		 * on the dataset: discard ours and hand back that one.
		 */
		existing = dsl_dataset_set_user_ptr(ds, osi,
		    dmu_objset_evict);
		if (existing != NULL) {
			dmu_objset_evict(ds, osi);
			osi = existing;
		}
	}

	*osip = osi;
	return (0);
}
/*
 * Build the in-core objset implementation described by the root block
 * pointer 'bp', return it through 'osip', and (for a real dataset)
 * install it as the dataset's user pointer.
 *
 * 'ds' is the owning dataset, or NULL when the meta-objset is being
 * opened; a non-NULL 'ds' is asserted to have ds_opening_lock held.
 *
 * Returns 0 on success.  On failure the partially built objset is freed,
 * '*osip' is left untouched, and the error from arc_read_nolock() or
 * dsl_prop_register() is returned.
 */
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_impl_t **osip)
{
	objset_impl_t *osi;
	int err;
	int t;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP);
	osi->os.os = osi;
	osi->os_dsl_dataset = ds;
	osi->os_spa = spa;
	osi->os_rootbp = bp;

	if (BP_IS_HOLE(osi->os_rootbp)) {
		/* No root block to read: start from a zeroed phys buffer. */
#ifdef __APPLE_KERNEL__
		osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t),
		    &osi->os_phys_buf, ARC_BUFC_METADATA, TRUE/*alloc_buf*/);
#else
		osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t),
		    &osi->os_phys_buf, ARC_BUFC_METADATA);
#endif
		osi->os_phys = osi->os_phys_buf->b_data;
		bzero(osi->os_phys, sizeof (objset_phys_t));
	} else {
		zbookmark_t bookmark;
		uint32_t read_flags = ARC_WAIT;

		if (ds != NULL)
			bookmark.zb_objset = ds->ds_object;
		else
			bookmark.zb_objset = 0;
		bookmark.zb_object = 0;
		bookmark.zb_level = -1;
		bookmark.zb_blkid = 0;

		dprintf_bp(osi->os_rootbp, "reading %s", "");
		/*
		 * NB: once bprewrite scrub is able to change the bp, a call
		 * arriving via dmu_objset_open_ds_os could see the bp change
		 * underneath it, and a lock will be needed here.
		 */
		err = arc_read_nolock(NULL, spa, osi->os_rootbp,
		    arc_getbuf_func, &osi->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL,
		    &read_flags, &bookmark);
		if (err != 0) {
			kmem_free(osi, sizeof (objset_impl_t));
			return (err);
		}
		osi->os_phys = osi->os_phys_buf->b_data;
	}

	if (ds == NULL) {
		/* The meta-objset has no properties; use fixed settings. */
		osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		osi->os_compress = ZIO_COMPRESS_LZJB;
		osi->os_copies = spa_max_replication(spa);
	} else if (ds->ds_phys->ds_num_children == 0) {
		/*
		 * Each changed_cb is invoked once before its register call
		 * returns, which replaces the default checksum/compression
		 * (fletcher2/off).  Snapshots don't need to know, and
		 * registering for them would complicate clone promotion, so
		 * only datasets with no children register.
		 */
		err = dsl_prop_register(ds, "checksum",
		    checksum_changed_cb, osi);
		if (err == 0) {
			err = dsl_prop_register(ds, "compression",
			    compression_changed_cb, osi);
		}
		if (err == 0) {
			err = dsl_prop_register(ds, "copies",
			    copies_changed_cb, osi);
		}
		if (err != 0) {
			VERIFY(arc_buf_remove_ref(osi->os_phys_buf,
			    &osi->os_phys_buf) == 1);
			kmem_free(osi, sizeof (objset_impl_t));
			return (err);
		}
	}

	/* The ZIL works on an in-core copy of the on-disk header. */
	osi->os_zil_header = osi->os_phys->os_zil_header;
	osi->os_zil = zil_alloc(&osi->os, &osi->os_zil_header);

	/* One dirty list and one free list per open transaction group. */
	for (t = 0; t < TXG_SIZE; t++) {
		list_create(&osi->os_dirty_dnodes[t], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[t]));
		list_create(&osi->os_free_dnodes[t], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[t]));
	}
	list_create(&osi->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&osi->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	osi->os_meta_dnode = dnode_special_open(osi,
	    &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);

	/*
	 * Holding ds_opening_lock means no other thread should be
	 * installing an objset on this dataset, so no previous user
	 * pointer may be returned.
	 */
	if (ds != NULL) {
		VERIFY(NULL == dsl_dataset_set_user_ptr(ds, osi,
		    dmu_objset_evict));
	}

	*osip = osi;
	return (0);
}
/*
 * Build the in-core objset implementation described by the root block
 * pointer 'bp', return it through 'osip', and (for a real dataset)
 * install it as the dataset's user pointer.
 *
 * 'ds' is the owning dataset, or NULL when the meta-objset is being
 * opened; a non-NULL 'ds' is asserted to have ds_opening_lock held.
 *
 * Returns 0 on success.  On failure the partially built objset is freed,
 * '*osip' is left untouched, and the error from arc_read_nolock() (with
 * ECKSUM reported as EIO) or from dsl_prop_register() is returned.
 */
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_impl_t **osip)
{
	objset_impl_t *osi;
	int err;
	int t;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP);
	osi->os.os = osi;
	osi->os_dsl_dataset = ds;
	osi->os_spa = spa;
	osi->os_rootbp = bp;

	if (BP_IS_HOLE(osi->os_rootbp)) {
		/* No root block to read: start from a zeroed phys buffer. */
		int phys_size;

		if (spa_version(spa) >= SPA_VERSION_USERSPACE)
			phys_size = sizeof (objset_phys_t);
		else
			phys_size = OBJSET_OLD_PHYS_SIZE;

		osi->os_phys_buf = arc_buf_alloc(spa, phys_size,
		    &osi->os_phys_buf, ARC_BUFC_METADATA);
		osi->os_phys = osi->os_phys_buf->b_data;
		bzero(osi->os_phys, phys_size);
	} else {
		zbookmark_t bookmark;
		uint32_t read_flags = ARC_WAIT;

		if (ds != NULL)
			bookmark.zb_objset = ds->ds_object;
		else
			bookmark.zb_objset = 0;
		bookmark.zb_object = 0;
		bookmark.zb_level = -1;
		bookmark.zb_blkid = 0;

		if (DMU_OS_IS_L2CACHEABLE(osi)) {
			read_flags |= ARC_L2CACHE;
		}

		dprintf_bp(osi->os_rootbp, "reading %s", "");
		/*
		 * NB: once bprewrite scrub is able to change the bp, a call
		 * arriving via dmu_objset_open_ds_os could see the bp change
		 * underneath it, and a lock will be needed here.
		 */
		err = arc_read_nolock(NULL, spa, osi->os_rootbp,
		    arc_getbuf_func, &osi->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL,
		    &read_flags, &bookmark);
		if (err != 0) {
			kmem_free(osi, sizeof (objset_impl_t));
			/* Callers see a checksum failure as an I/O error. */
			if (err == ECKSUM) {
				err = EIO;
			}
			return (err);
		}

		/*
		 * If the pool version allows the larger objset_phys_t and
		 * the buffer we read is smaller, copy it into a zero-padded
		 * full-size buffer and drop the reference on the small one.
		 */
		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
		    arc_buf_size(osi->os_phys_buf) < sizeof (objset_phys_t)) {
			arc_buf_t *grown;

			grown = arc_buf_alloc(spa, sizeof (objset_phys_t),
			    &osi->os_phys_buf, ARC_BUFC_METADATA);
			bzero(grown->b_data, sizeof (objset_phys_t));
			bcopy(osi->os_phys_buf->b_data, grown->b_data,
			    arc_buf_size(osi->os_phys_buf));
			(void) arc_buf_remove_ref(osi->os_phys_buf,
			    &osi->os_phys_buf);
			osi->os_phys_buf = grown;
		}

		osi->os_phys = osi->os_phys_buf->b_data;
		osi->os_flags = osi->os_phys->os_flags;
	}

	if (ds == NULL) {
		/* The meta-objset has no properties; use fixed settings. */
		osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		osi->os_compress = ZIO_COMPRESS_LZJB;
		osi->os_copies = spa_max_replication(spa);
		osi->os_primary_cache = ZFS_CACHE_ALL;
		osi->os_secondary_cache = ZFS_CACHE_ALL;
	} else {
		/*
		 * Each changed_cb is invoked once before its register call
		 * returns, which replaces the default checksum/compression
		 * (fletcher2/off).  Once one registration fails, every
		 * later one is skipped.  Snapshots don't need to know about
		 * checksum/compression/copies.
		 */
		err = dsl_prop_register(ds, "primarycache",
		    primary_cache_changed_cb, osi);
		if (err == 0) {
			err = dsl_prop_register(ds, "secondarycache",
			    secondary_cache_changed_cb, osi);
		}

		if (!dsl_dataset_is_snapshot(ds)) {
			if (err == 0) {
				err = dsl_prop_register(ds, "checksum",
				    checksum_changed_cb, osi);
			}
			if (err == 0) {
				err = dsl_prop_register(ds, "compression",
				    compression_changed_cb, osi);
			}
			if (err == 0) {
				err = dsl_prop_register(ds, "copies",
				    copies_changed_cb, osi);
			}
		}

		if (err != 0) {
			VERIFY(arc_buf_remove_ref(osi->os_phys_buf,
			    &osi->os_phys_buf) == 1);
			kmem_free(osi, sizeof (objset_impl_t));
			return (err);
		}
	}

	/* The ZIL works on an in-core copy of the on-disk header. */
	osi->os_zil_header = osi->os_phys->os_zil_header;
	osi->os_zil = zil_alloc(&osi->os, &osi->os_zil_header);

	/* One dirty list and one free list per open transaction group. */
	for (t = 0; t < TXG_SIZE; t++) {
		list_create(&osi->os_dirty_dnodes[t], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[t]));
		list_create(&osi->os_free_dnodes[t], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[t]));
	}
	list_create(&osi->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&osi->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	osi->os_meta_dnode = dnode_special_open(osi,
	    &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);

	/* The accounting dnodes exist only in a full-size phys buffer. */
	if (arc_buf_size(osi->os_phys_buf) >= sizeof (objset_phys_t)) {
		osi->os_userused_dnode = dnode_special_open(osi,
		    &osi->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT);
		osi->os_groupused_dnode = dnode_special_open(osi,
		    &osi->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT);
	}

	/*
	 * Holding ds_opening_lock means no other thread should be
	 * installing an objset on this dataset, so no previous user
	 * pointer may be returned.
	 */
	if (ds != NULL) {
		VERIFY(NULL == dsl_dataset_set_user_ptr(ds, osi,
		    dmu_objset_evict));
	}

	*osip = osi;
	return (0);
}
/* * Start a log block write and advance to the next log block. * Calls are serialized. */ static lwb_t * zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb) { lwb_t *nlwb; zil_trailer_t *ztp = (zil_trailer_t *)(lwb->lwb_buf + lwb->lwb_sz) - 1; spa_t *spa = zilog->zl_spa; blkptr_t *bp = &ztp->zit_next_blk; uint64_t txg; uint64_t zil_blksz; int error; ASSERT(lwb->lwb_nused <= ZIL_BLK_DATA_SZ(lwb)); /* * Allocate the next block and save its address in this block * before writing it in order to establish the log chain. * Note that if the allocation of nlwb synced before we wrote * the block that points at it (lwb), we'd leak it if we crashed. * Therefore, we don't do txg_rele_to_sync() until zil_lwb_write_done(). */ txg = txg_hold_open(zilog->zl_dmu_pool, &lwb->lwb_txgh); txg_rele_to_quiesce(&lwb->lwb_txgh); /* * Pick a ZIL blocksize. We request a size that is the * maximum of the previous used size, the current used size and * the amount waiting in the queue. */ zil_blksz = MAX(zilog->zl_prev_used, zilog->zl_cur_used + sizeof (*ztp)); zil_blksz = MAX(zil_blksz, zilog->zl_itx_list_sz + sizeof (*ztp)); zil_blksz = P2ROUNDUP_TYPED(zil_blksz, ZIL_MIN_BLKSZ, uint64_t); if (zil_blksz > ZIL_MAX_BLKSZ) zil_blksz = ZIL_MAX_BLKSZ; BP_ZERO(bp); /* pass the old blkptr in order to spread log blocks across devs */ error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, txg); if (error) { dmu_tx_t *tx = dmu_tx_create_assigned(zilog->zl_dmu_pool, txg); /* * We dirty the dataset to ensure that zil_sync() will * be called to remove this lwb from our zl_lwb_list. * Failing to do so, may leave an lwb with a NULL lwb_buf * hanging around on the zl_lwb_list. */ dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx); dmu_tx_commit(tx); /* * Since we've just experienced an allocation failure so we * terminate the current lwb and send it on its way. 
*/ ztp->zit_pad = 0; ztp->zit_nused = lwb->lwb_nused; ztp->zit_bt.zbt_cksum = lwb->lwb_blk.blk_cksum; zio_nowait(lwb->lwb_zio); /* * By returning NULL the caller will call tx_wait_synced() */ return (NULL); } ASSERT3U(bp->blk_birth, ==, txg); ztp->zit_pad = 0; ztp->zit_nused = lwb->lwb_nused; ztp->zit_bt.zbt_cksum = lwb->lwb_blk.blk_cksum; bp->blk_cksum = lwb->lwb_blk.blk_cksum; bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++; /* * Allocate a new log write buffer (lwb). */ nlwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP); nlwb->lwb_zilog = zilog; nlwb->lwb_blk = *bp; nlwb->lwb_nused = 0; nlwb->lwb_sz = BP_GET_LSIZE(&nlwb->lwb_blk); nlwb->lwb_buf = zio_buf_alloc(nlwb->lwb_sz); nlwb->lwb_max_txg = txg; nlwb->lwb_zio = NULL; /* * Put new lwb at the end of the log chain */ mutex_enter(&zilog->zl_lock); list_insert_tail(&zilog->zl_lwb_list, nlwb); mutex_exit(&zilog->zl_lock); /* Record the block for later vdev flushing */ zil_add_block(zilog, &lwb->lwb_blk); /* * kick off the write for the old log block */ dprintf_bp(&lwb->lwb_blk, "lwb %p txg %llu: ", lwb, txg); ASSERT(lwb->lwb_zio); zio_nowait(lwb->lwb_zio); return (nlwb); }