/* * Read a log block, make sure it's valid, and byteswap it if necessary. */ static int zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, arc_buf_t **abufpp) { blkptr_t blk = *bp; zbookmark_t zb; uint32_t aflags = ARC_WAIT; int error; zb.zb_objset = bp->blk_cksum.zc_word[ZIL_ZC_OBJSET]; zb.zb_object = 0; zb.zb_level = -1; zb.zb_blkid = bp->blk_cksum.zc_word[ZIL_ZC_SEQ]; *abufpp = NULL; /* * We shouldn't be doing any scrubbing while we're doing log * replay, it's OK to not lock. */ error = arc_read_nolock(NULL, zilog->zl_spa, &blk, arc_getbuf_func, abufpp, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB, &aflags, &zb); if (error == 0) { char *data = (*abufpp)->b_data; uint64_t blksz = BP_GET_LSIZE(bp); zil_trailer_t *ztp = (zil_trailer_t *)(data + blksz) - 1; zio_cksum_t cksum = bp->blk_cksum; /* * Validate the checksummed log block. * * Sequence numbers should be... sequential. The checksum * verifier for the next block should be bp's checksum plus 1. * * Also check the log chain linkage and size used. */ cksum.zc_word[ZIL_ZC_SEQ]++; if (bcmp(&cksum, &ztp->zit_next_blk.blk_cksum, sizeof (cksum)) || BP_IS_HOLE(&ztp->zit_next_blk) || (ztp->zit_nused > (blksz - sizeof (zil_trailer_t)))) { error = ECKSUM; } if (error) { VERIFY(arc_buf_remove_ref(*abufpp, abufpp) == 1); *abufpp = NULL; } } dprintf("error %d on %llu:%llu\n", error, zb.zb_objset, zb.zb_blkid); return (error); }
void dmu_objset_evict(dsl_dataset_t *ds, void *arg) { objset_impl_t *osi = arg; objset_t os; int i; for (i = 0; i < TXG_SIZE; i++) { ASSERT(list_head(&osi->os_dirty_dnodes[i]) == NULL); ASSERT(list_head(&osi->os_free_dnodes[i]) == NULL); } if (ds) { if (!dsl_dataset_is_snapshot(ds)) { VERIFY(0 == dsl_prop_unregister(ds, "checksum", checksum_changed_cb, osi)); VERIFY(0 == dsl_prop_unregister(ds, "compression", compression_changed_cb, osi)); VERIFY(0 == dsl_prop_unregister(ds, "copies", copies_changed_cb, osi)); } VERIFY(0 == dsl_prop_unregister(ds, "primarycache", primary_cache_changed_cb, osi)); VERIFY(0 == dsl_prop_unregister(ds, "secondarycache", secondary_cache_changed_cb, osi)); } /* * We should need only a single pass over the dnode list, since * nothing can be added to the list at this point. */ os.os = osi; (void) dmu_objset_evict_dbufs(&os); dnode_special_close(osi->os_meta_dnode); if (osi->os_userused_dnode) { dnode_special_close(osi->os_userused_dnode); dnode_special_close(osi->os_groupused_dnode); } zil_free(osi->os_zil); ASSERT3P(list_head(&osi->os_dnodes), ==, NULL); VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1); mutex_destroy(&osi->os_lock); mutex_destroy(&osi->os_obj_lock); mutex_destroy(&osi->os_user_ptr_lock); kmem_free(osi, sizeof (objset_impl_t)); }
/* * return true if the initial log block is not valid */ static boolean_t zil_empty(zilog_t *zilog) { const zil_header_t *zh = zilog->zl_header; arc_buf_t *abuf = NULL; if (BP_IS_HOLE(&zh->zh_log)) return (B_TRUE); if (zil_read_log_block(zilog, &zh->zh_log, &abuf) != 0) return (B_TRUE); VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); return (B_FALSE); }
/* ARGSUSED */ int zil_check_log_chain(char *osname, void *txarg) { zilog_t *zilog; zil_header_t *zh; blkptr_t blk; arc_buf_t *abuf; objset_t *os; char *lrbuf; zil_trailer_t *ztp; int error; error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os); if (error) { cmn_err(CE_WARN, "can't open objset for %s", osname); return (0); } zilog = dmu_objset_zil(os); zh = zil_header_in_syncing_context(zilog); blk = zh->zh_log; if (BP_IS_HOLE(&blk)) { dmu_objset_close(os); return (0); /* no chain */ } for (;;) { error = zil_read_log_block(zilog, &blk, &abuf); if (error) break; lrbuf = abuf->b_data; ztp = (zil_trailer_t *)(lrbuf + BP_GET_LSIZE(&blk)) - 1; blk = ztp->zit_next_blk; VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); } dmu_objset_close(os); if (error == ECKSUM) return (0); /* normal end of chain */ return (error); }
int dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, objset_t **osp) { objset_t *os; int i, err; ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); os = kmem_zalloc(sizeof (objset_t), KM_SLEEP); os->os_dsl_dataset = ds; os->os_spa = spa; os->os_rootbp = bp; if (!BP_IS_HOLE(os->os_rootbp)) { arc_flags_t aflags = ARC_FLAG_WAIT; zbookmark_phys_t zb; SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); if (DMU_OS_IS_L2CACHEABLE(os)) aflags |= ARC_FLAG_L2CACHE; if (DMU_OS_IS_L2COMPRESSIBLE(os)) aflags |= ARC_FLAG_L2COMPRESS; dprintf_bp(os->os_rootbp, "reading %s", ""); err = arc_read(NULL, spa, os->os_rootbp, arc_getbuf_func, &os->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); if (err != 0) { kmem_free(os, sizeof (objset_t)); /* convert checksum errors into IO errors */ if (err == ECKSUM) err = SET_ERROR(EIO); return (err); } /* Increase the blocksize if we are permitted. */ if (spa_version(spa) >= SPA_VERSION_USERSPACE && arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) { arc_buf_t *buf = arc_buf_alloc(spa, sizeof (objset_phys_t), &os->os_phys_buf, ARC_BUFC_METADATA); bzero(buf->b_data, sizeof (objset_phys_t)); bcopy(os->os_phys_buf->b_data, buf->b_data, arc_buf_size(os->os_phys_buf)); (void) arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf); os->os_phys_buf = buf; } os->os_phys = os->os_phys_buf->b_data; os->os_flags = os->os_phys->os_flags; } else { int size = spa_version(spa) >= SPA_VERSION_USERSPACE ? sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE; os->os_phys_buf = arc_buf_alloc(spa, size, &os->os_phys_buf, ARC_BUFC_METADATA); os->os_phys = os->os_phys_buf->b_data; bzero(os->os_phys, size); } /* * Note: the changed_cb will be called once before the register * func returns, thus changing the checksum/compression from the * default (fletcher2/off). Snapshots don't need to know about * checksum/compression/copies. */ if (ds != NULL) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE), primary_cache_changed_cb, os); if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE), secondary_cache_changed_cb, os); } if (!ds->ds_is_snapshot) { if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_COMPRESSION), compression_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_COPIES), copies_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_DEDUP), dedup_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_LOGBIAS), logbias_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_SYNC), sync_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name( ZFS_PROP_REDUNDANT_METADATA), redundant_metadata_changed_cb, os); } if (err == 0) { err = dsl_prop_register(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE), recordsize_changed_cb, os); } } if (err != 0) { VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf)); kmem_free(os, sizeof (objset_t)); return (err); } } else { /* It's the meta-objset. */ os->os_checksum = ZIO_CHECKSUM_FLETCHER_4; os->os_compress = ZIO_COMPRESS_ON; os->os_copies = spa_max_replication(spa); os->os_dedup_checksum = ZIO_CHECKSUM_OFF; os->os_dedup_verify = B_FALSE; os->os_logbias = ZFS_LOGBIAS_LATENCY; os->os_sync = ZFS_SYNC_STANDARD; os->os_primary_cache = ZFS_CACHE_ALL; os->os_secondary_cache = ZFS_CACHE_ALL; } if (ds == NULL || !ds->ds_is_snapshot) os->os_zil_header = os->os_phys->os_zil_header; os->os_zil = zil_alloc(os, &os->os_zil_header); for (i = 0; i < TXG_SIZE; i++) { list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); list_create(&os->os_free_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); } list_create(&os->os_dnodes, sizeof (dnode_t), offsetof(dnode_t, dn_link)); list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); dnode_special_open(os, &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT, &os->os_meta_dnode); if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) { dnode_special_open(os, &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT, &os->os_userused_dnode); dnode_special_open(os, &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode); } *osp = os; return (0); }
void dmu_objset_evict(objset_t *os) { dsl_dataset_t *ds = os->os_dsl_dataset; int t; for (t = 0; t < TXG_SIZE; t++) ASSERT(!dmu_objset_is_dirty(os, t)); if (ds) { if (!dsl_dataset_is_snapshot(ds)) { VERIFY(0 == dsl_prop_unregister(ds, "checksum", checksum_changed_cb, os)); VERIFY(0 == dsl_prop_unregister(ds, "compression", compression_changed_cb, os)); VERIFY(0 == dsl_prop_unregister(ds, "copies", copies_changed_cb, os)); VERIFY(0 == dsl_prop_unregister(ds, "dedup", dedup_changed_cb, os)); VERIFY(0 == dsl_prop_unregister(ds, "logbias", logbias_changed_cb, os)); VERIFY(0 == dsl_prop_unregister(ds, "sync", sync_changed_cb, os)); } VERIFY(0 == dsl_prop_unregister(ds, "primarycache", primary_cache_changed_cb, os)); VERIFY(0 == dsl_prop_unregister(ds, "secondarycache", secondary_cache_changed_cb, os)); VERIFY(0 == dsl_prop_unregister(ds, "encryption", crypt_changed_cb, os)); } if (os->os_sa) sa_tear_down(os); /* * We should need only a single pass over the dnode list, since * nothing can be added to the list at this point. */ (void) dmu_objset_evict_dbufs(os); dnode_special_close(&os->os_meta_dnode); if (DMU_USERUSED_DNODE(os)) { dnode_special_close(&os->os_userused_dnode); dnode_special_close(&os->os_groupused_dnode); } zil_free(os->os_zil); ASSERT3P(list_head(&os->os_dnodes), ==, NULL); VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1); /* * This is a barrier to prevent the objset from going away in * dnode_move() until we can safely ensure that the objset is still in * use. We consider the objset valid before the barrier and invalid * after the barrier. */ rw_enter(&os_lock, RW_READER); rw_exit(&os_lock); mutex_destroy(&os->os_lock); mutex_destroy(&os->os_obj_lock); mutex_destroy(&os->os_user_ptr_lock); kmem_free(os, sizeof (objset_t)); }
int dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, objset_t **osp) { objset_t *os; int i, err = 0; ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); os = kmem_zalloc(sizeof (objset_t), KM_PUSHPAGE); os->os_dsl_dataset = ds; os->os_spa = spa; os->os_rootbp = bp; if (!BP_IS_HOLE(os->os_rootbp)) { uint32_t aflags = ARC_WAIT; zbookmark_t zb; SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET, ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID); if (DMU_OS_IS_L2CACHEABLE(os)) aflags |= ARC_L2CACHE; dprintf_bp(os->os_rootbp, "reading %s", ""); /* * XXX when bprewrite scrub can change the bp, * and this is called from dmu_objset_open_ds_os, the bp * could change, and we'll need a lock. */ err = dsl_read_nolock(NULL, spa, os->os_rootbp, arc_getbuf_func, &os->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); if (err) { kmem_free(os, sizeof (objset_t)); /* convert checksum errors into IO errors */ if (err == ECKSUM) err = EIO; return (err); } /* Increase the blocksize if we are permitted. */ if (spa_version(spa) >= SPA_VERSION_USERSPACE && arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) { arc_buf_t *buf = arc_buf_alloc(spa, sizeof (objset_phys_t), &os->os_phys_buf, ARC_BUFC_METADATA); bzero(buf->b_data, sizeof (objset_phys_t)); bcopy(os->os_phys_buf->b_data, buf->b_data, arc_buf_size(os->os_phys_buf)); (void) arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf); os->os_phys_buf = buf; } os->os_phys = os->os_phys_buf->b_data; os->os_flags = os->os_phys->os_flags; } else { int size = spa_version(spa) >= SPA_VERSION_USERSPACE ? sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE; os->os_phys_buf = arc_buf_alloc(spa, size, &os->os_phys_buf, ARC_BUFC_METADATA); os->os_phys = os->os_phys_buf->b_data; bzero(os->os_phys, size); } /* * Note: the changed_cb will be called once before the register * func returns, thus changing the checksum/compression from the * default (fletcher2/off). Snapshots don't need to know about * checksum/compression/copies. But they do need to know about * encryption so that clones from the snaphost inherit the * same encryption property regardless of where in the namespace * they get created. */ if (ds) { err = dsl_prop_register(ds, "primarycache", primary_cache_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "secondarycache", secondary_cache_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "encryption", crypt_changed_cb, os); if (!dsl_dataset_is_snapshot(ds)) { if (err == 0) err = dsl_prop_register(ds, "checksum", checksum_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "compression", compression_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "copies", copies_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "dedup", dedup_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "logbias", logbias_changed_cb, os); if (err == 0) err = dsl_prop_register(ds, "sync", sync_changed_cb, os); } if (err) { VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf) == 1); kmem_free(os, sizeof (objset_t)); return (err); } } else if (ds == NULL) { /* * It's the meta-objset. * Encryption is off for ZFS metadata but on for ZPL metadata * and file/zvol contents. */ os->os_checksum = ZIO_CHECKSUM_FLETCHER_4; os->os_compress = ZIO_COMPRESS_LZJB; os->os_copies = spa_max_replication(spa); os->os_dedup_checksum = ZIO_CHECKSUM_OFF; os->os_dedup_verify = 0; os->os_logbias = 0; os->os_sync = 0; os->os_primary_cache = ZFS_CACHE_ALL; os->os_secondary_cache = ZFS_CACHE_ALL; os->os_crypt = ZIO_CRYPT_OFF; } if (ds == NULL || !dsl_dataset_is_snapshot(ds)) os->os_zil_header = os->os_phys->os_zil_header; os->os_zil = zil_alloc(os, &os->os_zil_header); for (i = 0; i < TXG_SIZE; i++) { list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); list_create(&os->os_free_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); } list_create(&os->os_dnodes, sizeof (dnode_t), offsetof(dnode_t, dn_link)); list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); DMU_META_DNODE(os) = dnode_special_open(os, &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT, &os->os_meta_dnode); if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) { DMU_USERUSED_DNODE(os) = dnode_special_open(os, &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT, &os->os_userused_dnode); DMU_GROUPUSED_DNODE(os) = dnode_special_open(os, &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode); } /* * We should be the only thread trying to do this because we * have ds_opening_lock */ if (ds) { mutex_enter(&ds->ds_lock); ASSERT(ds->ds_objset == NULL); ds->ds_objset = os; mutex_exit(&ds->ds_lock); } *osp = os; return (0); }
void dmu_objset_evict(objset_t *os) { dsl_dataset_t *ds = os->os_dsl_dataset; for (int t = 0; t < TXG_SIZE; t++) ASSERT(!dmu_objset_is_dirty(os, t)); if (ds) { if (!dsl_dataset_is_snapshot(ds)) { VERIFY0(dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_CHECKSUM), checksum_changed_cb, os)); VERIFY0(dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_COMPRESSION), compression_changed_cb, os)); VERIFY0(dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_COPIES), copies_changed_cb, os)); VERIFY0(dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEDUP), dedup_changed_cb, os)); VERIFY0(dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_LOGBIAS), logbias_changed_cb, os)); VERIFY0(dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SYNC), sync_changed_cb, os)); } VERIFY0(dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE), primary_cache_changed_cb, os)); VERIFY0(dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE), secondary_cache_changed_cb, os)); } if (os->os_sa) sa_tear_down(os); dmu_objset_evict_dbufs(os); dnode_special_close(&os->os_meta_dnode); if (DMU_USERUSED_DNODE(os)) { dnode_special_close(&os->os_userused_dnode); dnode_special_close(&os->os_groupused_dnode); } zil_free(os->os_zil); ASSERT3P(list_head(&os->os_dnodes), ==, NULL); VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf)); /* * This is a barrier to prevent the objset from going away in * dnode_move() until we can safely ensure that the objset is still in * use. We consider the objset valid before the barrier and invalid * after the barrier. */ rw_enter(&os_lock, RW_READER); rw_exit(&os_lock); mutex_destroy(&os->os_lock); mutex_destroy(&os->os_obj_lock); mutex_destroy(&os->os_user_ptr_lock); kmem_free(os, sizeof (objset_t)); }
int dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, objset_impl_t **osip) { objset_impl_t *winner, *osi; int i, err, checksum; osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); osi->os.os = osi; osi->os_dsl_dataset = ds; osi->os_spa = spa; osi->os_rootbp = bp; if (!BP_IS_HOLE(osi->os_rootbp)) { uint32_t aflags = ARC_WAIT; zbookmark_t zb; zb.zb_objset = ds ? ds->ds_object : 0; zb.zb_object = 0; zb.zb_level = -1; zb.zb_blkid = 0; dprintf_bp(osi->os_rootbp, "reading %s", ""); err = arc_read(NULL, spa, osi->os_rootbp, dmu_ot[DMU_OT_OBJSET].ot_byteswap, arc_getbuf_func, &osi->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); if (err) { kmem_free(osi, sizeof (objset_impl_t)); return (err); } osi->os_phys = osi->os_phys_buf->b_data; arc_release(osi->os_phys_buf, &osi->os_phys_buf); } else { osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t), &osi->os_phys_buf, ARC_BUFC_METADATA); osi->os_phys = osi->os_phys_buf->b_data; bzero(osi->os_phys, sizeof (objset_phys_t)); } /* * Note: the changed_cb will be called once before the register * func returns, thus changing the checksum/compression from the * default (fletcher2/off). Snapshots don't need to know, and * registering would complicate clone promotion. */ if (ds && ds->ds_phys->ds_num_children == 0) { err = dsl_prop_register(ds, "checksum", checksum_changed_cb, osi); if (err == 0) err = dsl_prop_register(ds, "compression", compression_changed_cb, osi); if (err == 0) err = dsl_prop_register(ds, "copies", copies_changed_cb, osi); if (err) { VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1); kmem_free(osi, sizeof (objset_impl_t)); return (err); } } else if (ds == NULL) { /* It's the meta-objset. */ osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; osi->os_compress = ZIO_COMPRESS_LZJB; osi->os_copies = spa_max_replication(spa); } osi->os_zil = zil_alloc(&osi->os, &osi->os_phys->os_zil_header); /* * Metadata always gets compressed and checksummed. * If the data checksum is multi-bit correctable, and it's not * a ZBT-style checksum, then it's suitable for metadata as well. * Otherwise, the metadata checksum defaults to fletcher4. */ checksum = osi->os_checksum; if (zio_checksum_table[checksum].ci_correctable && !zio_checksum_table[checksum].ci_zbt) osi->os_md_checksum = checksum; else osi->os_md_checksum = ZIO_CHECKSUM_FLETCHER_4; osi->os_md_compress = ZIO_COMPRESS_LZJB; for (i = 0; i < TXG_SIZE; i++) { list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); } list_create(&osi->os_dnodes, sizeof (dnode_t), offsetof(dnode_t, dn_link)); list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); osi->os_meta_dnode = dnode_special_open(osi, &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); if (ds != NULL) { winner = dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict); if (winner) { dmu_objset_evict(ds, osi); osi = winner; } } *osip = osi; return (0); }
int dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, objset_impl_t **osip) { objset_impl_t *osi; int i, err; ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); osi->os.os = osi; osi->os_dsl_dataset = ds; osi->os_spa = spa; osi->os_rootbp = bp; if (!BP_IS_HOLE(osi->os_rootbp)) { uint32_t aflags = ARC_WAIT; zbookmark_t zb; zb.zb_objset = ds ? ds->ds_object : 0; zb.zb_object = 0; zb.zb_level = -1; zb.zb_blkid = 0; dprintf_bp(osi->os_rootbp, "reading %s", ""); /* * NB: when bprewrite scrub can change the bp, * and this is called from dmu_objset_open_ds_os, the bp * could change, and we'll need a lock. */ err = arc_read_nolock(NULL, spa, osi->os_rootbp, arc_getbuf_func, &osi->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); if (err) { kmem_free(osi, sizeof (objset_impl_t)); return (err); } osi->os_phys = osi->os_phys_buf->b_data; } else { #ifdef __APPLE_KERNEL__ osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t), &osi->os_phys_buf, ARC_BUFC_METADATA, TRUE/*alloc_buf*/); #else osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t), &osi->os_phys_buf, ARC_BUFC_METADATA); #endif osi->os_phys = osi->os_phys_buf->b_data; bzero(osi->os_phys, sizeof (objset_phys_t)); } /* * Note: the changed_cb will be called once before the register * func returns, thus changing the checksum/compression from the * default (fletcher2/off). Snapshots don't need to know, and * registering would complicate clone promotion. */ if (ds && ds->ds_phys->ds_num_children == 0) { err = dsl_prop_register(ds, "checksum", checksum_changed_cb, osi); if (err == 0) err = dsl_prop_register(ds, "compression", compression_changed_cb, osi); if (err == 0) err = dsl_prop_register(ds, "copies", copies_changed_cb, osi); if (err) { VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1); kmem_free(osi, sizeof (objset_impl_t)); return (err); } } else if (ds == NULL) { /* It's the meta-objset. */ osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; osi->os_compress = ZIO_COMPRESS_LZJB; osi->os_copies = spa_max_replication(spa); } osi->os_zil_header = osi->os_phys->os_zil_header; osi->os_zil = zil_alloc(&osi->os, &osi->os_zil_header); for (i = 0; i < TXG_SIZE; i++) { list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); } list_create(&osi->os_dnodes, sizeof (dnode_t), offsetof(dnode_t, dn_link)); list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&osi->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); osi->os_meta_dnode = dnode_special_open(osi, &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); /* * We should be the only thread trying to do this because we * have ds_opening_lock */ if (ds) { VERIFY(NULL == dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict)); } *osip = osi; return (0); }
int dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp, objset_impl_t **osip) { objset_impl_t *osi; int i, err; ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock)); osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP); osi->os.os = osi; osi->os_dsl_dataset = ds; osi->os_spa = spa; osi->os_rootbp = bp; if (!BP_IS_HOLE(osi->os_rootbp)) { uint32_t aflags = ARC_WAIT; zbookmark_t zb; zb.zb_objset = ds ? ds->ds_object : 0; zb.zb_object = 0; zb.zb_level = -1; zb.zb_blkid = 0; if (DMU_OS_IS_L2CACHEABLE(osi)) aflags |= ARC_L2CACHE; dprintf_bp(osi->os_rootbp, "reading %s", ""); /* * NB: when bprewrite scrub can change the bp, * and this is called from dmu_objset_open_ds_os, the bp * could change, and we'll need a lock. */ err = arc_read_nolock(NULL, spa, osi->os_rootbp, arc_getbuf_func, &osi->os_phys_buf, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb); if (err) { kmem_free(osi, sizeof (objset_impl_t)); /* convert checksum errors into IO errors */ if (err == ECKSUM) err = EIO; return (err); } /* Increase the blocksize if we are permitted. */ if (spa_version(spa) >= SPA_VERSION_USERSPACE && arc_buf_size(osi->os_phys_buf) < sizeof (objset_phys_t)) { arc_buf_t *buf = arc_buf_alloc(spa, sizeof (objset_phys_t), &osi->os_phys_buf, ARC_BUFC_METADATA); bzero(buf->b_data, sizeof (objset_phys_t)); bcopy(osi->os_phys_buf->b_data, buf->b_data, arc_buf_size(osi->os_phys_buf)); (void) arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf); osi->os_phys_buf = buf; } osi->os_phys = osi->os_phys_buf->b_data; osi->os_flags = osi->os_phys->os_flags; } else { int size = spa_version(spa) >= SPA_VERSION_USERSPACE ? sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE; osi->os_phys_buf = arc_buf_alloc(spa, size, &osi->os_phys_buf, ARC_BUFC_METADATA); osi->os_phys = osi->os_phys_buf->b_data; bzero(osi->os_phys, size); } /* * Note: the changed_cb will be called once before the register * func returns, thus changing the checksum/compression from the * default (fletcher2/off). Snapshots don't need to know about * checksum/compression/copies. */ if (ds) { err = dsl_prop_register(ds, "primarycache", primary_cache_changed_cb, osi); if (err == 0) err = dsl_prop_register(ds, "secondarycache", secondary_cache_changed_cb, osi); if (!dsl_dataset_is_snapshot(ds)) { if (err == 0) err = dsl_prop_register(ds, "checksum", checksum_changed_cb, osi); if (err == 0) err = dsl_prop_register(ds, "compression", compression_changed_cb, osi); if (err == 0) err = dsl_prop_register(ds, "copies", copies_changed_cb, osi); } if (err) { VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1); kmem_free(osi, sizeof (objset_impl_t)); return (err); } } else if (ds == NULL) { /* It's the meta-objset. */ osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4; osi->os_compress = ZIO_COMPRESS_LZJB; osi->os_copies = spa_max_replication(spa); osi->os_primary_cache = ZFS_CACHE_ALL; osi->os_secondary_cache = ZFS_CACHE_ALL; } osi->os_zil_header = osi->os_phys->os_zil_header; osi->os_zil = zil_alloc(&osi->os, &osi->os_zil_header); for (i = 0; i < TXG_SIZE; i++) { list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); list_create(&osi->os_free_dnodes[i], sizeof (dnode_t), offsetof(dnode_t, dn_dirty_link[i])); } list_create(&osi->os_dnodes, sizeof (dnode_t), offsetof(dnode_t, dn_link)); list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t), offsetof(dmu_buf_impl_t, db_link)); mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&osi->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL); osi->os_meta_dnode = dnode_special_open(osi, &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT); if (arc_buf_size(osi->os_phys_buf) >= sizeof (objset_phys_t)) { osi->os_userused_dnode = dnode_special_open(osi, &osi->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT); osi->os_groupused_dnode = dnode_special_open(osi, &osi->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT); } /* * We should be the only thread trying to do this because we * have ds_opening_lock */ if (ds) { VERIFY(NULL == dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict)); } *osip = osi; return (0); }
/* * Parse the intent log, and call parse_func for each valid record within. * Return the highest sequence number. */ uint64_t zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func, zil_parse_lr_func_t *parse_lr_func, void *arg, uint64_t txg) { const zil_header_t *zh = zilog->zl_header; uint64_t claim_seq = zh->zh_claim_seq; uint64_t seq = 0; uint64_t max_seq = 0; blkptr_t blk = zh->zh_log; arc_buf_t *abuf; char *lrbuf, *lrp; zil_trailer_t *ztp; int reclen, error; if (BP_IS_HOLE(&blk)) return (max_seq); /* * Starting at the block pointed to by zh_log we read the log chain. * For each block in the chain we strongly check that block to * ensure its validity. We stop when an invalid block is found. * For each block pointer in the chain we call parse_blk_func(). * For each record in each valid block we call parse_lr_func(). * If the log has been claimed, stop if we encounter a sequence * number greater than the highest claimed sequence number. */ zil_dva_tree_init(&zilog->zl_dva_tree); for (;;) { seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ]; if (claim_seq != 0 && seq > claim_seq) break; ASSERT(max_seq < seq); max_seq = seq; error = zil_read_log_block(zilog, &blk, &abuf); if (parse_blk_func != NULL) parse_blk_func(zilog, &blk, arg, txg); if (error) break; lrbuf = abuf->b_data; ztp = (zil_trailer_t *)(lrbuf + BP_GET_LSIZE(&blk)) - 1; blk = ztp->zit_next_blk; if (parse_lr_func == NULL) { VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); continue; } for (lrp = lrbuf; lrp < lrbuf + ztp->zit_nused; lrp += reclen) { lr_t *lr = (lr_t *)lrp; reclen = lr->lrc_reclen; ASSERT3U(reclen, >=, sizeof (lr_t)); parse_lr_func(zilog, lr, arg, txg); } VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1); } zil_dva_tree_fini(&zilog->zl_dva_tree); return (max_seq); }