/* ARGSUSED */ int dmu_objset_prefetch(char *name, void *arg) { dsl_dataset_t *ds; if (dsl_dataset_hold(name, FTAG, &ds)) return (0); if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) { mutex_enter(&ds->ds_opening_lock); if (!dsl_dataset_get_user_ptr(ds)) { uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH; zbookmark_t zb; zb.zb_objset = ds->ds_object; zb.zb_object = 0; zb.zb_level = -1; zb.zb_blkid = 0; (void) arc_read_nolock(NULL, dsl_dataset_get_spa(ds), &ds->ds_phys->ds_bp, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &aflags, &zb); } mutex_exit(&ds->ds_opening_lock); } dsl_dataset_rele(ds, FTAG); return (0); }
/*
 * Read the log block described by bp and validate its trailer.
 *
 * zilog  - log whose zl_spa the block is read from
 * bp     - block pointer of the log block; it is copied locally and the
 *          copy is what gets passed to the read
 * abufpp - out: receives the ARC buffer on success.  It is set to NULL
 *          before the read is issued and again if validation fails.
 *
 * Returns 0 on success, the error from arc_read_nolock() if the read
 * fails, or ECKSUM if the block was read but its trailer does not
 * validate.
 *
 * NOTE(review): no byteswapping is performed in this function itself;
 * presumably the read path takes care of it -- confirm.
 */
static int
zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, arc_buf_t **abufpp)
{
	blkptr_t bp_copy = *bp;
	zbookmark_t bookmark;
	uint32_t arc_flags = ARC_WAIT;
	int err;

	/* The bookmark is derived from the words stored in bp's checksum. */
	bookmark.zb_objset = bp->blk_cksum.zc_word[ZIL_ZC_OBJSET];
	bookmark.zb_object = 0;
	bookmark.zb_level = -1;
	bookmark.zb_blkid = bp->blk_cksum.zc_word[ZIL_ZC_SEQ];

	*abufpp = NULL;

	/*
	 * No scrubbing should be in progress while the log is being
	 * replayed, so reading without taking a lock is fine.
	 */
	err = arc_read_nolock(NULL, zilog->zl_spa, &bp_copy,
	    arc_getbuf_func, abufpp, ZIO_PRIORITY_SYNC_READ,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB,
	    &arc_flags, &bookmark);

	if (err == 0) {
		char *payload = (*abufpp)->b_data;
		uint64_t lsize = BP_GET_LSIZE(bp);
		/* The trailer occupies the last bytes of the block. */
		zil_trailer_t *trailer = (zil_trailer_t *)(payload + lsize) - 1;
		zio_cksum_t expected = bp->blk_cksum;

		/*
		 * Validate the checksummed log block:
		 *  - sequence numbers must be sequential, so the checksum
		 *    verifier recorded for the next block has to equal
		 *    bp's checksum with the sequence word bumped by one;
		 *  - the chain must continue (next block is not a hole);
		 *  - the used size must fit in front of the trailer.
		 */
		expected.zc_word[ZIL_ZC_SEQ] += 1;

		if (bcmp(&expected, &trailer->zit_next_blk.blk_cksum,
		    sizeof (expected)) != 0 ||
		    BP_IS_HOLE(&trailer->zit_next_blk) ||
		    (trailer->zit_nused > (lsize - sizeof (zil_trailer_t)))) {
			err = ECKSUM;
			/* Drop the buffer; the caller must not see it. */
			VERIFY(arc_buf_remove_ref(*abufpp, abufpp) == 1);
			*abufpp = NULL;
		}
	}

	dprintf("error %d on %llu:%llu\n", err, bookmark.zb_objset,
	    bookmark.zb_blkid);

	return (err);
}
/*
 * Allocate and initialize the in-core objset (objset_impl_t) for the
 * objset rooted at bp.
 *
 * spa  - pool the objset lives in
 * ds   - owning dataset, or NULL for the meta-objset; when non-NULL the
 *        caller must hold ds->ds_opening_lock (asserted)
 * bp   - root block pointer; if it is a hole a zero-filled objset_phys_t
 *        buffer is allocated instead of being read
 * osip - out: receives the new objset on success
 *
 * Returns 0 on success.  On failure the error from arc_read_nolock() or
 * dsl_prop_register() is returned, the partially built objset is freed,
 * and *osip is left untouched.
 */
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_impl_t **osip)
{
	objset_impl_t *osi;
	int error;
	int txg;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP);
	osi->os.os = osi;
	osi->os_dsl_dataset = ds;
	osi->os_spa = spa;
	osi->os_rootbp = bp;

	if (BP_IS_HOLE(osi->os_rootbp)) {
		/* Nothing on disk yet: start from an all-zero phys buffer. */
#ifdef __APPLE_KERNEL__
		osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t),
		    &osi->os_phys_buf, ARC_BUFC_METADATA, TRUE/*alloc_buf*/);
#else
		osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t),
		    &osi->os_phys_buf, ARC_BUFC_METADATA);
#endif
		bzero(osi->os_phys_buf->b_data, sizeof (objset_phys_t));
	} else {
		zbookmark_t bookmark;
		uint32_t arc_flags = ARC_WAIT;

		bookmark.zb_objset = (ds != NULL) ? ds->ds_object : 0;
		bookmark.zb_object = 0;
		bookmark.zb_level = -1;
		bookmark.zb_blkid = 0;

		dprintf_bp(osi->os_rootbp, "reading %s", "");
		/*
		 * NB: once bprewrite scrub is able to change the bp, and
		 * this is reached via dmu_objset_open_ds_os, the bp could
		 * change underneath us and a lock will be required.
		 */
		error = arc_read_nolock(NULL, spa, osi->os_rootbp,
		    arc_getbuf_func, &osi->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL,
		    &arc_flags, &bookmark);
		if (error != 0) {
			kmem_free(osi, sizeof (objset_impl_t));
			return (error);
		}
	}
	osi->os_phys = osi->os_phys_buf->b_data;

	if (ds == NULL) {
		/* It's the meta-objset: use fixed settings. */
		osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		osi->os_compress = ZIO_COMPRESS_LZJB;
		osi->os_copies = spa_max_replication(spa);
	} else if (ds->ds_phys->ds_num_children == 0) {
		/*
		 * Note: each changed_cb is invoked once before its register
		 * call returns, which moves checksum/compression away from
		 * the default (fletcher2/off).  Snapshots don't need to
		 * know, and registering for them would complicate clone
		 * promotion.
		 *
		 * NOTE(review): if a later registration fails, callbacks
		 * that were already registered are not unregistered here
		 * even though osi is freed -- confirm this is safe given
		 * dsl_prop_register() failure semantics.
		 */
		error = dsl_prop_register(ds, "checksum",
		    checksum_changed_cb, osi);
		if (error == 0) {
			error = dsl_prop_register(ds, "compression",
			    compression_changed_cb, osi);
		}
		if (error == 0) {
			error = dsl_prop_register(ds, "copies",
			    copies_changed_cb, osi);
		}
		if (error != 0) {
			VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1);
			kmem_free(osi, sizeof (objset_impl_t));
			return (error);
		}
	}

	/* Keep an in-core copy of the ZIL header and set up the ZIL. */
	osi->os_zil_header = osi->os_phys->os_zil_header;
	osi->os_zil = zil_alloc(&osi->os, &osi->os_zil_header);

	/* Per-txg dirty/free dnode lists share the dn_dirty_link slot. */
	for (txg = 0; txg < TXG_SIZE; txg++) {
		list_create(&osi->os_dirty_dnodes[txg], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[txg]));
		list_create(&osi->os_free_dnodes[txg], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[txg]));
	}
	list_create(&osi->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&osi->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	osi->os_meta_dnode = dnode_special_open(osi,
	    &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);

	/*
	 * Holding ds_opening_lock means no other thread should be
	 * attaching a user pointer at the same time, so the previous
	 * value must be NULL.
	 */
	if (ds != NULL) {
		VERIFY(NULL == dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict));
	}

	*osip = osi;
	return (0);
}
/*
 * Allocate and initialize the in-core objset (objset_impl_t) for the
 * objset rooted at bp, including the user/group-used dnodes when the
 * phys buffer is large enough to contain them.
 *
 * spa  - pool the objset lives in
 * ds   - owning dataset, or NULL for the meta-objset; when non-NULL the
 *        caller must hold ds->ds_opening_lock (asserted)
 * bp   - root block pointer; if it is a hole a zero-filled phys buffer
 *        is allocated instead of being read
 * osip - out: receives the new objset on success
 *
 * Returns 0 on success.  A failed root-block read returns the read error
 * with ECKSUM reported as EIO; a failed property registration returns
 * that error.  In both cases the partially built objset is freed and
 * *osip is left untouched.
 */
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_impl_t **osip)
{
	objset_impl_t *osi;
	int error;
	int txg;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP);
	osi->os.os = osi;
	osi->os_dsl_dataset = ds;
	osi->os_spa = spa;
	osi->os_rootbp = bp;

	if (BP_IS_HOLE(osi->os_rootbp)) {
		/*
		 * Nothing on disk yet: start from an all-zero phys buffer
		 * whose size depends on the pool version.
		 */
		int physsz = spa_version(spa) >= SPA_VERSION_USERSPACE ?
		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;

		osi->os_phys_buf = arc_buf_alloc(spa, physsz,
		    &osi->os_phys_buf, ARC_BUFC_METADATA);
		osi->os_phys = osi->os_phys_buf->b_data;
		bzero(osi->os_phys, physsz);
	} else {
		zbookmark_t bookmark;
		uint32_t arc_flags = ARC_WAIT;

		bookmark.zb_objset = (ds != NULL) ? ds->ds_object : 0;
		bookmark.zb_object = 0;
		bookmark.zb_level = -1;
		bookmark.zb_blkid = 0;
		if (DMU_OS_IS_L2CACHEABLE(osi))
			arc_flags |= ARC_L2CACHE;

		dprintf_bp(osi->os_rootbp, "reading %s", "");
		/*
		 * NB: once bprewrite scrub is able to change the bp, and
		 * this is reached via dmu_objset_open_ds_os, the bp could
		 * change underneath us and a lock will be required.
		 */
		error = arc_read_nolock(NULL, spa, osi->os_rootbp,
		    arc_getbuf_func, &osi->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL,
		    &arc_flags, &bookmark);
		if (error != 0) {
			kmem_free(osi, sizeof (objset_impl_t));
			/* Checksum errors are reported as I/O errors. */
			return (error == ECKSUM ? EIO : error);
		}

		/*
		 * Increase the blocksize if we are permitted: copy the
		 * data that was read into a zeroed, full-size buffer and
		 * release the smaller one.
		 */
		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
		    arc_buf_size(osi->os_phys_buf) < sizeof (objset_phys_t)) {
			arc_buf_t *grown = arc_buf_alloc(spa,
			    sizeof (objset_phys_t), &osi->os_phys_buf,
			    ARC_BUFC_METADATA);

			bzero(grown->b_data, sizeof (objset_phys_t));
			bcopy(osi->os_phys_buf->b_data, grown->b_data,
			    arc_buf_size(osi->os_phys_buf));
			(void) arc_buf_remove_ref(osi->os_phys_buf,
			    &osi->os_phys_buf);
			osi->os_phys_buf = grown;
		}

		osi->os_phys = osi->os_phys_buf->b_data;
		osi->os_flags = osi->os_phys->os_flags;
	}

	if (ds == NULL) {
		/* It's the meta-objset: use fixed settings. */
		osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		osi->os_compress = ZIO_COMPRESS_LZJB;
		osi->os_copies = spa_max_replication(spa);
		osi->os_primary_cache = ZFS_CACHE_ALL;
		osi->os_secondary_cache = ZFS_CACHE_ALL;
	} else {
		/*
		 * Note: each changed_cb is invoked once before its register
		 * call returns, which moves checksum/compression away from
		 * the default (fletcher2/off).  Snapshots don't need to
		 * know about checksum/compression/copies, so those three
		 * are only registered for non-snapshots.
		 *
		 * NOTE(review): if a later registration fails, callbacks
		 * that were already registered are not unregistered here
		 * even though osi is freed -- confirm this is safe given
		 * dsl_prop_register() failure semantics.
		 */
		error = dsl_prop_register(ds, "primarycache",
		    primary_cache_changed_cb, osi);
		if (error == 0) {
			error = dsl_prop_register(ds, "secondarycache",
			    secondary_cache_changed_cb, osi);
		}
		if (!dsl_dataset_is_snapshot(ds)) {
			if (error == 0) {
				error = dsl_prop_register(ds, "checksum",
				    checksum_changed_cb, osi);
			}
			if (error == 0) {
				error = dsl_prop_register(ds, "compression",
				    compression_changed_cb, osi);
			}
			if (error == 0) {
				error = dsl_prop_register(ds, "copies",
				    copies_changed_cb, osi);
			}
		}
		if (error != 0) {
			VERIFY(arc_buf_remove_ref(osi->os_phys_buf, &osi->os_phys_buf) == 1);
			kmem_free(osi, sizeof (objset_impl_t));
			return (error);
		}
	}

	/* Keep an in-core copy of the ZIL header and set up the ZIL. */
	osi->os_zil_header = osi->os_phys->os_zil_header;
	osi->os_zil = zil_alloc(&osi->os, &osi->os_zil_header);

	/* Per-txg dirty/free dnode lists share the dn_dirty_link slot. */
	for (txg = 0; txg < TXG_SIZE; txg++) {
		list_create(&osi->os_dirty_dnodes[txg], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[txg]));
		list_create(&osi->os_free_dnodes[txg], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[txg]));
	}
	list_create(&osi->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&osi->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	osi->os_meta_dnode = dnode_special_open(osi,
	    &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);

	/*
	 * The user/group-used dnodes only exist when the phys buffer is
	 * at least a full objset_phys_t.
	 */
	if (arc_buf_size(osi->os_phys_buf) >= sizeof (objset_phys_t)) {
		osi->os_userused_dnode = dnode_special_open(osi,
		    &osi->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT);
		osi->os_groupused_dnode = dnode_special_open(osi,
		    &osi->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT);
	}

	/*
	 * Holding ds_opening_lock means no other thread should be
	 * attaching a user pointer at the same time, so the previous
	 * value must be NULL.
	 */
	if (ds != NULL) {
		VERIFY(NULL == dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict));
	}

	*osip = osi;
	return (0);
}