コード例 #1
0
static void
dmu_objset_create_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	dsl_dir_t *dd = arg1;
	spa_t *spa = dd->dd_pool->dp_spa;
	struct oscarg *oa = arg2;
	uint64_t obj;

	ASSERT(dmu_tx_is_syncing(tx));

	obj = dsl_dataset_create_sync(dd, oa->lastname,
        oa->clone_origin, oa->crypto_ctx, oa->flags, oa->cr, tx);

	if (oa->clone_origin == NULL) {
		dsl_pool_t *dp = dd->dd_pool;
		dsl_dataset_t *ds;
		blkptr_t *bp;
		objset_t *os;

		VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
		bp = dsl_dataset_get_blkptr(ds);
		ASSERT(BP_IS_HOLE(bp));

		os = dmu_objset_create_impl(spa, ds, bp, oa->type,
                                    oa->crypto_ctx, tx);

		if (oa->userfunc)
			oa->userfunc(os, oa->userarg, oa->cr, tx);
		dsl_dataset_rele(ds, FTAG);
	}
コード例 #2
0
ファイル: dmu_objset.c プロジェクト: roddi/maczfs-10a286
static void
dmu_objset_create_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	dsl_dir_t *dd = arg1;
	struct oscarg *oa = arg2;
	dsl_dataset_t *ds;
	blkptr_t *bp;
	uint64_t dsobj;

	ASSERT(dmu_tx_is_syncing(tx));

	dsobj = dsl_dataset_create_sync(dd, oa->lastname,
	    oa->clone_parent, oa->flags, cr, tx);

	VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool, dsobj, FTAG, &ds));
	bp = dsl_dataset_get_blkptr(ds);
	if (BP_IS_HOLE(bp)) {
		objset_impl_t *osi;

		/* This is an empty dmu_objset; not a clone. */
		osi = dmu_objset_create_impl(dsl_dataset_get_spa(ds),
		    ds, bp, oa->type, tx);

		if (oa->userfunc)
			oa->userfunc(&osi->os, oa->userarg, cr, tx);
	}

	spa_history_internal_log(LOG_DS_CREATE, dd->dd_pool->dp_spa,
	    tx, cr, "dataset = %llu", dsobj);

	dsl_dataset_rele(ds, FTAG);
}
コード例 #3
0
ファイル: zdb_il.c プロジェクト: Acidburn0zzz/zfs
/* ARGSUSED */
void
dump_intent_log(zilog_t *zilog)
{
	const zil_header_t *zh = zilog->zl_header;
	int verbose = MAX(dump_opt['d'], dump_opt['i']);
	int i;

	if (BP_IS_HOLE(&zh->zh_log) || verbose < 1)
		return;

	(void) printf("\n    ZIL header: claim_txg %llu, "
	    "claim_blk_seq %llu, claim_lr_seq %llu",
	    (u_longlong_t)zh->zh_claim_txg,
	    (u_longlong_t)zh->zh_claim_blk_seq,
	    (u_longlong_t)zh->zh_claim_lr_seq);
	(void) printf(" replay_seq %llu, flags 0x%llx\n",
	    (u_longlong_t)zh->zh_replay_seq, (u_longlong_t)zh->zh_flags);

	for (i = 0; i < TX_MAX_TYPE; i++)
		zil_rec_info[i].zri_count = 0;

	if (verbose >= 2) {
		(void) printf("\n");
		(void) zil_parse(zilog, print_log_block, print_log_record, NULL,
		    zh->zh_claim_txg);
		print_log_stats(verbose);
	}
}
コード例 #4
0
ファイル: dmu_objset.c プロジェクト: harshada/zfs
/* ARGSUSED */
int
dmu_objset_prefetch(char *name, void *arg)
{
	dsl_dataset_t *ds;

	if (dsl_dataset_hold(name, FTAG, &ds))
		return (0);

	if (!BP_IS_HOLE(&ds->ds_phys->ds_bp)) {
		mutex_enter(&ds->ds_opening_lock);
		if (!dsl_dataset_get_user_ptr(ds)) {
			uint32_t aflags = ARC_NOWAIT | ARC_PREFETCH;
			zbookmark_t zb;

			zb.zb_objset = ds->ds_object;
			zb.zb_object = 0;
			zb.zb_level = -1;
			zb.zb_blkid = 0;

			(void) arc_read_nolock(NULL, dsl_dataset_get_spa(ds),
			    &ds->ds_phys->ds_bp, NULL, NULL,
			    ZIO_PRIORITY_ASYNC_READ,
			    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
			    &aflags, &zb);
		}
		mutex_exit(&ds->ds_opening_lock);
	}

	dsl_dataset_rele(ds, FTAG);
	return (0);
}
コード例 #5
0
ファイル: dmu_traverse.c プロジェクト: bahamas10/openzfs
static boolean_t
prefetch_needed(prefetch_data_t *pfd, const blkptr_t *bp)
{
	ASSERT(pfd->pd_flags & TRAVERSE_PREFETCH_DATA);
	if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp) ||
	    BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG)
		return (B_FALSE);
	return (B_TRUE);
}
コード例 #6
0
ファイル: zil.c プロジェクト: harshada/zfs
/*
 * In one tx, free all log blocks and clear the log header.
 * If keep_first is set, then we're replaying a log with no content.
 * We want to keep the first block, however, so that the first
 * synchronous transaction doesn't require a txg_wait_synced()
 * in zil_create().  We don't need to txg_wait_synced() here either
 * when keep_first is set, because both zil_create() and zil_destroy()
 * will wait for any in-progress destroys to complete.
 */
void
zil_destroy(zilog_t *zilog, boolean_t keep_first)
{
	const zil_header_t *zh = zilog->zl_header;
	lwb_t *lwb;
	dmu_tx_t *tx;
	uint64_t txg;

	/*
	 * Wait for any previous destroy to complete.
	 */
	txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);

	if (BP_IS_HOLE(&zh->zh_log))
		return;

	tx = dmu_tx_create(zilog->zl_os);
	(void) dmu_tx_assign(tx, TXG_WAIT);
	dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
	txg = dmu_tx_get_txg(tx);

	mutex_enter(&zilog->zl_lock);

	/*
	 * It is possible for the ZIL to get the previously mounted zilog
	 * structure of the same dataset if quickly remounted and the dbuf
	 * eviction has not completed. In this case we can see a non
	 * empty lwb list and keep_first will be set. We fix this by
	 * clearing the keep_first. This will be slower but it's very rare.
	 */
	if (!list_is_empty(&zilog->zl_lwb_list) && keep_first)
		keep_first = B_FALSE;

	ASSERT3U(zilog->zl_destroy_txg, <, txg);
	zilog->zl_destroy_txg = txg;
	zilog->zl_keep_first = keep_first;

	if (!list_is_empty(&zilog->zl_lwb_list)) {
		ASSERT(zh->zh_claim_txg == 0);
		ASSERT(!keep_first);
		while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
			list_remove(&zilog->zl_lwb_list, lwb);
			if (lwb->lwb_buf != NULL)
				zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
			zio_free_blk(zilog->zl_spa, &lwb->lwb_blk, txg);
			kmem_cache_free(zil_lwb_cache, lwb);
		}
	} else {
		if (!keep_first) {
			(void) zil_parse(zilog, zil_free_log_block,
			    zil_free_log_record, tx, zh->zh_claim_txg);
		}
	}
	mutex_exit(&zilog->zl_lock);

	dmu_tx_commit(tx);
}
コード例 #7
0
ファイル: zil.c プロジェクト: harshada/zfs
/*
 * Read a log block, make sure it's valid, and byteswap it if necessary.
 */
static int
zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, arc_buf_t **abufpp)
{
	blkptr_t blk = *bp;
	zbookmark_t zb;
	uint32_t aflags = ARC_WAIT;
	int error;

	zb.zb_objset = bp->blk_cksum.zc_word[ZIL_ZC_OBJSET];
	zb.zb_object = 0;
	zb.zb_level = -1;
	zb.zb_blkid = bp->blk_cksum.zc_word[ZIL_ZC_SEQ];

	*abufpp = NULL;

	/*
	 * We shouldn't be doing any scrubbing while we're doing log
	 * replay, it's OK to not lock.
	 */
	error = arc_read_nolock(NULL, zilog->zl_spa, &blk,
	    arc_getbuf_func, abufpp, ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL |
	    ZIO_FLAG_SPECULATIVE | ZIO_FLAG_SCRUB, &aflags, &zb);

	if (error == 0) {
		char *data = (*abufpp)->b_data;
		uint64_t blksz = BP_GET_LSIZE(bp);
		zil_trailer_t *ztp = (zil_trailer_t *)(data + blksz) - 1;
		zio_cksum_t cksum = bp->blk_cksum;

		/*
		 * Validate the checksummed log block.
		 *
		 * Sequence numbers should be... sequential.  The checksum
		 * verifier for the next block should be bp's checksum plus 1.
		 *
		 * Also check the log chain linkage and size used.
		 */
		cksum.zc_word[ZIL_ZC_SEQ]++;

		if (bcmp(&cksum, &ztp->zit_next_blk.blk_cksum,
		    sizeof (cksum)) || BP_IS_HOLE(&ztp->zit_next_blk) ||
		    (ztp->zit_nused > (blksz - sizeof (zil_trailer_t)))) {
			error = ECKSUM;
		}

		if (error) {
			VERIFY(arc_buf_remove_ref(*abufpp, abufpp) == 1);
			*abufpp = NULL;
		}
	}

	dprintf("error %d on %llu:%llu\n", error, zb.zb_objset, zb.zb_blkid);

	return (error);
}
コード例 #8
0
int
zil_claim(char *osname, void *txarg)
{
	dmu_tx_t *tx = txarg;
	uint64_t first_txg = dmu_tx_get_txg(tx);
	zilog_t *zilog;
	zil_header_t *zh;
	objset_t *os;
	int error;

	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
	if (error) {
		cmn_err(CE_WARN, "can't open objset for %s", osname);
		return (0);
	}

	zilog = dmu_objset_zil(os);
	zh = zil_header_in_syncing_context(zilog);

	/*
	 * Record here whether the zil has any records to replay.
	 * If the header block pointer is null or the block points
	 * to the stubby then we know there are no valid log records.
	 * We use the header to store this state as the the zilog gets
	 * freed later in dmu_objset_close().
	 * The flags (and the rest of the header fields) are cleared in
	 * zil_sync() as a result of a zil_destroy(), after replaying the log.
	 *
	 * Note, the intent log can be empty but still need the
	 * stubby to be claimed.
	 */
	if (!zil_empty(zilog))
		zh->zh_flags |= ZIL_REPLAY_NEEDED;

	/*
	 * Claim all log blocks if we haven't already done so, and remember
	 * the highest claimed sequence number.  This ensures that if we can
	 * read only part of the log now (e.g. due to a missing device),
	 * but we can read the entire log later, we will not try to replay
	 * or destroy beyond the last block we successfully claimed.
	 */
	ASSERT3U(zh->zh_claim_txg, <=, first_txg);
	if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) {
		zh->zh_claim_txg = first_txg;
		zh->zh_claim_seq = zil_parse(zilog, zil_claim_log_block,
		    zil_claim_log_record, tx, first_txg);
		dsl_dataset_dirty(dmu_objset_ds(os), tx);
	}

	ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
	dmu_objset_close(os);
	return (0);
}
コード例 #9
0
/*
 * Deferred entry; will be processed later by bplist_sync().
 */
void
bplist_enqueue_deferred(bplist_t *bpl, const blkptr_t *bp)
{
	bplist_q_t *bpq = kmem_alloc(sizeof (*bpq), KM_SLEEP);

	ASSERT(!BP_IS_HOLE(bp));
	mutex_enter(&bpl->bpl_lock);
	bpq->bpq_blk = *bp;
	bpq->bpq_next = bpl->bpl_queue;
	bpl->bpl_queue = bpq;
	mutex_exit(&bpl->bpl_lock);
}
コード例 #10
0
ファイル: zil.c プロジェクト: harshada/zfs
/*
 * return true if the initial log block is not valid
 */
static boolean_t
zil_empty(zilog_t *zilog)
{
	const zil_header_t *zh = zilog->zl_header;
	arc_buf_t *abuf = NULL;

	if (BP_IS_HOLE(&zh->zh_log))
		return (B_TRUE);

	if (zil_read_log_block(zilog, &zh->zh_log, &abuf) != 0)
		return (B_TRUE);

	VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
	return (B_FALSE);
}
コード例 #11
0
int
bplist_enqueue(bplist_t *bpl, const blkptr_t *bp, dmu_tx_t *tx)
{
	uint64_t blk, off;
	blkptr_t *bparray;
	int err;

	ASSERT(!BP_IS_HOLE(bp));
	mutex_enter(&bpl->bpl_lock);
	err = bplist_hold(bpl);
	if (err)
		return (err);

	blk = bpl->bpl_phys->bpl_entries >> bpl->bpl_bpshift;
	off = P2PHASE(bpl->bpl_phys->bpl_entries, 1ULL << bpl->bpl_bpshift);

	err = bplist_cache(bpl, blk);
	if (err) {
		mutex_exit(&bpl->bpl_lock);
		return (err);
	}

	dmu_buf_will_dirty(bpl->bpl_cached_dbuf, tx);
	bparray = bpl->bpl_cached_dbuf->db_data;
	bparray[off] = *bp;

	/* We never need the fill count. */
	bparray[off].blk_fill = 0;

	/* The bplist will compress better if we can leave off the checksum */
	if (!BP_GET_DEDUP(&bparray[off]))
		bzero(&bparray[off].blk_cksum, sizeof (bparray[off].blk_cksum));

	dmu_buf_will_dirty(bpl->bpl_dbuf, tx);
	bpl->bpl_phys->bpl_entries++;
	bpl->bpl_phys->bpl_bytes +=
	    bp_get_dsize_sync(dmu_objset_spa(bpl->bpl_mos), bp);
	if (bpl->bpl_havecomp) {
		bpl->bpl_phys->bpl_comp += BP_GET_PSIZE(bp);
		bpl->bpl_phys->bpl_uncomp += BP_GET_UCSIZE(bp);
	}
	mutex_exit(&bpl->bpl_lock);

	return (0);
}
コード例 #12
0
ファイル: dmu_traverse.c プロジェクト: bahamas10/openzfs
static int
traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
{
	traverse_data_t *td = arg;
	zbookmark_phys_t zb;

	if (BP_IS_HOLE(bp))
		return (0);

	if (claim_txg == 0 && bp->blk_birth >= spa_first_txg(td->td_spa))
		return (0);

	SET_BOOKMARK(&zb, td->td_objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
	    bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);

	(void) td->td_func(td->td_spa, zilog, bp, &zb, NULL, td->td_arg);

	return (0);
}
コード例 #13
0
ファイル: dnode_sync.c プロジェクト: 64116278/zfs
static void
free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
	uint64_t bytesfreed = 0;
	int i;

	dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num);

	for (i = 0; i < num; i++, bp++) {
		uint64_t lsize, lvl;
		dmu_object_type_t type;

		if (BP_IS_HOLE(bp))
			continue;

		bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE);
		ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys));

		/*
		 * Save some useful information on the holes being
		 * punched, including logical size, type, and indirection
		 * level. Retaining birth time enables detection of when
		 * holes are punched for reducing the number of free
		 * records transmitted during a zfs send.
		 */

		lsize = BP_GET_LSIZE(bp);
		type = BP_GET_TYPE(bp);
		lvl = BP_GET_LEVEL(bp);

		bzero(bp, sizeof (blkptr_t));

		if (spa_feature_is_active(dn->dn_objset->os_spa,
		    SPA_FEATURE_HOLE_BIRTH)) {
			BP_SET_LSIZE(bp, lsize);
			BP_SET_TYPE(bp, type);
			BP_SET_LEVEL(bp, lvl);
			BP_SET_BIRTH(bp, dmu_tx_get_txg(tx), 0);
		}
	}
	dnode_diduse_space(dn, -bytesfreed);
}
コード例 #14
0
/*
 * Read in the data for the dmu_sync()ed block, and change the log
 * record to write this whole block.
 */
void
zil_get_replay_data(zilog_t *zilog, lr_write_t *lr)
{
	blkptr_t *wbp = &lr->lr_blkptr;
	char *wbuf = (char *)(lr + 1); /* data follows lr_write_t */
	uint64_t blksz;

	if (BP_IS_HOLE(wbp)) {	/* compressed to a hole */
		blksz = BP_GET_LSIZE(&lr->lr_blkptr);
		/*
		 * If the blksz is zero then we must be replaying a log
		 * from an version prior to setting the blksize of null blocks.
		 * So we just zero the actual write size reqeusted.
		 */
		if (blksz == 0) {
			bzero(wbuf, lr->lr_length);
			return;
		}
		bzero(wbuf, blksz);
	} else {
		/*
		 * A subsequent write may have overwritten this block, in which
		 * case wbp may have been been freed and reallocated, and our
		 * read of wbp may fail with a checksum error.  We can safely
		 * ignore this because the later write will provide the
		 * correct data.
		 */
		zbookmark_t zb;

		zb.zb_objset = dmu_objset_id(zilog->zl_os);
		zb.zb_object = lr->lr_foid;
		zb.zb_level = 0;
		zb.zb_blkid = -1; /* unknown */

		blksz = BP_GET_LSIZE(&lr->lr_blkptr);
		(void) zio_wait(zio_read(NULL, zilog->zl_spa, wbp, wbuf, blksz,
		    NULL, NULL, ZIO_PRIORITY_SYNC_READ,
		    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &zb));
	}
	lr->lr_offset -= lr->lr_offset % blksz;
	lr->lr_length = blksz;
}
コード例 #15
0
ファイル: dsl_scan.c プロジェクト: mcarpenter/illumos-gate
/* ARGSUSED */
static void
dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp,
    uint64_t objset, uint64_t object, uint64_t blkid)
{
	zbookmark_t czb;
	uint32_t flags = ARC_NOWAIT | ARC_PREFETCH;

	if (zfs_no_scrub_prefetch)
		return;

	if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_min_txg ||
	    (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE))
		return;

	SET_BOOKMARK(&czb, objset, object, BP_GET_LEVEL(bp), blkid);

	(void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa, bp,
	    NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD, &flags, &czb);
}
コード例 #16
0
ファイル: zil.c プロジェクト: harshada/zfs
/* ARGSUSED */
int
zil_check_log_chain(char *osname, void *txarg)
{
	zilog_t *zilog;
	zil_header_t *zh;
	blkptr_t blk;
	arc_buf_t *abuf;
	objset_t *os;
	char *lrbuf;
	zil_trailer_t *ztp;
	int error;

	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
	if (error) {
		cmn_err(CE_WARN, "can't open objset for %s", osname);
		return (0);
	}

	zilog = dmu_objset_zil(os);
	zh = zil_header_in_syncing_context(zilog);
	blk = zh->zh_log;
	if (BP_IS_HOLE(&blk)) {
		dmu_objset_close(os);
		return (0); /* no chain */
	}

	for (;;) {
		error = zil_read_log_block(zilog, &blk, &abuf);
		if (error)
			break;
		lrbuf = abuf->b_data;
		ztp = (zil_trailer_t *)(lrbuf + BP_GET_LSIZE(&blk)) - 1;
		blk = ztp->zit_next_blk;
		VERIFY(arc_buf_remove_ref(abuf, &abuf) == 1);
	}
	dmu_objset_close(os);
	if (error == ECKSUM)
		return (0); /* normal end of chain */
	return (error);
}
コード例 #17
0
ファイル: dnode_sync.c プロジェクト: Acidburn0zzz/zfs
static void
dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
{
	dmu_buf_impl_t *db;
	int txgoff = tx->tx_txg & TXG_MASK;
	int nblkptr = dn->dn_phys->dn_nblkptr;
	int old_toplvl = dn->dn_phys->dn_nlevels - 1;
	int new_level = dn->dn_next_nlevels[txgoff];
	int i;

	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);

	/* this dnode can't be paged out because it's dirty */
	ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
	ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0);

	db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG);
	ASSERT(db != NULL);

	dn->dn_phys->dn_nlevels = new_level;
	dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset,
	    dn->dn_object, dn->dn_phys->dn_nlevels);

	/* check for existing blkptrs in the dnode */
	for (i = 0; i < nblkptr; i++)
		if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i]))
			break;
	if (i != nblkptr) {
		/* transfer dnode's block pointers to new indirect block */
		(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
		ASSERT(db->db.db_data);
		ASSERT(arc_released(db->db_buf));
		ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
		bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
		    sizeof (blkptr_t) * nblkptr);
		arc_buf_freeze(db->db_buf);
	}
コード例 #18
0
ファイル: zil.c プロジェクト: harshada/zfs
/*
 * Function called when a log block write completes
 */
static void
zil_lwb_write_done(zio_t *zio)
{
	lwb_t *lwb = zio->io_private;
	zilog_t *zilog = lwb->lwb_zilog;

	ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF);
	ASSERT(BP_GET_CHECKSUM(zio->io_bp) == ZIO_CHECKSUM_ZILOG);
	ASSERT(BP_GET_TYPE(zio->io_bp) == DMU_OT_INTENT_LOG);
	ASSERT(BP_GET_LEVEL(zio->io_bp) == 0);
	ASSERT(BP_GET_BYTEORDER(zio->io_bp) == ZFS_HOST_BYTEORDER);
	ASSERT(!BP_IS_GANG(zio->io_bp));
	ASSERT(!BP_IS_HOLE(zio->io_bp));
	ASSERT(zio->io_bp->blk_fill == 0);

	/*
	 * Ensure the lwb buffer pointer is cleared before releasing
	 * the txg. If we have had an allocation failure and
	 * the txg is waiting to sync then we want want zil_sync()
	 * to remove the lwb so that it's not picked up as the next new
	 * one in zil_commit_writer(). zil_sync() will only remove
	 * the lwb if lwb_buf is null.
	 */
	zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
	mutex_enter(&zilog->zl_lock);
	lwb->lwb_buf = NULL;
	if (zio->io_error)
		zilog->zl_log_error = B_TRUE;

	/*
	 * Now that we've written this log block, we have a stable pointer
	 * to the next block in the chain, so it's OK to let the txg in
	 * which we allocated the next block sync. We still have the
	 * zl_lock to ensure zil_sync doesn't kmem free the lwb.
	 */
	txg_rele_to_sync(&lwb->lwb_txgh);
	mutex_exit(&zilog->zl_lock);
}
コード例 #19
0
ファイル: dmu_traverse.c プロジェクト: ornarium/freebsd
static void
traverse_prefetch_metadata(traverse_data_t *td,
    const blkptr_t *bp, const zbookmark_t *zb)
{
	uint32_t flags = ARC_NOWAIT | ARC_PREFETCH;

	if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
		return;
	/*
	 * If we are in the process of resuming, don't prefetch, because
	 * some children will not be needed (and in fact may have already
	 * been freed).
	 */
	if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume))
		return;
	if (BP_IS_HOLE(bp) || bp->blk_birth <= td->td_min_txg)
		return;
	if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)
		return;

	(void) arc_read(NULL, td->td_spa, bp, NULL, NULL,
	    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
}
コード例 #20
0
ファイル: dsl_destroy.c プロジェクト: koplover/zfs
static int
process_old_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	struct process_old_arg *poa = arg;
	dsl_pool_t *dp = poa->ds->ds_dir->dd_pool;

	ASSERT(!BP_IS_HOLE(bp));

	if (bp->blk_birth <= dsl_dataset_phys(poa->ds)->ds_prev_snap_txg) {
		dsl_deadlist_insert(&poa->ds->ds_deadlist, bp, tx);
		if (poa->ds_prev && !poa->after_branch_point &&
		    bp->blk_birth >
		    dsl_dataset_phys(poa->ds_prev)->ds_prev_snap_txg) {
			dsl_dataset_phys(poa->ds_prev)->ds_unique_bytes +=
			    bp_get_dsize_sync(dp->dp_spa, bp);
		}
	} else {
		poa->used += bp_get_dsize_sync(dp->dp_spa, bp);
		poa->comp += BP_GET_PSIZE(bp);
		poa->uncomp += BP_GET_UCSIZE(bp);
		dsl_free_sync(poa->pio, dp, tx->tx_txg, bp);
	}
	return (0);
}
コード例 #21
0
ファイル: dmu_traverse.c プロジェクト: bahamas10/openzfs
static int
traverse_zil_record(zilog_t *zilog, lr_t *lrc, void *arg, uint64_t claim_txg)
{
	traverse_data_t *td = arg;

	if (lrc->lrc_txtype == TX_WRITE) {
		lr_write_t *lr = (lr_write_t *)lrc;
		blkptr_t *bp = &lr->lr_blkptr;
		zbookmark_phys_t zb;

		if (BP_IS_HOLE(bp))
			return (0);

		if (claim_txg == 0 || bp->blk_birth < claim_txg)
			return (0);

		SET_BOOKMARK(&zb, td->td_objset, lr->lr_foid,
		    ZB_ZIL_LEVEL, lr->lr_offset / BP_GET_LSIZE(bp));

		(void) td->td_func(td->td_spa, zilog, bp, &zb, NULL,
		    td->td_arg);
	}
	return (0);
}
コード例 #22
0
ファイル: dmu_objset.c プロジェクト: roddi/maczfs-10a286
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_impl_t **osip)
{
	objset_impl_t *osi;
	int i, err;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP);
	osi->os.os = osi;
	osi->os_dsl_dataset = ds;
	osi->os_spa = spa;
	osi->os_rootbp = bp;
	if (!BP_IS_HOLE(osi->os_rootbp)) {
		uint32_t aflags = ARC_WAIT;
		zbookmark_t zb;
		zb.zb_objset = ds ? ds->ds_object : 0;
		zb.zb_object = 0;
		zb.zb_level = -1;
		zb.zb_blkid = 0;

		dprintf_bp(osi->os_rootbp, "reading %s", "");
		/*
		 * NB: when bprewrite scrub can change the bp,
		 * and this is called from dmu_objset_open_ds_os, the bp
		 * could change, and we'll need a lock.
		 */
		err = arc_read_nolock(NULL, spa, osi->os_rootbp,
		    arc_getbuf_func, &osi->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
		if (err) {
			kmem_free(osi, sizeof (objset_impl_t));
			return (err);
		}
		osi->os_phys = osi->os_phys_buf->b_data;
	} else {
#ifdef __APPLE_KERNEL__
		osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t),
		    &osi->os_phys_buf, ARC_BUFC_METADATA, TRUE/*alloc_buf*/);
#else
		osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t),
		    &osi->os_phys_buf, ARC_BUFC_METADATA);
#endif
		osi->os_phys = osi->os_phys_buf->b_data;
		bzero(osi->os_phys, sizeof (objset_phys_t));
	}

	/*
	 * Note: the changed_cb will be called once before the register
	 * func returns, thus changing the checksum/compression from the
	 * default (fletcher2/off).  Snapshots don't need to know, and
	 * registering would complicate clone promotion.
	 */
	if (ds && ds->ds_phys->ds_num_children == 0) {
		err = dsl_prop_register(ds, "checksum",
		    checksum_changed_cb, osi);
		if (err == 0)
			err = dsl_prop_register(ds, "compression",
			    compression_changed_cb, osi);
		if (err == 0)
			err = dsl_prop_register(ds, "copies",
			    copies_changed_cb, osi);
		if (err) {
			VERIFY(arc_buf_remove_ref(osi->os_phys_buf,
			    &osi->os_phys_buf) == 1);
			kmem_free(osi, sizeof (objset_impl_t));
			return (err);
		}
	} else if (ds == NULL) {
		/* It's the meta-objset. */
		osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		osi->os_compress = ZIO_COMPRESS_LZJB;
		osi->os_copies = spa_max_replication(spa);
	}

	osi->os_zil_header = osi->os_phys->os_zil_header;
	osi->os_zil = zil_alloc(&osi->os, &osi->os_zil_header);

	for (i = 0; i < TXG_SIZE; i++) {
		list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
		list_create(&osi->os_free_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
	}
	list_create(&osi->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&osi->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	osi->os_meta_dnode = dnode_special_open(osi,
	    &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);

	/*
	 * We should be the only thread trying to do this because we
	 * have ds_opening_lock
	 */
	if (ds) {
		VERIFY(NULL == dsl_dataset_set_user_ptr(ds, osi,
		    dmu_objset_evict));
	}

	*osip = osi;
	return (0);
}
コード例 #23
0
/*
 * dsl_crypto_key_clone
 *
 * Our caller (dmu_objset_create_sync) must have a lock on the dataset.
 */
int
dsl_crypto_key_clone(dsl_dir_t *dd, dsl_dataset_phys_t *dsphys,
                     uint64_t dsobj, dsl_dataset_t *clone_origin,
                     dsl_crypto_ctx_t *ctx, dmu_tx_t *tx)
{
    zcrypt_key_t *txgkey;
    zcrypt_key_t *wrappingkey = NULL;
    dsl_dataset_t *origin;
    uint64_t crypt;
    spa_t *spa = dd->dd_pool->dp_spa;
    int error = 0;

    ASSERT(ctx != NULL);

    if (BP_IS_HOLE(&dsphys->ds_bp)) {
        origin = ctx->dcc_origin_ds;
    } else {
        origin = clone_origin;
    }
    ASSERT(origin != NULL);

    /*
     * Need to look at the value of crypt on the origin snapshot
     * since it is sticky for encryption.
     */
    VERIFY(dsl_prop_get_ds(origin, zfs_prop_to_name(ZFS_PROP_ENCRYPTION),
                           8, 1, &crypt,/* DSL_PROP_GET_EFFECTIVE,*/ NULL) == 0);

    if (crypt == ZIO_CRYPT_OFF) {
        return (0);
    }

    wrappingkey = ctx->dcc_wrap_key;

    dsl_keychain_create_obj(dd, tx);
    dsl_keychain_clone_phys(origin, dd, tx, wrappingkey);

    if (ctx->dcc_salt != 0) {
        objset_t *mos = dd->dd_pool->dp_meta_objset;
        uint64_t props_zapobj = dsl_dir_phys(dd)->dd_props_zapobj;

        error = zap_update(mos, props_zapobj,
                           zfs_prop_to_name(ZFS_PROP_SALT), 8, 1,
                           (void *)&ctx->dcc_salt, tx);
    }

    if (ctx->dcc_clone_newkey) {
        caddr_t wkeybuf;
        size_t wkeylen;
        /*
         * Generate a new key and add it to the keychain
         * to be valid from this txg onwards.
         */
        zcrypt_key_hold(wrappingkey, FTAG);
        txgkey = zcrypt_key_gen(crypt);
        VERIFY(zcrypt_wrap_key(wrappingkey, txgkey,
                               &wkeybuf, &wkeylen, zio_crypt_select_wrap(crypt)) == 0);
        zcrypt_key_release(wrappingkey, FTAG);
        dsl_keychain_set_key(dd, tx, wkeybuf, wkeylen,
                             dsphys->ds_creation_time);
        kmem_free(wkeybuf, wkeylen);
        zcrypt_key_free(txgkey);
        spa_history_log_internal(spa, "key create", tx,
                                 "rekey succeeded dataset = %llu from dataset = %llu",
                                 dsobj, clone_origin->ds_object);
    } else {
        spa_history_log_internal(spa, "key create", tx,
                                 "succeeded dataset = %llu from dataset = %llu",
                                 dsobj, clone_origin->ds_object);
    }

    if (wrappingkey != NULL) {
        error = dsl_keychain_load_dd(dd, dsobj, crypt, wrappingkey);
    }

    return (error);
}
コード例 #24
0
ファイル: zvol.c プロジェクト: alek-p/zfs
/*
 * Get data to generate a TX_WRITE intent log record.
 */
static int
zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
{
	zvol_state_t *zv = arg;
	objset_t *os = zv->zv_objset;
	uint64_t object = ZVOL_OBJ;
	uint64_t offset = lr->lr_offset;
	uint64_t size = lr->lr_length;
	blkptr_t *bp = &lr->lr_blkptr;
	dmu_buf_t *db;
	zgd_t *zgd;
	int error;

	ASSERT(zio != NULL);
	ASSERT(size != 0);

	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
	zgd->zgd_zilog = zv->zv_zilog;
	zgd->zgd_rl = zfs_range_lock(&zv->zv_range_lock, offset, size,
	    RL_READER);

	/*
	 * Write records come in two flavors: immediate and indirect.
	 * For small writes it's cheaper to store the data with the
	 * log record (immediate); for large writes it's cheaper to
	 * sync the data and get a pointer to it (indirect) so that
	 * we don't have to write the data twice.
	 */
	if (buf != NULL) { /* immediate write */
		error = dmu_read(os, object, offset, size, buf,
		    DMU_READ_NO_PREFETCH);
	} else {
		size = zv->zv_volblocksize;
		offset = P2ALIGN_TYPED(offset, size, uint64_t);
		error = dmu_buf_hold(os, object, offset, zgd, &db,
		    DMU_READ_NO_PREFETCH);
		if (error == 0) {
			blkptr_t *obp = dmu_buf_get_blkptr(db);
			if (obp) {
				ASSERT(BP_IS_HOLE(bp));
				*bp = *obp;
			}

			zgd->zgd_db = db;
			zgd->zgd_bp = &lr->lr_blkptr;

			ASSERT(db != NULL);
			ASSERT(db->db_offset == offset);
			ASSERT(db->db_size == size);

			error = dmu_sync(zio, lr->lr_common.lrc_txg,
			    zvol_get_done, zgd);

			if (error == 0)
				return (0);
		}
	}

	zvol_get_done(zgd, error);

	return (SET_ERROR(error));
}
コード例 #25
0
ファイル: dmu_objset.c プロジェクト: ngkaho1234/freebsd
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_t **osp)
{
	objset_t *os;
	int i, err;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	os = kmem_zalloc(sizeof (objset_t), KM_SLEEP);
	os->os_dsl_dataset = ds;
	os->os_spa = spa;
	os->os_rootbp = bp;
	if (!BP_IS_HOLE(os->os_rootbp)) {
		arc_flags_t aflags = ARC_FLAG_WAIT;
		zbookmark_phys_t zb;
		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

		if (DMU_OS_IS_L2CACHEABLE(os))
			aflags |= ARC_FLAG_L2CACHE;
		if (DMU_OS_IS_L2COMPRESSIBLE(os))
			aflags |= ARC_FLAG_L2COMPRESS;

		dprintf_bp(os->os_rootbp, "reading %s", "");
		err = arc_read(NULL, spa, os->os_rootbp,
		    arc_getbuf_func, &os->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
		if (err != 0) {
			kmem_free(os, sizeof (objset_t));
			/* convert checksum errors into IO errors */
			if (err == ECKSUM)
				err = SET_ERROR(EIO);
			return (err);
		}

		/* Increase the blocksize if we are permitted. */
		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
			arc_buf_t *buf = arc_buf_alloc(spa,
			    sizeof (objset_phys_t), &os->os_phys_buf,
			    ARC_BUFC_METADATA);
			bzero(buf->b_data, sizeof (objset_phys_t));
			bcopy(os->os_phys_buf->b_data, buf->b_data,
			    arc_buf_size(os->os_phys_buf));
			(void) arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf);
			os->os_phys_buf = buf;
		}

		os->os_phys = os->os_phys_buf->b_data;
		os->os_flags = os->os_phys->os_flags;
	} else {
		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
		os->os_phys_buf = arc_buf_alloc(spa, size,
		    &os->os_phys_buf, ARC_BUFC_METADATA);
		os->os_phys = os->os_phys_buf->b_data;
		bzero(os->os_phys, size);
	}

	/*
	 * Note: the changed_cb will be called once before the register
	 * func returns, thus changing the checksum/compression from the
	 * default (fletcher2/off).  Snapshots don't need to know about
	 * checksum/compression/copies.
	 */
	if (ds != NULL) {
		err = dsl_prop_register(ds,
		    zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
		    primary_cache_changed_cb, os);
		if (err == 0) {
			err = dsl_prop_register(ds,
			    zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
			    secondary_cache_changed_cb, os);
		}
		if (!ds->ds_is_snapshot) {
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_CHECKSUM),
				    checksum_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
				    compression_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_COPIES),
				    copies_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_DEDUP),
				    dedup_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_LOGBIAS),
				    logbias_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_SYNC),
				    sync_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(
				    ZFS_PROP_REDUNDANT_METADATA),
				    redundant_metadata_changed_cb, os);
			}
			if (err == 0) {
				err = dsl_prop_register(ds,
				    zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
				    recordsize_changed_cb, os);
			}
		}
		if (err != 0) {
			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf));
			kmem_free(os, sizeof (objset_t));
			return (err);
		}
	} else {
		/* It's the meta-objset. */
		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		os->os_compress = ZIO_COMPRESS_ON;
		os->os_copies = spa_max_replication(spa);
		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
		os->os_dedup_verify = B_FALSE;
		os->os_logbias = ZFS_LOGBIAS_LATENCY;
		os->os_sync = ZFS_SYNC_STANDARD;
		os->os_primary_cache = ZFS_CACHE_ALL;
		os->os_secondary_cache = ZFS_CACHE_ALL;
	}

	if (ds == NULL || !ds->ds_is_snapshot)
		os->os_zil_header = os->os_phys->os_zil_header;
	os->os_zil = zil_alloc(os, &os->os_zil_header);

	for (i = 0; i < TXG_SIZE; i++) {
		list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
		list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
	}
	list_create(&os->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	dnode_special_open(os, &os->os_phys->os_meta_dnode,
	    DMU_META_DNODE_OBJECT, &os->os_meta_dnode);
	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
		dnode_special_open(os, &os->os_phys->os_userused_dnode,
		    DMU_USERUSED_OBJECT, &os->os_userused_dnode);
		dnode_special_open(os, &os->os_phys->os_groupused_dnode,
		    DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
	}

	*osp = os;
	return (0);
}
コード例 #26
0
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_t **osp)
{
	objset_t *os;
	int i, err = 0;

	ASSERT(ds == NULL || MUTEX_HELD(&ds->ds_opening_lock));

	os = kmem_zalloc(sizeof (objset_t), KM_PUSHPAGE);
	os->os_dsl_dataset = ds;
	os->os_spa = spa;
	os->os_rootbp = bp;
	if (!BP_IS_HOLE(os->os_rootbp)) {
		uint32_t aflags = ARC_WAIT;
		zbookmark_t zb;
		SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
		    ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);

		if (DMU_OS_IS_L2CACHEABLE(os))
			aflags |= ARC_L2CACHE;

		dprintf_bp(os->os_rootbp, "reading %s", "");
		/*
		 * XXX when bprewrite scrub can change the bp,
		 * and this is called from dmu_objset_open_ds_os, the bp
		 * could change, and we'll need a lock.
		 */
		err = dsl_read_nolock(NULL, spa, os->os_rootbp,
		    arc_getbuf_func, &os->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
		if (err) {
			kmem_free(os, sizeof (objset_t));
			/* convert checksum errors into IO errors */
			if (err == ECKSUM)
				err = EIO;
			return (err);
		}

		/* Increase the blocksize if we are permitted. */
		if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
		    arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
			arc_buf_t *buf = arc_buf_alloc(spa,
			    sizeof (objset_phys_t), &os->os_phys_buf,
			    ARC_BUFC_METADATA);
			bzero(buf->b_data, sizeof (objset_phys_t));
			bcopy(os->os_phys_buf->b_data, buf->b_data,
			    arc_buf_size(os->os_phys_buf));
			(void) arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf);
			os->os_phys_buf = buf;
		}

		os->os_phys = os->os_phys_buf->b_data;
		os->os_flags = os->os_phys->os_flags;
	} else {
		int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
		    sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
		os->os_phys_buf = arc_buf_alloc(spa, size,
		    &os->os_phys_buf, ARC_BUFC_METADATA);
		os->os_phys = os->os_phys_buf->b_data;
		bzero(os->os_phys, size);
	}

	/*
	 * Note: the changed_cb will be called once before the register
	 * func returns, thus changing the checksum/compression from the
	 * default (fletcher2/off).  Snapshots don't need to know about
	 * checksum/compression/copies.  But they do need to know about
	 * encryption so that clones from the snaphost inherit the
	 * same encryption property regardless of where in the namespace
	 * they get created.
	 */
	if (ds) {
		err = dsl_prop_register(ds, "primarycache",
		    primary_cache_changed_cb, os);
		if (err == 0)
			err = dsl_prop_register(ds, "secondarycache",
                                    secondary_cache_changed_cb, os);
		if (err == 0)
		  err = dsl_prop_register(ds, "encryption",
					  crypt_changed_cb, os);

		if (!dsl_dataset_is_snapshot(ds)) {
			if (err == 0)
				err = dsl_prop_register(ds, "checksum",
				    checksum_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "compression",
				    compression_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "copies",
				    copies_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "dedup",
				    dedup_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "logbias",
				    logbias_changed_cb, os);
			if (err == 0)
				err = dsl_prop_register(ds, "sync",
				    sync_changed_cb, os);
		}
		if (err) {
			VERIFY(arc_buf_remove_ref(os->os_phys_buf,
			    &os->os_phys_buf) == 1);
			kmem_free(os, sizeof (objset_t));
			return (err);
		}
	} else if (ds == NULL) {
        /*
         * It's the meta-objset.
         * Encryption is off for ZFS metadata but on for ZPL metadata
         * and file/zvol contents.
         */
		os->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		os->os_compress = ZIO_COMPRESS_LZJB;
		os->os_copies = spa_max_replication(spa);
		os->os_dedup_checksum = ZIO_CHECKSUM_OFF;
		os->os_dedup_verify = 0;
		os->os_logbias = 0;
		os->os_sync = 0;
		os->os_primary_cache = ZFS_CACHE_ALL;
		os->os_secondary_cache = ZFS_CACHE_ALL;
        os->os_crypt = ZIO_CRYPT_OFF;
	}

	if (ds == NULL || !dsl_dataset_is_snapshot(ds))
		os->os_zil_header = os->os_phys->os_zil_header;
	os->os_zil = zil_alloc(os, &os->os_zil_header);

	for (i = 0; i < TXG_SIZE; i++) {
		list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
		list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
	}
	list_create(&os->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&os->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);

	DMU_META_DNODE(os) = dnode_special_open(os,
	    &os->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT,
	    &os->os_meta_dnode);
	if (arc_buf_size(os->os_phys_buf) >= sizeof (objset_phys_t)) {
		DMU_USERUSED_DNODE(os) = dnode_special_open(os,
		    &os->os_phys->os_userused_dnode, DMU_USERUSED_OBJECT,
		    &os->os_userused_dnode);
		DMU_GROUPUSED_DNODE(os) = dnode_special_open(os,
		    &os->os_phys->os_groupused_dnode, DMU_GROUPUSED_OBJECT,
		    &os->os_groupused_dnode);
	}

	/*
	 * We should be the only thread trying to do this because we
	 * have ds_opening_lock
	 */
	if (ds) {
		mutex_enter(&ds->ds_lock);
		ASSERT(ds->ds_objset == NULL);
		ds->ds_objset = os;
		mutex_exit(&ds->ds_lock);
	}

	*osp = os;

	return (0);
}
コード例 #27
0
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
    objset_impl_t **osip)
{
	objset_impl_t *winner, *osi;
	int i, err, checksum;

	osi = kmem_zalloc(sizeof (objset_impl_t), KM_SLEEP);
	osi->os.os = osi;
	osi->os_dsl_dataset = ds;
	osi->os_spa = spa;
	osi->os_rootbp = bp;
	if (!BP_IS_HOLE(osi->os_rootbp)) {
		uint32_t aflags = ARC_WAIT;
		zbookmark_t zb;
		zb.zb_objset = ds ? ds->ds_object : 0;
		zb.zb_object = 0;
		zb.zb_level = -1;
		zb.zb_blkid = 0;

		dprintf_bp(osi->os_rootbp, "reading %s", "");
		err = arc_read(NULL, spa, osi->os_rootbp,
		    dmu_ot[DMU_OT_OBJSET].ot_byteswap,
		    arc_getbuf_func, &osi->os_phys_buf,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &aflags, &zb);
		if (err) {
			kmem_free(osi, sizeof (objset_impl_t));
			return (err);
		}
		osi->os_phys = osi->os_phys_buf->b_data;
		arc_release(osi->os_phys_buf, &osi->os_phys_buf);
	} else {
		osi->os_phys_buf = arc_buf_alloc(spa, sizeof (objset_phys_t),
		    &osi->os_phys_buf, ARC_BUFC_METADATA);
		osi->os_phys = osi->os_phys_buf->b_data;
		bzero(osi->os_phys, sizeof (objset_phys_t));
	}

	/*
	 * Note: the changed_cb will be called once before the register
	 * func returns, thus changing the checksum/compression from the
	 * default (fletcher2/off).  Snapshots don't need to know, and
	 * registering would complicate clone promotion.
	 */
	if (ds && ds->ds_phys->ds_num_children == 0) {
		err = dsl_prop_register(ds, "checksum",
		    checksum_changed_cb, osi);
		if (err == 0)
			err = dsl_prop_register(ds, "compression",
			    compression_changed_cb, osi);
		if (err == 0)
			err = dsl_prop_register(ds, "copies",
			    copies_changed_cb, osi);
		if (err) {
			VERIFY(arc_buf_remove_ref(osi->os_phys_buf,
			    &osi->os_phys_buf) == 1);
			kmem_free(osi, sizeof (objset_impl_t));
			return (err);
		}
	} else if (ds == NULL) {
		/* It's the meta-objset. */
		osi->os_checksum = ZIO_CHECKSUM_FLETCHER_4;
		osi->os_compress = ZIO_COMPRESS_LZJB;
		osi->os_copies = spa_max_replication(spa);
	}

	osi->os_zil = zil_alloc(&osi->os, &osi->os_phys->os_zil_header);

	/*
	 * Metadata always gets compressed and checksummed.
	 * If the data checksum is multi-bit correctable, and it's not
	 * a ZBT-style checksum, then it's suitable for metadata as well.
	 * Otherwise, the metadata checksum defaults to fletcher4.
	 */
	checksum = osi->os_checksum;

	if (zio_checksum_table[checksum].ci_correctable &&
	    !zio_checksum_table[checksum].ci_zbt)
		osi->os_md_checksum = checksum;
	else
		osi->os_md_checksum = ZIO_CHECKSUM_FLETCHER_4;
	osi->os_md_compress = ZIO_COMPRESS_LZJB;

	for (i = 0; i < TXG_SIZE; i++) {
		list_create(&osi->os_dirty_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
		list_create(&osi->os_free_dnodes[i], sizeof (dnode_t),
		    offsetof(dnode_t, dn_dirty_link[i]));
	}
	list_create(&osi->os_dnodes, sizeof (dnode_t),
	    offsetof(dnode_t, dn_link));
	list_create(&osi->os_downgraded_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));

	mutex_init(&osi->os_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&osi->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);

	osi->os_meta_dnode = dnode_special_open(osi,
	    &osi->os_phys->os_meta_dnode, DMU_META_DNODE_OBJECT);

	if (ds != NULL) {
		winner = dsl_dataset_set_user_ptr(ds, osi, dmu_objset_evict);
		if (winner) {
			dmu_objset_evict(ds, osi);
			osi = winner;
		}
	}

	*osip = osi;
	return (0);
}
コード例 #28
0
ファイル: dmu_traverse.c プロジェクト: bahamas10/openzfs
static int
traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
    const blkptr_t *bp, const zbookmark_phys_t *zb)
{
	zbookmark_phys_t czb;
	int err = 0;
	arc_buf_t *buf = NULL;
	prefetch_data_t *pd = td->td_pfd;
	boolean_t hard = td->td_flags & TRAVERSE_HARD;

	switch (resume_skip_check(td, dnp, zb)) {
	case RESUME_SKIP_ALL:
		return (0);
	case RESUME_SKIP_CHILDREN:
		goto post;
	case RESUME_SKIP_NONE:
		break;
	default:
		ASSERT(0);
	}

	if (bp->blk_birth == 0) {
		/*
		 * Since this block has a birth time of 0 it must be one of
		 * two things: a hole created before the
		 * SPA_FEATURE_HOLE_BIRTH feature was enabled, or a hole
		 * which has always been a hole in an object.
		 *
		 * If a file is written sparsely, then the unwritten parts of
		 * the file were "always holes" -- that is, they have been
		 * holes since this object was allocated.  However, we (and
		 * our callers) can not necessarily tell when an object was
		 * allocated.  Therefore, if it's possible that this object
		 * was freed and then its object number reused, we need to
		 * visit all the holes with birth==0.
		 *
		 * If it isn't possible that the object number was reused,
		 * then if SPA_FEATURE_HOLE_BIRTH was enabled before we wrote
		 * all the blocks we will visit as part of this traversal,
		 * then this hole must have always existed, so we can skip
		 * it.  We visit blocks born after (exclusive) td_min_txg.
		 *
		 * Note that the meta-dnode cannot be reallocated.
		 */
		if (!send_holes_without_birth_time &&
		    (!td->td_realloc_possible ||
		    zb->zb_object == DMU_META_DNODE_OBJECT) &&
		    td->td_hole_birth_enabled_txg <= td->td_min_txg)
			return (0);
	} else if (bp->blk_birth <= td->td_min_txg) {
		return (0);
	}

	if (pd != NULL && !pd->pd_exited && prefetch_needed(pd, bp)) {
		uint64_t size = BP_GET_LSIZE(bp);
		mutex_enter(&pd->pd_mtx);
		ASSERT(pd->pd_bytes_fetched >= 0);
		while (pd->pd_bytes_fetched < size && !pd->pd_exited)
			cv_wait(&pd->pd_cv, &pd->pd_mtx);
		pd->pd_bytes_fetched -= size;
		cv_broadcast(&pd->pd_cv);
		mutex_exit(&pd->pd_mtx);
	}

	if (BP_IS_HOLE(bp)) {
		err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
		if (err != 0)
			goto post;
		return (0);
	}

	if (td->td_flags & TRAVERSE_PRE) {
		err = td->td_func(td->td_spa, NULL, bp, zb, dnp,
		    td->td_arg);
		if (err == TRAVERSE_VISIT_NO_CHILDREN)
			return (0);
		if (err != 0)
			goto post;
	}

	if (BP_GET_LEVEL(bp) > 0) {
		arc_flags_t flags = ARC_FLAG_WAIT;
		int i;
		blkptr_t *cbp;
		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;

		err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
		if (err != 0)
			goto post;
		cbp = buf->b_data;

		for (i = 0; i < epb; i++) {
			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
			    zb->zb_level - 1,
			    zb->zb_blkid * epb + i);
			traverse_prefetch_metadata(td, &cbp[i], &czb);
		}

		/* recursively visitbp() blocks below this */
		for (i = 0; i < epb; i++) {
			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
			    zb->zb_level - 1,
			    zb->zb_blkid * epb + i);
			err = traverse_visitbp(td, dnp, &cbp[i], &czb);
			if (err != 0)
				break;
		}
	} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
コード例 #29
0
ファイル: zdb_il.c プロジェクト: Acidburn0zzz/zfs
/* ARGSUSED */
static void
zil_prt_rec_write(zilog_t *zilog, int txtype, lr_write_t *lr)
{
	char *data, *dlimit;
	blkptr_t *bp = &lr->lr_blkptr;
	zbookmark_phys_t zb;
	char buf[SPA_MAXBLOCKSIZE];
	int verbose = MAX(dump_opt['d'], dump_opt['i']);
	int error;

	(void) printf("%sfoid %llu, offset %llx, length %llx\n", prefix,
	    (u_longlong_t)lr->lr_foid, (u_longlong_t)lr->lr_offset,
	    (u_longlong_t)lr->lr_length);

	if (txtype == TX_WRITE2 || verbose < 5)
		return;

	if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
		(void) printf("%shas blkptr, %s\n", prefix,
		    !BP_IS_HOLE(bp) &&
		    bp->blk_birth >= spa_first_txg(zilog->zl_spa) ?
		    "will claim" : "won't claim");
		print_log_bp(bp, prefix);

		if (BP_IS_HOLE(bp)) {
			(void) printf("\t\t\tLSIZE 0x%llx\n",
			    (u_longlong_t)BP_GET_LSIZE(bp));
			bzero(buf, sizeof (buf));
			(void) printf("%s<hole>\n", prefix);
			return;
		}
		if (bp->blk_birth < zilog->zl_header->zh_claim_txg) {
			(void) printf("%s<block already committed>\n", prefix);
			return;
		}

		SET_BOOKMARK(&zb, dmu_objset_id(zilog->zl_os),
		    lr->lr_foid, ZB_ZIL_LEVEL,
		    lr->lr_offset / BP_GET_LSIZE(bp));

		error = zio_wait(zio_read(NULL, zilog->zl_spa,
		    bp, buf, BP_GET_LSIZE(bp), NULL, NULL,
		    ZIO_PRIORITY_SYNC_READ, ZIO_FLAG_CANFAIL, &zb));
		if (error)
			return;
		data = buf;
	} else {
		data = (char *)(lr + 1);
	}

	dlimit = data + MIN(lr->lr_length,
	    (verbose < 6 ? 20 : SPA_MAXBLOCKSIZE));

	(void) printf("%s", prefix);
	while (data < dlimit) {
		if (isprint(*data))
			(void) printf("%c ", *data);
		else
			(void) printf("%2hhX", *data);
		data++;
	}
	(void) printf("\n");
}
コード例 #30
0
ファイル: dmu_traverse.c プロジェクト: ornarium/freebsd
static int
traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
    const blkptr_t *bp, const zbookmark_t *zb)
{
	zbookmark_t czb;
	int err = 0, lasterr = 0;
	arc_buf_t *buf = NULL;
	prefetch_data_t *pd = td->td_pfd;
	boolean_t hard = td->td_flags & TRAVERSE_HARD;
	boolean_t pause = B_FALSE;

	switch (resume_skip_check(td, dnp, zb)) {
	case RESUME_SKIP_ALL:
		return (0);
	case RESUME_SKIP_CHILDREN:
		goto post;
	case RESUME_SKIP_NONE:
		break;
	default:
		ASSERT(0);
	}

	if (BP_IS_HOLE(bp)) {
		err = td->td_func(td->td_spa, NULL, NULL, zb, dnp, td->td_arg);
		return (err);
	}

	if (bp->blk_birth <= td->td_min_txg)
		return (0);

	if (pd && !pd->pd_exited &&
	    ((pd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
	    BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0)) {
		mutex_enter(&pd->pd_mtx);
		ASSERT(pd->pd_blks_fetched >= 0);
		while (pd->pd_blks_fetched == 0 && !pd->pd_exited)
			cv_wait(&pd->pd_cv, &pd->pd_mtx);
		pd->pd_blks_fetched--;
		cv_broadcast(&pd->pd_cv);
		mutex_exit(&pd->pd_mtx);
	}

	if (td->td_flags & TRAVERSE_PRE) {
		err = td->td_func(td->td_spa, NULL, bp, zb, dnp,
		    td->td_arg);
		if (err == TRAVERSE_VISIT_NO_CHILDREN)
			return (0);
		if (err == ERESTART)
			pause = B_TRUE; /* handle pausing at a common point */
		if (err != 0)
			goto post;
	}

	if (BP_GET_LEVEL(bp) > 0) {
		uint32_t flags = ARC_WAIT;
		int i;
		blkptr_t *cbp;
		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;

		err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
		if (err)
			return (err);
		cbp = buf->b_data;

		for (i = 0; i < epb; i++) {
			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
			    zb->zb_level - 1,
			    zb->zb_blkid * epb + i);
			traverse_prefetch_metadata(td, &cbp[i], &czb);
		}

		/* recursively visitbp() blocks below this */
		for (i = 0; i < epb; i++) {
			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
			    zb->zb_level - 1,
			    zb->zb_blkid * epb + i);
			err = traverse_visitbp(td, dnp, &cbp[i], &czb);
			if (err) {
				if (!hard)
					break;
				lasterr = err;
			}
		}
	} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {