/* ARGSUSED */
static void
ready(zio_t *zio, arc_buf_t *abuf, void *arg)
{
	blkptr_t *bp = zio->io_bp;
	blkptr_t *bp_orig = &zio->io_bp_orig;
	objset_impl_t *os = arg;
	dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;

	ASSERT(bp == os->os_rootbp);
	ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET);
	ASSERT(BP_GET_LEVEL(bp) == 0);

	/*
	 * Update rootbp fill count.
	 */
	bp->blk_fill = 1;	/* count the meta-dnode */
	for (int i = 0; i < dnp->dn_nblkptr; i++)
		bp->blk_fill += dnp->dn_blkptr[i].blk_fill;

	if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
		ASSERT(DVA_EQUAL(BP_IDENTITY(bp), BP_IDENTITY(bp_orig)));
	} else {
		if (zio->io_bp_orig.blk_birth == os->os_synctx->tx_txg)
			(void) dsl_dataset_block_kill(os->os_dsl_dataset,
			    &zio->io_bp_orig, zio, os->os_synctx);
		dsl_dataset_block_born(os->os_dsl_dataset, bp, os->os_synctx);
	}
}
Esempio n. 2
0
/* ARGSUSED */
static void
ready(zio_t *zio, arc_buf_t *abuf, void *arg)
{
	int i;

	blkptr_t *bp = zio->io_bp;
	objset_impl_t *os = arg;
	dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
	ASSERTV(blkptr_t *bp_orig = &zio->io_bp_orig);

	ASSERT(bp == os->os_rootbp);
	ASSERT(BP_GET_TYPE(bp) == DMU_OT_OBJSET);
	ASSERT(BP_GET_LEVEL(bp) == 0);

	/*
	 * Update rootbp fill count: it should be the number of objects
	 * allocated in the object set (not counting the "special"
	 * objects that are stored in the objset_phys_t -- the meta
	 * dnode and user/group accounting objects).
	 */
	bp->blk_fill = 0;
	for (i = 0; i < dnp->dn_nblkptr; i++)
		bp->blk_fill += dnp->dn_blkptr[i].blk_fill;

	if (zio->io_flags & ZIO_FLAG_IO_REWRITE) {
		ASSERT(DVA_EQUAL(BP_IDENTITY(bp), BP_IDENTITY(bp_orig)));
	} else {
		if (zio->io_bp_orig.blk_birth == os->os_synctx->tx_txg)
			(void) dsl_dataset_block_kill(os->os_dsl_dataset,
			    &zio->io_bp_orig, zio, os->os_synctx);
		dsl_dataset_block_born(os->os_dsl_dataset, bp, os->os_synctx);
	}
}
Esempio n. 3
0
static boolean_t
prefetch_needed(prefetch_data_t *pfd, const blkptr_t *bp)
{
	ASSERT(pfd->pd_flags & TRAVERSE_PREFETCH_DATA);
	if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp) ||
	    BP_GET_TYPE(bp) == DMU_OT_INTENT_LOG)
		return (B_FALSE);
	return (B_TRUE);
}
Esempio n. 4
0
/* ARGSUSED */
static int
backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
	dmu_sendarg_t *dsp = arg;
	dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
	int err = 0;

	if (issig(JUSTLOOKING) && issig(FORREAL))
		return (EINTR);

	if (zb->zb_object != DMU_META_DNODE_OBJECT &&
	    DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
		return (0);
	} else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) {
		uint64_t span = BP_SPAN(dnp, zb->zb_level);
		uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
		err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT);
	} else if (bp == NULL) {
Esempio n. 5
0
static void
free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx)
{
	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
	uint64_t bytesfreed = 0;
	int i;

	dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num);

	for (i = 0; i < num; i++, bp++) {
		uint64_t lsize, lvl;
		dmu_object_type_t type;

		if (BP_IS_HOLE(bp))
			continue;

		bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE);
		ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys));

		/*
		 * Save some useful information on the holes being
		 * punched, including logical size, type, and indirection
		 * level. Retaining birth time enables detection of when
		 * holes are punched for reducing the number of free
		 * records transmitted during a zfs send.
		 */

		lsize = BP_GET_LSIZE(bp);
		type = BP_GET_TYPE(bp);
		lvl = BP_GET_LEVEL(bp);

		bzero(bp, sizeof (blkptr_t));

		if (spa_feature_is_active(dn->dn_objset->os_spa,
		    SPA_FEATURE_HOLE_BIRTH)) {
			BP_SET_LSIZE(bp, lsize);
			BP_SET_TYPE(bp, type);
			BP_SET_LEVEL(bp, lvl);
			BP_SET_BIRTH(bp, dmu_tx_get_txg(tx), 0);
		}
	}
	dnode_diduse_space(dn, -bytesfreed);
}
Esempio n. 6
0
/* ARGSUSED */
static void
dsl_scan_prefetch(dsl_scan_t *scn, arc_buf_t *buf, blkptr_t *bp,
    uint64_t objset, uint64_t object, uint64_t blkid)
{
	zbookmark_t czb;
	uint32_t flags = ARC_NOWAIT | ARC_PREFETCH;

	if (zfs_no_scrub_prefetch)
		return;

	if (BP_IS_HOLE(bp) || bp->blk_birth <= scn->scn_phys.scn_min_txg ||
	    (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE))
		return;

	SET_BOOKMARK(&czb, objset, object, BP_GET_LEVEL(bp), blkid);

	(void) arc_read(scn->scn_zio_root, scn->scn_dp->dp_spa, bp,
	    NULL, NULL, ZIO_PRIORITY_ASYNC_READ,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SCAN_THREAD, &flags, &czb);
}
Esempio n. 7
0
/*
 * Determine if the I/O in question should return failure.  Returns the errno
 * to be returned to the caller.
 */
int
zio_handle_fault_injection(zio_t *zio, int error)
{
	int ret = 0;
	inject_handler_t *handler;

	/*
	 * Ignore I/O not associated with any logical data.
	 */
	if (zio->io_logical == NULL)
		return (0);

	/*
	 * Currently, we only support fault injection on reads.
	 */
	if (zio->io_type != ZIO_TYPE_READ)
		return (0);

	rw_enter(&inject_lock, RW_READER);

	for (handler = list_head(&inject_handlers); handler != NULL;
	    handler = list_next(&inject_handlers, handler)) {

		if (zio->io_spa != handler->zi_spa ||
		    handler->zi_record.zi_cmd != ZINJECT_DATA_FAULT)
			continue;

		/* If this handler matches, return EIO */
		if (zio_match_handler(&zio->io_logical->io_bookmark,
		    zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE,
		    &handler->zi_record, error)) {
			ret = error;
			break;
		}
	}

	rw_exit(&inject_lock);

	return (ret);
}
Esempio n. 8
0
File: zil.c Progetto: harshada/zfs
/*
 * Function called when a log block write completes
 */
static void
zil_lwb_write_done(zio_t *zio)
{
	lwb_t *lwb = zio->io_private;
	zilog_t *zilog = lwb->lwb_zilog;

	ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF);
	ASSERT(BP_GET_CHECKSUM(zio->io_bp) == ZIO_CHECKSUM_ZILOG);
	ASSERT(BP_GET_TYPE(zio->io_bp) == DMU_OT_INTENT_LOG);
	ASSERT(BP_GET_LEVEL(zio->io_bp) == 0);
	ASSERT(BP_GET_BYTEORDER(zio->io_bp) == ZFS_HOST_BYTEORDER);
	ASSERT(!BP_IS_GANG(zio->io_bp));
	ASSERT(!BP_IS_HOLE(zio->io_bp));
	ASSERT(zio->io_bp->blk_fill == 0);

	/*
	 * Ensure the lwb buffer pointer is cleared before releasing
	 * the txg. If we have had an allocation failure and
	 * the txg is waiting to sync then we want want zil_sync()
	 * to remove the lwb so that it's not picked up as the next new
	 * one in zil_commit_writer(). zil_sync() will only remove
	 * the lwb if lwb_buf is null.
	 */
	zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
	mutex_enter(&zilog->zl_lock);
	lwb->lwb_buf = NULL;
	if (zio->io_error)
		zilog->zl_log_error = B_TRUE;

	/*
	 * Now that we've written this log block, we have a stable pointer
	 * to the next block in the chain, so it's OK to let the txg in
	 * which we allocated the next block sync. We still have the
	 * zl_lock to ensure zil_sync doesn't kmem free the lwb.
	 */
	txg_rele_to_sync(&lwb->lwb_txgh);
	mutex_exit(&zilog->zl_lock);
}
Esempio n. 9
0
static void
traverse_prefetch_metadata(traverse_data_t *td,
    const blkptr_t *bp, const zbookmark_t *zb)
{
	uint32_t flags = ARC_NOWAIT | ARC_PREFETCH;

	if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA))
		return;
	/*
	 * If we are in the process of resuming, don't prefetch, because
	 * some children will not be needed (and in fact may have already
	 * been freed).
	 */
	if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume))
		return;
	if (BP_IS_HOLE(bp) || bp->blk_birth <= td->td_min_txg)
		return;
	if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE)
		return;

	(void) arc_read(NULL, td->td_spa, bp, NULL, NULL,
	    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
}
Esempio n. 10
0
static int
traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
    arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
{
	zbookmark_t czb;
	int err = 0, lasterr = 0;
	arc_buf_t *buf = NULL;
	prefetch_data_t *pd = td->td_pfd;
	boolean_t hard = td->td_flags & TRAVERSE_HARD;

	if (bp->blk_birth == 0) {
		err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp,
		    td->td_arg);
		return (err);
	}

	if (bp->blk_birth <= td->td_min_txg)
		return (0);

	if (pd && !pd->pd_exited &&
	    ((pd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
	    BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0)) {
		mutex_enter(&pd->pd_mtx);
		ASSERT(pd->pd_blks_fetched >= 0);
		while (pd->pd_blks_fetched == 0 && !pd->pd_exited)
			cv_wait(&pd->pd_cv, &pd->pd_mtx);
		pd->pd_blks_fetched--;
		cv_broadcast(&pd->pd_cv);
		mutex_exit(&pd->pd_mtx);
	}

	if (td->td_flags & TRAVERSE_PRE) {
		err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
		    td->td_arg);
		if (err == TRAVERSE_VISIT_NO_CHILDREN)
			return (0);
		if (err)
			return (err);
	}

	if (BP_GET_LEVEL(bp) > 0) {
		uint32_t flags = ARC_WAIT;
		int i;
		blkptr_t *cbp;
		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;

		err = dsl_read(NULL, td->td_spa, bp, pbuf,
		    arc_getbuf_func, &buf,
		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
		if (err)
			return (err);

		/* recursively visitbp() blocks below this */
		cbp = buf->b_data;
		for (i = 0; i < epb; i++, cbp++) {
			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
			    zb->zb_level - 1,
			    zb->zb_blkid * epb + i);
			err = traverse_visitbp(td, dnp, buf, cbp, &czb);
			if (err) {
				if (!hard)
					break;
				lasterr = err;
			}
		}
	} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
Esempio n. 11
0
int
zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
    enum zio_checksum checksum, abd_t *abd, uint64_t size, uint64_t offset,
    zio_bad_cksum_t *info)
{
	zio_checksum_info_t *ci = &zio_checksum_table[checksum];
	zio_cksum_t actual_cksum, expected_cksum;
	zio_eck_t eck;
	int byteswap;

	if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
		return (SET_ERROR(EINVAL));

	zio_checksum_template_init(checksum, spa);

	if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
		zio_cksum_t verifier;
		size_t eck_offset;

		if (checksum == ZIO_CHECKSUM_ZILOG2) {
			zil_chain_t zilc;
			uint64_t nused;

			abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t));

			eck = zilc.zc_eck;
			eck_offset = offsetof(zil_chain_t, zc_eck) +
			    offsetof(zio_eck_t, zec_cksum);

			if (eck.zec_magic == ZEC_MAGIC) {
				nused = zilc.zc_nused;
			} else if (eck.zec_magic == BSWAP_64(ZEC_MAGIC)) {
				nused = BSWAP_64(zilc.zc_nused);
			} else {
				return (SET_ERROR(ECKSUM));
			}

			if (nused > size) {
				return (SET_ERROR(ECKSUM));
			}

			size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t);
		} else {
			eck_offset = size - sizeof (zio_eck_t);
			abd_copy_to_buf_off(&eck, abd, eck_offset,
			    sizeof (zio_eck_t));
			eck_offset += offsetof(zio_eck_t, zec_cksum);
		}

		if (checksum == ZIO_CHECKSUM_GANG_HEADER)
			zio_checksum_gang_verifier(&verifier, bp);
		else if (checksum == ZIO_CHECKSUM_LABEL)
			zio_checksum_label_verifier(&verifier, offset);
		else
			verifier = bp->blk_cksum;

		byteswap = (eck.zec_magic == BSWAP_64(ZEC_MAGIC));

		if (byteswap)
			byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));

		expected_cksum = eck.zec_cksum;

		abd_copy_from_buf_off(abd, &verifier, eck_offset,
		    sizeof (zio_cksum_t));

		ci->ci_func[byteswap](abd, size,
		    spa->spa_cksum_tmpls[checksum], &actual_cksum);

		abd_copy_from_buf_off(abd, &expected_cksum, eck_offset,
		    sizeof (zio_cksum_t));

		if (byteswap) {
			byteswap_uint64_array(&expected_cksum,
			    sizeof (zio_cksum_t));
		}
	} else {
		byteswap = BP_SHOULD_BYTESWAP(bp);
		expected_cksum = bp->blk_cksum;
		ci->ci_func[byteswap](abd, size,
		    spa->spa_cksum_tmpls[checksum], &actual_cksum);
	}

	/*
	 * MAC checksums are a special case since half of this checksum will
	 * actually be the encryption MAC. This will be verified by the
	 * decryption process, so we just check the truncated checksum now.
	 * Objset blocks use embedded MACs so we don't truncate the checksum
	 * for them.
	 */
	if (bp != NULL && BP_USES_CRYPT(bp) &&
	    BP_GET_TYPE(bp) != DMU_OT_OBJSET) {
		if (!(ci->ci_flags & ZCHECKSUM_FLAG_DEDUP)) {
			actual_cksum.zc_word[0] ^= actual_cksum.zc_word[2];
			actual_cksum.zc_word[1] ^= actual_cksum.zc_word[3];
		}

		actual_cksum.zc_word[2] = 0;
		actual_cksum.zc_word[3] = 0;
		expected_cksum.zc_word[2] = 0;
		expected_cksum.zc_word[3] = 0;
	}

	if (info != NULL) {
		info->zbc_expected = expected_cksum;
		info->zbc_actual = actual_cksum;
		info->zbc_checksum_name = ci->ci_name;
		info->zbc_byteswapped = byteswap;
		info->zbc_injected = 0;
		info->zbc_has_cksum = 1;
	}

	if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
		return (SET_ERROR(ECKSUM));

	return (0);
}
Esempio n. 12
0
/*
 * Generate the checksum.
 */
void
zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
    abd_t *abd, uint64_t size)
{
	static const uint64_t zec_magic = ZEC_MAGIC;
	blkptr_t *bp = zio->io_bp;
	uint64_t offset = zio->io_offset;
	zio_checksum_info_t *ci = &zio_checksum_table[checksum];
	zio_cksum_t cksum, saved;
	spa_t *spa = zio->io_spa;
	boolean_t insecure = (ci->ci_flags & ZCHECKSUM_FLAG_DEDUP) == 0;

	ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS);
	ASSERT(ci->ci_func[0] != NULL);

	zio_checksum_template_init(checksum, spa);

	if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
		zio_eck_t eck;
		size_t eck_offset;

		bzero(&saved, sizeof (zio_cksum_t));

		if (checksum == ZIO_CHECKSUM_ZILOG2) {
			zil_chain_t zilc;
			abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t));

			size = P2ROUNDUP_TYPED(zilc.zc_nused, ZIL_MIN_BLKSZ,
			    uint64_t);
			eck = zilc.zc_eck;
			eck_offset = offsetof(zil_chain_t, zc_eck);
		} else {
			eck_offset = size - sizeof (zio_eck_t);
			abd_copy_to_buf_off(&eck, abd, eck_offset,
			    sizeof (zio_eck_t));
		}

		if (checksum == ZIO_CHECKSUM_GANG_HEADER) {
			zio_checksum_gang_verifier(&eck.zec_cksum, bp);
		} else if (checksum == ZIO_CHECKSUM_LABEL) {
			zio_checksum_label_verifier(&eck.zec_cksum, offset);
		} else {
			saved = eck.zec_cksum;
			eck.zec_cksum = bp->blk_cksum;
		}

		abd_copy_from_buf_off(abd, &zec_magic,
		    eck_offset + offsetof(zio_eck_t, zec_magic),
		    sizeof (zec_magic));
		abd_copy_from_buf_off(abd, &eck.zec_cksum,
		    eck_offset + offsetof(zio_eck_t, zec_cksum),
		    sizeof (zio_cksum_t));

		ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum],
		    &cksum);
		if (bp != NULL && BP_USES_CRYPT(bp) &&
		    BP_GET_TYPE(bp) != DMU_OT_OBJSET)
			zio_checksum_handle_crypt(&cksum, &saved, insecure);

		abd_copy_from_buf_off(abd, &cksum,
		    eck_offset + offsetof(zio_eck_t, zec_cksum),
		    sizeof (zio_cksum_t));
	} else {
		saved = bp->blk_cksum;
		ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum],
		    &cksum);
		if (BP_USES_CRYPT(bp) && BP_GET_TYPE(bp) != DMU_OT_OBJSET)
			zio_checksum_handle_crypt(&cksum, &saved, insecure);
		bp->blk_cksum = cksum;
	}
}
Esempio n. 13
0
static int
traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
    const blkptr_t *bp, const zbookmark_t *zb)
{
	zbookmark_t czb;
	int err = 0, lasterr = 0;
	arc_buf_t *buf = NULL;
	prefetch_data_t *pd = td->td_pfd;
	boolean_t hard = td->td_flags & TRAVERSE_HARD;
	boolean_t pause = B_FALSE;

	switch (resume_skip_check(td, dnp, zb)) {
	case RESUME_SKIP_ALL:
		return (0);
	case RESUME_SKIP_CHILDREN:
		goto post;
	case RESUME_SKIP_NONE:
		break;
	default:
		ASSERT(0);
	}

	if (BP_IS_HOLE(bp)) {
		err = td->td_func(td->td_spa, NULL, NULL, zb, dnp, td->td_arg);
		return (err);
	}

	if (bp->blk_birth <= td->td_min_txg)
		return (0);

	if (pd && !pd->pd_exited &&
	    ((pd->pd_flags & TRAVERSE_PREFETCH_DATA) ||
	    BP_GET_TYPE(bp) == DMU_OT_DNODE || BP_GET_LEVEL(bp) > 0)) {
		mutex_enter(&pd->pd_mtx);
		ASSERT(pd->pd_blks_fetched >= 0);
		while (pd->pd_blks_fetched == 0 && !pd->pd_exited)
			cv_wait(&pd->pd_cv, &pd->pd_mtx);
		pd->pd_blks_fetched--;
		cv_broadcast(&pd->pd_cv);
		mutex_exit(&pd->pd_mtx);
	}

	if (td->td_flags & TRAVERSE_PRE) {
		err = td->td_func(td->td_spa, NULL, bp, zb, dnp,
		    td->td_arg);
		if (err == TRAVERSE_VISIT_NO_CHILDREN)
			return (0);
		if (err == ERESTART)
			pause = B_TRUE; /* handle pausing at a common point */
		if (err != 0)
			goto post;
	}

	if (BP_GET_LEVEL(bp) > 0) {
		uint32_t flags = ARC_WAIT;
		int i;
		blkptr_t *cbp;
		int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;

		err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
		    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
		if (err)
			return (err);
		cbp = buf->b_data;

		for (i = 0; i < epb; i++) {
			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
			    zb->zb_level - 1,
			    zb->zb_blkid * epb + i);
			traverse_prefetch_metadata(td, &cbp[i], &czb);
		}

		/* recursively visitbp() blocks below this */
		for (i = 0; i < epb; i++) {
			SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
			    zb->zb_level - 1,
			    zb->zb_blkid * epb + i);
			err = traverse_visitbp(td, dnp, &cbp[i], &czb);
			if (err) {
				if (!hard)
					break;
				lasterr = err;
			}
		}
	} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {