Beispiel #1
0
/*
 * Hold the spill block of the dnode behind `bonus`, but only if the dnode
 * already has one.
 *
 * Returns EINVAL when the pool version is older than SPA_VERSION_SA,
 * ENOENT when the dnode has no spill block, and otherwise whatever
 * dmu_spill_hold_by_dnode() returns (with the hold stored in *dbp).
 */
int
dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp)
{
	dmu_buf_impl_t *bonus_impl = (dmu_buf_impl_t *)bonus;
	dnode_t *dnode;
	int rc;

	DB_DNODE_ENTER(bonus_impl);
	dnode = DB_DNODE(bonus_impl);

	if (spa_version(dnode->dn_objset->os_spa) < SPA_VERSION_SA) {
		rc = SET_ERROR(EINVAL);
		goto out;
	}

	/* dn_have_spill is examined with the struct lock held as reader. */
	rw_enter(&dnode->dn_struct_rwlock, RW_READER);
	if (dnode->dn_have_spill) {
		rc = dmu_spill_hold_by_dnode(dnode,
		    DB_RF_HAVESTRUCT | DB_RF_CANFAIL, tag, dbp);
	} else {
		rc = SET_ERROR(ENOENT);
	}
	rw_exit(&dnode->dn_struct_rwlock);

out:
	DB_DNODE_EXIT(bonus_impl);
	return (rc);
}
Beispiel #2
0
/*
 * Hold the spill block of the dnode that `bonus` belongs to.  The result
 * of dmu_spill_hold_by_dnode() (called with DB_RF_CANFAIL) is returned
 * unchanged and the held buffer is stored in *dbp.
 */
int
dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp)
{
	dmu_buf_impl_t *bonus_impl = (dmu_buf_impl_t *)bonus;
	int rc;

	DB_DNODE_ENTER(bonus_impl);
	rc = dmu_spill_hold_by_dnode(DB_DNODE(bonus_impl), DB_RF_CANFAIL,
	    tag, dbp);
	DB_DNODE_EXIT(bonus_impl);

	return (rc);
}
Beispiel #3
0
/*
 * Return the bonus type (dn_bonustype) of the dnode that owns `db_fake`.
 */
dmu_object_type_t
dmu_get_bonustype(dmu_buf_t *db_fake)
{
	dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)db_fake;
	dmu_object_type_t bonustype;

	DB_DNODE_ENTER(dbuf);
	bonustype = DB_DNODE(dbuf)->dn_bonustype;
	DB_DNODE_EXIT(dbuf);

	return (bonustype);
}
Beispiel #4
0
/*
 * Change the bonus type of the dnode that owns `db_fake`.
 *
 * Returns EINVAL if `type` is not a valid object type or if `db_fake` is
 * not the dnode's bonus buffer; otherwise the type is set through
 * dnode_setbonus_type() and 0 is returned.
 */
int
dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx)
{
	dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)db_fake;
	dnode_t *dnode;
	int rc = 0;

	DB_DNODE_ENTER(dbuf);
	dnode = DB_DNODE(dbuf);

	if (!DMU_OT_IS_VALID(type) || dnode->dn_bonus != dbuf)
		rc = SET_ERROR(EINVAL);
	else
		dnode_setbonus_type(dnode, type, tx);

	DB_DNODE_EXIT(dbuf);
	return (rc);
}
Beispiel #5
0
/*
 * Set the bonus length of the dnode that owns `db_fake` to `newsize`.
 *
 * Returns EINVAL if `db_fake` is not the dnode's bonus buffer or if
 * `newsize` is negative or larger than the buffer's db_size; otherwise
 * the length is set through dnode_setbonuslen() and 0 is returned.
 */
int
dmu_set_bonus(dmu_buf_t *db_fake, int newsize, dmu_tx_t *tx)
{
	dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)db_fake;
	dnode_t *dnode;
	int rc = 0;

	DB_DNODE_ENTER(dbuf);
	dnode = DB_DNODE(dbuf);

	if (dnode->dn_bonus != dbuf ||
	    newsize < 0 || newsize > db_fake->db_size)
		rc = SET_ERROR(EINVAL);
	else
		dnode_setbonuslen(dnode, newsize, tx);

	DB_DNODE_EXIT(dbuf);
	return (rc);
}
Beispiel #6
0
/*
 * Decide whether a block may be L2-cached with respect to DDT metadata
 * placement.
 *
 * B_TRUE is returned when the pool has no special class, when the
 * objset's special-class flags lack SPECIAL_FLAG_DATAMETA, or when the
 * owning dnode is not of a DDT metadata type.  For DDT metadata the
 * answer is B_TRUE unless the pool's meta policy has spa_ddt_to_special
 * set to META_PLACEMENT_ON.
 */
boolean_t
dbuf_ddt_is_l2cacheable(dmu_buf_impl_t *db)
{
	spa_t *spa = db->db_objset->os_spa;
	dmu_object_type_t obj_type;
	uint64_t flags;
	boolean_t datameta;

	if (!spa_has_special(spa))
		return (B_TRUE);

	flags = spa_specialclass_flags(db->db_objset);
	datameta = ((flags & SPECIAL_FLAG_DATAMETA) != 0);

	DB_DNODE_ENTER(db);
	obj_type = DB_DNODE(db)->dn_type;
	DB_DNODE_EXIT(db);

	if (!(DMU_OT_IS_DDT_META(obj_type) && datameta))
		return (B_TRUE);

	return (spa->spa_meta_policy.spa_ddt_to_special !=
	    META_PLACEMENT_ON);
}
Beispiel #7
0
/*
 * Decide whether a block may be L2-cached with respect to metadata
 * placement.
 *
 * B_TRUE is returned when the owning dnode's type is not metadata
 * (per dmu_ot[]) or when spa_meta_to_special() reports that this type is
 * not placed on the special class.  Otherwise the result of
 * spa_meta_is_dual() for this type is returned.
 */
boolean_t
dbuf_meta_is_l2cacheable(dmu_buf_impl_t *db)
{
	spa_t *spa = db->db_objset->os_spa;
	dmu_object_type_t obj_type;

	DB_DNODE_ENTER(db);
	obj_type = DB_DNODE(db)->dn_type;
	DB_DNODE_EXIT(db);

	if (!dmu_ot[obj_type].ot_metadata ||
	    !spa_meta_to_special(spa, db->db_objset, obj_type))
		return (B_TRUE);

	return (spa_meta_is_dual(spa, db->db_objset->os_zpl_meta_to_special,
	    obj_type));
}
Beispiel #8
0
/*
 * Add, remove or replace a single attribute and then rewrite the entire
 * set of attributes.
 *
 * The current bonus (and, if present, spill) contents are copied aside,
 * a bulk descriptor covering every attribute that should exist afterwards
 * is assembled from those copies plus the caller's new data, and the
 * descriptor is handed to sa_build_layouts() to lay the attributes out
 * again.
 *
 *	hdl		SA handle; the caller must hold hdl->sa_lock
 *	newattr		attribute being added, removed or replaced
 *	action		SA_ADD, SA_REMOVE or SA_REPLACE
 *	locator		data locator stored in newattr's descriptor
 *			(not used for SA_REMOVE)
 *	datastart	new data for newattr (not used for SA_REMOVE)
 *	buflen		length of the new data
 *	tx		transaction passed to sa_build_layouts()
 *
 * Returns 0 on success, the error from sa_get_spill() (other than ENOENT,
 * which only means there is no spill block) or from sa_build_layouts().
 */
static int
sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
    sa_data_op_t action, sa_data_locator_t *locator, void *datastart,
    uint16_t buflen, dmu_tx_t *tx)
{
	sa_os_t *sa = hdl->sa_os->os_sa;
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus;
	dnode_t *dn;
	sa_bulk_attr_t *attr_desc;
	void *old_data[2];
	int bonus_attr_count = 0;
	int bonus_data_size = 0;
	int spill_data_size = 0;
	int spill_attr_count = 0;
	int error;
	uint16_t length;
	int i, j, k, length_idx;
	sa_hdr_phys_t *hdr;
	sa_idx_tab_t *idx_tab;
	int attr_count;
	int count;

	ASSERT(MUTEX_HELD(&hdl->sa_lock));

	/* First make a copy of the old data */

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	if (dn->dn_bonuslen != 0) {
		bonus_data_size = hdl->sa_bonus->db_size;
		old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP);
		bcopy(hdl->sa_bonus->db_data, old_data[0],
		    hdl->sa_bonus->db_size);
		bonus_attr_count = hdl->sa_bonus_tab->sa_layout->lot_attr_count;
	} else {
		old_data[0] = NULL;
	}
	DB_DNODE_EXIT(db);

	/* Bring spill buffer online if it isn't currently */

	if ((error = sa_get_spill(hdl)) == 0) {
		spill_data_size = hdl->sa_spill->db_size;
		old_data[1] = kmem_alloc(spill_data_size, KM_SLEEP);
		bcopy(hdl->sa_spill->db_data, old_data[1],
		    hdl->sa_spill->db_size);
		spill_attr_count =
		    hdl->sa_spill_tab->sa_layout->lot_attr_count;
	} else if (error != ENOENT) {
		/* A real failure: undo the bonus copy and give up. */
		if (old_data[0])
			kmem_free(old_data[0], bonus_data_size);
		return (error);
	} else {
		/* ENOENT: no spill block, only the bonus is processed. */
		old_data[1] = NULL;
	}

	/* build descriptor of all attributes */

	attr_count = bonus_attr_count + spill_attr_count;
	if (action == SA_ADD)
		attr_count++;
	else if (action == SA_REMOVE)
		attr_count--;

	attr_desc = kmem_zalloc(sizeof (sa_bulk_attr_t) * attr_count, KM_SLEEP);

	/*
	 * loop through bonus and spill buffer if it exists, and
	 * build up new attr_descriptor to reset the attributes
	 *
	 * j is the next free slot in attr_desc; it is only ever advanced
	 * by SA_ADD_BULK_ATTR() so the descriptor stays densely packed.
	 */
	k = j = 0;
	count = bonus_attr_count;
	hdr = SA_GET_HDR(hdl, SA_BONUS);
	idx_tab = SA_IDX_TAB_GET(hdl, SA_BONUS);
	for (; k != 2; k++) {
		/* iterate over each attribute in layout */
		for (i = 0, length_idx = 0; i != count; i++) {
			sa_attr_type_t attr;

			attr = idx_tab->sa_layout->lot_attrs[i];

			/*
			 * Look up the old length first.  A variable-sized
			 * attribute (registered length 0) owns one slot in
			 * the header's sa_lengths[] array, and that slot
			 * has to be consumed even when the attribute itself
			 * is being removed or replaced; otherwise every
			 * later variable-sized attribute would pick up the
			 * wrong length.
			 */
			length = SA_REGISTERED_LEN(sa, attr);
			if (length == 0) {
				length = hdr->sa_lengths[length_idx++];
			}

			if (attr == newattr) {
				/*
				 * A removed attribute simply gets no
				 * descriptor.  No slot may be skipped here:
				 * attr_desc was sized without it.
				 */
				if (action == SA_REMOVE)
					continue;

				ASSERT(SA_REGISTERED_LEN(sa, attr) == 0);
				ASSERT(action == SA_REPLACE);
				SA_ADD_BULK_ATTR(attr_desc, j, attr,
				    locator, datastart, buflen);
			} else {
				/* Unchanged: point at the saved old copy. */
				SA_ADD_BULK_ATTR(attr_desc, j, attr,
				    NULL, (void *)
				    (TOC_OFF(idx_tab->sa_idx_tab[attr]) +
				    (uintptr_t)old_data[k]), length);
			}
		}
		if (k == 0 && hdl->sa_spill) {
			hdr = SA_GET_HDR(hdl, SA_SPILL);
			idx_tab = SA_IDX_TAB_GET(hdl, SA_SPILL);
			count = spill_attr_count;
		} else {
			break;
		}
	}
	if (action == SA_ADD) {
		/* The new attribute goes last, with the caller's length. */
		SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator,
		    datastart, buflen);
	}

	/* Every descriptor slot must have been filled exactly once. */
	ASSERT(j == attr_count);

	error = sa_build_layouts(hdl, attr_desc, attr_count, tx);

	if (old_data[0])
		kmem_free(old_data[0], bonus_data_size);
	if (old_data[1])
		kmem_free(old_data[1], spill_data_size);
	kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count);

	return (error);
}
Beispiel #9
0
/*
 * Raise the number of indirection levels of `dn` to the value recorded in
 * dn_next_nlevels[] for tx's txg.
 *
 * The block pointers currently stored in the dnode are copied into a
 * newly held indirect dbuf (level == old dn_nlevels, blkid 0), any cached
 * dbufs of the old top level that hung directly off the dnode are
 * re-parented to that dbuf, and the dnode's own block pointer array is
 * then zeroed.  The whole operation runs with dn_struct_rwlock held as
 * writer.
 */
static void
dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
{
	dmu_buf_impl_t *db;
	int txgoff = tx->tx_txg & TXG_MASK;
	int nblkptr = dn->dn_phys->dn_nblkptr;
	int old_toplvl = dn->dn_phys->dn_nlevels - 1;
	int new_level = dn->dn_next_nlevels[txgoff];
	int i;

	rw_enter(&dn->dn_struct_rwlock, RW_WRITER);

	/* this dnode can't be paged out because it's dirty */
	ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE);
	ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock));
	ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0);

	/*
	 * Hold the dbuf one level above the current top level; it is taken
	 * before dn_nlevels is updated, so dn_nlevels still has the old value.
	 */
	db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG);
	ASSERT(db != NULL);

	dn->dn_phys->dn_nlevels = new_level;
	dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset,
	    dn->dn_object, dn->dn_phys->dn_nlevels);

	/* transfer dnode's block pointers to new indirect block */
	(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
	ASSERT(db->db.db_data);
	ASSERT(arc_released(db->db_buf));
	ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
	bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
	    sizeof (blkptr_t) * nblkptr);
	arc_buf_freeze(db->db_buf);

	/* set dbuf's parent pointers to new indirect buf */
	for (i = 0; i < nblkptr; i++) {
		/*
		 * NOTE(review): dbuf_find() presumably returns the child with
		 * its db_mtx held -- every path below that keeps `child`
		 * drops that mutex before moving on.  Confirm against
		 * dbuf_find().
		 */
		dmu_buf_impl_t *child =
		    dbuf_find(dn->dn_objset, dn->dn_object, old_toplvl, i);

		/* No dbuf found for this block pointer: nothing to fix up. */
		if (child == NULL)
			continue;
#ifdef	DEBUG
		DB_DNODE_ENTER(child);
		ASSERT3P(DB_DNODE(child), ==, dn);
		DB_DNODE_EXIT(child);
#endif	/* DEBUG */
		/*
		 * A child whose parent is already some dbuf other than the
		 * dnode's own dbuf is left alone.
		 */
		if (child->db_parent && child->db_parent != dn->dn_dbuf) {
			ASSERT(child->db_parent->db_level == db->db_level);
			ASSERT(child->db_blkptr !=
			    &dn->dn_phys->dn_blkptr[child->db_blkid]);
			mutex_exit(&child->db_mtx);
			continue;
		}
		ASSERT(child->db_parent == NULL ||
		    child->db_parent == dn->dn_dbuf);

		/*
		 * Re-parent the child to the new indirect dbuf and point its
		 * block pointer at slot i of the copied array.
		 */
		child->db_parent = db;
		dbuf_add_ref(db, child);
		if (db->db.db_data)
			child->db_blkptr = (blkptr_t *)db->db.db_data + i;
		else
			child->db_blkptr = NULL;
		dprintf_dbuf_bp(child, child->db_blkptr,
		    "changed db_blkptr to new indirect %s", "");

		mutex_exit(&child->db_mtx);
	}

	/* The block pointers now live in the indirect block; clear the dnode's. */
	bzero(dn->dn_phys->dn_blkptr, sizeof (blkptr_t) * nblkptr);

	dbuf_rele(db, FTAG);

	rw_exit(&dn->dn_struct_rwlock);
}
Beispiel #10
0
/*
 * Free the part of the level-0 block range [blkid, blkid + nblks - 1]
 * that lies beneath the indirect dbuf `db`.
 *
 * At level 1 the affected block pointers in `db` are freed directly with
 * free_blocks(); at higher levels the function recurses into every
 * non-hole child covering the range.  Afterwards, if every block pointer
 * in `db` is a hole, the buffer is zeroed and db's own block is freed as
 * well.
 */
static void
free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
    dmu_tx_t *tx)
{
	dnode_t *dn;
	blkptr_t *bp;
	dmu_buf_impl_t *subdb;
	uint64_t start, end, dbstart, dbend, i;
	int epbs, shift;

	/*
	 * There is a small possibility that this block will not be cached:
	 *   1 - if level > 1 and there are no children with level <= 1
	 *   2 - if this block was evicted since we read it from
	 *	 dmu_tx_hold_free().
	 */
	if (db->db_state != DB_CACHED)
		(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);

	dbuf_release_bp(db);
	bp = db->db.db_data;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	/* epbs: log2 of the number of block pointers per indirect block */
	epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
	/* shift converts a level-0 blkid into a blkid one level below db */
	shift = (db->db_level - 1) * epbs;
	/*
	 * [dbstart, dbend] is the range of child blkids this dbuf covers;
	 * [start, end] is the requested range clamped to it.  bp is advanced
	 * so that it points at the entry for `start`.
	 */
	dbstart = db->db_blkid << epbs;
	start = blkid >> shift;
	if (dbstart < start) {
		bp += start - dbstart;
	} else {
		start = dbstart;
	}
	dbend = ((db->db_blkid + 1) << epbs) - 1;
	end = (blkid + nblks - 1) >> shift;
	if (dbend <= end)
		end = dbend;

	ASSERT3U(start, <=, end);

	if (db->db_level == 1) {
		/* Children are level-0 blocks: free their pointers directly. */
		FREE_VERIFY(db, start, end, tx);
		free_blocks(dn, bp, end-start+1, tx);
	} else {
		/* Children are indirect blocks: recurse into each non-hole. */
		for (i = start; i <= end; i++, bp++) {
			if (BP_IS_HOLE(bp))
				continue;
			rw_enter(&dn->dn_struct_rwlock, RW_READER);
			VERIFY0(dbuf_hold_impl(dn, db->db_level - 1,
			    i, TRUE, FALSE, FTAG, &subdb));
			rw_exit(&dn->dn_struct_rwlock);
			ASSERT3P(bp, ==, subdb->db_blkptr);

			free_children(subdb, blkid, nblks, tx);
			dbuf_rele(subdb, FTAG);
		}
	}

	/* If this whole block is free, free ourself too. */
	for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) {
		if (!BP_IS_HOLE(bp))
			break;
	}
	if (i == 1 << epbs) {
		/* didn't find any non-holes */
		bzero(db->db.db_data, db->db.db_size);
		free_blocks(dn, db->db_blkptr, 1, tx);
	} else {
		/*
		 * Partial block free; must be marked dirty so that it
		 * will be written out.
		 */
		ASSERT(db->db_dirtycnt > 0);
	}

	DB_DNODE_EXIT(db);
	arc_buf_freeze(db->db_buf);
}
Beispiel #11
0
/*
 * Sanity check for a range of level-0 blocks that is about to be freed.
 *
 * `db` must be a level-1 indirect dbuf and [start, end] a range of
 * level-0 blkids beneath it.  For every child in the range that can be
 * held, the function panics if the data dirtied in tx's txg, or the
 * child's current data (when it is neither being filled nor dirty), is
 * not entirely zero.  Children for which dbuf_hold_impl() returns ENOENT
 * are skipped.
 */
static void
free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
{
	int off, num;
	int i, err, epbs;
	uint64_t txg = tx->tx_txg;
	dnode_t *dn;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
	/* off: index of `start` within this indirect block */
	off = start - (db->db_blkid << epbs);
	num = end - start + 1;

	/*
	 * off and num are signed, so they are checked with a signed
	 * comparison; an unsigned ">= 0" test could never fail.
	 */
	ASSERT(off >= 0);
	ASSERT(num >= 0);
	ASSERT3U(db->db_level, >, 0);
	ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
	ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT);
	ASSERT(db->db_blkptr != NULL);

	for (i = off; i < off+num; i++) {
		uint64_t *buf;
		dmu_buf_impl_t *child;
		dbuf_dirty_record_t *dr;
		int j;

		ASSERT(db->db_level == 1);

		rw_enter(&dn->dn_struct_rwlock, RW_READER);
		err = dbuf_hold_impl(dn, db->db_level-1,
		    (db->db_blkid << epbs) + i, TRUE, FALSE, FTAG, &child);
		rw_exit(&dn->dn_struct_rwlock);
		if (err == ENOENT)
			continue;
		ASSERT(err == 0);
		ASSERT(child->db_level == 0);

		/* Find the dirty record for this txg, skipping newer ones. */
		dr = child->db_last_dirty;
		while (dr && dr->dr_txg > txg)
			dr = dr->dr_next;
		ASSERT(dr == NULL || dr->dr_txg == txg);

		/* data_old better be zeroed */
		if (dr) {
			buf = dr->dt.dl.dr_data->b_data;
			/* db_size >> 3: number of 64-bit words to check */
			for (j = 0; j < child->db.db_size >> 3; j++) {
				if (buf[j] != 0) {
					panic("freed data not zero: "
					    "child=%p i=%d off=%d num=%d\n",
					    (void *)child, i, off, num);
				}
			}
		}

		/*
		 * db_data better be zeroed unless it's dirty in a
		 * future txg.
		 */
		mutex_enter(&child->db_mtx);
		buf = child->db.db_data;
		if (buf != NULL && child->db_state != DB_FILL &&
		    child->db_last_dirty == NULL) {
			for (j = 0; j < child->db.db_size >> 3; j++) {
				if (buf[j] != 0) {
					panic("freed data not zero: "
					    "child=%p i=%d off=%d num=%d\n",
					    (void *)child, i, off, num);
				}
			}
		}
		mutex_exit(&child->db_mtx);

		dbuf_rele(child, FTAG);
	}
	DB_DNODE_EXIT(db);
}