Ejemplo n.º 1
0
Archivo: dmu.c Proyecto: koplover/zfs
/*
 * Set the bonus type of the dnode behind db_fake by calling
 * dnode_setbonus_type() in transaction tx.
 *
 * Returns 0 on success, or EINVAL when `type` fails DMU_OT_IS_VALID() or
 * when db_fake is not the dnode's bonus buffer (dn_bonus).
 */
int
dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx)
{
	dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)db_fake;
	dnode_t *dnode;
	int rc = 0;

	DB_DNODE_ENTER(dbuf);
	dnode = DB_DNODE(dbuf);

	/* The type is validated first, then bonus-buffer identity. */
	if (!DMU_OT_IS_VALID(type) || dnode->dn_bonus != dbuf) {
		rc = SET_ERROR(EINVAL);
	} else {
		dnode_setbonus_type(dnode, type, tx);
	}

	DB_DNODE_EXIT(dbuf);
	return (rc);
}
Ejemplo n.º 2
0
Archivo: dmu.c Proyecto: koplover/zfs
/*
 * Set the bonus length of the dnode behind db_fake to newsize by calling
 * dnode_setbonuslen() in transaction tx.
 *
 * Returns 0 on success, or EINVAL when db_fake is not the dnode's bonus
 * buffer (dn_bonus) or when newsize is negative or larger than
 * db_fake->db_size.
 */
int
dmu_set_bonus(dmu_buf_t *db_fake, int newsize, dmu_tx_t *tx)
{
	dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)db_fake;
	dnode_t *dnode;
	int rc = 0;

	DB_DNODE_ENTER(dbuf);
	dnode = DB_DNODE(dbuf);

	/* Bonus-buffer identity is checked before the size range. */
	if (dnode->dn_bonus != dbuf ||
	    newsize < 0 || newsize > db_fake->db_size) {
		rc = SET_ERROR(EINVAL);
	} else {
		dnode_setbonuslen(dnode, newsize, tx);
	}

	DB_DNODE_EXIT(dbuf);
	return (rc);
}
Ejemplo n.º 3
0
/*
 * Decide whether a dbuf may be L2-cached with respect to DDT metadata.
 *
 * Returns B_TRUE when:
 *   - the pool has no special class (spa_has_special() is false), or
 *   - the dnode type is not DDT metadata, or
 *   - SPECIAL_FLAG_DATAMETA is not set in the objset's special-class flags.
 * Otherwise the answer is B_TRUE only if the pool's spa_ddt_to_special
 * policy is something other than META_PLACEMENT_ON.
 */
boolean_t
dbuf_ddt_is_l2cacheable(dmu_buf_impl_t *db)
{
	spa_t *spa = db->db_objset->os_spa;
	dmu_object_type_t type;
	boolean_t datameta;

	if (!spa_has_special(spa))
		return (B_TRUE);

	datameta =
	    (spa_specialclass_flags(db->db_objset) & SPECIAL_FLAG_DATAMETA) != 0;

	/* The dnode pointer is only used between ENTER and EXIT. */
	DB_DNODE_ENTER(db);
	type = DB_DNODE(db)->dn_type;
	DB_DNODE_EXIT(db);

	if (DMU_OT_IS_DDT_META(type) && datameta) {
		return (spa->spa_meta_policy.spa_ddt_to_special !=
		    META_PLACEMENT_ON);
	}

	return (B_TRUE);
}
Ejemplo n.º 4
0
/*
 * Decide whether a dbuf may be L2-cached with respect to its metadata type.
 *
 * Returns B_TRUE when the dnode's type is not flagged as metadata in
 * dmu_ot[], or when spa_meta_to_special() reports that this type is not
 * placed on the special class. Otherwise the result is whatever
 * spa_meta_is_dual() returns for the objset's os_zpl_meta_to_special
 * setting and this type.
 */
boolean_t
dbuf_meta_is_l2cacheable(dmu_buf_impl_t *db)
{
	spa_t *spa = db->db_objset->os_spa;
	dmu_object_type_t type;

	/* The dnode pointer is only used between ENTER and EXIT. */
	DB_DNODE_ENTER(db);
	type = DB_DNODE(db)->dn_type;
	DB_DNODE_EXIT(db);

	if (!dmu_ot[type].ot_metadata)
		return (B_TRUE);

	if (!spa_meta_to_special(spa, db->db_objset, type))
		return (B_TRUE);

	return (spa_meta_is_dual(spa, db->db_objset->os_zpl_meta_to_special,
	    type));
}
Ejemplo n.º 5
0
/*
 * Set extended attribute `name` to the contents of `buf` in the object's
 * in-memory xattr nvlist (obj->oo_sa_xattr), then push the nvlist out via
 * __osd_sa_xattr_update() or __osd_sa_xattr_schedule_update().
 *
 * If the nvlist has not been loaded yet it is populated first with
 * __osd_xattr_cache().
 *
 * The value is considered "too big" when buf->lb_len exceeds
 * DXATTR_MAX_ENTRY_SIZE, or when the XDR-encoded size of the current
 * nvlist plus buf->lb_len exceeds DXATTR_MAX_SA_SIZE.
 *
 * Return values visible in this function:
 *   0        on success
 *   -EEXIST  the attribute exists and LU_XATTR_CREATE is set in fl
 *   -ENODATA the attribute does not exist and LU_XATTR_REPLACE is set
 *   -EFBIG   the value is too big; an existing entry of the same name has
 *            been removed from the nvlist and an update scheduled
 *   other    negative error from the helpers called below
 *
 * NOTE(review): the nvlist/ZAP/DMU calls are negated on assignment,
 * presumably because they return positive errno values - confirm.
 */
int __osd_sa_xattr_set(const struct lu_env *env, struct osd_object *obj,
		       const struct lu_buf *buf, const char *name, int fl,
		       struct osd_thandle *oh)
{
	dmu_buf_impl_t *db;
	uchar_t *nv_value;
	size_t  size;
	int	nv_size;
	int	rc;
	int	too_big = 0;

	LASSERT(obj->oo_sa_hdl);
	/* Load the xattr nvlist on first use */
	if (obj->oo_sa_xattr == NULL) {
		rc = __osd_xattr_cache(env, obj);
		if (rc)
			return rc;
	}

	LASSERT(obj->oo_sa_xattr);
	/* Limited to 32k to keep nvpair memory allocations small */
	if (buf->lb_len > DXATTR_MAX_ENTRY_SIZE) {
		too_big = 1;
	} else {
		/* Prevent the DXATTR SA from consuming the entire SA
		 * region */
		rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR);
		if (rc)
			return rc;

		if (size + buf->lb_len > DXATTR_MAX_SA_SIZE)
			too_big = 1;
	}

	/* even in case of -EFBIG we must lookup xattr and check can we
	 * rewrite it then delete from SA */
	rc = -nvlist_lookup_byte_array(obj->oo_sa_xattr, name, &nv_value,
					&nv_size);
	if (rc == 0) {
		/* The attribute already exists in the nvlist */
		if (fl & LU_XATTR_CREATE) {
			return -EEXIST;
		} else if (too_big) {
			/* The new value cannot be stored here: drop the old
			 * entry, schedule the update, and report -EFBIG
			 * unless scheduling itself failed */
			rc = -nvlist_remove(obj->oo_sa_xattr, name,
						DATA_TYPE_BYTE_ARRAY);
			if (rc < 0)
				return rc;
			rc = __osd_sa_xattr_schedule_update(env, obj, oh);
			return rc == 0 ? -EFBIG : rc;
		}
	} else if (rc == -ENOENT) {
		/* The attribute is not in the nvlist */
		if (fl & LU_XATTR_REPLACE)
			return -ENODATA;
		else if (too_big)
			return -EFBIG;
	} else {
		return rc;
	}

	/* Ensure xattr doesn't exist in ZAP.
	 * Failures in this step do not abort the set: rc is overwritten by
	 * the nvlist_add_byte_array() call below, and the zap_remove()
	 * result is not checked. */
	if (obj->oo_xattr != ZFS_NO_OBJECT) {
		struct osd_device *osd = osd_obj2dev(obj);
		uint64_t           objid;
		rc = -zap_lookup(osd->od_os, obj->oo_xattr,
				 name, 8, 1, &objid);
		if (rc == 0) {
			rc = -dmu_object_free(osd->od_os, objid, oh->ot_tx);
			if (rc == 0)
				zap_remove(osd->od_os, obj->oo_xattr,
					   name, oh->ot_tx);
		}
	}

	/* Insert the new value into the nvlist under `name` */
	rc = -nvlist_add_byte_array(obj->oo_sa_xattr, name,
				    (uchar_t *)buf->lb_buf, buf->lb_len);
	if (rc)
		return rc;

	/* batch updates only for just created dnodes where we
	 * used to set number of EAs in a single transaction */
	db = (dmu_buf_impl_t *)obj->oo_db;
	if (DB_DNODE(db)->dn_allocated_txg == oh->ot_tx->tx_txg)
		rc = __osd_sa_xattr_schedule_update(env, obj, oh);
	else
		rc = __osd_sa_xattr_update(env, obj, oh);

	return rc;
}
Ejemplo n.º 6
0
/*
 * Add, remove, or replace a single attribute and then rewrite the entire
 * set of attributes.
 *
 * hdl       SA handle; hdl->sa_lock must be held by the caller.
 * newattr   attribute being added, removed, or replaced.
 * action    SA_ADD, SA_REMOVE, or SA_REPLACE.
 * locator   data locator callback stored in the new attribute's descriptor.
 * datastart data pointer stored in the new attribute's descriptor.
 * buflen    length stored in the new attribute's descriptor.
 * tx        transaction passed on to sa_build_layouts().
 *
 * The current bonus buffer (and spill buffer, when sa_get_spill() finds
 * one) is copied aside, a descriptor array covering every surviving
 * attribute is built with data pointers into those copies, and the result
 * is handed to sa_build_layouts(). The copies and the descriptor array are
 * freed before returning.
 *
 * Returns the sa_build_layouts() result, or the sa_get_spill() error when
 * that error is anything other than ENOENT.
 */
static int
sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
    sa_data_op_t action, sa_data_locator_t *locator, void *datastart,
    uint16_t buflen, dmu_tx_t *tx)
{
	sa_os_t *sa = hdl->sa_os->os_sa;
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus;
	dnode_t *dn;
	sa_bulk_attr_t *attr_desc;
	void *old_data[2];
	int bonus_attr_count = 0;
	int bonus_data_size = 0;
	int spill_data_size = 0;
	int spill_attr_count = 0;
	int error;
	uint16_t length, reg_length;
	int i, j, k, length_idx;
	sa_hdr_phys_t *hdr;
	sa_idx_tab_t *idx_tab;
	int attr_count;
	int count;

	ASSERT(MUTEX_HELD(&hdl->sa_lock));

	/* First make a copy of the old data */

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	if (dn->dn_bonuslen != 0) {
		bonus_data_size = hdl->sa_bonus->db_size;
		old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP);
		bcopy(hdl->sa_bonus->db_data, old_data[0],
		    hdl->sa_bonus->db_size);
		bonus_attr_count = hdl->sa_bonus_tab->sa_layout->lot_attr_count;
	} else {
		old_data[0] = NULL;
	}
	DB_DNODE_EXIT(db);

	/* Bring spill buffer online if it isn't currently */

	if ((error = sa_get_spill(hdl)) == 0) {
		spill_data_size = hdl->sa_spill->db_size;
		old_data[1] = kmem_alloc(spill_data_size, KM_SLEEP);
		bcopy(hdl->sa_spill->db_data, old_data[1],
		    hdl->sa_spill->db_size);
		spill_attr_count =
		    hdl->sa_spill_tab->sa_layout->lot_attr_count;
	} else if (error && error != ENOENT) {
		/* Real failure: undo the bonus copy and bail out. */
		if (old_data[0])
			kmem_free(old_data[0], bonus_data_size);
		return (error);
	} else {
		/* ENOENT: there is no spill buffer to copy. */
		old_data[1] = NULL;
	}

	/* build descriptor of all attributes */

	attr_count = bonus_attr_count + spill_attr_count;
	if (action == SA_ADD)
		attr_count++;
	else if (action == SA_REMOVE)
		attr_count--;

	attr_desc = kmem_zalloc(sizeof (sa_bulk_attr_t) * attr_count, KM_SLEEP);

	/*
	 * loop through bonus and spill buffer if it exists, and
	 * build up new attr_descriptor to reset the attributes
	 */
	k = j = 0;
	count = bonus_attr_count;
	hdr = SA_GET_HDR(hdl, SA_BONUS);
	idx_tab = SA_IDX_TAB_GET(hdl, SA_BONUS);
	for (; k != 2; k++) {
		/* iterate over each attribute in layout */
		for (i = 0, length_idx = 0; i != count; i++) {
			sa_attr_type_t attr;

			attr = idx_tab->sa_layout->lot_attrs[i];

			/*
			 * A registered length of 0 marks a variable-sized
			 * attribute whose length lives in the header.  The
			 * header slot must be consumed for every such
			 * attribute, including the one being removed or
			 * replaced, or the attributes after it would read
			 * the wrong sa_lengths[] entry.
			 */
			reg_length = SA_REGISTERED_LEN(sa, attr);
			if (reg_length == 0) {
				length = hdr->sa_lengths[length_idx];
				length_idx++;
			} else {
				length = reg_length;
			}

			if (attr == newattr) {
				/*
				 * A removed attribute gets no descriptor
				 * slot: attr_count was already reduced by
				 * one above, so j must not advance here.
				 */
				if (action == SA_REMOVE)
					continue;

				ASSERT(reg_length == 0);
				ASSERT(action == SA_REPLACE);
				SA_ADD_BULK_ATTR(attr_desc, j, attr,
				    locator, datastart, buflen);
			} else {
				/* Carry the old value over from the copy. */
				SA_ADD_BULK_ATTR(attr_desc, j, attr,
				    NULL, (void *)
				    (TOC_OFF(idx_tab->sa_idx_tab[attr]) +
				    (uintptr_t)old_data[k]), length);
			}
		}
		if (k == 0 && hdl->sa_spill) {
			/* Second pass: walk the spill buffer's layout. */
			hdr = SA_GET_HDR(hdl, SA_SPILL);
			idx_tab = SA_IDX_TAB_GET(hdl, SA_SPILL);
			count = spill_attr_count;
		} else {
			break;
		}
	}
	if (action == SA_ADD) {
		/* The new attribute is appended with the caller's length. */
		SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator,
		    datastart, buflen);
	}

	/* Every slot of attr_desc must have been filled exactly once. */
	ASSERT(j == attr_count);

	error = sa_build_layouts(hdl, attr_desc, attr_count, tx);

	if (old_data[0])
		kmem_free(old_data[0], bonus_data_size);
	if (old_data[1])
		kmem_free(old_data[1], spill_data_size);
	kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count);

	return (error);
}
Ejemplo n.º 7
0
/*
 * Format one line of dbuf statistics for `db` into `buf`.
 *
 * The line has three column groups separated by '|': fields of the dbuf
 * itself, fields of its ARC buffer (arc_buf_info(); left zeroed when
 * db->db_buf is NULL), and fields of its dnode
 * (__dmu_object_info_from_dnode() plus the dnode hold count; left zeroed
 * when the dnode pointer is NULL).
 *
 * buf   destination buffer.
 * size  capacity of buf in bytes.
 * db    dbuf to describe.
 *
 * Returns `size` when the output was truncated, otherwise the number of
 * characters written plus one for the terminating NUL.
 */
int
__dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
{
	arc_buf_info_t abi = { 0 };
	dmu_object_info_t doi = { 0 };
	dnode_t *dn = DB_DNODE(db);
	ulong_t dn_holds = 0;
	size_t nwritten;

	if (db->db_buf)
		arc_buf_info(db->db_buf, &abi, zfs_dbuf_state_index);

	/*
	 * All dnode-derived columns are gathered under the same NULL check
	 * so that a missing dnode yields zeroed columns rather than a NULL
	 * dereference while building the argument list.
	 */
	if (dn) {
		__dmu_object_info_from_dnode(dn, &doi);
		dn_holds = (ulong_t)refcount_count(&dn->dn_holds);
	}

	nwritten = snprintf(buf, size,
	    "%-16s %-8llu %-8lld %-8lld %-8lld %-8llu %-8llu %-5d %-5d %-5lu | "
	    "%-5d %-5d 0x%-6x %-6lu %-8llu %-12llu "
	    "%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-5lu | "
	    "%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-5lu %-8llu %-8llu\n",
	    /* dmu_buf_impl_t */
	    /*
	     * Pool name: taken from the dnode's objset when there is one,
	     * otherwise from the dbuf's own objset, which the next column
	     * already relies on.
	     */
	    spa_name(dn ? dn->dn_objset->os_spa : db->db_objset->os_spa),
	    (u_longlong_t)dmu_objset_id(db->db_objset),
	    (longlong_t)db->db.db_object,
	    (longlong_t)db->db_level,
	    (longlong_t)db->db_blkid,
	    (u_longlong_t)db->db.db_offset,
	    (u_longlong_t)db->db.db_size,
	    !!dbuf_is_metadata(db),
	    db->db_state,
	    (ulong_t)refcount_count(&db->db_holds),
	    /* arc_buf_info_t */
	    abi.abi_state_type,
	    abi.abi_state_contents,
	    abi.abi_flags,
	    (ulong_t)abi.abi_bufcnt,
	    (u_longlong_t)abi.abi_size,
	    (u_longlong_t)abi.abi_access,
	    (ulong_t)abi.abi_mru_hits,
	    (ulong_t)abi.abi_mru_ghost_hits,
	    (ulong_t)abi.abi_mfu_hits,
	    (ulong_t)abi.abi_mfu_ghost_hits,
	    (ulong_t)abi.abi_l2arc_hits,
	    (u_longlong_t)abi.abi_l2arc_dattr,
	    (u_longlong_t)abi.abi_l2arc_asize,
	    abi.abi_l2arc_compress,
	    (ulong_t)abi.abi_holds,
	    /* dmu_object_info_t */
	    doi.doi_type,
	    doi.doi_bonus_type,
	    (ulong_t)doi.doi_data_block_size,
	    (ulong_t)doi.doi_metadata_block_size,
	    (u_longlong_t)doi.doi_bonus_size,
	    (ulong_t)doi.doi_indirection,
	    dn_holds,
	    (u_longlong_t)doi.doi_fill_count,
	    (u_longlong_t)doi.doi_max_offset);

	/* Truncated output: report the whole buffer as consumed. */
	if (nwritten >= size)
		return (size);

	/* Count the terminating NUL as part of the record. */
	return (nwritten + 1);
}
Ejemplo n.º 8
0
/*
 * Free the children of indirect dbuf `db` that fall inside the block
 * range [blkid, blkid + nblks - 1], recursing through lower indirect
 * levels, and free `db`'s own block as well once every block pointer in
 * it is a hole.
 *
 * db     indirect dbuf whose data is an array of block pointers.
 * blkid  first block id of the range being freed.
 * nblks  number of blocks in the range.
 * tx     transaction the frees are charged to.
 */
static void
free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
    dmu_tx_t *tx)
{
	dnode_t *dn;
	blkptr_t *bp;
	dmu_buf_impl_t *subdb;
	uint64_t start, end, dbstart, dbend, i;
	int epbs, shift;

	/*
	 * There is a small possibility that this block will not be cached:
	 *   1 - if level > 1 and there are no children with level <= 1
	 *   2 - if this block was evicted since we read it from
	 *	 dmu_tx_hold_free().
	 */
	if (db->db_state != DB_CACHED)
		(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);

	dbuf_release_bp(db);
	bp = db->db.db_data;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	/* 1 << epbs is the number of block pointers held by this dbuf */
	epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
	/* shift scales the requested block ids to ids at the child level */
	shift = (db->db_level - 1) * epbs;
	/*
	 * Clamp [start, end] to the child ids covered by this dbuf
	 * ([dbstart, dbend]) and point bp at the entry for `start`.
	 */
	dbstart = db->db_blkid << epbs;
	start = blkid >> shift;
	if (dbstart < start) {
		bp += start - dbstart;
	} else {
		start = dbstart;
	}
	dbend = ((db->db_blkid + 1) << epbs) - 1;
	end = (blkid + nblks - 1) >> shift;
	if (dbend <= end)
		end = dbend;

	ASSERT3U(start, <=, end);

	if (db->db_level == 1) {
		/* Children are level-0 blocks: free their pointers directly */
		FREE_VERIFY(db, start, end, tx);
		free_blocks(dn, bp, end-start+1, tx);
	} else {
		/* Children are indirect blocks: recurse into each non-hole */
		for (i = start; i <= end; i++, bp++) {
			if (BP_IS_HOLE(bp))
				continue;
			rw_enter(&dn->dn_struct_rwlock, RW_READER);
			VERIFY0(dbuf_hold_impl(dn, db->db_level - 1,
			    i, TRUE, FALSE, FTAG, &subdb));
			rw_exit(&dn->dn_struct_rwlock);
			ASSERT3P(bp, ==, subdb->db_blkptr);

			free_children(subdb, blkid, nblks, tx);
			dbuf_rele(subdb, FTAG);
		}
	}

	/* If this whole block is free, free ourself too. */
	for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) {
		if (!BP_IS_HOLE(bp))
			break;
	}
	if (i == 1 << epbs) {
		/* didn't find any non-holes */
		bzero(db->db.db_data, db->db.db_size);
		free_blocks(dn, db->db_blkptr, 1, tx);
	} else {
		/*
		 * Partial block free; must be marked dirty so that it
		 * will be written out.
		 */
		ASSERT(db->db_dirtycnt > 0);
	}

	DB_DNODE_EXIT(db);
	arc_buf_freeze(db->db_buf);
}
Ejemplo n.º 9
0
/*
 * Panic if any 64-bit word of `words` is non-zero.  The number of words
 * scanned is child->db.db_size >> 3; i, off and num are only used for the
 * panic message.
 */
static void
free_verify_zeroed(dmu_buf_impl_t *child, uint64_t *words, int i, int off,
    int num)
{
	int j;

	for (j = 0; j < child->db.db_size >> 3; j++) {
		if (words[j] != 0) {
			panic("freed data not zero: "
			    "child=%p i=%d off=%d num=%d\n",
			    (void *)child, i, off, num);
		}
	}
}

/*
 * Debug check for a level-1 indirect dbuf: for every child block id in
 * [start, end] that can be held, verify that the data being freed in
 * tx's txg is all zeroes, panicking otherwise.
 *
 * db     level-1 indirect dbuf covering the range.
 * start  first child block id to verify.
 * end    last child block id to verify.
 * tx     transaction whose txg identifies the dirty record to inspect.
 */
static void
free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
{
	uint64_t txg = tx->tx_txg;
	dnode_t *dn;
	int epbs, off, num;
	int i, err;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
	/* Index of `start` relative to the first child of this dbuf. */
	off = start - (db->db_blkid << epbs);
	num = end - start + 1;

	ASSERT3U(off, >=, 0);
	ASSERT3U(num, >=, 0);
	ASSERT3U(db->db_level, >, 0);
	ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
	ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT);
	ASSERT(db->db_blkptr != NULL);

	for (i = off; i < off+num; i++) {
		dmu_buf_impl_t *child;
		dbuf_dirty_record_t *dr;
		uint64_t *words;

		ASSERT(db->db_level == 1);

		rw_enter(&dn->dn_struct_rwlock, RW_READER);
		err = dbuf_hold_impl(dn, db->db_level-1,
		    (db->db_blkid << epbs) + i, TRUE, FALSE, FTAG, &child);
		rw_exit(&dn->dn_struct_rwlock);

		/* Nothing to verify for a child that does not exist. */
		if (err == ENOENT)
			continue;
		ASSERT(err == 0);
		ASSERT(child->db_level == 0);

		/* Skip dirty records that belong to later txgs. */
		for (dr = child->db_last_dirty;
		    dr && dr->dr_txg > txg; dr = dr->dr_next)
			continue;
		ASSERT(dr == NULL || dr->dr_txg == txg);

		/* data_old better be zeroed */
		if (dr) {
			free_verify_zeroed(child, dr->dt.dl.dr_data->b_data,
			    i, off, num);
		}

		/*
		 * db_data better be zeroed unless it's dirty in a
		 * future txg.
		 */
		mutex_enter(&child->db_mtx);
		words = child->db.db_data;
		if (words != NULL && child->db_state != DB_FILL &&
		    child->db_last_dirty == NULL) {
			free_verify_zeroed(child, words, i, off, num);
		}
		mutex_exit(&child->db_mtx);

		dbuf_rele(child, FTAG);
	}
	DB_DNODE_EXIT(db);
}