/*
 * Change the bonus buffer type of the dnode backing db_fake.
 *
 * Returns EINVAL (via SET_ERROR) when 'type' fails DMU_OT_IS_VALID() or when
 * db_fake is not the dnode's bonus buffer; otherwise calls
 * dnode_setbonus_type() and returns 0.
 */
int
dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx)
{
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
	dnode_t *dn;
	int error = 0;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);

	/* The type check comes first, then the "is this the bonus" check. */
	if (!DMU_OT_IS_VALID(type) || dn->dn_bonus != db)
		error = SET_ERROR(EINVAL);
	else
		dnode_setbonus_type(dn, type, tx);

	DB_DNODE_EXIT(db);
	return (error);
}
/*
 * Change the length of the bonus buffer of the dnode backing db_fake.
 *
 * Returns EINVAL (via SET_ERROR) when db_fake is not the dnode's bonus
 * buffer, or when newsize is negative or larger than db_fake->db_size;
 * otherwise calls dnode_setbonuslen() and returns 0.
 */
int
dmu_set_bonus(dmu_buf_t *db_fake, int newsize, dmu_tx_t *tx)
{
	dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
	dnode_t *dn;
	int error = 0;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);

	/* Reject non-bonus buffers first, then out-of-range sizes. */
	if (dn->dn_bonus != db ||
	    newsize < 0 || newsize > db_fake->db_size)
		error = SET_ERROR(EINVAL);
	else
		dnode_setbonuslen(dn, newsize, tx);

	DB_DNODE_EXIT(db);
	return (error);
}
/*
 * Decide whether the block behind 'db' should be l2cached with respect to
 * DDT metadata placement.
 *
 * Returns B_TRUE when the pool has no special class, when the owning
 * object's type is not DDT metadata, or when SPECIAL_FLAG_DATAMETA is not
 * set in the objset's special-class flags.  In the remaining case the
 * answer is B_TRUE only if the pool's spa_ddt_to_special policy is not
 * META_PLACEMENT_ON.
 */
boolean_t
dbuf_ddt_is_l2cacheable(dmu_buf_impl_t *db)
{
	spa_t *spa = db->db_objset->os_spa;
	spa_meta_placement_t *policy = &spa->spa_meta_policy;
	dmu_object_type_t type;
	uint64_t flags;
	boolean_t datameta;

	if (!spa_has_special(spa))
		return (B_TRUE);

	flags = spa_specialclass_flags(db->db_objset);
	datameta = !!(SPECIAL_FLAG_DATAMETA & flags);

	/* Sample the object type under the dnode handle hold. */
	DB_DNODE_ENTER(db);
	type = DB_DNODE(db)->dn_type;
	DB_DNODE_EXIT(db);

	if (DMU_OT_IS_DDT_META(type) && datameta)
		return (policy->spa_ddt_to_special != META_PLACEMENT_ON);

	return (B_TRUE);
}
/*
 * Decide whether the block behind 'db' should be l2cached with respect to
 * metadata placement.
 *
 * Returns B_TRUE when the owning object's type is not flagged as metadata
 * in dmu_ot[], or when spa_meta_to_special() says that type is not placed
 * on the special class.  Otherwise the result of spa_meta_is_dual() for
 * that type is returned.
 */
boolean_t
dbuf_meta_is_l2cacheable(dmu_buf_impl_t *db)
{
	spa_t *spa = db->db_objset->os_spa;
	dmu_object_type_t type;
	boolean_t metadata, to_special;

	/* Sample the object type under the dnode handle hold. */
	DB_DNODE_ENTER(db);
	type = DB_DNODE(db)->dn_type;
	DB_DNODE_EXIT(db);

	metadata = dmu_ot[type].ot_metadata;
	if (!metadata)
		return (B_TRUE);

	to_special = spa_meta_to_special(spa, db->db_objset, type);
	if (!to_special)
		return (B_TRUE);

	return (spa_meta_is_dual(spa,
	    db->db_objset->os_zpl_meta_to_special, type));
}
int __osd_sa_xattr_set(const struct lu_env *env, struct osd_object *obj, const struct lu_buf *buf, const char *name, int fl, struct osd_thandle *oh) { dmu_buf_impl_t *db; uchar_t *nv_value; size_t size; int nv_size; int rc; int too_big = 0; LASSERT(obj->oo_sa_hdl); if (obj->oo_sa_xattr == NULL) { rc = __osd_xattr_cache(env, obj); if (rc) return rc; } LASSERT(obj->oo_sa_xattr); /* Limited to 32k to keep nvpair memory allocations small */ if (buf->lb_len > DXATTR_MAX_ENTRY_SIZE) { too_big = 1; } else { /* Prevent the DXATTR SA from consuming the entire SA * region */ rc = -nvlist_size(obj->oo_sa_xattr, &size, NV_ENCODE_XDR); if (rc) return rc; if (size + buf->lb_len > DXATTR_MAX_SA_SIZE) too_big = 1; } /* even in case of -EFBIG we must lookup xattr and check can we * rewrite it then delete from SA */ rc = -nvlist_lookup_byte_array(obj->oo_sa_xattr, name, &nv_value, &nv_size); if (rc == 0) { if (fl & LU_XATTR_CREATE) { return -EEXIST; } else if (too_big) { rc = -nvlist_remove(obj->oo_sa_xattr, name, DATA_TYPE_BYTE_ARRAY); if (rc < 0) return rc; rc = __osd_sa_xattr_schedule_update(env, obj, oh); return rc == 0 ? 
-EFBIG : rc; } } else if (rc == -ENOENT) { if (fl & LU_XATTR_REPLACE) return -ENODATA; else if (too_big) return -EFBIG; } else { return rc; } /* Ensure xattr doesn't exist in ZAP */ if (obj->oo_xattr != ZFS_NO_OBJECT) { struct osd_device *osd = osd_obj2dev(obj); uint64_t objid; rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, 8, 1, &objid); if (rc == 0) { rc = -dmu_object_free(osd->od_os, objid, oh->ot_tx); if (rc == 0) zap_remove(osd->od_os, obj->oo_xattr, name, oh->ot_tx); } } rc = -nvlist_add_byte_array(obj->oo_sa_xattr, name, (uchar_t *)buf->lb_buf, buf->lb_len); if (rc) return rc; /* batch updates only for just created dnodes where we * used to set number of EAs in a single transaction */ db = (dmu_buf_impl_t *)obj->oo_db; if (DB_DNODE(db)->dn_allocated_txg == oh->ot_tx->tx_txg) rc = __osd_sa_xattr_schedule_update(env, obj, oh); else rc = __osd_sa_xattr_update(env, obj, oh); return rc; }
/* * add/remove/replace a single attribute and then rewrite the entire set * of attributes. */ static int sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr, sa_data_op_t action, sa_data_locator_t *locator, void *datastart, uint16_t buflen, dmu_tx_t *tx) { sa_os_t *sa = hdl->sa_os->os_sa; dmu_buf_impl_t *db = (dmu_buf_impl_t *)hdl->sa_bonus; dnode_t *dn; sa_bulk_attr_t *attr_desc; void *old_data[2]; int bonus_attr_count = 0; int bonus_data_size, spill_data_size; int spill_attr_count = 0; int error; uint16_t length; int i, j, k, length_idx; sa_hdr_phys_t *hdr; sa_idx_tab_t *idx_tab; int attr_count; int count; ASSERT(MUTEX_HELD(&hdl->sa_lock)); /* First make of copy of the old data */ DB_DNODE_ENTER(db); dn = DB_DNODE(db); if (dn->dn_bonuslen != 0) { bonus_data_size = hdl->sa_bonus->db_size; old_data[0] = kmem_alloc(bonus_data_size, KM_SLEEP); bcopy(hdl->sa_bonus->db_data, old_data[0], hdl->sa_bonus->db_size); bonus_attr_count = hdl->sa_bonus_tab->sa_layout->lot_attr_count; } else { old_data[0] = NULL; } DB_DNODE_EXIT(db); /* Bring spill buffer online if it isn't currently */ if ((error = sa_get_spill(hdl)) == 0) { spill_data_size = hdl->sa_spill->db_size; old_data[1] = kmem_alloc(spill_data_size, KM_SLEEP); bcopy(hdl->sa_spill->db_data, old_data[1], hdl->sa_spill->db_size); spill_attr_count = hdl->sa_spill_tab->sa_layout->lot_attr_count; } else if (error && error != ENOENT) { if (old_data[0]) kmem_free(old_data[0], bonus_data_size); return (error); } else { old_data[1] = NULL; } /* build descriptor of all attributes */ attr_count = bonus_attr_count + spill_attr_count; if (action == SA_ADD) attr_count++; else if (action == SA_REMOVE) attr_count--; attr_desc = kmem_zalloc(sizeof (sa_bulk_attr_t) * attr_count, KM_SLEEP); /* * loop through bonus and spill buffer if it exists, and * build up new attr_descriptor to reset the attributes */ k = j = 0; count = bonus_attr_count; hdr = SA_GET_HDR(hdl, SA_BONUS); idx_tab = SA_IDX_TAB_GET(hdl, SA_BONUS); for (; k != 2; 
k++) { /* iterate over each attribute in layout */ for (i = 0, length_idx = 0; i != count; i++) { sa_attr_type_t attr; attr = idx_tab->sa_layout->lot_attrs[i]; if (attr == newattr) { if (action == SA_REMOVE) { j++; continue; } ASSERT(SA_REGISTERED_LEN(sa, attr) == 0); ASSERT(action == SA_REPLACE); SA_ADD_BULK_ATTR(attr_desc, j, attr, locator, datastart, buflen); } else { length = SA_REGISTERED_LEN(sa, attr); if (length == 0) { length = hdr->sa_lengths[length_idx++]; } SA_ADD_BULK_ATTR(attr_desc, j, attr, NULL, (void *) (TOC_OFF(idx_tab->sa_idx_tab[attr]) + (uintptr_t)old_data[k]), length); } } if (k == 0 && hdl->sa_spill) { hdr = SA_GET_HDR(hdl, SA_SPILL); idx_tab = SA_IDX_TAB_GET(hdl, SA_SPILL); count = spill_attr_count; } else { break; } } if (action == SA_ADD) { length = SA_REGISTERED_LEN(sa, newattr); if (length == 0) { length = buflen; } SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator, datastart, buflen); } error = sa_build_layouts(hdl, attr_desc, attr_count, tx); if (old_data[0]) kmem_free(old_data[0], bonus_data_size); if (old_data[1]) kmem_free(old_data[1], spill_data_size); kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count); return (error); }
/*
 * Format one line of dbuf statistics for 'db' into 'buf' (at most 'size'
 * bytes): the dbuf's own fields, the arc_buf_info_t of its ARC buffer (left
 * zeroed when db->db_buf is NULL) and the dmu_object_info_t of its dnode
 * (left zeroed when the dnode pointer is NULL).
 *
 * Returns 'size' when the line did not fit, otherwise the number of
 * characters written plus one.
 */
int
__dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
{
	arc_buf_info_t abi = { 0 };
	dmu_object_info_t doi = { 0 };
	dnode_t *dn = DB_DNODE(db);
	const char *pool;
	ulong_t dn_holds = 0;
	size_t nwritten;

	if (db->db_buf)
		arc_buf_info(db->db_buf, &abi, zfs_dbuf_state_index);

	if (dn) {
		__dmu_object_info_from_dnode(dn, &doi);
		dn_holds = (ulong_t)refcount_count(&dn->dn_holds);
		pool = spa_name(dn->dn_objset->os_spa);
	} else {
		/*
		 * dn is treated as possibly NULL above, so do not
		 * dereference it for the pool name; fall back to the dbuf's
		 * own objset (assumed non-NULL, as dmu_objset_id() below
		 * already relies on it) and report zero dnode holds.
		 */
		pool = spa_name(db->db_objset->os_spa);
	}

	nwritten = snprintf(buf, size,
	    "%-16s %-8llu %-8lld %-8lld %-8lld %-8llu %-8llu %-5d %-5d %-5lu | "
	    "%-5d %-5d 0x%-6x %-6lu %-8llu %-12llu "
	    "%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-5lu | "
	    "%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-5lu %-8llu %-8llu\n",
	    /* dmu_buf_impl_t */
	    pool,
	    (u_longlong_t)dmu_objset_id(db->db_objset),
	    (longlong_t)db->db.db_object,
	    (longlong_t)db->db_level,
	    (longlong_t)db->db_blkid,
	    (u_longlong_t)db->db.db_offset,
	    (u_longlong_t)db->db.db_size,
	    !!dbuf_is_metadata(db),
	    db->db_state,
	    (ulong_t)refcount_count(&db->db_holds),
	    /* arc_buf_info_t */
	    abi.abi_state_type,
	    abi.abi_state_contents,
	    abi.abi_flags,
	    (ulong_t)abi.abi_bufcnt,
	    (u_longlong_t)abi.abi_size,
	    (u_longlong_t)abi.abi_access,
	    (ulong_t)abi.abi_mru_hits,
	    (ulong_t)abi.abi_mru_ghost_hits,
	    (ulong_t)abi.abi_mfu_hits,
	    (ulong_t)abi.abi_mfu_ghost_hits,
	    (ulong_t)abi.abi_l2arc_hits,
	    (u_longlong_t)abi.abi_l2arc_dattr,
	    (u_longlong_t)abi.abi_l2arc_asize,
	    abi.abi_l2arc_compress,
	    (ulong_t)abi.abi_holds,
	    /* dmu_object_info_t */
	    doi.doi_type,
	    doi.doi_bonus_type,
	    (ulong_t)doi.doi_data_block_size,
	    (ulong_t)doi.doi_metadata_block_size,
	    (u_longlong_t)doi.doi_bonus_size,
	    (ulong_t)doi.doi_indirection,
	    dn_holds,
	    (u_longlong_t)doi.doi_fill_count,
	    (u_longlong_t)doi.doi_max_offset);

	/* Truncated output: report the whole buffer as consumed. */
	if (nwritten >= size)
		return (size);

	return (nwritten + 1);
}
/*
 * Free the block pointers held in indirect block 'db' that fall inside the
 * level-0 range [blkid, blkid + nblks), recursing into child indirect
 * blocks when db is above level 1.  If every block pointer in db ends up
 * being a hole, db's contents are zeroed and db's own block pointer is
 * freed as well.
 */
static void
free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
{
	dnode_t *dn;
	blkptr_t *bp;
	dmu_buf_impl_t *child;
	uint64_t first, last, db_first, db_last, id;
	int epbs, shift;
	boolean_t all_holes;

	/*
	 * There is a small possibility that this block will not be cached:
	 *   1 - if level > 1 and there are no children with level <= 1
	 *   2 - if this block was evicted since we read it from
	 *	 dmu_tx_hold_free().
	 */
	if (db->db_state != DB_CACHED)
		(void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED);

	dbuf_release_bp(db);
	bp = db->db.db_data;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
	shift = (db->db_level - 1) * epbs;

	/* Clamp the requested range to the ids this block covers. */
	db_first = db->db_blkid << epbs;
	first = blkid >> shift;
	if (first > db_first)
		bp += first - db_first;
	else
		first = db_first;

	db_last = ((db->db_blkid + 1) << epbs) - 1;
	last = (blkid + nblks - 1) >> shift;
	if (last >= db_last)
		last = db_last;

	ASSERT3U(first, <=, last);

	if (db->db_level == 1) {
		FREE_VERIFY(db, first, last, tx);
		free_blocks(dn, bp, last-first+1, tx);
	} else {
		for (id = first; id <= last; id++, bp++) {
			if (!BP_IS_HOLE(bp)) {
				rw_enter(&dn->dn_struct_rwlock, RW_READER);
				VERIFY0(dbuf_hold_impl(dn, db->db_level - 1,
				    id, TRUE, FALSE, FTAG, &child));
				rw_exit(&dn->dn_struct_rwlock);
				ASSERT3P(bp, ==, child->db_blkptr);

				free_children(child, blkid, nblks, tx);
				dbuf_rele(child, FTAG);
			}
		}
	}

	/* If this whole block is free, free ourself too. */
	all_holes = B_TRUE;
	for (id = 0, bp = db->db.db_data; id < 1 << epbs; id++, bp++) {
		if (!BP_IS_HOLE(bp)) {
			all_holes = B_FALSE;
			break;
		}
	}

	if (all_holes) {
		/* didn't find any non-holes */
		bzero(db->db.db_data, db->db.db_size);
		free_blocks(dn, db->db_blkptr, 1, tx);
	} else {
		/*
		 * Partial block free; must be marked dirty so that it
		 * will be written out.
		 */
		ASSERT(db->db_dirtycnt > 0);
	}

	DB_DNODE_EXIT(db);
	arc_buf_freeze(db->db_buf);
}
/*
 * Debug check used when freeing level-0 blocks under the level-1 indirect
 * block 'db': for every child block id in [start, end] that can be held,
 * panic if its dirty data for tx's txg, or its current data when it has no
 * dirty records and is not being filled, contains any non-zero word.
 */
static void
free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
{
	uint64_t txg = tx->tx_txg;
	dnode_t *dn;
	int off, num;
	int i, err, epbs;

	DB_DNODE_ENTER(db);
	dn = DB_DNODE(db);
	epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
	off = start - (db->db_blkid << epbs);
	num = end - start + 1;

	ASSERT3U(off, >=, 0);
	ASSERT3U(num, >=, 0);
	ASSERT3U(db->db_level, >, 0);
	ASSERT3U(db->db.db_size, ==, 1 << dn->dn_phys->dn_indblkshift);
	ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT);
	ASSERT(db->db_blkptr != NULL);

	for (i = off; i < off+num; i++) {
		dmu_buf_impl_t *child;
		dbuf_dirty_record_t *dr;
		uint64_t *buf;
		int j;

		ASSERT(db->db_level == 1);

		rw_enter(&dn->dn_struct_rwlock, RW_READER);
		err = dbuf_hold_impl(dn, db->db_level-1,
		    (db->db_blkid << epbs) + i, TRUE, FALSE, FTAG, &child);
		rw_exit(&dn->dn_struct_rwlock);
		if (err == ENOENT)
			continue;
		ASSERT(err == 0);
		ASSERT(child->db_level == 0);

		/* Skip dirty records that belong to later txgs. */
		for (dr = child->db_last_dirty;
		    dr && dr->dr_txg > txg; dr = dr->dr_next)
			;
		ASSERT(dr == NULL || dr->dr_txg == txg);

		/* data_old better be zeroed */
		if (dr) {
			buf = dr->dt.dl.dr_data->b_data;
			for (j = 0; j < child->db.db_size >> 3; j++) {
				if (buf[j] != 0) {
					panic("freed data not zero: "
					    "child=%p i=%d off=%d num=%d\n",
					    (void *)child, i, off, num);
				}
			}
		}

		/*
		 * db_data better be zeroed unless it's dirty in a
		 * future txg.
		 */
		mutex_enter(&child->db_mtx);
		buf = child->db.db_data;
		if (buf != NULL && child->db_state != DB_FILL &&
		    child->db_last_dirty == NULL) {
			for (j = 0; j < child->db.db_size >> 3; j++) {
				if (buf[j] != 0) {
					panic("freed data not zero: "
					    "child=%p i=%d off=%d num=%d\n",
					    (void *)child, i, off, num);
				}
			}
		}
		mutex_exit(&child->db_mtx);

		dbuf_rele(child, FTAG);
	}
	DB_DNODE_EXIT(db);
}