static int spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp, dmu_tx_t *tx) { uint64_t firstwrite, phys_eof; objset_t *mos = spa->spa_meta_objset; int err; ASSERT(MUTEX_HELD(&spa->spa_history_lock)); /* see if we need to reset logical BOF */ while (shpp->sh_phys_max_off - shpp->sh_pool_create_len - (shpp->sh_eof - shpp->sh_bof) <= len) { if ((err = spa_history_advance_bof(spa, shpp)) != 0) { return (err); } } phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp); firstwrite = MIN(len, shpp->sh_phys_max_off - phys_eof); shpp->sh_eof += len; dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx); len -= firstwrite; if (len > 0) { /* write out the rest at the beginning of physical file */ dmu_write(mos, spa->spa_history, shpp->sh_pool_create_len, len, (char *)buf + firstwrite, tx); } return (0); }
/* * Replay a TX_WRITE ZIL transaction that didn't get committed * after a system failure */ static int zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap) { objset_t *os = zv->zv_objset; char *data = (char *)(lr + 1); /* data follows lr_write_t */ uint64_t off = lr->lr_offset; uint64_t len = lr->lr_length; dmu_tx_t *tx; int error; if (byteswap) byteswap_uint64_array(lr, sizeof (*lr)); tx = dmu_tx_create(os); dmu_tx_hold_write(tx, ZVOL_OBJ, off, len); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); } else { dmu_write(os, ZVOL_OBJ, off, len, data, tx); dmu_tx_commit(tx); } return (SET_ERROR(error)); }
void vdev_indirect_births_add_entry(vdev_indirect_births_t *vib, uint64_t max_offset, uint64_t txg, dmu_tx_t *tx) { vdev_indirect_birth_entry_phys_t vibe; uint64_t old_size; uint64_t new_size; vdev_indirect_birth_entry_phys_t *new_entries; ASSERT(dmu_tx_is_syncing(tx)); ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx))); ASSERT(vdev_indirect_births_verify(vib)); dmu_buf_will_dirty(vib->vib_dbuf, tx); vibe.vibe_offset = max_offset; vibe.vibe_phys_birth_txg = txg; old_size = vdev_indirect_births_size_impl(vib); dmu_write(vib->vib_objset, vib->vib_object, old_size, sizeof (vibe), &vibe, tx); vib->vib_phys->vib_count++; new_size = vdev_indirect_births_size_impl(vib); new_entries = kmem_alloc(new_size, KM_SLEEP); if (old_size > 0) { bcopy(vib->vib_entries, new_entries, old_size); kmem_free(vib->vib_entries, old_size); } new_entries[vib->vib_phys->vib_count - 1] = vibe; vib->vib_entries = new_entries; }
static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt, const struct lu_buf *buf, loff_t *pos, struct thandle *th, int ignore_quota) { struct osd_object *obj = osd_dt_obj(dt); struct osd_device *osd = osd_obj2dev(obj); struct osd_thandle *oh; uint64_t offset = *pos; int rc; ENTRY; LASSERT(dt_object_exists(dt)); LASSERT(obj->oo_db); LASSERT(th != NULL); oh = container_of0(th, struct osd_thandle, ot_super); record_start_io(osd, WRITE, 0); dmu_write(osd->od_os, obj->oo_db->db_object, offset, (uint64_t)buf->lb_len, buf->lb_buf, oh->ot_tx); write_lock(&obj->oo_attr_lock); if (obj->oo_attr.la_size < offset + buf->lb_len) { obj->oo_attr.la_size = offset + buf->lb_len; write_unlock(&obj->oo_attr_lock); /* osd_object_sa_update() will be copying directly from oo_attr * into dbuf. any update within a single txg will copy the * most actual */ rc = osd_object_sa_update(obj, SA_ZPL_SIZE(osd), &obj->oo_attr.la_size, 8, oh); if (unlikely(rc)) GOTO(out, rc); } else { write_unlock(&obj->oo_attr_lock); } *pos += buf->lb_len; rc = buf->lb_len; out: record_end_io(osd, WRITE, 0, buf->lb_len, buf->lb_len >> PAGE_CACHE_SHIFT); RETURN(rc); }
static int zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object, uint64_t offset, uint64_t size, const void *buf) { struct dmu_tx *tx; int rc, how = TXG_WAIT; // int flags = 0; if (run_args->flags & DMU_WRITE_NOWAIT) how = TXG_NOWAIT; while (1) { tx = dmu_tx_create(os); dmu_tx_hold_write(tx, object, offset, size); rc = dmu_tx_assign(tx, how); if (rc) { if (rc == ERESTART && how == TXG_NOWAIT) { dmu_tx_wait(tx); dmu_tx_abort(tx); continue; } zpios_print(run_args->file, "Error in dmu_tx_assign(), %d", rc); dmu_tx_abort(tx); return (rc); } break; } // if (run_args->flags & DMU_WRITE_ZC) // flags |= DMU_WRITE_ZEROCOPY; dmu_write(os, object, offset, size, buf, tx); dmu_tx_commit(tx); return (0); }
static #endif int zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain, char **retdomain, dmu_tx_t *tx) { fuid_domain_t searchnode, *findnode; avl_index_t loc; /* * If the dummy "nobody" domain then return an index of 0 * to cause the created FUID to be a standard POSIX id * for the user nobody. */ if (domain[0] == '\0') { *retdomain = ""; return (0); } searchnode.f_ksid = ksid_lookupdomain(domain); if (retdomain) { *retdomain = searchnode.f_ksid->kd_name; } if (!zfsvfs->z_fuid_loaded) zfs_fuid_init(zfsvfs, tx); rw_enter(&zfsvfs->z_fuid_lock, RW_READER); findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc); rw_exit(&zfsvfs->z_fuid_lock); if (findnode) { ksiddomain_rele(searchnode.f_ksid); return (findnode->f_idx); } else { fuid_domain_t *domnode; nvlist_t *nvp; nvlist_t **fuids; uint64_t retidx; size_t nvsize = 0; char *packed; dmu_buf_t *db; int i = 0; domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP); domnode->f_ksid = searchnode.f_ksid; rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER); retidx = domnode->f_idx = avl_numnodes(&zfsvfs->z_fuid_idx) + 1; avl_add(&zfsvfs->z_fuid_domain, domnode); avl_add(&zfsvfs->z_fuid_idx, domnode); /* * Now resync the on-disk nvlist. */ VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); domnode = avl_first(&zfsvfs->z_fuid_domain); fuids = kmem_alloc(retidx * sizeof (void *), KM_SLEEP); while (domnode) { VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX, domnode->f_idx) == 0); VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0); VERIFY(nvlist_add_string(fuids[i++], FUID_DOMAIN, domnode->f_ksid->kd_name) == 0); domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode); } VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY, fuids, retidx) == 0); for (i = 0; i != retidx; i++) nvlist_free(fuids[i]); kmem_free(fuids, retidx * sizeof (void *)); VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0); packed = kmem_alloc(nvsize, KM_SLEEP); VERIFY(nvlist_pack(nvp, &packed, &nvsize, NV_ENCODE_XDR, KM_SLEEP) == 0); nvlist_free(nvp); zfsvfs->z_fuid_size = nvsize; dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0, zfsvfs->z_fuid_size, packed, tx); kmem_free(packed, zfsvfs->z_fuid_size); VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj, FTAG, &db)); dmu_buf_will_dirty(db, tx); *(uint64_t *)db->db_data = zfsvfs->z_fuid_size; dmu_buf_rele(db, FTAG); rw_exit(&zfsvfs->z_fuid_lock); return (retidx); } }
int __osd_xattr_set(const struct lu_env *env, struct osd_object *obj, const struct lu_buf *buf, const char *name, int fl, struct osd_thandle *oh) { struct osd_device *osd = osd_obj2dev(obj); dmu_buf_t *xa_zap_db = NULL; dmu_buf_t *xa_data_db = NULL; uint64_t xa_data_obj; sa_handle_t *sa_hdl = NULL; dmu_tx_t *tx = oh->ot_tx; uint64_t size; int rc; LASSERT(obj->oo_sa_hdl); if (obj->oo_xattr == ZFS_NO_OBJECT) { struct lu_attr *la = &osd_oti_get(env)->oti_la; la->la_valid = LA_MODE; la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; rc = __osd_zap_create(env, osd, &xa_zap_db, tx, la, obj->oo_db->db_object, 0); if (rc) return rc; obj->oo_xattr = xa_zap_db->db_object; rc = osd_object_sa_update(obj, SA_ZPL_XATTR(osd), &obj->oo_xattr, 8, oh); if (rc) goto out; } rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1, &xa_data_obj); if (rc == 0) { if (fl & LU_XATTR_CREATE) { rc = -EEXIST; goto out; } /* * Entry already exists. * We'll truncate the existing object. */ rc = __osd_obj2dbuf(env, osd->od_os, xa_data_obj, &xa_data_db); if (rc) goto out; rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL, SA_HDL_PRIVATE, &sa_hdl); if (rc) goto out; rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8); if (rc) goto out_sa; rc = -dmu_free_range(osd->od_os, xa_data_db->db_object, 0, DMU_OBJECT_END, tx); if (rc) goto out_sa; } else if (rc == -ENOENT) { struct lu_attr *la = &osd_oti_get(env)->oti_la; /* * Entry doesn't exist, we need to create a new one and a new * object to store the value. */ if (fl & LU_XATTR_REPLACE) { /* should be ENOATTR according to the * man, but that is undefined here */ rc = -ENODATA; goto out; } la->la_valid = LA_MODE; la->la_mode = S_IFREG | S_IRUGO | S_IWUSR; rc = __osd_object_create(env, obj, &xa_data_db, tx, la, obj->oo_xattr); if (rc) goto out; xa_data_obj = xa_data_db->db_object; rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL, SA_HDL_PRIVATE, &sa_hdl); if (rc) goto out; rc = -zap_add(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1, &xa_data_obj, tx); if (rc) goto out_sa; } else { /* There was an error looking up the xattr name */ goto out; } /* Finally write the xattr value */ dmu_write(osd->od_os, xa_data_obj, 0, buf->lb_len, buf->lb_buf, tx); size = buf->lb_len; rc = -sa_update(sa_hdl, SA_ZPL_SIZE(osd), &size, 8, tx); out_sa: sa_handle_destroy(sa_hdl); out: if (xa_data_db != NULL) dmu_buf_rele(xa_data_db, FTAG); if (xa_zap_db != NULL) dmu_buf_rele(xa_zap_db, FTAG); return rc; }
/* * sync out AVL trees to persistent storage. */ void zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx) { #ifdef HAVE_ZPL nvlist_t *nvp; nvlist_t **fuids; size_t nvsize = 0; char *packed; dmu_buf_t *db; fuid_domain_t *domnode; int numnodes; int i; if (!zfsvfs->z_fuid_dirty) { return; } rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER); /* * First see if table needs to be created? */ if (zfsvfs->z_fuid_obj == 0) { zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os, DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE, sizeof (uint64_t), tx); VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, sizeof (uint64_t), 1, &zfsvfs->z_fuid_obj, tx) == 0); } VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); numnodes = avl_numnodes(&zfsvfs->z_fuid_idx); fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP); for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain); domnode; i++, domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) { VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0); VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX, domnode->f_idx) == 0); VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0); VERIFY(nvlist_add_string(fuids[i], FUID_DOMAIN, domnode->f_ksid->kd_name) == 0); } VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY, fuids, numnodes) == 0); for (i = 0; i != numnodes; i++) nvlist_free(fuids[i]); kmem_free(fuids, numnodes * sizeof (void *)); VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0); packed = kmem_alloc(nvsize, KM_SLEEP); VERIFY(nvlist_pack(nvp, &packed, &nvsize, NV_ENCODE_XDR, KM_SLEEP) == 0); nvlist_free(nvp); zfsvfs->z_fuid_size = nvsize; dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0, zfsvfs->z_fuid_size, packed, tx); kmem_free(packed, zfsvfs->z_fuid_size); VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj, FTAG, &db)); dmu_buf_will_dirty(db, tx); *(uint64_t *)db->db_data = zfsvfs->z_fuid_size; dmu_buf_rele(db, FTAG); zfsvfs->z_fuid_dirty = B_FALSE; rw_exit(&zfsvfs->z_fuid_lock); #endif /* HAVE_ZPL */ }
int zvol_strategy(buf_t *bp) { zvol_state_t *zv = ddi_get_soft_state(zvol_state, getminor(bp->b_edev)); uint64_t off, volsize; size_t size, resid; char *addr; objset_t *os; int error = 0; int sync; int reading; int txg_sync_needed = B_FALSE; if (zv == NULL) { bioerror(bp, ENXIO); biodone(bp); return (0); } if (getminor(bp->b_edev) == 0) { bioerror(bp, EINVAL); biodone(bp); return (0); } if (zv->zv_readonly && !(bp->b_flags & B_READ)) { bioerror(bp, EROFS); biodone(bp); return (0); } off = ldbtob(bp->b_blkno); volsize = zv->zv_volsize; os = zv->zv_objset; ASSERT(os != NULL); sync = !(bp->b_flags & B_ASYNC) && !(zil_disable); bp_mapin(bp); addr = bp->b_un.b_addr; resid = bp->b_bcount; /* * There must be no buffer changes when doing a dmu_sync() because * we can't change the data whilst calculating the checksum. * A better approach than a per zvol rwlock would be to lock ranges. */ reading = bp->b_flags & B_READ; if (reading || resid <= zvol_immediate_write_sz) rw_enter(&zv->zv_dslock, RW_READER); else rw_enter(&zv->zv_dslock, RW_WRITER); while (resid != 0 && off < volsize) { size = MIN(resid, 1UL << 20); /* cap at 1MB per tx */ if (size > volsize - off) /* don't write past the end */ size = volsize - off; if (reading) { error = dmu_read(os, ZVOL_OBJ, off, size, addr); } else { dmu_tx_t *tx = dmu_tx_create(os); dmu_tx_hold_write(tx, ZVOL_OBJ, off, size); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); } else { dmu_write(os, ZVOL_OBJ, off, size, addr, tx); if (sync) { /* use the ZIL to commit this write */ if (zvol_log_write(zv, tx, off, size, addr) != 0) { txg_sync_needed = B_TRUE; } } dmu_tx_commit(tx); } } if (error) break; off += size; addr += size; resid -= size; } rw_exit(&zv->zv_dslock); if ((bp->b_resid = resid) == bp->b_bcount) bioerror(bp, off > volsize ? EINVAL : error); biodone(bp); if (sync) { if (txg_sync_needed) txg_wait_synced(dmu_objset_pool(os), 0); else zil_commit(zv->zv_zilog, UINT64_MAX, 0); } return (0); }