Example #1
0
static int
spa_history_write(spa_t *spa, void *buf, uint64_t len, spa_history_phys_t *shpp,
    dmu_tx_t *tx)
{
	uint64_t firstwrite, phys_eof;
	objset_t *mos = spa->spa_meta_objset;
	int err;

	ASSERT(MUTEX_HELD(&spa->spa_history_lock));

	/* see if we need to reset logical BOF */
	while (shpp->sh_phys_max_off - shpp->sh_pool_create_len -
	    (shpp->sh_eof - shpp->sh_bof) <= len) {
		if ((err = spa_history_advance_bof(spa, shpp)) != 0) {
			return (err);
		}
	}

	phys_eof = spa_history_log_to_phys(shpp->sh_eof, shpp);
	firstwrite = MIN(len, shpp->sh_phys_max_off - phys_eof);
	shpp->sh_eof += len;
	dmu_write(mos, spa->spa_history, phys_eof, firstwrite, buf, tx);

	len -= firstwrite;
	if (len > 0) {
		/* write out the rest at the beginning of physical file */
		dmu_write(mos, spa->spa_history, shpp->sh_pool_create_len,
		    len, (char *)buf + firstwrite, tx);
	}

	return (0);
}
Example #2
0
File: zvol.c Project: alek-p/zfs
/*
 * Replay a TX_WRITE ZIL transaction that didn't get committed
 * after a system failure
 */
static int
zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap)
{
	objset_t *os = zv->zv_objset;
	char *data = (char *)(lr + 1);	/* data follows lr_write_t */
	uint64_t off = lr->lr_offset;
	uint64_t len = lr->lr_length;
	dmu_tx_t *tx;
	int error;

	if (byteswap)
		byteswap_uint64_array(lr, sizeof (*lr));

	tx = dmu_tx_create(os);
	dmu_tx_hold_write(tx, ZVOL_OBJ, off, len);
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
	} else {
		dmu_write(os, ZVOL_OBJ, off, len, data, tx);
		dmu_tx_commit(tx);
	}

	return (SET_ERROR(error));
}
Example #3
0
void
vdev_indirect_births_add_entry(vdev_indirect_births_t *vib,
    uint64_t max_offset, uint64_t txg, dmu_tx_t *tx)
{
	vdev_indirect_birth_entry_phys_t vibe;
	uint64_t old_size;
	uint64_t new_size;
	vdev_indirect_birth_entry_phys_t *new_entries;

	ASSERT(dmu_tx_is_syncing(tx));
	ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx)));
	ASSERT(vdev_indirect_births_verify(vib));

	dmu_buf_will_dirty(vib->vib_dbuf, tx);

	vibe.vibe_offset = max_offset;
	vibe.vibe_phys_birth_txg = txg;

	old_size = vdev_indirect_births_size_impl(vib);
	dmu_write(vib->vib_objset, vib->vib_object, old_size, sizeof (vibe),
	    &vibe, tx);
	vib->vib_phys->vib_count++;
	new_size = vdev_indirect_births_size_impl(vib);

	new_entries = kmem_alloc(new_size, KM_SLEEP);
	if (old_size > 0) {
		bcopy(vib->vib_entries, new_entries, old_size);
		kmem_free(vib->vib_entries, old_size);
	}
	new_entries[vib->vib_phys->vib_count - 1] = vibe;
	vib->vib_entries = new_entries;
}
Example #4
0
static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt,
			const struct lu_buf *buf, loff_t *pos,
			struct thandle *th, int ignore_quota)
{
	struct osd_object  *obj  = osd_dt_obj(dt);
	struct osd_device  *osd = osd_obj2dev(obj);
	struct osd_thandle *oh;
	uint64_t            offset = *pos;
	int                 rc;

	ENTRY;

	LASSERT(dt_object_exists(dt));
	LASSERT(obj->oo_db);

	LASSERT(th != NULL);
	oh = container_of0(th, struct osd_thandle, ot_super);

	record_start_io(osd, WRITE, 0);

	dmu_write(osd->od_os, obj->oo_db->db_object, offset,
		(uint64_t)buf->lb_len, buf->lb_buf, oh->ot_tx);
	write_lock(&obj->oo_attr_lock);
	if (obj->oo_attr.la_size < offset + buf->lb_len) {
		obj->oo_attr.la_size = offset + buf->lb_len;
		write_unlock(&obj->oo_attr_lock);
		/* osd_object_sa_update() will be copying directly from oo_attr
		 * into dbuf.  any update within a single txg will copy the
		 * most actual */
		rc = osd_object_sa_update(obj, SA_ZPL_SIZE(osd),
					&obj->oo_attr.la_size, 8, oh);
		if (unlikely(rc))
			GOTO(out, rc);
	} else {
		write_unlock(&obj->oo_attr_lock);
	}

	*pos += buf->lb_len;
	rc = buf->lb_len;

out:
	record_end_io(osd, WRITE, 0, buf->lb_len,
		      buf->lb_len >> PAGE_CACHE_SHIFT);

	RETURN(rc);
}
Example #5
0
File: pios.c Project: 64116278/zfs
static int
zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object,
		uint64_t offset, uint64_t size, const void *buf)
{
	struct dmu_tx *tx;
	int rc, how = TXG_WAIT;
//	int flags = 0;

	if (run_args->flags & DMU_WRITE_NOWAIT)
		how = TXG_NOWAIT;

	while (1) {
		tx = dmu_tx_create(os);
		dmu_tx_hold_write(tx, object, offset, size);
		rc = dmu_tx_assign(tx, how);

		if (rc) {
			if (rc == ERESTART && how == TXG_NOWAIT) {
				dmu_tx_wait(tx);
				dmu_tx_abort(tx);
				continue;
			}
			zpios_print(run_args->file,
				    "Error in dmu_tx_assign(), %d", rc);
			dmu_tx_abort(tx);
			return (rc);
		}
		break;
	}

//	if (run_args->flags & DMU_WRITE_ZC)
//		flags |= DMU_WRITE_ZEROCOPY;

	dmu_write(os, object, offset, size, buf, tx);
	dmu_tx_commit(tx);

	return (0);
}
Example #6
0
static
#endif
int
zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain, char **retdomain,
    dmu_tx_t *tx)
{
	fuid_domain_t searchnode, *findnode;
	avl_index_t loc;

	/*
	 * If the dummy "nobody" domain then return an index of 0
	 * to cause the created FUID to be a standard POSIX id
	 * for the user nobody.
	 */
	if (domain[0] == '\0') {
		*retdomain = "";
		return (0);
	}

	searchnode.f_ksid = ksid_lookupdomain(domain);
	if (retdomain) {
		*retdomain = searchnode.f_ksid->kd_name;
	}
	if (!zfsvfs->z_fuid_loaded)
		zfs_fuid_init(zfsvfs, tx);

	rw_enter(&zfsvfs->z_fuid_lock, RW_READER);
	findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc);
	rw_exit(&zfsvfs->z_fuid_lock);

	if (findnode) {
		ksiddomain_rele(searchnode.f_ksid);
		return (findnode->f_idx);
	} else {
		fuid_domain_t *domnode;
		nvlist_t *nvp;
		nvlist_t **fuids;
		uint64_t retidx;
		size_t nvsize = 0;
		char *packed;
		dmu_buf_t *db;
		int i = 0;

		domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
		domnode->f_ksid = searchnode.f_ksid;

		rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
		retidx = domnode->f_idx = avl_numnodes(&zfsvfs->z_fuid_idx) + 1;

		avl_add(&zfsvfs->z_fuid_domain, domnode);
		avl_add(&zfsvfs->z_fuid_idx, domnode);
		/*
		 * Now resync the on-disk nvlist.
		 */
		VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

		domnode = avl_first(&zfsvfs->z_fuid_domain);
		fuids = kmem_alloc(retidx * sizeof (void *), KM_SLEEP);
		while (domnode) {
			VERIFY(nvlist_alloc(&fuids[i],
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
			VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
			    domnode->f_idx) == 0);
			VERIFY(nvlist_add_uint64(fuids[i],
			    FUID_OFFSET, 0) == 0);
			VERIFY(nvlist_add_string(fuids[i++], FUID_DOMAIN,
			    domnode->f_ksid->kd_name) == 0);
			domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode);
		}
		VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
		    fuids, retidx) == 0);
		for (i = 0; i != retidx; i++)
			nvlist_free(fuids[i]);
		kmem_free(fuids, retidx * sizeof (void *));
		VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
		packed = kmem_alloc(nvsize, KM_SLEEP);
		VERIFY(nvlist_pack(nvp, &packed, &nvsize,
		    NV_ENCODE_XDR, KM_SLEEP) == 0);
		nvlist_free(nvp);
		zfsvfs->z_fuid_size = nvsize;
		dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
		    zfsvfs->z_fuid_size, packed, tx);
		kmem_free(packed, zfsvfs->z_fuid_size);
		VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
		    FTAG, &db));
		dmu_buf_will_dirty(db, tx);
		*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
		dmu_buf_rele(db, FTAG);

		rw_exit(&zfsvfs->z_fuid_lock);
		return (retidx);
	}
}
Example #7
0
int
__osd_xattr_set(const struct lu_env *env, struct osd_object *obj,
		const struct lu_buf *buf, const char *name, int fl,
		struct osd_thandle *oh)
{
	struct osd_device *osd = osd_obj2dev(obj);
	dmu_buf_t         *xa_zap_db = NULL;
	dmu_buf_t         *xa_data_db = NULL;
	uint64_t           xa_data_obj;
	sa_handle_t       *sa_hdl = NULL;
	dmu_tx_t          *tx = oh->ot_tx;
	uint64_t           size;
	int                rc;

	LASSERT(obj->oo_sa_hdl);

	if (obj->oo_xattr == ZFS_NO_OBJECT) {
		struct lu_attr *la = &osd_oti_get(env)->oti_la;

		la->la_valid = LA_MODE;
		la->la_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
		rc = __osd_zap_create(env, osd, &xa_zap_db, tx, la,
				      obj->oo_db->db_object, 0);
		if (rc)
			return rc;

		obj->oo_xattr = xa_zap_db->db_object;
		rc = osd_object_sa_update(obj, SA_ZPL_XATTR(osd),
				&obj->oo_xattr, 8, oh);
		if (rc)
			goto out;
	}

	rc = -zap_lookup(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t), 1,
			 &xa_data_obj);
	if (rc == 0) {
		if (fl & LU_XATTR_CREATE) {
			rc = -EEXIST;
			goto out;
		}
		/*
		 * Entry already exists.
		 * We'll truncate the existing object.
		 */
		rc = __osd_obj2dbuf(env, osd->od_os, xa_data_obj,
					&xa_data_db);
		if (rc)
			goto out;

		rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
					SA_HDL_PRIVATE, &sa_hdl);
		if (rc)
			goto out;

		rc = -sa_lookup(sa_hdl, SA_ZPL_SIZE(osd), &size, 8);
		if (rc)
			goto out_sa;

		rc = -dmu_free_range(osd->od_os, xa_data_db->db_object,
				     0, DMU_OBJECT_END, tx);
		if (rc)
			goto out_sa;
	} else if (rc == -ENOENT) {
		struct lu_attr *la = &osd_oti_get(env)->oti_la;
		/*
		 * Entry doesn't exist, we need to create a new one and a new
		 * object to store the value.
		 */
		if (fl & LU_XATTR_REPLACE) {
			/* should be ENOATTR according to the
			 * man, but that is undefined here */
			rc = -ENODATA;
			goto out;
		}

		la->la_valid = LA_MODE;
		la->la_mode = S_IFREG | S_IRUGO | S_IWUSR;
		rc = __osd_object_create(env, obj, &xa_data_db, tx, la,
					 obj->oo_xattr);
		if (rc)
			goto out;
		xa_data_obj = xa_data_db->db_object;

		rc = -sa_handle_get(osd->od_os, xa_data_obj, NULL,
					SA_HDL_PRIVATE, &sa_hdl);
		if (rc)
			goto out;

		rc = -zap_add(osd->od_os, obj->oo_xattr, name, sizeof(uint64_t),
				1, &xa_data_obj, tx);
		if (rc)
			goto out_sa;
	} else {
		/* There was an error looking up the xattr name */
		goto out;
	}

	/* Finally write the xattr value */
	dmu_write(osd->od_os, xa_data_obj, 0, buf->lb_len, buf->lb_buf, tx);

	size = buf->lb_len;
	rc = -sa_update(sa_hdl, SA_ZPL_SIZE(osd), &size, 8, tx);

out_sa:
	sa_handle_destroy(sa_hdl);
out:
	if (xa_data_db != NULL)
		dmu_buf_rele(xa_data_db, FTAG);
	if (xa_zap_db != NULL)
		dmu_buf_rele(xa_zap_db, FTAG);

	return rc;
}
Example #8
0
/*
 * sync out AVL trees to persistent storage.
 */
void
zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
#ifdef HAVE_ZPL
	nvlist_t *nvp;
	nvlist_t **fuids;
	size_t nvsize = 0;
	char *packed;
	dmu_buf_t *db;
	fuid_domain_t *domnode;
	int numnodes;
	int i;

	if (!zfsvfs->z_fuid_dirty) {
		return;
	}

	rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);

	/*
	 * First see if table needs to be created?
	 */
	if (zfsvfs->z_fuid_obj == 0) {
		zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os,
		    DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE,
		    sizeof (uint64_t), tx);
		VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
		    ZFS_FUID_TABLES, sizeof (uint64_t), 1,
		    &zfsvfs->z_fuid_obj, tx) == 0);
	}

	VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	numnodes = avl_numnodes(&zfsvfs->z_fuid_idx);
	fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP);
	for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain); domnode; i++,
	    domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) {
		VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
		    domnode->f_idx) == 0);
		VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0);
		VERIFY(nvlist_add_string(fuids[i], FUID_DOMAIN,
		    domnode->f_ksid->kd_name) == 0);
	}
	VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
	    fuids, numnodes) == 0);
	for (i = 0; i != numnodes; i++)
		nvlist_free(fuids[i]);
	kmem_free(fuids, numnodes * sizeof (void *));
	VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
	packed = kmem_alloc(nvsize, KM_SLEEP);
	VERIFY(nvlist_pack(nvp, &packed, &nvsize,
	    NV_ENCODE_XDR, KM_SLEEP) == 0);
	nvlist_free(nvp);
	zfsvfs->z_fuid_size = nvsize;
	dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
	    zfsvfs->z_fuid_size, packed, tx);
	kmem_free(packed, zfsvfs->z_fuid_size);
	VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
	    FTAG, &db));
	dmu_buf_will_dirty(db, tx);
	*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
	dmu_buf_rele(db, FTAG);

	zfsvfs->z_fuid_dirty = B_FALSE;
	rw_exit(&zfsvfs->z_fuid_lock);
#endif /* HAVE_ZPL */
}
Example #9
0
int
zvol_strategy(buf_t *bp)
{
	zvol_state_t *zv = ddi_get_soft_state(zvol_state, getminor(bp->b_edev));
	uint64_t off, volsize;
	size_t size, resid;
	char *addr;
	objset_t *os;
	int error = 0;
	int sync;
	int reading;
	int txg_sync_needed = B_FALSE;

	if (zv == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (getminor(bp->b_edev) == 0) {
		bioerror(bp, EINVAL);
		biodone(bp);
		return (0);
	}

	if (zv->zv_readonly && !(bp->b_flags & B_READ)) {
		bioerror(bp, EROFS);
		biodone(bp);
		return (0);
	}

	off = ldbtob(bp->b_blkno);
	volsize = zv->zv_volsize;

	os = zv->zv_objset;
	ASSERT(os != NULL);
	sync = !(bp->b_flags & B_ASYNC) && !(zil_disable);

	bp_mapin(bp);
	addr = bp->b_un.b_addr;
	resid = bp->b_bcount;

	/*
	 * There must be no buffer changes when doing a dmu_sync() because
	 * we can't change the data whilst calculating the checksum.
	 * A better approach than a per zvol rwlock would be to lock ranges.
	 */
	reading = bp->b_flags & B_READ;
	if (reading || resid <= zvol_immediate_write_sz)
		rw_enter(&zv->zv_dslock, RW_READER);
	else
		rw_enter(&zv->zv_dslock, RW_WRITER);

	while (resid != 0 && off < volsize) {

		size = MIN(resid, 1UL << 20);	/* cap at 1MB per tx */

		if (size > volsize - off)	/* don't write past the end */
			size = volsize - off;

		if (reading) {
			error = dmu_read(os, ZVOL_OBJ, off, size, addr);
		} else {
			dmu_tx_t *tx = dmu_tx_create(os);
			dmu_tx_hold_write(tx, ZVOL_OBJ, off, size);
			error = dmu_tx_assign(tx, TXG_WAIT);
			if (error) {
				dmu_tx_abort(tx);
			} else {
				dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
				if (sync) {
					/* use the ZIL to commit this write */
					if (zvol_log_write(zv, tx, off, size,
					    addr) != 0) {
						txg_sync_needed = B_TRUE;
					}
				}
				dmu_tx_commit(tx);
			}
		}
		if (error)
			break;
		off += size;
		addr += size;
		resid -= size;
	}
	rw_exit(&zv->zv_dslock);

	if ((bp->b_resid = resid) == bp->b_bcount)
		bioerror(bp, off > volsize ? EINVAL : error);

	biodone(bp);

	if (sync) {
		if (txg_sync_needed)
			txg_wait_synced(dmu_objset_pool(os), 0);
		else
			zil_commit(zv->zv_zilog, UINT64_MAX, 0);
	}

	return (0);
}