Example #1
/* ARGSUSED */
int
zil_clear_log_chain(char *osname, void *txarg)
{
	zilog_t *zilog;
	zil_header_t *zh;
	objset_t *os;
	dmu_tx_t *tx;
	int error;

	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
	if (error) {
		cmn_err(CE_WARN, "can't open objset for %s", osname);
		return (0);
	}

	zilog = dmu_objset_zil(os);
	tx = dmu_tx_create(zilog->zl_os);
	(void) dmu_tx_assign(tx, TXG_WAIT);
	zh = zil_header_in_syncing_context(zilog);
	BP_ZERO(&zh->zh_log);
	dsl_dataset_dirty(dmu_objset_ds(os), tx);
	dmu_tx_commit(tx);
	dmu_objset_close(os);
	return (0);
}
Example #2
/*
 * Create and return an immediate write ZIL transaction.
 */
itx_t *
zvol_immediate_itx(offset_t off, ssize_t len, char *addr)
{
	itx_t *itx;
	lr_write_t *lr;

	itx = zil_itx_create(TX_WRITE, sizeof (*lr) + len);
	lr = (lr_write_t *)&itx->itx_lr;
	lr->lr_foid = ZVOL_OBJ;
	lr->lr_offset = off;
	lr->lr_length = len;
	lr->lr_blkoff = 0;
	BP_ZERO(&lr->lr_blkptr);
	bcopy(addr, (char *)itx + offsetof(itx_t, itx_lr) +
	    sizeof (*lr), len);
	itx->itx_wr_state = WR_COPIED;
	return (itx);
}
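The WR_COPIED path above stores the payload immediately after the fixed lr_write_t header inside a single itx allocation; the bcopy() destination, (char *)itx + offsetof(itx_t, itx_lr) + sizeof (*lr), is just lr + 1 spelled out long-hand. A minimal userspace sketch of that header-plus-trailing-payload pattern (record_t and record_create() are hypothetical, standard C only):

#include <stdlib.h>
#include <string.h>

/* hypothetical stand-in for an lr_write_t-style header */
typedef struct record {
	unsigned long r_offset;
	unsigned long r_length;
	/* payload bytes follow the header in the same allocation */
} record_t;

static record_t *
record_create(unsigned long off, const char *data, unsigned long len)
{
	record_t *r = malloc(sizeof (*r) + len);

	if (r == NULL)
		return (NULL);
	r->r_offset = off;
	r->r_length = len;
	memcpy(r + 1, data, len);	/* same effect as the bcopy() above */
	return (r);
}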
Example #3
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
	znode_t *zp, offset_t off, ssize_t resid, int ioflag,
	zil_callback_t callback, void *callback_data)
{
	itx_wr_state_t write_state;
	boolean_t slogging;
	uintptr_t fsync_cnt;
	ssize_t immediate_write_sz;

	if (zil_replaying(zilog, tx) || zp->z_unlinked) {
		if (callback != NULL)
			callback(callback_data);
		return;
	}

	immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
	    ? 0 : (ssize_t)zfs_immediate_write_sz;

	slogging = spa_has_slogs(zilog->zl_spa) &&
	    (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
	if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz)
		write_state = WR_INDIRECT;
	else if (ioflag & (FSYNC | FDSYNC))
		write_state = WR_COPIED;
	else
		write_state = WR_NEED_COPY;

	if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
		(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
	}

	while (resid) {
		itx_t *itx;
		lr_write_t *lr;
		ssize_t len;

		/*
		 * If the write would overflow the largest block then split it.
		 */
		if (write_state != WR_INDIRECT && resid > ZIL_MAX_LOG_DATA)
			len = SPA_MAXBLOCKSIZE >> 1;
		else
			len = resid;

		itx = zil_itx_create(txtype, sizeof (*lr) +
		    (write_state == WR_COPIED ? len : 0));
		lr = (lr_write_t *)&itx->itx_lr;
		if (write_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os,
		    zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
			zil_itx_destroy(itx);
			itx = zil_itx_create(txtype, sizeof (*lr));
			lr = (lr_write_t *)&itx->itx_lr;
			write_state = WR_NEED_COPY;
		}

		itx->itx_wr_state = write_state;
		if (write_state == WR_NEED_COPY)
			itx->itx_sod += len;
		lr->lr_foid = zp->z_id;
		lr->lr_offset = off;
		lr->lr_length = len;
		lr->lr_blkoff = 0;
		BP_ZERO(&lr->lr_blkptr);

		itx->itx_private = ZTOZSB(zp);

		if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) &&
		    (fsync_cnt == 0))
			itx->itx_sync = B_FALSE;

		itx->itx_callback = callback;
		itx->itx_callback_data = callback_data;
		zil_itx_assign(zilog, itx, tx);

		off += len;
		resid -= len;
	}
}
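When a record would not fit in the largest log block, zfs_log_write() splits the write and caps each chunk at half the maximum block size. A standalone sketch of the same cap-and-loop shape (split_write() and both constants are illustrative stand-ins, not the ZFS values):

#include <stdio.h>

#define	MAX_LOG_DATA	(98304L)	/* stand-in for ZIL_MAX_LOG_DATA */
#define	HALF_MAX_BLK	(65536L)	/* stand-in for SPA_MAXBLOCKSIZE >> 1 */

static void
split_write(long off, long resid)
{
	while (resid) {
		long len = (resid > MAX_LOG_DATA) ? HALF_MAX_BLK : resid;

		printf("log record: off=%ld len=%ld\n", off, len);
		off += len;
		resid -= len;
	}
}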
Example #4
File: zvol.c Project: alek-p/zfs
static void
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
    uint64_t size, int sync)
{
	uint32_t blocksize = zv->zv_volblocksize;
	zilog_t *zilog = zv->zv_zilog;
	boolean_t slogging;
	ssize_t immediate_write_sz;

	if (zil_replaying(zilog, tx))
		return;

	immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
		? 0 : zvol_immediate_write_sz;
	slogging = spa_has_slogs(zilog->zl_spa) &&
		(zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);

	while (size) {
		itx_t *itx;
		lr_write_t *lr;
		ssize_t len;
		itx_wr_state_t write_state;

		/*
		 * Unlike zfs_log_write() we can be called with
		 * up to DMU_MAX_ACCESS/2 (5MB) writes.
		 */
		if (blocksize > immediate_write_sz && !slogging &&
		    size >= blocksize && offset % blocksize == 0) {
			write_state = WR_INDIRECT; /* uses dmu_sync */
			len = blocksize;
		} else if (sync) {
			write_state = WR_COPIED;
			len = MIN(ZIL_MAX_LOG_DATA, size);
		} else {
			write_state = WR_NEED_COPY;
			len = MIN(ZIL_MAX_LOG_DATA, size);
		}

		itx = zil_itx_create(TX_WRITE, sizeof (*lr) +
		    (write_state == WR_COPIED ? len : 0));
		lr = (lr_write_t *)&itx->itx_lr;
		if (write_state == WR_COPIED && dmu_read(zv->zv_objset,
		    ZVOL_OBJ, offset, len, lr+1, DMU_READ_NO_PREFETCH) != 0) {
			zil_itx_destroy(itx);
			itx = zil_itx_create(TX_WRITE, sizeof (*lr));
			lr = (lr_write_t *)&itx->itx_lr;
			write_state = WR_NEED_COPY;
		}

		itx->itx_wr_state = write_state;
		if (write_state == WR_NEED_COPY)
			itx->itx_sod += len;
		lr->lr_foid = ZVOL_OBJ;
		lr->lr_offset = offset;
		lr->lr_length = len;
		lr->lr_blkoff = 0;
		BP_ZERO(&lr->lr_blkptr);

		itx->itx_private = zv;
		itx->itx_sync = sync;

		(void) zil_itx_assign(zilog, itx, tx);

		offset += len;
		size -= len;
	}
}
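Examples #3 and #4 choose among the same three itx states before building each record. A side-effect-free sketch of that decision, simplified from the file-system variant (the enum and choose_write_state() here are hypothetical; the real callers also weigh logbias and block alignment):

typedef enum { WR_INDIRECT, WR_COPIED, WR_NEED_COPY } wr_state_t;

static wr_state_t
choose_write_state(long resid, long immediate_write_sz, int slogging,
    int sync, long blksz)
{
	if (resid > immediate_write_sz && !slogging && resid <= blksz)
		return (WR_INDIRECT);	/* dmu_sync() the data; log its blkptr */
	if (sync)
		return (WR_COPIED);	/* copy the data into the record now */
	return (WR_NEED_COPY);		/* fetch via the DMU only if flushed */
}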
Example #5
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, offset_t off, ssize_t resid, int ioflag)
{
	uint32_t blocksize = zp->z_blksz;
	itx_wr_state_t write_state;
	uintptr_t fsync_cnt;

	if (zil_replaying(zilog, tx) || zp->z_unlinked)
		return;

	if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
		write_state = WR_INDIRECT;
	else if (!spa_has_slogs(zilog->zl_spa) &&
	    resid >= zfs_immediate_write_sz)
		write_state = WR_INDIRECT;
	else if (ioflag & (FSYNC | FDSYNC))
		write_state = WR_COPIED;
	else
		write_state = WR_NEED_COPY;

	if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
		(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
	}

	while (resid) {
		itx_t *itx;
		lr_write_t *lr;
		itx_wr_state_t wr_state = write_state;
		ssize_t len = resid;

		if (wr_state == WR_COPIED && resid > ZIL_MAX_COPIED_DATA)
			wr_state = WR_NEED_COPY;
		else if (wr_state == WR_INDIRECT)
			len = MIN(blocksize - P2PHASE(off, blocksize), resid);

		itx = zil_itx_create(txtype, sizeof (*lr) +
		    (wr_state == WR_COPIED ? len : 0));
		lr = (lr_write_t *)&itx->itx_lr;
		if (wr_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
		    zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
			zil_itx_destroy(itx);
			itx = zil_itx_create(txtype, sizeof (*lr));
			lr = (lr_write_t *)&itx->itx_lr;
			wr_state = WR_NEED_COPY;
		}

		itx->itx_wr_state = wr_state;
		lr->lr_foid = zp->z_id;
		lr->lr_offset = off;
		lr->lr_length = len;
		lr->lr_blkoff = 0;
		BP_ZERO(&lr->lr_blkptr);

		itx->itx_private = zp->z_zfsvfs;

		if (!(ioflag & (FSYNC | FDSYNC)) && (zp->z_sync_cnt == 0) &&
		    (fsync_cnt == 0))
			itx->itx_sync = B_FALSE;

		zil_itx_assign(zilog, itx, tx);

		off += len;
		resid -= len;
	}
}
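The WR_INDIRECT length computation above, MIN(blocksize - P2PHASE(off, blocksize), resid), keeps each record inside one block so dmu_sync() can log a single block pointer. The same arithmetic in plain C (phase() and indirect_len() are hypothetical names; blocksize must be a power of two):

/* offset within a power-of-two block, like P2PHASE(off, bs) */
static long
phase(long off, long bs)
{
	return (off & (bs - 1));
}

/* bytes from off up to the next block boundary, capped by resid */
static long
indirect_len(long off, long resid, long bs)
{
	long room = bs - phase(off, bs);

	return (resid < room ? resid : room);
}
/* e.g. indirect_len(130048, 8192, 131072) == 1024 */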
Example #6
int
zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t len,
    char *addr)
{
	dmu_object_info_t doi;
	ssize_t nbytes;
	itx_t *itx;
	lr_write_t *lr;
	objset_t *os;
	dmu_buf_t *db;
	uint64_t txg;
	uint64_t boff;
	int error;
	uint32_t blocksize;

	/* handle common case */
	if (len <= zvol_immediate_write_sz) {
		itx = zvol_immediate_itx(off, len, addr);
		(void) zil_itx_assign(zv->zv_zilog, itx, tx);
		return (0);
	}

	txg = dmu_tx_get_txg(tx);
	os = zv->zv_objset;

	/*
	 * We need to dmu_sync() each block in the range.
	 * For this we need the blocksize.
	 */
	error = dmu_object_info(os, ZVOL_OBJ, &doi);
	if (error)
		return (error);
	blocksize = doi.doi_data_block_size;

	/*
	 * We need to immediately write or dmu_sync() each block in the range.
	 */
	while (len) {
		nbytes = MIN(len, blocksize - P2PHASE(off, blocksize));
		if (nbytes <= zvol_immediate_write_sz) {
			itx = zvol_immediate_itx(off, nbytes, addr);
		} else {
			boff = P2ALIGN_TYPED(off, blocksize, uint64_t);
			itx = zil_itx_create(TX_WRITE, sizeof (*lr));
			lr = (lr_write_t *)&itx->itx_lr;
			lr->lr_foid = ZVOL_OBJ;
			lr->lr_offset = off;
			lr->lr_length = nbytes;
			lr->lr_blkoff = off - boff;
			BP_ZERO(&lr->lr_blkptr);

			/* XXX - we should do these IOs in parallel */
			VERIFY(0 == dmu_buf_hold(os, ZVOL_OBJ, boff,
			    FTAG, &db));
			ASSERT(boff == db->db_offset);
			error = dmu_sync(NULL, db, &lr->lr_blkptr,
			    txg, NULL, NULL);
			dmu_buf_rele(db, FTAG);
			if (error) {
				kmem_free(itx, offsetof(itx_t, itx_lr));
				return (error);
			}
			itx->itx_wr_state = WR_COPIED;
		}
		(void) zil_itx_assign(zv->zv_zilog, itx, tx);
		len -= nbytes;
		off += nbytes;
	}
	return (0);
}
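In the dmu_sync() branch above, boff is off rounded down to a block boundary and lr_blkoff records where the write starts within that block. The alignment arithmetic, sketched in plain C (align_down() is a hypothetical analogue of P2ALIGN_TYPED(); blocksize must be a power of two):

static unsigned long
align_down(unsigned long off, unsigned long blocksize)
{
	return (off & ~(blocksize - 1));
}
/* e.g. align_down(70000, 65536) == 65536, giving lr_blkoff = 4464 */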
Example #7
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
	znode_t *zp, offset_t off, ssize_t resid, int ioflag)
{
	itx_wr_state_t write_state;
	boolean_t slogging;
	uintptr_t fsync_cnt = 0;	/* tsd lookup below is #ifndef __APPLE__ */

	if (zilog == NULL || zp->z_unlinked)
		return;

	/*
	 * Writes are handled in three different ways:
	 *
	 * WR_INDIRECT:
	 *    If the write is greater than zfs_immediate_write_sz and there are
	 *    no separate logs in this pool then later *if* we need to log the
	 *    write then dmu_sync() is used to immediately write the block and
	 *    its block pointer is put in the log record.
	 * WR_COPIED:
	 *    If we know we'll immediately be committing the
	 *    transaction (FDSYNC (O_DSYNC)), then we allocate a larger
	 *    log record here for the data and copy the data in.
	 * WR_NEED_COPY:
	 *    Otherwise we don't allocate a buffer, and *if* we need to
	 *    flush the write later then a buffer is allocated and
	 *    we retrieve the data using the dmu.
	 */
	slogging = spa_has_slogs(zilog->zl_spa);
	if (resid > zfs_immediate_write_sz && !slogging)
		write_state = WR_INDIRECT;
	else if (ioflag & FDSYNC)
		write_state = WR_COPIED;
	else
		write_state = WR_NEED_COPY;
#ifndef __APPLE__
	if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
		(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
	}
#endif
	while (resid) {
		itx_t *itx;
		lr_write_t *lr;
		ssize_t len;

		/*
		 * If there are slogs and the write would overflow the largest
		 * block, then because we don't want to use the main pool
		 * to dmu_sync, we have to split the write.
		 */
		if (slogging && resid > ZIL_MAX_LOG_DATA)
			len = SPA_MAXBLOCKSIZE >> 1;
		else
			len = resid;

		itx = zil_itx_create(txtype, sizeof (*lr) +
		    (write_state == WR_COPIED ? len : 0));
		lr = (lr_write_t *)&itx->itx_lr;
		if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
		    zp->z_id, off, len, lr + 1) != 0) {
			kmem_free(itx, offsetof(itx_t, itx_lr) +
			    itx->itx_lr.lrc_reclen);
			itx = zil_itx_create(txtype, sizeof (*lr));
			lr = (lr_write_t *)&itx->itx_lr;
			write_state = WR_NEED_COPY;
		}

		itx->itx_wr_state = write_state;
		lr->lr_foid = zp->z_id;
		lr->lr_offset = off;
		lr->lr_length = len;
		lr->lr_blkoff = 0;
		BP_ZERO(&lr->lr_blkptr);

		itx->itx_private = zp->z_zfsvfs;

		if ((zp->z_sync_cnt != 0) || (fsync_cnt != 0))
			itx->itx_sync = B_TRUE;
		else
			itx->itx_sync = B_FALSE;

		zp->z_last_itx = zil_itx_assign(zilog, itx, tx);

		off += len;
		resid -= len;
	}
}
Example #8
File: zil.c Project: harshada/zfs
/*
 * Start a log block write and advance to the next log block.
 * Calls are serialized.
 */
static lwb_t *
zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
{
	lwb_t *nlwb;
	zil_trailer_t *ztp = (zil_trailer_t *)(lwb->lwb_buf + lwb->lwb_sz) - 1;
	spa_t *spa = zilog->zl_spa;
	blkptr_t *bp = &ztp->zit_next_blk;
	uint64_t txg;
	uint64_t zil_blksz;
	int error;

	ASSERT(lwb->lwb_nused <= ZIL_BLK_DATA_SZ(lwb));

	/*
	 * Allocate the next block and save its address in this block
	 * before writing it in order to establish the log chain.
	 * Note that if the allocation of nlwb synced before we wrote
	 * the block that points at it (lwb), we'd leak it if we crashed.
	 * Therefore, we don't do txg_rele_to_sync() until zil_lwb_write_done().
	 */
	txg = txg_hold_open(zilog->zl_dmu_pool, &lwb->lwb_txgh);
	txg_rele_to_quiesce(&lwb->lwb_txgh);

	/*
	 * Pick a ZIL blocksize. We request a size that is the
	 * maximum of the previous used size, the current used size and
	 * the amount waiting in the queue.
	 */
	zil_blksz = MAX(zilog->zl_prev_used,
	    zilog->zl_cur_used + sizeof (*ztp));
	zil_blksz = MAX(zil_blksz, zilog->zl_itx_list_sz + sizeof (*ztp));
	zil_blksz = P2ROUNDUP_TYPED(zil_blksz, ZIL_MIN_BLKSZ, uint64_t);
	if (zil_blksz > ZIL_MAX_BLKSZ)
		zil_blksz = ZIL_MAX_BLKSZ;

	BP_ZERO(bp);
	/* pass the old blkptr in order to spread log blocks across devs */
	error = zio_alloc_blk(spa, zil_blksz, bp, &lwb->lwb_blk, txg);
	if (error) {
		dmu_tx_t *tx = dmu_tx_create_assigned(zilog->zl_dmu_pool, txg);

		/*
		 * We dirty the dataset to ensure that zil_sync() will
		 * be called to remove this lwb from our zl_lwb_list.
		 * Failing to do so may leave an lwb with a NULL lwb_buf
		 * hanging around on the zl_lwb_list.
		 */
		dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
		dmu_tx_commit(tx);

		/*
		 * We've just experienced an allocation failure, so we
		 * terminate the current lwb and send it on its way.
		 */
		ztp->zit_pad = 0;
		ztp->zit_nused = lwb->lwb_nused;
		ztp->zit_bt.zbt_cksum = lwb->lwb_blk.blk_cksum;
		zio_nowait(lwb->lwb_zio);

		/*
		 * Returning NULL causes the caller to fall back to
		 * txg_wait_synced().
		 */
		return (NULL);
	}

	ASSERT3U(bp->blk_birth, ==, txg);
	ztp->zit_pad = 0;
	ztp->zit_nused = lwb->lwb_nused;
	ztp->zit_bt.zbt_cksum = lwb->lwb_blk.blk_cksum;
	bp->blk_cksum = lwb->lwb_blk.blk_cksum;
	bp->blk_cksum.zc_word[ZIL_ZC_SEQ]++;

	/*
	 * Allocate a new log write buffer (lwb).
	 */
	nlwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP);

	nlwb->lwb_zilog = zilog;
	nlwb->lwb_blk = *bp;
	nlwb->lwb_nused = 0;
	nlwb->lwb_sz = BP_GET_LSIZE(&nlwb->lwb_blk);
	nlwb->lwb_buf = zio_buf_alloc(nlwb->lwb_sz);
	nlwb->lwb_max_txg = txg;
	nlwb->lwb_zio = NULL;

	/*
	 * Put new lwb at the end of the log chain
	 */
	mutex_enter(&zilog->zl_lock);
	list_insert_tail(&zilog->zl_lwb_list, nlwb);
	mutex_exit(&zilog->zl_lock);

	/* Record the block for later vdev flushing */
	zil_add_block(zilog, &lwb->lwb_blk);

	/*
	 * kick off the write for the old log block
	 */
	dprintf_bp(&lwb->lwb_blk, "lwb %p txg %llu: ", lwb, txg);
	ASSERT(lwb->lwb_zio);
	zio_nowait(lwb->lwb_zio);

	return (nlwb);
}
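The block-size heuristic in zil_lwb_write_start() takes the largest of the previous usage, the current usage, and the queued itx bytes, rounds that up to the ZIL minimum, and clamps it at the maximum. A sketch of the same computation (the constants are illustrative stand-ins, and the trailer bytes the real code reserves are ignored):

#define	MIN_BLKSZ	(4096UL)	/* stand-in for ZIL_MIN_BLKSZ */
#define	MAX_BLKSZ	(131072UL)	/* stand-in for ZIL_MAX_BLKSZ */

static unsigned long
pick_blksz(unsigned long prev_used, unsigned long cur_used,
    unsigned long queued)
{
	unsigned long sz;

	sz = (prev_used > cur_used) ? prev_used : cur_used;
	if (queued > sz)
		sz = queued;
	/* round up to a multiple of MIN_BLKSZ, like P2ROUNDUP_TYPED() */
	sz = (sz + MIN_BLKSZ - 1) & ~(MIN_BLKSZ - 1);
	return (sz > MAX_BLKSZ ? MAX_BLKSZ : sz);
}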
Example #9
File: zil.c Project: harshada/zfs
int
zil_claim(char *osname, void *txarg)
{
	dmu_tx_t *tx = txarg;
	uint64_t first_txg = dmu_tx_get_txg(tx);
	zilog_t *zilog;
	zil_header_t *zh;
	objset_t *os;
	int error;

	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
	if (error) {
		cmn_err(CE_WARN, "can't open objset for %s", osname);
		return (0);
	}

	zilog = dmu_objset_zil(os);
	zh = zil_header_in_syncing_context(zilog);

	if (zilog->zl_spa->spa_log_state == SPA_LOG_CLEAR) {
		if (!BP_IS_HOLE(&zh->zh_log))
			zio_free_blk(zilog->zl_spa, &zh->zh_log, first_txg);
		BP_ZERO(&zh->zh_log);
		dsl_dataset_dirty(dmu_objset_ds(os), tx);
	}

	/*
	 * Record here whether the zil has any records to replay.
	 * If the header block pointer is null or the block points
	 * to the stubby then we know there are no valid log records.
	 * We use the header to store this state as the zilog gets
	 * freed later in dmu_objset_close().
	 * The flags (and the rest of the header fields) are cleared in
	 * zil_sync() as a result of a zil_destroy(), after replaying the log.
	 *
	 * Note that the intent log can be empty but still need the
	 * stubby to be claimed.
	 */
	if (!zil_empty(zilog)) {
		zh->zh_flags |= ZIL_REPLAY_NEEDED;
		dsl_dataset_dirty(dmu_objset_ds(os), tx);
	}

	/*
	 * Claim all log blocks if we haven't already done so, and remember
	 * the highest claimed sequence number.  This ensures that if we can
	 * read only part of the log now (e.g. due to a missing device),
	 * but we can read the entire log later, we will not try to replay
	 * or destroy beyond the last block we successfully claimed.
	 */
	ASSERT3U(zh->zh_claim_txg, <=, first_txg);
	if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) {
		zh->zh_claim_txg = first_txg;
		zh->zh_claim_seq = zil_parse(zilog, zil_claim_log_block,
		    zil_claim_log_record, tx, first_txg);
		dsl_dataset_dirty(dmu_objset_ds(os), tx);
	}

	ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
	dmu_objset_close(os);
	return (0);
}
Example #10
File: zil.c Project: harshada/zfs
/*
 * Create an on-disk intent log.
 */
static void
zil_create(zilog_t *zilog)
{
	const zil_header_t *zh = zilog->zl_header;
	lwb_t *lwb;
	uint64_t txg = 0;
	dmu_tx_t *tx = NULL;
	blkptr_t blk;
	int error = 0;

	/*
	 * Wait for any previous destroy to complete.
	 */
	txg_wait_synced(zilog->zl_dmu_pool, zilog->zl_destroy_txg);

	ASSERT(zh->zh_claim_txg == 0);
	ASSERT(zh->zh_replay_seq == 0);

	blk = zh->zh_log;

	/*
	 * If we don't already have an initial log block or we have one
	 * but it's the wrong endianness, then allocate one.
	 */
	if (BP_IS_HOLE(&blk) || BP_SHOULD_BYTESWAP(&blk)) {
		tx = dmu_tx_create(zilog->zl_os);
		(void) dmu_tx_assign(tx, TXG_WAIT);
		dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
		txg = dmu_tx_get_txg(tx);

		if (!BP_IS_HOLE(&blk)) {
			zio_free_blk(zilog->zl_spa, &blk, txg);
			BP_ZERO(&blk);
		}

		error = zio_alloc_blk(zilog->zl_spa, ZIL_MIN_BLKSZ, &blk,
		    NULL, txg);

		if (error == 0)
			zil_init_log_chain(zilog, &blk);
	}

	/*
	 * Allocate a log write buffer (lwb) for the first log block.
	 */
	if (error == 0) {
		lwb = kmem_cache_alloc(zil_lwb_cache, KM_SLEEP);
		lwb->lwb_zilog = zilog;
		lwb->lwb_blk = blk;
		lwb->lwb_nused = 0;
		lwb->lwb_sz = BP_GET_LSIZE(&lwb->lwb_blk);
		lwb->lwb_buf = zio_buf_alloc(lwb->lwb_sz);
		lwb->lwb_max_txg = txg;
		lwb->lwb_zio = NULL;

		mutex_enter(&zilog->zl_lock);
		list_insert_tail(&zilog->zl_lwb_list, lwb);
		mutex_exit(&zilog->zl_lock);
	}

	/*
	 * If we just allocated the first log block, commit our transaction
	 * and wait for zil_sync() to stuff the block pointer into zh_log.
	 * (zh is part of the MOS, so we cannot modify it in open context.)
	 */
	if (tx != NULL) {
		dmu_tx_commit(tx);
		txg_wait_synced(zilog->zl_dmu_pool, txg);
	}

	ASSERT(bcmp(&blk, &zh->zh_log, sizeof (blk)) == 0);
}
Example #11
File: zil.c Project: harshada/zfs
/*
 * Called in syncing context to free committed log blocks and update log header.
 */
void
zil_sync(zilog_t *zilog, dmu_tx_t *tx)
{
	zil_header_t *zh = zil_header_in_syncing_context(zilog);
	uint64_t txg = dmu_tx_get_txg(tx);
	spa_t *spa = zilog->zl_spa;
	lwb_t *lwb;

	/*
	 * We don't zero out zl_destroy_txg, so make sure we don't try
	 * to destroy it twice.
	 */
	if (spa_sync_pass(spa) != 1)
		return;

	mutex_enter(&zilog->zl_lock);

	ASSERT(zilog->zl_stop_sync == 0);

	zh->zh_replay_seq = zilog->zl_replayed_seq[txg & TXG_MASK];

	if (zilog->zl_destroy_txg == txg) {
		blkptr_t blk = zh->zh_log;

		ASSERT(list_head(&zilog->zl_lwb_list) == NULL);

		bzero(zh, sizeof (zil_header_t));
		bzero(zilog->zl_replayed_seq, sizeof (zilog->zl_replayed_seq));

		if (zilog->zl_keep_first) {
			/*
			 * If this block was part of log chain that couldn't
			 * be claimed because a device was missing during
			 * zil_claim(), but that device later returns,
			 * then this block could erroneously appear valid.
			 * To guard against this, assign a new GUID to the new
			 * log chain so it doesn't matter what blk points to.
			 */
			zil_init_log_chain(zilog, &blk);
			zh->zh_log = blk;
		}
	}

	while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
		zh->zh_log = lwb->lwb_blk;
		if (lwb->lwb_buf != NULL || lwb->lwb_max_txg > txg)
			break;
		list_remove(&zilog->zl_lwb_list, lwb);
		zio_free_blk(spa, &lwb->lwb_blk, txg);
		kmem_cache_free(zil_lwb_cache, lwb);

		/*
		 * If we don't have anything left in the lwb list then
		 * we've had an allocation failure and we need to zero
		 * out the zil_header blkptr so that we don't end
		 * up freeing the same block twice.
		 */
		if (list_head(&zilog->zl_lwb_list) == NULL)
			BP_ZERO(&zh->zh_log);
	}
	mutex_exit(&zilog->zl_lock);
}
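zil_sync() retires log blocks only from the head of the chain: it stops at the first lwb whose write has not completed (lwb_buf still set) or which may still be needed past this txg. A generic sketch of that retire-the-synced-prefix walk (the list type and field names are hypothetical):

#include <stdlib.h>

typedef struct node {
	struct node *n_next;
	int n_busy;		/* analogue of lwb_buf != NULL */
	unsigned long n_max_txg;
} node_t;

/* pop fully-synced nodes off the head; stop at the first live one */
static node_t *
retire_prefix(node_t *head, unsigned long txg)
{
	while (head != NULL && !head->n_busy && head->n_max_txg <= txg) {
		node_t *next = head->n_next;

		free(head);	/* analogue of zio_free_blk() + cache free */
		head = next;
	}
	return (head);
}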
Example #12
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
              znode_t *zp, offset_t off, ssize_t resid, int ioflag)
{
    itx_wr_state_t write_state;
    boolean_t slogging;
    uintptr_t fsync_cnt;

    if (zilog == NULL || zp->z_unlinked)
        return;

    ZFS_HANDLE_REPLAY(zilog, tx); /* returns early when replaying */

    slogging = spa_has_slogs(zilog->zl_spa);
    if (resid > zfs_immediate_write_sz && !slogging && resid <= zp->z_blksz)
        write_state = WR_INDIRECT;
    else if (ioflag & (FSYNC | FDSYNC))
        write_state = WR_COPIED;
    else
        write_state = WR_NEED_COPY;

    if ((fsync_cnt = (uintptr_t)tsd_get(zfs_fsyncer_key)) != 0) {
        (void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
    }

    while (resid) {
        itx_t *itx;
        lr_write_t *lr;
        ssize_t len;

        /*
         * If the write would overflow the largest block then split it.
         */
        if (write_state != WR_INDIRECT && resid > ZIL_MAX_LOG_DATA)
            len = SPA_MAXBLOCKSIZE >> 1;
        else
            len = resid;

        itx = zil_itx_create(txtype, sizeof (*lr) +
                             (write_state == WR_COPIED ? len : 0));
        lr = (lr_write_t *)&itx->itx_lr;
        if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
                zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
            kmem_free(itx, offsetof(itx_t, itx_lr) +
                      itx->itx_lr.lrc_reclen);
            itx = zil_itx_create(txtype, sizeof (*lr));
            lr = (lr_write_t *)&itx->itx_lr;
            write_state = WR_NEED_COPY;
        }

        itx->itx_wr_state = write_state;
        if (write_state == WR_NEED_COPY)
            itx->itx_sod += len;
        lr->lr_foid = zp->z_id;
        lr->lr_offset = off;
        lr->lr_length = len;
        lr->lr_blkoff = 0;
        BP_ZERO(&lr->lr_blkptr);

        itx->itx_private = zp->z_zfsvfs;

        if ((zp->z_sync_cnt != 0) || (fsync_cnt != 0) ||
                (ioflag & (FSYNC | FDSYNC)))
            itx->itx_sync = B_TRUE;
        else
            itx->itx_sync = B_FALSE;

        zp->z_last_itx = zil_itx_assign(zilog, itx, tx);

        off += len;
        resid -= len;
    }
}