Example #1
File: trim_map.c  Project: coyizumi/cs111
static void
trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
{
	trim_map_t *tm = vd->vdev_trimmap;
	trim_seg_t *ts;
	uint64_t size, offset, txgtarget, txgsafe;
	int64_t hard, soft;
	hrtime_t timelimit;

	ASSERT(vd->vdev_ops->vdev_op_leaf);

	if (tm == NULL)
		return;

	timelimit = gethrtime() - (hrtime_t)trim_timeout * NANOSEC;
	if (vd->vdev_isl2cache) {
		txgsafe = UINT64_MAX;
		txgtarget = UINT64_MAX;
	} else {
		txgsafe = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa));
		if (txgsafe > trim_txg_delay)
			txgtarget = txgsafe - trim_txg_delay;
		else
			txgtarget = 0;
	}

	mutex_enter(&tm->tm_lock);
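	/*
	 * Throttle how much is issued in one pass: "hard" forces extra
	 * segments out whenever the pending count exceeds
	 * trim_vdev_max_pending; "soft" is this pass's overall budget,
	 * sized so the backlog drains over roughly trim_timeout passes.
	 */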
	hard = 0;
	if (tm->tm_pending > trim_vdev_max_pending)
		hard = (tm->tm_pending - trim_vdev_max_pending) / 4;
	soft = P2ROUNDUP(hard + tm->tm_pending / trim_timeout + 1, 64);
	/* Loop until we have sent all outstanding frees */
	while (soft > 0 &&
	    (ts = trim_map_first(tm, txgtarget, txgsafe, timelimit, hard > 0))
	    != NULL) {
		TRIM_MAP_REM(tm, ts);
		avl_remove(&tm->tm_queued_frees, ts);
		avl_add(&tm->tm_inflight_frees, ts);
		size = ts->ts_end - ts->ts_start;
		offset = ts->ts_start;
		/*
		 * We drop the lock while we call zio_nowait, as the I/O
		 * scheduler may run a different I/O (e.g. a write) that
		 * would otherwise take this lock recursively.
		 */
		mutex_exit(&tm->tm_lock);

		zio_nowait(zio_trim(zio, spa, vd, offset, size));

		soft -= TRIM_MAP_SEGS(size);
		hard -= TRIM_MAP_SEGS(size);
		mutex_enter(&tm->tm_lock);
	}
	mutex_exit(&tm->tm_lock);
}
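The txgsafe/txgtarget computation above gates which queued frees may be trimmed: never beyond what has safely synced, and trailing the synced txg by trim_txg_delay transaction groups. Since it is plain arithmetic, it can be tried in isolation; a minimal userland sketch, where the variables merely stand in for spa_last_synced_txg(), spa_freeze_txg(), and the trim_txg_delay tunable (the values are illustrative, not ZFS defaults):

#include <stdint.h>
#include <stdio.h>

#define	MIN(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	uint64_t last_synced = 1000;	  /* stand-in: spa_last_synced_txg() */
	uint64_t freeze_txg = UINT64_MAX; /* stand-in: spa_freeze_txg() */
	uint64_t trim_txg_delay = 32;	  /* illustrative tunable value */
	uint64_t txgsafe, txgtarget;

	/* Never TRIM past what has safely reached stable storage. */
	txgsafe = MIN(last_synced, freeze_txg);
	/* Trail the synced txg by trim_txg_delay transaction groups. */
	txgtarget = (txgsafe > trim_txg_delay) ?
	    txgsafe - trim_txg_delay : 0;

	printf("txgsafe=%llu txgtarget=%llu\n",
	    (unsigned long long)txgsafe, (unsigned long long)txgtarget);
	return (0);
}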
Example #2
int
zil_claim(char *osname, void *txarg)
{
	dmu_tx_t *tx = txarg;
	uint64_t first_txg = dmu_tx_get_txg(tx);
	zilog_t *zilog;
	zil_header_t *zh;
	objset_t *os;
	int error;

	error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
	if (error) {
		cmn_err(CE_WARN, "can't open objset for %s", osname);
		return (0);
	}

	zilog = dmu_objset_zil(os);
	zh = zil_header_in_syncing_context(zilog);

	/*
	 * Record here whether the zil has any records to replay.
	 * If the header block pointer is null or the block points
	 * to the stubby then we know there are no valid log records.
	 * We use the header to store this state as the zilog gets
	 * freed later in dmu_objset_close().
	 * The flags (and the rest of the header fields) are cleared in
	 * zil_sync() as a result of a zil_destroy(), after replaying the log.
	 *
	 * Note, the intent log can be empty but still need the
	 * stubby to be claimed.
	 */
	if (!zil_empty(zilog))
		zh->zh_flags |= ZIL_REPLAY_NEEDED;

	/*
	 * Claim all log blocks if we haven't already done so, and remember
	 * the highest claimed sequence number.  This ensures that if we can
	 * read only part of the log now (e.g. due to a missing device),
	 * but we can read the entire log later, we will not try to replay
	 * or destroy beyond the last block we successfully claimed.
	 */
	ASSERT3U(zh->zh_claim_txg, <=, first_txg);
	if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) {
		zh->zh_claim_txg = first_txg;
		zh->zh_claim_seq = zil_parse(zilog, zil_claim_log_block,
		    zil_claim_log_record, tx, first_txg);
		dsl_dataset_dirty(dmu_objset_ds(os), tx);
	}

	ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
	dmu_objset_close(os);
	return (0);
}
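The claim gate in the middle of the function reduces to: claim exactly once (zh_claim_txg still zero), and only when the head block pointer is not a hole. A hedged stand-alone sketch of just that decision, using hypothetical stand-in types rather than the real ZFS structures:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the zil_header_t fields used here. */
struct hdr {
	uint64_t claim_txg;	/* 0 means "never claimed" */
	int log_is_hole;	/* stand-in for BP_IS_HOLE(&zh->zh_log) */
};

static void
claim_once(struct hdr *zh, uint64_t first_txg)
{
	if (zh->claim_txg == 0 && !zh->log_is_hole) {
		zh->claim_txg = first_txg;	/* remember claim point */
		printf("claimed at txg %llu\n",
		    (unsigned long long)first_txg);
	} else {
		printf("nothing to do at txg %llu\n",
		    (unsigned long long)first_txg);
	}
}

int
main(void)
{
	struct hdr zh = { 0, 0 };

	claim_once(&zh, 42);	/* claims */
	claim_once(&zh, 43);	/* idempotent: already claimed */
	return (0);
}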
Example #3
File: zil.c  Project: harshada/zfs
/*
 * If there are any in-memory intent log transactions which have now been
 * synced then start up a taskq to free them.
 */
void
zil_clean(zilog_t *zilog)
{
	itx_t *itx;

	mutex_enter(&zilog->zl_lock);
	itx = list_head(&zilog->zl_itx_list);
	if ((itx != NULL) &&
	    (itx->itx_lr.lrc_txg <= spa_last_synced_txg(zilog->zl_spa))) {
		(void) taskq_dispatch(zilog->zl_clean_taskq,
		    (task_func_t *)zil_itx_clean, zilog, TQ_SLEEP);
	}
	mutex_exit(&zilog->zl_lock);
}
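Only the list head is examined before dispatching: as Example #6 below shows, zl_itx_list is consumed in txg order, so if the oldest itx has not synced yet, nothing behind it is eligible either, and the taskq dispatch can be skipped entirely.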
Example #4
/*
 * Update all disk labels, generate a fresh config based on the current
 * in-core state, and sync the global config cache (do not sync the config
 * cache if this is a booting rootpool).
 */
void
spa_config_update(spa_t *spa, int what)
{
	vdev_t *rvd = spa->spa_root_vdev;
	uint64_t txg;
	int c;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	txg = spa_last_synced_txg(spa) + 1;
	if (what == SPA_CONFIG_UPDATE_POOL) {
		vdev_config_dirty(rvd);
	} else {
		/*
		 * If we have top-level vdevs that were added but have
		 * not yet been prepared for allocation, do that now.
		 * (It's safe now because the config cache is up to date,
		 * so it will be able to translate the new DVAs.)
		 * See comments in spa_vdev_add() for full details.
		 */
		for (c = 0; c < rvd->vdev_children; c++) {
			vdev_t *tvd = rvd->vdev_child[c];
			if (tvd->vdev_ms_array == 0)
				vdev_metaslab_set_size(tvd);
			vdev_expand(tvd, txg);
		}
	}
	spa_config_exit(spa, SCL_ALL, FTAG);

	/*
	 * Wait for the mosconfig to be regenerated and synced.
	 */
	txg_wait_synced(spa->spa_dsl_pool, txg);

	/*
	 * Update the global config cache to reflect the new mosconfig.
	 */
	if (!spa->spa_is_root) {
		spa_write_cachefile(spa, B_FALSE,
		    what != SPA_CONFIG_UPDATE_POOL);
	}

	if (what == SPA_CONFIG_UPDATE_POOL)
		spa_config_update(spa, SPA_CONFIG_UPDATE_VDEVS);
}
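Note how the txg is chosen: spa_last_synced_txg(spa) + 1 is the first transaction group that has not yet synced, so the txg_wait_synced() call returns only once the config dirtied above has actually been written out. Note also the tail recursion: a SPA_CONFIG_UPDATE_POOL update always finishes with a second, SPA_CONFIG_UPDATE_VDEVS pass.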
Example #5
static void
trim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
{
	trim_map_t *tm = vd->vdev_trimmap;
	trim_seg_t *ts;
	uint64_t size, txgtarget, txgsafe;
	hrtime_t timelimit;

	ASSERT(vd->vdev_ops->vdev_op_leaf);

	if (tm == NULL)
		return;

	timelimit = gethrtime() - trim_timeout * NANOSEC;
	if (vd->vdev_isl2cache) {
		txgsafe = UINT64_MAX;
		txgtarget = UINT64_MAX;
	} else {
		txgsafe = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa));
		if (txgsafe > trim_txg_delay)
			txgtarget = txgsafe - trim_txg_delay;
		else
			txgtarget = 0;
	}

	mutex_enter(&tm->tm_lock);
	/* Loop until we have sent all outstanding frees */
	while ((ts = trim_map_first(tm, txgtarget, txgsafe, timelimit))
	    != NULL) {
		list_remove(&tm->tm_head, ts);
		avl_remove(&tm->tm_queued_frees, ts);
		avl_add(&tm->tm_inflight_frees, ts);
		size = ts->ts_end - ts->ts_start;
		zio_nowait(zio_trim(zio, spa, vd, ts->ts_start, size));
		TRIM_MAP_SDEC(tm, size);
		TRIM_MAP_QDEC(tm);
	}
	mutex_exit(&tm->tm_lock);
}
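Compare this with Example #1, which appears to be a later revision of the same function: that version adds the soft/hard throttling of how many segments are issued per pass, and drops tm_lock around zio_nowait() to avoid the recursive-lock scenario described in its comment, whereas this one issues every eligible TRIM in a single pass with the lock held.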
Example #6
File: zil.c  Project: harshada/zfs
/*
 * Free up all in-memory intent log transactions that have now been synced.
 */
static void
zil_itx_clean(zilog_t *zilog)
{
	uint64_t synced_txg = spa_last_synced_txg(zilog->zl_spa);
	uint64_t freeze_txg = spa_freeze_txg(zilog->zl_spa);
	list_t clean_list;
	itx_t *itx;

	list_create(&clean_list, sizeof (itx_t), offsetof(itx_t, itx_node));

	mutex_enter(&zilog->zl_lock);
	/* wait for a log writer to finish walking list */
	while (zilog->zl_writer) {
		cv_wait(&zilog->zl_cv_writer, &zilog->zl_lock);
	}

	/*
	 * Move the sync'd log transactions to a separate list so we can call
	 * kmem_free without holding the zl_lock.
	 *
	 * There is no need to set zl_writer, as we don't drop zl_lock here.
	 */
	while ((itx = list_head(&zilog->zl_itx_list)) != NULL &&
	    itx->itx_lr.lrc_txg <= MIN(synced_txg, freeze_txg)) {
		list_remove(&zilog->zl_itx_list, itx);
		zilog->zl_itx_list_sz -= itx->itx_sod;
		list_insert_tail(&clean_list, itx);
	}
	cv_broadcast(&zilog->zl_cv_writer);
	mutex_exit(&zilog->zl_lock);

	/* destroy sync'd log transactions */
	while ((itx = list_head(&clean_list)) != NULL) {
		list_remove(&clean_list, itx);
		kmem_free(itx, offsetof(itx_t, itx_lr)
		    + itx->itx_lr.lrc_reclen);
	}
	list_destroy(&clean_list);
}
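The shape of the first half — unlink everything eligible onto a private list while holding the mutex, then free it after dropping the mutex — is a generally useful pattern. A minimal pthreads sketch of the same idea (all names here are illustrative, not ZFS code):

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

struct node {
	struct node *next;
	uint64_t txg;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *head;	/* singly linked, ascending txg */

static void
clean_synced(uint64_t synced_txg)
{
	struct node *clean = NULL, *n;

	pthread_mutex_lock(&list_lock);
	while (head != NULL && head->txg <= synced_txg) {
		n = head;
		head = n->next;		/* unlink under the lock */
		n->next = clean;
		clean = n;		/* stash on a private list */
	}
	pthread_mutex_unlock(&list_lock);

	while ((n = clean) != NULL) {	/* free without the lock held */
		clean = n->next;
		free(n);
	}
}

int
main(void)
{
	/* Build the list 1 -> 2 -> 3 (ascending txg). */
	for (uint64_t txg = 3; txg >= 1; txg--) {
		struct node *n = malloc(sizeof (*n));
		n->txg = txg;
		n->next = head;
		head = n;
	}
	clean_synced(2);		/* frees txgs 1 and 2 */
	clean_synced(UINT64_MAX);	/* frees the rest */
	return (0);
}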
Example #7
File: zil.c  Project: harshada/zfs
static void
zil_commit_writer(zilog_t *zilog, uint64_t seq, uint64_t foid)
{
	uint64_t txg;
	uint64_t commit_seq = 0;
	itx_t *itx, *itx_next = (itx_t *)-1;
	lwb_t *lwb;
	spa_t *spa;

	zilog->zl_writer = B_TRUE;
	ASSERT(zilog->zl_root_zio == NULL);
	spa = zilog->zl_spa;

	if (zilog->zl_suspend) {
		lwb = NULL;
	} else {
		lwb = list_tail(&zilog->zl_lwb_list);
		if (lwb == NULL) {
			/*
			 * Return if there's nothing to flush before we
			 * dirty the fs by calling zil_create()
			 */
			if (list_is_empty(&zilog->zl_itx_list)) {
				zilog->zl_writer = B_FALSE;
				return;
			}
			mutex_exit(&zilog->zl_lock);
			zil_create(zilog);
			mutex_enter(&zilog->zl_lock);
			lwb = list_tail(&zilog->zl_lwb_list);
		}
	}

	/* Loop through in-memory log transactions filling log blocks. */
	DTRACE_PROBE1(zil__cw1, zilog_t *, zilog);
	for (;;) {
		/*
		 * Find the next itx to push:
		 * Push all transactions related to specified foid and all
		 * other transactions except TX_WRITE, TX_TRUNCATE,
		 * TX_SETATTR and TX_ACL for all other files.
		 */
		if (itx_next != (itx_t *)-1)
			itx = itx_next;
		else
			itx = list_head(&zilog->zl_itx_list);
		for (; itx != NULL; itx = list_next(&zilog->zl_itx_list, itx)) {
			if (foid == 0) /* push all foids? */
				break;
			if (itx->itx_sync) /* push all O_[D]SYNC */
				break;
			switch (itx->itx_lr.lrc_txtype) {
			case TX_SETATTR:
			case TX_WRITE:
			case TX_TRUNCATE:
			case TX_ACL:
				/* lr_foid is same offset for these records */
				if (((lr_write_t *)&itx->itx_lr)->lr_foid
				    != foid) {
					continue; /* skip this record */
				}
			}
			break;
		}
		if (itx == NULL)
			break;

		if ((itx->itx_lr.lrc_seq > seq) &&
		    ((lwb == NULL) || (lwb->lwb_nused == 0) ||
		    (lwb->lwb_nused + itx->itx_sod > ZIL_BLK_DATA_SZ(lwb)))) {
			break;
		}

		/*
		 * Save the next pointer.  Even though we soon drop
		 * zl_lock all threads that may change the list
		 * (another writer or zil_itx_clean) can't do so until
		 * they have zl_writer.
		 */
		itx_next = list_next(&zilog->zl_itx_list, itx);
		list_remove(&zilog->zl_itx_list, itx);
		zilog->zl_itx_list_sz -= itx->itx_sod;
		mutex_exit(&zilog->zl_lock);
		txg = itx->itx_lr.lrc_txg;
		ASSERT(txg);

		if (txg > spa_last_synced_txg(spa) ||
		    txg > spa_freeze_txg(spa))
			lwb = zil_lwb_commit(zilog, itx, lwb);
		kmem_free(itx, offsetof(itx_t, itx_lr)
		    + itx->itx_lr.lrc_reclen);
		mutex_enter(&zilog->zl_lock);
	}
	DTRACE_PROBE1(zil__cw2, zilog_t *, zilog);
	/* determine commit sequence number */
	itx = list_head(&zilog->zl_itx_list);
	if (itx)
		commit_seq = itx->itx_lr.lrc_seq;
	else
		commit_seq = zilog->zl_itx_seq;
	mutex_exit(&zilog->zl_lock);

	/* write the last block out */
	if (lwb != NULL && lwb->lwb_zio != NULL)
		lwb = zil_lwb_write_start(zilog, lwb);

	zilog->zl_prev_used = zilog->zl_cur_used;
	zilog->zl_cur_used = 0;

	/*
	 * Wait if necessary for the log blocks to be on stable storage.
	 */
	if (zilog->zl_root_zio) {
		DTRACE_PROBE1(zil__cw3, zilog_t *, zilog);
		(void) zio_wait(zilog->zl_root_zio);
		zilog->zl_root_zio = NULL;
		DTRACE_PROBE1(zil__cw4, zilog_t *, zilog);
		zil_flush_vdevs(zilog);
	}

	if (zilog->zl_log_error || lwb == NULL) {
		zilog->zl_log_error = 0;
		txg_wait_synced(zilog->zl_dmu_pool, 0);
	}

	mutex_enter(&zilog->zl_lock);
	zilog->zl_writer = B_FALSE;

	ASSERT3U(commit_seq, >=, zilog->zl_commit_seq);
	zilog->zl_commit_seq = commit_seq;
}
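The subtle point in the main loop is the saved itx_next pointer: zl_lock is dropped around zil_lwb_commit(), yet the cursor stays valid because, as the in-line comment notes, every other mutator of zl_itx_list (a competing committer, or zil_itx_clean() in Example #6) must first hold zl_writer, which this thread keeps set for the whole function.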