Exemple #1
0
/**********************************************************************
Validates the flush list. Runs UT_LIST_VALIDATE on the list and then walks
it from the first block onwards, asserting with ut_a() that every block is
in state BUF_BLOCK_FILE_PAGE, that its oldest_modification is greater than
zero, and that oldest_modification never grows from one block to the next. */
static
ibool
buf_flush_validate_low(void)
/*========================*/
		/* out: TRUE if ok */
{
	buf_block_t*	cur;
	buf_block_t*	succ;
	dulint		oldest;

	UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list);

	for (cur = UT_LIST_GET_FIRST(buf_pool->flush_list);
	     cur != NULL;
	     cur = succ) {

		oldest = cur->oldest_modification;

		ut_a(cur->state == BUF_BLOCK_FILE_PAGE);
		ut_a(ut_dulint_cmp(oldest, ut_dulint_zero) > 0);

		succ = UT_LIST_GET_NEXT(flush_list, cur);

		/* The successor, if there is one, must not carry a bigger
		oldest_modification than the block we just checked */
		ut_a(succ == NULL
		     || ut_dulint_cmp(oldest, succ->oldest_modification) >= 0);
	}

	return(TRUE);
}
Exemple #2
0
/***************************************************************
Checks if also the previous version of the clustered index record was
modified or inserted by the same transaction, and its undo number is such
that it should be undone in the same rollback. */
UNIV_INLINE
ibool
row_undo_mod_undo_also_prev_vers(
/*=============================*/
				/* out: TRUE if also previous modify or
				insert of this row should be undone */
	undo_node_t*	node,	/* in: row undo node */
	que_thr_t*	thr,	/* in: query thread; not used */
	dulint*		undo_no)/* out: the undo number of the previous
				version; ut_dulint_zero if that version was
				not written by node->trx */
{
	trx_undo_rec_t*	undo_rec;
	trx_t*		trx;

	UT_NOT_USED(thr);

	trx = node->trx;

	if (0 != ut_dulint_cmp(node->new_trx_id, trx->id)) {

		/* The previous version belongs to another transaction.
		Give the out parameter a defined value so that a caller
		which reads it after a FALSE return does not see an
		indeterminate dulint. */

		*undo_no = ut_dulint_zero;

		return(FALSE);
	}

	undo_rec = trx_undo_get_undo_rec_low(node->new_roll_ptr, node->heap);

	*undo_no = trx_undo_rec_get_undo_no(undo_rec);

	/* Undo it in this rollback only if roll_limit <= undo number */

	return(ut_dulint_cmp(trx->roll_limit, *undo_no) <= 0);
}
/***************************************************************
Decides whether the previous version of the clustered index record must be
undone in the same rollback: that is the case when it was written by the
transaction of the undo node and its undo number is not below the
transaction's roll_limit. */
UNIV_INLINE
ibool
row_undo_mod_undo_also_prev_vers(
/*=============================*/
				/* out: TRUE if also previous modify or
				insert of this row should be undone */
	undo_node_t*	node,	/* in: row undo node */
	dulint*		undo_no)/* out: the undo number; ut_dulint_zero if
				the previous version belongs to another
				transaction */
{
	trx_t*		owner	= node->trx;
	trx_undo_rec_t*	prev_rec;

	if (ut_dulint_cmp(node->new_trx_id, owner->id) == 0) {

		/* Same transaction: fetch the undo record of the previous
		version and compare its undo number to the roll limit */

		prev_rec = trx_undo_get_undo_rec_low(node->new_roll_ptr,
						     node->heap);

		*undo_no = trx_undo_rec_get_undo_no(prev_rec);

		return(ut_dulint_cmp(owner->roll_limit, *undo_no) <= 0);
	}

	*undo_no = ut_dulint_zero;

	return(FALSE);
}
Exemple #4
0
/****************************************************************//**
Links a trx handle into the trx system trx list so that the list stays
sorted on the trx id, biggest id first. The new handle is placed in front
of the first list member whose id is not bigger than its own; if there is
no such member it is appended to the end of the list. This function is used
at the database startup to insert incomplete transactions to the list. */
static
void
trx_list_insert_ordered(
/*====================*/
	trx_t*	trx)	/*!< in: trx handle */
{
	trx_t*	pos;
	trx_t*	before;

	ut_ad(mutex_own(&kernel_mutex));

	/* Find the first member that trx must precede */

	for (pos = UT_LIST_GET_FIRST(trx_sys->trx_list);
	     pos != NULL;
	     pos = UT_LIST_GET_NEXT(trx_list, pos)) {

		if (ut_dulint_cmp(trx->id, pos->id) >= 0) {

			/* Equal ids are not expected in the list */
			ut_ad(ut_dulint_cmp(trx->id, pos->id) == 1);

			break;
		}
	}

	if (pos == NULL) {
		/* Every member has a bigger id (or the list is empty) */
		UT_LIST_ADD_LAST(trx_list, trx_sys->trx_list, trx);

		return;
	}

	before = UT_LIST_GET_PREV(trx_list, pos);

	if (before != NULL) {
		UT_LIST_INSERT_AFTER(trx_list, trx_sys->trx_list,
				     before, trx);
	} else {
		UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx);
	}
}
Exemple #5
0
/**********************************************************************
Tells whether a block can be replaced right away: it has to be a file page
whose oldest_modification is not above zero and which is neither
buffer-fixed nor io-fixed. A block in any other state is reported to stderr
and rejected. */
ibool
buf_flush_ready_for_replace(
/*========================*/
				/* out: TRUE if can replace immediately */
	buf_block_t*	block)	/* in: buffer control block, must be in state
				BUF_BLOCK_FILE_PAGE and in the LRU list */
{
	ut_ad(mutex_own(&(buf_pool->mutex)));
	ut_ad(mutex_own(&block->mutex));

	if (block->state == BUF_BLOCK_FILE_PAGE) {

		if (ut_dulint_cmp(block->oldest_modification,
				  ut_dulint_zero) > 0) {
			/* Modified */
			return(FALSE);
		}

		if (block->buf_fix_count != 0) {
			/* Buffer-fixed */
			return(FALSE);
		}

		if (block->io_fix != 0) {
			/* Io-fixed */
			return(FALSE);
		}

		return(TRUE);
	}

	/* Unexpected state: print diagnostics and refuse */

	ut_print_timestamp(stderr);
	fprintf(stderr,
		"  InnoDB: Error: buffer block state %lu"
		" in the LRU list!\n",
		(ulong)block->state);
	ut_print_buf(stderr, block, sizeof(buf_block_t));

	return(FALSE);
}
Exemple #6
0
/**********************************************************************
Takes a block out of the LRU list and page hash table and sets the block
state to BUF_BLOCK_REMOVE_HASH. If the page hash does not map the block's
(space, offset) to this very block, diagnostics are printed to stderr and
the function stops on ut_a(0). */
static
void
buf_LRU_block_remove_hashed_page(
/*=============================*/
	buf_block_t*	block)	/* in: block, must contain a file page and
				be in a state where it can be freed; there
				may or may not be a hash index to the page */
{
	buf_block_t*	hashed;

	/* Check the pointer before anything below uses it */
	ut_ad(block);
	ut_ad(mutex_own(&(buf_pool->mutex)));
	ut_ad(mutex_own(&block->mutex));

	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
	ut_a(block->io_fix == 0);
	ut_a(block->buf_fix_count == 0);
	ut_a(ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) == 0);

	buf_LRU_remove_block(block);

	buf_pool->freed_page_clock += 1;

	/* Note that if AWE is enabled the block may not have a frame at all */

	buf_block_modify_clock_inc(block);

	/* Look the page up once and reuse the result in the diagnostics */
	hashed = buf_page_hash_get(block->space, block->offset);

	if (block != hashed) {
		fprintf(stderr,
			"InnoDB: Error: page %lu %lu not found"
			" in the hash table\n",
			(ulong) block->space,
			(ulong) block->offset);
		if (hashed) {
			fprintf(stderr,
				"InnoDB: In hash table we find block"
				" %p of %lu %lu which is not %p\n",
				(void*) hashed,
				(ulong) hashed->space,
				(ulong) hashed->offset,
				(void*) block);
		}

#ifdef UNIV_DEBUG
		buf_print();
		buf_LRU_print();
		buf_validate();
		buf_LRU_validate();
#endif
		ut_a(0);
	}

	HASH_DELETE(buf_block_t, hash, buf_pool->page_hash,
		    buf_page_address_fold(block->space, block->offset),
		    block);

	UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
	block->state = BUF_BLOCK_REMOVE_HASH;
}
Exemple #7
0
/************************************************************************
Returns TRUE if the block is modified and ready for flushing: its
oldest_modification is above zero, it is not io-fixed and, for an LRU
flush only, it is not buffer-fixed either. */
UNIV_INLINE
ibool
buf_flush_ready_for_flush(
/*======================*/
				/* out: TRUE if can flush immediately */
	buf_block_t*	block,	/* in: buffer control block, must be in state
				BUF_BLOCK_FILE_PAGE */
	ulint		flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
{
	ut_ad(mutex_own(&(buf_pool->mutex)));
	ut_ad(mutex_own(&(block->mutex)));
	ut_a(block->state == BUF_BLOCK_FILE_PAGE);

	if (ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) <= 0
	    || block->io_fix != 0) {

		/* Nothing to write, or an i/o is already pending */

		return(FALSE);
	}

	if (flush_type != BUF_FLUSH_LRU) {

		return(TRUE);
	}

	/* If we are flushing the LRU list, to avoid deadlocks
	we require the block not to be bufferfixed, and hence
	not latched. */

	if (block->buf_fix_count == 0) {

		return(TRUE);
	}

	return(FALSE);
}
Exemple #8
0
/************************************************************************
Inserts a modified block into the flush list at the position given by its
oldest_modification: it is placed after every list member whose
oldest_modification is strictly bigger, scanning from the start of the
list. */
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
	buf_block_t*	block)	/* in: block which is modified */
{
	buf_block_t*	pred	= NULL;
	buf_block_t*	cur;

	ut_ad(mutex_own(&(buf_pool->mutex)));

	/* Skip the members that must stay in front of block, remembering
	the last one of them */

	for (cur = UT_LIST_GET_FIRST(buf_pool->flush_list);
	     cur != NULL
	     && ut_dulint_cmp(cur->oldest_modification,
			      block->oldest_modification) > 0;
	     cur = UT_LIST_GET_NEXT(flush_list, cur)) {

		pred = cur;
	}

	if (pred != NULL) {
		UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list, pred,
				     block);
	} else {
		UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);
	}

	ut_ad(buf_flush_validate_low());
}
/*******************************************************************//**
Finds the biggest (trx number, undo number) pair among the cells of a purge
array that are in use. Pairs are ordered on the trx number first and on the
undo number second. The scan stops as soon as arr->n_used cells in use have
been visited. */
static
void
trx_purge_arr_get_biggest(
/*======================*/
	trx_undo_arr_t*	arr,	/*!< in: purge array */
	trx_id_t*	trx_no,	/*!< out: transaction number: ut_dulint_zero
				if array is empty */
	undo_no_t*	undo_no)/*!< out: undo number */
{
	trx_undo_inf_t*	slot;
	trx_id_t	best_trx_no	= ut_dulint_zero;
	undo_no_t	best_undo_no	= ut_dulint_zero;
	ulint		n_used		= arr->n_used;
	ulint		n_seen		= 0;
	ulint		i		= 0;
	int		cmp;

	do {
		slot = trx_undo_arr_get_nth_info(arr, i);
		i++;

		if (slot->in_use) {
			n_seen++;

			cmp = ut_dulint_cmp(slot->trx_no, best_trx_no);

			/* Take the slot if its trx number is bigger, or
			if the trx numbers tie and its undo number is not
			smaller */

			if (cmp > 0
			    || (cmp == 0
				&& ut_dulint_cmp(slot->undo_no,
						 best_undo_no) >= 0)) {

				best_trx_no = slot->trx_no;
				best_undo_no = slot->undo_no;
			}
		}
	} while (n_seen != n_used);

	*trx_no = best_trx_no;
	*undo_no = best_undo_no;
}
Exemple #10
0
/***************************************************************
Positions node->pcur on the clustered index record identified by node->ref
and checks that the roll pointer of that record equals node->roll_ptr. On a
match a copy of the row is built into node->row and the cursor position is
stored. The mini-transaction is committed before returning in both cases. */
ibool
row_undo_search_clust_to_pcur(
/*==========================*/
				/* out: TRUE if found; NOTE the node->pcur
				must be closed by the caller, regardless of
				the return value */
	undo_node_t*	node)	/* in: row undo node */
{
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	mem_heap_t*	heap		= NULL;
	dict_index_t*	clust_index;
	rec_t*		rec;
	mtr_t		mtr;
	ibool		found;
	ibool		ret		= FALSE;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	mtr_start(&mtr);

	clust_index = dict_table_get_first_index(node->table);

	found = row_search_on_row_ref(&(node->pcur), BTR_MODIFY_LEAF,
				      node->table, node->ref, &mtr);

	rec = btr_pcur_get_rec(&(node->pcur));

	offsets = rec_get_offsets(rec, clust_index, offsets,
				  ULINT_UNDEFINED, &heap);

	/* The roll pointer is only looked at when the search succeeded.
	If the record is missing or carries another roll pointer, ret
	stays FALSE and node->row is left alone. */

	if (found
	    && 0 == ut_dulint_cmp(node->roll_ptr,
				  row_get_rec_roll_ptr(rec, clust_index,
						       offsets))) {

		node->row = row_build(ROW_COPY_DATA, clust_index, rec,
				      offsets, node->heap);
		btr_pcur_store_position(&(node->pcur), &mtr);

		ret = TRUE;
	}

	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);

	/* rec_get_offsets() was handed &heap and may have set it */
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	return(ret);
}
Exemple #11
0
void
buf_LRU_print(void)
/*===============*/
{
	buf_block_t*	block;
	buf_frame_t*	frame;
	ulint		len;

	ut_ad(buf_pool);
	mutex_enter(&(buf_pool->mutex));

	fprintf(stderr, "Pool ulint clock %lu\n",
		(ulong) buf_pool->ulint_clock);

	block = UT_LIST_GET_FIRST(buf_pool->LRU);

	len = 0;

	while (block != NULL) {

		fprintf(stderr, "BLOCK %lu ", (ulong) block->offset);

		if (block->old) {
			fputs("old ", stderr);
		}

		if (block->buf_fix_count) {
			fprintf(stderr, "buffix count %lu ",
				(ulong) block->buf_fix_count);
		}

		if (block->io_fix) {
			fprintf(stderr, "io_fix %lu ", (ulong) block->io_fix);
		}

		if (ut_dulint_cmp(block->oldest_modification,
				  ut_dulint_zero) > 0) {
			fputs("modif. ", stderr);
		}

		frame = buf_block_get_frame(block);

		fprintf(stderr, "LRU pos %lu type %lu index id %lu ",
			(ulong) block->LRU_position,
			(ulong) fil_page_get_type(frame),
			(ulong) ut_dulint_get_low
			(btr_page_get_index_id(frame)));

		block = UT_LIST_GET_NEXT(LRU, block);
		if (++len == 10) {
			len = 0;
			putc('\n', stderr);
		}
	}

	mutex_exit(&(buf_pool->mutex));
}
/********************************************************************//**
Removes unnecessary history data from rollback segments. The truncate limit
is the biggest (trx number, undo number) pair in the purge array, or the
purge system's own purge_trx_no / purge_undo_no when that trx number is
zero; the limit is then capped at the purge view's low_limit_no. NOTE that
when this function is called, the caller must not have any latches on undo
log pages! */
static
void
trx_purge_truncate_history(void)
/*============================*/
{
	trx_id_t	trx_no_limit;
	undo_no_t	undo_no_limit;
	trx_rseg_t*	seg;

	ut_ad(mutex_own(&(purge_sys->mutex)));

	trx_purge_arr_get_biggest(purge_sys->arr, &trx_no_limit,
				  &undo_no_limit);

	if (ut_dulint_is_zero(trx_no_limit)) {

		/* Fall back to the purge system's own limits */

		trx_no_limit = purge_sys->purge_trx_no;
		undo_no_limit = purge_sys->purge_undo_no;
	}

	/* We play safe and set the truncate limit at most to the purge view
	low_limit number, though this is not necessary */

	if (ut_dulint_cmp(trx_no_limit, purge_sys->view->low_limit_no) >= 0) {
		trx_no_limit = purge_sys->view->low_limit_no;
		undo_no_limit = ut_dulint_zero;
	}

	ut_ad((ut_dulint_cmp(trx_no_limit,
			     purge_sys->view->low_limit_no) <= 0));

	/* Apply the limit to every rollback segment in turn */

	for (seg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
	     seg;
	     seg = UT_LIST_GET_NEXT(rseg_list, seg)) {

		trx_purge_truncate_rseg_history(seg, trx_no_limit,
						undo_no_limit);
	}
}
Exemple #13
0
/***************************************************************
Positions node->pcur on the clustered index record identified by node->ref
and checks that the roll pointer of that record equals node->roll_ptr. On a
match a copy of the row is built into node->row and the cursor position is
stored. The mini-transaction is committed before returning in both cases. */
ibool
row_undo_search_clust_to_pcur(
/*==========================*/
				/* out: TRUE if found; NOTE the node->pcur
				must be closed by the caller, regardless of
				the return value */
	undo_node_t*	node,	/* in: row undo node */
	que_thr_t*	thr)	/* in: query thread; not used */
{
	dict_index_t*	clust_index;
	rec_t*		rec;
	mtr_t		mtr;
	ibool		found;
	ibool		ret	= FALSE;

	UT_NOT_USED(thr);

	mtr_start(&mtr);

	clust_index = dict_table_get_first_index(node->table);

	found = row_search_on_row_ref(&(node->pcur), BTR_MODIFY_LEAF,
				      node->table, node->ref, &mtr);

	rec = btr_pcur_get_rec(&(node->pcur));

	/* The roll pointer is compared only when the search succeeded.
	A missing record or a differing roll pointer leaves ret FALSE. */

	if (found
	    && 0 == ut_dulint_cmp(node->roll_ptr,
				  row_get_rec_roll_ptr(rec, clust_index))) {

		node->row = row_build(ROW_COPY_DATA, clust_index, rec,
				      node->heap);
		btr_pcur_store_position(&(node->pcur), &mtr);

		ret = TRUE;
	}

	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);

	return(ret);
}
Exemple #14
0
/***********************************************************//**
Decides whether the previous version of the clustered index record has to
be undone in the same rollback: it must have been written by the
transaction of the undo node, and its undo number must not be below that
transaction's roll_limit.
@return	TRUE if also previous modify or insert of this row should be undone */
static
ibool
row_undo_mod_undo_also_prev_vers(
    /*=============================*/
    undo_node_t*	node,	/*!< in: row undo node */
    undo_no_t*	undo_no)/*!< out: the undo number; ut_dulint_zero if
			the previous version belongs to another
			transaction */
{
    trx_t*		owner	= node->trx;
    trx_undo_rec_t*	prev_rec;

    if (ut_dulint_cmp(node->new_trx_id, owner->id) == 0) {

        /* Written by the same transaction: read the undo number of
        the previous version and compare it to the roll limit */

        prev_rec = trx_undo_get_undo_rec_low(node->new_roll_ptr,
                                             node->heap);

        *undo_no = trx_undo_rec_get_undo_no(prev_rec);

        return(ut_dulint_cmp(owner->roll_limit, *undo_no) <= 0);
    }

    *undo_no = ut_dulint_zero;

    return(FALSE);
}
Exemple #15
0
/*****************************************************************
Compares the "weight" of two transactions. A transaction that has edited
non-transactional tables is heavier than one that has not; when the two do
not differ in that respect, the sums of undo_no and the number of entries in
trx_locks are compared. */
int
trx_weight_cmp(
/*===========*/
			/* out: <0, 0 or >0; similar to strcmp(3) */
	trx_t*	a,	/* in: the first transaction to be compared */
	trx_t*	b)	/* in: the second transaction to be compared */
{
	ibool	a_notrans_edit;
	ibool	b_notrans_edit;

	/* If mysql_thd is NULL for a transaction we assume that it has
	not edited non-transactional tables. Both flags are the result of
	a logical expression and therefore exactly 0 or 1. */

	a_notrans_edit = a->mysql_thd != NULL
	    && thd_has_edited_nontrans_tables(a->mysql_thd);

	b_notrans_edit = b->mysql_thd != NULL
	    && thd_has_edited_nontrans_tables(b->mysql_thd);

	if (a_notrans_edit != b_notrans_edit) {

		/* Exactly one of them edited non-transactional tables:
		that one is the heavier */

		return(a_notrans_edit ? 1 : -1);
	}

	/* Either both had edited non-transactional tables or both had
	not, we fall back to comparing the number of altered/locked
	rows. */

#if 0
	fprintf(stderr,
		"%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
		__func__,
		ut_conv_dulint_to_longlong(a->undo_no),
		UT_LIST_GET_LEN(a->trx_locks),
		ut_conv_dulint_to_longlong(b->undo_no),
		UT_LIST_GET_LEN(b->trx_locks));
#endif

#define TRX_WEIGHT(t)	\
	ut_dulint_add((t)->undo_no, UT_LIST_GET_LEN((t)->trx_locks))

	return(ut_dulint_cmp(TRX_WEIGHT(a), TRX_WEIGHT(b)));
}
Exemple #16
0
/************************************************************************
Adds a modified block to the start of the flush list. In debug builds it is
asserted beforehand that the current first block of the list, if any, does
not have a bigger oldest_modification than the new block, and afterwards
that the list still validates. */
void
buf_flush_insert_into_flush_list(
/*=============================*/
	buf_block_t*	block)	/* in: block which is modified */
{
	ut_ad(mutex_own(&(buf_pool->mutex)));
	ut_a(block->state == BUF_BLOCK_FILE_PAGE);

	/* Same ordering check as "first <= block", with the operands of
	the comparison swapped */
	ut_ad(!UT_LIST_GET_FIRST(buf_pool->flush_list)
	      || ut_dulint_cmp(block->oldest_modification,
			       (UT_LIST_GET_FIRST(buf_pool->flush_list))
			       ->oldest_modification) >= 0);

	UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block);

	ut_ad(buf_flush_validate_low());
}
Exemple #17
0
/*****************************************************************//**
Finds out if an active transaction has inserted or modified a secondary
index record. NOTE: the kernel mutex is temporarily released in this
function! The caller must own the kernel mutex on entry, and every return
path re-acquires it before returning.

The function looks up the clustered index record that corresponds to rec,
reads the id of the transaction that last wrote it, and then walks back
through earlier versions of that clustered record for as long as they were
written by the same transaction, checking whether any of them would require
rec to be in a different state than it is.
@return NULL if committed, else the active transaction */
UNIV_INTERN
trx_t*
row_vers_impl_x_locked_off_kernel(
/*==============================*/
	const rec_t*	rec,	/*!< in: record in a secondary index */
	dict_index_t*	index,	/*!< in: the secondary index */
	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
{
	dict_index_t*	clust_index;
	rec_t*		clust_rec;
	ulint*		clust_offsets;
	rec_t*		version;
	trx_id_t	trx_id;
	mem_heap_t*	heap;
	mem_heap_t*	heap2;
	dtuple_t*	row;
	dtuple_t*	entry	= NULL; /* assignment to eliminate compiler
					warning */
	trx_t*		trx;
	ulint		rec_del;
#ifdef UNIV_DEBUG
	/* Only read inside ut_ad(), hence declared for debug builds only */
	ulint		err;
#endif /* UNIV_DEBUG */
	mtr_t		mtr;
	ulint		comp;

	ut_ad(mutex_own(&kernel_mutex));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	mutex_exit(&kernel_mutex);

	mtr_start(&mtr);

	/* Search for the clustered index record: this is a time-consuming
	operation: therefore we release the kernel mutex; also, the release
	is required by the latching order convention. The latch on the
	clustered index locks the top of the stack of versions. We also
	reserve purge_latch to lock the bottom of the version stack. */

	clust_rec = row_get_clust_rec(BTR_SEARCH_LEAF, rec, index,
				      &clust_index, &mtr);
	if (!clust_rec) {
		/* In a rare case it is possible that no clust rec is found
		for a secondary index record: if in row0umod.c
		row_undo_mod_remove_clust_low() we have already removed the
		clust rec, while purge is still cleaning and removing
		secondary index records associated with earlier versions of
		the clustered index record. In that case there cannot be
		any implicit lock on the secondary index record, because
		an active transaction which has modified the secondary index
		record has also modified the clustered index record. And in
		a rollback we always undo the modifications to secondary index
		records before the clustered index record. */

		/* No heap has been created yet on this path, so there is
		nothing to free: re-take the kernel mutex, commit, return */
		mutex_enter(&kernel_mutex);
		mtr_commit(&mtr);

		return(NULL);
	}

	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(clust_rec, clust_index, NULL,
					ULINT_UNDEFINED, &heap);
	trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);

	mtr_s_lock(&(purge_sys->latch), &mtr);

	mutex_enter(&kernel_mutex);

	/* trx stays NULL on every path that finds no implicit lock */
	trx = NULL;
	if (!trx_is_active(trx_id)) {
		/* The transaction that modified or inserted clust_rec is no
		longer active: no implicit lock on rec */
		goto exit_func;
	}

	if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index,
				      clust_offsets, TRUE)) {
		/* Corruption noticed: try to avoid a crash by returning */
		goto exit_func;
	}

	comp = page_rec_is_comp(rec);
	ut_ad(index->table == clust_index->table);
	ut_ad(!!comp == dict_table_is_comp(index->table));
	ut_ad(!comp == !page_rec_is_comp(clust_rec));

	/* We look up if some earlier version, which was modified by the trx_id
	transaction, of the clustered index record would require rec to be in
	a different state (delete marked or unmarked, or have different field
	values, or not existing). If there is such a version, then rec was
	modified by the trx_id transaction, and it has an implicit x-lock on
	rec. Note that if clust_rec itself would require rec to be in a
	different state, then the trx_id transaction has not yet had time to
	modify rec, and does not necessarily have an implicit x-lock on rec. */

	rec_del = rec_get_deleted_flag(rec, comp);
	trx = NULL;

	version = clust_rec;

	/* Each iteration steps one version back. The loop is entered with
	the kernel mutex owned, and every break below happens after the
	mutex has been taken again. */
	for (;;) {
		rec_t*		prev_version;
		ulint		vers_del;
		row_ext_t*	ext;
		trx_id_t	prev_trx_id;

		mutex_exit(&kernel_mutex);

		/* While we retrieve an earlier version of clust_rec, we
		release the kernel mutex, because it may take time to access
		the disk. After the release, we have to check if the trx_id
		transaction is still active. We keep the semaphore in mtr on
		the clust_rec page, so that no other transaction can update
		it and get an implicit x-lock on rec. */

		/* The old heap is kept alive in heap2 until the previous
		version has been built into the fresh heap, because the
		build still reads version and clust_offsets */
		heap2 = heap;
		heap = mem_heap_create(1024);
#ifdef UNIV_DEBUG
		err =
#endif /* UNIV_DEBUG */
		trx_undo_prev_version_build(clust_rec, &mtr, version,
					    clust_index, clust_offsets,
					    heap, &prev_version);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (prev_version == NULL) {
			mutex_enter(&kernel_mutex);

			if (!trx_is_active(trx_id)) {
				/* Transaction no longer active: no
				implicit x-lock */

				break;
			}

			/* If the transaction is still active,
			clust_rec must be a fresh insert, because no
			previous version was found. */
			ut_ad(err == DB_SUCCESS);

			/* It was a freshly inserted version: there is an
			implicit x-lock on rec */

			trx = trx_get_on_id(trx_id);

			break;
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL, ULINT_UNDEFINED, &heap);

		vers_del = rec_get_deleted_flag(prev_version, comp);
		prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
						 clust_offsets);
		/* The stack of versions is locked by mtr.  Thus, it
		is safe to fetch the prefixes for externally stored
		columns. */
		row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
				clust_offsets, NULL, &ext, heap);
		entry = row_build_index_entry(row, ext, index, heap);
		/* entry may be NULL if a record was inserted in place
		of a deleted record, and the BLOB pointers of the new
		record were not initialized yet.  But in that case,
		prev_version should be NULL. */
		ut_a(entry);

		mutex_enter(&kernel_mutex);

		if (!trx_is_active(trx_id)) {
			/* Transaction no longer active: no implicit x-lock */

			break;
		}

		/* If we get here, we know that the trx_id transaction is
		still active and it has modified prev_version. Let us check
		if prev_version would require rec to be in a different
		state. */

		/* The previous version of clust_rec must be
		accessible, because the transaction is still active
		and clust_rec was not a fresh insert. */
		ut_ad(err == DB_SUCCESS);

		/* We check if entry and rec are identified in the alphabetical
		ordering */
		if (0 == cmp_dtuple_rec(entry, rec, offsets)) {
			/* The delete marks of rec and prev_version should be
			equal for rec to be in the state required by
			prev_version */

			if (rec_del != vers_del) {
				trx = trx_get_on_id(trx_id);

				break;
			}

			/* It is possible that the row was updated so that the
			secondary index record remained the same in
			alphabetical ordering, but the field values changed
			still. For example, 'abc' -> 'ABC'. Check also that. */

			dtuple_set_types_binary(entry,
						dtuple_get_n_fields(entry));
			if (0 != cmp_dtuple_rec(entry, rec, offsets)) {

				trx = trx_get_on_id(trx_id);

				break;
			}
		} else if (!rec_del) {
			/* The delete mark should be set in rec for it to be
			in the state required by prev_version */

			trx = trx_get_on_id(trx_id);

			break;
		}

		if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
			/* The versions modified by the trx_id transaction end
			to prev_version: no implicit x-lock */

			break;
		}

		version = prev_version;
	}/* for (;;) */

exit_func:
	/* Reached with the kernel mutex owned; heap is whichever heap was
	created last */
	mtr_commit(&mtr);
	mem_heap_free(heap);

	return(trx);
}
Exemple #18
0
/*****************************************************************//**
Constructs the last committed version of a clustered index record,
which should be seen by a semi-consistent read. Starting from rec, the
function steps back one version at a time until it reaches a version whose
writing transaction is either unknown to trx_get_on_id() or is in state
TRX_COMMITTED_IN_MEMORY or TRX_NOT_STARTED. The purge system latch is held
in shared mode for the whole walk and released before returning.
@return	DB_SUCCESS or DB_MISSING_HISTORY */
UNIV_INTERN
ulint
row_vers_build_for_semi_consistent_read(
/*====================================*/
	const rec_t*	rec,	/*!< in: record in a clustered index; the
				caller must have a latch on the page; this
				latch locks the top of the stack of versions
				of this records */
	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
	dict_index_t*	index,	/*!< in: the clustered index */
	ulint**		offsets,/*!< in/out: offsets returned by
				rec_get_offsets(rec, index) */
	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
				the offsets are allocated */
	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
				*old_vers is allocated; memory for possible
				intermediate versions is allocated and freed
				locally within the function */
	const rec_t**	old_vers)/*!< out: rec, old version, or NULL if the
				record does not exist in the view, that is,
				it was freshly inserted afterwards */
{
	const rec_t*	version;
	mem_heap_t*	heap		= NULL;
	byte*		buf;
	ulint		err;
	trx_id_t	rec_trx_id	= ut_dulint_zero;

	ut_ad(dict_index_is_clust(index));
	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	ut_ad(rec_offs_validate(rec, index, *offsets));

	rw_lock_s_lock(&(purge_sys->latch));
	/* The S-latch on purge_sys prevents the purge view from
	changing.  Thus, if we have an uncommitted transaction at
	this point, then purge cannot remove its undo log even if
	the transaction could commit now. */

	version = rec;

	/* Every exit from this loop is a break with err already set */
	for (;;) {
		trx_t*		version_trx;
		mem_heap_t*	heap2;
		rec_t*		prev_version;
		trx_id_t	version_trx_id;

		version_trx_id = row_get_rec_trx_id(version, index, *offsets);
		if (rec == version) {
			/* First iteration: remember who wrote the newest
			version, for the comparison further below */
			rec_trx_id = version_trx_id;
		}

		mutex_enter(&kernel_mutex);
		version_trx = trx_get_on_id(version_trx_id);
		if (version_trx
		    && (version_trx->conc_state == TRX_COMMITTED_IN_MEMORY
			|| version_trx->conc_state == TRX_NOT_STARTED)) {

			/* Treated the same as a transaction that was not
			found at all */
			version_trx = NULL;
		}
		mutex_exit(&kernel_mutex);

		if (!version_trx) {

			/* We found a version that belongs to a
			committed transaction: return it. */

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
			ut_a(!rec_offs_any_null_extern(version, *offsets));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */

			if (rec == version) {
				/* The newest version itself qualifies: hand
				back rec without making a copy */
				*old_vers = rec;
				err = DB_SUCCESS;
				break;
			}

			/* We assume that a rolled-back transaction stays in
			TRX_ACTIVE state until all the changes have been
			rolled back and the transaction is removed from
			the global list of transactions. */

			if (!ut_dulint_cmp(rec_trx_id, version_trx_id)) {
				/* The transaction was committed while
				we searched for earlier versions.
				Return the current version as a
				semi-consistent read. */

				version = rec;
				*offsets = rec_get_offsets(version,
							   index, *offsets,
							   ULINT_UNDEFINED,
							   offset_heap);
			}

			/* Copy the chosen version into in_heap so that it
			does not live in the local heap freed below */
			buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets));
			*old_vers = rec_copy(buf, version, *offsets);
			rec_offs_make_valid(*old_vers, index, *offsets);
			err = DB_SUCCESS;

			break;
		}

		/* Build the previous version into a fresh heap; the heap of
		the preceding iteration (NULL on the first one) is freed only
		after the build, which still reads version */
		heap2 = heap;
		heap = mem_heap_create(1024);

		err = trx_undo_prev_version_build(rec, mtr, version, index,
						  *offsets, heap,
						  &prev_version);
		if (heap2) {
			mem_heap_free(heap2); /* free version */
		}

		if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
			/* err is returned as is; *old_vers is not written
			on this path */
			break;
		}

		if (prev_version == NULL) {
			/* It was a freshly inserted version */
			*old_vers = NULL;
			err = DB_SUCCESS;

			break;
		}

		version = prev_version;
		*offsets = rec_get_offsets(version, index, *offsets,
					   ULINT_UNDEFINED, offset_heap);
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
		ut_a(!rec_offs_any_null_extern(version, *offsets));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
	}/* for (;;) */

	/* heap is still NULL if the loop ended in its first iteration
	before any previous version was built */
	if (heap) {
		mem_heap_free(heap);
	}
	rw_lock_s_unlock(&(purge_sys->latch));

	return(err);
}
/*************************************************************************
Creates a cursor view for a transaction. The cursor view object and its
read view are both allocated from a heap created here and stored in the
returned object. The ids of all transactions in state TRX_ACTIVE or
TRX_PREPARED are recorded in the read view, and the read view is put first
in the trx system view list. The kernel mutex is taken and released inside
the function. */
cursor_view_t*
read_cursor_view_create_for_mysql(
/*==============================*/
			/* out: the cursor view created */
	trx_t*	cr_trx)	/* in: trx where cursor view is created */
{
	cursor_view_t*	curview;
	read_view_t*	view;
	mem_heap_t*	heap;
	trx_t*		trx;
	ulint		n_ids	= 0;

	ut_a(cr_trx);

	/* Use larger heap than in trx_create when creating a read_view
	because cursors are quite long. */

	heap = mem_heap_create(512);

	curview = (cursor_view_t*) mem_heap_alloc(heap, sizeof(cursor_view_t));
	curview->heap = heap;

	/* Drop cursor tables from consideration when evaluating the need of
	auto-commit */
	curview->n_mysql_tables_in_use = cr_trx->n_mysql_tables_in_use;
	cr_trx->n_mysql_tables_in_use = 0;

	mutex_enter(&kernel_mutex);

	view = read_view_create_low(
		UT_LIST_GET_LEN(trx_sys->trx_list), curview->heap);

	curview->read_view = view;

	view->creator_trx_id = cr_trx->id;
	view->type = VIEW_HIGH_GRANULARITY;
	view->undo_no = cr_trx->undo_no;

	/* No future transactions should be visible in the view */

	view->low_limit_no = trx_sys->max_trx_id;
	view->low_limit_id = view->low_limit_no;

	/* No active transaction should be visible */

	for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
	     trx;
	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {

		if (trx->conc_state != TRX_ACTIVE
		    && trx->conc_state != TRX_PREPARED) {

			continue;
		}

		read_view_set_nth_trx_id(view, n_ids, trx->id);

		n_ids++;

		/* NOTE that a transaction whose trx number is <
		trx_sys->max_trx_id can still be active, if it is
		in the middle of its commit! Note that when a
		transaction starts, we initialize trx->no to
		ut_dulint_max. */

		if (ut_dulint_cmp(view->low_limit_no, trx->no) > 0) {

			view->low_limit_no = trx->no;
		}
	}

	view->n_trx_ids = n_ids;

	if (n_ids == 0) {
		view->up_limit_id = view->low_limit_id;
	} else {
		/* The last active transaction has the smallest id: */
		view->up_limit_id = read_view_get_nth_trx_id(view, n_ids - 1);
	}

	UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);

	mutex_exit(&kernel_mutex);

	return(curview);
}
/********************************************************************//**
Removes unnecessary history data from a rollback segment. The history list
of the segment is walked from its last node towards earlier nodes. Every
undo log whose trx number is below limit_trx_no is counted as removed (and
its whole segment is freed when possible); the walk ends at the first log
whose trx number is >= limit_trx_no, or when the list is exhausted. The
mini-transaction and the rseg mutex are released and re-acquired between
the logs. */
static
void
trx_purge_truncate_rseg_history(
/*============================*/
	trx_rseg_t*	rseg,		/*!< in: rollback segment */
	trx_id_t	limit_trx_no,	/*!< in: remove update undo logs whose
					trx number is < limit_trx_no */
	undo_no_t	limit_undo_no)	/*!< in: if transaction number is equal
					to limit_trx_no, truncate undo records
					with undo number < limit_undo_no */
{
	fil_addr_t	hdr_addr;
	fil_addr_t	prev_hdr_addr;
	trx_rsegf_t*	rseg_hdr;
	page_t*		undo_page;
	trx_ulogf_t*	log_hdr;
	trx_usegf_t*	seg_hdr;
	int		cmp;
	/* Logs passed over since the start, or since the last call of
	trx_purge_free_segment() below */
	ulint		n_removed_logs	= 0;
	mtr_t		mtr;

	ut_ad(mutex_own(&(purge_sys->mutex)));

	mtr_start(&mtr);
	mutex_enter(&(rseg->mutex));

	rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
				 rseg->page_no, &mtr);

	hdr_addr = trx_purge_get_log_from_hist(
		flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr));
loop:
	/* Invariant at this label: mtr is started and rseg->mutex is owned */
	if (hdr_addr.page == FIL_NULL) {

		/* No more logs to look at */

		mutex_exit(&(rseg->mutex));

		mtr_commit(&mtr);

		return;
	}

	undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
				      hdr_addr.page, &mtr);

	log_hdr = undo_page + hdr_addr.boffset;

	cmp = ut_dulint_cmp(mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO),
			    limit_trx_no);
	if (cmp == 0) {
		/* The log sits exactly at the limit: it is only truncated
		from its start, up to limit_undo_no */
		trx_undo_truncate_start(rseg, rseg->space, hdr_addr.page,
					hdr_addr.boffset, limit_undo_no);
	}

	if (cmp >= 0) {
		/* This log must stay. Account for the logs passed over so
		far, cut them off the end of the history list, and stop. */
		mutex_enter(&kernel_mutex);
		ut_a(trx_sys->rseg_history_len >= n_removed_logs);
		trx_sys->rseg_history_len -= n_removed_logs;
		mutex_exit(&kernel_mutex);

		flst_truncate_end(rseg_hdr + TRX_RSEG_HISTORY,
				  log_hdr + TRX_UNDO_HISTORY_NODE,
				  n_removed_logs, &mtr);

		mutex_exit(&(rseg->mutex));
		mtr_commit(&mtr);

		return;
	}

	/* Read the address of the previous log now, before the mtr is
	committed below */
	prev_hdr_addr = trx_purge_get_log_from_hist(
		flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));
	n_removed_logs++;

	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;

	if ((mach_read_from_2(seg_hdr + TRX_UNDO_STATE) == TRX_UNDO_TO_PURGE)
	    && (mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG) == 0)) {

		/* We can free the whole log segment */

		mutex_exit(&(rseg->mutex));
		mtr_commit(&mtr);

		trx_purge_free_segment(rseg, hdr_addr, n_removed_logs);

		/* The count was handed to trx_purge_free_segment(): start
		counting afresh */
		n_removed_logs = 0;
	} else {
		mutex_exit(&(rseg->mutex));
		mtr_commit(&mtr);
	}

	/* Start a new mtr for the next log and fetch the rseg header
	again under it */
	mtr_start(&mtr);
	mutex_enter(&(rseg->mutex));

	rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
				 rseg->page_no, &mtr);

	hdr_addr = prev_hdr_addr;

	goto loop;
}
Exemple #21
0
/**********************************************************************
Queues write requests for dirty pages taken from the end of the LRU list
or of the flush list. Each round picks one flushable block, tries to flush
it together with its neighbors, and then restarts from the end of the
list. Only one batch per flush type may be running at a time. */
ulint
buf_flush_batch(
/*============*/
				/* out: number of blocks for which the write
				request was queued; ULINT_UNDEFINED if there
				was a flush of the same type already running */
	ulint	flush_type,	/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
				BUF_FLUSH_LIST, then the caller must not own
				any latches on pages */
	ulint	min_n,		/* in: wished minimum number of blocks flushed
				(it is not guaranteed that the actual number
				is that big, though) */
	dulint	lsn_limit)	/* in the case BUF_FLUSH_LIST all blocks whose
				oldest_modification is smaller than this
				should be flushed (if their number does not
				exceed min_n), otherwise ignored */
{
	buf_block_t*	block;
	ulint		page_count	= 0;
	ulint		space;
	ulint		offset;
	ibool		found;

	ut_ad((flush_type == BUF_FLUSH_LRU)
	      || (flush_type == BUF_FLUSH_LIST));
#ifdef UNIV_SYNC_DEBUG
	ut_ad((flush_type != BUF_FLUSH_LIST)
	      || sync_thread_levels_empty_gen(TRUE));
#endif /* UNIV_SYNC_DEBUG */
	mutex_enter(&(buf_pool->mutex));

	if ((buf_pool->n_flush[flush_type] > 0)
	    || (buf_pool->init_flush[flush_type] == TRUE)) {

		/* There is already a flush batch of the same type running */

		mutex_exit(&(buf_pool->mutex));

		return(ULINT_UNDEFINED);
	}

	/* Mark that a batch of this type is being set up; cleared again
	once the loop below is finished */
	(buf_pool->init_flush)[flush_type] = TRUE;

	for (;;) {
		/* If we have flushed enough, leave the loop */
		if (page_count >= min_n) {

			break;
		}

		/* Start from the end of the list looking for a suitable
		block to be flushed. */

		if (flush_type == BUF_FLUSH_LRU) {
			block = UT_LIST_GET_LAST(buf_pool->LRU);
		} else {
			ut_ad(flush_type == BUF_FLUSH_LIST);

			block = UT_LIST_GET_LAST(buf_pool->flush_list);
			if (!block
			    || (ut_dulint_cmp(block->oldest_modification,
					      lsn_limit) >= 0)) {
				/* We have flushed enough */

				break;
			}
		}

		found = FALSE;

		/* Note that after finding a single flushable page, we try to
		flush also all its neighbors, and after that start from the
		END of the LRU list or flush list again: the list may change
		during the flushing and we cannot safely preserve within this
		function a pointer to a block in the list! */

		while ((block != NULL) && !found) {
			ut_a(block->state == BUF_BLOCK_FILE_PAGE);

			mutex_enter(&block->mutex);

			if (buf_flush_ready_for_flush(block, flush_type)) {

				found = TRUE;

				/* Copy the page identity while the block
				mutex is still held; both mutexes are
				released before the flush is attempted */
				space = block->space;
				offset = block->offset;

				mutex_exit(&block->mutex);
				mutex_exit(&(buf_pool->mutex));

				/* Try to flush also all the neighbors */
				page_count += buf_flush_try_neighbors(
					space, offset, flush_type);

				mutex_enter(&(buf_pool->mutex));

			} else if (flush_type == BUF_FLUSH_LRU) {

				mutex_exit(&block->mutex);

				block = UT_LIST_GET_PREV(LRU, block);
			} else {
				ut_ad(flush_type == BUF_FLUSH_LIST);

				mutex_exit(&block->mutex);

				block = UT_LIST_GET_PREV(flush_list, block);
			}
		}

		/* If we could not find anything to flush, leave the loop */

		if (!found) {
			break;
		}
	}

	(buf_pool->init_flush)[flush_type] = FALSE;

	if ((buf_pool->n_flush[flush_type] == 0)
	    && (buf_pool->init_flush[flush_type] == FALSE)) {

		/* The running flush batch has ended */

		os_event_set(buf_pool->no_flush[flush_type]);
	}

	mutex_exit(&(buf_pool->mutex));

	buf_flush_buffered_writes();

#ifdef UNIV_DEBUG
	if (buf_debug_prints && page_count > 0) {
		ut_a(flush_type == BUF_FLUSH_LRU
		     || flush_type == BUF_FLUSH_LIST);
		fprintf(stderr, flush_type == BUF_FLUSH_LRU
			? "Flushed %lu pages in LRU flush\n"
			: "Flushed %lu pages in flush list flush\n",
			(ulong) page_count);
	}
#endif /* UNIV_DEBUG */

	srv_buf_pool_flushed += page_count;

	return(page_count);
}
Exemple #22
0
/**********************************************************************//**
Reports in the undo log of an update or delete marking of a clustered index
record. The record is serialized at the first free position of the undo
page: a type/compiler-info byte, the undo number, the table id, the info
bits and system columns of the record, the unique key fields, optionally
the old values of the updated fields, and optionally the values of all
ordering columns. Whenever trx_undo_left() reports too little space the
function gives up and returns 0.
@return byte offset of the inserted undo log entry on the page if
succeed, 0 if fail */
static
ulint
trx_undo_page_report_modify(
/*========================*/
	page_t*		undo_page,	/*!< in: undo log page */
	trx_t*		trx,		/*!< in: transaction */
	dict_index_t*	index,		/*!< in: clustered index where update or
					delete marking is done */
	const rec_t*	rec,		/*!< in: clustered index record which
					has NOT yet been modified */
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
	const upd_t*	update,		/*!< in: update vector which tells the
					columns to be updated; in the case of
					a delete, this should be set to NULL */
	ulint		cmpl_info,	/*!< in: compiler info on secondary
					index updates */
	mtr_t*		mtr)		/*!< in: mtr */
{
	dict_table_t*	table;
	ulint		first_free;
	byte*		ptr;
	const byte*	field;
	ulint		flen;
	ulint		col_no;
	ulint		type_cmpl;
	byte*		type_cmpl_ptr;
	ulint		i;
	trx_id_t	trx_id;
	ibool		ignore_prefix = FALSE;
	/* Scratch buffer handed to trx_undo_page_report_modify_ext() when
	a prefix of an externally stored column is wanted */
	byte		ext_buf[REC_MAX_INDEX_COL_LEN
				+ BTR_EXTERN_FIELD_REF_SIZE];

	ut_a(dict_index_is_clust(index));
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
			       + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
	table = index->table;

	/* The new record starts at the first free byte of the page */
	first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
				      + TRX_UNDO_PAGE_FREE);
	ptr = undo_page + first_free;

	ut_ad(first_free <= UNIV_PAGE_SIZE);

	if (trx_undo_left(undo_page, ptr) < 50) {

		/* NOTE: the value 50 must be big enough so that the general
		fields written below fit on the undo log page */

		return(0);
	}

	/* Reserve 2 bytes for the pointer to the next undo log record */
	ptr += 2;

	/* Store first some general parameters to the undo log */

	if (!update) {
		type_cmpl = TRX_UNDO_DEL_MARK_REC;
	} else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
		type_cmpl = TRX_UNDO_UPD_DEL_REC;
		/* We are about to update a delete marked record.
		We don't typically need the prefix in this case unless
		the delete marking is done by the same transaction
		(which we check below). */
		ignore_prefix = TRUE;
	} else {
		type_cmpl = TRX_UNDO_UPD_EXIST_REC;
	}

	type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
	/* Remember where the type byte lives: TRX_UNDO_UPD_EXTERN may be
	OR-ed into it later, once an externally stored field is seen */
	type_cmpl_ptr = ptr;

	*ptr++ = (byte) type_cmpl;
	ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);

	ptr += mach_dulint_write_much_compressed(ptr, table->id);

	/*----------------------------------------*/
	/* Store the state of the info bits */

	*ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table));

	/* Store the values of the system columns */
	field = rec_get_nth_field(rec, offsets,
				  dict_index_get_sys_col_pos(
					  index, DATA_TRX_ID), &flen);
	ut_ad(flen == DATA_TRX_ID_LEN);

	trx_id = trx_read_trx_id(field);

	/* If it is an update of a delete marked record, then we are
	allowed to ignore blob prefixes if the delete marking was done
	by some other trx as it must have committed by now for us to
	allow an over-write. */
	if (ignore_prefix) {
		ignore_prefix = ut_dulint_cmp(trx_id, trx->id) != 0;
	}
	ptr += mach_dulint_write_compressed(ptr, trx_id);

	field = rec_get_nth_field(rec, offsets,
				  dict_index_get_sys_col_pos(
					  index, DATA_ROLL_PTR), &flen);
	ut_ad(flen == DATA_ROLL_PTR_LEN);

	ptr += mach_dulint_write_compressed(ptr, trx_read_roll_ptr(field));

	/*----------------------------------------*/
	/* Store then the fields required to uniquely determine the
	record which will be modified in the clustered index */

	for (i = 0; i < dict_index_get_n_unique(index); i++) {

		field = rec_get_nth_field(rec, offsets, i, &flen);

		/* The ordering columns must not be stored externally. */
		ut_ad(!rec_offs_nth_extern(offsets, i));
		ut_ad(dict_index_get_nth_col(index, i)->ord_part);

		/* NOTE(review): 5 is presumably the largest size that
		mach_write_compressed() can emit -- confirm */
		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		ptr += mach_write_compressed(ptr, flen);

		if (flen != UNIV_SQL_NULL) {
			if (trx_undo_left(undo_page, ptr) < flen) {

				return(0);
			}

			ut_memcpy(ptr, field, flen);
			ptr += flen;
		}
	}

	/*----------------------------------------*/
	/* Save to the undo log the old values of the columns to be updated. */

	if (update) {
		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		ptr += mach_write_compressed(ptr, upd_get_n_fields(update));

		for (i = 0; i < upd_get_n_fields(update); i++) {

			ulint	pos = upd_get_nth_field(update, i)->field_no;

			/* Write field number to undo log */
			if (trx_undo_left(undo_page, ptr) < 5) {

				return(0);
			}

			ptr += mach_write_compressed(ptr, pos);

			/* Save the old value of field */
			field = rec_get_nth_field(rec, offsets, pos, &flen);

			/* NOTE(review): 15 presumably bounds what the length
			/ external-field header written below can take --
			confirm against trx_undo_page_report_modify_ext() */
			if (trx_undo_left(undo_page, ptr) < 15) {

				return(0);
			}

			if (rec_offs_nth_extern(offsets, pos)) {
				/* ext_buf is passed only for ordering
				columns whose prefix is not ignored and whose
				local length is below REC_MAX_INDEX_COL_LEN;
				the callee may update field and flen */
				ptr = trx_undo_page_report_modify_ext(
					ptr,
					dict_index_get_nth_col(index, pos)
					->ord_part
					&& !ignore_prefix
					&& flen < REC_MAX_INDEX_COL_LEN
					? ext_buf : NULL,
					dict_table_zip_size(table),
					&field, &flen);

				/* Notify purge that it eventually has to
				free the old externally stored field */

				trx->update_undo->del_marks = TRUE;

				*type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
			} else {
				ptr += mach_write_compressed(ptr, flen);
			}

			if (flen != UNIV_SQL_NULL) {
				if (trx_undo_left(undo_page, ptr) < flen) {

					return(0);
				}

				ut_memcpy(ptr, field, flen);
				ptr += flen;
			}
		}
	}

	/*----------------------------------------*/
	/* In the case of a delete marking, and also in the case of an update
	where any ordering field of any index changes, store the values of all
	columns which occur as ordering fields in any index. This info is used
	in the purge of old versions where we use it to build and search the
	delete marked index records, to look if we can remove them from the
	index tree. Note that starting from 4.0.14 also externally stored
	fields can be ordering in some index. Starting from 5.2, we no longer
	store REC_MAX_INDEX_COL_LEN first bytes to the undo log record,
	but we can construct the column prefix fields in the index by
	fetching the first page of the BLOB that is pointed to by the
	clustered index. This works also in crash recovery, because all pages
	(including BLOBs) are recovered before anything is rolled back. */
	/* NOTE(review): the code below still hands ext_buf to
	trx_undo_page_report_modify_ext() when flen < REC_MAX_INDEX_COL_LEN
	and the prefix is not ignored, so the "no longer store" sentence
	above may be out of date -- confirm against that function */

	if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
		/* Start of the ordering-column section; its 2-byte length
		is filled in at the end of this branch */
		byte*	old_ptr = ptr;

		trx->update_undo->del_marks = TRUE;

		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		/* Reserve 2 bytes to write the number of bytes the stored
		fields take in this undo record */

		ptr += 2;

		for (col_no = 0; col_no < dict_table_get_n_cols(table);
		     col_no++) {

			const dict_col_t*	col
				= dict_table_get_nth_col(table, col_no);

			if (col->ord_part) {
				ulint	pos;

				/* Write field number to undo log */
				if (trx_undo_left(undo_page, ptr) < 5 + 15) {

					return(0);
				}

				/* Translate the table column number into the
				field position in the clustered index */
				pos = dict_index_get_nth_col_pos(index,
								 col_no);
				ptr += mach_write_compressed(ptr, pos);

				/* Save the old value of field */
				field = rec_get_nth_field(rec, offsets, pos,
							  &flen);

				if (rec_offs_nth_extern(offsets, pos)) {
					ptr = trx_undo_page_report_modify_ext(
						ptr,
						flen < REC_MAX_INDEX_COL_LEN
						&& !ignore_prefix
						? ext_buf : NULL,
						dict_table_zip_size(table),
						&field, &flen);
				} else {
					ptr += mach_write_compressed(
						ptr, flen);
				}

				if (flen != UNIV_SQL_NULL) {
					if (trx_undo_left(undo_page, ptr)
					    < flen) {

						return(0);
					}

					ut_memcpy(ptr, field, flen);
					ptr += flen;
				}
			}
		}

		/* The stored length includes the 2 reserved bytes */
		mach_write_to_2(old_ptr, ptr - old_ptr);
	}

	/*----------------------------------------*/
	/* Write pointers to the previous and the next undo log records */
	if (trx_undo_left(undo_page, ptr) < 2) {

		return(0);
	}

	/* The record ends with the offset of its own start; the 2 bytes
	reserved at its start receive the offset of the first byte after it */
	mach_write_to_2(ptr, first_free);
	ptr += 2;
	mach_write_to_2(undo_page + first_free, ptr - undo_page);

	/* Advance the page's free pointer past the new record */
	mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
			ptr - undo_page);

	/* Write to the REDO log about this change in the UNDO log */

	trx_undof_page_add_undo_rec_log(undo_page, first_free,
					ptr - undo_page, mtr);
	return(first_free);
}
Exemple #23
0
/**************************************************************************
Prints a human-readable description of a transaction to a stream: its
id and concurrency state, the owning thread, the current operation, table
and lock usage, the number of undo log entries, and finally the MySQL
thread information when a MySQL thread handle is attached. */
void
trx_print(
/*======*/
	FILE*	f,		/* in: output stream */
	trx_t*	trx,		/* in: transaction */
	ulint	max_query_len)	/* in: max query length to print, or 0 to
				   use the default max length */
{
	ibool	newline;

	fprintf(f, "TRANSACTION %lu %lu",
		(ulong) ut_dulint_get_high(trx->id),
		(ulong) ut_dulint_get_low(trx->id));

	switch (trx->conc_state) {
	case TRX_NOT_STARTED:
		fputs(", not started", f);
		break;
	case TRX_ACTIVE:
		fprintf(f, ", ACTIVE %lu sec",
			(ulong)difftime(time(NULL), trx->start_time));
		break;
	case TRX_PREPARED:
		fprintf(f, ", ACTIVE (PREPARED) %lu sec",
			(ulong)difftime(time(NULL), trx->start_time));
		break;
	case TRX_COMMITTED_IN_MEMORY:
		fputs(", COMMITTED IN MEMORY", f);
		break;
	default:
		fprintf(f, " state %lu", (ulong) trx->conc_state);
	}

#ifdef UNIV_LINUX
	/* Cast like every other %lu argument in this function, so that the
	argument type always matches the conversion specifier */
	fprintf(f, ", process no %lu", (ulong) trx->mysql_process_no);
#endif
	fprintf(f, ", OS thread id %lu",
		(ulong) os_thread_pf(trx->mysql_thread_id));

	if (*trx->op_info) {
		putc(' ', f);
		fputs(trx->op_info, f);
	}

	if (trx->is_purge) {
		fputs(" purge trx", f);
	}

	if (trx->declared_to_be_inside_innodb) {
		fprintf(f, ", thread declared inside InnoDB %lu",
			(ulong) trx->n_tickets_to_enter_innodb);
	}

	putc('\n', f);

	if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
		fprintf(f, "mysql tables in use %lu, locked %lu\n",
			(ulong) trx->n_mysql_tables_in_use,
			(ulong) trx->mysql_n_tables_locked);
	}

	/* newline tracks whether anything has been printed on the second
	status line, so that it gets terminated exactly when needed */
	newline = TRUE;

	switch (trx->que_state) {
	case TRX_QUE_RUNNING:
		newline = FALSE; break;
	case TRX_QUE_LOCK_WAIT:
		fputs("LOCK WAIT ", f); break;
	case TRX_QUE_ROLLING_BACK:
		fputs("ROLLING BACK ", f); break;
	case TRX_QUE_COMMITTING:
		fputs("COMMITTING ", f); break;
	default:
		fprintf(f, "que state %lu ", (ulong) trx->que_state);
	}

	if (0 < UT_LIST_GET_LEN(trx->trx_locks)
	    || mem_heap_get_size(trx->lock_heap) > 400) {
		newline = TRUE;

		fprintf(f, "%lu lock struct(s), heap size %lu,"
			" %lu row lock(s)",
			(ulong) UT_LIST_GET_LEN(trx->trx_locks),
			(ulong) mem_heap_get_size(trx->lock_heap),
			(ulong) lock_number_of_rows_locked(trx));
	}

	if (trx->has_search_latch) {
		newline = TRUE;
		fputs(", holds adaptive hash latch", f);
	}

	if (ut_dulint_cmp(trx->undo_no, ut_dulint_zero) != 0) {
		newline = TRUE;
		/* Only the low word of the undo number is printed */
		fprintf(f, ", undo log entries %lu",
			(ulong) ut_dulint_get_low(trx->undo_no));
	}

	if (newline) {
		putc('\n', f);
	}

	if (trx->mysql_thd != NULL) {
		innobase_mysql_print_thd(f, trx->mysql_thd, max_query_len);
	}
}
/***********************************************************************//**
Selects, among all rollback segments that still have a log to purge, the one
whose last log has the smallest transaction number, and stores its position
in purge_sys as the next thing to purge. Called both to initialize purge_sys
when no next record is known and to advance it after a whole undo log of a
transaction has been handled. If no rollback segment has anything to purge,
purge_sys is left untouched (next_stored stays FALSE). */
static
void
trx_purge_choose_next_log(void)
/*===========================*/
{
	trx_rseg_t*	cur_rseg;
	trx_rseg_t*	chosen_rseg;
	trx_id_t	chosen_trx_no;
	trx_undo_rec_t*	first_rec;
	ulint		space		= 0;
	ulint		zip_size	= 0;
	ulint		hdr_page_no	= 0;
	ulint		hdr_offset	= 0;
	mtr_t		mtr;

	ut_ad(mutex_own(&(purge_sys->mutex)));
	ut_ad(purge_sys->next_stored == FALSE);

	chosen_rseg = NULL;
	chosen_trx_no = ut_dulint_max;

	/* Scan every rollback segment, latching each one only while its
	fields are inspected */
	for (cur_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
	     cur_rseg;
	     cur_rseg = UT_LIST_GET_NEXT(rseg_list, cur_rseg)) {

		mutex_enter(&(cur_rseg->mutex));

		if (cur_rseg->last_page_no != FIL_NULL
		    && (chosen_rseg == NULL
			|| ut_dulint_cmp(chosen_trx_no,
					 cur_rseg->last_trx_no) > 0)) {

			chosen_rseg = cur_rseg;
			chosen_trx_no = cur_rseg->last_trx_no;
			space = cur_rseg->space;
			zip_size = cur_rseg->zip_size;

			/* Purge of externally stored fields assumes
			that the space id is 0 */
			ut_a(space == 0);

			hdr_page_no = cur_rseg->last_page_no;
			hdr_offset = cur_rseg->last_offset;
		}

		mutex_exit(&(cur_rseg->mutex));
	}

	if (chosen_rseg == NULL) {
		/* Nothing to purge in any rollback segment */
		return;
	}

	mtr_start(&mtr);

	/* The dummy record stands for "nothing to purge in this log":
	either the log has no delete marks or it is empty */
	first_rec = &trx_purge_dummy_rec;

	if (chosen_rseg->last_del_marks) {
		first_rec = trx_undo_get_first_rec(space, zip_size,
						   hdr_page_no, hdr_offset,
						   RW_S_LATCH, &mtr);
		if (first_rec == NULL) {
			first_rec = &trx_purge_dummy_rec;
		}
	}

	purge_sys->next_stored = TRUE;
	purge_sys->rseg = chosen_rseg;

	purge_sys->hdr_page_no = hdr_page_no;
	purge_sys->hdr_offset = hdr_offset;

	purge_sys->purge_trx_no = chosen_trx_no;

	if (first_rec != &trx_purge_dummy_rec) {
		purge_sys->purge_undo_no
			= trx_undo_rec_get_undo_no(first_rec);

		purge_sys->page_no
			= page_get_page_no(page_align(first_rec));
		purge_sys->offset = page_offset(first_rec);
	} else {
		purge_sys->purge_undo_no = ut_dulint_zero;
		purge_sys->page_no = hdr_page_no;
		purge_sys->offset = 0;
	}

	mtr_commit(&mtr);
}
Exemple #25
0
/*******************************************************************
Deletes the clustered index record that the persistent cursor of the undo
node was positioned on. An optimistic (leaf-only) delete is tried first;
if it fails, a pessimistic delete is attempted, and retried after a sleep
for as long as it reports DB_OUT_OF_FILE_SPACE and the retry limit has not
been reached. When the record belongs to the table whose id is
DICT_INDEXES_ID, the index tree it describes is dropped first. */
static
ulint
row_undo_ins_remove_clust_rec(
/*==========================*/
				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
	undo_node_t*	node,	/* in: undo node */
	que_thr_t*	thr)	/* in: query thread */
{
	btr_cur_t*	btr_cur;
	ibool		ok;
	ulint		err;
	ulint		n_attempts	= 0;
	mtr_t		mtr;

	UT_NOT_USED(thr);

	mtr_start(&mtr);

	ok = btr_pcur_restore_position(BTR_MODIFY_LEAF, &(node->pcur), &mtr);
	ut_a(ok);

	if (0 == ut_dulint_cmp(node->table->id, DICT_INDEXES_ID)) {

		/* The row lives in SYS_INDEXES: drop the index tree it
		refers to, then reposition in a fresh mini-transaction */

		dict_drop_index_tree(btr_pcur_get_rec(&(node->pcur)), &mtr);

		mtr_commit(&mtr);

		mtr_start(&mtr);

		ok = btr_pcur_restore_position(BTR_MODIFY_LEAF,
					       &(node->pcur), &mtr);
		ut_a(ok);
	}

	btr_cur = btr_pcur_get_btr_cur(&(node->pcur));

	ok = btr_cur_optimistic_delete(btr_cur, &mtr);

	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);

	if (ok) {
		trx_undo_rec_release(node->trx, node->undo_no);

		return(DB_SUCCESS);
	}

	/* The optimistic delete did not succeed: try a pessimistic descent
	to the tree, repeating it while we run out of file space */
	for (;;) {
		mtr_start(&mtr);

		ok = btr_pcur_restore_position(BTR_MODIFY_TREE,
					       &(node->pcur), &mtr);
		ut_a(ok);

		btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr);

		btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);

		/* The delete operation may fail if we have little
		file space left: TODO: easiest to crash the database
		and restart with more file space */

		if (err != DB_OUT_OF_FILE_SPACE
		    || n_attempts >= BTR_CUR_RETRY_DELETE_N_TIMES) {

			break;
		}

		n_attempts++;

		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
	}

	trx_undo_rec_release(node->trx, node->undo_no);

	return(err);
}
/********************************************************************//**
Fetches the next undo log record from the history list to purge. It must be
released with the corresponding release function. Purge is put into the
TRX_STOP_PURGE state, and NULL is returned, when no log is left, when the
page handling limit has been reached, or when the next transaction number
is not below the low limit of the purge view.
@return copy of an undo log record or pointer to trx_purge_dummy_rec,
if the whole undo log can be skipped in purge; NULL if none left */
UNIV_INTERN
trx_undo_rec_t*
trx_purge_fetch_next_rec(
/*=====================*/
	roll_ptr_t*	roll_ptr,/*!< out: roll pointer to undo record */
	trx_undo_inf_t** cell,	/*!< out: storage cell for the record in the
				purge array */
	mem_heap_t*	heap)	/*!< in: memory heap where copied */
{
	trx_undo_rec_t*	next_rec;

	mutex_enter(&(purge_sys->mutex));

	if (purge_sys->state == TRX_STOP_PURGE) {

		goto stop_purge;
	}

	if (!purge_sys->next_stored) {
		trx_purge_choose_next_log();

		if (!purge_sys->next_stored) {
			/* No rollback segment had anything to offer */
			purge_sys->state = TRX_STOP_PURGE;

			trx_purge_truncate_if_arr_empty();

			if (srv_print_thread_releases) {
				fprintf(stderr,
					"Purge: No logs left in the"
					" history list; pages handled %lu\n",
					(ulong) purge_sys->n_pages_handled);
			}

			mutex_exit(&(purge_sys->mutex));

			return(NULL);
		}
	}

	/* Stop when enough pages have been handled in this batch, or when
	the next log belongs to a transaction number the purge view does
	not place below its low limit */
	if (purge_sys->n_pages_handled >= purge_sys->handle_limit
	    || ut_dulint_cmp(purge_sys->purge_trx_no,
			     purge_sys->view->low_limit_no) >= 0) {

		purge_sys->state = TRX_STOP_PURGE;

		goto stop_purge;
	}

	*roll_ptr = trx_undo_build_roll_ptr(FALSE, (purge_sys->rseg)->id,
					    purge_sys->page_no,
					    purge_sys->offset);

	*cell = trx_purge_arr_store_info(purge_sys->purge_trx_no,
					 purge_sys->purge_undo_no);

	ut_ad(ut_dulint_cmp(purge_sys->purge_trx_no,
			    (purge_sys->view)->low_limit_no) < 0);

	/* trx_purge_get_next_rec() moves purge_trx_no and purge_undo_no
	forward, which is why they were stored in the cell first */

	next_rec = trx_purge_get_next_rec(heap);

	mutex_exit(&(purge_sys->mutex));

	return(next_rec);

stop_purge:
	trx_purge_truncate_if_arr_empty();

	mutex_exit(&(purge_sys->mutex));

	return(NULL);
}
Exemple #27
0
/***********************************************************************
Truncates the index tree described by a SYS_INDEXES record: frees all of
its pages, creates a new empty tree with the same type, space and index id,
and stores the new root page number in the in-memory index object when one
with a matching id is found in the table. */
ulint
dict_truncate_index_tree(
/*=====================*/
				/* out: new root page number, or
				FIL_NULL on failure */
	dict_table_t*	table,	/* in: the table the index belongs to */
	btr_pcur_t*	pcur,	/* in/out: persistent cursor pointing to
				record in the clustered index of
				SYS_INDEXES table. The cursor may be
				repositioned in this call. */
	mtr_t*		mtr)	/* in: mtr having the latch
				on the record page. The mtr may be
				committed and restarted in this call. */
{
	ulint		root_page_no;
	ulint		space;
	ulint		type;
	dulint		index_id;
	rec_t*		rec;
	byte*		ptr;
	ulint		len;
	ulint		comp;
	dict_index_t*	index;

	ut_ad(mutex_own(&(dict_sys->mutex)));
	ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
	rec = btr_pcur_get_rec(pcur);
	ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len);

	ut_ad(len == 4);

	root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);

	if (root_page_no == FIL_NULL) {
		/* The tree has been freed. */

		ut_print_timestamp(stderr);
		fprintf(stderr, "  InnoDB: Trying to TRUNCATE"
			" a missing index of table %s!\n", table->name);
		return(FIL_NULL);
	}

	ptr = rec_get_nth_field_old(rec,
				    DICT_SYS_INDEXES_SPACE_NO_FIELD, &len);

	ut_ad(len == 4);

	space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);

	if (!fil_tablespace_exists_in_mem(space)) {
		/* It is a single table tablespace and the .ibd file is
		missing: do nothing */

		ut_print_timestamp(stderr);
		fprintf(stderr, "  InnoDB: Trying to TRUNCATE"
			" a missing .ibd file of table %s!\n", table->name);
		return(FIL_NULL);
	}

	ptr = rec_get_nth_field_old(rec,
				    DICT_SYS_INDEXES_TYPE_FIELD, &len);
	ut_ad(len == 4);
	type = mach_read_from_4(ptr);

	/* Field 1 of the record is read as the 8-byte index id */
	ptr = rec_get_nth_field_old(rec, 1, &len);
	ut_ad(len == 8);
	index_id = mach_read_from_8(ptr);

	/* We free all the pages but the root page first; this operation
	may span several mini-transactions */

	btr_free_but_not_root(space, root_page_no);

	/* Then we free the root page in the same mini-transaction where
	we create the b-tree and write its new root page number to the
	appropriate field in the SYS_INDEXES record: this mini-transaction
	marks the B-tree totally truncated */

	/* Remember the page format of the old root before it is freed, so
	that the new tree can be created with the same one */
	comp = page_is_comp(btr_page_get(space, root_page_no, RW_X_LATCH,
					 mtr));

	btr_free_root(space, root_page_no, mtr);
	/* We will temporarily write FIL_NULL to the PAGE_NO field
	in SYS_INDEXES, so that the database will not get into an
	inconsistent state in case it crashes between the mtr_commit()
	below and the following mtr_commit() call. */
	page_rec_write_index_page_no(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
				     FIL_NULL, mtr);

	/* We will need to commit the mini-transaction in order to avoid
	deadlocks in the btr_create() call, because otherwise we would
	be freeing and allocating pages in the same mini-transaction. */
	btr_pcur_store_position(pcur, mtr);
	mtr_commit(mtr);

	mtr_start(mtr);
	/* NOTE(review): the result of the restore is not checked here */
	btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);

	/* Find the index corresponding to this SYS_INDEXES record. */
	for (index = UT_LIST_GET_FIRST(table->indexes);
	     index;
	     index = UT_LIST_GET_NEXT(indexes, index)) {
		if (!ut_dulint_cmp(index->id, index_id)) {
			break;
		}
	}

	root_page_no = btr_create(type, space, index_id, comp, mtr);
	if (index) {
		index->page = (unsigned int) root_page_no;
	} else {
		ut_print_timestamp(stderr);
		/* The id halves are cast explicitly so that the arguments
		match the %lu conversions whatever the width of ulint is */
		fprintf(stderr,
			"  InnoDB: Index %lu %lu of table %s is missing\n"
			"InnoDB: from the data dictionary during TRUNCATE!\n",
			(unsigned long) ut_dulint_get_high(index_id),
			(unsigned long) ut_dulint_get_low(index_id),
			table->name);
	}

	return(root_page_no);
}
/*************************************************************************
Makes a copy of the oldest existing read view (the last one in
trx_sys->view_list), adding the creator of that view to the copied id
array unless its creator id is (0, 0). The copy is appended to the end of
the view list. If there is no existing view, a new one is opened with
read_view_open_now() instead. */
read_view_t*
read_view_oldest_copy_or_open_new(
/*==============================*/
					/* out, own: read view struct */
	dulint		cr_trx_id,	/* in: trx_id of creating
					transaction, or (0, 0) used in purge*/
	mem_heap_t*	heap)		/* in: memory heap from which
					allocated */
{
	read_view_t*	oldest;
	read_view_t*	copy;
	ibool		creator_pending;
	ulint		shift		= 0;
	ulint		n_ids;
	ulint		pos;

	ut_ad(mutex_own(&kernel_mutex));

	oldest = UT_LIST_GET_LAST(trx_sys->view_list);

	if (oldest == NULL) {

		return(read_view_open_now(cr_trx_id, heap));
	}

	n_ids = oldest->n_trx_ids;

	/* The creator of the old view gets a slot of its own in the copy,
	unless the creator id is (0, 0) */
	if (ut_dulint_cmp(oldest->creator_trx_id,
			  ut_dulint_create(0,0)) == 0) {
		creator_pending = FALSE;
	} else {
		creator_pending = TRUE;
		n_ids++;
	}

	copy = read_view_create_low(n_ids, heap);

	/* Copy the descending id array, slipping the creator id in at the
	first position whose old id is smaller than it (or at the very end).
	Once it is placed, the old ids are read one slot behind. */

	for (pos = 0; pos < n_ids; pos++) {
		dulint	id;

		if (creator_pending
		    && (pos >= oldest->n_trx_ids
			|| ut_dulint_cmp(oldest->creator_trx_id,
					 read_view_get_nth_trx_id(oldest,
								  pos))
			> 0)) {

			id = oldest->creator_trx_id;
			creator_pending = FALSE;
			shift = 1;
		} else {
			id = read_view_get_nth_trx_id(oldest, pos - shift);
		}

		read_view_set_nth_trx_id(copy, pos, id);
	}

	copy->creator_trx_id = cr_trx_id;

	copy->low_limit_no = oldest->low_limit_no;
	copy->low_limit_id = oldest->low_limit_id;

	if (n_ids == 0) {
		copy->up_limit_id = oldest->up_limit_id;
	} else {
		/* The last active transaction has the smallest id: */
		copy->up_limit_id = read_view_get_nth_trx_id(copy,
							     n_ids - 1);
	}

	UT_LIST_ADD_LAST(view_list, trx_sys->view_list, copy);

	return(copy);
}
Exemple #29
0
/****************************************************************//**
Creates trx objects for transactions and initializes the trx list of
trx_sys at database start. Rollback segment and undo log lists must
already exist when this function is called, because the lists of
transactions to be rolled back or cleaned up are built based on the
undo log lists. */
UNIV_INTERN
void
trx_lists_init_at_db_start(void)
/*============================*/
{
	trx_rseg_t*	rseg;
	trx_undo_t*	undo;
	trx_t*		trx;

	ut_ad(mutex_own(&kernel_mutex));
	UT_LIST_INIT(trx_sys->trx_list);

	/* Look from the rollback segments if there exist undo logs for
	transactions */

	rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);

	while (rseg != NULL) {
		undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);

		while (undo != NULL) {

			trx = trx_create(trx_dummy_sess);

			trx->is_recovered = TRUE;
			trx->id = undo->trx_id;
			trx->xid = undo->xid;
			trx->insert_undo = undo;
			trx->rseg = rseg;

			if (undo->state != TRX_UNDO_ACTIVE) {

				/* Prepared transactions are left in
				the prepared state waiting for a
				commit or abort decision from MySQL */

				if (undo->state == TRX_UNDO_PREPARED) {

					fprintf(stderr,
						"InnoDB: Transaction "
						TRX_ID_FMT
						" was in the"
						" XA prepared state.\n",
						TRX_ID_PREP_PRINTF(trx->id));

					if (srv_force_recovery == 0) {

						trx->conc_state = TRX_PREPARED;
					} else {
						fprintf(stderr,
							"InnoDB: Since"
							" innodb_force_recovery"
							" > 0, we will"
							" rollback it"
							" anyway.\n");

						trx->conc_state = TRX_ACTIVE;
					}
				} else {
					trx->conc_state
						= TRX_COMMITTED_IN_MEMORY;
				}

				/* We give a dummy value for the trx no;
				this should have no relevance since purge
				is not interested in committed transaction
				numbers, unless they are in the history
				list, in which case it looks the number
				from the disk based undo log structure */

				trx->no = trx->id;
			} else {
				trx->conc_state = TRX_ACTIVE;

				/* A running transaction always has the number
				field inited to ut_dulint_max */

				trx->no = ut_dulint_max;
			}

			if (undo->dict_operation) {
				trx_set_dict_operation(
					trx, TRX_DICT_OP_TABLE);
				trx->table_id = undo->table_id;
			}

			if (!undo->empty) {
				trx->undo_no = ut_dulint_add(undo->top_undo_no,
							     1);
			}

			trx_list_insert_ordered(trx);

			undo = UT_LIST_GET_NEXT(undo_list, undo);
		}

		undo = UT_LIST_GET_FIRST(rseg->update_undo_list);

		while (undo != NULL) {
			trx = trx_get_on_id(undo->trx_id);

			if (NULL == trx) {
				trx = trx_create(trx_dummy_sess);

				trx->is_recovered = TRUE;
				trx->id = undo->trx_id;
				trx->xid = undo->xid;

				if (undo->state != TRX_UNDO_ACTIVE) {

					/* Prepared transactions are left in
					the prepared state waiting for a
					commit or abort decision from MySQL */

					if (undo->state == TRX_UNDO_PREPARED) {
						fprintf(stderr,
							"InnoDB: Transaction "
							TRX_ID_FMT " was in the"
							" XA prepared state.\n",
							TRX_ID_PREP_PRINTF(
								trx->id));

						if (srv_force_recovery == 0) {

							trx->conc_state
								= TRX_PREPARED;
						} else {
							fprintf(stderr,
								"InnoDB: Since"
								" innodb_force_recovery"
								" > 0, we will"
								" rollback it"
								" anyway.\n");

							trx->conc_state
								= TRX_ACTIVE;
						}
					} else {
						trx->conc_state
							= TRX_COMMITTED_IN_MEMORY;
					}

					/* We give a dummy value for the trx
					number */

					trx->no = trx->id;
				} else {
					trx->conc_state = TRX_ACTIVE;

					/* A running transaction always has
					the number field inited to
					ut_dulint_max */

					trx->no = ut_dulint_max;
				}

				trx->rseg = rseg;
				trx_list_insert_ordered(trx);

				if (undo->dict_operation) {
					trx_set_dict_operation(
						trx, TRX_DICT_OP_TABLE);
					trx->table_id = undo->table_id;
				}
			}

			trx->update_undo = undo;

			if ((!undo->empty)
			    && (ut_dulint_cmp(undo->top_undo_no,
					      trx->undo_no) >= 0)) {

				trx->undo_no = ut_dulint_add(undo->top_undo_no,
							     1);
			}

			undo = UT_LIST_GET_NEXT(undo_list, undo);
		}

		rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
	}
}
/*************************************************************************
Opens a new read view as of this moment: collects the ids of all
transactions in trx_sys->trx_list that are active or prepared, other than
the creating transaction, and adds the view to the front of
trx_sys->view_list. */
read_view_t*
read_view_open_now(
/*===============*/
					/* out, own: read view struct */
	dulint		cr_trx_id,	/* in: trx_id of creating
					transaction, or (0, 0) used in
					purge */
	mem_heap_t*	heap)		/* in: memory heap from which
					allocated */
{
	read_view_t*	view;
	trx_t*		cur_trx;
	ulint		n_stored	= 0;

	ut_ad(mutex_own(&kernel_mutex));

	/* The id array is sized for the whole trx list; only part of it
	may end up being used */
	view = read_view_create_low(UT_LIST_GET_LEN(trx_sys->trx_list), heap);

	view->creator_trx_id = cr_trx_id;
	view->type = VIEW_NORMAL;
	view->undo_no = ut_dulint_create(0, 0);

	/* No future transactions should be visible in the view */

	view->low_limit_no = trx_sys->max_trx_id;
	view->low_limit_id = view->low_limit_no;

	/* No active transaction should be visible, except cr_trx */

	for (cur_trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
	     cur_trx;
	     cur_trx = UT_LIST_GET_NEXT(trx_list, cur_trx)) {

		if (ut_dulint_cmp(cur_trx->id, cr_trx_id) == 0) {
			/* The creator itself */
			continue;
		}

		if (cur_trx->conc_state != TRX_ACTIVE
		    && cur_trx->conc_state != TRX_PREPARED) {

			continue;
		}

		read_view_set_nth_trx_id(view, n_stored, cur_trx->id);

		n_stored++;

		/* NOTE that a transaction whose trx number is <
		trx_sys->max_trx_id can still be active, if it is
		in the middle of its commit! Note that when a
		transaction starts, we initialize trx->no to
		ut_dulint_max. */

		if (ut_dulint_cmp(view->low_limit_no, cur_trx->no) > 0) {

			view->low_limit_no = cur_trx->no;
		}
	}

	view->n_trx_ids = n_stored;

	if (n_stored == 0) {
		view->up_limit_id = view->low_limit_id;
	} else {
		/* The last active transaction has the smallest id: */
		view->up_limit_id = read_view_get_nth_trx_id(view,
							     n_stored - 1);
	}

	UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);

	return(view);
}