Example #1
/***************************************************************
Stores to the heap the row on which the node->pcur is positioned. */
static
void
row_upd_store_row(
/*==============*/
	upd_node_t*	node)	/* in: row update node */
{
	dict_index_t*	clust_index;
	upd_t*		update;
	rec_t*		rec;
	
	ut_ad(node->pcur->latch_mode != BTR_NO_LATCHES);

	if (node->row != NULL) {
		mem_heap_empty(node->heap);
		node->row = NULL;
	}
	
	clust_index = dict_table_get_first_index(node->table);

	rec = btr_pcur_get_rec(node->pcur);
	
	node->row = row_build(ROW_COPY_DATA, clust_index, rec, node->heap);

	node->ext_vec = mem_heap_alloc(node->heap, sizeof(ulint)
				                    * rec_get_n_fields(rec));
	if (node->is_delete) {
		update = NULL;
	} else {
		update = node->update;
	}
	
	node->n_ext_vec = btr_push_update_extern_fields(node->ext_vec,
								rec, update);
}
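
All of the examples in this listing center on the same allocator pattern: a memory heap (arena) is created once, filled with short-lived allocations, and then reset in bulk with mem_heap_empty() instead of freeing each allocation one by one. Below is a minimal stand-alone sketch of that lifecycle in plain C; the arena_* names are invented for illustration and this is not InnoDB's actual mem0mem implementation (which grows the heap in blocks rather than failing when full).

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* A minimal bump-pointer arena: one malloc'd buffer, an offset that
advances on every allocation, and a reset that just rewinds the
offset. This mimics the mem_heap create/alloc/empty/free lifecycle
used throughout the examples in this listing. */
typedef struct {
	char*	buf;	/* backing storage */
	size_t	size;	/* capacity in bytes */
	size_t	used;	/* bump pointer */
} arena_t;

static arena_t* arena_create(size_t size)
{
	arena_t*	a = malloc(sizeof(arena_t));

	a->buf = malloc(size);
	a->size = size;
	a->used = 0;
	return(a);
}

static void* arena_alloc(arena_t* a, size_t n)
{
	void*	p;

	if (a->used + n > a->size) {
		return(NULL);	/* a real heap would grow; keep it simple */
	}
	p = a->buf + a->used;
	a->used += n;
	return(p);
}

/* The analogue of mem_heap_empty(): discard every allocation at once
by rewinding the bump pointer; the buffer itself is kept for reuse. */
static void arena_empty(arena_t* a)
{
	a->used = 0;
}

static void arena_free(arena_t* a)
{
	free(a->buf);
	free(a);
}

int main(void)
{
	arena_t*	heap = arena_create(1024);

	/* Per-row work: allocate, use, then reset in O(1). */
	char*	row = arena_alloc(heap, 128);
	strcpy(row, "row image built inside the heap");
	printf("%s (used=%zu)\n", row, heap->used);

	arena_empty(heap);	/* like mem_heap_empty(node->heap) */
	printf("after empty: used=%zu\n", heap->used);

	arena_free(heap);
	return(0);
}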
Example #2
/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is UPD_DEL.
@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_mod_upd_del_sec(
/*=====================*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	mem_heap_t*	heap;
	dtuple_t*	entry;
	dict_index_t*	index;
	ulint		err	= DB_SUCCESS;

	ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
	heap = mem_heap_create(1024);

	while (node->index != NULL) {

		/* Skip all corrupted secondary indexes */
		dict_table_skip_corrupt_index(node->index);

		if (!node->index) {
			break;
		}

		index = node->index;

		entry = row_build_index_entry(node->row, node->ext,
					      index, heap);
		if (UNIV_UNLIKELY(!entry)) {
			/* The database must have crashed after
			inserting a clustered index record but before
			writing all the externally stored columns of
			that record.  Because secondary index entries
			are inserted after the clustered index record,
			we may assume that the secondary index record
			does not exist.  However, this situation may
			only occur during the rollback of incomplete
			transactions. */
			ut_a(thr_is_recv(thr));
		} else {
			err = row_undo_mod_del_mark_or_remove_sec(
				node, thr, index, entry);

			if (err != DB_SUCCESS) {

				break;
			}
		}

		mem_heap_empty(heap);

		node->index = dict_table_get_next_index(node->index);
	}

	mem_heap_free(heap);

	return(err);
}
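
The loop above shows the second recurring pattern: one heap serves the whole index scan and is emptied between iterations with mem_heap_empty(), then freed once after the loop. A sketch of the same shape, reusing the illustrative arena_* helpers from the sketch after Example #1:

/* Per-iteration heap reuse, mirroring the loop above. Relies on the
illustrative arena_t helpers defined in the earlier sketch. */
static void process_all_indexes(arena_t* heap, int n_indexes)
{
	int	i;

	for (i = 0; i < n_indexes; i++) {
		/* Build a transient index entry inside the heap... */
		void*	entry = arena_alloc(heap, 256);
		(void) entry;	/* ...do the per-index work with it... */

		/* ...then reclaim every allocation at once before the
		next index, instead of freeing piecemeal. */
		arena_empty(heap);
	}
}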
Example #3
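/***********************************************************//**
Fetches an undo log record and does the purge for the recorded operation.
If none left, or the current purge completed, returns the control to the
parent node, which is always a query thread node. */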
void
row_purge(
    /*======*/
    purge_node_t*	node,	/*!< in: row purge node */
    que_thr_t*	thr)	/*!< in: query thread */
{
    ibool		updated_extern;

    ut_ad(node);
    ut_ad(thr);

    node->undo_rec = trx_purge_fetch_next_rec(&node->roll_ptr,
                     &node->reservation,
                     node->heap);
    if (!node->undo_rec) {
        /* Purge completed for this query thread */

        thr->run_node = que_node_get_parent(node);

        return;
    }

    if (node->undo_rec != &trx_purge_dummy_rec
            && row_purge_parse_undo_rec(node, &updated_extern, thr)) {
        node->found_clust = FALSE;

        node->index = dict_table_get_next_index(
                          dict_table_get_first_index(node->table));

        if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
            row_purge_del_mark(node);

        } else if (updated_extern
                   || node->rec_type == TRX_UNDO_UPD_EXIST_REC) {

            row_purge_upd_exist_or_extern(thr, node);
        }

        if (node->found_clust) {
            btr_pcur_close(&(node->pcur));
        }

        row_mysql_unfreeze_data_dictionary(thr_get_trx(thr));
    }

    /* Do some cleanup */
    trx_purge_rec_release(node->reservation);
    mem_heap_empty(node->heap);

    thr->run_node = node;
}
Example #4
void
read_view_close_for_mysql(
/*======================*/
	trx_t*	trx)	/* in: trx which has a read view */
{
	ut_a(trx->global_read_view);

	mutex_enter(&kernel_mutex);

	read_view_close(trx->global_read_view);

	mem_heap_empty(trx->global_read_view_heap);

	trx->read_view = NULL;
	trx->global_read_view = NULL;

	mutex_exit(&kernel_mutex);
}
Example #5
/********************************************************************//**
Disable the adaptive hash search system and empty the index. */
UNIV_INTERN
void
btr_search_disable(void)
/*====================*/
{
	dict_table_t*	table;

	mutex_enter(&dict_sys->mutex);
	rw_lock_x_lock(&btr_search_latch);

	btr_search_enabled = FALSE;

	/* Clear the index->search_info->ref_count of every index in
	the data dictionary cache. */
	for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); table;
	     table = UT_LIST_GET_NEXT(table_LRU, table)) {

		dict_index_t*	index;

		for (index = dict_table_get_first_index(table); index;
		     index = dict_table_get_next_index(index)) {

			index->search_info->ref_count = 0;
		}
	}

	mutex_exit(&dict_sys->mutex);

	/* Set all block->index = NULL. */
	buf_pool_clear_hash_index();

	/* Clear the adaptive hash index. */
	hash_table_clear(btr_search_sys->hash_index);
	mem_heap_empty(btr_search_sys->hash_index->heap);

	rw_lock_x_unlock(&btr_search_latch);
}
Example #6
/*******************************************************************//**
This function runs a purge batch.
@return	number of undo log pages handled in the batch */
UNIV_INTERN
ulint
trx_purge(
    /*======*/
    ulint	limit)		/*!< in: the maximum number of records to
				purge in one batch */
{
    que_thr_t*	thr;
    ulint		old_pages_handled;

    if (srv_fake_write) {
        return(0);
    }

    ut_a(purge_sys->trx->n_active_thrs == 0);

    rw_lock_x_lock(&purge_sys->latch);

    mutex_enter(&kernel_mutex);

    /* Close and free the old purge view */

    read_view_close(purge_sys->view);
    purge_sys->view = NULL;
    mem_heap_empty(purge_sys->heap);

    /* Determine how long data manipulation language (DML) statements
    need to be delayed in order to reduce the lagging of the purge
    thread. */
    srv_dml_needed_delay = 0; /* in microseconds; default: no delay */

    /* If we cannot advance the 'purge view' because of an old
    'consistent read view', then the DML statements cannot be delayed.
    Also, srv_max_purge_lag <= 0 means 'infinity'. */
    if (srv_max_purge_lag > 0) {
        float	ratio = (float) trx_sys->rseg_history_len
                        / srv_max_purge_lag;
        if (ratio > ULINT_MAX / 10000) {
            /* Avoid overflow: maximum delay is 4295 seconds */
            srv_dml_needed_delay = ULINT_MAX;
        } else if (ratio > 1) {
            /* If the history list length exceeds the
            innodb_max_purge_lag, the
            data manipulation statements are delayed
            by at least 5000 microseconds. */
            srv_dml_needed_delay = (ulint) ((ratio - .5) * 10000);
        }
    }

    purge_sys->view = read_view_oldest_copy_or_open_new(
                          0, purge_sys->heap);

    mutex_exit(&kernel_mutex);

    rw_lock_x_unlock(&(purge_sys->latch));

    purge_sys->state = TRX_PURGE_ON;

    purge_sys->handle_limit = purge_sys->n_pages_handled + limit;

    old_pages_handled = purge_sys->n_pages_handled;


    mutex_enter(&kernel_mutex);

    thr = que_fork_start_command(purge_sys->query);

    ut_ad(thr);

    mutex_exit(&kernel_mutex);

    if (srv_print_thread_releases) {

        fputs("Starting purge\n", stderr);
    }

    que_run_threads(thr);

    if (srv_print_thread_releases) {

        fprintf(stderr,
                "Purge ends; pages handled %lu\n",
                (ulong) purge_sys->n_pages_handled);
    }

    return(purge_sys->n_pages_handled - old_pages_handled);
}
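
The delay computation above is easy to check by hand: with srv_max_purge_lag set to 100000 and a history list of 200000 entries, ratio = 2.0, so each DML row operation is delayed by (2.0 - 0.5) * 10000 = 15000 microseconds. Below is a stand-alone copy of just that formula; the function name and the use of unsigned long in place of ulint are illustrative.

#include <limits.h>
#include <stdio.h>

/* Stand-alone copy of the purge-lag delay formula used above:
delay = (history_len / max_purge_lag - 0.5) * 10000 microseconds,
applied only once the ratio exceeds 1. */
static unsigned long purge_lag_delay(unsigned long history_len,
				     unsigned long max_purge_lag)
{
	float	ratio;

	if (max_purge_lag == 0) {
		return(0);	/* <= 0 means 'infinity': never delay */
	}

	ratio = (float) history_len / max_purge_lag;

	if (ratio > ULONG_MAX / 10000) {
		return(ULONG_MAX);	/* avoid overflow */
	} else if (ratio > 1) {
		return((unsigned long) ((ratio - .5) * 10000));
	}
	return(0);
}

int main(void)
{
	/* History list twice the configured lag => 15000 us per row op */
	printf("%lu\n", purge_lag_delay(200000, 100000));	/* 15000 */
	printf("%lu\n", purge_lag_delay(50000, 100000));	/* 0 */
	return(0);
}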
Example #7
/****************************************************************//**
Commits a transaction. */
UNIV_INTERN
void
trx_commit_off_kernel(
/*==================*/
	trx_t*	trx)	/*!< in: transaction */
{
	page_t*		update_hdr_page;
	ib_uint64_t	lsn		= 0;
	trx_rseg_t*	rseg;
	trx_undo_t*	undo;
	mtr_t		mtr;

	ut_ad(mutex_own(&kernel_mutex));

	trx->must_flush_log_later = FALSE;

	rseg = trx->rseg;

	if (trx->insert_undo != NULL || trx->update_undo != NULL) {

		mutex_exit(&kernel_mutex);

		mtr_start(&mtr);

		/* Change the undo log segment states from TRX_UNDO_ACTIVE
		to some other state: these modifications to the file data
		structure define the transaction as committed in the file
		based world, at the serialization point of the log sequence
		number lsn obtained below. */

		mutex_enter(&(rseg->mutex));

		if (trx->insert_undo != NULL) {
			trx_undo_set_state_at_finish(
				rseg, trx, trx->insert_undo, &mtr);
		}

		undo = trx->update_undo;

		if (undo) {
			mutex_enter(&kernel_mutex);
			trx->no = trx_sys_get_new_trx_no();

			mutex_exit(&kernel_mutex);

			/* It is not necessary to obtain trx->undo_mutex here
			because only a single OS thread is allowed to do the
			transaction commit for this transaction. */

			update_hdr_page = trx_undo_set_state_at_finish(
				rseg, trx, undo, &mtr);

			/* We have to do the cleanup for the update log while
			holding the rseg mutex because update log headers
			have to be put to the history list in the order of
			the trx number. */

			trx_undo_update_cleanup(trx, update_hdr_page, &mtr);
		}

		mutex_exit(&(rseg->mutex));

		/* Update the latest MySQL binlog name and offset info
		in trx sys header if MySQL binlogging is on or the database
		server is a MySQL replication slave */

		if (trx->mysql_log_file_name
		    && trx->mysql_log_file_name[0] != '\0') {
			trx_sys_update_mysql_binlog_offset(
				trx->mysql_log_file_name,
				trx->mysql_log_offset,
				TRX_SYS_MYSQL_LOG_INFO, &mtr);
			trx->mysql_log_file_name = NULL;
		}

		/* The following call commits the mini-transaction, making the
		whole transaction committed in the file-based world, at this
		log sequence number. The transaction becomes 'durable' when
		we write the log to disk, but in the logical sense the commit
		in the file-based data structures (undo logs etc.) happens
		here.

		NOTE that transaction numbers, which are assigned only to
		transactions with an update undo log, do not necessarily come
		in exactly the same order as commit lsn's, if the transactions
		have different rollback segments. To get exactly the same
		order we should hold the kernel mutex up to this point,
		adding to the contention of the kernel mutex. However, if
		a transaction T2 is able to see modifications made by
		a transaction T1, T2 will always get a bigger transaction
		number and a bigger commit lsn than T1. */

		/*--------------*/
		mtr_commit(&mtr);
		/*--------------*/
		lsn = mtr.end_lsn;

		mutex_enter(&kernel_mutex);
	}

	ut_ad(trx->conc_state == TRX_ACTIVE
	      || trx->conc_state == TRX_PREPARED);
	ut_ad(mutex_own(&kernel_mutex));

	/* The following assignment makes the transaction committed in memory
	and makes its changes to data visible to other transactions.
	NOTE that there is a small discrepancy from the strict formal
	visibility rules here: a human user of the database can see
	modifications made by another transaction T even before the necessary
	log segment has been flushed to the disk. If the database happens to
	crash before the flush, the user has seen modifications from T which
	will never be a committed transaction. However, any transaction T2
	which sees the modifications of the committing transaction T, and
	which also itself makes modifications to the database, will get an lsn
	larger than the committing transaction T. In the case where the log
	flush fails, and T never gets committed, also T2 will never get
	committed. */

	/*--------------------------------------*/
	trx->conc_state = TRX_COMMITTED_IN_MEMORY;
	/*--------------------------------------*/

	/* If we release kernel_mutex below and we are still doing
	recovery, i.e. the background rollback thread is still active,
	then there is a chance that the rollback thread may see
	this trx as COMMITTED_IN_MEMORY and go ahead and clean it
	up by calling trx_cleanup_at_db_startup(). This can happen
	when we are committing a trx here that was left in
	PREPARED state during the crash. Note that the commit or the
	rollback of a PREPARED trx happens in the recovery thread
	while the rollback of other transactions happens in the
	background thread. To avoid this race we unconditionally
	unset the is_recovered flag of the trx. */

	trx->is_recovered = FALSE;

	lock_release_off_kernel(trx);

	if (trx->global_read_view) {
		read_view_close(trx->global_read_view);
		mem_heap_empty(trx->global_read_view_heap);
		trx->global_read_view = NULL;
	}

	trx->read_view = NULL;

	if (lsn) {

		mutex_exit(&kernel_mutex);

		if (trx->insert_undo != NULL) {

			trx_undo_insert_cleanup(trx);
		}

		/* NOTE that we could possibly make a group commit more
		efficient here: call os_thread_yield here to allow also other
		trxs to come to commit! */

		/*-------------------------------------*/

		/* Depending on the my.cnf options, we may now write the log
		buffer to the log files, making the transaction durable if
		the OS does not crash. We may also flush the log files to
		disk, making the transaction durable also at an OS crash or a
		power outage.

		The idea in InnoDB's group commit is that a group of
		transactions gather behind a trx doing a physical disk write
		to log files, and when that physical write has been completed,
		one of those transactions does a write which commits the whole
		group. Note that this group commit will only bring benefit if
		there are > 2 users in the database. Then at least 2 users can
		gather behind one doing the physical log write to disk.

		If we are calling trx_commit() under prepare_commit_mutex, we
		will delay possible log write and flush to a separate function
		trx_commit_complete_for_mysql(), which is only called when the
		thread has released the mutex. This is to make the
		group commit algorithm work. Otherwise, the prepare_commit
		mutex would serialize all commits and prevent a group of
		transactions from gathering. */

		if (trx->flush_log_later) {
			/* Do nothing yet */
			trx->must_flush_log_later = TRUE;
		} else if (srv_flush_log_at_trx_commit == 0) {
			/* Do nothing */
		} else if (srv_flush_log_at_trx_commit == 1) {
			if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
				/* Write the log but do not flush it to disk */

				log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
						FALSE);
			} else {
				/* Write the log to the log files AND flush
				them to disk */

				log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
			}
		} else if (srv_flush_log_at_trx_commit == 2) {

			/* Write the log but do not flush it to disk */

			log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
		} else {
			ut_error;
		}

		trx->commit_lsn = lsn;

		/*-------------------------------------*/

		mutex_enter(&kernel_mutex);
	}

	/* Free all savepoints */
	trx_roll_free_all_savepoints(trx);

	trx->conc_state = TRX_NOT_STARTED;
	trx->rseg = NULL;
	trx->undo_no = ut_dulint_zero;
	trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;

	ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
	ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0);

	UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
}
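
The tail of trx_commit_off_kernel() encodes the usual innodb_flush_log_at_trx_commit semantics: 0 does nothing at commit time, 1 writes and flushes the log (write-only when the flush method is SRV_UNIX_NOSYNC), and 2 writes without flushing. A hedged stand-alone reduction of that decision follows; the enum and function names are invented for illustration.

#include <stdbool.h>

/* Illustrative reduction of the commit-time log decision above. */
enum flush_action { DO_NOTHING, WRITE_ONLY, WRITE_AND_FLUSH };

static enum flush_action commit_log_action(int flush_log_at_trx_commit,
					   bool unix_nosync)
{
	switch (flush_log_at_trx_commit) {
	case 0:
		return(DO_NOTHING);	/* rely on the background flush */
	case 1:
		/* Durable commit, unless fsync is disabled entirely. */
		return(unix_nosync ? WRITE_ONLY : WRITE_AND_FLUSH);
	case 2:
		return(WRITE_ONLY);	/* survives a mysqld crash,
					not an OS crash */
	default:
		return(WRITE_AND_FLUSH); /* defensive; the source above
					 calls ut_error instead */
	}
}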
Example #8
/***********************************************************//**
Fetches an undo log record and does the undo for the recorded operation.
If none left, or a partial rollback completed, returns control to the
parent node, which is always a query thread node.
@return	DB_SUCCESS if operation successfully completed, else error code */
static
ulint
row_undo(
/*=====*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	ulint		err;
	trx_t*		trx;
	roll_ptr_t	roll_ptr;
	ibool		locked_data_dict;

	ut_ad(node && thr);

	trx = node->trx;

	if (node->state == UNDO_NODE_FETCH_NEXT) {

		node->undo_rec = trx_roll_pop_top_rec_of_trx(trx,
							     trx->roll_limit,
							     &roll_ptr,
							     node->heap);
		if (!node->undo_rec) {
			/* Rollback completed for this query thread */

			thr->run_node = que_node_get_parent(node);

			return(DB_SUCCESS);
		}

		node->roll_ptr = roll_ptr;
		node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec);

		if (trx_undo_roll_ptr_is_insert(roll_ptr)) {

			node->state = UNDO_NODE_INSERT;
		} else {
			node->state = UNDO_NODE_MODIFY;
		}

	} else if (node->state == UNDO_NODE_PREV_VERS) {

		/* Undo should be done to the same clustered index record
		again in this same rollback, restoring the previous version */

		roll_ptr = node->new_roll_ptr;

		node->undo_rec = trx_undo_get_undo_rec_low(roll_ptr,
							   node->heap);
		node->roll_ptr = roll_ptr;
		node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec);

		if (trx_undo_roll_ptr_is_insert(roll_ptr)) {

			node->state = UNDO_NODE_INSERT;
		} else {
			node->state = UNDO_NODE_MODIFY;
		}
	}

	/* Prevent DROP TABLE etc. while we are rolling back this row.
	If we are doing a TABLE CREATE or some other dictionary operation,
	then we already have dict_operation_lock locked in x-mode. Do not
	try to lock again, because that would cause a hang. */

	locked_data_dict = (trx->dict_operation_lock_mode == 0);

	if (locked_data_dict) {

		row_mysql_lock_data_dictionary(trx);
	}

	if (node->state == UNDO_NODE_INSERT) {

		err = row_undo_ins(node);

		node->state = UNDO_NODE_FETCH_NEXT;
	} else {
		ut_ad(node->state == UNDO_NODE_MODIFY);
		err = row_undo_mod(node, thr);
	}

	if (locked_data_dict) {

		row_mysql_unlock_data_dictionary(trx);
	}

	/* Do some cleanup */
	btr_pcur_close(&(node->pcur));

	mem_heap_empty(node->heap);

	thr->run_node = node;

	return(err);
}
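
row_undo() is a small state machine: both entry states (UNDO_NODE_FETCH_NEXT and UNDO_NODE_PREV_VERS) load an undo record and then dispatch on the roll pointer type, after which the insert path resets the state to UNDO_NODE_FETCH_NEXT itself and the modify path may set UNDO_NODE_PREV_VERS to restore still older versions of the same record. A minimal sketch of the dispatch, with invented names:

#include <stdbool.h>

/* Illustrative reduction of the state routing in row_undo() above. */
enum undo_state { UNDO_FETCH_NEXT, UNDO_PREV_VERS, UNDO_INSERT, UNDO_MODIFY };

static enum undo_state dispatch_undo(bool roll_ptr_is_insert)
{
	/* Insert undo records take the insert path; every other record
	type (update, delete-mark) takes the modify path. */
	return(roll_ptr_is_insert ? UNDO_INSERT : UNDO_MODIFY);
}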
Example #9
/*******************************************************************//**
This function runs a purge batch.
@return	number of undo log pages handled in the batch */
UNIV_INTERN
ulint
trx_purge(void)
/*===========*/
{
	que_thr_t*	thr;
	/*	que_thr_t*	thr2; */
	ulonglong	old_pages_handled;

	mutex_enter(&(purge_sys->mutex));

	if (purge_sys->trx->n_active_thrs > 0) {

		mutex_exit(&(purge_sys->mutex));

		/* Should not happen */

		ut_error;

		return(0);
	}

	rw_lock_x_lock(&(purge_sys->latch));

	mutex_enter(&kernel_mutex);

	/* Close and free the old purge view */

	read_view_close(purge_sys->view);
	purge_sys->view = NULL;
	mem_heap_empty(purge_sys->heap);

	/* Determine how long data manipulation language (DML) statements
	need to be delayed in order to reduce the lagging of the purge
	thread. */
	srv_dml_needed_delay = 0; /* in microseconds; default: no delay */

	/* If we cannot advance the 'purge view' because of an old
	'consistent read view', then the DML statements cannot be delayed.
	Also, srv_max_purge_lag <= 0 means 'infinity'. */
	if (srv_max_purge_lag > 0
	    && !UT_LIST_GET_LAST(trx_sys->view_list)) {
		float	ratio = (float) trx_sys->rseg_history_len
			/ srv_max_purge_lag;
		if (ratio > ULINT_MAX / 10000) {
			/* Avoid overflow: maximum delay is 4295 seconds */
			srv_dml_needed_delay = ULINT_MAX;
		} else if (ratio > 1) {
			/* If the history list length exceeds the
			innodb_max_purge_lag, the
			data manipulation statements are delayed
			by at least 5000 microseconds. */
			srv_dml_needed_delay = (ulint) ((ratio - .5) * 10000);
		}
	}

	purge_sys->view = read_view_oldest_copy_or_open_new(ut_dulint_zero,
							    purge_sys->heap);
	mutex_exit(&kernel_mutex);

	rw_lock_x_unlock(&(purge_sys->latch));

#ifdef UNIV_DEBUG
	if (srv_purge_view_update_only_debug) {
		mutex_exit(&(purge_sys->mutex));
		return(0);
	}
#endif

	purge_sys->state = TRX_PURGE_ON;

	/* Handle at most 20 undo log pages in one purge batch */

	purge_sys->handle_limit = purge_sys->n_pages_handled + 20;

	old_pages_handled = purge_sys->n_pages_handled;

	mutex_exit(&(purge_sys->mutex));

	mutex_enter(&kernel_mutex);

	thr = que_fork_start_command(purge_sys->query);

	ut_ad(thr);

	/*	thr2 = que_fork_start_command(purge_sys->query);

	ut_ad(thr2); */


	mutex_exit(&kernel_mutex);

	/*	srv_que_task_enqueue(thr2); */

	if (srv_print_thread_releases) {

		fputs("Starting purge\n", stderr);
	}

	que_run_threads(thr);

	if (srv_print_thread_releases) {

		fprintf(stderr,
			"Purge ends; pages handled %lu\n",
			(ulong) purge_sys->n_pages_handled);
	}

	return((ulint) (purge_sys->n_pages_handled - old_pages_handled));
}
Example #10
/*****************************************************************//**
Constructs the version of a clustered index record which a consistent
read should see. We assume that the trx id stored in rec is such that
the consistent read should not see rec in its present version.
@return	DB_SUCCESS or DB_MISSING_HISTORY */
UNIV_INTERN
ulint
row_vers_build_for_consistent_read(
/*===============================*/
	const rec_t*	rec,	/*!< in: record in a clustered index; the
				caller must have a latch on the page; this
				latch locks the top of the stack of versions
				of this record */
	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
	dict_index_t*	index,	/*!< in: the clustered index */
	ulint**		offsets,/*!< in/out: offsets returned by
				rec_get_offsets(rec, index) */
	read_view_t*	view,	/*!< in: the consistent read view */
	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
				the offsets are allocated */
	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
				*old_vers is allocated; memory for possible
				intermediate versions is allocated and freed
				locally within the function */
	rec_t**		old_vers)/*!< out, own: old version, or NULL if the
				record does not exist in the view, that is,
				it was freshly inserted afterwards */
{
	const rec_t*	version;
	rec_t*		prev_version;
	trx_id_t	trx_id;
	mem_heap_t*	heap		= NULL;
	byte*		buf;
	ulint		err;

	ut_ad(dict_index_is_clust(index));
	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	ut_ad(rec_offs_validate(rec, index, *offsets));

	trx_id = row_get_rec_trx_id(rec, index, *offsets);

	ut_ad(!read_view_sees_trx_id(view, trx_id));

	rw_lock_s_lock(&(purge_sys->latch));
	version = rec;

	for (;;) {
		mem_heap_t*	heap2	= heap;
		trx_undo_rec_t* undo_rec;
		roll_ptr_t	roll_ptr;
		undo_no_t	undo_no;
		heap = mem_heap_create(1024);

		/* If we have a high-granularity consistent read view and
		the creating transaction of the view is the same as trx_id
		in the record, we see this record only when the undo_no of
		the record is < the undo_no in the view. */

		if (view->type == VIEW_HIGH_GRANULARITY
		    && ut_dulint_cmp(view->creator_trx_id, trx_id) == 0) {

			roll_ptr = row_get_rec_roll_ptr(version, index,
							*offsets);
			undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
			undo_no = trx_undo_rec_get_undo_no(undo_rec);
			mem_heap_empty(heap);

			if (ut_dulint_cmp(view->undo_no, undo_no) > 0) {
				/* The view already sees this version: we can
				copy it to in_heap and return */

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
				ut_a(!rec_offs_any_null_extern(
					     version, *offsets));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */

				buf = mem_heap_alloc(in_heap,
						     rec_offs_size(*offsets));
				*old_vers = rec_copy(buf, version, *offsets);
				rec_offs_make_valid(*old_vers, index,
						    *offsets);
				err = DB_SUCCESS;

				break;
			}
		}

		err = trx_undo_prev_version_build(rec, mtr, version, index,
						  *offsets, heap,
						  &prev_version);
		if (heap2) {
			mem_heap_free(heap2); /* free version */
		}

		if (err != DB_SUCCESS) {
			break;
		}

		if (prev_version == NULL) {
			/* It was a freshly inserted version */
			*old_vers = NULL;
			err = DB_SUCCESS;

			break;
		}

		*offsets = rec_get_offsets(prev_version, index, *offsets,
					   ULINT_UNDEFINED, offset_heap);

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
		ut_a(!rec_offs_any_null_extern(prev_version, *offsets));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */

		trx_id = row_get_rec_trx_id(prev_version, index, *offsets);

		if (read_view_sees_trx_id(view, trx_id)) {

			/* The view already sees this version: we can copy
			it to in_heap and return */

			buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets));
			*old_vers = rec_copy(buf, prev_version, *offsets);
			rec_offs_make_valid(*old_vers, index, *offsets);
			err = DB_SUCCESS;

			break;
		}

		version = prev_version;
	}/* for (;;) */

	mem_heap_free(heap);
	rw_lock_s_unlock(&(purge_sys->latch));

	return(err);
}
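
The loop above walks the version chain newest-to-oldest, rebuilding each previous version into a scratch heap (freed one step behind the walk) until it reaches a version whose trx id the read view can see, or runs out of versions because the row was freshly inserted. A toy stand-alone walk with the same shape; the version_t struct and the visibility test are invented for illustration and far simpler than read_view_sees_trx_id():

#include <stdbool.h>
#include <stddef.h>

/* Toy version-chain walk mirroring the loop above: each record
version points at the version that preceded it, stamped with the id
of the transaction that created it. */
typedef struct version {
	unsigned long		trx_id;	/* creator of this version */
	struct version*		prev;	/* older version, or NULL if
					freshly inserted */
} version_t;

/* Illustrative visibility test: the view sees every transaction with
an id strictly below its up-limit. */
static bool view_sees(unsigned long view_up_limit, unsigned long trx_id)
{
	return(trx_id < view_up_limit);
}

/* Returns the newest version visible to the view, or NULL if the row
was inserted after the view was opened (the *old_vers = NULL case
above). */
static const version_t* visible_version(const version_t* rec,
					unsigned long view_up_limit)
{
	const version_t*	version = rec;

	while (version != NULL
	       && !view_sees(view_up_limit, version->trx_id)) {
		version = version->prev;	/* fetch older version */
	}
	return(version);
}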
Example #11
/***************************************************************
Updates the affected index records of a row. When control is transferred
to this node, we assume that we have a persistent cursor which was
positioned on a record, and that its position has been stored in the
cursor. */
static
ulint
row_upd(
/*====*/
				/* out: DB_SUCCESS if operation successfully
				completed, else error code or DB_LOCK_WAIT */
	upd_node_t*	node,	/* in: row update node */
	que_thr_t*	thr)	/* in: query thread */
{
	ulint	err	= DB_SUCCESS;
	
	ut_ad(node && thr);

	if (node->in_mysql_interface) {
	
		/* We do not get the cmpl_info value from the MySQL
		interpreter: we must calculate it on the fly: */
		
		if (node->is_delete ||
			row_upd_changes_some_index_ord_field_binary(
					node->table, node->update)) {
			node->cmpl_info = 0; 
		} else {
			node->cmpl_info = UPD_NODE_NO_ORD_CHANGE;
		}
	}

	if (node->state == UPD_NODE_UPDATE_CLUSTERED
				|| node->state == UPD_NODE_INSERT_CLUSTERED) {

		err = row_upd_clust_step(node, thr);
		
		if (err != DB_SUCCESS) {

			goto function_exit;
		}
	}

	if (!node->is_delete && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {

		goto function_exit;
	}

	while (node->index != NULL) {
		err = row_upd_sec_step(node, thr);

		if (err != DB_SUCCESS) {

			goto function_exit;
		}

		node->index = dict_table_get_next_index(node->index);
	}

function_exit:
	if (err == DB_SUCCESS) {
		/* Do some cleanup */

		if (node->row != NULL) {
			mem_heap_empty(node->heap);
			node->row = NULL;
			node->n_ext_vec = 0;
		}

		node->state = UPD_NODE_UPDATE_CLUSTERED;
	}

	return(err);
}
Example #12
/*****************************************************************//**
Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created. */
UNIV_INTERN
void
dict_boot(void)
/*===========*/
{
	dict_table_t*	table;
	dict_index_t*	index;
	dict_hdr_t*	dict_hdr;
	mem_heap_t*	heap;
	mtr_t		mtr;
	ulint		error;

	mtr_start(&mtr);

	/* Create the hash tables etc. */
	dict_init();

	heap = mem_heap_create(450);

	mutex_enter(&(dict_sys->mutex));

	/* Get the dictionary header */
	dict_hdr = dict_hdr_get(&mtr);

	/* Because we only write new row ids to the disk-based data
	structure (the dictionary header) when the id is divisible by
	DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
	the latest value of the row id counter. Therefore we advance
	the counter at database startup to avoid overlapping values.
	Note that when a user first asks for a new row id after database
	startup, the counter, being then divisible by ..._MARGIN, will
	immediately be written to the disk-based header. */

	dict_sys->row_id = ut_dulint_add(
		ut_dulint_align_up(mtr_read_dulint(dict_hdr + DICT_HDR_ROW_ID,
						   &mtr),
				   DICT_HDR_ROW_ID_WRITE_MARGIN),
		DICT_HDR_ROW_ID_WRITE_MARGIN);

	/* Insert into the dictionary cache the descriptions of the basic
	system tables */
	/*-------------------------*/
	table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0);

	dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
	dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
	/* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */
	dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4);
	/* TYPE is either DICT_TABLE_ORDINARY, or (TYPE & DICT_TF_COMPACT)
	and (TYPE & DICT_TF_FORMAT_MASK) are nonzero and TYPE = table->flags */
	dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
	dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
	dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4);
	dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0);
	dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);

	table->id = DICT_TABLES_ID;

	dict_table_add_to_cache(table, heap);
	dict_sys->sys_tables = table;
	mem_heap_empty(heap);

	index = dict_mem_index_create("SYS_TABLES", "CLUST_IND",
				      DICT_HDR_SPACE,
				      DICT_UNIQUE | DICT_CLUSTERED, 1);

	dict_mem_index_add_field(index, "NAME", 0);

	index->id = DICT_TABLES_ID;

	error = dict_index_add_to_cache(table, index,
					mtr_read_ulint(dict_hdr
						       + DICT_HDR_TABLES,
						       MLOG_4BYTES, &mtr),
					FALSE);
	ut_a(error == DB_SUCCESS);

	/*-------------------------*/
	index = dict_mem_index_create("SYS_TABLES", "ID_IND",
				      DICT_HDR_SPACE, DICT_UNIQUE, 1);
	dict_mem_index_add_field(index, "ID", 0);

	index->id = DICT_TABLE_IDS_ID;
	error = dict_index_add_to_cache(table, index,
					mtr_read_ulint(dict_hdr
						       + DICT_HDR_TABLE_IDS,
						       MLOG_4BYTES, &mtr),
					FALSE);
	ut_a(error == DB_SUCCESS);

	/*-------------------------*/
	table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0);

	dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
	dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
	dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
	dict_mem_table_add_col(table, heap, "MTYPE", DATA_INT, 0, 4);
	dict_mem_table_add_col(table, heap, "PRTYPE", DATA_INT, 0, 4);
	dict_mem_table_add_col(table, heap, "LEN", DATA_INT, 0, 4);
	dict_mem_table_add_col(table, heap, "PREC", DATA_INT, 0, 4);

	table->id = DICT_COLUMNS_ID;

	dict_table_add_to_cache(table, heap);
	dict_sys->sys_columns = table;
	mem_heap_empty(heap);

	index = dict_mem_index_create("SYS_COLUMNS", "CLUST_IND",
				      DICT_HDR_SPACE,
				      DICT_UNIQUE | DICT_CLUSTERED, 2);

	dict_mem_index_add_field(index, "TABLE_ID", 0);
	dict_mem_index_add_field(index, "POS", 0);

	index->id = DICT_COLUMNS_ID;
	error = dict_index_add_to_cache(table, index,
					mtr_read_ulint(dict_hdr
						       + DICT_HDR_COLUMNS,
						       MLOG_4BYTES, &mtr),
					FALSE);
	ut_a(error == DB_SUCCESS);

	/*-------------------------*/
	table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0);

	dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
	dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
	dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
	dict_mem_table_add_col(table, heap, "N_FIELDS", DATA_INT, 0, 4);
	dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
	dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
	dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4);

	/* The '+ 2' below comes from the 2 system fields */
#if DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2
#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2"
#endif
#if DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2
#error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2"
#endif
#if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2
#error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2"
#endif

	table->id = DICT_INDEXES_ID;
	dict_table_add_to_cache(table, heap);
	dict_sys->sys_indexes = table;
	mem_heap_empty(heap);

	index = dict_mem_index_create("SYS_INDEXES", "CLUST_IND",
				      DICT_HDR_SPACE,
				      DICT_UNIQUE | DICT_CLUSTERED, 2);

	dict_mem_index_add_field(index, "TABLE_ID", 0);
	dict_mem_index_add_field(index, "ID", 0);

	index->id = DICT_INDEXES_ID;
	error = dict_index_add_to_cache(table, index,
					mtr_read_ulint(dict_hdr
						       + DICT_HDR_INDEXES,
						       MLOG_4BYTES, &mtr),
					FALSE);
	ut_a(error == DB_SUCCESS);

	/*-------------------------*/
	table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0);

	dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0);
	dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
	dict_mem_table_add_col(table, heap, "COL_NAME", DATA_BINARY, 0, 0);

	table->id = DICT_FIELDS_ID;
	dict_table_add_to_cache(table, heap);
	dict_sys->sys_fields = table;
	mem_heap_free(heap);

	index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND",
				      DICT_HDR_SPACE,
				      DICT_UNIQUE | DICT_CLUSTERED, 2);

	dict_mem_index_add_field(index, "INDEX_ID", 0);
	dict_mem_index_add_field(index, "POS", 0);

	index->id = DICT_FIELDS_ID;
	error = dict_index_add_to_cache(table, index,
					mtr_read_ulint(dict_hdr
						       + DICT_HDR_FIELDS,
						       MLOG_4BYTES, &mtr),
					FALSE);
	ut_a(error == DB_SUCCESS);

	mtr_commit(&mtr);
	/*-------------------------*/

	/* Initialize the insert buffer table and index for each tablespace */

	ibuf_init_at_db_start();

	/* Load definitions of other indexes on system tables */

	dict_load_sys_table(dict_sys->sys_tables);
	dict_load_sys_table(dict_sys->sys_columns);
	dict_load_sys_table(dict_sys->sys_indexes);
	dict_load_sys_table(dict_sys->sys_fields);

	mutex_exit(&(dict_sys->mutex));
}
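
The row id adjustment near the top of dict_boot() is plain alignment arithmetic: round the stored counter up to the next multiple of DICT_HDR_ROW_ID_WRITE_MARGIN, then add one more margin, so the recovered counter is guaranteed to be past any id handed out before the crash. For example, with a margin of 256 and a stored counter of 1000, align-up gives 1024 and the adjusted counter is 1280. A stand-alone sketch of the computation; the function name and the use of unsigned long long in place of dulint are illustrative:

/* Illustrative copy of the startup row-id adjustment in dict_boot():
align up to the write margin, then skip one more margin.
(margin must be nonzero) */
static unsigned long long advance_row_id(unsigned long long stored_id,
					 unsigned long long margin)
{
	unsigned long long	aligned
		= (stored_id + margin - 1) / margin * margin;

	return(aligned + margin);	/* e.g. 1000 -> 1024 -> 1280 */
}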
Example #13
/***************************************************************
Fetches an undo log record and does the purge for the recorded operation.
If none left, or the current purge completed, returns the control to the
parent node, which is always a query thread node. */
static
ulint
row_purge(
/*======*/
				/* out: DB_SUCCESS if operation successfully
				completed, else error code */
	purge_node_t*	node,	/* in: row purge node */
	que_thr_t*	thr)	/* in: query thread */
{
	dulint	roll_ptr;
	ibool	purge_needed;
	ibool	updated_extern;
	
	ut_ad(node && thr);

	node->undo_rec = trx_purge_fetch_next_rec(&roll_ptr,
						&(node->reservation),
						node->heap);
	if (!node->undo_rec) {
		/* Purge completed for this query thread */

		thr->run_node = que_node_get_parent(node);

		return(DB_SUCCESS);
	}

	node->roll_ptr = roll_ptr;

	if (node->undo_rec == &trx_purge_dummy_rec) {
		purge_needed = FALSE;
	} else {
		purge_needed = row_purge_parse_undo_rec(node, &updated_extern,
									thr);
	}

	if (purge_needed) {
		node->found_clust = FALSE;
	
		node->index = dict_table_get_next_index(
				dict_table_get_first_index(node->table));

		if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
			row_purge_del_mark(node, thr);

		} else if (updated_extern
			    || node->rec_type == TRX_UNDO_UPD_EXIST_REC) {

			row_purge_upd_exist_or_extern(node, thr);
		}

		if (node->found_clust) {
			btr_pcur_close(&(node->pcur));
		}

		rw_lock_x_unlock(&(purge_sys->purge_is_running));		
	}

	/* Do some cleanup */
	trx_purge_rec_release(node->reservation);
	mem_heap_empty(node->heap);
	
	thr->run_node = node;

	return(DB_SUCCESS);
}
Example #14
/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_mod_upd_exist_sec(
/*=======================*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	mem_heap_t*	heap;
	dtuple_t*	entry;
	dict_index_t*	index;
	ulint		err;

	if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
		/* No change in secondary indexes */

		return(DB_SUCCESS);
	}

	heap = mem_heap_create(1024);

	while (node->index != NULL) {
		/* Skip all corrupted secondary indexes */
		dict_table_skip_corrupt_index(node->index);

		if (!node->index) {
			break;
		}

		index = node->index;

		if (row_upd_changes_ord_field_binary(node->index, node->update,
						     thr,
						     node->row, node->ext)) {

			/* Build the newest version of the index entry */
			entry = row_build_index_entry(node->row, node->ext,
						      index, heap);
			if (UNIV_UNLIKELY(!entry)) {
				/* The server must have crashed in
				row_upd_clust_rec_by_insert() before
				the updated externally stored columns (BLOBs)
				of the new clustered index entry were
				written. */

				/* The table must be in DYNAMIC or COMPRESSED
				format.  REDUNDANT and COMPACT formats
				store a local 768-byte prefix of each
				externally stored column. */
				ut_a(dict_table_get_format(index->table)
				     >= DICT_TF_FORMAT_ZIP);

				/* This is only legitimate when
				rolling back an incomplete transaction
				after crash recovery. */
				ut_a(thr_get_trx(thr)->is_recovered);

				/* The server must have crashed before
				completing the insert of the new
				clustered index entry and before
				inserting to the secondary indexes.
				Because node->row was not yet written
				to this index, we can ignore it.  But
				we must restore node->undo_row. */
			} else {
				/* NOTE that if we updated the fields of a
				delete-marked secondary index record so that
				alphabetically they stayed the same, e.g.,
				'abc' -> 'aBc', we cannot return to the
				original values because we do not know them.
				But this should not cause problems because
				in row0sel.c, in queries we always retrieve
				the clustered index record or an earlier
				version of it, if the secondary index record
				through which we do the search is
				delete-marked. */

				err = row_undo_mod_del_mark_or_remove_sec(
					node, thr, index, entry);
				if (err != DB_SUCCESS) {
					mem_heap_free(heap);

					return(err);
				}

				mem_heap_empty(heap);
			}

			/* We may have to update the delete mark in the
			secondary index record of the previous version of
			the row. We also need to update the fields of
			the secondary index record if we updated its fields
			but alphabetically they stayed the same, e.g.,
			'abc' -> 'aBc'. */
			entry = row_build_index_entry(node->undo_row,
						      node->undo_ext,
						      index, heap);
			ut_a(entry);

			err = row_undo_mod_del_unmark_sec_and_undo_update(
				BTR_MODIFY_LEAF, thr, index, entry);
			if (err == DB_FAIL) {
				err = row_undo_mod_del_unmark_sec_and_undo_update(
					BTR_MODIFY_TREE, thr, index, entry);
			}

			if (err != DB_SUCCESS) {
				mem_heap_free(heap);

				return(err);
			}
		}

		node->index = dict_table_get_next_index(node->index);
	}

	mem_heap_free(heap);

	return(DB_SUCCESS);
}
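
The del-unmark calls near the end of the loop above use a recurring InnoDB idiom: attempt the operation optimistically with BTR_MODIFY_LEAF (leaf-page latching only) and repeat it pessimistically with BTR_MODIFY_TREE (which may split or merge pages) only when the optimistic pass returns DB_FAIL. A generic stand-alone sketch of that retry shape; the names and error codes here are invented for illustration:

/* Generic optimistic/pessimistic retry idiom used above: try the
cheap leaf-only variant first and escalate only on DB_FAIL. */
typedef int db_err;
#define DB_SUCCESS	0
#define DB_FAIL		1

static db_err modify_with_retry(db_err (*op)(int mode),
				int leaf_mode, int tree_mode)
{
	db_err	err = op(leaf_mode);	/* optimistic attempt */

	if (err == DB_FAIL) {
		err = op(tree_mode);	/* pessimistic fallback */
	}
	return(err);
}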