Beispiel #1
0
/********************************************************//**
Opens a buffer for mlog, writes the initial log record and,
if needed, the field lengths of an index.
@return	buffer, NULL if log mode MTR_LOG_NONE */
UNIV_INTERN
byte*
mlog_open_and_write_index(
/*======================*/
	mtr_t*		mtr,	/*!< in: mtr */
	const byte*	rec,	/*!< in: index record or page */
	dict_index_t*	index,	/*!< in: record descriptor */
	byte		type,	/*!< in: log item type */
	ulint		size)	/*!< in: requested buffer size in bytes
				(if 0, calls mlog_close() and returns NULL) */
{
	byte*		log_ptr;
	const byte*	log_start;
	const byte*	log_end;

	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));

	if (!page_rec_is_comp(rec)) {
		log_start = log_ptr = mlog_open(mtr, 11 + size);
		if (!log_ptr) {
			return(NULL); /* logging is disabled */
		}
		log_ptr = mlog_write_initial_log_record_fast(rec, type,
							     log_ptr, mtr);
		log_end = log_ptr + 11 + size;
	} else {
		ulint	i;
		ulint	n	= dict_index_get_n_fields(index);
		/* total size needed */
		ulint	total	= 11 + size + (n + 2) * 2;
		ulint	alloc	= total;
		/* allocate at most DYN_ARRAY_DATA_SIZE at a time */
		if (alloc > DYN_ARRAY_DATA_SIZE) {
			alloc = DYN_ARRAY_DATA_SIZE;
		}
		log_start = log_ptr = mlog_open(mtr, alloc);
		if (!log_ptr) {
			return(NULL); /* logging is disabled */
		}
		log_end = log_ptr + alloc;
		log_ptr = mlog_write_initial_log_record_fast(rec, type,
							     log_ptr, mtr);
		mach_write_to_2(log_ptr, n);
		log_ptr += 2;
		mach_write_to_2(log_ptr,
				dict_index_get_n_unique_in_tree(index));
		log_ptr += 2;
		for (i = 0; i < n; i++) {
			dict_field_t*		field;
			const dict_col_t*	col;
			ulint			len;

			field = dict_index_get_nth_field(index, i);
			col = dict_field_get_col(field);
			len = field->fixed_len;
			ut_ad(len < 0x7fff);
			if (len == 0
			    && (col->len > 255 || col->mtype == DATA_BLOB)) {
				/* variable-length field
				with maximum length > 255 */
				len = 0x7fff;
			}
			if (col->prtype & DATA_NOT_NULL) {
				len |= 0x8000;
			}
			if (log_ptr + 2 > log_end) {
				mlog_close(mtr, log_ptr);
				ut_a(total > (ulint) (log_ptr - log_start));
				total -= log_ptr - log_start;
				alloc = total;
				if (alloc > DYN_ARRAY_DATA_SIZE) {
					alloc = DYN_ARRAY_DATA_SIZE;
				}
				log_start = log_ptr = mlog_open(mtr, alloc);
				if (!log_ptr) {
					return(NULL); /* logging is disabled */
				}
				log_end = log_ptr + alloc;
			}
			mach_write_to_2(log_ptr, len);
			log_ptr += 2;
		}
	}
	if (size == 0) {
		mlog_close(mtr, log_ptr);
		log_ptr = NULL;
	} else if (log_ptr + size > log_end) {
		mlog_close(mtr, log_ptr);
		log_ptr = mlog_open(mtr, size);
	}
	return(log_ptr);
}
Beispiel #2
0
/*****************************************************************//**
Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry and ientry are identified in
the alphabetical ordering; exactly in this case we return TRUE.
@return	TRUE if earlier version should have */
UNIV_INTERN
ibool
row_vers_old_has_index_entry(
/*=========================*/
	ibool		also_curr,/*!< in: TRUE if also rec is included in the
				versions to search; otherwise only versions
				prior to it are searched */
	const rec_t*	rec,	/*!< in: record in the clustered index; the
				caller must have a latch on the page */
	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec; it will
				also hold the latch on purge_view */
	dict_index_t*	index,	/*!< in: the secondary index */
	const dtuple_t*	ientry)	/*!< in: the secondary index entry */
{
	const rec_t*	version;
	rec_t*		prev_version;
	dict_index_t*	clust_index;
	ulint*		clust_offsets;
	mem_heap_t*	heap;
	mem_heap_t*	heap2;
	const dtuple_t*	row;
	const dtuple_t*	entry;
	ulint		err;
	ulint		comp;

	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
	mtr_s_lock(&(purge_sys->latch), mtr);

	clust_index = dict_table_get_first_index(index->table);

	comp = page_rec_is_comp(rec);
	ut_ad(!dict_table_is_comp(index->table) == !comp);
	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(rec, clust_index, NULL,
					ULINT_UNDEFINED, &heap);

	if (also_curr && !rec_get_deleted_flag(rec, comp)) {
		row_ext_t*	ext;

		/* The stack of versions is locked by mtr.
		Thus, it is safe to fetch the prefixes for
		externally stored columns. */
		row = row_build(ROW_COPY_POINTERS, clust_index,
				rec, clust_offsets, NULL, &ext, heap);
		entry = row_build_index_entry(row, ext, index, heap);

		/* If entry == NULL, the record contains unset BLOB
		pointers.  This must be a freshly inserted record.  If
		this is called from
		row_purge_remove_sec_if_poss_low(), the thread will
		hold latches on the clustered index and the secondary
		index.  Because the insert works in three steps:

			(1) insert the record to clustered index
			(2) store the BLOBs and update BLOB pointers
			(3) insert records to secondary indexes

		the purge thread can safely ignore freshly inserted
		records and delete the secondary index record.  The
		thread that inserted the new record will be inserting
		the secondary index records. */

		/* NOTE that we cannot do the comparison as binary
		fields because the row is maybe being modified so that
		the clustered index record has already been updated to
		a different binary value in a char field, but the
		collation identifies the old and new value anyway! */
		if (entry && !dtuple_coll_cmp(ientry, entry)) {

			mem_heap_free(heap);

			return(TRUE);
		}
	}

	version = rec;

	for (;;) {
		heap2 = heap;
		heap = mem_heap_create(1024);
		err = trx_undo_prev_version_build(rec, mtr, version,
						  clust_index, clust_offsets,
						  heap, &prev_version);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (err != DB_SUCCESS || !prev_version) {
			/* Versions end here */

			mem_heap_free(heap);

			return(FALSE);
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL, ULINT_UNDEFINED, &heap);

		if (!rec_get_deleted_flag(prev_version, comp)) {
			row_ext_t*	ext;

			/* The stack of versions is locked by mtr.
			Thus, it is safe to fetch the prefixes for
			externally stored columns. */
			row = row_build(ROW_COPY_POINTERS, clust_index,
					prev_version, clust_offsets,
					NULL, &ext, heap);
			entry = row_build_index_entry(row, ext, index, heap);

			/* If entry == NULL, the record contains unset
			BLOB pointers.  This must be a freshly
			inserted record that we can safely ignore.
			For the justification, see the comments after
			the previous row_build_index_entry() call. */

			/* NOTE that we cannot do the comparison as binary
			fields because maybe the secondary index record has
			already been updated to a different binary value in
			a char field, but the collation identifies the old
			and new value anyway! */

			if (entry && !dtuple_coll_cmp(ientry, entry)) {

				mem_heap_free(heap);

				return(TRUE);
			}
		}

		version = prev_version;
	}
}
Beispiel #3
0
/*****************************************************************//**
Finds out if an active transaction has inserted or modified a secondary
index record. NOTE: the kernel mutex is temporarily released in this
function!
@return NULL if committed, else the active transaction */
UNIV_INTERN
trx_t*
row_vers_impl_x_locked_off_kernel(
/*==============================*/
	const rec_t*	rec,	/*!< in: record in a secondary index */
	dict_index_t*	index,	/*!< in: the secondary index */
	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
{
	dict_index_t*	clust_index;
	rec_t*		clust_rec;
	ulint*		clust_offsets;
	rec_t*		version;
	trx_id_t	trx_id;
	mem_heap_t*	heap;
	mem_heap_t*	heap2;
	dtuple_t*	row;
	dtuple_t*	entry	= NULL; /* assignment to eliminate compiler
					warning */
	trx_t*		trx;
	ulint		rec_del;
#ifdef UNIV_DEBUG
	ulint		err;
#endif /* UNIV_DEBUG */
	mtr_t		mtr;
	ulint		comp;

	ut_ad(mutex_own(&kernel_mutex));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	mutex_exit(&kernel_mutex);

	mtr_start(&mtr);

	/* Search for the clustered index record: this is a time-consuming
	operation: therefore we release the kernel mutex; also, the release
	is required by the latching order convention. The latch on the
	clustered index locks the top of the stack of versions. We also
	reserve purge_latch to lock the bottom of the version stack. */

	clust_rec = row_get_clust_rec(BTR_SEARCH_LEAF, rec, index,
				      &clust_index, &mtr);
	if (!clust_rec) {
		/* In a rare case it is possible that no clust rec is found
		for a secondary index record: if in row0umod.c
		row_undo_mod_remove_clust_low() we have already removed the
		clust rec, while purge is still cleaning and removing
		secondary index records associated with earlier versions of
		the clustered index record. In that case there cannot be
		any implicit lock on the secondary index record, because
		an active transaction which has modified the secondary index
		record has also modified the clustered index record. And in
		a rollback we always undo the modifications to secondary index
		records before the clustered index record. */

		mutex_enter(&kernel_mutex);
		mtr_commit(&mtr);

		return(NULL);
	}

	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(clust_rec, clust_index, NULL,
					ULINT_UNDEFINED, &heap);
	trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);

	mtr_s_lock(&(purge_sys->latch), &mtr);

	mutex_enter(&kernel_mutex);

	trx = NULL;
	if (!trx_is_active(trx_id)) {
		/* The transaction that modified or inserted clust_rec is no
		longer active: no implicit lock on rec */
		goto exit_func;
	}

	if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index,
				      clust_offsets, TRUE)) {
		/* Corruption noticed: try to avoid a crash by returning */
		goto exit_func;
	}

	comp = page_rec_is_comp(rec);
	ut_ad(index->table == clust_index->table);
	ut_ad(!!comp == dict_table_is_comp(index->table));
	ut_ad(!comp == !page_rec_is_comp(clust_rec));

	/* We look up if some earlier version, which was modified by the trx_id
	transaction, of the clustered index record would require rec to be in
	a different state (delete marked or unmarked, or have different field
	values, or not existing). If there is such a version, then rec was
	modified by the trx_id transaction, and it has an implicit x-lock on
	rec. Note that if clust_rec itself would require rec to be in a
	different state, then the trx_id transaction has not yet had time to
	modify rec, and does not necessarily have an implicit x-lock on rec. */

	rec_del = rec_get_deleted_flag(rec, comp);
	trx = NULL;

	version = clust_rec;

	for (;;) {
		rec_t*		prev_version;
		ulint		vers_del;
		row_ext_t*	ext;
		trx_id_t	prev_trx_id;

		mutex_exit(&kernel_mutex);

		/* While we retrieve an earlier version of clust_rec, we
		release the kernel mutex, because it may take time to access
		the disk. After the release, we have to check if the trx_id
		transaction is still active. We keep the semaphore in mtr on
		the clust_rec page, so that no other transaction can update
		it and get an implicit x-lock on rec. */

		heap2 = heap;
		heap = mem_heap_create(1024);
#ifdef UNIV_DEBUG
		err =
#endif /* UNIV_DEBUG */
		trx_undo_prev_version_build(clust_rec, &mtr, version,
					    clust_index, clust_offsets,
					    heap, &prev_version);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (prev_version == NULL) {
			mutex_enter(&kernel_mutex);

			if (!trx_is_active(trx_id)) {
				/* Transaction no longer active: no
				implicit x-lock */

				break;
			}

			/* If the transaction is still active,
			clust_rec must be a fresh insert, because no
			previous version was found. */
			ut_ad(err == DB_SUCCESS);

			/* It was a freshly inserted version: there is an
			implicit x-lock on rec */

			trx = trx_get_on_id(trx_id);

			break;
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL, ULINT_UNDEFINED, &heap);

		vers_del = rec_get_deleted_flag(prev_version, comp);
		prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
						 clust_offsets);
		/* The stack of versions is locked by mtr.  Thus, it
		is safe to fetch the prefixes for externally stored
		columns. */
		row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
				clust_offsets, NULL, &ext, heap);
		entry = row_build_index_entry(row, ext, index, heap);
		/* entry may be NULL if a record was inserted in place
		of a deleted record, and the BLOB pointers of the new
		record were not initialized yet.  But in that case,
		prev_version should be NULL. */
		ut_a(entry);

		mutex_enter(&kernel_mutex);

		if (!trx_is_active(trx_id)) {
			/* Transaction no longer active: no implicit x-lock */

			break;
		}

		/* If we get here, we know that the trx_id transaction is
		still active and it has modified prev_version. Let us check
		if prev_version would require rec to be in a different
		state. */

		/* The previous version of clust_rec must be
		accessible, because the transaction is still active
		and clust_rec was not a fresh insert. */
		ut_ad(err == DB_SUCCESS);

		/* We check if entry and rec are identified in the alphabetical
		ordering */
		if (0 == cmp_dtuple_rec(entry, rec, offsets)) {
			/* The delete marks of rec and prev_version should be
			equal for rec to be in the state required by
			prev_version */

			if (rec_del != vers_del) {
				trx = trx_get_on_id(trx_id);

				break;
			}

			/* It is possible that the row was updated so that the
			secondary index record remained the same in
			alphabetical ordering, but the field values changed
			still. For example, 'abc' -> 'ABC'. Check also that. */

			dtuple_set_types_binary(entry,
						dtuple_get_n_fields(entry));
			if (0 != cmp_dtuple_rec(entry, rec, offsets)) {

				trx = trx_get_on_id(trx_id);

				break;
			}
		} else if (!rec_del) {
			/* The delete mark should be set in rec for it to be
			in the state required by prev_version */

			trx = trx_get_on_id(trx_id);

			break;
		}

		if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
			/* The versions modified by the trx_id transaction end
			to prev_version: no implicit x-lock */

			break;
		}

		version = prev_version;
	}/* for (;;) */

exit_func:
	mtr_commit(&mtr);
	mem_heap_free(heap);

	return(trx);
}
Beispiel #4
0
/********************************************************//**
Opens a buffer for mlog, writes the initial log record and,
if needed, the field lengths of an index.
@return	buffer, NULL if log mode MTR_LOG_NONE */
UNIV_INTERN
byte*
mlog_open_and_write_index(
/*======================*/
	mtr_t*		mtr,	/*!< in: mtr */
	const byte*	rec,	/*!< in: index record or page */
	dict_index_t*	index,	/*!< in: record descriptor */
	byte		type,	/*!< in: log item type */
	ulint		size)	/*!< in: requested buffer size in bytes
				(if 0, calls mlog_close() and returns NULL) */
{
	byte*		log_ptr;
	const byte*	log_start;
	const byte*	log_end;

	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));

	if (!page_rec_is_comp(rec)) {
		log_start = log_ptr = mlog_open(mtr, 11 + size);
		if (!log_ptr) {
			return(NULL); /* logging is disabled */
		}
		log_ptr = mlog_write_initial_log_record_fast(rec, type,
							     log_ptr, mtr);
		log_end = log_ptr + 11 + size;
	} else {
		ulint	i;
		ulint	n	= dict_index_get_n_fields(index);
        ibool   is_gcs_cluster = dict_index_is_gcs_clust_after_alter_table(index);
		/* total size needed */
        /* redo日志有可能需要多两字节,total需要根据实际情况分配空间! */
        ulint	total	= 11 + size + (n + 2 + (is_gcs_cluster ? 1 : 0)) * 2;
		ulint	alloc	= total;
        
        /* allocate at most DYN_ARRAY_DATA_SIZE at a time */
		if (alloc > DYN_ARRAY_DATA_SIZE) {
			alloc = DYN_ARRAY_DATA_SIZE;
		}
		log_start = log_ptr = mlog_open(mtr, alloc);
		if (!log_ptr) {
			return(NULL); /* logging is disabled */
		}
		log_end = log_ptr + alloc;
		log_ptr = mlog_write_initial_log_record_fast(rec, type,
							     log_ptr, mtr);

        /* 在第一次alter table前,所有gcs表可以当成compact表,即使是redo过程,这样redo log也兼容! */
        if (is_gcs_cluster)
        {
            //ut_ad(rec_is_gcs(rec) || rec == page_align(rec) || rec_get_status(rec) & REC_STATUS_NODE_PTR);               /* rec有可能就是页头,如日志MLOG_COMP_LIST_END_COPY_CREATED */
            mach_write_to_2(log_ptr, n | 0x8000);                         /* 标记是gcs表 */
        }
        else
        {
            ut_ad(rec == page_align(rec) || !rec_is_gcs(rec));
		    mach_write_to_2(log_ptr, n);
        }

        /* 对于gcs聚集索引,记录第一次alter table前聚集索引的字段数 */
        if (is_gcs_cluster)
        {
            log_ptr += 2;
            mach_write_to_2(log_ptr, (ulint)index->n_fields_before_alter);

            ut_ad(!index->n_fields_before_alter == !dict_index_is_gcs_clust_after_alter_table(index));
        }

		log_ptr += 2;
		mach_write_to_2(log_ptr,
				dict_index_get_n_unique_in_tree(index));
		log_ptr += 2;
		for (i = 0; i < n; i++) {
			dict_field_t*		field;
			const dict_col_t*	col;
			ulint			len;

			field = dict_index_get_nth_field(index, i);
			col = dict_field_get_col(field);
			len = field->fixed_len;
			ut_ad(len < 0x7fff);
			if (len == 0
			    && (col->len > 255 || col->mtype == DATA_BLOB)) {
				/* variable-length field
				with maximum length > 255 */
				len = 0x7fff;
			}
			if (col->prtype & DATA_NOT_NULL) {
				len |= 0x8000;
			}
			if (log_ptr + 2 > log_end) {
				mlog_close(mtr, log_ptr);
				ut_a(total > (ulint) (log_ptr - log_start));
				total -= log_ptr - log_start;
				alloc = total;
				if (alloc > DYN_ARRAY_DATA_SIZE) {
					alloc = DYN_ARRAY_DATA_SIZE;
				}
				log_start = log_ptr = mlog_open(mtr, alloc);
				if (!log_ptr) {
					return(NULL); /* logging is disabled */
				}
				log_end = log_ptr + alloc;
			}
			mach_write_to_2(log_ptr, len);
			log_ptr += 2;
		}
	}
	if (size == 0) {
		mlog_close(mtr, log_ptr);
		log_ptr = NULL;
	} else if (log_ptr + size > log_end) {
		mlog_close(mtr, log_ptr);
		log_ptr = mlog_open(mtr, size);
	}
	return(log_ptr);
}