예제 #1
0
/*******************************************************************//**
Drops the index tree associated with a row in SYS_INDEXES table. */
UNIV_INTERN
void
dict_drop_index_tree(
/*=================*/
	rec_t*	rec,	/*!< in/out: record in the clustered index
			of SYS_INDEXES table */
	mtr_t*	mtr)	/*!< in: mtr having the latch on the record page */
{
	ulint		root_page_no;
	ulint		space;
	ulint		zip_size;
	const byte*	ptr;
	ulint		len;

	ut_ad(mutex_own(&(dict_sys->mutex)));
	ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
	ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len);

	ut_ad(len == 4);

	root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);

	if (root_page_no == FIL_NULL) {
		/* The tree has already been freed */

		return;
	}

	ptr = rec_get_nth_field_old(rec,
				    DICT_SYS_INDEXES_SPACE_NO_FIELD, &len);

	ut_ad(len == 4);

	space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
	zip_size = fil_space_get_zip_size(space);

	if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
		/* It is a single table tablespace and the .ibd file is
		missing: do nothing */

		return;
	}

	/* We free all the pages but the root page first; this operation
	may span several mini-transactions */

	btr_free_but_not_root(space, zip_size, root_page_no);

	/* Then we free the root page in the same mini-transaction where
	we write FIL_NULL to the appropriate field in the SYS_INDEXES
	record: this mini-transaction marks the B-tree totally freed */

	/* printf("Dropping index tree in space %lu root page %lu\n", space,
	root_page_no); */
	btr_free_root(space, zip_size, root_page_no, mtr);

	page_rec_write_index_page_no(rec,
				     DICT_SYS_INDEXES_PAGE_NO_FIELD,
				     FIL_NULL, mtr);
}
예제 #2
0
파일: dict0crea.c 프로젝트: Coco-wan/git-1
/*******************************************************************
Creates an index tree for the index if it is not a member of a cluster. */
static
ulint
dict_create_index_tree_step(
/*========================*/
				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
	ind_node_t*	node)	/* in: index create node */
{
	dict_index_t*	index;
	dict_table_t*	sys_indexes;
	dict_table_t*	table;
	dtuple_t*	search_tuple;
	btr_pcur_t	pcur;
	mtr_t		mtr;

	ut_ad(mutex_own(&(dict_sys->mutex)));

	index = node->index;
	table = node->table;

	sys_indexes = dict_sys->sys_indexes;

	/* Run a mini-transaction in which the index tree is allocated for
	the index and its root address is written to the index entry in
	sys_indexes */

	mtr_start(&mtr);

	search_tuple = dict_create_search_tuple(node->ind_row, node->heap);

	btr_pcur_open(UT_LIST_GET_FIRST(sys_indexes->indexes),
		      search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF,
		      &pcur, &mtr);

	btr_pcur_move_to_next_user_rec(&pcur, &mtr);

	node->page_no = btr_create(index->type, index->space, index->id,
				   dict_table_is_comp(table), &mtr);
	/* printf("Created a new index tree in space %lu root page %lu\n",
	index->space, index->page_no); */

	page_rec_write_index_page_no(btr_pcur_get_rec(&pcur),
				     DICT_SYS_INDEXES_PAGE_NO_FIELD,
				     node->page_no, &mtr);
	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	if (node->page_no == FIL_NULL) {

		return(DB_OUT_OF_FILE_SPACE);
	}

	return(DB_SUCCESS);
}
예제 #3
0
/********************************************************//**
Opens a buffer for mlog, writes the initial log record and,
if needed, the field lengths of an index.
@return	buffer, NULL if log mode MTR_LOG_NONE */
UNIV_INTERN
byte*
mlog_open_and_write_index(
/*======================*/
	mtr_t*		mtr,	/*!< in: mtr */
	const byte*	rec,	/*!< in: index record or page */
	dict_index_t*	index,	/*!< in: record descriptor */
	byte		type,	/*!< in: log item type */
	ulint		size)	/*!< in: requested buffer size in bytes
				(if 0, calls mlog_close() and returns NULL) */
{
	byte*		log_ptr;
	const byte*	log_start;
	const byte*	log_end;

	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));

	if (!page_rec_is_comp(rec)) {
		log_start = log_ptr = mlog_open(mtr, 11 + size);
		if (!log_ptr) {
			return(NULL); /* logging is disabled */
		}
		log_ptr = mlog_write_initial_log_record_fast(rec, type,
							     log_ptr, mtr);
		log_end = log_ptr + 11 + size;
	} else {
		ulint	i;
		ulint	n	= dict_index_get_n_fields(index);
		/* total size needed */
		ulint	total	= 11 + size + (n + 2) * 2;
		ulint	alloc	= total;
		/* allocate at most DYN_ARRAY_DATA_SIZE at a time */
		if (alloc > DYN_ARRAY_DATA_SIZE) {
			alloc = DYN_ARRAY_DATA_SIZE;
		}
		log_start = log_ptr = mlog_open(mtr, alloc);
		if (!log_ptr) {
			return(NULL); /* logging is disabled */
		}
		log_end = log_ptr + alloc;
		log_ptr = mlog_write_initial_log_record_fast(rec, type,
							     log_ptr, mtr);
		mach_write_to_2(log_ptr, n);
		log_ptr += 2;
		mach_write_to_2(log_ptr,
				dict_index_get_n_unique_in_tree(index));
		log_ptr += 2;
		for (i = 0; i < n; i++) {
			dict_field_t*		field;
			const dict_col_t*	col;
			ulint			len;

			field = dict_index_get_nth_field(index, i);
			col = dict_field_get_col(field);
			len = field->fixed_len;
			ut_ad(len < 0x7fff);
			if (len == 0
			    && (col->len > 255 || col->mtype == DATA_BLOB)) {
				/* variable-length field
				with maximum length > 255 */
				len = 0x7fff;
			}
			if (col->prtype & DATA_NOT_NULL) {
				len |= 0x8000;
			}
			if (log_ptr + 2 > log_end) {
				mlog_close(mtr, log_ptr);
				ut_a(total > (ulint) (log_ptr - log_start));
				total -= log_ptr - log_start;
				alloc = total;
				if (alloc > DYN_ARRAY_DATA_SIZE) {
					alloc = DYN_ARRAY_DATA_SIZE;
				}
				log_start = log_ptr = mlog_open(mtr, alloc);
				if (!log_ptr) {
					return(NULL); /* logging is disabled */
				}
				log_end = log_ptr + alloc;
			}
			mach_write_to_2(log_ptr, len);
			log_ptr += 2;
		}
	}
	if (size == 0) {
		mlog_close(mtr, log_ptr);
		log_ptr = NULL;
	} else if (log_ptr + size > log_end) {
		mlog_close(mtr, log_ptr);
		log_ptr = mlog_open(mtr, size);
	}
	return(log_ptr);
}
예제 #4
0
파일: dict0crea.c 프로젝트: Coco-wan/git-1
ulint
dict_truncate_index_tree(
/*=====================*/
				/* out: new root page number, or
				FIL_NULL on failure */
	dict_table_t*	table,	/* in: the table the index belongs to */
	btr_pcur_t*	pcur,	/* in/out: persistent cursor pointing to
				record in the clustered index of
				SYS_INDEXES table. The cursor may be
				repositioned in this call. */
	mtr_t*		mtr)	/* in: mtr having the latch
				on the record page. The mtr may be
				committed and restarted in this call. */
{
	ulint		root_page_no;
	ulint		space;
	ulint		type;
	dulint		index_id;
	rec_t*		rec;
	byte*		ptr;
	ulint		len;
	ulint		comp;
	dict_index_t*	index;

	ut_ad(mutex_own(&(dict_sys->mutex)));
	ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
	rec = btr_pcur_get_rec(pcur);
	ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len);

	ut_ad(len == 4);

	root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);

	if (root_page_no == FIL_NULL) {
		/* The tree has been freed. */

		ut_print_timestamp(stderr);
		fprintf(stderr, "  InnoDB: Trying to TRUNCATE"
			" a missing index of table %s!\n", table->name);
		return(FIL_NULL);
	}

	ptr = rec_get_nth_field_old(rec,
				    DICT_SYS_INDEXES_SPACE_NO_FIELD, &len);

	ut_ad(len == 4);

	space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);

	if (!fil_tablespace_exists_in_mem(space)) {
		/* It is a single table tablespace and the .ibd file is
		missing: do nothing */

		ut_print_timestamp(stderr);
		fprintf(stderr, "  InnoDB: Trying to TRUNCATE"
			" a missing .ibd file of table %s!\n", table->name);
		return(FIL_NULL);
	}

	ptr = rec_get_nth_field_old(rec,
				    DICT_SYS_INDEXES_TYPE_FIELD, &len);
	ut_ad(len == 4);
	type = mach_read_from_4(ptr);

	ptr = rec_get_nth_field_old(rec, 1, &len);
	ut_ad(len == 8);
	index_id = mach_read_from_8(ptr);

	/* We free all the pages but the root page first; this operation
	may span several mini-transactions */

	btr_free_but_not_root(space, root_page_no);

	/* Then we free the root page in the same mini-transaction where
	we create the b-tree and write its new root page number to the
	appropriate field in the SYS_INDEXES record: this mini-transaction
	marks the B-tree totally truncated */

	comp = page_is_comp(btr_page_get(space, root_page_no, RW_X_LATCH,
					 mtr));

	btr_free_root(space, root_page_no, mtr);
	/* We will temporarily write FIL_NULL to the PAGE_NO field
	in SYS_INDEXES, so that the database will not get into an
	inconsistent state in case it crashes between the mtr_commit()
	below and the following mtr_commit() call. */
	page_rec_write_index_page_no(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
				     FIL_NULL, mtr);

	/* We will need to commit the mini-transaction in order to avoid
	deadlocks in the btr_create() call, because otherwise we would
	be freeing and allocating pages in the same mini-transaction. */
	btr_pcur_store_position(pcur, mtr);
	mtr_commit(mtr);

	mtr_start(mtr);
	btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);

	/* Find the index corresponding to this SYS_INDEXES record. */
	for (index = UT_LIST_GET_FIRST(table->indexes);
	     index;
	     index = UT_LIST_GET_NEXT(indexes, index)) {
		if (!ut_dulint_cmp(index->id, index_id)) {
			break;
		}
	}

	root_page_no = btr_create(type, space, index_id, comp, mtr);
	if (index) {
		index->page = (unsigned int) root_page_no;
	} else {
		ut_print_timestamp(stderr);
		fprintf(stderr,
			"  InnoDB: Index %lu %lu of table %s is missing\n"
			"InnoDB: from the data dictionary during TRUNCATE!\n",
			ut_dulint_get_high(index_id),
			ut_dulint_get_low(index_id),
			table->name);
	}

	return(root_page_no);
}
예제 #5
0
/**********************************************************************//**
Reports in the undo log of an update or delete marking of a clustered index
record.
@return byte offset of the inserted undo log entry on the page if
succeed, 0 if fail */
static
ulint
trx_undo_page_report_modify(
/*========================*/
	page_t*		undo_page,	/*!< in: undo log page */
	trx_t*		trx,		/*!< in: transaction */
	dict_index_t*	index,		/*!< in: clustered index where update or
					delete marking is done */
	const rec_t*	rec,		/*!< in: clustered index record which
					has NOT yet been modified */
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
	const upd_t*	update,		/*!< in: update vector which tells the
					columns to be updated; in the case of
					a delete, this should be set to NULL */
	ulint		cmpl_info,	/*!< in: compiler info on secondary
					index updates */
	mtr_t*		mtr)		/*!< in: mtr */
{
	dict_table_t*	table;
	ulint		first_free;
	byte*		ptr;
	const byte*	field;
	ulint		flen;
	ulint		col_no;
	ulint		type_cmpl;
	byte*		type_cmpl_ptr;
	ulint		i;
	trx_id_t	trx_id;
	ibool		ignore_prefix = FALSE;
	byte		ext_buf[REC_MAX_INDEX_COL_LEN
				+ BTR_EXTERN_FIELD_REF_SIZE];

	ut_a(dict_index_is_clust(index));
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
			       + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
	table = index->table;

	first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
				      + TRX_UNDO_PAGE_FREE);
	ptr = undo_page + first_free;

	ut_ad(first_free <= UNIV_PAGE_SIZE);

	if (trx_undo_left(undo_page, ptr) < 50) {

		/* NOTE: the value 50 must be big enough so that the general
		fields written below fit on the undo log page */

		return(0);
	}

	/* Reserve 2 bytes for the pointer to the next undo log record */
	ptr += 2;

	/* Store first some general parameters to the undo log */

	if (!update) {
		type_cmpl = TRX_UNDO_DEL_MARK_REC;
	} else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
		type_cmpl = TRX_UNDO_UPD_DEL_REC;
		/* We are about to update a delete marked record.
		We don't typically need the prefix in this case unless
		the delete marking is done by the same transaction
		(which we check below). */
		ignore_prefix = TRUE;
	} else {
		type_cmpl = TRX_UNDO_UPD_EXIST_REC;
	}

	type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
	type_cmpl_ptr = ptr;

	*ptr++ = (byte) type_cmpl;
	ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);

	ptr += mach_dulint_write_much_compressed(ptr, table->id);

	/*----------------------------------------*/
	/* Store the state of the info bits */

	*ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table));

	/* Store the values of the system columns */
	field = rec_get_nth_field(rec, offsets,
				  dict_index_get_sys_col_pos(
					  index, DATA_TRX_ID), &flen);
	ut_ad(flen == DATA_TRX_ID_LEN);

	trx_id = trx_read_trx_id(field);

	/* If it is an update of a delete marked record, then we are
	allowed to ignore blob prefixes if the delete marking was done
	by some other trx as it must have committed by now for us to
	allow an over-write. */
	if (ignore_prefix) {
		ignore_prefix = ut_dulint_cmp(trx_id, trx->id) != 0;
	}
	ptr += mach_dulint_write_compressed(ptr, trx_id);

	field = rec_get_nth_field(rec, offsets,
				  dict_index_get_sys_col_pos(
					  index, DATA_ROLL_PTR), &flen);
	ut_ad(flen == DATA_ROLL_PTR_LEN);

	ptr += mach_dulint_write_compressed(ptr, trx_read_roll_ptr(field));

	/*----------------------------------------*/
	/* Store then the fields required to uniquely determine the
	record which will be modified in the clustered index */

	for (i = 0; i < dict_index_get_n_unique(index); i++) {

		field = rec_get_nth_field(rec, offsets, i, &flen);

		/* The ordering columns must not be stored externally. */
		ut_ad(!rec_offs_nth_extern(offsets, i));
		ut_ad(dict_index_get_nth_col(index, i)->ord_part);

		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		ptr += mach_write_compressed(ptr, flen);

		if (flen != UNIV_SQL_NULL) {
			if (trx_undo_left(undo_page, ptr) < flen) {

				return(0);
			}

			ut_memcpy(ptr, field, flen);
			ptr += flen;
		}
	}

	/*----------------------------------------*/
	/* Save to the undo log the old values of the columns to be updated. */

	if (update) {
		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		ptr += mach_write_compressed(ptr, upd_get_n_fields(update));

		for (i = 0; i < upd_get_n_fields(update); i++) {

			ulint	pos = upd_get_nth_field(update, i)->field_no;

			/* Write field number to undo log */
			if (trx_undo_left(undo_page, ptr) < 5) {

				return(0);
			}

			ptr += mach_write_compressed(ptr, pos);

			/* Save the old value of field */
			field = rec_get_nth_field(rec, offsets, pos, &flen);

			if (trx_undo_left(undo_page, ptr) < 15) {

				return(0);
			}

			if (rec_offs_nth_extern(offsets, pos)) {
				ptr = trx_undo_page_report_modify_ext(
					ptr,
					dict_index_get_nth_col(index, pos)
					->ord_part
					&& !ignore_prefix
					&& flen < REC_MAX_INDEX_COL_LEN
					? ext_buf : NULL,
					dict_table_zip_size(table),
					&field, &flen);

				/* Notify purge that it eventually has to
				free the old externally stored field */

				trx->update_undo->del_marks = TRUE;

				*type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
			} else {
				ptr += mach_write_compressed(ptr, flen);
			}

			if (flen != UNIV_SQL_NULL) {
				if (trx_undo_left(undo_page, ptr) < flen) {

					return(0);
				}

				ut_memcpy(ptr, field, flen);
				ptr += flen;
			}
		}
	}

	/*----------------------------------------*/
	/* In the case of a delete marking, and also in the case of an update
	where any ordering field of any index changes, store the values of all
	columns which occur as ordering fields in any index. This info is used
	in the purge of old versions where we use it to build and search the
	delete marked index records, to look if we can remove them from the
	index tree. Note that starting from 4.0.14 also externally stored
	fields can be ordering in some index. Starting from 5.2, we no longer
	store REC_MAX_INDEX_COL_LEN first bytes to the undo log record,
	but we can construct the column prefix fields in the index by
	fetching the first page of the BLOB that is pointed to by the
	clustered index. This works also in crash recovery, because all pages
	(including BLOBs) are recovered before anything is rolled back. */

	if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
		byte*	old_ptr = ptr;

		trx->update_undo->del_marks = TRUE;

		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		/* Reserve 2 bytes to write the number of bytes the stored
		fields take in this undo record */

		ptr += 2;

		for (col_no = 0; col_no < dict_table_get_n_cols(table);
		     col_no++) {

			const dict_col_t*	col
				= dict_table_get_nth_col(table, col_no);

			if (col->ord_part) {
				ulint	pos;

				/* Write field number to undo log */
				if (trx_undo_left(undo_page, ptr) < 5 + 15) {

					return(0);
				}

				pos = dict_index_get_nth_col_pos(index,
								 col_no);
				ptr += mach_write_compressed(ptr, pos);

				/* Save the old value of field */
				field = rec_get_nth_field(rec, offsets, pos,
							  &flen);

				if (rec_offs_nth_extern(offsets, pos)) {
					ptr = trx_undo_page_report_modify_ext(
						ptr,
						flen < REC_MAX_INDEX_COL_LEN
						&& !ignore_prefix
						? ext_buf : NULL,
						dict_table_zip_size(table),
						&field, &flen);
				} else {
					ptr += mach_write_compressed(
						ptr, flen);
				}

				if (flen != UNIV_SQL_NULL) {
					if (trx_undo_left(undo_page, ptr)
					    < flen) {

						return(0);
					}

					ut_memcpy(ptr, field, flen);
					ptr += flen;
				}
			}
		}

		mach_write_to_2(old_ptr, ptr - old_ptr);
	}

	/*----------------------------------------*/
	/* Write pointers to the previous and the next undo log records */
	if (trx_undo_left(undo_page, ptr) < 2) {

		return(0);
	}

	mach_write_to_2(ptr, first_free);
	ptr += 2;
	mach_write_to_2(undo_page + first_free, ptr - undo_page);

	mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
			ptr - undo_page);

	/* Write to the REDO log about this change in the UNDO log */

	trx_undof_page_add_undo_rec_log(undo_page, first_free,
					ptr - undo_page, mtr);
	return(first_free);
}
예제 #6
0
/**************************************************************//**
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
database. Moves data only from those fields which are not necessary
to determine uniquely the insertion place of the tuple in the index.
@return own: created big record vector, NULL if we are not able to
shorten the entry enough, i.e., if there are too many fixed-length or
short fields in entry or the index is clustered */
UNIV_INTERN
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
	dict_index_t*	index,	/*!< in: index */
	dtuple_t*	entry,	/*!< in/out: index entry */
	ulint*		n_ext)	/*!< in/out: number of
				externally stored columns */
{
	mem_heap_t*	heap;
	big_rec_t*	vector;
	dfield_t*	dfield;
	dict_field_t*	ifield;
	ulint		size;
	ulint		n_fields;
	ulint		local_len;
	ulint		local_prefix_len;

	if (UNIV_UNLIKELY(!dict_index_is_clust(index))) {
		return(NULL);
	}

	if (dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP) {
		/* up to MySQL 5.1: store a 768-byte prefix locally */
		local_len = BTR_EXTERN_FIELD_REF_SIZE + DICT_MAX_INDEX_COL_LEN;
	} else {
		/* new-format table: do not store any BLOB prefix locally */
		local_len = BTR_EXTERN_FIELD_REF_SIZE;
	}

	ut_a(dtuple_check_typed_no_assert(entry));

	size = rec_get_converted_size(index, entry, *n_ext);

	if (UNIV_UNLIKELY(size > 1000000000)) {
		fprintf(stderr,
			"InnoDB: Warning: tuple size very big: %lu\n",
			(ulong) size);
		fputs("InnoDB: Tuple contents: ", stderr);
		dtuple_print(stderr, entry);
		putc('\n', stderr);
	}

	heap = mem_heap_create(size + dtuple_get_n_fields(entry)
			       * sizeof(big_rec_field_t) + 1000);

	vector = mem_heap_alloc(heap, sizeof(big_rec_t));

	vector->heap = heap;
	vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
					* sizeof(big_rec_field_t));

	/* Decide which fields to shorten: the algorithm is to look for
	a variable-length field that yields the biggest savings when
	stored externally */

	n_fields = 0;

	while (page_zip_rec_needs_ext(rec_get_converted_size(index, entry,
							     *n_ext),
				      dict_table_is_comp(index->table),
				      dict_index_get_n_fields(index),
				      dict_table_zip_size(index->table))) {
		ulint			i;
		ulint			longest		= 0;
		ulint			longest_i	= ULINT_MAX;
		byte*			data;
		big_rec_field_t*	b;

		for (i = dict_index_get_n_unique_in_tree(index);
		     i < dtuple_get_n_fields(entry); i++) {
			ulint	savings;

			dfield = dtuple_get_nth_field(entry, i);
			ifield = dict_index_get_nth_field(index, i);

			/* Skip fixed-length, NULL, externally stored,
			or short columns */

			if (ifield->fixed_len
			    || dfield_is_null(dfield)
			    || dfield_is_ext(dfield)
			    || dfield_get_len(dfield) <= local_len
			    || dfield_get_len(dfield)
			    <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
				goto skip_field;
			}

			savings = dfield_get_len(dfield) - local_len;

			/* Check that there would be savings */
			if (longest >= savings) {
				goto skip_field;
			}

			longest_i = i;
			longest = savings;

skip_field:
			continue;
		}

		if (!longest) {
			/* Cannot shorten more */

			mem_heap_free(heap);

			return(NULL);
		}

		/* Move data from field longest_i to big rec vector.

		We store the first bytes locally to the record. Then
		we can calculate all ordering fields in all indexes
		from locally stored data. */

		dfield = dtuple_get_nth_field(entry, longest_i);
		ifield = dict_index_get_nth_field(index, longest_i);
		local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;

		b = &vector->fields[n_fields];
		b->field_no = longest_i;
		b->len = dfield_get_len(dfield) - local_prefix_len;
		b->data = (char*) dfield_get_data(dfield) + local_prefix_len;

		/* Allocate the locally stored part of the column. */
		data = mem_heap_alloc(heap, local_len);

		/* Copy the local prefix. */
		memcpy(data, dfield_get_data(dfield), local_prefix_len);
		/* Clear the extern field reference (BLOB pointer). */
		memset(data + local_prefix_len, 0, BTR_EXTERN_FIELD_REF_SIZE);
#if 0
		/* The following would fail the Valgrind checks in
		page_cur_insert_rec_low() and page_cur_insert_rec_zip().
		The BLOB pointers in the record will be initialized after
		the record and the BLOBs have been written. */
		UNIV_MEM_ALLOC(data + local_prefix_len,
			       BTR_EXTERN_FIELD_REF_SIZE);
#endif

		dfield_set_data(dfield, data, local_len);
		dfield_set_ext(dfield);

		n_fields++;
		(*n_ext)++;
		ut_ad(n_fields < dtuple_get_n_fields(entry));
	}

	vector->n_fields = n_fields;
	return(vector);
}
예제 #7
0
/*******************************************************************//**
An inverse function to row_build_index_entry. Builds a row from a
record in a clustered index.
@return	own: row built; see the NOTE below! */
UNIV_INTERN
dtuple_t*
row_build(
/*======*/
	ulint			type,	/*!< in: ROW_COPY_POINTERS or
					ROW_COPY_DATA; the latter
					copies also the data fields to
					heap while the first only
					places pointers to data fields
					on the index page, and thus is
					more efficient */
	const dict_index_t*	index,	/*!< in: clustered index */
	const rec_t*		rec,	/*!< in: record in the clustered
					index; NOTE: in the case
					ROW_COPY_POINTERS the data
					fields in the row will point
					directly into this record,
					therefore, the buffer page of
					this record must be at least
					s-latched and the latch held
					as long as the row dtuple is used! */
	const ulint*		offsets,/*!< in: rec_get_offsets(rec,index)
					or NULL, in which case this function
					will invoke rec_get_offsets() */
	const dict_table_t*	col_table,
					/*!< in: table, to check which
					externally stored columns
					occur in the ordering columns
					of an index, or NULL if
					index->table should be
					consulted instead */
	row_ext_t**		ext,	/*!< out, own: cache of
					externally stored column
					prefixes, or NULL */
	mem_heap_t*		heap)	/*!< in: memory heap from which
					the memory needed is allocated */
{
	dtuple_t*		row;
	const dict_table_t*	table;
	ulint			n_fields;
	ulint			n_ext_cols;
	ulint*			ext_cols	= NULL; /* remove warning */
	ulint			len;
	ulint			row_len;
	byte*			buf;
	ulint			i;
	ulint			j;
	mem_heap_t*		tmp_heap	= NULL;
	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
	rec_offs_init(offsets_);

	ut_ad(index && rec && heap);
	ut_ad(dict_index_is_clust(index));
	ut_ad(!mutex_own(&kernel_mutex));

	if (!offsets) {
		offsets = rec_get_offsets(rec, index, offsets_,
					  ULINT_UNDEFINED, &tmp_heap);
	} else {
		ut_ad(rec_offs_validate(rec, index, offsets));
	}

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
	if (rec_offs_any_null_extern(rec, offsets)) {
		/* This condition can occur during crash recovery
		before trx_rollback_active() has completed execution,
		or when a concurrently executing
		row_ins_index_entry_low() has committed the B-tree
		mini-transaction but has not yet managed to restore
		the cursor position for writing the big_rec. */
		ut_a(trx_undo_roll_ptr_is_insert(
			     row_get_rec_roll_ptr(rec, index, offsets)));
	}
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */

	if (type != ROW_COPY_POINTERS) {
		/* Take a copy of rec to heap */
		buf = mem_heap_alloc(heap, rec_offs_size(offsets));
		rec = rec_copy(buf, rec, offsets);
		/* Avoid a debug assertion in rec_offs_validate(). */
		rec_offs_make_valid(rec, index, (ulint*) offsets);
	}

	table = index->table;
	row_len = dict_table_get_n_cols(table);

	row = dtuple_create(heap, row_len);

	dict_table_copy_types(row, table);

	dtuple_set_info_bits(row, rec_get_info_bits(
				     rec, dict_table_is_comp(table)));

	n_fields = rec_offs_n_fields(offsets);
	n_ext_cols = rec_offs_n_extern(offsets);
	if (n_ext_cols) {
		ext_cols = mem_heap_alloc(heap, n_ext_cols * sizeof *ext_cols);
	}

	for (i = j = 0; i < n_fields; i++) {
		dict_field_t*		ind_field
			= dict_index_get_nth_field(index, i);
		const dict_col_t*	col
			= dict_field_get_col(ind_field);
		ulint			col_no
			= dict_col_get_no(col);
		dfield_t*		dfield
			= dtuple_get_nth_field(row, col_no);

		if (ind_field->prefix_len == 0) {

			const byte*	field = rec_get_nth_field(
				rec, offsets, i, &len);

			dfield_set_data(dfield, field, len);
		}

		if (rec_offs_nth_extern(offsets, i)) {
			dfield_set_ext(dfield);

			if (UNIV_LIKELY_NULL(col_table)) {
				ut_a(col_no
				     < dict_table_get_n_cols(col_table));
				col = dict_table_get_nth_col(
					col_table, col_no);
			}

			if (col->ord_part) {
				/* We will have to fetch prefixes of
				externally stored columns that are
				referenced by column prefixes. */
				ext_cols[j++] = col_no;
			}
		}
	}

	ut_ad(dtuple_check_typed(row));

	if (!ext) {
		/* REDUNDANT and COMPACT formats store a local
		768-byte prefix of each externally stored
		column. No cache is needed. */
		ut_ad(dict_table_get_format(index->table)
		      < DICT_TF_FORMAT_ZIP);
	} else if (j) {
		*ext = row_ext_create(j, ext_cols, row,
				      dict_table_zip_size(index->table),
				      heap);
	} else {
		*ext = NULL;
	}

	if (tmp_heap) {
		mem_heap_free(tmp_heap);
	}

	return(row);
}
예제 #8
0
/*****************************************************************//**
Finds out if an active transaction has inserted or modified a secondary
index record. NOTE: the kernel mutex is temporarily released in this
function!
@return NULL if committed, else the active transaction */
UNIV_INTERN
trx_t*
row_vers_impl_x_locked_off_kernel(
/*==============================*/
	const rec_t*	rec,	/*!< in: record in a secondary index */
	dict_index_t*	index,	/*!< in: the secondary index */
	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
{
	dict_index_t*	clust_index;
	rec_t*		clust_rec;
	ulint*		clust_offsets;
	rec_t*		version;
	trx_id_t	trx_id;
	mem_heap_t*	heap;
	mem_heap_t*	heap2;
	dtuple_t*	row;
	dtuple_t*	entry	= NULL; /* assignment to eliminate compiler
					warning */
	trx_t*		trx;
	ulint		rec_del;
#ifdef UNIV_DEBUG
	ulint		err;
#endif /* UNIV_DEBUG */
	mtr_t		mtr;
	ulint		comp;

	ut_ad(mutex_own(&kernel_mutex));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	mutex_exit(&kernel_mutex);

	mtr_start(&mtr);

	/* Search for the clustered index record: this is a time-consuming
	operation: therefore we release the kernel mutex; also, the release
	is required by the latching order convention. The latch on the
	clustered index locks the top of the stack of versions. We also
	reserve purge_latch to lock the bottom of the version stack. */

	clust_rec = row_get_clust_rec(BTR_SEARCH_LEAF, rec, index,
				      &clust_index, &mtr);
	if (!clust_rec) {
		/* In a rare case it is possible that no clust rec is found
		for a secondary index record: if in row0umod.c
		row_undo_mod_remove_clust_low() we have already removed the
		clust rec, while purge is still cleaning and removing
		secondary index records associated with earlier versions of
		the clustered index record. In that case there cannot be
		any implicit lock on the secondary index record, because
		an active transaction which has modified the secondary index
		record has also modified the clustered index record. And in
		a rollback we always undo the modifications to secondary index
		records before the clustered index record. */

		mutex_enter(&kernel_mutex);
		mtr_commit(&mtr);

		return(NULL);
	}

	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(clust_rec, clust_index, NULL,
					ULINT_UNDEFINED, &heap);
	trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);

	mtr_s_lock(&(purge_sys->latch), &mtr);

	mutex_enter(&kernel_mutex);

	trx = NULL;
	if (!trx_is_active(trx_id)) {
		/* The transaction that modified or inserted clust_rec is no
		longer active: no implicit lock on rec */
		goto exit_func;
	}

	if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index,
				      clust_offsets, TRUE)) {
		/* Corruption noticed: try to avoid a crash by returning */
		goto exit_func;
	}

	comp = page_rec_is_comp(rec);
	ut_ad(index->table == clust_index->table);
	ut_ad(!!comp == dict_table_is_comp(index->table));
	ut_ad(!comp == !page_rec_is_comp(clust_rec));

	/* We look up if some earlier version, which was modified by the trx_id
	transaction, of the clustered index record would require rec to be in
	a different state (delete marked or unmarked, or have different field
	values, or not existing). If there is such a version, then rec was
	modified by the trx_id transaction, and it has an implicit x-lock on
	rec. Note that if clust_rec itself would require rec to be in a
	different state, then the trx_id transaction has not yet had time to
	modify rec, and does not necessarily have an implicit x-lock on rec. */

	rec_del = rec_get_deleted_flag(rec, comp);
	trx = NULL;

	version = clust_rec;

	for (;;) {
		rec_t*		prev_version;
		ulint		vers_del;
		row_ext_t*	ext;
		trx_id_t	prev_trx_id;

		mutex_exit(&kernel_mutex);

		/* While we retrieve an earlier version of clust_rec, we
		release the kernel mutex, because it may take time to access
		the disk. After the release, we have to check if the trx_id
		transaction is still active. We keep the semaphore in mtr on
		the clust_rec page, so that no other transaction can update
		it and get an implicit x-lock on rec. */

		heap2 = heap;
		heap = mem_heap_create(1024);
#ifdef UNIV_DEBUG
		err =
#endif /* UNIV_DEBUG */
		trx_undo_prev_version_build(clust_rec, &mtr, version,
					    clust_index, clust_offsets,
					    heap, &prev_version);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (prev_version == NULL) {
			mutex_enter(&kernel_mutex);

			if (!trx_is_active(trx_id)) {
				/* Transaction no longer active: no
				implicit x-lock */

				break;
			}

			/* If the transaction is still active,
			clust_rec must be a fresh insert, because no
			previous version was found. */
			ut_ad(err == DB_SUCCESS);

			/* It was a freshly inserted version: there is an
			implicit x-lock on rec */

			trx = trx_get_on_id(trx_id);

			break;
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL, ULINT_UNDEFINED, &heap);

		vers_del = rec_get_deleted_flag(prev_version, comp);
		prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
						 clust_offsets);
		/* The stack of versions is locked by mtr.  Thus, it
		is safe to fetch the prefixes for externally stored
		columns. */
		row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
				clust_offsets, NULL, &ext, heap);
		entry = row_build_index_entry(row, ext, index, heap);
		/* entry may be NULL if a record was inserted in place
		of a deleted record, and the BLOB pointers of the new
		record were not initialized yet.  But in that case,
		prev_version should be NULL. */
		ut_a(entry);

		mutex_enter(&kernel_mutex);

		if (!trx_is_active(trx_id)) {
			/* Transaction no longer active: no implicit x-lock */

			break;
		}

		/* If we get here, we know that the trx_id transaction is
		still active and it has modified prev_version. Let us check
		if prev_version would require rec to be in a different
		state. */

		/* The previous version of clust_rec must be
		accessible, because the transaction is still active
		and clust_rec was not a fresh insert. */
		ut_ad(err == DB_SUCCESS);

		/* We check if entry and rec are identified in the alphabetical
		ordering */
		if (0 == cmp_dtuple_rec(entry, rec, offsets)) {
			/* The delete marks of rec and prev_version should be
			equal for rec to be in the state required by
			prev_version */

			if (rec_del != vers_del) {
				trx = trx_get_on_id(trx_id);

				break;
			}

			/* It is possible that the row was updated so that the
			secondary index record remained the same in
			alphabetical ordering, but the field values changed
			still. For example, 'abc' -> 'ABC'. Check also that. */

			dtuple_set_types_binary(entry,
						dtuple_get_n_fields(entry));
			if (0 != cmp_dtuple_rec(entry, rec, offsets)) {

				trx = trx_get_on_id(trx_id);

				break;
			}
		} else if (!rec_del) {
			/* The delete mark should be set in rec for it to be
			in the state required by prev_version */

			trx = trx_get_on_id(trx_id);

			break;
		}

		if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
			/* The versions modified by the trx_id transaction end
			to prev_version: no implicit x-lock */

			break;
		}

		version = prev_version;
	}/* for (;;) */

exit_func:
	mtr_commit(&mtr);
	mem_heap_free(heap);

	return(trx);
}
예제 #9
0
/*****************************************************************//**
Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry and ientry are identified in
the alphabetical ordering; exactly in this case we return TRUE.
@return	TRUE if earlier version should have */
UNIV_INTERN
ibool
row_vers_old_has_index_entry(
/*=========================*/
	ibool		also_curr,/*!< in: TRUE if also rec is included in the
				versions to search; otherwise only versions
				prior to it are searched */
	const rec_t*	rec,	/*!< in: record in the clustered index; the
				caller must have a latch on the page */
	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec; it will
				also hold the latch on purge_view */
	dict_index_t*	index,	/*!< in: the secondary index */
	const dtuple_t*	ientry)	/*!< in: the secondary index entry */
{
	const rec_t*	version;
	rec_t*		prev_version;
	dict_index_t*	clust_index;
	ulint*		clust_offsets;
	mem_heap_t*	heap;
	mem_heap_t*	heap2;
	const dtuple_t*	row;
	const dtuple_t*	entry;
	ulint		err;
	ulint		comp;

	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */
	mtr_s_lock(&(purge_sys->latch), mtr);

	clust_index = dict_table_get_first_index(index->table);

	comp = page_rec_is_comp(rec);
	ut_ad(!dict_table_is_comp(index->table) == !comp);
	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(rec, clust_index, NULL,
					ULINT_UNDEFINED, &heap);

	if (also_curr && !rec_get_deleted_flag(rec, comp)) {
		row_ext_t*	ext;

		/* The stack of versions is locked by mtr.
		Thus, it is safe to fetch the prefixes for
		externally stored columns. */
		row = row_build(ROW_COPY_POINTERS, clust_index,
				rec, clust_offsets, NULL, &ext, heap);
		entry = row_build_index_entry(row, ext, index, heap);

		/* If entry == NULL, the record contains unset BLOB
		pointers.  This must be a freshly inserted record.  If
		this is called from
		row_purge_remove_sec_if_poss_low(), the thread will
		hold latches on the clustered index and the secondary
		index.  Because the insert works in three steps:

			(1) insert the record to clustered index
			(2) store the BLOBs and update BLOB pointers
			(3) insert records to secondary indexes

		the purge thread can safely ignore freshly inserted
		records and delete the secondary index record.  The
		thread that inserted the new record will be inserting
		the secondary index records. */

		/* NOTE that we cannot do the comparison as binary
		fields because the row is maybe being modified so that
		the clustered index record has already been updated to
		a different binary value in a char field, but the
		collation identifies the old and new value anyway! */
		if (entry && !dtuple_coll_cmp(ientry, entry)) {

			mem_heap_free(heap);

			return(TRUE);
		}
	}

	version = rec;

	for (;;) {
		heap2 = heap;
		heap = mem_heap_create(1024);
		err = trx_undo_prev_version_build(rec, mtr, version,
						  clust_index, clust_offsets,
						  heap, &prev_version);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (err != DB_SUCCESS || !prev_version) {
			/* Versions end here */

			mem_heap_free(heap);

			return(FALSE);
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL, ULINT_UNDEFINED, &heap);

		if (!rec_get_deleted_flag(prev_version, comp)) {
			row_ext_t*	ext;

			/* The stack of versions is locked by mtr.
			Thus, it is safe to fetch the prefixes for
			externally stored columns. */
			row = row_build(ROW_COPY_POINTERS, clust_index,
					prev_version, clust_offsets,
					NULL, &ext, heap);
			entry = row_build_index_entry(row, ext, index, heap);

			/* If entry == NULL, the record contains unset
			BLOB pointers.  This must be a freshly
			inserted record that we can safely ignore.
			For the justification, see the comments after
			the previous row_build_index_entry() call. */

			/* NOTE that we cannot do the comparison as binary
			fields because maybe the secondary index record has
			already been updated to a different binary value in
			a char field, but the collation identifies the old
			and new value anyway! */

			if (entry && !dtuple_coll_cmp(ientry, entry)) {

				mem_heap_free(heap);

				return(TRUE);
			}
		}

		version = prev_version;
	}
}
예제 #10
0
/*******************************************************************//**
Truncates the index tree associated with a row in SYS_INDEXES table.
@return	new root page number, or FIL_NULL on failure */
UNIV_INTERN
ulint
dict_truncate_index_tree(
    /*=====================*/
    dict_table_t*	table,	/*!< in: the table the index belongs to */
    ulint		space,	/*!< in: 0=truncate,
				nonzero=create the index tree in the
				given tablespace */
    btr_pcur_t*	pcur,	/*!< in/out: persistent cursor pointing to
				record in the clustered index of
				SYS_INDEXES table. The cursor may be
				repositioned in this call. */
    mtr_t*		mtr)	/*!< in: mtr having the latch
				on the record page. The mtr may be
				committed and restarted in this call. */
{
    ulint		root_page_no;
    ibool		drop = !space;
    ulint		zip_size;
    ulint		type;
    index_id_t	index_id;
    rec_t*		rec;
    const byte*	ptr;
    ulint		len;
    dict_index_t*	index;

    ut_ad(mutex_own(&(dict_sys->mutex)));
    ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
    rec = btr_pcur_get_rec(pcur);
    ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len);

    ut_ad(len == 4);

    root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);

    if (drop && root_page_no == FIL_NULL) {
        /* The tree has been freed. */

        ut_print_timestamp(stderr);
        fprintf(stderr, "  InnoDB: Trying to TRUNCATE"
                " a missing index of table %s!\n", table->name);
        drop = FALSE;
    }

    ptr = rec_get_nth_field_old(rec,
                                DICT_SYS_INDEXES_SPACE_NO_FIELD, &len);

    ut_ad(len == 4);

    if (drop) {
        space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
    }

    zip_size = fil_space_get_zip_size(space);

    if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
        /* It is a single table tablespace and the .ibd file is
        missing: do nothing */

        ut_print_timestamp(stderr);
        fprintf(stderr, "  InnoDB: Trying to TRUNCATE"
                " a missing .ibd file of table %s!\n", table->name);
        return(FIL_NULL);
    }

    ptr = rec_get_nth_field_old(rec,
                                DICT_SYS_INDEXES_TYPE_FIELD, &len);
    ut_ad(len == 4);
    type = mach_read_from_4(ptr);

    ptr = rec_get_nth_field_old(rec, 1, &len);
    ut_ad(len == 8);
    index_id = mach_read_from_8(ptr);

    if (!drop) {

        goto create;
    }

    /* We free all the pages but the root page first; this operation
    may span several mini-transactions */

    btr_free_but_not_root(space, zip_size, root_page_no);

    /* Then we free the root page in the same mini-transaction where
    we create the b-tree and write its new root page number to the
    appropriate field in the SYS_INDEXES record: this mini-transaction
    marks the B-tree totally truncated */

    btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, NULL, mtr);

    btr_free_root(space, zip_size, root_page_no, mtr);
create:
    /* We will temporarily write FIL_NULL to the PAGE_NO field
    in SYS_INDEXES, so that the database will not get into an
    inconsistent state in case it crashes between the mtr_commit()
    below and the following mtr_commit() call. */
    page_rec_write_field(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
                         FIL_NULL, mtr);

    /* We will need to commit the mini-transaction in order to avoid
    deadlocks in the btr_create() call, because otherwise we would
    be freeing and allocating pages in the same mini-transaction. */
    btr_pcur_store_position(pcur, mtr);
    mtr_commit(mtr);

    mtr_start(mtr);
    btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);

    /* Find the index corresponding to this SYS_INDEXES record. */
    for (index = UT_LIST_GET_FIRST(table->indexes);
            index;
            index = UT_LIST_GET_NEXT(indexes, index)) {
        if (index->id == index_id) {
            root_page_no = btr_create(type, space, zip_size,
                                      index_id, index, mtr);
            index->page = (unsigned int) root_page_no;
            return(root_page_no);
        }
    }

    ut_print_timestamp(stderr);
    fprintf(stderr,
            "  InnoDB: Index %llu of table %s is missing\n"
            "InnoDB: from the data dictionary during TRUNCATE!\n",
            (ullint) index_id,
            table->name);

    return(FIL_NULL);
}
예제 #11
0
/*****************************************************************//**
Based on a table object, this function builds the entry to be inserted
in the SYS_TABLES system table.
@return	the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_tables_tuple(
    /*=========================*/
    const dict_table_t*	table,	/*!< in: table */
    mem_heap_t*		heap)	/*!< in: memory heap from
					which the memory for the built
					tuple is allocated */
{
    dict_table_t*	sys_tables;
    dtuple_t*	entry;
    dfield_t*	dfield;
    byte*		ptr;

    ut_ad(table);
    ut_ad(heap);

    sys_tables = dict_sys->sys_tables;

    entry = dtuple_create(heap, 8 + DATA_N_SYS_COLS);

    dict_table_copy_types(entry, sys_tables);

    /* 0: NAME -----------------------------*/
    dfield = dtuple_get_nth_field(entry, 0/*NAME*/);

    dfield_set_data(dfield, table->name, ut_strlen(table->name));
    /* 3: ID -------------------------------*/
    dfield = dtuple_get_nth_field(entry, 1/*ID*/);

    ptr = mem_heap_alloc(heap, 8);
    mach_write_to_8(ptr, table->id);

    dfield_set_data(dfield, ptr, 8);
    /* 4: N_COLS ---------------------------*/
    dfield = dtuple_get_nth_field(entry, 2/*N_COLS*/);

#if DICT_TF_COMPACT != 1
#error
#endif

    ptr = mem_heap_alloc(heap, 4);
    if (dict_table_is_gcs(table))                       /* ±í¶¨ÒåÐÞ¸Ä */
    {
        ut_ad(dict_table_is_comp(table));

        mach_write_to_4(ptr, table->n_def
                        | (1 << 31) | (1 << 30));
    }
    else
    {
        mach_write_to_4(ptr, table->n_def
                        | ((table->flags & DICT_TF_COMPACT) << 31));
    }

    dfield_set_data(dfield, ptr, 4);
    /* 5: TYPE -----------------------------*/
    dfield = dtuple_get_nth_field(entry, 3/*TYPE*/);

    ptr = mem_heap_alloc(heap, 4);
    if (table->flags & (~DICT_TF_COMPACT & ~(~0 << DICT_TF_BITS))) {
        ut_a(table->flags & DICT_TF_COMPACT);
        ut_a(dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);
        ut_a((table->flags & DICT_TF_ZSSIZE_MASK)
             <= (DICT_TF_ZSSIZE_MAX << DICT_TF_ZSSIZE_SHIFT));
        ut_a(!(table->flags & (~0 << DICT_TF2_BITS)));
        mach_write_to_4(ptr, table->flags & ~(~0 << DICT_TF_BITS));
    } else {
        mach_write_to_4(ptr, DICT_TABLE_ORDINARY);
    }

    dfield_set_data(dfield, ptr, 4);
    /* 6: MIX_ID (obsolete) ---------------------------*/
    dfield = dtuple_get_nth_field(entry, 4/*MIX_ID*/);

    ptr = mem_heap_zalloc(heap, 8);

    dfield_set_data(dfield, ptr, 8);
    /* 7: MIX_LEN (additional flags) --------------------------*/

    dfield = dtuple_get_nth_field(entry, 5/*MIX_LEN*/);

    ptr = mem_heap_alloc(heap, 4);
    mach_write_to_4(ptr, table->flags >> DICT_TF2_SHIFT);

    ut_ad(table->n_cols_before_alter_table == 0);

    dfield_set_data(dfield, ptr, 4);
    /* 8: CLUSTER_NAME ---------------------*/
    dfield = dtuple_get_nth_field(entry, 6/*CLUSTER_NAME*/);
    dfield_set_null(dfield); /* not supported */

    /* 9: SPACE ----------------------------*/
    dfield = dtuple_get_nth_field(entry, 7/*SPACE*/);

    ptr = mem_heap_alloc(heap, 4);
    mach_write_to_4(ptr, table->space);

    dfield_set_data(dfield, ptr, 4);
    /*----------------------------------*/

    return(entry);
}
예제 #12
0
/**************************************************************************
Reports in the undo log of an update or delete marking of a clustered index
record. */
static
ulint
trx_undo_page_report_modify(
/*========================*/
					/* out: byte offset of the inserted
					undo log entry on the page if succeed,
					0 if fail */
	page_t*		undo_page,	/* in: undo log page */
	trx_t*		trx,		/* in: transaction */
	dict_index_t*	index,		/* in: clustered index where update or
					delete marking is done */
	rec_t*		rec,		/* in: clustered index record which
					has NOT yet been modified */
	const ulint*	offsets,	/* in: rec_get_offsets(rec, index) */
	upd_t*		update,		/* in: update vector which tells the
					columns to be updated; in the case of
					a delete, this should be set to NULL */
	ulint		cmpl_info,	/* in: compiler info on secondary
					index updates */
	mtr_t*		mtr)		/* in: mtr */
{
	dict_table_t*	table;
	upd_field_t*	upd_field;
	ulint		first_free;
	byte*		ptr;
	ulint		len;
	byte*		field;
	ulint		flen;
	ulint		pos;
	dulint		roll_ptr;
	dulint		trx_id;
	ulint		bits;
	ulint		col_no;
	byte*		old_ptr;
	ulint		type_cmpl;
	byte*		type_cmpl_ptr;
	ulint		i;

	ut_a(index->type & DICT_CLUSTERED);
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
			       + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
	table = index->table;

	first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
				      + TRX_UNDO_PAGE_FREE);
	ptr = undo_page + first_free;

	ut_ad(first_free <= UNIV_PAGE_SIZE);

	if (trx_undo_left(undo_page, ptr) < 50) {

		/* NOTE: the value 50 must be big enough so that the general
		fields written below fit on the undo log page */

		return(0);
	}

	/* Reserve 2 bytes for the pointer to the next undo log record */
	ptr += 2;

	/* Store first some general parameters to the undo log */

	if (update) {
		if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
			type_cmpl = TRX_UNDO_UPD_DEL_REC;
		} else {
			type_cmpl = TRX_UNDO_UPD_EXIST_REC;
		}
	} else {
		type_cmpl = TRX_UNDO_DEL_MARK_REC;
	}

	type_cmpl = type_cmpl | (cmpl_info * TRX_UNDO_CMPL_INFO_MULT);

	mach_write_to_1(ptr, type_cmpl);

	type_cmpl_ptr = ptr;

	ptr++;
	len = mach_dulint_write_much_compressed(ptr, trx->undo_no);
	ptr += len;

	len = mach_dulint_write_much_compressed(ptr, table->id);
	ptr += len;

	/*----------------------------------------*/
	/* Store the state of the info bits */

	bits = rec_get_info_bits(rec, dict_table_is_comp(table));
	mach_write_to_1(ptr, bits);
	ptr += 1;

	/* Store the values of the system columns */
	field = rec_get_nth_field(rec, offsets,
				  dict_index_get_sys_col_pos(
					  index, DATA_TRX_ID), &len);
	ut_ad(len == DATA_TRX_ID_LEN);
	trx_id = trx_read_trx_id(field);
	field = rec_get_nth_field(rec, offsets,
				  dict_index_get_sys_col_pos(
					  index, DATA_ROLL_PTR), &len);
	ut_ad(len == DATA_ROLL_PTR_LEN);
	roll_ptr = trx_read_roll_ptr(field);

	len = mach_dulint_write_compressed(ptr, trx_id);
	ptr += len;

	len = mach_dulint_write_compressed(ptr, roll_ptr);
	ptr += len;

	/*----------------------------------------*/
	/* Store then the fields required to uniquely determine the
	record which will be modified in the clustered index */

	for (i = 0; i < dict_index_get_n_unique(index); i++) {

		field = rec_get_nth_field(rec, offsets, i, &flen);

		if (trx_undo_left(undo_page, ptr) < 4) {

			return(0);
		}

		len = mach_write_compressed(ptr, flen);
		ptr += len;

		if (flen != UNIV_SQL_NULL) {
			if (trx_undo_left(undo_page, ptr) < flen) {

				return(0);
			}

			ut_memcpy(ptr, field, flen);
			ptr += flen;
		}
	}

	/*----------------------------------------*/
	/* Save to the undo log the old values of the columns to be updated. */

	if (update) {
		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		len = mach_write_compressed(ptr, upd_get_n_fields(update));
		ptr += len;

		for (i = 0; i < upd_get_n_fields(update); i++) {

			upd_field = upd_get_nth_field(update, i);
			pos = upd_field->field_no;

			/* Write field number to undo log */
			if (trx_undo_left(undo_page, ptr) < 5) {

				return(0);
			}

			len = mach_write_compressed(ptr, pos);
			ptr += len;

			/* Save the old value of field */
			field = rec_get_nth_field(rec, offsets, pos, &flen);

			if (trx_undo_left(undo_page, ptr) < 5) {

				return(0);
			}

			if (rec_offs_nth_extern(offsets, pos)) {
				/* If a field has external storage, we add
				to flen the flag */

				len = mach_write_compressed(
					ptr,
					UNIV_EXTERN_STORAGE_FIELD + flen);

				/* Notify purge that it eventually has to
				free the old externally stored field */

				trx->update_undo->del_marks = TRUE;

				*type_cmpl_ptr = *type_cmpl_ptr
					| TRX_UNDO_UPD_EXTERN;
			} else {
				len = mach_write_compressed(ptr, flen);
			}

			ptr += len;

			if (flen != UNIV_SQL_NULL) {
				if (trx_undo_left(undo_page, ptr) < flen) {

					return(0);
				}

				ut_memcpy(ptr, field, flen);
				ptr += flen;
			}
		}
	}

	/*----------------------------------------*/
	/* In the case of a delete marking, and also in the case of an update
	where any ordering field of any index changes, store the values of all
	columns which occur as ordering fields in any index. This info is used
	in the purge of old versions where we use it to build and search the
	delete marked index records, to look if we can remove them from the
	index tree. Note that starting from 4.0.14 also externally stored
	fields can be ordering in some index. But we always store at least
	384 first bytes locally to the clustered index record, which means
	we can construct the column prefix fields in the index from the
	stored data. */

	if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {

		trx->update_undo->del_marks = TRUE;

		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		old_ptr = ptr;

		/* Reserve 2 bytes to write the number of bytes the stored
		fields take in this undo record */

		ptr += 2;

		for (col_no = 0; col_no < dict_table_get_n_cols(table);
		     col_no++) {

			const dict_col_t*	col
				= dict_table_get_nth_col(table, col_no);

			if (col->ord_part > 0) {

				pos = dict_index_get_nth_col_pos(index,
								 col_no);

				/* Write field number to undo log */
				if (trx_undo_left(undo_page, ptr) < 5) {

					return(0);
				}

				len = mach_write_compressed(ptr, pos);
				ptr += len;

				/* Save the old value of field */
				field = rec_get_nth_field(rec, offsets, pos,
							  &flen);

				if (trx_undo_left(undo_page, ptr) < 5) {

					return(0);
				}

				len = mach_write_compressed(ptr, flen);
				ptr += len;

				if (flen != UNIV_SQL_NULL) {
					if (trx_undo_left(undo_page, ptr)
					    < flen) {

						return(0);
					}

					ut_memcpy(ptr, field, flen);
					ptr += flen;
				}
			}
		}

		mach_write_to_2(old_ptr, ptr - old_ptr);
	}

	/*----------------------------------------*/
	/* Write pointers to the previous and the next undo log records */
	if (trx_undo_left(undo_page, ptr) < 2) {

		return(0);
	}

	mach_write_to_2(ptr, first_free);
	ptr += 2;
	mach_write_to_2(undo_page + first_free, ptr - undo_page);

	mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
			ptr - undo_page);

	/* Write to the REDO log about this change in the UNDO log */

	trx_undof_page_add_undo_rec_log(undo_page, first_free,
					ptr - undo_page, mtr);
	return(first_free);
}
예제 #13
0
파일: row0purge.c 프로젝트: Canos/mysql
/***************************************************************
Removes a secondary index entry without modifying the index tree,
if possible.
@return	TRUE if success or if not found */
static
ibool
row_purge_remove_sec_if_poss_leaf(
    /*==============================*/
    purge_node_t*	node,	/*!< in: row purge node */
    dict_index_t*	index,	/*!< in: index */
    const dtuple_t*	entry)	/*!< in: index entry */
{
    mtr_t			mtr;
    btr_pcur_t		pcur;
    enum row_search_result	search_result;

    log_free_check();

    mtr_start(&mtr);

    /* Set the purge node for the call to row_purge_poss_sec(). */
    pcur.btr_cur.purge_node = node;
    /* Set the query thread, so that ibuf_insert_low() will be
    able to invoke thd_get_trx(). */
    pcur.btr_cur.thr = que_node_get_parent(node);

    search_result = row_search_index_entry(
                        index, entry, BTR_MODIFY_LEAF | BTR_DELETE, &pcur, &mtr);

    switch (search_result) {
        ibool	success;
    case ROW_FOUND:
        /* Before attempting to purge a record, check
        if it is safe to do so. */
        if (row_purge_poss_sec(node, index, entry)) {
            btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);

            /* Only delete-marked records should be purged. */
            ut_ad(REC_INFO_DELETED_FLAG
                  & rec_get_info_bits(
                      btr_cur_get_rec(btr_cur),
                      dict_table_is_comp(index->table)));

            if (!btr_cur_optimistic_delete(btr_cur, &mtr)) {

                /* The index entry could not be deleted. */
                success = FALSE;
                goto func_exit;
            }
        }
    /* fall through (the index entry is still needed,
    or the deletion succeeded) */
    case ROW_NOT_DELETED_REF:
    /* The index entry is still needed. */
    case ROW_BUFFERED:
    /* The deletion was buffered. */
    case ROW_NOT_FOUND:
        /* The index entry does not exist, nothing to do. */
        success = TRUE;
func_exit:
        btr_pcur_close(&pcur);
        mtr_commit(&mtr);
        return(success);
    }

    ut_error;
    return(FALSE);
}
예제 #14
0
파일: row0purge.c 프로젝트: Canos/mysql
/***************************************************************
Removes a secondary index entry if possible, by modifying the
index tree.  Does not try to buffer the delete.
@return	TRUE if success or if not found */
static
ibool
row_purge_remove_sec_if_poss_tree(
    /*==============================*/
    purge_node_t*	node,	/*!< in: row purge node */
    dict_index_t*	index,	/*!< in: index */
    const dtuple_t*	entry)	/*!< in: index entry */
{
    btr_pcur_t		pcur;
    btr_cur_t*		btr_cur;
    ibool			success	= TRUE;
    ulint			err;
    mtr_t			mtr;
    enum row_search_result	search_result;

    log_free_check();
    mtr_start(&mtr);

    search_result = row_search_index_entry(index, entry, BTR_MODIFY_TREE,
                                           &pcur, &mtr);

    switch (search_result) {
    case ROW_NOT_FOUND:
        /* Not found.  This is a legitimate condition.  In a
        rollback, InnoDB will remove secondary recs that would
        be purged anyway.  Then the actual purge will not find
        the secondary index record.  Also, the purge itself is
        eager: if it comes to consider a secondary index
        record, and notices it does not need to exist in the
        index, it will remove it.  Then if/when the purge
        comes to consider the secondary index record a second
        time, it will not exist any more in the index. */

        /* fputs("PURGE:........sec entry not found\n", stderr); */
        /* dtuple_print(stderr, entry); */
        goto func_exit;
    case ROW_FOUND:
        break;
    case ROW_BUFFERED:
    case ROW_NOT_DELETED_REF:
        /* These are invalid outcomes, because the mode passed
        to row_search_index_entry() did not include any of the
        flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
        ut_error;
    }

    btr_cur = btr_pcur_get_btr_cur(&pcur);

    /* We should remove the index record if no later version of the row,
    which cannot be purged yet, requires its existence. If some requires,
    we should do nothing. */

    if (row_purge_poss_sec(node, index, entry)) {
        /* Remove the index record, which should have been
        marked for deletion. */
        ut_ad(REC_INFO_DELETED_FLAG
              & rec_get_info_bits(btr_cur_get_rec(btr_cur),
                                  dict_table_is_comp(index->table)));

        btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
                                   RB_NONE, &mtr);
        switch (UNIV_EXPECT(err, DB_SUCCESS)) {
        case DB_SUCCESS:
            break;
        case DB_OUT_OF_FILE_SPACE:
            success = FALSE;
            break;
        default:
            ut_error;
        }
    }

func_exit:
    btr_pcur_close(&pcur);
    mtr_commit(&mtr);

    return(success);
}
예제 #15
0
파일: mtr0log.c 프로젝트: H0bby/GCS-SQL
/********************************************************//**
Opens a buffer for mlog, writes the initial log record and,
if needed, the field lengths of an index.
@return	buffer, NULL if log mode MTR_LOG_NONE */
UNIV_INTERN
byte*
mlog_open_and_write_index(
/*======================*/
	mtr_t*		mtr,	/*!< in: mtr */
	const byte*	rec,	/*!< in: index record or page */
	dict_index_t*	index,	/*!< in: record descriptor */
	byte		type,	/*!< in: log item type */
	ulint		size)	/*!< in: requested buffer size in bytes
				(if 0, calls mlog_close() and returns NULL) */
{
	byte*		log_ptr;
	const byte*	log_start;
	const byte*	log_end;

	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));

	if (!page_rec_is_comp(rec)) {
		log_start = log_ptr = mlog_open(mtr, 11 + size);
		if (!log_ptr) {
			return(NULL); /* logging is disabled */
		}
		log_ptr = mlog_write_initial_log_record_fast(rec, type,
							     log_ptr, mtr);
		log_end = log_ptr + 11 + size;
	} else {
		ulint	i;
		ulint	n	= dict_index_get_n_fields(index);
        ibool   is_gcs_cluster = dict_index_is_gcs_clust_after_alter_table(index);
		/* total size needed */
        /* redo日志有可能需要多两字节,total需要根据实际情况分配空间! */
        ulint	total	= 11 + size + (n + 2 + (is_gcs_cluster ? 1 : 0)) * 2;
		ulint	alloc	= total;
        
        /* allocate at most DYN_ARRAY_DATA_SIZE at a time */
		if (alloc > DYN_ARRAY_DATA_SIZE) {
			alloc = DYN_ARRAY_DATA_SIZE;
		}
		log_start = log_ptr = mlog_open(mtr, alloc);
		if (!log_ptr) {
			return(NULL); /* logging is disabled */
		}
		log_end = log_ptr + alloc;
		log_ptr = mlog_write_initial_log_record_fast(rec, type,
							     log_ptr, mtr);

        /* 在第一次alter table前,所有gcs表可以当成compact表,即使是redo过程,这样redo log也兼容! */
        if (is_gcs_cluster)
        {
            //ut_ad(rec_is_gcs(rec) || rec == page_align(rec) || rec_get_status(rec) & REC_STATUS_NODE_PTR);               /* rec有可能就是页头,如日志MLOG_COMP_LIST_END_COPY_CREATED */
            mach_write_to_2(log_ptr, n | 0x8000);                         /* 标记是gcs表 */
        }
        else
        {
            ut_ad(rec == page_align(rec) || !rec_is_gcs(rec));
		    mach_write_to_2(log_ptr, n);
        }

        /* 对于gcs聚集索引,记录第一次alter table前聚集索引的字段数 */
        if (is_gcs_cluster)
        {
            log_ptr += 2;
            mach_write_to_2(log_ptr, (ulint)index->n_fields_before_alter);

            ut_ad(!index->n_fields_before_alter == !dict_index_is_gcs_clust_after_alter_table(index));
        }

		log_ptr += 2;
		mach_write_to_2(log_ptr,
				dict_index_get_n_unique_in_tree(index));
		log_ptr += 2;
		for (i = 0; i < n; i++) {
			dict_field_t*		field;
			const dict_col_t*	col;
			ulint			len;

			field = dict_index_get_nth_field(index, i);
			col = dict_field_get_col(field);
			len = field->fixed_len;
			ut_ad(len < 0x7fff);
			if (len == 0
			    && (col->len > 255 || col->mtype == DATA_BLOB)) {
				/* variable-length field
				with maximum length > 255 */
				len = 0x7fff;
			}
			if (col->prtype & DATA_NOT_NULL) {
				len |= 0x8000;
			}
			if (log_ptr + 2 > log_end) {
				mlog_close(mtr, log_ptr);
				ut_a(total > (ulint) (log_ptr - log_start));
				total -= log_ptr - log_start;
				alloc = total;
				if (alloc > DYN_ARRAY_DATA_SIZE) {
					alloc = DYN_ARRAY_DATA_SIZE;
				}
				log_start = log_ptr = mlog_open(mtr, alloc);
				if (!log_ptr) {
					return(NULL); /* logging is disabled */
				}
				log_end = log_ptr + alloc;
			}
			mach_write_to_2(log_ptr, len);
			log_ptr += 2;
		}
	}
	if (size == 0) {
		mlog_close(mtr, log_ptr);
		log_ptr = NULL;
	} else if (log_ptr + size > log_end) {
		mlog_close(mtr, log_ptr);
		log_ptr = mlog_open(mtr, size);
	}
	return(log_ptr);
}