Example #1
/***************************************************************//**
Removes a secondary index entry if found.
@return	DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_ins_remove_sec_low(
/*========================*/
	ulint		mode,	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
				depending on whether we wish optimistic or
				pessimistic descent down the index tree */
	dict_index_t*	index,	/*!< in: index */
	dtuple_t*	entry)	/*!< in: index entry to remove */
{
	btr_pcur_t		pcur;
	btr_cur_t*		btr_cur;
	ulint			err;
	mtr_t			mtr;
	enum row_search_result	search_result;

	mtr_start(&mtr);

	btr_cur = btr_pcur_get_btr_cur(&pcur);

	ut_ad(mode == BTR_MODIFY_TREE || mode == BTR_MODIFY_LEAF);

	search_result = row_search_index_entry(index, entry, mode,
					       &pcur, &mtr);

	switch (search_result) {
	case ROW_NOT_FOUND:
		err = DB_SUCCESS;
		goto func_exit;
	case ROW_FOUND:
		break;
	case ROW_BUFFERED:
	case ROW_NOT_DELETED_REF:
		/* These are invalid outcomes, because the mode passed
		to row_search_index_entry() did not include any of the
		flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
		ut_error;
	}

	if (mode == BTR_MODIFY_LEAF) {
		err = btr_cur_optimistic_delete(btr_cur, &mtr)
			? DB_SUCCESS : DB_FAIL;
	} else {
		ut_ad(mode == BTR_MODIFY_TREE);

		/* No need to distinguish RB_RECOVERY here, because we
		are deleting a secondary index record: the distinction
		between RB_NORMAL and RB_RECOVERY only matters when
		deleting a record that contains externally stored
		columns. */
		ut_ad(!dict_index_is_clust(index));
		btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
					   RB_NORMAL, &mtr);
	}
func_exit:
	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	return(err);
}
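
A minimal sketch of how a caller typically drives this function: try the optimistic BTR_MODIFY_LEAF pass first, and only on DB_FAIL fall back to a pessimistic BTR_MODIFY_TREE pass that may restructure the tree. The wrapper name and the bounded retry below are illustrative assumptions, not part of this snippet; in the MySQL sources the equivalent driver logic lives in row_undo_ins_remove_sec().

static
ulint
row_undo_ins_remove_sec_sketch(
/*===========================*/
	dict_index_t*	index,	/* in: index */
	dtuple_t*	entry)	/* in: index entry to remove */
{
	ulint	err;
	ulint	n_tries	= 0;

	/* First try an optimistic descent: only the leaf page is
	latched, so this is cheap but may return DB_FAIL. */
	err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry);

	if (err == DB_SUCCESS) {
		return(err);
	}

	/* Fall back to a pessimistic descent, retrying a bounded
	number of times (100 is an illustrative limit). */
	do {
		err = row_undo_ins_remove_sec_low(BTR_MODIFY_TREE,
						  index, entry);
	} while (err != DB_SUCCESS && ++n_tries < 100);

	return(err);
}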
Example #2
/*******************************************************************//**
Skips a row reference from an undo log record.
@return	pointer to remaining part of undo record */
UNIV_INTERN
byte*
trx_undo_rec_skip_row_ref(
/*======================*/
	byte*		ptr,	/*!< in: remaining part in update undo log
				record, at the start of the row reference */
	dict_index_t*	index)	/*!< in: clustered index */
{
	ulint	ref_len;
	ulint	i;

	ut_ad(index && ptr);
	ut_a(dict_index_is_clust(index));

	ref_len = dict_index_get_n_unique(index);

	for (i = 0; i < ref_len; i++) {
		byte*	field;
		ulint	len;
		ulint	orig_len;

		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
	}

	return(ptr);
}
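
This helper is the non-materializing counterpart of trx_undo_rec_get_row_ref() in Example #5: it walks the same ref_len column values purely to advance the parse pointer. A hedged fragment of where that matters, with illustrative variable names:

	/* When only the update vector that follows the row reference
	is of interest, skip the reference instead of building it: */
	ptr = trx_undo_rec_skip_row_ref(ptr, clust_index);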
Example #3
File: row0uins.c Project: zhangchuhu/s2s
/***************************************************************//**
Removes a secondary index entry if found.
@return	DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_ins_remove_sec_low(
    /*========================*/
    ulint		mode,	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
				depending on whether we wish optimistic or
				pessimistic descent down the index tree */
    dict_index_t*	index,	/*!< in: index */
    dtuple_t*	entry)	/*!< in: index entry to remove */
{
    btr_pcur_t	pcur;
    btr_cur_t*	btr_cur;
    ibool		found;
    ibool		success;
    ulint		err;
    mtr_t		mtr;

    log_free_check();
    mtr_start(&mtr);

    found = row_search_index_entry(index, entry, mode, &pcur, &mtr);

    btr_cur = btr_pcur_get_btr_cur(&pcur);

    if (!found) {
        /* Not found */

        btr_pcur_close(&pcur);
        mtr_commit(&mtr);

        return(DB_SUCCESS);
    }

    if (mode == BTR_MODIFY_LEAF) {
        success = btr_cur_optimistic_delete(btr_cur, &mtr);

        if (success) {
            err = DB_SUCCESS;
        } else {
            err = DB_FAIL;
        }
    } else {
        ut_ad(mode == BTR_MODIFY_TREE);

        /* No need to distinguish RB_RECOVERY here, because we
        are deleting a secondary index record: the distinction
        between RB_NORMAL and RB_RECOVERY only matters when
        deleting a record that contains externally stored
        columns. */
        ut_ad(!dict_index_is_clust(index));
        btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
                                   RB_NORMAL, &mtr);
    }

    btr_pcur_close(&pcur);
    mtr_commit(&mtr);

    return(err);
}
Example #4
/***************************************************************//**
Builds an index definition row to insert.
@return	DB_SUCCESS or error code */
static
ulint
dict_build_index_def_step(
    /*======================*/
    que_thr_t*	thr,	/*!< in: query thread */
    ind_node_t*	node)	/*!< in: index create node */
{
    dict_table_t*	table;
    dict_index_t*	index;
    dtuple_t*	row;
    trx_t*		trx;

    ut_ad(mutex_own(&(dict_sys->mutex)));

    trx = thr_get_trx(thr);

    index = node->index;

    table = dict_table_get_low(index->table_name);

    if (table == NULL) {
        return(DB_TABLE_NOT_FOUND);
    }

    trx->table_id = table->id;

    node->table = table;

    ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
          || dict_index_is_clust(index));

    dict_hdr_get_new_id(NULL, &index->id, NULL);

    /* Inherit the space id from the table; we store all indexes of a
    table in the same tablespace */

    index->space = table->space;
    node->page_no = FIL_NULL;
    row = dict_create_sys_indexes_tuple(index, node->heap);
    node->ind_row = row;

    ins_node_set_new_row(node->ind_def, row);

    /* Note that the index was created by this transaction. */
    index->trx_id = trx->id;

    return(DB_SUCCESS);
}
Example #5
/*******************************************************************//**
Builds a row reference from an undo log record.
@return	pointer to remaining part of undo record */
UNIV_INTERN
byte*
trx_undo_rec_get_row_ref(
/*=====================*/
	byte*		ptr,	/*!< in: remaining part of a copy of an undo log
				record, at the start of the row reference;
				NOTE that this copy of the undo log record must
				be preserved as long as the row reference is
				used, as we do NOT copy the data in the
				record! */
	dict_index_t*	index,	/*!< in: clustered index */
	dtuple_t**	ref,	/*!< out, own: row reference */
	mem_heap_t*	heap)	/*!< in: memory heap from which the memory
				needed is allocated */
{
	ulint		ref_len;
	ulint		i;

	ut_ad(index && ptr && ref && heap);
	ut_a(dict_index_is_clust(index));

	ref_len = dict_index_get_n_unique(index);

	*ref = dtuple_create(heap, ref_len);

	dict_index_copy_types(*ref, index, ref_len);

	for (i = 0; i < ref_len; i++) {
		dfield_t*	dfield;
		byte*		field;
		ulint		len;
		ulint		orig_len;

		dfield = dtuple_get_nth_field(*ref, i);

		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);

		dfield_set_data(dfield, field, len);
	}

	return(ptr);
}
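
A minimal usage sketch for this builder, assuming the caller has already parsed the undo record header up to the row reference (undo_rec_body and clust_index are illustrative names). The NOTE in the header is the key constraint: *ref only points into the undo record copy, so that copy must outlive the reference, while the heap owns only the tuple structure itself.

	mem_heap_t*	heap = mem_heap_create(256);
	dtuple_t*	ref;
	byte*		ptr = undo_rec_body;	/* start of the row ref */

	ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &ref, heap);

	/* ... use ref, e.g. to position a cursor on the clustered
	index; the undo record copy behind undo_rec_body must stay
	allocated for as long as ref is used ... */

	mem_heap_free(heap);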
Example #6
File: row0purge.c Project: Canos/mysql
/***********************************************************//**
Determines if it is possible to remove a secondary index entry.
Removal is possible if the secondary index entry does not refer to any
non-delete-marked version of a clustered index record whose DB_TRX_ID
is newer than the purge view.

NOTE: This function should only be called by the purge thread, only
while holding a latch on the leaf page of the secondary index entry
(or keeping the buffer pool watch on the page).  It is possible that
this function first returns TRUE and then FALSE, if a user transaction
inserts a record that the secondary index entry would refer to.
However, in that case, the user transaction would also re-insert the
secondary index entry after purge has removed it and released the leaf
page latch.
@return	TRUE if the secondary index record can be purged */
UNIV_INTERN
ibool
row_purge_poss_sec(
    /*===============*/
    purge_node_t*	node,	/*!< in/out: row purge node */
    dict_index_t*	index,	/*!< in: secondary index */
    const dtuple_t*	entry)	/*!< in: secondary index entry */
{
    ibool	can_delete;
    mtr_t	mtr;

    ut_ad(!dict_index_is_clust(index));
    mtr_start(&mtr);

    can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr)
                 || !row_vers_old_has_index_entry(TRUE,
                         btr_pcur_get_rec(&node->pcur),
                         &mtr, index, entry);

    btr_pcur_commit_specify_mtr(&node->pcur, &mtr);

    return(can_delete);
}
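
A hedged sketch of how the purge thread consumes this predicate while holding the leaf-page latch the NOTE requires; it reuses the enum-returning row_search_index_entry() from Example #1 and the optimistic delete from the same pattern. In the MySQL tree the corresponding logic lives in row_purge_remove_sec_if_poss_leaf(); everything below is illustrative, not a verbatim excerpt.

	btr_pcur_t	pcur;
	mtr_t		mtr;

	mtr_start(&mtr);

	if (row_search_index_entry(index, entry, BTR_MODIFY_LEAF,
				   &pcur, &mtr) == ROW_FOUND
	    && row_purge_poss_sec(node, index, entry)) {
		/* No version that purge must preserve refers to
		entry: delete it in place.  A FALSE return here would
		call for a pessimistic BTR_MODIFY_TREE retry. */
		btr_cur_optimistic_delete(btr_pcur_get_btr_cur(&pcur),
					  &mtr);
	}

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);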
Example #7
/*********************************************************************//**
Fetches the clustered index record for a secondary index record. The latches
on the secondary index record are preserved.
@return	record or NULL, if no record found */
UNIV_INTERN
rec_t*
row_get_clust_rec(
/*==============*/
	ulint		mode,	/*!< in: BTR_MODIFY_LEAF, ... */
	const rec_t*	rec,	/*!< in: record in a secondary index */
	dict_index_t*	index,	/*!< in: secondary index */
	dict_index_t**	clust_index,/*!< out: clustered index */
	mtr_t*		mtr)	/*!< in: mtr */
{
	mem_heap_t*	heap;
	dtuple_t*	ref;
	dict_table_t*	table;
	btr_pcur_t	pcur;
	ibool		found;
	rec_t*		clust_rec;

	ut_ad(!dict_index_is_clust(index));

	table = index->table;

	heap = mem_heap_create(256);

	ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec, heap);

	found = row_search_on_row_ref(&pcur, mode, table, ref, mtr);

	clust_rec = found ? btr_pcur_get_rec(&pcur) : NULL;

	mem_heap_free(heap);

	btr_pcur_close(&pcur);

	*clust_index = dict_table_get_first_index(table);

	return(clust_rec);
}
Example #8
/**********************************************************************//**
Writes to the undo log a record of an insert into a clustered index.
@return	offset of the inserted entry on the page on success, 0 on failure */
static
ulint
trx_undo_page_report_insert(
/*========================*/
	page_t*		undo_page,	/*!< in: undo log page */
	trx_t*		trx,		/*!< in: transaction */
	dict_index_t*	index,		/*!< in: clustered index */
	const dtuple_t*	clust_entry,	/*!< in: index entry which will be
					inserted to the clustered index */
	mtr_t*		mtr)		/*!< in: mtr */
{
	ulint		first_free;
	byte*		ptr;
	ulint		i;

	ut_ad(dict_index_is_clust(index));
	ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
			       + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);

	first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
				      + TRX_UNDO_PAGE_FREE);
	ptr = undo_page + first_free;

	ut_ad(first_free <= UNIV_PAGE_SIZE);

	if (trx_undo_left(undo_page, ptr) < 2 + 1 + 11 + 11) {

		/* Not enough space for writing the general parameters */

		return(0);
	}

	/* Reserve 2 bytes for the pointer to the next undo log record */
	ptr += 2;

	/* Store first some general parameters to the undo log */
	*ptr++ = TRX_UNDO_INSERT_REC;
	ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
	ptr += mach_dulint_write_much_compressed(ptr, index->table->id);
	/*----------------------------------------*/
	/* Store then the fields required to uniquely determine the record
	to be inserted in the clustered index */

	for (i = 0; i < dict_index_get_n_unique(index); i++) {

		const dfield_t*	field	= dtuple_get_nth_field(clust_entry, i);
		ulint		flen	= dfield_get_len(field);

		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		ptr += mach_write_compressed(ptr, flen);

		if (flen != UNIV_SQL_NULL) {
			if (trx_undo_left(undo_page, ptr) < flen) {

				return(0);
			}

			ut_memcpy(ptr, dfield_get_data(field), flen);
			ptr += flen;
		}
	}

	return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
}
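
Summarizing the writes above, the body of an insert undo record is laid out as follows. The byte counts are the ones this function itself reserves (the 2 + 1 + 11 + 11 and 5-byte checks), so this is a recap of the code, not an independent on-disk specification:

/*
	2 bytes		pointer to the next undo record; reserved first,
			written by trx_undo_page_set_next_prev_and_add()
	1 byte		TRX_UNDO_INSERT_REC
	<= 11 bytes	undo_no, much-compressed
	<= 11 bytes	table id, much-compressed
	then, for each of the dict_index_get_n_unique(index) fields:
	<= 5 bytes	field length, compressed (UNIV_SQL_NULL for NULL)
	flen bytes	field data, present only when not SQL NULL
*/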
Example #9
/*******************************************************************//**
Builds a partial row from an update undo log record. It contains the
columns which occur as ordering fields in any index of the table.
@return	pointer to remaining part of undo record */
UNIV_INTERN
byte*
trx_undo_rec_get_partial_row(
/*=========================*/
	byte*		ptr,	/*!< in: remaining part in update undo log
				record of a suitable type, at the start of
				the stored index columns;
				NOTE that this copy of the undo log record must
				be preserved as long as the partial row is
				used, as we do NOT copy the data in the
				record! */
	dict_index_t*	index,	/*!< in: clustered index */
	dtuple_t**	row,	/*!< out, own: partial row */
	ibool		ignore_prefix, /*!< in: flag to indicate if we
				expect blob prefixes in undo. Used
				only in the assertion. */
	mem_heap_t*	heap)	/*!< in: memory heap from which the memory
				needed is allocated */
{
	const byte*	end_ptr;
	ulint		row_len;

	ut_ad(index);
	ut_ad(ptr);
	ut_ad(row);
	ut_ad(heap);
	ut_ad(dict_index_is_clust(index));

	row_len = dict_table_get_n_cols(index->table);

	*row = dtuple_create(heap, row_len);

	dict_table_copy_types(*row, index->table);

	end_ptr = ptr + mach_read_from_2(ptr);
	ptr += 2;

	while (ptr != end_ptr) {
		dfield_t*		dfield;
		byte*			field;
		ulint			field_no;
		const dict_col_t*	col;
		ulint			col_no;
		ulint			len;
		ulint			orig_len;

		ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);

		col = dict_index_get_nth_col(index, field_no);
		col_no = dict_col_get_no(col);

		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);

		dfield = dtuple_get_nth_field(*row, col_no);

		dfield_set_data(dfield, field, len);

		if (len != UNIV_SQL_NULL
		    && len >= UNIV_EXTERN_STORAGE_FIELD) {
			dfield_set_len(dfield,
				       len - UNIV_EXTERN_STORAGE_FIELD);
			dfield_set_ext(dfield);
			/* If the prefix of this column is indexed,
			ensure that enough prefix is stored in the
			undo log record. */
			if (!ignore_prefix && col->ord_part) {
				ut_a(dfield_get_len(dfield)
				     >= 2 * BTR_EXTERN_FIELD_REF_SIZE);
				ut_a(dict_table_get_format(index->table)
				     >= DICT_TF_FORMAT_ZIP
				     || dfield_get_len(dfield)
				     >= REC_MAX_INDEX_COL_LEN
				     + BTR_EXTERN_FIELD_REF_SIZE);
			}
		}
	}

	return(ptr);
}
Example #10
/**************************************************************//**
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
database. Moves data only from those fields which are not necessary
to determine uniquely the insertion place of the tuple in the index.
@return own: created big record vector, NULL if we are not able to
shorten the entry enough, i.e., if there are too many fixed-length or
short fields in entry, or if the index is not clustered */
UNIV_INTERN
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
	dict_index_t*	index,	/*!< in: index */
	dtuple_t*	entry,	/*!< in/out: index entry */
	ulint*		n_ext)	/*!< in/out: number of
				externally stored columns */
{
	mem_heap_t*	heap;
	big_rec_t*	vector;
	dfield_t*	dfield;
	dict_field_t*	ifield;
	ulint		size;
	ulint		n_fields;
	ulint		local_len;
	ulint		local_prefix_len;

	if (UNIV_UNLIKELY(!dict_index_is_clust(index))) {
		return(NULL);
	}

	if (dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP) {
		/* up to MySQL 5.1: store a 768-byte prefix locally */
		local_len = BTR_EXTERN_FIELD_REF_SIZE + DICT_MAX_INDEX_COL_LEN;
	} else {
		/* new-format table: do not store any BLOB prefix locally */
		local_len = BTR_EXTERN_FIELD_REF_SIZE;
	}

	ut_a(dtuple_check_typed_no_assert(entry));

	size = rec_get_converted_size(index, entry, *n_ext);

	if (UNIV_UNLIKELY(size > 1000000000)) {
		fprintf(stderr,
			"InnoDB: Warning: tuple size very big: %lu\n",
			(ulong) size);
		fputs("InnoDB: Tuple contents: ", stderr);
		dtuple_print(stderr, entry);
		putc('\n', stderr);
	}

	heap = mem_heap_create(size + dtuple_get_n_fields(entry)
			       * sizeof(big_rec_field_t) + 1000);

	vector = mem_heap_alloc(heap, sizeof(big_rec_t));

	vector->heap = heap;
	vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
					* sizeof(big_rec_field_t));

	/* Decide which fields to shorten: the algorithm is to look for
	a variable-length field that yields the biggest savings when
	stored externally */

	n_fields = 0;

	while (page_zip_rec_needs_ext(rec_get_converted_size(index, entry,
							     *n_ext),
				      dict_table_is_comp(index->table),
				      dict_index_get_n_fields(index),
				      dict_table_zip_size(index->table))) {
		ulint			i;
		ulint			longest		= 0;
		ulint			longest_i	= ULINT_MAX;
		byte*			data;
		big_rec_field_t*	b;

		for (i = dict_index_get_n_unique_in_tree(index);
		     i < dtuple_get_n_fields(entry); i++) {
			ulint	savings;

			dfield = dtuple_get_nth_field(entry, i);
			ifield = dict_index_get_nth_field(index, i);

			/* Skip fixed-length, NULL, externally stored,
			or short columns */

			if (ifield->fixed_len
			    || dfield_is_null(dfield)
			    || dfield_is_ext(dfield)
			    || dfield_get_len(dfield) <= local_len
			    || dfield_get_len(dfield)
			    <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
				goto skip_field;
			}

			savings = dfield_get_len(dfield) - local_len;

			/* Check that there would be savings */
			if (longest >= savings) {
				goto skip_field;
			}

			longest_i = i;
			longest = savings;

skip_field:
			continue;
		}

		if (!longest) {
			/* Cannot shorten more */

			mem_heap_free(heap);

			return(NULL);
		}

		/* Move data from field longest_i to big rec vector.

		We store the first bytes locally to the record. Then
		we can calculate all ordering fields in all indexes
		from locally stored data. */

		dfield = dtuple_get_nth_field(entry, longest_i);
		ifield = dict_index_get_nth_field(index, longest_i);
		local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;

		b = &vector->fields[n_fields];
		b->field_no = longest_i;
		b->len = dfield_get_len(dfield) - local_prefix_len;
		b->data = (char*) dfield_get_data(dfield) + local_prefix_len;

		/* Allocate the locally stored part of the column. */
		data = mem_heap_alloc(heap, local_len);

		/* Copy the local prefix. */
		memcpy(data, dfield_get_data(dfield), local_prefix_len);
		/* Clear the extern field reference (BLOB pointer). */
		memset(data + local_prefix_len, 0, BTR_EXTERN_FIELD_REF_SIZE);
#if 0
		/* The following would fail the Valgrind checks in
		page_cur_insert_rec_low() and page_cur_insert_rec_zip().
		The BLOB pointers in the record will be initialized after
		the record and the BLOBs have been written. */
		UNIV_MEM_ALLOC(data + local_prefix_len,
			       BTR_EXTERN_FIELD_REF_SIZE);
#endif

		dfield_set_data(dfield, data, local_len);
		dfield_set_ext(dfield);

		n_fields++;
		(*n_ext)++;
		ut_ad(n_fields < dtuple_get_n_fields(entry));
	}

	vector->n_fields = n_fields;
	return(vector);
}
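
To make the savings arithmetic above concrete (assuming BTR_EXTERN_FIELD_REF_SIZE is 20 bytes and DICT_MAX_INDEX_COL_LEN is 768, as defined in the InnoDB sources of this era): local_len is 788 in the pre-Barracuda branch and 20 in the new-format branch. A 1000-byte variable-length value in an old-format table thus offers savings of 1000 - 788 = 212 bytes when moved off-page, while any candidate of at most 788 bytes (or at most 40 bytes in the new format, via the BTR_EXTERN_FIELD_REF_SIZE * 2 test) is skipped outright.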
Example #11
/**********************************************************************//**
Writes to the undo log a record of an update or a delete marking of a
clustered index record.
@return byte offset of the inserted undo log entry on the page on
success, 0 on failure */
static
ulint
trx_undo_page_report_modify(
/*========================*/
	page_t*		undo_page,	/*!< in: undo log page */
	trx_t*		trx,		/*!< in: transaction */
	dict_index_t*	index,		/*!< in: clustered index where update or
					delete marking is done */
	const rec_t*	rec,		/*!< in: clustered index record which
					has NOT yet been modified */
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
	const upd_t*	update,		/*!< in: update vector which tells the
					columns to be updated; in the case of
					a delete, this should be set to NULL */
	ulint		cmpl_info,	/*!< in: compiler info on secondary
					index updates */
	mtr_t*		mtr)		/*!< in: mtr */
{
	dict_table_t*	table;
	ulint		first_free;
	byte*		ptr;
	const byte*	field;
	ulint		flen;
	ulint		col_no;
	ulint		type_cmpl;
	byte*		type_cmpl_ptr;
	ulint		i;
	trx_id_t	trx_id;
	ibool		ignore_prefix = FALSE;
	byte		ext_buf[REC_MAX_INDEX_COL_LEN
				+ BTR_EXTERN_FIELD_REF_SIZE];

	ut_a(dict_index_is_clust(index));
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
			       + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
	table = index->table;

	first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
				      + TRX_UNDO_PAGE_FREE);
	ptr = undo_page + first_free;

	ut_ad(first_free <= UNIV_PAGE_SIZE);

	if (trx_undo_left(undo_page, ptr) < 50) {

		/* NOTE: the value 50 must be big enough so that the general
		fields written below fit on the undo log page */

		return(0);
	}

	/* Reserve 2 bytes for the pointer to the next undo log record */
	ptr += 2;

	/* Store first some general parameters to the undo log */

	if (!update) {
		type_cmpl = TRX_UNDO_DEL_MARK_REC;
	} else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
		type_cmpl = TRX_UNDO_UPD_DEL_REC;
		/* We are about to update a delete marked record.
		We don't typically need the prefix in this case unless
		the delete marking is done by the same transaction
		(which we check below). */
		ignore_prefix = TRUE;
	} else {
		type_cmpl = TRX_UNDO_UPD_EXIST_REC;
	}

	type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
	type_cmpl_ptr = ptr;

	*ptr++ = (byte) type_cmpl;
	ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);

	ptr += mach_dulint_write_much_compressed(ptr, table->id);

	/*----------------------------------------*/
	/* Store the state of the info bits */

	*ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table));

	/* Store the values of the system columns */
	field = rec_get_nth_field(rec, offsets,
				  dict_index_get_sys_col_pos(
					  index, DATA_TRX_ID), &flen);
	ut_ad(flen == DATA_TRX_ID_LEN);

	trx_id = trx_read_trx_id(field);

	/* If it is an update of a delete marked record, then we are
	allowed to ignore blob prefixes if the delete marking was done
	by some other trx as it must have committed by now for us to
	allow an over-write. */
	if (ignore_prefix) {
		ignore_prefix = ut_dulint_cmp(trx_id, trx->id) != 0;
	}
	ptr += mach_dulint_write_compressed(ptr, trx_id);

	field = rec_get_nth_field(rec, offsets,
				  dict_index_get_sys_col_pos(
					  index, DATA_ROLL_PTR), &flen);
	ut_ad(flen == DATA_ROLL_PTR_LEN);

	ptr += mach_dulint_write_compressed(ptr, trx_read_roll_ptr(field));

	/*----------------------------------------*/
	/* Store then the fields required to uniquely determine the
	record which will be modified in the clustered index */

	for (i = 0; i < dict_index_get_n_unique(index); i++) {

		field = rec_get_nth_field(rec, offsets, i, &flen);

		/* The ordering columns must not be stored externally. */
		ut_ad(!rec_offs_nth_extern(offsets, i));
		ut_ad(dict_index_get_nth_col(index, i)->ord_part);

		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		ptr += mach_write_compressed(ptr, flen);

		if (flen != UNIV_SQL_NULL) {
			if (trx_undo_left(undo_page, ptr) < flen) {

				return(0);
			}

			ut_memcpy(ptr, field, flen);
			ptr += flen;
		}
	}

	/*----------------------------------------*/
	/* Save to the undo log the old values of the columns to be updated. */

	if (update) {
		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		ptr += mach_write_compressed(ptr, upd_get_n_fields(update));

		for (i = 0; i < upd_get_n_fields(update); i++) {

			ulint	pos = upd_get_nth_field(update, i)->field_no;

			/* Write field number to undo log */
			if (trx_undo_left(undo_page, ptr) < 5) {

				return(0);
			}

			ptr += mach_write_compressed(ptr, pos);

			/* Save the old value of field */
			field = rec_get_nth_field(rec, offsets, pos, &flen);

			if (trx_undo_left(undo_page, ptr) < 15) {

				return(0);
			}

			if (rec_offs_nth_extern(offsets, pos)) {
				ptr = trx_undo_page_report_modify_ext(
					ptr,
					dict_index_get_nth_col(index, pos)
					->ord_part
					&& !ignore_prefix
					&& flen < REC_MAX_INDEX_COL_LEN
					? ext_buf : NULL,
					dict_table_zip_size(table),
					&field, &flen);

				/* Notify purge that it eventually has to
				free the old externally stored field */

				trx->update_undo->del_marks = TRUE;

				*type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
			} else {
				ptr += mach_write_compressed(ptr, flen);
			}

			if (flen != UNIV_SQL_NULL) {
				if (trx_undo_left(undo_page, ptr) < flen) {

					return(0);
				}

				ut_memcpy(ptr, field, flen);
				ptr += flen;
			}
		}
	}

	/*----------------------------------------*/
	/* In the case of a delete marking, and also in the case of an update
	where any ordering field of any index changes, store the values of all
	columns which occur as ordering fields in any index. This info is used
	in the purge of old versions where we use it to build and search the
	delete marked index records, to look if we can remove them from the
	index tree. Note that starting from 4.0.14 also externally stored
	fields can be ordering in some index. Starting from 5.2, we no longer
	store the first REC_MAX_INDEX_COL_LEN bytes to the undo log record,
	but we can construct the column prefix fields in the index by
	fetching the first page of the BLOB that is pointed to by the
	clustered index. This works also in crash recovery, because all pages
	(including BLOBs) are recovered before anything is rolled back. */

	if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
		byte*	old_ptr = ptr;

		trx->update_undo->del_marks = TRUE;

		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		/* Reserve 2 bytes to write the number of bytes the stored
		fields take in this undo record */

		ptr += 2;

		for (col_no = 0; col_no < dict_table_get_n_cols(table);
		     col_no++) {

			const dict_col_t*	col
				= dict_table_get_nth_col(table, col_no);

			if (col->ord_part) {
				ulint	pos;

				/* Write field number to undo log */
				if (trx_undo_left(undo_page, ptr) < 5 + 15) {

					return(0);
				}

				pos = dict_index_get_nth_col_pos(index,
								 col_no);
				ptr += mach_write_compressed(ptr, pos);

				/* Save the old value of field */
				field = rec_get_nth_field(rec, offsets, pos,
							  &flen);

				if (rec_offs_nth_extern(offsets, pos)) {
					ptr = trx_undo_page_report_modify_ext(
						ptr,
						flen < REC_MAX_INDEX_COL_LEN
						&& !ignore_prefix
						? ext_buf : NULL,
						dict_table_zip_size(table),
						&field, &flen);
				} else {
					ptr += mach_write_compressed(
						ptr, flen);
				}

				if (flen != UNIV_SQL_NULL) {
					if (trx_undo_left(undo_page, ptr)
					    < flen) {

						return(0);
					}

					ut_memcpy(ptr, field, flen);
					ptr += flen;
				}
			}
		}

		mach_write_to_2(old_ptr, ptr - old_ptr);
	}

	/*----------------------------------------*/
	/* Write pointers to the previous and the next undo log records */
	if (trx_undo_left(undo_page, ptr) < 2) {

		return(0);
	}

	mach_write_to_2(ptr, first_free);
	ptr += 2;
	mach_write_to_2(undo_page + first_free, ptr - undo_page);

	mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
			ptr - undo_page);

	/* Write to the REDO log about this change in the UNDO log */

	trx_undof_page_add_undo_rec_log(undo_page, first_free,
					ptr - undo_page, mtr);
	return(first_free);
}
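
As with Example #8, the writes above can be summarized as the layout of an update/delete-mark undo record; again this is a recap of this function's behavior, not an independent specification:

/*
	2 bytes		pointer to the next undo record (written last)
	1 byte		type_cmpl: the record type OR'ed with
			cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
			TRX_UNDO_UPD_EXTERN may be OR'ed in later
	variable	undo_no, much-compressed
	variable	table id, much-compressed
	1 byte		info bits of rec
	variable	DB_TRX_ID, compressed
	variable	DB_ROLL_PTR, compressed
	variable	for each unique-determining field: compressed
			length (UNIV_SQL_NULL for NULL), then the data
	if update:	compressed n_fields, then per updated field:
			compressed field number, compressed old length
			(or the external-storage form written by
			trx_undo_page_report_modify_ext()), old data
	if delete-mark or ordering change: a 2-byte size-prefixed
			block of (field number, length, data) triples
			for every column that is an ordering field in
			some index
	2 bytes		pointer to the previous undo record (first_free)
*/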
Example #12
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record.
@return	own: row reference built; see the NOTE below! */
UNIV_INTERN
dtuple_t*
row_build_row_ref(
/*==============*/
	ulint		type,	/*!< in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
				the former copies also the data fields to
				heap, whereas the latter only places pointers
				to data fields on the index page */
	dict_index_t*	index,	/*!< in: secondary index */
	const rec_t*	rec,	/*!< in: record in the index;
				NOTE: in the case ROW_COPY_POINTERS
				the data fields in the row will point
				directly into this record, therefore,
				the buffer page of this record must be
				at least s-latched and the latch held
				as long as the row reference is used! */
	mem_heap_t*	heap)	/*!< in: memory heap from which the memory
				needed is allocated */
{
	dict_table_t*	table;
	dict_index_t*	clust_index;
	dfield_t*	dfield;
	dtuple_t*	ref;
	const byte*	field;
	ulint		len;
	ulint		ref_len;
	ulint		pos;
	byte*		buf;
	ulint		clust_col_prefix_len;
	ulint		i;
	mem_heap_t*	tmp_heap	= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	rec_offs_init(offsets_);

	ut_ad(index && rec && heap);
	ut_ad(!dict_index_is_clust(index));

	offsets = rec_get_offsets(rec, index, offsets,
				  ULINT_UNDEFINED, &tmp_heap);
	/* Secondary indexes must not contain externally stored columns. */
	ut_ad(!rec_offs_any_extern(offsets));

	if (type == ROW_COPY_DATA) {
		/* Take a copy of rec to heap */

		buf = mem_heap_alloc(heap, rec_offs_size(offsets));

		rec = rec_copy(buf, rec, offsets);
		/* Avoid a debug assertion in rec_offs_validate(). */
		rec_offs_make_valid(rec, index, offsets);
	}

	table = index->table;

	clust_index = dict_table_get_first_index(table);

	ref_len = dict_index_get_n_unique(clust_index);

	ref = dtuple_create(heap, ref_len);

	dict_index_copy_types(ref, clust_index, ref_len);

	for (i = 0; i < ref_len; i++) {
		dfield = dtuple_get_nth_field(ref, i);

		pos = dict_index_get_nth_field_pos(index, clust_index, i);

		ut_a(pos != ULINT_UNDEFINED);

		field = rec_get_nth_field(rec, offsets, pos, &len);

		dfield_set_data(dfield, field, len);

		/* If the primary key contains a column prefix, then the
		secondary index may contain a longer prefix of the same
		column, or the full column, and we must adjust the length
		accordingly. */

		clust_col_prefix_len = dict_index_get_nth_field(
			clust_index, i)->prefix_len;

		if (clust_col_prefix_len > 0) {
			if (len != UNIV_SQL_NULL) {

				const dtype_t*	dtype
					= dfield_get_type(dfield);

				dfield_set_len(dfield,
					       dtype_get_at_most_n_mbchars(
						       dtype->prtype,
						       dtype->mbminlen,
						       dtype->mbmaxlen,
						       clust_col_prefix_len,
						       len, (char*) field));
			}
		}
	}

	ut_ad(dtuple_check_typed(ref));
	if (tmp_heap) {
		mem_heap_free(tmp_heap);
	}

	return(ref);
}
Example #13
/*******************************************************************//**
An inverse function to row_build_index_entry. Builds a row from a
record in a clustered index.
@return	own: row built; see the NOTE below! */
UNIV_INTERN
dtuple_t*
row_build(
/*======*/
	ulint			type,	/*!< in: ROW_COPY_POINTERS or
					ROW_COPY_DATA; the latter
					copies also the data fields to
					heap while the former only
					places pointers to data fields
					on the index page, and thus is
					more efficient */
	const dict_index_t*	index,	/*!< in: clustered index */
	const rec_t*		rec,	/*!< in: record in the clustered
					index; NOTE: in the case
					ROW_COPY_POINTERS the data
					fields in the row will point
					directly into this record,
					therefore, the buffer page of
					this record must be at least
					s-latched and the latch held
					as long as the row dtuple is used! */
	const ulint*		offsets,/*!< in: rec_get_offsets(rec,index)
					or NULL, in which case this function
					will invoke rec_get_offsets() */
	const dict_table_t*	col_table,
					/*!< in: table, to check which
					externally stored columns
					occur in the ordering columns
					of an index, or NULL if
					index->table should be
					consulted instead */
	row_ext_t**		ext,	/*!< out, own: cache of
					externally stored column
					prefixes, or NULL */
	mem_heap_t*		heap)	/*!< in: memory heap from which
					the memory needed is allocated */
{
	dtuple_t*		row;
	const dict_table_t*	table;
	ulint			n_fields;
	ulint			n_ext_cols;
	ulint*			ext_cols	= NULL; /* remove warning */
	ulint			len;
	ulint			row_len;
	byte*			buf;
	ulint			i;
	ulint			j;
	mem_heap_t*		tmp_heap	= NULL;
	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
	rec_offs_init(offsets_);

	ut_ad(index && rec && heap);
	ut_ad(dict_index_is_clust(index));
	ut_ad(!mutex_own(&kernel_mutex));

	if (!offsets) {
		offsets = rec_get_offsets(rec, index, offsets_,
					  ULINT_UNDEFINED, &tmp_heap);
	} else {
		ut_ad(rec_offs_validate(rec, index, offsets));
	}

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
	if (rec_offs_any_null_extern(rec, offsets)) {
		/* This condition can occur during crash recovery
		before trx_rollback_active() has completed execution,
		or when a concurrently executing
		row_ins_index_entry_low() has committed the B-tree
		mini-transaction but has not yet managed to restore
		the cursor position for writing the big_rec. */
		ut_a(trx_undo_roll_ptr_is_insert(
			     row_get_rec_roll_ptr(rec, index, offsets)));
	}
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */

	if (type != ROW_COPY_POINTERS) {
		/* Take a copy of rec to heap */
		buf = mem_heap_alloc(heap, rec_offs_size(offsets));
		rec = rec_copy(buf, rec, offsets);
		/* Avoid a debug assertion in rec_offs_validate(). */
		rec_offs_make_valid(rec, index, (ulint*) offsets);
	}

	table = index->table;
	row_len = dict_table_get_n_cols(table);

	row = dtuple_create(heap, row_len);

	dict_table_copy_types(row, table);

	dtuple_set_info_bits(row, rec_get_info_bits(
				     rec, dict_table_is_comp(table)));

	n_fields = rec_offs_n_fields(offsets);
	n_ext_cols = rec_offs_n_extern(offsets);
	if (n_ext_cols) {
		ext_cols = mem_heap_alloc(heap, n_ext_cols * sizeof *ext_cols);
	}

	for (i = j = 0; i < n_fields; i++) {
		dict_field_t*		ind_field
			= dict_index_get_nth_field(index, i);
		const dict_col_t*	col
			= dict_field_get_col(ind_field);
		ulint			col_no
			= dict_col_get_no(col);
		dfield_t*		dfield
			= dtuple_get_nth_field(row, col_no);

		if (ind_field->prefix_len == 0) {

			const byte*	field = rec_get_nth_field(
				rec, offsets, i, &len);

			dfield_set_data(dfield, field, len);
		}

		if (rec_offs_nth_extern(offsets, i)) {
			dfield_set_ext(dfield);

			if (UNIV_LIKELY_NULL(col_table)) {
				ut_a(col_no
				     < dict_table_get_n_cols(col_table));
				col = dict_table_get_nth_col(
					col_table, col_no);
			}

			if (col->ord_part) {
				/* We will have to fetch prefixes of
				externally stored columns that are
				referenced by column prefixes. */
				ext_cols[j++] = col_no;
			}
		}
	}

	ut_ad(dtuple_check_typed(row));

	if (!ext) {
		/* REDUNDANT and COMPACT formats store a local
		768-byte prefix of each externally stored
		column. No cache is needed. */
		ut_ad(dict_table_get_format(index->table)
		      < DICT_TF_FORMAT_ZIP);
	} else if (j) {
		*ext = row_ext_create(j, ext_cols, row,
				      dict_table_zip_size(index->table),
				      heap);
	} else {
		*ext = NULL;
	}

	if (tmp_heap) {
		mem_heap_free(tmp_heap);
	}

	return(row);
}
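
A hedged sketch of the pipeline this builder usually feeds (see row_build_index_entry() in Example #16): build the row from a clustered index record, then build the entry for a secondary index. clust_index, rec, and sec_index are illustrative names, and with ROW_COPY_POINTERS the page holding rec must stay latched for as long as row is used:

	mem_heap_t*	heap = mem_heap_create(512);
	row_ext_t*	ext;
	dtuple_t*	row;
	dtuple_t*	entry;

	row = row_build(ROW_COPY_POINTERS, clust_index, rec,
			NULL,	/* let row_build() fetch the offsets */
			NULL,	/* consult clust_index->table */
			&ext, heap);

	entry = row_build_index_entry(row, ext, sec_index, heap);

	if (entry == NULL) {
		/* An externally stored column needed by sec_index
		was unavailable; see the comment on Example #16. */
	}

	/* ... search or insert with entry ... */

	mem_heap_free(heap);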
Example #14
/*****************************************************************//**
Constructs the last committed version of a clustered index record,
which should be seen by a semi-consistent read.
@return	DB_SUCCESS or DB_MISSING_HISTORY */
UNIV_INTERN
ulint
row_vers_build_for_semi_consistent_read(
/*====================================*/
	const rec_t*	rec,	/*!< in: record in a clustered index; the
				caller must have a latch on the page; this
				latch locks the top of the stack of versions
				of this record */
	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
	dict_index_t*	index,	/*!< in: the clustered index */
	ulint**		offsets,/*!< in/out: offsets returned by
				rec_get_offsets(rec, index) */
	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
				the offsets are allocated */
	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
				*old_vers is allocated; memory for possible
				intermediate versions is allocated and freed
				locally within the function */
	const rec_t**	old_vers)/*!< out: rec, old version, or NULL if the
				record does not exist in the view, that is,
				it was freshly inserted afterwards */
{
	const rec_t*	version;
	mem_heap_t*	heap		= NULL;
	byte*		buf;
	ulint		err;
	trx_id_t	rec_trx_id	= ut_dulint_zero;

	ut_ad(dict_index_is_clust(index));
	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	ut_ad(rec_offs_validate(rec, index, *offsets));

	rw_lock_s_lock(&(purge_sys->latch));
	/* The S-latch on purge_sys prevents the purge view from
	changing.  Thus, if we have an uncommitted transaction at
	this point, then purge cannot remove its undo log even if
	the transaction could commit now. */

	version = rec;

	for (;;) {
		trx_t*		version_trx;
		mem_heap_t*	heap2;
		rec_t*		prev_version;
		trx_id_t	version_trx_id;

		version_trx_id = row_get_rec_trx_id(version, index, *offsets);
		if (rec == version) {
			rec_trx_id = version_trx_id;
		}

		mutex_enter(&kernel_mutex);
		version_trx = trx_get_on_id(version_trx_id);
		if (version_trx
		    && (version_trx->conc_state == TRX_COMMITTED_IN_MEMORY
			|| version_trx->conc_state == TRX_NOT_STARTED)) {

			version_trx = NULL;
		}
		mutex_exit(&kernel_mutex);

		if (!version_trx) {

			/* We found a version that belongs to a
			committed transaction: return it. */

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
			ut_a(!rec_offs_any_null_extern(version, *offsets));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */

			if (rec == version) {
				*old_vers = rec;
				err = DB_SUCCESS;
				break;
			}

			/* We assume that a rolled-back transaction stays in
			TRX_ACTIVE state until all the changes have been
			rolled back and the transaction is removed from
			the global list of transactions. */

			if (!ut_dulint_cmp(rec_trx_id, version_trx_id)) {
				/* The transaction was committed while
				we searched for earlier versions.
				Return the current version as a
				semi-consistent read. */

				version = rec;
				*offsets = rec_get_offsets(version,
							   index, *offsets,
							   ULINT_UNDEFINED,
							   offset_heap);
			}

			buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets));
			*old_vers = rec_copy(buf, version, *offsets);
			rec_offs_make_valid(*old_vers, index, *offsets);
			err = DB_SUCCESS;

			break;
		}

		heap2 = heap;
		heap = mem_heap_create(1024);

		err = trx_undo_prev_version_build(rec, mtr, version, index,
						  *offsets, heap,
						  &prev_version);
		if (heap2) {
			mem_heap_free(heap2); /* free version */
		}

		if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
			break;
		}

		if (prev_version == NULL) {
			/* It was a freshly inserted version */
			*old_vers = NULL;
			err = DB_SUCCESS;

			break;
		}

		version = prev_version;
		*offsets = rec_get_offsets(version, index, *offsets,
					   ULINT_UNDEFINED, offset_heap);
#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
		ut_a(!rec_offs_any_null_extern(version, *offsets));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
	}/* for (;;) */

	if (heap) {
		mem_heap_free(heap);
	}
	rw_lock_s_unlock(&(purge_sys->latch));

	return(err);
}
Example #15
/*****************************************************************//**
Constructs the version of a clustered index record which a consistent
read should see. We assume that the trx id stored in rec is such that
the consistent read should not see rec in its present version.
@return	DB_SUCCESS or DB_MISSING_HISTORY */
UNIV_INTERN
ulint
row_vers_build_for_consistent_read(
/*===============================*/
	const rec_t*	rec,	/*!< in: record in a clustered index; the
				caller must have a latch on the page; this
				latch locks the top of the stack of versions
				of this record */
	mtr_t*		mtr,	/*!< in: mtr holding the latch on rec */
	dict_index_t*	index,	/*!< in: the clustered index */
	ulint**		offsets,/*!< in/out: offsets returned by
				rec_get_offsets(rec, index) */
	read_view_t*	view,	/*!< in: the consistent read view */
	mem_heap_t**	offset_heap,/*!< in/out: memory heap from which
				the offsets are allocated */
	mem_heap_t*	in_heap,/*!< in: memory heap from which the memory for
				*old_vers is allocated; memory for possible
				intermediate versions is allocated and freed
				locally within the function */
	rec_t**		old_vers)/*!< out, own: old version, or NULL if the
				record does not exist in the view, that is,
				it was freshly inserted afterwards */
{
	const rec_t*	version;
	rec_t*		prev_version;
	trx_id_t	trx_id;
	mem_heap_t*	heap		= NULL;
	byte*		buf;
	ulint		err;

	ut_ad(dict_index_is_clust(index));
	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	ut_ad(rec_offs_validate(rec, index, *offsets));

	trx_id = row_get_rec_trx_id(rec, index, *offsets);

	ut_ad(!read_view_sees_trx_id(view, trx_id));

	rw_lock_s_lock(&(purge_sys->latch));
	version = rec;

	for (;;) {
		mem_heap_t*	heap2	= heap;
		trx_undo_rec_t* undo_rec;
		roll_ptr_t	roll_ptr;
		undo_no_t	undo_no;
		heap = mem_heap_create(1024);

		/* If we have a high-granularity consistent read view and
		the creating transaction of the view is the same as trx_id
		in the record, we see this record only when the undo_no of
		the record is less than the undo_no in the view. */

		if (view->type == VIEW_HIGH_GRANULARITY
		    && ut_dulint_cmp(view->creator_trx_id, trx_id) == 0) {

			roll_ptr = row_get_rec_roll_ptr(version, index,
							*offsets);
			undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
			undo_no = trx_undo_rec_get_undo_no(undo_rec);
			mem_heap_empty(heap);

			if (ut_dulint_cmp(view->undo_no, undo_no) > 0) {
				/* The view already sees this version: we can
				copy it to in_heap and return */

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
				ut_a(!rec_offs_any_null_extern(
					     version, *offsets));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */

				buf = mem_heap_alloc(in_heap,
						     rec_offs_size(*offsets));
				*old_vers = rec_copy(buf, version, *offsets);
				rec_offs_make_valid(*old_vers, index,
						    *offsets);
				err = DB_SUCCESS;

				break;
			}
		}

		err = trx_undo_prev_version_build(rec, mtr, version, index,
						  *offsets, heap,
						  &prev_version);
		if (heap2) {
			mem_heap_free(heap2); /* free version */
		}

		if (err != DB_SUCCESS) {
			break;
		}

		if (prev_version == NULL) {
			/* It was a freshly inserted version */
			*old_vers = NULL;
			err = DB_SUCCESS;

			break;
		}

		*offsets = rec_get_offsets(prev_version, index, *offsets,
					   ULINT_UNDEFINED, offset_heap);

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
		ut_a(!rec_offs_any_null_extern(prev_version, *offsets));
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */

		trx_id = row_get_rec_trx_id(prev_version, index, *offsets);

		if (read_view_sees_trx_id(view, trx_id)) {

			/* The view already sees this version: we can copy
			it to in_heap and return */

			buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets));
			*old_vers = rec_copy(buf, prev_version, *offsets);
			rec_offs_make_valid(*old_vers, index, *offsets);
			err = DB_SUCCESS;

			break;
		}

		version = prev_version;
	}/* for (;;) */

	mem_heap_free(heap);
	rw_lock_s_unlock(&(purge_sys->latch));

	return(err);
}
Example #16
/*****************************************************************//**
When an insert into a table or a purge from it is performed, this function builds
the entry to be inserted into or purged from an index on the table.
@return index entry which should be inserted or purged, or NULL if the
externally stored columns in the clustered index record are
unavailable and ext != NULL */
UNIV_INTERN
dtuple_t*
row_build_index_entry(
/*==================*/
	const dtuple_t*	row,	/*!< in: row which should be
				inserted or purged */
	row_ext_t*	ext,	/*!< in: externally stored column prefixes,
				or NULL */
	dict_index_t*	index,	/*!< in: index on the table */
	mem_heap_t*	heap)	/*!< in: memory heap from which the memory for
				the index entry is allocated */
{
	dtuple_t*	entry;
	ulint		entry_len;
	ulint		i;

	ut_ad(row && index && heap);
	ut_ad(dtuple_check_typed(row));

	entry_len = dict_index_get_n_fields(index);
	entry = dtuple_create(heap, entry_len);

	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
		dtuple_set_n_fields_cmp(entry, entry_len);
		/* There may only be externally stored columns
		in a clustered index B-tree of a user table. */
		ut_a(!ext);
	} else {
		dtuple_set_n_fields_cmp(
			entry, dict_index_get_n_unique_in_tree(index));
	}

	for (i = 0; i < entry_len; i++) {
		const dict_field_t*	ind_field
			= dict_index_get_nth_field(index, i);
		const dict_col_t*	col
			= ind_field->col;
		ulint			col_no
			= dict_col_get_no(col);
		dfield_t*		dfield
			= dtuple_get_nth_field(entry, i);
		const dfield_t*		dfield2
			= dtuple_get_nth_field(row, col_no);
		ulint			len
			= dfield_get_len(dfield2);

		dfield_copy(dfield, dfield2);

		if (dfield_is_null(dfield)) {
			continue;
		}

		if (ind_field->prefix_len == 0
		    && (!dfield_is_ext(dfield)
			|| dict_index_is_clust(index))) {
			/* The dfield_copy() above suffices for
			columns that are stored in-page, or for
			clustered index record columns that are not
			part of a column prefix in the PRIMARY KEY. */
			continue;
		}

		/* If the column is stored externally (off-page) in
		the clustered index, it must be an ordering field in
		the secondary index.  In the Antelope format, only
		prefix-indexed columns may be stored off-page in the
		clustered index record. In the Barracuda format, also
		fully indexed long CHAR or VARCHAR columns may be
		stored off-page. */
		ut_ad(col->ord_part);

		if (UNIV_LIKELY_NULL(ext)) {
			/* See if the column is stored externally. */
			const byte*	buf = row_ext_lookup(ext, col_no,
							     &len);
			if (UNIV_LIKELY_NULL(buf)) {
				if (UNIV_UNLIKELY(buf == field_ref_zero)) {
					return(NULL);
				}
				dfield_set_data(dfield, buf, len);
			}

			if (ind_field->prefix_len == 0) {
				/* In the Barracuda format
				(ROW_FORMAT=DYNAMIC or
				ROW_FORMAT=COMPRESSED), we can have a
				secondary index on an entire column
				that is stored off-page in the
				clustered index. As this is not a
				prefix index (prefix_len == 0),
				include the entire off-page column in
				the secondary index record. */
				continue;
			}
		} else if (dfield_is_ext(dfield)) {
			/* This table is either in Antelope format
			(ROW_FORMAT=REDUNDANT or ROW_FORMAT=COMPACT)
			or a purge record where the ordered part of
			the field is not external.
			In Antelope, the maximum column prefix
			index length is 767 bytes, and the clustered
			index record contains a 768-byte prefix of
			each off-page column. */
			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
			len -= BTR_EXTERN_FIELD_REF_SIZE;
			dfield_set_len(dfield, len);
		}

		/* If a column prefix index, take only the prefix. */
		if (ind_field->prefix_len) {
			len = dtype_get_at_most_n_mbchars(
				col->prtype, col->mbminlen, col->mbmaxlen,
				ind_field->prefix_len, len,
				dfield_get_data(dfield));
			dfield_set_len(dfield, len);
		}
	}

	ut_ad(dtuple_check_typed(entry));

	return(entry);
}
Example #17
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
UNIV_INTERN
void
row_build_row_ref_in_tuple(
/*=======================*/
	dtuple_t*		ref,	/*!< in/out: row reference built;
					see the NOTE below! */
	const rec_t*		rec,	/*!< in: record in the index;
					NOTE: the data fields in ref
					will point directly into this
					record, therefore, the buffer
					page of this record must be at
					least s-latched and the latch
					held as long as the row
					reference is used! */
	const dict_index_t*	index,	/*!< in: secondary index */
	ulint*			offsets,/*!< in: rec_get_offsets(rec, index)
					or NULL */
	trx_t*			trx)	/*!< in: transaction */
{
	const dict_index_t*	clust_index;
	dfield_t*		dfield;
	const byte*		field;
	ulint			len;
	ulint			ref_len;
	ulint			pos;
	ulint			clust_col_prefix_len;
	ulint			i;
	mem_heap_t*		heap		= NULL;
	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
	rec_offs_init(offsets_);

	ut_a(ref);
	ut_a(index);
	ut_a(rec);
	ut_ad(!dict_index_is_clust(index));

	if (UNIV_UNLIKELY(!index->table)) {
		fputs("InnoDB: table ", stderr);
notfound:
		ut_print_name(stderr, trx, TRUE, index->table_name);
		fputs(" for index ", stderr);
		ut_print_name(stderr, trx, FALSE, index->name);
		fputs(" not found\n", stderr);
		ut_error;
	}

	clust_index = dict_table_get_first_index(index->table);

	if (UNIV_UNLIKELY(!clust_index)) {
		fputs("InnoDB: clust index for table ", stderr);
		goto notfound;
	}

	if (!offsets) {
		offsets = rec_get_offsets(rec, index, offsets_,
					  ULINT_UNDEFINED, &heap);
	} else {
		ut_ad(rec_offs_validate(rec, index, offsets));
	}

	/* Secondary indexes must not contain externally stored columns. */
	ut_ad(!rec_offs_any_extern(offsets));
	ref_len = dict_index_get_n_unique(clust_index);

	ut_ad(ref_len == dtuple_get_n_fields(ref));

	dict_index_copy_types(ref, clust_index, ref_len);

	for (i = 0; i < ref_len; i++) {
		dfield = dtuple_get_nth_field(ref, i);

		pos = dict_index_get_nth_field_pos(index, clust_index, i);

		ut_a(pos != ULINT_UNDEFINED);

		field = rec_get_nth_field(rec, offsets, pos, &len);

		dfield_set_data(dfield, field, len);

		/* If the primary key contains a column prefix, then the
		secondary index may contain a longer prefix of the same
		column, or the full column, and we must adjust the length
		accordingly. */

		clust_col_prefix_len = dict_index_get_nth_field(
			clust_index, i)->prefix_len;

		if (clust_col_prefix_len > 0) {
			if (len != UNIV_SQL_NULL) {

				const dtype_t*	dtype
					= dfield_get_type(dfield);

				dfield_set_len(dfield,
					       dtype_get_at_most_n_mbchars(
						       dtype->prtype,
						       dtype->mbminlen,
						       dtype->mbmaxlen,
						       clust_col_prefix_len,
						       len, (char*) field));
			}
		}
	}

	ut_ad(dtuple_check_typed(ref));
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
}
Example #18
/*******************************************************************//**
Builds an update vector based on a remaining part of an undo log record.
@return remaining part of the record, or NULL if an error is detected,
which means that the record is corrupted */
UNIV_INTERN
byte*
trx_undo_update_rec_get_update(
/*===========================*/
	byte*		ptr,	/*!< in: remaining part in update undo log
				record, after reading the row reference
				NOTE that this copy of the undo log record must
				be preserved as long as the update vector is
				used, as we do NOT copy the data in the
				record! */
	dict_index_t*	index,	/*!< in: clustered index */
	ulint		type,	/*!< in: TRX_UNDO_UPD_EXIST_REC,
				TRX_UNDO_UPD_DEL_REC, or
				TRX_UNDO_DEL_MARK_REC; in the last case,
				only trx id and roll ptr fields are added to
				the update vector */
	trx_id_t	trx_id,	/*!< in: transaction id from this undo record */
	roll_ptr_t	roll_ptr,/*!< in: roll pointer from this undo record */
	ulint		info_bits,/*!< in: info bits from this undo record */
	trx_t*		trx,	/*!< in: transaction */
	mem_heap_t*	heap,	/*!< in: memory heap from which the memory
				needed is allocated */
	upd_t**		upd)	/*!< out, own: update vector */
{
	upd_field_t*	upd_field;
	upd_t*		update;
	ulint		n_fields;
	byte*		buf;
	ulint		i;

	ut_a(dict_index_is_clust(index));

	if (type != TRX_UNDO_DEL_MARK_REC) {
		ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields);
	} else {
		n_fields = 0;
	}

	update = upd_create(n_fields + 2, heap);

	update->info_bits = info_bits;

	/* Store first trx id and roll ptr to update vector */

	upd_field = upd_get_nth_field(update, n_fields);
	buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
	trx_write_trx_id(buf, trx_id);

	upd_field_set_field_no(upd_field,
			       dict_index_get_sys_col_pos(index, DATA_TRX_ID),
			       index, trx);
	dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);

	upd_field = upd_get_nth_field(update, n_fields + 1);
	buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
	trx_write_roll_ptr(buf, roll_ptr);

	upd_field_set_field_no(
		upd_field, dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
		index, trx);
	dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN);

	/* Store then the updated ordinary columns to the update vector */

	for (i = 0; i < n_fields; i++) {

		byte*	field;
		ulint	len;
		ulint	field_no;
		ulint	orig_len;

		ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);

		if (field_no >= dict_index_get_n_fields(index)) {
			fprintf(stderr,
				"InnoDB: Error: trying to access"
				" update undo rec field %lu in ",
				(ulong) field_no);
			dict_index_name_print(stderr, trx, index);
			fprintf(stderr, "\n"
				"InnoDB: but index has only %lu fields\n"
				"InnoDB: Submit a detailed bug report"
				" to http://bugs.mysql.com\n"
				"InnoDB: Run also CHECK TABLE ",
				(ulong) dict_index_get_n_fields(index));
			ut_print_name(stderr, trx, TRUE, index->table_name);
			fprintf(stderr, "\n"
				"InnoDB: n_fields = %lu, i = %lu, ptr %p\n",
				(ulong) n_fields, (ulong) i, ptr);
			*upd = NULL;
			return(NULL);
		}

		upd_field = upd_get_nth_field(update, i);

		upd_field_set_field_no(upd_field, field_no, index, trx);

		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);

		upd_field->orig_len = orig_len;

		if (len == UNIV_SQL_NULL) {
			dfield_set_null(&upd_field->new_val);
		} else if (len < UNIV_EXTERN_STORAGE_FIELD) {
			dfield_set_data(&upd_field->new_val, field, len);
		} else {
			len -= UNIV_EXTERN_STORAGE_FIELD;

			dfield_set_data(&upd_field->new_val, field, len);
			dfield_set_ext(&upd_field->new_val);
		}
	}

	*upd = update;

	return(ptr);
}
Example #19
/***********************************************************//**
Delete marks or removes a secondary index entry if found.
@return	DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_mod_del_mark_or_remove_sec_low(
/*====================================*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr,	/*!< in: query thread */
	dict_index_t*	index,	/*!< in: index */
	dtuple_t*	entry,	/*!< in: index entry */
	ulint		mode)	/*!< in: latch mode BTR_MODIFY_LEAF or
				BTR_MODIFY_TREE */
{
	btr_pcur_t		pcur;
	btr_cur_t*		btr_cur;
	ibool			success;
	ibool			old_has;
	ulint			err;
	mtr_t			mtr;
	mtr_t			mtr_vers;
	enum row_search_result	search_result;

	log_free_check();
	mtr_start(&mtr);

	btr_cur = btr_pcur_get_btr_cur(&pcur);

	ut_ad(mode == BTR_MODIFY_TREE || mode == BTR_MODIFY_LEAF);

	search_result = row_search_index_entry(index, entry, mode,
					       &pcur, &mtr);

	switch (UNIV_EXPECT(search_result, ROW_FOUND)) {
	case ROW_NOT_FOUND:
		/* In crash recovery, the secondary index record may
		be missing if the UPDATE did not have time to insert
		the secondary index records before the crash.  When we
		are undoing that UPDATE in crash recovery, the record
		may be missing.

		In normal processing, if an update ends in a deadlock
		before it has inserted all updated secondary index
		records, then the undo will not find those records. */

		err = DB_SUCCESS;
		goto func_exit;
	case ROW_FOUND:
		break;
	case ROW_BUFFERED:
	case ROW_NOT_DELETED_REF:
		/* These are invalid outcomes, because the mode passed
		to row_search_index_entry() did not include any of the
		flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
		ut_error;
	}

	/* We should remove the index record if no prior version of the row,
	which cannot be purged yet, requires its existence. If any such
	version requires it, we should delete-mark the record instead. */

	mtr_start(&mtr_vers);

	success = btr_pcur_restore_position(BTR_SEARCH_LEAF, &(node->pcur),
					    &mtr_vers);
	ut_a(success);

	old_has = row_vers_old_has_index_entry(FALSE,
					       btr_pcur_get_rec(&(node->pcur)),
					       &mtr_vers, index, entry);
	if (old_has) {
		err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
						   btr_cur, TRUE, thr, &mtr);
		ut_ad(err == DB_SUCCESS);
	} else {
		/* Remove the index record */

		if (mode == BTR_MODIFY_LEAF) {
			success = btr_cur_optimistic_delete(btr_cur, &mtr);
			if (success) {
				err = DB_SUCCESS;
			} else {
				err = DB_FAIL;
			}
		} else {
			ut_ad(mode == BTR_MODIFY_TREE);

			/* No need to distinguish RB_RECOVERY_PURGE here,
			because we are deleting a secondary index record:
			the distinction between RB_NORMAL and
			RB_RECOVERY_PURGE only matters when deleting a
			record that contains externally stored
			columns. */
			ut_ad(!dict_index_is_clust(index));
			btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
						   RB_NORMAL, &mtr);

			/* The delete operation may fail if we have little
			file space left: TODO: easiest to crash the database
			and restart with more file space */
		}
	}

	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);

func_exit:
	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	return(err);
}