Beispiel #1
0
/*******************************************************************//**
Builds a partial row from an update undo log record. It contains the
columns which occur as ordering in any index of the table.
@return	pointer to remaining part of undo record */
UNIV_INTERN
byte*
trx_undo_rec_get_partial_row(
/*=========================*/
	byte*		ptr,	/*!< in: remaining part in update undo log
				record of a suitable type, at the start of
				the stored index columns;
				NOTE that this copy of the undo log record must
				be preserved as long as the partial row is
				used, as we do NOT copy the data in the
				record! */
	dict_index_t*	index,	/*!< in: clustered index */
	dtuple_t**	row,	/*!< out, own: partial row */
	ibool		ignore_prefix, /*!< in: flag to indicate if we
				expect blob prefixes in undo. Used
				only in the assertion. */
	mem_heap_t*	heap)	/*!< in: memory heap from which the memory
				needed is allocated */
{
	const byte*	end_ptr;
	ulint		row_len;

	ut_ad(index);
	ut_ad(ptr);
	ut_ad(row);
	ut_ad(heap);
	ut_ad(dict_index_is_clust(index));

	row_len = dict_table_get_n_cols(index->table);

	*row = dtuple_create(heap, row_len);

	dict_table_copy_types(*row, index->table);

	end_ptr = ptr + mach_read_from_2(ptr);
	ptr += 2;

	while (ptr != end_ptr) {
		dfield_t*		dfield;
		byte*			field;
		ulint			field_no;
		const dict_col_t*	col;
		ulint			col_no;
		ulint			len;
		ulint			orig_len;

		ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);

		col = dict_index_get_nth_col(index, field_no);
		col_no = dict_col_get_no(col);

		ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);

		dfield = dtuple_get_nth_field(*row, col_no);

		dfield_set_data(dfield, field, len);

		if (len != UNIV_SQL_NULL
		    && len >= UNIV_EXTERN_STORAGE_FIELD) {
			dfield_set_len(dfield,
				       len - UNIV_EXTERN_STORAGE_FIELD);
			dfield_set_ext(dfield);
			/* If the prefix of this column is indexed,
			ensure that enough prefix is stored in the
			undo log record. */
			if (!ignore_prefix && col->ord_part) {
				ut_a(dfield_get_len(dfield)
				     >= 2 * BTR_EXTERN_FIELD_REF_SIZE);
				ut_a(dict_table_get_format(index->table)
				     >= DICT_TF_FORMAT_ZIP
				     || dfield_get_len(dfield)
				     >= REC_MAX_INDEX_COL_LEN
				     + BTR_EXTERN_FIELD_REF_SIZE);
			}
		}
	}

	return(ptr);
}
Beispiel #2
0
/*******************************************************************//**
An inverse function to row_build_index_entry. Builds a row from a
record in a clustered index.
@return	own: row built; see the NOTE below! */
UNIV_INTERN
dtuple_t*
row_build(
/*======*/
	ulint			type,	/*!< in: ROW_COPY_POINTERS or
					ROW_COPY_DATA; the latter
					copies also the data fields to
					heap while the first only
					places pointers to data fields
					on the index page, and thus is
					more efficient */
	const dict_index_t*	index,	/*!< in: clustered index */
	const rec_t*		rec,	/*!< in: record in the clustered
					index; NOTE: in the case
					ROW_COPY_POINTERS the data
					fields in the row will point
					directly into this record,
					therefore, the buffer page of
					this record must be at least
					s-latched and the latch held
					as long as the row dtuple is used! */
	const ulint*		offsets,/*!< in: rec_get_offsets(rec,index)
					or NULL, in which case this function
					will invoke rec_get_offsets() */
	const dict_table_t*	col_table,
					/*!< in: table, to check which
					externally stored columns
					occur in the ordering columns
					of an index, or NULL if
					index->table should be
					consulted instead */
	row_ext_t**		ext,	/*!< out, own: cache of
					externally stored column
					prefixes, or NULL */
	mem_heap_t*		heap)	/*!< in: memory heap from which
					the memory needed is allocated */
{
	dtuple_t*		row;
	const dict_table_t*	table;
	ulint			n_fields;
	ulint			n_ext_cols;
	ulint*			ext_cols	= NULL; /* remove warning */
	ulint			len;
	ulint			row_len;
	byte*			buf;
	ulint			i;
	ulint			j;
	mem_heap_t*		tmp_heap	= NULL;
	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
	rec_offs_init(offsets_);

	ut_ad(index && rec && heap);
	ut_ad(dict_index_is_clust(index));
	ut_ad(!mutex_own(&kernel_mutex));

	if (!offsets) {
		offsets = rec_get_offsets(rec, index, offsets_,
					  ULINT_UNDEFINED, &tmp_heap);
	} else {
		ut_ad(rec_offs_validate(rec, index, offsets));
	}

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
	if (rec_offs_any_null_extern(rec, offsets)) {
		/* This condition can occur during crash recovery
		before trx_rollback_active() has completed execution,
		or when a concurrently executing
		row_ins_index_entry_low() has committed the B-tree
		mini-transaction but has not yet managed to restore
		the cursor position for writing the big_rec. */
		ut_a(trx_undo_roll_ptr_is_insert(
			     row_get_rec_roll_ptr(rec, index, offsets)));
	}
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */

	if (type != ROW_COPY_POINTERS) {
		/* Take a copy of rec to heap */
		buf = mem_heap_alloc(heap, rec_offs_size(offsets));
		rec = rec_copy(buf, rec, offsets);
		/* Avoid a debug assertion in rec_offs_validate(). */
		rec_offs_make_valid(rec, index, (ulint*) offsets);
	}

	table = index->table;
	row_len = dict_table_get_n_cols(table);

	row = dtuple_create(heap, row_len);

	dict_table_copy_types(row, table);

	dtuple_set_info_bits(row, rec_get_info_bits(
				     rec, dict_table_is_comp(table)));

	n_fields = rec_offs_n_fields(offsets);
	n_ext_cols = rec_offs_n_extern(offsets);
	if (n_ext_cols) {
		ext_cols = mem_heap_alloc(heap, n_ext_cols * sizeof *ext_cols);
	}

	for (i = j = 0; i < n_fields; i++) {
		dict_field_t*		ind_field
			= dict_index_get_nth_field(index, i);
		const dict_col_t*	col
			= dict_field_get_col(ind_field);
		ulint			col_no
			= dict_col_get_no(col);
		dfield_t*		dfield
			= dtuple_get_nth_field(row, col_no);

		if (ind_field->prefix_len == 0) {

			const byte*	field = rec_get_nth_field(
				rec, offsets, i, &len);

			dfield_set_data(dfield, field, len);
		}

		if (rec_offs_nth_extern(offsets, i)) {
			dfield_set_ext(dfield);

			if (UNIV_LIKELY_NULL(col_table)) {
				ut_a(col_no
				     < dict_table_get_n_cols(col_table));
				col = dict_table_get_nth_col(
					col_table, col_no);
			}

			if (col->ord_part) {
				/* We will have to fetch prefixes of
				externally stored columns that are
				referenced by column prefixes. */
				ext_cols[j++] = col_no;
			}
		}
	}

	ut_ad(dtuple_check_typed(row));

	if (!ext) {
		/* REDUNDANT and COMPACT formats store a local
		768-byte prefix of each externally stored
		column. No cache is needed. */
		ut_ad(dict_table_get_format(index->table)
		      < DICT_TF_FORMAT_ZIP);
	} else if (j) {
		*ext = row_ext_create(j, ext_cols, row,
				      dict_table_zip_size(index->table),
				      heap);
	} else {
		*ext = NULL;
	}

	if (tmp_heap) {
		mem_heap_free(tmp_heap);
	}

	return(row);
}
Beispiel #3
0
/**************************************************************//**
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
database. Moves data only from those fields which are not necessary
to determine uniquely the insertion place of the tuple in the index.
@return own: created big record vector, NULL if we are not able to
shorten the entry enough, i.e., if there are too many fixed-length or
short fields in entry or the index is clustered */
UNIV_INTERN
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
	dict_index_t*	index,	/*!< in: index */
	dtuple_t*	entry,	/*!< in/out: index entry */
	ulint*		n_ext)	/*!< in/out: number of
				externally stored columns */
{
	mem_heap_t*	heap;
	big_rec_t*	vector;
	dfield_t*	dfield;
	dict_field_t*	ifield;
	ulint		size;
	ulint		n_fields;
	ulint		local_len;
	ulint		local_prefix_len;

	if (UNIV_UNLIKELY(!dict_index_is_clust(index))) {
		return(NULL);
	}

	if (dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP) {
		/* up to MySQL 5.1: store a 768-byte prefix locally */
		local_len = BTR_EXTERN_FIELD_REF_SIZE + DICT_MAX_INDEX_COL_LEN;
	} else {
		/* new-format table: do not store any BLOB prefix locally */
		local_len = BTR_EXTERN_FIELD_REF_SIZE;
	}

	ut_a(dtuple_check_typed_no_assert(entry));

	size = rec_get_converted_size(index, entry, *n_ext);

	if (UNIV_UNLIKELY(size > 1000000000)) {
		fprintf(stderr,
			"InnoDB: Warning: tuple size very big: %lu\n",
			(ulong) size);
		fputs("InnoDB: Tuple contents: ", stderr);
		dtuple_print(stderr, entry);
		putc('\n', stderr);
	}

	heap = mem_heap_create(size + dtuple_get_n_fields(entry)
			       * sizeof(big_rec_field_t) + 1000);

	vector = mem_heap_alloc(heap, sizeof(big_rec_t));

	vector->heap = heap;
	vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
					* sizeof(big_rec_field_t));

	/* Decide which fields to shorten: the algorithm is to look for
	a variable-length field that yields the biggest savings when
	stored externally */

	n_fields = 0;

	while (page_zip_rec_needs_ext(rec_get_converted_size(index, entry,
							     *n_ext),
				      dict_table_is_comp(index->table),
				      dict_index_get_n_fields(index),
				      dict_table_zip_size(index->table))) {
		ulint			i;
		ulint			longest		= 0;
		ulint			longest_i	= ULINT_MAX;
		byte*			data;
		big_rec_field_t*	b;

		for (i = dict_index_get_n_unique_in_tree(index);
		     i < dtuple_get_n_fields(entry); i++) {
			ulint	savings;

			dfield = dtuple_get_nth_field(entry, i);
			ifield = dict_index_get_nth_field(index, i);

			/* Skip fixed-length, NULL, externally stored,
			or short columns */

			if (ifield->fixed_len
			    || dfield_is_null(dfield)
			    || dfield_is_ext(dfield)
			    || dfield_get_len(dfield) <= local_len
			    || dfield_get_len(dfield)
			    <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
				goto skip_field;
			}

			savings = dfield_get_len(dfield) - local_len;

			/* Check that there would be savings */
			if (longest >= savings) {
				goto skip_field;
			}

			longest_i = i;
			longest = savings;

skip_field:
			continue;
		}

		if (!longest) {
			/* Cannot shorten more */

			mem_heap_free(heap);

			return(NULL);
		}

		/* Move data from field longest_i to big rec vector.

		We store the first bytes locally to the record. Then
		we can calculate all ordering fields in all indexes
		from locally stored data. */

		dfield = dtuple_get_nth_field(entry, longest_i);
		ifield = dict_index_get_nth_field(index, longest_i);
		local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;

		b = &vector->fields[n_fields];
		b->field_no = longest_i;
		b->len = dfield_get_len(dfield) - local_prefix_len;
		b->data = (char*) dfield_get_data(dfield) + local_prefix_len;

		/* Allocate the locally stored part of the column. */
		data = mem_heap_alloc(heap, local_len);

		/* Copy the local prefix. */
		memcpy(data, dfield_get_data(dfield), local_prefix_len);
		/* Clear the extern field reference (BLOB pointer). */
		memset(data + local_prefix_len, 0, BTR_EXTERN_FIELD_REF_SIZE);
#if 0
		/* The following would fail the Valgrind checks in
		page_cur_insert_rec_low() and page_cur_insert_rec_zip().
		The BLOB pointers in the record will be initialized after
		the record and the BLOBs have been written. */
		UNIV_MEM_ALLOC(data + local_prefix_len,
			       BTR_EXTERN_FIELD_REF_SIZE);
#endif

		dfield_set_data(dfield, data, local_len);
		dfield_set_ext(dfield);

		n_fields++;
		(*n_ext)++;
		ut_ad(n_fields < dtuple_get_n_fields(entry));
	}

	vector->n_fields = n_fields;
	return(vector);
}
Beispiel #4
0
/***********************************************************//**
Looks for the clustered index record when node has the row reference.
The pcur in node is used in the search. If found, stores the row to node,
and stores the position of pcur, and detaches it. The pcur must be closed
by the caller in any case.
@return TRUE if found; NOTE the node->pcur must be closed by the
caller, regardless of the return value */
UNIV_INTERN
ibool
row_undo_search_clust_to_pcur(
/*==========================*/
	undo_node_t*	node)	/*!< in: row undo node */
{
	dict_index_t*	clust_index;
	ibool		found;
	mtr_t		mtr;
	ibool		ret;
	rec_t*		rec;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	rec_offs_init(offsets_);

	mtr_start(&mtr);

	clust_index = dict_table_get_first_index(node->table);

	found = row_search_on_row_ref(&(node->pcur), BTR_MODIFY_LEAF,
				      node->table, node->ref, &mtr);

	rec = btr_pcur_get_rec(&(node->pcur));

	offsets = rec_get_offsets(rec, clust_index, offsets,
				  ULINT_UNDEFINED, &heap);

	if (!found || 0 != ut_dulint_cmp(node->roll_ptr,
					 row_get_rec_roll_ptr(rec, clust_index,
							      offsets))) {

		/* We must remove the reservation on the undo log record
		BEFORE releasing the latch on the clustered index page: this
		is to make sure that some thread will eventually undo the
		modification corresponding to node->roll_ptr. */

		/* fputs("--------------------undoing a previous version\n",
		stderr); */

		ret = FALSE;
	} else {
		row_ext_t**	ext;

		if (dict_table_get_format(node->table) >= DICT_TF_FORMAT_ZIP) {
			/* In DYNAMIC or COMPRESSED format, there is
			no prefix of externally stored columns in the
			clustered index record. Build a cache of
			column prefixes. */
			ext = &node->ext;
		} else {
			/* REDUNDANT and COMPACT formats store a local
			768-byte prefix of each externally stored
			column. No cache is needed. */
			ext = NULL;
			node->ext = NULL;
		}

		node->row = row_build(ROW_COPY_DATA, clust_index, rec,
				      offsets, NULL, ext, node->heap);
		if (node->update) {
			node->undo_row = dtuple_copy(node->row, node->heap);
			row_upd_replace(node->undo_row, &node->undo_ext,
					clust_index, node->update, node->heap);
		} else {
			node->undo_row = NULL;
			node->undo_ext = NULL;
		}

		btr_pcur_store_position(&(node->pcur), &mtr);

		ret = TRUE;
	}

	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	return(ret);
}
Beispiel #5
0
/*****************************************************************//**
Based on a table object, this function builds the entry to be inserted
in the SYS_TABLES system table.
@return	the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_tables_tuple(
    /*=========================*/
    const dict_table_t*	table,	/*!< in: table */
    mem_heap_t*		heap)	/*!< in: memory heap from
					which the memory for the built
					tuple is allocated */
{
    dict_table_t*	sys_tables;
    dtuple_t*	entry;
    dfield_t*	dfield;
    byte*		ptr;

    ut_ad(table);
    ut_ad(heap);

    sys_tables = dict_sys->sys_tables;

    entry = dtuple_create(heap, 8 + DATA_N_SYS_COLS);

    dict_table_copy_types(entry, sys_tables);

    /* 0: NAME -----------------------------*/
    dfield = dtuple_get_nth_field(entry, 0/*NAME*/);

    dfield_set_data(dfield, table->name, ut_strlen(table->name));
    /* 3: ID -------------------------------*/
    dfield = dtuple_get_nth_field(entry, 1/*ID*/);

    ptr = mem_heap_alloc(heap, 8);
    mach_write_to_8(ptr, table->id);

    dfield_set_data(dfield, ptr, 8);
    /* 4: N_COLS ---------------------------*/
    dfield = dtuple_get_nth_field(entry, 2/*N_COLS*/);

#if DICT_TF_COMPACT != 1
#error
#endif

    ptr = mem_heap_alloc(heap, 4);
    if (dict_table_is_gcs(table))                       /* ±í¶¨ÒåÐÞ¸Ä */
    {
        ut_ad(dict_table_is_comp(table));

        mach_write_to_4(ptr, table->n_def
                        | (1 << 31) | (1 << 30));
    }
    else
    {
        mach_write_to_4(ptr, table->n_def
                        | ((table->flags & DICT_TF_COMPACT) << 31));
    }

    dfield_set_data(dfield, ptr, 4);
    /* 5: TYPE -----------------------------*/
    dfield = dtuple_get_nth_field(entry, 3/*TYPE*/);

    ptr = mem_heap_alloc(heap, 4);
    if (table->flags & (~DICT_TF_COMPACT & ~(~0 << DICT_TF_BITS))) {
        ut_a(table->flags & DICT_TF_COMPACT);
        ut_a(dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);
        ut_a((table->flags & DICT_TF_ZSSIZE_MASK)
             <= (DICT_TF_ZSSIZE_MAX << DICT_TF_ZSSIZE_SHIFT));
        ut_a(!(table->flags & (~0 << DICT_TF2_BITS)));
        mach_write_to_4(ptr, table->flags & ~(~0 << DICT_TF_BITS));
    } else {
        mach_write_to_4(ptr, DICT_TABLE_ORDINARY);
    }

    dfield_set_data(dfield, ptr, 4);
    /* 6: MIX_ID (obsolete) ---------------------------*/
    dfield = dtuple_get_nth_field(entry, 4/*MIX_ID*/);

    ptr = mem_heap_zalloc(heap, 8);

    dfield_set_data(dfield, ptr, 8);
    /* 7: MIX_LEN (additional flags) --------------------------*/

    dfield = dtuple_get_nth_field(entry, 5/*MIX_LEN*/);

    ptr = mem_heap_alloc(heap, 4);
    mach_write_to_4(ptr, table->flags >> DICT_TF2_SHIFT);

    ut_ad(table->n_cols_before_alter_table == 0);

    dfield_set_data(dfield, ptr, 4);
    /* 8: CLUSTER_NAME ---------------------*/
    dfield = dtuple_get_nth_field(entry, 6/*CLUSTER_NAME*/);
    dfield_set_null(dfield); /* not supported */

    /* 9: SPACE ----------------------------*/
    dfield = dtuple_get_nth_field(entry, 7/*SPACE*/);

    ptr = mem_heap_alloc(heap, 4);
    mach_write_to_4(ptr, table->space);

    dfield_set_data(dfield, ptr, 4);
    /*----------------------------------*/

    return(entry);
}
Beispiel #6
0
/***************************************************************//**
Builds a table definition to insert.
@return	DB_SUCCESS or error code */
static
ulint
dict_build_table_def_step(
    /*======================*/
    que_thr_t*	thr,	/*!< in: query thread */
    tab_node_t*	node)	/*!< in: table create node */
{
    dict_table_t*	table;
    dtuple_t*	row;
    ulint		error;
    ulint		flags;
    const char*	path_or_name;
    ibool		is_path;
    mtr_t		mtr;
    ulint		space = 0;
    ibool		file_per_table;

    ut_ad(mutex_own(&(dict_sys->mutex)));

    table = node->table;

    /* Cache the global variable "srv_file_per_table" to
    a local variable before using it. Please note
    "srv_file_per_table" is not under dict_sys mutex
    protection, and could be changed while executing
    this function. So better to cache the current value
    to a local variable, and all future reference to
    "srv_file_per_table" should use this local variable. */
    file_per_table = srv_file_per_table;

    dict_hdr_get_new_id(&table->id, NULL, NULL);

    thr_get_trx(thr)->table_id = table->id;

    if (file_per_table) {
        /* Get a new space id if srv_file_per_table is set */
        dict_hdr_get_new_id(NULL, NULL, &space);

        if (UNIV_UNLIKELY(space == ULINT_UNDEFINED)) {
            return(DB_ERROR);
        }

        /* We create a new single-table tablespace for the table.
        We initially let it be 4 pages:
        - page 0 is the fsp header and an extent descriptor page,
        - page 1 is an ibuf bitmap page,
        - page 2 is the first inode page,
        - page 3 will contain the root of the clustered index of the
        table we create here. */

        if (table->dir_path_of_temp_table) {
            /* We place tables created with CREATE TEMPORARY
            TABLE in the tmp dir of mysqld server */

            path_or_name = table->dir_path_of_temp_table;
            is_path = TRUE;
        } else {
            path_or_name = table->name;
            is_path = FALSE;
        }

        ut_ad(dict_table_get_format(table) <= DICT_TF_FORMAT_MAX);
        ut_ad(!dict_table_zip_size(table)
              || dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);

        flags = table->flags & ~(~0 << DICT_TF_BITS);
        error = fil_create_new_single_table_tablespace(
                    space, path_or_name, is_path,
                    flags == DICT_TF_COMPACT ? 0 : flags,
                    FIL_IBD_FILE_INITIAL_SIZE);
        table->space = (unsigned int) space;

        if (error != DB_SUCCESS) {

            return(error);
        }

        mtr_start(&mtr);

        fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);

        mtr_commit(&mtr);
    } else {
        /* Create in the system tablespace: disallow new features */
        table->flags &= (~0 << DICT_TF_BITS) | DICT_TF_COMPACT;
    }

    row = dict_create_sys_tables_tuple(table, node->heap);

    ins_node_set_new_row(node->tab_def, row);

    return(DB_SUCCESS);
}
Beispiel #7
0
/***********************************************************//**
Creates an index. This is a high-level function used in SQL execution
graphs.
@return	query thread to run next or NULL */
UNIV_INTERN
que_thr_t*
dict_create_index_step(
    /*===================*/
    que_thr_t*	thr)	/*!< in: query thread */
{
    ind_node_t*	node;
    ulint		err	= DB_ERROR;
    trx_t*		trx;

    ut_ad(thr);
    ut_ad(mutex_own(&(dict_sys->mutex)));

    trx = thr_get_trx(thr);

    node = thr->run_node;

    ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_INDEX);

    if (thr->prev_node == que_node_get_parent(node)) {
        node->state = INDEX_BUILD_INDEX_DEF;
    }

    if (node->state == INDEX_BUILD_INDEX_DEF) {
        /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
        err = dict_build_index_def_step(thr, node);

        if (err != DB_SUCCESS) {

            goto function_exit;
        }

        node->state = INDEX_BUILD_FIELD_DEF;
        node->field_no = 0;

        thr->run_node = node->ind_def;

        return(thr);
    }

    if (node->state == INDEX_BUILD_FIELD_DEF) {

        if (node->field_no < (node->index)->n_fields) {

            err = dict_build_field_def_step(node);

            if (err != DB_SUCCESS) {

                goto function_exit;
            }

            node->field_no++;

            thr->run_node = node->field_def;

            return(thr);
        } else {
            node->state = INDEX_ADD_TO_CACHE;
        }
    }

    if (node->state == INDEX_ADD_TO_CACHE) {

        index_id_t	index_id = node->index->id;

        err = dict_index_add_to_cache(
                  node->table, node->index, FIL_NULL,
                  trx_is_strict(trx)
                  || dict_table_get_format(node->table)
                  >= DICT_TF_FORMAT_ZIP);

        node->index = dict_index_get_if_in_cache_low(index_id);
        ut_a(!node->index == (err != DB_SUCCESS));

        if (err != DB_SUCCESS) {

            goto function_exit;
        }

        node->state = INDEX_CREATE_INDEX_TREE;
    }

    if (node->state == INDEX_CREATE_INDEX_TREE) {

        err = dict_create_index_tree_step(node);

        if (err != DB_SUCCESS) {
            dict_index_remove_from_cache(node->table, node->index);
            node->index = NULL;

            goto function_exit;
        }

        node->index->page = node->page_no;
        node->state = INDEX_COMMIT_WORK;
    }

    if (node->state == INDEX_COMMIT_WORK) {

        /* Index was correctly defined: do NOT commit the transaction
        (CREATE INDEX does NOT currently do an implicit commit of
        the current transaction) */

        node->state = INDEX_CREATE_INDEX_TREE;

        /* thr->run_node = node->commit_node;

        return(thr); */
    }

function_exit:
    trx->error_state = err;

    if (err == DB_SUCCESS) {
        /* Ok: do nothing */

    } else if (err == DB_LOCK_WAIT) {

        return(NULL);
    } else {
        /* SQL error detected */

        return(NULL);
    }

    thr->run_node = que_node_get_parent(node);

    return(thr);
}
Beispiel #8
0
/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_mod_upd_exist_sec(
/*=======================*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	mem_heap_t*	heap;
	dtuple_t*	entry;
	dict_index_t*	index;
	ulint		err;

	if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
		/* No change in secondary indexes */

		return(DB_SUCCESS);
	}

	heap = mem_heap_create(1024);

	while (node->index != NULL) {
		/* Skip all corrupted secondary index */
		dict_table_skip_corrupt_index(node->index);

		if (!node->index) {
			break;
		}

		index = node->index;

		if (row_upd_changes_ord_field_binary(node->index, node->update,
						     thr,
						     node->row, node->ext)) {

			/* Build the newest version of the index entry */
			entry = row_build_index_entry(node->row, node->ext,
						      index, heap);
			if (UNIV_UNLIKELY(!entry)) {
				/* The server must have crashed in
				row_upd_clust_rec_by_insert() before
				the updated externally stored columns (BLOBs)
				of the new clustered index entry were
				written. */

				/* The table must be in DYNAMIC or COMPRESSED
				format.  REDUNDANT and COMPACT formats
				store a local 768-byte prefix of each
				externally stored column. */
				ut_a(dict_table_get_format(index->table)
				     >= DICT_TF_FORMAT_ZIP);

				/* This is only legitimate when
				rolling back an incomplete transaction
				after crash recovery. */
				ut_a(thr_get_trx(thr)->is_recovered);

				/* The server must have crashed before
				completing the insert of the new
				clustered index entry and before
				inserting to the secondary indexes.
				Because node->row was not yet written
				to this index, we can ignore it.  But
				we must restore node->undo_row. */
			} else {
				/* NOTE that if we updated the fields of a
				delete-marked secondary index record so that
				alphabetically they stayed the same, e.g.,
				'abc' -> 'aBc', we cannot return to the
				original values because we do not know them.
				But this should not cause problems because
				in row0sel.c, in queries we always retrieve
				the clustered index record or an earlier
				version of it, if the secondary index record
				through which we do the search is
				delete-marked. */

				err = row_undo_mod_del_mark_or_remove_sec(
					node, thr, index, entry);
				if (err != DB_SUCCESS) {
					mem_heap_free(heap);

					return(err);
				}

				mem_heap_empty(heap);
			}

			/* We may have to update the delete mark in the
			secondary index record of the previous version of
			the row. We also need to update the fields of
			the secondary index record if we updated its fields
			but alphabetically they stayed the same, e.g.,
			'abc' -> 'aBc'. */
			entry = row_build_index_entry(node->undo_row,
						      node->undo_ext,
						      index, heap);
			ut_a(entry);

			err = row_undo_mod_del_unmark_sec_and_undo_update(
				BTR_MODIFY_LEAF, thr, index, entry);
			if (err == DB_FAIL) {
				err = row_undo_mod_del_unmark_sec_and_undo_update(
					BTR_MODIFY_TREE, thr, index, entry);
			}

			if (err != DB_SUCCESS) {
				mem_heap_free(heap);

				return(err);
			}
		}

		node->index = dict_table_get_next_index(node->index);
	}

	mem_heap_free(heap);

	return(DB_SUCCESS);
}