Example #1
File: fut0lst.c Project: OPSF/uClinux
ibool
flst_validate(
    /*==========*/
    /* out: TRUE if ok */
    flst_base_node_t*	base,	/* in: pointer to base node of list */
    mtr_t*			mtr1)	/* in: mtr */
{
    ulint		space;
    flst_node_t*	node;
    fil_addr_t	node_addr;
    fil_addr_t	base_addr;
    ulint		len;
    ulint		i;
    mtr_t		mtr2;

    ut_ad(base);
    ut_ad(mtr_memo_contains(mtr1, buf_block_align(base),
                            MTR_MEMO_PAGE_X_FIX));

    /* We use two mini-transaction handles: the first is used to
    lock the base node, and prevent other threads from modifying the
    list. The second is used to traverse the list. We cannot run the
    second mtr without committing it at times, because if the list
    is long, then the x-locked pages could fill the buffer resulting
    in a deadlock. */

    /* Find out the space id */
    buf_ptr_get_fsp_addr(base, &space, &base_addr);

    len = flst_get_len(base, mtr1);
    node_addr = flst_get_first(base, mtr1);

    for (i = 0; i < len; i++) {
        mtr_start(&mtr2);

        node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2);
        node_addr = flst_get_next_addr(node, &mtr2);

        mtr_commit(&mtr2); /* Commit mtr2 each round to prevent the
                              buffer from becoming full */
    }

    ut_a(fil_addr_is_null(node_addr));

    node_addr = flst_get_last(base, mtr1);

    for (i = 0; i < len; i++) {
        mtr_start(&mtr2);

        node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2);
        node_addr = flst_get_prev_addr(node, &mtr2);

        mtr_commit(&mtr2); /* Commit mtr2 each round to prevent the
                              buffer from becoming full */
    }

    ut_a(fil_addr_is_null(node_addr));

    return(TRUE);
}
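
The two-mtr technique above boils down to a reusable idiom: the outer mtr keeps the base node latched for the whole validation, while a short-lived inner mtr is started and committed for every node visited, so at most one traversed page is x-latched at a time. A minimal sketch of that idiom follows; my_walk_list() is a hypothetical wrapper, while fut_get_ptr() and flst_get_next_addr() are the same calls used above.

/* Hedged sketch: bounded-latch traversal of a file-based list,
   one short-lived mtr per visited node. my_walk_list() is
   hypothetical, not part of the InnoDB sources. */
static void
my_walk_list(ulint space, fil_addr_t first_addr, ulint len)
{
    fil_addr_t	addr = first_addr;
    ulint	i;
    mtr_t	mtr;

    for (i = 0; i < len; i++) {
        flst_node_t*	node;

        mtr_start(&mtr);	/* fresh mtr for this node */

        node = fut_get_ptr(space, addr, RW_X_LATCH, &mtr);
        addr = flst_get_next_addr(node, &mtr);

        mtr_commit(&mtr);	/* release the page latch before
                                   stepping to the next node */
    }
}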
Example #2
File: row0uins.c Project: zhangchuhu/s2s
/***************************************************************//**
Removes a secondary index entry if found.
@return	DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_ins_remove_sec_low(
    /*========================*/
    ulint		mode,	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
				depending on whether we wish optimistic or
				pessimistic descent down the index tree */
    dict_index_t*	index,	/*!< in: index */
    dtuple_t*	entry)	/*!< in: index entry to remove */
{
    btr_pcur_t	pcur;
    btr_cur_t*	btr_cur;
    ibool		found;
    ibool		success;
    ulint		err;
    mtr_t		mtr;

    log_free_check();
    mtr_start(&mtr);

    found = row_search_index_entry(index, entry, mode, &pcur, &mtr);

    btr_cur = btr_pcur_get_btr_cur(&pcur);

    if (!found) {
        /* Not found */

        btr_pcur_close(&pcur);
        mtr_commit(&mtr);

        return(DB_SUCCESS);
    }

    if (mode == BTR_MODIFY_LEAF) {
        success = btr_cur_optimistic_delete(btr_cur, &mtr);

        if (success) {
            err = DB_SUCCESS;
        } else {
            err = DB_FAIL;
        }
    } else {
        ut_ad(mode == BTR_MODIFY_TREE);

        /* No need to distinguish RB_RECOVERY here, because we
        are deleting a secondary index record: the distinction
        between RB_NORMAL and RB_RECOVERY only matters when
        deleting a record that contains externally stored
        columns. */
        ut_ad(!dict_index_is_clust(index));
        btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
                                   RB_NORMAL, &mtr);
    }

    btr_pcur_close(&pcur);
    mtr_commit(&mtr);

    return(err);
}
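
In the InnoDB sources this helper is driven by a wrapper that tries the cheap BTR_MODIFY_LEAF pass first and falls back to BTR_MODIFY_TREE, retrying when the tablespace is temporarily out of space. The sketch below mirrors the retry loop visible in Example #24; the wrapper name my_remove_sec() is hypothetical, so treat this as an illustration of the calling pattern rather than the exact upstream caller.

/* Hedged sketch of a two-pass caller; the retry constants are the
   ones used in Example #24, my_remove_sec() is hypothetical. */
static ulint
my_remove_sec(dict_index_t* index, dtuple_t* entry)
{
	ulint	err;
	ulint	n_tries	= 0;

	/* Optimistic pass: modify a leaf page only. */
	err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry);

	if (err == DB_SUCCESS) {

		return(err);
	}
retry:
	/* Pessimistic pass: may have to reorganize the tree. */
	err = row_undo_ins_remove_sec_low(BTR_MODIFY_TREE, index, entry);

	if (err == DB_OUT_OF_FILE_SPACE
	    && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {

		n_tries++;
		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);

		goto retry;
	}

	return(err);
}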
Example #3
/*******************************************************************
Removes a secondary index entry if found. */
static
ulint
row_undo_ins_remove_sec_low(
/*========================*/
				/* out: DB_SUCCESS, DB_FAIL, or
				DB_OUT_OF_FILE_SPACE */
	ulint		mode,	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
				depending on whether we wish optimistic or
				pessimistic descent down the index tree */
	dict_index_t*	index,	/* in: index */
	dtuple_t*	entry)	/* in: index entry to remove */
{
	btr_pcur_t	pcur;
	btr_cur_t*	btr_cur;
	ibool		found;
	ibool		success;
	ulint		err;
	mtr_t		mtr;

	log_free_check();
	mtr_start(&mtr);

	found = row_search_index_entry(index, entry, mode, &pcur, &mtr);

	btr_cur = btr_pcur_get_btr_cur(&pcur);

	if (!found) {
		/* Not found */

		btr_pcur_close(&pcur);
		mtr_commit(&mtr);

		return(DB_SUCCESS);
	}

	if (mode == BTR_MODIFY_LEAF) {
		success = btr_cur_optimistic_delete(btr_cur, &mtr);

		if (success) {
			err = DB_SUCCESS;
		} else {
			err = DB_FAIL;
		}
	} else {
		ut_ad(mode == BTR_MODIFY_TREE);

		btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr);
	}

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	return(err);
}
Example #4
/***************************************************************
Delete marks a clustered index record. */
static
ulint
row_upd_del_mark_clust_rec(
/*=======================*/
				/* out: DB_SUCCESS if operation successfully
				completed, else error code or DB_LOCK_WAIT */
	upd_node_t*	node,	/* in: row update node */
	dict_index_t*	index,	/* in: clustered index */
	que_thr_t*	thr,	/* in: query thread */
	ibool		check_ref,/* in: TRUE if index may be referenced in
				a foreign key constraint */
	mtr_t*		mtr)	/* in: mtr; gets committed here */
{
	btr_pcur_t*	pcur;
	btr_cur_t*	btr_cur;
	ulint		err;
	
	ut_ad(node);
	ut_ad(index->type & DICT_CLUSTERED);
	ut_ad(node->is_delete);

	pcur = node->pcur;
	btr_cur = btr_pcur_get_btr_cur(pcur);

	ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));

	/* Store row because we have to build also the secondary index
	entries */
	
	row_upd_store_row(node);

	/* Mark the clustered index record deleted; we do not have to check
	locks, because we assume that we have an x-lock on the record */

	err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, btr_cur,
							TRUE, thr, mtr);
	if (err == DB_SUCCESS && check_ref) {
		/* NOTE that the following call loses
		the position of pcur ! */
		err = row_upd_check_references_constraints(pcur, index->table,
							index, thr, mtr);
		if (err != DB_SUCCESS) {
			mtr_commit(mtr);

			return(err);
		}
	}

	mtr_commit(mtr);
	
	return(err);
}
Example #5
/***************************************************************//**
Removes a secondary index entry if found.
@return	DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_ins_remove_sec_low(
/*========================*/
	ulint		mode,	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
				depending on whether we wish optimistic or
				pessimistic descent down the index tree */
	dict_index_t*	index,	/*!< in: index */
	dtuple_t*	entry)	/*!< in: index entry to remove */
{
	btr_pcur_t		pcur;
	btr_cur_t*		btr_cur;
	ulint			err;
	mtr_t			mtr;
	enum row_search_result	search_result;

	mtr_start(&mtr);

	btr_cur = btr_pcur_get_btr_cur(&pcur);

	ut_ad(mode == BTR_MODIFY_TREE || mode == BTR_MODIFY_LEAF);

	search_result = row_search_index_entry(index, entry, mode,
					       &pcur, &mtr);

	switch (search_result) {
	case ROW_NOT_FOUND:
		err = DB_SUCCESS;
		goto func_exit;
	case ROW_FOUND:
		break;
	case ROW_BUFFERED:
	case ROW_NOT_DELETED_REF:
		/* These are invalid outcomes, because the mode passed
		to row_search_index_entry() did not include any of the
		flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
		ut_error;
	}

	if (mode == BTR_MODIFY_LEAF) {
		err = btr_cur_optimistic_delete(btr_cur, &mtr)
			? DB_SUCCESS : DB_FAIL;
	} else {
		ut_ad(mode == BTR_MODIFY_TREE);

		/* No need to distinguish RB_RECOVERY here, because we
		are deleting a secondary index record: the distinction
		between RB_NORMAL and RB_RECOVERY only matters when
		deleting a record that contains externally stored
		columns. */
		ut_ad(!dict_index_is_clust(index));
		btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
					   RB_NORMAL, &mtr);
	}
func_exit:
	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	return(err);
}
Example #6
/**********************************************************************//**
Returns a new table, index, or tree id.
@return	the new id */
UNIV_INTERN
dulint
dict_hdr_get_new_id(
/*================*/
	ulint	type)	/*!< in: DICT_HDR_ROW_ID, ... */
{
	dict_hdr_t*	dict_hdr;
	dulint		id;
	mtr_t		mtr;

	ut_ad((type == DICT_HDR_TABLE_ID) || (type == DICT_HDR_INDEX_ID));

	mtr_start(&mtr);

	dict_hdr = dict_hdr_get(&mtr);

	id = mtr_read_dulint(dict_hdr + type, &mtr);
	id = ut_dulint_add(id, 1);

	mlog_write_dulint(dict_hdr + type, id, &mtr);

	mtr_commit(&mtr);

	return(id);
}
/***************************************************************//**
Creates an index tree for the index if it is not a member of a cluster.
@return	DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static
ulint
dict_create_index_tree_step(
/*========================*/
	ind_node_t*	node)	/*!< in: index create node */
{
	dict_index_t*	index;
	dict_table_t*	sys_indexes;
	dict_table_t*	table;
	dtuple_t*	search_tuple;
	ulint		zip_size;
	btr_pcur_t	pcur;
	mtr_t		mtr;

	ut_ad(mutex_own(&(dict_sys->mutex)));

	index = node->index;
	table = node->table;

	sys_indexes = dict_sys->sys_indexes;

	/* Run a mini-transaction in which the index tree is allocated for
	the index and its root address is written to the index entry in
	sys_indexes */

	mtr_start(&mtr);

	search_tuple = dict_create_search_tuple(node->ind_row, node->heap);

	btr_pcur_open(UT_LIST_GET_FIRST(sys_indexes->indexes),
		      search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF,
		      &pcur, &mtr);

	btr_pcur_move_to_next_user_rec(&pcur, &mtr);

	zip_size = dict_table_zip_size(index->table);

	node->page_no = btr_create(index->type, index->space, zip_size,
				   index->id, index, &mtr);
	/* printf("Created a new index tree in space %lu root page %lu\n",
	index->space, index->page_no); */

	page_rec_write_index_page_no(btr_pcur_get_rec(&pcur),
				     DICT_SYS_INDEXES_PAGE_NO_FIELD,
				     node->page_no, &mtr);
	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	if (node->page_no == FIL_NULL) {

		return(DB_OUT_OF_FILE_SPACE);
	}

	return(DB_SUCCESS);
}
Example #8
/*************************************************************************
Checks if possible foreign key constraints hold after a delete of the record
under pcur. NOTE that this function will temporarily commit mtr and lose
pcur position! */
static
ulint
row_upd_check_references_constraints(
/*=================================*/
				/* out: DB_SUCCESS, DB_LOCK_WAIT, or an error
				code */
	btr_pcur_t*	pcur,	/* in: cursor positioned on a record; NOTE: the
				cursor position is lost in this function! */
	dict_table_t*	table,	/* in: table in question */
	dict_index_t*	index,	/* in: index of the cursor */
	que_thr_t*	thr,	/* in: query thread */
	mtr_t*		mtr)	/* in: mtr */
{
	dict_foreign_t*	foreign;
	mem_heap_t*	heap;
	dtuple_t*	entry;
	rec_t*		rec;
	ulint		err;

	rec = btr_pcur_get_rec(pcur);

	heap = mem_heap_create(500);

	entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap);

	mtr_commit(mtr);	

	mtr_start(mtr);	
	
	rw_lock_s_lock(&dict_foreign_key_check_lock);	

	foreign = UT_LIST_GET_FIRST(table->referenced_list);

	while (foreign) {
		if (foreign->referenced_index == index) {

			err = row_ins_check_foreign_constraint(FALSE, foreign,
						table, index, entry, thr);
			if (err != DB_SUCCESS) {
				rw_lock_s_unlock(&dict_foreign_key_check_lock);	
				mem_heap_free(heap);

				return(err);
			}
		}

		foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
	}

	rw_lock_s_unlock(&dict_foreign_key_check_lock);	
	mem_heap_free(heap);
	
	return(DB_SUCCESS);
}
Example #9
/*********************************************************************
Creates a rollback segment.
@return pointer to new rollback segment if create successful */
UNIV_INTERN
trx_rseg_t*
trx_rseg_create(void)
/*=================*/
{
	mtr_t		mtr;
	ulint		slot_no;
	trx_rseg_t*	rseg = NULL;

	mtr_start(&mtr);

	/* To obey the latching order, acquire the file space
	x-latch before the kernel mutex. */
	mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), &mtr);

	mutex_enter(&kernel_mutex);

	slot_no = trx_sysf_rseg_find_free(&mtr);

	if (slot_no != ULINT_UNDEFINED) {
		ulint		space;
		ulint		page_no;
		ulint		zip_size;
		trx_sysf_t*	sys_header;

		page_no = trx_rseg_header_create(
			TRX_SYS_SPACE, 0, ULINT_MAX, slot_no, &mtr);

		ut_a(page_no != FIL_NULL);

		ut_ad(!trx_rseg_get_on_id(slot_no));

		sys_header = trx_sysf_get(&mtr);

		space = trx_sysf_rseg_get_space(sys_header, slot_no, &mtr);

		zip_size = space ? fil_space_get_zip_size(space) : 0;

		rseg = trx_rseg_mem_create(
			slot_no, space, zip_size, page_no, &mtr);
	}

	mutex_exit(&kernel_mutex);
	mtr_commit(&mtr);

	return(rseg);
}
Example #10
/*****************************************************************//**
Creates and initializes the data dictionary at the database creation. */
UNIV_INTERN
void
dict_create(void)
/*=============*/
{
	mtr_t	mtr;

	mtr_start(&mtr);

	dict_hdr_create(&mtr);

	mtr_commit(&mtr);

	dict_boot();

	dict_insert_initial_data();
}
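
dict_create() shows the mini-transaction lifecycle at its smallest: every latched page access and every redo-logged write sits between one mtr_start() and the mtr_commit() that releases the latches and appends the accumulated redo log. Reduced to a skeleton, with the body comment marking where the real work goes:

/* The canonical mini-transaction skeleton used throughout these
   examples; a sketch, not a complete function. */
mtr_t	mtr;

mtr_start(&mtr);

/* ... latch pages, read them, do mlog_write_* calls ... */

mtr_commit(&mtr);	/* releases the latches and writes the
			   accumulated redo log records */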
Example #11
File: dict0boot.c Project: A-eolus/mysql
/**********************************************************************//**
Returns a new table, index, or space id. */
UNIV_INTERN
void
dict_hdr_get_new_id(
/*================*/
	table_id_t*	table_id,	/*!< out: table id
					(not assigned if NULL) */
	index_id_t*	index_id,	/*!< out: index id
					(not assigned if NULL) */
	ulint*		space_id)	/*!< out: space id
					(not assigned if NULL) */
{
	dict_hdr_t*	dict_hdr;
	ib_id_t		id;
	mtr_t		mtr;

	mtr_start(&mtr);

	dict_hdr = dict_hdr_get(&mtr);

	if (table_id) {
		id = mach_read_from_8(dict_hdr + DICT_HDR_TABLE_ID);
		id++;
		mlog_write_ull(dict_hdr + DICT_HDR_TABLE_ID, id, &mtr);
		*table_id = id;
	}

	if (index_id) {
		id = mach_read_from_8(dict_hdr + DICT_HDR_INDEX_ID);
		id++;
		mlog_write_ull(dict_hdr + DICT_HDR_INDEX_ID, id, &mtr);
		*index_id = id;
	}

	if (space_id) {
		*space_id = mtr_read_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID,
					   MLOG_4BYTES, &mtr);
		if (fil_assign_new_space_id(space_id)) {
			mlog_write_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID,
					 *space_id, MLOG_4BYTES, &mtr);
		}
	}

	mtr_commit(&mtr);
}
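
Unlike the dulint-returning variant in Example #6, this signature hands each id back through an out-parameter and skips any that is passed as NULL. A caller-side usage sketch, with illustrative variable names:

/* Usage sketch: allocate a table id and a space id in one call,
   skipping the index id by passing NULL. */
table_id_t	table_id;
ulint		space_id;

dict_hdr_get_new_id(&table_id, NULL /* no index id */, &space_id);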
Example #12
/**********************************************************************//**
Writes the current value of the row id counter to the dictionary header file
page. */
UNIV_INTERN
void
dict_hdr_flush_row_id(void)
/*=======================*/
{
	dict_hdr_t*	dict_hdr;
	dulint		id;
	mtr_t		mtr;

	ut_ad(mutex_own(&(dict_sys->mutex)));

	id = dict_sys->row_id;

	mtr_start(&mtr);

	dict_hdr = dict_hdr_get(&mtr);

	mlog_write_dulint(dict_hdr + DICT_HDR_ROW_ID, id, &mtr);

	mtr_commit(&mtr);
}
Example #13
/***********************************************************************//**
Position the purge sys "iterator" on the undo record to use for purging. */
static
void
trx_purge_read_undo_rec(
    /*====================*/
    trx_purge_t*	purge_sys,		/*!< in/out: purge instance */
    ulint		zip_size)		/*!< in: block size or 0 */
{
    ulint		page_no;
    ulint		offset = 0;
    ib_uint64_t	undo_no = 0;

    purge_sys->hdr_offset = purge_sys->rseg->last_offset;
    page_no = purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;

    if (purge_sys->rseg->last_del_marks) {
        mtr_t		mtr;
        trx_undo_rec_t*	undo_rec;

        mtr_start(&mtr);

        undo_rec = trx_undo_get_first_rec(
                       0 /* System space id */, zip_size,
                       purge_sys->hdr_page_no,
                       purge_sys->hdr_offset, RW_S_LATCH, &mtr);

        if (undo_rec != NULL) {
            offset = page_offset(undo_rec);
            undo_no = trx_undo_rec_get_undo_no(undo_rec);
            page_no = page_get_page_no(page_align(undo_rec));
        }

        mtr_commit(&mtr);
    }

    purge_sys->offset = offset;
    purge_sys->page_no = page_no;
    purge_sys->purge_undo_no = undo_no;

    purge_sys->next_stored = TRUE;
}
Example #14
/***********************************************************************//**
Updates the last not yet purged history log info in rseg when we have purged
a whole undo log. Also advances purge_sys->purge_trx_no past the purged log. */
static
void
trx_purge_rseg_get_next_history_log(
    /*================================*/
    trx_rseg_t*	rseg)	/*!< in: rollback segment */
{
    page_t*		undo_page;
    trx_ulogf_t*	log_hdr;
    fil_addr_t	prev_log_addr;
    trx_id_t	trx_no;
    ibool		del_marks;
    mtr_t		mtr;
    rseg_queue_t	rseg_queue;
    const void*	ptr;

    mutex_enter(&(rseg->mutex));

    ut_a(rseg->last_page_no != FIL_NULL);

    purge_sys->purge_trx_no = rseg->last_trx_no + 1;
    purge_sys->purge_undo_no = 0;
    purge_sys->next_stored = FALSE;

    mtr_start(&mtr);

    undo_page = trx_undo_page_get_s_latched(
                    rseg->space, rseg->zip_size, rseg->last_page_no, &mtr);

    log_hdr = undo_page + rseg->last_offset;

    /* Increase the purge page count by one for every handled log */

    purge_sys->n_pages_handled++;

    prev_log_addr = trx_purge_get_log_from_hist(
                        flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));

    if (prev_log_addr.page == FIL_NULL) {
        /* No logs left in the history list */

        rseg->last_page_no = FIL_NULL;

        mutex_exit(&(rseg->mutex));
        mtr_commit(&mtr);

        mutex_enter(&kernel_mutex);

        /* Add debug code to track history list corruption reported
        on the MySQL mailing list on Nov 9, 2004. The fut0lst.c
        file-based list was corrupt. The prev node pointer was
        FIL_NULL, even though the list length was over 8 million nodes!
        We assume that purge truncates the history list in large
        pieces, and if we reach the head of the list here, the
        list cannot be longer than 2,000,000 undo logs now. */

        if (trx_sys->rseg_history_len > 2000000) {
            ut_print_timestamp(stderr);
            fprintf(stderr,
                    "  InnoDB: Warning: purge reached the"
                    " head of the history list,\n"
                    "InnoDB: but its length is still"
                    " reported as %lu! Make a detailed bug\n"
                    "InnoDB: report, and submit it"
                    " to http://bugs.mysql.com\n",
                    (ulong) trx_sys->rseg_history_len);
        }

        mutex_exit(&kernel_mutex);

        return;
    }

    mutex_exit(&(rseg->mutex));
    mtr_commit(&mtr);

    /* Read the trx number and del marks from the previous log header */
    mtr_start(&mtr);

    log_hdr = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
                                          prev_log_addr.page, &mtr)
              + prev_log_addr.boffset;

    trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);

    del_marks = mach_read_from_2(log_hdr + TRX_UNDO_DEL_MARKS);

    mtr_commit(&mtr);

    mutex_enter(&(rseg->mutex));

    rseg->last_page_no = prev_log_addr.page;
    rseg->last_offset = prev_log_addr.boffset;
    rseg->last_trx_no = trx_no;
    rseg->last_del_marks = del_marks;

    rseg_queue.rseg = rseg;
    rseg_queue.trx_no = rseg->last_trx_no;

    /* Purge can also produce events, however these are already ordered
    in the rollback segment and any user generated event will be greater
    than the events that Purge produces. ie. Purge can never produce
    events from an empty rollback segment. */

    mutex_enter(&purge_sys->bh_mutex);

    ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
    ut_a(ptr != NULL);

    mutex_exit(&purge_sys->bh_mutex);

    mutex_exit(&(rseg->mutex));
}
Example #15
/********************************************************************//**
Removes unnecessary history data from a rollback segment. */
static
void
trx_purge_truncate_rseg_history(
    /*============================*/
    trx_rseg_t*	rseg,		/*!< in: rollback segment */
    trx_id_t	limit_trx_no,	/*!< in: remove update undo logs whose
					trx number is < limit_trx_no */
    undo_no_t	limit_undo_no)	/*!< in: if transaction number is equal
					to limit_trx_no, truncate undo records
					with undo number < limit_undo_no */
{
    fil_addr_t	hdr_addr;
    fil_addr_t	prev_hdr_addr;
    trx_rsegf_t*	rseg_hdr;
    page_t*		undo_page;
    trx_ulogf_t*	log_hdr;
    trx_usegf_t*	seg_hdr;
    ulint		n_removed_logs	= 0;
    mtr_t		mtr;
    trx_id_t	undo_trx_no;

    mtr_start(&mtr);
    mutex_enter(&(rseg->mutex));

    rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
                             rseg->page_no, &mtr);

    hdr_addr = trx_purge_get_log_from_hist(
                   flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr));
loop:
    if (hdr_addr.page == FIL_NULL) {

        mutex_exit(&(rseg->mutex));

        mtr_commit(&mtr);

        return;
    }

    undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
                                  hdr_addr.page, &mtr);

    log_hdr = undo_page + hdr_addr.boffset;
    undo_trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);

    if (undo_trx_no >= limit_trx_no) {
        if (undo_trx_no == limit_trx_no) {
            trx_undo_truncate_start(rseg, rseg->space,
                                    hdr_addr.page,
                                    hdr_addr.boffset,
                                    limit_undo_no);
        }

        mutex_enter(&kernel_mutex);
        ut_a(trx_sys->rseg_history_len >= n_removed_logs);
        trx_sys->rseg_history_len -= n_removed_logs;
        mutex_exit(&kernel_mutex);

        flst_truncate_end(rseg_hdr + TRX_RSEG_HISTORY,
                          log_hdr + TRX_UNDO_HISTORY_NODE,
                          n_removed_logs, &mtr);

        mutex_exit(&(rseg->mutex));
        mtr_commit(&mtr);

        return;
    }

    prev_hdr_addr = trx_purge_get_log_from_hist(
                        flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));
    n_removed_logs++;

    seg_hdr = undo_page + TRX_UNDO_SEG_HDR;

    if ((mach_read_from_2(seg_hdr + TRX_UNDO_STATE) == TRX_UNDO_TO_PURGE)
            && (mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG) == 0)) {

        /* We can free the whole log segment */

        mutex_exit(&(rseg->mutex));
        mtr_commit(&mtr);

        trx_purge_free_segment(rseg, hdr_addr, n_removed_logs);

        n_removed_logs = 0;
    } else {
        mutex_exit(&(rseg->mutex));
        mtr_commit(&mtr);
    }

    mtr_start(&mtr);
    mutex_enter(&(rseg->mutex));

    rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
                             rseg->page_no, &mtr);

    hdr_addr = prev_hdr_addr;

    goto loop;
}
Example #16
/**********************************************************************//**
Frees an undo log segment which is in the history list. Cuts the end of the
history list at the youngest undo log in this segment. */
static
void
trx_purge_free_segment(
    /*===================*/
    trx_rseg_t*	rseg,		/*!< in: rollback segment */
    fil_addr_t	hdr_addr,	/*!< in: the file address of log_hdr */
    ulint		n_removed_logs)	/*!< in: count of how many undo logs we
					will cut off from the end of the
					history list */
{
    page_t*		undo_page;
    trx_rsegf_t*	rseg_hdr;
    trx_ulogf_t*	log_hdr;
    trx_usegf_t*	seg_hdr;
    ibool		freed;
    ulint		seg_size;
    ulint		hist_size;
    ibool		marked		= FALSE;
    mtr_t		mtr;

    /*	fputs("Freeing an update undo log segment\n", stderr); */

loop:
    mtr_start(&mtr);
    mutex_enter(&(rseg->mutex));

    rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
                             rseg->page_no, &mtr);

    undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
                                  hdr_addr.page, &mtr);
    seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
    log_hdr = undo_page + hdr_addr.boffset;

    /* Mark the last undo log totally purged, so that if the system
    crashes, the tail of the undo log will not get accessed again. The
    list of pages in the undo log tail gets inconsistent during the
    freeing of the segment, and therefore purge should not try to access
    them again. */

    if (!marked) {
        mlog_write_ulint(log_hdr + TRX_UNDO_DEL_MARKS, FALSE,
                         MLOG_2BYTES, &mtr);
        marked = TRUE;
    }

    freed = fseg_free_step_not_header(seg_hdr + TRX_UNDO_FSEG_HEADER,
                                      &mtr);
    if (!freed) {
        mutex_exit(&(rseg->mutex));
        mtr_commit(&mtr);

        goto loop;
    }

    /* The page list may now be inconsistent, but the length field
    stored in the list base node tells us how big it was before we
    started the freeing. */

    seg_size = flst_get_len(seg_hdr + TRX_UNDO_PAGE_LIST, &mtr);

    /* We may free the undo log segment header page; it must be freed
    within the same mtr as the undo log header is removed from the
    history list: otherwise, in case of a database crash, the segment
    could become inaccessible garbage in the file space. */

    flst_cut_end(rseg_hdr + TRX_RSEG_HISTORY,
                 log_hdr + TRX_UNDO_HISTORY_NODE, n_removed_logs, &mtr);

    mutex_enter(&kernel_mutex);
    ut_ad(trx_sys->rseg_history_len >= n_removed_logs);
    trx_sys->rseg_history_len -= n_removed_logs;
    mutex_exit(&kernel_mutex);

    freed = FALSE;

    while (!freed) {
        /* Here we assume that a file segment with just the header
        page can be freed in a few steps, so that the buffer pool
        is not flooded with bufferfixed pages: see the note in
        fsp0fsp.c. */

        freed = fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER,
                               &mtr);
    }

    hist_size = mtr_read_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
                               MLOG_4BYTES, &mtr);
    ut_ad(hist_size >= seg_size);

    mlog_write_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
                     hist_size - seg_size, MLOG_4BYTES, &mtr);

    ut_ad(rseg->curr_size >= seg_size);

    rseg->curr_size -= seg_size;

    mutex_exit(&(rseg->mutex));

    mtr_commit(&mtr);
}
Example #17
/****************************************************************//**
Commits a transaction. */
UNIV_INTERN
void
trx_commit_off_kernel(
/*==================*/
	trx_t*	trx)	/*!< in: transaction */
{
	page_t*		update_hdr_page;
	ib_uint64_t	lsn		= 0;
	trx_rseg_t*	rseg;
	trx_undo_t*	undo;
	mtr_t		mtr;

	ut_ad(mutex_own(&kernel_mutex));

	trx->must_flush_log_later = FALSE;

	rseg = trx->rseg;

	if (trx->insert_undo != NULL || trx->update_undo != NULL) {

		mutex_exit(&kernel_mutex);

		mtr_start(&mtr);

		/* Change the undo log segment states from TRX_UNDO_ACTIVE
		to some other state: these modifications to the file data
		structure define the transaction as committed in the file
		based world, at the serialization point of the log sequence
		number lsn obtained below. */

		mutex_enter(&(rseg->mutex));

		if (trx->insert_undo != NULL) {
			trx_undo_set_state_at_finish(
				rseg, trx, trx->insert_undo, &mtr);
		}

		undo = trx->update_undo;

		if (undo) {
			mutex_enter(&kernel_mutex);
			trx->no = trx_sys_get_new_trx_no();

			mutex_exit(&kernel_mutex);

			/* It is not necessary to obtain trx->undo_mutex here
			because only a single OS thread is allowed to do the
			transaction commit for this transaction. */

			update_hdr_page = trx_undo_set_state_at_finish(
				rseg, trx, undo, &mtr);

			/* We have to do the cleanup for the update log while
			holding the rseg mutex because update log headers
			have to be put to the history list in the order of
			the trx number. */

			trx_undo_update_cleanup(trx, update_hdr_page, &mtr);
		}

		mutex_exit(&(rseg->mutex));

		/* Update the latest MySQL binlog name and offset info
		in trx sys header if MySQL binlogging is on or the database
		server is a MySQL replication slave */

		if (trx->mysql_log_file_name
		    && trx->mysql_log_file_name[0] != '\0') {
			trx_sys_update_mysql_binlog_offset(
				trx->mysql_log_file_name,
				trx->mysql_log_offset,
				TRX_SYS_MYSQL_LOG_INFO, &mtr);
			trx->mysql_log_file_name = NULL;
		}

		/* The following call commits the mini-transaction, making the
		whole transaction committed in the file-based world, at this
		log sequence number. The transaction becomes 'durable' when
		we write the log to disk, but in the logical sense the commit
		in the file-based data structures (undo logs etc.) happens
		here.

		NOTE that transaction numbers, which are assigned only to
		transactions with an update undo log, do not necessarily come
		in exactly the same order as commit lsn's, if the transactions
		have different rollback segments. To get exactly the same
		order we should hold the kernel mutex up to this point,
		adding to the contention of the kernel mutex. However, if
		a transaction T2 is able to see modifications made by
		a transaction T1, T2 will always get a bigger transaction
		number and a bigger commit lsn than T1. */

		/*--------------*/
		mtr_commit(&mtr);
		/*--------------*/
		lsn = mtr.end_lsn;

		mutex_enter(&kernel_mutex);
	}

	ut_ad(trx->conc_state == TRX_ACTIVE
	      || trx->conc_state == TRX_PREPARED);
	ut_ad(mutex_own(&kernel_mutex));

	/* The following assignment makes the transaction committed in memory
	and makes its changes to data visible to other transactions.
	NOTE that there is a small discrepancy from the strict formal
	visibility rules here: a human user of the database can see
	modifications made by another transaction T even before the necessary
	log segment has been flushed to the disk. If the database happens to
	crash before the flush, the user has seen modifications from T which
	will never be a committed transaction. However, any transaction T2
	which sees the modifications of the committing transaction T, and
	which also itself makes modifications to the database, will get an lsn
	larger than the committing transaction T. In the case where the log
	flush fails, and T never gets committed, also T2 will never get
	committed. */

	/*--------------------------------------*/
	trx->conc_state = TRX_COMMITTED_IN_MEMORY;
	/*--------------------------------------*/

	/* If we release kernel_mutex below while we are still doing
	recovery, i.e. the background rollback thread is still active,
	then there is a chance that the rollback thread sees
	this trx as COMMITTED_IN_MEMORY and goes ahead to clean it
	up by calling trx_cleanup_at_db_startup(). This can happen
	when we are committing a trx here that was left in
	PREPARED state during the crash. Note that the commit of the
	rollback of a PREPARED trx happens in the recovery thread
	while the rollback of other transactions happens in the
	background thread. To avoid this race we unconditionally
	unset the is_recovered flag from the trx. */

	trx->is_recovered = FALSE;

	lock_release_off_kernel(trx);

	if (trx->global_read_view) {
		read_view_close(trx->global_read_view);
		mem_heap_empty(trx->global_read_view_heap);
		trx->global_read_view = NULL;
	}

	trx->read_view = NULL;

	if (lsn) {

		mutex_exit(&kernel_mutex);

		if (trx->insert_undo != NULL) {

			trx_undo_insert_cleanup(trx);
		}

		/* NOTE that we could possibly make a group commit more
		efficient here: call os_thread_yield here to allow also other
		trxs to come to commit! */

		/*-------------------------------------*/

		/* Depending on the my.cnf options, we may now write the log
		buffer to the log files, making the transaction durable if
		the OS does not crash. We may also flush the log files to
		disk, making the transaction durable also at an OS crash or a
		power outage.

		The idea in InnoDB's group commit is that a group of
		transactions gather behind a trx doing a physical disk write
		to log files, and when that physical write has been completed,
		one of those transactions does a write which commits the whole
		group. Note that this group commit will only bring benefit if
		there are > 2 users in the database. Then at least 2 users can
		gather behind one doing the physical log write to disk.

		If we are calling trx_commit() under prepare_commit_mutex, we
		will delay possible log write and flush to a separate function
		trx_commit_complete_for_mysql(), which is only called when the
		thread has released the mutex. This is to make the
		group commit algorithm to work. Otherwise, the prepare_commit
		mutex would serialize all commits and prevent a group of
		transactions from gathering. */

		if (trx->flush_log_later) {
			/* Do nothing yet */
			trx->must_flush_log_later = TRUE;
		} else if (srv_flush_log_at_trx_commit == 0) {
			/* Do nothing */
		} else if (srv_flush_log_at_trx_commit == 1) {
			if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
				/* Write the log but do not flush it to disk */

				log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
						FALSE);
			} else {
				/* Write the log to the log files AND flush
				them to disk */

				log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
			}
		} else if (srv_flush_log_at_trx_commit == 2) {

			/* Write the log but do not flush it to disk */

			log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
		} else {
			ut_error;
		}

		trx->commit_lsn = lsn;

		/*-------------------------------------*/

		mutex_enter(&kernel_mutex);
	}

	/* Free all savepoints */
	trx_roll_free_all_savepoints(trx);

	trx->conc_state = TRX_NOT_STARTED;
	trx->rseg = NULL;
	trx->undo_no = ut_dulint_zero;
	trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;

	ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
	ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0);

	UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
}
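
The if/else ladder near the end encodes the srv_flush_log_at_trx_commit policy, and the identical ladder reappears in trx_prepare_off_kernel in Example #18. Factoring it into a helper, as below, is our illustration of the policy, not how the upstream file organizes the code:

/* Hedged sketch: the flush-at-commit policy ladder as a helper;
   my_flush_log_at_commit() is a hypothetical name. */
static void
my_flush_log_at_commit(ib_uint64_t lsn)
{
	if (srv_flush_log_at_trx_commit == 0) {
		/* 0: do nothing; the background thread will flush */
	} else if (srv_flush_log_at_trx_commit == 1) {
		/* 1: write the log, and also flush it to disk
		unless fsync is disabled by SRV_UNIX_NOSYNC */
		log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
				srv_unix_file_flush_method
				!= SRV_UNIX_NOSYNC);
	} else if (srv_flush_log_at_trx_commit == 2) {
		/* 2: write the log but do not flush it to disk */
		log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
	} else {
		ut_error;
	}
}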
Example #18
/****************************************************************//**
Prepares a transaction. */
UNIV_INTERN
void
trx_prepare_off_kernel(
/*===================*/
	trx_t*	trx)	/*!< in: transaction */
{
	trx_rseg_t*	rseg;
	ib_uint64_t	lsn		= 0;
	mtr_t		mtr;

	ut_ad(mutex_own(&kernel_mutex));

	rseg = trx->rseg;

	if (trx->insert_undo != NULL || trx->update_undo != NULL) {

		mutex_exit(&kernel_mutex);

		mtr_start(&mtr);

		/* Change the undo log segment states from TRX_UNDO_ACTIVE
		to TRX_UNDO_PREPARED: these modifications to the file data
		structure define the transaction as prepared in the
		file-based world, at the serialization point of lsn. */

		mutex_enter(&(rseg->mutex));

		if (trx->insert_undo != NULL) {

			/* It is not necessary to obtain trx->undo_mutex here
			because only a single OS thread is allowed to do the
			transaction prepare for this transaction. */

			trx_undo_set_state_at_prepare(trx, trx->insert_undo,
						      &mtr);
		}

		if (trx->update_undo) {
			trx_undo_set_state_at_prepare(
				trx, trx->update_undo, &mtr);
		}

		mutex_exit(&(rseg->mutex));

		/*--------------*/
		mtr_commit(&mtr);	/* This mtr commit makes the
					transaction prepared in the file-based
					world */
		/*--------------*/
		lsn = mtr.end_lsn;

		mutex_enter(&kernel_mutex);
	}

	ut_ad(mutex_own(&kernel_mutex));

	/*--------------------------------------*/
	trx->conc_state = TRX_PREPARED;
	/*--------------------------------------*/

	if (lsn) {
		/* Depending on the my.cnf options, we may now write the log
		buffer to the log files, making the prepared state of the
		transaction durable if the OS does not crash. We may also
		flush the log files to disk, making the prepared state of the
		transaction durable also at an OS crash or a power outage.

		The idea in InnoDB's group prepare is that a group of
		transactions gather behind a trx doing a physical disk write
		to log files, and when that physical write has been completed,
		one of those transactions does a write which prepares the whole
		group. Note that this group prepare will only bring benefit if
		there are > 2 users in the database. Then at least 2 users can
		gather behind one doing the physical log write to disk.

		TODO: find out if MySQL holds some mutex when calling this.
		That would spoil our group prepare algorithm. */

		mutex_exit(&kernel_mutex);

		if (srv_flush_log_at_trx_commit == 0) {
			/* Do nothing */
		} else if (srv_flush_log_at_trx_commit == 1) {
			if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
				/* Write the log but do not flush it to disk */

				log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
						FALSE);
			} else {
				/* Write the log to the log files AND flush
				them to disk */

				log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
			}
		} else if (srv_flush_log_at_trx_commit == 2) {

			/* Write the log but do not flush it to disk */

			log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
		} else {
			ut_error;
		}

		mutex_enter(&kernel_mutex);
	}
}
Example #19
/*******************************************************************//**
Fills the "lock_data" member of i_s_locks_row_t object.
If memory cannot be allocated, FALSE is returned.
@return	FALSE if allocation fails */
static
ibool
fill_lock_data(
/*===========*/
	const char**		lock_data,/*!< out: "lock_data" to fill */
	const lock_t*		lock,	/*!< in: lock used to find the data */
	ulint			heap_no,/*!< in: rec num used to find the data */
	trx_i_s_cache_t*	cache)	/*!< in/out: cache where to store
					volatile data */
{
	mtr_t			mtr;

	const buf_block_t*	block;
	const page_t*		page;
	const rec_t*		rec;

	ut_a(lock_get_type(lock) == LOCK_REC);

	mtr_start(&mtr);

	block = buf_page_try_get(lock_rec_get_space_id(lock),
				 lock_rec_get_page_no(lock),
				 &mtr);

	if (block == NULL) {

		*lock_data = NULL;

		mtr_commit(&mtr);

		return(TRUE);
	}

	page = (const page_t*) buf_block_get_frame(block);

	rec = page_find_rec_with_heap_no(page, heap_no);

	if (page_rec_is_infimum(rec)) {

		*lock_data = ha_storage_put_str_memlim(
			cache->storage, "infimum pseudo-record",
			MAX_ALLOWED_FOR_STORAGE(cache));
	} else if (page_rec_is_supremum(rec)) {

		*lock_data = ha_storage_put_str_memlim(
			cache->storage, "supremum pseudo-record",
			MAX_ALLOWED_FOR_STORAGE(cache));
	} else {

		const dict_index_t*	index;
		ulint			n_fields;
		mem_heap_t*		heap;
		ulint			offsets_onstack[REC_OFFS_NORMAL_SIZE];
		ulint*			offsets;
		char			buf[TRX_I_S_LOCK_DATA_MAX_LEN];
		ulint			buf_used;
		ulint			i;

		rec_offs_init(offsets_onstack);
		offsets = offsets_onstack;

		index = lock_rec_get_index(lock);

		n_fields = dict_index_get_n_unique(index);

		ut_a(n_fields > 0);

		heap = NULL;
		offsets = rec_get_offsets(rec, index, offsets, n_fields,
					  &heap);

		/* format and store the data */

		buf_used = 0;
		for (i = 0; i < n_fields; i++) {

			buf_used += put_nth_field(
				buf + buf_used, sizeof(buf) - buf_used,
				i, index, rec, offsets) - 1;
		}

		*lock_data = (const char*) ha_storage_put_memlim(
			cache->storage, buf, buf_used + 1,
			MAX_ALLOWED_FOR_STORAGE(cache));

		if (UNIV_UNLIKELY(heap != NULL)) {

			/* this means that rec_get_offsets() has created a new
			heap and has stored offsets in it; check that this is
			really the case and free the heap */
			ut_a(offsets != offsets_onstack);
			mem_heap_free(heap);
		}
	}

	mtr_commit(&mtr);

	if (*lock_data == NULL) {

		return(FALSE);
	}

	return(TRUE);
}
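
The offsets handling inside fill_lock_data() is the standard InnoDB idiom: start from a stack array of REC_OFFS_NORMAL_SIZE elements, let rec_get_offsets() spill to a heap only when the record needs more, and free that heap afterwards. Distilled into a hypothetical wrapper my_use_offsets(), for any record and its index:

/* Hedged sketch of the stack-first offsets idiom. */
static void
my_use_offsets(const rec_t* rec, const dict_index_t* index)
{
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets	= offsets_;
	mem_heap_t*	heap	= NULL;

	rec_offs_init(offsets_);

	/* Allocates a heap only if the record has more fields
	than the stack array can describe. */
	offsets = rec_get_offsets(rec, index, offsets,
				  ULINT_UNDEFINED, &heap);

	/* ... use rec together with offsets ... */

	if (UNIV_UNLIKELY(heap != NULL)) {
		mem_heap_free(heap);	/* free the spill heap */
	}
}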
Example #20
/*****************************************************************//**
Finds out if an active transaction has inserted or modified a secondary
index record. NOTE: the kernel mutex is temporarily released in this
function!
@return NULL if committed, else the active transaction */
UNIV_INTERN
trx_t*
row_vers_impl_x_locked_off_kernel(
/*==============================*/
	const rec_t*	rec,	/*!< in: record in a secondary index */
	dict_index_t*	index,	/*!< in: the secondary index */
	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
{
	dict_index_t*	clust_index;
	rec_t*		clust_rec;
	ulint*		clust_offsets;
	rec_t*		version;
	trx_id_t	trx_id;
	mem_heap_t*	heap;
	mem_heap_t*	heap2;
	dtuple_t*	row;
	dtuple_t*	entry	= NULL; /* assignment to eliminate compiler
					warning */
	trx_t*		trx;
	ulint		rec_del;
#ifdef UNIV_DEBUG
	ulint		err;
#endif /* UNIV_DEBUG */
	mtr_t		mtr;
	ulint		comp;

	ut_ad(mutex_own(&kernel_mutex));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	mutex_exit(&kernel_mutex);

	mtr_start(&mtr);

	/* Search for the clustered index record: this is a time-consuming
	operation: therefore we release the kernel mutex; also, the release
	is required by the latching order convention. The latch on the
	clustered index locks the top of the stack of versions. We also
	reserve purge_latch to lock the bottom of the version stack. */

	clust_rec = row_get_clust_rec(BTR_SEARCH_LEAF, rec, index,
				      &clust_index, &mtr);
	if (!clust_rec) {
		/* In a rare case it is possible that no clust rec is found
		for a secondary index record: if in row0umod.c
		row_undo_mod_remove_clust_low() we have already removed the
		clust rec, while purge is still cleaning and removing
		secondary index records associated with earlier versions of
		the clustered index record. In that case there cannot be
		any implicit lock on the secondary index record, because
		an active transaction which has modified the secondary index
		record has also modified the clustered index record. And in
		a rollback we always undo the modifications to secondary index
		records before the clustered index record. */

		mutex_enter(&kernel_mutex);
		mtr_commit(&mtr);

		return(NULL);
	}

	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(clust_rec, clust_index, NULL,
					ULINT_UNDEFINED, &heap);
	trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);

	mtr_s_lock(&(purge_sys->latch), &mtr);

	mutex_enter(&kernel_mutex);

	trx = NULL;
	if (!trx_is_active(trx_id)) {
		/* The transaction that modified or inserted clust_rec is no
		longer active: no implicit lock on rec */
		goto exit_func;
	}

	if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index,
				      clust_offsets, TRUE)) {
		/* Corruption noticed: try to avoid a crash by returning */
		goto exit_func;
	}

	comp = page_rec_is_comp(rec);
	ut_ad(index->table == clust_index->table);
	ut_ad(!!comp == dict_table_is_comp(index->table));
	ut_ad(!comp == !page_rec_is_comp(clust_rec));

	/* We look up if some earlier version, which was modified by the trx_id
	transaction, of the clustered index record would require rec to be in
	a different state (delete marked or unmarked, or have different field
	values, or not existing). If there is such a version, then rec was
	modified by the trx_id transaction, and it has an implicit x-lock on
	rec. Note that if clust_rec itself would require rec to be in a
	different state, then the trx_id transaction has not yet had time to
	modify rec, and does not necessarily have an implicit x-lock on rec. */

	rec_del = rec_get_deleted_flag(rec, comp);
	trx = NULL;

	version = clust_rec;

	for (;;) {
		rec_t*		prev_version;
		ulint		vers_del;
		row_ext_t*	ext;
		trx_id_t	prev_trx_id;

		mutex_exit(&kernel_mutex);

		/* While we retrieve an earlier version of clust_rec, we
		release the kernel mutex, because it may take time to access
		the disk. After the release, we have to check if the trx_id
		transaction is still active. We keep the semaphore in mtr on
		the clust_rec page, so that no other transaction can update
		it and get an implicit x-lock on rec. */

		heap2 = heap;
		heap = mem_heap_create(1024);
#ifdef UNIV_DEBUG
		err =
#endif /* UNIV_DEBUG */
		trx_undo_prev_version_build(clust_rec, &mtr, version,
					    clust_index, clust_offsets,
					    heap, &prev_version);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (prev_version == NULL) {
			mutex_enter(&kernel_mutex);

			if (!trx_is_active(trx_id)) {
				/* Transaction no longer active: no
				implicit x-lock */

				break;
			}

			/* If the transaction is still active,
			clust_rec must be a fresh insert, because no
			previous version was found. */
			ut_ad(err == DB_SUCCESS);

			/* It was a freshly inserted version: there is an
			implicit x-lock on rec */

			trx = trx_get_on_id(trx_id);

			break;
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL, ULINT_UNDEFINED, &heap);

		vers_del = rec_get_deleted_flag(prev_version, comp);
		prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
						 clust_offsets);
		/* The stack of versions is locked by mtr.  Thus, it
		is safe to fetch the prefixes for externally stored
		columns. */
		row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
				clust_offsets, NULL, &ext, heap);
		entry = row_build_index_entry(row, ext, index, heap);
		/* entry may be NULL if a record was inserted in place
		of a deleted record, and the BLOB pointers of the new
		record were not initialized yet.  But in that case,
		prev_version should be NULL. */
		ut_a(entry);

		mutex_enter(&kernel_mutex);

		if (!trx_is_active(trx_id)) {
			/* Transaction no longer active: no implicit x-lock */

			break;
		}

		/* If we get here, we know that the trx_id transaction is
		still active and it has modified prev_version. Let us check
		if prev_version would require rec to be in a different
		state. */

		/* The previous version of clust_rec must be
		accessible, because the transaction is still active
		and clust_rec was not a fresh insert. */
		ut_ad(err == DB_SUCCESS);

		/* We check whether entry and rec are identical in the
		alphabetical ordering */
		if (0 == cmp_dtuple_rec(entry, rec, offsets)) {
			/* The delete marks of rec and prev_version should be
			equal for rec to be in the state required by
			prev_version */

			if (rec_del != vers_del) {
				trx = trx_get_on_id(trx_id);

				break;
			}

			/* It is possible that the row was updated so that the
			secondary index record remained the same in
			alphabetical ordering, but the field values changed
			still. For example, 'abc' -> 'ABC'. Check also that. */

			dtuple_set_types_binary(entry,
						dtuple_get_n_fields(entry));
			if (0 != cmp_dtuple_rec(entry, rec, offsets)) {

				trx = trx_get_on_id(trx_id);

				break;
			}
		} else if (!rec_del) {
			/* The delete mark should be set in rec for it to be
			in the state required by prev_version */

			trx = trx_get_on_id(trx_id);

			break;
		}

		if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
			/* The chain of versions modified by the trx_id
			transaction ends at prev_version: no implicit
			x-lock */

			break;
		}

		version = prev_version;
	}/* for (;;) */

exit_func:
	mtr_commit(&mtr);
	mem_heap_free(heap);

	return(trx);
}
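
A pattern worth isolating from the loop above: before every potentially disk-bound call the kernel mutex is released, and after it is re-acquired the candidate transaction must be re-checked, because it may have committed in between. A sketch, with my_fetch_prev_version() as a hypothetical stand-in for the disk-bound call trx_undo_prev_version_build():

mutex_exit(&kernel_mutex);

my_fetch_prev_version();	/* may read undo pages from disk;
				the trx_id transaction can commit
				meanwhile */

mutex_enter(&kernel_mutex);

if (!trx_is_active(trx_id)) {
	/* committed in the meantime: no implicit x-lock */
}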
Example #21
ulint
dict_truncate_index_tree(
/*=====================*/
				/* out: new root page number, or
				FIL_NULL on failure */
	dict_table_t*	table,	/* in: the table the index belongs to */
	btr_pcur_t*	pcur,	/* in/out: persistent cursor pointing to
				record in the clustered index of
				SYS_INDEXES table. The cursor may be
				repositioned in this call. */
	mtr_t*		mtr)	/* in: mtr having the latch
				on the record page. The mtr may be
				committed and restarted in this call. */
{
	ulint		root_page_no;
	ulint		space;
	ulint		type;
	dulint		index_id;
	rec_t*		rec;
	byte*		ptr;
	ulint		len;
	ulint		comp;
	dict_index_t*	index;

	ut_ad(mutex_own(&(dict_sys->mutex)));
	ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
	rec = btr_pcur_get_rec(pcur);
	ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len);

	ut_ad(len == 4);

	root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);

	if (root_page_no == FIL_NULL) {
		/* The tree has been freed. */

		ut_print_timestamp(stderr);
		fprintf(stderr, "  InnoDB: Trying to TRUNCATE"
			" a missing index of table %s!\n", table->name);
		return(FIL_NULL);
	}

	ptr = rec_get_nth_field_old(rec,
				    DICT_SYS_INDEXES_SPACE_NO_FIELD, &len);

	ut_ad(len == 4);

	space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);

	if (!fil_tablespace_exists_in_mem(space)) {
		/* It is a single table tablespace and the .ibd file is
		missing: do nothing */

		ut_print_timestamp(stderr);
		fprintf(stderr, "  InnoDB: Trying to TRUNCATE"
			" a missing .ibd file of table %s!\n", table->name);
		return(FIL_NULL);
	}

	ptr = rec_get_nth_field_old(rec,
				    DICT_SYS_INDEXES_TYPE_FIELD, &len);
	ut_ad(len == 4);
	type = mach_read_from_4(ptr);

	ptr = rec_get_nth_field_old(rec, 1, &len);
	ut_ad(len == 8);
	index_id = mach_read_from_8(ptr);

	/* We free all the pages but the root page first; this operation
	may span several mini-transactions */

	btr_free_but_not_root(space, root_page_no);

	/* Then we free the root page in the same mini-transaction where
	we create the b-tree and write its new root page number to the
	appropriate field in the SYS_INDEXES record: this mini-transaction
	marks the B-tree totally truncated */

	comp = page_is_comp(btr_page_get(space, root_page_no, RW_X_LATCH,
					 mtr));

	btr_free_root(space, root_page_no, mtr);
	/* We will temporarily write FIL_NULL to the PAGE_NO field
	in SYS_INDEXES, so that the database will not get into an
	inconsistent state in case it crashes between the mtr_commit()
	below and the following mtr_commit() call. */
	page_rec_write_index_page_no(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
				     FIL_NULL, mtr);

	/* We will need to commit the mini-transaction in order to avoid
	deadlocks in the btr_create() call, because otherwise we would
	be freeing and allocating pages in the same mini-transaction. */
	btr_pcur_store_position(pcur, mtr);
	mtr_commit(mtr);

	mtr_start(mtr);
	btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);

	/* Find the index corresponding to this SYS_INDEXES record. */
	for (index = UT_LIST_GET_FIRST(table->indexes);
	     index;
	     index = UT_LIST_GET_NEXT(indexes, index)) {
		if (!ut_dulint_cmp(index->id, index_id)) {
			break;
		}
	}

	root_page_no = btr_create(type, space, index_id, comp, mtr);
	if (index) {
		index->page = (unsigned int) root_page_no;
	} else {
		ut_print_timestamp(stderr);
		fprintf(stderr,
			"  InnoDB: Index %lu %lu of table %s is missing\n"
			"InnoDB: from the data dictionary during TRUNCATE!\n",
			ut_dulint_get_high(index_id),
			ut_dulint_get_low(index_id),
			table->name);
	}

	return(root_page_no);
}
Example #22
/*******************************************************************
Builds a table definition to insert. */
static
ulint
dict_build_table_def_step(
/*======================*/
				/* out: DB_SUCCESS or error code */
	que_thr_t*	thr,	/* in: query thread */
	tab_node_t*	node)	/* in: table create node */
{
	dict_table_t*	table;
	dtuple_t*	row;
	ulint		error;
	const char*	path_or_name;
	ibool		is_path;
	mtr_t		mtr;
	ulint		i;
	ulint		row_len;

	ut_ad(mutex_own(&(dict_sys->mutex)));

	table = node->table;

	table->id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID);

	thr_get_trx(thr)->table_id = table->id;

	row_len = 0;
	for (i = 0; i < table->n_def; i++) {
		row_len += dict_col_get_min_size(&table->cols[i]);
	}
	if (row_len > BTR_PAGE_MAX_REC_SIZE) {
		return(DB_TOO_BIG_RECORD);
	}

	if (srv_file_per_table) {
		/* We create a new single-table tablespace for the table.
		We initially let it be 4 pages:
		- page 0 is the fsp header and an extent descriptor page,
		- page 1 is an ibuf bitmap page,
		- page 2 is the first inode page,
		- page 3 will contain the root of the clustered index of the
		table we create here. */

		ulint	space = 0;	/* reset to zero for the call below */

		if (table->dir_path_of_temp_table) {
			/* We place tables created with CREATE TEMPORARY
			TABLE in the tmp dir of mysqld server */

			path_or_name = table->dir_path_of_temp_table;
			is_path = TRUE;
		} else {
			path_or_name = table->name;
			is_path = FALSE;
		}

		error = fil_create_new_single_table_tablespace(
			&space, path_or_name, is_path,
			FIL_IBD_FILE_INITIAL_SIZE);
		table->space = (unsigned int) space;

		if (error != DB_SUCCESS) {

			return(error);
		}

		mtr_start(&mtr);

		fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);

		mtr_commit(&mtr);
	}

	row = dict_create_sys_tables_tuple(table, node->heap);

	ins_node_set_new_row(node->tab_def, row);

	return(DB_SUCCESS);
}
Example #23
/***************************************************************
Marks the clustered index record deleted and inserts the updated version
of the record to the index. This function should be used when the ordering
fields of the clustered index record change. This should be quite rare in
database applications. */
static
ulint
row_upd_clust_rec_by_insert(
/*========================*/
				/* out: DB_SUCCESS if operation successfully
				completed, else error code or DB_LOCK_WAIT */
	upd_node_t*	node,	/* in: row update node */
	dict_index_t*	index,	/* in: clustered index of the record */
	que_thr_t*	thr,	/* in: query thread */
	ibool		check_ref,/* in: TRUE if index may be referenced in
				a foreign key constraint */
	mtr_t*		mtr)	/* in: mtr; gets committed here */
{
	mem_heap_t*	heap;
	btr_pcur_t*	pcur;
	btr_cur_t*	btr_cur;
	trx_t*		trx;
	dict_table_t*	table;
	dtuple_t*	entry;
	ulint		err;
	
	ut_ad(node);
	ut_ad(index->type & DICT_CLUSTERED);

	trx = thr_get_trx(thr);
	table = node->table;
	pcur = node->pcur;
	btr_cur	= btr_pcur_get_btr_cur(pcur);
	
	if (node->state != UPD_NODE_INSERT_CLUSTERED) {

		err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG,
						btr_cur, TRUE, thr, mtr);
		if (err != DB_SUCCESS) {
			mtr_commit(mtr);

			return(err);
		}

		/* Mark as not-owned the externally stored fields which the new
		row inherits from the delete marked record: purge should not
		free those externally stored fields even if the delete marked
		record is removed from the index tree, or updated. */

		btr_cur_mark_extern_inherited_fields(btr_cur_get_rec(btr_cur),
							node->update, mtr);
		if (check_ref) {
			/* NOTE that the following call loses
			the position of pcur ! */
			err = row_upd_check_references_constraints(
							pcur, table,
							index, thr, mtr);
			if (err != DB_SUCCESS) {
				mtr_commit(mtr);

				return(err);
			}
		}

	} 

	mtr_commit(mtr);

	node->state = UPD_NODE_INSERT_CLUSTERED;

	heap = mem_heap_create(500);
	
	entry = row_build_index_entry(node->row, index, heap);

	row_upd_clust_index_replace_new_col_vals(entry, node->update);
	
	row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
	
	/* If we return from a lock wait, for example, we may have
	extern fields marked as not-owned in entry (marked in the
	if-branch above). We must unmark them. */
	
	btr_cur_unmark_dtuple_extern_fields(entry, node->ext_vec,
							node->n_ext_vec);
	/* We must mark non-updated extern fields in entry as inherited,
	so that a possible rollback will not free them */
	
	btr_cur_mark_dtuple_inherited_extern(entry, node->ext_vec,
						node->n_ext_vec,
						node->update);
	
	err = row_ins_index_entry(index, entry, node->ext_vec,
						node->n_ext_vec, thr);
	mem_heap_free(heap);
	
	return(err);
}
Example #24
/*******************************************************************
Removes a clustered index record. The pcur in node was positioned on the
record, now it is detached. */
static
ulint
row_undo_ins_remove_clust_rec(
/*==========================*/
				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
	undo_node_t*	node,	/* in: undo node */
	que_thr_t*	thr)	/* in: query thread */
{
	btr_cur_t*	btr_cur;		
	ibool		success;
	ulint		err;
	ulint		n_tries		= 0;
	mtr_t		mtr;
	
	UT_NOT_USED(thr);

	mtr_start(&mtr);
	
	success = btr_pcur_restore_position(BTR_MODIFY_LEAF, &(node->pcur),
									&mtr);
	ut_a(success);

	if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {

		/* Drop the index tree associated with the row in
		SYS_INDEXES table: */
	
		dict_drop_index_tree(btr_pcur_get_rec(&(node->pcur)), &mtr);

		mtr_commit(&mtr);

		mtr_start(&mtr);

		success = btr_pcur_restore_position(BTR_MODIFY_LEAF,
						&(node->pcur), &mtr);
		ut_a(success);
	}
		
	btr_cur = btr_pcur_get_btr_cur(&(node->pcur));
	
	success = btr_cur_optimistic_delete(btr_cur, &mtr);

	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);

	if (success) {
		trx_undo_rec_release(node->trx, node->undo_no);

		return(DB_SUCCESS);
	}
retry:
	/* If that did not succeed, try a pessimistic descent down the tree */
	mtr_start(&mtr);
	
	success = btr_pcur_restore_position(BTR_MODIFY_TREE,
							&(node->pcur), &mtr);
	ut_a(success);

	btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr);

	/* The delete operation may fail if we have little
	file space left: TODO: easiest to crash the database
	and restart with more file space */

	if (err == DB_OUT_OF_FILE_SPACE
				&& n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {

		btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);

		n_tries++;

		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
			
		goto retry;
	}

	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);

	trx_undo_rec_release(node->trx, node->undo_no);

	return(err);
}
Example #25
/*******************************************************************
Removes a secondary index entry if found. */
static
ulint
row_undo_ins_remove_sec_low(
/*========================*/
				/* out: DB_SUCCESS, DB_FAIL, or
				DB_OUT_OF_FILE_SPACE */
	ulint		mode,	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
				depending on whether we wish optimistic or
				pessimistic descent down the index tree */
	dict_index_t*	index,	/* in: index */
	dtuple_t*	entry,	/* in: index entry to remove */
	que_thr_t*	thr)	/* in: query thread */
{
	btr_pcur_t	pcur;		
	btr_cur_t*	btr_cur;
	ibool		found;
	ibool		success;
	ulint		err;
	mtr_t		mtr;
	
	UT_NOT_USED(thr);

	log_free_check();
	mtr_start(&mtr);

	found = row_search_index_entry(index, entry, mode, &pcur, &mtr);

	btr_cur = btr_pcur_get_btr_cur(&pcur);

	if (!found) {
		/* Not found */

		/* FIXME: remove printfs in the final version */

		/* printf(
		"--UNDO INS: Record not found from page %lu index %s\n",
			buf_frame_get_page_no(btr_cur_get_rec(btr_cur)),
			index->name); */

		/* ibuf_print(); */

		btr_pcur_close(&pcur);
		mtr_commit(&mtr);

		return(DB_SUCCESS);
	}

	if (mode == BTR_MODIFY_LEAF) {
		success = btr_cur_optimistic_delete(btr_cur, &mtr);

		if (success) {
			err = DB_SUCCESS;
		} else {
			err = DB_FAIL;
		}
	} else {
		ut_ad(mode == BTR_MODIFY_TREE);

		btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr);
	}

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	return(err);
}
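The mode parameter implies a two-phase calling convention: attempt the cheap BTR_MODIFY_LEAF path first and fall back to BTR_MODIFY_TREE only when the leaf-only delete reports DB_FAIL. A minimal sketch of such a caller, with remove_low() as a hypothetical stand-in for the function above:

#include <stdio.h>

enum { MODE_LEAF, MODE_TREE };	/* stand-ins for BTR_MODIFY_LEAF/TREE */
enum { DB_SUCCESS, DB_FAIL };

/* Hypothetical stand-in: the leaf-only path fails, the tree path works */
static int remove_low(int mode)
{
	return (mode == MODE_LEAF) ? DB_FAIL : DB_SUCCESS;
}

int main(void)
{
	int	err;

	err = remove_low(MODE_LEAF);	/* optimistic attempt */

	if (err == DB_FAIL) {
		/* the page structure may need to change: retry with a
		pessimistic, tree-modifying descent */
		err = remove_low(MODE_TREE);
	}

	printf("err=%d\n", err);

	return 0;
}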
Example #26
/***************************************************************
Updates a clustered index record of a row when the ordering fields do
not change. */
static
ulint
row_upd_clust_rec(
/*==============*/
				/* out: DB_SUCCESS if operation successfully
				completed, else error code or DB_LOCK_WAIT */
	upd_node_t*	node,	/* in: row update node */
	dict_index_t*	index,	/* in: clustered index */
	que_thr_t*	thr,	/* in: query thread */
	mtr_t*		mtr)	/* in: mtr; gets committed here */
{
	big_rec_t*	big_rec	= NULL;
	btr_pcur_t*	pcur;
	btr_cur_t*	btr_cur;
	ulint		err;
	
	ut_ad(node);
	ut_ad(index->type & DICT_CLUSTERED);

	pcur = node->pcur;
	btr_cur = btr_pcur_get_btr_cur(pcur);

	ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
	
	/* Try optimistic updating of the record, keeping changes within
	the page; we do not check locks because we assume the x-lock on the
	record to update */

	if (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE) {
		err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG,
						btr_cur, node->update,
						node->cmpl_info, thr, mtr);
	} else {
		err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG,
						btr_cur, node->update,
						node->cmpl_info, thr, mtr);
	}

	mtr_commit(mtr);
	
	if (err == DB_SUCCESS) {

		return(err);
	}

	/* We may have to modify the tree structure: do a pessimistic descent
	down the index tree */

	mtr_start(mtr);
	
	/* NOTE: this transaction has an s-lock or x-lock on the record and
	therefore other transactions cannot modify the record when we have no
	latch on the page. In addition, we assume that other query threads of
	the same transaction do not modify the record in the meantime.
	Therefore we can assert that the restoration of the cursor succeeds. */

	ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));

	ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
	
	err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur,
					&big_rec, node->update,
					node->cmpl_info, thr, mtr);
	mtr_commit(mtr);

	if (err == DB_SUCCESS && big_rec) {
		mtr_start(mtr);
		ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
	
		err = btr_store_big_rec_extern_fields(index,
						btr_cur_get_rec(btr_cur),
						big_rec, mtr);		
		mtr_commit(mtr);
	}

	if (big_rec) {
		dtuple_big_rec_free(big_rec);
	}
		
	return(err);
}
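A condensed, self-contained sketch of the control flow in row_upd_clust_rec: try the cheap page-local update first and return on success; otherwise run the pessimistic pass, which may hand back an oversized remainder (the big_rec) to be stored in a separate step and freed by the caller. All names and return codes below are illustrative stand-ins.

#include <stdio.h>
#include <stdlib.h>

enum { DB_SUCCESS, DB_FAIL };

typedef struct {
	const char*	data;	/* the overflow part of the record */
} big_rec_t;

static int update_optimistic(void)
{
	return DB_FAIL;		/* pretend the page-local update failed */
}

static int update_pessimistic(big_rec_t** big_rec)
{
	/* pretend the record no longer fits on the page: hand the
	overflow part back for separate, external storage */
	*big_rec = malloc(sizeof(big_rec_t));
	if (*big_rec == NULL) {
		return DB_FAIL;
	}
	(*big_rec)->data = "overflow column bytes";

	return DB_SUCCESS;
}

int main(void)
{
	big_rec_t*	big_rec = NULL;
	int		err;

	err = update_optimistic();

	if (err != DB_SUCCESS) {
		err = update_pessimistic(&big_rec);
	}

	if (err == DB_SUCCESS && big_rec) {
		printf("store externally: %s\n", big_rec->data);
	}

	free(big_rec);	/* as above, the caller frees the descriptor */

	return err;
}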
Example #27
/***********************************************************************//**
Chooses the next undo log to purge and updates the info in purge_sys. This
function is used to initialize purge_sys when the next record to purge is
not known, and also to update the purge system info on the next record when
purge has handled the whole undo log for a transaction. */
static
void
trx_purge_choose_next_log(void)
/*===========================*/
{
	trx_undo_rec_t*	rec;
	trx_rseg_t*	rseg;
	trx_rseg_t*	min_rseg;
	trx_id_t	min_trx_no;
	ulint		space = 0;   /* init to silence compiler warning */
	ulint		zip_size = 0;
	ulint		page_no = 0; /* init to silence compiler warning */
	ulint		offset = 0;  /* init to silence compiler warning */
	mtr_t		mtr;

	ut_ad(mutex_own(&(purge_sys->mutex)));
	ut_ad(purge_sys->next_stored == FALSE);

	rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);

	min_trx_no = ut_dulint_max;

	min_rseg = NULL;

	while (rseg) {
		mutex_enter(&(rseg->mutex));

		if (rseg->last_page_no != FIL_NULL) {

			if ((min_rseg == NULL)
			    || (ut_dulint_cmp(min_trx_no,
					      rseg->last_trx_no) > 0)) {

				min_rseg = rseg;
				min_trx_no = rseg->last_trx_no;
				space = rseg->space;
				zip_size = rseg->zip_size;
				ut_a(space == 0); /* We assume in purge of
						  externally stored fields
						  that space id == 0 */
				page_no = rseg->last_page_no;
				offset = rseg->last_offset;
			}
		}

		mutex_exit(&(rseg->mutex));

		rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
	}

	if (min_rseg == NULL) {

		return;
	}

	mtr_start(&mtr);

	if (!min_rseg->last_del_marks) {
		/* No need to purge this log */

		rec = &trx_purge_dummy_rec;
	} else {
		rec = trx_undo_get_first_rec(space, zip_size, page_no, offset,
					     RW_S_LATCH, &mtr);
		if (rec == NULL) {
			/* Undo log empty */

			rec = &trx_purge_dummy_rec;
		}
	}

	purge_sys->next_stored = TRUE;
	purge_sys->rseg = min_rseg;

	purge_sys->hdr_page_no = page_no;
	purge_sys->hdr_offset = offset;

	purge_sys->purge_trx_no = min_trx_no;

	if (rec == &trx_purge_dummy_rec) {

		purge_sys->purge_undo_no = ut_dulint_zero;
		purge_sys->page_no = page_no;
		purge_sys->offset = 0;
	} else {
		purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec);

		purge_sys->page_no = page_get_page_no(page_align(rec));
		purge_sys->offset = page_offset(rec);
	}

	mtr_commit(&mtr);
}
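The while loop above is a minimum-selection pass over the rollback segment list, taking each segment's own mutex only while inspecting it. A self-contained sketch of the same pattern follows (compile with -lpthread); the seg_t type and its fields are simplified stand-ins, not InnoDB structures.

#include <pthread.h>
#include <stdio.h>

typedef struct seg {
	pthread_mutex_t	mutex;
	unsigned long	last_trx_no;
	int		in_use;	/* stands in for last_page_no != FIL_NULL */
	struct seg*	next;
} seg_t;

int main(void)
{
	seg_t	c = { PTHREAD_MUTEX_INITIALIZER, 7, 1, NULL };
	seg_t	b = { PTHREAD_MUTEX_INITIALIZER, 3, 1, &c };
	seg_t	a = { PTHREAD_MUTEX_INITIALIZER, 9, 0, &b }; /* empty: skip */

	seg_t*		s;
	seg_t*		min_seg = NULL;
	unsigned long	min_trx_no = (unsigned long) -1;

	for (s = &a; s != NULL; s = s->next) {
		pthread_mutex_lock(&s->mutex);

		if (s->in_use && s->last_trx_no < min_trx_no) {
			min_seg = s;
			min_trx_no = s->last_trx_no;
		}

		pthread_mutex_unlock(&s->mutex);
	}

	printf("min trx_no=%lu (seg %p)\n", min_trx_no, (void*) min_seg);

	return 0;
}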
Example #28
/*********************************************************//**
Moves the persistent cursor backward if it is on the first record of the page.
Commits mtr. Note that to prevent a possible deadlock, the operation
first stores the position of the cursor, commits mtr, acquires the necessary
latches and restores the cursor position again before returning. The
alphabetical position of the cursor is guaranteed to be sensible on
return, but it may happen that the cursor is not positioned on the last
record of any page, because the structure of the tree may have changed
during the time when the cursor had no latches. */
UNIV_INTERN
void
btr_pcur_move_backward_from_page(
/*=============================*/
	btr_pcur_t*	cursor,	/*!< in: persistent cursor, must be on the first
				record of the current page */
	mtr_t*		mtr)	/*!< in: mtr */
{
	ulint		prev_page_no;
	page_t*		page;
	buf_block_t*	prev_block;
	ulint		latch_mode;
	ulint		latch_mode2;

	ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
	ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
	ut_ad(btr_pcur_is_before_first_on_page(cursor));
	ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr));

	latch_mode = cursor->latch_mode;

	if (latch_mode == BTR_SEARCH_LEAF) {

		latch_mode2 = BTR_SEARCH_PREV;

	} else if (latch_mode == BTR_MODIFY_LEAF) {

		latch_mode2 = BTR_MODIFY_PREV;
	} else {
		latch_mode2 = 0; /* To eliminate compiler warning */
		ut_error;
	}

	btr_pcur_store_position(cursor, mtr);

	mtr_commit(mtr);

	mtr_start(mtr);

	btr_pcur_restore_position(latch_mode2, cursor, mtr);

	page = btr_pcur_get_page(cursor);

	prev_page_no = btr_page_get_prev(page, mtr);

	if (prev_page_no == FIL_NULL) {
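		/* No previous page: keep the position restored above */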
	} else if (btr_pcur_is_before_first_on_page(cursor)) {

		prev_block = btr_pcur_get_btr_cur(cursor)->left_block;

		btr_leaf_page_release(btr_pcur_get_block(cursor),
				      latch_mode, mtr);

		page_cur_set_after_last(prev_block,
					btr_pcur_get_page_cur(cursor));
	} else {

		/* The repositioned cursor did not end on an infimum record on
		a page. Cursor repositioning acquired a latch also on the
		previous page, but we do not need the latch: release it. */

		prev_block = btr_pcur_get_btr_cur(cursor)->left_block;

		btr_leaf_page_release(prev_block, latch_mode, mtr);
	}

	cursor->latch_mode = latch_mode;

	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
}
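The store-position / commit / restart / restore sequence in this function is the general recipe the doc comment describes for changing latch sets without deadlocking: remember a logical position, release every latch, re-latch in a safe order, then reposition. A minimal sketch with simplified stand-in types (not the real mtr or cursor API):

#include <stdio.h>

typedef struct { int latched; } mtr_t;			/* stand-in */
typedef struct { int pos; int saved_pos; } pcur_t;	/* stand-in */

static void mtr_begin(mtr_t* m)  { m->latched = 1; }
static void mtr_end(mtr_t* m)    { m->latched = 0; }	/* drops latches */

static void store_position(pcur_t* c)   { c->saved_pos = c->pos; }
static void restore_position(pcur_t* c) { c->pos = c->saved_pos; }

int main(void)
{
	mtr_t	mtr;
	pcur_t	cursor = { 42, 0 };

	mtr_begin(&mtr);

	store_position(&cursor);	/* 1. remember the logical position */
	mtr_end(&mtr);			/* 2. release all latches: waiting
					for the previous page latch can no
					longer deadlock */
	mtr_begin(&mtr);		/* 3. re-acquire latches safely */
	restore_position(&cursor);	/* 4. reposition the cursor */

	printf("pos=%d latched=%d\n", cursor.pos, mtr.latched);

	return 0;
}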
Example #29
/***********************************************************************//**
Gets the next record to purge and updates the info in the purge system.
@return	copy of an undo log record or pointer to the dummy undo log record */
static
trx_undo_rec_t*
trx_purge_get_next_rec(
    /*===================*/
    mem_heap_t*	heap)	/*!< in: memory heap where copied */
{
    trx_undo_rec_t*	rec;
    trx_undo_rec_t*	rec_copy;
    trx_undo_rec_t*	rec2;
    trx_undo_rec_t*	next_rec;
    page_t*		undo_page;
    page_t*		page;
    ulint		offset;
    ulint		page_no;
    ulint		space;
    ulint		zip_size;
    ulint		type;
    ulint		cmpl_info;
    mtr_t		mtr;

    ut_ad(purge_sys->next_stored);

    space = purge_sys->rseg->space;
    zip_size = purge_sys->rseg->zip_size;
    page_no = purge_sys->page_no;
    offset = purge_sys->offset;

    if (offset == 0) {
        /* It is the dummy undo log record, which means that there is
        no need to purge this undo log */

        trx_purge_rseg_get_next_history_log(purge_sys->rseg);

        /* Look for the next undo log and record to purge */

        trx_purge_choose_next_log();

        return(&trx_purge_dummy_rec);
    }

    mtr_start(&mtr);

    undo_page = trx_undo_page_get_s_latched(space, zip_size, page_no, &mtr);

    rec = undo_page + offset;

    rec2 = rec;

    for (;;) {
        /* Try first to find the next record which requires a purge
        operation from the same page of the same undo log */

        next_rec = trx_undo_page_get_next_rec(
                       rec2, purge_sys->hdr_page_no, purge_sys->hdr_offset);

        if (next_rec == NULL) {
            rec2 = trx_undo_get_next_rec(
                       rec2, purge_sys->hdr_page_no,
                       purge_sys->hdr_offset, &mtr);
            break;
        }

        rec2 = next_rec;

        type = trx_undo_rec_get_type(rec2);

        if (type == TRX_UNDO_DEL_MARK_REC) {

            break;
        }

        cmpl_info = trx_undo_rec_get_cmpl_info(rec2);

        if (trx_undo_rec_get_extern_storage(rec2)) {
            break;
        }

        if ((type == TRX_UNDO_UPD_EXIST_REC)
                && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
            break;
        }
    }

    if (rec2 == NULL) {
        mtr_commit(&mtr);

        trx_purge_rseg_get_next_history_log(purge_sys->rseg);

        /* Look for the next undo log and record to purge */

        trx_purge_choose_next_log();

        mtr_start(&mtr);

        undo_page = trx_undo_page_get_s_latched(space, zip_size,
                                                page_no, &mtr);

        rec = undo_page + offset;
    } else {
        page = page_align(rec2);

        purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec2);
        purge_sys->page_no = page_get_page_no(page);
        purge_sys->offset = rec2 - page;

        if (undo_page != page) {
            /* We advance to a new page of the undo log: */
            purge_sys->n_pages_handled++;
        }
    }

    rec_copy = trx_undo_rec_copy(rec, heap);

    mtr_commit(&mtr);

    return(rec_copy);
}
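The for (;;) loop above advances until it finds a record purge must act on; the stopping conditions can be read off the breaks: a delete-mark record, a record with externally stored columns, or an update-existing record whose ordering fields changed. A self-contained sketch of that filter, with an invented record encoding:

#include <stdio.h>

enum { UNDO_INSERT, UNDO_UPD_EXIST, UNDO_DEL_MARK };

typedef struct {
	int	type;
	int	extern_storage;	/* has externally stored columns */
	int	ord_change;	/* update changed ordering fields */
} undo_rec_t;

static int needs_purge(const undo_rec_t* r)
{
	if (r->type == UNDO_DEL_MARK) {
		return 1;
	}
	if (r->extern_storage) {
		return 1;
	}
	if (r->type == UNDO_UPD_EXIST && r->ord_change) {
		return 1;
	}
	return 0;
}

int main(void)
{
	undo_rec_t	log[] = {
		{ UNDO_INSERT,    0, 0 },
		{ UNDO_UPD_EXIST, 0, 0 },	/* no ord change: skipped */
		{ UNDO_UPD_EXIST, 0, 1 },	/* first purgeable record */
		{ UNDO_DEL_MARK,  0, 0 },
	};
	int	i;

	for (i = 0; i < 4; i++) {
		if (needs_purge(&log[i])) {
			printf("purge stops at record %d\n", i);
			break;
		}
	}

	return 0;
}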
Example #30
/***************************************************************
Updates the clustered index record. */
static
ulint
row_upd_clust_step(
/*===============*/
				/* out: DB_SUCCESS if operation successfully
				completed, DB_LOCK_WAIT in case of a lock wait,
				else error code */
	upd_node_t*	node,	/* in: row update node */
	que_thr_t*	thr)	/* in: query thread */
{
	dict_index_t*	index;
	btr_pcur_t*	pcur;
	ibool		success;
	ibool		check_ref;
	ulint		err;
	mtr_t*		mtr;
	mtr_t		mtr_buf;
	
	index = dict_table_get_first_index(node->table);

	check_ref = row_upd_index_is_referenced(index);

	pcur = node->pcur;

	/* We have to restore the cursor to its position */
	mtr = &mtr_buf;

	mtr_start(mtr);
	
	/* If the restoration does not succeed, then the same
	transaction has deleted the record on which the cursor was,
	and that is an SQL error. If the restoration succeeds, it may
	still be that the same transaction has successively deleted
	and inserted a record with the same ordering fields, but in
	that case we know that the transaction has at least an
	implicit x-lock on the record. */
	
	ut_a(pcur->rel_pos == BTR_PCUR_ON);

	success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);

	if (!success) {
		err = DB_RECORD_NOT_FOUND;

		mtr_commit(mtr);

		return(err);
	}

	/* If this is a row in SYS_INDEXES table of the data dictionary,
	then we have to free the file segments of the index tree associated
	with the index */

	if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {

		dict_drop_index_tree(btr_pcur_get_rec(pcur), mtr);

		mtr_commit(mtr);

		mtr_start(mtr);

		success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur,
									mtr);
		if (!success) {
			err = DB_ERROR;

			mtr_commit(mtr);

			return(err);
		}
	} 

	if (!node->has_clust_rec_x_lock) {
		err = lock_clust_rec_modify_check_and_lock(0,
						btr_pcur_get_rec(pcur),
						index, thr);
		if (err != DB_SUCCESS) {
			mtr_commit(mtr);

			return(err);
		}
	}

	/* NOTE: the following function calls will also commit mtr */

	if (node->is_delete) {
		err = row_upd_del_mark_clust_rec(node, index, thr, check_ref,
									mtr);
		if (err != DB_SUCCESS) {

			return(err);
		}

		node->state = UPD_NODE_UPDATE_ALL_SEC;
		node->index = dict_table_get_next_index(index);

		return(err);
	}
	
	/* If the update is made for MySQL, we already have the update vector
	ready, else we have to do some evaluation: */
 
	if (!node->in_mysql_interface) {
		/* Copy the necessary columns from clust_rec and calculate the
		new values to set */

		row_upd_copy_columns(btr_pcur_get_rec(pcur),
					UT_LIST_GET_FIRST(node->columns));
		row_upd_eval_new_vals(node->update);
	}
		
	if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {

		err = row_upd_clust_rec(node, index, thr, mtr);

		return(err);
	}
	
	row_upd_store_row(node);

	if (row_upd_changes_ord_field_binary(node->row, index, node->update)) {

		/* Update causes an ordering field (ordering fields within
		the B-tree) of the clustered index record to change: perform
		the update by delete marking and inserting.

		TODO! What to do about the 'Halloween problem', where an
		update moves the record forward in the index so that it is
		updated again when the cursor arrives there? Solution: the
		read operation must check the undo number of the undo record
		when choosing records to update. MySQL currently solves the
		problem externally! */

		err = row_upd_clust_rec_by_insert(node, index, thr, check_ref,
									mtr);
		if (err != DB_SUCCESS) {

			return(err);
		}

		node->state = UPD_NODE_UPDATE_ALL_SEC;
	} else {
		err = row_upd_clust_rec(node, index, thr, mtr);

		if (err != DB_SUCCESS) {

			return(err);
		}

		node->state = UPD_NODE_UPDATE_SOME_SEC;
	}

	node->index = dict_table_get_next_index(index);

	return(err);
}
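A minimal sketch of the dispatch this function ends with: an update that changes B-tree ordering fields is executed as delete-mark plus insert and every secondary index is then visited, while an in-place update only touches the affected secondary indexes. The stubs and state constants below are hypothetical stand-ins.

#include <stdio.h>

enum { ST_UPDATE_ALL_SEC, ST_UPDATE_SOME_SEC };

/* Hypothetical stand-in for row_upd_changes_ord_field_binary() */
static int changes_ord_fields(void)
{
	return 1;	/* pretend an ordering field changes */
}

int main(void)
{
	int	state;

	if (changes_ord_fields()) {
		/* delete-mark the old record and insert a new one;
		every secondary index must then be updated */
		state = ST_UPDATE_ALL_SEC;
	} else {
		/* update in place; only the secondary indexes whose
		fields changed need work */
		state = ST_UPDATE_SOME_SEC;
	}

	printf("next state=%d\n", state);

	return 0;
}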