Example #1
0
/******************************************************************//**
Removes a debug information struct for an rw-lock. */
UNIV_INTERN
void
rw_lock_remove_debug_info(
/*======================*/
	rw_lock_t*	lock,		/*!< in: rw-lock */
	ulint		pass,		/*!< in: pass value */
	ulint		lock_type)	/*!< in: lock type */
{
	rw_lock_debug_t*	info;

	ut_ad(lock);

	if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) {
		sync_thread_reset_level(lock);
	}

	rw_lock_debug_mutex_enter();

	info = UT_LIST_GET_FIRST(lock->debug_list);

	while (info != NULL) {
		if ((pass == info->pass)
		    && ((pass != 0)
			|| os_thread_eq(info->thread_id,
					os_thread_get_curr_id()))
		    && (info->lock_type == lock_type)) {

			/* Found! */
			UT_LIST_REMOVE(list, lock->debug_list, info);
			rw_lock_debug_mutex_exit();

			rw_lock_debug_free(info);

			return;
		}

		info = UT_LIST_GET_NEXT(list, info);
	}

	ut_error;
}
Example #2
0
/**********************************************************************//**
Remove a block from the appropriate buddy free list. */
UNIV_INLINE
void
buf_buddy_remove_from_free(
/*=======================*/
	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
	buf_page_t*	bpage,		/*!< in: block to be removed */
	ulint		i)		/*!< in: index of
					buf_pool->zip_free[] */
{
#ifdef UNIV_DEBUG
	buf_page_t*	prev = UT_LIST_GET_PREV(list, bpage);
	buf_page_t*	next = UT_LIST_GET_NEXT(list, bpage);

	ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
	ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
#endif /* UNIV_DEBUG */

	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
	UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
}
Example #3
0
void
trx_end_lock_wait(
/*==============*/
	trx_t*	trx)	/* in: transaction */
{
	que_thr_t*	thr;

	ut_ad(mutex_own(&kernel_mutex));
	ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);

	thr = UT_LIST_GET_FIRST(trx->wait_thrs);

	while (thr != NULL) {
		que_thr_end_wait_no_next_thr(thr);

		UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);

		thr = UT_LIST_GET_FIRST(trx->wait_thrs);
	}

	trx->que_state = TRX_QUE_RUNNING;
}
Example #4
0
void
buf_flush_write_complete(
/*=====================*/
	buf_block_t*	block)	/* in: pointer to the block in question */
{
	ut_ad(block);
#ifdef UNIV_SYNC_DEBUG
	ut_ad(mutex_own(&(buf_pool->mutex)));
#endif /* UNIV_SYNC_DEBUG */
	ut_a(block->state == BUF_BLOCK_FILE_PAGE);

	block->oldest_modification = ut_dulint_zero;

	UT_LIST_REMOVE(flush_list, buf_pool->flush_list, block);

	ut_d(UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list));

	(buf_pool->n_flush[block->flush_type])--;

	if (block->flush_type == BUF_FLUSH_LRU) {
		/* Put the block to the end of the LRU list to wait to be
		moved to the free list */

		buf_LRU_make_block_old(block);

		buf_pool->LRU_flush_ended++;
	}

	/* fprintf(stderr, "n pending flush %lu\n",
	buf_pool->n_flush[block->flush_type]); */

	if ((buf_pool->n_flush[block->flush_type] == 0)
	    && (buf_pool->init_flush[block->flush_type] == FALSE)) {

		/* The running flush batch has ended */

		os_event_set(buf_pool->no_flush[block->flush_type]);
	}
}
Example #5
0
// Remove timer from watch list
void OSEventThread::DelTimer(event_timer_t * ev)
{
    TRACE("~~~~ +DelTimer ~~~~\n");
    if(!ev) return;

    {
        OSMutexLocker _locker(&mMutex);
        if(NULL == UT_LIST_GET_NEXT(watchNode,ev) || NULL == UT_LIST_GET_PREV(watchNode,ev))
        {
            UT_LIST_REMOVE(watchNode,mTimerList,ev);
            DecreaseWatchNum();
        }
    }
    wakeup();

    while (ev->status != 0)//just for single thread
    {
        WARN("wait exit from pending list \n");
        OSThread::Sleep(1);
    }
    TRACE("~~~~ -DelTimer ~~~~\n");
}
Example #6
0
/***********************************************************//**
Moves the query threads in the lock wait list to the SUSPENDED state and puts
the transaction to the TRX_QUE_RUNNING state. */
static
void
trx_lock_wait_to_suspended(
/*=======================*/
	trx_t*	trx)	/*!< in: transaction in the TRX_QUE_LOCK_WAIT state */
{
	que_thr_t*	thr;

	ut_ad(mutex_own(&kernel_mutex));
	ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);

	thr = UT_LIST_GET_FIRST(trx->wait_thrs);

	while (thr != NULL) {
		thr->state = QUE_THR_SUSPENDED;

		UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);

		thr = UT_LIST_GET_FIRST(trx->wait_thrs);
	}

	trx->que_state = TRX_QUE_RUNNING;
}
Example #7
0
/****************************************************************//**
Commits a transaction. */
UNIV_INTERN
void
trx_commit_off_kernel(
/*==================*/
	trx_t*	trx)	/*!< in: transaction */
{
	page_t*		update_hdr_page;
	ib_uint64_t	lsn		= 0;
	trx_rseg_t*	rseg;
	trx_undo_t*	undo;
	mtr_t		mtr;

	ut_ad(mutex_own(&kernel_mutex));

	trx->must_flush_log_later = FALSE;

	rseg = trx->rseg;

	if (trx->insert_undo != NULL || trx->update_undo != NULL) {

		mutex_exit(&kernel_mutex);

		mtr_start(&mtr);

		/* Change the undo log segment states from TRX_UNDO_ACTIVE
		to some other state: these modifications to the file data
		structure define the transaction as committed in the file
		based world, at the serialization point of the log sequence
		number lsn obtained below. */

		mutex_enter(&(rseg->mutex));

		if (trx->insert_undo != NULL) {
			trx_undo_set_state_at_finish(
				rseg, trx, trx->insert_undo, &mtr);
		}

		undo = trx->update_undo;

		if (undo) {
			mutex_enter(&kernel_mutex);
			trx->no = trx_sys_get_new_trx_no();

			mutex_exit(&kernel_mutex);

			/* It is not necessary to obtain trx->undo_mutex here
			because only a single OS thread is allowed to do the
			transaction commit for this transaction. */

			update_hdr_page = trx_undo_set_state_at_finish(
				rseg, trx, undo, &mtr);

			/* We have to do the cleanup for the update log while
			holding the rseg mutex because update log headers
			have to be put to the history list in the order of
			the trx number. */

			trx_undo_update_cleanup(trx, update_hdr_page, &mtr);
		}

		mutex_exit(&(rseg->mutex));

		/* Update the latest MySQL binlog name and offset info
		in trx sys header if MySQL binlogging is on or the database
		server is a MySQL replication slave */

		if (trx->mysql_log_file_name
		    && trx->mysql_log_file_name[0] != '\0') {
			trx_sys_update_mysql_binlog_offset(
				trx->mysql_log_file_name,
				trx->mysql_log_offset,
				TRX_SYS_MYSQL_LOG_INFO, &mtr);
			trx->mysql_log_file_name = NULL;
		}

		/* The following call commits the mini-transaction, making the
		whole transaction committed in the file-based world, at this
		log sequence number. The transaction becomes 'durable' when
		we write the log to disk, but in the logical sense the commit
		in the file-based data structures (undo logs etc.) happens
		here.

		NOTE that transaction numbers, which are assigned only to
		transactions with an update undo log, do not necessarily come
		in exactly the same order as commit lsn's, if the transactions
		have different rollback segments. To get exactly the same
		order we should hold the kernel mutex up to this point,
		adding to the contention of the kernel mutex. However, if
		a transaction T2 is able to see modifications made by
		a transaction T1, T2 will always get a bigger transaction
		number and a bigger commit lsn than T1. */

		/*--------------*/
		mtr_commit(&mtr);
		/*--------------*/
		lsn = mtr.end_lsn;

		mutex_enter(&kernel_mutex);
	}

	ut_ad(trx->conc_state == TRX_ACTIVE
	      || trx->conc_state == TRX_PREPARED);
	ut_ad(mutex_own(&kernel_mutex));

	/* The following assignment makes the transaction committed in memory
	and makes its changes to data visible to other transactions.
	NOTE that there is a small discrepancy from the strict formal
	visibility rules here: a human user of the database can see
	modifications made by another transaction T even before the necessary
	log segment has been flushed to the disk. If the database happens to
	crash before the flush, the user has seen modifications from T which
	will never be a committed transaction. However, any transaction T2
	which sees the modifications of the committing transaction T, and
	which also itself makes modifications to the database, will get an lsn
	larger than the committing transaction T. In the case where the log
	flush fails, and T never gets committed, also T2 will never get
	committed. */

	/*--------------------------------------*/
	trx->conc_state = TRX_COMMITTED_IN_MEMORY;
	/*--------------------------------------*/

	/* If we release kernel_mutex below and we are still doing
	recovery i.e.: back ground rollback thread is still active
	then there is a chance that the rollback thread may see
	this trx as COMMITTED_IN_MEMORY and goes adhead to clean it
	up calling trx_cleanup_at_db_startup(). This can happen
	in the case we are committing a trx here that is left in
	PREPARED state during the crash. Note that commit of the
	rollback of a PREPARED trx happens in the recovery thread
	while the rollback of other transactions happen in the
	background thread. To avoid this race we unconditionally
	unset the is_recovered flag from the trx. */

	trx->is_recovered = FALSE;

	lock_release_off_kernel(trx);

	if (trx->global_read_view) {
		read_view_close(trx->global_read_view);
		mem_heap_empty(trx->global_read_view_heap);
		trx->global_read_view = NULL;
	}

	trx->read_view = NULL;

	if (lsn) {

		mutex_exit(&kernel_mutex);

		if (trx->insert_undo != NULL) {

			trx_undo_insert_cleanup(trx);
		}

		/* NOTE that we could possibly make a group commit more
		efficient here: call os_thread_yield here to allow also other
		trxs to come to commit! */

		/*-------------------------------------*/

		/* Depending on the my.cnf options, we may now write the log
		buffer to the log files, making the transaction durable if
		the OS does not crash. We may also flush the log files to
		disk, making the transaction durable also at an OS crash or a
		power outage.

		The idea in InnoDB's group commit is that a group of
		transactions gather behind a trx doing a physical disk write
		to log files, and when that physical write has been completed,
		one of those transactions does a write which commits the whole
		group. Note that this group commit will only bring benefit if
		there are > 2 users in the database. Then at least 2 users can
		gather behind one doing the physical log write to disk.

		If we are calling trx_commit() under prepare_commit_mutex, we
		will delay possible log write and flush to a separate function
		trx_commit_complete_for_mysql(), which is only called when the
		thread has released the mutex. This is to make the
		group commit algorithm to work. Otherwise, the prepare_commit
		mutex would serialize all commits and prevent a group of
		transactions from gathering. */

		if (trx->flush_log_later) {
			/* Do nothing yet */
			trx->must_flush_log_later = TRUE;
		} else if (srv_flush_log_at_trx_commit == 0) {
			/* Do nothing */
		} else if (srv_flush_log_at_trx_commit == 1) {
			if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
				/* Write the log but do not flush it to disk */

				log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
						FALSE);
			} else {
				/* Write the log to the log files AND flush
				them to disk */

				log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
			}
		} else if (srv_flush_log_at_trx_commit == 2) {

			/* Write the log but do not flush it to disk */

			log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
		} else {
			ut_error;
		}

		trx->commit_lsn = lsn;

		/*-------------------------------------*/

		mutex_enter(&kernel_mutex);
	}

	/* Free all savepoints */
	trx_roll_free_all_savepoints(trx);

	trx->conc_state = TRX_NOT_STARTED;
	trx->rseg = NULL;
	trx->undo_no = ut_dulint_zero;
	trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;

	ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
	ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0);

	UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
}
Example #8
0
/********************************************************************//**
Frees memory to a pool. */
UNIV_INTERN
void
mem_area_free(
/*==========*/
	void*		ptr,	/*!< in, own: pointer to allocated memory
				buffer */
	mem_pool_t*	pool)	/*!< in: memory pool */
{
	mem_area_t*	area;
	mem_area_t*	buddy;
	void*		new_ptr;
	ulint		size;
	ulint		n;

	if (UNIV_LIKELY(srv_use_sys_malloc)) {
		free(ptr);

		return;
	}

	/* It may be that the area was really allocated from the OS with
	regular malloc: check if ptr points within our memory pool */

	if ((byte*)ptr < pool->buf || (byte*)ptr >= pool->buf + pool->size) {
		ut_free(ptr);

		return;
	}

	area = (mem_area_t*) (((byte*)ptr) - MEM_AREA_EXTRA_SIZE);

	if (mem_area_get_free(area)) {
		fprintf(stderr,
			"InnoDB: Error: Freeing element to mem pool"
			" free list though the\n"
			"InnoDB: element is marked free!\n");

		mem_analyze_corruption(area);
		ut_error;
	}

	size = mem_area_get_size(area);
	UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE);

	if (size == 0) {
		fprintf(stderr,
			"InnoDB: Error: Mem area size is 0. Possibly a"
			" memory overrun of the\n"
			"InnoDB: previous allocated area!\n");

		mem_analyze_corruption(area);
		ut_error;
	}

#ifdef UNIV_LIGHT_MEM_DEBUG
	if (((byte*)area) + size < pool->buf + pool->size) {

		ulint	next_size;

		next_size = mem_area_get_size(
			(mem_area_t*)(((byte*)area) + size));
		if (UNIV_UNLIKELY(!next_size || !ut_is_2pow(next_size))) {
			fprintf(stderr,
				"InnoDB: Error: Memory area size %lu,"
				" next area size %lu not a power of 2!\n"
				"InnoDB: Possibly a memory overrun of"
				" the buffer being freed here.\n",
				(ulong) size, (ulong) next_size);
			mem_analyze_corruption(area);

			ut_error;
		}
	}
#endif
	buddy = mem_area_get_buddy(area, size, pool);

	n = ut_2_log(size);

	mem_pool_mutex_enter(pool);
	mem_n_threads_inside++;

	ut_a(mem_n_threads_inside == 1);

	if (buddy && mem_area_get_free(buddy)
	    && (size == mem_area_get_size(buddy))) {

		/* The buddy is in a free list */

		if ((byte*)buddy < (byte*)area) {
			new_ptr = ((byte*)buddy) + MEM_AREA_EXTRA_SIZE;

			mem_area_set_size(buddy, 2 * size);
			mem_area_set_free(buddy, FALSE);
		} else {
			new_ptr = ptr;

			mem_area_set_size(area, 2 * size);
		}

		/* Remove the buddy from its free list and merge it to area */

		UT_LIST_REMOVE(free_list, pool->free_list[n], buddy);

		pool->reserved += ut_2_exp(n);

		mem_n_threads_inside--;
		mem_pool_mutex_exit(pool);

		mem_area_free(new_ptr, pool);

		return;
	} else {
		UT_LIST_ADD_FIRST(free_list, pool->free_list[n], area);

		mem_area_set_free(area, TRUE);

		ut_ad(pool->reserved >= size);

		pool->reserved -= size;
	}

	mem_n_threads_inside--;
	mem_pool_mutex_exit(pool);

	ut_ad(mem_pool_validate(pool));
}
Example #9
0
/********************************************************************//**
Allocates memory from a pool. NOTE: This low-level function should only be
used in mem0mem.*!
@return	own: allocated memory buffer */
UNIV_INTERN
void*
mem_area_alloc(
/*===========*/
	ulint*		psize,	/*!< in: requested size in bytes; for optimum
				space usage, the size should be a power of 2
				minus MEM_AREA_EXTRA_SIZE;
				out: allocated size in bytes (greater than
				or equal to the requested size) */
	mem_pool_t*	pool)	/*!< in: memory pool */
{
	mem_area_t*	area;
	ulint		size;
	ulint		n;
	ibool		ret;

	/* If we are using os allocator just make a simple call
	to malloc */
	if (UNIV_LIKELY(srv_use_sys_malloc)) {
		return(malloc(*psize));
	}

	size = *psize;
	n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE));

	mutex_enter(&(pool->mutex));
	mem_n_threads_inside++;

	ut_a(mem_n_threads_inside == 1);

	area = UT_LIST_GET_FIRST(pool->free_list[n]);

	if (area == NULL) {
		ret = mem_pool_fill_free_list(n, pool);

		if (ret == FALSE) {
			/* Out of memory in memory pool: we try to allocate
			from the operating system with the regular malloc: */

			mem_n_threads_inside--;
			mutex_exit(&(pool->mutex));

			return(ut_malloc(size));
		}

		area = UT_LIST_GET_FIRST(pool->free_list[n]);
	}

	if (!mem_area_get_free(area)) {
		fprintf(stderr,
			"InnoDB: Error: Removing element from mem pool"
			" free list %lu though the\n"
			"InnoDB: element is not marked free!\n",
			(ulong) n);

		mem_analyze_corruption(area);

		/* Try to analyze a strange assertion failure reported at
		[email protected] where the free bit IS 1 in the
		hex dump above */

		if (mem_area_get_free(area)) {
			fprintf(stderr,
				"InnoDB: Probably a race condition"
				" because now the area is marked free!\n");
		}

		ut_error;
	}

	if (UT_LIST_GET_LEN(pool->free_list[n]) == 0) {
		fprintf(stderr,
			"InnoDB: Error: Removing element from mem pool"
			" free list %lu\n"
			"InnoDB: though the list length is 0!\n",
			(ulong) n);
		mem_analyze_corruption(area);

		ut_error;
	}

	ut_ad(mem_area_get_size(area) == ut_2_exp(n));

	mem_area_set_free(area, FALSE);

	UT_LIST_REMOVE(free_list, pool->free_list[n], area);

	pool->reserved += mem_area_get_size(area);

	mem_n_threads_inside--;
	mutex_exit(&(pool->mutex));

	ut_ad(mem_pool_validate(pool));

	*psize = ut_2_exp(n) - MEM_AREA_EXTRA_SIZE;
	UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area, *psize);

	return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area)));
}
Example #10
0
/********************************************************************//**
Fills the specified free list.
@return	TRUE if we were able to insert a block to the free list */
static
ibool
mem_pool_fill_free_list(
/*====================*/
	ulint		i,	/*!< in: free list index */
	mem_pool_t*	pool)	/*!< in: memory pool */
{
	mem_area_t*	area;
	mem_area_t*	area2;
	ibool		ret;

	ut_ad(mutex_own(&(pool->mutex)));

	if (UNIV_UNLIKELY(i >= 63)) {
		/* We come here when we have run out of space in the
		memory pool: */

		return(FALSE);
	}

	area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);

	if (area == NULL) {
		if (UT_LIST_GET_LEN(pool->free_list[i + 1]) > 0) {
			ut_print_timestamp(stderr);

			fprintf(stderr,
				"  InnoDB: Error: mem pool free list %lu"
				" length is %lu\n"
				"InnoDB: though the list is empty!\n",
				(ulong) i + 1,
				(ulong)
				UT_LIST_GET_LEN(pool->free_list[i + 1]));
		}

		ret = mem_pool_fill_free_list(i + 1, pool);

		if (ret == FALSE) {

			return(FALSE);
		}

		area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
	}

	if (UNIV_UNLIKELY(UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0)) {
		mem_analyze_corruption(area);

		ut_error;
	}

	UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area);

	area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i));
	UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE);

	mem_area_set_size(area2, ut_2_exp(i));
	mem_area_set_free(area2, TRUE);

	UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area2);

	mem_area_set_size(area, ut_2_exp(i));

	UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);

	return(TRUE);
}
Example #11
0
/***************************************************************//**
Removes a memory heap (which is going to be freed by the caller)
from the list of live memory heaps. Returns the size of the heap
in terms of how much memory in bytes was allocated for the user of
the heap (not the total space occupied by the heap).
Also validates the heap.
NOTE: This function does not free the storage occupied by the
heap itself, only the node in the list of heaps. */
UNIV_INTERN
void
mem_hash_remove(
/*============*/
	mem_heap_t*	heap,	   /*!< in: the heap to be freed */
	const char*	file_name, /*!< in: file name of freeing */
	ulint		line)	   /*!< in: line where freed */
{
	mem_hash_node_t*	node;
	ulint			cell_no;
	ibool			error;
	ulint			size;

	ut_ad(mem_heap_check(heap));

	mutex_enter(&mem_hash_mutex);

	cell_no = ut_hash_ulint((ulint)heap, MEM_HASH_SIZE);

	/* Look for the heap in the hash table list */
	node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(cell_no));

	while (node != NULL) {
		if (node->heap == heap) {

			break;
		}

		node = UT_LIST_GET_NEXT(list, node);
	}

	if (node == NULL) {
		fprintf(stderr,
			"Memory heap or buffer freed in %s line %lu"
			" did not exist.\n",
			innobase_basename(file_name), (ulong) line);
		ut_error;
	}

	/* Remove from lists */
	UT_LIST_REMOVE(list, *mem_hash_get_nth_cell(cell_no), node);

	UT_LIST_REMOVE(all_list, mem_all_list_base, node);

	/* Validate the heap which will be freed */
	mem_heap_validate_or_print(node->heap, NULL, FALSE, &error, &size,
				   NULL, NULL);
	if (error) {
		fprintf(stderr,
			"Inconsistency in memory heap or"
			" buffer n:o %lu created\n"
			"in %s line %lu and tried to free in %s line %lu.\n"
			"Hex dump of 400 bytes around memory heap"
			" first block start:\n",
			node->nth_heap,
			innobase_basename(node->file_name), (ulong) node->line,
			innobase_basename(file_name), (ulong) line);
		ut_print_buf(stderr, (byte*)node->heap - 200, 400);
		fputs("\nDump of the mem heap:\n", stderr);
		mem_heap_validate_or_print(node->heap, NULL, TRUE, &error,
					   &size, NULL, NULL);
		ut_error;
	}

	/* Free the memory occupied by the node struct */
	ut_free(node);

	mem_current_allocated_memory -= size;

	mutex_exit(&mem_hash_mutex);
}
Example #12
0
buf_block_t*
buf_LRU_get_free_block(void)
/*========================*/
				/* out: the free control block; also if AWE is
				used, it is guaranteed that the block has its
				page mapped to a frame when we return */
{
	buf_block_t*	block		= NULL;
	ibool		freed;
	ulint		n_iterations	= 1;
	ibool		mon_value_was	= FALSE;
	ibool		started_monitor	= FALSE;
loop:
	mutex_enter(&(buf_pool->mutex));

	if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
	    + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 20) {
		ut_print_timestamp(stderr);

		fprintf(stderr,
			"  InnoDB: ERROR: over 95 percent of the buffer pool"
			" is occupied by\n"
			"InnoDB: lock heaps or the adaptive hash index!"
			" Check that your\n"
			"InnoDB: transactions do not set too many row locks.\n"
			"InnoDB: Your buffer pool size is %lu MB."
			" Maybe you should make\n"
			"InnoDB: the buffer pool bigger?\n"
			"InnoDB: We intentionally generate a seg fault"
			" to print a stack trace\n"
			"InnoDB: on Linux!\n",
			(ulong) (buf_pool->curr_size
				 / (1024 * 1024 / UNIV_PAGE_SIZE)));

		ut_error;

	} else if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
		   + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 3) {

		if (!buf_lru_switched_on_innodb_mon) {

	   		/* Over 67 % of the buffer pool is occupied by lock
			heaps or the adaptive hash index. This may be a memory
			leak! */

			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: WARNING: over 67 percent of"
				" the buffer pool is occupied by\n"
				"InnoDB: lock heaps or the adaptive"
				" hash index! Check that your\n"
				"InnoDB: transactions do not set too many"
				" row locks.\n"
				"InnoDB: Your buffer pool size is %lu MB."
				" Maybe you should make\n"
				"InnoDB: the buffer pool bigger?\n"
				"InnoDB: Starting the InnoDB Monitor to print"
				" diagnostics, including\n"
				"InnoDB: lock heap and hash index sizes.\n",
				(ulong) (buf_pool->curr_size
					 / (1024 * 1024 / UNIV_PAGE_SIZE)));

			buf_lru_switched_on_innodb_mon = TRUE;
			srv_print_innodb_monitor = TRUE;
			os_event_set(srv_lock_timeout_thread_event);
		}
	} else if (buf_lru_switched_on_innodb_mon) {

		/* Switch off the InnoDB Monitor; this is a simple way
		to stop the monitor if the situation becomes less urgent,
		but may also surprise users if the user also switched on the
		monitor! */

		buf_lru_switched_on_innodb_mon = FALSE;
		srv_print_innodb_monitor = FALSE;
	}

	/* If there is a block in the free list, take it */
	if (UT_LIST_GET_LEN(buf_pool->free) > 0) {

		block = UT_LIST_GET_FIRST(buf_pool->free);
		ut_a(block->in_free_list);
		UT_LIST_REMOVE(free, buf_pool->free, block);
		block->in_free_list = FALSE;
		ut_a(block->state != BUF_BLOCK_FILE_PAGE);
		ut_a(!block->in_LRU_list);

		if (srv_use_awe) {
			if (block->frame) {
				/* Remove from the list of mapped pages */

				UT_LIST_REMOVE(awe_LRU_free_mapped,
					       buf_pool->awe_LRU_free_mapped,
					       block);
			} else {
				/* We map the page to a frame; second param
				FALSE below because we do not want it to be
				added to the awe_LRU_free_mapped list */

				buf_awe_map_page_to_frame(block, FALSE);
			}
		}

		mutex_enter(&block->mutex);

		block->state = BUF_BLOCK_READY_FOR_USE;
		UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);

		mutex_exit(&block->mutex);

		mutex_exit(&(buf_pool->mutex));

		if (started_monitor) {
			srv_print_innodb_monitor = mon_value_was;
		}

		return(block);
	}

	/* If no block was in the free list, search from the end of the LRU
	list and try to free a block there */

	mutex_exit(&(buf_pool->mutex));

	freed = buf_LRU_search_and_free_block(n_iterations);

	if (freed > 0) {
		goto loop;
	}

	if (n_iterations > 30) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
			"InnoDB: Warning: difficult to find free blocks from\n"
			"InnoDB: the buffer pool (%lu search iterations)!"
			" Consider\n"
			"InnoDB: increasing the buffer pool size.\n"
			"InnoDB: It is also possible that"
			" in your Unix version\n"
			"InnoDB: fsync is very slow, or"
			" completely frozen inside\n"
			"InnoDB: the OS kernel. Then upgrading to"
			" a newer version\n"
			"InnoDB: of your operating system may help."
			" Look at the\n"
			"InnoDB: number of fsyncs in diagnostic info below.\n"
			"InnoDB: Pending flushes (fsync) log: %lu;"
			" buffer pool: %lu\n"
			"InnoDB: %lu OS file reads, %lu OS file writes,"
			" %lu OS fsyncs\n"
			"InnoDB: Starting InnoDB Monitor to print further\n"
			"InnoDB: diagnostics to the standard output.\n",
			(ulong) n_iterations,
			(ulong) fil_n_pending_log_flushes,
			(ulong) fil_n_pending_tablespace_flushes,
			(ulong) os_n_file_reads, (ulong) os_n_file_writes,
			(ulong) os_n_fsyncs);

		mon_value_was = srv_print_innodb_monitor;
		started_monitor = TRUE;
		srv_print_innodb_monitor = TRUE;
		os_event_set(srv_lock_timeout_thread_event);
	}

	/* No free block was found: try to flush the LRU list */

	buf_flush_free_margin();
	++srv_buf_pool_wait_free;

	os_aio_simulated_wake_handler_threads();

	mutex_enter(&(buf_pool->mutex));

	if (buf_pool->LRU_flush_ended > 0) {
		/* We have written pages in an LRU flush. To make the insert
		buffer more efficient, we try to move these pages to the free
		list. */

		mutex_exit(&(buf_pool->mutex));

		buf_LRU_try_free_flushed_blocks();
	} else {
		mutex_exit(&(buf_pool->mutex));
	}

	if (n_iterations > 10) {

		os_thread_sleep(500000);
	}

	n_iterations++;

	goto loop;
}
Example #13
0
void
buf_LRU_invalidate_tablespace(
/*==========================*/
	ulint	id)	/* in: space id */
{
	buf_block_t*	block;
	ulint		page_no;
	ibool		all_freed;

	/* Before we attempt to drop pages one by one we first
	attempt to drop page hash index entries in batches to make
	it more efficient. The batching attempt is a best effort
	attempt and does not guarantee that all pages hash entries
	will be dropped. We get rid of remaining page hash entries
	one by one below. */
	buf_LRU_drop_page_hash_for_tablespace(id);

scan_again:
	mutex_enter(&(buf_pool->mutex));

	all_freed = TRUE;

	block = UT_LIST_GET_LAST(buf_pool->LRU);

	while (block != NULL) {
		buf_block_t*	prev_block;

		mutex_enter(&block->mutex);
		prev_block = UT_LIST_GET_PREV(LRU, block);

		ut_a(block->state == BUF_BLOCK_FILE_PAGE);

		if (block->space == id
		    && (block->buf_fix_count > 0 || block->io_fix != 0)) {

			/* We cannot remove this page during this scan yet;
			maybe the system is currently reading it in, or
			flushing the modifications to the file */

			all_freed = FALSE;

			goto next_page;
		}

		if (block->space == id) {
#ifdef UNIV_DEBUG
			if (buf_debug_prints) {
				fprintf(stderr,
					"Dropping space %lu page %lu\n",
					(ulong) block->space,
					(ulong) block->offset);
			}
#endif
			if (block->is_hashed) {
				page_no = block->offset;

				mutex_exit(&block->mutex);

				mutex_exit(&(buf_pool->mutex));

				/* Note that the following call will acquire
				an S-latch on the page */

				btr_search_drop_page_hash_when_freed(id,
								     page_no);
				goto scan_again;
			}

			if (0 != ut_dulint_cmp(block->oldest_modification,
					       ut_dulint_zero)) {

				/* Remove from the flush list of modified
				blocks */
				block->oldest_modification = ut_dulint_zero;

				UT_LIST_REMOVE(flush_list,
					       buf_pool->flush_list, block);
			}

			/* Remove from the LRU list */
			buf_LRU_block_remove_hashed_page(block);
			buf_LRU_block_free_hashed_page(block);
		}
next_page:
		mutex_exit(&block->mutex);
		block = prev_block;
	}

	mutex_exit(&(buf_pool->mutex));

	if (!all_freed) {
		os_thread_sleep(20000);

		goto scan_again;
	}
}
Example #14
0
/******************************************************************//**
Frees a block from a memory heap. */
UNIV_INTERN
void
mem_heap_block_free(
/*================*/
	mem_heap_t*	heap,	/*!< in: heap */
	mem_block_t*	block)	/*!< in: block to free */
{
	ulint		type;
	ulint		len;
#ifndef UNIV_HOTBACKUP
	buf_block_t*	buf_block	= block->buf_block;
#endif /* !UNIV_HOTBACKUP */

	if (block->magic_n != MEM_BLOCK_MAGIC_N) {
		mem_analyze_corruption(block);
	}

	UT_LIST_REMOVE(list, heap->base, block);

#ifdef MEM_PERIODIC_CHECK
	mem_pool_mutex_enter();

	UT_LIST_REMOVE(mem_block_list, mem_block_list, block);

	mem_pool_mutex_exit();
#endif

	ut_ad(heap->total_size >= block->len);
	heap->total_size -= block->len;

	type = heap->type;
	len = block->len;
	block->magic_n = MEM_FREED_BLOCK_MAGIC_N;

#ifndef UNIV_HOTBACKUP
	if (!srv_use_sys_malloc) {
#ifdef UNIV_MEM_DEBUG
		/* In the debug version we set the memory to a random
		combination of hex 0xDE and 0xAD. */

		mem_erase_buf((byte*)block, len);
#else /* UNIV_MEM_DEBUG */
		UNIV_MEM_ASSERT_AND_FREE(block, len);
#endif /* UNIV_MEM_DEBUG */

	}
	if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) {

		ut_ad(!buf_block);
		mem_area_free(block, mem_comm_pool);
	} else {
		ut_ad(type & MEM_HEAP_BUFFER);

		buf_block_free(buf_block);
	}
#else /* !UNIV_HOTBACKUP */
#ifdef UNIV_MEM_DEBUG
	/* In the debug version we set the memory to a random
	combination of hex 0xDE and 0xAD. */

	mem_erase_buf((byte*)block, len);
#else /* UNIV_MEM_DEBUG */
	UNIV_MEM_ASSERT_AND_FREE(block, len);
#endif /* UNIV_MEM_DEBUG */
	ut_free(block);
#endif /* !UNIV_HOTBACKUP */
}
Example #15
0
/******************************************************************//**
Calling this function is obligatory only if the memory buffer containing
the mutex is freed. Removes a mutex object from the mutex list. The mutex
is checked to be in the reset state. */
UNIV_INTERN
void
mutex_free(
/*=======*/
	mutex_t*	mutex)	/*!< in: mutex */
{
	ut_ad(mutex_validate(mutex));
	ut_a(mutex_get_lock_word(mutex) == 0);
	ut_a(mutex_get_waiters(mutex) == 0);

#ifdef UNIV_MEM_DEBUG
	if (mutex == &mem_hash_mutex) {
		ut_ad(UT_LIST_GET_LEN(mutex_list) == 1);
		ut_ad(UT_LIST_GET_FIRST(mutex_list) == &mem_hash_mutex);
		UT_LIST_REMOVE(list, mutex_list, mutex);
		goto func_exit;
	}
#endif /* UNIV_MEM_DEBUG */

#ifdef UNIV_MEM_DEBUG
	if (mutex == &mem_hash_mutex) {
		ut_ad(UT_LIST_GET_LEN(mutex_list) == 1);
		ut_ad(UT_LIST_GET_FIRST(mutex_list) == &mem_hash_mutex);
		UT_LIST_REMOVE(list, mutex_list, mutex);
		goto func_exit;
	}
#endif /* UNIV_MEM_DEBUG */

	if (mutex != &mutex_list_mutex
#ifdef UNIV_SYNC_DEBUG
	    && mutex != &sync_thread_mutex
#endif /* UNIV_SYNC_DEBUG */
	    ) {

		mutex_enter(&mutex_list_mutex);

		ut_ad(!UT_LIST_GET_PREV(list, mutex)
		      || UT_LIST_GET_PREV(list, mutex)->magic_n
		      == MUTEX_MAGIC_N);
		ut_ad(!UT_LIST_GET_NEXT(list, mutex)
		      || UT_LIST_GET_NEXT(list, mutex)->magic_n
		      == MUTEX_MAGIC_N);

		UT_LIST_REMOVE(list, mutex_list, mutex);

		mutex_exit(&mutex_list_mutex);
	}

	os_event_free(mutex->event);
#ifdef UNIV_MEM_DEBUG
func_exit:
#endif /* UNIV_MEM_DEBUG */
#if !defined(HAVE_ATOMIC_BUILTINS)
	os_fast_mutex_free(&(mutex->os_fast_mutex));
#endif
	/* If we free the mutex protecting the mutex list (freeing is
	not necessary), we have to reset the magic number AFTER removing
	it from the list. */
#ifdef UNIV_DEBUG
	mutex->magic_n = 0;
#endif /* UNIV_DEBUG */
}