/******************************************************************//** Removes a debug information struct for an rw-lock. */ UNIV_INTERN void rw_lock_remove_debug_info( /*======================*/ rw_lock_t* lock, /*!< in: rw-lock */ ulint pass, /*!< in: pass value */ ulint lock_type) /*!< in: lock type */ { rw_lock_debug_t* info; ut_ad(lock); if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) { sync_thread_reset_level(lock); } rw_lock_debug_mutex_enter(); info = UT_LIST_GET_FIRST(lock->debug_list); while (info != NULL) { if ((pass == info->pass) && ((pass != 0) || os_thread_eq(info->thread_id, os_thread_get_curr_id())) && (info->lock_type == lock_type)) { /* Found! */ UT_LIST_REMOVE(list, lock->debug_list, info); rw_lock_debug_mutex_exit(); rw_lock_debug_free(info); return; } info = UT_LIST_GET_NEXT(list, info); } ut_error; }
/**********************************************************************//** Remove a block from the appropriate buddy free list. */ UNIV_INLINE void buf_buddy_remove_from_free( /*=======================*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ buf_page_t* bpage, /*!< in: block to be removed */ ulint i) /*!< in: index of buf_pool->zip_free[] */ { #ifdef UNIV_DEBUG buf_page_t* prev = UT_LIST_GET_PREV(list, bpage); buf_page_t* next = UT_LIST_GET_NEXT(list, bpage); ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE); ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE); #endif /* UNIV_DEBUG */ ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE); UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage); }
void trx_end_lock_wait( /*==============*/ trx_t* trx) /* in: transaction */ { que_thr_t* thr; ut_ad(mutex_own(&kernel_mutex)); ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); thr = UT_LIST_GET_FIRST(trx->wait_thrs); while (thr != NULL) { que_thr_end_wait_no_next_thr(thr); UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr); thr = UT_LIST_GET_FIRST(trx->wait_thrs); } trx->que_state = TRX_QUE_RUNNING; }
void buf_flush_write_complete( /*=====================*/ buf_block_t* block) /* in: pointer to the block in question */ { ut_ad(block); #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(buf_pool->mutex))); #endif /* UNIV_SYNC_DEBUG */ ut_a(block->state == BUF_BLOCK_FILE_PAGE); block->oldest_modification = ut_dulint_zero; UT_LIST_REMOVE(flush_list, buf_pool->flush_list, block); ut_d(UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list)); (buf_pool->n_flush[block->flush_type])--; if (block->flush_type == BUF_FLUSH_LRU) { /* Put the block to the end of the LRU list to wait to be moved to the free list */ buf_LRU_make_block_old(block); buf_pool->LRU_flush_ended++; } /* fprintf(stderr, "n pending flush %lu\n", buf_pool->n_flush[block->flush_type]); */ if ((buf_pool->n_flush[block->flush_type] == 0) && (buf_pool->init_flush[block->flush_type] == FALSE)) { /* The running flush batch has ended */ os_event_set(buf_pool->no_flush[block->flush_type]); } }
// Remove timer from watch list void OSEventThread::DelTimer(event_timer_t * ev) { TRACE("~~~~ +DelTimer ~~~~\n"); if(!ev) return; { OSMutexLocker _locker(&mMutex); if(NULL == UT_LIST_GET_NEXT(watchNode,ev) || NULL == UT_LIST_GET_PREV(watchNode,ev)) { UT_LIST_REMOVE(watchNode,mTimerList,ev); DecreaseWatchNum(); } } wakeup(); while (ev->status != 0)//just for single thread { WARN("wait exit from pending list \n"); OSThread::Sleep(1); } TRACE("~~~~ -DelTimer ~~~~\n"); }
/***********************************************************//** Moves the query threads in the lock wait list to the SUSPENDED state and puts the transaction to the TRX_QUE_RUNNING state. */ static void trx_lock_wait_to_suspended( /*=======================*/ trx_t* trx) /*!< in: transaction in the TRX_QUE_LOCK_WAIT state */ { que_thr_t* thr; ut_ad(mutex_own(&kernel_mutex)); ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); thr = UT_LIST_GET_FIRST(trx->wait_thrs); while (thr != NULL) { thr->state = QUE_THR_SUSPENDED; UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr); thr = UT_LIST_GET_FIRST(trx->wait_thrs); } trx->que_state = TRX_QUE_RUNNING; }
/****************************************************************//** Commits a transaction. */ UNIV_INTERN void trx_commit_off_kernel( /*==================*/ trx_t* trx) /*!< in: transaction */ { page_t* update_hdr_page; ib_uint64_t lsn = 0; trx_rseg_t* rseg; trx_undo_t* undo; mtr_t mtr; ut_ad(mutex_own(&kernel_mutex)); trx->must_flush_log_later = FALSE; rseg = trx->rseg; if (trx->insert_undo != NULL || trx->update_undo != NULL) { mutex_exit(&kernel_mutex); mtr_start(&mtr); /* Change the undo log segment states from TRX_UNDO_ACTIVE to some other state: these modifications to the file data structure define the transaction as committed in the file based world, at the serialization point of the log sequence number lsn obtained below. */ mutex_enter(&(rseg->mutex)); if (trx->insert_undo != NULL) { trx_undo_set_state_at_finish( rseg, trx, trx->insert_undo, &mtr); } undo = trx->update_undo; if (undo) { mutex_enter(&kernel_mutex); trx->no = trx_sys_get_new_trx_no(); mutex_exit(&kernel_mutex); /* It is not necessary to obtain trx->undo_mutex here because only a single OS thread is allowed to do the transaction commit for this transaction. */ update_hdr_page = trx_undo_set_state_at_finish( rseg, trx, undo, &mtr); /* We have to do the cleanup for the update log while holding the rseg mutex because update log headers have to be put to the history list in the order of the trx number. */ trx_undo_update_cleanup(trx, update_hdr_page, &mtr); } mutex_exit(&(rseg->mutex)); /* Update the latest MySQL binlog name and offset info in trx sys header if MySQL binlogging is on or the database server is a MySQL replication slave */ if (trx->mysql_log_file_name && trx->mysql_log_file_name[0] != '\0') { trx_sys_update_mysql_binlog_offset( trx->mysql_log_file_name, trx->mysql_log_offset, TRX_SYS_MYSQL_LOG_INFO, &mtr); trx->mysql_log_file_name = NULL; } /* The following call commits the mini-transaction, making the whole transaction committed in the file-based world, at this log sequence number. The transaction becomes 'durable' when we write the log to disk, but in the logical sense the commit in the file-based data structures (undo logs etc.) happens here. NOTE that transaction numbers, which are assigned only to transactions with an update undo log, do not necessarily come in exactly the same order as commit lsn's, if the transactions have different rollback segments. To get exactly the same order we should hold the kernel mutex up to this point, adding to the contention of the kernel mutex. However, if a transaction T2 is able to see modifications made by a transaction T1, T2 will always get a bigger transaction number and a bigger commit lsn than T1. */ /*--------------*/ mtr_commit(&mtr); /*--------------*/ lsn = mtr.end_lsn; mutex_enter(&kernel_mutex); } ut_ad(trx->conc_state == TRX_ACTIVE || trx->conc_state == TRX_PREPARED); ut_ad(mutex_own(&kernel_mutex)); /* The following assignment makes the transaction committed in memory and makes its changes to data visible to other transactions. NOTE that there is a small discrepancy from the strict formal visibility rules here: a human user of the database can see modifications made by another transaction T even before the necessary log segment has been flushed to the disk. If the database happens to crash before the flush, the user has seen modifications from T which will never be a committed transaction. However, any transaction T2 which sees the modifications of the committing transaction T, and which also itself makes modifications to the database, will get an lsn larger than the committing transaction T. In the case where the log flush fails, and T never gets committed, also T2 will never get committed. */ /*--------------------------------------*/ trx->conc_state = TRX_COMMITTED_IN_MEMORY; /*--------------------------------------*/ /* If we release kernel_mutex below and we are still doing recovery i.e.: back ground rollback thread is still active then there is a chance that the rollback thread may see this trx as COMMITTED_IN_MEMORY and goes adhead to clean it up calling trx_cleanup_at_db_startup(). This can happen in the case we are committing a trx here that is left in PREPARED state during the crash. Note that commit of the rollback of a PREPARED trx happens in the recovery thread while the rollback of other transactions happen in the background thread. To avoid this race we unconditionally unset the is_recovered flag from the trx. */ trx->is_recovered = FALSE; lock_release_off_kernel(trx); if (trx->global_read_view) { read_view_close(trx->global_read_view); mem_heap_empty(trx->global_read_view_heap); trx->global_read_view = NULL; } trx->read_view = NULL; if (lsn) { mutex_exit(&kernel_mutex); if (trx->insert_undo != NULL) { trx_undo_insert_cleanup(trx); } /* NOTE that we could possibly make a group commit more efficient here: call os_thread_yield here to allow also other trxs to come to commit! */ /*-------------------------------------*/ /* Depending on the my.cnf options, we may now write the log buffer to the log files, making the transaction durable if the OS does not crash. We may also flush the log files to disk, making the transaction durable also at an OS crash or a power outage. The idea in InnoDB's group commit is that a group of transactions gather behind a trx doing a physical disk write to log files, and when that physical write has been completed, one of those transactions does a write which commits the whole group. Note that this group commit will only bring benefit if there are > 2 users in the database. Then at least 2 users can gather behind one doing the physical log write to disk. If we are calling trx_commit() under prepare_commit_mutex, we will delay possible log write and flush to a separate function trx_commit_complete_for_mysql(), which is only called when the thread has released the mutex. This is to make the group commit algorithm to work. Otherwise, the prepare_commit mutex would serialize all commits and prevent a group of transactions from gathering. */ if (trx->flush_log_later) { /* Do nothing yet */ trx->must_flush_log_later = TRUE; } else if (srv_flush_log_at_trx_commit == 0) { /* Do nothing */ } else if (srv_flush_log_at_trx_commit == 1) { if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { /* Write the log but do not flush it to disk */ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); } else { /* Write the log to the log files AND flush them to disk */ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); } } else if (srv_flush_log_at_trx_commit == 2) { /* Write the log but do not flush it to disk */ log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); } else { ut_error; } trx->commit_lsn = lsn; /*-------------------------------------*/ mutex_enter(&kernel_mutex); } /* Free all savepoints */ trx_roll_free_all_savepoints(trx); trx->conc_state = TRX_NOT_STARTED; trx->rseg = NULL; trx->undo_no = ut_dulint_zero; trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0); UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx); }
/********************************************************************//** Frees memory to a pool. */ UNIV_INTERN void mem_area_free( /*==========*/ void* ptr, /*!< in, own: pointer to allocated memory buffer */ mem_pool_t* pool) /*!< in: memory pool */ { mem_area_t* area; mem_area_t* buddy; void* new_ptr; ulint size; ulint n; if (UNIV_LIKELY(srv_use_sys_malloc)) { free(ptr); return; } /* It may be that the area was really allocated from the OS with regular malloc: check if ptr points within our memory pool */ if ((byte*)ptr < pool->buf || (byte*)ptr >= pool->buf + pool->size) { ut_free(ptr); return; } area = (mem_area_t*) (((byte*)ptr) - MEM_AREA_EXTRA_SIZE); if (mem_area_get_free(area)) { fprintf(stderr, "InnoDB: Error: Freeing element to mem pool" " free list though the\n" "InnoDB: element is marked free!\n"); mem_analyze_corruption(area); ut_error; } size = mem_area_get_size(area); UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE); if (size == 0) { fprintf(stderr, "InnoDB: Error: Mem area size is 0. Possibly a" " memory overrun of the\n" "InnoDB: previous allocated area!\n"); mem_analyze_corruption(area); ut_error; } #ifdef UNIV_LIGHT_MEM_DEBUG if (((byte*)area) + size < pool->buf + pool->size) { ulint next_size; next_size = mem_area_get_size( (mem_area_t*)(((byte*)area) + size)); if (UNIV_UNLIKELY(!next_size || !ut_is_2pow(next_size))) { fprintf(stderr, "InnoDB: Error: Memory area size %lu," " next area size %lu not a power of 2!\n" "InnoDB: Possibly a memory overrun of" " the buffer being freed here.\n", (ulong) size, (ulong) next_size); mem_analyze_corruption(area); ut_error; } } #endif buddy = mem_area_get_buddy(area, size, pool); n = ut_2_log(size); mem_pool_mutex_enter(pool); mem_n_threads_inside++; ut_a(mem_n_threads_inside == 1); if (buddy && mem_area_get_free(buddy) && (size == mem_area_get_size(buddy))) { /* The buddy is in a free list */ if ((byte*)buddy < (byte*)area) { new_ptr = ((byte*)buddy) + MEM_AREA_EXTRA_SIZE; mem_area_set_size(buddy, 2 * size); mem_area_set_free(buddy, FALSE); } else { new_ptr = ptr; mem_area_set_size(area, 2 * size); } /* Remove the buddy from its free list and merge it to area */ UT_LIST_REMOVE(free_list, pool->free_list[n], buddy); pool->reserved += ut_2_exp(n); mem_n_threads_inside--; mem_pool_mutex_exit(pool); mem_area_free(new_ptr, pool); return; } else { UT_LIST_ADD_FIRST(free_list, pool->free_list[n], area); mem_area_set_free(area, TRUE); ut_ad(pool->reserved >= size); pool->reserved -= size; } mem_n_threads_inside--; mem_pool_mutex_exit(pool); ut_ad(mem_pool_validate(pool)); }
/********************************************************************//** Allocates memory from a pool. NOTE: This low-level function should only be used in mem0mem.*! @return own: allocated memory buffer */ UNIV_INTERN void* mem_area_alloc( /*===========*/ ulint* psize, /*!< in: requested size in bytes; for optimum space usage, the size should be a power of 2 minus MEM_AREA_EXTRA_SIZE; out: allocated size in bytes (greater than or equal to the requested size) */ mem_pool_t* pool) /*!< in: memory pool */ { mem_area_t* area; ulint size; ulint n; ibool ret; /* If we are using os allocator just make a simple call to malloc */ if (UNIV_LIKELY(srv_use_sys_malloc)) { return(malloc(*psize)); } size = *psize; n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE)); mutex_enter(&(pool->mutex)); mem_n_threads_inside++; ut_a(mem_n_threads_inside == 1); area = UT_LIST_GET_FIRST(pool->free_list[n]); if (area == NULL) { ret = mem_pool_fill_free_list(n, pool); if (ret == FALSE) { /* Out of memory in memory pool: we try to allocate from the operating system with the regular malloc: */ mem_n_threads_inside--; mutex_exit(&(pool->mutex)); return(ut_malloc(size)); } area = UT_LIST_GET_FIRST(pool->free_list[n]); } if (!mem_area_get_free(area)) { fprintf(stderr, "InnoDB: Error: Removing element from mem pool" " free list %lu though the\n" "InnoDB: element is not marked free!\n", (ulong) n); mem_analyze_corruption(area); /* Try to analyze a strange assertion failure reported at [email protected] where the free bit IS 1 in the hex dump above */ if (mem_area_get_free(area)) { fprintf(stderr, "InnoDB: Probably a race condition" " because now the area is marked free!\n"); } ut_error; } if (UT_LIST_GET_LEN(pool->free_list[n]) == 0) { fprintf(stderr, "InnoDB: Error: Removing element from mem pool" " free list %lu\n" "InnoDB: though the list length is 0!\n", (ulong) n); mem_analyze_corruption(area); ut_error; } ut_ad(mem_area_get_size(area) == ut_2_exp(n)); mem_area_set_free(area, FALSE); UT_LIST_REMOVE(free_list, pool->free_list[n], area); pool->reserved += mem_area_get_size(area); mem_n_threads_inside--; mutex_exit(&(pool->mutex)); ut_ad(mem_pool_validate(pool)); *psize = ut_2_exp(n) - MEM_AREA_EXTRA_SIZE; UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area, *psize); return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area))); }
/********************************************************************//** Fills the specified free list. @return TRUE if we were able to insert a block to the free list */ static ibool mem_pool_fill_free_list( /*====================*/ ulint i, /*!< in: free list index */ mem_pool_t* pool) /*!< in: memory pool */ { mem_area_t* area; mem_area_t* area2; ibool ret; ut_ad(mutex_own(&(pool->mutex))); if (UNIV_UNLIKELY(i >= 63)) { /* We come here when we have run out of space in the memory pool: */ return(FALSE); } area = UT_LIST_GET_FIRST(pool->free_list[i + 1]); if (area == NULL) { if (UT_LIST_GET_LEN(pool->free_list[i + 1]) > 0) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Error: mem pool free list %lu" " length is %lu\n" "InnoDB: though the list is empty!\n", (ulong) i + 1, (ulong) UT_LIST_GET_LEN(pool->free_list[i + 1])); } ret = mem_pool_fill_free_list(i + 1, pool); if (ret == FALSE) { return(FALSE); } area = UT_LIST_GET_FIRST(pool->free_list[i + 1]); } if (UNIV_UNLIKELY(UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0)) { mem_analyze_corruption(area); ut_error; } UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area); area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i)); UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE); mem_area_set_size(area2, ut_2_exp(i)); mem_area_set_free(area2, TRUE); UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area2); mem_area_set_size(area, ut_2_exp(i)); UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area); return(TRUE); }
/***************************************************************//** Removes a memory heap (which is going to be freed by the caller) from the list of live memory heaps. Returns the size of the heap in terms of how much memory in bytes was allocated for the user of the heap (not the total space occupied by the heap). Also validates the heap. NOTE: This function does not free the storage occupied by the heap itself, only the node in the list of heaps. */ UNIV_INTERN void mem_hash_remove( /*============*/ mem_heap_t* heap, /*!< in: the heap to be freed */ const char* file_name, /*!< in: file name of freeing */ ulint line) /*!< in: line where freed */ { mem_hash_node_t* node; ulint cell_no; ibool error; ulint size; ut_ad(mem_heap_check(heap)); mutex_enter(&mem_hash_mutex); cell_no = ut_hash_ulint((ulint)heap, MEM_HASH_SIZE); /* Look for the heap in the hash table list */ node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(cell_no)); while (node != NULL) { if (node->heap == heap) { break; } node = UT_LIST_GET_NEXT(list, node); } if (node == NULL) { fprintf(stderr, "Memory heap or buffer freed in %s line %lu" " did not exist.\n", innobase_basename(file_name), (ulong) line); ut_error; } /* Remove from lists */ UT_LIST_REMOVE(list, *mem_hash_get_nth_cell(cell_no), node); UT_LIST_REMOVE(all_list, mem_all_list_base, node); /* Validate the heap which will be freed */ mem_heap_validate_or_print(node->heap, NULL, FALSE, &error, &size, NULL, NULL); if (error) { fprintf(stderr, "Inconsistency in memory heap or" " buffer n:o %lu created\n" "in %s line %lu and tried to free in %s line %lu.\n" "Hex dump of 400 bytes around memory heap" " first block start:\n", node->nth_heap, innobase_basename(node->file_name), (ulong) node->line, innobase_basename(file_name), (ulong) line); ut_print_buf(stderr, (byte*)node->heap - 200, 400); fputs("\nDump of the mem heap:\n", stderr); mem_heap_validate_or_print(node->heap, NULL, TRUE, &error, &size, NULL, NULL); ut_error; } /* Free the memory occupied by the node struct */ ut_free(node); mem_current_allocated_memory -= size; mutex_exit(&mem_hash_mutex); }
buf_block_t* buf_LRU_get_free_block(void) /*========================*/ /* out: the free control block; also if AWE is used, it is guaranteed that the block has its page mapped to a frame when we return */ { buf_block_t* block = NULL; ibool freed; ulint n_iterations = 1; ibool mon_value_was = FALSE; ibool started_monitor = FALSE; loop: mutex_enter(&(buf_pool->mutex)); if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 20) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: ERROR: over 95 percent of the buffer pool" " is occupied by\n" "InnoDB: lock heaps or the adaptive hash index!" " Check that your\n" "InnoDB: transactions do not set too many row locks.\n" "InnoDB: Your buffer pool size is %lu MB." " Maybe you should make\n" "InnoDB: the buffer pool bigger?\n" "InnoDB: We intentionally generate a seg fault" " to print a stack trace\n" "InnoDB: on Linux!\n", (ulong) (buf_pool->curr_size / (1024 * 1024 / UNIV_PAGE_SIZE))); ut_error; } else if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 3) { if (!buf_lru_switched_on_innodb_mon) { /* Over 67 % of the buffer pool is occupied by lock heaps or the adaptive hash index. This may be a memory leak! */ ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: WARNING: over 67 percent of" " the buffer pool is occupied by\n" "InnoDB: lock heaps or the adaptive" " hash index! Check that your\n" "InnoDB: transactions do not set too many" " row locks.\n" "InnoDB: Your buffer pool size is %lu MB." " Maybe you should make\n" "InnoDB: the buffer pool bigger?\n" "InnoDB: Starting the InnoDB Monitor to print" " diagnostics, including\n" "InnoDB: lock heap and hash index sizes.\n", (ulong) (buf_pool->curr_size / (1024 * 1024 / UNIV_PAGE_SIZE))); buf_lru_switched_on_innodb_mon = TRUE; srv_print_innodb_monitor = TRUE; os_event_set(srv_lock_timeout_thread_event); } } else if (buf_lru_switched_on_innodb_mon) { /* Switch off the InnoDB Monitor; this is a simple way to stop the monitor if the situation becomes less urgent, but may also surprise users if the user also switched on the monitor! */ buf_lru_switched_on_innodb_mon = FALSE; srv_print_innodb_monitor = FALSE; } /* If there is a block in the free list, take it */ if (UT_LIST_GET_LEN(buf_pool->free) > 0) { block = UT_LIST_GET_FIRST(buf_pool->free); ut_a(block->in_free_list); UT_LIST_REMOVE(free, buf_pool->free, block); block->in_free_list = FALSE; ut_a(block->state != BUF_BLOCK_FILE_PAGE); ut_a(!block->in_LRU_list); if (srv_use_awe) { if (block->frame) { /* Remove from the list of mapped pages */ UT_LIST_REMOVE(awe_LRU_free_mapped, buf_pool->awe_LRU_free_mapped, block); } else { /* We map the page to a frame; second param FALSE below because we do not want it to be added to the awe_LRU_free_mapped list */ buf_awe_map_page_to_frame(block, FALSE); } } mutex_enter(&block->mutex); block->state = BUF_BLOCK_READY_FOR_USE; UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE); mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); if (started_monitor) { srv_print_innodb_monitor = mon_value_was; } return(block); } /* If no block was in the free list, search from the end of the LRU list and try to free a block there */ mutex_exit(&(buf_pool->mutex)); freed = buf_LRU_search_and_free_block(n_iterations); if (freed > 0) { goto loop; } if (n_iterations > 30) { ut_print_timestamp(stderr); fprintf(stderr, "InnoDB: Warning: difficult to find free blocks from\n" "InnoDB: the buffer pool (%lu search iterations)!" " Consider\n" "InnoDB: increasing the buffer pool size.\n" "InnoDB: It is also possible that" " in your Unix version\n" "InnoDB: fsync is very slow, or" " completely frozen inside\n" "InnoDB: the OS kernel. Then upgrading to" " a newer version\n" "InnoDB: of your operating system may help." " Look at the\n" "InnoDB: number of fsyncs in diagnostic info below.\n" "InnoDB: Pending flushes (fsync) log: %lu;" " buffer pool: %lu\n" "InnoDB: %lu OS file reads, %lu OS file writes," " %lu OS fsyncs\n" "InnoDB: Starting InnoDB Monitor to print further\n" "InnoDB: diagnostics to the standard output.\n", (ulong) n_iterations, (ulong) fil_n_pending_log_flushes, (ulong) fil_n_pending_tablespace_flushes, (ulong) os_n_file_reads, (ulong) os_n_file_writes, (ulong) os_n_fsyncs); mon_value_was = srv_print_innodb_monitor; started_monitor = TRUE; srv_print_innodb_monitor = TRUE; os_event_set(srv_lock_timeout_thread_event); } /* No free block was found: try to flush the LRU list */ buf_flush_free_margin(); ++srv_buf_pool_wait_free; os_aio_simulated_wake_handler_threads(); mutex_enter(&(buf_pool->mutex)); if (buf_pool->LRU_flush_ended > 0) { /* We have written pages in an LRU flush. To make the insert buffer more efficient, we try to move these pages to the free list. */ mutex_exit(&(buf_pool->mutex)); buf_LRU_try_free_flushed_blocks(); } else { mutex_exit(&(buf_pool->mutex)); } if (n_iterations > 10) { os_thread_sleep(500000); } n_iterations++; goto loop; }
void buf_LRU_invalidate_tablespace( /*==========================*/ ulint id) /* in: space id */ { buf_block_t* block; ulint page_no; ibool all_freed; /* Before we attempt to drop pages one by one we first attempt to drop page hash index entries in batches to make it more efficient. The batching attempt is a best effort attempt and does not guarantee that all pages hash entries will be dropped. We get rid of remaining page hash entries one by one below. */ buf_LRU_drop_page_hash_for_tablespace(id); scan_again: mutex_enter(&(buf_pool->mutex)); all_freed = TRUE; block = UT_LIST_GET_LAST(buf_pool->LRU); while (block != NULL) { buf_block_t* prev_block; mutex_enter(&block->mutex); prev_block = UT_LIST_GET_PREV(LRU, block); ut_a(block->state == BUF_BLOCK_FILE_PAGE); if (block->space == id && (block->buf_fix_count > 0 || block->io_fix != 0)) { /* We cannot remove this page during this scan yet; maybe the system is currently reading it in, or flushing the modifications to the file */ all_freed = FALSE; goto next_page; } if (block->space == id) { #ifdef UNIV_DEBUG if (buf_debug_prints) { fprintf(stderr, "Dropping space %lu page %lu\n", (ulong) block->space, (ulong) block->offset); } #endif if (block->is_hashed) { page_no = block->offset; mutex_exit(&block->mutex); mutex_exit(&(buf_pool->mutex)); /* Note that the following call will acquire an S-latch on the page */ btr_search_drop_page_hash_when_freed(id, page_no); goto scan_again; } if (0 != ut_dulint_cmp(block->oldest_modification, ut_dulint_zero)) { /* Remove from the flush list of modified blocks */ block->oldest_modification = ut_dulint_zero; UT_LIST_REMOVE(flush_list, buf_pool->flush_list, block); } /* Remove from the LRU list */ buf_LRU_block_remove_hashed_page(block); buf_LRU_block_free_hashed_page(block); } next_page: mutex_exit(&block->mutex); block = prev_block; } mutex_exit(&(buf_pool->mutex)); if (!all_freed) { os_thread_sleep(20000); goto scan_again; } }
/******************************************************************//** Frees a block from a memory heap. */ UNIV_INTERN void mem_heap_block_free( /*================*/ mem_heap_t* heap, /*!< in: heap */ mem_block_t* block) /*!< in: block to free */ { ulint type; ulint len; #ifndef UNIV_HOTBACKUP buf_block_t* buf_block = block->buf_block; #endif /* !UNIV_HOTBACKUP */ if (block->magic_n != MEM_BLOCK_MAGIC_N) { mem_analyze_corruption(block); } UT_LIST_REMOVE(list, heap->base, block); #ifdef MEM_PERIODIC_CHECK mem_pool_mutex_enter(); UT_LIST_REMOVE(mem_block_list, mem_block_list, block); mem_pool_mutex_exit(); #endif ut_ad(heap->total_size >= block->len); heap->total_size -= block->len; type = heap->type; len = block->len; block->magic_n = MEM_FREED_BLOCK_MAGIC_N; #ifndef UNIV_HOTBACKUP if (!srv_use_sys_malloc) { #ifdef UNIV_MEM_DEBUG /* In the debug version we set the memory to a random combination of hex 0xDE and 0xAD. */ mem_erase_buf((byte*)block, len); #else /* UNIV_MEM_DEBUG */ UNIV_MEM_ASSERT_AND_FREE(block, len); #endif /* UNIV_MEM_DEBUG */ } if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) { ut_ad(!buf_block); mem_area_free(block, mem_comm_pool); } else { ut_ad(type & MEM_HEAP_BUFFER); buf_block_free(buf_block); } #else /* !UNIV_HOTBACKUP */ #ifdef UNIV_MEM_DEBUG /* In the debug version we set the memory to a random combination of hex 0xDE and 0xAD. */ mem_erase_buf((byte*)block, len); #else /* UNIV_MEM_DEBUG */ UNIV_MEM_ASSERT_AND_FREE(block, len); #endif /* UNIV_MEM_DEBUG */ ut_free(block); #endif /* !UNIV_HOTBACKUP */ }
/******************************************************************//** Calling this function is obligatory only if the memory buffer containing the mutex is freed. Removes a mutex object from the mutex list. The mutex is checked to be in the reset state. */ UNIV_INTERN void mutex_free( /*=======*/ mutex_t* mutex) /*!< in: mutex */ { ut_ad(mutex_validate(mutex)); ut_a(mutex_get_lock_word(mutex) == 0); ut_a(mutex_get_waiters(mutex) == 0); #ifdef UNIV_MEM_DEBUG if (mutex == &mem_hash_mutex) { ut_ad(UT_LIST_GET_LEN(mutex_list) == 1); ut_ad(UT_LIST_GET_FIRST(mutex_list) == &mem_hash_mutex); UT_LIST_REMOVE(list, mutex_list, mutex); goto func_exit; } #endif /* UNIV_MEM_DEBUG */ #ifdef UNIV_MEM_DEBUG if (mutex == &mem_hash_mutex) { ut_ad(UT_LIST_GET_LEN(mutex_list) == 1); ut_ad(UT_LIST_GET_FIRST(mutex_list) == &mem_hash_mutex); UT_LIST_REMOVE(list, mutex_list, mutex); goto func_exit; } #endif /* UNIV_MEM_DEBUG */ if (mutex != &mutex_list_mutex #ifdef UNIV_SYNC_DEBUG && mutex != &sync_thread_mutex #endif /* UNIV_SYNC_DEBUG */ ) { mutex_enter(&mutex_list_mutex); ut_ad(!UT_LIST_GET_PREV(list, mutex) || UT_LIST_GET_PREV(list, mutex)->magic_n == MUTEX_MAGIC_N); ut_ad(!UT_LIST_GET_NEXT(list, mutex) || UT_LIST_GET_NEXT(list, mutex)->magic_n == MUTEX_MAGIC_N); UT_LIST_REMOVE(list, mutex_list, mutex); mutex_exit(&mutex_list_mutex); } os_event_free(mutex->event); #ifdef UNIV_MEM_DEBUG func_exit: #endif /* UNIV_MEM_DEBUG */ #if !defined(HAVE_ATOMIC_BUILTINS) os_fast_mutex_free(&(mutex->os_fast_mutex)); #endif /* If we free the mutex protecting the mutex list (freeing is not necessary), we have to reset the magic number AFTER removing it from the list. */ #ifdef UNIV_DEBUG mutex->magic_n = 0; #endif /* UNIV_DEBUG */ }