/************************************************************************
Writes a page asynchronously from the buffer buf_pool to a file, if it can be
found in the buf_pool and it is in a flushable state. NOTE: in simulated aio we
must call os_aio_simulated_wake_handler_threads after we have posted a batch of
writes!

Locking protocol (order matters to avoid deadlocks):
1. buf_pool->mutex is taken first, then block->mutex;
2. the block's rw-lock s-latch is taken in BUF_IO_WRITE mode, but WHERE it is
   taken relative to releasing the mutexes differs per flush type — see the
   per-branch comments below. Do not reorder these operations. */
static
ulint
buf_flush_try_page(
/*===============*/
				/* out: 1 if a page was flushed, 0 otherwise */
	ulint	space,		/* in: space id */
	ulint	offset,		/* in: page offset */
	ulint	flush_type)	/* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST, or
				BUF_FLUSH_SINGLE_PAGE */
{
	buf_block_t*	block;
	ibool		locked;

	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
	      || flush_type == BUF_FLUSH_SINGLE_PAGE);

	mutex_enter(&(buf_pool->mutex));

	/* Look the page up in the buffer pool page hash; returns NULL if the
	page is not resident. */
	block = buf_page_hash_get(space, offset);

	ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);

	if (!block) {
		mutex_exit(&(buf_pool->mutex));

		return(0);
	}

	mutex_enter(&block->mutex);

	if (flush_type == BUF_FLUSH_LIST
	    && buf_flush_ready_for_flush(block, flush_type)) {

		/* Mark the block as being written out so that other threads
		do not try to flush or evict it concurrently. */
		block->io_fix = BUF_IO_WRITE;

		/* If AWE is enabled and the page is not mapped to a frame,
		then map it */

		if (block->frame == NULL) {
			ut_a(srv_use_awe);

			/* We set second parameter TRUE because the block is
			in the LRU list and we must put it to
			awe_LRU_free_mapped list once mapped to a frame */

			buf_awe_map_page_to_frame(block, TRUE);
		}

		block->flush_type = flush_type;

		/* Reset the "no flush in progress" event when the count of
		pending flushes of this type goes from 0 to 1; waiters on
		no_flush[flush_type] will block until the count returns to 0. */
		if (buf_pool->n_flush[flush_type] == 0) {

			os_event_reset(buf_pool->no_flush[flush_type]);
		}

		(buf_pool->n_flush[flush_type])++;

		locked = FALSE;

		/* If the simulated aio thread is not running, we must
		not wait for any latch, as we may end up in a deadlock:
		if buf_fix_count == 0, then we know we need not wait */

		if (block->buf_fix_count == 0) {
			rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);

			locked = TRUE;
		}

		mutex_exit(&block->mutex);
		mutex_exit(&(buf_pool->mutex));

		if (!locked) {
			/* Push out any buffered (doublewrite) writes first so
			the simulated aio handler can make progress and
			release the latch we are about to wait for. */
			buf_flush_buffered_writes();

			rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
		}

#ifdef UNIV_DEBUG
		if (buf_debug_prints) {
			fprintf(stderr,
				"Flushing page space %lu, page no %lu \n",
				(ulong) block->space, (ulong) block->offset);
		}
#endif /* UNIV_DEBUG */

		buf_flush_write_block_low(block);

		return(1);

	} else if (flush_type == BUF_FLUSH_LRU
		   && buf_flush_ready_for_flush(block, flush_type)) {

		/* VERY IMPORTANT:
		Because any thread may call the LRU flush, even when owning
		locks on pages, to avoid deadlocks, we must make sure that the
		s-lock is acquired on the page without waiting: this is
		accomplished because in the if-condition above we require
		the page not to be bufferfixed (in function
		..._ready_for_flush). */

		block->io_fix = BUF_IO_WRITE;

		/* If AWE is enabled and the page is not mapped to a frame,
		then map it */

		if (block->frame == NULL) {
			ut_a(srv_use_awe);

			/* We set second parameter TRUE because the block is
			in the LRU list and we must put it to
			awe_LRU_free_mapped list once mapped to a frame */

			buf_awe_map_page_to_frame(block, TRUE);
		}

		block->flush_type = flush_type;

		if (buf_pool->n_flush[flush_type] == 0) {

			os_event_reset(buf_pool->no_flush[flush_type]);
		}

		(buf_pool->n_flush[flush_type])++;

		rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);

		/* Note that the s-latch is acquired before releasing the
		buf_pool mutex: this ensures that the latch is acquired
		immediately. */

		mutex_exit(&block->mutex);
		mutex_exit(&(buf_pool->mutex));

		buf_flush_write_block_low(block);

		return(1);

	} else if (flush_type == BUF_FLUSH_SINGLE_PAGE
		   && buf_flush_ready_for_flush(block, flush_type)) {

		block->io_fix = BUF_IO_WRITE;

		/* If AWE is enabled and the page is not mapped to a frame,
		then map it */

		if (block->frame == NULL) {
			ut_a(srv_use_awe);

			/* We set second parameter TRUE because the block is
			in the LRU list and we must put it to
			awe_LRU_free_mapped list once mapped to a frame */

			buf_awe_map_page_to_frame(block, TRUE);
		}

		block->flush_type = flush_type;

		/* NOTE(review): this branch indexes n_flush/no_flush with
		block->flush_type whereas the other branches (and the
		increment below) use flush_type. The two are equal here since
		block->flush_type was assigned flush_type just above, so this
		is an inconsistency, not a bug — consider unifying. */
		if (buf_pool->n_flush[block->flush_type] == 0) {

			os_event_reset(buf_pool->no_flush[block->flush_type]);
		}

		(buf_pool->n_flush[flush_type])++;

		mutex_exit(&block->mutex);
		mutex_exit(&(buf_pool->mutex));

		/* Unlike the LRU branch, the s-latch is taken AFTER the
		mutexes are released here, so this may wait; presumably a
		single-page flush caller holds no conflicting page latches —
		verify against callers. */
		rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);

#ifdef UNIV_DEBUG
		if (buf_debug_prints) {
			fprintf(stderr,
				"Flushing single page space %lu,"
				" page no %lu \n",
				(ulong) block->space,
				(ulong) block->offset);
		}
#endif /* UNIV_DEBUG */

		buf_flush_write_block_low(block);

		return(1);
	}

	/* Page found but not in a flushable state for this flush type. */
	mutex_exit(&block->mutex);
	mutex_exit(&(buf_pool->mutex));

	return(0);
}
/************************************************************************
Returns a free block from the buf_pool. The block is taken off the free list,
or, if the free list is empty, obtained by flushing/evicting pages from the
end of the LRU list. The function loops (see "loop:" label) until it succeeds,
escalating diagnostics as iterations accumulate; it may sleep between retries.
Also checks for pathological buffer pool exhaustion by lock heaps or the
adaptive hash index, and toggles the InnoDB Monitor accordingly. */
buf_block_t*
buf_LRU_get_free_block(void)
/*========================*/
				/* out: the free control block; also if AWE is
				used, it is guaranteed that the block has its
				page mapped to a frame when we return */
{
	buf_block_t*	block		= NULL;
	/* NOTE(review): freed is declared ibool but compared with "> 0"
	below; works because ibool is integral, but "if (freed)" would match
	the type better. */
	ibool		freed;
	ulint		n_iterations	= 1;	/* retry counter; drives the
						escalating diagnostics and
						the sleep below */
	ibool		mon_value_was	= FALSE;	/* saved monitor flag,
							restored on success if
							we turned it on */
	ibool		started_monitor	= FALSE;
loop:
	mutex_enter(&(buf_pool->mutex));

	/* Sanity checks on how much of the pool is consumed by structures
	other than data pages (free + LRU lists cover the data pages; the
	remainder is lock heaps / adaptive hash index). Skipped during
	recovery, when usage patterns are different. */
	if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
	    + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 20) {
		/* Over 95 % consumed: unrecoverable — print diagnostics and
		deliberately crash to get a stack trace. */
		ut_print_timestamp(stderr);

		fprintf(stderr,
			" InnoDB: ERROR: over 95 percent of the buffer pool"
			" is occupied by\n"
			"InnoDB: lock heaps or the adaptive hash index!"
			" Check that your\n"
			"InnoDB: transactions do not set too many row locks.\n"
			"InnoDB: Your buffer pool size is %lu MB."
			" Maybe you should make\n"
			"InnoDB: the buffer pool bigger?\n"
			"InnoDB: We intentionally generate a seg fault"
			" to print a stack trace\n"
			"InnoDB: on Linux!\n",
			(ulong) (buf_pool->curr_size
				 / (1024 * 1024 / UNIV_PAGE_SIZE)));

		ut_error;

	} else if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
		   + UT_LIST_GET_LEN(buf_pool->LRU)
		   < buf_pool->max_size / 3) {

		if (!buf_lru_switched_on_innodb_mon) {

			/* Over 67 % of the buffer pool is occupied by lock
			heaps or the adaptive hash index. This may be a memory
			leak! */

			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: WARNING: over 67 percent of"
				" the buffer pool is occupied by\n"
				"InnoDB: lock heaps or the adaptive"
				" hash index! Check that your\n"
				"InnoDB: transactions do not set too many"
				" row locks.\n"
				"InnoDB: Your buffer pool size is %lu MB."
				" Maybe you should make\n"
				"InnoDB: the buffer pool bigger?\n"
				"InnoDB: Starting the InnoDB Monitor to print"
				" diagnostics, including\n"
				"InnoDB: lock heap and hash index sizes.\n",
				(ulong) (buf_pool->curr_size
					 / (1024 * 1024 / UNIV_PAGE_SIZE)));

			buf_lru_switched_on_innodb_mon = TRUE;
			srv_print_innodb_monitor = TRUE;
			os_event_set(srv_lock_timeout_thread_event);
		}
	} else if (buf_lru_switched_on_innodb_mon) {

		/* Switch off the InnoDB Monitor; this is a simple way
		to stop the monitor if the situation becomes less urgent,
		but may also surprise users if the user also switched on the
		monitor! */

		buf_lru_switched_on_innodb_mon = FALSE;
		srv_print_innodb_monitor = FALSE;
	}

	/* If there is a block in the free list, take it */
	if (UT_LIST_GET_LEN(buf_pool->free) > 0) {

		block = UT_LIST_GET_FIRST(buf_pool->free);
		ut_a(block->in_free_list);
		UT_LIST_REMOVE(free, buf_pool->free, block);
		block->in_free_list = FALSE;
		ut_a(block->state != BUF_BLOCK_FILE_PAGE);
		ut_a(!block->in_LRU_list);

		if (srv_use_awe) {
			if (block->frame) {
				/* Remove from the list of mapped pages */

				UT_LIST_REMOVE(awe_LRU_free_mapped,
					       buf_pool->awe_LRU_free_mapped,
					       block);
			} else {
				/* We map the page to a frame; second param
				FALSE below because we do not want it to be
				added to the awe_LRU_free_mapped list */

				buf_awe_map_page_to_frame(block, FALSE);
			}
		}

		mutex_enter(&block->mutex);

		block->state = BUF_BLOCK_READY_FOR_USE;
		UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);

		mutex_exit(&block->mutex);

		mutex_exit(&(buf_pool->mutex));

		/* If this function turned the monitor on during an earlier
		iteration, restore its previous setting before returning. */
		if (started_monitor) {
			srv_print_innodb_monitor = mon_value_was;
		}

		return(block);
	}

	/* If no block was in the free list, search from the end of the LRU
	list and try to free a block there */

	mutex_exit(&(buf_pool->mutex));

	freed = buf_LRU_search_and_free_block(n_iterations);

	if (freed > 0) {
		goto loop;
	}

	if (n_iterations > 30) {
		/* Hard to find free blocks: warn once per iteration past 30
		and start the InnoDB Monitor to aid diagnosis. */
		ut_print_timestamp(stderr);
		fprintf(stderr,
			"InnoDB: Warning: difficult to find free blocks from\n"
			"InnoDB: the buffer pool (%lu search iterations)!"
			" Consider\n"
			"InnoDB: increasing the buffer pool size.\n"
			"InnoDB: It is also possible that"
			" in your Unix version\n"
			"InnoDB: fsync is very slow, or"
			" completely frozen inside\n"
			"InnoDB: the OS kernel. Then upgrading to"
			" a newer version\n"
			"InnoDB: of your operating system may help."
			" Look at the\n"
			"InnoDB: number of fsyncs in diagnostic info below.\n"
			"InnoDB: Pending flushes (fsync) log: %lu;"
			" buffer pool: %lu\n"
			"InnoDB: %lu OS file reads, %lu OS file writes,"
			" %lu OS fsyncs\n"
			"InnoDB: Starting InnoDB Monitor to print further\n"
			"InnoDB: diagnostics to the standard output.\n",
			(ulong) n_iterations,
			(ulong) fil_n_pending_log_flushes,
			(ulong) fil_n_pending_tablespace_flushes,
			(ulong) os_n_file_reads,
			(ulong) os_n_file_writes,
			(ulong) os_n_fsyncs);

		/* Remember the user's setting so it can be restored when a
		block is finally obtained (see started_monitor above). */
		mon_value_was = srv_print_innodb_monitor;
		started_monitor = TRUE;
		srv_print_innodb_monitor = TRUE;
		os_event_set(srv_lock_timeout_thread_event);
	}

	/* No free block was found: try to flush the LRU list */

	buf_flush_free_margin();
	++srv_buf_pool_wait_free;

	/* Wake the simulated-aio handler threads so posted writes complete
	and their pages can be freed. */
	os_aio_simulated_wake_handler_threads();

	mutex_enter(&(buf_pool->mutex));

	if (buf_pool->LRU_flush_ended > 0) {
		/* We have written pages in an LRU flush. To make the insert
		buffer more efficient, we try to move these pages to the free
		list. */

		mutex_exit(&(buf_pool->mutex));

		buf_LRU_try_free_flushed_blocks();
	} else {
		mutex_exit(&(buf_pool->mutex));
	}

	if (n_iterations > 10) {

		/* Back off to let flushes complete before retrying. */
		os_thread_sleep(500000);
	}

	n_iterations++;

	goto loop;
}