/**********************************************************************
Adds a block to the LRU list end. */
UNIV_INLINE
void
buf_LRU_add_block_to_end_low(
/*=========================*/
	buf_block_t*	block)	/* in: control block */
{
	buf_block_t*	last_block;

	ut_ad(buf_pool);
	ut_ad(block);
	ut_ad(mutex_own(&(buf_pool->mutex)));

	ut_a(block->state == BUF_BLOCK_FILE_PAGE);

	block->old = TRUE;

	last_block = UT_LIST_GET_LAST(buf_pool->LRU);

	if (last_block) {
		block->LRU_position = last_block->LRU_position;
	} else {
		block->LRU_position = buf_pool_clock_tic();
	}

	ut_a(!block->in_LRU_list);
	UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block);
	block->in_LRU_list = TRUE;

	if (srv_use_awe && block->frame) {
		/* Add to the list of mapped pages */

		UT_LIST_ADD_LAST(awe_LRU_free_mapped,
				 buf_pool->awe_LRU_free_mapped, block);
	}

	if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {

		buf_pool->LRU_old_len++;
	}

	if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {

		ut_ad(buf_pool->LRU_old);

		/* Adjust the length of the old block list if necessary */

		buf_LRU_old_adjust_len();

	} else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {

		/* The LRU list is now long enough for LRU_old to become
		defined: init it */

		buf_LRU_old_init();
	}
}
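/* All of the snippets in this listing lean on InnoDB's UT_LIST_* macros,
which implement intrusive doubly linked lists: each element embeds its own
prev/next node under a field name (e.g. "LRU") that is passed to the macros,
and the list base caches the length plus first/last pointers, which is why
UT_LIST_GET_LAST() is O(1).  The sketch below is a simplified, single-type
model for illustration only; the real definitions are type-generic macros
in ut0lst.h, and my_block_t/ilist_* are hypothetical names. */

#include <stddef.h>

typedef struct my_block_t my_block_t;

typedef struct ilist_node_t {
	my_block_t*	prev;
	my_block_t*	next;
} ilist_node_t;

struct my_block_t {
	int		id;	/* payload */
	ilist_node_t	LRU;	/* embedded node, like the "LRU" argument
				passed to the macros above */
};

typedef struct ilist_base_t {
	unsigned long	count;
	my_block_t*	first;
	my_block_t*	last;
} ilist_base_t;

/* Analogue of UT_LIST_GET_LAST(base): just read the cached tail. */
static my_block_t*
ilist_get_last(ilist_base_t* base)
{
	return(base->last);
}

/* Analogue of UT_LIST_ADD_LAST(LRU, base, blk): link at the tail. */
static void
ilist_add_last(ilist_base_t* base, my_block_t* blk)
{
	blk->LRU.next = NULL;
	blk->LRU.prev = base->last;

	if (base->last != NULL) {
		base->last->LRU.next = blk;
	} else {
		base->first = blk;
	}

	base->last = blk;
	base->count++;
}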
ibool
buf_LRU_search_and_free_block(
/*==========================*/
				/* out: TRUE if freed */
	ulint	n_iterations)	/* in: how many times this has been called
				repeatedly without result: a high value means
				that we should search farther; if value is
				k < 10, then we only search k/10 *
				[number of pages in the buffer pool] from the
				end of the LRU list */
{
	buf_block_t*	block;
	ulint		distance = 0;
	ibool		freed;

	mutex_enter(&(buf_pool->mutex));

	freed = FALSE;
	block = UT_LIST_GET_LAST(buf_pool->LRU);

	while (block != NULL) {
		ut_a(block->in_LRU_list);

		mutex_enter(&block->mutex);
		freed = buf_LRU_free_block(block);
		mutex_exit(&block->mutex);

		if (freed) {
			break;
		}

		block = UT_LIST_GET_PREV(LRU, block);
		distance++;

		if (!freed && n_iterations <= 10
		    && distance > 100
		    + (n_iterations * buf_pool->curr_size) / 10) {

			buf_pool->LRU_flush_ended = 0;

			mutex_exit(&(buf_pool->mutex));

			return(FALSE);
		}
	}

	if (buf_pool->LRU_flush_ended > 0) {
		buf_pool->LRU_flush_ended--;
	}

	if (!freed) {
		buf_pool->LRU_flush_ended = 0;
	}

	mutex_exit(&(buf_pool->mutex));

	return(freed);
}
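/* The early-exit heuristic above searches deeper on every retry: for
n_iterations = k with k <= 10, the scan gives up roughly k/10 of the way
through the pool (beyond a fixed floor of 100 blocks), and for k > 10 the
scan never gives up before the list ends.  lru_search_depth() below is a
hypothetical helper that merely restates that schedule; it is not part of
the InnoDB sources. */

static unsigned long
lru_search_depth(unsigned long n_iterations, unsigned long pool_pages)
{
	if (n_iterations > 10) {
		/* no early exit: the caller scans the entire LRU list */
		return(pool_pages);
	}

	return(100 + (n_iterations * pool_pages) / 10);
}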
void OSEventThread::removeWatch(event_req_t* ev)
{
	TRACE("~~~~ +removeWatch (%d) ~~~~\n", ev->fd);

	FD_CLR(ev->fd, &mReadFds);

	UT_LIST_REMOVE(watchNode, mWatchList, ev);

	// Recompute the select() upper bound from the new tail.  Taking
	// the last element's fd + 1 presumably assumes the watch list is
	// kept ordered by fd, so the tail holds the highest descriptor.
	event_req_t* rev = UT_LIST_GET_LAST(mWatchList);
	mMaxfFd = rev ? rev->fd + 1 : 0;

	TRACE("~~~~ -removeWatch, mMaxfFd(%d), count(%d) ~~~~\n",
	      mMaxfFd, mWatchList.count);
}
mem_block_t*
mem_heap_add_block(
/*===============*/
				/* out: created block, NULL if did not
				succeed */
	mem_heap_t*	heap,	/* in: memory heap */
	ulint		n)	/* in: number of bytes user needs */
{
	mem_block_t*	block;
	mem_block_t*	new_block;
	ulint		new_size;

	ut_ad(mem_heap_check(heap));

	block = UT_LIST_GET_LAST(heap->base);

	/* We have to allocate a new block. The size is always at least
	doubled until the standard size is reached. After that the size
	stays the same, except in cases where the caller needs more space. */

	new_size = 2 * mem_block_get_len(block);

	if (heap->type != MEM_HEAP_DYNAMIC) {
		/* From the buffer pool we allocate buffer frames */
		ut_a(n <= MEM_MAX_ALLOC_IN_BUF);

		if (new_size > MEM_MAX_ALLOC_IN_BUF) {
			new_size = MEM_MAX_ALLOC_IN_BUF;
		}
	} else if (new_size > MEM_BLOCK_STANDARD_SIZE) {
		new_size = MEM_BLOCK_STANDARD_SIZE;
	}

	if (new_size < n) {
		new_size = n;
	}

	new_block = mem_heap_create_block(heap, new_size, NULL, heap->type,
					  heap->file_name, heap->line);
	if (new_block == NULL) {

		return(NULL);
	}

	/* Add the new block as the last block */

	UT_LIST_INSERT_AFTER(list, heap->base, block, new_block);

	return(new_block);
}
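/* The sizing policy above doubles the previous block until a cap is hit
(MEM_MAX_ALLOC_IN_BUF for heaps backed by buffer frames,
MEM_BLOCK_STANDARD_SIZE for MEM_HEAP_DYNAMIC) and then grows past the cap
only when a single request needs more.  next_block_size() is a hypothetical
restatement of that policy, with the cap passed in explicitly: */

static unsigned long
next_block_size(unsigned long last_len, unsigned long cap, unsigned long n)
{
	unsigned long	new_size = 2 * last_len;

	if (new_size > cap) {
		new_size = cap;
	}

	if (new_size < n) {
		new_size = n;	/* the caller needs more than the cap */
	}

	return(new_size);
}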
/**********************************************************************
Gives a recommendation of how many blocks should be flushed to establish
a big enough margin of replaceable blocks near the end of the LRU list
and in the free list. */
static
ulint
buf_flush_LRU_recommendation(void)
/*==============================*/
			/* out: number of blocks which should be flushed
			from the end of the LRU list */
{
	buf_block_t*	block;
	ulint		n_replaceable;
	ulint		distance	= 0;

	mutex_enter(&(buf_pool->mutex));

	n_replaceable = UT_LIST_GET_LEN(buf_pool->free);

	block = UT_LIST_GET_LAST(buf_pool->LRU);

	while ((block != NULL)
	       && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN
		   + BUF_FLUSH_EXTRA_MARGIN)
	       && (distance < BUF_LRU_FREE_SEARCH_LEN)) {

		mutex_enter(&block->mutex);

		if (buf_flush_ready_for_replace(block)) {
			n_replaceable++;
		}

		mutex_exit(&block->mutex);

		distance++;

		block = UT_LIST_GET_PREV(LRU, block);
	}

	mutex_exit(&(buf_pool->mutex));

	if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) {

		return(0);
	}

	return(BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN
	       - n_replaceable);
}
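/* The return contract above: once BUF_FLUSH_FREE_BLOCK_MARGIN replaceable
blocks are available, no flushing is recommended at all; otherwise the
recommendation tops the count up past the margin by BUF_FLUSH_EXTRA_MARGIN,
presumably so that each flush produces a sizeable batch rather than a
trickle of single-page writes.  A hypothetical restatement: */

static unsigned long
flush_recommendation(unsigned long n_replaceable,
		     unsigned long margin, unsigned long extra)
{
	if (n_replaceable >= margin) {
		return(0);	/* margin already satisfied */
	}

	return(margin + extra - n_replaceable);
}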
void OSEventThread::rollback_timer(ULONG now)
{
	TRACE("~~~~ +rollback_timer ~~~~\n");

	OSMutexLocker _locker(&mMutex);

	//event_req_t * tev = mTimerList.prev;
	event_timer_t* tev = UT_LIST_GET_LAST(mTimerList);
	event_timer_t* prev;

	// Walk the list from the tail.  A timeout that lies implausibly
	// far ahead of 'now' means the tick counter wrapped around, so
	// the timer has in fact already expired.
	TRACE("~~~~ Looking for timers <= %lu ~~~~\n", (unsigned long) now);
	while (tev && now < tev->timeout
	       && (now + 0xFFFFFF) < tev->timeout) {
		// Timer expired: move it to the timeout list
		TRACE("~~~~ firing timer ~~~~\n");

		prev = UT_LIST_GET_PREV(watchNode, tev);
		UT_LIST_REMOVE(watchNode, mTimerList, tev);
		UT_LIST_ADD_FIRST(activeNode, mTimeoutList, tev);
		tev = prev;
	}

	TRACE("~~~~ -rollback_timer ~~~~\n");
}
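/* rollback_timer() treats a timeout that is implausibly far ahead of 'now'
(more than 0xFFFFFF ticks) as evidence that the tick counter wrapped around
and the timer has in fact expired.  A minimal restatement of that test,
assuming the tick counter behaves as an unsigned 32-bit value: */

static int
timer_hit_by_rollback(unsigned long now, unsigned long timeout)
{
	/* both conditions from the loop above: the timeout still looks
	"in the future", but too far in the future to be real */
	return(now < timeout && now + 0xFFFFFFUL < timeout);
}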
ulint
buf_flush_batch(
/*============*/
				/* out: number of blocks for which the
				write request was queued; ULINT_UNDEFINED
				if there was a flush of the same type
				already running */
	ulint	flush_type,	/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if
				BUF_FLUSH_LIST, then the caller must not own
				any latches on pages */
	ulint	min_n,		/* in: wished minimum number of blocks
				flushed (it is not guaranteed that the
				actual number is that big, though) */
	dulint	lsn_limit)	/* in: in the case BUF_FLUSH_LIST all blocks
				whose oldest_modification is smaller than
				this should be flushed (if their number does
				not exceed min_n), otherwise ignored */
{
	buf_block_t*	block;
	ulint		page_count	= 0;
	ulint		old_page_count;
	ulint		space;
	ulint		offset;
	ibool		found;

	ut_ad((flush_type == BUF_FLUSH_LRU)
	      || (flush_type == BUF_FLUSH_LIST));
#ifdef UNIV_SYNC_DEBUG
	ut_ad((flush_type != BUF_FLUSH_LIST)
	      || sync_thread_levels_empty_gen(TRUE));
#endif /* UNIV_SYNC_DEBUG */
	mutex_enter(&(buf_pool->mutex));

	if ((buf_pool->n_flush[flush_type] > 0)
	    || (buf_pool->init_flush[flush_type] == TRUE)) {

		/* There is already a flush batch of the same type running */

		mutex_exit(&(buf_pool->mutex));

		return(ULINT_UNDEFINED);
	}

	(buf_pool->init_flush)[flush_type] = TRUE;

	for (;;) {
		/* If we have flushed enough, leave the loop */
		if (page_count >= min_n) {

			break;
		}

		/* Start from the end of the list looking for a suitable
		block to be flushed. */

		if (flush_type == BUF_FLUSH_LRU) {
			block = UT_LIST_GET_LAST(buf_pool->LRU);
		} else {
			ut_ad(flush_type == BUF_FLUSH_LIST);

			block = UT_LIST_GET_LAST(buf_pool->flush_list);
			if (!block
			    || (ut_dulint_cmp(block->oldest_modification,
					      lsn_limit) >= 0)) {
				/* We have flushed enough */

				break;
			}
		}

		found = FALSE;

		/* Note that after finding a single flushable page, we try to
		flush also all its neighbors, and after that start from the
		END of the LRU list or flush list again: the list may change
		during the flushing and we cannot safely preserve within this
		function a pointer to a block in the list! */

		while ((block != NULL) && !found) {
			ut_a(block->state == BUF_BLOCK_FILE_PAGE);

			mutex_enter(&block->mutex);

			if (buf_flush_ready_for_flush(block, flush_type)) {

				found = TRUE;
				space = block->space;
				offset = block->offset;

				mutex_exit(&block->mutex);
				mutex_exit(&(buf_pool->mutex));

				old_page_count = page_count;

				/* Try to flush also all the neighbors */
				page_count += buf_flush_try_neighbors(
					space, offset, flush_type);
				/* fprintf(stderr,
				"Flush type %lu, page no %lu, neighb %lu\n",
				flush_type, offset,
				page_count - old_page_count); */

				mutex_enter(&(buf_pool->mutex));

			} else if (flush_type == BUF_FLUSH_LRU) {

				mutex_exit(&block->mutex);

				block = UT_LIST_GET_PREV(LRU, block);
			} else {
				ut_ad(flush_type == BUF_FLUSH_LIST);

				mutex_exit(&block->mutex);

				block = UT_LIST_GET_PREV(flush_list, block);
			}
		}

		/* If we could not find anything to flush, leave the loop */

		if (!found) {
			break;
		}
	}

	(buf_pool->init_flush)[flush_type] = FALSE;

	if ((buf_pool->n_flush[flush_type] == 0)
	    && (buf_pool->init_flush[flush_type] == FALSE)) {

		/* The running flush batch has ended */

		os_event_set(buf_pool->no_flush[flush_type]);
	}

	mutex_exit(&(buf_pool->mutex));

	buf_flush_buffered_writes();

#ifdef UNIV_DEBUG
	if (buf_debug_prints && page_count > 0) {
		ut_a(flush_type == BUF_FLUSH_LRU
		     || flush_type == BUF_FLUSH_LIST);
		fprintf(stderr, flush_type == BUF_FLUSH_LRU
			? "Flushed %lu pages in LRU flush\n"
			: "Flushed %lu pages in flush list flush\n",
			(ulong) page_count);
	}
#endif /* UNIV_DEBUG */

	srv_buf_pool_flushed += page_count;

	return(page_count);
}
/*******************************************************************//**
This function runs a purge batch.
@return	number of undo log pages handled in the batch */
UNIV_INTERN
ulint
trx_purge(
/*======*/
	ulint	limit)		/*!< in: the maximum number of records to
				purge in one batch */
{
	que_thr_t*	thr;
	ulint		old_pages_handled;

	ut_a(purge_sys->trx->n_active_thrs == 0);

	rw_lock_x_lock(&purge_sys->latch);

	mutex_enter(&kernel_mutex);

	/* Close and free the old purge view */

	read_view_close(purge_sys->view);
	purge_sys->view = NULL;
	mem_heap_empty(purge_sys->heap);

	/* Determine how long data manipulation language (DML) statements
	need to be delayed in order to reduce the lagging of the purge
	thread. */
	srv_dml_needed_delay = 0; /* in microseconds; default: no delay */

	/* If we cannot advance the 'purge view' because of an old
	'consistent read view', then the DML statements cannot be delayed.
	Also, srv_max_purge_lag <= 0 means 'infinity'. */
	if (srv_max_purge_lag > 0
	    && !UT_LIST_GET_LAST(trx_sys->view_list)) {
		float	ratio = (float) trx_sys->rseg_history_len
			/ srv_max_purge_lag;
		if (ratio > ULINT_MAX / 10000) {
			/* Avoid overflow: maximum delay is 4295 seconds */
			srv_dml_needed_delay = ULINT_MAX;
		} else if (ratio > 1) {
			/* If the history list length exceeds the
			innodb_max_purge_lag, the data manipulation
			statements are delayed by at least 5000
			microseconds. */
			srv_dml_needed_delay = (ulint) ((ratio - .5) * 10000);
		}
	}

	purge_sys->view = read_view_oldest_copy_or_open_new(
		0, purge_sys->heap);

	mutex_exit(&kernel_mutex);

	rw_lock_x_unlock(&(purge_sys->latch));

	purge_sys->state = TRX_PURGE_ON;

	purge_sys->handle_limit = purge_sys->n_pages_handled + limit;

	old_pages_handled = purge_sys->n_pages_handled;

	mutex_enter(&kernel_mutex);

	thr = que_fork_start_command(purge_sys->query);

	ut_ad(thr);

	mutex_exit(&kernel_mutex);

	if (srv_print_thread_releases) {
		fputs("Starting purge\n", stderr);
	}

	que_run_threads(thr);

	if (srv_print_thread_releases) {

		fprintf(stderr,
			"Purge ends; pages handled %lu\n",
			(ulong) purge_sys->n_pages_handled);
	}

	return(purge_sys->n_pages_handled - old_pages_handled);
}
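/* The delay formula above maps ratio = rseg_history_len /
srv_max_purge_lag to microseconds of DML delay: zero while the ratio is at
most 1, then (ratio - 0.5) * 10000 usec, i.e. at least roughly 5000 usec as
soon as the lag limit is crossed, capped at ULINT_MAX (about 4295 seconds
on a 32-bit ulint) to avoid overflow.  purge_lag_delay_usec() is a
hypothetical standalone restatement: */

static unsigned long
purge_lag_delay_usec(unsigned long history_len, unsigned long max_lag)
{
	float	ratio;

	if (max_lag == 0) {
		return(0);	/* 'infinity': never delay */
	}

	ratio = (float) history_len / max_lag;

	if (ratio > (float) (~0UL) / 10000) {
		return(~0UL);	/* avoid overflow in the multiplication */
	}

	if (ratio > 1) {
		return((unsigned long) ((ratio - .5) * 10000));
	}

	return(0);
}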
read_view_t*
read_view_oldest_copy_or_open_new(
/*==============================*/
					/* out, own: read view struct */
	dulint		cr_trx_id,	/* in: trx_id of creating
					transaction, or (0, 0) used in
					purge */
	mem_heap_t*	heap)		/* in: memory heap from which
					allocated */
{
	read_view_t*	old_view;
	read_view_t*	view_copy;
	ibool		needs_insert	= TRUE;
	ulint		insert_done	= 0;
	ulint		n;
	ulint		i;

	ut_ad(mutex_own(&kernel_mutex));

	old_view = UT_LIST_GET_LAST(trx_sys->view_list);

	if (old_view == NULL) {

		return(read_view_open_now(cr_trx_id, heap));
	}

	n = old_view->n_trx_ids;

	if (ut_dulint_cmp(old_view->creator_trx_id,
			  ut_dulint_create(0, 0)) != 0) {
		n++;
	} else {
		needs_insert = FALSE;
	}

	view_copy = read_view_create_low(n, heap);

	/* Insert the id of the creator in the right place of the descending
	array of ids, if needs_insert is TRUE: */

	i = 0;
	while (i < n) {
		if (needs_insert
		    && (i >= old_view->n_trx_ids
			|| ut_dulint_cmp(old_view->creator_trx_id,
					 read_view_get_nth_trx_id(
						 old_view, i)) > 0)) {

			read_view_set_nth_trx_id(
				view_copy, i, old_view->creator_trx_id);
			needs_insert = FALSE;
			insert_done = 1;
		} else {
			read_view_set_nth_trx_id(
				view_copy, i,
				read_view_get_nth_trx_id(
					old_view, i - insert_done));
		}

		i++;
	}

	view_copy->creator_trx_id = cr_trx_id;

	view_copy->low_limit_no = old_view->low_limit_no;
	view_copy->low_limit_id = old_view->low_limit_id;

	if (n > 0) {
		/* The last active transaction has the smallest id: */
		view_copy->up_limit_id = read_view_get_nth_trx_id(
			view_copy, n - 1);
	} else {
		view_copy->up_limit_id = old_view->up_limit_id;
	}

	UT_LIST_ADD_LAST(view_list, trx_sys->view_list, view_copy);

	return(view_copy);
}
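/* The copy loop above merges creator_trx_id into the copied id array while
preserving its descending order: once the insertion slot is found, every
remaining source id is read at index (i - insert_done) to account for the
shift.  copy_desc_with_insert() is a hypothetical restatement of the same
merge with plain integers: */

static void
copy_desc_with_insert(
	const unsigned long*	src,	/* in: ids in descending order */
	unsigned long		src_n,	/* in: number of ids in src */
	unsigned long		extra,	/* in: id to merge in */
	unsigned long*		dst)	/* out: src_n + 1 ids, descending */
{
	unsigned long	i;
	unsigned long	shift = 0;

	for (i = 0; i < src_n + 1; i++) {
		if (shift == 0 && (i >= src_n || extra > src[i])) {
			dst[i] = extra;
			shift = 1;
		} else {
			dst[i] = src[i - shift];
		}
	}
}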
/**********************************************************************
When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page hash
index entries belonging to that table. This function tries to do that in
batch. Note that this is a 'best effort' attempt and does not guarantee
that ALL hash entries will be removed. */
static
void
buf_LRU_drop_page_hash_for_tablespace(
/*==================================*/
	ulint	id)	/* in: space id */
{
	buf_block_t*	block;
	ulint*		page_arr;
	ulint		num_entries;

	page_arr = ut_malloc(sizeof(ulint)
			     * BUF_LRU_DROP_SEARCH_HASH_SIZE);
	mutex_enter(&buf_pool->mutex);

scan_again:
	num_entries = 0;
	block = UT_LIST_GET_LAST(buf_pool->LRU);

	while (block != NULL) {
		buf_block_t*	prev_block;

		mutex_enter(&block->mutex);
		prev_block = UT_LIST_GET_PREV(LRU, block);

		ut_a(block->state == BUF_BLOCK_FILE_PAGE);

		if (block->space != id
		    || block->buf_fix_count > 0
		    || block->io_fix != 0) {
			/* We leave the fixed pages as is in this scan.
			To be dealt with later in the final scan. */
			mutex_exit(&block->mutex);
			goto next_page;
		}

		ut_ad(block->space == id);
		if (block->is_hashed) {

			/* Store the offset (i.e.: page_no) in the array
			so that we can drop hash index in a batch
			later. */
			page_arr[num_entries] = block->offset;
			mutex_exit(&block->mutex);
			ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
			++num_entries;

			if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
				goto next_page;
			}
			/* Array full. We release the buf_pool->mutex to
			obey the latching order. */
			mutex_exit(&buf_pool->mutex);

			buf_LRU_drop_page_hash_batch(id, page_arr,
						     num_entries);
			num_entries = 0;
			mutex_enter(&buf_pool->mutex);
		} else {
			mutex_exit(&block->mutex);
		}

next_page:
		/* Note that we may have released the buf_pool->mutex
		above after reading the prev_block during processing
		of a page_hash_batch (i.e.: when the array was full).
		This means that prev_block can change in LRU list. This
		is OK because this function is a 'best effort' to drop
		as many search hash entries as possible and it does not
		guarantee that ALL such entries will be dropped. */
		block = prev_block;

		/* If, however, block has been removed from LRU list
		to the free list then we should restart the scan.
		block->state is protected by buf_pool->mutex. */
		if (block && block->state != BUF_BLOCK_FILE_PAGE) {
			ut_a(num_entries == 0);
			goto scan_again;
		}
	}

	mutex_exit(&buf_pool->mutex);

	/* Drop any remaining batch of search hashed pages. */
	buf_LRU_drop_page_hash_batch(id, page_arr, num_entries);
	ut_free(page_arr);
}
void
buf_LRU_invalidate_tablespace(
/*==========================*/
	ulint	id)	/* in: space id */
{
	buf_block_t*	block;
	ulint		page_no;
	ibool		all_freed;

	/* Before we attempt to drop pages one by one we first
	attempt to drop page hash index entries in batches to make
	it more efficient. The batching attempt is a best effort
	attempt and does not guarantee that all pages hash entries
	will be dropped. We get rid of remaining page hash entries
	one by one below. */
	buf_LRU_drop_page_hash_for_tablespace(id);

scan_again:
	mutex_enter(&(buf_pool->mutex));

	all_freed = TRUE;

	block = UT_LIST_GET_LAST(buf_pool->LRU);

	while (block != NULL) {
		buf_block_t*	prev_block;

		mutex_enter(&block->mutex);
		prev_block = UT_LIST_GET_PREV(LRU, block);

		ut_a(block->state == BUF_BLOCK_FILE_PAGE);

		if (block->space == id
		    && (block->buf_fix_count > 0 || block->io_fix != 0)) {

			/* We cannot remove this page during this scan yet;
			maybe the system is currently reading it in, or
			flushing the modifications to the file */

			all_freed = FALSE;

			goto next_page;
		}

		if (block->space == id) {
#ifdef UNIV_DEBUG
			if (buf_debug_prints) {
				fprintf(stderr,
					"Dropping space %lu page %lu\n",
					(ulong) block->space,
					(ulong) block->offset);
			}
#endif
			if (block->is_hashed) {
				page_no = block->offset;

				mutex_exit(&block->mutex);

				mutex_exit(&(buf_pool->mutex));

				/* Note that the following call will acquire
				an S-latch on the page */

				btr_search_drop_page_hash_when_freed(
					id, page_no);
				goto scan_again;
			}

			if (0 != ut_dulint_cmp(block->oldest_modification,
					       ut_dulint_zero)) {

				/* Remove from the flush list of modified
				blocks */
				block->oldest_modification = ut_dulint_zero;

				UT_LIST_REMOVE(flush_list,
					       buf_pool->flush_list, block);
			}

			/* Remove from the LRU list */
			buf_LRU_block_remove_hashed_page(block);
			buf_LRU_block_free_hashed_page(block);
		}
next_page:
		mutex_exit(&block->mutex);
		block = prev_block;
	}

	mutex_exit(&(buf_pool->mutex));

	if (!all_freed) {
		os_thread_sleep(20000);

		goto scan_again;
	}
}