/*************************************************************//**
Validates a given range of the cells in hash table.
@return	TRUE if ok */
UNIV_INTERN
ibool
ha_validate(
/*========*/
	hash_table_t*	table,		/*!< in: hash table */
	ulint		start_index,	/*!< in: start index */
	ulint		end_index)	/*!< in: end index */
{
	hash_cell_t*	cell;
	ha_node_t*	node;
	ibool		ok	= TRUE;
	ulint		i;

	ut_ad(table);
	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
	ut_a(start_index <= end_index);
	ut_a(start_index < hash_get_n_cells(table));
	ut_a(end_index < hash_get_n_cells(table));

	for (i = start_index; i <= end_index; i++) {

		cell = hash_get_nth_cell(table, i);

		node = cell->node;

		while (node) {
			if (hash_calc_hash(node->fold, table) != i) {
				ut_print_timestamp(stderr);
				fprintf(stderr,
					"InnoDB: Error: hash table node"
					" fold value %lu does not\n"
					"InnoDB: match the cell number %lu.\n",
					(ulong) node->fold, (ulong) i);

				ok = FALSE;
			}

			node = node->next;
		}
	}

	return(ok);
}
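/* Editor's sketch (assumption, not part of the original source): a
caller can validate every cell of a table by passing the full index
range. Cell indexes are zero-based, so the last valid index is
hash_get_n_cells(table) - 1; "table" here is assumed to be an
initialized hash_table_t*. */
#if 0
static void
example_validate_whole_table(hash_table_t* table)
{
	ut_a(ha_validate(table, 0, hash_get_n_cells(table) - 1));
}
#endif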
/********************************************************************//**
Issues read requests for pages which recovery wants to read in. */
UNIV_INTERN
void
buf_read_recv_pages(
/*================*/
	ibool		sync,		/*!< in: TRUE if the caller
					wants this function to wait
					for the highest address page
					to get read in, before this
					function returns */
	ulint		space,		/*!< in: space id */
	ulint		zip_size,	/*!< in: compressed page size in
					bytes, or 0 */
	const ulint*	page_nos,	/*!< in: array of page numbers
					to read, with the highest page
					number the last in the
					array */
	ulint		n_stored)	/*!< in: number of page numbers
					in the array */
{
	ib_int64_t	tablespace_version;
	ulint		count;
	ulint		err;
	ulint		i;

	zip_size = fil_space_get_zip_size(space);

	if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
		/* It is a single table tablespace and the .ibd file is
		missing: do nothing */

		/* The log records for these pages should be discarded
		here, for the same reason as in
		http://bugs.mysql.com/bug.php?id=43948 */

		if (recv_recovery_is_on()) {
			recv_addr_t*	recv_addr;

			mutex_enter(&(recv_sys->mutex));

			if (recv_sys->apply_log_recs == FALSE) {
				mutex_exit(&(recv_sys->mutex));
				goto not_to_recover;
			}

			for (i = 0; i < n_stored; i++) {
				/* Lookup as in
				recv_get_fil_addr_struct() */
				recv_addr = HASH_GET_FIRST(
					recv_sys->addr_hash,
					hash_calc_hash(
						ut_fold_ulint_pair(
							space, page_nos[i]),
						recv_sys->addr_hash));
				while (recv_addr) {
					if ((recv_addr->space == space)
					    && (recv_addr->page_no
						== page_nos[i])) {
						break;
					}

					recv_addr = HASH_GET_NEXT(
						addr_hash, recv_addr);
				}

				if ((recv_addr == NULL)
				    || (recv_addr->state
					== RECV_BEING_PROCESSED)
				    || (recv_addr->state
					== RECV_PROCESSED)) {
					continue;
				}

				recv_addr->state = RECV_PROCESSED;

				ut_a(recv_sys->n_addrs);
				recv_sys->n_addrs--;
			}

			mutex_exit(&(recv_sys->mutex));

			fprintf(stderr, " (cannot find space: %lu)", space);
		}
not_to_recover:

		return;
	}

	tablespace_version = fil_space_get_version(space);

	for (i = 0; i < n_stored; i++) {
		buf_pool_t*	buf_pool;

		count = 0;

		os_aio_print_debug = FALSE;
		buf_pool = buf_pool_get(space, page_nos[i]);
		while (buf_pool->n_pend_reads
		       >= recv_n_pool_free_frames / 2) {

			os_aio_simulated_wake_handler_threads();
			os_thread_sleep(10000);

			count++;

			if (count > 1000) {
				fprintf(stderr,
					"InnoDB: Error: InnoDB has waited for"
					" 10 seconds for pending\n"
					"InnoDB: reads to the buffer pool to"
					" be finished.\n"
					"InnoDB: Number of pending reads %lu,"
					" pending pread calls %lu\n",
					(ulong) buf_pool->n_pend_reads,
					(ulong) os_file_n_pending_preads);

				os_aio_print_debug = TRUE;
			}
		}

		os_aio_print_debug = FALSE;

		if ((i + 1 == n_stored) && sync) {
			buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
					  zip_size, TRUE, tablespace_version,
					  page_nos[i], NULL);
		} else {
			buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
					  | OS_AIO_SIMULATED_WAKE_LATER,
					  space, zip_size, TRUE,
					  tablespace_version, page_nos[i],
					  NULL);
		}
	}

	os_aio_simulated_wake_handler_threads();

	/* Flush pages from the end of all the LRU lists if necessary */
	buf_flush_free_margins(FALSE);

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr,
			"Recovery applies read-ahead pages %lu\n",
			(ulong) n_stored);
	}
#endif /* UNIV_DEBUG */
}
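/* Editor's sketch (assumption, not part of the original source): a
recovery-side caller collects page numbers in ascending order, so that
the highest page number comes last as this function requires, and asks
for synchronous completion of that last page. The space id and page
array below are hypothetical; zip_size is recomputed inside
buf_read_recv_pages(), so 0 can be passed. */
#if 0
static void
example_recv_read_batch(void)
{
	ulint	page_nos[] = {100, 101, 102};	/* ascending order */

	/* sync == TRUE: wait until page 102 has been read in */
	buf_read_recv_pages(TRUE, 0, 0, page_nos,
			    sizeof(page_nos) / sizeof(page_nos[0]));
}
#endif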
/********************************************************************//**
Low-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there, in which case does nothing.
Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
flag is cleared and the x-lock released by an i/o-handler thread.
@return	1 if a read request was queued, 0 if the page already resided
in buf_pool, or if the page is in the doublewrite buffer blocks in
which case it is never read into the pool, or if the tablespace does
not exist or is being dropped */
UNIV_INTERN
ulint
buf_read_page_low(
/*==============*/
	ulint*	err,	/*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we
			are trying to read from a non-existent tablespace, or
			a tablespace which is just now being dropped */
	ibool	sync,	/*!< in: TRUE if synchronous aio is desired */
	ulint	mode,	/*!< in: BUF_READ_IBUF_PAGES_ONLY, ...,
			ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
			at read-ahead functions) */
	ulint	space,	/*!< in: space id */
	ulint	zip_size,/*!< in: compressed page size, or 0 */
	ibool	unzip,	/*!< in: TRUE=request uncompressed page */
	ib_int64_t tablespace_version, /*!< in: if the space memory object has
			this timestamp different from what we are giving here,
			treat the tablespace as dropped; this is a timestamp we
			use to stop dangling page reads from a tablespace
			which we have DISCARDed + IMPORTed back */
	ulint	offset,	/*!< in: page number */
	trx_t*	trx)	/*!< in: transaction, or NULL */
{
	buf_page_t*	bpage;
	ulint		wake_later;

	*err = DB_SUCCESS;

	wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
	mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;

	if (trx_doublewrite
	    && (space == TRX_SYS_SPACE
		|| (srv_doublewrite_file
		    && space == TRX_DOUBLEWRITE_SPACE))
	    && (   (offset >= trx_doublewrite->block1
		    && offset < trx_doublewrite->block1
		    + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
		|| (offset >= trx_doublewrite->block2
		    && offset < trx_doublewrite->block2
		    + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
			"  InnoDB: Warning: trying to read"
			" doublewrite buffer page %lu\n",
			(ulong) offset);

		return(0);
	}

	if (ibuf_bitmap_page(zip_size, offset)
	    || trx_sys_hdr_page(space, offset)) {

		/* Trx sys header is so low in the latching order that we play
		safe and do not leave the i/o-completion to an asynchronous
		i/o-thread. Ibuf bitmap pages must always be read with
		synchronous i/o, to make sure they do not get involved in
		thread deadlocks. */

		sync = TRUE;
	}

	/* The following call will also check if the tablespace does not exist
	or is being dropped; if we succeed in initing the page in the buffer
	pool for read, then DISCARD cannot proceed until the read has
	completed */
	bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
				       tablespace_version, offset);
	if (bpage == NULL) {
		/* bugfix: http://bugs.mysql.com/bug.php?id=43948 */
		if (recv_recovery_is_on() && *err == DB_TABLESPACE_DELETED) {
			/* Hashed log records for this page must be
			discarded here */
			recv_addr_t*	recv_addr;

			mutex_enter(&(recv_sys->mutex));

			if (recv_sys->apply_log_recs == FALSE) {
				mutex_exit(&(recv_sys->mutex));
				goto not_to_recover;
			}

			/* Lookup as in recv_get_fil_addr_struct() */
			recv_addr = HASH_GET_FIRST(
				recv_sys->addr_hash,
				hash_calc_hash(
					ut_fold_ulint_pair(space, offset),
					recv_sys->addr_hash));
			while (recv_addr) {
				if ((recv_addr->space == space)
				    && (recv_addr->page_no == offset)) {
					break;
				}

				recv_addr = HASH_GET_NEXT(
					addr_hash, recv_addr);
			}

			if ((recv_addr == NULL)
			    || (recv_addr->state == RECV_BEING_PROCESSED)
			    || (recv_addr->state == RECV_PROCESSED)) {
				mutex_exit(&(recv_sys->mutex));
				goto not_to_recover;
			}

			fprintf(stderr, " (cannot find space: %lu)", space);
			recv_addr->state = RECV_PROCESSED;

			ut_a(recv_sys->n_addrs);
			recv_sys->n_addrs--;

			mutex_exit(&(recv_sys->mutex));
		}
not_to_recover:

		return(0);
	}

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr,
			"Posting read request for page %lu, sync %lu\n",
			(ulong) offset, (ulong) sync);
	}
#endif

	ut_ad(buf_page_in_file(bpage));

	if (sync) {
		thd_wait_begin(NULL, THD_WAIT_DISKIO);
	}

	if (zip_size) {
		*err = _fil_io(OS_FILE_READ | wake_later,
			       sync, space, zip_size, offset, 0, zip_size,
			       bpage->zip.data, bpage, trx);
	} else {
		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);

		*err = _fil_io(OS_FILE_READ | wake_later,
			       sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
			       ((buf_block_t*) bpage)->frame, bpage, trx);
	}

	if (sync) {
		thd_wait_end(NULL);
	}

	if (*err == DB_TABLESPACE_DELETED) {
		buf_read_page_handle_error(bpage);
		return(0);
	}

	if (srv_pass_corrupt_table) {
		if (*err != DB_SUCCESS) {
			bpage->is_corrupt = TRUE;
		}
	} else {
		ut_a(*err == DB_SUCCESS);
	}

	if (sync) {
		/* The i/o is already completed when we arrive from
		fil_read */
		if (!buf_page_io_complete(bpage)) {
			return(0);
		}
	}

	return(1);
}
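/* Editor's sketch (assumption, not part of the original source): a
synchronous, uncompressed single-page read in the style of the
higher-level read functions. The wrapper name and its minimal error
handling are illustrative only. */
#if 0
static ulint
example_read_page_sync(
	ulint	space,	/* in: space id */
	ulint	offset)	/* in: page number */
{
	ulint		err;
	ib_int64_t	version = fil_space_get_version(space);

	buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE,
			  space, 0 /* not compressed */, FALSE,
			  version, offset, NULL);
	return(err);
}
#endif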
/*************************************************************//**
Inserts an entry into a hash table. If an entry with the same fold number
is found, its node is updated to point to the new data, and no new node
is inserted. If btr_search_enabled is set to FALSE, we will only allow
updating existing nodes, but no new node is allowed to be added.
@return	TRUE if succeeded, FALSE if no more memory could be allocated */
UNIV_INTERN
ibool
ha_insert_for_fold_func(
/*====================*/
	hash_table_t*	table,	/*!< in: hash table */
	ulint		fold,	/*!< in: folded value of data; if a node with
				the same fold value already exists, it is
				updated to point to the same data, and no new
				node is created! */
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
	buf_block_t*	block,	/*!< in: buffer block containing the data */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
	const rec_t*	data)	/*!< in: data, must not be NULL */
{
	hash_cell_t*	cell;
	ha_node_t*	node;
	ha_node_t*	prev_node;
	ulint		hash;

	ut_ad(data);
	ut_ad(table);
	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
	ut_a(block->frame == page_align(data));
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ASSERT_HASH_MUTEX_OWN(table, fold);
	ut_ad(btr_search_enabled);

	hash = hash_calc_hash(fold, table);

	cell = hash_get_nth_cell(table, hash);

	prev_node = cell->node;

	while (prev_node != NULL) {
		if (prev_node->fold == fold) {
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
			if (table->adaptive) {
				buf_block_t*	prev_block = prev_node->block;
				ut_a(prev_block->frame
				     == page_align(prev_node->data));
				ut_a(prev_block->n_pointers > 0);
				prev_block->n_pointers--;
				block->n_pointers++;
			}
# endif /* !UNIV_HOTBACKUP */

			prev_node->block = block;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
			prev_node->data = data;

			return(TRUE);
		}

		prev_node = prev_node->next;
	}

	/* We have to allocate a new chain node */

	node = mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t));

	if (node == NULL) {
		/* It was a btr search type memory heap and at the moment
		no more memory could be allocated: return */

		ut_ad(hash_get_heap(table, fold)->type & MEM_HEAP_BTR_SEARCH);

		return(FALSE);
	}

	ha_node_set_data(node, block, data);

#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
# ifndef UNIV_HOTBACKUP
	if (table->adaptive) {
		block->n_pointers++;
	}
# endif /* !UNIV_HOTBACKUP */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */

	node->fold = fold;

	node->next = NULL;

	prev_node = cell->node;

	if (prev_node == NULL) {

		cell->node = node;

		return(TRUE);
	}

	while (prev_node->next != NULL) {

		prev_node = prev_node->next;
	}

	prev_node->next = node;

	return(TRUE);
}
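/* Editor's sketch (assumption, not part of the original source):
callers normally go through the ha_insert_for_fold() wrapper macro,
which passes the block argument only when UNIV_AHI_DEBUG or UNIV_DEBUG
is defined, while holding btr_search_latch in exclusive mode. The
names "fold", "block" and "rec" below are hypothetical. */
#if 0
	rw_lock_x_lock(&btr_search_latch);

	if (!ha_insert_for_fold(btr_search_sys->hash_index,
				fold, block, rec)) {
		/* The btr search memory heap is momentarily full:
		the insert is simply skipped. */
	}

	rw_lock_x_unlock(&btr_search_latch);
#endif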