void buf_LRU_print(void) /*===============*/ { buf_block_t* block; buf_frame_t* frame; ulint len; ut_ad(buf_pool); mutex_enter(&(buf_pool->mutex)); fprintf(stderr, "Pool ulint clock %lu\n", (ulong) buf_pool->ulint_clock); block = UT_LIST_GET_FIRST(buf_pool->LRU); len = 0; while (block != NULL) { fprintf(stderr, "BLOCK %lu ", (ulong) block->offset); if (block->old) { fputs("old ", stderr); } if (block->buf_fix_count) { fprintf(stderr, "buffix count %lu ", (ulong) block->buf_fix_count); } if (block->io_fix) { fprintf(stderr, "io_fix %lu ", (ulong) block->io_fix); } if (ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0) { fputs("modif. ", stderr); } frame = buf_block_get_frame(block); fprintf(stderr, "LRU pos %lu type %lu index id %lu ", (ulong) block->LRU_position, (ulong) fil_page_get_type(frame), (ulong) ut_dulint_get_low (btr_page_get_index_id(frame))); block = UT_LIST_GET_NEXT(LRU, block); if (++len == 10) { len = 0; putc('\n', stderr); } } mutex_exit(&(buf_pool->mutex)); }
/********************************************************//**
Parses a log record written by mlog_write_string.
@return parsed record end, NULL if not a complete record */
UNIV_INTERN
byte*
mlog_parse_string(
/*==============*/
	byte*	ptr,	/*!< in: buffer */
	byte*	end_ptr,/*!< in: buffer end */
	byte*	page,	/*!< in: page where to apply the log record, or NULL */
	void*	page_zip)/*!< in/out: compressed page, or NULL */
{
	ulint	offset;
	ulint	len;

	/* Index pages are never logged with MLOG_WRITE_STRING when
	compressed; only allow non-index pages here. */
	ut_a(!page || !page_zip
	     || fil_page_get_type(page) != FIL_PAGE_INDEX);

	/* The record header is a 2-byte offset followed by a
	2-byte length; bail out if it is not fully buffered yet. */
	if (end_ptr < ptr + 4) {

		return(NULL);
	}

	offset = mach_read_from_2(ptr);
	len = mach_read_from_2(ptr + 2);
	ptr += 4;

	/* Both values are at most 0xFFFF, so the sum cannot wrap. */
	if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
	    || UNIV_UNLIKELY(len + offset > UNIV_PAGE_SIZE)) {
		recv_sys->found_corrupt_log = TRUE;

		return(NULL);
	}

	/* The payload itself must also be completely buffered. */
	if (end_ptr < ptr + len) {

		return(NULL);
	}

	if (page) {
		if (UNIV_LIKELY_NULL(page_zip)) {
			/* Keep the compressed copy in sync as well. */
			memcpy(((page_zip_des_t*) page_zip)->data
			       + offset, ptr, len);
		}
		memcpy(page + offset, ptr, len);
	}

	return(ptr + len);
}
/********************************************************//**
Parses a log record written by mlog_write_ulint or mlog_write_dulint.
@return parsed record end, NULL if not a complete record or a corrupt record */
UNIV_INTERN
byte*
mlog_parse_nbytes(
/*==============*/
	ulint	type,	/*!< in: log record type: MLOG_1BYTE, ... */
	byte*	ptr,	/*!< in: buffer */
	byte*	end_ptr,/*!< in: buffer end */
	byte*	page,	/*!< in: page where to apply the log record, or NULL */
	void*	page_zip)/*!< in/out: compressed page, or NULL */
{
	ulint	offset;
	ulint	val;
	dulint	dval;

	ut_a(type <= MLOG_8BYTES);
	ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);

	/* Need at least the 2-byte page offset. */
	if (end_ptr < ptr + 2) {

		return(NULL);
	}

	offset = mach_read_from_2(ptr);
	ptr += 2;

	if (offset >= UNIV_PAGE_SIZE) {
		recv_sys->found_corrupt_log = TRUE;

		return(NULL);
	}

	if (type == MLOG_8BYTES) {
		ptr = mach_dulint_parse_compressed(ptr, end_ptr, &dval);

		if (ptr == NULL) {

			return(NULL);
		}

		/* BUG FIX: the old code only checked
		offset < UNIV_PAGE_SIZE, so a corrupt or crafted record
		with offset in the last 7 bytes of the page made
		mach_write_to_8() write past the page buffer
		(out-of-bounds write). The full 8-byte write must fit. */
		if (UNIV_UNLIKELY(offset + 8 > UNIV_PAGE_SIZE)) {
			recv_sys->found_corrupt_log = TRUE;

			return(NULL);
		}

		if (page) {
			if (UNIV_LIKELY_NULL(page_zip)) {
				mach_write_to_8
					(((page_zip_des_t*) page_zip)->data
					 + offset, dval);
			}
			mach_write_to_8(page + offset, dval);
		}

		return(ptr);
	}

	ptr = mach_parse_compressed(ptr, end_ptr, &val);

	if (ptr == NULL) {

		return(NULL);
	}

	switch (type) {
	case MLOG_1BYTE:
		/* offset < UNIV_PAGE_SIZE already guarantees the single
		byte fits; only the value range needs checking. */
		if (UNIV_UNLIKELY(val > 0xFFUL)) {
			goto corrupt;
		}
		if (page) {
			if (UNIV_LIKELY_NULL(page_zip)) {
				mach_write_to_1
					(((page_zip_des_t*) page_zip)->data
					 + offset, val);
			}
			mach_write_to_1(page + offset, val);
		}
		break;
	case MLOG_2BYTES:
		/* BUG FIX: also require the 2-byte write to stay inside
		the page (see the MLOG_8BYTES note above). */
		if (UNIV_UNLIKELY(val > 0xFFFFUL)
		    || UNIV_UNLIKELY(offset + 2 > UNIV_PAGE_SIZE)) {
			goto corrupt;
		}
		if (page) {
			if (UNIV_LIKELY_NULL(page_zip)) {
				mach_write_to_2
					(((page_zip_des_t*) page_zip)->data
					 + offset, val);
			}
			mach_write_to_2(page + offset, val);
		}
		break;
	case MLOG_4BYTES:
		/* BUG FIX: the 4-byte write must fit inside the page. */
		if (UNIV_UNLIKELY(offset + 4 > UNIV_PAGE_SIZE)) {
			goto corrupt;
		}
		if (page) {
			if (UNIV_LIKELY_NULL(page_zip)) {
				mach_write_to_4
					(((page_zip_des_t*) page_zip)->data
					 + offset, val);
			}
			mach_write_to_4(page + offset, val);
		}
		break;
	default:
	corrupt:
		recv_sys->found_corrupt_log = TRUE;
		ptr = NULL;
	}

	return(ptr);
}
/********************************************************************//**
Flush dirty pages from the flash cache back to their tablespaces.
Works in four steps: (1) decide how many blocks to flush based on how
full the cache is, (2) read each block from the cache device, verify it
and post an async write to the tablespace, (3) after syncing the async
i/o, clear the IO_FIX_FLUSH fix on the flushed blocks, (4) advance the
flush offset, persist it in the flash-cache log, and wake waiters.
NOTE(review): assumes a single flush thread owns flush_buf and
flush_off/round (stated in the comment below) — confirm against callers.
@return number of pages have been flushed to tablespace */
UNIV_INTERN
ulint
fc_flush_to_disk(
/*==================*/
	ibool do_full_io)	/*!< in: whether do full io capacity */
{
	ulint distance;
	byte* page;
	ulint ret;
	ulint space;
	ulint offset;
	ulint page_type;
	ulint i, j;
	ulint pos;
	ulint zip_size;
	ulint block_offset, byte_offset;
	ulint fc_size = fc_get_size();
	ulint fc_blk_size = fc_get_block_size_byte();
	ulint start_offset;
	ulint data_size;
	fc_block_t *flush_block = NULL;
	ulint c_flush = 0;	/* cache blocks actually flushed (or skipped
				as dropped); used to reduce the dirty count */

	/* Caller must not hold the flash cache mutex; flush buffer must
	have been drained by the previous call (reset at the end). */
	ut_ad(!mutex_own(&fc->mutex));
	ut_a(fc->flush_buf->free_pos == 0);

	/* step 1: get the number of blocks need to flush to tablespace */
	flash_cache_mutex_enter();
	distance = fc_get_distance();
	start_offset = fc->flush_off;

	if ( distance == 0 ) {
		/* nothing dirty between write_off and flush_off */
		flash_cache_mutex_exit();
		return 0;
	} else if ( recv_recovery_on ) {
		/* during recovery, scale the batch by how full the cache
		is: idle below the write-cache threshold, 10% io capacity
		below the full-io threshold, 100% above it */
		if ( distance < (( 1.0 * srv_flash_cache_write_cache_pct /100 ) * fc_size)) {
			fc->n_flush_cur = 0;
		} else if ( distance < ( ( 1.0*srv_flash_cache_do_full_io_pct /100 ) * fc_size)) {
			fc->n_flush_cur = ut_min(PCT_IO_FC(10), distance);
		} else {
			fc->n_flush_cur = ut_min(PCT_IO_FC(100), distance);
		}
	} else if ( distance < (( 1.0 * srv_flash_cache_write_cache_pct /100 ) * fc_size)
		&& !do_full_io ) {
		/* cache not full enough to bother, and caller did not
		ask for a full flush */
		flash_cache_mutex_exit();
		return 0;
	} else if ( distance < (( 1.0 * srv_flash_cache_do_full_io_pct/100 ) * fc_size)
		&& !do_full_io ) {
		fc->n_flush_cur = PCT_IO_FC(srv_fc_write_cache_flush_pct);
	} else {
		ut_ad((distance > ( 1.0 * srv_flash_cache_do_full_io_pct/100 ) * fc_size)
			|| do_full_io );
		fc->n_flush_cur = ut_min(PCT_IO_FC(srv_fc_full_flush_pct), distance);
	}

	flash_cache_mutex_exit();

	/* step 2: start to flush blocks use async io, set block io_fix IO_FIX_FLUSH */
	i = 0;
	while (i < fc->n_flush_cur) {
		/* copies of the block fields, taken under the block mutex;
		the block itself may be invalidated by doublewrite once the
		mutex is released */
		ulint b_space;
		ulint b_offset;
		ulint raw_zip_size;
		ulint size;
		ulint fil_offset;
#ifdef UNIV_FLASH_CACHE_TRACE
		ulint is_v4_blk;
#endif
		byte* page_io;	/* buffer the cache device is read into:
				the flush buffer slot itself, or the zip
				read buffer when the block is compressed */

		flash_cache_mutex_enter();
		pos = ( start_offset + i ) % fc_size;
		flush_block = fc_get_block(pos);

		if (flush_block == NULL) {
			/* hole in the cache array: skip one position */
			i++;
			flash_cache_mutex_exit();
			continue;
		}

		/* we should get the mutex, as doublewrite may hit this block
		and invalid the block */
		flash_block_mutex_enter(flush_block->fil_offset);
		flash_cache_mutex_exit();

		data_size = fc_block_get_data_size(flush_block);

		if (flush_block->state != BLOCK_READY_FOR_FLUSH) {
			/* if readonly or merge write or already flushed*/
			ut_a (flush_block->state == BLOCK_NOT_USED
				|| flush_block->state == BLOCK_READ_CACHE
				|| flush_block->state == BLOCK_FLUSHED);
			i += data_size;
			flash_block_mutex_exit(flush_block->fil_offset);
			if (flush_block->state == BLOCK_NOT_USED) {
				/* NOTE(review): state is read here after the
				block mutex was released — presumably safe
				because only this thread frees blocks in this
				state; confirm */
				//fc_block_detach(FALSE, flush_block);
				fc_block_free(flush_block);
			}
			continue;
		}

		zip_size = fil_space_get_zip_size(flush_block->space);
		if (zip_size == ULINT_UNDEFINED) {
			/* table has been droped, just set it BLOCK_FLUSHED */
#ifdef UNIV_FLASH_CACHE_TRACE
			ut_print_timestamp(fc->f_debug);
			fprintf(fc->f_debug, "space:%lu is droped, the page(%lu, %lu) need not to be flushed.\n",
				(ulong)flush_block->space, (ulong)flush_block->space,
				(ulong)flush_block->offset);
#endif
			flush_block->state = BLOCK_FLUSHED;
			i += data_size;
			c_flush += data_size;
			flash_block_mutex_exit(flush_block->fil_offset);
			continue;
		}

#ifdef UNIV_FLASH_CACHE_TRACE
		if (flush_block->state != BLOCK_READY_FOR_FLUSH) {
			fc_block_print(flush_block);
			ut_error;
		}
#endif

		flush_block->io_fix |= IO_FIX_FLUSH;

		/*
		 * we should set block state BLOCK_FLUSHED, if not, doublewrite may hit this block
		 * and invalid this block and reduce the dirty count, but when finish flush ,we will
		 * reduce the dirty count too, so it may reduce twice.
		 */
		flush_block->state = BLOCK_FLUSHED;

		/* save the block info, as the block may be invalided
		by doublewrite after release mutex */
		b_space = flush_block->space;
		b_offset = flush_block->offset;
		raw_zip_size = flush_block->raw_zip_size;
		size = flush_block->size;
		fil_offset = flush_block->fil_offset;
#ifdef UNIV_FLASH_CACHE_TRACE
		is_v4_blk = flush_block->is_v4_blk;
#endif
		/* release the block now, so read can hit in this blocks
		and read the data */
		flash_block_mutex_exit(flush_block->fil_offset);

		/*
		 * Only flush thread will update read_buf and flush_off/round.
		 * there only single flush thread no need to lock read_buf
		 */
		page = fc->flush_buf->buf + fc->flush_buf->free_pos * fc_blk_size;

		if (raw_zip_size > 0) {
			/* compressed block: read into the zip buffer and
			decompress into the flush buffer slot below */
			ut_a((size * fc_blk_size) == UNIV_PAGE_SIZE);
			page_io = fc->flush_zip_read_buf;
		} else {
			page_io = page;
		}

		/* read the block data from the flash cache device */
		fc_io_offset(fil_offset, &block_offset, &byte_offset);
		ret = fil_io(OS_FILE_READ, TRUE, FLASH_CACHE_SPACE, 0,
			block_offset, byte_offset, data_size * fc_blk_size,
			page_io, NULL);

		if (ret != DB_SUCCESS) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Flash cache [Error]: unable to read page from flash cache.\n"
				"flash cache flush offset is:%lu.\n",
				(ulong)(start_offset + i));
			ut_error;
		}

		/* block was invalidated (e.g. by doublewrite) while we were
		reading it: the data is stale, do not write it out */
		if ((flush_block != NULL) && (flush_block->state == BLOCK_NOT_USED)) {
			goto skip;
		}

		/* decompress the compress data */
		if (raw_zip_size > 0) {
#ifdef UNIV_FLASH_CACHE_TRACE
			/* verify the on-device compressed page framing:
			header/tailer checksums, sizes and (space, offset) */
			ulint blk_zip_size_byte;
			if (is_v4_blk) {
				blk_zip_size_byte = raw_zip_size * fc_get_block_size_byte();
			} else {
				blk_zip_size_byte = fc_block_compress_align(raw_zip_size)
					* fc_get_block_size_byte();
				ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_ZIP_RAW_SIZE)
					== raw_zip_size);
			}
			ut_a(page_io);
			ut_a(page);
			ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_HEADER)
				== FC_ZIP_PAGE_CHECKSUM);
			ut_a((ulint)mach_read_from_4(page_io + blk_zip_size_byte - FC_ZIP_PAGE_TAILER)
				== FC_ZIP_PAGE_CHECKSUM);
			ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_SIZE)
				== blk_zip_size_byte);
			ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_ORIG_SIZE)
				== UNIV_PAGE_SIZE);
			ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_SPACE)
				== b_space);
			ut_a((ulint)mach_read_from_4(page_io + FC_ZIP_PAGE_OFFSET)
				== b_offset);

			/* only qlz can do this check */
			if (srv_flash_cache_compress_algorithm == FC_BLOCK_COMPRESS_QUICKLZ) {
				if (is_v4_blk) {
					ut_a(raw_zip_size * fc_get_block_size_byte()
						>= (ulint)fc_qlz_size_compressed((const char *)(page_io + FC_ZIP_PAGE_DATA)));
				} else {
					ut_a(raw_zip_size
						== (ulint)fc_qlz_size_compressed((const char *)(page_io + FC_ZIP_PAGE_DATA)));
				}
				ut_a(UNIV_PAGE_SIZE
					== fc_qlz_size_decompressed((const char *)(page_io + FC_ZIP_PAGE_DATA)));
			}
#endif
			fc_block_do_decompress(DECOMPRESS_FLUSH, page_io, raw_zip_size, page);
		}

		/* cross-check the page header against the saved block info */
		space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
		offset = mach_read_from_4(page + FIL_PAGE_OFFSET);

		if ((space != b_space) || (offset != b_offset)) {
			ut_print_timestamp(stderr);
			fc_block_print(flush_block);
			ut_error;
		}

		if (buf_page_is_corrupted(page, zip_size)) {
			buf_page_print(page, zip_size, BUF_PAGE_PRINT_NO_CRASH);
			ut_error;
		}

		/* bucket all index pages under slot 1 of the statistics */
		page_type = fil_page_get_type(page);
		if (page_type == FIL_PAGE_INDEX) {
			page_type = 1;
		}
		srv_flash_cache_flush_detail[page_type]++;

		/* post the async write to the tablespace; synced in step 3 */
		ret = fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, FALSE, space,
			zip_size, offset, 0, zip_size ? zip_size : UNIV_PAGE_SIZE, page, NULL);
		if (ret != DB_SUCCESS && ret != DB_TABLESPACE_DELETED) {
			ut_print_timestamp(stderr);
			fc_block_print(flush_block);
			ut_error;
		}

		/* add UNIV_PAGE_SIZE / fc_blk_size for safe */
		fc->flush_buf->free_pos += UNIV_PAGE_SIZE / fc_blk_size;

skip:
		i += data_size;
		c_flush += data_size;

		/* stop early when the flush buffer cannot hold one more page */
		if ((fc->flush_buf->free_pos + UNIV_PAGE_SIZE / fc_blk_size)
			>= fc->flush_buf->size) {
			/* FIXME: is it safe to change n_flush, as step 3 will use n_flush */
			fc->n_flush_cur = i;
			break;
		}
	}

	/* ok, now flush all async io to disk */
	fc_flush_sync_dbfile();

	/* step 3: all the flush blocks have sync to disk, update the state and io_fix */
	j = 0;
	while (j < fc->n_flush_cur) {
		flash_cache_mutex_enter();
		pos = (start_offset + j) % fc_size;
		flush_block = fc_get_block(pos);
		if (flush_block == NULL) {
			j++;
			flash_cache_mutex_exit();
			continue;
		}

		/* block state and io_fix may be changed by doublewrite
		and lru move */
		flash_block_mutex_enter(flush_block->fil_offset);
		flash_cache_mutex_exit();
		if (flush_block->io_fix & IO_FIX_FLUSH) {
			/* the block is already in BLOCK_FLUSHED state */
			flush_block->io_fix &= ~IO_FIX_FLUSH;
		}

		data_size = fc_block_get_data_size(flush_block);
		flash_block_mutex_exit(flush_block->fil_offset);
		j += data_size;
	}

	/*
	 * i and j may be different, as the last been flushed block may be invalid by doublewrite,
	 * so maybe i > j
	 */

	/* add the actual flushed blocks */
	srv_flash_cache_flush = srv_flash_cache_flush + c_flush;

	/* step 4: update fc status and flush_off, and wake up threads that
	are sleep for space */
	if (i > 0) {
		ut_a(i >= c_flush);

		flash_cache_mutex_enter();
		/*
		 * it is safe to inc flush off and sub dirty blocks at this time,
		 * as fc_validate is not work
		 */
		fc_inc_flush_off(i);
		flash_cache_log_mutex_enter();
		/* persist the new flush position in the flash cache log */
		fc_log->current_stat->flush_offset = fc->flush_off;
		fc_log->current_stat->flush_round = fc->flush_round;
		flash_cache_log_mutex_exit();

		ut_a(srv_flash_cache_dirty >= c_flush);
		srv_flash_cache_dirty -= c_flush;

		srv_fc_flush_should_commit_log_flush++;
		os_event_set(fc->wait_space_event);

		fc->n_flush_cur = 0;
		flash_cache_mutex_exit();
	}

	/* drain the flush buffer for the next call (see ut_a at entry) */
	fc->flush_buf->free_pos = 0;

	return c_flush;
}