byte*
mach_parse_compressed(
/*==================*/
                    /* out: pointer to end of the stored field,
                    NULL if not complete */
    byte*   ptr,    /* in: pointer to buffer from where to read */
    byte*   end_ptr,/* in: pointer to end of the buffer */
    ulint*  val)    /* out: read value (< 2^32) */
{
    ulint   flag;

    ut_ad(ptr && end_ptr && val);

    if (ptr >= end_ptr) {
        return(NULL);
    }

    flag = mach_read_from_1(ptr);

    if (flag < 0x80UL) {
        *val = flag;
        return(ptr + 1);

    } else if (flag < 0xC0UL) {
        if (end_ptr < ptr + 2) {
            return(NULL);
        }

        *val = mach_read_from_2(ptr) & 0x7FFFUL;

        return(ptr + 2);

    } else if (flag < 0xE0UL) {
        if (end_ptr < ptr + 3) {
            return(NULL);
        }

        *val = mach_read_from_3(ptr) & 0x3FFFFFUL;

        return(ptr + 3);

    } else if (flag < 0xF0UL) {
        if (end_ptr < ptr + 4) {
            return(NULL);
        }

        *val = mach_read_from_4(ptr) & 0x1FFFFFFFUL;

        return(ptr + 4);

    } else {
        ut_ad(flag == 0xF0UL);

        if (end_ptr < ptr + 5) {
            return(NULL);
        }

        *val = mach_read_from_4(ptr + 1);

        return(ptr + 5);
    }
}
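The flag byte read above doubles as a length marker: values below 0x80 fit in a single byte, and each higher flag range adds one more byte, up to the 0xF0 escape followed by four full bytes. Below is a minimal standalone sketch of a matching encoder, mirroring the ranges the parser checks; it is an illustration, not InnoDB's own mach_write_compressed().

#include <stddef.h>
#include <stdint.h>

/* Minimal sketch of an encoder that matches the ranges decoded above:
   < 0x80 -> 1 byte, < 0x4000 -> 2 bytes (flag 0x80), < 0x200000 -> 3
   bytes (flag 0xC0), < 0x10000000 -> 4 bytes (flag 0xE0), otherwise a
   0xF0 flag byte plus 4 data bytes.  Illustration only. */
static size_t
write_compressed_sketch(unsigned char* b, uint32_t val)
{
    if (val < 0x80) {
        b[0] = (unsigned char) val;
        return(1);
    } else if (val < 0x4000) {
        b[0] = (unsigned char) (0x80 | (val >> 8));
        b[1] = (unsigned char) val;
        return(2);
    } else if (val < 0x200000) {
        b[0] = (unsigned char) (0xC0 | (val >> 16));
        b[1] = (unsigned char) (val >> 8);
        b[2] = (unsigned char) val;
        return(3);
    } else if (val < 0x10000000) {
        b[0] = (unsigned char) (0xE0 | (val >> 24));
        b[1] = (unsigned char) (val >> 16);
        b[2] = (unsigned char) (val >> 8);
        b[3] = (unsigned char) val;
        return(4);
    } else {
        b[0] = 0xF0;
        b[1] = (unsigned char) (val >> 24);
        b[2] = (unsigned char) (val >> 16);
        b[3] = (unsigned char) (val >> 8);
        b[4] = (unsigned char) val;
        return(5);
    }
}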
void process_ibpage(page_t *page) {
    ulint page_id;
    rec_t *origin;
    ulint offsets[MAX_TABLE_FIELDS + 2];
    ulint offset, i;

    // Skip tables if filter used
    if (use_filter_id) {
        dulint index_id = mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID);
        if (index_id.low != filter_id.low || index_id.high != filter_id.high) {
            if (debug) {
                page_id = mach_read_from_4(page + FIL_PAGE_OFFSET);
                printf("Skipped using index id filter: %lu!\n", page_id);
            }
            return;
        }
    }

    // Read page id
    page_id = mach_read_from_4(page + FIL_PAGE_OFFSET);
    if (debug) printf("Page id: %lu\n", page_id);

    // Check requested and actual formats
    if (!check_page_format(page)) return;

    // Find possible data area start point (at least 5 bytes of utility data)
    offset = 100 + record_extra_bytes;
    if (debug) printf("Starting offset: %lu. Checking %d table definitions.\n",
                      offset, table_definitions_cnt);

    // Walk through all possible positions to the end of page
    // (start of directory - extra bytes of the last rec)
    while (offset < UNIV_PAGE_SIZE - record_extra_bytes) {
        // Get record pointer
        origin = page + offset;

        if (debug) printf("\nChecking offset: %lu: ", offset);

        // Check all tables
        for (i = 0; i < table_definitions_cnt; i++) {
            // Get table info
            table_def_t *table = &(table_definitions[i]);
            if (debug) printf(" (%s) ", table->name);

            // Check if origin points to a valid record
            if (check_for_a_record(page, origin, table, offsets)
                && check_constraints(origin, table, offsets)) {
                if (debug) printf("\n---------------------------------------------------\n"
                                  "PAGE%lu: Found a table %s record: %p (offset = %lu)\n",
                                  page_id, table->name, origin, offset);
                offset += process_ibrec(page, origin, table, offsets);
                break;
            }
        }

        // Check from next byte
        offset++;
    }
}
void
dfield_print(
/*=========*/
    dfield_t*   dfield) /* in: dfield */
{
    byte*   data;
    ulint   len;
    ulint   mtype;
    ulint   i;

    len = dfield_get_len(dfield);
    data = dfield_get_data(dfield);

    if (len == UNIV_SQL_NULL) {
        fputs("NULL", stderr);

        return;
    }

    mtype = dtype_get_mtype(dfield_get_type(dfield));

    if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {

        for (i = 0; i < len; i++) {
            int c = *data++;
            putc(isprint(c) ? c : ' ', stderr);
        }
    } else if (mtype == DATA_INT) {
        ut_a(len == 4); /* only works for 32-bit integers */
        fprintf(stderr, "%d", (int)mach_read_from_4(data));
    } else {
        ut_error;
    }
}
UNIV_INTERN
ulint
mem_field_trailer_get_check(byte* field)
{
    return(mach_read_from_4(field + mem_field_header_get_len(field)));
}
void
dfield_print_also_hex(
/*==================*/
    dfield_t*   dfield) /* in: dfield */
{
    byte*   data;
    ulint   len;
    ulint   mtype;
    ulint   i;
    ibool   print_also_hex;

    len = dfield_get_len(dfield);
    data = dfield_get_data(dfield);

    if (len == UNIV_SQL_NULL) {
        printf("NULL");

        return;
    }

    mtype = dtype_get_mtype(dfield_get_type(dfield));

    if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {

        print_also_hex = FALSE;

        for (i = 0; i < len; i++) {

            if (isprint((char)(*data))) {
                printf("%c", (char)*data);
            } else {
                print_also_hex = TRUE;
                printf(" ");
            }

            data++;
        }

        if (!print_also_hex) {

            return;
        }

        printf(" Hex: ");

        data = dfield_get_data(dfield);

        for (i = 0; i < len; i++) {
            printf("%02lx", (ulint)*data);
            data++;
        }
    } else if (mtype == DATA_INT) {
        ut_a(len == 4); /* only works for 32-bit integers */
        printf("%i", (int)mach_read_from_4(data));
    } else {
        ut_error;
    }
}
void
dfield_print_also_hex(
/*==================*/
    dfield_t*   dfield) /* in: dfield */
{
    byte*   data;
    ulint   len;
    ulint   mtype;
    ulint   i;
    ibool   print_also_hex;

    len = dfield_get_len(dfield);
    data = dfield_get_data(dfield);

    if (len == UNIV_SQL_NULL) {
        fputs("NULL", stderr);

        return;
    }

    mtype = dtype_get_mtype(dfield_get_type(dfield));

    if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) {

        print_also_hex = FALSE;

        for (i = 0; i < len; i++) {
            int c = *data++;

            if (!isprint(c)) {
                print_also_hex = TRUE;
                c = ' ';
            }

            putc(c, stderr);
        }

        if (!print_also_hex) {

            return;
        }

        fputs(" Hex: ", stderr);

        data = dfield_get_data(dfield);

        for (i = 0; i < len; i++) {
            fprintf(stderr, "%02lx", (ulint)*data);
            data++;
        }
    } else if (mtype == DATA_INT) {
        ut_a(len == 4); /* only works for 32-bit integers */
        fprintf(stderr, "%d", (int)mach_read_from_4(data));
    } else {
        ut_error;
    }
}
inline unsigned long long int get_uint_value(field_def_t *field, byte *value) {
    switch (field->fixed_length) {
        case 1: return mach_read_from_1(value);
        case 2: return mach_read_from_2(value);
        case 3: return mach_read_from_3(value) & 0x3FFFFFUL;
        case 4: return mach_read_from_4(value);
        case 8: return make_ulonglong(mach_read_from_8(value));
    }
    return 0;
}
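get_uint_value() leans on the mach_read_from_* helpers, which read integers most-significant-byte first from the page buffer. For readers without the InnoDB headers at hand, here is a standalone sketch of equivalent big-endian readers; these are stand-ins written under that byte-order assumption, not the originals from mach0data.ic.

#include <stdint.h>

/* Standalone big-endian readers, assumed equivalents of
   mach_read_from_1/2/3/4 (most significant byte first). */
static uint32_t read_be_1(const unsigned char* p) { return(p[0]); }
static uint32_t read_be_2(const unsigned char* p) {
    return(((uint32_t) p[0] << 8) | p[1]);
}
static uint32_t read_be_3(const unsigned char* p) {
    return(((uint32_t) p[0] << 16) | ((uint32_t) p[1] << 8) | p[2]);
}
static uint32_t read_be_4(const unsigned char* p) {
    return(((uint32_t) p[0] << 24) | ((uint32_t) p[1] << 16)
           | ((uint32_t) p[2] << 8) | p[3]);
}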
inline long long int get_int_value(field_def_t *field, byte *value) {
    switch (field->fixed_length) {
        case 1: return mach_read_from_1(value) & ~(1<<7);
        case 2: return mach_read_from_2(value) & ~(1<<15);
        case 3: return mach_read_from_3(value) & 0x3FFFFFUL & ~(1L<<23);
        case 4: return mach_read_from_4(value) & ~(1L<<31);
        case 8: return make_longlong(mach_read_from_8(value)) & ~(1LL<<63);
    }
    return 0;
}
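get_int_value() only clears the flipped high bit, which is enough for non-negative values. Assuming the usual InnoDB convention that signed integers are stored with the sign bit inverted (so that unsigned byte-wise comparison preserves ordering), a hypothetical variant that also recovers negative values for a 4-byte column could look like this:

#include <stdint.h>

/* Hypothetical signed decode for a 4-byte column, assuming the value
   was stored with its sign bit inverted. */
static int32_t
decode_int32_sketch(const unsigned char* p)
{
    uint32_t v = ((uint32_t) p[0] << 24) | ((uint32_t) p[1] << 16)
               | ((uint32_t) p[2] << 8) | (uint32_t) p[3];

    v ^= 0x80000000UL;     /* undo the sign-bit flip */
    return((int32_t) v);   /* reinterpret as two's complement */
}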
void process_ibpage(page_t *page) {
    static ulint id = 0;
    ulint page_id;
    dulint index_id;
    char tmp[256];
    int fn;

    // Get page info
    page_id = mach_read_from_4(page + FIL_PAGE_OFFSET);
    index_id = mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID);

    // Skip empty pages
    if (ignore_crap && index_id.high == 0 && index_id.low == 0) return;

    // Skip tables if filter used
    if (use_filter_id && (index_id.low != filter_id.low || index_id.high != filter_id.high)) return;

    if (count_pages) {
        if (index_id.high >= 1000) {
            if (ignore_crap) return;
            printf("ERROR: Too high tablespace id! %ld >= 1000!\n", index_id.high);
            exit(1);
        }

        if (index_id.low >= 10000) {
            if (ignore_crap) return;
            printf("ERROR: Too high index id! %ld >= 10000!\n", index_id.low);
            exit(1);
        }

        page_counters[index_id.high][index_id.low]++;
        return;
    }

    // Create table directory
    sprintf(tmp, "pages-%u/%lu-%lu", (unsigned int)timestamp, index_id.high, index_id.low);
    mkdir(tmp, 0755);

    // Compose page file name
    sprintf(tmp, "pages-%u/%lu-%lu/%lu-%08lu.page", (unsigned int)timestamp,
            index_id.high, index_id.low, id++, page_id);

    printf("Read page #%lu.. saving it to %s\n", page_id, tmp);

    // Save page data
    fn = open(tmp, O_WRONLY | O_CREAT | O_TRUNC, 0644);
    if (fn == -1) error("Can't open file to save page!");  // open() returns -1 on failure
    write(fn, page, UNIV_PAGE_SIZE);
    close(fn);
}
/*************************************************************//**
Pretty prints a dfield value according to its data type. */
UNIV_INTERN
void
dfield_print(
/*=========*/
    const dfield_t* dfield) /*!< in: dfield */
{
    const byte* data;
    ulint       len;
    ulint       i;

    len = dfield_get_len(dfield);
    data = dfield_get_data(dfield);

    if (dfield_is_null(dfield)) {
        fputs("NULL", stderr);

        return;
    }

    switch (dtype_get_mtype(dfield_get_type(dfield))) {
    case DATA_CHAR:
    case DATA_VARCHAR:
        for (i = 0; i < len; i++) {
            int c = *data++;
            putc(isprint(c) ? c : ' ', stderr);
        }

        if (dfield_is_ext(dfield)) {
            fputs("(external)", stderr);
        }
        break;
    case DATA_INT:
        ut_a(len == 4); /* only works for 32-bit integers */
        fprintf(stderr, "%d", (int)mach_read_from_4(data));
        break;
    default:
        ut_error;
    }
}
byte*
mach_dulint_parse_compressed(
/*=========================*/
                    /* out: pointer to end of the stored field,
                    NULL if not complete */
    byte*   ptr,    /* in: pointer to buffer from where to read */
    byte*   end_ptr,/* in: pointer to end of the buffer */
    dulint* val)    /* out: read value */
{
    ulint   high;
    ulint   low;
    ulint   size;

    ut_ad(ptr && end_ptr && val);

    if (end_ptr < ptr + 5) {
        return(NULL);
    }

    high = mach_read_compressed(ptr);

    size = mach_get_compressed_size(high);

    ptr += size;

    if (end_ptr < ptr + 4) {
        return(NULL);
    }

    low = mach_read_from_4(ptr);

    *val = ut_dulint_create(high, low);

    return(ptr + 4);
}
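The mach_get_compressed_size() call above has to agree with the flag ranges used by mach_parse_compressed(). A small sketch of that size calculation, mirroring those ranges (illustration only, not the mach0data.ic implementation):

#include <stddef.h>
#include <stdint.h>

/* How many bytes the compressed form of val occupies, per the ranges
   checked by mach_parse_compressed() above.  Sketch only. */
static size_t
compressed_size_sketch(uint32_t val)
{
    if (val < 0x80)        return(1);  /* flag byte is the value itself */
    if (val < 0x4000)      return(2);  /* 0x80 | high bits */
    if (val < 0x200000)    return(3);  /* 0xC0 | high bits */
    if (val < 0x10000000)  return(4);  /* 0xE0 | high bits */
    return(5);                         /* 0xF0 flag + 4 full bytes */
}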
/************************************************************************ Flushes possible buffered writes from the doublewrite memory buffer to disk, and also wakes up the aio thread if simulated aio is used. It is very important to call this function after a batch of writes has been posted, and also when we may have to wait for a page latch! Otherwise a deadlock of threads can occur. */ static void buf_flush_buffered_writes(void) /*===========================*/ { buf_block_t* block; byte* write_buf; ulint len; ulint len2; ulint i; if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) { os_aio_simulated_wake_handler_threads(); return; } mutex_enter(&(trx_doublewrite->mutex)); /* Write first to doublewrite buffer blocks. We use synchronous aio and thus know that file write has been completed when the control returns. */ if (trx_doublewrite->first_free == 0) { mutex_exit(&(trx_doublewrite->mutex)); return; } for (i = 0; i < trx_doublewrite->first_free; i++) { block = trx_doublewrite->buf_block_arr[i]; ut_a(block->state == BUF_BLOCK_FILE_PAGE); if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4) != mach_read_from_4(block->frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: ERROR: The page to be written" " seems corrupt!\n" "InnoDB: The lsn fields do not match!" " Noticed in the buffer pool\n" "InnoDB: before posting to the" " doublewrite buffer.\n"); } if (block->check_index_page_at_flush && !page_simple_validate(block->frame)) { buf_page_print(block->frame); ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Apparent corruption of an" " index page n:o %lu in space %lu\n" "InnoDB: to be written to data file." " We intentionally crash server\n" "InnoDB: to prevent corrupt data" " from ending up in data\n" "InnoDB: files.\n", (ulong) block->offset, (ulong) block->space); ut_error; } } /* increment the doublewrite flushed pages counter */ srv_dblwr_pages_written+= trx_doublewrite->first_free; srv_dblwr_writes++; if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { len = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; } else { len = trx_doublewrite->first_free * UNIV_PAGE_SIZE; } fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, trx_doublewrite->block1, 0, len, (void*)trx_doublewrite->write_buf, NULL); write_buf = trx_doublewrite->write_buf; for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len; len2 += UNIV_PAGE_SIZE) { if (mach_read_from_4(write_buf + len2 + FIL_PAGE_LSN + 4) != mach_read_from_4(write_buf + len2 + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: ERROR: The page to be written" " seems corrupt!\n" "InnoDB: The lsn fields do not match!" " Noticed in the doublewrite block1.\n"); } } if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { len = (trx_doublewrite->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE; fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, trx_doublewrite->block2, 0, len, (void*)(trx_doublewrite->write_buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE), NULL); write_buf = trx_doublewrite->write_buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len; len2 += UNIV_PAGE_SIZE) { if (mach_read_from_4(write_buf + len2 + FIL_PAGE_LSN + 4) != mach_read_from_4(write_buf + len2 + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: ERROR: The page to be" " written seems corrupt!\n" "InnoDB: The lsn fields do not match!" 
" Noticed in" " the doublewrite block2.\n"); } } } /* Now flush the doublewrite buffer data to disk */ fil_flush(TRX_SYS_SPACE); /* We know that the writes have been flushed to disk now and in recovery we will find them in the doublewrite buffer blocks. Next do the writes to the intended positions. */ for (i = 0; i < trx_doublewrite->first_free; i++) { block = trx_doublewrite->buf_block_arr[i]; if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4) != mach_read_from_4(block->frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: ERROR: The page to be written" " seems corrupt!\n" "InnoDB: The lsn fields do not match!" " Noticed in the buffer pool\n" "InnoDB: after posting and flushing" " the doublewrite buffer.\n" "InnoDB: Page buf fix count %lu," " io fix %lu, state %lu\n", (ulong)block->buf_fix_count, (ulong)block->io_fix, (ulong)block->state); } ut_a(block->state == BUF_BLOCK_FILE_PAGE); fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE, (void*)block->frame, (void*)block); } /* Wake possible simulated aio thread to actually post the writes to the operating system */ os_aio_simulated_wake_handler_threads(); /* Wait that all async writes to tablespaces have been posted to the OS */ os_aio_wait_until_no_pending_writes(); /* Now we flush the data to disk (for example, with fsync) */ fil_flush_file_spaces(FIL_TABLESPACE); /* We can now reuse the doublewrite memory buffer: */ trx_doublewrite->first_free = 0; mutex_exit(&(trx_doublewrite->mutex)); }
/*************************************************************//** Pretty prints a dfield value according to its data type. Also the hex string is printed if a string contains non-printable characters. */ UNIV_INTERN void dfield_print_also_hex( /*==================*/ const dfield_t* dfield) /*!< in: dfield */ { const byte* data; ulint len; ulint prtype; ulint i; ibool print_also_hex; len = dfield_get_len(dfield); data = dfield_get_data(dfield); if (dfield_is_null(dfield)) { fputs("NULL", stderr); return; } prtype = dtype_get_prtype(dfield_get_type(dfield)); switch (dtype_get_mtype(dfield_get_type(dfield))) { dulint id; case DATA_INT: switch (len) { ulint val; case 1: val = mach_read_from_1(data); if (!(prtype & DATA_UNSIGNED)) { val &= ~0x80; fprintf(stderr, "%ld", (long) val); } else { fprintf(stderr, "%lu", (ulong) val); } break; case 2: val = mach_read_from_2(data); if (!(prtype & DATA_UNSIGNED)) { val &= ~0x8000; fprintf(stderr, "%ld", (long) val); } else { fprintf(stderr, "%lu", (ulong) val); } break; case 3: val = mach_read_from_3(data); if (!(prtype & DATA_UNSIGNED)) { val &= ~0x800000; fprintf(stderr, "%ld", (long) val); } else { fprintf(stderr, "%lu", (ulong) val); } break; case 4: val = mach_read_from_4(data); if (!(prtype & DATA_UNSIGNED)) { val &= ~0x80000000; fprintf(stderr, "%ld", (long) val); } else { fprintf(stderr, "%lu", (ulong) val); } break; case 6: id = mach_read_from_6(data); fprintf(stderr, "{%lu %lu}", ut_dulint_get_high(id), ut_dulint_get_low(id)); break; case 7: id = mach_read_from_7(data); fprintf(stderr, "{%lu %lu}", ut_dulint_get_high(id), ut_dulint_get_low(id)); break; case 8: id = mach_read_from_8(data); fprintf(stderr, "{%lu %lu}", ut_dulint_get_high(id), ut_dulint_get_low(id)); break; default: goto print_hex; } break; case DATA_SYS: switch (prtype & DATA_SYS_PRTYPE_MASK) { case DATA_TRX_ID: id = mach_read_from_6(data); fprintf(stderr, "trx_id " TRX_ID_FMT, TRX_ID_PREP_PRINTF(id)); break; case DATA_ROLL_PTR: id = mach_read_from_7(data); fprintf(stderr, "roll_ptr {%lu %lu}", ut_dulint_get_high(id), ut_dulint_get_low(id)); break; case DATA_ROW_ID: id = mach_read_from_6(data); fprintf(stderr, "row_id {%lu %lu}", ut_dulint_get_high(id), ut_dulint_get_low(id)); break; default: id = mach_dulint_read_compressed(data); fprintf(stderr, "mix_id {%lu %lu}", ut_dulint_get_high(id), ut_dulint_get_low(id)); } break; case DATA_CHAR: case DATA_VARCHAR: print_also_hex = FALSE; for (i = 0; i < len; i++) { int c = *data++; if (!isprint(c)) { print_also_hex = TRUE; fprintf(stderr, "\\x%02x", (unsigned char) c); } else { putc(c, stderr); } } if (dfield_is_ext(dfield)) { fputs("(external)", stderr); } if (!print_also_hex) { break; } data = dfield_get_data(dfield); /* fall through */ case DATA_BINARY: default: print_hex: fputs(" Hex: ",stderr); for (i = 0; i < len; i++) { fprintf(stderr, "%02lx", (ulint) *data++); } if (dfield_is_ext(dfield)) { fputs("(external)", stderr); } } }
/**********************************************************************//** Try to relocate a block. @return TRUE if relocated */ static ibool buf_buddy_relocate( /*===============*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ void* src, /*!< in: block to relocate */ void* dst, /*!< in: free block to relocate to */ ulint i) /*!< in: index of buf_pool->zip_free[] */ { buf_page_t* bpage; const ulint size = BUF_BUDDY_LOW << i; ullint usec = ut_time_us(NULL); mutex_t* mutex; ulint space; ulint page_no; ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(!mutex_own(&buf_pool->zip_mutex)); ut_ad(!ut_align_offset(src, size)); ut_ad(!ut_align_offset(dst, size)); ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE)); UNIV_MEM_ASSERT_W(dst, size); /* We assume that all memory from buf_buddy_alloc() is used for compressed page frames. */ /* We look inside the allocated objects returned by buf_buddy_alloc() and assume that each block is a compressed page that contains a valid space_id and page_no in the page header. Should the fields be invalid, we will be unable to relocate the block. */ /* The src block may be split into smaller blocks, some of which may be free. Thus, the mach_read_from_4() calls below may attempt to read from free memory. The memory is "owned" by the buddy allocator (and it has been allocated from the buffer pool), so there is nothing wrong about this. The mach_read_from_4() calls here will only trigger bogus Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */ space = mach_read_from_4((const byte *) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); page_no = mach_read_from_4((const byte *) src + FIL_PAGE_OFFSET); /* Suppress Valgrind warnings about conditional jump on uninitialized value. */ UNIV_MEM_VALID(&space, sizeof space); UNIV_MEM_VALID(&page_no, sizeof page_no); bpage = buf_page_hash_get(buf_pool, space, page_no); if (!bpage || bpage->zip.data != src) { /* The block has probably been freshly allocated by buf_LRU_get_free_block() but not added to buf_pool->page_hash yet. Obviously, it cannot be relocated. */ return(FALSE); } if (page_zip_get_size(&bpage->zip) != size) { /* The block is of different size. We would have to relocate all blocks covered by src. For the sake of simplicity, give up. */ ut_ad(page_zip_get_size(&bpage->zip) < size); return(FALSE); } /* The block must have been allocated, but it may contain uninitialized data. */ UNIV_MEM_ASSERT_W(src, size); mutex = buf_page_get_mutex(bpage); mutex_enter(mutex); if (buf_page_can_relocate(bpage)) { /* Relocate the compressed page. */ ut_a(bpage->zip.data == src); memcpy(dst, src, size); bpage->zip.data = dst; mutex_exit(mutex); UNIV_MEM_INVALID(src, size); { buf_buddy_stat_t* buddy_stat = &buf_pool->buddy_stat[i]; buddy_stat->relocated++; buddy_stat->relocated_usec += ut_time_us(NULL) - usec; } return(TRUE); } mutex_exit(mutex); return(FALSE); }
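buf_buddy_relocate() identifies a candidate block by the space id and page number stored in the FIL header of the compressed frame. A standalone sketch of those two reads, assuming the classic header offsets (page number at byte 4, space id at byte 34); the constants are assumptions here, not taken from fil0fil.h.

#include <stdint.h>

/* Sketch of the two FIL header reads relied on above.  Assumed offsets:
   FIL_PAGE_OFFSET = 4, FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID = 34. */
static uint32_t
read_be_4_at(const unsigned char* frame, unsigned off)
{
    return(((uint32_t) frame[off] << 24) | ((uint32_t) frame[off + 1] << 16)
           | ((uint32_t) frame[off + 2] << 8) | (uint32_t) frame[off + 3]);
}

static void
frame_ids_sketch(const unsigned char* frame, uint32_t* space, uint32_t* page_no)
{
    *space   = read_be_4_at(frame, 34); /* FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID */
    *page_no = read_be_4_at(frame, 4);  /* FIL_PAGE_OFFSET */
}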
static void print_page(uchar *p) {
    int type = mach_read_from_2(p + FIL_PAGE_TYPE);
    if (type == FIL_PAGE_TYPE_ALLOCATED) {
        return;
    }

    printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_OFFSET",
           mach_read_from_4(p + FIL_PAGE_OFFSET));
    printf(COLUMN_NAME_FMT " 0x%08lX\n", "FIL_PAGE_SPACE_OR_CHKSUM",
           mach_read_from_4(p + FIL_PAGE_SPACE_OR_CHKSUM));
    printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_PREV",
           mach_read_from_4(p + FIL_PAGE_PREV));
    printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_NEXT",
           mach_read_from_4(p + FIL_PAGE_NEXT));
    printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_LSN",
           mach_read_from_4(p + FIL_PAGE_LSN));
    printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_TYPE",
           mach_read_from_2(p + FIL_PAGE_TYPE));

    dulint flush_lsn_tuple = mach_read_from_6(p + FIL_PAGE_FILE_FLUSH_LSN);
    uint64_t flush_lsn = (((uint64_t) flush_lsn_tuple.high) << 32) + flush_lsn_tuple.low;
    printf(COLUMN_NAME_FMT " %" PRIu64 "\n", "FIL_PAGE_FILE_FLUSH_LSN", flush_lsn);

    printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID",
           mach_read_from_4(p + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
    printf(COLUMN_NAME_FMT " 0x%08lX\n", "FIL_PAGE_END_LSN_OLD_CHKSUM",
           mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM));

    uchar *pd = p + FIL_PAGE_DATA;
    if (type == FIL_PAGE_TYPE_FSP_HDR) {
        printf(COLUMN_NAME_FMT " %ld\n", "FSEG_HDR_SPACE",
               mach_read_from_4(pd + FSEG_HDR_SPACE));
        printf(COLUMN_NAME_FMT " %ld\n", "FSEG_HDR_PAGE_NO",
               mach_read_from_4(pd + FSEG_HDR_PAGE_NO));
        printf(COLUMN_NAME_FMT " %ld\n", "FSEG_HDR_OFFSET",
               mach_read_from_4(pd + FSEG_HDR_OFFSET));
    } else if (type == FIL_PAGE_INDEX) {
        printf(COLUMN_NAME_FMT " 0x%lX\n", "PAGE_N_HEAP",
               mach_read_from_2(pd + PAGE_N_HEAP));
        printf(COLUMN_NAME_FMT " 0x%lX\n", "PAGE_FREE",
               mach_read_from_2(pd + PAGE_FREE));

        dulint index_id_tuple = mach_read_from_8(pd + PAGE_INDEX_ID);
        uint64_t index_id = (((uint64_t) index_id_tuple.high) << 32) + index_id_tuple.low;
        printf(COLUMN_NAME_FMT " %" PRIu64 "\n", "PAGE_INDEX_ID", index_id);

        printf(COLUMN_NAME_FMT " %ld\n", "PAGE_BTR_SEG_LEAF",
               mach_read_from_4(pd + PAGE_BTR_SEG_LEAF + FSEG_HDR_SPACE));
        printf(COLUMN_NAME_FMT " %ld\n", "PAGE_BTR_SEG_TOP",
               mach_read_from_4(pd + PAGE_BTR_SEG_TOP + FSEG_HDR_SPACE));

#if 0
        int i;
        for (i = 0; i < 80; i += 4) {
            if (i == PAGE_BTR_SEG_LEAF || i == PAGE_N_HEAP
                || i == PAGE_INDEX_ID || i == PAGE_INDEX_ID + 4) {
                continue;
            }
            char column_name[256];
            snprintf(column_name, sizeof(column_name), "FIL_PAGE_DATA + %2d", i);
            printf(COLUMN_NAME_FMT " %ld\n", column_name,
                   mach_read_from_4(p + FIL_PAGE_DATA + i));
        }
#endif
    }

    printf("\n");
}
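A hypothetical driver for print_page(), shown only to illustrate how such a dumper is typically fed: it assumes the default 16 KiB page size and that the print_page() above is compiled into the same file; error handling is kept minimal.

#include <stdio.h>

/* Hypothetical driver: walk a tablespace file page by page and dump
   each page header via print_page() defined above.  Assumes the
   default 16 KiB page size. */
static int
dump_file_pages(const char* path)
{
    enum { PAGE_SIZE_BYTES = 16384 };
    static unsigned char page[PAGE_SIZE_BYTES];
    FILE* f = fopen(path, "rb");

    if (f == NULL) {
        perror("fopen");
        return(1);
    }
    while (fread(page, 1, sizeof page, f) == sizeof page) {
        print_page(page);
    }
    fclose(f);
    return(0);
}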
int main(int argc, char **argv) { FILE *f; /* our input file */ FILE *b; /* our output file */ byte *p; /* storage of pages read input file*/ byte *op; /* storage of pages read output file*/ int bytes; /* bytes read count */ ulint ct; /* current page number (0 based) */ int now; /* current time */ int lastt; /* last time */ ulint oldcsum, noldcsum, oldcsumfield, noldcsumfield, csum, ncsum, csumfield, ncsumfield, logseq, nlogseq, logseqfield, nlogseqfield; /* ulints for checksum storage */ struct stat st; /* for stat, if you couldn't guess */ unsigned long long int size; /* size of file (has to be 64 bits) */ ulint pages; /* number of pages in file */ ulint start_page= 0, end_page= 0, use_end_page= 0; /* for starting and ending at certain pages */ off_t offset= 0; off_t boffset= 0; int just_count= 0; /* if true, just print page count */ int verbose= 0; int debug= 0; int c; int fd; int fn; int posstat; off_t pos; /* make sure we have the right arguments */ if (argc != 3) { printf("InnoDB offline file checksum utility.\n"); printf("usage: %s <backupfilename> <filename>\n", argv[0]); return 1; } /* stat the file to get size and page count */ if (stat(argv[2], &st)) { perror("error statting file"); return 1; } size= st.st_size; pages= size / UNIV_PAGE_SIZE; if (just_count) { printf("%lu\n", pages); return 0; } else if (verbose) { printf("file %s= %llu bytes (%lu pages)...\n", argv[1], size, pages); printf("checking pages in range %lu to %lu\n", start_page, use_end_page ? end_page : (pages - 1)); } /* open the file for reading */ f= fopen(argv[2], "rb"); if (!f) { perror("error opening ibdata file"); return 1; } /* open the file for readwrite */ b= fopen(argv[1], "rb+"); if (!b) { perror("error opening backup file"); return 1; } printf("Comparing files %s and %s\n", argv[1], argv[2]); /* seek to the necessary position */ if (start_page) { fd= fileno(f); fn= fileno(b); if (!fd || !fn) { perror("unable to obtain file descriptor number"); return 1; } offset= (off_t)start_page * (off_t)UNIV_PAGE_SIZE; boffset= (off_t)start_page * (off_t)UNIV_PAGE_SIZE; if (lseek(fd, offset, SEEK_SET) != offset) { perror("unable to seek to necessary offset"); return 1; } if (lseek(fn, boffset, SEEK_SET) != boffset) { perror("unable to seek to necessary offset"); return 1; } } /* allocate buffer for reading (so we don't realloc every time) */ p= (byte *)malloc(UNIV_PAGE_SIZE); op= (byte *)malloc(UNIV_PAGE_SIZE); /* main checksumming loop */ ct= start_page; lastt= 0; while (!feof(f)) { bytes= fread(p, 1, UNIV_PAGE_SIZE, f); if (!bytes && feof(f)) return 0; if (bytes != UNIV_PAGE_SIZE) { fprintf(stderr, "bytes read (%d) doesn't match universal page size (%d)\n", bytes, UNIV_PAGE_SIZE); return 1; } /* get position before reading page from backup file */ pos = ftell(b); /* read page from backup file */ bytes= fread(op, 1, UNIV_PAGE_SIZE, b); /* check the "stored log sequence numbers" */ logseq= mach_read_from_4(p + FIL_PAGE_LSN + 4); nlogseq= mach_read_from_4(op + FIL_PAGE_LSN + 4); logseqfield= mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4); nlogseqfield= mach_read_from_4(op + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4); /* Make sure our page LSNs make sense */ if (logseq != logseqfield || nlogseq != nlogseqfield) { fprintf(stderr, "page %lu invalid (fails log sequence number check)\n", ct); return 1; } /* get old method checksums */ oldcsumfield= mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM); noldcsumfield= mach_read_from_4(op + UNIV_PAGE_SIZE - 
FIL_PAGE_END_LSN_OLD_CHKSUM); /* get new method checksums */ csumfield= mach_read_from_4(p + FIL_PAGE_SPACE_OR_CHKSUM); ncsumfield= mach_read_from_4(op + FIL_PAGE_SPACE_OR_CHKSUM); /* If any of the LSNs or checksums don't match, assume we need the newer page */ if (logseq != nlogseq || oldcsumfield != noldcsumfield || csumfield != ncsumfield) { printf("%lu:%lu:%lu:%lu:%lu\t Is different from \t%lu:%lu:%lu:%lu:%lu\n", ct, logseq, logseqfield, oldcsumfield, csumfield, ct, nlogseq, nlogseqfield, noldcsumfield, ncsumfield); printf("Page start position is %lu\n", pos); /* seek back to the start of this page */ fseek(b, -UNIV_PAGE_SIZE, SEEK_CUR); printf("Successfully rewound position to %lu\n", pos); /* write the page from the newer file to the backup */ fwrite(p, 1, UNIV_PAGE_SIZE, b); /* we should now be back at the end of the page */ pos = ftell(b); printf("Wrote new page and back at %lu\n", pos); } /* do counter increase and progress printing */ ct++; } return 0; }
/************************************************************************ Loads definitions for table indexes. Adds them to the data dictionary cache. */ static void dict_load_indexes( /*==============*/ dict_table_t* table, /* in: table */ mem_heap_t* heap) /* in: memory heap for temporary storage */ { dict_table_t* sys_indexes; dict_index_t* sys_index; dict_index_t* index; btr_pcur_t pcur; dtuple_t* tuple; dfield_t* dfield; rec_t* rec; byte* field; ulint len; ulint name_len; char* name_buf; ulint type; ulint space; ulint page_no; ulint n_fields; byte* buf; ibool is_sys_table; dulint id; mtr_t mtr; ut_ad(mutex_own(&(dict_sys->mutex))); if ((ut_dulint_get_high(table->id) == 0) && (ut_dulint_get_low(table->id) < DICT_HDR_FIRST_ID)) { is_sys_table = TRUE; } else { is_sys_table = FALSE; } mtr_start(&mtr); sys_indexes = dict_table_get_low("SYS_INDEXES"); sys_index = UT_LIST_GET_FIRST(sys_indexes->indexes); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); buf = mem_heap_alloc(heap, 8); mach_write_to_8(buf, table->id); dfield_set_data(dfield, buf, 8); dict_index_copy_types(tuple, sys_index, 1); btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); for (;;) { if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { break; } rec = btr_pcur_get_rec(&pcur); field = rec_get_nth_field(rec, 0, &len); ut_ad(len == 8); if (ut_memcmp(buf, field, len) != 0) { break; } ut_a(!rec_get_deleted_flag(rec)); field = rec_get_nth_field(rec, 1, &len); ut_ad(len == 8); id = mach_read_from_8(field); ut_a(0 == ut_strcmp("NAME", dict_field_get_col( dict_index_get_nth_field( dict_table_get_first_index(sys_indexes), 4))->name)); field = rec_get_nth_field(rec, 4, &name_len); name_buf = mem_heap_alloc(heap, name_len + 1); ut_memcpy(name_buf, field, name_len); name_buf[name_len] = '\0'; field = rec_get_nth_field(rec, 5, &len); n_fields = mach_read_from_4(field); field = rec_get_nth_field(rec, 6, &len); type = mach_read_from_4(field); field = rec_get_nth_field(rec, 7, &len); space = mach_read_from_4(field); ut_a(0 == ut_strcmp("PAGE_NO", dict_field_get_col( dict_index_get_nth_field( dict_table_get_first_index(sys_indexes), 8))->name)); field = rec_get_nth_field(rec, 8, &len); page_no = mach_read_from_4(field); if (is_sys_table && ((type & DICT_CLUSTERED) || ((table == dict_sys->sys_tables) && (name_len == ut_strlen("ID_IND")) && (0 == ut_memcmp(name_buf, "ID_IND", name_len))))) { /* The index was created in memory already in booting */ } else { index = dict_mem_index_create(table->name, name_buf, space, type, n_fields); index->page_no = page_no; index->id = id; dict_load_fields(table, index, heap); dict_index_add_to_cache(table, index); } btr_pcur_move_to_next_user_rec(&pcur, &mtr); } btr_pcur_close(&pcur); mtr_commit(&mtr); }
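For orientation, the SYS_INDEXES record fields that dict_load_indexes() consumes, keyed by the field numbers it passes to rec_get_nth_field(), can be summarized in one place. This is a reference sketch derived from the reads above, not a definition taken from the source tree.

#include <stdint.h>

/* Reference sketch of the SYS_INDEXES fields read above. */
struct sys_indexes_fields_sketch {
    unsigned char table_id[8]; /* field 0: TABLE_ID, compared against buf */
    unsigned char index_id[8]; /* field 1: ID of the index */
    const char*   name;        /* field 4: NAME (copied and NUL-terminated) */
    uint32_t      n_fields;    /* field 5: N_FIELDS */
    uint32_t      type;        /* field 6: TYPE */
    uint32_t      space;       /* field 7: SPACE */
    uint32_t      page_no;     /* field 8: PAGE_NO */
};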
/************************************************************************ Loads definitions for table columns. */ static void dict_load_columns( /*==============*/ dict_table_t* table, /* in: table */ mem_heap_t* heap) /* in: memory heap for temporary storage */ { dict_table_t* sys_columns; dict_index_t* sys_index; btr_pcur_t pcur; dtuple_t* tuple; dfield_t* dfield; rec_t* rec; byte* field; ulint len; byte* buf; char* name_buf; char* name; ulint mtype; ulint prtype; ulint col_len; ulint prec; ulint i; mtr_t mtr; ut_ad(mutex_own(&(dict_sys->mutex))); mtr_start(&mtr); sys_columns = dict_table_get_low("SYS_COLUMNS"); sys_index = UT_LIST_GET_FIRST(sys_columns->indexes); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); buf = mem_heap_alloc(heap, 8); mach_write_to_8(buf, table->id); dfield_set_data(dfield, buf, 8); dict_index_copy_types(tuple, sys_index, 1); btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); for (i = 0; i < table->n_cols - DATA_N_SYS_COLS; i++) { rec = btr_pcur_get_rec(&pcur); ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr)); ut_a(!rec_get_deleted_flag(rec)); field = rec_get_nth_field(rec, 0, &len); ut_ad(len == 8); ut_a(ut_dulint_cmp(table->id, mach_read_from_8(field)) == 0); field = rec_get_nth_field(rec, 1, &len); ut_ad(len == 4); ut_a(i == mach_read_from_4(field)); ut_a(0 == ut_strcmp("NAME", dict_field_get_col( dict_index_get_nth_field( dict_table_get_first_index(sys_columns), 4))->name)); field = rec_get_nth_field(rec, 4, &len); name_buf = mem_heap_alloc(heap, len + 1); ut_memcpy(name_buf, field, len); name_buf[len] = '\0'; name = name_buf; field = rec_get_nth_field(rec, 5, &len); mtype = mach_read_from_4(field); field = rec_get_nth_field(rec, 6, &len); prtype = mach_read_from_4(field); field = rec_get_nth_field(rec, 7, &len); col_len = mach_read_from_4(field); ut_a(0 == ut_strcmp("PREC", dict_field_get_col( dict_index_get_nth_field( dict_table_get_first_index(sys_columns), 8))->name)); field = rec_get_nth_field(rec, 8, &len); prec = mach_read_from_4(field); dict_mem_table_add_col(table, name, mtype, prtype, col_len, prec); btr_pcur_move_to_next_user_rec(&pcur, &mtr); } btr_pcur_close(&pcur); mtr_commit(&mtr); }
dict_table_t* dict_load_table( /*============*/ /* out: table, NULL if does not exist */ char* name) /* in: table name */ { dict_table_t* table; dict_table_t* sys_tables; btr_pcur_t pcur; dict_index_t* sys_index; dtuple_t* tuple; mem_heap_t* heap; dfield_t* dfield; rec_t* rec; byte* field; ulint len; char* buf; ulint space; ulint n_cols; mtr_t mtr; ut_ad(mutex_own(&(dict_sys->mutex))); heap = mem_heap_create(1000); mtr_start(&mtr); sys_tables = dict_table_get_low("SYS_TABLES"); sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); dfield_set_data(dfield, name, ut_strlen(name)); dict_index_copy_types(tuple, sys_index, 1); btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); rec = btr_pcur_get_rec(&pcur); if (!btr_pcur_is_on_user_rec(&pcur, &mtr) || rec_get_deleted_flag(rec)) { /* Not found */ btr_pcur_close(&pcur); mtr_commit(&mtr); mem_heap_free(heap); return(NULL); } field = rec_get_nth_field(rec, 0, &len); /* Check if the table name in record is the searched one */ if (len != ut_strlen(name) || ut_memcmp(name, field, len) != 0) { btr_pcur_close(&pcur); mtr_commit(&mtr); mem_heap_free(heap); return(NULL); } ut_a(0 == ut_strcmp("SPACE", dict_field_get_col( dict_index_get_nth_field( dict_table_get_first_index(sys_tables), 9))->name)); field = rec_get_nth_field(rec, 9, &len); space = mach_read_from_4(field); ut_a(0 == ut_strcmp("N_COLS", dict_field_get_col( dict_index_get_nth_field( dict_table_get_first_index(sys_tables), 4))->name)); field = rec_get_nth_field(rec, 4, &len); n_cols = mach_read_from_4(field); table = dict_mem_table_create(name, space, n_cols); ut_a(0 == ut_strcmp("ID", dict_field_get_col( dict_index_get_nth_field( dict_table_get_first_index(sys_tables), 3))->name)); field = rec_get_nth_field(rec, 3, &len); table->id = mach_read_from_8(field); field = rec_get_nth_field(rec, 5, &len); table->type = mach_read_from_4(field); if (table->type == DICT_TABLE_CLUSTER_MEMBER) { ut_a(0); field = rec_get_nth_field(rec, 6, &len); table->mix_id = mach_read_from_8(field); field = rec_get_nth_field(rec, 8, &len); buf = mem_heap_alloc(heap, len); ut_memcpy(buf, field, len); table->cluster_name = buf; } if ((table->type == DICT_TABLE_CLUSTER) || (table->type == DICT_TABLE_CLUSTER_MEMBER)) { field = rec_get_nth_field(rec, 7, &len); table->mix_len = mach_read_from_4(field); } btr_pcur_close(&pcur); mtr_commit(&mtr); if (table->type == DICT_TABLE_CLUSTER_MEMBER) { /* Load the cluster table definition if not yet in memory cache */ dict_table_get_low(table->cluster_name); } dict_load_columns(table, heap); dict_table_add_to_cache(table); dict_load_indexes(table, heap); ut_a(DB_SUCCESS == dict_load_foreigns(table->name)); mem_heap_free(heap); return(table); }
int main(int argc, char **argv) { FILE *f; /* our input file */ uchar *p; /* storage of pages read */ int bytes; /* bytes read count */ ulint ct; /* current page number (0 based) */ int now; /* current time */ int lastt; /* last time */ ulint oldcsum, oldcsumfield, csum, csumfield, logseq, logseqfield; /* ulints for checksum storage */ struct stat st; /* for stat, if you couldn't guess */ unsigned long long int size; /* size of file (has to be 64 bits) */ ulint pages; /* number of pages in file */ ulint start_page= 0, end_page= 0, use_end_page= 0; /* for starting and ending at certain pages */ off_t offset= 0; int just_count= 0; /* if true, just print page count */ int verbose= 0; int debug= 0; int c; int fd; /* remove arguments */ while ((c= getopt(argc, argv, "cvds:e:p:")) != -1) { switch (c) { case 'v': verbose= 1; break; case 'c': just_count= 1; break; case 's': start_page= atoi(optarg); break; case 'e': end_page= atoi(optarg); use_end_page= 1; break; case 'p': start_page= atoi(optarg); end_page= atoi(optarg); use_end_page= 1; break; case 'd': debug= 1; break; case ':': fprintf(stderr, "option -%c requires an argument\n", optopt); return 1; break; case '?': fprintf(stderr, "unrecognized option: -%c\n", optopt); return 1; break; } } /* debug implies verbose... */ if (debug) verbose= 1; /* make sure we have the right arguments */ if (optind >= argc) { printf("InnoDB offline file checksum utility.\n"); printf("usage: %s [-c] [-s <start page>] [-e <end page>] [-p <page>] [-v] [-d] <filename>\n", argv[0]); printf("\t-c\tprint the count of pages in the file\n"); printf("\t-s n\tstart on this page number (0 based)\n"); printf("\t-e n\tend at this page number (0 based)\n"); printf("\t-p n\tcheck only this page (0 based)\n"); printf("\t-v\tverbose (prints progress every 5 seconds)\n"); printf("\t-d\tdebug mode (prints checksums for each page)\n"); return 1; } /* stat the file to get size and page count */ if (stat(argv[optind], &st)) { perror("error statting file"); return 1; } size= st.st_size; pages= size / UNIV_PAGE_SIZE; if (just_count) { printf("%lu\n", pages); return 0; } else if (verbose) { printf("file %s = %llu bytes (%lu pages)...\n", argv[optind], size, pages); printf("checking pages in range %lu to %lu\n", start_page, use_end_page ? 
end_page : (pages - 1)); } /* open the file for reading */ f= fopen(argv[optind], "r"); if (!f) { perror("error opening file"); return 1; } /* seek to the necessary position */ if (start_page) { fd= fileno(f); if (!fd) { perror("unable to obtain file descriptor number"); return 1; } offset= (off_t)start_page * (off_t)UNIV_PAGE_SIZE; if (lseek(fd, offset, SEEK_SET) != offset) { perror("unable to seek to necessary offset"); return 1; } } /* allocate buffer for reading (so we don't realloc every time) */ p= (uchar *)malloc(UNIV_PAGE_SIZE); /* main checksumming loop */ ct= start_page; lastt= 0; while (!feof(f)) { bytes= fread(p, 1, UNIV_PAGE_SIZE, f); if (!bytes && feof(f)) return 0; if (bytes != UNIV_PAGE_SIZE) { fprintf(stderr, "bytes read (%d) doesn't match universal page size (%d)\n", bytes, UNIV_PAGE_SIZE); return 1; } /* check the "stored log sequence numbers" */ logseq= mach_read_from_4(p + FIL_PAGE_LSN + 4); logseqfield= mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4); if (debug) printf("page %lu: log sequence number: first = %lu; second = %lu\n", ct, logseq, logseqfield); if (logseq != logseqfield) { fprintf(stderr, "page %lu invalid (fails log sequence number check)\n", ct); return 1; } /* check old method of checksumming */ oldcsum= buf_calc_page_old_checksum(p); oldcsumfield= mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM); if (debug) printf("page %lu: old style: calculated = %lu; recorded = %lu\n", ct, oldcsum, oldcsumfield); if (oldcsumfield != mach_read_from_4(p + FIL_PAGE_LSN) && oldcsumfield != oldcsum) { fprintf(stderr, "page %lu invalid (fails old style checksum)\n", ct); return 1; } /* now check the new method */ csum= buf_calc_page_new_checksum(p); csumfield= mach_read_from_4(p + FIL_PAGE_SPACE_OR_CHKSUM); if (debug) printf("page %lu: new style: calculated = %lu; recorded = %lu\n", ct, csum, csumfield); if (csumfield != 0 && csum != csumfield) { fprintf(stderr, "page %lu invalid (fails new style checksum)\n", ct); return 1; } /* end if this was the last page we were supposed to check */ if (use_end_page && (ct >= end_page)) return 0; /* do counter increase and progress printing */ ct++; if (verbose) { if (ct % 64 == 0) { now= time(0); if (!lastt) lastt= now; if (now - lastt >= 1) { printf("page %lu okay: %.3f%% done\n", (ct - 1), (float) ct / pages * 100); lastt= now; } } } } return 0; }
/************************************************************************ Loads definitions for index fields. */ static void dict_load_fields( /*=============*/ dict_table_t* table, /* in: table */ dict_index_t* index, /* in: index whose fields to load */ mem_heap_t* heap) /* in: memory heap for temporary storage */ { dict_table_t* sys_fields; dict_index_t* sys_index; btr_pcur_t pcur; dtuple_t* tuple; dfield_t* dfield; char* col_name; rec_t* rec; byte* field; ulint len; byte* buf; ulint i; mtr_t mtr; ut_ad(mutex_own(&(dict_sys->mutex))); UT_NOT_USED(table); mtr_start(&mtr); sys_fields = dict_table_get_low("SYS_FIELDS"); sys_index = UT_LIST_GET_FIRST(sys_fields->indexes); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); buf = mem_heap_alloc(heap, 8); mach_write_to_8(buf, index->id); dfield_set_data(dfield, buf, 8); dict_index_copy_types(tuple, sys_index, 1); btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); for (i = 0; i < index->n_fields; i++) { rec = btr_pcur_get_rec(&pcur); ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr)); ut_a(!rec_get_deleted_flag(rec)); field = rec_get_nth_field(rec, 0, &len); ut_ad(len == 8); ut_a(ut_memcmp(buf, field, len) == 0); field = rec_get_nth_field(rec, 1, &len); ut_ad(len == 4); ut_a(i == mach_read_from_4(field)); ut_a(0 == ut_strcmp("COL_NAME", dict_field_get_col( dict_index_get_nth_field( dict_table_get_first_index(sys_fields), 4))->name)); field = rec_get_nth_field(rec, 4, &len); col_name = mem_heap_alloc(heap, len + 1); ut_memcpy(col_name, field, len); col_name[len] = '\0'; dict_mem_index_add_field(index, col_name, 0); btr_pcur_move_to_next_user_rec(&pcur, &mtr); } btr_pcur_close(&pcur); mtr_commit(&mtr); }
/************************************************************************ Loads foreign key constraint col names (also for the referenced table). */ static void dict_load_foreign_cols( /*===================*/ char* id, /* in: foreign constraint id as a null- terminated string */ dict_foreign_t* foreign)/* in: foreign constraint object */ { dict_table_t* sys_foreign_cols; dict_index_t* sys_index; btr_pcur_t pcur; dtuple_t* tuple; dfield_t* dfield; char* col_name; rec_t* rec; byte* field; ulint len; ulint i; mtr_t mtr; ut_ad(mutex_own(&(dict_sys->mutex))); foreign->foreign_col_names = mem_heap_alloc(foreign->heap, foreign->n_fields * sizeof(void*)); foreign->referenced_col_names = mem_heap_alloc(foreign->heap, foreign->n_fields * sizeof(void*)); mtr_start(&mtr); sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS"); sys_index = UT_LIST_GET_FIRST(sys_foreign_cols->indexes); tuple = dtuple_create(foreign->heap, 1); dfield = dtuple_get_nth_field(tuple, 0); dfield_set_data(dfield, id, ut_strlen(id)); dict_index_copy_types(tuple, sys_index, 1); btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); for (i = 0; i < foreign->n_fields; i++) { rec = btr_pcur_get_rec(&pcur); ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr)); ut_a(!rec_get_deleted_flag(rec)); field = rec_get_nth_field(rec, 0, &len); ut_a(len == ut_strlen(id)); ut_a(ut_memcmp(id, field, len) == 0); field = rec_get_nth_field(rec, 1, &len); ut_a(len == 4); ut_a(i == mach_read_from_4(field)); field = rec_get_nth_field(rec, 4, &len); col_name = mem_heap_alloc(foreign->heap, len + 1); ut_memcpy(col_name, field, len); col_name[len] = '\0'; foreign->foreign_col_names[i] = col_name; field = rec_get_nth_field(rec, 5, &len); col_name = mem_heap_alloc(foreign->heap, len + 1); ut_memcpy(col_name, field, len); col_name[len] = '\0'; foreign->referenced_col_names[i] = col_name; btr_pcur_move_to_next_user_rec(&pcur, &mtr); } btr_pcur_close(&pcur); mtr_commit(&mtr); }
uint64_t mach_read_from_8(byte* p) {
    return ((uint64_t) mach_read_from_4(p) << 32
            | (uint64_t) mach_read_from_4(p + 4));
}
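For completeness, a sketch of the matching writer under the same layout assumption (high word first, each half big-endian); illustration only, not mach_write_to_8().

#include <stdint.h>

/* Write a 64-bit value most-significant-byte first, the inverse of the
   mach_read_from_8() above.  Sketch only. */
static void
write_be_8_sketch(unsigned char* p, uint64_t v)
{
    int i;

    for (i = 0; i < 8; i++) {
        p[i] = (unsigned char) (v >> (8 * (7 - i)));
    }
}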
void process_ibpage(page_t *page) { ulint page_id; rec_t *origin; ulint offsets[MAX_TABLE_FIELDS + 2]; ulint offset, i; int is_page_valid = 0; int comp; unsigned int expected_records = 0; unsigned int actual_records = 0; int16_t b, infimum, supremum; // Skip tables if filter used if (use_filter_id) { dulint index_id = mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID); if (index_id.low != filter_id.low || index_id.high != filter_id.high) { if (debug) { page_id = mach_read_from_4(page + FIL_PAGE_OFFSET); printf("Skipped using index id filter: %lu!\n", page_id); } return; } } // Read page id page_id = mach_read_from_4(page + FIL_PAGE_OFFSET); if (debug) printf("Page id: %lu\n", page_id); fprintf(f_result, "-- Page id: %lu", page_id); // Check requested and actual formats if (!check_page_format(page)) return; if(table_definitions_cnt == 0){ fprintf(stderr, "There are no table definitions. Please check include/table_defs.h\n"); exit(EXIT_FAILURE); } is_page_valid = check_page(page, &expected_records); // comp == 1 if page in COMPACT format and 0 if REDUNDANT comp = page_is_comp(page); fprintf(f_result, ", Format: %s", (comp ) ? "COMPACT": "REDUNDANT"); infimum = (comp) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM; supremum = (comp) ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM; // Find possible data area start point (at least 5 bytes of utility data) if(is_page_valid){ b = mach_read_from_2(page + infimum - 2); offset = (comp) ? infimum + b : b; } else{ offset = 100 + record_extra_bytes; } fprintf(f_result, ", Records list: %s", is_page_valid? "Valid": "Invalid"); fprintf(f_result, ", Expected records: (%u %lu)", expected_records, mach_read_from_2(page + PAGE_HEADER + PAGE_N_RECS)); fprintf(f_result, "\n"); if (debug) printf("Starting offset: %lu (%lX). Checking %d table definitions.\n", offset, offset, table_definitions_cnt); // Walk through all possible positions to the end of page // (start of directory - extra bytes of the last rec) //is_page_valid = 0; while (offset < UNIV_PAGE_SIZE - record_extra_bytes && ( (offset != supremum ) || !is_page_valid) ) { // Get record pointer origin = page + offset; if (debug) printf("\nChecking offset: 0x%lX: ", offset); // Check all tables for (i = 0; i < table_definitions_cnt; i++) { // Get table info table_def_t *table = &(table_definitions[i]); if (debug) printf(" (%s) ", table->name); // Check if origin points to a valid record if (check_for_a_record(page, origin, table, offsets) && check_constraints(origin, table, offsets)) { actual_records++; if (debug) printf("\n---------------------------------------------------\n" "PAGE%lu: Found a table %s record: %p (offset = %lu)\n", \ page_id, table->name, origin, offset); if(is_page_valid){ process_ibrec(page, origin, table, offsets); b = mach_read_from_2(page + offset - 2); offset = (comp) ? offset + b : b; } else{ offset += process_ibrec(page, origin, table, offsets); } if (debug) printf("Next offset: 0x%lX", offset); break; } else{ if(is_page_valid){ b = mach_read_from_2(page + offset - 2); offset = (comp) ? offset + b : b; } else{ offset++; } if (debug) printf("\nNext offset: %lX", offset); } } } fprintf(f_result, "-- Page id: %lu", page_id); fprintf(f_result, ", Found records: %u", actual_records); fprintf(f_result, ", Lost records: %s", (actual_records != expected_records) ? "YES": "NO"); fprintf(f_result, ", Leaf page: %s", (mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0)? "YES": "NO"); fprintf(f_result, "\n"); }
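The record-chaining step above relies on the next-record pointer kept in the two bytes just before the record origin: a signed offset relative to the current record on COMPACT pages, an absolute in-page offset on REDUNDANT pages. A small sketch of that step in isolation, mirroring the logic of the code rather than page0page.ic:

#include <stdint.h>

/* Next-record offset, as used above: relative (signed) for COMPACT,
   absolute for REDUNDANT.  Logic sketch only. */
static unsigned
next_record_offset_sketch(const unsigned char* page, unsigned offset, int comp)
{
    const unsigned char* p = page + offset - 2;
    int16_t rel = (int16_t) (((uint16_t) p[0] << 8) | p[1]);

    return(comp ? (unsigned) (offset + rel) : (uint16_t) rel);
}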
/*************************************************************************** Loads a foreign key constraint to the dictionary cache. */ static ulint dict_load_foreign( /*==============*/ /* out: DB_SUCCESS or error code */ char* id) /* in: foreign constraint id as a null-terminated string */ { dict_foreign_t* foreign; dict_table_t* sys_foreign; btr_pcur_t pcur; dict_index_t* sys_index; dtuple_t* tuple; mem_heap_t* heap2; dfield_t* dfield; rec_t* rec; byte* field; ulint len; ulint err; mtr_t mtr; ut_ad(mutex_own(&(dict_sys->mutex))); heap2 = mem_heap_create(1000); mtr_start(&mtr); sys_foreign = dict_table_get_low("SYS_FOREIGN"); sys_index = UT_LIST_GET_FIRST(sys_foreign->indexes); tuple = dtuple_create(heap2, 1); dfield = dtuple_get_nth_field(tuple, 0); dfield_set_data(dfield, id, ut_strlen(id)); dict_index_copy_types(tuple, sys_index, 1); btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); rec = btr_pcur_get_rec(&pcur); if (!btr_pcur_is_on_user_rec(&pcur, &mtr) || rec_get_deleted_flag(rec)) { /* Not found */ fprintf(stderr, "InnoDB: Error A: cannot load foreign constraint %s\n", id); btr_pcur_close(&pcur); mtr_commit(&mtr); mem_heap_free(heap2); return(DB_ERROR); } field = rec_get_nth_field(rec, 0, &len); /* Check if the id in record is the searched one */ if (len != ut_strlen(id) || ut_memcmp(id, field, len) != 0) { fprintf(stderr, "InnoDB: Error B: cannot load foreign constraint %s\n", id); btr_pcur_close(&pcur); mtr_commit(&mtr); mem_heap_free(heap2); return(DB_ERROR); } /* Read the table names and the number of columns associated with the constraint */ mem_heap_free(heap2); foreign = dict_mem_foreign_create(); foreign->n_fields = mach_read_from_4(rec_get_nth_field(rec, 5, &len)); ut_a(len == 4); foreign->id = mem_heap_alloc(foreign->heap, ut_strlen(id) + 1); ut_memcpy(foreign->id, id, ut_strlen(id) + 1); field = rec_get_nth_field(rec, 3, &len); foreign->foreign_table_name = mem_heap_alloc(foreign->heap, 1 + len); ut_memcpy(foreign->foreign_table_name, field, len); foreign->foreign_table_name[len] = '\0'; field = rec_get_nth_field(rec, 4, &len); foreign->referenced_table_name = mem_heap_alloc(foreign->heap, 1 + len); ut_memcpy(foreign->referenced_table_name, field, len); foreign->referenced_table_name[len] = '\0'; btr_pcur_close(&pcur); mtr_commit(&mtr); dict_load_foreign_cols(id, foreign); /* Note that there may already be a foreign constraint object in the dictionary cache for this constraint: then the following call only sets the pointers in it to point to the appropriate table and index objects and frees the newly created object foreign. */ err = dict_foreign_add_to_cache(foreign); return(err); }
UNIV_INTERN
ulint
mem_field_header_get_check(byte* field)
{
    return(mach_read_from_4(field - sizeof(ulint)));
}
/*******************************************************************//** Truncates the index tree associated with a row in SYS_INDEXES table. @return new root page number, or FIL_NULL on failure */ UNIV_INTERN ulint dict_truncate_index_tree( /*=====================*/ dict_table_t* table, /*!< in: the table the index belongs to */ ulint space, /*!< in: 0=truncate, nonzero=create the index tree in the given tablespace */ btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing to record in the clustered index of SYS_INDEXES table. The cursor may be repositioned in this call. */ mtr_t* mtr) /*!< in: mtr having the latch on the record page. The mtr may be committed and restarted in this call. */ { ulint root_page_no; ibool drop = !space; ulint zip_size; ulint type; index_id_t index_id; rec_t* rec; const byte* ptr; ulint len; dict_index_t* index; ut_ad(mutex_own(&(dict_sys->mutex))); ut_a(!dict_table_is_comp(dict_sys->sys_indexes)); rec = btr_pcur_get_rec(pcur); ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len); ut_ad(len == 4); root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); if (drop && root_page_no == FIL_NULL) { /* The tree has been freed. */ ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Trying to TRUNCATE" " a missing index of table %s!\n", table->name); drop = FALSE; } ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_SPACE_NO_FIELD, &len); ut_ad(len == 4); if (drop) { space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); } zip_size = fil_space_get_zip_size(space); if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { /* It is a single table tablespace and the .ibd file is missing: do nothing */ ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Trying to TRUNCATE" " a missing .ibd file of table %s!\n", table->name); return(FIL_NULL); } ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_TYPE_FIELD, &len); ut_ad(len == 4); type = mach_read_from_4(ptr); ptr = rec_get_nth_field_old(rec, 1, &len); ut_ad(len == 8); index_id = mach_read_from_8(ptr); if (!drop) { goto create; } /* We free all the pages but the root page first; this operation may span several mini-transactions */ btr_free_but_not_root(space, zip_size, root_page_no); /* Then we free the root page in the same mini-transaction where we create the b-tree and write its new root page number to the appropriate field in the SYS_INDEXES record: this mini-transaction marks the B-tree totally truncated */ btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, NULL, mtr); btr_free_root(space, zip_size, root_page_no, mtr); create: /* We will temporarily write FIL_NULL to the PAGE_NO field in SYS_INDEXES, so that the database will not get into an inconsistent state in case it crashes between the mtr_commit() below and the following mtr_commit() call. */ page_rec_write_field(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, FIL_NULL, mtr); /* We will need to commit the mini-transaction in order to avoid deadlocks in the btr_create() call, because otherwise we would be freeing and allocating pages in the same mini-transaction. */ btr_pcur_store_position(pcur, mtr); mtr_commit(mtr); mtr_start(mtr); btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr); /* Find the index corresponding to this SYS_INDEXES record. 
*/ for (index = UT_LIST_GET_FIRST(table->indexes); index; index = UT_LIST_GET_NEXT(indexes, index)) { if (index->id == index_id) { root_page_no = btr_create(type, space, zip_size, index_id, index, mtr); index->page = (unsigned int) root_page_no; return(root_page_no); } } ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Index %llu of table %s is missing\n" "InnoDB: from the data dictionary during TRUNCATE!\n", (ullint) index_id, table->name); return(FIL_NULL); }
ulint dict_truncate_index_tree( /*=====================*/ /* out: new root page number, or FIL_NULL on failure */ dict_table_t* table, /* in: the table the index belongs to */ btr_pcur_t* pcur, /* in/out: persistent cursor pointing to record in the clustered index of SYS_INDEXES table. The cursor may be repositioned in this call. */ mtr_t* mtr) /* in: mtr having the latch on the record page. The mtr may be committed and restarted in this call. */ { ulint root_page_no; ulint space; ulint type; dulint index_id; rec_t* rec; byte* ptr; ulint len; ulint comp; dict_index_t* index; ut_ad(mutex_own(&(dict_sys->mutex))); ut_a(!dict_table_is_comp(dict_sys->sys_indexes)); rec = btr_pcur_get_rec(pcur); ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len); ut_ad(len == 4); root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); if (root_page_no == FIL_NULL) { /* The tree has been freed. */ ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Trying to TRUNCATE" " a missing index of table %s!\n", table->name); return(FIL_NULL); } ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_SPACE_NO_FIELD, &len); ut_ad(len == 4); space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); if (!fil_tablespace_exists_in_mem(space)) { /* It is a single table tablespace and the .ibd file is missing: do nothing */ ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Trying to TRUNCATE" " a missing .ibd file of table %s!\n", table->name); return(FIL_NULL); } ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_TYPE_FIELD, &len); ut_ad(len == 4); type = mach_read_from_4(ptr); ptr = rec_get_nth_field_old(rec, 1, &len); ut_ad(len == 8); index_id = mach_read_from_8(ptr); /* We free all the pages but the root page first; this operation may span several mini-transactions */ btr_free_but_not_root(space, root_page_no); /* Then we free the root page in the same mini-transaction where we create the b-tree and write its new root page number to the appropriate field in the SYS_INDEXES record: this mini-transaction marks the B-tree totally truncated */ comp = page_is_comp(btr_page_get(space, root_page_no, RW_X_LATCH, mtr)); btr_free_root(space, root_page_no, mtr); /* We will temporarily write FIL_NULL to the PAGE_NO field in SYS_INDEXES, so that the database will not get into an inconsistent state in case it crashes between the mtr_commit() below and the following mtr_commit() call. */ page_rec_write_index_page_no(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, FIL_NULL, mtr); /* We will need to commit the mini-transaction in order to avoid deadlocks in the btr_create() call, because otherwise we would be freeing and allocating pages in the same mini-transaction. */ btr_pcur_store_position(pcur, mtr); mtr_commit(mtr); mtr_start(mtr); btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr); /* Find the index corresponding to this SYS_INDEXES record. */ for (index = UT_LIST_GET_FIRST(table->indexes); index; index = UT_LIST_GET_NEXT(indexes, index)) { if (!ut_dulint_cmp(index->id, index_id)) { break; } } root_page_no = btr_create(type, space, index_id, comp, mtr); if (index) { index->page = (unsigned int) root_page_no; } else { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Index %lu %lu of table %s is missing\n" "InnoDB: from the data dictionary during TRUNCATE!\n", ut_dulint_get_high(index_id), ut_dulint_get_low(index_id), table->name); } return(root_page_no); }
/********************************************************************//**
Flush pages from flash cache.
@return number of pages that have been flushed to the tablespace */
UNIV_INTERN
ulint
fc_flush_to_disk(
/*==================*/
	ibool	do_full_io)	/*!< in: whether to flush at full io capacity */
{
	ulint		distance;
	byte*		page;
	ulint		ret;
	ulint		space;
	ulint		offset;
	ulint		page_type;
	ulint		i, j;
	ulint		pos;
	ulint		zip_size;
	ulint		block_offset, byte_offset;
	ulint		fc_size = fc_get_size();
	ulint		fc_blk_size = fc_get_block_size_byte();
	ulint		start_offset;
	ulint		data_size;
	fc_block_t*	flush_block = NULL;
	ulint		c_flush = 0;

	ut_ad(!mutex_own(&fc->mutex));
	ut_a(fc->flush_buf->free_pos == 0);

	/* step 1: get the number of blocks that need to be flushed
	to the tablespace */
	flash_cache_mutex_enter();

	distance = fc_get_distance();
	start_offset = fc->flush_off;

	if (distance == 0) {
		flash_cache_mutex_exit();
		return 0;
	} else if (recv_recovery_on) {
		if (distance < ((1.0 * srv_flash_cache_write_cache_pct / 100)
				* fc_size)) {
			fc->n_flush_cur = 0;
		} else if (distance < ((1.0 * srv_flash_cache_do_full_io_pct
					/ 100) * fc_size)) {
			fc->n_flush_cur = ut_min(PCT_IO_FC(10), distance);
		} else {
			fc->n_flush_cur = ut_min(PCT_IO_FC(100), distance);
		}
	} else if (distance < ((1.0 * srv_flash_cache_write_cache_pct / 100)
			       * fc_size)
		   && !do_full_io) {
		flash_cache_mutex_exit();
		return 0;
	} else if (distance < ((1.0 * srv_flash_cache_do_full_io_pct / 100)
			       * fc_size)
		   && !do_full_io) {
		fc->n_flush_cur = PCT_IO_FC(srv_fc_write_cache_flush_pct);
	} else {
		ut_ad((distance > (1.0 * srv_flash_cache_do_full_io_pct / 100)
		       * fc_size)
		      || do_full_io);
		fc->n_flush_cur = ut_min(PCT_IO_FC(srv_fc_full_flush_pct),
					 distance);
	}

	flash_cache_mutex_exit();

	/* step 2: flush the blocks using async io; mark each block
	io_fix with IO_FIX_FLUSH */
	i = 0;
	while (i < fc->n_flush_cur) {
		ulint	b_space;
		ulint	b_offset;
		ulint	raw_zip_size;
		ulint	size;
		ulint	fil_offset;
#ifdef UNIV_FLASH_CACHE_TRACE
		ulint	is_v4_blk;
#endif
		byte*	page_io;

		flash_cache_mutex_enter();
		pos = (start_offset + i) % fc_size;
		flush_block = fc_get_block(pos);

		if (flush_block == NULL) {
			i++;
			flash_cache_mutex_exit();
			continue;
		}

		/* we must hold the block mutex, as doublewrite may hit
		this block and invalidate it */
		flash_block_mutex_enter(flush_block->fil_offset);
		flash_cache_mutex_exit();

		data_size = fc_block_get_data_size(flush_block);

		if (flush_block->state != BLOCK_READY_FOR_FLUSH) {
			/* the block is read-only, a merged write,
			or already flushed */
			ut_a(flush_block->state == BLOCK_NOT_USED
			     || flush_block->state == BLOCK_READ_CACHE
			     || flush_block->state == BLOCK_FLUSHED);

			i += data_size;

			flash_block_mutex_exit(flush_block->fil_offset);

			if (flush_block->state == BLOCK_NOT_USED) {
				//fc_block_detach(FALSE, flush_block);
				fc_block_free(flush_block);
			}

			continue;
		}

		zip_size = fil_space_get_zip_size(flush_block->space);
		if (zip_size == ULINT_UNDEFINED) {
			/* the table has been dropped; just mark the
			block BLOCK_FLUSHED */
#ifdef UNIV_FLASH_CACHE_TRACE
			ut_print_timestamp(fc->f_debug);
			fprintf(fc->f_debug,
				"space:%lu is dropped, the page(%lu, %lu)"
				" need not be flushed.\n",
				(ulong)flush_block->space,
				(ulong)flush_block->space,
				(ulong)flush_block->offset);
#endif
			flush_block->state = BLOCK_FLUSHED;
			i += data_size;
			c_flush += data_size;
			flash_block_mutex_exit(flush_block->fil_offset);
			continue;
		}

#ifdef UNIV_FLASH_CACHE_TRACE
		if (flush_block->state != BLOCK_READY_FOR_FLUSH) {
			fc_block_print(flush_block);
			ut_error;
		}
#endif

		flush_block->io_fix |= IO_FIX_FLUSH;

		/* We must set the block state to BLOCK_FLUSHED here.
		Otherwise doublewrite may hit this block, invalidate it
		and reduce the dirty count; when the flush finishes we
		would reduce the dirty count again, so it would be
		reduced twice. */
		flush_block->state = BLOCK_FLUSHED;

		/* save the block info, as the block may be invalidated
		by doublewrite after the mutex is released */
		b_space = flush_block->space;
		b_offset = flush_block->offset;

		raw_zip_size = flush_block->raw_zip_size;
		size = flush_block->size;
		fil_offset = flush_block->fil_offset;
#ifdef UNIV_FLASH_CACHE_TRACE
		is_v4_blk = flush_block->is_v4_blk;
#endif
		/* release the block mutex now, so that reads can still
		hit this block and read its data */
		flash_block_mutex_exit(flush_block->fil_offset);

		/* Only the flush thread updates read_buf and
		flush_off/round. Since there is a single flush thread,
		there is no need to lock read_buf. */
		page = fc->flush_buf->buf
			+ fc->flush_buf->free_pos * fc_blk_size;

		if (raw_zip_size > 0) {
			ut_a((size * fc_blk_size) == UNIV_PAGE_SIZE);
			page_io = fc->flush_zip_read_buf;
		} else {
			page_io = page;
		}

		fc_io_offset(fil_offset, &block_offset, &byte_offset);
		ret = fil_io(OS_FILE_READ, TRUE, FLASH_CACHE_SPACE, 0,
			     block_offset, byte_offset,
			     data_size * fc_blk_size, page_io, NULL);

		if (ret != DB_SUCCESS) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Flash cache [Error]: unable to"
				" read page from flash cache.\n"
				"flash cache flush offset is: %lu.\n",
				(ulong)(start_offset + i));
			ut_error;
		}

		if ((flush_block != NULL)
		    && (flush_block->state == BLOCK_NOT_USED)) {
			goto skip;
		}

		/* decompress the compressed data */
		if (raw_zip_size > 0) {
#ifdef UNIV_FLASH_CACHE_TRACE
			ulint	blk_zip_size_byte;

			if (is_v4_blk) {
				blk_zip_size_byte = raw_zip_size
					* fc_get_block_size_byte();
			} else {
				blk_zip_size_byte =
					fc_block_compress_align(raw_zip_size)
					* fc_get_block_size_byte();
				ut_a((ulint)mach_read_from_4(
					page_io + FC_ZIP_PAGE_ZIP_RAW_SIZE)
				     == raw_zip_size);
			}

			ut_a(page_io);
			ut_a(page);
			ut_a((ulint)mach_read_from_4(
				page_io + FC_ZIP_PAGE_HEADER)
			     == FC_ZIP_PAGE_CHECKSUM);
			ut_a((ulint)mach_read_from_4(
				page_io + blk_zip_size_byte
				- FC_ZIP_PAGE_TAILER)
			     == FC_ZIP_PAGE_CHECKSUM);
			ut_a((ulint)mach_read_from_4(
				page_io + FC_ZIP_PAGE_SIZE)
			     == blk_zip_size_byte);
			ut_a((ulint)mach_read_from_4(
				page_io + FC_ZIP_PAGE_ORIG_SIZE)
			     == UNIV_PAGE_SIZE);
			ut_a((ulint)mach_read_from_4(
				page_io + FC_ZIP_PAGE_SPACE) == b_space);
			ut_a((ulint)mach_read_from_4(
				page_io + FC_ZIP_PAGE_OFFSET) == b_offset);

			/* only qlz can do this check */
			if (srv_flash_cache_compress_algorithm
			    == FC_BLOCK_COMPRESS_QUICKLZ) {
				if (is_v4_blk) {
					ut_a(raw_zip_size
					     * fc_get_block_size_byte()
					     >= (ulint)fc_qlz_size_compressed(
						(const char*)(page_io
						+ FC_ZIP_PAGE_DATA)));
				} else {
					ut_a(raw_zip_size
					     == (ulint)fc_qlz_size_compressed(
						(const char*)(page_io
						+ FC_ZIP_PAGE_DATA)));
				}

				ut_a(UNIV_PAGE_SIZE
				     == fc_qlz_size_decompressed(
					(const char*)(page_io
					+ FC_ZIP_PAGE_DATA)));
			}
#endif
			fc_block_do_decompress(DECOMPRESS_FLUSH, page_io,
					       raw_zip_size, page);
		}

		space = mach_read_from_4(page
					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
		offset = mach_read_from_4(page + FIL_PAGE_OFFSET);

		if ((space != b_space) || (offset != b_offset)) {
			ut_print_timestamp(stderr);
			fc_block_print(flush_block);
			ut_error;
		}

		if (buf_page_is_corrupted(page, zip_size)) {
			buf_page_print(page, zip_size,
				       BUF_PAGE_PRINT_NO_CRASH);
			ut_error;
		}

		page_type = fil_page_get_type(page);
		if (page_type == FIL_PAGE_INDEX) {
			page_type = 1;
		}
		srv_flash_cache_flush_detail[page_type]++;

		ret = fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
			     FALSE, space, zip_size, offset, 0,
			     zip_size ? zip_size : UNIV_PAGE_SIZE,
			     page, NULL);
		if (ret != DB_SUCCESS && ret != DB_TABLESPACE_DELETED) {
			ut_print_timestamp(stderr);
			fc_block_print(flush_block);
			ut_error;
		}

		/* advance by UNIV_PAGE_SIZE / fc_blk_size to be safe */
		fc->flush_buf->free_pos += UNIV_PAGE_SIZE / fc_blk_size;

skip:
		i += data_size;
		c_flush += data_size;

		if ((fc->flush_buf->free_pos + UNIV_PAGE_SIZE / fc_blk_size)
		    >= fc->flush_buf->size) {
			/* FIXME: is it safe to change n_flush_cur here,
			as step 3 will use it? */
			fc->n_flush_cur = i;
			break;
		}
	}

	/* now wait for all the async io to reach the data files */
	fc_flush_sync_dbfile();

	/* step 3: all the flushed blocks have been synced to disk;
	update the state and io_fix */
	j = 0;
	while (j < fc->n_flush_cur) {

		flash_cache_mutex_enter();
		pos = (start_offset + j) % fc_size;
		flush_block = fc_get_block(pos);

		if (flush_block == NULL) {
			j++;
			flash_cache_mutex_exit();
			continue;
		}

		/* block state and io_fix may be changed by doublewrite
		and lru move */
		flash_block_mutex_enter(flush_block->fil_offset);
		flash_cache_mutex_exit();

		if (flush_block->io_fix & IO_FIX_FLUSH) {
			/* the block is already in BLOCK_FLUSHED state */
			flush_block->io_fix &= ~IO_FIX_FLUSH;
		}

		data_size = fc_block_get_data_size(flush_block);
		flash_block_mutex_exit(flush_block->fil_offset);

		j += data_size;
	}

	/* i and j may differ, as the last flushed block may have been
	invalidated by doublewrite, so possibly i > j */

	/* add the actual flushed blocks */
	srv_flash_cache_flush = srv_flash_cache_flush + c_flush;

	/* step 4: update the fc status and flush_off, and wake up
	threads that are sleeping for space */
	if (i > 0) {
		ut_a(i >= c_flush);

		flash_cache_mutex_enter();

		/* it is safe to increase flush_off and decrease the dirty
		block count at this point, as fc_validate is not running */
		fc_inc_flush_off(i);

		flash_cache_log_mutex_enter();
		fc_log->current_stat->flush_offset = fc->flush_off;
		fc_log->current_stat->flush_round = fc->flush_round;
		flash_cache_log_mutex_exit();

		ut_a(srv_flash_cache_dirty >= c_flush);
		srv_flash_cache_dirty -= c_flush;

		srv_fc_flush_should_commit_log_flush++;
		os_event_set(fc->wait_space_event);

		fc->n_flush_cur = 0;

		flash_cache_mutex_exit();
	}

	fc->flush_buf->free_pos = 0;

	return c_flush;
}
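As a usage illustration, the sketch below drains the flash cache by calling fc_flush_to_disk() repeatedly at full io capacity. The helper name and the loop are hypothetical (the tree's actual flush thread may be organized differently); it only uses identifiers that appear above plus os_thread_sleep():

/* Hypothetical drain loop -- not the tree's flush thread. It keeps
flushing with do_full_io == TRUE, which selects the largest flush batch
in step 1 above, until no dirty blocks remain between flush_off and
write_off. */
static void
fc_drain_cache_sketch(void)
{
	ulint	n_flushed;
	ulint	distance;

	do {
		n_flushed = fc_flush_to_disk(TRUE);

		/* read the remaining dirty distance under the cache mutex,
		as step 1 of fc_flush_to_disk() does */
		flash_cache_mutex_enter();
		distance = fc_get_distance();
		flash_cache_mutex_exit();

		if (distance > 0 && n_flushed == 0) {
			/* nothing was flushable this round (e.g. blocks
			still io-fixed); back off briefly before retrying */
			os_thread_sleep(100000);	/* 100 ms */
		}
	} while (distance > 0);
}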