/* Walks the record list of an index page from the infimum record to the
supremum record by following the 2-byte "next" field stored just before
each record origin.

page       in:  page image to inspect
n_records  out: number of records visited minus one (the infimum), or 0
                when the page is rejected

Returns 1 when the chain reaches the supremum, 0 otherwise. Always returns
0 when the global deleted_records_only flag is 1. */
int check_page(page_t *page, unsigned int *n_records)
{
	const int	is_compact = page_is_comp(page);
	const int	record_limit = UNIV_PAGE_SIZE / 5;
	int16_t		infimum;
	int16_t		supremum;
	int16_t		cur;
	int16_t		prev = 0;
	int16_t		delta;
	int		visited = 0;

	*n_records = 0;

	infimum = is_compact ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM;
	supremum = is_compact ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM;

	if (deleted_records_only == 1) {
		if (debug) printf("We look for deleted records only. Consider all pages are not valid\n");
		return 0;
	}

	if (debug) printf("Checking a page\nInfimum offset: 0x%X\nSupremum offset: 0x%X\n", infimum, supremum);

	for (cur = infimum; cur != supremum; visited++) {
		/* The chain is rejected when it is longer than the record
		limit, when the offset is below 2 (this includes negative
		values), when it lies past the page size, or when a record
		points to itself. */
		if (visited > record_limit
		    || cur < 2
		    || cur > UNIV_PAGE_SIZE
		    || cur == prev) {
			*n_records = 0;
			if (debug) printf("Page is bad\n");
			return 0;
		}

		prev = cur;

		/* Compact pages store the next pointer relative to the
		current record; the other format stores the offset itself. */
		if (is_compact) {
			delta = mach_read_from_2(page + cur - 2);
			cur = cur + delta;
		} else {
			cur = mach_read_from_2(page + cur - 2);
		}

		if (debug) printf("Next record at offset: 0x%X (%d) \n", 0x0000FFFF & cur, cur);
	}

	/* Do not count the infimum record itself. */
	*n_records = visited - 1;
	if (debug) printf("Page is good\n");
	return 1;
}
/* Reads one value in the 1..5 byte compressed format from a buffer that
may end before the value is complete. The first byte selects the encoded
length: < 0x80 one byte, < 0xC0 two, < 0xE0 three, < 0xF0 four, otherwise
five bytes (a marker byte followed by a 4-byte value). */
byte*
mach_parse_compressed(
/*==================*/
			/* out: pointer to end of the stored field, NULL if
			not complete */
	byte*	ptr,	/* in: pointer to buffer from where to read */
	byte*	end_ptr,/* in: pointer to end of the buffer */
	ulint*	val)	/* out: read value (< 2^32) */
{
	ulint	first_byte;
	ulint	n_bytes;

	ut_ad(ptr && end_ptr && val);

	if (ptr >= end_ptr) {
		return(NULL);
	}

	first_byte = mach_read_from_1(ptr);

	/* Step 1: how many bytes does this value occupy? */
	if (first_byte < 0x80UL) {
		n_bytes = 1;
	} else if (first_byte < 0xC0UL) {
		n_bytes = 2;
	} else if (first_byte < 0xE0UL) {
		n_bytes = 3;
	} else if (first_byte < 0xF0UL) {
		n_bytes = 4;
	} else {
		ut_ad(first_byte == 0xF0UL);
		n_bytes = 5;
	}

	/* Step 2: is the whole value inside the buffer? For n_bytes == 1
	this repeats the check already made above. */
	if (end_ptr < ptr + n_bytes) {
		return(NULL);
	}

	/* Step 3: decode, masking off the length marker bits. */
	switch (n_bytes) {
	case 1:
		*val = first_byte;
		break;
	case 2:
		*val = mach_read_from_2(ptr) & 0x7FFFUL;
		break;
	case 3:
		*val = mach_read_from_3(ptr) & 0x3FFFFFUL;
		break;
	case 4:
		*val = mach_read_from_4(ptr) & 0x1FFFFFFFUL;
		break;
	default:
		*val = mach_read_from_4(ptr + 1);
		break;
	}

	return(ptr + n_bytes);
}
/* Builds a partial row from the stored index columns of an update undo
log record. The section starts with a 2-byte length that counts from the
start of the section; it is followed by (field number, column value) pairs
until that length is consumed. */
byte*
trx_undo_rec_get_partial_row(
/*=========================*/
				/* out: pointer to remaining part of undo
				record */
	byte*		ptr,	/* in: remaining part in update undo log
				record of a suitable type, at the start of
				the stored index columns;
				NOTE that this copy of the undo log record must
				be preserved as long as the partial row is
				used, as we do NOT copy the data in the
				record! */
	dict_index_t*	index,	/* in: clustered index */
	dtuple_t**	row,	/* out, own: partial row */
	mem_heap_t*	heap)	/* in: memory heap from which the memory
				needed is allocated */
{
	byte*	section_end;
	ulint	n_cols;

	ut_ad(index && ptr && row && heap);

	n_cols = dict_table_get_n_cols(index->table);

	*row = dtuple_create(heap, n_cols);
	dict_table_copy_types(*row, index->table);

	/* The length prefix includes its own two bytes. */
	section_end = ptr + mach_read_from_2(ptr);
	ptr += 2;

	while (ptr != section_end) {
		dfield_t*	target;
		byte*		data;
		ulint		data_len;
		ulint		index_pos;
		ulint		table_col;

		ptr = trx_undo_update_rec_get_field_no(ptr, &index_pos);
		table_col = dict_index_get_nth_col_no(index, index_pos);

		ptr = trx_undo_rec_get_col_val(ptr, &data, &data_len);

		/* The tuple field points into the undo record; nothing is
		copied. */
		target = dtuple_get_nth_field(*row, table_col);
		dfield_set_data(target, data, data_len);
	}

	return(ptr);
}
/********************************************************//**
Parses a log record written by mlog_write_string. The record body is a
2-byte page offset, a 2-byte length and then that many data bytes. When a
page is given the bytes are copied to it at the offset, and also to the
compressed page data when page_zip is non-NULL.
@return parsed record end, NULL if not a complete record */
UNIV_INTERN
byte*
mlog_parse_string(
/*==============*/
	byte*	ptr,	/*!< in: buffer */
	byte*	end_ptr,/*!< in: buffer end */
	byte*	page,	/*!< in: page where to apply the log record, or NULL */
	void*	page_zip)/*!< in/out: compressed page, or NULL */
{
	ulint	page_offset;
	ulint	n_bytes;
	byte*	payload;

	ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);

	/* Offset and length take four bytes together. */
	if (end_ptr < ptr + 4) {

		return(NULL);
	}

	page_offset = mach_read_from_2(ptr);
	n_bytes = mach_read_from_2(ptr + 2);
	payload = ptr + 4;

	/* A write that starts or ends outside the page marks the log as
	corrupt. */
	if (UNIV_UNLIKELY(page_offset >= UNIV_PAGE_SIZE)
	    || UNIV_UNLIKELY(n_bytes + page_offset > UNIV_PAGE_SIZE)) {
		recv_sys->found_corrupt_log = TRUE;

		return(NULL);
	}

	if (end_ptr < payload + n_bytes) {

		return(NULL);
	}

	if (page) {
		if (UNIV_LIKELY_NULL(page_zip)) {
			memcpy(((page_zip_des_t*) page_zip)->data
			       + page_offset, payload, n_bytes);
		}
		memcpy(page + page_offset, payload, n_bytes);
	}

	return(payload + n_bytes);
}
/* Decodes an unsigned integer field of 1, 2, 3, 4 or 8 bytes.

field  in: field definition; only fixed_length is read
value  in: pointer to the stored bytes

Returns the decoded value, or 0 when fixed_length is not one of the
supported widths. */
inline unsigned long long int get_uint_value(field_def_t *field, byte *value)
{
	switch (field->fixed_length) {
	case 1:
		return mach_read_from_1(value);
	case 2:
		return mach_read_from_2(value);
	case 3:
		/* Keep all 24 bits of the 3-byte field. The previous mask
		0x3FFFFF kept only 22 bits and truncated values >= 2^22. */
		return mach_read_from_3(value) & 0xFFFFFFUL;
	case 4:
		return mach_read_from_4(value);
	case 8:
		return make_ulonglong(mach_read_from_8(value));
	default:
		break;
	}

	return 0;
}
/* Decodes a signed integer field of 1, 2, 3, 4 or 8 bytes by reading the
stored bytes and clearing the most significant bit of the field.

field  in: field definition; only fixed_length is read
value  in: pointer to the stored bytes

Returns the stored value with its top bit cleared, or 0 when fixed_length
is not one of the supported widths.

NOTE(review): the top bit is cleared, not interpreted, so the result is
never negative (assuming mach_read_from_N returns at most N bytes) -
confirm that callers expect this. */
inline long long int get_int_value(field_def_t *field, byte *value)
{
	switch (field->fixed_length) {
	case 1:
		return mach_read_from_1(value) & ~(1UL << 7);
	case 2:
		return mach_read_from_2(value) & ~(1UL << 15);
	case 3:
		/* Clear only bit 23, like the other widths clear only their
		top bit. The previous mask 0x3FFFFF also cleared bit 22 and
		truncated values >= 2^22. */
		return mach_read_from_3(value) & 0x7FFFFFUL;
	case 4:
		/* Unsigned shift: 1L << 31 overflows where long is 32 bits. */
		return mach_read_from_4(value) & ~(1UL << 31);
	case 8:
		/* Literal mask: 1LL << 63 shifts into the sign bit, which is
		undefined behaviour. */
		return make_longlong(mach_read_from_8(value))
			& 0x7FFFFFFFFFFFFFFFLL;
	default:
		break;
	}

	return 0;
}
/***********************************************************//**
Parses a redo log record of adding an undo log record. The redo record is
a 2-byte length followed by that many bytes of undo record body. When a
page is given, the body is appended at the page's first free offset,
framed by a 2-byte next pointer in front and a 2-byte previous pointer
behind, and the first free offset in the page header is advanced.
@return end of log record or NULL */
UNIV_INTERN
byte*
trx_undo_parse_add_undo_rec(
/*========================*/
	byte*	ptr,	/*!< in: buffer */
	byte*	end_ptr,/*!< in: buffer end */
	page_t*	page)	/*!< in: page or NULL */
{
	ulint	body_len;
	ulint	old_free;
	ulint	new_free;
	byte*	body;
	byte*	slot;
	byte*	free_field;

	if (end_ptr < ptr + 2) {

		return(NULL);
	}

	body_len = mach_read_from_2(ptr);
	body = ptr + 2;

	if (end_ptr < body + body_len) {

		return(NULL);
	}

	if (page != NULL) {
		free_field = page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE;

		old_free = mach_read_from_2(free_field);
		/* 2 bytes next pointer + body + 2 bytes previous pointer */
		new_free = old_free + 4 + body_len;
		slot = page + old_free;

		mach_write_to_2(slot, new_free);
		mach_write_to_2(slot + 2 + body_len, old_free);
		mach_write_to_2(free_field, new_free);

		ut_memcpy(slot + 2, body, body_len);
	}

	return(body + body_len);
}
/* Parses a string-write log record: a 2-byte page offset, a 2-byte length
and then that many data bytes. When a page is given the bytes are copied
to it at the offset.

A record whose target range starts or ends outside the page sets
recv_sys->found_corrupt_log and yields NULL. The range check used to be an
ut_a() assertion on len + offset < UNIV_PAGE_SIZE, which aborted on a
corrupt record and also rejected a write ending exactly at the page end. */
byte*
mlog_parse_string(
/*==============*/
			/* out: parsed record end, NULL if not a complete
			record */
	byte*	ptr,	/* in: buffer */
	byte*	end_ptr,/* in: buffer end */
	byte*	page)	/* in: page where to apply the log record, or NULL */
{
	ulint	offset;
	ulint	len;

	/* Offset and length take four bytes together. */
	if (end_ptr < ptr + 4) {

		return(NULL);
	}

	offset = mach_read_from_2(ptr);
	ptr += 2;

	len = mach_read_from_2(ptr);
	ptr += 2;

	if (offset >= UNIV_PAGE_SIZE || len + offset > UNIV_PAGE_SIZE) {
		recv_sys->found_corrupt_log = TRUE;

		return(NULL);
	}

	if (end_ptr < ptr + len) {

		return(NULL);
	}

	if (page) {
		ut_memcpy(page + offset, ptr, len);
	}

	return(ptr + len);
}
/*************************************************************************** Gets the next record in an undo log from the next page. */ static trx_undo_rec_t* trx_undo_get_next_rec_from_next_page( /*=================================*/ /* out: undo log record, the page latched, NULL if none */ page_t* undo_page, /* in: undo log page */ ulint page_no,/* in: undo log header page number */ ulint offset, /* in: undo log header offset on page */ ulint mode, /* in: latch mode: RW_S_LATCH or RW_X_LATCH */ mtr_t* mtr) /* in: mtr */ { trx_ulogf_t* log_hdr; ulint next_page_no; page_t* next_page; ulint space; ulint next; if (page_no == buf_frame_get_page_no(undo_page)) { log_hdr = undo_page + offset; next = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG); if (next != 0) { return(NULL); } } space = buf_frame_get_space_id(undo_page); next_page_no = flst_get_next_addr(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr) .page; if (next_page_no == FIL_NULL) { return(NULL); } if (mode == RW_S_LATCH) { next_page = trx_undo_page_get_s_latched(space, next_page_no, mtr); } else { ut_ad(mode == RW_X_LATCH); next_page = trx_undo_page_get(space, next_page_no, mtr); } return(trx_undo_page_get_first_rec(next_page, page_no, offset)); }
/* Fills the undo page with 0xff bytes from its first free offset up to
UNIV_PAGE_SIZE - FIL_PAGE_DATA_END and writes an MLOG_UNDO_ERASE_END
initial log record for the page.
Returns whether the first free offset differs from
TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE. */
ibool
trx_undo_erase_page_end(
/*====================*/
	page_t*	undo_page,	/*!< in/out: undo page whose end to erase */
	mtr_t*	mtr)		/*!< in/out: mini-transaction */
{
	const ulint	free_start = mach_read_from_2(
		undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE);
	const ulint	erase_len
		= (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - free_start;

	memset(undo_page + free_start, 0xff, erase_len);

	mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);

	return(free_start != (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE));
}
/***********************************************************************//**
Erases the unused undo log page end: fills the page with 0xff bytes from
its first free offset up to UNIV_PAGE_SIZE - FIL_PAGE_DATA_END and writes
an MLOG_UNDO_ERASE_END initial log record for the page. */
static
void
trx_undo_erase_page_end(
/*====================*/
	page_t*	undo_page,	/*!< in: undo page whose end to erase */
	mtr_t*	mtr)		/*!< in: mtr */
{
	const ulint	free_start = mach_read_from_2(
		undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE);
	const ulint	erase_len
		= (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - free_start;

	memset(undo_page + free_start, 0xff, erase_len);

	mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
}
/**********************************************************************//**
Finishes an undo record whose body has been written up to ptr: stores the
record's own start offset in the 2 bytes at ptr (the "previous" pointer),
stores the offset just past those 2 bytes at the start of the record (the
"next" pointer) and in the page header's first free field, and logs the
new record.
@return offset of the inserted entry on the page if succeeded, 0 if fail */
static
ulint
trx_undo_page_set_next_prev_and_add(
/*================================*/
	page_t*		undo_page,	/*!< in/out: undo log page */
	byte*		ptr,		/*!< in: ptr up to where data has been
					written on this undo page. */
	mtr_t*		mtr)		/*!< in: mtr */
{
	byte*	free_field;	/* header field holding the first free
				offset of undo_page */
	ulint	rec_start;	/* offset of this record within undo_page */
	ulint	rec_end;	/* offset just past this record */

	ut_ad(ptr > undo_page);
	ut_ad(ptr < undo_page + UNIV_PAGE_SIZE);

	/* The trailing pointer needs two more bytes. */
	if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < 2)) {

		return(0);
	}

	free_field = undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE;
	rec_start = mach_read_from_2(free_field);

	/* Trailing field: where this record begins. */
	mach_write_to_2(ptr, rec_start);
	rec_end = (ptr + 2) - undo_page;

	/* Leading field: where the next record will begin. */
	mach_write_to_2(undo_page + rec_start, rec_end);

	/* The page's free space now starts after this record. */
	mach_write_to_2(free_field, rec_end);

	/* Write this log entry to the UNDO log */
	trx_undof_page_add_undo_rec_log(undo_page, rec_start, rec_end, mtr);

	return(rec_start);
}
/**********************************************************************//**
Reports in the undo log of an update or delete marking of a clustered
index record. The record is built at the page's first free offset in this
order: 2 reserved bytes for the next-record pointer, a type/compiler-info
byte, the transaction's undo number, the table id, the record's info bits,
its trx id and roll pointer, the unique key fields, then (for an update)
the old values of the updated fields, then (for a delete mark or an update
not flagged UPD_NODE_NO_ORD_CHANGE) the values of all columns with ord_part
set, and finally a 2-byte pointer back to the record start. Every write is
preceded by a trx_undo_left() space check; when one fails the function
returns 0 without updating the page header's first free offset.
@return byte offset of the inserted undo log entry on the page if
succeed, 0 if fail */
static
ulint
trx_undo_page_report_modify(
/*========================*/
	page_t*		undo_page,	/*!< in: undo log page */
	trx_t*		trx,		/*!< in: transaction */
	dict_index_t*	index,		/*!< in: clustered index where update or
					delete marking is done */
	const rec_t*	rec,		/*!< in: clustered index record which
					has NOT yet been modified */
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
	const upd_t*	update,		/*!< in: update vector which tells the
					columns to be updated; in the case of
					a delete, this should be set to NULL */
	ulint		cmpl_info,	/*!< in: compiler info on secondary
					index updates */
	mtr_t*		mtr)		/*!< in: mtr */
{
	dict_table_t*	table;
	ulint		first_free;
	byte*		ptr;
	const byte*	field;
	ulint		flen;
	ulint		col_no;
	ulint		type_cmpl;
	byte*		type_cmpl_ptr;
	ulint		i;
	trx_id_t	trx_id;
	ibool		ignore_prefix = FALSE;
	/* Scratch buffer handed to trx_undo_page_report_modify_ext() when
	a prefix of an externally stored column is wanted. */
	byte		ext_buf[REC_MAX_INDEX_COL_LEN
				+ BTR_EXTERN_FIELD_REF_SIZE];

	ut_a(dict_index_is_clust(index));
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
			       + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
	table = index->table;

	first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
				      + TRX_UNDO_PAGE_FREE);
	ptr = undo_page + first_free;

	ut_ad(first_free <= UNIV_PAGE_SIZE);

	if (trx_undo_left(undo_page, ptr) < 50) {

		/* NOTE: the value 50 must be big enough so that the general
		fields written below fit on the undo log page */

		return(0);
	}

	/* Reserve 2 bytes for the pointer to the next undo log record */
	ptr += 2;

	/* Store first some general parameters to the undo log */

	if (!update) {
		type_cmpl = TRX_UNDO_DEL_MARK_REC;
	} else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
		type_cmpl = TRX_UNDO_UPD_DEL_REC;
		/* We are about to update a delete marked record.
		We don't typically need the prefix in this case unless
		the delete marking is done by the same transaction
		(which we check below). */
		ignore_prefix = TRUE;
	} else {
		type_cmpl = TRX_UNDO_UPD_EXIST_REC;
	}

	type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
	/* Remember where the type byte lives: TRX_UNDO_UPD_EXTERN may be
	OR-ed into it later, once an externally stored field is seen. */
	type_cmpl_ptr = ptr;

	*ptr++ = (byte) type_cmpl;
	ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);

	ptr += mach_dulint_write_much_compressed(ptr, table->id);

	/*----------------------------------------*/
	/* Store the state of the info bits */

	*ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table));

	/* Store the values of the system columns */
	field = rec_get_nth_field(rec, offsets,
				  dict_index_get_sys_col_pos(
					  index, DATA_TRX_ID), &flen);
	ut_ad(flen == DATA_TRX_ID_LEN);

	trx_id = trx_read_trx_id(field);

	/* If it is an update of a delete marked record, then we are
	allowed to ignore blob prefixes if the delete marking was done
	by some other trx as it must have committed by now for us to
	allow an over-write. */
	if (ignore_prefix) {
		ignore_prefix = ut_dulint_cmp(trx_id, trx->id) != 0;
	}
	ptr += mach_dulint_write_compressed(ptr, trx_id);

	field = rec_get_nth_field(rec, offsets,
				  dict_index_get_sys_col_pos(
					  index, DATA_ROLL_PTR), &flen);
	ut_ad(flen == DATA_ROLL_PTR_LEN);

	ptr += mach_dulint_write_compressed(ptr, trx_read_roll_ptr(field));

	/*----------------------------------------*/
	/* Store then the fields required to uniquely determine the
	record which will be modified in the clustered index */

	for (i = 0; i < dict_index_get_n_unique(index); i++) {

		field = rec_get_nth_field(rec, offsets, i, &flen);

		/* The ordering columns must not be stored externally. */
		ut_ad(!rec_offs_nth_extern(offsets, i));
		ut_ad(dict_index_get_nth_col(index, i)->ord_part);

		/* Room for the compressed length that is written next */
		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		ptr += mach_write_compressed(ptr, flen);

		if (flen != UNIV_SQL_NULL) {
			if (trx_undo_left(undo_page, ptr) < flen) {

				return(0);
			}

			ut_memcpy(ptr, field, flen);
			ptr += flen;
		}
	}

	/*----------------------------------------*/
	/* Save to the undo log the old values of the columns to be updated. */

	if (update) {
		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		ptr += mach_write_compressed(ptr, upd_get_n_fields(update));

		for (i = 0; i < upd_get_n_fields(update); i++) {

			ulint	pos = upd_get_nth_field(update, i)->field_no;

			/* Write field number to undo log */
			if (trx_undo_left(undo_page, ptr) < 5) {

				return(0);
			}

			ptr += mach_write_compressed(ptr, pos);

			/* Save the old value of field */
			field = rec_get_nth_field(rec, offsets, pos, &flen);

			if (trx_undo_left(undo_page, ptr) < 15) {

				return(0);
			}

			if (rec_offs_nth_extern(offsets, pos)) {
				/* ext_buf is passed only when the column is
				an ordering column, prefixes are not being
				ignored and the local part is shorter than
				REC_MAX_INDEX_COL_LEN; otherwise NULL. The
				helper may replace field and flen. */
				ptr = trx_undo_page_report_modify_ext(
					ptr,
					dict_index_get_nth_col(index, pos)
					->ord_part
					&& !ignore_prefix
					&& flen < REC_MAX_INDEX_COL_LEN
					? ext_buf : NULL,
					dict_table_zip_size(table),
					&field, &flen);

				/* Notify purge that it eventually has to
				free the old externally stored field */

				trx->update_undo->del_marks = TRUE;

				*type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
			} else {
				ptr += mach_write_compressed(ptr, flen);
			}

			if (flen != UNIV_SQL_NULL) {
				if (trx_undo_left(undo_page, ptr) < flen) {

					return(0);
				}

				ut_memcpy(ptr, field, flen);
				ptr += flen;
			}
		}
	}

	/*----------------------------------------*/
	/* In the case of a delete marking, and also in the case of an update
	where any ordering field of any index changes, store the values of all
	columns which occur as ordering fields in any index. This info is used
	in the purge of old versions where we use it to build and search the
	delete marked index records, to look if we can remove them from the
	index tree. Note that starting from 4.0.14 also externally stored
	fields can be ordering in some index. Starting from 5.2, we no longer
	store REC_MAX_INDEX_COL_LEN first bytes to the undo log record,
	but we can construct the column prefix fields in the index by
	fetching the first page of the BLOB that is pointed to by the
	clustered index. This works also in crash recovery, because all pages
	(including BLOBs) are recovered before anything is rolled back. */

	if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
		/* Start of this section; its total length is written here
		once the section is complete. */
		byte*	old_ptr = ptr;

		trx->update_undo->del_marks = TRUE;

		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		/* Reserve 2 bytes to write the number of bytes the stored
		fields take in this undo record */

		ptr += 2;

		for (col_no = 0; col_no < dict_table_get_n_cols(table);
		     col_no++) {

			const dict_col_t*	col
				= dict_table_get_nth_col(table, col_no);

			if (col->ord_part) {
				ulint	pos;

				/* Write field number to undo log */
				if (trx_undo_left(undo_page, ptr) < 5 + 15) {

					return(0);
				}

				pos = dict_index_get_nth_col_pos(index,
								 col_no);
				ptr += mach_write_compressed(ptr, pos);

				/* Save the old value of field */
				field = rec_get_nth_field(rec, offsets, pos,
							  &flen);

				if (rec_offs_nth_extern(offsets, pos)) {
					ptr = trx_undo_page_report_modify_ext(
						ptr,
						flen < REC_MAX_INDEX_COL_LEN
						&& !ignore_prefix
						? ext_buf : NULL,
						dict_table_zip_size(table),
						&field, &flen);
				} else {
					ptr += mach_write_compressed(
						ptr, flen);
				}

				if (flen != UNIV_SQL_NULL) {
					if (trx_undo_left(undo_page, ptr)
					    < flen) {

						return(0);
					}

					ut_memcpy(ptr, field, flen);
					ptr += flen;
				}
			}
		}

		/* The stored length counts from old_ptr, so it includes the
		two reserved bytes themselves. */
		mach_write_to_2(old_ptr, ptr - old_ptr);
	}

	/*----------------------------------------*/
	/* Write pointers to the previous and the next undo log records */
	if (trx_undo_left(undo_page, ptr) < 2) {

		return(0);
	}

	mach_write_to_2(ptr, first_free);
	ptr += 2;
	mach_write_to_2(undo_page + first_free, ptr - undo_page);

	mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
			ptr - undo_page);

	/* Write to the REDO log about this change in the UNDO log */

	trx_undof_page_add_undo_rec_log(undo_page, first_free,
					ptr - undo_page, mtr);
	return(first_free);
}
/**********************************************************************//** Reports in the undo log of an insert of a clustered index record. @return offset of the inserted entry on the page if succeed, 0 if fail */ static ulint trx_undo_page_report_insert( /*========================*/ page_t* undo_page, /*!< in: undo log page */ trx_t* trx, /*!< in: transaction */ dict_index_t* index, /*!< in: clustered index */ const dtuple_t* clust_entry, /*!< in: index entry which will be inserted to the clustered index */ mtr_t* mtr) /*!< in: mtr */ { ulint first_free; byte* ptr; ulint i; ut_ad(dict_index_is_clust(index)); ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT); first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE); ptr = undo_page + first_free; ut_ad(first_free <= UNIV_PAGE_SIZE); if (trx_undo_left(undo_page, ptr) < 2 + 1 + 11 + 11) { /* Not enough space for writing the general parameters */ return(0); } /* Reserve 2 bytes for the pointer to the next undo log record */ ptr += 2; /* Store first some general parameters to the undo log */ *ptr++ = TRX_UNDO_INSERT_REC; ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no); ptr += mach_dulint_write_much_compressed(ptr, index->table->id); /*----------------------------------------*/ /* Store then the fields required to uniquely determine the record to be inserted in the clustered index */ for (i = 0; i < dict_index_get_n_unique(index); i++) { const dfield_t* field = dtuple_get_nth_field(clust_entry, i); ulint flen = dfield_get_len(field); if (trx_undo_left(undo_page, ptr) < 5) { return(0); } ptr += mach_write_compressed(ptr, flen); if (flen != UNIV_SQL_NULL) { if (trx_undo_left(undo_page, ptr) < flen) { return(0); } ut_memcpy(ptr, dfield_get_data(field), flen); ptr += flen; } } return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr)); }
/*******************************************************************//**
Builds a partial row from an update undo log record. It contains the
columns which occur as ordering in any index of the table. The section
starts with a 2-byte length counted from the start of the section and is
followed by (field number, column value) pairs. A stored length of at
least UNIV_EXTERN_STORAGE_FIELD marks the field as externally stored; the
flag is subtracted from the length and the field is marked ext.
@return pointer to remaining part of undo record */
UNIV_INTERN
byte*
trx_undo_rec_get_partial_row(
/*=========================*/
	byte*		ptr,	/*!< in: remaining part in update undo log
				record of a suitable type, at the start of
				the stored index columns;
				NOTE that this copy of the undo log record must
				be preserved as long as the partial row is
				used, as we do NOT copy the data in the
				record! */
	dict_index_t*	index,	/*!< in: clustered index */
	dtuple_t**	row,	/*!< out, own: partial row */
	ibool		ignore_prefix, /*!< in: flag to indicate if we
				expect blob prefixes in undo. Used
				only in the assertion. */
	mem_heap_t*	heap)	/*!< in: memory heap from which the memory
				needed is allocated */
{
	const byte*	section_end;
	ulint		n_cols;

	ut_ad(index);
	ut_ad(ptr);
	ut_ad(row);
	ut_ad(heap);
	ut_ad(dict_index_is_clust(index));

	n_cols = dict_table_get_n_cols(index->table);

	*row = dtuple_create(heap, n_cols);

	dict_table_copy_types(*row, index->table);

	/* The length prefix includes its own two bytes. */
	section_end = ptr + mach_read_from_2(ptr);
	ptr += 2;

	while (ptr != section_end) {
		dfield_t*		target;
		byte*			data;
		ulint			index_pos;
		const dict_col_t*	column;
		ulint			table_col;
		ulint			stored_len;
		ulint			full_len;

		ptr = trx_undo_update_rec_get_field_no(ptr, &index_pos);

		column = dict_index_get_nth_col(index, index_pos);
		table_col = dict_col_get_no(column);

		ptr = trx_undo_rec_get_col_val(ptr, &data, &stored_len,
					       &full_len);

		target = dtuple_get_nth_field(*row, table_col);

		dfield_set_data(target, data, stored_len);

		if (stored_len == UNIV_SQL_NULL
		    || stored_len < UNIV_EXTERN_STORAGE_FIELD) {
			/* NULL or locally stored: nothing more to do. */
			continue;
		}

		dfield_set_len(target,
			       stored_len - UNIV_EXTERN_STORAGE_FIELD);
		dfield_set_ext(target);

		if (ignore_prefix || !column->ord_part) {
			continue;
		}

		/* If the prefix of this column is indexed,
		ensure that enough prefix is stored in the
		undo log record. */
		ut_a(dfield_get_len(target)
		     >= 2 * BTR_EXTERN_FIELD_REF_SIZE);
		ut_a(dict_table_get_format(index->table)
		     >= DICT_TF_FORMAT_ZIP
		     || dfield_get_len(target)
		     >= REC_MAX_INDEX_COL_LEN
		     + BTR_EXTERN_FIELD_REF_SIZE);
	}

	return(ptr);
}
/************************************************************************** Reports in the undo log of an update or delete marking of a clustered index record. */ static ulint trx_undo_page_report_modify( /*========================*/ /* out: byte offset of the inserted undo log entry on the page if succeed, 0 if fail */ page_t* undo_page, /* in: undo log page */ trx_t* trx, /* in: transaction */ dict_index_t* index, /* in: clustered index where update or delete marking is done */ rec_t* rec, /* in: clustered index record which has NOT yet been modified */ const ulint* offsets, /* in: rec_get_offsets(rec, index) */ upd_t* update, /* in: update vector which tells the columns to be updated; in the case of a delete, this should be set to NULL */ ulint cmpl_info, /* in: compiler info on secondary index updates */ mtr_t* mtr) /* in: mtr */ { dict_table_t* table; upd_field_t* upd_field; ulint first_free; byte* ptr; ulint len; byte* field; ulint flen; ulint pos; dulint roll_ptr; dulint trx_id; ulint bits; ulint col_no; byte* old_ptr; ulint type_cmpl; byte* type_cmpl_ptr; ulint i; ut_a(index->type & DICT_CLUSTERED); ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE); table = index->table; first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE); ptr = undo_page + first_free; ut_ad(first_free <= UNIV_PAGE_SIZE); if (trx_undo_left(undo_page, ptr) < 50) { /* NOTE: the value 50 must be big enough so that the general fields written below fit on the undo log page */ return(0); } /* Reserve 2 bytes for the pointer to the next undo log record */ ptr += 2; /* Store first some general parameters to the undo log */ if (update) { if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) { type_cmpl = TRX_UNDO_UPD_DEL_REC; } else { type_cmpl = TRX_UNDO_UPD_EXIST_REC; } } else { type_cmpl = TRX_UNDO_DEL_MARK_REC; } type_cmpl = type_cmpl | (cmpl_info * 
TRX_UNDO_CMPL_INFO_MULT); mach_write_to_1(ptr, type_cmpl); type_cmpl_ptr = ptr; ptr++; len = mach_dulint_write_much_compressed(ptr, trx->undo_no); ptr += len; len = mach_dulint_write_much_compressed(ptr, table->id); ptr += len; /*----------------------------------------*/ /* Store the state of the info bits */ bits = rec_get_info_bits(rec, dict_table_is_comp(table)); mach_write_to_1(ptr, bits); ptr += 1; /* Store the values of the system columns */ field = rec_get_nth_field(rec, offsets, dict_index_get_sys_col_pos( index, DATA_TRX_ID), &len); ut_ad(len == DATA_TRX_ID_LEN); trx_id = trx_read_trx_id(field); field = rec_get_nth_field(rec, offsets, dict_index_get_sys_col_pos( index, DATA_ROLL_PTR), &len); ut_ad(len == DATA_ROLL_PTR_LEN); roll_ptr = trx_read_roll_ptr(field); len = mach_dulint_write_compressed(ptr, trx_id); ptr += len; len = mach_dulint_write_compressed(ptr, roll_ptr); ptr += len; /*----------------------------------------*/ /* Store then the fields required to uniquely determine the record which will be modified in the clustered index */ for (i = 0; i < dict_index_get_n_unique(index); i++) { field = rec_get_nth_field(rec, offsets, i, &flen); if (trx_undo_left(undo_page, ptr) < 4) { return(0); } len = mach_write_compressed(ptr, flen); ptr += len; if (flen != UNIV_SQL_NULL) { if (trx_undo_left(undo_page, ptr) < flen) { return(0); } ut_memcpy(ptr, field, flen); ptr += flen; } } /*----------------------------------------*/ /* Save to the undo log the old values of the columns to be updated. 
*/ if (update) { if (trx_undo_left(undo_page, ptr) < 5) { return(0); } len = mach_write_compressed(ptr, upd_get_n_fields(update)); ptr += len; for (i = 0; i < upd_get_n_fields(update); i++) { upd_field = upd_get_nth_field(update, i); pos = upd_field->field_no; /* Write field number to undo log */ if (trx_undo_left(undo_page, ptr) < 5) { return(0); } len = mach_write_compressed(ptr, pos); ptr += len; /* Save the old value of field */ field = rec_get_nth_field(rec, offsets, pos, &flen); if (trx_undo_left(undo_page, ptr) < 5) { return(0); } if (rec_offs_nth_extern(offsets, pos)) { /* If a field has external storage, we add to flen the flag */ len = mach_write_compressed( ptr, UNIV_EXTERN_STORAGE_FIELD + flen); /* Notify purge that it eventually has to free the old externally stored field */ trx->update_undo->del_marks = TRUE; *type_cmpl_ptr = *type_cmpl_ptr | TRX_UNDO_UPD_EXTERN; } else { len = mach_write_compressed(ptr, flen); } ptr += len; if (flen != UNIV_SQL_NULL) { if (trx_undo_left(undo_page, ptr) < flen) { return(0); } ut_memcpy(ptr, field, flen); ptr += flen; } } } /*----------------------------------------*/ /* In the case of a delete marking, and also in the case of an update where any ordering field of any index changes, store the values of all columns which occur as ordering fields in any index. This info is used in the purge of old versions where we use it to build and search the delete marked index records, to look if we can remove them from the index tree. Note that starting from 4.0.14 also externally stored fields can be ordering in some index. But we always store at least 384 first bytes locally to the clustered index record, which means we can construct the column prefix fields in the index from the stored data. 
*/ if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { trx->update_undo->del_marks = TRUE; if (trx_undo_left(undo_page, ptr) < 5) { return(0); } old_ptr = ptr; /* Reserve 2 bytes to write the number of bytes the stored fields take in this undo record */ ptr += 2; for (col_no = 0; col_no < dict_table_get_n_cols(table); col_no++) { const dict_col_t* col = dict_table_get_nth_col(table, col_no); if (col->ord_part > 0) { pos = dict_index_get_nth_col_pos(index, col_no); /* Write field number to undo log */ if (trx_undo_left(undo_page, ptr) < 5) { return(0); } len = mach_write_compressed(ptr, pos); ptr += len; /* Save the old value of field */ field = rec_get_nth_field(rec, offsets, pos, &flen); if (trx_undo_left(undo_page, ptr) < 5) { return(0); } len = mach_write_compressed(ptr, flen); ptr += len; if (flen != UNIV_SQL_NULL) { if (trx_undo_left(undo_page, ptr) < flen) { return(0); } ut_memcpy(ptr, field, flen); ptr += flen; } } } mach_write_to_2(old_ptr, ptr - old_ptr); } /*----------------------------------------*/ /* Write pointers to the previous and the next undo log records */ if (trx_undo_left(undo_page, ptr) < 2) { return(0); } mach_write_to_2(ptr, first_free); ptr += 2; mach_write_to_2(undo_page + first_free, ptr - undo_page); mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE, ptr - undo_page); /* Write to the REDO log about this change in the UNDO log */ trx_undof_page_add_undo_rec_log(undo_page, first_free, ptr - undo_page, mtr); return(first_free); }
/*
 * Scans one page for records that match any of the known table definitions.
 *
 * Two summary lines are written to f_result: the first carries the page id,
 * the page format, whether check_page() accepted the record list and the
 * expected record counts; the second carries the number of records found,
 * whether that differs from the expectation, and whether PAGE_LEVEL is 0.
 * Every position accepted by both check_for_a_record() and
 * check_constraints() is handed to process_ibrec().
 *
 * When check_page() accepts the page, the walk follows the next-record
 * pointers starting at the record after infimum and stops at supremum.
 * Otherwise it starts at 100 + record_extra_bytes and probes forward one
 * byte at a time, jumping over each recognised record by the value
 * process_ibrec() returns.
 *
 * The function returns early (without the second summary line) when the
 * index id filter rejects the page or check_page_format() fails, and exits
 * the process when no table definitions are compiled in.
 */
void process_ibpage(page_t *page) {
    ulint page_id;
    rec_t *origin;
    ulint offsets[MAX_TABLE_FIELDS + 2];
    ulint offset, i;
    int is_page_valid = 0;
    int comp;
    int found;
    unsigned int expected_records = 0;
    unsigned int actual_records = 0;
    int16_t b, infimum, supremum;

    // Skip tables if filter used
    if (use_filter_id) {
        dulint index_id = mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID);
        if (index_id.low != filter_id.low || index_id.high != filter_id.high) {
            if (debug) {
                page_id = mach_read_from_4(page + FIL_PAGE_OFFSET);
                printf("Skipped using index id filter: %lu!\n", page_id);
            }
            return;
        }
    }

    // Read page id
    page_id = mach_read_from_4(page + FIL_PAGE_OFFSET);
    if (debug) printf("Page id: %lu\n", page_id);
    fprintf(f_result, "-- Page id: %lu", page_id);

    // Check requested and actual formats
    if (!check_page_format(page)) return;

    if (table_definitions_cnt == 0) {
        fprintf(stderr, "There are no table definitions. Please check include/table_defs.h\n");
        exit(EXIT_FAILURE);
    }

    is_page_valid = check_page(page, &expected_records);

    // comp == 1 if page in COMPACT format and 0 if REDUNDANT
    comp = page_is_comp(page);
    fprintf(f_result, ", Format: %s", (comp) ? "COMPACT" : "REDUNDANT");
    infimum = (comp) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM;
    supremum = (comp) ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM;

    // Find possible data area start point (at least 5 bytes of utility data)
    if (is_page_valid) {
        // COMPACT stores a relative next pointer, REDUNDANT an absolute one
        b = mach_read_from_2(page + infimum - 2);
        offset = (comp) ? infimum + b : b;
    } else {
        offset = 100 + record_extra_bytes;
    }

    fprintf(f_result, ", Records list: %s", is_page_valid ? "Valid" : "Invalid");
    fprintf(f_result, ", Expected records: (%u %lu)", expected_records,
            mach_read_from_2(page + PAGE_HEADER + PAGE_N_RECS));
    fprintf(f_result, "\n");

    if (debug) printf("Starting offset: %lu (%lX). Checking %d table definitions.\n",
                      offset, offset, table_definitions_cnt);

    // Walk through all possible positions to the end of page
    // (start of directory - extra bytes of the last rec)
    while (offset < UNIV_PAGE_SIZE - record_extra_bytes
           && ((offset != supremum) || !is_page_valid)) {
        // Get record pointer
        origin = page + offset;
        if (debug) printf("\nChecking offset: 0x%lX: ", offset);

        // Try every table definition against this one origin. The offset
        // must not move while definitions are still being tried, otherwise
        // later definitions are tested against a stale origin and positions
        // get skipped.
        found = 0;
        for (i = 0; i < table_definitions_cnt; i++) {
            // Get table info
            table_def_t *table = &(table_definitions[i]);
            if (debug) printf(" (%s) ", table->name);

            // Check if origin points to a valid record
            if (check_for_a_record(page, origin, table, offsets)
                && check_constraints(origin, table, offsets)) {
                found = 1;
                actual_records++;
                if (debug) printf("\n---------------------------------------------------\n"
                                  "PAGE%lu: Found a table %s record: %p (offset = %lu)\n",
                                  page_id, table->name, (void *) origin, offset);
                if (is_page_valid) {
                    process_ibrec(page, origin, table, offsets);
                    b = mach_read_from_2(page + offset - 2);
                    offset = (comp) ? offset + b : b;
                } else {
                    offset += process_ibrec(page, origin, table, offsets);
                }
                if (debug) printf("Next offset: 0x%lX", offset);
                break;
            }
        }

        // No definition matched: step to the next candidate exactly once
        if (!found) {
            if (is_page_valid) {
                b = mach_read_from_2(page + offset - 2);
                offset = (comp) ? offset + b : b;
            } else {
                offset++;
            }
            if (debug) printf("\nNext offset: %lX", offset);
        }
    }

    fprintf(f_result, "-- Page id: %lu", page_id);
    fprintf(f_result, ", Found records: %u", actual_records);
    fprintf(f_result, ", Lost records: %s", (actual_records != expected_records) ? "YES" : "NO");
    fprintf(f_result, ", Leaf page: %s",
            (mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL) == 0) ? "YES" : "NO");
    fprintf(f_result, "\n");
}
/***********************************************************************//**
Called after a whole undo log has been purged: repositions the rollback
segment's "last not yet purged" history log info on the previous log of the
history list, moves purge_sys->purge_trx_no past the purged log, and pushes
the rollback segment onto purge_sys->ib_bh keyed by the new last trx number.
If the history list has no previous log, rseg->last_page_no is set to
FIL_NULL and nothing is pushed. */
static
void
trx_purge_rseg_get_next_history_log(
/*================================*/
	trx_rseg_t*	rseg)	/*!< in: rollback segment */
{
	mtr_t		mtr;
	page_t*		last_page;
	trx_ulogf_t*	hdr;
	fil_addr_t	prev_addr;
	trx_id_t	prev_trx_no;
	ibool		prev_del_marks;
	rseg_queue_t	queue_entry;
	const void*	ptr;

	mutex_enter(&(rseg->mutex));

	ut_a(rseg->last_page_no != FIL_NULL);

	purge_sys->purge_trx_no = rseg->last_trx_no + 1;
	purge_sys->purge_undo_no = 0;
	purge_sys->next_stored = FALSE;

	mtr_start(&mtr);

	last_page = trx_undo_page_get_s_latched(
		rseg->space, rseg->zip_size, rseg->last_page_no, &mtr);

	hdr = last_page + rseg->last_offset;

	/* Each handled log bumps the purge page counter by one */
	purge_sys->n_pages_handled++;

	prev_addr = trx_purge_get_log_from_hist(
		flst_get_prev_addr(hdr + TRX_UNDO_HISTORY_NODE, &mtr));

	if (prev_addr.page == FIL_NULL) {
		/* The history list holds no further logs */

		rseg->last_page_no = FIL_NULL;

		mutex_exit(&(rseg->mutex));
		mtr_commit(&mtr);

		/* Debug aid for the history list corruption reported on the
		MySQL mailing list on Nov 9, 2004: the fut0lst.c file-based
		list had a FIL_NULL prev pointer although the list length was
		over 8 million nodes. Purge is assumed to truncate the list in
		large pieces, so on reaching the head of the list its length
		should not exceed 2 000 000 undo logs. */

		mutex_enter(&kernel_mutex);

		if (trx_sys->rseg_history_len > 2000000) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: Warning: purge reached the"
				" head of the history list,\n"
				"InnoDB: but its length is still"
				" reported as %lu! Make a detailed bug\n"
				"InnoDB: report, and submit it"
				" to http://bugs.mysql.com\n",
				(ulong) trx_sys->rseg_history_len);
		}

		mutex_exit(&kernel_mutex);

		return;
	}

	mutex_exit(&(rseg->mutex));
	mtr_commit(&mtr);

	/* Fetch the trx number and the del marks flag from the header of the
	previous log, under a fresh mini-transaction */

	mtr_start(&mtr);

	hdr = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
					  prev_addr.page, &mtr)
		+ prev_addr.boffset;

	prev_trx_no = mach_read_from_8(hdr + TRX_UNDO_TRX_NO);
	prev_del_marks = mach_read_from_2(hdr + TRX_UNDO_DEL_MARKS);

	mtr_commit(&mtr);

	mutex_enter(&(rseg->mutex));

	rseg->last_page_no = prev_addr.page;
	rseg->last_offset = prev_addr.boffset;
	rseg->last_trx_no = prev_trx_no;
	rseg->last_del_marks = prev_del_marks;

	queue_entry.rseg = rseg;
	queue_entry.trx_no = rseg->last_trx_no;

	/* Purge can also produce events, however these are already ordered
	in the rollback segment and any user generated event will be greater
	than the events that Purge produces. ie. Purge can never produce
	events from an empty rollback segment. */

	mutex_enter(&purge_sys->bh_mutex);

	ptr = ib_bh_push(purge_sys->ib_bh, &queue_entry);
	ut_a(ptr != NULL);

	mutex_exit(&purge_sys->bh_mutex);

	mutex_exit(&(rseg->mutex));
}
/************************************************************
Parses the index description stored in front of a log record and builds a
dummy table and a dummy index from it. For the non-compact format nothing is
read from the buffer: a one-column dummy index is created and ptr is returned
unchanged. */

byte*
mlog_parse_index(
/*=============*/
				/* out: parsed record end,
				NULL if not a complete record */
	byte*		ptr,	/* in: buffer */
	byte*		end_ptr,/* in: buffer end */
	ibool		comp,	/* in: TRUE=compact record format */
	dict_index_t**	index)	/* out, own: dummy index */
{
	ulint		i;
	ulint		n;
	ulint		n_uniq;
	dict_table_t*	table;
	dict_index_t*	ind;

	ut_ad(comp == FALSE || comp == TRUE);

	if (!comp) {
		n = n_uniq = 1;
	} else {
		/* Two 2-byte counters: number of fields, number of unique
		fields */
		if (end_ptr < ptr + 4) {

			return(NULL);
		}

		n = mach_read_from_2(ptr);
		ptr += 2;
		n_uniq = mach_read_from_2(ptr);
		ut_ad(n_uniq <= n);

		/* ptr still addresses n_uniq here; it and the n 2-byte field
		descriptors after it must all be inside the buffer */
		if (end_ptr < ptr + (n + 1) * 2) {

			return(NULL);
		}
	}

	table = dict_mem_table_create("LOG_DUMMY", DICT_HDR_SPACE, n, comp);
	ind = dict_mem_index_create("LOG_DUMMY", "LOG_DUMMY",
				    DICT_HDR_SPACE, 0, n);
	ind->table = table;
	ind->n_uniq = n_uniq;

	if (n_uniq != n) {
		ind->type = DICT_CLUSTERED;
	}

	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
	ind->cached = TRUE;

	if (comp) {
		/* First field descriptor sits right behind n_uniq */
		byte*	desc = ptr + 2;

		for (i = 0; i < n; i++, desc += 2) {
			ulint	len = mach_read_from_2(desc);
			ulint	mtype;
			ulint	prtype;

			/* The high-order bit of len is the NOT NULL flag;
			the rest is 0 or 0x7fff for variable-length fields,
			and 1..0x7ffe for fixed-length fields. */
			if (((len + 1) & 0x7fff) <= 1) {
				mtype = DATA_BINARY;
			} else {
				mtype = DATA_FIXBINARY;
			}

			if (len & 0x8000) {
				prtype = DATA_NOT_NULL;
			} else {
				prtype = 0;
			}

			dict_mem_table_add_col(table, "DUMMY", mtype, prtype,
					       len & 0x7fff, 0);
			dict_index_add_col(ind,
					   dict_table_get_nth_col(table, i),
					   0, 0);
		}

		/* The record continues after the last descriptor */
		ptr = desc;
	}

	*index = ind;

	return(ptr);
}
/************************************************************
Parses a log record that carries a 2-byte page offset followed by a
1, 2, 4 or 8 byte value in compressed form, and applies the value to the page
when a page is given. An offset outside the page, a value too wide for the
record type, or an unknown record type sets recv_sys->found_corrupt_log. */

byte*
mlog_parse_nbytes(
/*==============*/
			/* out: parsed record end, NULL if not a complete
			record or a corrupt record */
	ulint	type,	/* in: log record type: MLOG_1BYTE, ... */
	byte*	ptr,	/* in: buffer */
	byte*	end_ptr,/* in: buffer end */
	byte*	page)	/* in: page where to apply the log record, or NULL */
{
	ulint	offset;
	ulint	val;
	dulint	dval;

	ut_a(type <= MLOG_8BYTES);

	if (end_ptr < ptr + 2) {

		return(NULL);
	}

	offset = mach_read_from_2(ptr);
	ptr += 2;

	if (offset >= UNIV_PAGE_SIZE) {
		recv_sys->found_corrupt_log = TRUE;

		return(NULL);
	}

	if (type == MLOG_8BYTES) {
		/* 8-byte values use the dulint encoding */
		ptr = mach_dulint_parse_compressed(ptr, end_ptr, &dval);

		if (ptr != NULL && page) {
			mach_write_to_8(page + offset, dval);
		}

		return(ptr);
	}

	ptr = mach_parse_compressed(ptr, end_ptr, &val);

	if (ptr == NULL) {

		return(NULL);
	}

	/* Every accepted case returns from inside the switch; whatever
	leaves the switch is treated as a corrupt record */
	switch (type) {
	case MLOG_1BYTE:
		if (val > 0xFFUL) {
			break;
		}

		if (page) {
			mach_write_to_1(page + offset, val);
		}

		return(ptr);

	case MLOG_2BYTES:
		if (val > 0xFFFFUL) {
			break;
		}

		if (page) {
			mach_write_to_2(page + offset, val);
		}

		return(ptr);

	case MLOG_4BYTES:
		if (page) {
			mach_write_to_4(page + offset, val);
		}

		return(ptr);

	default:
		break;
	}

	recv_sys->found_corrupt_log = TRUE;

	return(NULL);
}
/***************************************************************************
Updates the last not yet purged history log info in rseg when we have purged
a whole undo log. Advances also purge_sys->purge_trx_no past the purged log.
If the history list has no previous log, rseg->last_page_no is set to
FIL_NULL and the other rseg fields are left as they were. The caller must own
purge_sys->mutex (checked in debug builds). */
static
void
trx_purge_rseg_get_next_history_log(
/*================================*/
	trx_rseg_t*	rseg)	/* in: rollback segment */
{
	page_t*		undo_page;
	trx_ulogf_t*	log_hdr;
	fil_addr_t	prev_log_addr;
	dulint		trx_no;
	ibool		del_marks;
	mtr_t		mtr;

	ut_ad(mutex_own(&(purge_sys->mutex)));

	mutex_enter(&(rseg->mutex));

	ut_a(rseg->last_page_no != FIL_NULL);

	purge_sys->purge_trx_no = ut_dulint_add(rseg->last_trx_no, 1);
	purge_sys->purge_undo_no = ut_dulint_zero;
	purge_sys->next_stored = FALSE;

	mtr_start(&mtr);

	undo_page = trx_undo_page_get_s_latched(rseg->space,
						rseg->last_page_no, &mtr);
	log_hdr = undo_page + rseg->last_offset;

	/* Increase the purge page count by one for every handled log */

	purge_sys->n_pages_handled++;

	prev_log_addr = trx_purge_get_log_from_hist(
		flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));

	if (prev_log_addr.page == FIL_NULL) {
		/* No logs left in the history list */

		rseg->last_page_no = FIL_NULL;

		mutex_exit(&(rseg->mutex));
		mtr_commit(&mtr);

		mutex_enter(&kernel_mutex);

		/* Add debug code to track history list corruption reported
		on the MySQL mailing list on Nov 9, 2004. The fut0lst.c
		file-based list was corrupt. The prev node pointer was
		FIL_NULL, even though the list length was over 8 million
		nodes! We assume that purge truncates the history list in
		moderate size pieces, and if we here reach the head of the
		list, the list cannot be longer than 20 000 undo logs now. */

		if (trx_sys->rseg_history_len > 20000) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: Warning: purge reached the"
				" head of the history list,\n"
				"InnoDB: but its length is still"
				" reported as %lu! Make a detailed bug\n"
				"InnoDB: report, and submit it"
				" to http://bugs.mysql.com\n",
				(ulong) trx_sys->rseg_history_len);
		}

		mutex_exit(&kernel_mutex);

		return;
	}

	mutex_exit(&(rseg->mutex));
	mtr_commit(&mtr);

	/* Read the trx number and del marks from the previous log header */
	mtr_start(&mtr);

	log_hdr = trx_undo_page_get_s_latched(rseg->space,
					      prev_log_addr.page, &mtr)
		+ prev_log_addr.boffset;

	trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);

	del_marks = mach_read_from_2(log_hdr + TRX_UNDO_DEL_MARKS);

	mtr_commit(&mtr);

	mutex_enter(&(rseg->mutex));

	rseg->last_page_no = prev_log_addr.page;
	rseg->last_offset = prev_log_addr.boffset;
	rseg->last_trx_no = trx_no;
	rseg->last_del_marks = del_marks;

	mutex_exit(&(rseg->mutex));
}
/*************************************************************//** Pretty prints a dfield value according to its data type. Also the hex string is printed if a string contains non-printable characters. */ UNIV_INTERN void dfield_print_also_hex( /*==================*/ const dfield_t* dfield) /*!< in: dfield */ { const byte* data; ulint len; ulint prtype; ulint i; ibool print_also_hex; len = dfield_get_len(dfield); data = dfield_get_data(dfield); if (dfield_is_null(dfield)) { fputs("NULL", stderr); return; } prtype = dtype_get_prtype(dfield_get_type(dfield)); switch (dtype_get_mtype(dfield_get_type(dfield))) { dulint id; case DATA_INT: switch (len) { ulint val; case 1: val = mach_read_from_1(data); if (!(prtype & DATA_UNSIGNED)) { val &= ~0x80; fprintf(stderr, "%ld", (long) val); } else { fprintf(stderr, "%lu", (ulong) val); } break; case 2: val = mach_read_from_2(data); if (!(prtype & DATA_UNSIGNED)) { val &= ~0x8000; fprintf(stderr, "%ld", (long) val); } else { fprintf(stderr, "%lu", (ulong) val); } break; case 3: val = mach_read_from_3(data); if (!(prtype & DATA_UNSIGNED)) { val &= ~0x800000; fprintf(stderr, "%ld", (long) val); } else { fprintf(stderr, "%lu", (ulong) val); } break; case 4: val = mach_read_from_4(data); if (!(prtype & DATA_UNSIGNED)) { val &= ~0x80000000; fprintf(stderr, "%ld", (long) val); } else { fprintf(stderr, "%lu", (ulong) val); } break; case 6: id = mach_read_from_6(data); fprintf(stderr, "{%lu %lu}", ut_dulint_get_high(id), ut_dulint_get_low(id)); break; case 7: id = mach_read_from_7(data); fprintf(stderr, "{%lu %lu}", ut_dulint_get_high(id), ut_dulint_get_low(id)); break; case 8: id = mach_read_from_8(data); fprintf(stderr, "{%lu %lu}", ut_dulint_get_high(id), ut_dulint_get_low(id)); break; default: goto print_hex; } break; case DATA_SYS: switch (prtype & DATA_SYS_PRTYPE_MASK) { case DATA_TRX_ID: id = mach_read_from_6(data); fprintf(stderr, "trx_id " TRX_ID_FMT, TRX_ID_PREP_PRINTF(id)); break; case DATA_ROLL_PTR: id = 
mach_read_from_7(data); fprintf(stderr, "roll_ptr {%lu %lu}", ut_dulint_get_high(id), ut_dulint_get_low(id)); break; case DATA_ROW_ID: id = mach_read_from_6(data); fprintf(stderr, "row_id {%lu %lu}", ut_dulint_get_high(id), ut_dulint_get_low(id)); break; default: id = mach_dulint_read_compressed(data); fprintf(stderr, "mix_id {%lu %lu}", ut_dulint_get_high(id), ut_dulint_get_low(id)); } break; case DATA_CHAR: case DATA_VARCHAR: print_also_hex = FALSE; for (i = 0; i < len; i++) { int c = *data++; if (!isprint(c)) { print_also_hex = TRUE; fprintf(stderr, "\\x%02x", (unsigned char) c); } else { putc(c, stderr); } } if (dfield_is_ext(dfield)) { fputs("(external)", stderr); } if (!print_also_hex) { break; } data = dfield_get_data(dfield); /* fall through */ case DATA_BINARY: default: print_hex: fputs(" Hex: ",stderr); for (i = 0; i < len; i++) { fprintf(stderr, "%02lx", (ulint) *data++); } if (dfield_is_ext(dfield)) { fputs("(external)", stderr); } } }
/********************************************************************//**
Removes unnecessary history data from a rollback segment. Walks the history
list from its last node towards older entries via the prev pointers, until
the list ends or a log whose trx number is >= limit_trx_no is found. The
rseg mutex and the mini-transaction are released and re-acquired between
logs. */
static
void
trx_purge_truncate_rseg_history(
/*============================*/
	trx_rseg_t*	rseg,		/*!< in: rollback segment */
	trx_id_t	limit_trx_no,	/*!< in: remove update undo logs whose
					trx number is < limit_trx_no */
	undo_no_t	limit_undo_no)	/*!< in: if transaction number is equal
					to limit_trx_no, truncate undo records
					with undo number < limit_undo_no */
{
	mtr_t		mtr;
	trx_rsegf_t*	rseg_hdr;
	fil_addr_t	cur_addr;
	ulint		n_removed_logs	= 0;

	mtr_start(&mtr);
	mutex_enter(&(rseg->mutex));

	rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
				 rseg->page_no, &mtr);

	cur_addr = trx_purge_get_log_from_hist(
		flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr));

	while (cur_addr.page != FIL_NULL) {
		page_t*		undo_page;
		trx_ulogf_t*	log_hdr;
		trx_usegf_t*	seg_hdr;
		trx_id_t	log_trx_no;
		fil_addr_t	older_addr;
		ibool		free_whole_segment;

		undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
					      cur_addr.page, &mtr);

		log_hdr = undo_page + cur_addr.boffset;

		log_trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);

		if (log_trx_no >= limit_trx_no) {
			/* Reached the limit: this log stays in the list */

			if (log_trx_no == limit_trx_no) {
				trx_undo_truncate_start(rseg, rseg->space,
							cur_addr.page,
							cur_addr.boffset,
							limit_undo_no);
			}

			mutex_enter(&kernel_mutex);
			ut_a(trx_sys->rseg_history_len >= n_removed_logs);
			trx_sys->rseg_history_len -= n_removed_logs;
			mutex_exit(&kernel_mutex);

			flst_truncate_end(rseg_hdr + TRX_RSEG_HISTORY,
					  log_hdr + TRX_UNDO_HISTORY_NODE,
					  n_removed_logs, &mtr);

			break;
		}

		/* Remember where to continue before the latches are given
		up */
		older_addr = trx_purge_get_log_from_hist(
			flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE,
					   &mtr));
		n_removed_logs++;

		seg_hdr = undo_page + TRX_UNDO_SEG_HDR;

		free_whole_segment
			= (mach_read_from_2(seg_hdr + TRX_UNDO_STATE)
			   == TRX_UNDO_TO_PURGE)
			&& (mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG)
			    == 0);

		mutex_exit(&(rseg->mutex));
		mtr_commit(&mtr);

		if (free_whole_segment) {
			/* We can free the whole log segment */

			trx_purge_free_segment(rseg, cur_addr,
					       n_removed_logs);

			n_removed_logs = 0;
		}

		mtr_start(&mtr);
		mutex_enter(&(rseg->mutex));

		rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
					 rseg->page_no, &mtr);

		cur_addr = older_addr;
	}

	mutex_exit(&(rseg->mutex));
	mtr_commit(&mtr);
}
/************************************************************************** Reports in the undo log of an insert of a clustered index record. */ static ulint trx_undo_page_report_insert( /*========================*/ /* out: offset of the inserted entry on the page if succeed, 0 if fail */ page_t* undo_page, /* in: undo log page */ trx_t* trx, /* in: transaction */ dict_index_t* index, /* in: clustered index */ dtuple_t* clust_entry, /* in: index entry which will be inserted to the clustered index */ mtr_t* mtr) /* in: mtr */ { ulint first_free; byte* ptr; ulint len; dfield_t* field; ulint flen; ulint i; ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT); first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE); ptr = undo_page + first_free; ut_ad(first_free <= UNIV_PAGE_SIZE); if (trx_undo_left(undo_page, ptr) < 30) { /* NOTE: the value 30 must be big enough such that the general fields written below fit on the undo log page */ return(0); } /* Reserve 2 bytes for the pointer to the next undo log record */ ptr += 2; /* Store first some general parameters to the undo log */ mach_write_to_1(ptr, TRX_UNDO_INSERT_REC); ptr++; len = mach_dulint_write_much_compressed(ptr, trx->undo_no); ptr += len; len = mach_dulint_write_much_compressed(ptr, (index->table)->id); ptr += len; /*----------------------------------------*/ /* Store then the fields required to uniquely determine the record to be inserted in the clustered index */ for (i = 0; i < dict_index_get_n_unique(index); i++) { field = dtuple_get_nth_field(clust_entry, i); flen = dfield_get_len(field); if (trx_undo_left(undo_page, ptr) < 5) { return(0); } len = mach_write_compressed(ptr, flen); ptr += len; if (flen != UNIV_SQL_NULL) { if (trx_undo_left(undo_page, ptr) < flen) { return(0); } ut_memcpy(ptr, dfield_get_data(field), flen); ptr += flen; } } if (trx_undo_left(undo_page, ptr) < 2) { return(0); } /*----------------------------------------*/ /* 
Write pointers to the previous and the next undo log records */ if (trx_undo_left(undo_page, ptr) < 2) { return(0); } mach_write_to_2(ptr, first_free); ptr += 2; mach_write_to_2(undo_page + first_free, ptr - undo_page); mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE, ptr - undo_page); /* Write the log entry to the REDO log of this change in the UNDO log */ trx_undof_page_add_undo_rec_log(undo_page, first_free, ptr - undo_page, mtr); return(first_free); }
/********************************************************//**
Parses a log record written by mlog_write_ulint or mlog_write_dulint.
When a page is given the parsed value is stored at the logged offset; if a
compressed page descriptor is given as well, the value is first stored at the
same offset in its data buffer.
@return parsed record end, NULL if not a complete record or a corrupt record */
UNIV_INTERN
byte*
mlog_parse_nbytes(
/*==============*/
	ulint	type,	/*!< in: log record type: MLOG_1BYTE, ... */
	byte*	ptr,	/*!< in: buffer */
	byte*	end_ptr,/*!< in: buffer end */
	byte*	page,	/*!< in: page where to apply the log record, or NULL */
	void*	page_zip)/*!< in/out: compressed page, or NULL */
{
	ulint	offset;
	ulint	val;
	dulint	dval;
	ibool	value_fits;

	ut_a(type <= MLOG_8BYTES);
	ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);

	if (end_ptr < ptr + 2) {

		return(NULL);
	}

	offset = mach_read_from_2(ptr);
	ptr += 2;

	if (offset >= UNIV_PAGE_SIZE) {
		recv_sys->found_corrupt_log = TRUE;

		return(NULL);
	}

	if (type == MLOG_8BYTES) {
		ptr = mach_dulint_parse_compressed(ptr, end_ptr, &dval);

		if (ptr == NULL) {

			return(NULL);
		}

		if (page) {
			if (page_zip) {
				mach_write_to_8(
					((page_zip_des_t*) page_zip)->data
					+ offset, dval);
			}

			mach_write_to_8(page + offset, dval);
		}

		return(ptr);
	}

	ptr = mach_parse_compressed(ptr, end_ptr, &val);

	if (ptr == NULL) {

		return(NULL);
	}

	/* Step 1: the value must fit the width named by the record type,
	and the type must be one of the known widths */
	switch (type) {
	case MLOG_1BYTE:
		value_fits = (val <= 0xFFUL);
		break;
	case MLOG_2BYTES:
		value_fits = (val <= 0xFFFFUL);
		break;
	case MLOG_4BYTES:
		value_fits = TRUE;
		break;
	default:
		value_fits = FALSE;
	}

	if (!value_fits) {
		recv_sys->found_corrupt_log = TRUE;

		return(NULL);
	}

	/* Step 2: apply the value, compressed copy first */
	if (page) {
		if (type == MLOG_1BYTE) {
			if (page_zip) {
				mach_write_to_1(
					((page_zip_des_t*) page_zip)->data
					+ offset, val);
			}

			mach_write_to_1(page + offset, val);
		} else if (type == MLOG_2BYTES) {
			if (page_zip) {
				mach_write_to_2(
					((page_zip_des_t*) page_zip)->data
					+ offset, val);
			}

			mach_write_to_2(page + offset, val);
		} else {
			if (page_zip) {
				mach_write_to_4(
					((page_zip_des_t*) page_zip)->data
					+ offset, val);
			}

			mach_write_to_4(page + offset, val);
		}
	}

	return(ptr);
}
static void print_page(uchar *p) { int type = mach_read_from_2(p + FIL_PAGE_TYPE); if (type == FIL_PAGE_TYPE_ALLOCATED) { return; } printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_OFFSET", mach_read_from_4(p + FIL_PAGE_OFFSET)); printf(COLUMN_NAME_FMT " 0x%08lX\n", "FIL_PAGE_SPACE_OR_CHKSUM", mach_read_from_4(p + FIL_PAGE_SPACE_OR_CHKSUM)); printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_PREV", mach_read_from_4(p + FIL_PAGE_PREV)); printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_NEXT", mach_read_from_4(p + FIL_PAGE_NEXT)); printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_LSN", mach_read_from_4(p + FIL_PAGE_LSN)); printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_TYPE", mach_read_from_2(p + FIL_PAGE_TYPE)); dulint flush_lsn_tuple = mach_read_from_6(p + FIL_PAGE_FILE_FLUSH_LSN); uint64_t flush_lsn = (((uint64_t) flush_lsn_tuple.high) << 32) + flush_lsn_tuple.low; printf(COLUMN_NAME_FMT " %" PRIu64 "\n", "FIL_PAGE_FILE_FLUSH_LSN", flush_lsn); printf(COLUMN_NAME_FMT " %ld\n", "FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID", mach_read_from_4(p + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID)); printf(COLUMN_NAME_FMT " 0x%08lX\n", "FIL_PAGE_END_LSN_OLD_CHKSUM", mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM)); uchar *pd = p + FIL_PAGE_DATA; if (type == FIL_PAGE_TYPE_FSP_HDR) { printf(COLUMN_NAME_FMT " %ld\n", "FSEG_HDR_SPACE", mach_read_from_4(pd + FSEG_HDR_SPACE)); printf(COLUMN_NAME_FMT " %ld\n", "FSEG_HDR_PAGE_NO", mach_read_from_4(pd + FSEG_HDR_PAGE_NO)); printf(COLUMN_NAME_FMT " %ld\n", "FSEG_HDR_OFFSET", mach_read_from_4(pd + FSEG_HDR_OFFSET)); } else if (type == FIL_PAGE_INDEX) { printf(COLUMN_NAME_FMT " 0x%lX\n", "PAGE_N_HEAP", mach_read_from_2(pd + PAGE_N_HEAP)); printf(COLUMN_NAME_FMT " 0x%lX\n", "PAGE_FREE", mach_read_from_2(pd + PAGE_FREE)); dulint index_id_tuple = mach_read_from_8(pd + PAGE_INDEX_ID); uint64_t index_id = (((uint64_t) index_id_tuple.high) << 32) + index_id_tuple.low; printf(COLUMN_NAME_FMT " %" PRIu64 "\n", "PAGE_INDEX_ID", index_id); printf(COLUMN_NAME_FMT " %ld\n", 
"PAGE_BTR_SEG_LEAF", mach_read_from_4(pd + PAGE_BTR_SEG_LEAF + FSEG_HDR_SPACE)); printf(COLUMN_NAME_FMT " %ld\n", "PAGE_BTR_SEG_TOP", mach_read_from_4(pd + PAGE_BTR_SEG_TOP + FSEG_HDR_SPACE)); #if 0 int i; for (i = 0; i < 80; i += 4) { if (i == PAGE_BTR_SEG_LEAF || i == PAGE_N_HEAP || i == PAGE_INDEX_ID || i == PAGE_INDEX_ID + 4) { continue; } char column_name[256]; snprintf(column_name, sizeof(column_name), "FIL_PAGE_DATA + %2d", i); printf(COLUMN_NAME_FMT " %ld\n", column_name, mach_read_from_4(p + FIL_PAGE_DATA + i)); } #endif } printf("\n"); }
/********************************************************//**
Parses a log record written by mlog_open_and_write_index and builds a dummy
table and a dummy index that describe the logged fields. For the non-compact
format nothing is read from the buffer and a one-column dummy index is
created.
@return parsed record end, NULL if not a complete record */
UNIV_INTERN
byte*
mlog_parse_index(
/*=============*/
	byte*		ptr,	/*!< in: buffer */
	const byte*	end_ptr,/*!< in: buffer end */
	ibool		comp,	/*!< in: TRUE=compact record format */
	dict_index_t**	index)	/*!< out, own: dummy index */
{
	ulint		i;
	ulint		n;
	ulint		n_uniq;
	dict_table_t*	table;
	dict_index_t*	ind;

	ut_ad(comp == FALSE || comp == TRUE);

	if (!comp) {
		n = n_uniq = 1;
	} else {
		/* Header: number of fields, then number of unique fields,
		2 bytes each */
		if (end_ptr < ptr + 4) {

			return(NULL);
		}

		n = mach_read_from_2(ptr);
		ptr += 2;
		n_uniq = mach_read_from_2(ptr);
		ptr += 2;
		ut_ad(n_uniq <= n);

		/* One 2-byte descriptor per field must follow */
		if (end_ptr < ptr + n * 2) {

			return(NULL);
		}
	}

	table = dict_mem_table_create("LOG_DUMMY", DICT_HDR_SPACE, n,
				      comp ? DICT_TF_COMPACT : 0);
	ind = dict_mem_index_create("LOG_DUMMY", "LOG_DUMMY",
				    DICT_HDR_SPACE, 0, n);
	ind->table = table;
	ind->n_uniq = (unsigned int) n_uniq;

	if (n_uniq != n) {
		ut_a(n_uniq + DATA_ROLL_PTR <= n);
		ind->type = DICT_CLUSTERED;
	}

	if (comp) {
		for (i = 0; i < n; i++, ptr += 2) {
			ulint	len = mach_read_from_2(ptr);
			ulint	mtype;
			ulint	prtype;

			/* The high-order bit of len is the NOT NULL flag;
			the rest is 0 or 0x7fff for variable-length fields,
			and 1..0x7ffe for fixed-length fields. */
			if (((len + 1) & 0x7fff) <= 1) {
				mtype = DATA_BINARY;
			} else {
				mtype = DATA_FIXBINARY;
			}

			if (len & 0x8000) {
				prtype = DATA_NOT_NULL;
			} else {
				prtype = 0;
			}

			dict_mem_table_add_col(table, NULL, NULL,
					       mtype, prtype, len & 0x7fff);

			dict_index_add_col(ind, table,
					   dict_table_get_nth_col(table, i),
					   0);
		}

		dict_table_add_system_columns(table, table->heap);

		if (n_uniq != n) {
			/* Identify DB_TRX_ID and DB_ROLL_PTR in the index:
			after checking the lengths of the fields at the
			expected positions, point them at the system columns
			that were just added to the table. */
			ulint	trx_id_pos = DATA_TRX_ID - 1 + n_uniq;
			ulint	roll_ptr_pos = DATA_ROLL_PTR - 1 + n_uniq;

			ut_a(DATA_TRX_ID_LEN
			     == dict_index_get_nth_col(ind, DATA_TRX_ID - 1
						       + n_uniq)->len);
			ut_a(DATA_ROLL_PTR_LEN
			     == dict_index_get_nth_col(ind, DATA_ROLL_PTR - 1
						       + n_uniq)->len);

			ind->fields[trx_id_pos].col
				= &table->cols[n + DATA_TRX_ID];
			ind->fields[roll_ptr_pos].col
				= &table->cols[n + DATA_ROLL_PTR];
		}
	}

	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
	ind->cached = TRUE;
	*index = ind;

	return(ptr);
}
/********************************************************//**
Parses a log record written by mlog_open_and_write_index and builds a dummy
table and a dummy index that describe the logged fields. When the top bit of
the field count is set the record describes a GCS table and carries one extra
2-byte value: the number of clustered index fields before the fast ALTER
TABLE. For the non-compact format nothing is read from the buffer.
@return parsed record end, NULL if not a complete record */
UNIV_INTERN
byte*
mlog_parse_index(
/*=============*/
	byte*		ptr,	/*!< in: buffer */
	const byte*	end_ptr,/*!< in: buffer end */
	ibool		comp,	/*!< in: TRUE=compact record format */
	dict_index_t**	index)	/*!< out, own: dummy index */
{
	ulint		i;
	ulint		n;
	ulint		n_uniq;
	dict_table_t*	table;
	dict_index_t*	ind;
	ibool		is_gcs = FALSE;
	/* number of clustered index fields before the fast ALTER TABLE */
	ulint		n_fields_before_alter = 0;

	ut_ad(comp == FALSE || comp == TRUE);

	if (!comp) {
		n = n_uniq = 1;
	} else {
		if (end_ptr < ptr + 4) {

			return(NULL);
		}

		n = mach_read_from_2(ptr);
		ptr += 2;

		if (n & 0x8000) {
			/* The top bit set marks a GCS table */
			is_gcs = TRUE;
			n &= 0x7FFF;

			n_fields_before_alter = mach_read_from_2(ptr);
			ut_ad(n_fields_before_alter < n && n_fields_before_alter > 0);
			ptr += 2;

			/* Make sure the next 2 bytes are inside the buffer */
			if (end_ptr < ptr + 2) {

				return(NULL);
			}
		}

		n_uniq = mach_read_from_2(ptr);
		ptr += 2;
		ut_ad(n_uniq <= n);

		/* One 2-byte descriptor per field must follow */
		if (end_ptr < ptr + n * 2) {

			return(NULL);
		}
	}

	table = dict_mem_table_create("LOG_DUMMY", DICT_HDR_SPACE, n,
				      comp ? DICT_TF_COMPACT : 0,
				      is_gcs, n_fields_before_alter);
	ind = dict_mem_index_create("LOG_DUMMY", "LOG_DUMMY",
				    DICT_HDR_SPACE, 0, n);
	ind->table = table;
	ind->n_uniq = (unsigned int) n_uniq;

	if (n_uniq != n) {
		ut_a(n_uniq + DATA_ROLL_PTR <= n);
		ind->type = DICT_CLUSTERED;
	}

	if (comp) {
		for (i = 0; i < n; i++, ptr += 2) {
			ulint	len = mach_read_from_2(ptr);
			ulint	mtype;
			ulint	prtype;

			/* The high-order bit of len is the NOT NULL flag;
			the rest is 0 or 0x7fff for variable-length fields,
			and 1..0x7ffe for fixed-length fields. */
			if (((len + 1) & 0x7fff) <= 1) {
				/* 0 or 0x7fff: treat as variable-length */
				mtype = DATA_BINARY;
			} else {
				/* anything else: fixed-length */
				mtype = DATA_FIXBINARY;
			}

			if (len & 0x8000) {
				prtype = DATA_NOT_NULL;
			} else {
				prtype = 0;
			}

			/* The compression attribute does not need to be
			specified during redo */
			dict_mem_table_add_col(table, NULL, NULL,
					       mtype, prtype, len & 0x7fff);

			if (is_gcs && n_fields_before_alter > 0
			    && n_fields_before_alter <= i) {
				dict_col_t*	col;

				col = dict_table_get_nth_col(table, i);

				/* Attach default value info. It is only a
				placeholder; the real default value is not
				needed here. */
				if (!dict_col_is_nullable(col)) {
					dict_mem_table_set_col_default(
						table, col, table->heap);
				}
			}

			dict_index_add_col(ind, table,
					   dict_table_get_nth_col(table, i),
					   0);
		}

		dict_table_add_system_columns(table, table->heap);

		if (n_uniq != n) {
			/* Identify DB_TRX_ID and DB_ROLL_PTR in the index:
			after checking the lengths of the fields at the
			expected positions, point them at the system columns
			that were just added to the table. */
			ulint	trx_id_pos = DATA_TRX_ID - 1 + n_uniq;
			ulint	roll_ptr_pos = DATA_ROLL_PTR - 1 + n_uniq;

			ut_a(DATA_TRX_ID_LEN
			     == dict_index_get_nth_col(ind, DATA_TRX_ID - 1
						       + n_uniq)->len);
			ut_a(DATA_ROLL_PTR_LEN
			     == dict_index_get_nth_col(ind, DATA_ROLL_PTR - 1
						       + n_uniq)->len);

			ind->fields[trx_id_pos].col
				= &table->cols[n + DATA_TRX_ID];
			ind->fields[roll_ptr_pos].col
				= &table->cols[n + DATA_ROLL_PTR];

			/* Copy each column's ind into the field's col_ind */
			ind->fields[trx_id_pos].col_ind
				= ind->fields[trx_id_pos].col->ind;
			ind->fields[roll_ptr_pos].col_ind
				= ind->fields[roll_ptr_pos].col->ind;
		}

		if (dict_index_is_gcs_clust_after_alter_table(ind)) {
			ut_ad(table->n_cols == table->n_def);
			ut_a(table->n_cols_before_alter_table > 0 && table->n_cols_before_alter_table <= table->n_cols);

			ind->n_fields_before_alter = n_fields_before_alter;
			ind->n_nullable_before_alter
				= dict_index_get_first_n_field_n_nullable(
					ind, ind->n_fields_before_alter);
		} else {
			ind->n_fields_before_alter = 0;
			ind->n_nullable_before_alter = 0;
		}
	}

	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
	ind->cached = TRUE;
	*index = ind;

	return(ptr);
}