/***********************************************************//**
Searches the clustered index for the record identified by node->ref,
using node->pcur. The record is accepted only when its roll pointer
equals node->roll_ptr; then a copy of the row is built into node->heap
and the cursor position is stored. The mini-transaction is committed
through the cursor before returning, whether or not the record matched.
@return TRUE if found; NOTE the node->pcur must be closed by the caller,
regardless of the return value */
ibool
row_undo_search_clust_to_pcur(
/*==========================*/
	undo_node_t*	node)	/* in: row undo node */
{
	dict_index_t*	index;
	rec_t*		rec;
	ibool		found;
	ibool		ret = FALSE;
	mtr_t		mtr;
	mem_heap_t*	heap = NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets = offsets_;

	/* The first slot of the offsets array holds its capacity. */
	offsets_[0] = sizeof(offsets_) / sizeof(offsets_[0]);

	mtr_start(&mtr);

	index = dict_table_get_first_index(node->table);

	found = row_search_on_row_ref(&(node->pcur), BTR_MODIFY_LEAF,
				      node->table, node->ref, &mtr);

	rec = btr_pcur_get_rec(&(node->pcur));

	offsets = rec_get_offsets(rec, index, offsets,
				  ULINT_UNDEFINED, &heap);

	if (found
	    && 0 == ut_dulint_cmp(node->roll_ptr,
				  row_get_rec_roll_ptr(rec, index,
						       offsets))) {

		node->row = row_build(ROW_COPY_DATA, index, rec,
				      offsets, node->heap);

		btr_pcur_store_position(&(node->pcur), &mtr);

		ret = TRUE;
	}

	/* Otherwise ret stays FALSE. Note from the original author: we
	must remove the reservation on the undo log record BEFORE
	releasing the latch on the clustered index page: this is to make
	sure that some thread will eventually undo the modification
	corresponding to node->roll_ptr. */

	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	return(ret);
}
/***********************************************************//**
Undoes a modify in a clustered index record. The persistent cursor in
node is first restored in the requested latch mode; the update vector
node->update is then applied with an optimistic update
(BTR_MODIFY_LEAF) or a pessimistic one (BTR_MODIFY_TREE).
@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
static
ulint
row_undo_mod_clust_low(
/*===================*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr,	/*!< in: query thread */
	mtr_t*		mtr,	/*!< in: mtr; must be committed before
				latching any further pages */
	ulint		mode)	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
	/* Both update variants are invoked with the same flag set. */
	const ulint	flags = BTR_NO_LOCKING_FLAG
		| BTR_NO_UNDO_LOG_FLAG
		| BTR_KEEP_SYS_FLAG;
	btr_cur_t*	cursor;
	mem_heap_t*	heap = NULL;
	big_rec_t*	dummy_big_rec;
	ulint		err;
#ifdef UNIV_DEBUG
	ibool		restored;
#endif /* UNIV_DEBUG */

	cursor = btr_pcur_get_btr_cur(&(node->pcur));

	/* The restore result is only captured (and asserted on) in
	debug builds. */
#ifdef UNIV_DEBUG
	restored =
#endif /* UNIV_DEBUG */
	btr_pcur_restore_position(mode, &(node->pcur), mtr);

	ut_ad(restored);

	if (mode == BTR_MODIFY_LEAF) {

		return(btr_cur_optimistic_update(flags, cursor,
						 node->update,
						 node->cmpl_info,
						 thr, mtr));
	}

	ut_ad(mode == BTR_MODIFY_TREE);

	err = btr_cur_pessimistic_update(flags, cursor,
					 &heap, &dummy_big_rec,
					 node->update, node->cmpl_info,
					 thr, mtr);

	/* No big record is expected back from this update. */
	ut_a(!dummy_big_rec);

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	return(err);
}
/******************************************************************//**
Releases the buffer block held in heap->free_block, if there is one,
and clears the field. */
UNIV_INTERN
void
mem_heap_free_block_free(
/*=====================*/
	mem_heap_t*	heap)	/*!< in: heap */
{
	if (heap->free_block == NULL) {
		/* Nothing is cached: nothing to release. */
		return;
	}

	buf_block_free(heap->free_block);

	heap->free_block = NULL;
}
/********************************************************//**
Parses a log record written by mlog_write_string. The record starts
with a 2-byte page offset and a 2-byte length, followed by that many
data bytes. When a page is supplied the data is copied to
page + offset, and also into the compressed page data if page_zip is
given.
@return parsed record end, NULL if not a complete record */
UNIV_INTERN
byte*
mlog_parse_string(
/*==============*/
	byte*	ptr,	/*!< in: buffer */
	byte*	end_ptr,/*!< in: buffer end */
	byte*	page,	/*!< in: page where to apply the log record, or NULL */
	void*	page_zip)/*!< in/out: compressed page, or NULL */
{
	ulint	offset;
	ulint	len;
	byte*	data;

	ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);

	/* The fixed header (offset + length) takes 4 bytes. */
	if (end_ptr < ptr + 4) {

		return(NULL);
	}

	offset = mach_read_from_2(ptr);
	len = mach_read_from_2(ptr + 2);
	data = ptr + 4;

	if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
	    || UNIV_UNLIKELY(len + offset > UNIV_PAGE_SIZE)) {

		/* The string would not fit inside a page. */
		recv_sys->found_corrupt_log = TRUE;

		return(NULL);
	}

	if (end_ptr < data + len) {

		return(NULL);
	}

	if (page) {
		if (UNIV_LIKELY_NULL(page_zip)) {
			memcpy(((page_zip_des_t*) page_zip)->data
			       + offset, data, len);
		}

		memcpy(page + offset, data, len);
	}

	return(data + len);
}
/********************************************************//**
Parses a log record written by mlog_write_ulint or mlog_write_dulint.
The record starts with a 2-byte page offset followed by the value in
compressed form. When a page is supplied, the value is written at
page + offset, and also into the compressed page data if page_zip is
given.

The record is flagged as corrupt (recv_sys->found_corrupt_log) when the
offset is outside the page, when the value written for the given type
would extend past the end of the page, when a 1-byte or 2-byte value
does not fit in its width, or when the type is not one of the four
handled MLOG_nBYTE(S) types.
@return parsed record end, NULL if not a complete record or a corrupt
record */
UNIV_INTERN
byte*
mlog_parse_nbytes(
/*==============*/
	ulint	type,	/*!< in: log record type: MLOG_1BYTE, ... */
	byte*	ptr,	/*!< in: buffer */
	byte*	end_ptr,/*!< in: buffer end */
	byte*	page,	/*!< in: page where to apply the log record, or NULL */
	void*	page_zip)/*!< in/out: compressed page, or NULL */
{
	ulint	offset;
	ulint	width;
	ulint	val;
	dulint	dval;

	ut_a(type <= MLOG_8BYTES);
	ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);

	if (end_ptr < ptr + 2) {

		return(NULL);
	}

	offset = mach_read_from_2(ptr);
	ptr += 2;

	/* Number of bytes that will be stored at page + offset; this
	matches the mach_write_to_N() call used for each type below.
	Unknown types get width 0 here and are rejected after the value
	has been parsed. */
	switch (type) {
	case MLOG_1BYTE:
		width = 1;
		break;
	case MLOG_2BYTES:
		width = 2;
		break;
	case MLOG_4BYTES:
		width = 4;
		break;
	case MLOG_8BYTES:
		width = 8;
		break;
	default:
		width = 0;
		break;
	}

	/* Reject records whose write would start outside the page or
	run past its end, so that a corrupt log record cannot cause a
	write beyond the page buffer. */
	if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
	    || UNIV_UNLIKELY(offset + width > UNIV_PAGE_SIZE)) {

		goto corrupt;
	}

	if (type == MLOG_8BYTES) {
		ptr = mach_dulint_parse_compressed(ptr, end_ptr, &dval);

		if (ptr == NULL) {

			return(NULL);
		}

		if (page) {
			if (UNIV_LIKELY_NULL(page_zip)) {
				mach_write_to_8
					(((page_zip_des_t*) page_zip)->data
					 + offset, dval);
			}
			mach_write_to_8(page + offset, dval);
		}

		return(ptr);
	}

	ptr = mach_parse_compressed(ptr, end_ptr, &val);

	if (ptr == NULL) {

		return(NULL);
	}

	switch (type) {
	case MLOG_1BYTE:
		if (UNIV_UNLIKELY(val > 0xFFUL)) {
			goto corrupt;
		}
		if (page) {
			if (UNIV_LIKELY_NULL(page_zip)) {
				mach_write_to_1
					(((page_zip_des_t*) page_zip)->data
					 + offset, val);
			}
			mach_write_to_1(page + offset, val);
		}
		break;
	case MLOG_2BYTES:
		if (UNIV_UNLIKELY(val > 0xFFFFUL)) {
			goto corrupt;
		}
		if (page) {
			if (UNIV_LIKELY_NULL(page_zip)) {
				mach_write_to_2
					(((page_zip_des_t*) page_zip)->data
					 + offset, val);
			}
			mach_write_to_2(page + offset, val);
		}
		break;
	case MLOG_4BYTES:
		if (page) {
			if (UNIV_LIKELY_NULL(page_zip)) {
				mach_write_to_4
					(((page_zip_des_t*) page_zip)->data
					 + offset, val);
			}
			mach_write_to_4(page + offset, val);
		}
		break;
	default:
		goto corrupt;
	}

	return(ptr);

corrupt:
	recv_sys->found_corrupt_log = TRUE;

	return(NULL);
}
/*******************************************************************//** Builds from a secondary index record a row reference with which we can search the clustered index record. */ UNIV_INTERN void row_build_row_ref_in_tuple( /*=======================*/ dtuple_t* ref, /*!< in/out: row reference built; see the NOTE below! */ const rec_t* rec, /*!< in: record in the index; NOTE: the data fields in ref will point directly into this record, therefore, the buffer page of this record must be at least s-latched and the latch held as long as the row reference is used! */ const dict_index_t* index, /*!< in: secondary index */ ulint* offsets,/*!< in: rec_get_offsets(rec, index) or NULL */ trx_t* trx) /*!< in: transaction */ { const dict_index_t* clust_index; dfield_t* dfield; const byte* field; ulint len; ulint ref_len; ulint pos; ulint clust_col_prefix_len; ulint i; mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; rec_offs_init(offsets_); ut_a(ref); ut_a(index); ut_a(rec); ut_ad(!dict_index_is_clust(index)); if (UNIV_UNLIKELY(!index->table)) { fputs("InnoDB: table ", stderr); notfound: ut_print_name(stderr, trx, TRUE, index->table_name); fputs(" for index ", stderr); ut_print_name(stderr, trx, FALSE, index->name); fputs(" not found\n", stderr); ut_error; } clust_index = dict_table_get_first_index(index->table); if (UNIV_UNLIKELY(!clust_index)) { fputs("InnoDB: clust index for table ", stderr); goto notfound; } if (!offsets) { offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap); } else { ut_ad(rec_offs_validate(rec, index, offsets)); } /* Secondary indexes must not contain externally stored columns. 
*/ ut_ad(!rec_offs_any_extern(offsets)); ref_len = dict_index_get_n_unique(clust_index); ut_ad(ref_len == dtuple_get_n_fields(ref)); dict_index_copy_types(ref, clust_index, ref_len); for (i = 0; i < ref_len; i++) { dfield = dtuple_get_nth_field(ref, i); pos = dict_index_get_nth_field_pos(index, clust_index, i); ut_a(pos != ULINT_UNDEFINED); field = rec_get_nth_field(rec, offsets, pos, &len); dfield_set_data(dfield, field, len); /* If the primary key contains a column prefix, then the secondary index may contain a longer prefix of the same column, or the full column, and we must adjust the length accordingly. */ clust_col_prefix_len = dict_index_get_nth_field( clust_index, i)->prefix_len; if (clust_col_prefix_len > 0) { if (len != UNIV_SQL_NULL) { const dtype_t* dtype = dfield_get_type(dfield); dfield_set_len(dfield, dtype_get_at_most_n_mbchars( dtype->prtype, dtype->mbminlen, dtype->mbmaxlen, clust_col_prefix_len, len, (char*) field)); } } } ut_ad(dtuple_check_typed(ref)); if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } }
/*****************************************************************//**
When an insert or purge to a table is performed, this function builds
the entry to be inserted into or purged from an index on the table.
Each index field is copied from the matching column of row; columns
that are stored externally or indexed by a prefix are then adjusted.
@return index entry which should be inserted or purged, or NULL if the
externally stored columns in the clustered index record are
unavailable and ext != NULL */
UNIV_INTERN
dtuple_t*
row_build_index_entry(
/*==================*/
	const dtuple_t*	row,	/*!< in: row which should be
				inserted or purged */
	row_ext_t*	ext,	/*!< in: externally stored column prefixes,
				or NULL */
	dict_index_t*	index,	/*!< in: index on the table */
	mem_heap_t*	heap)	/*!< in: memory heap from which the memory for
				the index entry is allocated */
{
	dtuple_t*	entry;
	ulint		n_entry_fields;
	ulint		pos;

	ut_ad(row && index && heap);
	ut_ad(dtuple_check_typed(row));

	n_entry_fields = dict_index_get_n_fields(index);
	entry = dtuple_create(heap, n_entry_fields);

	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
		dtuple_set_n_fields_cmp(entry, n_entry_fields);
		/* There may only be externally stored columns
		in a clustered index B-tree of a user table. */
		ut_a(!ext);
	} else {
		dtuple_set_n_fields_cmp(
			entry, dict_index_get_n_unique_in_tree(index));
	}

	for (pos = 0; pos < n_entry_fields; pos++) {
		const dict_field_t*	ind_field
			= dict_index_get_nth_field(index, pos);
		const dict_col_t*	col
			= ind_field->col;
		ulint			col_no
			= dict_col_get_no(col);
		dfield_t*		dst
			= dtuple_get_nth_field(entry, pos);
		const dfield_t*		src
			= dtuple_get_nth_field(row, col_no);
		ulint			len
			= dfield_get_len(src);

		dfield_copy(dst, src);

		if (dfield_is_null(dst)) {

			continue;
		}

		if (ind_field->prefix_len == 0) {
			if (!dfield_is_ext(dst)
			    || dict_index_is_clust(index)) {
				/* The dfield_copy() above suffices for
				columns that are stored in-page, or for
				clustered index record columns that are
				not part of a column prefix in the
				PRIMARY KEY. */
				continue;
			}
		}

		/* If the column is stored externally (off-page) in
		the clustered index, it must be an ordering field in
		the secondary index.  In the Antelope format, only
		prefix-indexed columns may be stored off-page in the
		clustered index record. In the Barracuda format, also
		fully indexed long CHAR or VARCHAR columns may be
		stored off-page. */
		ut_ad(col->ord_part);

		if (UNIV_LIKELY_NULL(ext)) {
			/* See if the column is stored externally. */
			const byte*	cached
				= row_ext_lookup(ext, col_no, &len);

			if (UNIV_LIKELY_NULL(cached)) {
				if (UNIV_UNLIKELY(cached == field_ref_zero)) {

					return(NULL);
				}

				dfield_set_data(dst, cached, len);
			}

			if (ind_field->prefix_len == 0) {
				/* In the Barracuda format
				(ROW_FORMAT=DYNAMIC or
				ROW_FORMAT=COMPRESSED), we can have a
				secondary index on an entire column
				that is stored off-page in the
				clustered index. As this is not a
				prefix index (prefix_len == 0),
				include the entire off-page column in
				the secondary index record. */
				continue;
			}
		} else if (dfield_is_ext(dst)) {
			/* This table is either in Antelope format
			(ROW_FORMAT=REDUNDANT or ROW_FORMAT=COMPACT)
			or a purge record where the ordered part of
			the field is not external.
			In Antelope, the maximum column prefix
			index length is 767 bytes, and the clustered
			index record contains a 768-byte prefix of
			each off-page column. */
			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);

			/* Drop the trailing external field reference
			from the length. */
			len -= BTR_EXTERN_FIELD_REF_SIZE;
			dfield_set_len(dst, len);
		}

		/* If a column prefix index, take only the prefix. */
		if (ind_field->prefix_len) {
			len = dtype_get_at_most_n_mbchars(
				col->prtype, col->mbminlen, col->mbmaxlen,
				ind_field->prefix_len, len,
				dfield_get_data(dst));
			dfield_set_len(dst, len);
		}
	}

	ut_ad(dtuple_check_typed(entry));

	return(entry);
}
/*******************************************************************//**
An inverse function to row_build_index_entry. Builds a row from a
record in a clustered index. With a type other than ROW_COPY_POINTERS
the record is first copied to heap and the row is built from the copy.
If ext is given, it receives a cache created by row_ext_create() for
the externally stored columns whose ord_part flag is set, or NULL when
there are none.
@return own: row built; see the NOTE below! */
UNIV_INTERN
dtuple_t*
row_build(
/*======*/
	ulint			type,	/*!< in: ROW_COPY_POINTERS or
					ROW_COPY_DATA; the latter
					copies also the data fields to
					heap while the first only
					places pointers to data fields
					on the index page, and thus is
					more efficient */
	const dict_index_t*	index,	/*!< in: clustered index */
	const rec_t*		rec,	/*!< in: record in the clustered
					index; NOTE: in the case
					ROW_COPY_POINTERS the data
					fields in the row will point
					directly into this record,
					therefore, the buffer page of
					this record must be at least
					s-latched and the latch held
					as long as the row dtuple is used! */
	const ulint*		offsets,/*!< in: rec_get_offsets(rec,index)
					or NULL, in which case this function
					will invoke rec_get_offsets() */
	const dict_table_t*	col_table,
					/*!< in: table, to check which
					externally stored columns
					occur in the ordering columns
					of an index, or NULL if
					index->table should be
					consulted instead */
	row_ext_t**		ext,	/*!< out, own: cache of
					externally stored column
					prefixes, or NULL */
	mem_heap_t*		heap)	/*!< in: memory heap from which
					the memory needed is allocated */
{
	dtuple_t*		row;
	const dict_table_t*	table;
	ulint			n_fields;
	ulint			n_ext_cols;
	ulint*			ext_cols	= NULL; /* remove warning */
	ulint			n_ord_ext	= 0;
	ulint			len;
	ulint			row_len;
	byte*			copy_buf;
	ulint			i;
	mem_heap_t*		tmp_heap	= NULL;
	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
	rec_offs_init(offsets_);

	ut_ad(index && rec && heap);
	ut_ad(dict_index_is_clust(index));
	ut_ad(!mutex_own(&kernel_mutex));

	if (offsets) {
		ut_ad(rec_offs_validate(rec, index, offsets));
	} else {
		offsets = rec_get_offsets(rec, index, offsets_,
					  ULINT_UNDEFINED, &tmp_heap);
	}

#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
	if (rec_offs_any_null_extern(rec, offsets)) {
		/* This condition can occur during crash recovery
		before trx_rollback_active() has completed execution,
		or when a concurrently executing
		row_ins_index_entry_low() has committed the B-tree
		mini-transaction but has not yet managed to restore
		the cursor position for writing the big_rec. */
		ut_a(trx_undo_roll_ptr_is_insert(
			     row_get_rec_roll_ptr(rec, index, offsets)));
	}
#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */

	if (type != ROW_COPY_POINTERS) {
		/* Take a copy of rec to heap */
		copy_buf = mem_heap_alloc(heap, rec_offs_size(offsets));
		rec = rec_copy(copy_buf, rec, offsets);
		/* Avoid a debug assertion in rec_offs_validate(). */
		rec_offs_make_valid(rec, index, (ulint*) offsets);
	}

	table = index->table;
	row_len = dict_table_get_n_cols(table);

	row = dtuple_create(heap, row_len);

	dict_table_copy_types(row, table);

	dtuple_set_info_bits(row, rec_get_info_bits(
				     rec, dict_table_is_comp(table)));

	n_fields = rec_offs_n_fields(offsets);
	n_ext_cols = rec_offs_n_extern(offsets);

	if (n_ext_cols) {
		/* Room for the column numbers of all externally
		stored fields of the record. */
		ext_cols = mem_heap_alloc(heap,
					  n_ext_cols * sizeof *ext_cols);
	}

	for (i = 0; i < n_fields; i++) {
		dict_field_t*		ind_field
			= dict_index_get_nth_field(index, i);
		const dict_col_t*	col
			= dict_field_get_col(ind_field);
		ulint			col_no
			= dict_col_get_no(col);
		dfield_t*		dfield
			= dtuple_get_nth_field(row, col_no);

		if (ind_field->prefix_len == 0) {

			const byte*	field = rec_get_nth_field(
				rec, offsets, i, &len);

			dfield_set_data(dfield, field, len);
		}

		if (!rec_offs_nth_extern(offsets, i)) {

			continue;
		}

		dfield_set_ext(dfield);

		if (UNIV_LIKELY_NULL(col_table)) {
			/* Consult col_table instead of index->table
			for the ord_part flag of the column. */
			ut_a(col_no < dict_table_get_n_cols(col_table));
			col = dict_table_get_nth_col(col_table, col_no);
		}

		if (col->ord_part) {
			/* We will have to fetch prefixes of
			externally stored columns that are
			referenced by column prefixes. */
			ext_cols[n_ord_ext++] = col_no;
		}
	}

	ut_ad(dtuple_check_typed(row));

	if (ext) {
		if (n_ord_ext) {
			*ext = row_ext_create(n_ord_ext, ext_cols, row,
					      dict_table_zip_size(
						      index->table),
					      heap);
		} else {
			*ext = NULL;
		}
	} else {
		/* REDUNDANT and COMPACT formats store a local
		768-byte prefix of each externally stored
		column. No cache is needed. */
		ut_ad(dict_table_get_format(index->table)
		      < DICT_TF_FORMAT_ZIP);
	}

	if (tmp_heap) {
		mem_heap_free(tmp_heap);
	}

	return(row);
}
/***********************************************************//**
Looks for the clustered index record when node has the row reference.
The pcur in node is used in the search. The record is accepted only if
its roll pointer equals node->roll_ptr. If found, stores the row (and,
when node->update is set, an updated copy in node->undo_row) to node,
and stores the position of pcur, and detaches it. The pcur must be
closed by the caller in any case.
@return TRUE if found; NOTE the node->pcur must be closed by the
caller, regardless of the return value */
UNIV_INTERN
ibool
row_undo_search_clust_to_pcur(
/*==========================*/
	undo_node_t*	node)	/*!< in: row undo node */
{
	dict_index_t*	index;
	rec_t*		rec;
	ibool		found;
	ibool		ret = FALSE;
	mtr_t		mtr;
	mem_heap_t*	heap = NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets = offsets_;
	rec_offs_init(offsets_);

	mtr_start(&mtr);

	index = dict_table_get_first_index(node->table);

	found = row_search_on_row_ref(&(node->pcur), BTR_MODIFY_LEAF,
				      node->table, node->ref, &mtr);

	rec = btr_pcur_get_rec(&(node->pcur));

	offsets = rec_get_offsets(rec, index, offsets,
				  ULINT_UNDEFINED, &heap);

	if (found
	    && 0 == ut_dulint_cmp(node->roll_ptr,
				  row_get_rec_roll_ptr(rec, index,
						       offsets))) {
		row_ext_t**	ext = NULL;

		if (dict_table_get_format(node->table)
		    < DICT_TF_FORMAT_ZIP) {
			/* REDUNDANT and COMPACT formats store a local
			768-byte prefix of each externally stored
			column. No cache is needed. */
			node->ext = NULL;
		} else {
			/* In DYNAMIC or COMPRESSED format, there is
			no prefix of externally stored columns in the
			clustered index record. Build a cache of
			column prefixes. */
			ext = &node->ext;
		}

		node->row = row_build(ROW_COPY_DATA, index, rec,
				      offsets, NULL, ext, node->heap);

		if (node->update) {
			/* Keep a second copy of the row with the
			update vector applied to it. */
			node->undo_row = dtuple_copy(node->row, node->heap);
			row_upd_replace(node->undo_row, &node->undo_ext,
					index, node->update, node->heap);
		} else {
			node->undo_row = NULL;
			node->undo_ext = NULL;
		}

		btr_pcur_store_position(&(node->pcur), &mtr);

		ret = TRUE;
	}

	/* Otherwise ret stays FALSE. Note from the original author: we
	must remove the reservation on the undo log record BEFORE
	releasing the latch on the clustered index page: this is to make
	sure that some thread will eventually undo the modification
	corresponding to node->roll_ptr. */

	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	return(ret);
}
/***********************************************************//**
Removes a delete marked clustered index record if possible. The
cursor in node is repositioned first; the record is deleted only when
it is still there and its roll pointer equals node->roll_ptr. The
mini-transaction is committed on every return path.
@return TRUE if success, or if not found, or if modified after the
delete marking */
static
ibool
row_purge_remove_clust_if_poss_low(
/*===============================*/
	purge_node_t*	node,	/*!< in: row purge node */
	ulint		mode)	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
	dict_index_t*	index;
	btr_pcur_t*	pcur;
	btr_cur_t*	btr_cur;
	ibool		success;
	ibool		unchanged;
	ulint		err;
	mtr_t		mtr;
	rec_t*		rec;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	rec_offs_init(offsets_);

	index = dict_table_get_first_index(node->table);

	pcur = &(node->pcur);
	btr_cur = btr_pcur_get_btr_cur(pcur);

	log_free_check();

	mtr_start(&mtr);

	if (!row_purge_reposition_pcur(mode, node, &mtr)) {
		/* The record is already removed */

		btr_pcur_commit_specify_mtr(pcur, &mtr);

		return(TRUE);
	}

	rec = btr_pcur_get_rec(pcur);

	unchanged = (node->roll_ptr
		     == row_get_rec_roll_ptr(
			     rec, index,
			     rec_get_offsets(rec, index, offsets_,
					     ULINT_UNDEFINED, &heap)));

	/* The offsets are not needed beyond the comparison above. */
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	if (!unchanged) {
		/* Someone else has modified the record later: do not
		remove */
		btr_pcur_commit_specify_mtr(pcur, &mtr);

		return(TRUE);
	}

	if (mode == BTR_MODIFY_LEAF) {
		success = btr_cur_optimistic_delete(btr_cur, &mtr);
	} else {
		ut_ad(mode == BTR_MODIFY_TREE);
		btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
					   RB_NONE, &mtr);

		switch (err) {
		case DB_SUCCESS:
			success = TRUE;
			break;
		case DB_OUT_OF_FILE_SPACE:
			success = FALSE;
			break;
		default:
			/* No other error code is expected here. */
			ut_error;
		}
	}

	btr_pcur_commit_specify_mtr(pcur, &mtr);

	return(success);
}