/***********************************************************//**
Undoes a modify in secondary indexes when the undo record type is
TRX_UNDO_UPD_DEL_REC. Processing starts at node->index and node->index is
advanced as each index is handled; on error it is left pointing at the
index that failed.
@return DB_SUCCESS, or the first non-success code returned by
row_undo_mod_del_mark_or_remove_sec() (documented by the original author
as DB_OUT_OF_FILE_SPACE) */
static
ulint
row_undo_mod_upd_del_sec(
/*=====================*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	mem_heap_t*	entry_heap;
	ulint		err	= DB_SUCCESS;

	ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);

	/* Scratch heap for the index entries; emptied after every index. */
	entry_heap = mem_heap_create(1024);

	for (; node->index != NULL;
	     node->index = dict_table_get_next_index(node->index)) {

		dict_index_t*	sec_index;
		dtuple_t*	sec_entry;

		/* Skip all corrupted secondary indexes. This may move
		node->index forward, possibly past the last index. */
		dict_table_skip_corrupt_index(node->index);

		if (node->index == NULL) {
			break;
		}

		sec_index = node->index;

		sec_entry = row_build_index_entry(node->row, node->ext,
						  sec_index, entry_heap);

		if (UNIV_UNLIKELY(sec_entry == NULL)) {
			/* The database must have crashed after
			inserting a clustered index record but before
			writing all the externally stored columns of
			that record. Because secondary index entries
			are inserted after the clustered index record,
			we may assume that the secondary index record
			does not exist. However, this situation may
			only occur during the rollback of incomplete
			transactions. */
			ut_a(thr_is_recv(thr));
		} else {
			err = row_undo_mod_del_mark_or_remove_sec(
				node, thr, sec_index, sec_entry);

			if (err != DB_SUCCESS) {
				/* Leave node->index at the failing index. */
				break;
			}
		}

		mem_heap_empty(entry_heap);
	}

	mem_heap_free(entry_heap);

	return(err);
}
/***********************************************************//**
Undoes a fresh insert of a row to a table. A fresh insert means that
the same clustered index unique key did not have any record, even
delete marked, at the time of the insert. InnoDB is eager in a rollback:
if it figures out that an index record will be removed in the purge
anyway, it will remove it in the rollback.

The secondary index entries are removed first, one index at a time, and
the clustered index record last.
@return DB_SUCCESS if the table is gone or the clustered index record
cannot be found; otherwise the first non-success code from
row_undo_ins_remove_sec(), or the result of
row_undo_ins_remove_clust_rec() (documented by the original author as
DB_SUCCESS or DB_OUT_OF_FILE_SPACE) */
UNIV_INTERN
ulint
row_undo_ins(
/*=========*/
	undo_node_t*	node)	/*!< in: row undo node */
{
	dict_index_t*	clust_index;

	ut_ad(node);
	ut_ad(node->state == UNDO_NODE_INSERT);

	row_undo_ins_parse_undo_rec(node);

	if (node->table == NULL || !row_undo_search_clust_to_pcur(node)) {
		/* Nothing to undo for this record: release it. */
		trx_undo_rec_release(node->trx, node->undo_no);

		return(DB_SUCCESS);
	}

	/* Iterate over all the secondary indexes and undo the insert.
	The clustered index is the first index, so start from the one
	after it. */
	clust_index = dict_table_get_first_index(node->table);
	node->index = dict_table_get_next_index(clust_index);

	dict_table_skip_corrupt_index(node->index);

	while (node->index != NULL) {
		dtuple_t*	sec_entry = row_build_index_entry(
			node->row, node->ext, node->index, node->heap);

		if (UNIV_UNLIKELY(sec_entry == NULL)) {
			/* The database must have crashed after
			inserting a clustered index record but before
			writing all the externally stored columns of
			that record. Because secondary index entries
			are inserted after the clustered index record,
			we may assume that the secondary index record
			does not exist. However, this situation may
			only occur during the rollback of incomplete
			transactions. */
			ut_a(trx_is_recv(node->trx));
		} else {
			ulint	err;

			log_free_check();

			err = row_undo_ins_remove_sec(node->index, sec_entry);

			if (err != DB_SUCCESS) {

				return(err);
			}
		}

		dict_table_next_uncorrupted_index(node->index);
	}

	log_free_check();

	return(row_undo_ins_remove_clust_rec(node));
}
/***********************************************************//**
Undoes a modify operation on a row of a table. The secondary indexes
are rolled back first, by the routine matching node->rec_type, and the
clustered index is rolled back only if that step succeeded.
@return DB_SUCCESS or error code */
UNIV_INTERN
ulint
row_undo_mod(
/*=========*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	ulint	err;

	ut_ad(node && thr);
	ut_ad(node->state == UNDO_NODE_MODIFY);

	row_undo_mod_parse_undo_rec(node, thr);

	if (node->table == NULL || !row_undo_search_clust_to_pcur(node)) {
		/* It is already undone, or will be undone by another query
		thread, or table was dropped */

		trx_undo_rec_release(node->trx, node->undo_no);
		node->state = UNDO_NODE_FETCH_NEXT;

		return(DB_SUCCESS);
	}

	/* Position node->index on the first secondary index, i.e. the
	index following the first (clustered) one. */
	node->index = dict_table_get_next_index(
		dict_table_get_first_index(node->table));

	/* Skip all corrupted secondary indexes */
	dict_table_skip_corrupt_index(node->index);

	/* Dispatch on the undo record type. */
	if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {

		err = row_undo_mod_del_mark_sec(node, thr);

	} else if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {

		err = row_undo_mod_upd_exist_sec(node, thr);

	} else {
		ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);

		err = row_undo_mod_upd_del_sec(node, thr);
	}

	/* Touch the clustered index only when every secondary index
	was rolled back successfully. */
	if (err == DB_SUCCESS) {

		err = row_undo_mod_clust(node, thr);
	}

	return(err);
}
/***********************************************************//**
Undoes a modify in secondary indexes when the undo record type is
DEL_MARK. Processing starts at node->index and node->index is advanced
as each index is handled; on error it is left pointing at the index
that failed.
@return DB_SUCCESS, or the first non-success code returned by
row_undo_mod_del_unmark_sec_and_undo_update() (documented by the
original author as DB_OUT_OF_FILE_SPACE) */
static
ulint
row_undo_mod_del_mark_sec(
/*======================*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	mem_heap_t*	heap;
	ulint		err	= DB_SUCCESS;

	/* Scratch heap for the index entries built below. */
	heap = mem_heap_create(1024);

	while (node->index != NULL) {
		dict_index_t*	index;
		dtuple_t*	entry;

		/* Skip all corrupted secondary indexes. This may move
		node->index forward, possibly past the last index. */
		dict_table_skip_corrupt_index(node->index);

		if (!node->index) {
			break;
		}

		index = node->index;

		entry = row_build_index_entry(node->row, node->ext,
					      index, heap);
		ut_a(entry);

		/* Try the BTR_MODIFY_LEAF mode first; if that reports
		DB_FAIL, retry with BTR_MODIFY_TREE. */
		err = row_undo_mod_del_unmark_sec_and_undo_update(
			BTR_MODIFY_LEAF, thr, index, entry);

		if (err == DB_FAIL) {
			err = row_undo_mod_del_unmark_sec_and_undo_update(
				BTR_MODIFY_TREE, thr, index, entry);
		}

		if (err != DB_SUCCESS) {
			/* Leave node->index at the failing index. */
			break;
		}

		/* The entry is no longer needed: release its memory now
		instead of letting the entries of all indexes pile up
		in the heap until the function returns. */
		mem_heap_empty(heap);

		node->index = dict_table_get_next_index(node->index);
	}

	mem_heap_free(heap);

	return(err);
}
/***********************************************************//**
Purges a delete marking of a record. For every remaining non-corrupted
index starting at node->index, the index entry is built from node->row
and passed to row_purge_remove_sec_if_poss(); afterwards
row_purge_remove_clust_if_poss() is called for the clustered index
record. */
static
void
row_purge_del_mark(
/*===============*/
	purge_node_t*	node)	/*!< in: row purge node */
{
	mem_heap_t*	heap;

	ut_ad(node);

	/* Scratch heap for the index entries built below. */
	heap = mem_heap_create(1024);

	while (node->index != NULL) {
		dict_index_t*	index;
		dtuple_t*	entry;

		/* Skip corrupted secondary indexes. This may move
		node->index forward, possibly past the last index. */
		dict_table_skip_corrupt_index(node->index);

		if (!node->index) {
			break;
		}

		index = node->index;

		/* Build the index entry */
		entry = row_build_index_entry(node->row, NULL, index, heap);
		ut_a(entry);

		row_purge_remove_sec_if_poss(node, index, entry);

		/* The entry is no longer needed: release its memory now
		instead of letting the entries of all indexes pile up
		in the heap until the loop ends. */
		mem_heap_empty(heap);

		node->index = dict_table_get_next_index(node->index);
	}

	mem_heap_free(heap);

	row_purge_remove_clust_if_poss(node);
}
/***********************************************************//** Purges an update of an existing record. Also purges an update of a delete marked record if that record contained an externally stored field. */ static void row_purge_upd_exist_or_extern_func( /*===============================*/ #ifdef UNIV_DEBUG const que_thr_t*thr, /*!< in: query thread */ #endif /* UNIV_DEBUG */ purge_node_t* node) /*!< in: row purge node */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; ibool is_insert; ulint rseg_id; ulint page_no; ulint offset; ulint i; mtr_t mtr; ut_ad(node); if (node->rec_type == TRX_UNDO_UPD_DEL_REC || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { goto skip_secondaries; } heap = mem_heap_create(1024); while (node->index != NULL) { dict_table_skip_corrupt_index(node->index); if (!node->index) { break; } index = node->index; if (row_upd_changes_ord_field_binary(node->index, node->update, thr, NULL, NULL)) { /* Build the older version of the index entry */ entry = row_build_index_entry(node->row, NULL, index, heap); ut_a(entry); row_purge_remove_sec_if_poss(node, index, entry); } node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); skip_secondaries: /* Free possible externally stored fields */ for (i = 0; i < upd_get_n_fields(node->update); i++) { const upd_field_t* ufield = upd_get_nth_field(node->update, i); if (dfield_is_ext(&ufield->new_val)) { buf_block_t* block; ulint internal_offset; byte* data_field; /* We use the fact that new_val points to node->undo_rec and get thus the offset of dfield data inside the undo record. 
Then we can calculate from node->roll_ptr the file address of the new_val data */ internal_offset = ((const byte*) dfield_get_data(&ufield->new_val)) - node->undo_rec; ut_a(internal_offset < UNIV_PAGE_SIZE); trx_undo_decode_roll_ptr(node->roll_ptr, &is_insert, &rseg_id, &page_no, &offset); mtr_start(&mtr); /* We have to acquire an X-latch to the clustered index tree */ index = dict_table_get_first_index(node->table); mtr_x_lock(dict_index_get_lock(index), &mtr); /* NOTE: we must also acquire an X-latch to the root page of the tree. We will need it when we free pages from the tree. If the tree is of height 1, the tree X-latch does NOT protect the root page, because it is also a leaf page. Since we will have a latch on an undo log page, we would break the latching order if we would only later latch the root page of such a tree! */ btr_root_get(index, &mtr); /* We assume in purge of externally stored fields that the space id of the undo log record is 0! */ block = buf_page_get(0, 0, page_no, RW_X_LATCH, &mtr); buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE); data_field = buf_block_get_frame(block) + offset + internal_offset; ut_a(dfield_get_len(&ufield->new_val) >= BTR_EXTERN_FIELD_REF_SIZE); btr_free_externally_stored_field( index, data_field + dfield_get_len(&ufield->new_val) - BTR_EXTERN_FIELD_REF_SIZE, NULL, NULL, NULL, 0, RB_NONE, &mtr); mtr_commit(&mtr); } } }
/***********************************************************//**
Undoes a modify in secondary indexes when the undo record type is
UPD_EXIST. Returns immediately when the update is flagged
UPD_NODE_NO_ORD_CHANGE. Otherwise processing starts at node->index and
node->index is advanced as each index is handled; on error it is left
pointing at the index that failed.
@return DB_SUCCESS, or the first non-success code returned by
row_undo_mod_del_mark_or_remove_sec() or
row_undo_mod_del_unmark_sec_and_undo_update() (documented by the
original author as DB_OUT_OF_FILE_SPACE) */
static
ulint
row_undo_mod_upd_exist_sec(
/*=======================*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	mem_heap_t*	entry_heap;
	ulint		err	= DB_SUCCESS;

	if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
		/* No change in secondary indexes */

		return(DB_SUCCESS);
	}

	entry_heap = mem_heap_create(1024);

	for (; node->index != NULL;
	     node->index = dict_table_get_next_index(node->index)) {

		dict_index_t*	sec_index;
		dtuple_t*	sec_entry;

		/* Skip all corrupted secondary indexes. This may move
		node->index forward, possibly past the last index. */
		dict_table_skip_corrupt_index(node->index);

		if (node->index == NULL) {
			break;
		}

		sec_index = node->index;

		if (!row_upd_changes_ord_field_binary(sec_index, node->update,
						      thr,
						      node->row, node->ext)) {
			/* The update did not touch the ordering fields
			of this index: nothing to undo here. */
			continue;
		}

		/* Build the newest version of the index entry */
		sec_entry = row_build_index_entry(node->row, node->ext,
						  sec_index, entry_heap);

		if (UNIV_UNLIKELY(sec_entry == NULL)) {
			/* The server must have crashed in
			row_upd_clust_rec_by_insert() before
			the updated externally stored columns (BLOBs)
			of the new clustered index entry were
			written. */

			/* The table must be in DYNAMIC or COMPRESSED
			format. REDUNDANT and COMPACT formats
			store a local 768-byte prefix of each
			externally stored column. */
			ut_a(dict_table_get_format(sec_index->table)
			     >= DICT_TF_FORMAT_ZIP);

			/* This is only legitimate when
			rolling back an incomplete transaction
			after crash recovery. */
			ut_a(thr_get_trx(thr)->is_recovered);

			/* The server must have crashed before
			completing the insert of the new
			clustered index entry and before
			inserting to the secondary indexes.
			Because node->row was not yet written
			to this index, we can ignore it. But
			we must restore node->undo_row. */
		} else {
			/* NOTE that if we updated the fields of a
			delete-marked secondary index record so that
			alphabetically they stayed the same, e.g.,
			'abc' -> 'aBc', we cannot return to the
			original values because we do not know them.
			But this should not cause problems because
			in row0sel.c, in queries we always retrieve
			the clustered index record or an earlier
			version of it, if the secondary index record
			through which we do the search is
			delete-marked. */

			err = row_undo_mod_del_mark_or_remove_sec(
				node, thr, sec_index, sec_entry);

			if (err != DB_SUCCESS) {
				/* Leave node->index at the failing
				index. */
				break;
			}

			mem_heap_empty(entry_heap);
		}

		/* We may have to update the delete mark in the
		secondary index record of the previous version of
		the row. We also need to update the fields of
		the secondary index record if we updated its fields
		but alphabetically they stayed the same, e.g.,
		'abc' -> 'aBc'. */
		sec_entry = row_build_index_entry(node->undo_row,
						  node->undo_ext,
						  sec_index, entry_heap);
		ut_a(sec_entry);

		/* Try the BTR_MODIFY_LEAF mode first; if that reports
		DB_FAIL, retry with BTR_MODIFY_TREE. */
		err = row_undo_mod_del_unmark_sec_and_undo_update(
			BTR_MODIFY_LEAF, thr, sec_index, sec_entry);

		if (err == DB_FAIL) {
			err = row_undo_mod_del_unmark_sec_and_undo_update(
				BTR_MODIFY_TREE, thr, sec_index, sec_entry);
		}

		if (err != DB_SUCCESS) {
			/* Leave node->index at the failing index. */
			break;
		}
	}

	mem_heap_free(entry_heap);

	return(err);
}