/***********************************************************//** Undoes a fresh insert of a row to a table. A fresh insert means that the same clustered index unique key did not have any record, even delete marked, at the time of the insert. InnoDB is eager in a rollback: if it figures out that an index record will be removed in the purge anyway, it will remove it in the rollback. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN ulint row_undo_ins( /*=========*/ undo_node_t* node) /*!< in: row undo node */ { ut_ad(node); ut_ad(node->state == UNDO_NODE_INSERT); row_undo_ins_parse_undo_rec(node); if (!node->table || !row_undo_search_clust_to_pcur(node)) { trx_undo_rec_release(node->trx, node->undo_no); return(DB_SUCCESS); } /* Iterate over all the indexes and undo the insert.*/ /* Skip the clustered index (the first index) */ node->index = dict_table_get_next_index( dict_table_get_first_index(node->table)); while (node->index != NULL) { dtuple_t* entry; ulint err; entry = row_build_index_entry(node->row, node->ext, node->index, node->heap); if (UNIV_UNLIKELY(!entry)) { /* The database must have crashed after inserting a clustered index record but before writing all the externally stored columns of that record, or a statement is being rolled back because an error occurred while storing off-page columns. Because secondary index entries are inserted after the clustered index record, we may assume that the secondary index record does not exist. */ } else { log_free_check(); err = row_undo_ins_remove_sec(node->index, entry); if (err != DB_SUCCESS) { return(err); } } node->index = dict_table_get_next_index(node->index); } log_free_check(); return(row_undo_ins_remove_clust_rec(node)); }
/***********************************************************//** Undoes a fresh insert of a row to a table. A fresh insert means that the same clustered index unique key did not have any record, even delete marked, at the time of the insert. InnoDB is eager in a rollback: if it figures out that an index record will be removed in the purge anyway, it will remove it in the rollback. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN ulint row_undo_ins( /*=========*/ undo_node_t* node) /*!< in: row undo node */ { ut_ad(node); ut_ad(node->state == UNDO_NODE_INSERT); row_undo_ins_parse_undo_rec(srv_force_recovery, node); if (!node->table || !row_undo_search_clust_to_pcur(node)) { trx_undo_rec_release(node->trx, node->undo_no); return(DB_SUCCESS); } /* Iterate over all the indexes and undo the insert.*/ /* Skip the clustered index (the first index) */ node->index = dict_table_get_next_index( dict_table_get_first_index(node->table)); while (node->index != NULL) { dtuple_t* entry; ulint err; entry = row_build_index_entry(node->row, node->ext, node->index, node->heap); if (UNIV_UNLIKELY(!entry)) { /* The database must have crashed after inserting a clustered index record but before writing all the externally stored columns of that record. Because secondary index entries are inserted after the clustered index record, we may assume that the secondary index record does not exist. However, this situation may only occur during the rollback of incomplete transactions. */ ut_a(trx_is_recv(node->trx)); } else { err = row_undo_ins_remove_sec(node->index, entry); if (err != DB_SUCCESS) { return(err); } } node->index = dict_table_get_next_index(node->index); } return(row_undo_ins_remove_clust_rec(node)); }
/*************************************************************** Purges a delete marking of a record. */ static void row_purge_del_mark( /*===============*/ purge_node_t* node, /* in: row purge node */ que_thr_t* thr) /* in: query thread */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; ut_ad(node && thr); heap = mem_heap_create(1024); while (node->index != NULL) { index = node->index; /* Build the index entry */ entry = row_build_index_entry(node->row, index, heap); row_purge_remove_sec_if_poss(node, thr, index, entry); node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); row_purge_remove_clust_if_poss(node, thr); }
ulint row_undo_ins( /*=========*/ /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ undo_node_t* node, /* in: row undo node */ que_thr_t* thr) /* in: query thread */ { dtuple_t* entry; ibool found; ulint err; ut_ad(node && thr); ut_ad(node->state == UNDO_NODE_INSERT); row_undo_ins_parse_undo_rec(node, thr); if (node->table == NULL) { found = FALSE; } else { found = row_undo_search_clust_to_pcur(node, thr); } if (!found) { trx_undo_rec_release(node->trx, node->undo_no); return(DB_SUCCESS); } node->index = dict_table_get_next_index( dict_table_get_first_index(node->table)); while (node->index != NULL) { entry = row_build_index_entry(node->row, node->index, node->heap); err = row_undo_ins_remove_sec(node->index, entry, thr); if (err != DB_SUCCESS) { return(err); } node->index = dict_table_get_next_index(node->index); } err = row_undo_ins_remove_clust_rec(node, thr); return(err); }
/***********************************************************//** Undoes a modify in secondary indexes when undo record type is UPD_DEL. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static ulint row_undo_mod_upd_del_sec( /*=====================*/ undo_node_t* node, /*!< in: row undo node */ que_thr_t* thr) /*!< in: query thread */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; ulint err = DB_SUCCESS; ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); heap = mem_heap_create(1024); while (node->index != NULL) { /* Skip all corrupted secondary index */ dict_table_skip_corrupt_index(node->index); if (!node->index) { break; } index = node->index; entry = row_build_index_entry(node->row, node->ext, index, heap); if (UNIV_UNLIKELY(!entry)) { /* The database must have crashed after inserting a clustered index record but before writing all the externally stored columns of that record. Because secondary index entries are inserted after the clustered index record, we may assume that the secondary index record does not exist. However, this situation may only occur during the rollback of incomplete transactions. */ ut_a(thr_is_recv(thr)); } else { err = row_undo_mod_del_mark_or_remove_sec( node, thr, index, entry); if (err != DB_SUCCESS) { break; } } mem_heap_empty(heap); node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); return(err); }
/***********************************************************//** Undoes a modify operation on a row of a table. @return DB_SUCCESS or error code */ UNIV_INTERN ulint row_undo_mod( /*=========*/ undo_node_t* node, /*!< in: row undo node */ que_thr_t* thr) /*!< in: query thread */ { ulint err; ut_ad(node && thr); ut_ad(node->state == UNDO_NODE_MODIFY); row_undo_mod_parse_undo_rec(node, thr); if (!node->table || !row_undo_search_clust_to_pcur(node)) { /* It is already undone, or will be undone by another query thread, or table was dropped */ trx_undo_rec_release(node->trx, node->undo_no); node->state = UNDO_NODE_FETCH_NEXT; return(DB_SUCCESS); } node->index = dict_table_get_next_index( dict_table_get_first_index(node->table)); /* Skip all corrupted secondary index */ dict_table_skip_corrupt_index(node->index); if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) { err = row_undo_mod_upd_exist_sec(node, thr); } else if (node->rec_type == TRX_UNDO_DEL_MARK_REC) { err = row_undo_mod_del_mark_sec(node, thr); } else { ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); err = row_undo_mod_upd_del_sec(node, thr); } if (err != DB_SUCCESS) { return(err); } err = row_undo_mod_clust(node, thr); return(err); }
void row_purge( /*======*/ purge_node_t* node, /*!< in: row purge node */ que_thr_t* thr) /*!< in: query thread */ { ibool updated_extern; ut_ad(node); ut_ad(thr); node->undo_rec = trx_purge_fetch_next_rec(&node->roll_ptr, &node->reservation, node->heap); if (!node->undo_rec) { /* Purge completed for this query thread */ thr->run_node = que_node_get_parent(node); return; } if (node->undo_rec != &trx_purge_dummy_rec && row_purge_parse_undo_rec(node, &updated_extern, thr)) { node->found_clust = FALSE; node->index = dict_table_get_next_index( dict_table_get_first_index(node->table)); if (node->rec_type == TRX_UNDO_DEL_MARK_REC) { row_purge_del_mark(node); } else if (updated_extern || node->rec_type == TRX_UNDO_UPD_EXIST_REC) { row_purge_upd_exist_or_extern(thr, node); } if (node->found_clust) { btr_pcur_close(&(node->pcur)); } row_mysql_unfreeze_data_dictionary(thr_get_trx(thr)); } /* Do some cleanup */ trx_purge_rec_release(node->reservation); mem_heap_empty(node->heap); thr->run_node = node; }
/***********************************************************//** Undoes a modify in secondary indexes when undo record type is DEL_MARK. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static ulint row_undo_mod_del_mark_sec( /*======================*/ undo_node_t* node, /*!< in: row undo node */ que_thr_t* thr) /*!< in: query thread */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; ulint err; heap = mem_heap_create(1024); while (node->index != NULL) { /* Skip all corrupted secondary index */ dict_table_skip_corrupt_index(node->index); if (!node->index) { break; } index = node->index; entry = row_build_index_entry(node->row, node->ext, index, heap); ut_a(entry); err = row_undo_mod_del_unmark_sec_and_undo_update( BTR_MODIFY_LEAF, thr, index, entry); if (err == DB_FAIL) { err = row_undo_mod_del_unmark_sec_and_undo_update( BTR_MODIFY_TREE, thr, index, entry); } if (err != DB_SUCCESS) { mem_heap_free(heap); return(err); } node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); return(DB_SUCCESS); }
/********************************************************************//** Disable the adaptive hash search system and empty the index. */ UNIV_INTERN void btr_search_disable(void) /*====================*/ { dict_table_t* table; mutex_enter(&dict_sys->mutex); rw_lock_x_lock(&btr_search_latch); btr_search_enabled = FALSE; /* Clear the index->search_info->ref_count of every index in the data dictionary cache. */ for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); table; table = UT_LIST_GET_NEXT(table_LRU, table)) { dict_index_t* index; for (index = dict_table_get_first_index(table); index; index = dict_table_get_next_index(index)) { index->search_info->ref_count = 0; } } mutex_exit(&dict_sys->mutex); /* Set all block->index = NULL. */ buf_pool_clear_hash_index(); /* Clear the adaptive hash index. */ hash_table_clear(btr_search_sys->hash_index); mem_heap_empty(btr_search_sys->hash_index->heap); rw_lock_x_unlock(&btr_search_latch); }
/***********************************************************//** Purges a delete marking of a record. */ static void row_purge_del_mark( /*===============*/ purge_node_t* node) /*!< in: row purge node */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; ut_ad(node); heap = mem_heap_create(1024); while (node->index != NULL) { /* skip corrupted secondary index */ dict_table_skip_corrupt_index(node->index); if (!node->index) { break; } index = node->index; /* Build the index entry */ entry = row_build_index_entry(node->row, NULL, index, heap); ut_a(entry); row_purge_remove_sec_if_poss(node, index, entry); node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); row_purge_remove_clust_if_poss(node); }
/*************************************************************** Fetches an undo log record and does the purge for the recorded operation. If none left, or the current purge completed, returns the control to the parent node, which is always a query thread node. */ static ulint row_purge( /*======*/ /* out: DB_SUCCESS if operation successfully completed, else error code */ purge_node_t* node, /* in: row purge node */ que_thr_t* thr) /* in: query thread */ { dulint roll_ptr; ibool purge_needed; ibool updated_extern; ut_ad(node && thr); node->undo_rec = trx_purge_fetch_next_rec(&roll_ptr, &(node->reservation), node->heap); if (!node->undo_rec) { /* Purge completed for this query thread */ thr->run_node = que_node_get_parent(node); return(DB_SUCCESS); } node->roll_ptr = roll_ptr; if (node->undo_rec == &trx_purge_dummy_rec) { purge_needed = FALSE; } else { purge_needed = row_purge_parse_undo_rec(node, &updated_extern, thr); } if (purge_needed) { node->found_clust = FALSE; node->index = dict_table_get_next_index( dict_table_get_first_index(node->table)); if (node->rec_type == TRX_UNDO_DEL_MARK_REC) { row_purge_del_mark(node, thr); } else if (updated_extern || node->rec_type == TRX_UNDO_UPD_EXIST_REC) { row_purge_upd_exist_or_extern(node, thr); } if (node->found_clust) { btr_pcur_close(&(node->pcur)); } rw_lock_x_unlock(&(purge_sys->purge_is_running)); } /* Do some cleanup */ trx_purge_rec_release(node->reservation); mem_heap_empty(node->heap); thr->run_node = node; return(DB_SUCCESS); }
/***********************************************************//** Undoes a modify in secondary indexes when undo record type is UPD_EXIST. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static ulint row_undo_mod_upd_exist_sec( /*=======================*/ undo_node_t* node, /*!< in: row undo node */ que_thr_t* thr) /*!< in: query thread */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; ulint err; if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) { /* No change in secondary indexes */ return(DB_SUCCESS); } heap = mem_heap_create(1024); while (node->index != NULL) { /* Skip all corrupted secondary index */ dict_table_skip_corrupt_index(node->index); if (!node->index) { break; } index = node->index; if (row_upd_changes_ord_field_binary(node->index, node->update, thr, node->row, node->ext)) { /* Build the newest version of the index entry */ entry = row_build_index_entry(node->row, node->ext, index, heap); if (UNIV_UNLIKELY(!entry)) { /* The server must have crashed in row_upd_clust_rec_by_insert() before the updated externally stored columns (BLOBs) of the new clustered index entry were written. */ /* The table must be in DYNAMIC or COMPRESSED format. REDUNDANT and COMPACT formats store a local 768-byte prefix of each externally stored column. */ ut_a(dict_table_get_format(index->table) >= DICT_TF_FORMAT_ZIP); /* This is only legitimate when rolling back an incomplete transaction after crash recovery. */ ut_a(thr_get_trx(thr)->is_recovered); /* The server must have crashed before completing the insert of the new clustered index entry and before inserting to the secondary indexes. Because node->row was not yet written to this index, we can ignore it. But we must restore node->undo_row. */ } else { /* NOTE that if we updated the fields of a delete-marked secondary index record so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot return to the original values because we do not know them. But this should not cause problems because in row0sel.c, in queries we always retrieve the clustered index record or an earlier version of it, if the secondary index record through which we do the search is delete-marked. */ err = row_undo_mod_del_mark_or_remove_sec( node, thr, index, entry); if (err != DB_SUCCESS) { mem_heap_free(heap); return(err); } mem_heap_empty(heap); } /* We may have to update the delete mark in the secondary index record of the previous version of the row. We also need to update the fields of the secondary index record if we updated its fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. */ entry = row_build_index_entry(node->undo_row, node->undo_ext, index, heap); ut_a(entry); err = row_undo_mod_del_unmark_sec_and_undo_update( BTR_MODIFY_LEAF, thr, index, entry); if (err == DB_FAIL) { err = row_undo_mod_del_unmark_sec_and_undo_update( BTR_MODIFY_TREE, thr, index, entry); } if (err != DB_SUCCESS) { mem_heap_free(heap); return(err); } } node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); return(DB_SUCCESS); }
/*************************************************************** Updates the affected index records of a row. When the control is transferred to this node, we assume that we have a persistent cursor which was on a record, and the position of the cursor is stored in the cursor. */ static ulint row_upd( /*====*/ /* out: DB_SUCCESS if operation successfully completed, else error code or DB_LOCK_WAIT */ upd_node_t* node, /* in: row update node */ que_thr_t* thr) /* in: query thread */ { ulint err = DB_SUCCESS; ut_ad(node && thr); if (node->in_mysql_interface) { /* We do not get the cmpl_info value from the MySQL interpreter: we must calculate it on the fly: */ if (node->is_delete || row_upd_changes_some_index_ord_field_binary( node->table, node->update)) { node->cmpl_info = 0; } else { node->cmpl_info = UPD_NODE_NO_ORD_CHANGE; } } if (node->state == UPD_NODE_UPDATE_CLUSTERED || node->state == UPD_NODE_INSERT_CLUSTERED) { err = row_upd_clust_step(node, thr); if (err != DB_SUCCESS) { goto function_exit; } } if (!node->is_delete && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { goto function_exit; } while (node->index != NULL) { err = row_upd_sec_step(node, thr); if (err != DB_SUCCESS) { goto function_exit; } node->index = dict_table_get_next_index(node->index); } function_exit: if (err == DB_SUCCESS) { /* Do some cleanup */ if (node->row != NULL) { mem_heap_empty(node->heap); node->row = NULL; node->n_ext_vec = 0; } node->state = UPD_NODE_UPDATE_CLUSTERED; } return(err); }
/*************************************************************** Updates the clustered index record. */ static ulint row_upd_clust_step( /*===============*/ /* out: DB_SUCCESS if operation successfully completed, DB_LOCK_WAIT in case of a lock wait, else error code */ upd_node_t* node, /* in: row update node */ que_thr_t* thr) /* in: query thread */ { dict_index_t* index; btr_pcur_t* pcur; ibool success; ibool check_ref; ulint err; mtr_t* mtr; mtr_t mtr_buf; index = dict_table_get_first_index(node->table); check_ref = row_upd_index_is_referenced(index); pcur = node->pcur; /* We have to restore the cursor to its position */ mtr = &mtr_buf; mtr_start(mtr); /* If the restoration does not succeed, then the same transaction has deleted the record on which the cursor was, and that is an SQL error. If the restoration succeeds, it may still be that the same transaction has successively deleted and inserted a record with the same ordering fields, but in that case we know that the transaction has at least an implicit x-lock on the record. */ ut_a(pcur->rel_pos == BTR_PCUR_ON); success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr); if (!success) { err = DB_RECORD_NOT_FOUND; mtr_commit(mtr); return(err); } /* If this is a row in SYS_INDEXES table of the data dictionary, then we have to free the file segments of the index tree associated with the index */ if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) { dict_drop_index_tree(btr_pcur_get_rec(pcur), mtr); mtr_commit(mtr); mtr_start(mtr); success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr); if (!success) { err = DB_ERROR; mtr_commit(mtr); return(err); } } if (!node->has_clust_rec_x_lock) { err = lock_clust_rec_modify_check_and_lock(0, btr_pcur_get_rec(pcur), index, thr); if (err != DB_SUCCESS) { mtr_commit(mtr); return(err); } } /* NOTE: the following function calls will also commit mtr */ if (node->is_delete) { err = row_upd_del_mark_clust_rec(node, index, thr, check_ref, mtr); if (err != DB_SUCCESS) { return(err); } node->state = UPD_NODE_UPDATE_ALL_SEC; node->index = dict_table_get_next_index(index); return(err); } /* If the update is made for MySQL, we already have the update vector ready, else we have to do some evaluation: */ if (!node->in_mysql_interface) { /* Copy the necessary columns from clust_rec and calculate the new values to set */ row_upd_copy_columns(btr_pcur_get_rec(pcur), UT_LIST_GET_FIRST(node->columns)); row_upd_eval_new_vals(node->update); } if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) { err = row_upd_clust_rec(node, index, thr, mtr); return(err); } row_upd_store_row(node); if (row_upd_changes_ord_field_binary(node->row, index, node->update)) { /* Update causes an ordering field (ordering fields within the B-tree) of the clustered index record to change: perform the update by delete marking and inserting. TODO! What to do to the 'Halloween problem', where an update moves the record forward in index so that it is again updated when the cursor arrives there? Solution: the read operation must check the undo record undo number when choosing records to update. MySQL solves now the problem externally! */ err = row_upd_clust_rec_by_insert(node, index, thr, check_ref, mtr); if (err != DB_SUCCESS) { return(err); } node->state = UPD_NODE_UPDATE_ALL_SEC; } else { err = row_upd_clust_rec(node, index, thr, mtr); if (err != DB_SUCCESS) { return(err); } node->state = UPD_NODE_UPDATE_SOME_SEC; } node->index = dict_table_get_next_index(index); return(err); }
/***********************************************************//** Purges an update of an existing record. Also purges an update of a delete marked record if that record contained an externally stored field. */ static void row_purge_upd_exist_or_extern_func( /*===============================*/ #ifdef UNIV_DEBUG const que_thr_t*thr, /*!< in: query thread */ #endif /* UNIV_DEBUG */ purge_node_t* node) /*!< in: row purge node */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; ibool is_insert; ulint rseg_id; ulint page_no; ulint offset; ulint i; mtr_t mtr; ut_ad(node); if (node->rec_type == TRX_UNDO_UPD_DEL_REC || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { goto skip_secondaries; } heap = mem_heap_create(1024); while (node->index != NULL) { dict_table_skip_corrupt_index(node->index); if (!node->index) { break; } index = node->index; if (row_upd_changes_ord_field_binary(node->index, node->update, thr, NULL, NULL)) { /* Build the older version of the index entry */ entry = row_build_index_entry(node->row, NULL, index, heap); ut_a(entry); row_purge_remove_sec_if_poss(node, index, entry); } node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); skip_secondaries: /* Free possible externally stored fields */ for (i = 0; i < upd_get_n_fields(node->update); i++) { const upd_field_t* ufield = upd_get_nth_field(node->update, i); if (dfield_is_ext(&ufield->new_val)) { buf_block_t* block; ulint internal_offset; byte* data_field; /* We use the fact that new_val points to node->undo_rec and get thus the offset of dfield data inside the undo record. Then we can calculate from node->roll_ptr the file address of the new_val data */ internal_offset = ((const byte*) dfield_get_data(&ufield->new_val)) - node->undo_rec; ut_a(internal_offset < UNIV_PAGE_SIZE); trx_undo_decode_roll_ptr(node->roll_ptr, &is_insert, &rseg_id, &page_no, &offset); mtr_start(&mtr); /* We have to acquire an X-latch to the clustered index tree */ index = dict_table_get_first_index(node->table); mtr_x_lock(dict_index_get_lock(index), &mtr); /* NOTE: we must also acquire an X-latch to the root page of the tree. We will need it when we free pages from the tree. If the tree is of height 1, the tree X-latch does NOT protect the root page, because it is also a leaf page. Since we will have a latch on an undo log page, we would break the latching order if we would only later latch the root page of such a tree! */ btr_root_get(index, &mtr); /* We assume in purge of externally stored fields that the space id of the undo log record is 0! */ block = buf_page_get(0, 0, page_no, RW_X_LATCH, &mtr); buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE); data_field = buf_block_get_frame(block) + offset + internal_offset; ut_a(dfield_get_len(&ufield->new_val) >= BTR_EXTERN_FIELD_REF_SIZE); btr_free_externally_stored_field( index, data_field + dfield_get_len(&ufield->new_val) - BTR_EXTERN_FIELD_REF_SIZE, NULL, NULL, NULL, 0, RB_NONE, &mtr); mtr_commit(&mtr); } } }
/*************************************************************** Purges an update of an existing record. Also purges an update of a delete marked record if that record contained an externally stored field. */ static void row_purge_upd_exist_or_extern( /*==========================*/ purge_node_t* node, /* in: row purge node */ que_thr_t* thr) /* in: query thread */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; upd_field_t* ufield; ibool is_insert; ulint rseg_id; ulint page_no; ulint offset; ulint internal_offset; byte* data_field; ulint data_field_len; ulint i; mtr_t mtr; ut_ad(node && thr); if (node->rec_type == TRX_UNDO_UPD_DEL_REC) { goto skip_secondaries; } heap = mem_heap_create(1024); while (node->index != NULL) { index = node->index; if (row_upd_changes_ord_field_binary(NULL, node->index, node->update)) { /* Build the older version of the index entry */ entry = row_build_index_entry(node->row, index, heap); row_purge_remove_sec_if_poss(node, thr, index, entry); } node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); skip_secondaries: /* Free possible externally stored fields */ for (i = 0; i < upd_get_n_fields(node->update); i++) { ufield = upd_get_nth_field(node->update, i); if (ufield->extern_storage) { /* We use the fact that new_val points to node->undo_rec and get thus the offset of dfield data inside the unod record. Then we can calculate from node->roll_ptr the file address of the new_val data */ internal_offset = ((byte*)ufield->new_val.data) - node->undo_rec; ut_a(internal_offset < UNIV_PAGE_SIZE); trx_undo_decode_roll_ptr(node->roll_ptr, &is_insert, &rseg_id, &page_no, &offset); mtr_start(&mtr); /* We have to acquire an X-latch to the clustered index tree */ index = dict_table_get_first_index(node->table); mtr_x_lock(dict_tree_get_lock(index->tree), &mtr); /* We assume in purge of externally stored fields that the space id of the undo log record is 0! */ data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr) + offset + internal_offset; buf_page_dbg_add_level(buf_frame_align(data_field), SYNC_TRX_UNDO_PAGE); data_field_len = ufield->new_val.len; btr_free_externally_stored_field(index, data_field, data_field_len, FALSE, &mtr); mtr_commit(&mtr); } } }
/*************************************************************** Purges an update of an existing record. Also purges an update of a delete marked record if that record contained an externally stored field. */ static void row_purge_upd_exist_or_extern( /*==========================*/ purge_node_t* node) /* in: row purge node */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; upd_field_t* ufield; ibool is_insert; ulint rseg_id; ulint page_no; ulint offset; ulint internal_offset; byte* data_field; ulint data_field_len; ulint i; mtr_t mtr; ut_ad(node); if (node->rec_type == TRX_UNDO_UPD_DEL_REC) { goto skip_secondaries; } heap = mem_heap_create(1024); while (node->index != NULL) { index = node->index; if (row_upd_changes_ord_field_binary(NULL, node->index, node->update)) { /* Build the older version of the index entry */ entry = row_build_index_entry(node->row, index, heap); row_purge_remove_sec_if_poss(node, index, entry); } node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); skip_secondaries: /* Free possible externally stored fields */ for (i = 0; i < upd_get_n_fields(node->update); i++) { ufield = upd_get_nth_field(node->update, i); if (ufield->extern_storage) { /* We use the fact that new_val points to node->undo_rec and get thus the offset of dfield data inside the unod record. Then we can calculate from node->roll_ptr the file address of the new_val data */ internal_offset = ((byte*)ufield->new_val.data) - node->undo_rec; ut_a(internal_offset < UNIV_PAGE_SIZE); trx_undo_decode_roll_ptr(node->roll_ptr, &is_insert, &rseg_id, &page_no, &offset); mtr_start(&mtr); /* We have to acquire an X-latch to the clustered index tree */ index = dict_table_get_first_index(node->table); mtr_x_lock(dict_index_get_lock(index), &mtr); /* NOTE: we must also acquire an X-latch to the root page of the tree. We will need it when we free pages from the tree. If the tree is of height 1, the tree X-latch does NOT protect the root page, because it is also a leaf page. Since we will have a latch on an undo log page, we would break the latching order if we would only later latch the root page of such a tree! */ btr_root_get(index, &mtr); /* We assume in purge of externally stored fields that the space id of the undo log record is 0! */ data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr) + offset + internal_offset; #ifdef UNIV_SYNC_DEBUG buf_page_dbg_add_level(buf_frame_align(data_field), SYNC_TRX_UNDO_PAGE); #endif /* UNIV_SYNC_DEBUG */ data_field_len = ufield->new_val.len; btr_free_externally_stored_field(index, data_field, data_field_len, FALSE, &mtr); mtr_commit(&mtr); } } }
ulint dict_load_foreigns( /*===============*/ /* out: DB_SUCCESS or error code */ char* table_name) /* in: table name */ { btr_pcur_t pcur; mem_heap_t* heap; dtuple_t* tuple; dfield_t* dfield; dict_index_t* sec_index; dict_table_t* sys_foreign; rec_t* rec; byte* field; ulint len; char* id ; ulint err; mtr_t mtr; ut_ad(mutex_own(&(dict_sys->mutex))); sys_foreign = dict_table_get_low("SYS_FOREIGN"); if (sys_foreign == NULL) { /* No foreign keys defined yet in this database */ fprintf(stderr, "InnoDB: Error: no foreign key system tables in the database\n"); return(DB_ERROR); } mtr_start(&mtr); /* Get the secondary index based on FOR_NAME from table SYS_FOREIGN */ sec_index = dict_table_get_next_index( dict_table_get_first_index(sys_foreign)); start_load: heap = mem_heap_create(256); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); dfield_set_data(dfield, table_name, ut_strlen(table_name)); dict_index_copy_types(tuple, sec_index, 1); btr_pcur_open_on_user_rec(sec_index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); loop: rec = btr_pcur_get_rec(&pcur); if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { /* End of index */ goto load_next_index; } /* Now we have the record in the secondary index containing a table name and a foreign constraint ID */ rec = btr_pcur_get_rec(&pcur); field = rec_get_nth_field(rec, 0, &len); /* Check if the table name in record is the one searched for */ if (len != ut_strlen(table_name) || 0 != ut_memcmp(field, table_name, len)) { goto load_next_index; } if (rec_get_deleted_flag(rec)) { goto next_rec; } /* Now we get a foreign key constraint id */ field = rec_get_nth_field(rec, 1, &len); id = mem_heap_alloc(heap, len + 1); ut_memcpy(id, field, len); id[len] = '\0'; btr_pcur_store_position(&pcur, &mtr); mtr_commit(&mtr); /* Load the foreign constraint definition to the dictionary cache */ err = dict_load_foreign(id); if (err != DB_SUCCESS) { btr_pcur_close(&pcur); mem_heap_free(heap); return(err); } mtr_start(&mtr); btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); next_rec: btr_pcur_move_to_next_user_rec(&pcur, &mtr); goto loop; load_next_index: btr_pcur_close(&pcur); mtr_commit(&mtr); mem_heap_free(heap); sec_index = dict_table_get_next_index(sec_index); if (sec_index != NULL) { mtr_start(&mtr); goto start_load; } return(DB_SUCCESS); }
dict_table_t* dict_load_table_on_id( /*==================*/ /* out: table; NULL if table does not exist */ dulint table_id) /* in: table id */ { byte id_buf[8]; btr_pcur_t pcur; mem_heap_t* heap; dtuple_t* tuple; dfield_t* dfield; dict_index_t* sys_table_ids; dict_table_t* sys_tables; rec_t* rec; byte* field; ulint len; dict_table_t* table; char* name; mtr_t mtr; ut_ad(mutex_own(&(dict_sys->mutex))); /* NOTE that the operation of this function is protected by the dictionary mutex, and therefore no deadlocks can occur with other dictionary operations. */ mtr_start(&mtr); /*---------------------------------------------------*/ /* Get the secondary index based on ID for table SYS_TABLES */ sys_tables = dict_sys->sys_tables; sys_table_ids = dict_table_get_next_index( dict_table_get_first_index(sys_tables)); heap = mem_heap_create(256); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); /* Write the table id in byte format to id_buf */ mach_write_to_8(id_buf, table_id); dfield_set_data(dfield, id_buf, 8); dict_index_copy_types(tuple, sys_table_ids, 1); btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); rec = btr_pcur_get_rec(&pcur); if (!btr_pcur_is_on_user_rec(&pcur, &mtr) || rec_get_deleted_flag(rec)) { /* Not found */ btr_pcur_close(&pcur); mtr_commit(&mtr); mem_heap_free(heap); return(NULL); } /*---------------------------------------------------*/ /* Now we have the record in the secondary index containing the table ID and NAME */ rec = btr_pcur_get_rec(&pcur); field = rec_get_nth_field(rec, 0, &len); ut_ad(len == 8); /* Check if the table id in record is the one searched for */ if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) { btr_pcur_close(&pcur); mtr_commit(&mtr); mem_heap_free(heap); return(NULL); } /* Now we get the table name from the record */ field = rec_get_nth_field(rec, 1, &len); name = mem_heap_alloc(heap, len + 1); ut_memcpy(name, field, len); name[len] = '\0'; /* Load the table definition to memory */ table = dict_load_table(name); ut_a(table); btr_pcur_close(&pcur); mtr_commit(&mtr); mem_heap_free(heap); return(table); }