/*************************************************************** Purges a delete marking of a record. */ static void row_purge_del_mark( /*===============*/ purge_node_t* node, /* in: row purge node */ que_thr_t* thr) /* in: query thread */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; ut_ad(node && thr); heap = mem_heap_create(1024); while (node->index != NULL) { index = node->index; /* Build the index entry */ entry = row_build_index_entry(node->row, index, heap); row_purge_remove_sec_if_poss(node, thr, index, entry); node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); row_purge_remove_clust_if_poss(node, thr); }
/***********************************************************//** Undoes a modify in secondary indexes when undo record type is UPD_DEL. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static ulint row_undo_mod_upd_del_sec( /*=====================*/ undo_node_t* node, /*!< in: row undo node */ que_thr_t* thr) /*!< in: query thread */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; ulint err = DB_SUCCESS; ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); heap = mem_heap_create(1024); while (node->index != NULL) { /* Skip all corrupted secondary index */ dict_table_skip_corrupt_index(node->index); if (!node->index) { break; } index = node->index; entry = row_build_index_entry(node->row, node->ext, index, heap); if (UNIV_UNLIKELY(!entry)) { /* The database must have crashed after inserting a clustered index record but before writing all the externally stored columns of that record. Because secondary index entries are inserted after the clustered index record, we may assume that the secondary index record does not exist. However, this situation may only occur during the rollback of incomplete transactions. */ ut_a(thr_is_recv(thr)); } else { err = row_undo_mod_del_mark_or_remove_sec( node, thr, index, entry); if (err != DB_SUCCESS) { break; } } mem_heap_empty(heap); node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); return(err); }
/***********************************************************//** Undoes a fresh insert of a row to a table. A fresh insert means that the same clustered index unique key did not have any record, even delete marked, at the time of the insert. InnoDB is eager in a rollback: if it figures out that an index record will be removed in the purge anyway, it will remove it in the rollback. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN ulint row_undo_ins( /*=========*/ undo_node_t* node) /*!< in: row undo node */ { ut_ad(node); ut_ad(node->state == UNDO_NODE_INSERT); row_undo_ins_parse_undo_rec(node); if (!node->table || !row_undo_search_clust_to_pcur(node)) { trx_undo_rec_release(node->trx, node->undo_no); return(DB_SUCCESS); } /* Iterate over all the indexes and undo the insert.*/ /* Skip the clustered index (the first index) */ node->index = dict_table_get_next_index( dict_table_get_first_index(node->table)); dict_table_skip_corrupt_index(node->index); while (node->index != NULL) { dtuple_t* entry; ulint err; entry = row_build_index_entry(node->row, node->ext, node->index, node->heap); if (UNIV_UNLIKELY(!entry)) { /* The database must have crashed after inserting a clustered index record but before writing all the externally stored columns of that record. Because secondary index entries are inserted after the clustered index record, we may assume that the secondary index record does not exist. However, this situation may only occur during the rollback of incomplete transactions. */ ut_a(trx_is_recv(node->trx)); } else { log_free_check(); err = row_undo_ins_remove_sec(node->index, entry); if (err != DB_SUCCESS) { return(err); } } dict_table_next_uncorrupted_index(node->index); } log_free_check(); return(row_undo_ins_remove_clust_rec(node)); }
/***********************************************************//** Undoes a fresh insert of a row to a table. A fresh insert means that the same clustered index unique key did not have any record, even delete marked, at the time of the insert. InnoDB is eager in a rollback: if it figures out that an index record will be removed in the purge anyway, it will remove it in the rollback. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ UNIV_INTERN ulint row_undo_ins( /*=========*/ undo_node_t* node) /*!< in: row undo node */ { ut_ad(node); ut_ad(node->state == UNDO_NODE_INSERT); row_undo_ins_parse_undo_rec(node); if (!node->table || !row_undo_search_clust_to_pcur(node)) { trx_undo_rec_release(node->trx, node->undo_no); return(DB_SUCCESS); } /* Iterate over all the indexes and undo the insert.*/ /* Skip the clustered index (the first index) */ node->index = dict_table_get_next_index( dict_table_get_first_index(node->table)); while (node->index != NULL) { dtuple_t* entry; ulint err; entry = row_build_index_entry(node->row, node->ext, node->index, node->heap); if (UNIV_UNLIKELY(!entry)) { /* The database must have crashed after inserting a clustered index record but before writing all the externally stored columns of that record, or a statement is being rolled back because an error occurred while storing off-page columns. Because secondary index entries are inserted after the clustered index record, we may assume that the secondary index record does not exist. */ } else { log_free_check(); err = row_undo_ins_remove_sec(node->index, entry); if (err != DB_SUCCESS) { return(err); } } node->index = dict_table_get_next_index(node->index); } log_free_check(); return(row_undo_ins_remove_clust_rec(node)); }
/***********************************************************//** Undoes a modify in secondary indexes when undo record type is DEL_MARK. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static ulint row_undo_mod_del_mark_sec( /*======================*/ undo_node_t* node, /*!< in: row undo node */ que_thr_t* thr) /*!< in: query thread */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; ulint err; heap = mem_heap_create(1024); while (node->index != NULL) { /* Skip all corrupted secondary index */ dict_table_skip_corrupt_index(node->index); if (!node->index) { break; } index = node->index; entry = row_build_index_entry(node->row, node->ext, index, heap); ut_a(entry); err = row_undo_mod_del_unmark_sec_and_undo_update( BTR_MODIFY_LEAF, thr, index, entry); if (err == DB_FAIL) { err = row_undo_mod_del_unmark_sec_and_undo_update( BTR_MODIFY_TREE, thr, index, entry); } if (err != DB_SUCCESS) { mem_heap_free(heap); return(err); } node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); return(DB_SUCCESS); }
ulint row_undo_ins( /*=========*/ /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ undo_node_t* node, /* in: row undo node */ que_thr_t* thr) /* in: query thread */ { dtuple_t* entry; ibool found; ulint err; ut_ad(node && thr); ut_ad(node->state == UNDO_NODE_INSERT); row_undo_ins_parse_undo_rec(node, thr); if (node->table == NULL) { found = FALSE; } else { found = row_undo_search_clust_to_pcur(node, thr); } if (!found) { trx_undo_rec_release(node->trx, node->undo_no); return(DB_SUCCESS); } node->index = dict_table_get_next_index( dict_table_get_first_index(node->table)); while (node->index != NULL) { entry = row_build_index_entry(node->row, node->index, node->heap); err = row_undo_ins_remove_sec(node->index, entry, thr); if (err != DB_SUCCESS) { return(err); } node->index = dict_table_get_next_index(node->index); } err = row_undo_ins_remove_clust_rec(node, thr); return(err); }
/***********************************************************//** Purges a delete marking of a record. */ static void row_purge_del_mark( /*===============*/ purge_node_t* node) /*!< in: row purge node */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; ut_ad(node); heap = mem_heap_create(1024); while (node->index != NULL) { /* skip corrupted secondary index */ dict_table_skip_corrupt_index(node->index); if (!node->index) { break; } index = node->index; /* Build the index entry */ entry = row_build_index_entry(node->row, NULL, index, heap); ut_a(entry); row_purge_remove_sec_if_poss(node, index, entry); node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); row_purge_remove_clust_if_poss(node); }
/*****************************************************************//** Finds out if an active transaction has inserted or modified a secondary index record. NOTE: the kernel mutex is temporarily released in this function! @return NULL if committed, else the active transaction */ UNIV_INTERN trx_t* row_vers_impl_x_locked_off_kernel( /*==============================*/ const rec_t* rec, /*!< in: record in a secondary index */ dict_index_t* index, /*!< in: the secondary index */ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ { dict_index_t* clust_index; rec_t* clust_rec; ulint* clust_offsets; rec_t* version; trx_id_t trx_id; mem_heap_t* heap; mem_heap_t* heap2; dtuple_t* row; dtuple_t* entry = NULL; /* assignment to eliminate compiler warning */ trx_t* trx; ulint rec_del; #ifdef UNIV_DEBUG ulint err; #endif /* UNIV_DEBUG */ mtr_t mtr; ulint comp; ut_ad(mutex_own(&kernel_mutex)); #ifdef UNIV_SYNC_DEBUG ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); #endif /* UNIV_SYNC_DEBUG */ mutex_exit(&kernel_mutex); mtr_start(&mtr); /* Search for the clustered index record: this is a time-consuming operation: therefore we release the kernel mutex; also, the release is required by the latching order convention. The latch on the clustered index locks the top of the stack of versions. We also reserve purge_latch to lock the bottom of the version stack. */ clust_rec = row_get_clust_rec(BTR_SEARCH_LEAF, rec, index, &clust_index, &mtr); if (!clust_rec) { /* In a rare case it is possible that no clust rec is found for a secondary index record: if in row0umod.c row_undo_mod_remove_clust_low() we have already removed the clust rec, while purge is still cleaning and removing secondary index records associated with earlier versions of the clustered index record. In that case there cannot be any implicit lock on the secondary index record, because an active transaction which has modified the secondary index record has also modified the clustered index record. And in a rollback we always undo the modifications to secondary index records before the clustered index record. */ mutex_enter(&kernel_mutex); mtr_commit(&mtr); return(NULL); } heap = mem_heap_create(1024); clust_offsets = rec_get_offsets(clust_rec, clust_index, NULL, ULINT_UNDEFINED, &heap); trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets); mtr_s_lock(&(purge_sys->latch), &mtr); mutex_enter(&kernel_mutex); trx = NULL; if (!trx_is_active(trx_id)) { /* The transaction that modified or inserted clust_rec is no longer active: no implicit lock on rec */ goto exit_func; } if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index, clust_offsets, TRUE)) { /* Corruption noticed: try to avoid a crash by returning */ goto exit_func; } comp = page_rec_is_comp(rec); ut_ad(index->table == clust_index->table); ut_ad(!!comp == dict_table_is_comp(index->table)); ut_ad(!comp == !page_rec_is_comp(clust_rec)); /* We look up if some earlier version, which was modified by the trx_id transaction, of the clustered index record would require rec to be in a different state (delete marked or unmarked, or have different field values, or not existing). If there is such a version, then rec was modified by the trx_id transaction, and it has an implicit x-lock on rec. Note that if clust_rec itself would require rec to be in a different state, then the trx_id transaction has not yet had time to modify rec, and does not necessarily have an implicit x-lock on rec. */ rec_del = rec_get_deleted_flag(rec, comp); trx = NULL; version = clust_rec; for (;;) { rec_t* prev_version; ulint vers_del; row_ext_t* ext; trx_id_t prev_trx_id; mutex_exit(&kernel_mutex); /* While we retrieve an earlier version of clust_rec, we release the kernel mutex, because it may take time to access the disk. After the release, we have to check if the trx_id transaction is still active. We keep the semaphore in mtr on the clust_rec page, so that no other transaction can update it and get an implicit x-lock on rec. */ heap2 = heap; heap = mem_heap_create(1024); #ifdef UNIV_DEBUG err = #endif /* UNIV_DEBUG */ trx_undo_prev_version_build(clust_rec, &mtr, version, clust_index, clust_offsets, heap, &prev_version); mem_heap_free(heap2); /* free version and clust_offsets */ if (prev_version == NULL) { mutex_enter(&kernel_mutex); if (!trx_is_active(trx_id)) { /* Transaction no longer active: no implicit x-lock */ break; } /* If the transaction is still active, clust_rec must be a fresh insert, because no previous version was found. */ ut_ad(err == DB_SUCCESS); /* It was a freshly inserted version: there is an implicit x-lock on rec */ trx = trx_get_on_id(trx_id); break; } clust_offsets = rec_get_offsets(prev_version, clust_index, NULL, ULINT_UNDEFINED, &heap); vers_del = rec_get_deleted_flag(prev_version, comp); prev_trx_id = row_get_rec_trx_id(prev_version, clust_index, clust_offsets); /* The stack of versions is locked by mtr. Thus, it is safe to fetch the prefixes for externally stored columns. */ row = row_build(ROW_COPY_POINTERS, clust_index, prev_version, clust_offsets, NULL, &ext, heap); entry = row_build_index_entry(row, ext, index, heap); /* entry may be NULL if a record was inserted in place of a deleted record, and the BLOB pointers of the new record were not initialized yet. But in that case, prev_version should be NULL. */ ut_a(entry); mutex_enter(&kernel_mutex); if (!trx_is_active(trx_id)) { /* Transaction no longer active: no implicit x-lock */ break; } /* If we get here, we know that the trx_id transaction is still active and it has modified prev_version. Let us check if prev_version would require rec to be in a different state. */ /* The previous version of clust_rec must be accessible, because the transaction is still active and clust_rec was not a fresh insert. */ ut_ad(err == DB_SUCCESS); /* We check if entry and rec are identified in the alphabetical ordering */ if (0 == cmp_dtuple_rec(entry, rec, offsets)) { /* The delete marks of rec and prev_version should be equal for rec to be in the state required by prev_version */ if (rec_del != vers_del) { trx = trx_get_on_id(trx_id); break; } /* It is possible that the row was updated so that the secondary index record remained the same in alphabetical ordering, but the field values changed still. For example, 'abc' -> 'ABC'. Check also that. */ dtuple_set_types_binary(entry, dtuple_get_n_fields(entry)); if (0 != cmp_dtuple_rec(entry, rec, offsets)) { trx = trx_get_on_id(trx_id); break; } } else if (!rec_del) { /* The delete mark should be set in rec for it to be in the state required by prev_version */ trx = trx_get_on_id(trx_id); break; } if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) { /* The versions modified by the trx_id transaction end to prev_version: no implicit x-lock */ break; } version = prev_version; }/* for (;;) */ exit_func: mtr_commit(&mtr); mem_heap_free(heap); return(trx); }
/*****************************************************************//** Finds out if a version of the record, where the version >= the current purge view, should have ientry as its secondary index entry. We check if there is any not delete marked version of the record where the trx id >= purge view, and the secondary index entry and ientry are identified in the alphabetical ordering; exactly in this case we return TRUE. @return TRUE if earlier version should have */ UNIV_INTERN ibool row_vers_old_has_index_entry( /*=========================*/ ibool also_curr,/*!< in: TRUE if also rec is included in the versions to search; otherwise only versions prior to it are searched */ const rec_t* rec, /*!< in: record in the clustered index; the caller must have a latch on the page */ mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will also hold the latch on purge_view */ dict_index_t* index, /*!< in: the secondary index */ const dtuple_t* ientry) /*!< in: the secondary index entry */ { const rec_t* version; rec_t* prev_version; dict_index_t* clust_index; ulint* clust_offsets; mem_heap_t* heap; mem_heap_t* heap2; const dtuple_t* row; const dtuple_t* entry; ulint err; ulint comp; ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX) || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); #ifdef UNIV_SYNC_DEBUG ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); #endif /* UNIV_SYNC_DEBUG */ mtr_s_lock(&(purge_sys->latch), mtr); clust_index = dict_table_get_first_index(index->table); comp = page_rec_is_comp(rec); ut_ad(!dict_table_is_comp(index->table) == !comp); heap = mem_heap_create(1024); clust_offsets = rec_get_offsets(rec, clust_index, NULL, ULINT_UNDEFINED, &heap); if (also_curr && !rec_get_deleted_flag(rec, comp)) { row_ext_t* ext; /* The stack of versions is locked by mtr. Thus, it is safe to fetch the prefixes for externally stored columns. */ row = row_build(ROW_COPY_POINTERS, clust_index, rec, clust_offsets, NULL, &ext, heap); entry = row_build_index_entry(row, ext, index, heap); /* If entry == NULL, the record contains unset BLOB pointers. This must be a freshly inserted record. If this is called from row_purge_remove_sec_if_poss_low(), the thread will hold latches on the clustered index and the secondary index. Because the insert works in three steps: (1) insert the record to clustered index (2) store the BLOBs and update BLOB pointers (3) insert records to secondary indexes the purge thread can safely ignore freshly inserted records and delete the secondary index record. The thread that inserted the new record will be inserting the secondary index records. */ /* NOTE that we cannot do the comparison as binary fields because the row is maybe being modified so that the clustered index record has already been updated to a different binary value in a char field, but the collation identifies the old and new value anyway! */ if (entry && !dtuple_coll_cmp(ientry, entry)) { mem_heap_free(heap); return(TRUE); } } version = rec; for (;;) { heap2 = heap; heap = mem_heap_create(1024); err = trx_undo_prev_version_build(rec, mtr, version, clust_index, clust_offsets, heap, &prev_version); mem_heap_free(heap2); /* free version and clust_offsets */ if (err != DB_SUCCESS || !prev_version) { /* Versions end here */ mem_heap_free(heap); return(FALSE); } clust_offsets = rec_get_offsets(prev_version, clust_index, NULL, ULINT_UNDEFINED, &heap); if (!rec_get_deleted_flag(prev_version, comp)) { row_ext_t* ext; /* The stack of versions is locked by mtr. Thus, it is safe to fetch the prefixes for externally stored columns. */ row = row_build(ROW_COPY_POINTERS, clust_index, prev_version, clust_offsets, NULL, &ext, heap); entry = row_build_index_entry(row, ext, index, heap); /* If entry == NULL, the record contains unset BLOB pointers. This must be a freshly inserted record that we can safely ignore. For the justification, see the comments after the previous row_build_index_entry() call. */ /* NOTE that we cannot do the comparison as binary fields because maybe the secondary index record has already been updated to a different binary value in a char field, but the collation identifies the old and new value anyway! */ if (entry && !dtuple_coll_cmp(ientry, entry)) { mem_heap_free(heap); return(TRUE); } } version = prev_version; } }
/*************************************************************** Marks the clustered index record deleted and inserts the updated version of the record to the index. This function should be used when the ordering fields of the clustered index record change. This should be quite rare in database applications. */ static ulint row_upd_clust_rec_by_insert( /*========================*/ /* out: DB_SUCCESS if operation successfully completed, else error code or DB_LOCK_WAIT */ upd_node_t* node, /* in: row update node */ dict_index_t* index, /* in: clustered index of the record */ que_thr_t* thr, /* in: query thread */ ibool check_ref,/* in: TRUE if index may be referenced in a foreign key constraint */ mtr_t* mtr) /* in: mtr; gets committed here */ { mem_heap_t* heap; btr_pcur_t* pcur; btr_cur_t* btr_cur; trx_t* trx; dict_table_t* table; dtuple_t* entry; ulint err; ut_ad(node); ut_ad(index->type & DICT_CLUSTERED); trx = thr_get_trx(thr); table = node->table; pcur = node->pcur; btr_cur = btr_pcur_get_btr_cur(pcur); if (node->state != UPD_NODE_INSERT_CLUSTERED) { err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, btr_cur, TRUE, thr, mtr); if (err != DB_SUCCESS) { mtr_commit(mtr); return(err); } /* Mark as not-owned the externally stored fields which the new row inherits from the delete marked record: purge should not free those externally stored fields even if the delete marked record is removed from the index tree, or updated. */ btr_cur_mark_extern_inherited_fields(btr_cur_get_rec(btr_cur), node->update, mtr); if (check_ref) { /* NOTE that the following call loses the position of pcur ! */ err = row_upd_check_references_constraints( pcur, table, index, thr, mtr); if (err != DB_SUCCESS) { mtr_commit(mtr); return(err); } } } mtr_commit(mtr); node->state = UPD_NODE_INSERT_CLUSTERED; heap = mem_heap_create(500); entry = row_build_index_entry(node->row, index, heap); row_upd_clust_index_replace_new_col_vals(entry, node->update); row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id); /* If we return from a lock wait, for example, we may have extern fields marked as not-owned in entry (marked if the if-branch above). We must unmark them. */ btr_cur_unmark_dtuple_extern_fields(entry, node->ext_vec, node->n_ext_vec); /* We must mark non-updated extern fields in entry as inherited, so that a possible rollback will not free them */ btr_cur_mark_dtuple_inherited_extern(entry, node->ext_vec, node->n_ext_vec, node->update); err = row_ins_index_entry(index, entry, node->ext_vec, node->n_ext_vec, thr); mem_heap_free(heap); return(err); }
/*************************************************************** Updates a secondary index entry of a row. */ static ulint row_upd_sec_index_entry( /*====================*/ /* out: DB_SUCCESS if operation successfully completed, else error code or DB_LOCK_WAIT */ upd_node_t* node, /* in: row update node */ que_thr_t* thr) /* in: query thread */ { ibool check_ref; ibool found; dict_index_t* index; dtuple_t* entry; btr_pcur_t pcur; btr_cur_t* btr_cur; mem_heap_t* heap; rec_t* rec; ulint err = DB_SUCCESS; mtr_t mtr; char err_buf[1000]; index = node->index; check_ref = row_upd_index_is_referenced(index); heap = mem_heap_create(1024); /* Build old index entry */ entry = row_build_index_entry(node->row, index, heap); log_free_check(); mtr_start(&mtr); found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, &mtr); btr_cur = btr_pcur_get_btr_cur(&pcur); rec = btr_cur_get_rec(btr_cur); if (!found) { fprintf(stderr, "InnoDB: error in sec index entry update in\n" "InnoDB: index %s table %s\n", index->name, index->table->name); dtuple_sprintf(err_buf, 900, entry); fprintf(stderr, "InnoDB: tuple %s\n", err_buf); rec_sprintf(err_buf, 900, rec); fprintf(stderr, "InnoDB: record %s\n", err_buf); fprintf(stderr, "InnoDB: Make a detailed bug report and send it\n"); fprintf(stderr, "InnoDB: to [email protected]\n"); trx_print(thr_get_trx(thr)); } else { /* Delete mark the old index record; it can already be delete marked if we return after a lock wait in row_ins_index_entry below */ if (!rec_get_deleted_flag(rec)) { err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, thr, &mtr); if (err == DB_SUCCESS && check_ref) { /* NOTE that the following call loses the position of pcur ! */ err = row_upd_check_references_constraints( &pcur, index->table, index, thr, &mtr); if (err != DB_SUCCESS) { goto close_cur; } } } } close_cur: btr_pcur_close(&pcur); mtr_commit(&mtr); if (node->is_delete || err != DB_SUCCESS) { mem_heap_free(heap); return(err); } /* Build a new index entry */ row_upd_index_replace_new_col_vals(entry, index, node->update); /* Insert new index entry */ err = row_ins_index_entry(index, entry, NULL, 0, thr); mem_heap_free(heap); return(err); }
/***********************************************************//** Purges an update of an existing record. Also purges an update of a delete marked record if that record contained an externally stored field. */ static void row_purge_upd_exist_or_extern_func( /*===============================*/ #ifdef UNIV_DEBUG const que_thr_t*thr, /*!< in: query thread */ #endif /* UNIV_DEBUG */ purge_node_t* node) /*!< in: row purge node */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; ibool is_insert; ulint rseg_id; ulint page_no; ulint offset; ulint i; mtr_t mtr; ut_ad(node); if (node->rec_type == TRX_UNDO_UPD_DEL_REC || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { goto skip_secondaries; } heap = mem_heap_create(1024); while (node->index != NULL) { dict_table_skip_corrupt_index(node->index); if (!node->index) { break; } index = node->index; if (row_upd_changes_ord_field_binary(node->index, node->update, thr, NULL, NULL)) { /* Build the older version of the index entry */ entry = row_build_index_entry(node->row, NULL, index, heap); ut_a(entry); row_purge_remove_sec_if_poss(node, index, entry); } node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); skip_secondaries: /* Free possible externally stored fields */ for (i = 0; i < upd_get_n_fields(node->update); i++) { const upd_field_t* ufield = upd_get_nth_field(node->update, i); if (dfield_is_ext(&ufield->new_val)) { buf_block_t* block; ulint internal_offset; byte* data_field; /* We use the fact that new_val points to node->undo_rec and get thus the offset of dfield data inside the undo record. Then we can calculate from node->roll_ptr the file address of the new_val data */ internal_offset = ((const byte*) dfield_get_data(&ufield->new_val)) - node->undo_rec; ut_a(internal_offset < UNIV_PAGE_SIZE); trx_undo_decode_roll_ptr(node->roll_ptr, &is_insert, &rseg_id, &page_no, &offset); mtr_start(&mtr); /* We have to acquire an X-latch to the clustered index tree */ index = dict_table_get_first_index(node->table); mtr_x_lock(dict_index_get_lock(index), &mtr); /* NOTE: we must also acquire an X-latch to the root page of the tree. We will need it when we free pages from the tree. If the tree is of height 1, the tree X-latch does NOT protect the root page, because it is also a leaf page. Since we will have a latch on an undo log page, we would break the latching order if we would only later latch the root page of such a tree! */ btr_root_get(index, &mtr); /* We assume in purge of externally stored fields that the space id of the undo log record is 0! */ block = buf_page_get(0, 0, page_no, RW_X_LATCH, &mtr); buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE); data_field = buf_block_get_frame(block) + offset + internal_offset; ut_a(dfield_get_len(&ufield->new_val) >= BTR_EXTERN_FIELD_REF_SIZE); btr_free_externally_stored_field( index, data_field + dfield_get_len(&ufield->new_val) - BTR_EXTERN_FIELD_REF_SIZE, NULL, NULL, NULL, 0, RB_NONE, &mtr); mtr_commit(&mtr); } } }
/*************************************************************** Purges an update of an existing record. Also purges an update of a delete marked record if that record contained an externally stored field. */ static void row_purge_upd_exist_or_extern( /*==========================*/ purge_node_t* node, /* in: row purge node */ que_thr_t* thr) /* in: query thread */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; upd_field_t* ufield; ibool is_insert; ulint rseg_id; ulint page_no; ulint offset; ulint internal_offset; byte* data_field; ulint data_field_len; ulint i; mtr_t mtr; ut_ad(node && thr); if (node->rec_type == TRX_UNDO_UPD_DEL_REC) { goto skip_secondaries; } heap = mem_heap_create(1024); while (node->index != NULL) { index = node->index; if (row_upd_changes_ord_field_binary(NULL, node->index, node->update)) { /* Build the older version of the index entry */ entry = row_build_index_entry(node->row, index, heap); row_purge_remove_sec_if_poss(node, thr, index, entry); } node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); skip_secondaries: /* Free possible externally stored fields */ for (i = 0; i < upd_get_n_fields(node->update); i++) { ufield = upd_get_nth_field(node->update, i); if (ufield->extern_storage) { /* We use the fact that new_val points to node->undo_rec and get thus the offset of dfield data inside the unod record. Then we can calculate from node->roll_ptr the file address of the new_val data */ internal_offset = ((byte*)ufield->new_val.data) - node->undo_rec; ut_a(internal_offset < UNIV_PAGE_SIZE); trx_undo_decode_roll_ptr(node->roll_ptr, &is_insert, &rseg_id, &page_no, &offset); mtr_start(&mtr); /* We have to acquire an X-latch to the clustered index tree */ index = dict_table_get_first_index(node->table); mtr_x_lock(dict_tree_get_lock(index->tree), &mtr); /* We assume in purge of externally stored fields that the space id of the undo log record is 0! */ data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr) + offset + internal_offset; buf_page_dbg_add_level(buf_frame_align(data_field), SYNC_TRX_UNDO_PAGE); data_field_len = ufield->new_val.len; btr_free_externally_stored_field(index, data_field, data_field_len, FALSE, &mtr); mtr_commit(&mtr); } } }
/*************************************************************** Purges an update of an existing record. Also purges an update of a delete marked record if that record contained an externally stored field. */ static void row_purge_upd_exist_or_extern( /*==========================*/ purge_node_t* node) /* in: row purge node */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; upd_field_t* ufield; ibool is_insert; ulint rseg_id; ulint page_no; ulint offset; ulint internal_offset; byte* data_field; ulint data_field_len; ulint i; mtr_t mtr; ut_ad(node); if (node->rec_type == TRX_UNDO_UPD_DEL_REC) { goto skip_secondaries; } heap = mem_heap_create(1024); while (node->index != NULL) { index = node->index; if (row_upd_changes_ord_field_binary(NULL, node->index, node->update)) { /* Build the older version of the index entry */ entry = row_build_index_entry(node->row, index, heap); row_purge_remove_sec_if_poss(node, index, entry); } node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); skip_secondaries: /* Free possible externally stored fields */ for (i = 0; i < upd_get_n_fields(node->update); i++) { ufield = upd_get_nth_field(node->update, i); if (ufield->extern_storage) { /* We use the fact that new_val points to node->undo_rec and get thus the offset of dfield data inside the unod record. Then we can calculate from node->roll_ptr the file address of the new_val data */ internal_offset = ((byte*)ufield->new_val.data) - node->undo_rec; ut_a(internal_offset < UNIV_PAGE_SIZE); trx_undo_decode_roll_ptr(node->roll_ptr, &is_insert, &rseg_id, &page_no, &offset); mtr_start(&mtr); /* We have to acquire an X-latch to the clustered index tree */ index = dict_table_get_first_index(node->table); mtr_x_lock(dict_index_get_lock(index), &mtr); /* NOTE: we must also acquire an X-latch to the root page of the tree. We will need it when we free pages from the tree. If the tree is of height 1, the tree X-latch does NOT protect the root page, because it is also a leaf page. Since we will have a latch on an undo log page, we would break the latching order if we would only later latch the root page of such a tree! */ btr_root_get(index, &mtr); /* We assume in purge of externally stored fields that the space id of the undo log record is 0! */ data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr) + offset + internal_offset; #ifdef UNIV_SYNC_DEBUG buf_page_dbg_add_level(buf_frame_align(data_field), SYNC_TRX_UNDO_PAGE); #endif /* UNIV_SYNC_DEBUG */ data_field_len = ufield->new_val.len; btr_free_externally_stored_field(index, data_field, data_field_len, FALSE, &mtr); mtr_commit(&mtr); } } }
/***********************************************************//** Undoes a modify in secondary indexes when undo record type is UPD_EXIST. @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static ulint row_undo_mod_upd_exist_sec( /*=======================*/ undo_node_t* node, /*!< in: row undo node */ que_thr_t* thr) /*!< in: query thread */ { mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; ulint err; if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) { /* No change in secondary indexes */ return(DB_SUCCESS); } heap = mem_heap_create(1024); while (node->index != NULL) { /* Skip all corrupted secondary index */ dict_table_skip_corrupt_index(node->index); if (!node->index) { break; } index = node->index; if (row_upd_changes_ord_field_binary(node->index, node->update, thr, node->row, node->ext)) { /* Build the newest version of the index entry */ entry = row_build_index_entry(node->row, node->ext, index, heap); if (UNIV_UNLIKELY(!entry)) { /* The server must have crashed in row_upd_clust_rec_by_insert() before the updated externally stored columns (BLOBs) of the new clustered index entry were written. */ /* The table must be in DYNAMIC or COMPRESSED format. REDUNDANT and COMPACT formats store a local 768-byte prefix of each externally stored column. */ ut_a(dict_table_get_format(index->table) >= DICT_TF_FORMAT_ZIP); /* This is only legitimate when rolling back an incomplete transaction after crash recovery. */ ut_a(thr_get_trx(thr)->is_recovered); /* The server must have crashed before completing the insert of the new clustered index entry and before inserting to the secondary indexes. Because node->row was not yet written to this index, we can ignore it. But we must restore node->undo_row. */ } else { /* NOTE that if we updated the fields of a delete-marked secondary index record so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot return to the original values because we do not know them. But this should not cause problems because in row0sel.c, in queries we always retrieve the clustered index record or an earlier version of it, if the secondary index record through which we do the search is delete-marked. */ err = row_undo_mod_del_mark_or_remove_sec( node, thr, index, entry); if (err != DB_SUCCESS) { mem_heap_free(heap); return(err); } mem_heap_empty(heap); } /* We may have to update the delete mark in the secondary index record of the previous version of the row. We also need to update the fields of the secondary index record if we updated its fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. */ entry = row_build_index_entry(node->undo_row, node->undo_ext, index, heap); ut_a(entry); err = row_undo_mod_del_unmark_sec_and_undo_update( BTR_MODIFY_LEAF, thr, index, entry); if (err == DB_FAIL) { err = row_undo_mod_del_unmark_sec_and_undo_update( BTR_MODIFY_TREE, thr, index, entry); } if (err != DB_SUCCESS) { mem_heap_free(heap); return(err); } } node->index = dict_table_get_next_index(node->index); } mem_heap_free(heap); return(DB_SUCCESS); }