/***********************************************************//**
Rolls back a fresh insert of a row. An insert is fresh when the
clustered index contained no record, not even a delete-marked one, for
the same unique key at the time of the insert. Rollback is eager: index
records that the purge would remove anyway are removed here already.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
UNIV_INTERN
ulint
row_undo_ins(
/*=========*/
	undo_node_t*	node)	/*!< in: row undo node */
{
	ut_ad(node);
	ut_ad(node->state == UNDO_NODE_INSERT);

	row_undo_ins_parse_undo_rec(node);

	if (node->table == NULL || !row_undo_search_clust_to_pcur(node)) {
		/* No table or no clustered index record to work on:
		just release the undo record. */
		trx_undo_rec_release(node->trx, node->undo_no);

		return(DB_SUCCESS);
	}

	/* Undo the insert in every secondary index. The clustered index
	is the first index of the table, so start from the one after it,
	and step over corrupted indexes. */
	node->index = dict_table_get_next_index(
		dict_table_get_first_index(node->table));

	dict_table_skip_corrupt_index(node->index);

	while (node->index != NULL) {
		dtuple_t*	sec_entry;

		sec_entry = row_build_index_entry(node->row, node->ext,
						  node->index, node->heap);

		if (UNIV_UNLIKELY(!sec_entry)) {
			/* The server must have crashed after the
			clustered index record was inserted but before
			all of its externally stored columns were
			written. Secondary index entries are inserted
			only after the clustered index record, so the
			secondary index record cannot exist. This can
			happen only while rolling back incomplete
			transactions. */
			ut_a(trx_is_recv(node->trx));
		} else {
			ulint	rc;

			log_free_check();

			rc = row_undo_ins_remove_sec(node->index, sec_entry);

			if (rc != DB_SUCCESS) {

				return(rc);
			}
		}

		dict_table_next_uncorrupted_index(node->index);
	}

	/* Finally remove the clustered index record itself. */
	log_free_check();

	return(row_undo_ins_remove_clust_rec(node));
}
/***********************************************************//**
Rolls back a fresh insert of a row. An insert is fresh when the
clustered index contained no record, not even a delete-marked one, for
the same unique key at the time of the insert. Rollback is eager: index
records that the purge would remove anyway are removed here already.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
UNIV_INTERN
ulint
row_undo_ins(
/*=========*/
	undo_node_t*	node)	/*!< in: row undo node */
{
	ut_ad(node);
	ut_ad(node->state == UNDO_NODE_INSERT);

	row_undo_ins_parse_undo_rec(node);

	if (node->table == NULL || !row_undo_search_clust_to_pcur(node)) {
		/* No table or no clustered index record to work on:
		just release the undo record. */
		trx_undo_rec_release(node->trx, node->undo_no);

		return(DB_SUCCESS);
	}

	/* Walk the secondary indexes; the clustered index is the first
	index of the table and is handled after the loop. */
	for (node->index = dict_table_get_next_index(
		     dict_table_get_first_index(node->table));
	     node->index != NULL;
	     node->index = dict_table_get_next_index(node->index)) {

		dtuple_t*	sec_entry;
		ulint		rc;

		sec_entry = row_build_index_entry(node->row, node->ext,
						  node->index, node->heap);

		if (UNIV_UNLIKELY(!sec_entry)) {
			/* Either the server crashed after inserting the
			clustered index record but before writing all of
			its externally stored columns, or a statement is
			being rolled back because storing off-page
			columns failed. Secondary index entries are
			inserted only after the clustered index record,
			so the secondary index record cannot exist and
			there is nothing to remove. */
			continue;
		}

		log_free_check();

		rc = row_undo_ins_remove_sec(node->index, sec_entry);

		if (rc != DB_SUCCESS) {

			return(rc);
		}
	}

	/* Finally remove the clustered index record itself. */
	log_free_check();

	return(row_undo_ins_remove_clust_rec(node));
}
/***********************************************************//**
Rolls back a modify operation on a table row: the secondary index
changes are undone first, then the clustered index record.
@return DB_SUCCESS or error code */
UNIV_INTERN
ulint
row_undo_mod(
/*=========*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	ulint	err;

	ut_ad(node && thr);
	ut_ad(node->state == UNDO_NODE_MODIFY);

	row_undo_mod_parse_undo_rec(node, thr);

	if (node->table == NULL || !row_undo_search_clust_to_pcur(node)) {
		/* Either this was undone already, another query thread
		will undo it, or the table has been dropped. */
		trx_undo_rec_release(node->trx, node->undo_no);
		node->state = UNDO_NODE_FETCH_NEXT;

		return(DB_SUCCESS);
	}

	/* Position on the first secondary index that is not corrupted;
	the clustered index is the first index of the table. */
	node->index = dict_table_get_next_index(
		dict_table_get_first_index(node->table));

	dict_table_skip_corrupt_index(node->index);

	/* Undo the secondary index part according to the type of the
	undo log record. */
	switch (node->rec_type) {
	case TRX_UNDO_UPD_EXIST_REC:
		err = row_undo_mod_upd_exist_sec(node, thr);
		break;
	case TRX_UNDO_DEL_MARK_REC:
		err = row_undo_mod_del_mark_sec(node, thr);
		break;
	default:
		ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
		err = row_undo_mod_upd_del_sec(node, thr);
		break;
	}

	/* Touch the clustered index only if the secondary indexes were
	handled successfully. */
	if (err == DB_SUCCESS) {
		err = row_undo_mod_clust(node, thr);
	}

	return(err);
}
/*******************************************************************
Undoes an insert of a row: removes the entry from each secondary index
and then removes the clustered index record. If the table or the
clustered index record is not found, only the undo log record is
released. */

ulint
row_undo_ins(
/*=========*/
				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
	undo_node_t*	node,	/* in: row undo node */
	que_thr_t*	thr)	/* in: query thread */
{
	ulint	err;

	ut_ad(node && thr);
	ut_ad(node->state == UNDO_NODE_INSERT);

	row_undo_ins_parse_undo_rec(node, thr);

	/* The clustered index search is attempted only when the table
	is known. */
	if (node->table == NULL
	    || !row_undo_search_clust_to_pcur(node, thr)) {

		trx_undo_rec_release(node->trx, node->undo_no);

		return(DB_SUCCESS);
	}

	/* Secondary indexes follow the first (clustered) index in the
	index list of the table. */
	for (node->index = dict_table_get_next_index(
		     dict_table_get_first_index(node->table));
	     node->index != NULL;
	     node->index = dict_table_get_next_index(node->index)) {

		dtuple_t*	sec_entry;

		sec_entry = row_build_index_entry(node->row, node->index,
						  node->heap);

		err = row_undo_ins_remove_sec(node->index, sec_entry, thr);

		if (err != DB_SUCCESS) {

			return(err);
		}
	}

	return(row_undo_ins_remove_clust_rec(node, thr));
}
/*******************************************************************
Deletes the clustered index record on which the persistent cursor of
the undo node was positioned. When the function returns, the cursor is
detached. */
static
ulint
row_undo_ins_remove_clust_rec(
/*==========================*/
				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
	undo_node_t*	node,	/* in: undo node */
	que_thr_t*	thr)	/* in: query thread */
{
	mtr_t		mtr;
	btr_cur_t*	cursor;
	ibool		success;
	ulint		err;
	ulint		attempts	= 0;

	UT_NOT_USED(thr);

	mtr_start(&mtr);

	success = btr_pcur_restore_position(BTR_MODIFY_LEAF, &(node->pcur),
					    &mtr);
	ut_a(success);

	if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {

		/* The row belongs to the SYS_INDEXES table: drop the
		index tree it describes, then reposition the cursor in
		a new mini-transaction. */
		dict_drop_index_tree(btr_pcur_get_rec(&(node->pcur)), &mtr);

		mtr_commit(&mtr);

		mtr_start(&mtr);

		success = btr_pcur_restore_position(BTR_MODIFY_LEAF,
						    &(node->pcur), &mtr);
		ut_a(success);
	}

	cursor = btr_pcur_get_btr_cur(&(node->pcur));

	/* First attempt: optimistic delete. */
	success = btr_cur_optimistic_delete(cursor, &mtr);

	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);

	if (success) {
		trx_undo_rec_release(node->trx, node->undo_no);

		return(DB_SUCCESS);
	}

	/* The optimistic delete did not succeed: try a pessimistic
	descent to the tree, retrying a bounded number of times while
	we are out of file space. */
	for (;;) {
		mtr_start(&mtr);

		success = btr_pcur_restore_position(BTR_MODIFY_TREE,
						    &(node->pcur), &mtr);
		ut_a(success);

		btr_cur_pessimistic_delete(&err, FALSE, cursor, TRUE, &mtr);

		btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);

		/* The delete operation may fail if we have little file
		space left: TODO: easiest to crash the database and
		restart with more file space */
		if (err != DB_OUT_OF_FILE_SPACE
		    || attempts >= BTR_CUR_RETRY_DELETE_N_TIMES) {

			break;
		}

		attempts++;

		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
	}

	trx_undo_rec_release(node->trx, node->undo_no);

	return(err);
}
/***********************************************************//**
Undoes a modification of a clustered index record and sets the state of
the undo node for the next round of undo.
@return DB_SUCCESS or error code: we may run out of file space */
static
ulint
row_undo_mod_clust(
/*===============*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	mtr_t		mtr;
	btr_pcur_t*	pcur;
	ulint		err;
	ibool		more_vers;
	undo_no_t	new_undo_no;

	ut_ad(node && thr);

	log_free_check();

	/* Find out whether the previous version of the clustered index
	record must also be undone in this same rollback operation. */
	more_vers = row_undo_mod_undo_also_prev_vers(node, &new_undo_no);

	pcur = &(node->pcur);

	/* Optimistic attempt first: keep the changes within the index
	page. */
	mtr_start(&mtr);

	err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_LEAF);

	if (err != DB_SUCCESS) {
		/* The tree structure may have to be modified: repeat
		with a pessimistic descent down the index tree. */
		btr_pcur_commit_specify_mtr(pcur, &mtr);

		mtr_start(&mtr);

		err = row_undo_mod_clust_low(node, thr, &mtr,
					     BTR_MODIFY_TREE);
	}

	btr_pcur_commit_specify_mtr(pcur, &mtr);

	if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) {

		/* Remove the clustered index record, again trying the
		optimistic way before the pessimistic one. */
		mtr_start(&mtr);

		err = row_undo_mod_remove_clust_low(node, thr, &mtr,
						    BTR_MODIFY_LEAF);

		if (err != DB_SUCCESS) {
			btr_pcur_commit_specify_mtr(pcur, &mtr);

			mtr_start(&mtr);

			err = row_undo_mod_remove_clust_low(
				node, thr, &mtr, BTR_MODIFY_TREE);
		}

		btr_pcur_commit_specify_mtr(pcur, &mtr);
	}

	node->state = UNDO_NODE_FETCH_NEXT;

	trx_undo_rec_release(node->trx, node->undo_no);

	/* The undo log record of the prior version is reserved only
	after &mtr has been committed. This is required by the latching
	order: &mtr may contain the fsp latch, which is lower in the
	latch hierarchy than trx->undo_mutex. */
	if (err == DB_SUCCESS
	    && more_vers
	    && trx_undo_rec_reserve(node->trx, new_undo_no)) {

		node->state = UNDO_NODE_PREV_VERS;
	}

	return(err);
}