/****************************************************************//**
Sends the reply for a handled signal in the signal queue of a trx.
If the signal has a receiver thread, the signal is unlinked from the
reply_signals list of the receiver's transaction, the wait of the receiver
is ended with que_thr_end_wait(), and sig->receiver is reset to NULL.
A signal that has no receiver is left untouched.
The function asserts (ut_ad) that the caller owns kernel_mutex. */
UNIV_INTERN
void
trx_sig_reply(
/*==========*/
	trx_sig_t*	sig,		/*!< in: signal */
	que_thr_t**	next_thr)	/*!< in/out: next query thread to run;
					if the value which is passed in is
					a pointer to a NULL pointer, then the
					calling function can start running
					a new query thread */
{
	que_thr_t*	waiter;
	trx_t*		waiter_trx;

	ut_ad(sig);
	ut_ad(mutex_own(&kernel_mutex));

	waiter = sig->receiver;

	if (waiter == NULL) {
		/* Nobody is waiting for a reply to this signal */

		return;
	}

	ut_ad(waiter->state == QUE_THR_SIG_REPLY_WAIT);

	waiter_trx = thr_get_trx(waiter);

	/* The signal no longer waits for a reply: take it off the
	reply list of the receiver's transaction */
	UT_LIST_REMOVE(reply_signals, waiter_trx->reply_signals, sig);

	ut_ad(waiter_trx->sess->state != SESS_ERROR);

	que_thr_end_wait(waiter, next_thr);

	sig->receiver = NULL;
}
/***********************************************************//**
Parses the row reference and other info in a modify undo log record and
stores the result in the undo node: rec_type, table, ref, update,
new_roll_ptr, new_trx_id and cmpl_info.
If the table cannot be found by its id, or its .ibd file is flagged as
missing, node->table is left as NULL and the remaining fields are not
filled in. */
static
void
row_undo_mod_parse_undo_rec(
/*========================*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	dict_index_t*	pk_index;
	byte*		pos;
	undo_no_t	undo_no;
	table_id_t	table_id;
	trx_id_t	rec_trx_id;
	roll_ptr_t	rec_roll_ptr;
	ulint		info_bits;
	ulint		rec_type;
	ulint		cmpl_info;
	ibool		extern_unused;
	trx_t*		trx;

	ut_ad(node && thr);

	trx = thr_get_trx(thr);

	/* General header of the undo record */
	pos = trx_undo_rec_get_pars(node->undo_rec, &rec_type, &cmpl_info,
				    &extern_unused, &undo_no, &table_id);
	node->rec_type = rec_type;

	node->table = dict_table_get_on_id(table_id, trx);

	/* TODO: other fixes associated with DROP TABLE + rollback in the
	same table by another user */

	if (node->table == NULL) {
		/* Table was dropped */

		return;
	}

	if (node->table->ibd_file_missing) {
		/* We skip undo operations to missing .ibd files */
		node->table = NULL;

		return;
	}

	pk_index = dict_table_get_first_index(node->table);

	/* System columns, then the row reference, then the update vector */
	pos = trx_undo_update_rec_get_sys_cols(pos, &rec_trx_id,
					       &rec_roll_ptr, &info_bits);

	pos = trx_undo_rec_get_row_ref(pos, pk_index, &(node->ref),
				       node->heap);

	trx_undo_update_rec_get_update(pos, pk_index, rec_type, rec_trx_id,
				       rec_roll_ptr, info_bits, trx,
				       node->heap, &(node->update));

	node->new_roll_ptr = rec_roll_ptr;
	node->new_trx_id = rec_trx_id;
	node->cmpl_info = cmpl_info;
}
/***********************************************************//**
Fetches the next undo log record and does the purge for the recorded
operation. When no record is left, control is handed to the parent of the
purge node and nothing else is done. Otherwise the reservation of the
record is released, the node heap is emptied and the node is scheduled to
run again. */
void
row_purge(
/*======*/
	purge_node_t*	node,	/*!< in: row purge node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	ibool	updated_extern;
	ibool	do_purge;

	ut_ad(node);
	ut_ad(thr);

	node->undo_rec = trx_purge_fetch_next_rec(&node->roll_ptr,
						  &node->reservation,
						  node->heap);

	if (!node->undo_rec) {
		/* Purge completed for this query thread */

		thr->run_node = que_node_get_parent(node);

		return;
	}

	/* The dummy record is never parsed; for any other record the
	parser decides whether a purge operation is required */
	do_purge = FALSE;

	if (node->undo_rec != &trx_purge_dummy_rec) {
		do_purge = row_purge_parse_undo_rec(node, &updated_extern,
						    thr);
	}

	if (do_purge) {
		node->found_clust = FALSE;

		/* Start from the index that follows the first index
		of the table */
		node->index = dict_table_get_next_index(
			dict_table_get_first_index(node->table));

		if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
			row_purge_del_mark(node);
		} else if (updated_extern
			   || node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
			row_purge_upd_exist_or_extern(thr, node);
		}

		if (node->found_clust) {
			btr_pcur_close(&(node->pcur));
		}

		row_mysql_unfreeze_data_dictionary(thr_get_trx(thr));
	}

	/* Do some cleanup */
	trx_purge_rec_release(node->reservation);
	mem_heap_empty(node->heap);

	thr->run_node = node;
}
/***************************************************************//**
Builds an index definition row to insert.
Looks up the table of the index by name, assigns a new index id, copies
the space id of the table to the index, builds the SYS_INDEXES tuple and
hands it to the insert node node->ind_def. The id of the creating
transaction is recorded in index->trx_id.
@return DB_SUCCESS, or DB_TABLE_NOT_FOUND if the table lookup fails */
static
ulint
dict_build_index_def_step(
/*======================*/
	que_thr_t*	thr,	/*!< in: query thread */
	ind_node_t*	node)	/*!< in: index create node */
{
	dict_table_t*	owner;
	dict_index_t*	new_index;
	dtuple_t*	sys_row;
	trx_t*		creator;

	ut_ad(mutex_own(&(dict_sys->mutex)));

	creator = thr_get_trx(thr);
	new_index = node->index;

	owner = dict_table_get_low(new_index->table_name);

	if (owner == NULL) {

		return(DB_TABLE_NOT_FOUND);
	}

	creator->table_id = owner->id;
	node->table = owner;

	/* Only a clustered index may be added to a table that has no
	indexes yet */
	ut_ad(dict_index_is_clust(new_index)
	      || (UT_LIST_GET_LEN(owner->indexes) > 0));

	dict_hdr_get_new_id(NULL, &new_index->id, NULL);

	/* Inherit the space id from the table; we store all indexes of a
	table in the same tablespace */
	new_index->space = owner->space;

	node->page_no = FIL_NULL;

	sys_row = dict_create_sys_indexes_tuple(new_index, node->heap);
	node->ind_row = sys_row;

	ins_node_set_new_row(node->ind_def, sys_row);

	/* Note that the index was created by this transaction. */
	new_index->trx_id = creator->id;

	return(DB_SUCCESS);
}
/***********************************************************//**
Undoes a row operation in a table. This is a high-level function used
in SQL execution graphs.
The result of row_undo() is always stored in trx->error_state. On any
result other than DB_SUCCESS a message is written to stderr; for
DB_OUT_OF_FILE_SPACE a further hint is printed and exit(1) is called,
for every other error ut_error is invoked.
@return query thread to run next or NULL */
UNIV_INTERN
que_thr_t*
row_undo_step(
/*==========*/
	que_thr_t*	thr)	/*!< in: query thread */
{
	ulint		err;
	undo_node_t*	node;
	trx_t*		trx;

	ut_ad(thr);

	srv_activity_count++;

	trx = thr_get_trx(thr);
	node = thr->run_node;

	ut_ad(que_node_get_type(node) == QUE_NODE_UNDO);

	err = row_undo(node, thr);
	trx->error_state = err;

	if (err == DB_SUCCESS) {
		/* Keep running the same query thread */

		return(thr);
	}

	/* SQL error detected */

	fprintf(stderr, "InnoDB: Fatal error %lu in rollback.\n",
		(ulong) err);

	if (err == DB_OUT_OF_FILE_SPACE) {
		fprintf(stderr,
			"InnoDB: Error 13 means out of tablespace.\n"
			"InnoDB: Consider increasing"
			" your tablespace.\n");

		exit(1);
	}

	ut_error;

	return(NULL);
}
/*******************************************************************
Builds an index definition row to insert. Looks up the table of the index
by name, assigns a new index id from DICT_HDR_INDEX_ID, copies the space id
of the table to the index, builds the SYS_INDEXES tuple and hands it to the
insert node node->ind_def. */
static
ulint
dict_build_index_def_step(
/*======================*/
				/* out: DB_SUCCESS, or DB_TABLE_NOT_FOUND
				if the table lookup fails */
	que_thr_t*	thr,	/* in: query thread */
	ind_node_t*	node)	/* in: index create node */
{
	dict_table_t*	owner;
	dict_index_t*	new_index;
	dtuple_t*	sys_row;
	trx_t*		creator;

	ut_ad(mutex_own(&(dict_sys->mutex)));

	creator = thr_get_trx(thr);
	new_index = node->index;

	owner = dict_table_get_low(new_index->table_name);

	if (owner == NULL) {

		return(DB_TABLE_NOT_FOUND);
	}

	creator->table_id = owner->id;
	node->table = owner;

	/* Only a clustered index may be added to a table that has no
	indexes yet */
	ut_ad((new_index->type & DICT_CLUSTERED)
	      || (UT_LIST_GET_LEN(owner->indexes) > 0));

	new_index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID);

	/* Inherit the space id from the table; we store all indexes of a
	table in the same tablespace */
	new_index->space = owner->space;

	node->page_no = FIL_NULL;

	sys_row = dict_create_sys_indexes_tuple(new_index, node->heap);
	node->ind_row = sys_row;

	ins_node_set_new_row(node->ind_def, sys_row);

	return(DB_SUCCESS);
}
/***********************************************************//**
Performs an execution step for a commit type node in a query graph.
In the COMMIT_NODE_SEND state the thread is put to QUE_THR_SIG_REPLY_WAIT
and a TRX_SIG_COMMIT signal is sent to its own transaction under
kernel_mutex. In the COMMIT_NODE_WAIT state the node is reset to
COMMIT_NODE_SEND and control returns to the parent node.
@return query thread to run next, or NULL */
UNIV_INTERN
que_thr_t*
trx_commit_step(
/*============*/
	que_thr_t*	thr)	/*!< in: query thread */
{
	commit_node_t*	node;
	que_thr_t*	runnable	= NULL;

	node = thr->run_node;

	ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);

	if (thr->prev_node == que_node_get_parent(node)) {
		/* Entered from the parent: begin a new commit round */
		node->state = COMMIT_NODE_SEND;
	}

	if (node->state != COMMIT_NODE_SEND) {
		/* Re-entered after the signal was sent: return control
		to the parent node */
		ut_ad(node->state == COMMIT_NODE_WAIT);

		node->state = COMMIT_NODE_SEND;

		thr->run_node = que_node_get_parent(node);

		return(thr);
	}

	mutex_enter(&kernel_mutex);

	node->state = COMMIT_NODE_WAIT;

	thr->state = QUE_THR_SIG_REPLY_WAIT;

	/* Send the commit signal to the transaction */

	trx_sig_send(thr_get_trx(thr), TRX_SIG_COMMIT, TRX_SIG_SELF,
		     thr, NULL, &runnable);

	mutex_exit(&kernel_mutex);

	return(runnable);
}
/****************************************************************//** Sends a signal to a trx object. */ UNIV_INTERN void trx_sig_send( /*=========*/ trx_t* trx, /*!< in: trx handle */ ulint type, /*!< in: signal type */ ulint sender, /*!< in: TRX_SIG_SELF or TRX_SIG_OTHER_SESS */ que_thr_t* receiver_thr, /*!< in: query thread which wants the reply, or NULL; if type is TRX_SIG_END_WAIT, this must be NULL */ trx_savept_t* savept, /*!< in: possible rollback savepoint, or NULL */ que_thr_t** next_thr) /*!< in/out: next query thread to run; if the value which is passed in is a pointer to a NULL pointer, then the calling function can start running a new query thread; if the parameter is NULL, it is ignored */ { trx_sig_t* sig; trx_t* receiver_trx; ut_ad(trx); ut_ad(mutex_own(&kernel_mutex)); if (!trx_sig_is_compatible(trx, type, sender)) { /* The signal is not compatible with the other signals in the queue: die */ ut_error; } /* Queue the signal object */ if (UT_LIST_GET_LEN(trx->signals) == 0) { /* The signal list is empty: the 'sig' slot must be unused (we improve performance a bit by avoiding mem_alloc) */ sig = &(trx->sig); } else { /* It might be that the 'sig' slot is unused also in this case, but we choose the easy way of using mem_alloc */ sig = mem_alloc(sizeof(trx_sig_t)); } UT_LIST_ADD_LAST(signals, trx->signals, sig); sig->type = type; sig->sender = sender; sig->receiver = receiver_thr; if (savept) { sig->savept = *savept; } if (receiver_thr) { receiver_trx = thr_get_trx(receiver_thr); UT_LIST_ADD_LAST(reply_signals, receiver_trx->reply_signals, sig); } if (trx->sess->state == SESS_ERROR) { trx_sig_reply_wait_to_suspended(trx); } if ((sender != TRX_SIG_SELF) || (type == TRX_SIG_BREAK_EXECUTION)) { ut_error; } /* If there were no other signals ahead in the queue, try to start handling of the signal */ if (UT_LIST_GET_FIRST(trx->signals) == sig) { trx_sig_start_handle(trx, next_thr); } }
/***************************************************************
Fetches an undo log record and does the purge for the recorded operation.
If none left, or the current purge completed, returns the control to the
parent node, which is always a query thread node. */
static
ulint
row_purge(
/*======*/
				/* out: DB_SUCCESS; no other value is
				returned by this function body */
	purge_node_t*	node,	/* in: row purge node */
	que_thr_t*	thr)	/* in: query thread */
{
	dulint	fetched_roll_ptr;
	ibool	updated_extern;
	trx_t*	trx;

	ut_ad(node && thr);

	trx = thr_get_trx(thr);

	node->undo_rec = trx_purge_fetch_next_rec(&fetched_roll_ptr,
						  &(node->reservation),
						  node->heap);
	if (!node->undo_rec) {
		/* Purge completed for this query thread */

		thr->run_node = que_node_get_parent(node);

		return(DB_SUCCESS);
	}

	/* The roll pointer is stored in the node only when a record
	was actually fetched */
	node->roll_ptr = fetched_roll_ptr;

	/* The dummy record is never parsed. If the parser reports that
	a purge is needed, we must also remember to unfreeze data
	dictionary! */
	if (node->undo_rec != &trx_purge_dummy_rec
	    && row_purge_parse_undo_rec(node, &updated_extern, thr)) {

		node->found_clust = FALSE;

		/* Start from the index that follows the first index
		of the table */
		node->index = dict_table_get_next_index(
			dict_table_get_first_index(node->table));

		if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
			row_purge_del_mark(node);
		} else if (updated_extern
			   || node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
			row_purge_upd_exist_or_extern(node);
		}

		if (node->found_clust) {
			btr_pcur_close(&(node->pcur));
		}

		row_mysql_unfreeze_data_dictionary(trx);
	}

	/* Do some cleanup */
	trx_purge_rec_release(node->reservation);
	mem_heap_empty(node->heap);

	thr->run_node = node;

	return(DB_SUCCESS);
}
/***************************************************************
Runs one step of the index create node of a query graph. The node moves
through the states INDEX_BUILD_INDEX_DEF, INDEX_BUILD_FIELD_DEF,
INDEX_CREATE_INDEX_TREE, INDEX_COMMIT_WORK and INDEX_ADD_TO_CACHE. While
definition rows are being built, the thread is redirected to the insert
nodes node->ind_def and node->field_def. The outcome is always stored in
trx->error_state. */
que_thr_t*
dict_create_index_step(
/*===================*/
				/* out: query thread to run next or NULL */
	que_thr_t*	thr)	/* in: query thread */
{
	ind_node_t*	node;
	ulint		err	= DB_ERROR;
	trx_t*		trx;

	ut_ad(thr);
	ut_ad(mutex_own(&(dict_sys->mutex)));

	trx = thr_get_trx(thr);
	node = thr->run_node;

	ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_INDEX);

	if (thr->prev_node == que_node_get_parent(node)) {
		/* Entered from the parent: start from the beginning */
		node->state = INDEX_BUILD_INDEX_DEF;
	}

	if (node->state == INDEX_BUILD_INDEX_DEF) {
		/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
		err = dict_build_index_def_step(thr, node);

		if (err != DB_SUCCESS) {

			goto function_exit;
		}

		node->state = INDEX_BUILD_FIELD_DEF;
		node->field_no = 0;

		/* Let the insert node store the index definition row */
		thr->run_node = node->ind_def;

		return(thr);
	}

	if (node->state == INDEX_BUILD_FIELD_DEF) {

		if (node->field_no >= (node->index)->n_fields) {
			/* All field definition rows have been built */
			node->state = INDEX_CREATE_INDEX_TREE;
		} else {
			err = dict_build_field_def_step(node);

			if (err != DB_SUCCESS) {

				goto function_exit;
			}

			node->field_no++;

			/* Let the insert node store the field
			definition row */
			thr->run_node = node->field_def;

			return(thr);
		}
	}

	if (node->state == INDEX_CREATE_INDEX_TREE) {

		err = dict_create_index_tree_step(node);

		if (err != DB_SUCCESS) {

			goto function_exit;
		}

		node->state = INDEX_COMMIT_WORK;
	}

	if (node->state == INDEX_COMMIT_WORK) {
		/* Index was correctly defined: do NOT commit the
		transaction (CREATE INDEX does NOT currently do an implicit
		commit of the current transaction) */

		node->state = INDEX_ADD_TO_CACHE;
	}

	if (node->state == INDEX_ADD_TO_CACHE) {

		dict_index_add_to_cache(node->table, node->index,
					node->page_no);

		err = DB_SUCCESS;
	}

function_exit:
	trx->error_state = err;

	if (err != DB_SUCCESS) {
		/* Lock wait or SQL error detected: in both cases there
		is no thread to run next */

		return(NULL);
	}

	thr->run_node = que_node_get_parent(node);

	return(thr);
}
/***************************************************************
Parses the row reference and other info in a modify undo log record.
The results are stored in the purge node: rec_type, table, ref, update
and, when the update may have changed ordering fields, row.
NOTE: when this function returns TRUE, purge_sys->purge_is_running is
still x-locked by this function; it is released here only on the path
where the table is not found. */
static
ibool
row_purge_parse_undo_rec(
/*=====================*/
					/* out: TRUE if purge operation
					required */
	purge_node_t*	node,		/* in: row undo node */
	ibool*		updated_extern,	/* out: TRUE if an externally stored
					field was updated */
	que_thr_t*	thr)		/* in: query thread */
{
	dict_index_t*	clust_index;
	byte*		ptr;
	dulint		undo_no;
	dulint		table_id;
	dulint		trx_id;
	dulint		roll_ptr;
	ulint		info_bits;
	ulint		type;
	ulint		cmpl_info;

	ut_ad(node && thr);

	ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
				    updated_extern, &undo_no, &table_id);
	node->rec_type = type;

	if (type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) {

		return(FALSE);
	}

	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
					       &info_bits);
	node->table = NULL;

	if (type == TRX_UNDO_UPD_EXIST_REC
	    && (cmpl_info & UPD_NODE_NO_ORD_CHANGE)
	    && !(*updated_extern)) {

		/* Purge requires no changes to indexes: we may return */

		return(FALSE);
	}

	/* The table lookup and the acquisition of purge_is_running are
	both done while holding the dictionary mutex */
	mutex_enter(&(dict_sys->mutex));

	node->table = dict_table_get_on_id_low(table_id, thr_get_trx(thr));

	rw_lock_x_lock(&(purge_sys->purge_is_running));

	mutex_exit(&(dict_sys->mutex));

	if (node->table == NULL) {
		/* The table has been dropped: no need to do purge */

		rw_lock_x_unlock(&(purge_sys->purge_is_running));

		return(FALSE);
	}

	clust_index = dict_table_get_first_index(node->table);

	ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
				       node->heap);

	ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
					     roll_ptr, info_bits,
					     node->heap, &(node->update));

	/* Read to the partial row the fields that occur in indexes.
	The flag test must be parenthesized: '!' binds tighter than '&',
	so the unparenthesized form '!cmpl_info & FLAG' tests whether
	cmpl_info is zero instead of testing the flag, and would skip
	the partial row whenever any other bit of cmpl_info is set. */

	if (!(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
		ptr = trx_undo_rec_get_partial_row(ptr, clust_index,
						   &(node->row), node->heap);
	}

	return(TRUE);
}
/*******************************************************************
Builds a table definition to insert. Assigns a new table id, rejects the
table if the summed minimum sizes of its columns exceed
BTR_PAGE_MAX_REC_SIZE, creates a single-table tablespace when
srv_file_per_table is set, and finally hands the SYS_TABLES tuple to the
insert node node->tab_def. */
static
ulint
dict_build_table_def_step(
/*======================*/
				/* out: DB_SUCCESS or error code */
	que_thr_t*	thr,	/* in: query thread */
	tab_node_t*	node)	/* in: table create node */
{
	dict_table_t*	table;
	dtuple_t*	sys_row;
	ulint		error;
	const char*	path_or_name;
	ibool		is_path;
	mtr_t		mtr;
	ulint		col_no;
	ulint		min_rec_len;

	ut_ad(mutex_own(&(dict_sys->mutex)));

	table = node->table;

	table->id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID);

	thr_get_trx(thr)->table_id = table->id;

	/* Sum up the minimum sizes of all defined columns */
	min_rec_len = 0;

	for (col_no = 0; col_no < table->n_def; col_no++) {
		min_rec_len += dict_col_get_min_size(&table->cols[col_no]);
	}

	if (min_rec_len > BTR_PAGE_MAX_REC_SIZE) {

		return(DB_TOO_BIG_RECORD);
	}

	if (srv_file_per_table) {
		/* We create a new single-table tablespace for the table.
		We initially let it be 4 pages:
		- page 0 is the fsp header and an extent descriptor page,
		- page 1 is an ibuf bitmap page,
		- page 2 is the first inode page,
		- page 3 will contain the root of the clustered index of the
		table we create here. */

		ulint	space = 0;	/* reset to zero for the call below */

		/* By default the tablespace is identified by the table
		name */
		path_or_name = table->name;
		is_path = FALSE;

		if (table->dir_path_of_temp_table) {
			/* We place tables created with CREATE TEMPORARY
			TABLE in the tmp dir of mysqld server */

			path_or_name = table->dir_path_of_temp_table;
			is_path = TRUE;
		}

		error = fil_create_new_single_table_tablespace(
			&space, path_or_name, is_path,
			FIL_IBD_FILE_INITIAL_SIZE);

		/* The space id is stored in the table object before the
		result of the creation is checked */
		table->space = (unsigned int) space;

		if (error != DB_SUCCESS) {

			return(error);
		}

		mtr_start(&mtr);

		fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE,
				&mtr);

		mtr_commit(&mtr);
	}

	sys_row = dict_create_sys_tables_tuple(table, node->heap);

	ins_node_set_new_row(node->tab_def, sys_row);

	return(DB_SUCCESS);
}
/*************************************************************** Marks the clustered index record deleted and inserts the updated version of the record to the index. This function should be used when the ordering fields of the clustered index record change. This should be quite rare in database applications. */ static ulint row_upd_clust_rec_by_insert( /*========================*/ /* out: DB_SUCCESS if operation successfully completed, else error code or DB_LOCK_WAIT */ upd_node_t* node, /* in: row update node */ dict_index_t* index, /* in: clustered index of the record */ que_thr_t* thr, /* in: query thread */ ibool check_ref,/* in: TRUE if index may be referenced in a foreign key constraint */ mtr_t* mtr) /* in: mtr; gets committed here */ { mem_heap_t* heap; btr_pcur_t* pcur; btr_cur_t* btr_cur; trx_t* trx; dict_table_t* table; dtuple_t* entry; ulint err; ut_ad(node); ut_ad(index->type & DICT_CLUSTERED); trx = thr_get_trx(thr); table = node->table; pcur = node->pcur; btr_cur = btr_pcur_get_btr_cur(pcur); if (node->state != UPD_NODE_INSERT_CLUSTERED) { err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, btr_cur, TRUE, thr, mtr); if (err != DB_SUCCESS) { mtr_commit(mtr); return(err); } /* Mark as not-owned the externally stored fields which the new row inherits from the delete marked record: purge should not free those externally stored fields even if the delete marked record is removed from the index tree, or updated. */ btr_cur_mark_extern_inherited_fields(btr_cur_get_rec(btr_cur), node->update, mtr); if (check_ref) { /* NOTE that the following call loses the position of pcur ! 
*/ err = row_upd_check_references_constraints( pcur, table, index, thr, mtr); if (err != DB_SUCCESS) { mtr_commit(mtr); return(err); } } } mtr_commit(mtr); node->state = UPD_NODE_INSERT_CLUSTERED; heap = mem_heap_create(500); entry = row_build_index_entry(node->row, index, heap); row_upd_clust_index_replace_new_col_vals(entry, node->update); row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id); /* If we return from a lock wait, for example, we may have extern fields marked as not-owned in entry (marked if the if-branch above). We must unmark them. */ btr_cur_unmark_dtuple_extern_fields(entry, node->ext_vec, node->n_ext_vec); /* We must mark non-updated extern fields in entry as inherited, so that a possible rollback will not free them */ btr_cur_mark_dtuple_inherited_extern(entry, node->ext_vec, node->n_ext_vec, node->update); err = row_ins_index_entry(index, entry, node->ext_vec, node->n_ext_vec, thr); mem_heap_free(heap); return(err); }
/***************************************************************
Runs one step of an update node in a query graph. Starts the transaction
if it is not started, takes an IX lock on the table unless the node
already has a clustered record x-lock, and then either positions the
select node to fetch a row or performs the update with row_upd(). The
outcome is always stored in trx->error_state. */
que_thr_t*
row_upd_step(
/*=========*/
				/* out: query thread to run next or NULL */
	que_thr_t*	thr)	/* in: query thread */
{
	upd_node_t*	node;
	sel_node_t*	sel_node;
	que_node_t*	parent;
	ulint		err		= DB_SUCCESS;
	trx_t*		trx;

	ut_ad(thr);

	trx = thr_get_trx(thr);

	trx_start_if_not_started(trx);

	node = thr->run_node;
	sel_node = node->select;
	parent = que_node_get_parent(node);

	ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE);

	if (thr->prev_node == parent) {
		/* Entered from the parent: start from the beginning */
		node->state = UPD_NODE_SET_IX_LOCK;
	}

	if (node->state == UPD_NODE_SET_IX_LOCK) {

		if (!node->has_clust_rec_x_lock) {
			/* It may be that the current session has not yet
			started its transaction, or it has been committed: */

			err = lock_table(0, node->table, LOCK_IX, thr);

			if (err != DB_SUCCESS) {

				goto error_handling;
			}
		}

		node->state = UPD_NODE_UPDATE_CLUSTERED;

		if (node->searched_update) {
			/* Reset the cursor */
			sel_node->state = SEL_NODE_OPEN;

			/* Fetch a row to update */
			thr->run_node = sel_node;

			return(thr);
		}
	}

	/* sel_node is NULL if we are in the MySQL interface */

	if (sel_node && (sel_node->state != SEL_NODE_FETCH)) {

		if (!node->searched_update) {
			/* An explicit cursor should be positioned on a row
			to update */

			ut_error;

			err = DB_ERROR;

			goto error_handling;
		}

		ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);

		/* No more rows to update, or the select node performed the
		updates directly in-place */

		thr->run_node = parent;

		return(thr);
	}

	/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */

	err = row_upd(node, thr);

error_handling:
	trx->error_state = err;

	if (err != DB_SUCCESS) {
		/* Lock wait or error: in both cases there is no thread
		to run next */

		return(NULL);
	}

	/* DO THE TRIGGER ACTIONS HERE */

	if (!node->searched_update) {
		/* It was an explicit cursor update */

		thr->run_node = parent;
	} else {
		/* Fetch next row to update */

		thr->run_node = sel_node;
	}

	node->state = UPD_NODE_UPDATE_CLUSTERED;

	return(thr);
}
/***************************************************************
Updates a secondary index entry of a row. The old entry is searched for
and delete-marked; unless the node is a delete, an entry with the new
column values is then inserted. If the old entry is not found, diagnostics
are printed to stderr and the function carries on without an error code. */
static
ulint
row_upd_sec_index_entry(
/*====================*/
				/* out: DB_SUCCESS if operation successfully
				completed, else error code or DB_LOCK_WAIT */
	upd_node_t*	node,	/* in: row update node */
	que_thr_t*	thr)	/* in: query thread */
{
	ibool		check_ref;
	ibool		found;
	dict_index_t*	index;
	dtuple_t*	entry;
	btr_pcur_t	pcur;
	btr_cur_t*	cursor;
	mem_heap_t*	entry_heap;
	rec_t*		old_rec;
	ulint		err	= DB_SUCCESS;
	mtr_t		mtr;
	char		msg_buf[1000];

	index = node->index;

	check_ref = row_upd_index_is_referenced(index);

	entry_heap = mem_heap_create(1024);

	/* Build old index entry */
	entry = row_build_index_entry(node->row, index, entry_heap);

	log_free_check();
	mtr_start(&mtr);

	found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur,
				       &mtr);
	cursor = btr_pcur_get_btr_cur(&pcur);

	old_rec = btr_cur_get_rec(cursor);

	if (!found) {
		/* Report the tuple searched for and the record the cursor
		ended up on */
		fprintf(stderr, "InnoDB: error in sec index entry update in\n"
			"InnoDB: index %s table %s\n", index->name,
			index->table->name);

		dtuple_sprintf(msg_buf, 900, entry);
		fprintf(stderr, "InnoDB: tuple %s\n", msg_buf);

		rec_sprintf(msg_buf, 900, old_rec);
		fprintf(stderr, "InnoDB: record %s\n", msg_buf);

		fprintf(stderr,
			"InnoDB: Make a detailed bug report and send it\n");
		fprintf(stderr, "InnoDB: to [email protected]\n");

		trx_print(thr_get_trx(thr));

	} else if (!rec_get_deleted_flag(old_rec)) {
		/* Delete mark the old index record; it can already be
		delete marked if we return after a lock wait in
		row_ins_index_entry below */

		err = btr_cur_del_mark_set_sec_rec(0, cursor, TRUE, thr,
						   &mtr);

		if (err == DB_SUCCESS && check_ref) {
			/* NOTE that the following call loses
			the position of pcur ! */

			err = row_upd_check_references_constraints(
				&pcur, index->table, index, thr, &mtr);
		}
	}

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	if (node->is_delete || err != DB_SUCCESS) {
		/* Nothing to insert: either the row is being deleted or
		the delete-marking phase failed */

		mem_heap_free(entry_heap);

		return(err);
	}

	/* Build a new index entry */
	row_upd_index_replace_new_col_vals(entry, index, node->update);

	/* Insert new index entry */
	err = row_ins_index_entry(index, entry, NULL, 0, thr);

	mem_heap_free(entry_heap);

	return(err);
}
/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
Walks the indexes starting from node->index, skipping corrupted ones, and
processes every index whose ordering fields were changed by the update.
@return DB_SUCCESS, or the error code of the first index operation that
failed (the original contract names DB_OUT_OF_FILE_SPACE) */
static
ulint
row_undo_mod_upd_exist_sec(
/*=======================*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	mem_heap_t*	heap;
	dtuple_t*	entry;
	dict_index_t*	index;
	ulint		err;

	if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
		/* No change in secondary indexes */

		return(DB_SUCCESS);
	}

	heap = mem_heap_create(1024);

	while (node->index != NULL) {

		/* Skip all corrupted secondary index */
		dict_table_skip_corrupt_index(node->index);

		if (!node->index) {
			break;
		}

		index = node->index;

		if (row_upd_changes_ord_field_binary(node->index,
						     node->update, thr,
						     node->row, node->ext)) {

			/* Build the newest version of the index entry */
			entry = row_build_index_entry(node->row, node->ext,
						      index, heap);

			if (UNIV_UNLIKELY(!entry)) {
				/* The server must have crashed in
				row_upd_clust_rec_by_insert() before
				the updated externally stored columns (BLOBs)
				of the new clustered index entry were
				written. */

				/* The table must be in DYNAMIC or COMPRESSED
				format.  REDUNDANT and COMPACT formats
				store a local 768-byte prefix of each
				externally stored column. */
				ut_a(dict_table_get_format(index->table)
				     >= DICT_TF_FORMAT_ZIP);

				/* This is only legitimate when
				rolling back an incomplete transaction
				after crash recovery. */
				ut_a(thr_get_trx(thr)->is_recovered);

				/* The server must have crashed before
				completing the insert of the new
				clustered index entry and before
				inserting to the secondary indexes.
				Because node->row was not yet written
				to this index, we can ignore it.  But
				we must restore node->undo_row. */
			} else {
				/* NOTE that if we updated the fields of a
				delete-marked secondary index record so that
				alphabetically they stayed the same, e.g.,
				'abc' -> 'aBc', we cannot return to the
				original values because we do not know them.
				But this should not cause problems because
				in row0sel.c, in queries we always retrieve
				the clustered index record or an earlier
				version of it, if the secondary index record
				through which we do the search is
				delete-marked. */

				err = row_undo_mod_del_mark_or_remove_sec(
					node, thr, index, entry);

				if (err != DB_SUCCESS) {

					goto func_exit;
				}

				mem_heap_empty(heap);
			}

			/* We may have to update the delete mark in the
			secondary index record of the previous version of
			the row. We also need to update the fields of
			the secondary index record if we updated its fields
			but alphabetically they stayed the same, e.g.,
			'abc' -> 'aBc'. */
			entry = row_build_index_entry(node->undo_row,
						      node->undo_ext,
						      index, heap);
			ut_a(entry);

			/* Try the leaf-level operation first and fall back
			to the tree-level one if it reports DB_FAIL */
			err = row_undo_mod_del_unmark_sec_and_undo_update(
				BTR_MODIFY_LEAF, thr, index, entry);

			if (err == DB_FAIL) {
				err = row_undo_mod_del_unmark_sec_and_undo_update(
					BTR_MODIFY_TREE, thr, index, entry);
			}

			if (err != DB_SUCCESS) {

				goto func_exit;
			}
		}

		node->index = dict_table_get_next_index(node->index);
	}

	/* Every index was handled without an error */
	err = DB_SUCCESS;

func_exit:
	mem_heap_free(heap);

	return(err);
}
/***********************************************************//**
Parses the row reference and other info in a modify undo log record.
The results are stored in the purge node: rec_type, cmpl_info, table, ref,
update and, when the update may have changed ordering fields, row.
The data dictionary is frozen before the table lookup. It is unfrozen here
on every FALSE return that happens after the freeze.
@return TRUE if purge operation required: NOTE that then the CALLER
must unfreeze data dictionary! */
static
ibool
row_purge_parse_undo_rec(
/*=====================*/
	purge_node_t*	node,		/*!< in: row undo node */
	ibool*		updated_extern,	/*!< out: TRUE if an externally
					stored field was updated */
	que_thr_t*	thr)		/*!< in: query thread */
{
	dict_index_t*	pk_index;
	byte*		pos;
	trx_t*		trx;
	undo_no_t	undo_no;
	table_id_t	table_id;
	trx_id_t	rec_trx_id;
	roll_ptr_t	rec_roll_ptr;
	ulint		info_bits;
	ulint		rec_type;

	ut_ad(node && thr);

	trx = thr_get_trx(thr);

	pos = trx_undo_rec_get_pars(
		node->undo_rec, &rec_type, &node->cmpl_info,
		updated_extern, &undo_no, &table_id);
	node->rec_type = rec_type;

	if (rec_type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) {

		return(FALSE);
	}

	pos = trx_undo_update_rec_get_sys_cols(pos, &rec_trx_id,
					       &rec_roll_ptr, &info_bits);
	node->table = NULL;

	if (rec_type == TRX_UNDO_UPD_EXIST_REC
	    && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)
	    && !(*updated_extern)) {

		/* Purge requires no changes to indexes: we may return */

		return(FALSE);
	}

	/* Prevent DROP TABLE etc. from running when we are doing the purge
	for this row */

	row_mysql_freeze_data_dictionary(trx);

	mutex_enter(&(dict_sys->mutex));

	node->table = dict_table_get_on_id_low(table_id);

	mutex_exit(&(dict_sys->mutex));

	if (node->table == NULL) {
		/* The table has been dropped: no need to do purge */

		goto err_exit;
	}

	if (node->table->ibd_file_missing) {
		/* We skip purge of missing .ibd files */

		node->table = NULL;

		goto err_exit;
	}

	pk_index = dict_table_get_first_index(node->table);

	if (pk_index == NULL) {
		/* The table was corrupt in the data dictionary */

		goto err_exit;
	}

	pos = trx_undo_rec_get_row_ref(pos, pk_index, &(node->ref),
				       node->heap);

	pos = trx_undo_update_rec_get_update(pos, pk_index, rec_type,
					     rec_trx_id, rec_roll_ptr,
					     info_bits, trx, node->heap,
					     &(node->update));

	/* Read to the partial row the fields that occur in indexes */

	if (!(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
		pos = trx_undo_rec_get_partial_row(
			pos, pk_index, &node->row,
			rec_type == TRX_UNDO_UPD_DEL_REC,
			node->heap);
	}

	return(TRUE);

err_exit:
	/* No purge will be done: release the dictionary freeze here */
	row_mysql_unfreeze_data_dictionary(trx);

	return(FALSE);
}
/***************************************************************//**
Builds a table definition to insert. Assigns a new table id to the table
and records it in the transaction of the query thread. If file-per-table
mode is in effect, also allocates a new space id, creates the single-table
tablespace and initializes its file space header; otherwise the table
flags are restricted to what the system tablespace allows. Finally the
SYS_TABLES tuple is built and set as the new row of node->tab_def.
@return DB_SUCCESS or error code */
static
ulint
dict_build_table_def_step(
/*======================*/
	que_thr_t*	thr,	/*!< in: query thread */
	tab_node_t*	node)	/*!< in: table create node */
{
	dict_table_t*	table;
	dtuple_t*	row;
	ulint		error;
	ulint		flags;
	const char*	path_or_name;
	ibool		is_path;
	mtr_t		mtr;
	ulint		space = 0;
	ibool		file_per_table;
	/* Mask with every bit at or above position DICT_TF_BITS set.
	It is built from an unsigned all-ones value, because left-shifting
	the negative int ~0 is undefined behavior in C. */
	const ulint	tf_high_mask = ~(ulint) 0 << DICT_TF_BITS;

	ut_ad(mutex_own(&(dict_sys->mutex)));

	table = node->table;

	/* Cache the global variable "srv_file_per_table" to
	a local variable before using it. Please note
	"srv_file_per_table" is not under dict_sys mutex
	protection, and could be changed while executing
	this function. So better to cache the current value
	to a local variable, and all future reference to
	"srv_file_per_table" should use this local variable. */
	file_per_table = srv_file_per_table;

	dict_hdr_get_new_id(&table->id, NULL, NULL);

	thr_get_trx(thr)->table_id = table->id;

	if (file_per_table) {
		/* Get a new space id if srv_file_per_table is set */
		dict_hdr_get_new_id(NULL, NULL, &space);

		if (UNIV_UNLIKELY(space == ULINT_UNDEFINED)) {

			return(DB_ERROR);
		}

		/* We create a new single-table tablespace for the table.
		We initially let it be 4 pages:
		- page 0 is the fsp header and an extent descriptor page,
		- page 1 is an ibuf bitmap page,
		- page 2 is the first inode page,
		- page 3 will contain the root of the clustered index of the
		table we create here. */

		if (table->dir_path_of_temp_table) {
			/* We place tables created with CREATE TEMPORARY
			TABLE in the tmp dir of mysqld server */

			path_or_name = table->dir_path_of_temp_table;
			is_path = TRUE;
		} else {
			path_or_name = table->name;
			is_path = FALSE;
		}

		ut_ad(dict_table_get_format(table) <= DICT_TF_FORMAT_MAX);
		ut_ad(!dict_table_zip_size(table)
		      || dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);

		/* Keep only the low DICT_TF_BITS bits of the table flags */
		flags = table->flags & ~tf_high_mask;

		error = fil_create_new_single_table_tablespace(
			space, path_or_name, is_path,
			flags == DICT_TF_COMPACT ? 0 : flags,
			FIL_IBD_FILE_INITIAL_SIZE);

		/* The space id is stored in the table object even if the
		creation failed; the error is reported right below */
		table->space = (unsigned int) space;

		if (error != DB_SUCCESS) {

			return(error);
		}

		mtr_start(&mtr);

		fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);

		mtr_commit(&mtr);
	} else {
		/* Create in the system tablespace: disallow new features.
		The bits at or above DICT_TF_BITS and DICT_TF_COMPACT are
		kept; all other low bits are cleared. */
		table->flags &= tf_high_mask | DICT_TF_COMPACT;
	}

	row = dict_create_sys_tables_tuple(table, node->heap);

	ins_node_set_new_row(node->tab_def, row);

	return(DB_SUCCESS);
}
/***********************************************************//**
Creates an index. This is a high-level function used in SQL execution
graphs. The function is a state machine driven by node->state:
INDEX_BUILD_INDEX_DEF and INDEX_BUILD_FIELD_DEF hand the index definition
row and then one field definition row per call to the child insert nodes
(returning thr so that the child node is run next); INDEX_ADD_TO_CACHE,
INDEX_CREATE_INDEX_TREE and INDEX_COMMIT_WORK are then run back to back
in a single call. The outcome is stored in trx->error_state.
@return query thread to run next or NULL */
UNIV_INTERN
que_thr_t*
dict_create_index_step(
/*===================*/
	que_thr_t*	thr)	/*!< in: query thread */
{
	ind_node_t*	node;
	ulint		err	= DB_ERROR;
	trx_t*		trx;

	ut_ad(thr);
	ut_ad(mutex_own(&(dict_sys->mutex)));

	trx = thr_get_trx(thr);

	node = thr->run_node;

	ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_INDEX);

	if (thr->prev_node == que_node_get_parent(node)) {
		/* Entered from the parent node: start from the beginning */
		node->state = INDEX_BUILD_INDEX_DEF;
	}

	if (node->state == INDEX_BUILD_INDEX_DEF) {
		/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
		err = dict_build_index_def_step(thr, node);

		if (err != DB_SUCCESS) {

			goto function_exit;
		}

		node->state = INDEX_BUILD_FIELD_DEF;
		node->field_no = 0;

		thr->run_node = node->ind_def;

		return(thr);
	}

	if (node->state == INDEX_BUILD_FIELD_DEF) {

		if (node->field_no < (node->index)->n_fields) {

			err = dict_build_field_def_step(node);

			if (err != DB_SUCCESS) {

				goto function_exit;
			}

			node->field_no++;

			thr->run_node = node->field_def;

			return(thr);
		} else {
			node->state = INDEX_ADD_TO_CACHE;
		}
	}

	if (node->state == INDEX_ADD_TO_CACHE) {

		index_id_t	index_id = node->index->id;

		/* NOTE: in this tree the call
			dict_index_add_to_cache(
				node->table, node->index, FIL_NULL,
				trx_is_strict(trx)
				|| dict_table_get_format(node->table)
				>= DICT_TF_FORMAT_ZIP)
		was disabled on 2011-09-20. The note left with the change
		says it was done because of trx_is_strict(), and asks why
		the stub in ha_innodb__.cc has no effect. Since 2011-10-12
		the step is simply treated as successful. The assertion
		ut_a(!node->index == (err != DB_SUCCESS)) was disabled
		on 2011-10-13. */
		err = DB_SUCCESS;

		node->index = dict_index_get_if_in_cache_low(index_id);

		if (node->index == NULL) {
			/* With the assertion above disabled nothing else
			catches a failed cache lookup, and the following
			states dereference node->index: report an error
			instead of continuing with a NULL index. */
			err = DB_ERROR;

			goto function_exit;
		}

		node->state = INDEX_CREATE_INDEX_TREE;
	}

	if (node->state == INDEX_CREATE_INDEX_TREE) {

		err = dict_create_index_tree_step(node);

		if (err != DB_SUCCESS) {
			dict_index_remove_from_cache(node->table, node->index);
			node->index = NULL;

			goto function_exit;
		}

		node->index->page = node->page_no;
		node->state = INDEX_COMMIT_WORK;
	}

	if (node->state == INDEX_COMMIT_WORK) {

		/* Index was correctly defined: do NOT commit the transaction
		(CREATE INDEX does NOT currently do an implicit commit of
		the current transaction) */

		node->state = INDEX_CREATE_INDEX_TREE;

		/* thr->run_node = node->commit_node;

		return(thr); */
	}

function_exit:
	trx->error_state = err;

	if (err != DB_SUCCESS) {
		/* A lock wait or an SQL error: there is no thread to run */

		return(NULL);
	}

	thr->run_node = que_node_get_parent(node);

	return(thr);
}
/***********************************************************//**
Creates a table. This is a high-level function used in SQL execution
graphs. Each call advances the state machine kept in the table create
node: first the table definition row, then one column definition row per
call is handed to the child insert nodes, and finally the table object is
added to the dictionary cache. The outcome is stored in the error_state
of the transaction.
@return query thread to run next or NULL */
UNIV_INTERN
que_thr_t*
dict_create_table_step(
/*===================*/
	que_thr_t*	thr)	/*!< in: query thread */
{
	ulint		status	= DB_ERROR;
	tab_node_t*	tab_node;
	trx_t*		cur_trx;

	ut_ad(thr);
	ut_ad(mutex_own(&(dict_sys->mutex)));

	cur_trx = thr_get_trx(thr);

	tab_node = thr->run_node;

	ut_ad(que_node_get_type(tab_node) == QUE_NODE_CREATE_TABLE);

	if (thr->prev_node == que_node_get_parent(tab_node)) {
		/* Entered from the parent node: start from the beginning */
		tab_node->state = TABLE_BUILD_TABLE_DEF;
	}

	if (tab_node->state == TABLE_BUILD_TABLE_DEF) {

		/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */

		status = dict_build_table_def_step(thr, tab_node);

		if (status == DB_SUCCESS) {
			/* Run the insert of the table definition next,
			then come back for the columns */
			tab_node->state = TABLE_BUILD_COL_DEF;
			tab_node->col_no = 0;

			thr->run_node = tab_node->tab_def;

			return(thr);
		}

		goto finish;
	}

	if (tab_node->state == TABLE_BUILD_COL_DEF) {

		if (tab_node->col_no >= tab_node->table->n_def) {
			/* All columns have been handled */
			tab_node->state = TABLE_COMMIT_WORK;
		} else {
			status = dict_build_col_def_step(tab_node);

			if (status != DB_SUCCESS) {

				goto finish;
			}

			tab_node->col_no++;

			thr->run_node = tab_node->col_def;

			return(thr);
		}
	}

	if (tab_node->state == TABLE_COMMIT_WORK) {

		/* Table was correctly defined: do NOT commit the transaction
		(CREATE TABLE does NOT do an implicit commit of the current
		transaction) */

		tab_node->state = TABLE_ADD_TO_CACHE;

		/* thr->run_node = node->commit_node;

		return(thr); */
	}

	if (tab_node->state == TABLE_ADD_TO_CACHE) {

		dict_table_add_to_cache(tab_node->table, tab_node->heap);

		status = DB_SUCCESS;
	}

finish:
	cur_trx->error_state = status;

	if (status != DB_SUCCESS) {
		/* A lock wait or an SQL error: there is no thread to run */

		return(NULL);
	}

	thr->run_node = que_node_get_parent(tab_node);

	return(thr);
}
/***********************************************************//** Delete unmarks a secondary index entry which must be found. It might not be delete-marked at the moment, but it does not harm to unmark it anyway. We also need to update the fields of the secondary index record if we updated its fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. @return DB_FAIL or DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static ulint row_undo_mod_del_unmark_sec_and_undo_update( /*========================================*/ ulint mode, /*!< in: search mode: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ que_thr_t* thr, /*!< in: query thread */ dict_index_t* index, /*!< in: index */ const dtuple_t* entry) /*!< in: index entry */ { mem_heap_t* heap; btr_pcur_t pcur; btr_cur_t* btr_cur; upd_t* update; ulint err = DB_SUCCESS; big_rec_t* dummy_big_rec; mtr_t mtr; trx_t* trx = thr_get_trx(thr); enum row_search_result search_result; /* Ignore indexes that are being created. */ if (UNIV_UNLIKELY(*index->name == TEMP_INDEX_PREFIX)) { return(DB_SUCCESS); } log_free_check(); mtr_start(&mtr); ut_ad(mode == BTR_MODIFY_TREE || mode == BTR_MODIFY_LEAF); search_result = row_search_index_entry(index, entry, mode, &pcur, &mtr); switch (search_result) { case ROW_BUFFERED: case ROW_NOT_DELETED_REF: /* These are invalid outcomes, because the mode passed to row_search_index_entry() did not include any of the flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. 
*/ ut_error; case ROW_NOT_FOUND: fputs("InnoDB: error in sec index entry del undo in\n" "InnoDB: ", stderr); dict_index_name_print(stderr, trx, index); fputs("\n" "InnoDB: tuple ", stderr); dtuple_print(stderr, entry); fputs("\n" "InnoDB: record ", stderr); rec_print(stderr, btr_pcur_get_rec(&pcur), index); putc('\n', stderr); trx_print(stderr, trx, 0); fputs("\n" "InnoDB: Submit a detailed bug report" " to http://bugs.mysql.com\n", stderr); break; case ROW_FOUND: btr_cur = btr_pcur_get_btr_cur(&pcur); err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG, btr_cur, FALSE, thr, &mtr); ut_a(err == DB_SUCCESS); heap = mem_heap_create(100); update = row_upd_build_sec_rec_difference_binary( index, entry, btr_cur_get_rec(btr_cur), trx, heap); if (upd_get_n_fields(update) == 0) { /* Do nothing */ } else if (mode == BTR_MODIFY_LEAF) { /* Try an optimistic updating of the record, keeping changes within the page */ err = btr_cur_optimistic_update( BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG, btr_cur, update, 0, thr, &mtr); switch (err) { case DB_OVERFLOW: case DB_UNDERFLOW: case DB_ZIP_OVERFLOW: err = DB_FAIL; } } else { ut_a(mode == BTR_MODIFY_TREE); err = btr_cur_pessimistic_update( BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG, btr_cur, &heap, &dummy_big_rec, update, 0, thr, &mtr); ut_a(!dummy_big_rec); } mem_heap_free(heap); } btr_pcur_close(&pcur); mtr_commit(&mtr); return(err); }