/*************************************************************************
Allocates an update node from the given heap and puts every field into its
initial state. The node additionally gets a private heap of its own,
created with mem_heap_create(128). */
upd_node_t*
upd_node_create(
/*============*/
				/* out, own: update node */
	mem_heap_t*	heap)	/* in: mem heap where created */
{
	upd_node_t*	upd;

	upd = mem_heap_alloc(heap, sizeof(upd_node_t));

	/* Identity of the node */
	upd->common.type = QUE_NODE_UPDATE;
	upd->magic_n = UPD_NODE_MAGIC_N;

	/* Execution state and flags */
	upd->state = UPD_NODE_UPDATE_CLUSTERED;
	upd->select_will_do_update = FALSE;
	upd->in_mysql_interface = FALSE;
	upd->cmpl_info = 0;

	/* Nothing is attached to the node yet */
	upd->row = NULL;
	upd->ext_vec = NULL;
	upd->index = NULL;
	upd->update = NULL;
	upd->select = NULL;

	/* Private heap owned by the node */
	upd->heap = mem_heap_create(128);

	return(upd);
}
/*******************************************************************//**
Rolls back a transaction on behalf of MySQL. A rollback query graph is
built in a temporary heap, executed, and the heap is freed again once the
transaction is back in the TRX_QUE_RUNNING state. The function asserts
that trx->error_state is DB_SUCCESS after the rollback.
@return error code or DB_SUCCESS */
UNIV_INTERN
int
trx_general_rollback_for_mysql(
/*===========================*/
	trx_t*		trx,	/*!< in: transaction handle */
	trx_savept_t*	savept)	/*!< in: pointer to savepoint undo number, if
				partial rollback requested, or NULL for
				complete rollback */
{
	mem_heap_t*	graph_heap;
	roll_node_t*	rollback;
	que_thr_t*	thr;
	int		running;

	/* Tell Innobase server that there might be work for
	utility threads: */
	srv_active_wake_master_thread();

	trx_start_if_not_started(trx);

	graph_heap = mem_heap_create(512);
	rollback = roll_node_create(graph_heap);

	if (savept != NULL) {
		/* Partial rollback: copy the savepoint into the node */
		rollback->partial = TRUE;
		rollback->savept = *savept;
	}

	trx->error_state = DB_SUCCESS;

	thr = pars_complete_graph_for_exec(rollback, trx, graph_heap);

	ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
	que_run_threads(thr);

	/* Poll trx->que_state under kernel_mutex, sleeping between the
	checks, until the transaction is running again. */
	for (;;) {
		mutex_enter(&kernel_mutex);
		running = (trx->que_state == TRX_QUE_RUNNING);
		mutex_exit(&kernel_mutex);

		if (running) {
			break;
		}

		os_thread_sleep(100000);
	}

	mem_heap_free(graph_heap);

	ut_a(trx->error_state == DB_SUCCESS);

	/* Tell Innobase server that there might be work for
	utility threads: */
	srv_active_wake_master_thread();

	return((int) trx->error_state);
}
/******************************************************************** Builds a purge 'query' graph. The actual purge is performed by executing this query graph. */ static que_t* trx_purge_graph_build(void) /*=======================*/ /* out, own: the query graph */ { mem_heap_t* heap; que_fork_t* fork; que_thr_t* thr; /* que_thr_t* thr2; */ heap = mem_heap_create(512); fork = que_fork_create(NULL, NULL, QUE_FORK_PURGE, heap); fork->trx = purge_sys->trx; thr = que_thr_create(fork, heap); thr->child = row_purge_node_create(thr, heap); /* thr2 = que_thr_create(fork, fork, heap); thr2->child = row_purge_node_create(fork, thr2, heap); */ return(fork); }
/*************************************************************************
Creates a table create graph node. Besides the node itself, three child
nodes are created in the same heap and linked back to it through
common.parent: an insert node for dict_sys->sys_tables, an insert node for
dict_sys->sys_columns, and a commit node. */
tab_node_t*
tab_create_graph_create(
/*====================*/
				/* out, own: table create node */
	dict_table_t*	table,	/* in: table to create, built as a memory
				data structure */
	mem_heap_t*	heap)	/* in: heap where created */
{
	tab_node_t*	tab;

	tab = mem_heap_alloc(heap, sizeof(tab_node_t));

	tab->common.type = QUE_NODE_CREATE_TABLE;
	tab->state = TABLE_BUILD_TABLE_DEF;
	tab->table = table;

	/* Private heap of the node */
	tab->heap = mem_heap_create(256);

	/* Child: insert into SYS_TABLES */
	tab->tab_def = ins_node_create(INS_DIRECT, dict_sys->sys_tables,
				       heap);
	tab->tab_def->common.parent = tab;

	/* Child: insert into SYS_COLUMNS */
	tab->col_def = ins_node_create(INS_DIRECT, dict_sys->sys_columns,
				       heap);
	tab->col_def->common.parent = tab;

	/* Child: commit */
	tab->commit_node = commit_node_create(heap);
	tab->commit_node->common.parent = tab;

	return(tab);
}
/**********************************************************************//**
Creates a table memory object. The object, a copy of the table name and
the column array are all allocated from one heap that is created here and
stored in table->heap. The column array has room for n_cols user columns
plus DATA_N_SYS_COLS system columns; it is obtained with mem_heap_alloc()
(not the zeroing variant), so the caller is expected to fill it in.
@return own: table object */
UNIV_INTERN
dict_table_t*
dict_mem_table_create(
/*==================*/
	const char*	name,	/*!< in: table name */
	ulint		space,	/*!< in: space where the clustered index of
				the table is placed; this parameter is
				ignored if the table is made a member of
				a cluster */
	ulint		n_cols,	/*!< in: number of columns */
	ulint		flags)	/*!< in: table flags */
{
	dict_table_t*	table;
	mem_heap_t*	heap;

	ut_ad(name);

	/* No flag bit at or above position DICT_TF2_BITS may be set.
	The mask is built from an unsigned all-ones value: left-shifting
	the signed constant ~0 (that is, -1) is undefined behaviour
	in C. */
	ut_a(!(flags & (~(ulint) 0 << DICT_TF2_BITS)));

	heap = mem_heap_create(DICT_HEAP_SIZE);

	table = mem_heap_zalloc(heap, sizeof(dict_table_t));

	table->heap = heap;

	table->flags = (unsigned int) flags;
	table->name = mem_heap_strdup(heap, name);
	table->space = (unsigned int) space;

	/* The stored column count includes the system columns */
	table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS);

	table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS)
				     * sizeof(dict_col_t));

	ut_d(table->magic_n = DICT_TABLE_MAGIC_N);

	return(table);
}
/***************************************************************
Purges a delete marking of a record. Starting from node->index, an index
entry is built for every remaining index of the table and passed to
row_purge_remove_sec_if_poss(); node->index is advanced as the loop
proceeds and is NULL afterwards. Finally
row_purge_remove_clust_if_poss() is called for the node. */
static
void
row_purge_del_mark(
/*===============*/
	purge_node_t*	node,	/* in: row purge node */
	que_thr_t*	thr)	/* in: query thread */
{
	mem_heap_t*	entry_heap;
	dict_index_t*	cur_index;
	dtuple_t*	sec_entry;

	ut_ad(node && thr);

	/* All index entries are built in this heap; it is released only
	after the last index has been processed. */
	entry_heap = mem_heap_create(1024);

	for (; node->index != NULL;
	     node->index = dict_table_get_next_index(node->index)) {

		cur_index = node->index;

		/* Build the index entry */
		sec_entry = row_build_index_entry(node->row, cur_index,
						  entry_heap);

		row_purge_remove_sec_if_poss(node, thr, cur_index,
					     sec_entry);
	}

	mem_heap_free(entry_heap);

	row_purge_remove_clust_if_poss(node, thr);
}
/**********************************************************************//**
Creates an index memory object. A new heap is created, the zero-filled
index struct is allocated from it, and dict_mem_fill_index_struct() is
called to fill in the struct from the arguments.
@return own: index object */
UNIV_INTERN
dict_index_t*
dict_mem_index_create(
/*==================*/
	const char*	table_name,	/*!< in: table name */
	const char*	index_name,	/*!< in: index name */
	ulint		space,		/*!< in: space where the index tree is
					placed, ignored if the index is of
					the clustered type */
	ulint		type,		/*!< in: DICT_UNIQUE,
					DICT_CLUSTERED, ... ORed */
	ulint		n_fields)	/*!< in: number of fields */
{
	mem_heap_t*	index_heap;
	dict_index_t*	new_index;

	ut_ad(table_name && index_name);

	index_heap = mem_heap_create(DICT_HEAP_SIZE);
	new_index = mem_heap_zalloc(index_heap, sizeof(dict_index_t));

	dict_mem_fill_index_struct(new_index, index_heap, table_name,
				   index_name, space, type, n_fields);

	return(new_index);
}
/*************************************************************************
Creates an index create graph node. Besides the node itself, three child
nodes are created in the same heap and linked back to it through
common.parent: an insert node for dict_sys->sys_indexes, an insert node
for dict_sys->sys_fields, and a commit node. */
ind_node_t*
ind_create_graph_create(
/*====================*/
				/* out, own: index create node */
	dict_index_t*	index,	/* in: index to create, built as a memory
				data structure */
	mem_heap_t*	heap)	/* in: heap where created */
{
	ind_node_t*	ind;

	ind = mem_heap_alloc(heap, sizeof(ind_node_t));

	ind->common.type = QUE_NODE_CREATE_INDEX;
	ind->state = INDEX_BUILD_INDEX_DEF;
	ind->index = index;

	/* No page number is known yet */
	ind->page_no = FIL_NULL;

	/* Private heap of the node */
	ind->heap = mem_heap_create(256);

	/* Child: insert into SYS_INDEXES */
	ind->ind_def = ins_node_create(INS_DIRECT, dict_sys->sys_indexes,
				       heap);
	ind->ind_def->common.parent = ind;

	/* Child: insert into SYS_FIELDS */
	ind->field_def = ins_node_create(INS_DIRECT, dict_sys->sys_fields,
					 heap);
	ind->field_def->common.parent = ind;

	/* Child: commit */
	ind->commit_node = commit_node_create(heap);
	ind->commit_node->common.parent = ind;

	return(ind);
}
/********************************************************************//**
Creates a row undo node to a query graph. The node starts in state
UNDO_NODE_FETCH_NEXT, has its persistent cursor initialised with
btr_pcur_init(), and gets a private heap of its own.
@return own: undo node */
UNIV_INTERN
undo_node_t*
row_undo_node_create(
/*=================*/
	trx_t*		trx,	/*!< in: transaction */
	que_thr_t*	parent,	/*!< in: parent node, i.e., a thr node */
	mem_heap_t*	heap)	/*!< in: memory heap where created */
{
	undo_node_t*	node;

	ut_ad(trx && parent && heap);

	node = mem_heap_alloc(heap, sizeof(undo_node_t));

	/* Position in the query graph */
	node->common.type = QUE_NODE_UNDO;
	node->common.parent = parent;

	node->trx = trx;
	node->state = UNDO_NODE_FETCH_NEXT;

	btr_pcur_init(&node->pcur);

	/* Private heap of the node */
	node->heap = mem_heap_create(256);

	return(node);
}
/************************************************************//**
Adds a new block to a dyn array. On the first call for an array (its heap
is still NULL) the block list is initialised with the array struct itself
as the first element and the heap is created. The current last block is
then flagged with DYN_BLOCK_FULL_FLAG and a fresh, empty block is
allocated from the heap and appended to the list.
@return created block */
UNIV_INTERN
dyn_block_t*
dyn_array_add_block(
/*================*/
	dyn_array_t*	arr)	/*!< in: dyn array */
{
	dyn_block_t*	last;
	dyn_block_t*	fresh;

	ut_ad(arr);
	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);

	if (arr->heap == NULL) {
		/* First extension: set up the list and the heap */
		UT_LIST_INIT(arr->base);
		UT_LIST_ADD_FIRST(list, arr->base, arr);

		arr->heap = mem_heap_create(sizeof(dyn_block_t));
	}

	/* Mark the block that was last until now as full */
	last = dyn_array_get_last_block(arr);
	last->used = last->used | DYN_BLOCK_FULL_FLAG;

	fresh = mem_heap_alloc(arr->heap, sizeof(dyn_block_t));
	fresh->used = 0;

	UT_LIST_ADD_LAST(list, arr->base, fresh);

	return(fresh);
}
/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is UPD_DEL.
Walks the indexes starting from node->index, skipping corrupted ones, and
calls row_undo_mod_del_mark_or_remove_sec() for every index entry that
can be built. The walk stops at the first error, leaving node->index at
the index that failed.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_mod_upd_del_sec(
/*=====================*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	ulint		err = DB_SUCCESS;
	mem_heap_t*	tmp_heap;
	dict_index_t*	sec_index;
	dtuple_t*	sec_entry;

	ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);

	tmp_heap = mem_heap_create(1024);

	while (node->index != NULL) {
		/* Skip all corrupted secondary index */
		dict_table_skip_corrupt_index(node->index);

		if (node->index == NULL) {
			break;
		}

		sec_index = node->index;

		sec_entry = row_build_index_entry(node->row, node->ext,
						  sec_index, tmp_heap);

		if (UNIV_UNLIKELY(!sec_entry)) {
			/* The database must have crashed after
			inserting a clustered index record but before
			writing all the externally stored columns of
			that record.  Because secondary index entries
			are inserted after the clustered index record,
			we may assume that the secondary index record
			does not exist.  However, this situation may
			only occur during the rollback of incomplete
			transactions. */
			ut_a(thr_is_recv(thr));
		} else {
			err = row_undo_mod_del_mark_or_remove_sec(
				node, thr, sec_index, sec_entry);

			if (err != DB_SUCCESS) {
				break;
			}
		}

		/* The entry is no longer needed: recycle the heap
		before moving on to the next index. */
		mem_heap_empty(tmp_heap);

		node->index = dict_table_get_next_index(node->index);
	}

	mem_heap_free(tmp_heap);

	return(err);
}
/************************************************************************* Checks if possible foreign key constraints hold after a delete of the record under pcur. NOTE that this function will temporarily commit mtr and lose pcur position! */ static ulint row_upd_check_references_constraints( /*=================================*/ /* out: DB_SUCCESS, DB_LOCK_WAIT, or an error code */ btr_pcur_t* pcur, /* in: cursor positioned on a record; NOTE: the cursor position is lost in this function! */ dict_table_t* table, /* in: table in question */ dict_index_t* index, /* in: index of the cursor */ que_thr_t* thr, /* in: query thread */ mtr_t* mtr) /* in: mtr */ { dict_foreign_t* foreign; mem_heap_t* heap; dtuple_t* entry; rec_t* rec; ulint err; rec = btr_pcur_get_rec(pcur); heap = mem_heap_create(500); entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); mtr_commit(mtr); mtr_start(mtr); rw_lock_s_lock(&dict_foreign_key_check_lock); foreign = UT_LIST_GET_FIRST(table->referenced_list); while (foreign) { if (foreign->referenced_index == index) { err = row_ins_check_foreign_constraint(FALSE, foreign, table, index, entry, thr); if (err != DB_SUCCESS) { rw_lock_s_unlock(&dict_foreign_key_check_lock); mem_heap_free(heap); return(err); } } foreign = UT_LIST_GET_NEXT(referenced_list, foreign); } rw_lock_s_unlock(&dict_foreign_key_check_lock); mem_heap_free(heap); return(DB_SUCCESS); }
/*************************************************************************
Creates a memory heap and a dtuple with n_fields fields in it, for use
through an interface that only sees the heap as an opaque void pointer.
The heap handle is kept in a typed local variable and handed to
dtuple_create() directly; it is not read back from *heap through a
(mem_heap_t**) cast, because that would access a void* object through an
lvalue of a different pointer type. */
dtuple_t*
dtuple_create_for_mysql(
/*====================*/
				/* out, own created dtuple */
	void**	heap,		/* out: created memory heap */
	ulint	n_fields)	/* in: number of fields */
{
	mem_heap_t*	new_heap;

	new_heap = mem_heap_create(500);

	/* Publish the heap to the caller as an opaque pointer */
	*heap = (void*) new_heap;

	return(dtuple_create(new_heap, n_fields));
}
/**********************************************************************//**
Creates a table memory object. The object and its column array are
allocated from one heap that is created here and stored in table->heap.
The table name, in contrast, is copied into a separate ut_malloc()
buffer, so it is not released together with the heap. The column array
has room for n_cols user columns plus DATA_N_SYS_COLS system columns.
Unless UNIV_HOTBACKUP is defined, the AUTOINC lock buffer, mutex and
counters are set up as well.
@return own: table object */
UNIV_INTERN
dict_table_t*
dict_mem_table_create(
/*==================*/
	const char*	name,	/*!< in: table name */
	ulint		space,	/*!< in: space where the clustered index of
				the table is placed; this parameter is
				ignored if the table is made a member of
				a cluster */
	ulint		n_cols,	/*!< in: number of columns */
	ulint		flags)	/*!< in: table flags */
{
	dict_table_t*	table;
	mem_heap_t*	heap;
	ulint		name_size;

	ut_ad(name);

	/* No flag bit at or above position DICT_TF2_BITS may be set.
	The mask is built from an unsigned all-ones value: left-shifting
	the signed constant ~0 (that is, -1) is undefined behaviour
	in C. */
	ut_a(!(flags & (~(ulint) 0 << DICT_TF2_BITS)));

	heap = mem_heap_create(DICT_HEAP_SIZE);

	table = mem_heap_zalloc(heap, sizeof(dict_table_t));

	table->heap = heap;

	table->flags = (unsigned int) flags;

	/* Copy the name including its terminating NUL; the length is
	computed only once. */
	name_size = strlen(name) + 1;
	table->name = ut_malloc(name_size);
	memcpy(table->name, name, name_size);

	table->space = (unsigned int) space;

	/* The stored column count includes the system columns */
	table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS);

	table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS)
				     * sizeof(dict_col_t));

#ifndef UNIV_HOTBACKUP
	table->autoinc_lock = mem_heap_alloc(heap, lock_get_size());

	mutex_create(autoinc_mutex_key,
		     &table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX);

	table->autoinc = 0;

	/* The number of transactions that are either waiting on the
	AUTOINC lock or have been granted the lock. */
	table->n_waiting_or_granted_auto_inc_locks = 0;

	table->is_corrupt = FALSE;
#endif /* !UNIV_HOTBACKUP */

	ut_d(table->magic_n = DICT_TABLE_MAGIC_N);

	return(table);
}
/*************************************************************************
Loads the indexes of a system table by calling dict_load_indexes() with a
temporary heap that is freed again before returning. In debug builds the
function asserts that the caller owns dict_sys->mutex. */
void
dict_load_sys_table(
/*================*/
	dict_table_t*	table)	/* in: system table */
{
	mem_heap_t*	tmp_heap;

	ut_ad(mutex_own(&dict_sys->mutex));

	tmp_heap = mem_heap_create(1000);

	dict_load_indexes(table, tmp_heap);

	mem_heap_free(tmp_heap);
}
/***********************************************************//**
Undoes a modify in secondary indexes when undo record type is DEL_MARK.
Walks the indexes starting from node->index, skipping corrupted ones. For
each index the undo is first attempted with BTR_MODIFY_LEAF and, if that
returns DB_FAIL, repeated with BTR_MODIFY_TREE. The walk stops at the
first index for which the result is not DB_SUCCESS.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static
ulint
row_undo_mod_del_mark_sec(
/*======================*/
	undo_node_t*	node,	/*!< in: row undo node */
	que_thr_t*	thr)	/*!< in: query thread */
{
	ulint		err = DB_SUCCESS;
	mem_heap_t*	tmp_heap;
	dict_index_t*	sec_index;
	dtuple_t*	sec_entry;

	tmp_heap = mem_heap_create(1024);

	while (node->index != NULL) {
		/* Skip all corrupted secondary index */
		dict_table_skip_corrupt_index(node->index);

		if (node->index == NULL) {
			break;
		}

		sec_index = node->index;

		sec_entry = row_build_index_entry(node->row, node->ext,
						  sec_index, tmp_heap);
		ut_a(sec_entry);

		/* Optimistic attempt first */
		err = row_undo_mod_del_unmark_sec_and_undo_update(
			BTR_MODIFY_LEAF, thr, sec_index, sec_entry);

		if (err == DB_FAIL) {
			/* Retry with BTR_MODIFY_TREE */
			err = row_undo_mod_del_unmark_sec_and_undo_update(
				BTR_MODIFY_TREE, thr, sec_index, sec_entry);
		}

		if (err != DB_SUCCESS) {
			break;
		}

		node->index = dict_table_get_next_index(node->index);
	}

	/* Common exit: err is DB_SUCCESS unless the loop was left
	because of a failure. */
	mem_heap_free(tmp_heap);

	return(err);
}
/**********************************************************************//**
Creates and initializes a foreign constraint memory object. The struct is
zero-filled and allocated from a heap created here; the heap is recorded
in foreign->heap.
@return own: foreign constraint struct */
UNIV_INTERN
dict_foreign_t*
dict_mem_foreign_create(void)
/*=========================*/
{
	mem_heap_t*	own_heap;
	dict_foreign_t*	fk;

	own_heap = mem_heap_create(100);

	fk = mem_heap_zalloc(own_heap, sizeof(dict_foreign_t));
	fk->heap = own_heap;

	return(fk);
}
/********************************************************************//**
Creates the global purge system control structure and inits the history
mutex. The zero-filled structure is stored in the global purge_sys. A
session and its transaction are opened for purge, the transaction is
started, the purge query graph is built, and a read view is obtained and
used both as the prebuilt view and as the current view. In debug builds
the function asserts that the caller owns kernel_mutex. */
UNIV_INTERN
void
trx_purge_sys_create(
/*=================*/
	ib_bh_t*	ib_bh)	/*!< in, own: UNDO log min binary heap */
{
	ut_ad(mutex_own(&kernel_mutex));

	purge_sys = mem_zalloc(sizeof(trx_purge_t));

	/* Take ownership of ib_bh, we are responsible for freeing it. */
	purge_sys->ib_bh = ib_bh;

	/* Initial state and progress counters */
	purge_sys->state = TRX_STOP_PURGE;
	purge_sys->n_pages_handled = 0;
	purge_sys->purge_trx_no = 0;
	purge_sys->purge_undo_no = 0;
	purge_sys->next_stored = FALSE;
	ut_d(purge_sys->done_trx_no = 0);

	/* Synchronisation objects */
	rw_lock_create(trx_purge_latch_key,
		       &purge_sys->latch, SYNC_PURGE_LATCH);

	mutex_create(purge_sys_bh_mutex_key,
		     &purge_sys->bh_mutex, SYNC_PURGE_QUEUE);

	/* Memory and undo array */
	purge_sys->heap = mem_heap_create(256);
	purge_sys->arr = trx_undo_arr_create();

	/* Session and transaction that purge runs in */
	purge_sys->sess = sess_open();
	purge_sys->trx = purge_sys->sess->trx;
	purge_sys->trx->is_purge = 1;

	ut_a(trx_start_low(purge_sys->trx, ULINT_UNDEFINED));

	purge_sys->query = trx_purge_graph_build();

	/* The prebuilt view also serves as the current view */
	purge_sys->prebuilt_view
		= read_view_oldest_copy_or_open_new(0, NULL);
	purge_sys->view = purge_sys->prebuilt_view;
}
/*************************************************************************
Looks up the clustered index record that corresponds to a secondary index
record. A row reference is built from the secondary record in a temporary
heap and searched with row_search_on_row_ref(). The clustered index of
the table is always stored in *clust_index, whether or not a record was
found. */
rec_t*
row_get_clust_rec(
/*==============*/
				/* out: record or NULL, if no record found */
	ulint		mode,	/* in: BTR_MODIFY_LEAF, ... */
	rec_t*		rec,	/* in: record in a secondary index */
	dict_index_t*	index,	/* in: secondary index */
	dict_index_t**	clust_index,/* out: clustered index */
	mtr_t*		mtr)	/* in: mtr */
{
	btr_pcur_t	pcur;
	dict_table_t*	table;
	mem_heap_t*	ref_heap;
	dtuple_t*	row_ref;
	rec_t*		clust_rec;
	ibool		found;

	ut_ad((index->type & DICT_CLUSTERED) == 0);

	table = index->table;

	ref_heap = mem_heap_create(256);
	row_ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec,
				    ref_heap);

	found = row_search_on_row_ref(&pcur, mode, table, row_ref, mtr);

	/* The record pointer is fetched from the cursor before the
	cursor is closed; it is only returned if the search succeeded. */
	clust_rec = btr_pcur_get_rec(&pcur);

	mem_heap_free(ref_heap);
	btr_pcur_close(&pcur);

	*clust_index = dict_table_get_first_index(table);

	return(found ? clust_rec : NULL);
}
/********************************************************************//**
Creates a purge node to a query graph. The node is allocated from the
given heap, linked to its parent thread node, and gets a private heap of
its own. No other field of the node is set here.
@return own: purge node */
UNIV_INTERN
purge_node_t*
row_purge_node_create(
/*==================*/
	que_thr_t*	parent,	/*!< in: parent node, i.e., a thr node */
	mem_heap_t*	heap)	/*!< in: memory heap where created */
{
	purge_node_t*	purge;

	ut_ad(parent && heap);

	purge = mem_heap_alloc(heap, sizeof(purge_node_t));

	purge->common.parent = parent;
	purge->common.type = QUE_NODE_PURGE;

	/* Private heap of the node */
	purge->heap = mem_heap_create(256);

	return(purge);
}
/********************************************************************//** Creates the global purge system control structure and inits the history mutex. */ UNIV_INTERN void trx_purge_sys_create(void) /*======================*/ { ut_ad(mutex_own(&kernel_mutex)); purge_sys = mem_alloc(sizeof(trx_purge_t)); purge_sys->state = TRX_STOP_PURGE; purge_sys->n_pages_handled = 0; purge_sys->purge_trx_no = ut_dulint_zero; purge_sys->purge_undo_no = ut_dulint_zero; purge_sys->next_stored = FALSE; ut_d(purge_sys->done_trx_no = ut_dulint_zero); rw_lock_create(&purge_sys->latch, SYNC_PURGE_LATCH); mutex_create(&purge_sys->mutex, SYNC_PURGE_SYS); purge_sys->heap = mem_heap_create(256); purge_sys->arr = trx_undo_arr_create(); purge_sys->sess = sess_open(); purge_sys->trx = purge_sys->sess->trx; purge_sys->trx->is_purge = 1; ut_a(trx_start_low(purge_sys->trx, ULINT_UNDEFINED)); purge_sys->query = trx_purge_graph_build(); purge_sys->view = read_view_oldest_copy_or_open_new(ut_dulint_zero, purge_sys->heap); }
/**********************************************************************//**
Creates an index memory object. The zero-filled struct, a copy of the
index name and the field array are allocated from a heap created here and
stored in index->heap. The table name is stored by pointer, not copied.
@return own: index object */
UNIV_INTERN
dict_index_t*
dict_mem_index_create(
/*==================*/
	const char*	table_name,	/*!< in: table name */
	const char*	index_name,	/*!< in: index name */
	ulint		space,		/*!< in: space where the index tree is
					placed, ignored if the index is of
					the clustered type */
	ulint		type,		/*!< in: DICT_UNIQUE,
					DICT_CLUSTERED, ... ORed */
	ulint		n_fields)	/*!< in: number of fields */
{
	mem_heap_t*	index_heap;
	dict_index_t*	new_index;

	ut_ad(table_name && index_name);

	index_heap = mem_heap_create(DICT_HEAP_SIZE);
	new_index = mem_heap_zalloc(index_heap, sizeof(dict_index_t));

	new_index->heap = index_heap;

	/* Names: the index name is duplicated into the heap, the table
	name pointer is kept as passed in. */
	new_index->name = mem_heap_strdup(index_heap, index_name);
	new_index->table_name = table_name;

	new_index->type = type;
#ifndef UNIV_HOTBACKUP
	new_index->space = (unsigned int) space;
#endif /* !UNIV_HOTBACKUP */

	new_index->n_fields = (unsigned int) n_fields;

	/* The '1 +' below prevents allocation of an empty mem block */
	new_index->fields = mem_heap_alloc(
		index_heap, 1 + n_fields * sizeof(dict_field_t));

#ifdef UNIV_DEBUG
	new_index->magic_n = DICT_INDEX_MAGIC_N;
#endif /* UNIV_DEBUG */

	return(new_index);
}
/*********************************************************************//**
Fetches the clustered index record for a secondary index record. The latches
on the secondary index record are preserved. A row reference is built from
the secondary record in a temporary heap and searched with
row_search_on_row_ref(); the record under the cursor is read only if that
search reports success. The clustered index of the table is always stored
in *clust_index.
@return record or NULL, if no record found */
UNIV_INTERN
rec_t*
row_get_clust_rec(
/*==============*/
	ulint		mode,	/*!< in: BTR_MODIFY_LEAF, ... */
	const rec_t*	rec,	/*!< in: record in a secondary index */
	dict_index_t*	index,	/*!< in: secondary index */
	dict_index_t**	clust_index,/*!< out: clustered index */
	mtr_t*		mtr)	/*!< in: mtr */
{
	btr_pcur_t	pcur;
	dict_table_t*	table;
	mem_heap_t*	ref_heap;
	dtuple_t*	row_ref;
	rec_t*		clust_rec;
	ibool		found;

	ut_ad(!dict_index_is_clust(index));

	table = index->table;

	ref_heap = mem_heap_create(256);
	row_ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec,
				    ref_heap);

	found = row_search_on_row_ref(&pcur, mode, table, row_ref, mtr);

	if (found) {
		clust_rec = btr_pcur_get_rec(&pcur);
	} else {
		clust_rec = NULL;
	}

	mem_heap_free(ref_heap);
	btr_pcur_close(&pcur);

	*clust_index = dict_table_get_first_index(table);

	return(clust_rec);
}
/***********************************************************//**
Purges a delete marking of a record. Starting from node->index, corrupted
indexes are skipped, an index entry is built for each remaining index and
passed to row_purge_remove_sec_if_poss(). node->index is advanced as the
loop proceeds and is NULL afterwards. Finally
row_purge_remove_clust_if_poss() is called for the node. */
static
void
row_purge_del_mark(
/*===============*/
	purge_node_t*	node)	/*!< in: row purge node */
{
	mem_heap_t*	entry_heap;
	dict_index_t*	sec_index;
	dtuple_t*	sec_entry;

	ut_ad(node);

	/* All index entries are built in this heap; it is released only
	after the last index has been processed. */
	entry_heap = mem_heap_create(1024);

	while (node->index != NULL) {
		/* skip corrupted secondary index */
		dict_table_skip_corrupt_index(node->index);

		if (node->index == NULL) {
			break;
		}

		sec_index = node->index;

		/* Build the index entry */
		sec_entry = row_build_index_entry(node->row, NULL,
						  sec_index, entry_heap);
		ut_a(sec_entry);

		row_purge_remove_sec_if_poss(node, sec_index, sec_entry);

		node->index = dict_table_get_next_index(node->index);
	}

	mem_heap_free(entry_heap);

	row_purge_remove_clust_if_poss(node);
}
/****************************************************************//**
Creates and initializes a transaction object. The struct comes from
mem_alloc(), so the fields are assigned one by one below. The object
starts in state TRX_NOT_STARTED with isolation level
TRX_ISO_REPEATABLE_READ. Three memory heaps are created along the way:
the lock heap, the global read view heap, and the heap that backs the
autoinc_locks vector. In debug builds the function asserts that the
caller owns kernel_mutex.
@return own: the transaction */
UNIV_INTERN
trx_t*
trx_create(
/*=======*/
	sess_t*	sess)	/*!< in: session */
{
	trx_t*	trx;

	ut_ad(mutex_own(&kernel_mutex));
	ut_ad(sess);

	trx = mem_alloc(sizeof(trx_t));

	/* --- identity and general state --- */
	trx->magic_n = TRX_MAGIC_N;

	trx->op_info = "";

	trx->is_purge = 0;
	trx->is_recovered = 0;
	trx->conc_state = TRX_NOT_STARTED;
	trx->start_time = time(NULL);

	trx->isolation_level = TRX_ISO_REPEATABLE_READ;

	trx->id = ut_dulint_zero;
	trx->no = ut_dulint_max;

	/* --- behaviour switches --- */
	trx->support_xa = TRUE;

	trx->check_foreigns = TRUE;
	trx->check_unique_secondary = TRUE;

	trx->flush_log_later = FALSE;
	trx->must_flush_log_later = FALSE;

	trx->dict_operation = TRX_DICT_OP_NONE;
	trx->table_id = ut_dulint_zero;

	/* --- MySQL side bookkeeping --- */
	trx->mysql_thd = NULL;
	trx->active_trans = 0;
	trx->duplicates = 0;

	trx->n_mysql_tables_in_use = 0;
	trx->mysql_n_tables_locked = 0;

	trx->mysql_log_file_name = NULL;
	trx->mysql_log_offset = 0;

	/* --- undo logging --- */
	mutex_create(&trx->undo_mutex, SYNC_TRX_UNDO);

	trx->rseg = NULL;

	trx->undo_no = ut_dulint_zero;
	trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;
	trx->insert_undo = NULL;
	trx->update_undo = NULL;
	trx->undo_no_arr = NULL;

	/* --- error reporting --- */
	trx->error_state = DB_SUCCESS;
	trx->error_key_num = 0;
	trx->detailed_error[0] = '\0';

	/* --- session, query graph and signals --- */
	trx->sess = sess;
	trx->que_state = TRX_QUE_RUNNING;
	trx->n_active_thrs = 0;

	trx->handling_signals = FALSE;

	UT_LIST_INIT(trx->signals);
	UT_LIST_INIT(trx->reply_signals);

	trx->graph = NULL;

	/* --- locking --- */
	trx->wait_lock = NULL;
	trx->was_chosen_as_deadlock_victim = FALSE;
	UT_LIST_INIT(trx->wait_thrs);

	trx->lock_heap = mem_heap_create_in_buffer(256);
	UT_LIST_INIT(trx->trx_locks);

	UT_LIST_INIT(trx->trx_savepoints);

	trx->dict_operation_lock_mode = 0;
	trx->has_search_latch = FALSE;
	trx->search_latch_timeout = BTR_SEA_TIMEOUT;

	trx->declared_to_be_inside_innodb = FALSE;
	trx->n_tickets_to_enter_innodb = 0;

	/* --- read views --- */
	trx->global_read_view_heap = mem_heap_create(256);
	trx->global_read_view = NULL;
	trx->read_view = NULL;

	/* Set X/Open XA transaction identification to NULL */
	memset(&trx->xid, 0, sizeof(trx->xid));
	trx->xid.formatID = -1;

	/* --- AUTOINC --- */
	trx->n_autoinc_rows = 0;

	/* Remember to free the vector explicitly. */
	trx->autoinc_locks = ib_vector_create(
		mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 4), 4);

	return(trx);
}
/**************************************************************//**
Restores the stored position of a persistent cursor bufferfixing the page and
obtaining the specified latches. If the cursor position was saved when the
(1) cursor was positioned on a user record: this function restores the position
to the last record LESS OR EQUAL to the stored record;
(2) cursor was positioned on a page infimum record: restores the position to
the last record LESS than the user record which was the successor of the page
infimum;
(3) cursor was positioned on the page supremum: restores to the first record
GREATER than the user record which was the predecessor of the supremum.
(4) cursor was positioned before the first or after the last in an empty tree:
restores to before first or after the last in the tree.

The function works in up to three stages:
- if the stored relative position is before-first or after-last in the
  tree, the cursor is simply opened at that side of the index;
- otherwise, for BTR_SEARCH_LEAF and BTR_MODIFY_LEAF, an optimistic
  restoration via buf_page_optimistic_get() is tried;
- if that is not possible or fails, a search tuple is built from the
  stored record and the cursor is opened anew with a search.
@return TRUE if the cursor position was stored when it was on a user
record and it can be restored on a user record whose ordering fields
are identical to the ones of the original user record */
UNIV_INTERN
ibool
btr_pcur_restore_position_func(
/*===========================*/
	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
	btr_pcur_t*	cursor,		/*!< in: detached persistent cursor */
	const char*	file,		/*!< in: file name */
	ulint		line,		/*!< in: line where called */
	mtr_t*		mtr)		/*!< in: mtr */
{
	dict_index_t*	index;
	dtuple_t*	tuple;
	ulint		mode;
	ulint		old_mode;
	mem_heap_t*	heap;

	ut_ad(mtr);
	ut_ad(mtr->state == MTR_ACTIVE);

	index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));

	/* A position must have been stored, and the cursor must be (or
	have been) positioned. Otherwise dump the cursor, and the
	transaction if one is known, to stderr and hit ut_error. */
	if (UNIV_UNLIKELY(cursor->old_stored != BTR_PCUR_OLD_STORED)
	    || UNIV_UNLIKELY(cursor->pos_state != BTR_PCUR_WAS_POSITIONED
			     && cursor->pos_state != BTR_PCUR_IS_POSITIONED)) {
		ut_print_buf(stderr, cursor, sizeof(btr_pcur_t));
		putc('\n', stderr);
		if (cursor->trx_if_known) {
			trx_print(stderr, cursor->trx_if_known, 0);
		}

		ut_error;
	}

	if (UNIV_UNLIKELY
	    (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
	     || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {

		/* In these cases we do not try an optimistic restoration,
		but always do a search */

		/* The first argument selects the side of the index: it is
		true for BTR_PCUR_BEFORE_FIRST_IN_TREE. */
		btr_cur_open_at_index_side(
			cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
			index, latch_mode, btr_pcur_get_btr_cur(cursor), mtr);

		cursor->latch_mode = latch_mode;
		cursor->pos_state = BTR_PCUR_IS_POSITIONED;
		cursor->block_when_stored = btr_pcur_get_block(cursor);

		return(FALSE);
	}

	/* From here on a stored copy of the record is required */
	ut_a(cursor->old_rec);
	ut_a(cursor->old_n_fields);

	if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF)
	    || UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) {
		/* Try optimistic restoration */

		if (UNIV_LIKELY(buf_page_optimistic_get(
					latch_mode,
					cursor->block_when_stored,
					cursor->modify_clock,
					file, line, mtr))) {
			cursor->pos_state = BTR_PCUR_IS_POSITIONED;

			buf_block_dbg_add_level(
				btr_pcur_get_block(cursor),
				dict_index_is_ibuf(index)
				? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE);

			if (cursor->rel_pos == BTR_PCUR_ON) {
#ifdef UNIV_DEBUG
				const rec_t*	rec;
				const ulint*	offsets1;
				const ulint*	offsets2;
#endif /* UNIV_DEBUG */
				cursor->latch_mode = latch_mode;
#ifdef UNIV_DEBUG
				/* Debug-only check: the record now under
				the cursor must compare equal to the stored
				copy on the first old_n_fields fields. */
				rec = btr_pcur_get_rec(cursor);

				heap = mem_heap_create(256);
				offsets1 = rec_get_offsets(
					cursor->old_rec, index, NULL,
					cursor->old_n_fields, &heap);
				offsets2 = rec_get_offsets(
					rec, index, NULL,
					cursor->old_n_fields, &heap);

				ut_ad(!cmp_rec_rec(cursor->old_rec,
						   rec, offsets1, offsets2,
						   index));
				mem_heap_free(heap);
#endif /* UNIV_DEBUG */
				return(TRUE);
			}

			/* Optimistic get succeeded, but the stored position
			was not on a user record. Note that
			cursor->latch_mode is not assigned on this path. */
			return(FALSE);
		}
	}

	/* If optimistic restoration did not succeed, open the cursor anew */

	heap = mem_heap_create(256);

	tuple = dict_index_build_data_tuple(index, cursor->old_rec,
					    cursor->old_n_fields, heap);

	/* Save the old search mode of the cursor */
	old_mode = cursor->search_mode;

	/* Choose the page cursor search mode from the stored relative
	position. */
	switch (cursor->rel_pos) {
	case BTR_PCUR_ON:
		mode = PAGE_CUR_LE;
		break;
	case BTR_PCUR_AFTER:
		mode = PAGE_CUR_G;
		break;
	case BTR_PCUR_BEFORE:
		mode = PAGE_CUR_L;
		break;
	default:
		ut_error;
		mode = 0;
	}

	btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode,
					cursor, 0, file, line, mtr);

	/* Restore the old search mode */
	cursor->search_mode = old_mode;

	/* Only for BTR_PCUR_ON can the result be TRUE: the cursor has to
	be on a user record that compares equal to the search tuple. When
	UNIV_DEBUG is not defined the switch has no other case labels, so
	every other value simply skips the switch body. */
	switch (cursor->rel_pos) {
	case BTR_PCUR_ON:
		if (btr_pcur_is_on_user_rec(cursor)
		    && !cmp_dtuple_rec(
			    tuple, btr_pcur_get_rec(cursor),
			    rec_get_offsets(btr_pcur_get_rec(cursor),
					    index, NULL,
					    ULINT_UNDEFINED, &heap))) {

			/* We have to store the NEW value for
			the modify clock, since the cursor can
			now be on a different page! But we can
			retain the value of old_rec */

			cursor->block_when_stored =
				btr_pcur_get_block(cursor);
			cursor->modify_clock =
				buf_block_get_modify_clock(
					cursor->block_when_stored);
			cursor->old_stored = BTR_PCUR_OLD_STORED;

			mem_heap_free(heap);

			return(TRUE);
		}
#ifdef UNIV_DEBUG
		/* fall through */
	case BTR_PCUR_BEFORE:
	case BTR_PCUR_AFTER:
		break;
	default:
		ut_error;
#endif /* UNIV_DEBUG */
	}

	mem_heap_free(heap);

	/* We have to store new position information, modify_clock etc.,
	to the cursor because it can now be on a different page, the record
	under it may have been removed, etc. */

	btr_pcur_store_position(cursor, mtr);

	return(FALSE);
}
/******************************************************************
Restores the stored position of a persistent cursor. The function works
in up to three stages:
- if the stored relative position is before-first or after-last in the
  tree, the cursor is simply opened at that side of the index;
- otherwise, for BTR_SEARCH_LEAF and BTR_MODIFY_LEAF, an optimistic
  restoration via buf_page_optimistic_get() is tried;
- if that is not possible or fails, a search tuple is built from the
  stored record and the cursor is opened anew with a search. After the
  search, if the cursor is on a user record and the stored relative
  position was BTR_PCUR_BEFORE or BTR_PCUR_AFTER, the page cursor is
  moved one step (to the next or to the previous record, respectively)
  before the new position is stored. */
ibool
btr_pcur_restore_position(
/*======================*/
					/* out: TRUE if the cursor position
					was stored when it was on a user record
					and it can be restored on a user record
					whose ordering fields are identical to
					the ones of the original user record */
	ulint		latch_mode,	/* in: BTR_SEARCH_LEAF, ... */
	btr_pcur_t*	cursor,		/* in: detached persistent cursor */
	mtr_t*		mtr)		/* in: mtr */
{
	dict_index_t*	index;
	page_t*		page;
	dtuple_t*	tuple;
	ulint		mode;
	ulint		old_mode;
	mem_heap_t*	heap;

	index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));

	/* A position must have been stored, and the cursor must be (or
	have been) positioned. Otherwise dump the cursor, and the
	transaction if one is known, to stderr and hit ut_error. */
	if (UNIV_UNLIKELY(cursor->old_stored != BTR_PCUR_OLD_STORED)
	    || UNIV_UNLIKELY(cursor->pos_state != BTR_PCUR_WAS_POSITIONED
			     && cursor->pos_state != BTR_PCUR_IS_POSITIONED)) {
		ut_print_buf(stderr, cursor, sizeof(btr_pcur_t));
		if (cursor->trx_if_known) {
			trx_print(stderr, cursor->trx_if_known, 0);
		}

		ut_error;
	}

	if (UNIV_UNLIKELY(
		    cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
		    || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {

		/* In these cases we do not try an optimistic restoration,
		but always do a search */

		/* The first argument selects the side of the index: it is
		true for BTR_PCUR_BEFORE_FIRST_IN_TREE. */
		btr_cur_open_at_index_side(
			cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
			index, latch_mode, btr_pcur_get_btr_cur(cursor), mtr);

		/* Only block_when_stored is refreshed on this path */
		cursor->block_when_stored
			= buf_block_align(btr_pcur_get_page(cursor));

		return(FALSE);
	}

	/* From here on a stored copy of the record is required */
	ut_a(cursor->old_rec);
	ut_a(cursor->old_n_fields);

	page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor));

	if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF)
	    || UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) {
		/* Try optimistic restoration */

		if (UNIV_LIKELY(buf_page_optimistic_get(
					latch_mode,
					cursor->block_when_stored, page,
					cursor->modify_clock, mtr))) {
			cursor->pos_state = BTR_PCUR_IS_POSITIONED;
#ifdef UNIV_SYNC_DEBUG
			buf_page_dbg_add_level(page, SYNC_TREE_NODE);
#endif /* UNIV_SYNC_DEBUG */
			if (cursor->rel_pos == BTR_PCUR_ON) {
#ifdef UNIV_DEBUG
				rec_t*	rec;
				ulint*	offsets1;
				ulint*	offsets2;
#endif /* UNIV_DEBUG */
				cursor->latch_mode = latch_mode;
#ifdef UNIV_DEBUG
				/* Debug-only check: the record now under
				the cursor must compare equal to the stored
				copy on the first old_n_fields fields. */
				rec = btr_pcur_get_rec(cursor);

				heap = mem_heap_create(256);
				offsets1 = rec_get_offsets(
					cursor->old_rec, index, NULL,
					cursor->old_n_fields, &heap);
				offsets2 = rec_get_offsets(
					rec, index, NULL,
					cursor->old_n_fields, &heap);

				ut_ad(!cmp_rec_rec(cursor->old_rec,
						   rec, offsets1, offsets2,
						   index));
				mem_heap_free(heap);
#endif /* UNIV_DEBUG */
				return(TRUE);
			}

			/* Optimistic get succeeded, but the stored position
			was not on a user record. Note that
			cursor->latch_mode is not assigned on this path. */
			return(FALSE);
		}
	}

	/* If optimistic restoration did not succeed, open the cursor anew */

	heap = mem_heap_create(256);

	tuple = dict_index_build_data_tuple(index, cursor->old_rec,
					    cursor->old_n_fields, heap);

	/* Save the old search mode of the cursor */
	old_mode = cursor->search_mode;

	/* Choose the page cursor search mode from the stored relative
	position. */
	switch (cursor->rel_pos) {
	case BTR_PCUR_ON:
		mode = PAGE_CUR_LE;
		break;
	case BTR_PCUR_AFTER:
		mode = PAGE_CUR_G;
		break;
	case BTR_PCUR_BEFORE:
		mode = PAGE_CUR_L;
		break;
	default:
		ut_error;
		mode = 0; /* silence a warning */
	}

	btr_pcur_open_with_no_init(index, tuple, mode, latch_mode,
				   cursor, 0, mtr);

	/* Restore the old search mode */
	cursor->search_mode = old_mode;

	/* The adjustments below apply only when the search ended on a
	user record. */
	if (btr_pcur_is_on_user_rec(cursor, mtr)) {
		switch (cursor->rel_pos) {
		case BTR_PCUR_ON:
			if (!cmp_dtuple_rec(
				    tuple, btr_pcur_get_rec(cursor),
				    rec_get_offsets(btr_pcur_get_rec(cursor),
						    index, NULL,
						    ULINT_UNDEFINED, &heap))) {

				/* We have to store the NEW value for
				the modify clock, since the cursor can
				now be on a different page! But we can
				retain the value of old_rec */

				cursor->block_when_stored = buf_block_align(
					btr_pcur_get_page(cursor));
				cursor->modify_clock
					= buf_block_get_modify_clock(
						cursor->block_when_stored);
				cursor->old_stored = BTR_PCUR_OLD_STORED;

				mem_heap_free(heap);

				return(TRUE);
			}

			break;
		case BTR_PCUR_BEFORE:
			/* Step the page cursor one record forward */
			page_cur_move_to_next(btr_pcur_get_page_cur(cursor));
			break;
		case BTR_PCUR_AFTER:
			/* Step the page cursor one record backward */
			page_cur_move_to_prev(btr_pcur_get_page_cur(cursor));
			break;
#ifdef UNIV_DEBUG
		default:
			ut_error;
#endif /* UNIV_DEBUG */
		}
	}

	mem_heap_free(heap);

	/* We have to store new position information, modify_clock etc.,
	to the cursor because it can now be on a different page, the record
	under it may have been removed, etc. */

	btr_pcur_store_position(cursor, mtr);

	return(FALSE);
}
/******************************************************************
Moves the tails of long fields of an index entry to a big record vector
until the converted size of the entry drops below the limit
ut_min(page_get_free_space_of_empty(comp) / 2, REC_MAX_DATA_SIZE).
In every round the longest non-NULL field is chosen among the fields
that come after the first dict_index_get_n_unique_in_tree(index) fields
and that are not listed in ext_vec. The first DICT_MAX_INDEX_COL_LEN
bytes of the chosen field stay in the entry, followed by a zero-filled
area of BTR_EXTERN_FIELD_REF_SIZE bytes; the rest is copied to the
vector. The entry is modified in place: the len of the shortened fields
changes and part of their data buffers is overwritten with zeros. The
vector and everything in it live in a heap created here, recorded in
vector->heap. */
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
				/* out, own: created big record vector,
				NULL if we are not able to shorten
				the entry enough, i.e., if there are
				too many short fields in entry */
	dict_index_t*	index,	/* in: index */
	dtuple_t*	entry,	/* in: index entry */
	ulint*		ext_vec,/* in: array of externally stored fields,
				or NULL: if a field already is externally
				stored, then we cannot move it to the vector
				this function returns */
	ulint		n_ext_vec)/* in: number of elements is ext_vec */
{
	mem_heap_t*	heap;
	big_rec_t*	vector;
	dfield_t*	dfield;
	ulint		size;
	ulint		n_fields;
	ulint		longest;
	ulint		longest_i		= ULINT_MAX;
	ibool		is_externally_stored;
	ulint		i;
	ulint		j;

	ut_a(dtuple_check_typed_no_assert(entry));

	size = rec_get_converted_size(index, entry);

	/* A suspiciously large tuple is only reported on stderr;
	processing continues. */
	if (UNIV_UNLIKELY(size > 1000000000)) {
		fprintf(stderr,
			"InnoDB: Warning: tuple size very big: %lu\n",
			(ulong) size);
		fputs("InnoDB: Tuple contents: ", stderr);
		dtuple_print(stderr, entry);
		putc('\n', stderr);
	}

	/* The initial heap size covers the whole tuple, one vector slot
	per field, and 1000 bytes on top. */
	heap = mem_heap_create(size + dtuple_get_n_fields(entry)
			       * sizeof(big_rec_field_t) + 1000);

	vector = mem_heap_alloc(heap, sizeof(big_rec_t));

	vector->heap = heap;
	vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
					* sizeof(big_rec_field_t));

	/* Decide which fields to shorten: the algorithm is to look for
	the longest field whose type is DATA_BLOB */

	n_fields = 0;

	/* One field is shortened per iteration; the converted size is
	recomputed each time. */
	while (rec_get_converted_size(index, entry)
	       >= ut_min(page_get_free_space_of_empty(
				 index->table->comp) / 2,
			 REC_MAX_DATA_SIZE)) {

		longest = 0;
		for (i = dict_index_get_n_unique_in_tree(index);
		     i < dtuple_get_n_fields(entry); i++) {

			/* Skip over fields which already are externally
			stored */

			is_externally_stored = FALSE;

			if (ext_vec) {
				for (j = 0; j < n_ext_vec; j++) {
					if (ext_vec[j] == i) {
						is_externally_stored = TRUE;
					}
				}
			}

			if (!is_externally_stored) {

				dfield = dtuple_get_nth_field(entry, i);

				/* NULL fields are never candidates */
				if (dfield->len != UNIV_SQL_NULL
				    && dfield->len > longest) {

					longest = dfield->len;

					longest_i = i;
				}
			}
		}

		/* We do not store externally fields which are smaller than
		DICT_MAX_INDEX_COL_LEN */

		ut_a(DICT_MAX_INDEX_COL_LEN > REC_1BYTE_OFFS_LIMIT);

		/* longest_i is only used below when this check passes,
		i.e. when some field was selected in this round. A field
		that was already shortened has length
		DICT_MAX_INDEX_COL_LEN + BTR_EXTERN_FIELD_REF_SIZE, which
		is below this threshold, so it cannot qualify again. */
		if (longest < BTR_EXTERN_FIELD_REF_SIZE + 10
		    + DICT_MAX_INDEX_COL_LEN) {
			/* Cannot shorten more */

			mem_heap_free(heap);

			return(NULL);
		}

		/* Move data from field longest_i to big rec vector.
		The first DICT_MAX_INDEX_COL_LEN bytes of the field are
		kept in the entry itself, and only the remaining part is
		moved to the big rec vector.

		We store the first bytes locally to the record. Then
		we can calculate all ordering fields in all indexes
		from locally stored data. */

		dfield = dtuple_get_nth_field(entry, longest_i);
		vector->fields[n_fields].field_no = longest_i;

		ut_a(dfield->len > DICT_MAX_INDEX_COL_LEN);

		vector->fields[n_fields].len = dfield->len
			- DICT_MAX_INDEX_COL_LEN;

		vector->fields[n_fields].data = mem_heap_alloc(
			heap, vector->fields[n_fields].len);

		/* Copy data (from the end of field) to big rec vector */

		ut_memcpy(vector->fields[n_fields].data,
			  ((byte*)dfield->data) + dfield->len
			  - vector->fields[n_fields].len,
			  vector->fields[n_fields].len);

		/* New local length: the kept prefix plus room for the
		extern field reference. */
		dfield->len = dfield->len - vector->fields[n_fields].len
			+ BTR_EXTERN_FIELD_REF_SIZE;

		/* Set the extern field reference in dfield to zero */
		/* This overwrites, in the field's own data buffer, the
		first bytes of the part that was just copied to the
		vector. */
		memset(((byte*)dfield->data)
		       + dfield->len - BTR_EXTERN_FIELD_REF_SIZE,
		       0, BTR_EXTERN_FIELD_REF_SIZE);
		n_fields++;
	}

	vector->n_fields = n_fields;
	return(vector);
}
/**************************************************************//**
Moves parts of long fields in entry to the big record vector until
page_zip_rec_needs_ext() no longer reports that the converted record
needs external storage. Candidate fields are taken only from positions
dict_index_get_n_unique_in_tree(index) and above, i.e., from fields
which are not necessary to determine uniquely the insertion place of
the tuple in the index. On each round the variable-length field that
yields the biggest savings is chosen.
@return own: created big record vector; NULL if the index is not
clustered, or if we are not able to shorten the entry enough, i.e.,
if there are too many fixed-length or short fields in entry */
UNIV_INTERN
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
	dict_index_t*	index,	/*!< in: index */
	dtuple_t*	entry,	/*!< in/out: index entry */
	ulint*		n_ext)	/*!< in/out: number of
				externally stored columns */
{
	mem_heap_t*	heap;
	big_rec_t*	vector;
	ulint		size;
	ulint		n_moved	= 0;
	ulint		local_len;
	ulint		prefix_len;

	if (UNIV_UNLIKELY(!dict_index_is_clust(index))) {
		return(NULL);
	}

	/* Every shortened column keeps a BLOB pointer locally. Tables
	in a format older than DICT_TF_FORMAT_ZIP (up to MySQL 5.1)
	additionally keep a 768-byte prefix locally; new-format tables do
	not store any BLOB prefix locally. */
	local_len = BTR_EXTERN_FIELD_REF_SIZE;

	if (dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP) {
		local_len += DICT_MAX_INDEX_COL_LEN;
	}

	prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;

	ut_a(dtuple_check_typed_no_assert(entry));

	size = rec_get_converted_size(index, entry, *n_ext);

	if (UNIV_UNLIKELY(size > 1000000000)) {
		fprintf(stderr,
			"InnoDB: Warning: tuple size very big: %lu\n",
			(ulong) size);
		fputs("InnoDB: Tuple contents: ", stderr);
		dtuple_print(stderr, entry);
		putc('\n', stderr);
	}

	/* The vector, its field array and the shortened local copies of
	the columns are all allocated from this heap. */
	heap = mem_heap_create(size
			       + dtuple_get_n_fields(entry)
			       * sizeof(big_rec_field_t)
			       + 1000);

	vector = mem_heap_alloc(heap, sizeof(big_rec_t));
	vector->heap = heap;
	vector->fields = mem_heap_alloc(heap,
					dtuple_get_n_fields(entry)
					* sizeof(big_rec_field_t));

	while (page_zip_rec_needs_ext(
		       rec_get_converted_size(index, entry, *n_ext),
		       dict_table_is_comp(index->table),
		       dict_index_get_n_fields(index),
		       dict_table_zip_size(index->table))) {

		dfield_t*		field;
		dict_field_t*		ifield;
		big_rec_field_t*	big;
		byte*			local_data;
		ulint			best_savings	= 0;
		ulint			best_i		= ULINT_MAX;
		ulint			i;

		for (i = dict_index_get_n_unique_in_tree(index);
		     i < dtuple_get_n_fields(entry);
		     i++) {

			ulint	len;

			field = dtuple_get_nth_field(entry, i);
			ifield = dict_index_get_nth_field(index, i);

			/* Skip fixed-length, NULL and externally
			stored columns */
			if (ifield->fixed_len
			    || dfield_is_null(field)
			    || dfield_is_ext(field)) {
				continue;
			}

			len = dfield_get_len(field);

			/* Skip short columns */
			if (len <= local_len
			    || len <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
				continue;
			}

			/* Remember the column only if it saves more
			than the best candidate found so far */
			if (len - local_len > best_savings) {
				best_savings = len - local_len;
				best_i = i;
			}
		}

		if (best_savings == 0) {
			/* Cannot shorten more */
			mem_heap_free(heap);

			return(NULL);
		}

		/* Move data from field best_i to big rec vector. We store
		the first bytes locally to the record. Then we can calculate
		all ordering fields in all indexes from locally stored
		data. */

		field = dtuple_get_nth_field(entry, best_i);
		/* NOTE(review): ifield is not read after this assignment. */
		ifield = dict_index_get_nth_field(index, best_i);

		/* The vector entry is not a copy: it points into the
		original column data, just past the local prefix. */
		big = &vector->fields[n_moved];
		big->field_no = best_i;
		big->len = dfield_get_len(field) - prefix_len;
		big->data = (char*) dfield_get_data(field) + prefix_len;

		/* Allocate the locally stored part of the column. */
		local_data = mem_heap_alloc(heap, local_len);

		/* Copy the local prefix. */
		memcpy(local_data, dfield_get_data(field), prefix_len);

		/* Clear the extern field reference (BLOB pointer). */
		memset(local_data + prefix_len, 0,
		       BTR_EXTERN_FIELD_REF_SIZE);
#if 0
		/* The following would fail the Valgrind checks in
		page_cur_insert_rec_low() and page_cur_insert_rec_zip().
		The BLOB pointers in the record will be initialized after
		the record and the BLOBs have been written. */
		UNIV_MEM_ALLOC(local_data + prefix_len,
			       BTR_EXTERN_FIELD_REF_SIZE);
#endif

		/* Make the entry refer to the shortened copy and flag
		the column as externally stored. */
		dfield_set_data(field, local_data, local_len);
		dfield_set_ext(field);

		n_moved++;
		(*n_ext)++;
		ut_ad(n_moved < dtuple_get_n_fields(entry));
	}

	vector->n_fields = n_moved;

	return(vector);
}
/*******************************************************************//**
Roll back an active transaction. Builds a QUE_FORK_RECOVERY query graph
holding a single rollback node, runs it, and polls until the query state
of the transaction is TRX_QUE_RUNNING again. If the transaction was a
dictionary operation with a nonzero table id, the table with that id is
dropped if it still exists. Progress is reported on stderr. */
static
void
trx_rollback_active(
/*================*/
	trx_t*	trx)	/*!< in/out: transaction */
{
	mem_heap_t*	graph_heap;
	que_fork_t*	graph;
	que_thr_t*	thr;
	roll_node_t*	node;
	ib_int64_t	n_rows;
	const char*	suffix		= "";
	ibool		dict_locked	= FALSE;

	/* Build the graph: fork -> thread -> rollback node. All of it is
	allocated from graph_heap, which is freed at the end. */
	graph_heap = mem_heap_create(512);

	graph = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, graph_heap);
	graph->trx = trx;

	thr = que_thr_create(graph, graph_heap);

	node = roll_node_create(graph_heap);

	thr->child = node;
	node->common.parent = thr;

	mutex_enter(&kernel_mutex);

	trx->graph = graph;

	ut_a(thr == que_fork_start_command(graph));

	trx_roll_crash_recv_trx = trx;
	trx_roll_max_undo_no = trx->undo_no;
	trx_roll_progress_printed_pct = 0;

	/* Report very large row counts in millions. */
	n_rows = trx_roll_max_undo_no;

	if (n_rows > 1000000000) {
		n_rows = n_rows / 1000000;
		suffix = "M";
	}

	ut_print_timestamp(stderr);
	fprintf(stderr,
		" InnoDB: Rolling back trx with id " TRX_ID_FMT ", %lu%s"
		" rows to undo\n",
		(ullint) trx->id,
		(ulong) n_rows, suffix);

	mutex_exit(&kernel_mutex);

	trx->mysql_thread_id = os_thread_get_curr_id();
	trx->mysql_process_no = os_proc_get_number();

	/* The data dictionary is locked for the duration of the rollback
	when the transaction is a dictionary operation. */
	if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
		row_mysql_lock_data_dictionary(trx);
		dict_locked = TRUE;
	}

	que_run_threads(thr);

	/* Poll the query state under the kernel mutex, sleeping between
	checks with the mutex released. */
	for (;;) {
		ibool	finished;

		mutex_enter(&kernel_mutex);
		finished = (trx->que_state == TRX_QUE_RUNNING);
		mutex_exit(&kernel_mutex);

		if (finished) {
			break;
		}

		fprintf(stderr,
			"InnoDB: Waiting for rollback of trx id "
			TRX_ID_FMT " to end\n",
			(ullint) trx->id);
		os_thread_sleep(100000);
	}

	if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
	    && trx->table_id != 0) {

		dict_table_t*	table;

		/* If the transaction was for a dictionary operation, we
		drop the relevant table, if it still exists */

		fprintf(stderr,
			"InnoDB: Dropping table with id %llu"
			" in recovery if it exists\n",
			(ullint) trx->table_id);

		table = dict_table_get_on_id_low(trx->table_id);

		if (table) {
			ulint	drop_err;

			fputs("InnoDB: Table found: dropping table ", stderr);
			ut_print_name(stderr, trx, TRUE, table->name);
			fputs(" in recovery\n", stderr);

			drop_err = row_drop_table_for_mysql(table->name,
							    trx, TRUE);
			trx_commit_for_mysql(trx);

			/* The commit is issued before the result of the
			drop is asserted. */
			ut_a(drop_err == (int) DB_SUCCESS);
		}
	}

	if (dict_locked) {
		row_mysql_unlock_data_dictionary(trx);
	}

	fprintf(stderr,
		"\nInnoDB: Rolling back of trx id " TRX_ID_FMT
		" completed\n",
		(ullint) trx->id);

	mem_heap_free(graph_heap);

	trx_roll_crash_recv_trx = NULL;
}