/*******************************************************************//**
Undoes the work of a transaction down to the savepoint that carries the
given name. Changes made after the savepoint are rolled back, but the
locks kept in memory for them are NOT released by InnoDB. 'Implicit' locks
- where a freshly inserted row is locked merely by the trx id stored in
the row - vanish naturally as part of the rollback. Every savepoint that
was set later than the named one is discarded.
@return DB_NO_SAVEPOINT when the transaction has no savepoint of that
name, DB_ERROR when the savepoint exists but the transaction is not
started, otherwise the code returned by trx_general_rollback_for_mysql() */
UNIV_INTERN
ulint
trx_rollback_to_savepoint_for_mysql(
/*================================*/
	trx_t*		trx,			/*!< in: transaction handle */
	const char*	savepoint_name,		/*!< in: name of the savepoint
						to roll back to */
	ib_int64_t*	mysql_binlog_cache_pos)	/*!< out: MySQL binlog cache
						position recorded for this
						savepoint; MySQL uses it to
						drop the binlog entries of
						the statements run after the
						savepoint */
{
	trx_named_savept_t*	found;
	ulint			rollback_err;

	/* Walk the savepoint list until an entry with a matching name
	turns up; 'found' ends up NULL when the list is exhausted. */
	for (found = UT_LIST_GET_FIRST(trx->trx_savepoints);
	     found != NULL;
	     found = UT_LIST_GET_NEXT(trx_savepoints, found)) {

		if (ut_strcmp(found->name, savepoint_name) == 0) {

			break;
		}
	}

	if (found == NULL) {

		return(DB_NO_SAVEPOINT);
	}

	/* A savepoint on a transaction that has not been started is an
	inconsistency: report it and refuse to roll back. */
	if (trx->conc_state == TRX_NOT_STARTED) {
		ut_print_timestamp(stderr);
		fputs(" InnoDB: Error: transaction has a savepoint ", stderr);
		ut_print_name(stderr, trx, FALSE, found->name);
		fputs(" though it is not started\n", stderr);

		return(DB_ERROR);
	}

	/* Drop every savepoint strictly newer than the one we matched;
	the matched savepoint itself stays in the list. */
	trx_roll_savepoints_free(trx, found);

	*mysql_binlog_cache_pos = found->mysql_binlog_cache_pos;

	trx->op_info = "rollback to a savepoint";

	rollback_err = trx_general_rollback_for_mysql(trx, &found->savept);

	/* Remember the current undo_no so that a rollback of the next
	SQL statement knows where to stop. */
	trx_mark_sql_stat_end(trx);

	trx->op_info = "";

	return(rollback_err);
}
/****************************************************************//**
Commits a transaction.

The caller must hold kernel_mutex on entry (checked with ut_ad()). The
function releases and re-acquires kernel_mutex internally, and holds it
again when it returns. The work is done in four phases:

1. If the transaction has an insert or update undo log: with kernel_mutex
   released, a mini-transaction changes the undo log segment states under
   the rollback segment mutex, assigns trx->no when there is an update
   undo log, optionally records the MySQL binlog name/offset, and is then
   committed. The end lsn of that mini-transaction becomes the commit lsn.
2. With kernel_mutex held: the transaction is marked
   TRX_COMMITTED_IN_MEMORY, its locks are released and its global read
   view (if any) is closed.
3. If phase 1 ran: with kernel_mutex released, the insert undo log is
   cleaned up and the redo log is written and/or flushed according to
   trx->flush_log_later and srv_flush_log_at_trx_commit; trx->commit_lsn
   is stored.
4. With kernel_mutex held: savepoints are freed, the transaction is reset
   to TRX_NOT_STARTED and removed from trx_sys->trx_list. */
void
trx_commit_off_kernel(
/*==================*/
	trx_t*	trx)	/*!< in: transaction to commit */
{
	page_t*		update_hdr_page;
	dulint		lsn;	/* only assigned inside the undo-log branch
				below; it is read only when must_flush_log
				is TRUE, which is set in that same branch */
	trx_rseg_t*	rseg;
	trx_undo_t*	undo;
	ibool		must_flush_log	= FALSE;
	mtr_t		mtr;

	ut_ad(mutex_own(&kernel_mutex));

	trx->must_flush_log_later = FALSE;

	rseg = trx->rseg;

	if (trx->insert_undo != NULL || trx->update_undo != NULL) {

		/* The file-based part of the commit is done without the
		kernel mutex. */
		mutex_exit(&kernel_mutex);

		mtr_start(&mtr);

		must_flush_log = TRUE;

		/* Change the undo log segment states from TRX_UNDO_ACTIVE
		to some other state: these modifications to the file data
		structure define the transaction as committed in the file
		based world, at the serialization point of the log sequence
		number lsn obtained below. */

		mutex_enter(&(rseg->mutex));

		if (trx->insert_undo != NULL) {
			trx_undo_set_state_at_finish(
				rseg, trx, trx->insert_undo, &mtr);
		}

		undo = trx->update_undo;

		if (undo) {
			/* The transaction number is allocated under the
			kernel mutex, which is taken here while the rseg
			mutex is already held. */
			mutex_enter(&kernel_mutex);
			trx->no = trx_sys_get_new_trx_no();

			mutex_exit(&kernel_mutex);

			/* It is not necessary to obtain trx->undo_mutex here
			because only a single OS thread is allowed to do the
			transaction commit for this transaction. */

			update_hdr_page = trx_undo_set_state_at_finish(
				rseg, trx, undo, &mtr);

			/* We have to do the cleanup for the update log while
			holding the rseg mutex because update log headers
			have to be put to the history list in the order of
			the trx number. */

			trx_undo_update_cleanup(trx, update_hdr_page, &mtr);
		}

		mutex_exit(&(rseg->mutex));

		/* Update the latest MySQL binlog name and offset info
		in trx sys header if MySQL binlogging is on or the database
		server is a MySQL replication slave */

		if (trx->mysql_log_file_name
		    && trx->mysql_log_file_name[0] != '\0') {
			trx_sys_update_mysql_binlog_offset(
				trx->mysql_log_file_name,
				trx->mysql_log_offset,
				TRX_SYS_MYSQL_LOG_INFO, &mtr);
			/* Cleared so that the same name/offset is not
			written again by a later commit. */
			trx->mysql_log_file_name = NULL;
		}

		/* The following call commits the mini-transaction, making the
		whole transaction committed in the file-based world, at this
		log sequence number. The transaction becomes 'durable' when
		we write the log to disk, but in the logical sense the commit
		in the file-based data structures (undo logs etc.) happens
		here.

		NOTE that transaction numbers, which are assigned only to
		transactions with an update undo log, do not necessarily come
		in exactly the same order as commit lsn's, if the transactions
		have different rollback segments. To get exactly the same
		order we should hold the kernel mutex up to this point,
		adding to the contention of the kernel mutex. However, if
		a transaction T2 is able to see modifications made by
		a transaction T1, T2 will always get a bigger transaction
		number and a bigger commit lsn than T1. */

		/*--------------*/
		mtr_commit(&mtr);
		/*--------------*/
		lsn = mtr.end_lsn;

		mutex_enter(&kernel_mutex);
	}

	ut_ad(trx->conc_state == TRX_ACTIVE
	      || trx->conc_state == TRX_PREPARED);
	ut_ad(mutex_own(&kernel_mutex));

	/* The following assignment makes the transaction committed in memory
	and makes its changes to data visible to other transactions.
	NOTE that there is a small discrepancy from the strict formal
	visibility rules here: a human user of the database can see
	modifications made by another transaction T even before the necessary
	log segment has been flushed to the disk. If the database happens to
	crash before the flush, the user has seen modifications from T which
	will never be a committed transaction. However, any transaction T2
	which sees the modifications of the committing transaction T, and
	which also itself makes modifications to the database, will get an lsn
	larger than the committing transaction T. In the case where the log
	flush fails, and T never gets committed, also T2 will never get
	committed. */

	/*--------------------------------------*/
	trx->conc_state = TRX_COMMITTED_IN_MEMORY;
	/*--------------------------------------*/

	lock_release_off_kernel(trx);

	if (trx->global_read_view) {
		read_view_close(trx->global_read_view);
		mem_heap_empty(trx->global_read_view_heap);
		trx->global_read_view = NULL;
	}

	trx->read_view = NULL;

	if (must_flush_log) {

		/* The insert undo cleanup and the log write/flush below are
		done without the kernel mutex. */
		mutex_exit(&kernel_mutex);

		if (trx->insert_undo != NULL) {

			trx_undo_insert_cleanup(trx);
		}

		/* NOTE that we could possibly make a group commit more
		efficient here: call os_thread_yield here to allow also other
		trxs to come to commit! */

		/*-------------------------------------*/

		/* Depending on the my.cnf options, we may now write the log
		buffer to the log files, making the transaction durable if
		the OS does not crash. We may also flush the log files to
		disk, making the transaction durable also at an OS crash or a
		power outage.

		The idea in InnoDB's group commit is that a group of
		transactions gather behind a trx doing a physical disk write
		to log files, and when that physical write has been completed,
		one of those transactions does a write which commits the whole
		group. Note that this group commit will only bring benefit if
		there are > 2 users in the database. Then at least 2 users can
		gather behind one doing the physical log write to disk.

		If we are calling trx_commit() under MySQL's binlog mutex, we
		will delay possible log write and flush to a separate function
		trx_commit_complete_for_mysql(), which is only called when the
		thread has released the binlog mutex. This is to make the
		group commit algorithm to work. Otherwise, the MySQL binlog
		mutex would serialize all commits and prevent a group of
		transactions from gathering. */

		if (trx->flush_log_later) {
			/* Do nothing yet: only record that a log write/flush
			is still owed for this commit. */
			trx->must_flush_log_later = TRUE;
		} else if (srv_flush_log_at_trx_commit == 0) {
			/* Do nothing */
		} else if (srv_flush_log_at_trx_commit == 1) {
			if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
				/* Write the log but do not flush it to disk */

				log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
						FALSE);
			} else {
				/* Write the log to the log files AND flush
				them to disk */

				log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
			}
		} else if (srv_flush_log_at_trx_commit == 2) {

			/* Write the log but do not flush it to disk */

			log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
		} else {
			/* Any value other than 0, 1 or 2 is treated as a
			fatal error. */
			ut_error;
		}

		/* Stored in every case above, including the ones where the
		log write was skipped or postponed. */
		trx->commit_lsn = lsn;

		/*-------------------------------------*/

		mutex_enter(&kernel_mutex);
	}

	/* Free savepoints */
	trx_roll_savepoints_free(trx, NULL);

	/* Reset the transaction object to the not-started state. */
	trx->conc_state = TRX_NOT_STARTED;
	trx->rseg = NULL;
	trx->undo_no = ut_dulint_zero;
	trx->last_sql_stat_start.least_undo_no = ut_dulint_zero;
	trx->mysql_query_str = NULL;

	/* By now the transaction must have no waiting threads and no locks
	left. */
	ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
	ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0);

	UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
}