void TxnManager::send_finish_messages() { rsp_cnt = query->partitions_touched.size() - 1; assert(IS_LOCAL(get_txn_id())); DEBUG("%ld Send FINISH messages to %d\n",get_txn_id(),rsp_cnt); for(uint64_t i = 0; i < query->partitions_touched.size(); i++) { if(GET_NODE_ID(query->partitions_touched[i]) == g_node_id) { continue; } msg_queue.enqueue(get_thd_id(),Message::create_message(this,RFIN),GET_NODE_ID(query->partitions_touched[i])); } }
// Drives the YCSB transaction state machine until it finishes or stops with a
// non-RCOK code, accumulates processing time, and then starts commit/abort
// handling depending on whether the transaction is local.
RC YCSBTxnManager::run_txn() {
  assert(CC_ALG != CALVIN);
  RC rc = RCOK;

  // On the very first step of a local transaction, register the partition
  // GET_PART_ID(0,g_node_id) as touched.
  const bool first_local_step =
      IS_LOCAL(txn->txn_id) && state == YCSB_0 && next_record_id == 0;
  if (first_local_step) {
    DEBUG("Running txn %ld\n",txn->txn_id);
    //query->print();
    query->partitions_touched.add_unique(GET_PART_ID(0,g_node_id));
  }

  const uint64_t run_begin = get_sys_clock();
  for (; rc == RCOK && !is_done(); ) {
    rc = run_txn_state();
  }
  const uint64_t run_end = get_sys_clock();

  txn_stats.process_time += run_end - run_begin;
  txn_stats.process_time_short += run_end - run_begin;
  txn_stats.wait_starttime = get_sys_clock();

  // Non-local transactions only react to an abort.
  if (!IS_LOCAL(get_txn_id())) {
    if (rc == Abort) {
      rc = abort();
    }
    return rc;
  }

  // Local transactions: commit when done and clean, abort on Abort,
  // otherwise hand back the current code unchanged.
  if (is_done() && rc == RCOK) {
    rc = start_commit();
  } else if (rc == Abort) {
    rc = start_abort();
  }
  return rc;
}
// Walks the transaction's accesses from last to first calling cleanup_row on
// each; on Abort it additionally releases and clears the inserted rows and
// records the elapsed time in abort_time.
void TxnManager::cleanup(RC rc) {
#if CC_ALG == OCC && MODE == NORMAL_MODE
  occ_man.finish(rc,this);
#endif

  ts_t cleanup_begin = get_sys_clock();
  uint64_t access_cnt = txn->accesses.get_count();
  assert(txn->accesses.get_count() == txn->row_cnt);
  //assert((WORKLOAD == YCSB && row_cnt <= g_req_per_query) || (WORKLOAD == TPCC && row_cnt <= g_max_items_per_txn*2 + 3));
  DEBUG("Cleanup %ld %ld\n",get_txn_id(),access_cnt);

  // Signed index so the descending loop terminates after index 0.
  int rid = access_cnt - 1;
  while (rid >= 0) {
    cleanup_row(rc,rid);
    rid--;
  }

#if CC_ALG == CALVIN
  // cleanup locked rows
  for (uint64_t k = 0; k < calvin_locked_rows.size(); k++) {
    row_t * locked = calvin_locked_rows[k];
    locked->return_row(rc,RD,this,locked);
  }
#endif

  if (rc != Abort) {
    return;
  }
  txn->release_inserts(get_thd_id());
  txn->insert_rows.clear();
  INC_STATS(get_thd_id(), abort_time, get_sys_clock() - cleanup_begin);
}
// Releases locks with RCOK, updates commit statistics and, when LOGGING is
// enabled, enqueues an L_NOTIFY log record and returns WAIT instead of Commit.
RC TxnManager::commit() {
  DEBUG("Commit %ld\n",get_txn_id());
  release_locks(RCOK);
#if CC_ALG == MAAT
  time_table.release(get_thd_id(),get_txn_id());
#endif
  commit_stats();

#if LOGGING
  LogRecord * notify_rec = logger.createRecord(get_txn_id(),L_NOTIFY,0,0);
  // With g_repl_cnt > 0 the record is also sent as a LOG_MSG to node
  // g_node_id + g_node_cnt + g_client_node_cnt.
  if (g_repl_cnt > 0) {
    msg_queue.enqueue(get_thd_id(),
                      Message::create_message(notify_rec,LOG_MSG),
                      g_node_id + g_node_cnt + g_client_node_cnt);
  }
  logger.enqueueRecord(notify_rec);
  return WAIT;
#else
  return Commit;
#endif
}
// Enqueues an RPREPARE message for every touched partition whose node id
// differs from g_node_id, after setting rsp_cnt to (touched partitions - 1).
void TxnManager::send_prepare_messages() {
  rsp_cnt = query->partitions_touched.size() - 1;
  DEBUG("%ld Send PREPARE messages to %d\n",get_txn_id(),rsp_cnt);

  for (uint64_t pos = 0; pos < query->partitions_touched.size(); ++pos) {
    const bool on_this_node =
        GET_NODE_ID(query->partitions_touched[pos]) == g_node_id;
    if (!on_this_node) {
      msg_queue.enqueue(get_thd_id(),
                        Message::create_message(this,RPREPARE),
                        GET_NODE_ID(query->partitions_touched[pos]));
    }
  }
}
// Marks the transaction as aborted and runs abort(); when more than one
// partition was touched, finish messages are sent first and Abort is returned
// directly.
RC TxnManager::start_abort() {
  txn->rc = Abort;
  DEBUG("%ld start_abort\n",get_txn_id());

  const bool touched_several = query->partitions_touched.size() > 1;
  if (!touched_several) {
    return abort();
  }

  send_finish_messages();
  abort();
  return Abort;
}
void TxnManager::commit_stats() { uint64_t commit_time = get_sys_clock(); uint64_t timespan_short = commit_time - txn_stats.restart_starttime; uint64_t timespan_long = commit_time - txn_stats.starttime; INC_STATS(get_thd_id(),total_txn_commit_cnt,1); if(!IS_LOCAL(get_txn_id()) && CC_ALG != CALVIN) { INC_STATS(get_thd_id(),remote_txn_commit_cnt,1); txn_stats.commit_stats(get_thd_id(),get_txn_id(),get_batch_id(), timespan_long, timespan_short); return; } INC_STATS(get_thd_id(),txn_cnt,1); INC_STATS(get_thd_id(),local_txn_commit_cnt,1); INC_STATS(get_thd_id(), txn_run_time, timespan_long); if(query->partitions_touched.size() > 1) { INC_STATS(get_thd_id(),multi_part_txn_cnt,1); INC_STATS(get_thd_id(),multi_part_txn_run_time,timespan_long); } else { INC_STATS(get_thd_id(),single_part_txn_cnt,1); INC_STATS(get_thd_id(),single_part_txn_run_time,timespan_long); } /*if(cflt) { INC_STATS(get_thd_id(),cflt_cnt_txn,1); }*/ txn_stats.commit_stats(get_thd_id(),get_txn_id(),get_batch_id(),timespan_long, timespan_short); #if CC_ALG == CALVIN return; #endif INC_STATS_ARR(get_thd_id(),start_abort_commit_latency, timespan_short); INC_STATS_ARR(get_thd_id(),last_start_commit_latency, timespan_short); INC_STATS_ARR(get_thd_id(),first_start_commit_latency, timespan_long); assert(query->partitions_touched.size() > 0); INC_STATS(get_thd_id(),parts_touched,query->partitions_touched.size()); INC_STATS(get_thd_id(),part_cnt[query->partitions_touched.size()-1],1); for(uint64_t i = 0 ; i < query->partitions_touched.size(); i++) { INC_STATS(get_thd_id(),part_acc[query->partitions_touched[i]],1); } }
// Aborts the transaction once: updates abort counters, sets the aborted flag,
// releases locks with Abort, and records the abort latency for local
// transactions when warmup_done is set. Always returns Abort; a second call
// returns immediately because of the aborted flag.
RC TxnManager::abort() {
  if (aborted) {
    return Abort;
  }

  DEBUG("Abort %ld\n",get_txn_id());
  txn->rc = Abort;
  INC_STATS(get_thd_id(),total_txn_abort_cnt,1);
  txn_stats.abort_cnt++;

  if (!IS_LOCAL(get_txn_id())) {
    INC_STATS(get_thd_id(), remote_txn_abort_cnt, 1);
    txn_stats.abort_stats(get_thd_id());
  } else {
    INC_STATS(get_thd_id(), local_txn_abort_cnt, 1);
  }

  aborted = true;
  release_locks(Abort);
#if CC_ALG == MAAT
  //assert(time_table.get_state(get_txn_id()) == MAAT_ABORTED);
  time_table.release(get_thd_id(),get_txn_id());
#endif

  // Time elapsed since restart_starttime.
  const uint64_t since_restart = get_sys_clock() - txn_stats.restart_starttime;
  if (IS_LOCAL(get_txn_id()) && warmup_done) {
    INC_STATS_ARR(get_thd_id(),start_abort_commit_latency, since_restart);
  }

  /*
  // latency from most recent start or restart of transaction
  PRINT_LATENCY("lat_s %ld %ld 0 %f %f %f %f %f %f 0.0\n"
      , get_txn_id()
      , txn_stats.work_queue_cnt
      , (double) timespan / BILLION
      , (double) txn_stats.work_queue_time / BILLION
      , (double) txn_stats.msg_queue_time / BILLION
      , (double) txn_stats.cc_block_time / BILLION
      , (double) txn_stats.cc_time / BILLION
      , (double) txn_stats.process_time / BILLION
      );
  */
  //commit_stats();
  return Abort;
}
void txn_man::cleanup(RC rc) { for (int rid = row_cnt - 1; rid >= 0; rid --) { #if !NOGRAPHITE part_id = accesses[rid]->orig_row->get_part_id(); if (g_hw_migrate) { if (part_id != CarbonGetHostTileId()) CarbonMigrateThread(part_id); } #endif row_t * orig_r = accesses[rid]->orig_row; access_t type = accesses[rid]->type; if (type == WR && rc == Abort) type = XP; if (ROLL_BACK && type == XP && (CC_ALG == DL_DETECT || CC_ALG == NO_WAIT || CC_ALG == WAIT_DIE)) { orig_r->return_row(type, this, accesses[rid]->orig_data); } else { orig_r->return_row(type, this, accesses[rid]->data); } #if ROLL_BACK && (CC_ALG == DL_DETECT || CC_ALG == NO_WAIT || CC_ALG == WAIT_DIE) if (type == WR) { accesses[rid]->orig_data->free_row(); mem_allocator.free(accesses[rid]->orig_data, sizeof(row_t)); } #endif accesses[rid]->data = NULL; } if (rc == Abort) { for (UInt32 i = 0; i < insert_cnt; i ++) { row_t * row = insert_rows[i]; assert(g_part_alloc == false); #if CC_ALG != HSTORE && CC_ALG != OCC mem_allocator.free(row->manager, 0); #endif row->free_row(); mem_allocator.free(row, sizeof(row)); } } row_cnt = 0; wr_cnt = 0; insert_cnt = 0; #if CC_ALG == DL_DETECT dl_detector.clear_dep(get_txn_id()); #endif }
// Enqueues an RFWD message for every node other than g_node_id whose entry in
// query->active_nodes equals 1. Returns RCOK in all cases; under
// !YCSB_ABORT_MODE with the YCSB workload it returns before sending anything.
RC TxnManager::send_remote_reads() {
  assert(CC_ALG == CALVIN);
#if !YCSB_ABORT_MODE && WORKLOAD == YCSB
  return RCOK;
#endif
  assert(query->active_nodes.size() == g_node_cnt);

  for (uint64_t node = 0; node < query->active_nodes.size(); ++node) {
    // Skip this node first, then any node not flagged as active.
    const bool should_send = node != g_node_id && query->active_nodes[node] == 1;
    if (!should_send) {
      continue;
    }
    DEBUG("(%ld,%ld) send_remote_read to %ld\n",get_txn_id(),get_batch_id(),node);
    msg_queue.enqueue(get_thd_id(),Message::create_message(this,RFWD),node);
  }
  return RCOK;
}
// Runs the validation step for OCC or MAAT and records the time spent in
// txn_validate_time. Returns RCOK without doing anything when MODE is not
// NORMAL_MODE or CC_ALG is neither OCC nor MAAT.
RC TxnManager::validate() {
#if MODE != NORMAL_MODE
  return RCOK;
#endif
  if (!(CC_ALG == OCC || CC_ALG == MAAT)) {
    return RCOK;
  }

  RC rc = RCOK;
  const uint64_t validate_begin = get_sys_clock();

  if (rc == RCOK && CC_ALG == OCC) {
    rc = occ_man.validate(this);
  }

  if (rc == RCOK && CC_ALG == MAAT) {
    rc = maat_man.validate(this);
    // Note: home node must be last to validate
    if (IS_LOCAL(get_txn_id()) && rc == RCOK) {
      rc = maat_man.find_bound(this);
    }
  }

  INC_STATS(get_thd_id(),txn_validate_time,get_sys_clock() - validate_begin);
  return rc;
}
// CALVIN lock acquisition for a YCSB query: requests a lock on every row whose
// partition maps to this node. The returned code is the last non-RCOK result
// from get_lock, unless decr_lr() reaches 0 and this call wins the lock_ready
// CAS, in which case RCOK is returned.
RC YCSBTxnManager::acquire_locks() {
  const uint64_t sched_begin = get_sys_clock();
  assert(CC_ALG == CALVIN);
  YCSBQuery* ycsb_query = (YCSBQuery*) query;
  locking_done = false;
  RC rc = RCOK;
  incr_lr();
  assert(ycsb_query->requests.size() == g_req_per_query);
  assert(phase == CALVIN_RW_ANALYSIS);

  uint32_t req_idx = 0;
  while (req_idx < ycsb_query->requests.size()) {
    ycsb_request * req = ycsb_query->requests[req_idx];
    req_idx++;

    uint64_t part_id = _wl->key_to_part( req->key );
    DEBUG("LK Acquire (%ld,%ld) %d,%ld -> %ld\n",get_txn_id(),get_batch_id(),req->acctype,req->key,GET_NODE_ID(part_id));

    // Only rows whose partition maps to this node are locked here.
    if (GET_NODE_ID(part_id) == g_node_id) {
      INDEX * index = _wl->the_index;
      itemid_t * item = index_read(index, req->key, part_id);
      row_t * row = ((row_t *)item->location);
      RC lock_rc = get_lock(row,req->acctype);
      if (lock_rc != RCOK) {
        rc = lock_rc;
      }
    }
  }

  // When decr_lr() returns 0, try to flip lock_ready from false to true;
  // succeeding overrides any earlier non-RCOK code.
  if (decr_lr() == 0 && ATOM_CAS(lock_ready,false,true)) {
    rc = RCOK;
  }
  txn_stats.wait_starttime = get_sys_clock();
  /*
  if(rc == WAIT && lock_ready_cnt == 0) {
    if(ATOM_CAS(lock_ready,false,true))
    //lock_ready = true;
      rc = RCOK;
  }
  */
  INC_STATS(get_thd_id(),calvin_sched_time,get_sys_clock() - sched_begin);
  locking_done = true;
  return rc;
}
// Drives the TPCC transaction state machine until it finishes or stops with a
// non-RCOK code, accumulates processing time, and starts commit or abort for
// local transactions. Under CALVIN the work is delegated to run_calvin_txn().
RC TPCCTxnManager::run_txn() {
#if MODE == SETUP_MODE
  return RCOK;
#endif
  RC rc = RCOK;
  const uint64_t run_begin = get_sys_clock();

#if CC_ALG == CALVIN
  rc = run_calvin_txn();
  return rc;
#endif

  // At the initial state of a local payment/new-order transaction, register
  // the partition GET_PART_ID(0,g_node_id) as touched.
  const bool at_initial_state = (state == TPCC_PAYMENT0 || state == TPCC_NEWORDER0);
  if (IS_LOCAL(txn->txn_id) && at_initial_state) {
    DEBUG("Running txn %ld\n",txn->txn_id);
#if DISTR_DEBUG
    query->print();
#endif
    query->partitions_touched.add_unique(GET_PART_ID(0,g_node_id));
  }

  for (; rc == RCOK && !is_done(); ) {
    rc = run_txn_state();
  }

  const uint64_t run_end = get_sys_clock();
  txn_stats.process_time += run_end - run_begin;
  txn_stats.process_time_short += run_end - run_begin;

  if (!IS_LOCAL(get_txn_id())) {
    return rc;
  }

  if (is_done() && rc == RCOK) {
    rc = start_commit();
  } else if (rc == Abort) {
    rc = start_abort();
  }
  return rc;
}
// Begins the commit path. Single-partition transactions validate and then
// commit (or start an abort). Multi-partition transactions either send prepare
// messages and return WAIT_REM, or, when read-only and CC_ALG is neither OCC
// nor MAAT, send finish messages and commit immediately.
RC TxnManager::start_commit() {
  RC rc = RCOK;
  DEBUG("%ld start_commit RO?%d\n",get_txn_id(),query->readonly());

  if (!is_multi_part()) {
    // is not multi-part
    rc = validate();
    if (rc != RCOK) {
      // The validation result is what the caller sees; start_abort()'s own
      // return value is not used.
      start_abort();
    } else {
      rc = commit();
    }
    return rc;
  }

  const bool needs_prepare = !query->readonly() || CC_ALG == OCC || CC_ALG == MAAT;
  if (needs_prepare) {
    // send prepare messages
    send_prepare_messages();
    rc = WAIT_REM;
    return rc;
  }

  send_finish_messages();
  rsp_cnt = 0;
  rc = commit();
  return rc;
}
// Sends an RQRY message carrying the current state to the node that owns the
// warehouse needed by that state, records the destination partition as
// touched, advances `state` to the follow-up state and returns WAIT_REM.
// Any state other than the four handled below trips assert(false).
RC TPCCTxnManager::send_remote_request() {
  assert(IS_LOCAL(get_txn_id()));
  TPCCQuery* tpcc_query = (TPCCQuery*) query;
  TPCCRemTxnType following_state = TPCC_FIN;
  uint64_t w_id = tpcc_query->w_id;
  uint64_t c_w_id = tpcc_query->c_w_id;
  uint64_t target_node = UINT64_MAX;

  switch (state) {
    case TPCC_PAYMENT0:
      target_node = GET_NODE_ID(wh_to_part(w_id));
      following_state = TPCC_PAYMENT2;
      break;
    case TPCC_PAYMENT4:
      target_node = GET_NODE_ID(wh_to_part(c_w_id));
      following_state = TPCC_FIN;
      break;
    case TPCC_NEWORDER0:
      target_node = GET_NODE_ID(wh_to_part(w_id));
      following_state = TPCC_NEWORDER6;
      break;
    case TPCC_NEWORDER8:
      target_node = GET_NODE_ID(wh_to_part(tpcc_query->items[next_item_id]->ol_supply_w_id));
      /*
      while(GET_NODE_ID(wh_to_part(tpcc_query->items[next_item_id]->ol_supply_w_id)) != dest_node_id) {
        msg->items.add(tpcc_query->items[next_item_id++]);
      }
      */
      following_state = is_done() ? TPCC_FIN : TPCC_NEWORDER6;
      break;
    default:
      assert(false);
      break;
  }

  TPCCQueryMessage * msg = (TPCCQueryMessage*)Message::create_message(this,RQRY);
  // The message carries the state as it was before advancing below.
  msg->state = state;
  query->partitions_touched.add_unique(GET_PART_ID(0,target_node));
  msg_queue.enqueue(get_thd_id(),msg,target_node);
  state = following_state;
  return WAIT_REM;
}
// Requests access of the given type to `row` and, on success, records the
// access in the transaction and hands the row data back through row_rtn.
//
// row:     row to access.
// type:    requested access type.
// row_rtn: set to the access's data on success, NULL when the row manager
//          returns Abort or WAIT.
// Returns the code produced by row->get_row().
RC TxnManager::get_row(row_t * row, access_t type, row_t *& row_rtn) {
  const uint64_t call_begin = get_sys_clock();

  DEBUG_M("TxnManager::get_row access alloc\n");
  Access * access;
  access_pool.get(get_thd_id(),access);
  //uint64_t row_cnt = txn->row_cnt;
  //assert(txn->accesses.get_count() - 1 == row_cnt);

  last_row = row;
  last_type = type;

  RC rc = row->get_row(type, this, access->data);

  const bool not_granted = (rc == Abort || rc == WAIT);
  if (not_granted) {
    // Hand the unused Access back to the pool and count the conflict.
    row_rtn = NULL;
    DEBUG_M("TxnManager::get_row(abort) access free\n");
    access_pool.put(get_thd_id(),access);
    const uint64_t blocked_span = get_sys_clock() - call_begin;
    INC_STATS(get_thd_id(), txn_manager_time, blocked_span);
    INC_STATS(get_thd_id(), txn_conflict_cnt, 1);
    //cflt = true;
#if DEBUG_TIMELINE
    printf("CONFLICT %ld %ld\n",get_txn_id(),get_sys_clock());
#endif
    return rc;
  }

  access->type = type;
  access->orig_row = row;

#if ROLL_BACK && (CC_ALG == DL_DETECT || CC_ALG == NO_WAIT || CC_ALG == WAIT_DIE || CC_ALG == HSTORE || CC_ALG == HSTORE_SPEC)
  if (type == WR) {
    // Keep a copy of the row as it was before the write in access->orig_data.
    //printf("alloc 10 %ld\n",get_txn_id());
    uint64_t part_id = row->get_part_id();
    DEBUG_M("TxnManager::get_row row_t alloc\n");
    row_pool.get(get_thd_id(),access->orig_data);
    access->orig_data->init(row->get_table(), part_id, 0);
    access->orig_data->copy(row);
    assert(access->orig_data->get_schema() == row->get_schema());

    // ARIES-style physiological logging
#if LOGGING
    //LogRecord * record = logger.createRecord(LRT_UPDATE,L_UPDATE,get_txn_id(),part_id,row->get_table()->get_table_id(),row->get_primary_key());
    LogRecord * update_rec = logger.createRecord(get_txn_id(),L_UPDATE,row->get_table()->get_table_id(),row->get_primary_key());
    if (g_repl_cnt > 0) {
      msg_queue.enqueue(get_thd_id(),
                        Message::create_message(update_rec,LOG_MSG),
                        g_node_id + g_node_cnt + g_client_node_cnt);
    }
    logger.enqueueRecord(update_rec);
#endif
  }
#endif

  ++txn->row_cnt;
  if (type == WR) {
    ++txn->write_cnt;
  }
  txn->accesses.add(access);

  const uint64_t granted_span = get_sys_clock() - call_begin;
  INC_STATS(get_thd_id(), txn_manager_time, granted_span);
  row_rtn = access->data;

  if (CC_ALG == HSTORE || CC_ALG == HSTORE_SPEC || CC_ALG == CALVIN) {
    assert(rc == RCOK);
  }
  return rc;
}