// Return call for get_row if waiting RC row_t::get_row_post_wait(access_t type, TxnManager * txn, row_t *& row) { RC rc = RCOK; assert(CC_ALG == WAIT_DIE || CC_ALG == MVCC || CC_ALG == TIMESTAMP); #if CC_ALG == WAIT_DIE assert(txn->lock_ready); rc = RCOK; //ts_t endtime = get_sys_clock(); row = this; #elif CC_ALG == MVCC || CC_ALG == TIMESTAMP assert(txn->ts_ready); //INC_STATS(thd_id, time_wait, t2 - t1); row = txn->cur_row; assert(row->get_data() != NULL); assert(row->get_table() != NULL); assert(row->get_schema() == this->get_schema()); assert(row->get_table_name() != NULL); if (CC_ALG == MVCC && type == WR) { DEBUG_M("row_t::get_row_post_wait MVCC alloc \n"); row_t * newr = (row_t *) mem_allocator.alloc(sizeof(row_t)); newr->init(this->get_table(), get_part_id()); newr->copy(row); row = newr; } #endif return rc; }
/*
 * Defines a writeTuple operation for the tuple carried by `cb` and queues it
 * with executeAsynchPrepare() (commit, completion reported to `callback`).
 *
 * Each attempt starts a new transaction. Whenever a step fails,
 * errorHandler(cb) decides whether the attempt is restarted (true) or the
 * failure is reported and exitHandler() is called (false). Attempts stop once
 * cb->retries reaches 10.
 * NOTE(review): cb->retries is not modified here; presumably errorHandler()
 * advances it - confirm.
 */
void BackupRestore::tuple_a(restore_callback_t *cb)
{
  const Uint32 frag_partition = cb->fragId;

  while (cb->retries < 10)
  {
    // Open the transaction for this attempt.
    cb->connection = m_ndb->startTransaction();
    if (cb->connection == NULL)
    {
      if (errorHandler(cb))
      {
        m_ndb->sendPollNdb(3000, 1);
        continue;
      }
      err << "Cannot start transaction" << endl;
      exitHandler();
    }

    const TupleS &row_tuple = cb->tup;
    const NdbDictionary::Table * dict_table =
      get_table(row_tuple.getTable()->m_dictTable);

    NdbOperation * write_op = cb->connection->getNdbOperation(dict_table);
    if (write_op == NULL)
    {
      if (errorHandler(cb))
        continue;
      err << "Cannot get operation: " << cb->connection->getNdbError() << endl;
      exitHandler();
    }

    if (write_op->writeTuple() == -1)
    {
      if (errorHandler(cb))
        continue;
      err << "Error defining op: " << cb->connection->getNdbError() << endl;
      exitHandler();
    }

    if (dict_table->getFragmentType() == NdbDictionary::Object::UserDefined)
    {
      if (!dict_table->getDefaultNoPartitionsFlag())
      {
        /*
          Either RANGE or LIST (with or without subparts)
          OR HASH partitioning with user defined hash function but with
          fixed set of partitions.
        */
        write_op->setPartitionId(frag_partition);
      }
      else
      {
        /*
          This can only happen for HASH partitioning with
          user defined hash function where user hasn't
          specified the number of partitions and we
          have to calculate it. We use the hash value
          stored in the record to calculate the partition
          to use.
        */
        const int last_attr = row_tuple.getNoOfAttributes() - 1;
        const AttributeData *hash_attr = row_tuple.getData(last_attr);
        const Uint32 hash_value = *hash_attr->u_int32_value;
        write_op->setPartitionId(get_part_id(dict_table, hash_value));
      }
    }

    // Two passes over the attributes: pass 0 defines the primary key
    // columns with equal(), pass 1 sets every other column with setValue().
    // The first failing call ends both passes.
    int ret = 0;
    for (int pass = 0; pass < 2 && ret >= 0; pass++)
    {
      const bool key_pass = (pass == 0);

      for (int i = 0; i < row_tuple.getNoOfAttributes(); i++)
      {
        const AttributeDesc * attr_desc = row_tuple.getDesc(i);
        const AttributeData * attr_data = row_tuple.getData(i);
        int size = attr_desc->size;
        int arraySize = attr_desc->arraySize;
        char * value_ptr = attr_data->string_value;

        // Length handed to NDB; stays 0 for a NULL attribute.
        Uint32 length = 0;
        if (!attr_data->null)
        {
          const unsigned char * raw = (const unsigned char *)value_ptr;
          switch (attr_desc->m_column->getType())
          {
          case NdbDictionary::Column::Varchar:
          case NdbDictionary::Column::Varbinary:
            // one length byte followed by the payload
            length = raw[0] + 1;
            break;
          case NdbDictionary::Column::Longvarchar:
          case NdbDictionary::Column::Longvarbinary:
            // two length bytes, low byte first, followed by the payload
            length = raw[0] + (raw[1] << 8) + 2;
            break;
          default:
            length = attr_data->size;
            break;
          }
        }

        // Auto-increment bookkeeping is done once per attribute, on pass 0.
        if (key_pass && row_tuple.getTable()->have_auto_inc(i))
          row_tuple.getTable()->update_max_auto_val(value_ptr, size*arraySize);

        // Key columns belong to pass 0, all other columns to pass 1.
        const bool is_key = attr_desc->m_column->getPrimaryKey();
        if (is_key != key_pass)
          continue;

        if (is_key)
          ret = write_op->equal(i, value_ptr, length);
        else if (attr_data->null)
          ret = write_op->setValue(i, NULL, 0);
        else
          ret = write_op->setValue(i, value_ptr, length);

        if (ret < 0)
        {
          ndbout_c("Column: %d type %d %d %d %d",i,
                   attr_desc->m_column->getType(),
                   size, arraySize, length);
          break;
        }
      }
    }

    if (ret < 0)
    {
      if (errorHandler(cb))
        continue;
      err << "Error defining op: " << cb->connection->getNdbError() << endl;
      exitHandler();
    }

    // Prepare transaction (the transaction is NOT yet sent to NDB)
    cb->connection->executeAsynchPrepare(NdbTransaction::Commit,
                                         &callback, cb);
    m_transactions++;
    return;
  }

  // Retry budget exhausted.
  err << "Retried transaction " << cb->retries << " times.\nLast error"
      << m_ndb->getNdbError(cb->error_code) << endl
      << "...Unable to recover from errors. Exiting..." << endl;
  exitHandler();
}
/*
 * Allocates storage for one TsReqEntry from mem_allocator, passing the
 * partition id obtained from get_part_id(_row) to the allocator.
 * The returned memory is not initialised here.
 */
TsReqEntry * Row_ts::get_req_entry()
{
	uint64_t row_partition = get_part_id(_row);
	void * storage = mem_allocator.alloc(sizeof(TsReqEntry), row_partition);
	return (TsReqEntry *) storage;
}
/*
 * Walks from the root of partition params.part_id down to the leaf whose key
 * range covers `key`, and returns that leaf through `leaf`.
 *
 * access_type selects the latching protocol:
 *   INDEX_NONE    no latch is taken at all.
 *   INDEX_INSERT  nodes are latched LATCH_SH on the way down. A parent whose
 *                 chosen child already holds order - 1 keys is upgraded with
 *                 upgrade_latch() and stays latched; `last_ex` remembers the
 *                 first parent kept this way. Otherwise cleanup(c, last_ex)
 *                 is called, last_ex is cleared and the parent is released.
 *                 The leaf itself is upgraded before returning.
 *   otherwise     the parent's LATCH_SH is released as soon as the child is
 *                 latched, so only the leaf is latched on return.
 *
 * Returns RCOK on success, Abort when a latch cannot be taken or upgraded.
 * Every abort path that calls cleanup() also resets `last_ex` to NULL, so the
 * caller never receives a pointer that was already handed to cleanup().
 */
RC index_btree::find_leaf(glob_param params, idx_key_t key, idx_acc_t access_type, bt_node *& leaf, bt_node *& last_ex)
{
	UInt32 i;
	bt_node * c = find_root(params.part_id);
	assert(c != NULL);
	bt_node * child;

	if (access_type == INDEX_NONE) {
		// Latch-free descent.
		while (!c->is_leaf) {
			for (i = 0; i < c->num_keys; i++) {
				if (key < c->keys[i])
					break;
			}
			c = (bt_node *)c->pointers[i];
		}
		leaf = c;
		return RCOK;
	}

	// key should be inserted into the right side of i
	if (!latch_node(c, LATCH_SH))
		return Abort;

	while (!c->is_leaf) {
		assert(get_part_id(c) == params.part_id);
		assert(get_part_id(c->keys) == params.part_id);

		// First slot whose key is greater than `key`; num_keys selects the
		// right-most pointer.
		for (i = 0; i < c->num_keys; i++) {
			if (key < c->keys[i])
				break;
		}
		child = (bt_node *)c->pointers[i];

		if (!latch_node(child, LATCH_SH)) {
			release_latch(c);
			cleanup(c, last_ex);
			last_ex = NULL;
			return Abort;
		}

		if (access_type == INDEX_INSERT) {
			if (child->num_keys == order - 1) {
				// Child holds order - 1 keys: keep the parent, upgraded.
				if (upgrade_latch(c) != RCOK) {
					release_latch(c);
					release_latch(child);
					cleanup(c, last_ex);
					last_ex = NULL;
					return Abort;
				}
				if (last_ex == NULL)
					last_ex = c;
			} else {
				cleanup(c, last_ex);
				last_ex = NULL;
				release_latch(c);
			}
		} else {
			release_latch(c); // release the LATCH_SH on c
		}
		c = child;
	}

	// c is leaf
	// at this point, if the access is a read, then only the leaf is latched by LATCH_SH
	// if the access is an insertion, then the leaf is sh latched and related nodes in the tree
	// are ex latched.
	if (access_type == INDEX_INSERT) {
		if (upgrade_latch(c) != RCOK) {
			release_latch(c);
			cleanup(c, last_ex);
			// Same contract as the abort paths above: do not leave the
			// caller holding a pointer that was just passed to cleanup().
			last_ex = NULL;
			return Abort;
		}
	}

	leaf = c;
	assert (leaf->is_leaf);
	return RCOK;
}
RC row_t::get_row(access_t type, txn_man * txn, row_t *& row) { RC rc = RCOK; #if CC_ALG == WAIT_DIE || CC_ALG == NO_WAIT || CC_ALG == DL_DETECT uint64_t thd_id = txn->get_thd_id(); lock_t lt = (type == RD || type == SCAN)? LOCK_SH : LOCK_EX; #if CC_ALG == DL_DETECT uint64_t * txnids; int txncnt; rc = this->manager->lock_get(lt, txn, txnids, txncnt); #else rc = this->manager->lock_get(lt, txn); #endif if (rc == RCOK) { row = this; } else if (rc == Abort) {} else if (rc == WAIT) { ASSERT(CC_ALG == WAIT_DIE || CC_ALG == DL_DETECT); uint64_t starttime = get_sys_clock(); #if CC_ALG == DL_DETECT bool dep_added = false; #endif uint64_t endtime; txn->lock_abort = false; INC_STATS(txn->get_thd_id(), wait_cnt, 1); while (!txn->lock_ready && !txn->lock_abort) { #if CC_ALG == WAIT_DIE continue; #elif CC_ALG == DL_DETECT uint64_t last_detect = starttime; uint64_t last_try = starttime; uint64_t now = get_sys_clock(); if (now - starttime > g_timeout ) { txn->lock_abort = true; break; } if (g_no_dl) continue; int ok = 0; if ((now - last_detect > g_dl_loop_detect) && (now - last_try > DL_LOOP_TRIAL)) { if (!dep_added) { ok = dl_detector.add_dep(txn->get_txn_id(), txnids, txncnt, txn->row_cnt); if (ok == 0) dep_added = true; else if (ok == 16) last_try = now; } if (dep_added) { ok = dl_detector.detect_cycle(txn->get_txn_id()); if (ok == 16) // failed to lock the deadlock detector last_try = now; else if (ok == 0) last_detect = now; else if (ok == 1) { last_detect = now; } } } #endif } if (txn->lock_ready) rc = RCOK; else if (txn->lock_abort) { rc = Abort; return_row(type, txn, NULL); } endtime = get_sys_clock(); INC_TMP_STATS(thd_id, time_wait, endtime - starttime); row = this; } return rc; #elif CC_ALG == TIMESTAMP || CC_ALG == MVCC uint64_t thd_id = txn->get_thd_id(); // For TIMESTAMP RD, a new copy of the row will be returned. // for MVCC RD, the version will be returned instead of a copy // So for MVCC RD-WR, the version should be explicitly copied. 
row_t * newr = NULL; #if CC_ALG == TIMESTAMP // TIMESTAMP makes a whole copy of the row before reading txn->cur_row = (row_t *) mem_allocator.alloc(sizeof(row_t), this->get_part_id()); txn->cur_row->init(get_table(), this->get_part_id()); #elif CC_ALG == MVCC if (type == WR) { newr = (row_t *) mem_allocator.alloc(sizeof(row_t), get_part_id()); newr->init(this->get_table(), get_part_id()); } #endif if (type == WR) { rc = this->manager->access(txn, P_REQ, NULL); if (rc != RCOK) return rc; } if ((type == WR && rc == RCOK) || type == RD || type == SCAN) { rc = this->manager->access(txn, R_REQ, NULL); if (rc == RCOK ) { row = txn->cur_row; } else if (rc == WAIT) { uint64_t t1 = get_sys_clock(); while (!txn->ts_ready) {} uint64_t t2 = get_sys_clock(); INC_TMP_STATS(thd_id, time_wait, t2 - t1); row = txn->cur_row; } else if (rc == Abort) { } if (rc != Abort) { assert(row->get_data() != NULL); assert(row->get_table() != NULL); assert(row->get_schema() == this->get_schema()); assert(row->get_table_name() != NULL); } } if (rc != Abort && CC_ALG == MVCC && type == WR) { newr->copy(row); row = newr; } return rc; #elif CC_ALG == OCC // OCC always make a local copy regardless of read or write txn->cur_row = (row_t *) mem_allocator.alloc(sizeof(row_t), get_part_id()); txn->cur_row->init(get_table(), get_part_id()); rc = this->manager->access(txn, R_REQ); row = txn->cur_row; return rc; #elif CC_ALG == HSTORE || CC_ALG == VLL row = this; return rc; #else assert(false); #endif }
RC row_t::get_row(access_t type, TxnManager * txn, row_t *& row) { RC rc = RCOK; #if MODE==NOCC_MODE || MODE==QRY_ONLY_MODE row = this; return rc; #endif #if ISOLATION_LEVEL == NOLOCK row = this; return rc; #endif /* #if ISOLATION_LEVEL == READ_UNCOMMITTED if(type == RD) { row = this; return rc; } #endif */ #if CC_ALG == MAAT DEBUG_M("row_t::get_row MAAT alloc \n"); txn->cur_row = (row_t *) mem_allocator.alloc(sizeof(row_t)); txn->cur_row->init(get_table(), get_part_id()); rc = this->manager->access(type,txn); txn->cur_row->copy(this); row = txn->cur_row; assert(rc == RCOK); goto end; #endif #if CC_ALG == WAIT_DIE || CC_ALG == NO_WAIT //uint64_t thd_id = txn->get_thd_id(); lock_t lt = (type == RD || type == SCAN)? LOCK_SH : LOCK_EX; rc = this->manager->lock_get(lt, txn); if (rc == RCOK) { row = this; } else if (rc == Abort) {} else if (rc == WAIT) { ASSERT(CC_ALG == WAIT_DIE); } goto end; #elif CC_ALG == TIMESTAMP || CC_ALG == MVCC //uint64_t thd_id = txn->get_thd_id(); // For TIMESTAMP RD, a new copy of the row will be returned. // for MVCC RD, the version will be returned instead of a copy // So for MVCC RD-WR, the version should be explicitly copied. 
// row_t * newr = NULL; #if CC_ALG == TIMESTAMP // TIMESTAMP makes a whole copy of the row before reading DEBUG_M("row_t::get_row TIMESTAMP alloc \n"); txn->cur_row = (row_t *) mem_allocator.alloc(sizeof(row_t)); txn->cur_row->init(get_table(), this->get_part_id()); #endif if (type == WR) { rc = this->manager->access(txn, P_REQ, NULL); if (rc != RCOK) goto end; } if ((type == WR && rc == RCOK) || type == RD || type == SCAN) { rc = this->manager->access(txn, R_REQ, NULL); if (rc == RCOK ) { row = txn->cur_row; } else if (rc == WAIT) { rc = WAIT; goto end; } else if (rc == Abort) { } if (rc != Abort) { assert(row->get_data() != NULL); assert(row->get_table() != NULL); assert(row->get_schema() == this->get_schema()); assert(row->get_table_name() != NULL); } } if (rc != Abort && CC_ALG == MVCC && type == WR) { DEBUG_M("row_t::get_row MVCC alloc \n"); row_t * newr = (row_t *) mem_allocator.alloc(sizeof(row_t)); newr->init(this->get_table(), get_part_id()); newr->copy(row); row = newr; } goto end; #elif CC_ALG == OCC // OCC always make a local copy regardless of read or write DEBUG_M("row_t::get_row OCC alloc \n"); txn->cur_row = (row_t *) mem_allocator.alloc(sizeof(row_t)); txn->cur_row->init(get_table(), get_part_id()); rc = this->manager->access(txn, R_REQ); row = txn->cur_row; goto end; #elif CC_ALG == HSTORE || CC_ALG == HSTORE_SPEC || CC_ALG == CALVIN #if CC_ALG == HSTORE_SPEC if(txn_table.spec_mode) { DEBUG_M("row_t::get_row HSTORE_SPEC alloc \n"); txn->cur_row = (row_t *) mem_allocator.alloc(sizeof(row_t)); txn->cur_row->init(get_table(), get_part_id()); rc = this->manager->access(txn, R_REQ); row = txn->cur_row; goto end; } #endif row = this; goto end; #else assert(false); #endif end: return rc; }