void BuildIndex(std::shared_ptr<index::Index> index, storage::DataTable *table) { auto &txn_manager = concurrency::TransactionManagerFactory::GetInstance(); auto txn = txn_manager.BeginTransaction(); oid_t start_tile_group_count = START_OID; oid_t table_tile_group_count = table->GetTileGroupCount(); while (start_tile_group_count < table_tile_group_count) { auto tile_group = table->GetTileGroup(start_tile_group_count++); auto column_count = table->GetSchema()->GetColumnCount(); oid_t active_tuple_count = tile_group->GetNextTupleSlot(); for (oid_t tuple_id = 0; tuple_id < active_tuple_count; tuple_id++) { std::unique_ptr<storage::Tuple> tuple_ptr( new storage::Tuple(table->GetSchema(), true)); CopyTuple(tuple_id, tuple_ptr.get(), tile_group.get(), column_count); ItemPointer location(tile_group->GetTileGroupId(), tuple_id); ItemPointer *index_entry_ptr = nullptr; table->InsertInIndexes(tuple_ptr.get(), location, txn, &index_entry_ptr); } index->IncrementIndexedTileGroupOffset(); } txn_manager.CommitTransaction(txn); }
/**
 * @brief Advances an index over the table's tile groups, starting at the
 *        index's current indexed-tile-group offset.
 *
 * At most max_tile_groups_indexed tile groups are processed per call. For
 * every allocated tuple slot the tuple is copied out and the index key is
 * built from it; the actual index insertion is currently commented out.
 * After each tile group the index's indexed-tile-group offset is incremented.
 *
 * @param table Table that owns the tile groups.
 * @param index Index being built.
 */
void IndexTuner::BuildIndex(storage::DataTable* table,
                            std::shared_ptr<index::Index> index) {
  auto table_schema = table->GetSchema();
  auto next_offset = index->GetIndexedTileGroupOff();
  auto tile_group_total = table->GetTileGroupCount();

  auto index_schema = index->GetKeySchema();
  auto indexed_columns = index_schema->GetIndexedColumns();

  // A single key tuple is reused for every row.
  std::unique_ptr<storage::Tuple> key(new storage::Tuple(index_schema, true));

  for (oid_t processed = 0;
       next_offset < tile_group_total && (processed < max_tile_groups_indexed);
       next_offset++, processed++) {
    // Scratch tuple shared by all rows of this tile group.
    std::unique_ptr<storage::Tuple> scratch(
        new storage::Tuple(table_schema, true));

    auto tile_group = table->GetTileGroup(next_offset);
    auto tile_group_id = tile_group->GetTileGroupId();
    oid_t slot_limit = tile_group->GetNextTupleSlot();

    for (oid_t tuple_id = 0; tuple_id < slot_limit; tuple_id++) {
      // Pull the row out of the tile group.
      tile_group->CopyTuple(tuple_id, scratch.get());

      // Where the row lives.
      ItemPointer location(tile_group_id, tuple_id);

      // Project the row onto the indexed columns.
      key->SetFromTuple(scratch.get(), indexed_columns, index->GetPool());

      // Insert in specific index
      // index->InsertEntry(key.get(), location);
    }

    // Record that one more tile group has been handled for this index.
    index->IncrementIndexedTileGroupOffset();

    // Sleep a bit
    // std::this_thread::sleep_for(std::chrono::microseconds(sleep_duration));
  }
}
/**
 * @brief Rebuilds one tile group with the table's default partitioning when
 *        its schema differs from that partitioning by at least @p theta.
 *
 * @param tile_group_offset Offset of the tile group within this table.
 * @param theta             Threshold on the schema difference; the tile group
 *                          is left alone when the difference is below it.
 * @return Raw pointer to the new tile group (registered with the catalog
 *         manager under the original tile group id), or nullptr when the
 *         offset is out of range or the threshold is not reached.
 */
storage::TileGroup *DataTable::TransformTileGroup(
    const oid_t &tile_group_offset, const double &theta) {
  // Reject offsets that do not belong to this table.
  if (tile_group_offset >= tile_groups_.GetSize()) {
    LOG_ERROR("Tile group offset not found in table : %u ", tile_group_offset);
    return nullptr;
  }

  auto tile_group_id =
      tile_groups_.FindValid(tile_group_offset, invalid_tile_group_id);

  // Look up the current tile group through the catalog.
  auto &catalog_manager = catalog::Manager::GetInstance();
  auto source_group = catalog_manager.GetTileGroup(tile_group_id);

  // Nothing to do when the layout is already close enough.
  auto schema_diff = source_group->GetSchemaDifference(default_partition_);
  if (schema_diff < theta) {
    return nullptr;
  }

  LOG_TRACE("Transforming tile group : %u", tile_group_offset);

  // Schema of the transformed tile group.
  auto target_schema =
      TransformTileGroupSchema(source_group.get(), default_partition_);

  // Allocate the transformed tile group with the same identity and capacity.
  storage::TileGroup *raw_target = TileGroupFactory::GetTileGroup(
      source_group->GetDatabaseId(), source_group->GetTableId(),
      source_group->GetTileGroupId(), source_group->GetAbstractTable(),
      target_schema, default_partition_,
      source_group->GetAllocatedTupleCount());
  std::shared_ptr<storage::TileGroup> target_group(raw_target);

  // Copy the data over column-at-a-time.
  SetTransformedTileGroup(source_group.get(), target_group.get());

  // Register the new tile group under the original id.
  catalog_manager.AddTileGroup(tile_group_id, target_group);

  return target_group.get();
}
// Validate that MVCC storage is correct, it assumes an old-to-new chain // Invariants // 1. Transaction id should either be INVALID_TXNID or INITIAL_TXNID // 2. Begin commit id should <= end commit id // 3. Timestamp consistence // 4. Version doubly linked list consistency static void ValidateMVCC_OldToNew(storage::DataTable *table) { auto &catalog_manager = catalog::Manager::GetInstance(); LOG_INFO("Validating MVCC storage"); int tile_group_count = table->GetTileGroupCount(); LOG_INFO("The table has %d tile groups in the table", tile_group_count); for (int tile_group_offset = 0; tile_group_offset < tile_group_count; tile_group_offset++) { LOG_INFO("Validate tile group #%d", tile_group_offset); auto tile_group = table->GetTileGroup(tile_group_offset); auto tile_group_header = tile_group->GetHeader(); size_t tuple_count = tile_group->GetAllocatedTupleCount(); LOG_INFO("Tile group #%d has allocated %lu tuples", tile_group_offset, tuple_count); // 1. Transaction id should either be INVALID_TXNID or INITIAL_TXNID for (oid_t tuple_slot = 0; tuple_slot < tuple_count; tuple_slot++) { txn_id_t txn_id = tile_group_header->GetTransactionId(tuple_slot); EXPECT_TRUE(txn_id == INVALID_TXN_ID || txn_id == INITIAL_TXN_ID) << "Transaction id is not INVALID_TXNID or INITIAL_TXNID"; } LOG_INFO("[OK] All tuples have valid txn id"); // double avg_version_chain_length = 0.0; for (oid_t tuple_slot = 0; tuple_slot < tuple_count; tuple_slot++) { txn_id_t txn_id = tile_group_header->GetTransactionId(tuple_slot); cid_t begin_cid = tile_group_header->GetBeginCommitId(tuple_slot); cid_t end_cid = tile_group_header->GetEndCommitId(tuple_slot); ItemPointer next_location = tile_group_header->GetNextItemPointer(tuple_slot); ItemPointer prev_location = tile_group_header->GetPrevItemPointer(tuple_slot); // 2. 
Begin commit id should <= end commit id EXPECT_TRUE(begin_cid <= end_cid) << "Tuple begin commit id is less than or equal to end commit id"; // This test assumes a oldest-to-newest version chain if (txn_id != INVALID_TXN_ID) { EXPECT_TRUE(begin_cid != MAX_CID) << "Non invalid txn shouldn't have a MAX_CID begin commit id"; // The version is an oldest version if (prev_location.IsNull()) { if (next_location.IsNull()) { EXPECT_EQ(end_cid, MAX_CID) << "Single version has a non MAX_CID end commit time"; } else { cid_t prev_end_cid = end_cid; ItemPointer prev_location(tile_group->GetTileGroupId(), tuple_slot); while (!next_location.IsNull()) { auto next_tile_group = catalog_manager.GetTileGroup(next_location.block); auto next_tile_group_header = next_tile_group->GetHeader(); txn_id_t next_txn_id = next_tile_group_header->GetTransactionId( next_location.offset); if (next_txn_id == INVALID_TXN_ID) { // If a version in the version chain has a INVALID_TXN_ID, it // must be at the tail // of the chain. It is either because we have deleted a tuple // (so append a invalid tuple), // or because this new version is aborted. EXPECT_TRUE( next_tile_group_header->GetNextItemPointer( next_location.offset).IsNull()) << "Invalid version in a version chain and is not delete"; } cid_t next_begin_cid = next_tile_group_header->GetBeginCommitId( next_location.offset); cid_t next_end_cid = next_tile_group_header->GetEndCommitId(next_location.offset); // 3. Timestamp consistence if (next_begin_cid == MAX_CID) { // It must be an aborted version, it should be at the end of the // chain EXPECT_TRUE( next_tile_group_header->GetNextItemPointer( next_location.offset).IsNull()) << "Version with MAX_CID begin cid is not version tail"; } else { EXPECT_EQ(prev_end_cid, next_begin_cid) << "Prev end commit id should equal net begin commit id"; ItemPointer next_prev_location = next_tile_group_header->GetPrevItemPointer( next_location.offset); // 4. 
Version doubly linked list consistency EXPECT_TRUE(next_prev_location.offset == prev_location.offset && next_prev_location.block == prev_location.block) << "Next version's prev version does not match"; } prev_location = next_location; prev_end_cid = next_end_cid; next_location = next_tile_group_header->GetNextItemPointer( next_location.offset); } // Now prev_location is at the tail of the version chain ItemPointer last_location = prev_location; auto last_tile_group = catalog_manager.GetTileGroup(last_location.block); auto last_tile_group_header = last_tile_group->GetHeader(); // txn_id_t last_txn_id = // last_tile_group_header->GetTransactionId(last_location.offset); cid_t last_end_cid = last_tile_group_header->GetEndCommitId(last_location.offset); EXPECT_TRUE( last_tile_group_header->GetNextItemPointer(last_location.offset) .IsNull()) << "Last version has a next pointer"; EXPECT_EQ(last_end_cid, MAX_CID) << "Last version doesn't end with MAX_CID"; } } } else { EXPECT_TRUE(tile_group_header->GetNextItemPointer(tuple_slot).IsNull()) << "Invalid tuple must not have next item pointer"; } } LOG_INFO("[OK] oldest-to-newest version chain validated"); } }
/** * @brief Creates logical tile from tile group and applies scan predicate. * @return true on success, false otherwise. */ bool SeqScanExecutor::DExecute() { // Scanning over a logical tile. if (children_.size() == 1) { // FIXME Check all requirements for children_.size() == 0 case. LOG_TRACE("Seq Scan executor :: 1 child "); PL_ASSERT(target_table_ == nullptr); PL_ASSERT(column_ids_.size() == 0); while (children_[0]->Execute()) { std::unique_ptr<LogicalTile> tile(children_[0]->GetOutput()); if (predicate_ != nullptr) { // Invalidate tuples that don't satisfy the predicate. for (oid_t tuple_id : *tile) { expression::ContainerTuple<LogicalTile> tuple(tile.get(), tuple_id); if (predicate_->Evaluate(&tuple, nullptr, executor_context_) .IsFalse()) { tile->RemoveVisibility(tuple_id); } } } if (0 == tile->GetTupleCount()) { // Avoid returning empty tiles continue; } /* Hopefully we needn't do projections here */ SetOutput(tile.release()); return true; } return false; } // Scanning a table else if (children_.size() == 0) { LOG_TRACE("Seq Scan executor :: 0 child "); PL_ASSERT(target_table_ != nullptr); PL_ASSERT(column_ids_.size() > 0); // Force to use occ txn manager if dirty read is forbidden concurrency::TransactionManager &transaction_manager = concurrency::TransactionManagerFactory::GetInstance(); // LOG_TRACE("Number of tuples: %f", // target_table_->GetIndex(0)->GetNumberOfTuples()); // Retrieve next tile group. while (current_tile_group_offset_ < table_tile_group_count_) { auto tile_group = target_table_->GetTileGroup(current_tile_group_offset_++); auto tile_group_header = tile_group->GetHeader(); oid_t active_tuple_count = tile_group->GetNextTupleSlot(); // Construct position list by looping through tile group // and applying the predicate. 
std::vector<oid_t> position_list; for (oid_t tuple_id = 0; tuple_id < active_tuple_count; tuple_id++) { ItemPointer location(tile_group->GetTileGroupId(), tuple_id); // check transaction visibility if (transaction_manager.IsVisible(tile_group_header, tuple_id)) { // if the tuple is visible, then perform predicate evaluation. if (predicate_ == nullptr) { position_list.push_back(tuple_id); auto res = transaction_manager.PerformRead(location); if (!res) { transaction_manager.SetTransactionResult(RESULT_FAILURE); return res; } } else { expression::ContainerTuple<storage::TileGroup> tuple( tile_group.get(), tuple_id); auto eval = predicate_->Evaluate(&tuple, nullptr, executor_context_) .IsTrue(); if (eval == true) { position_list.push_back(tuple_id); auto res = transaction_manager.PerformRead(location); if (!res) { transaction_manager.SetTransactionResult(RESULT_FAILURE); return res; } } } } } // Don't return empty tiles if (position_list.size() == 0) { continue; } // Construct logical tile. std::unique_ptr<LogicalTile> logical_tile(LogicalTileFactory::GetTile()); logical_tile->AddColumns(tile_group, column_ids_); logical_tile->AddPositionList(std::move(position_list)); SetOutput(logical_tile.release()); return true; } } return false; }
bool HybridScanExecutor::SeqScanUtil() { assert(children_.size() == 0); // LOG_INFO("Hybrid executor, Seq Scan :: 0 child"); assert(table_ != nullptr); assert(column_ids_.size() > 0); auto &transaction_manager = concurrency::TransactionManagerFactory::GetInstance(); // Retrieve next tile group. while (current_tile_group_offset_ < table_tile_group_count_) { auto tile_group = table_->GetTileGroup(current_tile_group_offset_++); auto tile_group_header = tile_group->GetHeader(); oid_t active_tuple_count = tile_group->GetNextTupleSlot(); // Construct position list by looping through tile group // and applying the predicate. oid_t upper_bound_block = 0; if (item_pointers_.size() > 0) { auto reverse_iter = item_pointers_.rbegin(); upper_bound_block = reverse_iter->block; } std::vector<oid_t> position_list; for (oid_t tuple_id = 0; tuple_id < active_tuple_count; tuple_id++) { ItemPointer location(tile_group->GetTileGroupId(), tuple_id); if (type_ == planner::HYBRID && item_pointers_.size() > 0 && location.block <= upper_bound_block) { if (item_pointers_.find(location) != item_pointers_.end()) { continue; } } // check transaction visibility if (transaction_manager.IsVisible(tile_group_header, tuple_id)) { // if the tuple is visible, then perform predicate evaluation. 
if (predicate_ == nullptr) { position_list.push_back(tuple_id); } else { expression::ContainerTuple<storage::TileGroup> tuple( tile_group.get(), tuple_id); auto eval = predicate_->Evaluate(&tuple, nullptr, executor_context_) .IsTrue(); if (eval == true) { position_list.push_back(tuple_id); } } } else { expression::ContainerTuple<storage::TileGroup> tuple( tile_group.get(), tuple_id); auto eval = predicate_->Evaluate(&tuple, nullptr, executor_context_) .IsTrue(); if (eval == true) { position_list.push_back(tuple_id); auto res = transaction_manager.PerformRead(location); if (!res) { transaction_manager.SetTransactionResult(RESULT_FAILURE); return res; } } } } // Don't return empty tiles if (position_list.size() == 0) { continue; } // Construct logical tile. std::unique_ptr<LogicalTile> logical_tile(LogicalTileFactory::GetTile()); logical_tile->AddColumns(tile_group, column_ids_); logical_tile->AddPositionList(std::move(position_list)); LOG_INFO("Hybrid executor, Seq Scan :: Got a logical tile"); SetOutput(logical_tile.release()); return true; } return false; }
/** * @brief Creates logical tile from tile group and applies scan predicate. * @return true on success, false otherwise. */ bool SeqScanExecutor::DExecute() { // Scanning over a logical tile. if (children_.size() == 1 && // There will be a child node on the create index scenario, // but we don't want to use this execution flow !(GetRawNode()->GetChildren().size() > 0 && GetRawNode()->GetChildren()[0].get()->GetPlanNodeType() == PlanNodeType::CREATE && ((planner::CreatePlan *)GetRawNode()->GetChildren()[0].get()) ->GetCreateType() == CreateType::INDEX)) { // FIXME Check all requirements for children_.size() == 0 case. LOG_TRACE("Seq Scan executor :: 1 child "); PELOTON_ASSERT(target_table_ == nullptr); PELOTON_ASSERT(column_ids_.size() == 0); while (children_[0]->Execute()) { std::unique_ptr<LogicalTile> tile(children_[0]->GetOutput()); if (predicate_ != nullptr) { // Invalidate tuples that don't satisfy the predicate. for (oid_t tuple_id : *tile) { ContainerTuple<LogicalTile> tuple(tile.get(), tuple_id); auto eval = predicate_->Evaluate(&tuple, nullptr, executor_context_); if (eval.IsFalse()) { // if (predicate_->Evaluate(&tuple, nullptr, executor_context_) // .IsFalse()) { tile->RemoveVisibility(tuple_id); } } } if (0 == tile->GetTupleCount()) { // Avoid returning empty tiles continue; } /* Hopefully we needn't do projections here */ SetOutput(tile.release()); return true; } return false; } // Scanning a table else if (children_.size() == 0 || // If we are creating an index, there will be a child (children_.size() == 1 && // This check is only needed to pass seq_scan_test // unless it is possible to add a executor child // without a corresponding plan. GetRawNode()->GetChildren().size() > 0 && // Check if the plan is what we actually expect. 
GetRawNode()->GetChildren()[0].get()->GetPlanNodeType() == PlanNodeType::CREATE && // If it is, confirm it is for indexes ((planner::CreatePlan *)GetRawNode()->GetChildren()[0].get()) ->GetCreateType() == CreateType::INDEX)) { LOG_TRACE("Seq Scan executor :: 0 child "); PELOTON_ASSERT(target_table_ != nullptr); PELOTON_ASSERT(column_ids_.size() > 0); if (children_.size() > 0 && !index_done_) { children_[0]->Execute(); // This stops continuous executions due to // a parent and avoids multiple creations // of the same index. index_done_ = true; } concurrency::TransactionManager &transaction_manager = concurrency::TransactionManagerFactory::GetInstance(); bool acquire_owner = GetPlanNode<planner::AbstractScan>().IsForUpdate(); auto current_txn = executor_context_->GetTransaction(); // Retrieve next tile group. while (current_tile_group_offset_ < table_tile_group_count_) { auto tile_group = target_table_->GetTileGroup(current_tile_group_offset_++); auto tile_group_header = tile_group->GetHeader(); oid_t active_tuple_count = tile_group->GetNextTupleSlot(); // Construct position list by looping through tile group // and applying the predicate. std::vector<oid_t> position_list; for (oid_t tuple_id = 0; tuple_id < active_tuple_count; tuple_id++) { ItemPointer location(tile_group->GetTileGroupId(), tuple_id); auto visibility = transaction_manager.IsVisible( current_txn, tile_group_header, tuple_id); // check transaction visibility if (visibility == VisibilityType::OK) { // if the tuple is visible, then perform predicate evaluation. 
if (predicate_ == nullptr) { position_list.push_back(tuple_id); auto res = transaction_manager.PerformRead(current_txn, location, acquire_owner); if (!res) { transaction_manager.SetTransactionResult(current_txn, ResultType::FAILURE); return res; } } else { ContainerTuple<storage::TileGroup> tuple(tile_group.get(), tuple_id); LOG_TRACE("Evaluate predicate for a tuple"); auto eval = predicate_->Evaluate(&tuple, nullptr, executor_context_); LOG_TRACE("Evaluation result: %s", eval.GetInfo().c_str()); if (eval.IsTrue()) { position_list.push_back(tuple_id); auto res = transaction_manager.PerformRead(current_txn, location, acquire_owner); if (!res) { transaction_manager.SetTransactionResult(current_txn, ResultType::FAILURE); return res; } else { LOG_TRACE("Sequential Scan Predicate Satisfied"); } } } } } // Don't return empty tiles if (position_list.size() == 0) { continue; } // Construct logical tile. std::unique_ptr<LogicalTile> logical_tile(LogicalTileFactory::GetTile()); logical_tile->AddColumns(tile_group, column_ids_); logical_tile->AddPositionList(std::move(position_list)); LOG_TRACE("Information %s", logical_tile->GetInfo().c_str()); SetOutput(logical_tile.release()); return true; } } return false; }