/** * @brief Creates logical tiles from the two input logical tiles after applying * join predicate. * @return true on success, false otherwise. * * ExecutorContext is set when executing IN+NestLoop. For example: * select * from Foo1 where age IN (select id from Foo2 where name='mike'); * Here: * "select id from Foo2 where name='mike'" is transformed as left child. * "select * from Foo1 where age " is the right child. * "IN" is transformed as a execute context, in NestLoop * We put the results of left child in executor_context using NestLoop, and the * right child can execute using this context. Otherwise, the right child can't * execute. And there is no predicate_ for IN+NestLoop * * For now, we only set this context for IN operator. Normally, the right child * has a complete query that can execute without the context, and we use predicate_ * to join the left and right result. * */ bool NestedLoopJoinExecutor::DExecute() { LOG_INFO("********** Nested Loop %s Join executor :: 2 children ", GetJoinTypeString()); // Loop until we have non-empty result tile or exit for (;;) { // Build outer join output when done if (left_child_done_ && right_child_done_) { return BuildOuterJoinOutput(); } //===------------------------------------------------------------------===// // Pick left and right tiles //===------------------------------------------------------------------===// LogicalTile *left_tile = nullptr; LogicalTile *right_tile = nullptr; bool advance_right_child = false; // If we have already retrieved all left child's results in buffer if (left_child_done_ == true) { LOG_TRACE("Advance the left buffer iterator."); assert(!right_result_tiles_.empty()); left_result_itr_++; if (left_result_itr_ >= left_result_tiles_.size()) { advance_right_child = true; left_result_itr_ = 0; } } // Otherwise, we must attempt to execute the left child else { // Left child is finished, no more tiles if (children_[0]->Execute() == false) { LOG_TRACE("Left child is exhausted."); left_child_done_ = true; left_result_itr_ = 0; advance_right_child = true; } // Buffer the left child's result else { LOG_TRACE("Retrieve a new tile from left child"); BufferLeftTile(children_[0]->GetOutput()); left_result_itr_ = left_result_tiles_.size() - 1; } } if (advance_right_child == true || right_result_tiles_.empty()) { // return if right tile is empty if (right_child_done_ && right_result_tiles_.empty()) { return BuildOuterJoinOutput(); } assert(left_result_itr_ == 0); // Right child is finished, no more tiles if (children_[1]->Execute() == false) { LOG_TRACE("Right child is exhausted. Returning false."); // Right child exhausted. // Release cur Right tile. Clear right child's result buffer and return. right_child_done_ = true; return BuildOuterJoinOutput(); } // Buffer the Right child's result else { LOG_TRACE("Advance the Right child."); BufferRightTile(children_[1]->GetOutput()); // return if left tile is empty if (left_child_done_ && left_result_tiles_.empty()) { return BuildOuterJoinOutput(); } } } right_tile = right_result_tiles_.back().get(); left_tile = left_result_tiles_[left_result_itr_].get(); //===------------------------------------------------------------------===// // Build Join Tile //===------------------------------------------------------------------===// // Build output logical tile auto output_tile = BuildOutputLogicalTile(left_tile, right_tile); // Build position lists LogicalTile::PositionListsBuilder pos_lists_builder(left_tile, right_tile); // Go over every pair of tuples in left and right logical tiles for (auto right_tile_row_itr : *right_tile) { bool has_left_match = false; for (auto left_tile_row_itr : *left_tile) { // Join predicate exists if (predicate_ != nullptr) { expression::ContainerTuple<executor::LogicalTile> left_tuple( left_tile, left_tile_row_itr); expression::ContainerTuple<executor::LogicalTile> right_tuple( right_tile, right_tile_row_itr); // Join predicate is false. Skip pair and continue. if (predicate_->Evaluate(&left_tuple, &right_tuple, executor_context_) .IsFalse()) { continue; } } RecordMatchedLeftRow(left_result_itr_, left_tile_row_itr); // For Left and Full Outer Join has_left_match = true; // Insert a tuple into the output logical tile // First, copy the elements in left logical tile's tuple pos_lists_builder.AddRow(left_tile_row_itr, right_tile_row_itr); } // Inner loop of NLJ // For Right and Full Outer Join if (has_left_match) { RecordMatchedRightRow(right_result_tiles_.size() - 1, right_tile_row_itr); } } // Outer loop of NLJ // Check if we have any join tuples. if (pos_lists_builder.Size() > 0) { output_tile->SetPositionListsAndVisibility(pos_lists_builder.Release()); SetOutput(output_tile.release()); return true; } LOG_TRACE("This pair produces empty join result. Continue the loop."); } // end the very beginning for loop }
// For this version, the work flow is that we first lookup the left table, and // use the result to lookup right table. If left table is done that means right // table is also done. So we only keep the left_child_done_ as the sign. bool NestedLoopJoinExecutor::DExecute() { LOG_TRACE("********** Nested Loop %s Join executor :: 2 children ", GetJoinTypeString()); // Grab info from plan node and check it const planner::NestedLoopJoinPlan &node = GetPlanNode<planner::NestedLoopJoinPlan>(); // Pick out the left and right columns const std::vector<oid_t> &join_column_ids_left = node.GetJoinColumnsLeft(); const std::vector<oid_t> &join_column_ids_right = node.GetJoinColumnsRight(); // We should first deal with the current result. Otherwise we will cache a lot // data which is not good to utilize memory. After that we call child execute. // Since is the high level idea, each time we get tile from left, we should // finish this tile, and then call child[0] execute for next tile. for (;;) { //===------------------------------------------------------------------===// // Pick left and right tiles //===------------------------------------------------------------------===// // If we have already retrieved all left child's results in buffer if (left_child_done_ == true) { LOG_TRACE("Left is done which means all join comparison completes"); return false; } // If left tile result is not done, continue the left tuples if (!left_tile_done_) { // Tuple result ContainerTuple<executor::LogicalTile> left_tuple(left_tile_.get(), left_tile_row_itr_); // Grab the values if (!join_column_ids_left.empty() && !join_column_ids_right.empty()) { std::vector<type::Value> join_values; for (auto column_id : join_column_ids_left) { type::Value predicate_value = left_tuple.GetValue(column_id); join_values.push_back(predicate_value); } // Pass the columns and values to right executor LOG_TRACE("Update the new value for index predicate"); children_[1]->UpdatePredicate(join_column_ids_right, join_values); } // Execute the right child to get the right tile if (children_[1]->Execute() == true) { LOG_TRACE("Advance the Right child."); std::unique_ptr<LogicalTile> right_tile(children_[1]->GetOutput()); PL_ASSERT(right_tile != nullptr); // Construct output result auto output_tile = BuildOutputLogicalTile(left_tile_.get(), right_tile.get()); // Build position list LogicalTile::PositionListsBuilder pos_lists_builder(left_tile_.get(), right_tile.get()); // Go over every pair of tuples in left and right logical tiles for (auto right_tile_row_itr : *right_tile) { // Insert a tuple into the output logical tile // First, copy the elements in left logical tile's tuple LOG_TRACE("Insert a tuple into the output logical tile"); ContainerTuple<executor::LogicalTile> right_tuple(right_tile.get(), right_tile_row_itr); if (predicate_ != nullptr) { auto eval = predicate_->Evaluate(&left_tuple, &right_tuple, executor_context_); // Join predicate is false. Skip pair and continue. if (eval.IsFalse()) { LOG_TRACE("Not math join predicate"); continue; } LOG_TRACE("Find a tuple with join predicate"); } pos_lists_builder.AddRow(left_tile_row_itr_, right_tile_row_itr); } // Outer loop of NLJ // Now current left tile is done LOG_TRACE("pos_lists_builder's size : %ld", pos_lists_builder.Size()); if (pos_lists_builder.Size() > 0) { LOG_TRACE("Set output result"); output_tile->SetPositionListsAndVisibility( pos_lists_builder.Release()); SetOutput(output_tile.release()); LOG_TRACE("result is : %s", GetOutputInfo()->GetInfo().c_str()); return true; } continue; } // Right table is finished for the current left tuple. move to the next else { if (!left_child_done_) { LOG_TRACE("right child is done, but left is not, so reset right"); children_[1]->ResetState(); // When all right table is done, examine whether left tile is done // If left tile is done, next loop will directly execute child[0] if (left_tile_row_itr_ == left_tile_->GetTupleCount() - 1) { LOG_TRACE("left tile is done"); // Set up flag and go the execute child 0 to get the next tile left_tile_done_ = true; } else { // Move the row to the next one in left tile LOG_TRACE("Advance left row"); left_tile_row_itr_++; // Continue the new left row continue; } } else { LOG_TRACE("Both left and right child are done"); right_child_done_ = true; return false; } } } // End handle left tile // Otherwise, we must attempt to execute the left child to get a new left // tile // Left child is finished, no more tiles if (children_[0]->Execute() == false) { LOG_TRACE("Left child is exhausted."); return false; } // Cache the new tile else { // Get the left child's result LOG_TRACE("Retrieve a new tile from left child"); left_tile_.reset(children_[0]->GetOutput()); // Set the flag with init status left_tile_done_ = false; left_tile_row_itr_ = 0; } LOG_TRACE("Get a new left tile. Continue the loop."); } // end the very beginning for loop }
/** * @brief Creates logical tiles from the two input logical tiles after applying * join predicate. * @return true on success, false otherwise. */ bool HashJoinExecutor::DExecute() { // build hash map for right table if (!right_child_done_) { while (hash_executor_->Execute() == true) BufferRightTile(children_[1]->GetOutput()); right_child_done_ = true; } for (;;) { // left child & right child all done if (left_child_done_ && right_child_done_) { return BuildOuterJoinOutput(); } // if there is remaining pairs in buffer, release one at a time. if (!buffered_output_tiles.empty()) { // just hand in one. SetOutput(buffered_output_tiles.front()); buffered_output_tiles.pop_front(); return true; } // traverse every left child tile if (children_[0]->Execute()) { BufferLeftTile(children_[0]->GetOutput()); LogicalTile *left_tile = left_result_tiles_.back().get(); // traverse every tuple in curt left tile for (auto left_tile_row_itr : *left_tile) { auto hash = HashExecutor::HashMapType::key_type( left_tile, left_tile_row_itr, &hash_executor_->GetHashKeyIds()); auto hash_result = hash_executor_->GetHashTable().find(hash); if (hash_result != hash_executor_->GetHashTable().end()) { RecordMatchedLeftRow(left_logical_tile_itr_, left_tile_row_itr); // traverse right set for (auto iter = hash_result->second.begin(); iter != hash_result->second.end(); ++iter) { auto tile_index = iter->first; auto tuple_index = iter->second; RecordMatchedRightRow(tile_index, tuple_index); LogicalTile *right_tile = right_result_tiles_[tile_index].get(); LogicalTile::PositionListsBuilder pos_lists_builder(left_tile, right_tile); pos_lists_builder.AddRow(left_tile_row_itr, tuple_index); auto output_tile = BuildOutputLogicalTile(left_tile, right_tile); output_tile->SetPositionListsAndVisibility( pos_lists_builder.Release()); buffered_output_tiles.emplace_back(output_tile.release()); } // end of traversing right set } // end of if match } // end of traversal of curt left_tile // Release at most one pair. // PS: This should be done after traversing all the tuples in curt tile left_logical_tile_itr_++; if (!buffered_output_tiles.empty()) { // release one at a time SetOutput(buffered_output_tiles.front()); buffered_output_tiles.pop_front(); return true; } } // end of still have left tile // All left tiles are exhausted. else { left_child_done_ = true; return BuildOuterJoinOutput(); } } // end of infinite loop // never should go here return false; } // end of DExecute
/** * @brief Creates logical tiles from the two input logical tiles after applying * join predicate. * @return true on success, false otherwise. */ bool MergeJoinExecutor::DExecute() { LOG_INFO( "********** Merge Join executor :: 2 children " "left:: start: %lu, end: %lu, done: %d " "right:: start: %lu, end: %lu, done: %d", left_start_row, left_end_row, left_child_done_, right_start_row, right_end_row, right_child_done_); // Build outer join output when done if (right_child_done_ && left_child_done_) { return BuildOuterJoinOutput(); } //===--------------------------------------------------------------------===// // Pick right and left tiles //===--------------------------------------------------------------------===// // Try to get next tile from RIGHT child if (((right_child_done_ == false) && (right_start_row == right_end_row)) || (left_child_done_ == true)) { if (children_[1]->Execute() == false) { LOG_TRACE("Did not get right tile "); right_child_done_ = true; // Try again return DExecute(); } LOG_TRACE("Got right tile "); auto right_tile = children_[1]->GetOutput(); BufferRightTile(right_tile); right_start_row = 0; right_end_row = Advance(right_tile, right_start_row, false); LOG_TRACE("size of right tiles: %lu", right_result_tiles_.size()); } // Try to get next tile from LEFT child if (((left_child_done_ == false) && (left_start_row == left_end_row)) || (right_child_done_ == true)) { if (children_[0]->Execute() == false) { LOG_TRACE("Did not get left tile "); left_child_done_ = true; // Try again return DExecute(); } LOG_TRACE("Got left tile "); auto left_tile = children_[0]->GetOutput(); BufferLeftTile(left_tile); left_start_row = 0; left_end_row = Advance(left_tile, left_start_row, true); LOG_TRACE("size of left tiles: %lu", left_result_tiles_.size()); } // Check if we have logical tiles to process if(left_result_tiles_.empty() || right_result_tiles_.empty()) { return false; } LogicalTile *left_tile = left_result_tiles_.back().get(); LogicalTile *right_tile = right_result_tiles_.back().get(); //===--------------------------------------------------------------------===// // Build Join Tile //===--------------------------------------------------------------------===// // Build output logical tile auto output_tile = BuildOutputLogicalTile(left_tile, right_tile); // Build position lists LogicalTile::PositionListsBuilder pos_lists_builder(left_tile, right_tile); while ((left_end_row > left_start_row) && (right_end_row > right_start_row)) { expression::ContainerTuple<executor::LogicalTile> left_tuple( left_tile, left_start_row); expression::ContainerTuple<executor::LogicalTile> right_tuple( right_tile, right_start_row); bool not_matching_tuple_pair = false; // Evaluate and compare the join clauses for (auto &clause : *join_clauses_) { auto left_value = clause.left_->Evaluate(&left_tuple, &right_tuple, nullptr); auto right_value = clause.right_->Evaluate(&left_tuple, &right_tuple, nullptr); // Compare the values int comparison = left_value.Compare(right_value); // Left key < Right key, advance left if (comparison < 0) { LOG_TRACE("left < right, advance left "); left_start_row = left_end_row; left_end_row = Advance(left_tile, left_start_row, true); not_matching_tuple_pair = true; break; } // Left key > Right key, advance right else if (comparison > 0) { LOG_TRACE("left > right, advance right "); right_start_row = right_end_row; right_end_row = Advance(right_tile, right_start_row, false); not_matching_tuple_pair = true; break; } // Left key == Right key, go and check next join clause } // Atleast one of the join clauses don't match // One of the tile has been advanced if (not_matching_tuple_pair) { continue; } // Join clauses matched, try to match predicate LOG_TRACE("one pair of tuples matches join clause "); // Join predicate exists if (predicate_ != nullptr) { if (predicate_->Evaluate(&left_tuple, &right_tuple, executor_context_) .IsFalse()) { // Join predicate is false. Advance both. left_start_row = left_end_row; left_end_row = Advance(left_tile, left_start_row, true); right_start_row = right_end_row; right_end_row = Advance(right_tile, right_start_row, false); } } // Sub tile matched, do a Cartesian product // Go over every pair of tuples in left and right logical tiles for (size_t left_tile_row_itr = left_start_row; left_tile_row_itr < left_end_row; left_tile_row_itr++) { for (size_t right_tile_row_itr = right_start_row; right_tile_row_itr < right_end_row; right_tile_row_itr++) { // Insert a tuple into the output logical tile pos_lists_builder.AddRow(left_tile_row_itr, right_tile_row_itr); RecordMatchedLeftRow(left_result_tiles_.size() - 1, left_tile_row_itr); RecordMatchedRightRow(right_result_tiles_.size() - 1, right_tile_row_itr); } } // Then, advance both left_start_row = left_end_row; left_end_row = Advance(left_tile, left_start_row, true); right_start_row = right_end_row; right_end_row = Advance(right_tile, right_start_row, false); } // Check if we have any join tuples. if (pos_lists_builder.Size() > 0) { output_tile->SetPositionListsAndVisibility(pos_lists_builder.Release()); SetOutput(output_tile.release()); return true; } // Try again else { // If we are out of any more pairs of child tiles to examine, // then we will return false earlier in this function // So, no need to return false here DExecute(); } return true; }
/** * @brief Creates logical tiles from the two input logical tiles after applying * join predicate. * @return true on success, false otherwise. */ bool HashJoinExecutor::DExecute() { LOG_INFO("Hash Join Executor"); // Loop until we have non-empty result join logical tile or exit while (true) { // if (!buffered_output_tiles.empty()) { if (!result.empty()) { auto* output_tile = result.back(); result.pop_back(); SetOutput(output_tile); return true; } // Build outer join output when done if (left_child_done_ && right_child_done_) { return BuildOuterJoinOutput(); } //===--------------------------------------------------------------------===// // Pick right and left tiles //===--------------------------------------------------------------------===// // Get all the logical tiles from RIGHT child if (!right_child_done_) { while (children_[1]->Execute()) { BufferRightTile(children_[1]->GetOutput()); } right_child_done_ = true; LOG_INFO("Hash Join Executor: Got all %lu right tiles.", right_result_tiles_.size()); } // Get next logical tile from LEFT child if (children_[0]->Execute()) { BufferLeftTile(children_[0]->GetOutput()); LOG_INFO("Hash Join Executor: Got left tile %p.", left_result_tiles_.back().get()); } else { // Left input is exhausted, loop around left_child_done_ = true; return BuildOuterJoinOutput(); } if (right_result_tiles_.empty()) { /// No right children, a hash lookup would be empty. Continue ... continue; } //===--------------------------------------------------------------------===// // Build Join Tile //===--------------------------------------------------------------------===// LogicalTile* left_tile = left_result_tiles_.back().get(); std::unordered_map<size_t, std::unique_ptr<LogicalTile::PositionListsBuilder>> right_matches; // Get the hash table from the hash executor auto& hash_table = hash_executor_->GetHashTable(); auto& hash_columns = hash_executor_->GetHashKeyIds(); for (oid_t left_tid : *left_tile) { /// Create key and probe hash table HashExecutor::HashMapType::key_type key(left_tile, left_tid, &hash_columns); const auto& iter = hash_table.find(key); if (iter == hash_table.end()) { continue; } auto& matches = iter->second; for (auto& match : matches) { auto right_tile_index = match.first; auto* right_tile = right_result_tiles_[right_tile_index].get(); auto right_tid = match.second; RecordMatchedRightRow(right_tile_index, right_tid); RecordMatchedLeftRow(left_result_tiles_.size() - 1, left_tid); const auto& pos_match_iter = right_matches.find(right_tile_index); if (pos_match_iter == right_matches.end()) { std::unique_ptr<LogicalTile::PositionListsBuilder> builder{ new LogicalTile::PositionListsBuilder(left_tile, right_tile)}; right_matches.insert( std::make_pair(right_tile_index, std::move(builder))); } right_matches[right_tile_index]->AddRow(left_tid, right_tid); } } // Create a new logical tile for every grouped match in matches for (auto& iter : right_matches) { auto output_tile = BuildOutputLogicalTile( left_tile, right_result_tiles_[iter.first].get()); auto& pos_lists_builder = iter.second; output_tile->SetPositionListsAndVisibility(pos_lists_builder->Release()); result.push_back(output_tile.release()); } } }