예제 #1
0
 /**
  * @brief Constructor for merging sorted runs to generate a sorted relation.
  *
  * @note When \c input_relation_is_stored is true, a snapshot of the input
  *       relation's blocks is taken here and the input stream is marked as
  *       done; otherwise the block list starts out empty.
  *
  * @param input_relation The relation whose sorted blocks are merged.
  * @param output_relation The output relation.
  * @param output_destination_index The index of the InsertDestination in the
  *        QueryContext to store the sorted blocks in.
  * @param run_relation The temporary relation used to store intermediate runs
  *                     of blocks.
  * @param run_block_destination_index The index of the InsertDestination in
  *        the QueryContext to store the intermediate blocks in the merging
  *        process.
  * @param sort_config_index The index of the Sort configuration in
  *        QueryContext.
  * @param merge_factor Merge factor of this operator. Must be greater than 1
  *        (verified with DCHECK_GT in the constructor body).
  * @param top_k Only return the first \c top_k results. Return all results if
  *              \c top_k is 0.
  * @param input_relation_is_stored Boolean to indicate if input relation is
  *                                 stored or streamed.
  **/
 SortMergeRunOperator(const CatalogRelation &input_relation,
                      const CatalogRelation &output_relation,
                      const QueryContext::insert_destination_id output_destination_index,
                      const CatalogRelation &run_relation,
                      const QueryContext::insert_destination_id run_block_destination_index,
                      const QueryContext::sort_config_id sort_config_index,
                      const std::size_t merge_factor,
                      const std::size_t top_k,
                      const bool input_relation_is_stored)
     : input_relation_(input_relation),
       output_relation_(output_relation),
       output_destination_index_(output_destination_index),
       sort_config_index_(sort_config_index),
       merge_factor_(merge_factor),
       top_k_(top_k),
       // Built from the constructor argument rather than from merge_factor_:
       // members are initialized in declaration order, so reading another
       // member here would silently depend on how the class lays them out.
       merge_tree_(merge_factor),
       input_relation_block_ids_(input_relation_is_stored
                                     ? input_relation.getBlocksSnapshot()
                                     : std::vector<block_id>()),
       num_input_workorders_generated_(0),
       run_relation_(run_relation),
       run_block_destination_index_(run_block_destination_index),
       input_relation_is_stored_(input_relation_is_stored),
       // A stored input has nothing left to stream in.
       input_stream_done_(input_relation_is_stored),
       started_(false) {
   DCHECK_GT(merge_factor_, 1u);
 }
// Creates a new StorageBlock for 'relation' and returns the id under which it
// was registered in blocks_. When 'layout' is NULL the relation's default
// storage block layout is used. The number of slots requested from getSlots()
// comes from the chosen layout's description.
block_id StorageManager::createBlock(const CatalogRelation &relation,
                                     const StorageBlockLayout *layout) {
  const StorageBlockLayout *chosen_layout =
      (layout != NULL) ? layout : &(relation.getDefaultStorageBlockLayout());

  const size_t num_slots = chosen_layout->getDescription().num_slots();
  DEBUG_ASSERT(num_slots > 0);

  // Reserve the slots first, then advance the block counter; the new block
  // is identified by the post-increment value of block_index_.
  const size_t first_slot = getSlots(num_slots);
  void *block_memory = getSlotAddress(first_slot);
  ++block_index_;

  BlockHandle handle;
  handle.slot_index_low = first_slot;
  handle.slot_index_high = first_slot + num_slots;
  handle.block = new StorageBlock(relation,
                                  block_index_,
                                  *chosen_layout,
                                  true,
                                  block_memory,
                                  kSlotSizeBytes * num_slots);
  blocks_[block_index_] = handle;

  return block_index_;
}
예제 #3
0
 /**
  * @brief Constructor for aggregating with arbitrary expressions in projection
  *        list.
  *
  * @note \c input_relation is only consulted here to take a snapshot of its
  *       blocks (and only when \c input_relation_is_stored is true); this
  *       constructor does not store a reference to it.
  *
  * @param input_relation The relation to perform aggregation over.
  * @param input_relation_is_stored If input_relation is a stored relation and
  *        is fully available to the operator before it can start generating
  *        workorders. When false, the list of input block IDs starts empty.
  * @param aggr_state_index The index of the AggregationState in QueryContext.
  **/
 AggregationOperator(const CatalogRelation &input_relation,
                     bool input_relation_is_stored,
                     const QueryContext::aggregation_state_id aggr_state_index)
     : input_relation_is_stored_(input_relation_is_stored),
       input_relation_block_ids_(input_relation_is_stored ? input_relation.getBlocksSnapshot()
                                                          : std::vector<block_id>()),
       aggr_state_index_(aggr_state_index),
       num_workorders_generated_(0),
       started_(false) {}
  static std::size_t EstimateBytesForTuples(const CatalogRelation &relation,
                                             const TupleStorageSubBlockDescription &description) {

	  // initialize bloom filter parameters object
	  ScopedPtr<BloomParameters> bloom_filter_params;
	  bloom_filter_params.reset(getBloomFilterConfig());

	  // number of bytes taken by bloom filter per attribute
	  std::size_t bloom_filter_size = bloom_filter_params->optimal_parameters.table_size / bits_per_char;

	  size_t total_size = 0;
	  size_t size_per_attribute = bloom_filter_size + sizeof(BloomFilter);
	  CatalogRelation::const_iterator attr_it;
	  for (attr_it = relation.begin(); attr_it != relation.end(); ++attr_it) {
		  total_size += size_per_attribute;
	  }
	  return total_size;
  }
예제 #5
0
std::size_t PrintToScreen::GetNumTuplesInRelation(
    const CatalogRelation &relation, StorageManager *storage_manager) {
  const std::vector<block_id> &blocks = relation.getBlocksSnapshot();
  std::size_t total_num_tuples = 0;
  for (block_id block : blocks) {
    total_num_tuples +=
        storage_manager->getBlock(block, relation)->getNumTuples();
  }
  return total_num_tuples;
}
  /**
   * @brief Constructor. Forwards all arguments to BloomFilterSubBlock and then
   *        lays out per-attribute bloom filter storage inside the sub-block
   *        memory.
   *
   * As written below, the memory starting at sub_block_memory_ is used as:
   *   1. one region of bloom_filter_size_ bytes per attribute of \c relation,
   *      followed by
   *   2. one BloomFilter object (sizeof(BloomFilter) bytes) per attribute.
   *
   * NOTE(review): sub_block_memory_ is presumably set by the base class from
   * \c sub_block_memory -- confirm. No check against
   * \c sub_block_memory_size is made in this constructor.
   *
   * @param relation The relation; also iterated here to visit each attribute.
   * @param tuple_store Forwarded to BloomFilterSubBlock.
   * @param description Forwarded to BloomFilterSubBlock.
   * @param new_block Forwarded to BloomFilterSubBlock; not otherwise consulted
   *        here (the filters are written regardless of its value).
   * @param sub_block_memory Forwarded to BloomFilterSubBlock.
   * @param sub_block_memory_size Forwarded to BloomFilterSubBlock.
   **/
  DefaultBloomFilterSubBlock(
		  const CatalogRelation &relation,
		  const TupleStorageSubBlock &tuple_store,
	      const BloomFilterSubBlockDescription &description,
		  const bool new_block,
		  void *sub_block_memory,
		  const std::size_t sub_block_memory_size)
 	 	  : BloomFilterSubBlock(relation,
 	 			  	  	  	  	tuple_store,
 	 			  	  	  	  	description,
								new_block,
								sub_block_memory,
								sub_block_memory_size) {

	  // Load the bloom filter configuration; the filters built below are
	  // stored in the sub-block memory.
	  bloom_filter_params_.reset(getBloomFilterConfig());

	  // Number of bytes taken by one bloom filter's table per attribute.
	  bloom_filter_size_ = bloom_filter_params_->optimal_parameters.table_size / bits_per_char;


	  CatalogRelation::const_iterator attr_it;
	  void* bloom_filter_addr = sub_block_memory_;
	  // The filter data regions begin at the very start of the sub-block
	  // memory.
	  // NOTE(review): if bloom_filter_data_ is an owning smart pointer, it now
	  // points into memory this object did not allocate -- confirm it is
	  // released (not deleted) on destruction.
	  bloom_filter_data_.reset(static_cast<unsigned char*>(bloom_filter_addr));

	  // Skip past the data regions: advance the cursor by bloom_filter_size_
	  // bytes for every attribute (nothing is allocated or written here).
	  for (attr_it = relation.begin(); attr_it != relation.end(); ++attr_it) {
		  bloom_filter_addr = (static_cast<unsigned char*>(bloom_filter_addr) + bloom_filter_size_);
	  }


	  // The BloomFilter objects are placed directly after the data regions.
	  // NOTE(review): same ownership question as bloom_filter_data_ above.
	  bloom_filters_.reset(static_cast<BloomFilter*>(bloom_filter_addr));
	  unsigned int i = 0;
	  for (attr_it = relation.begin(); attr_it != relation.end(); ++attr_it, ++i) {
		  // Build the i-th filter over its data region, then byte-copy the
		  // object into the sub-block memory and advance the cursor.
		  // NOTE(review): the heap-allocated original is held by a ScopedPtr
		  // local to this iteration, so presumably it is destroyed at the end
		  // of the iteration while the memcpy'd copy remains -- confirm that
		  // BloomFilter is safe to copy bitwise.
		  ScopedPtr<BloomFilter> bloomFilter(new BloomFilter(*bloom_filter_params_, bloom_filter_data_.get() + i*bloom_filter_size_
															  ));
		  memcpy(bloom_filter_addr, bloomFilter.get(), sizeof(*bloomFilter));
		  bloom_filter_addr = (static_cast<char*>(bloom_filter_addr) + sizeof(BloomFilter));
	  }

  } ;
예제 #7
0
// Deletes the block/blob file of every block in a snapshot of the relation
// identified by rel_id_, then either clears the relation's block list (when
// only_drop_blocks_ is set) or drops the relation from 'database'.
// 'query_context' is not used.
void DropTableWorkOrder::execute(QueryContext *query_context,
                                 CatalogDatabase *database,
                                 StorageManager *storage_manager) {
  DCHECK(database != nullptr);
  DCHECK(storage_manager != nullptr);

  CatalogRelation *relation = database->getRelationByIdMutable(rel_id_);
  DCHECK(relation != nullptr);

  // Work from a private copy of the block list.
  const std::vector<block_id> doomed_blocks(relation->getBlocksSnapshot());
  for (std::vector<block_id>::const_iterator block_it = doomed_blocks.begin();
       block_it != doomed_blocks.end();
       ++block_it) {
    storage_manager->deleteBlockOrBlobFile(*block_it);
  }

  if (!only_drop_blocks_) {
    database->dropRelationById(rel_id_);
    return;
  }
  relation->clearBlocks();
}
 /**
  * @brief Constructor for sorting tuples in blocks based on the sort
  * configuration and writing to output destination.
  *
  * @param input_relation The relation to generate sorted runs of.
  * @param output_relation The output relation.
  * @param output_destination_index The index of the InsertDestination in the
  *        QueryContext to store the sorted blocks of runs.
  * @param sort_config_index The index of the Sort configuration in
  *        QueryContext.
  * @param input_relation_is_stored Does the input relation contain the blocks
  *                                 to sort. If \c true, a snapshot of the
  *                                 relation's blocks is taken at
  *                                 construction. If \c false, the blocks are
  *                                 streamed and the block list starts empty.
  **/
 SortRunGenerationOperator(const CatalogRelation &input_relation,
                           const CatalogRelation &output_relation,
                           const QueryContext::insert_destination_id output_destination_index,
                           const QueryContext::sort_config_id sort_config_index,
                           bool input_relation_is_stored)
     : input_relation_(input_relation),
       output_relation_(output_relation),
       output_destination_index_(output_destination_index),
       sort_config_index_(sort_config_index),
       input_relation_block_ids_(input_relation_is_stored ? input_relation.getBlocksSnapshot()
                                                          : std::vector<block_id>()),
       num_workorders_generated_(0),
       started_(false),
       input_relation_is_stored_(input_relation_is_stored) {}
예제 #9
0
 /**
  * @brief Constructor.
  *
  * @note When \c input_relation_is_stored is true, a snapshot of the input
  *       relation's blocks is taken at construction; otherwise the list of
  *       input block IDs starts empty.
  *
  * @param input_relation The relation to build hash table on.
  * @param input_relation_is_stored If input_relation is a stored relation and
  *        is fully available to the operator before it can start generating
  *        workorders.
  * @param join_key_attributes The IDs of equijoin attributes in
  *        input_relation.
  * @param any_join_key_attributes_nullable If any attribute is nullable.
  * @param hash_table_index The index of the JoinHashTable in QueryContext.
  *        The HashTable's key Type(s) should be the Type(s) of the
  *        join_key_attributes in input_relation.
  **/
 BuildHashOperator(const CatalogRelation &input_relation,
                   const bool input_relation_is_stored,
                   const std::vector<attribute_id> &join_key_attributes,
                   const bool any_join_key_attributes_nullable,
                   const QueryContext::join_hash_table_id hash_table_index)
   : input_relation_(input_relation),
     input_relation_is_stored_(input_relation_is_stored),
     join_key_attributes_(join_key_attributes),
     any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
     hash_table_index_(hash_table_index),
     input_relation_block_ids_(input_relation_is_stored ? input_relation.getBlocksSnapshot()
                                                        : std::vector<block_id>()),
     num_workorders_generated_(0),
     started_(false) {}
bool CompressedPackedRowStoreTupleStorageSubBlock::DescriptionIsValid(
    const CatalogRelation &relation,
    const TupleStorageSubBlockDescription &description) {
  // Make sure description is initialized and specifies
  // CompressedPackedRowStore.
  if (!description.IsInitialized()) {
    return false;
  }
  if (description.sub_block_type() != TupleStorageSubBlockDescription::COMPRESSED_PACKED_ROW_STORE) {
    return false;
  }

  // Make sure relation does not have nullable attributes.
  if (relation.hasNullableAttributes()) {
    return false;
  }

  // Make sure all the specified compressed attributes exist and can be ordered
  // by LessComparison.
  const Comparison &less_comparison = Comparison::GetComparison(Comparison::kLess);
  CompatUnorderedSet<attribute_id>::unordered_set compressed_variable_length_attributes;
  for (int compressed_attribute_num = 0;
       compressed_attribute_num < description.ExtensionSize(
           CompressedPackedRowStoreTupleStorageSubBlockDescription::compressed_attribute_id);
       ++compressed_attribute_num) {
    attribute_id compressed_attribute_id = description.GetExtension(
        CompressedPackedRowStoreTupleStorageSubBlockDescription::compressed_attribute_id,
        compressed_attribute_num);
    if (!relation.hasAttributeWithId(compressed_attribute_id)) {
      return false;
    }
    const Type &attr_type = relation.getAttributeById(compressed_attribute_id).getType();
    if (!less_comparison.canCompareTypes(attr_type, attr_type)) {
      return false;
    }
    if (attr_type.isVariableLength()) {
      compressed_variable_length_attributes.insert(compressed_attribute_id);
    }
  }

  // If the relation has variable-length attributes, make sure they are all
  // compressed.
  if (relation.isVariableLength()) {
    for (CatalogRelation::const_iterator attr_it = relation.begin();
         attr_it != relation.end();
         ++attr_it) {
      if (attr_it->getType().isVariableLength()) {
        if (compressed_variable_length_attributes.find(attr_it->getID())
            == compressed_variable_length_attributes.end()) {
          return false;
        }
      }
    }
  }

  return true;
}
예제 #11
0
 /**
  * @brief Constructor for selection with arbitrary expressions in projection
  *        list.
  *
  * @note This overload initializes simple_projection_ to false and
  *       simple_selection_ to nullptr. When \c input_relation_is_stored is
  *       true, a snapshot of the input relation's blocks is taken at
  *       construction; otherwise the list of input block IDs starts empty.
  *
  * @param input_relation The relation to perform selection over.
  * @param output_relation The output relation.
  * @param output_destination_index The index of the InsertDestination in the
  *        QueryContext to insert the selection results.
  * @param predicate_index The index of selection predicate in QueryContext.
  *        All tuples matching the predicate will be selected (or
  *        kInvalidPredicateId to select all tuples).
  * @param selection_index The group index of Scalars in QueryContext, which
  *        will be evaluated to project input tuples.
  * @param input_relation_is_stored If input_relation is a stored relation and
  *        is fully available to the operator before it can start generating
  *        workorders.
  **/
 SelectOperator(const CatalogRelation &input_relation,
                const CatalogRelation &output_relation,
                const QueryContext::insert_destination_id output_destination_index,
                const QueryContext::predicate_id predicate_index,
                const QueryContext::scalar_group_id selection_index,
                bool input_relation_is_stored)
     : input_relation_(input_relation),
       output_relation_(output_relation),
       output_destination_index_(output_destination_index),
       predicate_index_(predicate_index),
       selection_index_(selection_index),
       simple_selection_(nullptr),
       input_relation_block_ids_(input_relation_is_stored ? input_relation.getBlocksSnapshot()
                                                          : std::vector<block_id>()),
       num_workorders_generated_(0),
       simple_projection_(false),
       input_relation_is_stored_(input_relation_is_stored),
       started_(false) {}
 /**
  * @brief Constructor for SampleOperator with the sampling percentage and type of sampling.
  *
  * @note When \c input_relation_is_stored is true, a snapshot of the input
  *       relation's blocks is taken at construction; otherwise the list of
  *       input block IDs starts empty.
  *
  * @param query_id The ID of the query to which this operator belongs
  *        (forwarded to the RelationalOperator base class).
  * @param input_relation The relation to perform sampling over.
  * @param output_relation The output relation.
  * @param output_destination_index The index of the InsertDestination in the
  *        QueryContext to insert the sampling results.
  * @param input_relation_is_stored If input_relation is a stored relation and
  *        is fully available to the operator before it can start generating
  *        workorders.
  * @param is_block_sample Flag indicating whether the sample type is block or tuple.
  * @param percentage The percentage of data to be sampled. No range check is
  *        performed in this constructor.
  **/
 SampleOperator(
     const std::size_t query_id,
     const CatalogRelation &input_relation,
     const CatalogRelationSchema &output_relation,
     const QueryContext::insert_destination_id output_destination_index,
     const bool input_relation_is_stored,
     const bool is_block_sample,
     const int percentage)
     : RelationalOperator(query_id),
       input_relation_(input_relation),
       output_relation_(output_relation),
       output_destination_index_(output_destination_index),
       input_relation_is_stored_(input_relation_is_stored),
       is_block_sample_(is_block_sample),
       percentage_(percentage),
       input_relation_block_ids_(input_relation_is_stored
                                     ? input_relation.getBlocksSnapshot()
                                     : std::vector<block_id>()),
       num_workorders_generated_(0),
       started_(false) {}
예제 #13
0
void PrintToScreen::PrintRelation(const CatalogRelation &relation,
                                  StorageManager *storage_manager,
                                  FILE *out) {
  if (!FLAGS_printing_enabled) {
    return;
  }

  vector<int> column_widths;
  column_widths.reserve(relation.size());

  for (CatalogRelation::const_iterator attr_it = relation.begin();
       attr_it != relation.end();
       ++attr_it) {
    // Printed column needs to be wide enough to print:
    //   1. The attribute name (in the printed "header").
    //   2. Any value of the attribute's Type.
    //   3. If the attribute's Type is nullable, the 4-character string "NULL".
    // We pick the largest of these 3 widths as the column width.
    int column_width = static_cast<int>(attr_it->getDisplayName().length());
    column_width = column_width < attr_it->getType().getPrintWidth()
                   ? attr_it->getType().getPrintWidth()
                   : column_width;
    column_width = attr_it->getType().isNullable() && (column_width < 4)
                   ? 4
                   : column_width;
    column_widths.push_back(column_width);
  }

  printHBar(column_widths, out);

  fputc('|', out);
  vector<int>::const_iterator width_it = column_widths.begin();
  CatalogRelation::const_iterator attr_it = relation.begin();
  for (; width_it != column_widths.end(); ++width_it, ++attr_it) {
    fprintf(out,
            "%-*s|",
            *width_it,
            attr_it->getDisplayName().c_str());
  }
  fputc('\n', out);

  printHBar(column_widths, out);

  std::vector<block_id> blocks = relation.getBlocksSnapshot();
  for (const block_id current_block_id : blocks) {
    BlockReference block = storage_manager->getBlock(current_block_id, relation);
    const TupleStorageSubBlock &tuple_store = block->getTupleStorageSubBlock();

    if (tuple_store.isPacked()) {
      for (tuple_id tid = 0; tid <= tuple_store.getMaxTupleID(); ++tid) {
        printTuple(tuple_store, tid, column_widths, out);
      }
    } else {
      std::unique_ptr<TupleIdSequence> existence_map(tuple_store.getExistenceMap());
      for (tuple_id tid : *existence_map) {
        printTuple(tuple_store, tid, column_widths, out);
      }
    }
  }

  printHBar(column_widths, out);
}
예제 #14
0
// Dispatches one incoming message to the matching handler and reports the
// resulting status of the query.
//
// Returns:
//   - kNone for a kCatalogRelationNewBlockMessage (handled entirely here by
//     adding the block to the relation, and to its partition if the message
//     carries a partition id);
//   - otherwise kQueryExecuted if the whole query has finished, else
//     kOperatorExecuted if the operator the message refers to has finished,
//     else kNone.
//
// An unrecognized message type is reported with LOG(FATAL). A message that
// fails to parse trips CHECK.
QueryManager::QueryStatusCode QueryManager::processMessage(
    const TaggedMessage &tagged_message) {
  // Index of the operator the message refers to; assigned by every case that
  // falls through to the completion checks after the switch.
  dag_node_index op_index;
  switch (tagged_message.message_type()) {
    case kWorkOrderCompleteMessage: {
      serialization::WorkOrderCompletionMessage proto;
      CHECK(proto.ParseFromArray(tagged_message.message(),
                                 tagged_message.message_bytes()));

      op_index = proto.operator_index();
      processWorkOrderCompleteMessage(proto.operator_index());
      break;
    }
    case kRebuildWorkOrderCompleteMessage: {
      serialization::WorkOrderCompletionMessage proto;
      CHECK(proto.ParseFromArray(tagged_message.message(),
                                 tagged_message.message_bytes()));

      op_index = proto.operator_index();
      processRebuildWorkOrderCompleteMessage(proto.operator_index());
      break;
    }
    case kCatalogRelationNewBlockMessage: {
      serialization::CatalogRelationNewBlockMessage proto;
      CHECK(proto.ParseFromArray(tagged_message.message(),
                                 tagged_message.message_bytes()));

      const block_id block = proto.block_id();

      CatalogRelation *relation =
          static_cast<CatalogDatabase*>(catalog_database_)->getRelationByIdMutable(proto.relation_id());
      relation->addBlock(block);

      if (proto.has_partition_id()) {
        relation->getPartitionSchemeMutable()->addBlockToPartition(
            proto.partition_id(), block);
      }
      // This message carries no operator index, so the completion checks
      // below do not apply.
      return QueryStatusCode::kNone;
    }
    case kDataPipelineMessage: {
      // Possible message senders include InsertDestinations and some
      // operators which modify existing blocks.
      serialization::DataPipelineMessage proto;
      CHECK(proto.ParseFromArray(tagged_message.message(),
                                 tagged_message.message_bytes()));

      op_index = proto.operator_index();
      processDataPipelineMessage(proto.operator_index(),
                                 proto.block_id(),
                                 proto.relation_id());
      break;
    }
    case kWorkOrdersAvailableMessage: {
      serialization::WorkOrdersAvailableMessage proto;
      CHECK(proto.ParseFromArray(tagged_message.message(),
                                 tagged_message.message_bytes()));

      op_index = proto.operator_index();

      // Check if new work orders are available.
      fetchNormalWorkOrders(op_index);

      // Dispatch the WorkerMessages to the workers. We prefer to start the search
      // for the schedulable WorkOrders beginning from 'op_index'. The first
      // candidate worker to receive the next WorkOrder is the one that sent the
      // response message to Foreman.
      // TODO(zuyu): Improve the data locality for the next WorkOrder.
      break;
    }
    case kWorkOrderFeedbackMessage: {
      // FeedbackMessage takes a mutable pointer, hence the const_cast on the
      // message payload.
      WorkOrder::FeedbackMessage msg(
          const_cast<void *>(tagged_message.message()),
          tagged_message.message_bytes());

      op_index = msg.header().rel_op_index;
      processFeedbackMessage(msg);
      break;
    }
    default:
      LOG(FATAL) << "Unknown message type found in QueryManager";
  }

  // kQueryExecuted takes precedence over kOperatorExecuted, so completion of
  // the whole query is tested first. Testing the operator first would report
  // kOperatorExecuted even when this message completed the entire query.
  if (query_exec_state_->hasQueryExecutionFinished()) {
    return QueryStatusCode::kQueryExecuted;
  }

  if (query_exec_state_->hasExecutionFinished(op_index)) {
    return QueryStatusCode::kOperatorExecuted;
  }

  return QueryStatusCode::kNone;
}