Example #1
// Builds one merged dictionary for a single column out of the dictionaries of
// all input tables and installs it on the destination column of merged_table.
//
// input_tables             - tables whose column `source_column_index` is merged
// source_column_index      - column read in every input table
// merged_table             - table that receives the merged dictionary
// destination_column_index - column of merged_table that receives it
// value_id_mapping         - passed on to createNewDict<T>
// useValid, valid          - passed on to createNewDict<T>
//
// Throws std::runtime_error when the type of an input column is not
// compatible with the type of the destination column.
//
// NOTE(review): `T` is not declared in the lines shown here and the closing
// braces of the `if`, the `for` and the function are absent — this listing
// looks like an excerpt; confirm against the complete source.
void SequentialHeapMerger::mergeValues(const std::vector<c_atable_ptr_t > &input_tables,
                                       size_t source_column_index,
                                       atable_ptr_t merged_table,
                                       size_t destination_column_index,
                                       value_id_mapping_t &value_id_mapping,
                                       bool useValid,
                                       const std::vector<bool>& valid) {

  // Dictionaries handed to createNewDict<T> below.
  std::vector<AbstractTable::SharedDictionaryPtr > value_id_maps;
  AbstractTable::SharedDictionaryPtr new_dict;

  // shortcut for dicts

  for (size_t table = 0; table < input_tables.size(); table++) {
    // Refuse to merge columns whose types are not compatible with the destination.
    if (!types::isCompatible(merged_table->metadataAt(destination_column_index).getType(),
         input_tables[table]->metadataAt(source_column_index).getType())) {
      throw std::runtime_error("Dictionary types don't match");
    // NOTE(review): `dict` is not used again in the visible lines and nothing
    // is ever appended to value_id_maps — presumably the dictionary is meant
    // to be collected there; verify.
    auto dict = std::dynamic_pointer_cast<BaseDictionary<T>>(input_tables[table]->dictionaryAt(source_column_index));

  // Create new BaseDictionary - shrink when merge finished?
  new_dict = createNewDict<T>(input_tables, value_id_maps, value_id_mapping, source_column_index, useValid, valid);
  // set new value id map for column
  merged_table->setDictionaryAt(new_dict, destination_column_index);
Example #2
// Writes the textual table header to "<_baseDirectory>/<name>/header.dat".
//
// The header consists of three rows joined with " | " (column names, column
// types, partition markers), each terminated by a newline, followed by the
// separator "===".
//
// name  - sub-directory of _baseDirectory that receives header.dat
// table - table whose columns and partitions are described
//
// NOTE(review): the bodies of the two collection loops and several closing
// braces are not visible here, so `names`, `types` and `parts` are never
// filled in the lines shown — confirm against the complete source.
void SimpleTableDump::dumpHeader(std::string name, atable_ptr_t table) {
  std::stringstream header;
  std::vector<std::string> names, types;
  std::vector<uint32_t> parts;

  // Get names and types
  for (size_t i = 0; i < table->columnCount(); ++i) {

  // This calculation will break if the width of the value_id changes
  // or someone forgets to simply update the width accordingly in the
  // constructor of the table
  for (size_t i = 0; i < table->partitionCount(); ++i) {

  // Dump and join
  header << std::accumulate(names.begin(), names.end(), std::string(), infix(" | ")) << "\n";
  header << std::accumulate(types.begin(), types.end(), std::string(), infix(" | ")) << "\n";
  // Third row: partition i contributes parts[i] copies of the marker "<i>_R".
  std::vector<std::string> allParts;
  for (size_t i = 0; i < parts.size(); ++i) {
    auto p = parts[i];
    auto tmp = std::vector<std::string>(p, std::to_string(i) + "_R");
    allParts.insert(allParts.end(), tmp.begin(), tmp.end());
  header << std::accumulate(allParts.begin(), allParts.end(), std::string(), infix(" | ")) << "\n";
  header << "===";

  // Write the assembled text to disk.
  // NOTE(review): no check is visible that the file could be opened.
  std::string fullPath = _baseDirectory + "/" + name + "/header.dat";
  std::ofstream data(fullPath, std::ios::out | std::ios::binary);
  data << header.str();
Example #3
 // Fills column _dstCol of _main for the trailing _delta->size() rows: each
 // delta value of column _col is looked up in the dictionary _dict and the
 // resulting value id is stored with table index 0.
 //
 // NOTE(review): no return statement and no closing braces are visible for
 // this value_type-returning operator — the listing looks truncated.
 value_type operator() () {
   // NOTE(review): the cast result is dereferenced below without a null check;
   // presumably _dict is always an OrderPreservingDictionary<R> — verify.
   auto d = std::dynamic_pointer_cast<OrderPreservingDictionary<R>>(_dict);
   size_t tabSize = _main->size();
   // First row of _main that belongs to the delta.
   // NOTE(review): wraps around if _delta holds more rows than _main — confirm callers rule that out.
   size_t start = _main->size() - _delta->size();
   for(size_t row = start; row < tabSize; ++row) {
     // row - start is the row index inside _delta.
     _main->setValueId(_dstCol, row, ValueId{d->getValueIdForValue(_delta->getValue<R>(_col, row-start)), 0});
Example #4
// Constructs a Store from main_table.
//
// The initialisers shown set up:
//   delta           - result of main_table->copy_structure(create_concurrent_dict,
//                     create_concurrent_storage)
//   _cidBeginVector - main_table->size() entries, each 0
//   _cidEndVector   - main_table->size() entries, each tx::INF_CID
//   _tidVector      - main_table->size() entries, each tx::UNKNOWN
//
// NOTE(review): main_table itself is not stored by any initialiser visible
// here and the constructor body is not shown — confirm against the complete source.
Store::Store(atable_ptr_t main_table) :
    delta(main_table->copy_structure(create_concurrent_dict, create_concurrent_storage)),
    _cidBeginVector(main_table->size(), 0),
    _cidEndVector(main_table->size(), tx::INF_CID),
    _tidVector(main_table->size(), tx::UNKNOWN) {
Example #5
// Converts every row of `rows` into a raw row buffer, one row at a time.
//
// For each row a rawtable::RowHelper is created from _metadata, a type_func
// is dispatched per column through the type switch (keyed on the column type
// reported by `rows`), and rh.build() produces the buffer.
//
// NOTE(review): what is done with the built buffer, and the closing braces,
// are not visible here — the listing looks truncated.
void RawTable::appendRows(const atable_ptr_t& rows) {
  type_switch<hyrise_basic_types> ts;
  for(size_t row=0; row < rows->size(); ++row) {
    rawtable::RowHelper rh(_metadata);
    // NOTE(review): the column count comes from _metadata while the values
    // come from `rows`; presumably both have the same layout — verify.
    for(size_t column=0; column < _metadata.size(); ++column) {
      type_func tf(rows, rh, column, row);
      ts(rows->typeOfColumn(column), tf);
    // The buffer returned by rh.build() is released with std::free when `data` goes out of scope.
    std::unique_ptr<byte, void (*)(void *)> data(rh.build(), &std::free);
Example #6
// Merges a main table with an uncompressed delta into merged_table.
//
// input_tables   - exactly two tables: [0] is the main table, [1] the delta,
//                  which must be a RawTable
// merged_table   - receives the merged dictionaries and the remapped value ids
// column_mapping - source column -> destination column
// newSize        - not used by this merger
// useValid       - must be false; validity vectors are not supported
// valid          - not used by this merger
//
// Steps: (1) build a merged dictionary per mapped column, (2) rewrite the
// value ids of the main rows through the per-column mapping, (3) let
// MapValueForValueId assign value ids for the delta rows.
//
// Throws std::runtime_error if useValid is set, if input_tables does not hold
// exactly two tables, or if the second table is not a RawTable.
void SimpleStoreMerger::mergeValues(const std::vector<c_atable_ptr_t > &input_tables,
                              atable_ptr_t merged_table,
                              const column_mapping_t &column_mapping,
                              const uint64_t newSize,
                              bool useValid,
                              const std::vector<bool>& valid) {

  if (useValid)
    throw std::runtime_error("SimpleStoreMerger does not support valid vectors");

  if(input_tables.size() != 2) throw std::runtime_error("SimpleStoreMerger does not support more than two tables");

  auto delta = std::dynamic_pointer_cast<const RawTable>(input_tables[1]);
  // The cast yields an empty pointer for any other table type; fail loudly
  // instead of handing a null delta to the functors below.
  if (!delta) throw std::runtime_error("SimpleStoreMerger expects the second input table to be a RawTable");
  auto main = input_tables[0];

  // Prepare type handling
  MergeDictFunctor fun;
  type_switch<hyrise_basic_types> ts;

  // The results are indexed by SOURCE column, so the vector has to reach the
  // largest source column in the mapping, not merely hold one slot per entry.
  size_t slots = 0;
  for (const auto& kv : column_mapping) {
    if (kv.first >= slots) slots = kv.first + 1;
  }
  std::vector<MergeDictFunctor::result> mergedDictionaries(slots);

  // Extract unique values for delta
  for(const auto& kv : column_mapping) {
    const auto& col = kv.first;
    const auto& dst = kv.second;
    fun.prepare(main, delta, col);
    auto result = ts(main->typeOfColumn(col), fun);
    merged_table->setDictionaryAt(result.dict, dst);
    mergedDictionaries[col] = result;
  }

  // Update the values of the new Table
  const size_t tabSize = main->size();
  for(size_t row=0; row < tabSize; ++row) {
    for( const auto& kv : column_mapping) {
      const auto& col = kv.first;
      const auto& dst = kv.second;
      // Translate the old value id of the main row into the merged dictionary.
      merged_table->setValueId(dst, row, ValueId{mergedDictionaries[col].mapping[main->getValueId(col, row).valueId], 0});
    }
  }

  // Map the values for the values in the uncompressed delta
  MapValueForValueId map;
  for( const auto& kv : column_mapping) {
    const auto& col = kv.first;
    const auto& dst = kv.second;
    map.prepare(merged_table, dst, mergedDictionaries[col].dict, col, delta);
    ts(merged_table->typeOfColumn(dst), map);
  }
}
Example #7
void SequentialHeapMerger::mergeValues(const std::vector<c_atable_ptr_t > &input_tables,
                                       atable_ptr_t merged_table,
                                       const column_mapping_t &column_mapping,
                                       const uint64_t newSize,
                                       bool useValid,
                                       const std::vector<bool>& valid) {

  //if (input_tables.size () != 2)
  //  throw std::runtime_error("Merging more than 2 tables is not supported with this merger...");

  std::vector<value_id_mapping_t> mappingPerAtrtibute(input_tables[0]->columnCount());

  for (const auto & kv: column_mapping) {
    const auto &source = kv.first;
    const auto &destination = kv.second;
    switch (merged_table->metadataAt(destination).getType()) {
    case IntegerType:
    case IntegerTypeDelta:
    case IntegerTypeDeltaConcurrent:
      mergeValues<hyrise_int_t>(input_tables, source, merged_table, destination, mappingPerAtrtibute[source], useValid, valid);
    case FloatType:
    case FloatTypeDelta:
    case FloatTypeDeltaConcurrent:
      mergeValues<hyrise_float_t>(input_tables, source, merged_table, destination, mappingPerAtrtibute[source], useValid, valid);
    case StringType:
    case StringTypeDelta:
    case StringTypeDeltaConcurrent:
      mergeValues<hyrise_string_t>(input_tables, source, merged_table, destination, mappingPerAtrtibute[source], useValid, valid);
    case IntegerNoDictType:
    case FloatNoDictType:
      merged_table->setDictionaryAt(makeDictionary(merged_table->typeOfColumn(destination)), destination);


  // Only after the dictionaries are merged copy the values
  for (const auto & kv: column_mapping) {
    const auto &source = kv.first;
    const auto &destination = kv.second;
    // copy the actual values and apply mapping
    copyValues(input_tables, source, merged_table, destination, mappingPerAtrtibute[source], useValid, valid);
Example #8
// Copies the value ids of one column from all input tables into merged_table,
// translating them through value_id_mapping when one is present.
//
// input_tables             - tables to read, processed in order
// source_column_index      - column read in every input table
// merged_table             - table that receives the value ids
// destination_column_index - column of merged_table that is written
// value_id_mapping         - per input table: old value id -> new value id;
//                            empty for columns without a dictionary
// useValid                 - when true, only rows flagged in `valid` are copied
// valid                    - one flag per input row, all input tables concatenated
//
// Copied rows are written densely: the destination row advances only when a
// row is actually copied, so rows filtered out by `valid` leave no gap.
void SequentialHeapMerger::copyValues(const std::vector<c_atable_ptr_t > &input_tables,
                                      size_t source_column_index,
                                      atable_ptr_t &merged_table,
                                      size_t destination_column_index,
                                      std::vector<std::vector<value_id_t> > &value_id_mapping,
                                      bool useValid,
                                      const std::vector<bool>& valid) {
  ValueId value_id;

  // copy all value ids to the new doc vector
  // and apply value id mapping
  size_t merged_table_row = 0;

  // Only apply the mapping if we have one, for non-dict columns, we
  // just copy the "value_ids". We use almost identical source code
  // here to avoid the additional branch in the inner loop. Not pretty
  // but it works.
  if (value_id_mapping.size() > 0) {
    size_t part_counter = 0;  // offset of the current table's first row inside `valid`
    for (size_t table = 0; table < input_tables.size(); table++) {
      const size_t table_rows = input_tables[table]->size();
      for (size_t row = 0; row < table_rows; row++) {
        if (!useValid || valid[part_counter + row]) {
          value_id.valueId = input_tables[table]->getValueId(source_column_index, row).valueId;
          value_id.valueId = value_id_mapping[table][value_id.valueId]; // translate value id to new dict
          merged_table->setValueId(destination_column_index, merged_table_row, value_id);
          // Advance the write position; otherwise every value would overwrite row 0.
          ++merged_table_row;
        }
      }
      part_counter += table_rows;
    }
  } else {
    // No dict columns
    size_t part_counter = 0;  // offset of the current table's first row inside `valid`
    for (size_t table = 0; table < input_tables.size(); table++) {
      const size_t table_rows = input_tables[table]->size();
      for (size_t row = 0; row < table_rows; row++) {
        if (!useValid || valid[part_counter + row]) {
          value_id.valueId = input_tables[table]->getValueId(source_column_index, row).valueId;
          merged_table->setValueId(destination_column_index, merged_table_row, value_id);
          // Advance the write position; otherwise every value would overwrite row 0.
          ++merged_table_row;
        }
      }
      part_counter += table_rows;
    }
  }
}


Example #9
 // Reads a dictionary of R values from the memory block `data`.
 //
 // Layout read by the visible lines: the first sizeof(size_t) bytes hold the
 // number of entries, the R values start directly after them.
 //
 // NOTE(review): the loop body and the closing braces are not visible here,
 // so what is done with `dict` and `ptr` cannot be confirmed from this listing.
 inline void operator()() {
   auto dict = std::dynamic_pointer_cast<BaseDictionary<R>>(table->dictionaryAt(col));
   // First value, located right behind the leading entry count.
   const R* ptr = (R*)(data + sizeof(size_t));
   size_t size = *((size_t*)data);  // first sizeof(size_t) bytes store dictionary size;
   for (size_t i = 0; i < size; ++i) {
Example #10
 // Reads a dictionary of R values from the stream `data`.
 //
 // Layout read by the visible lines: a raw size_t entry count followed by
 // that many raw R values.
 //
 // NOTE(review): the loop body and the closing braces are not visible here,
 // so what is done with `dict` and each value cannot be confirmed from this listing.
 inline void operator()() {
   auto dict = checked_pointer_cast<ConcurrentUnorderedDictionary<R>>(table->dictionaryAt(col));
   size_t size;
   data.read((char*)&size, sizeof(size_t));
   std::vector<R> values(size);
   // NOTE(review): reading raw bytes into R presumably relies on R being
   // trivially copyable, and values[0] does not exist when size is 0 — verify.
   data.read((char*)&values[0], size * sizeof(R));
   // `value` is a copy of each element.
   for (const auto value : values) {
Example #11
void SimpleTableDump::dumpAttribute(std::string name, atable_ptr_t table, size_t col) {
  assert(std::dynamic_pointer_cast<Store>(table) ==
         nullptr);  // this should never be called with a store directly, but with main and delta table sepratly.
  std::string fullPath = _baseDirectory + "/" + name + "/" + table->nameOfColumn(col) + ".attr.dat";
  std::ofstream data(fullPath, std::ios::out | std::ios::binary);

  // size_t tableSize = table->size(); // get size before, to avoid chasing updates..
  auto tableSize = table->checkpointSize();

  std::vector<value_id_t> vidVector;

  for (size_t i = 0; i < tableSize; ++i) {
    ValueId v;
    v = table->getValueId(col, i);
    vidVector[i] = v.valueId;
  data.write((char*)&vidVector[0], tableSize * sizeof(value_id_t));
Example #12
void SimpleTableDump::dumpDictionary(std::string name, atable_ptr_t table, size_t col, bool delta) {
  std::string fullPath = _baseDirectory + "/" + name + "/" + table->nameOfColumn(col) + ".dict.dat";
  std::ofstream data(fullPath, std::ios::out | std::ios::binary);
  if (!delta) {
    // We make a small hack here, first we obtain the size of the
    // dictionary then we virtually create all value ids, this can break
    // if the dictionary has no contigous value ids
    // size_t dictionarySize = table->dictionaryAt(col)->size();
    write_to_stream_functor fun(data, table->dictionaryAt(col));  // will pick main dictionary by default for stores
    type_switch<hyrise_basic_types> ts;
    ts(table->typeOfColumn(col), fun);
    /*for(size_t i=0; i < dictionarySize; ++i) {
      ts(table->typeOfColumn(col), fun);
  } else {
    write_to_stream_functor_delta_dict fun(
        data, table->dictionaryAt(col));  // will pick main dictionary by default for stores
    type_switch<hyrise_basic_types> ts;
    ts(table->typeOfColumn(col), fun);
Example #13
// String variant of the memory-block dictionary reader.
//
// Layout walked by the visible lines: a leading size_t entry count, then per
// entry a size_t byte length immediately followed by that many characters.
//
// NOTE(review): `val` and `dict` are not used in the lines shown and the
// closing braces are absent — presumably each string is added to the
// dictionary in a line missing from this listing; verify.
void write_to_dict_functor_mmap::operator()<hyrise_string_t>() {
  auto dict = std::dynamic_pointer_cast<BaseDictionary<hyrise_string_t>>(table->dictionaryAt(col));
  size_t size = *((size_t*)data);  // first sizeof(size_t) bytes store dictionary size;
  // sptr points at the length field of the current entry, cptr at its characters.
  const size_t* sptr = (size_t*)(data + sizeof(size_t));
  const char* cptr = data + 2 * sizeof(size_t);
  size_t read;
  for (size_t i = 0; i < size; ++i) {
    std::string val(cptr, *sptr);
    read = *sptr;
    // Step over the characters just read: the next length field follows them
    // directly, and the next characters follow that length field.
    sptr = (size_t*)(cptr + read);
    cptr = cptr + read + sizeof(size_t);
Example #14
// String variant of the stream dictionary reader: loads every string stored
// in the stream `data` into the dictionary of column `col`.
//
// Stream layout (all integers are raw size_t values):
//   [nr_of_entries] followed, per entry, by [length_of_string][string bytes]
//
// The whole stream is copied into memory first and parsed from that buffer.
//
// Throws std::runtime_error if the stream size cannot be determined, if the
// stream cannot be read, if it ends before the data announced by its length
// fields, or if bytes are left over after the last entry.
void write_to_delta_vector_functor::operator()<hyrise_string_t>() {
  auto dict = checked_pointer_cast<ConcurrentUnorderedDictionary<hyrise_string_t>>(table->dictionaryAt(col));

  // copy whole file to buffer first
  data.seekg(0, data.end);
  const std::streamoff file_size = data.tellg();  // -1 when the position is unavailable
  data.seekg(0, data.beg);
  if (file_size < 0) {
    throw std::runtime_error("Could not determine the size of the dictionary file.");
  }
  const size_t length = static_cast<size_t>(file_size);

  // The string owns the bytes, so nothing leaks when an exception is thrown
  // further down and the buffer stays alive for the final position check.
  std::string buffer(length, '\0');
  if (length > 0 && !data.read(&buffer[0], file_size)) {
    throw std::runtime_error("Could not read the dictionary file.");
  }

  const char* position_in_buffer = buffer.data();
  const char* const buffer_end = position_in_buffer + length;

  // Reads one raw size_t at the current position and advances past it.
  auto read_size = [&position_in_buffer, buffer_end]() -> size_t {
    if (static_cast<size_t>(buffer_end - position_in_buffer) < sizeof(size_t)) {
      throw std::runtime_error("Dictionary file is truncated.");
    }
    size_t value;
    memcpy(&value, position_in_buffer, sizeof(size_t));
    position_in_buffer += sizeof(size_t);
    return value;
  };

  // file's format is (size_t)nr_of_entries, [(size_t)length_of_string, string]
  const size_t size = read_size();

  for (size_t i = 0; i < size; ++i) {
    const size_t s = read_size();
    if (static_cast<size_t>(buffer_end - position_in_buffer) < s) {
      throw std::runtime_error("Dictionary file is truncated.");
    }
    std::string tmp(position_in_buffer, s);
    dict->addValue(tmp);
    position_in_buffer += s;
  }

  if (position_in_buffer != buffer_end) {
    throw std::runtime_error("Warning, did not read whole file.");
  }
  // Equivalent on regular file object, without buffer:
  // for (size_t i=0; i<size; ++i) {
  //   size_t s;
  //   data.read((char*) &s, sizeof(size_t));
  //   std::string tmp(s, '\0');
  //   data.read(&tmp[0], s);
  //   dict->addValue(tmp);
  // }
}
Example #15
// Builds the identity column mapping for `input`: every column index from 0
// to input->columnCount() - 1 is mapped to itself.
//
// input - table whose column count determines the size of the mapping
//
// Returns the mapping; it is empty when the table has no columns.
column_mapping_t identityMap(atable_ptr_t input) {
  column_mapping_t map;
  // Query the column count once instead of on every loop iteration.
  const size_t column_count = input->columnCount();
  for (size_t column_index = 0; column_index < column_count; ++column_index) {
    map[column_index] = column_index;
  }
  return map;
}
Example #16
// Writes the table's checkpoint size to "<_baseDirectory>/<name>/metadata.dat".
//
// name  - sub-directory of _baseDirectory that receives the file
// table - table whose checkpointSize() is recorded
//
// NOTE(review): the end of this function is not visible in this listing and
// no check is visible that the file could be opened.
void SimpleTableDump::dumpMetaData(std::string name, atable_ptr_t table) {
  std::string fullPath = _baseDirectory + "/" + name + "/metadata.dat";
  std::ofstream data(fullPath, std::ios::out | std::ios::binary);
  // Written with operator<<, i.e. formatted output, although the stream is opened in binary mode.
  data << table->checkpointSize();