void SequentialHeapMerger::mergeValues(const std::vector<c_atable_ptr_t > &input_tables,
                                       size_t source_column_index,
                                       atable_ptr_t merged_table,
                                       size_t destination_column_index,
                                       value_id_mapping_t &value_id_mapping,
                                       bool useValid,
                                       const std::vector<bool>& valid) {

  std::vector<AbstractTable::SharedDictionaryPtr > value_id_maps;
  AbstractTable::SharedDictionaryPtr new_dict;

  // shortcut for dicts
  value_id_maps.reserve(input_tables.size());

  for (size_t table = 0; table < input_tables.size(); table++) {
    if (!types::isCompatible(merged_table->metadataAt(destination_column_index).getType(),
         input_tables[table]->metadataAt(source_column_index).getType())) {
      throw std::runtime_error("Dictionary types don't match");
    }
    auto dict = std::dynamic_pointer_cast<BaseDictionary<T>>(input_tables[table]->dictionaryAt(source_column_index));
    value_id_maps.push_back(dict);
  }

  // Create new BaseDictionary - shrink when merge finished?
  new_dict = createNewDict<T>(input_tables, value_id_maps, value_id_mapping, source_column_index, useValid, valid);
  // set new value id map for column
  merged_table->setDictionaryAt(new_dict, destination_column_index);
}
Exemple #2
0
// Writes "<_baseDirectory>/<name>/header.dat" for the given table.
//
// The file consists of three " | "-joined lines (column names, column types,
// and one "<partition>_R" entry per column of each partition) followed by a
// closing "===" marker.
void SimpleTableDump::dumpHeader(std::string name, atable_ptr_t table) {
  std::vector<std::string> columnNames, columnTypes;
  for (size_t col = 0; col < table->columnCount(); ++col) {
    columnNames.push_back(table->nameOfColumn(col));
    columnTypes.push_back(data_type_to_string(table->typeOfColumn(col)));
  }

  // Original author's caveat: this breaks if the width of the value_id
  // changes, or if the width is not updated in the table's constructor.
  std::vector<std::string> partitionLabels;
  for (size_t part = 0; part < table->partitionCount(); ++part) {
    const uint32_t width = table->partitionWidth(part);
    // Repeat the partition label once per column in that partition.
    partitionLabels.insert(partitionLabels.end(), width, std::to_string(part) + "_R");
  }

  std::stringstream header;
  header << std::accumulate(columnNames.begin(), columnNames.end(), std::string(), infix(" | ")) << "\n";
  header << std::accumulate(columnTypes.begin(), columnTypes.end(), std::string(), infix(" | ")) << "\n";
  header << std::accumulate(partitionLabels.begin(), partitionLabels.end(), std::string(), infix(" | ")) << "\n";
  header << "===";

  const std::string headerPath = _baseDirectory + "/" + name + "/header.dat";
  std::ofstream out(headerPath, std::ios::out | std::ios::binary);
  out << header.str();
  out.close();
}
Exemple #3
0
 // Fills in the value ids of the rows at the tail of _main from the values
 // held in _delta: the last _delta->size() rows of _main are visited, each
 // delta value is looked up in _dict (viewed as an
 // OrderPreservingDictionary<R>) and the result is stored in column _dstCol.
 value_type operator() () {
   auto dictionary = std::dynamic_pointer_cast<OrderPreservingDictionary<R>>(_dict);
   const size_t totalRows = _main->size();
   const size_t firstDeltaRow = _main->size() - _delta->size();
   size_t deltaRow = 0;
   for (size_t mainRow = firstDeltaRow; mainRow < totalRows; ++mainRow, ++deltaRow) {
     _main->setValueId(_dstCol,
                       mainRow,
                       ValueId{dictionary->getValueIdForValue(_delta->getValue<R>(_col, deltaRow)), 0});
   }
 }
Exemple #4
0
// Constructs a Store on top of an existing main table.
//
// The delta table is created from main_table->copy_structure(...) using the
// create_concurrent_dict / create_concurrent_storage factories, the merger is
// the one returned by createDefaultMerger(), and the three per-row
// transaction vectors are sized to main_table->size().
//
// Note: C++ initializes members in declaration order, not in the order they
// are listed below.
Store::Store(atable_ptr_t main_table) :
    _delta_size(0),  // delta size counter starts at zero
    _main_table(main_table),
    delta(main_table->copy_structure(create_concurrent_dict, create_concurrent_storage)),
    merger(createDefaultMerger()),
    _cidBeginVector(main_table->size(), 0),  // one entry per main row, all 0
    _cidEndVector(main_table->size(), tx::INF_CID),  // one entry per main row, all tx::INF_CID
    _tidVector(main_table->size(), tx::UNKNOWN) {  // one entry per main row, all tx::UNKNOWN
  setUuid();
}
Exemple #5
0
void RawTable::appendRows(const atable_ptr_t& rows) {
  type_switch<hyrise_basic_types> ts;
  for(size_t row=0; row < rows->size(); ++row) {
    rawtable::RowHelper rh(_metadata);
    for(size_t column=0; column < _metadata.size(); ++column) {
      type_func tf(rows, rh, column, row);
      ts(rows->typeOfColumn(column), tf);
    }
    std::unique_ptr<byte, void (*)(void *)> data(rh.build(), &std::free);
    appendRow(data.get());
  }
}
Exemple #6
0
void SimpleStoreMerger::mergeValues(const std::vector<c_atable_ptr_t > &input_tables,
                              atable_ptr_t merged_table,
                              const column_mapping_t &column_mapping,
                              const uint64_t newSize,
                              bool useValid,
                              const std::vector<bool>& valid) {

  if (useValid)
    throw std::runtime_error("SimpleStoreMerger does not support valid vectors");


  if(input_tables.size() != 2) throw std::runtime_error("SimpleStoreMerger does not support more than two tables");
  auto delta = std::dynamic_pointer_cast<const RawTable>(input_tables[1]);
  auto main = input_tables[0];

  // Prepare type handling
  MergeDictFunctor fun;
  type_switch<hyrise_basic_types> ts;

  std::vector<MergeDictFunctor::result> mergedDictionaries(column_mapping.size());

  // Extract unique values for delta
  for(const auto& kv : column_mapping) {
    const auto& col = kv.first;
    const auto& dst = kv.second;
    fun.prepare(main, delta, col);
    auto result = ts(main->typeOfColumn(col), fun);
    merged_table->setDictionaryAt(result.dict, dst);
    mergedDictionaries[col] = result;
  }


  // Update the values of the new Table
  merged_table->resize(newSize);
  size_t tabSize = main->size();
  for(size_t row=0; row < tabSize; ++row) {
    for( const auto& kv : column_mapping) {
      const auto& col = kv.first;
      const auto& dst = kv.second;
      merged_table->setValueId(dst, row, ValueId{mergedDictionaries[col].mapping[main->getValueId(col, row).valueId], 0});
    }
  }

  // Map the values for the values in the uncompressed delta
  MapValueForValueId map;
  for( const auto& kv : column_mapping) {
    const auto& col = kv.first;
    const auto& dst = kv.second;
    map.prepare(merged_table, dst, mergedDictionaries[col].dict, col, delta);
    ts(merged_table->typeOfColumn(dst), map);
  }
}
void SequentialHeapMerger::mergeValues(const std::vector<c_atable_ptr_t > &input_tables,
                                       atable_ptr_t merged_table,
                                       const column_mapping_t &column_mapping,
                                       const uint64_t newSize,
                                       bool useValid,
                                       const std::vector<bool>& valid) {

  //if (input_tables.size () != 2)
  //  throw std::runtime_error("Merging more than 2 tables is not supported with this merger...");

  std::vector<value_id_mapping_t> mappingPerAtrtibute(input_tables[0]->columnCount());

  for (const auto & kv: column_mapping) {
    const auto &source = kv.first;
    const auto &destination = kv.second;
    switch (merged_table->metadataAt(destination).getType()) {
    case IntegerType:
    case IntegerTypeDelta:
    case IntegerTypeDeltaConcurrent:
      mergeValues<hyrise_int_t>(input_tables, source, merged_table, destination, mappingPerAtrtibute[source], useValid, valid);
    break;
    
    case FloatType:
    case FloatTypeDelta:
    case FloatTypeDeltaConcurrent:
      mergeValues<hyrise_float_t>(input_tables, source, merged_table, destination, mappingPerAtrtibute[source], useValid, valid);
      break;
      
    case StringType:
    case StringTypeDelta:
    case StringTypeDeltaConcurrent:
      mergeValues<hyrise_string_t>(input_tables, source, merged_table, destination, mappingPerAtrtibute[source], useValid, valid);
      break;
    case IntegerNoDictType:
    case FloatNoDictType:
      merged_table->setDictionaryAt(makeDictionary(merged_table->typeOfColumn(destination)), destination);
    default:
      break;
    }
  }

  merged_table->resize(newSize);

  // Only after the dictionaries are merged copy the values
  for (const auto & kv: column_mapping) {
    const auto &source = kv.first;
    const auto &destination = kv.second;
    // copy the actual values and apply mapping
    copyValues(input_tables, source, merged_table, destination, mappingPerAtrtibute[source], useValid, valid);
  }
}
void SequentialHeapMerger::copyValues(const std::vector<c_atable_ptr_t > &input_tables,
                                      size_t source_column_index,
                                      atable_ptr_t &merged_table,
                                      size_t destination_column_index,
                                      std::vector<std::vector<value_id_t> > &value_id_mapping,
                                      bool useValid,
                                      const std::vector<bool>& valid) {
  ValueId value_id;

  // copy all value ids to the new doc vector
  // and apply value id mapping
  size_t merged_table_row = 0;

  // Only apply the mapping if we have one, for non-dict columns, we
  // just copy the "value_ids". We use almost identical source code
  // here to avoid the additional branch in the inner loop. Not pretty
  // but it works.
  if (value_id_mapping.size() > 0) {
    size_t part_counter = 0;
    for (size_t table = 0; table < input_tables.size(); table++) {
      for (size_t row = 0; row < input_tables[table]->size(); row++) {
	if (!useValid || (useValid && valid[part_counter + row])) {
	  value_id.valueId = input_tables[table]->getValueId(source_column_index, row).valueId;
	  value_id.valueId = value_id_mapping[table][value_id.valueId]; // translate value id to new dict
	  merged_table->setValueId(destination_column_index, merged_table_row, value_id);
	  merged_table_row++;
	}
      }
      part_counter += input_tables[table]->size();
    }
  } else {
    
    // No dict columns
    size_t part_counter = 0;
    for (size_t table = 0; table < input_tables.size(); table++) {
      for (size_t row = 0; row < input_tables[table]->size(); row++) {
	if (!useValid || (useValid && valid[part_counter + row])) {
	  value_id.valueId = input_tables[table]->getValueId(source_column_index, row).valueId;
	  merged_table->setValueId(destination_column_index, merged_table_row, value_id);
	  merged_table_row++;
	}
      }
      part_counter += input_tables[table]->size();
    }

  }

}
Exemple #9
0
 // Loads a fixed-width dictionary from the in-memory buffer `data`.
 // Layout read here: a size_t entry count, directly followed by that many
 // values of type R, which are appended to the column's dictionary in order.
 inline void operator()() {
   auto target = std::dynamic_pointer_cast<BaseDictionary<R>>(table->dictionaryAt(col));
   const size_t entryCount = *((size_t*)data);  // leading sizeof(size_t) bytes hold the count
   const R* const firstEntry = (R*)(data + sizeof(size_t));
   target->reserve(entryCount);
   for (const R* entry = firstEntry; entry != firstEntry + entryCount; ++entry) {
     target->addValue(*entry);
   }
 }
Exemple #10
0
 // Loads a fixed-width dictionary from the stream `data`.
 // Layout read here: a size_t entry count, directly followed by that many
 // values of type R, which are added to the column's
 // ConcurrentUnorderedDictionary<R> in file order.
 //
 // If the entry count cannot be read, or is zero, nothing is added.
 inline void operator()() {
   auto dict = checked_pointer_cast<ConcurrentUnorderedDictionary<R>>(table->dictionaryAt(col));

   // Start from zero so a failed read can never leave an indeterminate
   // count that would then be used as an allocation size.
   size_t size = 0;
   if (!data.read(reinterpret_cast<char*>(&size), sizeof(size_t)) || size == 0) {
     return;
   }

   std::vector<R> values(size);
   // data() instead of &values[0]: the latter is undefined on an empty vector.
   data.read(reinterpret_cast<char*>(values.data()), size * sizeof(R));
   for (const auto& value : values) {
     dict->addValue(value);
   }
 }
Exemple #11
0
void SimpleTableDump::dumpAttribute(std::string name, atable_ptr_t table, size_t col) {
  assert(std::dynamic_pointer_cast<Store>(table) ==
         nullptr);  // this should never be called with a store directly, but with main and delta table sepratly.
  std::string fullPath = _baseDirectory + "/" + name + "/" + table->nameOfColumn(col) + ".attr.dat";
  std::ofstream data(fullPath, std::ios::out | std::ios::binary);

  // size_t tableSize = table->size(); // get size before, to avoid chasing updates..
  auto tableSize = table->checkpointSize();

  std::vector<value_id_t> vidVector;
  vidVector.resize(tableSize);

  for (size_t i = 0; i < tableSize; ++i) {
    ValueId v;
    v = table->getValueId(col, i);
    vidVector[i] = v.valueId;
  }
  data.write((char*)&vidVector[0], tableSize * sizeof(value_id_t));
  data.close();
}
Exemple #12
0
void SimpleTableDump::dumpDictionary(std::string name, atable_ptr_t table, size_t col, bool delta) {
  std::string fullPath = _baseDirectory + "/" + name + "/" + table->nameOfColumn(col) + ".dict.dat";
  std::ofstream data(fullPath, std::ios::out | std::ios::binary);
  if (!delta) {
    // We make a small hack here, first we obtain the size of the
    // dictionary then we virtually create all value ids, this can break
    // if the dictionary has no contigous value ids
    // size_t dictionarySize = table->dictionaryAt(col)->size();
    write_to_stream_functor fun(data, table->dictionaryAt(col));  // will pick main dictionary by default for stores
    type_switch<hyrise_basic_types> ts;
    ts(table->typeOfColumn(col), fun);
    /*for(size_t i=0; i < dictionarySize; ++i) {
      fun.setCol(col);
      fun.setVid(i);
      ts(table->typeOfColumn(col), fun);
    }*/
  } else {
    write_to_stream_functor_delta_dict fun(
        data, table->dictionaryAt(col));  // will pick main dictionary by default for stores
    type_switch<hyrise_basic_types> ts;
    ts(table->typeOfColumn(col), fun);
  }
  data.close();
}
Exemple #13
0
// Loads a string dictionary from the in-memory buffer `data`.
//
// Layout read here: a size_t entry count, followed by that many records of
// the form [size_t length][length bytes of string data]. Every string is
// appended to the column's dictionary in buffer order.
//
// The length fields are copied out with memcpy instead of being read through
// a size_t*: because the strings have arbitrary lengths, every length field
// after the first can sit at a misaligned address.
void write_to_dict_functor_mmap::operator()<hyrise_string_t>() {
  auto dict = std::dynamic_pointer_cast<BaseDictionary<hyrise_string_t>>(table->dictionaryAt(col));

  const char* cursor = data;

  size_t size;  // first sizeof(size_t) bytes store dictionary size
  memcpy(&size, cursor, sizeof(size_t));
  cursor += sizeof(size_t);
  dict->reserve(size);

  for (size_t i = 0; i < size; ++i) {
    size_t length;
    memcpy(&length, cursor, sizeof(size_t));
    cursor += sizeof(size_t);

    std::string val(cursor, length);
    dict->addValue(val);
    cursor += length;
  }
}
Exemple #14
0
// Loads a string delta dictionary from the stream `data`.
//
// The whole stream is read into memory first and then parsed. Layout:
// a size_t entry count, followed by that many records of the form
// [size_t length][length bytes of string data]. Every string is added to the
// column's ConcurrentUnorderedDictionary in file order.
//
// Throws std::runtime_error if the stream size cannot be determined, the
// stream cannot be read, the content ends before all announced entries were
// parsed, or bytes are left over after the last entry.
void write_to_delta_vector_functor::operator()<hyrise_string_t>() {
  auto dict = checked_pointer_cast<ConcurrentUnorderedDictionary<hyrise_string_t>>(table->dictionaryAt(col));

  // Determine the stream length. tellg() reports failure as -1, and an int
  // would truncate the position of large files, so keep the full offset.
  data.seekg(0, data.end);
  const std::streamoff end_pos = data.tellg();
  data.seekg(0, data.beg);
  if (end_pos < 0) {
    throw std::runtime_error("Could not determine size of dictionary file.");
  }
  const size_t length = static_cast<size_t>(end_pos);

  // The vector owns the buffer, so it is released even if parsing throws.
  std::vector<char> buffer(length);
  if (length > 0 && !data.read(buffer.data(), static_cast<std::streamsize>(length))) {
    throw std::runtime_error("Could not read dictionary file.");
  }

  size_t offset = 0;
  // Reads one size_t at the current offset, refusing to run past the buffer.
  const auto read_size = [&]() {
    if (length - offset < sizeof(size_t)) {
      throw std::runtime_error("Dictionary file is truncated.");
    }
    size_t value;
    memcpy(&value, buffer.data() + offset, sizeof(size_t));
    offset += sizeof(size_t);
    return value;
  };

  const size_t size = read_size();
  for (size_t i = 0; i < size; ++i) {
    const size_t s = read_size();
    if (length - offset < s) {
      throw std::runtime_error("Dictionary file is truncated.");
    }
    std::string tmp(buffer.data() + offset, s);
    offset += s;
    dict->addValue(tmp);
  }

  // Trailing bytes mean the announced entry count does not match the file.
  if (offset != length) {
    throw std::runtime_error("Warning, did not read whole file.");
  }
}
Exemple #15
0
// Returns a column mapping that maps every column index of `input` to
// itself, for indices 0 .. input->columnCount() - 1.
column_mapping_t identityMap(atable_ptr_t input) {
  column_mapping_t identity;
  size_t column = 0;
  while (column < input->columnCount()) {
    identity[column] = column;
    ++column;
  }
  return identity;
}
Exemple #16
0
// Writes table->checkpointSize(), formatted as text, to
// "<_baseDirectory>/<name>/metadata.dat".
void SimpleTableDump::dumpMetaData(std::string name, atable_ptr_t table) {
  const std::string metadataPath = _baseDirectory + "/" + name + "/metadata.dat";
  std::ofstream out(metadataPath, std::ios::out | std::ios::binary);
  out << table->checkpointSize();
  out.close();
}