Example No. 1
size_t block_writer::write_block(size_t segment_id,
                                 size_t column_id, 
                                 char* data,
                                 block_info block) {
  DASSERT_LT(segment_id, m_index_info.nsegments);
  DASSERT_LT(column_id, m_index_info.columns.size());
  DASSERT_TRUE(m_output_files[segment_id] != nullptr);
  // try to compress the data
  size_t compress_bound = LZ4_compressBound(block.block_size);
  auto compression_buffer = m_buffer_pool.get_new_buffer();
  compression_buffer->resize(compress_bound);
  char* cbuffer = compression_buffer->data();
  size_t clen = compress_bound;
  clen = LZ4_compress(data, cbuffer, block.block_size);

  char* buffer_to_write = NULL;
  size_t buffer_to_write_len = 0;
  if (clen < COMPRESSION_DISABLE_THRESHOLD * block.block_size) {
    // compression has a benefit!
    block.flags |= LZ4_COMPRESSION;
    block.length = clen;
    buffer_to_write = cbuffer;
    buffer_to_write_len = clen;
  } else {
    // compression has no benefit! do not compress!
    // unset LZ4
    block.flags &= (~(size_t)LZ4_COMPRESSION);
    block.length = block.block_size;
    buffer_to_write = data;
    buffer_to_write_len = block.block_size;
  }

  size_t padding = ((buffer_to_write_len + 4095) / 4096) * 4096 - buffer_to_write_len;
  ASSERT_LT(padding, 4096);
  // write!
  m_output_file_locks[segment_id].lock();
  block.offset = m_output_bytes_written[segment_id];
  m_output_bytes_written[segment_id] += buffer_to_write_len + padding;
  m_index_info.columns[column_id].segment_sizes[segment_id] += block.num_elem;
  m_output_files[segment_id]->write(buffer_to_write, buffer_to_write_len);
  m_output_files[segment_id]->write(padding_bytes, padding);
  m_blocks[segment_id][column_id].push_back(block);
  m_output_file_locks[segment_id].unlock();

  m_buffer_pool.release_buffer(std::move(compression_buffer));

  if (!m_output_files[segment_id]->good()) {
    log_and_throw_io_failure("Fail to write. Disk may be full.");
  }
  return buffer_to_write_len;
}
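The write path above makes two choices worth isolating: keep the LZ4 output only when it is meaningfully smaller than the raw block, and pad each write out to a multiple of 4096 bytes so block offsets stay aligned. Below is a minimal standalone sketch of just that arithmetic; the threshold value and the names are illustrative assumptions, since the actual value of COMPRESSION_DISABLE_THRESHOLD is not shown in the listing.

#include <cstddef>
#include <cstdio>

// Hypothetical stand-ins for the constants used above.
constexpr double kCompressionDisableThreshold = 0.9;  // assumed: keep LZ4 only if < 90% of raw size
constexpr size_t kBlockAlignment = 4096;              // blocks are padded to this boundary

// Padding needed to round len up to the next multiple of kBlockAlignment.
size_t padding_for(size_t len) {
  return ((len + kBlockAlignment - 1) / kBlockAlignment) * kBlockAlignment - len;
}

// Is the compressed form small enough to be worth writing?
bool use_compressed(size_t raw_len, size_t compressed_len) {
  return compressed_len < kCompressionDisableThreshold * raw_len;
}

int main() {
  size_t raw = 10000, comp = 7000;
  size_t out = use_compressed(raw, comp) ? comp : raw;
  std::printf("write %zu data bytes + %zu padding bytes\n", out, padding_for(out));
  // prints: write 7000 data bytes + 1192 padding bytes
}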
Example No. 2
void pysgraph_synchronize::load_vertex_partition(size_t partition_id, std::vector<sgraph_vertex_data>& vertices) {
  DASSERT_LT(partition_id, m_num_partitions);
  DASSERT_FALSE(m_is_partition_loaded[partition_id]);
  m_vertex_partitions[partition_id] = std::move(vertices);
  m_is_partition_loaded[partition_id] = true;
  DASSERT_TRUE(is_loaded(partition_id));
}
Example No. 3
 /**
  * Decrements the value of a log entry
  */
 void thr_dec_log_entry(size_t entry, double value) {
   event_log_thread_local_type* ev = get_thread_counter_ref();
   DASSERT_LT(entry, MAX_LOG_SIZE);
   // does not work for cumulative logs
   DASSERT_NE((int)logs[entry]->logtype, (int) log_type::CUMULATIVE);
   DASSERT_EQ(logs[entry]->is_callback_entry, false);
   ev->values[entry] -= value;
 }
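Both this function and thr_inc_log_entry (Example No. 14 below) update a per-thread counter block returned by get_thread_counter_ref, so the hot path needs no locks; the per-thread values are presumably combined when the log is read. A minimal sketch of that pattern using C++ thread_local storage follows. The names mirror the listing for readability, but this implementation and the aggregation side are assumptions, not the library's code.

#include <array>
#include <cstddef>

constexpr size_t kMaxLogSize = 256;  // stand-in for MAX_LOG_SIZE

struct event_log_thread_local_type {
  std::array<double, kMaxLogSize> values{};  // one slot per log entry, zero-initialized
};

// One counter block per thread; the updates below therefore need no locking.
event_log_thread_local_type* get_thread_counter_ref() {
  thread_local event_log_thread_local_type counters;
  return &counters;
}

void thr_inc_log_entry(size_t entry, double value) {
  get_thread_counter_ref()->values[entry] += value;
}

void thr_dec_log_entry(size_t entry, double value) {
  get_thread_counter_ref()->values[entry] -= value;
}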
Example No. 4
/// Return the next element in the chunk.
inline std::vector<flexible_type>&& sframe_reader_buffer::next() {
  if (m_buffer_pos == m_buffer.size()) {
    refill();
    m_buffer_pos = 0;
  }
  DASSERT_LT(m_buffer_pos, m_buffer.size());
  ++m_iter;
  return std::move(m_buffer[m_buffer_pos++]);
}
Example No. 5
T&& sarray_reader_buffer<T>::next() {
  if (m_buffer_pos == m_buffer.size()) {
    refill();
    m_buffer_pos = 0;
  }
  DASSERT_LT(m_buffer_pos, m_buffer.size());
  ++m_iter;
  return std::move(m_buffer[m_buffer_pos++]);
}
Example No. 6
/// Return the next element in the chunk.
inline const sframe_rows::row& sframe_reader_buffer::next() {
  if (m_buffer_pos == m_buffer.num_rows()) {
    refill();
    m_buffer_pos = 0;
  }
  DASSERT_LT(m_buffer_pos, m_buffer.num_rows());
  ++m_iter;
  m_current.copy_reference(m_buffer[m_buffer_pos++]);
  return m_current;
}
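The three next() variants above share one buffered-read pattern: when the in-memory buffer is exhausted, refill() pulls the next chunk, and next() then hands elements out by rvalue reference (or, in the row variant, by re-seating m_current) so callers avoid copying; the value returned by one call is invalidated by the next. A generic sketch of the same pattern over an arbitrary chunk source, with hypothetical names rather than the library's reader API:

#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

// Refill-on-exhaustion reader: fetch_chunk() supplies the next batch of elements.
template <typename T>
class chunk_reader {
 public:
  explicit chunk_reader(std::function<std::vector<T>()> fetch_chunk)
      : m_fetch(std::move(fetch_chunk)) {}

  // Hand out the next element by moving it out of the buffer.
  // Like the originals, this assumes the source still has data after a refill.
  T&& next() {
    if (m_pos == m_buffer.size()) {
      m_buffer = m_fetch();  // refill
      m_pos = 0;
    }
    return std::move(m_buffer[m_pos++]);
  }

 private:
  std::function<std::vector<T>()> m_fetch;
  std::vector<T> m_buffer;
  size_t m_pos = 0;
};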
Example No. 7
 void add(const T& t) {
   // we use std::hash first, to bring it to a 64-bit number
   // Then cityhash's hash64 twice to distribute the hash.
   // empirically, one hash64 does not produce enough scattering to
   // get a good estimate
   size_t h = hash64(hash64(std::hash<T>()(t)));
   size_t index = h >> (64 - m_b);
   DASSERT_LT(index, m_buckets.size());
   unsigned char pos = h != 0 ? 1 + __builtin_clz(h) : sizeof(size_t);
   m_buckets[index] = std::max(m_buckets[index], pos);
 }
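This is the update step of a HyperLogLog-style cardinality sketch: the top m_b bits of the 64-bit hash select a bucket, and each bucket keeps the largest "position of the first set bit" it has seen. A self-contained, textbook-style sketch follows; unlike the member function above it counts the rank in the bits left over after the index is removed and uses the 64-bit __builtin_clzll, and the bias constant is the usual approximation, so treat it as an illustration of the technique rather than the library's class.

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

class hll_sketch {
 public:
  explicit hll_sketch(size_t b = 12) : m_b(b), m_buckets(size_t(1) << b, 0) {}

  // Update from an already well-mixed 64-bit hash value.
  void add_hash(uint64_t h) {
    size_t index = h >> (64 - m_b);   // top m_b bits select the bucket
    uint64_t w = h << m_b;            // remaining bits determine the rank
    unsigned char rank = w != 0 ? (unsigned char)(1 + __builtin_clzll(w))
                                : (unsigned char)(64 - m_b + 1);
    if (rank > m_buckets[index]) m_buckets[index] = rank;
  }

  // Standard raw estimate: alpha_m * m^2 / sum_j 2^(-bucket_j).
  double estimate() const {
    double m = double(m_buckets.size());
    double alpha = 0.7213 / (1.0 + 1.079 / m);   // bias constant for m >= 128
    double sum = 0;
    for (unsigned char b : m_buckets) sum += std::ldexp(1.0, -int(b));
    return alpha * m * m / sum;
  }

 private:
  size_t m_b;
  std::vector<unsigned char> m_buckets;
};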
Example No. 8
static inline size_t get_bin_idx(
    flexible_type value,
    double scale_min,
    double scale_max
  ) {
  double range = scale_max - scale_min;
  size_t bin = std::floor(
    ((static_cast<double>(value) - scale_min) / range) *
    static_cast<double>(continuous_result::MAX_BINS)
  );
  if (bin == continuous_result::MAX_BINS) {
    bin -= 1;
  }
  DASSERT_LT(bin, continuous_result::MAX_BINS);
  return bin;
}
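The binning above linearly maps value from [scale_min, scale_max] onto MAX_BINS equal-width bins and clamps the right edge so that scale_max itself lands in the last bin; values outside the range are only caught by the assertion. A quick worked check with an assumed bin count, since the value of continuous_result::MAX_BINS is not shown in the listing:

#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdio>

constexpr size_t kMaxBins = 1000;  // assumed stand-in for continuous_result::MAX_BINS

size_t get_bin_idx(double value, double scale_min, double scale_max) {
  double range = scale_max - scale_min;
  size_t bin = std::floor(((value - scale_min) / range) * double(kMaxBins));
  if (bin == kMaxBins) bin -= 1;   // put scale_max itself into the last bin
  assert(bin < kMaxBins);
  return bin;
}

int main() {
  // [0, 10) split into 1000 bins, each 0.01 wide.
  std::printf("%zu\n", get_bin_idx(0.0, 0.0, 10.0));   // 0
  std::printf("%zu\n", get_bin_idx(2.5, 0.0, 10.0));   // 250
  std::printf("%zu\n", get_bin_idx(10.0, 0.0, 10.0));  // 999 (clamped from 1000)
}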
Example No. 9
/** 
 * Create an SFrame parallel iterator.
 */
parallel_sframe_iterator::parallel_sframe_iterator(
    const parallel_sframe_iterator_initializer& it_init, size_t thread_idx, size_t num_threads)
    : sources(it_init.sources)
    , column_offsets(it_init.column_offsets)
{

  DASSERT_LT(thread_idx, num_threads);

  buffers.resize(sources.size());

  start_idx = it_init.row_start + 
        (thread_idx * it_init.global_block_size) / num_threads;
  end_idx   = it_init.row_start + 
        ((thread_idx + 1) * it_init.global_block_size) / num_threads;

  max_block_size = std::min(sframe_config::SFRAME_READ_BATCH_SIZE, end_idx - start_idx);
  for(auto& b : buffers)
    b.reserve(max_block_size); 
  reset();
}
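The index arithmetic above gives thread thread_idx the half-open slice [row_start + i*B/n, row_start + (i+1)*B/n) of a block of B = global_block_size rows, so the per-thread slices tile the block exactly and differ in length by at most one row. A small standalone sketch of that partitioning, with illustrative names:

#include <cstddef>
#include <cstdio>
#include <utility>

// Split [row_start, row_start + block_size) into num_threads contiguous slices.
std::pair<size_t, size_t> thread_slice(size_t row_start, size_t block_size,
                                       size_t thread_idx, size_t num_threads) {
  size_t start = row_start + (thread_idx * block_size) / num_threads;
  size_t end   = row_start + ((thread_idx + 1) * block_size) / num_threads;
  return {start, end};
}

int main() {
  // 10 rows across 4 threads: [0,2) [2,5) [5,7) [7,10) -- lengths 2, 3, 2, 3.
  for (size_t t = 0; t < 4; ++t) {
    auto s = thread_slice(0, 10, t, 4);
    std::printf("thread %zu: [%zu, %zu)\n", t, s.first, s.second);
  }
}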
Example No. 10
 // Add a new element to the specified bucket.
 void add(const value_type& val, size_t bucketid) {
   DASSERT_LT(bucketid, buckets.size());
   buckets[bucketid]->add(val);
 };
Example No. 11
 inline void load_vertex_partition(size_t partition_id, std::vector<sgraph_vertex_data>& vertices) {
   DASSERT_LT(partition_id, m_num_partitions);
   DASSERT_FALSE(m_is_partition_loaded[partition_id]);
   m_vertex_partitions[partition_id] = &vertices;
   m_is_partition_loaded[partition_id] = true;
 }
Example No. 12
 inline bool is_loaded(size_t partition_id) {
   DASSERT_LT(partition_id, m_num_partitions);
   return m_is_partition_loaded[partition_id];
 }
Example No. 13
 inline std::vector<sgraph_vertex_data>& get_partition(size_t partition_id) {
   DASSERT_LT(partition_id, m_num_partitions);
   DASSERT_TRUE(is_loaded(partition_id));
   return m_vertex_partitions[partition_id];
 }
Example No. 14
 /**
  * Increments the value of a log entry
  */
 inline void thr_inc_log_entry(size_t entry, double value) {
   event_log_thread_local_type* ev = get_thread_counter_ref();
   DASSERT_LT(entry, MAX_LOG_SIZE);
   DASSERT_EQ(logs[entry]->is_callback_entry, false);
   ev->values[entry] += value;
 }
Example No. 15
 flexible_type operator()(const flexible_type& i) const{
   DASSERT_LT(i, m_id_vec->size());
   return m_id_vec->at(i);
 }