size_t block_writer::write_block(size_t segment_id,
                                 size_t column_id,
                                 char* data,
                                 block_info block) {
  DASSERT_LT(segment_id, m_index_info.nsegments);
  DASSERT_LT(column_id, m_index_info.columns.size());
  DASSERT_TRUE(m_output_files[segment_id] != nullptr);
  // try to compress the data
  size_t compress_bound = LZ4_compressBound(block.block_size);
  auto compression_buffer = m_buffer_pool.get_new_buffer();
  compression_buffer->resize(compress_bound);
  char* cbuffer = compression_buffer->data();
  size_t clen = compress_bound;
  clen = LZ4_compress(data, cbuffer, block.block_size);

  char* buffer_to_write = NULL;
  size_t buffer_to_write_len = 0;

  if (clen < COMPRESSION_DISABLE_THRESHOLD * block.block_size) {
    // compression has a benefit!
    block.flags |= LZ4_COMPRESSION;
    block.length = clen;
    buffer_to_write = cbuffer;
    buffer_to_write_len = clen;
  } else {
    // compression has no benefit! do not compress!
    // unset LZ4
    block.flags &= (~(size_t)LZ4_COMPRESSION);
    block.length = block.block_size;
    buffer_to_write = data;
    buffer_to_write_len = block.block_size;
  }

  size_t padding = ((buffer_to_write_len + 4095) / 4096) * 4096 - buffer_to_write_len;
  ASSERT_LT(padding, 4096);

  // write!
  m_output_file_locks[segment_id].lock();
  block.offset = m_output_bytes_written[segment_id];
  m_output_bytes_written[segment_id] += buffer_to_write_len + padding;
  m_index_info.columns[column_id].segment_sizes[segment_id] += block.num_elem;
  m_output_files[segment_id]->write(buffer_to_write, buffer_to_write_len);
  m_output_files[segment_id]->write(padding_bytes, padding);
  m_blocks[segment_id][column_id].push_back(block);
  m_output_file_locks[segment_id].unlock();
  m_buffer_pool.release_buffer(std::move(compression_buffer));

  if (!m_output_files[segment_id]->good()) {
    log_and_throw_io_failure("Fail to write. Disk may be full.");
  }
  return buffer_to_write_len;
}
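// Illustrative sketch (not library code): the two decisions write_block makes
// before touching the file, namely keep the compressed buffer only when it
// beats the threshold, and pad the written length up to the next 4096-byte
// boundary. THRESHOLD and the sizes below are assumed values for illustration;
// COMPRESSION_DISABLE_THRESHOLD is whatever the library actually defines.
#include <cstddef>
#include <cstdio>

int main() {
  const double THRESHOLD = 0.9;               // assumed stand-in value
  size_t block_size = 10000;                  // uncompressed block size
  size_t clen = 7200;                         // hypothetical LZ4 output size

  size_t to_write = (clen < THRESHOLD * block_size) ? clen : block_size;
  size_t padding  = ((to_write + 4095) / 4096) * 4096 - to_write;

  std::printf("write %zu data bytes + %zu padding bytes (total %zu)\n",
              to_write, padding, to_write + padding);
  return 0;
}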
void pysgraph_synchronize::load_vertex_partition(size_t partition_id,
                                                 std::vector<sgraph_vertex_data>& vertices) {
  DASSERT_LT(partition_id, m_num_partitions);
  DASSERT_FALSE(m_is_partition_loaded[partition_id]);
  m_vertex_partitions[partition_id] = std::move(vertices);
  m_is_partition_loaded[partition_id] = true;
  DASSERT_TRUE(is_loaded(partition_id));
}
/**
 * Decrements the value of a log entry
 */
void thr_dec_log_entry(size_t entry, double value) {
  event_log_thread_local_type* ev = get_thread_counter_ref();
  DASSERT_LT(entry, MAX_LOG_SIZE);
  // does not work for cumulative logs
  DASSERT_NE((int)logs[entry]->logtype, (int)log_type::CUMULATIVE);
  DASSERT_EQ(logs[entry]->is_callback_entry, false);
  ev->values[entry] -= value;
}
/// Return the next element in the chunk.
inline std::vector<flexible_type>&& sframe_reader_buffer::next() {
  if (m_buffer_pos == m_buffer.size()) {
    refill();
    m_buffer_pos = 0;
  }
  DASSERT_LT(m_buffer_pos, m_buffer.size());
  ++m_iter;
  return std::move(m_buffer[m_buffer_pos++]);
}
/// Return the next element in the chunk.
template <typename T>
T&& sarray_reader_buffer<T>::next() {
  if (m_buffer_pos == m_buffer.size()) {
    refill();
    m_buffer_pos = 0;
  }
  DASSERT_LT(m_buffer_pos, m_buffer.size());
  ++m_iter;
  return std::move(m_buffer[m_buffer_pos++]);
}
/// Return the next element in the chunk.
inline const sframe_rows::row& sframe_reader_buffer::next() {
  if (m_buffer_pos == m_buffer.num_rows()) {
    refill();
    m_buffer_pos = 0;
  }
  DASSERT_LT(m_buffer_pos, m_buffer.num_rows());
  ++m_iter;
  m_current.copy_reference(m_buffer[m_buffer_pos++]);
  return m_current;
}
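// Illustrative sketch (not library code) of the refill-on-exhaust pattern the
// three next() variants above share: serve elements from an in-memory buffer,
// refill from the underlying source when the buffer runs dry, and hand the
// element out without copying. The int "source" below is hypothetical.
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

struct toy_buffered_reader {
  std::vector<int> m_buffer;
  size_t m_buffer_pos = 0;
  int m_next_value = 0;                  // stand-in for the real backing store

  void refill() {                        // fetch the next chunk of 4 elements
    m_buffer.clear();
    for (int i = 0; i < 4; ++i) m_buffer.push_back(m_next_value++);
    m_buffer_pos = 0;
  }

  int&& next() {
    if (m_buffer_pos == m_buffer.size()) refill();
    return std::move(m_buffer[m_buffer_pos++]);
  }
};

int main() {
  toy_buffered_reader r;
  for (int i = 0; i < 10; ++i) std::printf("%d ", r.next());
  std::printf("\n");                     // prints 0 1 2 ... 9
  return 0;
}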
void add(const T& t) {
  // we use std::hash first, to bring it to a 64-bit number
  // Then cityhash's hash64 twice to distribute the hash.
  // empirically, one hash64 does not produce enough scattering to
  // get a good estimate
  size_t h = hash64(hash64(std::hash<T>()(t)));
  size_t index = h >> (64 - m_b);
  DASSERT_LT(index, m_buckets.size());
  unsigned char pos = h != 0 ? 1 + __builtin_clz(h) : sizeof(size_t);
  m_buckets[index] = std::max(m_buckets[index], pos);
}
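// Illustrative sketch (not library code) of the index/rank split in add():
// the top b bits of a 64-bit hash pick a bucket, the leading-zero count gives
// the rank, and each bucket keeps its maximum rank. The constant-multiply
// "hash" is only a stand-in for the library's hash64(hash64(std::hash(...)))
// chain; b, buckets and the loop bound are assumed values.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const size_t b = 4;                               // 2^4 = 16 buckets
  std::vector<unsigned char> buckets(1 << b, 0);

  for (uint64_t x = 1; x <= 1000; ++x) {
    uint64_t h = x * 0x9E3779B97F4A7C15ULL;         // stand-in hash
    size_t index = h >> (64 - b);                   // top b bits -> bucket
    unsigned char pos =
        h != 0 ? static_cast<unsigned char>(1 + __builtin_clzll(h)) : 64;
    buckets[index] = std::max(buckets[index], pos);
  }

  for (size_t i = 0; i < buckets.size(); ++i)
    std::printf("bucket %2zu: max rank %d\n", i, buckets[i]);
  return 0;
}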
static inline size_t get_bin_idx(flexible_type value,
                                 double scale_min,
                                 double scale_max) {
  double range = scale_max - scale_min;
  size_t bin = std::floor(
      ((static_cast<double>(value) - scale_min) / range) *
      static_cast<double>(continuous_result::MAX_BINS));
  if (bin == continuous_result::MAX_BINS) {
    bin -= 1;
  }
  DASSERT_LT(bin, continuous_result::MAX_BINS);
  return bin;
}
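// Illustrative sketch (not library code) of the binning rule in get_bin_idx:
// map a value in [scale_min, scale_max] onto MAX_BINS equal-width bins and
// fold the exact upper edge into the last bin. MAX_BINS here is an assumed
// stand-in for continuous_result::MAX_BINS.
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdio>

static const size_t MAX_BINS = 1000;

size_t bin_index(double value, double scale_min, double scale_max) {
  double range = scale_max - scale_min;
  size_t bin = static_cast<size_t>(
      std::floor(((value - scale_min) / range) * static_cast<double>(MAX_BINS)));
  if (bin == MAX_BINS) bin -= 1;        // value == scale_max -> last bin
  assert(bin < MAX_BINS);
  return bin;
}

int main() {
  std::printf("%zu %zu %zu\n",
              bin_index(0.0, 0.0, 10.0),     // 0
              bin_index(5.0, 0.0, 10.0),     // 500
              bin_index(10.0, 0.0, 10.0));   // 999: upper edge folded in
  return 0;
}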
/**
 * Create an SFrame parallel iterator.
 */
parallel_sframe_iterator::parallel_sframe_iterator(
    const parallel_sframe_iterator_initializer& it_init,
    size_t thread_idx, size_t num_threads)
    : sources(it_init.sources), column_offsets(it_init.column_offsets) {
  DASSERT_LT(thread_idx, num_threads);
  buffers.resize(sources.size());

  start_idx = it_init.row_start + (thread_idx * it_init.global_block_size) / num_threads;
  end_idx   = it_init.row_start + ((thread_idx + 1) * it_init.global_block_size) / num_threads;

  max_block_size = std::min(sframe_config::SFRAME_READ_BATCH_SIZE, end_idx - start_idx);

  for (auto& b : buffers)
    b.reserve(max_block_size);

  reset();
}
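// Illustrative sketch (not library code) of the row-range split used by the
// constructor above: thread t of n gets
// [row_start + t*B/n, row_start + (t+1)*B/n), which tiles the B rows with no
// gaps or overlaps even when B is not divisible by n. The numbers are assumed.
#include <cstddef>
#include <cstdio>

int main() {
  const size_t row_start = 100, B = 10, n = 3;
  for (size_t t = 0; t < n; ++t) {
    size_t start = row_start + (t * B) / n;
    size_t end   = row_start + ((t + 1) * B) / n;
    std::printf("thread %zu: [%zu, %zu)\n", t, start, end);
  }
  return 0;
}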
// Add a new element to the specified bucket.
void add(const value_type& val, size_t bucketid) {
  DASSERT_LT(bucketid, buckets.size());
  buckets[bucketid]->add(val);
}
inline void load_vertex_partition(size_t partition_id,
                                  std::vector<sgraph_vertex_data>& vertices) {
  DASSERT_LT(partition_id, m_num_partitions);
  DASSERT_FALSE(m_is_partition_loaded[partition_id]);
  m_vertex_partitions[partition_id] = &vertices;
  m_is_partition_loaded[partition_id] = true;
}
inline bool is_loaded(size_t partition_id) {
  DASSERT_LT(partition_id, m_num_partitions);
  return m_is_partition_loaded[partition_id];
}
inline std::vector<sgraph_vertex_data>& get_partition(size_t partition_id) {
  DASSERT_LT(partition_id, m_num_partitions);
  DASSERT_TRUE(is_loaded(partition_id));
  return m_vertex_partitions[partition_id];
}
/**
 * Increments the value of a log entry
 */
inline void thr_inc_log_entry(size_t entry, double value) {
  event_log_thread_local_type* ev = get_thread_counter_ref();
  DASSERT_LT(entry, MAX_LOG_SIZE);
  DASSERT_EQ(logs[entry]->is_callback_entry, false);
  ev->values[entry] += value;
}
flexible_type operator()(const flexible_type& i) const {
  DASSERT_LT(i, m_id_vec->size());
  return m_id_vec->at(i);
}