/*
 * Allocate and initialise a PascalHashTable on `heap`.
 *
 * heap            - heap handed to OPCalloc for the table header and buckets.
 * num_objects     - number of objects the table should hold at `load`.
 * load            - target load factor; asserted to satisfy 0.0 < load < 1.0.
 * key_inline_size - bytes reserved per bucket for the inline key.
 * valsize         - bytes reserved per bucket for the value.
 *
 * The capacity starts as num_objects / load (at least 8) and is rounded up so
 * that it can be written as capacity_ms4b << (capacity_msb - 4), i.e. a small
 * mantissa taken from the top four bits times a power of two.  The table
 * records capacity_ms4b and the leading-zero count of the capacity *before*
 * rounding (capacity_clz).
 *
 * Returns the new table, or NULL when the bucket array size cannot be
 * represented in size_t or when either allocation fails.
 */
PascalHashTable* PHNew(OPHeap* heap, uint64_t num_objects, double load,
                       size_t key_inline_size, size_t valsize)
{
  PascalHashTable* table;
  void* bucket_ptr;

  op_assert(load > 0.0 && load < 1.0,
            "load %lf must within close interval (0.0, 1.0)\n", load);

  uint64_t capacity = (uint64_t)(num_objects / load);
  if (capacity < 8)
    capacity = 8;

  /*
   * capacity is 64 bits wide, so use the `long long` builtin and a 64-bit
   * shift operand.  The `long` variants give wrong results (or an undefined
   * shift) on targets where long is only 32 bits.
   */
  const uint32_t capacity_clz = (uint32_t)__builtin_clzll(capacity);
  const uint32_t capacity_msb = 64 - capacity_clz;
  /* capacity >= 8 guarantees capacity_msb >= 4, so the shift is non-negative. */
  const uint32_t mantissa_shift = capacity_msb - 4;
  const uint32_t capacity_ms4b =
    round_up_div(capacity, (uint64_t)1 << mantissa_shift);
  capacity = (uint64_t)capacity_ms4b << mantissa_shift;

  const size_t bucket_size = sizeof(oplenref_t) + key_inline_size + valsize;

  /*
   * Refuse sizes whose byte count would wrap around size_t; otherwise the
   * bucket array would be silently under-allocated.  bucket_size is never
   * zero because it includes sizeof(oplenref_t).
   */
  if (capacity > (size_t)-1 / bucket_size)
    return NULL;

  table = OPCalloc(heap, 1, sizeof(PascalHashTable));
  if (!table)
    return NULL;

  bucket_ptr = OPCalloc(heap, 1, bucket_size * capacity);
  if (!bucket_ptr)
    {
      /* Do not leak the header when the bucket array cannot be allocated. */
      OPDealloc(table);
      return NULL;
    }

  table->bucket_ref = OPPtr2Ref(bucket_ptr);
  table->large_data_threshold = DEFAULT_LARGE_DATA_THRESHOLD;
  table->capacity_clz = capacity_clz;
  table->capacity_ms4b = capacity_ms4b;
  /* Object-count bounds: `load` and 20% of capacity.
   * NOTE(review): presumably the grow/shrink thresholds - confirm at use sites. */
  table->objcnt_high = (uint64_t)(capacity * load);
  table->objcnt_low = capacity * 2 / 10;
  table->key_inline_size = key_inline_size;
  table->valsize = valsize;
  return table;
}
/// Installs a new scan sequence and re-renders the spline scatterers for it.
///
/// The sequence is forwarded to the wrapped fixed-scatterer algorithm, the
/// B-spline basis functions are evaluated at the timestamp of the first
/// scanline, the non-zero ones are uploaded through
/// splineAlg1_updateConstantMemory(), and the render kernel is launched.
///
/// @param new_scan_sequence  Scan sequence whose lines all share one timestamp.
/// @throws std::runtime_error  on unequal timestamps, missing scatterers,
///         an empty scan sequence, a failed basis sanity check or a failed
///         constant-memory upload.
/// @throws std::logic_error    if the number of non-zero basis functions is
///         not m_spline_degree+1.
void GpuSplineAlgorithm1::set_scan_sequence(ScanSequence::s_ptr new_scan_sequence) {
    // A single parameter value is used for the whole sequence, so every
    // scanline has to carry the same timestamp.
    if (!has_equal_timestamps(new_scan_sequence)) {
        throw std::runtime_error("scan sequences must currently have equal timestamps");
    }

    m_fixed_alg->set_scan_sequence(new_scan_sequence);

    // The knot vector is only populated once set_scatterers() has run.
    const bool have_knots = m_common_knots.size() != 0;
    if (!have_knots) {
        throw std::runtime_error("set_scatterers() must be called before set_scan_sequence");
    }

    if (new_scan_sequence->get_num_lines() <= 0) {
        throw std::runtime_error("No scanlines");
    }

    // HACK: the parameter value is taken from the first scanline only.
    const float spline_param = new_scan_sequence->get_scanline(0).get_timestamp();

    // Index range of the basis functions that may be non-zero at spline_param;
    // there must be exactly degree+1 of them.
    int first_nonzero, last_nonzero;
    std::tie(first_nonzero, last_nonzero) =
        bspline_storve::get_lower_upper_inds(m_common_knots, spline_param, m_spline_degree);
    const auto nonzero_count = last_nonzero - first_nonzero + 1;
    if (nonzero_count != m_spline_degree+1) {
        throw std::logic_error("illegal number of non-zero basis functions");
    }

    // Evaluate every basis function (not just the non-zero range) so that the
    // sanity check below can verify the remaining ones really are zero.
    // TODO: move to set_scatterers()?
    std::vector<float> basis_values(m_num_cs);
    for (int cs = 0; cs < m_num_cs; ++cs) {
        basis_values[cs] = bspline_storve::bsplineBasis(cs, m_spline_degree, spline_param, m_common_knots);
    }
    if (!sanity_check_spline_lower_upper_bound(basis_values, first_nonzero, last_nonzero)) {
        throw std::runtime_error("b-spline basis bounds failed sanity check");
    }

    // Upload only the non-zero slice of the basis values.
    const bool uploaded = splineAlg1_updateConstantMemory(basis_values.data() + first_nonzero,
                                                          nonzero_count*sizeof(float));
    if (!uploaded) {
        throw std::runtime_error("Failed copying to symbol memory");
    }

    // One thread per spline, 128 threads per block, on stream 0.
    int threads_per_block = 128;
    int block_count = round_up_div(m_num_splines, threads_per_block);
    const cudaStream_t cuda_stream = 0;
    launch_RenderSplineKernel(block_count, threads_per_block, cuda_stream,
                              m_control_xs->data(),
                              m_control_ys->data(),
                              m_control_zs->data(),
                              m_fixed_alg->m_device_point_xs->data(),
                              m_fixed_alg->m_device_point_ys->data(),
                              m_fixed_alg->m_device_point_zs->data(),
                              first_nonzero,
                              last_nonzero,
                              m_num_splines);

    // Block until the device has finished and surface any error it reports.
    cudaErrorCheck( cudaDeviceSynchronize() );
}
/// Shard length the blob would need if one more packet were appended.
///
/// Adds sizeof(u16_t) bytes plus next_packet_len to current_len and divides
/// the total by n via round_up_div().
///
/// @param n                divisor passed to round_up_div()
///                         (NOTE(review): presumably the shard count - confirm).
/// @param next_packet_len  length of the packet that would be appended.
/// @return the result of round_up_div() for the grown length.
int blob_encode_t::get_shard_len(int n,int next_packet_len)
{
    const auto len_with_next = current_len + (int)sizeof(u16_t) + next_packet_len;
    return round_up_div(len_with_next, n);
}
/// Shard length for the blob at its current size.
///
/// @param n  divisor passed to round_up_div()
///           (NOTE(review): presumably the shard count - confirm).
/// @return current_len divided by n via round_up_div().
int blob_encode_t::get_shard_len(int n)
{
    const auto shard_len = round_up_div(current_len, n);
    return shard_len;
}