Exemple #1
0
/*
 * PHNew - allocate and initialise a PascalHashTable on the given heap.
 *
 * heap            : passed to OPCalloc for both the table header and the
 *                   bucket array.
 * num_objects     : number of objects the caller intends to store; together
 *                   with `load` it determines the initial capacity.
 * load            : load factor, must satisfy 0.0 < load < 1.0 (enforced
 *                   with op_assert).
 * key_inline_size : per-bucket byte count; added to the bucket size and
 *                   recorded on the table.
 * valsize         : per-bucket byte count; added to the bucket size and
 *                   recorded on the table.
 *
 * Returns the new table, or NULL when either allocation fails or when the
 * bucket array size (bucket_size * capacity) cannot be represented in a
 * size_t.
 */
PascalHashTable*
PHNew(OPHeap* heap, uint64_t num_objects, double load,
      size_t key_inline_size, size_t valsize)
{
  PascalHashTable* table;
  uint64_t capacity;
  uint32_t capacity_clz, capacity_ms4b, capacity_msb;
  size_t bucket_size;
  void* bucket_ptr;

  op_assert(load > 0.0 && load < 1.0,
            "load %lf must within close interval (0.0, 1.0)\n", load);

  /* Start from num_objects / load and never go below 8 slots; the floor of
   * 8 also guarantees capacity_msb >= 4, so the shifts below are valid. */
  capacity = (uint64_t)(num_objects / load);
  if (capacity < 8)
    capacity = 8;

  /* Use the `ll` builtin and a ULL constant: capacity is 64 bits wide, but
   * `unsigned long` (the operand type of __builtin_clzl and of 1UL) is only
   * 32 bits on LLP64/ILP32 targets, which would truncate the value. */
  capacity_clz = __builtin_clzll(capacity);
  capacity_msb = 64 - capacity_clz;

  /* Re-express capacity as capacity_ms4b << (capacity_msb - 4): divide by
   * the weight of the bit four places below the top one via round_up_div
   * (presumably a ceiling division -- confirm against its definition),
   * then scale back up. */
  capacity_ms4b = round_up_div(capacity, 1ULL << (capacity_msb - 4));
  capacity = (uint64_t)capacity_ms4b << (capacity_msb - 4);

  bucket_size = sizeof(oplenref_t) + key_inline_size + valsize;

  /* Refuse sizes whose product would wrap around size_t; a wrapped product
   * would make OPCalloc return an undersized bucket array. */
  if (bucket_size == 0 || capacity > SIZE_MAX / bucket_size)
    return NULL;

  table = OPCalloc(heap, 1, sizeof(PascalHashTable));
  if (!table)
    return NULL;
  bucket_ptr = OPCalloc(heap, 1, bucket_size * capacity);
  if (!bucket_ptr)
    {
      /* Do not leak the header when the bucket array cannot be allocated. */
      OPDealloc(table);
      return NULL;
    }

  table->bucket_ref = OPPtr2Ref(bucket_ptr);
  table->large_data_threshold = DEFAULT_LARGE_DATA_THRESHOLD;
  table->capacity_clz = capacity_clz;
  table->capacity_ms4b = capacity_ms4b;
  /* Object-count bounds: capacity * load and 20% of capacity.
   * NOTE(review): presumably the grow/shrink thresholds -- confirm against
   * the insert/delete paths. */
  table->objcnt_high = (uint64_t)(capacity * load);
  table->objcnt_low = capacity * 2 / 10;
  table->key_inline_size = key_inline_size;
  table->valsize = valsize;
  return table;
}
/// Installs a new scan sequence and re-renders the spline scatterers for it.
///
/// The work is done in this order:
///   1. reject the sequence unless has_equal_timestamps() accepts it;
///   2. forward the sequence to m_fixed_alg;
///   3. require a non-empty knot vector (i.e. set_scatterers() was called)
///      and at least one scanline;
///   4. evaluate all B-spline basis functions at the timestamp of scanline 0,
///      sanity-check the non-zero index range, and hand the non-zero values
///      to splineAlg1_updateConstantMemory();
///   5. launch the render kernel on stream 0 and wait for the device.
///
/// @param new_scan_sequence  sequence to install.
/// @throws std::runtime_error on unequal timestamps, missing scatterers, an
///         empty sequence, a failed basis sanity check or a failed upload.
/// @throws std::logic_error   if the number of non-zero basis functions is
///         not m_spline_degree+1.
void GpuSplineAlgorithm1::set_scan_sequence(ScanSequence::s_ptr new_scan_sequence) {
    // Only sequences whose lines share one timestamp are supported for now.
    if (!has_equal_timestamps(new_scan_sequence)) {
        throw std::runtime_error("scan sequences must currently have equal timestamps");
    }

    m_fixed_alg->set_scan_sequence(new_scan_sequence);

    // An empty knot vector means set_scatterers() has not been called yet.
    if (m_common_knots.size() == 0) {
        throw std::runtime_error("set_scatterers() must be called before set_scan_sequence");
    }

    const auto line_count = new_scan_sequence->get_num_lines();
    if (line_count <= 0) {
        throw std::runtime_error("No scanlines");
    }

    // HACK: the spline parameter is taken from the first scanline only.
    const float eval_param = new_scan_sequence->get_scanline(0).get_timestamp();

    // Index range of the basis functions that are non-zero at eval_param;
    // there must be exactly degree+1 of them.
    int first_nonzero, last_nonzero;
    std::tie(first_nonzero, last_nonzero) =
        bspline_storve::get_lower_upper_inds(m_common_knots, eval_param, m_spline_degree);
    const auto nonzero_count = last_nonzero - first_nonzero + 1;
    if (nonzero_count != m_spline_degree+1) {
        throw std::logic_error("illegal number of non-zero basis functions");
    }

    // Evaluate every basis function, not just the non-zero ones, so that the
    // sanity check below can confirm the rest really are zero.
    // TODO: move to set_scatterers()?
    std::vector<float> basis_values(m_num_cs);
    for (int cs = 0; cs < m_num_cs; ++cs) {
        basis_values[cs] =
            bspline_storve::bsplineBasis(cs, m_spline_degree, eval_param, m_common_knots);
    }

    if (!sanity_check_spline_lower_upper_bound(basis_values, first_nonzero, last_nonzero)) {
        throw std::runtime_error("b-spline basis bounds failed sanity check");
    }

    // Upload only the non-zero slice of the basis values.
    const auto nonzero_begin = basis_values.data() + first_nonzero;
    const auto nonzero_bytes = nonzero_count*sizeof(float);
    if (!splineAlg1_updateConstantMemory(nonzero_begin, nonzero_bytes)) {
        throw std::runtime_error("Failed copying to symbol memory");
    }

    // One thread per spline, 128 threads per block.
    const int threads_per_block = 128;
    const int block_count = round_up_div(m_num_splines, threads_per_block);

    const cudaStream_t stream = 0;
    launch_RenderSplineKernel(block_count, threads_per_block, stream,
                              m_control_xs->data(),
                              m_control_ys->data(),
                              m_control_zs->data(),
                              m_fixed_alg->m_device_point_xs->data(),
                              m_fixed_alg->m_device_point_ys->data(),
                              m_fixed_alg->m_device_point_zs->data(),
                              first_nonzero,
                              last_nonzero,
                              m_num_splines);

    // Wait for the device before returning.
    cudaErrorCheck( cudaDeviceSynchronize() );
}
Exemple #3
0
/// Returns round_up_div(current_len + sizeof(u16_t) + next_packet_len, n).
///
/// NOTE(review): presumably the per-shard length if one more packet of
/// next_packet_len bytes, plus a u16_t-sized prefix, were appended and the
/// blob then split over n shards -- confirm against the callers.
int blob_encode_t::get_shard_len(int n,int next_packet_len)
{
	const int prefix_len=static_cast<int>(sizeof(u16_t));
	const auto projected_len=current_len+prefix_len+next_packet_len;
	return round_up_div(projected_len,n);
}
Exemple #4
0
/// Returns round_up_div(current_len, n).
///
/// NOTE(review): presumably the per-shard length when the current blob is
/// split over n shards -- confirm against the callers.
int blob_encode_t::get_shard_len(int n)
{
	const auto shard_len=round_up_div(current_len,n);
	return shard_len;
}