CUGIP_DECL_DEVICE inline float atomicFloatCAS(float *address, float old, float val) { int i_val = __float_as_int(val); int tmp0 = __float_as_int(old); return __int_as_float(atomicCAS((int *)address, tmp0, i_val)); }
CCL_NAMESPACE_BEGIN static void shade_background_pixels(Device *device, DeviceScene *dscene, int res, vector<float3>& pixels, Progress& progress) { /* create input */ int width = res; int height = res; device_vector<uint4> d_input; device_vector<float4> d_output; uint4 *d_input_data = d_input.resize(width*height); for(int y = 0; y < height; y++) { for(int x = 0; x < width; x++) { float u = x/(float)width; float v = y/(float)height; uint4 in = make_uint4(__float_as_int(u), __float_as_int(v), 0, 0); d_input_data[x + y*width] = in; } } /* compute on device */ d_output.resize(width*height); memset((void*)d_output.data_pointer, 0, d_output.memory_size()); device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); device->mem_alloc(d_input, MEM_READ_ONLY); device->mem_copy_to(d_input); device->mem_alloc(d_output, MEM_WRITE_ONLY); DeviceTask main_task(DeviceTask::SHADER); main_task.shader_input = d_input.device_pointer; main_task.shader_output = d_output.device_pointer; main_task.shader_eval_type = SHADER_EVAL_BACKGROUND; main_task.shader_x = 0; main_task.shader_w = width*height; main_task.num_samples = 1; main_task.get_cancel = function_bind(&Progress::get_cancel, &progress); /* disabled splitting for now, there's an issue with multi-GPU mem_copy_from */ list<DeviceTask> split_tasks; main_task.split(split_tasks, 1, 128*128); foreach(DeviceTask& task, split_tasks) { device->task_add(task); device->task_wait(); device->mem_copy_from(d_output, task.shader_x, 1, task.shader_w, sizeof(float4)); }
//float atomic func __device__ static float atomicMul(float* address, float val) { int* address_as_int = (int*)address; int old = *address_as_int, assumed; do { assumed = old; old = atomicCAS(address_as_int, assumed, __float_as_int(__int_as_float(assumed) * val)); } while (assumed != old); return __int_as_float(old); }
__device__ __forceinline__ float atomicAdd(float* address, float val) { #if CV_CUDEV_ARCH >= 200 return ::atomicAdd(address, val); #else int* address_as_i = (int*) address; int old = *address_as_i, assumed; do { assumed = old; old = ::atomicCAS(address_as_i, assumed, __float_as_int(val + __int_as_float(assumed))); } while (assumed != old); return __int_as_float(old); #endif }
__device__ static float atomicMax(float* address, float val) { #if CV_CUDEV_ARCH >= 120 int* address_as_i = (int*) address; int old = *address_as_i, assumed; do { assumed = old; old = ::atomicCAS(address_as_i, assumed, __float_as_int(::fmaxf(val, __int_as_float(assumed)))); } while (assumed != old); return __int_as_float(old); #else (void) address; (void) val; return 0.0f; #endif }
void BVH8::refit_node(int idx, bool leaf, BoundBox &bbox, uint &visibility) { if (leaf) { int4 *data = &pack.leaf_nodes[idx]; int4 c = data[0]; /* Refit leaf node. */ for (int prim = c.x; prim < c.y; prim++) { int pidx = pack.prim_index[prim]; int tob = pack.prim_object[prim]; Object *ob = objects[tob]; if (pidx == -1) { /* Object instance. */ bbox.grow(ob->bounds); } else { /* Primitives. */ const Mesh *mesh = ob->mesh; if (pack.prim_type[prim] & PRIMITIVE_ALL_CURVE) { /* Curves. */ int str_offset = (params.top_level) ? mesh->curve_offset : 0; Mesh::Curve curve = mesh->get_curve(pidx - str_offset); int k = PRIMITIVE_UNPACK_SEGMENT(pack.prim_type[prim]); curve.bounds_grow(k, &mesh->curve_keys[0], &mesh->curve_radius[0], bbox); visibility |= PATH_RAY_CURVE; /* Motion curves. */ if (mesh->use_motion_blur) { Attribute *attr = mesh->curve_attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); if (attr) { size_t mesh_size = mesh->curve_keys.size(); size_t steps = mesh->motion_steps - 1; float3 *key_steps = attr->data_float3(); for (size_t i = 0; i < steps; i++) { curve.bounds_grow(k, key_steps + i * mesh_size, &mesh->curve_radius[0], bbox); } } } } else { /* Triangles. */ int tri_offset = (params.top_level) ? mesh->tri_offset : 0; Mesh::Triangle triangle = mesh->get_triangle(pidx - tri_offset); const float3 *vpos = &mesh->verts[0]; triangle.bounds_grow(vpos, bbox); /* Motion triangles. */ if (mesh->use_motion_blur) { Attribute *attr = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); if (attr) { size_t mesh_size = mesh->verts.size(); size_t steps = mesh->motion_steps - 1; float3 *vert_steps = attr->data_float3(); for (size_t i = 0; i < steps; i++) { triangle.bounds_grow(vert_steps + i * mesh_size, bbox); } } } } } visibility |= ob->visibility; } float4 leaf_data[BVH_ONODE_LEAF_SIZE]; leaf_data[0].x = __int_as_float(c.x); leaf_data[0].y = __int_as_float(c.y); leaf_data[0].z = __uint_as_float(visibility); leaf_data[0].w = __uint_as_float(c.w); memcpy(&pack.leaf_nodes[idx], leaf_data, sizeof(float4) * BVH_ONODE_LEAF_SIZE); } else { float8 *data = (float8 *)&pack.nodes[idx]; bool is_unaligned = (__float_as_uint(data[0].a) & PATH_RAY_NODE_UNALIGNED) != 0; /* Refit inner node, set bbox from children. */ BoundBox child_bbox[8] = {BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty, BoundBox::empty}; int child[8]; uint child_visibility[8] = {0}; int num_nodes = 0; for (int i = 0; i < 8; ++i) { child[i] = __float_as_int(data[(is_unaligned) ? 13 : 7][i]); if (child[i] != 0) { refit_node((child[i] < 0) ? -child[i] - 1 : child[i], (child[i] < 0), child_bbox[i], child_visibility[i]); ++num_nodes; bbox.grow(child_bbox[i]); visibility |= child_visibility[i]; } } if (is_unaligned) { Transform aligned_space[8] = {transform_identity(), transform_identity(), transform_identity(), transform_identity(), transform_identity(), transform_identity(), transform_identity(), transform_identity()}; pack_unaligned_node( idx, aligned_space, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes); } else { pack_aligned_node(idx, child_bbox, child, visibility, 0.0f, 1.0f, num_nodes); } } }