// ----------------------------------------------------------------------------
//  Generates an opacity map from the transfer function specification
// ----------------------------------------------------------------------------
void mapOpacity(Image3D<float> & map, std::list<std::shared_ptr<Node>> transfer)
{
    float samples = static_cast<float>(map.width()) - 1;

    auto buffer = map.data();
    memset(buffer, 0, map.size()*sizeof(float));

    auto iter = transfer.begin();
    auto curr = *iter; iter++;

    while (iter != transfer.end())
    {
        auto next = *iter;

        // Sample range covered by this pair of transfer-function nodes
        size_t x1 = static_cast<size_t>(curr->density * samples);
        if (x1 >= map.width()) x1 = map.width()-1;
        size_t x2 = static_cast<size_t>(next->density * samples);
        if (x2 >= map.width()) x2 = map.width()-1;

        float y1 = curr->material->opticalThickness;
        float y2 = next->material->opticalThickness;

        for (size_t i = x1; i <= x2; i++)
        {
            float px = i / samples - curr->density;
            float py = next->density - curr->density;
            float part = low( high(px / py, 0.0f), 1.0f );

            // Interpolate the opacity between the two nodes and convert it to
            // optical thickness (-log of the remaining transmittance)
            buffer[i] = - logf( 1.f - (1.f - part) * y1 - part * y2 );
        }

        curr = next; iter++;
    }
}
// ----------------------------------------------------------------------------
//  Generates a specular map from the transfer function specification
// ----------------------------------------------------------------------------
void mapSpecular(Image3D<Vector<UInt8,4>> & map, std::list<std::shared_ptr<Node>> transfer)
{
    float samples = static_cast<float>(map.width()) - 1;

    auto buffer = map.data();
    memset(buffer, 0, map.size()*sizeof(Vector<UInt8,4>));

    auto iter = transfer.begin();
    auto curr = *iter; iter++;

    while (iter != transfer.end())
    {
        auto next = *iter;

        // Sample range covered by this pair of transfer-function nodes
        size_t x1 = static_cast<size_t>(curr->density * samples);
        if (x1 >= map.width()) x1 = map.width()-1;
        size_t x2 = static_cast<size_t>(next->density * samples);
        if (x2 >= map.width()) x2 = map.width()-1;

        // RGB = specular color, A = glossiness
        Vector3f s1(curr->material->specular);
        Vector3f s2(next->material->specular);
        Vector4f y1(s1[0], s1[1], s1[2], curr->material->glossiness*255.0f);
        Vector4f y2(s2[0], s2[1], s2[2], next->material->glossiness*255.0f);

        for (size_t i = x1; i <= x2; i++)
        {
            float px = i / samples - curr->density;
            float py = next->density - curr->density;
            float part = low( high(px / py, 0.0f), 1.0f );

            buffer[i] = static_cast<Vector<UInt8,4>>(y1*(1.f - part) + y2*part);
        }

        curr = next; iter++;
    }
}
// ----------------------------------------------------------------------------
//  Generates an emissive map from the transfer function specification
// ----------------------------------------------------------------------------
void mapEmissive(Image3D<Vector4f> & map, std::list<std::shared_ptr<Node>> transfer)
{
    float samples = static_cast<float>(map.width()) - 1;

    auto buffer = map.data();
    memset(buffer, 0, map.size()*sizeof(Vector4f));

    auto iter = transfer.begin();
    auto curr = *iter; iter++;

    while (iter != transfer.end())
    {
        auto next = *iter;

        // Sample range covered by this pair of transfer-function nodes
        size_t x1 = static_cast<size_t>(curr->density * samples);
        if (x1 >= map.width()) x1 = map.width()-1;
        size_t x2 = static_cast<size_t>(next->density * samples);
        if (x2 >= map.width()) x2 = map.width()-1;

        // Emissive color scaled to [0,1] and weighted by each node's own strength
        Vector3f s1 = Vector3f(curr->material->emissive) / 255.0f * curr->material->emissiveStrength;
        Vector3f s2 = Vector3f(next->material->emissive) / 255.0f * next->material->emissiveStrength;
        Vector4f y1(s1[0], s1[1], s1[2], 0.0f);
        Vector4f y2(s2[0], s2[1], s2[2], 0.0f);

        for (size_t i = x1; i <= x2; i++)
        {
            float px = i / samples - curr->density;
            float py = next->density - curr->density;
            float part = low( high(px / py, 0.0f), 1.0f );

            buffer[i] = y1*(1.f - part) + y2*part;
        }

        curr = next; iter++;
    }
}
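// ----------------------------------------------------------------------------
//  Illustrative sketch, not part of the original source: the three map*
//  helpers above are intended to be driven from the same transfer-function
//  node list, producing one lookup map per material property. The helper name
//  buildTransferMaps and the idea that the caller owns pre-sized maps are
//  assumptions made for this example only.
// ----------------------------------------------------------------------------
void buildTransferMaps(const std::list<std::shared_ptr<Node>> & transfer,
                       Image3D<float>                         & opacity,
                       Image3D<Vector<UInt8,4>>               & specular,
                       Image3D<Vector4f>                      & emissive)
{
    // Each helper rewrites its map in full from the node list
    mapOpacity (opacity,  transfer);
    mapSpecular(specular, transfer);
    mapEmissive(emissive, transfer);
}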
/*
 * Kernel event
 */
KernelEvent::KernelEvent(CommandQueue *parent,
                         Kernel *kernel,
                         cl_uint work_dim,
                         const size_t *global_work_offset,
                         const size_t *global_work_size,
                         const size_t *local_work_size,
                         cl_uint num_events_in_wait_list,
                         const cl_event *event_wait_list,
                         cl_int *errcode_ret)
    : Event(parent, Queued, num_events_in_wait_list, event_wait_list, errcode_ret),
      p_work_dim(work_dim), p_kernel(kernel), p_timeout_ms(0)
{
    clRetainKernel(desc(p_kernel));

    if (*errcode_ret != CL_SUCCESS) return;

    *errcode_ret = CL_SUCCESS;

    // Sanity checks
    if (!kernel)
    {
        *errcode_ret = CL_INVALID_KERNEL;
        return;
    }

    // Check that the kernel was built for parent's device
    cl_device_id d_device = 0;
    cl_context   k_ctx, q_ctx;
    size_t       max_work_group_size;
    cl_uint      max_dims = 0;

    *errcode_ret = parent->info(CL_QUEUE_DEVICE, sizeof(cl_device_id), &d_device, 0);
    if (*errcode_ret != CL_SUCCESS) return;
    auto device = pobj(d_device);

    *errcode_ret  = parent->info(CL_QUEUE_CONTEXT, sizeof(cl_context), &q_ctx, 0);
    *errcode_ret |= kernel->info(CL_KERNEL_CONTEXT, sizeof(cl_context), &k_ctx, 0);
    *errcode_ret |= device->info(CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
                                 &max_work_group_size, 0);
    *errcode_ret |= device->info(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(size_t),
                                 &max_dims, 0);
    *errcode_ret |= device->info(CL_DEVICE_MAX_WORK_ITEM_SIZES, max_dims * sizeof(size_t),
                                 p_max_work_item_sizes, 0);

    if (*errcode_ret != CL_SUCCESS) return;

    p_dev_kernel = kernel->deviceDependentKernel(device);

    if (!p_dev_kernel)
    {
        *errcode_ret = CL_INVALID_PROGRAM_EXECUTABLE;
        return;
    }

    // Check that contexts match
    if (k_ctx != q_ctx)
    {
        *errcode_ret = CL_INVALID_CONTEXT;
        return;
    }

    // Check args
    if (!kernel->argsSpecified())
    {
        *errcode_ret = CL_INVALID_KERNEL_ARGS;
        return;
    }

    // Check dimension
    if ((work_dim == 0 || work_dim > max_dims) || (max_dims > MAX_WORK_DIMS))
    {
        *errcode_ret = CL_INVALID_WORK_DIMENSION;
        return;
    }

    // Populate work_offset, work_size and local_work_size
    size_t work_group_size = 1;

    cl_uint dims[3];
    kernel->reqdWorkGroupSize(kernel->deviceDependentModule(device), dims);
    uint32_t reqd_x = dims[0];
    uint32_t reqd_y = dims[1];
    uint32_t reqd_z = dims[2];
    bool reqd_any = reqd_x > 0 || reqd_y > 0 || reqd_z > 0;

    if (reqd_any)
    {
        // __attribute__((reqd_work_group_size(X, Y, Z))) is set but no local size was specified
        if (!local_work_size)
        {
            *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
            return;
        }
        // __attribute__((reqd_work_group_size(X, Y, Z))) doesn't match the given local size
        else
        {
            if ((                 local_work_size[0] != reqd_x) ||
                (work_dim > 1 &&  local_work_size[1] != reqd_y) ||
                (work_dim > 2 &&  local_work_size[2] != reqd_z))
            {
                *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
                return;
            }
        }
    }

    // If kernel has zero arguments
    if (local_work_size && local_work_size[0] == 0) return;

    cl_uint i;
    for (i = 0; i < work_dim; ++i)
    {
        if (global_work_offset)
        {
            p_global_work_offset[i] = global_work_offset[i];
        }
        else
        {
            p_global_work_offset[i] = 0;
        }

        if (!global_work_size || !global_work_size[i])
        {
            *errcode_ret = CL_INVALID_GLOBAL_WORK_SIZE;
            return;
        }
        p_global_work_size[i] = global_work_size[i];

        if (!local_work_size)
        {
            // Guess the best value according to the device
            p_local_work_size[i] =
                p_dev_kernel->guessWorkGroupSize(work_dim, i, global_work_size[i]);
        }
        else
        {
            // Check divisibility
            if ((global_work_size[i] % local_work_size[i]) != 0)
            {
                *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
                return;
            }

            // Not too big?
            if (local_work_size[i] > p_max_work_item_sizes[i])
            {
                *errcode_ret = CL_INVALID_WORK_ITEM_SIZE;
                return;
            }

            p_local_work_size[i] = local_work_size[i];
            work_group_size *= local_work_size[i];
        }
    }

    // Initialize missing dimensions
    for (; i < max_dims; i++)
    {
        p_global_work_offset[i] = 0;
        p_global_work_size[i]   = 1;
        p_local_work_size[i]    = 1;
    }

    // Check that we don't ask too much of the device
    if (work_group_size > max_work_group_size)
    {
        *errcode_ret = CL_INVALID_WORK_GROUP_SIZE;
        return;
    }

    // Check arguments (buffer alignment, image size, ...)
    for (unsigned int i = 0; i < kernel->numArgs(); ++i)
    {
        const Kernel::Arg &a = kernel->arg(i);

        if (a.kind() == Kernel::Arg::Buffer && a.file() != Kernel::Arg::Local)
        {
            MemObject *buffer = *(MemObject **)(a.value(0));

            if (!BufferEvent::isSubBufferAligned(buffer, device))
            {
                *errcode_ret = CL_MISALIGNED_SUB_BUFFER_OFFSET;
                return;
            }

            clRetainMemObject(desc(buffer));
            p_mem_objects.push_back((MemObject *) buffer);
        }
        else if (a.kind() == Kernel::Arg::Image2D)
        {
            Image2D *image = *(Image2D **)(a.value(0));
            size_t maxWidth, maxHeight;

            *errcode_ret  = device->info(CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t),
                                         &maxWidth, 0);
            *errcode_ret |= device->info(CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t),
                                         &maxHeight, 0);

            if (*errcode_ret != CL_SUCCESS) return;

            if (image->width() > maxWidth || image->height() > maxHeight)
            {
                *errcode_ret = CL_INVALID_IMAGE_SIZE;
                return;
            }

            clRetainMemObject(desc(image));
            p_mem_objects.push_back((MemObject *) image);
        }
        else if (a.kind() == Kernel::Arg::Image3D)
        {
            Image3D *image = *(Image3D **)a.value(0);
            size_t maxWidth, maxHeight, maxDepth;

            *errcode_ret  = device->info(CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t),
                                         &maxWidth, 0);
            *errcode_ret |= device->info(CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t),
                                         &maxHeight, 0);
            *errcode_ret |= device->info(CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t),
                                         &maxDepth, 0);

            if (*errcode_ret != CL_SUCCESS) return;

            if (image->width() > maxWidth || image->height() > maxHeight ||
                image->depth() > maxDepth)
            {
                *errcode_ret = CL_INVALID_IMAGE_SIZE;
                return;
            }

            clRetainMemObject(desc(image));
            p_mem_objects.push_back((MemObject *) image);
        }
    }

    // Check whether the kernel has a timeout specified and the CommandQueue allows it
    if (kernel->getTimeout() > 0)
    {
        cl_command_queue_properties queue_props;

        *errcode_ret = parent->info(CL_QUEUE_PROPERTIES, sizeof(cl_command_queue_properties),
                                    &queue_props, 0);
        if (*errcode_ret != CL_SUCCESS) return;

        if ((queue_props & CL_QUEUE_KERNEL_TIMEOUT_COMPUTE_UNIT_TI) != 0)
            p_timeout_ms = kernel->getTimeout();
    }
}