CCL_NAMESPACE_BEGIN static void shade_background_pixels(Device *device, DeviceScene *dscene, int res, vector<float3>& pixels, Progress& progress) { /* create input */ int width = res; int height = res; device_vector<uint4> d_input; device_vector<float4> d_output; uint4 *d_input_data = d_input.resize(width*height); for(int y = 0; y < height; y++) { for(int x = 0; x < width; x++) { float u = x/(float)width; float v = y/(float)height; uint4 in = make_uint4(__float_as_int(u), __float_as_int(v), 0, 0); d_input_data[x + y*width] = in; } } /* compute on device */ d_output.resize(width*height); memset((void*)d_output.data_pointer, 0, d_output.memory_size()); device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); device->mem_alloc(d_input, MEM_READ_ONLY); device->mem_copy_to(d_input); device->mem_alloc(d_output, MEM_WRITE_ONLY); DeviceTask main_task(DeviceTask::SHADER); main_task.shader_input = d_input.device_pointer; main_task.shader_output = d_output.device_pointer; main_task.shader_eval_type = SHADER_EVAL_BACKGROUND; main_task.shader_x = 0; main_task.shader_w = width*height; main_task.num_samples = 1; main_task.get_cancel = function_bind(&Progress::get_cancel, &progress); /* disabled splitting for now, there's an issue with multi-GPU mem_copy_from */ list<DeviceTask> split_tasks; main_task.split(split_tasks, 1, 128*128); foreach(DeviceTask& task, split_tasks) { device->task_add(task); device->task_wait(); device->mem_copy_from(d_output, task.shader_x, 1, task.shader_w, sizeof(float4)); }
void ImageManager::device_pack_images(Device *device, DeviceScene *dscene, Progress& progess) { /* for OpenCL, we pack all image textures inside a single big texture, and * will do our own interpolation in the kernel */ size_t size = 0; for(size_t slot = 0; slot < images.size(); slot++) { if(!images[slot]) continue; device_vector<uchar4>& tex_img = dscene->tex_image[slot]; size += tex_img.size(); } uint4 *info = dscene->tex_image_packed_info.resize(images.size()); uchar4 *pixels = dscene->tex_image_packed.resize(size); size_t offset = 0; for(size_t slot = 0; slot < images.size(); slot++) { if(!images[slot]) continue; device_vector<uchar4>& tex_img = dscene->tex_image[slot]; info[slot] = make_uint4(tex_img.data_width, tex_img.data_height, offset, 1); memcpy(pixels+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); offset += tex_img.size(); } if(dscene->tex_image_packed.size()) device->tex_alloc("__tex_image_packed", dscene->tex_image_packed); if(dscene->tex_image_packed_info.size()) device->tex_alloc("__tex_image_packed_info", dscene->tex_image_packed_info); }
void ImageManager::device_pack_images(Device *device, DeviceScene *dscene, Progress& /*progess*/) { /* For OpenCL, we pack all image textures into a single large texture, and * do our own interpolation in the kernel. */ size_t size = 0, offset = 0; ImageDataType type; int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4] + tex_num_images[IMAGE_DATA_TYPE_FLOAT] + tex_num_images[IMAGE_DATA_TYPE_BYTE]; uint4 *info = dscene->tex_image_packed_info.resize(info_size*2); /* Byte4 Textures*/ type = IMAGE_DATA_TYPE_BYTE4; for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) continue; device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot]; size += tex_img.size(); } uchar4 *pixels_byte4 = dscene->tex_image_byte4_packed.resize(size); for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) continue; device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot]; uint8_t options = pack_image_options(type, slot); int index = type_index_to_flattened_slot(slot, type) * 2; info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0); memcpy(pixels_byte4+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); offset += tex_img.size(); } /* Float4 Textures*/ type = IMAGE_DATA_TYPE_FLOAT4; size = 0, offset = 0; for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) continue; device_vector<float4>& tex_img = dscene->tex_float4_image[slot]; size += tex_img.size(); } float4 *pixels_float4 = dscene->tex_image_float4_packed.resize(size); for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) continue; device_vector<float4>& tex_img = dscene->tex_float4_image[slot]; /* todo: support 3D textures, only CPU for now */ uint8_t options = pack_image_options(type, slot); int index = type_index_to_flattened_slot(slot, type) * 2; info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0); memcpy(pixels_float4+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); offset += tex_img.size(); } /* Byte Textures*/ type = IMAGE_DATA_TYPE_BYTE; size = 0, offset = 0; for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) continue; device_vector<uchar>& tex_img = dscene->tex_byte_image[slot]; size += tex_img.size(); } uchar *pixels_byte = dscene->tex_image_byte_packed.resize(size); for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) continue; device_vector<uchar>& tex_img = dscene->tex_byte_image[slot]; uint8_t options = pack_image_options(type, slot); int index = type_index_to_flattened_slot(slot, type) * 2; info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0); memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); offset += tex_img.size(); } /* Float Textures*/ type = IMAGE_DATA_TYPE_FLOAT; size = 0, offset = 0; for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) continue; device_vector<float>& tex_img = dscene->tex_float_image[slot]; size += tex_img.size(); } float *pixels_float = dscene->tex_image_float_packed.resize(size); for(size_t slot = 0; slot < images[type].size(); slot++) { if(!images[type][slot]) continue; device_vector<float>& tex_img = dscene->tex_float_image[slot]; /* todo: support 3D textures, only CPU for now */ uint8_t options = pack_image_options(type, slot); int index = type_index_to_flattened_slot(slot, type) * 2; info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options); info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0); memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size()); offset += tex_img.size(); } if(dscene->tex_image_byte4_packed.size()) { if(dscene->tex_image_byte4_packed.device_pointer) { thread_scoped_lock device_lock(device_mutex); device->tex_free(dscene->tex_image_byte4_packed); } device->tex_alloc("__tex_image_byte4_packed", dscene->tex_image_byte4_packed); } if(dscene->tex_image_float4_packed.size()) { if(dscene->tex_image_float4_packed.device_pointer) { thread_scoped_lock device_lock(device_mutex); device->tex_free(dscene->tex_image_float4_packed); } device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed); } if(dscene->tex_image_byte_packed.size()) { if(dscene->tex_image_byte_packed.device_pointer) { thread_scoped_lock device_lock(device_mutex); device->tex_free(dscene->tex_image_byte_packed); } device->tex_alloc("__tex_image_byte_packed", dscene->tex_image_byte_packed); } if(dscene->tex_image_float_packed.size()) { if(dscene->tex_image_float_packed.device_pointer) { thread_scoped_lock device_lock(device_mutex); device->tex_free(dscene->tex_image_float_packed); } device->tex_alloc("__tex_image_float_packed", dscene->tex_image_float_packed); } if(dscene->tex_image_packed_info.size()) { if(dscene->tex_image_packed_info.device_pointer) { thread_scoped_lock device_lock(device_mutex); device->tex_free(dscene->tex_image_packed_info); } device->tex_alloc("__tex_image_packed_info", dscene->tex_image_packed_info); } }
static inline __host__ __device__ uint4 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z, Ncv32u w) {return make_uint4(x,y,z,w);}
template<> inline __host__ __device__ uint4 _pixMakeZero<uint4>() {return make_uint4(0,0,0,0);}