Exemplo n.º 1
0
CCL_NAMESPACE_BEGIN

static void shade_background_pixels(Device *device, DeviceScene *dscene, int res, vector<float3>& pixels, Progress& progress)
{
	/* create input */
	int width = res;
	int height = res;

	device_vector<uint4> d_input;
	device_vector<float4> d_output;

	uint4 *d_input_data = d_input.resize(width*height);

	for(int y = 0; y < height; y++) {
		for(int x = 0; x < width; x++) {
			float u = x/(float)width;
			float v = y/(float)height;

			uint4 in = make_uint4(__float_as_int(u), __float_as_int(v), 0, 0);
			d_input_data[x + y*width] = in;
		}
	}

	/* compute on device */
	d_output.resize(width*height);
	memset((void*)d_output.data_pointer, 0, d_output.memory_size());

	device->const_copy_to("__data", &dscene->data, sizeof(dscene->data));

	device->mem_alloc(d_input, MEM_READ_ONLY);
	device->mem_copy_to(d_input);
	device->mem_alloc(d_output, MEM_WRITE_ONLY);

	DeviceTask main_task(DeviceTask::SHADER);
	main_task.shader_input = d_input.device_pointer;
	main_task.shader_output = d_output.device_pointer;
	main_task.shader_eval_type = SHADER_EVAL_BACKGROUND;
	main_task.shader_x = 0;
	main_task.shader_w = width*height;
	main_task.num_samples = 1;
	main_task.get_cancel = function_bind(&Progress::get_cancel, &progress);

	/* disabled splitting for now, there's an issue with multi-GPU mem_copy_from */
	list<DeviceTask> split_tasks;
	main_task.split(split_tasks, 1, 128*128);

	foreach(DeviceTask& task, split_tasks) {
		device->task_add(task);
		device->task_wait();
		device->mem_copy_from(d_output, task.shader_x, 1, task.shader_w, sizeof(float4));
	}
Exemplo n.º 2
0
void ImageManager::device_pack_images(Device *device, DeviceScene *dscene, Progress& progess)
{
	/* for OpenCL, we pack all image textures inside a single big texture, and
	 * will do our own interpolation in the kernel */
	size_t size = 0;

	for(size_t slot = 0; slot < images.size(); slot++) {
		if(!images[slot])
			continue;

		device_vector<uchar4>& tex_img = dscene->tex_image[slot];
		size += tex_img.size();
	}

	uint4 *info = dscene->tex_image_packed_info.resize(images.size());
	uchar4 *pixels = dscene->tex_image_packed.resize(size);

	size_t offset = 0;

	for(size_t slot = 0; slot < images.size(); slot++) {
		if(!images[slot])
			continue;

		device_vector<uchar4>& tex_img = dscene->tex_image[slot];

		info[slot] = make_uint4(tex_img.data_width, tex_img.data_height, offset, 1);

		memcpy(pixels+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
		offset += tex_img.size();
	}

	if(dscene->tex_image_packed.size())
		device->tex_alloc("__tex_image_packed", dscene->tex_image_packed);
	if(dscene->tex_image_packed_info.size())
		device->tex_alloc("__tex_image_packed_info", dscene->tex_image_packed_info);
}
Exemplo n.º 3
0
void ImageManager::device_pack_images(Device *device,
                                      DeviceScene *dscene,
                                      Progress& /*progess*/)
{
	/* For OpenCL, we pack all image textures into a single large texture, and
	 * do our own interpolation in the kernel. */
	size_t size = 0, offset = 0;
	ImageDataType type;

	int info_size = tex_num_images[IMAGE_DATA_TYPE_FLOAT4] + tex_num_images[IMAGE_DATA_TYPE_BYTE4]
	                + tex_num_images[IMAGE_DATA_TYPE_FLOAT] + tex_num_images[IMAGE_DATA_TYPE_BYTE];
	uint4 *info = dscene->tex_image_packed_info.resize(info_size*2);

	/* Byte4 Textures*/
	type = IMAGE_DATA_TYPE_BYTE4;

	for(size_t slot = 0; slot < images[type].size(); slot++) {
		if(!images[type][slot])
			continue;

		device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];
		size += tex_img.size();
	}

	uchar4 *pixels_byte4 = dscene->tex_image_byte4_packed.resize(size);

	for(size_t slot = 0; slot < images[type].size(); slot++) {
		if(!images[type][slot])
			continue;

		device_vector<uchar4>& tex_img = dscene->tex_byte4_image[slot];

		uint8_t options = pack_image_options(type, slot);

		int index = type_index_to_flattened_slot(slot, type) * 2;
		info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
		info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0);

		memcpy(pixels_byte4+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
		offset += tex_img.size();
	}

	/* Float4 Textures*/
	type = IMAGE_DATA_TYPE_FLOAT4;
	size = 0, offset = 0;

	for(size_t slot = 0; slot < images[type].size(); slot++) {
		if(!images[type][slot])
			continue;

		device_vector<float4>& tex_img = dscene->tex_float4_image[slot];
		size += tex_img.size();
	}

	float4 *pixels_float4 = dscene->tex_image_float4_packed.resize(size);

	for(size_t slot = 0; slot < images[type].size(); slot++) {
		if(!images[type][slot])
			continue;

		device_vector<float4>& tex_img = dscene->tex_float4_image[slot];

		/* todo: support 3D textures, only CPU for now */

		uint8_t options = pack_image_options(type, slot);

		int index = type_index_to_flattened_slot(slot, type) * 2;
		info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
		info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0);

		memcpy(pixels_float4+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
		offset += tex_img.size();
	}

	/* Byte Textures*/
	type = IMAGE_DATA_TYPE_BYTE;
	size = 0, offset = 0;

	for(size_t slot = 0; slot < images[type].size(); slot++) {
		if(!images[type][slot])
			continue;

		device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];
		size += tex_img.size();
	}

	uchar *pixels_byte = dscene->tex_image_byte_packed.resize(size);

	for(size_t slot = 0; slot < images[type].size(); slot++) {
		if(!images[type][slot])
			continue;

		device_vector<uchar>& tex_img = dscene->tex_byte_image[slot];

		uint8_t options = pack_image_options(type, slot);

		int index = type_index_to_flattened_slot(slot, type) * 2;
		info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
		info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0);

		memcpy(pixels_byte+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
		offset += tex_img.size();
	}

	/* Float Textures*/
	type = IMAGE_DATA_TYPE_FLOAT;
	size = 0, offset = 0;

	for(size_t slot = 0; slot < images[type].size(); slot++) {
		if(!images[type][slot])
			continue;

		device_vector<float>& tex_img = dscene->tex_float_image[slot];
		size += tex_img.size();
	}

	float *pixels_float = dscene->tex_image_float_packed.resize(size);

	for(size_t slot = 0; slot < images[type].size(); slot++) {
		if(!images[type][slot])
			continue;

		device_vector<float>& tex_img = dscene->tex_float_image[slot];

		/* todo: support 3D textures, only CPU for now */

		uint8_t options = pack_image_options(type, slot);

		int index = type_index_to_flattened_slot(slot, type) * 2;
		info[index] = make_uint4(tex_img.data_width, tex_img.data_height, offset, options);
		info[index+1] = make_uint4(tex_img.data_depth, 0, 0, 0);

		memcpy(pixels_float+offset, (void*)tex_img.data_pointer, tex_img.memory_size());
		offset += tex_img.size();
	}

	if(dscene->tex_image_byte4_packed.size()) {
		if(dscene->tex_image_byte4_packed.device_pointer) {
			thread_scoped_lock device_lock(device_mutex);
			device->tex_free(dscene->tex_image_byte4_packed);
		}
		device->tex_alloc("__tex_image_byte4_packed", dscene->tex_image_byte4_packed);
	}
	if(dscene->tex_image_float4_packed.size()) {
		if(dscene->tex_image_float4_packed.device_pointer) {
			thread_scoped_lock device_lock(device_mutex);
			device->tex_free(dscene->tex_image_float4_packed);
		}
		device->tex_alloc("__tex_image_float4_packed", dscene->tex_image_float4_packed);
	}
	if(dscene->tex_image_byte_packed.size()) {
		if(dscene->tex_image_byte_packed.device_pointer) {
			thread_scoped_lock device_lock(device_mutex);
			device->tex_free(dscene->tex_image_byte_packed);
		}
		device->tex_alloc("__tex_image_byte_packed", dscene->tex_image_byte_packed);
	}
	if(dscene->tex_image_float_packed.size()) {
		if(dscene->tex_image_float_packed.device_pointer) {
			thread_scoped_lock device_lock(device_mutex);
			device->tex_free(dscene->tex_image_float_packed);
		}
		device->tex_alloc("__tex_image_float_packed", dscene->tex_image_float_packed);
	}
	if(dscene->tex_image_packed_info.size()) {
		if(dscene->tex_image_packed_info.device_pointer) {
			thread_scoped_lock device_lock(device_mutex);
			device->tex_free(dscene->tex_image_packed_info);
		}
		device->tex_alloc("__tex_image_packed_info", dscene->tex_image_packed_info);
	}
}
Exemplo n.º 4
0
static inline __host__ __device__ uint4 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z, Ncv32u w) {return make_uint4(x,y,z,w);}
Exemplo n.º 5
0
template<> inline __host__ __device__ uint4 _pixMakeZero<uint4>() {return make_uint4(0,0,0,0);}