Beispiel #1
0
        /// Sparse matrix-vector product in double precision via cusparseDhybmv.
        /**
         * \param x      input vector.
         * \param y      output vector.
         * \param alpha  scalar passed to cuSPARSE as the product scale.
         * \param append when true the y-scale passed to cuSPARSE is 1
         *               (result is accumulated into y); otherwise it is 0.
         */
        void mul(const device_vector<double> &x, device_vector<double> &y,
                 double alpha = 1, bool append = false) const
        {
            // Scale factor applied to the existing contents of y.
            double y_scale = 0.0;
            if (append) {
                y_scale = 1.0;
            }

            cuda_check(
                cusparseDhybmv(
                    handle,
                    CUSPARSE_OPERATION_NON_TRANSPOSE,
                    &alpha,
                    desc.get(),
                    mat.get(),
                    x.raw_ptr(),
                    &y_scale,
                    y.raw_ptr()
                    )
                );
        }
Beispiel #2
0
        /// Sparse matrix-vector product in single precision via cusparseShybmv.
        /**
         * \param x      input vector.
         * \param y      output vector.
         * \param alpha  scalar passed to cuSPARSE as the product scale.
         * \param append when true the y-scale passed to cuSPARSE is 1
         *               (result is accumulated into y); otherwise it is 0.
         */
        void mul(const device_vector<float> &x, device_vector<float> &y,
                 float alpha = 1, bool append = false) const
        {
            // Scale factor applied to the existing contents of y.
            float y_scale = 0.0f;
            if (append) {
                y_scale = 1.0f;
            }

            cuda_check(
                cusparseShybmv(
                    handle,
                    CUSPARSE_OPERATION_NON_TRANSPOSE,
                    &alpha,
                    desc.get(),
                    mat.get(),
                    x.raw_ptr(),
                    &y_scale,
                    y.raw_ptr()
                    )
                );
        }
Beispiel #3
0
 /* Copies texture_info to the device if it has been flagged as changed,
  * then clears the flag. Does nothing when the flag is not set. */
 void load_texture_info()
 {
   if (!need_texture_info) {
     return;
   }

   texture_info.copy_to_device();
   need_texture_info = false;
 }
Beispiel #4
0
  /* Registers a texture with this device.
   *
   * Memory with INTERPOLATION_NONE is handed to kernel_tex_copy() as a data
   * texture. Anything else is treated as an image texture: its slot index is
   * parsed from the digits after the last '_' in mem.name, and the matching
   * texture_info entry is filled in. In both cases the device pointer is set
   * to the host pointer and the allocation is reported to stats. */
  void tex_alloc(device_memory &mem)
  {
    VLOG(1) << "Texture allocate: " << mem.name << ", "
            << string_human_readable_number(mem.memory_size()) << " bytes. ("
            << string_human_readable_size(mem.memory_size()) << ")";

    if (mem.interpolation != INTERPOLATION_NONE) {
      /* Image texture: work out which slot the name refers to. */
      int slot_index = 0;
      if (!string_startswith(mem.name, "__tex_image")) {
        /* Image textures are expected to carry the "__tex_image" prefix. */
        assert(0);
      }
      else {
        int last_underscore = string(mem.name).rfind("_");
        slot_index = atoi(mem.name + last_underscore + 1);
      }

      /* Grow with some headroom so that consecutive slots do not trigger
       * a re-allocation each time. */
      if (slot_index >= texture_info.size()) {
        texture_info.resize(slot_index + 128);
      }

      TextureInfo &slot = texture_info[slot_index];
      slot.width = mem.data_width;
      slot.height = mem.data_height;
      slot.depth = mem.data_depth;
      slot.interpolation = mem.interpolation;
      slot.extension = mem.extension;
      slot.cl_buffer = 0;
      slot.data = (uint64_t)mem.host_pointer;

      /* The table changed and has to be re-uploaded before use. */
      need_texture_info = true;
    }
    else {
      /* Data texture. */
      kernel_tex_copy(&kernel_globals, mem.name, mem.host_pointer, mem.data_size);
    }

    mem.device_pointer = (device_ptr)mem.host_pointer;
    mem.device_size = mem.memory_size();
    stats.mem_alloc(mem.device_size);
  }
Beispiel #5
0
/* Loads the file named by img->filename through OIIO as float data into
 * tex_img and expands 1- and 3-channel images in place to 4 floats per pixel.
 *
 * Returns false when the filename is empty, the input cannot be created or
 * opened, the channel count is not 1, 3 or 4, a dimension is not positive,
 * or tex_img.resize() yields no buffer. Returns true otherwise. */
bool ImageManager::file_load_float_image(Image *img, device_vector<float4>& tex_img)
{
	if(img->filename == "")
		return false;

	/* load image from file through OIIO */
	ImageInput *in = ImageInput::create(img->filename);

	if(!in)
		return false;

	ImageSpec spec;

	if(!in->open(img->filename, spec)) {
		delete in;
		return false;
	}

	/* we only handle certain number of components */
	int width = spec.width;
	int height = spec.height;
	int components = spec.nchannels;

	/* Non-positive dimensions would make the start offset of the flipped
	 * read below negative, so reject them together with unsupported channel
	 * counts. */
	if(!(components == 1 || components == 3 || components == 4) ||
	   width <= 0 || height <= 0)
	{
		in->close();
		delete in;
		return false;
	}

	printf("loading float image: '%s' %dx%d\n", img->filename.c_str(), width, height);

	/* read RGBA pixels */
	float *pixels = (float*)tex_img.resize(width, height);
	if(!pixels) {
		/* No buffer to read into; release the input instead of writing
		 * through a null pointer. */
		in->close();
		delete in;
		return false;
	}

	int scanlinesize = width*components*sizeof(float);

	/* Start at the last scanline and use a negative y stride so the image is
	 * stored vertically flipped. The byte offset is computed in size_t so it
	 * cannot overflow int for large images. */
	in->read_image(TypeDesc::FLOAT,
		(uchar*)pixels + ((size_t)(height-1))*scanlinesize,
		AutoStride,
		-scanlinesize,
		AutoStride);

	in->close();
	delete in;

	/* Expand to 4 channels in place. Walking from the last pixel to the first
	 * guarantees a source value is read before its location is overwritten.
	 * Indices are size_t so that i*4 cannot overflow int. */
	const size_t num_pixels = ((size_t)width)*height;

	if(components == 3) {
		for(size_t i = num_pixels; i-- > 0;) {
			pixels[i*4+3] = 1.0f;
			pixels[i*4+2] = pixels[i*3+2];
			pixels[i*4+1] = pixels[i*3+1];
			pixels[i*4+0] = pixels[i*3+0];
		}
	}
	else if(components == 1) {
		for(size_t i = num_pixels; i-- > 0;) {
			pixels[i*4+3] = 1.0f;
			pixels[i*4+2] = pixels[i];
			pixels[i*4+1] = pixels[i];
			pixels[i*4+0] = pixels[i];
		}
	}

	return true;
}
Beispiel #6
0
/* Loads an image (from file via `in`, or through the builtin pixel callbacks
 * when file_load_image_generic() leaves `in` NULL) into tex_img.
 *
 * FileFormat, StorageType and DeviceType are not declared in this block;
 * presumably they are template parameters of the enclosing declaration.
 *
 * Steps: read the pixels, expand to 4 channels when `type` is one of the
 * 4-channel data types, zero out non-finite float values, and scale the image
 * down when its largest dimension exceeds a positive texture_limit.
 *
 * Returns false when the generic loader fails, the image has no extent, or a
 * pixel buffer could not be obtained; true otherwise. */
bool ImageManager::file_load_image(Image *img,
                                   ImageDataType type,
                                   int texture_limit,
                                   device_vector<DeviceType>& tex_img)
{
	const StorageType alpha_one = (FileFormat == TypeDesc::UINT8)? 255 : 1;
	ImageInput *in = NULL;
	int width, height, depth, components;
	if(!file_load_image_generic(img, &in, width, height, depth, components)) {
		return false;
	}
	/* Read RGBA pixels. */
	vector<StorageType> pixels_storage;
	StorageType *pixels;
	const size_t max_size = max(max(width, height), depth);
	if(max_size == 0) {
		/* Don't bother with invalid images. */
		if(in) {
			/* This function owns the input from here on; release it so the
			 * early return does not leak it. */
			in->close();
			delete in;
		}
		return false;
	}
	if(texture_limit > 0 && max_size > texture_limit) {
		/* Image will be scaled down later: read into temporary host storage
		 * that always has room for 4 channels per pixel. */
		pixels_storage.resize(((size_t)width)*height*depth*4);
		pixels = &pixels_storage[0];
	}
	else {
		thread_scoped_lock device_lock(device_mutex);
		pixels = (StorageType*)tex_img.alloc(width, height, depth);
	}
	if(pixels == NULL) {
		/* Could be that we've run out of memory. */
		if(in) {
			in->close();
			delete in;
		}
		return false;
	}
	bool cmyk = false;
	const size_t num_pixels = ((size_t)width) * height * depth;
	if(in) {
		StorageType *readpixels = pixels;
		vector<StorageType> tmppixels;
		if(components > 4) {
			/* More channels than the destination can hold: read into a
			 * scratch buffer and keep only the first four below.
			 * NOTE(review): the scratch size ignores depth, which looks
			 * too small for depth > 1 -- confirm. */
			tmppixels.resize(((size_t)width)*height*components);
			readpixels = &tmppixels[0];
		}
		if(depth <= 1) {
			/* Start at the last scanline with a negative y stride so the
			 * image is stored vertically flipped. */
			size_t scanlinesize = ((size_t)width)*components*sizeof(StorageType);
			in->read_image(FileFormat,
			               (uchar*)readpixels + (height-1)*scanlinesize,
			               AutoStride,
			               -scanlinesize,
			               AutoStride);
		}
		else {
			in->read_image(FileFormat, (uchar*)readpixels);
		}
		if(components > 4) {
			size_t dimensions = ((size_t)width)*height;
			for(size_t i = dimensions-1, pixel = 0; pixel < dimensions; pixel++, i--) {
				pixels[i*4+3] = tmppixels[i*components+3];
				pixels[i*4+2] = tmppixels[i*components+2];
				pixels[i*4+1] = tmppixels[i*components+1];
				pixels[i*4+0] = tmppixels[i*components+0];
			}
			tmppixels.clear();
		}
		/* 4-channel JPEG input is treated as CMYK. */
		cmyk = strcmp(in->format_name(), "jpeg") == 0 && components == 4;
		in->close();
		delete in;
	}
	else {
		if(FileFormat == TypeDesc::FLOAT) {
			builtin_image_float_pixels_cb(img->filename,
			                              img->builtin_data,
			                              (float*)&pixels[0],
			                              num_pixels * components,
			                              img->builtin_free_cache);
		}
		else if(FileFormat == TypeDesc::UINT8) {
			builtin_image_pixels_cb(img->filename,
			                        img->builtin_data,
			                        (uchar*)&pixels[0],
			                        num_pixels * components,
			                        img->builtin_free_cache);
		}
		else {
			/* TODO(dingto): Support half for ImBuf. */
		}
	}
	/* Check if we actually have a float4 slot, in case components == 1,
	 * but device doesn't support single channel textures.
	 */
	bool is_rgba = (type == IMAGE_DATA_TYPE_FLOAT4 ||
	                type == IMAGE_DATA_TYPE_HALF4 ||
	                type == IMAGE_DATA_TYPE_BYTE4);
	if(is_rgba) {
		/* All expansions below run from the last pixel to the first so a
		 * source value is read before its location is overwritten. */
		if(cmyk) {
			/* CMYK */
			for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
				pixels[i*4+2] = (pixels[i*4+2]*pixels[i*4+3])/255;
				pixels[i*4+1] = (pixels[i*4+1]*pixels[i*4+3])/255;
				pixels[i*4+0] = (pixels[i*4+0]*pixels[i*4+3])/255;
				pixels[i*4+3] = alpha_one;
			}
		}
		else if(components == 2) {
			/* grayscale + alpha */
			for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
				pixels[i*4+3] = pixels[i*2+1];
				pixels[i*4+2] = pixels[i*2+0];
				pixels[i*4+1] = pixels[i*2+0];
				pixels[i*4+0] = pixels[i*2+0];
			}
		}
		else if(components == 3) {
			/* RGB */
			for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
				pixels[i*4+3] = alpha_one;
				pixels[i*4+2] = pixels[i*3+2];
				pixels[i*4+1] = pixels[i*3+1];
				pixels[i*4+0] = pixels[i*3+0];
			}
		}
		else if(components == 1) {
			/* grayscale */
			for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
				pixels[i*4+3] = alpha_one;
				pixels[i*4+2] = pixels[i];
				pixels[i*4+1] = pixels[i];
				pixels[i*4+0] = pixels[i];
			}
		}
		if(img->use_alpha == false) {
			for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
				pixels[i*4+3] = alpha_one;
			}
		}
	}
	/* Make sure we don't have buggy values. */
	if(FileFormat == TypeDesc::FLOAT) {
		/* For RGBA buffers we put all channels to 0 if either of them is not
		 * finite. This way we avoid possible artifacts caused by fully changed
		 * hue.
		 */
		if(is_rgba) {
			/* i counts pixels and pixel i occupies pixels[i*4 .. i*4+3], so
			 * the index must advance by one to visit every pixel. */
			for(size_t i = 0; i < num_pixels; ++i) {
				StorageType *pixel = &pixels[i*4];
				if(!isfinite(pixel[0]) ||
				   !isfinite(pixel[1]) ||
				   !isfinite(pixel[2]) ||
				   !isfinite(pixel[3]))
				{
					pixel[0] = 0;
					pixel[1] = 0;
					pixel[2] = 0;
					pixel[3] = 0;
				}
			}
		}
		else {
			for(size_t i = 0; i < num_pixels; ++i) {
				StorageType *pixel = &pixels[i];
				if(!isfinite(pixel[0])) {
					pixel[0] = 0;
				}
			}
		}
	}
	/* Scale image down if needed. */
	if(pixels_storage.size() > 0) {
		/* Halve until the largest dimension fits within the limit. */
		float scale_factor = 1.0f;
		while(max_size * scale_factor > texture_limit) {
			scale_factor *= 0.5f;
		}
		VLOG(1) << "Scaling image " << img->filename
		        << " by a factor of " << scale_factor << ".";
		vector<StorageType> scaled_pixels;
		size_t scaled_width, scaled_height, scaled_depth;
		util_image_resize_pixels(pixels_storage,
		                         width, height, depth,
		                         is_rgba ? 4 : 1,
		                         scale_factor,
		                         &scaled_pixels,
		                         &scaled_width, &scaled_height, &scaled_depth);

		StorageType *texture_pixels;

		{
			thread_scoped_lock device_lock(device_mutex);
			texture_pixels = (StorageType*)tex_img.alloc(scaled_width,
			                                             scaled_height,
			                                             scaled_depth);
		}

		if(texture_pixels == NULL) {
			/* Same failure mode as the unscaled allocation above; do not
			 * copy into a null destination. */
			return false;
		}

		memcpy(texture_pixels,
		       &scaled_pixels[0],
		       scaled_pixels.size() * sizeof(StorageType));
	}
	return true;
}
Beispiel #7
0
/* Loads an image as half-float data into tex_img and, when `type` is
 * IMAGE_DATA_TYPE_HALF4, expands 1-, 2- and 3-channel data in place to
 * 4 channels per pixel.
 *
 * T is not declared in this block; presumably it is a template parameter of
 * the enclosing declaration.
 *
 * Returns false when file_load_image_generic() fails or tex_img.resize()
 * yields no buffer; true otherwise. When no ImageInput is provided the
 * buffer is left as allocated (builtin half images are not supported). */
bool ImageManager::file_load_half_image(Image *img, ImageDataType type, device_vector<T>& tex_img)
{
	ImageInput *in = NULL;
	int width, height, depth, components;

	if(!file_load_image_generic(img, &in, width, height, depth, components))
		return false;

	/* read RGBA pixels */
	half *pixels = (half*)tex_img.resize(width, height, depth);
	if(pixels == NULL) {
		if(in) {
			/* This function owns the input from here on; release it so the
			 * early return does not leak it. */
			in->close();
			delete in;
		}
		return false;
	}

	if(in) {
		half *readpixels = pixels;
		vector<half> tmppixels;

		if(components > 4) {
			/* More channels than the destination can hold: read into a
			 * scratch buffer and keep only the first four below. */
			tmppixels.resize(((size_t)width)*height*components);
			readpixels = &tmppixels[0];
		}

		if(depth <= 1) {
			/* Start at the last scanline with a negative y stride so the
			 * image is stored vertically flipped. */
			size_t scanlinesize = ((size_t)width)*components*sizeof(half);
			in->read_image(TypeDesc::HALF,
			               (uchar*)readpixels + (height-1)*scanlinesize,
			               AutoStride,
			               -scanlinesize,
			               AutoStride);
		}
		else {
			in->read_image(TypeDesc::HALF, (uchar*)readpixels);
		}

		if(components > 4) {
			size_t dimensions = ((size_t)width)*height;
			for(size_t i = dimensions-1, pixel = 0; pixel < dimensions; pixel++, i--) {
				pixels[i*4+3] = tmppixels[i*components+3];
				pixels[i*4+2] = tmppixels[i*components+2];
				pixels[i*4+1] = tmppixels[i*components+1];
				pixels[i*4+0] = tmppixels[i*components+0];
			}

			tmppixels.clear();
		}

		in->close();
		delete in;
	}
#if 0
	/* TODO(dingto): Support half for ImBuf. */
	else {
		builtin_image_float_pixels_cb(img->filename, img->builtin_data, pixels);
	}
#endif

	/* Check if we actually have a half4 slot, in case components == 1, but device
	 * doesn't support single channel textures. */
	if(type == IMAGE_DATA_TYPE_HALF4) {
		/* Expansions run from the last pixel to the first so a source value
		 * is read before its location is overwritten. */
		size_t num_pixels = ((size_t)width) * height * depth;
		if(components == 2) {
			/* grayscale + alpha */
			for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
				pixels[i*4+3] = pixels[i*2+1];
				pixels[i*4+2] = pixels[i*2+0];
				pixels[i*4+1] = pixels[i*2+0];
				pixels[i*4+0] = pixels[i*2+0];
			}
		}
		else if(components == 3) {
			/* RGB */
			for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
				pixels[i*4+3] = 1.0f;
				pixels[i*4+2] = pixels[i*3+2];
				pixels[i*4+1] = pixels[i*3+1];
				pixels[i*4+0] = pixels[i*3+0];
			}
		}
		else if(components == 1) {
			/* grayscale */
			for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
				pixels[i*4+3] = 1.0f;
				pixels[i*4+2] = pixels[i];
				pixels[i*4+1] = pixels[i];
				pixels[i*4+0] = pixels[i];
			}
		}

		if(img->use_alpha == false) {
			for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
				pixels[i*4+3] = 1.0f;
			}
		}
	}

	return true;
}
Beispiel #8
0
/* Loads an image as float data into tex_img (from file via `in`, or through
 * builtin_image_float_pixels_cb() when no ImageInput is provided) and, when
 * `type` is IMAGE_DATA_TYPE_FLOAT4, converts CMYK or expands 1-, 2- and
 * 3-channel data in place to 4 channels per pixel.
 *
 * T is not declared in this block; presumably it is a template parameter of
 * the enclosing declaration.
 *
 * Returns false when file_load_image_generic() fails or tex_img.resize()
 * yields no buffer; true otherwise. */
bool ImageManager::file_load_float_image(Image *img, ImageDataType type, device_vector<T>& tex_img)
{
	ImageInput *in = NULL;
	int width, height, depth, components;

	if(!file_load_image_generic(img, &in, width, height, depth, components))
		return false;

	/* read RGBA pixels */
	float *pixels = (float*)tex_img.resize(width, height, depth);
	if(pixels == NULL) {
		if(in) {
			/* This function owns the input from here on; release it so the
			 * early return does not leak it. */
			in->close();
			delete in;
		}
		return false;
	}
	bool cmyk = false;

	if(in) {
		float *readpixels = pixels;
		vector<float> tmppixels;

		if(components > 4) {
			/* More channels than the destination can hold: read into a
			 * scratch buffer and keep only the first four below. */
			tmppixels.resize(((size_t)width)*height*components);
			readpixels = &tmppixels[0];
		}

		if(depth <= 1) {
			/* Start at the last scanline with a negative y stride so the
			 * image is stored vertically flipped. */
			size_t scanlinesize = ((size_t)width)*components*sizeof(float);
			in->read_image(TypeDesc::FLOAT,
			               (uchar*)readpixels + (height-1)*scanlinesize,
			               AutoStride,
			               -scanlinesize,
			               AutoStride);
		}
		else {
			in->read_image(TypeDesc::FLOAT, (uchar*)readpixels);
		}

		if(components > 4) {
			size_t dimensions = ((size_t)width)*height;
			for(size_t i = dimensions-1, pixel = 0; pixel < dimensions; pixel++, i--) {
				pixels[i*4+3] = tmppixels[i*components+3];
				pixels[i*4+2] = tmppixels[i*components+2];
				pixels[i*4+1] = tmppixels[i*components+1];
				pixels[i*4+0] = tmppixels[i*components+0];
			}

			tmppixels.clear();
		}

		/* 4-channel JPEG input is treated as CMYK. */
		cmyk = strcmp(in->format_name(), "jpeg") == 0 && components == 4;

		in->close();
		delete in;
	}
	else {
		builtin_image_float_pixels_cb(img->filename, img->builtin_data, pixels);
	}

	/* Check if we actually have a float4 slot, in case components == 1, but device
	 * doesn't support single channel textures. */
	if(type == IMAGE_DATA_TYPE_FLOAT4) {
		/* Expansions run from the last pixel to the first so a source value
		 * is read before its location is overwritten. */
		size_t num_pixels = ((size_t)width) * height * depth;
		if(cmyk) {
			/* CMYK */
			for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
				/* Channel 3 holds K and must be read before it is replaced
				 * by the alpha value, otherwise the multiplications below
				 * would use the alpha constant instead of K.
				 * NOTE(review): the /255 scaling assumes channel values in
				 * the 0..255 range -- confirm for float input. */
				pixels[i*4+2] = (pixels[i*4+2]*pixels[i*4+3])/255;
				pixels[i*4+1] = (pixels[i*4+1]*pixels[i*4+3])/255;
				pixels[i*4+0] = (pixels[i*4+0]*pixels[i*4+3])/255;
				/* Alpha uses 1.0f, matching every other branch here. */
				pixels[i*4+3] = 1.0f;
			}
		}
		else if(components == 2) {
			/* grayscale + alpha */
			for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
				pixels[i*4+3] = pixels[i*2+1];
				pixels[i*4+2] = pixels[i*2+0];
				pixels[i*4+1] = pixels[i*2+0];
				pixels[i*4+0] = pixels[i*2+0];
			}
		}
		else if(components == 3) {
			/* RGB */
			for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
				pixels[i*4+3] = 1.0f;
				pixels[i*4+2] = pixels[i*3+2];
				pixels[i*4+1] = pixels[i*3+1];
				pixels[i*4+0] = pixels[i*3+0];
			}
		}
		else if(components == 1) {
			/* grayscale */
			for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
				pixels[i*4+3] = 1.0f;
				pixels[i*4+2] = pixels[i];
				pixels[i*4+1] = pixels[i];
				pixels[i*4+0] = pixels[i];
			}
		}

		if(img->use_alpha == false) {
			for(size_t i = num_pixels-1, pixel = 0; pixel < num_pixels; pixel++, i--) {
				pixels[i*4+3] = 1.0f;
			}
		}
	}

	return true;
}
Beispiel #9
0
 // Sets the value returned by arg.raw() as the kernel argument at index
 // argpos on K, then advances argpos to the next argument position.
 // NOTE(review): arg is taken by value; whether copying a device_vector is
 // cheap depends on its definition, which is not visible here.
 void push_arg(device_vector<T> arg) {
     K.set_arg(argpos++, arg.raw());
 }
Beispiel #10
0
 // Pushes a device vector as a kernel argument by forwarding the value
 // returned by arg.raw() to push_arg (presumably an overload accepting that
 // raw handle type -- it is declared outside this view).
 void push_arg(const device_vector<T> &arg) {
     push_arg(arg.raw());
 }
Beispiel #11
0
 /* Destructor: stops the task pool, then frees the texture_info storage. */
 ~CPUDevice()
 {
   /* Stop the pool first; presumably so no task is still running when the
    * texture info is released -- confirm against the task pool semantics. */
   task_pool.stop();
   texture_info.free();
 }
Beispiel #12
0
/* Byte image loader with file input disabled.
 *
 * Returns false immediately for an empty filename. Otherwise prints
 * "image input disabled" to stderr and returns false; tex_img is never
 * touched. The OIIO-based implementation is kept below inside `#if 0` and is
 * not compiled. */
bool ImageManager::file_load_image(Image *img, device_vector<uchar4>& tex_img)
{
	if(img->filename == "")
		return false;

  /* Loading is compiled out; report it and fall through to `return false`. */
  fprintf(stderr, "image input disabled\n");
#if 0

	/* load image from file through OIIO */
	ImageInput *in = ImageInput::create(img->filename);

	if(!in)
		return false;

	ImageSpec spec;

	if(!in->open(img->filename, spec)) {
		delete in;
		return false;
	}

	/* we only handle certain number of components */
	int width = spec.width;
	int height = spec.height;
	int components = spec.nchannels;

	if(!(components == 1 || components == 3 || components == 4)) {
		in->close();
		delete in;
		return false;
	}

	/* read RGBA pixels */
	uchar *pixels = (uchar*)tex_img.resize(width, height);
	int scanlinesize = width*components*sizeof(uchar);

	/* Start at the last scanline with a negative y stride so the image is
	 * stored vertically flipped. */
	in->read_image(TypeDesc::UINT8,
		(uchar*)pixels + (height-1)*scanlinesize,
		AutoStride,
		-scanlinesize,
		AutoStride);

	in->close();
	delete in;

	/* Expand to 4 channels in place, last pixel first, so source values are
	 * read before their locations are overwritten. */
	if(components == 3) {
		for(int i = width*height-1; i >= 0; i--) {
			pixels[i*4+3] = 255;
			pixels[i*4+2] = pixels[i*3+2];
			pixels[i*4+1] = pixels[i*3+1];
			pixels[i*4+0] = pixels[i*3+0];
		}
	}
	else if(components == 1) {
		for(int i = width*height-1; i >= 0; i--) {
			pixels[i*4+3] = 255;
			pixels[i*4+2] = pixels[i];
			pixels[i*4+1] = pixels[i];
			pixels[i*4+0] = pixels[i];
		}
	}

	return true;
#endif 
	return false;
}