Beispiel #1
0
/**
 * Computes a dense per-cell histogram on the device.
 *
 * Two 2D matrices are derived from ff_out via makeDeviceMatrixCL(*ff_out, 0)
 * and makeDeviceMatrixCL(*ff_out, 1); they are handed to the kernel launcher
 * as the assignment and weight inputs respectively (presumably index 0/1
 * selects a plane of the 3D matrix -- confirm against makeDeviceMatrixCL).
 *
 * @param ff_out     3D device matrix providing the assignment and weight data.
 * @param max_bin    Second dimension of the returned histogram matrix.
 * @param cell_size  Edge length of one cell; must be non-zero because it is
 *                   used as a divisor.
 * @param start_y    First row of the processed region.
 * @param start_x    First column of the processed region.
 * @param stop_y     End row of the processed region.
 * @param stop_x     End column of the processed region.
 * @return Device matrix with (cells_y * cells_x) by max_bin entries.
 */
DeviceMatrixCL::Ptr cell_histogram_dense_cl(
	const DeviceMatrixCL3D::Ptr& ff_out,
	const int max_bin, const int cell_size,
	const int start_y, const int start_x,
	const int stop_y, const int stop_x)
{
	// Number of whole cells that fit into the requested region (integer division).
	int cells_y = (stop_y - start_y) / cell_size;
	int cells_x = (stop_x - start_x) / cell_size;

#ifdef METHOD_2
	// METHOD_2 adds the remainder modulo 2 to each count.
	cells_y += (cells_y % 2);
	cells_x += (cells_x % 2);
#endif

	DeviceMatrixCL::Ptr assignments = makeDeviceMatrixCL(*ff_out, 0);
	DeviceMatrixCL::Ptr weights = makeDeviceMatrixCL(*ff_out, 1);

	DeviceMatrixCL::Ptr cell_hist = makeDeviceMatrixCL(cells_y * cells_x, max_bin);

	cell_histogram_dense_device_cl(
		cell_hist.get(),
		assignments.get(),
		weights.get(),
		max_bin,
		cell_size,
		start_y, start_x,
		cells_y, cells_x);

	return cell_hist;
}
Beispiel #2
0
/**
 * Pairwise distance between two feature matrices using the CITYBLOCK metric.
 *
 * Allocates a features_train->height by features_test->height device matrix
 * and lets pwdist_genericCL fill it.
 *
 * @param features_train  First feature matrix.
 * @param features_test   Second feature matrix.
 * @return The newly allocated result matrix.
 */
DeviceMatrixCL::Ptr pwcityblock_cl(const DeviceMatrixCL::Ptr& features_train,
                                   const DeviceMatrixCL::Ptr& features_test)
{
    DeviceMatrixCL::Ptr distances =
        makeDeviceMatrixCL(features_train->height, features_test->height);

    pwdist_genericCL(features_train.get(),
                     features_test.get(),
                     distances.get(),
                     CITYBLOCK);

    return distances;
}
Beispiel #3
0
/**
 * Pairwise distance between two feature matrices using the CHISQUARED metric.
 *
 * Allocates a features_train->height by features_test->height device matrix
 * and lets pwdist_genericCL fill it.
 *
 * @param features_train  First feature matrix.
 * @param features_test   Second feature matrix.
 * @return The newly allocated result matrix.
 */
DeviceMatrixCL::Ptr pwchisq_cl(const DeviceMatrixCL::Ptr& features_train,
                               const DeviceMatrixCL::Ptr& features_test)
{
    DeviceMatrixCL::Ptr distances =
        makeDeviceMatrixCL(features_train->height, features_test->height);

    pwdist_genericCL(features_train.get(),
                     features_test.get(),
                     distances.get(),
                     CHISQUARED);

    return distances;
}
Beispiel #4
0
/**
 * Pairwise comparison of two feature matrices using the ABSDOTPRODUCT metric.
 *
 * Allocates a features_train->height by features_test->height device matrix
 * and lets pwdist_genericCL fill it.
 *
 * @param features_train  First feature matrix.
 * @param features_test   Second feature matrix.
 * @return The newly allocated result matrix.
 */
DeviceMatrixCL::Ptr pwabsdot_cl(const DeviceMatrixCL::Ptr& features_train,
                                const DeviceMatrixCL::Ptr& features_test)
{
    DeviceMatrixCL::Ptr products =
        makeDeviceMatrixCL(features_train->height, features_test->height);

    pwdist_genericCL(features_train.get(),
                     features_test.get(),
                     products.get(),
                     ABSDOTPRODUCT);

    return products;
}
Beispiel #5
0
/**
 * Pairwise distance between two feature matrices via the dedicated
 * pwdist_eucCL routine (used here in place of pwdist_genericCL).
 *
 * Allocates a features_train->height by features_test->height device matrix
 * and lets pwdist_eucCL fill it.
 *
 * @param features_train  First feature matrix.
 * @param features_test   Second feature matrix.
 * @return The newly allocated result matrix.
 */
DeviceMatrixCL::Ptr pwdist_cl(const DeviceMatrixCL::Ptr& features_train,
                              const DeviceMatrixCL::Ptr& features_test)
{
    DeviceMatrixCL::Ptr distances =
        makeDeviceMatrixCL(features_train->height, features_test->height);

    pwdist_eucCL(features_train.get(), features_test.get(), distances.get());

    return distances;
}
Beispiel #6
0
	/**
	 * Builds a classifier for a fixed-size window, fills its coefficients with
	 * random values in [0, 1] and uploads them to the device.
	 *
	 * @param window_height  Window height; divided by cell_size to get cells per column.
	 * @param window_width   Window width; divided by cell_size to get cells per row.
	 * @param cell_size      Edge length of one cell (must be non-zero).
	 * @param block_size     Number of cells along one edge of a block.
	 * @param dict_size      Number of coefficients per cell (must be non-zero);
	 *                       also the second dimension of the device matrix.
	 */
	Classifier(const int window_height, const int window_width, const int cell_size, const int block_size, const int dict_size):
	  _window_height(window_height), _window_width(window_width), _dict_size(dict_size),
		  _cell_size(cell_size), _block_size(block_size)
	  {
		  _n_cells_x = _window_width / cell_size;
		  _n_cells_y = _window_height / cell_size;

		  // Blocks slide one cell at a time, so there are (cells - block + 1) per axis.
		  _n_blocks_x = _n_cells_x - _block_size + 1;
		  _n_blocks_y = _n_cells_y - _block_size + 1;

		  // One coefficient per dictionary entry, per cell in a block, per block.
		  // The dictionary size has to be multiplied in: the previous
		  // ", _dict_size" was a comma operator that silently discarded it,
		  // leaving both the host buffer and the device matrix too small.
		  _n_total_coeff = _block_size * _block_size * _n_blocks_x * _n_blocks_y * _dict_size;

		  coefficients = new float[_n_total_coeff];

		  // (_n_total_coeff / _dict_size) by _dict_size holds exactly
		  // _n_total_coeff entries now that the total is a multiple of _dict_size.
		  classifierCL = makeDeviceMatrixCL(_n_total_coeff / _dict_size, _dict_size);

		  for (int i = 0; i < _n_total_coeff; i++)
		  {
			  coefficients[i] = float( std::rand() ) / RAND_MAX;
		  }

		  DeviceMatrixCL_copyToDevice(*classifierCL, coefficients);
	  }
Beispiel #7
0
/**
 * Runs max_cl_local over a device matrix.
 *
 * Allocates a matrix->height by 1 device matrix for the result and passes
 * both matrices to max_cl_local.
 *
 * @param matrix  Input device matrix.
 * @return The newly allocated matrix->height by 1 result matrix.
 */
DeviceMatrixCL::Ptr max_cl(const DeviceMatrixCL::Ptr& matrix)
{
    DeviceMatrixCL::Ptr maxima = makeDeviceMatrixCL(matrix->height, 1);

    max_cl_local(matrix.get(), maxima.get());

    return maxima;
}
Beispiel #8
0
int main(int argc, char* argv[])
{
	if(argc!=2){
		printf("Please provide kernel path as argument!");
		exit(1);
	}
	kernelPath=argv[1];

		
	const int height = 640;
	const int width = 480;

	const int MAX_BIN = 100;

	float* weights = new float[height * width];
	float* assignments = new float[height * width];

	for (int i = 0; i < height * width; i++)
	{
		weights[i] = float(std::rand()) / RAND_MAX;
		assignments[i] = float(std::rand() % MAX_BIN);
	}
	
	DeviceMatrixCL::Ptr asCL = makeDeviceMatrixCL(height, width);
	DeviceMatrixCL_copyFromDevice(*asCL, assignments);

	DeviceMatrixCL::Ptr wtCL = makeDeviceMatrixCL(height, width);
	DeviceMatrixCL_copyFromDevice(*wtCL, weights);

	DeviceMatrixCL3D::Ptr block_histograms = cell_histogram_dense_cl(
		asCL,
		wtCL,
		MAX_BIN, 8, 
		0, 0,
		height, width);

	delete[] weights;
	delete[] assignments;
//	cv::Mat exampleImage = cv::imread(exampleImagePath, 0);
//
//	//convert to float
//	exampleImage.convertTo(exampleImage, CV_32FC1);
//
//	//pull the data
//	float* f_imData = (float*) exampleImage.data;
//
//	const int height = exampleImage.size().height;
//	const int width = exampleImage.size().width;
//
//	//create a random filterbank
//	const int num_filters = 100;
//	const int filter_dim = 3;
//
//	float* filter_bank = new float[num_filters * filter_dim * filter_dim];
//
//	for (int i = 0; i < num_filters * filter_dim * filter_dim; i++)
//	{
//		filter_bank[i] = float( std::rand() ) / RAND_MAX;
//	}
//
//	//OPENCL Reference
//	DeviceMatrixCL::Ptr dmpCL = makeDeviceMatrixCL(height, width);
//	DeviceMatrixCL_copyToDevice(*dmpCL, f_imData);
//	set_filter_bank_cl(filter_bank, num_filters * filter_dim * filter_dim);
//	DeviceMatrixCL3D::Ptr retdm = filter_frame_cl_3(dmpCL, num_filters, 1, FF_OPTYPE_COSINE);
//	float* retval = new float[height * width * 2];
//	DeviceMatrixCL3D_copyFromDevice(*retdm, retval);
//
//	std::ofstream test_out_cl("testcl.out", std::ios_base::out);
//	for (int j = 0; j < height; j++)
//	{
//		for (int i = 0; i < width; i++)
//		{
//			test_out_cl << retval[j * width + i] << ", ";
//		}
//
//		test_out_cl << std::endl;
//	}
//
//	test_out_cl << std::endl << std::endl << std::endl;
//
//	for (int j = 0; j < height; j++)
//	{
//		for (int i = 0; i < width; i++)
//		{
//			test_out_cl << retval[height * width + j * width + i] << ", ";
//		}
//		test_out_cl << std::endl;
//	}
//	test_out_cl.close();
//
//	delete[] filter_bank;
//	delete[] retval;
//	//delete[] retvalCU;
//
//	return 0;
}
Beispiel #9
0
/**
 * Benchmark driver for the filter -> cell histogram -> classifier pipeline.
 *
 * The optional first command-line argument is parsed with atoi into the
 * global device_use (0 is reported as "CPU", anything else as "GPU").
 * A grayscale image is loaded, converted to float and uploaded once; the
 * three pipeline stages are then run num_iters times and the accumulated
 * wall-clock time of each stage is printed.
 *
 * @return 0 on success, 1 when the example image cannot be loaded.
 */
int main(int argc, char* argv[])
{
	device_use = 0;
	if (argc > 1)
		device_use = atoi(argv[1]);

	// String literals are const; binding one to a non-const char* is
	// ill-formed since C++11.
	static const char* const exampleImagePath = "..\\..\\..\\media\\kewell1.jpg";

	//create a random filterbank
	const int num_filters = 256;

	//number of pipeline passes
	const int num_iters = 125;

	const int filter_dim = 3;

	FilterBank fb(filter_dim, num_filters);
	fb.set_on_device();

	Classifier clf(128, 64, 8, 2, num_filters);

	//load the image (flag 0: single-channel grayscale)
	cv::Mat exampleImage = cv::imread(exampleImagePath, 0);
	if (exampleImage.empty())
	{
		// imread signals failure with an empty matrix; continuing would hand
		// a null data pointer to the device upload below.
		std::cerr << "Could not load image: " << exampleImagePath << std::endl;
		return 1;
	}

	//convert to float
	exampleImage.convertTo(exampleImage, CV_32FC1);

	// Currently a no-op: the target size equals the source size.
	cv::resize(exampleImage, exampleImage, cv::Size(exampleImage.cols, exampleImage.rows));

	if (device_use == 0)
		std::cout << "running on CPU" << std::endl;
	else
		std::cout << "running on GPU" << std::endl;
	std::cout << "Image dimensions:" << exampleImage.size().height << " " << exampleImage.size().width << std::endl;

	//pull the data and upload it to the device
	float* f_imData = reinterpret_cast<float*>(exampleImage.data);
	DeviceMatrixCL::Ptr dmpCL = makeDeviceMatrixCL(exampleImage.size().height, exampleImage.size().width);
	DeviceMatrixCL_copyToDevice(*dmpCL, f_imData);

	// One context handle for the whole run. The previous code did
	// `new TheContext()` on every iteration without ever deleting it, which
	// leaked num_iters objects and charged the allocation to the classifier
	// timing.
	// NOTE(review): assumes TheContext has an accessible destructor that does
	// not tear down the shared OpenCL state -- confirm.
	TheContext tc;

	double tic0, tic1, tic2, tic3;
	double tim1 = 0.0;
	double tim2 = 0.0;
	double tim3 = 0.0;

	for (int i = 0; i < num_iters; i++)
	{
		tic0 = omp_get_wtime();
		DeviceMatrixCL3D::Ptr ff_im = fb.apply_cl(dmpCL);
		tic1 = omp_get_wtime();
		tim1 += tic1 - tic0;

		DeviceMatrixCL::Ptr block_histogram = cell_histogram_dense_cl(
			ff_im, num_filters, 8, 0, 0,
			exampleImage.size().height, exampleImage.size().width);
		tic2 = omp_get_wtime();
		tim2 += tic2 - tic1;

		DeviceMatrixCL::Ptr result = clf.apply(block_histogram);

		// Block until the command queue has drained before stopping the clock.
		clFinish(tc.getMyContext()->cqCommandQueue);
		tic3 = omp_get_wtime();
		tim3 += tic3 - tic2;
	}

	std::cout << "full pipeline time: " << tim1 + tim2 + tim3 << std::endl;
	std::cout << "filter pipeline time: " << tim1 << std::endl;
	std::cout << "histogram pipeline time: " << tim2 << std::endl;
	std::cout << "classifier pipeline time: " << tim3 << std::endl;

	return 0;
}