C++ (Cpp) _clInit Beispiele

Beispiel #1

0

Datei anzeigen

Datei: bfs.cpp Projekt: ebads67/ipmacc

//----------------------------------------------------------
//--breadth first search on GPUs
//----------------------------------------------------------
// Uploads the graph arrays to the device, alternates kernel 0 and kernel 1
// until the host reads the "over" flag back as false, then copies the cost
// array into h_cost and releases every device buffer.
//
//   no_of_nodes            element count of the node, mask, visited and cost arrays
//   h_graph_nodes          host node array (no_of_nodes entries)
//   edge_list_size         element count of h_graph_edges
//   h_graph_edges          host edge array
//   h_graph_mask           host mask array, uploaded before the first pass
//   h_updating_graph_mask  host updating-mask array, uploaded before the first pass
//   h_graph_visited        host visited array, uploaded before the first pass
//   h_cost                 host cost array; uploaded first, then overwritten with
//                          the device contents at the end
//
// Declared to throw std::string. There is no try/catch in this function, so if a
// helper throws, the exception leaves before the device buffers are released.
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
		int *h_graph_edges, bool *h_graph_mask, bool *h_updating_graph_mask, \
		bool *h_graph_visited, int *h_cost) 
					throw(std::string){

	// Host copy of the continuation flag. It is given a value up front because
	// its address is passed to _clMallocRW before the loop first assigns it.
	bool h_over = false;
	cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, \
			d_graph_visited, d_cost, d_over;

	//--1 transfer data from host to device
	_clInit();
	d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes);
	d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges);
	d_graph_mask = _clMalloc(no_of_nodes*sizeof(bool), h_graph_mask);
	d_updating_graph_mask = _clMalloc(no_of_nodes*sizeof(bool), h_updating_graph_mask);
	d_graph_visited = _clMalloc(no_of_nodes*sizeof(bool), h_graph_visited);
	d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost);
	d_over = _clMallocRW(sizeof(bool), &h_over);

	_clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes);
	_clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);
	_clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(bool), h_graph_mask);
	_clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(bool), h_updating_graph_mask);
	_clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(bool), h_graph_visited);
	_clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);

	//--2 invoke kernel
#ifdef	PROFILING
	timer kernel_timer;
	double kernel_time = 0.0;
	kernel_timer.reset();
	kernel_timer.start();
#endif
	do{
		// Clear the flag on the device before every pass; it is read back
		// after kernel 1 to decide whether another pass is needed.
		h_over = false;
		_clMemcpyH2D(d_over, sizeof(bool), &h_over);

		//--kernel 0
		int kernel_id = 0;
		int kernel_idx = 0;
		_clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
		_clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
		_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
		_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
		_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
		_clSetArgs(kernel_id, kernel_idx++, d_cost);
		_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
		_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);

		//--kernel 1
		kernel_id = 1;
		kernel_idx = 0;
		_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
		_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
		_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
		_clSetArgs(kernel_id, kernel_idx++, d_over);
		_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
		_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);

		_clMemcpyD2H(d_over,sizeof(bool), &h_over);
	}while(h_over);
	_clFinish();
#ifdef	PROFILING
	kernel_timer.stop();
	kernel_time = kernel_timer.getTimeInSeconds();
#endif
	//--3 transfer data from device to host
	_clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost);
	//--statistics
#ifdef	PROFILING
	std::cout<<"kernel time(s):"<<kernel_time<<std::endl;
#endif
	//--4 release cl resources.
	_clFree(d_graph_nodes);
	_clFree(d_graph_edges);
	_clFree(d_graph_mask);
	_clFree(d_updating_graph_mask);
	_clFree(d_graph_visited);
	_clFree(d_cost);
	_clFree(d_over);
	_clRelease();
	return ;
}

Beispiel #2

0

Datei anzeigen

Datei: Main.cpp Projekt: haibo031031/elmo

int main(int argc, char ** argv)
{
	uint * in = NULL, * out_cpu = NULL, * out_gpu = NULL;
	cl_mem d_in = NULL, d_out = NULL;
try{
	if(argc!=2){
		printf("need 1 parameter here!!!");
		exit(-1);
	}

	_clInit(1, "gpu", 0);
	uint iter = 100;
	
#if defined TIME
	double start_time = 0.0;
	double end_time = 0.0;
	double deltaT = 0.0;
	string dat_name="data.dat";

	FILE * fp = fopen(dat_name.c_str(), "a+");
	if(fp==NULL)
	{
		printf("failed to open file!!!\n");
		exit(-1);
	}
#endif
	
	// parameters
	uint side = atoi(argv[1]);
	uint wData = side;
	uint hData = side;
	uint size = wData * hData;

	printf("wData=%d, hData=%d\n", wData, hData);
	
	// allocate memory space on the host and device side
	in = (uint * )malloc(size * sizeof(uint));
	out_cpu = (uint * )malloc(size * sizeof(uint));
	out_gpu = (uint * )malloc(size * sizeof(uint));
	
	d_in = _clMalloc(size * sizeof(uint));	
	d_out = _clMalloc(size * sizeof(uint));

	// initialization
	fill<uint>(in, size, 16);

	// copy data from host to device
	_clMemcpyH2D(d_in, in, size * sizeof(uint));
	
	// warm-up
	mt_1(d_in, d_out, wData, hData);
	mt_2(d_in, d_out, wData, hData);
	mt_3(d_in, d_out, wData, hData);
	
#ifdef VARIFY	
	CPURun(in, out_cpu, wData, hData);
#endif //VARIFY
	
	/**************************1****************************/
#ifdef TIME
	deltaT = 0.0;
#endif
	for(int i=0; i<iter; i++)
	{
	
#ifdef TIME
	start_time = gettime();
#endif

		mt_1(d_in, d_out, wData, hData);
	
#ifdef TIME	
	end_time = gettime();
	deltaT += end_time - start_time;	
#endif
	}	
#ifdef TIME
	fprintf(fp, "%lf\t", deltaT/(double)iter);
#endif

#ifdef VARIFY
	_clMemcpyD2H(out_gpu, d_out, size * sizeof(uint));
	verify_array_int<uint>(out_cpu, out_gpu, size);
#endif //VARIFY

	/**************************2****************************/
#ifdef TIME
	deltaT = 0.0;
#endif
	for(int i=0; i<iter; i++)
	{
	
#ifdef TIME
	start_time = gettime();
#endif

		mt_2(d_in, d_out, wData, hData);
	
#ifdef TIME	
	end_time = gettime();
	deltaT += end_time - start_time;	
#endif
	}	
#ifdef TIME
	fprintf(fp, "%lf\t", deltaT/(double)iter);
#endif

#ifdef VARIFY
	_clMemcpyD2H(out_gpu, d_out, size * sizeof(uint));
	verify_array_int<uint>(out_cpu, out_gpu, size);
#endif //VARIFY

	/**************************3****************************/
#ifdef TIME
	deltaT = 0.0;
#endif
	for(int i=0; i<iter; i++)
	{
	
#ifdef TIME
	start_time = gettime();
#endif

		mt_3(d_in, d_out, wData, hData);
	
#ifdef TIME	
	end_time = gettime();
	deltaT += end_time - start_time;	
#endif
	}	
#ifdef TIME
	fprintf(fp, "%lf\t", deltaT/(double)iter);
#endif

#ifdef VARIFY
	_clMemcpyD2H(out_gpu, d_out, size * sizeof(uint));
	verify_array_int<uint>(out_cpu, out_gpu, size);
#endif //VARIFY

#ifdef TIME	
	fprintf(fp, "\n");	
	fclose(fp);
#endif	
}
catch(string msg){
	printf("ERR:%s\n", msg.c_str());
	printf("Error catched\n");
	}

	_clFree(d_in);
	_clFree(d_out);
	_clRelease();
	if(in!=NULL) free(in);
	if(out_cpu!=NULL) free(out_cpu);
	if(out_gpu!=NULL) free(out_gpu);

	return 1;
}

Beispiel #3

0

Datei anzeigen

Datei: bfs.cpp Projekt: dylanzika/rodinia-1

//----------------------------------------------------------
//--breadth first search on GPUs
//----------------------------------------------------------
// Uploads the graph arrays to the device, alternates kernel 0 and kernel 1
// until the host reads the "over" flag back as zero, then copies the cost
// array into h_cost and releases every device buffer.
//
//   no_of_nodes            element count of the node, mask, visited and cost arrays
//   h_graph_nodes          host node array (no_of_nodes entries)
//   edge_list_size         element count of h_graph_edges
//   h_graph_edges          host edge array
//   h_graph_mask           host mask array (one char per node)
//   h_updating_graph_mask  host updating-mask array (one char per node)
//   h_graph_visited        host visited array (one char per node)
//   h_cost                 host cost array; uploaded first, then overwritten with
//                          the device contents at the end
//
// Throws std::string: any std::string raised by a helper is caught, the device
// buffers created so far are released, and the message is rethrown with a
// prefix naming this function.
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
		int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
		char *h_graph_visited, int *h_cost) 
					throw(std::string){

	// Host copy of the continuation flag; given a value because its address is
	// passed to _clMallocRW before the loop first assigns it.
	char h_over = 0;
	// Null handles, so the catch block can tell which buffers were actually
	// created before the exception was raised.
	cl_mem d_graph_nodes = 0, d_graph_edges = 0, d_graph_mask = 0, \
			d_updating_graph_mask = 0, d_graph_visited = 0, d_cost = 0, d_over = 0;
	try{
		//--1 transfer data from host to device
		_clInit();			
		d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes);
		d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges);
		d_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_graph_mask);
		d_updating_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_updating_graph_mask);
		d_graph_visited = _clMallocRW(no_of_nodes*sizeof(char), h_graph_visited);
		d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost);
		d_over = _clMallocRW(sizeof(char), &h_over);
		
		_clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes);
		_clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);	
		_clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(char), h_graph_mask);	
		_clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(char), h_updating_graph_mask);	
		_clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(char), h_graph_visited);	
		_clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);	
			
		//--2 invoke kernel
#ifdef	PROFILING
		timer kernel_timer;
		double kernel_time = 0.0;		
		kernel_timer.reset();
		kernel_timer.start();
#endif
		do{
			// Clear the flag on the device before every pass; it is read
			// back after kernel 1 to decide whether another pass is needed.
			h_over = 0;
			_clMemcpyH2D(d_over, sizeof(char), &h_over);
			//--kernel 0
			int kernel_id = 0;
			int kernel_idx = 0;
			_clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
			_clSetArgs(kernel_id, kernel_idx++, d_cost);
			_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
			
			//--kernel 1
			kernel_id = 1;
			kernel_idx = 0;			
			_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
			_clSetArgs(kernel_id, kernel_idx++, d_over);
			_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);			
			
			_clMemcpyD2H(d_over,sizeof(char), &h_over);
			}while(h_over);
			
		_clFinish();
#ifdef	PROFILING
		kernel_timer.stop();
		kernel_time = kernel_timer.getTimeInSeconds();
#endif
		//--3 transfer data from device to host
		_clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost);
		//--statistics
#ifdef	PROFILING
		std::cout<<"kernel time(s):"<<kernel_time<<std::endl;		
#endif
		//--4 release cl resources.
		_clFree(d_graph_nodes);
		_clFree(d_graph_edges);
		_clFree(d_graph_mask);
		_clFree(d_updating_graph_mask);
		_clFree(d_graph_visited);
		_clFree(d_cost);
		_clFree(d_over);
		_clRelease();
	}
	catch(const std::string &msg){		
		// Release only the buffers that exist; a handle that is still null
		// was never allocated and must not be handed to _clFree.
		if(d_graph_nodes) _clFree(d_graph_nodes);
		if(d_graph_edges) _clFree(d_graph_edges);
		if(d_graph_mask) _clFree(d_graph_mask);
		if(d_updating_graph_mask) _clFree(d_updating_graph_mask);
		if(d_graph_visited) _clFree(d_graph_visited);
		if(d_cost) _clFree(d_cost);
		if(d_over) _clFree(d_over);
		_clRelease();
		// Prefix with this function's name so the caller can locate the failure.
		std::string e_str = "in run_bfs_gpu -> ";
		e_str += msg;
		throw(e_str);
	}
	return ;
}

Beispiel #4

0

Datei anzeigen

Datei: app.cpp Projekt: haibo031031/vdt

// Benchmark driver: fills two 2048x2048 input matrices, then for every kernel
// index below total_kernels calls run_gpu number_trials times and appends the
// kernel index, mean, standard deviation and the individual values returned by
// run_gpu to results.dat.
// Returns 0 on every path that reaches the end, including after a caught
// exception.
int main(int argc, char * argv[])
{
	// All heap pointers start out NULL: h_omatrix_ref is only allocated when
	// `verify` is set but is always passed to run_gpu, and the shared cleanup
	// at the bottom relies on free(NULL) being a no-op.
	float *h_imatrix_a = NULL, *h_imatrix_b = NULL, *h_omatrix_c = NULL, *h_omatrix_ref = NULL;
	double *trials = NULL;
	int a_size, b_size, c_size;
	int size = 2048;
	_clParseCommandLine(argc, argv);
	_clInit(platform_id, device_type, device_id);
	bool verify = false;
	a_size = size;
	b_size = size;
	c_size = size;
	int number_elements_a = a_size * a_size;
	int number_elements_b = b_size * b_size;
	int number_elements_c = c_size * c_size;	
	
	try{
		h_imatrix_a = (datatype *)malloc(number_elements_a*sizeof(datatype));
		h_imatrix_b = (datatype *)malloc(number_elements_b*sizeof(datatype));
		h_omatrix_c = (datatype *)malloc(number_elements_c*sizeof(datatype)); 
		if(verify){
			h_omatrix_ref = (datatype *)malloc(number_elements_c*sizeof(datatype)); 
		}
		if(h_imatrix_a==NULL || h_imatrix_b==NULL || h_omatrix_c==NULL
				|| (verify && h_omatrix_ref==NULL)){
			throw std::string("failed to allocate host memory");
		}
		fill<datatype>(h_imatrix_a, number_elements_a, 10);
		fill<datatype>(h_imatrix_b, number_elements_b, 10);
		if(verify){
			run_cpu<datatype>(h_imatrix_a, a_size, a_size, h_imatrix_b, b_size, b_size, h_omatrix_ref, c_size, c_size);		
		}

		std::cout<<"--------------------testing..."<<std::endl;
		int number_trials = 20;
		trials = (double*)malloc(sizeof(double)*number_trials);
		if(trials==NULL){
			throw std::string("failed to allocate host memory");
		}
		double avg_t = 0.0;
		double std_t = 0.0;		
		printf("total number of kernels: %d\n", total_kernels);
		for(int k=0; k<total_kernels; k++){	
			avg_t = 0.0;
			std_t = 0.0;
			for(int i=0; i<number_trials; i++){
				trials[i] = run_gpu<datatype>(h_imatrix_a, h_imatrix_b, h_omatrix_c, h_omatrix_ref, size, k, verify);
				avg_t += trials[i];
			}
			avg_t = avg_t/(double)number_trials;
			// Population standard deviation over the trials.
			for(int i=0; i<number_trials; i++){
				std_t += (avg_t-trials[i])*(avg_t-trials[i]);
			}
			std_t /= (double)number_trials;
			std_t = sqrt(std_t);

			// The file is opened only around the writes, so an exception
			// from run_gpu cannot leave the handle open.
			FILE *fp = fopen("results.dat", "a");
			if(fp==NULL){
				throw std::string("failed to open the output file!!!");
			}
			fprintf(fp, "%d\t%lf\t%lf\t", k, avg_t, std_t);
			for(int i=0; i<number_trials; i++){
				fprintf(fp, "%lf\t", trials[i]);
			}
			fprintf(fp, "\n");
			fclose(fp);
		}
	
		std::cout<<"--------------------done..."<<std::endl;
	}
	catch(const std::string &msg){
		std::cout<<"--cambine: exception in main ->"<<msg<<std::endl;
	}

	// Shared cleanup for the normal and the error path.
	_clRelease();
	free(h_imatrix_a);
	free(h_imatrix_b);
	free(h_omatrix_c);
	free(h_omatrix_ref);
	free(trials);
		
    return 0;
}

Beispiel #5

0

Datei anzeigen

Datei: Main.cpp Projekt: haibo031031/elmo

int main(int argc, char ** argv)
{
	cl_mem out = NULL;
try{
	if(argc!=3){
		printf("need 2 parameter here!!!\n");
		exit(-1);
	}
	
	
#if defined TIME
	double start_time = 0;
	double end_time = 0;
	string dat_name="data.dat";

	FILE * fp = fopen(dat_name.c_str(), "a+");
	if(fp==NULL)
	{
		printf("failed to open file!!!\n");
		exit(-1);
	}
#endif

	uint bins = atoi(argv[1]);
	uint size = atoi(argv[2]);
	uint iter = 100;

	
	printf("bins=%d, size=%d\n", bins, size);
	
	_clInit(1, "gpu", 0);
	
	out = _clMalloc((size/BS)*bins);
	
	layout_cyclic(out, bins, size);
	
	/**************************1****************************/

#ifdef TIME
	start_time = gettime();
#endif

	for(int i=1; i<iter; i++)
	{
		layout_blocked(out, bins, size);
	}
	
#ifdef TIME	
	end_time = gettime();
	fprintf(fp, "%lf\t", (end_time-start_time)/(double)iter);	
#endif		

	/**************************2****************************/

#ifdef TIME
	start_time = gettime();
#endif

	for(int i=1; i<iter; i++)
	{
		layout_cyclic(out, bins, size);
	}
	
#ifdef TIME	
	end_time = gettime();
	fprintf(fp, "%lf\t", (end_time-start_time)/(double)iter);	
#endif

	/**************************3****************************/

#ifdef TIME
	start_time = gettime();
#endif

	for(int i=1; i<iter; i++)
	{
		layout_cyclic_2(out, bins, size);
	}
	
#ifdef TIME	
	end_time = gettime();
	fprintf(fp, "%lf\t", (end_time-start_time)/(double)iter);	
#endif


#ifdef TIME	
	fprintf(fp, "\n");	
	fclose(fp);
#endif	
}
catch(string msg){
	printf("ERR:%s\n", msg.c_str());
	printf("Error catched\n");
	}

	_clFree(out);
	_clRelease();
	return 1;
}

Beispiel #6

0

Datei anzeigen

Datei: bfs.cpp Projekt: bs5ht/Parallella

//----------------------------------------------------------
//--breadth first search on GPUs
//----------------------------------------------------------
// Uploads the graph arrays to the device, alternates kernel 0 and kernel 1
// until the host reads the "over" flag back as zero, prints the elapsed
// wall-clock time of that loop in nanoseconds, then copies the cost array into
// h_cost and releases every device buffer.
//
//   no_of_nodes            element count of the node, mask, visited and cost arrays
//   h_graph_nodes          host node array (no_of_nodes entries)
//   edge_list_size         element count of h_graph_edges
//   h_graph_edges          host edge array
//   h_graph_mask           host mask array (one int per node)
//   h_updating_graph_mask  host updating-mask array (one int per node)
//   h_graph_visited        host visited array (one int per node)
//   h_cost                 host cost array; uploaded first, then overwritten with
//                          the device contents at the end
//
// Throws std::string: any std::string raised by a helper is caught, the device
// buffers created so far are released, and the message is rethrown with a
// prefix naming this function.
void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
		int *h_graph_edges, int *h_graph_mask, int *h_updating_graph_mask, \
		int *h_graph_visited, int *h_cost)
					throw(std::string){

	// Host copy of the continuation flag; given a value because its address is
	// passed to _clMallocRW before the loop first assigns it.
	int h_over = 0;
	// Null handles, so the catch block can tell which buffers were actually
	// created before the exception was raised.
	cl_mem d_graph_nodes = NULL, d_graph_edges = NULL, d_graph_mask = NULL, \
			d_updating_graph_mask = NULL, d_graph_visited = NULL, d_cost = NULL, \
			d_over = NULL;
	try{
		//--1 transfer data from host to device
		_clInit();
		d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes);
		d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges);
		d_graph_mask = _clMallocRW(no_of_nodes*sizeof(int), h_graph_mask);
		d_updating_graph_mask = _clMallocRW(no_of_nodes*sizeof(int), h_updating_graph_mask);
		d_graph_visited = _clMallocRW(no_of_nodes*sizeof(int), h_graph_visited);
		d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost);
		d_over = _clMallocRW(sizeof(int), &h_over);

		_clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes);
		_clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);
		_clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(int), h_graph_mask);
		_clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(int), h_updating_graph_mask);
		_clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(int), h_graph_visited);
		_clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);

		//--2 invoke kernel
#ifdef	PROFILING
		timer kernel_timer;
		double kernel_time = 0.0;
		kernel_timer.reset();
		kernel_timer.start();
#endif
		struct timespec startT, endT;
		clock_gettime(CLOCK_MONOTONIC, &startT);
		do{
			// Clear the flag on the device before every pass; it is read
			// back after kernel 1 to decide whether another pass is needed.
			h_over = 0;
			_clMemcpyH2D(d_over, sizeof(int), &h_over);
			//--kernel 0
			int kernel_id = 0;
			int kernel_idx = 0;
			_clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
			_clSetArgs(kernel_id, kernel_idx++, d_cost);
			_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);

			//--kernel 1
			kernel_id = 1;
			kernel_idx = 0;
			_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
			_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
			_clSetArgs(kernel_id, kernel_idx++, d_over);
			_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);

			_clMemcpyD2H(d_over,sizeof(int), &h_over);
			}while(h_over);

		_clFinish();
		clock_gettime(CLOCK_MONOTONIC, &endT);
		// Widen to 64 bits before scaling so the seconds term cannot overflow
		// where time_t is 32 bits wide. The tv_nsec difference may be
		// negative, hence signed arithmetic until the final sum.
		const long long diff = (long long)(endT.tv_sec - startT.tv_sec) * 1000000000LL;
		const long long nanodiff = (long long)(endT.tv_nsec - startT.tv_nsec);
		// %llu matches the 64-bit argument.
        printf(" accelerator time %llu \n", (unsigned long long)(diff + nanodiff));
#ifdef	PROFILING
		kernel_timer.stop();
		kernel_time = kernel_timer.getTimeInSeconds();
#endif
		//--3 transfer data from device to host
		_clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost);
		//--statistics
#ifdef	PROFILING
		std::cout<<"kernel time(s):"<<kernel_time<<std::endl;
#endif
		//--4 release cl resources.
		_clFree(d_graph_nodes);
		_clFree(d_graph_edges);
		_clFree(d_graph_mask);
		_clFree(d_updating_graph_mask);
		_clFree(d_graph_visited);
		_clFree(d_cost);
		_clFree(d_over);
		_clRelease();
	}
	catch(const std::string &msg){
		// Release only the buffers that exist; a handle that is still NULL
		// was never allocated and must not be handed to _clFree.
		if(d_graph_nodes!=NULL) _clFree(d_graph_nodes);
		if(d_graph_edges!=NULL) _clFree(d_graph_edges);
		if(d_graph_mask!=NULL) _clFree(d_graph_mask);
		if(d_updating_graph_mask!=NULL) _clFree(d_updating_graph_mask);
		if(d_graph_visited!=NULL) _clFree(d_graph_visited);
		if(d_cost!=NULL) _clFree(d_cost);
		if(d_over!=NULL) _clFree(d_over);
		_clRelease();
		// Prefix with this function's name so the caller can locate the failure.
		std::string e_str = "in run_bfs_gpu -> ";
		e_str += msg;
		throw(e_str);
	}
	return ;
}

Beispiel #7

0

Datei anzeigen

Datei: Main.cpp Projekt: haibo031031/elmo

int main(int argc, char ** argv)
{
    float * h_raw, * h_out, * outCPU;
    cl_mem d_raw, d_out;
    try {
        if(argc!=2) {
            printf("need one parameter here!!!");
            exit(-1);
        }

        _clInit(1, "gpu", 0);


#if defined TIME
        double start_time = 0;
        double end_time = 0;
        string dat_name="data.dat";

        FILE * fp = fopen(dat_name.c_str(), "a+");
        if(fp==NULL)
        {
            printf("failed to open file!!!\n");
            exit(-1);
        }
#endif

        int cdim = atoi(argv[1]); //{384};
        int rdim = atoi(argv[1]); //{288};
        printf("cdim=%d, rdim=%d\n", cdim, rdim);
        h_raw = (float *)malloc(cdim * rdim * sizeof(float));
        h_out = (float *)malloc(cdim * rdim * sizeof(float));
        outCPU = (float *)malloc(cdim * rdim * sizeof(float));
        fill<float>(h_raw, cdim * rdim, 5);
        d_raw = _clMalloc(cdim * rdim * sizeof(float));
        d_out = _clMalloc(cdim * rdim * sizeof(float));
        _clMemcpyH2D(d_raw, h_raw, cdim * rdim * sizeof(float));
        printf("-0\n");
#ifdef VARIFY
        CPURun(h_raw, outCPU, cdim, rdim);
#endif //VARIFY

        /**************************1****************************/
#ifdef TIME
        start_time = gettime();
#endif
        printf("-1.1\n");
        broadcast(d_raw, d_out, cdim, rdim);
        printf("-1.2\n");
#ifdef TIME
        end_time = gettime();
        fprintf(fp, "%lf\t", (end_time - start_time));
#endif

#ifdef VARIFY
        _clMemcpyD2H(h_out, d_out, cdim * rdim * sizeof(float));
        verify_array<float>(outCPU, h_out, cdim * rdim);
#endif //VARIFY

        /**************************2****************************/
#ifdef TIME
        start_time = gettime();
#endif
        broadcast_lm(d_raw, d_out, cdim, rdim);
        printf("-2\n");
#ifdef TIME
        end_time = gettime();
        fprintf(fp, "%lf\t", (end_time - start_time));
#endif

#ifdef VARIFY
        _clMemcpyD2H(h_out, d_out, cdim * rdim * sizeof(float));
        verify_array<float>(outCPU, h_out, cdim * rdim);
#endif //VARIFY

#ifdef TIME
        fprintf(fp, "\n");
        fclose(fp);
#endif
    }
    catch(string msg) {
        printf("ERR:%s\n", msg.c_str());
        printf("Error catched\n");
    }
    _clFree(d_raw);
    _clFree(d_out);
    _clRelease();
    if(h_raw!=NULL) free(h_raw);
    if(h_out!=NULL) free(h_out);
    if(outCPU!=NULL) free(outCPU);

    return 1;
}

Beispiel #8

0

Datei anzeigen

Datei: app.cpp Projekt: haibo031031/vdt

// Benchmark driver: fills a 2048x2048 input vector, then for every kernel
// index below total_kernels calls run_gpu number_trials times and appends the
// kernel index, mean, standard deviation and the individual values returned by
// run_gpu to results.dat.
// Returns 0 on every path that reaches the end, including after a caught
// exception.
int main(int argc, char * argv[]){
	
	_clParseCommandLine(argc, argv);
	int w = 2048;
	int h = 2048;	
	bool verify = false;

	// Declared outside the try block and started at NULL so the cleanup at
	// the bottom runs on both the normal and the error path. h_o_vector_ref
	// is only allocated when `verify` is set but is always passed to run_gpu.
	double *trials = NULL;
	FILE *fp = NULL;
	datatype *h_i_vector = NULL;
	datatype *h_o_vector = NULL;
	datatype *h_o_vector_ref = NULL;
	
	try{
		_clInit(platform_id, device_type, device_id);   						
		int number_trials = 20;
		trials = (double*)malloc(sizeof(double)*number_trials);
		if(trials==NULL){
			throw(string("failed to allocate host memory"));
		}
		double avg_time = 0.0;
		double std_time = 0.0;		
		fp = fopen("results.dat", "a");
		if(fp==NULL){
			throw(string("failed to open the output file!!!"));
		}
        
		int number_elements_out = w * h;
		int number_elements_in = w * h;
		h_i_vector = (datatype *)malloc(number_elements_in * sizeof(datatype));
		h_o_vector = (datatype *)malloc(number_elements_out * sizeof(datatype));
		if(h_i_vector==NULL || h_o_vector==NULL){
			throw(string("failed to allocate host memory"));
		}
		fill<datatype>(h_i_vector, number_elements_in, 10);
		if(verify){
			h_o_vector_ref = (datatype *)malloc(number_elements_out * sizeof(datatype));
			if(h_o_vector_ref==NULL){
				throw(string("failed to allocate host memory"));
			}
			run_cpu<datatype>(h_i_vector, h_o_vector_ref, w, h);
		}
		for(int k=0; k<total_kernels; k++){
			avg_time = 0.0;
			std_time = 0.0;
			for(int r=0; r<number_trials; r++){
				trials[r] = run_gpu<datatype>(h_i_vector, h_o_vector, h_o_vector_ref, w, h, k, verify);
				avg_time += trials[r];
			}			
			avg_time = avg_time/(double)number_trials;
			// Population standard deviation over the trials.
			for(int i=0; i<number_trials; i++){
				std_time += (avg_time-trials[i])*(avg_time-trials[i]);
			}
			std_time /= (double)number_trials;
			std_time = sqrt(std_time);
			fprintf(fp, "%d\t%lf\t%lf\t", k, avg_time, std_time);
			for(int i=0; i<number_trials; i++){
				fprintf(fp, "%lf\t", trials[i]);
			}
			fprintf(fp, "\n");
		}
		_clRelease();
	}
	catch(const std::string &msg){
		std::cout<<"Exception in main :: "<<msg<<std::endl;
	}

	// Host-side cleanup; free(NULL) is a no-op, fclose(NULL) is not.
	free(h_i_vector);
	free(h_o_vector);
	free(h_o_vector_ref);
	free(trials);
	if(fp!=NULL){
		fclose(fp);
	}
		
	return 0;
}

Beispiel #9

0

Datei anzeigen

Datei: main.cpp Projekt: haibo031031/aristotle

// Benchmark driver template. The tokens starting with '@' (@hIn, @dIn,
// @cdimIn, @elems, @hAlc, ...) are not C++; presumably a generator substitutes
// them before compilation — TODO confirm. The commented-out hIn1/hIn2 lines
// next to each placeholder show one possible expansion.
//
// Reads argv[1] (used for both cdim and rdim), argv[2] (suffix of the data
// file name) and argv[3] (radius), runs OCLRun once as warm-up and then up to
// iIter more times. When TIME is defined a throughput figure is appended to
// data.<suffix>.dat; when VARIFY is defined the device output is compared with
// the result of OMPRun.
//
// NOTE(review): argv[2] and argv[3] are read without checking argc.
// NOTE(review): returns 1 on the path that reaches the end; the catch block
// exits with -1 instead of falling through to the cleanup.
int main(int argc, char ** argv)
{
	//float *hIn1, *hIn2;
	//cl_mem dIn1, dIn2;
	// Placeholders for the host and device input declarations.
	@hIn;
	@dIn;
	float *hOut, *rOut;
	cl_mem dOut;
try{
	_clParseCommandLine(argc, argv);
	string strSubfix = string(argv[2]);
	_clInit(platform_id, device_type, device_id);
	int cdim = atoi(argv[1]); 
	int rdim = atoi(argv[1]); 
	int r = atoi(argv[3]);
	// Placeholders; cdimIn and rdimIn are used in the OCLRun/OMPRun calls
	// below, so these presumably define them — TODO confirm.
	@cdimIn
	@rdimIn
	// different between iMAP1 and iMAP2
	printf("cdim=%d, rdim=%d, radius=%d\n", cdim, rdim, r);
	int iIter = 10;
	int elems = @elems;
	// cdim * rdim * elems floats, in bytes, scaled by 1e-6.
	double dataAmount = (double)cdim * (double)rdim * (double)(elems) * (double)sizeof(float) * 1e-6;

	
#if defined TIME
	double start_time = 0;
	double end_time = 0;
	double delta_time = 0;
	int cnt = 0;
	string dat_name= string("data.") + strSubfix + string(".dat");

	FILE * fp = fopen(dat_name.c_str(), "a+");
	if(fp==NULL)
	{
		printf("failed to open file!!!\n");
		exit(-1);
	}
#endif
	
	//hIn1 = (float *)malloc(cdim * rdim * sizeof(float));
	//hIn2 = (float *)malloc(cdim * rdim * sizeof(float));
	// Placeholder for the host input allocations.
	@hAlc
	hOut = (float *)malloc(cdim * rdim * sizeof(float));
	rOut = (float *)malloc(cdim * rdim * sizeof(float));

	//fill<float>(hIn1, cdim * rdim, 5);
	//fill<float>(hIn2, cdim * rdim, 5);		
	// Placeholder for filling the host inputs.
	@hFill

	//dIn1 = _clMalloc(cdim * rdim * sizeof(float));
	//dIn2 = _clMalloc(cdim * rdim * sizeof(float));
	// Placeholder for the device input allocations.
	@dAlc
	dOut = _clMalloc(cdim * rdim * sizeof(float));

	//_clMemcpyH2D(dIn1, hIn1, cdim * rdim * sizeof(float));
	//_clMemcpyH2D(dIn2, hIn2, cdim * rdim * sizeof(float));
	// Placeholder for the host-to-device input copies.
	@h2dTrans

	_clFinish();
	
	// warmup
	//OCLRun(dIn1, dIn2, dOut, cdim, rdim);
	OCLRun(@oclArgs, dOut, cdim, rdim, cdimIn, rdimIn);

#ifdef VARIFY	
	//OMPRun(hIn1, hIn2, rOut, cdim, rdim);
	OMPRun(@ompArgs, rOut, cdim, rdim, cdimIn, rdimIn);
#endif //VARIFY
	

#ifdef TIME
	delta_time = 0;
	cnt = 0;
#endif	
	for(int i=0; i<iIter; i++)
	{
#ifdef TIME
	cnt++;
	start_time = gettime();
#endif

		OCLRun(@oclArgs, dOut, cdim, rdim, cdimIn, rdimIn);
#ifdef TIME	
	end_time = gettime();
	delta_time += end_time - start_time;
	// NOTE(review): this leaves the loop whenever the accumulated time is
	// more than 0.1 away from 600000.0, which is the case for almost any
	// measurement, so with TIME defined the loop normally stops after its
	// first pass. The original author also marked it as unclear.
	if(fabs(delta_time-600000.0)>0.1) break;	// ????
#endif	
	}

#ifdef TIME
	// Data amount per run times the number of timed runs, divided by the
	// accumulated time.
	fprintf(fp, "%lf\t", dataAmount * (double)cnt/delta_time);
#endif

#ifdef VARIFY
	_clMemcpyD2H(hOut, dOut, cdim * rdim * sizeof(float));	
	verify_array<float>(rOut, hOut, cdim * rdim);	
#endif //VARIFY


#ifdef TIME	
	fprintf(fp, "\n");	
	fclose(fp);
#endif	
}
catch(string msg){
	printf("ERR:%s\n", msg.c_str());
	printf("Error catched\n");
	// Exits immediately, so the cleanup below is skipped on this path.
	exit(-1);
	}
	//_clFree(dIn1);
	//_clFree(dIn2);
	// Placeholder for releasing the device inputs.
	@clFree
	_clFree(dOut);
	_clRelease();
	//if(hIn1!=NULL) free(hIn1);
	//if(hIn2!=NULL) free(hIn2);
	// Placeholder for releasing the host inputs.
	@hFree
	if(hOut!=NULL) free(hOut);
	if(rOut!=NULL) free(rOut);

	return 1;
}

Beispiel #10

0

Datei anzeigen

Datei: Main.cpp Projekt: haibo031031/elmo

int main(int argc, char ** argv)
{
	uint * h_in = NULL, * h_out_1 = NULL, * h_out_2 = NULL, * h_out = NULL;
	cl_mem d_in = NULL, d_out_1 = NULL, d_out_2 = NULL;
try{
	if(argc!=3){
		printf("need 2 parameter here!!!\n");
		exit(-1);
	}
	
	
#if defined TIME
	double start_time = 0;
	double end_time = 0;
	string dat_name="data.dat";

	FILE * fp = fopen(dat_name.c_str(), "a+");
	if(fp==NULL)
	{
		printf("failed to open file!!!\n");
		exit(-1);
	}
#endif

	uint w, h;
	uint side = atoi(argv[1]);
	w = side, h = side;
	uint size = w * h;
	uint radius = atoi(argv[2]);
	uint iter = 100;

	
	printf("w=%d, h=%d, radius=%d\n", w, h, radius);
	
	_clInit(1, "gpu", 0);

	h_in = (uint *)malloc(size * sizeof(uint));	
	h_out_1 = (uint *)malloc(size * sizeof(uint));
	h_out_2 = (uint *)malloc(size * sizeof(uint));
	h_out = (uint *)malloc(size * sizeof(uint));
	
	d_in = _clMalloc(size * sizeof(uint));
	d_out_1 = _clMalloc(size * sizeof(uint));
	d_out_2 = _clMalloc(size * sizeof(uint));
	
	fill<uint>(h_in, size, 10);
	_clMemcpyH2D(d_in, h_in, size * sizeof(uint));

	//g2l_CPU(h_in, h_out, w, h, radius);

	/**************************1****************************/

#ifdef TIME
	start_time = gettime();
#endif

	for(int i=1; i<iter; i++)
	{
		g2l_TBT(d_in, d_out_1, radius, w, h);
	}
	
#ifdef TIME	
	end_time = gettime();
	fprintf(fp, "%lf\t", (end_time-start_time)/(double)iter);	
#endif
	
#ifdef VARI
	_clMemcpyD2H(h_out_1, d_out_1, size * sizeof(uint));
#endif	
	
	
	/**************************2****************************/
#ifdef TIME
	start_time = gettime();
#endif
	for(int i=1; i<iter; i++)
	{
		g2l_FCTH(d_in, d_out_2, radius, w, h);
	}
	
#ifdef TIME	
	end_time = gettime();
	fprintf(fp, "%lf\t", (end_time-start_time)/(double)iter);	
#endif

#ifdef VARI	
	_clMemcpyD2H(h_out_2, d_out_2, size * sizeof(uint));
	verify_array_int<uint>(h_out_1, h_out_2, w, h);
#endif

#ifdef TIME	
	fprintf(fp, "\n");	
	fclose(fp);
#endif	
}
catch(string msg){
	printf("ERR:%s\n", msg.c_str());
	printf("Error catched\n");
	}

	_clFree(d_in);
	_clFree(d_out_1);
	_clFree(d_out_2);
	_clRelease();
	if(h_in!=NULL) free(h_in);
	if(h_out_1!=NULL) free(h_out_1);
	if(h_out_2!=NULL) free(h_out_2);
	if(h_out!=NULL) free(h_out);

	return 1;
}