示例#1
0
/*
 * Benchmark driver.
 *
 * Fills data and data2 with the same pseudo-random values, times
 * bitonic_cpu on data and - only when GPU is defined at compile time -
 * bitonic_gpu on data2, then compares the two arrays element by element.
 *
 * Returns 1 at the first differing element, 0 otherwise.
 */
int main()
{
  int idx;

  srand(time(0));

  /* Both arrays get identical input so their results can be compared. */
  for (idx = 0; idx < SIZE; ++idx)
  {
    const int sample = rand() % 10000 + 1;  /* value in 1..10000 */
    data[idx] = sample;
    data2[idx] = sample;
  }

  /* CPU run, timed from the reset to the GetSeconds() call. */
  ResetMilli();
  bitonic_cpu(data, SIZE);
  printf("CPU:%f\n", GetSeconds());
  ResetMilli();
#ifdef GPU
  /* GPU run on the second copy, timed the same way. */
  bitonic_gpu(data2, SIZE);
  printf("GPU:%f\n", GetSeconds());

  /* Any difference between the two arrays aborts with exit status 1. */
  idx = 0;
  while (idx < SIZE)
  {
    if (!(data[idx] == data2[idx]))
    {
      printf("Error at %d ", idx);
      return 1;
    }
    ++idx;
  }
#endif

  /* Print result, but only for arrays small enough to be readable. */
  if (SIZE <= MAXPRINTSIZE)
  {
    for (idx = 0; idx < SIZE; ++idx)
      printf("%d ", data[idx]);
  }
  printf("\nYour sorting looks correct!\n");
  return 0;
}
示例#2
0
/*
 * Timing driver for add_matrix.
 *
 * Allocates three N*N float matrices, fills a and b with a deterministic
 * pattern, calls add_matrix(a, b, c, N) and prints the elapsed time reported
 * by GetMicroseconds() since the preceding ResetMilli().
 *
 * Returns 0.
 */
int main()
{
  const int N = 1024;

  float *a = new float[N*N];
  float *b = new float[N*N];
  float *c = new float[N*N];

  // Element (i, j) is stored at index i + j*N.
  for (int i = 0; i < N; i++)
    for (int j = 0; j < N; j++)
      {
	a[i+j*N] = 10 + i;
	b[i+j*N] = (float)j / N;
      }

  ResetMilli();
  add_matrix(a, b, c, N);
  // The value comes from GetMicroseconds(), so the label must not say "ms".
  // NOTE(review): %d assumes GetMicroseconds() returns int - confirm.
  printf("Total time in microseconds: %d\n", GetMicroseconds());

  // Debug aid: uncomment to dump the result matrix.
  /* for (int i = 0; i < N; i++) */
  /*   { */
  /*     for (int j = 0; j < N; j++) */
  /* 	{ */
  /* 	  printf("%0.2f ", c[i+j*N]); */
  /* 	} */
  /*     printf("\n"); */
  /*   } */

  // Release the matrices; the original leaked all three.
  delete[] a;
  delete[] b;
  delete[] c;

  return 0;
}
示例#3
0
////////////////////////////////////////////////////////////////////////////////
// main computation function
//
// Reads "maskros512.ppm" into the global `image` (its dimensions go to the
// globals n and m), allocates the global `out` buffer of the same byte size,
// creates an input and an output device buffer, runs theKernel over a fixed
// 512x512 2D range and reads the result back into `out`.
//
// Every OpenCL status is handed to printCLError together with a step number.
// The function returns early, before touching OpenCL, if the image could not
// be loaded or the output buffer could not be allocated.
////////////////////////////////////////////////////////////////////////////////
void computeImages()
{
	cl_mem in_data, out_data;
	cl_int ciErrNum = CL_SUCCESS;
	cl_event event = NULL;
	size_t imageBytes;

	//read in full size of memory
	image = readppm("maskros512.ppm", &n, &m);
	if (image == NULL)
	{
		// NOTE(review): assumes readppm signals failure with NULL - confirm.
		printf("Could not read maskros512.ppm\n");
		return;
	}

	// 3 bytes per pixel; the same size is used for host and device buffers.
	imageBytes = 3*n*m * sizeof(unsigned char);
	out = (unsigned char*) malloc(imageBytes);
	if (out == NULL)
	{
		printf("Could not allocate output image\n");
		return;
	}

	// Create space for data and copy image to device (note that we could also use clEnqueueWriteBuffer to upload)
	in_data = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
		imageBytes, image, &ciErrNum);
	printCLError(ciErrNum,6);
	out_data = clCreateBuffer(cxGPUContext, CL_MEM_WRITE_ONLY,
		imageBytes, NULL, &ciErrNum);
	printCLError(ciErrNum,7);

	// set the args values
	// NOTE(review): sizeof(cl_uint) assumes n and m are 32-bit integers - confirm.
	ciErrNum  = clSetKernelArg(theKernel, 0, sizeof(cl_mem),  (void *) &in_data);
	ciErrNum |= clSetKernelArg(theKernel, 1, sizeof(cl_mem),  (void *) &out_data);
	ciErrNum |= clSetKernelArg(theKernel, 2, sizeof(cl_uint), (void *) &n);
	ciErrNum |= clSetKernelArg(theKernel, 3, sizeof(cl_uint), (void *) &m);
	printCLError(ciErrNum,8);

	// Computing arrangement
	// NOTE(review): the range is hard-coded to 512x512 and does not follow
	// n and m - presumably the input image is always 512x512; confirm.
	size_t globalWorkSize[3] = {512, 512, 1};
	size_t localWorkSize[3] = {16, 16, 1}; //256 threads in each block
	// 32*32 (1024) blocks in total to have 512*512 threads in total

	printf("Startup time %lf\n", GetSeconds());

	// Compute!
	ResetMilli();
	// The arrays decay to size_t*, which is what the API expects; taking
	// their address would pass a pointer-to-array of the wrong type.
	ciErrNum = clEnqueueNDRangeKernel(commandQueue, theKernel, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, &event);
	printCLError(ciErrNum,9);

	// The event is only valid if the enqueue succeeded.
	if (ciErrNum == CL_SUCCESS)
	{
		ciErrNum = clWaitForEvents(1, &event); // Synch
		printCLError(ciErrNum,10);
		clReleaseEvent(event); // drop our reference so the event is not leaked
	}
	printf("time %lf\n", GetSeconds());

	// Blocking read (CL_TRUE): the call returns once `out` is filled, so no
	// event and no extra wait are needed.
	ciErrNum = clEnqueueReadBuffer(commandQueue, out_data, CL_TRUE, 0, imageBytes, out, 0, NULL, NULL);
	printCLError(ciErrNum,11);

	clReleaseMemObject(in_data);
	clReleaseMemObject(out_data);

	return;
}
示例#4
0
/*
 * Benchmark driver.
 *
 * Times bitonic_cpu on data and bitonic_gpu on data2, printing the seconds
 * reported for each, then compares the two arrays element by element.
 *
 * Returns 1 at the first differing element, 0 otherwise.
 */
int main()
{
  int pos;

  /* CPU run, timed from the reset to the GetSeconds() call. */
  ResetMilli();
  bitonic_cpu(data, SIZE);
  printf("%f\n", GetSeconds());

  /* GPU run on the second array, timed the same way. */
  ResetMilli();
  bitonic_gpu(data2, SIZE);
  printf("%f\n", GetSeconds());

  /* Any difference between the two arrays aborts with exit status 1. */
  pos = 0;
  while (pos < SIZE)
  {
    if (!(data[pos] == data2[pos]))
    {
      printf("Error at %d ", pos);
      return 1;
    }
    ++pos;
  }

  /* Print result, but only for arrays small enough to be readable. */
  if (SIZE <= MAXPRINTSIZE)
  {
    for (pos = 0; pos < SIZE; ++pos)
      printf("%d ", data[pos]);
  }
  printf("\nYour sorting looks correct!\n");
  return 0;
}
示例#5
0
// Main program, inits
//
// Creates a single-buffered 1024x512 RGBA GLUT window with Draw as its
// display callback, initializes OpenCL, builds the kernel from "filter.cl",
// runs computeImages() once, shuts OpenCL down and enters the GLUT main loop.
//
// Returns 1 if init_OpenCL() reports failure (a negative value), 0 otherwise.
int main( int argc, char** argv)
{
	glutInit(&argc, argv);
	glutInitDisplayMode( GLUT_SINGLE | GLUT_RGBA );
	glutInitWindowSize( 1024, 512 );
	glutCreateWindow("CUDA on live GL");
	glutDisplayFunc(Draw);

	ResetMilli();
	if (init_OpenCL()<0)
	{
		printf("OpenCL could not be initialized!\n");
		close_OpenCL();
		// main returns int: a bare "return;" is invalid here, and the
		// failure must be visible in the exit status.
		return 1;
	}
	readAndBuildKernel("filter.cl");
	computeImages();
	// OpenCL is closed before the main loop starts; all computation is
	// finished by this point.
	close_OpenCL();

	glutMainLoop();
	return 0;
}