/* * main execution routine * Basically it consists of three parts: * - generating the inputs * - running OpenCL kernel * - reading results of processing */ int _tmain(int argc, TCHAR* argv[]) { cl_int err; ocl_args_d_t ocl; cl_device_type deviceType = CL_DEVICE_TYPE_GPU; LARGE_INTEGER perfFrequency; LARGE_INTEGER performanceCountNDRangeStart; LARGE_INTEGER performanceCountNDRangeStop; cl_uint arrayWidth = 1024; cl_uint arrayHeight = 1024; //initialize Open CL objects (context, queue, etc.) if (CL_SUCCESS != SetupOpenCL(&ocl, deviceType)) { return -1; } // allocate working buffers. // the buffer should be aligned with 4K page and size should fit 64-byte cached line cl_uint optimizedSize = ((sizeof(cl_int) * arrayWidth * arrayHeight - 1) / 64 + 1) * 64; cl_int* inputA = (cl_int*)_aligned_malloc(optimizedSize, 4096); cl_int* inputB = (cl_int*)_aligned_malloc(optimizedSize, 4096); cl_int* outputC = (cl_int*)_aligned_malloc(optimizedSize, 4096); if (NULL == inputA || NULL == inputB || NULL == outputC) { LogError("Error: _aligned_malloc failed to allocate buffers.\n"); return -1; } //random input generateInput(inputA, arrayWidth, arrayHeight); generateInput(inputB, arrayWidth, arrayHeight); // Create OpenCL buffers from host memory // These buffers will be used later by the OpenCL kernel if (CL_SUCCESS != CreateBufferArguments(&ocl, inputA, inputB, outputC, arrayWidth, arrayHeight)) { return -1; } // Create and build the OpenCL program if (CL_SUCCESS != CreateAndBuildProgram(&ocl)) { return -1; } // Program consists of kernels. // Each kernel can be called (enqueued) from the host part of OpenCL application. // To call the kernel, you need to create it from existing program. ocl.kernel = clCreateKernel(ocl.program, "Add", &err); if (CL_SUCCESS != err) { LogError("Error: clCreateKernel returned %s\n", TranslateOpenCLError(err)); return -1; } // Passing arguments into OpenCL kernel. if (CL_SUCCESS != SetKernelArguments(&ocl)) { return -1; } // Regularly you wish to use OpenCL in your application to achieve greater performance results // that are hard to achieve in other ways. // To understand those performance benefits you may want to measure time your application spent in OpenCL kernel execution. // The recommended way to obtain this time is to measure interval between two moments: // - just before clEnqueueNDRangeKernel is called, and // - just after clFinish is called // clFinish is necessary to measure entire time spending in the kernel, measuring just clEnqueueNDRangeKernel is not enough, // because this call doesn't guarantees that kernel is finished. // clEnqueueNDRangeKernel is just enqueue new command in OpenCL command queue and doesn't wait until it ends. // clFinish waits until all commands in command queue are finished, that suits your need to measure time. bool queueProfilingEnable = true; if (queueProfilingEnable) QueryPerformanceCounter(&performanceCountNDRangeStart); // Execute (enqueue) the kernel if (CL_SUCCESS != ExecuteAddKernel(&ocl, arrayWidth, arrayHeight)) { return -1; } if (queueProfilingEnable) QueryPerformanceCounter(&performanceCountNDRangeStop); // The last part of this function: getting processed results back. // use map-unmap sequence to update original memory area with output buffer. ReadAndVerify(&ocl, arrayWidth, arrayHeight, inputA, inputB); // retrieve performance counter frequency if (queueProfilingEnable) { QueryPerformanceFrequency(&perfFrequency); LogInfo("NDRange performance counter time %f ms.\n", 1000.0f*(float)(performanceCountNDRangeStop.QuadPart - performanceCountNDRangeStart.QuadPart) / (float)perfFrequency.QuadPart); } _aligned_free(inputA); _aligned_free(inputB); _aligned_free(outputC); #if defined(_DEBUG) getchar(); #endif return 0; }
int main(int argc, char *argv[]) { struct timeval begin, end; gettimeofday(&begin, NULL); printf("OpenCL Initialization\n"); cl_int err = CL_SUCCESS; struct timeval begin_init, end_init; gettimeofday(&begin_init, NULL); if(!createContext()) { printf("Error: createContext\n"); return 0; } if(!getDeviceIDs()) { printf("Error: getDeviceIDs\n"); return 0; } generateArgument(); if(CreateAndBuildProgram() != CL_SUCCESS) { printf("Error: CreateAndBuildProgram\n"); return 0; } if(CreateBufferArguments() != CL_SUCCESS) { printf("Error: CreateBufferArguments\n"); return 0; } ocl.kernel = clCreateKernel(ocl.program, "ray_cal", &err); if(err != CL_SUCCESS) { printf("Error: clCreateKernel\n"); return 0; } if(SetKernelArguments() != CL_SUCCESS) { printf("Error: SetKernelArguments\n"); return 0; } gettimeofday(&end_init, NULL); printf("init elapsed time : %lfs\n", (double)timeval_diff(&end_init, &begin_init)/1000000); struct timeval begin_kernel, end_kernel; gettimeofday(&begin_kernel, NULL); srand((unsigned int)time(NULL)); pthread_t t1, t2; int tmpvalue = WIDTH*HEIGHT/WorkAmount; printf("total recursive %d\n", tmpvalue); //PASSING_OCL ocl_info[2] = {{ocl, 0, 0, tmpvalue*5/8, 256}, {ocl, 1, tmpvalue*5/8, tmpvalue, 256}}; PASSING_OCL ocl_info[2] = {{ocl, 0, 0, 8, 256}, {ocl, 1, 8, 16, 256}}; //int joinstatus pthread_create(&t1, NULL, RenderDisplay, (void *)&ocl_info[0]); pthread_create(&t2, NULL, RenderDisplay, (void *)&ocl_info[1]); pthread_join(t1, NULL); pthread_join(t2, NULL); gettimeofday(&end_kernel, NULL); printf("render elapsed time : %lfs\n", (double)timeval_diff(&end_kernel, &begin_kernel)/1000000); /* std::ostringstream headerStream; headerStream << "P6\n"; headerStream << Width << ' ' << Height << '\n'; headerStream << "255\n"; std::ofstream fileStream("out.ppm", std::ios::out | std::ios::binary); fileStream << headerStream.str(); for (unsigned int j = 0; j < Width*Height; j++) { unsigned char r, g, b; unsigned int tmp = ImagePixel[j]; r = (unsigned char)((tmp >> 16) & 0xFF); g = (unsigned char)((tmp >> 8) & 0xFF); b = (unsigned char)((tmp)& 0xFF); fileStream << r << g << b; } fileStream.flush(); fileStream.close(); */ //gettimeofday(&end_kernel, NULL); //printf("render elapsed time : %lfs\n", (double)timeval_diff(&end_kernel, &begin_kernel)/1000000); gettimeofday(&end, NULL); printf("elapsed time : %lfs\n", (double)timeval_diff(&end, &begin)/1000000); printf("End Success\n"); return 0; }