/// Sets up the OpenCL state used by the scheduler: a context, the device
/// returned by getMaxFlopsDev() for that context, and one command queue on
/// that device.
///
/// `context`, `device`, `globalQ` and `clErr` are not declared in this
/// function; presumably they are members of ofxClScheduler (confirm against
/// the class declaration).
///
/// Every OpenCL status code is handed to checkOpenClError() together with the
/// name of the call that produced it. How checkOpenClError() reacts to a
/// failure is not visible from here.
ofxClScheduler::ofxClScheduler() {
    // create the OpenCL context
#ifdef GL_INTEROP
    // Tie the OpenCL context to the share group of the currently active CGL
    // context so that GL objects can be shared with OpenCL.
    CGLContextObj kCGLContext = CGLGetCurrentContext();
    CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext);
    cl_context_properties properties[] = {
        CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)kCGLShareGroup,
        0
    };
    context = clCreateContext(properties, 0, 0, 0, 0, &clErr);
    // context = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &clErr);

    // Report the call that was actually made in this branch.
    checkOpenClError(clErr, "clCreateContext");
#else
    context = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &clErr);
    checkOpenClError(clErr, "clCreateContextFromType");
#endif

    device = getMaxFlopsDev(context);

    // Queue created with default properties (0).
    globalQ = clCreateCommandQueue(context, device, 0, &clErr);
    checkOpenClError(clErr, "clCreateCommandQueue: global");
}
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { bool doCPU = false; bool doGPU = false; bool doMultiGPU = false; bool doCPUGPU = false; bool doRef = false; int numSources = 100; int generateVerts = 100000; int generateEdgesPerVert = 10; parseCommandLineArgs(argc, argv, doCPU, doGPU, doMultiGPU, doCPUGPU, doRef, &numSources, &generateVerts, &generateEdgesPerVert); cl_platform_id platform; cl_context gpuContext; cl_context cpuContext; cl_int errNum; // First, select an OpenCL platform to run on. For this example, we // simply choose the first available platform. Normally, you would // query for all available platforms and select the most appropriate one. cl_uint numPlatforms; errNum = clGetPlatformIDs(1, &platform, &numPlatforms); printf("Number of OpenCL Platforms: %d\n", numPlatforms); if (errNum != CL_SUCCESS || numPlatforms <= 0) { printf("Failed to find any OpenCL platforms.\n"); return 1; } // create the OpenCL context on available GPU devices gpuContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum); if (errNum != CL_SUCCESS) { printf("No GPU devices found.\n"); } // Create an OpenCL context on available CPU devices cpuContext = clCreateContextFromType(0, CL_DEVICE_TYPE_CPU, NULL, NULL, &errNum); if (errNum != CL_SUCCESS) { printf("No CPU devices found.\n"); } // Allocate memory for arrays GraphData graph; generateRandomGraph(&graph, generateVerts, generateEdgesPerVert); printf("Vertex Count: %d\n", graph.vertexCount); printf("Edge Count: %d\n", graph.edgeCount); std::vector<int> sourceVertices; for(int source = 0; source < numSources; source++) { sourceVertices.push_back(source % graph.vertexCount); } int *sourceVertArray = (int*) malloc(sizeof(int) * sourceVertices.size()); std::copy(sourceVertices.begin(), sourceVertices.end(), sourceVertArray); float *results = (float*) 
malloc(sizeof(float) * sourceVertices.size() * graph.vertexCount); // Run Dijkstra's algorithm pt::ptime startTimeCPU = pt::microsec_clock::local_time(); if (doCPU) { runDijkstra(cpuContext, getMaxFlopsDev(cpuContext), &graph, sourceVertArray, results, sourceVertices.size() ); } pt::time_duration timeCPU = pt::microsec_clock::local_time() - startTimeCPU; pt::ptime startTimeGPU = pt::microsec_clock::local_time(); if (doGPU) { runDijkstra(gpuContext, getMaxFlopsDev(gpuContext), &graph, sourceVertArray, results, sourceVertices.size() ); } pt::time_duration timeGPU = pt::microsec_clock::local_time() - startTimeGPU; pt::ptime startTimeMultiGPU = pt::microsec_clock::local_time(); if (doMultiGPU) { runDijkstraMultiGPU(gpuContext, &graph, sourceVertArray, results, sourceVertices.size() ); } pt::time_duration timeMultiGPU = pt::microsec_clock::local_time() - startTimeMultiGPU; pt::ptime startTimeGPUCPU = pt::microsec_clock::local_time(); if (doCPUGPU) { runDijkstraMultiGPUandCPU(gpuContext, cpuContext, &graph, sourceVertArray, results, sourceVertices.size() ); } pt::time_duration timeGPUCPU = pt::microsec_clock::local_time() - startTimeGPUCPU; pt::ptime startTimeRef = pt::microsec_clock::local_time(); if (doRef) { runDijkstraRef( &graph, sourceVertArray, results, sourceVertices.size() ); } pt::time_duration timeRef = pt::microsec_clock::local_time() - startTimeRef; if (doCPU) { printf("\nrunDijkstra - CPU Time: %f s\n", (float)timeCPU.total_milliseconds() / 1000.0f); } if (doGPU) { printf("\nrunDijkstra - Single GPU Time: %f s\n", (float)timeGPU.total_milliseconds() / 1000.0f); } if (doMultiGPU) { printf("\nrunDijkstra - Multi GPU Time: %f s\n", (float)timeMultiGPU.total_milliseconds() / 1000.0f); } if (doCPUGPU) { printf("\nrunDijkstra - Multi GPU and CPU Time: %f s\n", (float)timeGPUCPU.total_milliseconds() / 1000.0f); } if (doRef) { printf("\nrunDijkstra - Reference (CPU): %f s\n", (float)timeRef.total_milliseconds() / 1000.0f); } free(sourceVertArray); free(results); 
clReleaseContext(gpuContext); // finish //shrEXIT(argc, argv); }