示例#1
0
/// Sets up the scheduler's OpenCL state: creates the context, picks a device
/// from it via getMaxFlopsDev(), and creates the global command queue on that
/// device. Every OpenCL status code is handed to checkOpenClError() together
/// with the name of the call that produced it.
ofxClScheduler::ofxClScheduler() {
	// create the OpenCL context
#ifdef GL_INTEROP
	// Tie the OpenCL context to the share group of the current CGL context
	// (Apple-specific context property).
	CGLContextObj kCGLContext = CGLGetCurrentContext();
	CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext);

	cl_context_properties properties[] = {
		CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE,
		(cl_context_properties)kCGLShareGroup, 0
	};
	context = clCreateContext(properties, 0, 0, 0, 0, &clErr);
	// Report the call that was actually made on this path.
	checkOpenClError(clErr, "clCreateContext");
#else
	context = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &clErr);
	checkOpenClError(clErr, "clCreateContextFromType");
#endif

	device = getMaxFlopsDev(context);

	// Queue is created with no properties (third argument is 0).
	globalQ = clCreateCommandQueue(context, device, 0, &clErr);
	checkOpenClError(clErr, "clCreateCommandQueue: global");
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
    bool doCPU = false;
    bool doGPU = false;
    bool doMultiGPU = false;
    bool doCPUGPU = false;
    bool doRef = false;
    int numSources = 100;
    int generateVerts = 100000;
    int generateEdgesPerVert = 10;

    parseCommandLineArgs(argc, argv, doCPU, doGPU,
                         doMultiGPU, doCPUGPU, doRef,
                         &numSources, &generateVerts, &generateEdgesPerVert);

    cl_platform_id platform;
    cl_context gpuContext;
    cl_context cpuContext;
    cl_int errNum;

    // First, select an OpenCL platform to run on.  For this example, we
    // simply choose the first available platform.  Normally, you would
    // query for all available platforms and select the most appropriate one.
    cl_uint numPlatforms;
    errNum = clGetPlatformIDs(1, &platform, &numPlatforms);
    printf("Number of OpenCL Platforms: %d\n", numPlatforms);
    if (errNum != CL_SUCCESS || numPlatforms <= 0)
    {
        printf("Failed to find any OpenCL platforms.\n");
        return 1;
    }

    // create the OpenCL context on available GPU devices
    gpuContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum);
    if (errNum != CL_SUCCESS)
    {
        printf("No GPU devices found.\n");
    }

    // Create an OpenCL context on available CPU devices
    cpuContext = clCreateContextFromType(0, CL_DEVICE_TYPE_CPU, NULL, NULL, &errNum);
    if (errNum != CL_SUCCESS)
    {
        printf("No CPU devices found.\n");
    }

    // Allocate memory for arrays
    GraphData graph;
    generateRandomGraph(&graph, generateVerts, generateEdgesPerVert);

    printf("Vertex Count: %d\n", graph.vertexCount);
    printf("Edge Count: %d\n", graph.edgeCount);

    std::vector<int> sourceVertices;


    for(int source = 0; source < numSources; source++)
    {
        sourceVertices.push_back(source % graph.vertexCount);
    }

    int *sourceVertArray = (int*) malloc(sizeof(int) * sourceVertices.size());
    std::copy(sourceVertices.begin(), sourceVertices.end(), sourceVertArray);

    float *results = (float*) malloc(sizeof(float) * sourceVertices.size() * graph.vertexCount);


    // Run Dijkstra's algorithm
    pt::ptime startTimeCPU = pt::microsec_clock::local_time();
    if (doCPU)
    {
        runDijkstra(cpuContext, getMaxFlopsDev(cpuContext), &graph, sourceVertArray,
                    results, sourceVertices.size() );
    }
    pt::time_duration timeCPU = pt::microsec_clock::local_time() - startTimeCPU;

    pt::ptime startTimeGPU = pt::microsec_clock::local_time();
    if (doGPU)
    {
        runDijkstra(gpuContext, getMaxFlopsDev(gpuContext), &graph, sourceVertArray,
                    results, sourceVertices.size() );
    }
    pt::time_duration timeGPU = pt::microsec_clock::local_time() - startTimeGPU;

    pt::ptime startTimeMultiGPU = pt::microsec_clock::local_time();
    if (doMultiGPU)
    {
        runDijkstraMultiGPU(gpuContext, &graph, sourceVertArray,
                            results, sourceVertices.size() );
    }
    pt::time_duration timeMultiGPU = pt::microsec_clock::local_time() - startTimeMultiGPU;


    pt::ptime startTimeGPUCPU = pt::microsec_clock::local_time();
    if (doCPUGPU)
    {
        runDijkstraMultiGPUandCPU(gpuContext, cpuContext, &graph, sourceVertArray,
                                  results, sourceVertices.size() );
    }
    pt::time_duration timeGPUCPU = pt::microsec_clock::local_time() - startTimeGPUCPU;

    pt::ptime startTimeRef = pt::microsec_clock::local_time();
    if (doRef)
    {
        runDijkstraRef( &graph, sourceVertArray,
                        results, sourceVertices.size() );
    }
    pt::time_duration timeRef = pt::microsec_clock::local_time() - startTimeRef;


    if (doCPU)
    {
        printf("\nrunDijkstra - CPU Time:               %f s\n", (float)timeCPU.total_milliseconds() / 1000.0f);
    }

    if (doGPU)
    {
        printf("\nrunDijkstra - Single GPU Time:        %f s\n", (float)timeGPU.total_milliseconds() / 1000.0f);
    }

    if (doMultiGPU)
    {
        printf("\nrunDijkstra - Multi GPU Time:         %f s\n", (float)timeMultiGPU.total_milliseconds() / 1000.0f);
    }

    if (doCPUGPU)
    {
        printf("\nrunDijkstra - Multi GPU and CPU Time: %f s\n", (float)timeGPUCPU.total_milliseconds() / 1000.0f);
    }

    if (doRef)
    {
        printf("\nrunDijkstra - Reference (CPU):        %f s\n", (float)timeRef.total_milliseconds() / 1000.0f);
    }

    free(sourceVertArray);
    free(results);

    clReleaseContext(gpuContext);

    // finish
    //shrEXIT(argc, argv);
 }