////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
    bool doCPU = false;
    bool doGPU = false;
    bool doMultiGPU = false;
    bool doCPUGPU = false;
    bool doRef = false;
    int numSources = 100;
    int generateVerts = 100000;
    int generateEdgesPerVert = 10;

    parseCommandLineArgs(argc, argv, doCPU, doGPU,
                         doMultiGPU, doCPUGPU, doRef,
                         &numSources, &generateVerts, &generateEdgesPerVert);

    cl_platform_id platform;
    cl_context gpuContext;
    cl_context cpuContext;
    cl_int errNum;

    // First, select an OpenCL platform to run on.  For this example, we
    // simply choose the first available platform.  Normally, you would
    // query for all available platforms and select the most appropriate one.
    cl_uint numPlatforms;
    errNum = clGetPlatformIDs(1, &platform, &numPlatforms);
    printf("Number of OpenCL Platforms: %d\n", numPlatforms);
    if (errNum != CL_SUCCESS || numPlatforms <= 0)
    {
        printf("Failed to find any OpenCL platforms.\n");
        return 1;
    }

    // create the OpenCL context on available GPU devices
    gpuContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum);
    if (errNum != CL_SUCCESS)
    {
        printf("No GPU devices found.\n");
    }

    // Create an OpenCL context on available CPU devices
    cpuContext = clCreateContextFromType(0, CL_DEVICE_TYPE_CPU, NULL, NULL, &errNum);
    if (errNum != CL_SUCCESS)
    {
        printf("No CPU devices found.\n");
    }

    // Allocate memory for arrays
    GraphData graph;
    generateRandomGraph(&graph, generateVerts, generateEdgesPerVert);

    printf("Vertex Count: %d\n", graph.vertexCount);
    printf("Edge Count: %d\n", graph.edgeCount);

    std::vector<int> sourceVertices;


    for(int source = 0; source < numSources; source++)
    {
        sourceVertices.push_back(source % graph.vertexCount);
    }

    int *sourceVertArray = (int*) malloc(sizeof(int) * sourceVertices.size());
    std::copy(sourceVertices.begin(), sourceVertices.end(), sourceVertArray);

    float *results = (float*) malloc(sizeof(float) * sourceVertices.size() * graph.vertexCount);


    // Run Dijkstra's algorithm
    pt::ptime startTimeCPU = pt::microsec_clock::local_time();
    if (doCPU)
    {
        runDijkstra(cpuContext, getMaxFlopsDev(cpuContext), &graph, sourceVertArray,
                    results, sourceVertices.size() );
    }
    pt::time_duration timeCPU = pt::microsec_clock::local_time() - startTimeCPU;

    pt::ptime startTimeGPU = pt::microsec_clock::local_time();
    if (doGPU)
    {
        runDijkstra(gpuContext, getMaxFlopsDev(gpuContext), &graph, sourceVertArray,
                    results, sourceVertices.size() );
    }
    pt::time_duration timeGPU = pt::microsec_clock::local_time() - startTimeGPU;

    pt::ptime startTimeMultiGPU = pt::microsec_clock::local_time();
    if (doMultiGPU)
    {
        runDijkstraMultiGPU(gpuContext, &graph, sourceVertArray,
                            results, sourceVertices.size() );
    }
    pt::time_duration timeMultiGPU = pt::microsec_clock::local_time() - startTimeMultiGPU;


    pt::ptime startTimeGPUCPU = pt::microsec_clock::local_time();
    if (doCPUGPU)
    {
        runDijkstraMultiGPUandCPU(gpuContext, cpuContext, &graph, sourceVertArray,
                                  results, sourceVertices.size() );
    }
    pt::time_duration timeGPUCPU = pt::microsec_clock::local_time() - startTimeGPUCPU;

    pt::ptime startTimeRef = pt::microsec_clock::local_time();
    if (doRef)
    {
        runDijkstraRef( &graph, sourceVertArray,
                        results, sourceVertices.size() );
    }
    pt::time_duration timeRef = pt::microsec_clock::local_time() - startTimeRef;


    if (doCPU)
    {
        printf("\nrunDijkstra - CPU Time:               %f s\n", (float)timeCPU.total_milliseconds() / 1000.0f);
    }

    if (doGPU)
    {
        printf("\nrunDijkstra - Single GPU Time:        %f s\n", (float)timeGPU.total_milliseconds() / 1000.0f);
    }

    if (doMultiGPU)
    {
        printf("\nrunDijkstra - Multi GPU Time:         %f s\n", (float)timeMultiGPU.total_milliseconds() / 1000.0f);
    }

    if (doCPUGPU)
    {
        printf("\nrunDijkstra - Multi GPU and CPU Time: %f s\n", (float)timeGPUCPU.total_milliseconds() / 1000.0f);
    }

    if (doRef)
    {
        printf("\nrunDijkstra - Reference (CPU):        %f s\n", (float)timeRef.total_milliseconds() / 1000.0f);
    }

    free(sourceVertArray);
    free(results);

    clReleaseContext(gpuContext);

    // finish
    //shrEXIT(argc, argv);
 }
// Example #2
// 0
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
/// Entry point of the file-driven Dijkstra driver.
///
/// Builds a graph with generateLVAGraph, reads the list of source vertices
/// from the text file "nodes2cal.out", runs the enabled Dijkstra variant(s)
/// and prints every computed value to stdout, one per line, source by source.
///
/// Expected layout of "nodes2cal.out": a first integer `numSources` (> 0)
/// followed by whitespace-separated 1-based source vertex IDs.
///
/// @return 0 on success, 1 when the input file is missing or malformed.
int
main( int argc, char** argv) 
{
    //@TEMP - why do I need this for link to work on Linux?
    //cutWaitForThreads(NULL,0);
    //@TEMP

    // Command-line parsing and CUDA device selection are disabled in this
    // build; the execution mode is fixed to the single-GPU path.
    bool doGPU = true;
    bool doMultiGPU = false;
    bool doRef = false;

    // NOTE(review): with parseCommandLineArgs disabled these two values were
    // previously passed to generateLVAGraph uninitialized.  They are now
    // zero-initialized; confirm whether generateLVAGraph actually uses them.
    int generateVerts = 0;
    int generateEdgesPerVert = 0;

    // Build the input graph.
    GraphData graph;
    generateLVAGraph(&graph, generateVerts, generateEdgesPerVert);

    // Read the source vertices from disk.
    FILE *fp = fopen("nodes2cal.out", "r");
    if (fp == NULL)
    {
        printf("nodes2cal.out not exists!.\n");
        return 1;
    }

    // fscanf returns the number of converted fields: 1 on success, 0 when the
    // next token is not a number, EOF at end of input.  Only 1 is a success.
    int numSources = 0;
    if (fscanf(fp, "%d", &numSources) != 1)
    {
        fclose(fp);
        printf("nodes2cal.out must have as first line 'numSources'.\n");
        return 1;
    }
    if (numSources < 1)
    {
        fclose(fp);
        printf("nodes2cal.out must have numSources>0.\n");
        return 1;
    }

    std::vector<int> sourceVertices;
    sourceVertices.reserve(static_cast<size_t>(numSources));

    int sourceId = 0;
    int fieldsRead = 0;
    while ((fieldsRead = fscanf(fp, "%d", &sourceId)) == 1)
    {
        // IDs in the file are 1-based; they are used as 0-based vertex
        // indices, so reject anything outside the graph.
        if (sourceId < 1 || sourceId > graph.vertexCount)
        {
            fclose(fp);
            fprintf(stderr, "nodes2cal.out: source id %d is outside 1..%d.\n",
                    sourceId, graph.vertexCount);
            return 1;
        }
        sourceVertices.push_back(sourceId - 1);
    }
    fclose(fp);

    if (fieldsRead != EOF)
    {
        // A zero return means a non-numeric token that fscanf will never
        // consume; stop instead of looping on it forever.
        fprintf(stderr, "nodes2cal.out: unexpected non-numeric content.\n");
        return 1;
    }
    if (sourceVertices.empty())
    {
        fprintf(stderr, "nodes2cal.out: no source vertex ids listed.\n");
        return 1;
    }
    if (sourceVertices.size() != static_cast<size_t>(numSources))
    {
        // The original never enforced the header count; keep going, but say so.
        fprintf(stderr, "nodes2cal.out: header announces %d sources but %lu were read.\n",
                numSources, static_cast<unsigned long>(sourceVertices.size()));
    }

    // One row of vertexCount floats per source vertex.  vertexCount >= 1 is
    // implied by the range check above, so both vectors are non-empty.
    const size_t vertexCount = static_cast<size_t>(graph.vertexCount);
    std::vector<float> resultStorage(sourceVertices.size() * vertexCount);
    int *sourceVertArray = &sourceVertices[0];
    float *results = &resultStorage[0];

    // Run Dijkstra's algorithm (timing instrumentation is disabled).
    if ( doGPU )
    {
        runDijkstra(&graph, sourceVertArray, results, sourceVertices.size() );
    }

    if ( doMultiGPU )
    {
        runDijkstraMultiGPU(&graph, sourceVertArray, results, sourceVertices.size() );
    }

    if ( doRef )
    {
        runDijkstraRef(&graph, sourceVertArray, results, sourceVertices.size() );
    }

    // Dump the result matrix row by row; size_t indexing avoids 32-bit
    // wrap-around for large source/vertex counts.
    for (size_t i = 0; i < sourceVertices.size(); i++)
    {
        for (size_t j = 0; j < vertexCount; j++)
        {
            printf("%f\n", results[i * vertexCount + j] );
        }
    }

    //cudaThreadExit();

    return 0;
}