//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { bool doCPU = false; bool doGPU = false; bool doMultiGPU = false; bool doCPUGPU = false; bool doRef = false; int numSources = 100; int generateVerts = 100000; int generateEdgesPerVert = 10; parseCommandLineArgs(argc, argv, doCPU, doGPU, doMultiGPU, doCPUGPU, doRef, &numSources, &generateVerts, &generateEdgesPerVert); cl_platform_id platform; cl_context gpuContext; cl_context cpuContext; cl_int errNum; // First, select an OpenCL platform to run on. For this example, we // simply choose the first available platform. Normally, you would // query for all available platforms and select the most appropriate one. cl_uint numPlatforms; errNum = clGetPlatformIDs(1, &platform, &numPlatforms); printf("Number of OpenCL Platforms: %d\n", numPlatforms); if (errNum != CL_SUCCESS || numPlatforms <= 0) { printf("Failed to find any OpenCL platforms.\n"); return 1; } // create the OpenCL context on available GPU devices gpuContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum); if (errNum != CL_SUCCESS) { printf("No GPU devices found.\n"); } // Create an OpenCL context on available CPU devices cpuContext = clCreateContextFromType(0, CL_DEVICE_TYPE_CPU, NULL, NULL, &errNum); if (errNum != CL_SUCCESS) { printf("No CPU devices found.\n"); } // Allocate memory for arrays GraphData graph; generateRandomGraph(&graph, generateVerts, generateEdgesPerVert); printf("Vertex Count: %d\n", graph.vertexCount); printf("Edge Count: %d\n", graph.edgeCount); std::vector<int> sourceVertices; for(int source = 0; source < numSources; source++) { sourceVertices.push_back(source % graph.vertexCount); } int *sourceVertArray = (int*) malloc(sizeof(int) * sourceVertices.size()); std::copy(sourceVertices.begin(), sourceVertices.end(), sourceVertArray); float *results = (float*) malloc(sizeof(float) * sourceVertices.size() * graph.vertexCount); // Run Dijkstra's algorithm pt::ptime startTimeCPU = pt::microsec_clock::local_time(); if (doCPU) { runDijkstra(cpuContext, getMaxFlopsDev(cpuContext), &graph, sourceVertArray, results, sourceVertices.size() ); } pt::time_duration timeCPU = pt::microsec_clock::local_time() - startTimeCPU; pt::ptime startTimeGPU = pt::microsec_clock::local_time(); if (doGPU) { runDijkstra(gpuContext, getMaxFlopsDev(gpuContext), &graph, sourceVertArray, results, sourceVertices.size() ); } pt::time_duration timeGPU = pt::microsec_clock::local_time() - startTimeGPU; pt::ptime startTimeMultiGPU = pt::microsec_clock::local_time(); if (doMultiGPU) { runDijkstraMultiGPU(gpuContext, &graph, sourceVertArray, results, sourceVertices.size() ); } pt::time_duration timeMultiGPU = pt::microsec_clock::local_time() - startTimeMultiGPU; pt::ptime startTimeGPUCPU = pt::microsec_clock::local_time(); if (doCPUGPU) { runDijkstraMultiGPUandCPU(gpuContext, cpuContext, &graph, sourceVertArray, results, sourceVertices.size() ); } pt::time_duration timeGPUCPU = pt::microsec_clock::local_time() - startTimeGPUCPU; pt::ptime startTimeRef = pt::microsec_clock::local_time(); if (doRef) { runDijkstraRef( &graph, sourceVertArray, results, sourceVertices.size() ); } pt::time_duration timeRef = pt::microsec_clock::local_time() - startTimeRef; if (doCPU) { printf("\nrunDijkstra - CPU Time: %f s\n", (float)timeCPU.total_milliseconds() / 1000.0f); } if (doGPU) { printf("\nrunDijkstra - Single GPU Time: %f s\n", (float)timeGPU.total_milliseconds() / 1000.0f); } if (doMultiGPU) { printf("\nrunDijkstra - Multi GPU Time: %f s\n", (float)timeMultiGPU.total_milliseconds() / 1000.0f); } if (doCPUGPU) { printf("\nrunDijkstra - Multi GPU and CPU Time: %f s\n", (float)timeGPUCPU.total_milliseconds() / 1000.0f); } if (doRef) { printf("\nrunDijkstra - Reference (CPU): %f s\n", (float)timeRef.total_milliseconds() / 1000.0f); } free(sourceVertArray); free(results); clReleaseContext(gpuContext); // finish //shrEXIT(argc, argv); }
//////////////////////////////////////////////////////////////////////////////// // Program main //////////////////////////////////////////////////////////////////////////////// int main( int argc, char** argv) { //@TEMP - why do I need this for link to work on Linux? //cutWaitForThreads(NULL,0); //@TEMP // use command-line specified CUDA device, otherwise use device with highest Gflops/s //if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") ) // cutilDeviceInit(argc, argv); //else // cudaSetDevice( cutGetMaxGflopsDeviceId() ); bool doGPU; bool doRef; bool doMultiGPU; int generateVerts; int generateEdgesPerVert; int numSources; int sourceId; //parseCommandLineArgs(argc, (const char**)argv, doGPU, doMultiGPU, doRef, &numSources, &generateVerts, &generateEdgesPerVert); doGPU = 1; doMultiGPU = 0; doRef = 0; // Allocate memory for arrays GraphData graph; //generateRandomGraph(&graph, generateVerts, generateEdgesPerVert); generateLVAGraph(&graph, generateVerts, generateEdgesPerVert); //printf("Vertex Count: %d\n", graph.vertexCount); //printf("Edge Count: %d\n", graph.edgeCount); std::vector<int> sourceVertices; //for(int source = 0; source < numSources; source++) //{ // sourceVertices.push_back(source % graph.vertexCount); //} FILE *fp; fp = fopen("nodes2cal.out", "r"); if(fp){ //printf("Reading file nodes2cal.out...\n"); if(fscanf(fp, "%d", &numSources)!=EOF){ if(numSources<1){ printf("nodes2cal.out must have numSources>0.\n"); return 1; } } else{ printf("nodes2cal.out must have as first line 'numSources'.\n"); return 1; } while(fscanf(fp, "%d", &sourceId)!=EOF){ sourceVertices.push_back(sourceId-1); } } else{ printf("nodes2cal.out not exists!.\n"); return 1; } int *sourceVertArray = (int*) malloc(sizeof(int) * sourceVertices.size()); std::copy(sourceVertices.begin(), sourceVertices.end(), sourceVertArray); float *results = (float*) malloc(sizeof(float) * sourceVertices.size() * graph.vertexCount); unsigned int gpuTimer = 0; //cutilCheckError(cutCreateTimer(&gpuTimer)); //cutilCheckError(cutStartTimer(gpuTimer)); // Run Dijkstra's algorithm if ( doGPU ) { runDijkstra(&graph, sourceVertArray, results, sourceVertices.size() ); } //cutilCheckError(cutStopTimer(gpuTimer)); unsigned int multiGPUTimer = 0; //cutilCheckError(cutCreateTimer(&multiGPUTimer)); //cutilCheckError(cutStartTimer(multiGPUTimer)); if ( doMultiGPU ) { runDijkstraMultiGPU(&graph, sourceVertArray, results, sourceVertices.size() ); } //cutilCheckError(cutStopTimer(multiGPUTimer)); unsigned int refTimer = 0; //cutilCheckError(cutCreateTimer(&refTimer)); //cutilCheckError(cutStartTimer(refTimer)); if ( doRef ) { runDijkstraRef(&graph, sourceVertArray, results, sourceVertices.size() ); } //cutilCheckError(cutStopTimer(refTimer)); for (unsigned int i = 0; i < sourceVertices.size(); i++) { for (int j = 0; j < graph.vertexCount; j++) { //if (i != j) //{ //printf("%d --> %d: %f\n", sourceVertArray[i], j, results[i * graph.vertexCount + j] ); printf("%f\n", results[i * graph.vertexCount + j] ); //} } } free(sourceVertArray); free(results); //cudaThreadExit(); return 0; }