int main(int argc, char** argv) { cl_platform_id pf[MAX_PLATFORMS]; cl_uint nb_platforms = 0; cl_int err; // error code returned from api calls cl_device_type device_type = CL_DEVICE_TYPE_ALL; // Filter args // argv++; while (argc > 1) { if(!strcmp(*argv, "-g") || !strcmp(*argv, "--gpu-only")) { if(device_type != CL_DEVICE_TYPE_ALL) error("--gpu-only and --cpu-only can not be specified at the same time\n"); device_type = CL_DEVICE_TYPE_GPU; } else if(!strcmp(*argv, "-c") || !strcmp(*argv, "--cpu-only")) { if(device_type != CL_DEVICE_TYPE_ALL) error("--gpu-only and --cpu-only can not be specified at the same time\n"); device_type = CL_DEVICE_TYPE_CPU; } else if(!strcmp(*argv, "-s") || !strcmp(*argv, "--size")) { unsigned i; int r; char c; r = sscanf(argv[1], "%u%[mMkK]", &SIZE, &c); if (r == 2) { if (c == 'k' || c == 'K') SIZE *= 1024; else if (c == 'm' || c == 'M') SIZE *= 1024 * 1024; } argc--; argv++; } else break; argc--; argv++; } if(argc > 1) TILE = atoi(*argv); // Get list of OpenCL platforms detected // err = clGetPlatformIDs(3, pf, &nb_platforms); check(err, "Failed to get platform IDs"); printf("%d OpenCL platforms detected\n", nb_platforms); // For each platform do // for (cl_int p = 0; p < nb_platforms; p++) { cl_uint num; int platform_valid = 1; char name[1024], vendor[1024]; cl_device_id devices[MAX_DEVICES]; cl_uint nb_devices = 0; cl_context context; // compute context cl_program program; // compute program cl_kernel kernel; err = clGetPlatformInfo(pf[p], CL_PLATFORM_NAME, 1024, name, NULL); check(err, "Failed to get Platform Info"); err = clGetPlatformInfo(pf[p], CL_PLATFORM_VENDOR, 1024, vendor, NULL); check(err, "Failed to get Platform Info"); printf("Platform %d: %s - %s\n", p, name, vendor); // Get list of devices // err = clGetDeviceIDs(pf[p], device_type, MAX_DEVICES, devices, &nb_devices); printf("nb devices = %d\n", nb_devices); if(nb_devices == 0) continue; // Create compute context with "device_type" devices // context = clCreateContext (0, nb_devices, devices, NULL, NULL, &err); check(err, "Failed to create compute context"); // Load program source into memory // const char *opencl_prog; opencl_prog = file_load(KERNEL_FILE); // Attach program source to context // program = clCreateProgramWithSource(context, 1, &opencl_prog, NULL, &err); check(err, "Failed to create program"); // Compile program // { char flags[1024]; sprintf (flags, "-cl-mad-enable -cl-fast-relaxed-math -DSIZE=%d -DTILE=%d -DTYPE=%s", SIZE, TILE, "float"); err = clBuildProgram (program, 0, NULL, flags, NULL, NULL); if(err != CL_SUCCESS) { size_t len; // Display compiler log // clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, NULL, &len); { char buffer[len+1]; fprintf(stderr, "--- Compiler log ---\n"); clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL); fprintf(stderr, "%s\n", buffer); fprintf(stderr, "--------------------\n"); } if(err != CL_SUCCESS) error("Failed to build program!\n"); } } // Create the compute kernel in the program we wish to run // kernel = clCreateKernel(program, KERNEL_NAME, &err); check(err, "Failed to create compute kernel"); // Allocate and initialize input data // alloc_buffers_and_user_data(context); // Iterate over devices // for(cl_int dev = 0; dev < nb_devices; dev++) { cl_command_queue queue; char name[1024]; cl_device_type dtype; err = clGetDeviceInfo(devices[dev], CL_DEVICE_NAME, 1024, name, NULL); check(err, "Cannot get type of device"); err = clGetDeviceInfo(devices[dev], CL_DEVICE_TYPE, sizeof(cl_device_type), &dtype, NULL); check(err, "Cannot get type of device"); printf("\tDevice %d : %s [%s]\n", dev, (dtype == CL_DEVICE_TYPE_GPU) ? "GPU" : "CPU", name); // Create a command queue // queue = clCreateCommandQueue(context, devices[dev], CL_QUEUE_PROFILING_ENABLE, &err); check(err,"Failed to create command queue"); // Write our data set into device buffer // send_input(queue); // Execute kernel // { cl_event prof_event; cl_ulong start, end; struct timeval t1,t2; double timeInMicroseconds; size_t global[2] = { SIZE, SIZE }; // global domain size for our calculation size_t local[2] = { TILE, TILE }; // local domain size for our calculation printf("\t%dx%d Threads in workgroups of %dx%d\n", global[0], global[1], local[0], local[1]); // Set kernel arguments // err = 0; err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_buffer); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output_buffer); check(err, "Failed to set kernel arguments"); gettimeofday (&t1, NULL); for (unsigned iter = 0; iter < ITERATIONS; iter++) { err = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global, local, 0, NULL, &prof_event); check(err, "Failed to execute kernel"); } // Wait for the command commands to get serviced before reading back results // clFinish(queue); gettimeofday (&t2,NULL); // Check performance // timeInMicroseconds = (double)TIME_DIFF(t1, t2) / ITERATIONS; printf("\tComputation performed in %lf µs over device #%d\n", timeInMicroseconds, dev); clReleaseEvent(prof_event); } // Read back the results from the device to verify the output // retrieve_output(queue); // Validate computation // check_output_data(); clReleaseCommandQueue(queue); } // Cleanup // free_buffers_and_user_data(); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseContext(context); } return 0; }
int main (int argc, char **argv) { unsigned long long perf; tsp_path_t path; tsp_path_t sol; int sol_len; long long int cuts = 0; struct tsp_queue q; struct timespec t1, t2; /* lire les arguments */ int opt; while ((opt = getopt(argc, argv, "s")) != -1) { switch (opt) { case 's': affiche_sol = true; break; default: usage(argv[0]); break; } } if (optind != argc-3) usage(argv[0]); nb_towns = atoi(argv[optind]); myseed = atol(argv[optind+1]); nb_threads = atoi(argv[optind+2]); assert(nb_towns > 0); assert(nb_threads > 0); minimum = INT_MAX; /* generer la carte et la matrice de distance */ fprintf (stderr, "ncities = %3d\n", nb_towns); genmap (); init_queue (&q); clock_gettime (CLOCK_REALTIME, &t1); memset (path, -1, MAX_TOWNS * sizeof (int)); path[0] = 0; /* mettre les travaux dans la file d'attente */ generate_tsp_jobs (&q, 1, 0, path, &cuts, sol, & sol_len, 3); no_more_jobs (&q); /* calculer chacun des travaux */ tsp_path_t solution; memset (solution, -1, MAX_TOWNS * sizeof (int)); solution[0] = 0; while (!empty_queue (&q)) { int hops = 0, len = 0; get_job (&q, solution, &hops, &len); tsp (hops, len, solution, &cuts, sol, &sol_len); } clock_gettime (CLOCK_REALTIME, &t2); if (affiche_sol) print_solution_svg (sol, sol_len); perf = TIME_DIFF (t1,t2); printf("<!-- # = %d seed = %ld len = %d threads = %d time = %lld.%03lld ms ( %lld coupures ) -->\n", nb_towns, myseed, sol_len, nb_threads, perf/1000000ll, perf%1000000ll, cuts); return 0 ; }
int main (int argc, char **argv) { unsigned long temps; int cuts = 0; struct timeval t1, t2; if (argc != 3) { fprintf (stderr, "Usage: %s <nbcities ( MAXNBCITIES = %d )> <seed> \n", argv[0], MAXNBCITIES) ; exit (1) ; } NbCities = atoi (argv[1]) ; seed = atoi(argv[2]); minimum = INT_MAX ; // printf ("NbCities = %3d\n", NbCities) ; init_queue (&listeTaches) ; genmap () ; gettimeofday(&t1,NULL); { Path_t path; int i; for(i = 0; i < MAXNBCITIES; i++) path[i] = -1 ; /* Ville de d�part : Ville 0 */ path [0] = 0; tsp_partiel (1, 0, path, &cuts); } gettimeofday(&t2,NULL); temps = TIME_DIFF(t1,t2); // printf("time = %ld.%03ldms (%d coupures)\n", temps/1000, temps%1000, cuts); gettimeofday(&t1,NULL); { int j; Path_t path; int hops, len, nbjobs; cuts = 0; nbjobs = get_nbjobs(&listeTaches); #pragma omp parallel for private (path, hops, len) schedule (static, 50) for (j = 0; j < nbjobs; j++) { if (get_job(&listeTaches, j, path, &hops, &len)) plop(hops, len, path, &cuts); } } gettimeofday(&t2,NULL); temps = TIME_DIFF(t1,t2); printf("minimum = %d time = %ld ms (%d coupures)\n", minimum, temps, cuts); return 0 ; }