int main(void) { const int size[NSTEPS] = {500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000}; // specification of array sizes static const double epsilon = 0.00001; // maximum floating point error allowed between doubles to be considered equal const double flops_per_iteration = 2.0; int i; // index variables for looping double time[2]; // elapsed time. 0 is test case, 1 is base case int mflops[2]; // calculated mflops. 0 is test case, 1 is base case double* a; double* b; double* ctest; double* cbase; stopwatch* sw = stopwatch_new(); for(i = 0; i < NSTEPS; i++) { int n = size[i]; int n2 = n * n; a = (double*) malloc(n2*sizeof(double)); b = (double*) malloc(n2*sizeof(double)); ctest = (double*) malloc(n2*sizeof(double)); cbase = (double*) malloc(n2*sizeof(double)); rand_square_double_matrix(i+10, n, a); rand_square_double_matrix(i+11, n, b); zero_square_double_matrix(n, ctest); zero_square_double_matrix(n, cbase); stopwatch_restart(sw); my_dgemm(n, a, b, ctest); stopwatch_stop(sw); time[0] = stopwatch_time(sw); stopwatch_restart(sw); cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, 1.0, a, n, b, n, 1.0, cbase, n); stopwatch_stop(sw); //printf("A\n"); //print_square_matrix(n, a); //printf("B\n"); //print_square_matrix(n, b); //printf("my_dgemm\n"); //print_square_matrix(n, ctest); //printf("cdgemm\n"); //print_square_matrix(n, cbase); assert_equal(n, ctest, cbase, epsilon); time[1] = stopwatch_time(sw); mflops[0] = calc_mflops(flops_per_iteration, n, time[0]); mflops[1] = calc_mflops(flops_per_iteration, n, time[1]); printf("%d, %5.2f, %d, %5.2f, %d\n", n, time[0], mflops[0], time[1], mflops[1]); free(a); free(b); free(ctest); free(cbase); } stopwatch_delete(sw); return 0; }
int main (int argc, char **argv) { int error = 0; Vector *vector = vector_new (); if (vector_load_from_file (vector, argc, argv)) { Stopwatch *stopwatch = stopwatch_new (); uint64_t inversions = 0; stopwatch_start (stopwatch); inversions = count_inversions (vector); stopwatch_stop (stopwatch); printf ("%llu inversions of %zu numbers in %llu.%03u ms.\n", inversions, vector->length, stopwatch->elapsed, stopwatch->usec); stopwatch_free (stopwatch); } else { error = 1; } vector_free (vector); return error; }
static void handle_work(struct worker_info *data, struct work *work) { const char *command = data->command; struct benchmark_config *config = data->config; struct benchmark_operations *bops = &config->ops; unsigned long start, elapsed; if (work->progress > 1) die("something wrong happened"); start = stopwatch_start(); if (strstartswith(command, "putlist")) { bops->putlist_test(data->db, command, config->num, config->vsiz, config->batch, work->seed); } else if (!strcmp(command, "fwmkeys")) { bops->fwmkeys_test(data->db, config->num, work->seed); } else if (!strcmp(command, "range") || !strcmp(command, "range_atomic")) { bops->range_test(data->db, command, config->num, config->vsiz, config->batch, work->seed); } else if (!strcmp(command, "rangeout_atomic")) { bops->rangeout_test(data->db, command, config->num, config->vsiz, config->batch, work->seed); } else if (strstartswith(command, "getlist")) { bops->getlist_test(data->db, command, config->num, config->vsiz, config->batch, work->seed); } else if (!strcmp(command, "fwmkeys-getlist")) { bops->fwmkeys_test(data->db, config->num, work->seed); bops->getlist_test(data->db, "getlist", config->num, config->vsiz, config->batch, work->seed); } else if (!strcmp(command, "fwmkeys-getlist_atomic")) { bops->fwmkeys_test(data->db, config->num, work->seed); bops->getlist_test(data->db, "getlist_atomic", config->num, config->vsiz, config->batch, work->seed); } else if (strstartswith(command, "outlist")) { bops->outlist_test(data->db, command, config->num, config->batch, work->seed); } else if (!strcmp(command, "fwmkeys-outlist")) { bops->fwmkeys_test(data->db, config->num, work->seed); bops->outlist_test(data->db, "outlist", config->num, config->batch, work->seed); } else if (!strcmp(command, "fwmkeys-outlist_atomic")) { bops->fwmkeys_test(data->db, config->num, work->seed); bops->outlist_test(data->db, "outlist_atomic", config->num, config->batch, work->seed); } else if (!strcmp(command, "put")) { bops->put_test(data->db, 
config->num, config->vsiz, work->seed); } else if (!strcmp(command, "get")) { bops->get_test(data->db, config->num, config->vsiz, work->seed); } else if (!strcmp(command, "nop")) { /* nop */ } else { die("Invalid command %s", command); } elapsed = stopwatch_stop(start); work->start[work->progress] = start; work->elapsed[work->progress] = elapsed; work->progress++; }
/*
 * Command-line front end for nqueen_cpu.
 *
 * Accepts "-s board_size" (default 18), times the solver and reports the
 * solution counts on stderr plus a one-line JSON-like record on stdout.
 * Any other option, or running with no arguments at all, prints the usage
 * text and exits with EXIT_FAILURE.
 */
int main(int argc, char *argv[])
{
    int board = 18;
    long long unique = 0;
    long long total;
    stopwatch timer;
    int status = 1;
    int option_index = 0;
    int opt;

    if (argc < 2) {
        fprintf(stderr, "Usage: %s [-s board_size]\n", argv[0]);
        exit(EXIT_FAILURE);
    }

    for (;;) {
        opt = getopt_long(argc, argv, "s:", long_options, &option_index);
        if (opt == -1)
            break;
        if (opt != 's') {
            fprintf(stderr, "Usage: %s [-s board_size]\n", argv[0]);
            exit(EXIT_FAILURE);
        }
        board = atoi(optarg);
    }

    stopwatch_start(&timer);
    total = nqueen_cpu(board, &unique);
    stopwatch_stop(&timer);

    fprintf(stderr,
            "The number of solutions is %lld, the number of unique solutions is "
            "%lld and the total time it took is %lf seconds\n",
            total, unique, get_interval_by_sec(&timer));

    printf("{ \"status\": %d, \"options\": \"-s %d\", \"time\": %f, \"output\": \"[%lld, %lld]\" }\n",
           status, board, get_interval_by_sec(&timer), total, unique);

    return 0;
}
/** * QR-based solver with Givens rotations. * @param[in] argc ARGument Counter * @param[in] argv ARGument Vector * @retval EXIT_SUCCESS Normal termination of the program * @retval EXIT_FAILURE Some error occurred */ int main(const int argc, char * const argv[]) { st_matrix_t M = st_matrix_load(stdin); const unsigned int size = st_matrix_size(M); double *eigenvalues; unsigned int i; stopwatch_t stopwatch = stopwatch_create("QR_solver"); (void) argc; (void) argv; /* Allocates resources */ SAFE_MALLOC(eigenvalues, double *, size * sizeof(double)); /* Computes eigenvalues */ stopwatch_start(stopwatch, 0, "Compute eigenvalues"); qr_iterative(M, eigenvalues, NULL); stopwatch_stop(stopwatch, 0); /* Prints results */ printf("Eigenvalues:\n["); for (i = 0; i < size - 1; ++i) { printf("%g, ", eigenvalues[i]); } printf("%g]\n", eigenvalues[i]); /* Frees memory */ st_matrix_delete(&M); free(eigenvalues); stopwatch_delete(&stopwatch); return EXIT_SUCCESS; }
/*
 * Stops the stopwatch T and releases it with free().
 * Passing NULL is allowed and does nothing.
 */
void stopwatch_destroy (struct stopwatch_t* T)
{
  if (T == NULL)
    return;

  stopwatch_stop (T);
  free (T);
}
int main(int argc, char **argv) { bmo_verbosity(BMO_MESSAGE_DEBUG); int ret = 0; char * test_directory = NULL; char * test_string = NULL; char * replacement = NULL; char env_buf[ENV_BUF_LEN] = {'\0'}; const char * test_proto = "return runtests(find_test_files({'$'}, 1, \"*.lua\"))"; if (argc != 2) { test_directory = "./lua"; } else { test_directory = argv[1]; } test_string = calloc( strlen(test_directory) + strlen(test_proto) + 1, sizeof(char) ); if (!test_string) { bmo_err("couldn't allocate string."); assert(0); } replacement = strchr(test_proto, '$'); assert(replacement); strncpy(test_string, test_proto, replacement - test_proto); strcat(test_string, test_directory); strcat(test_string, replacement + 1); // some tests rely on sanity testing values by comparing against environment // variables snprintf(env_buf, ENV_BUF_LEN, "%d", FRAMES); setenv("BMO_FRAMES", env_buf, 1); snprintf(env_buf, ENV_BUF_LEN, "%d", CHANNELS); setenv("BMO_CHANNELS", env_buf, 1); snprintf(env_buf, ENV_BUF_LEN, "%d", RATE); setenv("BMO_RATE", env_buf, 1); stopwatch_start(); BMO_dsp_obj_t * dsp = bmo_dsp_lua_new(0, CHANNELS, FRAMES, RATE, NULL, 0); dsp->_init(dsp, 0); ret = bmo_runtests(dsp, "testingunit.lua", test_string, NULL); bmo_info("tests ran in %fs\n", stopwatch_stop()); if (ret != 0) { bmo_err("test failure:%d\n", ret); } dsp->_close(dsp, 0); free(dsp); free(test_string); return ret; }
/*
 * Times a single call to fib(n), where n is parsed from argv[1], and prints
 * the elapsed seconds and the result as a small JSON-like record.
 * Without an argument the usage line is printed instead.
 */
int main(int argc, char* argv[])
{
    stopwatch timer;
    int n;
    int result;

    if (argc >= 2) {
        n = atoi(argv[1]);

        stopwatch_start(&timer);
        result = fib(n);
        stopwatch_stop(&timer);

        printf("{ \"time\": %f, \"output\": %d }\n",
               get_interval_by_sec(&timer), result);
    } else {
        printf("usage: %s x\n", argv[0]);
    }

    return 0;
}
/*
 * Returns a newly allocated matrix holding the product a * b.
 *
 * The multiplication itself is delegated to __multiply_matrix and is
 * bracketed by stopwatch_start()/stopwatch_stop(). If the column count of
 * a differs from the row count of b, an error is printed to stderr and the
 * process exits with status 1.
 */
matrix_t *multiply_matrix(matrix_t *a, matrix_t *b)
{
    matrix_t *product;

    if (a->columns == b->rows) {
        product = allocate_matrix(a->rows, b->columns);

        stopwatch_start();
        __multiply_matrix(product, a, b);
        stopwatch_stop();

        return product;
    }

    fprintf(stderr, "multiply_matrix(): cannot multiply\n");
    exit(1);
}
void benchmark(struct benchmark_config *config) { int i; struct worker_info *producers; struct worker_info *consumers; struct work_queue queue_to_producer; struct work_queue queue_to_consumer; struct work_queue trash_queue; unsigned long long start, elapsed; work_queue_init(&queue_to_producer); work_queue_init(&queue_to_consumer); work_queue_init(&trash_queue); producers = create_workers(config, config->producer_thnum, config->producer, &queue_to_producer, &queue_to_consumer); consumers = create_workers(config, config->consumer_thnum, config->consumer, &queue_to_consumer, &trash_queue); start = stopwatch_start(); for (i = 0; i < config->num_works; i++) { struct work *work = xmalloc(sizeof(*work)); memset(work, 0, sizeof(*work)); work->seed = config->seed_offset + i; work_queue_push(&queue_to_producer, work); } work_queue_close(&queue_to_producer); join_workers(producers, config->producer_thnum); work_queue_close(&queue_to_consumer); join_workers(consumers, config->consumer_thnum); work_queue_close(&trash_queue); elapsed = stopwatch_stop(start); collect_results(config, &trash_queue, start, elapsed); destroy_workers(consumers, config->consumer_thnum); destroy_workers(producers, config->producer_thnum); work_queue_destroy(&queue_to_producer); work_queue_destroy(&queue_to_consumer); work_queue_destroy(&trash_queue); }
/*
 * Calls lav3_normalize on the same vector `count` times under a stopwatch
 * and prints one dictionary-style record: id, count, the input vector, the
 * resulting vector and the measured time.
 */
static void test_lav3_normalize(unsigned int count)
{
    lav3 V;
    unsigned int i;   /* unsigned to match count and avoid a mixed-sign compare */
    stopwatch watch;

    test_reset_vectors(&V, NULL);

    printf(" {'id' : 'lav3_normalize',\n");
    printf(" 'count' : %u,\n", count);
    printf(" 'V' : ");
    print_lav3_tuple(&V);
    printf(",\n");

    stopwatch_start(&watch);
    for (i = 0; i < count; i++)
        lav3_normalize(&V);
    stopwatch_stop(&watch);

    /* The key needs the same " : " separator as every other entry. */
    printf(" 'result_V' : ");
    print_lav3_tuple(&V);
    printf(",\n");
    printf(" 'time' : ");
    stopwatch_print(&watch);
    printf("\n },\n");
}
int main(int argc, char **argv) { struct stopwatch sw; if (argc < 2) { printf("usage: %s <input size>\n", argv[0]); return 1; } int n = atoi(argv[1]); stopwatch_start(&sw); int x = numprime(n); stopwatch_stop(&sw); printf("{ \"time\": %f, \"result\": %d }\n", get_interval_by_sec(&sw), x); return 0; }
/*******************************************************************************
 * Opto-barrier handling
 *
 * Polls OPTO_read() and reacts only when the value differs from last_opto.
 * On a change to 1 the stopwatch is started if it is not running and
 * t == t0; otherwise it is stopped. Always returns 0.
 *******************************************************************************/
int opto_idle()
{
    char state = OPTO_read();

    /* Nothing to do unless the sensor value changed since the last poll. */
    if (state == last_opto)
        return 0;
    last_opto = state;

    /* Only a transition to 1 toggles the stopwatch. */
    if (state != 1)
        return 0;

    if (!isrunning && (t == t0))
        stopwatch_start();
    else
        stopwatch_stop();

    return 0;
}
/*
 * Calls lav3_dot on the same pair of vectors `count` times under a
 * stopwatch, accumulating the results, and prints one dictionary-style
 * record: id, count, both input vectors, the accumulated value and the
 * measured time.
 */
static void test_lav3_dot(unsigned int count)
{
    lav3 V1;
    lav3 V2;
    las dot_res;
    unsigned int i;   /* unsigned to match count and avoid a mixed-sign compare */
    stopwatch watch;

    dot_res = 0.;

    printf(" {'id' : 'lav3_dot',\n");
    test_reset_vectors(&V1, &V2);
    printf(" 'count' : %u,\n", count);
    printf(" 'V1' : ");
    print_lav3_tuple(&V1);
    printf(",\n");
    printf(" 'V2' : ");
    print_lav3_tuple(&V2);
    printf(",\n");

    stopwatch_start(&watch);
    for (i = 0; i < count; i++)
        dot_res += lav3_dot(&V1, &V2);
    stopwatch_stop(&watch);

    printf(" 'result_dot' : ");
    print_las(dot_res);
    printf(",\n");
    printf(" 'time' : ");
    stopwatch_print(&watch);
    printf("\n },\n");
}
int main(void) { int *s,*d; char msg[255]; unsigned int fselect; Stopwatch sw; void (*transfuncs[2])(int *, int *, int) = {transpose, transpose_O}; printf(" Choose transpose (0=original, 1=optimized)\n"); scanf("%d",&fselect); if (fselect > 1 ) { puts("Invalid transpose function"); return(-1); } // initialize data` s = random_ints(((TEST_DIM) * (TEST_DIM)), RANDOM_M_SEED, 4); d = (int *)malloc(sizeof(int) * ((TEST_DIM) * (TEST_DIM))); sw = new_stopwatch(); if (d == NULL || s == NULL || sw == NULL) return -1; // performance test given transpose stopwatch_start(sw); transfuncs[fselect](d,s,TEST_DIM); stopwatch_stop(sw); snprintf(msg, 255, "%s transform: %li ms\n", (fselect) ? "Optimized" : "Original", stopwatch_milliseconds(sw)); puts(msg); // superfluous memory deallocation activities free(s); free(d); destroy_stopwatch(sw); return 0; }
/*******************************************************************************
 * Keyboard handling
 *
 * Decodes the current 4x4 keypad state and reacts only when the decoded key
 * differs from last_ch. '*' resets the stopwatch; '#' stops it when running,
 * or starts it when it is not running and t == t0. Always returns 0.
 *******************************************************************************/
int keyboard_idle()
{
    char key = key_decode(read_word_keyboard_4x4());

    /* Ignore a key that is still held from the previous poll. */
    if (key == last_ch)
        return 0;
    last_ch = key;

    switch (key) {
    case '*':
        stopwatch_reset();
        break;
    case '#':
        if (isrunning)
            stopwatch_stop();
        else if (t == t0)
            stopwatch_start();
        break;
    default:
        /* 0 (no key) and every other key are ignored. */
        break;
    }

    return 0;
}
int main ( int argc, char *argv[] ) { printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); int matrix_dim = 32; /* default matrix_dim */ int opt, option_index=0; func_ret_t ret; const char *input_file = NULL; float *m, *mm; stopwatch sw; while ((opt = getopt_long(argc, argv, "::vs:i:", long_options, &option_index)) != -1 ) { switch(opt){ case 'i': input_file = optarg; break; case 'v': do_verify = 1; break; case 's': matrix_dim = atoi(optarg); printf("Generate input matrix internally, size =%d\n", matrix_dim); // fprintf(stderr, "Currently not supported, use -i instead\n"); // fprintf(stderr, "Usage: %s [-v] [-s matrix_size|-i input_file]\n", argv[0]); // exit(EXIT_FAILURE); break; case '?': fprintf(stderr, "invalid option\n"); break; case ':': fprintf(stderr, "missing argument\n"); break; default: fprintf(stderr, "Usage: %s [-v] [-s matrix_size|-i input_file]\n", argv[0]); exit(EXIT_FAILURE); } } if ( (optind < argc) || (optind == 1)) { fprintf(stderr, "Usage: %s [-v] [-s matrix_size|-i input_file]\n", argv[0]); exit(EXIT_FAILURE); } if (input_file) { printf("Reading matrix from file %s\n", input_file); ret = create_matrix_from_file(&m, input_file, &matrix_dim); if (ret != RET_SUCCESS) { m = NULL; fprintf(stderr, "error create matrix from file %s\n", input_file); exit(EXIT_FAILURE); } } else if (matrix_dim) { printf("Creating matrix internally size=%d\n", matrix_dim); ret = create_matrix(&m, matrix_dim); if (ret != RET_SUCCESS) { m = NULL; fprintf(stderr, "error create matrix internally size=%d\n", matrix_dim); exit(EXIT_FAILURE); } } else { printf("No input file specified!\n"); exit(EXIT_FAILURE); } if (do_verify){ printf("Before LUD\n"); // print_matrix(m, matrix_dim); matrix_duplicate(m, &mm, matrix_dim); } int sourcesize = 1024*1024; char * source = (char *)calloc(sourcesize, sizeof(char)); if(!source) { printf("ERROR: calloc(%d) failed\n", sourcesize); return -1; } char * kernel_lud_diag = "lud_diagonal"; char * kernel_lud_peri = "lud_perimeter"; 
char * kernel_lud_inter = "lud_internal"; FILE * fp = fopen("./lud_kernel.cl", "rb"); if(!fp) { printf("ERROR: unable to open '%s'\n"); return -1; } fread(source + strlen(source), sourcesize, 1, fp); fclose(fp); // Use 1: GPU 0: CPU int use_gpu = 1; // OpenCL initialization if(initialize(use_gpu)) return -1; // compile kernel cl_int err = 0; const char * slist[2] = { source, 0 }; cl_program prog = clCreateProgramWithSource(context, 1, slist, NULL, &err); if(err != CL_SUCCESS) { printf("ERROR: clCreateProgramWithSource() => %d\n", err); return -1; } char clOptions[110]; // sprintf(clOptions,"-I../../src"); sprintf(clOptions," "); #ifdef BLOCK_SIZE sprintf(clOptions + strlen(clOptions), " -DBLOCK_SIZE=%d", BLOCK_SIZE); #endif err = clBuildProgram(prog, 0, NULL, clOptions, NULL, NULL); { // show warnings/errors //static char log[65536]; memset(log, 0, sizeof(log)); //cl_device_id device_id = 0; //err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(device_id), &device_id, NULL); //clGetProgramBuildInfo(prog, device_id, CL_PROGRAM_BUILD_LOG, sizeof(log)-1, log, NULL); //if(err || strstr(log,"warning:") || strstr(log, "error:")) printf("<<<<\n%s\n>>>>\n", log); } if(err != CL_SUCCESS) { printf("ERROR: clBuildProgram() => %d\n", err); return -1; } cl_kernel diagnal; cl_kernel perimeter; cl_kernel internal; diagnal = clCreateKernel(prog, kernel_lud_diag, &err); perimeter = clCreateKernel(prog, kernel_lud_peri, &err); internal = clCreateKernel(prog, kernel_lud_inter, &err); if(err != CL_SUCCESS) { printf("ERROR: clCreateKernel() 0 => %d\n", err); return -1; } clReleaseProgram(prog); //size_t local_work[3] = { 1, 1, 1 }; //size_t global_work[3] = {1, 1, 1 }; cl_mem d_m; d_m = clCreateBuffer(context, CL_MEM_READ_WRITE, matrix_dim*matrix_dim * sizeof(float), NULL, &err ); if(err != CL_SUCCESS) { printf("ERROR: clCreateBuffer d_m (size:%d) => %d\n", matrix_dim*matrix_dim, err); return -1;} /* beginning of timing point */ stopwatch_start(&sw); err = 
clEnqueueWriteBuffer(cmd_queue, d_m, 1, 0, matrix_dim*matrix_dim*sizeof(float), m, 0, 0, 0); if(err != CL_SUCCESS) { printf("ERROR: clEnqueueWriteBuffer d_m (size:%d) => %d\n", matrix_dim*matrix_dim, err); return -1; } int i=0; for (i=0; i < matrix_dim-BLOCK_SIZE; i += BLOCK_SIZE) { clSetKernelArg(diagnal, 0, sizeof(void *), (void*) &d_m); clSetKernelArg(diagnal, 1, sizeof(float) * BLOCK_SIZE * BLOCK_SIZE, (void*)NULL ); clSetKernelArg(diagnal, 2, sizeof(cl_int), (void*) &matrix_dim); clSetKernelArg(diagnal, 3, sizeof(cl_int), (void*) &i); size_t global_work1[3] = {BLOCK_SIZE, 1, 1}; size_t local_work1[3] = {BLOCK_SIZE, 1, 1}; err = clEnqueueNDRangeKernel(cmd_queue, diagnal, 2, NULL, global_work1, local_work1, 0, 0, 0); if(err != CL_SUCCESS) { printf("ERROR: diagnal clEnqueueNDRangeKernel()=>%d failed\n", err); return -1; } clSetKernelArg(perimeter, 0, sizeof(void *), (void*) &d_m); clSetKernelArg(perimeter, 1, sizeof(float) * BLOCK_SIZE * BLOCK_SIZE, (void*)NULL ); clSetKernelArg(perimeter, 2, sizeof(float) * BLOCK_SIZE * BLOCK_SIZE, (void*)NULL ); clSetKernelArg(perimeter, 3, sizeof(float) * BLOCK_SIZE * BLOCK_SIZE, (void*)NULL ); clSetKernelArg(perimeter, 4, sizeof(cl_int), (void*) &matrix_dim); clSetKernelArg(perimeter, 5, sizeof(cl_int), (void*) &i); size_t global_work2[3] = {BLOCK_SIZE * 2 * ((matrix_dim-i)/BLOCK_SIZE-1), 1, 1}; size_t local_work2[3] = {BLOCK_SIZE * 2, 1, 1}; err = clEnqueueNDRangeKernel(cmd_queue, perimeter, 2, NULL, global_work2, local_work2, 0, 0, 0); if(err != CL_SUCCESS) { printf("ERROR: perimeter clEnqueueNDRangeKernel()=>%d failed\n", err); return -1; } clSetKernelArg(internal, 0, sizeof(void *), (void*) &d_m); clSetKernelArg(internal, 1, sizeof(float) * BLOCK_SIZE * BLOCK_SIZE, (void*)NULL ); clSetKernelArg(internal, 2, sizeof(float) * BLOCK_SIZE * BLOCK_SIZE, (void*)NULL ); clSetKernelArg(internal, 3, sizeof(cl_int), (void*) &matrix_dim); clSetKernelArg(internal, 4, sizeof(cl_int), (void*) &i); size_t global_work3[3] = {BLOCK_SIZE * 
((matrix_dim-i)/BLOCK_SIZE-1), BLOCK_SIZE * ((matrix_dim-i)/BLOCK_SIZE-1), 1}; size_t local_work3[3] = {BLOCK_SIZE, BLOCK_SIZE, 1}; err = clEnqueueNDRangeKernel(cmd_queue, internal, 2, NULL, global_work3, local_work3, 0, 0, 0); if(err != CL_SUCCESS) { printf("ERROR: internal clEnqueueNDRangeKernel()=>%d failed\n", err); return -1; } } clSetKernelArg(diagnal, 0, sizeof(void *), (void*) &d_m); clSetKernelArg(diagnal, 1, sizeof(float) * BLOCK_SIZE * BLOCK_SIZE, (void*)NULL ); clSetKernelArg(diagnal, 2, sizeof(cl_int), (void*) &matrix_dim); clSetKernelArg(diagnal, 3, sizeof(cl_int), (void*) &i); size_t global_work1[3] = {BLOCK_SIZE, 1, 1}; size_t local_work1[3] = {BLOCK_SIZE, 1, 1}; err = clEnqueueNDRangeKernel(cmd_queue, diagnal, 2, NULL, global_work1, local_work1, 0, 0, 0); if(err != CL_SUCCESS) { printf("ERROR: diagnal clEnqueueNDRangeKernel()=>%d failed\n", err); return -1; } err = clEnqueueReadBuffer(cmd_queue, d_m, 1, 0, matrix_dim*matrix_dim*sizeof(float), m, 0, 0, 0); if(err != CL_SUCCESS) { printf("ERROR: clEnqueueReadBuffer d_m (size:%d) => %d\n", matrix_dim*matrix_dim, err); return -1; } clFinish(cmd_queue); /* end of timing point */ stopwatch_stop(&sw); printf("Time consumed(ms): %lf\n", 1000*get_interval_by_sec(&sw)); clReleaseMemObject(d_m); if (do_verify){ printf("After LUD\n"); // print_matrix(m, matrix_dim); printf(">>>Verify<<<<\n"); lud_verify(mm, m, matrix_dim); free(mm); } free(m); if(shutdown()) return -1; }
/*
 * Hands timing over from one stopwatch to another: watchToStop is stopped
 * first, then watchToStart is started.
 */
void stopwatch_switch(stopwatch_t * watchToStop,
                      stopwatch_t * watchToStart)
{
	// TODO: use a single clock_gettime call?
	stopwatch_stop(watchToStop);

	stopwatch_start(watchToStart);
}
int main ( int argc, char *argv[] ) { int matrix_dim = 32; /* default matrix_dim */ int opt, option_index=0; func_ret_t ret; const char *input_file = NULL; float *m, *mm; stopwatch sw; cl_device_id clDevice; cl_context clContext; cl_command_queue clCommands; cl_program clProgram; cl_kernel clKernel_diagonal; cl_kernel clKernel_perimeter; cl_kernel clKernel_internal; cl_int dev_type; cl_int errcode; FILE *kernelFile; char *kernelSource; size_t kernelLength; cl_mem d_m; ocd_init(&argc, &argv, NULL); ocd_options opts = ocd_get_options(); platform_id = opts.platform_id; device_id = opts.device_id; while ((opt = getopt_long(argc, argv, "::vs:i:", long_options, &option_index)) != -1 ) { switch(opt) { case 'i': input_file = optarg; break; case 'v': do_verify = 1; break; case 's': matrix_dim = atoi(optarg); fprintf(stderr, "Currently not supported, use -i instead\n"); fprintf(stderr, "Usage: %s [-v] [-s matrix_size|-i input_file|-p platform|-d device]\n", argv[0]); exit(EXIT_FAILURE); case '?': fprintf(stderr, "invalid option\n"); break; case ':': fprintf(stderr, "missing argument\n"); break; default: fprintf(stderr, "Usage: %s [-v] [-s matrix_size|-i input_file||-p platform|-d device]\n", argv[0]); exit(EXIT_FAILURE); } } if ( (optind < argc) || (optind == 1)) { fprintf(stderr, "Usage: %s [-v] [-s matrix_size|-i input_file|-p platform|-d device]\n", argv[0]); exit(EXIT_FAILURE); } if (input_file) { printf("Reading matrix from file %s\n", input_file); ret = create_matrix_from_file(&m, input_file, &matrix_dim); if (ret != RET_SUCCESS) { m = NULL; fprintf(stderr, "error create matrix from file %s\n", input_file); exit(EXIT_FAILURE); } } else { printf("No input file specified!\n"); exit(EXIT_FAILURE); } if (do_verify) { printf("Before LUD\n"); print_matrix(m, matrix_dim); matrix_duplicate(m, &mm, matrix_dim); } // errcode = clGetPlatformIDs(NUM_PLATFORM, clPlatform, NULL); // CHECKERR(errcode); // // errcode = clGetDeviceIDs(clPlatform[PLATFORM_ID], USEGPU ? 
CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 1, &clDevice, NULL); // CHECKERR(errcode); #ifdef USEGPU dev_type = CL_DEVICE_TYPE_GPU; #elif defined(USE_AFPGA) dev_type = CL_DEVICE_TYPE_ACCELERATOR; #else dev_type = CL_DEVICE_TYPE_CPU; #endif clDevice = GetDevice(platform_id, device_id,dev_type); size_t max_worksize[3]; errcode = clGetDeviceInfo(clDevice, CL_DEVICE_MAX_WORK_ITEM_SIZES,sizeof(size_t)*3, &max_worksize, NULL); CHECKERR(errcode); while(BLOCK_SIZE*BLOCK_SIZE>max_worksize[0]) BLOCK_SIZE = BLOCK_SIZE/2; clContext = clCreateContext(NULL, 1, &clDevice, NULL, NULL, &errcode); CHECKERR(errcode); clCommands = clCreateCommandQueue(clContext, clDevice, CL_QUEUE_PROFILING_ENABLE, &errcode); CHECKERR(errcode); kernelFile = fopen("lud_kernel.cl", "r"); fseek(kernelFile, 0, SEEK_END); kernelLength = (size_t) ftell(kernelFile); kernelSource = (char *) malloc(sizeof(char)*kernelLength); rewind(kernelFile); fread((void *) kernelSource, kernelLength, 1, kernelFile); fclose(kernelFile); clProgram = clCreateProgramWithSource(clContext, 1, (const char **) &kernelSource, &kernelLength, &errcode); CHECKERR(errcode); free(kernelSource); char arg[100]; sprintf(arg,"-D BLOCK_SIZE=%d", (int)BLOCK_SIZE); errcode = clBuildProgram(clProgram, 1, &clDevice, arg, NULL, NULL); if (errcode == CL_BUILD_PROGRAM_FAILURE) { char *log; size_t logLength; errcode = clGetProgramBuildInfo(clProgram, clDevice, CL_PROGRAM_BUILD_LOG, 0, NULL, &logLength); log = (char *) malloc(sizeof(char)*logLength); errcode = clGetProgramBuildInfo(clProgram, clDevice, CL_PROGRAM_BUILD_LOG, logLength, (void *) log, NULL); fprintf(stderr, "Kernel build error! 
Log:\n%s", log); free(log); return 0; } CHECKERR(errcode); clKernel_diagonal = clCreateKernel(clProgram, "lud_diagonal", &errcode); CHECKERR(errcode); clKernel_perimeter = clCreateKernel(clProgram, "lud_perimeter", &errcode); CHECKERR(errcode); clKernel_internal = clCreateKernel(clProgram, "lud_internal", &errcode); CHECKERR(errcode); d_m = clCreateBuffer(clContext, CL_MEM_READ_WRITE, matrix_dim*matrix_dim*sizeof(float), NULL, &errcode); CHECKERR(errcode); /* beginning of timing point */ stopwatch_start(&sw); errcode = clEnqueueWriteBuffer(clCommands, d_m, CL_TRUE, 0, matrix_dim*matrix_dim*sizeof(float), (void *) m, 0, NULL, &ocdTempEvent); clFinish(clCommands); START_TIMER(ocdTempEvent, OCD_TIMER_H2D, "Matrix Copy", ocdTempTimer) END_TIMER(ocdTempTimer) CHECKERR(errcode); int i=0; size_t localWorkSize[2]; size_t globalWorkSize[2]; //printf("BLOCK_SIZE: %d\n",BLOCK_SIZE); // printf("max Work-item Size: %d\n",(int)max_worksize[0]); #ifdef START_POWER for( int iter = 0; iter < 1000; iter++) #endif for (i=0; i < matrix_dim-BLOCK_SIZE; i += BLOCK_SIZE) { errcode = clSetKernelArg(clKernel_diagonal, 0, sizeof(cl_mem), (void *) &d_m); errcode |= clSetKernelArg(clKernel_diagonal, 1, sizeof(int), (void *) &matrix_dim); errcode |= clSetKernelArg(clKernel_diagonal, 2, sizeof(int), (void *) &i); CHECKERR(errcode); localWorkSize[0] = BLOCK_SIZE; globalWorkSize[0] = BLOCK_SIZE; errcode = clEnqueueNDRangeKernel(clCommands, clKernel_diagonal, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, &ocdTempEvent); clFinish(clCommands); START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "Diagonal Kernels", ocdTempTimer) END_TIMER(ocdTempTimer) CHECKERR(errcode); errcode = clSetKernelArg(clKernel_perimeter, 0, sizeof(cl_mem), (void *) &d_m); errcode |= clSetKernelArg(clKernel_perimeter, 1, sizeof(int), (void *) &matrix_dim); errcode |= clSetKernelArg(clKernel_perimeter, 2, sizeof(int), (void *) &i); CHECKERR(errcode); localWorkSize[0] = BLOCK_SIZE*2; globalWorkSize[0] = 
((matrix_dim-i)/BLOCK_SIZE-1)*localWorkSize[0]; errcode = clEnqueueNDRangeKernel(clCommands, clKernel_perimeter, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, &ocdTempEvent); clFinish(clCommands); START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "Perimeter Kernel", ocdTempTimer) CHECKERR(errcode); END_TIMER(ocdTempTimer) errcode = clSetKernelArg(clKernel_internal, 0, sizeof(cl_mem), (void *) &d_m); errcode |= clSetKernelArg(clKernel_internal, 1, sizeof(int), (void *) &matrix_dim); errcode |= clSetKernelArg(clKernel_internal, 2, sizeof(int), (void *) &i); CHECKERR(errcode); localWorkSize[0] = BLOCK_SIZE; localWorkSize[1] = BLOCK_SIZE; globalWorkSize[0] = ((matrix_dim-i)/BLOCK_SIZE-1)*localWorkSize[0]; globalWorkSize[1] = ((matrix_dim-i)/BLOCK_SIZE-1)*localWorkSize[1]; errcode = clEnqueueNDRangeKernel(clCommands, clKernel_internal, 2, NULL, globalWorkSize, localWorkSize, 0, NULL, &ocdTempEvent); clFinish(clCommands); START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "Internal Kernel", ocdTempTimer) END_TIMER(ocdTempTimer) CHECKERR(errcode); } errcode = clSetKernelArg(clKernel_diagonal, 0, sizeof(cl_mem), (void *) &d_m); errcode |= clSetKernelArg(clKernel_diagonal, 1, sizeof(int), (void *) &matrix_dim); errcode |= clSetKernelArg(clKernel_diagonal, 2, sizeof(int), (void *) &i); CHECKERR(errcode); localWorkSize[0] = BLOCK_SIZE; globalWorkSize[0] = BLOCK_SIZE; errcode = clEnqueueNDRangeKernel(clCommands, clKernel_diagonal, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, &ocdTempEvent); clFinish(clCommands); START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "Diagonal Kernels", ocdTempTimer) CHECKERR(errcode); END_TIMER(ocdTempTimer) errcode = clEnqueueReadBuffer(clCommands, d_m, CL_TRUE, 0, matrix_dim*matrix_dim*sizeof(float), (void *) m, 0, NULL, &ocdTempEvent); clFinish(clCommands); START_TIMER(ocdTempEvent, OCD_TIMER_D2H, "Matrix copy", ocdTempTimer) END_TIMER(ocdTempTimer) /* end of timing point */ stopwatch_stop(&sw); printf("Time consumed(ms): %lf\n", 
1000*get_interval_by_sec(&sw)); clReleaseMemObject(d_m); if (do_verify) { printf("After LUD\n"); print_matrix(m, matrix_dim); printf(">>>Verify<<<<\n"); printf("matrix_dim: %d\n",matrix_dim); lud_verify(mm, m, matrix_dim); free(mm); } clReleaseKernel(clKernel_diagonal); clReleaseKernel(clKernel_perimeter); clReleaseKernel(clKernel_internal); clReleaseProgram(clProgram); clReleaseCommandQueue(clCommands); clReleaseContext(clContext); free(m); ocd_finalize(); return EXIT_SUCCESS; } /* ---------- end of function main ---------- */
int main(int argc, char** argv) { ocd_init(&argc, &argv, NULL); ocd_initCL(); std::cerr << "N-Queen solver for OpenCL\n"; std::cerr << "Ping-Che Chen\n\n"; if(argc < 2) { std::cerr << "Usage: " << argv[0] << " [options] N\n"; std::cerr << "\tN: board size (1 ~ 32)\n"; std::cerr << "\t-cpu: use CPU (multi-threaded on Windows)\n"; std::cerr << "\t-prof: enable profiler\n"; std::cerr << "\t-threads #: set number of threads to #\n"; std::cerr << "\t-blocksize #: set size of thread blocks to #\n"; std::cerr << "\t-local: use local memory for arrays (default: off)\n"; std::cerr << "\t-noatomics: do not use global atomics\n"; std::cerr << "\t-novec: do not use vectorization\n"; std::cerr << "\t-vec4: use 4D vectors instead of 2D (only when vectorized- default: off)\n"; return 0; } // handle options bool force_cpu = false; bool profiling = false; int threads = 0; int block_size = 0; bool local = false;//default OFF (was true) bool noatomics = false; bool novec = false; bool use_vec4 = false; int start = 1; while(start < argc - 1) { if(std::strcmp(argv[start], "-cpu") == 0) { force_cpu = true; } else if(std::strcmp(argv[start], "-threads") == 0 && start < argc - 2) { threads = std::atoi(argv[start + 1]); start++; } else if(std::strcmp(argv[start], "-blocksize") == 0 && start < argc - 2) { block_size = std::atoi(argv[start + 1]); start++; } else if(std::strcmp(argv[start], "-local") == 0) { local = true; } else if(std::strcmp(argv[start], "-noatomics") == 0) { noatomics = true; } else if(std::strcmp(argv[start], "-novec") == 0) { novec = true; } else if(std::strcmp(argv[start], "-vec4") == 0) { use_vec4 = true; } else { std::cerr << "Unknown option " << argv[start] << "\n"; } start ++; } int board_size = std::atoi(argv[start]); if(board_size < 1 || board_size > 32) { std::cerr << "Inalid board size (only 1 ~ 32 allowed)\n"; return 0; } stopwatch sw; long long solutions = 0; long long unique_solutions = 0; if(force_cpu) { stopwatch_start(&sw); solutions = 
nqueen_cpu(board_size, &unique_solutions); stopwatch_stop(&sw); } else { stopwatch_start(&sw); cl_int err; // show device list size_t num_devices; num_devices=1;//In OpenDwarfs we only work with one device at a time. std::vector<cl_device_id> devices(num_devices / sizeof(cl_device_id)); devices.clear(); devices.resize(1); devices[0] = device_id; try { NQueenSolver nqueen(context, devices, profiling, threads, block_size, local, noatomics, novec, use_vec4); for(int i = 0; i < devices.size(); i++) { size_t name_length; err = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 0, 0, &name_length); if(err == CL_SUCCESS) { std::string name; name.resize(name_length + 1); clGetDeviceInfo(devices[i], CL_DEVICE_NAME, name_length, &name[0], &name_length); name[name_length] = 0; std::cerr << "Device " << i << ": " << name.c_str() << "\n"; std::cerr << "\tUsing " << nqueen.GetThreads(i) << " threads\n"; std::cerr << "\tBlock size = " << nqueen.GetBlockSize(i) << " threads\n"; if(nqueen.AtomicsEnabled(i)) { std::cerr << "\tUsing global atomics\n"; } if(nqueen.VectorizationEnabled(i)) { std::cerr << "\tUsing vectorization\n"; if(use_vec4) { std::cerr << "\tUse 4D vectors\n"; } else { std::cerr << "\tUse 2D vectors\n"; } } } } //start_time = std::clock(); solutions = nqueen.Compute(board_size, &unique_solutions); //end_time = std::clock(); } catch(CLError x) { if(x.GetErrorNo() == 1) { std::cerr << "1 OpenCL kernel execution failed\n"; } if(x.GetErrorNo() == 2) { std::cerr << "2 OpenCL kernel execution failed\n"; } if(x.GetErrorNo() == 3) { std::cerr << "3 OpenCL kernel execution failed\n"; } else { std::cerr << x << "\n"; } } stopwatch_stop(&sw); clReleaseContext(context); } std::cerr << "Solution took " << get_interval_by_sec(&sw) << " seconds to complete\n"; std::cerr << board_size << "-queen has " << solutions << " solutions (" << unique_solutions << " unique)\n"; printf("{ \"status\": %d, \"options\": \"-s %d\", \"time\": %f }\n", 1, board_size, get_interval_by_sec(&sw)); 
ocd_finalize(); return 0; }
int main ( int argc, char *argv[] ) { int matrix_dim = 32; /* default matrix_dim */ int opt, option_index=0, error=0; func_ret_t ret; char *input_path = NULL; char *output_path = NULL; FILE *file; double *m; stopwatch sw; int i,j; int debug = 0; size_t linesiz=0; char* linebuf=NULL; ssize_t linelen=0; char* token; const char comma[2] = ","; while ((opt = getopt_long(argc, argv, ":dvs:i:o:", long_options, &option_index)) != -1 ) { switch(opt){ case 'v': do_verify = 1; break; case 's': matrix_dim = atoi(optarg); break; case 'i': input_path = optarg; break; case 'o': output_path = optarg; break; case '?': fprintf(stderr, "invalid option\n"); error=1; break; case 'd': debug=1; break; case ':': fprintf(stderr, "missing argument\n"); error=1; break; default: error=1; } } if ((optind < argc) || (optind == 1) || input_path == NULL || error) { fprintf(stderr, "Usage: %s -s size -i input_path [-v] [-d] [-o output_path]\n", argv[0]); exit(EXIT_FAILURE); } file = fopen(input_path, "r"); if (file == NULL) { fprintf(stderr, "Invalid input file path: %s\n", input_path); exit(EXIT_FAILURE); } // Read matrix from file if (debug) { fprintf(stderr, "Reading data from file %s\n", input_path); } m = (double *)malloc(sizeof(double) * matrix_dim * matrix_dim); j = 0; while ((linelen=getline(&linebuf, &linesiz, file)) != -1) { if (debug) { fprintf(stderr, "Read line: %s\n", linebuf); } /* get the first number */ i = 0; token = strtok(linebuf, comma); /* walk through other numbers */ while( token != NULL ) { if (debug) { fprintf(stderr, "Read token: %s\n", token); } m[j*matrix_dim + i] = atof(token); token = strtok(NULL, comma); i = i + 1; } free(linebuf); linebuf=NULL; j = j + 1; } fclose(file); if (debug) { fprintf(stderr, "Computing LUD\n"); } stopwatch_start(&sw); lud_base(m, matrix_dim); stopwatch_stop(&sw); if (output_path) { if (debug) { fprintf(stderr, "Saving result in %s\n", output_path); } file = fopen(output_path, "w"); if (file == NULL) { free(m); exit(EXIT_FAILURE); } for (j 
= 0; j < matrix_dim; ++j) { for (i = 0; i < matrix_dim; ++i) { fprintf(file, "%.*f", 21, m[j*matrix_dim+i]); if (i < matrix_dim-1) { fprintf(file, ","); } } fprintf(file, "\n"); } fclose(file); } free(m); printf("{ \"status\": %d, \"options\": \"-s %d\", \"time\": %f }\n", 1, matrix_dim, get_interval_by_sec(&sw)); return EXIT_SUCCESS; } /* ---------- end of function main ---------- */
/* * vecsum is a microbenchmark which measures the speed of various ways of * reading from HDFS. It creates a file containing floating-point 'doubles', * and computes the sum of all the doubles several times. For some CPUs, * assembly optimizations are used for the summation (SSE, etc). */ int main(void) { int ret = 1; struct options *opts = NULL; struct local_data *cdata = NULL; struct libhdfs_data *ldata = NULL; struct stopwatch *watch = NULL; if (check_byte_size(VECSUM_CHUNK_SIZE, "VECSUM_CHUNK_SIZE") || check_byte_size(ZCR_READ_CHUNK_SIZE, "ZCR_READ_CHUNK_SIZE") || check_byte_size(NORMAL_READ_CHUNK_SIZE, "NORMAL_READ_CHUNK_SIZE")) { goto done; } opts = options_create(); if (!opts) goto done; if (opts->ty == VECSUM_LOCAL) { cdata = local_data_create(opts); if (!cdata) goto done; } else { ldata = libhdfs_data_create(opts); if (!ldata) goto done; } watch = stopwatch_create(); if (!watch) goto done; switch (opts->ty) { case VECSUM_LOCAL: vecsum_local(cdata, opts); ret = 0; break; case VECSUM_LIBHDFS: ret = vecsum_libhdfs(ldata, opts); break; case VECSUM_ZCR: ret = vecsum_zcr(ldata, opts); break; } if (ret) { fprintf(stderr, "vecsum failed with error %d\n", ret); goto done; } ret = 0; done: fprintf(stderr, "cleaning up...\n"); if (watch && (ret == 0)) { long long length = vecsum_length(opts, ldata); if (length >= 0) { stopwatch_stop(watch, length * opts->passes); } } if (cdata) local_data_free(cdata); if (ldata) libhdfs_data_free(ldata); if (opts) options_free(opts); return ret; }
/*
 * Ends a profiling section by calling stopwatch_stop() on the stopwatch held
 * in the given profiler.
 *
 * profiler: profiler whose `stopwatch` member is passed to stopwatch_stop().
 *           It is dereferenced unconditionally, so it must not be NULL.
 */
void profiler_exit(PROFILER *profiler)
{
    stopwatch_stop(profiler->stopwatch);
}
/*
 * Records a lap on the given stopwatch. The implementation simply forwards
 * to stopwatch_stop(), so a lap and a stop are the same operation here.
 *
 * w: stopwatch handed unchanged to stopwatch_stop().
 */
void stopwatch_lap(stopwatch *w)
{
    stopwatch_stop(w);
}
int main (int argc, char* argv[]) { int option_char; int NTRIALS = 1; int N = 1000000; char *filename = NULL; while ((option_char = getopt(argc, argv, "t:n:o:h")) != -1) { switch (option_char) { case 't': NTRIALS = atoi(optarg); break; case 'n': N = atoi(optarg); break; case 'o': filename = optarg; break; case 'h': fprintf(stderr, "%s\n", USAGE); exit(EXIT_SUCCESS); break; default: fprintf(stderr, "%s\n", USAGE); exit(EXIT_FAILURE); } } long double* T = (long double*) malloc(NTRIALS * sizeof(long double)); long *next = (long*) malloc(N * sizeof(long)); long *par_rank = (long*) malloc(N * sizeof(long)); long *seq_rank = (long*) malloc(N * sizeof(long)); size_t ncorrect = 0; for(int i = 0; i < NTRIALS; i++){ initRandomList(next, N); long head = seqFindHead(next, N); seqListRanks(head, next, seq_rank, N); stopwatch_t watch = stopwatch_start(); parallelListRanks(head, next, par_rank, N); T[i] = stopwatch_stop(watch); if( memcmp(seq_rank, par_rank, N * sizeof(long)) == 0) ncorrect++; } if (!filename) printStats(stdout, T, ncorrect, NTRIALS, N); else{ FILE *fp; fp = fopen(filename, "w"); printStats(fp, T, ncorrect, NTRIALS, N); fclose(fp); } free(next); free(par_rank); free(seq_rank); free(T); return EXIT_SUCCESS; }
int main ( int argc, char *argv[] ) { //printf("Starting..\n"); int matrix_dim = 32; /* default size */ int opt, option_index=0; func_ret_t ret; const char *input_file = NULL; float *m, *mm; stopwatch sw; int grid_x=0; int grid_y=0; while ((opt = getopt_long(argc, argv, "::vs:i:x:y:", long_options, &option_index)) != -1 ) { switch(opt){ case 'i': input_file = optarg; break; case 'v': do_verify = 1; break; case 's': matrix_dim = atoi(optarg); //printf("Generate input matrix internally, size =%d\n", matrix_dim); // fprintf(stderr, "Currently not supported, use -i instead\n"); // fprintf(stderr, "Usage: %s [-v] [-s matrix_size|-i input_file]\n", argv[0]); // exit(EXIT_FAILURE); break; case 'x': grid_x = atoi(optarg); break; case 'y': grid_y = atoi(optarg); break; case '?': fprintf(stderr, "invalid option\n"); break; case ':': fprintf(stderr, "missing argument\n"); break; default: fprintf(stderr, "1Usage: %s [-v] [-s matrix_size|-i input_file]\n", argv[0]); exit(EXIT_FAILURE); } } /* if ( (optind < argc) || (optind == 1)) { fprintf(stderr, "2Usage: %s [-v] [-n no. of threads] [-s matrix_size|-i input_file]\n", argv[0]); exit(EXIT_FAILURE); } */ if (input_file) { //printf("Reading matrix from file %s\n", input_file); ret = create_matrix_from_file(&m, input_file, &matrix_dim); if (ret != RET_SUCCESS) { m = NULL; fprintf(stderr, "error create matrix from file %s\n", input_file); exit(EXIT_FAILURE); } } else if (matrix_dim) { //printf("Creating matrix internally size=%d\n", matrix_dim); ret = create_matrix(&m, matrix_dim); if (ret != RET_SUCCESS) { m = NULL; fprintf(stderr, "error create matrix internally size=%d\n", matrix_dim); exit(EXIT_FAILURE); } } else { printf("No input file specified!\n"); exit(EXIT_FAILURE); } if (do_verify){ /* print_matrix(m, matrix_dim); */ matrix_duplicate(m, &mm, matrix_dim); } wul(); //printf("Starting. . . 
\n"); //lud_oacc(m, matrix_dim,grid_x,grid_y); stopwatch_start(&sw); // lud_omp(m, matrix_dim); lud_oacc(m, matrix_dim,grid_x,grid_y); stopwatch_stop(&sw); printf("Time consumed(ms): %lf\n", 1000*get_interval_by_sec(&sw)); if (do_verify){ printf("After LUD\n"); /* print_matrix(m, matrix_dim); */ printf(">>>Verify<<<<\n"); lud_verify(mm, m, matrix_dim); free(mm); } free(m); return EXIT_SUCCESS; } /* ---------- end of function main ---------- */
int main (int argc, char **argv) { char *s, *s2, *s3, *p, *p2, *p3; size_t slen, slen2; long s_size = 1234567; s = (char *)malloc(s_size * sizeof(char)); assert(s); s2 = (char *)malloc(s_size * sizeof(char)); assert(s2); s3 = (char *)malloc(s_size * sizeof(char)); assert(s3); for (p = s, p2 = s2, p3 = s3; (p - s) < s_size; p++, p2++, p3++) { *p = ((p - s) % 26) + 'a'; assert(*p != 0); *p2 = ((p2 - s2) % 26) + 'a'; assert(*p2 != 0); *p3 = ((p3 - s3) % 26) + 'a'; assert(*p3 != 0); } s[0] = '/'; s2[0] = '/'; s3[0] = '/'; //have a different zero'th elem, so that reverse search traverses the full array s[1] = ';'; s2[1] = ';'; s3[1] = ';'; slen = s_size;//strlen(s); //s = argv[2]; //slen = strlen(s); s[slen - 1] = '\0'; s[slen - 2] = '\0'; s[slen - 3] = '\0'; s[slen - 4] = '\0'; s[slen - 5] = '\0'; s[slen - 6] = '\0'; s[slen - 7] = '\0'; s[slen - 8] = '\0'; s[slen - 9] = '\0'; s[slen - 10] = '\0'; s[slen - 11] = '\0'; s[slen - 12] = '\0'; s[slen - 13] = '\0'; s[slen - 14] = '\0'; s[slen - 15] = '\0'; s[slen - 16] = '\0'; slen2 = s_size; s2[slen2 - 1] = '\0'; s2[slen2 - 2] = '\0'; s2[slen2 - 3] = '\0'; s2[slen2 - 4] = '\0'; s2[slen2 - 5] = '\0'; s2[slen2 - 6] = '\0'; s2[slen2 - 7] = '\0'; s2[slen2 - 8] = '\0'; s2[slen2 - 9] = '\0'; s2[slen2 - 10] = '\0'; s2[slen2 - 11] = '\0'; s2[slen2 - 12] = '\0'; s2[slen2 - 13] = '\0'; s2[slen2 - 14] = '\0'; s2[slen2 - 15] = '\0'; s2[slen2 - 16] = '\0'; struct time array_setl_timer; stopwatch_reset(&array_setl_timer); stopwatch_run(&array_setl_timer); int set1l = array_setl(s, s2[0], slen/4); stopwatch_stop(&array_setl_timer); printf("output = %d\n", set1l); printf("first 10 elems:"); int i; for (i = 0; i < 10; i++) { printf(" %x", *((uint32_t *)&s[i])); } printf("\n"); stopwatch_print(&array_setl_timer); return 0; }
int main ( int argc, char *argv[] ) { int matrix_dim = 32; /* default matrix_dim */ int opt, option_index=0, error=0; func_ret_t ret; const char *input_file = NULL; double *m, *mm; stopwatch sw; int i; while ((opt = getopt_long(argc, argv, ":vs:i:", long_options, &option_index)) != -1 ) { switch(opt){ case 'v': do_verify = 1; break; case 's': matrix_dim = atoi(optarg); break; case '?': fprintf(stderr, "invalid option\n"); error=1; break; case ':': fprintf(stderr, "missing argument\n"); error=1; break; default: error=1; } } if ((optind < argc) || (optind == 1) || error) { fprintf(stderr, "Usage: %s [-v] [-s matrix_size]\n", argv[0]); exit(EXIT_FAILURE); } if(matrix_dim>1) { fprintf(stderr, "Generating matrix of size %d x %d\n", matrix_dim, matrix_dim); ret = create_matrix_from_random(&m, matrix_dim); if(ret != RET_SUCCESS){ m = NULL; fprintf(stderr, "error could not generate random matrix of size %d x %d!\n", matrix_dim, matrix_dim); exit(EXIT_FAILURE); } } else { fprintf(stderr, "No input file or valid matrix size specified!\n"); exit(EXIT_FAILURE); } if (do_verify){ //printf("Before LUD\n"); //print_matrix(m, matrix_dim); matrix_duplicate(m, &mm, matrix_dim); } stopwatch_start(&sw); lud_base(m, matrix_dim); stopwatch_stop(&sw); if (matrix_dim == 1024) { for (i=0; i<100; ++i) { if (m[expected_row_indices[i]*matrix_dim + expected_col_indices[i]] != expected_values[i]) { fprintf(stderr, "ERROR: value at index (%d,%d) = '%.*f' is different from the expected value '%.*f'\n", expected_row_indices[i], expected_col_indices[i], // the 21 parameter prints enough significant decimal digits to obtain the same floating-point number // when read back 21, m[expected_row_indices[i]*matrix_dim + expected_col_indices[i]], 21, expected_values[i] ); fprintf(stderr, "Received values:\n"); for (i=0; i<100; ++i) { fprintf(stderr, "%.*f, ", 21, m[expected_row_indices[i]*matrix_dim + expected_col_indices[i]]); } fprintf(stderr, "\n"); exit(1); } } } else { fprintf(stderr, "WARNING: No 
self-checking step for dimension '%d'\n", matrix_dim); } if (do_verify){ //fprintf(stderr, "After LUD\n"); //print_matrix(m, matrix_dim); fprintf(stderr, ">>>Verify<<<<\n"); lud_verify(mm, m, matrix_dim); free(mm); } free(m); printf("{ \"status\": %d, \"options\": \"-s %d\", \"time\": %f }\n", 1, matrix_dim, get_interval_by_sec(&sw)); return EXIT_SUCCESS; } /* ---------- end of function main ---------- */
int main(int argc, char** argv) { cl_int err; int usegpu = USEGPU; int do_verify = 0; int opt, option_index=0; unsigned int correct; size_t global_size; size_t local_size; cl_device_id device_id; cl_context context; cl_command_queue commands; cl_program program; cl_kernel kernel; stopwatch sw; cl_mem csr_ap; cl_mem csr_aj; cl_mem csr_ax; cl_mem x_loc; cl_mem y_loc; FILE *kernelFile; char *kernelSource; size_t kernelLength; size_t lengthRead; ocd_init(&argc, &argv, NULL); ocd_options opts = ocd_get_options(); platform_id = opts.platform_id; n_device = opts.device_id; while ((opt = getopt_long(argc, argv, "::vc::", long_options, &option_index)) != -1 ) { switch(opt){ //case 'i': //input_file = optarg; //break; case 'v': fprintf(stderr, "verify\n"); do_verify = 1; break; case 'c': fprintf(stderr, "using cpu\n"); usegpu = 0; break; default: fprintf(stderr, "Usage: %s [-v Warning: lots of output] [-c use CPU]\n", argv[0]); exit(EXIT_FAILURE); } } /* Fill input set with random float values */ int i; csr_matrix csr; csr = laplacian_5pt(512); int k = 0; for(k = 0; k < csr.num_nonzeros; k++){ csr.Ax[k] = 1.0 - 2.0 * (rand() / (RAND_MAX + 1.0)); } //The other arrays float * x_host = float_new_array(csr.num_cols); float * y_host = float_new_array(csr.num_rows); unsigned int ii; for(ii = 0; ii < csr.num_cols; ii++){ x_host[ii] = rand() / (RAND_MAX + 1.0); } for(ii = 0; ii < csr.num_rows; ii++){ y_host[ii] = rand() / (RAND_MAX + 2.0); } /* Retrieve an OpenCL platform */ device_id = GetDevice(platform_id, n_device); /* Create a compute context */ context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); CHKERR(err, "Failed to create a compute context!"); /* Create a command queue */ commands = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &err); CHKERR(err, "Failed to create a command queue!"); /* Load kernel source */ kernelFile = fopen("spmv_csr_kernel.cl", "r"); fseek(kernelFile, 0, SEEK_END); kernelLength = (size_t) ftell(kernelFile); kernelSource 
= (char *) malloc(sizeof(char)*kernelLength); rewind(kernelFile); lengthRead = fread((void *) kernelSource, kernelLength, 1, kernelFile); fclose(kernelFile); /* Create the compute program from the source buffer */ program = clCreateProgramWithSource(context, 1, (const char **) &kernelSource, &kernelLength, &err); CHKERR(err, "Failed to create a compute program!"); /* Free kernel source */ free(kernelSource); /* Build the program executable */ err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err == CL_BUILD_PROGRAM_FAILURE) { char *buildLog; size_t logLen; err = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &logLen); buildLog = (char *) malloc(sizeof(char)*logLen); err = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, logLen, (void *) buildLog, NULL); fprintf(stderr, "CL Error %d: Failed to build program! Log:\n%s", err, buildLog); free(buildLog); exit(1); } CHKERR(err, "Failed to build program!"); /* Create the compute kernel in the program we wish to run */ kernel = clCreateKernel(program, "csr", &err); CHKERR(err, "Failed to create a compute kernel!"); /* Create the input and output arrays in device memory for our calculation */ csr_ap = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned int)*csr.num_rows+4, NULL, &err); CHKERR(err, "Failed to allocate device memory!"); csr_aj = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(unsigned int)*csr.num_nonzeros, NULL, &err); CHKERR(err, "Failed to allocate device memory!"); csr_ax = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*csr.num_nonzeros, NULL, &err); CHKERR(err, "Failed to allocate device memory!"); x_loc = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*csr.num_cols, NULL, &err); CHKERR(err, "Failed to allocate device memory!"); y_loc = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float)*csr.num_rows, NULL, &err); CHKERR(err, "Failed to allocate device memory!"); /* beginning of timing point */ stopwatch_start(&sw); /* Write 
our data set into the input array in device memory */ err = clEnqueueWriteBuffer(commands, csr_ap, CL_TRUE, 0, sizeof(unsigned int)*csr.num_rows+4, csr.Ap, 0, NULL, &ocdTempEvent); clFinish(commands); START_TIMER(ocdTempEvent, OCD_TIMER_H2D, "CSR Data Copy", ocdTempTimer) END_TIMER(ocdTempTimer) CHKERR(err, "Failed to write to source array!"); err = clEnqueueWriteBuffer(commands, csr_aj, CL_TRUE, 0, sizeof(unsigned int)*csr.num_nonzeros, csr.Aj, 0, NULL, &ocdTempEvent); clFinish(commands); START_TIMER(ocdTempEvent, OCD_TIMER_H2D, "CSR Data Copy", ocdTempTimer) END_TIMER(ocdTempTimer) CHKERR(err, "Failed to write to source array!"); err = clEnqueueWriteBuffer(commands, csr_ax, CL_TRUE, 0, sizeof(float)*csr.num_nonzeros, csr.Ax, 0, NULL, &ocdTempEvent); clFinish(commands); START_TIMER(ocdTempEvent, OCD_TIMER_H2D, "CSR Data Copy", ocdTempTimer) END_TIMER(ocdTempTimer) CHKERR(err, "Failed to write to source array!"); err = clEnqueueWriteBuffer(commands, x_loc, CL_TRUE, 0, sizeof(float)*csr.num_cols, x_host, 0, NULL, &ocdTempEvent); clFinish(commands); START_TIMER(ocdTempEvent, OCD_TIMER_H2D, "CSR Data Copy", ocdTempTimer) END_TIMER(ocdTempTimer) CHKERR(err, "Failed to write to source array!"); err = clEnqueueWriteBuffer(commands, y_loc, CL_TRUE, 0, sizeof(float)*csr.num_rows, y_host, 0, NULL, &ocdTempEvent); clFinish(commands); START_TIMER(ocdTempEvent, OCD_TIMER_H2D, "CSR Data Copy", ocdTempTimer) CHKERR(err, "Failed to write to source array!"); END_TIMER(ocdTempTimer) /* Set the arguments to our compute kernel */ err = 0; err = clSetKernelArg(kernel, 0, sizeof(unsigned int), &csr.num_rows); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &csr_ap); err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &csr_aj); err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &csr_ax); err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &x_loc); err |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &y_loc); CHKERR(err, "Failed to set kernel arguments!"); /* Get the maximum work group size for 
executing the kernel on the device */ err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), (void *) &local_size, NULL); CHKERR(err, "Failed to retrieve kernel work group info!"); /* Execute the kernel over the entire range of our 1d input data set */ /* using the maximum number of work group items for this device */ global_size = csr.num_rows; err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global_size, &local_size, 0, NULL, &ocdTempEvent); clFinish(commands); START_TIMER(ocdTempEvent, OCD_TIMER_KERNEL, "CSR Kernel", ocdTempTimer) END_TIMER(ocdTempTimer) CHKERR(err, "Failed to execute kernel!"); /* Wait for the command commands to get serviced before reading back results */ float output[csr.num_rows]; /* Read back the results from the device to verify the output */ err = clEnqueueReadBuffer(commands, y_loc, CL_TRUE, 0, sizeof(float)*csr.num_rows, output, 0, NULL, &ocdTempEvent); clFinish(commands); START_TIMER(ocdTempEvent, OCD_TIMER_D2H, "CSR Data Copy", ocdTempTimer) END_TIMER(ocdTempTimer) CHKERR(err, "Failed to read output array!"); /* end of timing point */ stopwatch_stop(&sw); printf("Time consumed(ms): %lf Gflops: %f \n", 1000*get_interval_by_sec(&sw), (2.0 * (double) csr.num_nonzeros / get_interval_by_sec(&sw)) / 1e9); /* Validate our results */ if(do_verify){ for (i = 0; i < csr.num_rows; i++){ printf("row: %d output: %f \n", i, output[i]); } } int row = 0; float sum = 0; int row_start = 0; int row_end = 0; for(row =0; row < csr.num_rows; row++){ sum = y_host[row]; row_start = csr.Ap[row]; row_end = csr.Ap[row+1]; unsigned int jj = 0; for (jj = row_start; jj < row_end; jj++){ sum += csr.Ax[jj] * x_host[csr.Aj[jj]]; } y_host[row] = sum; } for (i = 0; i < csr.num_rows; i++){ if((fabsf(y_host[i]) - fabsf(output[i])) > .001) printf("Possible error, difference greater then .001 at row %d \n", i); } /* Print a brief summary detailing the results */ ocd_finalize(); /* Shutdown and cleanup */ 
clReleaseMemObject(csr_ap); clReleaseMemObject(csr_aj); clReleaseMemObject(csr_ax); clReleaseMemObject(x_loc); clReleaseMemObject(y_loc); clReleaseProgram(program); clReleaseKernel(kernel); clReleaseCommandQueue(commands); clReleaseContext(context); return 0; }
// ISR responds to transponder interrupts void transponder_response_isr(void) { GPIOPinIntClear(GPIO_PORTB_BASE, UUT_TRANSPONDER_RESPONSE_PIN_PB3); stopwatch_stop(&g_transponder_stopwatch); g_transponder_response_flags[g_transponder_pulse_count]++; }