/** * Main, startup */ int main(int argc, char *argv[]) { // Filename of the HEX File const char * hexfile = NULL; // 1 if verify, 2 if programm char verify = 0; // Serial device const char * device = "/dev/ttyS0"; // Baudrate int baud = 4800; int baudid = -1; // if crc is supported (not supportet if 2) int crc_on; // The path to the binary (for finding devices.txt) readlink("/proc/self/exe", devices_file_p, 255); // symlink to this binary chp = strrchr (devices_file_p,'/'); // get the last path separator if (chp) strcpy (chp+1,DEVICES_FILE); // copy the device filename after the path separator // print header printf("\n"); printf("=================================================\n"); printf("| BOOTLOADER, Target: V2.1 |\n"); printf("=================================================\n"); // Parsing / checking parameter int i; int type = 0; for(i = 1; i < argc; i++) { if(*argv[i] == '-') { type = argv[i][1]; } else { switch(type) { case 'd': device = argv[i]; break; case 'b': baud = atoi(argv[i]); break; case 'v': verify = 1; hexfile = argv[i]; break; case 'p': verify = 2; hexfile = argv[i]; break; default: printf("Wrong parameter!\n"); usage(); } type = 0; } } if(hexfile == NULL) { printf("No hexfile specified!\n"); usage(); } if(verify == 0) { printf("No Verify / Programm specified!\n"); usage(); } // Checking baudrate for(i = 0; i < BAUD_CNT; i++) { if (baud_value[i] == baud) { baudid = i; break; } } if(baudid == -1) { printf("Unknown baudrate (%i)!\n", baud); usage(); } printf("Device : %s\n", device); printf("Baudrate : %i\n", baud); printf("%s: %s\n", (verify == 1 ? 
"Verify " : "Program "), hexfile); printf("-------------------------------------------------\n"); if(!com_open(device, baud_const[baudid])) { printf("Open com port failed!\n"); exit(2); } connect_device(); crc_on = check_crc(); read_info(); /*if(read_info()) { } else { printf("Reading device information failed!\n"); }*/ if(crc_on != 2) { crc_on = check_crc(); switch(crc_on) { case 2: printf("No CRC support.\n"); break; case 0: printf("CRC enabled and OK.\n"); break; case 3: printf("CRC check failed!\n"); break; default: printf("Checking CRC Error (%i)!\n", crc_on); break; } } else { printf("No CRC support.\n"); } flash(verify==1, hexfile); if( crc_on != 2 ) { if( check_crc() ) printf( "CRC-Error !\n"); else printf("CRC: o.k.\n"); } #ifdef SHOW_TIME_MS //time @ ms printf("Elapsed time: %d s\n", elapsed_msecs (&t_start)); #endif #ifdef SHOW_TIME_S printf("Elapsed time: %.3f seconds\n", elapsed_secs (&t_start)); #endif printf("...starting application\n\n"); sendcommand(START);//start application sendcommand(START); com_close();//close opened com port return 0; }
/**
 * Build a generated kernel, launch it repeatedly and report timing figures.
 *
 * The program comes from gen_kernel(n_args, n_lines, context). The kernel
 * "main_func" receives n_args one-element buffers, is launched once as a
 * warm-up and then N_RUNS times with a single work-item. For every timed
 * launch the interval between the QUEUED and START profiling timestamps of
 * its event is recorded, scaled by 1e-6 (OpenCL reports these counters in
 * nanoseconds, so the values are milliseconds).
 *
 * NOTE(review): the recorded interval is queued -> start, not start -> end;
 * confirm this is the quantity the benchmark is meant to report.
 *
 * @param did          device queried for the build log if the build fails
 * @param context      context used for the program and the argument buffers
 * @param commands     command queue the kernel is enqueued on
 * @param n_args       number of buffer arguments passed to the kernel
 * @param n_lines      forwarded to gen_kernel()
 * @param duration     out: element (N_RUNS - 1) / 2 of the sorted intervals
 * @param delta        out: first minus last element of the sorted intervals
 * @param compile_time out: elapsed_msecs() measured across clBuildProgram()
 *                     and clCreateKernel()
 * @return 0 on success, -1 on any failure. All OpenCL objects and host
 *         buffers created here are released on every path.
 */
int run_kernel_benchmark(cl_device_id did, cl_context context, cl_command_queue commands, int n_args, int n_lines, double *duration, double *delta, double *compile_time)
{
    enum { N_RUNS = 10 };

    int i;
    int err;
    int rc = -1;
    int n_buffers = 0;          /* number of valid entries in mem_args */
    char build_log[4096] = {0};
    T *tmp_args = NULL;
    cl_mem *mem_args = NULL;
    double durations[N_RUNS];
    size_t len;
    size_t n_alloc;
    size_t global = 1;
    size_t local = global;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_event event;
    unsigned long start_time;

    program = gen_kernel(n_args, n_lines, context);
    if (!program)
        return -1;

    start_time = current_msecs();
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS) {
        fprintf(stderr, "clBuildProgram() failed!\n");
        fprintf(stderr, "err: %i\n", err);
        clGetProgramBuildInfo(program, did, CL_PROGRAM_BUILD_LOG, sizeof(build_log), build_log, &len);
        puts(build_log);
        goto cleanup;
    }

    kernel = clCreateKernel(program, "main_func", &err);
    if (!kernel || err != CL_SUCCESS) {
        fprintf(stderr, "clCreateKernel() failed!\n");
        fprintf(stderr, "err: %i\n", err);
        goto cleanup;
    }
    *compile_time = elapsed_msecs(start_time) * 1.0;

    /* Host-side values and the device buffers created from them. */
    n_alloc = n_args > 0 ? (size_t)n_args : 0;
    tmp_args = (T *)malloc(n_alloc * sizeof(*tmp_args));
    mem_args = (cl_mem *)malloc(n_alloc * sizeof(*mem_args));
    if (n_alloc > 0 && (!tmp_args || !mem_args)) {
        fprintf(stderr, "malloc() failed!\n");
        goto cleanup;
    }

    err = 0;
    for (i = 0; i < n_args; i++) {
        cl_int buf_err = CL_SUCCESS;

        /* Random integer value in [1, 100]. */
        tmp_args[i] = (float)(1 + (int)(100.0 * (rand() / (RAND_MAX + 1.0))));
        mem_args[i] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(T), tmp_args + i, &buf_err);
        if (!mem_args[i] || buf_err != CL_SUCCESS) {
            fprintf(stderr, "clCreateBuffer() failed!\n");
            fprintf(stderr, "err: %i\n", buf_err);
            goto cleanup;
        }
        n_buffers++;
        err |= clSetKernelArg(kernel, i, sizeof(cl_mem), mem_args + i);
    }
    if (err != CL_SUCCESS) {
        fprintf(stderr, "clSetKernelArg() failed!\n");
        fprintf(stderr, "err: %i\n", err);
        goto cleanup;
    }

    /* warm up call */
    err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
    if (err != CL_SUCCESS) {
        fprintf(stderr, "err: %i\n", err);
        fprintf(stderr, "clEnqueueNDRangeKernel() failed!\n");
        fprintf(stderr, "err: %i\n", err);
        goto cleanup;
    }
    clFinish(commands);

    for (i = 0; i < N_RUNS; i++) {
        /* Zero-initialised so a failed profiling query cannot leave them
         * indeterminate. */
        cl_ulong ts_queued = 0;
        cl_ulong ts_start = 0;
        cl_int wait_err;
        cl_int prof_err;

        err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, &event);
        if (err != CL_SUCCESS) {
            fprintf(stderr, "err: %i\n", err);
            fprintf(stderr, "clEnqueueNDRangeKernel() failed!\n");
            fprintf(stderr, "err: %i\n", err);
            goto cleanup;
        }

        wait_err = clFinish(commands);
        wait_err |= clWaitForEvents(1, &event);
        if (wait_err != CL_SUCCESS)
            printf("Error waiting for kernel completion: %s\n", oclErrorString(wait_err));

        prof_err = clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &ts_queued, NULL);
        prof_err |= clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &ts_start, NULL);
        if (prof_err != CL_SUCCESS)
            fprintf(stderr, "clGetEventProfilingInfo() failed!\n");

        durations[i] = (ts_start - ts_queued) * 1e-6;

        /* Each launch creates its own event; release it here so none leak. */
        clReleaseEvent(event);
    }

    qsort(durations, N_RUNS, sizeof(double), dblcmp);
    *duration = durations[(N_RUNS - 1) / 2];
    *delta = durations[0] - durations[N_RUNS - 1];
    rc = 0;

cleanup:
    for (i = 0; i < n_buffers; i++)
        clReleaseMemObject(mem_args[i]);
    free(mem_args);
    free(tmp_args);
    if (kernel)
        clReleaseKernel(kernel);
    if (program)
        clReleaseProgram(program);
    return rc;
}
//////////////////////////////////////////////////////////////////////////////////// // Measure the local memoy to local memoy bandwidth. //////////////////////////////////////////////////////////////////////////////////// int measureLocalMemory(cl_device_id device_id, cl_context context, cl_command_queue commands, unsigned int type, int f4, unsigned int elements, unsigned int iterations, bool larg, double time_taken[2]) { cl_int err = CL_SUCCESS; const char* source_path = "mem_streaming.cl"; char buf[512]; int elementsToAlloc = elements; size_t local, global; for(size_t ws = 0; ws <= 1; ++ws) { if(ws == 0) { // Execute the kernel using just one single workitem local = 1; global = 1; } else { // Execute the kernel using the max number of threads on each processor _DEVICE_INFO* info = get_device_info(device_id); size_t* tmp = info->max_work_item_sizes; local = tmp[0]; free(tmp); global = info->max_compute_units; while(local > elements) local /= 2; global *= local; } if(type == 1) elementsToAlloc = (elements + local-1)/local; if(f4 == 0) sprintf(buf, "#define dtype float\n"); else sprintf(buf, "#define dtype float%d\n", (int)pow(2.0, f4)); sprintf(buf+strlen(buf), "#define VEC %d\n#define ELEMENTS %d\n#define localRange %lu\n", f4, elementsToAlloc, local); if(larg) sprintf(buf+strlen(buf), "#define LARG\n"); cl_program program = load_kernel(source_path, context, buf); if(!program) { fprintf(stderr, "Error: Failed to create compute program!\n"); return 1; } // Build the program executable err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if(err != CL_SUCCESS) { size_t len; char buffer[8096]; fprintf(stderr, "Error: Failed to build program executable!\n"); clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); fprintf(stderr, "%s\n", buffer); return 1; } // Create the compute kernel cl_kernel kernel; switch(type) { case 1: kernel = clCreateKernel(program, "private_mem", &err); break; case 2: kernel = 
clCreateKernel(program, "global_mem", &err); break; default: kernel = clCreateKernel(program, "local_mem", &err); } if (!kernel || err != CL_SUCCESS) { fprintf(stderr, "Error: Failed to create compute kernel!\n"); return 1; } float* hOutput = (float*)malloc(global * sizeof(float)); memset(hOutput, 0, global * sizeof(float)); cl_mem output = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * global, hOutput, NULL); if (!output || err != CL_SUCCESS) { fprintf(stderr, "Error: Failed to allocate device memory!\n"); return 1; } // Set the arguments to our compute kernel err = CL_SUCCESS; err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), &output); cl_mem g1, g2; switch(type) { case 1: break; case 2: switch(f4) { case(1): g1 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float2) * elements, NULL, NULL); g2 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float2) * elements*2, NULL, NULL); break; case(2): g1 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float4) * elements, NULL, NULL); g2 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float4) * elements*2, NULL, NULL); break; case(3): g1 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float8) * elements, NULL, NULL); g2 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float8) * elements*2, NULL, NULL); break; case(4): g1 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float16) * elements, NULL, NULL); g2 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float16) * elements*2, NULL, NULL); break; default: g1 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * elements, NULL, NULL); g2 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * elements*2, NULL, NULL); break; break; } err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &g1); err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &g2); break; default: if(larg) switch(f4) { case(1): err |= clSetKernelArg(kernel, 1, sizeof(cl_float2)*elements, NULL); err |= 
clSetKernelArg(kernel, 2, sizeof(cl_float2)*elements*2, NULL); break; case(2): err |= clSetKernelArg(kernel, 1, sizeof(cl_float4)*elements, NULL); err |= clSetKernelArg(kernel, 2, sizeof(cl_float4)*elements*2, NULL); break; case(3): err |= clSetKernelArg(kernel, 1, sizeof(cl_float8)*elements, NULL); err |= clSetKernelArg(kernel, 2, sizeof(cl_float8)*elements*2, NULL); break; case(4): err |= clSetKernelArg(kernel, 1, sizeof(cl_float8)*elements, NULL); err |= clSetKernelArg(kernel, 2, sizeof(cl_float8)*elements*2, NULL); break; default: err |= clSetKernelArg(kernel, 1, sizeof(cl_float)*elements, NULL); err |= clSetKernelArg(kernel, 2, sizeof(cl_float)*elements*2, NULL); break; break; } } if (err != CL_SUCCESS) { fprintf(stderr, "Error: Failed to set kernel arguments! %d\n", err); return 1; } // warmup for(unsigned i = 0; i < WARMUP_CYCLES; ++i) { err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL); clFinish(commands); } // start actual measurement unsigned long start_time = current_msecs(); for(unsigned i = 0; i < iterations; ++i) { err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if (err) { fprintf(stderr, "Error %i: Failed to execute kernel!\n%s\n", err, oclErrorString(err)); return 1; } clFlush(commands); } clFinish(commands); time_taken[ws] = elapsed_msecs(start_time) / 1000.0; /* cl_event read; err = clEnqueueReadBuffer(commands, output, CL_FALSE, 0, global*sizeof(float), hOutput, 0, NULL, &read); if (err) { fprintf(stderr, "Error %i: Failed read buffer!\n%s\n", err, oclErrorString(err)); return 1; } clWaitForEvents(1, &read); for(size_t i = 0; i < global; ++i) printf(", %d %f ", i, hOutput[i]); printf("\n\n"); */ free(hOutput); clReleaseMemObject(output); if(type == 2) { clReleaseMemObject(g1); clReleaseMemObject(g2); } clReleaseProgram(program); clReleaseKernel(kernel); } return err; }