/*
 * Times matvec_sse() and matvec_ref() back to back, prints both runtimes and
 * their ratio (reference / SSE), then prints "OK" or "MISMATCH" depending on
 * what verify_result() reports.
 */
static void run_multiply()
{
    struct timespec t_begin, t_end;

    /* SSE variant: vec_c = mat_a * vec_b */
    printf("Starting SSE run...\n");
    util_monotonic_time(&t_begin);
    matvec_sse();
    util_monotonic_time(&t_end);
    const double sse_seconds = util_time_diff(&t_begin, &t_end);
    printf("SSE run completed in %.2f s\n", sse_seconds);

    /* Reference variant, measured the same way. */
    printf("Starting reference run...\n");
    util_monotonic_time(&t_begin);
    matvec_ref();
    util_monotonic_time(&t_end);
    const double ref_seconds = util_time_diff(&t_begin, &t_end);
    printf("Reference run completed in %.2f s\n", ref_seconds);

    printf("Speedup: %.2f\n", ref_seconds / sse_seconds);

    /* A non-zero verdict from verify_result() is reported as success. */
    const char *verdict = verify_result() ? "OK\n" : "MISMATCH\n";
    printf("%s", verdict);
}
/***********************************************************************
 * test - main routine which is called repeatedly
 *        (once for each test run)
 *
 * Two transactions take place: first a write to the SDRAM and second a
 * read back from the SDRAM. They are followed by verification of data
 * consistency and by re-initialisation of the test state.
 *
 * All transfer settings are published through file-level variables
 * (num_bytes, timeout, tranread, ...) before each sdraminittrans() call.
 *
 * Parameters
 *   byteCount - Number of bytes to be transferred
 *   extAdrL   - SDRAM external address, low word
 *   sbSend    - start byte send value
 *   sbRecv    - start byte receive value
 *   sysMod    - SDRAM DSP modulo register - offset for module access
 *
 * Either transaction returning anything other than 1 triggers the
 * TEST_FAILED macro (its effect is defined elsewhere).
 **********************************************************************/
void _reentrant test(WORD byteCount, WORD extAdrL, WORD sbSend, WORD sbRecv, WORD sysMod){
    /* Transfer size; the timeout scales linearly with the byte count. */
    num_bytes = byteCount;
    timeout = SDRAMTIMEOUT * num_bytes;

    /* tranread == 0 selects the first (write) transaction. */
    tranread = 0;
    start_byte_send = sbSend;
    start_byte_receive = sbRecv;
    big_endian = 0;

    /* External address: only the low word comes from the caller; the high
       word and all base/modulo words are zeroed. */
    extaddr_lo = extAdrL;
    extaddr_hi = 0;
    extaddr_base_lo = 0;
    extaddr_base_hi = 0;
    extaddr_mod_lo = 0;
    extaddr_mod_hi = 0;

    /* System-side addressing: base is the source buffer, modulo as requested. */
    sysaddr_base = source_data;
    sysaddr_mod = sysMod;

    /* Transaction 1: send source_data to the SDRAM. */
    if (sdraminittrans (SDRAMTYPE_128Mb_8, tranread, start_byte_send, big_endian,
                        extaddr_lo, extaddr_hi, source_data, num_bytes,
                        sysaddr_base, sysaddr_mod,
                        extaddr_base_lo, extaddr_base_hi,
                        extaddr_mod_lo, extaddr_mod_hi, timeout) != 1) TEST_FAILED;

    /* Transaction 2: read the same external address back into returned_data,
       this time with the system modulo cleared. */
    tranread = 1;
    sysaddr_mod = 0;
    /* NOTE(review): sysaddr_base still refers to source_data here while the
       buffer argument is returned_data - confirm this is intended. */
    if (sdraminittrans (SDRAMTYPE_128Mb_8, tranread, start_byte_receive, big_endian,
                        extaddr_lo, extaddr_hi, returned_data, num_bytes,
                        sysaddr_base, sysaddr_mod,
                        extaddr_base_lo, extaddr_base_hi,
                        extaddr_mod_lo, extaddr_mod_hi, timeout) != 1) TEST_FAILED;

    sysaddr_mod = sysMod; // restore the caller's modulo before verify_result() runs
    verify_result();

    /* Prepare for the next run; what these helpers reset is defined elsewhere. */
    data_initialize();
    erase_used_sdram();
}
int main(int argc,char **argv) { hupcpp::init(&argc, &argv); #if 0 if(argc!=5) { printf("(ERROR) USAGE: ./a.out <total bodies> <tileSize> <in data file> <out data file>\n"); hupcpp::finalize(); } assert(atoi(argv[1]) == NUMBODIES); assert(atoi(argv[2]) == TILESIZE); //Init(argv[3]); #endif long start = get_usecs(); for(int time_steps=0;time_steps<MAX_STEPS;time_steps++) { hupcpp::finish_spmd([=]() { if(upcxx::global_myrank() ==0) { calculate_forces(); } }); } long end = get_usecs(); double dur = ((double)(end-start))/1000000; #ifdef VERIFY_SHORT counter_t sum = 0; for(int i=0; i<MAX_HCPP_WORKERS; i++) { sum+=TOTAL_COUNTS[i]; } counter_t total_sum; upcxx::reduce<counter_t>(&sum, &total_sum, 1, 0, UPCXX_SUM, UPCXX_ULONG_LONG); if(upcxx::global_myrank() == 0) { const counter_t expected = NUMBODIES * MAX_STEPS; const char* res = expected == total_sum ? "PASSED" : "FAILED"; printf("Test %s, Time = %0.3f\n",res,dur); } #endif #if 0 // gather result from all other processes using upcxx_reduce float accx_all[NUMBODIES], accy_all[NUMBODIES], accz_all[NUMBODIES]; upcxx::upcxx_reduce<float>(accx, accx_all, NUMBODIES, 0, UPCXX_SUM, UPCXX_FLOAT); upcxx::upcxx_reduce<float>(accy, accy_all, NUMBODIES, 0, UPCXX_SUM, UPCXX_FLOAT); upcxx::upcxx_reduce<float>(accz, accz_all, NUMBODIES, 0, UPCXX_SUM, UPCXX_FLOAT); if(upcxx::global_myrank() ==0) { printf("0: Computation done\n"); printf("Test Passed=%d\n",verify_result(argv[4],accx_all)); } #endif hupcpp::barrier(); hupcpp::finalize(); return 0; }
/*
 * Multiplies two n x n single-precision matrices (C = A * B) with the
 * classic triple loop and prints the elapsed wall-clock time.
 *
 * Usage: <program> n    (n > 0 is the dimension of the square matrices)
 *
 * A and B are initialised to 1.0 and C to 0.0 through initvalmat(); the
 * product is handed to verify_result() once the timed section ends.
 *
 * Returns 0 on success. Exits with status 1 when the argument is missing
 * or not positive, when n*n elements do not fit in size_t, or when an
 * allocation fails.
 */
int main(int argc, char *argv[])
{
    float *A, *B, *C;
    int n;
    size_t dim, elems, i, j, k;
    double timetick;

    /* Check the parameters */
    if ((argc != 2) || ((n = atoi(argv[1])) <= 0)) {
        printf("Uso: %s n\n donde n: dimension de la matriz cuadrada (nxn X nxn)\n", argv[0]);
        exit(1);
    }

    /* Do the size arithmetic in size_t: n*n evaluated as int overflows for
       n > 46340 before it is ever multiplied by sizeof(float). */
    dim = (size_t) n;
    if (dim > ((size_t) -1) / sizeof(float) / dim) {
        fprintf(stderr, "Error: la dimension %d es demasiado grande\n", n);
        exit(1);
    }
    elems = dim * dim;

    /* Allocate and initialise the matrices */
    A = (float *) malloc(elems * sizeof(float));
    B = (float *) malloc(elems * sizeof(float));
    C = (float *) malloc(elems * sizeof(float));
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "Error: no se pudo reservar memoria para las matrices\n");
        free(A);    /* free(NULL) is a no-op */
        free(B);
        free(C);
        exit(1);
    }

    initvalmat(A, n, 1.0);
    initvalmat(B, n, 1.0);
    initvalmat(C, n, 0.0);

    printf("Multiplicando matrices de %d x %d \n", n, n);

    timetick = dwalltime();
    /*************************************************************/
    /* Matrix multiplication algorithm (row-major storage)       */
    /*************************************************************/
    for (i = 0; i < dim; ++i) {
        for (j = 0; j < dim; ++j) {
            for (k = 0; k < dim; ++k) {
                C[i*dim + j] += A[i*dim + k] * B[k*dim + j];
            }
        }
    }
    /*************************************************************/
    timetick = dwalltime() - timetick;

    /* NOTE(review): the outcome of verify_result() is not inspected here;
       the success message below is printed unconditionally - confirm that
       verify_result() reports failures on its own. */
    verify_result(C, n);

    printf("Resultado correcto. Tiempo de ejecucion: %f segundos\n", timetick);

    free(A);
    free(B);
    free(C);
    return 0;
}
/* Final-verification hook: hands back whatever verify_result () reports.  */
int simulate_thread_final_verify ()
{
  const int verdict = verify_result ();
  return verdict;
}
/* Per-step verification hook: hands back whatever verify_result () reports.  */
int simulate_thread_step_verify ()
{
  const int verdict = verify_result ();
  return verdict;
}
int BigqueryReader::read_data(uint64_t prefix) { int err = 0; assert(NULL != ob_client_); char rule_data[256]; memset(rule_data, 0x00, sizeof(rule_data)); ValueRule rule; err = prefix_info_->set_read_write_status(prefix, BIGQUERY_READING); if (0 != err && READ_WRITE_CONFLICT != err) { TBSYS_LOG(WARN, "failed to set read flag, prefix=%lu, err=%d", prefix, err); } if (0 == err) { err = prefix_info_->get_rule(prefix, rule_data, sizeof(rule_data)); if (0 != err) { TBSYS_LOG(WARN, "failed to get rule, prefix=%lu, err=%d", prefix, err); } else { uint64_t row_num = 0; err = prefix_info_->get_row_num(prefix, row_num); if (0 != err) { TBSYS_LOG(WARN, "failed to get row num, err=%d", err); } else { err = rule.deserialize(rule_data, strlen(rule_data) + 1); if (0 != err) { TBSYS_LOG(WARN, "failed to deserialize rule, err=%d", err); } else { rule.set_row_num(row_num); } } } } Bigquery query; Array expected_result; if (0 == err) { err = get_bigquery(prefix, rule, query, expected_result); if (0 != err) { TBSYS_LOG(WARN, "failed to get big query, rule_data=%s, err=%d", rule_data, err); } } Array res; if (0 == err) { TBSYS_LOG(INFO, "row_num=%ld, rule_data=%s, bigquery sql=%s", rule.get_row_num(), rule_data, query.to_sql()); err = ob_client_->exec_query(query.to_sql(), res); if (0 != err) { TBSYS_LOG(ERROR, "failed to exec query, sql=%s, err=%d", query.to_sql(), err); } } if (0 == err) { err = verify_result(expected_result, res); } if (0 == err) { err = prefix_info_->set_read_write_status(prefix, 0); if (0 != err) { TBSYS_LOG(WARN, "failed to reset flag, prefix=%lu, err=%d", prefix, err); } } else if (READ_WRITE_CONFLICT == err) { err = OB_SUCCESS; } return err; }
int main(int argc, char **argv) { const char *file = "test_tcpbpf_kern.o"; int prog_fd, map_fd, sock_map_fd; struct tcpbpf_globals g = {0}; const char *cg_path = "/foo"; int error = EXIT_FAILURE; struct bpf_object *obj; int cg_fd = -1; __u32 key = 0; int rv; if (setup_cgroup_environment()) goto err; cg_fd = create_and_get_cgroup(cg_path); if (cg_fd < 0) goto err; if (join_cgroup(cg_path)) goto err; if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { printf("FAILED: load_bpf_file failed for: %s\n", file); goto err; } rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0); if (rv) { printf("FAILED: bpf_prog_attach: %d (%s)\n", error, strerror(errno)); goto err; } if (system("./tcp_server.py")) { printf("FAILED: TCP server\n"); goto err; } map_fd = bpf_find_map(__func__, obj, "global_map"); if (map_fd < 0) goto err; sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results"); if (sock_map_fd < 0) goto err; rv = bpf_map_lookup_elem(map_fd, &key, &g); if (rv != 0) { printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); goto err; } if (verify_result(&g)) { printf("FAILED: Wrong stats\n"); goto err; } if (verify_sockopt_result(sock_map_fd)) { printf("FAILED: Wrong sockopt stats\n"); goto err; } printf("PASSED!\n"); error = 0; err: bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); close(cg_fd); cleanup_cgroup_environment(); return error; }
/**
 * Tunes the spatial convolution for the current layer configuration.
 *
 * The function
 *   1. generates the cache key and resets the kernel id counter,
 *   2. enqueues a set of candidate kernels in kernelQueue (the candidate set
 *      depends on the device vendor string and on M_),
 *   3. times every candidate for which tune_local_size() succeeds,
 *   4. verifies candidates against verify_blob in order of increasing
 *      execution time until one passes, creating and verifying a basic
 *      kernel if none does,
 *   5. keeps the selected configuration in bestKernelConfig, deletes all
 *      other candidates and clears the queue,
 *   6. writes the selected configuration to the file CACHE_DIRECTORY + key_.
 *
 * @param bottom       input blobs, forwarded to kernel creation/timing/verify
 * @param top          output blobs, forwarded likewise
 * @param verify_blob  reference blob handed to verify_result()
 */
void ConvolutionLayerSpatial<float>::setup_convolution(
    const vector<Blob<float>*>& bottom, const vector<Blob<float>*>& top,
    const Blob<float> &verify_blob) {
  // Generate the static key_ (used below to name the cache file).
  generate_key();
  // Reset the unique kernel ID counter.
  kernel_uid_ = 0;

  viennacl::ocl::context &ctx = viennacl::ocl::get_context(this->device_->id());
  const viennacl::ocl::device &device = ctx.current_device();
  if (device.vendor().find("Intel") != std::string::npos && M_ % 16 == 0) {
    /* IDLF kernels use an Intel-specific extension, which makes them
       Intel-only. */
    int kernelCnt = 0;
    // Walk output block widths/heights from large (14) to small (1).
    for (uint32_t width = 14; width > 0; width--) {
      int candidate = 0;
      if (width > output_w_)
        continue;
      for (uint32_t height = 14; height > 0; height--) {
        // Skip blocks covering more than 32 outputs or taller than the output.
        if (height * width > 32 || height > output_h_)
          continue;
        // Input tile extent needed to produce a width x height output block.
        int tile_x = kernel_w_ + (width - 1) * stride_w_;
        int tile_y = kernel_h_ + (height - 1) * stride_h_;
        int tile_y_stride = 64 / tile_x;

        // Both accepted cases enqueue the same kernel type (2); only the
        // admission condition differs.
        if (tile_x % 4 != 0 && tile_x <= 16) {
          create_convolution_kernel(bottom, top, 2, width, height, 1);
          candidate++;
        } else if ((tile_x % 4 == 0) &&
                   ((tile_y + tile_y_stride - 1) / tile_y_stride < 4)) {
          create_convolution_kernel(bottom, top, 2, width, height, 1);
          candidate++;
        }
        // Stop exploring heights once this width has 4+ candidates and the
        // scan has reached height 2.
        if (candidate >= 4 && height == 2)
          break;
      }
      kernelCnt += candidate;
      // Likewise stop exploring widths at width 2 once 12+ were enqueued.
      if (kernelCnt >= 12 && width == 2)
        break;
    }
  } else {
    // Other devices: kernel type 1 with first block dimension fixed at 4,
    // limited to 4 * y * z <= 32.
    for (int_tp y = 1; y < 4; y += 1)
      for (int_tp z = 1; z < 16 && z < M_; z += 1) {
        if (4 * y * z > 32) continue;
        create_convolution_kernel(bottom, top, 1, 4, y, z);
      }
  }

  // Time every candidate that has a usable local work size.
  for (int_tp x = 0; x < kernelQueue.size(); x++)
    if (tune_local_size(bottom, top, kernelQueue[x])) {
      kernelQueue[x]->executionTime = timed_convolve(bottom, top, bottom_index_,
                                                     num_, kernelQueue[x]);
    } else {
      // Skip kernels without a good local size: marking them as tested
      // excludes them from the selection loop below.
      kernelQueue[x]->verified = false;
      kernelQueue[x]->tested = true;
    }

  int_tp failures = 0;
  bool verification = false;
  if (kernelQueue.size()) {
    // Repeatedly pick the fastest not-yet-tested kernel and verify it.
    while (failures < kernelQueue.size()) {
      int_tp fastestKernel = -1;
      // Sentinel "slower than anything" starting value.
      float fastestTime = 999999990000000000000000000.0f;

      for (int_tp x = 0; x < kernelQueue.size(); x++) {
        if (kernelQueue[x]->executionTime < fastestTime &&
            kernelQueue[x]->tested == false) {
          fastestKernel = x;
          fastestTime = kernelQueue[x]->executionTime;
        }
      }
      // No untested kernel left.
      if (fastestKernel < 0) break;
      // Verify the fastest remaining kernel.
      bool verified = verify_result(bottom, top, bottom_index_, num_,
                                    verify_blob, kernelQueue[fastestKernel]);
      if (verified == true) {
        kernelQueue[fastestKernel]->verified = true;
        kernel_index_ = fastestKernel;
        verification = true;
        break;
      } else {
        kernelQueue[fastestKernel]->tested = true;
        dbgPrint(std::cout << "Kernel "
                           << kernelQueue[fastestKernel]->kernelName
                           << " failed verification" << std::endl);
        failures++;
      }
    }
  }

  if (verification) {
    dbgPrint(std::cout << "Kernel <" << kernelQueue[kernel_index_]->kernelName
                       << "> passed verification" << std::endl);
  } else {
    // No tuned kernel verified: append the basic kernel (it becomes the last
    // queue entry) and require that it verifies.
    dbgPrint(std::cout << "Verification was not successful, "
                       << "fallback to basic kernel" << std::endl);
    create_basic_kernel(bottom, top, 1, 1, 1);
    kernel_index_ = kernelQueue.size() - 1;
    verification = verify_result(bottom, top, bottom_index_, num_,
                                 verify_blob, kernelQueue[kernel_index_]);
    CHECK_EQ(verification, true) << "Basic kernel failed verification."
                                 << std::endl;
  }
  this->bestKernelConfig = kernelQueue[kernel_index_];

  dbgPrint(std::cout << "Convolution Time:"
                     << kernelQueue[kernel_index_]->executionTime << std::endl);

  // Release every candidate except the selected one. The selected entry stays
  // alive through bestKernelConfig after the queue is cleared.
  for (int_tp x = 0; x < kernelQueue.size(); x++) {
    if (x != kernel_index_) {
      viennacl::ocl::current_context().delete_program(
          kernelQueue[x]->kernelName);
      delete kernelQueue[x];
    }
  }
  kernelQueue.clear();
  tuned_ = true;

  // Make sure the cache directory exists before writing the tuning result.
  // NOTE(review): the directory path is passed through unique_path() - confirm
  // this resolves to the same location as CACHE_DIRECTORY used for the file.
  const boost::filesystem::path& path = CACHE_DIRECTORY;
  const boost::filesystem::path& dir =
      boost::filesystem::unique_path(path).string();
  bool hasCacheDir = false;
  if (!boost::filesystem::exists(dir))
    hasCacheDir = boost::filesystem::create_directory(dir);
  else
    hasCacheDir = boost::filesystem::is_directory(dir);

  if (hasCacheDir != true) {
    // tuned_ is already true here; only persisting the result is skipped.
    std::cout << "Failed to create cache directory,"
              << "will tune again for next running" << std::endl;
    return;
  }

  string outputFile;
  outputFile = CACHE_DIRECTORY + key_;
  // Opened but not read anywhere in this function.
  std::ifstream cachedKernel(outputFile.c_str());
  std::ofstream outputKernel;
  outputKernel.open(outputFile.c_str());
  // Space-separated record of the selected configuration.
  outputKernel << bestKernelConfig->workItem_output[0] << " "
               << bestKernelConfig->workItem_output[1] << " "
               << bestKernelConfig->workItem_output[2] << " "
               << bestKernelConfig->kernelType << " "
               << bestKernelConfig->global_work_size[0] << " "
               << bestKernelConfig->global_work_size[1] << " "
               << bestKernelConfig->global_work_size[2] << " "
               << bestKernelConfig->local_work_size[0] << " "
               << bestKernelConfig->local_work_size[1] << " "
               << bestKernelConfig->local_work_size[2] << " "
               << bestKernelConfig->swizzle_weights << " "
               << 0 << " "  // deprecated field, always written as 0
               << bestKernelConfig->use_null_local << " ";
  outputKernel.close();
}
int main(int argc, char **argv) { int i; vx_status status; vx_set_debug_zone(VX_ZONE_ERROR); //vx_set_debug_zone(VX_ZONE_WARNING); //vx_set_debug_zone(VX_ZONE_INFO); vx_context context = vxCreateContext(); CHECK_NOT_NULL(context, "vxCreateContext"); printf("Success create vx_context!!\n\n"); vxInitLog(&helper_log); vxRegisterLogCallback(context, &vxHelperLogCallback, vx_false_e); Mat src = imread(SRC_IMG_NAME); CHECK_NOT_NULL(src.data, "imread"); resize(src, src, Size(IMG_WIDTH,IMG_HEIGHT)); cvtColor(src, src, CV_RGB2GRAY); for(i=0; i<1; i++) { Mat result_cv(IMG_HEIGHT,IMG_WIDTH,CV_8UC1); Mat result_vx(IMG_HEIGHT,IMG_WIDTH,CV_8UC1); printf("Start to run not_box3x3_graph()\n"); not_box3x3_cv(src.clone(), result_cv); status = not_box3x3_graph(context, src.clone(), result_vx); printf("Return from not_box3x3_graph() result_vx: %d\n", status); if(verify_result(result_cv, result_vx)) printf("Verify passed!!\n"); else printf("Verify fail!!\n"); printf("\n"); //imwrite("not_box3x3_cv.jpg",result_cv); //imwrite("not_box3x3_vx.jpg",result_vx); printf("Start to run not_not_graph()\n"); not_not_cv(src.clone(), result_cv); status = not_not_graph(context, src.clone(), result_vx); printf("Return from not_not_graph() result_vx: %d\n", status); if(verify_result(result_cv, result_vx)) printf("Verify passed!!\n"); else printf("Verify fail!!\n"); printf("\n"); printf("Start to run not_graph()\n"); not_cv(src.clone(), result_cv); status = not_graph(context, src.clone(), result_vx); printf("Return from not_not_graph() result_vx: %d\n", status); if(verify_result(result_cv, result_vx)) printf("Verify passed!!\n"); else printf("Verify fail!!\n"); printf("\n"); //imwrite("result_cv.jpg",result_cv); //imwrite("result_vx.jpg",result_vx); } status = vxReleaseContext(&context); CHECK_STATUS(status, "vxReleaseContext"); printf("%s done!!\n", argv[0]); return 0; }
int main(int argc, char **argv) { const char *file = "test_tcpnotify_kern.o"; int prog_fd, map_fd, perf_event_fd; struct tcpnotify_globals g = {0}; const char *cg_path = "/foo"; int error = EXIT_FAILURE; struct bpf_object *obj; int cg_fd = -1; __u32 key = 0; int rv; char test_script[80]; int pmu_fd; cpu_set_t cpuset; CPU_ZERO(&cpuset); CPU_SET(0, &cpuset); pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); if (setup_cgroup_environment()) goto err; cg_fd = create_and_get_cgroup(cg_path); if (!cg_fd) goto err; if (join_cgroup(cg_path)) goto err; if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) { printf("FAILED: load_bpf_file failed for: %s\n", file); goto err; } rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0); if (rv) { printf("FAILED: bpf_prog_attach: %d (%s)\n", error, strerror(errno)); goto err; } perf_event_fd = bpf_find_map(__func__, obj, "perf_event_map"); if (perf_event_fd < 0) goto err; map_fd = bpf_find_map(__func__, obj, "global_map"); if (map_fd < 0) goto err; pmu_fd = setup_bpf_perf_event(perf_event_fd); if (pmu_fd < 0 || perf_event_mmap(pmu_fd) < 0) goto err; pthread_create(&tid, NULL, poller_thread, (void *)&pmu_fd); sprintf(test_script, "/usr/sbin/iptables -A INPUT -p tcp --dport %d -j DROP", TESTPORT); system(test_script); sprintf(test_script, "/usr/bin/nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ", TESTPORT); system(test_script); sprintf(test_script, "/usr/sbin/iptables -D INPUT -p tcp --dport %d -j DROP", TESTPORT); system(test_script); rv = bpf_map_lookup_elem(map_fd, &key, &g); if (rv != 0) { printf("FAILED: bpf_map_lookup_elem returns %d\n", rv); goto err; } sleep(10); if (verify_result(&g)) { printf("FAILED: Wrong stats Expected %d calls, got %d\n", g.ncalls, rx_callbacks); goto err; } printf("PASSED!\n"); error = 0; err: bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS); close(cg_fd); cleanup_cgroup_environment(); return error; }