Ejemplo n.º 1
0
/*
 * Time the SSE and the reference runs back to back, print each runtime
 * and the reference/SSE ratio, then print the verdict of verify_result().
 */
static void
run_multiply()
{
        struct timespec t_begin, t_end;
        double sse_secs, ref_secs;

        /* SSE run: vec_c = mat_a * vec_b */
        printf("Starting SSE run...\n");
        util_monotonic_time(&t_begin);
        matvec_sse();
        util_monotonic_time(&t_end);
        sse_secs = util_time_diff(&t_begin, &t_end);
        printf("SSE run completed in %.2f s\n", sse_secs);

        /* Reference run, timed the same way */
        printf("Starting reference run...\n");
        util_monotonic_time(&t_begin);
        matvec_ref();
        util_monotonic_time(&t_end);
        ref_secs = util_time_diff(&t_begin, &t_end);
        printf("Reference run completed in %.2f s\n", ref_secs);

        printf("Speedup: %.2f\n", ref_secs / sse_secs);

        /* A zero return from verify_result() is reported as a mismatch. */
        if (!verify_result()) {
                printf("MISMATCH\n");
                return;
        }
        printf("OK\n");
}
/***********************************************************************
 * test  - main routine which is called repeatedly
 *		   (once for each test run).
 *		   Two transactions take place: first a write to the SDRAM and
 *		   second a read back from the SDRAM. They are followed by
 *		   verification of data consistency.
 *
 *		   Both transfers use the same timeout, computed as
 *		   SDRAMTIMEOUT * byteCount. After the transfers the routine
 *		   calls verify_result(), data_initialize() and
 *		   erase_used_sdram(), in that order.
 *
 * Parameters
 * byteCount - Number of bytes to be transferred
 * extAdrL   - SDRAM external address low (stored in extaddr_lo;
 *             extaddr_hi is always set to 0 here)
 * sbSend    - start byte send value (used for the first transfer)
 * sbRecv    - start byte receive value (used for the second transfer)
 * sysMod    - Sdram DSP modulo register - offset for module access
 *             (used for the first transfer; the second transfer uses 0)
 *
 **********************************************************************/
void _reentrant test(WORD byteCount, WORD extAdrL, 
					 WORD sbSend, WORD sbRecv, WORD sysMod){
    /* Load the transfer parameters into the file-level variables. */
    num_bytes = byteCount;
	timeout = SDRAMTIMEOUT * num_bytes;
	tranread = 0;	/* first transfer is issued with tranread == 0 */
	start_byte_send = sbSend;
	start_byte_receive = sbRecv;
	big_endian = 0;
	extaddr_lo = extAdrL;
	extaddr_hi = 0;
	extaddr_base_lo = 0;
	extaddr_base_hi = 0;
	extaddr_mod_lo = 0;
	extaddr_mod_hi = 0;
	sysaddr_base = source_data;
	sysaddr_mod = sysMod;
  
	/* First transfer: uses source_data as the system-side buffer.
	 * Any return value other than 1 is treated as a failure. */
	if (sdraminittrans (SDRAMTYPE_128Mb_8,
						tranread,
						start_byte_send,
						big_endian,
						extaddr_lo,
						extaddr_hi,
						source_data,
						num_bytes,
						sysaddr_base,
						sysaddr_mod,
						extaddr_base_lo,
						extaddr_base_hi,
						extaddr_mod_lo,
						extaddr_mod_hi,
						timeout) != 1)
		TEST_FAILED;

	/* Second transfer: tranread == 1 and a system modulo of 0. */
	tranread = 1;
	sysaddr_mod = 0;

	/* Uses returned_data as the system-side buffer; note that
	 * sysaddr_base still refers to source_data. */
 	if (sdraminittrans (SDRAMTYPE_128Mb_8,
						tranread,
						start_byte_receive,
						big_endian,
						extaddr_lo,
						extaddr_hi,
						returned_data,
						num_bytes,
						sysaddr_base,
						sysaddr_mod,
						extaddr_base_lo,
						extaddr_base_hi,
						extaddr_mod_lo,
						extaddr_mod_hi,
						timeout) != 1)
	    TEST_FAILED;

	sysaddr_mod = sysMod; // set again to verify result

	/* Check the data, then reset the buffers and the SDRAM region for
	 * the next run (helper behavior is defined elsewhere). */
	verify_result();
	data_initialize();
	erase_used_sdram();
}
Ejemplo n.º 3
0
// Benchmark driver. Initializes hupcpp, runs MAX_STEPS iterations in which
// global rank 0 calls calculate_forces() inside hupcpp::finish_spmd, and
// measures the elapsed time with get_usecs(). When VERIFY_SHORT is defined,
// the per-worker TOTAL_COUNTS are summed, reduced across ranks, and rank 0
// prints PASSED/FAILED by comparing against NUMBODIES * MAX_STEPS.
int main(int argc,char **argv) {
	hupcpp::init(&argc, &argv);

#if 0
	if(argc!=5) {
		printf("(ERROR) USAGE: ./a.out <total bodies> <tileSize> <in data file> <out data file>\n");
		hupcpp::finalize();
	}
	assert(atoi(argv[1]) == NUMBODIES);
	assert(atoi(argv[2]) == TILESIZE);
	//Init(argv[3]);
#endif

	// Timed region: one finish_spmd scope per simulation step.
	long t_begin = get_usecs();
	int step = 0;
	while(step < MAX_STEPS) {
		hupcpp::finish_spmd([=]() {
			if(0 == upcxx::global_myrank()) {
				calculate_forces();
			}
		});
		++step;
	}
	long t_end = get_usecs();
	// Microseconds to seconds.
 	double elapsed_sec = ((double)(t_end-t_begin))/1000000;
#ifdef VERIFY_SHORT
	// Local sum over every worker slot, then a reduction to rank 0.
	counter_t local_total = 0;
	for(int w=0; w<MAX_HCPP_WORKERS; ++w) {
		local_total += TOTAL_COUNTS[w];
	}
	counter_t global_total;
	upcxx::reduce<counter_t>(&local_total, &global_total, 1, 0, UPCXX_SUM, UPCXX_ULONG_LONG);
	if(upcxx::global_myrank() == 0) {
		const counter_t expected = NUMBODIES * MAX_STEPS;
		const char* verdict;
		if(expected == global_total) {
			verdict = "PASSED";
		} else {
			verdict = "FAILED";
		}
		printf("Test %s, Time = %0.3f\n",verdict,elapsed_sec);
	}
#endif

#if 0
	// gather result from all other processes using upcxx_reduce
	float accx_all[NUMBODIES], accy_all[NUMBODIES], accz_all[NUMBODIES];
	upcxx::upcxx_reduce<float>(accx, accx_all, NUMBODIES, 0, UPCXX_SUM, UPCXX_FLOAT);
	upcxx::upcxx_reduce<float>(accy, accy_all, NUMBODIES, 0, UPCXX_SUM, UPCXX_FLOAT);
	upcxx::upcxx_reduce<float>(accz, accz_all, NUMBODIES, 0, UPCXX_SUM, UPCXX_FLOAT);

	if(upcxx::global_myrank() ==0) {
		printf("0: Computation done\n");
		printf("Test Passed=%d\n",verify_result(argv[4],accx_all));
	}
#endif

	hupcpp::barrier();
	hupcpp::finalize();
	return 0;
}
Ejemplo n.º 4
0
Archivo: matmul.c Proyecto: fivunlm/HPC
/*
 * Multiplies two n x n single-precision matrices (C = A * B) with the
 * classic triple loop, times the multiplication with dwalltime() and hands
 * the result to verify_result().
 *
 * Usage: <program> n     where n > 0 is the dimension of the square matrices.
 *
 * Returns 0 on success; exits with status 1 on bad arguments, when n is too
 * large for the element count to fit in size_t, or when allocation fails.
 */
int main(int argc, char *argv[])
{
  float *A, *B, *C;
  int n, i, j, k;
  size_t dim, elems;
  double timetick;

  /* Check parameters */
  if ((argc != 2) || ((n = atoi(argv[1])) <= 0) )
  {
    printf("Uso: %s n\n  donde n: dimension de la matriz cuadrada (nxn X nxn)\n", argv[0]);
    exit(1);
  }

  /* Do all size arithmetic in size_t: n*n overflows int for n > 46340. */
  dim = (size_t) n;
  if (dim > ((size_t) -1) / sizeof(float) / dim)
  {
    printf("Error: la dimension %d es demasiado grande\n", n);
    exit(1);
  }
  elems = dim * dim;

  /* Allocate and initialize the matrices */
  A = malloc(elems * sizeof *A);
  B = malloc(elems * sizeof *B);
  C = malloc(elems * sizeof *C);
  if (A == NULL || B == NULL || C == NULL)
  {
    printf("Error: no se pudo reservar memoria para matrices de %d x %d\n", n, n);
    free(A);
    free(B);
    free(C);
    exit(1);
  }
  initvalmat(A, n, 1.0);
  initvalmat(B, n, 1.0);
  initvalmat(C, n, 0.0);

  printf("Multiplicando matrices de %d x %d \n", n, n);
  timetick = dwalltime();

  /*************************************************************/
  /* Matrix multiplication algorithm                           */
  /*************************************************************/

  for (i = 0; i < n; ++i)
  {
    const size_t row = (size_t) i * dim;  /* offset of row i in A and C */

    for (j = 0; j < n; ++j)
    {
      for (k = 0; k < n; ++k)
      {
        C[row + j] += A[row + k] * B[(size_t) k * dim + j];
      }
    }
  }

  /*************************************************************/

  timetick = dwalltime() - timetick;
  /* NOTE(review): the success message below is printed regardless of what
     verify_result() reports; its return type is not visible here -- confirm
     that it signals a mismatch on its own. */
  verify_result(C, n);

  printf("Resultado correcto. Tiempo de ejecucion: %f segundos\n", timetick);

  free(A);
  free(B);
  free(C);
  return 0;
}
Ejemplo n.º 5
0
/* Returns whatever verify_result () reports, unchanged.  */
int simulate_thread_final_verify ()
{
  int outcome = verify_result ();

  return outcome;
}
Ejemplo n.º 6
0
/* Returns whatever verify_result () reports, unchanged.  */
int simulate_thread_step_verify ()
{
  int outcome = verify_result ();

  return outcome;
}
Ejemplo n.º 7
0
// Runs one read-and-verify cycle for `prefix`.
//
// Steps, each executed only while err is still 0:
//   1. mark the prefix as BIGQUERY_READING in prefix_info_;
//   2. fetch the rule text and the row count for the prefix, deserialize
//      the rule and attach the row count to it;
//   3. build the query and its expected result via get_bigquery();
//   4. execute the query's SQL through ob_client_;
//   5. compare the actual rows with the expected ones via verify_result();
//   6. reset the prefix status to 0.
//
// Returns 0 on success, otherwise the first non-zero error code, with one
// exception: a final value of READ_WRITE_CONFLICT is mapped to OB_SUCCESS.
// On any other failure the prefix status is not reset here.
int BigqueryReader::read_data(uint64_t prefix)
{
  assert(NULL != ob_client_);

  int err = 0;
  char rule_data[256];
  memset(rule_data, 0x00, sizeof(rule_data));
  ValueRule rule;

  // A read/write conflict is an expected outcome and is not logged.
  err = prefix_info_->set_read_write_status(prefix, BIGQUERY_READING);
  if (!(0 == err || READ_WRITE_CONFLICT == err))
  {
    TBSYS_LOG(WARN, "failed to set read flag, prefix=%lu, err=%d", prefix, err);
  }

  if (0 == err)
  {
    err = prefix_info_->get_rule(prefix, rule_data, sizeof(rule_data));
    if (0 != err)
    {
      TBSYS_LOG(WARN, "failed to get rule, prefix=%lu, err=%d", prefix, err);
    }
  }

  uint64_t row_num = 0;
  if (0 == err)
  {
    err = prefix_info_->get_row_num(prefix, row_num);
    if (0 != err)
    {
      TBSYS_LOG(WARN, "failed to get row num, err=%d", err);
    }
  }

  if (0 == err)
  {
    // The serialized length includes the terminating NUL.
    err = rule.deserialize(rule_data, strlen(rule_data) + 1);
    if (0 != err)
    {
      TBSYS_LOG(WARN, "failed to deserialize rule, err=%d", err);
    }
    else
    {
      rule.set_row_num(row_num);
    }
  }

  Bigquery query;
  Array expected_result;
  if (0 == err)
  {
    err = get_bigquery(prefix, rule, query, expected_result);
    if (0 != err)
    {
      TBSYS_LOG(WARN, "failed to get big query, rule_data=%s, err=%d", rule_data, err);
    }
  }

  Array res;
  if (0 == err)
  {
    TBSYS_LOG(INFO, "row_num=%ld, rule_data=%s, bigquery sql=%s", rule.get_row_num(), rule_data, query.to_sql());
    err = ob_client_->exec_query(query.to_sql(), res);
    if (0 != err)
    {
      TBSYS_LOG(ERROR, "failed to exec query, sql=%s, err=%d", query.to_sql(), err);
    }
  }

  if (0 == err)
  {
    err = verify_result(expected_result, res);
  }

  if (0 == err)
  {
    // Everything succeeded: clear the reading status.
    err = prefix_info_->set_read_write_status(prefix, 0);
    if (0 != err)
    {
      TBSYS_LOG(WARN, "failed to reset flag, prefix=%lu, err=%d", prefix, err);
    }
  }
  else if (READ_WRITE_CONFLICT == err)
  {
    // A conflict is reported to the caller as success.
    err = OB_SUCCESS;
  }

  return err;
}
Ejemplo n.º 8
0
/*
 * Self-test driver for the sock_ops program in "test_tcpbpf_kern.o".
 *
 * Sets up a cgroup at "/foo", joins it, loads the BPF object and attaches
 * the program to the cgroup as BPF_CGROUP_SOCK_OPS, runs ./tcp_server.py,
 * then reads key 0 of "global_map" and checks it with verify_result(),
 * and checks "sockopt_results" with verify_sockopt_result().
 *
 * Returns 0 when every step succeeds and EXIT_FAILURE otherwise. The cgroup
 * environment is cleaned up on every path.
 */
int main(int argc, char **argv)
{
	const char *file = "test_tcpbpf_kern.o";
	int prog_fd, map_fd, sock_map_fd;
	struct tcpbpf_globals g = {0};
	const char *cg_path = "/foo";
	int error = EXIT_FAILURE;
	struct bpf_object *obj;
	int cg_fd = -1;
	__u32 key = 0;
	int rv;

	if (setup_cgroup_environment())
		goto err;

	cg_fd = create_and_get_cgroup(cg_path);
	if (cg_fd < 0)
		goto err;

	if (join_cgroup(cg_path))
		goto err;

	if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
		printf("FAILED: load_bpf_file failed for: %s\n", file);
		goto err;
	}

	rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
	if (rv) {
		/* Report the call's own return value; `error` is always
		 * EXIT_FAILURE at this point and carries no information.
		 */
		printf("FAILED: bpf_prog_attach: %d (%s)\n",
		       rv, strerror(errno));
		goto err;
	}

	if (system("./tcp_server.py")) {
		printf("FAILED: TCP server\n");
		goto err;
	}

	map_fd = bpf_find_map(__func__, obj, "global_map");
	if (map_fd < 0)
		goto err;

	sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results");
	if (sock_map_fd < 0)
		goto err;

	rv = bpf_map_lookup_elem(map_fd, &key, &g);
	if (rv != 0) {
		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
		goto err;
	}

	if (verify_result(&g)) {
		printf("FAILED: Wrong stats\n");
		goto err;
	}

	if (verify_sockopt_result(sock_map_fd)) {
		printf("FAILED: Wrong sockopt stats\n");
		goto err;
	}

	printf("PASSED!\n");
	error = 0;
err:
	/* Only touch the cgroup fd if it was actually obtained. */
	if (cg_fd >= 0) {
		bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
		close(cg_fd);
	}
	cleanup_cgroup_environment();
	return error;
}
Ejemplo n.º 9
0
// Tunes the spatial convolution for the current layer configuration.
//
// The routine (1) queues a set of candidate kernels, (2) tunes the local
// work size of each candidate and times the ones that tune successfully,
// (3) verifies candidates against verify_blob in order of increasing
// execution time and keeps the first one that passes, falling back to a
// basic kernel when none passes, (4) deletes every other queued kernel, and
// (5) writes the winning configuration to a file named
// CACHE_DIRECTORY + key_.
//
// bottom / top  - layer input and output blobs, forwarded to the helpers.
// verify_blob   - blob forwarded to verify_result() for the check.
void ConvolutionLayerSpatial<float>::setup_convolution(
    const vector<Blob<float>*>& bottom, const vector<Blob<float>*>& top,
    const Blob<float> &verify_blob) {
  // Generates static key_
  generate_key();
  // Initializes unique kernel ID
  kernel_uid_ = 0;

  viennacl::ocl::context &ctx = viennacl::ocl::get_context(this->device_->id());
  const viennacl::ocl::device &device = ctx.current_device();
  if (device.vendor().find("Intel") != std::string::npos &&
    M_ % 16 == 0) {
    /* IDLF kernels are using Intel specific extension which make
       them intel only. */
    // Enumerate output tile sizes from large to small; kernelCnt counts the
    // candidates queued so far across all widths.
    int kernelCnt = 0;
    for (uint32_t width = 14; width > 0; width--) {
      int candidate = 0;
      if (width > output_w_)
        continue;
      for (uint32_t height = 14; height > 0; height--) {
        // Skip tiles with more than 32 outputs or taller than the output.
        if (height * width > 32 || height > output_h_)
          continue;
        int tile_x = kernel_w_ + (width - 1) * stride_w_;
        int tile_y = kernel_h_ + (height - 1) * stride_h_;
        int tile_y_stride = 64 / tile_x;

        if (tile_x % 4 != 0 && tile_x <= 16) {
          create_convolution_kernel(bottom, top, 2, width, height, 1);
          candidate++;
        } else if ((tile_x % 4 == 0) &&
                 ((tile_y + tile_y_stride - 1) / tile_y_stride < 4)) {
          create_convolution_kernel(bottom, top, 2, width, height, 1);
          candidate++;
        }
        // Stop this width once at least 4 candidates exist and height
        // has reached 2.
        if (candidate >= 4 && height == 2)
          break;
      }
      kernelCnt += candidate;
      // Stop entirely once at least 12 candidates exist and width
      // has reached 2.
      if (kernelCnt >= 12 && width == 2)
        break;
    }
  } else {
    // Other devices: queue type-1 kernels with a fixed first size of 4 and
    // every (y, z) pair for which 4 * y * z does not exceed 32.
    for (int_tp y = 1; y < 4; y += 1)
      for (int_tp z = 1; z < 16 && z < M_; z += 1) {
        if (4 * y * z > 32) continue;
        create_convolution_kernel(bottom, top, 1, 4, y, z);
      }
  }
  // Tune the local size of each queued kernel; time only those that tune.
  for (int_tp x = 0; x < kernelQueue.size(); x++)
    if (tune_local_size(bottom, top, kernelQueue[x])) {
      kernelQueue[x]->executionTime = timed_convolve(bottom, top, bottom_index_,
                                                     num_, kernelQueue[x]);
    } else {
      // skip those kernels without a good local size.
      kernelQueue[x]->verified = false;
      kernelQueue[x]->tested = true;
    }

  // Verify candidates fastest-first until one passes or all have failed.
  int_tp failures = 0;
  bool verification = false;
  if (kernelQueue.size()) {
    while (failures < kernelQueue.size()) {
      int_tp fastestKernel = -1;
      float fastestTime = 999999990000000000000000000.0f;

      // Pick the untested kernel with the lowest execution time.
      for (int_tp x = 0; x < kernelQueue.size(); x++) {
        if (kernelQueue[x]->executionTime < fastestTime
            && kernelQueue[x]->tested == false) {
          fastestKernel = x;
          fastestTime = kernelQueue[x]->executionTime;
        }
      }
      // No untested kernel left.
      if (fastestKernel < 0) break;
      // Test fastest kernel
      bool verified = verify_result(bottom, top, bottom_index_, num_,
                                    verify_blob, kernelQueue[fastestKernel]);
      if (verified == true) {
        kernelQueue[fastestKernel]->verified = true;
        kernel_index_ = fastestKernel;
        verification = true;
        break;
      } else {
        kernelQueue[fastestKernel]->tested = true;
        dbgPrint(std::cout << "Kernel "
                           << kernelQueue[fastestKernel]->kernelName
                           << " failed verification" << std::endl);
        failures++;
      }
    }
  }
  if (verification) {
    dbgPrint(std::cout << "Kernel <" << kernelQueue[kernel_index_]->kernelName
                       << "> passed verification" << std::endl);
  } else {
    dbgPrint(std::cout << "Verification was not successful, "
                       << "fallback to basic kernel" << std::endl);
    // presumably create_basic_kernel appends to kernelQueue, which is why
    // the last entry is selected below -- TODO confirm.
    create_basic_kernel(bottom, top, 1, 1, 1);
    kernel_index_ = kernelQueue.size() - 1;
    verification = verify_result(bottom, top, bottom_index_, num_,
                                 verify_blob, kernelQueue[kernel_index_]);
    CHECK_EQ(verification, true) << "Basic kernel failed verification."
                                 << std::endl;
  }
  this->bestKernelConfig = kernelQueue[kernel_index_];

  dbgPrint(std::cout << "Convolution Time:"
                     << kernelQueue[kernel_index_]->executionTime << std::endl);

  // Release every kernel except the selected one; the selected config stays
  // reachable through bestKernelConfig after the queue is cleared.
  for (int_tp x = 0; x < kernelQueue.size(); x++) {
    if (x != kernel_index_) {
      viennacl::ocl::current_context().delete_program(
          kernelQueue[x]->kernelName);
      delete kernelQueue[x];
    }
  }
  kernelQueue.clear();

  tuned_ = true;

  // Make sure the cache directory exists before writing the result.
  // NOTE(review): the directory is derived via unique_path(path); confirm
  // that this resolves to CACHE_DIRECTORY itself, since the output file
  // below is built from CACHE_DIRECTORY directly.
  const boost::filesystem::path& path = CACHE_DIRECTORY;
  const boost::filesystem::path& dir =
                   boost::filesystem::unique_path(path).string();
  bool hasCacheDir = false;
  if (!boost::filesystem::exists(dir))
    hasCacheDir = boost::filesystem::create_directory(dir);
  else
    hasCacheDir = boost::filesystem::is_directory(dir);

  if (hasCacheDir != true) {
    std::cout << "Failed to create cache directory,"
              << "will tune again for next running" << std::endl;
    return;
  }

  // Persist the winning configuration as space-separated fields.
  string outputFile;
  outputFile = CACHE_DIRECTORY + key_;
  // NOTE(review): cachedKernel is opened but never read in this function.
  std::ifstream cachedKernel(outputFile.c_str());
  std::ofstream outputKernel;
  outputKernel.open(outputFile.c_str());
  outputKernel << bestKernelConfig->workItem_output[0] << " "
               << bestKernelConfig->workItem_output[1] << " "
               << bestKernelConfig->workItem_output[2] << " "
               << bestKernelConfig->kernelType << " "
               << bestKernelConfig->global_work_size[0] << " "
               << bestKernelConfig->global_work_size[1] << " "
               << bestKernelConfig->global_work_size[2] << " "
               << bestKernelConfig->local_work_size[0] << " "
               << bestKernelConfig->local_work_size[1] << " "
               << bestKernelConfig->local_work_size[2] << " "
               << bestKernelConfig->swizzle_weights << " "
               << 0 << " "  // deprecated
               << bestKernelConfig->use_null_local << " ";
  outputKernel.close();
}
Ejemplo n.º 10
0
// Compares three image operations implemented twice: once by the *_cv
// functions and once by the *_graph functions that run on a vx_context.
//
// The source image SRC_IMG_NAME is loaded, resized to IMG_WIDTH x IMG_HEIGHT
// and converted with CV_RGB2GRAY. For each of not_box3x3, not_not and not,
// both variants run on a clone of the source and verify_result() decides
// whether "Verify passed!!" or "Verify fail!!" is printed.
//
// Returns 0 when the run reaches the end; failures of context creation,
// image loading and context release are handled by the CHECK_* macros.
int main(int argc, char **argv)
{
	int i;
	vx_status status;
	vx_set_debug_zone(VX_ZONE_ERROR);
	//vx_set_debug_zone(VX_ZONE_WARNING);
	//vx_set_debug_zone(VX_ZONE_INFO);

	vx_context context = vxCreateContext();
	CHECK_NOT_NULL(context, "vxCreateContext");
	printf("Success create vx_context!!\n\n");

	vxInitLog(&helper_log);
	vxRegisterLogCallback(context, &vxHelperLogCallback, vx_false_e);

	Mat src = imread(SRC_IMG_NAME);
	CHECK_NOT_NULL(src.data, "imread");
	resize(src, src, Size(IMG_WIDTH,IMG_HEIGHT));
	cvtColor(src, src, CV_RGB2GRAY);

	// Single pass; raise the bound to repeat the whole comparison.
	for(i=0; i<1; i++)
	{
		Mat result_cv(IMG_HEIGHT,IMG_WIDTH,CV_8UC1);
		Mat result_vx(IMG_HEIGHT,IMG_WIDTH,CV_8UC1);
		printf("Start to run not_box3x3_graph()\n");
		not_box3x3_cv(src.clone(), result_cv);
		status = not_box3x3_graph(context, src.clone(), result_vx);
		printf("Return from not_box3x3_graph() result_vx: %d\n", status);
		if(verify_result(result_cv, result_vx))
			printf("Verify passed!!\n");
		else
			printf("Verify fail!!\n");
		printf("\n");

		//imwrite("not_box3x3_cv.jpg",result_cv);
		//imwrite("not_box3x3_vx.jpg",result_vx);

		printf("Start to run not_not_graph()\n");
		not_not_cv(src.clone(), result_cv);
		status = not_not_graph(context, src.clone(), result_vx);
		printf("Return from not_not_graph() result_vx: %d\n", status);
		if(verify_result(result_cv, result_vx))
			printf("Verify passed!!\n");
		else
			printf("Verify fail!!\n");
		printf("\n");

		printf("Start to run not_graph()\n");
		not_cv(src.clone(), result_cv);
		status = not_graph(context, src.clone(), result_vx);
		// This message previously named not_not_graph(), a copy-paste slip.
		printf("Return from not_graph() result_vx: %d\n", status);
		if(verify_result(result_cv, result_vx))
			printf("Verify passed!!\n");
		else
			printf("Verify fail!!\n");
		printf("\n");

		//imwrite("result_cv.jpg",result_cv);
		//imwrite("result_vx.jpg",result_vx);
	}

	status = vxReleaseContext(&context);
	CHECK_STATUS(status, "vxReleaseContext");
	printf("%s done!!\n", argv[0]);
	return 0;
}
Ejemplo n.º 11
0
/*
 * Self-test driver for the sock_ops program in "test_tcpnotify_kern.o".
 *
 * Pins the calling thread to CPU 0, sets up and joins the cgroup "/foo",
 * loads the BPF object and attaches the program as BPF_CGROUP_SOCK_OPS,
 * sets up the perf event buffer and starts poller_thread on it. It then
 * temporarily installs an iptables DROP rule for TESTPORT, runs nc against
 * that port, removes the rule, reads key 0 of "global_map", sleeps 10
 * seconds and checks the result with verify_result().
 *
 * Returns 0 on success and EXIT_FAILURE otherwise. The cgroup environment
 * is cleaned up on every path.
 */
int main(int argc, char **argv)
{
	const char *file = "test_tcpnotify_kern.o";
	int prog_fd, map_fd, perf_event_fd;
	struct tcpnotify_globals g = {0};
	const char *cg_path = "/foo";
	int error = EXIT_FAILURE;
	struct bpf_object *obj;
	int cg_fd = -1;
	__u32 key = 0;
	int rv;
	char test_script[80];
	int pmu_fd;
	cpu_set_t cpuset;

	CPU_ZERO(&cpuset);
	CPU_SET(0, &cpuset);
	pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);

	if (setup_cgroup_environment())
		goto err;

	cg_fd = create_and_get_cgroup(cg_path);
	/* Treat both 0 (the value the old check rejected) and negative
	 * values (checked by sibling tests) as failure, so the test is
	 * correct with either return convention of the helper.
	 */
	if (cg_fd <= 0)
		goto err;

	if (join_cgroup(cg_path))
		goto err;

	if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
		printf("FAILED: load_bpf_file failed for: %s\n", file);
		goto err;
	}

	rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
	if (rv) {
		/* Report the call's own return value; `error` is always
		 * EXIT_FAILURE at this point and carries no information.
		 */
		printf("FAILED: bpf_prog_attach: %d (%s)\n",
		       rv, strerror(errno));
		goto err;
	}

	perf_event_fd = bpf_find_map(__func__, obj, "perf_event_map");
	if (perf_event_fd < 0)
		goto err;

	map_fd = bpf_find_map(__func__, obj, "global_map");
	if (map_fd < 0)
		goto err;

	pmu_fd = setup_bpf_perf_event(perf_event_fd);
	if (pmu_fd < 0 || perf_event_mmap(pmu_fd) < 0)
		goto err;

	pthread_create(&tid, NULL, poller_thread, (void *)&pmu_fd);

	/* The three shell commands are best-effort: their exit status is
	 * intentionally not checked.
	 */
	sprintf(test_script,
		"/usr/sbin/iptables -A INPUT -p tcp --dport %d -j DROP",
		TESTPORT);
	system(test_script);

	sprintf(test_script,
		"/usr/bin/nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ",
		TESTPORT);
	system(test_script);

	sprintf(test_script,
		"/usr/sbin/iptables -D INPUT -p tcp --dport %d -j DROP",
		TESTPORT);
	system(test_script);

	rv = bpf_map_lookup_elem(map_fd, &key, &g);
	if (rv != 0) {
		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
		goto err;
	}

	sleep(10);

	if (verify_result(&g)) {
		printf("FAILED: Wrong stats Expected %d calls, got %d\n",
			g.ncalls, rx_callbacks);
		goto err;
	}

	printf("PASSED!\n");
	error = 0;
err:
	/* Only touch the cgroup fd if it was actually obtained. */
	if (cg_fd > 0) {
		bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
		close(cg_fd);
	}
	cleanup_cgroup_environment();
	return error;
}