T Linalg<T, H>::syr2k(
    const T &a, const T &b, const value_type &alpha, const value_type &beta,
    Uplo uplo) {
  // Convenience overload that returns the result by value.
  // The result object is constructed from a's allocator and handed, by
  // pointer, to the syr2k overload that takes an output argument; alpha,
  // beta and uplo are passed through unchanged.
  T result(a.allocator());
  syr2k(a, b, &result, alpha, beta, uplo);
  return result;
}
Beispiel #2
0
void MTLmarks::DmatDmatRun(std::string benchmark) {
    
    if(benchmark == "dmatdmatadd"){
        mtl_result = dmatdmatadd(size, steps);
    }
    else if(benchmark == "dmatdmatmult"){
        mtl_result = dmatdmatmult(size, steps);
    }
    else if(benchmark == "cmajordmdmmult"){
        mtl_result = cmajordmdmmult(size, steps);
    }
    else if(benchmark == "rmajordmdmmult"){
        mtl_result = rmajordmdmmult(size, steps);
    }
    else if(benchmark == "nestedprod"){
        mtl_result = nestedprod(size, steps);
    }
    else if(benchmark == "symm1"){
        mtl_result = symm1(size, steps);
    }
    else if(benchmark == "symm1rect"){
        mtl_result = symm1rect(size, steps);
    }
    else if(benchmark == "symm2"){
        mtl_result = symm2(size, steps);
    }
    else if(benchmark == "syr2k"){
        mtl_result = syr2k(size, steps);
    }
    else if(benchmark == "syr2krect"){
        mtl_result = syr2krect(size, steps);
    }
    else if(benchmark == "syrk"){
        mtl_result = syrk(size, steps);
    }
    else if(benchmark == "syrkrect"){
        mtl_result = syrkrect(size, steps);
    }
    else if(benchmark == "custom"){
        mtl_result = custom(size, steps);
    }
    else{
        std::cerr << "MTLmarks benchmark does not exist." << std::endl;
        exit(1);
    }
    
}
Beispiel #3
0
int main(void) 
{
	double t_start, t_end;

	DATA_TYPE* A;
	DATA_TYPE* B;
	DATA_TYPE* C;
	DATA_TYPE* C_outputFromGpu;

	A = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
	B = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
	C = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
	C_outputFromGpu = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));

	init_arrays(A, B, C);
	read_cl_file();
	cl_initialization();
	cl_mem_init(A, B, C);
	cl_load_prog();

	cl_launch_kernel();

	errcode = clEnqueueReadBuffer(clCommandQue, c_mem_obj, CL_TRUE, 0, N*M*sizeof(DATA_TYPE), C_outputFromGpu, 0, NULL, NULL);
	if(errcode != CL_SUCCESS) printf("Error in reading GPU mem\n");

	t_start = rtclock();
	syr2k(A, B, C);
	t_end = rtclock(); 
	fprintf(stdout, "CPU Runtime: %0.6lfs\n", t_end - t_start);   
	compareResults(C, C_outputFromGpu);
	cl_clean_up();

	free(A);
	free(B);
	free(C);
	free(C_outputFromGpu);

	return 0;
}
T& Linalg<T, H>::syr2k(
    const T &a, const T &b, T &c, const value_type &alpha,
    const value_type &beta, Uplo uplo) {
  // Overload for a mutable `c`: it delegates to a syr2k call that receives
  // `c` as a const reference, then removes the const qualification from the
  // reference that call returns.
  const T &c_as_const = c;
  const T &updated = syr2k(a, b, c_as_const, alpha, beta, uplo);
  // NOTE(review): presumably the returned reference designates `c` itself,
  // which is non-const here - confirm against the delegated overload.
  return const_cast< T& >(updated);
}
Beispiel #5
0
int main(void) 
{
  /* Prepare ctuning vars */
  long ct_repeat=0;
  long ct_repeat_max=1;

  double t_start, t_end;

  DATA_TYPE* A;
  DATA_TYPE* B;
  DATA_TYPE* C;
  DATA_TYPE* C_outputFromGpu;

#ifdef OPENME
  openme_init(NULL,NULL,NULL,0);
  openme_callback("PROGRAM_START", NULL);
#endif

  /* Run kernel. */
  if (getenv("CT_REPEAT_MAIN")!=NULL) ct_repeat_max=atol(getenv("CT_REPEAT_MAIN"));

  A = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
  B = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
  C = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));
  C_outputFromGpu = (DATA_TYPE*)malloc(N*M*sizeof(DATA_TYPE));

  srand(1);
  init_arrays(A, B, C);
  read_cl_file();
  cl_initialization();
  cl_mem_init(A, B, C);
  cl_load_prog();

#ifdef OPENME
  openme_callback("ACC_KERNEL_START", NULL);
#endif
  for (ct_repeat=0; ct_repeat<ct_repeat_max; ct_repeat++)
  {
    cl_launch_kernel();

    err_code = clEnqueueReadBuffer(clCommandQue, c_mem_obj, CL_TRUE, 0, N*M*sizeof(DATA_TYPE), C_outputFromGpu, 0, NULL, NULL);
    if(err_code != CL_SUCCESS)
    {
      printf("Error in reading GPU mem\n");
      exit(1);
    }
  }
#ifdef OPENME
  openme_callback("ACC_KERNEL_END", NULL);
#endif

  srand(1);
  init_arrays(A, B, C);

#ifdef OPENME
  openme_callback("KERNEL_START", NULL);
#endif
  for (ct_repeat=0; ct_repeat<ct_repeat_max; ct_repeat++)
  {
    syr2k(A, B, C);
  }
#ifdef OPENME
  openme_callback("KERNEL_END", NULL);
#endif

  compareResults(C, C_outputFromGpu);
  cl_clean_up();

  free(A);
  free(B);
  free(C);
  free(C_outputFromGpu);

#ifdef OPENME
  openme_callback("PROGRAM_END", NULL);
#endif

  return 0;
}