Example #1
0
// Performance test for cv::gpu::cartToPolar.
// Parameterized over (device, image size): selects the device, prepares two
// single-channel 32-bit float inputs of the requested size, uploads them to
// the GPU and runs the conversion inside the TEST_CYCLE() block.
GPU_PERF_TEST(CartToPolar, cv::gpu::DeviceInfo, cv::Size)
{
    const cv::gpu::DeviceInfo device = GET_PARAM(0);
    const cv::Size imageSize = GET_PARAM(1);

    // Make the requested device current before any GPU allocation happens.
    cv::gpu::setDevice(device.deviceID());

    // Host-side inputs; fill() is called with the bounds -100 and 100
    // (presumably random values in that range -- confirm against the helper).
    cv::Mat hostX(imageSize, CV_32FC1);
    cv::Mat hostY(imageSize, CV_32FC1);
    fill(hostX, -100.0, 100.0);
    fill(hostY, -100.0, 100.0);

    // Device-side copies of the inputs.
    const cv::gpu::GpuMat devX(hostX);
    const cv::gpu::GpuMat devY(hostY);

    // Outputs are left empty here and handed to cartToPolar as-is.
    cv::gpu::GpuMat devMagnitude;
    cv::gpu::GpuMat devAngle;

    TEST_CYCLE()
    {
        cv::gpu::cartToPolar(devX, devY, devMagnitude, devAngle);
    }
}
Example #2
0
/**
 * Sparse matrix-vector multiplication benchmark driver.
 *
 * Reads a sparse matrix from the file named by argv[1], multiplies it with a
 * random vector on the GPU NUM_ITER times via agile::multiply, and prints
 * (tab separated) the matrix dimensions, the average GPU time per iteration
 * in milliseconds, the CPU time in milliseconds (0 unless EXEC_CPU and TIMER
 * are enabled) and the derived GFLOPS figure. When VERIFY is enabled the GPU
 * result is compared against the CPU reference and "Test PASSED"/"Test FAILED"
 * is appended.
 *
 * @param argc number of command line arguments; must be between 2 and 4.
 * @param argv argv[1] is the path of the input matrix file.
 * @return 0 on success; the process exits with -1 on usage errors, an
 *         unreadable input file or a failed host allocation.
 */
int main(int argc, char** argv)
{
  // timer
  struct timeval st, et;
  float gputime = 0.0, cputime = 0.0;

  // read Sparse Matrix from file or generate
  if (argc < 2 || argc > 4) {
      printf("Correct Usage: <executable> <input matrix file>\n");
      exit(-1);
  }

  // init the network
  agile::NetworkEnvironment environment(argc, argv);

  // allocate a GPU
  typedef agile::GPUCommunicator<unsigned, float, float> communicator_type;
  communicator_type com;
  com.allocateGPU();

  // Use the argument string directly instead of copying it into a fixed-size
  // stack buffer: an over-long path would otherwise overflow that buffer.
  char* spmfileName = argv[1];
  if (!fileIsReadable(spmfileName))
  {
    printf("Non-existent input matrix file\n");
    exit(-1);
  }

  unsigned m_num_rows, m_num_cols;
  std::vector<unsigned> m_row_nnz;
  std::vector<unsigned> m_column_index;
  std::vector<float> m_data;

  // read in matrix from matrix-market file
  readSparseMatrix(spmfileName, 0, m_num_rows, m_num_cols, m_row_nnz,
    m_column_index, m_data);

  std::cout << m_num_rows << "\t" << m_num_cols << "\t";
/*
  PRINT_VEC("m_row_nnz", m_row_nnz);
  PRINT_VEC("m_column_index", m_column_index);
  PRINT_VEC("m_data", m_data);
*/

  // init gpu matrix
  agile::GPUCSMatrix<float> A(m_row_nnz, m_column_index, m_data);

  // init random vector with values in [0, 1]
  std::vector<float> x_host(m_num_cols, 0);
  srand(time(NULL));
  for (unsigned i=0; i<m_num_cols; ++i)
    x_host[i] = rand() / (float)RAND_MAX;

//PRINT_VEC("RANDOM X VECTOR", x_host);

  // init gpu vector
  agile::GPUVector<float> x(m_num_cols);
  x.assignFromHost(x_host.begin(), x_host.end());

  // init result gpu vector: y
  agile::GPUVector<float> y(m_num_rows);

  // start time
  gettimeofday(&st, NULL);

  for (unsigned t=0; t<NUM_ITER; ++t)
  {
    // gpu multiplication
    agile::multiply(A, x, y);

    // Wait for the GPU so the wall-clock interval covers the actual work.
    // NOTE(review): cudaThreadSynchronize is deprecated in the CUDA runtime
    // in favour of cudaDeviceSynchronize -- switch once the minimum
    // supported toolkit version is confirmed.
    cudaThreadSynchronize();
  }

  // stop time; average milliseconds per iteration
  gettimeofday(&et, NULL);
  gputime = ((et.tv_sec-st.tv_sec)*1000.0 + (et.tv_usec - st.tv_usec)/1000.0)/NUM_ITER;

  // transfer GPU multiplication result back to cpu
  std::vector<float> y_host;
  y.copyToHost(y_host);


  //----------------- CPU computation from ibm demo ---------------------------
  SpMatrix m;
  readSparseMatrix(&m, spmfileName, 0);
  unsigned int numNonZeroElements = m.numNZEntries;

  #if PADDED_CSR
    // NOTE(review): the arrays produced by genPaddedCSRFormat are not
    // released at the end of main -- confirm who owns them.
    float *h_val;
    unsigned int *h_indices, *h_rowIndices;
    genPaddedCSRFormat(&m, &h_val, &h_rowIndices, &h_indices);
  #else
    float* h_val = static_cast<float*>(malloc(sizeof(float)*numNonZeroElements));
    unsigned int* h_indices = static_cast<unsigned int*>(malloc(sizeof(unsigned int)*numNonZeroElements));
    unsigned int* h_rowIndices = static_cast<unsigned int*>(malloc(sizeof(unsigned int)*(static_cast<size_t>(m_num_rows)+1)));
    // A zero-size request may legitimately yield NULL, so only treat NULL as
    // a failure for the buffers that were asked to hold at least one element.
    if (h_rowIndices == NULL ||
        (numNonZeroElements > 0 && (h_val == NULL || h_indices == NULL)))
    {
      printf("Host memory allocation failed\n");
      exit(-1);
    }
    genCSRFormat(&m, h_val, h_rowIndices, h_indices);
  #endif

  // CPU REFERENCE
  // Zero-initialised so that the VERIFY section below never reads
  // indeterminate memory when EXEC_CPU is disabled (it then reports FAILED).
  float* reference = static_cast<float*>(calloc(m_num_rows, sizeof(float)));
  if (m_num_rows > 0 && reference == NULL)
  {
    printf("Host memory allocation failed\n");
    exit(-1);
  }
#if EXEC_CPU
  #if TIMER
  gettimeofday(&st, NULL);
  #endif
  // compute reference solution
  #if BCSR
  // NOTE(review): val/rowIndices/indices produced by genBCSRFormat are not
  // released here -- confirm who owns them.
  float *val;
  unsigned int *rowIndices, *indices;
  unsigned int numblocks;
  genBCSRFormat(&m, &val, &rowIndices, &indices, &numblocks, BCSR_r, BCSR_c);
  computeSpMV_BCSR(reference, val, rowIndices, indices, &(x_host[0]), m_num_rows, m_num_cols, BCSR_r, BCSR_c);
  #else
  computeSpMV(reference, h_val, h_rowIndices, h_indices, &(x_host[0]), m_num_rows);
  #endif
  #if TIMER
  gettimeofday(&et, NULL);
  cputime = (et.tv_sec-st.tv_sec)*1000.0 + (et.tv_usec - st.tv_usec)/1000.0;
  #endif
#endif

  // Two floating point operations (multiply + add) per stored entry; gputime
  // is in milliseconds, hence the 1e6 divisor to obtain GFLOPS. The doubling
  // is done in float so it cannot wrap around in unsigned arithmetic.
  float flops = (2.0f * numNonZeroElements) / (gputime*1000000);
  //printf("GPU (ms) \tCPU (ms) \tGFLOPS\n");
  printf("%f\t%f\t%f\t", gputime, cputime, flops);

#if VERIFY
  // check result: relative L2 error of the GPU result w.r.t. the reference
  float error_norm, ref_norm, diff;
  error_norm = 0;
  ref_norm = 0;
  for (unsigned i = 0; i < m_num_rows; ++i) {
      diff = reference[i] - y_host[i];
      error_norm += diff * diff;
      ref_norm += reference[i] * reference[i];
  }
  error_norm = (float)sqrt((double)error_norm);
  ref_norm = (float)sqrt((double)ref_norm);

  // A (near) zero reference norm makes the relative error meaningless.
  if (fabs(ref_norm) < 1e-7)
    printf ("Test FAILED");
  else
    printf( "Test %s", ((error_norm / ref_norm) < 1e-6f) ? "PASSED" : "FAILED");

#endif

  free(reference);
  #if !PADDED_CSR
    free(h_val);
    free(h_indices);
    free(h_rowIndices);
  #endif

  return 0;
}