TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL( Kokkos_View_MP, DeepCopy_HostArray, Storage, Layout )
{
  typedef typename Storage::execution_space Device;
  typedef typename Storage::value_type Scalar;
  typedef Sacado::MP::Vector<Storage> Vector;
  typedef typename ApplyView<Vector*,Layout,Device>::type ViewType;
  typedef typename ViewType::size_type size_type;
  typedef typename ViewType::HostMirror host_view_type;
  typedef typename host_view_type::array_type host_array_type;

  const size_type num_rows = global_num_rows;
  const size_type num_cols = Storage::is_static ? Storage::static_size : global_num_cols;
  ViewType v("view", num_rows, num_cols);
  host_array_type h_a = Kokkos::create_mirror_view(v);

  bool is_right = Kokkos::Impl::is_same< typename ViewType::array_layout,
                                         Kokkos::LayoutRight >::value;
  if (is_right) {
    for (size_type i=0; i<num_rows; ++i)
      for (size_type j=0; j<num_cols; ++j)
        h_a(i,j) = generate_vector_coefficient<Scalar>(
          num_rows, num_cols, i, j);
  }
  else {
    for (size_type i=0; i<num_rows; ++i)
      for (size_type j=0; j<num_cols; ++j)
        h_a(j,i) = generate_vector_coefficient<Scalar>(
          num_rows, num_cols, i, j);
  }
  Kokkos::deep_copy(v, h_a);

  success = checkVectorView(v, out);
}
template <typename ViewType>
bool
checkVectorView(const ViewType& v,
                Teuchos::FancyOStream& out) {
  typedef ViewType view_type;
  typedef typename view_type::size_type size_type;
  typedef typename view_type::HostMirror host_view_type;
  typedef typename host_view_type::array_type host_array_type;
  typedef typename host_array_type::value_type scalar_type;

  // Copy to host
  host_view_type h_v = Kokkos::create_mirror_view(v);
  Kokkos::deep_copy(h_v, v);
  host_array_type h_a = h_v;

  size_type num_rows, num_cols;

  // For static, layout left, sacado dimension becomes first dimension
  // instead of last
  bool is_right = Kokkos::Impl::is_same< typename ViewType::array_layout,
                                         Kokkos::LayoutRight >::value;
  if (is_right) {
    num_rows = h_a.dimension_0();
    num_cols = h_a.dimension_1();
  }
  else {
    num_rows = h_a.dimension_1();
    num_cols = h_a.dimension_0();
  }
  bool success = true;
  if (is_right) {
    for (size_type i=0; i<num_rows; ++i) {
      for (size_type j=0; j<num_cols; ++j) {
        scalar_type val = h_a(i,j);
        scalar_type val_expected =
          generate_vector_coefficient<scalar_type>(
            num_rows, num_cols, i, j);
        TEUCHOS_TEST_EQUALITY(val, val_expected, out, success);
      }
    }
  }
  else {
    for (size_type i=0; i<num_rows; ++i) {
      for (size_type j=0; j<num_cols; ++j) {
        scalar_type val = h_a(j,i);
        scalar_type val_expected =
          generate_vector_coefficient<scalar_type>(
            num_rows, num_cols, i, j);
        TEUCHOS_TEST_EQUALITY(val, val_expected, out, success);
      }
    }
  }

  return success;
}
Example #3
void TestVectorCppEquality(void)
{
#if 1    
    KNOWN_FAILURE;
#else
    thrust::host_vector<int> h_a(3);
    thrust::host_vector<int> h_b(3);
    thrust::host_vector<int> h_c(3);
    h_a[0] = 0;    h_a[1] = 1;    h_a[2] = 2;
    h_b[0] = 0;    h_b[1] = 1;    h_b[2] = 3;
    h_c[0] = 0;    h_c[1] = 1;

    thrust::device_vector<int> d_a(3);
    thrust::device_vector<int> d_b(3);
    thrust::device_vector<int> d_c(3);
    d_a[0] = 0;    d_a[1] = 1;    d_a[2] = 2;
    d_b[0] = 0;    d_b[1] = 1;    d_b[2] = 3;
    d_c[0] = 0;    d_c[1] = 1;

    ASSERT_EQUAL((h_a == h_a), true); ASSERT_EQUAL((h_a == d_a), true); ASSERT_EQUAL((d_a == h_a), true);  ASSERT_EQUAL((d_a == d_a), true); 
    ASSERT_EQUAL((h_b == h_b), true); ASSERT_EQUAL((h_b == d_b), true); ASSERT_EQUAL((d_b == h_b), true);  ASSERT_EQUAL((d_b == d_b), true);
    ASSERT_EQUAL((h_c == h_c), true); ASSERT_EQUAL((h_c == d_c), true); ASSERT_EQUAL((d_c == h_c), true);  ASSERT_EQUAL((d_c == d_c), true);

    ASSERT_EQUAL((h_a == h_b), false); ASSERT_EQUAL((h_a == d_b), false); ASSERT_EQUAL((d_a == h_b), false); ASSERT_EQUAL((d_a == d_b), false); 
    ASSERT_EQUAL((h_b == h_a), false); ASSERT_EQUAL((h_b == d_a), false); ASSERT_EQUAL((d_b == h_a), false); ASSERT_EQUAL((d_b == d_a), false);
    ASSERT_EQUAL((h_a == h_c), false); ASSERT_EQUAL((h_a == d_c), false); ASSERT_EQUAL((d_a == h_c), false); ASSERT_EQUAL((d_a == d_c), false);
    ASSERT_EQUAL((h_c == h_a), false); ASSERT_EQUAL((h_c == d_a), false); ASSERT_EQUAL((d_c == h_a), false); ASSERT_EQUAL((d_c == d_a), false);
    ASSERT_EQUAL((h_b == h_c), false); ASSERT_EQUAL((h_b == d_c), false); ASSERT_EQUAL((d_b == h_c), false); ASSERT_EQUAL((d_b == d_c), false);
    ASSERT_EQUAL((h_c == h_b), false); ASSERT_EQUAL((h_c == d_b), false); ASSERT_EQUAL((d_c == h_b), false); ASSERT_EQUAL((d_c == d_b), false);
#endif    
}
TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL( Kokkos_View_MP, DeepCopy_DeviceArray, Storage, Layout )
{
  typedef typename Storage::device_type Device;
  typedef typename Storage::value_type Scalar;
  typedef Sacado::MP::Vector<Storage> Vector;
  typedef typename ApplyView<Vector*,Layout,Device>::type ViewType;
  typedef typename ViewType::size_type size_type;
  typedef typename ViewType::HostMirror host_view_type;
  typedef typename host_view_type::array_type host_array_type;
  typedef typename ViewType::array_type array_type;

  const size_type num_rows = global_num_rows;
  const size_type num_cols = Storage::is_static ? Storage::static_size : global_num_cols;
  ViewType v("view", num_rows, num_cols);
  host_view_type h_v = Kokkos::create_mirror_view(v);
  host_array_type h_a = h_v;
  array_type a = v;

  for (size_type i=0; i<num_rows; ++i)
    for (size_type j=0; j<num_cols; ++j)
      h_a(i,j) = generate_vector_coefficient<Scalar>(
        num_rows, num_cols, i, j);
  Kokkos::deep_copy(a, h_v);

  success = checkVectorView(v, out);
}
int main() {
  Kokkos::initialize();

  {
    view_type a ("A", 10);
    // If view_type and host_mirror_type live in the same memory space,
    // a "mirror view" is just an alias, and deep_copy does nothing.
    // Otherwise, a mirror view of a device View lives in host memory,
    // and deep_copy does a deep copy.
    host_view_type h_a = Kokkos::create_mirror_view (a);

    // The View h_a lives in host (CPU) memory, so it's legal to fill
    // the view sequentially using ordinary code, like this.
    for (int i = 0; i < 10; i++) {
      for (int j = 0; j < 3; j++) {
        h_a(i,j) = i*10 + j;
      }
    }
    Kokkos::deep_copy (a, h_a); // Copy from host to device.

    int sum = 0;
    Kokkos::parallel_reduce (10, ReduceFunctor (a), sum);
    printf ("Result is %i\n",sum);
  }

  Kokkos::finalize ();
}
Example #6
    /**
     * seed generator with 32-bit integer
     */
    void seed(unsigned int value)
    {
        // compute leapfrog multipliers for initialization
        cuda::vector<uint48> g_A(dim.threads()), g_C(dim.threads());
        cuda::configure(dim.grid, dim.block);
        get_rand48_kernel().leapfrog(g_A);

        // compute leapfrog addends for initialization
        cuda::copy(g_A, g_C);
        algorithm::gpu::scan<uint48> scan(g_C.size(), dim.threads_per_block());
        scan(g_C);

        // initialize generator with seed
        cuda::vector<uint48> g_a(1), g_c(1);
        cuda::host::vector<uint48> h_a(1), h_c(1);
        cuda::configure(dim.grid, dim.block);
        get_rand48_kernel().seed(g_A, g_C, g_a, g_c, g_state_, value);
        cuda::copy(g_a, h_a);
        cuda::copy(g_c, h_c);

        // set leapfrog constants for constant device memory
        rng_.a = h_a.front();
        rng_.c = h_c.front();
        rng_.g_state = g_state_.data();
    }
int do_memory_uplo(int n, W& workspace ) {
   typedef typename bindings::remove_imaginary<T>::type real_type ;

   typedef ublas::matrix<T, ublas::column_major>     matrix_type ;
   typedef ublas::vector<real_type>                  vector_type ;

   typedef ublas::hermitian_adaptor<matrix_type, UPLO> hermitian_type;

   // Set matrix
   matrix_type a( n, n ); a.clear();
   vector_type e1( n );
   vector_type e2( n );

   fill( a );
   matrix_type a2( a );
   matrix_type z( a );

   // Compute eigendecomposition of the Hermitian matrix.
   fortran_int_t m;
   ublas::vector<fortran_int_t> ifail(n);
   
   hermitian_type h_a( a );
   lapack::heevx( 'V', 'A', h_a, real_type(0.0), real_type(1.0), 2, n-1, real_type(1e-28), m,
                  e1, z, ifail, workspace ) ;

   if (check_residual( a2, e1, z )) return 255 ;

   hermitian_type h_a2( a2 );
   lapack::heevx( 'N', 'A', h_a2, real_type(0.0), real_type(1.0), 2, n-1, real_type(1e-28), m,
                  e2, z, ifail, workspace ) ;
   if (norm_2( e1 - e2 ) > n * norm_2( e1 ) * std::numeric_limits< real_type >::epsilon()) return 255 ;

   // Test for a matrix range
   fill( a ); a2.assign( a );

   typedef ublas::matrix_range< matrix_type > matrix_range ;
   typedef ublas::hermitian_adaptor<matrix_range, UPLO> hermitian_range_type;

   ublas::range r(1,n-1) ;
   matrix_range a_r( a, r, r );
   matrix_range z_r( z, r, r );
   ublas::vector_range< vector_type> e_r( e1, r );
   ublas::vector<fortran_int_t> ifail_r(n-2);

   hermitian_range_type h_a_r( a_r );
   lapack::heevx( 'V', 'A', h_a_r, real_type(0.0), real_type(1.0), 2, n-1, real_type(1e-28), m,
                  e_r, z_r, ifail_r, workspace );

   matrix_range a2_r( a2, r, r );
   if (check_residual( a2_r, e_r, z_r )) return 255 ;

   return 0 ;
} // do_memory_uplo()
TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL( Kokkos_View_MP, Unmanaged, Storage, Layout )
{
  typedef typename Storage::device_type Device;
  typedef typename Storage::value_type Scalar;
  typedef Sacado::MP::Vector<Storage> Vector;
  typedef typename ApplyView<Vector*,Layout,Device>::type ViewType;
  typedef typename ViewType::size_type size_type;
  typedef typename ViewType::HostMirror host_view_type;
  typedef typename host_view_type::array_type host_array_type;

  const size_type num_rows = global_num_rows;
  const size_type num_cols = Storage::is_static ? Storage::static_size : global_num_cols;
  ViewType v("view", num_rows, num_cols);
  host_view_type h_v = Kokkos::create_mirror_view(v);
  host_array_type h_a = h_v;

  bool is_right = Kokkos::Impl::is_same< typename ViewType::array_layout,
                                         Kokkos::LayoutRight >::value;
  if (is_right || !ViewType::is_contiguous) {
    for (size_type i=0; i<num_rows; ++i)
      for (size_type j=0; j<num_cols; ++j)
        h_a(i,j) = generate_vector_coefficient<Scalar>(
          num_rows, num_cols, i, j);
  }
  else {
    for (size_type i=0; i<num_rows; ++i)
      for (size_type j=0; j<num_cols; ++j)
        h_a(j,i) = generate_vector_coefficient<Scalar>(
          num_rows, num_cols, i, j);
  }
  Kokkos::deep_copy(v, h_v);

  // Create unmanaged view
  ViewType v2(Kokkos::view_without_managing, v.ptr_on_device(),
              num_rows, num_cols);

  success = checkVectorView(v2, out);
}
Example #9
int main() {
  Kokkos::initialize();

  view_type a("A",10);
  host_view_type h_a = Kokkos::create_mirror_view(a); 
  
  for(int i = 0; i < 10; i++)
    for(int j = 0; j < 3; j++)
      h_a(i,j) = i*10 + j;

  Kokkos::deep_copy(a,h_a);

  int sum = 0;
  Kokkos::parallel_reduce(10,squaresum(a),sum);
  printf("Result is %i\n",sum);  

  Kokkos::finalize();
}
Example #10
File: vadd.cpp Project: pelmer/esc
int main(void)
{
    std::vector<float> h_a(LENGTH);              // a vector 
    std::vector<float> h_b(LENGTH);              // b vector 	
    std::vector<float> h_c (LENGTH, 0xdeadbeef); // c = a + b, from compute device

    cl::Buffer d_a;      // device memory used for the input  a vector
    cl::Buffer d_b;      // device memory used for the input  b vector
    cl::Buffer d_c;      // device memory used for the output c vector

    // Fill vectors a and b with random float values
    int count = LENGTH;
    for(int i = 0; i < count; i++)
    {
        h_a[i]  = rand() / (float)RAND_MAX;
        h_b[i]  = rand() / (float)RAND_MAX;
    }

    try 
    {
    	// Create a context
        cl::Context context(DEVICE);

        // Load in kernel source, creating a program object for the context

        cl::Program program(context, util::loadProgram("vadd.cl"), true);

        // Get the command queue
        cl::CommandQueue queue(context);

        // Create the kernel functor
 
        auto vadd = cl::make_kernel<cl::Buffer, cl::Buffer, cl::Buffer, int>(program, "vadd");

        d_a   = cl::Buffer(context, begin(h_a), end(h_a), true);
        d_b   = cl::Buffer(context, begin(h_b), end(h_b), true);

        d_c  = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * LENGTH);

        util::Timer timer;

        vadd(
            cl::EnqueueArgs(
                queue,
                cl::NDRange(count)), 
            d_a,
            d_b,
            d_c,
            count);

        queue.finish();

        double rtime = static_cast<double>(timer.getTimeMilliseconds()) / 1000.0;
        printf("\nThe kernels ran in %lf seconds\n", rtime);

        cl::copy(queue, d_c, begin(h_c), end(h_c));

        // Test the results
        int correct = 0;
        float tmp;
        for(int i = 0; i < count; i++) {
            tmp = h_a[i] + h_b[i]; // expected value for d_c[i]
            tmp -= h_c[i];                      // compute errors
            if(tmp*tmp < TOL*TOL) {      // correct if square deviation is less 
                correct++;                         //  than tolerance squared
            }
            else {

                printf(
                    " tmp %f h_a %f h_b %f  h_c %f \n",
                    tmp, 
                    h_a[i], 
                    h_b[i], 
                    h_c[i]);
            }
        }

        // summarize results
        printf(
            "vector add to find C = A+B:  %d out of %d results were correct.\n", 
            correct, 
            count);
    }
    catch (cl::Error err) {
        std::cout << "Exception\n";
        std::cerr 
            << "ERROR: "
            << err.what()
            << "("
            << err_code(err.err())
           << ")"
           << std::endl;
    }
}
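The host code above builds its kernel functor from a file named vadd.cl with the argument list (Buffer, Buffer, Buffer, int). The kernel source is not shown in this excerpt; a minimal sketch matching that signature would look like the following (the actual vadd.cl in the pelmer/esc project may differ):

// vadd.cl -- minimal sketch matching the 4-argument functor used above
__kernel void vadd(
    __global const float* a,    // input vector a
    __global const float* b,    // input vector b
    __global float*       c,    // output vector c = a + b
    const int             count)
{
    int i = get_global_id(0);
    if (i < count) {            // guard against extra work-items
        c[i] = a[i] + b[i];
    }
}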
int main(void)
{
    std::vector<float> h_a(LENGTH);                // a vector
    std::vector<float> h_b(LENGTH);                // b vector
    std::vector<float> h_c (LENGTH, 0xdeadbeef);   // c vector (result)
    std::vector<float> h_d (LENGTH, 0xdeadbeef);   // d vector (result)
    std::vector<float> h_e (LENGTH);               // e vector
    std::vector<float> h_f (LENGTH, 0xdeadbeef);   // f vector (result)
    std::vector<float> h_g (LENGTH);               // g vector

    cl::Buffer d_a;                       // device memory used for the input  a vector
    cl::Buffer d_b;                       // device memory used for the input  b vector
    cl::Buffer d_c;                       // device memory used for the output c vector
    cl::Buffer d_d;                       // device memory used for the output d vector
    cl::Buffer d_e;                       // device memory used for the input e vector
    cl::Buffer d_f;                       // device memory used for the output f vector
    cl::Buffer d_g;                       // device memory used for the input g vector

    // Fill vectors a and b with random float values
    int count = LENGTH;
    for(int i = 0; i < count; i++)
    {
        h_a[i]  = rand() / (float)RAND_MAX;
        h_b[i]  = rand() / (float)RAND_MAX;
        h_e[i]  = rand() / (float)RAND_MAX;
        h_g[i]  = rand() / (float)RAND_MAX;
    }

    try
    {
        // Create a context
        cl::Context context(DEVICE);

        // Load in kernel source, creating a program object for the context

        cl::Program program(context, util::loadProgram("vadd.cl"), true);

        // Get the command queue
        cl::CommandQueue queue(context);

        // Create the kernel functor

        auto vadd = cl::make_kernel<cl::Buffer, cl::Buffer, cl::Buffer>(program, "vadd");

        d_a   = cl::Buffer(context, begin(h_a), end(h_a), true);
        d_b   = cl::Buffer(context, begin(h_b), end(h_b), true);
        d_e   = cl::Buffer(context, begin(h_e), end(h_e), true);
        d_g   = cl::Buffer(context, begin(h_g), end(h_g), true);

        d_c  = cl::Buffer(context, CL_MEM_READ_WRITE, sizeof(float) * LENGTH);
        d_d  = cl::Buffer(context, CL_MEM_READ_WRITE, sizeof(float) * LENGTH);
        d_f  = cl::Buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * LENGTH);

        vadd(
            cl::EnqueueArgs(
                queue,
                cl::NDRange(count)),
            d_a,
            d_b,
            d_c);

        vadd(
            cl::EnqueueArgs(
                queue,
                cl::NDRange(count)),
            d_e,
            d_c,
            d_d);

        vadd(
            cl::EnqueueArgs(
                queue,
                cl::NDRange(count)),
            d_g,
            d_d,
            d_f);

        cl::copy(queue, d_f, begin(h_f), end(h_f));

        // Test the results
        int correct = 0;
        float tmp;
        for(int i = 0; i < count; i++)
        {
            tmp = h_a[i] + h_b[i] + h_e[i] + h_g[i];     // assign element i of a+b+e+g to tmp
            tmp -= h_f[i];                               // compute deviation of expected and output result
            if(tmp*tmp < TOL*TOL)                        // correct if square deviation is less than tolerance squared
                correct++;
            else {
                printf(" tmp %f h_a %f h_b %f h_e %f h_g %f h_f %f\n",tmp, h_a[i], h_b[i], h_e[i], h_g[i], h_f[i]);
            }
        }

        // summarize results
        printf("C = A+B+E+G:  %d out of %d results were correct.\n", correct, count);

    }
    catch (cl::Error err) {
        std::cout << "Exception\n";
        std::cerr
                << "ERROR: "
                << err.what()
                << std::endl;
    }
}
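This chained version creates its vadd functor with only three cl::Buffer arguments, so the kernel it loads must take no explicit element count and rely on the NDRange matching the vector length. A minimal sketch of such a kernel (again, the real source may differ):

__kernel void vadd(
    __global const float* a,
    __global const float* b,
    __global float*       c)
{
    int i = get_global_id(0);   // one work-item per element
    c[i] = a[i] + b[i];
}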
Example #12
int mc_update_path(DAL_Context* ctx) {
   // QUESTION: Do we need to prepend the bucket and ns->alias to the
   //           objid? For now we can just flatten the URL to make
   //           things easy.

   // shorthand
   PathInfo*         info          = &(MC_FH(ctx)->info);
   const MarFS_Repo* repo          = info->pre.repo;
   char*             objid         = info->pre.objid;
   char*             path_template = MC_CONTEXT(ctx)->path_template;

   char obj_filename[MARFS_MAX_OBJID_SIZE];
   strncpy(obj_filename, objid, MARFS_MAX_OBJID_SIZE);
   flatten_objid(obj_filename);

   // We will use the hash in multiple places, save it to avoid
   // recomputing.
   //
   // Hash the actual object ID so the hash will remain the same,
   // regardless of changes to the "file-ification" format.
   unsigned long objid_hash = polyhash(objid);
   
   char *mc_path_format = repo->host;

   unsigned int num_blocks    = MC_CONFIG(ctx)->n+MC_CONFIG(ctx)->e;
   unsigned int num_pods      = MC_CONFIG(ctx)->num_pods;
   unsigned int num_cap       = MC_CONFIG(ctx)->num_cap;
   unsigned int scatter_width = MC_CONFIG(ctx)->scatter_width;

   unsigned int seed = objid_hash;
   uint64_t a[3];
   int i;
   for(i = 0; i < 3; i++)
      a[i] = rand_r(&seed) * 2 + 1; // generate a random odd multiplier (~32 bits)

   MC_CONTEXT(ctx)->pod         = objid_hash % num_pods;
   MC_CONTEXT(ctx)->cap         = h_a(objid_hash, a[0]) % num_cap;
   unsigned long scatter        = h_a(objid_hash, a[1]) % scatter_width;
   MC_CONTEXT(ctx)->start_block = h_a(objid_hash, a[2]) % num_blocks;

   // fill in path template
   // the mc_path_format is something like:
   //   "<protected-root>/repo10+2/pod%d/block%s/cap%d/scatter%d/"
   snprintf(path_template, MC_MAX_PATH_LEN, mc_path_format,
            MC_CONTEXT(ctx)->pod,
            "%d", // this will be filled in by the ec library
            MC_CONTEXT(ctx)->cap,
            scatter);

   // be robust to variation in the config... We could always just add
   // a slash, but that will get ugly in the logs.
   if(path_template[strlen(path_template) - 1] != '/')
      strcat(path_template, "/");
   
   // append the file-ified object id
   strncat(path_template, obj_filename, MARFS_MAX_OBJID_SIZE);

   LOG(LOG_INFO, "MC path template: (starting block: %d) %s\n",
       MC_CONTEXT(ctx)->start_block, path_template);
   
   return 0;
}
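The pod/cap/scatter/start_block selection above relies on two hash helpers that are not shown in this excerpt: polyhash(), which hashes the object-ID string, and h_a(), which rehashes that value with one of the random odd multipliers in a[]. Purely as an illustration of the technique, hypothetical stand-ins might look like this (the real MarFS implementations may differ):

#include <stdint.h>

/* Hypothetical stand-in: simple polynomial rolling hash of the objid string. */
static unsigned long polyhash(const char* str) {
   unsigned long h = 0;
   while (*str)
      h = 31 * h + (unsigned char)*str++;
   return h;
}

/* Hypothetical stand-in: multiplicative rehash using the odd multiplier a. */
static uint64_t h_a(unsigned long key, uint64_t a) {
   return (a * (uint64_t)key) >> 32;
}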