int main(int narg, char* args[]) { Kokkos::initialize(narg,args); // Produce some 3D random data (see Algorithms/01_random_numbers for more info) Kokkos::View<int***,Kokkos::LayoutRight> data("Data",512,512,32); Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857); Kokkos::fill_random(data,rand_pool64,100); // A global value to put the result in Kokkos::View<int> gsum("Sum"); // Each team handles a slice of the data // Set up TeamPolicy with 512 teams with maximum number of threads per team and 16 vector lanes. // The team_size_max function will determine the maximum number of threads taking into account // shared memory requirements of the Functor. // The maximum vector length is hardware dependent but can always be smaller than the hardware allows. // The vector length must be a power of 2. const Kokkos::TeamPolicy<> policy( 512 , Kokkos::TeamPolicy<>::team_size_max(SomeCorrelation(data,gsum)) , 16); Kokkos::parallel_for( policy , SomeCorrelation(data,gsum) ); Kokkos::fence(); // Copy result value back int sum = 0; Kokkos::deep_copy(sum,gsum); printf("Result %i\n",sum); Kokkos::finalize(); }
int main(int argc, char* args[]) { if (argc != 3){ printf("Please pass two integers on the command line\n"); } else { // Initialize Kokkos Kokkos::initialize(argc,args); int size = atoi(args[1]); int samples = atoi(args[2]); // Create two random number generator pools one for 64bit states and one for 1024 bit states // Both take an 64 bit unsigned integer seed to initialize a Random_XorShift64 generator which // is used to fill the generators of the pool. Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857); Kokkos::Random_XorShift1024_Pool<> rand_pool1024(5374857); Kokkos::DualView<uint64_t*> vals("Vals",size*samples); // Run some performance comparisons Kokkos::Timer timer; Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift64_Pool<> >(vals.d_view,rand_pool64,samples)); Kokkos::fence(); timer.reset(); Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift64_Pool<> >(vals.d_view,rand_pool64,samples)); Kokkos::fence(); double time_64 = timer.seconds(); Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift1024_Pool<> >(vals.d_view,rand_pool1024,samples)); Kokkos::fence(); timer.reset(); Kokkos::parallel_for(size,generate_random<Kokkos::Random_XorShift1024_Pool<> >(vals.d_view,rand_pool1024,samples)); Kokkos::fence(); double time_1024 = timer.seconds(); printf("#Time XorShift64*: %e %e\n",time_64,1.0e-9*samples*size/time_64 ); printf("#Time XorShift1024*: %e %e\n",time_1024,1.0e-9*samples*size/time_1024 ); Kokkos::deep_copy(vals.h_view,vals.d_view); Kokkos::finalize(); } return 0; }
/// Example driver comparing element-wise multiplication of rank-4 containers
/// through three access paths: ArrayWrapper-wrapped Kokkos Views, raw Kokkos
/// Views, and Intrepid2 FieldContainers. The body is compiled only when
/// HAVE_INTREPID_KOKKOSCORE is defined; otherwise main just returns 0.
///
/// A timer is constructed before each variant, but the code that reads and
/// prints it is commented out.
///
/// @return 0.
int main(){
#ifdef HAVE_INTREPID_KOKKOSCORE
  Kokkos::initialize();

  // All Kokkos-managed objects (Views, wrappers holding Views, the random pool)
  // live in this nested scope so they are destroyed BEFORE Kokkos::finalize()
  // is called. Destroying a View after finalize() is an error.
  {
    // Initialize views to random values
    const int v=100,x=300,y=40,z=40;
    Kokkos::View<double****> inputview1("X",v,x,y,z);
    Kokkos::View<double****> inputview2("Y",v,x,y,z);
    Kokkos::View<double****> outputview2("Z",v,x,y,z);
    Intrepid2::FieldContainer<double> inview2FieldContainer(v, x, y, z);
    Intrepid2::FieldContainer<double> inview1FieldContainer(v, x, y, z);
    Intrepid2::FieldContainer<double> outview2FieldContainer(v, x, y, z);

    // These are the wrapper structures that are used to avoid compile-time rank issues
    ArrayWrapper<double,Kokkos::View<double****>, Rank<Kokkos::View<double****> >::value,false>inputview1wrap(inputview1);
    ArrayWrapper<double,Kokkos::View<double****>, Rank<Kokkos::View<double****> >::value,false>outputview2wrap(outputview2);
    // Only referenced by the commented-out View/FieldContainer example further down.
    ArrayWrapper<double,Intrepid2::FieldContainer<double>,Rank<Intrepid2::FieldContainer<double> >::value,false>inputfieldcontainer2wrap(inview2FieldContainer);
    ArrayWrapper<double,Kokkos::View<double****>, Rank<Kokkos::View<double****> >::value,false>inputview2wrap(inputview2);

    // Fill with random numbers
    Kokkos::Random_XorShift64_Pool<> rand_pool64(5374857);
    Kokkos::fill_random(inputview1,rand_pool64,100);
    Kokkos::fill_random(inputview2,rand_pool64,100);

    // Test getrank partial template specialization for kokkos views and field containers
    // std::cout <<"Rankstuff: "<<getrank(inputview1)<<","<<getrank(inview2FieldContainer)<<std::endl;
    // int numDataLeftPts = inputview1.dimension(1);
    int numCells = outputview2.dimension(0);
    int numPoints = outputview2.dimension(1);
    // matDim comes from dimension 2 and is used as the bound for BOTH the row
    // and col loops; this is in range here because y == z.
    int matDim = outputview2.dimension(2);
    // std::cout <<numCells<<","<<numPoints<<","<<matDim<<std::endl;

    // NOTE(review): the loops below index the Views directly from host code;
    // presumably this requires the default memory space to be host-accessible -- confirm.

    // Set up field containers using values from the kokkos views and set outputs to zero
    for(int cell = 0; cell < numCells; cell++) {
      for(int point = 0; point < numPoints; point++) {
        for(int row = 0; row < matDim; row++) {
          for(int col = 0; col < matDim; col++) {
            inview2FieldContainer(cell, point, row, col)=inputview2(cell, point, row, col);
            inview1FieldContainer(cell, point, row, col)=inputview1(cell, point, row, col);
            outputview2wrap(cell, point, row, col)=0.0;
            outview2FieldContainer(cell, point, row, col)=0.0;
          } // Col-loop
        } // Row-loop
      } // P-loop
    } // C-loop

    Kokkos::fence();
    Kokkos::Impl::Timer structviewstimer;
    // Example with two kokkos views and structs
    for(int cell = 0; cell < numCells; cell++) {
      for(int point = 0; point < numPoints; point++) {
        for(int row = 0; row < matDim; row++) {
          for(int col = 0; col < matDim; col++) {
            outputview2wrap(cell,point,row,col)= inputview1wrap(cell,point,row,col)*inputview2wrap(cell, point, row, col);
          } // Col-loop
        } // Row-loop
      } // P-loop
    } // C-loop
    Kokkos::fence();
    //double timestructviews = structviewstimer.seconds();
    // std::cout <<std::setprecision(9)<<"Time for structviews"<<timestructviews<<std::endl;

    Kokkos::fence();
    Kokkos::Impl::Timer rawviewstimer;
    // Example with two kokkos views without structs
    for(int cell = 0; cell < numCells; cell++) {
      for(int point = 0; point < numPoints; point++) {
        for(int row = 0; row < matDim; row++) {
          for(int col = 0; col < matDim; col++) {
            outputview2(cell, point, row, col) = inputview1(cell,point, row, col)*inputview2(cell, point, row, col);
          } // Col-loop
        } // Row-loop
      } // P-loop
    } // C-loop
    Kokkos::fence();
    // double timerawviews = rawviewstimer.seconds();
    // std::cout <<"Time for rawviews"<<timerawviews<<std::endl;

    /*
    //example with kokkos view and intrepid field container
    for(int cell = 0; cell < numCells; cell++) {
      for(int point = 0; point < numPoints; point++) {
        for(int row = 0; row < matDim; row++) {
          for( int col = 0; col < matDim; col++) {
            outputview2wrap(cell, point, row, col) = inputview1wrap(cell,point, row, col)*inputfieldcontainer2wrap(cell, point, row, col);
          }// Col-loop
        } // Row-loop
      } // P-loop
    }// C-loop
    //Test passing first element of field container
    */

    Kokkos::fence();
    Kokkos::Impl::Timer fieldcontainertimer;
    // Same multiplication performed purely on Intrepid2 FieldContainers
    for(int cell = 0; cell < numCells; cell++) {
      for(int point = 0; point < numPoints; point++) {
        for(int row = 0; row < matDim; row++) {
          for(int col = 0; col < matDim; col++) {
            outview2FieldContainer(cell, point, row, col) = inview1FieldContainer(cell,point, row, col)*inview2FieldContainer(cell, point, row, col);
          } // Col-loop
        } // Row-loop
      } // P-loop
    } // C-loop
    Kokkos::fence();
    //double timerfieldcontainermultiply=fieldcontainertimer.seconds();
    //std::cout <<"FieldContainerTimer: "<<timerfieldcontainermultiply <<std::endl;
  }

  Kokkos::finalize();
#endif
  return 0;
}