// Runs memcpytest2<T> over a doubling sequence of element counts.
//
// NOTE(review): T is not declared inside this block; it is assumed to be the
// type parameter of a template header immediately preceding this definition.
//
// maxElem  Upper bound (in elements) for the sizes tested. When 0, it is
//          derived as one fifth of the free device memory reported by
//          hipMemGetInfo, expressed in elements of T.
// offset   Added to every element count, so sizes of the form 64*2^k + offset
//          are exercised.
//
// For each size the device is reset and memcpytest2<T> is invoked twice,
// once with its second argument 0 (unpinned host) and once with 1 (pinned host).
void memcpytest2_sizes(size_t maxElem=0, size_t offset=0)
{
    printSep();
    printf ("test: %s<%s>\n", __func__, TYPENAME(T));

    int deviceId;
    HIPCHECK(hipGetDevice(&deviceId));

    size_t free, total;
    HIPCHECK(hipMemGetInfo(&free, &total));

    if (maxElem == 0) {
        maxElem = free/sizeof(T)/5;
    }

    // offset is a size_t, so it is printed with %zu like the other size_t fields.
    printf (" device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB) maxSize=%6.1fMB offset=%zu\n",
            deviceId,
            free, (float)(free/1024.0/1024.0),
            total, (float)(total/1024.0/1024.0),
            maxElem*sizeof(T)/1024.0/1024.0,
            offset);

    for (size_t elem=64; elem+offset<=maxElem; elem*=2) {
        HIPCHECK ( hipDeviceReset() );
        memcpytest2<T>(elem+offset, 0, 1, 1, 0);  // unpinned host

        HIPCHECK ( hipDeviceReset() );
        memcpytest2<T>(elem+offset, 1, 1, 1, 0);  // pinned host
    }
}
int main(int argc, char *argv[]) { HipTest::parseStandardArguments(argc, argv, true); printf ("info: set device to %d\n", p_gpuDevice); HIPCHECK(hipSetDevice(p_gpuDevice)); if (p_tests & 0x1) { printf ("\n\n=== tests&1 (types)\n"); printSep(); HIPCHECK ( hipDeviceReset() ); size_t width = N/6; size_t height = N/6; memcpy2Dtest<float>(321, 211, 0); memcpy2Dtest<double>(322, 211, 0); memcpy2Dtest<char>(320, 211, 0); memcpy2Dtest<int>(323, 211, 0); printf ("===\n\n\n"); printf ("\n\n=== tests&1 (types)\n"); printSep(); // 2D memcpyArraytest<float>(320, 211, 0, 0); memcpyArraytest<unsigned int>(322, 211, 0, 0); memcpyArraytest<int>(320, 211, 0, 0); memcpyArraytest<float>(320, 211, 0, 1); memcpyArraytest<float>(322, 211, 0, 1); memcpyArraytest<int>(320, 211, 0, 1); printSep(); // 1D memcpyArraytest<float>(320, 1, 0); memcpyArraytest<unsigned int>(322, 1, 0); memcpyArraytest<int>(320, 1, 0); printf ("===\n\n\n"); } if (p_tests & 0x4) { printf ("\n\n=== tests&4 (test sizes and offsets)\n"); printSep(); HIPCHECK ( hipDeviceReset() ); printSep(); memcpyArraytest_size<float>(0,0); printSep(); memcpyArraytest_size<float>(0,64); printSep(); memcpyArraytest_size<float>(1024*1024,13); printSep(); memcpyArraytest_size<float>(1024*1024,50); } passed(); }
int main() { unsigned flag = 0; HIPCHECK(hipDeviceReset()); int deviceCount = 0; HIPCHECK(hipGetDeviceCount(&deviceCount)); for(int j=0;j<deviceCount;j++){ HIPCHECK(hipSetDevice(j)); for(int i=0;i<4;i++){ flag = 1 << i; printf ("Flag=%x\n", flag); HIPCHECK(hipSetDeviceFlags(flag)); //HIPCHECK_API(hipSetDeviceFlags(flag), hipErrorInvalidValue); } flag = 0; } passed(); }
int main(int argc, char **argv) { printf("%s starting...\n", sampleName); runTest(argc, argv); hipDeviceReset(); printf("%s completed, returned %s\n", sampleName, testResult ? "OK" : "ERROR!"); exit(testResult ? EXIT_SUCCESS : EXIT_FAILURE); }
int main(int argc, char *argv[]) { HipTest::parseStandardArguments(argc, argv, true); printf ("info: set device to %d\n", p_gpuDevice); HIPCHECK(hipSetDevice(p_gpuDevice)); if (p_tests & 0x1) { printf ("\n\n=== tests&1 (types and different memcpy kinds (H2D, D2H, H2H, D2D)\n"); HIPCHECK ( hipDeviceReset() ); memcpytest2_for_type<float>(N); memcpytest2_for_type<double>(N); memcpytest2_for_type<char>(N); memcpytest2_for_type<int>(N); printf ("===\n\n\n"); } if (p_tests & 0x2) { // Some tests around the 64MB boundary which have historically shown issues: printf ("\n\n=== tests&0x2 (64MB boundary)\n"); #if 0 // These all pass: memcpytest2<float>(15*1024*1024, 1, 0, 0, 0); memcpytest2<float>(16*1024*1024, 1, 0, 0, 0); memcpytest2<float>(16*1024*1024+16*1024, 1, 0, 0, 0); #endif // Just over 64MB: memcpytest2<float>(16*1024*1024+512*1024, 1, 0, 0, 0); memcpytest2<float>(17*1024*1024+1024, 1, 0, 0, 0); memcpytest2<float>(32*1024*1024, 1, 0, 0, 0); memcpytest2<float>(32*1024*1024, 0, 0, 0, 0); memcpytest2<float>(32*1024*1024, 1, 1, 1, 0); memcpytest2<float>(32*1024*1024, 1, 1, 1, 0); } if (p_tests & 0x4) { printf ("\n\n=== tests&4 (test sizes and offsets)\n"); HIPCHECK ( hipDeviceReset() ); printSep(); memcpytest2_sizes<float>(0,0); printSep(); memcpytest2_sizes<float>(0,64); printSep(); memcpytest2_sizes<float>(1024*1024, 13); printSep(); memcpytest2_sizes<float>(1024*1024, 50); } if (p_tests & 0x8) { printf ("\n\n=== tests&8\n"); HIPCHECK ( hipDeviceReset() ); printSep(); // Simplest cases: serialize the threads, and also used pinned memory: // This verifies that the sub-calls to memcpytest2 are correct. multiThread_1<float>(true, true); // Serialize, but use unpinned memory to stress the unpinned memory xfer path. multiThread_1<float>(true, false); // Remove serialization, so two threads are performing memory copies in parallel. multiThread_1<float>(false, true); // Remove serialization, and use unpinned. 
multiThread_1<float>(false, false); // TODO printf ("===\n\n\n"); } passed(); }
extern "C" void mixbenchGPU(double *c, long size){ const char *benchtype = "compute with global memory (block strided)"; printf("Trade-off type: %s\n", benchtype); double *cd; CUDA_SAFE_CALL( hipMalloc((void**)&cd, size*sizeof(double)) ); // Copy data to device memory CUDA_SAFE_CALL( hipMemset(cd, 0, size*sizeof(double)) ); // initialize to zeros // Synchronize in order to wait for memory operations to finish CUDA_SAFE_CALL( hipDeviceSynchronize() ); printf("---------------------------------------------------------- CSV data ----------------------------------------------------------\n"); printf("Experiment ID, Single Precision ops,,,, Double precision ops,,,, Integer operations,,, \n"); printf("Compute iters, Flops/byte, ex.time, GFLOPS, GB/sec, Flops/byte, ex.time, GFLOPS, GB/sec, Iops/byte, ex.time, GIOPS, GB/sec\n"); runbench_warmup(cd, size); runbench<32>(cd, size); runbench<31>(cd, size); runbench<30>(cd, size); runbench<29>(cd, size); runbench<28>(cd, size); runbench<27>(cd, size); runbench<26>(cd, size); runbench<25>(cd, size); runbench<24>(cd, size); runbench<23>(cd, size); runbench<22>(cd, size); runbench<21>(cd, size); runbench<20>(cd, size); runbench<19>(cd, size); runbench<18>(cd, size); runbench<17>(cd, size); runbench<16>(cd, size); runbench<15>(cd, size); runbench<14>(cd, size); runbench<13>(cd, size); runbench<12>(cd, size); runbench<11>(cd, size); runbench<10>(cd, size); runbench<9>(cd, size); runbench<8>(cd, size); runbench<7>(cd, size); runbench<6>(cd, size); runbench<5>(cd, size); runbench<4>(cd, size); runbench<3>(cd, size); runbench<2>(cd, size); runbench<1>(cd, size); runbench<0>(cd, size); printf("---------------------------------------------------------- CSV data ----------------------------------------------------------\n"); // Copy results back to host memory CUDA_SAFE_CALL( hipMemcpy(c, cd, size*sizeof(double), hipMemcpyDeviceToHost) ); CUDA_SAFE_CALL( hipFree(cd) ); CUDA_SAFE_CALL( hipDeviceReset() ); }