int main() { size_t Nbytes = N * sizeof(int); int numDevices = 0; int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d; int *A_h, *B_h, *C_h; hipStream_t s; HIPCHECK(hipGetDeviceCount(&numDevices)); if (numDevices > 1) { HIPCHECK(hipSetDevice(0)); unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); HIPCHECK(hipSetDevice(1)); HIPCHECK(hipMalloc(&X_d, Nbytes)); HIPCHECK(hipMalloc(&Y_d, Nbytes)); HIPCHECK(hipMalloc(&Z_d, Nbytes)); HIPCHECK(hipSetDevice(0)); HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast<const int*>(A_d), static_cast<const int*>(B_d), C_d, N); HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); HIPCHECK(hipSetDevice(1)); HIPCHECK(hipStreamCreate(&s)); HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)X_d, (hipDeviceptr_t)A_d, Nbytes, s)); HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)Y_d, (hipDeviceptr_t)B_d, Nbytes, s)); hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast<const int*>(X_d), static_cast<const int*>(Y_d), Z_d, N); HIPCHECK(hipMemcpyDtoHAsync(C_h, (hipDeviceptr_t)Z_d, Nbytes, s)); HIPCHECK(hipStreamSynchronize(s)); HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); HIPCHECK(hipStreamDestroy(s)); HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); HIPCHECK(hipFree(X_d)); HIPCHECK(hipFree(Y_d)); HIPCHECK(hipFree(Z_d)); } passed(); }
int main() { unsigned flag = 0; HIPCHECK(hipDeviceReset()); int deviceCount = 0; HIPCHECK(hipGetDeviceCount(&deviceCount)); for(int j=0;j<deviceCount;j++){ HIPCHECK(hipSetDevice(j)); for(int i=0;i<4;i++){ flag = 1 << i; printf ("Flag=%x\n", flag); HIPCHECK(hipSetDeviceFlags(flag)); //HIPCHECK_API(hipSetDeviceFlags(flag), hipErrorInvalidValue); } flag = 0; } passed(); }
int main() { int numDevices = 0; HIPCHECK_API(hipGetDeviceCount(&numDevices), hipSuccess); if (numDevices > 0) { for (int deviceId = 0; deviceId < numDevices; deviceId++) { HIPCHECK_API(hipSetDevice(deviceId), hipSuccess); } HIPCHECK_API(hipSetDevice(numDevices), hipErrorInvalidDevice); HIPCHECK_API(hipSetDevice(-1), hipErrorInvalidDevice); } else { failed("Error: failed to find any compatible devices."); } passed(); }
/**
 * Round-trip check for hipSetDevice / hipGetDevice: after selecting each
 * device index in turn, hipGetDevice must report that same index.
 */
int main() {
    int numDevices = 0;
    HIPCHECK(hipGetDeviceCount(&numDevices));

    int device;  // written by hipGetDevice on every iteration
    int i = 0;
    while (i < numDevices) {
        HIPCHECK(hipSetDevice(i));
        HIPCHECK(hipGetDevice(&device));
        HIPASSERT(device == i);
        ++i;
    }

    passed();
}
/**
 * Driver for the hipMemset3D test: parses the standard test arguments,
 * selects device `p_gpuDevice`, runs testhipMemset3D with `memsetval`, and
 * exits with EXIT_FAILURE when that helper reports a falsy result.
 */
int main(int argc, char *argv[]) {
    HipTest::parseStandardArguments(argc, argv, true);
    HIPCHECK(hipSetDevice(p_gpuDevice));

    const bool succeeded = testhipMemset3D(memsetval, p_gpuDevice);
    if (!succeeded) {
        exit(EXIT_FAILURE);
    }

    passed();
}
int main(int argc, char *argv[]) { HipTest::parseStandardArguments(argc, argv, true); printf ("info: set device to %d\n", p_gpuDevice); HIPCHECK(hipSetDevice(p_gpuDevice)); if (p_tests & 0x1) { printf ("\n\n=== tests&1 (types)\n"); printSep(); HIPCHECK ( hipDeviceReset() ); size_t width = N/6; size_t height = N/6; memcpy2Dtest<float>(321, 211, 0); memcpy2Dtest<double>(322, 211, 0); memcpy2Dtest<char>(320, 211, 0); memcpy2Dtest<int>(323, 211, 0); printf ("===\n\n\n"); printf ("\n\n=== tests&1 (types)\n"); printSep(); // 2D memcpyArraytest<float>(320, 211, 0, 0); memcpyArraytest<unsigned int>(322, 211, 0, 0); memcpyArraytest<int>(320, 211, 0, 0); memcpyArraytest<float>(320, 211, 0, 1); memcpyArraytest<float>(322, 211, 0, 1); memcpyArraytest<int>(320, 211, 0, 1); printSep(); // 1D memcpyArraytest<float>(320, 1, 0); memcpyArraytest<unsigned int>(322, 1, 0); memcpyArraytest<int>(320, 1, 0); printf ("===\n\n\n"); } if (p_tests & 0x4) { printf ("\n\n=== tests&4 (test sizes and offsets)\n"); printSep(); HIPCHECK ( hipDeviceReset() ); printSep(); memcpyArraytest_size<float>(0,0); printSep(); memcpyArraytest_size<float>(0,64); printSep(); memcpyArraytest_size<float>(1024*1024,13); printSep(); memcpyArraytest_size<float>(1024*1024,50); } passed(); }
int main(int argc, char* argv[]) { HipTest::parseStandardArguments(argc, argv, false); parseMyArguments(argc, argv); printf("info: set device to %d tests=%x\n", p_gpuDevice, p_tests); HIPCHECK(hipSetDevice(p_gpuDevice)); if (p_tests & 0x01) { simpleNegTest(); } if (p_tests & 0x02) { hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); test_manyInflightCopies<float>(stream, 1024, 16, true); test_manyInflightCopies<float>( stream, 1024, 4, true); // verify we re-use the same entries instead of growing pool. test_manyInflightCopies<float>(stream, 1024 * 8, 64, false); HIPCHECK(hipStreamDestroy(stream)); } if (p_tests & 0x04) { test_chunkedAsyncExample(p_streams, true, true, true); // Easy sync version test_chunkedAsyncExample(p_streams, false, true, true); // Easy sync version test_chunkedAsyncExample(p_streams, false, false, true); // Some async test_chunkedAsyncExample(p_streams, false, false, false); // All async } if (p_tests & 0x08) { hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); // test_pingpong<int, Pinned>(stream, 1024*1024*32, 1, 1, false); // test_pingpong<int, Pinned>(stream, 1024*1024*32, 1, 10, false); HIPCHECK(hipStreamDestroy(stream)); } passed(); }
int main(int argc, char *argv[]) { HipTest::parseStandardArguments(argc, argv, true); printf ("info: set device to %d\n", p_gpuDevice); HIPCHECK(hipSetDevice(p_gpuDevice)); if (p_tests & 0x1) { printf ("\n\n=== tests&1 (types and different memcpy kinds (H2D, D2H, H2H, D2D)\n"); HIPCHECK ( hipDeviceReset() ); memcpytest2_for_type<float>(N); memcpytest2_for_type<double>(N); memcpytest2_for_type<char>(N); memcpytest2_for_type<int>(N); printf ("===\n\n\n"); } if (p_tests & 0x2) { // Some tests around the 64MB boundary which have historically shown issues: printf ("\n\n=== tests&0x2 (64MB boundary)\n"); #if 0 // These all pass: memcpytest2<float>(15*1024*1024, 1, 0, 0, 0); memcpytest2<float>(16*1024*1024, 1, 0, 0, 0); memcpytest2<float>(16*1024*1024+16*1024, 1, 0, 0, 0); #endif // Just over 64MB: memcpytest2<float>(16*1024*1024+512*1024, 1, 0, 0, 0); memcpytest2<float>(17*1024*1024+1024, 1, 0, 0, 0); memcpytest2<float>(32*1024*1024, 1, 0, 0, 0); memcpytest2<float>(32*1024*1024, 0, 0, 0, 0); memcpytest2<float>(32*1024*1024, 1, 1, 1, 0); memcpytest2<float>(32*1024*1024, 1, 1, 1, 0); } if (p_tests & 0x4) { printf ("\n\n=== tests&4 (test sizes and offsets)\n"); HIPCHECK ( hipDeviceReset() ); printSep(); memcpytest2_sizes<float>(0,0); printSep(); memcpytest2_sizes<float>(0,64); printSep(); memcpytest2_sizes<float>(1024*1024, 13); printSep(); memcpytest2_sizes<float>(1024*1024, 50); } if (p_tests & 0x8) { printf ("\n\n=== tests&8\n"); HIPCHECK ( hipDeviceReset() ); printSep(); // Simplest cases: serialize the threads, and also used pinned memory: // This verifies that the sub-calls to memcpytest2 are correct. multiThread_1<float>(true, true); // Serialize, but use unpinned memory to stress the unpinned memory xfer path. multiThread_1<float>(true, false); // Remove serialization, so two threads are performing memory copies in parallel. multiThread_1<float>(false, true); // Remove serialization, and use unpinned. 
multiThread_1<float>(false, false); // TODO printf ("===\n\n\n"); } passed(); }