int main(int argc, char *argv[]) { hipStream_t stream; unsigned int flags; HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamDefault)); HIPCHECK(hipStreamGetFlags(stream, &flags)); HIPASSERT(flags == 0); HIPCHECK(hipStreamDestroy(stream)); HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking)); HIPCHECK(hipStreamGetFlags(stream, &flags)); HIPASSERT(flags == 1); HIPCHECK(hipStreamDestroy(stream)); passed(); }
int main(int argc, char* argv[]) { HipTest::parseStandardArguments(argc, argv, false); parseMyArguments(argc, argv); printf("info: set device to %d tests=%x\n", p_gpuDevice, p_tests); HIPCHECK(hipSetDevice(p_gpuDevice)); if (p_tests & 0x01) { simpleNegTest(); } if (p_tests & 0x02) { hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); test_manyInflightCopies<float>(stream, 1024, 16, true); test_manyInflightCopies<float>( stream, 1024, 4, true); // verify we re-use the same entries instead of growing pool. test_manyInflightCopies<float>(stream, 1024 * 8, 64, false); HIPCHECK(hipStreamDestroy(stream)); } if (p_tests & 0x04) { test_chunkedAsyncExample(p_streams, true, true, true); // Easy sync version test_chunkedAsyncExample(p_streams, false, true, true); // Easy sync version test_chunkedAsyncExample(p_streams, false, false, true); // Some async test_chunkedAsyncExample(p_streams, false, false, false); // All async } if (p_tests & 0x08) { hipStream_t stream; HIPCHECK(hipStreamCreate(&stream)); // test_pingpong<int, Pinned>(stream, 1024*1024*32, 1, 1, false); // test_pingpong<int, Pinned>(stream, 1024*1024*32, 1, 10, false); HIPCHECK(hipStreamDestroy(stream)); } passed(); }
int main(int argc, char *argv[]) { // Can' destroy the default stream:// TODO - move to another test HIPCHECK_API(hipStreamDestroy(0), hipErrorInvalidResourceHandle); HipTest::parseStandardArguments(argc, argv, true /*failOnUndefinedArg*/); runTests(40000000); passed(); }
int main() { size_t Nbytes = N * sizeof(int); int numDevices = 0; int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d; int *A_h, *B_h, *C_h; hipStream_t s; HIPCHECK(hipGetDeviceCount(&numDevices)); if (numDevices > 1) { HIPCHECK(hipSetDevice(0)); unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N); HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false); HIPCHECK(hipSetDevice(1)); HIPCHECK(hipMalloc(&X_d, Nbytes)); HIPCHECK(hipMalloc(&Y_d, Nbytes)); HIPCHECK(hipMalloc(&Z_d, Nbytes)); HIPCHECK(hipSetDevice(0)); HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice)); HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice)); hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast<const int*>(A_d), static_cast<const int*>(B_d), C_d, N); HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost)); HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); HIPCHECK(hipSetDevice(1)); HIPCHECK(hipStreamCreate(&s)); HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)X_d, (hipDeviceptr_t)A_d, Nbytes, s)); HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)Y_d, (hipDeviceptr_t)B_d, Nbytes, s)); hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, static_cast<const int*>(X_d), static_cast<const int*>(Y_d), Z_d, N); HIPCHECK(hipMemcpyDtoHAsync(C_h, (hipDeviceptr_t)Z_d, Nbytes, s)); HIPCHECK(hipStreamSynchronize(s)); HIPCHECK(hipDeviceSynchronize()); HipTest::checkVectorADD(A_h, B_h, C_h, N); HIPCHECK(hipStreamDestroy(s)); HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false); HIPCHECK(hipFree(X_d)); HIPCHECK(hipFree(Y_d)); HIPCHECK(hipFree(Z_d)); } passed(); }
// Create a lot of streams and then destroy 'em. void createThenDestroyStreams(int iterations, int burstSize) { hipStream_t *streams = new hipStream_t[burstSize]; for (int i=0; i<iterations; i++) { if (p_verbose & 0x1) { printf ("%s iter=%d, create %d then destroy %d\n", __func__, i, burstSize, burstSize); } for (int j=0; j<burstSize; j++) { if (p_verbose & 0x2) { printf (" %d.%d streamCreate\n", i, j); } HIPCHECK( hipStreamCreate(&streams[j])); } for (int j=0; j<burstSize; j++) { if (p_verbose & 0x2) { printf (" %d.%d streamDestroy\n", i, j); } HIPCHECK( hipStreamDestroy(streams[j])); } } delete streams; }
void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode syncMode, bool expectMismatch) { // This test sends a long-running kernel to the null stream, then tests to see if the // specified synchronization technique is effective. // // Some syncMode are not expected to correctly sync (for example "syncNone"). in these // cases the test sets expectMismatch and the check logic below will attempt to ensure that // the undesired synchronization did not occur - ie ensure the kernel is still running and did // not yet update the stop event. This can be tricky since if the kernel runs fast enough it // may complete before the check. To prevent this, the addCountReverse has a count parameter // which causes it to loop repeatedly, and the results are checked in reverse order. // // Tests with expectMismatch=true should ensure the kernel finishes correctly. This results // are checked and we test to make sure stop event has completed. if (!(testMask & p_tests)) { return; } printf ("\ntest 0x%02x: syncMode=%s expectMismatch=%d\n", testMask, syncModeString(syncMode), expectMismatch); size_t sizeBytes = numElements * sizeof(int); int count =100; int init0 = 0; HIPCHECK(hipMemset(C_d, init0, sizeBytes)); for (int i=0; i<numElements; i++) { C_h[i] = -1; // initialize } hipStream_t otherStream = 0; unsigned flags = (syncMode == syncMarkerThenOtherNonBlockingStream) ? hipStreamNonBlocking : hipStreamDefault; HIPCHECK(hipStreamCreateWithFlags(&otherStream, flags)); hipEvent_t stop, otherStreamEvent; HIPCHECK(hipEventCreate(&stop)); HIPCHECK(hipEventCreate(&otherStreamEvent)); unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements); // Launch kernel into null stream, should result in C_h == count. hipLaunchKernelGGL( HipTest::addCountReverse, dim3(blocks), dim3(threadsPerBlock), 0, 0 /*stream*/, static_cast<const int*>(C_d), C_h, numElements, count); HIPCHECK(hipEventRecord(stop, 0/*default*/)); switch (syncMode) { case syncNone: break; case syncNullStream: HIPCHECK(hipStreamSynchronize(0)); // wait on host for null stream: break; case syncOtherStream: // Does this synchronize with the null stream? HIPCHECK(hipStreamSynchronize(otherStream)); break; case syncMarkerThenOtherStream: case syncMarkerThenOtherNonBlockingStream: // this may wait for NULL stream depending hipStreamNonBlocking flag above HIPCHECK(hipEventRecord(otherStreamEvent, otherStream)); HIPCHECK(hipStreamSynchronize(otherStream)); break; case syncDevice: HIPCHECK(hipDeviceSynchronize()); break; default: assert(0); }; hipError_t done = hipEventQuery(stop); if (expectMismatch) { assert (done == hipErrorNotReady); } else { assert (done == hipSuccess); } int mismatches = 0; int expected = init0 + count; for (int i=0; i<numElements; i++) { bool compareEqual = (C_h[i] == expected); if (!compareEqual) { mismatches ++; if (!expectMismatch) { printf ("C_h[%d] (%d) != %d\n", i, C_h[i], expected); assert(C_h[i] == expected); } } } if (expectMismatch) { assert (mismatches > 0); } HIPCHECK(hipStreamDestroy(otherStream)); HIPCHECK(hipEventDestroy(stop)); HIPCHECK(hipEventDestroy(otherStreamEvent)); HIPCHECK(hipDeviceSynchronize()); printf ("test: OK - %d mismatches (%6.2f%%)\n", mismatches, ((double)(mismatches)*100.0)/numElements); }
int main() { try { bool done = false; boost::fibers::fiber f1([&done]{ std::cout << "f1: entered" << std::endl; try { hipStream_t stream; hipStreamCreate( & stream); int size = 1024 * 1024; int full_size = 20 * size; int * host_a, * host_b, * host_c; hipHostMalloc( & host_a, full_size * sizeof( int), hipHostMallocDefault); hipHostMalloc( & host_b, full_size * sizeof( int), hipHostMallocDefault); hipHostMalloc( & host_c, full_size * sizeof( int), hipHostMallocDefault); int * dev_a, * dev_b, * dev_c; hipMalloc( & dev_a, size * sizeof( int) ); hipMalloc( & dev_b, size * sizeof( int) ); hipMalloc( & dev_c, size * sizeof( int) ); std::minstd_rand generator; std::uniform_int_distribution<> distribution(1, 6); for ( int i = 0; i < full_size; ++i) { host_a[i] = distribution( generator); host_b[i] = distribution( generator); } for ( int i = 0; i < full_size; i += size) { hipMemcpyAsync( dev_a, host_a + i, size * sizeof( int), hipMemcpyHostToDevice, stream); hipMemcpyAsync( dev_b, host_b + i, size * sizeof( int), hipMemcpyHostToDevice, stream); hipLaunchKernel( vector_add, dim3(size / 256), dim3(256), 0, stream, dev_a, dev_b, dev_c, size); hipMemcpyAsync( host_c + i, dev_c, size * sizeof( int), hipMemcpyDeviceToHost, stream); } auto result = boost::fibers::hip::waitfor_all( stream); BOOST_ASSERT( stream == std::get< 0 >( result) ); BOOST_ASSERT( hipSuccess == std::get< 1 >( result) ); std::cout << "f1: GPU computation finished" << std::endl; hipHostFree( host_a); hipHostFree( host_b); hipHostFree( host_c); hipFree( dev_a); hipFree( dev_b); hipFree( dev_c); hipStreamDestroy( stream); done = true; } catch ( std::exception const& ex) { std::cerr << "exception: " << ex.what() << std::endl; } std::cout << "f1: leaving" << std::endl; }); boost::fibers::fiber f2([&done]{ std::cout << "f2: entered" << std::endl; while ( ! done) { std::cout << "f2: sleeping" << std::endl; boost::this_fiber::sleep_for( std::chrono::milliseconds( 1 ) ); } std::cout << "f2: leaving" << std::endl; }); f1.join(); f2.join(); std::cout << "done." << std::endl; return EXIT_SUCCESS; } catch ( std::exception const& e) { std::cerr << "exception: " << e.what() << std::endl; } catch (...) { std::cerr << "unhandled exception" << std::endl; } return EXIT_FAILURE; }