Exemple #1
0
int main(int argc, char *argv[])
{
    hipStream_t stream;
    unsigned int flags;
    HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamDefault));
    HIPCHECK(hipStreamGetFlags(stream, &flags));
    HIPASSERT(flags == 0); 
    HIPCHECK(hipStreamDestroy(stream));

    HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
    HIPCHECK(hipStreamGetFlags(stream, &flags));
    HIPASSERT(flags == 1); 
    HIPCHECK(hipStreamDestroy(stream));

    passed();
}
Exemple #2
0
int main(int argc, char* argv[]) {
    HipTest::parseStandardArguments(argc, argv, false);
    parseMyArguments(argc, argv);


    printf("info: set device to %d  tests=%x\n", p_gpuDevice, p_tests);
    HIPCHECK(hipSetDevice(p_gpuDevice));

    if (p_tests & 0x01) {
        simpleNegTest();
    }

    if (p_tests & 0x02) {
        hipStream_t stream;
        HIPCHECK(hipStreamCreate(&stream));

        test_manyInflightCopies<float>(stream, 1024, 16, true);
        test_manyInflightCopies<float>(
            stream, 1024, 4, true);  // verify we re-use the same entries instead of growing pool.
        test_manyInflightCopies<float>(stream, 1024 * 8, 64, false);

        HIPCHECK(hipStreamDestroy(stream));
    }


    if (p_tests & 0x04) {
        test_chunkedAsyncExample(p_streams, true, true, true);     // Easy sync version
        test_chunkedAsyncExample(p_streams, false, true, true);    // Easy sync version
        test_chunkedAsyncExample(p_streams, false, false, true);   // Some async
        test_chunkedAsyncExample(p_streams, false, false, false);  // All async
    }

    if (p_tests & 0x08) {
        hipStream_t stream;
        HIPCHECK(hipStreamCreate(&stream));

        //        test_pingpong<int, Pinned>(stream, 1024*1024*32, 1, 1, false);
        //        test_pingpong<int, Pinned>(stream, 1024*1024*32, 1, 10, false);

        HIPCHECK(hipStreamDestroy(stream));
    }


    passed();
}
Exemple #3
0
int main(int argc, char *argv[])
{
    // Can' destroy the default stream:// TODO - move to another test
    HIPCHECK_API(hipStreamDestroy(0), hipErrorInvalidResourceHandle);

    HipTest::parseStandardArguments(argc, argv, true /*failOnUndefinedArg*/);

    runTests(40000000);

    passed();
}
Exemple #4
0
int main() {
    size_t Nbytes = N * sizeof(int);
    int numDevices = 0;
    int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d;
    int *A_h, *B_h, *C_h;
    hipStream_t s;

    HIPCHECK(hipGetDeviceCount(&numDevices));
    if (numDevices > 1) {
        HIPCHECK(hipSetDevice(0));
        unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
        HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
        HIPCHECK(hipSetDevice(1));
        HIPCHECK(hipMalloc(&X_d, Nbytes));
        HIPCHECK(hipMalloc(&Y_d, Nbytes));
        HIPCHECK(hipMalloc(&Z_d, Nbytes));


        HIPCHECK(hipSetDevice(0));
        HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
        HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
        hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
                        static_cast<const int*>(A_d), static_cast<const int*>(B_d), C_d, N);
        HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
        HIPCHECK(hipDeviceSynchronize());
        HipTest::checkVectorADD(A_h, B_h, C_h, N);

        HIPCHECK(hipSetDevice(1));
        HIPCHECK(hipStreamCreate(&s));
        HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)X_d, (hipDeviceptr_t)A_d, Nbytes, s));
        HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)Y_d, (hipDeviceptr_t)B_d, Nbytes, s));

        hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
                        static_cast<const int*>(X_d), static_cast<const int*>(Y_d), Z_d, N);
        HIPCHECK(hipMemcpyDtoHAsync(C_h, (hipDeviceptr_t)Z_d, Nbytes, s));
        HIPCHECK(hipStreamSynchronize(s));
        HIPCHECK(hipDeviceSynchronize());

        HipTest::checkVectorADD(A_h, B_h, C_h, N);
        HIPCHECK(hipStreamDestroy(s));
        HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
        HIPCHECK(hipFree(X_d));
        HIPCHECK(hipFree(Y_d));
        HIPCHECK(hipFree(Z_d));
    }

    passed();
}
// Create a lot of streams and then destroy 'em.
void createThenDestroyStreams(int iterations, int burstSize)
{
    hipStream_t *streams = new hipStream_t[burstSize];

    for (int i=0; i<iterations; i++) {
        if (p_verbose & 0x1) {
          printf ("%s iter=%d, create %d then destroy %d\n", __func__, i, burstSize, burstSize);
        }
        for (int j=0; j<burstSize; j++) {
            if (p_verbose & 0x2) {
                printf ("  %d.%d streamCreate\n", i, j);
            }
            HIPCHECK( hipStreamCreate(&streams[j]));
        }
        for (int j=0; j<burstSize; j++) {
            if (p_verbose & 0x2) {
                printf ("  %d.%d streamDestroy\n", i, j);
            }
            HIPCHECK( hipStreamDestroy(streams[j]));
        }
    }

    delete streams;
}
Exemple #6
0
void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode syncMode, bool expectMismatch)
{

    // This test sends a long-running kernel to the null stream, then tests to see if the
    // specified synchronization technique is effective.
    //
    // Some syncMode are not expected to correctly sync (for example "syncNone").  in these
    // cases the test sets expectMismatch and the check logic below will attempt to ensure that
    // the undesired synchronization did not occur - ie ensure the kernel is still running and did
    // not yet update the stop event.  This can be tricky since if the kernel runs fast enough it
    // may complete before the check.  To prevent this, the addCountReverse has a count parameter
    // which causes it to loop repeatedly, and the results are checked in reverse order.
    //
    // Tests with expectMismatch=true should ensure the kernel finishes correctly. This results
    // are checked and we test to make sure stop event has completed.

    if (!(testMask & p_tests)) {
        return;
    }
    printf ("\ntest 0x%02x: syncMode=%s expectMismatch=%d\n",
            testMask, syncModeString(syncMode), expectMismatch);

    size_t sizeBytes = numElements * sizeof(int);

    int count =100;
    int init0 = 0;
    HIPCHECK(hipMemset(C_d, init0, sizeBytes));
    for (int i=0; i<numElements; i++) {
        C_h[i] = -1; // initialize
    }

    hipStream_t otherStream = 0;
    unsigned flags = (syncMode == syncMarkerThenOtherNonBlockingStream) ?  hipStreamNonBlocking : hipStreamDefault;
    HIPCHECK(hipStreamCreateWithFlags(&otherStream, flags));
    hipEvent_t stop, otherStreamEvent;
    HIPCHECK(hipEventCreate(&stop));
    HIPCHECK(hipEventCreate(&otherStreamEvent));


    unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements);
    // Launch kernel into null stream, should result in C_h == count.
    hipLaunchKernelGGL(
        HipTest::addCountReverse,
        dim3(blocks),
        dim3(threadsPerBlock),
        0,
        0 /*stream*/,
        static_cast<const int*>(C_d),
        C_h,
        numElements,
        count);
    HIPCHECK(hipEventRecord(stop, 0/*default*/));

    switch (syncMode) {
        case syncNone:
            break;
        case syncNullStream:
            HIPCHECK(hipStreamSynchronize(0));  // wait on host for null stream:
            break;
        case syncOtherStream:
            // Does this synchronize with the null stream?
            HIPCHECK(hipStreamSynchronize(otherStream));
            break;
        case syncMarkerThenOtherStream:
        case syncMarkerThenOtherNonBlockingStream:

            // this may wait for NULL stream depending hipStreamNonBlocking flag above
            HIPCHECK(hipEventRecord(otherStreamEvent, otherStream));

            HIPCHECK(hipStreamSynchronize(otherStream));
            break;
        case syncDevice:
            HIPCHECK(hipDeviceSynchronize());
            break;
        default:
            assert(0);
    };

    hipError_t done = hipEventQuery(stop);

    if (expectMismatch) {
        assert (done == hipErrorNotReady);
    } else {
        assert (done == hipSuccess);
    }

    int mismatches = 0;
    int expected = init0 + count;
    for (int i=0; i<numElements; i++) {
        bool compareEqual = (C_h[i] == expected);
        if (!compareEqual) {
            mismatches ++;
            if  (!expectMismatch) {
                printf ("C_h[%d] (%d) != %d\n", i, C_h[i], expected);
                assert(C_h[i] == expected);
            }
        }
    }

    if (expectMismatch) {
        assert (mismatches > 0);
    }


    HIPCHECK(hipStreamDestroy(otherStream));
    HIPCHECK(hipEventDestroy(stop));
    HIPCHECK(hipEventDestroy(otherStreamEvent));

    HIPCHECK(hipDeviceSynchronize());

    printf ("test:   OK - %d mismatches (%6.2f%%)\n",  mismatches, ((double)(mismatches)*100.0)/numElements);
}
int main() {
    try {
        bool done = false;
        boost::fibers::fiber f1([&done]{
            std::cout << "f1: entered" << std::endl;
            try {
                hipStream_t stream;
                hipStreamCreate( & stream);
                int size = 1024 * 1024;
                int full_size = 20 * size;
                int * host_a, * host_b, * host_c;
                hipHostMalloc( & host_a, full_size * sizeof( int), hipHostMallocDefault);
                hipHostMalloc( & host_b, full_size * sizeof( int), hipHostMallocDefault);
                hipHostMalloc( & host_c, full_size * sizeof( int), hipHostMallocDefault);
                int * dev_a, * dev_b, * dev_c;
                hipMalloc( & dev_a, size * sizeof( int) );
                hipMalloc( & dev_b, size * sizeof( int) );
                hipMalloc( & dev_c, size * sizeof( int) );
                std::minstd_rand generator;
                std::uniform_int_distribution<> distribution(1, 6);
                for ( int i = 0; i < full_size; ++i) {
                    host_a[i] = distribution( generator);
                    host_b[i] = distribution( generator);
                }
                for ( int i = 0; i < full_size; i += size) {
                    hipMemcpyAsync( dev_a, host_a + i, size * sizeof( int), hipMemcpyHostToDevice, stream);
                    hipMemcpyAsync( dev_b, host_b + i, size * sizeof( int), hipMemcpyHostToDevice, stream);
                    hipLaunchKernel( vector_add, dim3(size / 256), dim3(256), 0, stream, dev_a, dev_b, dev_c, size);
                    hipMemcpyAsync( host_c + i, dev_c, size * sizeof( int), hipMemcpyDeviceToHost, stream);
                }
                auto result = boost::fibers::hip::waitfor_all( stream);
                BOOST_ASSERT( stream == std::get< 0 >( result) );
                BOOST_ASSERT( hipSuccess == std::get< 1 >( result) );
                std::cout << "f1: GPU computation finished" << std::endl;
                hipHostFree( host_a);
                hipHostFree( host_b);
                hipHostFree( host_c);
                hipFree( dev_a);
                hipFree( dev_b);
                hipFree( dev_c);
                hipStreamDestroy( stream);
                done = true;
            } catch ( std::exception const& ex) {
                std::cerr << "exception: " << ex.what() << std::endl;
            }
            std::cout << "f1: leaving" << std::endl;
        });
        boost::fibers::fiber f2([&done]{
            std::cout << "f2: entered" << std::endl;
            while ( ! done) {
                std::cout << "f2: sleeping" << std::endl;
                boost::this_fiber::sleep_for( std::chrono::milliseconds( 1 ) );
            }
            std::cout << "f2: leaving" << std::endl;
        });
        f1.join();
        f2.join();
        std::cout << "done." << std::endl;
        return EXIT_SUCCESS;
    } catch ( std::exception const& e) {
        std::cerr << "exception: " << e.what() << std::endl;
    } catch (...) {
        std::cerr << "unhandled exception" << std::endl;
    }
	return EXIT_FAILURE;
}