Exemplo n.º 1
0
void test_manyInflightCopies(hipStream_t stream, int numElements, int numCopies,
                             bool syncBetweenCopies) {
    size_t Nbytes = numElements * sizeof(T);
    size_t eachCopyElements = numElements / numCopies;
    size_t eachCopyBytes = eachCopyElements * sizeof(T);

    printf(
        "------------------------------------------------------------------------------------------"
        "-----\n");
    printf(
        "testing: %s  Nbytes=%zu (%6.1f MB) numCopies=%d eachCopyElements=%zu eachCopyBytes=%zu\n",
        __func__, Nbytes, (double)(Nbytes) / 1024.0 / 1024.0, numCopies, eachCopyElements,
        eachCopyBytes);

    T* A_d;
    T *A_h1, *A_h2;

    HIPCHECK(hipHostMalloc((void**)&A_h1, Nbytes, hipHostMallocDefault));
    HIPCHECK(hipHostMalloc((void**)&A_h2, Nbytes, hipHostMallocDefault));
    HIPCHECK(hipMalloc(&A_d, Nbytes));

    for (int i = 0; i < numElements; i++) {
        A_h1[i] = 3.14f + static_cast<T>(i);
    }


    // stream=0; // fixme TODO


    for (int i = 0; i < numCopies; i++) {
        HIPASSERT(A_d + i * eachCopyElements < A_d + Nbytes);
        HIPCHECK(hipMemcpyAsync(&A_d[i * eachCopyElements], &A_h1[i * eachCopyElements],
                                eachCopyBytes, hipMemcpyHostToDevice, stream));
    }

    if (syncBetweenCopies) {
        HIPCHECK(hipDeviceSynchronize());
    }

    for (int i = 0; i < numCopies; i++) {
        HIPASSERT(A_d + i * eachCopyElements < A_d + Nbytes);
        HIPCHECK(hipMemcpyAsync(&A_h2[i * eachCopyElements], &A_d[i * eachCopyElements],
                                eachCopyBytes, hipMemcpyDeviceToHost, stream));
    }

    HIPCHECK(hipDeviceSynchronize());


    // Verify we copied back all the data correctly:
    for (int i = 0; i < numElements; i++) {
        HIPASSERT(A_h1[i] == A_h2[i]);
    }


    HIPCHECK(hipHostFree(A_h1));
    HIPCHECK(hipHostFree(A_h2));
    HIPCHECK(hipFree(A_d));
}
Exemplo n.º 2
0
void runTests(int64_t numElements)
{
    size_t sizeBytes = numElements * sizeof(int);

    printf ("\n\ntest: starting sequence with sizeBytes=%zu bytes, %6.2f MB\n", sizeBytes, sizeBytes/1024.0/1024.0);


    int *C_h, *C_d;
    HIPCHECK(hipMalloc(&C_d, sizeBytes));
    HIPCHECK(hipHostMalloc(&C_h, sizeBytes));


    {
        test (0x01, C_d, C_h, numElements,  syncNone,                             true /*expectMismatch*/);
        test (0x02, C_d, C_h, numElements,  syncNullStream,                       false /*expectMismatch*/);
        test (0x04, C_d, C_h, numElements,  syncOtherStream,                      true /*expectMismatch*/);
        test (0x08, C_d, C_h, numElements,  syncDevice,                           false /*expectMismatch*/);

        // Sending a marker to to null stream may synchronize the otherStream
        //  - other created with hipStreamNonBlocking=0 : synchronization, should match
        //  - other created with hipStreamNonBlocking=1 : no synchronization, may mismatch
        test (0x10, C_d, C_h, numElements,  syncMarkerThenOtherStream,            false /*expectMismatch*/);

        // TODO - review why this test seems flaky
        //test (0x20, C_d, C_h, numElements,  syncMarkerThenOtherNonBlockingStream, true /*expectMismatch*/);
    }


    HIPCHECK(hipFree(C_d));
    HIPCHECK(hipHostFree(C_h));
}
int main(){
    float *Ad, *A;
    hipHostMalloc((void**)&A, size);
    hipMalloc((void**)&Ad, size);
    assert(hipSuccess == hipMemcpy(Ad, A, size, hipMemcpyHostToDevice));
    assert(hipSuccess == hipMemcpy(A, Ad, size, hipMemcpyDeviceToHost));
    passed();
}
Exemplo n.º 4
0
int main() {
    float *A, *Ad;
    HIPCHECK(hipHostMalloc((void**)&A, SIZE, hipHostMallocDefault));
    HIPCHECK(hipMalloc((void**)&Ad, SIZE));
    hipStream_t stream;
    HIPCHECK(hipStreamCreate(&stream));
    for (int i = 0; i < SIZE; i++) {
        HIPCHECK(hipMemcpyAsync(Ad, A, SIZE, hipMemcpyHostToDevice, stream));
        HIPCHECK(hipDeviceSynchronize());
    }
}
void run1(size_t size, hipStream_t stream){
	float *Ah, *Bh, *Cd, *Dd, *Eh;

	HIPCHECK(hipHostMalloc((void**)&Ah, size, hipHostMallocDefault));
	HIPCHECK(hipHostMalloc((void**)&Bh, size, hipHostMallocDefault));
	HIPCHECK(hipMalloc(&Cd, size));
	HIPCHECK(hipMalloc(&Dd, size));
	HIPCHECK(hipHostMalloc((void**)&Eh, size, hipHostMallocDefault));

	for(int i=0;i<N;i++){
		Ah[i] = 1.0f;
	}

	HIPCHECK(hipMemcpyAsync(Bh, Ah, size, hipMemcpyHostToHost, stream));
	HIPCHECK(hipMemcpyAsync(Cd, Bh, size, hipMemcpyHostToDevice, stream));
	hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd);
	HIPCHECK(hipMemcpyAsync(Dd, Cd, size, hipMemcpyDeviceToDevice, stream));
	HIPCHECK(hipMemcpyAsync(Eh, Dd, size, hipMemcpyDeviceToHost, stream));
	HIPCHECK(hipDeviceSynchronize());
	HIPASSERT(Eh[10] == Ah[10] + 1.0f);
}
void run(size_t size, hipStream_t stream1, hipStream_t stream2){
	float *Ah, *Bh, *Cd, *Dd, *Eh;
	float *Ahh, *Bhh, *Cdd, *Ddd, *Ehh;

	HIPCHECK(hipHostMalloc((void**)&Ah, size, hipHostMallocDefault));
	HIPCHECK(hipHostMalloc((void**)&Bh, size, hipHostMallocDefault));
	HIPCHECK(hipMalloc(&Cd, size));
	HIPCHECK(hipMalloc(&Dd, size));
	HIPCHECK(hipHostMalloc((void**)&Eh, size, hipHostMallocDefault));
	HIPCHECK(hipHostMalloc((void**)&Ahh, size, hipHostMallocDefault));
	HIPCHECK(hipHostMalloc((void**)&Bhh, size, hipHostMallocDefault));
	HIPCHECK(hipMalloc(&Cdd, size));
	HIPCHECK(hipMalloc(&Ddd, size));
	HIPCHECK(hipHostMalloc((void**)&Ehh, size, hipHostMallocDefault));

	HIPCHECK(hipMemcpyAsync(Bh, Ah, size, hipMemcpyHostToHost, stream1));
	HIPCHECK(hipMemcpyAsync(Bhh, Ahh, size, hipMemcpyHostToHost, stream2));
	HIPCHECK(hipMemcpyAsync(Cd, Bh, size, hipMemcpyHostToDevice, stream1));
	HIPCHECK(hipMemcpyAsync(Cdd, Bhh, size, hipMemcpyHostToDevice, stream2));
	hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream1, Cd);
	hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream2, Cdd);
	HIPCHECK(hipMemcpyAsync(Dd, Cd, size, hipMemcpyDeviceToDevice, stream1));
	HIPCHECK(hipMemcpyAsync(Ddd, Cdd, size, hipMemcpyDeviceToDevice, stream2));
	HIPCHECK(hipMemcpyAsync(Eh, Dd, size, hipMemcpyDeviceToHost, stream1));
	HIPCHECK(hipMemcpyAsync(Ehh, Ddd, size, hipMemcpyDeviceToHost, stream2));
	HIPCHECK(hipDeviceSynchronize());
	HIPASSERT(Eh[10] = Ah[10] + 1.0f);
	HIPASSERT(Ehh[10] = Ahh[10] + 1.0f);
}
Exemplo n.º 7
0
void simpleNegTest() {
    printf("testing: %s\n", __func__);
    hipError_t e;
    float *A_malloc, *A_pinned, *A_d;

    size_t Nbytes = N * sizeof(float);
    A_malloc = (float*)malloc(Nbytes);
    HIPCHECK(hipHostMalloc((void**)&A_pinned, Nbytes, hipHostMallocDefault));
    A_d = NULL;
    HIPCHECK(hipMalloc(&A_d, Nbytes));
    HIPASSERT(A_d != NULL);
    // Can't use default with async copy
    e = hipMemcpyAsync(A_pinned, A_d, Nbytes, hipMemcpyDefault, NULL);
    //    HIPASSERT (e == hipSuccess);


    // Not sure what happens here, the memory must be pinned.
    e = hipMemcpyAsync(A_malloc, A_d, Nbytes, hipMemcpyDeviceToHost, NULL);

    printf("  async memcpy of A_malloc to A_d. Result=%d\n", e);
    // HIPASSERT (e==hipErrorInvalidValue);
}
Exemplo n.º 8
0
//---
// TODO - remove me, this is deprecated.
hipError_t hipMallocHost(void** ptr, size_t sizeBytes)
{
    return hipHostMalloc(ptr, sizeBytes, 0);
}
Exemplo n.º 9
0
//---
// TODO - remove me, this is deprecated.
hipError_t hipHostAlloc(void** ptr, size_t sizeBytes, unsigned int flags)
{
    return hipHostMalloc(ptr, sizeBytes, flags);
};
Exemplo n.º 10
0
 static void* Alloc(size_t sizeBytes) {
     void* p;
     HIPCHECK(hipHostMalloc((void**)&p, sizeBytes, hipHostMallocDefault));
     return p;
 };
Exemplo n.º 11
0
int main()
{
    int *A, *Am, *B, *Ad, *C, *Cm;
    A = new int[NUM];
    B = new int[NUM];
    C = new int[NUM];
    for(int i=0;i<NUM;i++) {
        A[i] = -1*i;
        B[i] = 0;
        C[i] = 0;
    }

    hipMalloc((void**)&Ad, SIZE);
    hipHostMalloc((void**)&Am, SIZE);
    hipHostMalloc((void**)&Cm, SIZE);
    for(int i=0;i<NUM;i++) {
        Am[i] = -1*i;
        Cm[i] = 0;
    }

    hipStream_t stream;
    hipStreamCreate(&stream);
    hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), Am, SIZE, 0, hipMemcpyHostToDevice, stream);
    hipStreamSynchronize(stream);
    hipLaunchKernel(Assign, dim3(1,1,1), dim3(NUM,1,1), 0, 0, Ad);
    hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost);
    hipMemcpyFromSymbolAsync(Cm, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream);
    hipStreamSynchronize(stream);
    for(int i=0;i<NUM;i++) {
        assert(Am[i] == B[i]);
        assert(Am[i] == Cm[i]);
    }

    for(int i=0;i<NUM;i++) {
        A[i] = -2*i;
        B[i] = 0;
    }

    hipMemcpyToSymbol(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice);
    hipLaunchKernel(Assign, dim3(1,1,1), dim3(NUM,1,1), 0, 0, Ad);
    hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost);
    hipMemcpyFromSymbol(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost);
    for(int i=0;i<NUM;i++) {
        assert(A[i] == B[i]);
        assert(A[i] == C[i]);
    }

    for(int i=0;i<NUM;i++) {
        A[i] = -3*i;
        B[i] = 0;
    }

    hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice, stream);
    hipStreamSynchronize(stream);
    hipLaunchKernel(Assign, dim3(1,1,1), dim3(NUM,1,1), 0, 0, Ad);
    hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost);
    hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream);
    hipStreamSynchronize(stream);
    for(int i=0;i<NUM;i++) {
        assert(A[i] == B[i]);
        assert(A[i] == C[i]);
    }
    hipHostFree(Am);
    hipHostFree(Cm);
    hipFree(Ad);
    delete[] A;
    delete[] B;
    delete[] C;
    passed();
}
void memcpytest2(size_t numElements, bool usePinnedHost, bool useHostToHost, bool useDeviceToDevice, bool useMemkindDefault)
{
    size_t sizeElements = numElements * sizeof(T);
    printf ("test: %s<%s> size=%lu (%6.2fMB) usePinnedHost:%d, useHostToHost:%d, useDeviceToDevice:%d, useMemkindDefault:%d\n", 
            __func__, 
            TYPENAME(T),
            sizeElements, sizeElements/1024.0/1024.0,
            usePinnedHost, useHostToHost, useDeviceToDevice, useMemkindDefault);


    T *A_d, *B_d, *C_d;
    T *A_h, *B_h, *C_h;


    HipTest::initArrays (&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, numElements, usePinnedHost);
    unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements);

    T *A_hh = NULL;
    T *B_hh = NULL;
    T *C_dd = NULL;



    if (useHostToHost) {
        if (usePinnedHost) {
            HIPCHECK ( hipHostMalloc((void**)&A_hh, sizeElements, hipHostMallocDefault) );
            HIPCHECK ( hipHostMalloc((void**)&B_hh, sizeElements, hipHostMallocDefault) );
        } else {
            A_hh = (T*)malloc(sizeElements);
            B_hh = (T*)malloc(sizeElements);
        }


        // Do some extra host-to-host copies here to mix things up:
        HIPCHECK ( hipMemcpy(A_hh, A_h, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost));
        HIPCHECK ( hipMemcpy(B_hh, B_h, sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyHostToHost));


        HIPCHECK ( hipMemcpy(A_d, A_hh, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
        HIPCHECK ( hipMemcpy(B_d, B_hh, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
    } else {
        HIPCHECK ( hipMemcpy(A_d, A_h, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
        HIPCHECK ( hipMemcpy(B_d, B_h, sizeElements, useMemkindDefault ? hipMemcpyDefault : hipMemcpyHostToDevice));
    }

    hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, B_d, C_d, numElements);

    if (useDeviceToDevice) {
        HIPCHECK ( hipMalloc(&C_dd, sizeElements) );

        // Do an extra device-to-device copies here to mix things up:
        HIPCHECK ( hipMemcpy(C_dd, C_d,  sizeElements, useMemkindDefault? hipMemcpyDefault : hipMemcpyDeviceToDevice));

        //Destroy the original C_d:
        HIPCHECK ( hipMemset(C_d, 0x5A, sizeElements));

        HIPCHECK ( hipMemcpy(C_h, C_dd, sizeElements, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost));
    } else {
        HIPCHECK ( hipMemcpy(C_h, C_d, sizeElements, useMemkindDefault? hipMemcpyDefault:hipMemcpyDeviceToHost));
    }

    HIPCHECK ( hipDeviceSynchronize() );
    HipTest::checkVectorADD(A_h, B_h, C_h, numElements);

    HipTest::freeArrays (A_d, B_d, C_d, A_h, B_h, C_h, usePinnedHost);

    printf ("  %s success\n", __func__);
}
Exemplo n.º 13
0
int main() {
    try {
        bool done = false;
        boost::fibers::fiber f1([&done]{
            std::cout << "f1: entered" << std::endl;
            try {
                hipStream_t stream;
                hipStreamCreate( & stream);
                int size = 1024 * 1024;
                int full_size = 20 * size;
                int * host_a, * host_b, * host_c;
                hipHostMalloc( & host_a, full_size * sizeof( int), hipHostMallocDefault);
                hipHostMalloc( & host_b, full_size * sizeof( int), hipHostMallocDefault);
                hipHostMalloc( & host_c, full_size * sizeof( int), hipHostMallocDefault);
                int * dev_a, * dev_b, * dev_c;
                hipMalloc( & dev_a, size * sizeof( int) );
                hipMalloc( & dev_b, size * sizeof( int) );
                hipMalloc( & dev_c, size * sizeof( int) );
                std::minstd_rand generator;
                std::uniform_int_distribution<> distribution(1, 6);
                for ( int i = 0; i < full_size; ++i) {
                    host_a[i] = distribution( generator);
                    host_b[i] = distribution( generator);
                }
                for ( int i = 0; i < full_size; i += size) {
                    hipMemcpyAsync( dev_a, host_a + i, size * sizeof( int), hipMemcpyHostToDevice, stream);
                    hipMemcpyAsync( dev_b, host_b + i, size * sizeof( int), hipMemcpyHostToDevice, stream);
                    hipLaunchKernel( vector_add, dim3(size / 256), dim3(256), 0, stream, dev_a, dev_b, dev_c, size);
                    hipMemcpyAsync( host_c + i, dev_c, size * sizeof( int), hipMemcpyDeviceToHost, stream);
                }
                auto result = boost::fibers::hip::waitfor_all( stream);
                BOOST_ASSERT( stream == std::get< 0 >( result) );
                BOOST_ASSERT( hipSuccess == std::get< 1 >( result) );
                std::cout << "f1: GPU computation finished" << std::endl;
                hipHostFree( host_a);
                hipHostFree( host_b);
                hipHostFree( host_c);
                hipFree( dev_a);
                hipFree( dev_b);
                hipFree( dev_c);
                hipStreamDestroy( stream);
                done = true;
            } catch ( std::exception const& ex) {
                std::cerr << "exception: " << ex.what() << std::endl;
            }
            std::cout << "f1: leaving" << std::endl;
        });
        boost::fibers::fiber f2([&done]{
            std::cout << "f2: entered" << std::endl;
            while ( ! done) {
                std::cout << "f2: sleeping" << std::endl;
                boost::this_fiber::sleep_for( std::chrono::milliseconds( 1 ) );
            }
            std::cout << "f2: leaving" << std::endl;
        });
        f1.join();
        f2.join();
        std::cout << "done." << std::endl;
        return EXIT_SUCCESS;
    } catch ( std::exception const& e) {
        std::cerr << "exception: " << e.what() << std::endl;
    } catch (...) {
        std::cerr << "unhandled exception" << std::endl;
    }
	return EXIT_FAILURE;
}