コード例 #1
0
ファイル: hipMemcpyAsync.cpp プロジェクト: ssahasra/HIP
int main(int argc, char* argv[]) {
    HipTest::parseStandardArguments(argc, argv, false);
    parseMyArguments(argc, argv);


    printf("info: set device to %d  tests=%x\n", p_gpuDevice, p_tests);
    HIPCHECK(hipSetDevice(p_gpuDevice));

    if (p_tests & 0x01) {
        simpleNegTest();
    }

    if (p_tests & 0x02) {
        hipStream_t stream;
        HIPCHECK(hipStreamCreate(&stream));

        test_manyInflightCopies<float>(stream, 1024, 16, true);
        test_manyInflightCopies<float>(
            stream, 1024, 4, true);  // verify we re-use the same entries instead of growing pool.
        test_manyInflightCopies<float>(stream, 1024 * 8, 64, false);

        HIPCHECK(hipStreamDestroy(stream));
    }


    if (p_tests & 0x04) {
        test_chunkedAsyncExample(p_streams, true, true, true);     // Easy sync version
        test_chunkedAsyncExample(p_streams, false, true, true);    // Easy sync version
        test_chunkedAsyncExample(p_streams, false, false, true);   // Some async
        test_chunkedAsyncExample(p_streams, false, false, false);  // All async
    }

    if (p_tests & 0x08) {
        hipStream_t stream;
        HIPCHECK(hipStreamCreate(&stream));

        //        test_pingpong<int, Pinned>(stream, 1024*1024*32, 1, 1, false);
        //        test_pingpong<int, Pinned>(stream, 1024*1024*32, 1, 10, false);

        HIPCHECK(hipStreamDestroy(stream));
    }


    passed();
}
コード例 #2
0
ファイル: hipMemcpyAsync2.cpp プロジェクト: ssahasra/HIP
int main() {
    float *A, *Ad;
    HIPCHECK(hipHostMalloc((void**)&A, SIZE, hipHostMallocDefault));
    HIPCHECK(hipMalloc((void**)&Ad, SIZE));
    hipStream_t stream;
    HIPCHECK(hipStreamCreate(&stream));
    for (int i = 0; i < SIZE; i++) {
        HIPCHECK(hipMemcpyAsync(Ad, A, SIZE, hipMemcpyHostToDevice, stream));
        HIPCHECK(hipDeviceSynchronize());
    }
}
コード例 #3
0
ファイル: hipMemcpyDtoDAsync.cpp プロジェクト: ssahasra/HIP
int main() {
    size_t Nbytes = N * sizeof(int);
    int numDevices = 0;
    int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d;
    int *A_h, *B_h, *C_h;
    hipStream_t s;

    HIPCHECK(hipGetDeviceCount(&numDevices));
    if (numDevices > 1) {
        HIPCHECK(hipSetDevice(0));
        unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
        HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
        HIPCHECK(hipSetDevice(1));
        HIPCHECK(hipMalloc(&X_d, Nbytes));
        HIPCHECK(hipMalloc(&Y_d, Nbytes));
        HIPCHECK(hipMalloc(&Z_d, Nbytes));


        HIPCHECK(hipSetDevice(0));
        HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
        HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
        hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
                        static_cast<const int*>(A_d), static_cast<const int*>(B_d), C_d, N);
        HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
        HIPCHECK(hipDeviceSynchronize());
        HipTest::checkVectorADD(A_h, B_h, C_h, N);

        HIPCHECK(hipSetDevice(1));
        HIPCHECK(hipStreamCreate(&s));
        HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)X_d, (hipDeviceptr_t)A_d, Nbytes, s));
        HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)Y_d, (hipDeviceptr_t)B_d, Nbytes, s));

        hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
                        static_cast<const int*>(X_d), static_cast<const int*>(Y_d), Z_d, N);
        HIPCHECK(hipMemcpyDtoHAsync(C_h, (hipDeviceptr_t)Z_d, Nbytes, s));
        HIPCHECK(hipStreamSynchronize(s));
        HIPCHECK(hipDeviceSynchronize());

        HipTest::checkVectorADD(A_h, B_h, C_h, N);
        HIPCHECK(hipStreamDestroy(s));
        HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
        HIPCHECK(hipFree(X_d));
        HIPCHECK(hipFree(Y_d));
        HIPCHECK(hipFree(Z_d));
    }

    passed();
}
コード例 #4
0
int main(int argc, char **argv){
	HipTest::parseStandardArguments(argc, argv, true);


	hipStream_t stream[3];
	for(int i=0;i<3;i++){
		HIPCHECK(hipStreamCreate(&stream[i]));
	}

	const size_t size = N * sizeof(float);

	std::thread t1(run1, size, stream[0]);
	std::thread t2(run1, size, stream[0]);
	std::thread t3(run, size, stream[1], stream[2]);
	t1.join();
//	std::cout<<"T1"<<std::endl;
	t2.join();
//	std::cout<<"T2"<<std::endl;
	t3.join(); 
	passed();
}
コード例 #5
0
// Create a lot of streams and then destroy 'em.
void createThenDestroyStreams(int iterations, int burstSize)
{
    hipStream_t *streams = new hipStream_t[burstSize];

    for (int i=0; i<iterations; i++) {
        if (p_verbose & 0x1) {
          printf ("%s iter=%d, create %d then destroy %d\n", __func__, i, burstSize, burstSize);
        }
        for (int j=0; j<burstSize; j++) {
            if (p_verbose & 0x2) {
                printf ("  %d.%d streamCreate\n", i, j);
            }
            HIPCHECK( hipStreamCreate(&streams[j]));
        }
        for (int j=0; j<burstSize; j++) {
            if (p_verbose & 0x2) {
                printf ("  %d.%d streamDestroy\n", i, j);
            }
            HIPCHECK( hipStreamDestroy(streams[j]));
        }
    }

    delete streams;
}
コード例 #6
0
int main(){

	hipError_t err;
	float *A, *Ad;

	A = new float[LEN];

	for(int i=0;i<LEN;i++){
		A[i] = 1.0f;
	}

	hipStream_t stream;
	err = hipStreamCreate(&stream);
	check("Creating stream",err);

	err = hipMalloc(&Ad, SIZE);
	check("Allocating Ad memory on device", err);

	err = hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
	check("Doing memory copy from A to Ad", err);

	float mS = 0;
	hipEvent_t start, stop;
	hipEventCreate(&start);
	hipEventCreate(&stop);

	ResultDatabase resultDB[8];

	hipEventRecord(start);
	hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[0].AddResult(std::string("First Kernel Launch"), "", "uS", mS*1000); 
//	std::cout<<"First Kernel Launch: \t\t"<<mS*1000<<" uS"<<std::endl;
	resultDB[0].DumpSummary(std::cout);
	hipEventRecord(start);
	hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[1].AddResult(std::string("Second Kernel Launch"), "", "uS", mS*1000); 
//	std::cout<<"Second Kernel Launch: \t\t"<<mS*1000<<" uS"<<std::endl;
	resultDB[1].DumpSummary(std::cout);
	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
	}
	hipDeviceSynchronize();
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[2].AddResult(std::string("NULL Stream Sync dispatch wait"), "", "uS", mS*1000/ITER); 
	resultDB[2].DumpSummary(std::cout);
//	std::cout<<"NULL Stream Sync dispatch wait: \t"<<mS*1000/ITER<<" uS"<<std::endl;
	hipDeviceSynchronize();

	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
	}
	hipEventRecord(stop);
	hipDeviceSynchronize();
	hipEventElapsedTime(&mS, start, stop);
	resultDB[3].AddResult(std::string("NULL Stream Async dispatch wait"), "", "uS", mS*1000/ITER); 
	resultDB[3].DumpSummary(std::cout);
//	std::cout<<"NULL Stream Async dispatch wait: \t"<<mS*1000/ITER<<" uS"<<std::endl;
	hipDeviceSynchronize();

	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, stream, Ad);
		hipDeviceSynchronize();
	}
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[4].AddResult(std::string("Stream Sync dispatch wait"), "", "uS", mS*1000/ITER); 
	resultDB[4].DumpSummary(std::cout);
//	std::cout<<"Stream Sync dispatch wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
	hipDeviceSynchronize();
	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, stream, Ad);
	}
	hipDeviceSynchronize();
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[5].AddResult(std::string("Stream Async dispatch wait"), "", "uS", mS*1000/ITER); 
//	std::cout<<"Stream Async dispatch wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
	resultDB[5].DumpSummary(std::cout);
	hipDeviceSynchronize();

	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
	}
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[6].AddResult(std::string("NULL Stream No Wait"), "", "uS", mS*1000/ITER); 
	resultDB[6].DumpSummary(std::cout);
//	std::cout<<"NULL Stream Dispatch No Wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
	hipDeviceSynchronize();

	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, stream, Ad);
	}
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[7].AddResult(std::string("Stream Dispatch No Wait"), "", "uS", mS*1000/ITER); 
	resultDB[7].DumpSummary(std::cout);
//	std::cout<<"Stream Dispatch No Wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
	hipDeviceSynchronize();
}
コード例 #7
0
ファイル: hipMemcpyAsync.cpp プロジェクト: ssahasra/HIP
// IN: nStreams : number of streams to use for the test
// IN :useNullStream - use NULL stream.  Synchronizes everything.
// IN: useSyncMemcpyH2D - use sync memcpy (no overlap) for H2D
// IN: useSyncMemcpyD2H - use sync memcpy (no overlap) for D2H
void test_chunkedAsyncExample(int nStreams, bool useNullStream, bool useSyncMemcpyH2D,
                              bool useSyncMemcpyD2H) {
    size_t Nbytes = N * sizeof(int);
    printf("testing: %s(useNullStream=%d, useSyncMemcpyH2D=%d, useSyncMemcpyD2H=%d)  ", __func__,
           useNullStream, useSyncMemcpyH2D, useSyncMemcpyD2H);
    printf("Nbytes=%zu (%6.1f MB)\n", Nbytes, (double)(Nbytes) / 1024.0 / 1024.0);

    int *A_d, *B_d, *C_d;
    int *A_h, *B_h, *C_h;

    HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, true);


    unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);


    hipStream_t* stream = (hipStream_t*)malloc(sizeof(hipStream_t) * nStreams);
    if (useNullStream) {
        nStreams = 1;
        stream[0] = NULL;
    } else {
        for (int i = 0; i < nStreams; ++i) {
            HIPCHECK(hipStreamCreate(&stream[i]));
        }
    }


    size_t workLeft = N;
    size_t workPerStream = N / nStreams;
    for (int i = 0; i < nStreams; ++i) {
        size_t work = (workLeft < workPerStream) ? workLeft : workPerStream;
        size_t workBytes = work * sizeof(int);

        size_t offset = i * workPerStream;
        HIPASSERT(A_d + offset < A_d + Nbytes);
        HIPASSERT(B_d + offset < B_d + Nbytes);
        HIPASSERT(C_d + offset < C_d + Nbytes);
        if (useSyncMemcpyH2D) {
            HIPCHECK(hipMemcpy(&A_d[offset], &A_h[offset], workBytes, hipMemcpyHostToDevice));
            HIPCHECK(hipMemcpy(&B_d[offset], &B_h[offset], workBytes, hipMemcpyHostToDevice));
        } else {
            HIPCHECK(hipMemcpyAsync(&A_d[offset], &A_h[offset], workBytes, hipMemcpyHostToDevice,
                                    stream[i]));
            HIPCHECK(hipMemcpyAsync(&B_d[offset], &B_h[offset], workBytes, hipMemcpyHostToDevice,
                                    stream[i]));
        };

        hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i],
                        &A_d[offset], &B_d[offset], &C_d[offset], work);

        if (useSyncMemcpyD2H) {
            HIPCHECK(hipMemcpy(&C_h[offset], &C_d[offset], workBytes, hipMemcpyDeviceToHost));
        } else {
            HIPCHECK(hipMemcpyAsync(&C_h[offset], &C_d[offset], workBytes, hipMemcpyDeviceToHost,
                                    stream[i]));
        }
    }


    HIPCHECK(hipDeviceSynchronize());


    HipTest::checkVectorADD(A_h, B_h, C_h, N);

    HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, true);

    free(stream);
};
コード例 #8
0
ファイル: hipTestDeviceSymbol.cpp プロジェクト: kknox/HIP
int main()
{
    int *A, *Am, *B, *Ad, *C, *Cm;
    A = new int[NUM];
    B = new int[NUM];
    C = new int[NUM];
    for(int i=0;i<NUM;i++) {
        A[i] = -1*i;
        B[i] = 0;
        C[i] = 0;
    }

    hipMalloc((void**)&Ad, SIZE);
    hipHostMalloc((void**)&Am, SIZE);
    hipHostMalloc((void**)&Cm, SIZE);
    for(int i=0;i<NUM;i++) {
        Am[i] = -1*i;
        Cm[i] = 0;
    }

    hipStream_t stream;
    hipStreamCreate(&stream);
    hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), Am, SIZE, 0, hipMemcpyHostToDevice, stream);
    hipStreamSynchronize(stream);
    hipLaunchKernel(Assign, dim3(1,1,1), dim3(NUM,1,1), 0, 0, Ad);
    hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost);
    hipMemcpyFromSymbolAsync(Cm, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream);
    hipStreamSynchronize(stream);
    for(int i=0;i<NUM;i++) {
        assert(Am[i] == B[i]);
        assert(Am[i] == Cm[i]);
    }

    for(int i=0;i<NUM;i++) {
        A[i] = -2*i;
        B[i] = 0;
    }

    hipMemcpyToSymbol(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice);
    hipLaunchKernel(Assign, dim3(1,1,1), dim3(NUM,1,1), 0, 0, Ad);
    hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost);
    hipMemcpyFromSymbol(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost);
    for(int i=0;i<NUM;i++) {
        assert(A[i] == B[i]);
        assert(A[i] == C[i]);
    }

    for(int i=0;i<NUM;i++) {
        A[i] = -3*i;
        B[i] = 0;
    }

    hipMemcpyToSymbolAsync(HIP_SYMBOL(globalIn), A, SIZE, 0, hipMemcpyHostToDevice, stream);
    hipStreamSynchronize(stream);
    hipLaunchKernel(Assign, dim3(1,1,1), dim3(NUM,1,1), 0, 0, Ad);
    hipMemcpy(B, Ad, SIZE, hipMemcpyDeviceToHost);
    hipMemcpyFromSymbolAsync(C, HIP_SYMBOL(globalOut), SIZE, 0, hipMemcpyDeviceToHost, stream);
    hipStreamSynchronize(stream);
    for(int i=0;i<NUM;i++) {
        assert(A[i] == B[i]);
        assert(A[i] == C[i]);
    }
    hipHostFree(Am);
    hipHostFree(Cm);
    hipFree(Ad);
    delete[] A;
    delete[] B;
    delete[] C;
    passed();
}
コード例 #9
0
int main() {
    try {
        bool done = false;
        boost::fibers::fiber f1([&done]{
            std::cout << "f1: entered" << std::endl;
            try {
                hipStream_t stream;
                hipStreamCreate( & stream);
                int size = 1024 * 1024;
                int full_size = 20 * size;
                int * host_a, * host_b, * host_c;
                hipHostMalloc( & host_a, full_size * sizeof( int), hipHostMallocDefault);
                hipHostMalloc( & host_b, full_size * sizeof( int), hipHostMallocDefault);
                hipHostMalloc( & host_c, full_size * sizeof( int), hipHostMallocDefault);
                int * dev_a, * dev_b, * dev_c;
                hipMalloc( & dev_a, size * sizeof( int) );
                hipMalloc( & dev_b, size * sizeof( int) );
                hipMalloc( & dev_c, size * sizeof( int) );
                std::minstd_rand generator;
                std::uniform_int_distribution<> distribution(1, 6);
                for ( int i = 0; i < full_size; ++i) {
                    host_a[i] = distribution( generator);
                    host_b[i] = distribution( generator);
                }
                for ( int i = 0; i < full_size; i += size) {
                    hipMemcpyAsync( dev_a, host_a + i, size * sizeof( int), hipMemcpyHostToDevice, stream);
                    hipMemcpyAsync( dev_b, host_b + i, size * sizeof( int), hipMemcpyHostToDevice, stream);
                    hipLaunchKernel( vector_add, dim3(size / 256), dim3(256), 0, stream, dev_a, dev_b, dev_c, size);
                    hipMemcpyAsync( host_c + i, dev_c, size * sizeof( int), hipMemcpyDeviceToHost, stream);
                }
                auto result = boost::fibers::hip::waitfor_all( stream);
                BOOST_ASSERT( stream == std::get< 0 >( result) );
                BOOST_ASSERT( hipSuccess == std::get< 1 >( result) );
                std::cout << "f1: GPU computation finished" << std::endl;
                hipHostFree( host_a);
                hipHostFree( host_b);
                hipHostFree( host_c);
                hipFree( dev_a);
                hipFree( dev_b);
                hipFree( dev_c);
                hipStreamDestroy( stream);
                done = true;
            } catch ( std::exception const& ex) {
                std::cerr << "exception: " << ex.what() << std::endl;
            }
            std::cout << "f1: leaving" << std::endl;
        });
        boost::fibers::fiber f2([&done]{
            std::cout << "f2: entered" << std::endl;
            while ( ! done) {
                std::cout << "f2: sleeping" << std::endl;
                boost::this_fiber::sleep_for( std::chrono::milliseconds( 1 ) );
            }
            std::cout << "f2: leaving" << std::endl;
        });
        f1.join();
        f2.join();
        std::cout << "done." << std::endl;
        return EXIT_SUCCESS;
    } catch ( std::exception const& e) {
        std::cerr << "exception: " << e.what() << std::endl;
    } catch (...) {
        std::cerr << "unhandled exception" << std::endl;
    }
	return EXIT_FAILURE;
}