int main(){

	hipError_t err;
	float *A, *Ad;

	A = new float[LEN];

	for(int i=0;i<LEN;i++){
		A[i] = 1.0f;
	}

	hipStream_t stream;
	err = hipStreamCreate(&stream);
	check("Creating stream",err);

	err = hipMalloc(&Ad, SIZE);
	check("Allocating Ad memory on device", err);

	err = hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
	check("Doing memory copy from A to Ad", err);

	float mS = 0;
	hipEvent_t start, stop;
	hipEventCreate(&start);
	hipEventCreate(&stop);

	ResultDatabase resultDB[8];

	hipEventRecord(start);
	hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[0].AddResult(std::string("First Kernel Launch"), "", "uS", mS*1000); 
//	std::cout<<"First Kernel Launch: \t\t"<<mS*1000<<" uS"<<std::endl;
	resultDB[0].DumpSummary(std::cout);
	hipEventRecord(start);
	hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[1].AddResult(std::string("Second Kernel Launch"), "", "uS", mS*1000); 
//	std::cout<<"Second Kernel Launch: \t\t"<<mS*1000<<" uS"<<std::endl;
	resultDB[1].DumpSummary(std::cout);
	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
	}
	hipDeviceSynchronize();
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[2].AddResult(std::string("NULL Stream Sync dispatch wait"), "", "uS", mS*1000/ITER); 
	resultDB[2].DumpSummary(std::cout);
//	std::cout<<"NULL Stream Sync dispatch wait: \t"<<mS*1000/ITER<<" uS"<<std::endl;
	hipDeviceSynchronize();

	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
	}
	hipEventRecord(stop);
	hipDeviceSynchronize();
	hipEventElapsedTime(&mS, start, stop);
	resultDB[3].AddResult(std::string("NULL Stream Async dispatch wait"), "", "uS", mS*1000/ITER); 
	resultDB[3].DumpSummary(std::cout);
//	std::cout<<"NULL Stream Async dispatch wait: \t"<<mS*1000/ITER<<" uS"<<std::endl;
	hipDeviceSynchronize();

	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, stream, Ad);
		hipDeviceSynchronize();
	}
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[4].AddResult(std::string("Stream Sync dispatch wait"), "", "uS", mS*1000/ITER); 
	resultDB[4].DumpSummary(std::cout);
//	std::cout<<"Stream Sync dispatch wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
	hipDeviceSynchronize();
	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, stream, Ad);
	}
	hipDeviceSynchronize();
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[5].AddResult(std::string("Stream Async dispatch wait"), "", "uS", mS*1000/ITER); 
//	std::cout<<"Stream Async dispatch wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
	resultDB[5].DumpSummary(std::cout);
	hipDeviceSynchronize();

	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
	}
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[6].AddResult(std::string("NULL Stream No Wait"), "", "uS", mS*1000/ITER); 
	resultDB[6].DumpSummary(std::cout);
//	std::cout<<"NULL Stream Dispatch No Wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
	hipDeviceSynchronize();

	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, stream, Ad);
	}
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[7].AddResult(std::string("Stream Dispatch No Wait"), "", "uS", mS*1000/ITER); 
	resultDB[7].DumpSummary(std::cout);
//	std::cout<<"Stream Dispatch No Wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
	hipDeviceSynchronize();
}
Beispiel #2
0
void initializeEvents(hipEvent_t *start, hipEvent_t *stop){
  CUDA_SAFE_CALL( hipEventCreate(start) );
	CUDA_SAFE_CALL( hipEventCreate(stop) );
	CUDA_SAFE_CALL( hipEventRecord(*start, 0) );
}
Beispiel #3
0
void test_pingpong(hipStream_t stream, size_t numElements, int numInflight, int numPongs,
                   bool doHostSide) {
    HIPASSERT(numElements % numInflight == 0);  // Must be evenly divisible.
    size_t Nbytes = numElements * sizeof(T);
    size_t eachCopyElements = numElements / numInflight;
    size_t eachCopyBytes = eachCopyElements * sizeof(T);

    unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements);

    printf(
        "------------------------------------------------------------------------------------------"
        "-----\n");
    printf(
        "testing: %s<%s>  Nbytes=%zu (%6.1f MB) numPongs=%d numInflight=%d eachCopyElements=%zu "
        "eachCopyBytes=%zu\n",
        __func__, HostTraits<AllocType>::Name(), Nbytes, (double)(Nbytes) / 1024.0 / 1024.0,
        numPongs, numInflight, eachCopyElements, eachCopyBytes);

    T* A_h = NULL;
    T* A_d = NULL;

    A_h = (T*)(HostTraits<AllocType>::Alloc(Nbytes));
    HIPCHECK(hipMalloc(&A_d, Nbytes));

    // Initialize the host array:
    const T initValue = 13;
    const T deviceConst = 2;
    const T hostConst = 10000;
    for (size_t i = 0; i < numElements; i++) {
        A_h[i] = initValue + i;
    }


    for (int k = 0; k < numPongs; k++) {
        for (int i = 0; i < numInflight; i++) {
            HIPASSERT(A_d + i * eachCopyElements < A_d + Nbytes);
            HIPCHECK(hipMemcpyAsync(&A_d[i * eachCopyElements], &A_h[i * eachCopyElements],
                                    eachCopyBytes, hipMemcpyHostToDevice, stream));
        }

        hipLaunchKernel(addK<T>, dim3(blocks), dim3(threadsPerBlock), 0, stream, A_d, 2,
                        numElements);

        for (int i = 0; i < numInflight; i++) {
            HIPASSERT(A_d + i * eachCopyElements < A_d + Nbytes);
            HIPCHECK(hipMemcpyAsync(&A_h[i * eachCopyElements], &A_d[i * eachCopyElements],
                                    eachCopyBytes, hipMemcpyDeviceToHost, stream));
        }

        if (doHostSide) {
            assert(0);
#if 0
            hipEvent_t e;
            HIPCHECK(hipEventCreate(&e));
#endif
            HIPCHECK(hipDeviceSynchronize());
            for (size_t i = 0; i < numElements; i++) {
                A_h[i] += hostConst;
            }
        }
    };

    HIPCHECK(hipDeviceSynchronize());


    // Verify we copied back all the data correctly:
    for (size_t i = 0; i < numElements; i++) {
        T gold = initValue + i;
        // Perform calcs in same order as test above to replicate FP order-of-operations:
        for (int k = 0; k < numPongs; k++) {
            gold += deviceConst;
            if (doHostSide) {
                gold += hostConst;
            }
        }

        if (gold != A_h[i]) {
            std::cout << i << ": gold=" << gold << " out=" << A_h[i] << std::endl;
            HIPASSERT(gold == A_h[i]);
        }
    }


    HIPCHECK(hipHostFree(A_h));
    HIPCHECK(hipFree(A_d));
}
Beispiel #4
0
void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode syncMode, bool expectMismatch)
{

    // This test sends a long-running kernel to the null stream, then tests to see if the
    // specified synchronization technique is effective.
    //
    // Some syncMode are not expected to correctly sync (for example "syncNone").  in these
    // cases the test sets expectMismatch and the check logic below will attempt to ensure that
    // the undesired synchronization did not occur - ie ensure the kernel is still running and did
    // not yet update the stop event.  This can be tricky since if the kernel runs fast enough it
    // may complete before the check.  To prevent this, the addCountReverse has a count parameter
    // which causes it to loop repeatedly, and the results are checked in reverse order.
    //
    // Tests with expectMismatch=true should ensure the kernel finishes correctly. This results
    // are checked and we test to make sure stop event has completed.

    if (!(testMask & p_tests)) {
        return;
    }
    printf ("\ntest 0x%02x: syncMode=%s expectMismatch=%d\n",
            testMask, syncModeString(syncMode), expectMismatch);

    size_t sizeBytes = numElements * sizeof(int);

    int count =100;
    int init0 = 0;
    HIPCHECK(hipMemset(C_d, init0, sizeBytes));
    for (int i=0; i<numElements; i++) {
        C_h[i] = -1; // initialize
    }

    hipStream_t otherStream = 0;
    unsigned flags = (syncMode == syncMarkerThenOtherNonBlockingStream) ?  hipStreamNonBlocking : hipStreamDefault;
    HIPCHECK(hipStreamCreateWithFlags(&otherStream, flags));
    hipEvent_t stop, otherStreamEvent;
    HIPCHECK(hipEventCreate(&stop));
    HIPCHECK(hipEventCreate(&otherStreamEvent));


    unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements);
    // Launch kernel into null stream, should result in C_h == count.
    hipLaunchKernelGGL(
        HipTest::addCountReverse,
        dim3(blocks),
        dim3(threadsPerBlock),
        0,
        0 /*stream*/,
        static_cast<const int*>(C_d),
        C_h,
        numElements,
        count);
    HIPCHECK(hipEventRecord(stop, 0/*default*/));

    switch (syncMode) {
        case syncNone:
            break;
        case syncNullStream:
            HIPCHECK(hipStreamSynchronize(0));  // wait on host for null stream:
            break;
        case syncOtherStream:
            // Does this synchronize with the null stream?
            HIPCHECK(hipStreamSynchronize(otherStream));
            break;
        case syncMarkerThenOtherStream:
        case syncMarkerThenOtherNonBlockingStream:

            // this may wait for NULL stream depending hipStreamNonBlocking flag above
            HIPCHECK(hipEventRecord(otherStreamEvent, otherStream));

            HIPCHECK(hipStreamSynchronize(otherStream));
            break;
        case syncDevice:
            HIPCHECK(hipDeviceSynchronize());
            break;
        default:
            assert(0);
    };

    hipError_t done = hipEventQuery(stop);

    if (expectMismatch) {
        assert (done == hipErrorNotReady);
    } else {
        assert (done == hipSuccess);
    }

    int mismatches = 0;
    int expected = init0 + count;
    for (int i=0; i<numElements; i++) {
        bool compareEqual = (C_h[i] == expected);
        if (!compareEqual) {
            mismatches ++;
            if  (!expectMismatch) {
                printf ("C_h[%d] (%d) != %d\n", i, C_h[i], expected);
                assert(C_h[i] == expected);
            }
        }
    }

    if (expectMismatch) {
        assert (mismatches > 0);
    }


    HIPCHECK(hipStreamDestroy(otherStream));
    HIPCHECK(hipEventDestroy(stop));
    HIPCHECK(hipEventDestroy(otherStreamEvent));

    HIPCHECK(hipDeviceSynchronize());

    printf ("test:   OK - %d mismatches (%6.2f%%)\n",  mismatches, ((double)(mismatches)*100.0)/numElements);
}