void run(size_t size, hipStream_t stream1, hipStream_t stream2){
	float *Ah, *Bh, *Cd, *Dd, *Eh;
	float *Ahh, *Bhh, *Cdd, *Ddd, *Ehh;

	HIPCHECK(hipHostMalloc((void**)&Ah, size, hipHostMallocDefault));
	HIPCHECK(hipHostMalloc((void**)&Bh, size, hipHostMallocDefault));
	HIPCHECK(hipMalloc(&Cd, size));
	HIPCHECK(hipMalloc(&Dd, size));
	HIPCHECK(hipHostMalloc((void**)&Eh, size, hipHostMallocDefault));
	HIPCHECK(hipHostMalloc((void**)&Ahh, size, hipHostMallocDefault));
	HIPCHECK(hipHostMalloc((void**)&Bhh, size, hipHostMallocDefault));
	HIPCHECK(hipMalloc(&Cdd, size));
	HIPCHECK(hipMalloc(&Ddd, size));
	HIPCHECK(hipHostMalloc((void**)&Ehh, size, hipHostMallocDefault));

	HIPCHECK(hipMemcpyAsync(Bh, Ah, size, hipMemcpyHostToHost, stream1));
	HIPCHECK(hipMemcpyAsync(Bhh, Ahh, size, hipMemcpyHostToHost, stream2));
	HIPCHECK(hipMemcpyAsync(Cd, Bh, size, hipMemcpyHostToDevice, stream1));
	HIPCHECK(hipMemcpyAsync(Cdd, Bhh, size, hipMemcpyHostToDevice, stream2));
	hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream1, Cd);
	hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream2, Cdd);
	HIPCHECK(hipMemcpyAsync(Dd, Cd, size, hipMemcpyDeviceToDevice, stream1));
	HIPCHECK(hipMemcpyAsync(Ddd, Cdd, size, hipMemcpyDeviceToDevice, stream2));
	HIPCHECK(hipMemcpyAsync(Eh, Dd, size, hipMemcpyDeviceToHost, stream1));
	HIPCHECK(hipMemcpyAsync(Ehh, Ddd, size, hipMemcpyDeviceToHost, stream2));
	HIPCHECK(hipDeviceSynchronize());
	HIPASSERT(Eh[10] = Ah[10] + 1.0f);
	HIPASSERT(Ehh[10] = Ahh[10] + 1.0f);
}
示例#2
0
void test_manyInflightCopies(hipStream_t stream, int numElements, int numCopies,
                             bool syncBetweenCopies) {
    size_t Nbytes = numElements * sizeof(T);
    size_t eachCopyElements = numElements / numCopies;
    size_t eachCopyBytes = eachCopyElements * sizeof(T);

    printf(
        "------------------------------------------------------------------------------------------"
        "-----\n");
    printf(
        "testing: %s  Nbytes=%zu (%6.1f MB) numCopies=%d eachCopyElements=%zu eachCopyBytes=%zu\n",
        __func__, Nbytes, (double)(Nbytes) / 1024.0 / 1024.0, numCopies, eachCopyElements,
        eachCopyBytes);

    T* A_d;
    T *A_h1, *A_h2;

    HIPCHECK(hipHostMalloc((void**)&A_h1, Nbytes, hipHostMallocDefault));
    HIPCHECK(hipHostMalloc((void**)&A_h2, Nbytes, hipHostMallocDefault));
    HIPCHECK(hipMalloc(&A_d, Nbytes));

    for (int i = 0; i < numElements; i++) {
        A_h1[i] = 3.14f + static_cast<T>(i);
    }


    // stream=0; // fixme TODO


    for (int i = 0; i < numCopies; i++) {
        HIPASSERT(A_d + i * eachCopyElements < A_d + Nbytes);
        HIPCHECK(hipMemcpyAsync(&A_d[i * eachCopyElements], &A_h1[i * eachCopyElements],
                                eachCopyBytes, hipMemcpyHostToDevice, stream));
    }

    if (syncBetweenCopies) {
        HIPCHECK(hipDeviceSynchronize());
    }

    for (int i = 0; i < numCopies; i++) {
        HIPASSERT(A_d + i * eachCopyElements < A_d + Nbytes);
        HIPCHECK(hipMemcpyAsync(&A_h2[i * eachCopyElements], &A_d[i * eachCopyElements],
                                eachCopyBytes, hipMemcpyDeviceToHost, stream));
    }

    HIPCHECK(hipDeviceSynchronize());


    // Verify we copied back all the data correctly:
    for (int i = 0; i < numElements; i++) {
        HIPASSERT(A_h1[i] == A_h2[i]);
    }


    HIPCHECK(hipHostFree(A_h1));
    HIPCHECK(hipHostFree(A_h2));
    HIPCHECK(hipFree(A_d));
}
示例#3
0
int main()
{
    int numDevices = 0;
    int major,minor;
    hipDevice_t device;
    HIPCHECK(hipGetDeviceCount(&numDevices));
    for(int i=0;i<numDevices;i++){
        HIPCHECK(hipDeviceGet(&device,i));
        HIPCHECK(hipDeviceComputeCapability(&major, &minor, device));
        HIPASSERT(major >= 0);
        HIPASSERT(minor >= 0);
    }
    passed();
}
示例#4
0
int main(int argc, char *argv[])
{
    hipStream_t stream;
    unsigned int flags;
    HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamDefault));
    HIPCHECK(hipStreamGetFlags(stream, &flags));
    HIPASSERT(flags == 0); 
    HIPCHECK(hipStreamDestroy(stream));

    HIPCHECK(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking));
    HIPCHECK(hipStreamGetFlags(stream, &flags));
    HIPASSERT(flags == 1); 
    HIPCHECK(hipStreamDestroy(stream));

    passed();
}
int main()
{
    int numDevices = 0;
    int device;
    HIPCHECK(hipGetDeviceCount(&numDevices));
    for(int i=0;i<numDevices;i++){
        HIPCHECK(hipSetDevice(i));
        HIPCHECK(hipGetDevice(&device));
        HIPASSERT(device == i);
    }
    passed();
}
示例#6
0
int main() {
    int numDevices = 0;
    char name[len];
    hipDevice_t device;
    HIPCHECK(hipGetDeviceCount(&numDevices));
    for (int i = 0; i < numDevices; i++) {
        HIPCHECK(hipDeviceGet(&device, i));
        HIPCHECK(hipDeviceGetName(name, len, device));
        HIPASSERT(name != "");
    }
    passed();
}
示例#7
0
int main() {
    int numDevices = 0;
    size_t totMem;
    hipDevice_t device;
    HIPCHECK(hipGetDeviceCount(&numDevices));
    for (int i = 0; i < numDevices; i++) {
        HIPCHECK(hipDeviceGet(&device, i));
        HIPCHECK(hipDeviceTotalMem(&totMem, device));
        HIPASSERT(totMem != 0);
    }
    passed();
}
示例#8
0
bool testhipMemset3D(int memsetval,int p_gpuDevice)
{
    size_t numH = 256;
    size_t numW = 256;
    size_t depth = 10;
    size_t width = numW * sizeof(char);
    size_t sizeElements = width * numH * depth;
    size_t elements = numW* numH* depth;


	printf ("testhipMemset3D memsetval=%2x device=%d\n", memsetval, p_gpuDevice);
    char *A_h;
    bool testResult = true;
    hipExtent extent = make_hipExtent(width, numH, depth);
    hipPitchedPtr devPitchedPtr;

    HIPCHECK(hipMalloc3D(&devPitchedPtr, extent));
	A_h = (char*)malloc(sizeElements);
	HIPASSERT(A_h != NULL);
	for (size_t i=0; i<elements; i++) {
        A_h[i] = 1;
    }
	HIPCHECK ( hipMemset3D( devPitchedPtr, memsetval, extent) );
	hipMemcpy3DParms myparms = {0};
	myparms.srcPos = make_hipPos(0,0,0);
	myparms.dstPos = make_hipPos(0,0,0);
	myparms.dstPtr = make_hipPitchedPtr(A_h, width , numW, numH);
	myparms.srcPtr = devPitchedPtr;
	myparms.extent = extent;
#ifdef __HIP_PLATFORM_NVCC__
	myparms.kind = hipMemcpyKindToCudaMemcpyKind(hipMemcpyDeviceToHost);
#else
    myparms.kind = hipMemcpyDeviceToHost;
#endif
    HIPCHECK(hipMemcpy3D(&myparms));

    for (int i=0; i<elements; i++) {
        if (A_h[i] != memsetval) {
			testResult = false;
            printf("mismatch at index:%d computed:%02x, memsetval:%02x\n", i, (int)A_h[i], (int)memsetval);
            break;
        }
    }
    HIPCHECK(hipFree(devPitchedPtr.ptr));
    free(A_h);
    return testResult;
}
void run1(size_t size, hipStream_t stream){
	float *Ah, *Bh, *Cd, *Dd, *Eh;

	HIPCHECK(hipHostMalloc((void**)&Ah, size, hipHostMallocDefault));
	HIPCHECK(hipHostMalloc((void**)&Bh, size, hipHostMallocDefault));
	HIPCHECK(hipMalloc(&Cd, size));
	HIPCHECK(hipMalloc(&Dd, size));
	HIPCHECK(hipHostMalloc((void**)&Eh, size, hipHostMallocDefault));

	for(int i=0;i<N;i++){
		Ah[i] = 1.0f;
	}

	HIPCHECK(hipMemcpyAsync(Bh, Ah, size, hipMemcpyHostToHost, stream));
	HIPCHECK(hipMemcpyAsync(Cd, Bh, size, hipMemcpyHostToDevice, stream));
	hipLaunchKernel(HIP_KERNEL_NAME(Inc), dim3(N/500), dim3(500), 0, stream, Cd);
	HIPCHECK(hipMemcpyAsync(Dd, Cd, size, hipMemcpyDeviceToDevice, stream));
	HIPCHECK(hipMemcpyAsync(Eh, Dd, size, hipMemcpyDeviceToHost, stream));
	HIPCHECK(hipDeviceSynchronize());
	HIPASSERT(Eh[10] == Ah[10] + 1.0f);
}
示例#10
0
void simpleNegTest() {
    printf("testing: %s\n", __func__);
    hipError_t e;
    float *A_malloc, *A_pinned, *A_d;

    size_t Nbytes = N * sizeof(float);
    A_malloc = (float*)malloc(Nbytes);
    HIPCHECK(hipHostMalloc((void**)&A_pinned, Nbytes, hipHostMallocDefault));
    A_d = NULL;
    HIPCHECK(hipMalloc(&A_d, Nbytes));
    HIPASSERT(A_d != NULL);
    // Can't use default with async copy
    e = hipMemcpyAsync(A_pinned, A_d, Nbytes, hipMemcpyDefault, NULL);
    //    HIPASSERT (e == hipSuccess);


    // Not sure what happens here, the memory must be pinned.
    e = hipMemcpyAsync(A_malloc, A_d, Nbytes, hipMemcpyDeviceToHost, NULL);

    printf("  async memcpy of A_malloc to A_d. Result=%d\n", e);
    // HIPASSERT (e==hipErrorInvalidValue);
}
示例#11
0
void test_pingpong(hipStream_t stream, size_t numElements, int numInflight, int numPongs,
                   bool doHostSide) {
    HIPASSERT(numElements % numInflight == 0);  // Must be evenly divisible.
    size_t Nbytes = numElements * sizeof(T);
    size_t eachCopyElements = numElements / numInflight;
    size_t eachCopyBytes = eachCopyElements * sizeof(T);

    unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements);

    printf(
        "------------------------------------------------------------------------------------------"
        "-----\n");
    printf(
        "testing: %s<%s>  Nbytes=%zu (%6.1f MB) numPongs=%d numInflight=%d eachCopyElements=%zu "
        "eachCopyBytes=%zu\n",
        __func__, HostTraits<AllocType>::Name(), Nbytes, (double)(Nbytes) / 1024.0 / 1024.0,
        numPongs, numInflight, eachCopyElements, eachCopyBytes);

    T* A_h = NULL;
    T* A_d = NULL;

    A_h = (T*)(HostTraits<AllocType>::Alloc(Nbytes));
    HIPCHECK(hipMalloc(&A_d, Nbytes));

    // Initialize the host array:
    const T initValue = 13;
    const T deviceConst = 2;
    const T hostConst = 10000;
    for (size_t i = 0; i < numElements; i++) {
        A_h[i] = initValue + i;
    }


    for (int k = 0; k < numPongs; k++) {
        for (int i = 0; i < numInflight; i++) {
            HIPASSERT(A_d + i * eachCopyElements < A_d + Nbytes);
            HIPCHECK(hipMemcpyAsync(&A_d[i * eachCopyElements], &A_h[i * eachCopyElements],
                                    eachCopyBytes, hipMemcpyHostToDevice, stream));
        }

        hipLaunchKernel(addK<T>, dim3(blocks), dim3(threadsPerBlock), 0, stream, A_d, 2,
                        numElements);

        for (int i = 0; i < numInflight; i++) {
            HIPASSERT(A_d + i * eachCopyElements < A_d + Nbytes);
            HIPCHECK(hipMemcpyAsync(&A_h[i * eachCopyElements], &A_d[i * eachCopyElements],
                                    eachCopyBytes, hipMemcpyDeviceToHost, stream));
        }

        if (doHostSide) {
            assert(0);
#if 0
            hipEvent_t e;
            HIPCHECK(hipEventCreate(&e));
#endif
            HIPCHECK(hipDeviceSynchronize());
            for (size_t i = 0; i < numElements; i++) {
                A_h[i] += hostConst;
            }
        }
    };

    HIPCHECK(hipDeviceSynchronize());


    // Verify we copied back all the data correctly:
    for (size_t i = 0; i < numElements; i++) {
        T gold = initValue + i;
        // Perform calcs in same order as test above to replicate FP order-of-operations:
        for (int k = 0; k < numPongs; k++) {
            gold += deviceConst;
            if (doHostSide) {
                gold += hostConst;
            }
        }

        if (gold != A_h[i]) {
            std::cout << i << ": gold=" << gold << " out=" << A_h[i] << std::endl;
            HIPASSERT(gold == A_h[i]);
        }
    }


    HIPCHECK(hipHostFree(A_h));
    HIPCHECK(hipFree(A_d));
}
示例#12
0
// IN: nStreams : number of streams to use for the test
// IN :useNullStream - use NULL stream.  Synchronizes everything.
// IN: useSyncMemcpyH2D - use sync memcpy (no overlap) for H2D
// IN: useSyncMemcpyD2H - use sync memcpy (no overlap) for D2H
void test_chunkedAsyncExample(int nStreams, bool useNullStream, bool useSyncMemcpyH2D,
                              bool useSyncMemcpyD2H) {
    size_t Nbytes = N * sizeof(int);
    printf("testing: %s(useNullStream=%d, useSyncMemcpyH2D=%d, useSyncMemcpyD2H=%d)  ", __func__,
           useNullStream, useSyncMemcpyH2D, useSyncMemcpyD2H);
    printf("Nbytes=%zu (%6.1f MB)\n", Nbytes, (double)(Nbytes) / 1024.0 / 1024.0);

    int *A_d, *B_d, *C_d;
    int *A_h, *B_h, *C_h;

    HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, true);


    unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);


    hipStream_t* stream = (hipStream_t*)malloc(sizeof(hipStream_t) * nStreams);
    if (useNullStream) {
        nStreams = 1;
        stream[0] = NULL;
    } else {
        for (int i = 0; i < nStreams; ++i) {
            HIPCHECK(hipStreamCreate(&stream[i]));
        }
    }


    size_t workLeft = N;
    size_t workPerStream = N / nStreams;
    for (int i = 0; i < nStreams; ++i) {
        size_t work = (workLeft < workPerStream) ? workLeft : workPerStream;
        size_t workBytes = work * sizeof(int);

        size_t offset = i * workPerStream;
        HIPASSERT(A_d + offset < A_d + Nbytes);
        HIPASSERT(B_d + offset < B_d + Nbytes);
        HIPASSERT(C_d + offset < C_d + Nbytes);
        if (useSyncMemcpyH2D) {
            HIPCHECK(hipMemcpy(&A_d[offset], &A_h[offset], workBytes, hipMemcpyHostToDevice));
            HIPCHECK(hipMemcpy(&B_d[offset], &B_h[offset], workBytes, hipMemcpyHostToDevice));
        } else {
            HIPCHECK(hipMemcpyAsync(&A_d[offset], &A_h[offset], workBytes, hipMemcpyHostToDevice,
                                    stream[i]));
            HIPCHECK(hipMemcpyAsync(&B_d[offset], &B_h[offset], workBytes, hipMemcpyHostToDevice,
                                    stream[i]));
        };

        hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, stream[i],
                        &A_d[offset], &B_d[offset], &C_d[offset], work);

        if (useSyncMemcpyD2H) {
            HIPCHECK(hipMemcpy(&C_h[offset], &C_d[offset], workBytes, hipMemcpyDeviceToHost));
        } else {
            HIPCHECK(hipMemcpyAsync(&C_h[offset], &C_d[offset], workBytes, hipMemcpyDeviceToHost,
                                    stream[i]));
        }
    }


    HIPCHECK(hipDeviceSynchronize());


    HipTest::checkVectorADD(A_h, B_h, C_h, N);

    HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, true);

    free(stream);
};
示例#13
0
文件: hipArray.cpp 项目: kknox/HIP
void memcpyArraytest(size_t numW, size_t numH, bool usePinnedHost, bool usePitch=false)
{

  size_t width = numW * sizeof(T);
  size_t sizeElements = width * numH;

  printf("memcpyArraytest: %s<%s> size=%lu (%6.2fMB) W: %d, H: %d, usePinnedHost: %d, usePitch: %d\n",
         __func__,
         TYPENAME(T),
         sizeElements, sizeElements/1024.0/1024.0,
         (int)numW, (int)numH, usePinnedHost, usePitch);

  hipArray *A_d, *B_d, *C_d;
  T *A_h, *B_h, *C_h;

  // 1D
  if ((numW >= 1) && (numH == 1)) {
    hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
    HipTest::initHIPArrays(&A_d, &B_d, &C_d, &desc, numW, 1, 0);
    HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW*numH, usePinnedHost);
    unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW*numH);

    HIPCHECK (hipMemcpyToArray (A_d, 0, 0, (void *)A_h, width, hipMemcpyHostToDevice) );
    HIPCHECK (hipMemcpyToArray (B_d, 0, 0, (void *)B_h, width, hipMemcpyHostToDevice) );

    hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, (T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW);

    HIPCHECK (hipMemcpy (C_h, C_d->data, width, hipMemcpyDeviceToHost) );

    HIPCHECK ( hipDeviceSynchronize() );
    HipTest::checkVectorADD(A_h, B_h, C_h, numW);

  }
  // 2D
  else if ((numW >= 1) && (numH >= 1)) {


    hipChannelFormatDesc desc = hipCreateChannelDesc<T>();
    HipTest::initHIPArrays(&A_d, &B_d, &C_d, &desc, numW, numH, 0);
    HipTest::initArraysForHost(&A_h, &B_h, &C_h, numW*numH, usePinnedHost);
    unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numW*numH);

    if (usePitch) {
      T *A_p, *B_p, *C_p;
      size_t pitch_A, pitch_B, pitch_C;

      HipTest::initArrays2DPitch(&A_p, &B_p, &C_p, &pitch_A, &pitch_B, &pitch_C, numW, numH);
      HIPCHECK (hipMemcpy2D (A_p, pitch_A, A_h, width, width, numH, hipMemcpyHostToDevice) );
      HIPCHECK (hipMemcpy2D (B_p, pitch_B, B_h, width, width, numH, hipMemcpyHostToDevice) );

      HIPCHECK (hipMemcpy2DToArray (A_d, 0, 0, (void *)A_p, pitch_A, width, numH, hipMemcpyDeviceToDevice) );
      HIPCHECK (hipMemcpy2DToArray (B_d, 0, 0, (void *)B_p, pitch_B, width, numH, hipMemcpyDeviceToDevice) );

      hipFree(A_p);
      hipFree(B_p);
      hipFree(C_p);
    }
    else {
      HIPCHECK (hipMemcpy2DToArray (A_d, 0, 0, (void *)A_h, width, width, numH, hipMemcpyHostToDevice) );
      HIPCHECK (hipMemcpy2DToArray (B_d, 0, 0, (void *)B_h, width, width, numH, hipMemcpyHostToDevice) );
    }

    hipLaunchKernel(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0, (T*)A_d->data, (T*)B_d->data, (T*)C_d->data, numW*numH);

    HIPCHECK (hipMemcpy2D ((void*)C_h, width, (void*)C_d->data, width, width, numH, hipMemcpyDeviceToHost) );

    HIPCHECK ( hipDeviceSynchronize() );
    HipTest::checkVectorADD(A_h, B_h, C_h, numW*numH);
  }
  // Unknown
  else {
    HIPASSERT("Incompatible dimensions" && 0);
  }

  hipFreeArray(A_d);
  hipFreeArray(B_d);
  hipFreeArray(C_d);
  HipTest::freeArraysForHost(A_h, B_h, C_h, usePinnedHost);

  printf ("  %s success\n", __func__);

}