コード例 #1
0
ファイル: hipMemcpyDtoDAsync.cpp プロジェクト: ssahasra/HIP
// Multi-GPU test for hipMemcpyDtoDAsync.
//
// The body runs only when more than one device is reported; otherwise
// execution falls straight through to passed().
//
//   1. With device 0 current, the A/B/C arrays are set up through
//      HipTest::initArrays, a vector add is run on A_d/B_d and the result is
//      verified on the host.
//   2. With device 1 current, A_d and B_d are copied into X_d and Y_d with
//      hipMemcpyDtoDAsync on stream `s`, the same vector add is run on the
//      copies, and the result is read back and verified again.
int main() {
    size_t Nbytes = N * sizeof(int);
    int numDevices = 0;
    int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d;
    int *A_h, *B_h, *C_h;
    hipStream_t s;

    HIPCHECK(hipGetDeviceCount(&numDevices));
    if (numDevices > 1) {
        // Source buffers are created while device 0 is current.
        HIPCHECK(hipSetDevice(0));
        unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
        HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);

        // Destination buffers are created while device 1 is current.
        HIPCHECK(hipSetDevice(1));
        HIPCHECK(hipMalloc(&X_d, Nbytes));
        HIPCHECK(hipMalloc(&Y_d, Nbytes));
        HIPCHECK(hipMalloc(&Z_d, Nbytes));


        // Reference run with device 0 current.
        HIPCHECK(hipSetDevice(0));
        HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
        HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
        hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
                        static_cast<const int*>(A_d), static_cast<const int*>(B_d), C_d, N);
        HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
        HIPCHECK(hipDeviceSynchronize());
        HipTest::checkVectorADD(A_h, B_h, C_h, N);

        // C_h now holds a verified result. Poison it so the second check below
        // can only succeed if the device-1 path really wrote fresh data: the
        // bitwise complement of a value is never equal to that value.
        // (Presumably checkVectorADD compares C_h against A_h + B_h -- confirm.)
        for (size_t i = 0; i < Nbytes / sizeof(int); ++i) {
            C_h[i] = ~(A_h[i] + B_h[i]);
        }

        // Peer run with device 1 current: copies, kernel and read-back are all
        // queued on the same stream so their ordering is explicit rather than
        // relying on implicit synchronisation with the null stream.
        HIPCHECK(hipSetDevice(1));
        HIPCHECK(hipStreamCreate(&s));
        HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)X_d, (hipDeviceptr_t)A_d, Nbytes, s));
        HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)Y_d, (hipDeviceptr_t)B_d, Nbytes, s));

        hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, s,
                        static_cast<const int*>(X_d), static_cast<const int*>(Y_d), Z_d, N);
        HIPCHECK(hipMemcpyDtoHAsync(C_h, (hipDeviceptr_t)Z_d, Nbytes, s));
        HIPCHECK(hipStreamSynchronize(s));
        HIPCHECK(hipDeviceSynchronize());

        HipTest::checkVectorADD(A_h, B_h, C_h, N);

        // Release everything that was created above.
        HIPCHECK(hipStreamDestroy(s));
        HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
        HIPCHECK(hipFree(X_d));
        HIPCHECK(hipFree(Y_d));
        HIPCHECK(hipFree(Z_d));
    }

    passed();
}
コード例 #2
0
ファイル: hipSetDeviceFlags.cpp プロジェクト: kknox/HIP
// Calls hipSetDeviceFlags on every reported device with each of the four
// lowest single-bit flag values (0x1, 0x2, 0x4, 0x8), printing each value
// before it is applied.
int main() {
    HIPCHECK(hipDeviceReset());

    int deviceCount = 0;
    HIPCHECK(hipGetDeviceCount(&deviceCount));

    for (int j = 0; j < deviceCount; ++j) {
        HIPCHECK(hipSetDevice(j));

        // Shift a single set bit through positions 0..3.
        for (unsigned flag = 1u; flag <= 8u; flag <<= 1) {
            printf("Flag=%x\n", flag);
            HIPCHECK(hipSetDeviceFlags(flag));
            // Disabled alternative kept for reference:
            // HIPCHECK_API(hipSetDeviceFlags(flag), hipErrorInvalidValue);
        }
    }

    passed();
}
コード例 #3
0
ファイル: hipSetDevice.cpp プロジェクト: ssahasra/HIP
// Checks hipSetDevice against every valid device index and against two
// out-of-range indices (one past the end, and -1), which are expected to
// yield hipErrorInvalidDevice. Reports failure when no device is found.
int main() {
    int numDevices = 0;
    HIPCHECK_API(hipGetDeviceCount(&numDevices), hipSuccess);

    if (numDevices <= 0) {
        failed("Error: failed to find any compatible devices.");
    } else {
        // Every index in [0, numDevices) must be accepted.
        int deviceId = 0;
        while (deviceId < numDevices) {
            HIPCHECK_API(hipSetDevice(deviceId), hipSuccess);
            ++deviceId;
        }

        // Indices just outside the valid range must be rejected.
        HIPCHECK_API(hipSetDevice(numDevices), hipErrorInvalidDevice);
        HIPCHECK_API(hipSetDevice(-1), hipErrorInvalidDevice);
    }

    passed();
}
コード例 #4
0
// For each reported device index, selects it with hipSetDevice and asserts
// that hipGetDevice reports the same index back.
int main() {
    int numDevices = 0;
    HIPCHECK(hipGetDeviceCount(&numDevices));

    int i = 0;
    while (i < numDevices) {
        int device = -1;  // overwritten by hipGetDevice below
        HIPCHECK(hipSetDevice(i));
        HIPCHECK(hipGetDevice(&device));
        HIPASSERT(device == i);
        ++i;
    }

    passed();
}
コード例 #5
0
ファイル: hipMemset3D.cpp プロジェクト: ssahasra/HIP
// Entry point for the hipMemset3D test: parses the standard test arguments,
// selects the requested device and runs testhipMemset3D once. Exits with
// EXIT_FAILURE when that helper reports failure.
int main(int argc, char* argv[]) {
    HipTest::parseStandardArguments(argc, argv, true);
    HIPCHECK(hipSetDevice(p_gpuDevice));

    const bool testResult = testhipMemset3D(memsetval, p_gpuDevice);
    if (!testResult) {
        exit(EXIT_FAILURE);
    }

    passed();
}
コード例 #6
0
ファイル: hipArray.cpp プロジェクト: kknox/HIP
// Entry point for the hipArray tests.
//
// Parses the standard test arguments, selects the requested device and then
// runs the test groups enabled by the p_tests bit mask:
//   bit 0x1 - memcpy2Dtest and memcpyArraytest over several element types
//   bit 0x4 - memcpyArraytest_size with several size/offset combinations
int main(int argc, char *argv[])
{
    HipTest::parseStandardArguments(argc, argv, true);

    printf ("info: set device to %d\n", p_gpuDevice);
    HIPCHECK(hipSetDevice(p_gpuDevice));

    if (p_tests & 0x1) {
        printf ("\n\n=== tests&1 (types)\n");
        printSep();
        HIPCHECK ( hipDeviceReset() );
        // 2D memcpy with explicit dimensions for each element type.
        memcpy2Dtest<float>(321, 211, 0);
        memcpy2Dtest<double>(322, 211, 0);
        memcpy2Dtest<char>(320, 211, 0);
        memcpy2Dtest<int>(323, 211, 0);
        printf ("===\n\n\n");

        printf ("\n\n=== tests&1 (types)\n");
        printSep();
        // 2D
        memcpyArraytest<float>(320, 211, 0, 0);
        memcpyArraytest<unsigned int>(322, 211, 0, 0);
        memcpyArraytest<int>(320, 211, 0, 0);
        memcpyArraytest<float>(320, 211, 0, 1);
        memcpyArraytest<float>(322, 211, 0, 1);
        memcpyArraytest<int>(320, 211, 0, 1);
        printSep();
        // 1D
        memcpyArraytest<float>(320, 1, 0);
        memcpyArraytest<unsigned int>(322, 1, 0);
        memcpyArraytest<int>(320, 1, 0);
        printf ("===\n\n\n");
    }

    if (p_tests & 0x4) {
        printf ("\n\n=== tests&4 (test sizes and offsets)\n");
        printSep();
        HIPCHECK ( hipDeviceReset() );
        printSep();
        memcpyArraytest_size<float>(0,0);
        printSep();
        memcpyArraytest_size<float>(0,64);
        printSep();
        memcpyArraytest_size<float>(1024*1024,13);
        printSep();
        memcpyArraytest_size<float>(1024*1024,50);
    }

    passed();

}
コード例 #7
0
ファイル: hipMemcpyAsync.cpp プロジェクト: ssahasra/HIP
// Entry point for the hipMemcpyAsync tests.
//
// After argument parsing and device selection, each bit of p_tests enables
// one group:
//   0x01 - simpleNegTest
//   0x02 - test_manyInflightCopies on a dedicated stream
//   0x04 - test_chunkedAsyncExample in four flag combinations
//   0x08 - stream create/destroy only (ping-pong tests are disabled)
int main(int argc, char* argv[]) {
    HipTest::parseStandardArguments(argc, argv, false);
    parseMyArguments(argc, argv);

    printf("info: set device to %d  tests=%x\n", p_gpuDevice, p_tests);
    HIPCHECK(hipSetDevice(p_gpuDevice));

    if ((p_tests & 0x01) != 0) {
        simpleNegTest();
    }

    if ((p_tests & 0x02) != 0) {
        hipStream_t stream;
        HIPCHECK(hipStreamCreate(&stream));

        test_manyInflightCopies<float>(stream, 1024, 16, true);
        // Verify we re-use the same entries instead of growing pool.
        test_manyInflightCopies<float>(stream, 1024, 4, true);
        test_manyInflightCopies<float>(stream, 1024 * 8, 64, false);

        HIPCHECK(hipStreamDestroy(stream));
    }

    if ((p_tests & 0x04) != 0) {
        // Easy sync version
        test_chunkedAsyncExample(p_streams, true, true, true);
        // Easy sync version
        test_chunkedAsyncExample(p_streams, false, true, true);
        // Some async
        test_chunkedAsyncExample(p_streams, false, false, true);
        // All async
        test_chunkedAsyncExample(p_streams, false, false, false);
    }

    if ((p_tests & 0x08) != 0) {
        hipStream_t stream;
        HIPCHECK(hipStreamCreate(&stream));

        // Ping-pong tests are currently disabled:
        //   test_pingpong<int, Pinned>(stream, 1024*1024*32, 1, 1, false);
        //   test_pingpong<int, Pinned>(stream, 1024*1024*32, 1, 10, false);

        HIPCHECK(hipStreamDestroy(stream));
    }

    passed();
}
コード例 #8
0
// Entry point for the hipMemcpy tests.
//
// After argument parsing and device selection, each bit of p_tests enables
// one group:
//   0x1 - memcpytest2_for_type over float/double/char/int
//   0x2 - memcpytest2 with element counts around the 64MB boundary
//   0x4 - memcpytest2_sizes with several size/offset combinations
//   0x8 - multiThread_1 in its four boolean-argument combinations
int main(int argc, char* argv[]) {
    HipTest::parseStandardArguments(argc, argv, true);

    printf("info: set device to %d\n", p_gpuDevice);
    HIPCHECK(hipSetDevice(p_gpuDevice));

    if ((p_tests & 0x1) != 0) {
        printf("\n\n=== tests&1 (types and different memcpy kinds (H2D, D2H, H2H, D2D)\n");
        HIPCHECK(hipDeviceReset());
        memcpytest2_for_type<float>(N);
        memcpytest2_for_type<double>(N);
        memcpytest2_for_type<char>(N);
        memcpytest2_for_type<int>(N);
        printf("===\n\n\n");
    }

    if ((p_tests & 0x2) != 0) {
        // Some tests around the 64MB boundary which have historically shown issues:
        printf("\n\n=== tests&0x2 (64MB boundary)\n");
#if 0
        // These all pass:
        memcpytest2<float>(15*1024*1024, 1, 0, 0, 0);
        memcpytest2<float>(16*1024*1024, 1, 0, 0, 0);
        memcpytest2<float>(16*1024*1024+16*1024,  1, 0, 0, 0);
#endif
        // Just over 64MB:
        memcpytest2<float>(16 * 1024 * 1024 + 512 * 1024, 1, 0, 0, 0);
        memcpytest2<float>(17 * 1024 * 1024 + 1024, 1, 0, 0, 0);
        memcpytest2<float>(32 * 1024 * 1024, 1, 0, 0, 0);
        memcpytest2<float>(32 * 1024 * 1024, 0, 0, 0, 0);
        // NOTE(review): the next two calls are identical -- confirm whether the
        // repetition is intentional.
        memcpytest2<float>(32 * 1024 * 1024, 1, 1, 1, 0);
        memcpytest2<float>(32 * 1024 * 1024, 1, 1, 1, 0);
    }

    if ((p_tests & 0x4) != 0) {
        printf("\n\n=== tests&4 (test sizes and offsets)\n");
        HIPCHECK(hipDeviceReset());
        printSep();
        memcpytest2_sizes<float>(0, 0);
        printSep();
        memcpytest2_sizes<float>(0, 64);
        printSep();
        memcpytest2_sizes<float>(1024 * 1024, 13);
        printSep();
        memcpytest2_sizes<float>(1024 * 1024, 50);
    }

    if ((p_tests & 0x8) != 0) {
        printf("\n\n=== tests&8\n");
        HIPCHECK(hipDeviceReset());
        printSep();

        // Simplest cases: serialize the threads, and also use pinned memory.
        // This verifies that the sub-calls to memcpytest2 are correct.
        multiThread_1<float>(true, true);

        // Serialize, but use unpinned memory to stress the unpinned memory xfer path.
        multiThread_1<float>(true, false);

        // Remove serialization, so two threads are performing memory copies in parallel.
        multiThread_1<float>(false, true);

        // Remove serialization, and use unpinned.
        multiThread_1<float>(false, false);  // TODO
        printf("===\n\n\n");
    }

    passed();
}