// Runs memcpytest2<T> over a doubling sequence of element counts (64, 128, ...),
// each shifted by `offset`, once with unpinned and once with pinned host memory.
//
//   maxElem - largest element count (including offset) to test. When 0, it is
//             derived as one fifth of the free device memory reported by
//             hipMemGetInfo, expressed in elements of T.
//   offset  - number of elements added to every size in the sweep.
//
// The device is reset before each individual memcpytest2 call.
void memcpytest2_sizes(size_t maxElem=0, size_t offset=0)
{
    printSep();
    printf ("test: %s<%s>\n", __func__,  TYPENAME(T));

    int deviceId;
    HIPCHECK(hipGetDevice(&deviceId));

    size_t free, total;
    HIPCHECK(hipMemGetInfo(&free, &total));

    // No explicit limit requested: cap the sweep at 1/5 of the free device memory.
    if (maxElem == 0) {
        maxElem = free/sizeof(T)/5;
    }

    // offset is a size_t, so it is printed with %zu like free/total (it was %lu,
    // which mismatches on platforms where unsigned long is narrower than size_t).
    printf ("  device#%d: hipMemGetInfo: free=%zu (%4.2fMB) total=%zu (%4.2fMB)    maxSize=%6.1fMB offset=%zu\n",
            deviceId, free, (float)(free/1024.0/1024.0), total, (float)(total/1024.0/1024.0), maxElem*sizeof(T)/1024.0/1024.0, offset);

    // Doubling sweep; stops once elem+offset would exceed maxElem.
    for (size_t elem=64; elem+offset<=maxElem; elem*=2) {
        HIPCHECK ( hipDeviceReset() );
        memcpytest2<T>(elem+offset, 0, 1, 1, 0);  // unpinned host
        HIPCHECK ( hipDeviceReset() );
        memcpytest2<T>(elem+offset, 1, 1, 1, 0);  // pinned host
    }
}
예제 #2
0
파일: hipArray.cpp 프로젝트: kknox/HIP
// Test driver: parses the standard test arguments, selects the requested
// device, then runs the test groups enabled by the bits of p_tests.
//
//   p_tests & 0x1 - memcpy2Dtest and memcpyArraytest over several element types
//   p_tests & 0x4 - memcpyArraytest_size with several size/offset pairs
//
// Ends by calling passed().
int main(int argc, char *argv[])
{
    HipTest::parseStandardArguments(argc, argv, true);

    printf ("info: set device to %d\n", p_gpuDevice);
    HIPCHECK(hipSetDevice(p_gpuDevice));

    if (p_tests & 0x1) {
        printf ("\n\n=== tests&1 (types)\n");
        printSep();
        HIPCHECK ( hipDeviceReset() );
        // The previously declared width/height locals were never read and have
        // been removed; the tests below use fixed dimensions.
        memcpy2Dtest<float>(321, 211, 0);
        memcpy2Dtest<double>(322, 211, 0);
        memcpy2Dtest<char>(320, 211, 0);
        memcpy2Dtest<int>(323, 211, 0);
        printf ("===\n\n\n");

        printf ("\n\n=== tests&1 (types)\n");
        printSep();
        // 2D
        memcpyArraytest<float>(320, 211, 0, 0);
        memcpyArraytest<unsigned int>(322, 211, 0, 0);
        memcpyArraytest<int>(320, 211, 0, 0);
        memcpyArraytest<float>(320, 211, 0, 1);
        memcpyArraytest<float>(322, 211, 0, 1);
        memcpyArraytest<int>(320, 211, 0, 1);
        printSep();
        // 1D
        memcpyArraytest<float>(320, 1, 0);
        memcpyArraytest<unsigned int>(322, 1, 0);
        memcpyArraytest<int>(320, 1, 0);
        printf ("===\n\n\n");
    }

    if (p_tests & 0x4) {
        printf ("\n\n=== tests&4 (test sizes and offsets)\n");
        printSep();
        HIPCHECK ( hipDeviceReset() );
        printSep();
        memcpyArraytest_size<float>(0,0);
        printSep();
        memcpyArraytest_size<float>(0,64);
        printSep();
        memcpyArraytest_size<float>(1024*1024,13);
        printSep();
        memcpyArraytest_size<float>(1024*1024,50);
    }

    passed();

}
예제 #3
0
// Resets the device, then for every device reported by hipGetDeviceCount
// calls hipSetDeviceFlags with each single-bit flag value 0x1, 0x2, 0x4, 0x8,
// printing the flag before each call. Ends by calling passed().
int main()
{
    HIPCHECK(hipDeviceReset());

    int deviceCount = 0;
    HIPCHECK(hipGetDeviceCount(&deviceCount));

    for (int j = 0; j < deviceCount; ++j) {
        HIPCHECK(hipSetDevice(j));

        // Walk one set bit through the low four bit positions.
        for (unsigned flag = 1u; flag < (1u << 4); flag <<= 1) {
            printf ("Flag=%x\n", flag);
            HIPCHECK(hipSetDeviceFlags(flag));
        }
    }

    passed();
}
예제 #4
0
// Sample entry point: announces the sample, runs the test, resets the device,
// reports the outcome held in testResult and exits with the matching status.
int main(int argc, char **argv)
{
    printf("%s starting...\n", sampleName);

    runTest(argc, argv);

    hipDeviceReset();

    // Translate the test outcome into both the printed verdict and exit code.
    const char *verdict = "ERROR!";
    int exitCode = EXIT_FAILURE;
    if (testResult) {
        verdict = "OK";
        exitCode = EXIT_SUCCESS;
    }

    printf("%s completed, returned %s\n", sampleName, verdict);
    exit(exitCode);
}
// Test driver for the memcpy tests. Parses the standard test arguments,
// selects the requested device and runs the groups enabled in p_tests:
//
//   0x1 - memcpytest2_for_type over float, double, char and int
//   0x2 - memcpytest2<float> at sizes around and above the 64MB boundary
//   0x4 - memcpytest2_sizes<float> for several (maxElem, offset) pairs
//   0x8 - multiThread_1<float> for every serialize / pinned combination
//
// Ends by calling passed().
int main(int argc, char *argv[])
{
    HipTest::parseStandardArguments(argc, argv, true);

    printf ("info: set device to %d\n", p_gpuDevice);
    HIPCHECK(hipSetDevice(p_gpuDevice));

    if (p_tests & 0x1) {
        printf ("\n\n=== tests&1 (types and different memcpy kinds (H2D, D2H, H2H, D2D)\n");
        HIPCHECK ( hipDeviceReset() );
        memcpytest2_for_type<float>(N);
        memcpytest2_for_type<double>(N);
        memcpytest2_for_type<char>(N);
        memcpytest2_for_type<int>(N);
        printf ("===\n\n\n");
    }

    if (p_tests & 0x2) {
        // Sizes near the 64MB boundary, which has historically shown issues.
        printf ("\n\n=== tests&0x2 (64MB boundary)\n");
#if 0
        // These all pass:
        memcpytest2<float>(15*1024*1024, 1, 0, 0, 0);
        memcpytest2<float>(16*1024*1024, 1, 0, 0, 0);
        memcpytest2<float>(16*1024*1024+16*1024,  1, 0, 0, 0);
#endif
        // Just over 64MB:
        memcpytest2<float>(16*1024*1024+512*1024,  1, 0, 0, 0);
        memcpytest2<float>(17*1024*1024+1024,  1, 0, 0, 0);
        memcpytest2<float>(32*1024*1024, 1, 0, 0, 0);
        memcpytest2<float>(32*1024*1024, 0, 0, 0, 0);
        // NOTE(review): the next call is issued twice with identical arguments,
        // exactly as in the original sequence.
        memcpytest2<float>(32*1024*1024, 1, 1, 1, 0);
        memcpytest2<float>(32*1024*1024, 1, 1, 1, 0);
    }

    if (p_tests & 0x4) {
        printf ("\n\n=== tests&4 (test sizes and offsets)\n");
        HIPCHECK ( hipDeviceReset() );

        // {maxElem, offset} pairs, run in this order with a separator before each.
        const size_t sizeAndOffset[][2] = {
            { 0,         0  },
            { 0,         64 },
            { 1024*1024, 13 },
            { 1024*1024, 50 },
        };
        const size_t numCases = sizeof(sizeAndOffset) / sizeof(sizeAndOffset[0]);
        for (size_t c = 0; c < numCases; ++c) {
            printSep();
            memcpytest2_sizes<float>(sizeAndOffset[c][0], sizeAndOffset[c][1]);
        }
    }

    if (p_tests & 0x8) {
        printf ("\n\n=== tests&8\n");
        HIPCHECK ( hipDeviceReset() );
        printSep();

        // Order of execution:
        //   (serialized,   pinned)   - simplest case, checks the sub-calls to memcpytest2
        //   (serialized,   unpinned) - stresses the unpinned transfer path
        //   (unserialized, pinned)   - two threads copying in parallel
        //   (unserialized, unpinned) - TODO
        const bool trueThenFalse[2] = { true, false };
        for (int s = 0; s < 2; ++s) {
            for (int p = 0; p < 2; ++p) {
                multiThread_1<float>(trueThenFalse[s], trueThenFalse[p]);
            }
        }
        printf ("===\n\n\n");
    }

    passed();

}
예제 #6
0
namespace {

// Calls runbench<Iters>, runbench<Iters-1>, ..., runbench<0>, in that order.
template <int Iters>
struct RunbenchCountdown {
	static void run(double *cd, long size) {
		runbench<Iters>(cd, size);
		RunbenchCountdown<Iters - 1>::run(cd, size);
	}
};

// End of the countdown: only the runbench<0> variant is left to run.
template <>
struct RunbenchCountdown<0> {
	static void run(double *cd, long size) {
		runbench<0>(cd, size);
	}
};

}  // namespace

// Benchmark driver.
//
// Allocates a device buffer of `size` doubles, zero-fills it, prints the CSV
// header, runs the warm-up followed by runbench<32> down to runbench<0>,
// copies the buffer into `c`, then frees the buffer and resets the device.
//
//   c    - host destination receiving `size` doubles after the runs
//   size - number of double elements in the device buffer
extern "C" void mixbenchGPU(double *c, long size){
	const char *tradeOffLabel = "compute with global memory (block strided)";
	printf("Trade-off type:       %s\n", tradeOffLabel);

	double *cd;
	CUDA_SAFE_CALL( hipMalloc((void**)&cd, size*sizeof(double)) );

	// Zero-fill the device buffer
	CUDA_SAFE_CALL( hipMemset(cd, 0, size*sizeof(double)) );

	// Wait for the memory operations above to finish
	CUDA_SAFE_CALL( hipDeviceSynchronize() );

	printf("---------------------------------------------------------- CSV data ----------------------------------------------------------\n");
	printf("Experiment ID, Single Precision ops,,,,              Double precision ops,,,,              Integer operations,,, \n");
	printf("Compute iters, Flops/byte, ex.time,  GFLOPS, GB/sec, Flops/byte, ex.time,  GFLOPS, GB/sec, Iops/byte, ex.time,   GIOPS, GB/sec\n");

	runbench_warmup(cd, size);

	// Same sequence as the hand-written list: runbench<32> first, runbench<0> last.
	RunbenchCountdown<32>::run(cd, size);

	printf("---------------------------------------------------------- CSV data ----------------------------------------------------------\n");

	// Bring the device buffer back to the caller's host array
	CUDA_SAFE_CALL( hipMemcpy(c, cd, size*sizeof(double), hipMemcpyDeviceToHost) );

	CUDA_SAFE_CALL( hipFree(cd) );

	CUDA_SAFE_CALL( hipDeviceReset() );
}