Beispiel #1
0
int main() {
    size_t Nbytes = N * sizeof(int);
    int numDevices = 0;
    int *A_d, *B_d, *C_d, *X_d, *Y_d, *Z_d;
    int *A_h, *B_h, *C_h;
    hipStream_t s;

    HIPCHECK(hipGetDeviceCount(&numDevices));
    if (numDevices > 1) {
        HIPCHECK(hipSetDevice(0));
        unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, N);
        HipTest::initArrays(&A_d, &B_d, &C_d, &A_h, &B_h, &C_h, N, false);
        HIPCHECK(hipSetDevice(1));
        HIPCHECK(hipMalloc(&X_d, Nbytes));
        HIPCHECK(hipMalloc(&Y_d, Nbytes));
        HIPCHECK(hipMalloc(&Z_d, Nbytes));


        HIPCHECK(hipSetDevice(0));
        HIPCHECK(hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));
        HIPCHECK(hipMemcpy(B_d, B_h, Nbytes, hipMemcpyHostToDevice));
        hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
                        static_cast<const int*>(A_d), static_cast<const int*>(B_d), C_d, N);
        HIPCHECK(hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));
        HIPCHECK(hipDeviceSynchronize());
        HipTest::checkVectorADD(A_h, B_h, C_h, N);

        HIPCHECK(hipSetDevice(1));
        HIPCHECK(hipStreamCreate(&s));
        HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)X_d, (hipDeviceptr_t)A_d, Nbytes, s));
        HIPCHECK(hipMemcpyDtoDAsync((hipDeviceptr_t)Y_d, (hipDeviceptr_t)B_d, Nbytes, s));

        hipLaunchKernelGGL(HipTest::vectorADD, dim3(blocks), dim3(threadsPerBlock), 0, 0,
                        static_cast<const int*>(X_d), static_cast<const int*>(Y_d), Z_d, N);
        HIPCHECK(hipMemcpyDtoHAsync(C_h, (hipDeviceptr_t)Z_d, Nbytes, s));
        HIPCHECK(hipStreamSynchronize(s));
        HIPCHECK(hipDeviceSynchronize());

        HipTest::checkVectorADD(A_h, B_h, C_h, N);
        HIPCHECK(hipStreamDestroy(s));
        HipTest::freeArrays(A_d, B_d, C_d, A_h, B_h, C_h, false);
        HIPCHECK(hipFree(X_d));
        HIPCHECK(hipFree(Y_d));
        HIPCHECK(hipFree(Z_d));
    }

    passed();
}
Beispiel #2
0
bool run_erfinv() {
    double *A, *Ad, *B, *Bd;
    A = new double[N];
    B = new double[N];
    for (int i = 0; i < N; i++) {
        A[i] = -0.6;
        B[i] = 0.0;
    }
    hipMalloc((void**)&Ad, SIZE);
    hipMalloc((void**)&Bd, SIZE);
    hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
    hipLaunchKernelGGL(test_erfinv, dim3(1), dim3(N), 0, 0, Ad, Bd);
    hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
    int passed = 0;
    for (int i = 0; i < 512; i++) {
        if (B[i] - A[i] < 0.000001) {
            passed = 1;
        }
    }

    delete[] A;
    delete[] B;
    hipFree(Ad);
    hipFree(Bd);

    if (passed == 1) {
        return true;
    }
    assert(passed == 1);
    return false;
}
Beispiel #3
0
bool run_rnorm() {
    double *A, *Ad, *B, *Bd;
    A = new double[N];
    B = new double[N];
    double val = 0.0;
    for (int i = 0; i < N; i++) {
        A[i] = 1.0;
        B[i] = 0.0;
        val += 1.0;
    }
    val = 1 / sqrt(val);
    hipMalloc((void**)&Ad, SIZE);
    hipMalloc((void**)&Bd, SIZE);
    hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
    hipLaunchKernelGGL(test_rnorm, dim3(1), dim3(N), 0, 0, Ad, Bd);
    hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
    int passed = 0;
    for (int i = 0; i < 512; i++) {
        if (B[0] - val < 0.000001) {
            passed = 1;
        }
    }

    delete[] A;
    delete[] B;
    hipFree(Ad);
    hipFree(Bd);

    if (passed == 1) {
        return true;
    }
    assert(passed == 1);
    return false;
}
Beispiel #4
0
bool run_lround() {
    double *A, *Ad;
    long int *B, *Bd;
    A = new double[N];
    B = new long int[N];
    for (int i = 0; i < N; i++) {
        A[i] = 1.345;
    }
    hipMalloc((void**)&Ad, SIZE);
    hipMalloc((void**)&Bd, N * sizeof(long int));
    hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
    hipLaunchKernelGGL(test_lround, dim3(1), dim3(N), 0, 0, Ad, Bd);
    hipMemcpy(B, Bd, N * sizeof(long int), hipMemcpyDeviceToHost);
    int passed = 0;
    for (int i = 0; i < 512; i++) {
        long int x = round(A[i]);
        if (B[i] == x) {
            passed = 1;
        }
    }

    delete[] A;
    delete[] B;
    hipFree(Ad);
    hipFree(Bd);

    if (passed == 1) {
        return true;
    }
    assert(passed == 1);
    return false;
}
Beispiel #5
0
bool run_add() {
  
  constexpr size_t N = 64;
  std::vector<T> host_input(N);
  std::vector<T> host_expected(N);
  for (int i = 0; i < N; ++i) {
    host_input[i] = (T)i;
    host_expected[i] = host_input[i] + host_input[i];
  }

  T* input1;
  hipMalloc(&input1, N * sizeof(T));
  hipMemcpy(input1, host_input.data(), host_input.size()*sizeof(T), hipMemcpyHostToDevice);


  T* input2;
  hipMalloc(&input2, N * sizeof(T));
  hipMemcpy(input2, host_input.data(), host_input.size()*sizeof(T), hipMemcpyHostToDevice);


  constexpr unsigned int blocks = 1;
  constexpr unsigned int threads_per_block = 1;
  hipLaunchKernelGGL(add<T>, dim3(blocks), dim3(threads_per_block), 0, 0, input1, input2, N);

  hipMemcpy(host_input.data(), input1, host_input.size()*sizeof(T), hipMemcpyDeviceToHost);

  bool equal = true;
  for (int i = 0; i < N; i++) {
    equal &= (host_input[i] == host_expected[i]);
  }
  return equal;
}
int main() {
    hipLaunchKernelGGL(
        compileDoublePrecisionMathOnDevice,
        dim3(1, 1, 1),
        dim3(1, 1, 1),
        0,
        0,
        1);
    passed();
}
Beispiel #7
0
bool run_rnorm4d() {
    double *A, *Ad, *B, *Bd, *C, *Cd, *D, *Dd, *E, *Ed;
    A = new double[N];
    B = new double[N];
    C = new double[N];
    D = new double[N];
    E = new double[N];
    double val = 0.0;
    for (int i = 0; i < N; i++) {
        A[i] = 1.0;
        B[i] = 2.0;
        C[i] = 3.0;
        D[i] = 4.0;
    }
    val = 1 / sqrt(1.0 + 4.0 + 9.0 + 16.0);
    hipMalloc((void**)&Ad, SIZE);
    hipMalloc((void**)&Bd, SIZE);
    hipMalloc((void**)&Cd, SIZE);
    hipMalloc((void**)&Dd, SIZE);
    hipMalloc((void**)&Ed, SIZE);
    hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
    hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice);
    hipMemcpy(Cd, C, SIZE, hipMemcpyHostToDevice);
    hipMemcpy(Dd, D, SIZE, hipMemcpyHostToDevice);
    hipLaunchKernelGGL(test_rnorm4d, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd, Dd, Ed);
    hipMemcpy(E, Ed, SIZE, hipMemcpyDeviceToHost);
    int passed = 0;
    for (int i = 0; i < 512; i++) {
        if (E[i] - val < 0.000001) {
            passed = 1;
        }
    }

    delete[] A;
    delete[] B;
    delete[] C;
    delete[] D;
    delete[] E;
    hipFree(Ad);
    hipFree(Bd);
    hipFree(Cd);
    hipFree(Dd);
    hipFree(Ed);

    if (passed == 1) {
        return true;
    }
    assert(passed == 1);
    return false;
}
Beispiel #8
0
bool run_sincospi() {
    double *A, *Ad, *B, *C, *Bd, *Cd;
    A = new double[N];
    B = new double[N];
    C = new double[N];
    for (int i = 0; i < N; i++) {
        A[i] = 1.0;
    }
    hipMalloc((void**)&Ad, SIZE);
    hipMalloc((void**)&Bd, SIZE);
    hipMalloc((void**)&Cd, SIZE);
    hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
    hipLaunchKernelGGL(test_sincospi, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd);
    hipMemcpy(B, Bd, SIZE, hipMemcpyDeviceToHost);
    hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost);
    int passed = 0;
    for (int i = 0; i < 512; i++) {
        if (B[i] - sin(3.14 * 1.0) < 0.1) {
            passed = 1;
        }
    }
    passed = 0;
    for (int i = 0; i < 512; i++) {
        if (C[i] - cos(3.14 * 1.0) < 0.1) {
            passed = 1;
        }
    }

    delete[] A;
    delete[] B;
    delete[] C;
    hipFree(Ad);
    hipFree(Bd);
    hipFree(Cd);

    if (passed == 1) {
        return true;
    }
    assert(passed == 1);
    return false;
}
Beispiel #9
0
bool run_rhypot() {
    double *A, *Ad, *B, *Bd, *C, *Cd;
    A = new double[N];
    B = new double[N];
    C = new double[N];
    double val = 0.0;
    for (int i = 0; i < N; i++) {
        A[i] = 1.0;
        B[i] = 2.0;
    }
    val = 1 / sqrt(1.0 + 4.0);
    hipMalloc((void**)&Ad, SIZE);
    hipMalloc((void**)&Bd, SIZE);
    hipMalloc((void**)&Cd, SIZE);
    hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
    hipMemcpy(Bd, B, SIZE, hipMemcpyHostToDevice);
    hipLaunchKernelGGL(test_rhypot, dim3(1), dim3(N), 0, 0, Ad, Bd, Cd);
    hipMemcpy(C, Cd, SIZE, hipMemcpyDeviceToHost);
    int passed = 0;
    for (int i = 0; i < 512; i++) {
        if (C[i] - val < 0.000001) {
            passed = 1;
        }
    }

    delete[] A;
    delete[] B;
    delete[] C;
    hipFree(Ad);
    hipFree(Bd);
    hipFree(Cd);

    if (passed == 1) {
        return true;
    }
    assert(passed == 1);
    return false;
}
Beispiel #10
0
void no_cache(float *A_h, float *A_d, float *X_h, float *X_d, float *Y_h, float *Y_d, size_t NUM_ROW, int p=0)
{


	if(p) printf ("info: allocate host mem (%6.2f KB)\n", NUM_COLUMN*NUM_ROW*sizeof(float)/1024.0);

	A_h = (float*)malloc(NUM_ROW * NUM_COLUMN * sizeof(float));
	CHECK(A_h == 0 ? hipErrorMemoryAllocation : hipSuccess );

    X_h = (float*)malloc(NUM_COLUMN * sizeof(float));
	CHECK(X_h == 0 ? hipErrorMemoryAllocation : hipSuccess );
    
    Y_h = (float*)malloc(NUM_ROW * sizeof(float));
	CHECK(Y_h == 0 ? hipErrorMemoryAllocation : hipSuccess );

	// Fill with Phi + i
    for (size_t i=0; i<NUM_ROW * NUM_COLUMN; i++)
	{
		 A_h[i] = 1.618f + (i % NB_X);
	}

    for (size_t i=0; i< NUM_COLUMN; i++)
	{
		 X_h[i] = 1.618f + i;
	}

    
	if(p) printf ("info: allocate device mem (%6.2f KB)\n", NUM_ROW * NUM_COLUMN * sizeof(float)/1024.0);
	CHECK(hipMalloc(&A_d, NUM_ROW * NUM_COLUMN * sizeof(float)));
	CHECK(hipMalloc(&X_d, NUM_COLUMN * sizeof(float)));
	CHECK(hipMalloc(&Y_d, NUM_ROW * sizeof(float)));

	if(p) printf ("info: copy Host2Device\n");
    CHECK ( hipMemcpy(A_d, A_h, NUM_ROW * NUM_COLUMN * sizeof(float), hipMemcpyHostToDevice));
    CHECK ( hipMemcpy(X_d, X_h, NUM_COLUMN * sizeof(float), hipMemcpyHostToDevice));

	const unsigned blocks = (NUM_ROW -1)/NB_X + 1;
	const unsigned threadsPerBlock = NB_X;

	if(p) printf ("info: launch 'gemv_kernel' kernel\n");


    for(int i=1 ; i < 1e3; i*=2)
    {
        size_t num_row = NB_X * i;
        clock_t t;
        t = clock();
        
        double time; 
        time = rocblas_wtime();

	    hipLaunchKernelGGL(HIP_KERNEL_NAME(gemv_kernel), dim3(blocks), dim3(threadsPerBlock), 0, 0, A_d, X_d, Y_d, num_row);
    
        //time = rocblas_wtime() - time;

        hipDeviceSynchronize();
        t = clock() - t;
        time = ((double)t)/CLOCKS_PER_SEC*1000 ;

        if(p) printf ("It took me %d clicks (%f miliseconds).\n",t, time);
    
        
        printf ("Row = %d, It took me (%f milliseconds), Gflops=%f\n",time, num_row, 2*num_row*NUM_COLUMN/(time)/10e6);
    }
/*
	if(p) printf ("info: copy Device2Host\n");
    CHECK ( hipMemcpy(Y_h, Y_d, NUM_ROW * sizeof(float), hipMemcpyDeviceToHost));

	if(p) printf ("info: check result\n");


    for (size_t i=0; i<NUM_ROW; i++)  {
        float res = 0;
        for(int j=0; j<NUM_COLUMN; j++){
            res += A_h[i + j * NUM_ROW] * X_h[j];
        }
        if (Y_h[i] != res) 
        {
            printf("i=%d, CPU result=%f, GPU result=%f\n", i, res, Y_h[i]);
		    //CHECK(hipErrorUnknown);
        }
    }

*/
	if(p) printf ("PASSED!\n");

    hipFree(A_d);
    hipFree(Y_d);
    hipFree(X_d);

    free(A_h);
    free(Y_h);
    free(X_h);

}
Beispiel #11
0
void test(unsigned testMask, int *C_d, int *C_h, int64_t numElements, SyncMode syncMode, bool expectMismatch)
{

    // This test sends a long-running kernel to the null stream, then tests to see if the
    // specified synchronization technique is effective.
    //
    // Some syncMode are not expected to correctly sync (for example "syncNone").  in these
    // cases the test sets expectMismatch and the check logic below will attempt to ensure that
    // the undesired synchronization did not occur - ie ensure the kernel is still running and did
    // not yet update the stop event.  This can be tricky since if the kernel runs fast enough it
    // may complete before the check.  To prevent this, the addCountReverse has a count parameter
    // which causes it to loop repeatedly, and the results are checked in reverse order.
    //
    // Tests with expectMismatch=true should ensure the kernel finishes correctly. This results
    // are checked and we test to make sure stop event has completed.

    if (!(testMask & p_tests)) {
        return;
    }
    printf ("\ntest 0x%02x: syncMode=%s expectMismatch=%d\n",
            testMask, syncModeString(syncMode), expectMismatch);

    size_t sizeBytes = numElements * sizeof(int);

    int count =100;
    int init0 = 0;
    HIPCHECK(hipMemset(C_d, init0, sizeBytes));
    for (int i=0; i<numElements; i++) {
        C_h[i] = -1; // initialize
    }

    hipStream_t otherStream = 0;
    unsigned flags = (syncMode == syncMarkerThenOtherNonBlockingStream) ?  hipStreamNonBlocking : hipStreamDefault;
    HIPCHECK(hipStreamCreateWithFlags(&otherStream, flags));
    hipEvent_t stop, otherStreamEvent;
    HIPCHECK(hipEventCreate(&stop));
    HIPCHECK(hipEventCreate(&otherStreamEvent));


    unsigned blocks = HipTest::setNumBlocks(blocksPerCU, threadsPerBlock, numElements);
    // Launch kernel into null stream, should result in C_h == count.
    hipLaunchKernelGGL(
        HipTest::addCountReverse,
        dim3(blocks),
        dim3(threadsPerBlock),
        0,
        0 /*stream*/,
        static_cast<const int*>(C_d),
        C_h,
        numElements,
        count);
    HIPCHECK(hipEventRecord(stop, 0/*default*/));

    switch (syncMode) {
        case syncNone:
            break;
        case syncNullStream:
            HIPCHECK(hipStreamSynchronize(0));  // wait on host for null stream:
            break;
        case syncOtherStream:
            // Does this synchronize with the null stream?
            HIPCHECK(hipStreamSynchronize(otherStream));
            break;
        case syncMarkerThenOtherStream:
        case syncMarkerThenOtherNonBlockingStream:

            // this may wait for NULL stream depending hipStreamNonBlocking flag above
            HIPCHECK(hipEventRecord(otherStreamEvent, otherStream));

            HIPCHECK(hipStreamSynchronize(otherStream));
            break;
        case syncDevice:
            HIPCHECK(hipDeviceSynchronize());
            break;
        default:
            assert(0);
    };

    hipError_t done = hipEventQuery(stop);

    if (expectMismatch) {
        assert (done == hipErrorNotReady);
    } else {
        assert (done == hipSuccess);
    }

    int mismatches = 0;
    int expected = init0 + count;
    for (int i=0; i<numElements; i++) {
        bool compareEqual = (C_h[i] == expected);
        if (!compareEqual) {
            mismatches ++;
            if  (!expectMismatch) {
                printf ("C_h[%d] (%d) != %d\n", i, C_h[i], expected);
                assert(C_h[i] == expected);
            }
        }
    }

    if (expectMismatch) {
        assert (mismatches > 0);
    }


    HIPCHECK(hipStreamDestroy(otherStream));
    HIPCHECK(hipEventDestroy(stop));
    HIPCHECK(hipEventDestroy(otherStreamEvent));

    HIPCHECK(hipDeviceSynchronize());

    printf ("test:   OK - %d mismatches (%6.2f%%)\n",  mismatches, ((double)(mismatches)*100.0)/numElements);
}
Beispiel #12
0
int main() {
    hipLaunchKernelGGL(FloatMathPrecise, dim3(1, 1, 1), dim3(1, 1, 1), 0, 0);
    passed();
}