TEST( SLOW_testintwrapper_huge, testreadwrite ) {

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testeasycl.cl", "test_stress");
    const int N = 1000000;
    int *in = new int[N];
    for( int i = 0; i < N; i++ ) {
        in[i] = i * 3;
    }
    int *out = new int[N];
    CLWrapper *inwrapper = cl->wrap(N, in);
    CLWrapper *outwrapper = cl->wrap(N, out);
    inwrapper->copyToDevice();
    outwrapper->createOnDevice();
    kernel->input( inwrapper );
    kernel->output( outwrapper );
    int globalSize = N;
    int workgroupsize = cl->getMaxWorkgroupSize();
    globalSize = ( ( globalSize + workgroupsize - 1 ) / workgroupsize ) * workgroupsize;
    cout << "globalsize: " << globalSize << " workgroupsize " << workgroupsize << endl;
    kernel->run_1d( globalSize, workgroupsize );
    outwrapper->copyToHost();
    for( int i = 0; i < N; i++ ) {
       if( out[i] != 689514 ) {
           cout << "out[" << i << "] != 689514: " << out[i] << endl;
           exit(-1);
       }
    }

    delete outwrapper;
    delete inwrapper;
    delete kernel;
    delete cl;
}
TEST( SLOW_testintwrapper_huge, testread ) {
    Timer timer;
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testeasycl.cl", "test_read");
//    const int N = 4500000;
//    const int N = (4500000/512)*512;
    int N = 100000;
    int *out = new int[N];
    CLWrapper *outwrapper = cl->wrap(N, out);
    kernel->in(3)->in(7);
    kernel->output( outwrapper );
    int globalSize = N;
    int workgroupsize = cl->getMaxWorkgroupSize();
    globalSize = ( ( globalSize + workgroupsize - 1 ) / workgroupsize ) * workgroupsize;
    cout << "globalsize: " << globalSize << " workgroupsize " << workgroupsize << endl;
    timer.timeCheck("before kernel");
    kernel->run_1d( globalSize, workgroupsize );
    timer.timeCheck("after kernel");
    outwrapper->copyToHost();
    timer.timeCheck("after copy to host");
    for( int i = 0; i < N; i++ ) {
       if( out[i] != 4228 ) {
           cout << "out[" << i << "] != 4228: " << out[i] << endl;
           exit(-1);
       }
    }

    delete outwrapper;
    delete kernel;
    delete cl;
}
Exemple #3
0
TEST(SLOW_testlocal, selfdot_3levels_withoutscratch) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testlocal.cl", "selfdot_ints_withoutscratch");
    int workgroupSize = min(512, cl->getMaxWorkgroupSize());
    const int numWorkgroups = workgroupSize;
    const int level3size = numWorkgroups / 4;
    const int N = workgroupSize * numWorkgroups * level3size;
    cout << "numworkgroups " << numWorkgroups << " workgroupsize " << workgroupSize << " N " << N << endl;
    int *myarray = new int[N];
    for(int i = 0; i < N; i++) {
        myarray[i] = ((i + 7) * 3) % 5;
    }

//    Timer timer;

    CLWrapper *a1wrapper = cl->wrap(N, myarray);
    a1wrapper->copyToDevice();
//    timer.timeCheck("copied array to device");
    int *second = new int[N];
    CLWrapper *secondwrapper = cl->wrap(N, second);
    int *a2 = new int[numWorkgroups*level3size];
    CLWrapper *a2wrapper = cl->wrap(numWorkgroups * level3size, a2);
    kernel->in(a1wrapper);
    kernel->out(secondwrapper);
    kernel->out(a2wrapper);
    kernel->run_1d(N, workgroupSize);
    cl->finish();

    int *a3 = new int[numWorkgroups];
    CLWrapper *a3wrapper = cl->wrap(level3size, a3);
    kernel->in(a2wrapper);
    kernel->out(secondwrapper);
    kernel->out(a3wrapper);
    kernel->run_1d(workgroupSize * level3size, workgroupSize);
    cl->finish();

    int finalSum;
    kernel->in(a3wrapper);
    kernel->out(secondwrapper);
    kernel->out(1, &finalSum);
    kernel->run_1d(level3size, level3size);
//    timer.timeCheck("finished 3-level reduce");

    EXPECT_EQ(-1306309159, finalSum);

    delete a1wrapper;
    delete a2wrapper;
    delete a3wrapper;
    delete secondwrapper;
    delete[] a3;
    delete[] second;
    delete[] a2;
    delete[]myarray;
    delete kernel;
    delete cl;
}
Exemple #4
0
TEST(testlocal, reduceviascratch_multipleworkgroups_ints) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testlocal.cl", "reduceViaScratch_multipleworkgroups_ints");
    int workgroupSize = min(512, cl->getMaxWorkgroupSize());
    const int numWorkgroups = workgroupSize;
    const int N = workgroupSize * numWorkgroups;
    cout << "numworkgroups " << numWorkgroups << " workgroupsize " << workgroupSize << " N " << N << endl;
    int *myarray = new int[N];
    int sumViaCpu = 0;
    int localSumViaCpu = 0;
    int localSumViaCpu2 = 0;
    int *localSumsViaCpu = new int[numWorkgroups];
    memset(localSumsViaCpu, 0, sizeof(int)*numWorkgroups);
    for(int i = 0; i < N; i++) {
        myarray[i] = ((i + 7) * 3) % 50;
        sumViaCpu += myarray[i];
        if(i < workgroupSize) {
            localSumViaCpu += myarray[i];
        }
        if(i >= workgroupSize && i < workgroupSize * 2) {
            localSumViaCpu2 += myarray[i];
        }
        int workgroupId = i / workgroupSize;
        localSumsViaCpu[workgroupId] += myarray[i];
    }
    ASSERT_EQ(localSumViaCpu, localSumsViaCpu[0]);
    ASSERT_EQ(localSumViaCpu2, localSumsViaCpu[1]);
    ASSERT_NE(myarray[0], sumViaCpu);

//    Timer timer;

    CLWrapper *a1wrapper = cl->wrap(N, myarray);
    a1wrapper->copyToDevice();
    int *a2 = new int[numWorkgroups];
    CLWrapper *a2wrapper = cl->wrap(numWorkgroups, a2);
    kernel->in(a1wrapper);
    kernel->out(a2wrapper);
    kernel->localInts(workgroupSize);
    kernel->run_1d(N, workgroupSize);

    int finalSum;
    kernel->in(a2wrapper);
    kernel->out(1, &finalSum);
    kernel->localInts(workgroupSize);
    kernel->run_1d(numWorkgroups, workgroupSize);
//    timer.timeCheck("finished 2-way reduce");

    EXPECT_EQ(sumViaCpu, finalSum);

    delete a1wrapper;
    delete a2wrapper;
    delete[] a2;
    delete[]myarray;
    delete kernel;
    delete cl;
}
Exemple #5
0
TEST( testdefines, simple ) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testdefines.cl", "testDefines", 
        "-D DOUBLE -D SOME_VALUE=5" );
    float out[32];
    kernel->out( 32, out );
    kernel->run_1d( 32, 32 );
    EXPECT_EQ( 10, out[3] );

    delete kernel;
    delete cl;
}
Exemple #6
0
TEST(testlocal, notUselocal) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testlocal.cl", "notUseLocal");
    int workgroupSize = 64;
    float *myarray = new float[workgroupSize];

    kernel->in(workgroupSize);
    kernel->inout(workgroupSize, myarray);
    kernel->run_1d(workgroupSize, workgroupSize);

    delete[]myarray;
    delete kernel;
    delete cl;
}
Exemple #7
0
TEST(testdirtywrapper, main) {
    if(!EasyCL::isOpenCLAvailable()) {
        cout << "opencl library not found" << endl;
        exit(-1);
    }
    cout << "found opencl library" << endl;

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testeasycl.cl", "test");
    float in[5];
    for(int i = 0; i < 5; i++) {
        in[i] = i * 3;
    }
    float out[5];
    CLWrapper *inwrapper = cl->wrap(5, in);
    CLWrapper *outwrapper = cl->wrap(5, out);
    EXPECT_FALSE(inwrapper->isDeviceDirty());
    EXPECT_FALSE(outwrapper->isDeviceDirty());

    inwrapper->copyToDevice();
    EXPECT_FALSE(inwrapper->isDeviceDirty());
    EXPECT_FALSE(outwrapper->isDeviceDirty());

    kernel->input(inwrapper);
    kernel->output(outwrapper);
    EXPECT_FALSE(inwrapper->isDeviceDirty());
    EXPECT_FALSE(outwrapper->isDeviceDirty());

    kernel->run_1d(5, 5);
    EXPECT_FALSE(inwrapper->isDeviceDirty());
    EXPECT_TRUE(outwrapper->isDeviceDirty());

    outwrapper->copyToHost();
    EXPECT_FALSE(outwrapper->isDeviceDirty());

    assertEquals(out[0] , 7);
    assertEquals(out[1] , 10);
    assertEquals(out[2] , 13);
    assertEquals(out[3] , 16);
    assertEquals(out[4] , 19);
    cout << "tests completed ok" << endl;

    delete inwrapper;
    delete outwrapper;
    delete kernel;
    delete cl;
}
Exemple #8
0
TEST(testlocal, reduceviascratch_multipleworkgroups) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testlocal.cl", "reduceViaScratch_multipleworkgroups");
    int workgroupSize = min(512, cl->getMaxWorkgroupSize());
    const int numWorkgroups = workgroupSize;
    const int N = workgroupSize * numWorkgroups;
    float *myarray = new float[N];
    float sumViaCpu = 0;
    float localSumViaCpu = 0;
    for(int i = 0; i < N; i++) {
        myarray[i] = ((i + 7) * 3) % 10;
        sumViaCpu += myarray[i];
        if(i < workgroupSize) {
            localSumViaCpu += myarray[i];
        }
    }
    cout << "expected sum, calc'd via cpu, : " << sumViaCpu << endl;
    EXPECT_NE(myarray[0], sumViaCpu);

//    Timer timer;

    CLWrapper *a1wrapper = cl->wrap(N, myarray);
    a1wrapper->copyToDevice();
    float *a2 = new float[numWorkgroups];
    CLWrapper *a2wrapper = cl->wrap(numWorkgroups, a2);
    kernel->in(a1wrapper);
    kernel->out(a2wrapper);
    kernel->localFloats(workgroupSize);
    kernel->run_1d(N, workgroupSize);

    float finalSum;
    kernel->in(a2wrapper);
    kernel->out(1, &finalSum);
    kernel->localFloats(workgroupSize);
    kernel->run_1d(numWorkgroups, workgroupSize);

    EXPECT_EQ(sumViaCpu, finalSum);

    delete a1wrapper;
    delete a2wrapper;
    delete[] a2;
    delete[]myarray;
    delete kernel;
    delete cl;
}
Exemple #9
0
TEST(testlocal, localreduce) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testlocal.cl", "reduceViaScratch");
    int workgroupSize = min(512, cl->getMaxWorkgroupSize());
    float *myarray = new float[workgroupSize];
//    Timer timer;
    for(int i = 0; i < 2000; i++) {
        float sumViaCpu = 0;
        for(int i = 0; i < workgroupSize; i++) {
            myarray[i] = (i + 7) * 3;
            sumViaCpu += myarray[i];
        }
        EXPECT_NE(myarray[0], sumViaCpu);

        kernel->inout(workgroupSize, myarray)->localFloats(workgroupSize);
        kernel->run_1d(workgroupSize, workgroupSize);
        
        EXPECT_EQ(myarray[0], sumViaCpu);
    }
//    timer.timeCheck("after iterations");
    delete[]myarray;
    delete kernel;
    delete cl;
}
Exemple #10
0
int main(int argc, char *argv[]) 
{
	const int test_size = 128;

	std::random_device rd;
	std::seed_seq s{ rd(), rd(), rd(), rd(), rd(), rd(), rd(), rd() };
	std::mt19937 mt(s);

	EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();

	CLKernel *kern = cl->buildKernel("test.cl", "test");

	std::vector<uint32_t> inbuf(test_size*4), outbuf(test_size), compare(test_size);
	std::generate(inbuf.begin(), inbuf.end(), mt);

	std::cout << "Running CL implementation" << std::endl;
	kern->in(inbuf.size(), &inbuf[0]);
	kern->out(outbuf.size(), &outbuf[0]);
	size_t global_size[] = { test_size };
	kern->run(1, global_size, nullptr);
	delete kern;

	std::cout << "Running local implementation" << std::endl;
	for (int i = 0; i < compare.size(); ++i) {
		compare[i] = inbuf[i] ^ inbuf[i + 1] ^ inbuf[i + 2] ^ inbuf[i + 3];
	}

	std::cout << "Comparing CL test with local implementation" << std::endl;
	for (int i = 0; i < compare.size(); ++i) {
		if (outbuf[i] != compare[i]) {
			std::cout << "Error in index " << i << " " << outbuf[i] << " != " << compare[i] << std::endl;
		}
	}

	return 0;
}