// Runs the kernel returned by makeBasicKernel() twice over the same 12-float
// source and checks that a 6-float block lands at the front of `b` while the
// remaining entries of `b` stay zero.
//
// The three integer arguments look like packed (row << 10) | col pairs: source
// dimensions, block start position and block dimensions. That reading matches
// the expected values below - confirm against the kernel source.
TEST( testCopyBlock, basic ) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();

    float a[] = { 1,2,3,4,
                  5,6,7,8,
                  9,10,11,12 };
    float b[10];
    memset(b, 0, sizeof(b));

    const int blockRows = 2;
    const int blockCols = 3;
    const int blockLen = blockRows * blockCols;
    const int packedSourceSize = (3 << 10) | 4;
    const int packedBlockSize = (blockRows << 10) | blockCols;

    CLKernel *kernel = makeBasicKernel( cl );

    // First pass: start position packed as (0 << 10) | 1.
    const int packedStartFirst = (0 << 10) | 1;
    kernel->in( 12, a );
    kernel->out( 6, b );
    kernel->in( packedSourceSize );
    kernel->in( packedStartFirst );
    kernel->in( packedBlockSize );
    kernel->localFloats( blockLen );
    kernel->run_1d( 12, 4 );
    cl->finish();

    float expected[] = { 2,3,4,
                         6,7,8 };
    for( int idx = 0; idx < blockLen; idx++ ) {
        cout << b[idx] << " ";
        EXPECT_EQ( expected[idx], b[idx] );
        if( idx % blockCols == blockCols - 1 ) {
            cout << endl;  // end of one printed block row
        }
    }
    cout << endl;
    // Everything past the 6 output floats must be untouched.
    for( int idx = 6; idx < 10; idx++ ) {
        cout << b[idx] << " ";
        EXPECT_EQ( 0, b[idx] );
    }
    cout << endl;
    cout << endl;

    // Second pass on the same kernel object: start position (1 << 10) | 0.
    const int packedStartSecond = (1 << 10) | 0;
    kernel->in( 12, a );
    kernel->out( 6, b );
    kernel->in( packedSourceSize );
    kernel->in( packedStartSecond );
    kernel->in( packedBlockSize );
    kernel->localFloats( blockLen );
    kernel->run_1d( 12, 4 );
    cl->finish();

    float expected2[] = { 5,6,7,
                          9,10,11 };
    for( int idx = 0; idx < blockLen; idx++ ) {
        cout << b[idx] << " ";
        EXPECT_EQ( expected2[idx], b[idx] );
        if( idx % blockCols == blockCols - 1 ) {
            cout << endl;
        }
    }
    cout << endl;
    for( int idx = 6; idx < 10; idx++ ) {
        cout << b[idx] << " ";
        EXPECT_EQ( 0, b[idx] );
    }
    cout << endl;
    cout << endl;

    delete kernel;
    delete cl;
}
// Runs the kernel returned by makeKernel() over 12 input floats with a single
// work-group of 12 and checks that the first 12 entries of the 16-float output
// equal the input (1..12) while the last 4 entries stay zero.
TEST( testCopyLocal, basic ) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();

    float a[] = { 1,2,3,4,
                  5,6,7,8,
                  9,10,11,12 };
    float b[16];
    memset(b, 0, sizeof(b));

    const int numValues = 12;
    const int valuesPerLine = 4;

    CLKernel *kernel = makeKernel( cl );
    kernel->in( 12, a );
    kernel->out( 16, b );
    kernel->in( 12 );
    kernel->localFloats( 12 );
    kernel->run_1d( 12, 12 );
    cl->finish();

    // Input holds 1..12 in order, so output element idx should equal idx + 1.
    for( int idx = 0; idx < numValues; idx++ ) {
        cout << b[idx] << " ";
        EXPECT_EQ( idx + 1, b[idx] );
        if( idx % valuesPerLine == valuesPerLine - 1 ) {
            cout << endl;  // print four values per line
        }
    }
    cout << endl;
    // The output entries beyond the copied values must remain zero.
    for( int idx = 12; idx < 16; idx++ ) {
        cout << b[idx] << " ";
        EXPECT_EQ( 0, b[idx] );
    }
    cout << endl;

    delete kernel;
    delete cl;
}
// Sums N floats with the "reduceViaScratch_multipleworkgroups" kernel from
// testlocal.cl in two launches and compares the result with a CPU sum.
//
// Launch 1 runs N work-items in groups of workgroupSize and writes
// numWorkgroups floats into a2 (presumably one partial sum per work-group -
// confirm against the kernel). Launch 2 runs the same kernel over a2 as a
// single work-group and writes one float, the final sum.
TEST(testlocal, reduceviascratch_multipleworkgroups) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testlocal.cl", "reduceViaScratch_multipleworkgroups");

    // numWorkgroups == workgroupSize, so the second launch fits in exactly one
    // work-group.
    int workgroupSize = min(512, cl->getMaxWorkgroupSize());
    const int numWorkgroups = workgroupSize;
    const int N = workgroupSize * numWorkgroups;

    // Fill the input with small integers (0..9). With N <= 512 * 512 every
    // partial sum stays below 2^24, so float addition is exact in any order
    // and the exact EXPECT_EQ comparison below is valid.
    float *myarray = new float[N];
    float sumViaCpu = 0;
    for(int i = 0; i < N; i++) {
        myarray[i] = ((i + 7) * 3) % 10;
        sumViaCpu += myarray[i];
    }
    cout << "expected sum, calc'd via cpu, : " << sumViaCpu << endl;
    // Sanity check: the expected sum must differ from the first element alone.
    EXPECT_NE(myarray[0], sumViaCpu);

    CLWrapper *a1wrapper = cl->wrap(N, myarray);
    a1wrapper->copyToDevice();
    // a2 is only ever written by the first launch and read by the second
    // through its wrapper; the host-side contents are never inspected here.
    float *a2 = new float[numWorkgroups];
    CLWrapper *a2wrapper = cl->wrap(numWorkgroups, a2);

    // Launch 1: N inputs -> numWorkgroups outputs.
    kernel->in(a1wrapper);
    kernel->out(a2wrapper);
    kernel->localFloats(workgroupSize);
    kernel->run_1d(N, workgroupSize);

    // Launch 2: numWorkgroups inputs -> 1 output.
    float finalSum;
    kernel->in(a2wrapper);
    kernel->out(1, &finalSum);
    kernel->localFloats(workgroupSize);
    kernel->run_1d(numWorkgroups, workgroupSize);

    EXPECT_EQ(sumViaCpu, finalSum);

    delete a1wrapper;
    delete a2wrapper;
    delete[] a2;
    delete[] myarray;
    delete kernel;
    delete cl;
}
// Smoke test: builds the "useLocal" kernel from testlocal.cl and runs it once
// over a single work-group of 64 items with a local float buffer. There are no
// assertions on the output; the test only checks that the kernel builds and
// the launch completes.
TEST(testlocal, uselocal) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testlocal.cl", "useLocal");

    int workgroupSize = 64;
    // Value-initialise (zero) the buffer: it is passed as an in/out argument,
    // so its contents are handed to the kernel as input. A plain
    // `new float[n]` would hand over indeterminate memory.
    float *myarray = new float[workgroupSize]();

    kernel->in(workgroupSize);
    kernel->inout(workgroupSize, myarray);
    kernel->localFloats(workgroupSize);
    kernel->run_1d(workgroupSize, workgroupSize);

    delete[] myarray;
    delete kernel;
    delete cl;
}