C++ (Cpp) EasyCL::getMaxWorkgroupSize Examples

Programming Language: C++ (Cpp)

Class/Type: EasyCL

Method/Function: getMaxWorkgroupSize

Examples at hotexamples.com: 7

C++ (Cpp) EasyCL::getMaxWorkgroupSize - 7 examples found. These are the top-rated real-world C++ (Cpp) examples of EasyCL::getMaxWorkgroupSize, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

buildKernel(10)

buildKernelFromString(10)

finish(10)

getKernel(9)

kernelExists(9)

getMaxWorkgroupSize(7)

wrap(5)

storeKernel(2)

arrayFloat(1)

newQueue(1)

Example #1

Show file

File: testintwrapper_huge.cpp Project: tkaplan/EasyCL

// Runs the "test_stress" kernel from testeasycl.cl over one million ints and
// checks on the host that every output element equals 689514.
//
// The global size is rounded up to a whole number of workgroups, so it may
// exceed N.
// NOTE(review): presumably the kernel tolerates the padded work-items - confirm
// against testeasycl.cl.
TEST( SLOW_testintwrapper_huge, testreadwrite ) {

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testeasycl.cl", "test_stress");
    const int N = 1000000;
    int *in = new int[N];
    for( int i = 0; i < N; i++ ) {
        in[i] = i * 3;
    }
    int *out = new int[N];
    CLWrapper *inwrapper = cl->wrap(N, in);
    CLWrapper *outwrapper = cl->wrap(N, out);
    inwrapper->copyToDevice();
    outwrapper->createOnDevice();
    kernel->input( inwrapper );
    kernel->output( outwrapper );
    int globalSize = N;
    int workgroupsize = cl->getMaxWorkgroupSize();
    // Round up to the next multiple of the workgroup size.
    globalSize = ( ( globalSize + workgroupsize - 1 ) / workgroupsize ) * workgroupsize;
    cout << "globalsize: " << globalSize << " workgroupsize " << workgroupsize << endl;
    kernel->run_1d( globalSize, workgroupsize );
    outwrapper->copyToHost();
    for( int i = 0; i < N; i++ ) {
       if( out[i] != 689514 ) {
           cout << "out[" << i << "] != 689514: " << out[i] << endl;
           exit(-1);
       }
    }

    // Destroy the wrappers before the host buffers they were created over.
    delete outwrapper;
    delete inwrapper;
    // Host buffers were previously leaked.
    delete[] out;
    delete[] in;
    delete kernel;
    delete cl;
}

Example #2

Show file

File: testintwrapper_huge.cpp Project: tkaplan/EasyCL

// Runs the "test_read" kernel from testeasycl.cl with scalar inputs 3 and 7 and
// an N-element output buffer, then checks on the host that every output element
// equals 4228. Timer checkpoints are recorded around the kernel launch and the
// device-to-host copy.
TEST( SLOW_testintwrapper_huge, testread ) {
    Timer timer;
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testeasycl.cl", "test_read");
//    const int N = 4500000;
//    const int N = (4500000/512)*512;
    int N = 100000;
    int *out = new int[N];
    CLWrapper *outwrapper = cl->wrap(N, out);
    kernel->in(3)->in(7);
    kernel->output( outwrapper );
    int globalSize = N;
    int workgroupsize = cl->getMaxWorkgroupSize();
    // Round up to the next multiple of the workgroup size; may exceed N.
    globalSize = ( ( globalSize + workgroupsize - 1 ) / workgroupsize ) * workgroupsize;
    cout << "globalsize: " << globalSize << " workgroupsize " << workgroupsize << endl;
    timer.timeCheck("before kernel");
    kernel->run_1d( globalSize, workgroupsize );
    timer.timeCheck("after kernel");
    outwrapper->copyToHost();
    timer.timeCheck("after copy to host");
    for( int i = 0; i < N; i++ ) {
       if( out[i] != 4228 ) {
           cout << "out[" << i << "] != 4228: " << out[i] << endl;
           exit(-1);
       }
    }

    // Destroy the wrapper before the host buffer it was created over.
    delete outwrapper;
    // Host buffer was previously leaked.
    delete[] out;
    delete kernel;
    delete cl;
}

Example #3

Show file

File: testforward.cpp Project: 2php/DeepCL

// Calls compareSpecific for instances 2..7 (instance 5 excluded) on a
// zero-padded, biased layer with 8 input planes, 8 filters and filter size 5.
// The input size is 19, except that instances 2 and 3 fall back to 15 when the
// device's maximum workgroup size is below 19 * 19.
TEST( testforward, compare_1_n_biased_pad ) {
    // The device is only queried for its workgroup limit, then released.
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    const int maxWorkgroupSize = cl->getMaxWorkgroupSize();
    delete cl;

    LayerDimensions dim;
    int batchSize = 4;
    int N = 4;
    string activationName = "tanh";
    dim.setInputPlanes( 8 ).setInputSize(19).setNumFilters( 8 )
        .setFilterSize( 5 )
        .setPadZeros( true ).setBiased( true );

    const bool workgroupTooSmall = maxWorkgroupSize < 19 * 19;
    for( int instance = 2; instance <= 7; instance++ ) {
        if( instance == 5 ) {
            // forwardfc: can't be used when input image size != filter size
            continue;
        }
        // Reset to the default size each round, then shrink for the two
        // instances that are limited by the workgroup size.
        dim.setInputSize(19);
        if( workgroupTooSmall && ( instance == 2 || instance == 3 ) ) {
            dim.setInputSize(15);
        }
        cout << "instance: " << instance << endl;
        compareSpecific( false, N, batchSize, dim, 1, instance );
    }
}

Example #4

Show file

File: testlocal.cpp Project: brunoro/EasyCL

// Three-pass run of the "selfdot_ints_withoutscratch" kernel from testlocal.cl:
//   pass 1: N input ints                      -> numWorkgroups * level3size ints (a2)
//   pass 2: numWorkgroups * level3size ints   -> level3size ints (a3)
//   pass 3: level3size ints                   -> a single int (finalSum)
// The final value is compared against a hard-coded expected result.
// NOTE(review): the expected constant presumably assumes workgroupSize == 512 -
// confirm on devices whose maximum workgroup size is smaller.
TEST(SLOW_testlocal, selfdot_3levels_withoutscratch) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testlocal.cl", "selfdot_ints_withoutscratch");
    int workgroupSize = min(512, cl->getMaxWorkgroupSize());
    const int numWorkgroups = workgroupSize;
    const int level3size = numWorkgroups / 4;
    const int N = workgroupSize * numWorkgroups * level3size;
    cout << "numworkgroups " << numWorkgroups << " workgroupsize " << workgroupSize << " N " << N << endl;
    int *myarray = new int[N];
    for(int i = 0; i < N; i++) {
        myarray[i] = ((i + 7) * 3) % 5;
    }

//    Timer timer;

    CLWrapper *a1wrapper = cl->wrap(N, myarray);
    a1wrapper->copyToDevice();
//    timer.timeCheck("copied array to device");
    int *second = new int[N];
    CLWrapper *secondwrapper = cl->wrap(N, second);
    int *a2 = new int[numWorkgroups*level3size];
    CLWrapper *a2wrapper = cl->wrap(numWorkgroups * level3size, a2);
    kernel->in(a1wrapper);
    kernel->out(secondwrapper);
    kernel->out(a2wrapper);
    kernel->run_1d(N, workgroupSize);
    cl->finish();

    // The host buffer is sized to match the element count it is wrapped with
    // (it was previously allocated with numWorkgroups elements).
    int *a3 = new int[level3size];
    CLWrapper *a3wrapper = cl->wrap(level3size, a3);
    kernel->in(a2wrapper);
    kernel->out(secondwrapper);
    kernel->out(a3wrapper);
    kernel->run_1d(workgroupSize * level3size, workgroupSize);
    cl->finish();

    int finalSum;
    kernel->in(a3wrapper);
    kernel->out(secondwrapper);
    kernel->out(1, &finalSum);
    kernel->run_1d(level3size, level3size);
//    timer.timeCheck("finished 3-level reduce");

    EXPECT_EQ(-1306309159, finalSum);

    delete a1wrapper;
    delete a2wrapper;
    delete a3wrapper;
    delete secondwrapper;
    delete[] a3;
    delete[] second;
    delete[] a2;
    delete[]myarray;
    delete kernel;
    delete cl;
}

Example #5

Show file

File: testlocal.cpp Project: brunoro/EasyCL

// Two-pass integer sum using the "reduceViaScratch_multipleworkgroups_ints"
// kernel from testlocal.cl:
//   pass 1: N ints             -> numWorkgroups ints (a2)
//   pass 2: numWorkgroups ints -> a single int (finalSum)
// The result is compared with a sum computed on the CPU. Per-workgroup CPU sums
// are also computed and cross-checked against each other before the kernel runs.
TEST(testlocal, reduceviascratch_multipleworkgroups_ints) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testlocal.cl", "reduceViaScratch_multipleworkgroups_ints");
    int workgroupSize = min(512, cl->getMaxWorkgroupSize());
    const int numWorkgroups = workgroupSize;
    const int N = workgroupSize * numWorkgroups;
    cout << "numworkgroups " << numWorkgroups << " workgroupsize " << workgroupSize << " N " << N << endl;
    int *myarray = new int[N];
    int sumViaCpu = 0;
    int localSumViaCpu = 0;   // sum over the first workgroup's slice
    int localSumViaCpu2 = 0;  // sum over the second workgroup's slice
    int *localSumsViaCpu = new int[numWorkgroups];
    memset(localSumsViaCpu, 0, sizeof(int)*numWorkgroups);
    for(int i = 0; i < N; i++) {
        myarray[i] = ((i + 7) * 3) % 50;
        sumViaCpu += myarray[i];
        if(i < workgroupSize) {
            localSumViaCpu += myarray[i];
        }
        if(i >= workgroupSize && i < workgroupSize * 2) {
            localSumViaCpu2 += myarray[i];
        }
        int workgroupId = i / workgroupSize;
        localSumsViaCpu[workgroupId] += myarray[i];
    }
    // Sanity checks on the CPU reference data itself.
    ASSERT_EQ(localSumViaCpu, localSumsViaCpu[0]);
    ASSERT_EQ(localSumViaCpu2, localSumsViaCpu[1]);
    ASSERT_NE(myarray[0], sumViaCpu);

//    Timer timer;

    CLWrapper *a1wrapper = cl->wrap(N, myarray);
    a1wrapper->copyToDevice();
    int *a2 = new int[numWorkgroups];
    CLWrapper *a2wrapper = cl->wrap(numWorkgroups, a2);
    kernel->in(a1wrapper);
    kernel->out(a2wrapper);
    kernel->localInts(workgroupSize);
    kernel->run_1d(N, workgroupSize);

    int finalSum;
    kernel->in(a2wrapper);
    kernel->out(1, &finalSum);
    kernel->localInts(workgroupSize);
    kernel->run_1d(numWorkgroups, workgroupSize);
//    timer.timeCheck("finished 2-way reduce");

    EXPECT_EQ(sumViaCpu, finalSum);

    delete a1wrapper;
    delete a2wrapper;
    delete[] a2;
    // Per-workgroup reference sums were previously leaked.
    delete[] localSumsViaCpu;
    delete[]myarray;
    delete kernel;
    delete cl;
}

Example #6

Show file

File: testlocal.cpp Project: brunoro/EasyCL

// Two-pass float sum using the "reduceViaScratch_multipleworkgroups" kernel
// from testlocal.cl:
//   pass 1: N floats             -> numWorkgroups floats (a2)
//   pass 2: numWorkgroups floats -> a single float (finalSum)
// The result is compared with a sum computed on the CPU.
TEST(testlocal, reduceviascratch_multipleworkgroups) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testlocal.cl", "reduceViaScratch_multipleworkgroups");
    int workgroupSize = min(512, cl->getMaxWorkgroupSize());
    const int numWorkgroups = workgroupSize;
    const int N = workgroupSize * numWorkgroups;
    float *myarray = new float[N];
    float sumViaCpu = 0;
    // Inputs are small whole numbers (0..9) stored as floats.
    for(int i = 0; i < N; i++) {
        myarray[i] = ((i + 7) * 3) % 10;
        sumViaCpu += myarray[i];
    }
    cout << "expected sum, calc'd via cpu, : " << sumViaCpu << endl;
    EXPECT_NE(myarray[0], sumViaCpu);

//    Timer timer;

    CLWrapper *a1wrapper = cl->wrap(N, myarray);
    a1wrapper->copyToDevice();
    float *a2 = new float[numWorkgroups];
    CLWrapper *a2wrapper = cl->wrap(numWorkgroups, a2);
    kernel->in(a1wrapper);
    kernel->out(a2wrapper);
    kernel->localFloats(workgroupSize);
    kernel->run_1d(N, workgroupSize);

    float finalSum;
    kernel->in(a2wrapper);
    kernel->out(1, &finalSum);
    kernel->localFloats(workgroupSize);
    kernel->run_1d(numWorkgroups, workgroupSize);

    EXPECT_EQ(sumViaCpu, finalSum);

    delete a1wrapper;
    delete a2wrapper;
    delete[] a2;
    delete[]myarray;
    delete kernel;
    delete cl;
}

Example #7

Show file

File: testlocal.cpp Project: brunoro/EasyCL

// Runs the "reduceViaScratch" kernel from testlocal.cl 2000 times on a single
// workgroup. Each round refills the buffer, computes the reference sum on the
// CPU, and expects the kernel to leave that sum in element 0 of the buffer.
TEST(testlocal, localreduce) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testlocal.cl", "reduceViaScratch");
    int workgroupSize = min(512, cl->getMaxWorkgroupSize());
    float *myarray = new float[workgroupSize];
//    Timer timer;
    for(int iteration = 0; iteration < 2000; iteration++) {
        // Refill the buffer and build the CPU reference for this round.
        float sumViaCpu = 0;
        for(int idx = 0; idx < workgroupSize; idx++) {
            myarray[idx] = (idx + 7) * 3;
            sumViaCpu += myarray[idx];
        }
        // Before the kernel runs, element 0 must not already hold the sum.
        EXPECT_NE(myarray[0], sumViaCpu);

        kernel->inout(workgroupSize, myarray)->localFloats(workgroupSize);
        kernel->run_1d(workgroupSize, workgroupSize);

        EXPECT_EQ(myarray[0], sumViaCpu);
    }
//    timer.timeCheck("after iterations");
    delete[]myarray;
    delete kernel;
    delete cl;
}