Example #1
TEST(testcopybuffer, throwsifnotondevice) {
    if(!EasyCL::isOpenCLAvailable()) {
        cout << "opencl library not found" << endl;
        exit(-1);
    }
    cout << "found opencl library" << endl;

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    //CLKernel *kernel = cl->buildKernel("testeasycl.cl", "test");
    const int bufferSize = 100 * 1024 / 4;
    float *in = new float[bufferSize];
    float *in2 = new float[bufferSize];
    for(int i = 0; i < bufferSize; i++) {
        in[i] = i * 3;
        in2[i] = 23 + i;
    }
    CLWrapper *inwrapper = cl->wrap(bufferSize, in);
    CLWrapper *in2wrapper = cl->wrap(bufferSize, in2);
    inwrapper->copyToDevice();
//    in2wrapper->copyToDevice();

    bool threw = false;
    try {
        inwrapper->copyTo(in2wrapper);
    } catch(runtime_error &e) {
        threw = true;
    }
    EXPECT_TRUE(threw);
    delete inwrapper;
    delete in2wrapper;
    delete[] in;
    delete[] in2;
    delete cl;
}
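For contrast, the non-throwing path requires both wrappers to be on the device before copyTo is called; Example #14 below exercises it in full. A minimal sketch, assuming the same EasyCL API as the tests in this listing:

#include "EasyCL.h"

void copyToExample() {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    float in[5] = {0, 3, 6, 9, 12};
    float in2[5] = {23, 24, 25, 26, 27};
    CLWrapper *inwrapper = cl->wrap(5, in);
    CLWrapper *in2wrapper = cl->wrap(5, in2);
    inwrapper->copyToDevice();
    in2wrapper->copyToDevice();    // without this line, copyTo throws runtime_error
    inwrapper->copyTo(in2wrapper); // device-side copy; host-side in2 is unchanged
    in2wrapper->copyToHost();      // now in2 == {0, 3, 6, 9, 12}
    delete in2wrapper;
    delete inwrapper;
    delete cl;
}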
Example #2
void CopyBuffer::copy( EasyCL *cl, CLWrapper *sourceWrapper, int *target ) {
    // first we will copy it to another buffer, so we can copy it out
    int bufferSize = sourceWrapper->size();
//    float *copiedBuffer = new float[ bufferSize ];
    CLWrapper *targetWrapper = cl->wrap( bufferSize, target );
    targetWrapper->createOnDevice();

    // now copy it, via a kernel
    const string kernelSource = "\n"
        "kernel void copy( int N, global int const *source, global int *dest ) {\n"
        "    #define globalId ( get_global_id(0) )\n"
        "    if( (int)globalId < N ) {\n"
        "        dest[globalId] = source[globalId];\n"
        "    }\n"
        "}\n";
    CLKernel *kernel = cl->buildKernelFromString( kernelSource, "copy", "" );
    kernel->in( bufferSize )->in( sourceWrapper )->out( targetWrapper );
    int workgroupSize = 32;
    int numWorkgroups = ( bufferSize + workgroupSize - 1 ) / workgroupSize;
    kernel->run_1d( numWorkgroups * workgroupSize, workgroupSize );
    cl->finish();
    targetWrapper->copyToHost();

    delete targetWrapper;
    delete kernel;
//    delete[] copiedBuffer;
}
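The workgroup arithmetic above is plain ceiling division: bufferSize is rounded up to a whole number of workgroups, and the kernel's (int)globalId < N guard discards the padding threads. The same calculation as a standalone sketch:

#include <iostream>

// Round n up to the next multiple of workgroupSize, so that every element
// gets a thread; the kernel's bounds check drops the overhang.
int roundUpToMultiple(int n, int workgroupSize) {
    return (n + workgroupSize - 1) / workgroupSize * workgroupSize;
}

int main() {
    std::cout << roundUpToMultiple(100, 32) << std::endl; // 128: 4 workgroups of 32
    std::cout << roundUpToMultiple(96, 32) << std::endl;  // 96: already a multiple
    return 0;
}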
Example #3
TEST( SLOW_testintwrapper_huge, testreadwrite ) {

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testeasycl.cl", "test_stress");
    const int N = 1000000;
    int *in = new int[N];
    for( int i = 0; i < N; i++ ) {
        in[i] = i * 3;
    }
    int *out = new int[N];
    CLWrapper *inwrapper = cl->wrap(N, in);
    CLWrapper *outwrapper = cl->wrap(N, out);
    inwrapper->copyToDevice();
    outwrapper->createOnDevice();
    kernel->input( inwrapper );
    kernel->output( outwrapper );
    int globalSize = N;
    int workgroupsize = cl->getMaxWorkgroupSize();
    globalSize = ( ( globalSize + workgroupsize - 1 ) / workgroupsize ) * workgroupsize;
    cout << "globalsize: " << globalSize << " workgroupsize " << workgroupsize << endl;
    kernel->run_1d( globalSize, workgroupsize );
    outwrapper->copyToHost();
    for( int i = 0; i < N; i++ ) {
       if( out[i] != 689514 ) {
           cout << "out[" << i << "] != 689514: " << out[i] << endl;
           exit(-1);
       }
    }

    delete outwrapper;
    delete inwrapper;
    delete kernel;
    delete[] in;
    delete[] out;
    delete cl;
}
Example #4
THClStorage* THClStorage_newWithSize(THClState *state, int device, long size)
{
  THArgCheck(size >= 0, 2, "invalid size");

  if(size > 0)
  {
    StatefulTimer::timeCheck("THClStorage_newWithSize START");
    THClStorage *storage = (THClStorage*)THAlloc(sizeof(THClStorage));
    float *data = new float[size];
    storage->device = device;
    storage->cl = THClState_getClv2(state, storage->device);
    CLWrapper *wrapper = storage->cl->wrap( size, data );
    if(state->trace) cout << "new wrapper, size " << size << endl;
    if(state->trace) cout << "wrapper->createOnDevice()" << endl;
    wrapper->createOnDevice();
    storage->data = data;
    storage->wrapper = wrapper;

    storage->size = size;
    storage->refcount = 1;
    storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM;
    StatefulTimer::timeCheck("THClStorage_newWithSize END");
    return storage;
  }
  else
  {
    return THClStorage_newv2(state, device);
  }
}
Example #5
VIRTUAL void ActivationLayer::backward() {
    // have no weights to backprop to, just need to backprop the errors

//    CLWrapper *imagesWrapper = 0;
//    if( previousLayer->hasOutputWrapper() ) {
//        imagesWrapper = previousLayer->getOutputWrapper();
//    } else {
//        imagesWrapper = cl->wrap( previousLayer->getOutputSize(), previousLayer->getOutput() );
//        imagesWrapper->copyToDevice();
//    }

    CLWrapper *gradOutputWrapper = 0;
    bool weOwnGradOutputWrapper = false;
    if( nextLayer->providesGradInputWrapper() ) {
        gradOutputWrapper = nextLayer->getGradInputWrapper();
    } else {
        gradOutputWrapper = cl->wrap( getOutputSize(), nextLayer->getGradInput() );
        gradOutputWrapper->copyToDevice();
        weOwnGradOutputWrapper = true;
    }

    activationBackpropImpl->backward( batchSize, outputWrapper, gradOutputWrapper, gradInputWrapper );
//    gradInputCopiedToHost = false;

//    if( !previousLayer->hasOutputWrapper() ) {
//        delete imagesWrapper;
//    }
    if( weOwnGradOutputWrapper ) {
        delete gradOutputWrapper;
    }
}
Example #6
TEST(testfloatwrapperconst, main) {
    if(!EasyCL::isOpenCLAvailable()) {
        cout << "opencl library not found" << endl;
        exit(-1);
    }
    cout << "found opencl library" << endl;

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernelFromString(getKernel(), "test", "");
    float in[5];
    for(int i = 0; i < 5; i++) {
        in[i] = i * 3;
    }
    float out[5];
    CLWrapper *inwrapper = cl->wrap(5, (float const *)in);
    CLWrapper *outwrapper = cl->wrap(5, out);
    inwrapper->copyToDevice();
    kernel->input(inwrapper);
    kernel->output(outwrapper);
    kernel->run_1d(5, 5);
    outwrapper->copyToHost();
    assertEquals(out[0], 7);
    assertEquals(out[1], 10);
    assertEquals(out[2], 13);
    assertEquals(out[3], 16);
    assertEquals(out[4], 19);
    cout << "tests completed ok" << endl;

    delete inwrapper;
    delete outwrapper;
    delete kernel;
    delete cl;
}
Example #7
VIRTUAL void Adagrad::updateWeights(CLWrapper *weightsWrapper, CLWrapper *gradWeightsWrapper,
        AdagradState *trainerState) {

    int numWeights = trainerState->numWeights;
    float *working = new float[ numWeights ];
    CLWrapper *workingWrapper = cl->wrap(numWeights, working);
    workingWrapper->createOnDevice();

    CLMathWrapper clWeights(weightsWrapper);
    CLMathWrapper clGradWeights(gradWeightsWrapper);
    CLMathWrapper clSumSquares(trainerState->sumSquaresWrapper);
    CLMathWrapper clWorking(workingWrapper);

    // following all happens on gpu, via clmathwrapper:
    clWorking = clGradWeights;
    clWorking.squared();
    clSumSquares += clWorking;

    clWorking = clSumSquares;
    clWorking.sqrt();
    clWorking.inv();
    clWorking *= clGradWeights;
    clWorking *= - learningRate;
    clWeights += clWorking;

    delete workingWrapper;
    delete[] working;
}
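Read as per-weight scalar arithmetic, the chain of CLMathWrapper operations above implements the standard Adagrad update. A hypothetical standalone sketch of one step (the real code performs this element-wise on the GPU):

#include <cmath>

// One Adagrad step for a single weight, mirroring the wrapper chain:
// sumSquares += grad^2; weight -= learningRate * grad / sqrt(sumSquares)
void adagradStep(float &weight, float &sumSquares, float grad, float learningRate) {
    sumSquares += grad * grad;                   // clSumSquares += clWorking
    float update = grad / std::sqrt(sumSquares); // sqrt(), inv(), *= clGradWeights
    weight += -learningRate * update;            // *= -learningRate; clWeights += clWorking
}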
Example #8
VIRTUAL void PoolingLayer::forward() {
    CLWrapper *upstreamOutputWrapper = 0;
    if( previousLayer->hasOutputWrapper() ) {
        upstreamOutputWrapper = previousLayer->getOutputWrapper();
    } else {
        float *upstreamOutput = previousLayer->getOutput();
        upstreamOutputWrapper = cl->wrap( previousLayer->getOutputSize(), upstreamOutput );
        upstreamOutputWrapper->copyToDevice();
    }
    poolingForwardImpl->forward( batchSize, upstreamOutputWrapper, selectorsWrapper, outputWrapper );
    if( !previousLayer->hasOutputWrapper() ) {
        delete upstreamOutputWrapper;
    }

//    cout << "PoolingLayer::forward() selectors after forward: " << endl;
//    for( int i = 0; i < outputImageSize; i++ ) {
//        for( int j = 0; j < outputImageSize; j++ ) {
//            cout << selectors[ i * outputImageSize + j ] << " ";
//        }
//        cout << endl;
//    }

//    cout << "PoolingLayer::forward() selectorsWrapper after forward: " << endl;
//    PrintBuffer::printInts( cl, selectorsWrapper, outputImageSize, outputImageSize );
}
Example #9
TEST( SLOW_testintwrapper_huge, testread ) {
    Timer timer;
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testeasycl.cl", "test_read");
//    const int N = 4500000;
//    const int N = (4500000/512)*512;
    int N = 100000;
    int *out = new int[N];
    CLWrapper *outwrapper = cl->wrap(N, out);
    kernel->in(3)->in(7);
    kernel->output( outwrapper );
    int globalSize = N;
    int workgroupsize = cl->getMaxWorkgroupSize();
    globalSize = ( ( globalSize + workgroupsize - 1 ) / workgroupsize ) * workgroupsize;
    cout << "globalsize: " << globalSize << " workgroupsize " << workgroupsize << endl;
    timer.timeCheck("before kernel");
    kernel->run_1d( globalSize, workgroupsize );
    timer.timeCheck("after kernel");
    outwrapper->copyToHost();
    timer.timeCheck("after copy to host");
    for( int i = 0; i < N; i++ ) {
       if( out[i] != 4228 ) {
           cout << "out[" << i << "] != 4228: " << out[i] << endl;
           exit(-1);
       }
    }

    delete outwrapper;
    delete kernel;
    delete[] out;
    delete cl;
}
Example #10
TEST(testcopybuffer, withoffset) {
    if(!EasyCL::isOpenCLAvailable()) {
        cout << "opencl library not found" << endl;
        exit(-1);
    }
    cout << "found opencl library" << endl;

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    //CLKernel *kernel = cl->buildKernel("testeasycl.cl", "test");
    float in[10];
    float in2[10];
    for(int i = 0; i < 10; i++) {
        in[i] = i * 3;
        in2[i] = 23 + i;
    }
    float out[10];
    CLWrapper *inwrapper = cl->wrap(10, in);
    CLWrapper *in2wrapper = cl->wrap(10, in2);
    CLWrapper *outwrapper = cl->wrap(10, out);
    inwrapper->copyToDevice();
    in2wrapper->copyToDevice();

    EXPECT_FALSE(in2wrapper->isDeviceDirty());
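    // copyTo(dest, srcOffset, dstOffset, count): argument meaning inferred from
    // the assertions below, which expect in[2..5] to land in in2[5..8]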
    inwrapper->copyTo(in2wrapper, 2, 5, 4);
    EXPECT_TRUE(in2wrapper->isDeviceDirty());
  //  cl->finish();
    // check that in2 host-side unchanged:
    for(int i = 0; i < 10; i++) {
        in[i] = i * 3;
        EXPECT_EQ(23 + i, in2[i]);
    }

    in2wrapper->copyToHost();
    // check that in2 is now a partial copy of in:
    for(int i = 0; i < 10; i++) {
//        in[i] = i * 3;
        if(i >= 5 && i < 9) {
          EXPECT_EQ((i-3) * 3, in2[i]);
        } else {
          EXPECT_EQ(23 + i, in2[i]);
        }
    }

    // check that modifying in2 doesn't modify in:
    in2[1] = 27;
    in2wrapper->copyToDevice();
    inwrapper->copyToHost();
    EXPECT_EQ(1 * 3, in[1]);

    in2wrapper->copyToHost();
    EXPECT_EQ(1 * 3, in[1]);
    EXPECT_EQ(27, in2[1]);
    
    delete inwrapper;
    delete in2wrapper;
    delete outwrapper;
    delete cl;
}
Example #11
VIRTUAL void PoolingLayer::backward() {
    // have no weights to backprop to, just need to backprop the errors

    CLWrapper *gradOutputWrapper = 0;
    bool weOwnErrorsWrapper = false;
    if( nextLayer->providesGradInputWrapper() ) {
        gradOutputWrapper = nextLayer->getGradInputWrapper();
    } else {
        gradOutputWrapper = cl->wrap( getOutputSize(), nextLayer->getGradInput() );
        gradOutputWrapper->copyToDevice();
        weOwnErrorsWrapper = true;
    }

//    cout << "PoolingLayer::backward selectorsWrapper:" << endl;
//    PrintBuffer::printInts( cl, selectorsWrapper, outputImageSize, outputImageSize );

//    int *selectors = reinterpret_cast< int * >( selectorsWrapper->getHostArray() );
//    cout << "PoolingLayer::backward selectors before copy to host:" << endl;
//    for( int i = 0; i < outputImageSize; i++ ) {
//        for( int j = 0; j < outputImageSize; j++ ) {
//            cout << " " << selectors[i * outputImageSize + j];
//        }
//        cout << endl;
//    }
//    selectorsWrapper->copyToHost();
//    cout << "PoolingLayer::backward selectors after copy to host:" << endl;
//    for( int i = 0; i < outputImageSize; i++ ) {
//        for( int j = 0; j < outputImageSize; j++ ) {
//            cout << " " << selectors[i * outputImageSize + j];
//        }
//        cout << endl;
//    }
//    selectorsWrapper->copyToDevice();

//    selectorsWrapper->copyToHost();

    poolingBackpropImpl->backward( batchSize, gradOutputWrapper, selectorsWrapper, gradInputWrapper );

//    gradInputWrapper->copyToHost();
//    float *gradInput = reinterpret_cast< float * >( gradInputWrapper->getHostArray() );
//    cout << "gradInput:" << endl;
//    for( int i = 0; i < inputImageSize; i++ ) {
//        for( int j = 0; j < inputImageSize; j++ ) {
////            cout << " " << gradInput[i * inputImageSize + j];
//            if( gradInput[i * inputImageSize + j] != 0 ) {
//                cout << " *";
//            } else {
//                cout << " .";
//            }
//        }
//        cout << endl;
//    }

    if( weOwnErrorsWrapper ) {
        delete gradOutputWrapper;
    }
}
Example #12
void propagateWithWipe( Propagate *prop, int batchSize, LayerDimensions dim, float *inputData, float *filters, float *biases, float *results ) {
    int inputDataSize = batchSize * dim.inputCubeSize;
    CLWrapper *dataWrapper = prop->cl->wrap( inputDataSize, inputData );
    dataWrapper->copyToDevice();

    int weightsSize = dim.filtersSize;
    CLWrapper *weightsWrapper = prop->cl->wrap( weightsSize, filters );
    weightsWrapper->copyToDevice();

    CLWrapper *biasWeightsWrapper = 0;
    if( dim.biased ) {
        biasWeightsWrapper = prop->cl->wrap( dim.numFilters, biases );
        biasWeightsWrapper->copyToDevice();
    }

    CLWrapper *resultsWrapper = prop->cl->wrap( batchSize * dim.outputCubeSize, results );
    memset( results, 99, sizeof(float) * batchSize * dim.outputCubeSize );
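    // note: memset writes the byte value 99 into every byte, so the floats
    // become garbage sentinel values rather than 99.0f; fine for wiping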
    resultsWrapper->copyToDevice(); // so we can wipe it...

    StatefulTimer::timeCheck("testpropagate: after data wrapper processing");
    prop->propagate( batchSize, dataWrapper, weightsWrapper, biasWeightsWrapper,
            resultsWrapper );
//    StatefulTimer::timeCheck("Propagate::propagate after call propagate");
    resultsWrapper->copyToHost();
//    StatefulTimer::timeCheck("Propagate::propagate after copytohost");
    delete resultsWrapper;

    delete dataWrapper;
    delete weightsWrapper;
    if( dim.biased ) {
        delete biasWeightsWrapper;
    }
}
Example #13
void forwardWithWipe( Forward *prop, int batchSize, LayerDimensions dim, float *inputData, float *filters, float *biases, float *output ) {
    int inputDataSize = batchSize * dim.inputCubeSize;
    CLWrapper *dataWrapper = prop->cl->wrap( inputDataSize, inputData );
    dataWrapper->copyToDevice();

    int weightsSize = dim.filtersSize;
    CLWrapper *weightsWrapper = prop->cl->wrap( weightsSize, filters );
    weightsWrapper->copyToDevice();

    CLWrapper *biasWrapper = 0;
    if( dim.biased ) {
        biasWrapper = prop->cl->wrap( dim.numFilters, biases );
        biasWrapper->copyToDevice();
    }

    CLWrapper *outputWrapper = prop->cl->wrap( batchSize * dim.outputCubeSize, output );
    memset( output, 99, sizeof(float) * batchSize * dim.outputCubeSize );
    outputWrapper->copyToDevice(); // so we can wipe it...

    StatefulTimer::timeCheck("testforward: after data wrapper processing");
    prop->forward( batchSize, dataWrapper, weightsWrapper, biasWrapper,
            outputWrapper );
//    StatefulTimer::timeCheck("Forward::forward after call forward");
    outputWrapper->copyToHost();
//    StatefulTimer::timeCheck("Forward::forward after copytohost");
    delete outputWrapper;

    delete dataWrapper;
    delete weightsWrapper;
    if( dim.biased ) {
        delete biasWrapper;
    }
}
Example #14
TEST(testcopybuffer, main) {
    if(!EasyCL::isOpenCLAvailable()) {
        cout << "opencl library not found" << endl;
        exit(-1);
    }
    cout << "found opencl library" << endl;

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    //CLKernel *kernel = cl->buildKernel("testeasycl.cl", "test");
    float in[5];
    float in2[5];
    for(int i = 0; i < 5; i++) {
        in[i] = i * 3.0f;
        in2[i] = 23.0f + i;
    }
    float out[5];
    CLWrapper *inwrapper = cl->wrap(5, in);
    CLWrapper *in2wrapper = cl->wrap(5, in2);
    CLWrapper *outwrapper = cl->wrap(5, out);
    inwrapper->copyToDevice();
    in2wrapper->copyToDevice();

    EXPECT_FALSE(in2wrapper->isDeviceDirty());
    inwrapper->copyTo(in2wrapper);
    EXPECT_TRUE(in2wrapper->isDeviceDirty());
  //  cl->finish();
    // check that in2 host-side unchanged:
    for(int i = 0; i < 5; i++) {
        in[i] = i * 3.0f;
        EXPECT_EQ(23.0f + i, in2[i]);
    }

    in2wrapper->copyToHost();
    // check that in2 is now a copy of in:
    for(int i = 0; i < 5; i++) {
        in[i] = i * 3.0f;
        EXPECT_EQ(i * 3.0f, in2[i]);
    }

    // check that modifying in2 doesn't modify in:
    in2[1] = 27;
    in2wrapper->copyToDevice();
    inwrapper->copyToHost();
    EXPECT_EQ(1 * 3.0f, in[1]);

    in2wrapper->copyToHost();
    EXPECT_EQ(1 * 3.0f, in[1]);
    EXPECT_EQ(27.0f, in2[1]);
    
    delete inwrapper;
    delete in2wrapper;
    delete outwrapper;
    delete cl;
}
Example #15
VIRTUAL void ActivationPropagate::propagate( int batchSize, float *input, float *output ) {
//    cout << "ActivationPropagate::propagate( float * )" << endl;
    CLWrapper *inputWrapper = cl->wrap( getInputSize( batchSize ), input );
    CLWrapper *outputWrapper = cl->wrap( getResultsSize( batchSize ), output );

    inputWrapper->copyToDevice();
    propagate( batchSize, inputWrapper, outputWrapper );
    outputWrapper->copyToHost();    

    delete outputWrapper;
    delete inputWrapper;
}
Example #16
TEST(SLOW_testcopybuffer, larger) {
    if(!EasyCL::isOpenCLAvailable()) {
        cout << "opencl library not found" << endl;
        exit(-1);
    }
    cout << "found opencl library" << endl;

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    //CLKernel *kernel = cl->buildKernel("testeasycl.cl", "test");
    const int bufferSize = 100 * 1024 * 1024 / 4; // 100MB (of floats)
    float *in = new float[bufferSize];
    float *in2 = new float[bufferSize];
    for(int i = 0; i < bufferSize; i++) {
        in[i] = i * 3;
        in2[i] = 23 + i;
    }
    CLWrapper *inwrapper = cl->wrap(bufferSize, in);
    CLWrapper *in2wrapper = cl->wrap(bufferSize, in2);
    inwrapper->copyToDevice();
    in2wrapper->copyToDevice();

    inwrapper->copyTo(in2wrapper);
//    cl->finish();
    // check that in2 host-side unchanged:
    for(int i = 0; i < bufferSize; i++) {
        in[i] = i * 3;
        EXPECT_EQ(23 + i, in2[i]);
    }

    in2wrapper->copyToHost();
    // check that in2 is now a copy of in:
    for(int i = 0; i < bufferSize; i++) {
        in[i] = i * 3;
        EXPECT_EQ(i * 3, in2[i]);
    }

    // check that modifying in2 doesn't modify in:
    in2[1] = 27;
    in2wrapper->copyToDevice();
    inwrapper->copyToHost();
    EXPECT_EQ(1 * 3, in[1]);

    in2wrapper->copyToHost();
    EXPECT_EQ(1 * 3, in[1]);
    EXPECT_EQ(27, in2[1]);
    
    delete inwrapper;
    delete in2wrapper;
    delete[] in;
    delete[] in2;
    delete cl;
}
Example #17
VIRTUAL void ActivationLayer::forward() {
    CLWrapper *inputWrapper = 0;
    if( previousLayer->hasOutputWrapper() ) {
        inputWrapper = previousLayer->getOutputWrapper();
    } else {
        float *input = previousLayer->getOutput();
        inputWrapper = cl->wrap( previousLayer->getOutputSize(), input );
        inputWrapper->copyToDevice();
    }
    activationForwardImpl->forward( batchSize, inputWrapper, outputWrapper );
//    outputCopiedToHost = false;
    if( !previousLayer->hasOutputWrapper() ) {
        delete inputWrapper;
    }
}
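This borrow-or-wrap idiom recurs throughout these layer methods (Examples #5, #8, #11, #19 and #27): reuse the upstream wrapper when one exists, otherwise wrap the host array yourself, and free only what you created. The skeleton, extracted as a sketch:

CLWrapper *wrapper = 0;
bool weOwnWrapper = false;
if( previousLayer->hasOutputWrapper() ) {
    wrapper = previousLayer->getOutputWrapper(); // borrowed: the other layer owns it
} else {
    wrapper = cl->wrap( previousLayer->getOutputSize(), previousLayer->getOutput() );
    wrapper->copyToDevice();
    weOwnWrapper = true;                         // owned: freed below
}
// ... run the kernel against wrapper ...
if( weOwnWrapper ) {
    delete wrapper;
}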
Example #18
TEST( testMemset, basic ) {
    EasyCL *cl = DeepCLGtestGlobals_createEasyCL();

    CLKernel *kMemset = 0;
    // [[[cog
    // import stringify
    // stringify.write_kernel2( "kMemset", "cl/memset.cl", "cl_memset", '""' )
    // ]]]
    // generated using cog, from cl/memset.cl:
    const char * kMemsetSource =  
    "// Copyright Hugh Perkins 2015 hughperkins at gmail\n"
    "//\n"
    "// This Source Code Form is subject to the terms of the Mozilla Public License,\n"
    "// v. 2.0. If a copy of the MPL was not distributed with this file, You can\n"
    "// obtain one at http://mozilla.org/MPL/2.0/.\n"
    "\n"
    "kernel void cl_memset(global float *target, const float value, const int N) {\n"
    "    #define globalId get_global_id(0)\n"
    "    if ((int)globalId < N) {\n"
    "        target[globalId] = value;\n"
    "    }\n"
    "}\n"
    "\n"
    "";
    kMemset = cl->buildKernelFromString(kMemsetSource, "cl_memset", "", "cl/memset.cl");
    // [[[end]]]

    int N = 10000;
    float *myArray = new float[N];
    CLWrapper *myArrayWrapper = cl->wrap( N, myArray );
    myArrayWrapper->createOnDevice();
    kMemset->out( myArrayWrapper )->in( 99.0f )->in( N );
    int workgroupSize = 64;
    kMemset->run_1d( ( N + workgroupSize - 1 ) / workgroupSize * workgroupSize, workgroupSize );
    cl->finish();
    myArrayWrapper->copyToHost();
    for( int i = 0; i < 10; i++ ) {
//        cout << "myArray[" << i << "]=" << myArray[i] << endl;
    }
    for( int i = 0; i < N; i++ ) {
        EXPECT_EQ( 99.0f, myArray[i] );
    }

    delete kMemset;

    delete cl;
}
Example #19
VIRTUAL void DropoutLayer::backward() {
    // have no weights to backprop to, just need to backprop the errors

    CLWrapper *gradOutputWrapper = 0;
    bool weOwnErrorsWrapper = false;
    if(nextLayer->providesGradInputWrapper()) {
        gradOutputWrapper = nextLayer->getGradInputWrapper();
    } else {
        gradOutputWrapper = cl->wrap(getOutputNumElements(), nextLayer->getGradInput());
        gradOutputWrapper->copyToDevice();
        weOwnErrorsWrapper = true;
    }
    maskWrapper->copyToDevice();
    dropoutBackwardImpl->backward(batchSize, maskWrapper, gradOutputWrapper, gradInputWrapper);
    if(weOwnErrorsWrapper) {
        delete gradOutputWrapper;
    }
}
Example #20
VIRTUAL void BackpropWeights2::backpropWeights( int batchSize, float learningRate, float *derivLossBySum, float *inputData, float *filters, float *biasWeights ) {
    StatefulTimer::timeCheck("BackpropWeights2::backprop begin");

//    const float learningMultiplier = learningRate / batchSize / sqrt( dim.outputImageSize * dim.outputImageSize );

    int resultsSize = batchSize * dim.outputCubeSize;
    CLWrapper *derivLossBySumWrapper = cl->wrap( resultsSize, derivLossBySum );
    derivLossBySumWrapper->copyToDevice();

    int inputSize = batchSize * dim.inputCubeSize;
    CLWrapper *inputDataWrapper = cl->wrap( inputSize, inputData );
    inputDataWrapper->copyToDevice();

    CLWrapper *weightsWrapper = 0;
    int weightsSize = debug ? std::max(10000, dim.filtersSize ) : dim.filtersSize;
    weightsWrapper = cl->wrap( weightsSize, filters );
    weightsWrapper->copyToDevice();

//    cout << "backpropweights2::backpropweights resultsSize=" << resultsSize << " inputSize=" << inputSize << 
//        " weightSize=" << weightsSize << endl;

    CLWrapper *biasWeightsWrapper = 0;
    if( dim.biased ) {
        biasWeightsWrapper = cl->wrap( dim.numFilters, biasWeights );
        biasWeightsWrapper->copyToDevice();
    }

    StatefulTimer::timeCheck("BackpropWeights2::backprop after copied to device");
    backpropWeights( batchSize, learningRate, derivLossBySumWrapper, inputDataWrapper, weightsWrapper, biasWeightsWrapper );
    StatefulTimer::timeCheck("BackpropWeights2::backprop after call backprop");
    weightsWrapper->copyToHost();
    if( dim.biased ) {
        biasWeightsWrapper->copyToHost();
    }
    StatefulTimer::timeCheck("BackpropWeights2::backprop after copytohost");

    delete derivLossBySumWrapper;
    delete inputDataWrapper;
    delete weightsWrapper;
    if( dim.biased ) {
        delete biasWeightsWrapper;
    }
}
Example #21
VIRTUAL void Adadelta::updateWeights( CLWrapper *weightsWrapper, CLWrapper *gradWeightsWrapper,
        AdadeltaState *trainerState ) {
    // need to calculate
    // sumGradSquared = decay * sumGradSquared + (1 - decay ) * grad.square()
    // update = - sumUpdateSquared.sqrt() / sumGradSquared.sqrt() * grad
    // sumUpdateSquared = decay * sumUpdateSquared + ( 1 - decay ) * update.squared()
    // weights += update

    int numWeights = trainerState->numWeights;
    float *working = new float[ numWeights ];
    CLWrapper *workingWrapper = cl->wrap( numWeights, working );
    workingWrapper->createOnDevice();

    CLMathWrapper clWeights( weightsWrapper );
    CLMathWrapper clGradWeights( gradWeightsWrapper );
    CLMathWrapper clSumGradSquared( trainerState->sumGradSquaredWrapper );
    CLMathWrapper clSumUpdateSquared( trainerState->sumUpdateSquaredWrapper );
    CLMathWrapper clWorking( workingWrapper );

    // following all happens on gpu, via clmathwrapper:
    clWorking = clGradWeights;
    clWorking.squared();
    clWorking *= ( 1 - decay );
    clSumGradSquared *= decay;
    clSumGradSquared += clWorking;

    clWorking = clSumGradSquared;
    clWorking.inv();
    clWorking *= clSumUpdateSquared;
    clWorking.sqrt();
    clWorking *= clGradWeights;
    clWorking *= - 1;

    clWeights += clWorking;

    clSumUpdateSquared *= decay;
    clWorking.squared();
    clWorking *= ( 1 - decay );
    clSumUpdateSquared += clWorking;

    delete workingWrapper;
    delete[] working;
}
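The comment block at the top of this method spells out the recurrence; as a hypothetical per-weight scalar sketch of what the wrapper chain computes on the GPU:

#include <cmath>

// One Adadelta step for a single weight, following the recurrence above:
void adadeltaStep(float &weight, float &sumGradSquared, float &sumUpdateSquared,
        float grad, float decay) {
    sumGradSquared = decay * sumGradSquared + (1 - decay) * grad * grad;
    float update = -std::sqrt(sumUpdateSquared / sumGradSquared) * grad;
    weight += update;
    sumUpdateSquared = decay * sumUpdateSquared + (1 - decay) * update * update;
}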
Example #22
File: scan.cpp Project: nchong/scan
Scan::Scan(CLWrapper &clw, size_t wx) : clw(clw), wx(wx),
  c0(0), c1(0), m0(0), m1(0), k0(0), k1(0), k2(0) {
  m = wx * 2;
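  // each workgroup scans m = 2*wx elements, presumably two per thread as in
  // the classic work-efficient (Blelloch) scan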
#if EMBED_CL
  #include "scan.cl.h"
  clw.create_all_kernels(clw.compile_from_string((char *)&scan_cl));
#else
  clw.create_all_kernels(clw.compile("scan.cl"));
#endif
  scan_pow2 = clw.kernel_of_name("scan_pow2_wrapper");
  scan_pad_to_pow2 = clw.kernel_of_name("scan_pad_to_pow2");
  scan_subarrays = clw.kernel_of_name("scan_subarrays");
  scan_inc_subarrays = clw.kernel_of_name("scan_inc_subarrays");
}
Example #23
VIRTUAL void ConvolutionalLayer::propagate() {
    if( batchSize == 0 ) {
        throw runtime_error("Need to call setBatchSize(size) before calling propagate etc");
    }
//    if( imageSizeSquared <= cl->getMaxWorkgroupSize() ) {
////        propagate2();
//    } else {
//  //      propagate1();
//    }
//    propagate1();
    StatefulTimer::instance()->timeCheck("    propagate layer " + toString( layerIndex ) + ", START");

    CLWrapper *upstreamWrapper = 0;
    if( previousLayer->hasResultsWrapper() ) {
//            std::cout << "layer " << previousLayer->layerIndex << " has resultsWrapper" << std::endl;
        upstreamWrapper = previousLayer->getResultsWrapper();
    } else {
//            std::cout << "layer " << previousLayer->layerIndex << " has no resultsWrapper" << std::endl;
        upstreamWrapper = cl->wrap( previousLayer->getResultsSize(), (float *)previousLayer->getResults() );
        upstreamWrapper->copyToDevice();
    }
    CLFloatWrapper *biasWeightsWrapper = 0;
    if( dim.biased ) {
        biasWeightsWrapper = cl->wrap( getBiasWeightsSize(), biasWeights );
        biasWeightsWrapper->copyToDevice();
    }
    StatefulTimer::instance()->timeCheck("    propagate layer " + toString( layerIndex ) + ", copied to device");
    propagateimpl->propagate( batchSize, upstreamWrapper, weightsWrapper, biasWeightsWrapper, resultsWrapper );
    StatefulTimer::instance()->timeCheck("    propagate layer " + toString( layerIndex ) + ",  after clFinish");

    if( !previousLayer->hasResultsWrapper() ) {
        delete upstreamWrapper;
    }
    if( dim.biased ) {
        delete biasWeightsWrapper;
    }
    resultsCopiedToHost = false;
}
Example #24
void measurePerf(int instance, int batchSize, LayerDimensions dim) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();

    int inputNumElements = dim.inputCubeSize * batchSize;
    int errorsSize = dim.outputCubeSize * batchSize;
    int weightsSize = dim.filtersSize;
    int errorsForUpstreamSize = dim.inputCubeSize * batchSize;
    float *input = new float[inputNumElements];
    float *errors = new float[errorsSize];
    float *weights = new float[weightsSize];

    WeightRandomizer::randomize(input, inputNumElements, -0.1f, 0.1f);
    WeightRandomizer::randomize(errors, errorsSize, -0.1f, 0.1f);
    WeightRandomizer::randomize(weights, weightsSize, -0.1f, 0.1f);

    float *errorsForUpstream = new float[errorsForUpstreamSize];
    CLWrapper *inputWrapper = cl->wrap(inputNumElements, input);
    CLWrapper *errorsWrapper = cl->wrap(errorsSize, errors);
    CLWrapper *weightsWrapper = cl->wrap(weightsSize, weights);
    CLWrapper *errorsForUpstreamWrapper = cl->wrap(errorsForUpstreamSize, errorsForUpstream);
    inputWrapper->copyToDevice();
    errorsWrapper->copyToDevice();
    weightsWrapper->copyToDevice();
    errorsForUpstreamWrapper->createOnDevice();

    StatefulTimer::timeCheck("after init");
    Backward *backwardImpl = Backward::instanceSpecific(instance, cl, dim);
    for(int it = 0; it < 40; it++) {
        backwardImpl->backward(batchSize, 
            inputWrapper, errorsWrapper, weightsWrapper,
            errorsForUpstreamWrapper);
    }
    StatefulTimer::timeCheck("after backprop");
    StatefulTimer::dump(true);

    delete errorsForUpstreamWrapper;
    delete weightsWrapper;
    delete inputWrapper;
    delete errorsWrapper;

    delete[] errors;
    delete[] weights;
    delete[] input;
    delete[] errorsForUpstream;

    delete backwardImpl;
    delete cl;
}
Example #25
VIRTUAL void ConvolutionalLayer::backProp( float learningRate ) {
//        Timer timer;
    StatefulTimer::instance()->timeCheck("backprop(): start, layer " + toString( layerIndex ) );

    CLWrapper *biasWeightsWrapper = 0;
    if( dim.biased ) {
        biasWeightsWrapper = cl->wrap( getBiasWeightsSize(), biasWeights );
        biasWeightsWrapper->copyToDevice();
    }

    CLWrapper *imagesWrapper = 0;
    if( previousLayer->hasResultsWrapper() ) {
        imagesWrapper = previousLayer->getResultsWrapper();
    } else {
        imagesWrapper = cl->wrap( previousLayer->getResultsSize(), previousLayer->getResults() );
        imagesWrapper->copyToDevice();
    }

    CLWrapper *errorsWrapper = 0;
    bool weOwnErrorsWrapper = false;
    if( nextLayer->providesErrorsForUpstreamWrapper() ) {
        errorsWrapper = nextLayer->getErrorsForUpstreamWrapper();
    } else {
        errorsWrapper = cl->wrap( getResultsSize(), nextLayer->getErrorsForUpstream() );
        errorsWrapper->copyToDevice();
//        int resultsSize = getResultsSize();
//        for( int i = 0; i < resultsSize; i++ ) {
//            cout << "convolutional::backproperrors errorsfromupstream[" << i << "]=" << nextLayer->getErrorsForUpstream()[i] << endl;
//        }
        weOwnErrorsWrapper = true;
    }
    if( previousLayer->needsBackProp() ) {
        backpropErrorsImpl->backpropErrors( batchSize, imagesWrapper, errorsWrapper, weightsWrapper, errorsForUpstreamWrapper );
        StatefulTimer::instance()->timeCheck("backproperrors(): calced errors for upstream, layer " + ::toString( layerIndex ) );
    }

    backpropWeightsImpl->backpropWeights( batchSize, learningRate, errorsWrapper, imagesWrapper,  weightsWrapper, biasWeightsWrapper );
    weightsCopiedToHost = false;
    StatefulTimer::instance()->timeCheck("backproperrors(): done weight backprop, layer " + ::toString( layerIndex ) );

    if( dim.biased ) {
        biasWeightsWrapper->copyToHost();
        delete biasWeightsWrapper;
    }
    if( !previousLayer->hasResultsWrapper() ) {
        delete imagesWrapper;
    }
    if( weOwnErrorsWrapper ) {
        delete errorsWrapper;
    }
    StatefulTimer::instance()->timeCheck("backproperrors(): updated weights, layer " + ::toString( layerIndex ) );
}
Example #26
TEST(testCopyBuffer, ints) {
    EasyCL *cl = DeepCLGtestGlobals_createEasyCL();

    const int N = 10;
    int *a = new int[N];
    for(int i = 0; i < N; i++) {
        a[i] = 3 + i;
    }
    CLWrapper *aWrapper = cl->wrap(N, a);
    aWrapper->copyToDevice();
    memset(a, 0, sizeof(int) * N);

    int *b = new int[N];
    CopyBuffer::copy(cl, aWrapper, b);
    
    for(int i = 0; i < N; i++) {
//        cout << b[i] << endl;
        EXPECT_EQ(3 + i, b[i]);
    }

    memset(b, 0, sizeof(int) * N);
    CopyBuffer::copy(cl, aWrapper, b);
    
    for(int i = 0; i < N; i++) {
//        cout << b[i] << endl;
        EXPECT_EQ(3 + i, b[i]);
    }

    PrintBuffer::printInts(cl, aWrapper, 10, 1);
    
    delete[] b;
    delete aWrapper;
    delete[] a;

    delete cl;
}
Example #27
VIRTUAL void DropoutLayer::forward() {
    CLWrapper *upstreamOutputWrapper = 0;
    if(previousLayer->hasOutputWrapper()) {
        upstreamOutputWrapper = previousLayer->getOutputWrapper();
    } else {
        float *upstreamOutput = previousLayer->getOutput();
        upstreamOutputWrapper = cl->wrap(previousLayer->getOutputNumElements(), upstreamOutput);
        upstreamOutputWrapper->copyToDevice();
    }

//    cout << "training: " << training << endl;
    if(training) {
        // create new masks...
        generateMasks();
        maskWrapper->copyToDevice();
        dropoutForwardImpl->forward(batchSize, maskWrapper, upstreamOutputWrapper, outputWrapper);
    } else {
        // if not training, skip the random masking and simply scale the whole buffer by dropRatio
        multiplyBuffer->multiply(getOutputNumElements(), dropRatio, upstreamOutputWrapper, outputWrapper);
    }
    if(!previousLayer->hasOutputWrapper()) {
        delete upstreamOutputWrapper;
    }
}
Example #28
TEST( testCopyBuffer, floats ) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();

    const int N = 10;
    float *a = new float[N];
    for( int i = 0; i < N; i++ ) {
        a[i] = 3 + i;
    }
    CLWrapper *aWrapper = cl->wrap( N, a );
    aWrapper->copyToDevice();
    memset( a, 0, sizeof(float) * N );

    float *b = new float[N];
    CopyBuffer::copy( cl, aWrapper, b );
    
    for( int i = 0; i < N; i++ ) {
        cout << b[i] << endl;
        EXPECT_EQ( 3 + i, b[i] );
    }

    memset( b, 0, sizeof(float) * N );
    CopyBuffer::copy( cl, aWrapper, b );
    
    for( int i = 0; i < N; i++ ) {
        cout << b[i] << endl;
        EXPECT_EQ( 3 + i, b[i] );
    }

    PrintBuffer::printFloats( cl, aWrapper, 10, 1 );
    
    delete[] b;
    delete aWrapper;
    delete[] a;

    delete cl;
}
Example #29
// must allocate output yourself before the call
VIRTUAL void Forward::forward( int batchSize, float *inputData, float *filters, float *biases, float *output ) {
    StatefulTimer::timeCheck("Forward::forward begin");
    int inputDataSize = batchSize * dim.inputCubeSize;
    CLWrapper *dataWrapper = cl->wrap( inputDataSize, inputData );
    dataWrapper->copyToDevice();

    int weightsSize = dim.filtersSize;
    CLWrapper *weightsWrapper = cl->wrap( weightsSize, filters );
    weightsWrapper->copyToDevice();

    CLWrapper *biasWrapper = 0;
    if( dim.biased ) {
        int biasWrapperSize = dim.numFilters;
        biasWrapper = cl->wrap( biasWrapperSize, biases );
        biasWrapper->copyToDevice();
    }

//    int outputDataSize = batchSize * dim.outputCubeSize;
//    cout << " batchsize " << batchSize << " " << dim << endl;
//    int allocatedOutputSize = std::max(5000, outputDataSize );
//    int allocatedOutputSize = outputDataSize;
//    float *output = new float[allocatedOutputSize];
    CLWrapper *outputWrapper = cl->wrap( batchSize * dim.outputCubeSize, output );
    cl->finish();

    StatefulTimer::timeCheck("Forward::forward after copied to device");
    forward( batchSize, dataWrapper, weightsWrapper, biasWrapper,
            outputWrapper );
    StatefulTimer::timeCheck("Forward::forward after call forward");
    outputWrapper->copyToHost();
    StatefulTimer::timeCheck("Forward::forward after copytohost");
//    for( int i = 0; i < 20; i++ ) {
//        cout << "output[" << i << "]=" << output[i] << endl;
//    }
    delete outputWrapper;

    delete dataWrapper;
    delete weightsWrapper;
    if( dim.biased ) {
        delete biasWrapper;
    }

//    return output;
}
Example #30
TEST(testClBlas, colMajor2) {
    EasyCL *cl = DeepCLGtestGlobals_createEasyCL();

    float A[] = {1, 3,
                 2, 7,
                 9, 5,
                 0, -2};
    float B[] = {3,2,8,
                 -1,0,4};

    float C[4*3];
    transpose(A, 4, 2);
    transpose(B, 2, 3);
//    for(int row=0; row < 2; row++) {
//        for(int col=0; col < 1; col++) {
//            cout << B[row*1 + col] << " ";
//        }
//        cout << endl;
//    }
    ClBlasInstance clblasInstance;
//    ClBlasInstance::initializeIfNecessary();
    CLWrapper *AWrap = cl->wrap(4*2, A);
    CLWrapper *BWrap = cl->wrap(2*3, B);
    CLWrapper *CWrap = cl->wrap(4*3, C);
    AWrap->copyToDevice();
    BWrap->copyToDevice();
    ClBlasHelper::Gemm(
        cl,
        clblasColumnMajor,
        clblasNoTrans, clblasNoTrans,
        4, 2, 3,
        1,
        AWrap, 0,
        BWrap, 0,
        0,
        CWrap, 0
    );
//    cl->finish();
    CWrap->copyToHost();
    transpose(C, 3, 4);
    EXPECT_EQ(1*3 + 3*(-1), C[0]);
    EXPECT_EQ(1*2+3*0, C[1]);
    EXPECT_EQ(1*8+4*3, C[2]);
    EXPECT_EQ(-8, C[11]);

    delete CWrap;
    delete BWrap;
    delete AWrap;

    delete cl;
}
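The transpose helper used above is not part of this listing. A hypothetical sketch consistent with how it is called: transpose(data, rows, cols) rewrites a rows x cols row-major matrix into column-major order in place, which is also how the column-major result C is converted back after the Gemm call.

#include <cstring>

// Hypothetical transpose(data, rows, cols): convert a rows x cols
// row-major matrix to column-major order, in place.
void transpose(float *data, int rows, int cols) {
    float *tmp = new float[rows * cols];
    for(int r = 0; r < rows; r++) {
        for(int c = 0; c < cols; c++) {
            tmp[c * rows + r] = data[r * cols + c];
        }
    }
    memcpy(data, tmp, sizeof(float) * rows * cols);
    delete[] tmp;
}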