예제 #1
0
TEST(testinout, main) {
    if(!EasyCL::isOpenCLAvailable()) {
        cout << "opencl library not found" << endl;
        exit(-1);
    }
    cout << "found opencl library" << endl;

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernelFromString(getKernel(), "test", "");
    float inout[5];
    for(int i = 0; i < 5; i++) {
        inout[i] = i * 3;
    }
    kernel->inout(5, inout);
    size_t global = 5;
    size_t local = 5;
    kernel->run(1, &global, &local);
    assertEquals(inout[0] , 7);
    assertEquals(inout[1] , 10);
    assertEquals(inout[2] , 13);
    assertEquals(inout[3] , 16);
    assertEquals(inout[4] , 19);
    cout << "tests completed ok" << endl;

    delete kernel;
    delete cl;
}
예제 #2
0
// this runs an entire kernel to get one value.  Clearly this is going to be pretty slow, but
// at least it's more or less compatible, and comparable, to how cutorch does it
void THClStorage_set(THClState *state, THClStorage *self, long index, float value)
{
////  cout << "set size=" << self->size << " index=" << index << " value=" << value << endl;
  THArgCheck((index >= 0) && (index < self->size), 2, "index out of bounds");
  THArgCheck(self->wrapper != 0, 1, "storage argument not initialized, is empty");
//  if( self->wrapper->isDeviceDirty() ) { // we have to do this, since we're going to copy it all back again
//                                         // although I suppose we could set via a kernel perhaps
//                                         // either way, this function is pretty inefficient right now :-P
//    if(state->trace) cout << "wrapper->copyToHost() size " << self->size << endl;
//    self->wrapper->copyToHost();
//  }
//  self->data[index] = value;
//  if(state->trace) cout << "wrapper->copyToDevice() size " << self->size << endl;
//  self->wrapper->copyToDevice();

  const char *uniqueName = __FILE__ ":set";
  EasyCL *cl = self->cl; // cant remember if this is a good idea or not :-P
  CLKernel *kernel = 0;
  if(cl->kernelExists(uniqueName)) {
    kernel = cl->getKernel(uniqueName);
  } else {
    TemplatedKernel kernelBuilder(cl);
    kernel = kernelBuilder.buildKernel( uniqueName, __FILE__, getSetKernelSource(), "THClStorageSet" );
  }

  kernel->inout(self->wrapper);
  kernel->in((int64_t)index);
  kernel->in(value);
  kernel->run_1d(1, 1);

  if(state->addFinish) cl->finish();
}
예제 #3
0
TEST(testlocal, notUselocal) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testlocal.cl", "notUseLocal");
    int workgroupSize = 64;
    float *myarray = new float[workgroupSize];

    kernel->in(workgroupSize);
    kernel->inout(workgroupSize, myarray);
    kernel->run_1d(workgroupSize, workgroupSize);

    delete[]myarray;
    delete kernel;
    delete cl;
}
예제 #4
0
TEST(testfloatarray, main) {
    if(!EasyCL::isOpenCLAvailable()) {
        cout << "opencl library not found" << endl;
        exit(-1);
    }
    cout << "found opencl library" << endl;

    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernelFromString(getKernel(), "test", "");
    float in[5];
    float inout[5];
    float out[5];
    for(int i = 0; i < 5; i++) {
        in[i] = i * 3;
        inout[i] = i * 3;
    }
    kernel->in(5, in);
    kernel->out(5, out);
    kernel->inout(5, inout);
    kernel->run_1d(5, 5);

    for(int i = 0; i < 5; i++) {
        cout << out[i] << " ";
    }
    cout << endl;
    for(int i = 0; i < 5; i++) {
        cout << inout[i] << " ";
    }
    cout << endl;

    assertEquals(inout[0], 7);
    assertEquals(inout[1] , 10);
    assertEquals(inout[2] , 34);
    assertEquals(inout[3] , 16);
    assertEquals(inout[4], 19);
    assertEquals(out[0] , 5);
    assertEquals(out[1] , 8);
    assertEquals(out[2] , 26);
    assertEquals(out[3] , 14);
    assertEquals(out[4] , 17);
    cout << "tests completed ok" << endl;

    delete kernel;
    delete cl;
}
예제 #5
0
파일: GpuOp.cpp 프로젝트: neuroidss/DeepCL
VIRTUAL void GpuOp::apply1_inplace( int N, CLWrapper*destinationWrapper, Op1 *op ) {
    StatefulTimer::instance()->timeCheck("GpuOp::apply inplace start" );

    string kernelName = "GpuOp::" + op->getName() + "_inplace";
    if( !cl->kernelExists( kernelName ) ) {
        buildKernel( kernelName, op, true );
    }
    CLKernel *kernel = cl->getKernel( kernelName );

    kernel->in( N );
    kernel->inout( destinationWrapper );
    int globalSize = N;
    int workgroupSize = 64;
    int numWorkgroups = ( globalSize + workgroupSize - 1 ) / workgroupSize;
    kernel->run_1d( numWorkgroups * workgroupSize, workgroupSize );
    cl->finish();

    StatefulTimer::instance()->timeCheck("GpuOp::apply inplace end" );
}
예제 #6
0
TEST(testlocal, localreduce) {
    EasyCL *cl = EasyCL::createForFirstGpuOtherwiseCpu();
    CLKernel *kernel = cl->buildKernel("testlocal.cl", "reduceViaScratch");
    int workgroupSize = min(512, cl->getMaxWorkgroupSize());
    float *myarray = new float[workgroupSize];
//    Timer timer;
    for(int i = 0; i < 2000; i++) {
        float sumViaCpu = 0;
        for(int i = 0; i < workgroupSize; i++) {
            myarray[i] = (i + 7) * 3;
            sumViaCpu += myarray[i];
        }
        EXPECT_NE(myarray[0], sumViaCpu);

        kernel->inout(workgroupSize, myarray)->localFloats(workgroupSize);
        kernel->run_1d(workgroupSize, workgroupSize);
        
        EXPECT_EQ(myarray[0], sumViaCpu);
    }
//    timer.timeCheck("after iterations");
    delete[]myarray;
    delete kernel;
    delete cl;
}