void TensorMath::SMS(const Tensor& source, Tensor& target) {
#ifdef BUILD_OPENCL
  if(source.cl_gpu_ || target.cl_gpu_) {
    ((Tensor&)source).MoveToGPU();
    target.MoveToGPU(true);
    const int width = target.width();
    const int height = target.height();
    const int maps = target.maps();
    const int samples = target.samples();

    cl_uint error = 0;
    error |= clSetKernelArg (CLHelper::k_sms, 0, sizeof (cl_mem), &(((Tensor&)source).cl_data_ptr_));
    error |= clSetKernelArg (CLHelper::k_sms, 1, sizeof (cl_mem), &(target.cl_data_ptr_));
    error |= clSetKernelArg (CLHelper::k_sms, 2, sizeof (cl_uint), &width);
    error |= clSetKernelArg (CLHelper::k_sms, 3, sizeof (cl_uint), &height);
    error |= clSetKernelArg (CLHelper::k_sms, 4, sizeof (cl_uint), &maps);
    error |= clSetKernelArg (CLHelper::k_sms, 5, sizeof (cl_uint), &samples);
    if (error != CL_SUCCESS) {
      FATAL("Error setting kernel args: " << (signed int) error);
    }

    size_t global_work_size[] = {(size_t)target.elements()};

    error = clEnqueueNDRangeKernel (CLHelper::queue, CLHelper::k_sms, 1, NULL,
                                    global_work_size, NULL, 0, NULL, NULL);
    if (error != CL_SUCCESS) {
      FATAL("Error enqueueing kernel: " << (signed int) error);
    }

#ifdef BRUTAL_FINISH
    error = clFinish (CLHelper::queue);
    if (error != CL_SUCCESS) {
      FATAL("Error finishing command queue: " << (signed int) error);
    }
#endif
  } else {
#endif
    const int width = target.width();
    const int height = target.height();
    const int maps = target.maps();
    const int samples = target.samples();

    // CPU path: copy each (width x height) slice of the source into the slice of
    // the target whose map and sample indices are exchanged, i.e. swap the
    // sample and map dimensions.
    for(int sample = 0; sample < samples; sample++) {
      for(int map = 0; map < maps; map++) {
        const datum* src = source.data_ptr_const(0, 0, sample, map);
        datum* tgt = target.data_ptr(0, 0, map, sample);
        std::memcpy(tgt, src, sizeof(datum) * width * height);
      }
    }
#ifdef BUILD_OPENCL
  }
#endif
  target.hint_ignore_content_ = false;
}
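// Usage sketch for SMS (added for illustration, not part of the original source):
// SMS exchanges the sample and map dimensions, so target(x, y, m, s) == source(x, y, s, m).
// The Tensor constructor argument order (samples, width, height, maps) is assumed here.
//
//   Tensor in(4, 8, 8, 3);    // 4 samples, 8x8 pixels, 3 maps
//   Tensor out(3, 8, 8, 4);   // sample and map dimensions exchanged
//   TensorMath::SMS(in, out); // out(x, y, m, s) now equals in(x, y, s, m)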
void TensorMath::UP(const Tensor& source, Tensor& target, const int region_width, const int region_height, const datum target_factor) {
#ifdef BUILD_OPENCL
  if(source.cl_gpu_ || target.cl_gpu_) {
    ((Tensor&)source).MoveToGPU();
    target.MoveToGPU(true);
    const int target_width = target.width();
    const int target_height = target.height();
    const int source_width = source.width();
    const int source_height = source.height();

    cl_uint error = 0;
    error |= clSetKernelArg (CLHelper::k_up, 0, sizeof (cl_mem), &(((Tensor&)source).cl_data_ptr_));
    error |= clSetKernelArg (CLHelper::k_up, 1, sizeof (cl_mem), &(target.cl_data_ptr_));
    error |= clSetKernelArg (CLHelper::k_up, 2, sizeof (cl_uint), &target_width);
    error |= clSetKernelArg (CLHelper::k_up, 3, sizeof (cl_uint), &target_height);
    error |= clSetKernelArg (CLHelper::k_up, 4, sizeof (cl_uint), &source_width);
    error |= clSetKernelArg (CLHelper::k_up, 5, sizeof (cl_uint), &source_height);
    error |= clSetKernelArg (CLHelper::k_up, 6, sizeof (cl_uint), &region_width);
    error |= clSetKernelArg (CLHelper::k_up, 7, sizeof (cl_uint), &region_height);
    error |= clSetKernelArg (CLHelper::k_up, 8, sizeof (cl_float), &target_factor);
    if (error != CL_SUCCESS) {
      FATAL("Error setting kernel args: " << (signed int) error);
    }

    size_t global_work_size[] = {(size_t)target.width(), (size_t)target.height(),
                                 (size_t)(target.maps() * target.samples())};

    error = clEnqueueNDRangeKernel (CLHelper::queue, CLHelper::k_up, 3, NULL,
                                    global_work_size, NULL, 0, NULL, NULL);
    if (error != CL_SUCCESS) {
      FATAL("Error enqueueing kernel: " << (signed int) error);
    }

#ifdef BRUTAL_FINISH
    error = clFinish (CLHelper::queue);
    if (error != CL_SUCCESS) {
      FATAL("Error finishing command queue: " << (signed int) error);
    }
#endif
  } else {
#endif
    const int width = source.width();
    const int height = source.height();
    const int maps = source.maps();
    const int samples = source.samples();

    // CPU path: nearest-neighbor upscaling. Every source element is written into a
    // region_width x region_height block of the target, scaled by target_factor.
    for(int sample = 0; sample < samples; sample++) {
      for(int map = 0; map < maps; map++) {
        for(unsigned int y = 0; y < height; y++) {
          const unsigned int iy = region_height * y;
          for(unsigned int x = 0; x < width; x++) {
            const unsigned int ix = region_width * x;
            const datum* src = source.data_ptr_const(x, y, map, sample);
            datum sum = *src;
            for(unsigned int ry = 0; ry < region_height; ry++) {
              for(unsigned int rx = 0; rx < region_width; rx++) {
                datum* tgt = target.data_ptr(ix + rx, iy + ry, map, sample);
                *tgt = sum * target_factor;
              }
            }
          }
        }
      }
    }
#ifdef BUILD_OPENCL
  }
#endif
  target.hint_ignore_content_ = false;
}
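// Usage sketch for UP (added for illustration, not part of the original source):
// UP upscales by (region_width, region_height) and multiplies every copied value by
// target_factor, so with a 2x2 region and target_factor = 0.25 each source value v
// becomes a 2x2 block of 0.25 * v. One plausible use (an assumption, not stated in
// this file) is spreading gradients back to input resolution in a pooling backward
// pass. The Tensor constructor argument order (samples, width, height, maps) is
// assumed here.
//
//   Tensor pooled(1, 4, 4, 1);    // 4x4 pooled values, 1 sample, 1 map
//   Tensor upscaled(1, 8, 8, 1);  // 8x8 output, same sample/map counts
//   TensorMath::UP(pooled, upscaled, 2, 2, (datum)0.25);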
void TensorMath::COL2IM(Tensor& source, const int source_width, const int source_height, const int maps, const int samples, const int kernel_width, const int kernel_height, const int stride_width, const int stride_height, const int pad_width, const int pad_height, const Tensor& target) {
#ifdef BUILD_OPENCL
  if(source.cl_gpu_ || target.cl_gpu_) {
    ((Tensor&)target).MoveToGPU();
    source.MoveToGPU(true);

    cl_uint error = 0;
    const int target_width = (2 * pad_width + source_width - kernel_width) / stride_width + 1;
    const int target_height = (2 * pad_height + source_height - kernel_height) / stride_height + 1;
    const int target_maps = kernel_width * kernel_height * maps;

    error |= clSetKernelArg (CLHelper::k_col2im, 0, sizeof (cl_mem), &(((Tensor&)source).cl_data_ptr_));
    error |= clSetKernelArg (CLHelper::k_col2im, 1, sizeof (cl_mem), &(target.cl_data_ptr_));
    error |= clSetKernelArg (CLHelper::k_col2im, 2, sizeof (cl_int), &source_width);
    error |= clSetKernelArg (CLHelper::k_col2im, 3, sizeof (cl_int), &source_height);
    error |= clSetKernelArg (CLHelper::k_col2im, 4, sizeof (cl_int), &maps);
    error |= clSetKernelArg (CLHelper::k_col2im, 5, sizeof (cl_int), &samples);
    error |= clSetKernelArg (CLHelper::k_col2im, 6, sizeof (cl_int), &target_width);
    error |= clSetKernelArg (CLHelper::k_col2im, 7, sizeof (cl_int), &target_height);
    error |= clSetKernelArg (CLHelper::k_col2im, 8, sizeof (cl_int), &target_maps);
    error |= clSetKernelArg (CLHelper::k_col2im, 9, sizeof (cl_int), &kernel_width);
    error |= clSetKernelArg (CLHelper::k_col2im, 10, sizeof (cl_int), &kernel_height);
    error |= clSetKernelArg (CLHelper::k_col2im, 11, sizeof (cl_int), &stride_width);
    error |= clSetKernelArg (CLHelper::k_col2im, 12, sizeof (cl_int), &stride_height);
    error |= clSetKernelArg (CLHelper::k_col2im, 13, sizeof (cl_int), &pad_width);
    error |= clSetKernelArg (CLHelper::k_col2im, 14, sizeof (cl_int), &pad_height);
    if (error != CL_SUCCESS) {
      FATAL("Error setting kernel args: " << (signed int) error);
    }

    size_t global_work_size[] = {(size_t)(source_width * source_height), (size_t)maps, (size_t)samples};

    error = clEnqueueNDRangeKernel (CLHelper::queue, CLHelper::k_col2im, 3, NULL,
                                    global_work_size, NULL, 0, NULL, NULL);
    if (error != CL_SUCCESS) {
      FATAL("Error enqueueing kernel: " << (signed int) error);
    }

#ifdef BRUTAL_FINISH
    error = clFinish (CLHelper::queue);
    if (error != CL_SUCCESS) {
      FATAL("Error finishing command queue: " << (signed int) error);
    }
#endif
  } else {
    ((Tensor&)target).MoveToCPU();
    source.MoveToCPU(true);
#endif
    // CPU path: clear the output image, then scatter-add every column element back
    // to the input pixel it was copied from by the corresponding IM2COL step.
    SETSAMPLE(source, -1, 0.0);

    const int target_width = (2 * pad_width + source_width - kernel_width) / stride_width + 1;
    const int target_height = (2 * pad_height + source_height - kernel_height) / stride_height + 1;
    const int target_maps = kernel_width * kernel_height * maps;

    const int target_size = samples * target_width * target_height * target_maps;
    const int actual_target_size = target.samples() * target.width() * target.height() * target.maps();
    if(target_size != actual_target_size)
      FATAL("Target size wrong!");

    for(int sample = 0; sample < samples; sample++) {
      datum* source_ptr = source.data_ptr(0, 0, 0, sample);
      for(int target_map = 0; target_map < target_maps; target_map++) {
        const datum* target_ptr = target.data_ptr_const(0, 0, 0, target_map);
        // Decompose the column map index into kernel offset (kx, ky) and input map.
        int kx = target_map % kernel_width;
        int ky = (target_map / kernel_width) % kernel_height;
        int imap = target_map / (kernel_width * kernel_height);
        for(int oy = 0; oy < target_height; oy++) {
          int iy = oy * stride_height - pad_height + ky;
          if(iy >= 0 && iy < source_height) {
            for(int ox = 0; ox < target_width; ox++) {
              int ix = ox * stride_width - pad_width + kx;
              if(ix >= 0 && ix < source_width) {
                source_ptr[(imap * source_height + iy) * source_width + ix] +=
                  target_ptr[(sample * target_height + oy) * target_width + ox];
              }
            }
          }
        }
      }
    }
#ifdef BUILD_OPENCL
  }
#endif
  source.hint_ignore_content_ = false;
}
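// Sketch of the COL2IM index mapping (added for illustration, not part of the
// original source). Each column entry is accumulated back into the input position
// it was read from, using the decomposition from the CPU loop above:
//
//   kx   = target_map % kernel_width
//   ky   = (target_map / kernel_width) % kernel_height
//   imap = target_map / (kernel_width * kernel_height)
//   ix   = ox * stride_width  - pad_width  + kx
//   iy   = oy * stride_height - pad_height + ky
//
// Because several kernel windows can cover the same input pixel, values are added
// rather than assigned. As an illustrative case (not taken from this file): with a
// 3x3 kernel, stride 1, no padding and a 5x5 input map, target_width and
// target_height are both (5 - 3) / 1 + 1 = 3, target_maps is 3 * 3 * maps, and
// column map 4 of the first input map corresponds to kernel offset (kx, ky) = (1, 1).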