Example #1
0
void TensorMath::ADD(const Tensor& source_a, const Tensor& source_b, Tensor& target)
{
#ifdef BUILD_OPENCL
  ((Tensor&)source_a).MoveToCPU();
  ((Tensor&)source_b).MoveToCPU();
  target.MoveToCPU(true);
#endif
  if((source_a.samples() != source_b.samples())
    || (source_b.samples() != target.samples())
    || (source_a.elements() != source_b.elements())
    || (source_b.elements() != target.elements())) {
    FATAL("Dimensions don't match!");
  }
  
  #pragma omp parallel for default(shared)
  for(unsigned int element = 0; element < source_a.elements(); element++) {
    const datum* source_a_ptr = &(source_a.data_ptr_const()[element]);
    const datum* source_b_ptr = &(source_b.data_ptr_const()[element]);
    datum* target_ptr = &(target.data_ptr()[element]);
    
    *target_ptr = *source_a_ptr + *source_b_ptr;
  }
  
  target.hint_ignore_content_ = false;
}
Example #2
0
void TensorMath::COL2IM(Tensor& source, const int source_width, const int source_height, const int maps, const int samples, const int kernel_width, const int kernel_height, const int stride_width, const int stride_height, const int pad_width, const int pad_height, const Tensor& target)
{
#ifdef BUILD_OPENCL
  if(source.cl_gpu_ || target.cl_gpu_) {
    ((Tensor&)target).MoveToGPU();
    source.MoveToGPU(true);
    
    cl_uint error = 0;
    const int target_width = (2 * pad_width + source_width - kernel_width) / stride_width + 1;
    const int target_height = (2 * pad_height + source_height - kernel_height) / stride_height + 1;
    const int target_maps = kernel_width * kernel_height * maps;
    
    error |= clSetKernelArg (CLHelper::k_col2im, 0, sizeof (cl_mem), &(((Tensor&)source).cl_data_ptr_));
    error |= clSetKernelArg (CLHelper::k_col2im, 1, sizeof (cl_mem), &(target.cl_data_ptr_));
    error |= clSetKernelArg (CLHelper::k_col2im, 2, sizeof (cl_int), &source_width);
    error |= clSetKernelArg (CLHelper::k_col2im, 3, sizeof (cl_int), &source_height);
    error |= clSetKernelArg (CLHelper::k_col2im, 4, sizeof (cl_int), &maps);
    error |= clSetKernelArg (CLHelper::k_col2im, 5, sizeof (cl_int), &samples);
    error |= clSetKernelArg (CLHelper::k_col2im, 6, sizeof (cl_int), &target_width);
    error |= clSetKernelArg (CLHelper::k_col2im, 7, sizeof (cl_int), &target_height);
    error |= clSetKernelArg (CLHelper::k_col2im, 8, sizeof (cl_int), &target_maps);
    error |= clSetKernelArg (CLHelper::k_col2im, 9, sizeof (cl_int), &kernel_width);
    error |= clSetKernelArg (CLHelper::k_col2im, 10, sizeof (cl_int), &kernel_height);
    error |= clSetKernelArg (CLHelper::k_col2im, 11, sizeof (cl_int), &stride_width);
    error |= clSetKernelArg (CLHelper::k_col2im, 12, sizeof (cl_int), &stride_height);
    error |= clSetKernelArg (CLHelper::k_col2im, 13, sizeof (cl_int), &pad_width);
    error |= clSetKernelArg (CLHelper::k_col2im, 14, sizeof (cl_int), &pad_height);

    if (error != CL_SUCCESS) {
      FATAL("Error setting kernel args: " << (signed int) error);
    }

    size_t global_work_size[] = {(size_t)(source_width * source_height), (size_t)maps, (size_t)samples};

    error = clEnqueueNDRangeKernel (CLHelper::queue, CLHelper::k_col2im, 3, NULL,
        global_work_size, NULL, 0, NULL, NULL);
    if (error != CL_SUCCESS) {
      FATAL("Error enqueueing kernel: " << (signed int) error);
    }

#ifdef BRUTAL_FINISH
    error = clFinish (CLHelper::queue);
    if (error != CL_SUCCESS) {
      FATAL("Error finishing command queue: " << (signed int) error);
    }
#endif
  } else {
    ((Tensor&)target).MoveToCPU();
    source.MoveToCPU(true);
#endif    
    SETSAMPLE(source, -1, 0.0);
    
    const int target_width = (2 * pad_width + source_width - kernel_width) / stride_width + 1;
    const int target_height = (2 * pad_height + source_height - kernel_height) / stride_height + 1;
    const int target_maps = kernel_width * kernel_height * maps;
    
    const int target_size = samples * target_width * target_height * target_maps;
    const int actual_target_size = target.samples() * target.width()* target.height() * target.maps();
    
    if(target_size != actual_target_size)
      FATAL("Target size wrong!");
    
    for(int sample = 0; sample < samples; sample++) {
      datum* source_ptr = source.data_ptr(0, 0, 0, sample);
      for(int target_map = 0; target_map < target_maps; target_map++) {
        const datum* target_ptr = target.data_ptr_const(0, 0, 0, target_map);
        int kx = target_map % kernel_width;
        int ky = (target_map / kernel_width) % kernel_height;
        int imap = target_map / (kernel_width * kernel_height);
        for(int oy = 0; oy < target_height; oy++) {
          int iy = oy * stride_height - pad_height + ky;
          if(iy >= 0 && iy < source_height) {
            for(int ox = 0; ox < target_width; ox++) {
              int ix = ox * stride_width - pad_width + kx;
              if(ix >= 0 && iy < source_width) {
                source_ptr[(imap * source_height + iy) * source_width + ix] +=
                  target_ptr[(sample * target_height + oy) * target_width + ox];
              } 
            }
          }
        }
      }
    }
  
#ifdef BUILD_OPENCL
  }
#endif
  source.hint_ignore_content_ = false;
}