예제 #1
0
// Reorders this descriptor's data from the external-private (extprv) layout
// into its private (prv) layout. No-op when no extprv->prv reorder descriptor
// was set up, or when the two memory primitive descriptors already compare
// equal (same format/data type, per the DEBUG message below).
// `aprimitive` is presumably the producer of the extprv data; it is forwarded
// to create_reorder_from_extprv() — confirm against that helper's contract.
void MKLDNNMemoryDescriptor<Dtype, is_diff>::convert_from_extprv(shared_ptr<primitive> aprimitive)
{
#ifdef DEBUG
    LOG(INFO) << "--- MKLDNNMemoryDescriptorBase<Dtype>::convert_from_extprv --- " << this->name;
#endif
    CHECK(aprimitive);
    // No reorder was configured for this descriptor: nothing to convert.
    if(this->_reorder_extprv2prv_pd == NULL)
        return;
    // Identical descriptors mean the data is already in the target layout.
    if (*this->_extprv_memory_pd == *this->_prv_memory_pd)
    {
#ifdef DEBUG
        LOG(INFO) << "The format and data_type of _extprv_memory_pd and _prv_memory_pd is same, no need do conversion.";
#endif
        return;
    }
    // Build the extprv->prv reorder bound to aprimitive, then execute it.
    create_reorder_from_extprv(aprimitive);
    VLOG(1) << "--- MKLDNNMemoryDescriptorBase<Dtype>::convert_from_extprv --- " << this->name;
#ifdef DEBUG
    LOG(INFO) << "Reorder: from extprv to prv.";
    LOG(INFO) << "Format of _extprv_memory_pd: " << this->_extprv_memory_pd->desc().data.format;
    LOG(INFO) << "Format of _prv_memory_pd: " << this->_prv_memory_pd->desc().data.format;
#endif
    // The submit is timed and accounted to the static "mkldnn_conversion" bucket.
    PERFORMANCE_MEASUREMENT_BEGIN();
    this->_reorder_extprv2prv.submit();
    PERFORMANCE_MEASUREMENT_END_STATIC("mkldnn_conversion");
}
예제 #2
0
// Forward pass of the MKL-DNN LRN layer: lazily (re)builds the forward
// primitive, synchronizes the input/output private buffers, and submits it.
void MKLDNNLRNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom
                                        ,const vector<Blob<Dtype>*>& top)
{
    VLOG(1) << "MKLDNNLRNLayer<Dtype>::Forward_cpu: " << this->layer_param_.name();
    // Build the forward primitive on first use, and again after a reshape.
    const bool needs_init = (lrnFwd_pd == NULL) || this->reshape;
    if (needs_init) {
        InitLRNFwd(bottom, top);
    }
    // Reorder the input into the primitive's layout if required.
    fwd_bottom_data->sync_before_read();
    // Prepare the top's private buffer for being overwritten.
    fwd_top_data->sync_before_write();

    PERFORMANCE_EVENT_ID_INIT(perf_id_fw_, PERFORMANCE_MKLDNN_NAME("FW"));
    PERFORMANCE_MEASUREMENT_BEGIN();
    lrnFwd.submit();
    PERFORMANCE_MEASUREMENT_END_ID(perf_id_fw_);
}
예제 #3
0
// Converts user-layout data located at cpu_ptr into this descriptor's
// private (prv) layout by setting up and submitting the usr->prv reorder.
// Unlike convert_from_extprv(), there is no early-out here: the reorder is
// always (re)created and executed.
void MKLDNNMemoryDescriptor<Dtype, is_diff>::convert_to_prv(void* cpu_ptr)
{
#ifdef DEBUG
    LOG(INFO) << "--- MKLDNNMemoryDescriptorBase<Dtype>::convert_to_prv --- " << this->name;
#endif
    // Bind the usr->prv reorder primitive to the caller's CPU buffer.
    create_reorder_to_prv(cpu_ptr);
    VLOG(1) << "--- MKLDNNMemoryDescriptorBase<Dtype>::convert_to_prv --- " << this->name;
#ifdef DEBUG
    LOG(INFO) << "Reorder: from usr to prv.";
    LOG(INFO) << "Format of _usr_memory_pd: " << this->_usr_memory_pd->desc().data.format;
    LOG(INFO) << "Format of _prv_memory_pd: " << this->_prv_memory_pd->desc().data.format;
#endif
    // The submit is timed and accounted to the static "mkldnn_conversion" bucket.
    PERFORMANCE_MEASUREMENT_BEGIN();
    this->_reorder_usr2prv.submit();
    PERFORMANCE_MEASUREMENT_END_STATIC("mkldnn_conversion");
}
예제 #4
0
// Backward pass of the MKL-DNN LRN layer: mirrors Forward_cpu — lazy
// primitive (re)creation, buffer synchronization, then primitive submission.
void MKLDNNLRNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top
                                        ,const vector<bool>& propagate_down
                                        ,const vector<Blob<Dtype>*>& bottom)
{
    VLOG(1) << "MKLDNNLRNLayer<Dtype>::Backward_cpu: " << this->layer_param_.name();
    // Nothing to do when the bottom blob does not need gradients.
    if (!propagate_down[0])
        return;
    // Build the backward primitive on first use, and again after a reshape.
    const bool needs_init = (lrnBwd_pd == NULL) || this->reshape;
    if (needs_init)
        InitLRNBwd(top, propagate_down, bottom);
    // Reorder the incoming top diff into the primitive's layout if required.
    bwd_top_diff->sync_before_read();
    // Prepare the bottom diff's private buffer for being overwritten.
    bwd_bottom_diff->sync_before_write();

    PERFORMANCE_EVENT_ID_INIT(perf_id_bw_, PERFORMANCE_MKLDNN_NAME("BW"));
    PERFORMANCE_MEASUREMENT_BEGIN();
    lrnBwd.submit();
    PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_);
}
예제 #5
0
// Backward pass through the MKL pooling primitive: wires the top diff and
// the max-location workspace into the resource array, zeroes the destination
// diff buffer, and runs dnnExecute on poolingBwd.
void MKLPoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }

  dnnError_t status;
  void* pooling_res[dnnResourceNumber];

  // Workspace (max locations): exported through top[1] when present,
  // otherwise taken from the layer-private max_idx_ buffer.
  const size_t* mask_ptr = (top.size() > 1)
      ? reinterpret_cast<const size_t*>(top[1]->cpu_data())
      : (max_idx_.cpu_data());
  pooling_res[dnnResourceWorkspace] =
      reinterpret_cast<void *>(const_cast<size_t*>(mask_ptr));

  // Top diff, converted into the primitive's layout when necessary.
  pooling_res[dnnResourceDiffDst] =
      bwd_top_diff->get_converted_prv(top[0], true);

  // Pick the destination for the computed bottom diff: private memory when
  // a conversion is needed, the plain CPU buffer otherwise.
  if (bwd_bottom_diff->conversion_needed()) {
    bottom[0]->set_prv_diff_descriptor(bwd_bottom_diff);
    pooling_res[dnnResourceDiffSrc] = bottom[0]->mutable_prv_diff();
  } else {
    pooling_res[dnnResourceDiffSrc] = bottom[0]->mutable_cpu_diff();
  }
  // Zero the destination buffer before executing the primitive.
  caffe_set(bottom[0]->count(), Dtype(0),
      reinterpret_cast<Dtype *>(pooling_res[dnnResourceDiffSrc]));

  PERFORMANCE_MEASUREMENT_BEGIN();
  status = dnnExecute<Dtype>(poolingBwd, pooling_res);
  PERFORMANCE_MEASUREMENT_END_MKL("BW");

  CHECK_EQ(status, E_SUCCESS);
}
예제 #6
0
// Forward pass through the MKL pooling primitive. The primitives (poolingFwd
// and poolingBwd) are created lazily on the first call:
//   - if bottom[0] has no private (prv) data, they are created on the usr
//     layout of fwd_bottom_data;
//   - otherwise they are created on the internal layout borrowed from
//     bottom[0]'s MKL2017 memory descriptor, and internal layouts for top
//     data / diffs are derived from poolingFwd.
void MKLPoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  size_t* mask = NULL;  // suppress warnings about uninitialized variables

  // We'll output the mask to top[1] if it's of size >1.
  const bool use_top_mask = top.size() > 1;
  dnnAlgorithm_t algorithm;

  // Map the layer parameter onto the MKL pooling algorithm; STOCHASTIC is
  // not implemented (NOT_IMPLEMENTED / LOG(FATAL) terminate before
  // `algorithm` would be read uninitialized — verify NOT_IMPLEMENTED aborts).
  switch (this->layer_param_.pooling_param().pool()) {
  case PoolingParameter_PoolMethod_MAX:
    algorithm = dnnAlgorithmPoolingMax;
    break;
  case PoolingParameter_PoolMethod_AVE:
    algorithm = dnnAlgorithmPoolingAvg;
    break;
  case PoolingParameter_PoolMethod_STOCHASTIC:
    NOT_IMPLEMENTED;
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }

  dnnError_t status;
  void* pooling_res[dnnResourceNumber];

  // Workspace (max locations): exported via top[1] when requested,
  // otherwise kept in the layer-private max_idx_ buffer.
  mask = (use_top_mask) ?
      reinterpret_cast<size_t*>(top[1]->mutable_cpu_data()) :
      (max_idx_.mutable_cpu_data());
  pooling_res[dnnResourceWorkspace] = reinterpret_cast<void*>(mask);

  // Prefer the private-layout input if bottom[0] carries one.
  void* bottom_data =
    reinterpret_cast<void *>(const_cast<Dtype*>(bottom[0]->prv_data()));
  if (NULL == bottom_data) {
    // No prv data: fall back to the plain CPU buffer and, on the first call,
    // create both primitives on the usr layout.
    bottom_data =
      reinterpret_cast<void *>(const_cast<Dtype*>(bottom[0]->cpu_data()));
    if (NULL == poolingFwd) {
      // Now create poolingFwd
      status = dnnPoolingCreateForward<Dtype>(&poolingFwd, NULL,
              algorithm, fwd_bottom_data->layout_usr,
              kernel_size, kernel_stride, src_offset, dnnBorderZeros);
      CHECK_EQ(status, E_SUCCESS);

      // Now create poolingBwd
      status = dnnPoolingCreateBackward<Dtype>(&poolingBwd, NULL,
              algorithm, fwd_bottom_data->layout_usr,
              kernel_size, kernel_stride, src_offset, dnnBorderZeros);
      CHECK_EQ(status, E_SUCCESS);
    }
  } else if (NULL == poolingFwd) {
    // Is it the first pass? Create a primitive.
    // The input already lives in private memory; adopt its internal layout.
    CHECK_EQ((bottom[0]->get_prv_data_descriptor())->get_descr_type(),
            PrvMemDescr::PRV_DESCR_MKL2017);
    shared_ptr<MKLData<Dtype> > mem_descr
      =  boost::static_pointer_cast<MKLData<Dtype> >
            (bottom[0]->get_prv_data_descriptor());
    CHECK(mem_descr != NULL);

    DLOG(INFO) << "Using layout of " << mem_descr->name
            << " as input layout for " << this->layer_param_.name();

    // copy shared_ptr
    fwd_bottom_data = mem_descr;

    // Now create poolingFwd
    status = dnnPoolingCreateForward<Dtype>(&poolingFwd, NULL,
            algorithm, fwd_bottom_data->layout_int, kernel_size,
            kernel_stride, src_offset, dnnBorderZeros);
    CHECK_EQ(status, E_SUCCESS);

    // Derive the top's internal layout from the freshly created primitive.
    fwd_top_data->create_internal_layout(poolingFwd, dnnResourceDst);

    // Now create poolingBwd
    status = dnnPoolingCreateBackward<Dtype>(&poolingBwd, NULL,
            algorithm, fwd_bottom_data->layout_int, kernel_size,
            kernel_stride, src_offset, dnnBorderZeros);
    CHECK_EQ(status, E_SUCCESS);

    // Backward diff layouts are also derived from poolingFwd.
    bwd_top_diff   ->create_internal_layout(poolingFwd, dnnResourceDst);
    bwd_bottom_diff->create_internal_layout(poolingFwd, dnnResourceSrc);
  }

  // Wire up source and destination, then execute.
  pooling_res[dnnResourceSrc] = bottom_data;
  if (fwd_top_data->conversion_needed()) {
    top[0]->set_prv_data_descriptor(fwd_top_data);
    pooling_res[dnnResourceDst] =
            reinterpret_cast<void *>(top[0]->mutable_prv_data());
  } else {
    pooling_res[dnnResourceDst] =
            reinterpret_cast<void *>(top[0]->mutable_cpu_data());
    DLOG(INFO) << "Using cpu_data for top in DnnPooling.";
  }
  PERFORMANCE_MEASUREMENT_BEGIN();
  status = dnnExecute<Dtype>(poolingFwd, pooling_res);
  PERFORMANCE_MEASUREMENT_END_MKL("FW");

  CHECK_EQ(status, E_SUCCESS);
}
예제 #7
0
// Backward pass of the MKL split layer: the bottom diff is the coefficient-
// weighted sum of all top diffs, computed with an MKL sum primitive. The
// primitive is created lazily: on the internal layout of the first top diff
// that already lives in private (prv) memory, or on the usr layout when no
// top diff does.
void MKLSplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) { return; }
  dnnError_t e;
  vector<void*> top_diff;
  // Count of tops whose diff already has a private layout.
  // FIX: was declared `bool`, which saturates at 1 under `+= 1` (deprecated
  // bool arithmetic); only `> 0` is tested, so an int is behavior-compatible.
  int num_prv = 0;
  for (size_t i = 0; i < num_tops; i++) {
    top_diff.push_back(reinterpret_cast<void *>(
      const_cast<Dtype*>(top[i]->prv_diff())));
    if (top_diff[i] != NULL) {
      num_prv += 1;
    } else {
      // No private diff: fall back to the plain CPU diff buffer.
      top_diff[i] =
        reinterpret_cast<void *>(const_cast<Dtype*>(top[i]->cpu_diff()));
    }
  }

  if (num_prv > 0) {
    if (sumPrimitive == NULL) {
      // Adopt the internal layout of the first prv top diff for the sum.
      dnnLayout_t int_layout = NULL;
      for (size_t i = 0; i < num_tops; ++i) {
        if (top[i]->prv_diff() != NULL) {
          CHECK((top[i]->get_prv_diff_descriptor())->get_descr_type() ==
            PrvMemDescr::PRV_DESCR_MKL2017);
          shared_ptr<MKLDiff<Dtype> > mem_descr =
            boost::static_pointer_cast<MKLDiff<Dtype> >(
                top[i]->get_prv_diff_descriptor());
          CHECK(mem_descr != NULL);
          bwd_top_diff[i] = mem_descr;
          if (int_layout == NULL) {
            int_layout = mem_descr->layout_int;
          }
        }
      }
      e = dnnSumCreate<Dtype>(&sumPrimitive, NULL, num_tops,
        int_layout, &coeffs_[0]);
      CHECK_EQ(e, E_SUCCESS);

      bwd_bottom_diff->create_internal_layout(sumPrimitive, dnnResourceDst);

      // Tops without a prv diff get converters into the primitive's layout.
      for (size_t i = 0; i < num_tops; ++i) {
        if (top[i]->prv_diff() == NULL) {
          bwd_top_diff[i]->create_internal_layout(sumPrimitive,
                  (dnnResourceType_t)(dnnResourceMultipleSrc + i));
        }
      }
    }
  } else {
    if (sumPrimitive == NULL) {
      // Every diff is in usr layout; build the sum directly on it.
      e = dnnSumCreate<Dtype>(&sumPrimitive, NULL, num_tops,
        bwd_bottom_diff->layout_usr, &coeffs_[0]);
      CHECK_EQ(e, E_SUCCESS);
    }
  }

  // Gather the source diffs, converting to the internal layout when needed.
  // (size_t index: matches num_tops and avoids the signed/unsigned mix.)
  void *sum_res[dnnResourceNumber];
  for (size_t i = 0; i < num_tops; ++i) {
    if (bwd_top_diff[i]->convert_to_int) {
      sum_res[dnnResourceMultipleSrc + i] =
        bwd_top_diff[i]->get_converted_prv(top[i], false);
    } else {
      // top_diff[i] is already a void*; no cast needed (the original applied
      // a redundant double reinterpret_cast here).
      sum_res[dnnResourceMultipleSrc + i] = top_diff[i];
    }
  }

  // Destination for the summed diff: private memory when conversion is
  // needed, the plain CPU buffer otherwise.
  if (bwd_bottom_diff->conversion_needed()) {
    bottom[0]->set_prv_diff_descriptor(bwd_bottom_diff);
    sum_res[dnnResourceDst] =
        reinterpret_cast<void*>(bottom[0]->mutable_prv_diff());
  } else {
    sum_res[dnnResourceDst] =
        reinterpret_cast<void*>(bottom[0]->mutable_cpu_diff());
  }

  // FIX: this is the backward pass — use perf_id_bw_ (it previously
  // initialized perf_id_fw_ with the "BW" name, clashing with the forward
  // measurement; sibling layers use perf_id_bw_ in Backward_cpu).
  PERFORMANCE_EVENT_ID_INIT(perf_id_bw_, PERFORMANCE_MKL_NAME("BW"));
  PERFORMANCE_MEASUREMENT_BEGIN();
  e = dnnExecute<Dtype>(sumPrimitive, sum_res);
  PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_);

  CHECK_EQ(e, E_SUCCESS);
}