void MKLDNNMemoryDescriptor<Dtype, is_diff>::convert_from_extprv(shared_ptr<primitive> aprimitive) { #ifdef DEBUG LOG(INFO) << "--- MKLDNNMemoryDescriptorBase<Dtype>::convert_from_extprv --- " << this->name; #endif CHECK(aprimitive); if(this->_reorder_extprv2prv_pd == NULL) return; if (*this->_extprv_memory_pd == *this->_prv_memory_pd) { #ifdef DEBUG LOG(INFO) << "The format and data_type of _extprv_memory_pd and _prv_memory_pd is same, no need do conversion."; #endif return; } create_reorder_from_extprv(aprimitive); VLOG(1) << "--- MKLDNNMemoryDescriptorBase<Dtype>::convert_from_extprv --- " << this->name; #ifdef DEBUG LOG(INFO) << "Reorder: from extprv to prv."; LOG(INFO) << "Format of _extprv_memory_pd: " << this->_extprv_memory_pd->desc().data.format; LOG(INFO) << "Format of _prv_memory_pd: " << this->_prv_memory_pd->desc().data.format; #endif PERFORMANCE_MEASUREMENT_BEGIN(); this->_reorder_extprv2prv.submit(); PERFORMANCE_MEASUREMENT_END_STATIC("mkldnn_conversion"); }
void MKLDNNLRNLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                                        const vector<Blob<Dtype>*>& top) {
    VLOG(1) << "MKLDNNLRNLayer<Dtype>::Forward_cpu: " << this->layer_param_.name();

    // Lazily (re)build the forward primitive: on the first pass, or whenever
    // the blob shapes changed since the last setup.
    const bool needs_setup = (lrnFwd_pd == NULL) || this->reshape;
    if (needs_setup) {
        InitLRNFwd(bottom, top);
    }

    // Reorder the input into the primitive's layout if required, and make the
    // top blob's private buffer the write target.
    fwd_bottom_data->sync_before_read();
    fwd_top_data->sync_before_write();

    PERFORMANCE_EVENT_ID_INIT(perf_id_fw_, PERFORMANCE_MKLDNN_NAME("FW"));
    PERFORMANCE_MEASUREMENT_BEGIN();
    lrnFwd.submit();
    PERFORMANCE_MEASUREMENT_END_ID(perf_id_fw_);
}
// Converts data from the user (plain CPU) layout into this descriptor's
// private (MKLDNN-internal) layout by building and submitting a usr->prv
// reorder primitive over the given CPU buffer.
void MKLDNNMemoryDescriptor<Dtype, is_diff>::convert_to_prv(void* cpu_ptr) {
#ifdef DEBUG
    LOG(INFO) << "--- MKLDNNMemoryDescriptorBase<Dtype>::convert_to_prv --- " << this->name;
#endif
    // Set up the reorder primitive around the caller-supplied CPU buffer.
    create_reorder_to_prv(cpu_ptr);
    VLOG(1) << "--- MKLDNNMemoryDescriptorBase<Dtype>::convert_to_prv --- " << this->name;
#ifdef DEBUG
    LOG(INFO) << "Reorder: from usr to prv.";
    LOG(INFO) << "Format of _usr_memory_pd: " << this->_usr_memory_pd->desc().data.format;
    LOG(INFO) << "Format of _prv_memory_pd: " << this->_prv_memory_pd->desc().data.format;
#endif
    // Execute the reorder; the measurement macros bracket only the submit.
    PERFORMANCE_MEASUREMENT_BEGIN();
    this->_reorder_usr2prv.submit();
    PERFORMANCE_MEASUREMENT_END_STATIC("mkldnn_conversion");
}
void MKLDNNLRNLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
                                         const vector<bool>& propagate_down,
                                         const vector<Blob<Dtype>*>& bottom) {
    VLOG(1) << "MKLDNNLRNLayer<Dtype>::Backward_cpu: " << this->layer_param_.name();

    // Skip the whole backward pass when the bottom blob needs no gradient.
    if (!propagate_down[0]) {
        return;
    }

    // Lazily (re)build the backward primitive: first pass, or after a reshape.
    const bool needs_setup = (lrnBwd_pd == NULL) || this->reshape;
    if (needs_setup) {
        InitLRNBwd(top, propagate_down, bottom);
    }

    // Reorder the incoming top diff if required, and target the bottom diff's
    // private buffer for writing.
    bwd_top_diff->sync_before_read();
    bwd_bottom_diff->sync_before_write();

    PERFORMANCE_EVENT_ID_INIT(perf_id_bw_, PERFORMANCE_MKLDNN_NAME("BW"));
    PERFORMANCE_MEASUREMENT_BEGIN();
    lrnBwd.submit();
    PERFORMANCE_MEASUREMENT_END_ID(perf_id_bw_);
}
void MKLPoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
                                          const vector<bool>& propagate_down,
                                          const vector<Blob<Dtype>*>& bottom) {
  // Nothing to do unless the bottom blob requires gradients.
  if (!propagate_down[0]) {
    return;
  }

  void* pooling_res[dnnResourceNumber];

  // Pooling workspace (selected-index mask): either exposed to the network
  // as top[1], or kept in the layer's internal max_idx_ buffer.
  const size_t* workspace_mask = (top.size() > 1)
      ? reinterpret_cast<const size_t*>(top[1]->cpu_data())
      : max_idx_.cpu_data();
  pooling_res[dnnResourceWorkspace] =
      reinterpret_cast<void *>(const_cast<size_t*>(workspace_mask));

  // Incoming gradient, converted into the primitive's private layout if needed.
  pooling_res[dnnResourceDiffDst] = bwd_top_diff->get_converted_prv(top[0], true);

  // Pick the destination buffer for the computed bottom gradient: the blob's
  // private (MKL-layout) diff when a conversion is in play, otherwise its
  // plain CPU diff.
  if (bwd_bottom_diff->conversion_needed()) {
    bottom[0]->set_prv_diff_descriptor(bwd_bottom_diff);
    pooling_res[dnnResourceDiffSrc] = bottom[0]->mutable_prv_diff();
  } else {
    pooling_res[dnnResourceDiffSrc] = bottom[0]->mutable_cpu_diff();
  }

  // Clear the destination before executing — presumably dnnExecute writes
  // only the selected elements, so stale values must not survive (TODO confirm
  // against the MKL pooling backward contract).
  caffe_set(bottom[0]->count(), Dtype(0),
            reinterpret_cast<Dtype *>(pooling_res[dnnResourceDiffSrc]));

  PERFORMANCE_MEASUREMENT_BEGIN();
  const dnnError_t status = dnnExecute<Dtype>(poolingBwd, pooling_res);
  PERFORMANCE_MEASUREMENT_END_MKL("BW");
  CHECK_EQ(status, E_SUCCESS);
}
// Forward pass of MKL2017 pooling. Lazily creates the forward/backward MKL
// primitives on the first call, choosing input layouts from the bottom blob
// (user layout vs. a private MKL layout seen on the blob), then executes the
// forward primitive.
void MKLPoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                                         const vector<Blob<Dtype>*>& top) {
  // We'll output the mask to top[1] if it's of size >1.
  size_t* mask = NULL;  // suppress warnings about uninitialized variables
  const bool use_top_mask = top.size() > 1;

  // Map the Caffe pooling method onto the MKL algorithm enum.
  dnnAlgorithm_t algorithm;
  switch (this->layer_param_.pooling_param().pool()) {
  case PoolingParameter_PoolMethod_MAX:
    algorithm = dnnAlgorithmPoolingMax;
    break;
  case PoolingParameter_PoolMethod_AVE:
    algorithm = dnnAlgorithmPoolingAvg;
    break;
  case PoolingParameter_PoolMethod_STOCHASTIC:
    NOT_IMPLEMENTED;
    break;
  default:
    LOG(FATAL) << "Unknown pooling method.";
  }

  dnnError_t status;
  void* pooling_res[dnnResourceNumber];

  // Workspace (selected-index mask): written to top[1] when exposed,
  // otherwise kept in the layer-internal max_idx_ buffer.
  mask = (use_top_mask) ?
      reinterpret_cast<size_t*>(top[1]->mutable_cpu_data()) :
      (max_idx_.mutable_cpu_data());
  pooling_res[dnnResourceWorkspace] = reinterpret_cast<void*>(mask);

  // Prefer the bottom blob's private (MKL-layout) data when present.
  void* bottom_data =
      reinterpret_cast<void *>(const_cast<Dtype*>(bottom[0]->prv_data()));
  if (NULL == bottom_data) {
    // No private data: fall back to the plain CPU buffer, and create the
    // primitives (first pass only) against the user layout.
    bottom_data =
        reinterpret_cast<void *>(const_cast<Dtype*>(bottom[0]->cpu_data()));
    if (NULL == poolingFwd) {
      // Now create poolingFwd
      status = dnnPoolingCreateForward<Dtype>(&poolingFwd, NULL, algorithm,
          fwd_bottom_data->layout_usr, kernel_size, kernel_stride,
          src_offset, dnnBorderZeros);
      CHECK_EQ(status, E_SUCCESS);
      // Now create poolingBwd
      status = dnnPoolingCreateBackward<Dtype>(&poolingBwd, NULL, algorithm,
          fwd_bottom_data->layout_usr, kernel_size, kernel_stride,
          src_offset, dnnBorderZeros);
      CHECK_EQ(status, E_SUCCESS);
    }
  } else if (NULL == poolingFwd) {
    // Is it the first pass? Create a primitive.
    // The bottom blob carries a private descriptor: adopt its internal
    // layout as this layer's input layout.
    CHECK_EQ((bottom[0]->get_prv_data_descriptor())->get_descr_type(),
             PrvMemDescr::PRV_DESCR_MKL2017);
    shared_ptr<MKLData<Dtype> > mem_descr =
        boost::static_pointer_cast<MKLData<Dtype> >
            (bottom[0]->get_prv_data_descriptor());
    CHECK(mem_descr != NULL);
    DLOG(INFO) << "Using layout of " << mem_descr->name
               << " as input layout for " << this->layer_param_.name();
    // copy shared_ptr
    fwd_bottom_data = mem_descr;

    // Now create poolingFwd
    status = dnnPoolingCreateForward<Dtype>(&poolingFwd, NULL, algorithm,
        fwd_bottom_data->layout_int, kernel_size, kernel_stride,
        src_offset, dnnBorderZeros);
    CHECK_EQ(status, E_SUCCESS);
    fwd_top_data->create_internal_layout(poolingFwd, dnnResourceDst);

    // Now create poolingBwd
    status = dnnPoolingCreateBackward<Dtype>(&poolingBwd, NULL, algorithm,
        fwd_bottom_data->layout_int, kernel_size, kernel_stride,
        src_offset, dnnBorderZeros);
    CHECK_EQ(status, E_SUCCESS);
    // NOTE(review): the backward diff layouts are derived from poolingFwd,
    // not poolingBwd — looks intentional (fwd dst/src layouts match the
    // corresponding diff layouts), but confirm against the MKL DNN API.
    bwd_top_diff ->create_internal_layout(poolingFwd, dnnResourceDst);
    bwd_bottom_diff->create_internal_layout(poolingFwd, dnnResourceSrc);
  }

  pooling_res[dnnResourceSrc] = bottom_data;

  // Output target: private buffer when the top needs the MKL layout,
  // otherwise the plain CPU buffer.
  if (fwd_top_data->conversion_needed()) {
    top[0]->set_prv_data_descriptor(fwd_top_data);
    pooling_res[dnnResourceDst] =
        reinterpret_cast<void *>(top[0]->mutable_prv_data());
  } else {
    pooling_res[dnnResourceDst] =
        reinterpret_cast<void *>(top[0]->mutable_cpu_data());
    DLOG(INFO) << "Using cpu_data for top in DnnPooling.";
  }

  PERFORMANCE_MEASUREMENT_BEGIN();
  status = dnnExecute<Dtype>(poolingFwd, pooling_res);
  PERFORMANCE_MEASUREMENT_END_MKL("FW");
  CHECK_EQ(status, E_SUCCESS);
}
// Backward pass of the MKL split layer: the bottom gradient is the
// coefficient-weighted sum (dnnSum) of all top gradients. The sum primitive
// is created lazily; its input layout is taken from the first top diff that
// already lives in a private MKL layout, or from the user layout when none do.
void MKLSplitLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
                                        const vector<bool>& propagate_down,
                                        const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }

  dnnError_t e;
  vector<void*> top_diff;
  // Count of tops whose diff is already in a private (MKL) layout.
  // FIX: was declared `bool` while being used as an arithmetic counter
  // (`num_prv += 1`); the `> 0` test happened to work, but the type was wrong.
  size_t num_prv = 0;
  for (size_t i = 0; i < num_tops; i++) {
    top_diff.push_back(reinterpret_cast<void *>(
        const_cast<Dtype*>(top[i]->prv_diff())));
    if (top_diff[i] != NULL) {
      num_prv += 1;
    } else {
      // FIX: dropped a redundant nested reinterpret_cast<void*>(...) here.
      top_diff[i] = reinterpret_cast<void *>(
          const_cast<Dtype*>(top[i]->cpu_diff()));
    }
  }

  if (num_prv > 0) {
    if (sumPrimitive == NULL) {
      // Adopt the internal layout of the first private top diff as the
      // common input layout for the sum primitive.
      dnnLayout_t int_layout = NULL;
      for (size_t i = 0; i < num_tops; ++i) {
        if (top[i]->prv_diff() != NULL) {
          CHECK((top[i]->get_prv_diff_descriptor())->get_descr_type()
                == PrvMemDescr::PRV_DESCR_MKL2017);
          shared_ptr<MKLDiff<Dtype> > mem_descr =
              boost::static_pointer_cast<MKLDiff<Dtype> >(
                  top[i]->get_prv_diff_descriptor());
          CHECK(mem_descr != NULL);
          bwd_top_diff[i] = mem_descr;
          if (int_layout == NULL) {
            int_layout = mem_descr->layout_int;
          }
        }
      }
      e = dnnSumCreate<Dtype>(&sumPrimitive, NULL, num_tops,
                              int_layout, &coeffs_[0]);
      CHECK_EQ(e, E_SUCCESS);

      bwd_bottom_diff->create_internal_layout(sumPrimitive, dnnResourceDst);
      // Tops without a private diff get conversion layouts for their slot.
      for (size_t i = 0; i < num_tops; ++i) {  // FIX: was `int i` (signed/unsigned mix)
        if (top[i]->prv_diff() == NULL) {
          bwd_top_diff[i]->create_internal_layout(sumPrimitive,
              static_cast<dnnResourceType_t>(dnnResourceMultipleSrc + i));
        }
      }
    }
  } else {
    if (sumPrimitive == NULL) {
      // All top diffs are in the user layout.
      e = dnnSumCreate<Dtype>(&sumPrimitive, NULL, num_tops,
                              bwd_bottom_diff->layout_usr, &coeffs_[0]);
      CHECK_EQ(e, E_SUCCESS);
    }
  }

  void *sum_res[dnnResourceNumber];
  for (size_t i = 0; i < num_tops; ++i) {  // FIX: was `int i` (signed/unsigned mix)
    if (bwd_top_diff[i]->convert_to_int) {
      sum_res[dnnResourceMultipleSrc + i] =
          bwd_top_diff[i]->get_converted_prv(top[i], false);
    } else {
      // FIX: top_diff[i] is already void*, no reinterpret_cast needed.
      sum_res[dnnResourceMultipleSrc + i] = top_diff[i];
    }
  }

  if (bwd_bottom_diff->conversion_needed()) {
    bottom[0]->set_prv_diff_descriptor(bwd_bottom_diff);
    sum_res[dnnResourceDst] =
        reinterpret_cast<void*>(bottom[0]->mutable_prv_diff());
  } else {
    sum_res[dnnResourceDst] =
        reinterpret_cast<void*>(bottom[0]->mutable_cpu_diff());
  }

  // NOTE(review): a "BW" event is recorded under the perf_id_fw_ id; the
  // member is declared elsewhere, so only flagging — confirm whether a
  // dedicated perf_id_bw_ member should exist for this layer.
  PERFORMANCE_EVENT_ID_INIT(perf_id_fw_, PERFORMANCE_MKL_NAME("BW"));
  PERFORMANCE_MEASUREMENT_BEGIN();
  e = dnnExecute<Dtype>(sumPrimitive, sum_res);
  PERFORMANCE_MEASUREMENT_END_ID(perf_id_fw_);
  CHECK_EQ(e, E_SUCCESS);
}