// Re-derives all shape-dependent members from the current bottom blobs,
// reshapes every top blob, the column buffer and (if the layer has a bias
// term) the all-ones bias multiplier.
//
// bottom: input blobs; bottom[0] must have 4 axes and its channel count must
//         equal channels_; every further bottom must match bottom[0] in
//         num / channels / height / width.
// top:    output blobs; each is reshaped to
//         (num_, num_output_, height_out_, width_out_).
void BaseConvolutionLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Blob<Dtype>& first = *bottom[0];
  CHECK_EQ(4, first.num_axes()) << "Input must have 4 axes, "
      << "corresponding to (num, channels, height, width)";
  num_ = first.num();
  height_ = first.height();
  width_ = first.width();
  CHECK_EQ(first.channels(), channels_) << "Input size incompatible with"
      " convolution kernel.";

  // TODO: generalize to handle inputs of different shapes.
  for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
    const Blob<Dtype>& other = *bottom[bottom_id];
    CHECK_EQ(num_, other.num()) << "Inputs must have same num.";
    CHECK_EQ(channels_, other.channels())
        << "Inputs must have same channels.";
    CHECK_EQ(height_, other.height()) << "Inputs must have same height.";
    CHECK_EQ(width_, other.width()) << "Inputs must have same width.";
  }

  // Output geometry first, then give every top that shape.
  compute_output_shape();
  for (int top_id = 0; top_id < top.size(); ++top_id) {
    top[top_id]->Reshape(num_, num_output_, height_out_, width_out_);
  }

  // When dimensions are reversed the roles of the input and output spatial
  // extents are swapped.
  const bool reversed = reverse_dimensions();
  conv_in_height_ = reversed ? height_out_ : height_;
  conv_in_width_ = reversed ? width_out_ : width_;
  conv_out_spatial_dim_ =
      reversed ? height_ * width_ : height_out_ * width_out_;

  // Per-group offsets into the weight, column and output buffers.
  kernel_dim_ = conv_in_channels_ * kernel_h_ * kernel_w_;
  weight_offset_ = conv_out_channels_ * kernel_dim_ / group_ / group_;
  col_offset_ = kernel_dim_ * conv_out_spatial_dim_ / group_;
  output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_;

  // The column buffer is shaped with num_ as its leading dimension.
  // NOTE(review): the previous comment here said the buffer "will only hold
  // one image at a time"; that does not match the num_-sized shape below.
  // Confirm which of the two is intended.
  const int col_height = reversed ? height_ : height_out_;
  const int col_width = reversed ? width_ : width_out_;
  col_buffer_.Reshape(num_, kernel_dim_, col_height, col_width);

  // All-ones "bias multiplier" used to add the biases through BLAS; it has
  // num_ * height_out_ * width_out_ entries.
  if (bias_term_) {
    const vector<int> bias_multiplier_shape(1,
        num_ * height_out_ * width_out_);
    bias_multiplier_.Reshape(bias_multiplier_shape);
    caffe_set(bias_multiplier_.count(), Dtype(1),
        bias_multiplier_.mutable_cpu_data());
  }
  // this->setupMaskIM2COL();
  // this->setupMaskCOL2IM();
}
// Re-derives all shape-dependent members from the current bottom blobs,
// reshapes every top blob, the column buffer and (if the layer has a bias
// term) the all-ones bias multiplier.
//
// A 5-axis bottom whose axis 2 has extent 1 (with channel_axis_ == 1) is
// handled in "forced 3D" mode: the singleton axis is skipped when locating
// the spatial axes and is re-inserted (as 1) into the top shape.
//
// bottom: input blobs; all must share bottom[0]'s shape, and
//         bottom[0]->shape(channel_axis_) must equal channels_.
// top:    output blobs; each is reshaped to
//         (leading axes..., num_output_, [1 if forced 3D], output_shape_...).
void BaseConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int num_axes = bottom[0]->num_axes();
  // Setting forced_3d_ in LayerSetUp() alone is not sufficient, as that can
  // be skipped and Reshape() called directly.
  forced_3d_ =
      (num_axes == 5 && channel_axis_ == 1 && bottom[0]->shape(2) == 1);
  const int first_spatial_axis = channel_axis_ + 1 + forced_3d_;
  CHECK_EQ(num_axes, first_spatial_axis + num_spatial_axes_)
      << "bottom num_axes may not change.";
  num_ = bottom[0]->count(0, channel_axis_);
  CHECK_EQ(bottom[0]->shape(channel_axis_), channels_)
      << "Input size incompatible with convolution kernel.";
  // TODO: generalize to handle inputs of different shapes.
  const int num_bottoms = static_cast<int>(bottom.size());
  for (int bottom_id = 1; bottom_id < num_bottoms; ++bottom_id) {
    CHECK(bottom[0]->shape() == bottom[bottom_id]->shape())
        << "shape mismatch - bottom[0]: " << bottom[0]->shape_string()
        << " vs. bottom[" << bottom_id << "]: "
        << bottom[bottom_id]->shape_string();
  }

  // Shape the tops.
  bottom_shape_ = &bottom[0]->shape();
  compute_output_shape();
  vector<int> top_shape(bottom[0]->shape().begin(),
      bottom[0]->shape().begin() + channel_axis_);
  top_shape.push_back(num_output_);
  if (forced_3d_) {
    top_shape.push_back(1);  // in place of length
  }
  for (int i = 0; i < num_spatial_axes_; ++i) {
    top_shape.push_back(output_shape_[i]);
  }
  const int num_tops = static_cast<int>(top.size());
  for (int top_id = 0; top_id < num_tops; ++top_id) {
    top[top_id]->Reshape(top_shape);
  }

  // With reversed dimensions the bottom blob plays the role of the
  // convolution output and the top blob that of the convolution input.
  const bool reversed = reverse_dimensions();
  const Blob<Dtype>* const conv_output = reversed ? bottom[0] : top[0];
  const Blob<Dtype>* const conv_input = reversed ? top[0] : bottom[0];
  conv_out_spatial_dim_ = conv_output->count(first_spatial_axis);
  col_offset_ = kernel_dim_ * conv_out_spatial_dim_;
  output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_;

  // Setup input dimensions (conv_input_shape_): entry 0 is the channel
  // extent, entries 1..num_spatial_axes_ are the spatial extents.
  vector<int> bottom_dim_blob_shape(1, num_spatial_axes_ + 1);
  conv_input_shape_.Reshape(bottom_dim_blob_shape);
  int* conv_input_shape_data = conv_input_shape_.mutable_cpu_data();
  // Entry 0 must always be read from the channel axis. Applying the
  // forced_3d_ offset to it (as was done before) picked up the singleton
  // length axis instead of the channel count in forced 3D mode.
  conv_input_shape_data[0] = conv_input->shape(channel_axis_);
  for (int i = 1; i <= num_spatial_axes_; ++i) {
    conv_input_shape_data[i] =
        conv_input->shape(channel_axis_ + i + forced_3d_);
  }

  // The im2col result buffer will only hold one image at a time to avoid
  // overly large memory usage. In the special case of 1x1 convolution
  // it goes lazily unused to save memory.
  col_buffer_shape_.clear();
  col_buffer_shape_.push_back(kernel_dim_ * group_);
  for (int i = 0; i < num_spatial_axes_; ++i) {
    col_buffer_shape_.push_back(
        reversed ? input_shape(i + 1) : output_shape_[i]);
  }
  col_buffer_.Reshape(col_buffer_shape_);

  bottom_dim_ = bottom[0]->count(channel_axis_);
  top_dim_ = top[0]->count(channel_axis_);
  num_kernels_im2col_ = conv_in_channels_ * conv_out_spatial_dim_;
  num_kernels_col2im_ = reversed ? top_dim_ : bottom_dim_;

  // Set up the all ones "bias multiplier" for adding biases by BLAS
  out_spatial_dim_ = top[0]->count(first_spatial_axis);
  if (bias_term_) {
    vector<int> bias_multiplier_shape(1, out_spatial_dim_);
    bias_multiplier_.Reshape(bias_multiplier_shape);
    caffe_set(bias_multiplier_.count(), Dtype(1),
        bias_multiplier_.mutable_cpu_data());
  }
}
// Re-derives all shape-dependent members from the current bottom blobs,
// reshapes every top blob, the layer's column buffer, the per-queue device
// buffers (GPU mode only) and the all-ones bias multiplier (if the layer has
// a bias term).
//
// bottom: input blobs; all must share bottom[0]'s shape, and
//         bottom[0]->shape(channel_axis_) must equal channels_.
// top:    output blobs; each is reshaped to
//         (leading axes..., num_output_, output_shape_...).
void BaseConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  Blob<Dtype>* const in0 = bottom[0];
  const int_tp first_spatial_axis = channel_axis_ + 1;
  CHECK_EQ(in0->num_axes(), first_spatial_axis + num_spatial_axes_)
      << "bottom num_axes may not change.";
  num_ = in0->count(0, channel_axis_);
  CHECK_EQ(in0->shape(channel_axis_), channels_)
      << "Input size incompatible with convolution kernel.";
  // TODO: generalize to handle inputs of different shapes.
  for (int_tp idx = 1; idx < bottom.size(); ++idx) {
    CHECK(in0->shape() == bottom[idx]->shape())
        << "All inputs must have the same shape.";
  }

  // Build the top shape: leading (batch) axes, output channels, then the
  // freshly computed spatial output extents.
  bottom_shape_ = &in0->shape();
  compute_output_shape();
  vector<int_tp> top_shape(in0->shape().begin(),
      in0->shape().begin() + channel_axis_);
  top_shape.push_back(num_output_);
  for (int_tp axis = 0; axis < num_spatial_axes_; ++axis) {
    top_shape.push_back(output_shape_[axis]);
  }
  for (int_tp idx = 0; idx < top.size(); ++idx) {
    top[idx]->Reshape(top_shape);
  }
  Blob<Dtype>* const out0 = top[0];

  // With reversed dimensions the bottom acts as the convolution output and
  // the top as the convolution input.
  const bool reversed = reverse_dimensions();
  conv_out_spatial_dim_ =
      (reversed ? in0 : out0)->count(first_spatial_axis);
  col_offset_ = kernel_dim_ * conv_out_spatial_dim_;
  output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_;

  // Record the convolution input dimensions (channel axis followed by the
  // spatial axes) in conv_input_shape_.
  vector<int_tp> bottom_dim_blob_shape(1, num_spatial_axes_ + 1);
  conv_input_shape_.Reshape(bottom_dim_blob_shape);
  int_tp* conv_input_shape_data = conv_input_shape_.mutable_cpu_data();
  Blob<Dtype>* const conv_input = reversed ? out0 : in0;
  for (int_tp axis = 0; axis < num_spatial_axes_ + 1; ++axis) {
    conv_input_shape_data[axis] = conv_input->shape(channel_axis_ + axis);
  }

  // The im2col result buffer will only hold one image at a time to avoid
  // overly large memory usage. In the special case of 1x1 convolution
  // it goes lazily unused to save memory.
  col_buffer_shape_.clear();
  col_buffer_shape_.push_back(kernel_dim_ * group_);
  for (int_tp axis = 0; axis < num_spatial_axes_; ++axis) {
    col_buffer_shape_.push_back(
        reversed ? input_shape(axis + 1) : output_shape_[axis]);
  }
  col_buffer_.Reshape(col_buffer_shape_);

  if (Caffe::mode() == Caffe::Brew::GPU) {
    // Shared column buffer per device-queue across all layers on that device
    for (int_tp q = 0; q < this->device_->num_queues(); ++q) {
      this->device_->template Buffer<Dtype>(q)->Reshape(col_buffer_shape_);
    }
  }

  bottom_dim_ = in0->count(channel_axis_);
  top_dim_ = out0->count(channel_axis_);
  num_kernels_im2col_ = conv_in_channels_ * conv_out_spatial_dim_;
  num_kernels_col2im_ = reversed ? top_dim_ : bottom_dim_;

  // Set up the all ones "bias multiplier" for adding biases by BLAS
  out_spatial_dim_ = out0->count(first_spatial_axis);
  if (!bias_term_) {
    return;
  }
  vector<int_tp> bias_multiplier_shape(1, out_spatial_dim_);
  // Filling the multiplier triggers a memory copy in GPU mode, so only do it
  // when Reshape() reports that the blob was actually reshaped.
  if (bias_multiplier_.Reshape(bias_multiplier_shape)) {
    caffe_set(bias_multiplier_.count(), Dtype(1),
        bias_multiplier_.mutable_cpu_data());
  }
}
// Re-derives the channel axis, the number of spatial axes and all
// shape-dependent members from the current bottom blobs, then reshapes every
// top blob, the column buffer and (if the layer has a bias term) the
// all-ones bias multiplier.
//
// bottom: input blobs; all must share bottom[0]'s shape, and
//         bottom[0]->shape(channel_axis_) must equal channels_.
// top:    output blobs; each is reshaped to
//         (leading axes..., num_output_, output_shape_...).
void BaseConvolutionNDLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  Blob<Dtype>* const in0 = bottom[0];

  // Everything after the (canonicalised) channel axis counts as spatial.
  ConvolutionParameter conv_param = this->layer_param_.convolution_param();
  channel_axis_ = in0->CanonicalAxisIndex(conv_param.axis());
  const int first_spatial_axis = channel_axis_ + 1;
  const int num_axes = in0->num_axes();
  num_spatial_axes_ = num_axes - first_spatial_axis;
  CHECK_GE(num_spatial_axes_, 1);
  num_ = in0->count(0, channel_axis_);
  CHECK_EQ(in0->shape(channel_axis_), channels_)
      << "Input size incompatible with convolution kernel.";
  // TODO: generalize to handle inputs of different shapes.
  for (int idx = 1; idx < bottom.size(); ++idx) {
    CHECK(in0->shape() == bottom[idx]->shape())
        << "All inputs must have the same shape.";
  }

  // Top shape: the bottom's leading axes, then num_output_ in the channel
  // slot, then the computed spatial output extents.
  compute_output_shape();
  vector<int> top_shape(in0->shape().begin(),
      in0->shape().begin() + channel_axis_);
  top_shape.push_back(num_output_);
  for (int axis = 0; axis < num_spatial_axes_; ++axis) {
    top_shape.push_back(output_shape_[axis]);
  }
  for (int idx = 0; idx < top.size(); ++idx) {
    top[idx]->Reshape(top_shape);
  }
  Blob<Dtype>* const out0 = top[0];

  // With reversed dimensions the bottom acts as the convolution output and
  // the top as the convolution input.
  const bool reversed = reverse_dimensions();
  conv_out_spatial_dim_ =
      (reversed ? in0 : out0)->count(first_spatial_axis);

  // kernel_dim_ = input channels times the product of all kernel extents.
  const int* kernel_shape_data = kernel_shape_.cpu_data();
  kernel_dim_ = conv_in_channels_;
  for (int axis = 0; axis < num_spatial_axes_; ++axis) {
    kernel_dim_ *= kernel_shape_data[axis];
  }
  weight_offset_ = conv_out_channels_ * kernel_dim_ / group_ / group_;
  col_offset_ = kernel_dim_ * conv_out_spatial_dim_ / group_;
  output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_;

  // Record the convolution input dimensions (channel axis followed by the
  // spatial axes) in conv_input_shape_.
  vector<int> bottom_dim_blob_shape(1, num_spatial_axes_ + 1);
  conv_input_shape_.Reshape(bottom_dim_blob_shape);
  int* conv_input_shape_data = conv_input_shape_.mutable_cpu_data();
  Blob<Dtype>* const conv_input = reversed ? out0 : in0;
  for (int axis = 0; axis < num_spatial_axes_ + 1; ++axis) {
    conv_input_shape_data[axis] = conv_input->shape(channel_axis_ + axis);
  }

  // The im2col result buffer will only hold one image at a time to avoid
  // overly large memory usage. In the special case of 1x1 convolution
  // it goes lazily unused to save memory.
  col_buffer_shape_.clear();
  col_buffer_shape_.push_back(kernel_dim_);
  // Skip entry 0 of input_shape_ so that index i addresses spatial axis i.
  const int* input_shape_data = input_shape_.cpu_data() + 1;
  for (int axis = 0; axis < num_spatial_axes_; ++axis) {
    col_buffer_shape_.push_back(
        reversed ? input_shape_data[axis] : output_shape_[axis]);
  }
  col_buffer_.Reshape(col_buffer_shape_);

  bottom_dim_ = in0->count(channel_axis_);
  top_dim_ = out0->count(channel_axis_);
  num_kernels_im2col_ = conv_in_channels_ * conv_out_spatial_dim_;
  num_kernels_col2im_ = reversed ? top_dim_ : bottom_dim_;

  // Set up the all ones "bias multiplier" for adding biases by BLAS
  out_spatial_dim_ = out0->count(first_spatial_axis);
  if (!bias_term_) {
    return;
  }
  vector<int> bias_multiplier_shape(1, out_spatial_dim_);
  bias_multiplier_.Reshape(bias_multiplier_shape);
  caffe_set(bias_multiplier_.count(), Dtype(1),
      bias_multiplier_.mutable_cpu_data());
}
// Re-derives all shape-dependent members from the current bottom blobs,
// reshapes every top blob, the column buffer and (if the layer has a bias
// term) the all-ones bias multiplier.
//
// When is_direct_connect_ is set, the first call additionally appends a blob
// to this->blobs_ holding direct_num_ randomly chosen input-channel indices,
// and the top blobs get direct_num_ extra channels on top of num_output_.
//
// bottom: input blobs; all must share bottom[0]'s shape, and
//         bottom[0]->shape(channel_axis_) must equal channels_.
// top:    output blobs; each is reshaped to
//         (leading axes..., num_output_ [+ direct_num_], output_shape_...).
void BaseConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // One-time creation of the direct-connection index blob.
  if (is_direct_connect_ && !is_direct_intialized_) {
    // NOTE(review): direct_ratio_ == 1 makes the divisor below zero; confirm
    // that the ratio is validated before Reshape() can be reached.
    direct_num_ = std::min(
        (int)((direct_ratio_)*(num_output_ / (1 - direct_ratio_))),
        bottom[0]->channels());

    // Append a 1-D blob of direct_num_ entries as the last parameter blob.
    const vector<int> index_shape(1, direct_num_);
    this->blobs_.push_back(
        shared_ptr<Blob<Dtype> >(new Blob<Dtype>(index_shape)));
    Blob<Dtype>* const index_blob = this->blobs_.back().get();

    // Shuffle all input-channel indices and keep the first direct_num_.
    // NOTE(review): std::random_shuffle is deprecated in C++14 and removed
    // in C++17; kept here so the random sequence stays unchanged.
    vector<int> channel_ids;
    for (int c = 0; c < bottom[0]->channels(); c++) {
      channel_ids.push_back(c);
    }
    std::random_shuffle(channel_ids.begin(), channel_ids.end());
    for (int k = 0; k < direct_num_; k++) {
      index_blob->mutable_cpu_data()[k] = channel_ids[k];
    }
    is_direct_intialized_ = true;
  }

  Blob<Dtype>* const in0 = bottom[0];
  const int first_spatial_axis = channel_axis_ + 1;
  CHECK_EQ(in0->num_axes(), first_spatial_axis + num_spatial_axes_)
      << "bottom num_axes may not change.";
  num_ = in0->count(0, channel_axis_);
  CHECK_EQ(in0->shape(channel_axis_), channels_)
      << "Input size incompatible with convolution kernel.";
  // TODO: generalize to handle inputs of different shapes.
  for (int idx = 1; idx < bottom.size(); ++idx) {
    CHECK(in0->shape() == bottom[idx]->shape())
        << "All inputs must have the same shape.";
  }

  // Top shape: leading axes, output channels (plus the directly connected
  // channels when enabled), then the computed spatial output extents.
  bottom_shape_ = &in0->shape();
  compute_output_shape();
  vector<int> top_shape(in0->shape().begin(),
      in0->shape().begin() + channel_axis_);
  top_shape.push_back(
      is_direct_connect_ ? num_output_ + direct_num_ : num_output_);
  for (int axis = 0; axis < num_spatial_axes_; ++axis) {
    top_shape.push_back(output_shape_[axis]);
  }
  for (int idx = 0; idx < top.size(); ++idx) {
    top[idx]->Reshape(top_shape);
  }
  Blob<Dtype>* const out0 = top[0];

  // With reversed dimensions the bottom acts as the convolution output and
  // the top as the convolution input.
  const bool reversed = reverse_dimensions();
  conv_out_spatial_dim_ =
      (reversed ? in0 : out0)->count(first_spatial_axis);
  col_offset_ = kernel_dim_ * conv_out_spatial_dim_;
  output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_;

  // Record the convolution input dimensions (channel axis followed by the
  // spatial axes) in conv_input_shape_.
  vector<int> bottom_dim_blob_shape(1, num_spatial_axes_ + 1);
  conv_input_shape_.Reshape(bottom_dim_blob_shape);
  int* conv_input_shape_data = conv_input_shape_.mutable_cpu_data();
  Blob<Dtype>* const conv_input = reversed ? out0 : in0;
  for (int axis = 0; axis < num_spatial_axes_ + 1; ++axis) {
    conv_input_shape_data[axis] = conv_input->shape(channel_axis_ + axis);
  }

  // The im2col result buffer will only hold one image at a time to avoid
  // overly large memory usage. In the special case of 1x1 convolution
  // it goes lazily unused to save memory.
  col_buffer_shape_.clear();
  col_buffer_shape_.push_back(kernel_dim_ * group_);
  for (int axis = 0; axis < num_spatial_axes_; ++axis) {
    col_buffer_shape_.push_back(
        reversed ? input_shape(axis + 1) : output_shape_[axis]);
  }
  col_buffer_.Reshape(col_buffer_shape_);

  bottom_dim_ = in0->count(channel_axis_);
  // With direct connections, top_dim_ is scaled down by
  // num_output_ / (direct_num_ + num_output_), i.e. the share of the top
  // channels that is not directly connected.
  if (is_direct_connect_) {
    top_dim_ = out0->count(channel_axis_) * num_output_
        / (direct_num_ + num_output_);
  } else {
    top_dim_ = out0->count(channel_axis_);
  }
  num_kernels_im2col_ = conv_in_channels_ * conv_out_spatial_dim_;
  num_kernels_col2im_ = reversed ? top_dim_ : bottom_dim_;

  // Set up the all ones "bias multiplier" for adding biases by BLAS
  out_spatial_dim_ = out0->count(first_spatial_axis);
  if (!bias_term_) {
    return;
  }
  vector<int> bias_multiplier_shape(1, out_spatial_dim_);
  bias_multiplier_.Reshape(bias_multiplier_shape);
  caffe_set(bias_multiplier_.count(), Dtype(1),
      bias_multiplier_.mutable_cpu_data());
}
// Re-derives all shape-dependent members from the current bottom blobs and
// reshapes every top blob, the column buffer(s) and (if the layer has a bias
// term) the all-ones bias multiplier.
//
// In CPU mode the layer's own col_buffer_ is reshaped; otherwise the
// per-queue buffers obtained from device_context_ are reshaped instead.
//
// bottom: input blobs; bottom[0] must have 4 axes and its channel count must
//         equal channels_; every further bottom must match bottom[0] in
//         num / channels / height / width.
// top:    output blobs; each is reshaped to
//         (num_, num_output_, height_out_, width_out_).
void BaseConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const Blob<Dtype>& first = *bottom[0];
  CHECK_EQ(4, first.num_axes()) << "Input must have 4 axes, "
      << "corresponding to (num, channels, height, width)";
  num_ = first.num();
  height_ = first.height();
  width_ = first.width();
  CHECK_EQ(first.channels(), channels_) << "Input size incompatible with"
      " convolution kernel.";

  // TODO: generalize to handle inputs of different shapes.
  for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
    const Blob<Dtype>& other = *bottom[bottom_id];
    CHECK_EQ(num_, other.num()) << "Inputs must have same num.";
    CHECK_EQ(channels_, other.channels())
        << "Inputs must have same channels.";
    CHECK_EQ(height_, other.height()) << "Inputs must have same height.";
    CHECK_EQ(width_, other.width()) << "Inputs must have same width.";
  }

  // Output geometry first, then give every top that shape.
  compute_output_shape();
  for (int top_id = 0; top_id < top.size(); ++top_id) {
    top[top_id]->Reshape(num_, num_output_, height_out_, width_out_);
  }

  // When dimensions are reversed the roles of the input and output spatial
  // extents are swapped.
  const bool reversed = reverse_dimensions();
  conv_in_height_ = reversed ? height_out_ : height_;
  conv_in_width_ = reversed ? width_out_ : width_;
  conv_out_spatial_dim_ =
      reversed ? height_ * width_ : height_out_ * width_out_;

  // Per-group offsets into the weight, column and output buffers.
  kernel_dim_ = conv_in_channels_ * kernel_h_ * kernel_w_;
  weight_offset_ = conv_out_channels_ * kernel_dim_ / group_ / group_;
  col_offset_ = kernel_dim_ * conv_out_spatial_dim_ / group_;
  output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_;

  // The im2col result buffer will only hold one image at a time to avoid
  // overly large memory usage. In the special case of 1x1 convolution
  // it goes lazily unused to save memory.
  const int col_height = reversed ? height_ : height_out_;
  const int col_width = reversed ? width_ : width_out_;
  if (Caffe::mode() == Caffe::Brew::CPU) {
    col_buffer_.Reshape(1, kernel_dim_, col_height, col_width);
  } else {
    // Shared column buffer per device-queue across all layers on that device
    for (int q = 0; q < this->device_context_->num_queues(); ++q) {
      shared_ptr< Blob<Dtype> > queue_buffer =
          this->device_context_->template Buffer<Dtype>(q);
      queue_buffer->Reshape(1, kernel_dim_, col_height, col_width);
    }
  }

  // Set up the all ones "bias multiplier" for adding biases by BLAS
  if (!bias_term_) {
    return;
  }
  vector<int> bias_multiplier_shape(1, height_out_ * width_out_);
  // Filling the multiplier triggers a memory copy in GPU mode, so only do it
  // when Reshape() reports that the blob was actually reshaped.
  if (bias_multiplier_.Reshape(bias_multiplier_shape)) {
    caffe_set(bias_multiplier_.count(), Dtype(1),
        bias_multiplier_.mutable_cpu_data());
  }
}
void MKLConvolutionLayer<Dtype>::LayerSetUp( const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) { ConvolutionLayer<Dtype>::LayerSetUp(bottom, top); this->width_ = bottom[0]->width(); this->height_ = bottom[0]->height(); this->num_ = bottom[0]->num(); // TODO: clean up this kernel_w_ = this->kernel_shape_.cpu_data()[0]; kernel_h_ = this->kernel_shape_.cpu_data()[1]; stride_w_ = this->stride_.cpu_data()[0]; stride_h_ = this->stride_.cpu_data()[1]; pad_w_ = this->pad_.cpu_data()[0]; pad_h_ = this->pad_.cpu_data()[1]; this->bottom_shape_ = &bottom[0]->shape(); compute_output_shape(); int status; size_t n, g; size_t iw, ih, ic; size_t ow, oh, oc; size_t kw, kh; /* filter */ size_t dimension = 4; g = this->group_; n = this->num_; iw = this->width_; ih = this->height_; ic = this->channels_; ow = this->width_out_; oh = this->height_out_; oc = this->num_output_; kw = this->kernel_w_; kh = this->kernel_h_; size_t bdata_sizes[4] = {iw, ih, ic, n}; size_t bdata_strides[4] = {1, iw, iw*ih, iw*ih*ic}; size_t fdata_sizes[4] = {kw, kh, ic/g, oc}; size_t fdata_strides[4] = {1, kw, kw*kh, kw*kh*ic/g}; size_t bias_sizes[1] = {oc}; size_t bias_strides[1] = {1}; size_t tdata_sizes[4] = {ow, oh, oc, n}; size_t tdata_strides[4] = {1, ow, ow*oh, ow*oh*oc}; size_t convolutionStrides[2] = {this->stride_w_, this->stride_h_}; int inputOffset[2] = {-this->pad_w_, -this->pad_h_}; if (this->bias_term_) { status = dnnGroupsConvolutionCreateForwardBias<Dtype>( &convolutionFwd, NULL, dnnAlgorithmConvolutionDirect, g, dimension, bdata_sizes, tdata_sizes, fdata_sizes, convolutionStrides, inputOffset, dnnBorderZeros); } else { status = dnnGroupsConvolutionCreateForward<Dtype>( &convolutionFwd, NULL, dnnAlgorithmConvolutionDirect, g, dimension, bdata_sizes, tdata_sizes, fdata_sizes, convolutionStrides, inputOffset, dnnBorderZeros); } CHECK_EQ(status, 0) << "Failed dnnCreateConvolution<Dtype>(dnnForward) with status " << status << "\n"; status = dnnLayoutCreateFromPrimitive<Dtype>( 
&fwd_bottom_data->layout_int, convolutionFwd, dnnResourceSrc); CHECK_EQ(status, 0) << "Failed dnnLayoutCreateFromPrimitive with status " << status << "\n"; status = dnnLayoutCreateFromPrimitive<Dtype>( &fwd_top_data->layout_int, convolutionFwd, dnnResourceDst); CHECK_EQ(status, 0) << "Failed dnnLayoutCreateFromPrimitive with status " << status << "\n"; status = dnnLayoutCreateFromPrimitive<Dtype>( &fwd_filter_data->layout_int, convolutionFwd, dnnResourceFilter); CHECK_EQ(status, 0) << "Failed dnnLayoutCreateFromPrimitive with status " << status << "\n"; status = dnnLayoutCreate<Dtype>( &fwd_bottom_data->layout_usr, dimension, bdata_sizes, bdata_strides); CHECK_EQ(status, 0) << "Failed creation of l_fwd_bottom_data_usr layout with status " << status << "\n"; status = dnnLayoutCreate<Dtype>( &fwd_top_data->layout_usr , dimension, tdata_sizes, tdata_strides); CHECK_EQ(status, 0) << "Failed creation of l_fwd_top_data_usr layout with status " << status << "\n"; status = dnnLayoutCreate<Dtype>( &fwd_filter_data->layout_usr, dimension, fdata_sizes, fdata_strides); CHECK_EQ(status, 0) << "Failed creation of l_fwd_filter_data_usr layout with status " << status << "\n"; fwd_bottom_data->create_conversions(); fwd_top_data ->create_conversions(); fwd_filter_data->create_conversions(); if (this->bias_term_) { status = dnnLayoutCreateFromPrimitive<Dtype>( &fwd_bias_data->layout_int, convolutionFwd, dnnResourceBias); CHECK_EQ(status, 0) << "Failed dnnLayoutCreateFromPrimitive with status " << status << "\n"; status = dnnLayoutCreate<Dtype>( &fwd_bias_data->layout_usr, 1, bias_sizes, bias_strides); CHECK_EQ(status, 0) << "Failed creation of l_fwd_bias_data_usr layout with status " << status << "\n"; fwd_bias_data ->create_conversions(); } /* * Backward by data layer setup */ status = dnnGroupsConvolutionCreateBackwardData<Dtype>( &convolutionBwdData, NULL, dnnAlgorithmConvolutionDirect, g, dimension, bdata_sizes, tdata_sizes, fdata_sizes, convolutionStrides, inputOffset, 
dnnBorderZeros); CHECK_EQ(status, 0) << "Failed dnnConvolutionCreateBackwardData with status " << status << "\n"; status = dnnLayoutCreateFromPrimitive<Dtype>(&bwdd_bottom_diff->layout_int, convolutionBwdData, dnnResourceDiffSrc); CHECK_EQ(status, 0) << "Failed dnnLayoutCreateFromPrimitive with status " << status << "\n"; status = dnnLayoutCreateFromPrimitive<Dtype>( &bwdd_top_diff->layout_int, convolutionBwdData, dnnResourceDiffDst); CHECK_EQ(status, 0) << "Failed dnnLayoutCreateFromPrimitive with status " << status << "\n"; status = dnnLayoutCreateFromPrimitive<Dtype>(&bwdd_filter_data->layout_int, convolutionBwdData, dnnResourceFilter); CHECK_EQ(status, 0) << "Failed dnnLayoutCreateFromPrimitive with status " << status << "\n"; status = dnnLayoutCreate<Dtype>(&bwdd_bottom_diff->layout_usr, dimension, bdata_sizes, bdata_strides); CHECK_EQ(status, 0) << "Failed creation of bwdd_bottom_diff->layout_usr with status " << status << "\n"; status = dnnLayoutCreate<Dtype>(&bwdd_top_diff->layout_usr, dimension, tdata_sizes, tdata_strides); CHECK_EQ(status, 0) << "Failed creation of bwdd_top_diff->layout_usr with status " << status << "\n"; status = dnnLayoutCreate<Dtype>(&bwdd_filter_data->layout_usr, dimension, fdata_sizes, fdata_strides); CHECK_EQ(status, 0) << "Failed creation of bwdd_filter_data->layout_usr with status " << status << "\n"; bwdd_bottom_diff->create_conversions(); bwdd_top_diff->create_conversions(); bwdd_filter_data->create_conversions(); /* * Backward by filter layer setup */ status = dnnGroupsConvolutionCreateBackwardFilter<Dtype>( &convolutionBwdFilter, NULL, dnnAlgorithmConvolutionDirect, g, dimension, bdata_sizes, tdata_sizes, fdata_sizes, convolutionStrides, inputOffset, dnnBorderZeros); CHECK_EQ(status, 0) << "Failed dnnConvolutionCreateBackwardFilter with status " << status << "\n"; status = dnnLayoutCreateFromPrimitive<Dtype>(&bwdf_bottom_data->layout_int, convolutionBwdFilter, dnnResourceSrc); CHECK_EQ(status, 0) << "Failed 
dnnLayoutCreateFromPrimitive with status " << status << "\n"; status = dnnLayoutCreateFromPrimitive<Dtype>(&bwdf_top_diff->layout_int, convolutionBwdFilter, dnnResourceDiffDst); CHECK_EQ(status, 0) << "Failed dnnLayoutCreateFromPrimitive with status " << status << "\n"; status = dnnLayoutCreateFromPrimitive<Dtype>(&bwdf_filter_diff->layout_int, convolutionBwdFilter, dnnResourceDiffFilter); CHECK_EQ(status, 0) << "Failed dnnLayoutCreateFromPrimitive with status " << status << "\n"; status = dnnLayoutCreate<Dtype>(&bwdf_bottom_data->layout_usr, dimension, bdata_sizes, bdata_strides); CHECK_EQ(status, 0) << "Failed creation of bwdf_bottom_data->layout_usr with status " << status << "\n"; status = dnnLayoutCreate<Dtype>(&bwdf_top_diff->layout_usr, dimension, tdata_sizes, tdata_strides); CHECK_EQ(status, 0) << "Failed creation of bwdf_top_diff->layout_usr with status " << status << "\n"; status = dnnLayoutCreate<Dtype>(&bwdf_filter_diff->layout_usr, dimension, fdata_sizes, fdata_strides); CHECK_EQ(status, 0) << "Failed creation of bwdf_filter_diff->layout_usr with status " << status << "\n"; bwdf_bottom_data->create_conversions(); bwdf_top_diff->create_conversions(); bwdf_filter_diff->create_conversions(); /* * Backward by bias layer setup */ if (this->bias_term_) { status = dnnGroupsConvolutionCreateBackwardBias<Dtype>( &convolutionBwdBias, NULL, dnnAlgorithmConvolutionDirect, g, dimension, tdata_sizes); CHECK_EQ(status, 0) << "Failed dnnConvolutionCreateBackwardBias with status " << status << "\n"; status = dnnLayoutCreateFromPrimitive<Dtype>(&bwdb_top_diff->layout_int, convolutionBwdBias, dnnResourceDiffDst); CHECK_EQ(status, 0) << "Failed dnnLayoutCreateFromPrimitive with status " << status << "\n"; status = dnnLayoutCreateFromPrimitive<Dtype>(&bwdb_bias_diff->layout_int, convolutionBwdBias, dnnResourceDiffBias); CHECK_EQ(status, 0) << "Failed dnnLayoutCreateFromPrimitive with status " << status << "\n"; status = dnnLayoutCreate<Dtype>(&bwdb_top_diff->layout_usr , 
dimension, tdata_sizes, tdata_strides); CHECK_EQ(status, 0) << "Failed creation of bwdb_top_diff->layout_usr with status " << status << "\n"; status = dnnLayoutCreate<Dtype>(&bwdb_bias_diff->layout_usr, 1, bias_sizes, bias_strides); CHECK_EQ(status, 0) << "Failed creation of bwdb_bias_diff->layout_usr with status " << status << "\n"; bwdb_top_diff->create_conversions(); bwdb_bias_diff->create_conversions(); } // Names are for debugging purposes only. TODO: Consider removing this. fwd_bottom_data ->name = "fwd_bottom_data @ " + this->layer_param_.name(); fwd_top_data ->name = "fwd_top_data @ " + this->layer_param_.name(); fwd_filter_data ->name = "fwd_filter_data @ " + this->layer_param_.name(); fwd_bias_data ->name = "fwd_bias_data @ " + this->layer_param_.name(); bwdd_top_diff ->name = "bwdd_top_diff @ " + this->layer_param_.name(); bwdd_bottom_diff->name = "bwdd_bottom_diff @ " + this->layer_param_.name(); bwdd_filter_data->name = "bwdd_filter_data @ " + this->layer_param_.name(); bwdf_top_diff ->name = "bwdf_top_diff @ " + this->layer_param_.name(); bwdf_bottom_data->name = "bwdf_bottom_data @ " + this->layer_param_.name(); bwdf_filter_diff->name = "bwdf_filter_diff @ " + this->layer_param_.name(); bwdb_top_diff ->name = "bwdb_top_diff @ " + this->layer_param_.name(); bwdb_bias_diff ->name = "bwdb_bias_diff @ " + this->layer_param_.name(); }
// Re-derives all shape-dependent members from the current bottom blobs,
// reshapes every top blob and the all-ones bias multiplier (if the layer has
// a bias term), and refreshes the cuDNN tensor / convolution descriptors.
//
// bottom: input blobs; bottom[0]->shape(1) must equal channels_ and every
//         further bottom must match bottom[0]'s shape axis by axis.
// top:    output blobs; each is reshaped to output_shape_.
void CudnnNdConvolutionLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  Blob<Dtype>* const in0 = bottom[0];
  num_ = in0->shape(0);
  CHECK_EQ(in0->shape(1), channels_)
      << "Input size incompatible with convolution kernel.";
  input_shape_ = in0->shape();
  // TODO: generalize to handle inputs of different shapes.
  for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
    Blob<Dtype>* const other = bottom[bottom_id];
    CHECK_EQ(num_, other->shape(0)) << "Inputs must have same num.";
    CHECK_EQ(channels_, other->shape(1))
        << "Inputs must have same channels.";
    for (int axis = 0; axis < in0->num_axes(); ++axis) {
      CHECK_EQ(input_shape_[axis], other->shape(axis))
          << "Inputs must have same shape.";
    }
  }

  // Shape the tops.
  compute_output_shape();
  for (int top_id = 0; top_id < top.size(); ++top_id) {
    top[top_id]->Reshape(output_shape_);
  }

  // Product of all output extents from axis 2 onwards.
  conv_out_spatial_dim_ = 1;
  for (int axis = 2; axis < output_shape_.size(); ++axis) {
    conv_out_spatial_dim_ *= output_shape_[axis];
  }

  // Input channels times the product of all kernel extents.
  kernel_dim_ = channels_;
  for (int axis = 0; axis < kernel_shape_.size(); ++axis) {
    kernel_dim_ *= kernel_shape_[axis];
  }

  weight_offset_ = num_output_ * kernel_dim_ / group_ / group_;
  output_offset_ = num_output_ * conv_out_spatial_dim_ / group_;

  // Set up the all ones "bias multiplier" for adding biases by BLAS
  if (bias_term_) {
    vector<int> bias_multiplier_shape(1, conv_out_spatial_dim_);
    bias_multiplier_.Reshape(bias_multiplier_shape);
    caffe_set(bias_multiplier_.count(), Dtype(1),
        bias_multiplier_.mutable_cpu_data());
  }

  // Per-group element offsets: the product of all extents from axis 1
  // onwards, divided by the number of groups.
  bottom_offset_ = 1;
  for (int axis = 1; axis < input_shape_.size(); ++axis) {
    bottom_offset_ *= input_shape_[axis];
  }
  bottom_offset_ /= group_;

  top_offset_ = 1;
  for (int axis = 1; axis < output_shape_.size(); ++axis) {
    top_offset_ *= output_shape_[axis];
  }
  top_offset_ /= group_;

  // Descriptor shapes cover a single group (channel extent divided by
  // group_); the strides are computed from the full, ungrouped shapes, with
  // the last axis having stride 1.
  vector<int> bottom_tensor_shape(input_shape_);
  bottom_tensor_shape[1] /= group_;
  vector<int> bottom_tensor_stride(input_shape_.size(), 1);
  for (int axis = input_shape_.size() - 2; axis >= 0; --axis) {
    bottom_tensor_stride[axis] =
        input_shape_[axis + 1] * bottom_tensor_stride[axis + 1];
  }

  vector<int> top_tensor_shape(output_shape_);
  top_tensor_shape[1] /= group_;
  vector<int> top_tensor_stride(output_shape_.size(), 1);
  for (int axis = output_shape_.size() - 2; axis >= 0; --axis) {
    top_tensor_stride[axis] =
        output_shape_[axis + 1] * top_tensor_stride[axis + 1];
  }

  // One set of descriptors per bottom blob.
  for (int idx = 0; idx < bottom.size(); idx++) {
    cudnn::setTensorNdDesc<Dtype>(&bottom_descs_[idx],
        bottom_tensor_shape, bottom_tensor_stride);
    cudnn::setTensorNdDesc<Dtype>(&top_descs_[idx],
        top_tensor_shape, top_tensor_stride);
    cudnn::setNdConvolutionDesc<Dtype>(&conv_descs_[idx],
        bottom_descs_[idx], filter_desc_, pad_shape_, stride_shape_);
  }

  // Tensor descriptor for bias: extent 1 on every axis except axis 1, which
  // holds the per-group number of outputs.
  if (this->bias_term_) {
    vector<int> bias_shape(input_shape_.size(), 1);
    bias_shape[1] = this->num_output_ / this->group_;
    cudnn::setTensorNdDesc<Dtype>(&bias_desc_, bias_shape);
  }
}