template <typename Dtype>
void DeconvNormLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
      << "corresponding to (num, channels, height, width)";
  constant1.Reshape(1, 1, bottom[0]->height(), bottom[0]->width());
  caffe_set(constant1.count(), Dtype(1), constant1.mutable_cpu_data());
  deconv2_layer->Reshape(bottom, deconv2_top_vec);
  deconv1_layer->Reshape(deconv1_bottom_vec, deconv1_top_vec);
  exp_layer->Reshape(exp_bottom_vec, exp_top_vec);
  top[0]->ReshapeLike(*deconv2_top_vec[0]);
  deconv1_top_cache.Reshape(deconv1_top_vec[0]->shape());
  alpha_cache.Reshape(alphas->shape());
  alpha_cache2.Reshape(alphas->shape());
  // Set up the all ones "bias multiplier" for adding biases by BLAS
  if (bias_term_) {
    vector<int> bias_multiplier_shape(1, top[0]->height() * top[0]->width());
    bias_multiplier.Reshape(bias_multiplier_shape);
    caffe_set(bias_multiplier.count(), Dtype(1),
        bias_multiplier.mutable_cpu_data());
  }
}
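For context, the all-ones bias multiplier prepared above is typically consumed by a rank-1 GEMM that broadcasts each per-channel bias across every spatial position. A minimal sketch of that pattern, assuming the standard caffe_cpu_gemm helper and that the bias blob sits in this->blobs_[1] as in stock Caffe layers (both assumptions, not confirmed by this snippet):

// Hedged sketch: bias (num_output_ x 1) times bias_multiplier (1 x spatial)
// adds bias[c] to every spatial element of output channel c.
const Dtype* bias = this->blobs_[1]->cpu_data();  // assumed bias location
const int spatial_dim = top[0]->height() * top[0]->width();
caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans,
    num_output_, spatial_dim, 1,
    Dtype(1), bias, bias_multiplier.cpu_data(),
    Dtype(1), top[0]->mutable_cpu_data());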
template <typename Dtype>
void BaseConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
      << "corresponding to (num, channels, height, width)";
  num_ = bottom[0]->num();
  height_ = bottom[0]->height();
  width_ = bottom[0]->width();
  CHECK_EQ(bottom[0]->channels(), channels_)
      << "Input size incompatible with convolution kernel.";
  // TODO: generalize to handle inputs of different shapes.
  for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
    CHECK_EQ(num_, bottom[bottom_id]->num()) << "Inputs must have same num.";
    CHECK_EQ(channels_, bottom[bottom_id]->channels())
        << "Inputs must have same channels.";
    CHECK_EQ(height_, bottom[bottom_id]->height())
        << "Inputs must have same height.";
    CHECK_EQ(width_, bottom[bottom_id]->width())
        << "Inputs must have same width.";
  }
  // Shape the tops.
  compute_output_shape();
  for (int top_id = 0; top_id < top.size(); ++top_id) {
    top[top_id]->Reshape(num_, num_output_, height_out_, width_out_);
  }
  if (reverse_dimensions()) {
    conv_in_height_ = height_out_;
    conv_in_width_ = width_out_;
    conv_out_spatial_dim_ = height_ * width_;
  } else {
    conv_in_height_ = height_;
    conv_in_width_ = width_;
    conv_out_spatial_dim_ = height_out_ * width_out_;
  }
  kernel_dim_ = conv_in_channels_ * kernel_h_ * kernel_w_;
  weight_offset_ = conv_out_channels_ * kernel_dim_ / group_ / group_;
  col_offset_ = kernel_dim_ * conv_out_spatial_dim_ / group_;
  output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_;
  // Unlike upstream Caffe, this variant sizes the im2col buffer for the
  // whole batch (num_ images) rather than one image at a time. In the
  // special case of 1x1 convolution it goes lazily unused to save memory.
  if (reverse_dimensions()) {
    col_buffer_.Reshape(num_, kernel_dim_, height_, width_);
  } else {
    col_buffer_.Reshape(num_, kernel_dim_, height_out_, width_out_);
  }
  // Set up the all ones "bias multiplier" for adding biases by BLAS
  if (bias_term_) {
    vector<int> bias_multiplier_shape(1, num_ * height_out_ * width_out_);
    bias_multiplier_.Reshape(bias_multiplier_shape);
    caffe_set(bias_multiplier_.count(), Dtype(1),
        bias_multiplier_.mutable_cpu_data());
  }
  // this->setupMaskIM2COL();
  // this->setupMaskCOL2IM();
}
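For reference, the height_out_ and width_out_ values consumed above come from compute_output_shape(); a sketch of the standard Caffe 2D formulas, assuming the usual pad_h_/pad_w_ and stride_h_/stride_w_ members (this variant may differ, e.g. if it adds dilation):

template <typename Dtype>
void ConvolutionLayer<Dtype>::compute_output_shape() {
  // Standard "valid with padding" arithmetic: floor division by stride.
  this->height_out_ = (this->height_ + 2 * this->pad_h_ - this->kernel_h_)
      / this->stride_h_ + 1;
  this->width_out_ = (this->width_ + 2 * this->pad_w_ - this->kernel_w_)
      / this->stride_w_ + 1;
}
// A deconvolution subclass (reverse_dimensions() == true) inverts the map:
//   height_out_ = stride_h_ * (height_ - 1) + kernel_h_ - 2 * pad_h_;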
template <typename Dtype>
void BaseConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int num_axes = bottom[0]->num_axes();
  // Setting forced_3d_ in LayerSetUp() alone is not sufficient, since
  // LayerSetUp() can be skipped and Reshape() called directly.
  if (num_axes == 5 && channel_axis_ == 1 && bottom[0]->shape(2) == 1) {
    forced_3d_ = true;
  } else {
    forced_3d_ = false;
  }
  const int first_spatial_axis = channel_axis_ + 1 + forced_3d_;
  CHECK_EQ(num_axes, first_spatial_axis + num_spatial_axes_)
      << "bottom num_axes may not change.";
  num_ = bottom[0]->count(0, channel_axis_);
  CHECK_EQ(bottom[0]->shape(channel_axis_), channels_)
      << "Input size incompatible with convolution kernel.";
  // TODO: generalize to handle inputs of different shapes.
  for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
    CHECK(bottom[0]->shape() == bottom[bottom_id]->shape())
        << "shape mismatch - bottom[0]: " << bottom[0]->shape_string()
        << " vs. bottom[" << bottom_id << "]: "
        << bottom[bottom_id]->shape_string();
  }
  // Shape the tops.
  bottom_shape_ = &bottom[0]->shape();
  compute_output_shape();
  vector<int> top_shape(bottom[0]->shape().begin(),
      bottom[0]->shape().begin() + channel_axis_);
  top_shape.push_back(num_output_);
  if (forced_3d_) {
    top_shape.push_back(1);  // in place of length
  }
  for (int i = 0; i < num_spatial_axes_; ++i) {
    top_shape.push_back(output_shape_[i]);
  }
  for (int top_id = 0; top_id < top.size(); ++top_id) {
    top[top_id]->Reshape(top_shape);
  }
  if (reverse_dimensions()) {
    conv_out_spatial_dim_ = bottom[0]->count(first_spatial_axis);
  } else {
    conv_out_spatial_dim_ = top[0]->count(first_spatial_axis);
  }
  col_offset_ = kernel_dim_ * conv_out_spatial_dim_;
  output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_;
  // Setup input dimensions (conv_input_shape_).
  vector<int> bottom_dim_blob_shape(1, num_spatial_axes_ + 1);
  conv_input_shape_.Reshape(bottom_dim_blob_shape);
  int* conv_input_shape_data = conv_input_shape_.mutable_cpu_data();
  for (int i = 0; i < num_spatial_axes_ + 1; ++i) {
    if (reverse_dimensions()) {
      conv_input_shape_data[i] = top[0]->shape(channel_axis_ + i + forced_3d_);
    } else {
      conv_input_shape_data[i] =
          bottom[0]->shape(channel_axis_ + i + forced_3d_);
    }
  }
  // The im2col result buffer will only hold one image at a time to avoid
  // overly large memory usage. In the special case of 1x1 convolution
  // it goes lazily unused to save memory.
  col_buffer_shape_.clear();
  col_buffer_shape_.push_back(kernel_dim_ * group_);
  for (int i = 0; i < num_spatial_axes_; ++i) {
    if (reverse_dimensions()) {
      col_buffer_shape_.push_back(input_shape(i + 1));
    } else {
      col_buffer_shape_.push_back(output_shape_[i]);
    }
  }
  col_buffer_.Reshape(col_buffer_shape_);
  bottom_dim_ = bottom[0]->count(channel_axis_);
  top_dim_ = top[0]->count(channel_axis_);
  num_kernels_im2col_ = conv_in_channels_ * conv_out_spatial_dim_;
  num_kernels_col2im_ = reverse_dimensions() ? top_dim_ : bottom_dim_;
  // Set up the all ones "bias multiplier" for adding biases by BLAS
  out_spatial_dim_ = top[0]->count(first_spatial_axis);
  if (bias_term_) {
    vector<int> bias_multiplier_shape(1, out_spatial_dim_);
    bias_multiplier_.Reshape(bias_multiplier_shape);
    caffe_set(bias_multiplier_.count(), Dtype(1),
        bias_multiplier_.mutable_cpu_data());
  }
}
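This and the following N-D variants all fill output_shape_ inside compute_output_shape(). For reference, the upstream Caffe N-D implementation with dilation support, assuming kernel_shape_, stride_, pad_, and dilation_ are int blobs as upstream (the forced_3d_ offset in the variant above would shift the input_shape() indexing):

template <typename Dtype>
void ConvolutionLayer<Dtype>::compute_output_shape() {
  const int* kernel_shape_data = this->kernel_shape_.cpu_data();
  const int* stride_data = this->stride_.cpu_data();
  const int* pad_data = this->pad_.cpu_data();
  const int* dilation_data = this->dilation_.cpu_data();
  this->output_shape_.clear();
  for (int i = 0; i < this->num_spatial_axes_; ++i) {
    // i + 1 to skip the channel axis.
    const int input_dim = this->input_shape(i + 1);
    // A dilated kernel covers dilation * (k - 1) + 1 input positions.
    const int kernel_extent =
        dilation_data[i] * (kernel_shape_data[i] - 1) + 1;
    const int output_dim =
        (input_dim + 2 * pad_data[i] - kernel_extent) / stride_data[i] + 1;
    this->output_shape_.push_back(output_dim);
  }
}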
template <typename Dtype>
void BaseConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int_tp first_spatial_axis = channel_axis_ + 1;
  CHECK_EQ(bottom[0]->num_axes(), first_spatial_axis + num_spatial_axes_)
      << "bottom num_axes may not change.";
  num_ = bottom[0]->count(0, channel_axis_);
  CHECK_EQ(bottom[0]->shape(channel_axis_), channels_)
      << "Input size incompatible with convolution kernel.";
  // TODO: generalize to handle inputs of different shapes.
  for (int_tp bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
    CHECK(bottom[0]->shape() == bottom[bottom_id]->shape())
        << "All inputs must have the same shape.";
  }
  // Shape the tops.
  bottom_shape_ = &bottom[0]->shape();
  compute_output_shape();
  vector<int_tp> top_shape(bottom[0]->shape().begin(),
      bottom[0]->shape().begin() + channel_axis_);
  top_shape.push_back(num_output_);
  for (int_tp i = 0; i < num_spatial_axes_; ++i) {
    top_shape.push_back(output_shape_[i]);
  }
  for (int_tp top_id = 0; top_id < top.size(); ++top_id) {
    top[top_id]->Reshape(top_shape);
  }
  if (reverse_dimensions()) {
    conv_out_spatial_dim_ = bottom[0]->count(first_spatial_axis);
  } else {
    conv_out_spatial_dim_ = top[0]->count(first_spatial_axis);
  }
  col_offset_ = kernel_dim_ * conv_out_spatial_dim_;
  output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_;
  // Setup input dimensions (conv_input_shape_).
  vector<int_tp> bottom_dim_blob_shape(1, num_spatial_axes_ + 1);
  conv_input_shape_.Reshape(bottom_dim_blob_shape);
  int_tp* conv_input_shape_data = conv_input_shape_.mutable_cpu_data();
  for (int_tp i = 0; i < num_spatial_axes_ + 1; ++i) {
    if (reverse_dimensions()) {
      conv_input_shape_data[i] = top[0]->shape(channel_axis_ + i);
    } else {
      conv_input_shape_data[i] = bottom[0]->shape(channel_axis_ + i);
    }
  }
  // The im2col result buffer will only hold one image at a time to avoid
  // overly large memory usage. In the special case of 1x1 convolution
  // it goes lazily unused to save memory.
  col_buffer_shape_.clear();
  col_buffer_shape_.push_back(kernel_dim_ * group_);
  for (int_tp i = 0; i < num_spatial_axes_; ++i) {
    if (reverse_dimensions()) {
      col_buffer_shape_.push_back(input_shape(i + 1));
    } else {
      col_buffer_shape_.push_back(output_shape_[i]);
    }
  }
  col_buffer_.Reshape(col_buffer_shape_);
  if (Caffe::mode() == Caffe::Brew::GPU) {
    // Shared column buffer per device-queue across all layers on that device.
    for (int_tp i = 0; i < this->device_->num_queues(); ++i) {
      shared_ptr<Blob<Dtype> > buffer =
          this->device_->template Buffer<Dtype>(i);
      buffer->Reshape(col_buffer_shape_);
    }
  }
  bottom_dim_ = bottom[0]->count(channel_axis_);
  top_dim_ = top[0]->count(channel_axis_);
  num_kernels_im2col_ = conv_in_channels_ * conv_out_spatial_dim_;
  num_kernels_col2im_ = reverse_dimensions() ? top_dim_ : bottom_dim_;
  // Set up the all ones "bias multiplier" for adding biases by BLAS
  out_spatial_dim_ = top[0]->count(first_spatial_axis);
  if (bias_term_) {
    vector<int_tp> bias_multiplier_shape(1, out_spatial_dim_);
    bool reshaped = bias_multiplier_.Reshape(bias_multiplier_shape);
    // Reshape() reports whether memory was reallocated. Setting the values
    // would trigger a (possibly unnecessary) copy in GPU mode, so only set
    // them when the buffer actually changed.
    if (reshaped) {
      caffe_set(bias_multiplier_.count(), Dtype(1),
          bias_multiplier_.mutable_cpu_data());
    }
  }
}
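To make the buffer sizing above concrete, a worked example with hypothetical sizes (comments only, no new behavior):

// Hypothetical numbers: 3-channel 5x5 input, 3x3 kernel, stride 1,
// no padding, group_ == 1, where kernel_dim_ counts channels per group:
//   kernel_dim_       = 3 * 3 * 3 = 27
//   output_shape_     = {3, 3}              // (5 - 3) / 1 + 1 per axis
//   col_buffer_shape_ = {27 * 1, 3, 3}      // kernel_dim_ * group_, spatial
// Each of the 9 output locations owns one 27-element unrolled input column,
// so a single GEMM against the (num_output_ x 27) weight matrix computes
// the whole convolution for one image.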
template <typename Dtype>
void BaseConvolutionNDLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  ConvolutionParameter conv_param = this->layer_param_.convolution_param();
  channel_axis_ = bottom[0]->CanonicalAxisIndex(conv_param.axis());
  const int first_spatial_axis = channel_axis_ + 1;
  const int num_axes = bottom[0]->num_axes();
  num_spatial_axes_ = num_axes - first_spatial_axis;
  CHECK_GE(num_spatial_axes_, 1);
  num_ = bottom[0]->count(0, channel_axis_);
  CHECK_EQ(bottom[0]->shape(channel_axis_), channels_)
      << "Input size incompatible with convolution kernel.";
  // TODO: generalize to handle inputs of different shapes.
  for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
    CHECK(bottom[0]->shape() == bottom[bottom_id]->shape())
        << "All inputs must have the same shape.";
  }
  // Shape the tops.
  compute_output_shape();
  vector<int> top_shape = bottom[0]->shape();
  top_shape[channel_axis_] = num_output_;
  top_shape.resize(first_spatial_axis);  // Discard input spatial axes.
  for (int i = 0; i < num_spatial_axes_; ++i) {
    top_shape.push_back(output_shape_[i]);
  }
  for (int top_id = 0; top_id < top.size(); ++top_id) {
    top[top_id]->Reshape(top_shape);
  }
  if (reverse_dimensions()) {
    conv_out_spatial_dim_ = bottom[0]->count(first_spatial_axis);
  } else {
    conv_out_spatial_dim_ = top[0]->count(first_spatial_axis);
  }
  const int* kernel_shape_data = kernel_shape_.cpu_data();
  kernel_dim_ = conv_in_channels_;
  for (int i = 0; i < num_spatial_axes_; ++i) {
    kernel_dim_ *= kernel_shape_data[i];
  }
  weight_offset_ = conv_out_channels_ * kernel_dim_ / group_ / group_;
  col_offset_ = kernel_dim_ * conv_out_spatial_dim_ / group_;
  output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_;
  // Setup input dimensions (conv_input_shape_).
  vector<int> bottom_dim_blob_shape(1, num_spatial_axes_ + 1);
  conv_input_shape_.Reshape(bottom_dim_blob_shape);
  int* conv_input_shape_data = conv_input_shape_.mutable_cpu_data();
  for (int i = 0; i < num_spatial_axes_ + 1; ++i) {
    if (reverse_dimensions()) {
      conv_input_shape_data[i] = top[0]->shape(channel_axis_ + i);
    } else {
      conv_input_shape_data[i] = bottom[0]->shape(channel_axis_ + i);
    }
  }
  // The im2col result buffer will only hold one image at a time to avoid
  // overly large memory usage. In the special case of 1x1 convolution
  // it goes lazily unused to save memory.
  col_buffer_shape_.clear();
  col_buffer_shape_.push_back(kernel_dim_);
  const int* input_shape_data = input_shape_.cpu_data() + 1;
  for (int i = 0; i < num_spatial_axes_; ++i) {
    if (reverse_dimensions()) {
      col_buffer_shape_.push_back(input_shape_data[i]);
    } else {
      col_buffer_shape_.push_back(output_shape_[i]);
    }
  }
  col_buffer_.Reshape(col_buffer_shape_);
  bottom_dim_ = bottom[0]->count(channel_axis_);
  top_dim_ = top[0]->count(channel_axis_);
  num_kernels_im2col_ = conv_in_channels_ * conv_out_spatial_dim_;
  num_kernels_col2im_ = reverse_dimensions() ? top_dim_ : bottom_dim_;
  // Set up the all ones "bias multiplier" for adding biases by BLAS
  out_spatial_dim_ = top[0]->count(first_spatial_axis);
  if (bias_term_) {
    vector<int> bias_multiplier_shape(1, out_spatial_dim_);
    bias_multiplier_.Reshape(bias_multiplier_shape);
    caffe_set(bias_multiplier_.count(), Dtype(1),
        bias_multiplier_.mutable_cpu_data());
  }
}
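The double division by group_ in weight_offset_ above is easy to misread; worked numbers (hypothetical sizes, comments only):

// With group_ = 2, conv_out_channels_ = 64, conv_in_channels_ = 32, and a
// 3x3x3 kernel (volume 27), this variant's kernel_dim_ spans the *full*
// input channel count:
//   kernel_dim_    = 32 * 27 = 864
//   weight_offset_ = 64 * 864 / 2 / 2 = 13824
// One group really owns (64/2) outputs x (32/2) inputs x 27 taps
//   = 32 * 16 * 27 = 13824 weights,
// so the second division cancels the extra factor of group_ hidden inside
// kernel_dim_.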
template <typename Dtype>
void BaseConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  if (is_direct_connect_ && !is_direct_intialized_) {
    direct_num_ = std::min(
        (int)(direct_ratio_ * (num_output_ / (1 - direct_ratio_))),
        bottom[0]->channels());
    this->blobs_.push_back(shared_ptr<Blob<Dtype> >());
    vector<int> idx_shape;
    idx_shape.push_back(direct_num_);
    int idx_param_idx = this->blobs_.size() - 1;
    this->blobs_[idx_param_idx].reset(new Blob<Dtype>(idx_shape));
    // Randomly pick direct_num_ distinct input channels to pass through,
    // and store their indices as a (non-learnable) parameter blob.
    vector<int> idx_tmp;
    for (int i = 0; i < bottom[0]->channels(); i++) {
      idx_tmp.push_back(i);
    }
    std::random_shuffle(idx_tmp.begin(), idx_tmp.end());
    for (int i = 0; i < direct_num_; i++) {
      // direct_idx_.push_back(idx_tmp[i]);
      this->blobs_[idx_param_idx]->mutable_cpu_data()[i] = idx_tmp[i];
    }
    is_direct_intialized_ = true;
  }
  const int first_spatial_axis = channel_axis_ + 1;
  CHECK_EQ(bottom[0]->num_axes(), first_spatial_axis + num_spatial_axes_)
      << "bottom num_axes may not change.";
  num_ = bottom[0]->count(0, channel_axis_);
  CHECK_EQ(bottom[0]->shape(channel_axis_), channels_)
      << "Input size incompatible with convolution kernel.";
  // TODO: generalize to handle inputs of different shapes.
  for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
    CHECK(bottom[0]->shape() == bottom[bottom_id]->shape())
        << "All inputs must have the same shape.";
  }
  // Shape the tops.
  bottom_shape_ = &bottom[0]->shape();
  compute_output_shape();
  vector<int> top_shape(bottom[0]->shape().begin(),
      bottom[0]->shape().begin() + channel_axis_);
  if (is_direct_connect_) {
    top_shape.push_back(num_output_ + direct_num_);
  } else {
    top_shape.push_back(num_output_);
  }
  for (int i = 0; i < num_spatial_axes_; ++i) {
    top_shape.push_back(output_shape_[i]);
  }
  for (int top_id = 0; top_id < top.size(); ++top_id) {
    top[top_id]->Reshape(top_shape);
  }
  if (reverse_dimensions()) {
    conv_out_spatial_dim_ = bottom[0]->count(first_spatial_axis);
  } else {
    conv_out_spatial_dim_ = top[0]->count(first_spatial_axis);
  }
  col_offset_ = kernel_dim_ * conv_out_spatial_dim_;
  output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_;
  // Setup input dimensions (conv_input_shape_).
  vector<int> bottom_dim_blob_shape(1, num_spatial_axes_ + 1);
  conv_input_shape_.Reshape(bottom_dim_blob_shape);
  int* conv_input_shape_data = conv_input_shape_.mutable_cpu_data();
  for (int i = 0; i < num_spatial_axes_ + 1; ++i) {
    if (reverse_dimensions()) {
      conv_input_shape_data[i] = top[0]->shape(channel_axis_ + i);
    } else {
      conv_input_shape_data[i] = bottom[0]->shape(channel_axis_ + i);
    }
  }
  // The im2col result buffer will only hold one image at a time to avoid
  // overly large memory usage. In the special case of 1x1 convolution
  // it goes lazily unused to save memory.
  col_buffer_shape_.clear();
  col_buffer_shape_.push_back(kernel_dim_ * group_);
  for (int i = 0; i < num_spatial_axes_; ++i) {
    if (reverse_dimensions()) {
      col_buffer_shape_.push_back(input_shape(i + 1));
    } else {
      col_buffer_shape_.push_back(output_shape_[i]);
    }
  }
  col_buffer_.Reshape(col_buffer_shape_);
  bottom_dim_ = bottom[0]->count(channel_axis_);
  if (is_direct_connect_) {
    // top includes direct_num_ pass-through channels; top_dim_ counts only
    // the convolution outputs.
    top_dim_ = top[0]->count(channel_axis_) * num_output_
        / (direct_num_ + num_output_);
  } else {
    top_dim_ = top[0]->count(channel_axis_);
  }
  num_kernels_im2col_ = conv_in_channels_ * conv_out_spatial_dim_;
  num_kernels_col2im_ = reverse_dimensions() ? top_dim_ : bottom_dim_;
  // Set up the all ones "bias multiplier" for adding biases by BLAS
  out_spatial_dim_ = top[0]->count(first_spatial_axis);
  if (bias_term_) {
    vector<int> bias_multiplier_shape(1, out_spatial_dim_);
    bias_multiplier_.Reshape(bias_multiplier_shape);
    caffe_set(bias_multiplier_.count(), Dtype(1),
        bias_multiplier_.mutable_cpu_data());
  }
}
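A worked example of the direct_num_ formula at the top of this function (hypothetical values, comments only):

// With num_output_ = 96 and direct_ratio_ = 0.25:
//   direct_num_ = min(0.25 * (96 / 0.75), channels) = min(32, channels)
// If channels >= 32, the top blob gets 96 + 32 = 128 channels, and the 32
// randomly chosen pass-through channels are exactly direct_ratio_ = 25%
// of the concatenated output, which is what the ratio solves for.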
template <typename Dtype>
void BaseConvolutionLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
      << "corresponding to (num, channels, height, width)";
  num_ = bottom[0]->num();
  height_ = bottom[0]->height();
  width_ = bottom[0]->width();
  CHECK_EQ(bottom[0]->channels(), channels_)
      << "Input size incompatible with convolution kernel.";
  // TODO: generalize to handle inputs of different shapes.
  for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
    CHECK_EQ(num_, bottom[bottom_id]->num()) << "Inputs must have same num.";
    CHECK_EQ(channels_, bottom[bottom_id]->channels())
        << "Inputs must have same channels.";
    CHECK_EQ(height_, bottom[bottom_id]->height())
        << "Inputs must have same height.";
    CHECK_EQ(width_, bottom[bottom_id]->width())
        << "Inputs must have same width.";
  }
  // Shape the tops.
  compute_output_shape();
  for (int top_id = 0; top_id < top.size(); ++top_id) {
    top[top_id]->Reshape(num_, num_output_, height_out_, width_out_);
  }
  if (reverse_dimensions()) {
    conv_in_height_ = height_out_;
    conv_in_width_ = width_out_;
    conv_out_spatial_dim_ = height_ * width_;
  } else {
    conv_in_height_ = height_;
    conv_in_width_ = width_;
    conv_out_spatial_dim_ = height_out_ * width_out_;
  }
  kernel_dim_ = conv_in_channels_ * kernel_h_ * kernel_w_;
  weight_offset_ = conv_out_channels_ * kernel_dim_ / group_ / group_;
  col_offset_ = kernel_dim_ * conv_out_spatial_dim_ / group_;
  output_offset_ = conv_out_channels_ * conv_out_spatial_dim_ / group_;
  // The im2col result buffer will only hold one image at a time to avoid
  // overly large memory usage. In the special case of 1x1 convolution
  // it goes lazily unused to save memory.
  if (Caffe::mode() == Caffe::Brew::CPU) {
    if (reverse_dimensions()) {
      col_buffer_.Reshape(1, kernel_dim_, height_, width_);
    } else {
      col_buffer_.Reshape(1, kernel_dim_, height_out_, width_out_);
    }
  } else {
    // Shared column buffer per device-queue across all layers on that device.
    for (int i = 0; i < this->device_context_->num_queues(); ++i) {
      shared_ptr<Blob<Dtype> > buffer =
          this->device_context_->template Buffer<Dtype>(i);
      if (reverse_dimensions()) {
        buffer->Reshape(1, kernel_dim_, height_, width_);
      } else {
        buffer->Reshape(1, kernel_dim_, height_out_, width_out_);
      }
    }
  }
  // Set up the all ones "bias multiplier" for adding biases by BLAS
  if (bias_term_) {
    vector<int> bias_multiplier_shape(1, height_out_ * width_out_);
    bool reshaped = bias_multiplier_.Reshape(bias_multiplier_shape);
    // Setting the values would trigger a (possibly unnecessary) memory copy
    // in GPU mode, so only set them when the buffer was actually reallocated.
    if (reshaped) {
      caffe_set(bias_multiplier_.count(), Dtype(1),
          bias_multiplier_.mutable_cpu_data());
    }
  }
}
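For context, a hedged sketch of how forward code in this branch might borrow one of the shared per-queue column buffers sized above, reusing only the Buffer(i) accessor already seen here (queue index 0 is chosen arbitrarily for illustration; the real code presumably selects the queue it executes on):

// Hedged sketch: borrow the device-wide shared column buffer instead of a
// per-layer col_buffer_; sharing one buffer per queue avoids allocating a
// separate im2col scratch blob in every convolution layer.
shared_ptr<Blob<Dtype> > shared_col =
    this->device_context_->template Buffer<Dtype>(0);
Dtype* col_data = shared_col->mutable_gpu_data();  // im2col target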
template <typename Dtype>
void CudnnNdConvolutionLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  num_ = bottom[0]->shape(0);
  CHECK_EQ(bottom[0]->shape(1), channels_)
      << "Input size incompatible with convolution kernel.";
  input_shape_ = bottom[0]->shape();
  // TODO: generalize to handle inputs of different shapes.
  for (int bottom_id = 1; bottom_id < bottom.size(); ++bottom_id) {
    CHECK_EQ(num_, bottom[bottom_id]->shape(0))
        << "Inputs must have same num.";
    CHECK_EQ(channels_, bottom[bottom_id]->shape(1))
        << "Inputs must have same channels.";
    for (int i = 0; i < bottom[0]->num_axes(); ++i) {
      CHECK_EQ(input_shape_[i], bottom[bottom_id]->shape(i))
          << "Inputs must have same shape.";
    }
  }
  // Shape the tops.
  compute_output_shape();
  for (int top_id = 0; top_id < top.size(); ++top_id) {
    top[top_id]->Reshape(output_shape_);
  }
  conv_out_spatial_dim_ = 1;
  for (int i = 2; i < output_shape_.size(); ++i) {
    conv_out_spatial_dim_ *= output_shape_[i];
  }
  kernel_dim_ = channels_;
  for (int i = 0; i < kernel_shape_.size(); ++i) {
    kernel_dim_ *= kernel_shape_[i];
  }
  weight_offset_ = num_output_ * kernel_dim_ / group_ / group_;
  output_offset_ = num_output_ * conv_out_spatial_dim_ / group_;
  // Set up the all ones "bias multiplier" for adding biases by BLAS
  if (bias_term_) {
    vector<int> bias_multiplier_shape(1, conv_out_spatial_dim_);
    bias_multiplier_.Reshape(bias_multiplier_shape);
    caffe_set(bias_multiplier_.count(), Dtype(1),
        bias_multiplier_.mutable_cpu_data());
  }
  bottom_offset_ = 1;
  for (int i = 1; i < input_shape_.size(); ++i) {
    bottom_offset_ *= input_shape_[i];
  }
  bottom_offset_ /= group_;
  top_offset_ = 1;
  for (int i = 1; i < output_shape_.size(); ++i) {
    top_offset_ *= output_shape_[i];
  }
  top_offset_ /= group_;
  vector<int> bottom_tensor_shape(input_shape_);
  bottom_tensor_shape[1] /= group_;
  vector<int> bottom_tensor_stride(input_shape_.size(), 1);
  for (int i = input_shape_.size() - 2; i >= 0; --i) {
    bottom_tensor_stride[i] =
        input_shape_[i + 1] * bottom_tensor_stride[i + 1];
  }
  vector<int> top_tensor_shape(output_shape_);
  top_tensor_shape[1] /= group_;
  vector<int> top_tensor_stride(output_shape_.size(), 1);
  for (int i = output_shape_.size() - 2; i >= 0; --i) {
    top_tensor_stride[i] = output_shape_[i + 1] * top_tensor_stride[i + 1];
  }
  for (int i = 0; i < bottom.size(); i++) {
    cudnn::setTensorNdDesc<Dtype>(&bottom_descs_[i],
        bottom_tensor_shape, bottom_tensor_stride);
    cudnn::setTensorNdDesc<Dtype>(&top_descs_[i],
        top_tensor_shape, top_tensor_stride);
    cudnn::setNdConvolutionDesc<Dtype>(&conv_descs_[i], bottom_descs_[i],
        filter_desc_, pad_shape_, stride_shape_);
  }
  // Tensor descriptor for bias.
  if (this->bias_term_) {
    vector<int> bias_shape(input_shape_.size(), 1);
    bias_shape[1] = this->num_output_ / this->group_;
    cudnn::setTensorNdDesc<Dtype>(&bias_desc_, bias_shape);
  }
}
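The stride loops above build standard fully-packed row-major strides; a worked example, and why the strides must be spelled out for cuDNN (hypothetical shape, comments only):

// For input_shape_ = {N, C, D, H, W} = {2, 4, 3, 5, 5}:
//   bottom_tensor_stride = {300, 75, 25, 5, 1}
//   (stride[i] = shape[i+1] * stride[i+1], innermost stride 1)
// The grouped descriptor shrinks the channel dim to C / group_ but keeps the
// full-tensor strides, so each group views a channel slice of the same
// packed memory rather than a separately packed tensor; that is why explicit
// strides accompany the shape in setTensorNdDesc above.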