virtual void Forward(bool is_train, const std::vector<Node<gpu>*> &nodes_in, const std::vector<Node<gpu>*> &nodes_out, ConnectState<gpu> *p_cstate) { mshadow::Tensor<gpu,4> &tmp = p_cstate->states[0]; if (!init_cudnn_) { init_cudnn_ = true; CUDA_CHECK(cudnnSetStream(handle_, nodes_out[0]->data.stream_->stream_)); mshadow::Tensor<gpu, 4, float> &in = nodes_in[0]->data; mshadow::Tensor<gpu, 4, float> &out = nodes_out[0]->data; CUDA_CHECK(cudnnSetTensor4dDescriptor(in_desc_, CUDNN_TENSOR_NCHW, dtype_, in.shape_[0], in.shape_[1], in.shape_[2], in.shape_[3])); CUDA_CHECK(cudnnSetTensor4dDescriptor(out_desc_, CUDNN_TENSOR_NCHW, dtype_, out.shape_[0], out.shape_[1], out.shape_[2], out.shape_[3])); } float alpha = 1.0f; float beta = 0.0f; utils::Assert(nodes_in[0]->data.CheckContiguous(), "contiguous in conv"); utils::Assert(nodes_out[0]->data.CheckContiguous(), "contiguous in conv"); utils::Assert(tmp.CheckContiguous(), "contiguous in conv"); CUDA_CHECK(cudnnPoolingForward(handle_, pooling_desc_, &alpha, in_desc_, nodes_in[0]->data.dptr_, &beta, out_desc_, tmp.dptr_)); mshadow::Copy(nodes_out[0]->data, tmp, nodes_out[0]->data.stream_); }
/* Max-pooling forward via cuDNN: builds temporary descriptors from the
 * module's pooling parameters, resizes the module output tensor to the
 * pooled shape reported by cuDNN, runs cudnnPoolingForward, and returns
 * the output tensor. */
THFloatTensor *cudnn_SpatialMaxPooling_updateOutput(struct module *module, THFloatTensor *input)
{
	const int kW = module->SpatialMaxPooling.kW;
	const int kH = module->SpatialMaxPooling.kH;
	const int dW = module->SpatialMaxPooling.dW;
	const int dH = module->SpatialMaxPooling.dH;
	const int padW = module->SpatialMaxPooling.padW;
	const int padH = module->SpatialMaxPooling.padH;
	THFloatTensor *output = module->output;
	cudnnTensorDescriptor_t in_desc, out_desc;
	cudnnPoolingDescriptor_t pool_desc;
	float alpha = 1, beta = 0;
	int out_dims[4];

	/* Describe the input and the pooling window (height-first argument order). */
	errcheck(THcudnn_TensorDescriptor(&in_desc, input));
	errcheck(cudnnCreatePoolingDescriptor(&pool_desc));
	errcheck(cudnnSetPooling2dDescriptor(pool_desc, CUDNN_POOLING_MAX, kH, kW, padH, padW, dH, dW));
	/* Ask cuDNN for the pooled output shape and size the output to match. */
	errcheck(cudnnGetPoolingNdForwardOutputDim(pool_desc, in_desc, 4, out_dims));
	THCudaTensor_resize4d(output, out_dims[0], out_dims[1], out_dims[2], out_dims[3]);
	errcheck(THcudnn_TensorDescriptor(&out_desc, output));
	errcheck(cudnnPoolingForward(THcudnn_getHandle(), pool_desc, &alpha,
		in_desc, THFloatTensor_data(input),
		&beta, out_desc, THFloatTensor_data(output)));
	/* Best-effort cleanup of the temporary descriptors. */
	cudnnDestroyTensorDescriptor(in_desc);
	cudnnDestroyTensorDescriptor(out_desc);
	cudnnDestroyPoolingDescriptor(pool_desc);
	return output;
}
// Forward pooling pass: lazily (re)builds the cuDNN tensor/pooling
// descriptors whenever the incoming shape differs from the cached one,
// then runs cudnnPoolingForward on imgs into poolout.
//
// imgs       : device pointer to the input batch
// imgs_shape : n_img_dims + 2 extents — [batch, channels, spatial...]
// poolout    : device pointer receiving the pooled result
//
// NOTE(review): descriptors are created with CUDNN_DATA_FLOAT, so this
// assumes T is float — confirm at the instantiation sites.
void PoolBC01CuDNN<T>::fprop(const T *imgs, int *imgs_shape, T *poolout) {
  bool new_shape = false;
  // Full tensor rank: batch and channel axes plus the spatial axes.
  int n_imgs_dims = n_img_dims + 2;
  // Compare against the cached shape; any mismatch forces a rebuild.
  for (int i = 0; i < n_imgs_dims; ++i) {
    if (this->imgs_shape[i] != imgs_shape[i]) {
      new_shape = true;
      break;
    }
  }
  if (new_shape) {
    // Cache the new shape so subsequent calls skip the rebuild.
    for (int i = 0; i < n_imgs_dims; ++i) {
      this->imgs_shape[i] = imgs_shape[i];
    }
    // NB: variable-length arrays are a GCC/Clang extension, not standard C++.
    int imgs_strides[n_imgs_dims];
    array_strides(n_imgs_dims, imgs_shape, imgs_strides);
    CUDNN_CHECK(cudnnSetTensorNdDescriptor(
        imgs_desc, CUDNN_DATA_FLOAT, n_imgs_dims, imgs_shape, imgs_strides
    ));
    CUDNN_CHECK(cudnnSetPoolingNdDescriptor(
        pool_desc, pool_mode, n_img_dims, win_shape, padding, strides
    ));
    // Pooled output keeps batch/channel extents; each spatial extent uses
    // the standard formula (in + 2*pad - win) / stride + 1 (floor division).
    int poolout_shape[n_imgs_dims];
    poolout_shape[0] = imgs_shape[0];
    poolout_shape[1] = imgs_shape[1];
    for (int i = 0; i < n_img_dims; ++i) {
      poolout_shape[i+2] = (imgs_shape[i+2] + 2*padding[i] - win_shape[i]) / strides[i] + 1;
    }
    int poolout_strides[n_imgs_dims];
    array_strides(n_imgs_dims, poolout_shape, poolout_strides);
    CUDNN_CHECK(cudnnSetTensorNdDescriptor(
        poolout_desc, CUDNN_DATA_FLOAT, n_imgs_dims, poolout_shape, poolout_strides
    ));
  }
  // alpha/beta come from project-global CUDNN::one / CUDNN::zero.
  CUDNN_CHECK(cudnnPoolingForward(
      CUDNN::handle(), pool_desc, &CUDNN::one, imgs_desc, imgs,
      &CUDNN::zero, poolout_desc, poolout
  ));
}
// Fused convolution + (optional bias) + (optional activation) + pooling
// dispatched through cuDNN.
//
// inputs[0]  : input feature map (device memory)
// outputs[0] : pooled result (device memory)
// param      : carries conv weights/bias, the activation flag and settings
//
// The pre-pooling result is staged in _inner_tensor (sized elsewhere).
// The repeated cudnnPoolingForward / cudnnConvolutionForward invocations of
// the original were hoisted into local lambdas; the per-branch call sequence
// is unchanged.
SaberStatus VenderConv2DActPooling<NV, AK_FLOAT, AK_FLOAT, AK_FLOAT, NCHW, NCHW, NCHW>::\
dispatch(const std::vector<DataTensor_in*>& inputs,
         std::vector<DataTensor_out*>& outputs,
         ConvActivePoolingParam<OpTensor>& param) {
    const InDataType* in_data = (const InDataType*)inputs[0]->data();
    InDataType* inner_data = (InDataType*)_inner_tensor.mutable_data();
    InDataType* out_data = (InDataType*)outputs[0]->mutable_data();
    const float* weight_data = (const float*)param.conv_param.weight()->data();

    // Convolution into the staging buffer (no bias, no activation).
    auto run_conv = [&]() {
        CUDNN_CHECK(cudnnConvolutionForward(_handle,
                cudnn::cudnnTypeWrapper<float>::kOne(),
                _input_descs, in_data,
                _filter_desc, weight_data,
                _conv_descs, _fwd_algo, _workspace, _workspace_fwd_sizes,
                cudnn::cudnnTypeWrapper<float>::kZero(),
                _inner_descs, inner_data));
    };
    // Pooling from the staging buffer into the final output.
    auto run_pooling = [&]() {
        CUDNN_CHECK(cudnnPoolingForward(_handle, _pooling_descs,
                cudnn::cudnnTypeWrapper<InDataType>::kOne(),
                _inner_descs, inner_data,
                cudnn::cudnnTypeWrapper<InDataType>::kZero(),
                _output_descs, out_data));
    };
    const bool has_bias = param.conv_param.bias()->size() > 0;

    if (param.has_activation == false) {
        // conv (+ broadcast bias add) -> pool
        run_conv();
        if (has_bias) {
            const float* bias_data = (const float*)param.conv_param.bias()->data();
            CUDNN_CHECK(cudnnAddTensor(_handle,
                    cudnn::cudnnTypeWrapper<float>::kOne(),
                    _bias_desc, bias_data,
                    cudnn::cudnnTypeWrapper<float>::kOne(),
                    _inner_descs, inner_data));
        }
        run_pooling();
        return SaberSuccess;
    }

    if (has_bias) {
        // Single fused cuDNN call: conv + bias + activation -> staging buffer.
        const float* bias_data = (const float*)param.conv_param.bias()->data();
        CUDNN_CHECK(cudnnConvolutionBiasActivationForward(_handle,
                cudnn::cudnnTypeWrapper<float>::kOne(),
                _input_descs, in_data,
                _filter_desc, weight_data,
                _conv_descs, _fwd_algo, _workspace, _workspace_fwd_sizes,
                cudnn::cudnnTypeWrapper<float>::kZero(),
                _inner_descs, inner_data,
                _bias_desc, bias_data,
                _active_descs, _inner_descs, inner_data));
    } else {
        // conv, then in-place activation on the staging buffer.
        run_conv();
        CUDNN_CHECK(cudnnActivationForward(_handle, _active_descs,
                cudnn::cudnnTypeWrapper<InDataType>::kOne(),
                _inner_descs, inner_data,
                cudnn::cudnnTypeWrapper<InDataType>::kZero(),
                _inner_descs, inner_data));
    }
    run_pooling();
    return SaberSuccess;
}