// Example 1
 virtual void Forward(bool is_train,
                      const std::vector<Node<gpu>*> &nodes_in,
                      const std::vector<Node<gpu>*> &nodes_out,
                      ConnectState<gpu> *p_cstate) {
   mshadow::Tensor<gpu,4> &tmp = p_cstate->states[0];
   if (!init_cudnn_) {
     init_cudnn_ = true;
     CUDA_CHECK(cudnnSetStream(handle_, nodes_out[0]->data.stream_->stream_));
     mshadow::Tensor<gpu, 4, float> &in = nodes_in[0]->data;
     mshadow::Tensor<gpu, 4, float> &out = nodes_out[0]->data;
     CUDA_CHECK(cudnnSetTensor4dDescriptor(in_desc_, CUDNN_TENSOR_NCHW, dtype_,
                                           in.shape_[0], in.shape_[1],
                                           in.shape_[2], in.shape_[3]));
     CUDA_CHECK(cudnnSetTensor4dDescriptor(out_desc_, CUDNN_TENSOR_NCHW, dtype_,
                                           out.shape_[0], out.shape_[1],
                                           out.shape_[2], out.shape_[3]));
   }
   float alpha = 1.0f;
   float beta = 0.0f;
   utils::Assert(nodes_in[0]->data.CheckContiguous(), "contiguous in conv");
   utils::Assert(nodes_out[0]->data.CheckContiguous(), "contiguous in conv");
   utils::Assert(tmp.CheckContiguous(), "contiguous in conv");
   CUDA_CHECK(cudnnPoolingForward(handle_, pooling_desc_, &alpha,
                                  in_desc_, nodes_in[0]->data.dptr_, &beta,
                                  out_desc_, tmp.dptr_));
   mshadow::Copy(nodes_out[0]->data, tmp, nodes_out[0]->data.stream_);
 }
// Example 2
THFloatTensor *cudnn_SpatialMaxPooling_updateOutput(struct module *module, THFloatTensor *input)
{
	/* Pooling window size, stride and padding as configured on the module. */
	int kW = module->SpatialMaxPooling.kW;
	int kH = module->SpatialMaxPooling.kH;
	int dW = module->SpatialMaxPooling.dW;
	int dH = module->SpatialMaxPooling.dH;
	int padW = module->SpatialMaxPooling.padW;
	int padH = module->SpatialMaxPooling.padH;

	THFloatTensor *output = module->output;
	cudnnTensorDescriptor_t inDesc, outDesc;
	cudnnPoolingDescriptor_t poolDesc;
	float alpha = 1, beta = 0;
	int outDims[4];

	/* Describe the input tensor and the max-pooling operation. */
	errcheck(THcudnn_TensorDescriptor(&inDesc, input));
	errcheck(cudnnCreatePoolingDescriptor(&poolDesc));
	errcheck(cudnnSetPooling2dDescriptor(poolDesc, CUDNN_POOLING_MAX, kH, kW, padH, padW, dH, dW));
	/* Ask cuDNN for the pooled extent, then size the output tensor to match. */
	errcheck(cudnnGetPoolingNdForwardOutputDim(poolDesc, inDesc, 4, outDims));
	THCudaTensor_resize4d(output, outDims[0], outDims[1], outDims[2], outDims[3]);
	errcheck(THcudnn_TensorDescriptor(&outDesc, output));

	/* output = 1 * maxpool(input) + 0 * output */
	errcheck(cudnnPoolingForward(THcudnn_getHandle(), poolDesc, &alpha, inDesc, THFloatTensor_data(input), &beta,
		outDesc, THFloatTensor_data(output)));

	cudnnDestroyTensorDescriptor(inDesc);
	cudnnDestroyTensorDescriptor(outDesc);
	cudnnDestroyPoolingDescriptor(poolDesc);
	return output;
}
// Example 3
void PoolBC01CuDNN<T>::fprop(const T *imgs, int *imgs_shape, T *poolout) {
  // Forward pooling: imgs (batch, channels, spatial dims...) -> poolout.
  // Descriptors are rebuilt only when the input shape changes between calls.
  int n_imgs_dims = n_img_dims + 2;  // batch and channel dims precede the spatial dims

  // Compare the incoming shape against the cached one.
  bool new_shape = false;
  for (int i = 0; i < n_imgs_dims; ++i) {
    if (this->imgs_shape[i] != imgs_shape[i]) {
      new_shape = true;
      break;
    }
  }

  if (new_shape) {
    // Cache the new shape so the next call can skip descriptor setup.
    for (int i = 0; i < n_imgs_dims; ++i) {
      this->imgs_shape[i] = imgs_shape[i];
    }
    // Fixed-size scratch arrays replace the previous C99-style VLAs, which
    // are a non-standard extension in C++. cuDNN Nd descriptors accept at
    // most CUDNN_DIM_MAX (8) dimensions, so 8 entries always suffice;
    // precondition: n_imgs_dims <= 8.
    enum { kMaxNdDims = 8 };
    int imgs_strides[kMaxNdDims];
    array_strides(n_imgs_dims, imgs_shape, imgs_strides);
    CUDNN_CHECK(cudnnSetTensorNdDescriptor(
        imgs_desc, CUDNN_DATA_FLOAT, n_imgs_dims, imgs_shape, imgs_strides
    ));

    CUDNN_CHECK(cudnnSetPoolingNdDescriptor(
        pool_desc, pool_mode, n_img_dims, win_shape, padding, strides
    ));

    // Output shape: batch and channels pass through unchanged; each spatial
    // dim shrinks according to window size, padding and stride.
    int poolout_shape[kMaxNdDims];
    poolout_shape[0] = imgs_shape[0];
    poolout_shape[1] = imgs_shape[1];
    for (int i = 0; i < n_img_dims; ++i) {
      poolout_shape[i+2] = (imgs_shape[i+2] + 2*padding[i] - win_shape[i])
                           / strides[i] + 1;
    }

    int poolout_strides[kMaxNdDims];
    array_strides(n_imgs_dims, poolout_shape, poolout_strides);
    CUDNN_CHECK(cudnnSetTensorNdDescriptor(
        poolout_desc, CUDNN_DATA_FLOAT, n_imgs_dims, poolout_shape,
        poolout_strides
    ));
  }

  // poolout = 1 * pool(imgs) + 0 * poolout
  CUDNN_CHECK(cudnnPoolingForward(
      CUDNN::handle(), pool_desc, &CUDNN::one, imgs_desc, imgs, &CUDNN::zero,
      poolout_desc, poolout
  ));
}
// Example 4
SaberStatus VenderConv2DActPooling<NV, AK_FLOAT, AK_FLOAT, AK_FLOAT, NCHW, NCHW, NCHW>::\
    dispatch(const std::vector<DataTensor_in*>& inputs,
                std::vector<DataTensor_out*>& outputs,
                ConvActivePoolingParam<OpTensor>& param) {

    // Stage 1: convolution (optionally fused with bias and/or activation)
    // into the intermediate tensor. Stage 2: pooling from the intermediate
    // tensor into the output. The pooling epilogue was previously duplicated
    // verbatim in all three paths; it is now performed once at the end.
    const InDataType *in_data = (const InDataType*)inputs[0]->data();
    InDataType *inner_data = (InDataType*)_inner_tensor.mutable_data();
    InDataType *out_data = (InDataType*)outputs[0]->mutable_data();
    const float *weight_data = (const float *) param.conv_param.weight()->data();
    const bool with_bias = param.conv_param.bias()->size() > 0;

    if (!param.has_activation) {
        // Plain convolution, then a separate bias add when a bias is present.
        CUDNN_CHECK(cudnnConvolutionForward(_handle,
                                            cudnn::cudnnTypeWrapper<float>::kOne(),
                                            _input_descs, in_data,
                                            _filter_desc, weight_data,
                                            _conv_descs, _fwd_algo,
                                            _workspace, _workspace_fwd_sizes,
                                            cudnn::cudnnTypeWrapper<float>::kZero(),
                                            _inner_descs, inner_data
        ));
        if (with_bias) {
            const float * bias_data = (const float*)param.conv_param.bias()->data();
            CUDNN_CHECK(cudnnAddTensor(_handle,
                                       cudnn::cudnnTypeWrapper<float>::kOne(),
                                       _bias_desc, bias_data,
                                       cudnn::cudnnTypeWrapper<float>::kOne(),
                                       _inner_descs, inner_data));
        }
    } else if (with_bias) {
        // Convolution + bias + activation fused into a single cuDNN call.
        const float * bias_data = (const float*)param.conv_param.bias()->data();
        CUDNN_CHECK(cudnnConvolutionBiasActivationForward(_handle,
                                                          cudnn::cudnnTypeWrapper<float>::kOne(),
                                                          _input_descs, in_data,
                                                          _filter_desc, weight_data,
                                                          _conv_descs, _fwd_algo,
                                                          _workspace, _workspace_fwd_sizes,
                                                          cudnn::cudnnTypeWrapper<float>::kZero(),
                                                          _inner_descs, inner_data,
                                                          _bias_desc,  bias_data,
                                                          _active_descs, _inner_descs, inner_data));
    } else {
        // No bias: convolution followed by a standalone in-place activation.
        CUDNN_CHECK(cudnnConvolutionForward(_handle,
                                            cudnn::cudnnTypeWrapper<float>::kOne(),
                                            _input_descs, in_data,
                                            _filter_desc, weight_data,
                                            _conv_descs, _fwd_algo,
                                            _workspace, _workspace_fwd_sizes,
                                            cudnn::cudnnTypeWrapper<float>::kZero(),
                                            _inner_descs, inner_data
        ));
        CUDNN_CHECK(cudnnActivationForward(_handle, _active_descs,
                                           cudnn::cudnnTypeWrapper<InDataType>::kOne(),
                                           _inner_descs, inner_data,
                                           cudnn::cudnnTypeWrapper<InDataType>::kZero(),
                                           _inner_descs, inner_data
        ));
    }

    // Stage 2: pooling, shared by every path above.
    CUDNN_CHECK(cudnnPoolingForward(_handle, _pooling_descs,
                                    cudnn::cudnnTypeWrapper<InDataType>::kOne(),
                                    _inner_descs, inner_data,
                                    cudnn::cudnnTypeWrapper<InDataType>::kZero(),
                                    _output_descs, out_data
    ));
    return SaberSuccess;
}