void SpatialConvolution::init(TorchData& input) {
  if (input.type() != TorchDataType::TENSOR_DATA) {
    throw std::runtime_error("SpatialConvolution::init() - "
      "FloatTensor expected!");
  }
  Tensor<float>& in = (Tensor<float>&)input;
  if (in.dim()[2] != feats_in_) {
    throw std::runtime_error("SpatialConvolution::init() - ERROR: "
      "incorrect number of input features!");
  }
  // Output size for a stride-1, no-padding ("valid") convolution: the input
  // size minus the filter size plus one, in each spatial dimension.
  Int3 out_dim(in.dim());
  out_dim[0] = out_dim[0] - filt_width_ + 1;
  out_dim[1] = out_dim[1] - filt_height_ + 1;
  out_dim[2] = feats_out_;
  if (output != NULL &&
      !Int3::equal(out_dim, ((Tensor<float>*)output)->dim())) {
    // Input dimension has changed!
    SAFE_DELETE(output);
  }
  if (output == NULL) {
    output = new Tensor<float>(out_dim);
    //cl_context->getOptimalLocalWorkgroupSizes(deviceid,
    //  ((Tensor<float>*)output)->dim(), local_worgroup_size);
  }
}
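// A minimal sketch of the output-size rule used above (a hypothetical helper,
// not part of the original API), assuming stride 1 and no zero padding:
static inline uint32_t validConvOutSize(const uint32_t in_size,
                                        const uint32_t filt_size) {
  // e.g. validConvOutSize(96, 5) == 92: a 96-wide input and a 5-wide filter
  // leave 92 valid filter placements.
  return in_size - filt_size + 1;
}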
void Parallel::forwardProp(TorchData& input) {
  if (input.type() != TorchDataType::TABLE_DATA) {
    throw std::runtime_error("Parallel::forwardProp() - "
      "Table expected!");
  }
  Table& in = (Table&)input;
  if (in.tableSize() != network_->size()) {
    throw std::runtime_error("Parallel::forwardProp() - ERROR: "
      "Table size does not match number of parallel stages!");
  }
  // Each parallel stage consumes the table entry with the matching index.
  for (uint32_t i = 0; i < network_->size(); i++) {
    (*network_)[i]->forwardProp(*in(i));
  }
  // initOutput() just copies the output pointers of all the parallel stages
  // into a table.
  initOutput();
}
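// A minimal usage sketch (hedged: the Table construction calls are assumed
// and may differ from the real API). Table entry i is routed through
// parallel stage i, and the stage outputs are gathered back into a table:
//
//   Table in;
//   in.add(&tensor_a);  // consumed by stage 0
//   in.add(&tensor_b);  // consumed by stage 1
//   parallel->forwardProp(in);
//   Table* out = (Table*)parallel->output;  // one entry per stage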
void SpatialSubtractiveNormalization::init(TorchData& input) {
  if (input.type() != TorchDataType::TENSOR_DATA) {
    throw std::runtime_error("SpatialSubtractiveNormalization::init() - "
      "FloatTensor expected!");
  }
  Tensor<float>& in = (Tensor<float>&)input;
  if (in.dim() != 3) {
    throw std::runtime_error("SpatialSubtractiveNormalization::init() - "
      "3D input is expected!");
  }
  if (output != NULL) {
    if (!in.isSameSizeAs(*(Tensor<float>*)output)) {
      // Input dimension has changed!
      cleanup();
    }
  }
  if (output == NULL) {
    output = new Tensor<float>(in.dim(), in.size());
    mean_pass1_ = new Tensor<float>(in.dim(), in.size());
    mean_pass2_ = new Tensor<float>(in.dim(), in.size());
  }
  if (mean_coef_ == NULL) {
    uint32_t mean_coeff_size[2];
    mean_coeff_size[0] = TO_TENSOR_PTR(output)->size()[0];
    mean_coeff_size[1] = TO_TENSOR_PTR(output)->size()[1];
    mean_coef_ = new Tensor<float>(2, mean_coeff_size);

    float* mean_coef_cpu = new float[mean_coef_->nelems()];
    float* kernel_cpu = new float[kernel_->nelems()];
    kernel_->getData(kernel_cpu);
    bool onedim_kernel = kernel_->dim() == 1;

    // Filter an image of all 1 values to create the normalization constants.
    // See norm_test.lua for proof that this works as well as:
    // https://github.com/andresy/torch/blob/master/extra/nn/SpatialSubtractiveNormalization.lua
    int32_t n_feats = TO_TENSOR_PTR(output)->size()[2];
    int32_t height = TO_TENSOR_PTR(output)->size()[1];
    int32_t width = TO_TENSOR_PTR(output)->size()[0];
    if (onedim_kernel) {
      // 1D case - The filter is separable, but we'll just do the dumb 2D
      // version since we only do this once on startup. --> O(n * m)
      uint32_t kernel_size = kernel_->size()[0];
      int32_t filt_rad = (kernel_size - 1) / 2;
      for (int32_t v = 0; v < height; v++) {
        for (int32_t u = 0; u < width; u++) {
          float tmp = 0.0f;
          for (int32_t v_filt = -filt_rad; v_filt <= filt_rad; v_filt++) {
            for (int32_t u_filt = -filt_rad; u_filt <= filt_rad; u_filt++) {
              int32_t u_in = u + u_filt;
              int32_t v_in = v + v_filt;
              if (u_in >= 0 && u_in < width && v_in >= 0 && v_in < height) {
                // Pixel is inside --> We'll effectively clamp zeros elsewhere.
                tmp += (kernel_cpu[v_filt + filt_rad] *
                        kernel_cpu[u_filt + filt_rad]);
              }
            }
          }
          mean_coef_cpu[v * width + u] = tmp / n_feats;
        }
      }
    } else {
      // 2D case
      int32_t kernel_size_u = kernel_->size()[0];
      int32_t kernel_size_v = kernel_->size()[1];
      int32_t filt_rad_u = (kernel_size_u - 1) / 2;
      int32_t filt_rad_v = (kernel_size_v - 1) / 2;
      for (int32_t v = 0; v < height; v++) {
        for (int32_t u = 0; u < width; u++) {
          float tmp = 0.0f;
          for (int32_t v_filt = -filt_rad_v; v_filt <= filt_rad_v; v_filt++) {
            for (int32_t u_filt = -filt_rad_u; u_filt <= filt_rad_u; u_filt++) {
              int32_t u_in = u + u_filt;
              int32_t v_in = v + v_filt;
              if (u_in >= 0 && u_in < width && v_in >= 0 && v_in < height) {
                // Pixel is inside --> We'll effectively clamp zeros elsewhere.
                tmp += kernel_cpu[(v_filt + filt_rad_v) * kernel_size_u +
                                  (u_filt + filt_rad_u)];
              }
            }
          }
          mean_coef_cpu[v * width + u] = tmp / n_feats;
        }
      }
    }
    mean_coef_->setData(mean_coef_cpu);
    delete[] mean_coef_cpu;
    delete[] kernel_cpu;
  }
  if (mean_ == NULL) {
    uint32_t mean_coeff_size[2];
    mean_coeff_size[0] = TO_TENSOR_PTR(output)->size()[0];
    mean_coeff_size[1] = TO_TENSOR_PTR(output)->size()[1];
    mean_ = new Tensor<float>(2, mean_coeff_size);
  }
}
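// Worked example of the border compensation above (1D case): with a
// normalized kernel k = {0.25f, 0.5f, 0.25f} (filt_rad = 1) and n_feats = 1,
// an interior pixel sees the full outer product of kernel weights, so
//   mean_coef = (0.25 + 0.5 + 0.25)^2 = 1.0,
// while the top-left corner only overlaps the lower-right 2x2 quadrant:
//   mean_coef = (0.5 + 0.25) * (0.5 + 0.25) = 0.5625.
// Dividing the filtered mean by mean_coef_ therefore undoes the weight lost
// where the kernel hangs off the image border.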
void JoinTable::init(TorchData& input) {
  if (input.type() != TorchDataType::TABLE_DATA) {
    throw std::runtime_error("JoinTable::init() - "
      "Table expected!");
  }
  Table& in = (Table&)input;
  if (in.tableSize() == 0) {
    throw std::runtime_error("JoinTable::init() - "
      "Empty input Table!");
  }
  // Check that it is a table of FloatTensors
  for (uint32_t i = 0; i < in.tableSize(); i++) {
    if (in(i)->type() != TENSOR_DATA) {
      throw std::runtime_error("JoinTable::init() - "
        "Table of float tensors expected!");
    }
  }
  uint32_t dim = TO_TENSOR_PTR(in(0))->dim();
  if (dim <= dimension_) {
    throw std::runtime_error("JoinTable::init() - "
      "Input has fewer dimensions than the join dimension!");
  }
  uint32_t jdim = dim - dimension_ - 1;  // dimension_ = 0 is the top dim
  // Make sure the dimensions OTHER than the join dimension are all the same
  for (uint32_t d = 0; d < dim; d++) {
    if (d != jdim) {
      for (uint32_t j = 1; j < in.tableSize(); j++) {
        if (TO_TENSOR_PTR(in(j))->size()[d] !=
            TO_TENSOR_PTR(in(0))->size()[d]) {
          throw std::runtime_error("JoinTable::init() - "
            "Size mismatch!");
        }
      }
      if (output != NULL &&
          TO_TENSOR_PTR(output)->size()[d] !=
          TO_TENSOR_PTR(in(0))->size()[d]) {
        SAFE_DELETE(output);
      }
    }
  }
  // The output's join dimension is the sum of the join dimensions of ALL the
  // input tensors (including the first one).
  uint32_t nelems_jdim = 0;
  for (uint32_t j = 0; j < in.tableSize(); j++) {
    nelems_jdim += TO_TENSOR_PTR(in(j))->size()[jdim];
  }
  if (output != NULL &&
      TO_TENSOR_PTR(output)->size()[jdim] != nelems_jdim) {
    SAFE_DELETE(output);
  }
  if (output == NULL) {
    uint32_t* size = new uint32_t[dim];
    memcpy(size, TO_TENSOR_PTR(in(0))->size(), sizeof(size[0]) * dim);
    size[jdim] = nelems_jdim;  // jdim, not dimension_: size() is stored
                               // fastest-dimension first.
    output = new Tensor<float>(dim, size);
    SAFE_DELETE_ARR(size);
  }
}
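// Worked example of the jdim mapping above: size() is stored fastest-first
// (width, height, feats), while dimension_ counts from the top as in Torch,
// so for a 3D input with dimension_ = 0 (join over features),
// jdim = 3 - 0 - 1 = 2, i.e. the feats entry of size(). Joining two
// 96x96x8 tensors over features then gives nelems_jdim = 8 + 8 and a
// 96x96x16 output.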