inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col, const int num_spatial_axes, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, const int* dilation, Dtype* data_output) { if (!im2col) { int im_size = im_shape[0]; for (int i = 0; i < num_spatial_axes; ++i) { im_size *= im_shape[1 + i]; } caffe_set(im_size, Dtype(0), data_output); } int kernel_size = 1; for (int i = 0; i < num_spatial_axes; ++i) { kernel_size *= kernel_shape[i]; } const int channels_col = col_shape[0]; vector<int> d_offset(num_spatial_axes, 0); vector<int> d_iter(num_spatial_axes, 0); for (int c_col = 0; c_col < channels_col; ++c_col) { // Loop over spatial axes in reverse order to compute a per-axis offset. int offset = c_col; for (int d_i = num_spatial_axes - 1; d_i >= 0; --d_i) { if (d_i < num_spatial_axes - 1) { offset /= kernel_shape[d_i + 1]; } d_offset[d_i] = offset % kernel_shape[d_i]; } for (bool incremented = true; incremented; ) { // Loop over spatial axes in forward order to compute the indices in the // image and column, and whether the index lies in the padding. int index_col = c_col; int index_im = c_col / kernel_size; bool is_padding = false; for (int d_i = 0; d_i < num_spatial_axes; ++d_i) { const int d = d_iter[d_i]; const int d_im = d * stride[d_i] - pad[d_i] + d_offset[d_i] * dilation[d_i]; is_padding |= d_im < 0 || d_im >= im_shape[d_i + 1]; index_col *= col_shape[d_i + 1]; index_col += d; index_im *= im_shape[d_i + 1]; index_im += d_im; } if (im2col) { if (is_padding) { data_output[index_col] = 0; } else { data_output[index_col] = data_input[index_im]; } } else if (!is_padding) { // col2im data_output[index_im] += data_input[index_col]; } // Loop over spatial axes in reverse order to choose an index, // like counting. incremented = false; for (int d_i = num_spatial_axes - 1; d_i >= 0; --d_i) { const int d_max = col_shape[d_i + 1]; DCHECK_LT(d_iter[d_i], d_max); if (d_iter[d_i] == d_max - 1) { d_iter[d_i] = 0; } else { // d_iter[d_i] < d_max - 1 ++d_iter[d_i]; incremented = true; break; } } } // while(incremented) { } // for (int c = 0; c < channels_col; ++c) { }
inline void im2col_nd_core_cpu(const Dtype* data_input, const bool im2col, const int num_spatial_axes, const int* im_shape, const int* col_shape, const int* kernel_shape, const int* pad, const int* stride, const int* dilation, Dtype* data_output) { // 如果不是im2col则表明是col2im,也就是说data_output是需要输出的原始图像大小的数据 if (!im2col) { int im_size = im_shape[0]; for (int i = 0; i < num_spatial_axes; ++i) { im_size *= im_shape[1 + i]; } caffe_set(im_size, Dtype(0), data_output); } // 一个kernel大小的块有多大 int kernel_size = 1; for (int i = 0; i < num_spatial_axes; ++i) { kernel_size *= kernel_shape[i]; } // channels_col = inputchannel(输入图像的channel)*kernel_size const int channels_col = col_shape[0]; // 类似于im2col中的w_offset和h_offset,只不过因为这里是n维,所以用数组表示 vector<int> d_offset(num_spatial_axes, 0); // 类似于im2col中w和h,是col_buff中的偏移 vector<int> d_iter(num_spatial_axes, 0); for (int c_col = 0; c_col < channels_col; ++c_col) { // Loop over spatial axes in reverse order to compute a per-axis offset. // Loop over spatial axes in reverse order to compute a per-axis offset. // 计算n维kernel上的offset,与im2col中对应的代码一样的道理 // 只不过这里是n维了,所以用d_offset来表示 // 注意,这里用逆序来进行计算得到每个轴的偏移 int offset = c_col; for (int d_i = num_spatial_axes - 1; d_i >= 0; --d_i) { if (d_i < num_spatial_axes - 1) { offset /= kernel_shape[d_i + 1]; } d_offset[d_i] = offset % kernel_shape[d_i]; } for (bool incremented = true; incremented; ) { // Loop over spatial axes in forward order to compute the indices in the // image and column, and whether the index lies in the padding. // 是经过im2colnd变换之后的索引 int index_col = c_col; // index_im是原始图像中的channel int index_im = c_col / kernel_size; bool is_padding = false; for (int d_i = 0; d_i < num_spatial_axes; ++d_i) { // d是col_buff上的偏移,与d_pad相对(d_pad是原始图像上的偏移) const int d = d_iter[d_i]; // 在d_pad是经过pad之后的col_buff中的坐标经过转换成原图中的坐标 const int d_im = d * stride[d_i] - pad[d_i] + d_offset[d_i] * dilation[d_i]; // 判断经过im2colnd处理的图像上的像素是否位于输入的n维图像的上的pad的那个部分 is_padding |= d_im < 0 || d_im >= im_shape[d_i + 1]; // 计算位于col_buff中的位置(就是经过im2colnd变换之后的) index_col *= col_shape[d_i + 1]; index_col += d; // 计算位于原始图像中的位置 index_im *= im_shape[d_i + 1]; index_im += d_im; } if (im2col) { if (is_padding) { // 如果是位于pad的部分则设置为0 data_output[index_col] = 0; } else { data_output[index_col] = data_input[index_im]; } } else if (!is_padding) { // col2im data_output[index_im] += data_input[index_col]; } // 更新位于col_buff上的偏移d(d_iter就是所有的d存进去的) // Loop over spatial axes in reverse order to choose an index, // like counting. incremented = false; for (int d_i = num_spatial_axes - 1; d_i >= 0; --d_i) { const int d_max = col_shape[d_i + 1]; DCHECK_LT(d_iter[d_i], d_max); if (d_iter[d_i] == d_max - 1) { d_iter[d_i] = 0; } else { // d_iter[d_i] < d_max - 1 ++d_iter[d_i]; incremented = true; break; } } } // while(incremented) { } // for (int c = 0; c < channels_col; ++c) { }