void col2im_cpu(const Dtype* data_col, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, Dtype* data_im) { caffe_set(height * width * channels, Dtype(0), data_im); const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; const int output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; const int channel_size = height * width; for (int channel = channels; channel--; data_im += channel_size) { for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) { for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) { int input_row = -pad_h + kernel_row * dilation_h; for (int output_rows = output_h; output_rows; output_rows--) { if (!is_a_ge_zero_and_a_lt_b(input_row, height)) { data_col += output_w; } else { int input_col = -pad_w + kernel_col * dilation_w; for (int output_col = output_w; output_col; output_col--) { if (is_a_ge_zero_and_a_lt_b(input_col, width)) { data_im[input_row * width + input_col] += *data_col; } data_col++; input_col += stride_w; } } input_row += stride_h; } } } } }
void im2col_cpu_big(const Dtype* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, Dtype* data_col, int bs) { const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; const int output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; const int channel_size = height * width; for (int channel = 0; channel < channels; ++ channel) { for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) { for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) { for (int b = 0; b < bs; ++b) { int input_row = -pad_h + kernel_row * dilation_h; const Dtype * data = data_im + (b * channels + channel) * channel_size; for (int output_rows = output_h; output_rows; output_rows--) { if (!is_a_ge_zero_and_a_lt_b(input_row, height)) { for (int output_cols = output_w; output_cols; output_cols--) { *(data_col++) = 0; } // end for ow? } else { int input_col = -pad_w + kernel_col * dilation_w; for (int output_col = output_w; output_col; output_col--) { if (is_a_ge_zero_and_a_lt_b(input_col, width)) { *(data_col++) = data[input_row * width + input_col]; } else { *(data_col++) = 0; } // end if input_col += stride_w; } // end for ow } input_row += stride_h; } // end for oh } // end for batch size } // end for kenel_col } // end for kernel_row } // end for channel }
void im2col_cpu(const Dtype* data_im, const int_tp channels, const int_tp height, const int_tp width, const int_tp kernel_h, const int_tp kernel_w, const int_tp pad_h, const int_tp pad_w, const int_tp stride_h, const int_tp stride_w, const int_tp dilation_h, const int_tp dilation_w, Dtype* data_col, const QuantizerValues* const data_quant) { const Dtype zero = data_quant ? data_quant->template get_zero<Dtype>() : Dtype(0); const int_tp output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; const int_tp output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; const int_tp channel_size = height * width; for (int_tp channel = channels; channel--; data_im += channel_size) { for (int_tp kernel_row = 0; kernel_row < kernel_h; kernel_row++) { for (int_tp kernel_col = 0; kernel_col < kernel_w; kernel_col++) { int_tp input_row = -pad_h + kernel_row * dilation_h; for (int_tp output_rows = output_h; output_rows; output_rows--) { if (!is_a_ge_zero_and_a_lt_b(input_row, height)) { for (int_tp output_cols = output_w; output_cols; output_cols--) { *(data_col++) = zero; } } else { int_tp input_col = -pad_w + kernel_col * dilation_w; for (int_tp output_col = output_w; output_col; output_col--) { if (is_a_ge_zero_and_a_lt_b(input_col, width)) { *(data_col++) = data_im[input_row * width + input_col]; } else { *(data_col++) = zero; } input_col += stride_w; } } input_row += stride_h; } } } } }
void im2col_cpu(const Dtype* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, Dtype* data_col) { const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; const int output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; const int channel_size = height * width; // 遍历一个kernel大小的图像 for (int channel = channels; channel--; data_im += channel_size) { // 下面三行是计算在kernel大小的图像上面的位置 // c_im h_offset w_offset // 遍历卷积之后的图像的上面的每一个像素 for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) { for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) { // 计算卷积之后的图像与卷积之前的图像的位置 // 卷积之后的图像与卷积之前的图像像素所对应的位置 // 卷积之后的像素为h和w那么所对应的原图像的位置为 [h * stride_h - pad_h, h * stride_h - pad_h+kernel_h]以及 // [w * stride_w - pad_w, w * stride_w - pad_w+kernel_w] int input_row = -pad_h + kernel_row * dilation_h; for (int output_rows = output_h; output_rows; output_rows--) { // 如果符合input_row>=height运行循环里面的代码,然后去掉大于height的部分 if (!is_a_ge_zero_and_a_lt_b(input_row, height)) { for (int output_cols = output_w; output_cols; output_cols--) { *(data_col++) = 0; } } else { int input_col = -pad_w + kernel_col * dilation_w; for (int output_col = output_w; output_col; output_col--) { // 如果符合input_col<width运行循环里面的代码,然后将这个位置的kernel的地址给了data_col,否则去掉大于width的部分 if (is_a_ge_zero_and_a_lt_b(input_col, width)) { *(data_col++) = data_im[input_row * width + input_col]; } else { *(data_col++) = 0; } input_col += stride_w; } } input_row += stride_h; } } } } }
void im2col_cpu(const Dtype* data_im, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, Dtype* data_col) { #if 0 const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; const int output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; const int channel_size = height * width; for (int channel = channels; channel--; data_im += channel_size) { for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) { for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) { int input_row = -pad_h + kernel_row * dilation_h; for (int output_rows = output_h; output_rows; output_rows--) { if (!is_a_ge_zero_and_a_lt_b(input_row, height)) { for (int output_cols = output_w; output_cols; output_cols--) { *(data_col++) = 0; } } else { int input_col = -pad_w + kernel_col * dilation_w; for (int output_col = output_w; output_col; output_col--) { if (is_a_ge_zero_and_a_lt_b(input_col, width)) { *(data_col++) = data_im[input_row * width + input_col]; } else { *(data_col++) = 0; } input_col += stride_w; } } input_row += stride_h; } } } } #else int dil_kernel_h = (kernel_h - 1) * dilation_h + 1; int dil_kernel_w = (kernel_w - 1) * dilation_w + 1; int height_col = (height + 2 * pad_h - dil_kernel_h) / stride_h + 1; int width_col = (width + 2 * pad_w - dil_kernel_w) / stride_w + 1; int channels_col = channels * kernel_h * kernel_w; #ifdef _OPENMP #pragma omp parallel for #endif for (int c = 0; c < channels_col; ++c) { int w_offset = c % kernel_w; int h_offset = (c / kernel_w) % kernel_h; int c_im = c / kernel_h / kernel_w; const int hc0 = h_offset * dilation_h - pad_h; const int wc0 = w_offset * dilation_w - pad_w; for (int h = 0; h < height_col; ++h) { int h_pad = h * stride_h + hc0; const int row_offset = (c * height_col + h) * width_col; const int srow_offset = (c_im * height + h_pad) * width; for (int w = 0; w < width_col; ++w) { int w_pad = w * stride_w + wc0; if ((((unsigned)h_pad) < ((unsigned)height)) && (((unsigned)w_pad) < ((unsigned)width))) data_col[row_offset + w] = data_im[srow_offset + w_pad]; else { data_col[row_offset + w] = 0.; } } } } #endif }
void col2im_cpu(const Dtype* data_col, const int channels, const int height, const int width, const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, const int stride_h, const int stride_w, const int dilation_h, const int dilation_w, Dtype* data_im) { #if 0 caffe_set(height * width * channels, Dtype(0), data_im); const int output_h = (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; const int output_w = (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; const int channel_size = height * width; for (int channel = channels; channel--; data_im += channel_size) { for (int kernel_row = 0; kernel_row < kernel_h; kernel_row++) { for (int kernel_col = 0; kernel_col < kernel_w; kernel_col++) { int input_row = -pad_h + kernel_row * dilation_h; for (int output_rows = output_h; output_rows; output_rows--) { if (!is_a_ge_zero_and_a_lt_b(input_row, height)) { data_col += output_w; } else { int input_col = -pad_w + kernel_col * dilation_w; for (int output_col = output_w; output_col; output_col--) { if (is_a_ge_zero_and_a_lt_b(input_col, width)) { data_im[input_row * width + input_col] += *data_col; } data_col++; input_col += stride_w; } } input_row += stride_h; } } } } #else int dil_patch_h = (kernel_h - 1) * dilation_h + 1; int dil_patch_w = (kernel_w - 1) * dilation_w + 1; int height_col = (height + 2 * pad_h - dil_patch_h) / stride_h + 1; int width_col = (width + 2 * pad_w - dil_patch_w) / stride_w + 1; long chunk_len = kernel_h * kernel_w; caffe_set(height * width * channels, Dtype(0), data_im); #ifdef _OPENMP #pragma omp parallel for if (channels > 1) #endif for (int idx = 0; idx < channels; ++idx) { for (int inner_idx = 0; inner_idx < chunk_len; ++inner_idx) { int c = idx * chunk_len + inner_idx; int w_offset = c % kernel_w; int h_offset = (c / kernel_w) % kernel_h; int c_im = c / kernel_h / kernel_w; const int hc0 = h_offset * dilation_h - pad_h; const int wc0 = w_offset * dilation_w - pad_w; for (int h = 0; h < height_col; ++h) { for (int w = 0; w < width_col; ++w) { int h_pad = h * stride_h + hc0; const int srow_offset = (c_im * height + h_pad) * width; const int row_offset = (c * height_col + h) * width_col; int w_pad = w * stride_w + wc0; if ((((unsigned)h_pad) < ((unsigned)height)) && (((unsigned)w_pad) < ((unsigned)width))) { data_im[srow_offset + w_pad] += data_col[row_offset + w]; } } } } } #endif }