int SPP::forward(const Mat& bottom_blob, Mat& top_blob) const { // 1 + 4 + 16 + 64 + ... + (2*pyramid_height)^2 int pyramid_num_bins = ((1 << (pyramid_height * 2)) - 1) / 3; top_blob.create(pyramid_num_bins, 1, 2); if (top_blob.empty()) return -100; float* pyramid_ptr = top_blob; // all spatial pyramids for (int p = 0; p < pyramid_height; p++) { int w = bottom_blob.w; int h = bottom_blob.h; int channels = bottom_blob.c; int num_bins = 1 << p; int kernel_h = ceil(h / (float)num_bins); int stride_h = kernel_h; int remainder_h = stride_h * num_bins - h; int pad_h = (remainder_h + 1) / 2; int kernel_w = ceil(w / (float)num_bins); int stride_w = kernel_w; int remainder_w = stride_w * num_bins - w; int pad_w = (remainder_w + 1) / 2; // max value in NxN window // avg value in NxN window int outw = num_bins; int outh = num_bins; Mat bottom_blob_bordered = bottom_blob; if (pad_h > 0 || pad_w > 0) { copy_make_border(bottom_blob, bottom_blob_bordered, pad_h, pad_h, pad_w, pad_w, BORDER_CONSTANT, 0.f); if (bottom_blob_bordered.empty()) return -100; w = bottom_blob_bordered.w; h = bottom_blob_bordered.h; } const int maxk = kernel_h * kernel_w; // kernel offsets std::vector<int> _space_ofs(maxk); int* space_ofs = &_space_ofs[0]; { int p1 = 0; int p2 = 0; int gap = w - kernel_w; for (int i = 0; i < kernel_h; i++) { for (int j = 0; j < kernel_w; j++) { space_ofs[p1] = p2; p1++; p2++; } p2 += gap; } } if (pooling_type == PoolMethod_MAX) { #pragma omp parallel for for (int q=0; q<channels; q++) { const Mat m(w, h, bottom_blob_bordered.channel(q)); float* outptr = pyramid_ptr + outh * outw * q; for (int i = 0; i < outh; i++) { for (int j = 0; j < outw; j++) { const float* sptr = m.row(i*stride_h) + j*stride_w; float max = sptr[0]; for (int k = 0; k < maxk; k++) { float val = sptr[ space_ofs[k] ]; max = std::max(max, val); } outptr[j] = max; } outptr += outw; } } } else if (pooling_type == PoolMethod_AVE) { #pragma omp parallel for for (int q=0; q<channels; q++) { const Mat m(w, h, bottom_blob_bordered.channel(q)); float* outptr = pyramid_ptr + outh * outw * q; for (int i = 0; i < outh; i++) { for (int j = 0; j < outw; j++) { const float* sptr = m.row(i*stride_h) + j*stride_w; float sum = 0; for (int k = 0; k < maxk; k++) { float val = sptr[ space_ofs[k] ]; sum += val; } outptr[j] = sum / maxk; } outptr += outw; } } } pyramid_ptr += channels * outh * outw; } return 0; }
int LRN::forward(const Mat& bottom_blob, Mat& top_blob) const { int w = bottom_blob.w; int h = bottom_blob.h; int channels = bottom_blob.c; int size = w * h; top_blob.create(w, h, channels); if (top_blob.empty()) return -100; // squared values with local_size padding Mat square_blob; square_blob.create(w, h, channels); if (square_blob.empty()) return -100; #pragma omp parallel for for (int q=0; q<channels; q++) { const float* ptr = bottom_blob.channel(q); float* outptr = square_blob.channel(q); for (int i=0; i<size; i++) { outptr[i] = ptr[i] * ptr[i]; } } if (region_type == NormRegion_ACROSS_CHANNELS) { top_blob.fill(0.f); const float alpha_div_size = alpha / local_size; #pragma omp parallel for for (int q=0; q<channels; q++) { // square sum float* outptr = top_blob.channel(q); for (int p=q - local_size / 2; p<=q + local_size / 2; p++) { if (p < 0 || p >= channels) continue; const float* sptr = square_blob.channel(p); for (int i=0; i<size; i++) { outptr[i] += sptr[i]; } } const float* ptr = bottom_blob.channel(q); for (int i=0; i<size; i++) { outptr[i] = ptr[i] * pow(1.f + alpha_div_size * outptr[i], -beta); } } } else if (region_type == NormRegion_WITHIN_CHANNEL) { int outw = w; int outh = h; Mat square_blob_bordered = square_blob; int pad = local_size / 2; if (pad > 0) { copy_make_border(square_blob, square_blob_bordered, pad, local_size - pad - 1, pad, local_size - pad - 1, BORDER_CONSTANT, 0.f); if (square_blob_bordered.empty()) return -100; w = square_blob_bordered.w; h = square_blob_bordered.h; } const int maxk = local_size * local_size; const float alpha_div_size = alpha / maxk; // norm window offsets std::vector<int> _space_ofs(maxk); int* space_ofs = &_space_ofs[0]; { int p1 = 0; int p2 = 0; int gap = w - local_size; for (int i = 0; i < local_size; i++) { for (int j = 0; j < local_size; j++) { space_ofs[p1] = p2; p1++; p2++; } p2 += gap; } } #pragma omp parallel for for (int q=0; q<channels; q++) { const float* ptr = bottom_blob.channel(q); const Mat m = square_blob_bordered.channel(q); float* outptr = top_blob.channel(q); for (int i = 0; i < outh; i++) { for (int j = 0; j < outw; j++) { const float* sptr = m.row(i) + j; float ss = 0.f; for (int k = 0; k < maxk; k++) { float val = sptr[ space_ofs[k] ]; ss += val; } outptr[j] = ptr[j] * pow(1.f + alpha_div_size * ss, -beta); } ptr += outw; outptr += outw; } } } return 0; }