int SPP::forward(const Mat& bottom_blob, Mat& top_blob) const { // 1 + 4 + 16 + 64 + ... + (2*pyramid_height)^2 int pyramid_num_bins = ((1 << (pyramid_height * 2)) - 1) / 3; top_blob.create(pyramid_num_bins, 1, 2); if (top_blob.empty()) return -100; float* pyramid_ptr = top_blob; // all spatial pyramids for (int p = 0; p < pyramid_height; p++) { int w = bottom_blob.w; int h = bottom_blob.h; int channels = bottom_blob.c; int num_bins = 1 << p; int kernel_h = ceil(h / (float)num_bins); int stride_h = kernel_h; int remainder_h = stride_h * num_bins - h; int pad_h = (remainder_h + 1) / 2; int kernel_w = ceil(w / (float)num_bins); int stride_w = kernel_w; int remainder_w = stride_w * num_bins - w; int pad_w = (remainder_w + 1) / 2; // max value in NxN window // avg value in NxN window int outw = num_bins; int outh = num_bins; Mat bottom_blob_bordered = bottom_blob; if (pad_h > 0 || pad_w > 0) { copy_make_border(bottom_blob, bottom_blob_bordered, pad_h, pad_h, pad_w, pad_w, BORDER_CONSTANT, 0.f); if (bottom_blob_bordered.empty()) return -100; w = bottom_blob_bordered.w; h = bottom_blob_bordered.h; } const int maxk = kernel_h * kernel_w; // kernel offsets std::vector<int> _space_ofs(maxk); int* space_ofs = &_space_ofs[0]; { int p1 = 0; int p2 = 0; int gap = w - kernel_w; for (int i = 0; i < kernel_h; i++) { for (int j = 0; j < kernel_w; j++) { space_ofs[p1] = p2; p1++; p2++; } p2 += gap; } } if (pooling_type == PoolMethod_MAX) { #pragma omp parallel for for (int q=0; q<channels; q++) { const Mat m(w, h, bottom_blob_bordered.channel(q)); float* outptr = pyramid_ptr + outh * outw * q; for (int i = 0; i < outh; i++) { for (int j = 0; j < outw; j++) { const float* sptr = m.row(i*stride_h) + j*stride_w; float max = sptr[0]; for (int k = 0; k < maxk; k++) { float val = sptr[ space_ofs[k] ]; max = std::max(max, val); } outptr[j] = max; } outptr += outw; } } } else if (pooling_type == PoolMethod_AVE) { #pragma omp parallel for for (int q=0; q<channels; q++) { const Mat m(w, h, bottom_blob_bordered.channel(q)); float* outptr = pyramid_ptr + outh * outw * q; for (int i = 0; i < outh; i++) { for (int j = 0; j < outw; j++) { const float* sptr = m.row(i*stride_h) + j*stride_w; float sum = 0; for (int k = 0; k < maxk; k++) { float val = sptr[ space_ofs[k] ]; sum += val; } outptr[j] = sum / maxk; } outptr += outw; } } } pyramid_ptr += channels * outh * outw; } return 0; }
int DeconvolutionDepthWise::forward(const Mat& bottom_blob, Mat& top_blob) const { // deconvolv with NxN kernel // value = value + bias int w = bottom_blob.w; int h = bottom_blob.h; int channels = bottom_blob.c; if (channels % group != 0 || num_output % group != 0) { // reject invalid group return -100; } const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1; const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1; int outw = (w - 1) * stride_w + kernel_extent_w; int outh = (h - 1) * stride_h + kernel_extent_h; Mat top_blob_bordered = top_blob; top_blob_bordered.create(outw, outh, num_output); if (top_blob_bordered.empty()) return -100; const int maxk = kernel_w * kernel_h; // kernel offsets std::vector<int> _space_ofs(maxk); int* space_ofs = &_space_ofs[0]; { int p1 = 0; int p2 = 0; int gap = outw * dilation_h - kernel_w * dilation_w; for (int i = 0; i < kernel_h; i++) { for (int j = 0; j < kernel_w; j++) { space_ofs[p1] = p2; p1++; p2 += dilation_w; } p2 += gap; } } // depth-wise if (channels == group && group == num_output) { #pragma omp parallel for for (int g=0; g<group; g++) { const float* inptr = bottom_blob.channel(g); const float* kptr = (const float*)weight_data + maxk * g; Mat m = top_blob_bordered.channel(g); const float bias = bias_term ? bias_data[g] : 0.f; m.fill(bias); for (int i = 0; i < h; i++) { for (int j = 0; j < w; j++) { float* outptr = m.row(i*stride_h) + j*stride_w; for (int k = 0; k < maxk; k++) { float val = inptr[i*w + j]; float w = kptr[k]; outptr[ space_ofs[k] ] += val * w; } } } } } else { // num_output const int channels_g = channels / group; const int num_output_g = num_output / group; #pragma omp parallel for for (int g = 0; g < group; g++) { const float* weight_data_ptr = (const float*)weight_data + maxk * channels_g * num_output_g * g; for (int p = 0; p < num_output_g; p++) { Mat out = top_blob_bordered.channel(g * num_output_g + p); const float bias = bias_term ? bias_data[g * num_output_g + p] : 0.f; out.fill(bias); for (int i = 0; i < h; i++) { for (int j = 0; j < w; j++) { float* outptr = out.row(i*stride_h) + j*stride_w; const float* kptr = weight_data_ptr + maxk * channels_g * p; // channels_g for (int q = 0; q < channels_g; q++) { const Mat m = bottom_blob.channel(channels_g * g + q); float val = *(m.row(i) + j); for (int k = 0; k < maxk; k++) { outptr[ space_ofs[k] ] += val * kptr[k]; } kptr += maxk; } } } } } } top_blob = top_blob_bordered; if (pad_w > 0 || pad_h > 0) { copy_cut_border(top_blob_bordered, top_blob, pad_h, pad_h, pad_w, pad_w); if (top_blob.empty()) return -100; outw = top_blob.w; outh = top_blob.h; } return 0; }
int LRN::forward(const Mat& bottom_blob, Mat& top_blob) const { int w = bottom_blob.w; int h = bottom_blob.h; int channels = bottom_blob.c; int size = w * h; top_blob.create(w, h, channels); if (top_blob.empty()) return -100; // squared values with local_size padding Mat square_blob; square_blob.create(w, h, channels); if (square_blob.empty()) return -100; #pragma omp parallel for for (int q=0; q<channels; q++) { const float* ptr = bottom_blob.channel(q); float* outptr = square_blob.channel(q); for (int i=0; i<size; i++) { outptr[i] = ptr[i] * ptr[i]; } } if (region_type == NormRegion_ACROSS_CHANNELS) { top_blob.fill(0.f); const float alpha_div_size = alpha / local_size; #pragma omp parallel for for (int q=0; q<channels; q++) { // square sum float* outptr = top_blob.channel(q); for (int p=q - local_size / 2; p<=q + local_size / 2; p++) { if (p < 0 || p >= channels) continue; const float* sptr = square_blob.channel(p); for (int i=0; i<size; i++) { outptr[i] += sptr[i]; } } const float* ptr = bottom_blob.channel(q); for (int i=0; i<size; i++) { outptr[i] = ptr[i] * pow(1.f + alpha_div_size * outptr[i], -beta); } } } else if (region_type == NormRegion_WITHIN_CHANNEL) { int outw = w; int outh = h; Mat square_blob_bordered = square_blob; int pad = local_size / 2; if (pad > 0) { copy_make_border(square_blob, square_blob_bordered, pad, local_size - pad - 1, pad, local_size - pad - 1, BORDER_CONSTANT, 0.f); if (square_blob_bordered.empty()) return -100; w = square_blob_bordered.w; h = square_blob_bordered.h; } const int maxk = local_size * local_size; const float alpha_div_size = alpha / maxk; // norm window offsets std::vector<int> _space_ofs(maxk); int* space_ofs = &_space_ofs[0]; { int p1 = 0; int p2 = 0; int gap = w - local_size; for (int i = 0; i < local_size; i++) { for (int j = 0; j < local_size; j++) { space_ofs[p1] = p2; p1++; p2++; } p2 += gap; } } #pragma omp parallel for for (int q=0; q<channels; q++) { const float* ptr = bottom_blob.channel(q); const Mat m = square_blob_bordered.channel(q); float* outptr = top_blob.channel(q); for (int i = 0; i < outh; i++) { for (int j = 0; j < outw; j++) { const float* sptr = m.row(i) + j; float ss = 0.f; for (int k = 0; k < maxk; k++) { float val = sptr[ space_ofs[k] ]; ss += val; } outptr[j] = ptr[j] * pow(1.f + alpha_div_size * ss, -beta); } ptr += outw; outptr += outw; } } } return 0; }
void CV_BilateralFilterTest::reference_bilateral_filter(const Mat &src, Mat &dst, int d, double sigma_color, double sigma_space, int borderType) { int cn = src.channels(); int i, j, k, maxk, radius; double minValSrc = -1, maxValSrc = 1; const int kExpNumBinsPerChannel = 1 << 12; int kExpNumBins = 0; float lastExpVal = 1.f; float len, scale_index; Size size = src.size(); dst.create(size, src.type()); CV_Assert( (src.type() == CV_32FC1 || src.type() == CV_32FC3) && src.type() == dst.type() && src.size() == dst.size() && src.data != dst.data ); if( sigma_color <= 0 ) sigma_color = 1; if( sigma_space <= 0 ) sigma_space = 1; double gauss_color_coeff = -0.5/(sigma_color*sigma_color); double gauss_space_coeff = -0.5/(sigma_space*sigma_space); if( d <= 0 ) radius = cvRound(sigma_space*1.5); else radius = d/2; radius = MAX(radius, 1); d = radius*2 + 1; // compute the min/max range for the input image (even if multichannel) minMaxLoc( src.reshape(1), &minValSrc, &maxValSrc ); if(std::abs(minValSrc - maxValSrc) < FLT_EPSILON) { src.copyTo(dst); return; } // temporary copy of the image with borders for easy processing Mat temp; copyMakeBorder( src, temp, radius, radius, radius, radius, borderType ); patchNaNs(temp); // allocate lookup tables vector<float> _space_weight(d*d); vector<int> _space_ofs(d*d); float* space_weight = &_space_weight[0]; int* space_ofs = &_space_ofs[0]; // assign a length which is slightly more than needed len = (float)(maxValSrc - minValSrc) * cn; kExpNumBins = kExpNumBinsPerChannel * cn; vector<float> _expLUT(kExpNumBins+2); float* expLUT = &_expLUT[0]; scale_index = kExpNumBins/len; // initialize the exp LUT for( i = 0; i < kExpNumBins+2; i++ ) { if( lastExpVal > 0.f ) { double val = i / scale_index; expLUT[i] = (float)std::exp(val * val * gauss_color_coeff); lastExpVal = expLUT[i]; } else expLUT[i] = 0.f; } // initialize space-related bilateral filter coefficients for( i = -radius, maxk = 0; i <= radius; i++ ) for( j = -radius; j <= radius; j++ ) { double r = std::sqrt((double)i*i + (double)j*j); if( r > radius ) continue; space_weight[maxk] = (float)std::exp(r*r*gauss_space_coeff); space_ofs[maxk++] = (int)(i*(temp.step/sizeof(float)) + j*cn); } for( i = 0; i < size.height; i++ ) { const float* sptr = (const float*)(temp.data + (i+radius)*temp.step) + radius*cn; float* dptr = (float*)(dst.data + i*dst.step); if( cn == 1 ) { for( j = 0; j < size.width; j++ ) { float sum = 0, wsum = 0; float val0 = sptr[j]; for( k = 0; k < maxk; k++ ) { float val = sptr[j + space_ofs[k]]; float alpha = (float)(std::abs(val - val0)*scale_index); int idx = cvFloor(alpha); alpha -= idx; float w = space_weight[k]*(expLUT[idx] + alpha*(expLUT[idx+1] - expLUT[idx])); sum += val*w; wsum += w; } dptr[j] = (float)(sum/wsum); } } else { assert( cn == 3 ); for( j = 0; j < size.width*3; j += 3 ) { float sum_b = 0, sum_g = 0, sum_r = 0, wsum = 0; float b0 = sptr[j], g0 = sptr[j+1], r0 = sptr[j+2]; for( k = 0; k < maxk; k++ ) { const float* sptr_k = sptr + j + space_ofs[k]; float b = sptr_k[0], g = sptr_k[1], r = sptr_k[2]; float alpha = (float)((std::abs(b - b0) + std::abs(g - g0) + std::abs(r - r0))*scale_index); int idx = cvFloor(alpha); alpha -= idx; float w = space_weight[k]*(expLUT[idx] + alpha*(expLUT[idx+1] - expLUT[idx])); sum_b += b*w; sum_g += g*w; sum_r += r*w; wsum += w; } wsum = 1.f/wsum; b0 = sum_b*wsum; g0 = sum_g*wsum; r0 = sum_r*wsum; dptr[j] = b0; dptr[j+1] = g0; dptr[j+2] = r0; } } } }