void MultiBandBlender::feed(const Mat &img, const Mat &mask, Point tl) { CV_Assert(img.type() == CV_16SC3 || img.type() == CV_8UC3); CV_Assert(mask.type() == CV_8U); // Keep source image in memory with small border int gap = 3 * (1 << num_bands_); Point tl_new(std::max(dst_roi_.x, tl.x - gap), std::max(dst_roi_.y, tl.y - gap)); Point br_new(std::min(dst_roi_.br().x, tl.x + img.cols + gap), std::min(dst_roi_.br().y, tl.y + img.rows + gap)); // Ensure coordinates of top-left, bottom-right corners are divided by (1 << num_bands_). // After that scale between layers is exactly 2. // // We do it to avoid interpolation problems when keeping sub-images only. There is no such problem when // image is bordered to have size equal to the final image size, but this is too memory hungry approach. tl_new.x = dst_roi_.x + (((tl_new.x - dst_roi_.x) >> num_bands_) << num_bands_); tl_new.y = dst_roi_.y + (((tl_new.y - dst_roi_.y) >> num_bands_) << num_bands_); int width = br_new.x - tl_new.x; int height = br_new.y - tl_new.y; width += ((1 << num_bands_) - width % (1 << num_bands_)) % (1 << num_bands_); height += ((1 << num_bands_) - height % (1 << num_bands_)) % (1 << num_bands_); br_new.x = tl_new.x + width; br_new.y = tl_new.y + height; int dy = std::max(br_new.y - dst_roi_.br().y, 0); int dx = std::max(br_new.x - dst_roi_.br().x, 0); tl_new.x -= dx; br_new.x -= dx; tl_new.y -= dy; br_new.y -= dy; int top = tl.y - tl_new.y; int left = tl.x - tl_new.x; int bottom = br_new.y - tl.y - img.rows; int right = br_new.x - tl.x - img.cols; // Create the source image Laplacian pyramid Mat img_with_border; copyMakeBorder(img, img_with_border, top, bottom, left, right, BORDER_REFLECT); std::vector<Mat> src_pyr_laplace; if (can_use_gpu_ && img_with_border.depth() == CV_16S) createLaplacePyrGpu(img_with_border, num_bands_, src_pyr_laplace); else createLaplacePyr(img_with_border, num_bands_, src_pyr_laplace); // Create the weight map Gaussian pyramid Mat weight_map; std::vector<Mat> weight_pyr_gauss(num_bands_ + 1); if(weight_type_ == CV_32F) { mask.convertTo(weight_map, CV_32F, 1./255.); } else// weight_type_ == CV_16S { mask.convertTo(weight_map, CV_16S); add(weight_map, 1, weight_map, mask != 0); } copyMakeBorder(weight_map, weight_pyr_gauss[0], top, bottom, left, right, BORDER_CONSTANT); for (int i = 0; i < num_bands_; ++i) pyrDown(weight_pyr_gauss[i], weight_pyr_gauss[i + 1]); int y_tl = tl_new.y - dst_roi_.y; int y_br = br_new.y - dst_roi_.y; int x_tl = tl_new.x - dst_roi_.x; int x_br = br_new.x - dst_roi_.x; // Add weighted layer of the source image to the final Laplacian pyramid layer if(weight_type_ == CV_32F) { for (int i = 0; i <= num_bands_; ++i) { for (int y = y_tl; y < y_br; ++y) { int y_ = y - y_tl; const Point3_<short>* src_row = src_pyr_laplace[i].ptr<Point3_<short> >(y_); Point3_<short>* dst_row = dst_pyr_laplace_[i].ptr<Point3_<short> >(y); const float* weight_row = weight_pyr_gauss[i].ptr<float>(y_); float* dst_weight_row = dst_band_weights_[i].ptr<float>(y); for (int x = x_tl; x < x_br; ++x) { int x_ = x - x_tl; dst_row[x].x += static_cast<short>(src_row[x_].x * weight_row[x_]); dst_row[x].y += static_cast<short>(src_row[x_].y * weight_row[x_]); dst_row[x].z += static_cast<short>(src_row[x_].z * weight_row[x_]); dst_weight_row[x] += weight_row[x_]; } } x_tl /= 2; y_tl /= 2; x_br /= 2; y_br /= 2; } } else// weight_type_ == CV_16S { for (int i = 0; i <= num_bands_; ++i) { for (int y = y_tl; y < y_br; ++y) { int y_ = y - y_tl; const Point3_<short>* src_row = src_pyr_laplace[i].ptr<Point3_<short> >(y_); Point3_<short>* dst_row = dst_pyr_laplace_[i].ptr<Point3_<short> >(y); const short* weight_row = weight_pyr_gauss[i].ptr<short>(y_); short* dst_weight_row = dst_band_weights_[i].ptr<short>(y); for (int x = x_tl; x < x_br; ++x) { int x_ = x - x_tl; dst_row[x].x += short((src_row[x_].x * weight_row[x_]) >> 8); dst_row[x].y += short((src_row[x_].y * weight_row[x_]) >> 8); dst_row[x].z += short((src_row[x_].z * weight_row[x_]) >> 8); dst_weight_row[x] += weight_row[x_]; } } x_tl /= 2; y_tl /= 2; x_br /= 2; y_br /= 2; } } }
void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) { #if ENABLE_LOG int64 t = getTickCount(); #endif UMat img = _img.getUMat(); CV_Assert(img.type() == CV_16SC3 || img.type() == CV_8UC3); CV_Assert(mask.type() == CV_8U); // Keep source image in memory with small border int gap = 3 * (1 << num_bands_); Point tl_new(std::max(dst_roi_.x, tl.x - gap), std::max(dst_roi_.y, tl.y - gap)); Point br_new(std::min(dst_roi_.br().x, tl.x + img.cols + gap), std::min(dst_roi_.br().y, tl.y + img.rows + gap)); // Ensure coordinates of top-left, bottom-right corners are divided by (1 << num_bands_). // After that scale between layers is exactly 2. // // We do it to avoid interpolation problems when keeping sub-images only. There is no such problem when // image is bordered to have size equal to the final image size, but this is too memory hungry approach. tl_new.x = dst_roi_.x + (((tl_new.x - dst_roi_.x) >> num_bands_) << num_bands_); tl_new.y = dst_roi_.y + (((tl_new.y - dst_roi_.y) >> num_bands_) << num_bands_); int width = br_new.x - tl_new.x; int height = br_new.y - tl_new.y; width += ((1 << num_bands_) - width % (1 << num_bands_)) % (1 << num_bands_); height += ((1 << num_bands_) - height % (1 << num_bands_)) % (1 << num_bands_); br_new.x = tl_new.x + width; br_new.y = tl_new.y + height; int dy = std::max(br_new.y - dst_roi_.br().y, 0); int dx = std::max(br_new.x - dst_roi_.br().x, 0); tl_new.x -= dx; br_new.x -= dx; tl_new.y -= dy; br_new.y -= dy; int top = tl.y - tl_new.y; int left = tl.x - tl_new.x; int bottom = br_new.y - tl.y - img.rows; int right = br_new.x - tl.x - img.cols; // Create the source image Laplacian pyramid UMat img_with_border; copyMakeBorder(_img, img_with_border, top, bottom, left, right, BORDER_REFLECT); LOGLN(" Add border to the source image, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); #if ENABLE_LOG t = getTickCount(); #endif std::vector<UMat> src_pyr_laplace; if (can_use_gpu_ && img_with_border.depth() == CV_16S) createLaplacePyrGpu(img_with_border, num_bands_, src_pyr_laplace); else createLaplacePyr(img_with_border, num_bands_, src_pyr_laplace); LOGLN(" Create the source image Laplacian pyramid, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); #if ENABLE_LOG t = getTickCount(); #endif // Create the weight map Gaussian pyramid UMat weight_map; std::vector<UMat> weight_pyr_gauss(num_bands_ + 1); if(weight_type_ == CV_32F) { mask.getUMat().convertTo(weight_map, CV_32F, 1./255.); } else // weight_type_ == CV_16S { mask.getUMat().convertTo(weight_map, CV_16S); UMat add_mask; compare(mask, 0, add_mask, CMP_NE); add(weight_map, Scalar::all(1), weight_map, add_mask); } copyMakeBorder(weight_map, weight_pyr_gauss[0], top, bottom, left, right, BORDER_CONSTANT); for (int i = 0; i < num_bands_; ++i) pyrDown(weight_pyr_gauss[i], weight_pyr_gauss[i + 1]); LOGLN(" Create the weight map Gaussian pyramid, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); #if ENABLE_LOG t = getTickCount(); #endif int y_tl = tl_new.y - dst_roi_.y; int y_br = br_new.y - dst_roi_.y; int x_tl = tl_new.x - dst_roi_.x; int x_br = br_new.x - dst_roi_.x; // Add weighted layer of the source image to the final Laplacian pyramid layer for (int i = 0; i <= num_bands_; ++i) { Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl); #ifdef HAVE_OPENCL if ( !cv::ocl::useOpenCL() || !ocl_MultiBandBlender_feed(src_pyr_laplace[i], weight_pyr_gauss[i], dst_pyr_laplace_[i](rc), dst_band_weights_[i](rc)) ) #endif { Mat _src_pyr_laplace = src_pyr_laplace[i].getMat(ACCESS_READ); Mat _dst_pyr_laplace = dst_pyr_laplace_[i](rc).getMat(ACCESS_RW); Mat _weight_pyr_gauss = weight_pyr_gauss[i].getMat(ACCESS_READ); Mat _dst_band_weights = dst_band_weights_[i](rc).getMat(ACCESS_RW); if(weight_type_ == CV_32F) { for (int y = 0; y < rc.height; ++y) { const Point3_<short>* src_row = _src_pyr_laplace.ptr<Point3_<short> >(y); Point3_<short>* dst_row = _dst_pyr_laplace.ptr<Point3_<short> >(y); const float* weight_row = _weight_pyr_gauss.ptr<float>(y); float* dst_weight_row = _dst_band_weights.ptr<float>(y); for (int x = 0; x < rc.width; ++x) { dst_row[x].x += static_cast<short>(src_row[x].x * weight_row[x]); dst_row[x].y += static_cast<short>(src_row[x].y * weight_row[x]); dst_row[x].z += static_cast<short>(src_row[x].z * weight_row[x]); dst_weight_row[x] += weight_row[x]; } } } else // weight_type_ == CV_16S { for (int y = 0; y < y_br - y_tl; ++y) { const Point3_<short>* src_row = _src_pyr_laplace.ptr<Point3_<short> >(y); Point3_<short>* dst_row = _dst_pyr_laplace.ptr<Point3_<short> >(y); const short* weight_row = _weight_pyr_gauss.ptr<short>(y); short* dst_weight_row = _dst_band_weights.ptr<short>(y); for (int x = 0; x < x_br - x_tl; ++x) { dst_row[x].x += short((src_row[x].x * weight_row[x]) >> 8); dst_row[x].y += short((src_row[x].y * weight_row[x]) >> 8); dst_row[x].z += short((src_row[x].z * weight_row[x]) >> 8); dst_weight_row[x] += weight_row[x]; } } } } #ifdef HAVE_OPENCL else {
void MultiBandBlenderGpu::feed(const gpu::GpuMat &d_img, const gpu::GpuMat &d_mask, Point tl) { CV_Assert(d_img.type() == CV_16SC3); CV_Assert(d_mask.type() == CV_8U); // Keep source image in memory with small border int gap = 3 * (1 << num_bands_); Point tl_new(max(dst_roi_.x, tl.x - gap), max(dst_roi_.y, tl.y - gap)); Point br_new(min(dst_roi_.br().x, tl.x + d_img.cols + gap), min(dst_roi_.br().y, tl.y + d_img.rows + gap)); // Ensure coordinates of top-left, bottom-right corners are divided by (1 << num_bands_). // After that scale between layers is exactly 2. // // We do it to avoid interpolation problems when keeping sub-images only. There is no such problem when // image is bordered to have size equal to the final image size, but this is too memory hungry approach. tl_new.x = dst_roi_.x + (((tl_new.x - dst_roi_.x) >> num_bands_) << num_bands_); tl_new.y = dst_roi_.y + (((tl_new.y - dst_roi_.y) >> num_bands_) << num_bands_); int width = br_new.x - tl_new.x; int height = br_new.y - tl_new.y; width += ((1 << num_bands_) - width % (1 << num_bands_)) % (1 << num_bands_); height += ((1 << num_bands_) - height % (1 << num_bands_)) % (1 << num_bands_); br_new.x = tl_new.x + width; br_new.y = tl_new.y + height; int dy = max(br_new.y - dst_roi_.br().y, 0); int dx = max(br_new.x - dst_roi_.br().x, 0); tl_new.x -= dx; br_new.x -= dx; tl_new.y -= dy; br_new.y -= dy; int top = tl.y - tl_new.y; int left = tl.x - tl_new.x; int bottom = br_new.y - tl.y - d_img.rows; int right = br_new.x - tl.x - d_img.cols; // Create the source image Laplacian pyramid gpu::GpuMat d_img_with_border; gpu::copyMakeBorder(d_img, d_img_with_border, top, bottom, left, right, BORDER_REFLECT, Scalar(), stream_); vector<gpu::GpuMat> d_src_pyr_laplace; createLaplacePyrGpu(d_img_with_border, num_bands_, d_src_pyr_laplace); // Create the weight map Gaussian pyramid gpu::GpuMat d_weight_map; stream_.enqueueConvert(d_mask, d_weight_map, CV_32F, 1./255.); vector<gpu::GpuMat> d_weight_pyr_gauss(num_bands_ + 1); gpu::copyMakeBorder(d_weight_map, d_weight_pyr_gauss[0], top, bottom, left, right, BORDER_CONSTANT, Scalar(), stream_); for (int i = 0; i < num_bands_; ++i) gpu::pyrDown(d_weight_pyr_gauss[i], d_weight_pyr_gauss[i + 1], stream_); int y_tl = tl_new.y - dst_roi_.y; int y_br = br_new.y - dst_roi_.y; int x_tl = tl_new.x - dst_roi_.x; int x_br = br_new.x - dst_roi_.x; // Add weighted layer of the source image to the final Laplacian pyramid layer gpu::GpuMat d_tmp; for (int i = 0; i <= num_bands_; ++i) { gpu::GpuMat d_src_roi = d_src_pyr_laplace[i](cv::Rect(0, 0, x_br-x_tl, y_br-y_tl)); gpu::GpuMat d_dst_roi = d_dst_pyr_laplace_[i](cv::Rect(x_tl, y_tl, x_br-x_tl, y_br-y_tl)); gpu::GpuMat d_weight_roi = d_weight_pyr_gauss[i](cv::Rect(0, 0, x_br-x_tl, y_br-y_tl)); gpu::GpuMat d_dst_weight_roi = d_dst_band_weights_[i](cv::Rect(x_tl, y_tl, x_br-x_tl, y_br-y_tl)); // dst_roi += src_roi * weight_roi; // dst_weight_roi += weight_roi; gpu::multiply(d_src_roi, d_weight_roi, d_tmp, 1, -1, stream_); gpu::add(d_dst_roi, d_tmp, d_dst_roi, gpu::GpuMat(), -1, stream_); gpu::add(d_dst_weight_roi, d_weight_roi, d_dst_weight_roi, gpu::GpuMat(), -1, stream_); x_tl /= 2; y_tl /= 2; x_br /= 2; y_br /= 2; } stream_.waitForCompletion(); }