void pkmSIFTFlow::computeFlow(Mat image1, Mat image2, int nchannels) { #ifdef _DEBUG assert(image1.cols == image2.cols && image1.rows == image2.rows); #endif width = image1.cols; height = image1.rows; sift1_level1 = image1; sift2_level1 = image2; pyrDown(sift1_level1, sift1_level2, cv::Size(width/2, height/2)); pyrDown(sift2_level1, sift2_level2, cv::Size(width/2, height/2)); pyrDown(sift1_level2, sift1_level3, cv::Size(width/4, height/4)); pyrDown(sift2_level2, sift2_level3, cv::Size(width/4, height/4)); pyrDown(sift1_level3, sift1_level4, cv::Size(width/8, height/8)); pyrDown(sift2_level3, sift2_level4, cv::Size(width/8, height/8)); if (!sift1_level3.isContinuous()) { printf("[ERROR] Matrix not continuous!!!\n"); } if (!sift2_level3.isContinuous()) { printf("[ERROR] Matrix not continuous!!!\n"); } if (!sift1_level2.isContinuous()) { printf("[ERROR] Matrix not continuous!!!\n"); } if (!sift2_level2.isContinuous()) { printf("[ERROR] Matrix not continuous!!!\n"); } if (!sift1_level1.isContinuous()) { printf("[ERROR] Matrix not continuous!!!\n"); } if (!sift2_level1.isContinuous()) { printf("[ERROR] Matrix not continuous!!!\n"); } bpflow.LoadImages(width, height, nchannels, sift1_level4.ptr<unsigned char>(0), sift2_level4.ptr<unsigned char>(0)); bpflow.setPara(alpha, d); // first assume homogeneous setup bpflow.setHomogeneousMRF(wsize); // level 4 vx_level4 = Mat::zeros(height/8, width/8, CV_32FC1); vy_level4 = Mat::zeros(height/8, width/8, CV_32FC1); bpflow.LoadOffset(vx_level4.ptr(0), vy_level4.ptr(0)); bpflow.LoadWinSize(winSizeX_level4.ptr(0), winSizeY_level4.ptr(0)); bpflow.ComputeDataTerm(); bpflow.ComputeRangeTerm(gamma); bpflow.MessagePassing(nIterations, 2, pEnergyList); bpflow.ComputeVelocity(); { Mat flow(height/8, width/8, CV_32FC2, bpflow.flow().pData); vector<Mat> flows; split(flow, flows); vx_level4 = flows[0]; vy_level4 = flows[1]; } pyrUp(vx_level4, vx_level3, cv::Size(width/4, height/4)); pyrUp(vy_level4, vy_level3, cv::Size(width/4, height/4)); // level 3 bpflow.LoadOffset(vx_level3.ptr(0), vy_level3.ptr(0)); bpflow.LoadWinSize(winSizeX_level3.ptr(0), winSizeY_level3.ptr(0)); bpflow.ComputeDataTerm(); bpflow.ComputeRangeTerm(gamma); bpflow.MessagePassing(nIterations, 2, pEnergyList); bpflow.ComputeVelocity(); { Mat flow(height/4, width/4, CV_32FC2, bpflow.flow().pData); vector<Mat> flows; split(flow, flows); vx_level3 = flows[0]; vy_level3 = flows[1]; } pyrUp(vx_level3, vx_level2, cv::Size(width/2, height/2)); pyrUp(vy_level3, vy_level2, cv::Size(width/2, height/2)); // level 2 bpflow.LoadOffset(vx_level2.ptr(0), vy_level2.ptr(0)); bpflow.LoadWinSize(winSizeX_level2.ptr(0), winSizeY_level2.ptr(0)); bpflow.ComputeDataTerm(); bpflow.ComputeRangeTerm(gamma); bpflow.MessagePassing(nIterations, 2, pEnergyList); bpflow.ComputeVelocity(); { Mat flow(height/2, width/2, CV_32FC2, bpflow.flow().pData); vector<Mat> flows; split(flow, flows); vx_level2 = flows[0]; vy_level2 = flows[1]; } pyrUp(vx_level2, vx_level1, cv::Size(width, height)); pyrUp(vy_level2, vy_level1, cv::Size(width, height)); // level 1 bpflow.LoadOffset(vx_level1.ptr(0), vy_level1.ptr(0)); bpflow.LoadWinSize(winSizeX_level1.ptr(0), winSizeY_level1.ptr(0)); bpflow.ComputeDataTerm(); bpflow.ComputeRangeTerm(gamma); bpflow.MessagePassing(nIterations, 2, pEnergyList); bpflow.ComputeVelocity(); { Mat flow(height, width, CV_32FC2, bpflow.flow().pData); vector<Mat> flows; split(flow, flows); vx_level1 = flows[0]; vy_level1 = flows[1]; } }
void initKernel(Mat& kernel, const Mat& blurredGray, const int width, const Mat& mask, const int pyrLevel, const int iterations, float thresholdR, float thresholdS) { assert(blurredGray.type() == CV_8U && "gray value image needed"); assert(mask.type() == CV_8U && "mask should be binary image"); // #ifndef NDEBUG // imshow("blurred", blurredGray); // #endif // save min and maximum value of the original image to be able to restore // the latent image with the correct brightness double grayMin; double grayMax; minMaxLoc(blurredGray, &grayMin, &grayMax); // build an image pyramid with gray value images vector<Mat> pyramid, masks; pyramid.push_back(blurredGray); masks.push_back(mask); for (int i = 0; i < (pyrLevel - 1); i++) { Mat downImage, downMask; pyrDown(pyramid[i], downImage, Size(pyramid[i].cols/2, pyramid[i].rows/2)); pyrDown(masks[i], downMask, Size(masks[i].cols/2, masks[i].rows/2)); pyramid.push_back(downImage); masks.push_back(downMask); } // init kernel but in the iterations the tmp-kernel is used kernel = Mat::zeros(width, width, CV_32F); Mat tmpKernel; // go through image pyramid from small to large for (int l = pyramid.size() - 1; l >= 0; l--) { #ifdef IMWRITE imshow("pyr Image", pyramid[l]); double min; double max; minMaxLoc(pyramid[l], &min, &max); cout << "pyr: " << min << " " << max << endl; waitKey(); #endif // compute image gradient for x and y direction // // gaussian blur (in-place operation is supported) GaussianBlur(pyramid[l], pyramid[l], Size(3,3), 0, 0, BORDER_DEFAULT); // parameter for sobel filtering to obtain gradients array<Mat,2> gradients, tmpGradients; const int delta = 0; const int ddepth = CV_32F; const int ksize = 3; const int scale = 1; // gradient x and y Sobel(pyramid[l], tmpGradients[0], ddepth, 1, 0, ksize, scale, delta, BORDER_DEFAULT); Sobel(pyramid[l], tmpGradients[1], ddepth, 0, 1, ksize, scale, delta, BORDER_DEFAULT); // cut off gradients outside the mask tmpGradients[0].copyTo(gradients[0], masks[l]); tmpGradients[1].copyTo(gradients[1], masks[l]); // normalize gradients into range [-1,1] normalizeOne(gradients); // #ifdef IMWRITE // showGradients("x gradient", gradients[0]); // showGradients("y gradient", gradients[1]); // #endif // compute gradient confidence for al pixels Mat gradientConfidence; computeGradientConfidence(gradientConfidence, gradients, width, masks[l]); // #ifdef IMWRITE // showFloat("confidence", gradientConfidence); // #endif // each iterations works on an updated image Mat currentImage; pyramid[l].copyTo(currentImage); // assert(iterations == 1 && "Implement multiple iterations"); for (int i = 0; i < iterations; i++) { #ifdef IMWRITE imshow("current Image", currentImage); minMaxLoc(currentImage, &min, &max); cout << "current: " << min << " " << max << endl; waitKey(); #endif // select edges for kernel estimation (normalized gradients [-1,1]) array<Mat,2> selectedEdges; selectEdges(currentImage, gradientConfidence, thresholdR, thresholdS, selectedEdges); #ifdef IMWRITE showGradients("x gradient selection", selectedEdges[0]); showGradients("y gradient selection", selectedEdges[1]); minMaxLoc(selectedEdges[0], &min, &max); cout << "x gradients: " << min << " " << max << endl; waitKey(); #endif // estimate kernel with gaussian prior fastKernelEstimation(selectedEdges, gradients, kernel, 0.0); #ifdef IMWRITE showFloat("tmp-kernel", kernel, true); minMaxLoc(kernel, &min, &max); cout << "kernel: " << min << " " << max << " sum: " << sum(kernel)[0] << endl; waitKey(); #endif // coarse image estimation with a spatial prior Mat latentImage; // FIXME: it looks like there are some edges of the gradients in the latent image. // with more iterations it becomes worse // coarseImageEstimation(pyramid[l], kernel, selectedEdges, latentImage); // use oother spatial deconv method for now deconvolveIRLS(pyramid[l], latentImage, kernel); #ifdef IMWRITE string name = "two-phase-latent-" + to_string(i); imshow(name, latentImage); waitKey(); string filename = name + ".png"; imwrite(filename, latentImage); #endif // set current image to coarse latent image latentImage.copyTo(currentImage); // decrease thresholds τ_r and τ_s will to include more and more edges thresholdR = thresholdR / 1.1; thresholdS = thresholdS / 1.1; } // set next pyramid image to the upscaled latent image if (l > 0) { Mat upImage; pyrUp(currentImage, upImage, Size(pyramid[l - 1].cols, pyramid[l - 1].rows)); pyramid[l - 1] = upImage; } } // #ifdef IMWRITE // imshow("kernel", kernel); // waitKey(); // #endif }
int main( int argc, char** argv ) { printf( "Scale Space Cost Aggregation\n" ); if( argc != 11 ) { printf( "Usage: [CC_METHOD] [CA_METHOD] [PP_METHOD] [C_ALPHA] [lImg] [rImg] [lDis] [rDis] [maxDis] [disSc]\n" ); printf( "\nPress any key to continue...\n" ); getchar(); return -1; } string ccName = argv[ 1 ]; string caName = argv[ 2 ]; string ppName = argv[ 3 ]; double costAlpha = atof( argv[ 4 ] ); string lFn = argv[ 5 ]; string rFn = argv[ 6 ]; string lDisFn = argv[ 7 ]; string rDisFn = argv[ 8 ]; int maxDis = atoi( argv[ 9 ] ); int disSc = atoi( argv[ 10 ] ); // // Load left right image // printf( "\n--------------------------------------------------------\n" ); printf( "Load Image: (%s) (%s)\n", argv[ 5 ], argv[ 6 ] ); printf( "--------------------------------------------------------\n" ); Mat lImg = imread( lFn, CV_LOAD_IMAGE_COLOR ); Mat rImg = imread( rFn, CV_LOAD_IMAGE_COLOR ); if( !lImg.data || !rImg.data ) { printf( "Error: can not open image\n" ); printf( "\nPress any key to continue...\n" ); getchar(); return -1; } // set image format cvtColor( lImg, lImg, CV_BGR2RGB ); cvtColor( rImg, rImg, CV_BGR2RGB ); lImg.convertTo( lImg, CV_64F, 1 / 255.0f ); rImg.convertTo( rImg, CV_64F, 1 / 255.0f ); // time double duration; duration = static_cast<double>(getTickCount()); // // Stereo Match at each pyramid // int PY_LVL = 5; // build pyramid and cost volume Mat lP = lImg.clone(); Mat rP = rImg.clone(); SSCA** smPyr = new SSCA*[ PY_LVL ]; CCMethod* ccMtd = getCCType( ccName ); CAMethod* caMtd = getCAType( caName ); PPMethod* ppMtd = getPPType( ppName ); for( int p = 0; p < PY_LVL; p ++ ) { if( maxDis < 5 ) { PY_LVL = p; break; } printf( "\n\tPyramid: %d:", p ); smPyr[ p ] = new SSCA( lP, rP, maxDis, disSc ); smPyr[ p ]->CostCompute( ccMtd ); smPyr[ p ]->CostAggre( caMtd ); // pyramid downsample maxDis = maxDis / 2 + 1; disSc *= 2; pyrDown( lP, lP ); pyrDown( rP, rP ); } printf( "\n--------------------------------------------------------\n" ); printf( "\n Cost Aggregation in Scale Space\n" ); printf( "\n--------------------------------------------------------\n" ); // new method SolveAll( smPyr, PY_LVL, costAlpha ); // old method //for( int p = PY_LVL - 2 ; p >= 0; p -- ) { // smPyr[ p ]->AddPyrCostVol( smPyr[ p + 1 ], costAlpha ); //} // // Match + Postprocess // smPyr[ 0 ]->Match(); smPyr[ 0 ]->PostProcess( ppMtd ); Mat lDis = smPyr[ 0 ]->getLDis(); Mat rDis = smPyr[ 0 ]->getRDis(); #ifdef _DEBUG for( int s = 0; s < PY_LVL; s ++ ) { smPyr[ s ]->Match(); Mat sDis = smPyr[ s ]->getLDis(); ostringstream sStr; sStr << s; string sFn = sStr.str( ) + "_ld.png"; imwrite( sFn, sDis ); } saveOnePixCost( smPyr, PY_LVL ); #endif #ifdef USE_MEDIAN_FILTER // // Median Filter Output // MeanFilter( lDis, lDis, 3 ); #endif duration = static_cast<double>(getTickCount())-duration; duration /= cv::getTickFrequency(); // the elapsed time in sec printf( "\n--------------------------------------------------------\n" ); printf( "Total Time: %.2lf s\n", duration ); printf( "--------------------------------------------------------\n" ); // // Save Output // imwrite( lDisFn, lDis ); imwrite( rDisFn, rDis ); delete [] smPyr; delete ccMtd; delete caMtd; delete ppMtd; return 0; }
void MultiBandBlender::feed(const Mat &img, const Mat &mask, Point tl) { CV_Assert(img.type() == CV_16SC3 || img.type() == CV_8UC3); CV_Assert(mask.type() == CV_8U); // Keep source image in memory with small border int gap = 3 * (1 << num_bands_); Point tl_new(std::max(dst_roi_.x, tl.x - gap), std::max(dst_roi_.y, tl.y - gap)); Point br_new(std::min(dst_roi_.br().x, tl.x + img.cols + gap), std::min(dst_roi_.br().y, tl.y + img.rows + gap)); // Ensure coordinates of top-left, bottom-right corners are divided by (1 << num_bands_). // After that scale between layers is exactly 2. // // We do it to avoid interpolation problems when keeping sub-images only. There is no such problem when // image is bordered to have size equal to the final image size, but this is too memory hungry approach. tl_new.x = dst_roi_.x + (((tl_new.x - dst_roi_.x) >> num_bands_) << num_bands_); tl_new.y = dst_roi_.y + (((tl_new.y - dst_roi_.y) >> num_bands_) << num_bands_); int width = br_new.x - tl_new.x; int height = br_new.y - tl_new.y; width += ((1 << num_bands_) - width % (1 << num_bands_)) % (1 << num_bands_); height += ((1 << num_bands_) - height % (1 << num_bands_)) % (1 << num_bands_); br_new.x = tl_new.x + width; br_new.y = tl_new.y + height; int dy = std::max(br_new.y - dst_roi_.br().y, 0); int dx = std::max(br_new.x - dst_roi_.br().x, 0); tl_new.x -= dx; br_new.x -= dx; tl_new.y -= dy; br_new.y -= dy; int top = tl.y - tl_new.y; int left = tl.x - tl_new.x; int bottom = br_new.y - tl.y - img.rows; int right = br_new.x - tl.x - img.cols; // Create the source image Laplacian pyramid Mat img_with_border; copyMakeBorder(img, img_with_border, top, bottom, left, right, BORDER_REFLECT); std::vector<Mat> src_pyr_laplace; if (can_use_gpu_ && img_with_border.depth() == CV_16S) createLaplacePyrGpu(img_with_border, num_bands_, src_pyr_laplace); else createLaplacePyr(img_with_border, num_bands_, src_pyr_laplace); // Create the weight map Gaussian pyramid Mat weight_map; std::vector<Mat> weight_pyr_gauss(num_bands_ + 1); if(weight_type_ == CV_32F) { mask.convertTo(weight_map, CV_32F, 1./255.); } else// weight_type_ == CV_16S { mask.convertTo(weight_map, CV_16S); add(weight_map, 1, weight_map, mask != 0); } copyMakeBorder(weight_map, weight_pyr_gauss[0], top, bottom, left, right, BORDER_CONSTANT); for (int i = 0; i < num_bands_; ++i) pyrDown(weight_pyr_gauss[i], weight_pyr_gauss[i + 1]); int y_tl = tl_new.y - dst_roi_.y; int y_br = br_new.y - dst_roi_.y; int x_tl = tl_new.x - dst_roi_.x; int x_br = br_new.x - dst_roi_.x; // Add weighted layer of the source image to the final Laplacian pyramid layer if(weight_type_ == CV_32F) { for (int i = 0; i <= num_bands_; ++i) { for (int y = y_tl; y < y_br; ++y) { int y_ = y - y_tl; const Point3_<short>* src_row = src_pyr_laplace[i].ptr<Point3_<short> >(y_); Point3_<short>* dst_row = dst_pyr_laplace_[i].ptr<Point3_<short> >(y); const float* weight_row = weight_pyr_gauss[i].ptr<float>(y_); float* dst_weight_row = dst_band_weights_[i].ptr<float>(y); for (int x = x_tl; x < x_br; ++x) { int x_ = x - x_tl; dst_row[x].x += static_cast<short>(src_row[x_].x * weight_row[x_]); dst_row[x].y += static_cast<short>(src_row[x_].y * weight_row[x_]); dst_row[x].z += static_cast<short>(src_row[x_].z * weight_row[x_]); dst_weight_row[x] += weight_row[x_]; } } x_tl /= 2; y_tl /= 2; x_br /= 2; y_br /= 2; } } else// weight_type_ == CV_16S { for (int i = 0; i <= num_bands_; ++i) { for (int y = y_tl; y < y_br; ++y) { int y_ = y - y_tl; const Point3_<short>* src_row = src_pyr_laplace[i].ptr<Point3_<short> >(y_); Point3_<short>* dst_row = dst_pyr_laplace_[i].ptr<Point3_<short> >(y); const short* weight_row = weight_pyr_gauss[i].ptr<short>(y_); short* dst_weight_row = dst_band_weights_[i].ptr<short>(y); for (int x = x_tl; x < x_br; ++x) { int x_ = x - x_tl; dst_row[x].x += short((src_row[x_].x * weight_row[x_]) >> 8); dst_row[x].y += short((src_row[x_].y * weight_row[x_]) >> 8); dst_row[x].z += short((src_row[x_].z * weight_row[x_]) >> 8); dst_weight_row[x] += weight_row[x_]; } } x_tl /= 2; y_tl /= 2; x_br /= 2; y_br /= 2; } } }
void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl) { #if ENABLE_LOG int64 t = getTickCount(); #endif UMat img = _img.getUMat(); CV_Assert(img.type() == CV_16SC3 || img.type() == CV_8UC3); CV_Assert(mask.type() == CV_8U); // Keep source image in memory with small border int gap = 3 * (1 << num_bands_); Point tl_new(std::max(dst_roi_.x, tl.x - gap), std::max(dst_roi_.y, tl.y - gap)); Point br_new(std::min(dst_roi_.br().x, tl.x + img.cols + gap), std::min(dst_roi_.br().y, tl.y + img.rows + gap)); // Ensure coordinates of top-left, bottom-right corners are divided by (1 << num_bands_). // After that scale between layers is exactly 2. // // We do it to avoid interpolation problems when keeping sub-images only. There is no such problem when // image is bordered to have size equal to the final image size, but this is too memory hungry approach. tl_new.x = dst_roi_.x + (((tl_new.x - dst_roi_.x) >> num_bands_) << num_bands_); tl_new.y = dst_roi_.y + (((tl_new.y - dst_roi_.y) >> num_bands_) << num_bands_); int width = br_new.x - tl_new.x; int height = br_new.y - tl_new.y; width += ((1 << num_bands_) - width % (1 << num_bands_)) % (1 << num_bands_); height += ((1 << num_bands_) - height % (1 << num_bands_)) % (1 << num_bands_); br_new.x = tl_new.x + width; br_new.y = tl_new.y + height; int dy = std::max(br_new.y - dst_roi_.br().y, 0); int dx = std::max(br_new.x - dst_roi_.br().x, 0); tl_new.x -= dx; br_new.x -= dx; tl_new.y -= dy; br_new.y -= dy; int top = tl.y - tl_new.y; int left = tl.x - tl_new.x; int bottom = br_new.y - tl.y - img.rows; int right = br_new.x - tl.x - img.cols; // Create the source image Laplacian pyramid UMat img_with_border; copyMakeBorder(_img, img_with_border, top, bottom, left, right, BORDER_REFLECT); LOGLN(" Add border to the source image, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); #if ENABLE_LOG t = getTickCount(); #endif std::vector<UMat> src_pyr_laplace; if (can_use_gpu_ && img_with_border.depth() == CV_16S) createLaplacePyrGpu(img_with_border, num_bands_, src_pyr_laplace); else createLaplacePyr(img_with_border, num_bands_, src_pyr_laplace); LOGLN(" Create the source image Laplacian pyramid, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); #if ENABLE_LOG t = getTickCount(); #endif // Create the weight map Gaussian pyramid UMat weight_map; std::vector<UMat> weight_pyr_gauss(num_bands_ + 1); if(weight_type_ == CV_32F) { mask.getUMat().convertTo(weight_map, CV_32F, 1./255.); } else // weight_type_ == CV_16S { mask.getUMat().convertTo(weight_map, CV_16S); UMat add_mask; compare(mask, 0, add_mask, CMP_NE); add(weight_map, Scalar::all(1), weight_map, add_mask); } copyMakeBorder(weight_map, weight_pyr_gauss[0], top, bottom, left, right, BORDER_CONSTANT); for (int i = 0; i < num_bands_; ++i) pyrDown(weight_pyr_gauss[i], weight_pyr_gauss[i + 1]); LOGLN(" Create the weight map Gaussian pyramid, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec"); #if ENABLE_LOG t = getTickCount(); #endif int y_tl = tl_new.y - dst_roi_.y; int y_br = br_new.y - dst_roi_.y; int x_tl = tl_new.x - dst_roi_.x; int x_br = br_new.x - dst_roi_.x; // Add weighted layer of the source image to the final Laplacian pyramid layer for (int i = 0; i <= num_bands_; ++i) { Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl); #ifdef HAVE_OPENCL if ( !cv::ocl::useOpenCL() || !ocl_MultiBandBlender_feed(src_pyr_laplace[i], weight_pyr_gauss[i], dst_pyr_laplace_[i](rc), dst_band_weights_[i](rc)) ) #endif { Mat _src_pyr_laplace = src_pyr_laplace[i].getMat(ACCESS_READ); Mat _dst_pyr_laplace = dst_pyr_laplace_[i](rc).getMat(ACCESS_RW); Mat _weight_pyr_gauss = weight_pyr_gauss[i].getMat(ACCESS_READ); Mat _dst_band_weights = dst_band_weights_[i](rc).getMat(ACCESS_RW); if(weight_type_ == CV_32F) { for (int y = 0; y < rc.height; ++y) { const Point3_<short>* src_row = _src_pyr_laplace.ptr<Point3_<short> >(y); Point3_<short>* dst_row = _dst_pyr_laplace.ptr<Point3_<short> >(y); const float* weight_row = _weight_pyr_gauss.ptr<float>(y); float* dst_weight_row = _dst_band_weights.ptr<float>(y); for (int x = 0; x < rc.width; ++x) { dst_row[x].x += static_cast<short>(src_row[x].x * weight_row[x]); dst_row[x].y += static_cast<short>(src_row[x].y * weight_row[x]); dst_row[x].z += static_cast<short>(src_row[x].z * weight_row[x]); dst_weight_row[x] += weight_row[x]; } } } else // weight_type_ == CV_16S { for (int y = 0; y < y_br - y_tl; ++y) { const Point3_<short>* src_row = _src_pyr_laplace.ptr<Point3_<short> >(y); Point3_<short>* dst_row = _dst_pyr_laplace.ptr<Point3_<short> >(y); const short* weight_row = _weight_pyr_gauss.ptr<short>(y); short* dst_weight_row = _dst_band_weights.ptr<short>(y); for (int x = 0; x < x_br - x_tl; ++x) { dst_row[x].x += short((src_row[x].x * weight_row[x]) >> 8); dst_row[x].y += short((src_row[x].y * weight_row[x]) >> 8); dst_row[x].z += short((src_row[x].z * weight_row[x]) >> 8); dst_weight_row[x] += weight_row[x]; } } } } x_tl /= 2; y_tl /= 2; x_br /= 2; y_br /= 2; }