void ConvolveSeparable(CImageOf<T> src, CImageOf<T>& dst, CFloatImage x_kernel, CFloatImage y_kernel, float scale, float offset, int decimate, int interpolate) { // Allocate the result, if necessary CShape dShape = src.Shape(); if (decimate > 1) { dShape.width = (dShape.width + decimate-1) / decimate; dShape.height = (dShape.height + decimate-1) / decimate; } dst.ReAllocate(dShape, false); // Allocate the intermediate images CImageOf<T> tmpImg1(src.Shape()); CImageOf<T> tmpImg2(src.Shape()); // Create a proper vertical convolution kernel CFloatImage v_kernel(1, y_kernel.Shape().width, 1); for (int k = 0; k < y_kernel.Shape().width; k++) v_kernel.Pixel(0, k, 0) = y_kernel.Pixel(k, 0, 0); v_kernel.origin[1] = y_kernel.origin[0]; // Perform the two convolutions Convolve(src, tmpImg1, x_kernel, 1.0f, 0.0f); Convolve(tmpImg1, tmpImg2, v_kernel, scale, offset); // Downsample or copy for (int y = 0; y < dShape.height; y++) { T* sPtr = &tmpImg2.Pixel(0, y * decimate, 0); T* dPtr = &dst.Pixel(0, y, 0); int nB = dShape.nBands; for (int x = 0; x < dShape.width; x++) { for (int b = 0; b < nB; b++) dPtr[b] = sPtr[b]; sPtr += decimate * nB; dPtr += nB; } } interpolate++; // to get rid of "unused parameter" warning }
void ConvolveSeparable(CImageOf<T> src, CImageOf<T>& dst, CFloatImage x_kernel, CFloatImage y_kernel, int subsample) { // Allocate the result, if necessary CShape dShape = src.Shape(); if (subsample > 1) { dShape.width = (dShape.width + subsample-1) / subsample; dShape.height = (dShape.height + subsample-1) / subsample; } dst.ReAllocate(dShape, false); // Allocate the intermediate images CImageOf<T> tmpImg1(src.Shape()); CImageOf<T> tmpImg2(src.Shape()); // Create a proper vertical convolution kernel CFloatImage v_kernel(1, y_kernel.Shape().width, 1); for (int k = 0; k < y_kernel.Shape().width; k++) v_kernel.Pixel(0, k, 0) = y_kernel.Pixel(k, 0, 0); v_kernel.origin[1] = y_kernel.origin[0]; // Perform the two convolutions Convolve(src, tmpImg1, x_kernel); Convolve(tmpImg1, tmpImg2, v_kernel); // Downsample or copy for (int y = 0; y < dShape.height; y++) { T* sPtr = &tmpImg2.Pixel(0, y * subsample, 0); T* dPtr = &dst.Pixel(0, y, 0); int nB = dShape.nBands; for (int x = 0; x < dShape.width; x++) { for (int b = 0; b < nB; b++) dPtr[b] = sPtr[b]; sPtr += subsample * nB; dPtr += nB; } } }
void drawMatching(const ImgG& img1, const Mat_d& keyPts1, const ImgG& img2, const Mat_d& keyPts2, const Matching& matches, ImgRGB& outImg, double scale, unsigned char* flag) { cv::Mat tmpImg1(img1.rows, img1.cols, CV_8UC1, img1.data); cv::Mat tmpImg2(img2.rows, img2.cols, CV_8UC1, img2.data); KpVec tmpKeyPts1, tmpKeyPts2; mat2KpVec(keyPts1, tmpKeyPts1); mat2KpVec(keyPts2, tmpKeyPts2); DMatchVec tmpMatches; myMatch2CvMatch(matches, tmpMatches); cv::Mat cvImg; drawMatching(tmpImg1, tmpKeyPts1, tmpImg2, tmpKeyPts2, tmpMatches, cvImg, scale, flag); cvImg2ImgRGB(cvImg, outImg); }
void ConvolveSeparable(CImageOf<T> src, CImageOf<T>& dst, CFloatImage x_kernel, CFloatImage y_kernel, float scale, float offset, int decimate, int interpolate) { // Allocate the result, if necessary CShape dShape = src.Shape(); if (decimate > 1) { dShape.width = (dShape.width + decimate-1) / decimate; dShape.height = (dShape.height + decimate-1) / decimate; } dst.ReAllocate(dShape, false); // Allocate the intermediate images CImageOf<T> tmpImg1(src.Shape()); //CImageOf<T> tmpImgCUDA(src.Shape()); CImageOf<T> tmpImg2(src.Shape()); // Create a proper vertical convolution kernel CFloatImage v_kernel(1, y_kernel.Shape().width, 1); for (int k = 0; k < y_kernel.Shape().width; k++) v_kernel.Pixel(0, k, 0) = y_kernel.Pixel(k, 0, 0); v_kernel.origin[1] = y_kernel.origin[0]; #ifdef RUN_ON_GPU // Modifications for integrating CUDA kernels BinomialFilterType type; profilingTimer->startTimer(); // CUDA Convolve switch (x_kernel.Shape().width) { case 3: type = BINOMIAL6126; break; case 5: type = BINOMIAL14641; break; default: // Unsupported kernel case throw CError("Convolution kernel Unknown"); assert(false); } // Skip copy if decimation is not required if (decimate != 1) CudaConvolveXY(src, tmpImg2, type); else CudaConvolveXY(src, dst, type); printf("\nGPU convolution time = %f ms\n", profilingTimer->stopAndGetTimerValue()); #else profilingTimer->startTimer(); //VerifyComputedData(&tmpImg2.Pixel(0, 0, 0), &tmpImgCUDA.Pixel(0, 0, 0), 7003904); // Perform the two convolutions Convolve(src, tmpImg1, x_kernel, 1.0f, 0.0f); Convolve(tmpImg1, tmpImg2, v_kernel, scale, offset); printf("\nCPU Convolution time = %f ms\n", profilingTimer->stopAndGetTimerValue()); #endif profilingTimer->startTimer(); // Downsample or copy // Skip decimate and recopy if not required #ifdef RUN_ON_GPU if (decimate != 1) { #endif for (int y = 0; y < dShape.height; y++) { T* sPtr = &tmpImg2.Pixel(0, y * decimate, 0); T* dPtr = &dst.Pixel(0, y, 0); int nB = dShape.nBands; for (int x = 0; x < dShape.width; x++) { for (int b = 0; b < nB; b++) dPtr[b] = sPtr[b]; sPtr += decimate * nB; dPtr += nB; } } #ifdef RUN_ON_GPU } #endif printf("\nDecimate/Recopy took = %f ms\n", profilingTimer->stopAndGetTimerValue()); }