Example #1
0
/*
 * Applies a separable 2D convolution to `image` in place, using the 1D
 * kernels _hKernel (along x, _hWindowSize taps) and _vKernel (along y,
 * _vWindowSize taps). According to the original author's note the kernels
 * are Gaussian, which is what makes the 2D convolution separable into two
 * 1D passes.
 *
 * Pass 1 accumulates the horizontal convolution of `image` into a zeroed
 * temporary image; pass 2 clears `image` and accumulates the vertical
 * convolution of the temporary back into it.
 *
 * Kernel taps that would read outside the image are skipped (no
 * renormalisation), so pixels near the border receive fewer contributions.
 *
 * The end-of-range computation is clamped to zero when a tap's offset is
 * larger than the image dimension; a plain `size - i + mid` would wrap
 * around in unsigned arithmetic and run far out of bounds for kernels that
 * are much larger than the image.
 */
void HighPassFilter::applyLowPass(const Image2DPtr &image)
{
	const size_t width = image->Width();
	const size_t height = image->Height();

	// Guassian convolution can be separated in two 1D convolution
	// because of properties of the 2D Gaussian function.
	Image2DPtr temp = Image2D::CreateZeroImagePtr(width, height);

	// Horizontal pass: temp(x, y) += image(x + i - hKernelMid, y) * _hKernel[i]
	const size_t hKernelMid = _hWindowSize/2;
	for(size_t i=0; i<_hWindowSize; ++i) {
		const num_t kernelValue = _hKernel[i];
		// First destination column whose source column (x + i - hKernelMid) is >= 0.
		const size_t xStart = (i >= hKernelMid) ? 0 : (hKernelMid-i);
		// One past the last destination column whose source column is < width.
		size_t xEnd = width;
		if(i > hKernelMid) {
			const size_t shift = i - hKernelMid;
			xEnd = (width > shift) ? (width - shift) : 0;
		}
		for(size_t y=0; y<height; ++y) {
			for(size_t x=xStart; x<xEnd; ++x)
				temp->AddValue(x, y, image->Value(x+i-hKernelMid, y)*kernelValue);
		}
	}

	// Vertical pass: image(x, y) += temp(x, y + i - vKernelMid) * _vKernel[i]
	image->SetAll(0.0);
	const size_t vKernelMid = _vWindowSize/2;
	for(size_t i=0; i<_vWindowSize; ++i) {
		const num_t kernelValue = _vKernel[i];
		// Same range logic as above, applied to rows.
		const size_t yStart = (i >= vKernelMid) ? 0 : (vKernelMid-i);
		size_t yEnd = height;
		if(i > vKernelMid) {
			const size_t shift = i - vKernelMid;
			yEnd = (height > shift) ? (height - shift) : 0;
		}
		for(size_t y=yStart; y<yEnd; ++y) {
			for(size_t x=0; x<width; ++x)
				image->AddValue(x, y, temp->Value(x, y+i-vKernelMid)*kernelValue);
		}
	}
}
Example #2
0
/*
 * SSE variant of the separable convolution: convolves `image` in place with
 * the 1D kernels _hKernel (along x, _hWindowSize taps) and _vKernel (along
 * y, _vWindowSize taps), processing four floats per iteration where
 * possible.
 *
 * Pass 1 accumulates the horizontal convolution of `image` into a zeroed
 * temporary image; pass 2 clears `image` and accumulates the vertical
 * convolution of the temporary back into it. Kernel taps that would read
 * outside the image are skipped, so border pixels receive fewer
 * contributions.
 *
 * The horizontal pass uses unaligned loads/stores because the start column
 * depends on the tap. The vertical pass uses aligned loads/stores from
 * column 0 of each row.
 * NOTE(review): this assumes every row returned by ValuePtr(0, y) starts on
 * a 16-byte boundary -- confirm against Image2D's allocation/stride.
 */
void HighPassFilter::applyLowPassSSE(const Image2DPtr &image)
{
	const size_t width = image->Width();
	const size_t height = image->Height();
	Image2DPtr temp = Image2D::CreateZeroImagePtr(width, height);

	// Horizontal pass: temp(x, y) += image(x + i - hKernelMid, y) * _hKernel[i]
	const size_t hKernelMid = _hWindowSize/2;
	for(size_t i=0; i<_hWindowSize; ++i) {
		const num_t k = _hKernel[i];
		const __m128 k4 = _mm_set1_ps(k);

		// xStart is the first column to write to; it can exceed the width
		// when the kernel is much wider than the image.
		const size_t xStart = (i >= hKernelMid) ? 0 : (hKernelMid-i);
		// One past the last column to write to, clamped to 0 so the
		// unsigned subtraction cannot wrap.
		size_t xEnd = width;
		if(i > hKernelMid) {
			const size_t shift = i - hKernelMid;
			xEnd = (width > shift) ? (width - shift) : 0;
		}
		// Nothing to do for this tap; skip before forming row pointers to
		// columns that may lie outside the row.
		if(xStart >= xEnd)
			continue;
		const size_t srcStart = xStart + i - hKernelMid;

		for(size_t y=0; y<height; ++y) {
			float *tempPtr = temp->ValuePtr(xStart, y);
			const float *imagePtr = image->ValuePtr(srcStart, y);

			size_t x = xStart;
			// Vector part. The strict '<' leaves the final (up to four)
			// elements to the scalar tail below.
			for(; x+4<xEnd; x+=4) {
				const __m128 imageVal = _mm_loadu_ps(imagePtr);
				const __m128 tempVal = _mm_loadu_ps(tempPtr);

				// *tempPtr += k * (*imagePtr);
				_mm_storeu_ps(tempPtr, _mm_add_ps(tempVal, _mm_mul_ps(imageVal, k4)));

				tempPtr += 4;
				imagePtr += 4;
			}
			// Scalar tail.
			for(; x<xEnd; ++x) {
				*tempPtr += k * (*imagePtr);
				++tempPtr;
				++imagePtr;
			}
		}
	}

	// Vertical pass: image(x, y) += temp(x, y + i - vKernelMid) * _vKernel[i]
	image->SetAll(0.0);
	const size_t vKernelMid = _vWindowSize/2;
	for(size_t i=0; i<_vWindowSize; ++i) {
		const num_t k = _vKernel[i];
		const __m128 k4 = _mm_set1_ps(k);

		// Same range logic as the horizontal pass, applied to rows.
		const size_t yStart = (i >= vKernelMid) ? 0 : (vKernelMid-i);
		size_t yEnd = height;
		if(i > vKernelMid) {
			const size_t shift = i - vKernelMid;
			yEnd = (height > shift) ? (height - shift) : 0;
		}

		for(size_t y=yStart; y<yEnd; ++y) {
			const float *tempPtr = temp->ValuePtr(0, y+i-vKernelMid);
			float *imagePtr = image->ValuePtr(0, y);

			size_t x = 0;
			// Vector part with aligned access (see alignment note in the
			// function header).
			for(; x+4<width; x+=4) {
				const __m128 imageVal = _mm_load_ps(imagePtr);
				const __m128 tempVal = _mm_load_ps(tempPtr);

				// *imagePtr += k * (*tempPtr);
				_mm_store_ps(imagePtr, _mm_add_ps(imageVal, _mm_mul_ps(tempVal, k4)));

				tempPtr += 4;
				imagePtr += 4;
			}
			// Scalar tail.
			for(; x<width; ++x) {
				*imagePtr += k * (*tempPtr);
				++tempPtr;
				++imagePtr;
			}
		}
	}
}