コード例 #1
0
/**
 * Produces a cleaned copy of an image together with a matching weight image.
 *
 * A sample is rejected when its mask entry is true or when its value is not
 * finite. Rejected samples are written as 0 to @p outputImage and get the
 * weight 0 in @p weightsOutput; every other sample is copied unchanged and
 * gets the weight 1.
 *
 * Samples are processed in groups of four using aligned SSE loads and stores,
 * and the last group of each row is always processed in full, also when the
 * width is not a multiple of four.
 * NOTE(review): this presumably relies on every row being 16-byte aligned and
 * padded to a multiple of four samples -- confirm against Image2D / Mask2D.
 *
 * @param inputImage    Image the sample values are read from.
 * @param outputImage   Receives the sample values with rejected ones zeroed.
 * @param inputMask     Mask whose true entries mark samples to reject.
 * @param weightsOutput Receives the 0/1 weight of every sample.
 */
void HighPassFilter::setFlaggedValuesToZeroAndMakeWeightsSSE(const Image2DCPtr &inputImage, const Image2DPtr &outputImage, const Mask2DCPtr &inputMask, const Image2DPtr &weightsOutput)
{
	const size_t rowLength = inputImage->Width();
	const __m128i allZeroInts = _mm_setzero_si128();
	const __m128 allOnes = _mm_set1_ps(1.0f);

	for(size_t row=0; row<inputImage->Height(); ++row)
	{
		const bool *flags = inputMask->ValuePtr(0, row);
		const float *src = inputImage->ValuePtr(0, row);
		float *dest = outputImage->ValuePtr(0, row);
		float *weights = weightsOutput->ValuePtr(0, row);

		for(const float *rowEnd = src + rowLength; src < rowEnd; src += 4, flags += 4, dest += 4, weights += 4)
		{
			// One integer per sample: 1 when the sample has to be rejected, 0 otherwise.
			const int reject0 = flags[0] || !isfinite(src[0]);
			const int reject1 = flags[1] || !isfinite(src[1]);
			const int reject2 = flags[2] || !isfinite(src[2]);
			const int reject3 = flags[3] || !isfinite(src[3]);

			// Comparing against zero inverts the flags into a lane mask:
			// all bits set for samples to keep, all bits clear for rejected ones.
			const __m128 keepMask = _mm_castsi128_ps(
				_mm_cmpeq_epi32(_mm_set_epi32(reject3, reject2, reject1, reject0), allZeroInts));

			// AND-ing with the mask leaves kept lanes untouched and clears the
			// rejected lanes to +0.0, so no separate blend with zero is needed.
			_mm_store_ps(weights, _mm_and_ps(keepMask, allOnes));
			_mm_store_ps(dest, _mm_and_ps(keepMask, _mm_load_ps(src)));
		}
	}
}
コード例 #2
0
/**
 * Divides @p leftHand by @p rightHand sample by sample, storing the result
 * back into @p leftHand.
 *
 * Wherever the divisor compares equal to zero the result is set to 0 instead
 * of the quotient.
 *
 * Rows are processed in groups of four samples with aligned SSE loads and
 * stores; the last group of a row is always processed in full.
 * NOTE(review): this presumably relies on every row being 16-byte aligned and
 * padded to a multiple of four samples -- confirm against Image2D.
 *
 * @param leftHand  Dividend on entry, quotient on return.
 * @param rightHand Divisor image.
 */
void HighPassFilter::elementWiseDivideSSE(const Image2DPtr &leftHand, const Image2DCPtr &rightHand)
{
	const __m128 zeros = _mm_setzero_ps();

	for(unsigned row=0; row<leftHand->Height(); ++row) {
		float *dividend = leftHand->ValuePtr(0, row);
		const float *divisor = rightHand->ValuePtr(0, row);
		float *const rowEnd = dividend + leftHand->Width();

		for(; dividend < rowEnd; dividend += 4, divisor += 4)
		{
			const __m128 dividend4 = _mm_load_ps(dividend);
			const __m128 divisor4 = _mm_load_ps(divisor);

			// Lanes whose divisor equals zero have all bits set in this mask.
			const __m128 divisorIsZero = _mm_cmpeq_ps(divisor4, zeros);
			const __m128 quotient4 = _mm_div_ps(dividend4, divisor4);

			// AND-NOT clears the masked lanes to +0.0 and keeps the quotient elsewhere.
			_mm_store_ps(dividend, _mm_andnot_ps(divisorIsZero, quotient4));
		}
	}
}
コード例 #3
0
void HighPassFilter::applyLowPassSSE(const Image2DPtr &image)
{
	Image2DPtr temp = Image2D::CreateZeroImagePtr(image->Width(), image->Height());
	unsigned hKernelMid = _hWindowSize/2;
	for(unsigned i=0; i<_hWindowSize; ++i) {
		
		const num_t k = _hKernel[i];
		const __m128 k4 = _mm_set_ps(k, k, k, k);
		unsigned
			/* xStart is the first column to start writing to. Note that it might be larger
			 * than the width. */
			xStart = (i >= hKernelMid) ? 0 : (hKernelMid-i),
			xEnd = (i <= hKernelMid) ? image->Width() : (image->Width()+hKernelMid > i ? (image->Width()-i+hKernelMid) : 0);
		
		for(unsigned y=0;y<image->Height();++y) {
			
			float *tempPtr = temp->ValuePtr(xStart, y);
			const float *imagePtr = image->ValuePtr(xStart+i-hKernelMid, y);
			
			unsigned x = xStart;
			for(;x+4<xEnd;x+=4) {
				const __m128
					imageVal = _mm_loadu_ps(imagePtr),
					tempVal = _mm_loadu_ps(tempPtr);

				// *tempPtr += k * (*imagePtr);
				_mm_storeu_ps(tempPtr, _mm_add_ps(tempVal, _mm_mul_ps(imageVal, k4)));
				
				tempPtr += 4;
				imagePtr += 4;
			}
			for(;x<xEnd;++x) {
				*tempPtr += k * (*imagePtr);
				++tempPtr;
				++imagePtr;
			}
		}
	}
	
	image->SetAll(0.0);
	unsigned vKernelMid = _vWindowSize/2;
	for(unsigned i=0; i<_vWindowSize; ++i) {
		const num_t k = _vKernel[i];
		const __m128 k4 = _mm_set_ps(k, k, k, k);
		const unsigned
			yStart = (i >= vKernelMid) ? 0 : (vKernelMid-i),
			yEnd = (i <= vKernelMid) ? image->Height() : ((image->Height()+vKernelMid>i) ? (image->Height()-i+vKernelMid) : 0);
		for(unsigned y=yStart;y<yEnd;++y) {
			
			const float *tempPtr = temp->ValuePtr(0, y+i-vKernelMid);
			float *imagePtr = image->ValuePtr(0, y);
			
			unsigned x=0;
			for(;x+4<image->Width();x += 4) {
				
				const __m128
					imageVal = _mm_load_ps(imagePtr),
					tempVal = _mm_load_ps(tempPtr);
				
				// *imagePtr += k * (*tempPtr);
				_mm_store_ps(imagePtr, _mm_add_ps(imageVal, _mm_mul_ps(tempVal, k4)));
				
				tempPtr += 4;
				imagePtr += 4;
			}
			for(;x<image->Width();++x) {
				*imagePtr += k * (*tempPtr);
				++tempPtr;
				++imagePtr;
			}
		}
	}
}