示例#1
0
        // Runs the 3x3 LaplaceAbsSum(a, sums) kernel over every A-byte block of every row of an
        // 8-bit image and stores the grand total in *sum.
        //
        // src    - pointer to the first row of the image.
        // stride - distance in bytes between the starts of consecutive rows.
        // width  - row length in bytes; must be greater than A (asserted).
        // height - number of rows; when 0 the loop is skipped and *sum is set to 0.
        // sum    - output; overwritten (not accumulated into) by this call.
        //
        // When 'align' is true, src and stride must both satisfy Aligned() (asserted).
        // NOTE(review): the kernel overload LaplaceAbsSum(a, sums) is defined elsewhere; by its name it
        // presumably adds the absolute Laplace filter response of each pixel - confirm there.
        template <bool align> void LaplaceAbsSum(const uint8_t * src, size_t stride, size_t width, size_t height, uint64_t * sum)
        {
            assert(width > A);
            if(align)
                assert(Aligned(src) && Aligned(stride));

            // Byte offset at which the last A-aligned block of a row would start.
            size_t bodyWidth = Simd::AlignHi(width, A) - A;
            v128_u8 a[3][3];
            // The tail block is loaded from 'width - A', so it can overlap the preceding block when
            // width is not a multiple of A; the mask is applied to the tail neighbourhood below.
            // NOTE(review): presumably it zeroes the lanes that were already processed - confirm
            // against ShiftLeft/SetMask3x3.
            v128_u8 tailMask = ShiftLeft(K8_FF, A - width + bodyWidth);
            uint64_t total = 0;

            for(size_t row = 0; row < height; ++row)
            {
                // Derive the neighbouring rows from the current one, and only when they exist.
                // This avoids forming 'src - stride' on the first row, which is an out-of-bounds
                // pointer computation; border rows reuse the current row instead.
                const uint8_t * src1 = src + stride*row;
                const uint8_t * src0 = (row == 0) ? src1 : src1 - stride;
                const uint8_t * src2 = (row == height - 1) ? src1 : src1 + stride;

                // Per-row partial sums, reduced once after the row has been processed.
                v128_u32 sums[2] = {K32_00000000, K32_00000000};

                // First block of the row.
                LoadNose3<align, 1>(src0 + 0, a[0]);
                LoadNose3<align, 1>(src1 + 0, a[1]);
                LoadNose3<align, 1>(src2 + 0, a[2]);
                LaplaceAbsSum(a, sums);

                // Interior blocks.
                for(size_t col = A; col < bodyWidth; col += A)
                {
                    LoadBody3<align, 1>(src0 + col, a[0]);
                    LoadBody3<align, 1>(src1 + col, a[1]);
                    LoadBody3<align, 1>(src2 + col, a[2]);
                    LaplaceAbsSum(a, sums);
                }

                // Last block: always loaded with the unaligned variant because 'width - A' need
                // not be a multiple of A.
                LoadTail3<false, 1>(src0 + width - A, a[0]);
                LoadTail3<false, 1>(src1 + width - A, a[1]);
                LoadTail3<false, 1>(src2 + width - A, a[2]);
                SetMask3x3(a, tailMask);
                LaplaceAbsSum(a, sums);

                total += ExtractSum(vec_add(sums[0], sums[1]));
            }

            *sum = total;
        }
示例#2
0
		// Runs the 3x3 SobelDxAbsSum(a, rowSum) kernel over every A-byte block of every row of an
		// 8-bit image and stores the grand total in *sum.
		//
		// src    - pointer to the first row of the image.
		// stride - distance in bytes between the starts of consecutive rows.
		// width  - row length in bytes; must be greater than A (asserted).
		// height - number of rows; when 0 the loop is skipped and *sum receives the reduction of
		//          the zero-initialised accumulator.
		// sum    - output; overwritten by this call.
		//
		// NOTE(review): the kernel overload SobelDxAbsSum(a, rowSum) is defined elsewhere; by its name
		// it presumably adds the absolute horizontal Sobel response of each pixel - confirm there.
		void SobelDxAbsSum(const uint8_t * src, size_t stride, size_t width, size_t height, uint64_t * sum)
		{
			assert(width > A);
			// Byte offset at which the last A-aligned block of a row would start.
			size_t bodyWidth = Simd::AlignHi(width, A) - A;

			v16u8 a[3][3];
			v2u64 fullSum = Zero<v2u64>();
			const v16u8 K8_FF = Fill(static_cast<uint8_t>(0xff));
			// The tail block is loaded from 'width - A', so it can overlap the preceding block when
			// width is not a multiple of A; the mask is applied to the tail neighbourhood below.
			// NOTE(review): presumably it zeroes the lanes that were already processed - confirm
			// against ShiftLeft/SetMask3x3.
			v16u8 tailMask = ShiftLeft(K8_FF, A - width + bodyWidth);

			for (size_t row = 0; row < height; ++row)
			{
				// Derive the neighbouring rows from the current one, and only when they exist.
				// This avoids forming 'src - stride' on the first row, which is an out-of-bounds
				// pointer computation; border rows reuse the current row instead.
				const uint8_t * src1 = src + stride*row;
				const uint8_t * src0 = (row == 0) ? src1 : src1 - stride;
				const uint8_t * src2 = (row == height - 1) ? src1 : src1 + stride;

				// Per-row partial sums, folded into fullSum once the row is done.
				v4u32 rowSum = Zero<v4u32>();

				// First block of the row.
				LoadNoseDx(src0 + 0, a[0]);
				LoadNoseDx(src1 + 0, a[1]);
				LoadNoseDx(src2 + 0, a[2]);
				SobelDxAbsSum(a, rowSum);

				// Interior blocks.
				for (size_t col = A; col < bodyWidth; col += A)
				{
					LoadBodyDx(src0 + col, a[0]);
					LoadBodyDx(src1 + col, a[1]);
					LoadBodyDx(src2 + col, a[2]);
					SobelDxAbsSum(a, rowSum);
				}

				// Last block, starting A bytes before the end of the row.
				LoadTailDx(src0 + width - A, a[0]);
				LoadTailDx(src1 + width - A, a[1]);
				LoadTailDx(src2 + width - A, a[2]);
				SetMask3x3(a, tailMask);
				SobelDxAbsSum(a, rowSum);

				fullSum = PadSum(fullSum, rowSum);
			}
			*sum = ExtractSum(fullSum);
		}