Esempio n. 1
0
	// Box-filtered texture lookup for four lanes at once.
	//
	// uv   - texture coordinates of the footprint centre (multiplied by w/h below).
	// diff - footprint size along u and v, in the same units as uv.
	//
	// For every lane the texel rectangle [uv - diff/2, uv + diff/2] is summed with
	// ComputeRect() and divided by the number of texels covered (and by 255).
	// Lanes whose footprint is at least 0.5 along either axis receive the stored
	// `avg` colour instead.
	// NOTE(review): presumably ComputeRect() evaluates a summed-area table and
	// wMask/hMask are w-1/h-1 for power-of-two textures - confirm against the class.
	Vec3q SATSampler::operator()(const Vec2q &uv,const Vec2q &diff) const {
		// Fixed: the second operand used to test diff.x again, so a large footprint
		// along v alone never selected the average colour.
		f32x4b fullMask=diff.x>=0.5f||diff.y>=0.5f;
		if(ForAll(fullMask)) return Vec3q(avg.x,avg.y,avg.z);

		// Opposite corners of the footprint, scaled to texel units.
		Vec2q tDiff=diff*floatq(0.5f);
		Vec2q a=(uv-tDiff),b=(uv+tDiff);
		a*=Vec2q(floatq(w),floatq(h));
		b*=Vec2q(floatq(w),floatq(h));

		// Integer texel coordinates, wrapped with the size masks.
		i32x4 ax(a.x),ay(a.y);
		i32x4 bx(b.x),by(b.y);
		ax&=wMask; ay&=hMask;
		bx&=wMask; by&=hMask;

		// Texel count per lane; the vector path writes `count`, the scalar path `countf`.
		union { __m128 count; float countf[4]; };
		TSample sum[4];
		i32x4 one(1);

		if(ForAll(ax<=bx&&ay<=by)) {
			// No lane wrapped: a single inclusive rectangle per lane.
			count = (f32x4(by-ay+one)*f32x4(bx-ax+one)).m;
			ComputeRect(ax,ay,bx,by,sum);
		}
		else for(int k=0;k<4;k++) {
			// After masking, a > b on an axis means the footprint crosses the
			// texture edge on that axis and has to be split.
			if(ax[k]>bx[k]) {
				if(ay[k]>by[k]) {
					// NOTE(review): wrapped on both axes, yet only the two diagonal
					// corner rectangles are accumulated; a fully wrapped box would
					// also cover (ax,0)-(w-1,by) and (0,ay)-(bx,h-1). Count and sum
					// are consistent with each other, so this may be a deliberate
					// approximation - confirm.
					countf[k]=(bx[k]+1)*(by[k]+1)+(w-ax[k])*(h-ay[k]);
					sum[k]=ComputeRect(0,0,bx[k],by[k])+ComputeRect(ax[k],ay[k],w-1,h-1);
				}
				else {
					// Wrapped horizontally: left part plus right part.
					countf[k]=(bx[k]+1+w-ax[k])*(by[k]-ay[k]+1);
					sum[k]=ComputeRect(0,ay[k],bx[k],by[k])+ComputeRect(ax[k],ay[k],w-1,by[k]);
				}
			}
			else {
				if(ay[k]>by[k]) {
					// Wrapped vertically: top part plus bottom part.
					countf[k]=(bx[k]-ax[k]+1)*(by[k]+h+1-ay[k]);
					sum[k]=ComputeRect(ax[k],0,bx[k],by[k])+ComputeRect(ax[k],ay[k],bx[k],h-1);
				}
				else {
					countf[k]=(by[k]-ay[k]+1)*(bx[k]-ax[k]+1);
					sum[k]=ComputeRect(ax[k],ay[k],bx[k],by[k]);
				}
			}
		}

		// Transpose the four per-lane sums into one SSE register per channel.
		union {
			__m128 out[3];
			struct { float ox[4]; float oy[4]; float oz[4]; } o;
		};
		o.ox[0]=sum[0].R(); o.oy[0]=sum[0].G(); o.oz[0]=sum[0].B();
		o.ox[1]=sum[1].R(); o.oy[1]=sum[1].G(); o.oz[1]=sum[1].B();
		o.ox[2]=sum[2].R(); o.oy[2]=sum[2].G(); o.oz[2]=sum[2].B();
		o.ox[3]=sum[3].R(); o.oy[3]=sum[3].G(); o.oz[3]=sum[3].B();

		// Per lane: average colour where the footprint is huge, otherwise
		// sum / (texel count * 255).
		return Condition(fullMask,Vec3q(avg.x,avg.y,avg.z),
				Vec3q(out[0], out[1], out[2]) * Inv(floatq(count) * 255.0f));
	}
Esempio n. 2
0
	// Nearest-texel lookup with mip selection for four lanes at once.
	//
	// coord - texture coordinates, scaled by wMul/hMul to texel units.
	// diff  - per-lane footprint; the smallest scaled component over all lanes
	//         selects a single mip level that is shared by the whole packet.
	//
	// Reads three consecutive bytes per texel and returns them scaled by 1/255.
	Vec3q PointSampler::operator()(const Vec2q &coord, const Vec2q &diff) const {
		Vec2q texel = (coord) * Vec2q(wMul, hMul);
		i32x4 x(texel.x), y(texel.y);

		// Mip level = number of significant bits in the smallest footprint
		// (in texels), limited to the last level the texture provides.
		floatq footprint = Min(diff.x * wMul, diff.y * hMul);
		uint   remaining = uint(Minimize(footprint));
		uint   mip       = 0;
		for(; remaining; remaining >>= 1)
			mip++;
		mip = Min(mip, tex->Mips() - 1);

		const u8 *data = (u8 *)tex->DataPointer(mip);
		int pitch = mipPitch[mip];

		// Bring the level-0 texel coordinates down to the chosen level and wrap them.
		x >>= mip;
		x  &= i32x4(wMask >> mip);
		y >>= mip;
		y  &= i32x4(hMask >> mip);

		// Byte offsets: three bytes per texel horizontally, `pitch` bytes per row.
		x = x + x + x;
		y *= pitch;

		const int at0 = x[0] + y[0];
		const int at1 = x[1] + y[1];
		const int at2 = x[2] + y[2];
		const int at3 = x[3] + y[3];

		floatq r = floatq(data[at0 + 0], data[at1 + 0], data[at2 + 0], data[at3 + 0]);
		floatq g = floatq(data[at0 + 1], data[at1 + 1], data[at2 + 1], data[at3 + 1]);
		floatq b = floatq(data[at0 + 2], data[at1 + 2], data[at2 + 2], data[at3 + 2]);

		return Vec3q(r, g, b) * f32x4(1.0f / 255.0f);
	}
Esempio n. 3
0
	// Nearest-texel lookup in the base mip level for four lanes at once.
	//
	// coord - texture coordinates, scaled by wMul/hMul to texel units.
	//
	// Reads three consecutive bytes per texel and returns them scaled by 1/255.
	Vec3q PointSampler::operator()(const Vec2q &coord) const {
		Vec2q texel = coord * Vec2q(wMul, hMul);
		i32x4 col(texel.x), row(texel.y);

		// Wrap both coordinates with the size masks.
		col &= i32x4(wMask);
		row &= i32x4(hMask);

		// Three bytes per texel horizontally.
		col = col + col + col;

		//TODO: disable the vertical flip
		row = int(tex->Height()) - 1 - row;

		const u8 *data = (u8 *)tex->DataPointer();
		int pitch = mipPitch[0];

		// Rows are `pitch` bytes apart.
		row *= pitch;

		const int at0 = col[0] + row[0];
		const int at1 = col[1] + row[1];
		const int at2 = col[2] + row[2];
		const int at3 = col[3] + row[3];

		floatq r = floatq(data[at0 + 0], data[at1 + 0], data[at2 + 0], data[at3 + 0]);
		floatq g = floatq(data[at0 + 1], data[at1 + 1], data[at2 + 1], data[at3 + 1]);
		floatq b = floatq(data[at0 + 2], data[at1 + 2], data[at2 + 2], data[at3 + 2]);

		return Vec3q(r, g, b) * f32x4(1.0f / 255.0f);
	}
Esempio n. 4
0
	// Bilinear filtering.
	//
	// Processes exactly four packets, samples[0..3]. For each one it reads
	// s.texCoord (and s.texDiff when `mipmapping` is set), fetches the 2x2 texel
	// neighbourhood of every lane, blends it with the fractional texel position
	// and stores the colour, scaled by 1/255, in s.temp1.
	//
	// samples    - array of at least four shading samples, updated in place.
	// (Cache &)  - unused.
	// mipmapping - when true, one mip level per packet is chosen from the
	//              smallest footprint in the packet; otherwise level 0 is used.
	void PointSampler::Sample(shading::Sample *samples, Cache &, bool mipmapping) const {
		for(int k = 0; k < 4; k++) {
			shading::Sample &s = samples[k];

			Vec2q pos = ClampTexCoord <i32x4>(s.texCoord) * Vec2q(wMul, hMul);

			uint mip = 0;
			if(mipmapping) {
				// Mip level = number of significant bits in 0.6 * the smallest
				// footprint (in texels), limited to the last available level.
				floatq min = Min(s.texDiff.x * wMul, s.texDiff.y * hMul);
				uint   pixels = uint(Minimize(min) * 0.6f);
				while(pixels) {
					mip++;
					pixels >>= 1;
				}
				mip = Min(mip, tex->Mips() - 1);
			}

			const u8 *data = (u8 *)tex->DataPointer(mip);
			int pitch = mipPitch[mip];

			// Top-left texel and its +1 neighbour on each axis.
			i32x4 x1(pos.x), y1(pos.y);
			i32x4 x2 = x1 + i32x4(1), y2 = y1 + i32x4(1);

			// Blend weights.
			// NOTE(review): these fractions are taken at level-0 resolution even
			// when mip > 0 - confirm this is intended.
			floatq dx = pos.x - floatq(x1), dy = pos.y - floatq(y1);

			//TODO: disable the vertical flip
			// NOTE(review): this flips with Height() - y, whereas the point-sampling
			// operator() uses Height() - 1 - y - confirm the one-row offset is intended.
			y1 = int(tex->Height()) - y1;
			y2 = int(tex->Height()) - y2;

			// Bring the coordinates down to the chosen level and wrap them.
			x1 >>= mip;
			y1 >>= mip;
			x2 >>= mip;
			y2 >>= mip;

			x1 &= i32x4(wMask >> mip);
			y1 &= i32x4(hMask >> mip);
			x2 &= i32x4(wMask >> mip);
			y2 &= i32x4(hMask >> mip);

			// Byte offsets: three bytes per texel, `pitch` bytes per row.
			x1  = x1 + x1 + x1;
			x2  = x2 + x2 + x2;

			y1 *= pitch;
			y2 *= pitch;

			// Offsets of the four neighbours: (x1,y1), (x2,y1), (x1,y2), (x2,y2).
			i32x4 o[4] = { x1 + y1, x2 + y1, x1 + y2, x2 + y2 };

			// Packs the three bytes of neighbour `a`, lane `b` into the low 24 bits
			// (byte 0 lowest). The previous `*(u32*)&data[...]` load read a fourth
			// byte - past the end of the buffer for the last texel - and was an
			// unaligned, const-discarding type-punned access.
#define TEXEL_RGB(a, b) (u32(data[o[a][b]]) | (u32(data[o[a][b] + 1]) << 8) | (u32(data[o[a][b] + 2]) << 16))

			floatq red, green, blue; {
				i32x4 col[4] = {
					i32x4( TEXEL_RGB(0, 0), TEXEL_RGB(0, 1), TEXEL_RGB(0, 2), TEXEL_RGB(0, 3) ),
					i32x4( TEXEL_RGB(1, 0), TEXEL_RGB(1, 1), TEXEL_RGB(1, 2), TEXEL_RGB(1, 3) ),
					i32x4( TEXEL_RGB(2, 0), TEXEL_RGB(2, 1), TEXEL_RGB(2, 2), TEXEL_RGB(2, 3) ),
					i32x4( TEXEL_RGB(3, 0), TEXEL_RGB(3, 1), TEXEL_RGB(3, 2), TEXEL_RGB(3, 3) )
				};

				floatq r[4], g[4], b[4];

				// Unpack the three 8-bit channels of every neighbour.
				for(int n = 0; n < 4; n++) {
					r[n] = floatq( col[n] & i32x4(255) );
					g[n] = floatq( Shr<8>(col[n]) & i32x4(255) );
					b[n] = floatq( Shr<16>(col[n]) & i32x4(255) );
				}

				// Blend horizontally within each row, then vertically between rows.
				red = Lerp(Lerp(r[0], r[1], dx), Lerp(r[2], r[3], dx), dy);
				green = Lerp(Lerp(g[0], g[1], dx), Lerp(g[2], g[3], dx), dy);
				blue = Lerp(Lerp(b[0], b[1], dx), Lerp(b[2], b[3], dx), dy);
			}
#undef TEXEL_RGB

			s.temp1 = Vec3q(red, green, blue) * f32x4(1.0f / 255.0f);
		}
	}
Esempio n. 5
0
File: round.hpp Progetto: kfrlib/fft
 // Rounds each lane of x by adding mulsign(0.5, x) and converting the sum
 // to i32 and back to f32.
 // NOTE(review): presumably mulsign() gives 0.5 the sign of x and cast<i32>
 // truncates toward zero, which would make this round-half-away-from-zero,
 // valid only while the value fits in i32 - confirm against the KFR helpers.
 KFR_SINTRIN f32sse round(f32sse x)
 {
     const f32sse signed_half = mulsign(f32x4(0.5f), x);
     return cast<f32>(cast<i32>(x + signed_half));
 }