예제 #1
0
	bool bExecute( const shaderreg *i_pInput, vector4 &io_vColor, float32 &io_fDepth )
	{
		#ifdef VISUALIZE_RATE_OF_CHANGE
		vector4 vDdx, vDdy; GetDerivatives( 0, vDdx, vDdy );
		io_vColor.r = (*(vector2 *)&vDdx).length() * 100;
		io_vColor.g = (*(vector2 *)&vDdy).length() * 100;
		io_vColor.b = 0;
		io_vColor.a = 1;
		return true;
		#endif

		vector4 vRainbowFilm;
		SampleTexture( vRainbowFilm, 0, i_pInput[0].x, i_pInput[0].y, 0.0f );

		const float32 fFresnel = 1.0f - fabsf( i_pInput[2].x );

		vector4 vReflectionEnv;
		SampleTexture( vReflectionEnv, 1, i_pInput[1].x, i_pInput[1].y, i_pInput[1].z );

		float32 fAlpha = fSaturate( 4.0f * ( vReflectionEnv.a * vReflectionEnv.a - 0.75f ) );
		const vector4 vBaseEnvColor = ( vRainbowFilm * vReflectionEnv * 2.0f ).saturate();

		vector4 vColor;
		vVector4Lerp( vColor, vBaseEnvColor, vReflectionEnv, fAlpha );

		fAlpha += 0.6f * fFresnel + 0.1f;

		vVector4Lerp( io_vColor, io_vColor, vColor, fSaturate( fAlpha ) );
		return true;
	}
예제 #2
0
	bool bExecute( const shaderreg *i_pInput, vector4 &io_vColor, float32 &io_fDepth )
	{
		// read normal from normalmap
		vector4 vTexNormal; SampleTexture( vTexNormal, 1, i_pInput[0].x, i_pInput[0].y, 0.0f );
		const vector3 vNormal( vTexNormal.x * 2.0f - 1.0f, vTexNormal.y * 2.0f - 1.0f, vTexNormal.z * 2.0f - 1.0f );

		// sample texture
		vector4 vTex;
		SampleTexture( vTex, 0, i_pInput[0].x, i_pInput[0].y, 0.0f );
		
		// renormalize interpolated light direction vector
		vector3 vLightDir = i_pInput[1]; vLightDir.normalize();

		// compute diffuse light
		float32 fDiffuse = fVector3Dot( vNormal, vLightDir );
		float32 fSpecular = 0.0f;
		if( fDiffuse >= 0.0f )
		{			
			// compute specular light
			vector3 vHalf = i_pInput[2]; vHalf.normalize();
			fSpecular = fVector3Dot( vNormal, vHalf );
			if( fSpecular < 0.0f )
				fSpecular = 0.0f;
			else
				fSpecular = powf( fSpecular, 128.0f );
		}
		else
			fDiffuse = 0.0f;

		const vector4 &vLightColor = vGetVector( 0 );
		io_vColor = vTex * vLightColor * fDiffuse + vLightColor * fSpecular; // += for additive blending with backbuffer, e.g. when there are multiple lights

		return true;
	}
예제 #3
0
	bool bExecute( const shaderreg *i_pInput, vector4 &io_vColor, float32 &io_fDepth )
	{
		if( fGetFloat( 0 ) )
		{
			// render flare

			vector4 vFlareColor;
			SampleTexture( vFlareColor, 0, i_pInput[0].x, i_pInput[0].y, 0.0f );
			if( vFlareColor.a <= FLT_EPSILON ) // perform alpha test
				return false;

			io_vColor += vFlareColor * fGetFloat( 1 ); // scale and blend with backbuffer
		}
		else
			io_vColor = vGetVector( 0 );
			
		return true;
	}
예제 #4
0
	void Execute( const shaderreg *i_pInput, vector4 &o_vPosition, shaderreg *o_pOutput )
	{
		// offset position
		vector4 vTexNormal; SampleTexture( vTexNormal, 1, i_pInput[3].x, i_pInput[3].y );
		const float32 fHeight = 0.4f * vTexNormal.a;
		vector3 vNormal = i_pInput[1]; vNormal.normalize(); // renormalize normal - length changed due to interpolation of vertices during subdivision

		// transform position
		o_vPosition = (i_pInput[0] + vNormal * fHeight) * matGetMatrix( m3dsc_wvpmatrix );

		// pass texcoord to pixelshader
		o_pOutput[0] = i_pInput[3];

		// build transformation matrix to tangent space
		vector3 vTangent = i_pInput[2]; vTangent.normalize();
		vVector3TransformNormal( vNormal, vNormal, matGetMatrix( m3dsc_worldmatrix ) );
		vVector3TransformNormal( vTangent, vTangent, matGetMatrix( m3dsc_worldmatrix ) );
		vector3 vBinormal; vVector3Cross( vBinormal, vNormal, vTangent );

		const matrix44 matWorldToTangentSpace(
			vTangent.x, vBinormal.x, vNormal.x, 0.0f,
			vTangent.y, vBinormal.y, vNormal.y, 0.0f,
			vTangent.z, vBinormal.z, vNormal.z, 0.0f,
			0.0f, 0.0f, 0.0f, 1.0f );

		// transform light direction to tangent space
		const vector3 vWorldPosition = i_pInput[0] * matGetMatrix( m3dsc_worldmatrix );
		vector3 vLightDir = (vector3)vGetVector( 1 ) - vWorldPosition;
		vector3 vLightDirTangentSpace; vVector3TransformNormal( vLightDirTangentSpace, vLightDir, matWorldToTangentSpace );
		o_pOutput[1] = vLightDirTangentSpace;

		// compute half vector and transform to tangent space
		vector3 vViewDir = (vector3)vGetVector( 0 ) - vWorldPosition;
		const vector3 vHalf = ( vViewDir.normalize() + vLightDir.normalize() ) * 0.5f;
		vector3 vHalfTangentSpace; vVector3TransformNormal( vHalfTangentSpace, vHalf, matWorldToTangentSpace );
		o_pOutput[2] = vHalfTangentSpace;
	}
// Emits the 64-bit scanline drawing routine: a register-saving prologue, the
// per-pixel pipeline (z test, texturing, alpha/colour processing, fog, frame/z
// read-modify-write) wrapped in a loop, and the matching epilogue.
// The calls below are code-emitter calls (Xbyak-style), so their order is the
// order of the generated instructions and must not be changed.
// The register comments between stages record which registers hold live values
// at that point; they are the contract between the stage helpers.
void GSDrawScanlineCodeGenerator::Generate()
{
	// TODO: on linux/mac rsi, rdi, xmm6-xmm15 are all caller saved

	// prologue: save the general-purpose registers restored in the epilogue
	push(rbx);
	push(rsi);
	push(rdi);
	push(rbp);
	push(r12);
	push(r13);

	// reserve 10 * 16 bytes for xmm6-xmm15 plus 8 extra bytes
	// NOTE(review): the extra 8 presumably brings rsp back to 16-byte alignment
	// (return address + six pushes) as the aligned vmovdqa stores require - confirm
	sub(rsp, 8 + 10 * 16);
	
	// spill xmm6..xmm15 to the reserved stack area
	for(int i = 6; i < 16; i++)
	{
		vmovdqa(ptr[rsp + (i - 6) * 16], Xmm(i));
	}

	// load the base pointers used by the stage helpers (see register list below)
	mov(r10, (size_t)&m_test[0]);
	mov(r11, (size_t)&m_local);
	mov(r12, (size_t)m_local.gd);
	mov(r13, (size_t)m_local.gd->vm);

	Init();

	// rcx = steps
	// rsi = fza_base
	// rdi = fza_offset
	// r10 = &m_test[0]
	// r11 = &m_local
	// r12 = m_local.gd
	// r13 = m_local.gd->vm
	// xmm7 = vf (sprite && ltf)
	// xmm8 = z
	// xmm9 = f
	// xmm10 = s
	// xmm11 = t
	// xmm12 = q
	// xmm13 = rb
	// xmm14 = ga 
	// xmm15 = test

	// the loop head is only aligned when a loop is actually generated (non-edge mode)
	if(!m_sel.edge)
	{
		align(16);
	}

L("loop");

	TestZ(xmm5, xmm6);

	// ebp = za

	// texture sampling: LOD-aware variant when mmin is selected
	if(m_sel.mmin)
	{
		SampleTextureLOD();
	}
	else
	{
		SampleTexture();
	}

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga

	AlphaTFX();

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga

	ReadMask();

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm

	TestAlpha();

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm

	ColorTFX();

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm

	Fog();

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm

	ReadFrame();

	// ebx = fa
	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm
	// xmm6 = fd

	TestDestAlpha();

	// ebx = fa
	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm
	// xmm6 = fd

	WriteMask();

	// ebx = fa
	// edx = fzm
	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm
	// xmm6 = fd

	WriteZBuf();

	// ebx = fa
	// edx = fzm
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm6 = fd

	AlphaBlend();

	// ebx = fa
	// edx = fzm
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm6 = fd

	WriteFrame();

L("step");

	// if(steps <= 0) break;
	// in edge mode no loop is emitted: the pipeline runs once and falls through to "exit"

	if(!m_sel.edge)
	{
		test(rcx, rcx);

		jle("exit", T_NEAR);

		Step();

		jmp("loop", T_NEAR);
	}

L("exit");

	// epilogue: reload xmm6..xmm15 from the slots written in the prologue
	for(int i = 6; i < 16; i++)
	{
		vmovdqa(Xmm(i), ptr[rsp + (i - 6) * 16]);
	}

	// release the spill area (must match the sub in the prologue)
	add(rsp, 8 + 10 * 16);

	// restore general-purpose registers in reverse push order
	pop(r13);
	pop(r12);
	pop(rbp);
	pop(rdi);
	pop(rsi);
	pop(rbx);

	ret();
}
// Emits the 32-bit scanline drawing routine for the GPU renderer: prologue,
// a loop running mask test, texturing, colour/alpha processing, dithering and
// frame write, then the epilogue.
// The calls below are code-emitter calls (Xbyak-style); their order is the order
// of the generated instructions and must not be changed.
// NOTE(review): `and(...)` is spelled with a token that standard C++ reserves as an
// alternative operator; this presumably relies on a toolchain that does not treat it
// as a keyword (Xbyak also offers an `and_` spelling) - confirm before porting.
void GPUDrawScanlineCodeGenerator::Generate()
{
	// prologue: esi and edi are restored before returning
	push(esi);
	push(edi);

	Init();

	align(16);

L("loop");

	// GSVector4i test = m_test[7 + (steps & (steps >> 31))];
	// steps >> 31 (arithmetic) is all ones only when steps is negative, so the
	// index offset is `steps` for negative values and 0 otherwise; the shift by 4
	// turns the index into a byte offset of 16 per entry

	mov(edx, ecx);
	sar(edx, 31);
	and(edx, ecx);
	shl(edx, 4);

	movdqa(xmm7, ptr[edx + (size_t)&m_test[7]]);

	// movdqu(xmm1, ptr[edi]);
	// the 16 bytes at [edi] are loaded as two 64-bit halves instead of one movdqu

	movq(xmm1, qword[edi]);
	movhps(xmm1, qword[edi + 8]);

	// ecx = steps
	// esi = tex (tme)
	// edi = fb
	// xmm1 = fd
	// xmm2 = s
	// xmm3 = t
	// xmm4 = r
	// xmm5 = g
	// xmm6 = b
	// xmm7 = test

	TestMask();

	SampleTexture();

	// xmm1 = fd
	// xmm3 = a
	// xmm4 = r
	// xmm5 = g
	// xmm6 = b
	// xmm7 = test
	// xmm0, xmm2 = free

	ColorTFX();

	AlphaBlend();

	Dither();

	WriteFrame();

L("step");

	// if(steps <= 0) break;

	test(ecx, ecx);
	jle("exit", T_NEAR);

	Step();

	jmp("loop", T_NEAR);

L("exit");

	// epilogue: restore in reverse push order
	pop(edi);
	pop(esi);

	// return and remove 8 bytes of stack arguments (callee clean-up)
	ret(8);
}
// Emits the 32-bit scanline drawing routine: prologue, the per-pixel pipeline
// (z test, texturing, alpha/colour processing, fog, frame read, write-mask
// computation, z/frame write) wrapped in a loop, and the epilogue.
// The calls below are code-emitter calls (Xbyak-style); their order is the order
// of the generated instructions and must not be changed.
// The register comments between stages record which registers hold live values
// at that point ("- reg" marks a register that is no longer live).
// NOTE(review): `not(...)` is spelled with a token that standard C++ reserves as an
// alternative operator; this presumably relies on a toolchain that does not treat it
// as a keyword (Xbyak also offers a `not_` spelling) - confirm before porting.
void GSDrawScanlineCodeGenerator::Generate()
{
    // prologue: these four registers are restored in the epilogue
    push(ebx);
    push(esi);
    push(edi);
    push(ebp);

    // passed to Init() and WriteFrame(); equals the 16 bytes taken by the four
    // pushes above - presumably used to locate the stack arguments, confirm
    const int params = 16;

    Init(params);

    // the loop head is only aligned when a loop is actually generated (non-edge mode)
    if(!m_sel.edge)
    {
        align(16);
    }

    L("loop");

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // xmm0 = z/zi
    // xmm2 = u (tme)
    // xmm3 = v (tme)
    // xmm5 = rb (!tme)
    // xmm6 = ga (!tme)
    // xmm7 = test

    bool tme = m_sel.tfx != TFX_NONE;

    // hand TestZ the register pair that is not live per the list above:
    // xmm5/xmm6 when texturing, xmm2/xmm3 otherwise
    TestZ(tme ? xmm5 : xmm2, tme ? xmm6 : xmm3);

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // - xmm0
    // xmm2 = u (tme)
    // xmm3 = v (tme)
    // xmm5 = rb (!tme)
    // xmm6 = ga (!tme)
    // xmm7 = test

    SampleTexture();

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // - xmm2
    // - xmm3
    // - xmm4
    // xmm5 = rb
    // xmm6 = ga
    // xmm7 = test

    AlphaTFX();

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc)
    // xmm5 = rb
    // xmm6 = ga
    // xmm7 = test

    // load the frame and z write masks, each only when that buffer is written
    if(m_sel.fwrite)
    {
        movdqa(xmm3, xmmword[&m_env.fm]);
    }

    if(m_sel.zwrite)
    {
        movdqa(xmm4, xmmword[&m_env.zm]);
    }

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc)
    // xmm3 = fm
    // xmm4 = zm
    // xmm5 = rb
    // xmm6 = ga
    // xmm7 = test

    TestAlpha();

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc)
    // xmm3 = fm
    // xmm4 = zm
    // xmm5 = rb
    // xmm6 = ga
    // xmm7 = test

    ColorTFX();

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // xmm3 = fm
    // xmm4 = zm
    // xmm5 = rb
    // xmm6 = ga
    // xmm7 = test

    Fog();

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // xmm3 = fm
    // xmm4 = zm
    // xmm5 = rb
    // xmm6 = ga
    // xmm7 = test

    ReadFrame();

    // ecx = steps
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // xmm2 = fd
    // xmm3 = fm
    // xmm4 = zm
    // xmm5 = rb
    // xmm6 = ga
    // xmm7 = test

    TestDestAlpha();

    // fm |= test;
    // zm |= test;

    if(m_sel.fwrite)
    {
        por(xmm3, xmm7);
    }

    if(m_sel.zwrite)
    {
        por(xmm4, xmm7);
    }

    // int fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask();

    // xmm1 = all ones, the value the masks are compared against
    pcmpeqd(xmm1, xmm1);

    if(m_sel.fwrite && m_sel.zwrite)
    {
        // compare both masks and pack the two results into one register
        movdqa(xmm0, xmm1);
        pcmpeqd(xmm1, xmm3);
        pcmpeqd(xmm0, xmm4);
        packssdw(xmm1, xmm0);
    }
    else if(m_sel.fwrite)
    {
        pcmpeqd(xmm1, xmm3);
        packssdw(xmm1, xmm1);
    }
    else if(m_sel.zwrite)
    {
        pcmpeqd(xmm1, xmm4);
        packssdw(xmm1, xmm1);
    }

    // collect the byte sign bits and invert them: a set bit in fzm marks a lane
    // whose mask is not fully set
    pmovmskb(edx, xmm1);
    not(edx);

    // ebx = fa
    // ecx = steps
    // edx = fzm
    // esi = fzbr
    // edi = fzbc
    // ebp = za
    // xmm2 = fd
    // xmm3 = fm
    // xmm4 = zm
    // xmm5 = rb
    // xmm6 = ga

    WriteZBuf();

    // ebx = fa
    // ecx = steps
    // edx = fzm
    // esi = fzbr
    // edi = fzbc
    // - ebp
    // xmm2 = fd
    // xmm3 = fm
    // - xmm4
    // xmm5 = rb
    // xmm6 = ga

    AlphaBlend();

    // ebx = fa
    // ecx = steps
    // edx = fzm
    // esi = fzbr
    // edi = fzbc
    // xmm2 = fd
    // xmm3 = fm
    // xmm5 = rb
    // xmm6 = ga

    WriteFrame(params);

    L("step");

    // if(steps <= 0) break;
    // in edge mode no loop is emitted: the pipeline runs once and falls through to "exit"

    if(!m_sel.edge)
    {
        test(ecx, ecx);
        jle("exit", T_NEAR);

        Step();

        jmp("loop", T_NEAR);
    }

    L("exit");

    // epilogue: restore in reverse push order
    pop(ebp);
    pop(edi);
    pop(esi);
    pop(ebx);

    // return and remove 8 bytes of stack arguments (callee clean-up)
    ret(8);
}
예제 #8
0
파일: Shader.cpp 프로젝트: silverXz/ToyX
// Pixel shader for the checkerboard cube: writes the texel sampled from the first
// bound surface to the pixel's output.
//   i_pUniform  - shader uniforms; only pSurfaces[0] is read
//   io_pPSParam - Varyings[0] and Varyings[1] are passed as the sampling
//                 coordinates (presumably u and v); Output receives the result
void CheckboardCubePS::Execute(Arti3DShaderUniform *i_pUniform, Arti3DPSParam *io_pPSParam)
{
	Arti3DSurface *pTexture = i_pUniform->pSurfaces[0];

	io_pPSParam->Output = SampleTexture(
		pTexture,
		io_pPSParam->Varyings[0],
		io_pPSParam->Varyings[1]);
}