bool bExecute( const shaderreg *i_pInput, vector4 &io_vColor, float32 &io_fDepth ) { #ifdef VISUALIZE_RATE_OF_CHANGE vector4 vDdx, vDdy; GetDerivatives( 0, vDdx, vDdy ); io_vColor.r = (*(vector2 *)&vDdx).length() * 100; io_vColor.g = (*(vector2 *)&vDdy).length() * 100; io_vColor.b = 0; io_vColor.a = 1; return true; #endif vector4 vRainbowFilm; SampleTexture( vRainbowFilm, 0, i_pInput[0].x, i_pInput[0].y, 0.0f ); const float32 fFresnel = 1.0f - fabsf( i_pInput[2].x ); vector4 vReflectionEnv; SampleTexture( vReflectionEnv, 1, i_pInput[1].x, i_pInput[1].y, i_pInput[1].z ); float32 fAlpha = fSaturate( 4.0f * ( vReflectionEnv.a * vReflectionEnv.a - 0.75f ) ); const vector4 vBaseEnvColor = ( vRainbowFilm * vReflectionEnv * 2.0f ).saturate(); vector4 vColor; vVector4Lerp( vColor, vBaseEnvColor, vReflectionEnv, fAlpha ); fAlpha += 0.6f * fFresnel + 0.1f; vVector4Lerp( io_vColor, io_vColor, vColor, fSaturate( fAlpha ) ); return true; }
bool bExecute( const shaderreg *i_pInput, vector4 &io_vColor, float32 &io_fDepth ) { // read normal from normalmap vector4 vTexNormal; SampleTexture( vTexNormal, 1, i_pInput[0].x, i_pInput[0].y, 0.0f ); const vector3 vNormal( vTexNormal.x * 2.0f - 1.0f, vTexNormal.y * 2.0f - 1.0f, vTexNormal.z * 2.0f - 1.0f ); // sample texture vector4 vTex; SampleTexture( vTex, 0, i_pInput[0].x, i_pInput[0].y, 0.0f ); // renormalize interpolated light direction vector vector3 vLightDir = i_pInput[1]; vLightDir.normalize(); // compute diffuse light float32 fDiffuse = fVector3Dot( vNormal, vLightDir ); float32 fSpecular = 0.0f; if( fDiffuse >= 0.0f ) { // compute specular light vector3 vHalf = i_pInput[2]; vHalf.normalize(); fSpecular = fVector3Dot( vNormal, vHalf ); if( fSpecular < 0.0f ) fSpecular = 0.0f; else fSpecular = powf( fSpecular, 128.0f ); } else fDiffuse = 0.0f; const vector4 &vLightColor = vGetVector( 0 ); io_vColor = vTex * vLightColor * fDiffuse + vLightColor * fSpecular; // += for additive blending with backbuffer, e.g. when there are multiple lights return true; }
bool bExecute( const shaderreg *i_pInput, vector4 &io_vColor, float32 &io_fDepth ) { if( fGetFloat( 0 ) ) { // render flare vector4 vFlareColor; SampleTexture( vFlareColor, 0, i_pInput[0].x, i_pInput[0].y, 0.0f ); if( vFlareColor.a <= FLT_EPSILON ) // perform alpha test return false; io_vColor += vFlareColor * fGetFloat( 1 ); // scale and blend with backbuffer } else io_vColor = vGetVector( 0 ); return true; }
void Execute( const shaderreg *i_pInput, vector4 &o_vPosition, shaderreg *o_pOutput ) { // offset position vector4 vTexNormal; SampleTexture( vTexNormal, 1, i_pInput[3].x, i_pInput[3].y ); const float32 fHeight = 0.4f * vTexNormal.a; vector3 vNormal = i_pInput[1]; vNormal.normalize(); // renormalize normal - length changed due to interpolation of vertices during subdivision // transform position o_vPosition = (i_pInput[0] + vNormal * fHeight) * matGetMatrix( m3dsc_wvpmatrix ); // pass texcoord to pixelshader o_pOutput[0] = i_pInput[3]; // build transformation matrix to tangent space vector3 vTangent = i_pInput[2]; vTangent.normalize(); vVector3TransformNormal( vNormal, vNormal, matGetMatrix( m3dsc_worldmatrix ) ); vVector3TransformNormal( vTangent, vTangent, matGetMatrix( m3dsc_worldmatrix ) ); vector3 vBinormal; vVector3Cross( vBinormal, vNormal, vTangent ); const matrix44 matWorldToTangentSpace( vTangent.x, vBinormal.x, vNormal.x, 0.0f, vTangent.y, vBinormal.y, vNormal.y, 0.0f, vTangent.z, vBinormal.z, vNormal.z, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f ); // transform light direction to tangent space const vector3 vWorldPosition = i_pInput[0] * matGetMatrix( m3dsc_worldmatrix ); vector3 vLightDir = (vector3)vGetVector( 1 ) - vWorldPosition; vector3 vLightDirTangentSpace; vVector3TransformNormal( vLightDirTangentSpace, vLightDir, matWorldToTangentSpace ); o_pOutput[1] = vLightDirTangentSpace; // compute half vector and transform to tangent space vector3 vViewDir = (vector3)vGetVector( 0 ) - vWorldPosition; const vector3 vHalf = ( vViewDir.normalize() + vLightDir.normalize() ) * 0.5f; vector3 vHalfTangentSpace; vVector3TransformNormal( vHalfTangentSpace, vHalf, matWorldToTangentSpace ); o_pOutput[2] = vHalfTangentSpace; }
// Assembles the 64-bit scanline drawing routine: a prologue that saves registers
// and loads fixed base pointers, one loop body built by calling the per-stage
// emitters in the order below, and an epilogue that restores the saved state.
// The calls (push, mov, vmovdqa, L, jmp, ...) look like Xbyak-style instruction
// emitters - confirm against the base class.
// The register comments between stages are the author's record of which
// registers hold live values at that point; the stage emitters depend on them.
void GSDrawScanlineCodeGenerator::Generate()
{
	// TODO: on linux/mac rsi, rdi, xmm6-xmm15 are all caller saved

	// Prologue: save the general-purpose registers the routine overwrites.
	push(rbx);
	push(rsi);
	push(rdi);
	push(rbp);
	push(r12);
	push(r13);

	// Reserve ten 16-byte slots for xmm6-xmm15 plus 8 extra bytes.
	// NOTE(review): the extra 8 presumably keeps rsp 16-byte aligned for the
	// vmovdqa stores (return address + six pushes = 56 bytes) - confirm.
	sub(rsp, 8 + 10 * 16);

	for(int i = 6; i < 16; i++)
	{
		vmovdqa(ptr[rsp + (i - 6) * 16], Xmm(i));
	}

	// Load the addresses of generator-owned data as immediates so the stages
	// can address everything relative to r10-r13.
	mov(r10, (size_t)&m_test[0]);
	mov(r11, (size_t)&m_local);
	mov(r12, (size_t)m_local.gd);
	mov(r13, (size_t)m_local.gd->vm);

	Init();

	// rcx = steps
	// rsi = fza_base
	// rdi = fza_offset
	// r10 = &m_test[0]
	// r11 = &m_local
	// r12 = m_local->gd
	// r13 = m_local->gd.vm
	// xmm7 = vf (sprite && ltf)
	// xmm8 = z
	// xmm9 = f
	// xmm10 = s
	// xmm11 = t
	// xmm12 = q
	// xmm13 = rb
	// xmm14 = ga
	// xmm15 = test

	// The loop head is only aligned when a loop is actually emitted
	// (see the matching !m_sel.edge check at "step").
	if(!m_sel.edge)
	{
		align(16);
	}

L("loop");

	TestZ(xmm5, xmm6);

	// ebp = za

	// Texture sampling: the LOD variant is used when m_sel.mmin is set.
	if(m_sel.mmin)
	{
		SampleTextureLOD();
	}
	else
	{
		SampleTexture();
	}

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga

	AlphaTFX();

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga

	ReadMask();

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm

	TestAlpha();

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm

	ColorTFX();

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm

	Fog();

	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm

	ReadFrame();

	// ebx = fa
	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm
	// xmm6 = fd

	TestDestAlpha();

	// ebx = fa
	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm
	// xmm6 = fd

	WriteMask();

	// ebx = fa
	// edx = fzm
	// ebp = za
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm5 = zm
	// xmm6 = fd

	WriteZBuf();

	// ebx = fa
	// edx = fzm
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm6 = fd

	AlphaBlend();

	// ebx = fa
	// edx = fzm
	// xmm2 = rb
	// xmm3 = ga
	// xmm4 = fm
	// xmm6 = fd

	WriteFrame();

L("step");

	// if(steps <= 0) break;

	// With m_sel.edge set nothing is emitted here, so the body above runs once
	// and falls through to "exit".
	if(!m_sel.edge)
	{
		test(rcx, rcx);

		jle("exit", T_NEAR);

		Step();

		jmp("loop", T_NEAR);
	}

L("exit");

	// Epilogue: restore xmm6-xmm15, release the save area, and pop the
	// general-purpose registers in the reverse order of the pushes.
	for(int i = 6; i < 16; i++)
	{
		vmovdqa(Xmm(i), ptr[rsp + (i - 6) * 16]);
	}

	add(rsp, 8 + 10 * 16);

	pop(r13);
	pop(r12);
	pop(rbp);
	pop(rdi);
	pop(rsi);
	pop(rbx);

	ret();
}
// Assembles the 32-bit GPU scanline drawing routine: saves esi/edi, emits an
// aligned loop that loads the per-iteration test mask and destination pixels,
// runs the stage emitters in the order below, and steps until the step counter
// in ecx is no longer positive.
// The calls (push, mov, movdqa, L, jmp, ...) look like Xbyak-style instruction
// emitters - confirm against the base class.
void GPUDrawScanlineCodeGenerator::Generate()
{
	// Prologue: esi and edi are overwritten by the routine and restored at "exit".
	push(esi);
	push(edi);

	Init();

	align(16);

L("loop");

	// GSVector4i test = m_test[7 + (steps & (steps >> 31))];

	// Branch-free index: the arithmetic shift gives 0 or -1 depending on the
	// sign of steps, so the AND yields steps when it is negative and 0
	// otherwise; shifting left by 4 scales the index to 16-byte entries.
	mov(edx, ecx);
	sar(edx, 31);
	and(edx, ecx);
	shl(edx, 4);

	movdqa(xmm7, ptr[edx + (size_t)&m_test[7]]);

	// movdqu(xmm1, ptr[edi]);

	// Loads the same 16 bytes at [edi] as the commented-out movdqu, split into
	// two 8-byte halves.
	movq(xmm1, qword[edi]);
	movhps(xmm1, qword[edi + 8]);

	// ecx = steps
	// esi = tex (tme)
	// edi = fb
	// xmm1 = fd
	// xmm2 = s
	// xmm3 = t
	// xmm4 = r
	// xmm5 = g
	// xmm6 = b
	// xmm7 = test

	TestMask();

	SampleTexture();

	// xmm1 = fd
	// xmm3 = a
	// xmm4 = r
	// xmm5 = g
	// xmm6 = b
	// xmm7 = test
	// xmm0, xmm2 = free

	ColorTFX();

	AlphaBlend();

	Dither();

	WriteFrame();

L("step");

	// if(steps <= 0) break;

	test(ecx, ecx);

	jle("exit", T_NEAR);

	Step();

	jmp("loop", T_NEAR);

L("exit");

	// Epilogue: restore in reverse push order; ret(8) also releases 8 bytes of
	// stack from the caller's frame.
	// NOTE(review): presumably the routine's stack-passed arguments - confirm
	// against the generated function's signature.
	pop(edi);
	pop(esi);

	ret(8);
}
// Assembles the 32-bit scanline drawing routine: saves ebx/esi/edi/ebp, emits
// one loop body built from the stage emitters in the order below (plus the
// inline frame/z write-mask computation), and restores the registers.
// The calls (push, movdqa, pcmpeqd, L, jmp, ...) look like Xbyak-style
// instruction emitters - confirm against the base class.
// The register comments between stages are the author's record of which
// registers hold live values at that point ("- reg" marks a register that is
// no longer live); the stage emitters depend on them.
void GSDrawScanlineCodeGenerator::Generate()
{
	// Prologue: save the registers the routine overwrites.
	push(ebx);
	push(esi);
	push(edi);
	push(ebp);

	// NOTE(review): 16 matches the bytes taken by the four pushes above, so
	// this is presumably the extra stack offset needed to reach the routine's
	// arguments - confirm in Init()/WriteFrame().
	const int params = 16;

	Init(params);

	// The loop head is only aligned when a loop is actually emitted
	// (see the matching !m_sel.edge check at "step").
	if(!m_sel.edge)
	{
		align(16);
	}

L("loop");

	// ecx = steps
	// esi = fzbr
	// edi = fzbc
	// xmm0 = z/zi
	// xmm2 = u (tme)
	// xmm3 = v (tme)
	// xmm5 = rb (!tme)
	// xmm6 = ga (!tme)
	// xmm7 = test

	// Texturing is enabled whenever a texture function is selected.
	bool tme = m_sel.tfx != TFX_NONE;

	// Pass TestZ the register pair that is not holding live data in the
	// current mode (see the register list above).
	TestZ(tme ? xmm5 : xmm2, tme ? xmm6 : xmm3);

	// ecx = steps
	// esi = fzbr
	// edi = fzbc
	// - xmm0
	// xmm2 = u (tme)
	// xmm3 = v (tme)
	// xmm5 = rb (!tme)
	// xmm6 = ga (!tme)
	// xmm7 = test

	SampleTexture();

	// ecx = steps
	// esi = fzbr
	// edi = fzbc
	// ebp = za
	// - xmm2
	// - xmm3
	// - xmm4
	// xmm5 = rb
	// xmm6 = ga
	// xmm7 = test

	AlphaTFX();

	// ecx = steps
	// esi = fzbr
	// edi = fzbc
	// ebp = za
	// xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc)
	// xmm5 = rb
	// xmm6 = ga
	// xmm7 = test

	// Load the frame and z masks only for the buffers that will be written.
	if(m_sel.fwrite)
	{
		movdqa(xmm3, xmmword[&m_env.fm]);
	}

	if(m_sel.zwrite)
	{
		movdqa(xmm4, xmmword[&m_env.zm]);
	}

	// ecx = steps
	// esi = fzbr
	// edi = fzbc
	// ebp = za
	// xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc)
	// xmm3 = fm
	// xmm4 = zm
	// xmm5 = rb
	// xmm6 = ga
	// xmm7 = test

	TestAlpha();

	// ecx = steps
	// esi = fzbr
	// edi = fzbc
	// ebp = za
	// xmm2 = gaf (TFX_HIGHLIGHT || TFX_HIGHLIGHT2 && !tcc)
	// xmm3 = fm
	// xmm4 = zm
	// xmm5 = rb
	// xmm6 = ga
	// xmm7 = test

	ColorTFX();

	// ecx = steps
	// esi = fzbr
	// edi = fzbc
	// ebp = za
	// xmm3 = fm
	// xmm4 = zm
	// xmm5 = rb
	// xmm6 = ga
	// xmm7 = test

	Fog();

	// ecx = steps
	// esi = fzbr
	// edi = fzbc
	// ebp = za
	// xmm3 = fm
	// xmm4 = zm
	// xmm5 = rb
	// xmm6 = ga
	// xmm7 = test

	ReadFrame();

	// ecx = steps
	// esi = fzbr
	// edi = fzbc
	// ebp = za
	// xmm2 = fd
	// xmm3 = fm
	// xmm4 = zm
	// xmm5 = rb
	// xmm6 = ga
	// xmm7 = test

	TestDestAlpha();

	// fm |= test;
	// zm |= test;

	if(m_sel.fwrite)
	{
		por(xmm3, xmm7);
	}

	if(m_sel.zwrite)
	{
		por(xmm4, xmm7);
	}

	// int fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask();

	// xmm1 = all ones, the value the masks are compared against. If neither
	// fwrite nor zwrite is set, xmm1 stays all ones and the low 16 bits of
	// edx end up clear after the not.
	pcmpeqd(xmm1, xmm1);

	if(m_sel.fwrite && m_sel.zwrite)
	{
		// Compare both masks, then pack the frame result into the low half
		// and the z result into the high half.
		movdqa(xmm0, xmm1);
		pcmpeqd(xmm1, xmm3);
		pcmpeqd(xmm0, xmm4);
		packssdw(xmm1, xmm0);
	}
	else if(m_sel.fwrite)
	{
		pcmpeqd(xmm1, xmm3);
		packssdw(xmm1, xmm1);
	}
	else if(m_sel.zwrite)
	{
		pcmpeqd(xmm1, xmm4);
		packssdw(xmm1, xmm1);
	}

	// Collect the per-byte sign bits and invert them: a set bit in edx marks a
	// lane whose mask is not fully set.
	pmovmskb(edx, xmm1);
	not(edx);

	// ebx = fa
	// ecx = steps
	// edx = fzm
	// esi = fzbr
	// edi = fzbc
	// ebp = za
	// xmm2 = fd
	// xmm3 = fm
	// xmm4 = zm
	// xmm5 = rb
	// xmm6 = ga

	WriteZBuf();

	// ebx = fa
	// ecx = steps
	// edx = fzm
	// esi = fzbr
	// edi = fzbc
	// - ebp
	// xmm2 = fd
	// xmm3 = fm
	// - xmm4
	// xmm5 = rb
	// xmm6 = ga

	AlphaBlend();

	// ebx = fa
	// ecx = steps
	// edx = fzm
	// esi = fzbr
	// edi = fzbc
	// xmm2 = fd
	// xmm3 = fm
	// xmm5 = rb
	// xmm6 = ga

	WriteFrame(params);

L("step");

	// if(steps <= 0) break;

	// With m_sel.edge set nothing is emitted here, so the body above runs once
	// and falls through to "exit".
	if(!m_sel.edge)
	{
		test(ecx, ecx);

		jle("exit", T_NEAR);

		Step();

		jmp("loop", T_NEAR);
	}

L("exit");

	// Epilogue: restore in reverse push order; ret(8) also releases 8 bytes of
	// stack from the caller's frame.
	// NOTE(review): presumably the routine's stack-passed arguments - confirm
	// against the generated function's signature.
	pop(ebp);
	pop(edi);
	pop(esi);
	pop(ebx);

	ret(8);
}
// Pixel shader: the output is the texel of the first bound surface, looked up
// with the first two varyings.
//   i_pUniform  - shader uniforms; pSurfaces[0] is the surface that gets sampled
//   io_pPSParam - Varyings[0] and Varyings[1] are read, Output is written
void CheckboardCubePS::Execute(Arti3DShaderUniform *i_pUniform, Arti3DPSParam *io_pPSParam)
{
	Arti3DPSParam &rParam = *io_pPSParam;

	// surface bound to slot 0
	Arti3DSurface *pTexture = i_pUniform->pSurfaces[0];

	rParam.Output = SampleTexture(pTexture, rParam.Varyings[0], rParam.Varyings[1]);
}