// Emits the depth (Z) test stage of the generated scanline routine - AVX / 64-bit variant.
//
// No code is generated when m_sel.zb is clear. Otherwise the emitted code
//  - forms the z address in rbp from dword[rsi + 4] + dword[rdi + 4],
//  - for non-sprite draws converts the float z held in xmm8 into integer zs (xmm0),
//    saving it to temp.zs when m_sel.zwrite is set,
//  - when m_sel.ztest is set, fetches the stored depth into xmm1 via ReadPixel, compares
//    it with zs and ORs the failing lanes into the test mask kept in xmm15.
//
// temp1 / temp2 are not used by this variant; xmm0-xmm2 are hard-coded as scratch.
// NOTE(review): for sprite draws xmm0 is presumably already loaded with zs by the caller - confirm.
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
{
	if(!m_sel.zb) return;

	// int za = fza_base.y + fza_offset->y;
	// (both 32-bit operands are sign-extended to 64 bits before the add)

	movsxd(rbp, dword[rsi + 4]);
	movsxd(rax, dword[rdi + 4]);
	add(rbp, rax);

	// GSVector4i zs = zi;

	if(!m_sel.sprite)
	{
		if(!m_sel.zoverflow)
		{
			// zs = GSVector4i(z);

			vcvttps2dq(xmm0, xmm8);
		}
		else
		{
			// zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());

			// upper 31 bits: convert z / 2, then shift back up by one

			mov(rax, (size_t)&GSVector4::m_half);

			vbroadcastss(xmm0, ptr[rax]);
			vmulps(xmm0, xmm8);
			vcvttps2dq(xmm0, xmm0);
			vpslld(xmm0, 1);

			// lowest bit: taken from the direct conversion, masked with 0x00000001
			// (all-ones shifted right by 31)

			vcvttps2dq(xmm1, xmm8);
			vpcmpeqd(xmm2, xmm2);
			vpsrld(xmm2, 31);
			vpand(xmm1, xmm2);

			vpor(xmm0, xmm1);
		}

		// keep zs around for the later z write

		if(m_sel.zwrite)
		{
			vmovdqa(ptr[r11 + offsetof(GSScanlineLocalData, temp.zs)], xmm0);
		}
	}

	if(!m_sel.ztest) return;

	ReadPixel(xmm1, rbp);

	// the unmasked zd is saved only for z writes with zpsm 0 or 1

	if(m_sel.zwrite && m_sel.zpsm < 2)
	{
		vmovdqa(ptr[r11 + offsetof(GSScanlineLocalData, temp.zd)], xmm1);
	}

	// zd &= 0xffffffff >> m_sel.zpsm * 8;
	// (done as a left shift followed by a logical right shift of the same amount)

	if(m_sel.zpsm != 0)
	{
		vpslld(xmm1, m_sel.zpsm * 8);
		vpsrld(xmm1, m_sel.zpsm * 8);
	}

	if(m_sel.zpsm == 0 || m_sel.zoverflow)
	{
		// GSVector4i off = GSVector4i::x80000000();
		// (all-ones shifted left by 31)

		vpcmpeqd(xmm2, xmm2);
		vpslld(xmm2, 31);

		// GSVector4i zso = zs - off;
		// GSVector4i zdo = zd - off;
		// biasing both sides by 0x80000000 lets the signed compare below order them as unsigned values

		vpsubd(xmm0, xmm2);
		vpsubd(xmm1, xmm2);
	}

	if(m_sel.ztst == ZTST_GEQUAL)
	{
		// test |= zso < zdo; // ~(zso >= zdo)

		vpcmpgtd(xmm1, xmm0);
		vpor(xmm15, xmm1);
	}
	else if(m_sel.ztst == ZTST_GREATER)
	{
		// TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL

		// test |= zso <= zdo; // ~(zso > zdo)
		// (computed as zso > zdo, then inverted by xor with all-ones)

		vpcmpgtd(xmm0, xmm1);
		vpcmpeqd(xmm2, xmm2);
		vpxor(xmm0, xmm2);
		vpor(xmm15, xmm0);
	}

	// emitted for every ztst value, including those that add nothing to the mask

	alltrue();
}
예제 #2
0
 /// Applies alltrue() to @p block by wrapping it in the view type that
 /// vsip::impl::view_of associates with B.
 ///
 /// @param block  Block to evaluate; taken by const reference.
 /// @return       Whatever alltrue() returns for the wrapping view.
 result_type operator()(B const &block) const
 {
   typedef typename vsip::impl::view_of<B>::type view_type;
   // The view is constructed from a B&, so constness is cast away here.
   // NOTE(review): assumes alltrue() only reads through the view - confirm.
   view_type view(const_cast<B&>(block));
   return alltrue(view);
 }
// Emits the mask-bit test of the generated GPU scanline routine.
//
// Nothing is generated unless m_sel.me is set. The emitted code copies fd (xmm1) to xmm0,
// replicates bit 15 of every 16-bit lane across that lane and ORs the result into the
// test mask held in xmm7. xmm0 is clobbered.
void GPUDrawScanlineCodeGenerator::TestMask()
{
	if(m_sel.me)
	{
		// test |= fd.sra16(15);
		// (an arithmetic right shift by 15 turns each word into 0xffff or 0x0000 depending on its top bit)

		movdqa(xmm0, xmm1);
		psraw(xmm0, 15);
		por(xmm7, xmm0);

		alltrue();
	}
}
// Emits the destination alpha test of the generated scanline routine - AVX variant.
//
// Code is generated only when m_sel.date is set and m_sel.fpsm is 0 or 2. The emitted code
// derives a per-lane mask in xmm1 from the frame buffer pixels fd (xmm6) and ORs it into
// the test mask held in xmm15:
//
//   test |= ((fd [<< 16]) ^ m_local.gd->datm).sra32(31);
//
// For fpsm == 2 the tested bit is bit 15, so fd is first shifted left by 16.
// xmm0 and xmm1 are clobbered.
void GSDrawScanlineCodeGenerator::TestDestAlpha()
{
	const bool fpsm_has_test = (m_sel.fpsm == 0) || (m_sel.fpsm == 2);

	if(!m_sel.date || !fpsm_has_test) return;

	const bool shift_fd = (m_sel.fpsm == 2);

	if(!m_sel.datm)
	{
		// datm == 0: a lane fails when the tested bit is set

		if(shift_fd)
		{
			vpslld(xmm1, xmm6, 16);
			vpsrad(xmm1, 31);
		}
		else
		{
			vpsrad(xmm1, xmm6, 31);
		}
	}
	else if(shift_fd)
	{
		// datm == 1: a lane fails when the tested bit is clear;
		// spread bit 15 over the whole lane, then compare the lane with zero

		vpxor(xmm0, xmm0);
		vpslld(xmm1, xmm6, 16);
		vpsrad(xmm1, 31);
		vpcmpeqd(xmm1, xmm0);
	}
	else
	{
		// datm == 1: invert fd (xor with all-ones), then spread bit 31 over the whole lane

		vpcmpeqd(xmm0, xmm0);
		vpxor(xmm1, xmm6, xmm0);
		vpsrad(xmm1, 31);
	}

	vpor(xmm15, xmm1);

	alltrue();
}
// Emits the destination alpha test of the generated scanline routine - SSE variant.
//
// Code is generated only when m_sel.date is set and m_sel.fpsm is 0 or 2. The emitted code
// copies the frame buffer pixels fd (xmm2) to xmm1, turns them into a per-lane mask and
// ORs that mask into the test mask held in xmm7:
//
//   test |= ((fd [<< 16]) ^ m_env.datm).sra32(31);
//
// For fpsm == 2 the tested bit is bit 15, so fd is first shifted left by 16.
// xmm0 and xmm1 are clobbered.
void GSDrawScanlineCodeGenerator::TestDestAlpha()
{
    if(!m_sel.date || (m_sel.fpsm != 0 && m_sel.fpsm != 2))
    {
        return;
    }

    // test |= ((fd [<< 16]) ^ m_env.datm).sra32(31);

    movdqa(xmm1, xmm2);

    if(m_sel.datm)
    {
        if(m_sel.fpsm == 2)
        {
            // Only bit 15 may decide the outcome. Shifting it up to bit 31 and spreading it
            // over the lane discards bits 16..31 of fd; a plain logical shift right by 15
            // would let those bits leak into the comparison with zero.

            pxor(xmm0, xmm0);
            pslld(xmm1, 16);
            psrad(xmm1, 31);
            pcmpeqd(xmm1, xmm0);
        }
        else
        {
            // invert fd (xor with all-ones), then spread bit 31 over the whole lane

            pcmpeqd(xmm0, xmm0);
            pxor(xmm1, xmm0);
            psrad(xmm1, 31);
        }
    }
    else
    {
        if(m_sel.fpsm == 2)
        {
            pslld(xmm1, 16);
        }

        psrad(xmm1, 31);
    }

    por(xmm7, xmm1);

    alltrue();
}
// Emits the alpha test of the generated scanline routine - AVX variant.
//
// The emitted code builds a per-lane "alpha test failed" mask t in xmm1 by comparing
// (ga >> 16), with ga in xmm3, against the aref field of GSScanlineGlobalData (addressed
// through r12). Depending on m_sel.afail, t is then ORed into
//   - the test mask (xmm15), followed by alltrue()   [AFAIL_KEEP]
//   - the z mask zm (xmm5)                           [AFAIL_FB_ONLY]
//   - the frame mask fm (xmm4)                       [AFAIL_ZB_ONLY]
//   - zm, and the top byte of every lane of fm       [AFAIL_RGB_ONLY]
//
// Nothing is generated for ATST_ALWAYS, nor when the selected afail mode targets a
// write that is disabled anyway. xmm0 and xmm1 are clobbered.
void GSDrawScanlineCodeGenerator::TestAlpha()
{
	// combinations for which the failure mask could not change anything

	const bool nothing_to_mask =
		(m_sel.afail == AFAIL_FB_ONLY && !m_sel.zwrite) ||
		(m_sel.afail == AFAIL_ZB_ONLY && !m_sel.fwrite) ||
		(m_sel.afail == AFAIL_RGB_ONLY && !m_sel.zwrite && m_sel.fpsm == 1);

	if(nothing_to_mask || m_sel.atst == ATST_ALWAYS) return;

	switch(m_sel.atst)
	{
	case ATST_NEVER:
		// t = GSVector4i::xffffffff();
		vpcmpeqd(xmm1, xmm1);
		break;

	case ATST_LESS:
	case ATST_LEQUAL:
		// t = (ga >> 16) > m_local.gd->aref;
		// NOTE(review): both modes emit the same compare; presumably aref is pre-adjusted elsewhere - confirm
		vpsrld(xmm1, xmm3, 16);
		vpcmpgtd(xmm1, ptr[r12 + offsetof(GSScanlineGlobalData, aref)]);
		break;

	case ATST_GEQUAL:
	case ATST_GREATER:
		// t = (ga >> 16) < m_local.gd->aref;
		// (written as aref > (ga >> 16), since only a greater-than compare is available)
		// NOTE(review): both modes emit the same compare; presumably aref is pre-adjusted elsewhere - confirm
		vpsrld(xmm0, xmm3, 16);
		vmovdqa(xmm1, ptr[r12 + offsetof(GSScanlineGlobalData, aref)]);
		vpcmpgtd(xmm1, xmm0);
		break;

	case ATST_EQUAL:
	case ATST_NOTEQUAL:
		// t = (ga >> 16) == m_local.gd->aref;   (ATST_NOTEQUAL)
		vpsrld(xmm1, xmm3, 16);
		vpcmpeqd(xmm1, ptr[r12 + offsetof(GSScanlineGlobalData, aref)]);

		if(m_sel.atst == ATST_EQUAL)
		{
			// t = (ga >> 16) != m_local.gd->aref;
			// (the equality mask inverted by xor with all-ones)
			vpcmpeqd(xmm0, xmm0);
			vpxor(xmm1, xmm0);
		}
		break;
	}

	switch(m_sel.afail)
	{
	case AFAIL_KEEP:
		// test |= t;
		vpor(xmm15, xmm1);
		alltrue();
		break;

	case AFAIL_ZB_ONLY:
		// fm |= t;
		vpor(xmm4, xmm1);
		break;

	case AFAIL_FB_ONLY:
	case AFAIL_RGB_ONLY:
		// zm |= t;
		vpor(xmm5, xmm1);

		if(m_sel.afail == AFAIL_RGB_ONLY)
		{
			// fm |= t & GSVector4i::xff000000();
			// (the low 24 bits of t are cleared by shifting right, then left, by 24)
			vpsrld(xmm1, 24);
			vpslld(xmm1, 24);
			vpor(xmm4, xmm1);
		}
		break;
	}
}
// Emits the depth (Z) test stage of the generated scanline routine - SSE / 32-bit variant.
//
// No code is generated when m_sel.zb is clear. Otherwise the emitted code
//  - forms the z address in ebp from dword[esi + 4] + dword[edi + 4],
//  - for non-sprite draws converts the float z held in xmm0 into integer zs in place,
//    saving it to m_env.temp.zs when m_sel.zwrite is set,
//  - when m_sel.ztest is set, fetches the stored depth into xmm1 via ReadPixel, compares
//    it with zs and ORs the failing lanes into the test mask kept in xmm7.
//
// temp1 / temp2 are scratch registers used only on the zoverflow conversion path;
// xmm4 is overwritten on the ztest path when the 0x80000000 bias or the inversion is needed.
// NOTE(review): for sprite draws xmm0 is presumably already loaded with zs by the caller - confirm.
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
{
    if(!m_sel.zb) return;

    // int za = fza_base.y + fza_offset->y;

    mov(ebp, dword[esi + 4]);
    add(ebp, dword[edi + 4]);

    // GSVector4i zs = zi;

    if(!m_sel.sprite)
    {
        if(!m_sel.zoverflow)
        {
            // zs = GSVector4i(z);

            cvttps2dq(xmm0, xmm0);
        }
        else
        {
            // zs = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());

            static float half = 0.5f;

            // upper 31 bits: broadcast 0.5f, convert z / 2, then shift back up by one

            movss(temp1, dword[&half]);
            shufps(temp1, temp1, _MM_SHUFFLE(0, 0, 0, 0));
            mulps(temp1, xmm0);
            cvttps2dq(temp1, temp1);
            pslld(temp1, 1);

            // lowest bit: taken from the direct conversion, masked with 0x00000001
            // (all-ones shifted right by 31)

            cvttps2dq(xmm0, xmm0);
            pcmpeqd(temp2, temp2);
            psrld(temp2, 31);
            pand(xmm0, temp2);

            por(xmm0, temp1);
        }

        // keep zs around for the later z write

        if(m_sel.zwrite)
        {
            movdqa(xmmword[&m_env.temp.zs], xmm0);
        }
    }

    if(!m_sel.ztest) return;

    ReadPixel(xmm1, ebp);

    // the unmasked zd is saved only for z writes with zpsm 0 or 1

    if(m_sel.zwrite && m_sel.zpsm < 2)
    {
        movdqa(xmmword[&m_env.temp.zd], xmm1);
    }

    // zd &= 0xffffffff >> m_sel.zpsm * 8;
    // (done as a left shift followed by a logical right shift of the same amount)

    if(m_sel.zpsm != 0)
    {
        pslld(xmm1, m_sel.zpsm * 8);
        psrld(xmm1, m_sel.zpsm * 8);
    }

    if(m_sel.zpsm == 0 || m_sel.zoverflow)
    {
        // GSVector4i o = GSVector4i::x80000000();
        // (all-ones shifted left by 31)

        pcmpeqd(xmm4, xmm4);
        pslld(xmm4, 31);

        // biasing both sides by 0x80000000 lets the signed compare below order them as unsigned values

        // GSVector4i zso = zs - o;

        psubd(xmm0, xmm4);

        // GSVector4i zdo = zd - o;

        psubd(xmm1, xmm4);
    }

    if(m_sel.ztst == ZTST_GEQUAL)
    {
        // test |= zso < zdo; // ~(zso >= zdo)

        pcmpgtd(xmm1, xmm0);
        por(xmm7, xmm1);
    }
    else if(m_sel.ztst == ZTST_GREATER)
    {
        // TODO: tidus hair and chocobo wings only appear fully when this is tested as ZTST_GEQUAL

        // test |= zso <= zdo; // ~(zso > zdo)
        // (computed as zso > zdo, then inverted by xor with all-ones)

        pcmpgtd(xmm0, xmm1);
        pcmpeqd(xmm4, xmm4);
        pxor(xmm0, xmm4);
        por(xmm7, xmm0);
    }

    // emitted for every ztst value, including those that add nothing to the mask

    alltrue();
}
// Emits the alpha test of the generated scanline routine - SSE variant.
//
// The emitted code builds a per-lane "alpha test failed" mask t in xmm1 by comparing
// (ga >> 16), with ga in xmm6, against m_env.aref. Depending on m_sel.afail, t is then
// ORed into
//   - the test mask (xmm7), followed by alltrue()    [AFAIL_KEEP]
//   - the z mask zm (xmm4)                           [AFAIL_FB_ONLY]
//   - the frame mask fm (xmm3)                       [AFAIL_ZB_ONLY]
//   - zm, and the top byte of every lane of fm       [AFAIL_RGB_ONLY]
//
// Nothing is generated for ATST_ALWAYS, nor when the selected afail mode targets a
// write that is disabled anyway. xmm0 and xmm1 are clobbered.
void GSDrawScanlineCodeGenerator::TestAlpha()
{
    // combinations for which the failure mask could not change anything

    const bool nothing_to_mask =
        (m_sel.afail == AFAIL_FB_ONLY && !m_sel.zwrite) ||
        (m_sel.afail == AFAIL_ZB_ONLY && !m_sel.fwrite) ||
        (m_sel.afail == AFAIL_RGB_ONLY && !m_sel.zwrite && m_sel.fpsm == 1);

    if(nothing_to_mask || m_sel.atst == ATST_ALWAYS) return;

    switch(m_sel.atst)
    {
    case ATST_NEVER:
        // t = GSVector4i::xffffffff();
        pcmpeqd(xmm1, xmm1);
        break;

    case ATST_LESS:
    case ATST_LEQUAL:
        // t = (ga >> 16) > m_env.aref;
        // NOTE(review): both modes emit the same compare; presumably aref is pre-adjusted elsewhere - confirm
        movdqa(xmm1, xmm6);
        psrld(xmm1, 16);
        pcmpgtd(xmm1, xmmword[&m_env.aref]);
        break;

    case ATST_GEQUAL:
    case ATST_GREATER:
        // t = (ga >> 16) < m_env.aref;
        // (written as aref > (ga >> 16), since only a greater-than compare is available)
        // NOTE(review): both modes emit the same compare; presumably aref is pre-adjusted elsewhere - confirm
        movdqa(xmm0, xmm6);
        psrld(xmm0, 16);
        movdqa(xmm1, xmmword[&m_env.aref]);
        pcmpgtd(xmm1, xmm0);
        break;

    case ATST_EQUAL:
    case ATST_NOTEQUAL:
        // t = (ga >> 16) == m_env.aref;   (ATST_NOTEQUAL)
        movdqa(xmm1, xmm6);
        psrld(xmm1, 16);
        pcmpeqd(xmm1, xmmword[&m_env.aref]);

        if(m_sel.atst == ATST_EQUAL)
        {
            // t = (ga >> 16) != m_env.aref;
            // (the equality mask inverted by xor with all-ones)
            pcmpeqd(xmm0, xmm0);
            pxor(xmm1, xmm0);
        }
        break;
    }

    switch(m_sel.afail)
    {
    case AFAIL_KEEP:
        // test |= t;
        por(xmm7, xmm1);
        alltrue();
        break;

    case AFAIL_ZB_ONLY:
        // fm |= t;
        por(xmm3, xmm1);
        break;

    case AFAIL_FB_ONLY:
    case AFAIL_RGB_ONLY:
        // zm |= t;
        por(xmm4, xmm1);

        if(m_sel.afail == AFAIL_RGB_ONLY)
        {
            // fm |= t & GSVector4i::xff000000();
            // (the low 24 bits of t are cleared by shifting right, then left, by 24)
            psrld(xmm1, 24);
            pslld(xmm1, 24);
            por(xmm3, xmm1);
        }
        break;
    }
}