예제 #1
0
static void emit_vtx_fetch(struct fd_ringbuffer *ring,
		struct fd_shader *shader, struct fd_parameters *attr,
		uint32_t first)
{
	uint32_t i;

	for (i = 0; i < shader->ir->attributes_count; i++) {
		bool switchnext = (i != (shader->ir->attributes_count - 1));
		struct ir3_attribute *a = shader->ir->attributes[i];
		struct fd_param *p = find_param(attr, a->name);
		uint32_t s = fmt2size(p->fmt);

		OUT_PKT0(ring, REG_A3XX_VFD_FETCH(i), 2);
		OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(s - 1) |
				A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(s) |
				COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
				A3XX_VFD_FETCH_INSTR_0_INDEXCODE(i) |
				A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
		OUT_RELOC(ring, p->bo, s * first, 0);    /* VFD_FETCH[i].INSTR_1 */

		OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(i), 1);
		OUT_RING(ring, A3XX_VFD_DECODE_INSTR_WRITEMASK(regmask(a->num)) |
				A3XX_VFD_DECODE_INSTR_CONSTFILL |
				A3XX_VFD_DECODE_INSTR_FORMAT(p->fmt) |
				A3XX_VFD_DECODE_INSTR_REGID(a->rstart->num) |
				A3XX_VFD_DECODE_INSTR_SHIFTCNT(s) |
				A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
				COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
	}
}
예제 #2
0
파일: nteh.c 프로젝트: spott/dmd
code *cdsetjmp(elem *e,regm_t *pretregs)
{   code cs;
    code *c;
    regm_t retregs;
    unsigned stackpushsave;
    unsigned flag;

    c = NULL;
    stackpushsave = stackpush;
#if SCPP
    if (CPP && (funcsym_p->Sfunc->Fflags3 & Fcppeh || usednteh & NTEHcpp))
    {
        /*  If in C++ try block
            If the frame that is calling setjmp has a try,catch block then
            the call to setjmp3 is as follows:
              __setjmp3(environment,3,__cpp_longjmp_unwind,trylevel,funcdata);

            __cpp_longjmp_unwind is a routine in the RTL. This is a
            stdcall routine that will deal with unwinding for CPP Frames.
            trylevel is the value that gets incremented at each catch,
            constructor invocation.
            funcdata is the same value that you put into EAX prior to
            cppframehandler getting called.
         */
        symbol *s;

        s = except_gensym();
        if (!s)
            goto L1;

        c = gencs(c,0x68,0,FLextern,s);                 // PUSH &scope_table
        stackpush += 4;
        genadjesp(c,4);

        c = genc1(c,0xFF,modregrm(1,6,BP),FLconst,(targ_uns)-4);
        // PUSH trylevel
        stackpush += 4;
        genadjesp(c,4);

        cs.Iop = 0x68;
        cs.Iflags = CFoff;
        cs.Irex = 0;
        cs.IFL2 = FLextern;
        cs.IEVsym2 = rtlsym[RTLSYM_CPP_LONGJMP];
        cs.IEVoffset2 = 0;
        c = gen(c,&cs);                         // PUSH &_cpp_longjmp_unwind
        stackpush += 4;
        genadjesp(c,4);

        flag = 3;
    }
    else
#endif
        if (funcsym_p->Sfunc->Fflags3 & Fnteh)
        {
            /*  If in NT SEH try block
                If the frame that is calling setjmp has a try, except block
                then the call to setjmp3 is as follows:
                  __setjmp3(environment,2,__seh_longjmp_unwind,trylevel);
                __seth_longjmp_unwind is supplied by the RTL and is a stdcall
                function. It is the name that MSOFT uses, we should
                probably use the same one.
                trylevel is the value that you increment at each try and
                decrement at the close of the try.  This corresponds to the
                index field of the ehrec.
             */
            int sindex_off;

            sindex_off = 20;                // offset of __context.sindex
            cs.Iop = 0xFF;
            cs.Irm = modregrm(2,6,BPRM);
            cs.Iflags = 0;
            cs.Irex = 0;
            cs.IFL1 = FLbprel;
            cs.IEVsym1 = nteh_contextsym();
            cs.IEVoffset1 = sindex_off;
            c = gen(c,&cs);                 // PUSH scope_index
            stackpush += 4;
            genadjesp(c,4);

            cs.Iop = 0x68;
            cs.Iflags = CFoff;
            cs.Irex = 0;
            cs.IFL2 = FLextern;
            cs.IEVsym2 = rtlsym[RTLSYM_LONGJMP];
            cs.IEVoffset2 = 0;
            c = gen(c,&cs);                 // PUSH &_seh_longjmp_unwind
            stackpush += 4;
            genadjesp(c,4);

            flag = 2;
        }
        else
        {
            /*  If the frame calling setjmp has neither a try..except, nor a
                try..catch, then call setjmp3 as follows:
                _setjmp3(environment,0)
             */
L1:
            flag = 0;
        }

    cs.Iop = 0x68;
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.IFL2 = FLconst;
    cs.IEV2.Vint = flag;
    c = gen(c,&cs);                     // PUSH flag
    stackpush += 4;
    genadjesp(c,4);

    c = cat(c,params(e->E1,REGSIZE));

    c = cat(c,getregs(~rtlsym[RTLSYM_SETJMP3]->Sregsaved & (ALLREGS | mES)));
    gencs(c,0xE8,0,FLfunc,rtlsym[RTLSYM_SETJMP3]);      // CALL __setjmp3

    c = genc2(c,0x81,modregrm(3,0,SP),stackpush - stackpushsave);       // ADD ESP,8
    genadjesp(c,-(stackpush - stackpushsave));

    stackpush = stackpushsave;
    retregs = regmask(e->Ety, TYnfunc);
    return cat(c,fixresult(e,retregs,pretregs));
}
예제 #3
0
void fd_program_emit_state(struct fd_program *program, uint32_t first,
		struct fd_parameters *uniforms, struct fd_parameters *attr,
		struct fd_ringbuffer *ring)
{
	struct fd_shader *vs = get_shader(program, FD_SHADER_VERTEX);
	struct fd_shader *fs = get_shader(program, FD_SHADER_FRAGMENT);
	struct ir3_shader_info *vsi = &vs->info;
	struct ir3_shader_info *fsi = &fs->info;
	uint32_t vsconstlen = constlen(vs);
	uint32_t fsconstlen = constlen(fs);
	uint32_t i, outloc;

	uint32_t posregid   = getpos(vs, "gl_Position", 0);
	uint32_t psizeregid = getpos(vs, "gl_PointSize", (63 << 2));
	uint32_t colorregid = getpos(fs, "gl_FragColor", 0);

	uint32_t numvar = totalvar(fs);

	assert (vs->ir->varyings_count == fs->ir->varyings_count);

	OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
	OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
			A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
			A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
	OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
			A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);
	OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
	OUT_RING(ring, 0x00000000);        /* HLSQ_CONTROL_3_REG */
	OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vsconstlen) |
			A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
			A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(instrlen(vs)));
	OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fsconstlen) |
			A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
			A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(instrlen(fs)));

	OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
	OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) |
			A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
			// XXX "resolve" (?) bit set on gmem->mem pass..
			COND(!uniforms, A3XX_SP_SP_CTRL_REG_RESOLVE) |
			// XXX sometimes 0, sometimes 1:
			A3XX_SP_SP_CTRL_REG_LOMODE(1));

	/* emit unknown sequence of writes to 0x0ec4/0x0ec8 that the blob
	 * emits as part of the program state (it seems)..
	 */
	for (i = 0; i < 6; i++) {
		OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER0_SELECT, 1);
		OUT_RING(ring, 0x00000000);    /* SP_PERFCOUNTER0_SELECT */

		OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER3_SELECT, 1);
		OUT_RING(ring, 0x00000000);    /* SP_PERFCOUNTER3_SELECT */
	}

	OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
	OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(instrlen(vs)));

	OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
	OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
			A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
			A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
			A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
			A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
			A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
			A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
			A3XX_SP_VS_CTRL_REG0_LENGTH(instrlen(vs)));

	OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vsconstlen) |
			A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(totalattr(vs)) |
			A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(max(vsi->max_const, 0)));
	OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(posregid) |
			A3XX_SP_VS_PARAM_REG_PSIZEREGID(psizeregid) |
			A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fs->ir->varyings_count));

	for (i = 0; i < vs->ir->varyings_count; ) {
		struct ir3_varying *v;
		uint32_t reg = 0;

		OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i/2), 1);

		v = vs->ir->varyings[i++];
		if (v) {
			reg |= A3XX_SP_VS_OUT_REG_A_REGID(v->rstart->num);
			reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(regmask(v->num));
		}

		v = vs->ir->varyings[i++];
		if (v) {
			reg |= A3XX_SP_VS_OUT_REG_B_REGID(v->rstart->num);
			reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(regmask(v->num));
		}

		OUT_RING(ring, reg);
	}

	outloc = 8;    /* I assume 0 and 4 are gl_Position/gl_PointSize? */
	for (i = 0; i < vs->ir->varyings_count; ) {
		struct ir3_varying *v;
		uint32_t reg = 0;

		OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i/4), 1);

		/* note: if we supported anything other than vec4 varyings, we'd
		 * actually be incrementing outloc by the actual varying size in
		 * units of scalar registers (ie. vec3 -> 3)
		 */

		v = vs->ir->varyings[i++];
		if (v) {
			reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(outloc);
			outloc += v->num;
		}

		v = vs->ir->varyings[i++];
		if (v) {
			reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(outloc);
			outloc += v->num;
		}

		v = vs->ir->varyings[i++];
		if (v) {
			reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(outloc);
			outloc += v->num;
		}

		v = vs->ir->varyings[i++];
		if (v) {
			reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(outloc);
			outloc += v->num;
		}

		OUT_RING(ring, reg);
	}

	// TODO SP_VS_OBJ_OFFSET_REG / SP_VS_OBJ_START_REG

	OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
	OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(instrlen(fs)));

	OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
	OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
			A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
			A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
			A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
			A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
			A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
			A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
			COND(fs->ir->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
			A3XX_SP_FS_CTRL_REG0_LENGTH(instrlen(fs)));
	OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fsconstlen) |
			A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(0) |
			A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(max(fsi->max_const, 0)) |
			A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));

	// TODO SP_FS_OBJ_OFFSET_REG / SP_FS_OBJ_START_REG

	OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
	OUT_RING(ring, 0x00000000);        /* SP_FS_FLAT_SHAD_MODE_REG_0 */
	OUT_RING(ring, 0x00000000);        /* SP_FS_FLAT_SHAD_MODE_REG_1 */

	OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
	OUT_RING(ring, 0x00000000);        /* SP_FS_OUTPUT_REG */

	OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
	OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(colorregid) |  /* SP_FS_MRT[0].REG */
			A3XX_SP_FS_MRT_REG_HALF_PRECISION);
	OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));           /* SP_FS_MRT[1].REG */
	OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));           /* SP_FS_MRT[2].REG */
	OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));           /* SP_FS_MRT[3].REG */

	OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
	OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(numvar) |
			A3XX_VPC_ATTR_THRDASSIGN(1) |
			A3XX_VPC_ATTR_LMSIZE(1));
	OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(numvar) |
			A3XX_VPC_PACK_NUMNONPOSVSVAR(numvar));

	OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
	OUT_RING(ring, 0x00000000);        /* VPC_VARYING_INTERP[0].MODE */
	OUT_RING(ring, 0x00000000);        /* VPC_VARYING_INTERP[1].MODE */
	OUT_RING(ring, 0x00000000);        /* VPC_VARYING_INTERP[2].MODE */
	OUT_RING(ring, 0x00000000);        /* VPC_VARYING_INTERP[3].MODE */

	OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
	OUT_RING(ring, 0x00000000);        /* VPC_VARYING_PS_REPL[0].MODE */
	OUT_RING(ring, 0x00000000);        /* VPC_VARYING_PS_REPL[1].MODE */
	OUT_RING(ring, 0x00000000);        /* VPC_VARYING_PS_REPL[2].MODE */
	OUT_RING(ring, 0x00000000);        /* VPC_VARYING_PS_REPL[3].MODE */

	OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
	OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
			A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(252));

	emit_shader(ring, vs, SB_VERT_SHADER);

	OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
	OUT_RING(ring, 0x00000000);        /* VFD_PERFCOUNTER0_SELECT */

	emit_shader(ring, fs, SB_FRAG_SHADER);

	OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
	OUT_RING(ring, 0x00000000);        /* VFD_PERFCOUNTER0_SELECT */

	OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
	OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(totalattr(vs)) |
			A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
			A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(vs->ir->attributes_count) |
			A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(vs->ir->attributes_count));
	OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
			A3XX_VFD_CONTROL_1_REGID4VTX(63 << 2) |
			A3XX_VFD_CONTROL_1_REGID4INST(63 << 2));

	emit_vtx_fetch(ring, vs, attr, first);

	/* we have this sometimes, not others.. perhaps we could be clever
	 * and figure out actually when we need to invalidate cache:
	 */
	OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
	OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
			A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
			A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);

	/* for RB_RESOLVE_PASS, I think the consts are not needed: */
	if (uniforms) {
		emit_uniconst(ring, vs, uniforms, SB_VERT_SHADER);
		emit_uniconst(ring, fs, uniforms, SB_FRAG_SHADER);
	}
}