Exemple #1
0
void
fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
                 int nr, struct pipe_surface **bufs)
{
    const struct ir3_shader_variant *vp, *fp;
    const struct ir3_info *vsi, *fsi;
    enum a3xx_instrbuffermode fpbuffer, vpbuffer;
    uint32_t fpbuffersz, vpbuffersz, fsoff;
    uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0};
    int constmode;
    int i, j, k;

    debug_assert(nr <= ARRAY_SIZE(color_regid));

    vp = fd3_emit_get_vp(emit);

    if (emit->key.binning_pass) {
        /* use dummy stateobj to simplify binning vs non-binning: */
        static const struct ir3_shader_variant binning_fp = {};
        fp = &binning_fp;
    } else {
        fp = fd3_emit_get_fp(emit);
    }

    vsi = &vp->info;
    fsi = &fp->info;

    fpbuffer = BUFFER;
    vpbuffer = BUFFER;
    fpbuffersz = fp->instrlen;
    vpbuffersz = vp->instrlen;

    /*
     * Decide whether to use BUFFER or CACHE mode for VS and FS.  It
     * appears like 256 is the hard limit, but when the combined size
     * exceeds 128 then blob will try to keep FS in BUFFER mode and
     * switch to CACHE for VS until VS is too large.  The blob seems
     * to switch FS out of BUFFER mode at slightly under 128.  But
     * a bit fuzzy on the decision tree, so use slightly conservative
     * limits.
     *
     * TODO check if these thresholds for BUFFER vs CACHE mode are the
     *      same for all a3xx or whether we need to consider the gpuid
     */

    if ((fpbuffersz + vpbuffersz) > 128) {
        if (fpbuffersz < 112) {
            /* FP:BUFFER   VP:CACHE  */
            vpbuffer = CACHE;
            vpbuffersz = 256 - fpbuffersz;
        } else if (vpbuffersz < 112) {
            /* FP:CACHE    VP:BUFFER */
            fpbuffer = CACHE;
            fpbuffersz = 256 - vpbuffersz;
        } else {
            /* FP:CACHE    VP:CACHE  */
            vpbuffer = fpbuffer = CACHE;
            vpbuffersz = fpbuffersz = 192;
        }
    }

    if (fpbuffer == BUFFER) {
        fsoff = 128 - fpbuffersz;
    } else {
        fsoff = 256 - fpbuffersz;
    }

    /* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
    constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;

    pos_regid = ir3_find_output_regid(vp,
                                      ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
    posz_regid = ir3_find_output_regid(fp,
                                       ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
    psize_regid = ir3_find_output_regid(vp,
                                        ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
    if (fp->color0_mrt) {
        color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
                                              ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
    } else {
        for (i = 0; i < fp->outputs_count; i++) {
            ir3_semantic sem = fp->outputs[i].semantic;
            unsigned idx = sem2idx(sem);
            if (sem2name(sem) != TGSI_SEMANTIC_COLOR)
                continue;
            debug_assert(idx < ARRAY_SIZE(color_regid));
            color_regid[idx] = fp->outputs[i].regid;
        }
    }

    /* adjust regids for alpha output formats. there is no alpha render
     * format, so it's just treated like red
     */
    for (i = 0; i < nr; i++)
        if (util_format_is_alpha(pipe_surface_format(bufs[i])))
            color_regid[i] += 3;

    /* we could probably divide this up into things that need to be
     * emitted if frag-prog is dirty vs if vert-prog is dirty..
     */

    OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
    OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
             A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
             /* NOTE:  I guess SHADERRESTART and CONSTFULLUPDATE maybe
              * flush some caches? I think we only need to set those
              * bits if we have updated const or shader..
              */
             A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
             A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
    OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
             A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
             COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_ZWCOORD));
    OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
    OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(fp->pos_regid));
    OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
             A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
             A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz));
    OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
             A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
             A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz));

    OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
    OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) |
             COND(emit->key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
             A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
             A3XX_SP_SP_CTRL_REG_L0MODE(0));

    OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
    OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));

    OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
    OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
             A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(vpbuffer) |
             COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) |
             A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
             A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
             A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
             A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
             A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
             COND(vp->has_samp, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
             A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz));
    OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
             A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
             A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen + 1, 0)));
    OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
             A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
             A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(fp->total_in, 4) / 4));

    for (i = 0, j = -1; (i < 8) && (j < (int)fp->inputs_count); i++) {
        uint32_t reg = 0;

        OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1);

        j = ir3_next_varying(fp, j);
        if (j < fp->inputs_count) {
            k = ir3_find_output(vp, fp->inputs[j].semantic);
            reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[k].regid);
            reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[j].compmask);
        }

        j = ir3_next_varying(fp, j);
        if (j < fp->inputs_count) {
            k = ir3_find_output(vp, fp->inputs[j].semantic);
            reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[k].regid);
            reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[j].compmask);
        }

        OUT_RING(ring, reg);
    }

    for (i = 0, j = -1; (i < 4) && (j < (int)fp->inputs_count); i++) {
        uint32_t reg = 0;

        OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1);

        j = ir3_next_varying(fp, j);
        if (j < fp->inputs_count)
            reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(fp->inputs[j].inloc);
        j = ir3_next_varying(fp, j);
        if (j < fp->inputs_count)
            reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(fp->inputs[j].inloc);
        j = ir3_next_varying(fp, j);
        if (j < fp->inputs_count)
            reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(fp->inputs[j].inloc);
        j = ir3_next_varying(fp, j);
        if (j < fp->inputs_count)
            reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(fp->inputs[j].inloc);

        OUT_RING(ring, reg);
    }

    OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
    OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
             A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
    OUT_RELOC(ring, vp->bo, 0, 0, 0);  /* SP_VS_OBJ_START_REG */

    if (emit->key.binning_pass) {
        OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
        OUT_RING(ring, 0x00000000);

        OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
        OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
                 A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER));
        OUT_RING(ring, 0x00000000);

        OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 1);
        OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
                 A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
    } else {
        OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
        OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));

        OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
        OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
                 A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(fpbuffer) |
                 COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) |
                 A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
                 A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
                 A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
                 A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
                 A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
                 COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
                 A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz));
        OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
                 A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
                 A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fp->constlen + 1, 0)) |
                 A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));

        OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
        OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(
                     MAX2(128, vp->constlen)) |
                 A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff));
        OUT_RELOC(ring, fp->bo, 0, 0, 0);  /* SP_FS_OBJ_START_REG */
    }

    OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
    OUT_RING(ring,
             COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
             A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) |
             A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1));

    OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
    for (i = 0; i < 4; i++) {
        uint32_t mrt_reg = A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
                           COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION);

        if (i < nr) {
            enum pipe_format fmt = pipe_surface_format(bufs[i]);
            mrt_reg |= COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) |
                       COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT);
        }
        OUT_RING(ring, mrt_reg);
    }

    if (emit->key.binning_pass) {
        OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
        OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) |
                 A3XX_VPC_ATTR_LMSIZE(1) |
                 COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
        OUT_RING(ring, 0x00000000);
    } else {
        uint32_t vinterp[4], flatshade[2], vpsrepl[4];

        memset(vinterp, 0, sizeof(vinterp));
        memset(flatshade, 0, sizeof(flatshade));
        memset(vpsrepl, 0, sizeof(vpsrepl));

        /* figure out VARYING_INTERP / FLAT_SHAD register values: */
        for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) {
            uint32_t interp = fp->inputs[j].interpolate;

            /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
             * instead.. rather than -8 everywhere else..
             */
            uint32_t inloc = fp->inputs[j].inloc - 8;

            /* currently assuming varyings aligned to 4 (not
             * packed):
             */
            debug_assert((inloc % 4) == 0);

            if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
                    ((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
                uint32_t loc = inloc;
                for (i = 0; i < 4; i++, loc++) {
                    vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
                    flatshade[loc / 32] |= 1 << (loc % 32);
                }
            }

            /* Replace the .xy coordinates with S/T from the point sprite. Set
             * interpolation bits for .zw such that they become .01
             */
            if (emit->sprite_coord_enable & (1 << sem2idx(fp->inputs[j].semantic))) {
                vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
                                       << ((inloc % 16) * 2);
                vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
                vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
            }
        }

        OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
        OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
                 A3XX_VPC_ATTR_THRDASSIGN(1) |
                 A3XX_VPC_ATTR_LMSIZE(1) |
                 COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
        OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
                 A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));

        OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
        OUT_RING(ring, vinterp[0]);    /* VPC_VARYING_INTERP[0].MODE */
        OUT_RING(ring, vinterp[1]);    /* VPC_VARYING_INTERP[1].MODE */
        OUT_RING(ring, vinterp[2]);    /* VPC_VARYING_INTERP[2].MODE */
        OUT_RING(ring, vinterp[3]);    /* VPC_VARYING_INTERP[3].MODE */

        OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
        OUT_RING(ring, vpsrepl[0]);    /* VPC_VARYING_PS_REPL[0].MODE */
        OUT_RING(ring, vpsrepl[1]);    /* VPC_VARYING_PS_REPL[1].MODE */
        OUT_RING(ring, vpsrepl[2]);    /* VPC_VARYING_PS_REPL[2].MODE */
        OUT_RING(ring, vpsrepl[3]);    /* VPC_VARYING_PS_REPL[3].MODE */

        OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
        OUT_RING(ring, flatshade[0]);        /* SP_FS_FLAT_SHAD_MODE_REG_0 */
        OUT_RING(ring, flatshade[1]);        /* SP_FS_FLAT_SHAD_MODE_REG_1 */
    }

    if (vpbuffer == BUFFER)
        emit_shader(ring, vp);

    OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
    OUT_RING(ring, 0x00000000);        /* VFD_PERFCOUNTER0_SELECT */

    if (!emit->key.binning_pass) {
        if (fpbuffer == BUFFER)
            emit_shader(ring, fp);

        OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
        OUT_RING(ring, 0x00000000);        /* VFD_PERFCOUNTER0_SELECT */
    }
}
Exemple #2
0
/* open listening socket and initialize */
void cann_listen(char *port)
{
        struct addrinfo hints;
        struct addrinfo *result, *rp;
        int sfd, s;
        char buf[200];
        int ret, one = 1;

        signal(SIGPIPE, SIG_IGN);

        memset(&hints, 0, sizeof(struct addrinfo));
        hints.ai_family = AF_UNSPEC;  // Allow IPv4 or IPv6 - ipv4 is mapped into an v6 addr -> ai_v4mapped
        hints.ai_socktype = SOCK_STREAM;                // Datagram socket
        hints.ai_flags = AI_PASSIVE | AI_V4MAPPED | AI_NUMERICSERV | AI_ALL;     // For wildcard IP address

        s = getaddrinfo(NULL, port, &hints, &result);
        if (s != 0) {
                fprintf(stderr,
                        "No listening addresses available(getaddrinfo: %s)\n",
                        gai_strerror(s));
                exit(EXIT_FAILURE);
        }

        //dump address info
        for (rp = result; rp != NULL; rp = rp->ai_next)
                debug(5, "Found Address %s\t\t(family: %s, socktype %i: protocol %i)",
                      get_ip_str((struct sockaddr *)rp->ai_addr,
                                 buf, sizeof(buf)),
                      (rp->ai_family == AF_INET) ? "IPv4" :
                      ((rp->ai_family == AF_INET6) ? "IPv6" :
                       ((snprintf(buf, sizeof(buf), "%i", rp->ai_family) != 0)
                        ? buf : "")),
                      rp->ai_socktype,
                      rp->ai_protocol);

        /* getaddrinfo() returns a list of address structures.
           Try each address until we successfully bind(2).
           If socket(2) (or bind(2)) fails, we (close the socket
           and) try the next address. */

        for (rp = result; rp != NULL; rp = rp->ai_next) {
                sfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
                if (sfd == -1)
                        continue;

                debug(5, "Trying to bind to %s (%s)",
                      get_ip_str((struct sockaddr *)rp->ai_addr,
                                 buf, sizeof(buf)),
                      (rp->ai_family == AF_INET) ? "IPv4" :
                      ((rp->ai_family == AF_INET6) ? "IPv6" :
                       ((snprintf(buf, sizeof(buf), "%i", rp->ai_family) != 0)
                        ? buf : "")),
                      rp->ai_socktype,
                      rp->ai_protocol);

                //set reuseaddr to avoid address in use (b/c if close_wait) when restarting
                ret = setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));

                if (ret != 0) debug_perror(0, "Could not set socket options: ");

                if (bind(sfd, rp->ai_addr, rp->ai_addrlen) == 0)
                        break;                  /* Success */

                debug_perror(0, "Could not bind to %s.",
                             get_ip_str((struct sockaddr *)rp->ai_addr,
                                        buf,
                                        sizeof(buf)));
                close(sfd);
        }

        if (rp == NULL) {               /* No address succeeded */
                debug_perror(0, "All addresses in use");
                exit(EXIT_FAILURE);
        } else debug(5, "Bind succeeded!");

        freeaddrinfo(result);           /* No longer needed */
        listen_socket = sfd;

        int flags = 0;
        flags = fcntl(listen_socket, F_GETFL, 0);
        fcntl(listen_socket, F_SETFL, flags | O_NDELAY);

        /* specify queue */
        ret = listen(listen_socket, SOMAXCONN);
        debug_assert(ret >= 0, "Could listen() listening socket");
}
Exemple #3
0
static int response_to_task_terminated (entity_messages message, entity *receiver, entity *sender, va_list pargs)
{

	entity
		*guide,
		*landing_en,
		*task;

	aircraft
		*raw;

	raw = (aircraft *) get_local_entity_data (receiver);

	#if DEBUG_MODULE_MESSAGE_TEXT

	debug_log ("AIRCRAFT MSGS: received task terminated, ac %s made safe", entity_sub_type_aircraft_names [raw->mob.sub_type]);

	#endif

	debug_assert (raw->member_link.parent);

	//
	// Check if aircraft was on a landing/takeoff/holding route
	//

	guide = get_local_entity_parent (receiver, LIST_TYPE_FOLLOWER);

	ASSERT (guide);

	task = get_local_entity_parent (guide, LIST_TYPE_GUIDE);

	ASSERT (task);

	landing_en = get_local_entity_parent (task, LIST_TYPE_ASSIGNED_TASK);

	switch (get_local_entity_int_value (task, INT_TYPE_ENTITY_SUB_TYPE))
	{

		case ENTITY_SUB_TYPE_TASK_LANDING:
		{
			ASSERT (landing_en);

			#if DEBUG_MODULE_MESSAGE_TEXT

			debug_log ("AC_MSGS: mobile was on a %s route, unlocking route and landing site", entity_sub_type_task_names [get_local_entity_int_value (task, INT_TYPE_ENTITY_SUB_TYPE)]);

			#endif

			if (get_local_entity_int_value (receiver, INT_TYPE_PLAYER) == ENTITY_PLAYER_AI)
			{

				notify_local_entity (ENTITY_MESSAGE_UNLOCK_LANDING_ROUTE, landing_en, receiver);
			}

			notify_local_entity (ENTITY_MESSAGE_UNLOCK_LANDING_SITE, landing_en, receiver);

			break;
		}

		case ENTITY_SUB_TYPE_TASK_TAKEOFF:
		{
			ASSERT (landing_en);

			#if DEBUG_MODULE_MESSAGE_TEXT

			debug_log ("AC_MSGS: mobile was on a %s route, unlocking route", entity_sub_type_task_names [get_local_entity_int_value (task, INT_TYPE_ENTITY_SUB_TYPE)]);

			#endif

			notify_local_entity (ENTITY_MESSAGE_UNLOCK_TAKEOFF_ROUTE, landing_en, receiver);

			break;
		}

		case ENTITY_SUB_TYPE_TASK_LANDING_HOLDING:
		{
			ASSERT (landing_en);

			#if DEBUG_MODULE_MESSAGE_TEXT

			debug_log ("AC_MSGS: mobile was on a %s route, locking route and landing site", entity_sub_type_task_names [get_local_entity_int_value (task, INT_TYPE_ENTITY_SUB_TYPE)]);

			#endif

			notify_local_entity (ENTITY_MESSAGE_UNLOCK_LANDING_SITE, landing_en, receiver);

			break;
		}

		case ENTITY_SUB_TYPE_TASK_TAKEOFF_HOLDING:
		{
			ASSERT (landing_en);

			#if DEBUG_MODULE_MESSAGE_TEXT

			debug_log ("AC_MSGS: mobile was on a %s route, no route to unlock", entity_sub_type_task_names [get_local_entity_int_value (task, INT_TYPE_ENTITY_SUB_TYPE)]);

			#endif

			break;
		}
	}

	return (TRUE);
}
Exemple #4
0
static unsigned long mk_unixtime(int y, int m, int d, int hh, int mm, int ss)
{
 unsigned long u=0;
 int i;
 /* Clash with NetBSD/x86-64 patch: leaving rc as unsigned long still permits
    to escape the year 2038 problem in favor of year 2106 problem, while a
    dedicated time_t structure can be expected as a 64-bit value on relevant
    platforms -- ASR fix 25/01/2004 */
 unsigned long rc;
 time_t tt;
 long tzshift;
 #ifndef TZ_VAR
  long shiftd1, shiftd2;
 #endif
 struct tm *stm;

 if(y>=2001)
 {
  i=y-2001;
  u=11323;
  /* The following piece of code is rather paranoid in 16/32-bit world, where the
     timestamps are limited to year 2108. */
  #if defined(__32BIT__)||defined(TILED)
   if(i>=400)
   {
    u+=1022679L*(i/400);
    i%=400;
   }
  #endif
  if(i>=100)
  {
   u+=36524L*(i/100);
   i%=100;
  }
  u+=1461L*(i/4);
  u+=365L*(i%4);
 }
 else if(y>=1973)
  u=1096+(y-1973)/4*1461L+((y-1973)%4)*365L;
 else
  u=(y-1970)*365L;
 for(i=1; i<m; i++)
 {
  u+=(int)monthdays[i-1];
  if(i==2)
   u+=isleapyear(y);
 }
 rc=86400*(unsigned long)(u+d-1)+(unsigned long)hh*3600+(unsigned long)mm*60+(unsigned long)ss;
 /* If we have to use the timezone variable, do it now */
 #ifdef TZ_VAR
  tzshift=-TZ_VAR;
  if(_daylight)
   tzshift+=3600;
 #else
  tt=(time_t)rc;
  stm=localtime(&tt);
  debug_assert(stm!=NULL);              /* LIBCS.DLL returns NULL for unixtime beyond
                                           0x7FFFFFFF */
  tzshift=(long)stm->tm_hour*3600+(long)stm->tm_min*60;
  shiftd1=stm->tm_mday;
  stm=gmtime(&tt);
  debug_assert(stm!=NULL);
  shiftd2=stm->tm_mday;
  /* Local time overruns GMT, add 24 hours for safety */
  if(shiftd1<shiftd2&&shiftd1==1&&shiftd2>=28)
   tzshift+=86400;
  else if(shiftd1>shiftd2&&shiftd1>=28&&shiftd2==1)
   tzshift-=86400;
  else if(shiftd1>shiftd2)
   tzshift+=86400;
  else if(shiftd1<shiftd2)
   tzshift-=86400;
  tzshift-=(long)stm->tm_hour*3600+(long)stm->tm_min*60;
  tzshift%=86400;
 #endif
 /* Fix the timezone if it does not roll over the zero */
 return((tzshift>0&&rc<tzshift)?rc:rc-tzshift);
}
Exemple #5
0
rs232can_msg *cann_buffer_get()
{
        rs232can_msg *cmsg = malloc(sizeof(rs232can_msg));
        debug_assert(cmsg == NULL, "cann_buffer_get malloc fail!\n");
        return cmsg;
}
/**
 * Translate a SWZ instruction into a MOV, MUL or MAD instruction.  EG:
 *
 *   SWZ dst, src.x-y10 
 * 
 * becomes:
 *
 *   MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0}
 */
static void emit_swz( struct st_translate *t,
                      struct ureg_dst dst,
                      const struct prog_src_register *SrcReg )
{
   struct ureg_program *ureg = t->ureg;
   struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );

   unsigned negate_mask =  SrcReg->Negate;

   unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 |
                        (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 |
                        (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 |
                        (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3);

   unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 |
                         (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 |
                         (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 |
                         (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3);

   unsigned negative_one_mask = one_mask & negate_mask;
   unsigned positive_one_mask = one_mask & ~negate_mask;
   
   struct ureg_src imm;
   unsigned i;
   unsigned mul_swizzle[4] = {0,0,0,0};
   unsigned add_swizzle[4] = {0,0,0,0};
   unsigned src_swizzle[4] = {0,0,0,0};
   boolean need_add = FALSE;
   boolean need_mul = FALSE;

   if (dst.WriteMask == 0)
      return;

   /* Is this just a MOV?
    */
   if (zero_mask == 0 &&
       one_mask == 0 &&
       (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW)) 
   {
      ureg_MOV( ureg, dst, translate_src( t, SrcReg ));
      return;
   }

#define IMM_ZERO    0
#define IMM_ONE     1
#define IMM_NEG_ONE 2

   imm = ureg_imm3f( ureg, 0, 1, -1 );

   for (i = 0; i < 4; i++) {
      unsigned bit = 1 << i;

      if (dst.WriteMask & bit) {
         if (positive_one_mask & bit) {
            mul_swizzle[i] = IMM_ZERO;
            add_swizzle[i] = IMM_ONE;
            need_add = TRUE;
         }
         else if (negative_one_mask & bit) {
            mul_swizzle[i] = IMM_ZERO;
            add_swizzle[i] = IMM_NEG_ONE;
            need_add = TRUE;
         }
         else if (zero_mask & bit) {
            mul_swizzle[i] = IMM_ZERO;
            add_swizzle[i] = IMM_ZERO;
            need_add = TRUE;
         }
         else {
            add_swizzle[i] = IMM_ZERO;
            src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i);
            need_mul = TRUE;
            if (negate_mask & bit) {
               mul_swizzle[i] = IMM_NEG_ONE;
            }
            else {
               mul_swizzle[i] = IMM_ONE;
            }
         }
      }
   }

   if (need_mul && need_add) {
      ureg_MAD( ureg, 
                dst,
                swizzle_4v( src, src_swizzle ),
                swizzle_4v( imm, mul_swizzle ),
                swizzle_4v( imm, add_swizzle ) );
   }
   else if (need_mul) {
      ureg_MUL( ureg, 
                dst,
                swizzle_4v( src, src_swizzle ),
                swizzle_4v( imm, mul_swizzle ) );
   }
   else if (need_add) {
      ureg_MOV( ureg, 
                dst,
                swizzle_4v( imm, add_swizzle ) );
   }
   else {
      debug_assert(0);
   }

#undef IMM_ZERO
#undef IMM_ONE
#undef IMM_NEG_ONE
}
Exemple #7
0
void
fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit,
		int nr, struct pipe_surface **bufs)
{
	struct stage s[MAX_STAGES];
	uint32_t pos_regid, posz_regid, psize_regid, color_regid[8];
	uint32_t face_regid, coord_regid, zwcoord_regid;
	uint32_t vcoord_regid, vertex_regid, instance_regid;
	int i, j;

	debug_assert(nr <= ARRAY_SIZE(color_regid));

	if (emit->key.binning_pass)
		nr = 0;

	setup_stages(emit, s);

	pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
	posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH);
	psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
	vertex_regid = ir3_find_output_regid(s[VS].v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
	instance_regid = ir3_find_output_regid(s[VS].v, SYSTEM_VALUE_INSTANCE_ID);

	if (s[FS].v->color0_mrt) {
		color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
		color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
			ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR);
	} else {
		color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0);
		color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1);
		color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2);
		color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3);
		color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4);
		color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5);
		color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6);
		color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
	}

	/* TODO get these dynamically: */
	face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
	coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
	zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0);
	vcoord_regid = (s[FS].v->total_in > 0) ? regid(0,0) : regid(63,0);

	/* we could probably divide this up into things that need to be
	 * emitted if frag-prog is dirty vs if vert-prog is dirty..
	 */

	OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONTROL_REG, 5);
	OUT_RING(ring, A5XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
			A5XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff) |
			COND(s[VS].v, A5XX_HLSQ_VS_CONTROL_REG_ENABLED));
	OUT_RING(ring, A5XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
			A5XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff) |
			COND(s[FS].v, A5XX_HLSQ_FS_CONTROL_REG_ENABLED));
	OUT_RING(ring, A5XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
			A5XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff) |
			COND(s[HS].v, A5XX_HLSQ_HS_CONTROL_REG_ENABLED));
	OUT_RING(ring, A5XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
			A5XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff) |
			COND(s[DS].v, A5XX_HLSQ_DS_CONTROL_REG_ENABLED));
	OUT_RING(ring, A5XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
			A5XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff) |
			COND(s[GS].v, A5XX_HLSQ_GS_CONTROL_REG_ENABLED));

	OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5);
	OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen));
	OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen));
	OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen));
	OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen));
	OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen));

	OUT_PKT4(ring, REG_A5XX_SP_VS_CONTROL_REG, 5);
	OUT_RING(ring, A5XX_SP_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
			A5XX_SP_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff) |
			COND(s[VS].v, A5XX_SP_VS_CONTROL_REG_ENABLED));
	OUT_RING(ring, A5XX_SP_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
			A5XX_SP_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff) |
			COND(s[FS].v, A5XX_SP_FS_CONTROL_REG_ENABLED));
	OUT_RING(ring, A5XX_SP_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
			A5XX_SP_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff) |
			COND(s[HS].v, A5XX_SP_HS_CONTROL_REG_ENABLED));
	OUT_RING(ring, A5XX_SP_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
			A5XX_SP_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff) |
			COND(s[DS].v, A5XX_SP_DS_CONTROL_REG_ENABLED));
	OUT_RING(ring, A5XX_SP_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
			A5XX_SP_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff) |
			COND(s[GS].v, A5XX_SP_GS_CONTROL_REG_ENABLED));

	OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1);
	OUT_RING(ring, 0x00000000);

	OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONSTLEN, 2);
	OUT_RING(ring, s[VS].constlen);    /* HLSQ_VS_CONSTLEN */
	OUT_RING(ring, s[VS].instrlen);    /* HLSQ_VS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_HLSQ_FS_CONSTLEN, 2);
	OUT_RING(ring, s[FS].constlen);    /* HLSQ_FS_CONSTLEN */
	OUT_RING(ring, s[FS].instrlen);    /* HLSQ_FS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_HLSQ_HS_CONSTLEN, 2);
	OUT_RING(ring, s[HS].constlen);    /* HLSQ_HS_CONSTLEN */
	OUT_RING(ring, s[HS].instrlen);    /* HLSQ_HS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_HLSQ_DS_CONSTLEN, 2);
	OUT_RING(ring, s[DS].constlen);    /* HLSQ_DS_CONSTLEN */
	OUT_RING(ring, s[DS].instrlen);    /* HLSQ_DS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_HLSQ_GS_CONSTLEN, 2);
	OUT_RING(ring, s[GS].constlen);    /* HLSQ_GS_CONSTLEN */
	OUT_RING(ring, s[GS].instrlen);    /* HLSQ_GS_INSTRLEN */

	OUT_PKT4(ring, REG_A5XX_HLSQ_CONTEXT_SWITCH_CS_SW_3, 2);
	OUT_RING(ring, 0x00000000);   /* HLSQ_CONTEXT_SWITCH_CS_SW_3 */
	OUT_RING(ring, 0x00000000);   /* HLSQ_CONTEXT_SWITCH_CS_SW_4 */

	OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1);
	OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
			A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
			0x6 | /* XXX seems to be always set? */
			A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
			COND(s[VS].v->has_samp, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));

	struct ir3_shader_linkage l = {0};
	ir3_link_shaders(&l, s[VS].v, s[FS].v);

	BITSET_DECLARE(varbs, 128) = {0};
	uint32_t *varmask = (uint32_t *)varbs;

	for (i = 0; i < l.cnt; i++)
		for (j = 0; j < util_last_bit(l.var[i].compmask); j++)
			BITSET_SET(varbs, l.var[i].loc + j);

	OUT_PKT4(ring, REG_A5XX_VPC_VAR_DISABLE(0), 4);
	OUT_RING(ring, ~varmask[0]);  /* VPC_VAR[0].DISABLE */
	OUT_RING(ring, ~varmask[1]);  /* VPC_VAR[1].DISABLE */
	OUT_RING(ring, ~varmask[2]);  /* VPC_VAR[2].DISABLE */
	OUT_RING(ring, ~varmask[3]);  /* VPC_VAR[3].DISABLE */

	/* a5xx appends pos/psize to end of the linkage map: */
	if (pos_regid != regid(63,0))
		ir3_link_add(&l, pos_regid, 0xf, l.max_loc);

	if (psize_regid != regid(63,0))
		ir3_link_add(&l, psize_regid, 0x1, l.max_loc);

	for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
		uint32_t reg = 0;

		OUT_PKT4(ring, REG_A5XX_SP_VS_OUT_REG(i), 1);

		reg |= A5XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
		reg |= A5XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
		j++;

		reg |= A5XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
		reg |= A5XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
		j++;

		OUT_RING(ring, reg);
	}

	for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
		uint32_t reg = 0;

		OUT_PKT4(ring, REG_A5XX_SP_VS_VPC_DST_REG(i), 1);

		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc);
		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc);
		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc);
		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc);

		OUT_RING(ring, reg);
	}

	OUT_PKT4(ring, REG_A5XX_SP_VS_OBJ_START_LO, 2);
	OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0);  /* SP_VS_OBJ_START_LO/HI */

	if (s[VS].instrlen)
		emit_shader(ring, s[VS].v);

	// TODO depending on other bits in this reg (if any) set somewhere else?
	OUT_PKT4(ring, REG_A5XX_PC_PRIM_VTX_CNTL, 1);
	OUT_RING(ring, COND(s[VS].v->writes_psize, A5XX_PC_PRIM_VTX_CNTL_PSIZE));

	if (emit->key.binning_pass) {
		OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
		OUT_RING(ring, 0x00000000);    /* SP_FS_OBJ_START_LO */
		OUT_RING(ring, 0x00000000);    /* SP_FS_OBJ_START_HI */
	} else {
		uint32_t stride_in_vpc = align(s[FS].v->total_in, 4) + 4;

		if (s[VS].v->writes_psize)
			stride_in_vpc++;

		// TODO if some of these other bits depend on something other than
		// program state we should probably move these next three regs:

		OUT_PKT4(ring, REG_A5XX_SP_PRIMITIVE_CNTL, 1);
		OUT_RING(ring, A5XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt));

		OUT_PKT4(ring, REG_A5XX_VPC_CNTL_0, 1);
		OUT_RING(ring, A5XX_VPC_CNTL_0_STRIDE_IN_VPC(stride_in_vpc) |
				COND(s[FS].v->total_in > 0, A5XX_VPC_CNTL_0_VARYING) |
				0x10000);    // XXX

		OUT_PKT4(ring, REG_A5XX_PC_PRIMITIVE_CNTL, 1);
		OUT_RING(ring, A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(stride_in_vpc) |
				0x400);      // XXX

		OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
		OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0);  /* SP_FS_OBJ_START_LO/HI */
	}

	OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 5);
	OUT_RING(ring, 0x00000881);        /* XXX HLSQ_CONTROL_0 */
	OUT_RING(ring, A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(63));
	OUT_RING(ring, A5XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
			0xfcfcfc00);               /* XXX */
	OUT_RING(ring, A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(vcoord_regid) |
			0xfcfcfc00);               /* XXX */
	OUT_RING(ring, A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) |
			A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) |
			0x0000fcfc);               /* XXX */

	OUT_PKT4(ring, REG_A5XX_SP_FS_CTRL_REG0, 1);
	OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) |
			0x4000e | /* XXX set pretty much everywhere */
			A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
			A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
			A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
			COND(s[FS].v->has_samp, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));

	OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
	OUT_RING(ring, 0x020fffff);        /* XXX */

	OUT_PKT4(ring, REG_A5XX_VPC_GS_SIV_CNTL, 1);
	OUT_RING(ring, 0x0000ffff);        /* XXX */

	OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1);
	OUT_RING(ring, 0x00000010);        /* XXX */

	OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1);
	OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_GRAS_CNTL_VARYING) |
			COND(s[FS].v->frag_coord, A5XX_GRAS_CNTL_XCOORD |
					A5XX_GRAS_CNTL_YCOORD |
					A5XX_GRAS_CNTL_ZCOORD |
					A5XX_GRAS_CNTL_WCOORD |
					A5XX_GRAS_CNTL_UNK3) |
			COND(s[FS].v->frag_face, A5XX_GRAS_CNTL_UNK3));

	OUT_PKT4(ring, REG_A5XX_RB_RENDER_CONTROL0, 3);
	OUT_RING(ring,
			COND(s[FS].v->total_in > 0, A5XX_RB_RENDER_CONTROL0_VARYING) |
			COND(s[FS].v->frag_coord, A5XX_RB_RENDER_CONTROL0_XCOORD |
					A5XX_RB_RENDER_CONTROL0_YCOORD |
					A5XX_RB_RENDER_CONTROL0_ZCOORD |
					A5XX_RB_RENDER_CONTROL0_WCOORD |
					A5XX_RB_RENDER_CONTROL0_UNK3) |
			COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL0_UNK3));

	OUT_RING(ring,
			COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL1_FACENESS));
	OUT_RING(ring, A5XX_RB_FS_OUTPUT_CNTL_MRT(nr) |
			COND(s[FS].v->writes_pos, A5XX_RB_FS_OUTPUT_CNTL_FRAG_WRITES_Z));

	OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_CNTL, 9);
	OUT_RING(ring, A5XX_SP_FS_OUTPUT_CNTL_MRT(nr) |
			A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID(posz_regid) |
			A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID(regid(63, 0)));
	for (i = 0; i < 8; i++) {
		OUT_RING(ring, A5XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) |
				COND(emit->key.half_precision,
					A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION));
	}

	if (emit->key.binning_pass) {
		OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1);
		OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(0));
	} else {
		uint32_t vinterp[8], vpsrepl[8];

		memset(vinterp, 0, sizeof(vinterp));
		memset(vpsrepl, 0, sizeof(vpsrepl));

		/* looks like we need to do int varyings in the frag
		 * shader on a5xx (no flatshad reg?  or a420.0 bug?):
		 *
		 *    (sy)(ss)nop
		 *    (sy)ldlv.u32 r0.x,l[r0.x], 1
		 *    ldlv.u32 r0.y,l[r0.x+1], 1
		 *    (ss)bary.f (ei)r63.x, 0, r0.x
		 *    (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
		 *    (rpt5)nop
		 *    sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
		 *
		 * Possibly on later a5xx variants we'll be able to use
		 * something like the code below instead of workaround
		 * in the shader:
		 */
		/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
		for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
			/* NOTE: varyings are packed, so if compmask is 0xb
			 * then first, third, and fourth component occupy
			 * three consecutive varying slots:
			 */
			unsigned compmask = s[FS].v->inputs[j].compmask;

			uint32_t inloc = s[FS].v->inputs[j].inloc;

			if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) ||
					(s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
				uint32_t loc = inloc;

				for (i = 0; i < 4; i++) {
					if (compmask & (1 << i)) {
						vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
						//flatshade[loc / 32] |= 1 << (loc % 32);
						loc++;
					}
				}
			}

			gl_varying_slot slot = s[FS].v->inputs[j].slot;

			/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
			if (slot >= VARYING_SLOT_VAR0) {
				unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
				/* Replace the .xy coordinates with S/T from the point sprite. Set
				 * interpolation bits for .zw such that they become .01
				 */
				if (emit->sprite_coord_enable & texmask) {
					/* mask is two 2-bit fields, where:
					 *   '01' -> S
					 *   '10' -> T
					 *   '11' -> 1 - T  (flip mode)
					 */
					unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001;
					uint32_t loc = inloc;
					if (compmask & 0x1) {
						vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
						loc++;
					}
					if (compmask & 0x2) {
						vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
						loc++;
					}
					if (compmask & 0x4) {
						/* .z <- 0.0f */
						vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
						loc++;
					}
					if (compmask & 0x8) {
						/* .w <- 1.0f */
						vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
						loc++;
					}
				}
Exemple #8
0
/**
 * Find instruction src's which are mov's that can be collapsed, replacing
 * the mov dst with the mov src
 */
static void
instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr)
{
	struct ir3_register *reg;

	if (instr->regs_count == 0)
		return;

	if (ir3_instr_check_mark(instr))
		return;

	/* walk down the graph from each src: */
	foreach_src_n(reg, n, instr) {
		struct ir3_instruction *src = ssa(reg);

		if (!src)
			continue;

		instr_cp(ctx, src);

		/* TODO non-indirect access we could figure out which register
		 * we actually want and allow cp..
		 */
		if (reg->flags & IR3_REG_ARRAY)
			continue;

		/* Don't CP absneg into meta instructions, that won't end well: */
		if (is_meta(instr) && (src->opc != OPC_MOV))
			continue;

		reg_cp(ctx, instr, reg, n);
	}

	if (instr->regs[0]->flags & IR3_REG_ARRAY) {
		struct ir3_instruction *src = ssa(instr->regs[0]);
		if (src)
			instr_cp(ctx, src);
	}

	if (instr->address) {
		instr_cp(ctx, instr->address);
		ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
	}

	/* we can end up with extra cmps.s from frontend, which uses a
	 *
	 *    cmps.s p0.x, cond, 0
	 *
	 * as a way to mov into the predicate register.  But frequently 'cond'
	 * is itself a cmps.s/cmps.f/cmps.u.  So detect this special case and
	 * just re-write the instruction writing predicate register to get rid
	 * of the double cmps.
	 */
	if ((instr->opc == OPC_CMPS_S) &&
			(instr->regs[0]->num == regid(REG_P0, 0)) &&
			ssa(instr->regs[1]) &&
			(instr->regs[2]->flags & IR3_REG_IMMED) &&
			(instr->regs[2]->iim_val == 0)) {
		struct ir3_instruction *cond = ssa(instr->regs[1]);
		switch (cond->opc) {
		case OPC_CMPS_S:
		case OPC_CMPS_F:
		case OPC_CMPS_U:
			instr->opc   = cond->opc;
			instr->flags = cond->flags;
			instr->cat2  = cond->cat2;
			instr->address = cond->address;
			instr->regs[1] = cond->regs[1];
			instr->regs[2] = cond->regs[2];
			instr->barrier_class |= cond->barrier_class;
			instr->barrier_conflict |= cond->barrier_conflict;
			unuse(cond);
			break;
		default:
			break;
		}
	}

	/* Handle converting a sam.s2en (taking samp/tex idx params via
	 * register) into a normal sam (encoding immediate samp/tex idx)
	 * if they are immediate.  This saves some instructions and regs
	 * in the common case where we know samp/tex at compile time:
	 */
	if (is_tex(instr) && (instr->flags & IR3_INSTR_S2EN) &&
			!(ir3_shader_debug & IR3_DBG_FORCES2EN)) {
		/* The first src will be a fan-in (collect), if both of it's
		 * two sources are mov from imm, then we can
		 */
		struct ir3_instruction *samp_tex = ssa(instr->regs[1]);

		debug_assert(samp_tex->opc == OPC_META_FI);

		struct ir3_instruction *samp = ssa(samp_tex->regs[1]);
		struct ir3_instruction *tex  = ssa(samp_tex->regs[2]);

		if ((samp->opc == OPC_MOV) &&
				(samp->regs[1]->flags & IR3_REG_IMMED) &&
				(tex->opc == OPC_MOV) &&
				(tex->regs[1]->flags & IR3_REG_IMMED)) {
			instr->flags &= ~IR3_INSTR_S2EN;
			instr->cat5.samp = samp->regs[1]->iim_val;
			instr->cat5.tex  = tex->regs[1]->iim_val;
			instr->regs[1]->instr = NULL;
		}
	}
}
Exemple #9
0
/**
 * Translate a vertex program to create a new variant.
 */
static struct st_vp_variant *
st_translate_vertex_program(struct st_context *st,
                            struct st_vertex_program *stvp,
                            const struct st_vp_variant_key *key)
{
   struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
   struct pipe_context *pipe = st->pipe;
   struct ureg_program *ureg;
   enum pipe_error error;
   unsigned num_outputs;

   st_prepare_vertex_program(st->ctx, stvp);

   if (!stvp->glsl_to_tgsi)
   {
      _mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
   }

   ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
   if (ureg == NULL) {
      free(vpv);
      return NULL;
   }

   vpv->key = *key;

   vpv->num_inputs = stvp->num_inputs;
   num_outputs = stvp->num_outputs;
   if (key->passthrough_edgeflags) {
      vpv->num_inputs++;
      num_outputs++;
   }

   if (ST_DEBUG & DEBUG_MESA) {
      _mesa_print_program(&stvp->Base.Base);
      _mesa_print_program_parameters(st->ctx, &stvp->Base.Base);
      debug_printf("\n");
   }

   if (stvp->glsl_to_tgsi)
      error = st_translate_program(st->ctx,
                                   TGSI_PROCESSOR_VERTEX,
                                   ureg,
                                   stvp->glsl_to_tgsi,
                                   &stvp->Base.Base,
                                   /* inputs */
                                   vpv->num_inputs,
                                   stvp->input_to_index,
                                   NULL, /* input semantic name */
                                   NULL, /* input semantic index */
                                   NULL, /* interp mode */
                                   NULL, /* interp location */
                                   /* outputs */
                                   num_outputs,
                                   stvp->result_to_output,
                                   stvp->output_semantic_name,
                                   stvp->output_semantic_index,
                                   key->passthrough_edgeflags,
                                   key->clamp_color);
   else
      error = st_translate_mesa_program(st->ctx,
                                        TGSI_PROCESSOR_VERTEX,
                                        ureg,
                                        &stvp->Base.Base,
                                        /* inputs */
                                        vpv->num_inputs,
                                        stvp->input_to_index,
                                        NULL, /* input semantic name */
                                        NULL, /* input semantic index */
                                        NULL,
                                        /* outputs */
                                        num_outputs,
                                        stvp->result_to_output,
                                        stvp->output_semantic_name,
                                        stvp->output_semantic_index,
                                        key->passthrough_edgeflags,
                                        key->clamp_color);

   if (error)
      goto fail;

   vpv->tgsi.tokens = ureg_get_tokens( ureg, NULL );
   if (!vpv->tgsi.tokens)
      goto fail;

   ureg_destroy( ureg );

   if (stvp->glsl_to_tgsi) {
      st_translate_stream_output_info(stvp->glsl_to_tgsi,
                                      stvp->result_to_output,
                                      &vpv->tgsi.stream_output);
   }

   if (ST_DEBUG & DEBUG_TGSI) {
      tgsi_dump(vpv->tgsi.tokens, 0);
      debug_printf("\n");
   }

   vpv->driver_shader = pipe->create_vs_state(pipe, &vpv->tgsi);
   return vpv;

fail:
   debug_printf("%s: failed to translate Mesa program:\n", __FUNCTION__);
   _mesa_print_program(&stvp->Base.Base);
   debug_assert(0);

   ureg_destroy( ureg );
   return NULL;
}
Exemple #10
0
void *
debug_realloc(const char *file, unsigned line, const char *function,
              void *old_ptr, size_t old_size, size_t new_size )
{
   struct debug_memory_header *old_hdr, *new_hdr;
   struct debug_memory_footer *old_ftr, *new_ftr;
   void *new_ptr;
   
   if(!old_ptr)
      return debug_malloc( file, line, function, new_size );
   
   if(!new_size) {
      debug_free( file, line, function, old_ptr );
      return NULL;
   }
   
   old_hdr = header_from_data(old_ptr);
   if(old_hdr->magic != DEBUG_MEMORY_MAGIC) {
      debug_printf("%s:%u:%s: reallocating bad or corrupted memory %p\n",
                   file, line, function,
                   old_ptr);
      debug_assert(0);
      return NULL;
   }
   
   old_ftr = footer_from_header(old_hdr);
   if(old_ftr->magic != DEBUG_MEMORY_MAGIC) {
      debug_printf("%s:%u:%s: buffer overflow %p\n",
                   old_hdr->file, old_hdr->line, old_hdr->function,
                   old_ptr);
      debug_assert(0);
   }

   /* alloc new */
   new_hdr = os_malloc(sizeof(*new_hdr) + new_size + sizeof(*new_ftr));
   if(!new_hdr) {
      debug_printf("%s:%u:%s: out of memory when trying to allocate %lu bytes\n",
                   file, line, function,
                   (long unsigned)new_size);
      return NULL;
   }
   new_hdr->no = old_hdr->no;
   new_hdr->file = old_hdr->file;
   new_hdr->line = old_hdr->line;
   new_hdr->function = old_hdr->function;
   new_hdr->size = new_size;
   new_hdr->magic = DEBUG_MEMORY_MAGIC;
   new_hdr->tag = 0;
#if DEBUG_FREED_MEMORY
   new_hdr->freed = FALSE;
#endif
   
   new_ftr = footer_from_header(new_hdr);
   new_ftr->magic = DEBUG_MEMORY_MAGIC;
   
   pipe_mutex_lock(list_mutex);
   LIST_REPLACE(&old_hdr->head, &new_hdr->head);
   pipe_mutex_unlock(list_mutex);

   /* copy data */
   new_ptr = data_from_header(new_hdr);
   memcpy( new_ptr, old_ptr, old_size < new_size ? old_size : new_size );

   /* free old */
   old_hdr->magic = 0;
   old_ftr->magic = 0;
   os_free(old_hdr);

   return new_ptr;
}
Exemple #11
0
/**
 * Handle cp for a given src register.  This additionally handles
 * the cases of collapsing immedate/const (which replace the src
 * register with a non-ssa src) or collapsing mov's from relative
 * src (which needs to also fixup the address src reference by the
 * instruction).
 */
static void
reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
		struct ir3_register *reg, unsigned n)
{
	struct ir3_instruction *src = ssa(reg);

	if (is_eligible_mov(src, true)) {
		/* simple case, no immed/const/relativ, only mov's w/ ssa src: */
		struct ir3_register *src_reg = src->regs[1];
		unsigned new_flags = reg->flags;

		combine_flags(&new_flags, src);

		if (valid_flags(instr, n, new_flags)) {
			if (new_flags & IR3_REG_ARRAY) {
				debug_assert(!(reg->flags & IR3_REG_ARRAY));
				reg->array = src_reg->array;
			}
			reg->flags = new_flags;
			reg->instr = ssa(src_reg);

			instr->barrier_class |= src->barrier_class;
			instr->barrier_conflict |= src->barrier_conflict;

			unuse(src);
			reg->instr->use_count++;
		}

	} else if (is_same_type_mov(src) &&
			/* cannot collapse const/immed/etc into meta instrs: */
			!is_meta(instr)) {
		/* immed/const/etc cases, which require some special handling: */
		struct ir3_register *src_reg = src->regs[1];
		unsigned new_flags = reg->flags;

		combine_flags(&new_flags, src);

		if (!valid_flags(instr, n, new_flags)) {
			/* See if lowering an immediate to const would help. */
			if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
				debug_assert(new_flags & IR3_REG_IMMED);
				instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags);
				return;
			}

			/* special case for "normal" mad instructions, we can
			 * try swapping the first two args if that fits better.
			 *
			 * the "plain" MAD's (ie. the ones that don't shift first
			 * src prior to multiply) can swap their first two srcs if
			 * src[0] is !CONST and src[1] is CONST:
			 */
			if ((n == 1) && is_mad(instr->opc) &&
					!(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) &&
					valid_flags(instr, 0, new_flags & ~IR3_REG_IMMED)) {
				/* swap src[0] and src[1]: */
				struct ir3_register *tmp;
				tmp = instr->regs[0 + 1];
				instr->regs[0 + 1] = instr->regs[1 + 1];
				instr->regs[1 + 1] = tmp;

				n = 0;
			} else {
				return;
			}
		}

		/* Here we handle the special case of mov from
		 * CONST and/or RELATIV.  These need to be handled
		 * specially, because in the case of move from CONST
		 * there is no src ir3_instruction so we need to
		 * replace the ir3_register.  And in the case of
		 * RELATIV we need to handle the address register
		 * dependency.
		 */
		if (src_reg->flags & IR3_REG_CONST) {
			/* an instruction cannot reference two different
			 * address registers:
			 */
			if ((src_reg->flags & IR3_REG_RELATIV) &&
					conflicts(instr->address, reg->instr->address))
				return;

			/* This seems to be a hw bug, or something where the timings
			 * just somehow don't work out.  This restriction may only
			 * apply if the first src is also CONST.
			 */
			if ((opc_cat(instr->opc) == 3) && (n == 2) &&
					(src_reg->flags & IR3_REG_RELATIV) &&
					(src_reg->array.offset == 0))
				return;

			src_reg = ir3_reg_clone(instr->block->shader, src_reg);
			src_reg->flags = new_flags;
			instr->regs[n+1] = src_reg;

			if (src_reg->flags & IR3_REG_RELATIV)
				ir3_instr_set_address(instr, reg->instr->address);

			return;
		}

		if ((src_reg->flags & IR3_REG_RELATIV) &&
				!conflicts(instr->address, reg->instr->address)) {
			src_reg = ir3_reg_clone(instr->block->shader, src_reg);
			src_reg->flags = new_flags;
			instr->regs[n+1] = src_reg;
			ir3_instr_set_address(instr, reg->instr->address);

			return;
		}

		/* NOTE: seems we can only do immed integers, so don't
		 * need to care about float.  But we do need to handle
		 * abs/neg *before* checking that the immediate requires
		 * few enough bits to encode:
		 *
		 * TODO: do we need to do something to avoid accidentally
		 * catching a float immed?
		 */
		if (src_reg->flags & IR3_REG_IMMED) {
			int32_t iim_val = src_reg->iim_val;

			debug_assert((opc_cat(instr->opc) == 1) ||
					(opc_cat(instr->opc) == 6) ||
					ir3_cat2_int(instr->opc) ||
					(is_mad(instr->opc) && (n == 0)));

			if (new_flags & IR3_REG_SABS)
				iim_val = abs(iim_val);

			if (new_flags & IR3_REG_SNEG)
				iim_val = -iim_val;

			if (new_flags & IR3_REG_BNOT)
				iim_val = ~iim_val;

			/* other than category 1 (mov) we can only encode up to 10 bits: */
			if ((instr->opc == OPC_MOV) ||
					!((iim_val & ~0x3ff) && (-iim_val & ~0x3ff))) {
				new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
				src_reg = ir3_reg_clone(instr->block->shader, src_reg);
				src_reg->flags = new_flags;
				src_reg->iim_val = iim_val;
				instr->regs[n+1] = src_reg;
			} else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
				/* See if lowering an immediate to const would help. */
				instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags);
			}

			return;
		}
	}
}
void
fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
		int nr, struct pipe_surface **bufs)
{
	struct stage s[MAX_STAGES];
	uint32_t pos_regid, posz_regid, psize_regid, color_regid[8];
	uint32_t face_regid, coord_regid, zwcoord_regid;
	int constmode;
	int i, j, k;

	debug_assert(nr <= ARRAY_SIZE(color_regid));

	setup_stages(emit, s);

	/* blob seems to always use constmode currently: */
	constmode = 1;

	pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
	posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH);
	psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
	if (s[FS].v->color0_mrt) {
		color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
		color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
			ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR);
	} else {
		color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0);
		color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1);
		color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2);
		color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3);
		color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4);
		color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5);
		color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6);
		color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
	}

	/* TODO get these dynamically: */
	face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
	coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
	zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0);

	/* we could probably divide this up into things that need to be
	 * emitted if frag-prog is dirty vs if vert-prog is dirty..
	 */

	OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1);
	OUT_RING(ring, 0x00000003);

	OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 5);
	OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
			A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
			A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
			/* NOTE:  I guess SHADERRESTART and CONSTFULLUPDATE maybe
			 * flush some caches? I think we only need to set those
			 * bits if we have updated const or shader..
			 */
			A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
			A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
	OUT_RING(ring, A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
			A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
			A4XX_HLSQ_CONTROL_1_REG_COORDREGID(coord_regid) |
			A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID(zwcoord_regid));
	OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) |
			0x3f3f000 |           /* XXX */
			A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid));
	OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid) |
			0xfcfcfc00);
	OUT_RING(ring, 0x00fcfcfc);   /* XXX HLSQ_CONTROL_4 */

	OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5);
	OUT_RING(ring, A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(s[VS].constlen) |
			A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
			A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(s[VS].instrlen) |
			A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff));
	OUT_RING(ring, A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(s[FS].constlen) |
			A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
			A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(s[FS].instrlen) |
			A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff));
	OUT_RING(ring, A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(s[HS].constlen) |
			A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
			A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(s[HS].instrlen) |
			A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff));
	OUT_RING(ring, A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(s[DS].constlen) |
			A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
			A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(s[DS].instrlen) |
			A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff));
	OUT_RING(ring, A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(s[GS].constlen) |
			A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
			A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(s[GS].instrlen) |
			A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff));

	OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1);
	OUT_RING(ring, 0x140010 | /* XXX */
			COND(emit->key.binning_pass, A4XX_SP_SP_CTRL_REG_BINNING_PASS));

	OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1);
	OUT_RING(ring, 0x7f | /* XXX */
			COND(s[VS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_VS_BUFFER) |
			COND(s[FS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_FS_BUFFER) |
			COND(s[VS].instrlen && s[FS].instrlen,
					A4XX_SP_INSTR_CACHE_CTRL_INSTR_BUFFER));

	OUT_PKT0(ring, REG_A4XX_SP_VS_LENGTH_REG, 1);
	OUT_RING(ring, s[VS].v->instrlen);      /* SP_VS_LENGTH_REG */

	OUT_PKT0(ring, REG_A4XX_SP_VS_CTRL_REG0, 3);
	OUT_RING(ring, A4XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
			A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
			A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
			A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
			A4XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
			A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
			COND(s[VS].v->has_samp, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE));
	OUT_RING(ring, A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(s[VS].constlen) |
			A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in));
	OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
			A4XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
			A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(s[FS].v->total_in, 4) / 4));

	for (i = 0, j = -1; (i < 16) && (j < (int)s[FS].v->inputs_count); i++) {
		uint32_t reg = 0;

		OUT_PKT0(ring, REG_A4XX_SP_VS_OUT_REG(i), 1);

		j = ir3_next_varying(s[FS].v, j);
		if (j < s[FS].v->inputs_count) {
			k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot);
			reg |= A4XX_SP_VS_OUT_REG_A_REGID(s[VS].v->outputs[k].regid);
			reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(s[FS].v->inputs[j].compmask);
		}

		j = ir3_next_varying(s[FS].v, j);
		if (j < s[FS].v->inputs_count) {
			k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot);
			reg |= A4XX_SP_VS_OUT_REG_B_REGID(s[VS].v->outputs[k].regid);
			reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(s[FS].v->inputs[j].compmask);
		}

		OUT_RING(ring, reg);
	}

	for (i = 0, j = -1; (i < 8) && (j < (int)s[FS].v->inputs_count); i++) {
		uint32_t reg = 0;

		OUT_PKT0(ring, REG_A4XX_SP_VS_VPC_DST_REG(i), 1);

		j = ir3_next_varying(s[FS].v, j);
		if (j < s[FS].v->inputs_count)
			reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(s[FS].v->inputs[j].inloc);
		j = ir3_next_varying(s[FS].v, j);
		if (j < s[FS].v->inputs_count)
			reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(s[FS].v->inputs[j].inloc);
		j = ir3_next_varying(s[FS].v, j);
		if (j < s[FS].v->inputs_count)
			reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(s[FS].v->inputs[j].inloc);
		j = ir3_next_varying(s[FS].v, j);
		if (j < s[FS].v->inputs_count)
			reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(s[FS].v->inputs[j].inloc);

		OUT_RING(ring, reg);
	}

	OUT_PKT0(ring, REG_A4XX_SP_VS_OBJ_OFFSET_REG, 2);
	OUT_RING(ring, A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
			A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[VS].instroff));
	OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0);  /* SP_VS_OBJ_START_REG */

	OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1);
	OUT_RING(ring, s[FS].v->instrlen);  /* SP_FS_LENGTH_REG */

	OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2);
	OUT_RING(ring, A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
			COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) |
			A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
			A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
			A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
			A4XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
			A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
			COND(s[FS].v->has_samp, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE));
	OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) |
			0x80000000 |      /* XXX */
			COND(s[FS].v->frag_face, A4XX_SP_FS_CTRL_REG1_FACENESS) |
			COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING) |
			COND(s[FS].v->frag_coord, A4XX_SP_FS_CTRL_REG1_FRAGCOORD));

	OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2);
	OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
			A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff));
	if (emit->key.binning_pass)
		OUT_RING(ring, 0x00000000);
	else
		OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0);  /* SP_FS_OBJ_START_REG */

	OUT_PKT0(ring, REG_A4XX_SP_HS_OBJ_OFFSET_REG, 1);
	OUT_RING(ring, A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
			A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[HS].instroff));

	OUT_PKT0(ring, REG_A4XX_SP_DS_OBJ_OFFSET_REG, 1);
	OUT_RING(ring, A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
			A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[DS].instroff));

	OUT_PKT0(ring, REG_A4XX_SP_GS_OBJ_OFFSET_REG, 1);
	OUT_RING(ring, A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
			A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[GS].instroff));

	OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL2, 1);
	OUT_RING(ring, A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(0) |
			COND(s[FS].v->total_in > 0, A4XX_RB_RENDER_CONTROL2_VARYING) |
			COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) |
			COND(s[FS].v->frag_coord, A4XX_RB_RENDER_CONTROL2_XCOORD |
					A4XX_RB_RENDER_CONTROL2_YCOORD |
// TODO enabling gl_FragCoord.z is causing lockups on 0ad (but seems
// to work everywhere else).
//					A4XX_RB_RENDER_CONTROL2_ZCOORD |
					A4XX_RB_RENDER_CONTROL2_WCOORD));

	OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1);
	OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(MAX2(1, nr)) |
			COND(s[FS].v->writes_pos, A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z));

	OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1);
	OUT_RING(ring, A4XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr)) |
			COND(s[FS].v->writes_pos, A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
			A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));

	OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8);
	for (i = 0; i < 8; i++) {
		enum a4xx_color_fmt format = 0;
		bool srgb = false;
		if (i < nr) {
			format = fd4_emit_format(bufs[i]);
			if (bufs[i] && !emit->no_decode_srgb)
				srgb = util_format_is_srgb(bufs[i]->format);
		}
		OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
				A4XX_SP_FS_MRT_REG_MRTFORMAT(format) |
				COND(srgb, A4XX_SP_FS_MRT_REG_COLOR_SRGB) |
				COND(emit->key.half_precision,
					A4XX_SP_FS_MRT_REG_HALF_PRECISION));
	}

	if (emit->key.binning_pass) {
		OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
		OUT_RING(ring, A4XX_VPC_ATTR_THRDASSIGN(1) |
				0x40000000 |      /* XXX */
				COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
		OUT_RING(ring, 0x00000000);
	} else {
		uint32_t vinterp[8], vpsrepl[8];

		memset(vinterp, 0, sizeof(vinterp));
		memset(vpsrepl, 0, sizeof(vpsrepl));

		/* looks like we need to do int varyings in the frag
		 * shader on a4xx (no flatshad reg?  or a420.0 bug?):
		 *
		 *    (sy)(ss)nop
		 *    (sy)ldlv.u32 r0.x,l[r0.x], 1
		 *    ldlv.u32 r0.y,l[r0.x+1], 1
		 *    (ss)bary.f (ei)r63.x, 0, r0.x
		 *    (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
		 *    (rpt5)nop
		 *    sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
		 *
		 * Possibly on later a4xx variants we'll be able to use
		 * something like the code below instead of workaround
		 * in the shader:
		 */
		/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
		for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {

			/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
			 * instead.. rather than -8 everywhere else..
			 */
			uint32_t inloc = s[FS].v->inputs[j].inloc - 8;

			/* currently assuming varyings aligned to 4 (not
			 * packed):
			 */
			debug_assert((inloc % 4) == 0);

			if ((s[FS].v->inputs[j].interpolate == INTERP_QUALIFIER_FLAT) ||
					(s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
				uint32_t loc = inloc;

				for (i = 0; i < 4; i++, loc++) {
					vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
					//flatshade[loc / 32] |= 1 << (loc % 32);
				}
			}

			gl_varying_slot slot = s[FS].v->inputs[j].slot;

			/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
			if (slot >= VARYING_SLOT_VAR0) {
				unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
				/* Replace the .xy coordinates with S/T from the point sprite. Set
				 * interpolation bits for .zw such that they become .01
				 */
				if (emit->sprite_coord_enable & texmask) {
					vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09)
						<< ((inloc % 16) * 2);
					vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2);
					vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2);
				}
			}
		}

		OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
		OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) |
				A4XX_VPC_ATTR_THRDASSIGN(1) |
				COND(s[FS].v->total_in > 0, A4XX_VPC_ATTR_ENABLE) |
				0x40000000 |      /* XXX */
				COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
		OUT_RING(ring, A4XX_VPC_PACK_NUMFPNONPOSVAR(s[FS].v->total_in) |
				A4XX_VPC_PACK_NUMNONPOSVSVAR(s[FS].v->total_in));

		OUT_PKT0(ring, REG_A4XX_VPC_VARYING_INTERP_MODE(0), 8);
		for (i = 0; i < 8; i++)
			OUT_RING(ring, vinterp[i]);     /* VPC_VARYING_INTERP[i].MODE */

		OUT_PKT0(ring, REG_A4XX_VPC_VARYING_PS_REPL_MODE(0), 8);
		for (i = 0; i < 8; i++)
			OUT_RING(ring, vpsrepl[i]);   /* VPC_VARYING_PS_REPL[i] */
	}

	if (s[VS].instrlen)
		emit_shader(ring, s[VS].v);

	if (!emit->key.binning_pass)
		if (s[FS].instrlen)
			emit_shader(ring, s[FS].v);
}
Exemple #13
0
static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
		struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w)
{
	enum a4xx_tile_mode tile_mode;
	unsigned i;

	if (bin_w) {
		tile_mode = 2;
	} else {
		tile_mode = TILE4_LINEAR;
	}

	for (i = 0; i < 8; i++) {
		enum a4xx_color_fmt format = 0;
		enum a3xx_color_swap swap = WZYX;
		struct fd_resource *rsc = NULL;
		struct fd_resource_slice *slice = NULL;
		uint32_t stride = 0;
		uint32_t base = 0;
		uint32_t offset = 0;

		if ((i < nr_bufs) && bufs[i]) {
			struct pipe_surface *psurf = bufs[i];

			rsc = fd_resource(psurf->texture);
			slice = fd_resource_slice(rsc, psurf->u.tex.level);
			format = fd4_pipe2color(psurf->format);
			swap = fd4_pipe2swap(psurf->format);

			debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

			offset = fd_resource_offset(rsc, psurf->u.tex.level,
					psurf->u.tex.first_layer);

			if (bin_w) {
				stride = bin_w * rsc->cpp;

				if (bases) {
					base = bases[i];
				}
			} else {
				stride = slice->pitch * rsc->cpp;
			}
		}

		OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3);
		OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
				A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
				A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
				A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap));
		if (bin_w || (i >= nr_bufs)) {
			OUT_RING(ring, base);
			OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
		} else {
			OUT_RELOCW(ring, rsc->bo, offset, 0, 0);
			/* RB_MRT[i].CONTROL3.STRIDE not emitted by c2d..
			 * not sure if we need to skip it for bypass or
			 * not.
			 */
			OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(0));
		}
	}
}
Exemple #14
0
static void *
create_fs(struct pipe_context *pipe, unsigned fs_traits)
{
    struct ureg_program *ureg;
    struct ureg_src /*dst_sampler, */ src_sampler, mask_sampler;
    struct ureg_src /*dst_pos, */ src_input, mask_pos;
    struct ureg_dst src, mask;
    struct ureg_dst out;
    struct ureg_src imm0 = { 0 };
    unsigned has_mask = (fs_traits & FS_MASK) != 0;
    unsigned is_fill = (fs_traits & FS_FILL) != 0;
    unsigned is_composite = (fs_traits & FS_COMPOSITE) != 0;
    unsigned is_solid = (fs_traits & FS_SOLID_FILL) != 0;
    unsigned is_lingrad = (fs_traits & FS_LINGRAD_FILL) != 0;
    unsigned is_radgrad = (fs_traits & FS_RADGRAD_FILL) != 0;
    unsigned comp_alpha_mask = fs_traits & FS_COMPONENT_ALPHA;
    unsigned is_yuv = (fs_traits & FS_YUV) != 0;
    unsigned src_repeat_none = (fs_traits & FS_SRC_REPEAT_NONE) != 0;
    unsigned mask_repeat_none = (fs_traits & FS_MASK_REPEAT_NONE) != 0;
    unsigned src_swizzle = (fs_traits & FS_SRC_SWIZZLE_RGB) != 0;
    unsigned mask_swizzle = (fs_traits & FS_MASK_SWIZZLE_RGB) != 0;
    unsigned src_set_alpha = (fs_traits & FS_SRC_SET_ALPHA) != 0;
    unsigned mask_set_alpha = (fs_traits & FS_MASK_SET_ALPHA) != 0;
    unsigned src_luminance = (fs_traits & FS_SRC_LUMINANCE) != 0;
    unsigned mask_luminance = (fs_traits & FS_MASK_LUMINANCE) != 0;
    unsigned dst_luminance = (fs_traits & FS_DST_LUMINANCE) != 0;

#if 0
    print_fs_traits(fs_traits);
#else
    (void)print_fs_traits;
#endif

    ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
    if (ureg == NULL)
	return 0;

    /* it has to be either a fill, a composite op or a yuv conversion */
    debug_assert((is_fill ^ is_composite) ^ is_yuv);
    (void)is_yuv;

    out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);

    if (src_repeat_none || mask_repeat_none ||
	src_set_alpha || mask_set_alpha || src_luminance) {
	imm0 = ureg_imm4f(ureg, 0, 0, 0, 1);
    }
    if (is_composite) {
	src_sampler = ureg_DECL_sampler(ureg, 0);
	src_input = ureg_DECL_fs_input(ureg,
				       TGSI_SEMANTIC_GENERIC, 0,
				       TGSI_INTERPOLATE_PERSPECTIVE);
    } else if (is_fill) {
	if (is_solid)
	    src_input = ureg_DECL_fs_input(ureg,
					   TGSI_SEMANTIC_COLOR, 0,
					   TGSI_INTERPOLATE_PERSPECTIVE);
	else
	    src_input = ureg_DECL_fs_input(ureg,
					   TGSI_SEMANTIC_POSITION, 0,
					   TGSI_INTERPOLATE_PERSPECTIVE);
    } else {
	debug_assert(is_yuv);
	return create_yuv_shader(pipe, ureg);
    }

    if (has_mask) {
	mask_sampler = ureg_DECL_sampler(ureg, 1);
	mask_pos = ureg_DECL_fs_input(ureg,
				      TGSI_SEMANTIC_GENERIC, 1,
				      TGSI_INTERPOLATE_PERSPECTIVE);
    }
#if 0				/* unused right now */
    dst_sampler = ureg_DECL_sampler(ureg, 2);
    dst_pos = ureg_DECL_fs_input(ureg,
				 TGSI_SEMANTIC_POSITION, 2,
				 TGSI_INTERPOLATE_PERSPECTIVE);
#endif

    if (is_composite) {
	if (has_mask || src_luminance || dst_luminance)
	    src = ureg_DECL_temporary(ureg);
	else
	    src = out;
	xrender_tex(ureg, src, src_input, src_sampler, imm0,
		    src_repeat_none, src_swizzle, src_set_alpha);
    } else if (is_fill) {
	if (is_solid) {
	    if (has_mask || src_luminance || dst_luminance)
		src = ureg_dst(src_input);
	    else
		ureg_MOV(ureg, out, src_input);
	} else if (is_lingrad || is_radgrad) {
	    struct ureg_src coords, const0124, matrow0, matrow1, matrow2;

	    if (has_mask || src_luminance || dst_luminance)
		src = ureg_DECL_temporary(ureg);
	    else
		src = out;

	    coords = ureg_DECL_constant(ureg, 0);
	    const0124 = ureg_DECL_constant(ureg, 1);
	    matrow0 = ureg_DECL_constant(ureg, 2);
	    matrow1 = ureg_DECL_constant(ureg, 3);
	    matrow2 = ureg_DECL_constant(ureg, 4);

	    if (is_lingrad) {
		linear_gradient(ureg, src,
				src_input, src_sampler,
				coords, const0124, matrow0, matrow1, matrow2);
	    } else if (is_radgrad) {
		radial_gradient(ureg, src,
				src_input, src_sampler,
				coords, const0124, matrow0, matrow1, matrow2);
	    }
	} else
	    debug_assert(!"Unknown fill type!");
    }
    if (src_luminance) {
	ureg_MOV(ureg, src, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_X));
	ureg_MOV(ureg, ureg_writemask(src, TGSI_WRITEMASK_XYZ),
		 ureg_scalar(imm0, TGSI_SWIZZLE_X));
	if (!has_mask && !dst_luminance)
	    ureg_MOV(ureg, out, ureg_src(src));
    }

    if (has_mask) {
	mask = ureg_DECL_temporary(ureg);
	xrender_tex(ureg, mask, mask_pos, mask_sampler, imm0,
		    mask_repeat_none, mask_swizzle, mask_set_alpha);
	/* src IN mask */

	src_in_mask(ureg, (dst_luminance) ? src : out, ureg_src(src),
		    ureg_src(mask),
		    comp_alpha_mask, mask_luminance);

	ureg_release_temporary(ureg, mask);
    }

    if (dst_luminance) {
	/*
	 * Make sure the alpha channel goes into the output L8 surface.
	 */
	ureg_MOV(ureg, out, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_W));
    }

    ureg_END(ureg);

    return ureg_create_shader_and_destroy(ureg, pipe);
}
static void *
create_shader(struct pipe_context *pipe,
              int id,
              struct pipe_shader_state *shader)
{
   int idx = 0;
   const struct shader_asm_info * shaders[SHADER_STAGES];

   /* the shader has to have a fill */
   debug_assert(id & ALL_FILLS);

   /* first stage */
   if (id & VEGA_SOLID_FILL_SHADER) {
      debug_assert(idx == 0);
      shaders[idx] = &shaders_asm[0];
      debug_assert(shaders_asm[0].id == VEGA_SOLID_FILL_SHADER);
      ++idx;
   }
   if ((id & VEGA_LINEAR_GRADIENT_SHADER)) {
      debug_assert(idx == 0);
      shaders[idx] = &shaders_asm[1];
      debug_assert(shaders_asm[1].id == VEGA_LINEAR_GRADIENT_SHADER);
      ++idx;
   }
   if ((id & VEGA_RADIAL_GRADIENT_SHADER)) {
      debug_assert(idx == 0);
      shaders[idx] = &shaders_asm[2];
      debug_assert(shaders_asm[2].id == VEGA_RADIAL_GRADIENT_SHADER);
      ++idx;
   }
   if ((id & VEGA_PATTERN_SHADER)) {
      debug_assert(idx == 0);
      debug_assert(shaders_asm[3].id == VEGA_PATTERN_SHADER);
      shaders[idx] = &shaders_asm[3];
      ++idx;
   }
   if ((id & VEGA_IMAGE_NORMAL_SHADER)) {
      debug_assert(idx == 0);
      debug_assert(shaders_asm[4].id == VEGA_IMAGE_NORMAL_SHADER);
      shaders[idx] = &shaders_asm[4];
      ++idx;
   }

   /* second stage */
   if ((id & VEGA_IMAGE_MULTIPLY_SHADER)) {
      debug_assert(shaders_asm[5].id == VEGA_IMAGE_MULTIPLY_SHADER);
      shaders[idx] = &shaders_asm[5];
      ++idx;
   } else if ((id & VEGA_IMAGE_STENCIL_SHADER)) {
      debug_assert(shaders_asm[6].id == VEGA_IMAGE_STENCIL_SHADER);
      shaders[idx] = &shaders_asm[6];
      ++idx;
   }

   /* third stage */
   if ((id & VEGA_MASK_SHADER)) {
      debug_assert(idx == 1);
      debug_assert(shaders_asm[7].id == VEGA_MASK_SHADER);
      shaders[idx] = &shaders_asm[7];
      ++idx;
   }

   /* fourth stage */
   if ((id & VEGA_BLEND_MULTIPLY_SHADER)) {
      debug_assert(shaders_asm[8].id == VEGA_BLEND_MULTIPLY_SHADER);
      shaders[idx] = &shaders_asm[8];
      ++idx;
   } else if ((id & VEGA_BLEND_SCREEN_SHADER)) {
      debug_assert(shaders_asm[9].id == VEGA_BLEND_SCREEN_SHADER);
      shaders[idx] = &shaders_asm[9];
      ++idx;
   } else if ((id & VEGA_BLEND_DARKEN_SHADER)) {
      debug_assert(shaders_asm[10].id == VEGA_BLEND_DARKEN_SHADER);
      shaders[idx] = &shaders_asm[10];
      ++idx;
   } else if ((id & VEGA_BLEND_LIGHTEN_SHADER)) {
      debug_assert(shaders_asm[11].id == VEGA_BLEND_LIGHTEN_SHADER);
      shaders[idx] = &shaders_asm[11];
      ++idx;
   }

   /* fifth stage */
   if ((id & VEGA_PREMULTIPLY_SHADER)) {
      debug_assert(shaders_asm[12].id == VEGA_PREMULTIPLY_SHADER);
      shaders[idx] = &shaders_asm[12];
      ++idx;
   } else if ((id & VEGA_UNPREMULTIPLY_SHADER)) {
      debug_assert(shaders_asm[13].id == VEGA_UNPREMULTIPLY_SHADER);
      shaders[idx] = &shaders_asm[13];
      ++idx;
   }

   /* sixth stage */
   if ((id & VEGA_BW_SHADER)) {
      debug_assert(shaders_asm[14].id == VEGA_BW_SHADER);
      shaders[idx] = &shaders_asm[14];
      ++idx;
   }

   return combine_shaders(shaders, idx, pipe, shader);
}
Exemple #16
0
int assign_task_to_group (entity *group, entity *task_en, unsigned int valid_members)
{

	int
		sites_required;

	entity_sub_types
		sub_type,
		group_type;

	entity
		*force,
		*landing,
		*end_keysite,
		*start_keysite,
		*guide,
		*member;

	vec3d
		*pos;

	task
		*task_raw;

   debug_assert (get_comms_model () == COMMS_MODEL_SERVER);

	ASSERT (task_en);

	ASSERT (group);

	ASSERT (!(get_local_group_primary_task (group) && (get_local_entity_int_value (task_en, INT_TYPE_PRIMARY_TASK))));

	task_raw = ( task * ) get_local_entity_data (task_en);

	group_type = get_local_entity_int_value (group, INT_TYPE_ENTITY_SUB_TYPE);

	member = get_local_entity_first_child (group, LIST_TYPE_MEMBER);

	// don't if no members or if the group is a CARRIER
	if (!member)
	{
		return FALSE;
	}

	if (get_local_entity_int_value (group, INT_TYPE_ENTITY_SUB_TYPE) == ENTITY_SUB_TYPE_GROUP_ASSAULT_SHIP)
	{
		if (task_raw->sub_type != ENTITY_SUB_TYPE_TASK_ENGAGE)
		{
			return FALSE;
		}
	}

	//
	// check for invalid tasks
	//
	
	switch (task_raw->sub_type)
	{
		case ENTITY_SUB_TYPE_TASK_LANDING:
		case ENTITY_SUB_TYPE_TASK_LANDING_HOLDING:
		case ENTITY_SUB_TYPE_TASK_TAKEOFF:
		case ENTITY_SUB_TYPE_TASK_TAKEOFF_HOLDING:
		{
			#ifdef DEBUG

			debug_fatal ("ASSIGN: Invalid task type (%s) for assign_task_to_group", get_local_entity_string (task_en, STRING_TYPE_FULL_NAME));

			#endif

			return FALSE;
		}
	}

	//
	// Create route
	//

	if (get_local_entity_int_value (group, INT_TYPE_GROUP_LIST_TYPE) == LIST_TYPE_KEYSITE_GROUP)
	{
		start_keysite = get_local_entity_parent (group, LIST_TYPE_KEYSITE_GROUP);
	}
	else
	{
//		start_keysite = get_closest_keysite (NUM_ENTITY_SUB_TYPE_KEYSITES, task_raw->side, get_local_entity_vec3d_ptr (group, VEC3D_TYPE_POSITION), 1.0 * KILOMETRE, NULL);
		start_keysite = NULL;
	}

	pos = get_local_entity_vec3d_ptr (task_en, VEC3D_TYPE_STOP_POSITION);

	force = get_local_force_entity ( ( entity_sides ) get_local_entity_int_value (task_en, INT_TYPE_SIDE) );

	sub_type = group_database [group_type].default_landing_type;

	landing = NULL;

	end_keysite = NULL;

	if (get_local_entity_int_value (task_en, INT_TYPE_ASSESS_LANDING))
	{
		ASSERT (start_keysite);

		end_keysite = ( entity * ) get_local_entity_ptr_value (task_en, PTR_TYPE_RETURN_KEYSITE);

		if (end_keysite)
		{
			//
			// check end keysite has suitble free landing sites
			//

			if (start_keysite != end_keysite)
			{
				//
				// if end keysite == start keysite then keysite MUST have enough sites because the aircraft are already there
				//
				
				sites_required = get_local_group_member_count (group);
	
				if (get_keysite_landing_sites_available (end_keysite, sub_type) < sites_required)
				{
					//
					// END keysite was specified - but no free landing sites for this group
					//
					
					return FALSE;
				}
			}
		}
		else
		{
			//
			// No END keysite specified so return to start keysite
			//

			end_keysite = start_keysite;

			set_local_entity_ptr_value (task_en, PTR_TYPE_RETURN_KEYSITE, end_keysite);
		}

		ASSERT (end_keysite);

		landing = get_local_entity_landing_entity (end_keysite, group_database [get_local_entity_int_value (group, INT_TYPE_ENTITY_SUB_TYPE)].default_landing_type);
	}

	if (create_generic_waypoint_route (group, task_en, end_keysite, NULL, NULL, NULL, 0))
	{
		#if DEBUG_MODULE

		debug_log ("ASSIGN: group %s (%d) assigned to task %s (%d)",
							get_local_entity_string (group, STRING_TYPE_FULL_NAME),
							get_local_entity_index (group),
							get_local_entity_string (task_en, STRING_TYPE_FULL_NAME),
							get_local_entity_index (task_en));

		#endif

		//
		// Assign task
		//

		guide = push_task_onto_group_task_stack (group, task_en, valid_members);

		assign_task_to_group_members (group, guide, valid_members);

		return TRUE;
	}

	return FALSE;
}
Exemple #17
0
void unpack_local_force_data (entity *en, pack_modes mode)
{

	int
		count,
		loop;

	force
		*raw;

	campaign_criteria_type
		*last_campaign_criteria,
		*campaign_criteria;

	ASSERT ((mode >= 0) && (mode < NUM_PACK_MODES));

	ASSERT (en);

	raw = (force *) get_local_entity_data (en);
		
	switch (mode)
	{
		////////////////////////////////////////
		case PACK_MODE_SERVER_SESSION:
		////////////////////////////////////////
		{
         unpack_string (en, STRING_TYPE_FORCE_NAME, raw->force_name);

			// keysite_force
		
			// pilot root
		
			unpack_list_root (en, LIST_TYPE_DIVISION, &raw->division_root);
	
			unpack_list_root (en, LIST_TYPE_CAMPAIGN_OBJECTIVE, &raw->campaign_objective_root);
	
			// air_registry_root
			// ground_registry_root
			// sea_registry_root
	
//			unpack_list_root (en, LIST_TYPE_INDEPENDENT_GROUP, &raw->independent_group_root);
	
			// force_link
		
			// update link
		
			// task generation //////////////////////////////////////////////
		
			for (loop = 0; loop < NUM_ENTITY_SUB_TYPE_TASKS; loop ++)
			{
				raw->task_generation [loop].valid = unpack_int_value (en, INT_TYPE_VALID);

				raw->task_generation [loop].created = unpack_int_value (en, INT_TYPE_TASK_GENERATION);
			}
			/////////////////////////////////////////////////////////////////
		
			// campaign criteria ////////////////////////////////////////////
		
			count = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_COUNT);
	
			last_campaign_criteria = NULL;
	
			while (count)
			{
	
				campaign_criteria = (campaign_criteria_type *) malloc_heap_mem (sizeof (campaign_criteria_type));
	
				memset (campaign_criteria, 0, sizeof (campaign_criteria_type));
		
				campaign_criteria->criteria_type = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_TYPE);
		
				campaign_criteria->valid = unpack_int_value (en, INT_TYPE_VALID);
		
				campaign_criteria->result = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_RESULT);
		
				campaign_criteria->value1 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE);
		
				campaign_criteria->value2 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE);
		
				campaign_criteria->value3 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE);
		
				campaign_criteria->value4 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE);
		
				campaign_criteria->next = last_campaign_criteria;
	
				raw->campaign_criteria = campaign_criteria;
	
				last_campaign_criteria = campaign_criteria;
	
				count --;
			}
			/////////////////////////////////////////////////////////////////
		
			// force_info_criteria
		
			for (loop = 0; loop < NUM_FORCE_INFO_CATAGORIES; loop ++)
			{
				raw->force_info_current_hardware [loop] = unpack_int_value (en, INT_TYPE_VALUE);
		
				raw->force_info_reserve_hardware [loop] = unpack_int_value (en, INT_TYPE_VALUE);
			}
			/////////////////////////////////////////////////////////////////
	
			for (loop = 0; loop < NUM_ENTITY_SUB_TYPE_GROUPS; loop ++)
			{
				raw->kills [loop] = unpack_int_value (en, INT_TYPE_VALUE);
				raw->losses [loop] = unpack_int_value (en, INT_TYPE_VALUE);
//				raw->group_count [loop] = unpack_int_value (en, INT_TYPE_VALUE);
			}

			raw->sleep = unpack_float_value (en, FLOAT_TYPE_SLEEP);
		
			raw->force_attitude = unpack_int_value (en, INT_TYPE_FORCE_ATTITUDE);
		
			// sector_count
		
			raw->colour = unpack_int_value (en, INT_TYPE_COLOUR);
		
			raw->side = unpack_int_value (en, INT_TYPE_SIDE);
	
			break;
		}
		////////////////////////////////////////
		case PACK_MODE_CLIENT_SESSION:
		////////////////////////////////////////
		{
			//
			// create entity
			//

			debug_assert (get_free_entity (get_local_entity_safe_index (en)));

			set_local_entity_type (en, ENTITY_TYPE_FORCE);

			raw = (force *) malloc_fast_mem (sizeof (force));

			set_local_entity_data (en, raw);

			memset (raw, 0, sizeof (force));

			//
			// unpack data (in exactly the same order as the data was packed)
			//

         unpack_string (en, STRING_TYPE_FORCE_NAME, raw->force_name);

			// keysite_force
	
			unpack_list_root (en, LIST_TYPE_PILOT, &raw->pilot_root);

			unpack_list_root (en, LIST_TYPE_DIVISION, &raw->division_root);

			unpack_list_root (en, LIST_TYPE_CAMPAIGN_OBJECTIVE, &raw->campaign_objective_root);

			//	air_registry_root
			//	ground_registry_root
			//	sea_registry_root

//			unpack_list_root (en, LIST_TYPE_INDEPENDENT_GROUP, &raw->independent_group_root);
	
			unpack_list_link (en, LIST_TYPE_FORCE, &raw->force_link);

			// update_link

			////////////////////////////////////////////
			// task_generation
			for (loop = 0; loop < NUM_ENTITY_SUB_TYPE_TASKS; loop ++)
			{
				raw->task_generation [loop].valid = unpack_int_value (en, INT_TYPE_VALID);

				raw->task_generation [loop].created = unpack_int_value (en, INT_TYPE_TASK_GENERATION);
			}
			////////////////////////////////////////////

			////////////////////////////////////////////
			// campaign criteria 
			raw->campaign_criteria = NULL;

			count = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_COUNT);

			while (count)
			{

				count --;

				campaign_criteria = (campaign_criteria_type *) malloc_heap_mem (sizeof (campaign_criteria_type));

				memset (campaign_criteria, 0, sizeof (campaign_criteria_type));

				campaign_criteria->next = raw->campaign_criteria;

				raw->campaign_criteria = campaign_criteria;

				campaign_criteria->criteria_type = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_TYPE);

				campaign_criteria->valid = unpack_int_value (en, INT_TYPE_VALID);

				campaign_criteria->result = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_RESULT);

				campaign_criteria->value1 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE);

				campaign_criteria->value2 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE);

				campaign_criteria->value3 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE);

				campaign_criteria->value4 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE);
			}
			/////////////////////////////////////////////////////////////////

			////////////////////////////////////////////
			// force_info_criteria
			for (loop = 0; loop < NUM_FORCE_INFO_CATAGORIES; loop ++)
			{
				raw->force_info_current_hardware [loop] = unpack_int_value (en, INT_TYPE_VALUE);

				raw->force_info_reserve_hardware [loop] = unpack_int_value (en, INT_TYPE_VALUE);
			}
			// force_info_criteria

			for (loop = 0; loop < NUM_ENTITY_SUB_TYPE_GROUPS; loop ++)
			{
				raw->kills [loop] = unpack_int_value (en, INT_TYPE_VALUE);
				raw->losses [loop] = unpack_int_value (en, INT_TYPE_VALUE);
//				raw->group_count [loop] = unpack_int_value (en, INT_TYPE_VALUE);
			}

			raw->sleep = unpack_float_value (en, FLOAT_TYPE_SLEEP);

			raw->force_attitude = unpack_int_value (en, INT_TYPE_FORCE_ATTITUDE);

			raw->sector_count = unpack_int_value (en, INT_TYPE_FORCE_SECTOR_COUNT);

			raw->colour = unpack_int_value (en, INT_TYPE_COLOUR);

			raw->side = unpack_int_value (en, INT_TYPE_SIDE);

			insert_local_entity_into_parents_child_list (en, LIST_TYPE_UPDATE, get_update_entity (), NULL);

			break;
		}
		////////////////////////////////////////
		case PACK_MODE_BROWSE_SESSION:
		////////////////////////////////////////
		{
			//
			// create entity
			//

			debug_assert (get_free_entity (get_local_entity_safe_index (en)));

			set_local_entity_type (en, ENTITY_TYPE_FORCE);

			raw = (force *) malloc_fast_mem (sizeof (force));

			set_local_entity_data (en, raw);

			memset (raw, 0, sizeof (force));

			//
			// unpack data (in exactly the same order as the data was packed)
			//

         unpack_string (en, STRING_TYPE_FORCE_NAME, raw->force_name);

			// keysite_force_root

			unpack_list_root (en, LIST_TYPE_PILOT, &raw->pilot_root);

			unpack_list_link (en, LIST_TYPE_FORCE, &raw->force_link);

			// update_link

			// task_generation

			// campaign_criteria

			// force_info_catagories

			// sleep

			raw->force_attitude = unpack_int_value (en, INT_TYPE_FORCE_ATTITUDE);

			raw->colour = unpack_int_value (en, INT_TYPE_COLOUR);

			raw->side = unpack_int_value (en, INT_TYPE_SIDE);

			break;
		}
	}
}
Exemple #18
0
static void
group_n(struct group_ops *ops, void *arr, unsigned n)
{
	unsigned i, j;

	/* first pass, figure out what has conflicts and needs a mov
	 * inserted.  Do this up front, before starting to setup
	 * left/right neighbor pointers.  Trying to do it in a single
	 * pass could result in a situation where we can't even setup
	 * the mov's right neighbor ptr if the next instr also needs
	 * a mov.
	 */
restart:
	for (i = 0; i < n; i++) {
		struct ir3_instruction *instr = ops->get(arr, i);
		if (instr) {
			struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
			struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;
			bool conflict;

			/* check for left/right neighbor conflicts: */
			conflict = conflicts(instr->cp.left, left) ||
				conflicts(instr->cp.right, right);

			/* RA can't yet deal very well w/ group'd phi's: */
			if (is_meta(instr) && (instr->opc == OPC_META_PHI))
				conflict = true;

			/* we also can't have an instr twice in the group: */
			for (j = i + 1; (j < n) && !conflict; j++)
				if (ops->get(arr, j) == instr)
					conflict = true;

			if (conflict) {
				ops->insert_mov(arr, i, instr);
				/* inserting the mov may have caused a conflict
				 * against the previous:
				 */
				goto restart;
			}
		}
	}

	/* second pass, now that we've inserted mov's, fixup left/right
	 * neighbors.  This is guaranteed to succeed, since by definition
	 * the newly inserted mov's cannot conflict with anything.
	 */
	for (i = 0; i < n; i++) {
		struct ir3_instruction *instr = ops->get(arr, i);
		if (instr) {
			struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
			struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;

			debug_assert(!conflicts(instr->cp.left, left));
			if (left) {
				instr->cp.left_cnt++;
				instr->cp.left = left;
			}

			debug_assert(!conflicts(instr->cp.right, right));
			if (right) {
				instr->cp.right_cnt++;
				instr->cp.right = right;
			}
		}
	}
}
Exemple #19
0
static unsigned
translate_opcode( unsigned op )
{
   switch( op ) {
   case OPCODE_ARL:
      return TGSI_OPCODE_ARL;
   case OPCODE_ADD:
      return TGSI_OPCODE_ADD;
   case OPCODE_CMP:
      return TGSI_OPCODE_CMP;
   case OPCODE_COS:
      return TGSI_OPCODE_COS;
   case OPCODE_DP3:
      return TGSI_OPCODE_DP3;
   case OPCODE_DP4:
      return TGSI_OPCODE_DP4;
   case OPCODE_DPH:
      return TGSI_OPCODE_DPH;
   case OPCODE_DST:
      return TGSI_OPCODE_DST;
   case OPCODE_EX2:
      return TGSI_OPCODE_EX2;
   case OPCODE_EXP:
      return TGSI_OPCODE_EXP;
   case OPCODE_FLR:
      return TGSI_OPCODE_FLR;
   case OPCODE_FRC:
      return TGSI_OPCODE_FRC;
   case OPCODE_KIL:
      return TGSI_OPCODE_KILL_IF;
   case OPCODE_LG2:
      return TGSI_OPCODE_LG2;
   case OPCODE_LOG:
      return TGSI_OPCODE_LOG;
   case OPCODE_LIT:
      return TGSI_OPCODE_LIT;
   case OPCODE_LRP:
      return TGSI_OPCODE_LRP;
   case OPCODE_MAD:
      return TGSI_OPCODE_MAD;
   case OPCODE_MAX:
      return TGSI_OPCODE_MAX;
   case OPCODE_MIN:
      return TGSI_OPCODE_MIN;
   case OPCODE_MOV:
      return TGSI_OPCODE_MOV;
   case OPCODE_MUL:
      return TGSI_OPCODE_MUL;
   case OPCODE_POW:
      return TGSI_OPCODE_POW;
   case OPCODE_RCP:
      return TGSI_OPCODE_RCP;
   case OPCODE_SCS:
      return TGSI_OPCODE_SCS;
   case OPCODE_SGE:
      return TGSI_OPCODE_SGE;
   case OPCODE_SIN:
      return TGSI_OPCODE_SIN;
   case OPCODE_SLT:
      return TGSI_OPCODE_SLT;
   case OPCODE_TEX:
      return TGSI_OPCODE_TEX;
   case OPCODE_TXB:
      return TGSI_OPCODE_TXB;
   case OPCODE_TXP:
      return TGSI_OPCODE_TXP;
   case OPCODE_XPD:
      return TGSI_OPCODE_XPD;
   case OPCODE_END:
      return TGSI_OPCODE_END;
   default:
      debug_assert( 0 );
      return TGSI_OPCODE_NOP;
   }
}
Exemple #20
0
void
fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
				 int nr, struct pipe_surface **bufs)
{
	const struct ir3_shader_variant *vp, *fp;
	const struct ir3_info *vsi, *fsi;
	enum a3xx_instrbuffermode fpbuffer, vpbuffer;
	uint32_t fpbuffersz, vpbuffersz, fsoff;
	uint32_t pos_regid, posz_regid, psize_regid;
	uint32_t vcoord_regid, face_regid, coord_regid, zwcoord_regid;
	uint32_t color_regid[4] = {0};
	int constmode;
	int i, j;

	debug_assert(nr <= ARRAY_SIZE(color_regid));

	vp = fd3_emit_get_vp(emit);
	fp = fd3_emit_get_fp(emit);

	vsi = &vp->info;
	fsi = &fp->info;

	fpbuffer = BUFFER;
	vpbuffer = BUFFER;
	fpbuffersz = fp->instrlen;
	vpbuffersz = vp->instrlen;

	/*
	 * Decide whether to use BUFFER or CACHE mode for VS and FS.  It
	 * appears like 256 is the hard limit, but when the combined size
	 * exceeds 128 then blob will try to keep FS in BUFFER mode and
	 * switch to CACHE for VS until VS is too large.  The blob seems
	 * to switch FS out of BUFFER mode at slightly under 128.  But
	 * a bit fuzzy on the decision tree, so use slightly conservative
	 * limits.
	 *
	 * TODO check if these thresholds for BUFFER vs CACHE mode are the
	 *      same for all a3xx or whether we need to consider the gpuid
	 */

	if ((fpbuffersz + vpbuffersz) > 128) {
		if (fpbuffersz < 112) {
			/* FP:BUFFER   VP:CACHE  */
			vpbuffer = CACHE;
			vpbuffersz = 256 - fpbuffersz;
		} else if (vpbuffersz < 112) {
			/* FP:CACHE    VP:BUFFER */
			fpbuffer = CACHE;
			fpbuffersz = 256 - vpbuffersz;
		} else {
			/* FP:CACHE    VP:CACHE  */
			vpbuffer = fpbuffer = CACHE;
			vpbuffersz = fpbuffersz = 192;
		}
	}

	if (fpbuffer == BUFFER) {
		fsoff = 128 - fpbuffersz;
	} else {
		fsoff = 256 - fpbuffersz;
	}

	/* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
	constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;

	pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS);
	posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH);
	psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ);
	if (fp->color0_mrt) {
		color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
			ir3_find_output_regid(fp, FRAG_RESULT_COLOR);
	} else {
		color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0);
		color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1);
		color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2);
		color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3);
	}

	face_regid      = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRONT_FACE);
	coord_regid     = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD);
	zwcoord_regid   = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2);
	vcoord_regid    = ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PIXEL);

	/* adjust regids for alpha output formats. there is no alpha render
	 * format, so it's just treated like red
	 */
	for (i = 0; i < nr; i++)
		if (util_format_is_alpha(pipe_surface_format(bufs[i])))
			color_regid[i] += 3;

	/* we could probably divide this up into things that need to be
	 * emitted if frag-prog is dirty vs if vert-prog is dirty..
	 */

	OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
	OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
			A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
			A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
			/* NOTE:  I guess SHADERRESTART and CONSTFULLUPDATE maybe
			 * flush some caches? I think we only need to set those
			 * bits if we have updated const or shader..
			 */
			A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
			A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
	OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
			A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
			A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(coord_regid) |
			A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(zwcoord_regid));
	OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31) |
			A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(face_regid));
	OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(vcoord_regid));
	OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
			A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
			A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz));
	OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
			A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
			A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz));

	OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
	OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) |
			COND(emit->binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
			A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
			A3XX_SP_SP_CTRL_REG_L0MODE(0));

	OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
	OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));

	OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
	OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
			A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(vpbuffer) |
			COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) |
			A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
			A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
			A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
			A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
			A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz));
	OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
			A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
			A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen + 1, 0)));
	OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
			A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
			A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->varying_in));

	struct ir3_shader_linkage l = {0};
	ir3_link_shaders(&l, vp, fp);

	for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
		uint32_t reg = 0;

		OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1);

		reg |= A3XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
		reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
		j++;

		reg |= A3XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
		reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
		j++;

		OUT_RING(ring, reg);
	}

	for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
		uint32_t reg = 0;

		OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1);

		reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8);
		reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8);
		reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8);
		reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8);

		OUT_RING(ring, reg);
	}

	OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
	OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
			A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
	OUT_RELOC(ring, vp->bo, 0, 0, 0);  /* SP_VS_OBJ_START_REG */

	if (emit->binning_pass) {
		OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
		OUT_RING(ring, 0x00000000);

		OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
		OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
				A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER));
		OUT_RING(ring, 0x00000000);

		OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 1);
		OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
				A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
	} else {
		OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
		OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));

		OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
		OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
				A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(fpbuffer) |
				COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) |
				A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
				A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
				A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP |
				A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
				A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
				COND(fp->num_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
				A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz));
		OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
				A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
				A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fp->constlen + 1, 0)) |
				A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));

		OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
		OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(
					MAX2(128, vp->constlen)) |
				A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff));
		OUT_RELOC(ring, fp->bo, 0, 0, 0);  /* SP_FS_OBJ_START_REG */
	}

	OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
	OUT_RING(ring,
			 COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) |
			 A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) |
			 A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1));

	OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
	for (i = 0; i < 4; i++) {
		uint32_t mrt_reg = A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) |
			COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION);

		if (i < nr) {
			enum pipe_format fmt = pipe_surface_format(bufs[i]);
			mrt_reg |= COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) |
				COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT);
		}
		OUT_RING(ring, mrt_reg);
	}

	if (emit->binning_pass) {
		OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
		OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) |
				A3XX_VPC_ATTR_LMSIZE(1) |
				COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
		OUT_RING(ring, 0x00000000);
	} else {
		uint32_t vinterp[4], flatshade[2], vpsrepl[4];

		memset(vinterp, 0, sizeof(vinterp));
		memset(flatshade, 0, sizeof(flatshade));
		memset(vpsrepl, 0, sizeof(vpsrepl));

		/* figure out VARYING_INTERP / FLAT_SHAD register values: */
		for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) {
			/* NOTE: varyings are packed, so if compmask is 0xb
			 * then first, third, and fourth component occupy
			 * three consecutive varying slots:
			 */
			unsigned compmask = fp->inputs[j].compmask;

			uint32_t inloc = fp->inputs[j].inloc;

			if ((fp->inputs[j].interpolate == INTERP_MODE_FLAT) ||
					(fp->inputs[j].rasterflat && emit->rasterflat)) {
				uint32_t loc = inloc;

				for (i = 0; i < 4; i++) {
					if (compmask & (1 << i)) {
						vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
						flatshade[loc / 32] |= 1 << (loc % 32);
						loc++;
					}
				}
			}

			gl_varying_slot slot = fp->inputs[j].slot;

			/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
			if (slot >= VARYING_SLOT_VAR0) {
				unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
				/* Replace the .xy coordinates with S/T from the point sprite. Set
				 * interpolation bits for .zw such that they become .01
				 */
				if (emit->sprite_coord_enable & texmask) {
					/* mask is two 2-bit fields, where:
					 *   '01' -> S
					 *   '10' -> T
					 *   '11' -> 1 - T  (flip mode)
					 */
					unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001;
					uint32_t loc = inloc;
					if (compmask & 0x1) {
						vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
						loc++;
					}
					if (compmask & 0x2) {
						vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
						loc++;
					}
					if (compmask & 0x4) {
						/* .z <- 0.0f */
						vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
						loc++;
					}
					if (compmask & 0x8) {
						/* .w <- 1.0f */
						vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
						loc++;
					}
				}