void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, int nr, struct pipe_surface **bufs) { const struct ir3_shader_variant *vp, *fp; const struct ir3_info *vsi, *fsi; enum a3xx_instrbuffermode fpbuffer, vpbuffer; uint32_t fpbuffersz, vpbuffersz, fsoff; uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0}; int constmode; int i, j, k; debug_assert(nr <= ARRAY_SIZE(color_regid)); vp = fd3_emit_get_vp(emit); if (emit->key.binning_pass) { /* use dummy stateobj to simplify binning vs non-binning: */ static const struct ir3_shader_variant binning_fp = {}; fp = &binning_fp; } else { fp = fd3_emit_get_fp(emit); } vsi = &vp->info; fsi = &fp->info; fpbuffer = BUFFER; vpbuffer = BUFFER; fpbuffersz = fp->instrlen; vpbuffersz = vp->instrlen; /* * Decide whether to use BUFFER or CACHE mode for VS and FS. It * appears like 256 is the hard limit, but when the combined size * exceeds 128 then blob will try to keep FS in BUFFER mode and * switch to CACHE for VS until VS is too large. The blob seems * to switch FS out of BUFFER mode at slightly under 128. But * a bit fuzzy on the decision tree, so use slightly conservative * limits. * * TODO check if these thresholds for BUFFER vs CACHE mode are the * same for all a3xx or whether we need to consider the gpuid */ if ((fpbuffersz + vpbuffersz) > 128) { if (fpbuffersz < 112) { /* FP:BUFFER VP:CACHE */ vpbuffer = CACHE; vpbuffersz = 256 - fpbuffersz; } else if (vpbuffersz < 112) { /* FP:CACHE VP:BUFFER */ fpbuffer = CACHE; fpbuffersz = 256 - vpbuffersz; } else { /* FP:CACHE VP:CACHE */ vpbuffer = fpbuffer = CACHE; vpbuffersz = fpbuffersz = 192; } } if (fpbuffer == BUFFER) { fsoff = 128 - fpbuffersz; } else { fsoff = 256 - fpbuffersz; } /* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */ constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0; pos_regid = ir3_find_output_regid(vp, ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); posz_regid = ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); psize_regid = ir3_find_output_regid(vp, ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0)); if (fp->color0_mrt) { color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); } else { for (i = 0; i < fp->outputs_count; i++) { ir3_semantic sem = fp->outputs[i].semantic; unsigned idx = sem2idx(sem); if (sem2name(sem) != TGSI_SEMANTIC_COLOR) continue; debug_assert(idx < ARRAY_SIZE(color_regid)); color_regid[idx] = fp->outputs[i].regid; } } /* adjust regids for alpha output formats. there is no alpha render * format, so it's just treated like red */ for (i = 0; i < nr; i++) if (util_format_is_alpha(pipe_surface_format(bufs[i]))) color_regid[i] += 3; /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. */ OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6); OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) | /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe * flush some caches? I think we only need to set those * bits if we have updated const or shader.. 
*/ A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART | A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE | COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_ZWCOORD)); OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31)); OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(fp->pos_regid)); OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) | A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) | A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz)); OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) | A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) | A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz)); OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1); OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) | COND(emit->key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) | A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) | A3XX_SP_SP_CTRL_REG_L0MODE(0)); OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1); OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen)); OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3); OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) | A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(vpbuffer) | COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) | A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) | A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) | A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) | A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) | A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE | COND(vp->has_samp, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) | A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz)); OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) | A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) | A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen + 1, 0))); OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) | A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) | A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(fp->total_in, 4) / 4)); for (i = 0, j = -1; (i < 8) && (j < (int)fp->inputs_count); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1); j = ir3_next_varying(fp, j); if (j < fp->inputs_count) { k = ir3_find_output(vp, fp->inputs[j].semantic); reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[k].regid); reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[j].compmask); } j = ir3_next_varying(fp, j); if (j < fp->inputs_count) { k = ir3_find_output(vp, fp->inputs[j].semantic); reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[k].regid); reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[j].compmask); } OUT_RING(ring, reg); } for (i = 0, j = -1; (i < 4) && (j < (int)fp->inputs_count); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1); j = ir3_next_varying(fp, j); if (j < fp->inputs_count) reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(fp->inputs[j].inloc); j = ir3_next_varying(fp, j); if (j < fp->inputs_count) reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(fp->inputs[j].inloc); j = ir3_next_varying(fp, j); if (j < fp->inputs_count) reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(fp->inputs[j].inloc); j = ir3_next_varying(fp, j); if (j < fp->inputs_count) reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(fp->inputs[j].inloc); OUT_RING(ring, reg); } OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2); OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) | A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */ if (emit->key.binning_pass) { OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); OUT_RING(ring, 0x00000000); OUT_PKT0(ring, 
REG_A3XX_SP_FS_CTRL_REG0, 2); OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER)); OUT_RING(ring, 0x00000000); OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 1); OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) | A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); } else { OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen)); OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2); OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(fpbuffer) | COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) | A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) | A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) | A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) | A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) | A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz)); OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) | A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) | A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fp->constlen + 1, 0)) | A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63)); OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2); OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET( MAX2(128, vp->constlen)) | A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff)); OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */ } OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); OUT_RING(ring, COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) | A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) | A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1)); OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4); for (i = 0; i < 4; i++) { uint32_t mrt_reg = A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) | COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION); if (i < nr) { enum pipe_format fmt = pipe_surface_format(bufs[i]); mrt_reg |= COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) | COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT); } OUT_RING(ring, mrt_reg); } if (emit->key.binning_pass) { OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) | A3XX_VPC_ATTR_LMSIZE(1) | COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE)); OUT_RING(ring, 0x00000000); } else { uint32_t vinterp[4], flatshade[2], vpsrepl[4]; memset(vinterp, 0, sizeof(vinterp)); memset(flatshade, 0, sizeof(flatshade)); memset(vpsrepl, 0, sizeof(vpsrepl)); /* figure out VARYING_INTERP / FLAT_SHAD register values: */ for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) { uint32_t interp = fp->inputs[j].interpolate; /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG * instead.. rather than -8 everywhere else.. */ uint32_t inloc = fp->inputs[j].inloc - 8; /* currently assuming varyings aligned to 4 (not * packed): */ debug_assert((inloc % 4) == 0); if ((interp == TGSI_INTERPOLATE_CONSTANT) || ((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) { uint32_t loc = inloc; for (i = 0; i < 4; i++, loc++) { vinterp[loc / 16] |= FLAT << ((loc % 16) * 2); flatshade[loc / 32] |= 1 << (loc % 32); } } /* Replace the .xy coordinates with S/T from the point sprite. Set * interpolation bits for .zw such that they become .01 */ if (emit->sprite_coord_enable & (1 << sem2idx(fp->inputs[j].semantic))) { vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 
0x0d : 0x09) << ((inloc % 16) * 2); vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2); vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2); } } OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) | A3XX_VPC_ATTR_THRDASSIGN(1) | A3XX_VPC_ATTR_LMSIZE(1) | COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE)); OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) | A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in)); OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4); OUT_RING(ring, vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */ OUT_RING(ring, vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */ OUT_RING(ring, vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */ OUT_RING(ring, vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */ OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4); OUT_RING(ring, vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */ OUT_RING(ring, vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */ OUT_RING(ring, vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */ OUT_RING(ring, vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */ OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2); OUT_RING(ring, flatshade[0]); /* SP_FS_FLAT_SHAD_MODE_REG_0 */ OUT_RING(ring, flatshade[1]); /* SP_FS_FLAT_SHAD_MODE_REG_1 */ } if (vpbuffer == BUFFER) emit_shader(ring, vp); OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */ if (!emit->key.binning_pass) { if (fpbuffer == BUFFER) emit_shader(ring, fp); OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */ } }
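/*
 * A minimal, self-contained sketch of the BUFFER vs CACHE sizing decision
 * made at the top of fd3_program_emit() above, using the same thresholds
 * (128 combined, 112 per shader, 256 total).  The struct and function names
 * here are illustrative only, not driver API.  Assumes <stdbool.h>.
 */
struct fd3_instr_split {
	bool vs_cache, fs_cache;   /* true -> CACHE mode, false -> BUFFER */
	unsigned vs_sz, fs_sz;     /* value programmed as INSTRLENGTH */
};

static struct fd3_instr_split
fd3_split_instr_mem(unsigned vs_instrlen, unsigned fs_instrlen)
{
	struct fd3_instr_split s = {
		.vs_cache = false, .fs_cache = false,
		.vs_sz = vs_instrlen, .fs_sz = fs_instrlen,
	};

	if (vs_instrlen + fs_instrlen > 128) {
		if (fs_instrlen < 112) {
			/* FS stays resident in BUFFER, VS spills to CACHE */
			s.vs_cache = true;
			s.vs_sz = 256 - fs_instrlen;
		} else if (vs_instrlen < 112) {
			/* the other way around */
			s.fs_cache = true;
			s.fs_sz = 256 - vs_instrlen;
		} else {
			/* both too large: both run in CACHE mode with a fixed 192 window */
			s.vs_cache = s.fs_cache = true;
			s.vs_sz = s.fs_sz = 192;
		}
	}
	return s;
}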
/* open listening socket and initialize */
void cann_listen(char *port)
{
	struct addrinfo hints;
	struct addrinfo *result, *rp;
	int sfd, s;
	char buf[200];
	int ret, one = 1;

	signal(SIGPIPE, SIG_IGN);

	memset(&hints, 0, sizeof(struct addrinfo));
	hints.ai_family = AF_UNSPEC;     // Allow IPv4 or IPv6 - IPv4 is mapped into a v6 addr -> AI_V4MAPPED
	hints.ai_socktype = SOCK_STREAM; // Stream (TCP) socket
	hints.ai_flags = AI_PASSIVE | AI_V4MAPPED | AI_NUMERICSERV | AI_ALL; // For wildcard IP address

	s = getaddrinfo(NULL, port, &hints, &result);
	if (s != 0) {
		fprintf(stderr, "No listening addresses available (getaddrinfo: %s)\n",
		        gai_strerror(s));
		exit(EXIT_FAILURE);
	}

	// dump address info
	for (rp = result; rp != NULL; rp = rp->ai_next)
		debug(5, "Found address %s\t\t(family: %s, socktype %i, protocol %i)",
		      get_ip_str((struct sockaddr *)rp->ai_addr, buf, sizeof(buf)),
		      (rp->ai_family == AF_INET) ? "IPv4" :
		          ((rp->ai_family == AF_INET6) ? "IPv6" :
		           ((snprintf(buf, sizeof(buf), "%i", rp->ai_family) != 0) ? buf : "")),
		      rp->ai_socktype, rp->ai_protocol);

	/* getaddrinfo() returns a list of address structures.
	   Try each address until we successfully bind(2).
	   If socket(2) (or bind(2)) fails, we (close the socket
	   and) try the next address. */
	for (rp = result; rp != NULL; rp = rp->ai_next) {
		sfd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
		if (sfd == -1)
			continue;

		debug(5, "Trying to bind to %s (%s, socktype %i, protocol %i)",
		      get_ip_str((struct sockaddr *)rp->ai_addr, buf, sizeof(buf)),
		      (rp->ai_family == AF_INET) ? "IPv4" :
		          ((rp->ai_family == AF_INET6) ? "IPv6" :
		           ((snprintf(buf, sizeof(buf), "%i", rp->ai_family) != 0) ? buf : "")),
		      rp->ai_socktype, rp->ai_protocol);

		// set SO_REUSEADDR to avoid "address in use" (b/c of CLOSE_WAIT) when restarting
		ret = setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
		if (ret != 0)
			debug_perror(0, "Could not set socket options: ");

		if (bind(sfd, rp->ai_addr, rp->ai_addrlen) == 0)
			break; /* Success */

		debug_perror(0, "Could not bind to %s.",
		             get_ip_str((struct sockaddr *)rp->ai_addr, buf, sizeof(buf)));
		close(sfd);
	}

	if (rp == NULL) { /* No address succeeded */
		debug_perror(0, "All addresses in use");
		exit(EXIT_FAILURE);
	} else
		debug(5, "Bind succeeded!");

	freeaddrinfo(result); /* No longer needed */

	listen_socket = sfd;

	int flags = fcntl(listen_socket, F_GETFL, 0);
	fcntl(listen_socket, F_SETFL, flags | O_NDELAY); /* O_NDELAY == O_NONBLOCK on modern systems */

	/* specify connection backlog */
	ret = listen(listen_socket, SOMAXCONN);
	debug_assert(ret >= 0, "Could not listen() on listening socket");
}
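/*
 * The fcntl() calls above ignore errors and use the legacy O_NDELAY flag.
 * A minimal sketch of the same step with error checking, using the POSIX
 * name O_NONBLOCK (O_NDELAY is an alias for it on modern systems); the
 * helper name is hypothetical and assumes <fcntl.h>.
 */
static int set_nonblocking(int fd)
{
	int flags = fcntl(fd, F_GETFL, 0);

	if (flags == -1)
		return -1;
	return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
}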
static int response_to_task_terminated (entity_messages message, entity *receiver, entity *sender, va_list pargs)
{
	entity *guide, *landing_en, *task;
	aircraft *raw;

	raw = (aircraft *) get_local_entity_data (receiver);

	#if DEBUG_MODULE_MESSAGE_TEXT
	debug_log ("AIRCRAFT MSGS: received task terminated, ac %s made safe",
	           entity_sub_type_aircraft_names [raw->mob.sub_type]);
	#endif

	debug_assert (raw->member_link.parent);

	//
	// Check if aircraft was on a landing/takeoff/holding route
	//

	guide = get_local_entity_parent (receiver, LIST_TYPE_FOLLOWER);
	ASSERT (guide);

	task = get_local_entity_parent (guide, LIST_TYPE_GUIDE);
	ASSERT (task);

	landing_en = get_local_entity_parent (task, LIST_TYPE_ASSIGNED_TASK);

	switch (get_local_entity_int_value (task, INT_TYPE_ENTITY_SUB_TYPE))
	{
		case ENTITY_SUB_TYPE_TASK_LANDING:
		{
			ASSERT (landing_en);

			#if DEBUG_MODULE_MESSAGE_TEXT
			debug_log ("AC_MSGS: mobile was on a %s route, unlocking route and landing site",
			           entity_sub_type_task_names [get_local_entity_int_value (task, INT_TYPE_ENTITY_SUB_TYPE)]);
			#endif

			if (get_local_entity_int_value (receiver, INT_TYPE_PLAYER) == ENTITY_PLAYER_AI)
			{
				notify_local_entity (ENTITY_MESSAGE_UNLOCK_LANDING_ROUTE, landing_en, receiver);
			}

			notify_local_entity (ENTITY_MESSAGE_UNLOCK_LANDING_SITE, landing_en, receiver);

			break;
		}
		case ENTITY_SUB_TYPE_TASK_TAKEOFF:
		{
			ASSERT (landing_en);

			#if DEBUG_MODULE_MESSAGE_TEXT
			debug_log ("AC_MSGS: mobile was on a %s route, unlocking route",
			           entity_sub_type_task_names [get_local_entity_int_value (task, INT_TYPE_ENTITY_SUB_TYPE)]);
			#endif

			notify_local_entity (ENTITY_MESSAGE_UNLOCK_TAKEOFF_ROUTE, landing_en, receiver);

			break;
		}
		case ENTITY_SUB_TYPE_TASK_LANDING_HOLDING:
		{
			ASSERT (landing_en);

			#if DEBUG_MODULE_MESSAGE_TEXT
			debug_log ("AC_MSGS: mobile was on a %s route, unlocking landing site",
			           entity_sub_type_task_names [get_local_entity_int_value (task, INT_TYPE_ENTITY_SUB_TYPE)]);
			#endif

			notify_local_entity (ENTITY_MESSAGE_UNLOCK_LANDING_SITE, landing_en, receiver);

			break;
		}
		case ENTITY_SUB_TYPE_TASK_TAKEOFF_HOLDING:
		{
			ASSERT (landing_en);

			#if DEBUG_MODULE_MESSAGE_TEXT
			debug_log ("AC_MSGS: mobile was on a %s route, no route to unlock",
			           entity_sub_type_task_names [get_local_entity_int_value (task, INT_TYPE_ENTITY_SUB_TYPE)]);
			#endif

			break;
		}
	}

	return (TRUE);
}
static unsigned long mk_unixtime(int y, int m, int d, int hh, int mm, int ss)
{
	unsigned long u = 0;
	int i;
	/* Clash with NetBSD/x86-64 patch: leaving rc as unsigned long still
	   permits escaping the year 2038 problem in favor of the year 2106
	   problem, while a dedicated time_t structure can be expected to be
	   a 64-bit value on relevant platforms -- ASR fix 25/01/2004 */
	unsigned long rc;
	time_t tt;
	long tzshift;
	#ifndef TZ_VAR
	long shiftd1, shiftd2;
	#endif
	struct tm *stm;

	if (y >= 2001) {
		i = y - 2001;
		u = 11323; /* days from 1970-01-01 to 2001-01-01 */
		/* The following piece of code is rather paranoid in the 16/32-bit
		   world, where the timestamps are limited to year 2106. */
		#if defined(__32BIT__) || defined(TILED)
		if (i >= 400) {
			u += 1022679L * (i / 400);
			i %= 400;
		}
		#endif
		if (i >= 100) {
			u += 36524L * (i / 100);
			i %= 100;
		}
		u += 1461L * (i / 4);
		u += 365L * (i % 4);
	}
	else if (y >= 1973)
		u = 1096 + (y - 1973) / 4 * 1461L + ((y - 1973) % 4) * 365L;
	else
		u = (y - 1970) * 365L;

	for (i = 1; i < m; i++) {
		u += (int)monthdays[i - 1];
		if (i == 2)
			u += isleapyear(y);
	}

	rc = 86400 * (unsigned long)(u + d - 1) +
	     (unsigned long)hh * 3600 + (unsigned long)mm * 60 + (unsigned long)ss;

	/* If we have to use the timezone variable, do it now */
	#ifdef TZ_VAR
	tzshift = -TZ_VAR;
	if (_daylight)
		tzshift += 3600;
	#else
	tt = (time_t)rc;
	stm = localtime(&tt);
	debug_assert(stm != NULL); /* LIBCS.DLL returns NULL for unixtime beyond 0x7FFFFFFF */
	tzshift = (long)stm->tm_hour * 3600 + (long)stm->tm_min * 60;
	shiftd1 = stm->tm_mday;
	stm = gmtime(&tt);
	debug_assert(stm != NULL);
	shiftd2 = stm->tm_mday;
	/* Local time overruns GMT, add 24 hours for safety */
	if (shiftd1 < shiftd2 && shiftd1 == 1 && shiftd2 >= 28)
		tzshift += 86400;
	else if (shiftd1 > shiftd2 && shiftd1 >= 28 && shiftd2 == 1)
		tzshift -= 86400;
	else if (shiftd1 > shiftd2)
		tzshift += 86400;
	else if (shiftd1 < shiftd2)
		tzshift -= 86400;
	tzshift -= (long)stm->tm_hour * 3600 + (long)stm->tm_min * 60;
	tzshift %= 86400;
	#endif

	/* Fix the timezone if it does not roll over the zero */
	return ((tzshift > 0 && rc < tzshift) ? rc : rc - tzshift);
}
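/*
 * Brute-force cross-check of the magic day counts used above (11323 days
 * from 1970-01-01 to 2001-01-01, and 1096 days from 1970-01-01 to
 * 1973-01-01).  These helpers are local to this example; the original
 * source has its own isleapyear()/monthdays.
 */
static int example_is_leap(int y)
{
	return (y % 4 == 0 && y % 100 != 0) || (y % 400 == 0);
}

static unsigned long example_days_to_year(int y)
{
	unsigned long d = 0;
	int i;

	for (i = 1970; i < y; i++)
		d += example_is_leap(i) ? 366 : 365;
	return d;
}

/* example_days_to_year(1973) == 1096  (one leap year: 1972)
 * example_days_to_year(2001) == 11323 (31*365 + 8 leap days in 1972..2000) */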
rs232can_msg *cann_buffer_get(void)
{
	rs232can_msg *cmsg = malloc(sizeof(rs232can_msg));

	/* the assert must fire when malloc *fails*, i.e. when cmsg is NULL */
	debug_assert(cmsg != NULL, "cann_buffer_get malloc fail!\n");
	return cmsg;
}
/**
 * Translate a SWZ instruction into a MOV, MUL or MAD instruction.  EG:
 *
 *   SWZ dst, src.x-y10
 *
 * becomes:
 *
 *   MAD dst, src.xyxx, {1,-1,0,0}, {0,0,1,0}
 */
static void emit_swz( struct st_translate *t,
                      struct ureg_dst dst,
                      const struct prog_src_register *SrcReg )
{
	struct ureg_program *ureg = t->ureg;
	struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index );

	unsigned negate_mask = SrcReg->Negate;

	unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 |
	                     (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 |
	                     (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 |
	                     (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3);

	unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 |
	                      (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 |
	                      (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 |
	                      (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3);

	unsigned negative_one_mask = one_mask & negate_mask;
	unsigned positive_one_mask = one_mask & ~negate_mask;

	struct ureg_src imm;
	unsigned i;
	unsigned mul_swizzle[4] = {0,0,0,0};
	unsigned add_swizzle[4] = {0,0,0,0};
	unsigned src_swizzle[4] = {0,0,0,0};
	boolean need_add = FALSE;
	boolean need_mul = FALSE;

	if (dst.WriteMask == 0)
		return;

	/* Is this just a MOV? */
	if (zero_mask == 0 &&
	    one_mask == 0 &&
	    (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW)) {
		ureg_MOV( ureg, dst, translate_src( t, SrcReg ));
		return;
	}

#define IMM_ZERO    0
#define IMM_ONE     1
#define IMM_NEG_ONE 2

	imm = ureg_imm3f( ureg, 0, 1, -1 );

	for (i = 0; i < 4; i++) {
		unsigned bit = 1 << i;

		if (dst.WriteMask & bit) {
			if (positive_one_mask & bit) {
				mul_swizzle[i] = IMM_ZERO;
				add_swizzle[i] = IMM_ONE;
				need_add = TRUE;
			}
			else if (negative_one_mask & bit) {
				mul_swizzle[i] = IMM_ZERO;
				add_swizzle[i] = IMM_NEG_ONE;
				need_add = TRUE;
			}
			else if (zero_mask & bit) {
				mul_swizzle[i] = IMM_ZERO;
				add_swizzle[i] = IMM_ZERO;
				need_add = TRUE;
			}
			else {
				add_swizzle[i] = IMM_ZERO;
				src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i);
				need_mul = TRUE;
				if (negate_mask & bit) {
					mul_swizzle[i] = IMM_NEG_ONE;
				}
				else {
					mul_swizzle[i] = IMM_ONE;
				}
			}
		}
	}

	if (need_mul && need_add) {
		ureg_MAD( ureg, dst,
		          swizzle_4v( src, src_swizzle ),
		          swizzle_4v( imm, mul_swizzle ),
		          swizzle_4v( imm, add_swizzle ) );
	}
	else if (need_mul) {
		ureg_MUL( ureg, dst,
		          swizzle_4v( src, src_swizzle ),
		          swizzle_4v( imm, mul_swizzle ) );
	}
	else if (need_add) {
		ureg_MOV( ureg, dst,
		          swizzle_4v( imm, add_swizzle ) );
	}
	else {
		debug_assert(0);
	}

#undef IMM_ZERO
#undef IMM_ONE
#undef IMM_NEG_ONE
}
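/*
 * Scalar illustration of the lowering above for the example in the comment,
 * dst = SWZ(src, x, -y, 1, 0): per component the generated MAD computes
 * src[swz[i]] * mul[i] + add[i].  Hypothetical standalone helper, not part
 * of the state tracker.
 */
static void swz_example(const float src[4], float dst[4])
{
	static const float mul[4] = { 1.0f, -1.0f, 0.0f, 0.0f };
	static const float add[4] = { 0.0f,  0.0f, 1.0f, 0.0f };
	static const int   swz[4] = { 0, 1, 0, 0 };   /* .xyxx */
	int i;

	for (i = 0; i < 4; i++)
		dst[i] = src[swz[i]] * mul[i] + add[i];
	/* yields (src.x, -src.y, 1.0f, 0.0f) */
}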
void fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit, int nr, struct pipe_surface **bufs) { struct stage s[MAX_STAGES]; uint32_t pos_regid, posz_regid, psize_regid, color_regid[8]; uint32_t face_regid, coord_regid, zwcoord_regid; uint32_t vcoord_regid, vertex_regid, instance_regid; int i, j; debug_assert(nr <= ARRAY_SIZE(color_regid)); if (emit->key.binning_pass) nr = 0; setup_stages(emit, s); pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS); posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH); psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ); vertex_regid = ir3_find_output_regid(s[VS].v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE); instance_regid = ir3_find_output_regid(s[VS].v, SYSTEM_VALUE_INSTANCE_ID); if (s[FS].v->color0_mrt) { color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR); } else { color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0); color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1); color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2); color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3); color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4); color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5); color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6); color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); } /* TODO get these dynamically: */ face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0); zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0); vcoord_regid = (s[FS].v->total_in > 0) ? regid(0,0) : regid(63,0); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. 
*/ OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONTROL_REG, 5); OUT_RING(ring, A5XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) | A5XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff) | COND(s[VS].v, A5XX_HLSQ_VS_CONTROL_REG_ENABLED)); OUT_RING(ring, A5XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) | A5XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff) | COND(s[FS].v, A5XX_HLSQ_FS_CONTROL_REG_ENABLED)); OUT_RING(ring, A5XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) | A5XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff) | COND(s[HS].v, A5XX_HLSQ_HS_CONTROL_REG_ENABLED)); OUT_RING(ring, A5XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) | A5XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff) | COND(s[DS].v, A5XX_HLSQ_DS_CONTROL_REG_ENABLED)); OUT_RING(ring, A5XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) | A5XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff) | COND(s[GS].v, A5XX_HLSQ_GS_CONTROL_REG_ENABLED)); OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1); OUT_RING(ring, 0x00000000); OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5); OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen)); OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen)); OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen)); OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen)); OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen)); OUT_PKT4(ring, REG_A5XX_SP_VS_CONTROL_REG, 5); OUT_RING(ring, A5XX_SP_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) | A5XX_SP_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff) | COND(s[VS].v, A5XX_SP_VS_CONTROL_REG_ENABLED)); OUT_RING(ring, A5XX_SP_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) | A5XX_SP_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff) | COND(s[FS].v, A5XX_SP_FS_CONTROL_REG_ENABLED)); OUT_RING(ring, A5XX_SP_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) | A5XX_SP_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff) | COND(s[HS].v, A5XX_SP_HS_CONTROL_REG_ENABLED)); OUT_RING(ring, A5XX_SP_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) | A5XX_SP_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff) | COND(s[DS].v, A5XX_SP_DS_CONTROL_REG_ENABLED)); OUT_RING(ring, A5XX_SP_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) | A5XX_SP_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff) | COND(s[GS].v, A5XX_SP_GS_CONTROL_REG_ENABLED)); OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1); OUT_RING(ring, 0x00000000); OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONSTLEN, 2); OUT_RING(ring, s[VS].constlen); /* HLSQ_VS_CONSTLEN */ OUT_RING(ring, s[VS].instrlen); /* HLSQ_VS_INSTRLEN */ OUT_PKT4(ring, REG_A5XX_HLSQ_FS_CONSTLEN, 2); OUT_RING(ring, s[FS].constlen); /* HLSQ_FS_CONSTLEN */ OUT_RING(ring, s[FS].instrlen); /* HLSQ_FS_INSTRLEN */ OUT_PKT4(ring, REG_A5XX_HLSQ_HS_CONSTLEN, 2); OUT_RING(ring, s[HS].constlen); /* HLSQ_HS_CONSTLEN */ OUT_RING(ring, s[HS].instrlen); /* HLSQ_HS_INSTRLEN */ OUT_PKT4(ring, REG_A5XX_HLSQ_DS_CONSTLEN, 2); OUT_RING(ring, s[DS].constlen); /* HLSQ_DS_CONSTLEN */ OUT_RING(ring, s[DS].instrlen); /* HLSQ_DS_INSTRLEN */ OUT_PKT4(ring, REG_A5XX_HLSQ_GS_CONSTLEN, 2); OUT_RING(ring, s[GS].constlen); /* HLSQ_GS_CONSTLEN */ OUT_RING(ring, s[GS].instrlen); /* HLSQ_GS_INSTRLEN */ OUT_PKT4(ring, REG_A5XX_HLSQ_CONTEXT_SWITCH_CS_SW_3, 2); OUT_RING(ring, 0x00000000); /* HLSQ_CONTEXT_SWITCH_CS_SW_3 */ OUT_RING(ring, 0x00000000); /* HLSQ_CONTEXT_SWITCH_CS_SW_4 */ OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1); OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) | A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | 
0x6 | /* XXX seems to be always set? */ A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. COND(s[VS].v->has_samp, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); struct ir3_shader_linkage l = {0}; ir3_link_shaders(&l, s[VS].v, s[FS].v); BITSET_DECLARE(varbs, 128) = {0}; uint32_t *varmask = (uint32_t *)varbs; for (i = 0; i < l.cnt; i++) for (j = 0; j < util_last_bit(l.var[i].compmask); j++) BITSET_SET(varbs, l.var[i].loc + j); OUT_PKT4(ring, REG_A5XX_VPC_VAR_DISABLE(0), 4); OUT_RING(ring, ~varmask[0]); /* VPC_VAR[0].DISABLE */ OUT_RING(ring, ~varmask[1]); /* VPC_VAR[1].DISABLE */ OUT_RING(ring, ~varmask[2]); /* VPC_VAR[2].DISABLE */ OUT_RING(ring, ~varmask[3]); /* VPC_VAR[3].DISABLE */ /* a5xx appends pos/psize to end of the linkage map: */ if (pos_regid != regid(63,0)) ir3_link_add(&l, pos_regid, 0xf, l.max_loc); if (psize_regid != regid(63,0)) ir3_link_add(&l, psize_regid, 0x1, l.max_loc); for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) { uint32_t reg = 0; OUT_PKT4(ring, REG_A5XX_SP_VS_OUT_REG(i), 1); reg |= A5XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid); reg |= A5XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask); j++; reg |= A5XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid); reg |= A5XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask); j++; OUT_RING(ring, reg); } for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) { uint32_t reg = 0; OUT_PKT4(ring, REG_A5XX_SP_VS_VPC_DST_REG(i), 1); reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc); reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc); reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc); reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc); OUT_RING(ring, reg); } OUT_PKT4(ring, REG_A5XX_SP_VS_OBJ_START_LO, 2); OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_LO/HI */ if (s[VS].instrlen) emit_shader(ring, s[VS].v); // TODO depending on other bits in this reg (if any) set somewhere else? 
OUT_PKT4(ring, REG_A5XX_PC_PRIM_VTX_CNTL, 1); OUT_RING(ring, COND(s[VS].v->writes_psize, A5XX_PC_PRIM_VTX_CNTL_PSIZE)); if (emit->key.binning_pass) { OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2); OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_LO */ OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_HI */ } else { uint32_t stride_in_vpc = align(s[FS].v->total_in, 4) + 4; if (s[VS].v->writes_psize) stride_in_vpc++; // TODO if some of these other bits depend on something other than // program state we should probably move these next three regs: OUT_PKT4(ring, REG_A5XX_SP_PRIMITIVE_CNTL, 1); OUT_RING(ring, A5XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt)); OUT_PKT4(ring, REG_A5XX_VPC_CNTL_0, 1); OUT_RING(ring, A5XX_VPC_CNTL_0_STRIDE_IN_VPC(stride_in_vpc) | COND(s[FS].v->total_in > 0, A5XX_VPC_CNTL_0_VARYING) | 0x10000); // XXX OUT_PKT4(ring, REG_A5XX_PC_PRIMITIVE_CNTL, 1); OUT_RING(ring, A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(stride_in_vpc) | 0x400); // XXX OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2); OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_LO/HI */ } OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 5); OUT_RING(ring, 0x00000881); /* XXX HLSQ_CONTROL_0 */ OUT_RING(ring, A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(63)); OUT_RING(ring, A5XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) | 0xfcfcfc00); /* XXX */ OUT_RING(ring, A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(vcoord_regid) | 0xfcfcfc00); /* XXX */ OUT_RING(ring, A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) | A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) | 0x0000fcfc); /* XXX */ OUT_PKT4(ring, REG_A5XX_SP_FS_CTRL_REG0, 1); OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) | 0x4000e | /* XXX set pretty much everywhere */ A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow.. 
COND(s[FS].v->has_samp, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); OUT_RING(ring, 0x020fffff); /* XXX */ OUT_PKT4(ring, REG_A5XX_VPC_GS_SIV_CNTL, 1); OUT_RING(ring, 0x0000ffff); /* XXX */ OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1); OUT_RING(ring, 0x00000010); /* XXX */ OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1); OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_GRAS_CNTL_VARYING) | COND(s[FS].v->frag_coord, A5XX_GRAS_CNTL_XCOORD | A5XX_GRAS_CNTL_YCOORD | A5XX_GRAS_CNTL_ZCOORD | A5XX_GRAS_CNTL_WCOORD | A5XX_GRAS_CNTL_UNK3) | COND(s[FS].v->frag_face, A5XX_GRAS_CNTL_UNK3)); OUT_PKT4(ring, REG_A5XX_RB_RENDER_CONTROL0, 3); OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_RB_RENDER_CONTROL0_VARYING) | COND(s[FS].v->frag_coord, A5XX_RB_RENDER_CONTROL0_XCOORD | A5XX_RB_RENDER_CONTROL0_YCOORD | A5XX_RB_RENDER_CONTROL0_ZCOORD | A5XX_RB_RENDER_CONTROL0_WCOORD | A5XX_RB_RENDER_CONTROL0_UNK3) | COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL0_UNK3)); OUT_RING(ring, COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL1_FACENESS)); OUT_RING(ring, A5XX_RB_FS_OUTPUT_CNTL_MRT(nr) | COND(s[FS].v->writes_pos, A5XX_RB_FS_OUTPUT_CNTL_FRAG_WRITES_Z)); OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_CNTL, 9); OUT_RING(ring, A5XX_SP_FS_OUTPUT_CNTL_MRT(nr) | A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID(posz_regid) | A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID(regid(63, 0))); for (i = 0; i < 8; i++) { OUT_RING(ring, A5XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) | COND(emit->key.half_precision, A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION)); } if (emit->key.binning_pass) { OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1); OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(0)); } else { uint32_t vinterp[8], vpsrepl[8]; memset(vinterp, 0, sizeof(vinterp)); memset(vpsrepl, 0, sizeof(vpsrepl)); /* looks like we need to do int varyings in the frag * shader on a5xx (no flatshad reg? or a420.0 bug?): * * (sy)(ss)nop * (sy)ldlv.u32 r0.x,l[r0.x], 1 * ldlv.u32 r0.y,l[r0.x+1], 1 * (ss)bary.f (ei)r63.x, 0, r0.x * (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x * (rpt5)nop * sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0 * * Possibly on later a5xx variants we'll be able to use * something like the code below instead of workaround * in the shader: */ /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */ for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) { /* NOTE: varyings are packed, so if compmask is 0xb * then first, third, and fourth component occupy * three consecutive varying slots: */ unsigned compmask = s[FS].v->inputs[j].compmask; uint32_t inloc = s[FS].v->inputs[j].inloc; if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) || (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) { uint32_t loc = inloc; for (i = 0; i < 4; i++) { if (compmask & (1 << i)) { vinterp[loc / 16] |= 1 << ((loc % 16) * 2); //flatshade[loc / 32] |= 1 << (loc % 32); loc++; } } } gl_varying_slot slot = s[FS].v->inputs[j].slot; /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */ if (slot >= VARYING_SLOT_VAR0) { unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0); /* Replace the .xy coordinates with S/T from the point sprite. Set * interpolation bits for .zw such that they become .01 */ if (emit->sprite_coord_enable & texmask) { /* mask is two 2-bit fields, where: * '01' -> S * '10' -> T * '11' -> 1 - T (flip mode) */ unsigned mask = emit->sprite_coord_mode ? 
0b1101 : 0b1001; uint32_t loc = inloc; if (compmask & 0x1) { vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2); loc++; } if (compmask & 0x2) { vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2); loc++; } if (compmask & 0x4) { /* .z <- 0.0f */ vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2); loc++; } if (compmask & 0x8) { /* .w <- 1.0f */ vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2); loc++; } }
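/*
 * Illustrative helper for the vinterp[]/vpsrepl[] packing used above: each
 * varying location owns a 2-bit field inside an array of 32-bit words
 * (16 fields per word).  The driver open-codes this arithmetic; the helper
 * name is hypothetical.  Assumes <stdint.h>.
 */
static void set_varying_mode(uint32_t *words, unsigned loc, uint32_t mode)
{
	words[loc / 16] |= (mode & 0x3) << ((loc % 16) * 2);
}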
/** * Find instruction src's which are mov's that can be collapsed, replacing * the mov dst with the mov src */ static void instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr) { struct ir3_register *reg; if (instr->regs_count == 0) return; if (ir3_instr_check_mark(instr)) return; /* walk down the graph from each src: */ foreach_src_n(reg, n, instr) { struct ir3_instruction *src = ssa(reg); if (!src) continue; instr_cp(ctx, src); /* TODO non-indirect access we could figure out which register * we actually want and allow cp.. */ if (reg->flags & IR3_REG_ARRAY) continue; /* Don't CP absneg into meta instructions, that won't end well: */ if (is_meta(instr) && (src->opc != OPC_MOV)) continue; reg_cp(ctx, instr, reg, n); } if (instr->regs[0]->flags & IR3_REG_ARRAY) { struct ir3_instruction *src = ssa(instr->regs[0]); if (src) instr_cp(ctx, src); } if (instr->address) { instr_cp(ctx, instr->address); ir3_instr_set_address(instr, eliminate_output_mov(instr->address)); } /* we can end up with extra cmps.s from frontend, which uses a * * cmps.s p0.x, cond, 0 * * as a way to mov into the predicate register. But frequently 'cond' * is itself a cmps.s/cmps.f/cmps.u. So detect this special case and * just re-write the instruction writing predicate register to get rid * of the double cmps. */ if ((instr->opc == OPC_CMPS_S) && (instr->regs[0]->num == regid(REG_P0, 0)) && ssa(instr->regs[1]) && (instr->regs[2]->flags & IR3_REG_IMMED) && (instr->regs[2]->iim_val == 0)) { struct ir3_instruction *cond = ssa(instr->regs[1]); switch (cond->opc) { case OPC_CMPS_S: case OPC_CMPS_F: case OPC_CMPS_U: instr->opc = cond->opc; instr->flags = cond->flags; instr->cat2 = cond->cat2; instr->address = cond->address; instr->regs[1] = cond->regs[1]; instr->regs[2] = cond->regs[2]; instr->barrier_class |= cond->barrier_class; instr->barrier_conflict |= cond->barrier_conflict; unuse(cond); break; default: break; } } /* Handle converting a sam.s2en (taking samp/tex idx params via * register) into a normal sam (encoding immediate samp/tex idx) * if they are immediate. This saves some instructions and regs * in the common case where we know samp/tex at compile time: */ if (is_tex(instr) && (instr->flags & IR3_INSTR_S2EN) && !(ir3_shader_debug & IR3_DBG_FORCES2EN)) { /* The first src will be a fan-in (collect), if both of it's * two sources are mov from imm, then we can */ struct ir3_instruction *samp_tex = ssa(instr->regs[1]); debug_assert(samp_tex->opc == OPC_META_FI); struct ir3_instruction *samp = ssa(samp_tex->regs[1]); struct ir3_instruction *tex = ssa(samp_tex->regs[2]); if ((samp->opc == OPC_MOV) && (samp->regs[1]->flags & IR3_REG_IMMED) && (tex->opc == OPC_MOV) && (tex->regs[1]->flags & IR3_REG_IMMED)) { instr->flags &= ~IR3_INSTR_S2EN; instr->cat5.samp = samp->regs[1]->iim_val; instr->cat5.tex = tex->regs[1]->iim_val; instr->regs[1]->instr = NULL; } } }
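/*
 * Toy illustration of the core idea in instr_cp()/reg_cp(): an eligible
 * mov's destination can be replaced by the mov's source, so chains of
 * simple movs collapse to their first non-mov producer.  This stand-in
 * struct is hypothetical; the real pass also has to honor abs/neg flags,
 * relative addressing, and per-instruction operand legality (valid_flags()).
 */
struct toy_instr {
	int is_simple_mov;       /* same-type mov with a plain SSA source */
	struct toy_instr *src;   /* SSA source, NULL for leaf instructions */
};

static struct toy_instr *
collapse_movs(struct toy_instr *in)
{
	while (in && in->is_simple_mov && in->src)
		in = in->src;
	return in;
}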
/**
 * Translate a vertex program to create a new variant.
 */
static struct st_vp_variant *
st_translate_vertex_program(struct st_context *st,
                            struct st_vertex_program *stvp,
                            const struct st_vp_variant_key *key)
{
	struct st_vp_variant *vpv = CALLOC_STRUCT(st_vp_variant);
	struct pipe_context *pipe = st->pipe;
	struct ureg_program *ureg;
	enum pipe_error error;
	unsigned num_outputs;

	if (!vpv)
		return NULL;

	st_prepare_vertex_program(st->ctx, stvp);

	if (!stvp->glsl_to_tgsi) {
		_mesa_remove_output_reads(&stvp->Base.Base, PROGRAM_OUTPUT);
	}

	ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
	if (ureg == NULL) {
		free(vpv);
		return NULL;
	}

	vpv->key = *key;
	vpv->num_inputs = stvp->num_inputs;
	num_outputs = stvp->num_outputs;
	if (key->passthrough_edgeflags) {
		vpv->num_inputs++;
		num_outputs++;
	}

	if (ST_DEBUG & DEBUG_MESA) {
		_mesa_print_program(&stvp->Base.Base);
		_mesa_print_program_parameters(st->ctx, &stvp->Base.Base);
		debug_printf("\n");
	}

	if (stvp->glsl_to_tgsi)
		error = st_translate_program(st->ctx,
		                             TGSI_PROCESSOR_VERTEX,
		                             ureg,
		                             stvp->glsl_to_tgsi,
		                             &stvp->Base.Base,
		                             /* inputs */
		                             vpv->num_inputs,
		                             stvp->input_to_index,
		                             NULL, /* input semantic name */
		                             NULL, /* input semantic index */
		                             NULL, /* interp mode */
		                             NULL, /* interp location */
		                             /* outputs */
		                             num_outputs,
		                             stvp->result_to_output,
		                             stvp->output_semantic_name,
		                             stvp->output_semantic_index,
		                             key->passthrough_edgeflags,
		                             key->clamp_color);
	else
		error = st_translate_mesa_program(st->ctx,
		                                  TGSI_PROCESSOR_VERTEX,
		                                  ureg,
		                                  &stvp->Base.Base,
		                                  /* inputs */
		                                  vpv->num_inputs,
		                                  stvp->input_to_index,
		                                  NULL, /* input semantic name */
		                                  NULL, /* input semantic index */
		                                  NULL,
		                                  /* outputs */
		                                  num_outputs,
		                                  stvp->result_to_output,
		                                  stvp->output_semantic_name,
		                                  stvp->output_semantic_index,
		                                  key->passthrough_edgeflags,
		                                  key->clamp_color);

	if (error)
		goto fail;

	vpv->tgsi.tokens = ureg_get_tokens( ureg, NULL );
	if (!vpv->tgsi.tokens)
		goto fail;

	ureg_destroy( ureg );

	if (stvp->glsl_to_tgsi) {
		st_translate_stream_output_info(stvp->glsl_to_tgsi,
		                                stvp->result_to_output,
		                                &vpv->tgsi.stream_output);
	}

	if (ST_DEBUG & DEBUG_TGSI) {
		tgsi_dump(vpv->tgsi.tokens, 0);
		debug_printf("\n");
	}

	vpv->driver_shader = pipe->create_vs_state(pipe, &vpv->tgsi);
	return vpv;

fail:
	debug_printf("%s: failed to translate Mesa program:\n", __FUNCTION__);
	_mesa_print_program(&stvp->Base.Base);
	debug_assert(0);
	ureg_destroy( ureg );
	free(vpv); /* don't leak the variant on the failure path */
	return NULL;
}
void *
debug_realloc(const char *file, unsigned line, const char *function,
              void *old_ptr, size_t old_size, size_t new_size )
{
	struct debug_memory_header *old_hdr, *new_hdr;
	struct debug_memory_footer *old_ftr, *new_ftr;
	void *new_ptr;

	if (!old_ptr)
		return debug_malloc( file, line, function, new_size );

	if (!new_size) {
		debug_free( file, line, function, old_ptr );
		return NULL;
	}

	old_hdr = header_from_data(old_ptr);
	if (old_hdr->magic != DEBUG_MEMORY_MAGIC) {
		debug_printf("%s:%u:%s: reallocating bad or corrupted memory %p\n",
		             file, line, function, old_ptr);
		debug_assert(0);
		return NULL;
	}

	old_ftr = footer_from_header(old_hdr);
	if (old_ftr->magic != DEBUG_MEMORY_MAGIC) {
		debug_printf("%s:%u:%s: buffer overflow %p\n",
		             old_hdr->file, old_hdr->line, old_hdr->function, old_ptr);
		debug_assert(0);
	}

	/* alloc new */
	new_hdr = os_malloc(sizeof(*new_hdr) + new_size + sizeof(*new_ftr));
	if (!new_hdr) {
		debug_printf("%s:%u:%s: out of memory when trying to allocate %lu bytes\n",
		             file, line, function, (long unsigned)new_size);
		return NULL;
	}
	new_hdr->no = old_hdr->no;
	new_hdr->file = old_hdr->file;
	new_hdr->line = old_hdr->line;
	new_hdr->function = old_hdr->function;
	new_hdr->size = new_size;
	new_hdr->magic = DEBUG_MEMORY_MAGIC;
	new_hdr->tag = 0;
#if DEBUG_FREED_MEMORY
	new_hdr->freed = FALSE;
#endif

	new_ftr = footer_from_header(new_hdr);
	new_ftr->magic = DEBUG_MEMORY_MAGIC;

	pipe_mutex_lock(list_mutex);
	LIST_REPLACE(&old_hdr->head, &new_hdr->head);
	pipe_mutex_unlock(list_mutex);

	/* copy data */
	new_ptr = data_from_header(new_hdr);
	memcpy( new_ptr, old_ptr, old_size < new_size ? old_size : new_size );

	/* free old */
	old_hdr->magic = 0;
	old_ftr->magic = 0;
	os_free(old_hdr);

	return new_ptr;
}
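/*
 * Sketch of the guard layout debug_realloc() relies on.  The real
 * header_from_data()/footer_from_header() helpers live elsewhere in this
 * file; the versions below only illustrate the pointer arithmetic, assuming
 * the layout implied by the os_malloc() call above:
 *
 *   [ debug_memory_header | user data (hdr->size bytes) | debug_memory_footer ]
 *
 * with DEBUG_MEMORY_MAGIC stamped at both ends so corruption on either side
 * of the user data can be detected.
 */
static struct debug_memory_header *
example_header_from_data(void *data)
{
	return (struct debug_memory_header *)
		((char *)data - sizeof(struct debug_memory_header));
}

static struct debug_memory_footer *
example_footer_from_header(struct debug_memory_header *hdr)
{
	return (struct debug_memory_footer *)
		((char *)(hdr + 1) + hdr->size);
}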
/** * Handle cp for a given src register. This additionally handles * the cases of collapsing immedate/const (which replace the src * register with a non-ssa src) or collapsing mov's from relative * src (which needs to also fixup the address src reference by the * instruction). */ static void reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) { struct ir3_instruction *src = ssa(reg); if (is_eligible_mov(src, true)) { /* simple case, no immed/const/relativ, only mov's w/ ssa src: */ struct ir3_register *src_reg = src->regs[1]; unsigned new_flags = reg->flags; combine_flags(&new_flags, src); if (valid_flags(instr, n, new_flags)) { if (new_flags & IR3_REG_ARRAY) { debug_assert(!(reg->flags & IR3_REG_ARRAY)); reg->array = src_reg->array; } reg->flags = new_flags; reg->instr = ssa(src_reg); instr->barrier_class |= src->barrier_class; instr->barrier_conflict |= src->barrier_conflict; unuse(src); reg->instr->use_count++; } } else if (is_same_type_mov(src) && /* cannot collapse const/immed/etc into meta instrs: */ !is_meta(instr)) { /* immed/const/etc cases, which require some special handling: */ struct ir3_register *src_reg = src->regs[1]; unsigned new_flags = reg->flags; combine_flags(&new_flags, src); if (!valid_flags(instr, n, new_flags)) { /* See if lowering an immediate to const would help. */ if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { debug_assert(new_flags & IR3_REG_IMMED); instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags); return; } /* special case for "normal" mad instructions, we can * try swapping the first two args if that fits better. * * the "plain" MAD's (ie. the ones that don't shift first * src prior to multiply) can swap their first two srcs if * src[0] is !CONST and src[1] is CONST: */ if ((n == 1) && is_mad(instr->opc) && !(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) && valid_flags(instr, 0, new_flags & ~IR3_REG_IMMED)) { /* swap src[0] and src[1]: */ struct ir3_register *tmp; tmp = instr->regs[0 + 1]; instr->regs[0 + 1] = instr->regs[1 + 1]; instr->regs[1 + 1] = tmp; n = 0; } else { return; } } /* Here we handle the special case of mov from * CONST and/or RELATIV. These need to be handled * specially, because in the case of move from CONST * there is no src ir3_instruction so we need to * replace the ir3_register. And in the case of * RELATIV we need to handle the address register * dependency. */ if (src_reg->flags & IR3_REG_CONST) { /* an instruction cannot reference two different * address registers: */ if ((src_reg->flags & IR3_REG_RELATIV) && conflicts(instr->address, reg->instr->address)) return; /* This seems to be a hw bug, or something where the timings * just somehow don't work out. This restriction may only * apply if the first src is also CONST. */ if ((opc_cat(instr->opc) == 3) && (n == 2) && (src_reg->flags & IR3_REG_RELATIV) && (src_reg->array.offset == 0)) return; src_reg = ir3_reg_clone(instr->block->shader, src_reg); src_reg->flags = new_flags; instr->regs[n+1] = src_reg; if (src_reg->flags & IR3_REG_RELATIV) ir3_instr_set_address(instr, reg->instr->address); return; } if ((src_reg->flags & IR3_REG_RELATIV) && !conflicts(instr->address, reg->instr->address)) { src_reg = ir3_reg_clone(instr->block->shader, src_reg); src_reg->flags = new_flags; instr->regs[n+1] = src_reg; ir3_instr_set_address(instr, reg->instr->address); return; } /* NOTE: seems we can only do immed integers, so don't * need to care about float. 
But we do need to handle * abs/neg *before* checking that the immediate requires * few enough bits to encode: * * TODO: do we need to do something to avoid accidentally * catching a float immed? */ if (src_reg->flags & IR3_REG_IMMED) { int32_t iim_val = src_reg->iim_val; debug_assert((opc_cat(instr->opc) == 1) || (opc_cat(instr->opc) == 6) || ir3_cat2_int(instr->opc) || (is_mad(instr->opc) && (n == 0))); if (new_flags & IR3_REG_SABS) iim_val = abs(iim_val); if (new_flags & IR3_REG_SNEG) iim_val = -iim_val; if (new_flags & IR3_REG_BNOT) iim_val = ~iim_val; /* other than category 1 (mov) we can only encode up to 10 bits: */ if ((instr->opc == OPC_MOV) || !((iim_val & ~0x3ff) && (-iim_val & ~0x3ff))) { new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT); src_reg = ir3_reg_clone(instr->block->shader, src_reg); src_reg->flags = new_flags; src_reg->iim_val = iim_val; instr->regs[n+1] = src_reg; } else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { /* See if lowering an immediate to const would help. */ instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags); } return; } } }
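/*
 * Standalone form of the immediate-encodability test above (hypothetical
 * helper, assumes <stdint.h>): a value is encodable in the 10-bit immediate
 * field when either v or -v has no bits set above bit 9.
 */
static int fits_10_bit_immed(int32_t v)
{
	return !((v & ~0x3ff) && (-v & ~0x3ff));
}

/* fits_10_bit_immed(0)    -> 1
 * fits_10_bit_immed(1023) -> 1
 * fits_10_bit_immed(-1)   -> 1   (abs/neg folding is applied beforehand above)
 * fits_10_bit_immed(4096) -> 0 */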
void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, int nr, struct pipe_surface **bufs) { struct stage s[MAX_STAGES]; uint32_t pos_regid, posz_regid, psize_regid, color_regid[8]; uint32_t face_regid, coord_regid, zwcoord_regid; int constmode; int i, j, k; debug_assert(nr <= ARRAY_SIZE(color_regid)); setup_stages(emit, s); /* blob seems to always use constmode currently: */ constmode = 1; pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS); posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH); psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ); if (s[FS].v->color0_mrt) { color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR); } else { color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0); color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1); color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2); color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3); color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4); color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5); color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6); color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); } /* TODO get these dynamically: */ face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0); zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. */ OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1); OUT_RING(ring, 0x00000003); OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 5); OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) | A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE | /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe * flush some caches? I think we only need to set those * bits if we have updated const or shader.. 
*/ A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART | A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); OUT_RING(ring, A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE | A4XX_HLSQ_CONTROL_1_REG_COORDREGID(coord_regid) | A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID(zwcoord_regid)); OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) | 0x3f3f000 | /* XXX */ A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid)); OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid) | 0xfcfcfc00); OUT_RING(ring, 0x00fcfcfc); /* XXX HLSQ_CONTROL_4 */ OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5); OUT_RING(ring, A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(s[VS].constlen) | A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) | A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(s[VS].instrlen) | A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff)); OUT_RING(ring, A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(s[FS].constlen) | A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) | A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(s[FS].instrlen) | A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff)); OUT_RING(ring, A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(s[HS].constlen) | A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) | A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(s[HS].instrlen) | A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff)); OUT_RING(ring, A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(s[DS].constlen) | A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) | A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(s[DS].instrlen) | A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff)); OUT_RING(ring, A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(s[GS].constlen) | A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) | A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(s[GS].instrlen) | A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff)); OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1); OUT_RING(ring, 0x140010 | /* XXX */ COND(emit->key.binning_pass, A4XX_SP_SP_CTRL_REG_BINNING_PASS)); OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1); OUT_RING(ring, 0x7f | /* XXX */ COND(s[VS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_VS_BUFFER) | COND(s[FS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_FS_BUFFER) | COND(s[VS].instrlen && s[FS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_INSTR_BUFFER)); OUT_PKT0(ring, REG_A4XX_SP_VS_LENGTH_REG, 1); OUT_RING(ring, s[VS].v->instrlen); /* SP_VS_LENGTH_REG */ OUT_PKT0(ring, REG_A4XX_SP_VS_CTRL_REG0, 3); OUT_RING(ring, A4XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) | A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) | A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) | A4XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) | A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE | COND(s[VS].v->has_samp, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE)); OUT_RING(ring, A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(s[VS].constlen) | A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in)); OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) | A4XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) | A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(s[FS].v->total_in, 4) / 4)); for (i = 0, j = -1; (i < 16) && (j < (int)s[FS].v->inputs_count); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A4XX_SP_VS_OUT_REG(i), 1); j = ir3_next_varying(s[FS].v, j); if (j < s[FS].v->inputs_count) { k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot); reg |= A4XX_SP_VS_OUT_REG_A_REGID(s[VS].v->outputs[k].regid); reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(s[FS].v->inputs[j].compmask); } j = ir3_next_varying(s[FS].v, j); if (j < s[FS].v->inputs_count) { k = 
ir3_find_output(s[VS].v, s[FS].v->inputs[j].slot); reg |= A4XX_SP_VS_OUT_REG_B_REGID(s[VS].v->outputs[k].regid); reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(s[FS].v->inputs[j].compmask); } OUT_RING(ring, reg); } for (i = 0, j = -1; (i < 8) && (j < (int)s[FS].v->inputs_count); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A4XX_SP_VS_VPC_DST_REG(i), 1); j = ir3_next_varying(s[FS].v, j); if (j < s[FS].v->inputs_count) reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(s[FS].v->inputs[j].inloc); j = ir3_next_varying(s[FS].v, j); if (j < s[FS].v->inputs_count) reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(s[FS].v->inputs[j].inloc); j = ir3_next_varying(s[FS].v, j); if (j < s[FS].v->inputs_count) reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(s[FS].v->inputs[j].inloc); j = ir3_next_varying(s[FS].v, j); if (j < s[FS].v->inputs_count) reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(s[FS].v->inputs[j].inloc); OUT_RING(ring, reg); } OUT_PKT0(ring, REG_A4XX_SP_VS_OBJ_OFFSET_REG, 2); OUT_RING(ring, A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[VS].constoff) | A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[VS].instroff)); OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */ OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1); OUT_RING(ring, s[FS].v->instrlen); /* SP_FS_LENGTH_REG */ OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2); OUT_RING(ring, A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) | A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) | A4XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | COND(s[FS].v->has_samp, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) | 0x80000000 | /* XXX */ COND(s[FS].v->frag_face, A4XX_SP_FS_CTRL_REG1_FACENESS) | COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING) | COND(s[FS].v->frag_coord, A4XX_SP_FS_CTRL_REG1_FRAGCOORD)); OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2); OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) | A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff)); if (emit->key.binning_pass) OUT_RING(ring, 0x00000000); else OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */ OUT_PKT0(ring, REG_A4XX_SP_HS_OBJ_OFFSET_REG, 1); OUT_RING(ring, A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[HS].constoff) | A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[HS].instroff)); OUT_PKT0(ring, REG_A4XX_SP_DS_OBJ_OFFSET_REG, 1); OUT_RING(ring, A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[DS].constoff) | A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[DS].instroff)); OUT_PKT0(ring, REG_A4XX_SP_GS_OBJ_OFFSET_REG, 1); OUT_RING(ring, A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[GS].constoff) | A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[GS].instroff)); OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL2, 1); OUT_RING(ring, A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(0) | COND(s[FS].v->total_in > 0, A4XX_RB_RENDER_CONTROL2_VARYING) | COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) | COND(s[FS].v->frag_coord, A4XX_RB_RENDER_CONTROL2_XCOORD | A4XX_RB_RENDER_CONTROL2_YCOORD | // TODO enabling gl_FragCoord.z is causing lockups on 0ad (but seems // to work everywhere else). 
// A4XX_RB_RENDER_CONTROL2_ZCOORD | A4XX_RB_RENDER_CONTROL2_WCOORD)); OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1); OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_MRT(MAX2(1, nr)) | COND(s[FS].v->writes_pos, A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z)); OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1); OUT_RING(ring, A4XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr)) | COND(s[FS].v->writes_pos, A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) | A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid)); OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8); for (i = 0; i < 8; i++) { enum a4xx_color_fmt format = 0; bool srgb = false; if (i < nr) { format = fd4_emit_format(bufs[i]); if (bufs[i] && !emit->no_decode_srgb) srgb = util_format_is_srgb(bufs[i]->format); } OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid[i]) | A4XX_SP_FS_MRT_REG_MRTFORMAT(format) | COND(srgb, A4XX_SP_FS_MRT_REG_COLOR_SRGB) | COND(emit->key.half_precision, A4XX_SP_FS_MRT_REG_HALF_PRECISION)); } if (emit->key.binning_pass) { OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2); OUT_RING(ring, A4XX_VPC_ATTR_THRDASSIGN(1) | 0x40000000 | /* XXX */ COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE)); OUT_RING(ring, 0x00000000); } else { uint32_t vinterp[8], vpsrepl[8]; memset(vinterp, 0, sizeof(vinterp)); memset(vpsrepl, 0, sizeof(vpsrepl)); /* looks like we need to do int varyings in the frag * shader on a4xx (no flatshad reg? or a420.0 bug?): * * (sy)(ss)nop * (sy)ldlv.u32 r0.x,l[r0.x], 1 * ldlv.u32 r0.y,l[r0.x+1], 1 * (ss)bary.f (ei)r63.x, 0, r0.x * (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x * (rpt5)nop * sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0 * * Possibly on later a4xx variants we'll be able to use * something like the code below instead of workaround * in the shader: */ /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */ for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) { /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG * instead.. rather than -8 everywhere else.. */ uint32_t inloc = s[FS].v->inputs[j].inloc - 8; /* currently assuming varyings aligned to 4 (not * packed): */ debug_assert((inloc % 4) == 0); if ((s[FS].v->inputs[j].interpolate == INTERP_QUALIFIER_FLAT) || (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) { uint32_t loc = inloc; for (i = 0; i < 4; i++, loc++) { vinterp[loc / 16] |= 1 << ((loc % 16) * 2); //flatshade[loc / 32] |= 1 << (loc % 32); } } gl_varying_slot slot = s[FS].v->inputs[j].slot; /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */ if (slot >= VARYING_SLOT_VAR0) { unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0); /* Replace the .xy coordinates with S/T from the point sprite. Set * interpolation bits for .zw such that they become .01 */ if (emit->sprite_coord_enable & texmask) { vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 
0x0d : 0x09) << ((inloc % 16) * 2); vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2); vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2); } } } OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2); OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) | A4XX_VPC_ATTR_THRDASSIGN(1) | COND(s[FS].v->total_in > 0, A4XX_VPC_ATTR_ENABLE) | 0x40000000 | /* XXX */ COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE)); OUT_RING(ring, A4XX_VPC_PACK_NUMFPNONPOSVAR(s[FS].v->total_in) | A4XX_VPC_PACK_NUMNONPOSVSVAR(s[FS].v->total_in)); OUT_PKT0(ring, REG_A4XX_VPC_VARYING_INTERP_MODE(0), 8); for (i = 0; i < 8; i++) OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */ OUT_PKT0(ring, REG_A4XX_VPC_VARYING_PS_REPL_MODE(0), 8); for (i = 0; i < 8; i++) OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */ } if (s[VS].instrlen) emit_shader(ring, s[VS].v); if (!emit->key.binning_pass) if (s[FS].instrlen) emit_shader(ring, s[FS].v); }
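The non-binning path above packs one 2-bit code per varying component into the VPC_VARYING_INTERP_MODE / VPC_VARYING_PS_REPL_MODE words: sixteen locations fit in each 32-bit register, so loc / 16 selects the register and (loc % 16) * 2 the bit position. A minimal standalone sketch of that arithmetic (the helper name is illustrative, not part of the driver):

#include <stdint.h>

/* Set the 2-bit mode for varying component 'loc' in an array of
 * packed 32-bit registers (e.g. the vinterp[] / vpsrepl[] arrays
 * above). */
static void set_varying_mode2(uint32_t *regs, unsigned loc, unsigned mode)
{
    regs[loc / 16] |= (mode & 0x3) << ((loc % 16) * 2);
}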
static void emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w) { enum a4xx_tile_mode tile_mode; unsigned i; if (bin_w) { tile_mode = 2; } else { tile_mode = TILE4_LINEAR; } for (i = 0; i < 8; i++) { enum a4xx_color_fmt format = 0; enum a3xx_color_swap swap = WZYX; struct fd_resource *rsc = NULL; struct fd_resource_slice *slice = NULL; uint32_t stride = 0; uint32_t base = 0; uint32_t offset = 0; if ((i < nr_bufs) && bufs[i]) { struct pipe_surface *psurf = bufs[i]; rsc = fd_resource(psurf->texture); slice = fd_resource_slice(rsc, psurf->u.tex.level); format = fd4_pipe2color(psurf->format); swap = fd4_pipe2swap(psurf->format); debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); offset = fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); if (bin_w) { stride = bin_w * rsc->cpp; if (bases) { base = bases[i]; } } else { stride = slice->pitch * rsc->cpp; } } OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3); OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | A4XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) | A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap)); if (bin_w || (i >= nr_bufs)) { OUT_RING(ring, base); OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride)); } else { OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_MRT[i].CONTROL3.STRIDE not emitted by c2d.. * not sure if we need to skip it for bypass or * not. */ OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(0)); } } }
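emit_mrt() picks the row stride differently for the two render paths: when drawing into GMEM tiles (bin_w != 0) the stride is the bin width in pixels times bytes-per-pixel, while in bypass mode it comes from the resource's slice pitch. A simplified sketch of just that decision, assuming only that "stride" means bytes per row (helper name is illustrative):

#include <stdbool.h>
#include <stdint.h>

/* Bytes per row for an MRT buffer. */
static uint32_t mrt_stride(bool gmem, uint32_t bin_w, uint32_t slice_pitch,
        uint32_t cpp)
{
    /* in GMEM the tile buffer is bin_w pixels wide; in bypass we write
     * straight to the resource, whose pitch is given in pixels */
    return gmem ? (bin_w * cpp) : (slice_pitch * cpp);
}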
static void * create_fs(struct pipe_context *pipe, unsigned fs_traits) { struct ureg_program *ureg; struct ureg_src /*dst_sampler, */ src_sampler, mask_sampler; struct ureg_src /*dst_pos, */ src_input, mask_pos; struct ureg_dst src, mask; struct ureg_dst out; struct ureg_src imm0 = { 0 }; unsigned has_mask = (fs_traits & FS_MASK) != 0; unsigned is_fill = (fs_traits & FS_FILL) != 0; unsigned is_composite = (fs_traits & FS_COMPOSITE) != 0; unsigned is_solid = (fs_traits & FS_SOLID_FILL) != 0; unsigned is_lingrad = (fs_traits & FS_LINGRAD_FILL) != 0; unsigned is_radgrad = (fs_traits & FS_RADGRAD_FILL) != 0; unsigned comp_alpha_mask = fs_traits & FS_COMPONENT_ALPHA; unsigned is_yuv = (fs_traits & FS_YUV) != 0; unsigned src_repeat_none = (fs_traits & FS_SRC_REPEAT_NONE) != 0; unsigned mask_repeat_none = (fs_traits & FS_MASK_REPEAT_NONE) != 0; unsigned src_swizzle = (fs_traits & FS_SRC_SWIZZLE_RGB) != 0; unsigned mask_swizzle = (fs_traits & FS_MASK_SWIZZLE_RGB) != 0; unsigned src_set_alpha = (fs_traits & FS_SRC_SET_ALPHA) != 0; unsigned mask_set_alpha = (fs_traits & FS_MASK_SET_ALPHA) != 0; unsigned src_luminance = (fs_traits & FS_SRC_LUMINANCE) != 0; unsigned mask_luminance = (fs_traits & FS_MASK_LUMINANCE) != 0; unsigned dst_luminance = (fs_traits & FS_DST_LUMINANCE) != 0; #if 0 print_fs_traits(fs_traits); #else (void)print_fs_traits; #endif ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT); if (ureg == NULL) return 0; /* it has to be either a fill, a composite op or a yuv conversion */ debug_assert((is_fill ^ is_composite) ^ is_yuv); (void)is_yuv; out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); if (src_repeat_none || mask_repeat_none || src_set_alpha || mask_set_alpha || src_luminance) { imm0 = ureg_imm4f(ureg, 0, 0, 0, 1); } if (is_composite) { src_sampler = ureg_DECL_sampler(ureg, 0); src_input = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 0, TGSI_INTERPOLATE_PERSPECTIVE); } else if (is_fill) { if (is_solid) src_input = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_PERSPECTIVE); else src_input = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_INTERPOLATE_PERSPECTIVE); } else { debug_assert(is_yuv); return create_yuv_shader(pipe, ureg); } if (has_mask) { mask_sampler = ureg_DECL_sampler(ureg, 1); mask_pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 1, TGSI_INTERPOLATE_PERSPECTIVE); } #if 0 /* unused right now */ dst_sampler = ureg_DECL_sampler(ureg, 2); dst_pos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 2, TGSI_INTERPOLATE_PERSPECTIVE); #endif if (is_composite) { if (has_mask || src_luminance || dst_luminance) src = ureg_DECL_temporary(ureg); else src = out; xrender_tex(ureg, src, src_input, src_sampler, imm0, src_repeat_none, src_swizzle, src_set_alpha); } else if (is_fill) { if (is_solid) { if (has_mask || src_luminance || dst_luminance) src = ureg_dst(src_input); else ureg_MOV(ureg, out, src_input); } else if (is_lingrad || is_radgrad) { struct ureg_src coords, const0124, matrow0, matrow1, matrow2; if (has_mask || src_luminance || dst_luminance) src = ureg_DECL_temporary(ureg); else src = out; coords = ureg_DECL_constant(ureg, 0); const0124 = ureg_DECL_constant(ureg, 1); matrow0 = ureg_DECL_constant(ureg, 2); matrow1 = ureg_DECL_constant(ureg, 3); matrow2 = ureg_DECL_constant(ureg, 4); if (is_lingrad) { linear_gradient(ureg, src, src_input, src_sampler, coords, const0124, matrow0, matrow1, matrow2); } else if (is_radgrad) { radial_gradient(ureg, src, src_input, src_sampler, coords, const0124, matrow0, matrow1, matrow2); } } else 
debug_assert(!"Unknown fill type!"); } if (src_luminance) { ureg_MOV(ureg, src, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_X)); ureg_MOV(ureg, ureg_writemask(src, TGSI_WRITEMASK_XYZ), ureg_scalar(imm0, TGSI_SWIZZLE_X)); if (!has_mask && !dst_luminance) ureg_MOV(ureg, out, ureg_src(src)); } if (has_mask) { mask = ureg_DECL_temporary(ureg); xrender_tex(ureg, mask, mask_pos, mask_sampler, imm0, mask_repeat_none, mask_swizzle, mask_set_alpha); /* src IN mask */ src_in_mask(ureg, (dst_luminance) ? src : out, ureg_src(src), ureg_src(mask), comp_alpha_mask, mask_luminance); ureg_release_temporary(ureg, mask); } if (dst_luminance) { /* * Make sure the alpha channel goes into the output L8 surface. */ ureg_MOV(ureg, out, ureg_scalar(ureg_src(src), TGSI_SWIZZLE_W)); } ureg_END(ureg); return ureg_create_shader_and_destroy(ureg, pipe); }
static void * create_shader(struct pipe_context *pipe, int id, struct pipe_shader_state *shader) { int idx = 0; const struct shader_asm_info * shaders[SHADER_STAGES]; /* the shader has to have a fill */ debug_assert(id & ALL_FILLS); /* first stage */ if (id & VEGA_SOLID_FILL_SHADER) { debug_assert(idx == 0); shaders[idx] = &shaders_asm[0]; debug_assert(shaders_asm[0].id == VEGA_SOLID_FILL_SHADER); ++idx; } if ((id & VEGA_LINEAR_GRADIENT_SHADER)) { debug_assert(idx == 0); shaders[idx] = &shaders_asm[1]; debug_assert(shaders_asm[1].id == VEGA_LINEAR_GRADIENT_SHADER); ++idx; } if ((id & VEGA_RADIAL_GRADIENT_SHADER)) { debug_assert(idx == 0); shaders[idx] = &shaders_asm[2]; debug_assert(shaders_asm[2].id == VEGA_RADIAL_GRADIENT_SHADER); ++idx; } if ((id & VEGA_PATTERN_SHADER)) { debug_assert(idx == 0); debug_assert(shaders_asm[3].id == VEGA_PATTERN_SHADER); shaders[idx] = &shaders_asm[3]; ++idx; } if ((id & VEGA_IMAGE_NORMAL_SHADER)) { debug_assert(idx == 0); debug_assert(shaders_asm[4].id == VEGA_IMAGE_NORMAL_SHADER); shaders[idx] = &shaders_asm[4]; ++idx; } /* second stage */ if ((id & VEGA_IMAGE_MULTIPLY_SHADER)) { debug_assert(shaders_asm[5].id == VEGA_IMAGE_MULTIPLY_SHADER); shaders[idx] = &shaders_asm[5]; ++idx; } else if ((id & VEGA_IMAGE_STENCIL_SHADER)) { debug_assert(shaders_asm[6].id == VEGA_IMAGE_STENCIL_SHADER); shaders[idx] = &shaders_asm[6]; ++idx; } /* third stage */ if ((id & VEGA_MASK_SHADER)) { debug_assert(idx == 1); debug_assert(shaders_asm[7].id == VEGA_MASK_SHADER); shaders[idx] = &shaders_asm[7]; ++idx; } /* fourth stage */ if ((id & VEGA_BLEND_MULTIPLY_SHADER)) { debug_assert(shaders_asm[8].id == VEGA_BLEND_MULTIPLY_SHADER); shaders[idx] = &shaders_asm[8]; ++idx; } else if ((id & VEGA_BLEND_SCREEN_SHADER)) { debug_assert(shaders_asm[9].id == VEGA_BLEND_SCREEN_SHADER); shaders[idx] = &shaders_asm[9]; ++idx; } else if ((id & VEGA_BLEND_DARKEN_SHADER)) { debug_assert(shaders_asm[10].id == VEGA_BLEND_DARKEN_SHADER); shaders[idx] = &shaders_asm[10]; ++idx; } else if ((id & VEGA_BLEND_LIGHTEN_SHADER)) { debug_assert(shaders_asm[11].id == VEGA_BLEND_LIGHTEN_SHADER); shaders[idx] = &shaders_asm[11]; ++idx; } /* fifth stage */ if ((id & VEGA_PREMULTIPLY_SHADER)) { debug_assert(shaders_asm[12].id == VEGA_PREMULTIPLY_SHADER); shaders[idx] = &shaders_asm[12]; ++idx; } else if ((id & VEGA_UNPREMULTIPLY_SHADER)) { debug_assert(shaders_asm[13].id == VEGA_UNPREMULTIPLY_SHADER); shaders[idx] = &shaders_asm[13]; ++idx; } /* sixth stage */ if ((id & VEGA_BW_SHADER)) { debug_assert(shaders_asm[14].id == VEGA_BW_SHADER); shaders[idx] = &shaders_asm[14]; ++idx; } return combine_shaders(shaders, idx, pipe, shader); }
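Here, too, the id is a bitmask: each stage contributes at most one shader, and the asserted idx values enforce the stage ordering before the selected fragments are handed to combine_shaders(). A hypothetical request for a masked solid fill with a final premultiply (combination chosen only for illustration):

void *variant = create_shader(pipe,
        VEGA_SOLID_FILL_SHADER | VEGA_MASK_SHADER | VEGA_PREMULTIPLY_SHADER,
        shader);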
int assign_task_to_group (entity *group, entity *task_en, unsigned int valid_members) { int sites_required; entity_sub_types sub_type, group_type; entity *force, *landing, *end_keysite, *start_keysite, *guide, *member; vec3d *pos; task *task_raw; debug_assert (get_comms_model () == COMMS_MODEL_SERVER); ASSERT (task_en); ASSERT (group); ASSERT (!(get_local_group_primary_task (group) && (get_local_entity_int_value (task_en, INT_TYPE_PRIMARY_TASK)))); task_raw = ( task * ) get_local_entity_data (task_en); group_type = get_local_entity_int_value (group, INT_TYPE_ENTITY_SUB_TYPE); member = get_local_entity_first_child (group, LIST_TYPE_MEMBER); // don't assign if the group has no members, or if the group is a CARRIER if (!member) { return FALSE; } if (get_local_entity_int_value (group, INT_TYPE_ENTITY_SUB_TYPE) == ENTITY_SUB_TYPE_GROUP_ASSAULT_SHIP) { if (task_raw->sub_type != ENTITY_SUB_TYPE_TASK_ENGAGE) { return FALSE; } } // // check for invalid tasks // switch (task_raw->sub_type) { case ENTITY_SUB_TYPE_TASK_LANDING: case ENTITY_SUB_TYPE_TASK_LANDING_HOLDING: case ENTITY_SUB_TYPE_TASK_TAKEOFF: case ENTITY_SUB_TYPE_TASK_TAKEOFF_HOLDING: { #ifdef DEBUG debug_fatal ("ASSIGN: Invalid task type (%s) for assign_task_to_group", get_local_entity_string (task_en, STRING_TYPE_FULL_NAME)); #endif return FALSE; } } // // Create route // if (get_local_entity_int_value (group, INT_TYPE_GROUP_LIST_TYPE) == LIST_TYPE_KEYSITE_GROUP) { start_keysite = get_local_entity_parent (group, LIST_TYPE_KEYSITE_GROUP); } else { // start_keysite = get_closest_keysite (NUM_ENTITY_SUB_TYPE_KEYSITES, task_raw->side, get_local_entity_vec3d_ptr (group, VEC3D_TYPE_POSITION), 1.0 * KILOMETRE, NULL); start_keysite = NULL; } pos = get_local_entity_vec3d_ptr (task_en, VEC3D_TYPE_STOP_POSITION); force = get_local_force_entity ( ( entity_sides ) get_local_entity_int_value (task_en, INT_TYPE_SIDE) ); sub_type = group_database [group_type].default_landing_type; landing = NULL; end_keysite = NULL; if (get_local_entity_int_value (task_en, INT_TYPE_ASSESS_LANDING)) { ASSERT (start_keysite); end_keysite = ( entity * ) get_local_entity_ptr_value (task_en, PTR_TYPE_RETURN_KEYSITE); if (end_keysite) { // // check end keysite has suitable free landing sites // if (start_keysite != end_keysite) { // // if end keysite == start keysite then keysite MUST have enough sites because the aircraft are already there // sites_required = get_local_group_member_count (group); if (get_keysite_landing_sites_available (end_keysite, sub_type) < sites_required) { // // END keysite was specified - but no free landing sites for this group // return FALSE; } } } else { // // No END keysite specified so return to start keysite // end_keysite = start_keysite; set_local_entity_ptr_value (task_en, PTR_TYPE_RETURN_KEYSITE, end_keysite); } ASSERT (end_keysite); landing = get_local_entity_landing_entity (end_keysite, group_database [get_local_entity_int_value (group, INT_TYPE_ENTITY_SUB_TYPE)].default_landing_type); } if (create_generic_waypoint_route (group, task_en, end_keysite, NULL, NULL, NULL, 0)) { #if DEBUG_MODULE debug_log ("ASSIGN: group %s (%d) assigned to task %s (%d)", get_local_entity_string (group, STRING_TYPE_FULL_NAME), get_local_entity_index (group), get_local_entity_string (task_en, STRING_TYPE_FULL_NAME), get_local_entity_index (task_en)); #endif // // Assign task // guide = push_task_onto_group_task_stack (group, task_en, valid_members); assign_task_to_group_members (group, guide, valid_members); return TRUE; } return FALSE; }
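The landing assessment above boils down to one capacity rule: if the group will recover at a keysite other than the one it started from, there must be a free landing site of the group's default landing type for every member. A standalone restatement of that rule (names are illustrative, not from the game code):

#include <stdbool.h>

/* true if 'members' aircraft can recover at the end keysite */
static bool landing_capacity_ok(bool same_keysite, int free_sites, int members)
{
    /* the start keysite always has room: the aircraft are already there */
    return same_keysite || (free_sites >= members);
}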
void unpack_local_force_data (entity *en, pack_modes mode) { int count, loop; force *raw; campaign_criteria_type *last_campaign_criteria, *campaign_criteria; ASSERT ((mode >= 0) && (mode < NUM_PACK_MODES)); ASSERT (en); raw = (force *) get_local_entity_data (en); switch (mode) { //////////////////////////////////////// case PACK_MODE_SERVER_SESSION: //////////////////////////////////////// { unpack_string (en, STRING_TYPE_FORCE_NAME, raw->force_name); // keysite_force // pilot root unpack_list_root (en, LIST_TYPE_DIVISION, &raw->division_root); unpack_list_root (en, LIST_TYPE_CAMPAIGN_OBJECTIVE, &raw->campaign_objective_root); // air_registry_root // ground_registry_root // sea_registry_root // unpack_list_root (en, LIST_TYPE_INDEPENDENT_GROUP, &raw->independent_group_root); // force_link // update link // task generation ////////////////////////////////////////////// for (loop = 0; loop < NUM_ENTITY_SUB_TYPE_TASKS; loop ++) { raw->task_generation [loop].valid = unpack_int_value (en, INT_TYPE_VALID); raw->task_generation [loop].created = unpack_int_value (en, INT_TYPE_TASK_GENERATION); } ///////////////////////////////////////////////////////////////// // campaign criteria //////////////////////////////////////////// count = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_COUNT); last_campaign_criteria = NULL; while (count) { campaign_criteria = (campaign_criteria_type *) malloc_heap_mem (sizeof (campaign_criteria_type)); memset (campaign_criteria, 0, sizeof (campaign_criteria_type)); campaign_criteria->criteria_type = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_TYPE); campaign_criteria->valid = unpack_int_value (en, INT_TYPE_VALID); campaign_criteria->result = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_RESULT); campaign_criteria->value1 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE); campaign_criteria->value2 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE); campaign_criteria->value3 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE); campaign_criteria->value4 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE); campaign_criteria->next = last_campaign_criteria; raw->campaign_criteria = campaign_criteria; last_campaign_criteria = campaign_criteria; count --; } ///////////////////////////////////////////////////////////////// // force_info_criteria for (loop = 0; loop < NUM_FORCE_INFO_CATAGORIES; loop ++) { raw->force_info_current_hardware [loop] = unpack_int_value (en, INT_TYPE_VALUE); raw->force_info_reserve_hardware [loop] = unpack_int_value (en, INT_TYPE_VALUE); } ///////////////////////////////////////////////////////////////// for (loop = 0; loop < NUM_ENTITY_SUB_TYPE_GROUPS; loop ++) { raw->kills [loop] = unpack_int_value (en, INT_TYPE_VALUE); raw->losses [loop] = unpack_int_value (en, INT_TYPE_VALUE); // raw->group_count [loop] = unpack_int_value (en, INT_TYPE_VALUE); } raw->sleep = unpack_float_value (en, FLOAT_TYPE_SLEEP); raw->force_attitude = unpack_int_value (en, INT_TYPE_FORCE_ATTITUDE); // sector_count raw->colour = unpack_int_value (en, INT_TYPE_COLOUR); raw->side = unpack_int_value (en, INT_TYPE_SIDE); break; } //////////////////////////////////////// case PACK_MODE_CLIENT_SESSION: //////////////////////////////////////// { // // create entity // debug_assert (get_free_entity (get_local_entity_safe_index (en))); set_local_entity_type (en, ENTITY_TYPE_FORCE); raw = (force *) malloc_fast_mem (sizeof (force)); set_local_entity_data (en, raw); memset (raw, 0, sizeof (force)); // // unpack data (in exactly the same order as the data 
was packed) // unpack_string (en, STRING_TYPE_FORCE_NAME, raw->force_name); // keysite_force unpack_list_root (en, LIST_TYPE_PILOT, &raw->pilot_root); unpack_list_root (en, LIST_TYPE_DIVISION, &raw->division_root); unpack_list_root (en, LIST_TYPE_CAMPAIGN_OBJECTIVE, &raw->campaign_objective_root); // air_registry_root // ground_registry_root // sea_registry_root // unpack_list_root (en, LIST_TYPE_INDEPENDENT_GROUP, &raw->independent_group_root); unpack_list_link (en, LIST_TYPE_FORCE, &raw->force_link); // update_link //////////////////////////////////////////// // task_generation for (loop = 0; loop < NUM_ENTITY_SUB_TYPE_TASKS; loop ++) { raw->task_generation [loop].valid = unpack_int_value (en, INT_TYPE_VALID); raw->task_generation [loop].created = unpack_int_value (en, INT_TYPE_TASK_GENERATION); } //////////////////////////////////////////// //////////////////////////////////////////// // campaign criteria raw->campaign_criteria = NULL; count = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_COUNT); while (count) { count --; campaign_criteria = (campaign_criteria_type *) malloc_heap_mem (sizeof (campaign_criteria_type)); memset (campaign_criteria, 0, sizeof (campaign_criteria_type)); campaign_criteria->next = raw->campaign_criteria; raw->campaign_criteria = campaign_criteria; campaign_criteria->criteria_type = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_TYPE); campaign_criteria->valid = unpack_int_value (en, INT_TYPE_VALID); campaign_criteria->result = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_RESULT); campaign_criteria->value1 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE); campaign_criteria->value2 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE); campaign_criteria->value3 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE); campaign_criteria->value4 = unpack_int_value (en, INT_TYPE_CAMPAIGN_CRITERIA_VALUE); } ///////////////////////////////////////////////////////////////// //////////////////////////////////////////// // force_info_criteria for (loop = 0; loop < NUM_FORCE_INFO_CATAGORIES; loop ++) { raw->force_info_current_hardware [loop] = unpack_int_value (en, INT_TYPE_VALUE); raw->force_info_reserve_hardware [loop] = unpack_int_value (en, INT_TYPE_VALUE); } // force_info_criteria for (loop = 0; loop < NUM_ENTITY_SUB_TYPE_GROUPS; loop ++) { raw->kills [loop] = unpack_int_value (en, INT_TYPE_VALUE); raw->losses [loop] = unpack_int_value (en, INT_TYPE_VALUE); // raw->group_count [loop] = unpack_int_value (en, INT_TYPE_VALUE); } raw->sleep = unpack_float_value (en, FLOAT_TYPE_SLEEP); raw->force_attitude = unpack_int_value (en, INT_TYPE_FORCE_ATTITUDE); raw->sector_count = unpack_int_value (en, INT_TYPE_FORCE_SECTOR_COUNT); raw->colour = unpack_int_value (en, INT_TYPE_COLOUR); raw->side = unpack_int_value (en, INT_TYPE_SIDE); insert_local_entity_into_parents_child_list (en, LIST_TYPE_UPDATE, get_update_entity (), NULL); break; } //////////////////////////////////////// case PACK_MODE_BROWSE_SESSION: //////////////////////////////////////// { // // create entity // debug_assert (get_free_entity (get_local_entity_safe_index (en))); set_local_entity_type (en, ENTITY_TYPE_FORCE); raw = (force *) malloc_fast_mem (sizeof (force)); set_local_entity_data (en, raw); memset (raw, 0, sizeof (force)); // // unpack data (in exactly the same order as the data was packed) // unpack_string (en, STRING_TYPE_FORCE_NAME, raw->force_name); // keysite_force_root unpack_list_root (en, LIST_TYPE_PILOT, &raw->pilot_root); unpack_list_link (en, LIST_TYPE_FORCE, 
&raw->force_link); // update_link // task_generation // campaign_criteria // force_info_catagories // sleep raw->force_attitude = unpack_int_value (en, INT_TYPE_FORCE_ATTITUDE); raw->colour = unpack_int_value (en, INT_TYPE_COLOUR); raw->side = unpack_int_value (en, INT_TYPE_SIDE); break; } } }
static void group_n(struct group_ops *ops, void *arr, unsigned n) { unsigned i, j; /* first pass, figure out what has conflicts and needs a mov * inserted. Do this up front, before starting to setup * left/right neighbor pointers. Trying to do it in a single * pass could result in a situation where we can't even setup * the mov's right neighbor ptr if the next instr also needs * a mov. */ restart: for (i = 0; i < n; i++) { struct ir3_instruction *instr = ops->get(arr, i); if (instr) { struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL; struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL; bool conflict; /* check for left/right neighbor conflicts: */ conflict = conflicts(instr->cp.left, left) || conflicts(instr->cp.right, right); /* RA can't yet deal very well w/ group'd phi's: */ if (is_meta(instr) && (instr->opc == OPC_META_PHI)) conflict = true; /* we also can't have an instr twice in the group: */ for (j = i + 1; (j < n) && !conflict; j++) if (ops->get(arr, j) == instr) conflict = true; if (conflict) { ops->insert_mov(arr, i, instr); /* inserting the mov may have caused a conflict * against the previous: */ goto restart; } } } /* second pass, now that we've inserted mov's, fixup left/right * neighbors. This is guaranteed to succeed, since by definition * the newly inserted mov's cannot conflict with anything. */ for (i = 0; i < n; i++) { struct ir3_instruction *instr = ops->get(arr, i); if (instr) { struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL; struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL; debug_assert(!conflicts(instr->cp.left, left)); if (left) { instr->cp.left_cnt++; instr->cp.left = left; } debug_assert(!conflicts(instr->cp.right, right)); if (right) { instr->cp.right_cnt++; instr->cp.right = right; } } } }
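group_n() relies on a conflicts() predicate that is not part of this excerpt. A definition consistent with how it is used here (a recorded neighbor pointer conflicts only with a different, non-NULL actual neighbor) would be:

#include <stdbool.h>

static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b)
{
    /* two distinct non-NULL neighbors cannot both be satisfied */
    return (a && b) && (a != b);
}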
static unsigned translate_opcode( unsigned op ) { switch( op ) { case OPCODE_ARL: return TGSI_OPCODE_ARL; case OPCODE_ADD: return TGSI_OPCODE_ADD; case OPCODE_CMP: return TGSI_OPCODE_CMP; case OPCODE_COS: return TGSI_OPCODE_COS; case OPCODE_DP3: return TGSI_OPCODE_DP3; case OPCODE_DP4: return TGSI_OPCODE_DP4; case OPCODE_DPH: return TGSI_OPCODE_DPH; case OPCODE_DST: return TGSI_OPCODE_DST; case OPCODE_EX2: return TGSI_OPCODE_EX2; case OPCODE_EXP: return TGSI_OPCODE_EXP; case OPCODE_FLR: return TGSI_OPCODE_FLR; case OPCODE_FRC: return TGSI_OPCODE_FRC; case OPCODE_KIL: return TGSI_OPCODE_KILL_IF; case OPCODE_LG2: return TGSI_OPCODE_LG2; case OPCODE_LOG: return TGSI_OPCODE_LOG; case OPCODE_LIT: return TGSI_OPCODE_LIT; case OPCODE_LRP: return TGSI_OPCODE_LRP; case OPCODE_MAD: return TGSI_OPCODE_MAD; case OPCODE_MAX: return TGSI_OPCODE_MAX; case OPCODE_MIN: return TGSI_OPCODE_MIN; case OPCODE_MOV: return TGSI_OPCODE_MOV; case OPCODE_MUL: return TGSI_OPCODE_MUL; case OPCODE_POW: return TGSI_OPCODE_POW; case OPCODE_RCP: return TGSI_OPCODE_RCP; case OPCODE_SCS: return TGSI_OPCODE_SCS; case OPCODE_SGE: return TGSI_OPCODE_SGE; case OPCODE_SIN: return TGSI_OPCODE_SIN; case OPCODE_SLT: return TGSI_OPCODE_SLT; case OPCODE_TEX: return TGSI_OPCODE_TEX; case OPCODE_TXB: return TGSI_OPCODE_TXB; case OPCODE_TXP: return TGSI_OPCODE_TXP; case OPCODE_XPD: return TGSI_OPCODE_XPD; case OPCODE_END: return TGSI_OPCODE_END; default: debug_assert( 0 ); return TGSI_OPCODE_NOP; } }
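Because the default case asserts in debug builds and yields TGSI_OPCODE_NOP otherwise, callers can translate unconditionally and still emit something well-formed. A hypothetical use ('mesa_op' stands in for whatever Mesa opcode the caller is walking over):

unsigned tgsi_op = translate_opcode(mesa_op);
/* tgsi_op is now a valid TGSI opcode, or TGSI_OPCODE_NOP */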
void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, int nr, struct pipe_surface **bufs) { const struct ir3_shader_variant *vp, *fp; const struct ir3_info *vsi, *fsi; enum a3xx_instrbuffermode fpbuffer, vpbuffer; uint32_t fpbuffersz, vpbuffersz, fsoff; uint32_t pos_regid, posz_regid, psize_regid; uint32_t vcoord_regid, face_regid, coord_regid, zwcoord_regid; uint32_t color_regid[4] = {0}; int constmode; int i, j; debug_assert(nr <= ARRAY_SIZE(color_regid)); vp = fd3_emit_get_vp(emit); fp = fd3_emit_get_fp(emit); vsi = &vp->info; fsi = &fp->info; fpbuffer = BUFFER; vpbuffer = BUFFER; fpbuffersz = fp->instrlen; vpbuffersz = vp->instrlen; /* * Decide whether to use BUFFER or CACHE mode for VS and FS. It * appears like 256 is the hard limit, but when the combined size * exceeds 128 then blob will try to keep FS in BUFFER mode and * switch to CACHE for VS until VS is too large. The blob seems * to switch FS out of BUFFER mode at slightly under 128. But * a bit fuzzy on the decision tree, so use slightly conservative * limits. * * TODO check if these thresholds for BUFFER vs CACHE mode are the * same for all a3xx or whether we need to consider the gpuid */ if ((fpbuffersz + vpbuffersz) > 128) { if (fpbuffersz < 112) { /* FP:BUFFER VP:CACHE */ vpbuffer = CACHE; vpbuffersz = 256 - fpbuffersz; } else if (vpbuffersz < 112) { /* FP:CACHE VP:BUFFER */ fpbuffer = CACHE; fpbuffersz = 256 - vpbuffersz; } else { /* FP:CACHE VP:CACHE */ vpbuffer = fpbuffer = CACHE; vpbuffersz = fpbuffersz = 192; } } if (fpbuffer == BUFFER) { fsoff = 128 - fpbuffersz; } else { fsoff = 256 - fpbuffersz; } /* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */ constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0; pos_regid = ir3_find_output_regid(vp, VARYING_SLOT_POS); posz_regid = ir3_find_output_regid(fp, FRAG_RESULT_DEPTH); psize_regid = ir3_find_output_regid(vp, VARYING_SLOT_PSIZ); if (fp->color0_mrt) { color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_COLOR); } else { color_regid[0] = ir3_find_output_regid(fp, FRAG_RESULT_DATA0); color_regid[1] = ir3_find_output_regid(fp, FRAG_RESULT_DATA1); color_regid[2] = ir3_find_output_regid(fp, FRAG_RESULT_DATA2); color_regid[3] = ir3_find_output_regid(fp, FRAG_RESULT_DATA3); } face_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRONT_FACE); coord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD); zwcoord_regid = (coord_regid == regid(63,0)) ? regid(63,0) : (coord_regid + 2); vcoord_regid = ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PIXEL); /* adjust regids for alpha output formats. there is no alpha render * format, so it's just treated like red */ for (i = 0; i < nr; i++) if (util_format_is_alpha(pipe_surface_format(bufs[i]))) color_regid[i] += 3; /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty.. */ OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6); OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE | A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) | /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe * flush some caches? I think we only need to set those * bits if we have updated const or shader.. 
*/ A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART | A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE | A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(coord_regid) | A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(zwcoord_regid)); OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31) | A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(face_regid)); OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(vcoord_regid)); OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) | A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) | A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz)); OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) | A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) | A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz)); OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1); OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) | COND(emit->binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) | A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) | A3XX_SP_SP_CTRL_REG_L0MODE(0)); OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1); OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen)); OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3); OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) | A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(vpbuffer) | COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) | A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) | A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) | A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) | A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE | A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz)); OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) | A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) | A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen + 1, 0))); OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) | A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) | A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(fp->varying_in)); struct ir3_shader_linkage l = {0}; ir3_link_shaders(&l, vp, fp); for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1); reg |= A3XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid); reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask); j++; reg |= A3XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid); reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask); j++; OUT_RING(ring, reg); } for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) { uint32_t reg = 0; OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1); reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8); reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8); reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8); reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8); OUT_RING(ring, reg); } OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2); OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) | A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */ if (emit->binning_pass) { OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); OUT_RING(ring, 0x00000000); OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2); OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER)); OUT_RING(ring, 0x00000000); OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 1); OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) | A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); } else { OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen)); 
OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2); OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(fpbuffer) | COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) | A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) | A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) | A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP | A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | COND(fp->num_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) | A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz)); OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) | A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) | A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fp->constlen + 1, 0)) | A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63)); OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2); OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET( MAX2(128, vp->constlen)) | A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff)); OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */ } OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); OUT_RING(ring, COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) | A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) | A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1)); OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4); for (i = 0; i < 4; i++) { uint32_t mrt_reg = A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) | COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION); if (i < nr) { enum pipe_format fmt = pipe_surface_format(bufs[i]); mrt_reg |= COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) | COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT); } OUT_RING(ring, mrt_reg); } if (emit->binning_pass) { OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) | A3XX_VPC_ATTR_LMSIZE(1) | COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE)); OUT_RING(ring, 0x00000000); } else { uint32_t vinterp[4], flatshade[2], vpsrepl[4]; memset(vinterp, 0, sizeof(vinterp)); memset(flatshade, 0, sizeof(flatshade)); memset(vpsrepl, 0, sizeof(vpsrepl)); /* figure out VARYING_INTERP / FLAT_SHAD register values: */ for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) { /* NOTE: varyings are packed, so if compmask is 0xb * then the first, second, and fourth components occupy * three consecutive varying slots: */ unsigned compmask = fp->inputs[j].compmask; uint32_t inloc = fp->inputs[j].inloc; if ((fp->inputs[j].interpolate == INTERP_MODE_FLAT) || (fp->inputs[j].rasterflat && emit->rasterflat)) { uint32_t loc = inloc; for (i = 0; i < 4; i++) { if (compmask & (1 << i)) { vinterp[loc / 16] |= FLAT << ((loc % 16) * 2); flatshade[loc / 32] |= 1 << (loc % 32); loc++; } } } gl_varying_slot slot = fp->inputs[j].slot; /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */ if (slot >= VARYING_SLOT_VAR0) { unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0); /* Replace the .xy coordinates with S/T from the point sprite. Set * interpolation bits for .zw such that they become .01 */ if (emit->sprite_coord_enable & texmask) { /* mask is two 2-bit fields, where: * '01' -> S * '10' -> T * '11' -> 1 - T (flip mode) */ unsigned mask = emit->sprite_coord_mode ?
0b1101 : 0b1001; uint32_t loc = inloc; if (compmask & 0x1) { vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2); loc++; } if (compmask & 0x2) { vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2); loc++; } if (compmask & 0x4) { /* .z <- 0.0f */ vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2); loc++; } if (compmask & 0x8) { /* .w <- 1.0f */ vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2); loc++; } }