/* Repair an SSA def whose uses are no longer dominated by its defining
 * block (e.g. after CFG surgery): every non-dominated use is rewritten to
 * read a value reconstructed through the NIR phi builder.
 *
 * NOTE(review): this chunk appears truncated — the body visible here ends
 * after the use-rewriting loop, with no if_uses handling and no final
 * return; confirm against the full file.
 */
static bool repair_ssa_def(nir_ssa_def *def, void *void_state)
{
   struct repair_ssa_state *state = void_state;
   bool is_valid = true;

   /* Fast path: if every use is dominated by the defining block, SSA form
    * is already valid for this def and there is nothing to repair. */
   nir_foreach_use(def, src) {
      if (!nir_block_dominates(def->parent_instr->block, get_src_block(src))) {
         is_valid = false;
         break;
      }
   }
   if (is_valid)
      return true;

   struct nir_phi_builder *pb = prep_build_phi(state);

   /* Seed the phi builder: the def is available in its defining block. */
   BITSET_SET(state->def_set, def->parent_instr->block->index);

   struct nir_phi_builder_value *val =
      nir_phi_builder_add_value(pb, def->num_components, def->bit_size,
                                state->def_set);

   nir_phi_builder_value_set_block_def(val, def->parent_instr->block, def);

   /* Rewrite each non-dominated use to read the phi-built value for the
    * use's block instead of the original def (safe variant: we mutate the
    * use list while walking it). */
   nir_foreach_use_safe(def, src) {
      nir_block *src_block = get_src_block(src);
      if (!nir_block_dominates(def->parent_instr->block, src_block)) {
         nir_instr_rewrite_src(src->parent_instr, src, nir_src_for_ssa(
            nir_phi_builder_value_get_block_def(val, src_block)));
      }
   }
/* Reserve "pages" consecutive physical pages inside region "reg", scanning
 * from "addr" onward (first-fit).  If force_addr is non-zero the run must
 * start exactly at "addr" or the call fails.
 *
 * Returns the physical address of the reserved run, or 0 on failure.
 *
 * Fixes vs. the previous version:
 *  - the scan limit was computed as startpage + total_pages, which let the
 *    search (and the final BITSET_SET fill) run past the end of the
 *    region's bitset;
 *  - on finding an occupied page mid-run, the old inner loop bumped
 *    startpage but kept counting, so "page == pages" could succeed over a
 *    run that overlapped occupied pages;
 *  - when the region was exhausted the old loop fell through and allocated
 *    anyway; we now report failure.
 */
static physaddr_t _pmm_set_addr(physaddr_t addr, size_t pages,
                                struct MMAPRegion* reg, int force_addr)
{
    uint32_t startpage = (addr - reg->start) / PMM_PAGE_SIZE;
    /* Total number of pages covered by this region's bitset. */
    const uint32_t region_pages = reg->len / PMM_PAGE_SIZE;

    if (pages == 0 || pages > region_pages)
        return 0;

    /* First-fit scan for 'pages' consecutive free pages. */
    while (startpage + pages <= region_pages) {
        size_t run = 0;
        while (run < pages &&
               !BITSET_ISSET(reg->region_bitset,
                             (startpage + run) / 8, (startpage + run) % 8))
            run++;

        if (run == pages)
            break;                  /* found a big enough hole */

        if (force_addr)
            return 0;               /* exact address requested but occupied */

        /* Restart the search just past the occupied page. */
        startpage += run + 1;
    }

    if (startpage + pages > region_pages)
        return 0;                   /* region exhausted */

    addr = reg->start + (startpage * PMM_PAGE_SIZE);

    /* Mark the chosen run as allocated. */
    for (size_t p = 0; p < pages; p++)
        BITSET_SET(reg->region_bitset, (startpage + p) / 8, (startpage + p) % 8);

    /* Only advance the allocation hint for ordinary (non-pinned) requests. */
    if (!force_addr)
        reg->first_free_addr = addr + (pages * PMM_PAGE_SIZE);

    return addr;
}
/* Resolve the texture object that "prog" samples on texture unit "unit",
 * latch it as that unit's current texture, and record the unit as enabled.
 */
static inline void
update_single_program_texture_state(struct gl_context *ctx,
                                    struct gl_program *prog,
                                    int unit,
                                    BITSET_WORD *enabled_texture_units)
{
   struct gl_texture_object *tex = update_single_program_texture(ctx, prog, unit);
   _mesa_reference_texobj(&ctx->Texture.Unit[unit]._Current, tex);

   /* Mark the unit active and keep the high-water mark up to date. */
   BITSET_SET(enabled_texture_units, unit);
   if (ctx->Texture._MaxEnabledTexImageUnit < (int)unit)
      ctx->Texture._MaxEnabledTexImageUnit = (int)unit;
}
/* Liveness callback: mark the SSA value read by "src" in the live bitset
 * handed in through the void pointer.  Always returns true so the caller's
 * iteration over sources continues.
 */
static bool set_src_live(nir_src *src, void *void_live)
{
   BITSET_WORD *live_set = void_live;

   /* live_index == 0 identifies undefined values, which are never live. */
   if (src->is_ssa && src->ssa->live_index != 0)
      BITSET_SET(live_set, src->ssa->live_index);

   return true;
}
static void qir_setup_use(struct vc4_compile *c, struct qblock *block, int ip, struct qreg src) { int var = qir_reg_to_var(src); if (var == -1) return; c->temp_start[var] = MIN2(c->temp_start[var], ip); c->temp_end[var] = MAX2(c->temp_end[var], ip); /* The use[] bitset marks when the block makes * use of a variable without having completely * defined that variable within the block. */ if (!BITSET_TEST(block->def, var)) BITSET_SET(block->use, var); }
/* For every sampler the program uses, make sure its texture unit has a
 * current texture: units left without one get a fallback texture for the
 * sampler's target and are recorded as enabled.
 */
static void
fix_missing_textures_for_atifs(struct gl_context *ctx,
                               struct gl_program *prog,
                               BITSET_WORD *enabled_texture_units)
{
   for (GLbitfield remaining = prog->SamplersUsed; remaining; ) {
      const int sampler = u_bit_scan(&remaining);
      const int unit = prog->SamplerUnits[sampler];
      /* Lowest set bit of TexturesUsed picks the texture target index. */
      const gl_texture_index target_index =
         ffs(prog->TexturesUsed[unit]) - 1;

      if (ctx->Texture.Unit[unit]._Current)
         continue;

      struct gl_texture_object *fallback =
         _mesa_get_fallback_texture(ctx, target_index);
      _mesa_reference_texobj(&ctx->Texture.Unit[unit]._Current, fallback);

      BITSET_SET(enabled_texture_units, unit);
      if (ctx->Texture._MaxEnabledTexImageUnit < (int)unit)
         ctx->Texture._MaxEnabledTexImageUnit = (int)unit;
   }
}
/* Allocate a fresh temporary register in the compile context, growing the
 * defs/spillable bookkeeping arrays geometrically when the temp count
 * exceeds the current capacity.
 *
 * NOTE(review): the visible chunk ends right after the growth path — the
 * expected "return reg;" and the function's closing brace are not visible
 * here; confirm against the full file.
 */
struct qreg vir_get_temp(struct v3d_compile *c)
{
        struct qreg reg;
        reg.file = QFILE_TEMP;
        reg.index = c->num_temps++;

        /* Grow bookkeeping arrays: double capacity, minimum 16 entries. */
        if (c->num_temps > c->defs_array_size) {
                uint32_t old_size = c->defs_array_size;
                c->defs_array_size = MAX2(old_size * 2, 16);

                c->defs = reralloc(c, c->defs, struct qinst *,
                                   c->defs_array_size);
                /* Newly added def slots start out NULL. */
                memset(&c->defs[old_size], 0,
                       sizeof(c->defs[0]) * (c->defs_array_size - old_size));

                c->spillable = reralloc(c, c->spillable, BITSET_WORD,
                                        BITSET_WORDS(c->defs_array_size));
                /* Every new temp is considered spillable until shown
                 * otherwise. */
                for (int i = old_size; i < c->defs_array_size; i++)
                        BITSET_SET(c->spillable, i);
        }
/* Validate one SSA def: check index uniqueness and ownership, sanity-check
 * its use lists, and set up per-def tracking state so later validation can
 * cross-check every use against its def.
 */
static void validate_ssa_def(nir_ssa_def *def, validate_state *state)
{
   /* The index must be in range and must not have been seen before in
    * this validation run. */
   assert(def->index < state->impl->ssa_alloc);
   assert(!BITSET_TEST(state->ssa_defs_found, def->index));
   BITSET_SET(state->ssa_defs_found, def->index);

   /* The def must belong to the instruction currently being visited. */
   assert(def->parent_instr == state->instr);
   assert(def->num_components <= 4);

   list_validate(&def->uses);
   list_validate(&def->if_uses);

   /* Per-def bookkeeping: where it was defined plus sets that will collect
    * its regular and if-condition uses. */
   ssa_def_validate_state *tracked =
      ralloc(state->ssa_defs, ssa_def_validate_state);
   tracked->where_defined = state->impl;
   tracked->uses = _mesa_set_create(tracked, _mesa_hash_pointer,
                                    _mesa_key_pointer_equal);
   tracked->if_uses = _mesa_set_create(tracked, _mesa_hash_pointer,
                                       _mesa_key_pointer_equal);
   _mesa_hash_table_insert(state->ssa_defs, def, tracked);
}
/* For each shader stage, resolve the texture object for every sampler in
 * use and latch it on the corresponding texture unit; then fold the
 * fragment program's texcoord varyings into _EnabledCoordUnits.
 */
static void
update_program_texture_state(struct gl_context *ctx, struct gl_program **prog,
                             BITSET_WORD *enabled_texture_units)
{
   for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) {
      struct gl_program *p = prog[stage];
      if (!p)
         continue;

      /* The (1 << s) <= SamplersUsed bound stops once no higher sampler
       * bits remain; the s < MAX_SAMPLERS bound is still required because
       * if sampler 31 is active the next iteration would shift by 32,
       * which is undefined behavior.
       */
      for (int s = 0; s < MAX_SAMPLERS && (1 << s) <= p->SamplersUsed; s++) {
         struct gl_texture_object *tex =
            update_single_program_texture(ctx, p, s);
         if (!tex)
            continue;

         const int unit = p->SamplerUnits[s];
         _mesa_reference_texobj(&ctx->Texture.Unit[unit]._Current, tex);
         BITSET_SET(enabled_texture_units, unit);
         ctx->Texture._MaxEnabledTexImageUnit =
            MAX2(ctx->Texture._MaxEnabledTexImageUnit, (int)unit);
      }
   }

   /* Fixed-function texcoord varyings consumed by the fragment program. */
   if (prog[MESA_SHADER_FRAGMENT]) {
      const GLuint coordMask = (1 << MAX_TEXTURE_COORD_UNITS) - 1;
      ctx->Texture._EnabledCoordUnits |=
         (prog[MESA_SHADER_FRAGMENT]->InputsRead >> VARYING_SLOT_TEX0) &
         coordMask;
   }
}
/* Initialize the physical memory manager.  Autodetects RAM regions from the
 * firmware memory map, classifies them, allocates per-region page bitsets
 * right after the kernel image (advancing *kend), and marks the pages used
 * by the kernel + bitsets as occupied.
 *
 * Returns 1 on success, 0 on failure (no regions, or no region contains
 * the kernel).
 *
 * Fixes vs. the previous version:
 *  - "reg_kernel = ®s_data[ir];" was a mis-encoding of &regs_data[ir]
 *    (did not compile);
 *  - first_free_addr was initialized from regs_data[i] instead of the slot
 *    being filled, regs_data[ir] (equal only by accident);
 *  - the byte-at-a-time bitset fill skipped the kpage%8 alignment check and
 *    could mark up to 7 pages before kmemstart as occupied (and overcount
 *    memocc);
 *  - "avaliable" typo in the shell help string.
 */
int pmm_init(struct mmap_list* mm_list, physaddr_t kstart,
             /*out*/ physaddr_t* kend)
{
    regs_count = mm_list->size;

    /* No regions detected */
    if (regs_count <= 0) {
        kerror("PMM: No regions detected!");
        return 0;
    }

    /* Place the region descriptor array right after the kernel image and
     * advance *kend past it. */
    regs_data = (struct MMAPRegion*)(*kend);
    *kend = (*kend) + (sizeof(struct MMAPRegion) * regs_count);
    memset(regs_data, 0, sizeof(struct MMAPRegion) * (regs_count));

    int ir = 0;
    uint64_t freemem = 0;
    struct MMAPRegion* reg_kernel = NULL;

    for (int i = 0; i < mm_list->size; i++) {
        regs_data[ir].start = mm_list->maps[i].start;
        regs_data[ir].len = mm_list->maps[i].length;
        /* BUGFIX: index with 'ir' (the slot being filled), not 'i'. */
        regs_data[ir].first_free_addr = regs_data[ir].start;

        switch (mm_list->maps[i].type) {
        case MMAP_FREE:
            regs_data[ir].region_type = PMM_REG_DEFAULT;
            break;
        default:
            regs_data[ir].region_type = PMM_REG_HARDWARE;
            break;
        }

        /* If the region lies entirely below 1 MiB, treat it as legacy. */
        if (regs_data[ir].start < 1048576 &&
            (regs_data[ir].start + regs_data[ir].len) < 1048576 &&
            regs_data[ir].region_type == PMM_REG_DEFAULT) {
            regs_data[ir].region_type = PMM_REG_LEGACY;
            regs_data[ir].first_free_addr = 0x1000; /* safe area */
        }

        /* Hardware regions above 0xb0000 that end below 1 MiB are the
         * BIOS ROM area. */
        if (regs_data[ir].start > 0xb0000 &&
            (regs_data[ir].start + regs_data[ir].len) < 1048576 &&
            regs_data[ir].region_type == PMM_REG_HARDWARE) {
            regs_data[ir].region_type = PMM_REG_ROM;
        }

        if (regs_data[ir].region_type == PMM_REG_LEGACY ||
            regs_data[ir].region_type == PMM_REG_DEFAULT) {
            freemem += regs_data[ir].len;
        }

        /* TODO: Treat overlapping areas */

        const char* typestr;
        switch (regs_data[ir].region_type) {
        case PMM_REG_DEFAULT:  typestr = "default";        break;
        case PMM_REG_LEGACY:   typestr = "legacy";         break;
        case PMM_REG_HARDWARE: typestr = "hardware-used";  break;
        case PMM_REG_FAULTY:   typestr = "faulty";         break;
        default:               typestr = "unknown";        break;
        }

        knotice("PMM: detected RAM type %s starting at %x, %d bytes",
                typestr, regs_data[ir].start, regs_data[ir].len);

        /* Remember the (default) region that contains the kernel image. */
        if (regs_data[ir].start >= kstart &&
            *kend <= regs_data[ir].start + regs_data[ir].len &&
            regs_data[ir].region_type == PMM_REG_DEFAULT) {
            /* BUGFIX: was the mis-encoded "®s_data[ir]". */
            reg_kernel = &regs_data[ir];
        }

        ir++;
    }
    regs_count = ir;

    if (!reg_kernel) {
        /* Could not find kernel area */
        kerror("PMM: memory area not found for kernel (0x%x-0x%x)",
               kstart, *kend);
        return 0;
    }

    knotice("PMM: %d kB of usable RAM detected",
            (uint32_t)((freemem / 1024) & 0xffffffff));

    /* Allocate memory for the per-region page bitsets.  Each bit covers one
     * page (currently 4 kB); dividing by 8 yields the byte count. */
    for (int i = 0; i < regs_count; i++) {
        regs_data[i].region_bitset = (uint8_t*)*kend;
        size_t len = 1 + (regs_data[i].len / PMM_PAGE_SIZE / 8);
        memset(regs_data[i].region_bitset, 0, (len + 3) & ~3);
        *kend = (*kend) + len;
        /* Pad the bitset to 4 bytes */
        *kend = (*kend + 3) & ~3;
    }

    uint64_t memocc = 0;

    /* Mark the pages occupied by the kernel image and the bitsets. */
    uintptr_t kmemstart = kstart - reg_kernel->start;
    uintptr_t kmemend = (*kend) - reg_kernel->start;
    for (unsigned kpage = (kmemstart / PMM_PAGE_SIZE);
         kpage <= (kmemend / PMM_PAGE_SIZE);
         kpage++) {
        /* Fast path: fill a whole byte (8 pages) at once.  BUGFIX: only do
         * this when kpage is byte-aligned; otherwise a full-byte store
         * would also mark pages before kmemstart as occupied. */
        if ((kpage % 8) == 0 && kpage + 8 < (kmemend / PMM_PAGE_SIZE)) {
            reg_kernel->region_bitset[kpage / 8] = 0xff;
            kpage += 7;
            memocc += PMM_PAGE_SIZE * 8;
            continue;
        }
        BITSET_SET(reg_kernel->region_bitset, kpage / 8, kpage % 8);
        memocc += PMM_PAGE_SIZE;
    }

    _kernel_start = kstart;
    _kernel_end = (*kend);

    reg_kernel->first_free_addr = kstart + memocc;

    knotice("PMM: %d kB of occupied RAM",
            (uint32_t)(memocc / 1024) & 0xffffffff);

    kshell_add("mem", "Show available memory", &pmm_shell_cmd);
    return 1;
}
/* Dead code elimination on the FS IR: walk each basic block bottom-up with
 * a live-variable bitset seeded from the block's liveout, turning GRF
 * writes whose results are never read into NOPs.
 *
 * NOTE(review): this chunk appears truncated — the usual trailing cleanup
 * (freeing 'live', invalidating live intervals, returning 'progress') is
 * not visible here; confirm against the full file.
 */
bool
fs_visitor::dead_code_eliminate()
{
   bool progress = false;

   calculate_live_intervals();

   int num_vars = live_intervals->num_vars;
   BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));

   foreach_block (block, cfg) {
      /* Start from what is live at block exit; scanning instructions in
       * reverse keeps 'live' describing liveness just below the current
       * instruction. */
      memcpy(live, live_intervals->bd[block->num].liveout,
             sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));

      foreach_inst_in_block_reverse(fs_inst, inst, block) {
         /* Only GRF writes with no side effects and no flag write are
          * candidates for removal. */
         if (inst->dst.file == GRF && !inst->has_side_effects() &&
             !inst->writes_flag()) {
            bool result_live = false;

            if (inst->regs_written == 1) {
               int var = live_intervals->var_from_reg(&inst->dst);
               result_live = BITSET_TEST(live, var);
            } else {
               int var = live_intervals->var_from_vgrf[inst->dst.reg];
               for (int i = 0; i < inst->regs_written; i++) {
                  result_live = result_live || BITSET_TEST(live, var + i);
               }
            }

            if (!result_live) {
               progress = true;

               if (inst->writes_accumulator) {
                  /* Keep the implicit accumulator write; only discard the
                   * dead GRF result by retargeting the dst to null. */
                  inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
               } else {
                  inst->opcode = BRW_OPCODE_NOP;
                  continue;
               }
            }
         }

         if (inst->dst.file == GRF) {
            if (!inst->is_partial_write()) {
               /* A full write kills liveness of the written registers. */
               int var = live_intervals->var_from_vgrf[inst->dst.reg];
               for (int i = 0; i < inst->regs_written; i++) {
                  BITSET_CLEAR(live, var + inst->dst.reg_offset + i);
               }
            }
         }

         /* Sources become live above this instruction. */
         for (int i = 0; i < inst->sources; i++) {
            if (inst->src[i].file == GRF) {
               int var = live_intervals->var_from_vgrf[inst->src[i].reg];
               for (int j = 0; j < inst->regs_read(this, i); j++) {
                  BITSET_SET(live, var + inst->src[i].reg_offset + j);
               }
            }
         }
      }
   }
/* Update fixed-function texture state: for every enabled texture unit not
 * already claimed by a shader program, pick the highest-priority enabled
 * target with a complete texture, latch it as the unit's _Current texture,
 * and record the unit as enabled.
 */
static void
update_ff_texture_state(struct gl_context *ctx,
                        BITSET_WORD *enabled_texture_units)
{
   int unit;

   for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
      struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
      GLbitfield mask;
      bool complete;

      if (texUnit->Enabled == 0x0)
         continue;

      /* If a shader already dictated what texture target was used for this
       * unit, just go along with it.
       */
      if (BITSET_TEST(enabled_texture_units, unit))
         continue;

      /* From the GL 4.4 compat specification, section 16.2 ("Texture Application"):
       *
       *   "Texturing is enabled or disabled using the generic Enable and
       *   Disable commands, respectively, with the symbolic constants
       *   TEXTURE_1D, TEXTURE_2D, TEXTURE_RECTANGLE, TEXTURE_3D, or
       *   TEXTURE_CUBE_MAP to enable the one-, two-, rectangular,
       *   three-dimensional, or cube map texture, respectively. If more
       *   than one of these textures is enabled, the first one enabled
       *   from the following list is used:
       *
       *   • cube map texture
       *   • three-dimensional texture
       *   • rectangular texture
       *   • two-dimensional texture
       *   • one-dimensional texture"
       *
       * Note that the TEXTURE_x_INDEX values are in high to low priority.
       * Also:
       *
       *   "If a texture unit is disabled or has an invalid or incomplete
       *   texture (as defined in section 8.17) bound to it, then blending
       *   is disabled for that texture unit. If the texture environment
       *   for a given enabled texture unit references a disabled texture
       *   unit, or an invalid or incomplete texture that is bound to
       *   another unit, then the results of texture blending are
       *   undefined."
       */
      complete = false;
      mask = texUnit->Enabled;
      while (mask) {
         const int texIndex = u_bit_scan(&mask);
         struct gl_texture_object *texObj = texUnit->CurrentTex[texIndex];
         /* Prefer the unit's bound sampler object; fall back to the
          * texture's own sampling state. */
         struct gl_sampler_object *sampler = texUnit->Sampler ?
            texUnit->Sampler : &texObj->Sampler;

         /* Completeness may be stale; re-test before giving up on it. */
         if (!_mesa_is_texture_complete(texObj, sampler)) {
            _mesa_test_texobj_completeness(ctx, texObj);
         }
         if (_mesa_is_texture_complete(texObj, sampler)) {
            _mesa_reference_texobj(&texUnit->_Current, texObj);
            complete = true;
            break;
         }
      }

      if (!complete)
         continue;

      /* if we get here, we know this texture unit is enabled */
      BITSET_SET(enabled_texture_units, unit);
      ctx->Texture._MaxEnabledTexImageUnit =
         MAX2(ctx->Texture._MaxEnabledTexImageUnit, (int)unit);
      ctx->Texture._EnabledCoordUnits |= 1 << unit;

      update_tex_combine(ctx, texUnit);
   }
}
/* Emit the adreno a5xx shader program state: per-stage HLSQ/SP config,
 * const/instr lengths, VS/FS linkage and varying maps, stream-out, and
 * fragment interpolation / point-sprite replacement setup.
 *
 * NOTE(review): this chunk appears truncated — it ends mid-way through the
 * varying interpolation loop; confirm against the full file.  Code tokens
 * below are unchanged; only comments were added.
 */
void fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, struct fd5_emit *emit) { struct stage s[MAX_STAGES]; uint32_t pos_regid, psize_regid, color_regid[8]; uint32_t face_regid, coord_regid, zwcoord_regid; uint32_t vcoord_regid, vertex_regid, instance_regid; enum a3xx_threadsize fssz; uint8_t psize_loc = ~0; int i, j; setup_stages(emit, s); fssz = (s[FS].i->max_reg >= 24) ? TWO_QUADS : FOUR_QUADS; pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS); psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ); vertex_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE); instance_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_INSTANCE_ID); if (s[FS].v->color0_mrt) { color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR); } else { color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0); color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1); color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2); color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3); color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4); color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5); color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6); color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); } /* TODO get these dynamically: */ face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0); coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0); zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0); vcoord_regid = (s[FS].v->total_in > 0) ? s[FS].v->pos_regid : regid(63,0); /* we could probably divide this up into things that need to be * emitted if frag-prog is dirty vs if vert-prog is dirty..
*/ OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONFIG, 5); OUT_RING(ring, A5XX_HLSQ_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) | A5XX_HLSQ_VS_CONFIG_SHADEROBJOFFSET(s[VS].instroff) | COND(s[VS].v, A5XX_HLSQ_VS_CONFIG_ENABLED)); OUT_RING(ring, A5XX_HLSQ_FS_CONFIG_CONSTOBJECTOFFSET(s[FS].constoff) | A5XX_HLSQ_FS_CONFIG_SHADEROBJOFFSET(s[FS].instroff) | COND(s[FS].v, A5XX_HLSQ_FS_CONFIG_ENABLED)); OUT_RING(ring, A5XX_HLSQ_HS_CONFIG_CONSTOBJECTOFFSET(s[HS].constoff) | A5XX_HLSQ_HS_CONFIG_SHADEROBJOFFSET(s[HS].instroff) | COND(s[HS].v, A5XX_HLSQ_HS_CONFIG_ENABLED)); OUT_RING(ring, A5XX_HLSQ_DS_CONFIG_CONSTOBJECTOFFSET(s[DS].constoff) | A5XX_HLSQ_DS_CONFIG_SHADEROBJOFFSET(s[DS].instroff) | COND(s[DS].v, A5XX_HLSQ_DS_CONFIG_ENABLED)); OUT_RING(ring, A5XX_HLSQ_GS_CONFIG_CONSTOBJECTOFFSET(s[GS].constoff) | A5XX_HLSQ_GS_CONFIG_SHADEROBJOFFSET(s[GS].instroff) | COND(s[GS].v, A5XX_HLSQ_GS_CONFIG_ENABLED)); OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1); OUT_RING(ring, 0x00000000); OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5); OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen) | COND(s[VS].v && s[VS].v->has_ssbo, A5XX_HLSQ_VS_CNTL_SSBO_ENABLE)); OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen) | COND(s[FS].v && s[FS].v->has_ssbo, A5XX_HLSQ_FS_CNTL_SSBO_ENABLE)); OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen) | COND(s[HS].v && s[HS].v->has_ssbo, A5XX_HLSQ_HS_CNTL_SSBO_ENABLE)); OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen) | COND(s[DS].v && s[DS].v->has_ssbo, A5XX_HLSQ_DS_CNTL_SSBO_ENABLE)); OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen) | COND(s[GS].v && s[GS].v->has_ssbo, A5XX_HLSQ_GS_CNTL_SSBO_ENABLE)); OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG, 5); OUT_RING(ring, A5XX_SP_VS_CONFIG_CONSTOBJECTOFFSET(s[VS].constoff) | A5XX_SP_VS_CONFIG_SHADEROBJOFFSET(s[VS].instroff) | COND(s[VS].v, A5XX_SP_VS_CONFIG_ENABLED)); OUT_RING(ring, A5XX_SP_FS_CONFIG_CONSTOBJECTOFFSET(s[FS].constoff) | A5XX_SP_FS_CONFIG_SHADEROBJOFFSET(s[FS].instroff) | COND(s[FS].v,
A5XX_SP_FS_CONFIG_ENABLED)); OUT_RING(ring, A5XX_SP_HS_CONFIG_CONSTOBJECTOFFSET(s[HS].constoff) | A5XX_SP_HS_CONFIG_SHADEROBJOFFSET(s[HS].instroff) | COND(s[HS].v, A5XX_SP_HS_CONFIG_ENABLED)); OUT_RING(ring, A5XX_SP_DS_CONFIG_CONSTOBJECTOFFSET(s[DS].constoff) | A5XX_SP_DS_CONFIG_SHADEROBJOFFSET(s[DS].instroff) | COND(s[DS].v, A5XX_SP_DS_CONFIG_ENABLED)); OUT_RING(ring, A5XX_SP_GS_CONFIG_CONSTOBJECTOFFSET(s[GS].constoff) | A5XX_SP_GS_CONFIG_SHADEROBJOFFSET(s[GS].instroff) | COND(s[GS].v, A5XX_SP_GS_CONFIG_ENABLED)); OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1); OUT_RING(ring, 0x00000000); OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONSTLEN, 2); OUT_RING(ring, s[VS].constlen); /* HLSQ_VS_CONSTLEN */ OUT_RING(ring, s[VS].instrlen); /* HLSQ_VS_INSTRLEN */ OUT_PKT4(ring, REG_A5XX_HLSQ_FS_CONSTLEN, 2); OUT_RING(ring, s[FS].constlen); /* HLSQ_FS_CONSTLEN */ OUT_RING(ring, s[FS].instrlen); /* HLSQ_FS_INSTRLEN */ OUT_PKT4(ring, REG_A5XX_HLSQ_HS_CONSTLEN, 2); OUT_RING(ring, s[HS].constlen); /* HLSQ_HS_CONSTLEN */ OUT_RING(ring, s[HS].instrlen); /* HLSQ_HS_INSTRLEN */ OUT_PKT4(ring, REG_A5XX_HLSQ_DS_CONSTLEN, 2); OUT_RING(ring, s[DS].constlen); /* HLSQ_DS_CONSTLEN */ OUT_RING(ring, s[DS].instrlen); /* HLSQ_DS_INSTRLEN */ OUT_PKT4(ring, REG_A5XX_HLSQ_GS_CONSTLEN, 2); OUT_RING(ring, s[GS].constlen); /* HLSQ_GS_CONSTLEN */ OUT_RING(ring, s[GS].instrlen); /* HLSQ_GS_INSTRLEN */ OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONSTLEN, 2); OUT_RING(ring, 0x00000000); /* HLSQ_CS_CONSTLEN */ OUT_RING(ring, 0x00000000); /* HLSQ_CS_INSTRLEN */ OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1); OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) | A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | 0x6 | /* XXX seems to be always set? */ A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow..
COND(s[VS].v->has_samp, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE)); struct ir3_shader_linkage l = {0}; ir3_link_shaders(&l, s[VS].v, s[FS].v); if ((s[VS].v->shader->stream_output.num_outputs > 0) && !emit->key.binning_pass) link_stream_out(&l, s[VS].v);
/* Build a bitmask of all linked varying locations so the rest can be
 * disabled in VPC_VAR[]. */
BITSET_DECLARE(varbs, 128) = {0}; uint32_t *varmask = (uint32_t *)varbs; for (i = 0; i < l.cnt; i++) for (j = 0; j < util_last_bit(l.var[i].compmask); j++) BITSET_SET(varbs, l.var[i].loc + j); OUT_PKT4(ring, REG_A5XX_VPC_VAR_DISABLE(0), 4); OUT_RING(ring, ~varmask[0]); /* VPC_VAR[0].DISABLE */ OUT_RING(ring, ~varmask[1]); /* VPC_VAR[1].DISABLE */ OUT_RING(ring, ~varmask[2]); /* VPC_VAR[2].DISABLE */ OUT_RING(ring, ~varmask[3]); /* VPC_VAR[3].DISABLE */ /* a5xx appends pos/psize to end of the linkage map: */ if (pos_regid != regid(63,0)) ir3_link_add(&l, pos_regid, 0xf, l.max_loc); if (psize_regid != regid(63,0)) { psize_loc = l.max_loc; ir3_link_add(&l, psize_regid, 0x1, l.max_loc); } if ((s[VS].v->shader->stream_output.num_outputs > 0) && !emit->key.binning_pass) { emit_stream_out(ring, s[VS].v, &l); OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1); OUT_RING(ring, 0x00000000); } else { OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1); OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE); } for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) { uint32_t reg = 0; OUT_PKT4(ring, REG_A5XX_SP_VS_OUT_REG(i), 1); reg |= A5XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid); reg |= A5XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask); j++; reg |= A5XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid); reg |= A5XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask); j++; OUT_RING(ring, reg); } for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) { uint32_t reg = 0; OUT_PKT4(ring, REG_A5XX_SP_VS_VPC_DST_REG(i), 1); reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc); reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc); reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc); reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc); OUT_RING(ring, reg); } OUT_PKT4(ring,
REG_A5XX_SP_VS_OBJ_START_LO, 2); OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_LO/HI */ if (s[VS].instrlen) fd5_emit_shader(ring, s[VS].v);
// TODO depending on other bits in this reg (if any) set somewhere else?
OUT_PKT4(ring, REG_A5XX_PC_PRIM_VTX_CNTL, 1); OUT_RING(ring, COND(s[VS].v->writes_psize, A5XX_PC_PRIM_VTX_CNTL_PSIZE)); OUT_PKT4(ring, REG_A5XX_SP_PRIMITIVE_CNTL, 1); OUT_RING(ring, A5XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt)); OUT_PKT4(ring, REG_A5XX_VPC_CNTL_0, 1); OUT_RING(ring, A5XX_VPC_CNTL_0_STRIDE_IN_VPC(l.max_loc) | COND(s[FS].v->total_in > 0, A5XX_VPC_CNTL_0_VARYING) | COND(s[FS].v->frag_coord, A5XX_VPC_CNTL_0_VARYING) | 0x10000); // XXX
fd5_context(ctx)->max_loc = l.max_loc; if (emit->key.binning_pass) { OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2); OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_LO */ OUT_RING(ring, 0x00000000); /* SP_FS_OBJ_START_HI */ } else { OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2); OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_LO/HI */ } OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 5); OUT_RING(ring, A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(fssz) | A5XX_HLSQ_CONTROL_0_REG_CSTHREADSIZE(TWO_QUADS) | 0x00000880); /* XXX HLSQ_CONTROL_0 */ OUT_RING(ring, A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(63)); OUT_RING(ring, A5XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) | 0xfcfcfc00); /* XXX */ OUT_RING(ring, A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(vcoord_regid) | 0xfcfcfc00); /* XXX */ OUT_RING(ring, A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) | A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) | 0x0000fcfc); /* XXX */ OUT_PKT4(ring, REG_A5XX_SP_FS_CTRL_REG0, 1); OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) | COND(s[FS].v->frag_coord, A5XX_SP_FS_CTRL_REG0_VARYING) | 0x40006 | /* XXX set pretty much everywhere */ A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow..
COND(s[FS].v->has_samp, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE)); OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); OUT_RING(ring, 0x020fffff); /* XXX */ OUT_PKT4(ring, REG_A5XX_VPC_GS_SIV_CNTL, 1); OUT_RING(ring, 0x0000ffff); /* XXX */ OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1); OUT_RING(ring, 0x00000010); /* XXX */ OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1); OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_GRAS_CNTL_VARYING) | COND(s[FS].v->frag_coord, A5XX_GRAS_CNTL_XCOORD | A5XX_GRAS_CNTL_YCOORD | A5XX_GRAS_CNTL_ZCOORD | A5XX_GRAS_CNTL_WCOORD | A5XX_GRAS_CNTL_UNK3) | COND(s[FS].v->frag_face, A5XX_GRAS_CNTL_UNK3)); OUT_PKT4(ring, REG_A5XX_RB_RENDER_CONTROL0, 2); OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_RB_RENDER_CONTROL0_VARYING) | COND(s[FS].v->frag_coord, A5XX_RB_RENDER_CONTROL0_XCOORD | A5XX_RB_RENDER_CONTROL0_YCOORD | A5XX_RB_RENDER_CONTROL0_ZCOORD | A5XX_RB_RENDER_CONTROL0_WCOORD | A5XX_RB_RENDER_CONTROL0_UNK3) | COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL0_UNK3)); OUT_RING(ring, COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL1_FACENESS)); OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_REG(0), 8); for (i = 0; i < 8; i++) { OUT_RING(ring, A5XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) | COND(emit->key.half_precision, A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION)); } OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1); OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(s[FS].v->total_in) | A5XX_VPC_PACK_PSIZELOC(psize_loc)); if (!emit->key.binning_pass) { uint32_t vinterp[8], vpsrepl[8]; memset(vinterp, 0, sizeof(vinterp)); memset(vpsrepl, 0, sizeof(vpsrepl)); /* looks like we need to do int varyings in the frag * shader on a5xx (no flatshad reg?
or a420.0 bug?): * * (sy)(ss)nop * (sy)ldlv.u32 r0.x,l[r0.x], 1 * ldlv.u32 r0.y,l[r0.x+1], 1 * (ss)bary.f (ei)r63.x, 0, r0.x * (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x * (rpt5)nop * sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0 * * Possibly on later a5xx variants we'll be able to use * something like the code below instead of workaround * in the shader: */ /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */ for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) { /* NOTE: varyings are packed, so if compmask is 0xb * then first, third, and fourth component occupy * three consecutive varying slots: */ unsigned compmask = s[FS].v->inputs[j].compmask; uint32_t inloc = s[FS].v->inputs[j].inloc; if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) || (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) { uint32_t loc = inloc; for (i = 0; i < 4; i++) { if (compmask & (1 << i)) { vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
//flatshade[loc / 32] |= 1 << (loc % 32);
loc++; } } } gl_varying_slot slot = s[FS].v->inputs[j].slot; /* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */ if (slot >= VARYING_SLOT_VAR0) { unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0); /* Replace the .xy coordinates with S/T from the point sprite. Set * interpolation bits for .zw such that they become .01 */ if (emit->sprite_coord_enable & texmask) { /* mask is two 2-bit fields, where: * '01' -> S * '10' -> T * '11' -> 1 - T (flip mode) */ unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001; uint32_t loc = inloc; if (compmask & 0x1) { vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2); loc++; } if (compmask & 0x2) { vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2); loc++; }
/* NOTE(review): chunk ends here, mid-loop — remainder not visible. */
static void qir_setup_def(struct vc4_compile *c, struct qblock *block, int ip, struct hash_table *partial_update_ht, struct qinst *inst) { /* The def[] bitset marks when an initialization in a * block completely screens off previous updates of * that variable. */ int var = qir_reg_to_var(inst->dst); if (var == -1) return; c->temp_start[var] = MIN2(c->temp_start[var], ip); c->temp_end[var] = MAX2(c->temp_end[var], ip); /* If we've already tracked this as a def, or already used it within * the block, there's nothing to do. */ if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var)) return; /* Easy, common case: unconditional full register update. */ if (inst->cond == QPU_COND_ALWAYS && !inst->dst.pack) { BITSET_SET(block->def, var); return; } /* Finally, look at the condition code and packing and mark it as a * def. We need to make sure that we understand sequences * instructions like: * * mov.zs t0, t1 * mov.zc t0, t2 * * or: * * mmov t0.8a, t1 * mmov t0.8b, t2 * mmov t0.8c, t3 * mmov t0.8d, t4 * * as defining the temp within the block, because otherwise dst's live * range will get extended up the control flow to the top of the * program. */ struct partial_update_state *state = get_partial_update_state(partial_update_ht, inst); uint8_t mask = qir_channels_written(inst); if (inst->cond == QPU_COND_ALWAYS) { state->channels |= mask; } else { for (int i = 0; i < 4; i++) { if (!(mask & (1 << i))) continue; if (state->insts[i] && state->insts[i]->cond == qpu_cond_complement(inst->cond)) state->channels |= 1 << i; else state->insts[i] = inst; } } if (state->channels == 0xf) BITSET_SET(block->def, var); }