/**
 * Decides whether two instructions can be treated as computing the same
 * result.
 *
 * The answer is true only when all of the following hold:
 *  - both instructions use the same opcode,
 *  - neither one is reported by qir_depends_on_flags(),
 *  - for every source slot of that opcode, the two source registers compare
 *    equal under qir_reg_equals() and neither is rejected by
 *    src_file_varies_on_reread().
 *
 * @param a  first instruction to compare
 * @param b  second instruction to compare
 * @return   true if every check above passes, false otherwise
 */
static bool
inst_result_equals(struct qinst *a, struct qinst *b)
{
        if (a->op != b->op)
                return false;

        /* A flag-dependent instruction's result is not fixed by its
         * opcode and sources alone, so never report those as equal.
         */
        if (qir_depends_on_flags(a))
                return false;
        if (qir_depends_on_flags(b))
                return false;

        int slot = 0;
        while (slot < qir_get_op_nsrc(a->op)) {
                if (!qir_reg_equals(a->src[slot], b->src[slot]))
                        return false;

                /* Sources that may read differently the second time
                 * around cannot be relied on, even if they name the
                 * same register.
                 */
                if (src_file_varies_on_reread(a->src[slot]))
                        return false;
                if (src_file_varies_on_reread(b->src[slot]))
                        return false;

                slot++;
        }

        return true;
}
static struct qinst * vc4_find_cse(struct vc4_compile *c, struct hash_table *ht, struct qinst *inst, uint32_t sf_count, uint32_t r4_count) { if (inst->dst.file != QFILE_TEMP || inst->op == QOP_MOV || qir_get_op_nsrc(inst->op) > 4) { return NULL; } struct inst_key key; memset(&key, 0, sizeof(key)); key.op = inst->op; memcpy(key.src, inst->src, qir_get_op_nsrc(inst->op) * sizeof(key.src[0])); if (qir_depends_on_flags(inst)) key.implicit_arg_update_count = sf_count; if (qir_reads_r4(inst)) key.implicit_arg_update_count = r4_count; uint32_t hash = _mesa_hash_data(&key, sizeof(key)); struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ht, hash, &key); if (entry) { if (debug) { fprintf(stderr, "CSE found match:\n"); fprintf(stderr, " Original inst: "); qir_dump_inst(c, entry->data); fprintf(stderr, "\n"); fprintf(stderr, " Our inst: "); qir_dump_inst(c, inst); fprintf(stderr, "\n"); } return entry->data; } struct inst_key *alloc_key = ralloc(ht, struct inst_key); if (!alloc_key) return NULL; memcpy(alloc_key, &key, sizeof(*alloc_key)); _mesa_hash_table_insert_pre_hashed(ht, hash, alloc_key, inst); if (debug) { fprintf(stderr, "Added to CSE HT: "); qir_dump_inst(c, inst); fprintf(stderr, "\n"); } return NULL; }
/**
 * Removes unneeded flag updates (SF) from the instructions of one block.
 *
 * The block is walked from its last instruction to its first.  An SF is
 * cleared when no later instruction in the block reads the flags before
 * they are set again, or when the next flag-setting instruction further
 * down computes the same result (per inst_result_equals()), in which case
 * the later instruction's SF is the one cleared.
 *
 * @param c      compile context, passed through to the dump helpers
 * @param block  block whose instructions are scanned
 * @return       true if any instruction's sf field was cleared
 */
static bool
qir_opt_peephole_sf_block(struct vc4_compile *c, struct qblock *block)
{
        bool changed = false;

        /* There is no liveness dataflow for flags, but flags are never
         * consumed across control flow, so they start out dead at the
         * bottom of the block.
         */
        bool flags_wanted = false;

        /* Closest flag-setting instruction below the current one whose
         * sources have not been overwritten in between.
         */
        struct qinst *later_sf = NULL;

        qir_for_each_inst_rev(inst, block) {
                if (inst->sf) {
                        if (flags_wanted) {
                                if (later_sf &&
                                    inst_result_equals(later_sf, inst)) {
                                        /* Both set up the same flags, so
                                         * the later update is redundant.
                                         */
                                        dump_from(c, later_sf, "repeated SF");
                                        later_sf->sf = false;
                                        dump_to(c, later_sf);
                                        changed = true;
                                }
                                later_sf = inst;
                        } else {
                                /* Nothing reads these flags: drop the
                                 * update from this instruction.
                                 */
                                dump_from(c, inst, "dead SF");
                                inst->sf = false;
                                dump_to(c, inst);
                                changed = true;
                        }

                        /* Anything above this point feeds a different
                         * flag value.
                         */
                        flags_wanted = false;
                }

                /* Once this instruction rewrites a source of the tracked
                 * SF, earlier instructions can no longer be compared
                 * against it.
                 */
                if (later_sf && inst_srcs_updated(later_sf, inst))
                        later_sf = NULL;

                if (qir_depends_on_flags(inst))
                        flags_wanted = true;
        }

        return changed;
}
/**
 * Removes unneeded flag updates (SF) from the program's instruction list.
 *
 * c->instructions is traversed back to front while tracking whether the
 * flags are read further down.  A flag update nobody reads is cleared, and
 * when two flag-setting instructions compute the same result (per
 * inst_result_equals()) the later one's update is cleared.
 *
 * @param c  compile context owning the instruction list
 * @return   true if any instruction's sf field was cleared
 */
bool
qir_opt_peephole_sf(struct vc4_compile *c)
{
        bool made_progress = false;
        /* True while some instruction below the cursor reads the flags. */
        bool flags_live = false;
        /* Nearest SF below the cursor that is still comparable. */
        struct qinst *next_sf = NULL;

        list_for_each_entry_rev(struct qinst, inst, &c->instructions, link) {
                if (inst->sf) {
                        if (!flags_live) {
                                /* Dead flag write: nothing consumes it. */
                                dump_from(c, inst, "dead SF");
                                inst->sf = false;
                                dump_to(c, inst);
                                made_progress = true;
                        } else {
                                bool duplicate =
                                        next_sf &&
                                        inst_result_equals(next_sf, inst);

                                if (duplicate) {
                                        /* The flags are already set to
                                         * this value here, so the later
                                         * update can go.
                                         */
                                        dump_from(c, next_sf, "repeated SF");
                                        next_sf->sf = false;
                                        dump_to(c, next_sf);
                                        made_progress = true;
                                }

                                next_sf = inst;
                        }

                        flags_live = false;
                }

                /* Stop comparing against the tracked SF as soon as one of
                 * its sources is written by the current instruction.
                 */
                if (next_sf && inst_srcs_updated(next_sf, inst))
                        next_sf = NULL;

                if (qir_depends_on_flags(inst))
                        flags_live = true;
        }

        return made_progress;
}
bool qir_opt_dead_code(struct vc4_compile *c) { bool progress = false; bool *used = calloc(c->num_temps, sizeof(bool)); bool sf_used = false; /* Whether we're eliminating texture setup currently. */ bool dce_tex = false; struct simple_node *node, *t; for (node = c->instructions.prev, t = node->prev; &c->instructions != node; node = t, t = t->prev) { struct qinst *inst = (struct qinst *)node; if (inst->dst.file == QFILE_TEMP && !used[inst->dst.index] && !inst->sf && (!qir_has_side_effects(c, inst) || inst->op == QOP_TEX_RESULT) && !has_nonremovable_reads(c, inst)) { if (inst->op == QOP_TEX_RESULT) { dce_tex = true; c->num_texture_samples--; } for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { if (inst->src[i].file != QFILE_VPM) continue; uint32_t attr = inst->src[i].index / 4; uint32_t offset = (inst->src[i].index % 4) * 4; if (c->vattr_sizes[attr] == offset + 4) { c->num_inputs--; c->vattr_sizes[attr] -= 4; } } dce(c, inst); progress = true; continue; } if (qir_depends_on_flags(inst)) sf_used = true; if (inst->sf) { if (!sf_used) { if (debug) { fprintf(stderr, "Removing SF on: "); qir_dump_inst(c, inst); fprintf(stderr, "\n"); } inst->sf = false; progress = true; } sf_used = false; } if (inst->op == QOP_TEX_RESULT) dce_tex = false; if (dce_tex && (inst->op == QOP_TEX_S || inst->op == QOP_TEX_T || inst->op == QOP_TEX_R || inst->op == QOP_TEX_B || inst->op == QOP_TEX_DIRECT)) { dce(c, inst); progress = true; continue; } for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { if (inst->src[i].file == QFILE_TEMP) used[inst->src[i].index] = true; } } free(used); return progress; }
bool qir_opt_dead_code(struct vc4_compile *c) { bool progress = false; bool *used = calloc(c->num_temps, sizeof(bool)); bool sf_used = false; /* Whether we're eliminating texture setup currently. */ bool dce_tex = false; struct simple_node *node, *t; for (node = c->instructions.prev, t = node->prev; &c->instructions != node; node = t, t = t->prev) { struct qinst *inst = (struct qinst *)node; if (inst->dst.file == QFILE_TEMP && !used[inst->dst.index] && (!qir_has_side_effects(inst) || inst->op == QOP_TEX_RESULT)) { if (inst->op == QOP_TEX_RESULT) { dce_tex = true; c->num_texture_samples--; } dce(c, inst); progress = true; continue; } if (qir_depends_on_flags(inst)) sf_used = true; if (inst->op == QOP_SF) { if (!sf_used) { dce(c, inst); progress = true; continue; } sf_used = false; } if (inst->op == QOP_TEX_RESULT) dce_tex = false; if (dce_tex && (inst->op == QOP_TEX_S || inst->op == QOP_TEX_T || inst->op == QOP_TEX_R || inst->op == QOP_TEX_B)) { dce(c, inst); progress = true; continue; } for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { if (inst->src[i].file == QFILE_TEMP) used[inst->src[i].index] = true; } } free(used); return progress; }