/* calculate required # of delay slots between the instruction that * assigns a value and the one that consumes */ int ir3_delayslots(struct ir3_instruction *assigner, struct ir3_instruction *consumer, unsigned n) { /* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal * alu -> alu needs 3 cycles, cat4 -> alu and texture fetch * handled with sync bits */ if (is_meta(assigner)) return 0; if (writes_addr(assigner)) return 6; /* handled via sync flags: */ if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner)) return 0; /* assigner must be alu: */ if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) || is_mem(consumer)) { return 6; } else if ((consumer->category == 3) && (is_mad(consumer->opc) || is_madsh(consumer->opc)) && (n == 2)) { /* special case, 3rd src to cat3 not required on first cycle */ return 1; } else { return 3; } }
/* Commit 'instr' to the schedule: detach it from the depth list, append
 * it to the end of its block's instruction list, record any address or
 * predicate register writes in 'ctx', and invalidate stale scheduler
 * cache entries.  Requires that 'instr' belongs to the block currently
 * being scheduled.
 */
static void schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
	debug_assert(ctx->block == instr->block);

	/* maybe there is a better way to handle this than just stuffing
	 * a nop.. ideally we'd know about this constraint in the
	 * scheduling and depth calculation..
	 */
	/* back-to-back sfu/mem instructions need separation; pad with a
	 * nop before scheduling the second one:
	 */
	if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr))
		ir3_NOP(ctx->block);

	/* remove from depth list: */
	list_delinit(&instr->node);

	/* track the live a0/p0 writer so later writers can be deferred
	 * until the register is free (the asserts enforce exclusivity):
	 */
	if (writes_addr(instr)) {
		debug_assert(ctx->addr == NULL);
		ctx->addr = instr;
	}

	if (writes_pred(instr)) {
		debug_assert(ctx->pred == NULL);
		ctx->pred = instr;
	}

	/* MARK flags the instruction as scheduled: */
	instr->flags |= IR3_INSTR_MARK;

	list_addtail(&instr->node, &instr->block->instr_list);
	ctx->scheduled = instr;

	/* addr/pred writes and inputs can affect any pending candidate,
	 * so flush the whole cache; otherwise only entries touching
	 * 'instr' need invalidating (clear_cache semantics presumed from
	 * the NULL-vs-instr argument -- TODO confirm against its
	 * definition):
	 */
	if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) {
		clear_cache(ctx, NULL);
	} else {
		/* invalidate only the necessary entries.. */
		clear_cache(ctx, instr);
	}
}
/* Commit 'instr' to the schedule.  The scheduled list is built in
 * reverse: 'instr' is pushed onto the front of ctx->scheduled via its
 * 'next' pointer.  When 'remove' is true, 'instr' is first unlinked
 * from the block's (singly linked) unscheduled list; 'remove' is false
 * only for freshly created instructions (the nop below) that were
 * never on that list.
 */
static void schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr,
		bool remove)
{
	struct ir3_block *block = instr->block;

	/* maybe there is a better way to handle this than just stuffing
	 * a nop.. ideally we'd know about this constraint in the
	 * scheduling and depth calculation..
	 */
	/* back-to-back sfu instructions need separation; recursively
	 * schedule a new nop (remove=false, it isn't on the block list):
	 */
	if (ctx->scheduled && is_sfu(ctx->scheduled) && is_sfu(instr))
		schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false);

	/* remove from depth list: */
	if (remove) {
		struct ir3_instruction *p = prev(instr);

		/* NOTE: this can happen for inputs which are not
		 * read.. in that case there is no need to schedule
		 * the input, so just bail:
		 */
		if (instr != (p ? p->next : block->head))
			return;

		/* unlink 'instr': splice predecessor (or list head)
		 * straight to its successor:
		 */
		if (p)
			p->next = instr->next;
		else
			block->head = instr->next;
	}

	/* track the live a0/p0 writer; asserts enforce that only one
	 * writer can be outstanding at a time:
	 */
	if (writes_addr(instr)) {
		assert(ctx->addr == NULL);
		ctx->addr = instr;
	}

	if (writes_pred(instr)) {
		assert(ctx->pred == NULL);
		ctx->pred = instr;
	}

	/* MARK flags the instruction as scheduled: */
	instr->flags |= IR3_INSTR_MARK;

	/* prepend to the (reversed) scheduled list and count it: */
	instr->next = ctx->scheduled;
	ctx->scheduled = instr;

	ctx->cnt++;
}
/* A negative return value signals that an instruction has been newly
 * scheduled, return back up to the top of the stack (to block_sched())
 *
 * Returns 0 when 'instr' was already scheduled, a positive delay-slot
 * count when the caller must first fill that many slots, or one of the
 * DELAYED/SCHEDULED sentinels (defined elsewhere in this file;
 * presumably negative per the note above -- TODO confirm) to unwind
 * the recursion.
 */
static int trysched(struct ir3_sched_ctx *ctx,
		struct ir3_instruction *instr)
{
	/* worst case, one src slot per register beyond the dst: */
	struct ir3_instruction *srcs[ARRAY_SIZE(instr->regs) - 1];
	struct ir3_instruction *src;
	unsigned i, delay, nsrcs = 0;

	/* if already scheduled: */
	if (instr->flags & IR3_INSTR_MARK)
		return 0;

	/* figure out our src's: regs[0] is the dst, SSA-flagged regs
	 * point back at their writer instruction:
	 */
	for (i = 1; i < instr->regs_count; i++) {
		struct ir3_register *reg = instr->regs[i];
		if (reg->flags & IR3_REG_SSA)
			srcs[nsrcs++] = reg->instr;
	}

	/* for each src register in sorted order (deepest() picks the
	 * deepest not-yet-scheduled src each pass); recurse, and bubble
	 * any required delay / sentinel straight up to our caller:
	 */
	delay = 0;
	while ((src = deepest(srcs, nsrcs))) {
		delay = trysched(ctx, src);
		if (delay)
			return delay;
	}

	/* all our dependents are scheduled, figure out if
	 * we have enough delay slots to schedule ourself:
	 */
	delay = delay_calc(ctx, instr);
	if (delay)
		return delay;

	/* if the instruction is a kill, we need to ensure *every*
	 * bary.f is scheduled. The hw seems unhappy if the thread
	 * gets killed before the end-input (ei) flag is hit.
	 *
	 * We could do this by adding each bary.f instruction as
	 * virtual ssa src for the kill instruction. But we have
	 * fixed length instr->regs[].
	 *
	 * TODO this wouldn't be quite right if we had multiple
	 * basic blocks, if any block was conditional. We'd need
	 * to schedule the bary.f's outside of any block which
	 * was conditional that contained a kill.. I think..
	 */
	if (is_kill(instr)) {
		struct ir3 *ir = instr->block->shader;
		unsigned i;

		for (i = 0; i < ir->baryfs_count; i++) {
			/* skip bary.f's whose result is never consumed: */
			if (ir->baryfs[i]->depth == DEPTH_UNUSED)
				continue;
			delay = trysched(ctx, ir->baryfs[i]);
			if (delay)
				return delay;
		}
	}

	/* if this is a write to address/predicate register, and that
	 * register is currently in use, we need to defer until it is
	 * free:
	 */
	if (writes_addr(instr) && ctx->addr) {
		assert(ctx->addr != instr);
		return DELAYED;
	}
	if (writes_pred(instr) && ctx->pred) {
		assert(ctx->pred != instr);
		return DELAYED;
	}

	schedule(ctx, instr, true);
	return SCHEDULED;
}