static void
block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block)
{
	struct ir3_instruction *instr;

	/* schedule all the shader inputs (meta-instr) first so that
	 * the RA step sees that the input registers contain a value
	 * from the start of the shader:
	 */
	if (!block->parent) {
		unsigned i;
		for (i = 0; i < block->ninputs; i++) {
			struct ir3_instruction *in = block->inputs[i];
			if (in)
				schedule(ctx, in, true);
		}
	}

	while ((instr = block->head) && !ctx->error) {
		/* NOTE: always grab next *before* trysched(), in case the
		 * instruction is actually scheduled (and therefore moved
		 * from depth list into scheduled list)
		 */
		struct ir3_instruction *next = instr->next;
		int cnt = trysched(ctx, instr);

		if (cnt == DELAYED)
			cnt = block_sched_undelayed(ctx, block);

		/* -1 is signal to return up stack, but to us means same as 0: */
		cnt = MAX2(0, cnt);
		cnt += ctx->cnt;
		instr = next;

		/* if the deepest remaining instruction cannot be scheduled, try
		 * the increasingly more shallow instructions until the needed
		 * number of delay slots is filled:
		 */
		while (instr && (cnt > ctx->cnt)) {
			next = instr->next;
			trysched(ctx, instr);
			instr = next;
		}

		/* and if we run out of instructions that can be scheduled,
		 * then it is time for nops:
		 */
		while (cnt > ctx->cnt)
			schedule(ctx, ir3_instr_create(block, 0, OPC_NOP), false);
	}

	/* at this point, the scheduled list is in reverse order, so fix that: */
	block->head = reverse(ctx->scheduled);
}
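The ir3_sched_ctx struct itself is not part of this listing. The sketch below only records the fields the code above actually touches, with types and meanings inferred from how they are used; it is an illustration, not the verbatim definition.

/* Sketch of the scheduler context; fields inferred from their uses in
 * block_sched()/trysched().  The real struct may carry more state.
 */
struct ir3_sched_ctx {
	struct ir3_instruction *scheduled; /* scheduled list, built in reverse order */
	struct ir3_instruction *addr;      /* instr currently owning the address reg, if in use */
	struct ir3_instruction *pred;      /* instr currently owning the predicate reg, if in use */
	unsigned cnt;                      /* number of instructions scheduled so far */
	bool error;                        /* set when scheduling gets stuck */
};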
/* when we encounter an instruction that writes to the address register
 * when it is in use, we delay that instruction and try to schedule all
 * other instructions using the current address register:
 */
static int
block_sched_undelayed(struct ir3_sched_ctx *ctx, struct ir3_block *block)
{
	struct ir3_instruction *instr = block->head;
	bool addr_in_use = false;
	bool pred_in_use = false;
	bool all_delayed = true;
	unsigned cnt = ~0, attempted = 0;

	while (instr) {
		struct ir3_instruction *next = instr->next;
		bool addr = uses_current_addr(ctx, instr);
		bool pred = uses_current_pred(ctx, instr);

		if (addr || pred) {
			int ret = trysched(ctx, instr);

			if (ret != DELAYED)
				all_delayed = false;

			if (ret == SCHEDULED)
				cnt = 0;
			else if (ret > 0)
				cnt = MIN2(cnt, ret);

			if (addr)
				addr_in_use = true;
			if (pred)
				pred_in_use = true;

			attempted++;
		}

		instr = next;
	}

	if (!addr_in_use)
		ctx->addr = NULL;

	if (!pred_in_use)
		ctx->pred = NULL;

	/* detect if we've gotten ourselves into an impossible situation
	 * and bail if needed
	 */
	if (all_delayed && (attempted > 0))
		ctx->error = true;

	return cnt;
}
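block_sched_undelayed() and trysched() communicate through the DELAYED and SCHEDULED sentinels, whose definitions are not included in this excerpt. Going by the "-1 is signal to return up stack" comment in block_sched(), a plausible (assumed) definition is a pair of distinct negative values:

/* Assumed definitions (not shown in the excerpt).  trysched() returns
 * 0 for an already-scheduled instruction, a positive count of delay
 * slots still outstanding, or one of these negative sentinels.
 */
enum {
	SCHEDULED = -1,  /* instruction was emitted; unwind to block_sched() */
	DELAYED   = -2,  /* writes a busy addr/pred register; deferred for now */
};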
/* A negative return value signals that an instruction has been newly
 * scheduled, return back up to the top of the stack (to block_sched())
 */
static int
trysched(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
	struct ir3_instruction *srcs[ARRAY_SIZE(instr->regs) - 1];
	struct ir3_instruction *src;
	unsigned i, delay, nsrcs = 0;

	/* if already scheduled: */
	if (instr->flags & IR3_INSTR_MARK)
		return 0;

	/* figure out our src's: */
	for (i = 1; i < instr->regs_count; i++) {
		struct ir3_register *reg = instr->regs[i];
		if (reg->flags & IR3_REG_SSA)
			srcs[nsrcs++] = reg->instr;
	}

	/* for each src register in sorted order: */
	delay = 0;
	while ((src = deepest(srcs, nsrcs))) {
		delay = trysched(ctx, src);
		if (delay)
			return delay;
	}

	/* all our dependents are scheduled, figure out if
	 * we have enough delay slots to schedule ourself:
	 */
	delay = delay_calc(ctx, instr);
	if (!delay) {
		schedule(ctx, instr, true);
		return -1;
	}

	return delay;
}
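trysched() walks its sources through deepest(), which is not shown in this listing. The expected behavior is to hand back the not-yet-consumed source with the greatest depth and clear it out of the array so the caller's loop eventually terminates; the body below is a sketch of that behavior, not the verbatim helper.

/* Sketch only: pick the remaining src with the greatest ->depth (i.e.
 * the longest dependency chain), remove it from the array, and return
 * it; NULL once the array is exhausted.
 */
static struct ir3_instruction *
deepest(struct ir3_instruction **srcs, unsigned nsrcs)
{
	struct ir3_instruction *d = NULL;
	unsigned i, id = 0;

	for (i = 0; i < nsrcs; i++) {
		if (srcs[i] && (!d || (srcs[i]->depth > d->depth))) {
			d = srcs[i];
			id = i;
		}
	}

	if (d)
		srcs[id] = NULL;

	return d;
}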
/* A negative return value signals that an instruction has been newly
 * scheduled, return back up to the top of the stack (to block_sched())
 */
static int
trysched(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
	struct ir3_instruction *srcs[ARRAY_SIZE(instr->regs) - 1];
	struct ir3_instruction *src;
	unsigned i, delay, nsrcs = 0;

	/* if already scheduled: */
	if (instr->flags & IR3_INSTR_MARK)
		return 0;

	/* figure out our src's: */
	for (i = 1; i < instr->regs_count; i++) {
		struct ir3_register *reg = instr->regs[i];
		if (reg->flags & IR3_REG_SSA)
			srcs[nsrcs++] = reg->instr;
	}

	/* for each src register in sorted order: */
	delay = 0;
	while ((src = deepest(srcs, nsrcs))) {
		delay = trysched(ctx, src);
		if (delay)
			return delay;
	}

	/* all our dependents are scheduled, figure out if
	 * we have enough delay slots to schedule ourself:
	 */
	delay = delay_calc(ctx, instr);
	if (delay)
		return delay;

	/* if the instruction is a kill, we need to ensure *every*
	 * bary.f is scheduled.  The hw seems unhappy if the thread
	 * gets killed before the end-input (ei) flag is hit.
	 *
	 * We could do this by adding each bary.f instruction as
	 * virtual ssa src for the kill instruction.  But we have
	 * fixed length instr->regs[].
	 *
	 * TODO this wouldn't be quite right if we had multiple
	 * basic blocks, if any block was conditional.  We'd need
	 * to schedule the bary.f's outside of any block which
	 * was conditional that contained a kill.. I think..
	 */
	if (is_kill(instr)) {
		struct ir3 *ir = instr->block->shader;
		unsigned i;

		for (i = 0; i < ir->baryfs_count; i++) {
			if (ir->baryfs[i]->depth == DEPTH_UNUSED)
				continue;
			delay = trysched(ctx, ir->baryfs[i]);
			if (delay)
				return delay;
		}
	}

	/* if this is a write to address/predicate register, and that
	 * register is currently in use, we need to defer until it is
	 * free:
	 */
	if (writes_addr(instr) && ctx->addr) {
		assert(ctx->addr != instr);
		return DELAYED;
	}
	if (writes_pred(instr) && ctx->pred) {
		assert(ctx->pred != instr);
		return DELAYED;
	}

	schedule(ctx, instr, true);

	return SCHEDULED;
}
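writes_addr() and writes_pred() classify an instruction by its destination register; they are also not part of this listing. A sketch under the assumption that reg_num() and the REG_A0/REG_P0 register numbers are available from the ir3/adreno ISA headers (both names are assumptions for this excerpt):

/* Sketch: decide whether an instruction's dst is the address (a0) or
 * predicate (p0) register.  reg_num()/REG_A0/REG_P0 are assumed here,
 * not taken from the listing above.
 */
static bool
writes_addr(struct ir3_instruction *instr)
{
	if (instr->regs_count > 0) {
		struct ir3_register *dst = instr->regs[0];
		return reg_num(dst) == REG_A0;
	}
	return false;
}

static bool
writes_pred(struct ir3_instruction *instr)
{
	if (instr->regs_count > 0) {
		struct ir3_register *dst = instr->regs[0];
		return reg_num(dst) == REG_P0;
	}
	return false;
}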