/* is it a type preserving mov, with ok flags? */
static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
{
    if (is_same_type_mov(instr)) {
        struct ir3_register *dst = instr->regs[0];
        struct ir3_register *src = instr->regs[1];
        struct ir3_instruction *src_instr = ssa(src);

        /* only if mov src is SSA (not const/immed): */
        if (!src_instr)
            return false;

        /* no indirect: */
        if (dst->flags & IR3_REG_RELATIV)
            return false;
        if (src->flags & IR3_REG_RELATIV)
            return false;
        if (src->flags & IR3_REG_ARRAY)
            return false;

        if (!allow_flags)
            if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
                    IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
                return false;

        /* TODO: remove this hack: */
        if (src_instr->opc == OPC_META_FO)
            return false;

        return true;
    }
    return false;
}
static void console_int(uint32_t port, void *data)
{
    XENCONS_RING_IDX cons, prod;

    cons = console.intf->in_cons;
    prod = console.intf->in_prod;
    rmb();

    if (prod == cons)
        return;

    uint32_t in_size = prod - cons;
    uint8_t buf[in_size];
    uint8_t *ptr = buf;

    ssa(SYS_STATS_IO_INPUT, in_size);

    while (prod > cons)
    {
        int idx = MASK_XENCONS_IDX(cons++, console.intf->in);
#ifdef DEBUG_CONSOLE
        if (debug_key(console.intf->in[idx]))
        {
            in_size--;
            continue;
        }
#endif
        *ptr++ = console.intf->in[idx];
    }

    console.intf->in_cons = prod;
    wmb();

    if (console.attached)
        outlet_new_data(console.attached, buf, in_size);
}
/* Handle special case of eliminating output mov, and similar cases where
 * there isn't a normal "consuming" instruction.  In this case we cannot
 * collapse flags (ie. output mov from const, or w/ abs/neg flags, cannot
 * be eliminated)
 */
static struct ir3_instruction *
eliminate_output_mov(struct ir3_instruction *instr)
{
    if (is_eligible_mov(instr, false)) {
        struct ir3_register *reg = instr->regs[1];
        if (!(reg->flags & IR3_REG_ARRAY)) {
            struct ir3_instruction *src_instr = ssa(reg);
            debug_assert(src_instr);
            return src_instr;
        }
    }
    return instr;
}
ArgGroup& ArgGroup::typedValue(int i) {
  // If there's exactly one register argument slot left, the whole TypedValue
  // goes on the stack instead of being split between a register and the
  // stack.
  if (m_gpArgs.size() == num_arg_regs() - 1) {
    m_override = &m_stkArgs;
  }
  static_assert(offsetof(TypedValue, m_data) == 0, "");
  static_assert(offsetof(TypedValue, m_type) == 8, "");
  ssa(i).type(i);
  m_override = nullptr;
  return *this;
}
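A minimal standalone sketch of the slot-assignment rule described in the comment above, using hypothetical names and constants (NUM_ARG_REGS, add_typed_value) rather than HHVM's real ABI machinery: when exactly one general-purpose argument register remains, both halves of the TypedValue go to the stack instead of being split.

#include <assert.h>
#include <stddef.h>

/* Hypothetical constants: 6 GP argument registers, and a TypedValue that
 * occupies two argument slots (m_data, m_type). */
#define NUM_ARG_REGS 6

struct arg_slots {
    int gp[NUM_ARG_REGS];
    size_t gp_count;
    int stack[16];
    size_t stack_count;
};

/* Mirror of the decision above: if at most one register slot is left, push the
 * whole TypedValue onto the stack instead of splitting it. */
static void add_typed_value(struct arg_slots *a, int data_slot, int type_slot)
{
    int whole_on_stack = (a->gp_count >= NUM_ARG_REGS - 1);
    if (whole_on_stack) {
        a->stack[a->stack_count++] = data_slot;
        a->stack[a->stack_count++] = type_slot;
    } else {
        a->gp[a->gp_count++] = data_slot;
        a->gp[a->gp_count++] = type_slot;
    }
}

int main(void)
{
    struct arg_slots a = { .gp = {0, 1, 2, 3, 4}, .gp_count = 5 };
    add_typed_value(&a, 10, 11);        /* would straddle the boundary */
    assert(a.gp_count == 5 && a.stack_count == 2);
    return 0;
}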
void goto_symext::symex_dead(statet &state)
{
  const goto_programt::instructiont &instruction=*state.source.pc;

  const codet &code=to_code(instruction.code);

  if(code.operands().size()!=1)
    throw "dead expects one operand";

  if(code.op0().id()!=ID_symbol)
    throw "dead expects symbol as first operand";

  // We increase the L2 renaming to make these non-deterministic.
  // We also prevent propagation of old values.

  ssa_exprt ssa(to_symbol_expr(code.op0()));
  state.rename(ssa, ns, goto_symex_statet::L1);

  // in case of pointers, put something into the value set
  if(ns.follow(code.op0().type()).id()==ID_pointer)
  {
    exprt failed=
      get_failed_symbol(to_symbol_expr(code.op0()), ns);

    exprt rhs;

    if(failed.is_not_nil())
    {
      address_of_exprt address_of_expr;
      address_of_expr.object()=failed;
      address_of_expr.type()=code.op0().type();
      rhs=address_of_expr;
    }
    else
      rhs=exprt(ID_invalid);

    state.rename(rhs, ns, goto_symex_statet::L1);
    state.value_set.assign(ssa, rhs, ns, true, false);
  }

  ssa_exprt ssa_lhs=to_ssa_expr(ssa);
  const irep_idt &l1_identifier=ssa_lhs.get_identifier();

  // prevent propagation
  state.propagation.remove(l1_identifier);

  // L2 renaming
  if(state.level2.current_names.find(l1_identifier)!=
     state.level2.current_names.end())
    state.level2.increase_counter(l1_identifier);
}
/* propagate register flags from src to dst.. negates need special
 * handling to cancel each other out.
 */
static void combine_flags(unsigned *dstflags, struct ir3_instruction *src)
{
    unsigned srcflags = src->regs[1]->flags;

    /* if what we are combining into already has (abs) flags,
     * we can drop (neg) from src:
     */
    if (*dstflags & IR3_REG_FABS)
        srcflags &= ~IR3_REG_FNEG;
    if (*dstflags & IR3_REG_SABS)
        srcflags &= ~IR3_REG_SNEG;

    if (srcflags & IR3_REG_FABS)
        *dstflags |= IR3_REG_FABS;
    if (srcflags & IR3_REG_SABS)
        *dstflags |= IR3_REG_SABS;
    if (srcflags & IR3_REG_FNEG)
        *dstflags ^= IR3_REG_FNEG;
    if (srcflags & IR3_REG_SNEG)
        *dstflags ^= IR3_REG_SNEG;
    if (srcflags & IR3_REG_BNOT)
        *dstflags ^= IR3_REG_BNOT;

    *dstflags &= ~IR3_REG_SSA;
    *dstflags |= srcflags & IR3_REG_SSA;
    *dstflags |= srcflags & IR3_REG_CONST;
    *dstflags |= srcflags & IR3_REG_IMMED;
    *dstflags |= srcflags & IR3_REG_RELATIV;
    *dstflags |= srcflags & IR3_REG_ARRAY;
    *dstflags |= srcflags & IR3_REG_HIGH;

    /* if src of the src is boolean we can drop the (abs) since we know
     * the source value is already a positive integer.  This cleans
     * up the absnegs that get inserted when converting between nir and
     * native boolean (see ir3_b2n/n2b)
     */
    struct ir3_instruction *srcsrc = ssa(src->regs[1]);
    if (srcsrc && is_bool(srcsrc))
        *dstflags &= ~IR3_REG_SABS;
}
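A self-contained sketch of why the negate/not bits above are folded in with XOR while the abs bits are OR'd: applying (neg) twice cancels out, and a (neg) underneath an (abs) is a no-op. The REG_FABS/REG_FNEG values below are hypothetical stand-ins, not the real IR3_REG_* flags.

#include <assert.h>

/* hypothetical stand-ins for the real IR3_REG_* bits */
#define REG_FABS 0x01
#define REG_FNEG 0x02

static void combine_neg(unsigned *dstflags, unsigned srcflags)
{
    if (*dstflags & REG_FABS)      /* (abs) already applied: |x| == |-x| */
        srcflags &= ~REG_FNEG;
    if (srcflags & REG_FABS)
        *dstflags |= REG_FABS;
    if (srcflags & REG_FNEG)
        *dstflags ^= REG_FNEG;     /* -(-x) == x, so two negs cancel */
}

int main(void)
{
    unsigned flags = REG_FNEG;          /* consumer already negates */
    combine_neg(&flags, REG_FNEG);      /* fold in another (neg) mov */
    assert(!(flags & REG_FNEG));        /* the negates cancel */

    flags = REG_FABS;                   /* consumer takes (abs) */
    combine_neg(&flags, REG_FNEG);      /* a (neg) below an (abs) is dropped */
    assert(flags == REG_FABS);
    return 0;
}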
void console_write(const char *msg, int len)
{
    static int was_cr = 0;

    int sent = 0;
    while (sent < len)
    {
        XENCONS_RING_IDX cons, prod;
        cons = console.intf->out_cons;
        rmb();
        prod = console.intf->out_prod;

        //  while ((sent < len) && (prod - cons < sizeof(console.intf->out)))
        //      console.intf->out[MASK_XENCONS_IDX(prod++, console.intf->out)] = msg[sent++];
        //
        // It may be possible to use stty or ESC sequence instead of this nastiness
        //
        while ((sent < len) && (prod - cons < sizeof(console.intf->out)))
        {
            if (msg[sent] == '\n' && !was_cr)
            {
                int idx = MASK_XENCONS_IDX(prod, console.intf->out);
                console.intf->out[idx] = '\r';
                prod++;
                if (prod - cons >= sizeof(console.intf->out))
                    break;
            }
            was_cr = (msg[sent] == '\r');
            int idx = MASK_XENCONS_IDX(prod, console.intf->out);
            console.intf->out[idx] = msg[sent++];
            prod++;
        }

        console.intf->out_prod = prod;
        wmb();

        event_kick(console.chan);
    }

    ssa(SYS_STATS_IO_OUTPUT, len);
}
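Both console routines rely on the Xen console-ring convention: in_prod/out_prod and in_cons/out_cons are free-running indices, and MASK_XENCONS_IDX reduces an index modulo the power-of-two ring size. Below is a minimal single-threaded sketch of that convention, with hypothetical ring_put/ring_get helpers and an 8-byte ring; the real shared-memory code additionally needs the rmb()/wmb() barriers shown above.

#include <assert.h>
#include <stdint.h>

#define RING_SIZE 8   /* must be a power of two */
#define MASK_IDX(idx) ((idx) & (RING_SIZE - 1))

static char ring[RING_SIZE];
static uint32_t prod, cons;   /* free-running; wrap naturally at 2^32 */

/* producer side: returns 0 if the ring is full */
static int ring_put(char c)
{
    if (prod - cons >= RING_SIZE)   /* unsigned subtraction handles wrap */
        return 0;
    ring[MASK_IDX(prod++)] = c;
    return 1;
}

/* consumer side: returns 0 if the ring is empty */
static int ring_get(char *c)
{
    if (prod == cons)
        return 0;
    *c = ring[MASK_IDX(cons++)];
    return 1;
}

int main(void)
{
    for (char c = 'a'; c <= 'h'; c++)
        assert(ring_put(c));
    assert(!ring_put('x'));   /* full: prod - cons == RING_SIZE */
    char out;
    assert(ring_get(&out) && out == 'a');
    assert(ring_put('x'));    /* one slot freed */
    return 0;
}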
/* is it a type preserving mov, with ok flags? */
static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
{
    if (is_same_type_mov(instr)) {
        struct ir3_register *dst = instr->regs[0];
        struct ir3_register *src = instr->regs[1];
        struct ir3_instruction *src_instr = ssa(src);

        /* only if mov src is SSA (not const/immed): */
        if (!src_instr)
            return false;

        /* no indirect: */
        if (dst->flags & IR3_REG_RELATIV)
            return false;
        if (src->flags & IR3_REG_RELATIV)
            return false;

        if (!allow_flags)
            if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
                    IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
                return false;

        /* TODO: remove this hack: */
        if (src_instr->opc == OPC_META_FO)
            return false;

        /* TODO: we currently don't handle left/right neighbors
         * very well when inserting parallel-copies into phi..
         * to avoid problems don't eliminate a mov coming out
         * of phi..
         */
        if (src_instr->opc == OPC_META_PHI)
            return false;

        return true;
    }
    return false;
}
proc_t *scheduler_next(proc_t *current, int reds_left)
{
    set_phase(PHASE_NEXT);

    uint32_t reds_used = SLICE_REDUCTIONS - reds_left;
    ssi(SYS_STATS_CTX_SWITCHES);
    ssa(SYS_STATS_REDUCTIONS, reds_used);
    current->total_reds += reds_used;

    proc_t *next_proc = 0;
    uint64_t ticks = monotonic_clock(); // freeze time

    assert(current->my_queue == MY_QUEUE_NONE);

#ifdef PROFILE_HARNESS
    static uint64_t proc_started_ns = 0;
    if (proc_started_ns != 0)
        prof_slice_complete(current->pid,
            current->result.what, current->cap.ip, proc_started_ns, ticks);
#endif

    proc_t *expired;
    while ((expired = wait_list_expired(&queues.on_timed_receive, ticks)) != 0)
    {
        expired->cap.ip = expired->result.jump_to;
        if (scheduler_park_runnable_N(expired) < 0)
            scheduler_exit_process(expired, A_NO_MEMORY);
    }

    int memory_exhausted = 0;
    switch (current->result.what)
    {
    case SLICE_RESULT_YIELD:
        if (scheduler_park_runnable_N(current) < 0)
            memory_exhausted = 1;
        break;

    case SLICE_RESULT_WAIT:
        if (current->result.until_when == LING_INFINITY)
        {
            if (proc_list_put_N(&queues.on_infinite_receive, current) < 0)
                memory_exhausted = 1;
            else
                current->my_queue = MY_QUEUE_INF_WAIT;
        }
        else
        {
            if (wait_list_put_N(&queues.on_timed_receive,
                    current, current->result.until_when) < 0)
                memory_exhausted = 1;
            else
                current->my_queue = MY_QUEUE_TIMED_WAIT;
        }
        break;

    case SLICE_RESULT_DONE:
        scheduler_exit_process(current, A_NORMAL);
        break;

    case SLICE_RESULT_PURGE_PROCS:
        // purge_module() call may have detected processes lingering on the old
        // code - terminate them
        if (scheduler_park_runnable_N(current) < 0)
            memory_exhausted = 1;
        for (int i = 0; i < num_purged; i++)
            if (scheduler_signal_exit_N(purgatory[i], current->pid, A_KILL) < 0)
                memory_exhausted = 1;
        num_purged = 0;
        break;

    case SLICE_RESULT_EXIT:
        scheduler_exit_process(current, current->result.reason);
        // what about the returned value when main function just returns?
        break;

    case SLICE_RESULT_EXIT2:
        // only needed to implement erlang:exit/2
        if (scheduler_park_runnable_N(current) < 0 ||
            (scheduler_signal_exit_N(current->result.victim,
                current->pid, current->result.reason2) < 0))
            memory_exhausted = 1;
        break;

    case SLICE_RESULT_ERROR:
        scheduler_exit_process(current, current->result.reason);
        // how is this different from SLICE_RESULT_EXIT?
        break;

    case SLICE_RESULT_THROW:
        scheduler_exit_process(current, current->result.reason);
        // how is this different from SLICE_RESULT_EXIT?
        break;

    default:
    {
        assert(current->result.what == SLICE_RESULT_OUTLET_CLOSE);
        if (scheduler_park_runnable_N(current) < 0)
            memory_exhausted = 1;
        outlet_t *closing = current->result.closing;
        //assert(is_atom(current->result.why));
        outlet_close(closing, current->result.why);
        break;
    }
    }

    if (memory_exhausted)
        scheduler_exit_process(current, A_NO_MEMORY);

do_pending:
    ticks = monotonic_clock();
    while ((expired = wait_list_expired(&queues.on_timed_receive, ticks)) != 0)
    {
        expired->cap.ip = expired->result.jump_to;
        if (scheduler_park_runnable_N(expired) < 0)
            scheduler_exit_process(expired, A_NO_MEMORY);
    }

    set_phase(PHASE_EVENTS);

    // software events/timeouts
    net_check_timeouts();
    etimer_expired(ticks);

    // 'hardware' events
    int nr_fired = events_do_pending();
    update_event_times(nr_fired, ticks);

    set_phase(PHASE_NEXT);

    // select_runnable
    if (!proc_queue_is_empty(&queues.high_prio))
        next_proc = proc_queue_get(&queues.high_prio);
    else if (normal_count < NORMAL_ADVANTAGE)
    {
        if (!proc_queue_is_empty(&queues.normal_prio))
            next_proc = proc_queue_get(&queues.normal_prio);
        else if (!proc_queue_is_empty(&queues.low_prio))
            next_proc = proc_queue_get(&queues.low_prio);
        normal_count++;
    }
    else
    {
        if (!proc_queue_is_empty(&queues.low_prio))
            next_proc = proc_queue_get(&queues.low_prio);
        else if (!proc_queue_is_empty(&queues.normal_prio))
            next_proc = proc_queue_get(&queues.normal_prio);
        normal_count = 0;
    }

    if (next_proc == 0)
    {
        // no runnable processes; poll for events from all three sources

        // Beware that events_poll() reports events 5us after they occur. If
        // a new event is expected very soon we are better off polling event
        // bits manually (using events_do_pending())

        // Devote a portion of time until the next event to gc waiting processes
        garbage_collect_waiting_processes(expect_event_in_ns / 2);

        if (expect_event_in_ns < MANUAL_POLLING_THRESHOLD)
            goto do_pending;

        uint64_t next_ticks = wait_list_timeout(&queues.on_timed_receive);
        uint64_t closest_timeout = etimer_closest_timeout();
        if (closest_timeout < next_ticks)
            next_ticks = closest_timeout;
        closest_timeout = lwip_closest_timeout();
        if (closest_timeout < next_ticks)
            next_ticks = closest_timeout;
        scheduler_runtime_update();
        events_poll(next_ticks); // LING_INFINITY is big enough
        scheduler_runtime_start();
        goto do_pending;
    }

    next_proc->my_queue = MY_QUEUE_NONE;

    //TODO: update stats

#ifdef PROFILE_HARNESS
    proc_started_ns = ticks;
#endif

    set_phase(PHASE_ERLANG);
    return next_proc;
}
/**
 * Find instruction src's which are mov's that can be collapsed, replacing
 * the mov dst with the mov src
 */
static void
instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr)
{
    struct ir3_register *reg;

    if (instr->regs_count == 0)
        return;

    if (ir3_instr_check_mark(instr))
        return;

    /* walk down the graph from each src: */
    foreach_src_n(reg, n, instr) {
        struct ir3_instruction *src = ssa(reg);

        if (!src)
            continue;

        instr_cp(ctx, src);

        /* TODO non-indirect access we could figure out which register
         * we actually want and allow cp..
         */
        if (reg->flags & IR3_REG_ARRAY)
            continue;

        reg_cp(ctx, instr, reg, n);
    }

    if (instr->regs[0]->flags & IR3_REG_ARRAY) {
        struct ir3_instruction *src = ssa(instr->regs[0]);
        if (src)
            instr_cp(ctx, src);
    }

    if (instr->address) {
        instr_cp(ctx, instr->address);
        ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
    }

    /* we can end up with extra cmps.s from frontend, which uses a
     *
     *    cmps.s p0.x, cond, 0
     *
     * as a way to mov into the predicate register.  But frequently 'cond'
     * is itself a cmps.s/cmps.f/cmps.u.  So detect this special case and
     * just re-write the instruction writing predicate register to get rid
     * of the double cmps.
     */
    if ((instr->opc == OPC_CMPS_S) &&
            (instr->regs[0]->num == regid(REG_P0, 0)) &&
            ssa(instr->regs[1]) &&
            (instr->regs[2]->flags & IR3_REG_IMMED) &&
            (instr->regs[2]->iim_val == 0)) {
        struct ir3_instruction *cond = ssa(instr->regs[1]);
        switch (cond->opc) {
        case OPC_CMPS_S:
        case OPC_CMPS_F:
        case OPC_CMPS_U:
            instr->opc = cond->opc;
            instr->flags = cond->flags;
            instr->cat2 = cond->cat2;
            instr->address = cond->address;
            instr->regs[1] = cond->regs[1];
            instr->regs[2] = cond->regs[2];
            break;
        default:
            break;
        }
    }
}
/**
 * Handle cp for a given src register.  This additionally handles
 * the cases of collapsing immediate/const (which replace the src
 * register with a non-ssa src) or collapsing mov's from relative
 * src (which needs to also fixup the address src reference by the
 * instruction).
 */
static void
reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
        struct ir3_register *reg, unsigned n)
{
    struct ir3_instruction *src = ssa(reg);

    /* don't propagate copies into a PHI, since we don't know if the
     * src block executed:
     */
    if (instr->opc == OPC_META_PHI)
        return;

    if (is_eligible_mov(src, true)) {
        /* simple case, no immed/const/relativ, only mov's w/ ssa src: */
        struct ir3_register *src_reg = src->regs[1];
        unsigned new_flags = reg->flags;

        combine_flags(&new_flags, src);

        if (valid_flags(instr, n, new_flags)) {
            if (new_flags & IR3_REG_ARRAY) {
                debug_assert(!(reg->flags & IR3_REG_ARRAY));
                reg->array = src_reg->array;
            }
            reg->flags = new_flags;
            reg->instr = ssa(src_reg);
        }

        src = ssa(reg);      /* could be null for IR3_REG_ARRAY case */
        if (!src)
            return;
    } else if (is_same_type_mov(src) &&
            /* cannot collapse const/immed/etc into meta instrs: */
            !is_meta(instr)) {
        /* immed/const/etc cases, which require some special handling: */
        struct ir3_register *src_reg = src->regs[1];
        unsigned new_flags = reg->flags;

        combine_flags(&new_flags, src);

        if (!valid_flags(instr, n, new_flags)) {
            /* See if lowering an immediate to const would help. */
            if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
                debug_assert(new_flags & IR3_REG_IMMED);
                instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags);
                return;
            }

            /* special case for "normal" mad instructions, we can
             * try swapping the first two args if that fits better.
             *
             * the "plain" MAD's (ie. the ones that don't shift first
             * src prior to multiply) can swap their first two srcs if
             * src[0] is !CONST and src[1] is CONST:
             */
            if ((n == 1) && is_mad(instr->opc) &&
                    !(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) &&
                    valid_flags(instr, 0, new_flags)) {
                /* swap src[0] and src[1]: */
                struct ir3_register *tmp;
                tmp = instr->regs[0 + 1];
                instr->regs[0 + 1] = instr->regs[1 + 1];
                instr->regs[1 + 1] = tmp;
                n = 0;
            } else {
                return;
            }
        }

        /* Here we handle the special case of mov from
         * CONST and/or RELATIV.  These need to be handled
         * specially, because in the case of move from CONST
         * there is no src ir3_instruction so we need to
         * replace the ir3_register.  And in the case of
         * RELATIV we need to handle the address register
         * dependency.
         */
        if (src_reg->flags & IR3_REG_CONST) {
            /* an instruction cannot reference two different
             * address registers:
             */
            if ((src_reg->flags & IR3_REG_RELATIV) &&
                    conflicts(instr->address, reg->instr->address))
                return;

            /* This seems to be a hw bug, or something where the timings
             * just somehow don't work out.  This restriction may only
             * apply if the first src is also CONST.
             */
            if ((opc_cat(instr->opc) == 3) && (n == 2) &&
                    (src_reg->flags & IR3_REG_RELATIV) &&
                    (src_reg->array.offset == 0))
                return;

            src_reg = ir3_reg_clone(instr->block->shader, src_reg);
            src_reg->flags = new_flags;
            instr->regs[n+1] = src_reg;

            if (src_reg->flags & IR3_REG_RELATIV)
                ir3_instr_set_address(instr, reg->instr->address);

            return;
        }

        if ((src_reg->flags & IR3_REG_RELATIV) &&
                !conflicts(instr->address, reg->instr->address)) {
            src_reg = ir3_reg_clone(instr->block->shader, src_reg);
            src_reg->flags = new_flags;
            instr->regs[n+1] = src_reg;
            ir3_instr_set_address(instr, reg->instr->address);

            return;
        }

        /* NOTE: seems we can only do immed integers, so don't
         * need to care about float.  But we do need to handle
         * abs/neg *before* checking that the immediate requires
         * few enough bits to encode:
         *
         * TODO: do we need to do something to avoid accidentally
         * catching a float immed?
         */
        if (src_reg->flags & IR3_REG_IMMED) {
            int32_t iim_val = src_reg->iim_val;

            debug_assert((opc_cat(instr->opc) == 1) ||
                    (opc_cat(instr->opc) == 6) ||
                    ir3_cat2_int(instr->opc));

            if (new_flags & IR3_REG_SABS)
                iim_val = abs(iim_val);

            if (new_flags & IR3_REG_SNEG)
                iim_val = -iim_val;

            if (new_flags & IR3_REG_BNOT)
                iim_val = ~iim_val;

            /* other than category 1 (mov) we can only encode up to 10 bits: */
            if ((instr->opc == OPC_MOV) ||
                    !((iim_val & ~0x3ff) && (-iim_val & ~0x3ff))) {
                new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
                src_reg = ir3_reg_clone(instr->block->shader, src_reg);
                src_reg->flags = new_flags;
                src_reg->iim_val = iim_val;
                instr->regs[n+1] = src_reg;
            } else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
                /* See if lowering an immediate to const would help. */
                instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags);
            }

            return;
        }
    }
}
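A quick standalone check of the immediate-encodability test used in reg_cp() above. fits_in_10_bits is a hypothetical helper that mirrors the expression !((iim_val & ~0x3ff) && (-iim_val & ~0x3ff)); as a predicate it accepts exactly the values in [-1023, 1023].

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* hypothetical helper mirroring the encodability test in reg_cp() */
static bool fits_in_10_bits(int32_t iim_val)
{
    return !((iim_val & ~0x3ff) && (-iim_val & ~0x3ff));
}

int main(void)
{
    assert(fits_in_10_bits(0));
    assert(fits_in_10_bits(1023));      /* 0x3ff: high bits clear */
    assert(fits_in_10_bits(-1023));     /* negated value has high bits clear */
    assert(!fits_in_10_bits(1024));     /* neither form fits in 10 bits */
    assert(!fits_in_10_bits(-1024));
    return 0;
}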
static struct ir3_instruction *instr_get(void *arr, int idx)
{
    return ssa(((struct ir3_instruction *)arr)->regs[idx+1]);
}
bool fixSSA(Procedure& proc)
{
    PhaseScope phaseScope(proc, "fixSSA");

    // Collect the stack "variables". If there aren't any, then we don't have anything to do.
    // That's a fairly common case.
    HashMap<StackSlotValue*, Type> stackVariable;
    for (Value* value : proc.values()) {
        if (StackSlotValue* stack = value->as<StackSlotValue>()) {
            if (stack->kind() == StackSlotKind::Anonymous)
                stackVariable.add(stack, Void);
        }
    }

    if (stackVariable.isEmpty())
        return false;

    // Make sure that we know how to optimize all of these. We only know how to handle Load and
    // Store on anonymous variables.
    for (Value* value : proc.values()) {
        auto reject = [&] (Value* value) {
            if (StackSlotValue* stack = value->as<StackSlotValue>())
                stackVariable.remove(stack);
        };

        auto handleAccess = [&] (Value* access, Type type) {
            StackSlotValue* stack = access->lastChild()->as<StackSlotValue>();
            if (!stack)
                return;

            if (value->as<MemoryValue>()->offset()) {
                stackVariable.remove(stack);
                return;
            }

            auto result = stackVariable.find(stack);
            if (result == stackVariable.end())
                return;
            if (result->value == Void) {
                result->value = type;
                return;
            }
            if (result->value == type)
                return;
            stackVariable.remove(result);
        };

        switch (value->opcode()) {
        case Load:
            // We're OK with loads from stack variables at an offset of zero.
            handleAccess(value, value->type());
            break;
        case Store:
            // We're OK with stores to stack variables, but not storing stack variables.
            reject(value->child(0));
            handleAccess(value, value->child(0)->type());
            break;
        default:
            for (Value* child : value->children())
                reject(child);
            break;
        }
    }

    Vector<StackSlotValue*> deadValues;
    for (auto& entry : stackVariable) {
        if (entry.value == Void)
            deadValues.append(entry.key);
    }

    for (StackSlotValue* deadValue : deadValues) {
        deadValue->replaceWithNop();
        stackVariable.remove(deadValue);
    }

    if (stackVariable.isEmpty())
        return false;

    // We know that we have variables to optimize, so do that now.
    breakCriticalEdges(proc);

    SSACalculator ssa(proc);

    // Create a SSACalculator::Variable for every stack variable.
    Vector<StackSlotValue*> variableToStack;
    HashMap<StackSlotValue*, SSACalculator::Variable*> stackToVariable;
    for (auto& entry : stackVariable) {
        StackSlotValue* stack = entry.key;
        SSACalculator::Variable* variable = ssa.newVariable();
        RELEASE_ASSERT(variable->index() == variableToStack.size());
        variableToStack.append(stack);
        stackToVariable.add(stack, variable);
    }

    // Create Defs for all of the stores to the stack variable.
    for (BasicBlock* block : proc) {
        for (Value* value : *block) {
            if (value->opcode() != Store)
                continue;

            StackSlotValue* stack = value->child(1)->as<StackSlotValue>();
            if (!stack)
                continue;

            if (SSACalculator::Variable* variable = stackToVariable.get(stack))
                ssa.newDef(variable, block, value->child(0));
        }
    }

    // Decide where Phis are to be inserted. This creates them but does not insert them.
    ssa.computePhis(
        [&] (SSACalculator::Variable* variable, BasicBlock* block) -> Value* {
            StackSlotValue* stack = variableToStack[variable->index()];
            Value* phi = proc.add<Value>(Phi, stackVariable.get(stack), stack->origin());
            if (verbose) {
                dataLog(
                    "Adding Phi for ", pointerDump(stack), " at ", *block, ": ",
                    deepDump(proc, phi), "\n");
            }
            return phi;
        });

    // Now perform the conversion.
    InsertionSet insertionSet(proc);
    HashMap<StackSlotValue*, Value*> mapping;
    for (BasicBlock* block : proc.blocksInPreOrder()) {
        mapping.clear();

        for (auto& entry : stackToVariable) {
            StackSlotValue* stack = entry.key;
            SSACalculator::Variable* variable = entry.value;

            SSACalculator::Def* def = ssa.reachingDefAtHead(block, variable);
            if (def)
                mapping.set(stack, def->value());
        }

        for (SSACalculator::Def* phiDef : ssa.phisForBlock(block)) {
            StackSlotValue* stack = variableToStack[phiDef->variable()->index()];

            insertionSet.insertValue(0, phiDef->value());
            mapping.set(stack, phiDef->value());
        }

        for (unsigned valueIndex = 0; valueIndex < block->size(); ++valueIndex) {
            Value* value = block->at(valueIndex);
            value->performSubstitution();

            switch (value->opcode()) {
            case Load: {
                if (StackSlotValue* stack = value->child(0)->as<StackSlotValue>()) {
                    if (Value* replacement = mapping.get(stack))
                        value->replaceWithIdentity(replacement);
                }
                break;
            }

            case Store: {
                if (StackSlotValue* stack = value->child(1)->as<StackSlotValue>()) {
                    if (stackToVariable.contains(stack)) {
                        mapping.set(stack, value->child(0));
                        value->replaceWithNop();
                    }
                }
                break;
            }

            default:
                break;
            }
        }

        unsigned upsilonInsertionPoint = block->size() - 1;
        Origin upsilonOrigin = block->last()->origin();
        for (BasicBlock* successorBlock : block->successorBlocks()) {
            for (SSACalculator::Def* phiDef : ssa.phisForBlock(successorBlock)) {
                Value* phi = phiDef->value();
                SSACalculator::Variable* variable = phiDef->variable();
                StackSlotValue* stack = variableToStack[variable->index()];
                Value* mappedValue = mapping.get(stack);
                if (verbose) {
                    dataLog(
                        "Mapped value for ", *stack, " with successor Phi ", *phi,
                        " at end of ", *block, ": ", pointerDump(mappedValue), "\n");
                }

                if (!mappedValue)
                    mappedValue = insertionSet.insertBottom(upsilonInsertionPoint, phi);

                insertionSet.insert<UpsilonValue>(
                    upsilonInsertionPoint, upsilonOrigin, mappedValue, phi);
            }
        }

        insertionSet.execute(block);
    }

    // Finally, kill the stack slots.
    for (StackSlotValue* stack : variableToStack)
        stack->replaceWithNop();

    if (verbose) {
        dataLog("B3 after SSA conversion:\n");
        dataLog(proc);
    }

    return true;
}
/**
 * Find instruction src's which are mov's that can be collapsed, replacing
 * the mov dst with the mov src
 */
static void
instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr)
{
    struct ir3_register *reg;

    if (instr->regs_count == 0)
        return;

    if (ir3_instr_check_mark(instr))
        return;

    /* walk down the graph from each src: */
    foreach_src_n(reg, n, instr) {
        struct ir3_instruction *src = ssa(reg);

        if (!src)
            continue;

        instr_cp(ctx, src);

        /* TODO non-indirect access we could figure out which register
         * we actually want and allow cp..
         */
        if (reg->flags & IR3_REG_ARRAY)
            continue;

        /* Don't CP absneg into meta instructions, that won't end well: */
        if (is_meta(instr) && (src->opc != OPC_MOV))
            continue;

        reg_cp(ctx, instr, reg, n);
    }

    if (instr->regs[0]->flags & IR3_REG_ARRAY) {
        struct ir3_instruction *src = ssa(instr->regs[0]);
        if (src)
            instr_cp(ctx, src);
    }

    if (instr->address) {
        instr_cp(ctx, instr->address);
        ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
    }

    /* we can end up with extra cmps.s from frontend, which uses a
     *
     *    cmps.s p0.x, cond, 0
     *
     * as a way to mov into the predicate register.  But frequently 'cond'
     * is itself a cmps.s/cmps.f/cmps.u.  So detect this special case and
     * just re-write the instruction writing predicate register to get rid
     * of the double cmps.
     */
    if ((instr->opc == OPC_CMPS_S) &&
            (instr->regs[0]->num == regid(REG_P0, 0)) &&
            ssa(instr->regs[1]) &&
            (instr->regs[2]->flags & IR3_REG_IMMED) &&
            (instr->regs[2]->iim_val == 0)) {
        struct ir3_instruction *cond = ssa(instr->regs[1]);
        switch (cond->opc) {
        case OPC_CMPS_S:
        case OPC_CMPS_F:
        case OPC_CMPS_U:
            instr->opc = cond->opc;
            instr->flags = cond->flags;
            instr->cat2 = cond->cat2;
            instr->address = cond->address;
            instr->regs[1] = cond->regs[1];
            instr->regs[2] = cond->regs[2];
            instr->barrier_class |= cond->barrier_class;
            instr->barrier_conflict |= cond->barrier_conflict;
            unuse(cond);
            break;
        default:
            break;
        }
    }

    /* Handle converting a sam.s2en (taking samp/tex idx params via
     * register) into a normal sam (encoding immediate samp/tex idx)
     * if they are immediate.  This saves some instructions and regs
     * in the common case where we know samp/tex at compile time:
     */
    if (is_tex(instr) && (instr->flags & IR3_INSTR_S2EN) &&
            !(ir3_shader_debug & IR3_DBG_FORCES2EN)) {
        /* The first src will be a fan-in (collect); if both of its
         * two sources are mov from imm, then we can switch to the
         * immediate (non-s2en) encoding:
         */
        struct ir3_instruction *samp_tex = ssa(instr->regs[1]);
        debug_assert(samp_tex->opc == OPC_META_FI);

        struct ir3_instruction *samp = ssa(samp_tex->regs[1]);
        struct ir3_instruction *tex = ssa(samp_tex->regs[2]);

        if ((samp->opc == OPC_MOV) &&
                (samp->regs[1]->flags & IR3_REG_IMMED) &&
                (tex->opc == OPC_MOV) &&
                (tex->regs[1]->flags & IR3_REG_IMMED)) {
            instr->flags &= ~IR3_INSTR_S2EN;
            instr->cat5.samp = samp->regs[1]->iim_val;
            instr->cat5.tex = tex->regs[1]->iim_val;
            instr->regs[1]->instr = NULL;
        }
    }
}
void dBasicBlocksGraph::ConvertToSSA ()
{
    dConvertToSSASolver ssa (this);
    ssa.Solve();
}
void cgCallBuiltin(IRLS& env, const IRInstruction* inst) {
  auto const extra = inst->extra<CallBuiltin>();
  auto const callee = extra->callee;
  auto const returnType = inst->typeParam();
  auto const funcReturnType = callee->returnType();
  auto const returnByValue = callee->isReturnByValue();

  auto const dstData = dstLoc(env, inst, 0).reg(0);
  auto const dstType = dstLoc(env, inst, 0).reg(1);

  auto& v = vmain(env);

  // Whether `t' is passed in/out of C++ as String&/Array&/Object&.
  auto const isReqPtrRef = [] (MaybeDataType t) {
    return isStringType(t) || isArrayLikeType(t) ||
           t == KindOfObject || t == KindOfResource;
  };

  if (FixupMap::eagerRecord(callee)) {
    auto const sp = srcLoc(env, inst, 1).reg();
    auto const spOffset = cellsToBytes(extra->spOffset.offset);
    auto const& marker = inst->marker();
    auto const pc = marker.fixupSk().unit()->entry() + marker.fixupBcOff();

    auto const synced_sp = v.makeReg();
    v << lea{sp[spOffset], synced_sp};
    emitEagerSyncPoint(v, pc, rvmtl(), srcLoc(env, inst, 0).reg(), synced_sp);
  }

  int returnOffset = rds::kVmMInstrStateOff +
                     offsetof(MInstrState, tvBuiltinReturn);
  auto args = argGroup(env, inst);

  if (!returnByValue) {
    if (isBuiltinByRef(funcReturnType)) {
      if (isReqPtrRef(funcReturnType)) {
        returnOffset += TVOFF(m_data);
      }
      // Pass the address of tvBuiltinReturn to the native function as the
      // location where it can construct the return Array, String, Object, or
      // Variant.
      args.addr(rvmtl(), returnOffset);
      args.indirect();
    }
  }

  // The srcs past the first two (sp and fp) are the arguments to the callee.
  auto srcNum = uint32_t{2};

  // Add the this_ or self_ argument for HNI builtins.
  if (callee->isMethod()) {
    if (callee->isStatic()) {
      args.ssa(srcNum);
      ++srcNum;
    } else {
      // Note that we don't support objects with vtables here (if they may need
      // a $this pointer adjustment). This should be filtered out during irgen
      // or before.
      args.ssa(srcNum);
      ++srcNum;
    }
  }

  // Add the func_num_args() value if needed.
  if (callee->attrs() & AttrNumArgs) {
    // If `numNonDefault' is negative, this is passed as an src.
    if (extra->numNonDefault >= 0) {
      args.imm((int64_t)extra->numNonDefault);
    } else {
      args.ssa(srcNum);
      ++srcNum;
    }
  }

  // Add the positional arguments.
  for (uint32_t i = 0; i < callee->numParams(); ++i, ++srcNum) {
    auto const& pi = callee->params()[i];

    // Non-pointer and NativeArg args are passed by value. String, Array,
    // Object, and Variant are passed by const&, i.e. a pointer to stack memory
    // holding the value, so we expect PtrToT types for these. Pointers to
    // req::ptr types (String, Array, Object) need adjusting to point to
    // &ptr->m_data.
    if (TVOFF(m_data) && !pi.nativeArg && isReqPtrRef(pi.builtinType)) {
      assertx(inst->src(srcNum)->type() <= TPtrToGen);
      args.addr(srcLoc(env, inst, srcNum).reg(), TVOFF(m_data));
    } else if (pi.nativeArg && !pi.builtinType && !callee->byRef(i)) {
      // This condition indicates a MixedTV (i.e., TypedValue-by-value) arg.
      args.typedValue(srcNum);
    } else {
      args.ssa(srcNum, pi.builtinType == KindOfDouble);
    }
  }

  auto dest = [&] () -> CallDest {
    if (isBuiltinByRef(funcReturnType)) {
      if (!returnByValue) return kVoidDest; // indirect return
      return funcReturnType
        ? callDest(dstData)           // String, Array, or Object
        : callDest(dstData, dstType); // Variant
    }
    return funcReturnType == KindOfDouble
      ? callDestDbl(env, inst)
      : callDest(env, inst);
  }();

  cgCallHelper(v, env, CallSpec::direct(callee->nativeFuncPtr()), dest,
               SyncOptions::Sync, args);

  // For primitive return types (int, bool, double) and returnByValue, the
  // return value is already in dstData/dstType.
  if (returnType.isSimpleType() || returnByValue) return;

  // For return by reference (String, Object, Array, Variant), the builtin
  // writes the return value into MInstrState::tvBuiltinReturn, from where it
  // has to be tested and copied.

  if (returnType.isReferenceType()) {
    // The return type is String, Array, or Object; fold nullptr to KindOfNull.
    assertx(isBuiltinByRef(funcReturnType) && isReqPtrRef(funcReturnType));

    v << load{rvmtl()[returnOffset], dstData};

    if (dstType.isValid()) {
      auto const sf = v.makeReg();
      auto const rtype = v.cns(returnType.toDataType());
      auto const nulltype = v.cns(KindOfNull);
      v << testq{dstData, dstData, sf};
      v << cmovb{CC_Z, sf, rtype, nulltype, dstType};
    }
    return;
  }

  if (returnType <= TCell || returnType <= TBoxedCell) {
    // The return type is Variant; fold KindOfUninit to KindOfNull.
    assertx(isBuiltinByRef(funcReturnType) && !isReqPtrRef(funcReturnType));
    static_assert(KindOfUninit == 0, "KindOfUninit must be 0 for test");

    v << load{rvmtl()[returnOffset + TVOFF(m_data)], dstData};

    if (dstType.isValid()) {
      auto const rtype = v.makeReg();
      v << loadb{rvmtl()[returnOffset + TVOFF(m_type)], rtype};

      auto const sf = v.makeReg();
      auto const nulltype = v.cns(KindOfNull);
      v << testb{rtype, rtype, sf};
      v << cmovb{CC_Z, sf, rtype, nulltype, dstType};
    }
    return;
  }

  not_reached();
}
void goto_symext::symex_decl(statet &state, const symbol_exprt &expr)
{
  // We increase the L2 renaming to make these non-deterministic.
  // We also prevent propagation of old values.

  ssa_exprt ssa(expr);
  state.rename(ssa, ns, goto_symex_statet::L1);
  const irep_idt &l1_identifier=ssa.get_identifier();

  // rename type to L2
  state.rename(ssa.type(), l1_identifier, ns);
  ssa.update_type();

  // in case of pointers, put something into the value set
  if(ns.follow(expr.type()).id()==ID_pointer)
  {
    exprt failed=
      get_failed_symbol(expr, ns);

    exprt rhs;

    if(failed.is_not_nil())
    {
      address_of_exprt address_of_expr;
      address_of_expr.object()=failed;
      address_of_expr.type()=expr.type();
      rhs=address_of_expr;
    }
    else
      rhs=exprt(ID_invalid);

    state.rename(rhs, ns, goto_symex_statet::L1);
    state.value_set.assign(ssa, rhs, ns, true, false);
  }

  // prevent propagation
  state.propagation.remove(l1_identifier);

  // L2 renaming
  // inlining may yield multiple declarations of the same identifier
  // within the same L1 context
  if(state.level2.current_names.find(l1_identifier)==
     state.level2.current_names.end())
    state.level2.current_names[l1_identifier]=std::make_pair(ssa, 0);
  state.level2.increase_counter(l1_identifier);

  const bool record_events=state.record_events;
  state.record_events=false;
  state.rename(ssa, ns);
  state.record_events=record_events;

  // we hide the declaration of auxiliary variables
  // and if the statement itself is hidden
  bool hidden=
    ns.lookup(expr.get_identifier()).is_auxiliary ||
    state.top().hidden_function ||
    state.source.pc->source_location.get_hide();

  target.decl(
    state.guard.as_expr(),
    ssa,
    state.source,
    hidden?symex_targett::HIDDEN:symex_targett::STATE);

  assert(state.dirty);
  if((*state.dirty)(ssa.get_object_name()) &&
     state.atomic_section_id==0)
    target.shared_write(
      state.guard.as_expr(),
      ssa,
      state.atomic_section_id,
      state.source);
}