bool ShortLoopOptimizer::process(BlockBegin* loop_header) { TRACE_VALUE_NUMBERING(tty->print_cr("** loop header block")); _too_complicated_loop = false; _loop_blocks.clear(); _loop_blocks.append(loop_header); for (int i = 0; i < _loop_blocks.length(); i++) { BlockBegin* block = _loop_blocks.at(i); TRACE_VALUE_NUMBERING(tty->print_cr("processing loop block B%d", block->block_id())); if (block->is_set(BlockBegin::exception_entry_flag)) { // this would be too complicated return false; } // add predecessors to worklist for (int j = block->number_of_preds() - 1; j >= 0; j--) { BlockBegin* pred = block->pred_at(j); if (pred->is_set(BlockBegin::osr_entry_flag)) { return false; } ValueMap* pred_map = value_map_of(pred); if (pred_map != NULL) { current_map()->kill_map(pred_map); } else if (!_loop_blocks.contains(pred)) { if (_loop_blocks.length() >= ValueMapMaxLoopSize) { return false; } _loop_blocks.append(pred); } } // use the instruction visitor for killing values for (Value instr = block->next(); instr != NULL; instr = instr->next()) { instr->visit(this); if (_too_complicated_loop) { return false; } } } bool optimistic = this->_gvn->compilation()->is_optimistic(); if (UseLoopInvariantCodeMotion && optimistic) { LoopInvariantCodeMotion code_motion(this, _gvn, loop_header, &_loop_blocks); } TRACE_VALUE_NUMBERING(tty->print_cr("** loop successfully optimized")); return true; }
void LinearScan::allocate_fpu_stack() { // First compute which FPU registers are live at the start of each basic block // (To minimize the amount of work we have to do if we have to merge FPU stacks) if (ComputeExactFPURegisterUsage) { Interval* intervals_in_register, *intervals_in_memory; create_unhandled_lists(&intervals_in_register, &intervals_in_memory, is_in_fpu_register, NULL); // ignore memory intervals by overwriting intervals_in_memory // the dummy interval is needed to enforce the walker to walk until the given id: // without it, the walker stops when the unhandled-list is empty -> live information // beyond this point would be incorrect. Interval* dummy_interval = new Interval(any_reg); dummy_interval->add_range(max_jint - 2, max_jint - 1); dummy_interval->set_next(Interval::end()); intervals_in_memory = dummy_interval; IntervalWalker iw(this, intervals_in_register, intervals_in_memory); const int num_blocks = block_count(); for (int i = 0; i < num_blocks; i++) { BlockBegin* b = block_at(i); // register usage is only needed for merging stacks -> compute only // when more than one predecessor. // the block must not have any spill moves at the beginning (checked by assertions) // spill moves would use intervals that are marked as handled and so the usage bit // would been set incorrectly // NOTE: the check for number_of_preds > 1 is necessary. A block with only one // predecessor may have spill moves at the begin of the block. // If an interval ends at the current instruction id, it is not possible // to decide if the register is live or not at the block begin -> the // register information would be incorrect. if (b->number_of_preds() > 1) { int id = b->first_lir_instruction_id(); ResourceBitMap regs(FrameMap::nof_fpu_regs); iw.walk_to(id); // walk after the first instruction (always a label) of the block assert(iw.current_position() == id, "did not walk completely to id"); // Only consider FPU values in registers Interval* interval = iw.active_first(fixedKind); while (interval != Interval::end()) { int reg = interval->assigned_reg(); assert(reg >= pd_first_fpu_reg && reg <= pd_last_fpu_reg, "no fpu register"); assert(interval->assigned_regHi() == -1, "must not have hi register (doubles stored in one register)"); assert(interval->from() <= id && id < interval->to(), "interval out of range"); #ifndef PRODUCT if (TraceFPURegisterUsage) { tty->print("fpu reg %d is live because of ", reg - pd_first_fpu_reg); interval->print(); } #endif regs.set_bit(reg - pd_first_fpu_reg); interval = interval->next(); } b->set_fpu_register_usage(regs); #ifndef PRODUCT if (TraceFPURegisterUsage) { tty->print("FPU regs for block %d, LIR instr %d): ", b->block_id(), id); regs.print_on(tty); tty->cr(); } #endif } } } FpuStackAllocator alloc(ir()->compilation(), this); _fpu_stack_allocator = &alloc; alloc.allocate(); _fpu_stack_allocator = NULL; }
GlobalValueNumbering::GlobalValueNumbering(IR* ir) : _current_map(NULL) , _value_maps(ir->linear_scan_order()->length(), NULL) { TRACE_VALUE_NUMBERING(tty->print_cr("****** start of global value numbering")); ShortLoopOptimizer short_loop_optimizer(this); int subst_count = 0; BlockList* blocks = ir->linear_scan_order(); int num_blocks = blocks->length(); BlockBegin* start_block = blocks->at(0); assert(start_block == ir->start() && start_block->number_of_preds() == 0 && start_block->dominator() == NULL, "must be start block"); assert(start_block->next()->as_Base() != NULL && start_block->next()->next() == NULL, "start block must not have instructions"); // initial, empty value map with nesting 0 set_value_map_of(start_block, new ValueMap()); for (int i = 1; i < num_blocks; i++) { BlockBegin* block = blocks->at(i); TRACE_VALUE_NUMBERING(tty->print_cr("**** processing block B%d", block->block_id())); int num_preds = block->number_of_preds(); assert(num_preds > 0, "block must have predecessors"); BlockBegin* dominator = block->dominator(); assert(dominator != NULL, "dominator must exist"); assert(value_map_of(dominator) != NULL, "value map of dominator must exist"); // create new value map with increased nesting _current_map = new ValueMap(value_map_of(dominator)); if (num_preds == 1) { assert(dominator == block->pred_at(0), "dominator must be equal to predecessor"); // nothing to do here } else if (block->is_set(BlockBegin::linear_scan_loop_header_flag)) { // block has incoming backward branches -> try to optimize short loops if (!short_loop_optimizer.process(block)) { // loop is too complicated, so kill all memory loads because there might be // stores to them in the loop current_map()->kill_memory(); } } else { // only incoming forward branches that are already processed for (int j = 0; j < num_preds; j++) { BlockBegin* pred = block->pred_at(j); ValueMap* pred_map = value_map_of(pred); if (pred_map != NULL) { // propagate killed values of the predecessor to this block current_map()->kill_map(value_map_of(pred)); } else { // kill all memory loads because predecessor not yet processed // (this can happen with non-natural loops and OSR-compiles) current_map()->kill_memory(); } } } if (block->is_set(BlockBegin::exception_entry_flag)) { current_map()->kill_exception(); } TRACE_VALUE_NUMBERING(tty->print("value map before processing block: "); current_map()->print()); // visit all instructions of this block for (Value instr = block->next(); instr != NULL; instr = instr->next()) { assert(!instr->has_subst(), "substitution already set"); // check if instruction kills any values instr->visit(this); if (instr->hash() != 0) { Value f = current_map()->find_insert(instr); if (f != instr) { assert(!f->has_subst(), "can't have a substitution"); instr->set_subst(f); subst_count++; } } } // remember value map for successors set_value_map_of(block, current_map()); } if (subst_count != 0) { SubstitutionResolver resolver(ir); } TRACE_VALUE_NUMBERING(tty->print("****** end of global value numbering. "); ValueMap::print_statistics()); }
bool FpuStackAllocator::merge_fpu_stack_with_successors(BlockBegin* block) { #ifndef PRODUCT if (TraceFPUStack) { tty->print_cr("Propagating FPU stack state for B%d at LIR_Op position %d to successors:", block->block_id(), pos()); sim()->print(); tty->cr(); } #endif bool changed = false; int number_of_sux = block->number_of_sux(); if (number_of_sux == 1 && block->sux_at(0)->number_of_preds() > 1) { // The successor has at least two incoming edges, so a stack merge will be necessary // If this block is the first predecessor, cleanup the current stack and propagate it // If this block is not the first predecessor, a stack merge will be necessary BlockBegin* sux = block->sux_at(0); intArray* state = sux->fpu_stack_state(); LIR_List* instrs = new LIR_List(_compilation); if (state != NULL) { // Merge with a successors that already has a FPU stack state // the block must only have one successor because critical edges must been split FpuStackSim* cur_sim = sim(); FpuStackSim* sux_sim = temp_sim(); sux_sim->read_state(state); merge_fpu_stack(instrs, cur_sim, sux_sim); } else { // propagate current FPU stack state to successor without state // clean up stack first so that there are no dead values on the stack if (ComputeExactFPURegisterUsage) { FpuStackSim* cur_sim = sim(); ResourceBitMap live_fpu_regs = block->sux_at(0)->fpu_register_usage(); assert(live_fpu_regs.size() == FrameMap::nof_fpu_regs, "missing register usage"); merge_cleanup_fpu_stack(instrs, cur_sim, live_fpu_regs); } intArray* state = sim()->write_state(); if (TraceFPUStack) { tty->print_cr("Setting FPU stack state of B%d (merge path)", sux->block_id()); sim()->print(); tty->cr(); } sux->set_fpu_stack_state(state); } if (instrs->instructions_list()->length() > 0) { lir()->insert_before(pos(), instrs); set_pos(instrs->instructions_list()->length() + pos()); changed = true; } } else { // Propagate unmodified Stack to successors where a stack merge is not necessary intArray* state = sim()->write_state(); for (int i = 0; i < number_of_sux; i++) { BlockBegin* sux = block->sux_at(i); #ifdef ASSERT for (int j = 0; j < sux->number_of_preds(); j++) { assert(block == sux->pred_at(j), "all critical edges must be broken"); } // check if new state is same if (sux->fpu_stack_state() != NULL) { intArray* sux_state = sux->fpu_stack_state(); assert(state->length() == sux_state->length(), "overwriting existing stack state"); for (int j = 0; j < state->length(); j++) { assert(state->at(j) == sux_state->at(j), "overwriting existing stack state"); } } #endif #ifndef PRODUCT if (TraceFPUStack) { tty->print_cr("Setting FPU stack state of B%d", sux->block_id()); sim()->print(); tty->cr(); } #endif sux->set_fpu_stack_state(state); } } #ifndef PRODUCT // assertions that FPU stack state conforms to all successors' states intArray* cur_state = sim()->write_state(); for (int i = 0; i < number_of_sux; i++) { BlockBegin* sux = block->sux_at(i); intArray* sux_state = sux->fpu_stack_state(); assert(sux_state != NULL, "no fpu state"); assert(cur_state->length() == sux_state->length(), "incorrect length"); for (int i = 0; i < cur_state->length(); i++) { assert(cur_state->at(i) == sux_state->at(i), "element not equal"); } } #endif return changed; }