BlockBegin* LoopFinder::new_block(IRScope* scope, int bci) {
  BlockBegin* b = new BlockBegin(bci);
  _valid_doms = false;
  assert(b->block_id() == max_blocks(), "illegal block_id");
  _max_blocks++;
  BlockLoopInfo* bli = new BlockLoopInfo(b, max_blocks());
  _info->append(bli);
  assert(_info->length() == max_blocks(), "operation failed");
  return b;
}
bool ShortLoopOptimizer::process(BlockBegin* loop_header) {
  TRACE_VALUE_NUMBERING(tty->print_cr("** loop header block"));

  _too_complicated_loop = false;
  _loop_blocks.clear();
  _loop_blocks.append(loop_header);

  for (int i = 0; i < _loop_blocks.length(); i++) {
    BlockBegin* block = _loop_blocks.at(i);
    TRACE_VALUE_NUMBERING(tty->print_cr("processing loop block B%d", block->block_id()));

    if (block->is_set(BlockBegin::exception_entry_flag)) {
      // this would be too complicated
      return false;
    }

    // add predecessors to worklist
    for (int j = block->number_of_preds() - 1; j >= 0; j--) {
      BlockBegin* pred = block->pred_at(j);

      if (pred->is_set(BlockBegin::osr_entry_flag)) {
        return false;
      }

      ValueMap* pred_map = value_map_of(pred);
      if (pred_map != NULL) {
        current_map()->kill_map(pred_map);
      } else if (!_loop_blocks.contains(pred)) {
        if (_loop_blocks.length() >= ValueMapMaxLoopSize) {
          return false;
        }
        _loop_blocks.append(pred);
      }
    }

    // use the instruction visitor for killing values
    for (Value instr = block->next(); instr != NULL; instr = instr->next()) {
      instr->visit(this);
      if (_too_complicated_loop) {
        return false;
      }
    }
  }

  bool optimistic = this->_gvn->compilation()->is_optimistic();
  if (UseLoopInvariantCodeMotion && optimistic) {
    LoopInvariantCodeMotion code_motion(this, _gvn, loop_header, &_loop_blocks);
  }

  TRACE_VALUE_NUMBERING(tty->print_cr("** loop successfully optimized"));
  return true;
}
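// Illustrative note (not part of the original source; block names B0..B3 are a
// made-up example): the worklist above walks predecessor edges backwards from
// the loop header. For a simple loop B0 -> B1(header) -> B2 -> B3 -> B1,
// processing B1 finds B0 already mapped (its kills are merged via kill_map)
// and appends the backedge predecessor B3; B3 then contributes B2, and B2's
// predecessor B1 is already in the list, so the worklist terminates with
// exactly the loop body {B1, B3, B2} (or bails out once it exceeds
// ValueMapMaxLoopSize).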
void block_do(BlockBegin* from) {
  int n = from->end()->number_of_sux();
  int tag = _tags->at(from->block_id());
  for (int i = 0; i < n; i++) {
    BlockBegin* to = from->end()->sux_at(i);
    if (tag != _tags->at(to->block_id())) {
      // this edge is a transition between two different
      // caching regions, so we need to insert a CachingChange
      _pairs->append(new BlockPair(from, to));
    }
  }
}
void LoopFinder::gather_loop_blocks(LoopList* loops) {
  int lng = loops->length();
  BitMap blocks_in_loop(max_blocks());
  for (int i = 0; i < lng; i++) {
    // for each loop do the following
    blocks_in_loop.clear();
    Loop* loop = loops->at(i);
    BlockList* ends = loop->ends();
    if (!loop->is_end(loop->start())) {
      GrowableArray<BlockBegin*>* stack = new GrowableArray<BlockBegin*>();
      blocks_in_loop.at_put(loop->start()->block_id(), true);
      // insert all the ends into the list
      for (int k = 0; k < ends->length(); k++) {
        blocks_in_loop.at_put(ends->at(k)->block_id(), true);
        stack->push(ends->at(k));
      }
      while (!stack->is_empty()) {
        BlockBegin* bb = stack->pop();
        BlockLoopInfo* bli = get_block_info(bb);
        // push all predecessors that are not yet in loop
        int npreds = bli->nof_preds();
        for (int m = 0; m < npreds; m++) {
          BlockBegin* pred = bli->pred_no(m);
          if (!blocks_in_loop.at(pred->block_id())) {
            blocks_in_loop.at_put(pred->block_id(), true);
            loop->append_node(pred);
            stack->push(pred);
          }
        }
      }
      loop->append_node(loop->start());
    }
    // insert all the ends into the loop
    for (int k = 0; k < ends->length(); k++) {
      loop->append_node(ends->at(k));
    }
  }
}
void FpuStackAllocator::allocate() {
  int num_blocks = allocator()->block_count();
  for (int i = 0; i < num_blocks; i++) {
    // Set up to process block
    BlockBegin* block = allocator()->block_at(i);
    intArray* fpu_stack_state = block->fpu_stack_state();

#ifndef PRODUCT
    if (TraceFPUStack) {
      tty->cr();
      tty->print_cr("------- Begin of new Block %d -------", block->block_id());
    }
#endif

    assert(fpu_stack_state != NULL ||
           block->end()->as_Base() != NULL ||
           block->is_set(BlockBegin::exception_entry_flag),
           "FPU stack state must be present due to linear-scan order for FPU stack allocation");
    // note: exception handler entries always start with an empty fpu stack
    //       because stack merging would be too complicated

    if (fpu_stack_state != NULL) {
      sim()->read_state(fpu_stack_state);
    } else {
      sim()->clear();
    }

#ifndef PRODUCT
    if (TraceFPUStack) {
      tty->print("Reading FPU state for block %d:", block->block_id());
      sim()->print();
      tty->cr();
    }
#endif

    allocate_block(block);
    CHECK_BAILOUT();
  }
}
// Gather backedges of natural loops: an edge a -> b where b dominates a
LoopList* LoopFinder::find_backedges(boolArray* visited) {
  int i;
  LoopList* backedges = new LoopList();
  for (i = 0; i < max_blocks(); i++) {
    if (visited->at(i)) {
      BlockLoopInfo* bli = _info->at(i);
      BlockBegin* bb = bli->block();
      BlockEnd* be = bb->end();
      int n = be->number_of_sux();
      for (int s = 0; s < n; s++) {
        BlockBegin* sux = be->sux_at(s);
        if (bli->is_dom_block(sux->block_id())) {
          bli->mark_backedge_start();
          backedges->push(new Loop(sux, bb));
        }
      }
    }
  }
  // backedges contains single pairs of blocks which are a backedge.
  // some of these loops may share entry points, so walk over the backedges
  // and merge loops which have the same entry point
  if (backedges->length() > 1) {
    backedges->sort(sort_by_start_block);
    Loop* current_loop = backedges->at(0);
    for (i = 1; i < backedges->length();) {
      Loop* this_loop = backedges->at(i);
      if (current_loop->start() == this_loop->start()) {
        // same entry point
        assert(this_loop->ends()->length() == 1, "should only have one end at this point");
#ifndef PRODUCT
        if (PrintLoops && Verbose) {
          tty->print_cr("Merging loops with same start");
          current_loop->print();
          this_loop->print();
        }
#endif
        BlockBegin* e = this_loop->ends()->at(0);
        current_loop->add_end(e);
        backedges->remove(this_loop);
      } else {
        // start processing the next loop entry point
        current_loop = this_loop;
        i++;
      }
    }
  }
  return backedges;
}
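// Illustrative note (not part of the original source; block names are a
// made-up example): if a header B1 is reached by two backedges B2 -> B1 and
// B3 -> B1, find_backedges first records Loop(B1, B2) and Loop(B1, B3). After
// sorting by start block, the merge pass above folds the second pair into the
// first, yielding a single loop with start B1 and ends {B2, B3}.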
BlockBegin* LoopFinder::insert_caching_block(LoopList* loops, BlockBegin* from, BlockBegin* to) {
  if (from->next() && from->next()->as_CachingChange() != NULL && from->end()->default_sux() == to) {
    // we already have a caching change block
    // check that the precision flags are the same
#ifdef ASSERT
    CachingChange* cc = from->next()->as_CachingChange();
    assert(cc->pred_block()->is_set(BlockBegin::single_precision_flag) ==
           from->is_set(BlockBegin::single_precision_flag), "consistency check");
    assert(cc->sux_block()->is_set(BlockBegin::single_precision_flag) ==
           to->is_set(BlockBegin::single_precision_flag), "consistency check");
#endif
    return NULL;
  } else {
    // insert a caching change block, making it close to any single successor
    int bci = -1;
    BlockLoopInfo* bli = get_block_info(to);
    if (bli->nof_preds() == 1) {
      bci = to->bci();
    } else {
      bci = from->end()->bci();
    }
    BlockBegin* cc = new_block(to->scope(), bci);
    BlockEnd* e = new Goto(to, false);
    cc->set_end(e);
    cc->set_next(new CachingChange(from, to), bci)->set_next(e, bci);
    if (PrintLoops && Verbose) {
      tty->print_cr("Added caching block B%d (dest B%d)", cc->block_id(), to->block_id());
    }
    BlockEnd* from_end = from->end();
    from_end->substitute_sux(to, cc);
    cc->join(from_end->state());
    assert(cc->state() != NULL, "illegal operation");
    ValueStack* end_state = cc->state()->copy();
    cc->end()->set_state(end_state);
    to->join(end_state);
    assert(cc->end()->state() != NULL, "should have state");
    loops->update_loops(from, to, cc);
    return cc;
  }
}
// Compute dominators for bb and
// walk the successors of bb in depth first order
void LoopFinder::dominator_walk_sux(BlockBegin* bb, boolArray* visited) {
  // we may not visit a block that is a jsr target
  if (bb->is_set(BlockBegin::subroutine_entry_flag)) set_not_ok();

  BlockEnd* be = bb->end();
  BlockLoopInfo* bli = get_block_info(bb);
  visited->at_put(bb->block_id(), true);

  // compute new dominators using predecessors
  BitMap map(max_blocks());
  map.set_from(*BlockLoopInfo::all_blocks_map());
  {
    // Compute dominators for myself (looking at predecessors)
    int nof_preds = bli->nof_preds();
    for (int i = 0; i < nof_preds; i++) {
      BlockBegin* pred = bli->pred_no(i);
      BitMap pred_map = get_block_info(pred)->doms_map();
      map.set_intersection(pred_map);
    }
    // add itself
    map.at_put(bb->block_id(), true);
    // if the computed dominators differ from the ones stored,
    // then we need another iteration
    BitMap bb_map = bli->doms_map();
    if (!bb_map.is_same(map)) {
      set_changed(true);
      bb_map.set_from(map);
    }
  }
  {
    // Visit all successors
    int n = be->number_of_sux();
    for (int i = 0; i < n; i++) {
      BlockBegin* sux = be->sux_at(i);
      if (!visited->at(sux->block_id())) {
        dominator_walk_sux(sux, visited);
      }
    }
  }
}
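// Illustrative note (not part of the original source): the intersection above
// implements the classic iterative dominator dataflow equation
//
//     Dom(entry) = { entry }
//     Dom(b)     = { b } union ( intersection of Dom(p) over all p in preds(b) )
//
// Sets start "full" (all_blocks_map) and can only shrink, so repeating the
// walk until no doms_map changes reaches the greatest fixed point. For a
// hypothetical diamond B1 -> {B2, B3} -> B4, this gives
// Dom(B4) = {B4} union ({B1, B2} intersect {B1, B3}) = {B1, B4},
// i.e. neither branch block dominates the join.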
GlobalValueNumbering::GlobalValueNumbering(IR* ir)
  : _current_map(NULL)
  , _value_maps(ir->linear_scan_order()->length(), NULL)
{
  TRACE_VALUE_NUMBERING(tty->print_cr("****** start of global value numbering"));

  ShortLoopOptimizer short_loop_optimizer(this);
  int subst_count = 0;

  BlockList* blocks = ir->linear_scan_order();
  int num_blocks = blocks->length();

  BlockBegin* start_block = blocks->at(0);
  assert(start_block == ir->start() && start_block->number_of_preds() == 0 && start_block->dominator() == NULL,
         "must be start block");
  assert(start_block->next()->as_Base() != NULL && start_block->next()->next() == NULL,
         "start block must not have instructions");

  // initial, empty value map with nesting 0
  set_value_map_of(start_block, new ValueMap());

  for (int i = 1; i < num_blocks; i++) {
    BlockBegin* block = blocks->at(i);
    TRACE_VALUE_NUMBERING(tty->print_cr("**** processing block B%d", block->block_id()));

    int num_preds = block->number_of_preds();
    assert(num_preds > 0, "block must have predecessors");

    BlockBegin* dominator = block->dominator();
    assert(dominator != NULL, "dominator must exist");
    assert(value_map_of(dominator) != NULL, "value map of dominator must exist");

    // create new value map with increased nesting
    _current_map = new ValueMap(value_map_of(dominator));

    if (num_preds == 1) {
      assert(dominator == block->pred_at(0), "dominator must be equal to predecessor");
      // nothing to do here

    } else if (block->is_set(BlockBegin::linear_scan_loop_header_flag)) {
      // block has incoming backward branches -> try to optimize short loops
      if (!short_loop_optimizer.process(block)) {
        // loop is too complicated, so kill all memory loads because there might be
        // stores to them in the loop
        current_map()->kill_memory();
      }

    } else {
      // only incoming forward branches that are already processed
      for (int j = 0; j < num_preds; j++) {
        BlockBegin* pred = block->pred_at(j);
        ValueMap* pred_map = value_map_of(pred);

        if (pred_map != NULL) {
          // propagate killed values of the predecessor to this block
          current_map()->kill_map(value_map_of(pred));
        } else {
          // kill all memory loads because predecessor not yet processed
          // (this can happen with non-natural loops and OSR-compiles)
          current_map()->kill_memory();
        }
      }
    }

    if (block->is_set(BlockBegin::exception_entry_flag)) {
      current_map()->kill_exception();
    }

    TRACE_VALUE_NUMBERING(tty->print("value map before processing block: "); current_map()->print());

    // visit all instructions of this block
    for (Value instr = block->next(); instr != NULL; instr = instr->next()) {
      assert(!instr->has_subst(), "substitution already set");

      // check if instruction kills any values
      instr->visit(this);

      if (instr->hash() != 0) {
        Value f = current_map()->find_insert(instr);
        if (f != instr) {
          assert(!f->has_subst(), "can't have a substitution");
          instr->set_subst(f);
          subst_count++;
        }
      }
    }

    // remember value map for successors
    set_value_map_of(block, current_map());
  }

  if (subst_count != 0) {
    SubstitutionResolver resolver(ir);
  }

  TRACE_VALUE_NUMBERING(tty->print("****** end of global value numbering. "); ValueMap::print_statistics());
}
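// Illustrative note (not part of the original source): find_insert is where the
// actual redundancy elimination happens. For Java source like
//
//     int a = x + y;
//     ...            // no intervening kill of the x + y entry
//     int b = x + y;
//
// the second ArithmeticOp hashes to the same value map entry as the first, so
// find_insert returns the first instruction, the second one records it as its
// substitution, and SubstitutionResolver later rewires all uses of the
// redundant add to the original one.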
void LoopFinder::compute_dominators(boolArray* visited) {
  // set up a bitmap that contains all blocks
  BitMap all_map(max_blocks());
  all_map.clear();
  for (int i = 0; i < max_blocks(); i++) {
    all_map.at_put(i, true);
  }
  BlockLoopInfo::set_all_blocks_map(&all_map);

  { // initialize block loop info and set all predecessors
    CreateInfoClosure c(this);
    ir()->iterate_preorder(&c);
    SetPredsClosure s(this);
    ir()->iterate_preorder(&s);
  }

  { // compute dominators
    // init dominators
    // set entry block (only one exists) and its dominators
    BlockBegin* root_bb = ir()->start();
    assert(!root_bb->is_set(BlockBegin::subroutine_entry_flag), "root may not be jsr target");

    { // initialize root dominators (only itself)
      BitMap root_doms(max_blocks());
      root_doms.clear();
      root_doms.at_put(root_bb->block_id(), true);
      BlockLoopInfo* bli = get_block_info(root_bb);
      bli->doms_map().set_from(root_doms);
    }

    // iterate until either iter_count reaches the limit or the dominators are stable
    int iter_count = 0;
    do {
      iter_count++;
      visited->at_put(root_bb->block_id(), true);
      set_changed(false);
      BlockEnd* be = root_bb->end();
      int n = be->number_of_sux();
      for (int i = 0; i < n; i++) {
        BlockBegin* sux = be->sux_at(i);
        if (!visited->at(sux->block_id())) {
          dominator_walk_sux(sux, visited);
        }
      }
      if (changed()) {
        for (int i = visited->length() - 1; i >= 0; i--) {
          visited->at_put(i, false);
        }
      }
    } while (changed() && iter_count < max_nof_dom_iterations);

    if (iter_count == max_nof_dom_iterations) {
      if (PrintLoops) {
        tty->print_cr("could not compute dominators");
      }
      set_not_ok();
    }

    // if (PrintLoops) {
    //   tty->print_cr("  Dominators: %d iterations", iter_count);
    //   PrintBlockDominators p(this);
    //   ir()->iterate_topological(&p);
    // }
  }
  BlockLoopInfo::set_all_blocks_map(NULL);

  // go through all blocks; if a block has not been analyzed, then check its
  // predecessors and successors: all of them must also be un-analyzed;
  // Note: such blocks may not be JSR blocks
  if (ok()) {
    _valid_doms = true;
#ifdef ASSERT
    CheckDomClosure cc(visited);
    ir()->iterate_preorder(&cc);
#endif
  }
}
void LinearScan::allocate_fpu_stack() {
  // First compute which FPU registers are live at the start of each basic block
  // (To minimize the amount of work we have to do if we have to merge FPU stacks)
  if (ComputeExactFPURegisterUsage) {
    Interval* intervals_in_register, *intervals_in_memory;
    create_unhandled_lists(&intervals_in_register, &intervals_in_memory, is_in_fpu_register, NULL);

    // ignore memory intervals by overwriting intervals_in_memory
    // the dummy interval is needed to enforce the walker to walk until the given id:
    // without it, the walker stops when the unhandled-list is empty -> live information
    // beyond this point would be incorrect.
    Interval* dummy_interval = new Interval(any_reg);
    dummy_interval->add_range(max_jint - 2, max_jint - 1);
    dummy_interval->set_next(Interval::end());
    intervals_in_memory = dummy_interval;

    IntervalWalker iw(this, intervals_in_register, intervals_in_memory);

    const int num_blocks = block_count();
    for (int i = 0; i < num_blocks; i++) {
      BlockBegin* b = block_at(i);

      // register usage is only needed for merging stacks -> compute only
      // when more than one predecessor.
      // the block must not have any spill moves at the beginning (checked by assertions)
      // spill moves would use intervals that are marked as handled and so the usage bit
      // would be set incorrectly

      // NOTE: the check for number_of_preds > 1 is necessary. A block with only one
      //       predecessor may have spill moves at the beginning of the block.
      //       If an interval ends at the current instruction id, it is not possible
      //       to decide if the register is live or not at the block begin -> the
      //       register information would be incorrect.
      if (b->number_of_preds() > 1) {
        int id = b->first_lir_instruction_id();
        ResourceBitMap regs(FrameMap::nof_fpu_regs);

        iw.walk_to(id);   // walk after the first instruction (always a label) of the block
        assert(iw.current_position() == id, "did not walk completely to id");

        // Only consider FPU values in registers
        Interval* interval = iw.active_first(fixedKind);
        while (interval != Interval::end()) {
          int reg = interval->assigned_reg();
          assert(reg >= pd_first_fpu_reg && reg <= pd_last_fpu_reg, "no fpu register");
          assert(interval->assigned_regHi() == -1, "must not have hi register (doubles stored in one register)");
          assert(interval->from() <= id && id < interval->to(), "interval out of range");

#ifndef PRODUCT
          if (TraceFPURegisterUsage) {
            tty->print("fpu reg %d is live because of ", reg - pd_first_fpu_reg);
            interval->print();
          }
#endif

          regs.set_bit(reg - pd_first_fpu_reg);
          interval = interval->next();
        }

        b->set_fpu_register_usage(regs);

#ifndef PRODUCT
        if (TraceFPURegisterUsage) {
          tty->print("FPU regs for block %d, LIR instr %d: ", b->block_id(), id);
          regs.print_on(tty);
          tty->cr();
        }
#endif
      }
    }
  }

  FpuStackAllocator alloc(ir()->compilation(), this);
  _fpu_stack_allocator = &alloc;
  alloc.allocate();
  _fpu_stack_allocator = NULL;
}
bool FpuStackAllocator::merge_fpu_stack_with_successors(BlockBegin* block) {
#ifndef PRODUCT
  if (TraceFPUStack) {
    tty->print_cr("Propagating FPU stack state for B%d at LIR_Op position %d to successors:",
                  block->block_id(), pos());
    sim()->print();
    tty->cr();
  }
#endif

  bool changed = false;
  int number_of_sux = block->number_of_sux();

  if (number_of_sux == 1 && block->sux_at(0)->number_of_preds() > 1) {
    // The successor has at least two incoming edges, so a stack merge will be necessary
    // If this block is the first predecessor, cleanup the current stack and propagate it
    // If this block is not the first predecessor, a stack merge will be necessary
    BlockBegin* sux = block->sux_at(0);
    intArray* state = sux->fpu_stack_state();
    LIR_List* instrs = new LIR_List(_compilation);

    if (state != NULL) {
      // Merge with a successor that already has an FPU stack state
      // the block must only have one successor because critical edges must have been split
      FpuStackSim* cur_sim = sim();
      FpuStackSim* sux_sim = temp_sim();
      sux_sim->read_state(state);

      merge_fpu_stack(instrs, cur_sim, sux_sim);

    } else {
      // propagate current FPU stack state to successor without state
      // clean up stack first so that there are no dead values on the stack
      if (ComputeExactFPURegisterUsage) {
        FpuStackSim* cur_sim = sim();
        ResourceBitMap live_fpu_regs = block->sux_at(0)->fpu_register_usage();
        assert(live_fpu_regs.size() == FrameMap::nof_fpu_regs, "missing register usage");

        merge_cleanup_fpu_stack(instrs, cur_sim, live_fpu_regs);
      }

      intArray* state = sim()->write_state();
      if (TraceFPUStack) {
        tty->print_cr("Setting FPU stack state of B%d (merge path)", sux->block_id());
        sim()->print();
        tty->cr();
      }
      sux->set_fpu_stack_state(state);
    }

    if (instrs->instructions_list()->length() > 0) {
      lir()->insert_before(pos(), instrs);
      set_pos(instrs->instructions_list()->length() + pos());
      changed = true;
    }

  } else {
    // Propagate unmodified Stack to successors where a stack merge is not necessary
    intArray* state = sim()->write_state();
    for (int i = 0; i < number_of_sux; i++) {
      BlockBegin* sux = block->sux_at(i);

#ifdef ASSERT
      for (int j = 0; j < sux->number_of_preds(); j++) {
        assert(block == sux->pred_at(j), "all critical edges must be broken");
      }

      // check if new state is same
      if (sux->fpu_stack_state() != NULL) {
        intArray* sux_state = sux->fpu_stack_state();
        assert(state->length() == sux_state->length(), "overwriting existing stack state");
        for (int j = 0; j < state->length(); j++) {
          assert(state->at(j) == sux_state->at(j), "overwriting existing stack state");
        }
      }
#endif
#ifndef PRODUCT
      if (TraceFPUStack) {
        tty->print_cr("Setting FPU stack state of B%d", sux->block_id());
        sim()->print();
        tty->cr();
      }
#endif

      sux->set_fpu_stack_state(state);
    }
  }

#ifndef PRODUCT
  // assertions that FPU stack state conforms to all successors' states
  intArray* cur_state = sim()->write_state();
  for (int i = 0; i < number_of_sux; i++) {
    BlockBegin* sux = block->sux_at(i);
    intArray* sux_state = sux->fpu_stack_state();

    assert(sux_state != NULL, "no fpu state");
    assert(cur_state->length() == sux_state->length(), "incorrect length");
    for (int j = 0; j < cur_state->length(); j++) {
      assert(cur_state->at(j) == sux_state->at(j), "element not equal");
    }
  }
#endif

  return changed;
}
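// Illustrative note (not part of the original source; block names are a
// made-up example): a critical edge runs from a block with multiple successors
// to a block with multiple predecessors. The assertions above rely on such
// edges having been split earlier: if B1 (two successors) branched directly to
// B3 (two predecessors), stack-shuffling fixup code inserted for that edge
// would also execute on B1's other path. Splitting inserts an empty block
// B1 -> Bnew -> B3, giving the merge code an edge-private home.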