Пример #1
0
// Creates a new BlockBegin for the given bci and registers a BlockLoopInfo
// entry for it in _info.  Adding a block marks the dominator information as
// no longer valid.  The scope argument is not used by this function.
// Returns the freshly created block.
BlockBegin* LoopFinder::new_block(IRScope* scope, int bci) {
  BlockBegin* fresh_block = new BlockBegin(bci);
  // previously computed dominators do not cover the new block
  _valid_doms = false;
  // the new block's id must be the next free slot
  assert(fresh_block->block_id() == max_blocks(), "illegal block_id");
  _max_blocks++;
  _info->append(new BlockLoopInfo(fresh_block, max_blocks()));
  assert(_info->length() == max_blocks(), "operation failed");
  return fresh_block;
}
Пример #2
0
// Collects the blocks of the loop headed by loop_header by walking
// predecessors backwards, and lets the instruction visitor kill values in the
// current value map for every instruction found in those blocks.
//
// Returns false (giving up on the loop) when
//   - a collected block is an exception entry,
//   - a predecessor is an OSR entry,
//   - the number of collected blocks would exceed ValueMapMaxLoopSize, or
//   - the visitor sets _too_complicated_loop.
// Returns true otherwise; in that case loop invariant code motion is run when
// UseLoopInvariantCodeMotion is set and the compilation is optimistic.
bool ShortLoopOptimizer::process(BlockBegin* loop_header) {
  TRACE_VALUE_NUMBERING(tty->print_cr("** loop header block"));

  _too_complicated_loop = false;
  _loop_blocks.clear();
  _loop_blocks.append(loop_header);

  // _loop_blocks doubles as the worklist: it may grow while it is walked
  int cursor = 0;
  while (cursor < _loop_blocks.length()) {
    BlockBegin* cur = _loop_blocks.at(cursor++);
    TRACE_VALUE_NUMBERING(tty->print_cr("processing loop block B%d", cur->block_id()));

    // exception entries inside the loop would be too complicated
    if (cur->is_set(BlockBegin::exception_entry_flag)) {
      return false;
    }

    // walk the predecessors from last to first and extend the worklist
    int pred_idx = cur->number_of_preds();
    while (--pred_idx >= 0) {
      BlockBegin* pred = cur->pred_at(pred_idx);

      if (pred->is_set(BlockBegin::osr_entry_flag)) {
        return false;
      }

      ValueMap* pred_map = value_map_of(pred);
      if (pred_map != NULL) {
        // predecessor already has a value map: only its kills are taken over
        current_map()->kill_map(pred_map);
        continue;
      }
      if (_loop_blocks.contains(pred)) {
        continue;
      }
      if (_loop_blocks.length() >= ValueMapMaxLoopSize) {
        return false;
      }
      _loop_blocks.append(pred);
    }

    // let the instruction visitor kill the values affected by this block
    Value instr = cur->next();
    while (instr != NULL) {
      instr->visit(this);
      if (_too_complicated_loop) {
        return false;
      }
      instr = instr->next();
    }
  }

  bool is_optimistic = this->_gvn->compilation()->is_optimistic();

  if (UseLoopInvariantCodeMotion && is_optimistic) {
    // the work is done by the constructor of this object
    LoopInvariantCodeMotion code_motion(this, _gvn, loop_header, &_loop_blocks);
  }

  TRACE_VALUE_NUMBERING(tty->print_cr("** loop successfully optimized"));
  return true;
}
Пример #3
0
 // Examines every successor edge of 'from' and appends a BlockPair to _pairs
 // for each edge whose target has a different tag than 'from'.
 void block_do(BlockBegin* from) {
   const int sux_count = from->end()->number_of_sux();
   const int from_tag = _tags->at(from->block_id());
   for (int s = 0; s < sux_count; s++) {
     BlockBegin* target = from->end()->sux_at(s);
     if (_tags->at(target->block_id()) == from_tag) {
       // both ends of the edge carry the same tag: nothing to record
       continue;
     }
     // the edge leads from one caching region into a different one,
     // so a CachingChange has to be inserted on it
     _pairs->append(new BlockPair(from, target));
   }
 }
Пример #4
0
// For every loop in 'loops', appends the blocks that belong to the loop to the
// loop's node list.  The blocks are found by walking predecessors backwards
// from the loop ends; the walk stops at the loop start because the start is
// marked as seen before the walk begins.  When the start block is itself one
// of the ends, no walk is done and only the ends are appended.
void LoopFinder::gather_loop_blocks(LoopList* loops) {
  const int loop_count = loops->length();
  BitMap in_loop(max_blocks());
  for (int loop_idx = 0; loop_idx < loop_count; loop_idx++) {
    // the membership bitmap is reused, so reset it for each loop
    in_loop.clear();
    Loop* loop = loops->at(loop_idx);
    BlockList* ends = loop->ends();
    if (!loop->is_end(loop->start())) {
      GrowableArray<BlockBegin*>* worklist = new GrowableArray<BlockBegin*>();
      in_loop.at_put(loop->start()->block_id(), true);

      // seed the worklist with all loop ends
      for (int e = 0; e < ends->length(); e++) {
        BlockBegin* end_block = ends->at(e);
        in_loop.at_put(end_block->block_id(), true);
        worklist->push(end_block);
      }

      while (!worklist->is_empty()) {
        BlockBegin* cur = worklist->pop();
        BlockLoopInfo* cur_info = get_block_info(cur);
        // every predecessor not seen so far becomes a loop member
        const int pred_count = cur_info->nof_preds();
        for (int p = 0; p < pred_count; p++) {
          BlockBegin* pred = cur_info->pred_no(p);
          if (in_loop.at(pred->block_id())) {
            continue;
          }
          in_loop.at_put(pred->block_id(), true);
          loop->append_node(pred);
          worklist->push(pred);
        }
      }
      loop->append_node(loop->start());
    }
    // the ends themselves are appended last
    for (int e = 0; e < ends->length(); e++) {
      loop->append_node(ends->at(e));
    }
  }
}
Пример #5
0
void FpuStackAllocator::allocate() {
  int num_blocks = allocator()->block_count();
  for (int i = 0; i < num_blocks; i++) {
    // Set up to process block
    BlockBegin* block = allocator()->block_at(i);
    intArray* fpu_stack_state = block->fpu_stack_state();

#ifndef PRODUCT
    if (TraceFPUStack) {
      tty->cr();
      tty->print_cr("------- Begin of new Block %d -------", block->block_id());
    }
#endif

    assert(fpu_stack_state != NULL ||
           block->end()->as_Base() != NULL ||
           block->is_set(BlockBegin::exception_entry_flag),
           "FPU stack state must be present due to linear-scan order for FPU stack allocation");
    // note: exception handler entries always start with an empty fpu stack
    //       because stack merging would be too complicated

    if (fpu_stack_state != NULL) {
      sim()->read_state(fpu_stack_state);
    } else {
      sim()->clear();
    }

#ifndef PRODUCT
    if (TraceFPUStack) {
      tty->print("Reading FPU state for block %d:", block->block_id());
      sim()->print();
      tty->cr();
    }
#endif

    allocate_block(block);
    CHECK_BAILOUT();
  }
}
Пример #6
0
// Gather backedges of natural loops: an edge a -> b where b dominates a.
//
// Only blocks whose entry in 'visited' is set are examined.  For each such
// block bb and each successor sux that is recorded as a dominator of bb, bb's
// loop info is flagged via mark_backedge_start() and a Loop(sux, bb) is
// created.  Afterwards all loops that share the same start block are merged
// into a single Loop with several ends.
//
// Returns the list of (merged) loops.
LoopList* LoopFinder::find_backedges(boolArray* visited) {
  LoopList* backedges = new LoopList();
  for (int block_idx = 0; block_idx < max_blocks(); block_idx++) {
    if (!visited->at(block_idx)) {
      continue;
    }
    BlockLoopInfo* bli = _info->at(block_idx);
    BlockBegin*    bb  = bli->block();
    BlockEnd*      be  = bb->end();
    const int n = be->number_of_sux();
    for (int s = 0; s < n; s++) {
      BlockBegin* sux = be->sux_at(s);
      if (bli->is_dom_block(sux->block_id())) {
        // sux dominates bb, so bb -> sux is a backedge
        bli->mark_backedge_start();
        backedges->push(new Loop(sux, bb));
      }
    }
  }

  // backedges contains single pairs of blocks which are a backedge.
  // some of these loops may share entry points, so walk over the backedges
  // and merge loops which have the same entry point
  if (backedges->length() > 1) {
    // NOTE(review): relies on sort_by_start_block placing loops with the same
    // start block next to each other -- confirm against the comparator
    backedges->sort(sort_by_start_block);
    Loop* current_loop = backedges->at(0);
    // i is only advanced when nothing was removed, because remove() shifts
    // the following element into position i
    for (int i = 1; i < backedges->length();) {
      Loop* this_loop = backedges->at(i);
      if (current_loop->start() == this_loop->start()) {
        // same entry point
        assert(this_loop->ends()->length() == 1, "should only have one end at this point");
#ifndef PRODUCT
        if (PrintLoops && Verbose) {
          tty->print_cr("Merging loops with same start");
          current_loop->print();
          this_loop->print();
        }
#endif
        BlockBegin* e = this_loop->ends()->at(0);
        current_loop->add_end(e);
        backedges->remove(this_loop);
      } else {
        // start processing the next loop entry point: this_loop becomes the
        // merge target for the loops that follow it.  Without this update only
        // loops sharing the start of the very first element would be merged.
        current_loop = this_loop;
        i++;
      }
    }
  }

  return backedges;
}
Пример #7
0
// Inserts a new block containing a CachingChange on the edge from -> to.
//
// If 'from' already starts with a CachingChange and its default successor is
// 'to', nothing is inserted and NULL is returned.  Otherwise a new block
// "CachingChange; Goto to" is created, 'from' is redirected to it, the value
// stack states are propagated, 'loops' is updated, and the new block is
// returned.
BlockBegin* LoopFinder::insert_caching_block(LoopList* loops, BlockBegin* from, BlockBegin* to) {
  const bool already_present = from->next() && from->next()->as_CachingChange() != NULL &&
                               from->end()->default_sux() == to;
  if (already_present) {
    // a caching change block exists already;
    // verify that the precision flags agree
#ifdef ASSERT
    CachingChange* existing = from->next()->as_CachingChange();
    assert(existing->pred_block()->is_set(BlockBegin::single_precision_flag) == from->is_set(BlockBegin::single_precision_flag), "consistency check");
    assert(existing->sux_block()->is_set(BlockBegin::single_precision_flag) == to->is_set(BlockBegin::single_precision_flag), "consistency check");
#endif
    return NULL;
  }

  // pick the bci of the new block: the target's bci when the target has a
  // single predecessor, otherwise the bci of the end of 'from'
  BlockLoopInfo* to_info = get_block_info(to);
  const int bci = (to_info->nof_preds() == 1) ? to->bci() : from->end()->bci();

  // build the block: CachingChange followed by an unconditional jump to 'to'
  BlockBegin* caching_block = new_block(to->scope(), bci);
  BlockEnd* jump = new Goto(to, false);
  caching_block->set_end(jump);
  caching_block->set_next(new CachingChange(from, to), bci)->set_next(jump, bci);
  if (PrintLoops && Verbose) {
    tty->print_cr("Added caching block B%d (dest B%d)", caching_block->block_id(), to->block_id());
  }

  // redirect the edge and hand the state at the end of 'from' to the new block
  BlockEnd* from_end = from->end();
  from_end->substitute_sux(to, caching_block);
  caching_block->join(from_end->state());
  assert(caching_block->state() != NULL, "illegal operation");

  // the new block's end gets a copy of its entry state, which is then joined
  // into the target
  ValueStack* end_state = caching_block->state()->copy();
  caching_block->end()->set_state(end_state);
  to->join(end_state);

  assert(caching_block->end()->state() != NULL, "should have state");

  loops->update_loops(from, to, caching_block);
  return caching_block;
}
Пример #8
0
// Recomputes the dominator set of bb from the dominator sets of its
// predecessors and then recurses, depth first, into every successor of bb
// that is not yet marked in 'visited'.  Calls set_changed(true) whenever the
// recomputed set differs from the one stored for bb.
void LoopFinder::dominator_walk_sux(BlockBegin* bb, boolArray* visited) {
  // a jsr target must not be reached by this walk
  if (bb->is_set(BlockBegin::subroutine_entry_flag)) {
    set_not_ok();
  }
  BlockEnd*      block_end = bb->end();
  BlockLoopInfo* info      = get_block_info(bb);
  visited->at_put(bb->block_id(), true);

  // start from the set of all blocks and narrow it down
  BitMap candidate(max_blocks());
  candidate.set_from(*BlockLoopInfo::all_blocks_map());
  { // dominators of bb = intersection of the predecessors' dominators + bb
    const int pred_count = info->nof_preds();
    int p = 0;
    while (p < pred_count) {
      BlockBegin* pred      = info->pred_no(p++);
      BitMap      pred_doms = get_block_info(pred)->doms_map();
      candidate.set_intersection(pred_doms);
    }
    // a block always dominates itself
    candidate.at_put(bb->block_id(), true);

    // a difference to the stored set means another iteration is required
    // NOTE(review): the update below goes through a BitMap obtained from
    // doms_map(); presumably it shares storage with the stored map -- confirm
    BitMap stored = info->doms_map();
    if (!stored.is_same(candidate)) {
      set_changed(true);
      stored.set_from(candidate);
    }
  }
  { // continue with all successors that have not been visited yet
    const int sux_count = block_end->number_of_sux();
    for (int s = 0; s < sux_count; s++) {
      BlockBegin* sux = block_end->sux_at(s);
      if (visited->at(sux->block_id())) {
        continue;
      }
      dominator_walk_sux(sux, visited);
    }
  }
}
Пример #9
0
// Performs global value numbering on 'ir'.  All the work happens in this
// constructor: blocks are processed in linear-scan order, each block gets a
// value map nested in the value map of its dominator, and every instruction
// for which an equal value is already in the map receives a substitution.
// If at least one substitution was recorded, a SubstitutionResolver is run
// over the IR at the end.
GlobalValueNumbering::GlobalValueNumbering(IR* ir)
    : _current_map(NULL)
    , _value_maps(ir->linear_scan_order()->length(), NULL)
{
    TRACE_VALUE_NUMBERING(tty->print_cr("****** start of global value numbering"));

    ShortLoopOptimizer loop_opt(this);
    int num_substitutions = 0;

    BlockList* order = ir->linear_scan_order();
    const int block_total = order->length();

    BlockBegin* entry = order->at(0);
    assert(entry == ir->start() && entry->number_of_preds() == 0 && entry->dominator() == NULL, "must be start block");
    assert(entry->next()->as_Base() != NULL && entry->next()->next() == NULL, "start block must not have instructions");

    // the start block gets the initial, empty value map (nesting 0)
    set_value_map_of(entry, new ValueMap());

    // index 0 is the start block handled above
    for (int idx = 1; idx < block_total; idx++) {
        BlockBegin* cur = order->at(idx);
        TRACE_VALUE_NUMBERING(tty->print_cr("**** processing block B%d", cur->block_id()));

        const int pred_total = cur->number_of_preds();
        assert(pred_total > 0, "block must have predecessors");

        BlockBegin* dom_block = cur->dominator();
        assert(dom_block != NULL, "dominator must exist");
        assert(value_map_of(dom_block) != NULL, "value map of dominator must exist");

        // nest a new value map inside the dominator's map
        _current_map = new ValueMap(value_map_of(dom_block));

        if (pred_total == 1) {
            // single predecessor: it is the dominator, nothing has to be killed
            assert(dom_block == cur->pred_at(0), "dominator must be equal to predecessor");

        } else if (cur->is_set(BlockBegin::linear_scan_loop_header_flag)) {
            // incoming backward branches -> attempt the short-loop optimization
            if (!loop_opt.process(cur)) {
                // the loop is too complicated: stores inside the loop could hit
                // any memory load, so all of them are killed
                current_map()->kill_memory();
            }

        } else {
            // merge point reached by forward branches only
            for (int p = 0; p < pred_total; p++) {
                ValueMap* pred_map = value_map_of(cur->pred_at(p));

                if (pred_map == NULL) {
                    // predecessor not processed yet (possible with non-natural
                    // loops and OSR-compiles): kill all memory loads
                    current_map()->kill_memory();
                } else {
                    // take over the kills recorded for the predecessor
                    current_map()->kill_map(pred_map);
                }
            }
        }

        if (cur->is_set(BlockBegin::exception_entry_flag)) {
            current_map()->kill_exception();
        }

        TRACE_VALUE_NUMBERING(tty->print("value map before processing block: "); current_map()->print());

        // walk the instructions of the block
        Value instr = cur->next();
        while (instr != NULL) {
            assert(!instr->has_subst(), "substitution already set");

            // the visitor kills values invalidated by this instruction
            instr->visit(this);

            // only instructions with a non-zero hash are looked up in the map
            if (instr->hash() != 0) {
                Value found = current_map()->find_insert(instr);
                if (found != instr) {
                    assert(!found->has_subst(), "can't have a substitution");
                    instr->set_subst(found);
                    num_substitutions++;
                }
            }

            instr = instr->next();
        }

        // keep the map so that blocks processed later can nest inside it
        set_value_map_of(cur, current_map());
    }

    if (num_substitutions != 0) {
        // the work is done by the constructor of this object
        SubstitutionResolver resolver(ir);
    }

    TRACE_VALUE_NUMBERING(tty->print("****** end of global value numbering. "); ValueMap::print_statistics());
}
Пример #10
0
// Computes the dominator set of every block reachable from the IR start block
// by iterating dominator_walk_sux() until no set changes any more.
//
// 'visited' is used as the per-iteration mark array; it is reset between
// iterations and, after a converged run, holds the marks of the last walk.
// If no fixed point is reached within the iteration budget, the finder is
// marked as not ok.  On success _valid_doms is set.
void LoopFinder::compute_dominators(boolArray* visited) {
  // set up a bitmap that contains all blocks
  BitMap all_map(max_blocks());
  all_map.clear();
  for (int i = 0; i < max_blocks(); i++) all_map.at_put(i, true);
  // published for the duration of this function only; reset to NULL below
  BlockLoopInfo::set_all_blocks_map(&all_map);
  { // initialize block loop info and set all predecessors
    CreateInfoClosure c(this);
    ir()->iterate_preorder(&c);
    SetPredsClosure s(this);
    ir()->iterate_preorder(&s);
  }
  { // compute dominators
    // init dominators
    // set entry block (only one exists) and its dominators
    BlockBegin* root_bb = ir()->start();
    assert(!root_bb->is_set(BlockBegin::subroutine_entry_flag), "root may not be jsr target");

    { // initialize root dominators (only itself)
      BitMap root_doms(max_blocks());
      root_doms.clear();
      root_doms.at_put(root_bb->block_id(), true);
      BlockLoopInfo* bli = get_block_info(root_bb);
      bli->doms_map().set_from(root_doms);
    }

    // iterate until either the iteration budget is used up or the dominators
    // are stable
    int iter_count = 0;
    do {
      iter_count++;
      // the root is marked up front so that the walk never re-enters it
      visited->at_put(root_bb->block_id(), true);
      set_changed(false);
      BlockEnd* be = root_bb->end();
      int n = be->number_of_sux();
      for (int i = 0; i < n; i++) {
        BlockBegin* sux = be->sux_at(i);
        if (!visited->at(sux->block_id())) {
          dominator_walk_sux(sux, visited);
        }
      }
      if (changed()) {
        // another pass is needed: clear all marks
        for (int i = visited->length() - 1; i >= 0; i--) {
          visited->at_put(i, false);
        }
      }
    } while (changed() && iter_count <= max_nof_dom_iterations);
    // The loop can only be left with changed() still set when the iteration
    // budget ran out before a fixed point was reached.  (Comparing iter_count
    // against max_nof_dom_iterations for equality would miss that case,
    // because iter_count is already one past the limit here, and would reject
    // a run that converged exactly on the last permitted iteration.)
    if (changed()) {
      if (PrintLoops) {
        tty->print_cr("could not computer dominators");
      }
      set_not_ok();
    }

    // if (PrintLoops) {
    //   tty->print_cr("  Dominators: %d iterations", iter_count);
    //   PrintBlockDominators p(this);
    //   ir()->iterate_topological(&p);
    // }
  }
  BlockLoopInfo::set_all_blocks_map(NULL);


  // go through all blocks; if a block has not been analyzed, then check its
  // predecessors and successors: all must be also un-analyzed;
  // Note: the block may not be JSR blocks
  if (ok()) {
    _valid_doms = true;
#ifdef ASSERT
    CheckDomClosure cc(visited);
    ir()->iterate_preorder(&cc);
#endif
  }

}
Пример #11
0
void LinearScan::allocate_fpu_stack() {
  // First compute which FPU registers are live at the start of each basic block
  // (To minimize the amount of work we have to do if we have to merge FPU stacks)
  if (ComputeExactFPURegisterUsage) {
    Interval* intervals_in_register, *intervals_in_memory;
    create_unhandled_lists(&intervals_in_register, &intervals_in_memory, is_in_fpu_register, NULL);

    // ignore memory intervals by overwriting intervals_in_memory
    // the dummy interval is needed to enforce the walker to walk until the given id:
    // without it, the walker stops when the unhandled-list is empty -> live information
    // beyond this point would be incorrect.
    Interval* dummy_interval = new Interval(any_reg);
    dummy_interval->add_range(max_jint - 2, max_jint - 1);
    dummy_interval->set_next(Interval::end());
    intervals_in_memory = dummy_interval;

    IntervalWalker iw(this, intervals_in_register, intervals_in_memory);

    const int num_blocks = block_count();
    for (int i = 0; i < num_blocks; i++) {
      BlockBegin* b = block_at(i);

      // register usage is only needed for merging stacks -> compute only
      // when more than one predecessor.
      // the block must not have any spill moves at the beginning (checked by assertions)
      // spill moves would use intervals that are marked as handled and so the usage bit
      // would been set incorrectly

      // NOTE: the check for number_of_preds > 1 is necessary. A block with only one
      //       predecessor may have spill moves at the begin of the block.
      //       If an interval ends at the current instruction id, it is not possible
      //       to decide if the register is live or not at the block begin -> the
      //       register information would be incorrect.
      if (b->number_of_preds() > 1) {
        int id = b->first_lir_instruction_id();
        ResourceBitMap regs(FrameMap::nof_fpu_regs);

        iw.walk_to(id);   // walk after the first instruction (always a label) of the block
        assert(iw.current_position() == id, "did not walk completely to id");

        // Only consider FPU values in registers
        Interval* interval = iw.active_first(fixedKind);
        while (interval != Interval::end()) {
          int reg = interval->assigned_reg();
          assert(reg >= pd_first_fpu_reg && reg <= pd_last_fpu_reg, "no fpu register");
          assert(interval->assigned_regHi() == -1, "must not have hi register (doubles stored in one register)");
          assert(interval->from() <= id && id < interval->to(), "interval out of range");

#ifndef PRODUCT
          if (TraceFPURegisterUsage) {
            tty->print("fpu reg %d is live because of ", reg - pd_first_fpu_reg); interval->print();
          }
#endif

          regs.set_bit(reg - pd_first_fpu_reg);
          interval = interval->next();
        }

        b->set_fpu_register_usage(regs);

#ifndef PRODUCT
        if (TraceFPURegisterUsage) {
          tty->print("FPU regs for block %d, LIR instr %d): ", b->block_id(), id); regs.print_on(tty); tty->cr();
        }
#endif
      }
    }
  }

  FpuStackAllocator alloc(ir()->compilation(), this);
  _fpu_stack_allocator = &alloc;
  alloc.allocate();
  _fpu_stack_allocator = NULL;
}
Пример #12
0
// Propagates the current simulated FPU stack state of 'block' to its
// successors.
//
// If 'block' has exactly one successor and that successor has more than one
// predecessor, the stack is either merged with the successor's existing state
// or (if the successor has none yet) cleaned up and stored on the successor;
// LIR generated for this is inserted before the current position.
// Otherwise the unmodified state is stored on every successor.
//
// Returns true if LIR instructions were inserted.
bool FpuStackAllocator::merge_fpu_stack_with_successors(BlockBegin* block) {
#ifndef PRODUCT
  if (TraceFPUStack) {
    tty->print_cr("Propagating FPU stack state for B%d at LIR_Op position %d to successors:",
                  block->block_id(), pos());
    sim()->print();
    tty->cr();
  }
#endif

  bool inserted_code = false;
  int number_of_sux = block->number_of_sux();

  if (number_of_sux == 1 && block->sux_at(0)->number_of_preds() > 1) {
    // The successor is a merge point (two or more incoming edges):
    //  - the first predecessor to arrive cleans up its stack and propagates it
    //  - every later predecessor has to merge its stack with the stored one

    BlockBegin* sux = block->sux_at(0);
    intArray* sux_entry_state = sux->fpu_stack_state();
    LIR_List* instrs = new LIR_List(_compilation);

    if (sux_entry_state == NULL) {
      // no state on the successor yet: propagate the current one, after
      // removing dead values from the stack
      if (ComputeExactFPURegisterUsage) {
        FpuStackSim* cur_sim = sim();
        ResourceBitMap live_fpu_regs = sux->fpu_register_usage();
        assert(live_fpu_regs.size() == FrameMap::nof_fpu_regs, "missing register usage");

        merge_cleanup_fpu_stack(instrs, cur_sim, live_fpu_regs);
      }

      intArray* propagated = sim()->write_state();
      if (TraceFPUStack) {
        tty->print_cr("Setting FPU stack state of B%d (merge path)", sux->block_id());
        sim()->print(); tty->cr();
      }
      sux->set_fpu_stack_state(propagated);

    } else {
      // the successor already has a state: merge the current stack into it
      // (there can be only one successor here because critical edges have
      // been split)
      FpuStackSim* cur_sim = sim();
      FpuStackSim* sux_sim = temp_sim();
      sux_sim->read_state(sux_entry_state);

      merge_fpu_stack(instrs, cur_sim, sux_sim);
    }

    if (instrs->instructions_list()->length() > 0) {
      // splice the generated code in and move the position past it
      lir()->insert_before(pos(), instrs);
      set_pos(instrs->instructions_list()->length() + pos());
      inserted_code = true;
    }

  } else {
    // no merge required: hand the unmodified stack to every successor
    intArray* propagated = sim()->write_state();
    for (int s = 0; s < number_of_sux; s++) {
      BlockBegin* sux = block->sux_at(s);

#ifdef ASSERT
      for (int p = 0; p < sux->number_of_preds(); p++) {
        assert(block == sux->pred_at(p), "all critical edges must be broken");
      }

      // a state already present on the successor must equal the new one
      if (sux->fpu_stack_state() != NULL) {
        intArray* existing = sux->fpu_stack_state();
        assert(propagated->length() == existing->length(), "overwriting existing stack state");
        for (int k = 0; k < propagated->length(); k++) {
          assert(propagated->at(k) == existing->at(k), "overwriting existing stack state");
        }
      }
#endif
#ifndef PRODUCT
      if (TraceFPUStack) {
        tty->print_cr("Setting FPU stack state of B%d", sux->block_id());
        sim()->print(); tty->cr();
      }
#endif

      sux->set_fpu_stack_state(propagated);
    }
  }

#ifndef PRODUCT
  // verify that the final FPU stack state matches the state of each successor
  intArray* final_state = sim()->write_state();
  for (int s = 0; s < number_of_sux; s++) {
    BlockBegin* sux = block->sux_at(s);
    intArray* sux_state = sux->fpu_stack_state();

    assert(sux_state != NULL, "no fpu state");
    assert(final_state->length() == sux_state->length(), "incorrect length");
    for (int k = 0; k < final_state->length(); k++) {
      assert(final_state->at(k) == sux_state->at(k), "element not equal");
    }
  }
#endif

  return inserted_code;
}