Example #1
// Find the IndexSet such that modDown to that set of primes makes the
// additive term due to rounding into the dominant noise term 
void Ctxt::findBaseSet(IndexSet& s) const
{
  if (getNoiseVar()<=0.0) { // an empty ciphertext
    s = context.ctxtPrimes;
    return;
  }

  assert(verifyPrimeSet());
  bool halfSize = context.containsSmallPrime();
  double curNoise = log(getNoiseVar())/2;
  double firstNoise = context.logOfPrime(0);
  double noiseThreshold = log(modSwitchAddedNoiseVar())*0.55;
  // FIXME: The above should have been 0.5. Making it a bit more means
  // that we will mod-switch a little less frequently, whether this is
  // a good thing needs to be tested.

  // remove special primes, if they are included in this->primeSet
  s = getPrimeSet();
  if (!s.disjointFrom(context.specialPrimes)) { 
    // scale down noise
    curNoise -= context.logOfProduct(context.specialPrimes);
    s.remove(context.specialPrimes);
  }

  /* We compare below to noiseThreshold+1 rather than to noiseThreshold
   * to make sure that if you mod-switch down to c.findBaseSet() and
   * then immediately call c.findBaseSet() again, it will not tell you
   * to mod-switch further down. Note that mod-switching adds close to
   * noiseThreshold to the scaled noise, so if the scaled noise was
   * equal to noiseThreshold then after mod-switching you would have
   * roughly twice as much noise. Since we're measuring the log, it means
   * that you may have as much as noiseThreshold+log(2), which we round
   * up to noiseThreshold+1 in the test below.
   */
  if (curNoise<=noiseThreshold+1) return; // no need to mod down

  // if the first prime is half-size, begin by removing it
  if (halfSize && s.contains(0)) {
    curNoise -= firstNoise;
    s.remove(0);
  }

  // while noise is larger than threshold, scale down by the next prime
  while (curNoise>noiseThreshold && !empty(s)) {
    curNoise -= context.logOfPrime(s.last());
    s.remove(s.last());
  }

  // Add 1st prime if s is empty or if this does not increase noise too much
  if (empty(s) || (!s.contains(0) && curNoise+firstNoise<=noiseThreshold)) {
    s.insert(0);
    curNoise += firstNoise;
  }

  if (curNoise>noiseThreshold && log_of_ratio()>-0.5)
    cerr << "Ctxt::findBaseSet warning: already at lowest level\n";
}
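
A minimal usage sketch (not from the original source): a hypothetical helper that pairs findBaseSet with Ctxt::modDownToSet (which appears in Example #5 below), so that the additive rounding term becomes the dominant noise before further operations.

// Hypothetical helper, assuming only findBaseSet (above) and
// Ctxt::modDownToSet (used in Example #5 below).
void dropToBaseSet(Ctxt& c)
{
  IndexSet s;
  c.findBaseSet(s);  // pick the prime set that balances noise vs. modulus
  c.modDownToSet(s); // mod-switch down; rounding noise now dominates
}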
Example #2
File: Ctxt.cpp Project: 2080/HElib
// Find the IndexSet such that modDown to that set of primes makes the
// additive term due to rounding into the dominant noise term 
void Ctxt::findBaseSet(IndexSet& s) const
{
  if (getNoiseVar()<=0.0) { // an empty ciphertext
    s = context.ctxtPrimes;
    return;
  }

  assert(verifyPrimeSet());
  bool halfSize = context.containsSmallPrime();
  double addedNoise = log(modSwitchAddedNoiseVar())/2;
  double curNoise = log(getNoiseVar())/2;
  double firstNoise = context.logOfPrime(0);

  // remove special primes, if they are included in this->primeSet
  s = getPrimeSet();
  if (!s.disjointFrom(context.specialPrimes)) { 
    // scale down noise
    curNoise -= context.logOfProduct(context.specialPrimes);
    s.remove(context.specialPrimes);
  }

  if (curNoise<=2*addedNoise) return; // no need to mod down

  // if the first prime is half-size, begin by removing it
  if (halfSize && s.contains(0)) {
    curNoise -= firstNoise;
    s.remove(0);
  }

  // while noise is larger than added term, scale down by the next prime
  while (curNoise>addedNoise && card(s)>1) {
    curNoise -= context.logOfPrime(s.last());
    s.remove(s.last());
  }

  if (halfSize) {
    // If noise is still too big, drop last big prime and insert half-size prime
    if (curNoise>addedNoise) {
      curNoise = firstNoise;
      s = IndexSet(0);
    } 
    // Otherwise check if you can add back the half-size prime
    else if (curNoise+firstNoise <= addedNoise) {
      curNoise += firstNoise;
      s.insert(0);
    }
  }

  if (curNoise>addedNoise && log_of_ratio()>-0.5)
    cerr << "Ctxt::findBaseSet warning: already at lowest level\n";
}
Example #3
void PhaseLive::compute(uint maxlrg) {
  _maxlrg   = maxlrg;
  _worklist = new (_arena) Block_List();

  // Init the sparse live arrays.  This data is live on exit from here!
  // The _live info is the live-out info.
  _live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*_cfg._num_blocks);
  uint i;
  for( i=0; i<_cfg._num_blocks; i++ ) {
    _live[i].initialize(_maxlrg);
  }

  // Init the sparse arrays for delta-sets.  
  ResourceMark rm;              // Nuke temp storage on exit

  // Does the memory used by _defs and _deltas get reclaimed?  Does it matter?  TT

  // Array of values defined locally in blocks
  _defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg._num_blocks);
  for( i=0; i<_cfg._num_blocks; i++ ) {
    _defs[i].initialize(_maxlrg);
  }

  // Array of delta-set pointers, indexed by block pre_order-1.
  _deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg._num_blocks);
  memset( _deltas, 0, sizeof(IndexSet*)* _cfg._num_blocks);

  _free_IndexSet = NULL;

  // Blocks having done pass-1
  VectorSet first_pass(Thread::current()->resource_area());

  // Outer loop: must compute local live-in sets and push into predecessors.
  uint iters = _cfg._num_blocks;        // stat counters
  for( uint j=_cfg._num_blocks; j>0; j-- ) {
    Block *b = _cfg._blocks[j-1];

    // Compute the local live-in set.  Start with any new live-out bits.
    IndexSet *use = getset( b );
    IndexSet *def = &_defs[b->_pre_order-1];
    uint i;
    for( i=b->_nodes.size(); i>1; i-- ) {
      Node *n = b->_nodes[i-1];
      if( n->is_Phi() ) break;
      // BoxNodes keep their input alive as long as their uses.  If we
      // see a BoxNode then make its input live to the Root block.
      // Because we are solving LIVEness, the input now becomes live
      // over the whole procedure, interfering with everything else
      // and getting a private unshared stack slot.  YeeeHaw!
      MachNode *mach = n->is_Mach();
      if( mach && mach->ideal_Opcode() == Op_Box ) 
        getset(_cfg._broot)->insert( _names[n->in(1)->_idx] );

      uint r = _names[n->_idx];
      def->insert( r );
      use->remove( r );
      uint cnt = n->req();
      for( uint k=1; k<cnt; k++ ) {
        Node *nk = n->in(k);
        uint nkidx = nk->_idx;
        if( _cfg._bbs[nkidx] != b )
          use->insert( _names[nkidx] );
      }
    }
    // Remove anything defined by Phis and the block start instruction
    for( uint k=i; k>0; k-- ) {
      uint r = _names[b->_nodes[k-1]->_idx];
      def->insert( r );
      use->remove( r );
    }

    // Push these live-in things to predecessors
    for( uint l=1; l<b->num_preds(); l++ ) {
      Block *p = _cfg._bbs[b->pred(l)->_idx];
      add_liveout( p, use, first_pass );

      // PhiNode uses go in the live-out set of prior blocks.
      for( uint k=i; k>0; k-- ) 
        add_liveout( p, _names[b->_nodes[k-1]->in(l)->_idx], first_pass );
    }
    freeset( b );
    first_pass.set(b->_pre_order);
    
    // Inner loop: blocks that picked up new live-out values to be propagated
    while( _worklist->size() ) {
        // !!!!!
// #ifdef ASSERT
      iters++;
// #endif
      Block *b = _worklist->pop();
      IndexSet *delta = getset(b);
      assert( delta->count(), "missing delta set" );

      // Add new-live-in to predecessors live-out sets
      for( uint l=1; l<b->num_preds(); l++ ) 
        add_liveout( _cfg._bbs[b->pred(l)->_idx], delta, first_pass );

      freeset(b);
    } // End of while-worklist-not-empty

  } // End of for-all-blocks-outer-loop

  // We explicitly clear all of the IndexSets which we are about to release.
  // This allows us to recycle their internal memory into IndexSet's free list.

  for( i=0; i<_cfg._num_blocks; i++ ) {
    _defs[i].clear();
    if (_deltas[i]) {
      // Is this always true?
      _deltas[i]->clear();
    }
  }
  IndexSet *free = _free_IndexSet;
  while (free != NULL) {
    IndexSet *temp = free;
    free = free->next();
    temp->clear();
  }

}
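The comments above describe the standard backward liveness recurrence that PhaseLive solves with a worklist: live_in(b) = use(b) + (live_out(b) - def(b)), pushed into each predecessor's live_out until a fixed point. A schematic sketch of that recurrence, using std::set in place of the sparse IndexSet and an invented BlockInfo record (both assumptions for illustration, not the HotSpot API):

#include <set>
#include <vector>

// Hypothetical simplified block record; HotSpot's Block/IndexSet are richer.
struct BlockInfo {
  std::set<unsigned> def, use;   // locally defined / upward-exposed values
  std::set<unsigned> live_out;   // solution: values live on block exit
  std::vector<unsigned> preds;   // predecessor block indices
};

// Iterate until no predecessor's live_out set grows any further.
void solveLiveness(std::vector<BlockInfo>& blocks) {
  bool changed = true;
  while (changed) {
    changed = false;
    for (BlockInfo& b : blocks) {
      std::set<unsigned> live_in = b.use;          // use(b)
      for (unsigned v : b.live_out)                // + (live_out(b) - def(b))
        if (!b.def.count(v)) live_in.insert(v);
      for (unsigned p : b.preds)                   // push to predecessors
        for (unsigned v : live_in)
          changed |= blocks[p].live_out.insert(v).second;
    }
  }
}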
Example #4
void fixPartialRegisterStalls(Code& code)
{
    if (!isX86())
        return;

    PhaseScope phaseScope(code, "fixPartialRegisterStalls");

    Vector<BasicBlock*> candidates;

    for (BasicBlock* block : code) {
        for (const Inst& inst : *block) {
            if (hasPartialXmmRegUpdate(inst)) {
                candidates.append(block);
                break;
            }
        }
    }

    // Fortunately, partial register stalls are rare. Return early if no block
    // cares about them.
    if (candidates.isEmpty())
        return;

    // For each block, this provides the distance to the last instruction setting each register
    // on block *entry*.
    IndexMap<BasicBlock, FPDefDistance> lastDefDistance(code.size());

    // Blocks with dirty distance at head.
    IndexSet<BasicBlock> dirty;

    // First, we compute the local distance for each block and push it to the successors.
    for (BasicBlock* block : code) {
        FPDefDistance localDistance;

        unsigned distanceToBlockEnd = block->size();
        for (Inst& inst : *block)
            updateDistances(inst, localDistance, distanceToBlockEnd);

        for (BasicBlock* successor : block->successorBlocks()) {
            if (lastDefDistance[successor].updateFromPrecessor(localDistance))
                dirty.add(successor);
        }
    }

    // Now we propagate the minimums across blocks.
    bool changed;
    do {
        changed = false;

        for (BasicBlock* block : code) {
            if (!dirty.remove(block))
                continue;

            // Little shortcut: if the block is big enough, propagating it won't add any information.
            if (block->size() >= minimumSafeDistance)
                continue;

            unsigned blockSize = block->size();
            FPDefDistance& blockDistance = lastDefDistance[block];
            for (BasicBlock* successor : block->successorBlocks()) {
                if (lastDefDistance[successor].updateFromPrecessor(blockDistance, blockSize)) {
                    dirty.add(successor);
                    changed = true;
                }
            }
        }
    } while (changed);

    // Finally, update each block as needed.
    InsertionSet insertionSet(code);
    for (BasicBlock* block : candidates) {
        unsigned distanceToBlockEnd = block->size();
        FPDefDistance& localDistance = lastDefDistance[block];

        for (unsigned i = 0; i < block->size(); ++i) {
            Inst& inst = block->at(i);

            if (hasPartialXmmRegUpdate(inst)) {
                RegisterSet defs;
                RegisterSet uses;
                inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type) {
                    if (tmp.isFPR()) {
                        if (Arg::isDef(role))
                            defs.set(tmp.fpr());
                        if (Arg::isAnyUse(role))
                            uses.set(tmp.fpr());
                    }
                });
                // We only care about values we define but not use. Otherwise we have to wait
                // for the value to be resolved anyway.
                defs.exclude(uses);

                defs.forEach([&] (Reg reg) {
                    if (localDistance.distance[MacroAssembler::fpRegisterIndex(reg.fpr())] < minimumSafeDistance)
                        insertionSet.insert(i, MoveZeroToDouble, inst.origin, Tmp(reg));
                });
            }

            updateDistances(inst, localDistance, distanceToBlockEnd);
        }
        insertionSet.execute(block);
    }
}
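The two loops above are a forward min-distance dataflow: each block records, per FP register, the distance (in instructions) since its last full definition, and pushes that to successors, offset by block size, via a dirty-set fixed point. A toy sketch of just the merge rule, with invented types and constants (numFPRs, minimumSafeDistance are assumptions, not the WebKit API):

#include <algorithm>
#include <array>

constexpr unsigned numFPRs = 16;             // assumption: x86-64 XMM count
constexpr unsigned minimumSafeDistance = 16; // assumption, mirrors the pass

struct Distance {
  // d[i]: instructions since FP register i was fully defined,
  // saturated at minimumSafeDistance (beyond that, no stall risk).
  std::array<unsigned, numFPRs> d;
  Distance() { d.fill(minimumSafeDistance); }

  // Merge a predecessor's distances, shifted by the path length already
  // traversed; returns true if any entry got smaller (block is dirty).
  bool mergeFromPredecessor(const Distance& pred, unsigned offset = 0) {
    bool changed = false;
    for (unsigned i = 0; i < numFPRs; ++i) {
      unsigned v = std::min(pred.d[i] + offset, minimumSafeDistance);
      if (v < d[i]) { d[i] = v; changed = true; }
    }
    return changed;
  }
};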
Example #5
void adjustLevelForMult(Ctxt& c1, const char name1[], const ZZX& p1,
			Ctxt& c2, const char name2[], const ZZX& p2, 
			const FHESecKey& sk)
{
  const FHEcontext& context = c1.getContext();

  // The highest possible level for this multiplication is the
  // intersection of the two primeSets, without the special primes.
  IndexSet primes = c1.getPrimeSet() & c2.getPrimeSet();
  primes.remove(context.specialPrimes);
  assert (!empty(primes));

  //  double phim = (double) context.zMstar.phiM();
  //  double factor = c_m*sqrt(log(phim))*4;

  xdouble n1,n2,d1,d2;
  xdouble dvar1 = c1.modSwitchAddedNoiseVar();
  xdouble dvar2 = c2.modSwitchAddedNoiseVar();
  //  xdouble dmag1 = c1.modSwitchAddedNoiseMag(c_m);
  //  xdouble dmag2 = c2.modSwitchAddedNoiseMag(c_m);
  //  cout << " ** log(dvar1)=" << log(dvar1) 
  //       << ", log(dvar2)=" << log(dvar2) <<endl;

  double logF1, logF2;
  xdouble n1var, n2var, modSize; // n1mag, n2mag,
  // init to large number
  xdouble noiseVarRatio=xexp(2*(context.logOfProduct(context.ctxtPrimes)
				+ context.logOfProduct(context.specialPrimes)));
  //  xdouble noiseMagRatio=noiseVarRatio;

  // Find the level that minimizes the noise-to-modulus ratio
  bool oneLevelMore = false;
  for (IndexSet levelDown = primes; 
       !empty(levelDown); levelDown.remove(levelDown.last())) {

    // compute noise variance/magnitude after mod-switching to this level
    logF1 = context.logOfProduct(c1.getPrimeSet() / levelDown);
    n1var = c1.getNoiseVar()/xexp(2*logF1);

    logF2 = context.logOfProduct(c2.getPrimeSet() / levelDown);
    n2var = c2.getNoiseVar()/xexp(2*logF2);

    // compute modulus/noise ratio at this level
    modSize = xexp(context.logOfProduct(levelDown));
    xdouble nextNoiseVarRatio = sqrt((n1var+dvar1)*(n2var+dvar2))/modSize;

    if (nextNoiseVarRatio < 2*noiseVarRatio || oneLevelMore) {
      noiseVarRatio = nextNoiseVarRatio;
      primes = levelDown;  // record the currently best prime set
      n1 = n1var; d1=dvar1; n2 = n2var; d2=dvar2;
    }
    oneLevelMore = (n1var > dvar1 || n2var > dvar2);
  }

  if (primes < c1.getPrimeSet()) {
    cout << "          ** " << c1.getPrimeSet()<<"=>"<<primes << endl;
    cout << "             n1var="<<n1<<", d1var="<<d1<<endl;;
    c1.modDownToSet(primes);
    cout << name1 << ".mDown:"; checkCiphertext(c1, p1, sk);
  }
  if (primes < c2.getPrimeSet()) {
    cout << "          ** " << c2.getPrimeSet()<<"=>"<<primes << endl;
    cout << "             n2var="<<n2<<", d2var="<<d2<<endl;;
    c2.modDownToSet(primes);
    cout << name2 << ".mDown:"; checkCiphertext(c2, p2, sk);
  }
}
Example #6
//------------------------------insert_copies----------------------------------
void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
  // We compress LRGs and fix the liveout data only here, since the other
  // place in Split() is guarded by an assert which we never hit.
  _phc.compress_uf_map_for_nodes();
  // Fix block's liveout data for compressed live ranges.
  for(uint lrg = 1; lrg < _phc._maxlrg; lrg++ ) {
    uint compressed_lrg = _phc.Find(lrg);
    if( lrg != compressed_lrg ) {
      for( uint bidx = 0; bidx < _phc._cfg._num_blocks; bidx++ ) {
        IndexSet *liveout = _phc._live->live(_phc._cfg._blocks[bidx]);
        if( liveout->member(lrg) ) {
          liveout->remove(lrg);
          liveout->insert(compressed_lrg);
        }
      }
    }
  }

  // All new nodes added are actual copies to replace virtual copies.
  // Nodes with index less than '_unique' are original, non-virtual Nodes.
  _unique = C->unique();

  for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
    Block *b = _phc._cfg._blocks[i];
    uint cnt = b->num_preds();  // Number of inputs to the Phi

    for( uint l = 1; l<b->_nodes.size(); l++ ) {
      Node *n = b->_nodes[l];

      // Do not use removed-copies, use copied value instead
      uint ncnt = n->req();
      for( uint k = 1; k<ncnt; k++ ) {
        Node *copy = n->in(k);
        uint cidx = copy->is_Copy();
        if( cidx ) {
          Node *def = copy->in(cidx);
          if( _phc.Find(copy) == _phc.Find(def) )
            n->set_req(k,def);
        }
      }

      // Remove any explicit copies that get coalesced.
      uint cidx = n->is_Copy();
      if( cidx ) {
        Node *def = n->in(cidx);
        if( _phc.Find(n) == _phc.Find(def) ) {
          n->replace_by(def);
          n->set_req(cidx,NULL);
          b->_nodes.remove(l);
          l--;
          continue;
        }
      }

      if( n->is_Phi() ) {
        // Get the chosen name for the Phi
        uint phi_name = _phc.Find( n );
        // Ignore the pre-allocated specials
        if( !phi_name ) continue;
        // Check for mismatch inputs to Phi
        for( uint j = 1; j<cnt; j++ ) {
          Node *m = n->in(j);
          uint src_name = _phc.Find(m);
          if( src_name != phi_name ) {
            Block *pred = _phc._cfg._bbs[b->pred(j)->_idx];
            Node *copy;
            assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
            // Rematerialize constants instead of copying them
            if( m->is_Mach() && m->as_Mach()->is_Con() &&
                m->as_Mach()->rematerialize() ) {
              copy = m->clone();
              // Insert the copy in the predecessor basic block
              pred->add_inst(copy);
              // Copy any flags as well
              _phc.clone_projs( pred, pred->end_idx(), m, copy, _phc._maxlrg );
            } else {
              const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
              copy = new (C) MachSpillCopyNode(m,*rm,*rm);
              // Find a good place to insert.  Kinda tricky, use a subroutine
              insert_copy_with_overlap(pred,copy,phi_name,src_name);
            }
            // Insert the copy in the use-def chain
            n->set_req( j, copy );
            _phc._cfg._bbs.map( copy->_idx, pred );
            // Extend ("register allocate") the names array for the copy.
            _phc._names.extend( copy->_idx, phi_name );
          } // End of if Phi names do not match
        } // End of for all inputs to Phi
      } else { // End of if Phi

        // Now check for 2-address instructions
        uint idx;
        if( n->is_Mach() && (idx=n->as_Mach()->two_adr()) ) {
          // Get the chosen name for the Node
          uint name = _phc.Find( n );
          assert( name, "no 2-address specials" );
          // Check for name mis-match on the 2-address input
          Node *m = n->in(idx);
          if( _phc.Find(m) != name ) {
            Node *copy;
            assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
            // At this point it is unsafe to extend live ranges (6550579).
            // Rematerialize only constants as we do for Phi above.
            if( m->is_Mach() && m->as_Mach()->is_Con() &&
                m->as_Mach()->rematerialize() ) {
              copy = m->clone();
              // Insert the copy in the basic block, just before us
              b->_nodes.insert( l++, copy );
              if( _phc.clone_projs( b, l, m, copy, _phc._maxlrg ) )
                l++;
            } else {
              const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
              copy = new (C) MachSpillCopyNode( m, *rm, *rm );
              // Insert the copy in the basic block, just before us
              b->_nodes.insert( l++, copy );
            }
            // Insert the copy in the use-def chain
            n->set_req(idx, copy );
            // Extend ("register allocate") the names array for the copy.
            _phc._names.extend( copy->_idx, name );
            _phc._cfg._bbs.map( copy->_idx, b );
          }

        } // End of is two-adr

        // Insert a copy at a debug use for a lrg which has high frequency
        if( b->_freq < OPTO_DEBUG_SPLIT_FREQ || b->is_uncommon(_phc._cfg._bbs) ) {
          // Walk the debug inputs to the node and check for lrg freq
          JVMState* jvms = n->jvms();
          uint debug_start = jvms ? jvms->debug_start() : 999999;
          uint debug_end   = jvms ? jvms->debug_end()   : 999999;
          for(uint inpidx = debug_start; inpidx < debug_end; inpidx++) {
            // Do not split monitors; they are only needed for debug table
            // entries and need no code.
            if( jvms->is_monitor_use(inpidx) ) continue;
            Node *inp = n->in(inpidx);
            uint nidx = _phc.n2lidx(inp);
            LRG &lrg = lrgs(nidx);

            // If this lrg has a high frequency use/def
            if( lrg._maxfreq >= _phc.high_frequency_lrg() ) {
              // If the live range is also live out of this block (like it
              // would be for a fast/slow idiom), the normal spill mechanism
              // does an excellent job.  If it is not live out of this block
              // (like it would be for debug info to uncommon trap) splitting
              // the live range now allows a better allocation in the high
              // frequency blocks.
              //   Build_IFG_virtual has converted the live sets to
              // live-IN info, not live-OUT info.
              uint k;
              for( k=0; k < b->_num_succs; k++ )
                if( _phc._live->live(b->_succs[k])->member( nidx ) )
                  break;      // Live in to some successor block?
              if( k < b->_num_succs )
                continue;     // Live out; do not pre-split
              // Split the lrg at this use
              const RegMask *rm = C->matcher()->idealreg2spillmask[inp->ideal_reg()];
              Node *copy = new (C) MachSpillCopyNode( inp, *rm, *rm );
              // Insert the copy in the use-def chain
              n->set_req(inpidx, copy );
              // Insert the copy in the basic block, just before us
              b->_nodes.insert( l++, copy );
              // Extend ("register allocate") the names array for the copy.
              _phc.new_lrg( copy, _phc._maxlrg++ );
              _phc._cfg._bbs.map( copy->_idx, b );
              //tty->print_cr("Split a debug use in Aggressive Coalesce");
            }  // End of if high frequency use/def
          }  // End of for all debug inputs
        }  // End of if low frequency safepoint

      } // End of if Phi

    } // End of for all instructions
  } // End of for all blocks
}
Example #7
//------------------------------build_ifg_virtual------------------------------
// Actually build the interference graph.  Uses virtual registers only, no
// physical register masks.  This allows me to be very aggressive when
// coalescing copies.  Some of this aggressiveness will have to be undone
// later, but I'd rather get all the copies I can now (since unremoved copies
// at this point can end up in bad places).  Copies I re-insert later give me
// more opportunity to place them in low-frequency locations.
void PhaseChaitin::build_ifg_virtual( ) {

  // For all blocks (in any order) do...
  for( uint i=0; i<_cfg._num_blocks; i++ ) {
    Block *b = _cfg._blocks[i];
    IndexSet *liveout = _live->live(b);

    // The IFG is built by a single reverse pass over each basic block.
    // Starting with the known live-out set, we remove things that get
    // defined and add things that become live (essentially executing one
    // pass of a standard LIVE analysis). Just before a Node defines a value
    // (and removes it from the live-ness set) that value is certainly live.
    // The defined value interferes with everything currently live.  The
    // value is then removed from the live-ness set and its inputs are
    // added to the live-ness set.
    for( uint j = b->end_idx() + 1; j > 1; j-- ) {
      Node *n = b->_nodes[j-1];

      // Get value being defined
      uint r = n2lidx(n);

      // Some special values do not allocate
      if( r ) {

        // Remove from live-out set
        liveout->remove(r);

        // Copies do not define a new value and so do not interfere.
        // Remove the copy's source from the liveout set before interfering.
        uint idx = n->is_Copy();
        if( idx ) liveout->remove( n2lidx(n->in(idx)) );

        // Interfere with everything live
        interfere_with_live( r, liveout );
      }

      // Make all inputs live
      if( !n->is_Phi() ) {      // Phi function uses come from prior block
        for( uint k = 1; k < n->req(); k++ )
          liveout->insert( n2lidx(n->in(k)) );
      }

      // 2-address instructions always have the defined value live
      // on entry to the instruction, even though it is being defined
      // by the instruction.  We pretend a virtual copy sits just prior
      // to the instruction and kills the src-def'd register.
      // In other words, for 2-address instructions the defined value
      // interferes with all inputs.
      uint idx;
      if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) {
        const MachNode *mach = n->as_Mach();
        // Sometimes my 2-address ADDs are commuted in a bad way.
        // We generally want the USE-DEF register to refer to the
        // loop-varying quantity, to avoid a copy.
        uint op = mach->ideal_Opcode();
        // Check that mach->num_opnds() == 3 to ensure instruction is
        // not subsuming constants, effectively excludes addI_cin_imm
        // Can NOT swap for instructions like addI_cin_imm since it
        // is adding zero to yhi + carry and the second ideal-input
        // points to the result of adding low-halves.
        // Checking req() and num_opnds() does NOT distinguish addI_cout from addI_cout_imm
        if( (op == Op_AddI && mach->req() == 3 && mach->num_opnds() == 3) &&
            n->in(1)->bottom_type()->base() == Type::Int &&
            // See if the ADD is involved in a tight data loop the wrong way
            n->in(2)->is_Phi() &&
            n->in(2)->in(2) == n ) {
          Node *tmp = n->in(1);
          n->set_req( 1, n->in(2) );
          n->set_req( 2, tmp );
        }
        // Defined value interferes with all inputs
        uint lidx = n2lidx(n->in(idx));
        for( uint k = 1; k < n->req(); k++ ) {
          uint kidx = n2lidx(n->in(k));
          if( kidx != lidx )
            _ifg->add_edge( r, kidx );
        }
      }
    } // End of forall instructions in block
  } // End of forall blocks
}
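
A schematic of the single reverse pass the comment block describes, again with std::set standing in for IndexSet and invented Instr/Graph types (assumptions for illustration, not the HotSpot API; Phi and 2-address handling omitted):

#include <set>
#include <utility>
#include <vector>

// Hypothetical minimal instruction: one defined value, several used values.
struct Instr {
  unsigned def = 0;              // 0 means "defines nothing"
  std::vector<unsigned> uses;
};

// Stand-in for PhaseIFG: undirected edges stored as ordered pairs.
struct Graph {
  std::set<std::pair<unsigned,unsigned>> edges;
  void add_edge(unsigned a, unsigned b) {
    if (a == b) return;          // no self-interference
    if (a > b) std::swap(a, b);
    edges.insert({a, b});
  }
};

// Walk a block backwards from its live-out set: each definition interferes
// with everything currently live, then dies, and its inputs become live.
void buildIFGForBlock(const std::vector<Instr>& block,
                      std::set<unsigned> liveout, Graph& ifg) {
  for (auto it = block.rbegin(); it != block.rend(); ++it) {
    if (it->def) {
      liveout.erase(it->def);    // the defined value is born here
      for (unsigned v : liveout)
        ifg.add_edge(it->def, v);
    }
    for (unsigned u : it->uses)  // inputs are live before this point
      liveout.insert(u);
  }
}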
Example #8
// set minus
IndexSet operator/(const IndexSet& s, const IndexSet& t) {
  IndexSet r = s;
  r.remove(t);
  return r;
}
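
A small usage sketch (hypothetical values; insert is the same IndexSet method used in the examples above):

void exampleSetMinus()
{
  IndexSet s, t;
  s.insert(0); s.insert(1); s.insert(2);  // s = {0, 1, 2}
  t.insert(1);                            // t = {1}
  IndexSet diff = s / t;                  // diff = {0, 2}
}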
Example #9
// exclusive-or
IndexSet operator^(const IndexSet& s, const IndexSet& t) {
  IndexSet r = s | t;
  r.remove(s & t);
  return r;
}
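
And the corresponding sketch for the symmetric difference (hypothetical values):

void exampleXor()
{
  IndexSet a, b;
  a.insert(0); a.insert(1);               // a = {0, 1}
  b.insert(1); b.insert(2);               // b = {1, 2}
  IndexSet x = a ^ b;                     // x = {0, 2}: in one set, not both
}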