コード例 #1
0
/*! \brief Run dead code elimination over every basic block of kernel k.

	The pass obtains the dataflow graph analysis, queues every block of the
	graph, and then hands the queued blocks to eliminateDeadInstructions()
	one at a time until the queue is empty.

	\param k The kernel being optimized (used here for reporting).
*/
void DeadCodeEliminationPass::runOnKernel(ir::IRKernel& k)
{
	report("Running dead code elimination on kernel " << k.name);
	reportE(REPORT_PTX, k);

	// The pass works on the dataflow graph analysis, which must be available.
	Analysis* requested = getAnalysis(Analysis::DataflowGraphAnalysis);
	assert(requested != 0);

	analysis::DataflowGraph& dfg =
		static_cast<analysis::DataflowGraph&>(*requested);

	// The graph is expected to already be in some SSA form.
	assert(dfg.ssa() != analysis::DataflowGraph::SsaType::None);

	report(" Starting by scanning all basic blocks");

	// Seed the worklist with every block of the graph.
	BlockSet worklist;
	iterator cursor = dfg.begin();
	while(cursor != dfg.end())
	{
		report("  Queueing up BB_" << cursor->id());
		worklist.insert(cursor);
		++cursor;
	}

	// Drain the worklist. The helper also receives the worklist itself,
	// presumably so that it can queue blocks again -- confirm in its body.
	while(!worklist.empty())
	{
		BlockSet::iterator front = worklist.begin();
		iterator current = *front;
		worklist.erase(front);

		eliminateDeadInstructions(dfg, worklist, current);
	}

	report("Finished running dead code elimination on kernel " << k.name);
	reportE(REPORT_PTX, k);
}
コード例 #2
0
ファイル: Allocator.cpp プロジェクト: dougct/ocelot-ufmg
  /* Releases every on-register variable whose interval ends at or before p.
   * For each such variable its physical registers are returned to _available,
   * their entries are removed from _registerVariableMap, and the variable is
   * removed from _onRegister. */
  void Allocator::clearExpired(const Interval::Point &p)
  {
    const SpillPolicy::CoalescedSet::iterator stop = _onRegister.end();
    SpillPolicy::CoalescedSet::iterator it = _onRegister.begin();

    while(it != stop)
    {
      CoalescedRegister *variable = *it;

      if(!(variable->interval.end <= p))
      {
        /* Interval ends after p: leave the variable where it is */
        ++it;
        continue;
      }

      reportE(DEBUG,
          "Point: "<< p << "; Coalesced register " << variable->reg() << " expired at point (" << variable->interval.end << ')');

      /* Give the physical registers back, one at a time */
      while(!variable->allocated.empty())
      {
        reportE(DEBUG_DETAILS,
            "\tPoint: "<< p << "; Coalesced register was using physical register" << (*variable->allocated.begin()));
        _available.insert(*variable->allocated.begin());
        _registerVariableMap.erase(*variable->allocated.begin());
        variable->allocated.erase(variable->allocated.begin());
      }

      /* Post-increment moves past the element before it is erased */
      _onRegister.erase(it++);
    }
  }
コード例 #3
0
ファイル: Allocator.cpp プロジェクト: dougct/ocelot-ufmg
 void Allocator::setRegisters(const unsigned regs)
 {
   _regs = regs;
   reportE(INFO, "Setting number of physical registers to " << _regs);
   clear();
   assertM(_regs > 0, "No physical registers");
 }
コード例 #4
0
void AffineLinearScan::_coalesce()
{
	LinearScanRegisterAllocationPass::_coalesce();
	RegisterCoalescedMap::iterator affReg = _ssa.begin();
	while(affReg != _ssa.end())
	{
		AffineRegister &ar =
			static_cast<AffineRegister &>(*affReg->second);
		reportE(DEBUG, "Coalesced: " << affReg->second);
		reportE(DEBUG, "Start: " << ar.state());
		reportE(DEBUG, "In state: " << _afa().state(affReg->first));
		ar.combineState(_afa().state(affReg->first));
		reportE(DEBUG, "Out state: " << ar.state());
		affReg++;
	}
}
コード例 #5
0
void DivergenceLinearScan::_coalesce()
{
	LinearScanRegisterAllocationPass::_coalesce();
	CoalescedRegisterMap::iterator divReg = _ssa.begin();
	while(divReg != _ssa.end())
	{
		DivergenceRegister &dr =
			static_cast<DivergenceRegister &>(*_coalesced[divReg->second]);
		reportE(DEBUG, "Coalesced: " << divReg->second);
		reportE(DEBUG, "Start: " << dr.state());
		reportE(DEBUG, "In state: "
			<< _diva().getDivergenceGraph().isDivNode(divReg->first));
		dr.combineState(_diva().getDivergenceGraph().isDivNode(divReg->first));
		reportE(DEBUG, "Out state: " << dr.state());
		divReg++;
	}
}
コード例 #6
0
ファイル: Allocator.cpp プロジェクト: dougct/ocelot-ufmg
  /* Spills variables from `used` until the register demand fits in _regs.
   *
   * used          - candidate variables; every variable spilled here is also
   *                 removed from this set
   * p             - point handed to LRUSpillPolicy::rank()
   * totalRequired - in/out register demand; decreased by the size of each
   *                 variable that gets spilled
   *
   * Variables are ranked with LRUSpillPolicy and the variable with the highest
   * rank value is spilled first. */
  void Allocator::selectByLRU(SpillPolicy::CoalescedSet &used, const Interval::Point &p, unsigned &totalRequired){
    LRUSpillPolicy lru;
    const LRUSpillPolicy::CoalescedCostMap rank = lru.rank(used, p);
    reportE(INFO, "Discarding variables by access distance");

    /* Re-key the ranking by cost, so the highest cost is the last element.
     * Plain iteration visits the entries in the same order the previous
     * erase-from-the-front loop did, without mutating the ranking. */
    std::multimap<unsigned, CoalescedRegister*> costCoalescedMap;
    for(const auto &entry : rank){
      costCoalescedMap.insert(std::make_pair(entry.second, entry.first));
    }

    while(totalRequired > _regs){
      assert(used.size() > 0);
      /* Victims are taken from costCoalescedMap, so that is the container that
       * must not be empty: decrementing end() of an empty map is undefined
       * behaviour. */
      assertM(!costCoalescedMap.empty(), "No ranked variable left to discard");
      if(costCoalescedMap.empty()){
        /* Nothing left to spill; leave totalRequired as it is for the caller */
        break;
      }

      auto last = costCoalescedMap.end();
      --last;
      CoalescedRegister *victim = last->second;

      totalRequired -= victim->size();
      reportE(DEBUG, "Discarding variable " << victim->reg());
      victim->spill();
      used.erase(victim);
      costCoalescedMap.erase(last);
    }
  }
コード例 #7
0
/*! \brief Run constant propagation over every basic block of kernel k.

	The dataflow graph is converted to minimal SSA form, every block is
	queued, and the queued blocks are handed to
	eliminateRedundantInstructions() one at a time until the queue is empty.

	\param k The kernel being optimized (used here for reporting).
*/
void ConstantPropagationPass::runOnKernel(ir::IRKernel& k)
{
	report("Running constant propagation on kernel " << k.name);

	// The pass works on the dataflow graph analysis, which must be available.
	Analysis* requested = getAnalysis("DataflowGraphAnalysis");
	assert(requested != 0);

	analysis::DataflowGraph& dfg =
		static_cast<analysis::DataflowGraph&>(*requested);

	// Request minimal SSA form and verify that the graph reports it.
	dfg.convertToSSAType(analysis::DataflowGraph::Minimal);
	assert(dfg.ssa() == analysis::DataflowGraph::Minimal);

	report(" Starting by scanning all basic blocks");

	// Seed the worklist with every block of the graph.
	BlockSet worklist;
	iterator cursor = dfg.begin();
	while(cursor != dfg.end())
	{
		report("  Queueing up BB_" << cursor->id());
		worklist.insert(cursor);
		++cursor;
	}

	// Drain the worklist. The helper also receives the worklist itself,
	// presumably so that it can queue blocks again -- confirm in its body.
	while(!worklist.empty())
	{
		BlockSet::iterator front = worklist.begin();
		iterator current = *front;
		worklist.erase(front);

		eliminateRedundantInstructions(dfg, worklist, current);
	}

	report("Finished running constant propagation on kernel " << k.name);
	reportE(REPORT_PTX, k);
}
コード例 #8
0
ファイル: DivergenceAnalysis.cpp プロジェクト: Aeternam/gdev
/*! \brief Build the data dependence edges of _divergGraph and then propagate
	divergence over it.

	For every block of the dataflow graph this adds an edge from each phi
	source to the phi destination, and from each instruction source register
	(and each operand accepted by isDivergenceSource()) to each destination
	register. Destinations of atomic instructions, calls, and instructions
	whose 'a' operand uses local memory or is an argument are additionally
	marked as divergent. Finally _divergGraph.computeDivergence() is run.
*/
void DivergenceAnalysis::_analyzeDataFlow()
{
	Analysis* dfg = getAnalysis("DataflowGraphAnalysis");
	assert(dfg != 0);

	DataflowGraph &nonConstGraph = static_cast<DataflowGraph&>(*dfg);
	DataflowGraph::const_iterator block = nonConstGraph.begin();
	DataflowGraph::const_iterator endBlock = nonConstGraph.end();

	report("Analyzing data flow");

	/* 1) Analyze the data flow adding divergence sources */
	for (; block != endBlock; ++block) {
		report(" for block " << block->label());
		
		DataflowGraph::PhiInstructionVector::const_iterator
			phiInstruction = block->phis().begin();
		DataflowGraph::PhiInstructionVector::const_iterator
			endPhiInstruction = block->phis().end();
        /* Go over the phi functions and add their dependences to the
         * dependence graph. */
		for (; phiInstruction != endPhiInstruction; phiInstruction++) {
			for (DataflowGraph::RegisterVector::const_iterator
				si = phiInstruction->s.begin();
				si != phiInstruction->s.end(); ++si) {
				_divergGraph.insertEdge(si->id, phiInstruction->d.id);
				report("  phi r" << phiInstruction->d.id << " <- r" << si->id);
			}
		}

		DataflowGraph::InstructionVector::const_iterator
			ii = block->instructions().begin();
		DataflowGraph::InstructionVector::const_iterator
			iiEnd = block->instructions().end();
		for (; ii != iiEnd; ++ii) {

			/* Per-instruction facts; any of the four flags below makes every
			 * destination register of the instruction a divergence source. */
			ir::PTXInstruction *ptxInstruction = NULL;
			bool atom = false;
			bool functionStackArgument = false;
			bool localMemoryOperand = false;
			bool isCall = false;

			std::set<const ir::PTXOperand*> divergenceSources;

			/* First we populate divergenceSources with all the
			 * source operands that might diverge.
			 */
			if (typeid(ir::PTXInstruction) == typeid(*(ii->i))) {
				ptxInstruction = static_cast<ir::PTXInstruction*> (ii->i);
				if (isDivergenceSource(ptxInstruction->a)) {
					divergenceSources.insert(&ptxInstruction->a);
				}
				if (isDivergenceSource(ptxInstruction->b)) {
					divergenceSources.insert(&ptxInstruction->b);
				}
				if (isDivergenceSource(ptxInstruction->c)) {
					divergenceSources.insert(&ptxInstruction->c);
				}

				if (ptxInstruction->opcode == ir::PTXInstruction::Atom){
					atom = true;
				}
				
				/* Only operand 'a' is checked for local memory use */
				if (ptxInstruction->mayHaveAddressableOperand()) {
					if (_doesOperandUseLocalMemory(ptxInstruction->a)) {
						localMemoryOperand = true;
					}
				}
				
				if (ptxInstruction->opcode == ir::PTXInstruction::Call){
					isCall = true;
				}
			}

			/* Second, when _kernel->function() holds, check whether operand
			 * 'a' of an addressable instruction is an argument
			 * (_isOperandAnArgument) and flag the instruction if so.
			 */
			if (_kernel->function()) {
				if (typeid(ir::PTXInstruction) == typeid(*(ii->i))) {
					ptxInstruction = static_cast<ir::PTXInstruction*> (ii->i);
				
					if (ptxInstruction->mayHaveAddressableOperand()) {
						if (_isOperandAnArgument(ptxInstruction->a)) {
							functionStackArgument = true;
							report("  operand '" << ptxInstruction->a.toString()
								<< "' is a function call argument.");
						}
					}
				}
			}
						
			/* Third, we link the source operands to the
			 * destination operands, and check if the destination
			 * can diverge. The destination is marked divergent when the
			 * instruction is atomic, is a call, uses local memory, or
			 * reads a function stack argument. */
			DataflowGraph::RegisterPointerVector::const_iterator
				destinationReg = ii->d.begin();
			DataflowGraph::RegisterPointerVector::const_iterator
				destinationEndReg = ii->d.end();

			for (; destinationReg != destinationEndReg; destinationReg++) {
				if (divergenceSources.size() != 0) {
					std::set<const ir::PTXOperand*>::iterator
						divergenceSource = divergenceSources.begin();
					std::set<const ir::PTXOperand*>::iterator
						divergenceSourceEnd = divergenceSources.end();

					/* NOTE(review): *divergenceSource is a
					 * 'const ir::PTXOperand*', so the report below streams
					 * the operand pointer and insertEdge() receives that
					 * pointer as its first argument -- confirm this is the
					 * intended overload. */
					for (; divergenceSource != divergenceSourceEnd;
						divergenceSource++) {
						report("  destination register r"
							<< *destinationReg->pointer
							<< " is derived from a divergence source r"
							<< *divergenceSource);
						_divergGraph.insertEdge(*divergenceSource,
							*destinationReg->pointer);
					}
				}

				DataflowGraph::RegisterPointerVector::const_iterator
					sourceReg = ii->s.begin();
				DataflowGraph::RegisterPointerVector::const_iterator
					sourceRegEnd = ii->s.end();

				/* Every source register feeds every destination register */
				for (; sourceReg != sourceRegEnd; sourceReg++) {
					_divergGraph.insertEdge(*sourceReg->pointer,
						*destinationReg->pointer);
					reportE(REPORT_ALL_DEPENDENCES,
						"  r" << *destinationReg->pointer
						<< " <- r" << *sourceReg->pointer);
				}

				if (atom || functionStackArgument ||
					localMemoryOperand || isCall) {
					
					report("  destination register r"
						<< *destinationReg->pointer
						<< " is a divergence source.");
					_divergGraph.insertNode(*destinationReg->pointer);
					_divergGraph.setAsDiv(*destinationReg->pointer);
				}
			}
		}
	}
	/* 2) Computes the divergence propagation */
	_divergGraph.computeDivergence();
}
コード例 #9
0
ファイル: Allocator.cpp プロジェクト: dougct/ocelot-ufmg
  /* Places the variables of `used` on physical registers at point p, spilling
   * other on-register variables when they do not all fit.
   *
   * used          - variables required at p; taken by value because spilled
   *                 and LRU-discarded variables are removed from the local copy
   * p             - current point; expired variables are released first
   * ignoreSpilled - spilled variables are skipped instead of being allocated
   * spillUsed     - allow the request to exceed _regs; the excess is then
   *                 discarded through selectByLRU()
   * coalesced     - coalesced allocation, not implemented (asserts)
   * removeSpilled - with ignoreSpilled: also free the registers of a skipped
   *                 variable that is still marked as on-register
   *
   * Returns a reference to _onRegister. */
  const SpillPolicy::CoalescedSet & Allocator::use(SpillPolicy::CoalescedSet used,
    const Interval::Point &p, const bool ignoreSpilled, const bool spillUsed,
    const bool coalesced, const bool removeSpilled)
  {
    assert((_available.size() + _registerVariableMap.size()) == _regs);
    reportE(INFO, "Point: "<< p << "; Variables count: " << used.size());

    /* coalesced tells that all used variables must be placed on a coalesced amount of registers,
     * TODO: This is meant for vector variables, check if it is really required, and if it require
     * consequent registers
     * TODO: Spill/store/load vector variables all together using vector instructions */
    assertM(!(coalesced && (spillUsed || ignoreSpilled)), "Can't be coalesced when ignoring spilled");
    clearExpired(p);
    if(used.empty()){
      reportE(INFO, "No variables used");
      return _onRegister;
    }
    SpillPolicy::CoalescedSet::iterator crI = used.begin();
    SpillPolicy::CoalescedSet::iterator crIEnd = used.end();
    unsigned totalRegisters = 0;
    reportE(INFO, "Counting required registers");
    while(crI != crIEnd)
    {
      CoalescedRegister *cr = *crI;
      reportE(DEBUG, "Variable: " << cr->reg() << "; Size: " << cr->size());
      /* If spilled variables must be ignored, they are not allocated and removed from on register status */
      if(ignoreSpilled && cr->spilled())
      {
        reportE(DEBUG, "\tNot allocating spilled variable: " << cr->reg());
        if(removeSpilled && (_onRegister.find(cr) != _onRegister.end()))
        {
          reportE(DEBUG, "\t\tSpilled variable " << cr->reg() << " marked as on-register, removing it");
          while(cr->allocated.size() > 0)
          {
            reportE(DEBUG_DETAILS, "\t\t\tFreeing physical register " << *(cr->allocated.begin()));
            _available.insert(*(cr->allocated.begin()));
            _registerVariableMap.erase(_registerVariableMap.find(*(cr->allocated.begin())));
            cr->allocated.erase(cr->allocated.begin());
          }
          _onRegister.erase(cr);
        }
        /* Step past the element before erasing it from the local copy */
        SpillPolicy::CoalescedSet::iterator erase = crI;
        crI++;
        used.erase(erase);
        continue;
      }
      totalRegisters += cr->size();
      crI++;
    }

    if(used.empty())
    {
      reportE(DEBUG, "Nothing to do, all variables are spilled");
      return _onRegister;
    }

    /* If we can't spill variables on the used list, they must use at most the same amount of registers
     * as available */
    assertM((totalRegisters <= _regs) || spillUsed,
        "Variables larger than available registers");

    if(totalRegisters > _regs){
      selectByLRU(used, p, totalRegisters);
    }

    SpillPolicy::CoalescedSet worklist = used;
    /* If the variables need to be coalesced, then:
     * 1) All variables need to be the same size
     * 2) The first position will be aligned based on its size
     * This is expecting that vector require coalesced aligned registers */
    reportE(INFO, "Required registers: " << totalRegisters);

    if(coalesced)
    {//TODO: Implement coalesced allocating, if required for vectors
      reportE(INFO, "Coalesced allocation");
      assertM(false, "TODO: Implement coalesced allocating, required by vectors");
    }


    /* Variables that are already on register need no work */
    SpillPolicy::CoalescedSet::iterator variable = worklist.begin();
    while(variable != worklist.end()){
      if(_onRegister.find(*variable) != _onRegister.end()){
        reportE(DEBUG, "\tVariable " << (*variable)->reg()
            << " already on register, erasing it from worklist");
        SpillPolicy::CoalescedSet::iterator erase = variable;
        variable++;
        worklist.erase(erase);
        continue;
      }
      variable++;
    }
    /* Must fit larger variables first --> map variables by registers size */
    std::multimap<ushort, CoalescedRegister*> sizeCoalescedMap;
    for(SpillPolicy::CoalescedSet::const_iterator cr = worklist.begin();
        cr != worklist.end(); cr++)
    {
      reportE(DEBUG_DETAILS, "\tVariable " << (*cr)->reg() << "; Size: " << (*cr)->size());
      sizeCoalescedMap.insert(std::make_pair((*cr)->size(), *cr));
    }

    reportE(INFO, "Start testing if all fits on current available registers");
    /* Try to allocate variables by largest size first. If one won't fit, start spilling */
    while(worklist.size() > 0)
    {
      bool fit = true;

      std::multimap<ushort, CoalescedRegister*>::iterator crI = --sizeCoalescedMap.end();
      CoalescedRegister *cr = crI->second;
      reportE(DEBUG, "Test variable " << cr->reg() << " of size " << cr->size());
      /* Last start position that is aligned to the variable size */
      unsigned int maxReg = _regs - cr->size();
      maxReg -= (maxReg % cr->size());

      for(unsigned i = 0; i <= maxReg; i += cr->size())
      {
        fit = true;
        reportE(DEBUG_DETAILS, "Starting position " << i);
        for(unsigned u = i; fit && (u < (i + cr->size())); u++)
        {
          fit &= (_available.find(u) != _available.end());
          reportE(DEBUG_DETAILS, "Position " << u << " is busy?" << !fit);
        }
        if(fit)
        {
          reportE(DEBUG, "Allocating " << cr->reg() << " on position " << i);
          for(unsigned u = i; u < (i + cr->size()); u++)
          {
            cr->allocated.insert(u);
            /* The erase must not live inside assert(): with NDEBUG the macro
             * expands to nothing and the register would stay in _available */
            const bool wasAvailable = (_available.erase(u) != 0);
            assert(wasAvailable);
            (void) wasAvailable;
            _registerVariableMap[u] = cr;
          }
          sizeCoalescedMap.erase(crI);
          _onRegister.insert(cr);
          worklist.erase(cr);
          break;
        }
      }
      if(!fit){
        break;
      }
    }

    if(worklist.empty())
    {
      reportE(INFO, "All variable allocated without spilling");
      return _onRegister;
    }

    /* Create variable spill cost ranking, based on active spill policies and weights */
    PolicyMap::const_iterator policy = _policies.begin();
    PolicyMap::const_iterator policyEnd = _policies.end();
    RegisterCostMap scoresSum;

    for(SpillPolicy::RegisterId i = 0; i < _regs; i++)
    {
      scoresSum[i] = 0;
    }

    reportE(INFO, "Spilling is required");
    for(; policy != policyEnd; policy++)
    {
      reportE(DEBUG, "Calculating spill cost based on spill policy: " << policy->first);

      SpillPolicy::CoalescedCostMap score;
      score = policy->second->rank(_onRegister, p);
      while(score.size() > 0)
      {
      	CoalescedRegister* cr = score.begin()->first;
      	assertM(cr->allocated.size() > 0, "Variable marked as on register and without allocated registers");
      	/* Costs are accumulated on the first physical register of a variable */
      	SpillPolicy::RegisterId reg = *cr->allocated.begin();
        if(used.find(cr) != used.end()){
          scoresSum[reg] = std::numeric_limits<unsigned>::max();
          reportE(DEBUG_DETAILS,
              "Variable " << cr->reg()
              << " on position " << reg
              << " is used, setting cost to maximum");
        } else {
          scoresSum[reg] += (_weights[policy->first] * score.begin()->second);
          reportE(DEBUG_DETAILS,
            "\tVariable " << cr->reg()
            << " on position " << reg
            << " has cost " << scoresSum[reg]);
        }
        score.erase(score.begin());
      }
    }

    reportE(INFO, "Locating best locations based on spill costs");
    /* Find best spilling position, starting by largest variables */
    while(worklist.size() > 0)
    {
      std::multimap<ushort, CoalescedRegister*>::iterator crI = --sizeCoalescedMap.end();
      CoalescedRegister *cr = crI->second;
      long int best = std::numeric_limits<long int>::max();
      /* _regs doubles as the "no position found" marker */
      SpillPolicy::RegisterId bestStart = _regs;
      reportE(DEBUG, "\tAllocating variable " << cr->reg() << "; Size: " << cr->size());
      unsigned int maxReg = _regs - cr->size();
      maxReg -= (maxReg % cr->size());

      for(SpillPolicy::RegisterId i = 0; i <= maxReg; i += cr->size())
      {
        bool noUsedVariable = true;
        long int cost = 0;
        CoalescedRegister *last = NULL;
        reportE(DEBUG, "\t\tTesting start position:" << i);
        for(unsigned u = i; noUsedVariable && (u < (i + cr->size())); u++)
        {
          if(_registerVariableMap.find(u) == _registerVariableMap.end()){
            reportE(DEBUG_DETAILS, "\t\t\tNo variable starts on position " << u);
            continue;
          }

          if(last == _registerVariableMap[u]){
            reportE(DEBUG_DETAILS, "\t\t\tPosition " << u << " is part of already accounted variable " << last->reg());
            continue;
          }

          last = _registerVariableMap[u];
          /* Assure that the variable using register is not being used now */
          reportE(DEBUG_DETAILS, "\t\t\tPosition " << u << " is beginning of variable " << last->reg());
          noUsedVariable &= (used.find(last) == used.end());
          if(noUsedVariable){
            reportE(DEBUG_DETAILS, "\t\t\tVariable " << last->reg() << " is not required to be allocated.");
          } else {
            reportE(DEBUG_DETAILS, "\t\t\tVariable " << last->reg() << " is required to be allocated.");
          }
          cost += scoresSum[*last->allocated.begin()];
          reportE(DEBUG, "\t\tCost to insert at point "  << i << ": " << cost);
        }
        if(!noUsedVariable)
        {
          reportE(DEBUG, "\t\tCan't use position " << i << ", variable required to be on register");
          continue;
        }
        if(cost < best)
        {
          best = cost;
          bestStart = i;
          reportE(INFO, "\t\tNew best position for variable " << cr->reg() << ": " << i);
        }
      }
      assertM(bestStart != _regs, "Error finding a insertion position");

      reportE(INFO, "\tAllocating at position " << bestStart);
      /* Evict whatever currently occupies the chosen range */
      for(unsigned u = bestStart; u < (bestStart + cr->size()); u++)
      {
        if(_registerVariableMap.find(u) == _registerVariableMap.end())
        {
          reportE(DEBUG_DETAILS, "\tNo variable at position " << u);
          continue;
        }
        CoalescedRegister *spillCr = _registerVariableMap[u];
        if(!spillCr->spilled())
        {
          reportE(DEBUG, "\tSpilling variable " << spillCr->reg());
          spillCr->spill();
        }
        _onRegister.erase(spillCr);
        while(spillCr->allocated.size() > 0)
        {
          reportE(DEBUG_DETAILS, "\tFreeing physical register " << *(spillCr->allocated.begin()));
          _available.insert(*(spillCr->allocated.begin()));
          _registerVariableMap.erase(
              _registerVariableMap.find(*(spillCr->allocated.begin())));
          spillCr->allocated.erase(spillCr->allocated.begin());
        }
      }

      _onRegister.insert(cr);
      for(unsigned u = bestStart; u < (bestStart + (cr->size())); u++)
      {
        reportE(DEBUG, "\tAllocating register " << u << " to variable " << cr->reg());
        cr->allocated.insert(u);
        _registerVariableMap[u] = cr;
        /* Erase first, then check the result, so the erase never depends on
         * whether the assertion macro is compiled in (assertM is assumed to
         * be a debug-only macro like assert -- hoisting is harmless either way) */
        const bool wasAvailable = (_available.erase(u) != 0);
        assertM(wasAvailable, "Register being allocated not marked as available");
        (void) wasAvailable;
      }
      worklist.erase(cr);
      sizeCoalescedMap.erase(crI);
    }
    assertM(sizeCoalescedMap.empty(), "Size mapped worklist not clear");
    return _onRegister;
  }
コード例 #10
0
ファイル: Allocator.cpp プロジェクト: dougct/ocelot-ufmg
  /* Places the single variable cr on physical registers at point p.
   *
   * Expired variables are released first. If cr is already on register nothing
   * else happens. Otherwise the first free range aligned to cr->size() is
   * used; when no such range exists, the aligned range with the lowest summed
   * spill cost (over all policies in _policies, weighted by _weights) is
   * evicted and given to cr.
   *
   * Returns a reference to _onRegister. */
  const SpillPolicy::CoalescedSet & Allocator::use(CoalescedRegister *cr,
    const Interval::Point &p)
  {
    assert((_available.size() + _registerVariableMap.size()) == _regs);
    clearExpired(p);
    reportE(INFO, "Point: "<< p << "; Variable " << cr->reg() << "; Size: " << cr->size());

    /* If the variable cr is marked as on register, there is nothing to do */
    if(_onRegister.find(cr) != _onRegister.end())
    {
      reportE(DEBUG,
          "Point: "<< p << "; Variable " << cr->reg() << " already marked as onRegister, nothing to do");

      return _onRegister;
    }
    /* There can't be a variable larger than the available registers */
    assertM(cr->size() <= _regs, "Variable larger than available registers");
    /* Test if the variable fits on available holes. We assume the variable must
     * be placed on a register position aligned to it's size */
    //TODO: Discover if vectors require aligned positions and how may affect here
    unsigned int maxReg = _regs - cr->size();
    maxReg -= (maxReg % cr->size());
    for(unsigned i = 0; i <= maxReg; i += cr->size())
    {
      bool fit = true;
      for(unsigned u = i; fit && (u < (i + cr->size())); u++)
      {
        fit &= _available.find(u) != _available.end();
      }
      if(fit)
      {
        reportE(DEBUG,
            "Point: "<< p << "; Variable " << cr->reg() << ", with size " << cr->size() << " fits on starting register " << i);
        /* fit is known to be true here, so the whole range is claimed */
        for(unsigned u = i; u < (i + cr->size()); u++)
        {
          reportE(DEBUG_DETAILS,
              "\tPoint: "<< p << "; Variable " << cr->reg() << " receives physical register " << u);
          cr->allocated.insert(u);
          _available.erase(u);
          _registerVariableMap[u] = cr;
        }
        _onRegister.insert(cr);
        return _onRegister;
      }
    }

    /* Create a cost ranking based on all spill policies active, and weight of each one of them */
    PolicyMap::const_iterator policy = _policies.begin();
    PolicyMap::const_iterator policyEnd = _policies.end();
    RegisterCostMap scoresSum;
    for(SpillPolicy::RegisterId i = 0; i < _regs; i++)
    {
      scoresSum[i] = 0;
    }

    for(; policy != policyEnd; policy++)
    {
      reportE(DEBUG,
          "Point: "<< p << "; Building spilling cost score for spill policy" << policy->first);
      SpillPolicy::CoalescedCostMap score;
      score = policy->second->rank(_onRegister, p);
      while(score.size() > 0)
      {
      	assertM(score.begin()->first->allocated.size() > 0, "No allocated physical registers");
      	/* Costs are accumulated on the first physical register of a variable */
      	SpillPolicy::RegisterId reg = *score.begin()->first->allocated.begin();
        reportE(DEBUG_DETAILS,
            "\tPoint: "<< p << "; Variable " << score.begin()->first->reg() << ", at register " << reg << " receives cost " << score.begin()->second);
        scoresSum[reg] += (_weights[policy->first] * score.begin()->second);
        reportE(DEBUG_DETAILS,
            "\tPoint: "<< p << "; Register" << reg << " has total cost " << scoresSum[reg]);
        score.erase(score.begin());
      }
    }

    /* Locate the best insertion point */
    long int best = std::numeric_limits<long int>::max();
    /* _regs doubles as the "no position found" marker */
    SpillPolicy::RegisterId bestStart = _regs;
    for(SpillPolicy::RegisterId i = 0; i <= maxReg; i += cr->size())
    {
      long int cost = 0;
      CoalescedRegister* last = NULL;
      for(unsigned u = i; (u < (i + cr->size())); u++)
      {
        if(_registerVariableMap.find(u) == _registerVariableMap.end())
          continue;

        /* Count each occupying variable only once */
        if(last == _registerVariableMap[u])
          continue;

        last = _registerVariableMap[u];
        cost += scoresSum[*last->allocated.begin()];
      }
      reportE(DEBUG_DETAILS,
          "\tPoint: "<< p << "; Spilling from position " << i << ", a size of " << cr->size() << " registers has a cost of " << cost);

      if(cost < best)
      {
        best = cost;
        bestStart = i;
        reportE(DEBUG_DETAILS, "\tPoint: "<< p << "; Is best spilling cost so far" );
      }
    }
    assertM(bestStart < _regs, "Error finding best insert position");
    /* Spill required variables */
    reportE(DEBUG, "Spilling from start position " << bestStart );
    for(unsigned u = bestStart; (u < (bestStart + cr->size())); u++)
    {
      reportE(DEBUG_DETAILS, "\tRegister "<< u << ":" );
      if(_registerVariableMap.find(u) == _registerVariableMap.end())
      {
        assert(_available.find(u) != _available.end());
        continue;
      }
      CoalescedRegister *spillCr = _registerVariableMap[u];
      reportE(DEBUG_DETAILS, "\t\tContains variable "<< spillCr->reg() << " of size " << spillCr->size() );
      if(!spillCr->spilled())
      {
        reportE(DEBUG_DETAILS, "\t\t\tNew spill" );
        spillCr->spill();
      }
      assertM((*(spillCr->allocated.begin())) == u, "Wrong mapping");
      while(spillCr->allocated.size() > 0)
      {
        SpillPolicy::RegisterId a = *(spillCr->allocated.begin());
        _available.insert(a);
        reportE(DEBUG_DETAILS, "\t\t\tFreeing register " << a );
        _registerVariableMap.erase(a);
        spillCr->allocated.erase(a);
      }
      _onRegister.erase(spillCr);
    }

    reportE(DEBUG, "Associating registers to variable " << cr->reg() );
    /* Allocate registers to the new variable */
    _onRegister.insert(cr);
    for(unsigned u = bestStart; (u < (bestStart + cr->size())); u++)
    {
      cr->allocated.insert(u);
      /* Erase first, then check the result, so the erase never depends on
       * whether the assertion macro is compiled in (assertM is assumed to be
       * a debug-only macro like assert -- hoisting is harmless either way) */
      const bool wasAvailable = (_available.erase(u) != 0);
      assertM(wasAvailable, "Register being allocated not marked as available");
      (void) wasAvailable;
      _registerVariableMap[u] = cr;
    }
    return _onRegister;
  }
コード例 #11
0
static void convertParametersToRegisters(
	const BasicBlockMap& newBlocks, ir::IRKernel& kernel,
	ir::ControlFlowGraph::instruction_iterator callIterator,
	const ir::IRKernel& calledKernel)
{
	typedef std::unordered_map<std::string,	ir::PTXOperand> OperandMap;
	typedef std::unordered_set<std::string> StringSet;

	reportE(REPORT_DETAILS, "   Converting parameters to registers...");
	
	// Get a map from argument name to register in the calling function
	OperandMap  argumentMap;
	StringSet   bitBucketArguments;
	
	auto argument = calledKernel.arguments.begin();
	
	ir::PTXInstruction& call = static_cast<ir::PTXInstruction&>(**callIterator);
	
	for(auto parameter = call.d.array.begin();
		parameter != call.d.array.end(); ++parameter, ++argument)
	{
		if(parameter->addressMode == ir::PTXOperand::BitBucket)
		{
			bitBucketArguments.insert(argument->name);
			continue;
		}

		assert(argument != calledKernel.arguments.end());
		assert(parameter->addressMode == ir::PTXOperand::Register ||
			parameter->addressMode == ir::PTXOperand::Immediate);
		assert(argumentMap.count(argument->name) == 0);
		assert(argument->returnArgument);

		argumentMap.insert(std::make_pair(argument->name, *parameter));
	}

	for(auto parameter = call.b.array.begin();
		parameter != call.b.array.end(); ++parameter, ++argument)
	{
		if(parameter->addressMode == ir::PTXOperand::BitBucket)
		{
			bitBucketArguments.insert(argument->name);
			continue;
		}

		assert(argument != calledKernel.arguments.end());
		assert(parameter->addressMode == ir::PTXOperand::Register ||
			parameter->addressMode == ir::PTXOperand::Immediate);
		assert(argumentMap.count(argument->name) == 0);
		assert(!argument->returnArgument);

		argumentMap.insert(std::make_pair(argument->name, *parameter));
	}
	
	// Convert all stores to that parameter to moves to the associated register
	for(auto block = newBlocks.begin(); block != newBlocks.end(); ++block)
	{
		for(auto instruction = block->second->instructions.begin();
			instruction != block->second->instructions.end(); ++instruction)
		{
			ir::PTXInstruction& ptx = static_cast<ir::PTXInstruction&>(
				**instruction);
		
			if(ptx.opcode        != ir::PTXInstruction::St)    continue;
			if(ptx.addressSpace  != ir::PTXInstruction::Param) continue;
			if(ptx.d.addressMode != ir::PTXOperand::Address)   continue;
			
			if(bitBucketArguments.count(ptx.d.identifier))
			{
				delete *instruction;
				instruction = --block->second->instructions.erase(instruction);
				
				continue;
			}
			
			auto argument = argumentMap.find(ptx.d.identifier);
			
			if(argument == argumentMap.end()) continue;

			ptx.type = argument->second.type;
			ptx.pg   = call.pg;
			ptx.d    = argument->second;
				
			if(argument->second.addressMode == ir::PTXOperand::Register)
			{
				// If the types match, it is a move
				if(argument->second.type == ptx.d.type)
				{
					ptx.opcode = ir::PTXInstruction::Mov;
				}
				else
				{
					// otherwise, we need a cast
					ptx.opcode   = ir::PTXInstruction::Cvt;
					ptx.modifier = ir::PTXInstruction::Modifier_invalid;
				}
			}
			else
			{
				assert(argument->second.addressMode ==
					ir::PTXOperand::Immediate);
		
				ptx.opcode = ir::PTXInstruction::Mov;
			}
		}
	}
	
	// Convert all loads from that parameter to moves from the register
	for(auto block = newBlocks.begin(); block != newBlocks.end(); ++block)
	{
		for(auto instruction = block->second->instructions.begin();
			instruction != block->second->instructions.end(); ++instruction)
		{
			ir::PTXInstruction& ptx = static_cast<ir::PTXInstruction&>(
				**instruction);
	
			if(ptx.opcode        != ir::PTXInstruction::Ld)    continue;
			if(ptx.addressSpace  != ir::PTXInstruction::Param) continue;
			if(ptx.a.addressMode != ir::PTXOperand::Address)   continue;
			
			if(bitBucketArguments.count(ptx.a.identifier))
			{
				delete *instruction;
				instruction = --block->second->instructions.erase(instruction);
				
				continue;
			}
			
			auto argument = argumentMap.find(ptx.a.identifier);
			
			if(argument == argumentMap.end()) continue;
			
			assert(ptx.d.addressMode == ir::PTXOperand::Register);
			
			ptx.type = argument->second.type;			
			ptx.pg   = call.pg;
			ptx.a    = argument->second;
			
			// If the types match, it is a move
			if(ptx.type == ptx.a.type)
			{
				ptx.opcode = ir::PTXInstruction::Mov;
			}
			else
			{
				// otherwise, we need a cast
				ptx.opcode        = ir::PTXInstruction::Cvt;
				ptx.modifier      = ir::PTXInstruction::Modifier_invalid;
			}
		}
	}
}
コード例 #12
0
void AffineLinearScan::_extendStack()
{
	_shared.declaration(_kernel->locals, MAX_WARPS);
	reportE(INFO, "Kernel " << _kernel->name << " requires " << _shared.bytes()
		<< " bytes of shared memory per warp, total of "
		<< MAX_WARPS * _shared.bytes() << '(' << MAX_WARPS << " warps)");
	LinearScanRegisterAllocationPass::_extendStack();
	reportE(DEBUG, "Writing warp local memory stack access information");
	
	if(_shared.bytes() == 0) return;
	/* warpid = (size_x * ( size_y * z + y ) + x) >> 5
	 * a = size_y
	 * b = z
	 * c = y
	 * a = mad a z c
	 * b = size_x
	 * c = x
	 * a = mad a b c
	 * a = shr a 5 (>>5 == /32)
	 * memPosition = memInitialPosition [ warpid * bytesPerWarp ]
	 */
	analysis::DataflowGraph::iterator block = _dfg().begin();
	RegisterId a, b, c;

	/* Use a AffineRegister temporary register of type u32 if available */
	if(AffineRegister::tempRegisters.count(ir::PTXOperand::DataType::u32) != 0)
	{
		a = AffineRegister::tempRegisters[ir::PTXOperand::DataType::u32];
	}
	else
	{
		a = _dfg().newRegister();
	}
	
	b = _dfg().newRegister();

	/* If memory size is 32 bits, can use warpPosition variable as temporary */
	if(_m->addressSize() == 32)
	{
		c = AffineRegister::warpPosition;
	}
	else
	{
		c = _dfg().newRegister();
	}
	
	// size_y = %ntid.y
	ir::PTXInstruction sizeY(ir::PTXInstruction::Mov);
	sizeY.d = ir::PTXOperand(ir::PTXOperand::Register,
		ir::PTXOperand::DataType::u32, a);
	sizeY.a = ir::PTXOperand(ir::PTXOperand::ntid,
		ir::PTXOperand::iy, ir::PTXOperand::u32);
	sizeY.type = ir::PTXOperand::DataType::u32;
	_dfg().insert(block, sizeY, 0);

	// z = %tid.z
	ir::PTXInstruction z(ir::PTXInstruction::Mov);
	z.d = ir::PTXOperand(ir::PTXOperand::Register,
		ir::PTXOperand::DataType::u32, b);
	z.a = ir::PTXOperand(ir::PTXOperand::tid,
		ir::PTXOperand::iz, ir::PTXOperand::u32);
	z.type = ir::PTXOperand::DataType::u32;
	_dfg().insert(block, z, 1);

	// y = %tid.y
	ir::PTXInstruction y(ir::PTXInstruction::Mov);
	y.d = ir::PTXOperand(ir::PTXOperand::Register,
		ir::PTXOperand::DataType::u32, c);
	y.a = ir::PTXOperand(ir::PTXOperand::tid,
		ir::PTXOperand::iy, ir::PTXOperand::u32);
	y.type = ir::PTXOperand::DataType::u32;
	_dfg().insert(block, y, 2);

	ir::PTXInstruction mad1(ir::PTXInstruction::Mad);
	mad1.d = sizeY.d;
	mad1.a = sizeY.d;
	mad1.b = z.d;
	mad1.c = y.d;
	mad1.type = ir::PTXOperand::DataType::u32;
	mad1.modifier = ir::PTXInstruction::Modifier::lo;
	_dfg().insert(block, mad1, 3);

	// size_x = %ntid.x
	ir::PTXInstruction sizeX(ir::PTXInstruction::Mov);
	sizeX.d = z.d;
	sizeX.a = ir::PTXOperand(ir::PTXOperand::ntid,
		ir::PTXOperand::ix, ir::PTXOperand::u32);
	sizeX.type = ir::PTXOperand::DataType::u32;
	_dfg().insert(block, sizeX, 4);

	// x = %tid.x
	ir::PTXInstruction x(ir::PTXInstruction::Mov);
	x.d = y.d;
	x.a = ir::PTXOperand(ir::PTXOperand::tid,
		ir::PTXOperand::ix, ir::PTXOperand::u32);
	x.type = ir::PTXOperand::DataType::u32;
	_dfg().insert(block, x, 5);

	// 1) warpid = size_x * size_y
	ir::PTXInstruction mad2(ir::PTXInstruction::Mad);
	mad2.d = mad1.d;
	mad2.a = mad1.d;
	mad2.b = sizeX.d;
	mad2.c = x.d;
	mad2.type = ir::PTXOperand::DataType::u32;
	mad2.modifier = ir::PTXInstruction::Modifier::lo;
	_dfg().insert(block, mad2, 6);

	// 5) warpid = [size_x * y + size_x * size_y * z + x] >> 5
	ir::PTXInstruction shr(ir::PTXInstruction::Shr);
	shr.d = mad2.d;
	shr.a = mad2.d;
	shr.b = ir::PTXOperand(5);
	shr.type = ir::PTXOperand::DataType::u32;
	_dfg().insert(block, shr, 7);

	// 6) position = warpid * stride
	ir::PTXInstruction position(ir::PTXInstruction::Mul);
	position.d = shr.d;
	position.a = shr.d;
	position.b = ir::PTXOperand(_shared.bytes());
	position.type = ir::PTXOperand::DataType::u32;
	position.modifier = ir::PTXInstruction::Modifier::lo;
	_dfg().insert(block, position, 8);

	//%memoryStart = stack name;
	ir::PTXInstruction memoryStart(ir::PTXInstruction::Mov);
	memoryStart.a = ir::PTXOperand(_shared.name() +
		"[" + position.d.toString() + "]");
	if(_m->addressSize() == 32)
	{
		memoryStart.d = x.d;
		memoryStart.type = ir::PTXOperand::DataType::u32;
	}
	else
	{
		memoryStart.d = ir::PTXOperand(ir::PTXOperand::Register,
			ir::PTXOperand::DataType::u64, AffineRegister::warpPosition);
		memoryStart.type = ir::PTXOperand::DataType::u64;
	}
	_dfg().insert(block, memoryStart, 9);

}
コード例 #13
0
void AffineLinearScan::finalize()
{
	_clear();
	reportE(DEBUG, "Finalizing affine linear scan");
}
コード例 #14
0
/* Reports the start of the affine linear scan and stores the address of the
 * module in _m; only the pointer is kept, no copy of m is made. */
void AffineLinearScan::initialize(const ir::Module& m)
{
	reportE(DEBUG, "Running affine linear scan");
	this->_m = &m;
}
コード例 #15
0
void DivergenceLinearScan::finalize()
{
	_clear();
	reportE(DEBUG, "Finalizing divergence linear scan");
}
コード例 #16
0
/* Reports the start of the divergence linear scan and stores the address of
 * the module in _m; only the pointer is kept, no copy of m is made. */
void DivergenceLinearScan::initialize(const ir::Module& m)
{
	reportE(DEBUG, "Running divergence linear scan");
	this->_m = &m;
}