/*! \brief Test a matrix against this constant constraint.
 *
 *  For the two supported comparisons the matrix is compared against _value
 *  with the opposite-direction helper and the constraint counts as satisfied
 *  when reduceSum() of that result equals zero.
 *
 *  \param m Matrix to check.
 *  \return Result of the zero test; false for any unsupported comparison
 *          (after raising an assertion).
 */
bool ConstantConstraint::isSatisfied(const Matrix& m) const
{
	bool satisfied = false;
	
	if(_comparison == LessThanOrEqual)
	{
		// NOTE(review): presumably greaterThanOrEqual also flags elements
		// equal to _value -- confirm that this is intended for "<=".
		satisfied = (m.greaterThanOrEqual(_value).reduceSum() == 0);
	}
	else if(_comparison == GreaterThanOrEqual)
	{
		// NOTE(review): same question for lessThanOrEqual and ">=".
		satisfied = (m.lessThanOrEqual(_value).reduceSum() == 0);
	}
	else
	{
		assertM(false, "not implemented");
	}
	
	return satisfied;
}
Esempio n. 2
0
/*! \brief Position of this instruction inside its parent block.
 *
 *  Walks *block from the front and counts the elements that precede the one
 *  comparing equal to `this`.
 *
 *  \return Zero-based position. If the instruction is not found an assertion
 *          is raised and the number of elements walked is returned.
 */
unsigned int Instruction::index() const
{
	unsigned int position = 0;
	
	for(auto candidate : *block)
	{
		if(candidate == this) return position;
		
		++position;
	}
	
	assertM(false, "Could not find instruction in parent block.");
	
	return position;
}
Esempio n. 3
0
/*! \brief Run the barrier removal pass over one kernel.
 *
 *  Resets the per-kernel state (_reentryPoint, _spillBytes, _kernel), calls
 *  _runOnBlock() for every block of the dataflow graph and finally calls
 *  _addLocalVariables().
 *
 *  \param k Kernel to transform; asserted to be a PTX kernel.
 */
void RemoveBarrierPass::runOnKernel( ir::IRKernel& k )
{
	report( "Removing barriers from kernel " << k.name );
	assertM( k.ISA == ir::Instruction::PTX, 
		"This pass is valid for PTX kernels only." );
	
	_kernel       = static_cast< ir::PTXKernel* >( &k );
	_reentryPoint = 1;
	_spillBytes   = 1;
	
	// The end iterator is re-read on every step, exactly like the loop
	// condition of a for statement would do.
	analysis::DataflowGraph::iterator block = _dfg().begin();
	
	while( block != _dfg().end() )
	{
		_runOnBlock( block );
		++block;
	}
	
	_addLocalVariables();
}
/*! \brief Build a statement of the given type, optionally owning a clone of
 *         an instruction.
 *
 *  \param t Statement type.
 *  \param i Instruction to clone into the statement, or 0 for none. When it
 *           is non-zero the type is asserted to be Instruction.
 */
LLVMStatement::LLVMStatement( Type t, const LLVMInstruction* i )
    : instruction( 0 ), type( t ), linkage( InvalidLinkage ),
      convention( LLVMInstruction::InvalidCallingConvention ),
      visibility( InvalidVisibility ),
      returnAttribute( LLVMInstruction::InvalidParameterAttribute ),
      functionAttributes( 0 ), alignment( 1 ), space( 0 ), constant( false )
{
    // Nothing more to set up when no instruction was supplied.
    if( i == 0 ) return;

    instruction = static_cast< LLVMInstruction* >( i->clone() );
    assertM( type == Instruction, "Statement given non-zero "
             << "instruction pointer, but not specified as an "
             << "instruction statement." );
}
Esempio n. 5
0
/*! \brief Register a variable in the array and group it by its byte size.
 *
 *  The insertion is first delegated to MemoryArray::insertVar(); when that
 *  succeeds the register is recorded in _declared, added to the set of
 *  registers stored under its size in _mem, and _stackSize grows by that size.
 *
 *  \param reg  Register identifier being inserted.
 *  \param type Type of the register; its size comes from
 *              ir::PTXOperand::bytes().
 *  \return false when MemoryArray::insertVar() rejects the variable,
 *          true otherwise.
 */
bool CoalescedArray::insertVar(const RegisterId reg, const Type type)
{
	assertM(_canInsert, "Can't insert new values after getting a offset");
	if(!MemoryArray::insertVar(reg, type)) return false;
	
	_declared[reg] = type;
	
	const unsigned varSize = ir::PTXOperand::bytes(type);
	
	// operator[] already creates an empty set the first time a size is seen,
	// so no separate find / insert of a temporary is required.
	_mem[varSize].insert(reg);
	_stackSize += varSize;
	
	return true;
}
	unsigned int ATIGPUDevice::deviceCount()
	{
		CALuint count = 0;

		try {
			CalDriver()->calDeviceGetCount(&count);

			// Multiple devices is not supported yet
			if (count > 1) {
				assertM(false, "Multiple devices is not supported yet");
			}
		} catch (hydrazine::Exception he) {
			// Swallow the exception and return 0 devices
			report(he.what());
		}

		return count;
	}	
Esempio n. 7
0
/*! \brief Check whether a path names an existing directory.
 *
 *  \param path Path handed to stat().
 *  \return true when stat() succeeds and the mode is a directory; false when
 *          stat() fails or the entry is of another kind. On _WIN32 the
 *          function is not implemented: it asserts and returns false.
 */
bool isDirectory(const std::string& path)
{
    #ifndef _WIN32
    struct stat fileStats;

    // A failing stat() (missing entry, no permission, ...) means "no".
    if(stat(path.c_str(), &fileStats) != 0)
    {
        return false;
    }

    return S_ISDIR(fileStats.st_mode);

    #else
    assertM(false, "Not implemented for this platform.");

    // Keep the function well-defined when assertions are compiled out.
    return false;
    #endif
}
	/*! \brief Create a device allocation at the current uav0 allocation
	 *         pointer.
	 *
	 *  The allocation pointer advances by the size rounded up with
	 *  AlignUp(size, 4), while the MemoryAllocation itself is created with the
	 *  requested size. The new allocation is recorded in _uav0Allocations
	 *  under its pointer.
	 *
	 *  \param size Requested size.
	 *  \return The newly created allocation.
	 */
	Device::MemoryAllocation *ATIGPUDevice::allocate(size_t size)
	{
		// uav0 accesses should be aligned to 4
		size_t alignedSize = AlignUp(size, 4);

		// Check uav0 size limits
		assertM(_uav0AllocPtr - Uav0BaseAddr + alignedSize < Uav0Size,
				"Out of global memory: " << _uav0AllocPtr - Uav0BaseAddr
				<< " + " << alignedSize
				<< " greater than " << Uav0Size);

		MemoryAllocation *created = 
			new MemoryAllocation(&_uav0Resource, _uav0AllocPtr, size);

		_uav0Allocations.insert(std::make_pair(created->pointer(), created));

		// The next allocation starts right after the aligned block.
		_uav0AllocPtr += alignedSize;

		return created;
	}
Esempio n. 9
0
/*! \brief Find an identifier of the form base + number that is unused in a
 *         module.
 *
 *  Candidates are tried with increasing numeric suffixes starting at `begin`;
 *  the first one that is not a key of module.globals(), module.kernels() or
 *  module.textures() is returned. The loop only ends by returning.
 *
 *  \param module Module whose globals, kernels and textures are consulted.
 *  \param base   Prefix of the generated identifier.
 *  \param begin  First numeric suffix to try.
 *  \return The first free identifier found.
 */
static std::string findNumericSuffix(ir::Module& module,
	const std::string& base, unsigned int begin)
{
	for(unsigned int suffix = begin; ; ++suffix)
	{
		std::stringstream stream;
		
		stream << base << suffix;
		
		// Materialise the candidate once instead of once per lookup.
		std::string candidate = stream.str();
		
		if(module.globals().count(candidate) == 0 &&
			module.kernels().count(candidate) == 0 &&
			module.textures().count(candidate) == 0)
		{
			return candidate;
		}
	}
}
Esempio n. 10
0
	/*! \brief Fill part of this allocation with a byte value.
	 *
	 *  The backing resource is mapped with calResMap(), the range is filled
	 *  with std::memset() at the allocation's offset relative to
	 *  ATIGPUDevice::Uav0BaseAddr, and the resource is unmapped again.
	 *
	 *  \param offset Offset of the first byte to set inside the allocation.
	 *  \param value  Value handed to std::memset().
	 *  \param size   Number of bytes to set.
	 */
	void ATIGPUDevice::MemoryAllocation::memset(size_t offset, int value, 
			size_t size)
	{
		// Written so that a huge offset or size cannot wrap around and slip
		// through the check the way "offset + size <= _size" could.
		assertM(offset <= _size && size <= _size - offset,
				"Invalid memset size");
		
		CALvoid *data = NULL;
		CALuint pitch = 0;
		CALuint flags = 0;

		CalDriver()->calResMap(&data, &pitch, *_resource, flags);

		CALdeviceptr addr = (_basePtr - ATIGPUDevice::Uav0BaseAddr) + offset;
		std::memset(static_cast<char *>(data) + addr, value, size);

		report("MemoryAllocation::memset("
				<< "offset = " << std::dec << offset
				<< ", value = " << std::dec << value
				<< ", size = " << std::dec << size
				<< ")");
		
		CalDriver()->calResUnmap(*_resource);
	}
Esempio n. 11
0
	/*! \brief Look up the allocation registered for an address.
	 *
	 *  \param address Address to look up; it must match the key of an entry
	 *                 in _uav0Allocations exactly.
	 *  \param type    HostAllocation is not implemented and asserts.
	 *  \return The matching allocation, or 0 for host lookups and when no
	 *          device allocation exists at all. An address that is not found
	 *          among existing allocations is reported through Throw().
	 */
	Device::MemoryAllocation *ATIGPUDevice::getMemoryAllocation(
			const void *address, AllocationType type) const
	{
		if (type == HostAllocation) {
			assertM(false, "Not implemented yet");
			return 0;
		}

		// With no device allocations there is nothing to search.
		if (_uav0Allocations.empty()) {
			return 0;
		}

		// Device pointer arithmetic is not supported yet
		const AllocationMap::const_iterator found = 
			_uav0Allocations.find(const_cast<void *>(address));

		if (found != _uav0Allocations.end()) {
			return found->second;
		}

		{
			Throw("No allocation found for this pointer - " << address);
		}

		return 0;
	}
Esempio n. 12
0
/*! \brief Run a pass on a module if it is a module-level pass.
 *
 *  Immutable and module passes get runOnModule() called; kernel and basic
 *  block passes are ignored here; an invalid pass type asserts.
 *
 *  \param module Module handed to the pass.
 *  \param pass   Pass to dispatch on its `type` field.
 */
static void runModulePass(ir::Module* module, Pass* pass)
{
	report("  Running module pass '" << pass->toString() << "'" );
	
	switch(pass->type)
	{
	case Pass::ImmutablePass:
	{
		static_cast<ImmutablePass*>(pass)->runOnModule(*module);
		break;
	}
	case Pass::ModulePass:
	{
		static_cast<ModulePass*>(pass)->runOnModule(*module);
		break;
	}
	case Pass::KernelPass: /* fall through */
	case Pass::BasicBlockPass:
	{
		// Not handled at module granularity.
		break;
	}
	case Pass::InvalidPass:
	{
		assertM(false, "Invalid pass type.");
		break;
	}
	}
}
Esempio n. 13
0
/*! \brief Call finalize() on kernel and basic block passes.
 *
 *  Immutable and module passes are ignored here; an invalid pass type
 *  asserts.
 *
 *  \param module Not used by this function.
 *  \param pass   Pass to dispatch on its `type` field.
 */
static void finalizeKernelPass(ir::Module* module, Pass* pass)
{
	switch(pass->type)
	{
	case Pass::KernelPass:
	{
		report("  Finalizing kernel pass '" << pass->toString() << "'" );
		static_cast<KernelPass*>(pass)->finalize();
		break;
	}
	case Pass::BasicBlockPass:
	{
		report("  Finalizing basic block pass '" << pass->toString() << "'" );
		static_cast<BasicBlockPass*>(pass)->finalize();
		break;
	}
	case Pass::ImmutablePass: /* fall through */
	case Pass::ModulePass:
	{
		// Nothing to finalize for these pass kinds.
		break;
	}
	case Pass::InvalidPass:
	{
		assertM(false, "Invalid pass type.");
		break;
	}
	}
}
Esempio n. 14
0
/*! \brief Remove the incoming entry that belongs to a predecessor block.
 *
 *  The first element of `reads` is skipped; the remaining elements are walked
 *  in pairs whose second element is cast to an AddressOperand and compared
 *  through its globalValue. Both operands of the matching pair are deleted
 *  and erased from `reads`.
 *
 *  \param predecessor Block whose pair should be removed. An assertion is
 *                     raised when no pair refers to it.
 */
void Phi::removeSource(BasicBlock* predecessor)
{
	auto readPosition = reads.begin();
	for(++readPosition; readPosition != reads.end(); ++readPosition)
	{
		// Step from the first operand of the pair to the second one.
		++readPosition;
		assert(readPosition != reads.end());
		
		auto operand = static_cast<AddressOperand*>(*readPosition);
		
		if(operand->globalValue != predecessor) continue;
		
		// Go back to the first operand of the pair and drop both of them.
		--readPosition;
		delete *readPosition;
		readPosition = reads.erase(readPosition);
		delete *readPosition;
		reads.erase(readPosition);
		
		// Done. Leaving the loop with 'break' would run into the
		// "does not contain" assertion below even though removal succeeded.
		return;
	}
	
	assertM(false, "Phi instruction " << toString()
		<< " does not contain basic block " << predecessor->name());
}
Esempio n. 15
0
	/*! \brief Not implemented for this device; always asserts.
	 *
	 *  \param module Unused.
	 *  \param kernel Unused.
	 *  \return A value-initialized cudaFuncAttributes, so the function does
	 *          not flow off its end if the assertion is compiled out.
	 */
	cudaFuncAttributes ATIGPUDevice::getAttributes(const std::string& module, 
			const std::string& kernel)
	{
		assertM(false, "Not implemented yet");

		return cudaFuncAttributes();
	}
Esempio n. 16
0
/*! \brief Number of _minVarSize sized slots covered by the stack.
 *
 *  \return 0 when elements() is 0, otherwise _stackSize / _minVarSize. The
 *          stack size is asserted to be a multiple of _minVarSize.
 */
unsigned MemoryArray::physicalElements() const
{
	if(elements() != 0)
	{
		// NOTE(review): assumes _minVarSize is non-zero whenever elements exist -- confirm.
		assertM(((_stackSize % _minVarSize) == 0), "Not divisible stack size by minimal variable size");
		
		return _stackSize / _minVarSize;
	}
	
	return 0;
}
Esempio n. 17
0
/*! \brief Not implemented; always asserts.
 *
 *  \param predecessor Unused.
 *  \param successor   Unused.
 *  \return true. The value only matters when the assertion is compiled out;
 *          "dependent" is chosen as the conservative answer, and the return
 *          keeps the function from flowing off its end.
 */
bool DependenceAnalysis::hasDependence(const Instruction& predecessor,
	const Instruction& successor) const
{
	assertM(false, "not implemented");
	
	return true;
}
Esempio n. 18
0
	/*! \brief Not implemented for this device; always asserts.
	 *
	 *  \param moduleName  Unused.
	 *  \param textureName Unused.
	 *  \return A null pointer, so the function does not flow off its end if
	 *          the assertion is compiled out.
	 */
	void* ATIGPUDevice::getTextureReference(const std::string& moduleName, 
		const std::string& textureName)
	{
		assertM(false, "Not implemented yet");

		return 0;
	}
Esempio n. 19
0
	/*! \brief Placeholder: not implemented for this device.
	 *
	 *  Both parameters are unused; the body only raises an assertion through
	 *  assertM.
	 */
	void ATIGPUDevice::unbindTexture(const std::string& moduleName, 
		const std::string& textureName)
	{
		assertM(false, "Not implemented yet");
	}
Esempio n. 20
0
/*! \brief Clone the blocks of an inlined kernel into another kernel.
 *
 *  Steps, in order:
 *   1. every block of the inlined kernel's CFG is cloned into kernel's CFG
 *      and recorded in newBlocks (original block -> clone);
 *   2. the out edges of the original blocks are recreated between the clones
 *      and the target label of trailing Bra instructions is updated;
 *   3. registers referenced by the cloned instructions are renamed to fresh
 *      ids taken from nextRegister;
 *   4. the inlined kernel's locals are copied into kernel.locals under a
 *      "_Zinlined_" prefix and address operands of the clones are renamed;
 *   5. the clones of the entry and exit blocks are reported back.
 *
 *  \param newBlocks     Receives the original -> clone block mapping.
 *  \param functionEntry Receives the clone of the inlined entry block.
 *  \param functionExit  Receives the clone of the inlined exit block.
 *  \param kernel        Kernel that receives blocks and local variables.
 *  \param nextRegister  Next unused register id; incremented once per
 *                       renamed register.
 *  \param inlinedKernel Kernel being inlined; may be `kernel` itself.
 */
static void insertAndConnectBlocks(BasicBlockMap& newBlocks,
	ir::ControlFlowGraph::iterator& functionEntry,
	ir::ControlFlowGraph::iterator& functionExit,
	ir::IRKernel& kernel, unsigned int& nextRegister,
	const ir::IRKernel& inlinedKernel)
{
	typedef std::unordered_map<ir::PTXOperand::RegisterType,
		ir::PTXOperand::RegisterType> RegisterMap;
	
	ir::IRKernel copy;
	const ir::IRKernel* inlinedKernelPointer = &inlinedKernel;
	
	// create a copy if the call is recursive
	if(inlinedKernelPointer == &kernel)
	{
		copy = inlinedKernel;
		inlinedKernelPointer = &copy;
	}
	
	//  Insert new blocks
	for(auto block = inlinedKernelPointer->cfg()->begin();
		block != inlinedKernelPointer->cfg()->end(); ++block)
	{
		auto newBlock = kernel.cfg()->clone_block(block);
		
		newBlocks.insert(std::make_pair(block, newBlock));
	}
	
	//  Connect new blocks, rename branch labels
	for(auto block = newBlocks.begin(); block != newBlocks.end(); ++block)
	{
		for(auto edge = block->first->out_edges.begin();
			edge != block->first->out_edges.end(); ++edge)
		{
			auto headBlock = block->second;
			auto tail      = (*edge)->tail;
			
			auto tailBlock = newBlocks.find(tail);
			assert(tailBlock != newBlocks.end());
			
			kernel.cfg()->insert_edge(ir::Edge(headBlock,
				tailBlock->second, (*edge)->type));
			
			if((*edge)->type == ir::Edge::Branch)
			{
				assert(!headBlock->instructions.empty());
				auto instruction = headBlock->instructions.back();
				
				auto branch = static_cast<ir::PTXInstruction*>(instruction);

				// A trailing Ret carries no label to update
				if(branch->opcode == ir::PTXInstruction::Ret) continue;

				assertM(branch->opcode == ir::PTXInstruction::Bra, "Expecting "
					<< branch->toString() << " to be a branch");
				
				branch->d.identifier = tailBlock->second->label();
			}
		}
	}
	
	//  Assign copied blocks new registers
	RegisterMap newRegisters;
	
	for(auto block = newBlocks.begin(); block != newBlocks.end(); ++block)
	{
		for(auto instruction = block->second->instructions.begin();
			instruction != block->second->instructions.end(); ++instruction)
		{
			ir::PTXInstruction& ptx = static_cast<ir::PTXInstruction&>(
				**instruction);
		
			ir::PTXOperand* operands[] = {&ptx.pg, &ptx.pq, &ptx.d, &ptx.a,
				&ptx.b, &ptx.c};
				
			for(unsigned int i = 0; i < 6; ++i)
			{
				ir::PTXOperand& operand = *operands[i];
				
				// Only these address modes are renamed
				if( operand.addressMode != ir::PTXOperand::Register &&
					operand.addressMode != ir::PTXOperand::Indirect &&
					operand.addressMode != ir::PTXOperand::ArgumentList)
				{
					continue;
				}
				
				if(operand.type != ir::PTXOperand::pred)
				{
					if(operand.array.empty() &&
						operand.addressMode != ir::PTXOperand::ArgumentList)
					{
						// Single register: reuse or create its mapping
						auto mapping = newRegisters.find(operand.reg);
						
						if(mapping == newRegisters.end())
						{
							mapping = newRegisters.insert(std::make_pair(
								operand.reg, nextRegister++)).first;
						}
						
						operand.reg = mapping->second;
					}
					else
					{
						// Rename every register sub-operand of the array
						for(auto subOperand = operand.array.begin(); 
							subOperand != operand.array.end(); ++subOperand )
						{
							if(!subOperand->isRegister()) continue;
							
							auto mapping = newRegisters.find(subOperand->reg);
						
							if(mapping == newRegisters.end())
							{
								mapping = newRegisters.insert(std::make_pair(
									subOperand->reg, nextRegister++)).first;
							}
						
							subOperand->reg = mapping->second;
						}
					}
				}
				else if(operand.addressMode != ir::PTXOperand::ArgumentList)
				{
					// Predicates are renamed only for these two conditions
					if(operand.condition == ir::PTXOperand::Pred
						|| operand.condition == ir::PTXOperand::InvPred)
					{
						auto mapping = newRegisters.find(operand.reg);
						
						if(mapping == newRegisters.end())
						{
							mapping = newRegisters.insert(std::make_pair(
								operand.reg, nextRegister++)).first;
						}
						
						operand.reg = mapping->second;
					}
				}
			}
		}
	}
	
	//  Assign copied blocks new local variables
	typedef std::unordered_map<std::string, std::string> LocalMap;
	
	LocalMap locals;
	
	// Iterate through inlinedKernelPointer rather than inlinedKernel: for a
	// recursive call inlinedKernel is `kernel` itself, and inserting into
	// kernel.locals while walking that same container would add entries to
	// the range being iterated. The copy made above is a stable snapshot
	// (assuming IRKernel assignment copies `locals` -- TODO confirm).
	for(auto local = inlinedKernelPointer->locals.begin();
		local != inlinedKernelPointer->locals.end(); ++local)
	{
		std::string newName = "_Zinlined_" + local->first;
	
		locals.insert(std::make_pair(local->first, newName));
		
		auto newLocal = kernel.locals.insert(
			std::make_pair(newName, local->second)).first;
		
		newLocal->second.name = newName;
	}
	
	for(auto block = newBlocks.begin(); block != newBlocks.end(); ++block)
	{
		for(auto instruction = block->second->instructions.begin();
			instruction != block->second->instructions.end(); ++instruction)
		{
			ir::PTXInstruction& ptx = static_cast<ir::PTXInstruction&>(
				**instruction);
		
			if(!ptx.mayHaveAddressableOperand()) continue;
		
			ir::PTXOperand* operands[] = {&ptx.pg, &ptx.pq, &ptx.d, &ptx.a,
				&ptx.b, &ptx.c};
				
			for(unsigned int i = 0; i < 6; ++i)
			{
				ir::PTXOperand& operand = *operands[i];
				
				if(operand.addressMode != ir::PTXOperand::Address) continue;
				
				auto local = locals.find(operand.identifier);
				
				// Identifiers that are not inlined locals stay untouched
				if(local == locals.end()) continue;
				
				operand.identifier = local->second;
			}
		}
	}
	
	//  Get the entry and exit points
	auto entryMapping = newBlocks.find(
		inlinedKernelPointer->cfg()->get_entry_block());
	assert(entryMapping != newBlocks.end());
	
	functionEntry = entryMapping->second;
	
	auto exitMapping = newBlocks.find(
		inlinedKernelPointer->cfg()->get_exit_block());
	assert(exitMapping != newBlocks.end());
	
	functionExit = exitMapping->second;
}
/*! \brief Compute the block order stored in `order` for a function.
 *
 *  Blocks are appended to `order` as they are popped from a work stack and
 *  the whole sequence is reversed at the end. A successor is pushed once all
 *  of its predecessors are in `visited`; when the stack runs dry before every
 *  block has been ordered, one not yet visited successor of an already
 *  ordered block is pushed regardless of its predecessors.
 *
 *  Uses the "ControlFlowGraph" analysis for successor/predecessor queries.
 *
 *  \param function Function whose blocks are ordered.
 */
void ReversePostOrderTraversal::analyze(Function& function)
{
	typedef util::LargeSet<BasicBlock*> BlockSet;
	typedef std::stack<BasicBlock*>     BlockStack;

	order.clear();
	
	BlockSet   visited;
	BlockStack stack;
	
	auto cfgAnalysis = getAnalysis("ControlFlowGraph");
	auto cfg         = static_cast<ControlFlowGraph*>(cfgAnalysis);	

	report("Creating reverse post order traversal over function '" +
		function.name() + "'");

	// reverse post order is reversed topological order
	// NOTE(review): the entry block is pushed without being added to
	// `visited`, so successors that have it as a predecessor fail the
	// all-predecessors test below and are only reached through the restart
	// branch -- confirm that this is intended.
	stack.push(&*function.entry_block());
	
	// Each outer iteration drains the stack; loop until every block is ordered
	while(order.size() != function.size())
	{
		if(stack.empty())
		{
			// Restart: take the first unvisited successor of any block that
			// has already been ordered, ignoring its other predecessors.
			for(auto block : order)
			{
				auto successors = cfg->getSuccessors(*block);
				
				for(auto successor : successors)
				{
					if(visited.insert(successor).second)
					{
						stack.push(successor);
						break;
					}
				}
				
				if(!stack.empty()) break;
			}
		}
		
		// No candidate left although blocks are still missing
		assertM(!stack.empty(), (function.size() - order.size())
			<< " blocks are not connected.");
		
		while(!stack.empty())
		{
			BasicBlock* top = stack.top();
			stack.pop();
		
			auto successors = cfg->getSuccessors(*top);
			
			for(auto successor : successors)
			{
				assert(successor != nullptr);
				
				auto predecessors = cfg->getPredecessors(*successor);
				
				bool allPredecessorsVisited = true;
		
				for(auto predecessor : predecessors)
				{
					if(visited.count(predecessor) == 0)
					{
						allPredecessorsVisited = false;
						break;
					}
				}
				
				// Defer successors that still wait for a predecessor
				if(!allPredecessorsVisited) continue;
				
				// Blocks are marked visited when pushed, not when popped
				if(visited.insert(successor).second)
				{
					stack.push(successor);
				}
			}

			order.push_back(top);
		
			report(" " << top->name());
		}
	}
	
	// reverse the order
	std::reverse(order.begin(), order.end());
}
Esempio n. 22
0
	/*! \brief Not implemented for this allocation type; always asserts.
	 *
	 *  \return A null pointer, so the function does not flow off its end if
	 *          the assertion is compiled out.
	 */
	void *ATIGPUDevice::MemoryAllocation::mappedPointer() const
	{
		assertM(false, "Not implemented yet");

		return 0;
	}
Esempio n. 23
0
  /*! \brief Make sure one coalesced variable holds physical registers at a
   *         program point, spilling other variables if necessary.
   *
   *  Order of attempts:
   *   1. nothing to do when cr is already in _onRegister;
   *   2. use the first size-aligned run of registers that is completely in
   *      _available;
   *   3. otherwise sum the weighted scores of all spill policies per
   *      register, pick the size-aligned start position with the lowest
   *      cost, free the variables found there (calling spill() on those not
   *      spilled yet) and allocate the run to cr.
   *
   *  \param cr Variable that must be placed on registers.
   *  \param p  Program point of the use; handed to clearExpired() and to the
   *            spill policies.
   *  \return Reference to the set of variables currently on registers.
   */
  const SpillPolicy::CoalescedSet & Allocator::use(CoalescedRegister *cr,
    const Interval::Point &p)
  {
    assert((_available.size() + _registerVariableMap.size()) == _regs);
    clearExpired(p);
    reportE(INFO, "Point: "<< p << "; Variable " << cr->reg() << "; Size: " << cr->size());

    /* If the variable cr is marked as on register, there is nothing to do */
    if(_onRegister.find(cr) != _onRegister.end())
    {
      reportE(DEBUG,
          "Point: "<< p << "; Variable " << cr->reg() << " already marked as onRegister, nothing to do");

      return _onRegister;
    }
    /* There can't be a variable larger than the available registers */
    assertM(cr->size() <= _regs, "Variable larger than available registers");
    /* Test if the variable fits on available holes. We assume the variable must
     * be placed on a register position aligned to it's size */
    //TODO: Discover if vectors require aligned positions and how may affect here
    /* maxReg is the last start position that is a multiple of the size and
     * still leaves room for the whole variable */
    unsigned int maxReg = _regs - cr->size();
    maxReg -= (maxReg % cr->size());
    for(unsigned i = 0; i <= maxReg; i += cr->size())
    {
      bool fit = true;
      for(unsigned u = i; fit && (u < (i + cr->size())); u++)
      {
        fit &= _available.find(u) != _available.end();
      }
      if(fit)
      {
        reportE(DEBUG,
            "Point: "<< p << "; Variable " << cr->reg() << ", with size " << cr->size() << " fits on starting register " << i);
        for(unsigned u = i; fit && (u < (i + cr->size())); u++)
        {
          reportE(DEBUG_DETAILS,
              "\tPoint: "<< p << "; Variable " << cr->reg() << " receives physical register " << u);
          cr->allocated.insert(u);
          _available.erase(u);
          _registerVariableMap[u] = cr;
        }
        _onRegister.insert(cr);
        return _onRegister;
      }
    }

    /* Create a cost ranking based on all spill policies active, and weight of each one of them */
    PolicyMap::const_iterator policy = _policies.begin();
    PolicyMap::const_iterator policyEnd = _policies.end();
    RegisterCostMap scoresSum;
    for(SpillPolicy::RegisterId i = 0; i < _regs; i++)
    {
      scoresSum[i] = 0;
    }

    for(; policy != policyEnd; policy++)
    {
      reportE(DEBUG,
          "Point: "<< p << "; Building spilling cost score for spill policy" << policy->first);
      SpillPolicy::CoalescedCostMap score;
      score = policy->second->rank(_onRegister, p);
      /* Each ranked variable is accounted on its first allocated register */
      while(score.size() > 0)
      {
      	assertM(score.begin()->first->allocated.size() > 0, "No allocated physical registers");
      	SpillPolicy::RegisterId reg = *score.begin()->first->allocated.begin();
        reportE(DEBUG_DETAILS,
            "\tPoint: "<< p << "; Variable " << score.begin()->first->reg() << ", at register " << reg << " receives cost " << score.begin()->second);
        scoresSum[reg] += (_weights[policy->first] * score.begin()->second);
        reportE(DEBUG_DETAILS,
            "\tPoint: "<< p << "; Register" << reg << " has total cost " << scoresSum[reg]);
        score.erase(score.begin());
      }
    }

    /* Locate the best insertion point */
    long int best = std::numeric_limits<long int>::max();
    SpillPolicy::RegisterId bestStart = _regs;
    for(SpillPolicy::RegisterId i = 0; i <= maxReg; i += cr->size())
    {
      long int cost = 0;
      CoalescedRegister* last = NULL;
      for(unsigned u = i; (u < (i + cr->size())); u++)
      {
        if(_registerVariableMap.find(u) == _registerVariableMap.end())
          continue;

        /* Count a variable spanning several registers only once */
        if(last == _registerVariableMap[u])
          continue;

        last = _registerVariableMap[u];
        cost += scoresSum[*last->allocated.begin()];
      }
      reportE(DEBUG_DETAILS,
          "\tPoint: "<< p << "; Spilling from position " << i << ", a size of " << cr->size() << " registers has a cost of " << cost);

      if(cost < best)
      {
        best = cost;
        bestStart = i;
        reportE(DEBUG_DETAILS, "\tPoint: "<< p << "; Is best spilling cost so far" );
      }
    }
    assertM(bestStart < _regs, "Error finding best insert position");
    /* Spill required variables */
    reportE(DEBUG, "Spilling from start position " << bestStart );
    for(unsigned u = bestStart; (u < (bestStart + cr->size())); u++)
    {
      reportE(DEBUG_DETAILS, "\tRegister "<< u << ":" );
      if(_registerVariableMap.find(u) == _registerVariableMap.end())
      {
        assert(_available.find(u) != _available.end());
        continue;
      }
      CoalescedRegister *spillCr = _registerVariableMap[u];
      reportE(DEBUG_DETAILS, "\t\tContains variable "<< spillCr->reg() << " of size " << spillCr->size() );
      if(!spillCr->spilled())
      {
        reportE(DEBUG_DETAILS, "\t\t\tNew spill" );
        spillCr->spill();
      }
      assertM((*(spillCr->allocated.begin())) == u, "Wrong mapping");
      /* Release every physical register held by the spilled variable */
      while(spillCr->allocated.size() > 0)
      {
        SpillPolicy::RegisterId a = *(spillCr->allocated.begin());
        _available.insert(a);
        reportE(DEBUG_DETAILS, "\t\t\tFreeing register " << a );
        _registerVariableMap.erase(a);
        spillCr->allocated.erase(a);
      }
      _onRegister.erase(spillCr);
    }

    reportE(DEBUG, "Associating registers to variable " << cr->reg() );
    /* Allocate registers to the new variable */
    _onRegister.insert(cr);
    for(unsigned u = bestStart; (u < (bestStart + cr->size())); u++)
    {
      cr->allocated.insert(u);
      /* The erase must run even when assertions are compiled out, so it is
       * performed outside of the assertion expression */
      const bool wasAvailable = (_available.erase(u) != 0);
      assertM(wasAvailable, "Register being allocated not marked as available");
      (void) wasAvailable;
      _registerVariableMap[u] = cr;
    }
    return _onRegister;
  }
/*! \brief Placeholder: result processing is not implemented.
 *
 *  \param results Unused; the body only raises an assertion through assertM.
 */
void FeatureResultProcessor::process(const ResultVector& results)
{
	// TODO: implement; until then every call asserts.
	assertM(false, "Not implemented.");
}
Esempio n. 25
0
/*! \brief Placeholder: not implemented.
 *
 *  None of the parameters is used; the body only raises an assertion through
 *  assertM.
 */
static void displayVideo(const std::string& inputPath, size_t xPixels,
    size_t yPixels, size_t colors, const std::string& text)
{
    assertM(false, "Not implemented.");
}
Esempio n. 26
0
	/*! \brief Not implemented for this device; always asserts.
	 *
	 *  \return 0, so the function does not flow off its end if the assertion
	 *          is compiled out.
	 */
	unsigned int ATIGPUDevice::getLastError() const
	{
		assertM(false, "Not implemented yet");

		return 0;
	}
Esempio n. 27
0
  /*! \brief Make sure a set of variables holds physical registers at a
   *         program point, spilling other variables if necessary.
   *
   *  Variables already in _onRegister are left alone. The remaining ones are
   *  placed largest first into size-aligned runs of free registers; as soon
   *  as one does not fit, the weighted scores of the spill policies are
   *  summed per register and each remaining variable takes the cheapest
   *  size-aligned position that holds no variable of `used`, spilling what
   *  is found there.
   *
   *  \param used          Variables required on registers (taken by value;
   *                       the local copy is modified).
   *  \param p             Program point; handed to clearExpired() and to the
   *                       spill policies.
   *  \param ignoreSpilled When true, spilled variables are dropped from
   *                       `used` and not allocated.
   *  \param spillUsed     When true, `used` may need more registers than
   *                       exist; selectByLRU() is then called on it.
   *  \param coalesced     Coalesced allocation is not implemented and asserts;
   *                       it may not be combined with spillUsed or
   *                       ignoreSpilled.
   *  \param removeSpilled Together with ignoreSpilled: spilled variables that
   *                       are still in _onRegister give up their registers.
   *  \return Reference to the set of variables currently on registers.
   */
  const SpillPolicy::CoalescedSet & Allocator::use(SpillPolicy::CoalescedSet used,
    const Interval::Point &p, const bool ignoreSpilled, const bool spillUsed,
    const bool coalesced, const bool removeSpilled)
  {
    assert((_available.size() + _registerVariableMap.size()) == _regs);
    reportE(INFO, "Point: "<< p << "; Variables count: " << used.size());

    /* coalesced tells that all used variables must be placed on a coalesced amount of registers,
     * TODO: This is meant for vector variables, check if it is really required, and if it require
     * consequent registers
     * TODO: Spill/store/load vector variables all together using vector instructions */
    assertM(!(coalesced && (spillUsed || ignoreSpilled)), "Can't be coalesced when ignoring spilled");
    clearExpired(p);
    if(used.empty()){
      reportE(INFO, "No variables used");
      return _onRegister;
    }
    SpillPolicy::CoalescedSet::iterator crI = used.begin();
    SpillPolicy::CoalescedSet::iterator crIEnd = used.end();
    unsigned totalRegisters = 0;
    reportE(INFO, "Counting required registers");
    while(crI != crIEnd)
    {
      CoalescedRegister *cr = *crI;
      reportE(DEBUG, "Variable: " << cr->reg() << "; Size: " << cr->size());
      /* If spilled variables must be ignored, they are not allocated and removed from on register status */
      if(ignoreSpilled && cr->spilled())
      {
        reportE(DEBUG, "\tNot allocating spilled variable: " << cr->reg());
        if(removeSpilled && (_onRegister.find(cr) != _onRegister.end()))
        {
          reportE(DEBUG, "\t\tSpilled variable " << cr->reg() << " marked as on-register, removing it");
          while(cr->allocated.size() > 0)
          {
            reportE(DEBUG_DETAILS, "\t\t\tFreeing physical register " << *(cr->allocated.begin()));
            _available.insert(*(cr->allocated.begin()));
            _registerVariableMap.erase(_registerVariableMap.find(*(cr->allocated.begin())));
            cr->allocated.erase(cr->allocated.begin());
          }
          _onRegister.erase(cr);
        }
        /* Advance before erasing so the loop iterator stays valid */
        SpillPolicy::CoalescedSet::iterator erase = crI;
        crI++;
        used.erase(erase);
        continue;
      }
      totalRegisters += cr->size();
      crI++;
    }

    if(used.empty())
    {
      reportE(DEBUG, "Nothing to do, all variables are spilled");
      return _onRegister;
    }

    /* If we can't spill variables on the used list, they must use at most the same amount of registers
     * as available */
    assertM((totalRegisters <= _regs) || spillUsed,
        "Variables larger than available registers");

    if(totalRegisters > _regs){
      selectByLRU(used, p, totalRegisters);
    }

    SpillPolicy::CoalescedSet worklist = used;
    /* If the variables need to be coalesced, then:
     * 1) All variables need to be the same size
     * 2) The first position will be aligned based on its size
     * This is expecting that vector require coalesced aligned registers */
    reportE(INFO, "Required registers: " << totalRegisters);

    if(coalesced)
    {//TODO: Implement coalesced allocating, if required for vectors
      reportE(INFO, "Coalesced allocation");
      assertM(false, "TODO: Implement coalesced allocating, required by vectors");
    }


    /* Variables that already hold registers need no further work */
    SpillPolicy::CoalescedSet::iterator variable = worklist.begin();
    while(variable != worklist.end()){
      if(_onRegister.find(*variable) != _onRegister.end()){
        reportE(DEBUG, "\tVariable " << (*variable)->reg()
            << " already on register, erasing it from worklist");
        SpillPolicy::CoalescedSet::iterator erase = variable;
        variable++;
        worklist.erase(erase);
        continue;
      }
      variable++;
    }
    /* Must fit larger variables first --> map variables by registers size */
    std::multimap<ushort, CoalescedRegister*> sizeCoalescedMap;
    for(SpillPolicy::CoalescedSet::const_iterator cr = worklist.begin();
        cr != worklist.end(); cr++)
    {
      reportE(DEBUG_DETAILS, "\tVariable " << (*cr)->reg() << "; Size: " << (*cr)->size());
      sizeCoalescedMap.insert(std::make_pair((*cr)->size(), *cr));
    }

    reportE(INFO, "Start testing if all fits on current available registers");
    /* Try to allocate variables by largest size first. If one won't fit, start spilling */
    while(worklist.size() > 0)
    {
      bool fit = true;

      /* The last multimap entry is (one of) the largest variables left */
      std::multimap<ushort, CoalescedRegister*>::iterator crI = --sizeCoalescedMap.end();
      CoalescedRegister *cr = crI->second;
      reportE(DEBUG, "Test variable " << cr->reg() << " of size " << cr->size());
      unsigned int maxReg = _regs - cr->size();
      maxReg -= (maxReg % cr->size());

      for(unsigned i = 0; i <= maxReg; i += cr->size())
      {
        fit = true;
        reportE(DEBUG_DETAILS, "Starting position " << i);
        for(unsigned u = i; fit && (u < (i + cr->size())); u++)
        {
          fit &= (_available.find(u) != _available.end());
          reportE(DEBUG_DETAILS, "Position " << u << " is busy?" << !fit);
        }
        if(fit)
        {
          reportE(DEBUG, "Allocating " << cr->reg() << " on position " << i);
          for(unsigned u = i; u < (i + cr->size()); u++)
          {
            cr->allocated.insert(u);
            /* The erase must run even when assertions are compiled out, so
             * it is performed outside of the assertion expression */
            const bool wasAvailable = (_available.erase(u) != 0);
            assert(wasAvailable);
            (void) wasAvailable;
            _registerVariableMap[u] = cr;
          }
          sizeCoalescedMap.erase(crI);
          _onRegister.insert(cr);
          worklist.erase(cr);
          break;
        }
      }
      if(!fit){
        break;
      }
    }

    if(worklist.empty())
    {
      reportE(INFO, "All variable allocated without spilling");
      return _onRegister;
    }

    /* Create variable spill cost ranking, based on active spill policies and weights */
    PolicyMap::const_iterator policy = _policies.begin();
    PolicyMap::const_iterator policyEnd = _policies.end();
    RegisterCostMap scoresSum;

    for(SpillPolicy::RegisterId i = 0; i < _regs; i++)
    {
      scoresSum[i] = 0;
    }

    reportE(INFO, "Spilling is required");
    for(; policy != policyEnd; policy++)
    {
      reportE(DEBUG, "Calculating spill cost based on spill policy: " << policy->first);

      SpillPolicy::CoalescedCostMap score;
      score = policy->second->rank(_onRegister, p);
      /* Each ranked variable is accounted on its first allocated register */
      while(score.size() > 0)
      {
      	CoalescedRegister* cr = score.begin()->first;
      	assertM(cr->allocated.size() > 0, "Variable marked as on register and without allocated registers");
      	SpillPolicy::RegisterId reg = *cr->allocated.begin();
        if(used.find(cr) != used.end()){
          scoresSum[reg] = std::numeric_limits<unsigned>::max();
          reportE(DEBUG_DETAILS,
              "Variable " << cr->reg()
              << " on position " << reg
              << " is used, setting cost to maximum");
        } else {
          scoresSum[reg] += (_weights[policy->first] * score.begin()->second);
          reportE(DEBUG_DETAILS,
            "\tVariable " << cr->reg()
            << " on position " << reg
            << " has cost " << scoresSum[reg]);
        }
        score.erase(score.begin());
      }
    }

    reportE(INFO, "Locating best locations based on spill costs");
    /* Find best spilling position, starting by largest variables */
    while(worklist.size() > 0)
    {
      std::multimap<ushort, CoalescedRegister*>::iterator crI = --sizeCoalescedMap.end();
      CoalescedRegister *cr = crI->second;
      long int best = std::numeric_limits<long int>::max();
      SpillPolicy::RegisterId bestStart = _regs;
      reportE(DEBUG, "\tAllocating variable " << cr->reg() << "; Size: " << cr->size());
      unsigned int maxReg = _regs - cr->size();
      maxReg -= (maxReg % cr->size());

      for(SpillPolicy::RegisterId i = 0; i <= maxReg; i += cr->size())
      {
        bool noUsedVariable = true;
        long int cost = 0;
        CoalescedRegister *last = NULL;
        reportE(DEBUG, "\t\tTesting start position:" << i);
        for(unsigned u = i; noUsedVariable && (u < (i + cr->size())); u++)
        {
          if(_registerVariableMap.find(u) == _registerVariableMap.end()){
            reportE(DEBUG_DETAILS, "\t\t\tNo variable starts on position " << u);
            continue;
          }

          if(last == _registerVariableMap[u]){
            reportE(DEBUG_DETAILS, "\t\t\tPosition " << u << " is part of already accounted variable " << last->reg());
            continue;
          }

          last = _registerVariableMap[u];
          /* Assure that the variable using register is not being used now */
          reportE(DEBUG_DETAILS, "\t\t\tPosition " << u << " is beginning of variable " << last->reg());
          noUsedVariable &= (used.find(last) == used.end());
          if(noUsedVariable){
            reportE(DEBUG_DETAILS, "\t\t\tVariable " << last->reg() << " is not required to be allocated.");
          } else {
            reportE(DEBUG_DETAILS, "\t\t\tVariable " << last->reg() << " is required to be allocated.");
          }
          cost += scoresSum[*last->allocated.begin()];
          reportE(DEBUG, "\t\tCost to insert at point "  << i << ": " << cost);
        }
        if(!noUsedVariable)
        {
          reportE(DEBUG, "\t\tCan't use position " << i << ", variable required to be on register");
          continue;
        }
        if(cost < best)
        {
          best = cost;
          bestStart = i;
          reportE(INFO, "\t\tNew best position for variable " << cr->reg() << ": " << i);
        }
      }
      assertM(bestStart != _regs, "Error finding a insertion position");

      reportE(INFO, "\tAllocating at position " << bestStart);
      /* Evict whatever currently lives in the chosen run */
      for(unsigned u = bestStart; u < (bestStart + cr->size()); u++)
      {
        if(_registerVariableMap.find(u) == _registerVariableMap.end())
        {
          reportE(DEBUG_DETAILS, "\tNo variable at position " << u);
          continue;
        }
        CoalescedRegister *spillCr = _registerVariableMap[u];
        if(!spillCr->spilled())
        {
          reportE(DEBUG, "\tSpilling variable " << spillCr->reg());
          spillCr->spill();
        }
        _onRegister.erase(spillCr);
        while(spillCr->allocated.size() > 0)
        {
          reportE(DEBUG_DETAILS, "\tFreeing physical register " << *(spillCr->allocated.begin()));
          _available.insert(*(spillCr->allocated.begin()));
          _registerVariableMap.erase(
              _registerVariableMap.find(*(spillCr->allocated.begin())));
          spillCr->allocated.erase(spillCr->allocated.begin());
        }
      }

      _onRegister.insert(cr);
      for(unsigned u = bestStart; u < (bestStart + (cr->size())); u++)
      {
        reportE(DEBUG, "\tAllocating register " << u << " to variable " << cr->reg());
        cr->allocated.insert(u);
        _registerVariableMap[u] = cr;
        /* Same as above: keep the erase out of the assertion expression */
        const bool wasAvailable = (_available.erase(u) != 0);
        assertM(wasAvailable, "Register being allocated not marked as available");
        (void) wasAvailable;
      }
      worklist.erase(cr);
      sizeCoalescedMap.erase(crI);
    }
    assertM(sizeCoalescedMap.empty(), "Size mapped worklist not clear");
    return _onRegister;
  }
Esempio n. 28
0
	/*! \brief Limit the number of worker threads used by this device.
	
		This operation has no implementation for the ATI GPU device yet;
		calling it trips an assertion.
	
		\param threads Requested worker thread limit (currently ignored).
	*/
	void ATIGPUDevice::limitWorkerThreads(unsigned int threads)
	{
		// The argument is intentionally unused until this is implemented.
		(void) threads;
		
		assertM(false, "Not implemented yet");
	}
Esempio n. 29
0
	/*! \brief Open the CUDA driver shared library and bind its entry points.
	
		Does nothing if a driver handle is already held (_driver != 0).
		Otherwise opens the library named by _libname with dlopen; on failure
		the dlerror() text is reported and the function returns with _driver
		still zero. On success every driver API entry point listed below is
		bound through the DynLink / DynLinkV macros, and the driver version is
		queried into _version and reported.
	
		When not compiled with a GNU-compatible compiler (__GNUC__ unset) the
		function only asserts that driver support is unavailable.
	
		NOTE(review): DynLink and DynLinkV are macros defined outside this
		function. Presumably each resolves the named symbol from _driver into
		the member function pointer of the same name, with DynLinkV binding a
		versioned variant of the symbol - confirm against their definitions.
	*/
	void CudaDriver::Interface::load()
	{
		// Already loaded: keep the existing handle and bindings.
		if( _driver != 0 ) return;
		#if __GNUC__
		report( "Loading " << _libname );
		_driver = dlopen( _libname.c_str(), RTLD_LAZY );
		if( _driver == 0 )
		{
			// Loading failed; _driver stays zero so a later call can retry.
			report( "Failed to load cuda driver." );
			report( "  " << dlerror() );
			return;
		}
		
		// Initialization and device queries
		DynLink(cuInit);
		DynLink(cuDriverGetVersion);
		DynLink(cuDeviceGet);
		DynLink(cuDeviceGetCount);
		DynLink(cuDeviceGetName);
		DynLink(cuDeviceComputeCapability);

		DynLinkV(cuDeviceTotalMem);
		
		DynLink(cuDeviceGetProperties);
		DynLink(cuDeviceGetAttribute);
		
		// Context management
		DynLink(cuCtxGetLimit);
		DynLink(cuCtxGetApiVersion);
		DynLinkV(cuCtxCreate);
		
		DynLink(cuCtxDestroy);
		DynLink(cuCtxAttach);
		DynLink(cuCtxDetach);
		DynLink(cuCtxPushCurrent);
		DynLink(cuCtxPopCurrent);
		DynLink(cuCtxGetDevice);
		DynLink(cuCtxSynchronize);
		
		// Module management
		DynLink(cuModuleLoad);
		DynLink(cuModuleLoadData);
		DynLink(cuModuleLoadDataEx);
		DynLink(cuModuleLoadFatBinary);
		DynLink(cuModuleUnload);
		DynLink(cuModuleGetFunction);
		
		DynLinkV(cuModuleGetGlobal);
		
		DynLink(cuModuleGetTexRef);
		
		// Memory allocation and host memory registration
		DynLinkV(cuMemGetInfo);
		DynLinkV(cuMemAlloc);
		DynLinkV(cuMemAllocPitch);
		DynLinkV(cuMemFree);
		DynLinkV(cuMemGetAddressRange);

		DynLinkV(cuMemAllocHost);
		DynLinkV(cuMemHostRegister);
		DynLinkV(cuMemHostUnregister);
		
		DynLink(cuMemFreeHost);
		DynLink(cuMemHostAlloc);
		
		DynLinkV(cuMemHostGetDevicePointer);
		DynLink(cuMemHostGetFlags);
		
		// Memory copies (synchronous, then asynchronous)
		DynLinkV(cuMemcpyHtoD);
		DynLinkV(cuMemcpyDtoH);
		DynLinkV(cuMemcpyDtoD);
		DynLinkV(cuMemcpyDtoA);
		DynLinkV(cuMemcpyAtoD);
		DynLinkV(cuMemcpyHtoA);
		DynLinkV(cuMemcpyAtoH);
		DynLinkV(cuMemcpyAtoA);
		DynLinkV(cuMemcpy2D);
		DynLinkV(cuMemcpy2DUnaligned);
		DynLinkV(cuMemcpy3D);
		DynLinkV(cuMemcpyHtoDAsync);
		DynLinkV(cuMemcpyDtoHAsync);
		DynLinkV(cuMemcpyHtoAAsync);
		DynLinkV(cuMemcpyAtoHAsync);
		DynLinkV(cuMemcpy2DAsync);
		DynLinkV(cuMemcpy3DAsync);
		
		// Memory set
		DynLinkV(cuMemsetD8);
		DynLinkV(cuMemsetD16);
		DynLinkV(cuMemsetD32);
		DynLinkV(cuMemsetD2D8);
		DynLinkV(cuMemsetD2D16);
		DynLinkV(cuMemsetD2D32);
		
		// Function configuration
		DynLink(cuFuncSetBlockShape);
		DynLink(cuFuncSetSharedSize);
		DynLink(cuFuncGetAttribute);
		DynLink(cuFuncSetCacheConfig);
		
		// Arrays
		DynLinkV(cuArrayCreate);
		DynLinkV(cuArrayGetDescriptor);
		DynLink(cuArrayDestroy);
		DynLinkV(cuArray3DCreate);
		DynLinkV(cuArray3DGetDescriptor);
		
		// Texture references
		DynLink(cuTexRefCreate);
		DynLink(cuTexRefDestroy);
		DynLink(cuTexRefSetArray);
		DynLinkV(cuTexRefSetAddress);
		DynLinkV(cuTexRefSetAddress2D);
		DynLink(cuTexRefSetFormat);
		DynLink(cuTexRefSetAddressMode);
		DynLink(cuTexRefSetFilterMode);
		DynLink(cuTexRefSetFlags);
		DynLinkV(cuTexRefGetAddress);
		DynLink(cuTexRefGetArray);
		DynLink(cuTexRefGetAddressMode);
		DynLink(cuTexRefGetFilterMode);
		DynLink(cuTexRefGetFormat);
		DynLink(cuTexRefGetFlags);
		
		// Kernel parameters and launch
		DynLink(cuParamSetSize);
		DynLink(cuParamSeti);
		DynLink(cuParamSetf);
		DynLink(cuParamSetv);
		DynLink(cuParamSetTexRef);
		DynLink(cuLaunch);
		DynLink(cuLaunchGrid);
		DynLink(cuLaunchGridAsync);
		
		// Events
		DynLink(cuEventCreate);
		DynLink(cuEventRecord);
		DynLink(cuEventQuery);
		DynLink(cuEventSynchronize);
		DynLink(cuEventDestroy);
		DynLink(cuEventElapsedTime);
		
		// Streams
		DynLink(cuStreamCreate);
		DynLink(cuStreamQuery);
		DynLink(cuStreamSynchronize);
		DynLink(cuStreamDestroy);

		// Graphics resource interoperability
		DynLink(cuGraphicsUnregisterResource);
		DynLink(cuGraphicsSubResourceGetMappedArray);
		DynLinkV(cuGraphicsResourceGetMappedPointer);
		DynLink(cuGraphicsResourceSetMapFlags);
		DynLink(cuGraphicsMapResources);
		DynLink(cuGraphicsUnmapResources);

		DynLink(cuGetExportTable);

		// OpenGL interoperability
		DynLink(cuGLInit);
		DynLinkV(cuGLCtxCreate);
		DynLink(cuGraphicsGLRegisterBuffer);
		DynLink(cuGraphicsGLRegisterImage);
		DynLink(cuGLRegisterBufferObject);
		DynLink(cuGLSetBufferObjectMapFlags);

		// Query the driver version through the freshly bound entry point.
		// NOTE(review): cuDriverGetVersion is dereferenced without a null
		// check; confirm that DynLink guarantees the pointer is valid here.
		CUresult result = (*cuDriverGetVersion)(&_version);
		
		if (result == CUDA_SUCCESS) {
			report(" Driver version is: " << _version << " and was called successfully");
		}
		else {
			// A failed version query is only reported; loading is not undone.
			report("cuDriverGetVersion() returned " << result);
		}

		#else
		// The dlopen-based loading path is only built when __GNUC__ is set.
		assertM(false, "CUDA Driver support not compiled into Ocelot.");
		#endif
	}
Esempio n. 30
0
	/*! \brief Get the flags associated with this memory allocation.
	
		This query has no implementation for ATI GPU allocations yet;
		calling it trips an assertion.
	
		\return Always 0, a placeholder that is only reached when the
			assertion does not terminate the program.
	*/
	unsigned int ATIGPUDevice::MemoryAllocation::flags() const
	{
		assertM(false, "Not implemented yet");
		
		// Flowing off the end of a non-void function is undefined behaviour,
		// so return a defined value in case the assertion above does not
		// abort (for example if assertions are compiled out).
		return 0;
	}