/*! \brief Test whether a matrix respects this constant constraint.

	The check is expressed as "no element violates the bound": the opposite
	comparison is applied against _value and the reduced sum of that result
	must be zero.

	\param m The matrix to test.

	\return True when the reduced sum of the violation mask is zero. False
		otherwise, and false after the assertion for any comparison kind
		other than LessThanOrEqual / GreaterThanOrEqual.
*/
bool ConstantConstraint::isSatisfied(const Matrix& m) const
{
	// NOTE(review): the violation test is inclusive (>= for a "<=" constraint),
	// so elements equal to _value count as violations - confirm this is intended.
	if(_comparison == LessThanOrEqual)
	{
		return m.greaterThanOrEqual(_value).reduceSum() == 0;
	}

	if(_comparison == GreaterThanOrEqual)
	{
		return m.lessThanOrEqual(_value).reduceSum() == 0;
	}

	// Any other comparison kind is unsupported.
	assertM(false, "not implemented");

	return false;
}
/*! \brief Find the position of this instruction inside its parent block.

	\return The zero-based position at which this instruction appears while
		iterating over *block. If it is never encountered the assertion
		fires and the number of entries iterated is returned.
*/
unsigned int Instruction::index() const
{
	unsigned int position = 0;

	for(auto candidate : *block)
	{
		if(candidate != this)
		{
			++position;
			continue;
		}

		return position;
	}

	assertM(false, "Could not find instruction in parent block.");

	return position;
}
/*! \brief Run the barrier removal transformation over one kernel.

	Resets the per-kernel counters, records the kernel being processed, visits
	every block of the dataflow graph and finally adds the local variables
	through _addLocalVariables().

	\param k The kernel to transform; asserted to be a PTX kernel.
*/
void RemoveBarrierPass::runOnKernel( ir::IRKernel& k )
{
	report( "Removing barriers from kernel " << k.name );
	assertM( k.ISA == ir::Instruction::PTX,
		"This pass is valid for PTX kernels only." );

	// Per-kernel state starts from 1 for both counters.
	_reentryPoint = 1;
	_spillBytes   = 1;
	_kernel       = static_cast< ir::PTXKernel* >( &k );

	// end() is re-read on every iteration, exactly as the graph is queried
	// between block visits.
	analysis::DataflowGraph::iterator block = _dfg().begin();

	while( block != _dfg().end() )
	{
		_runOnBlock( block );
		++block;
	}

	_addLocalVariables();
}
/*! \brief Build a statement of the given type, optionally wrapping an
		instruction.

	Every attribute starts at its "invalid"/zero default (alignment starts
	at 1).

	\param t The kind of statement being created.
	\param i Optional instruction. When non-zero it is cloned into this
		statement, and the statement type is asserted to be Instruction.
*/
LLVMStatement::LLVMStatement( Type t, const LLVMInstruction* i )
	: instruction( 0 ),
	type( t ),
	linkage( InvalidLinkage ),
	convention( LLVMInstruction::InvalidCallingConvention ),
	visibility( InvalidVisibility ),
	returnAttribute( LLVMInstruction::InvalidParameterAttribute ),
	functionAttributes( 0 ),
	alignment( 1 ),
	space( 0 ),
	constant( false )
{
	// Nothing more to set up for statements without an instruction.
	if( i == 0 )
	{
		return;
	}

	// The statement keeps its own copy of the instruction.
	instruction = static_cast< LLVMInstruction* >( i->clone() );

	assertM( type == Instruction, "Statement given non-zero "
		<< "instruction pointer, but not specified as an "
		<< "instruction statement." );
}
bool CoalescedArray::insertVar(const RegisterId reg, const Type type) { assertM(_canInsert, "Can't insert new values after getting a offset"); if(!MemoryArray::insertVar(reg, type)) return false; _declared[reg] = type; unsigned varSize = ir::PTXOperand::bytes(type); if(_mem.find(varSize) == _mem.end()) { std::set<RegisterId> tmp; _mem[varSize] = tmp; } _mem[varSize].insert(reg); _stackSize += varSize; return true; }
unsigned int ATIGPUDevice::deviceCount() { CALuint count = 0; try { CalDriver()->calDeviceGetCount(&count); // Multiple devices is not supported yet if (count > 1) { assertM(false, "Multiple devices is not supported yet"); } } catch (hydrazine::Exception he) { // Swallow the exception and return 0 devices report(he.what()); } return count; }
/*! \brief Check whether a filesystem path names a directory.

	\param path The path to inspect.

	\return True when stat() succeeds and reports a directory. False when
		stat() fails or the path is not a directory. On _WIN32 the query is
		not implemented: the assertion fires and false is returned.
*/
bool isDirectory(const std::string& path)
{
	#ifndef _WIN32
	struct stat fileStats;

	auto result = stat(path.c_str(), &fileStats);

	if(result != 0)
	{
		return false;
	}

	return S_ISDIR(fileStats.st_mode);
	#else
	assertM(false, "Not implemented for this platform.");

	// Keep the function well-defined if the assertion does not abort:
	// flowing off the end of a bool function is undefined behaviour.
	return false;
	#endif
}
/*! \brief Carve a new allocation out of the uav0 resource.

	The allocation pointer is advanced by the size rounded up with
	AlignUp(size, 4), while the allocation object itself records the
	requested size.

	\param size Number of bytes requested.

	\return The newly created allocation, also registered in
		_uav0Allocations under its pointer().
*/
Device::MemoryAllocation *ATIGPUDevice::allocate(size_t size)
{
	// uav0 accesses should be aligned to 4
	size_t alignedSize = AlignUp(size, 4);

	// Check uav0 size limits
	// NOTE(review): the comparison is strict, so a request that would exactly
	// fill the remaining space is rejected - confirm this is intended.
	assertM(_uav0AllocPtr - Uav0BaseAddr + alignedSize < Uav0Size,
		"Out of global memory: " << _uav0AllocPtr - Uav0BaseAddr
		<< " + " << alignedSize << " greater than " << Uav0Size);

	MemoryAllocation *created =
		new MemoryAllocation(&_uav0Resource, _uav0AllocPtr, size);

	_uav0Allocations.insert(std::make_pair(created->pointer(), created));

	// Advance past the aligned extent for the next request.
	_uav0AllocPtr += alignedSize;

	return created;
}
/*! \brief Find an identifier of the form base + number that is unused in a
		module.

	Candidates are tried with increasing numbers, starting at begin, until
	one is found that is neither a global, a kernel nor a texture of the
	module.

	\param module The module whose globals, kernels and textures are checked.
	\param base Prefix of the generated identifier.
	\param begin First numeric suffix to try.

	\return The first free identifier found.
*/
static std::string findNumericSuffix(ir::Module& module,
	const std::string& base, unsigned int begin)
{
	for(;;)
	{
		std::stringstream stream;

		stream << base << begin++;

		const std::string candidate = stream.str();

		// Same lookup order as before: globals, then kernels, then textures.
		const bool taken = module.globals().count(candidate) != 0
			|| module.kernels().count(candidate) != 0
			|| module.textures().count(candidate) != 0;

		if(!taken)
		{
			return candidate;
		}
	}

	// Not reachable: the loop above only exits by returning.
	assertM(false, "Could not find any valid identifier.");

	return base;
}
void ATIGPUDevice::MemoryAllocation::memset(size_t offset, int value, size_t size) { assertM(offset + size <= _size, "Invalid memset size"); CALvoid *data = NULL; CALuint pitch = 0; CALuint flags = 0; CalDriver()->calResMap(&data, &pitch, *_resource, flags); CALdeviceptr addr = (_basePtr - ATIGPUDevice::Uav0BaseAddr) + offset; std::memset((char *)data + addr, value, size); report("MemoryAllocation::memset(" << "offset = " << std::dec << offset << ", value = " << std::dec << value << ", size = " << std::dec << size << ")"); CalDriver()->calResUnmap(*_resource); }
/*! \brief Look up the allocation registered at exactly the given address.

	\param address Pointer to look up; it must match an allocation's key
		exactly.
	\param type Allocation kind. HostAllocation is not implemented and trips
		an assertion.

	\return The matching allocation, or 0 when type is HostAllocation or when
		no uav0 allocations exist at all. Throws (via Throw) when allocations
		exist but none is keyed by address.
*/
Device::MemoryAllocation *ATIGPUDevice::getMemoryAllocation(
	const void *address, AllocationType type) const
{
	MemoryAllocation *result = 0;

	if (type == HostAllocation)
	{
		assertM(false, "Not implemented yet");
		return result;
	}

	if (_uav0Allocations.empty())
	{
		return result;
	}

	// Device pointer arithmetic is not supported yet
	const AllocationMap::const_iterator found =
		_uav0Allocations.find(const_cast<void *>(address));

	if (found == _uav0Allocations.end())
	{
		Throw("No allocation found for this pointer - " << address);
	}
	else
	{
		result = found->second;
	}

	return result;
}
/*! \brief Run a pass over a whole module if it is a module-level pass.

	Immutable and module passes have runOnModule() invoked; kernel and basic
	block passes are ignored here; an invalid pass type trips an assertion.

	\param module The module handed to the pass.
	\param pass The pass to dispatch on its type.
*/
static void runModulePass(ir::Module* module, Pass* pass)
{
	report(" Running module pass '" << pass->toString() << "'" );

	switch(pass->type)
	{
	case Pass::ImmutablePass:
		static_cast<ImmutablePass*>(pass)->runOnModule(*module);
		return;

	case Pass::ModulePass:
		static_cast<ModulePass*>(pass)->runOnModule(*module);
		return;

	case Pass::KernelPass: /* fall through */
	case Pass::BasicBlockPass:
		// Not module-level passes: nothing to run.
		return;

	case Pass::InvalidPass:
		assertM(false, "Invalid pass type.");
		return;
	}
}
/*! \brief Finalize a pass if it is a kernel or basic block pass.

	Immutable and module passes are ignored; an invalid pass type trips an
	assertion.

	\param module Not used by this function.
	\param pass The pass to dispatch on its type.
*/
static void finalizeKernelPass(ir::Module* module, Pass* pass)
{
	switch(pass->type)
	{
	case Pass::ImmutablePass: /* fall through */
	case Pass::ModulePass:
		// Nothing to finalize at kernel granularity.
		return;

	case Pass::KernelPass:
		report(" Finalizing kernel pass '" << pass->toString() << "'" );
		static_cast<KernelPass*>(pass)->finalize();
		return;

	case Pass::BasicBlockPass:
		report(" Finalizing basic block pass '" << pass->toString() << "'" );
		static_cast<BasicBlockPass*>(pass)->finalize();
		return;

	case Pass::InvalidPass:
		assertM(false, "Invalid pass type.");
		return;
	}
}
/*! \brief Remove the incoming value associated with a predecessor block.

	The first entry of reads is skipped; the remaining entries are walked in
	pairs, where the second entry of each pair is treated as an
	AddressOperand whose globalValue identifies the source block. When that
	block matches predecessor, both entries of the pair are deleted and
	erased from reads.

	\param predecessor The source block whose pair should be removed. If no
		pair refers to it, an assertion fires.
*/
void Phi::removeSource(BasicBlock* predecessor)
{
	auto readPosition = reads.begin();

	for(++readPosition; readPosition != reads.end(); ++readPosition)
	{
		// Step from the first entry of the pair to the block operand.
		++readPosition;
		assert(readPosition != reads.end());

		auto operand = static_cast<AddressOperand*>(*readPosition);

		if(operand->globalValue != predecessor) continue;

		// Step back to the first entry of the pair and drop it; erase()
		// leaves the iterator on the block operand, which is dropped next.
		--readPosition;
		delete *readPosition;
		readPosition = reads.erase(readPosition);

		delete *readPosition;
		reads.erase(readPosition);

		// Done. Leaving the loop with 'break' used to fall through into the
		// unconditional assertion below even after a successful removal.
		return;
	}

	assertM(false, "Phi instruction " << toString()
		<< " does not contain basic block " << predecessor->name());
}
/*! \brief Kernel attribute query; not implemented for this device.

	\param module Name of the module (unused).
	\param kernel Name of the kernel (unused).

	\return Never a meaningful value: the assertion fires on every call. A
		value-initialised structure is returned afterwards so the function
		stays well-defined if the assertion does not abort.
*/
cudaFuncAttributes ATIGPUDevice::getAttributes(const std::string& module,
	const std::string& kernel)
{
	assertM(false, "Not implemented yet");

	// Flowing off the end of a value-returning function is undefined
	// behaviour, so return an empty structure explicitly.
	return cudaFuncAttributes();
}
/*! \brief Number of slots of _minVarSize bytes covered by the stack.

	\return 0 when elements() is 0; otherwise _stackSize / _minVarSize, after
		asserting that the division is exact.
*/
unsigned MemoryArray::physicalElements() const
{
	if(elements() != 0)
	{
		assertM(((_stackSize % _minVarSize) == 0),
			"Not divisible stack size by minimal variable size");

		return _stackSize / _minVarSize;
	}

	return 0;
}
/*! \brief Dependence query between two instructions; not implemented.

	\param predecessor The earlier instruction (unused).
	\param successor The later instruction (unused).

	\return Never a meaningful value: the assertion fires on every call.
		true is returned afterwards so the function stays well-defined if the
		assertion does not abort.
*/
bool DependenceAnalysis::hasDependence(const Instruction& predecessor,
	const Instruction& successor) const
{
	assertM(false, "not implemented");

	// Flowing off the end of a bool function is undefined behaviour.
	// NOTE(review): true ("assume a dependence") was picked as the cautious
	// fallback - confirm it suits the callers.
	return true;
}
/*! \brief Texture reference lookup; not implemented for this device.

	\param moduleName Name of the module (unused).
	\param textureName Name of the texture (unused).

	\return Never a meaningful value: the assertion fires on every call. A
		null pointer is returned afterwards so the function stays
		well-defined if the assertion does not abort.
*/
void* ATIGPUDevice::getTextureReference(const std::string& moduleName,
	const std::string& textureName)
{
	assertM(false, "Not implemented yet");

	// Flowing off the end of a value-returning function is undefined
	// behaviour.
	return 0;
}
/*! \brief Texture unbinding; not implemented for this device.

	Every call trips the assertion. Neither parameter is used.

	\param moduleName Name of the module (unused).
	\param textureName Name of the texture (unused).
*/
void ATIGPUDevice::unbindTexture(const std::string& moduleName, const std::string& textureName) { assertM(false, "Not implemented yet"); }
static void insertAndConnectBlocks(BasicBlockMap& newBlocks, ir::ControlFlowGraph::iterator& functionEntry, ir::ControlFlowGraph::iterator& functionExit, ir::IRKernel& kernel, unsigned int& nextRegister, const ir::IRKernel& inlinedKernel) { typedef std::unordered_map<ir::PTXOperand::RegisterType, ir::PTXOperand::RegisterType> RegisterMap; ir::IRKernel copy; const ir::IRKernel* inlinedKernelPointer = &inlinedKernel; // create a copy if the call is recursive if(inlinedKernelPointer == &kernel) { copy = inlinedKernel; inlinedKernelPointer = © } // Insert new blocks for(auto block = inlinedKernelPointer->cfg()->begin(); block != inlinedKernelPointer->cfg()->end(); ++block) { auto newBlock = kernel.cfg()->clone_block(block); newBlocks.insert(std::make_pair(block, newBlock)); } // Connect new blocks, rename branch labels for(auto block = newBlocks.begin(); block != newBlocks.end(); ++block) { for(auto edge = block->first->out_edges.begin(); edge != block->first->out_edges.end(); ++edge) { auto headBlock = block->second; auto tail = (*edge)->tail; auto tailBlock = newBlocks.find(tail); assert(tailBlock != newBlocks.end()); kernel.cfg()->insert_edge(ir::Edge(headBlock, tailBlock->second, (*edge)->type)); if((*edge)->type == ir::Edge::Branch) { assert(!headBlock->instructions.empty()); auto instruction = headBlock->instructions.back(); auto branch = static_cast<ir::PTXInstruction*>(instruction); if(branch->opcode == ir::PTXInstruction::Ret) continue; assertM(branch->opcode == ir::PTXInstruction::Bra, "Expecting " << branch->toString() << " to be a branch"); branch->d.identifier = tailBlock->second->label(); } } } // Assign copied blocks new registers RegisterMap newRegisters; for(auto block = newBlocks.begin(); block != newBlocks.end(); ++block) { for(auto instruction = block->second->instructions.begin(); instruction != block->second->instructions.end(); ++instruction) { ir::PTXInstruction& ptx = static_cast<ir::PTXInstruction&>( **instruction); ir::PTXOperand* operands[] 
= {&ptx.pg, &ptx.pq, &ptx.d, &ptx.a, &ptx.b, &ptx.c}; for(unsigned int i = 0; i < 6; ++i) { ir::PTXOperand& operand = *operands[i]; if( operand.addressMode != ir::PTXOperand::Register && operand.addressMode != ir::PTXOperand::Indirect && operand.addressMode != ir::PTXOperand::ArgumentList) { continue; } if(operand.type != ir::PTXOperand::pred) { if(operand.array.empty() && operand.addressMode != ir::PTXOperand::ArgumentList) { auto mapping = newRegisters.find(operand.reg); if(mapping == newRegisters.end()) { mapping = newRegisters.insert(std::make_pair( operand.reg, nextRegister++)).first; } operand.reg = mapping->second; } else { for(auto subOperand = operand.array.begin(); subOperand != operand.array.end(); ++subOperand ) { if(!subOperand->isRegister()) continue; auto mapping = newRegisters.find(subOperand->reg); if(mapping == newRegisters.end()) { mapping = newRegisters.insert(std::make_pair( subOperand->reg, nextRegister++)).first; } subOperand->reg = mapping->second; } } } else if(operand.addressMode != ir::PTXOperand::ArgumentList) { if(operand.condition == ir::PTXOperand::Pred || operand.condition == ir::PTXOperand::InvPred) { auto mapping = newRegisters.find(operand.reg); if(mapping == newRegisters.end()) { mapping = newRegisters.insert(std::make_pair( operand.reg, nextRegister++)).first; } operand.reg = mapping->second; } } } } } // Assign copied blocks new local variables typedef std::unordered_map<std::string, std::string> LocalMap; LocalMap locals; for(auto local = inlinedKernel.locals.begin(); local != inlinedKernel.locals.end(); ++local) { std::string newName = "_Zinlined_" + local->first; locals.insert(std::make_pair(local->first, newName)); auto newLocal = kernel.locals.insert( std::make_pair(newName, local->second)).first; newLocal->second.name = newName; } for(auto block = newBlocks.begin(); block != newBlocks.end(); ++block) { for(auto instruction = block->second->instructions.begin(); instruction != block->second->instructions.end(); 
++instruction) { ir::PTXInstruction& ptx = static_cast<ir::PTXInstruction&>( **instruction); if(!ptx.mayHaveAddressableOperand()) continue; ir::PTXOperand* operands[] = {&ptx.pg, &ptx.pq, &ptx.d, &ptx.a, &ptx.b, &ptx.c}; for(unsigned int i = 0; i < 6; ++i) { ir::PTXOperand& operand = *operands[i]; if(operand.addressMode != ir::PTXOperand::Address) continue; auto local = locals.find(operand.identifier); if(local == locals.end()) continue; operand.identifier = local->second; } } } // Get the entry and exit points auto entryMapping = newBlocks.find( inlinedKernelPointer->cfg()->get_entry_block()); assert(entryMapping != newBlocks.end()); functionEntry = entryMapping->second; auto exitMapping = newBlocks.find( inlinedKernelPointer->cfg()->get_exit_block()); assert(exitMapping != newBlocks.end()); functionExit = exitMapping->second; }
/*! \brief Compute the block ordering stored in 'order' for a function.

	Blocks are appended to 'order' as they are popped from a work stack that
	is seeded with the entry block, and the sequence is reversed at the end.
	A successor is pushed only once all of its predecessors are in 'visited'.
	When the stack runs dry before every block is ordered, one not yet
	visited successor of an already ordered block is pushed to force
	progress.

	\param function The function whose blocks are ordered. An assertion fires
		if some blocks cannot be reached this way.
*/
void ReversePostOrderTraversal::analyze(Function& function)
{
	typedef util::LargeSet<BasicBlock*> BlockSet;
	typedef std::stack<BasicBlock*> BlockStack;

	order.clear();

	// Blocks that have been pushed onto the stack at some point
	BlockSet visited;
	BlockStack stack;

	auto cfgAnalysis = getAnalysis("ControlFlowGraph");
	auto cfg = static_cast<ControlFlowGraph*>(cfgAnalysis);

	report("Creating reverse post order traversal over function '" + function.name() + "'");

	// reverse post order is reversed topological order
	// NOTE(review): the entry block is pushed but not inserted into 'visited',
	// so its successors can only be scheduled through the fallback below -
	// confirm this is intended.
	stack.push(&*function.entry_block());

	while(order.size() != function.size())
	{
		// Fallback: nothing is schedulable, so force the first not yet
		// visited successor of any already ordered block onto the stack.
		if(stack.empty())
		{
			for(auto block : order)
			{
				auto successors = cfg->getSuccessors(*block);

				for(auto successor : successors)
				{
					if(visited.insert(successor).second)
					{
						stack.push(successor);
						break;
					}
				}

				if(!stack.empty()) break;
			}
		}

		// Still empty: the remaining blocks are not successors of anything
		// ordered so far.
		assertM(!stack.empty(), (function.size() - order.size()) << " blocks are not connected.");

		while(!stack.empty())
		{
			BasicBlock* top = stack.top();
			stack.pop();

			auto successors = cfg->getSuccessors(*top);

			for(auto successor : successors)
			{
				assert(successor != nullptr);

				auto predecessors = cfg->getPredecessors(*successor);

				bool allPredecessorsVisited = true;

				for(auto predecessor : predecessors)
				{
					if(visited.count(predecessor) == 0)
					{
						allPredecessorsVisited = false;
						break;
					}
				}

				// Wait until every predecessor has been scheduled
				if(!allPredecessorsVisited) continue;

				// insert().second is false for blocks that were already pushed
				if(visited.insert(successor).second)
				{
					stack.push(successor);
				}
			}

			order.push_back(top);

			report(" " << top->name());
		}
	}

	// reverse the order
	std::reverse(order.begin(), order.end());
}
/*! \brief Host-mapped pointer query; not implemented for this allocation
		type.

	\return Never a meaningful value: the assertion fires on every call. A
		null pointer is returned afterwards so the function stays
		well-defined if the assertion does not abort.
*/
void *ATIGPUDevice::MemoryAllocation::mappedPointer() const
{
	assertM(false, "Not implemented yet");

	// Flowing off the end of a value-returning function is undefined
	// behaviour.
	return 0;
}
/*! \brief Make sure a single variable occupies physical registers at a
		program point, spilling other variables if necessary.

	The variable is first placed in a run of free registers whose start is a
	multiple of its size. If no such hole exists, the candidate start
	positions are ranked by the summed, weighted scores of all spill
	policies; the variables occupying the cheapest position are spilled and
	the position is handed to cr.

	\param cr The variable that must be on registers.
	\param p The program point at which the variable is used.

	\return The set of variables that are on registers after the call.
*/
const SpillPolicy::CoalescedSet & Allocator::use(CoalescedRegister *cr,
	const Interval::Point &p)
{
	assert((_available.size() + _registerVariableMap.size()) == _regs);

	clearExpired(p);

	reportE(INFO, "Point: "<< p << "; Variable " << cr->reg()
		<< "; Size: " << cr->size());

	/* If the variable cr is marked as on register, there is nothing to do */
	if(_onRegister.find(cr) != _onRegister.end())
	{
		reportE(DEBUG, "Point: "<< p << "; Variable " << cr->reg()
			<< " already marked as onRegister, nothing to do");
		return _onRegister;
	}

	/* There can't be a variable larger than the available registers */
	assertM(cr->size() <= _regs, "Variable larger than available registers");

	/* Test if the variable fits on available holes. We assume the variable
	 * must be placed on a register position aligned to its size */
	//TODO: Discover if vectors require aligned positions and how may affect here
	unsigned int maxReg = _regs - cr->size();
	maxReg -= (maxReg % cr->size());

	for(unsigned i = 0; i <= maxReg; i += cr->size())
	{
		bool fit = true;

		for(unsigned u = i; fit && (u < (i + cr->size())); u++)
		{
			fit &= _available.find(u) != _available.end();
		}

		if(fit)
		{
			reportE(DEBUG, "Point: "<< p << "; Variable " << cr->reg()
				<< ", with size " << cr->size()
				<< " fits on starting register " << i);

			for(unsigned u = i; fit && (u < (i + cr->size())); u++)
			{
				reportE(DEBUG_DETAILS, "\tPoint: "<< p << "; Variable "
					<< cr->reg() << " receives physical register " << u);
				cr->allocated.insert(u);
				_available.erase(u);
				_registerVariableMap[u] = cr;
			}

			_onRegister.insert(cr);
			return _onRegister;
		}
	}

	/* Create a cost ranking based on all spill policies active, and weight
	 * of each one of them */
	PolicyMap::const_iterator policy = _policies.begin();
	PolicyMap::const_iterator policyEnd = _policies.end();
	RegisterCostMap scoresSum;

	for(SpillPolicy::RegisterId i = 0; i < _regs; i++)
	{
		scoresSum[i] = 0;
	}

	for(; policy != policyEnd; policy++)
	{
		reportE(DEBUG, "Point: "<< p
			<< "; Building spilling cost score for spill policy"
			<< policy->first);

		SpillPolicy::CoalescedCostMap score;
		score = policy->second->rank(_onRegister, p);

		/* The cost of a variable is accumulated on its first allocated
		 * register */
		while(score.size() > 0)
		{
			assertM(score.begin()->first->allocated.size() > 0,
				"No allocated physical registers");

			SpillPolicy::RegisterId reg =
				*score.begin()->first->allocated.begin();

			reportE(DEBUG_DETAILS, "\tPoint: "<< p << "; Variable "
				<< score.begin()->first->reg() << ", at register " << reg
				<< " receives cost " << score.begin()->second);

			scoresSum[reg] += (_weights[policy->first] * score.begin()->second);

			reportE(DEBUG_DETAILS, "\tPoint: "<< p << "; Register" << reg
				<< " has total cost " << scoresSum[reg]);

			score.erase(score.begin());
		}
	}

	/* Locate the best insertion point */
	long int best = std::numeric_limits<long int>::max();
	SpillPolicy::RegisterId bestStart = _regs;

	for(SpillPolicy::RegisterId i = 0; i <= maxReg; i += cr->size())
	{
		long int cost = 0;
		CoalescedRegister* last = NULL;

		for(unsigned u = i; (u < (i + cr->size())); u++)
		{
			if(_registerVariableMap.find(u) == _registerVariableMap.end())
				continue;

			/* Count every occupying variable only once */
			if(last == _registerVariableMap[u]) continue;

			last = _registerVariableMap[u];
			cost += scoresSum[*last->allocated.begin()];
		}

		reportE(DEBUG_DETAILS, "\tPoint: "<< p << "; Spilling from position "
			<< i << ", a size of " << cr->size()
			<< " registers has a cost of " << cost);

		if(cost < best)
		{
			best = cost;
			bestStart = i;
			reportE(DEBUG_DETAILS, "\tPoint: "<< p
				<< "; Is best spilling cost so far" );
		}
	}

	assertM(bestStart < _regs, "Error finding best insert position");

	/* Spill required variables */
	reportE(DEBUG, "Spilling from start position " << bestStart );

	for(unsigned u = bestStart; (u < (bestStart + cr->size())); u++)
	{
		reportE(DEBUG_DETAILS, "\tRegister "<< u << ":" );

		if(_registerVariableMap.find(u) == _registerVariableMap.end())
		{
			assert(_available.find(u) != _available.end());
			continue;
		}

		CoalescedRegister *spillCr = _registerVariableMap[u];

		reportE(DEBUG_DETAILS, "\t\tContains variable "<< spillCr->reg()
			<< " of size " << spillCr->size() );

		if(!spillCr->spilled())
		{
			reportE(DEBUG_DETAILS, "\t\t\tNew spill" );
			spillCr->spill();
		}

		assertM((*(spillCr->allocated.begin())) == u, "Wrong mapping");

		/* Release every register held by the spilled variable */
		while(spillCr->allocated.size() > 0)
		{
			SpillPolicy::RegisterId a = *(spillCr->allocated.begin());
			_available.insert(a);
			reportE(DEBUG_DETAILS, "\t\t\tFreeing register " << a );
			_registerVariableMap.erase(a);
			spillCr->allocated.erase(a);
		}

		_onRegister.erase(spillCr);
	}

	reportE(DEBUG, "Associating registers to variable " << cr->reg() );

	/* Allocate registers to the new variable */
	_onRegister.insert(cr);

	for(unsigned u = bestStart; (u < (bestStart + cr->size())); u++)
	{
		cr->allocated.insert(u);

		/* The erase must happen outside the assertion: an assertion that is
		 * compiled out would otherwise leave the register marked as
		 * available */
		const bool wasAvailable = (_available.erase(u) != 0);
		assertM(wasAvailable,
			"Register being allocated not marked as available");
		(void) wasAvailable;

		_registerVariableMap[u] = cr;
	}

	return _onRegister;
}
/*! \brief Result processing hook; not implemented.

	Every call trips the assertion. The parameter is not used.

	\param results The results to process (unused).
*/
void FeatureResultProcessor::process(const ResultVector& results)
{
	// TODO: provide an implementation; until then every call asserts.
	assertM(false, "Not implemented.");
}
/*! \brief Video display helper; not implemented.

	Every call trips the assertion. None of the parameters is used.

	\param inputPath Path of the input (unused).
	\param xPixels Horizontal size in pixels (unused).
	\param yPixels Vertical size in pixels (unused).
	\param colors Colour parameter (unused).
	\param text Text parameter (unused).
*/
static void displayVideo(const std::string& inputPath, size_t xPixels, size_t yPixels, size_t colors, const std::string& text) { assertM(false, "Not implemented."); }
/*! \brief Last-error query; not implemented for this device.

	\return Never a meaningful value: the assertion fires on every call. 0 is
		returned afterwards so the function stays well-defined if the
		assertion does not abort.
*/
unsigned int ATIGPUDevice::getLastError() const
{
	assertM(false, "Not implemented yet");

	// Flowing off the end of a value-returning function is undefined
	// behaviour.
	return 0;
}
const SpillPolicy::CoalescedSet & Allocator::use(SpillPolicy::CoalescedSet used, const Interval::Point &p, const bool ignoreSpilled, const bool spillUsed, const bool coalesced, const bool removeSpilled) { assert((_available.size() + _registerVariableMap.size()) == _regs); reportE(INFO, "Point: "<< p << "; Variables count: " << used.size()); /* coalesced tells that all used variables must be placed on a coalesced amount of registers, * TODO: This is meant for vector variables, check if it is really required, and if it require * consequent registers * TODO: Spill/store/load vector variables all together using vector instructions */ assertM(!(coalesced && (spillUsed || ignoreSpilled)), "Can't be coalesced when ignoring spilled"); clearExpired(p); if(used.empty()){ reportE(INFO, "No variables used"); return _onRegister; } SpillPolicy::CoalescedSet::iterator crI = used.begin(); SpillPolicy::CoalescedSet::iterator crIEnd = used.end(); unsigned totalRegisters = 0; reportE(INFO, "Counting required registers"); while(crI != crIEnd) { CoalescedRegister *cr = *crI; reportE(DEBUG, "Variable: " << cr->reg() << "; Size: " << cr->size()); /* If spilled variables must be ignored, they are not allocated and removed from on register status */ if(ignoreSpilled && cr->spilled()) { reportE(DEBUG, "\tNot allocating spilled variable: " << cr->reg()); if(removeSpilled && (_onRegister.find(cr) != _onRegister.end())) { reportE(DEBUG, "\t\tSpilled variable " << cr->reg() << " marked as on-register, removing it"); while(cr->allocated.size() > 0) { reportE(DEBUG_DETAILS, "\t\t\tFreeing physical register " << *(cr->allocated.begin())); _available.insert(*(cr->allocated.begin())); _registerVariableMap.erase(_registerVariableMap.find(*(cr->allocated.begin()))); cr->allocated.erase(cr->allocated.begin()); } _onRegister.erase(cr); } SpillPolicy::CoalescedSet::iterator erase = crI; crI++; used.erase(erase); continue; } totalRegisters += cr->size(); crI++; } if(used.empty()) { reportE(DEBUG, "Nothing 
to do, all variables are spilled"); return _onRegister; } /* If we can't spill variables on the used list, they must use at most the same amount of registers * as available */ assertM((totalRegisters <= _regs) || spillUsed, "Variables larger than available registers"); if(totalRegisters > _regs){ selectByLRU(used, p, totalRegisters); } SpillPolicy::CoalescedSet worklist = used; /* If the variables need to be coalesced, then: * 1) All variables need to be the same size * 2) The first position will be aligned based on its size * This is expecting that vector require coalesced aligned registers */ reportE(INFO, "Required registers: " << totalRegisters); if(coalesced) {//TODO: Implement coalesced allocating, if required for vectors reportE(INFO, "Coalesced allocation"); assertM(false, "TODO: Implement coalesced allocating, required by vectors"); } SpillPolicy::CoalescedSet::iterator variable = worklist.begin(); while(variable != worklist.end()){ if(_onRegister.find(*variable) != _onRegister.end()){ reportE(DEBUG, "\tVariable " << (*variable)->reg() << " already on register, erasing it from worklist"); SpillPolicy::CoalescedSet::iterator erase = variable; variable++; worklist.erase(erase); continue; } variable++; } /* Must fit larger variables first --> map variables by registers size */ std::multimap<ushort, CoalescedRegister*> sizeCoalescedMap; for(SpillPolicy::CoalescedSet::const_iterator cr = worklist.begin(); cr != worklist.end(); cr++) { reportE(DEBUG_DETAILS, "\tVariable " << (*cr)->reg() << "; Size: " << (*cr)->size()); sizeCoalescedMap.insert(std::make_pair((*cr)->size(), *cr)); } reportE(INFO, "Start testing if all fits on current available registers"); /* Try to allocate variables by largest size first. 
If one won't fit, start spilling */ while(worklist.size() > 0) { bool fit = true; std::multimap<ushort, CoalescedRegister*>::iterator crI = --sizeCoalescedMap.end(); CoalescedRegister *cr = crI->second; reportE(DEBUG, "Test variable " << cr->reg() << " of size " << cr->size()); unsigned int maxReg = _regs - cr->size(); maxReg -= (maxReg % cr->size()); for(unsigned i = 0; i <= maxReg; i += cr->size()) { fit = true; reportE(DEBUG_DETAILS, "Starting position " << i); for(unsigned u = i; fit && (u < (i + cr->size())); u++) { fit &= (_available.find(u) != _available.end()); reportE(DEBUG_DETAILS, "Position " << u << " is busy?" << !fit); } if(fit) { reportE(DEBUG, "Allocating " << cr->reg() << " on position " << i); for(unsigned u = i; u < (i + cr->size()); u++) { cr->allocated.insert(u); assert(_available.erase(u)); _registerVariableMap[u] = cr; } sizeCoalescedMap.erase(crI); _onRegister.insert(cr); worklist.erase(cr); break; } } if(!fit){ break; } } if(worklist.empty()) { reportE(INFO, "All variable allocated without spilling"); return _onRegister; } /* Create variable spill cost ranking, based on active spill policies and weights */ PolicyMap::const_iterator policy = _policies.begin(); PolicyMap::const_iterator policyEnd = _policies.end(); RegisterCostMap scoresSum; for(SpillPolicy::RegisterId i = 0; i < _regs; i++) { scoresSum[i] = 0; } reportE(INFO, "Spilling is required"); for(; policy != policyEnd; policy++) { reportE(DEBUG, "Calculating spill cost based on spill policy: " << policy->first); SpillPolicy::CoalescedCostMap score; score = policy->second->rank(_onRegister, p); while(score.size() > 0) { CoalescedRegister* cr = score.begin()->first; assertM(cr->allocated.size() > 0, "Variable marked as on register and without allocated registers"); SpillPolicy::RegisterId reg = *cr->allocated.begin(); if(used.find(cr) != used.end()){ scoresSum[reg] = std::numeric_limits<unsigned>::max(); reportE(DEBUG_DETAILS, "Variable " << cr->reg() << " on position " << reg << " is 
used, setting cost to maximum"); } else { scoresSum[reg] += (_weights[policy->first] * score.begin()->second); reportE(DEBUG_DETAILS, "\tVariable " << cr->reg() << " on position " << reg << " has cost " << scoresSum[reg]); } score.erase(score.begin()); } } reportE(INFO, "Locating best locations based on spill costs"); /* Find best spilling position, starting by largest variables */ while(worklist.size() > 0) { std::multimap<ushort, CoalescedRegister*>::iterator crI = --sizeCoalescedMap.end(); CoalescedRegister *cr = crI->second; long int best = std::numeric_limits<long int>::max(); SpillPolicy::RegisterId bestStart = _regs; reportE(DEBUG, "\tAllocating variable " << cr->reg() << "; Size: " << cr->size()); unsigned int maxReg = _regs - cr->size(); maxReg -= (maxReg % cr->size()); for(SpillPolicy::RegisterId i = 0; i <= maxReg; i += cr->size()) { bool noUsedVariable = true; long int cost = 0; CoalescedRegister *last = NULL; reportE(DEBUG, "\t\tTesting start position:" << i); for(unsigned u = i; noUsedVariable && (u < (i + cr->size())); u++) { if(_registerVariableMap.find(u) == _registerVariableMap.end()){ reportE(DEBUG_DETAILS, "\t\t\tNo variable starts on position " << u); continue; } if(last == _registerVariableMap[u]){ reportE(DEBUG_DETAILS, "\t\t\tPosition " << u << " is part of already accounted variable " << last->reg()); continue; } last = _registerVariableMap[u]; /* Assure that the variable using register is not being used now */ reportE(DEBUG_DETAILS, "\t\t\tPosition " << u << " is beginning of variable " << last->reg()); noUsedVariable &= (used.find(last) == used.end()); if(noUsedVariable){ reportE(DEBUG_DETAILS, "\t\t\tVariable " << last->reg() << " is not required to be allocated."); } else { reportE(DEBUG_DETAILS, "\t\t\tVariable " << last->reg() << " is required to be allocated."); } cost += scoresSum[*last->allocated.begin()]; reportE(DEBUG, "\t\tCost to insert at point " << i << ": " << cost); } if(!noUsedVariable) { reportE(DEBUG, "\t\tCan't use 
position " << i << ", variable required to be on register"); continue; } if(cost < best) { best = cost; bestStart = i; reportE(INFO, "\t\tNew best position for variable " << cr->reg() << ": " << i); } } assertM(bestStart != _regs, "Error finding a insertion position"); reportE(INFO, "\tAllocating at position " << bestStart); for(unsigned u = bestStart; u < (bestStart + cr->size()); u++) { if(_registerVariableMap.find(u) == _registerVariableMap.end()) { reportE(DEBUG_DETAILS, "\tNo variable at position " << u); continue; } CoalescedRegister *spillCr = _registerVariableMap[u]; if(!spillCr->spilled()) { reportE(DEBUG, "\tSpilling variable " << spillCr->reg()); spillCr->spill(); } _onRegister.erase(spillCr); while(spillCr->allocated.size() > 0) { reportE(DEBUG_DETAILS, "\tFreeing physical register " << *(spillCr->allocated.begin())); _available.insert(*(spillCr->allocated.begin())); _registerVariableMap.erase( _registerVariableMap.find(*(spillCr->allocated.begin()))); spillCr->allocated.erase(spillCr->allocated.begin()); } } _onRegister.insert(cr); for(unsigned u = bestStart; u < (bestStart + (cr->size())); u++) { reportE(DEBUG, "\tAllocating register " << u << " to variable " << cr->reg()); cr->allocated.insert(u); _registerVariableMap[u] = cr; assertM(_available.erase(u), "Register being allocated not marked as available"); } worklist.erase(cr); sizeCoalescedMap.erase(crI); ; } assertM(sizeCoalescedMap.empty(), "Size mapped worklist not clear"); return _onRegister; }
/*! \brief Worker thread limit; not implemented for this device.

	Every call trips the assertion. The parameter is not used.

	\param threads Requested thread limit (unused).
*/
void ATIGPUDevice::limitWorkerThreads(unsigned int threads) { assertM(false, "Not implemented yet"); }
/*! \brief Load the CUDA driver library and bind its entry points.

	Does nothing when a driver handle is already held. With __GNUC__ the
	library named by _libname is opened through dlopen(); if that fails the
	failure is reported and the function returns with _driver still 0.
	Otherwise every listed entry point is bound through the DynLink / DynLinkV
	macros and the driver version is queried into _version and reported.
	Without __GNUC__ the function only asserts.
*/
void CudaDriver::Interface::load()
{
	// Already loaded
	if( _driver != 0 ) return;

	#if __GNUC__
	report( "Loading " << _libname );
	_driver = dlopen( _libname.c_str(), RTLD_LAZY );

	if( _driver == 0 )
	{
		report( "Failed to load cuda driver." );
		report( " " << dlerror() );
		return;
	}

	// NOTE(review): DynLink and DynLinkV are defined outside this view;
	// presumably they resolve the named symbol from _driver, with DynLinkV
	// selecting a versioned variant - verify against the macro definitions.

	// Initialization, device and context management
	DynLink(cuInit);
	DynLink(cuDriverGetVersion);
	DynLink(cuDeviceGet);
	DynLink(cuDeviceGetCount);
	DynLink(cuDeviceGetName);
	DynLink(cuDeviceComputeCapability);
	DynLinkV(cuDeviceTotalMem);
	DynLink(cuDeviceGetProperties);
	DynLink(cuDeviceGetAttribute);
	DynLink(cuCtxGetLimit);
	DynLink(cuCtxGetApiVersion);
	DynLinkV(cuCtxCreate);
	DynLink(cuCtxDestroy);
	DynLink(cuCtxAttach);
	DynLink(cuCtxDetach);
	DynLink(cuCtxPushCurrent);
	DynLink(cuCtxPopCurrent);
	DynLink(cuCtxGetDevice);
	DynLink(cuCtxSynchronize);

	// Module management
	DynLink(cuModuleLoad);
	DynLink(cuModuleLoadData);
	DynLink(cuModuleLoadDataEx);
	DynLink(cuModuleLoadFatBinary);
	DynLink(cuModuleUnload);
	DynLink(cuModuleGetFunction);
	DynLinkV(cuModuleGetGlobal);
	DynLink(cuModuleGetTexRef);

	// Memory management
	DynLinkV(cuMemGetInfo);
	DynLinkV(cuMemAlloc);
	DynLinkV(cuMemAllocPitch);
	DynLinkV(cuMemFree);
	DynLinkV(cuMemGetAddressRange);
	DynLinkV(cuMemAllocHost);
	DynLinkV(cuMemHostRegister);
	DynLinkV(cuMemHostUnregister);
	DynLink(cuMemFreeHost);
	DynLink(cuMemHostAlloc);
	DynLinkV(cuMemHostGetDevicePointer);
	DynLink(cuMemHostGetFlags);

	// Memory copies
	DynLinkV(cuMemcpyHtoD);
	DynLinkV(cuMemcpyDtoH);
	DynLinkV(cuMemcpyDtoD);
	DynLinkV(cuMemcpyDtoA);
	DynLinkV(cuMemcpyAtoD);
	DynLinkV(cuMemcpyHtoA);
	DynLinkV(cuMemcpyAtoH);
	DynLinkV(cuMemcpyAtoA);
	DynLinkV(cuMemcpy2D);
	DynLinkV(cuMemcpy2DUnaligned);
	DynLinkV(cuMemcpy3D);
	DynLinkV(cuMemcpyHtoDAsync);
	DynLinkV(cuMemcpyDtoHAsync);
	DynLinkV(cuMemcpyHtoAAsync);
	DynLinkV(cuMemcpyAtoHAsync);
	DynLinkV(cuMemcpy2DAsync);
	DynLinkV(cuMemcpy3DAsync);

	// Memory sets
	DynLinkV(cuMemsetD8);
	DynLinkV(cuMemsetD16);
	DynLinkV(cuMemsetD32);
	DynLinkV(cuMemsetD2D8);
	DynLinkV(cuMemsetD2D16);
	DynLinkV(cuMemsetD2D32);

	// Function management
	DynLink(cuFuncSetBlockShape);
	DynLink(cuFuncSetSharedSize);
	DynLink(cuFuncGetAttribute);
	DynLink(cuFuncSetCacheConfig);

	// Arrays
	DynLinkV(cuArrayCreate);
	DynLinkV(cuArrayGetDescriptor);
	DynLink(cuArrayDestroy);
	DynLinkV(cuArray3DCreate);
	DynLinkV(cuArray3DGetDescriptor);

	// Texture references
	DynLink(cuTexRefCreate);
	DynLink(cuTexRefDestroy);
	DynLink(cuTexRefSetArray);
	DynLinkV(cuTexRefSetAddress);
	DynLinkV(cuTexRefSetAddress2D);
	DynLink(cuTexRefSetFormat);
	DynLink(cuTexRefSetAddressMode);
	DynLink(cuTexRefSetFilterMode);
	DynLink(cuTexRefSetFlags);
	DynLinkV(cuTexRefGetAddress);
	DynLink(cuTexRefGetArray);
	DynLink(cuTexRefGetAddressMode);
	DynLink(cuTexRefGetFilterMode);
	DynLink(cuTexRefGetFormat);
	DynLink(cuTexRefGetFlags);

	// Kernel parameters and launches
	DynLink(cuParamSetSize);
	DynLink(cuParamSeti);
	DynLink(cuParamSetf);
	DynLink(cuParamSetv);
	DynLink(cuParamSetTexRef);
	DynLink(cuLaunch);
	DynLink(cuLaunchGrid);
	DynLink(cuLaunchGridAsync);

	// Events
	DynLink(cuEventCreate);
	DynLink(cuEventRecord);
	DynLink(cuEventQuery);
	DynLink(cuEventSynchronize);
	DynLink(cuEventDestroy);
	DynLink(cuEventElapsedTime);

	// Streams
	DynLink(cuStreamCreate);
	DynLink(cuStreamQuery);
	DynLink(cuStreamSynchronize);
	DynLink(cuStreamDestroy);

	// Graphics interoperability
	DynLink(cuGraphicsUnregisterResource);
	DynLink(cuGraphicsSubResourceGetMappedArray);
	DynLinkV(cuGraphicsResourceGetMappedPointer);
	DynLink(cuGraphicsResourceSetMapFlags);
	DynLink(cuGraphicsMapResources);
	DynLink(cuGraphicsUnmapResources);
	DynLink(cuGetExportTable);

	// OpenGL interoperability
	DynLink(cuGLInit);
	DynLinkV(cuGLCtxCreate);
	DynLink(cuGraphicsGLRegisterBuffer);
	DynLink(cuGraphicsGLRegisterImage);
	DynLink(cuGLRegisterBufferObject);
	DynLink(cuGLSetBufferObjectMapFlags);

	// Query the driver version through the freshly bound entry point
	// NOTE(review): cuDriverGetVersion is called without a null check -
	// confirm DynLink guarantees it is bound.
	CUresult result = (*cuDriverGetVersion)(&_version);

	if (result == CUDA_SUCCESS)
	{
		report(" Driver version is: " << _version << " and was called successfully");
	}
	else
	{
		report("cuDriverGetVersion() returned " << result);
	}
	#else
	assertM(false, "CUDA Driver support not compiled into Ocelot.");
	#endif
}
/*! \brief Allocation flags query; not implemented for this allocation type.

	\return Never a meaningful value: the assertion fires on every call. 0 is
		returned afterwards so the function stays well-defined if the
		assertion does not abort.
*/
unsigned int ATIGPUDevice::MemoryAllocation::flags() const
{
	assertM(false, "Not implemented yet");

	// Flowing off the end of a value-returning function is undefined
	// behaviour.
	return 0;
}