/*! \brief Depth-first search along successor edges that reports whether
        `dependee` can be reached starting from `next`.

    \param next              Node the search is currently standing on.
    \param dependee          Node being searched for.
    \param visited           Instructions already expanded during this query;
                             a node found here is not expanded again.
    \param savedDependencies Memo table keyed by (from, to) instruction pairs.

    \return The memoized answer if one exists for (next, dependee); otherwise
            true when `next` is `dependee` or some successor reaches it.

    NOTE(review): a `false` answer is memoized for `next` even when one of its
    successors was skipped only because it was already in `visited`; confirm
    the graph is acyclic or that this is acceptable.
*/
static bool dependsOn(iterator next, iterator dependee,
    InstructionSet& visited, DependenceMap& savedDependencies)
{
    auto query = InstructionPair(next->instruction, dependee->instruction);

    // A previously recorded answer short-circuits the whole search.
    auto cached = savedDependencies.find(query);
    if(cached != savedDependencies.end())
    {
        return cached->second;
    }

    // Nodes that were already expanded are never expanded twice.
    const bool firstVisit = visited.insert(next->instruction).second;
    if(!firstVisit)
    {
        return false;
    }

    if(next == dependee)
    {
        return true;
    }

    for(auto successor : next->successors)
    {
        const bool reached =
            dependsOn(successor, dependee, visited, savedDependencies);

        if(reached)
        {
            return true;
        }

        // The direct edge next -> successor is recorded once the successor
        // turned out not to lead to the dependee.
        savedDependencies.insert(std::make_pair(
            InstructionPair(next->instruction, successor->instruction),
            true));
    }

    // No successor reached the dependee: remember the negative result.
    savedDependencies.insert(std::make_pair(query, false));

    return false;
}
/*! \brief Select the members of `instructions` for which the control
        dependence analysis reports a dependence on `branch`.

    \param branch                    Branch instruction passed as the first
                                     argument of every dependence query.
    \param instructions              Candidate instructions to filter.
    \param controlDependenceAnalysis Analysis that answers dependsOn(branch, x).

    \return A new set holding every candidate accepted by the analysis.
*/
static InstructionSet getControlDependentInstructions(
    const PTXInstruction* branch, const InstructionSet& instructions,
    analysis::ControlDependenceAnalysis* controlDependenceAnalysis)
{
    InstructionSet dependents;

    for(auto candidate : instructions)
    {
        // Skip anything the analysis does not tie to the branch.
        if(!controlDependenceAnalysis->dependsOn(branch, candidate))
        {
            continue;
        }

        dependents.insert(candidate);
    }

    return dependents;
}
/*! \brief Scan the owning block in reverse, starting from a reverse iterator
        built at `instruction`, and collect every scanned instruction for
        which hasDependence(scanned, instruction) holds.

    \param predecessors Set that receives the dependent instructions.
    \param instruction  Position in the block whose predecessors are wanted.

    Each accepted pair is also logged through `report`.
*/
static void addPredecessors(InstructionSet& predecessors,
    ir::BasicBlock::const_iterator instruction)
{
    auto reverseEnd = (*instruction)->block->rend();
    auto earlier    = ir::BasicBlock::const_reverse_iterator(instruction);

    while(earlier != reverseEnd)
    {
        if(hasDependence(**earlier, **instruction))
        {
            report(" " << (*earlier)->toString() << " ("
                << (*earlier)->index() << ") -> "
                << (*instruction)->toString() << " ("
                << (*instruction)->index() << ")");

            predecessors.insert(*earlier);
        }

        ++earlier;
    }
}
/*! \brief Walk every instruction of every block in the kernel's CFG and
        gather those whose canObserveSideEffects() returns true.

    \param k Kernel whose control flow graph is scanned.

    \return The set of matching instructions, each cast to PTXInstruction*.
*/
static InstructionSet getInstructionsThatCanObserveSideEffects(ir::IRKernel& k)
{
    InstructionSet observers;

    report(" Getting instructions that can observe side-effects");

    for(auto& block : *k.cfg())
    {
        for(auto instruction : block.instructions)
        {
            auto ptx = static_cast<ir::PTXInstruction*>(instruction);

            if(!ptx->canObserveSideEffects())
            {
                continue;
            }

            report(" " << ptx->toString());
            observers.insert(ptx);
        }
    }

    return observers;
}
void ParallelismTraceAnalyzer::parallelism() const { typedef std::vector< ParallelismTraceGenerator::Event > EventVector; typedef std::set< long long unsigned int > InstructionSet; double averageSIMD = 0; double averageMIMD = 0; double averageCTAs = 0; for( KernelMap::const_iterator vector = _kernels.begin(); vector != _kernels.end(); ++vector ) { std::cout << " From program \"" << vector->first << "\".\n"; double localSIMD = 0; double localMIMD = 0; double localCTAs = 0; for( KernelVector::const_iterator kernel = vector->second.begin(); kernel != vector->second.end(); ++kernel ) { std::ifstream hstream( kernel->header.c_str() ); boost::archive::text_iarchive harchive( hstream ); ParallelismTraceGenerator::Header header; harchive >> header; assert( header.format == TraceGenerator::ParallelismTraceFormat ); hstream.close(); std::ifstream stream( kernel->path.c_str() ); if( !stream.is_open() ) { throw hydrazine::Exception( "Failed to open ParallelismTrace kernel trace file " + kernel->path ); } boost::archive::text_iarchive archive( stream ); EventVector events( header.dimensions ); for( EventVector::iterator event = events.begin(); event != events.end(); ++event ) { archive >> *event; } std::cout << " From file " << kernel->path << "\n"; std::cout << " kernel: " << kernel->name << "\n"; std::cout << " module: " << kernel->module << "\n"; std::cout << " statistics:\n"; std::cout << " ctas: " << header.dimensions << "\n"; std::cout << " threads: " << header.threads << "\n"; InstructionSet instructions; long long unsigned int totalInstructions = 0; double activity = 0; for( EventVector::iterator event = events.begin(); event != events.end(); ++event ) { totalInstructions += event->instructions; instructions.insert( event->instructions ); activity += event->activity * event->instructions; } activity /= totalInstructions + DBL_EPSILON; unsigned int previous = 0; unsigned int count = header.dimensions; double mimd = 0; for( InstructionSet::iterator instruction = 
instructions.begin(); instruction != instructions.end(); ++instruction ) { mimd += (*instruction - previous) * count; previous = *instruction; --count; } if( !instructions.empty() ) { mimd /= *(--instructions.end()) + DBL_EPSILON; } std::cout << " SIMD parallelism: " << activity << "\n"; std::cout << " MIMD parallelism: " << mimd << "\n"; localSIMD += activity; localMIMD += mimd; localCTAs += header.dimensions; } localSIMD /= vector->second.size() + DBL_EPSILON; localMIMD /= vector->second.size() + DBL_EPSILON; localCTAs /= vector->second.size() + DBL_EPSILON; std::cout << " Kernel " << vector->first << " statistics:\n"; std::cout << " average CTAs: " << localCTAs << "\n"; std::cout << " average SIMD parallelism: " << localSIMD << "\n"; std::cout << " average MIMD parallelism: " << localMIMD << "\n"; averageSIMD += localSIMD; averageMIMD += localMIMD; averageCTAs += localCTAs; } averageSIMD /= _kernels.size() + DBL_EPSILON; averageMIMD /= _kernels.size() + DBL_EPSILON; averageCTAs /= _kernels.size() + DBL_EPSILON; std::cout << "Aggregate statistics:\n"; std::cout << " average CTAs: " << averageCTAs << "\n"; std::cout << " average SIMD parallelism: " << averageSIMD << "\n"; std::cout << " average MIMD parallelism: " << averageMIMD << "\n"; }