// Depth-first search answering "can `dependee` be reached from `next` by
// following successor links?", with results memoized in `savedDependencies`.
//
//   next              - node the search is currently expanding
//   dependee          - node being searched for
//   visited           - instructions already expanded during this search; a
//                       node met a second time yields false
//   savedDependencies - cache keyed by (source, target) instruction pairs
//
// Returns the cached answer when one exists; otherwise true if `next` is
// `dependee` or if any successor reaches it, and false if none does.
static bool dependsOn(iterator next, iterator dependee, InstructionSet& visited,
	DependenceMap& savedDependencies)
{
	const auto query = InstructionPair(next->instruction,
		dependee->instruction);

	// A previously recorded answer for this exact pair wins outright.
	const auto cached = savedDependencies.find(query);

	if(cached != savedDependencies.end()) return cached->second;

	// Expand each instruction at most once per search.
	const bool firstVisit = visited.insert(next->instruction).second;

	if(!firstVisit) return false;

	if(next == dependee) return true;

	for(auto child : next->successors)
	{
		const bool reached = dependsOn(child, dependee, visited,
			savedDependencies);

		if(reached) return true;

		// The search through this child failed; record the direct
		// next -> child link itself as a known dependence.
		savedDependencies.insert(std::make_pair(
			InstructionPair(next->instruction, child->instruction), true));
	}

	// No successor led to `dependee`: remember the negative answer.
	savedDependencies.insert(std::make_pair(query, false));

	return false;
}
// Returns the subset of `instructions` for which
// controlDependenceAnalysis->dependsOn(branch, instruction) holds.
static InstructionSet getControlDependentInstructions(
	const PTXInstruction* branch, const InstructionSet& instructions,
	analysis::ControlDependenceAnalysis* controlDependenceAnalysis)
{
	InstructionSet dependents;

	for(auto candidate : instructions)
	{
		// Skip anything the analysis does not tie to `branch`.
		if(!controlDependenceAnalysis->dependsOn(branch, candidate)) continue;

		dependents.insert(candidate);
	}

	return dependents;
}
// Example #3
// 0
// Scans backwards from `instruction` towards the start of its block and adds
// every earlier instruction for which hasDependence(earlier, instruction)
// holds to `predecessors`, reporting each one found.
static void addPredecessors(InstructionSet& predecessors,
	ir::BasicBlock::const_iterator instruction)
{
	// Assuming standard reverse_iterator semantics, a reverse iterator built
	// from `instruction` first refers to the element just before it.
	auto cursor = ir::BasicBlock::const_reverse_iterator(instruction);
	auto stop   = (*instruction)->block->rend();

	while(cursor != stop)
	{
		if(hasDependence(**cursor, **instruction))
		{
			report("  " << (*cursor)->toString() << " (" << (*cursor)->index()
				<< ") -> " << (*instruction)->toString() << " ("
				<< (*instruction)->index() << ")");

			predecessors.insert(*cursor);
		}

		++cursor;
	}
}
// Walks every block of the kernel's control flow graph and gathers the
// instructions whose canObserveSideEffects() returns true, reporting each.
static InstructionSet getInstructionsThatCanObserveSideEffects(ir::IRKernel& k)
{
	report(" Getting instructions that can observe side-effects");

	InstructionSet observers;

	for(auto& block : *k.cfg())
	{
		for(auto instruction : block.instructions)
		{
			auto ptx = static_cast<ir::PTXInstruction*>(instruction);

			if(!ptx->canObserveSideEffects()) continue;

			report("  " << ptx->toString());

			observers.insert(ptx);
		}
	}

	return observers;
}
	// Prints SIMD and MIMD parallelism statistics to std::cout for every
	// kernel trace in _kernels, followed by per-program averages and an
	// aggregate average over all programs.
	//
	// For each kernel the header archive is read first; header.dimensions
	// events (printed under the label "ctas") are then deserialized from the
	// kernel's trace file.
	//
	//   SIMD parallelism: average of event->activity weighted by
	//                     event->instructions.
	//   MIMD parallelism: sum of all per-event instruction counts divided by
	//                     the largest one, accumulated interval by interval
	//                     over the sorted counts.
	//
	// Throws hydrazine::Exception when a header or trace file cannot be
	// opened. Errors raised by the archive itself propagate unchanged.
	void ParallelismTraceAnalyzer::parallelism() const
	{
	
		typedef std::vector< ParallelismTraceGenerator::Event > EventVector;
		// A multiset rather than a set: events with equal instruction counts
		// must each be kept, otherwise the active counter in the MIMD loop
		// below is decremented too few times and the result is overstated.
		typedef std::multiset< long long unsigned int > InstructionCounts;
	
		double averageSIMD = 0;
		double averageMIMD = 0;
		double averageCTAs = 0;
		
		for( KernelMap::const_iterator vector = _kernels.begin(); 
			vector != _kernels.end(); ++vector ) 
		{
			std::cout << " From program \"" << vector->first << "\".\n";

			double localSIMD = 0;
			double localMIMD = 0;
			double localCTAs = 0;

			for( KernelVector::const_iterator kernel = vector->second.begin(); 
				kernel != vector->second.end(); ++kernel )	
			{

				std::ifstream hstream( kernel->header.c_str() );
			
				// Fail with the same exception type used for the trace file
				// below instead of handing an unopened stream to the archive.
				if( !hstream.is_open() )
				{
					throw hydrazine::Exception(
						"Failed to open ParallelismTrace kernel header file " 
						+ kernel->header );
				}
			
				boost::archive::text_iarchive harchive( hstream );
			
				ParallelismTraceGenerator::Header header;
			
				harchive >> header;
				assert( header.format 
					== TraceGenerator::ParallelismTraceFormat );
			
				hstream.close();

				std::ifstream stream( kernel->path.c_str() );
			
				if( !stream.is_open() )
				{
					throw hydrazine::Exception(
						"Failed to open ParallelismTrace kernel trace file " 
						+ kernel->path );
				}
			
				boost::archive::text_iarchive archive( stream );
			
				// One event is deserialized per header dimension.
				EventVector events( header.dimensions );
			
				for( EventVector::iterator event = events.begin(); 
					event != events.end(); ++event )
				{
					archive >> *event;
				}

				std::cout << " From file " << kernel->path << "\n";
				std::cout << "  kernel: " << kernel->name << "\n";
				std::cout << "  module: " << kernel->module << "\n";
				std::cout << "  statistics:\n";
				std::cout << "   ctas: " << header.dimensions << "\n";
				std::cout << "   threads: " << header.threads << "\n";
			
				InstructionCounts counts;
				long long unsigned int totalInstructions = 0;
				double activity = 0;
			
				for( EventVector::iterator event = events.begin(); 
					event != events.end(); ++event )
				{
					totalInstructions += event->instructions;
					counts.insert( event->instructions );
					activity += event->activity * event->instructions;
				
				}
			
				// DBL_EPSILON keeps the division defined when no instructions
				// were recorded.
				activity /= totalInstructions + DBL_EPSILON;
			
				// Same width as the stored counts; a narrower type would
				// truncate large values and corrupt the interval lengths.
				long long unsigned int previous = 0;
				unsigned int active = header.dimensions;
				double mimd = 0;
			
				// Walk the counts in ascending order. Each step covers the
				// interval (previous, *count], during which `active` events
				// are still running; one event finishes at the end of it.
				for( InstructionCounts::iterator count = counts.begin();
					count != counts.end(); ++count )
				{
					mimd += (*count - previous) * active;
					previous = *count;
					--active;
				}
			
				if( !counts.empty() )
				{
					// Normalize by the largest instruction count.
					mimd /= *counts.rbegin() + DBL_EPSILON;
				}
				
				std::cout << "   SIMD parallelism: " << activity 
					<< "\n";		
				std::cout << "   MIMD parallelism: " << mimd << "\n";
			
				localSIMD += activity;
				localMIMD += mimd;
				localCTAs += header.dimensions;
			
			}

			localSIMD /= vector->second.size() + DBL_EPSILON;
			localMIMD /= vector->second.size() + DBL_EPSILON;
			localCTAs /= vector->second.size() + DBL_EPSILON;

			std::cout << " Kernel " << vector->first << " statistics:\n";
			std::cout << "  average CTAs: " << localCTAs << "\n";
			std::cout << "  average SIMD parallelism: " << localSIMD << "\n";
			std::cout << "  average MIMD parallelism: " << localMIMD << "\n";
			
			averageSIMD += localSIMD;
			averageMIMD += localMIMD;
			averageCTAs += localCTAs;
		}
				
		averageSIMD /= _kernels.size() + DBL_EPSILON;
		averageMIMD /= _kernels.size() + DBL_EPSILON;
		averageCTAs /= _kernels.size() + DBL_EPSILON;
		
		std::cout << "Aggregate statistics:\n";
		std::cout << " average CTAs: " << averageCTAs << "\n";
		std::cout << " average SIMD parallelism: " << averageSIMD << "\n";
		std::cout << " average MIMD parallelism: " << averageMIMD << "\n";
	}