예제 #1
0
/*
 * Closes a timed region delimited by two HIP events.
 *
 * Records `stop` on stream 0, blocks until that event has completed, and
 * returns the start->stop interval reported by hipEventElapsedTime
 * (milliseconds). Both events are destroyed before returning, so the caller
 * must not reuse them afterwards. Every HIP call goes through CUDA_SAFE_CALL.
 */
float finalizeEvents(hipEvent_t start, hipEvent_t stop){
	float elapsed_ms;	// written by hipEventElapsedTime below

	// Pick up any error left behind by earlier runtime calls before touching the events.
	CUDA_SAFE_CALL(hipGetLastError());

	// Mark the end of the region and wait until the device has reached it.
	CUDA_SAFE_CALL(hipEventRecord(stop, 0));
	CUDA_SAFE_CALL(hipEventSynchronize(stop));

	CUDA_SAFE_CALL(hipEventElapsedTime(&elapsed_ms, start, stop));

	// The events are owned by this function from here on: release both.
	CUDA_SAFE_CALL(hipEventDestroy(start));
	CUDA_SAFE_CALL(hipEventDestroy(stop));

	return elapsed_ms;
}
int main(){

	hipError_t err;
	float *A, *Ad;

	A = new float[LEN];

	for(int i=0;i<LEN;i++){
		A[i] = 1.0f;
	}

	hipStream_t stream;
	err = hipStreamCreate(&stream);
	check("Creating stream",err);

	err = hipMalloc(&Ad, SIZE);
	check("Allocating Ad memory on device", err);

	err = hipMemcpy(Ad, A, SIZE, hipMemcpyHostToDevice);
	check("Doing memory copy from A to Ad", err);

	float mS = 0;
	hipEvent_t start, stop;
	hipEventCreate(&start);
	hipEventCreate(&stop);

	ResultDatabase resultDB[8];

	hipEventRecord(start);
	hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[0].AddResult(std::string("First Kernel Launch"), "", "uS", mS*1000); 
//	std::cout<<"First Kernel Launch: \t\t"<<mS*1000<<" uS"<<std::endl;
	resultDB[0].DumpSummary(std::cout);
	hipEventRecord(start);
	hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[1].AddResult(std::string("Second Kernel Launch"), "", "uS", mS*1000); 
//	std::cout<<"Second Kernel Launch: \t\t"<<mS*1000<<" uS"<<std::endl;
	resultDB[1].DumpSummary(std::cout);
	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
	}
	hipDeviceSynchronize();
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[2].AddResult(std::string("NULL Stream Sync dispatch wait"), "", "uS", mS*1000/ITER); 
	resultDB[2].DumpSummary(std::cout);
//	std::cout<<"NULL Stream Sync dispatch wait: \t"<<mS*1000/ITER<<" uS"<<std::endl;
	hipDeviceSynchronize();

	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
	}
	hipEventRecord(stop);
	hipDeviceSynchronize();
	hipEventElapsedTime(&mS, start, stop);
	resultDB[3].AddResult(std::string("NULL Stream Async dispatch wait"), "", "uS", mS*1000/ITER); 
	resultDB[3].DumpSummary(std::cout);
//	std::cout<<"NULL Stream Async dispatch wait: \t"<<mS*1000/ITER<<" uS"<<std::endl;
	hipDeviceSynchronize();

	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, stream, Ad);
		hipDeviceSynchronize();
	}
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[4].AddResult(std::string("Stream Sync dispatch wait"), "", "uS", mS*1000/ITER); 
	resultDB[4].DumpSummary(std::cout);
//	std::cout<<"Stream Sync dispatch wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
	hipDeviceSynchronize();
	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, stream, Ad);
	}
	hipDeviceSynchronize();
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[5].AddResult(std::string("Stream Async dispatch wait"), "", "uS", mS*1000/ITER); 
//	std::cout<<"Stream Async dispatch wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
	resultDB[5].DumpSummary(std::cout);
	hipDeviceSynchronize();

	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, 0, Ad);
	}
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[6].AddResult(std::string("NULL Stream No Wait"), "", "uS", mS*1000/ITER); 
	resultDB[6].DumpSummary(std::cout);
//	std::cout<<"NULL Stream Dispatch No Wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
	hipDeviceSynchronize();

	hipEventRecord(start);
	for(int i=0;i<ITER;i++){
		hipLaunchKernel(HIP_KERNEL_NAME(One), dim3(LEN/512), dim3(512), 0, stream, Ad);
	}
	hipEventRecord(stop);
	hipEventElapsedTime(&mS, start, stop);
	resultDB[7].AddResult(std::string("Stream Dispatch No Wait"), "", "uS", mS*1000/ITER); 
	resultDB[7].DumpSummary(std::cout);
//	std::cout<<"Stream Dispatch No Wait: \t\t"<<mS*1000/ITER<<" uS"<<std::endl;
	hipDeviceSynchronize();
}