/**
 * Closes a timing interval delimited by two HIP events and releases them.
 *
 * Checks for a pending runtime error, records `stop` on stream 0, blocks
 * until `stop` has completed, queries the elapsed time between `start` and
 * `stop`, and finally destroys both events.
 *
 * @param start  Event marking the beginning of the interval; destroyed here,
 *               so the caller must not reuse it afterwards.
 * @param stop   Event recorded by this function; also destroyed here.
 * @return Elapsed time between the two events as reported by
 *         hipEventElapsedTime (milliseconds per the HIP runtime docs).
 */
float finalizeEvents(hipEvent_t start, hipEvent_t stop)
{
    // Surface any error left behind by work issued before this call.
    CUDA_SAFE_CALL( hipGetLastError() );

    // Mark the end of the interval and wait until the device reaches it.
    CUDA_SAFE_CALL( hipEventRecord(stop, 0) );
    CUDA_SAFE_CALL( hipEventSynchronize(stop) );

    float elapsed;
    CUDA_SAFE_CALL( hipEventElapsedTime(&elapsed, start, stop) );

    // Both events are owned by this function from here on.
    CUDA_SAFE_CALL( hipEventDestroy(start) );
    CUDA_SAFE_CALL( hipEventDestroy(stop) );

    return elapsed;
}
void runbench_warmup(double *cd, long size){ const long reduced_grid_size = size/(UNROLLED_MEMORY_ACCESSES)/32; const int BLOCK_SIZE = 256; const int TOTAL_REDUCED_BLOCKS = reduced_grid_size/BLOCK_SIZE; dim3 dimBlock(BLOCK_SIZE, 1, 1); dim3 dimReducedGrid(TOTAL_REDUCED_BLOCKS, 1, 1); hipLaunchKernel(HIP_KERNEL_NAME(benchmark_func< short, BLOCK_SIZE, 0 >), dim3(dimReducedGrid), dim3(dimBlock ), 0, 0, (short)1, (short*)cd); CUDA_SAFE_CALL( hipGetLastError() ); CUDA_SAFE_CALL( hipDeviceSynchronize() ); }
int main(int argc, char **argv) { uchar4 *h_rgbaImage, *d_rgbaImage; unsigned char *h_greyImage, *d_greyImage; std::string input_file; std::string output_file; std::string reference_file; double perPixelError = 0.0; double globalError = 0.0; bool useEpsCheck = false; switch (argc) { case 2: input_file = std::string(argv[1]); output_file = "HW1_output.png"; reference_file = "HW1_reference.png"; break; case 3: input_file = std::string(argv[1]); output_file = std::string(argv[2]); reference_file = "HW1_reference.png"; break; case 4: input_file = std::string(argv[1]); output_file = std::string(argv[2]); reference_file = std::string(argv[3]); break; case 6: useEpsCheck=true; input_file = std::string(argv[1]); output_file = std::string(argv[2]); reference_file = std::string(argv[3]); perPixelError = atof(argv[4]); globalError = atof(argv[5]); break; default: std::cerr << "Usage: ./HW1 input_file [output_filename] [reference_filename] [perPixelError] [globalError]" << std::endl; exit(1); } //load the image and give us our input and output pointers preProcess(&h_rgbaImage, &h_greyImage, &d_rgbaImage, &d_greyImage, input_file); GpuTimer timer; timer.Start(); //call the students' code your_rgba_to_greyscale(h_rgbaImage, d_rgbaImage, d_greyImage, numRows(), numCols()); timer.Stop(); hipDeviceSynchronize(); checkCudaErrors(hipGetLastError()); int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed()); if (err < 0) { //Couldn't print! Probably the student closed stdout - bad news std::cerr << "Couldn't print timing information! STDOUT Closed!" 
<< std::endl; exit(1); } size_t numPixels = numRows()*numCols(); checkCudaErrors(hipMemcpy(h_greyImage, d_greyImage, sizeof(unsigned char) * numPixels, hipMemcpyDeviceToHost)); //check results and output the grey image postProcess(output_file, h_greyImage); referenceCalculation(h_rgbaImage, h_greyImage, numRows(), numCols()); postProcess(reference_file, h_greyImage); //generateReferenceImage(input_file, reference_file); compareImages(reference_file, output_file, useEpsCheck, perPixelError, globalError); cleanup(); return 0; }
inline void _hipCheckError(const char *file, const int line) { hipError_t err = hipGetLastError(); _hipSafeCall(err, file, line); }