int main(int argc, char **argv) { uchar4 *h_inputImageRGBA, *d_inputImageRGBA; uchar4 *h_outputImageRGBA, *d_outputImageRGBA; unsigned char *d_redBlurred, *d_greenBlurred, *d_blueBlurred; float *h_filter; int filterWidth; std::string input_file; std::string output_file; if (argc == 3) { input_file = std::string(argv[1]); output_file = std::string(argv[2]); } else { std::cerr << "Usage: ./hw input_file output_file" << std::endl; exit(1); } //load the image and give us our input and output pointers preProcess(&h_inputImageRGBA, &h_outputImageRGBA, &d_inputImageRGBA, &d_outputImageRGBA, &d_redBlurred, &d_greenBlurred, &d_blueBlurred, &h_filter, &filterWidth, input_file); allocateMemoryAndCopyToGPU(numRows(), numCols(), h_filter, filterWidth); GpuTimer timer; timer.Start(); //call the students' code your_gaussian_blur(h_inputImageRGBA, d_inputImageRGBA, d_outputImageRGBA, numRows(), numCols(), d_redBlurred, d_greenBlurred, d_blueBlurred, filterWidth); timer.Stop(); cudaDeviceSynchronize(); //checkCudaErrors(cudaGetLastError()); int err = printf("%f msecs.\n", timer.Elapsed()); if (err < 0) { //Couldn't print! Probably the student closed stdout - bad news std::cerr << "Couldn't print timing information! STDOUT Closed!" << std::endl; exit(1); } cleanup(); //check results and output the blurred image postProcess(output_file); checkCudaErrors(cudaFree(d_redBlurred)); checkCudaErrors(cudaFree(d_greenBlurred)); checkCudaErrors(cudaFree(d_blueBlurred)); return 0; }
int main(int argc, char **argv) { //uchar4 is defined in vector_types.h in the SDK //uchar4 { unsigned char x,y,z,w; { uchar4 *h_rgbaImage, *d_rgbaImage; unsigned char *h_greyImage, *d_greyImage; string input_file; string output_file; input_file = "cinque_terre_small.jpg"; output_file = "output_terre_small.jpg"; /*Load image and give input and output pointers*/ preProcess(&h_rgbaImage,&h_greyImage,&d_rgbaImage,&d_greyImage, input_file); /*Start Timer*/ GpuTimer timer; timer.Start(); /*Execute Function*/ rgb_to_grey(h_rgbaImage,d_rgbaImage,d_greyImage,numRows(),numCols()); /*Stop Timer*/ timer.Stop(); //cudaDeviceSynchronize() forces the program to wait for all previously issued commands in all streams on the device to finish before continuing cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError()); cout<<"Code ran in: "<<timer.Elapsed())<<" seconds"<<endl; //Copy the image from the device back to the host size_t numPixels = numRows() * numCols(); checkCudaErrors(cudaMemCpy(h_greyImage,d_greyImage,sizeof(unsigned char) * numPixels, cudaMemcpyDeviceToHost)); //check results and output the grey image }//end of main
int main(int argc, char **argv) { uchar4 *h_sourceImg, *h_destImg, *h_blendedImg; size_t numRowsSource, numColsSource; std::string input_source_file; std::string input_dest_file; std::string output_file; if (argc == 4) { input_source_file = std::string(argv[1]); input_dest_file = std::string(argv[2]); output_file = std::string(argv[3]); } else { std::cerr << "Usage: ./hw input_source_file input_dest_file output_file" << std::endl; exit(1); } //load the image and give us our input and output pointers preProcess(&h_sourceImg, numRowsSource, numColsSource, &h_destImg, &h_blendedImg, input_source_file, input_dest_file); GpuTimer timer; timer.Start(); //call the students' code your_blend(h_sourceImg, numRowsSource, numColsSource, h_destImg, h_blendedImg); timer.Stop(); cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError()); int err = printf("e57__TIMING__f82 %f msecs.\n", timer.Elapsed()); if (err < 0) { //Couldn't print! Probably the student closed stdout - bad news std::cerr << "Couldn't print timing information! STDOUT Closed!" << std::endl; exit(1); } //check results and output the tone-mapped image postProcess(h_blendedImg, numRowsSource, numColsSource, output_file); delete[] h_destImg; delete[] h_sourceImg; delete[] h_blendedImg; return 0; }
int main(int argc, char **argv) { float *d_luminance; unsigned int *d_cdf; size_t numRows, numCols; unsigned int numBins; std::string input_file; std::string output_file; if (argc == 3) { input_file = std::string(argv[1]); output_file = std::string(argv[2]); } else { std::cerr << "Usage: ./hw input_file output_file" << std::endl; exit(1); } //load the image and give us our input and output pointers preProcess(&d_luminance, &d_cdf, &numRows, &numCols, &numBins, input_file); GpuTimer timer; float min_logLum, max_logLum; min_logLum = 0.f; max_logLum = 1.f; timer.Start(); //call the students' code your_histogram_and_prefixsum(d_luminance, d_cdf, min_logLum, max_logLum, numRows, numCols, numBins); timer.Stop(); cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError()); int err = printf("%f msecs.\n", timer.Elapsed()); if (err < 0) { //Couldn't print! Probably the student closed stdout - bad news std::cerr << "Couldn't print timing information! STDOUT Closed!" << std::endl; exit(1); } //check results and output the tone-mapped image postProcess(output_file, numRows, numCols, min_logLum, max_logLum); return 0; }
int main(int argc, char **argv) { uchar4 *h_rgbaImage, *d_rgbaImage; unsigned char *h_greyImage, *d_greyImage; std::string input_file; std::string output_file; if (argc == 3) { input_file = std::string(argv[1]); output_file = std::string(argv[2]); } else { std::cerr << "Usage: ./hw input_file output_file" << std::endl; exit(1); } //load the image and give us our input and output pointers preProcess(&h_rgbaImage, &h_greyImage, &d_rgbaImage, &d_greyImage, input_file); GpuTimer timer; timer.Start(); //call the students' code your_rgba_to_greyscale(h_rgbaImage, d_rgbaImage, d_greyImage, numRows(), numCols()); timer.Stop(); cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError()); printf("\n"); int err = printf("%f msecs.\n", timer.Elapsed()); if (err < 0) { //Couldn't print! Probably the student closed stdout - bad news std::cerr << "Couldn't print timing information! STDOUT Closed!" << std::endl; exit(1); } //check results and output the grey image postProcess(output_file); return 0; }
int main(int argc, char **argv) { unsigned int *inputVals; unsigned int *inputPos; unsigned int *outputVals; unsigned int *outputPos; size_t numElems; std::string input_file; std::string template_file; std::string output_file; std::string reference_file; double perPixelError = 0.0; double globalError = 0.0; bool useEpsCheck = false; switch (argc) { case 3: input_file = std::string(argv[1]); template_file = std::string(argv[2]); output_file = "HW4_output.png"; break; case 4: input_file = std::string(argv[1]); template_file = std::string(argv[2]); output_file = std::string(argv[3]); break; default: std::cerr << "Usage: ./HW4 input_file template_file [output_filename]" << std::endl; exit(1); } //load the image and give us our input and output pointers preProcess(&inputVals, &inputPos, &outputVals, &outputPos, numElems, input_file, template_file); GpuTimer timer; timer.Start(); //call the students' code your_sort(inputVals, inputPos, outputVals, outputPos, numElems); timer.Stop(); cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError()); printf("\n"); int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed()); if (err < 0) { //Couldn't print! Probably the student closed stdout - bad news std::cerr << "Couldn't print timing information! STDOUT Closed!" << std::endl; exit(1); } //check results and output the red-eye corrected image postProcess(outputVals, outputPos, numElems, output_file); // check code moved from HW4.cu /**************************************************************************** * You can use the code below to help with debugging, but make sure to * * comment it out again before submitting your assignment for grading, * * otherwise this code will take too much time and make it seem like your * * GPU implementation isn't fast enough. * * * * This code MUST RUN BEFORE YOUR CODE in case you accidentally change * * the input values when implementing your radix sort. * * * * This code performs the reference radix sort on the host and compares your * * sorted values to the reference. * * * * Thrust containers are used for copying memory from the GPU * * ************************************************************************* */ thrust::device_ptr<unsigned int> d_inputVals(inputVals); thrust::device_ptr<unsigned int> d_inputPos(inputPos); thrust::host_vector<unsigned int> h_inputVals(d_inputVals, d_inputVals+numElems); thrust::host_vector<unsigned int> h_inputPos(d_inputPos, d_inputPos + numElems); thrust::host_vector<unsigned int> h_outputVals(numElems); thrust::host_vector<unsigned int> h_outputPos(numElems); reference_calculation(&h_inputVals[0], &h_inputPos[0], &h_outputVals[0], &h_outputPos[0], numElems); //postProcess(&h_outputVals[0], &h_outputPos[0], numElems, reference_file); compareImages(reference_file, output_file, useEpsCheck, perPixelError, globalError); thrust::device_ptr<unsigned int> d_outputVals(outputVals); thrust::device_ptr<unsigned int> d_outputPos(outputPos); thrust::host_vector<unsigned int> h_yourOutputVals(d_outputVals, d_outputVals + numElems); thrust::host_vector<unsigned int> h_yourOutputPos(d_outputPos, d_outputPos + numElems); checkResultsExact(&h_outputVals[0], &h_yourOutputVals[0], numElems); checkResultsExact(&h_outputPos[0], &h_yourOutputPos[0], numElems); checkCudaErrors(cudaFree(inputVals)); checkCudaErrors(cudaFree(inputPos)); checkCudaErrors(cudaFree(outputVals)); checkCudaErrors(cudaFree(outputPos)); return 0; }
int main(int argc, char **argv) { uchar4 *h_rgbaImage, *d_rgbaImage; unsigned char *h_greyImage, *d_greyImage; std::string input_file; std::string output_file; std::string reference_file; double perPixelError = 0.0; double globalError = 0.0; bool useEpsCheck = false; switch (argc) { case 2: input_file = std::string(argv[1]); output_file = "HW1_output.png"; reference_file = "HW1_reference.png"; break; case 3: input_file = std::string(argv[1]); output_file = std::string(argv[2]); reference_file = "HW1_reference.png"; break; case 4: input_file = std::string(argv[1]); output_file = std::string(argv[2]); reference_file = std::string(argv[3]); break; case 6: useEpsCheck=true; input_file = std::string(argv[1]); output_file = std::string(argv[2]); reference_file = std::string(argv[3]); perPixelError = atof(argv[4]); globalError = atof(argv[5]); break; default: std::cerr << "Usage: ./HW1 input_file [output_filename] [reference_filename] [perPixelError] [globalError]" << std::endl; exit(1); } //load the image and give us our input and output pointers preProcess(&h_rgbaImage, &h_greyImage, &d_rgbaImage, &d_greyImage, input_file); GpuTimer timer; timer.Start(); //call the students' code your_rgba_to_greyscale(h_rgbaImage, d_rgbaImage, d_greyImage, numRows(), numCols()); timer.Stop(); cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError()); int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed()); if (err < 0) { //Couldn't print! Probably the student closed stdout - bad news std::cerr << "Couldn't print timing information! STDOUT Closed!" << std::endl; exit(1); } size_t numPixels = numRows()*numCols(); checkCudaErrors(cudaMemcpy(h_greyImage, d_greyImage, sizeof(unsigned char) * numPixels, cudaMemcpyDeviceToHost)); //check results and output the grey image postProcess(output_file, h_greyImage); referenceCalculation(h_rgbaImage, h_greyImage, numRows(), numCols()); postProcess(reference_file, h_greyImage); //generateReferenceImage(input_file, reference_file); compareImages(reference_file, output_file, useEpsCheck, perPixelError, globalError); cleanup(); return 0; }
int main(int argc, char **argv) { float *d_luminance; unsigned int *d_cdf; size_t numRows, numCols; unsigned int numBins; std::string input_file; std::string output_file; std::string reference_file; double perPixelError = 0.0; double globalError = 0.0; bool useEpsCheck = false; switch (argc) { case 2: input_file = std::string(argv[1]); output_file = "HW3_output.png"; reference_file = "HW3_reference.png"; break; case 3: input_file = std::string(argv[1]); output_file = std::string(argv[2]); reference_file = "HW3_reference.png"; break; case 4: input_file = std::string(argv[1]); output_file = std::string(argv[2]); reference_file = std::string(argv[3]); break; case 6: useEpsCheck=true; input_file = std::string(argv[1]); output_file = std::string(argv[2]); reference_file = std::string(argv[3]); perPixelError = atof(argv[4]); globalError = atof(argv[5]); break; default: std::cerr << "Usage: ./HW3 input_file [output_filename] [reference_filename] [perPixelError] [globalError]" << std::endl; system("Pause"); exit(1); } //load the image and give us our input and output pointers preProcess(&d_luminance, &d_cdf, &numRows, &numCols, &numBins, input_file); GpuTimer timer; float min_logLum, max_logLum; min_logLum = 0.f; max_logLum = 1.f; timer.Start(); //call the students' code your_histogram_and_prefixsum(d_luminance, d_cdf, min_logLum, max_logLum, numRows, numCols, numBins); timer.Stop(); cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError()); int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed()); if (err < 0) { //Couldn't print! Probably the student closed stdout - bad news std::cerr << "Couldn't print timing information! STDOUT Closed!" << std::endl; system("Pause"); exit(1); } float *h_luminance = (float *) malloc(sizeof(float)*numRows*numCols); unsigned int *h_cdf = (unsigned int *) malloc(sizeof(unsigned int)*numBins); checkCudaErrors(cudaMemcpy(h_luminance, d_luminance, numRows*numCols*sizeof(float), cudaMemcpyDeviceToHost)); //check results and output the tone-mapped image postProcess(output_file, numRows, numCols, min_logLum, max_logLum); for (size_t i = 1; i < numCols * numRows; ++i) { min_logLum = std::min(h_luminance[i], min_logLum); max_logLum = std::max(h_luminance[i], max_logLum); } referenceCalculation(h_luminance, h_cdf, numRows, numCols, numBins, min_logLum, max_logLum); checkCudaErrors(cudaMemcpy(d_cdf, h_cdf, sizeof(unsigned int) * numBins, cudaMemcpyHostToDevice)); //check results and output the tone-mapped image postProcess(reference_file, numRows, numCols, min_logLum, max_logLum); cleanupGlobalMemory(); compareImages(reference_file, output_file, useEpsCheck, perPixelError, globalError); system("pause"); return 0; }
int main(int argc, char **argv) { uchar4 *h_inputImageRGBA, *d_inputImageRGBA; uchar4 *h_outputImageRGBA, *d_outputImageRGBA; unsigned char *d_redBlurred, *d_greenBlurred, *d_blueBlurred; float *h_filter; int filterWidth; std::string input_file; std::string output_file; std::string reference_file; double perPixelError = 0.0; double globalError = 0.0; bool useEpsCheck = false; switch (argc) { case 2: input_file = std::string(argv[1]); output_file = "HW2_output.png"; reference_file = "HW2_reference.png"; break; case 3: input_file = std::string(argv[1]); output_file = std::string(argv[2]); reference_file = "HW2_reference.png"; break; case 4: input_file = std::string(argv[1]); output_file = std::string(argv[2]); reference_file = std::string(argv[3]); break; case 6: useEpsCheck=true; input_file = std::string(argv[1]); output_file = std::string(argv[2]); reference_file = std::string(argv[3]); perPixelError = atof(argv[4]); globalError = atof(argv[5]); break; default: std::cerr << "Usage: ./HW2 input_file [output_filename] [reference_filename] [perPixelError] [globalError]" << std::endl; exit(1); } //load the image and give us our input and output pointers preProcess(&h_inputImageRGBA, &h_outputImageRGBA, &d_inputImageRGBA, &d_outputImageRGBA, &d_redBlurred, &d_greenBlurred, &d_blueBlurred, &h_filter, &filterWidth, input_file); allocateMemoryAndCopyToGPU(numRows(), numCols(), h_filter, filterWidth); /* for(int i=0;i<filterWidth; ++i) { for(int j=0;j<filterWidth; ++j) { std::cerr<<h_filter[i*filterWidth + j]<<" "; } std::cerr<<std::endl; } */ GpuTimer timer; timer.Start(); //call the students' code your_gaussian_blur(h_inputImageRGBA, d_inputImageRGBA, d_outputImageRGBA, numRows(), numCols(), d_redBlurred, d_greenBlurred, d_blueBlurred, filterWidth); timer.Stop(); cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError()); int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed()); if (err < 0) { //Couldn't print! Probably the student closed stdout - bad news std::cerr << "Couldn't print timing information! STDOUT Closed!" << std::endl; exit(1); } //check results and output the blurred image size_t numPixels = numRows()*numCols(); //copy the output back to the host checkCudaErrors(cudaMemcpy(h_outputImageRGBA, d_outputImageRGBA__, sizeof(uchar4) * numPixels, cudaMemcpyDeviceToHost)); postProcess(output_file, h_outputImageRGBA); referenceCalculation(h_inputImageRGBA, h_outputImageRGBA, numRows(), numCols(), h_filter, filterWidth); postProcess(reference_file, h_outputImageRGBA); // Cheater easy way with OpenCV //generateReferenceImage(input_file, reference_file, filterWidth); compareImages(reference_file, output_file, useEpsCheck, perPixelError, globalError); checkCudaErrors(cudaFree(d_redBlurred)); checkCudaErrors(cudaFree(d_greenBlurred)); checkCudaErrors(cudaFree(d_blueBlurred)); cleanUp(); return 0; }
int _tmain(int argc, _TCHAR* argv[]) { uchar4 *h_inputImageRGBA, *d_inputImageRGBA; uchar4 *h_outputImageRGBA, *d_outputImageRGBA; unsigned char *d_redBlurred, *d_greenBlurred, *d_blueBlurred; float *h_filter; int filterWidth; //PreProcess const std::string *filename = new std::string("./cinque_terre_small.jpg"); cv::Mat imageInputRGBA; cv::Mat imageOutputRGBA; //make sure the context initializes ok checkCudaErrors(cudaFree(0)); cv::Mat image = cv::imread(filename->c_str(), CV_LOAD_IMAGE_COLOR); if (image.empty()) { std::cerr << "Couldn't open file: " << filename << std::endl; cv::waitKey(0); exit(1); } cv::cvtColor(image, imageInputRGBA, CV_BGR2RGBA); //allocate memory for the output imageOutputRGBA.create(image.rows, image.cols, CV_8UC4); //This shouldn't ever happen given the way the images are created //at least based upon my limited understanding of OpenCV, but better to check if (!imageInputRGBA.isContinuous() || !imageOutputRGBA.isContinuous()) { std::cerr << "Images aren't continuous!! Exiting." << std::endl; exit(1); } h_inputImageRGBA = (uchar4 *)imageInputRGBA.ptr<unsigned char>(0); h_outputImageRGBA = (uchar4 *)imageOutputRGBA.ptr<unsigned char>(0); const size_t numPixels = image.rows * image.cols; //allocate memory on the device for both input and output checkCudaErrors(cudaMalloc(&d_inputImageRGBA, sizeof(uchar4) * numPixels)); checkCudaErrors(cudaMalloc(&d_outputImageRGBA, sizeof(uchar4) * numPixels)); checkCudaErrors(cudaMemset(d_outputImageRGBA, 0, numPixels * sizeof(uchar4))); //make sure no memory is left laying around //copy input array to the GPU checkCudaErrors(cudaMemcpy(d_inputImageRGBA, h_inputImageRGBA, sizeof(uchar4) * numPixels, cudaMemcpyHostToDevice)); //now create the filter that they will use const int blurKernelWidth = 9; const float blurKernelSigma = 2.; filterWidth = blurKernelWidth; //create and fill the filter we will convolve with h_filter = new float[blurKernelWidth * blurKernelWidth]; float filterSum = 0.f; //for normalization for (int r = -blurKernelWidth/2; r <= blurKernelWidth/2; ++r) { for (int c = -blurKernelWidth/2; c <= blurKernelWidth/2; ++c) { float filterValue = expf( -(float)(c * c + r * r) / (2.f * blurKernelSigma * blurKernelSigma)); h_filter[(r + blurKernelWidth/2) * blurKernelWidth + c + blurKernelWidth/2] = filterValue; filterSum += filterValue; } } float normalizationFactor = 1.f / filterSum; for (int r = -blurKernelWidth/2; r <= blurKernelWidth/2; ++r) { for (int c = -blurKernelWidth/2; c <= blurKernelWidth/2; ++c) { h_filter[(r + blurKernelWidth/2) * blurKernelWidth + c + blurKernelWidth/2] *= normalizationFactor; } } //blurred checkCudaErrors(cudaMalloc(&d_redBlurred, sizeof(unsigned char) * numPixels)); checkCudaErrors(cudaMalloc(&d_greenBlurred, sizeof(unsigned char) * numPixels)); checkCudaErrors(cudaMalloc(&d_blueBlurred, sizeof(unsigned char) * numPixels)); checkCudaErrors(cudaMemset(d_redBlurred, 0, sizeof(unsigned char) * numPixels)); checkCudaErrors(cudaMemset(d_greenBlurred, 0, sizeof(unsigned char) * numPixels)); checkCudaErrors(cudaMemset(d_blueBlurred, 0, sizeof(unsigned char) * numPixels)); allocateMemoryAndCopyToGPU(image.rows, image.cols, h_filter, filterWidth); GpuTimer timer; timer.Start(); //call the students' code your_gaussian_blur(h_inputImageRGBA, d_inputImageRGBA, d_outputImageRGBA, image.rows, image.cols, d_redBlurred, d_greenBlurred, d_blueBlurred, filterWidth); timer.Stop(); cudaDeviceSynchronize(); checkCudaErrors(cudaGetLastError()); int err = printf("%f msecs.\n", timer.Elapsed()); if (err < 0) { //Couldn't print! Probably the student closed stdout - bad news std::cerr << "Couldn't print timing information! STDOUT Closed!" << std::endl; exit(1); } cleanup(); //check results and output the blurred image //PostProcess //copy the output back to the host checkCudaErrors(cudaMemcpy(imageOutputRGBA.ptr<unsigned char>(0), d_outputImageRGBA, sizeof(uchar4) * numPixels, cudaMemcpyDeviceToHost)); cv::Mat imageOutputBGR; cv::cvtColor(imageOutputRGBA, imageOutputBGR, CV_RGBA2BGR); //output the image cv::imwrite("./blurredResult.jpg", imageOutputBGR); cv::namedWindow( "Display window", CV_WINDOW_NORMAL); cv::imshow("Display window", imageOutputBGR); cv::waitKey(0); checkCudaErrors(cudaFree(d_redBlurred)); checkCudaErrors(cudaFree(d_greenBlurred)); checkCudaErrors(cudaFree(d_blueBlurred)); return 0; }
int main(int argc, char **argv) { float *d_luminance; unsigned int *d_cdf; size_t numRows, numCols; unsigned int numBins; std::string input_file; std::string output_file; std::string reference_file; double perPixelError = 0.0; double globalError = 0.0; bool useEpsCheck = false; switch (argc) { case 2: input_file = std::string(argv[1]); output_file = "output.png"; break; case 3: input_file = std::string(argv[1]); output_file = std::string(argv[2]); break; default: std::cerr << "Usage: ./tone input_file [output_filename] [reference_filename]" << std::endl; exit(1); } //load the image and give us our input and output pointers preProcess(&d_luminance, &d_cdf, &numRows, &numCols, &numBins, input_file); GpuTimer timer; float min_logLum, max_logLum; min_logLum = 0.f; max_logLum = 1.f; timer.Start(); //call the students' code calculate_cdf(d_luminance, d_cdf, min_logLum, max_logLum, numRows, numCols, numBins); timer.Stop(); cudaDeviceSynchronize(); int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed()); if (err < 0) { //Couldn't print! Probably the student closed stdout - bad news std::cerr << "Couldn't print timing information! STDOUT Closed!" << std::endl; exit(1); } float *h_luminance = (float *) malloc(sizeof(float)*numRows*numCols); unsigned int *h_cdf = (unsigned int *) malloc(sizeof(unsigned int)*numBins); cudaMemcpy(h_luminance, d_luminance, numRows*numCols*sizeof(float), cudaMemcpyDeviceToHost); //check results and output the tone-mapped image postProcess(output_file, numRows, numCols, min_logLum, max_logLum); cleanupGlobalMemory(); return 0; }