Exemplo n.º 1
0
/**
 * Driver for the HW4 sort assignment.
 *
 * Usage: ./HW4 input_file template_file [output_filename]
 *
 * Flow:
 *   1. preProcess() fills the four buffers and numElems from the input and
 *      template files.
 *   2. The input buffers are copied to the host so the reference check works
 *      on the values as they were before your_sort() ran.
 *   3. your_sort() is timed with a GpuTimer.
 *   4. postProcess() is handed the sorted output together with output_file.
 *   5. reference_calculation() runs on the host copies and its result is
 *      compared against the output of your_sort().
 *
 * Returns 0 on success; exits with status 1 on a usage error or when the
 * timing line cannot be printed.
 */
int main(int argc, char **argv) {
  // Buffers produced by preProcess(); they are wrapped in thrust::device_ptr
  // below and released with cudaFree() at the end.
  unsigned int *inputVals;
  unsigned int *inputPos;
  unsigned int *outputVals;
  unsigned int *outputPos;

  size_t numElems;

  std::string input_file;
  std::string template_file;
  std::string output_file;
  // NOTE(review): reference_file is never assigned in this function, so
  // compareImages() below receives an empty name -- confirm this is intended.
  std::string reference_file;
  double perPixelError = 0.0;
  double globalError   = 0.0;
  bool useEpsCheck = false;

  switch (argc)
  {
  case 3:
    input_file    = std::string(argv[1]);
    template_file = std::string(argv[2]);
    output_file   = "HW4_output.png";
    break;
  case 4:
    input_file    = std::string(argv[1]);
    template_file = std::string(argv[2]);
    output_file   = std::string(argv[3]);
    break;
  default:
    std::cerr << "Usage: ./HW4 input_file template_file [output_filename]" << std::endl;
    exit(1);
  }

  // Load the inputs and obtain the input and output pointers.
  preProcess(&inputVals, &inputPos, &outputVals, &outputPos, numElems, input_file, template_file);

  // Snapshot the inputs on the host BEFORE the student code runs, so that a
  // sort which accidentally modifies its inputs cannot corrupt the reference.
  // This happens outside the timed region.
  thrust::device_ptr<unsigned int> d_inputVals(inputVals);
  thrust::device_ptr<unsigned int> d_inputPos(inputPos);

  thrust::host_vector<unsigned int> h_inputVals(d_inputVals,
                                                d_inputVals + numElems);
  thrust::host_vector<unsigned int> h_inputPos(d_inputPos,
                                               d_inputPos + numElems);

  GpuTimer timer;
  timer.Start();

  // Call the students' code.
  your_sort(inputVals, inputPos, outputVals, outputPos, numElems);

  timer.Stop();
  // Check the synchronisation result as well as any pending launch error so
  // that a failure is reported at the call that observed it.
  checkCudaErrors(cudaDeviceSynchronize());
  checkCudaErrors(cudaGetLastError());
  printf("\n");
  int err = printf("Your code ran in: %f msecs.\n", timer.Elapsed());

  if (err < 0) {
    // Couldn't print! Probably the student closed stdout - bad news.
    std::cerr << "Couldn't print timing information! STDOUT Closed!" << std::endl;
    exit(1);
  }

  // Hand the sorted output to postProcess together with the output file name.
  postProcess(outputVals, outputPos, numElems, output_file);

  /****************************************************************************
  * Debugging aid (check code moved from HW4.cu).                             *
  *                                                                           *
  * Performs the reference radix sort on the host, using the input snapshot   *
  * taken before your_sort() ran, and compares your sorted values to the      *
  * reference. Comment it out before submitting for grading, otherwise it     *
  * takes too much time.                                                      *
  *                                                                           *
  * Thrust containers are used for copying memory from the GPU.               *
  * ************************************************************************* */
  thrust::host_vector<unsigned int> h_outputVals(numElems);
  thrust::host_vector<unsigned int> h_outputPos(numElems);

  reference_calculation(&h_inputVals[0], &h_inputPos[0],
                        &h_outputVals[0], &h_outputPos[0],
                        numElems);

  //postProcess(&h_outputVals[0], &h_outputPos[0], numElems, reference_file);

  compareImages(reference_file, output_file, useEpsCheck, perPixelError, globalError);

  // Bring the student's results back to the host for an exact comparison.
  thrust::device_ptr<unsigned int> d_outputVals(outputVals);
  thrust::device_ptr<unsigned int> d_outputPos(outputPos);

  thrust::host_vector<unsigned int> h_yourOutputVals(d_outputVals,
                                                     d_outputVals + numElems);
  thrust::host_vector<unsigned int> h_yourOutputPos(d_outputPos,
                                                    d_outputPos + numElems);

  checkResultsExact(&h_outputVals[0], &h_yourOutputVals[0], numElems);
  checkResultsExact(&h_outputPos[0], &h_yourOutputPos[0], numElems);

  checkCudaErrors(cudaFree(inputVals));
  checkCudaErrors(cudaFree(inputPos));
  checkCudaErrors(cudaFree(outputVals));
  checkCudaErrors(cudaFree(outputPos));

  return 0;
}
Exemplo n.º 2
0
/**
 * Test driver: builds random input and coefficients on the host, runs the
 * host reference_calculation() and gpu_code(), and prints the error reported
 * by reference_code().
 *
 * Optional positional arguments (each overrides its default when present):
 *   argv[1] nChannels
 *   argv[2] NUM_THREADS
 *   argv[3] nStreams
 *   argv[4] nTaps
 *   argv[5] seg_blocks
 *   argv[6] value v from which data_size = (v + nTaps - 1) * nChannels
 *
 * Returns 0 on success, 1 on invalid arguments or host allocation failure.
 */
int main(int argc, char **argv){
	
	int nTaps = 8;
	int nChannels = 1024;
	int nStreams = 2;
	int NUM_THREADS = 512;
	int seg_blocks = 5000;
	// The default size is computed from the default nTaps, before any
	// command-line override is applied.
	unsigned int data_size = 10000+nTaps-1;
	unsigned int nBlocks = 0;
	float error = 1.1f;
	bool debug=true;

	if (debug) printf("\t\tWelcome\n");

	Complex *h_signal, *h_spectra_pinned, *h_spectra_ref, *h_data_pinned;
	float *h_coeff;

	if (argc >= 2) nChannels   = atof(argv[1]);
	if (argc >= 3) NUM_THREADS = atof(argv[2]);
	if (argc >= 4) nStreams	   = (atof(argv[3]));
	if (argc >= 5) nTaps 	   = (atof(argv[4]));
	if (argc >= 6) seg_blocks  = (atof(argv[5]));

	// nChannels is used as a divisor and both values size the allocations
	// below, so reject non-positive input before using them.
	if (nChannels <= 0 || nTaps <= 0) {
		fprintf(stderr, "nChannels and nTaps must be positive.\n");
		return 1;
	}

	if (argc >= 7) data_size   = (atof(argv[6])+nTaps-1)*nChannels;

	nBlocks = data_size/nChannels;

	if (debug) printf("\nHost memory allocation...\t");
	checkCudaErrors(cudaMallocHost((void**)&h_spectra_pinned, data_size*sizeof(Complex)));
	checkCudaErrors(cudaMallocHost((void**)&h_data_pinned, data_size*sizeof(Complex)));
	h_signal 	= (Complex *)malloc(data_size*sizeof(Complex));
	h_spectra_ref = (Complex *)malloc(data_size*sizeof(Complex));
	// Widen before multiplying so the product is not computed in int.
	h_coeff 	= (float *)malloc((size_t)nTaps*nChannels*sizeof(float));
	if (h_signal == NULL || h_spectra_ref == NULL || h_coeff == NULL) {
		fprintf(stderr, "\nHost memory allocation failed.\n");
		free(h_signal);		// free(NULL) is a no-op
		free(h_spectra_ref);
		free(h_coeff);
		checkCudaErrors(cudaFreeHost(h_spectra_pinned));
		checkCudaErrors(cudaFreeHost(h_data_pinned));
		return 1;
	}
	if (debug) printf("done.");

	if (debug) printf("\nHost memory memset...\t\t");
	memset(h_spectra_pinned, 0, sizeof(Complex)*data_size);
	memset(h_spectra_ref, 0, sizeof(Complex)*data_size);
	if (debug) printf("done.");

	if (debug) printf("\nLoad window coefficients...\t");
	// Load_window_data(h_coeff) is disabled; random coefficients are used.
	// NOTE(review): rand() is used here before srand() is called below, so
	// the coefficients come from the default seed -- confirm this is intended.
	for (int i = 0; i < nTaps*nChannels; i++)
		h_coeff[i] = rand() / (float)RAND_MAX;
	if (debug) printf("done.");


	if (debug) printf("\nRandom data set...\t\t");	
	srand(time(NULL));
	for (int i=0; i < (int)data_size; i++){
		h_signal[i].x = rand() / (float)RAND_MAX;
		h_signal[i].y = rand() / (float)RAND_MAX;
	}

	// Copy of the signal that is handed to gpu_code().
	for (int i = 0; i < (int)data_size; i++){
		h_data_pinned[i] = h_signal[i];
	}
	if (debug) printf("done.");


	if (debug) printf("\nReference calculation...\t");
	reference_calculation(h_signal, h_spectra_ref, h_coeff, nChannels, nBlocks, nTaps);
	if (debug) printf("done.\n");
	
	gpu_code(h_data_pinned, h_spectra_pinned, h_coeff, nChannels, nBlocks, data_size, NUM_THREADS, nTaps, nStreams, seg_blocks);	
	
	if (debug){
		error = reference_code(h_spectra_ref, h_spectra_pinned, nChannels, nTaps, nBlocks);
		printf( "error = %lf\n", error);
	}

	checkCudaErrors(cudaFreeHost(h_spectra_pinned));
	checkCudaErrors(cudaFreeHost(h_data_pinned));
	// These buffers came from malloc(), so they must be released with free().
	free(h_signal);
	free(h_spectra_ref);
	free(h_coeff);

	return 0;
}