예제 #1
0
int main (int argc, char* argv[]){
  struct pb_Parameters* prms;
  struct pb_TimerSet timers;

  prms = pb_ReadParameters(&argc,argv);
  pb_InitializeTimerSet(&timers);
  
  pb_AddSubTimer(&timers, oclOverhead, pb_TimerID_KERNEL);

  pb_SwitchToTimer(&timers, pb_TimerID_NONE);

  char uksdata[250];
  parameters params;

  FILE* uksfile_f = NULL;
  FILE* uksdata_f = NULL;

  strcpy(uksdata,prms->inpFiles[0]);
  strcat(uksdata,".data");

  uksfile_f = fopen(prms->inpFiles[0],"r");
  if (uksfile_f == NULL){
    printf("ERROR: Could not open %s\n",prms->inpFiles[0]);
    exit(1);
  }

  printf("\nReading parameters\n");

  if (argc >= 2){
    params.binsize = atoi(argv[1]);
  } else { //default binsize value;
    params.binsize = 128;
  }

  setParameters(uksfile_f, &params);

  pb_SwitchToTimer(&timers, pb_TimerID_IO);

  ReconstructionSample* samples; //Input Data
//  cl_mem samplesPin; 
  float* LUT; //use look-up table for faster execution on CPU (intermediate data)
  unsigned int sizeLUT; //set in the function calculateLUT (intermediate data)

  cmplx* gridData; //Output Data
  float* sampleDensity; //Output Data
//  cl_mem gridDataPin;
//  cl_mem sampleDensityPin;

  cmplx* gridData_gold; //Gold Output Data
  float* sampleDensity_gold; //Gold Output Data
  
  cl_int ciErrNum;
  cl_platform_id clPlatform;
  cl_device_type deviceType = CL_DEVICE_TYPE_GPU;
  cl_device_id clDevice;
  cl_context clContext;

  int deviceFound = getOpenCLDevice(&clPlatform, &clDevice, &deviceType, 0);

  size_t max_alloc_size = 0;
  (void) clGetDeviceInfo(clDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &max_alloc_size, 0);
  size_t global_mem_size = 0;
  (void) clGetDeviceInfo(clDevice, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size_t), &global_mem_size, 0);

  size_t samples_size = params.numSamples*sizeof(ReconstructionSample);
  int gridNumElems = params.gridSize[0] * params.gridSize[1] * params.gridSize[2];
  size_t output_size = gridNumElems*sizeof(cmplx);

  if ( (deviceFound < 0) ||
       ((samples_size+output_size) > global_mem_size) ||
       (samples_size > max_alloc_size) || 
       (output_size > max_alloc_size ) ) {
    fprintf(stderr, "No suitable device was found\n");
    if(deviceFound >= 0) {
      fprintf(stderr, "Memory requirements for this dataset exceed device capabilities\n");
    }
    exit(1);
  }
  
  cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties) clPlatform, 0};
  clContext = clCreateContextFromType(cps, deviceType, NULL, NULL, &ciErrNum);
  OCL_ERRCK_VAR(ciErrNum);

  cl_command_queue clCommandQueue = clCreateCommandQueue(clContext, clDevice, CL_QUEUE_PROFILING_ENABLE, &ciErrNum);
  OCL_ERRCK_VAR(ciErrNum);
  
  cl_uint workItemDimensions;
  OCL_ERRCK_RETVAL( clGetDeviceInfo(clDevice, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &workItemDimensions, NULL) );
  
  size_t workItemSizes[workItemDimensions];
  OCL_ERRCK_RETVAL( clGetDeviceInfo(clDevice, CL_DEVICE_MAX_WORK_ITEM_SIZES, workItemDimensions*sizeof(size_t), workItemSizes, NULL) );
  
  pb_SetOpenCL(&clContext, &clCommandQueue);
    
    /*
  samplesPin = clCreateBuffer(clContext, CL_MEM_ALLOC_HOST_PTR, 
      params.numSamples*sizeof(ReconstructionSample),
      NULL, &ciErrNum);
*/
  samples = (ReconstructionSample *) malloc ( params.numSamples*sizeof(ReconstructionSample) );
  
  /*(ReconstructionSample *) clEnqueueMapBuffer(clCommandQueue, samplesPin, CL_TRUE, CL_MAP_WRITE, 0, params.numSamples*sizeof(ReconstructionSample), 0, NULL, NULL, &ciErrNum);
  OCL_ERRCK_VAR(ciErrNum);
*/
  if (samples == NULL){
    printf("ERROR: Unable to allocate and map memory for input data\n");
    exit(1);
  }


  uksdata_f = fopen(uksdata,"rb");

  if(uksdata_f == NULL){
    printf("ERROR: Could not open data file\n");
    exit(1);
  }

  printf("Reading input data from files\n");

  unsigned int n = readSampleData(params, uksdata_f, samples);
  fclose(uksdata_f);

  if (params.useLUT){
    printf("Generating Look-Up Table\n");
    float beta = PI * sqrt(4*params.kernelWidth*params.kernelWidth/(params.oversample*params.oversample) * (params.oversample-.5)*(params.oversample-.5)-.8);
    calculateLUT(beta, params.kernelWidth, &LUT, &sizeLUT);
  }

  pb_SwitchToTimer(&timers, pb_TimerID_NONE);

  gridData_gold = (cmplx*) calloc (gridNumElems, sizeof(cmplx));
  sampleDensity_gold = (float*) calloc (gridNumElems, sizeof(float));
  if (sampleDensity_gold == NULL || gridData_gold == NULL){
    printf("ERROR: Unable to allocate memory for output data\n");
    exit(1);
  }

  printf("Running gold version\n");

  gridding_Gold(n, params, samples, LUT, sizeLUT, gridData_gold, sampleDensity_gold);

  printf("Running OpenCL version\n");

  pb_SwitchToTimer(&timers, pb_TimerID_COPY);

/*
  OCL_ERRCK_RETVAL( clEnqueueWriteBuffer(clCommandQueue, samplesPin, CL_TRUE, 
                          0, // Offset in bytes
                          n*sizeof(ReconstructionSample), // Size of data to write
                          samples, // Host Source
  
                          0, NULL, NULL) );*/
 // OCL_ERRCK_RETVAL ( clFinish(clCommandQueue) );
 
 /*
  gridDataPin = clCreateBuffer(clContext, CL_MEM_ALLOC_HOST_PTR, 
      gridNumElems*sizeof(cmplx), NULL, &ciErrNum);
  OCL_ERRCK_VAR(ciErrNum);
  */
  gridData = (cmplx *) malloc ( gridNumElems*sizeof(cmplx) );
  if (gridData == NULL) { fprintf(stderr, "Could not allocate memory on host! (%s: %d)\n", __FILE__, __LINE__); exit(1); }
  
  /*(cmplx *) clEnqueueMapBuffer(clCommandQueue, gridDataPin, CL_TRUE, CL_MAP_READ, 0, gridNumElems*sizeof(cmplx), 0, NULL, NULL, &ciErrNum);
  OCL_ERRCK_VAR(ciErrNum);
  */
  
  /*
  sampleDensityPin = clCreateBuffer(clContext, CL_MEM_ALLOC_HOST_PTR, 
      gridNumElems*sizeof(float), NULL, &ciErrNum);
  OCL_ERRCK_VAR(ciErrNum);
  */
  
  sampleDensity = (float *) malloc ( gridNumElems*sizeof(float) );
  if (sampleDensity == NULL) { fprintf(stderr, "Could not allocate memory on host! (%s: %d)\n", __FILE__, __LINE__); exit(1); }
  
  /*(float *) clEnqueueMapBuffer(clCommandQueue, sampleDensityPin, CL_TRUE, CL_MAP_READ, 0, gridNumElems*sizeof(float), 0, NULL, NULL, &ciErrNum);
  */
  
  OCL_ERRCK_VAR(ciErrNum);
  OCL_ERRCK_VAR(ciErrNum);
  
  if (sampleDensity == NULL || gridData == NULL){
    printf("ERROR: Unable to allocate memory for output data\n");
    exit(1);
  }

  pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
  
  //Interface function to GPU implementation of gridding
  OpenCL_interface(&timers, n, params, samples, LUT, sizeLUT, gridData, sampleDensity, clContext, clCommandQueue, clDevice, workItemSizes);

  pb_SwitchToTimer(&timers, pb_TimerID_NONE);

  int passed=1;
  for (int i=0; i<gridNumElems; i++){
    if(sampleDensity[i] != sampleDensity_gold[i]) {
      passed=0;
      break;
    }
  }
  //(passed) ? printf("Comparing GPU and Gold results... PASSED\n"):printf("Comparing GPU and Gold results... FAILED\n");

  pb_SwitchToTimer(&timers, pb_TimerID_IO);

  FILE* outfile;
  if(!(outfile=fopen(prms->outFile,"w")))
  {
        printf("Cannot open output file!\n");
  } else {
        fwrite(&passed,sizeof(int),1,outfile);
        fclose(outfile);
  }

  pb_SwitchToTimer(&timers, pb_TimerID_NONE);

  if (params.useLUT){
    free(LUT);
  }
  
  /*
  OCL_ERRCK_RETVAL ( clEnqueueUnmapMemObject(clCommandQueue, samplesPin, samples, 0, NULL, NULL) );
  OCL_ERRCK_RETVAL ( clEnqueueUnmapMemObject(clCommandQueue, gridDataPin, gridData, 0, NULL, NULL) );
  OCL_ERRCK_RETVAL ( clEnqueueUnmapMemObject(clCommandQueue, sampleDensityPin, sampleDensity, 0, NULL, NULL) );
  
  clReleaseMemObject(samplesPin);
  clReleaseMemObject(gridDataPin);
  clReleaseMemObject(sampleDensityPin);
  */
  
  free(samples);
  free(gridData);
  free(sampleDensity);
  
  
  free(gridData_gold);
  free(sampleDensity_gold);

  printf("\n");
  pb_PrintTimerSet(&timers);
  pb_FreeParameters(prms);

  return 0;
}
예제 #2
0
// ---------------------------------------------------------------------------
//	readAiffData
// ---------------------------------------------------------------------------
//	Read the AIFF file at filename and use its chunks to initialize this
//	AiffFile.
//
//	The Container chunk must come first. After it, chunks are read in file
//	order until a chunk header can no longer be read: the Common, Sound Data,
//	Instrument, and Marker chunks are parsed, and every other chunk is
//	skipped.
//
//	On success:
//		- rate_ is set from the Common chunk,
//		- notenum_ is set from the Instrument chunk, if one was found,
//		- one Marker is appended to markers_ for each marker in the Marker
//		  chunk, if one was found,
//		- samples_ is filled from the Sound Data bytes.
//
//	Throws FileIOException if the first chunk header cannot be read or is
//	not a Container chunk, if the Common chunk describes anything other than
//	one channel of 8-, 16-, 24-, or 32-bit samples, or if the file ends
//	without both a Common chunk and a Sound Data chunk. Any Exception raised
//	while reading has " Failed to read AIFF file." appended before being
//	rethrown.
//
//	A mismatch between the number of samples read and the frame count in the
//	Common chunk is only reported through the notifier; it is not an error.
//
void 
AiffFile::readAiffData( const std::string & filename )
{
	ContainerCk containerChunk;
	CommonCk commonChunk;
	SoundDataCk soundDataChunk;
	InstrumentCk instrumentChunk;
	MarkerCk markerChunk;

	try 
	{
		std::ifstream s( filename.c_str(), std::ifstream::binary );
	
		//	the Container chunk must be first, read it:
		//	(a stream that failed to open is also caught by the check below)
		readChunkHeader( s, containerChunk.header );
		if ( !s )
		{
			Throw( FileIOException, "File not found, or corrupted." );
		}
		if ( containerChunk.header.id != ContainerId )
		{
			Throw( FileIOException, "Found no Container chunk." );
		}
		readContainer( s, containerChunk, containerChunk.header.size );
		
		//	read other chunks, we are only interested in
		//	the Common chunk, the Sound Data chunk, the Markers
		//	(and the Instrument chunk, when present): 
		CkHeader h;
		while ( readChunkHeader( s, h ) )
		{			
			switch (h.id)
			{
				case CommonId:
					readCommonData( s, commonChunk, h.size );
					if ( commonChunk.channels != 1 )
					{
						Throw( FileIOException, 
							   "Loris only processes single-channel AIFF samples files." );
					}					
					if ( commonChunk.bitsPerSample != 8 &&
						 commonChunk.bitsPerSample != 16 &&
						 commonChunk.bitsPerSample != 24 &&
						 commonChunk.bitsPerSample != 32 )
					{
						Throw( FileIOException, "Unrecognized sample size." );
					}										
					break;
				case SoundDataId:
					readSampleData( s, soundDataChunk, h.size );
					break;
				case InstrumentId:
					readInstrumentData( s, instrumentChunk, h.size );
					break;
				case MarkerId:
					readMarkerData( s, markerChunk, h.size );
					break;
				default:
				{
					//	Skip a chunk we do not interpret. AIFF chunks
					//	whose data size is odd are followed by one pad
					//	byte that is not counted in the chunk size, so
					//	skip that byte too; otherwise the next chunk
					//	header would be read one byte off.
					std::streamsize skip = h.size;
					if ( skip % 2 != 0 )
					{
						++skip;
					}
					s.ignore( skip );
					break;
				}
			}
		}
	
		//	the checks below rely on header.id being zero for a chunk that
		//	was never read (presumably set by the chunk constructors --
		//	confirm against their definitions).
		if ( ! commonChunk.header.id || ! soundDataChunk.header.id )
		{
			Throw( FileIOException, 
				   "Reached end of file before finding both a Common chunk and a Sound Data chunk." );
		}
	}
	catch ( Exception & ex ) 
	{
		ex.append( " Failed to read AIFF file." );
		throw;
	}
	
	
	//	all the chunks have been read, use them to initialize
	//	the AiffFile members:
	rate_ = commonChunk.srate;
	
	if ( instrumentChunk.header.id )
	{
		//	the detune value is applied in hundredths of a note number
		//	(presumably cents -- confirm against the Instrument chunk reader).
		notenum_ = instrumentChunk.baseNote;
		notenum_ -= 0.01 * instrumentChunk.detune;
	}
	
	if ( markerChunk.header.id )
	{
		for ( int j = 0; j < markerChunk.numMarkers; ++j )
		{
			//	marker positions are divided by the sample rate before
			//	being stored.
			MarkerCk::Marker & m = markerChunk.markers[j];
			markers_.push_back( Marker( m.position / rate_, m.markerName ) );
		}		
	}
	
	convertBytesToSamples( soundDataChunk.sampleBytes, samples_, commonChunk.bitsPerSample );
	if ( samples_.size() != commonChunk.sampleFrames )
	{
		//	report, but tolerate, a frame count that disagrees with the header.
		notifier << "Found " << samples_.size() << " frames of "
				 << commonChunk.bitsPerSample << "-bit sample data." << endl;
		notifier << "Header says there should be " << commonChunk.sampleFrames 
				 << "." << endl;
	}
}