int main (int argc, char* argv[]){ struct pb_Parameters* prms; struct pb_TimerSet timers; prms = pb_ReadParameters(&argc,argv); pb_InitializeTimerSet(&timers); pb_AddSubTimer(&timers, oclOverhead, pb_TimerID_KERNEL); pb_SwitchToTimer(&timers, pb_TimerID_NONE); char uksdata[250]; parameters params; FILE* uksfile_f = NULL; FILE* uksdata_f = NULL; strcpy(uksdata,prms->inpFiles[0]); strcat(uksdata,".data"); uksfile_f = fopen(prms->inpFiles[0],"r"); if (uksfile_f == NULL){ printf("ERROR: Could not open %s\n",prms->inpFiles[0]); exit(1); } printf("\nReading parameters\n"); if (argc >= 2){ params.binsize = atoi(argv[1]); } else { //default binsize value; params.binsize = 128; } setParameters(uksfile_f, ¶ms); pb_SwitchToTimer(&timers, pb_TimerID_IO); ReconstructionSample* samples; //Input Data // cl_mem samplesPin; float* LUT; //use look-up table for faster execution on CPU (intermediate data) unsigned int sizeLUT; //set in the function calculateLUT (intermediate data) cmplx* gridData; //Output Data float* sampleDensity; //Output Data // cl_mem gridDataPin; // cl_mem sampleDensityPin; cmplx* gridData_gold; //Gold Output Data float* sampleDensity_gold; //Gold Output Data cl_int ciErrNum; cl_platform_id clPlatform; cl_device_type deviceType = CL_DEVICE_TYPE_GPU; cl_device_id clDevice; cl_context clContext; int deviceFound = getOpenCLDevice(&clPlatform, &clDevice, &deviceType, 0); size_t max_alloc_size = 0; (void) clGetDeviceInfo(clDevice, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(size_t), &max_alloc_size, 0); size_t global_mem_size = 0; (void) clGetDeviceInfo(clDevice, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(size_t), &global_mem_size, 0); size_t samples_size = params.numSamples*sizeof(ReconstructionSample); int gridNumElems = params.gridSize[0] * params.gridSize[1] * params.gridSize[2]; size_t output_size = gridNumElems*sizeof(cmplx); if ( (deviceFound < 0) || ((samples_size+output_size) > global_mem_size) || (samples_size > max_alloc_size) || (output_size > max_alloc_size ) ) { fprintf(stderr, "No suitable device was found\n"); if(deviceFound >= 0) { fprintf(stderr, "Memory requirements for this dataset exceed device capabilities\n"); } exit(1); } cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties) clPlatform, 0}; clContext = clCreateContextFromType(cps, deviceType, NULL, NULL, &ciErrNum); OCL_ERRCK_VAR(ciErrNum); cl_command_queue clCommandQueue = clCreateCommandQueue(clContext, clDevice, CL_QUEUE_PROFILING_ENABLE, &ciErrNum); OCL_ERRCK_VAR(ciErrNum); cl_uint workItemDimensions; OCL_ERRCK_RETVAL( clGetDeviceInfo(clDevice, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &workItemDimensions, NULL) ); size_t workItemSizes[workItemDimensions]; OCL_ERRCK_RETVAL( clGetDeviceInfo(clDevice, CL_DEVICE_MAX_WORK_ITEM_SIZES, workItemDimensions*sizeof(size_t), workItemSizes, NULL) ); pb_SetOpenCL(&clContext, &clCommandQueue); /* samplesPin = clCreateBuffer(clContext, CL_MEM_ALLOC_HOST_PTR, params.numSamples*sizeof(ReconstructionSample), NULL, &ciErrNum); */ samples = (ReconstructionSample *) malloc ( params.numSamples*sizeof(ReconstructionSample) ); /*(ReconstructionSample *) clEnqueueMapBuffer(clCommandQueue, samplesPin, CL_TRUE, CL_MAP_WRITE, 0, params.numSamples*sizeof(ReconstructionSample), 0, NULL, NULL, &ciErrNum); OCL_ERRCK_VAR(ciErrNum); */ if (samples == NULL){ printf("ERROR: Unable to allocate and map memory for input data\n"); exit(1); } uksdata_f = fopen(uksdata,"rb"); if(uksdata_f == NULL){ printf("ERROR: Could not open data file\n"); exit(1); } printf("Reading input data from files\n"); unsigned int n = readSampleData(params, uksdata_f, samples); fclose(uksdata_f); if (params.useLUT){ printf("Generating Look-Up Table\n"); float beta = PI * sqrt(4*params.kernelWidth*params.kernelWidth/(params.oversample*params.oversample) * (params.oversample-.5)*(params.oversample-.5)-.8); calculateLUT(beta, params.kernelWidth, &LUT, &sizeLUT); } pb_SwitchToTimer(&timers, pb_TimerID_NONE); gridData_gold = (cmplx*) calloc (gridNumElems, sizeof(cmplx)); sampleDensity_gold = (float*) calloc (gridNumElems, sizeof(float)); if (sampleDensity_gold == NULL || gridData_gold == NULL){ printf("ERROR: Unable to allocate memory for output data\n"); exit(1); } printf("Running gold version\n"); gridding_Gold(n, params, samples, LUT, sizeLUT, gridData_gold, sampleDensity_gold); printf("Running OpenCL version\n"); pb_SwitchToTimer(&timers, pb_TimerID_COPY); /* OCL_ERRCK_RETVAL( clEnqueueWriteBuffer(clCommandQueue, samplesPin, CL_TRUE, 0, // Offset in bytes n*sizeof(ReconstructionSample), // Size of data to write samples, // Host Source 0, NULL, NULL) );*/ // OCL_ERRCK_RETVAL ( clFinish(clCommandQueue) ); /* gridDataPin = clCreateBuffer(clContext, CL_MEM_ALLOC_HOST_PTR, gridNumElems*sizeof(cmplx), NULL, &ciErrNum); OCL_ERRCK_VAR(ciErrNum); */ gridData = (cmplx *) malloc ( gridNumElems*sizeof(cmplx) ); if (gridData == NULL) { fprintf(stderr, "Could not allocate memory on host! (%s: %d)\n", __FILE__, __LINE__); exit(1); } /*(cmplx *) clEnqueueMapBuffer(clCommandQueue, gridDataPin, CL_TRUE, CL_MAP_READ, 0, gridNumElems*sizeof(cmplx), 0, NULL, NULL, &ciErrNum); OCL_ERRCK_VAR(ciErrNum); */ /* sampleDensityPin = clCreateBuffer(clContext, CL_MEM_ALLOC_HOST_PTR, gridNumElems*sizeof(float), NULL, &ciErrNum); OCL_ERRCK_VAR(ciErrNum); */ sampleDensity = (float *) malloc ( gridNumElems*sizeof(float) ); if (sampleDensity == NULL) { fprintf(stderr, "Could not allocate memory on host! (%s: %d)\n", __FILE__, __LINE__); exit(1); } /*(float *) clEnqueueMapBuffer(clCommandQueue, sampleDensityPin, CL_TRUE, CL_MAP_READ, 0, gridNumElems*sizeof(float), 0, NULL, NULL, &ciErrNum); */ OCL_ERRCK_VAR(ciErrNum); OCL_ERRCK_VAR(ciErrNum); if (sampleDensity == NULL || gridData == NULL){ printf("ERROR: Unable to allocate memory for output data\n"); exit(1); } pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); //Interface function to GPU implementation of gridding OpenCL_interface(&timers, n, params, samples, LUT, sizeLUT, gridData, sampleDensity, clContext, clCommandQueue, clDevice, workItemSizes); pb_SwitchToTimer(&timers, pb_TimerID_NONE); int passed=1; for (int i=0; i<gridNumElems; i++){ if(sampleDensity[i] != sampleDensity_gold[i]) { passed=0; break; } } //(passed) ? printf("Comparing GPU and Gold results... PASSED\n"):printf("Comparing GPU and Gold results... FAILED\n"); pb_SwitchToTimer(&timers, pb_TimerID_IO); FILE* outfile; if(!(outfile=fopen(prms->outFile,"w"))) { printf("Cannot open output file!\n"); } else { fwrite(&passed,sizeof(int),1,outfile); fclose(outfile); } pb_SwitchToTimer(&timers, pb_TimerID_NONE); if (params.useLUT){ free(LUT); } /* OCL_ERRCK_RETVAL ( clEnqueueUnmapMemObject(clCommandQueue, samplesPin, samples, 0, NULL, NULL) ); OCL_ERRCK_RETVAL ( clEnqueueUnmapMemObject(clCommandQueue, gridDataPin, gridData, 0, NULL, NULL) ); OCL_ERRCK_RETVAL ( clEnqueueUnmapMemObject(clCommandQueue, sampleDensityPin, sampleDensity, 0, NULL, NULL) ); clReleaseMemObject(samplesPin); clReleaseMemObject(gridDataPin); clReleaseMemObject(sampleDensityPin); */ free(samples); free(gridData); free(sampleDensity); free(gridData_gold); free(sampleDensity_gold); printf("\n"); pb_PrintTimerSet(&timers); pb_FreeParameters(prms); return 0; }
// --------------------------------------------------------------------------- // readAiffData // --------------------------------------------------------------------------- // void AiffFile::readAiffData( const std::string & filename ) { ContainerCk containerChunk; CommonCk commonChunk; SoundDataCk soundDataChunk; InstrumentCk instrumentChunk; MarkerCk markerChunk; try { std::ifstream s( filename.c_str(), std::ifstream::binary ); // the Container chunk must be first, read it: readChunkHeader( s, containerChunk.header ); if ( !s ) { Throw( FileIOException, "File not found, or corrupted." ); } if ( containerChunk.header.id != ContainerId ) { Throw( FileIOException, "Found no Container chunk." ); } readContainer( s, containerChunk, containerChunk.header.size ); // read other chunks, we are only interested in // the Common chunk, the Sound Data chunk, the Markers: CkHeader h; while ( readChunkHeader( s, h ) ) { switch (h.id) { case CommonId: readCommonData( s, commonChunk, h.size ); if ( commonChunk.channels != 1 ) { Throw( FileIOException, "Loris only processes single-channel AIFF samples files." ); } if ( commonChunk.bitsPerSample != 8 && commonChunk.bitsPerSample != 16 && commonChunk.bitsPerSample != 24 && commonChunk.bitsPerSample != 32 ) { Throw( FileIOException, "Unrecognized sample size." ); } break; case SoundDataId: readSampleData( s, soundDataChunk, h.size ); break; case InstrumentId: readInstrumentData( s, instrumentChunk, h.size ); break; case MarkerId: readMarkerData( s, markerChunk, h.size ); break; default: s.ignore( h.size ); } } if ( ! commonChunk.header.id || ! soundDataChunk.header.id ) { Throw( FileIOException, "Reached end of file before finding both a Common chunk and a Sound Data chunk." ); } } catch ( Exception & ex ) { ex.append( " Failed to read AIFF file." ); throw; } // all the chunks have been read, use them to initialize // the AiffFile members: rate_ = commonChunk.srate; if ( instrumentChunk.header.id ) { notenum_ = instrumentChunk.baseNote; notenum_ -= 0.01 * instrumentChunk.detune; } if ( markerChunk.header.id ) { for ( int j = 0; j < markerChunk.numMarkers; ++j ) { MarkerCk::Marker & m = markerChunk.markers[j]; markers_.push_back( Marker( m.position / rate_, m.markerName ) ); } } convertBytesToSamples( soundDataChunk.sampleBytes, samples_, commonChunk.bitsPerSample ); if ( samples_.size() != commonChunk.sampleFrames ) { notifier << "Found " << samples_.size() << " frames of " << commonChunk.bitsPerSample << "-bit sample data." << endl; notifier << "Header says there should be " << commonChunk.sampleFrames << "." << endl; } }