int ScanLargeArrays::setupScanLargeArrays() { // input buffer size cl_uint sizeBytes = length * sizeof(cl_float); // allocate memory for input arrray input = (cl_float*)malloc(sizeBytes); CHECK_ALLOCATION(input, "Failed to allocate host memory. (input)"); // random initialisation of input sampleCommon->fillRandom<cl_float>(input, length, 1, 0, 255); // allocate memory for output buffer output = (cl_float*)malloc(sizeBytes); CHECK_ALLOCATION(output, "Failed to allocate host memory. (output)"); // if verification is enabled if(verify) { // allocate memory for verification output array verificationOutput = (cl_float*)malloc(sizeBytes); CHECK_ALLOCATION(verificationOutput, "Failed to allocate host memory. (verify)"); memset(verificationOutput, 0, sizeBytes); } // Unless quiet mode has been enabled, print the INPUT array if(!quiet) { sampleCommon->printArray<cl_float>("Input", input, length, 1); } return SDK_SUCCESS; }
int ScanLargeArrays::initialize() { // Call base class Initialize to get default configuration if(this->SDKSample::initialize() != SDK_SUCCESS) return SDK_FAILURE; streamsdk::Option* array_length = new streamsdk::Option; CHECK_ALLOCATION(array_length,"Memory Allocation error.(array_length)"); array_length->_sVersion = "x"; array_length->_lVersion = "length"; array_length->_description = "Length of the input array"; array_length->_type = streamsdk::CA_ARG_INT; array_length->_value = &length; sampleArgs->AddOption(array_length); delete array_length; streamsdk::Option* iteration_option = new streamsdk::Option; CHECK_ALLOCATION(iteration_option,"Memory Allocation error.(iteration_option)"); iteration_option->_sVersion = "i"; iteration_option->_lVersion = "iterations"; iteration_option->_description = "Number of iterations to execute kernel"; iteration_option->_type = streamsdk::CA_ARG_INT; iteration_option->_value = &iterations; sampleArgs->AddOption(iteration_option); delete iteration_option; return SDK_SUCCESS; }
int BinomialOption::initialize() { // Call base class Initialize to get default configuration CHECK_ERROR(this->SDKSample::initialize(), SDK_SUCCESS, "OpenCL Resource Intilization failed"); streamsdk::Option* num_samples = new streamsdk::Option; CHECK_ALLOCATION(num_samples, "Error. Failed to allocate memory (num_samples)\n"); num_samples->_sVersion = "x"; num_samples->_lVersion = "samples"; num_samples->_description = "Number of samples to be calculated"; num_samples->_type = streamsdk::CA_ARG_INT; num_samples->_value = &numSamples; sampleArgs->AddOption(num_samples); delete num_samples; streamsdk::Option* num_iterations = new streamsdk::Option; CHECK_ALLOCATION(num_iterations, "Error. Failed to allocate memory (num_iterations)\n"); num_iterations->_sVersion = "i"; num_iterations->_lVersion = "iterations"; num_iterations->_description = "Number of iterations for kernel execution"; num_iterations->_type = streamsdk::CA_ARG_INT; num_iterations->_value = &iterations; sampleArgs->AddOption(num_iterations); delete num_iterations; return SDK_SUCCESS; }
int BinomialOption::setupBinomialOption() { // Make numSamples multiple of 4 numSamples = (numSamples / 4)? (numSamples / 4) * 4: 4; #if defined (_WIN32) randArray = (cl_float*)_aligned_malloc(numSamples * sizeof(cl_float4), 16); #else randArray = (cl_float*)memalign(16, numSamples * sizeof(cl_float4)); #endif CHECK_ALLOCATION(randArray, "Failed to allocate host memory. (randArray)"); for(int i = 0; i < numSamples * 4; i++) { randArray[i] = (float)rand() / (float)RAND_MAX; } #if defined (_WIN32) output = (cl_float*)_aligned_malloc(numSamples * sizeof(cl_float4), 16); #else output = (cl_float*)memalign(16, numSamples * sizeof(cl_float4)); #endif CHECK_ALLOCATION(output, "Failed to allocate host memory. (output)"); memset(output, 0, numSamples * sizeof(cl_float4)); return SDK_SUCCESS; }
int SimpleConvolution::initialize() { // Call base class Initialize to get default configuration if (this->SDKSample::initialize() != SDK_SUCCESS) return SDK_FAILURE; // Now add customized options streamsdk::Option* width_option = new streamsdk::Option; CHECK_ALLOCATION(width_option, "Memory allocation error.\n"); width_option->_sVersion = "x"; width_option->_lVersion = "width"; width_option->_description = "Width of the input matrix"; width_option->_type = streamsdk::CA_ARG_INT; width_option->_value = &width; sampleArgs->AddOption(width_option); delete width_option; streamsdk::Option* height_option = new streamsdk::Option; CHECK_ALLOCATION(height_option, "Memory allocation error.\n"); height_option->_sVersion = "y"; height_option->_lVersion = "height"; height_option->_description = "Height of the input matrix"; height_option->_type = streamsdk::CA_ARG_INT; height_option->_value = &height; sampleArgs->AddOption(height_option); delete height_option; streamsdk::Option* mask_width = new streamsdk::Option; CHECK_ALLOCATION(mask_width, "Memory allocation error.\n"); maskWidth = 3; mask_width->_sVersion = "m"; mask_width->_lVersion = "masksize"; mask_width->_description = "Width of the mask matrix"; mask_width->_type = streamsdk::CA_ARG_INT; mask_width->_value = &maskWidth; sampleArgs->AddOption(mask_width); delete mask_width; streamsdk::Option* num_iterations = new streamsdk::Option; CHECK_ALLOCATION(num_iterations, "Memory allocation error.\n"); num_iterations->_sVersion = "i"; num_iterations->_lVersion = "iterations"; num_iterations->_description = "Number of iterations for kernel execution"; num_iterations->_type = streamsdk::CA_ARG_INT; num_iterations->_value = &iterations; sampleArgs->AddOption(num_iterations); delete num_iterations; return SDK_SUCCESS; }
/*
 * Find the first OpenCL platform whose CL_PLATFORM_NAME contains `substr`.
 *
 * substr          - substring searched for with strstr(); must not be NULL.
 * platform_id_out - receives the matching platform id; must not be NULL
 *                   (asserted).
 * err             - optional; when non-NULL it receives the status of the
 *                   last OpenCL call made.
 *
 * Returns 0 when a platform was found, -1 when none matched. The
 * CHECK_CL_ERROR / CHECK_ALLOCATION macros are defined elsewhere; they are
 * presumably written to jump to the `error` label, which frees both buffers
 * and returns -1.
 */
static int get_desired_platform(const char *substr,
                                cl_platform_id *platform_id_out, cl_int *err)
{
    cl_int _err = CL_SUCCESS;
    cl_uint i, num_platforms;
    cl_platform_id *platform_ids = NULL;
    char *platform_name = NULL;

    assert(platform_id_out != NULL);

    /* Let the rest of the function write through `err` unconditionally. */
    if (!err) err = &_err;

    /* First call: only ask how many platforms exist. */
    *err = clGetPlatformIDs(0, NULL, &num_platforms);
    CHECK_CL_ERROR(*err);

    platform_ids = malloc(sizeof(*platform_ids) * num_platforms);
    CHECK_ALLOCATION(platform_ids);

    /* Second call: fetch the platform ids themselves. */
    *err = clGetPlatformIDs(num_platforms, platform_ids, NULL);
    CHECK_CL_ERROR(*err);

    for (i = 0; i < num_platforms; i++) {
        size_t platform_name_size;
        char *resized_name;

        /* Query the size of the name, then the name itself. */
        *err = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, 0, NULL,
                                 &platform_name_size);
        CHECK_CL_ERROR(*err);

        /*
         * Grow the name buffer through a temporary: when realloc() fails it
         * returns NULL and leaves the old block allocated, so assigning the
         * result straight to platform_name would leak that block.
         */
        resized_name = realloc(platform_name,
                               sizeof(*platform_name) * platform_name_size);
        CHECK_ALLOCATION(resized_name);
        platform_name = resized_name;

        *err = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME,
                                 platform_name_size, platform_name, NULL);
        CHECK_CL_ERROR(*err);

        if (DEBUG)
            printf("Platform %u: \"%s\"\n", i, platform_name);

        if (strstr(platform_name, substr))
            break;
    }

    if (i < num_platforms)
        *platform_id_out = platform_ids[i];
    else
        goto error; /* No platforms found */

    free(platform_ids);
    free(platform_name);
    return 0;

error:
    free(platform_ids);
    free(platform_name);
    return -1;
}
int SobelFilter::verifyResults() { if(!byteRWSupport) { return SDK_SUCCESS; } if(sampleArgs->verify) { // reference implementation sobelFilterCPUReference(); float *outputDevice = new float[width * height * pixelSize]; CHECK_ALLOCATION(outputDevice, "Failed to allocate host memory! (outputDevice)"); float *outputReference = new float[width * height * pixelSize]; CHECK_ALLOCATION(outputReference, "Failed to allocate host memory!" "(outputReference)"); // copy uchar data to float array for(int i = 0; i < (int)(width * height); i++) { outputDevice[i * 4 + 0] = outputImageData[i].s[0]; outputDevice[i * 4 + 1] = outputImageData[i].s[1]; outputDevice[i * 4 + 2] = outputImageData[i].s[2]; outputDevice[i * 4 + 3] = outputImageData[i].s[3]; outputReference[i * 4 + 0] = verificationOutput[i * 4 + 0]; outputReference[i * 4 + 1] = verificationOutput[i * 4 + 1]; outputReference[i * 4 + 2] = verificationOutput[i * 4 + 2]; outputReference[i * 4 + 3] = verificationOutput[i * 4 + 3]; } // compare the results and see if they match if(compare(outputReference, outputDevice, width * height * 4)) { std::cout << "Passed!\n" << std::endl; delete[] outputDevice; delete[] outputReference; return SDK_SUCCESS; } else { std::cout << "Failed\n" << std::endl; delete[] outputDevice; delete[] outputReference; return SDK_FAILURE; } } return SDK_SUCCESS; }
int MatrixMulDouble::setupMatrixMulDouble() { // allocate and init memory used by host inputA[widthA][heightA] cl_uint inputSizeBytes0 = widthA * heightA * sizeof(cl_double); inputA = (cl_double*) malloc(inputSizeBytes0); CHECK_ALLOCATION(inputA, "Failed to allocate host memory. (inputA)"); // allocate and init memory used by host inputB[widthB][heightB] cl_uint inputSizeBytes1 = widthB * heightB * sizeof(cl_double); inputB = (cl_double*) malloc(inputSizeBytes1); CHECK_ALLOCATION(inputB, "Failed to allocate host memory. (inputB)"); // random initialisation of input fillRandom<cl_double>(inputA, widthA, heightA, 0, 10); fillRandom<cl_double>(inputB, widthB, heightB, 0, 10); // allocate memory for output[widthB][heightA] cl_uint outputSizeBytes = heightA * widthB * sizeof(cl_double); output = (cl_double*) malloc(outputSizeBytes); CHECK_ALLOCATION(output, "Failed to allocate host memory. (output)"); // allocate memory for output[widthB][heightA] of reference implemenation if(sampleArgs->verify) { refOutput = (cl_double*) malloc(outputSizeBytes); CHECK_ALLOCATION(refOutput, "Failed to allocate host memory. (refOutput)"); memset(refOutput, 0, outputSizeBytes); } /* * Unless quiet mode has been enabled, print the INPUT arrays */ if(!sampleArgs->quiet) { printArray<cl_double>( "inputA", inputA, widthA, 1); printArray<cl_double>( "inputB", inputB, widthB, 1); } return SDK_SUCCESS; }
int DeviceFission::initialize() { // Call base class Initialize to get default configuration if(this->SDKSample::initialize()) return SDK_FAILURE; // Now add customized options streamsdk::Option* array_length = new streamsdk::Option; CHECK_ALLOCATION(array_length, "Memory allocation error.\n"); array_length->_sVersion = "x"; array_length->_lVersion = "length"; array_length->_description = "Length of the Input array (Default value 1024)"; array_length->_type = streamsdk::CA_ARG_INT; array_length->_value = &length; sampleArgs->AddOption(array_length); delete array_length; streamsdk::Option* bool_cpu2gpu = new streamsdk::Option; CHECK_ALLOCATION(bool_cpu2gpu, "Memory allocation error.\n"); bool_cpu2gpu->_sVersion = "g"; bool_cpu2gpu->_lVersion = "cpu2gpu"; bool_cpu2gpu->_description = "Switch to migrate memory object from sub device to GPU before run kernels.(0 or 1, where 0 is disable, 1 is enable)"; bool_cpu2gpu->_type = streamsdk::CA_ARG_INT; bool_cpu2gpu->_value = &cpu2gpuValue; sampleArgs->AddOption(bool_cpu2gpu); delete bool_cpu2gpu; streamsdk::Option* bool_cpu2cpu = new streamsdk::Option; CHECK_ALLOCATION(bool_cpu2cpu, "Memory allocation error.\n"); bool_cpu2cpu->_sVersion = "c"; bool_cpu2cpu->_lVersion = "cpu2cpu"; bool_cpu2cpu->_description = "Switch to migrate memory object from sub device to another sub device before run kernels. (0 or 1, where 0 is disable, 1 is enable)"; bool_cpu2cpu->_type = streamsdk::CA_ARG_INT; bool_cpu2cpu->_value = &cpu2cpuValue; sampleArgs->AddOption(bool_cpu2cpu); delete bool_cpu2cpu; streamsdk::Option* load_gpu = new streamsdk::Option; CHECK_ALLOCATION(load_gpu, "Memory allocation error.\n"); load_gpu->_sVersion = ""; load_gpu->_lVersion = "loadgpu"; load_gpu->_description = "Load GPU binary image and execute on GPU"; load_gpu->_type = streamsdk::CA_ARG_STRING; load_gpu->_value = &loadBinaryGPU; sampleArgs->AddOption(load_gpu); delete load_gpu; return SDK_SUCCESS; }
int SobelFilter::readInputImage(std::string inputImageName) { // load input bitmap image inputBitmap.load(inputImageName.c_str()); // error if image did not load if(!inputBitmap.isLoaded()) { std::cout << "Failed to load input image!"; return SDK_FAILURE; } // get width and height of input image height = inputBitmap.getHeight(); width = inputBitmap.getWidth(); // allocate memory for input & output image data inputImageData = (cl_uchar4*)malloc(width * height * sizeof(cl_uchar4)); CHECK_ALLOCATION(inputImageData, "Failed to allocate memory! (inputImageData)"); // allocate memory for output image data outputImageData = (cl_uchar4*)malloc(width * height * sizeof(cl_uchar4)); CHECK_ALLOCATION(outputImageData, "Failed to allocate memory! (outputImageData)"); // initializa the Image data to NULL memset(outputImageData, 0, width * height * pixelSize); // get the pointer to pixel data pixelData = inputBitmap.getPixels(); if(pixelData == NULL) { std::cout << "Failed to read pixel Data!"; return SDK_FAILURE; } // Copy pixel data into inputImageData memcpy(inputImageData, pixelData, width * height * pixelSize); // allocate memory for verification output verificationOutput = (cl_uchar*)malloc(width * height * pixelSize); CHECK_ALLOCATION(verificationOutput, "verificationOutput heap allocation failed!"); // initialize the data to NULL memset(verificationOutput, 0, width * height * pixelSize); return SDK_SUCCESS; }
int GlobalMemoryBandwidth::setupGlobalMemoryBandwidth() { //Make vectorSize as 4 if -v option is 3. //This memeory alignment is required as per OpenCL for type3 vectors if(vectorSize == 3) { vec3 = true; vectorSize = 4; } /** * Allocate memory required for global buffer * This includes both single and linear(cached and uncached) reads */ cl_uint sizeElement = vectorSize * sizeof(cl_float); cl_uint readLength = length + (NUM_READS * 1024 / sizeElement) + EXTRA_BYTES; cl_uint size = readLength * vectorSize * sizeof(cl_float); input = (cl_float*)malloc(size); CHECK_ALLOCATION(input, "Failed to allocate host memory. (input)"); outputReadSingle = (cl_float*)malloc(length * vectorSize * sizeof(cl_float)); CHECK_ALLOCATION(outputReadSingle, "Failed to allocate host memory. (outputReadSingle)"); memset(outputReadSingle, 0, length * vectorSize * sizeof(cl_float)); outputReadLinear = (cl_float*)malloc(length * vectorSize * sizeof(cl_float)); CHECK_ALLOCATION(outputReadLinear, "Failed to allocate host memory. (outputReadLinear)"); memset(outputReadLinear, 0, length * vectorSize * sizeof(cl_float)); outputReadLU = (cl_float*)malloc(length * vectorSize * sizeof(cl_float)); CHECK_ALLOCATION(outputReadLU, "Failed to allocate host memory. (outputReadLU)"); memset(outputReadLU, 0, length * vectorSize * sizeof(cl_float)); outputWriteLinear = (cl_float*)malloc(size); CHECK_ALLOCATION(outputWriteLinear, "Failed to allocate host memory. (outputWriteLinear)"); memset(outputWriteLinear, 0, size); // random initialisation of input sampleCommon->fillRandom<cl_float>(input, readLength * vectorSize, 1, 0, 10); return SDK_SUCCESS; }
int MatrixMulImage::setupMatrixMulImage() { // allocate and init memory used by host input0[width0][height0] cl_uint inputSizeBytes0 = width0 * height0 * sizeof(cl_float); input0 = (cl_float *) malloc(inputSizeBytes0); CHECK_ALLOCATION(input0, "Failed to allocate host memory. (input0)"); // allocate and init memory used by host input1[width1][height1] cl_uint inputSizeBytes1 = width1 * height1 * sizeof(cl_float); input1 = (cl_float *) malloc(inputSizeBytes1); CHECK_ALLOCATION(input1, "Failed to allocate host memory. (input1)"); // random initialisation of input sampleCommon->fillRandom<cl_float>(input0, width0, height0, 0, 10); sampleCommon->fillRandom<cl_float>(input1, width1, height1, 0, 10); // allocate memory for output[width1][height0] cl_uint outputSizeBytes = height0 * width1 * sizeof(cl_float); output = (cl_float *) malloc(outputSizeBytes); CHECK_ALLOCATION(output, "Failed to allocate host memory. (output)"); // allocate memory for output[width1][height0] of reference implemenation if(verify) { verificationOutput = (cl_float *) malloc(outputSizeBytes); CHECK_ALLOCATION(verificationOutput, "Failed to allocate host memory. (verificationOutput)"); memset(verificationOutput, 0, outputSizeBytes); } /* * Unless quiet mode has been enabled, print the INPUT arrays */ if(!quiet) { sampleCommon->printArray<cl_float>( "Input0", input0, width0, 1); sampleCommon->printArray<cl_float>( "Input1", input1, width1, 1); } return SDK_SUCCESS; }
int ImageOverlap::readImage(std::string mapImageName,std::string verifyImageName) { // load input bitmap image mapBitmap.load(mapImageName.c_str()); verifyBitmap.load(verifyImageName.c_str()); // error if image did not load if(!mapBitmap.isLoaded()) { sampleCommon->error("Failed to load input image!"); return SDK_FAILURE; } // get width and height of input image height = mapBitmap.getHeight(); width = mapBitmap.getWidth(); image_desc.image_width=width; image_desc.image_height=height; // allocate memory for map image data mapImageData = (cl_uchar4*)malloc(width * height * sizeof(cl_uchar4)); CHECK_ALLOCATION(mapImageData,"Failed to allocate memory! (mapImageData)"); // allocate memory for fill image data fillImageData = (cl_uchar4*)malloc(width * height * sizeof(cl_uchar4)); CHECK_ALLOCATION(fillImageData,"Failed to allocate memory! (fillImageData)"); // initializa the Image data to NULL memset(fillImageData, 0, width * height * pixelSize); // get the pointer to pixel data pixelData = mapBitmap.getPixels(); CHECK_ALLOCATION(pixelData,"Failed to read mapBitmap pixel Data!"); // Copy pixel data into mapImageData memcpy(mapImageData, pixelData, width * height * pixelSize); // allocate memory for verification output verificationImageData = (cl_uchar4*)malloc(width * height * pixelSize); CHECK_ALLOCATION(pixelData,"verificationOutput heap allocation failed!"); pixelData = verifyBitmap.getPixels(); CHECK_ALLOCATION(pixelData,"Failed to read verifyBitmap pixel Data!"); // Copy pixel data into verificationOutput memcpy(verificationImageData, pixelData, width * height * pixelSize); return SDK_SUCCESS; }
int DeviceFission::setupDeviceFission() { // Make sure length is multiple of group size * numSubDevices unsigned int mulFactor = (unsigned int)groupSize * numSubDevices; length = (length < mulFactor) ? mulFactor : length; length = (length / mulFactor) * mulFactor; // Calculate half length half_length = length >> 1; // Deal with options: cpu2cpu and cpu2gpu if(cpu2gpuValue != 0 && cpu2cpuValue != 0) { std::cout << "cpu2gpu and cpu2cpu can't be both true. Disable cpu2cpu."<< std::endl; cpu2cpuValue = 0; } if(cpu2gpuValue != 0) { std::cout << "Enable cpu2gpu mode."<< std::endl; cpu2gpu = CL_TRUE; } if(cpu2cpuValue != 0) { std::cout << "Enable cpu2cpu mode."<< std::endl; cpu2cpu = CL_TRUE; } // Get allocate memory for input buffer input = (cl_int*)malloc(half_length * sizeof(cl_int)); CHECK_ALLOCATION(input, "Failed to allocate host memory. (input)"); // Random initialisation of input sampleCommon->fillRandom<cl_int>(input, half_length, 1, 1, 8); // Unless quiet mode has been enabled, print the INPUT array if(!quiet) sampleCommon->printArray<cl_int>("Input:", input, half_length, 1); // Get allocate memory for subOutput buffer subOutput = (cl_int*)malloc(length * sizeof(cl_int)); CHECK_ALLOCATION(subOutput, "Failed to allocate host memory. (subOutput)"); // Get allocate memory for gpuOutput buffer gpuOutput = (cl_int*)malloc(length * sizeof(cl_int)); CHECK_ALLOCATION(gpuOutput, "Failed to allocate host memory. (gpuOutput)"); return SDK_SUCCESS; }
/****************************************************************************** * Implementation of BoltSample::initialize() * ******************************************************************************/ int BoltSample::initialize() { int defaultOptions = 6; boltsdk::Option *optionList = new boltsdk::Option[defaultOptions]; CHECK_ALLOCATION(optionList, "Error. Failed to allocate memory (optionList)\n"); optionList[0]._sVersion = "q"; optionList[0]._lVersion = "quiet"; optionList[0]._description = "Quiet mode. Suppress most text output."; optionList[0]._type = boltsdk::CA_NO_ARGUMENT; optionList[0]._value = &quiet; optionList[1]._sVersion = "e"; optionList[1]._lVersion = "verify"; optionList[1]._description = "Verify results against reference implementation."; optionList[1]._type = boltsdk::CA_NO_ARGUMENT; optionList[1]._value = &verify; optionList[2]._sVersion = "t"; optionList[2]._lVersion = "timing"; optionList[2]._description = "Print timing related statistics."; optionList[2]._type = boltsdk::CA_NO_ARGUMENT; optionList[2]._value = &timing; optionList[3]._sVersion = "v"; optionList[3]._lVersion = "version"; optionList[3]._description = "Bolt lib & runtime version string."; optionList[3]._type = boltsdk::CA_NO_ARGUMENT; optionList[3]._value = &version; optionList[4]._sVersion = "x"; optionList[4]._lVersion = "samples"; optionList[4]._description = "Number of sample input values."; optionList[4]._type = boltsdk::CA_ARG_INT; optionList[4]._value = &samples; optionList[5]._sVersion = "i"; optionList[5]._lVersion = "iterations"; optionList[5]._description = "Number of iterations."; optionList[5]._type = boltsdk::CA_ARG_INT; optionList[5]._value = &iterations; sampleArgs = new boltsdk::BoltCommandArgs(defaultOptions, optionList); CHECK_ALLOCATION(sampleArgs, "Failed to allocate memory. (sampleArgs)\n"); return SDK_SUCCESS; }
int LDSBandwidth::setupLDSBandwidth() { /** * Make vectorSize as 4 if -v option is 3. * This memory alignment is required as per OpenCL for type3 vectors */ if(vectorSize == 3) { vec3 = true; vectorSize = 4; } else if((1 != vectorSize) && (2 != vectorSize) && (4 != vectorSize) && (8 != vectorSize) && (16 != vectorSize)) { std::cout << "The vectorsize can only be one of 1,2,3(4),4,8,16!" << std::endl; return SDK_FAILURE; } // host output output = (cl_float*)malloc(length * vectorSize * sizeof(cl_float)); CHECK_ALLOCATION(output, "Failed to allocate host memory. (output)"); return SDK_SUCCESS; }
int SimpleConvolution::verifyResults() { if(verify) { verificationOutput = (cl_uint *) malloc(width * height * sizeof(cl_uint )); CHECK_ALLOCATION(verificationOutput, "Failed to allocate host memory. (verificationOutput)"); /* * reference implementation */ cl_uint2 inputDimensions = {width , height}; cl_uint2 maskDimensions = {maskWidth, maskHeight}; simpleConvolutionCPUReference(verificationOutput, input, mask, width, height, maskWidth, maskHeight); // compare the results and see if they match if(memcmp(output, verificationOutput, height*width*sizeof(cl_uint )) == 0) { std::cout<<"Passed!\n" << std::endl; return SDK_SUCCESS; } else { std::cout<<"Failed\n" << std::endl; return SDK_FAILURE; } } return SDK_SUCCESS; }
/** *********************************************************************** * @fn initialize * @brief Initialize the resources used by tests * @return 0 on success Positive if expected and Non-zero on failure **********************************************************************/ int initialize() { int defaultOptions = 5; Option *optionList = new Option[defaultOptions]; CHECK_ALLOCATION(optionList, "Error. Failed to allocate memory (optionList)\n"); optionList[0]._sVersion = "q"; optionList[0]._lVersion = "quiet"; optionList[0]._description = "Quiet mode. Suppress all text output."; optionList[0]._type = CA_NO_ARGUMENT; optionList[0]._value = &quiet; optionList[1]._sVersion = "e"; optionList[1]._lVersion = "verify"; optionList[1]._description = "Verify results against reference implementation."; optionList[1]._type = CA_NO_ARGUMENT; optionList[1]._value = &verify; optionList[2]._sVersion = "t"; optionList[2]._lVersion = "timing"; optionList[2]._description = "Print timing."; optionList[2]._type = CA_NO_ARGUMENT; optionList[2]._value = &timing; optionList[3]._sVersion = "v"; optionList[3]._lVersion = "version"; optionList[3]._description = "AMD APP SDK version string."; optionList[3]._type = CA_NO_ARGUMENT; optionList[3]._value = &version; optionList[4]._sVersion = "d"; optionList[4]._lVersion = "deviceId"; optionList[4]._description = "Select deviceId to be used[0 to N-1 where N is number devices available]."; optionList[4]._type = CA_ARG_INT; optionList[4]._value = &deviceId; _numArgs = defaultOptions; _options = optionList; return SDK_SUCCESS; }
int DwtHaar1D::calApproxFinalOnHost() { // Copy inData to hOutData cl_float *tempOutData = (cl_float*)malloc(signalLength * sizeof(cl_float)); CHECK_ALLOCATION(tempOutData, "Failed to allocate host memory. (tempOutData)"); memcpy(tempOutData, inData, signalLength * sizeof(cl_float)); for(cl_uint i = 0; i < signalLength; ++i) { tempOutData[i] = tempOutData[i] / sqrt((float)signalLength); } cl_uint length = signalLength; while(length > 1u) { for(cl_uint i = 0; i < length / 2; ++i) { cl_float data0 = tempOutData[2 * i]; cl_float data1 = tempOutData[2 * i + 1]; hOutData[i] = (data0 + data1) / sqrt((float)2); hOutData[length / 2 + i] = (data0 - data1) / sqrt((float)2); } // Copy inData to hOutData memcpy(tempOutData, hOutData, signalLength * sizeof(cl_float)); length >>= 1; } FREE(tempOutData); return SDK_SUCCESS; }
/*
 * Create a YUV processor context and attach it to a decode context.
 *
 * obj_context - object whose format_data is read as the decode context; when
 *               it is NULL a new decode context is allocated here and stored
 *               back into obj_context->format_data.
 * obj_config  - unused (marked __maybe_unused).
 *
 * Returns vaStatus: VA_STATUS_SUCCESS initially,
 * VA_STATUS_ERROR_ALLOCATION_FAILED when the decode context cannot be
 * allocated, or whatever vld_dec_CreateContext() returned.
 *
 * NOTE(review): CHECK_ALLOCATION and DEBUG_FAILURE are macros defined
 * elsewhere; they presumably reference the local `vaStatus`, and
 * CHECK_ALLOCATION presumably returns on NULL — confirm before refactoring.
 */
static VAStatus tng_yuv_processor_CreateContext(
    object_context_p obj_context,
    object_config_p __maybe_unused obj_config)
{
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    context_DEC_p dec_ctx = (context_DEC_p) obj_context->format_data;
    context_yuv_processor_p ctx;

    ctx = (context_yuv_processor_p) malloc(sizeof(struct context_yuv_processor_s));
    CHECK_ALLOCATION(ctx);

    /* ctx may be created either inside an existing decode context or together
       with a new one; has_dec_ctx records whether this function created the
       decode context. */
    ctx->has_dec_ctx = 0;
    ctx->src_surface = NULL;

    if (!dec_ctx) {
        /* No decode context yet: allocate one for this processor. */
        dec_ctx = (context_DEC_p) malloc(sizeof(struct context_DEC_s));
        if(dec_ctx == NULL) {
            /* Release the processor context allocated above before failing. */
            free(ctx);
            vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
            drv_debug_msg(VIDEO_DEBUG_ERROR, "%s fails with '%d' at %s:%d\n", __FUNCTION__, vaStatus, __FILE__, __LINE__);
            return vaStatus;
        }
        obj_context->format_data = (void *)dec_ctx;
        ctx->has_dec_ctx = 1;
        vaStatus = vld_dec_CreateContext(dec_ctx, obj_context);
        /* NOTE(review): execution appears to continue after DEBUG_FAILURE even
           when vld_dec_CreateContext() failed — confirm that is intended. */
        DEBUG_FAILURE;
    }

    /* Link the processor into the decode context and install its
       buffer-processing callback. */
    dec_ctx->yuv_ctx = ctx;
    dec_ctx->process_buffer = tng_yuv_processor_process_buffer;

    return vaStatus;
}
int MersenneTwister::initialize() { // Call base class Initialize to get default configuration if(sampleArgs->initialize() != SDK_SUCCESS) { return SDK_FAILURE; } // add an option for getting blockSize from commandline Option* num_option = new Option; CHECK_ALLOCATION(num_option, "Memory Allocation error.\n"); num_option->_sVersion = "x"; num_option->_lVersion = "numRands"; num_option->_description = "Number of random numbers to be generated"; num_option->_type = CA_ARG_INT; num_option->_value = &numRands; sampleArgs->AddOption(num_option); delete num_option; Option* factor_option = new Option; CHECK_ALLOCATION(factor_option,"Memory Allocation error.\n"); factor_option->_sVersion = "y"; factor_option->_lVersion = "factor"; factor_option->_description = "Each seed generates 'factor' random numbers"; factor_option->_type = CA_ARG_INT; factor_option->_value = &mulFactor; sampleArgs->AddOption(factor_option); delete factor_option; Option* iteration_option = new Option; CHECK_ALLOCATION(iteration_option, "Memory Allocation error.\n"); iteration_option->_sVersion = "i"; iteration_option->_lVersion = "iterations"; iteration_option->_description = "Number of iterations to execute kernel"; iteration_option->_type = CA_ARG_INT; iteration_option->_value = &iterations; sampleArgs->AddOption(iteration_option); delete iteration_option; return SDK_SUCCESS; }
int GaussianNoise::readInputImage(std::string inputImageName) { // load input bitmap image std::string filePath = inputImageName; inputBitmap.load(filePath.c_str()); // error if image did not load if(!inputBitmap.isLoaded()) { sampleCommon->error("Failed to load input image!"); return SDK_FAILURE; } // get width and height of input image height = inputBitmap.getHeight(); width = inputBitmap.getWidth(); // allocate memory for input & output image data inputImageData = (cl_uchar4*)malloc(width * height * sizeof(cl_uchar4)); CHECK_ALLOCATION(inputImageData, "Failed to allocate memory! (inputImageData)"); // allocate memory for output image data outputImageData = (cl_uchar4*)malloc(width * height * sizeof(cl_uchar4)); CHECK_ALLOCATION(outputImageData, "Failed to allocate memory! (outputImageData)"); // initializa the Image data to NULL memset(outputImageData, 0, width * height * pixelSize); // get the pointer to pixel data pixelData = inputBitmap.getPixels(); // error check if(pixelData == NULL) { sampleCommon->error("Failed to read pixel Data!"); return SDK_FAILURE; } // Copy pixel data into inputImageData memcpy(inputImageData, pixelData, width * height * pixelSize); return SDK_SUCCESS; }
int ConstantBandwidth::setupConstantBandwidth() { //Make vectorSize as 4 if -v option is 3. //This memeory alignment is required as per OpenCL for type3 vectors if(vectorSize == 3) { vec3 = true; vectorSize = 4; } else if((1 != vectorSize) && (2 != vectorSize) && (4 != vectorSize) && (8 != vectorSize) && (16 != vectorSize)) { std::cout << "The vectorsize can only be one of 1,2,3(4),4,8,16!" << std::endl; return SDK_FAILURE; } // Allocate memory cl_uint size = (WAVEFRONT + NUM_READS) * vectorSize * sizeof(cl_float); input = (cl_float*)malloc(size); CHECK_ALLOCATION(input, "Failed to allocate host memory. (input)"); // Allocate memory for output buffer output = (cl_float*)malloc(length * vectorSize * sizeof(cl_float)); CHECK_ALLOCATION(output, "Failed to allocate host memory. (output)"); if(verify) { cl_uint size = length * vectorSize * sizeof(cl_float); verificationOutput = (cl_float*)malloc(size); CHECK_ALLOCATION(verificationOutput, "Failed to allocate host memory. (verificationOutput)"); } // random initialisation of input sampleCommon->fillRandom<cl_float>(input, (WAVEFRONT + NUM_READS) * vectorSize, 1, 0, 10); return SDK_SUCCESS; }
int BitonicSort::initialize() { // Call base class Initialize to get default configuration CHECK_ERROR(this->SDKSample::initialize(), SDK_SUCCESS, "OpenCL resource initilization failed"); // Now add customized options streamsdk::Option* array_length = new streamsdk::Option; CHECK_ALLOCATION(array_length, "Memory allocation error.\n"); array_length->_sVersion = "x"; array_length->_lVersion = "length"; array_length->_description = "Length of the array to be sorted"; array_length->_type = streamsdk::CA_ARG_INT; array_length->_value = &length; sampleArgs->AddOption(array_length); delete array_length; streamsdk::Option* sort_order = new streamsdk::Option; CHECK_ALLOCATION(sort_order, "Memory allocation error.\n"); sort_order->_sVersion = "s"; sort_order->_lVersion = "sort"; sort_order->_description = "Sort in descending/ascending order[desc/asc]"; sort_order->_type = streamsdk::CA_ARG_STRING; sort_order->_value = &sortDescending; sampleArgs->AddOption(sort_order); delete sort_order; streamsdk::Option* num_iterations = new streamsdk::Option; CHECK_ALLOCATION(num_iterations, "Memory allocation error.\n"); num_iterations->_sVersion = "i"; num_iterations->_lVersion = "iterations"; num_iterations->_description = "Number of iterations for kernel execution"; num_iterations->_type = streamsdk::CA_ARG_INT; num_iterations->_value = &iterations; sampleArgs->AddOption(num_iterations); delete num_iterations; return SDK_SUCCESS; }
/** *************************************************************************** * @fn initialize * @brief Initialize the resources used by tests * @return SDK_SUCCESS on success, SDK_FAILURE otherwise **************************************************************************/ int initialize() { int defaultOptions = 7; Option *optionList = new Option[defaultOptions]; CHECK_ALLOCATION(optionList, "Error. Failed to allocate memory (optionList)\n"); optionList[0]._sVersion = ""; optionList[0]._lVersion = "device"; optionList[0]._description = "Explicit device selection for Bolt"; std::string optionStr = "[auto|"; optionStr.append("OpenCL"); optionStr.append("|SerialCpu"); optionStr.append(((enable_tbb) ? "|MultiCoreCpu" : "")); optionStr.append("]"); optionList[0]._usage = optionStr; optionList[0]._type = CA_ARG_STRING; optionList[0]._value = &runMode; optionList[1]._sVersion = "q"; optionList[1]._lVersion = "quiet"; optionList[1]._description = "Quiet mode. Suppress most text output."; optionList[1]._usage = ""; optionList[1]._type = CA_NO_ARGUMENT; optionList[1]._value = &quiet; optionList[2]._sVersion = "e"; optionList[2]._lVersion = "verify"; optionList[2]._description = "Verify results against reference implementation."; optionList[2]._usage = ""; optionList[2]._type = CA_NO_ARGUMENT; optionList[2]._value = &verify; optionList[3]._sVersion = "t"; optionList[3]._lVersion = "timing"; optionList[3]._description = "Print timing related statistics."; optionList[3]._usage = ""; optionList[3]._type = CA_NO_ARGUMENT; optionList[3]._value = &timing; optionList[4]._sVersion = "v"; optionList[4]._lVersion = "version"; optionList[4]._description = "Bolt lib & runtime version string."; optionList[4]._usage = ""; optionList[4]._type = CA_NO_ARGUMENT; optionList[4]._value = &version; optionList[5]._sVersion = "x"; optionList[5]._lVersion = "samples"; optionList[5]._description = "Number of sample input values."; optionList[5]._usage = "[value]"; optionList[5]._type = CA_ARG_INT; 
optionList[5]._value = &samples; optionList[6]._sVersion = "i"; optionList[6]._lVersion = "iterations"; optionList[6]._description = "Number of iterations."; optionList[6]._usage = "[value]"; optionList[6]._type = CA_ARG_INT; optionList[6]._value = &iterations; _numArgs = defaultOptions; _options = optionList; return SDK_SUCCESS; }
int main(int argc, char *argv[]) { int ecode; bfcc_options bfopts = {0}; if (parse_arguments(argc, argv, &bfopts) != 0) { return -2; } FILE *f = stdin; if (bfopts.input_file != 0) { f = fopen(bfopts.input_file, "r"); if (!f) { fprintf(stderr, "Unknown file.\n"); return ERROR_FILE_NOT_FOUND; } } c99_options opts; c99_options_default(&opts); backend back = create_c99_backend(&opts); ecode = back.begin(&back, stdout); FATAL_IF_ERROR(ecode, "Backend preamble generation"); tokeniser *t = tokeniser_setup(f); CHECK_ALLOCATION(t, "Tokeniser setup"); while (1) { token tok; int error = tokeniser_next(t, &tok); if (IS_ERROR(error)) { fprintf(stderr, "Tokenisation error detected: %d.\n", error); return ERROR_TOKENISATION; } if (tok == token_eof) break; if (IS_ERROR(back.emit(&back, stdout, (token) tok))) { fprintf(stderr, "Failure encountered when translating token: %s\n", token_name((token) tok)); } } ecode = back.end(&back, stdout); FATAL_IF_ERROR(ecode, "Backend could not finish") return 0; }
int DwtHaar1D::setupDwtHaar1D() { // signal length must be power of 2 signalLength = sampleCommon->roundToPowerOf2<cl_uint>(signalLength); unsigned int levels = 0; int result = getLevels(signalLength, &levels); CHECK_ERROR(result,SDK_SUCCESS, "signalLength > 2 ^ 23 not supported"); // Allocate and init memory used by host inData = (cl_float*)malloc(signalLength * sizeof(cl_float)); CHECK_ALLOCATION(inData, "Failed to allocate host memory. (inData)"); for(unsigned int i = 0; i < signalLength; i++) { inData[i] = (cl_float)(rand() % 10); } dOutData = (cl_float*) malloc(signalLength * sizeof(cl_float)); CHECK_ALLOCATION(dOutData, "Failed to allocate host memory. (dOutData)"); memset(dOutData, 0, signalLength * sizeof(cl_float)); dPartialOutData = (cl_float*) malloc(signalLength * sizeof(cl_float)); CHECK_ALLOCATION(dPartialOutData, "Failed to allocate host memory.(dPartialOutData)"); memset(dPartialOutData, 0, signalLength * sizeof(cl_float)); hOutData = (cl_float*)malloc(signalLength * sizeof(cl_float)); CHECK_ALLOCATION(hOutData, "Failed to allocate host memory. (hOutData)"); memset(hOutData, 0, signalLength * sizeof(cl_float)); if(!quiet) { sampleCommon->printArray<cl_float>("Input Signal", inData, 256, 1); } return SDK_SUCCESS; }
/*
 * Sets up a raw SAC channel: allocates its output and input buffers from
 * the SAC pool and clears the output indices and the "new data" flags.
 *
 * Returns STATUS_SUCCESS when both buffers were obtained. CHECK_PARAMETER
 * and CHECK_ALLOCATION handle a bad Channel argument or a failed allocation
 * (presumably by returning an error status; their definitions are not
 * visible here).
 */
NTSTATUS
NTAPI
RawChannelCreate(IN PSAC_CHANNEL Channel)
{
    CHECK_PARAMETER(Channel);

    /* Output side first: SAC_RAW_OBUFFER_SIZE bytes */
    Channel->OBuffer = SacAllocatePool(SAC_RAW_OBUFFER_SIZE, GLOBAL_BLOCK_TAG);
    CHECK_ALLOCATION(Channel->OBuffer);

    /* Then the input side: SAC_RAW_IBUFFER_SIZE bytes */
    Channel->IBuffer = SacAllocatePool(SAC_RAW_IBUFFER_SIZE, GLOBAL_BLOCK_TAG);
    CHECK_ALLOCATION(Channel->IBuffer);

    /* No data is pending in either direction yet */
    Channel->ChannelHasNewOBufferData = FALSE;
    Channel->ChannelHasNewIBufferData = FALSE;

    /* The output buffer starts out empty */
    Channel->OBufferFirstGoodIndex = 0;
    Channel->OBufferIndex = 0;

    return STATUS_SUCCESS;
}
int FastWalshTransform::setupFastWalshTransform() { cl_uint inputSizeBytes; if(length < 512) { length = 512; } // allocate and init memory used by host inputSizeBytes = length * sizeof(cl_float); input = (cl_float *) malloc(inputSizeBytes); CHECK_ALLOCATION(input, "Failed to allocate host memory. (input)"); output = (cl_float *) malloc(inputSizeBytes); CHECK_ALLOCATION(output, "Failed to allocate host memory. (output)"); // random initialisation of input fillRandom<cl_float>(input, length, 1, 0, 255); if(sampleArgs->verify) { verificationInput = (cl_float *) malloc(inputSizeBytes); CHECK_ALLOCATION(verificationInput, "Failed to allocate host memory. (verificationInput)"); memcpy(verificationInput, input, inputSizeBytes); } // Unless sampleArgs->quiet mode has been enabled, print the INPUT array. if(!sampleArgs->quiet) { printArray<cl_float>( "Input", input, length, 1); } return SDK_SUCCESS; }
int MersenneTwister::setupMersenneTwister() { // Calculate width and height from numRands numRands = numRands / 4; numRands = (numRands / GROUP_SIZE)? (numRands / GROUP_SIZE) * GROUP_SIZE: GROUP_SIZE; unsigned int tempVar1 = (unsigned int)sqrt((double)numRands); tempVar1 = (tempVar1 / GROUP_SIZE)? (tempVar1 / GROUP_SIZE) * GROUP_SIZE: GROUP_SIZE; numRands = tempVar1 * tempVar1; width = tempVar1; height = width; // Allocate and init memory used by host #if defined (_WIN32) seeds = (cl_uint*)_aligned_malloc(width * height * sizeof(cl_uint4), 16); #else seeds = (cl_uint*)memalign(16, width * height * sizeof(cl_uint4)); #endif CHECK_ALLOCATION(seeds,"Failed to allocate host memory. (seeds)"); deviceResult = (cl_float *) malloc(width * height * mulFactor * sizeof( cl_float4)); CHECK_ALLOCATION(deviceResult, "Failed to allocate host memory. (deviceResult)"); for(int i = 0; i < width * height * 4; ++i) { seeds[i] = (unsigned int)rand(); } memset((void*)deviceResult, 0, width * height * mulFactor * sizeof(cl_float4)); return SDK_SUCCESS; }