int 
ScanLargeArrays::setupScanLargeArrays()
{
    // input buffer size
    cl_uint sizeBytes = length * sizeof(cl_float);

    // allocate memory for input arrray
    input = (cl_float*)malloc(sizeBytes);
    CHECK_ALLOCATION(input, "Failed to allocate host memory. (input)");

    // random initialisation of input
    sampleCommon->fillRandom<cl_float>(input, length, 1, 0, 255);

    // allocate memory for output buffer
    output = (cl_float*)malloc(sizeBytes);
    CHECK_ALLOCATION(output, "Failed to allocate host memory. (output)");

    // if verification is enabled
    if(verify)
    {
        // allocate memory for verification output array
        verificationOutput = (cl_float*)malloc(sizeBytes);
        CHECK_ALLOCATION(verificationOutput, "Failed to allocate host memory. (verify)");
        memset(verificationOutput, 0, sizeBytes);
    }
    // Unless quiet mode has been enabled, print the INPUT array
    if(!quiet) 
    {
        sampleCommon->printArray<cl_float>("Input", 
                                           input, 
                                           length, 
                                           1);
    }
    return SDK_SUCCESS;
}
int ScanLargeArrays::initialize()
{
    // Call base class Initialize to get default configuration
    if(this->SDKSample::initialize() != SDK_SUCCESS)
        return SDK_FAILURE;

    streamsdk::Option* array_length = new streamsdk::Option;
    CHECK_ALLOCATION(array_length,"Memory Allocation error.(array_length)");

    array_length->_sVersion = "x";
    array_length->_lVersion = "length";
    array_length->_description = "Length of the input array";
    array_length->_type = streamsdk::CA_ARG_INT;
    array_length->_value = &length;
    sampleArgs->AddOption(array_length);
    delete array_length;

    streamsdk::Option* iteration_option = new streamsdk::Option;
    CHECK_ALLOCATION(iteration_option,"Memory Allocation error.(iteration_option)");

    iteration_option->_sVersion = "i";
    iteration_option->_lVersion = "iterations";
    iteration_option->_description = "Number of iterations to execute kernel";
    iteration_option->_type = streamsdk::CA_ARG_INT;
    iteration_option->_value = &iterations;

    sampleArgs->AddOption(iteration_option);
    delete iteration_option;

    return SDK_SUCCESS;
}
示例#3
0
int BinomialOption::initialize()
{
    // Call base class Initialize to get default configuration
    CHECK_ERROR(this->SDKSample::initialize(), SDK_SUCCESS, "OpenCL Resource Intilization failed");

    streamsdk::Option* num_samples = new streamsdk::Option;
    CHECK_ALLOCATION(num_samples, "Error. Failed to allocate memory (num_samples)\n");

    num_samples->_sVersion = "x";
    num_samples->_lVersion = "samples";
    num_samples->_description = "Number of samples to be calculated";
    num_samples->_type = streamsdk::CA_ARG_INT;
    num_samples->_value = &numSamples;

    sampleArgs->AddOption(num_samples);

    delete num_samples;

    streamsdk::Option* num_iterations = new streamsdk::Option;
    CHECK_ALLOCATION(num_iterations, "Error. Failed to allocate memory (num_iterations)\n");

    num_iterations->_sVersion = "i";
    num_iterations->_lVersion = "iterations";
    num_iterations->_description = "Number of iterations for kernel execution";
    num_iterations->_type = streamsdk::CA_ARG_INT;
    num_iterations->_value = &iterations;

    sampleArgs->AddOption(num_iterations);

    delete num_iterations;

    return SDK_SUCCESS;
}
示例#4
0
int
BinomialOption::setupBinomialOption()
{

    // Make numSamples multiple of 4
    numSamples = (numSamples / 4)? (numSamples / 4) * 4: 4;

#if defined (_WIN32)
    randArray = (cl_float*)_aligned_malloc(numSamples * sizeof(cl_float4), 16);
#else
    randArray = (cl_float*)memalign(16, numSamples * sizeof(cl_float4));
#endif
    CHECK_ALLOCATION(randArray, "Failed to allocate host memory. (randArray)");
    
    for(int i = 0; i < numSamples * 4; i++)
    {
        randArray[i] = (float)rand() / (float)RAND_MAX;
    }

#if defined (_WIN32)
    output = (cl_float*)_aligned_malloc(numSamples * sizeof(cl_float4), 16);
#else
    output = (cl_float*)memalign(16, numSamples * sizeof(cl_float4));
#endif

    CHECK_ALLOCATION(output, "Failed to allocate host memory. (output)");
    memset(output, 0, numSamples * sizeof(cl_float4));

    return SDK_SUCCESS;
}
int SimpleConvolution::initialize()
{
   // Call base class Initialize to get default configuration
   if  (this->SDKSample::initialize() != SDK_SUCCESS)
       return SDK_FAILURE;

   // Now add customized options
    streamsdk::Option* width_option = new streamsdk::Option;
    CHECK_ALLOCATION(width_option, "Memory allocation error.\n");

    width_option->_sVersion = "x";
    width_option->_lVersion = "width";
    width_option->_description = "Width of the input matrix";
    width_option->_type = streamsdk::CA_ARG_INT;
    width_option->_value = &width;

    sampleArgs->AddOption(width_option);
    delete width_option;

    streamsdk::Option* height_option = new streamsdk::Option;
    CHECK_ALLOCATION(height_option, "Memory allocation error.\n");

    height_option->_sVersion = "y";
    height_option->_lVersion = "height";
    height_option->_description = "Height of the input matrix";
    height_option->_type = streamsdk::CA_ARG_INT;
    height_option->_value = &height;

    sampleArgs->AddOption(height_option);
    delete height_option;

    streamsdk::Option* mask_width = new streamsdk::Option;
    CHECK_ALLOCATION(mask_width, "Memory allocation error.\n");

    maskWidth = 3;
    mask_width->_sVersion = "m";
    mask_width->_lVersion = "masksize";
    mask_width->_description = "Width of the mask matrix";
    mask_width->_type = streamsdk::CA_ARG_INT;
    mask_width->_value = &maskWidth;
    sampleArgs->AddOption(mask_width);
    delete mask_width;

    streamsdk::Option* num_iterations = new streamsdk::Option;
    CHECK_ALLOCATION(num_iterations, "Memory allocation error.\n");

    num_iterations->_sVersion = "i";
    num_iterations->_lVersion = "iterations";
    num_iterations->_description = "Number of iterations for kernel execution";
    num_iterations->_type = streamsdk::CA_ARG_INT;
    num_iterations->_value = &iterations;

    sampleArgs->AddOption(num_iterations);
    delete num_iterations;

   return SDK_SUCCESS;
}
示例#6
0
/*
 * Look up the first OpenCL platform whose CL_PLATFORM_NAME contains substr.
 *
 * substr:          text searched for inside each platform name (strstr).
 * platform_id_out: receives the matching platform id; must not be NULL
 *                  (asserted).
 * err:             optional. When non-NULL it receives the status of the
 *                  most recent clGetPlatformIDs / clGetPlatformInfo call.
 *
 * Returns 0 when a matching platform was found and stored, -1 otherwise.
 * Note that when no platform name matches, -1 is returned via the explicit
 * goto below without *err being changed by that path.
 *
 * NOTE(review): CHECK_CL_ERROR and CHECK_ALLOCATION are defined outside this
 * view - presumably they jump to the `error` label on failure.
 */
static int get_desired_platform(const char *substr,
                                cl_platform_id *platform_id_out,
                                cl_int *err)
{
    cl_int _err = CL_SUCCESS;
    cl_uint i, num_platforms;
    cl_platform_id *platform_ids = NULL;
    char *platform_name = NULL;
    char *grown_name = NULL;

    assert(platform_id_out != NULL);

    /* Let the rest of the function write through err unconditionally */
    if (!err) err = &_err;

    /* First call only asks how many platforms exist */
    *err = clGetPlatformIDs(0, NULL, &num_platforms);
    CHECK_CL_ERROR(*err);

    platform_ids = malloc(sizeof(*platform_ids) * num_platforms);
    CHECK_ALLOCATION(platform_ids);

    *err = clGetPlatformIDs(num_platforms, platform_ids, NULL);
    CHECK_CL_ERROR(*err);

    for (i = 0; i < num_platforms; i++) {
        size_t platform_name_size;

        /* Query the size of the name, then fetch the name itself */
        *err = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME, 0, NULL,
                                 &platform_name_size);
        CHECK_CL_ERROR(*err);

        /*
         * Grow through a temporary: if realloc fails it returns NULL and
         * leaves the old buffer allocated, so platform_name must keep
         * pointing at it for the cleanup code to release it.
         */
        grown_name = realloc(platform_name,
                             sizeof(*platform_name) * platform_name_size);
        CHECK_ALLOCATION(grown_name);
        platform_name = grown_name;

        *err = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_NAME,
                                 platform_name_size, platform_name, NULL);
        CHECK_CL_ERROR(*err);

        if (DEBUG)
            printf("Platform %u: \"%s\"\n", i, platform_name);

        if (strstr(platform_name, substr))
            break;
    }

    /* i == num_platforms means the loop ran out without a match */
    if (i < num_platforms)
        *platform_id_out = platform_ids[i];
    else
        goto error; /* No platforms found */

    free(platform_ids);
    free(platform_name);
    return 0;
error:
    free(platform_ids);
    free(platform_name);
    return -1;
}
示例#7
0
/**
 * Compares the device output against the CPU reference when both byte
 * read/write support and the verify flag are set; otherwise does nothing.
 *
 * The uchar data of outputImageData and verificationOutput is widened into
 * two temporary float arrays which are handed to compare().
 *
 * @return SDK_SUCCESS when verification is skipped or compare() accepts the
 *         data, SDK_FAILURE when compare() rejects it.
 */
int
SobelFilter::verifyResults()
{
    // Nothing to verify without byte read/write support or the verify flag
    if(!byteRWSupport)
    {
        return SDK_SUCCESS;
    }
    if(!sampleArgs->verify)
    {
        return SDK_SUCCESS;
    }

    // Produce the reference result on the host
    sobelFilterCPUReference();

    float *outputDevice = new float[width * height * pixelSize];
    CHECK_ALLOCATION(outputDevice,
                     "Failed to allocate host memory! (outputDevice)");

    float *outputReference = new float[width * height * pixelSize];
    CHECK_ALLOCATION(outputReference, "Failed to allocate host memory!"
                     "(outputReference)");

    // Widen the four uchar channels of every pixel to float
    const int pixelCount = (int)(width * height);
    for(int px = 0; px < pixelCount; ++px)
    {
        for(int ch = 0; ch < 4; ++ch)
        {
            outputDevice[px * 4 + ch] = outputImageData[px].s[ch];
            outputReference[px * 4 + ch] = verificationOutput[px * 4 + ch];
        }
    }

    // Compare, report, release the temporaries, then return the verdict
    const bool matched = compare(outputReference,
                                 outputDevice,
                                 width * height * 4);

    std::cout << (matched ? "Passed!\n" : "Failed\n") << std::endl;

    delete[] outputDevice;
    delete[] outputReference;

    return matched ? SDK_SUCCESS : SDK_FAILURE;
}
int
MatrixMulDouble::setupMatrixMulDouble()
{
    // allocate and init memory used by host  inputA[widthA][heightA]
    cl_uint inputSizeBytes0 = widthA * heightA * sizeof(cl_double);

    inputA = (cl_double*) malloc(inputSizeBytes0);
    CHECK_ALLOCATION(inputA, "Failed to allocate host memory. (inputA)");


    // allocate and init memory used by host inputB[widthB][heightB]
    cl_uint inputSizeBytes1 = widthB * heightB * sizeof(cl_double);

    inputB = (cl_double*) malloc(inputSizeBytes1);
    CHECK_ALLOCATION(inputB, "Failed to allocate host memory. (inputB)");


    // random initialisation of input
    fillRandom<cl_double>(inputA, widthA, heightA, 0, 10);
    fillRandom<cl_double>(inputB, widthB, heightB, 0, 10);

    // allocate memory for output[widthB][heightA]
    cl_uint outputSizeBytes = heightA * widthB * sizeof(cl_double);

    output = (cl_double*) malloc(outputSizeBytes);
    CHECK_ALLOCATION(output, "Failed to allocate host memory. (output)");


    // allocate memory for output[widthB][heightA] of reference implemenation
    if(sampleArgs->verify)
    {
        refOutput = (cl_double*) malloc(outputSizeBytes);
        CHECK_ALLOCATION(refOutput, "Failed to allocate host memory. (refOutput)");
        memset(refOutput, 0, outputSizeBytes);
    }
    /*
     * Unless quiet mode has been enabled, print the INPUT arrays
     */
    if(!sampleArgs->quiet)
    {
        printArray<cl_double>(
            "inputA",
            inputA,
            widthA,
            1);
        printArray<cl_double>(
            "inputB",
            inputB,
            widthB,
            1);

    }

    return SDK_SUCCESS;
}
int DeviceFission::initialize()
{
    // Call base class Initialize to get default configuration
    if(this->SDKSample::initialize())
        return SDK_FAILURE;

    // Now add customized options
    streamsdk::Option* array_length = new streamsdk::Option;
    CHECK_ALLOCATION(array_length, "Memory allocation error.\n");

    array_length->_sVersion = "x";
    array_length->_lVersion = "length";
    array_length->_description = "Length of the Input array (Default value 1024)";
    array_length->_type = streamsdk::CA_ARG_INT;
    array_length->_value = &length;
    sampleArgs->AddOption(array_length);
    delete array_length;

	streamsdk::Option* bool_cpu2gpu = new streamsdk::Option;
	CHECK_ALLOCATION(bool_cpu2gpu, "Memory allocation error.\n");

	bool_cpu2gpu->_sVersion = "g";
	bool_cpu2gpu->_lVersion = "cpu2gpu";
	bool_cpu2gpu->_description = "Switch to migrate memory object from sub device to GPU before run kernels.(0 or 1, where 0 is disable, 1 is enable)";
	bool_cpu2gpu->_type = streamsdk::CA_ARG_INT;
	bool_cpu2gpu->_value = &cpu2gpuValue;
	sampleArgs->AddOption(bool_cpu2gpu);
	delete bool_cpu2gpu;

	streamsdk::Option* bool_cpu2cpu = new streamsdk::Option;
	CHECK_ALLOCATION(bool_cpu2cpu, "Memory allocation error.\n");

	bool_cpu2cpu->_sVersion = "c";
	bool_cpu2cpu->_lVersion = "cpu2cpu";
	bool_cpu2cpu->_description = "Switch to migrate memory object from sub device to another sub device before run kernels. (0 or 1, where 0 is disable, 1 is enable)";
	bool_cpu2cpu->_type = streamsdk::CA_ARG_INT;
	bool_cpu2cpu->_value = &cpu2cpuValue;
	sampleArgs->AddOption(bool_cpu2cpu);
	delete bool_cpu2cpu;

	streamsdk::Option* load_gpu = new streamsdk::Option;
	CHECK_ALLOCATION(load_gpu, "Memory allocation error.\n");

	load_gpu->_sVersion = "";
	load_gpu->_lVersion = "loadgpu";
	load_gpu->_description = "Load GPU binary image and execute on GPU";
	load_gpu->_type = streamsdk::CA_ARG_STRING;
	load_gpu->_value = &loadBinaryGPU;
	sampleArgs->AddOption(load_gpu);
	delete load_gpu;


    return SDK_SUCCESS;
}
示例#10
0
int
SobelFilter::readInputImage(std::string inputImageName)
{

    // load input bitmap image
    inputBitmap.load(inputImageName.c_str());

    // error if image did not load
    if(!inputBitmap.isLoaded())
    {
        std::cout << "Failed to load input image!";
        return SDK_FAILURE;
    }


    // get width and height of input image
    height = inputBitmap.getHeight();
    width = inputBitmap.getWidth();

    // allocate memory for input & output image data
    inputImageData  = (cl_uchar4*)malloc(width * height * sizeof(cl_uchar4));
    CHECK_ALLOCATION(inputImageData, "Failed to allocate memory! (inputImageData)");

    // allocate memory for output image data
    outputImageData = (cl_uchar4*)malloc(width * height * sizeof(cl_uchar4));
    CHECK_ALLOCATION(outputImageData,
                     "Failed to allocate memory! (outputImageData)");

    // initializa the Image data to NULL
    memset(outputImageData, 0, width * height * pixelSize);

    // get the pointer to pixel data
    pixelData = inputBitmap.getPixels();
    if(pixelData == NULL)
    {
        std::cout << "Failed to read pixel Data!";
        return SDK_FAILURE;
    }

    // Copy pixel data into inputImageData
    memcpy(inputImageData, pixelData, width * height * pixelSize);

    // allocate memory for verification output
    verificationOutput = (cl_uchar*)malloc(width * height * pixelSize);
    CHECK_ALLOCATION(verificationOutput,
                     "verificationOutput heap allocation failed!");

    // initialize the data to NULL
    memset(verificationOutput, 0, width * height * pixelSize);

    return SDK_SUCCESS;

}
int 
GlobalMemoryBandwidth::setupGlobalMemoryBandwidth()
{
    //Make vectorSize as 4 if -v option is 3. 

    //This memeory alignment is required as per OpenCL for type3 vectors 
    if(vectorSize == 3)
    {
        vec3 = true;
        vectorSize = 4;
    }

    /**
     * Allocate memory required for global buffer 
     * This includes both single and linear(cached and uncached) reads 
     */
    cl_uint sizeElement = vectorSize * sizeof(cl_float);
    cl_uint readLength = length + (NUM_READS * 1024 / sizeElement) + EXTRA_BYTES;
    cl_uint size = readLength * vectorSize * sizeof(cl_float);

    input = (cl_float*)malloc(size);
    CHECK_ALLOCATION(input, "Failed to allocate host memory. (input)");

    outputReadSingle = (cl_float*)malloc(length * vectorSize * sizeof(cl_float));
    CHECK_ALLOCATION(outputReadSingle, "Failed to allocate host memory. (outputReadSingle)");

    memset(outputReadSingle, 0, length * vectorSize * sizeof(cl_float));

    outputReadLinear = (cl_float*)malloc(length * vectorSize * sizeof(cl_float));
    CHECK_ALLOCATION(outputReadLinear, "Failed to allocate host memory. (outputReadLinear)");

    memset(outputReadLinear, 0, length * vectorSize * sizeof(cl_float));

    outputReadLU = (cl_float*)malloc(length * vectorSize * sizeof(cl_float));
    CHECK_ALLOCATION(outputReadLU, "Failed to allocate host memory. (outputReadLU)");

    memset(outputReadLU, 0, length * vectorSize * sizeof(cl_float));

    outputWriteLinear = (cl_float*)malloc(size);
    CHECK_ALLOCATION(outputWriteLinear, "Failed to allocate host memory. (outputWriteLinear)");

    memset(outputWriteLinear, 0, size);

    // random initialisation of input 
    sampleCommon->fillRandom<cl_float>(input,  
                                       readLength * vectorSize, 
                                       1, 
                                       0, 
                                       10);

    return SDK_SUCCESS;
}
int 
MatrixMulImage::setupMatrixMulImage()
{
    // allocate and init memory used by host  input0[width0][height0]
    cl_uint inputSizeBytes0 = width0 * height0 * sizeof(cl_float);

    input0 = (cl_float *) malloc(inputSizeBytes0);
    CHECK_ALLOCATION(input0, "Failed to allocate host memory. (input0)");

    // allocate and init memory used by host input1[width1][height1]
    cl_uint inputSizeBytes1 = width1 * height1 * sizeof(cl_float);

    input1 = (cl_float *) malloc(inputSizeBytes1);
    CHECK_ALLOCATION(input1, "Failed to allocate host memory. (input1)");
    
    // random initialisation of input
    sampleCommon->fillRandom<cl_float>(input0, width0, height0, 0, 10);
    sampleCommon->fillRandom<cl_float>(input1, width1, height1, 0, 10);

    // allocate memory for output[width1][height0]
    cl_uint outputSizeBytes = height0 * width1 * sizeof(cl_float);

    output = (cl_float *) malloc(outputSizeBytes);
    CHECK_ALLOCATION(output, "Failed to allocate host memory. (output)");

    // allocate memory for output[width1][height0] of reference implemenation
    if(verify)
    {
        verificationOutput = (cl_float *) malloc(outputSizeBytes);
        CHECK_ALLOCATION(verificationOutput, "Failed to allocate host memory. (verificationOutput)");
        memset(verificationOutput, 0, outputSizeBytes);
    }

    /* 
     * Unless quiet mode has been enabled, print the INPUT arrays
     */
    if(!quiet) 
    {
        sampleCommon->printArray<cl_float>(
            "Input0", 
            input0, 
            width0, 
            1);
        sampleCommon->printArray<cl_float>(
            "Input1", 
            input1, 
            width1, 
            1);
    }
    return SDK_SUCCESS;
}
/**
 * Loads the map bitmap and the verification bitmap and prepares the host
 * buffers derived from them: mapImageData (copy of the map pixels), a
 * zeroed fillImageData and verificationImageData (copy of the verification
 * bitmap pixels). width, height and image_desc are taken from the map
 * bitmap.
 *
 * @param mapImageName    path handed to mapBitmap.load()
 * @param verifyImageName path handed to verifyBitmap.load()
 * @return SDK_FAILURE when the map bitmap does not load; SDK_SUCCESS when
 *         the function runs to its end.
 *         NOTE(review): the CHECK_ALLOCATION macro is defined outside this
 *         view - presumably it returns early when its first argument is
 *         NULL.
 *         NOTE(review): the verification pixels are copied using the map
 *         bitmap's dimensions; this assumes both bitmaps have the same
 *         size - confirm.
 */
int
ImageOverlap::readImage(std::string mapImageName,std::string verifyImageName)
{

    // load the map bitmap and the bitmap used for verification
    mapBitmap.load(mapImageName.c_str());
    verifyBitmap.load(verifyImageName.c_str());

    // error if the map image did not load
    if(!mapBitmap.isLoaded())
    {
        sampleCommon->error("Failed to load input image!");
        return SDK_FAILURE;
    }

    // get width and height of the map image and mirror them in image_desc
    height = mapBitmap.getHeight();
    width = mapBitmap.getWidth();
    image_desc.image_width=width;
    image_desc.image_height=height;

    // allocate memory for map image data
    mapImageData = (cl_uchar4*)malloc(width * height * sizeof(cl_uchar4));
    CHECK_ALLOCATION(mapImageData,"Failed to allocate memory! (mapImageData)");

    // allocate memory for fill image data
    fillImageData = (cl_uchar4*)malloc(width * height * sizeof(cl_uchar4));
    CHECK_ALLOCATION(fillImageData,"Failed to allocate memory! (fillImageData)");

    // clear the fill image data to zero
    memset(fillImageData, 0, width * height * pixelSize);

    // get the pointer to the map bitmap's pixel data
    pixelData = mapBitmap.getPixels();
    CHECK_ALLOCATION(pixelData,"Failed to read mapBitmap pixel Data!");

    // Copy pixel data into mapImageData
    memcpy(mapImageData, pixelData, width * height * pixelSize);

    // allocate memory for verification output; the check must look at the
    // buffer that was just allocated, not at pixelData (which is already
    // known to be non-NULL at this point)
    verificationImageData = (cl_uchar4*)malloc(width * height * pixelSize);
    CHECK_ALLOCATION(verificationImageData,"verificationOutput heap allocation failed!");

    // get the pointer to the verification bitmap's pixel data
    pixelData = verifyBitmap.getPixels();
    CHECK_ALLOCATION(pixelData,"Failed to read verifyBitmap pixel Data!");

    // Copy pixel data into verificationImageData
    memcpy(verificationImageData, pixelData, width * height * pixelSize);

    return SDK_SUCCESS;
}
int
DeviceFission::setupDeviceFission()
{
    // Make sure length is multiple of group size * numSubDevices
    unsigned int mulFactor = (unsigned int)groupSize * numSubDevices;
    length = (length < mulFactor) ? mulFactor : length;
    length = (length / mulFactor) * mulFactor;

	// Calculate half length
	half_length = length >> 1;

	// Deal with options: cpu2cpu and cpu2gpu
	if(cpu2gpuValue != 0 && cpu2cpuValue != 0)
	{
		std::cout << "cpu2gpu and cpu2cpu can't be both true. Disable cpu2cpu."<< std::endl;
		cpu2cpuValue = 0;
	}
	if(cpu2gpuValue != 0)
	{
		std::cout << "Enable cpu2gpu mode."<< std::endl;
		cpu2gpu = CL_TRUE;
	}
	if(cpu2cpuValue != 0)
	{
		std::cout << "Enable cpu2cpu mode."<< std::endl;
		cpu2cpu = CL_TRUE;
	}

	// Get allocate memory for input buffer
    input = (cl_int*)malloc(half_length * sizeof(cl_int));
    CHECK_ALLOCATION(input, "Failed to allocate host memory. (input)");

    // Random initialisation of input
    sampleCommon->fillRandom<cl_int>(input, half_length, 1, 1, 8);

    // Unless quiet mode has been enabled, print the INPUT array
    if(!quiet) 
		sampleCommon->printArray<cl_int>("Input:", input, half_length, 1);

	// Get allocate memory for subOutput buffer
    subOutput = (cl_int*)malloc(length * sizeof(cl_int));
    CHECK_ALLOCATION(subOutput, "Failed to allocate host memory. (subOutput)");

	// Get allocate memory for gpuOutput buffer
	gpuOutput = (cl_int*)malloc(length * sizeof(cl_int));
	CHECK_ALLOCATION(gpuOutput, "Failed to allocate host memory. (gpuOutput)");

    return SDK_SUCCESS;
}
/******************************************************************************
* Implementation of BoltSample::initialize()                                  *
******************************************************************************/
int BoltSample::initialize()
{
    int defaultOptions = 6;

    boltsdk::Option *optionList = new boltsdk::Option[defaultOptions];
    CHECK_ALLOCATION(optionList, "Error. Failed to allocate memory (optionList)\n");

    optionList[0]._sVersion = "q";
    optionList[0]._lVersion = "quiet";
    optionList[0]._description = "Quiet mode. Suppress most text output.";
    optionList[0]._type = boltsdk::CA_NO_ARGUMENT;
    optionList[0]._value = &quiet;

    optionList[1]._sVersion = "e";
    optionList[1]._lVersion = "verify";
    optionList[1]._description = "Verify results against reference implementation.";
    optionList[1]._type = boltsdk::CA_NO_ARGUMENT;
    optionList[1]._value = &verify;

    optionList[2]._sVersion = "t";
    optionList[2]._lVersion = "timing";
    optionList[2]._description = "Print timing related statistics.";
    optionList[2]._type = boltsdk::CA_NO_ARGUMENT;
    optionList[2]._value = &timing;

    optionList[3]._sVersion = "v";
    optionList[3]._lVersion = "version";
    optionList[3]._description = "Bolt lib & runtime version string.";
    optionList[3]._type = boltsdk::CA_NO_ARGUMENT;
    optionList[3]._value = &version;

    optionList[4]._sVersion = "x";
    optionList[4]._lVersion = "samples";
    optionList[4]._description = "Number of sample input values.";
    optionList[4]._type = boltsdk::CA_ARG_INT;
    optionList[4]._value = &samples;

    optionList[5]._sVersion = "i";
    optionList[5]._lVersion = "iterations";
    optionList[5]._description = "Number of iterations.";
    optionList[5]._type = boltsdk::CA_ARG_INT;
    optionList[5]._value = &iterations;

    sampleArgs = new boltsdk::BoltCommandArgs(defaultOptions, optionList);
    CHECK_ALLOCATION(sampleArgs, "Failed to allocate memory. (sampleArgs)\n");
                
    return SDK_SUCCESS;
}
int
LDSBandwidth::setupLDSBandwidth()
{
    /**
     * Make vectorSize as 4 if -v option is 3.
     * This memory alignment is required as per OpenCL for type3 vectors
     */
    if(vectorSize == 3)
    {
        vec3 = true;
        vectorSize = 4;
    }
    else if((1 != vectorSize) && (2 != vectorSize) && (4 != vectorSize) &&
            (8 != vectorSize) && (16 != vectorSize))
    {
        std::cout << "The vectorsize can only be one of 1,2,3(4),4,8,16!" << std::endl;
        return SDK_FAILURE;
    }

    // host output
    output = (cl_float*)malloc(length * vectorSize * sizeof(cl_float));
    CHECK_ALLOCATION(output, "Failed to allocate host memory. (output)");

    return SDK_SUCCESS;
}
/**
 * When the verify flag is set, computes the CPU reference convolution into
 * a freshly allocated verificationOutput buffer and compares it
 * byte-for-byte (memcmp) with the device output.
 *
 * @return SDK_SUCCESS when verification is disabled or the buffers are
 *         identical, SDK_FAILURE when they differ.
 *         NOTE(review): verificationOutput is allocated on every call and
 *         not released here - presumably freed during cleanup; confirm.
 */
int SimpleConvolution::verifyResults()
{
    if(verify)
    {
        verificationOutput = (cl_uint *) malloc(width * height * sizeof(cl_uint ));
        CHECK_ALLOCATION(verificationOutput, "Failed to allocate host memory. (verificationOutput)");

        // reference implementation (dimensions are passed as scalars; the
        // previously declared cl_uint2 dimension locals were never used)
        simpleConvolutionCPUReference(verificationOutput, input, mask, width, height,
                                        maskWidth, maskHeight);

        // compare the results and see if they match
        if(memcmp(output, verificationOutput, height*width*sizeof(cl_uint )) == 0)
        {
            std::cout<<"Passed!\n" << std::endl;
            return SDK_SUCCESS;
        }
        else
        {
            std::cout<<"Failed\n" << std::endl;
            return SDK_FAILURE;
        }
    }

    return SDK_SUCCESS;
}
示例#18
0
 /**
 ***********************************************************************
 * @fn initialize
 * @brief Initialize the resources used by tests
 * @return 0 on success Positive if expected and Non-zero on failure
 **********************************************************************/
 int initialize()
 {
     int defaultOptions = 5;
     Option *optionList = new Option[defaultOptions];
     CHECK_ALLOCATION(optionList, "Error. Failed to allocate memory (optionList)\n");
     optionList[0]._sVersion = "q";
     optionList[0]._lVersion = "quiet";
     optionList[0]._description = "Quiet mode. Suppress all text output.";
     optionList[0]._type = CA_NO_ARGUMENT;
     optionList[0]._value = &quiet;
     optionList[1]._sVersion = "e";
     optionList[1]._lVersion = "verify";
     optionList[1]._description = "Verify results against reference implementation.";
     optionList[1]._type = CA_NO_ARGUMENT;
     optionList[1]._value = &verify;
     optionList[2]._sVersion = "t";
     optionList[2]._lVersion = "timing";
     optionList[2]._description = "Print timing.";
     optionList[2]._type = CA_NO_ARGUMENT;
     optionList[2]._value = &timing;
     optionList[3]._sVersion = "v";
     optionList[3]._lVersion = "version";
     optionList[3]._description = "AMD APP SDK version string.";
     optionList[3]._type = CA_NO_ARGUMENT;
     optionList[3]._value = &version;
     optionList[4]._sVersion = "d";
     optionList[4]._lVersion = "deviceId";
     optionList[4]._description =
         "Select deviceId to be used[0 to N-1 where N is number devices available].";
     optionList[4]._type = CA_ARG_INT;
     optionList[4]._value = &deviceId;
     _numArgs = defaultOptions;
     _options = optionList;
     return SDK_SUCCESS;
 }
int 
DwtHaar1D::calApproxFinalOnHost()
{
    // Copy inData to hOutData
    cl_float *tempOutData = (cl_float*)malloc(signalLength * sizeof(cl_float));
    CHECK_ALLOCATION(tempOutData, "Failed to allocate host memory. (tempOutData)");

    memcpy(tempOutData, inData, signalLength * sizeof(cl_float));

    for(cl_uint i = 0; i < signalLength; ++i)
    {
        tempOutData[i] = tempOutData[i] / sqrt((float)signalLength);
    }

    cl_uint length = signalLength;
    while(length > 1u)
    {
        for(cl_uint i = 0; i < length / 2; ++i)
        {
            cl_float data0 = tempOutData[2 * i];
            cl_float data1 = tempOutData[2 * i + 1];

            hOutData[i] = (data0 + data1) / sqrt((float)2);
            hOutData[length / 2 + i] = (data0 - data1) / sqrt((float)2);
        }
        // Copy inData to hOutData
        memcpy(tempOutData, hOutData, signalLength * sizeof(cl_float));

        length >>= 1;
    }

    FREE(tempOutData);
    return SDK_SUCCESS;
}
/*
 * Create a YUV-processor context and attach it to the decode context stored
 * in obj_context->format_data.
 *
 * If obj_context has no decode context yet, one is allocated here, stored in
 * obj_context->format_data and initialised through vld_dec_CreateContext();
 * ctx->has_dec_ctx records that this function created it.
 *
 * obj_context: context whose format_data holds (or receives) the decode
 *              context.
 * obj_config:  unused.
 *
 * Returns the VAStatus held in vaStatus at the end, or
 * VA_STATUS_ERROR_ALLOCATION_FAILED when the decode context cannot be
 * allocated.
 * NOTE(review): CHECK_ALLOCATION and DEBUG_FAILURE are macros defined outside
 * this view; presumably the former returns on a NULL pointer and the latter
 * only logs a non-success vaStatus - confirm.
 */
static VAStatus tng_yuv_processor_CreateContext(
    object_context_p obj_context,
    object_config_p __maybe_unused obj_config)
{
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    context_DEC_p dec_ctx = (context_DEC_p) obj_context->format_data;
    context_yuv_processor_p ctx;

    ctx = (context_yuv_processor_p) malloc(sizeof(struct context_yuv_processor_s));
    CHECK_ALLOCATION(ctx);

    /* ctx could be create in/out another dec context */
    ctx->has_dec_ctx = 0;
    ctx->src_surface = NULL;

    /* No decode context yet: create one and remember that we own it */
    if (!dec_ctx) {
        dec_ctx = (context_DEC_p) malloc(sizeof(struct context_DEC_s));
        if(dec_ctx == NULL) {
            /* Release the processor context allocated above before bailing out */
            free(ctx);
            vaStatus = VA_STATUS_ERROR_ALLOCATION_FAILED;
            drv_debug_msg(VIDEO_DEBUG_ERROR, "%s fails with '%d' at %s:%d\n", __FUNCTION__, vaStatus, __FILE__, __LINE__);
            return vaStatus;
        }
        obj_context->format_data = (void *)dec_ctx;
        ctx->has_dec_ctx = 1;
        vaStatus = vld_dec_CreateContext(dec_ctx, obj_context);
        DEBUG_FAILURE;
    }

    /* Hook the processor context and its buffer handler into the decode context */
    dec_ctx->yuv_ctx = ctx;
    dec_ctx->process_buffer = tng_yuv_processor_process_buffer;

    return vaStatus;
}
int MersenneTwister::initialize()
{
    // Call base class Initialize to get default configuration
    if(sampleArgs->initialize() != SDK_SUCCESS)
    {
        return SDK_FAILURE;
    }

    // add an option for getting blockSize from commandline
    Option* num_option = new Option;
    CHECK_ALLOCATION(num_option, "Memory Allocation error.\n");

    num_option->_sVersion = "x";
    num_option->_lVersion = "numRands";
    num_option->_description = "Number of random numbers to be generated";
    num_option->_type = CA_ARG_INT;
    num_option->_value = &numRands;

    sampleArgs->AddOption(num_option);
    delete num_option;

    Option* factor_option = new Option;
    CHECK_ALLOCATION(factor_option,"Memory Allocation error.\n");

    factor_option->_sVersion = "y";
    factor_option->_lVersion = "factor";
    factor_option->_description = "Each seed generates 'factor' random numbers";
    factor_option->_type = CA_ARG_INT;
    factor_option->_value = &mulFactor;

    sampleArgs->AddOption(factor_option);
    delete factor_option;

    Option* iteration_option = new Option;
    CHECK_ALLOCATION(iteration_option, "Memory Allocation error.\n");

    iteration_option->_sVersion = "i";
    iteration_option->_lVersion = "iterations";
    iteration_option->_description = "Number of iterations to execute kernel";
    iteration_option->_type = CA_ARG_INT;
    iteration_option->_value = &iterations;

    sampleArgs->AddOption(iteration_option);
    delete iteration_option;

    return SDK_SUCCESS;
}
int
GaussianNoise::readInputImage(std::string inputImageName)
{

    // load input bitmap image 
    std::string filePath = inputImageName;
    inputBitmap.load(filePath.c_str());

    // error if image did not load 
    if(!inputBitmap.isLoaded())
    {
        sampleCommon->error("Failed to load input image!");
        return SDK_FAILURE;
    }

    // get width and height of input image 
    height = inputBitmap.getHeight();
    width = inputBitmap.getWidth();

    // allocate memory for input & output image data  
    inputImageData  = (cl_uchar4*)malloc(width * height * sizeof(cl_uchar4));
    CHECK_ALLOCATION(inputImageData, "Failed to allocate memory! (inputImageData)");


    // allocate memory for output image data 
    outputImageData = (cl_uchar4*)malloc(width * height * sizeof(cl_uchar4));
    CHECK_ALLOCATION(outputImageData, "Failed to allocate memory! (outputImageData)");

    // initializa the Image data to NULL 
    memset(outputImageData, 0, width * height * pixelSize);

    // get the pointer to pixel data 
    pixelData = inputBitmap.getPixels();

    // error check 
    if(pixelData == NULL)
    {
        sampleCommon->error("Failed to read pixel Data!");
        return SDK_FAILURE;
    }

    // Copy pixel data into inputImageData 
    memcpy(inputImageData, pixelData, width * height * pixelSize);

    return SDK_SUCCESS;
}
int 
ConstantBandwidth::setupConstantBandwidth()
{

    //Make vectorSize as 4 if -v option is 3. 

    //This memeory alignment is required as per OpenCL for type3 vectors 
    if(vectorSize == 3)
    {
        vec3 = true;
        vectorSize = 4;
    }
	else if((1 != vectorSize) && (2 != vectorSize) && (4 != vectorSize) && (8 != vectorSize) && (16 != vectorSize))
	{
		std::cout << "The vectorsize can only be one of 1,2,3(4),4,8,16!" << std::endl;
		return SDK_FAILURE;
	}

    // Allocate memory 
    cl_uint size = (WAVEFRONT + NUM_READS) * vectorSize * sizeof(cl_float);

    input = (cl_float*)malloc(size);
    CHECK_ALLOCATION(input, "Failed to allocate host memory. (input)");

    // Allocate memory for output buffer
    output = (cl_float*)malloc(length * vectorSize * sizeof(cl_float));
    CHECK_ALLOCATION(output, "Failed to allocate host memory. (output)");
   
    if(verify)
    {
        cl_uint size = length * vectorSize * sizeof(cl_float);
        verificationOutput = (cl_float*)malloc(size);
        CHECK_ALLOCATION(verificationOutput, "Failed to allocate host memory. (verificationOutput)");
    }

    
    // random initialisation of input
    sampleCommon->fillRandom<cl_float>(input, 
                                       (WAVEFRONT + NUM_READS) * vectorSize, 
                                       1, 
                                       0, 
                                       10);

	
    return SDK_SUCCESS;
}
int BitonicSort::initialize()
{
    // Call base class Initialize to get default configuration
    CHECK_ERROR(this->SDKSample::initialize(), SDK_SUCCESS, "OpenCL resource initilization failed");   
    
    // Now add customized options
    streamsdk::Option* array_length = new streamsdk::Option;
    CHECK_ALLOCATION(array_length, "Memory allocation error.\n");
    
    array_length->_sVersion = "x";
    array_length->_lVersion = "length";
    array_length->_description = "Length of the array to be sorted";
    array_length->_type = streamsdk::CA_ARG_INT;
    array_length->_value = &length;
    sampleArgs->AddOption(array_length);

    delete array_length;
   
    streamsdk::Option* sort_order = new streamsdk::Option;
    CHECK_ALLOCATION(sort_order, "Memory allocation error.\n");

    sort_order->_sVersion = "s";
    sort_order->_lVersion = "sort";
    sort_order->_description = "Sort in descending/ascending order[desc/asc]";
    sort_order->_type = streamsdk::CA_ARG_STRING;
    sort_order->_value = &sortDescending;
    sampleArgs->AddOption(sort_order);

    delete sort_order;

    streamsdk::Option* num_iterations = new streamsdk::Option;
    CHECK_ALLOCATION(num_iterations, "Memory allocation error.\n");

    num_iterations->_sVersion = "i";
    num_iterations->_lVersion = "iterations";
    num_iterations->_description = "Number of iterations for kernel execution";
    num_iterations->_type = streamsdk::CA_ARG_INT;
    num_iterations->_value = &iterations;

    sampleArgs->AddOption(num_iterations);

    delete num_iterations;

    return SDK_SUCCESS;
}
 /**
 ***************************************************************************
 * @fn initialize
 * @brief Initialize the resources used by tests
 * @return SDK_SUCCESS on success, SDK_FAILURE otherwise
 **************************************************************************/
 int initialize() {
   int defaultOptions = 7;
   Option *optionList = new Option[defaultOptions];
   CHECK_ALLOCATION(optionList,
                    "Error. Failed to allocate memory (optionList)\n");
   optionList[0]._sVersion = "";
   optionList[0]._lVersion = "device";
   optionList[0]._description = "Explicit device selection for Bolt";
   std::string optionStr = "[auto|";
   optionStr.append("OpenCL");
   optionStr.append("|SerialCpu");
   optionStr.append(((enable_tbb) ? "|MultiCoreCpu" : ""));
   optionStr.append("]");
   optionList[0]._usage = optionStr;
   optionList[0]._type = CA_ARG_STRING;
   optionList[0]._value = &runMode;
   optionList[1]._sVersion = "q";
   optionList[1]._lVersion = "quiet";
   optionList[1]._description = "Quiet mode. Suppress most text output.";
   optionList[1]._usage = "";
   optionList[1]._type = CA_NO_ARGUMENT;
   optionList[1]._value = &quiet;
   optionList[2]._sVersion = "e";
   optionList[2]._lVersion = "verify";
   optionList[2]._description =
       "Verify results against reference implementation.";
   optionList[2]._usage = "";
   optionList[2]._type = CA_NO_ARGUMENT;
   optionList[2]._value = &verify;
   optionList[3]._sVersion = "t";
   optionList[3]._lVersion = "timing";
   optionList[3]._description = "Print timing related statistics.";
   optionList[3]._usage = "";
   optionList[3]._type = CA_NO_ARGUMENT;
   optionList[3]._value = &timing;
   optionList[4]._sVersion = "v";
   optionList[4]._lVersion = "version";
   optionList[4]._description = "Bolt lib & runtime version string.";
   optionList[4]._usage = "";
   optionList[4]._type = CA_NO_ARGUMENT;
   optionList[4]._value = &version;
   optionList[5]._sVersion = "x";
   optionList[5]._lVersion = "samples";
   optionList[5]._description = "Number of sample input values.";
   optionList[5]._usage = "[value]";
   optionList[5]._type = CA_ARG_INT;
   optionList[5]._value = &samples;
   optionList[6]._sVersion = "i";
   optionList[6]._lVersion = "iterations";
   optionList[6]._description = "Number of iterations.";
   optionList[6]._usage = "[value]";
   optionList[6]._type = CA_ARG_INT;
   optionList[6]._value = &iterations;
   _numArgs = defaultOptions;
   _options = optionList;
   return SDK_SUCCESS;
 }
示例#26
0
文件: main.c 项目: skurmedel/bfcc
/*
 * Program entry point.
 *
 * Parses the command line, reads tokens from the requested input file (or
 * from stdin when none was given) and feeds each one to the C99 backend,
 * which writes to stdout.
 *
 * Returns 0 on success, -2 when argument parsing fails,
 * ERROR_FILE_NOT_FOUND when the input file cannot be opened and
 * ERROR_TOKENISATION when the tokeniser reports an error. FATAL_IF_ERROR and
 * CHECK_ALLOCATION are macros defined elsewhere; their failure behaviour is
 * not visible here.
 */
int main(int argc, char *argv[])
{
	int ecode;

	bfcc_options bfopts = {0};
	if (parse_arguments(argc, argv, &bfopts) != 0)
	{
		return -2;
	}

	/* Read from stdin unless an input file was named on the command line. */
	FILE *f = stdin;
	if (bfopts.input_file != 0)
	{
		f = fopen(bfopts.input_file, "r");
		if (!f)
		{
			fprintf(stderr, "Unknown file.\n");
			return ERROR_FILE_NOT_FOUND;
		}
	}

	c99_options opts;
	c99_options_default(&opts);

	backend back = create_c99_backend(&opts);

	ecode = back.begin(&back, stdout);
	FATAL_IF_ERROR(ecode, "Backend preamble generation");

	tokeniser *t = tokeniser_setup(f);
	CHECK_ALLOCATION(t, "Tokeniser setup");

	while (1)
	{
		token tok;
		int error = tokeniser_next(t, &tok);

		if (IS_ERROR(error))
		{
			fprintf(stderr,  "Tokenisation error detected: %d.\n", error);
			/* Do not leave the file we opened ourselves dangling. */
			if (f != stdin)
				fclose(f);
			return ERROR_TOKENISATION;
		}
		if (tok == token_eof)
			break;

		/* A token the backend cannot translate is reported but not fatal. */
		if (IS_ERROR(back.emit(&back, stdout, tok)))
		{
			fprintf(stderr, "Failure encountered when translating token: %s\n", token_name(tok));
		}
	}

	/* The input is fully consumed; release it if it is a file we opened. */
	if (f != stdin)
		fclose(f);

	ecode = back.end(&back, stdout);
	FATAL_IF_ERROR(ecode, "Backend could not finish");
	return 0;
}
int DwtHaar1D::setupDwtHaar1D()
{
    // signal length must be power of 2
    signalLength = sampleCommon->roundToPowerOf2<cl_uint>(signalLength);

    unsigned int levels = 0;
    int result = getLevels(signalLength, &levels);
    CHECK_ERROR(result,SDK_SUCCESS, "signalLength > 2 ^ 23 not supported");

    // Allocate and init memory used by host
    inData = (cl_float*)malloc(signalLength * sizeof(cl_float));
    CHECK_ALLOCATION(inData, "Failed to allocate host memory. (inData)");

    for(unsigned int i = 0; i < signalLength; i++)
    {
        inData[i] = (cl_float)(rand() % 10);
    }

    dOutData = (cl_float*) malloc(signalLength * sizeof(cl_float));
    CHECK_ALLOCATION(dOutData, "Failed to allocate host memory. (dOutData)");

    memset(dOutData, 0, signalLength * sizeof(cl_float));

    dPartialOutData = (cl_float*) malloc(signalLength * sizeof(cl_float));
    CHECK_ALLOCATION(dPartialOutData, "Failed to allocate host memory.(dPartialOutData)");

    memset(dPartialOutData, 0, signalLength * sizeof(cl_float));

    hOutData = (cl_float*)malloc(signalLength * sizeof(cl_float));
    CHECK_ALLOCATION(hOutData, "Failed to allocate host memory. (hOutData)");

    memset(hOutData, 0, signalLength * sizeof(cl_float));

    if(!quiet)
    {
        sampleCommon->printArray<cl_float>("Input Signal", inData, 256, 1);
    }
	

    return SDK_SUCCESS;
}
示例#28
0
文件: rawchan.c 项目: GYGit/reactos
/*
 * Sets up a raw channel: allocates its output and input buffers from the
 * SAC pool and clears its output indices and new-data flags.
 *
 * Returns STATUS_SUCCESS when both buffers were obtained. Parameter and
 * allocation failures are handled by the CHECK_PARAMETER / CHECK_ALLOCATION
 * macros, which are defined elsewhere.
 */
NTSTATUS
NTAPI
RawChannelCreate(IN PSAC_CHANNEL Channel)
{
    CHECK_PARAMETER(Channel);

    /* Output side: SAC_RAW_OBUFFER_SIZE bytes */
    Channel->OBuffer = SacAllocatePool(SAC_RAW_OBUFFER_SIZE, GLOBAL_BLOCK_TAG);
    CHECK_ALLOCATION(Channel->OBuffer);

    /* Input side: SAC_RAW_IBUFFER_SIZE bytes */
    Channel->IBuffer = SacAllocatePool(SAC_RAW_IBUFFER_SIZE, GLOBAL_BLOCK_TAG);
    CHECK_ALLOCATION(Channel->IBuffer);

    /* Nothing is pending in either direction yet */
    Channel->ChannelHasNewOBufferData = FALSE;
    Channel->ChannelHasNewIBufferData = FALSE;

    /* Start the output buffer bookkeeping from the beginning */
    Channel->OBufferFirstGoodIndex = 0;
    Channel->OBufferIndex = 0;

    return STATUS_SUCCESS;
}
int
FastWalshTransform::setupFastWalshTransform()
{
    cl_uint inputSizeBytes;

    if(length < 512)
    {
        length = 512;
    }

    // allocate and init memory used by host
    inputSizeBytes = length * sizeof(cl_float);
    input = (cl_float *) malloc(inputSizeBytes);
    CHECK_ALLOCATION(input, "Failed to allocate host memory. (input)");

    output = (cl_float *) malloc(inputSizeBytes);
    CHECK_ALLOCATION(output, "Failed to allocate host memory. (output)");

    // random initialisation of input
    fillRandom<cl_float>(input, length, 1, 0, 255);

    if(sampleArgs->verify)
    {
        verificationInput = (cl_float *) malloc(inputSizeBytes);
        CHECK_ALLOCATION(verificationInput,
                         "Failed to allocate host memory. (verificationInput)");
        memcpy(verificationInput, input, inputSizeBytes);
    }

    // Unless sampleArgs->quiet mode has been enabled, print the INPUT array.
    if(!sampleArgs->quiet)
    {
        printArray<cl_float>(
            "Input",
            input,
            length,
            1);
    }
    return SDK_SUCCESS;
}
int
MersenneTwister::setupMersenneTwister()
{
    // Calculate width and height from numRands
    numRands = numRands / 4;
    numRands = (numRands / GROUP_SIZE)? (numRands / GROUP_SIZE) * GROUP_SIZE:
               GROUP_SIZE;

    unsigned int tempVar1 = (unsigned int)sqrt((double)numRands);
    tempVar1 = (tempVar1 / GROUP_SIZE)? (tempVar1 / GROUP_SIZE) * GROUP_SIZE:
               GROUP_SIZE;
    numRands = tempVar1 * tempVar1;

    width = tempVar1;
    height = width;


    // Allocate and init memory used by host
#if defined (_WIN32)
    seeds = (cl_uint*)_aligned_malloc(width * height * sizeof(cl_uint4), 16);
#else
    seeds = (cl_uint*)memalign(16, width * height * sizeof(cl_uint4));
#endif

    CHECK_ALLOCATION(seeds,"Failed to allocate host memory. (seeds)");

    deviceResult = (cl_float *) malloc(width * height * mulFactor * sizeof(
                                           cl_float4));
    CHECK_ALLOCATION(deviceResult,
                     "Failed to allocate host memory. (deviceResult)");

    for(int i = 0; i < width * height * 4; ++i)
    {
        seeds[i] = (unsigned int)rand();
    }

    memset((void*)deviceResult, 0, width * height * mulFactor * sizeof(cl_float4));

    return SDK_SUCCESS;
}