Exemplo n.º 1
0
//--------------------------------------------------------------------------------------
// Name: AcquireReleaseVBOs()
// Desc: Acquire or release the VBO objects to OpenCL memory objects
//--------------------------------------------------------------------------------------
BOOL CClothSimCL::AcquireReleaseVBOs(bool bAcquire)
{
    cl_int errNum = 0;

    if( bAcquire )
    {
        // Before acquiring, finish any pending OpenGL operations
        glFinish();
        errNum = clEnqueueAcquireGLObjects( m_commandQueue, NUM_VBOS, &m_vboMem[0], 0, NULL, NULL );
        if( errNum != CL_SUCCESS )
        {
            FrmLogMessage( "Error acquiring OpenGL VBOs as OpenCL memory objects.\n");
            return FALSE;
        }
    }
    else
    {
        errNum = clEnqueueReleaseGLObjects( m_commandQueue, NUM_VBOS, &m_vboMem[0], 0, NULL, NULL );
        if( errNum != CL_SUCCESS )
        {
            FrmLogMessage( "Error releasing OpenGL VBOs as OpenCL memory objects.\n");
            return FALSE;
        }
    }

    return TRUE;
}
Exemplo n.º 2
0
//--------------------------------------------------------------------------------------
// Name: Initialize()
// Desc: Initialize the OpenCL implementation of the cloth simulation
//--------------------------------------------------------------------------------------
BOOL CClothSimCL::Initialize( cl_context context, cl_device_id device )
{
    // Call the base class first
    if( !CClothSim::Initialize() )
        return FALSE;

    // Generate VBO for previous position
    glGenBuffers( 1, &m_hPrevPositionVBO );
    glBindBuffer( GL_ARRAY_BUFFER, m_hPrevPositionVBO );
    glBufferData( GL_ARRAY_BUFFER, m_uiNumVerts * 4 * sizeof(float), NULL, GL_DYNAMIC_DRAW );
    glBufferSubData( GL_ARRAY_BUFFER, 0, m_uiNumVerts * 4 * sizeof(float), m_pVerts );
    glBindBuffer( GL_ARRAY_BUFFER, 0 );


    if( !InitKernels( context, device ) )
        return FALSE;

    // Create OpenCL memory objects for the VBOs
    UINT32 vbos[NUM_VBOS] =
    {
        m_hPositionVBO,
        m_hPrevPositionVBO,
        m_hNormalVBO,
        m_hTangentVBO,
        m_hBitangentVBO,
        m_hTextureVBO,
        m_hBackNormalVBO,
        m_hBackTangentVBO,
        m_hBackBitangentVBO
    };

    cl_int errNum = 0;
    for( INT32 i = 0; i < NUM_VBOS; i++ )
    {
        m_vboMem[i] = clCreateFromGLBuffer( context, CL_MEM_READ_WRITE, vbos[i], &errNum );
        if( errNum != CL_SUCCESS )
        {
            FrmLogMessage( "Error creating OpenCL memory object from GL VBO." );
            return FALSE;
        }
    }

    // Initialize the base distances memory object
    if( !InitConstraintsBaseDists( context ) )
        return FALSE;

    m_vertsCopyMem = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof(cl_float4) * m_uiNumVerts, NULL, &errNum);
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "Error creating OpenCL memory object." );
        return FALSE;

    }
    return TRUE;
}
//--------------------------------------------------------------------------------------
// Name: Run()
// Desc: Create the framework, initialize the application, and render frames.
//--------------------------------------------------------------------------------------
BOOL CFrmAppContainer::Run(cl_device_type deviceType)
{
    // Use the executable's directory so that relative media paths work correctly
    {
	char processDir[PATH_MAX];
	char currentDir[PATH_MAX];

	struct stat dirInfo;
	pid_t pid = getpid();

	sprintf(processDir, "/proc/%d/exe", (int)pid);
	if (readlink(processDir, currentDir, PATH_MAX) != -1)
	{
	    strcpy(currentDir, dirname(currentDir));
            chdir(currentDir);
	}
        else
        {
            printf("Error finding executable directory.\n");
            return FALSE;
        }
    }

 	// Create the Application
	m_pApplication = FrmCreateComputeApplicationInstance();

    m_pApplication->m_bRunTests = TRUE;

	if( NULL == m_pApplication )
        return FALSE;

    if( FALSE == m_pApplication->CreateOpenCLContext( deviceType ) )
    {
        return FALSE;
    }

    if( FALSE == m_pApplication->Initialize() )
    {
        printf( "Application failed initialization!\nPlease debug accordingly.");
        return FALSE;
    }

    // Run the computation
    BOOL result = m_pApplication->Compute();

    if ( m_pApplication->RunTests() )
    {
        char msg[256];
		FrmSprintf( msg, sizeof(msg), "RunTests: %s\n", result ? "PASSED" : "FAILED" );
		FrmLogMessage( msg );
    }

    // Display the message log
    const CHAR* strErrorMessage = FrmGetMessageLog();
    printf("%s", strErrorMessage);

    return TRUE;
}
Exemplo n.º 4
0
//--------------------------------------------------------------------------------------
// Name: UpdateNormalsCL()
// Desc: Update the normal/tangent/binormal using OpenCL
//--------------------------------------------------------------------------------------
BOOL CClothSimCL::UpdateNormalsCL()
{
    // Acquire from OpenGL VBOs as OpenCL memory objects
    cl_int errNum = 0;
    cl_mem memObjs[7] =
    {
        m_vboMem[CUR_POSITION],
        m_vboMem[NORMAL],
        m_vboMem[TANGENT],
        m_vboMem[BITANGENT],
        m_vboMem[BACKNORMAL],
        m_vboMem[BACKTANGENT],
        m_vboMem[BACKBITANGENT]
    };

    // Set the kernel arguments for the UpdateNormals kernel
    cl_kernel kernel = m_kernels[UPDATE_NORMALS];
    errNum |= clSetKernelArg( kernel, 0, sizeof(cl_mem), &m_vboMem[CUR_POSITION] );
    errNum |= clSetKernelArg( kernel, 1, sizeof(cl_mem), &m_vboMem[NORMAL] );
    errNum |= clSetKernelArg( kernel, 2, sizeof(cl_mem), &m_vboMem[TANGENT] );
    errNum |= clSetKernelArg( kernel, 3, sizeof(cl_mem), &m_vboMem[BITANGENT] );
    errNum |= clSetKernelArg( kernel, 4, sizeof(cl_mem), &m_vboMem[BACKNORMAL] );
    errNum |= clSetKernelArg( kernel, 5, sizeof(cl_mem), &m_vboMem[BACKTANGENT] );
    errNum |= clSetKernelArg( kernel, 6, sizeof(cl_mem), &m_vboMem[BACKBITANGENT] );
    if( errNum != CL_SUCCESS )
    {
        char str[256];
        FrmSprintf( str, sizeof(str), "Error setting kernel arguments (%d).", errNum );
        FrmLogMessage( str );
        return FALSE;
    }

    // Finally queue the kernel for execution
    size_t globalWorkSize[2] = { CLOTH_POINTS_WIDTH - 1, CLOTH_POINTS_HEIGHT - 1 };
    errNum = clEnqueueNDRangeKernel( m_commandQueue, kernel, 2, NULL, globalWorkSize,
                                     NULL, 0, NULL, NULL );
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "Error queuing kernel for execution." );
        return FALSE;
    }

    return TRUE;
}
Exemplo n.º 5
0
//--------------------------------------------------------------------------------------
// Name: VerletIntegrationCL()
// Desc: Perform verlet integration using OpenCL
//--------------------------------------------------------------------------------------
BOOL CClothSimCL::VerletIntegrationCL( float fltElapsed )
{
    cl_int errNum = 0;

    // Set the kernel arguments for the VerletIntegration kernel
    FRMVECTOR4 VelDamping = FRMVECTOR4( m_VelDamping, 0.0f );
    FRMVECTOR4 CurrentWind = FRMVECTOR4( m_CurrentWind, 0.0f );
    FRMVECTOR4 WindDir = FRMVECTOR4( m_WindDir, 0.0f );
    FRMVECTOR4 Acceleration = FRMVECTOR4( m_Gravity, 0.0f );
    Acceleration *= 0.25f;

    cl_float ElapsedSquared = fltElapsed * fltElapsed;

    cl_kernel kernel = m_kernels[VERLET_INTEGRATION];
    errNum |= clSetKernelArg( kernel, 0, sizeof(cl_float), &ElapsedSquared );
    errNum |= clSetKernelArg( kernel, 1, sizeof(cl_float4), &Acceleration );
    errNum |= clSetKernelArg( kernel, 2, sizeof(cl_float4), &VelDamping );
    errNum |= clSetKernelArg( kernel, 3, sizeof(cl_float4), &WindDir );
    errNum |= clSetKernelArg( kernel, 4, sizeof(cl_float4), &CurrentWind );
    errNum |= clSetKernelArg( kernel, 5, sizeof(cl_mem), &m_vboMem[CUR_POSITION] );
    errNum |= clSetKernelArg( kernel, 6, sizeof(cl_mem), &m_vboMem[PREV_POSITION] );
    errNum |= clSetKernelArg( kernel, 7, sizeof(cl_mem), &m_vboMem[NORMAL] );
    if( errNum != CL_SUCCESS )
    {
        char str[256];
        FrmSprintf( str, sizeof(str), "Error setting kernel arguments (%d).", errNum );
        FrmLogMessage( str );
        return FALSE;
    }

    // Finally queue the kernel for execution
    size_t globalWorkSize[1] = { m_uiNumVerts - CLOTH_POINTS_WIDTH };
    errNum = clEnqueueNDRangeKernel( m_commandQueue, kernel, 1, NULL, globalWorkSize,
                                     NULL, 0, NULL, NULL );
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "Error queuing kernel for execution." );
        return FALSE;
    }

    return TRUE;
}
Exemplo n.º 6
0
//--------------------------------------------------------------------------------------
// Name: InitKernels()
// Desc: Initialize the kernels
//--------------------------------------------------------------------------------------
BOOL CClothSimCL::InitKernels( cl_context context, cl_device_id device )
{
    cl_int errNum;

    // Create the command queue
    m_commandQueue = clCreateCommandQueue( context, device, CL_QUEUE_PROFILING_ENABLE, &errNum );
    if ( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "Failed to create command queue" );
        return FALSE;
    }

    if( FALSE == FrmBuildComputeProgramFromFile( "Samples/Kernels/ClothSimCLGLES.cl", &m_program, context,
                                                  &device, 1, "-cl-fast-relaxed-math" ) )
    {
        return FALSE;
    }

    const CHAR* pKernelNames[] =
    {
        "SatisfyConstraints",
        "VerletIntegration",
        "UpdateNormals"
    };

    for( INT32 i = 0; i < NUM_KERNELS; i++ )
    {
        m_kernels[i] = clCreateKernel( m_program, pKernelNames[i] , &errNum );
        if ( errNum != CL_SUCCESS )
        {
            CHAR str[256];
            FrmSprintf( str, sizeof(str), "ERROR: Failed to create kernel '%s'.\n", pKernelNames[i] );
            FrmLogMessage( str );
            return FALSE;
        }
    }


    return TRUE;
}
bool SimpleConvolution::Initialize( cl_context context, cl_device_id device, cl_command_queue commandQueue ){
	m_context = context;
	m_device = device;
	m_commandQueue = commandQueue;
	cl_program m_program;
	cl_int errNum = 0;
	if( false == FrmGetOrBuildComputeProgramFromFile(&simpleConvolution, &m_program, m_context, &m_device, 1) ){
		return false;
	}

	m_kernel = clCreateKernel(m_program, "simpleConvolution", &errNum);
	if(errNum != CL_SUCCESS){
		char str[256];
		FrmSprintf( str, sizeof(str), "ERROR: Failed to create kernel simpleConvolution.\n" );
		FrmLogMessage( str );
		return false;
	}
	clReleaseProgram(m_program);
	return true;
}
Exemplo n.º 8
0
//--------------------------------------------------------------------------------------
// Name: SetUseOpenCL()
// Desc: Set whether to use OpenCL or CPU simulation
//--------------------------------------------------------------------------------------
VOID CClothSimCL::SetUseOpenCL( bool bUseOpenCL )
{
    // If we are switching from CPU to GPU, copy the
    // current position to the previous position so
    // the simulation does not jump around
    if( bUseOpenCL == true && m_bUseOpenCL == false)
    {
        AcquireReleaseVBOs( true );

        // Copy the current position to previous position to initialize the simulation
        cl_int errNum = 0;
        errNum = clEnqueueCopyBuffer( m_commandQueue, m_vboMem[CUR_POSITION], m_vboMem[PREV_POSITION], 0, 0, sizeof(cl_float4) * m_uiNumVerts,
            0, NULL, NULL);
        if( errNum != CL_SUCCESS )
        {
            FrmLogMessage( "Error copying positions VBO.\n" );
            return;
        }

        AcquireReleaseVBOs( false );

    }
    m_bUseOpenCL = bUseOpenCL;
}
bool SimpleConvolution::runCLKernels( const float* pInArray, const float* pMaskArray, float* pOutArray){
	cl_mem inputBuffer = 0, maskBuffer = 0, outputBuffer = 0;
	//cl_int nArraySize = width * height;
	cl_int errNum = 0;
	cl_event events[2];
	inputBuffer = clCreateBuffer(m_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, width*height*sizeof(float), (void*)pInArray, &errNum );
	if(errNum != CL_SUCCESS){
		printf( "ERROR: allocation of device input array.\n" );
		return false;
	}
	maskBuffer = clCreateBuffer(m_context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, maskWidth*maskHeight*sizeof(float), (void*)pMaskArray, &errNum);
	if(errNum != CL_SUCCESS){
		clReleaseMemObject(inputBuffer);
		printf( "ERROR: allocation of device input array.\n" );
		return false;
	}
	outputBuffer = clCreateBuffer(m_context, CL_MEM_READ_WRITE, width*height*sizeof(float), NULL, &errNum );
	if(errNum != CL_SUCCESS){
		clReleaseMemObject(inputBuffer);
		clReleaseMemObject(maskBuffer);
		printf( "ERROR: allocation of device input array.\n" );
		return false;
	}

	size_t gws[1] = { width * height };
	size_t lws[1] = { 256 };
	errNum |= clSetKernelArg( m_kernel, 0, sizeof(cl_mem), (void *)&outputBuffer );
	errNum |= clSetKernelArg( m_kernel, 1, sizeof(cl_mem), (void *)&inputBuffer);
	errNum |= clSetKernelArg( m_kernel, 2, sizeof(cl_mem), (void *)&maskBuffer);
	cl_uint2 inputDimensions = {width, height};
	cl_uint2 maskDimensions  = {maskWidth, maskHeight};
	errNum |= clSetKernelArg( m_kernel, 3, sizeof(cl_uint2), (void *)&inputDimensions );
	errNum |= clSetKernelArg( m_kernel, 4, sizeof(cl_uint2), (void *)&maskDimensions );
	if( errNum != CL_SUCCESS )
	{
		FrmLogMessage( "Error setting kernel arguments" );
		return false;
	}

	errNum = clEnqueueNDRangeKernel( m_commandQueue, m_kernel, 1, NULL,
		gws, lws, 0, NULL, &events[0] );
	if( errNum != CL_SUCCESS )
	{
		FrmLogMessage( "Error setting kernel arguments" );
		return false;
	}
	errNum = clFlush(m_commandQueue);
	errNum = waitForEventAndRelease(&events[0]);

	errNum = clEnqueueReadBuffer( m_commandQueue, outputBuffer, CL_TRUE, 0, width * height * sizeof(float), pOutArray, 0, NULL, &events[1]);
	if(errNum != CL_SUCCESS)
	{
		return false;
	}
	errNum = clFlush(m_commandQueue);
	errNum = waitForEventAndRelease(&events[1]);

	clReleaseMemObject(inputBuffer);
	clReleaseMemObject(maskBuffer);
	clReleaseMemObject(outputBuffer);
	return true;
}
Exemplo n.º 10
0
//--------------------------------------------------------------------------------------
// Name: SatisfyConstraintsCL()
// Desc: Perform satisfy spring constraint using OpeNCL
//--------------------------------------------------------------------------------------
BOOL CClothSimCL::SatisfyConstraintsCL()
{
    cl_int errNum = 0;

    // Copy the positions to another buffer for double buffering
    errNum = clEnqueueCopyBuffer( m_commandQueue, m_vboMem[CUR_POSITION], m_vertsCopyMem, 0, 0, sizeof(cl_float4) * m_uiNumVerts,
        0, NULL, NULL);
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "Error copying positions VBO.\n" );
        return FALSE;
    }

    // Set kernel arguments
    cl_kernel kernel = m_kernels[SATISFY_CONSTRAINTS];
    errNum |= clSetKernelArg( kernel, 2, sizeof(cl_mem), &m_baseDistMem );
    errNum |= clSetKernelArg( kernel, 3, sizeof(cl_mem), &m_baseDistNeighborIndexMem );
    errNum |= clSetKernelArg( kernel, 4, sizeof(cl_mem), &m_baseDistStartIndexMem );
    errNum |= clSetKernelArg( kernel, 5, sizeof(cl_mem), &m_baseDistEndIndexMem );
    if( errNum != CL_SUCCESS )
    {
        char str[256];
        FrmSprintf( str, sizeof(str), "Error setting kernel arguments (%d).", errNum );
        FrmLogMessage( str );
        return FALSE;
    }

    // Number of NxN grid chunks to perform separately, largest chunk should be 16x16
    int numChunks = (CLOTH_POINTS_WIDTH * CLOTH_POINTS_HEIGHT) / (16 * 16);
    size_t globalWorkSize[1] =
    {
        (CLOTH_POINTS_WIDTH * CLOTH_POINTS_HEIGHT) / numChunks
    };

    char str[1024];

    for( UINT32 i = 0; i < m_ConstraintIters; i++ )
    {
        bool even = ((i % 2) == 0);

        // Swap input/output
        errNum |= clSetKernelArg( kernel, 0, sizeof(cl_mem), even ? (&m_vboMem[CUR_POSITION]) : (&m_vertsCopyMem) );
        errNum |= clSetKernelArg( kernel, 1, sizeof(cl_mem), even ? (&m_vertsCopyMem) : (&m_vboMem[CUR_POSITION]) );
        if( errNum != CL_SUCCESS )
        {
            FrmSprintf( str, sizeof(str), "Error setting kernel arguments (%d).", errNum );
            FrmLogMessage( str );
            return FALSE;
        }

        for( int chunk = 0; chunk < numChunks; chunk++ )
        {
            size_t globalWorkOffset[1] = { chunk * globalWorkSize[0] };
            errNum = clEnqueueNDRangeKernel( m_commandQueue, kernel, 1, globalWorkOffset, globalWorkSize,
                                             NULL, 0, NULL, NULL );
            if( errNum != CL_SUCCESS )
            {
                FrmLogMessage( "Error queuing kernel for execution." );
                return FALSE;
            }

        }
    }
    return TRUE;
}
Exemplo n.º 11
0
//--------------------------------------------------------------------------------------
// Name: InitConstraintsBaseDists()
// Desc: Compute base distances and store in an OpenCL memory object
//--------------------------------------------------------------------------------------
BOOL CClothSimCL::InitConstraintsBaseDists( cl_context context )
{
    cl_int errNum = 0;

    struct Offsets
    {
        INT32 xOffset;
        INT32 yOffset;
    };

    // Offsets of neighbors for constraints
    const struct Offsets cConstraintOffsets[] =
    {
        { -1,  0 },         // left
        { +1,  0 },         // right
        {  0, -1 },         // up
        {  0,  1 },         // down
        { -1, -1 },         // diagUpLeft
        { +1, -1 },         // diagUpRight
        { -1, +1 },         // diagDownLeft
        { +1, +1 },         // diagDownRight
        { -2,  0 },         // shearLeft
        { +2,  0 },         // shearRight
        {  0, -2 },         // shearUp
        {  0, +2 },         // shearDown
        { -2, -2 },         // shearUpLeft
        { +2, -2 },         // shearUpRight
        { -2, +2 },         // shearDownLeft
        { +2, +2 },         // shearDownRight
    };
    INT32 numConstraints = sizeof(cConstraintOffsets) / sizeof(struct Offsets);
    FLOAT32* pBaseDists = new FLOAT32[ numConstraints * m_uiNumVerts ];
    UINT32* pBaseDistNeighborIndex = new UINT32[  numConstraints * m_uiNumVerts ];

    UINT32* pBaseDistStartIndex = new UINT32[ m_uiNumVerts ];
    UINT32* pBaseDistEndIndex = new UINT32[ m_uiNumVerts ];

    UINT32 index = 0;
    for( INT32 y = 0; y < CLOTH_POINTS_HEIGHT; y++ )
    {
        for( INT32 x = 0; x < CLOTH_POINTS_WIDTH; x++ )
        {
            int vertIdx = (y * CLOTH_POINTS_WIDTH + x);
            pBaseDistStartIndex[vertIdx] = index;

            for( INT32 c = 0; c < numConstraints; c++ )
            {
                INT32 constraintX = x + cConstraintOffsets[c].xOffset;
                INT32 constraintY = y + cConstraintOffsets[c].yOffset;
                FLOAT32 baseDist = 0.0f;

                unsigned int constraintIdx = (constraintY * CLOTH_POINTS_WIDTH + constraintX);

                if( constraintX >= 0 && constraintX < CLOTH_POINTS_WIDTH &&
                    constraintY >= 0 && constraintY < CLOTH_POINTS_HEIGHT )
                {
                    FRMVECTOR3 vecA; loadXYZ(vecA, (float *)&m_pVerts[vertIdx * 4]);
                    FRMVECTOR3 vecB; loadXYZ(vecB, (float *)&m_pVerts[constraintIdx * 4]);
                    FRMVECTOR3 vecDiff = vecB - vecA;

                    baseDist = FrmVector3Length( vecDiff );
                    pBaseDists[index] = baseDist;
                    pBaseDistNeighborIndex[index] = constraintIdx;
                    index++;
                }
            }

            pBaseDistEndIndex[vertIdx] = index;
        }
    }

    int numTotalConstraints = index;

    // Create a memory object to hold the constraint buffer
    m_baseDistMem = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(FLOAT32) * numTotalConstraints,
                                    pBaseDists, &errNum );
    delete [] pBaseDists;
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "Error creating OpenCL BaseDists buffer." );
        return FALSE;
    }

    m_baseDistNeighborIndexMem = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(UINT32) * numTotalConstraints,
                                                 pBaseDistNeighborIndex, &errNum );
    delete [] pBaseDistNeighborIndex;
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "Error creating OpenCL BaseDistsStartIndex buffer." );
        return FALSE;
    }

    m_baseDistEndIndexMem = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(UINT32) * m_uiNumVerts,
                                            pBaseDistEndIndex, &errNum );
    delete [] pBaseDistEndIndex;
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "Error creating OpenCL BaseDistsStartIndex buffer." );
        return FALSE;
    }

    m_baseDistStartIndexMem = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(UINT32) * m_uiNumVerts,
                                              pBaseDistStartIndex, &errNum );
    delete [] pBaseDistStartIndex;
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "Error creating OpenCL BaseDistsStartIndex buffer." );
        return FALSE;
    }

    return TRUE;
}
Exemplo n.º 12
0
//--------------------------------------------------------------------------------------
// Name: CreateNativeWindow()
// Desc: Creates a window for the application
//--------------------------------------------------------------------------------------
BOOL CFrmAppContainer::CreateNativeWindow( NativeWindowType* pWindow,
                                           NativeDisplayType* pDisplay )
{
	LPCTSTR	pszTitle = _T("Snapdragon SDK Window");

    // Need a WCHAR version of m_pApplication->m_strName to pass to window creation
    WCHAR wszName[MAX_PATH];
    mbstowcs(wszName, m_pApplication->m_strName, MAX_PATH);

    // The global instance
    HINSTANCE hInstance  = GetModuleHandle( NULL );

	// If the size is <= 0 then we are running if full screen mode
	BOOL fFullScreen = FALSE;
	if(m_pApplication->m_nWidth <= 0 || m_pApplication->m_nHeight <= 0)
	{
		fFullScreen = TRUE;
	}

    // Register the window class
    WNDCLASS wc = {0};
    wc.style          = CS_HREDRAW | CS_VREDRAW;  // Window style
    wc.lpfnWndProc    = (WNDPROC)WndProc;                    // WndProc message handler
    wc.cbClsExtra     = 0;                                   // No extra window data
    wc.cbWndExtra     = 0;                                   // No extra window data
    wc.hInstance      = hInstance;                           // Instance
    wc.hIcon          = NULL;
    wc.hCursor        = LoadCursor( NULL, IDC_ARROW );       // Cursor
    wc.hInstance      = hInstance;
    wc.hbrBackground  = NULL;                                // No Background
    wc.lpszMenuName   = NULL;                                // No menu
    wc.lpszClassName  = pszTitle;                 // Set the class name

    if( FALSE == RegisterClass(&wc) )
    {
        FrmLogMessage( "ERROR: Failed to register window class.\n" );
        return FALSE;
    }

	HWND hParentWnd = NULL;
	HWND hClientWnd = NULL;
	DWORD dwWindowStyle;
	if(!fFullScreen)
	{
		// Adjust the window size to fit our rectangle
		dwWindowStyle =	WS_SYSMENU | WS_MINIMIZEBOX | WS_CAPTION | WS_BORDER |
						WS_CLIPSIBLINGS | WS_CLIPCHILDREN;
		RECT rcWindow;
		SetRect( &rcWindow, 0, 0, m_pApplication->m_nWidth, m_pApplication->m_nHeight );
		AdjustWindowRectEx( &rcWindow, dwWindowStyle, FALSE, 0 );
		if(rcWindow.left < 0)
		{
			// AdjustWindowRectEx gives back negative values if the window is placed
			// such that it overlaps the WM start bar.  This always happens if the 
			// rectangle starts at (0,0) like above.
			rcWindow.right += -rcWindow.left;
			rcWindow.left = 0;
		}
		if(rcWindow.top < 0)
		{
			rcWindow.bottom += -rcWindow.top;
			rcWindow.top = 0;
		}

    // Create the parent window
		hParentWnd = CreateWindow(	wc.lpszClassName,					//Class
									wszName,			                //Title
									dwWindowStyle,						//Style
									50+rcWindow.left,50+rcWindow.top,	//Position
									(rcWindow.right-rcWindow.left),		//Width
									(rcWindow.bottom-rcWindow.top),		//Height
									NULL,								//Noparentwindow
									NULL,								//Nomenu
									hInstance,							//Instance
									NULL);								//Creationparameter
		if( NULL == hParentWnd )
		{
			FrmLogMessage( "ERROR: Failed to create window.\n" );
			return FALSE;
		}

	}
	else
	{
		// Full Screen Mode
		m_pApplication->m_nWidth = GetSystemMetrics(SM_CXSCREEN);
		m_pApplication->m_nHeight = GetSystemMetrics(SM_CYSCREEN);
	}

	// Create the client window
	if(!fFullScreen)
		dwWindowStyle = WS_CHILD;
	else
		dwWindowStyle = WS_VISIBLE;

	hClientWnd = CreateWindow(	wc.lpszClassName,			//Class
								wszName,	                //Title
								dwWindowStyle,				//Style
								0,0,						//Pos and size
								m_pApplication->m_nWidth,
								m_pApplication->m_nHeight,	//Pos and size
								hParentWnd,					//Parent window
								NULL,						//No menu
								hInstance,					//Instance
								NULL);						//Creation parameter
	if( NULL == hClientWnd )
	{
		DWORD dwLastError = GetLastError();
		FrmLogMessage( "ERROR: Failed to create window.\n" );
		return FALSE;
	}

	if(fFullScreen)
	{
		// Hide the taskbars
		// SHFullScreen(hClientWnd, SHFS_HIDETASKBAR | SHFS_HIDESTARTICON | SHFS_HIDESIPBUTTON);
	}

	// Pass application data pointer to the windows for later use
	if(hParentWnd != NULL)
		SetWindowLong( hParentWnd, GWL_USERDATA, (LONG)this );

	if(hClientWnd != NULL)
		SetWindowLong( hClientWnd, GWL_USERDATA, (LONG)this );

    // Note: We delay showing the window until after Initialization() succeeds
    // Otherwise, an unsightly, empty window briefly appears during initialization

    // Return
    (*pWindow)  = (NativeWindowType)hClientWnd;
    (*pDisplay) = NULL;


    return TRUE;
}
Exemplo n.º 13
0
//--------------------------------------------------------------------------------------
// Name: Initialize()
// Desc:
//--------------------------------------------------------------------------------------
BOOL CSample::Initialize()
{
    cl_int errNum;

    if(!FrmOpenConsole())
        return FALSE;

    // Create the command queue
    m_commandQueue = clCreateCommandQueue( m_context, m_devices[0], CL_QUEUE_PROFILING_ENABLE, &errNum );
    if ( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "Failed to create command queue" );
        return FALSE;
    }

    if( FALSE == FrmBuildComputeProgramFromFile( "Samples/Kernels/VectorAdd.cl", &m_program, m_context,
                                                  &m_devices[0], 1, "-cl-fast-relaxed-math" ) )
    {
        return FALSE;
    }

    // Create kernel
    m_kernel = clCreateKernel( m_program, "VectorAdd", &errNum );
    if ( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "Failed to create kernel 'VectorAdd'\n" );
        return FALSE;
    }

     // Create device buffers
    m_srcA = clCreateBuffer( m_context, CL_MEM_READ_ONLY, m_nNumVectors * sizeof(FRMVECTOR4), NULL, &errNum );
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "ERROR: allocation device host buffer A" );
        return FALSE;
    }

    m_srcB = clCreateBuffer( m_context, CL_MEM_READ_ONLY, m_nNumVectors * sizeof(FRMVECTOR4), NULL, &errNum );
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "ERROR: allocation device host buffer B" );
        return FALSE;
    }

    // Map to host arrys
    FRMVECTOR4 *pHostA = (FRMVECTOR4*) clEnqueueMapBuffer( m_commandQueue, m_srcA, CL_TRUE, CL_MAP_WRITE, 0, sizeof(FRMVECTOR4) * m_nNumVectors,
                                                           0, NULL, NULL, &errNum );
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "ERROR: mapping device buffer A." );
        return FALSE;
    }

    FRMVECTOR4 *pHostB = (FRMVECTOR4*) clEnqueueMapBuffer( m_commandQueue, m_srcB, CL_TRUE, CL_MAP_WRITE, 0, sizeof(FRMVECTOR4) * m_nNumVectors,
                                                           0, NULL, NULL, &errNum );
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "ERROR: mapping device buffer B." );
        return FALSE;
    }

    // Fill with data
    for( size_t i = 0; i < m_nNumVectors; i++ )
    {
        FLOAT32 valA = (FLOAT32)i / m_nNumVectors;
        FLOAT32 valB = 1.0f - valA;
        pHostA[i] = FRMVECTOR4( valA, valA, valA, valA );
        pHostB[i] = FRMVECTOR4( valB, valB, valB, valB );
    }



    // Compute reference results on CPU
    if ( RunTests() )
    {
        m_pRefResults = new FRMVECTOR4[ m_nNumVectors ];
        for( size_t i = 0; i < m_nNumVectors; i++ )
        {
            m_pRefResults[ i ] = pHostA[ i ] + pHostB[ i ];
        }
    }

    // Unmap buffers
    errNum = clEnqueueUnmapMemObject( m_commandQueue, m_srcA, pHostA, 0, NULL, NULL );
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "ERROR: Unmapping buffer A." );
        return FALSE;
    }

    errNum = clEnqueueUnmapMemObject( m_commandQueue, m_srcB, pHostB, 0, NULL, NULL );
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "ERROR: Unmapping buffer B." );
        return FALSE;
    }

    // Create result buffer
    m_result = clCreateBuffer( m_context, CL_MEM_READ_WRITE, m_nNumVectors * sizeof(FRMVECTOR4), NULL, &errNum );
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "ERROR: allocation device host buffer result" );
        return FALSE;
    }
    return TRUE;
}
Exemplo n.º 14
0
//--------------------------------------------------------------------------------------
// Name: Compute()
// Desc:
//--------------------------------------------------------------------------------------
BOOL CSample::Compute()
{
    m_Timer.Reset();
    m_Timer.Start();

    char str[256];

    // Set the kernel arguments
    cl_int errNum = 0;
    errNum |= clSetKernelArg( m_kernel, 0, sizeof(cl_mem), &m_srcA );
    errNum |= clSetKernelArg( m_kernel, 1, sizeof(cl_mem), &m_srcB );
    errNum |= clSetKernelArg( m_kernel, 2, sizeof(cl_mem), &m_result );
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "Error setting kernel arguments" );
        return FALSE;
    }

    size_t globalWorkSize[1] = { m_nNumVectors };
    size_t localWorkSize[1] = { 1 };

    cl_event kernel_event;
    cl_ulong t_queued=0, t_submit=0, t_start=0, t_end=0;

    // Queue the kernel for execution
    errNum = clEnqueueNDRangeKernel( m_commandQueue, m_kernel, 1, NULL,
                                    globalWorkSize, localWorkSize, 0, NULL, &kernel_event );
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "Error queueing kernel for execution." );
        return FALSE;
    }

    clWaitForEvents(1 , &kernel_event);

    // Query timestamp for kernel profiling
    //   Queued time is when the command is queued to host.
    //   Submit time is when the command is submitted from host to device.
    //   Start time is when the command starts the execution.
    //   End time is when the command finishes the execution.
    // The delta between start and end, marks the total elapsed time to execute a kernel in device.
    errNum = clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_QUEUED,
                sizeof(cl_ulong), &t_queued, NULL);
    if( errNum != CL_SUCCESS ) FrmLogMessage( "Error getting queued timestamp." );
    errNum = clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_SUBMIT,
                sizeof(cl_ulong), &t_submit, NULL);
    if( errNum != CL_SUCCESS ) FrmLogMessage( "Error getting submit timestamp." );
    errNum = clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_START,
                sizeof(cl_ulong), &t_start,    NULL);
    if( errNum != CL_SUCCESS ) FrmLogMessage( "Error getting start timestamp." );
    errNum = clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_END,
                sizeof(cl_ulong), &t_end, NULL);
    if( errNum != CL_SUCCESS ) FrmLogMessage( "Error getting end timestamp." );

    FrmLogMessage("Kernel event profiling....(nano sec)\n");
    FrmSprintf(str, sizeof(str), "  -> Queued time: %lu\n", t_queued);
    FrmLogMessage( str );
    FrmSprintf(str, sizeof(str), "  -> Submit time: %lu\n", t_submit);
    FrmLogMessage( str );
    FrmSprintf(str, sizeof(str), "  -> Start time:  %lu\n", t_start);
    FrmLogMessage( str );
    FrmSprintf(str, sizeof(str), "  -> End time:    %lu\n", t_end);
    FrmLogMessage( str );

    clReleaseEvent(kernel_event);

    // Read the result back to host memory
    FRMVECTOR4* pResult;
    pResult = (FRMVECTOR4*) clEnqueueMapBuffer( m_commandQueue, m_result, CL_TRUE, CL_MAP_READ, 0, sizeof(FRMVECTOR4) * m_nNumVectors,
                                                0, NULL, NULL, &errNum );
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "Error enqueuing buffer map." );
        return FALSE;
    }
    m_Timer.Stop();

    FrmSprintf( str, sizeof(str), "Results: '%d' vector additions in '%.6f' seconds.\n", m_nNumVectors, m_Timer.GetTime() );
    FrmLogMessage( str );

    // Test results again CPU reference
    BOOL result = TRUE;
    if ( RunTests() )
    {
        const FLOAT32 epsilon = 0.000001f;

        for( size_t i = 0; i < m_nNumVectors; i++ )
        {
            for ( size_t j = 0; j < 4; j++ )
            {
                FLOAT32 refVal = m_pRefResults[ i ].v[ j ];
                FLOAT32 val = pResult[ i ].v[ j ];

                if( FrmAbs( refVal - val ) > epsilon )
                {
                    FrmSprintf( str, sizeof(str), "Reference test failure, ref = (%f), result = (%f) Diff = (%f).\n", refVal, val, FrmAbs(refVal - val));
                    FrmLogMessage( str );
                    result = FALSE;
                }
            }
        }

    }

    // Unmap buffer
    errNum = clEnqueueUnmapMemObject( m_commandQueue, m_result, pResult, 0, NULL, NULL );
    if( errNum != CL_SUCCESS )
    {
        FrmLogMessage( "ERROR: Unmapping result buffer." );
        return FALSE;
    }
    return result;
}
Exemplo n.º 15
0
//--------------------------------------------------------------------------------------
// Name: CreateOpenCLContext()
// Desc: Create the OpenCL context based on the command-line options for creation
//--------------------------------------------------------------------------------------
BOOL CFrmComputeApplication::CreateOpenCLContext(cl_device_type deviceType)
{
    cl_int errNum;
    cl_uint numPlatforms = 0;
    cl_platform_id platformId;

    // Get the first platform ID
    errNum = clGetPlatformIDs( 1, &platformId, &numPlatforms );
    if (errNum != CL_SUCCESS || numPlatforms <= 0)
    {
        FrmLogMessage("No OpenCL platforms found.");
        return FALSE;
    }

    m_platform = platformId;

    // Get the number of devices for the requested device type (CPU, GPU, ALL)
    cl_uint numDevices = 0;
    errNum = clGetDeviceIDs( platformId, deviceType, 0, NULL, &numDevices );
    if (errNum != CL_SUCCESS || numDevices <= 0)
    {
        FrmLogMessage("No matching OpenCL devices found.");
        return FALSE;
    }

    char platformInfo[1024];
    char logMessage[2048];
    errNum = clGetPlatformInfo( platformId, CL_PLATFORM_VENDOR, sizeof(platformInfo), platformInfo, NULL );
    if (errNum != CL_SUCCESS)
    {
        FrmLogMessage("ERROR: getting platform info.");
        return FALSE;
    }
    FrmSprintf( logMessage, sizeof(logMessage), "OpenCL Platform: %s\n", platformInfo );
    FrmLogMessage( logMessage );

    // Get the devices
    m_devices = new cl_device_id[numDevices];
    m_deviceCount = numDevices;
    errNum = clGetDeviceIDs( platformId, deviceType, numDevices, m_devices, NULL );
    if (errNum != CL_SUCCESS)
    {
        FrmLogMessage("Erorr getting OpenCL device(s).");
        return FALSE;
    }

    switch (deviceType)
    {
    case CL_DEVICE_TYPE_GPU:
        FrmLogMessage("Selected device: GPU\n");
        break;
    case CL_DEVICE_TYPE_CPU:
        FrmLogMessage("Selected device: CPU\n");
        break;
    case CL_DEVICE_TYPE_DEFAULT:
    default:
        FrmLogMessage("Selected device: DEFAULT\n");
        break;
    }

    for (int i = 0; i < m_deviceCount; i++)
    {
        char deviceInfo[1024];
        errNum = clGetDeviceInfo( m_devices[i], CL_DEVICE_NAME, sizeof(deviceInfo), deviceInfo, NULL );
        if (errNum == CL_SUCCESS )
        {
            FrmSprintf( logMessage, sizeof(logMessage), "OpenCL Device Name (%d) : %s\n", i , deviceInfo );
            FrmLogMessage( logMessage );
        }
    }

    // Finally, create the context
    cl_context_properties contextProperties[] =
    {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)platformId,
        0
    };
    m_context = clCreateContext( contextProperties, m_deviceCount, m_devices, NULL, NULL, &errNum );
    if (errNum != CL_SUCCESS)
    {
        FrmLogMessage("Could not create OpenCL context.");
        return FALSE;
    }

    return TRUE;
}
Exemplo n.º 16
0
//--------------------------------------------------------------------------------------
// Name: Select2DWGSize()
// Desc: Select 2D local workgroup size based on user input.
//       Final selected dimension is AxB where minWorkGroupSize <= AxB <= maxWorkGroupSize
//.......The dimension of A and B is based on m_WG2DWHRatio where A/B = m_WG2DWHRatio / (100-m_WG2DWHRatio)
//--------------------------------------------------------------------------------------
void CFrmComputeApplication::Select2DWGSize(size_t maxWorkGroupSize, size_t defGlobalWG_X, size_t defGlobalWG_Y, size_t minWorkGroupSize)
{

    char msg[512];

    m_globalWorkSize[0] = defGlobalWG_X;
    m_globalWorkSize[1] = defGlobalWG_Y;

    // m_WGSize is the percentage of maxWorkGroupSize
    float selectedWgSize = (float)m_WGSize / 100.0f * (float)maxWorkGroupSize;
    if ((int)selectedWgSize < minWorkGroupSize) selectedWgSize = (float)minWorkGroupSize;

    FrmSprintf( msg, sizeof(msg), "Max WG Size: %d. Selected WG Size: %d\n", maxWorkGroupSize, (int)selectedWgSize);
    FrmLogMessage( msg );
    FrmSprintf( msg, sizeof(msg), "Global WG Size: %dx%d\n", m_globalWorkSize[0], m_globalWorkSize[1]);
    FrmLogMessage( msg );

    // if ratio = 0, leave local size calculation to the driver
    if (m_WG2DWHRatio == 0) return;

    // Ratio is out of scale 100
    // Adjust local dimension based on width:height ratio
    float whRatio = (float)m_WG2DWHRatio / (float)(100-m_WG2DWHRatio);
    m_localWorkSize[0] = (size_t) FrmSqrt( (FLOAT32) selectedWgSize * whRatio);

    // make sure the size is at lease 1
    if (m_localWorkSize[0] < 1) m_localWorkSize[0] = 1;


    // 50 means 1:1
    if (m_WG2DWHRatio == 50)
    {
        m_localWorkSize[1] = m_localWorkSize[0];
    }
    else
    {
        m_localWorkSize[1] = (size_t) selectedWgSize / m_localWorkSize[0];
        if (m_localWorkSize[1] < 1) m_localWorkSize[1] = 1;
    }

    // Compute the next global size that is a multiple of the local size
    size_t remndr = defGlobalWG_X % m_localWorkSize[0];
    if( remndr )
    {
        m_globalWorkSize[0] = defGlobalWG_X + m_localWorkSize[0] - remndr;
    }

    remndr = defGlobalWG_Y % m_localWorkSize[1];
    if( remndr )
    {
        m_globalWorkSize[1] = defGlobalWG_Y + m_localWorkSize[1] - remndr;
    }
    for (int i = 0; i < 2; i ++)
    {
        remndr = (m_globalWorkSize[i] % m_localWorkSize[i])? 1:0;
        m_numWorkgroup[i] = m_globalWorkSize[i] / m_localWorkSize[i] + remndr;
    }

    FrmSprintf( msg, sizeof(msg), "Local WG Size: %dx%d\n", m_localWorkSize[0], m_localWorkSize[1]);
    FrmLogMessage( msg );
    FrmSprintf( msg, sizeof(msg), "Num of local WG: (%d x %d)\n", m_numWorkgroup[0], m_numWorkgroup[1] );
    FrmLogMessage( msg );

}