//-------------------------------------------------------------------------------------- // Name: AcquireReleaseVBOs() // Desc: Acquire or release the VBO objects to OpenCL memory objects //-------------------------------------------------------------------------------------- BOOL CClothSimCL::AcquireReleaseVBOs(bool bAcquire) { cl_int errNum = 0; if( bAcquire ) { // Before acquiring, finish any pending OpenGL operations glFinish(); errNum = clEnqueueAcquireGLObjects( m_commandQueue, NUM_VBOS, &m_vboMem[0], 0, NULL, NULL ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error acquiring OpenGL VBOs as OpenCL memory objects.\n"); return FALSE; } } else { errNum = clEnqueueReleaseGLObjects( m_commandQueue, NUM_VBOS, &m_vboMem[0], 0, NULL, NULL ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error releasing OpenGL VBOs as OpenCL memory objects.\n"); return FALSE; } } return TRUE; }
//-------------------------------------------------------------------------------------- // Name: Initialize() // Desc: Initialize the OpenCL implementation of the cloth simulation //-------------------------------------------------------------------------------------- BOOL CClothSimCL::Initialize( cl_context context, cl_device_id device ) { // Call the base class first if( !CClothSim::Initialize() ) return FALSE; // Generate VBO for previous position glGenBuffers( 1, &m_hPrevPositionVBO ); glBindBuffer( GL_ARRAY_BUFFER, m_hPrevPositionVBO ); glBufferData( GL_ARRAY_BUFFER, m_uiNumVerts * 4 * sizeof(float), NULL, GL_DYNAMIC_DRAW ); glBufferSubData( GL_ARRAY_BUFFER, 0, m_uiNumVerts * 4 * sizeof(float), m_pVerts ); glBindBuffer( GL_ARRAY_BUFFER, 0 ); if( !InitKernels( context, device ) ) return FALSE; // Create OpenCL memory objects for the VBOs UINT32 vbos[NUM_VBOS] = { m_hPositionVBO, m_hPrevPositionVBO, m_hNormalVBO, m_hTangentVBO, m_hBitangentVBO, m_hTextureVBO, m_hBackNormalVBO, m_hBackTangentVBO, m_hBackBitangentVBO }; cl_int errNum = 0; for( INT32 i = 0; i < NUM_VBOS; i++ ) { m_vboMem[i] = clCreateFromGLBuffer( context, CL_MEM_READ_WRITE, vbos[i], &errNum ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error creating OpenCL memory object from GL VBO." ); return FALSE; } } // Initialize the base distances memory object if( !InitConstraintsBaseDists( context ) ) return FALSE; m_vertsCopyMem = clCreateBuffer( context, CL_MEM_READ_WRITE, sizeof(cl_float4) * m_uiNumVerts, NULL, &errNum); if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error creating OpenCL memory object." ); return FALSE; } return TRUE; }
//-------------------------------------------------------------------------------------- // Name: Run() // Desc: Create the framework, initialize the application, and render frames. //-------------------------------------------------------------------------------------- BOOL CFrmAppContainer::Run(cl_device_type deviceType) { // Use the executable's directory so that relative media paths work correctly { char processDir[PATH_MAX]; char currentDir[PATH_MAX]; struct stat dirInfo; pid_t pid = getpid(); sprintf(processDir, "/proc/%d/exe", (int)pid); if (readlink(processDir, currentDir, PATH_MAX) != -1) { strcpy(currentDir, dirname(currentDir)); chdir(currentDir); } else { printf("Error finding executable directory.\n"); return FALSE; } } // Create the Application m_pApplication = FrmCreateComputeApplicationInstance(); m_pApplication->m_bRunTests = TRUE; if( NULL == m_pApplication ) return FALSE; if( FALSE == m_pApplication->CreateOpenCLContext( deviceType ) ) { return FALSE; } if( FALSE == m_pApplication->Initialize() ) { printf( "Application failed initialization!\nPlease debug accordingly."); return FALSE; } // Run the computation BOOL result = m_pApplication->Compute(); if ( m_pApplication->RunTests() ) { char msg[256]; FrmSprintf( msg, sizeof(msg), "RunTests: %s\n", result ? "PASSED" : "FAILED" ); FrmLogMessage( msg ); } // Display the message log const CHAR* strErrorMessage = FrmGetMessageLog(); printf("%s", strErrorMessage); return TRUE; }
//-------------------------------------------------------------------------------------- // Name: UpdateNormalsCL() // Desc: Update the normal/tangent/binormal using OpenCL //-------------------------------------------------------------------------------------- BOOL CClothSimCL::UpdateNormalsCL() { // Acquire from OpenGL VBOs as OpenCL memory objects cl_int errNum = 0; cl_mem memObjs[7] = { m_vboMem[CUR_POSITION], m_vboMem[NORMAL], m_vboMem[TANGENT], m_vboMem[BITANGENT], m_vboMem[BACKNORMAL], m_vboMem[BACKTANGENT], m_vboMem[BACKBITANGENT] }; // Set the kernel arguments for the UpdateNormals kernel cl_kernel kernel = m_kernels[UPDATE_NORMALS]; errNum |= clSetKernelArg( kernel, 0, sizeof(cl_mem), &m_vboMem[CUR_POSITION] ); errNum |= clSetKernelArg( kernel, 1, sizeof(cl_mem), &m_vboMem[NORMAL] ); errNum |= clSetKernelArg( kernel, 2, sizeof(cl_mem), &m_vboMem[TANGENT] ); errNum |= clSetKernelArg( kernel, 3, sizeof(cl_mem), &m_vboMem[BITANGENT] ); errNum |= clSetKernelArg( kernel, 4, sizeof(cl_mem), &m_vboMem[BACKNORMAL] ); errNum |= clSetKernelArg( kernel, 5, sizeof(cl_mem), &m_vboMem[BACKTANGENT] ); errNum |= clSetKernelArg( kernel, 6, sizeof(cl_mem), &m_vboMem[BACKBITANGENT] ); if( errNum != CL_SUCCESS ) { char str[256]; FrmSprintf( str, sizeof(str), "Error setting kernel arguments (%d).", errNum ); FrmLogMessage( str ); return FALSE; } // Finally queue the kernel for execution size_t globalWorkSize[2] = { CLOTH_POINTS_WIDTH - 1, CLOTH_POINTS_HEIGHT - 1 }; errNum = clEnqueueNDRangeKernel( m_commandQueue, kernel, 2, NULL, globalWorkSize, NULL, 0, NULL, NULL ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error queuing kernel for execution." ); return FALSE; } return TRUE; }
//-------------------------------------------------------------------------------------- // Name: VerletIntegrationCL() // Desc: Perform verlet integration using OpenCL //-------------------------------------------------------------------------------------- BOOL CClothSimCL::VerletIntegrationCL( float fltElapsed ) { cl_int errNum = 0; // Set the kernel arguments for the VerletIntegration kernel FRMVECTOR4 VelDamping = FRMVECTOR4( m_VelDamping, 0.0f ); FRMVECTOR4 CurrentWind = FRMVECTOR4( m_CurrentWind, 0.0f ); FRMVECTOR4 WindDir = FRMVECTOR4( m_WindDir, 0.0f ); FRMVECTOR4 Acceleration = FRMVECTOR4( m_Gravity, 0.0f ); Acceleration *= 0.25f; cl_float ElapsedSquared = fltElapsed * fltElapsed; cl_kernel kernel = m_kernels[VERLET_INTEGRATION]; errNum |= clSetKernelArg( kernel, 0, sizeof(cl_float), &ElapsedSquared ); errNum |= clSetKernelArg( kernel, 1, sizeof(cl_float4), &Acceleration ); errNum |= clSetKernelArg( kernel, 2, sizeof(cl_float4), &VelDamping ); errNum |= clSetKernelArg( kernel, 3, sizeof(cl_float4), &WindDir ); errNum |= clSetKernelArg( kernel, 4, sizeof(cl_float4), &CurrentWind ); errNum |= clSetKernelArg( kernel, 5, sizeof(cl_mem), &m_vboMem[CUR_POSITION] ); errNum |= clSetKernelArg( kernel, 6, sizeof(cl_mem), &m_vboMem[PREV_POSITION] ); errNum |= clSetKernelArg( kernel, 7, sizeof(cl_mem), &m_vboMem[NORMAL] ); if( errNum != CL_SUCCESS ) { char str[256]; FrmSprintf( str, sizeof(str), "Error setting kernel arguments (%d).", errNum ); FrmLogMessage( str ); return FALSE; } // Finally queue the kernel for execution size_t globalWorkSize[1] = { m_uiNumVerts - CLOTH_POINTS_WIDTH }; errNum = clEnqueueNDRangeKernel( m_commandQueue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error queuing kernel for execution." ); return FALSE; } return TRUE; }
//-------------------------------------------------------------------------------------- // Name: InitKernels() // Desc: Initialize the kernels //-------------------------------------------------------------------------------------- BOOL CClothSimCL::InitKernels( cl_context context, cl_device_id device ) { cl_int errNum; // Create the command queue m_commandQueue = clCreateCommandQueue( context, device, CL_QUEUE_PROFILING_ENABLE, &errNum ); if ( errNum != CL_SUCCESS ) { FrmLogMessage( "Failed to create command queue" ); return FALSE; } if( FALSE == FrmBuildComputeProgramFromFile( "Samples/Kernels/ClothSimCLGLES.cl", &m_program, context, &device, 1, "-cl-fast-relaxed-math" ) ) { return FALSE; } const CHAR* pKernelNames[] = { "SatisfyConstraints", "VerletIntegration", "UpdateNormals" }; for( INT32 i = 0; i < NUM_KERNELS; i++ ) { m_kernels[i] = clCreateKernel( m_program, pKernelNames[i] , &errNum ); if ( errNum != CL_SUCCESS ) { CHAR str[256]; FrmSprintf( str, sizeof(str), "ERROR: Failed to create kernel '%s'.\n", pKernelNames[i] ); FrmLogMessage( str ); return FALSE; } } return TRUE; }
bool SimpleConvolution::Initialize( cl_context context, cl_device_id device, cl_command_queue commandQueue ){ m_context = context; m_device = device; m_commandQueue = commandQueue; cl_program m_program; cl_int errNum = 0; if( false == FrmGetOrBuildComputeProgramFromFile(&simpleConvolution, &m_program, m_context, &m_device, 1) ){ return false; } m_kernel = clCreateKernel(m_program, "simpleConvolution", &errNum); if(errNum != CL_SUCCESS){ char str[256]; FrmSprintf( str, sizeof(str), "ERROR: Failed to create kernel simpleConvolution.\n" ); FrmLogMessage( str ); return false; } clReleaseProgram(m_program); return true; }
//-------------------------------------------------------------------------------------- // Name: SetUseOpenCL() // Desc: Set whether to use OpenCL or CPU simulation //-------------------------------------------------------------------------------------- VOID CClothSimCL::SetUseOpenCL( bool bUseOpenCL ) { // If we are switching from CPU to GPU, copy the // current position to the previous position so // the simulation does not jump around if( bUseOpenCL == true && m_bUseOpenCL == false) { AcquireReleaseVBOs( true ); // Copy the current position to previous position to initialize the simulation cl_int errNum = 0; errNum = clEnqueueCopyBuffer( m_commandQueue, m_vboMem[CUR_POSITION], m_vboMem[PREV_POSITION], 0, 0, sizeof(cl_float4) * m_uiNumVerts, 0, NULL, NULL); if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error copying positions VBO.\n" ); return; } AcquireReleaseVBOs( false ); } m_bUseOpenCL = bUseOpenCL; }
bool SimpleConvolution::runCLKernels( const float* pInArray, const float* pMaskArray, float* pOutArray){ cl_mem inputBuffer = 0, maskBuffer = 0, outputBuffer = 0; //cl_int nArraySize = width * height; cl_int errNum = 0; cl_event events[2]; inputBuffer = clCreateBuffer(m_context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, width*height*sizeof(float), (void*)pInArray, &errNum ); if(errNum != CL_SUCCESS){ printf( "ERROR: allocation of device input array.\n" ); return false; } maskBuffer = clCreateBuffer(m_context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, maskWidth*maskHeight*sizeof(float), (void*)pMaskArray, &errNum); if(errNum != CL_SUCCESS){ clReleaseMemObject(inputBuffer); printf( "ERROR: allocation of device input array.\n" ); return false; } outputBuffer = clCreateBuffer(m_context, CL_MEM_READ_WRITE, width*height*sizeof(float), NULL, &errNum ); if(errNum != CL_SUCCESS){ clReleaseMemObject(inputBuffer); clReleaseMemObject(maskBuffer); printf( "ERROR: allocation of device input array.\n" ); return false; } size_t gws[1] = { width * height }; size_t lws[1] = { 256 }; errNum |= clSetKernelArg( m_kernel, 0, sizeof(cl_mem), (void *)&outputBuffer ); errNum |= clSetKernelArg( m_kernel, 1, sizeof(cl_mem), (void *)&inputBuffer); errNum |= clSetKernelArg( m_kernel, 2, sizeof(cl_mem), (void *)&maskBuffer); cl_uint2 inputDimensions = {width, height}; cl_uint2 maskDimensions = {maskWidth, maskHeight}; errNum |= clSetKernelArg( m_kernel, 3, sizeof(cl_uint2), (void *)&inputDimensions ); errNum |= clSetKernelArg( m_kernel, 4, sizeof(cl_uint2), (void *)&maskDimensions ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error setting kernel arguments" ); return false; } errNum = clEnqueueNDRangeKernel( m_commandQueue, m_kernel, 1, NULL, gws, lws, 0, NULL, &events[0] ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error setting kernel arguments" ); return false; } errNum = clFlush(m_commandQueue); errNum = waitForEventAndRelease(&events[0]); errNum = clEnqueueReadBuffer( m_commandQueue, outputBuffer, CL_TRUE, 0, width * height * sizeof(float), pOutArray, 0, NULL, &events[1]); if(errNum != CL_SUCCESS) { return false; } errNum = clFlush(m_commandQueue); errNum = waitForEventAndRelease(&events[1]); clReleaseMemObject(inputBuffer); clReleaseMemObject(maskBuffer); clReleaseMemObject(outputBuffer); return true; }
//-------------------------------------------------------------------------------------- // Name: SatisfyConstraintsCL() // Desc: Perform satisfy spring constraint using OpeNCL //-------------------------------------------------------------------------------------- BOOL CClothSimCL::SatisfyConstraintsCL() { cl_int errNum = 0; // Copy the positions to another buffer for double buffering errNum = clEnqueueCopyBuffer( m_commandQueue, m_vboMem[CUR_POSITION], m_vertsCopyMem, 0, 0, sizeof(cl_float4) * m_uiNumVerts, 0, NULL, NULL); if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error copying positions VBO.\n" ); return FALSE; } // Set kernel arguments cl_kernel kernel = m_kernels[SATISFY_CONSTRAINTS]; errNum |= clSetKernelArg( kernel, 2, sizeof(cl_mem), &m_baseDistMem ); errNum |= clSetKernelArg( kernel, 3, sizeof(cl_mem), &m_baseDistNeighborIndexMem ); errNum |= clSetKernelArg( kernel, 4, sizeof(cl_mem), &m_baseDistStartIndexMem ); errNum |= clSetKernelArg( kernel, 5, sizeof(cl_mem), &m_baseDistEndIndexMem ); if( errNum != CL_SUCCESS ) { char str[256]; FrmSprintf( str, sizeof(str), "Error setting kernel arguments (%d).", errNum ); FrmLogMessage( str ); return FALSE; } // Number of NxN grid chunks to perform separately, largest chunk should be 16x16 int numChunks = (CLOTH_POINTS_WIDTH * CLOTH_POINTS_HEIGHT) / (16 * 16); size_t globalWorkSize[1] = { (CLOTH_POINTS_WIDTH * CLOTH_POINTS_HEIGHT) / numChunks }; char str[1024]; for( UINT32 i = 0; i < m_ConstraintIters; i++ ) { bool even = ((i % 2) == 0); // Swap input/output errNum |= clSetKernelArg( kernel, 0, sizeof(cl_mem), even ? (&m_vboMem[CUR_POSITION]) : (&m_vertsCopyMem) ); errNum |= clSetKernelArg( kernel, 1, sizeof(cl_mem), even ? (&m_vertsCopyMem) : (&m_vboMem[CUR_POSITION]) ); if( errNum != CL_SUCCESS ) { FrmSprintf( str, sizeof(str), "Error setting kernel arguments (%d).", errNum ); FrmLogMessage( str ); return FALSE; } for( int chunk = 0; chunk < numChunks; chunk++ ) { size_t globalWorkOffset[1] = { chunk * globalWorkSize[0] }; errNum = clEnqueueNDRangeKernel( m_commandQueue, kernel, 1, globalWorkOffset, globalWorkSize, NULL, 0, NULL, NULL ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error queuing kernel for execution." ); return FALSE; } } } return TRUE; }
//-------------------------------------------------------------------------------------- // Name: InitConstraintsBaseDists() // Desc: Compute base distances and store in an OpenCL memory object //-------------------------------------------------------------------------------------- BOOL CClothSimCL::InitConstraintsBaseDists( cl_context context ) { cl_int errNum = 0; struct Offsets { INT32 xOffset; INT32 yOffset; }; // Offsets of neighbors for constraints const struct Offsets cConstraintOffsets[] = { { -1, 0 }, // left { +1, 0 }, // right { 0, -1 }, // up { 0, 1 }, // down { -1, -1 }, // diagUpLeft { +1, -1 }, // diagUpRight { -1, +1 }, // diagDownLeft { +1, +1 }, // diagDownRight { -2, 0 }, // shearLeft { +2, 0 }, // shearRight { 0, -2 }, // shearUp { 0, +2 }, // shearDown { -2, -2 }, // shearUpLeft { +2, -2 }, // shearUpRight { -2, +2 }, // shearDownLeft { +2, +2 }, // shearDownRight }; INT32 numConstraints = sizeof(cConstraintOffsets) / sizeof(struct Offsets); FLOAT32* pBaseDists = new FLOAT32[ numConstraints * m_uiNumVerts ]; UINT32* pBaseDistNeighborIndex = new UINT32[ numConstraints * m_uiNumVerts ]; UINT32* pBaseDistStartIndex = new UINT32[ m_uiNumVerts ]; UINT32* pBaseDistEndIndex = new UINT32[ m_uiNumVerts ]; UINT32 index = 0; for( INT32 y = 0; y < CLOTH_POINTS_HEIGHT; y++ ) { for( INT32 x = 0; x < CLOTH_POINTS_WIDTH; x++ ) { int vertIdx = (y * CLOTH_POINTS_WIDTH + x); pBaseDistStartIndex[vertIdx] = index; for( INT32 c = 0; c < numConstraints; c++ ) { INT32 constraintX = x + cConstraintOffsets[c].xOffset; INT32 constraintY = y + cConstraintOffsets[c].yOffset; FLOAT32 baseDist = 0.0f; unsigned int constraintIdx = (constraintY * CLOTH_POINTS_WIDTH + constraintX); if( constraintX >= 0 && constraintX < CLOTH_POINTS_WIDTH && constraintY >= 0 && constraintY < CLOTH_POINTS_HEIGHT ) { FRMVECTOR3 vecA; loadXYZ(vecA, (float *)&m_pVerts[vertIdx * 4]); FRMVECTOR3 vecB; loadXYZ(vecB, (float *)&m_pVerts[constraintIdx * 4]); FRMVECTOR3 vecDiff = vecB - vecA; baseDist = FrmVector3Length( vecDiff ); pBaseDists[index] = baseDist; pBaseDistNeighborIndex[index] = constraintIdx; index++; } } pBaseDistEndIndex[vertIdx] = index; } } int numTotalConstraints = index; // Create a memory object to hold the constraint buffer m_baseDistMem = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(FLOAT32) * numTotalConstraints, pBaseDists, &errNum ); delete [] pBaseDists; if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error creating OpenCL BaseDists buffer." ); return FALSE; } m_baseDistNeighborIndexMem = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(UINT32) * numTotalConstraints, pBaseDistNeighborIndex, &errNum ); delete [] pBaseDistNeighborIndex; if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error creating OpenCL BaseDistsStartIndex buffer." ); return FALSE; } m_baseDistEndIndexMem = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(UINT32) * m_uiNumVerts, pBaseDistEndIndex, &errNum ); delete [] pBaseDistEndIndex; if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error creating OpenCL BaseDistsStartIndex buffer." ); return FALSE; } m_baseDistStartIndexMem = clCreateBuffer( context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(UINT32) * m_uiNumVerts, pBaseDistStartIndex, &errNum ); delete [] pBaseDistStartIndex; if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error creating OpenCL BaseDistsStartIndex buffer." ); return FALSE; } return TRUE; }
//-------------------------------------------------------------------------------------- // Name: CreateNativeWindow() // Desc: Creates a window for the application //-------------------------------------------------------------------------------------- BOOL CFrmAppContainer::CreateNativeWindow( NativeWindowType* pWindow, NativeDisplayType* pDisplay ) { LPCTSTR pszTitle = _T("Snapdragon SDK Window"); // Need a WCHAR version of m_pApplication->m_strName to pass to window creation WCHAR wszName[MAX_PATH]; mbstowcs(wszName, m_pApplication->m_strName, MAX_PATH); // The global instance HINSTANCE hInstance = GetModuleHandle( NULL ); // If the size is <= 0 then we are running if full screen mode BOOL fFullScreen = FALSE; if(m_pApplication->m_nWidth <= 0 || m_pApplication->m_nHeight <= 0) { fFullScreen = TRUE; } // Register the window class WNDCLASS wc = {0}; wc.style = CS_HREDRAW | CS_VREDRAW; // Window style wc.lpfnWndProc = (WNDPROC)WndProc; // WndProc message handler wc.cbClsExtra = 0; // No extra window data wc.cbWndExtra = 0; // No extra window data wc.hInstance = hInstance; // Instance wc.hIcon = NULL; wc.hCursor = LoadCursor( NULL, IDC_ARROW ); // Cursor wc.hInstance = hInstance; wc.hbrBackground = NULL; // No Background wc.lpszMenuName = NULL; // No menu wc.lpszClassName = pszTitle; // Set the class name if( FALSE == RegisterClass(&wc) ) { FrmLogMessage( "ERROR: Failed to register window class.\n" ); return FALSE; } HWND hParentWnd = NULL; HWND hClientWnd = NULL; DWORD dwWindowStyle; if(!fFullScreen) { // Adjust the window size to fit our rectangle dwWindowStyle = WS_SYSMENU | WS_MINIMIZEBOX | WS_CAPTION | WS_BORDER | WS_CLIPSIBLINGS | WS_CLIPCHILDREN; RECT rcWindow; SetRect( &rcWindow, 0, 0, m_pApplication->m_nWidth, m_pApplication->m_nHeight ); AdjustWindowRectEx( &rcWindow, dwWindowStyle, FALSE, 0 ); if(rcWindow.left < 0) { // AdjustWindowRectEx gives back negative values if the window is placed // such that it overlaps the WM start bar. This always happens if the // rectangle starts at (0,0) like above. rcWindow.right += -rcWindow.left; rcWindow.left = 0; } if(rcWindow.top < 0) { rcWindow.bottom += -rcWindow.top; rcWindow.top = 0; } // Create the parent window hParentWnd = CreateWindow( wc.lpszClassName, //Class wszName, //Title dwWindowStyle, //Style 50+rcWindow.left,50+rcWindow.top, //Position (rcWindow.right-rcWindow.left), //Width (rcWindow.bottom-rcWindow.top), //Height NULL, //Noparentwindow NULL, //Nomenu hInstance, //Instance NULL); //Creationparameter if( NULL == hParentWnd ) { FrmLogMessage( "ERROR: Failed to create window.\n" ); return FALSE; } } else { // Full Screen Mode m_pApplication->m_nWidth = GetSystemMetrics(SM_CXSCREEN); m_pApplication->m_nHeight = GetSystemMetrics(SM_CYSCREEN); } // Create the client window if(!fFullScreen) dwWindowStyle = WS_CHILD; else dwWindowStyle = WS_VISIBLE; hClientWnd = CreateWindow( wc.lpszClassName, //Class wszName, //Title dwWindowStyle, //Style 0,0, //Pos and size m_pApplication->m_nWidth, m_pApplication->m_nHeight, //Pos and size hParentWnd, //Parent window NULL, //No menu hInstance, //Instance NULL); //Creation parameter if( NULL == hClientWnd ) { DWORD dwLastError = GetLastError(); FrmLogMessage( "ERROR: Failed to create window.\n" ); return FALSE; } if(fFullScreen) { // Hide the taskbars // SHFullScreen(hClientWnd, SHFS_HIDETASKBAR | SHFS_HIDESTARTICON | SHFS_HIDESIPBUTTON); } // Pass application data pointer to the windows for later use if(hParentWnd != NULL) SetWindowLong( hParentWnd, GWL_USERDATA, (LONG)this ); if(hClientWnd != NULL) SetWindowLong( hClientWnd, GWL_USERDATA, (LONG)this ); // Note: We delay showing the window until after Initialization() succeeds // Otherwise, an unsightly, empty window briefly appears during initialization // Return (*pWindow) = (NativeWindowType)hClientWnd; (*pDisplay) = NULL; return TRUE; }
//-------------------------------------------------------------------------------------- // Name: Initialize() // Desc: //-------------------------------------------------------------------------------------- BOOL CSample::Initialize() { cl_int errNum; if(!FrmOpenConsole()) return FALSE; // Create the command queue m_commandQueue = clCreateCommandQueue( m_context, m_devices[0], CL_QUEUE_PROFILING_ENABLE, &errNum ); if ( errNum != CL_SUCCESS ) { FrmLogMessage( "Failed to create command queue" ); return FALSE; } if( FALSE == FrmBuildComputeProgramFromFile( "Samples/Kernels/VectorAdd.cl", &m_program, m_context, &m_devices[0], 1, "-cl-fast-relaxed-math" ) ) { return FALSE; } // Create kernel m_kernel = clCreateKernel( m_program, "VectorAdd", &errNum ); if ( errNum != CL_SUCCESS ) { FrmLogMessage( "Failed to create kernel 'VectorAdd'\n" ); return FALSE; } // Create device buffers m_srcA = clCreateBuffer( m_context, CL_MEM_READ_ONLY, m_nNumVectors * sizeof(FRMVECTOR4), NULL, &errNum ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "ERROR: allocation device host buffer A" ); return FALSE; } m_srcB = clCreateBuffer( m_context, CL_MEM_READ_ONLY, m_nNumVectors * sizeof(FRMVECTOR4), NULL, &errNum ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "ERROR: allocation device host buffer B" ); return FALSE; } // Map to host arrys FRMVECTOR4 *pHostA = (FRMVECTOR4*) clEnqueueMapBuffer( m_commandQueue, m_srcA, CL_TRUE, CL_MAP_WRITE, 0, sizeof(FRMVECTOR4) * m_nNumVectors, 0, NULL, NULL, &errNum ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "ERROR: mapping device buffer A." ); return FALSE; } FRMVECTOR4 *pHostB = (FRMVECTOR4*) clEnqueueMapBuffer( m_commandQueue, m_srcB, CL_TRUE, CL_MAP_WRITE, 0, sizeof(FRMVECTOR4) * m_nNumVectors, 0, NULL, NULL, &errNum ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "ERROR: mapping device buffer B." ); return FALSE; } // Fill with data for( size_t i = 0; i < m_nNumVectors; i++ ) { FLOAT32 valA = (FLOAT32)i / m_nNumVectors; FLOAT32 valB = 1.0f - valA; pHostA[i] = FRMVECTOR4( valA, valA, valA, valA ); pHostB[i] = FRMVECTOR4( valB, valB, valB, valB ); } // Compute reference results on CPU if ( RunTests() ) { m_pRefResults = new FRMVECTOR4[ m_nNumVectors ]; for( size_t i = 0; i < m_nNumVectors; i++ ) { m_pRefResults[ i ] = pHostA[ i ] + pHostB[ i ]; } } // Unmap buffers errNum = clEnqueueUnmapMemObject( m_commandQueue, m_srcA, pHostA, 0, NULL, NULL ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "ERROR: Unmapping buffer A." ); return FALSE; } errNum = clEnqueueUnmapMemObject( m_commandQueue, m_srcB, pHostB, 0, NULL, NULL ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "ERROR: Unmapping buffer B." ); return FALSE; } // Create result buffer m_result = clCreateBuffer( m_context, CL_MEM_READ_WRITE, m_nNumVectors * sizeof(FRMVECTOR4), NULL, &errNum ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "ERROR: allocation device host buffer result" ); return FALSE; } return TRUE; }
//-------------------------------------------------------------------------------------- // Name: Compute() // Desc: //-------------------------------------------------------------------------------------- BOOL CSample::Compute() { m_Timer.Reset(); m_Timer.Start(); char str[256]; // Set the kernel arguments cl_int errNum = 0; errNum |= clSetKernelArg( m_kernel, 0, sizeof(cl_mem), &m_srcA ); errNum |= clSetKernelArg( m_kernel, 1, sizeof(cl_mem), &m_srcB ); errNum |= clSetKernelArg( m_kernel, 2, sizeof(cl_mem), &m_result ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error setting kernel arguments" ); return FALSE; } size_t globalWorkSize[1] = { m_nNumVectors }; size_t localWorkSize[1] = { 1 }; cl_event kernel_event; cl_ulong t_queued=0, t_submit=0, t_start=0, t_end=0; // Queue the kernel for execution errNum = clEnqueueNDRangeKernel( m_commandQueue, m_kernel, 1, NULL, globalWorkSize, localWorkSize, 0, NULL, &kernel_event ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error queueing kernel for execution." ); return FALSE; } clWaitForEvents(1 , &kernel_event); // Query timestamp for kernel profiling // Queued time is when the command is queued to host. // Submit time is when the command is submitted from host to device. // Start time is when the command starts the execution. // End time is when the command finishes the execution. // The delta between start and end, marks the total elapsed time to execute a kernel in device. errNum = clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &t_queued, NULL); if( errNum != CL_SUCCESS ) FrmLogMessage( "Error getting queued timestamp." ); errNum = clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &t_submit, NULL); if( errNum != CL_SUCCESS ) FrmLogMessage( "Error getting submit timestamp." ); errNum = clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &t_start, NULL); if( errNum != CL_SUCCESS ) FrmLogMessage( "Error getting start timestamp." ); errNum = clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &t_end, NULL); if( errNum != CL_SUCCESS ) FrmLogMessage( "Error getting end timestamp." ); FrmLogMessage("Kernel event profiling....(nano sec)\n"); FrmSprintf(str, sizeof(str), " -> Queued time: %lu\n", t_queued); FrmLogMessage( str ); FrmSprintf(str, sizeof(str), " -> Submit time: %lu\n", t_submit); FrmLogMessage( str ); FrmSprintf(str, sizeof(str), " -> Start time: %lu\n", t_start); FrmLogMessage( str ); FrmSprintf(str, sizeof(str), " -> End time: %lu\n", t_end); FrmLogMessage( str ); clReleaseEvent(kernel_event); // Read the result back to host memory FRMVECTOR4* pResult; pResult = (FRMVECTOR4*) clEnqueueMapBuffer( m_commandQueue, m_result, CL_TRUE, CL_MAP_READ, 0, sizeof(FRMVECTOR4) * m_nNumVectors, 0, NULL, NULL, &errNum ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "Error enqueuing buffer map." ); return FALSE; } m_Timer.Stop(); FrmSprintf( str, sizeof(str), "Results: '%d' vector additions in '%.6f' seconds.\n", m_nNumVectors, m_Timer.GetTime() ); FrmLogMessage( str ); // Test results again CPU reference BOOL result = TRUE; if ( RunTests() ) { const FLOAT32 epsilon = 0.000001f; for( size_t i = 0; i < m_nNumVectors; i++ ) { for ( size_t j = 0; j < 4; j++ ) { FLOAT32 refVal = m_pRefResults[ i ].v[ j ]; FLOAT32 val = pResult[ i ].v[ j ]; if( FrmAbs( refVal - val ) > epsilon ) { FrmSprintf( str, sizeof(str), "Reference test failure, ref = (%f), result = (%f) Diff = (%f).\n", refVal, val, FrmAbs(refVal - val)); FrmLogMessage( str ); result = FALSE; } } } } // Unmap buffer errNum = clEnqueueUnmapMemObject( m_commandQueue, m_result, pResult, 0, NULL, NULL ); if( errNum != CL_SUCCESS ) { FrmLogMessage( "ERROR: Unmapping result buffer." ); return FALSE; } return result; }
//-------------------------------------------------------------------------------------- // Name: CreateOpenCLContext() // Desc: Create the OpenCL context based on the command-line options for creation //-------------------------------------------------------------------------------------- BOOL CFrmComputeApplication::CreateOpenCLContext(cl_device_type deviceType) { cl_int errNum; cl_uint numPlatforms = 0; cl_platform_id platformId; // Get the first platform ID errNum = clGetPlatformIDs( 1, &platformId, &numPlatforms ); if (errNum != CL_SUCCESS || numPlatforms <= 0) { FrmLogMessage("No OpenCL platforms found."); return FALSE; } m_platform = platformId; // Get the number of devices for the requested device type (CPU, GPU, ALL) cl_uint numDevices = 0; errNum = clGetDeviceIDs( platformId, deviceType, 0, NULL, &numDevices ); if (errNum != CL_SUCCESS || numDevices <= 0) { FrmLogMessage("No matching OpenCL devices found."); return FALSE; } char platformInfo[1024]; char logMessage[2048]; errNum = clGetPlatformInfo( platformId, CL_PLATFORM_VENDOR, sizeof(platformInfo), platformInfo, NULL ); if (errNum != CL_SUCCESS) { FrmLogMessage("ERROR: getting platform info."); return FALSE; } FrmSprintf( logMessage, sizeof(logMessage), "OpenCL Platform: %s\n", platformInfo ); FrmLogMessage( logMessage ); // Get the devices m_devices = new cl_device_id[numDevices]; m_deviceCount = numDevices; errNum = clGetDeviceIDs( platformId, deviceType, numDevices, m_devices, NULL ); if (errNum != CL_SUCCESS) { FrmLogMessage("Erorr getting OpenCL device(s)."); return FALSE; } switch (deviceType) { case CL_DEVICE_TYPE_GPU: FrmLogMessage("Selected device: GPU\n"); break; case CL_DEVICE_TYPE_CPU: FrmLogMessage("Selected device: CPU\n"); break; case CL_DEVICE_TYPE_DEFAULT: default: FrmLogMessage("Selected device: DEFAULT\n"); break; } for (int i = 0; i < m_deviceCount; i++) { char deviceInfo[1024]; errNum = clGetDeviceInfo( m_devices[i], CL_DEVICE_NAME, sizeof(deviceInfo), deviceInfo, NULL ); if (errNum == CL_SUCCESS ) { FrmSprintf( logMessage, sizeof(logMessage), "OpenCL Device Name (%d) : %s\n", i , deviceInfo ); FrmLogMessage( logMessage ); } } // Finally, create the context cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platformId, 0 }; m_context = clCreateContext( contextProperties, m_deviceCount, m_devices, NULL, NULL, &errNum ); if (errNum != CL_SUCCESS) { FrmLogMessage("Could not create OpenCL context."); return FALSE; } return TRUE; }
//-------------------------------------------------------------------------------------- // Name: Select2DWGSize() // Desc: Select 2D local workgroup size based on user input. // Final selected dimension is AxB where minWorkGroupSize <= AxB <= maxWorkGroupSize //.......The dimension of A and B is based on m_WG2DWHRatio where A/B = m_WG2DWHRatio / (100-m_WG2DWHRatio) //-------------------------------------------------------------------------------------- void CFrmComputeApplication::Select2DWGSize(size_t maxWorkGroupSize, size_t defGlobalWG_X, size_t defGlobalWG_Y, size_t minWorkGroupSize) { char msg[512]; m_globalWorkSize[0] = defGlobalWG_X; m_globalWorkSize[1] = defGlobalWG_Y; // m_WGSize is the percentage of maxWorkGroupSize float selectedWgSize = (float)m_WGSize / 100.0f * (float)maxWorkGroupSize; if ((int)selectedWgSize < minWorkGroupSize) selectedWgSize = (float)minWorkGroupSize; FrmSprintf( msg, sizeof(msg), "Max WG Size: %d. Selected WG Size: %d\n", maxWorkGroupSize, (int)selectedWgSize); FrmLogMessage( msg ); FrmSprintf( msg, sizeof(msg), "Global WG Size: %dx%d\n", m_globalWorkSize[0], m_globalWorkSize[1]); FrmLogMessage( msg ); // if ratio = 0, leave local size calculation to the driver if (m_WG2DWHRatio == 0) return; // Ratio is out of scale 100 // Adjust local dimension based on width:height ratio float whRatio = (float)m_WG2DWHRatio / (float)(100-m_WG2DWHRatio); m_localWorkSize[0] = (size_t) FrmSqrt( (FLOAT32) selectedWgSize * whRatio); // make sure the size is at lease 1 if (m_localWorkSize[0] < 1) m_localWorkSize[0] = 1; // 50 means 1:1 if (m_WG2DWHRatio == 50) { m_localWorkSize[1] = m_localWorkSize[0]; } else { m_localWorkSize[1] = (size_t) selectedWgSize / m_localWorkSize[0]; if (m_localWorkSize[1] < 1) m_localWorkSize[1] = 1; } // Compute the next global size that is a multiple of the local size size_t remndr = defGlobalWG_X % m_localWorkSize[0]; if( remndr ) { m_globalWorkSize[0] = defGlobalWG_X + m_localWorkSize[0] - remndr; } remndr = defGlobalWG_Y % m_localWorkSize[1]; if( remndr ) { m_globalWorkSize[1] = defGlobalWG_Y + m_localWorkSize[1] - remndr; } for (int i = 0; i < 2; i ++) { remndr = (m_globalWorkSize[i] % m_localWorkSize[i])? 1:0; m_numWorkgroup[i] = m_globalWorkSize[i] / m_localWorkSize[i] + remndr; } FrmSprintf( msg, sizeof(msg), "Local WG Size: %dx%d\n", m_localWorkSize[0], m_localWorkSize[1]); FrmLogMessage( msg ); FrmSprintf( msg, sizeof(msg), "Num of local WG: (%d x %d)\n", m_numWorkgroup[0], m_numWorkgroup[1] ); FrmLogMessage( msg ); }