/*
 * Queries the HAL for the pitch and physical size of a 2D surface with the
 * given dimensions and format.
 *
 * width, height  - surface dimensions, validated by Surface2DSanityCheck.
 * format         - CM surface format; converted with CmFmtToGenHwFmt before
 *                  being handed to the HAL.
 * pitch          - [out] receives inParam.iPitch on success.
 * physicalSize   - [out] receives inParam.iPhysicalSize on success.
 *
 * Returns hr: CM_SUCCESS when every step succeeded, otherwise whatever the
 * failing check macro stored in hr. The output references are only written
 * after all checks have passed.
 */
CM_RT_API INT CmDevice_RT::GetSurface2DInfo(UINT width, UINT height,
					    CM_SURFACE_FORMAT format,
					    UINT & pitch, UINT & physicalSize)
{
	// All locals are declared before the first check macro because the
	// macros jump to the "finish" label.
	CM_RETURN_CODE hr = CM_SUCCESS;
	CM_HAL_SURFACE2D_UP_PARAM inParam;
	PCM_CONTEXT pCmData = NULL;
	PCM_HAL_STATE pCmHalState = NULL;

	CMCHK_HR(m_pSurfaceMgr->Surface2DSanityCheck(width, height, format));

	CmSafeMemSet(&inParam, 0, sizeof(CM_HAL_SURFACE2D_UP_PARAM));
	inParam.iWidth = width;
	inParam.iHeight = height;
	inParam.format = m_pSurfaceMgr->CmFmtToGenHwFmt(format);

	// Guard both pointers before dereferencing them.
	pCmData = (PCM_CONTEXT) GetAccelData();
	CMCHK_NULL(pCmData);
	pCmHalState = pCmData->pCmHalState;
	CMCHK_NULL(pCmHalState);

	CHK_GENOSSTATUS_RETURN_CMERROR(pCmHalState->pfnGetSurface2DPitchAndSize(pCmHalState, &inParam));

	pitch = inParam.iPitch;
	physicalSize = inParam.iPhysicalSize;

 finish:
	return hr;
}
/*
 * Builds a CM_HAL_EXEC_HINTS_TASK_PARAM from the kernels of pTask and hands
 * it to the HAL through pfnExecuteHintsTask.
 *
 * For each kernel in the task the HAL kernel data, its size and its curbe
 * offset are collected into three temporary arrays. After the power option
 * has been applied and the task executed, the driver task id and OS data
 * returned in the parameter block are stored on the task's event and the
 * task's kernels are released.
 *
 * pTask - internal task to flush; dereferenced without a null check.
 *
 * Returns hr: CM_SUCCESS on success, CM_FAILURE when a kernel data size is 0
 * or the HAL reports a negative task id, otherwise whatever the failing
 * check macro stored in hr. The temporary arrays are always released at the
 * "finish" label.
 */
INT CmQueue::FlushEnqueueWithHintsTask(CmTaskInternal * pTask)
{
	// All locals are declared before the first check macro because the
	// macros jump to the "finish" label.
	CM_RETURN_CODE hr = CM_SUCCESS;
	CM_HAL_EXEC_HINTS_TASK_PARAM param;
	PCM_CONTEXT pCmData = NULL;
	CmKernelData *pKernelData = NULL;
	UINT kernelDataSize = 0;
	UINT count = 0;
	CmEvent *pEvent = NULL;

	CmSafeMemSet(&param, 0, sizeof(CM_HAL_EXEC_HINTS_TASK_PARAM));

	pTask->GetKernelCount(count);
	param.iNumKernels = count;

	// Allocate all three arrays first, then check them, so that a partial
	// failure is still cleaned up by the common exit path.
	param.pKernels = new(std::nothrow) PCM_HAL_KERNEL_PARAM[count];
	param.piKernelSizes = new(std::nothrow) UINT[count];
	param.piKernelCurbeOffset = new(std::nothrow) UINT[count];
	CMCHK_NULL(param.pKernels);
	CMCHK_NULL(param.piKernelSizes);
	CMCHK_NULL(param.piKernelCurbeOffset);

	pTask->GetHints(param.iHints);
	pTask->GetNumTasksGenerated(param.iNumTasksGenerated);
	pTask->GetLastTask(param.isLastTask);

	for (UINT i = 0; i < count; i++) {
		pTask->GetKernelData(i, pKernelData);
		CMCHK_NULL(pKernelData);

		pTask->GetKernelDataSize(i, kernelDataSize);
		if (kernelDataSize == 0) {
			CM_ASSERT(0);
			hr = CM_FAILURE;
			goto finish;
		}

		param.pKernels[i] = pKernelData->GetHalCmKernelData();
		param.piKernelSizes[i] = kernelDataSize;
		param.piKernelCurbeOffset[i] = pTask->GetKernelCurbeOffset(i);
	}

	pCmData = (PCM_CONTEXT) m_pDevice->GetAccelData();
	CMCHK_NULL(pCmData);

	CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->pfnSetPowerOption(pCmData->pCmHalState, pTask->GetPowerOption()));
	CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->pfnExecuteHintsTask(pCmData->pCmHalState, &param));

	// A negative id is treated as a failed submission.
	if (param.iTaskIdOut < 0) {
		CM_ASSERT(0);
		hr = CM_FAILURE;
		goto finish;
	}

	pTask->GetTaskEvent(pEvent);
	CMCHK_NULL(pEvent);
	CMCHK_HR(pEvent->SetTaskDriverId(param.iTaskIdOut));
	CMCHK_HR(pEvent->SetTaskOsData(param.OsData));
	CMCHK_HR(pTask->ReleaseKernel());

 finish:
	CmSafeDeleteArray(param.pKernels);
	CmSafeDeleteArray(param.piKernelSizes);
	CmSafeDeleteArray(param.piKernelCurbeOffset);
	return hr;
}
/*
 * Builds a CM_HAL_EXEC_TASK_GROUP_PARAM from the kernels of pTask and hands
 * it to the HAL through pfnExecuteGroupTask.
 *
 * For each kernel the HAL kernel data, its size and its curbe offset are
 * collected, and the per-kernel bGlobalSurfaceUsed / bKernelDebugEnabled
 * flags are OR-ed into the parameter block. The SLM size, the thread group
 * space dimensions (when a thread group space was created) and the sync
 * bitmap are copied from the task. After execution the driver task id and OS
 * data are stored on the task's event and the task's kernels are released.
 *
 * pTask - internal task to flush; dereferenced without a null check.
 *
 * Returns hr: CM_SUCCESS on success, CM_FAILURE when a kernel data size is 0
 * or the HAL reports a negative task id, CM_EXCEED_MAX_SLM_SIZE when the SLM
 * size exceeds MAX_SLM_SIZE_PER_GROUP_IN_1K, otherwise whatever the failing
 * check macro stored in hr. The temporary arrays are always released at the
 * "finish" label.
 */
INT CmQueue::FlushGroupTask(CmTaskInternal * pTask)
{
	// All locals are declared before the first check macro because the
	// macros jump to the "finish" label.
	CM_RETURN_CODE hr = CM_SUCCESS;
	CM_HAL_EXEC_TASK_GROUP_PARAM param;
	CmKernelData *pKernelData = NULL;
	UINT kernelDataSize = 0;
	UINT count = 0;
	PCM_CONTEXT pCmData = NULL;
	CmEvent *pEvent = NULL;
	PCM_HAL_KERNEL_PARAM pTempData = NULL;

	CmSafeMemSet(&param, 0, sizeof(CM_HAL_EXEC_TASK_GROUP_PARAM));

	pTask->GetKernelCount(count);
	param.iNumKernels = count;

	// Allocate all three arrays first, then check them, so that a partial
	// failure is still cleaned up by the common exit path.
	param.pKernels = new(std::nothrow) PCM_HAL_KERNEL_PARAM[count];
	param.piKernelSizes = new(std::nothrow) UINT[count];
	param.piKernelCurbeOffset = new(std::nothrow) UINT[count];
	param.iPreemptionMode = pTask->GetPreemptionMode();
	CMCHK_NULL(param.pKernels);
	CMCHK_NULL(param.piKernelSizes);
	CMCHK_NULL(param.piKernelCurbeOffset);

	for (UINT i = 0; i < count; i++) {
		pTask->GetKernelData(i, pKernelData);
		CMCHK_NULL(pKernelData);

		pTask->GetKernelDataSize(i, kernelDataSize);
		if (kernelDataSize == 0) {
			CM_ASSERT(0);
			hr = CM_FAILURE;
			goto finish;
		}

		// The HAL kernel data is dereferenced below, so reject a null
		// pointer instead of crashing on it.
		pTempData = pKernelData->GetHalCmKernelData();
		CMCHK_NULL(pTempData);

		param.pKernels[i] = pTempData;
		param.piKernelSizes[i] = kernelDataSize;
		param.piKernelCurbeOffset[i] = pTask->GetKernelCurbeOffset(i);
		param.bGlobalSurfaceUsed |= pTempData->bGlobalSurfaceUsed;
		param.bKernelDebugEnabled |= pTempData->bKernelDebugEnabled;
	}

	pTask->GetSLMSize(param.iSLMSize);
	if (param.iSLMSize > MAX_SLM_SIZE_PER_GROUP_IN_1K) {
		CM_ASSERT(0);
		hr = CM_EXCEED_MAX_SLM_SIZE;
		goto finish;
	}

	if (pTask->IsThreadGroupSpaceCreated()) {
		pTask->GetThreadGroupSpaceSize(param.threadSpaceWidth,
					       param.threadSpaceHeight,
					       param.groupSpaceWidth,
					       param.groupSpaceHeight);
	}

	param.uiSyncBitmap = pTask->GetSyncBitmap();

	// Guard the accelerator context before dereferencing it.
	pCmData = (PCM_CONTEXT) m_pDevice->GetAccelData();
	CMCHK_NULL(pCmData);

	CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->pfnExecuteGroupTask(pCmData->pCmHalState, &param));

	// A negative id is treated as a failed submission.
	if (param.iTaskIdOut < 0) {
		CM_ASSERT(0);
		hr = CM_FAILURE;
		goto finish;
	}

	pTask->GetTaskEvent(pEvent);
	CMCHK_NULL(pEvent);
	CMCHK_HR(pEvent->SetTaskDriverId(param.iTaskIdOut));
	CMCHK_HR(pEvent->SetTaskOsData(param.OsData));
	CMCHK_HR(pTask->ReleaseKernel());

 finish:
	CmSafeDeleteArray(param.pKernels);
	CmSafeDeleteArray(param.piKernelSizes);
	CmSafeDeleteArray(param.piKernelCurbeOffset);
	return hr;
}
/*
 * Builds a CM_HAL_EXEC_TASK_PARAM from the kernels of pTask and hands it to
 * the HAL through pfnExecuteTask.
 *
 * Steps, in order:
 *  1. Collect HAL kernel data, size and curbe offset for each kernel, OR-ing
 *     the per-kernel bGlobalSurfaceUsed / bKernelDebugEnabled flags.
 *  2. Derive a default thread space from the total thread count: width is
 *     capped at CM_MAX_THREADSPACE_WIDTH and height is the count divided by
 *     that width, rounded up.
 *  3. If the task has a thread space, override the defaults with the task's
 *     dependency pattern, thread space size, walking pattern and, when set,
 *     walking parameters and dependency vectors. Per-kernel thread
 *     coordinates and dependency masks are gathered when coordinates exist.
 *  4. Apply the power option, execute the task, then store the driver task
 *     id and OS data on the task's event and release the task's kernels.
 *
 * pTask - internal task to flush; dereferenced without a null check.
 *
 * Returns hr: CM_SUCCESS on success, CM_OUT_OF_HOST_MEMORY when one of the
 * temporary arrays cannot be allocated, CM_FAILURE when a kernel data size
 * is 0 or the HAL reports a negative task id, otherwise whatever the failing
 * check macro stored in hr. All temporary arrays are released at "finish".
 */
INT CmQueue::FlushGeneralTask(CmTaskInternal * pTask)
{
	// All locals are declared before the first check macro because the
	// macros jump to the "finish" label.
	CM_RETURN_CODE hr = CM_SUCCESS;
	CM_HAL_EXEC_TASK_PARAM param;
	CmKernelData *pKernelData = NULL;
	UINT kernelDataSize = 0;
	PCM_CONTEXT pCmData = NULL;
	CmEvent *pEvent = NULL;
	UINT totalThreadCount = 0;
	UINT count = 0;
	PCM_HAL_KERNEL_PARAM pTempData = NULL;

	CmSafeMemSet(&param, 0, sizeof(CM_HAL_EXEC_TASK_PARAM));

	pTask->GetKernelCount(count);
	param.iNumKernels = count;

	// Allocate all three arrays first, then check them, so that a partial
	// failure is still cleaned up by the common exit path.
	param.pKernels = new(std::nothrow) PCM_HAL_KERNEL_PARAM[count];
	param.piKernelSizes = new(std::nothrow) UINT[count];
	param.piKernelCurbeOffset = new(std::nothrow) UINT[count];
	CMCHK_NULL_RETURN(param.pKernels, CM_OUT_OF_HOST_MEMORY);
	CMCHK_NULL_RETURN(param.piKernelSizes, CM_OUT_OF_HOST_MEMORY);
	CMCHK_NULL_RETURN(param.piKernelCurbeOffset, CM_OUT_OF_HOST_MEMORY);

	for (UINT i = 0; i < count; i++) {
		pTask->GetKernelData(i, pKernelData);
		CMCHK_NULL(pKernelData);

		pTask->GetKernelDataSize(i, kernelDataSize);
		if (kernelDataSize == 0) {
			CM_ASSERT(0);
			hr = CM_FAILURE;
			goto finish;
		}

		// The HAL kernel data is dereferenced below, so reject a null
		// pointer instead of crashing on it.
		pTempData = pKernelData->GetHalCmKernelData();
		CMCHK_NULL(pTempData);

		param.pKernels[i] = pTempData;
		param.piKernelSizes[i] = kernelDataSize;
		param.piKernelCurbeOffset[i] = pTask->GetKernelCurbeOffset(i);
		param.bGlobalSurfaceUsed |= pTempData->bGlobalSurfaceUsed;
		param.bKernelDebugEnabled |= pTempData->bKernelDebugEnabled;
	}

	// Default thread space, used when the task has none of its own.
	pTask->GetTotalThreadCount(totalThreadCount);
	param.threadSpaceWidth = (totalThreadCount > CM_MAX_THREADSPACE_WIDTH) ?
	    CM_MAX_THREADSPACE_WIDTH : totalThreadCount;
	if (totalThreadCount % CM_MAX_THREADSPACE_WIDTH) {
		param.threadSpaceHeight = totalThreadCount / CM_MAX_THREADSPACE_WIDTH + 1;
	} else {
		param.threadSpaceHeight = totalThreadCount / CM_MAX_THREADSPACE_WIDTH;
	}
	param.DependencyPattern = CM_DEPENDENCY_NONE;

	if (pTask->IsThreadSpaceCreated()) {
		if (pTask->IsThreadCoordinatesExisted()) {
			param.ppThreadCoordinates = new(std::nothrow) PCM_HAL_SCOREBOARD_XY[count];
			param.ppDependencyMasks = new(std::nothrow) PCM_HAL_MASK_AND_RESET[count];
			CMCHK_NULL_RETURN(param.ppThreadCoordinates, CM_OUT_OF_HOST_MEMORY);
			CMCHK_NULL_RETURN(param.ppDependencyMasks, CM_OUT_OF_HOST_MEMORY);

			for (UINT i = 0; i < count; i++) {
				void *pKernelCoordinates = NULL;
				void *pDependencyMasks = NULL;

				pTask->GetKernelCoordinates(i, pKernelCoordinates);
				pTask->GetKernelDependencyMasks(i, pDependencyMasks);
				param.ppThreadCoordinates[i] = (PCM_HAL_SCOREBOARD_XY) pKernelCoordinates;
				param.ppDependencyMasks[i] = (PCM_HAL_MASK_AND_RESET) pDependencyMasks;
			}
		} else {
			param.ppThreadCoordinates = NULL;
		}

		// The task's own thread space replaces the defaults above.
		pTask->GetDependencyPattern(param.DependencyPattern);
		pTask->GetThreadSpaceSize(param.threadSpaceWidth, param.threadSpaceHeight);
		pTask->GetWalkingPattern(param.WalkingPattern);

		if (pTask->CheckWalkingParametersSet()) {
			param.walkingParamsValid = 1;
			CMCHK_HR(pTask->GetWalkingParameters(param.walkingParams));
		} else {
			param.walkingParamsValid = 0;
		}

		if (pTask->CheckDependencyVectorsSet()) {
			param.dependencyVectorsValid = 1;
			CMCHK_HR(pTask->GetDependencyVectors(param.dependencyVectors));
		} else {
			param.dependencyVectorsValid = 0;
		}
	}

	pTask->GetColorCountMinusOne(param.ColorCountMinusOne);
	param.uiSyncBitmap = pTask->GetSyncBitmap();

	// Guard the accelerator context before dereferencing it.
	pCmData = (PCM_CONTEXT) m_pDevice->GetAccelData();
	CMCHK_NULL(pCmData);

	CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->pfnSetPowerOption(pCmData->pCmHalState, pTask->GetPowerOption()));
	CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->pfnExecuteTask(pCmData->pCmHalState, &param));

	// A negative id is treated as a failed submission.
	if (param.iTaskIdOut < 0) {
		CM_ASSERT(0);
		hr = CM_FAILURE;
		goto finish;
	}

	pTask->GetTaskEvent(pEvent);
	CMCHK_NULL(pEvent);
	CMCHK_HR(pEvent->SetTaskDriverId(param.iTaskIdOut));
	CMCHK_HR(pEvent->SetTaskOsData(param.OsData));
	CMCHK_HR(pTask->ReleaseKernel());

 finish:
	CmSafeDeleteArray(param.pKernels);
	CmSafeDeleteArray(param.piKernelSizes);
	CmSafeDeleteArray(param.ppThreadCoordinates);
	CmSafeDeleteArray(param.ppDependencyMasks);
	CmSafeDeleteArray(param.piKernelCurbeOffset);
	return hr;
}
/*
 * Enqueues the kernels of pKernelArray one or more times, as requested by
 * the task-count field encoded in hints.
 *
 * pKernelArray - task holding the kernels; CM_INVALID_ARG_VALUE if NULL.
 * pEvent       - [in/out] event reference forwarded to Enqueue_RT.
 * hints        - hint bits; the number of tasks is taken from
 *                (hints & CM_HINTS_MASK_NUM_TASKS) >> CM_HINTS_NUM_BITS_TASK_POS.
 *
 * Every kernel must have a thread space. When that thread space reports
 * that it needs a kernel pointer and currently has none, the kernel is
 * stored on it. Enqueue_RT is then called at least once and is repeated
 * until the generated count reaches the requested number of tasks; the last
 * call is flagged as the last task.
 *
 * Returns CM_FAILURE for an empty task, CM_EXCEED_MAX_KERNEL_PER_ENQUEUE
 * when the kernel count exceeds m_pHalMaxValues->iMaxKernelsPerTask,
 * otherwise the value left in hr by the last check macro that ran.
 */
CM_RT_API INT CmQueue::EnqueueWithHints(CmTask * pKernelArray,
					CmEvent * &pEvent, UINT hints)
{
	// All locals are declared before the first check macro because the
	// macros jump to the "finish" label.
	INT hr = CM_FAILURE;
	UINT kernelTotal = 0;
	UINT slot = 0;
	CmKernel **ppKernelList = NULL;
	UINT requestedTasks = 0;
	BOOLEAN needSplit = FALSE;
	BOOLEAN isFinalTask = FALSE;
	UINT generatedTasks = 0;

	CMCHK_NULL_RETURN(pKernelArray, CM_INVALID_ARG_VALUE);

	kernelTotal = pKernelArray->GetKernelCount();
	if (kernelTotal == 0) {
		CM_ASSERT(0);
		hr = CM_FAILURE;
		goto finish;
	} else if (kernelTotal > m_pHalMaxValues->iMaxKernelsPerTask) {
		CM_ASSERT(0);
		hr = CM_EXCEED_MAX_KERNEL_PER_ENQUEUE;
		goto finish;
	}

	// Make sure every kernel's thread space knows its kernel when required.
	for (UINT k = 0; k < kernelTotal; ++k) {
		CmKernel *pCurKernel = pKernelArray->GetKernelPointer(k);
		CmThreadSpace *pCurSpace = NULL;

		CMCHK_NULL(pCurKernel);
		pCurKernel->GetThreadSpace(pCurSpace);
		CMCHK_NULL(pCurSpace);

		if (pCurSpace->GetNeedSetKernelPointer()) {
			if (pCurSpace->KernelPointerIsNULL()) {
				pCurSpace->SetKernelPointer(pCurKernel);
			}
		}
	}

	requestedTasks = (hints & CM_HINTS_MASK_NUM_TASKS) >> CM_HINTS_NUM_BITS_TASK_POS;
	needSplit = (requestedTasks > 1) ? TRUE : FALSE;

	// One extra slot holds the terminating NULL entry.
	ppKernelList = new(std::nothrow) CmKernel *[kernelTotal + 1];
	CMCHK_NULL(ppKernelList);

	do {
		// The list is rebuilt before every submission.
		slot = 0;
		while (slot < kernelTotal) {
			ppKernelList[slot] = pKernelArray->GetKernelPointer(slot);
			++slot;
		}
		ppKernelList[kernelTotal] = NULL;

		// Unsplit work is always the last task; split work only on the
		// final pass.
		if (!needSplit || generatedTasks == (requestedTasks - 1)) {
			isFinalTask = TRUE;
		}

		CMCHK_HR(Enqueue_RT(ppKernelList, pEvent, generatedTasks,
				    isFinalTask, hints,
				    pKernelArray->GetPowerOption()));

		generatedTasks++;
	} while (generatedTasks < requestedTasks);

 finish:
	CmSafeDeleteArray(ppKernelList);
	return hr;
}
/*
 * Checks that every kernel in this task has a thread space whose size
 * matches the kernel's thread count and, for thread spaces with associated
 * threads, that every thread of the kernel is covered by a thread space
 * unit.
 *
 * For each kernel i:
 *  - the kernel pointer and its thread space must be non-NULL;
 *  - threadCount must equal width * height of the thread space;
 *  - when IsThreadAssociated() is true, a per-thread BOOLEAN row is
 *    allocated, each thread space unit marks its threadId in that row, and
 *    any row entry left FALSE counts the kernel as unassociated.
 *
 * Only the row for the kernel currently being checked is allocated at any
 * time, so a unit that resolves to a different kernel index, or carries a
 * threadId outside [0, threadCount), fails the check instead of being
 * written through an unallocated or too-short row.
 *
 * Returns TRUE when hr is still CM_SUCCESS at the end, FALSE otherwise. The
 * specific error code held in hr is not reported to the caller.
 */
BOOLEAN CmTask::IntegrityCheckKernelThreadspace(void)
{
	// All locals are declared before the first check macro because the
	// macros jump to the "finish" label.
	INT hr = CM_SUCCESS;
	UINT kernelCount = 0;
	UINT i = 0;
	UINT j = 0;
	CmKernel *pKernel_RT = NULL;
	CmKernel *pKernTmp = NULL;
	UINT threadCount = 0;
	CmThreadSpace *pKernelTS = NULL;
	UINT width = 0;
	UINT height = 0;
	BOOLEAN **pTSMapping = NULL;
	BOOLEAN *pKernelInScoreboard = NULL;
	CM_THREAD_SPACE_UNIT *pThreadSpaceUnit = NULL;
	UINT kernelIndex = 0;
	UINT unassociated = 0;

	kernelCount = this->GetKernelCount();

	pTSMapping = new(std::nothrow) BOOLEAN *[kernelCount];
	pKernelInScoreboard = new(std::nothrow) BOOLEAN[kernelCount];
	CMCHK_NULL_RETURN(pTSMapping, CM_OUT_OF_HOST_MEMORY);
	CMCHK_NULL_RETURN(pKernelInScoreboard, CM_OUT_OF_HOST_MEMORY);

	CmSafeMemSet(pTSMapping, 0, kernelCount * sizeof(BOOLEAN *));
	CmSafeMemSet(pKernelInScoreboard, 0, kernelCount * sizeof(BOOLEAN));

	for (i = 0; i < kernelCount; ++i) {
		pKernel_RT = this->GetKernelPointer(i);
		CMCHK_NULL(pKernel_RT);

		CMCHK_HR(pKernel_RT->GetThreadSpace(pKernelTS));
		CMCHK_NULL_RETURN(pKernelTS, CM_KERNEL_THREADSPACE_NOT_SET);

		CMCHK_HR(pKernelTS->GetThreadSpaceSize(width, height));
		CMCHK_HR(pKernel_RT->GetThreadCount(threadCount));
		if (threadCount != (width * height)) {
			CM_ASSERT(0);
			hr = CM_INVALID_KERNEL_THREADSPACE;
			goto finish;
		}

		if (pKernelTS->IsThreadAssociated()) {
			pTSMapping[i] = new(std::nothrow) BOOLEAN[threadCount];
			CMCHK_NULL_RETURN(pTSMapping[i], CM_OUT_OF_HOST_MEMORY);
			CmSafeMemSet(pTSMapping[i], 0, threadCount * sizeof(BOOLEAN));
			pKernelInScoreboard[i] = FALSE;

			hr = pKernelTS->GetThreadSpaceUnit(pThreadSpaceUnit);
			if (hr != CM_SUCCESS || pThreadSpaceUnit == NULL) {
				CM_ASSERT(0);
				CmSafeDeleteArray(pTSMapping[i]);
				hr = CM_FAILURE;
				goto finish;
			}

			for (j = 0; j < width * height; ++j) {
				pKernTmp = static_cast < CmKernel * >(pThreadSpaceUnit[j].pKernel);
				if (pKernTmp == NULL) {
					// Fall back to the thread space's own
					// kernel pointer when it uses one.
					if (pKernelTS->GetNeedSetKernelPointer()) {
						pKernTmp = pKernelTS->GetKernelPointer();
					}
					if (pKernTmp == NULL) {
						CM_ASSERT(0);
						CmSafeDeleteArray(pTSMapping[i]);
						hr = CM_FAILURE;
						goto finish;
					}
				}

				kernelIndex = pKernTmp->GetIndexInTask();

				// pTSMapping[i] (threadCount entries) is the
				// only live row; anything else would be an
				// out-of-bounds or null write.
				if (kernelIndex != i ||
				    pThreadSpaceUnit[j].threadId >= threadCount) {
					CM_ASSERT(0);
					CmSafeDeleteArray(pTSMapping[i]);
					hr = CM_FAILURE;
					goto finish;
				}

				pTSMapping[kernelIndex][pThreadSpaceUnit[j].threadId] = TRUE;
				pKernelInScoreboard[kernelIndex] = TRUE;
			}

			if (pKernelInScoreboard[i] == TRUE) {
				pKernel_RT->SetAssociatedToTSFlag(TRUE);
				// One uncovered thread is enough to flag this
				// kernel.
				for (j = 0; j < threadCount; ++j) {
					if (pTSMapping[i][j] == FALSE) {
						unassociated++;
						break;
					}
				}
			}

			CmSafeDeleteArray(pTSMapping[i]);
		}

		if (unassociated != 0) {
			CM_ASSERT(0);
			hr = CM_KERNEL_THREADSPACE_THREADS_NOT_ASSOCIATED;
			goto finish;
		}
	}

 finish:
	CmSafeDeleteArray(pTSMapping);
	CmSafeDeleteArray(pKernelInScoreboard);
	return (hr == CM_SUCCESS) ? TRUE : FALSE;
}