// Queries the two "max values" capability records through GetCapsInternal
// and copies them into the caller's structures.
//
// MaxValues   [out] receives the MaxValues member of the DXVA_CM_MAX_VALUES
//                   query; iMaxArgsPerKernel is capped at
//                   CM_MAX_ARGS_PER_KERNEL.
// MaxValuesEx [out] receives the MaxValuesEx member of the
//                   DXVA_CM_MAX_VALUES_EX query.
//
// Returns CM_SUCCESS, or CM_FAILURE as soon as either query fails. When the
// second query fails, MaxValues has already been written.
INT CmDevice_RT::GetMaxValueFromCaps(CM_HAL_MAX_VALUES & MaxValues, CM_HAL_MAX_VALUES_EX & MaxValuesEx)
{
    DXVA_CM_QUERY_CAPS caps;
    UINT capsSize = sizeof(caps);
    INT status;

    // First query: the base max-values record.
    CmSafeMemSet(&caps, 0, sizeof(caps));
    caps.Type = DXVA_CM_MAX_VALUES;
    status = GetCapsInternal(&caps, &capsSize);
    if (FAILED(status)) {
        CM_ASSERT(0);
        return CM_FAILURE;
    }

    MaxValues = caps.MaxValues;
    // Never report more kernel arguments than the runtime constant allows.
    if (caps.MaxValues.iMaxArgsPerKernel > CM_MAX_ARGS_PER_KERNEL) {
        MaxValues.iMaxArgsPerKernel = CM_MAX_ARGS_PER_KERNEL;
    }

    // Second query: the extended record. The same size variable is passed
    // again, unchanged by this function.
    CmSafeMemSet(&caps, 0, sizeof(caps));
    caps.Type = DXVA_CM_MAX_VALUES_EX;
    status = GetCapsInternal(&caps, &capsSize);
    if (FAILED(status)) {
        CM_ASSERT(0);
        return CM_FAILURE;
    }

    MaxValuesEx = caps.MaxValuesEx;
    return CM_SUCCESS;
}
// Validates a capability value supplied by the caller and forwards it to the
// HAL through pfnSetCaps.
//
// capName      capability to set; only CAP_HW_THREAD_COUNT and CAP_L3_CONFIG
//              are handled, anything else returns CM_FAILURE.
// capValueSize size in bytes of the object behind pCapValue; must equal
//              sizeof(UINT) for CAP_HW_THREAD_COUNT and
//              sizeof(L3_CONFIG_REGISTER_VALUES) for CAP_L3_CONFIG.
// pCapValue    pointer to the value; must not be NULL.
//
// Returns CM_SUCCESS when the HAL call succeeds,
// CM_INVALID_HARDWARE_THREAD_NUMBER / CM_INVALIDE_L3_CONFIGURATION when the
// argument for the respective capability is rejected, CM_FAILURE for an
// unhandled capName, or whatever CHK_GENOSSTATUS_RETURN_CMERROR stores in hr
// when pfnSetCaps fails.
INT CmDevice_RT::SetCapsInternal(CM_DEVICE_CAP_NAME capName, size_t capValueSize, void *pCapValue)
{
    CM_RETURN_CODE hr = CM_SUCCESS;
    DXVA_CM_SET_CAPS setCaps;
    // Zero-initialised so that a GetCaps call which leaves it untouched makes
    // every non-zero request fail the upper-bound test below instead of
    // comparing against an indeterminate value.
    UINT maxValue = 0;
    size_t size = sizeof(maxValue);

    CmSafeMemSet(&setCaps, 0, sizeof(setCaps));

    switch (capName) {
    case CAP_HW_THREAD_COUNT:
        if (pCapValue == NULL || capValueSize != sizeof(UINT)) {
            CM_ASSERT(0);
            return CM_INVALID_HARDWARE_THREAD_NUMBER;
        }

        // The value is unsigned, so "<= 0" can only mean zero.
        if (*(UINT *) pCapValue <= 0) {
            CM_ASSERT(0);
            return CM_INVALID_HARDWARE_THREAD_NUMBER;
        }

        // The requested count may not exceed what the device reports.
        GetCaps(CAP_HW_THREAD_COUNT, size, &maxValue);
        if (*(UINT *) pCapValue > maxValue) {
            CM_ASSERT(0);
            return CM_INVALID_HARDWARE_THREAD_NUMBER;
        }

        setCaps.Type = DXVA_CM_MAX_HW_THREADS;
        setCaps.MaxValue = *(UINT *) pCapValue;
        break;

    case CAP_L3_CONFIG:
        if (pCapValue == NULL || capValueSize != sizeof(L3_CONFIG_REGISTER_VALUES)) {
            CM_ASSERT(0);
            return CM_INVALIDE_L3_CONFIGURATION;
        } else {
            L3_CONFIG_REGISTER_VALUES *l3_c = (L3_CONFIG_REGISTER_VALUES *) pCapValue;
            setCaps.L3_SQCREG1 = l3_c->SQCREG1_VALUE;
            setCaps.L3_CNTLREG2 = l3_c->CNTLREG2_VALUE;
            setCaps.L3_CNTLREG3 = l3_c->CNTLREG3_VALUE;
            setCaps.L3_CNTLREG = l3_c->CNTLREG_VALUE;
            setCaps.Type = DXVA_CM_MAX_HW_L3_CONFIG;
        }
        break;

    default:
        return CM_FAILURE;
    }

    PCM_CONTEXT pCmData = (PCM_CONTEXT) this->GetAccelData();
    CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->
                                   pfnSetCaps(pCmData->pCmHalState,
                                              (PCM_HAL_MAX_SET_CAPS_PARAM) & setCaps));

finish:
    return hr;
}
// Reports the GPU platform identifier.
//
// platform [out] m_Platform when that member is already set to something
//                other than IGFX_UNKNOWN_CORE; otherwise the iVersion field
//                of a DXVA_CM_QUERY_GPU query when it is non-zero, else
//                IGFX_UNKNOWN_CORE.
//
// Returns CM_SUCCESS, or CM_FAILURE when the capability query fails (platform
// is then IGFX_UNKNOWN_CORE). This function does not write m_Platform.
CM_RT_API INT CmDevice_RT::GetGenPlatform(UINT & platform)
{
    // Fast path: the member already holds a concrete platform.
    if (m_Platform != IGFX_UNKNOWN_CORE) {
        platform = m_Platform;
        return CM_SUCCESS;
    }

    platform = IGFX_UNKNOWN_CORE;

    DXVA_CM_QUERY_CAPS caps;
    UINT capsSize = sizeof(DXVA_CM_QUERY_CAPS);
    CmSafeMemSet(&caps, 0, sizeof(caps));
    caps.Type = DXVA_CM_QUERY_GPU;

    INT status = GetCapsInternal(&caps, &capsSize);
    if (FAILED(status)) {
        CM_ASSERT(0);
        return CM_FAILURE;
    }

    // A zero version leaves the "unknown" value in place.
    if (caps.iVersion != 0) {
        platform = caps.iVersion;
    }

    return CM_SUCCESS;
}
// Asks the HAL for the pitch and physical size of a 2D surface with the
// given dimensions and format.
//
// width, height  surface dimensions, first validated by
//                m_pSurfaceMgr->Surface2DSanityCheck.
// format         CM surface format; converted with CmFmtToGenHwFmt.
// pitch          [out] iPitch reported by pfnGetSurface2DPitchAndSize.
// physicalSize   [out] iPhysicalSize reported by the same call.
//
// Returns CM_SUCCESS, or the value the check macros leave in hr when the
// sanity check or the HAL call fails; the outputs are not written then.
CM_RT_API INT CmDevice_RT::GetSurface2DInfo(UINT width, UINT height, CM_SURFACE_FORMAT format, UINT & pitch, UINT & physicalSize)
{
    CM_RETURN_CODE hr = CM_SUCCESS;
    CM_HAL_SURFACE2D_UP_PARAM surfParam;
    PCM_CONTEXT pContext = NULL;
    PCM_HAL_STATE pHalState = NULL;

    CMCHK_HR(m_pSurfaceMgr->Surface2DSanityCheck(width, height, format));

    // Describe the surface to the HAL.
    CmSafeMemSet(&surfParam, 0, sizeof(surfParam));
    surfParam.iWidth = width;
    surfParam.iHeight = height;
    surfParam.format = m_pSurfaceMgr->CmFmtToGenHwFmt(format);

    pContext = (PCM_CONTEXT) GetAccelData();
    pHalState = pContext->pCmHalState;
    CHK_GENOSSTATUS_RETURN_CMERROR(pHalState->pfnGetSurface2DPitchAndSize(pHalState, &surfParam));

    // The HAL fills these two fields in the parameter block.
    pitch = surfParam.iPitch;
    physicalSize = surfParam.iPhysicalSize;

finish:
    return hr;
}
// Constructs a device object in its empty state: every pointer member is
// NULL, every counter is zero, the dynamic arrays are created with their
// CM_INIT_*_COUNT initial sizes and the platform is IGFX_UNKNOWN_CORE.
// The L3 configuration member is zeroed and DevCreateOption is handed to
// InitDevCreateOption together with m_DevCreateOption.
CmDevice_RT::CmDevice_RT(UINT DevCreateOption):
    m_pUmdContext(NULL),
    m_pAccelData(NULL),
    m_AccelSize(0),
    m_pSurfaceMgr(NULL),
    m_pQueue(NULL),
    m_ProgramArray(CM_INIT_PROGRAM_COUNT),
    m_ProgramCount(0),
    m_KernelArray(CM_INIT_KERNEL_COUNT),
    m_KernelCount(0),
    m_ThreadSpaceArray(CM_INIT_THREADSPACE_COUNT),
    m_ThreadSpaceCount(0),
    m_hJITDll(NULL),
    m_fJITCompile(NULL),
    m_fFreeBlock(NULL),
    m_fJITVersion(NULL),
    m_DDIVersion(0),
    m_Platform(IGFX_UNKNOWN_CORE),
    m_CmDeviceRefCount(0),
    m_ThreadGroupSpaceArray(CM_INIT_THREADGROUPSPACE_COUNT),
    m_ThreadGroupSpaceCount(0),
    m_TaskArray(CM_INIT_TASK_COUNT),
    m_TaskCount(0)
{
    // Start from an all-zero L3 register configuration.
    CmSafeMemSet(&m_l3_c, 0, sizeof(L3_CONFIG_REGISTER_VALUES));

    // m_DevCreateOption is not in the initializer list; it is set up here.
    InitDevCreateOption(m_DevCreateOption, DevCreateOption);
}
// Constructs an empty task.
//
// pCmDevice        device stored in m_pCmDev.
// index            stored in m_IndexTaskArray.
// max_kernel_count stored in m_MaxKernelCount; the kernel array itself is
//                  not allocated here (m_pKernelArray starts as NULL).
//
// The power option is zeroed and the preemption mode is set to
// UN_PREEMPTABLE_MODE.
CmTask::CmTask(CmDevice * pCmDevice, UINT index, UINT max_kernel_count):
    m_pKernelArray(NULL),
    m_KernelCount(0),
    m_MaxKernelCount(max_kernel_count),
    m_IndexTaskArray(index),
    m_ui64SyncBitmap(0),
    m_pCmDev(pCmDevice)
{
    CmSafeMemSet(&m_PowerOption, 0, sizeof(m_PowerOption));
    m_PreemptionMode = UN_PREEMPTABLE_MODE;
}
// Allocates the kernel pointer array with m_MaxKernelCount entries and
// clears every entry.
//
// Returns CM_SUCCESS, or CM_OUT_OF_HOST_MEMORY when the non-throwing
// allocation fails.
INT CmTask::Initialize()
{
    m_pKernelArray = new(std::nothrow) CmKernel *[m_MaxKernelCount];
    if (m_pKernelArray == NULL) {
        CM_ASSERT(0);
        return CM_OUT_OF_HOST_MEMORY;
    }

    CmSafeMemSet(m_pKernelArray, 0, sizeof(CmKernel *) * m_MaxKernelCount);
    return CM_SUCCESS;
}
// Empties the task: the kernel count and the sync bitmap are set to zero
// and, when the kernel array exists, all of its m_MaxKernelCount entries
// are cleared.
//
// Returns CM_SUCCESS, or CM_FAILURE when the kernel array has not been
// allocated (the count and bitmap are still reset in that case).
CM_RT_API INT CmTask::Reset(void)
{
    m_KernelCount = 0;
    m_ui64SyncBitmap = 0;

    if (m_pKernelArray == NULL) {
        CM_ASSERT(0);
        return CM_FAILURE;
    }

    CmSafeMemSet(m_pKernelArray, 0, sizeof(CmKernel *) * m_MaxKernelCount);
    return CM_SUCCESS;
}
// Allocates a buffer through the HAL (pfnAllocateBuffer).
//
// size          requested size, passed as iSize.
// type          buffer type, passed through unchanged.
// handle        [out] set to 0 on entry and to the HAL's dwHandle on success.
// pCmOsResource optional existing resource; when non-NULL it is passed to
//               the HAL and isAllocatedbyCmrtUmd is FALSE, otherwise the
//               flag is TRUE.
// pSysMem       optional pointer stored in pData when non-NULL.
//
// While the HAL reports GENOS_STATUS_NO_SPACE, TouchSurfaceInPoolForDestroy
// is called and the allocation retried; when that helper returns false the
// function gives up with CM_SURFACE_ALLOCATION_FAILURE. Any other failing
// status is converted by GENOSSTATUS2CM_AND_CHECK.
INT CmSurfaceManager::AllocateBuffer(UINT size, CM_BUFFER_TYPE type, UINT & handle, CmOsResource * pCmOsResource, void *pSysMem)
{
    CM_RETURN_CODE hr = CM_SUCCESS;
    GENOS_STATUS genos_status = GENOS_STATUS_SUCCESS;
    PCM_CONTEXT pCmData = (PCM_CONTEXT) m_pCmDevice->GetAccelData();
    CM_HAL_BUFFER_PARAM bufParam;

    handle = 0;

    CmSafeMemSet(&bufParam, 0, sizeof(bufParam));
    bufParam.iSize = size;
    bufParam.type = type;

    // A caller-supplied OS resource means the runtime did not allocate it.
    if (pCmOsResource != NULL) {
        bufParam.pCmOsResource = pCmOsResource;
        bufParam.isAllocatedbyCmrtUmd = FALSE;
    } else {
        bufParam.pCmOsResource = NULL;
        bufParam.isAllocatedbyCmrtUmd = TRUE;
    }

    if (pSysMem != NULL) {
        bufParam.pData = pSysMem;
    }

    // Try, and keep retrying for as long as the HAL reports "no space" and
    // the pool helper reports that it could do something about it.
    for (;;) {
        genos_status = pCmData->pCmHalState->pfnAllocateBuffer(pCmData->pCmHalState, &bufParam);
        if (genos_status != GENOS_STATUS_NO_SPACE) {
            break;
        }
        if (!TouchSurfaceInPoolForDestroy()) {
            CM_ASSERT(0);
            return CM_SURFACE_ALLOCATION_FAILURE;
        }
    }

    GENOSSTATUS2CM_AND_CHECK(genos_status, hr);

    handle = bufParam.dwHandle;

finish:
    return hr;
}
// Puts a freshly created event into its initial state.
//
// The surface-entry info arrays are zeroed first. The event must not have a
// driver task id yet (m_TaskDriverId == -1); if it does, CM_FAILURE is
// returned and nothing else is touched. Otherwise the status becomes
// CM_STATUS_QUEUED, the kernel-name bookkeeping is cleared and m_pQueue is
// obtained from the device.
INT CmEvent::Initialize(void)
{
    CmSafeMemSet(&m_SurEntryInfoArrays, 0, sizeof(CM_HAL_SURFACE_ENTRY_INFO_ARRAYS));

    if (m_TaskDriverId != -1) {
        CM_ASSERT(0);
        return CM_FAILURE;
    }
    m_Status = CM_STATUS_QUEUED;

    m_KernelNames = NULL;
    m_KernelCount = 0;

    m_pDevice->GetQueue(m_pQueue);

    return CM_SUCCESS;
}
// Allocates a 2D surface through the HAL (pfnAllocateSurface2D) and then
// queries its pitch (pfnGetSurface2DTileYPitch).
//
// width, height  surface dimensions.
// format         CM surface format; converted with CmFmtToGenHwFmt.
// handle         [out] the HAL's dwHandle once the allocation succeeds.
// pitch          [out] iPitch once the pitch query succeeds.
//
// While the HAL reports GENOS_STATUS_NO_SPACE, TouchSurfaceInPoolForDestroy
// is called and the allocation retried; when that helper returns false the
// function gives up with CM_SURFACE_ALLOCATION_FAILURE. Other failures are
// reported through hr by the check macros. Note that handle is already
// written when the pitch query fails.
INT CmSurfaceManager::AllocateSurface2D(UINT width, UINT height, CM_SURFACE_FORMAT format, UINT & handle, UINT & pitch)
{
    CM_RETURN_CODE hr = CM_SUCCESS;
    GENOS_STATUS genos_status = GENOS_STATUS_SUCCESS;
    PCM_CONTEXT pCmData = (PCM_CONTEXT) m_pCmDevice->GetAccelData();
    CM_HAL_SURFACE2D_PARAM surfParam;

    CmSafeMemSet(&surfParam, 0, sizeof(surfParam));
    surfParam.iWidth = width;
    surfParam.iHeight = height;
    surfParam.format = CmFmtToGenHwFmt(format);
    surfParam.pData = NULL;
    surfParam.isAllocatedbyCmrtUmd = TRUE;

    // Try, and keep retrying for as long as the HAL reports "no space" and
    // the pool helper reports that it could do something about it.
    for (;;) {
        genos_status = pCmData->pCmHalState->pfnAllocateSurface2D(pCmData->pCmHalState, &surfParam);
        if (genos_status != GENOS_STATUS_NO_SPACE) {
            break;
        }
        if (!TouchSurfaceInPoolForDestroy()) {
            CM_ASSERT(0);
            return CM_SURFACE_ALLOCATION_FAILURE;
        }
    }

    GENOSSTATUS2CM_AND_CHECK(genos_status, hr);

    handle = surfParam.dwHandle;

    CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->pfnGetSurface2DTileYPitch(pCmData->pCmHalState, &surfParam));

    pitch = surfParam.iPitch;

finish:
    return hr;
}
// Sizes and allocates the surface manager's bookkeeping arrays.
//
// HalMaxValues   supplies the buffer, 2D and 3D surface table sizes.
// HalMaxValuesEx supplies the 2D-UP surface table size.
//
// The array length is the sum of those four table sizes. Six parallel arrays
// of that length are allocated with non-throwing new and zero-filled.
//
// Returns CM_SUCCESS, or CM_OUT_OF_HOST_MEMORY when any allocation fails; in
// that case every array is released through CmSafeDeleteArray before
// returning.
INT CmSurfaceManager::Initialize(CM_HAL_MAX_VALUES HalMaxValues, CM_HAL_MAX_VALUES_EX HalMaxValuesEx)
{
    // Record the individual limits.
    m_maxBufferCount = HalMaxValues.iMaxBufferTableSize;
    m_max2DSurfaceCount = HalMaxValues.iMax2DSurfaceTableSize;
    m_max2DUPSurfaceCount = HalMaxValuesEx.iMax2DUPSurfaceTableSize;

    // One slot per entry of every surface table.
    UINT slotCount = HalMaxValues.iMaxBufferTableSize
        + HalMaxValues.iMax2DSurfaceTableSize
        + HalMaxValues.iMax3DSurfaceTableSize
        + HalMaxValuesEx.iMax2DUPSurfaceTableSize;
    m_SurfaceArraySize = slotCount;

    m_SurfaceArray = new(std::nothrow) CmSurface *[m_SurfaceArraySize];
    m_SurfaceState = new(std::nothrow) INT[m_SurfaceArraySize];
    m_SurfaceCached = new(std::nothrow) BOOL[m_SurfaceArraySize];
    m_SurfaceReleased = new(std::nothrow) BOOL[m_SurfaceArraySize];
    m_SurfaceDestroyID = new(std::nothrow) INT[m_SurfaceArraySize];
    m_SurfaceSizes = new(std::nothrow) INT[m_SurfaceArraySize];

    const bool allAllocated = (m_SurfaceArray != NULL)
        && (m_SurfaceState != NULL)
        && (m_SurfaceCached != NULL)
        && (m_SurfaceReleased != NULL)
        && (m_SurfaceDestroyID != NULL)
        && (m_SurfaceSizes != NULL);

    if (!allAllocated) {
        // Release whichever arrays did get allocated.
        CmSafeDeleteArray(m_SurfaceState);
        CmSafeDeleteArray(m_SurfaceCached);
        CmSafeDeleteArray(m_SurfaceReleased);
        CmSafeDeleteArray(m_SurfaceDestroyID);
        CmSafeDeleteArray(m_SurfaceSizes);
        CmSafeDeleteArray(m_SurfaceArray);

        CM_ASSERT(0);
        return CM_OUT_OF_HOST_MEMORY;
    }

    CmSafeMemSet(m_SurfaceArray, 0, m_SurfaceArraySize * sizeof(CmSurface *));
    CmSafeMemSet(m_SurfaceState, 0, m_SurfaceArraySize * sizeof(INT));
    CmSafeMemSet(m_SurfaceCached, 0, m_SurfaceArraySize * sizeof(BOOL));
    CmSafeMemSet(m_SurfaceReleased, 0, m_SurfaceArraySize * sizeof(BOOL));
    CmSafeMemSet(m_SurfaceDestroyID, 0, m_SurfaceArraySize * sizeof(INT));
    CmSafeMemSet(m_SurfaceSizes, 0, m_SurfaceArraySize * sizeof(INT));

    return CM_SUCCESS;
}
// Allocates a 2D "UP" surface through the HAL (pfnAllocateSurface2DUP),
// passing pSysMem as the surface's pData.
//
// width, height  surface dimensions.
// format         CM surface format; converted with CmFmtToGenHwFmt.
// pSysMem        pointer handed to the HAL in pData.
// handle         [out] set to 0 on entry and to the HAL's dwHandle on success.
//
// While the HAL reports GENOS_STATUS_NO_SPACE, TouchSurfaceInPoolForDestroy
// is called and the allocation retried; when that helper returns false the
// function gives up with CM_SURFACE_ALLOCATION_FAILURE. Any other failing
// status is converted by GENOSSTATUS2CM_AND_CHECK.
INT CmSurfaceManager::AllocateSurface2DUP(UINT width, UINT height, CM_SURFACE_FORMAT format, void *pSysMem, UINT & handle)
{
    CM_RETURN_CODE hr = CM_SUCCESS;
    GENOS_STATUS genos_status = GENOS_STATUS_SUCCESS;

    handle = 0;

    PCM_CONTEXT pCmData = (PCM_CONTEXT) m_pCmDevice->GetAccelData();
    CM_HAL_SURFACE2D_UP_PARAM surfParam;

    CmSafeMemSet(&surfParam, 0, sizeof(surfParam));
    surfParam.iWidth = width;
    surfParam.iHeight = height;
    surfParam.format = CmFmtToGenHwFmt(format);
    surfParam.pData = pSysMem;

    // Try, and keep retrying for as long as the HAL reports "no space" and
    // the pool helper reports that it could do something about it.
    for (;;) {
        genos_status = pCmData->pCmHalState->pfnAllocateSurface2DUP(pCmData->pCmHalState, &surfParam);
        if (genos_status != GENOS_STATUS_NO_SPACE) {
            break;
        }
        if (!TouchSurfaceInPoolForDestroy()) {
            CM_ASSERT(0);
            return CM_SURFACE_ALLOCATION_FAILURE;
        }
    }

    GENOSSTATUS2CM_AND_CHECK(genos_status, hr);

    handle = surfParam.dwHandle;

finish:
    return hr;
}
// Records, for every kernel of pTask, a copy of its name and four
// thread-space numbers in m_KernelNames / m_ThreadSpace.
//
// pTask             task whose kernels are enumerated (dereferenced
//                   unconditionally).
// pThreadSpace      optional; when non-NULL its width/height overwrite the
//                   first kernel's four entries as {w, h, w, h}.
// pThreadGroupSpace optional, used only when pThreadSpace is NULL; the first
//                   kernel's entries become
//                   {threadW, threadH, threadW * groupW, threadH * groupH}.
//
// Per kernel i the default entries are {threadCount, 1, threadCount, 1} at
// m_ThreadSpace[4 * i .. 4 * i + 3].
//
// Returns CM_SUCCESS; CM_OUT_OF_HOST_MEMORY when an allocation fails; or the
// value CMCHK_NULL stores in hr when the task returns a NULL kernel pointer.
// On any failure everything allocated here is released again.
INT CmEvent::SetKernelNames(CmTask * pTask, CmThreadSpace * pThreadSpace, CmThreadGroupSpace * pThreadGroupSpace)
{
    UINT i = 0;
    INT hr = CM_SUCCESS;
    UINT ThreadCount = 0;

    m_KernelCount = pTask->GetKernelCount();

    m_KernelNames = new(std::nothrow) char *[m_KernelCount];
    m_ThreadSpace = new(std::nothrow) UINT[4 * m_KernelCount];
    CMCHK_NULL_RETURN(m_KernelNames, CM_OUT_OF_HOST_MEMORY);
    // Cleared before the second check so that the cleanup loop below only
    // ever sees NULL or valid name pointers.
    CmSafeMemSet(m_KernelNames, 0, m_KernelCount * sizeof(char *));
    CMCHK_NULL_RETURN(m_ThreadSpace, CM_OUT_OF_HOST_MEMORY);

    for (i = 0; i < m_KernelCount; i++) {
        m_KernelNames[i] = new(std::nothrow) char[CM_MAX_KERNEL_NAME_SIZE_IN_BYTE];
        CMCHK_NULL_RETURN(m_KernelNames[i], CM_OUT_OF_HOST_MEMORY);

        CmKernel *pKernel = pTask->GetKernelPointer(i);
        // The kernel is dereferenced right below; bail out instead of
        // crashing if the task hands back a NULL slot.
        CMCHK_NULL(pKernel);
        strcpy_s(m_KernelNames[i], CM_MAX_KERNEL_NAME_SIZE_IN_BYTE, pKernel->GetName());

        // Reset so a GetThreadCount that leaves its argument untouched
        // cannot leak the previous kernel's value into this entry.
        ThreadCount = 0;
        pKernel->GetThreadCount(ThreadCount);
        m_ThreadSpace[4 * i] = ThreadCount;
        m_ThreadSpace[4 * i + 1] = 1;
        m_ThreadSpace[4 * i + 2] = ThreadCount;
        m_ThreadSpace[4 * i + 3] = 1;
    }

    // The overrides below write entries 0..3, which exist only when the task
    // has at least one kernel (the array holds 4 * m_KernelCount entries).
    if (m_KernelCount > 0) {
        if (pThreadSpace) {
            UINT ThreadWidth, ThreadHeight;
            pThreadSpace->GetThreadSpaceSize(ThreadWidth, ThreadHeight);
            m_ThreadSpace[0] = ThreadWidth;
            m_ThreadSpace[1] = ThreadHeight;
            m_ThreadSpace[2] = ThreadWidth;
            m_ThreadSpace[3] = ThreadHeight;
        } else if (pThreadGroupSpace) {
            UINT ThreadWidth, ThreadHeight, GroupWidth, GroupHeight;
            pThreadGroupSpace->GetThreadGroupSpaceSize(ThreadWidth, ThreadHeight, GroupWidth, GroupHeight);
            m_ThreadSpace[0] = ThreadWidth;
            m_ThreadSpace[1] = ThreadHeight;
            m_ThreadSpace[2] = ThreadWidth * GroupWidth;
            m_ThreadSpace[3] = ThreadHeight * GroupHeight;
        }
    }

finish:
    if (hr != CM_SUCCESS) {
        // Undo every allocation made above, whichever step failed.
        if (m_KernelNames != NULL) {
            for (UINT j = 0; j < m_KernelCount; j++) {
                CmSafeDeleteArray(m_KernelNames[j]);
            }
        }
        CmSafeDeleteArray(m_KernelNames);
        CmSafeDeleteArray(m_ThreadSpace);
    }
    return hr;
}
// Constructs kernel data bound to pCmKernel.
//
// pCmKernel  owning kernel; stored as a CmKernel_RT pointer via static_cast.
//
// The data size, reference count and kernel reference start at zero, the
// object is marked in use (m_IsInUse = TRUE) and the HAL kernel parameter
// block is zero-filled.
CmKernelData::CmKernelData(CmKernel * pCmKernel):
    m_kerneldatasize(0),
    m_pCmKernel(static_cast<CmKernel_RT *>(pCmKernel)),
    m_RefCount(0),
    m_KernelRef(0),
    m_IsInUse(TRUE)
{
    CmSafeMemSet(&m_HalKernelParam, 0, sizeof(CM_HAL_KERNEL_PARAM));
}
// Returns a string naming the GPU stepping for the given platform.
//
// platform     platform identifier, compared against the IGFX_GEN*_CORE
//              constants.
// stepinfostr  [out] pointer to a constant string owned by this function or
//              by the HW_GT_STEPPING_* constants (callers must not modify or
//              free it), or NULL when the platform is below IGFX_GEN7_5_CORE
//              or the stepping id is not recognised.
//
// For platforms below IGFX_GEN9_CORE the stepping id is turned into a
// one-bit mask and matched against the SIWA_ONLY_* constants; from
// IGFX_GEN9_CORE on it indexes a table of letters "A".."J".
//
// Returns CM_SUCCESS, or CM_FAILURE when the DXVA_CM_QUERY_STEP capability
// query fails.
INT CmDevice_RT::GetGenStepInfo(UINT platform, char *&stepinfostr)
{
    INT hr;
    static const char *const CmSteppingInfo[MAX_STEPPING_NUM] =
        { "A", "B", "C", "D", "E", "F", "G", "H", "I", "J" };

    // No stepping information is reported for older platforms; skip the
    // query entirely.
    if (platform < IGFX_GEN7_5_CORE) {
        stepinfostr = NULL;
        return CM_SUCCESS;
    }

    DXVA_CM_QUERY_CAPS queryCaps;
    CmSafeMemSet(&queryCaps, 0, sizeof(queryCaps));
    queryCaps.Type = DXVA_CM_QUERY_STEP;
    UINT queryCapsSize = sizeof(queryCaps);

    hr = GetCapsInternal(&queryCaps, &queryCapsSize);
    if (FAILED(hr)) {
        CM_ASSERT(0);
        return CM_FAILURE;
    }

    UINT stepid = queryCaps.genStepId;

    if (platform < IGFX_GEN9_CORE) {
        // Shifting by the operand's bit width or more is undefined, so an
        // out-of-range id is treated as "unknown stepping" up front; the
        // shift itself is done on an unsigned 1 so bit 31 is well-defined.
        if (stepid >= sizeof(UINT) * 8) {
            stepinfostr = NULL;
        } else {
            UINT ulStepId = (static_cast<UINT>(1) << stepid);
            switch (ulStepId) {
            case SIWA_ONLY_BDW_A0:
                stepinfostr = (char *) HW_GT_STEPPING_A0;
                break;
            case SIWA_ONLY_HSW_A1:
                stepinfostr = (char *) HW_GT_STEPPING_A1;
                break;
            case SIWA_ONLY_HSW_B0:
                stepinfostr = (char *) HW_GT_STEPPING_B0;
                break;
            case SIWA_ONLY_HSW_C0:
                stepinfostr = (char *) HW_GT_STEPPING_C0;
                break;
            default:
                stepinfostr = NULL;
            }
        }
    } else if (stepid < MAX_STEPPING_NUM) {
        stepinfostr = (char *) CmSteppingInfo[stepid];
    } else {
        stepinfostr = NULL;
    }

    return CM_SUCCESS;
}
// Submits a "with hints" task to the HAL (pfnExecuteHintsTask).
//
// pTask  internal task to submit; supplies the kernel count, per-kernel
//        data/size/curbe offset, the hints, the number of tasks generated,
//        the last-task flag and the power option.
//
// Three temporary per-kernel arrays are allocated for the HAL parameter
// block and always released before returning. On success the task's event
// receives the driver task id and OS data from the parameter block, and
// pTask->ReleaseKernel() is called.
//
// Returns CM_SUCCESS; CM_FAILURE when a kernel reports a data size of 0 or
// the HAL returns a negative task id; otherwise whatever the CMCHK_* /
// CHK_GENOSSTATUS_RETURN_CMERROR macros store in hr for the failing step.
INT CmQueue::FlushEnqueueWithHintsTask(CmTaskInternal * pTask)
{
    CM_RETURN_CODE hr = CM_SUCCESS;
    CM_HAL_EXEC_HINTS_TASK_PARAM param;
    PCM_CONTEXT pCmData = NULL;
    CmKernelData *pKernelData = NULL;
    UINT kernelDataSize = 0;
    UINT count = 0;
    CmEvent *pEvent = NULL;
    PCM_HAL_KERNEL_PARAM pTempData = NULL;

    // Zeroing first guarantees the array pointers are NULL for the cleanup
    // at "finish" even when an allocation below fails.
    CmSafeMemSet(&param, 0, sizeof(CM_HAL_EXEC_HINTS_TASK_PARAM));

    pTask->GetKernelCount(count);
    param.iNumKernels = count;

    param.pKernels = new(std::nothrow) PCM_HAL_KERNEL_PARAM[count];
    param.piKernelSizes = new(std::nothrow) UINT[count];
    param.piKernelCurbeOffset = new(std::nothrow) UINT[count];

    CMCHK_NULL(param.pKernels);
    CMCHK_NULL(param.piKernelSizes);
    CMCHK_NULL(param.piKernelCurbeOffset);

    pTask->GetHints(param.iHints);
    pTask->GetNumTasksGenerated(param.iNumTasksGenerated);
    pTask->GetLastTask(param.isLastTask);

    // Collect the HAL kernel parameters, size and curbe offset per kernel.
    for (UINT i = 0; i < count; i++) {
        pTask->GetKernelData(i, pKernelData);
        CMCHK_NULL(pKernelData);

        pTask->GetKernelDataSize(i, kernelDataSize);
        if (kernelDataSize == 0) {
            CM_ASSERT(0);
            hr = CM_FAILURE;
            goto finish;
        }

        pTempData = pKernelData->GetHalCmKernelData();

        param.pKernels[i] = pTempData;
        param.piKernelSizes[i] = kernelDataSize;
        param.piKernelCurbeOffset[i] = pTask->GetKernelCurbeOffset(i);
    }

    pCmData = (PCM_CONTEXT) m_pDevice->GetAccelData();
    CMCHK_NULL(pCmData);

    CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->
                                   pfnSetPowerOption(pCmData->pCmHalState,
                                                     pTask->GetPowerOption()));

    CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->
                                   pfnExecuteHintsTask(pCmData->pCmHalState, &param));

    // A negative id from the HAL is treated as a failed submission.
    if (param.iTaskIdOut < 0) {
        CM_ASSERT(0);
        hr = CM_FAILURE;
        goto finish;
    }

    pTask->GetTaskEvent(pEvent);
    CMCHK_NULL(pEvent);
    CMCHK_HR(pEvent->SetTaskDriverId(param.iTaskIdOut));
    CMCHK_HR(pEvent->SetTaskOsData(param.OsData));
    CMCHK_HR(pTask->ReleaseKernel());

finish:
    CmSafeDeleteArray(param.pKernels);
    CmSafeDeleteArray(param.piKernelSizes);
    CmSafeDeleteArray(param.piKernelCurbeOffset);

    return hr;
}
// Submits a thread-group task to the HAL (pfnExecuteGroupTask).
//
// pTask  internal task to submit; supplies the kernel count, per-kernel
//        data/size/curbe offset, the preemption mode, the SLM size, the
//        thread-group space (when one was created) and the sync bitmap.
//
// Three temporary per-kernel arrays are allocated for the HAL parameter
// block and always released before returning. The global-surface and
// kernel-debug flags of the parameter block are the OR of the per-kernel
// flags. On success the task's event receives the driver task id and OS
// data, and pTask->ReleaseKernel() is called.
//
// Returns CM_SUCCESS; CM_FAILURE when a kernel reports a data size of 0 or
// the HAL returns a negative task id; CM_EXCEED_MAX_SLM_SIZE when the SLM
// size is above MAX_SLM_SIZE_PER_GROUP_IN_1K; otherwise whatever the
// CMCHK_* / CHK_GENOSSTATUS_RETURN_CMERROR macros store in hr.
INT CmQueue::FlushGroupTask(CmTaskInternal * pTask)
{
    CM_RETURN_CODE hr = CM_SUCCESS;
    CM_HAL_EXEC_TASK_GROUP_PARAM param;
    CmKernelData *pKernelData = NULL;
    UINT kernelDataSize = 0;
    UINT count = 0;
    PCM_CONTEXT pCmData = NULL;
    CmEvent *pEvent = NULL;
    PCM_HAL_KERNEL_PARAM pTempData = NULL;

    // Zeroing first guarantees the array pointers are NULL for the cleanup
    // at "finish" even when an allocation below fails.
    CmSafeMemSet(&param, 0, sizeof(CM_HAL_EXEC_TASK_GROUP_PARAM));

    pTask->GetKernelCount(count);
    param.iNumKernels = count;

    param.pKernels = new(std::nothrow) PCM_HAL_KERNEL_PARAM[count];
    param.piKernelSizes = new(std::nothrow) UINT[count];
    param.piKernelCurbeOffset = new(std::nothrow) UINT[count];
    param.iPreemptionMode = pTask->GetPreemptionMode();

    CMCHK_NULL(param.pKernels);
    CMCHK_NULL(param.piKernelSizes);
    CMCHK_NULL(param.piKernelCurbeOffset);

    // Collect the HAL kernel parameters, size and curbe offset per kernel
    // and accumulate the task-wide flags.
    for (UINT i = 0; i < count; i++) {
        pTask->GetKernelData(i, pKernelData);
        CMCHK_NULL(pKernelData);

        pTask->GetKernelDataSize(i, kernelDataSize);
        if (kernelDataSize == 0) {
            CM_ASSERT(0);
            hr = CM_FAILURE;
            goto finish;
        }

        pTempData = pKernelData->GetHalCmKernelData();

        param.pKernels[i] = pTempData;
        param.piKernelSizes[i] = kernelDataSize;
        param.piKernelCurbeOffset[i] = pTask->GetKernelCurbeOffset(i);
        param.bGlobalSurfaceUsed |= pTempData->bGlobalSurfaceUsed;
        param.bKernelDebugEnabled |= pTempData->bKernelDebugEnabled;
    }

    pTask->GetSLMSize(param.iSLMSize);
    if (param.iSLMSize > MAX_SLM_SIZE_PER_GROUP_IN_1K) {
        CM_ASSERT(0);
        hr = CM_EXCEED_MAX_SLM_SIZE;
        goto finish;
    }

    if (pTask->IsThreadGroupSpaceCreated()) {
        pTask->GetThreadGroupSpaceSize(param.threadSpaceWidth,
                                       param.threadSpaceHeight,
                                       param.groupSpaceWidth,
                                       param.groupSpaceHeight);
    }

    param.uiSyncBitmap = pTask->GetSyncBitmap();

    pCmData = (PCM_CONTEXT) m_pDevice->GetAccelData();
    // Checked before use, as FlushEnqueueWithHintsTask does.
    CMCHK_NULL(pCmData);

    CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->
                                   pfnExecuteGroupTask(pCmData->pCmHalState, &param));

    // A negative id from the HAL is treated as a failed submission.
    if (param.iTaskIdOut < 0) {
        CM_ASSERT(0);
        hr = CM_FAILURE;
        goto finish;
    }

    pTask->GetTaskEvent(pEvent);
    CMCHK_NULL(pEvent);
    CMCHK_HR(pEvent->SetTaskDriverId(param.iTaskIdOut));
    CMCHK_HR(pEvent->SetTaskOsData(param.OsData));
    CMCHK_HR(pTask->ReleaseKernel());

finish:
    CmSafeDeleteArray(param.pKernels);
    CmSafeDeleteArray(param.piKernelSizes);
    CmSafeDeleteArray(param.piKernelCurbeOffset);

    return hr;
}
// Submits a general (non-group, non-hints) task to the HAL (pfnExecuteTask).
//
// pTask  internal task to submit; supplies the kernel count, per-kernel
//        data/size/curbe offset, the total thread count, the optional thread
//        space description, the color count, the sync bitmap and the power
//        option.
//
// Thread-space defaults: width is the total thread count capped at
// CM_MAX_THREADSPACE_WIDTH, height is the total divided by that constant,
// rounded up, and the dependency pattern is CM_DEPENDENCY_NONE. When the
// task has a thread space, the dependency pattern, size and walking pattern
// are taken from it, along with the walking parameters and dependency
// vectors when they are set, and the per-kernel coordinate / dependency-mask
// tables when thread coordinates exist.
//
// All temporary arrays are released before returning. On success the task's
// event receives the driver task id and OS data, and pTask->ReleaseKernel()
// is called.
//
// Returns CM_SUCCESS; CM_OUT_OF_HOST_MEMORY when an allocation fails;
// CM_FAILURE when a kernel reports a data size of 0 or the HAL returns a
// negative task id; otherwise whatever the CMCHK_* /
// CHK_GENOSSTATUS_RETURN_CMERROR macros store in hr.
INT CmQueue::FlushGeneralTask(CmTaskInternal * pTask)
{
    CM_RETURN_CODE hr = CM_SUCCESS;
    CM_HAL_EXEC_TASK_PARAM param;
    CmKernelData *pKernelData = NULL;
    UINT kernelDataSize = 0;
    PCM_CONTEXT pCmData = NULL;
    CmEvent *pEvent = NULL;
    UINT totalThreadCount = 0;
    UINT count = 0;
    PCM_HAL_KERNEL_PARAM pTempData = NULL;

    // Zeroing first guarantees every array pointer is NULL for the cleanup
    // at "finish", including the two that are only allocated conditionally.
    CmSafeMemSet(&param, 0, sizeof(CM_HAL_EXEC_TASK_PARAM));

    pTask->GetKernelCount(count);
    param.iNumKernels = count;

    param.pKernels = new(std::nothrow) PCM_HAL_KERNEL_PARAM[count];
    param.piKernelSizes = new(std::nothrow) UINT[count];
    param.piKernelCurbeOffset = new(std::nothrow) UINT[count];

    CMCHK_NULL_RETURN(param.pKernels, CM_OUT_OF_HOST_MEMORY);
    CMCHK_NULL_RETURN(param.piKernelSizes, CM_OUT_OF_HOST_MEMORY);
    CMCHK_NULL_RETURN(param.piKernelCurbeOffset, CM_OUT_OF_HOST_MEMORY);

    // Collect the HAL kernel parameters, size and curbe offset per kernel
    // and accumulate the task-wide flags.
    for (UINT i = 0; i < count; i++) {
        pTask->GetKernelData(i, pKernelData);
        CMCHK_NULL(pKernelData);

        pTask->GetKernelDataSize(i, kernelDataSize);
        if (kernelDataSize == 0) {
            CM_ASSERT(0);
            hr = CM_FAILURE;
            goto finish;
        }

        pTempData = pKernelData->GetHalCmKernelData();

        param.pKernels[i] = pTempData;
        param.piKernelSizes[i] = kernelDataSize;
        param.piKernelCurbeOffset[i] = pTask->GetKernelCurbeOffset(i);
        param.bGlobalSurfaceUsed |= pTempData->bGlobalSurfaceUsed;
        param.bKernelDebugEnabled |= pTempData->bKernelDebugEnabled;
    }

    // Default thread space derived from the total thread count.
    pTask->GetTotalThreadCount(totalThreadCount);

    param.threadSpaceWidth = (totalThreadCount > CM_MAX_THREADSPACE_WIDTH)
        ? CM_MAX_THREADSPACE_WIDTH : totalThreadCount;
    if (totalThreadCount % CM_MAX_THREADSPACE_WIDTH) {
        param.threadSpaceHeight = totalThreadCount / CM_MAX_THREADSPACE_WIDTH + 1;
    } else {
        param.threadSpaceHeight = totalThreadCount / CM_MAX_THREADSPACE_WIDTH;
    }
    param.DependencyPattern = CM_DEPENDENCY_NONE;

    // An explicit thread space overrides the defaults above.
    if (pTask->IsThreadSpaceCreated()) {
        if (pTask->IsThreadCoordinatesExisted()) {
            param.ppThreadCoordinates = new(std::nothrow) PCM_HAL_SCOREBOARD_XY[count];
            param.ppDependencyMasks = new(std::nothrow) PCM_HAL_MASK_AND_RESET[count];

            CMCHK_NULL_RETURN(param.ppThreadCoordinates, CM_OUT_OF_HOST_MEMORY);
            CMCHK_NULL_RETURN(param.ppDependencyMasks, CM_OUT_OF_HOST_MEMORY);

            for (UINT i = 0; i < count; i++) {
                void *pKernelCoordinates = NULL;
                void *pDependencyMasks = NULL;
                pTask->GetKernelCoordinates(i, pKernelCoordinates);
                pTask->GetKernelDependencyMasks(i, pDependencyMasks);
                param.ppThreadCoordinates[i] = (PCM_HAL_SCOREBOARD_XY) pKernelCoordinates;
                param.ppDependencyMasks[i] = (PCM_HAL_MASK_AND_RESET) pDependencyMasks;
            }
        } else {
            param.ppThreadCoordinates = NULL;
        }

        pTask->GetDependencyPattern(param.DependencyPattern);
        pTask->GetThreadSpaceSize(param.threadSpaceWidth, param.threadSpaceHeight);
        pTask->GetWalkingPattern(param.WalkingPattern);

        if (pTask->CheckWalkingParametersSet()) {
            param.walkingParamsValid = 1;
            CMCHK_HR(pTask->GetWalkingParameters(param.walkingParams));
        } else {
            param.walkingParamsValid = 0;
        }

        if (pTask->CheckDependencyVectorsSet()) {
            param.dependencyVectorsValid = 1;
            CMCHK_HR(pTask->GetDependencyVectors(param.dependencyVectors));
        } else {
            param.dependencyVectorsValid = 0;
        }
    }

    pTask->GetColorCountMinusOne(param.ColorCountMinusOne);

    param.uiSyncBitmap = pTask->GetSyncBitmap();

    pCmData = (PCM_CONTEXT) m_pDevice->GetAccelData();
    // Checked before use, as FlushEnqueueWithHintsTask does.
    CMCHK_NULL(pCmData);

    CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->
                                   pfnSetPowerOption(pCmData->pCmHalState,
                                                     pTask->GetPowerOption()));

    CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->
                                   pfnExecuteTask(pCmData->pCmHalState, &param));

    // A negative id from the HAL is treated as a failed submission.
    if (param.iTaskIdOut < 0) {
        CM_ASSERT(0);
        hr = CM_FAILURE;
        goto finish;
    }

    pTask->GetTaskEvent(pEvent);
    CMCHK_NULL(pEvent);
    CMCHK_HR(pEvent->SetTaskDriverId(param.iTaskIdOut));
    CMCHK_HR(pEvent->SetTaskOsData(param.OsData));
    CMCHK_HR(pTask->ReleaseKernel());

finish:
    CmSafeDeleteArray(param.pKernels);
    CmSafeDeleteArray(param.piKernelSizes);
    CmSafeDeleteArray(param.ppThreadCoordinates);
    CmSafeDeleteArray(param.ppDependencyMasks);
    CmSafeDeleteArray(param.piKernelCurbeOffset);

    return hr;
}
// Checks that every kernel of this task has a per-kernel thread space that
// is consistent with the kernel's thread count.
//
// For each kernel i the function requires that
//   - the kernel pointer and its thread space are non-NULL,
//   - thread count == thread space width * height,
// and, when the thread space reports IsThreadAssociated():
//   - every thread-space unit refers to a kernel (directly, or through the
//     thread space's kernel pointer when GetNeedSetKernelPointer() is true),
//   - that kernel's index in the task is i and the unit's thread id is below
//     the thread count,
//   - if kernel i appears in the scoreboard at all, every one of its threads
//     is covered by some unit.
// A kernel that appears is flagged through SetAssociatedToTSFlag(TRUE).
//
// Returns TRUE when all checks pass, FALSE otherwise (including allocation
// failure). All temporary tables are released before returning.
BOOLEAN CmTask::IntegrityCheckKernelThreadspace(void)
{
    INT hr = CM_SUCCESS;
    UINT kernelCount = 0;
    UINT i = 0;
    UINT j = 0;
    CmKernel *pKernel_RT = NULL;
    CmKernel *pKernTmp = NULL;
    UINT threadCount = 0;
    CmThreadSpace *pKernelTS = NULL;
    UINT width = 0;
    UINT height = 0;
    BOOLEAN **pTSMapping = NULL;
    BOOLEAN *pKernelInScoreboard = NULL;
    CM_THREAD_SPACE_UNIT *pThreadSpaceUnit = NULL;
    UINT kernelIndex = 0;
    UINT threadIndex = 0;
    UINT unassociated = 0;

    kernelCount = this->GetKernelCount();

    // pTSMapping[k] is a per-thread "seen" table for kernel k; only the row
    // of the kernel currently being checked is ever allocated.
    pTSMapping = new(std::nothrow) BOOLEAN *[kernelCount];
    pKernelInScoreboard = new(std::nothrow) BOOLEAN[kernelCount];

    CMCHK_NULL_RETURN(pTSMapping, CM_OUT_OF_HOST_MEMORY);
    CMCHK_NULL_RETURN(pKernelInScoreboard, CM_OUT_OF_HOST_MEMORY);

    CmSafeMemSet(pTSMapping, 0, kernelCount * sizeof(BOOLEAN *));
    CmSafeMemSet(pKernelInScoreboard, 0, kernelCount * sizeof(BOOLEAN));

    for (i = 0; i < kernelCount; ++i) {
        pKernel_RT = this->GetKernelPointer(i);
        CMCHK_NULL(pKernel_RT);

        CMCHK_HR(pKernel_RT->GetThreadSpace(pKernelTS));
        CMCHK_NULL_RETURN(pKernelTS, CM_KERNEL_THREADSPACE_NOT_SET);

        CMCHK_HR(pKernelTS->GetThreadSpaceSize(width, height));
        CMCHK_HR(pKernel_RT->GetThreadCount(threadCount));
        if (threadCount != (width * height)) {
            CM_ASSERT(0);
            hr = CM_INVALID_KERNEL_THREADSPACE;
            goto finish;
        }

        if (pKernelTS->IsThreadAssociated()) {
            pTSMapping[i] = new(std::nothrow) BOOLEAN[threadCount];
            CMCHK_NULL_RETURN(pTSMapping[i], CM_OUT_OF_HOST_MEMORY);
            CmSafeMemSet(pTSMapping[i], 0, threadCount * sizeof(BOOLEAN));
            pKernelInScoreboard[i] = FALSE;

            hr = pKernelTS->GetThreadSpaceUnit(pThreadSpaceUnit);
            if (hr != CM_SUCCESS || pThreadSpaceUnit == NULL) {
                CM_ASSERT(0);
                CmSafeDeleteArray(pTSMapping[i]);
                hr = CM_FAILURE;
                goto finish;
            }

            for (j = 0; j < width * height; ++j) {
                pKernTmp = static_cast<CmKernel *>(pThreadSpaceUnit[j].pKernel);
                if (pKernTmp == NULL) {
                    // Fall back to the thread space's own kernel pointer
                    // when the thread space says it should be used.
                    if (pKernelTS->GetNeedSetKernelPointer()) {
                        pKernTmp = pKernelTS->GetKernelPointer();
                    }
                    if (pKernTmp == NULL) {
                        CM_ASSERT(0);
                        CmSafeDeleteArray(pTSMapping[i]);
                        hr = CM_FAILURE;
                        goto finish;
                    }
                }

                kernelIndex = pKernTmp->GetIndexInTask();
                threadIndex = static_cast<UINT>(pThreadSpaceUnit[j].threadId);

                // Only row i (threadCount entries) exists at this point, so
                // a unit naming another kernel or an out-of-range thread
                // cannot be recorded and is reported as a failed check.
                if (kernelIndex != i || threadIndex >= threadCount) {
                    CM_ASSERT(0);
                    CmSafeDeleteArray(pTSMapping[i]);
                    hr = CM_FAILURE;
                    goto finish;
                }

                pTSMapping[kernelIndex][threadIndex] = TRUE;
                pKernelInScoreboard[kernelIndex] = TRUE;
            }

            if (pKernelInScoreboard[i] == TRUE) {
                pKernel_RT->SetAssociatedToTSFlag(TRUE);
                // One uncovered thread is enough to fail the kernel.
                for (j = 0; j < threadCount; ++j) {
                    if (pTSMapping[i][j] == FALSE) {
                        unassociated++;
                        break;
                    }
                }
            }

            CmSafeDeleteArray(pTSMapping[i]);
        }

        if (unassociated != 0) {
            CM_ASSERT(0);
            hr = CM_KERNEL_THREADSPACE_THREADS_NOT_ASSOCIATED;
            goto finish;
        }
    }

finish:
    CmSafeDeleteArray(pTSMapping);
    CmSafeDeleteArray(pKernelInScoreboard);

    return (hr == CM_SUCCESS) ? TRUE : FALSE;
}