// Destructor. Releases the per-kernel surface-entry records held in
// m_SurEntryInfoArrays and then, if kernel names were recorded, the name
// strings, the name table and the thread-space table.
CmEvent::~CmEvent(void)
{
    // Surface-entry records: dwKrnNum elements, each with two owned pointers.
    if (m_SurEntryInfoArrays.pSurfEntryInfosArray != NULL)
    {
        UINT krn = 0;
        while (krn < m_SurEntryInfoArrays.dwKrnNum)
        {
            if (m_SurEntryInfoArrays.pSurfEntryInfosArray[krn].pSurfEntryInfos != NULL)
            {
                CmSafeDelete(m_SurEntryInfoArrays.pSurfEntryInfosArray[krn].pSurfEntryInfos);
            }

            if (m_SurEntryInfoArrays.pSurfEntryInfosArray[krn].pGlobalSurfInfos != NULL)
            {
                CmSafeDelete(m_SurEntryInfoArrays.pSurfEntryInfosArray[krn].pGlobalSurfInfos);
            }

            ++krn;
        }

        CmSafeDelete(m_SurEntryInfoArrays.pSurfEntryInfosArray);
    }

    // The name table and the thread-space table are only released when a
    // name table exists.
    if (m_KernelNames == NULL)
    {
        return;
    }

    for (UINT nameIdx = 0; nameIdx < m_KernelCount; ++nameIdx)
    {
        CmSafeDeleteArray(m_KernelNames[nameIdx]);
    }

    CmSafeDeleteArray(m_KernelNames);
    CmSafeDeleteArray(m_ThreadSpace);
}
// Enqueues a task for execution with a thread-group space.
//
// pTask  - task whose kernels are enqueued; must not be NULL and must hold
//          at least one kernel.
// pEvent - event reference handed to Enqueue_RT; if it is non-NULL after
//          the call, the kernel names / group-space sizes are recorded on it.
// pTGS   - thread-group space forwarded to Enqueue_RT.
//
// Returns CM_INVALID_ARG_VALUE for a NULL task, CM_FAILURE for a task with
// no kernels or with a NULL kernel entry, CM_OUT_OF_HOST_MEMORY when the
// temporary kernel array cannot be allocated, CM_THREAD_ARG_NOT_ALLOWED when
// a kernel carries per-thread arguments, otherwise the result of Enqueue_RT.
CM_RT_API INT CmQueue::EnqueueWithGroup(CmTask * pTask, CmEvent * &pEvent, const CmThreadGroupSpace * pTGS)
{
    INT result;

    if (pTask == NULL)
    {
        CM_ASSERTMESSAGE("Kernel array is NULL.");
        return CM_INVALID_ARG_VALUE;
    }

    UINT count = 0;
    count = pTask->GetKernelCount();
    if (count == 0)
    {
        CM_ASSERTMESSAGE("There are no valid kernels.");
        return CM_FAILURE;
    }

    // One extra slot: the array passed to Enqueue_RT is NULL-terminated.
    typedef CmKernel *pCmKernel;
    CmKernel **pTmp = new(std::nothrow) pCmKernel[count + 1];
    if (pTmp == NULL)
    {
        CM_ASSERT(0);
        return CM_OUT_OF_HOST_MEMORY;
    }

    UINT totalThreadNumber = 0;
    for (UINT i = 0; i < count; i++)
    {
        UINT singleThreadNumber = 0;
        pTmp[i] = pTask->GetKernelPointer(i);

        // Guard against a missing kernel entry before dereferencing it.
        if (pTmp[i] == NULL)
        {
            CM_ASSERT(0);
            CmSafeDeleteArray(pTmp);
            return CM_FAILURE;
        }

        if (pTmp[i]->IsThreadArgExisted())
        {
            CM_ASSERTMESSAGE("No thread Args allowed when using group space");
            CmSafeDeleteArray(pTmp);
            return CM_THREAD_ARG_NOT_ALLOWED;
        }

        pTmp[i]->GetThreadCount(singleThreadNumber);
        totalThreadNumber += singleThreadNumber;
    }
    pTmp[count] = NULL;

    result = Enqueue_RT(pTmp, count, totalThreadNumber, pEvent, pTGS,
                        pTask->GetSyncBitmap(), pTask->GetPreemptionMode());

    if (pEvent)
    {
        pEvent->SetKernelNames(pTask, NULL, const_cast < CmThreadGroupSpace * >(pTGS));
    }

    CmSafeDeleteArray(pTmp);
    return result;
}
// Destructor. Destroys every surface-array element from the device's first
// valid surface index up to m_SurfaceArraySize, prints usage statistics when
// SURFACE_MANAGE_PROFILE is defined, and finally frees the bookkeeping arrays.
CmSurfaceManager::~CmSurfaceManager(void)
{
    UINT slot = m_pCmDevice->ValidSurfaceIndexStart();
    while (slot < m_SurfaceArraySize)
    {
        DestroySurfaceArrayElement(slot);
        slot++;
    }

#ifdef SURFACE_MANAGE_PROFILE
    // Allocation totals.
    printf("\n\n");
    printf("Total %d 1D buffers, with size: %d\n", m_bufferAllCount, m_bufferAllSize);
    printf("Total %d 2D surfaces, with size: %d\n", m_2DSurfaceAllCount, m_2DSurfaceAllSize);

    // Reuse counters.
    printf("\nReused %d 1D buffers, with size: %d\n", m_bufferReuseCount, m_bufferReuseSize);
    printf("Reused %d 2D surfaces, with size: %d\n", m_2DSurfaceReuseCount, m_2DSurfaceReuseSize);

    // Garbage-collection counters.
    printf("\nGC trigger times: %d\n", m_GCTriggerTimes);
    printf("GC collected 1D surface size: %d\n", m_GCCollected1DSize);
    printf("GC collected 2D surface size: %d\n", m_GCCollected2DSize);
    printf("\n\n");
#endif

    // Per-slot bookkeeping arrays allocated in Initialize().
    CmSafeDeleteArray(m_SurfaceState);
    CmSafeDeleteArray(m_SurfaceCached);
    CmSafeDeleteArray(m_SurfaceReleased);
    CmSafeDeleteArray(m_SurfaceDestroyID);
    CmSafeDeleteArray(m_SurfaceSizes);
    CmSafeDeleteArray(m_SurfaceArray);
}
// Sizes and allocates the surface bookkeeping arrays.
//
// HalMaxValues / HalMaxValuesEx - table-size limits; the total slot count is
//     the sum of the buffer, 2D, 3D and 2D-UP table sizes.
//
// Returns CM_SUCCESS when every array was allocated and zero-filled, or
// CM_OUT_OF_HOST_MEMORY when any allocation failed. On failure all arrays
// are released and m_SurfaceArraySize is reset to 0.
INT CmSurfaceManager::Initialize(CM_HAL_MAX_VALUES HalMaxValues, CM_HAL_MAX_VALUES_EX HalMaxValuesEx)
{
    UINT totalSurfaceCount = HalMaxValues.iMaxBufferTableSize
                           + HalMaxValues.iMax2DSurfaceTableSize
                           + HalMaxValues.iMax3DSurfaceTableSize
                           + HalMaxValuesEx.iMax2DUPSurfaceTableSize;

    m_SurfaceArraySize = totalSurfaceCount;
    m_maxBufferCount = HalMaxValues.iMaxBufferTableSize;
    m_max2DSurfaceCount = HalMaxValues.iMax2DSurfaceTableSize;
    m_max2DUPSurfaceCount = HalMaxValuesEx.iMax2DUPSurfaceTableSize;

    typedef CmSurface *PCMSURFACE;
    m_SurfaceArray = new(std::nothrow) PCMSURFACE[m_SurfaceArraySize];
    m_SurfaceState = new(std::nothrow) INT[m_SurfaceArraySize];
    m_SurfaceCached = new(std::nothrow) BOOL[m_SurfaceArraySize];
    m_SurfaceReleased = new(std::nothrow) BOOL[m_SurfaceArraySize];
    m_SurfaceDestroyID = new(std::nothrow) INT[m_SurfaceArraySize];
    m_SurfaceSizes = new(std::nothrow) INT[m_SurfaceArraySize];

    if (m_SurfaceArray == NULL || m_SurfaceState == NULL ||
        m_SurfaceCached == NULL || m_SurfaceReleased == NULL ||
        m_SurfaceDestroyID == NULL || m_SurfaceSizes == NULL)
    {
        // Partial allocation: release whatever did succeed.
        CmSafeDeleteArray(m_SurfaceState);
        CmSafeDeleteArray(m_SurfaceCached);
        CmSafeDeleteArray(m_SurfaceReleased);
        CmSafeDeleteArray(m_SurfaceDestroyID);
        CmSafeDeleteArray(m_SurfaceSizes);
        CmSafeDeleteArray(m_SurfaceArray);

        // No arrays exist any more, so the recorded size must not claim
        // otherwise: the destructor iterates up to m_SurfaceArraySize.
        m_SurfaceArraySize = 0;

        CM_ASSERT(0);
        return CM_OUT_OF_HOST_MEMORY;
    }

    // Start with every slot cleared.
    CmSafeMemSet(m_SurfaceArray, 0, m_SurfaceArraySize * sizeof(CmSurface *));
    CmSafeMemSet(m_SurfaceState, 0, m_SurfaceArraySize * sizeof(INT));
    CmSafeMemSet(m_SurfaceCached, 0, m_SurfaceArraySize * sizeof(BOOL));
    CmSafeMemSet(m_SurfaceReleased, 0, m_SurfaceArraySize * sizeof(BOOL));
    CmSafeMemSet(m_SurfaceDestroyID, 0, m_SurfaceArraySize * sizeof(INT));
    CmSafeMemSet(m_SurfaceSizes, 0, m_SurfaceArraySize * sizeof(INT));

    return CM_SUCCESS;
}
// Destructor. Frees the buffers referenced from m_HalKernelParam: the
// per-argument value buffers, the indirect-data buffers, the thread-space
// buffers and the mov-instruction data.
CmKernelData::~CmKernelData(void)
{
    // One value buffer per kernel argument.
    UINT argIdx = 0;
    while (argIdx < m_HalKernelParam.iNumArgs)
    {
        CmSafeDeleteArray(m_HalKernelParam.CmArgParams[argIdx].pFirstValue);
        argIdx++;
    }

    // Indirect data and its surface information.
    CmSafeDeleteArray(m_HalKernelParam.CmIndirectDataParam.pIndirectData);
    CmSafeDeleteArray(m_HalKernelParam.CmIndirectDataParam.pSurfaceInfo);

    // Thread-space related buffers.
    CmSafeDeleteArray(m_HalKernelParam.CmKernelThreadSpaceParam.dispatchInfo.pNumThreadsInWave);
    CmSafeDeleteArray(m_HalKernelParam.CmKernelThreadSpaceParam.pThreadCoordinates);

    CmSafeDeleteArray(m_HalKernelParam.pMovInsData);
}
// Builds a CM_HAL_EXEC_HINTS_TASK_PARAM from an internal task and submits
// it through the HAL state's pfnExecuteHintsTask entry point.
//
// pTask - internal task providing kernel data, hints and the task event.
//
// Returns CM_SUCCESS on success. The CMCHK_* / CHK_GENOSSTATUS_* macros are
// presumed (from the hr / finish pattern used here) to store an error in hr
// and jump to finish; confirm against their definitions. CM_FAILURE is
// returned for a zero kernel-data size or a negative task id from the HAL.
INT CmQueue::FlushEnqueueWithHintsTask(CmTaskInternal * pTask)
{
    CM_RETURN_CODE hr = CM_SUCCESS;
    CM_HAL_EXEC_HINTS_TASK_PARAM param;
    PCM_CONTEXT pCmData = NULL;
    CmKernelData *pKernelData = NULL;
    UINT kernelDataSize = 0;
    UINT count = 0;
    CmEvent *pEvent = NULL;
    PCM_HAL_KERNEL_PARAM pTempData = NULL;

    CmSafeMemSet(&param, 0, sizeof(CM_HAL_EXEC_HINTS_TASK_PARAM));

    pTask->GetKernelCount(count);
    param.iNumKernels = count;

    // Per-kernel tables; all three are released at finish.
    param.pKernels = new(std::nothrow) PCM_HAL_KERNEL_PARAM[count];
    param.piKernelSizes = new(std::nothrow) UINT[count];
    param.piKernelCurbeOffset = new(std::nothrow) UINT[count];
    CMCHK_NULL(param.pKernels);
    CMCHK_NULL(param.piKernelSizes);
    CMCHK_NULL(param.piKernelCurbeOffset);

    pTask->GetHints(param.iHints);
    pTask->GetNumTasksGenerated(param.iNumTasksGenerated);
    pTask->GetLastTask(param.isLastTask);

    for (UINT i = 0; i < count; i++)
    {
        pTask->GetKernelData(i, pKernelData);
        CMCHK_NULL(pKernelData);

        pTask->GetKernelDataSize(i, kernelDataSize);
        if (kernelDataSize == 0)
        {
            CM_ASSERT(0);
            hr = CM_FAILURE;
            goto finish;
        }

        pTempData = pKernelData->GetHalCmKernelData();
        param.pKernels[i] = pTempData;
        param.piKernelSizes[i] = kernelDataSize;
        param.piKernelCurbeOffset[i] = pTask->GetKernelCurbeOffset(i);
    }

    pCmData = (PCM_CONTEXT) m_pDevice->GetAccelData();
    CMCHK_NULL(pCmData);

    CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->pfnSetPowerOption(pCmData->pCmHalState, pTask->GetPowerOption()));
    CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->pfnExecuteHintsTask(pCmData->pCmHalState, &param));

    // A negative task id after execution is treated as failure.
    if (param.iTaskIdOut < 0)
    {
        CM_ASSERT(0);
        hr = CM_FAILURE;
        goto finish;
    }

    // Record the driver task id and OS data on the task's event.
    pTask->GetTaskEvent(pEvent);
    CMCHK_NULL(pEvent);
    CMCHK_HR(pEvent->SetTaskDriverId(param.iTaskIdOut));
    CMCHK_HR(pEvent->SetTaskOsData(param.OsData));
    CMCHK_HR(pTask->ReleaseKernel());

finish:
    CmSafeDeleteArray(param.pKernels);
    CmSafeDeleteArray(param.piKernelSizes);
    CmSafeDeleteArray(param.piKernelCurbeOffset);
    return hr;
}
// Builds a CM_HAL_EXEC_TASK_GROUP_PARAM from an internal task and submits
// it through the HAL state's pfnExecuteGroupTask entry point.
//
// pTask - internal task providing kernel data, SLM size, optional
//         thread-group-space sizes and the task event.
//
// Returns CM_SUCCESS on success, CM_EXCEED_MAX_SLM_SIZE when the task's SLM
// size is above MAX_SLM_SIZE_PER_GROUP_IN_1K, and CM_FAILURE for a zero
// kernel-data size or a negative task id from the HAL. The CMCHK_* /
// CHK_GENOSSTATUS_* macros are presumed (from the hr / finish pattern used
// here) to store an error in hr and jump to finish; confirm against their
// definitions.
INT CmQueue::FlushGroupTask(CmTaskInternal * pTask)
{
    CM_RETURN_CODE hr = CM_SUCCESS;
    CM_HAL_EXEC_TASK_GROUP_PARAM param;
    CmKernelData *pKernelData = NULL;
    UINT kernelDataSize = 0;
    UINT count = 0;
    PCM_CONTEXT pCmData = NULL;
    CmEvent *pEvent = NULL;
    PCM_HAL_KERNEL_PARAM pTempData = NULL;

    CmSafeMemSet(&param, 0, sizeof(CM_HAL_EXEC_TASK_GROUP_PARAM));

    pTask->GetKernelCount(count);
    param.iNumKernels = count;

    // Per-kernel tables; all three are released at finish.
    param.pKernels = new(std::nothrow) PCM_HAL_KERNEL_PARAM[count];
    param.piKernelSizes = new(std::nothrow) UINT[count];
    param.piKernelCurbeOffset = new(std::nothrow) UINT[count];
    param.iPreemptionMode = pTask->GetPreemptionMode();
    CMCHK_NULL(param.pKernels);
    CMCHK_NULL(param.piKernelSizes);
    CMCHK_NULL(param.piKernelCurbeOffset);

    for (UINT i = 0; i < count; i++)
    {
        pTask->GetKernelData(i, pKernelData);
        CMCHK_NULL(pKernelData);

        pTask->GetKernelDataSize(i, kernelDataSize);
        if (kernelDataSize == 0)
        {
            CM_ASSERT(0);
            hr = CM_FAILURE;
            goto finish;
        }

        pTempData = pKernelData->GetHalCmKernelData();
        param.pKernels[i] = pTempData;
        param.piKernelSizes[i] = kernelDataSize;
        param.piKernelCurbeOffset[i] = pTask->GetKernelCurbeOffset(i);

        // Task-level flags are the OR of the per-kernel flags.
        param.bGlobalSurfaceUsed |= pTempData->bGlobalSurfaceUsed;
        param.bKernelDebugEnabled |= pTempData->bKernelDebugEnabled;
    }

    pTask->GetSLMSize(param.iSLMSize);
    if (param.iSLMSize > MAX_SLM_SIZE_PER_GROUP_IN_1K)
    {
        CM_ASSERT(0);
        hr = CM_EXCEED_MAX_SLM_SIZE;
        goto finish;
    }

    if (pTask->IsThreadGroupSpaceCreated())
    {
        pTask->GetThreadGroupSpaceSize(param.threadSpaceWidth, param.threadSpaceHeight,
                                       param.groupSpaceWidth, param.groupSpaceHeight);
    }

    param.uiSyncBitmap = pTask->GetSyncBitmap();

    pCmData = (PCM_CONTEXT) m_pDevice->GetAccelData();
    // Same guard FlushEnqueueWithHintsTask applies before touching the HAL state.
    CMCHK_NULL(pCmData);

    CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->pfnExecuteGroupTask(pCmData->pCmHalState, &param));

    // A negative task id after execution is treated as failure.
    if (param.iTaskIdOut < 0)
    {
        CM_ASSERT(0);
        hr = CM_FAILURE;
        goto finish;
    }

    // Record the driver task id and OS data on the task's event.
    pTask->GetTaskEvent(pEvent);
    CMCHK_NULL(pEvent);
    CMCHK_HR(pEvent->SetTaskDriverId(param.iTaskIdOut));
    CMCHK_HR(pEvent->SetTaskOsData(param.OsData));
    CMCHK_HR(pTask->ReleaseKernel());

finish:
    CmSafeDeleteArray(param.pKernels);
    CmSafeDeleteArray(param.piKernelSizes);
    CmSafeDeleteArray(param.piKernelCurbeOffset);
    return hr;
}
// Builds a CM_HAL_EXEC_TASK_PARAM from an internal task and submits it
// through the HAL state's pfnExecuteTask entry point.
//
// pTask - internal task providing kernel data, optional thread-space
//         settings (coordinates, dependency masks, walking parameters,
//         dependency vectors) and the task event.
//
// Returns CM_SUCCESS on success, CM_OUT_OF_HOST_MEMORY when a temporary
// table cannot be allocated, and CM_FAILURE for a zero kernel-data size or a
// negative task id from the HAL. The CMCHK_* / CHK_GENOSSTATUS_* macros are
// presumed (from the hr / finish pattern used here) to store an error in hr
// and jump to finish; confirm against their definitions.
INT CmQueue::FlushGeneralTask(CmTaskInternal * pTask)
{
    CM_RETURN_CODE hr = CM_SUCCESS;
    CM_HAL_EXEC_TASK_PARAM param;
    CmKernelData *pKernelData = NULL;
    UINT kernelDataSize = 0;
    PCM_CONTEXT pCmData = NULL;
    CmEvent *pEvent = NULL;
    UINT totalThreadCount = 0;
    UINT count = 0;
    PCM_HAL_KERNEL_PARAM pTempData = NULL;

    CmSafeMemSet(&param, 0, sizeof(CM_HAL_EXEC_TASK_PARAM));

    pTask->GetKernelCount(count);
    param.iNumKernels = count;

    // Per-kernel tables; released at finish.
    param.pKernels = new(std::nothrow) PCM_HAL_KERNEL_PARAM[count];
    param.piKernelSizes = new(std::nothrow) UINT[count];
    param.piKernelCurbeOffset = new(std::nothrow) UINT[count];
    CMCHK_NULL_RETURN(param.pKernels, CM_OUT_OF_HOST_MEMORY);
    CMCHK_NULL_RETURN(param.piKernelSizes, CM_OUT_OF_HOST_MEMORY);
    CMCHK_NULL_RETURN(param.piKernelCurbeOffset, CM_OUT_OF_HOST_MEMORY);

    for (UINT i = 0; i < count; i++)
    {
        pTask->GetKernelData(i, pKernelData);
        CMCHK_NULL(pKernelData);

        pTask->GetKernelDataSize(i, kernelDataSize);
        if (kernelDataSize == 0)
        {
            CM_ASSERT(0);
            hr = CM_FAILURE;
            goto finish;
        }

        pTempData = pKernelData->GetHalCmKernelData();
        param.pKernels[i] = pTempData;
        param.piKernelSizes[i] = kernelDataSize;
        param.piKernelCurbeOffset[i] = pTask->GetKernelCurbeOffset(i);

        // Task-level flags are the OR of the per-kernel flags.
        param.bGlobalSurfaceUsed |= pTempData->bGlobalSurfaceUsed;
        param.bKernelDebugEnabled |= pTempData->bKernelDebugEnabled;
    }

    // Default thread space: rows of at most CM_MAX_THREADSPACE_WIDTH threads,
    // with the height rounded up to cover every thread.
    pTask->GetTotalThreadCount(totalThreadCount);
    param.threadSpaceWidth = (totalThreadCount > CM_MAX_THREADSPACE_WIDTH)
                           ? CM_MAX_THREADSPACE_WIDTH
                           : totalThreadCount;
    if (totalThreadCount % CM_MAX_THREADSPACE_WIDTH)
    {
        param.threadSpaceHeight = totalThreadCount / CM_MAX_THREADSPACE_WIDTH + 1;
    }
    else
    {
        param.threadSpaceHeight = totalThreadCount / CM_MAX_THREADSPACE_WIDTH;
    }
    param.DependencyPattern = CM_DEPENDENCY_NONE;

    // An explicit thread space on the task overrides the defaults above.
    if (pTask->IsThreadSpaceCreated())
    {
        if (pTask->IsThreadCoordinatesExisted())
        {
            param.ppThreadCoordinates = new(std::nothrow) PCM_HAL_SCOREBOARD_XY[count];
            param.ppDependencyMasks = new(std::nothrow) PCM_HAL_MASK_AND_RESET[count];
            CMCHK_NULL_RETURN(param.ppThreadCoordinates, CM_OUT_OF_HOST_MEMORY);
            CMCHK_NULL_RETURN(param.ppDependencyMasks, CM_OUT_OF_HOST_MEMORY);

            for (UINT i = 0; i < count; i++)
            {
                void *pKernelCoordinates = NULL;
                void *pDependencyMasks = NULL;
                pTask->GetKernelCoordinates(i, pKernelCoordinates);
                pTask->GetKernelDependencyMasks(i, pDependencyMasks);
                param.ppThreadCoordinates[i] = (PCM_HAL_SCOREBOARD_XY) pKernelCoordinates;
                param.ppDependencyMasks[i] = (PCM_HAL_MASK_AND_RESET) pDependencyMasks;
            }
        }
        else
        {
            param.ppThreadCoordinates = NULL;
        }

        pTask->GetDependencyPattern(param.DependencyPattern);
        pTask->GetThreadSpaceSize(param.threadSpaceWidth, param.threadSpaceHeight);
        pTask->GetWalkingPattern(param.WalkingPattern);

        if (pTask->CheckWalkingParametersSet())
        {
            param.walkingParamsValid = 1;
            CMCHK_HR(pTask->GetWalkingParameters(param.walkingParams));
        }
        else
        {
            param.walkingParamsValid = 0;
        }

        if (pTask->CheckDependencyVectorsSet())
        {
            param.dependencyVectorsValid = 1;
            CMCHK_HR(pTask->GetDependencyVectors(param.dependencyVectors));
        }
        else
        {
            param.dependencyVectorsValid = 0;
        }
    }

    pTask->GetColorCountMinusOne(param.ColorCountMinusOne);
    param.uiSyncBitmap = pTask->GetSyncBitmap();

    pCmData = (PCM_CONTEXT) m_pDevice->GetAccelData();
    // Same guard FlushEnqueueWithHintsTask applies before touching the HAL state.
    CMCHK_NULL(pCmData);

    CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->pfnSetPowerOption(pCmData->pCmHalState, pTask->GetPowerOption()));
    CHK_GENOSSTATUS_RETURN_CMERROR(pCmData->pCmHalState->pfnExecuteTask(pCmData->pCmHalState, &param));

    // A negative task id after execution is treated as failure.
    if (param.iTaskIdOut < 0)
    {
        CM_ASSERT(0);
        hr = CM_FAILURE;
        goto finish;
    }

    // Record the driver task id and OS data on the task's event.
    pTask->GetTaskEvent(pEvent);
    CMCHK_NULL(pEvent);
    CMCHK_HR(pEvent->SetTaskDriverId(param.iTaskIdOut));
    CMCHK_HR(pEvent->SetTaskOsData(param.OsData));
    CMCHK_HR(pTask->ReleaseKernel());

finish:
    // Only the pointer tables are owned here; the entries they point to are
    // obtained from the task and are not freed.
    CmSafeDeleteArray(param.pKernels);
    CmSafeDeleteArray(param.piKernelSizes);
    CmSafeDeleteArray(param.ppThreadCoordinates);
    CmSafeDeleteArray(param.ppDependencyMasks);
    CmSafeDeleteArray(param.piKernelCurbeOffset);
    return hr;
}
// Enqueues a task using scheduling hints, optionally split into several
// sub-tasks.
//
// pKernelArray - task holding the kernels; every kernel must have a thread
//                space attached.
// pEvent       - event reference forwarded to Enqueue_RT.
// hints        - hint bits; the sub-task count is extracted with
//                CM_HINTS_MASK_NUM_TASKS / CM_HINTS_NUM_BITS_TASK_POS.
//
// Returns CM_FAILURE for an empty task, CM_EXCEED_MAX_KERNEL_PER_ENQUEUE when
// the kernel count is above the HAL limit, and otherwise the value left in hr
// by the checking macros (presumed to store an error in hr and jump to
// finish; confirm against their definitions).
CM_RT_API INT CmQueue::EnqueueWithHints(CmTask * pKernelArray, CmEvent * &pEvent, UINT hints)
{
    INT hr = CM_FAILURE;
    UINT kernelTotal = 0;
    UINT slot = 0;
    CmKernel **ppKernelList = NULL;
    UINT requestedTasks = 0;
    BOOLEAN isSplit = FALSE;
    BOOLEAN isFinalTask = FALSE;
    UINT issuedTasks = 0;

    CMCHK_NULL_RETURN(pKernelArray, CM_INVALID_ARG_VALUE);

    kernelTotal = pKernelArray->GetKernelCount();
    if (kernelTotal == 0)
    {
        CM_ASSERT(0);
        hr = CM_FAILURE;
        goto finish;
    }
    if (kernelTotal > m_pHalMaxValues->iMaxKernelsPerTask)
    {
        CM_ASSERT(0);
        hr = CM_EXCEED_MAX_KERNEL_PER_ENQUEUE;
        goto finish;
    }

    // Every kernel needs a thread space; attach the kernel to it when the
    // thread space asks for a kernel pointer and has none yet.
    for (UINT k = 0; k < kernelTotal; ++k)
    {
        CmThreadSpace *pKernelTS = NULL;
        CmKernel *pCurKernel = pKernelArray->GetKernelPointer(k);
        CMCHK_NULL(pCurKernel);

        pCurKernel->GetThreadSpace(pKernelTS);
        CMCHK_NULL(pKernelTS);

        if (pKernelTS->GetNeedSetKernelPointer())
        {
            if (pKernelTS->KernelPointerIsNULL())
            {
                pKernelTS->SetKernelPointer(pCurKernel);
            }
        }
    }

    requestedTasks = (hints & CM_HINTS_MASK_NUM_TASKS) >> CM_HINTS_NUM_BITS_TASK_POS;
    isSplit = (requestedTasks > 1) ? TRUE : FALSE;

    // One extra slot for the NULL terminator.
    ppKernelList = new(std::nothrow) CmKernel *[kernelTotal + 1];
    CMCHK_NULL(ppKernelList);

    // Always enqueue at least once; keep going while sub-tasks remain.
    do
    {
        slot = 0;
        while (slot < kernelTotal)
        {
            ppKernelList[slot] = pKernelArray->GetKernelPointer(slot);
            ++slot;
        }
        ppKernelList[kernelTotal] = NULL;

        // An unsplit task is always the last one; a split task is the last
        // one when it is the final sub-task.
        if (!isSplit || issuedTasks == (requestedTasks - 1))
        {
            isFinalTask = TRUE;
        }

        CMCHK_HR(Enqueue_RT(ppKernelList, pEvent, issuedTasks, isFinalTask, hints,
                            pKernelArray->GetPowerOption()));
        issuedTasks++;
    }
    while (issuedTasks < requestedTasks);

finish:
    CmSafeDeleteArray(ppKernelList);
    return hr;
}
// Enqueues a task for execution, optionally with a thread space.
//
// pKernelArray - task whose kernels are enqueued; must not be NULL and must
//                hold between 1 and iMaxKernelsPerTask kernels.
// pEvent       - event reference handed to Enqueue_RT; if it is non-NULL
//                after the call, the kernel names / thread-space sizes are
//                recorded on it.
// pTS          - optional thread space forwarded to Enqueue_RT.
//
// Returns CM_INVALID_ARG_VALUE for a NULL task, CM_FAILURE for a task with
// no kernels or with a NULL kernel entry, CM_EXCEED_MAX_KERNEL_PER_ENQUEUE
// when the kernel count is above the HAL limit, CM_OUT_OF_HOST_MEMORY when
// the temporary kernel array cannot be allocated, otherwise the result of
// Enqueue_RT.
CM_RT_API INT CmQueue::Enqueue(CmTask * pKernelArray, CmEvent * &pEvent, const CmThreadSpace * pTS)
{
    INT result;

    if (pKernelArray == NULL)
    {
        CM_ASSERT(0);
        return CM_INVALID_ARG_VALUE;
    }

    UINT KernelCount = 0;
    KernelCount = pKernelArray->GetKernelCount();
    if (KernelCount == 0)
    {
        CM_ASSERT(0);
        return CM_FAILURE;
    }
    if (KernelCount > m_pHalMaxValues->iMaxKernelsPerTask)
    {
        CM_ASSERT(0);
        return CM_EXCEED_MAX_KERNEL_PER_ENQUEUE;
    }

    // A thread-associated thread space that still needs a kernel pointer is
    // given the task's first kernel.
    if (pTS && pTS->IsThreadAssociated())
    {
        if (pTS->GetNeedSetKernelPointer() && pTS->KernelPointerIsNULL())
        {
            CmKernel *pFirstKernel = NULL;
            pFirstKernel = pKernelArray->GetKernelPointer(0);
            pTS->SetKernelPointer(pFirstKernel);
        }
    }

    // One extra slot: the array passed to Enqueue_RT is NULL-terminated.
    typedef CmKernel *pCmKernel;
    CmKernel **pTmp = new(std::nothrow) pCmKernel[KernelCount + 1];
    if (pTmp == NULL)
    {
        CM_ASSERT(0);
        return CM_OUT_OF_HOST_MEMORY;
    }

    UINT totalThreadNumber = 0;
    for (UINT i = 0; i < KernelCount; i++)
    {
        pTmp[i] = pKernelArray->GetKernelPointer(i);

        // Guard against a missing kernel entry before dereferencing it.
        if (pTmp[i] == NULL)
        {
            CM_ASSERT(0);
            CmSafeDeleteArray(pTmp);
            return CM_FAILURE;
        }

        UINT singleThreadNumber = 0;
        pTmp[i]->GetThreadCount(singleThreadNumber);
        totalThreadNumber += singleThreadNumber;
    }
    pTmp[KernelCount] = NULL;

    result = Enqueue_RT(pTmp, KernelCount, totalThreadNumber, pEvent, pTS,
                        pKernelArray->GetSyncBitmap(), pKernelArray->GetPowerOption());

    if (pEvent)
    {
        pEvent->SetKernelNames(pKernelArray, const_cast < CmThreadSpace * >(pTS), NULL);
    }

    CmSafeDeleteArray(pTmp);
    return result;
}
// Records, on this event, the name of every kernel in a task together with
// a four-entry thread-space record per kernel.
//
// pTask             - task whose kernels are recorded.
// pThreadSpace      - optional thread space; when given, its width/height
//                     overwrite the first kernel's record.
// pThreadGroupSpace - optional thread-group space, used only when
//                     pThreadSpace is NULL; the first record then holds the
//                     thread size and the thread size times the group size.
//
// Each kernel's record defaults to { threadCount, 1, threadCount, 1 }.
//
// Returns CM_SUCCESS, or CM_OUT_OF_HOST_MEMORY when an allocation fails, in
// which case everything allocated here is released again. CMCHK_NULL_RETURN
// is presumed (from the hr / finish pattern used here) to store the given
// code in hr and jump to finish; confirm against its definition.
INT CmEvent::SetKernelNames(CmTask * pTask, CmThreadSpace * pThreadSpace, CmThreadGroupSpace * pThreadGroupSpace)
{
    UINT i = 0;
    INT hr = CM_SUCCESS;
    UINT ThreadCount = 0;

    m_KernelCount = pTask->GetKernelCount();

    m_KernelNames = new(std::nothrow) char *[m_KernelCount];
    m_ThreadSpace = new(std::nothrow) UINT[4 * m_KernelCount];
    CMCHK_NULL_RETURN(m_KernelNames, CM_OUT_OF_HOST_MEMORY);
    // Zero the table so the failure path can safely walk all entries.
    CmSafeMemSet(m_KernelNames, 0, m_KernelCount * sizeof(char *));
    CMCHK_NULL_RETURN(m_ThreadSpace, CM_OUT_OF_HOST_MEMORY);

    for (i = 0; i < m_KernelCount; i++)
    {
        m_KernelNames[i] = new(std::nothrow) char[CM_MAX_KERNEL_NAME_SIZE_IN_BYTE];
        CMCHK_NULL_RETURN(m_KernelNames[i], CM_OUT_OF_HOST_MEMORY);

        CmKernel *pKernel = pTask->GetKernelPointer(i);
        strcpy_s(m_KernelNames[i], CM_MAX_KERNEL_NAME_SIZE_IN_BYTE, pKernel->GetName());

        pKernel->GetThreadCount(ThreadCount);
        m_ThreadSpace[4 * i] = ThreadCount;
        m_ThreadSpace[4 * i + 1] = 1;
        m_ThreadSpace[4 * i + 2] = ThreadCount;
        m_ThreadSpace[4 * i + 3] = 1;
    }

    // The overrides below write entries 0..3, which only exist when at least
    // one kernel was recorded; with zero kernels the table has no elements.
    if (m_KernelCount > 0)
    {
        if (pThreadSpace)
        {
            UINT ThreadWidth = 0;
            UINT ThreadHeight = 0;
            pThreadSpace->GetThreadSpaceSize(ThreadWidth, ThreadHeight);
            m_ThreadSpace[0] = ThreadWidth;
            m_ThreadSpace[1] = ThreadHeight;
            m_ThreadSpace[2] = ThreadWidth;
            m_ThreadSpace[3] = ThreadHeight;
        }
        else if (pThreadGroupSpace)
        {
            UINT ThreadWidth = 0;
            UINT ThreadHeight = 0;
            UINT GroupWidth = 0;
            UINT GroupHeight = 0;
            pThreadGroupSpace->GetThreadGroupSpaceSize(ThreadWidth, ThreadHeight,
                                                       GroupWidth, GroupHeight);
            m_ThreadSpace[0] = ThreadWidth;
            m_ThreadSpace[1] = ThreadHeight;
            m_ThreadSpace[2] = ThreadWidth * GroupWidth;
            m_ThreadSpace[3] = ThreadHeight * GroupHeight;
        }
    }

finish:
    if (hr == CM_OUT_OF_HOST_MEMORY)
    {
        // Roll back: free any names already allocated, then both tables.
        if (m_KernelNames != NULL)
        {
            for (UINT j = 0; j < m_KernelCount; j++)
            {
                CmSafeDeleteArray(m_KernelNames[j]);
            }
        }
        CmSafeDeleteArray(m_KernelNames);
        CmSafeDeleteArray(m_ThreadSpace);
    }
    return hr;
}
// Destructor. Releases the task's kernel pointer array.
CmTask::~CmTask(void)
{
    CmSafeDeleteArray(this->m_pKernelArray);
}
// Checks that every kernel in the task has a thread space consistent with
// its thread count and, for thread-associated thread spaces, that every
// thread of the kernel appears in the thread space.
//
// Returns TRUE when all checks pass, FALSE otherwise. The specific error
// code is kept only in the local hr and is not exposed to the caller. The
// CMCHK_* macros are presumed (from the hr / finish pattern used here) to
// store an error in hr and jump to finish; confirm against their definitions.
BOOLEAN CmTask::IntegrityCheckKernelThreadspace(void)
{
    INT hr = CM_SUCCESS;
    UINT kernelCount = 0;
    UINT i = 0;
    UINT j = 0;
    CmKernel *pKernel_RT = NULL;
    CmKernel *pKernTmp = NULL;
    UINT threadCount = 0;
    CmThreadSpace *pKernelTS = NULL;
    UINT width = 0;
    UINT height = 0;
    BOOLEAN **pTSMapping = NULL;
    BOOLEAN *pKernelInScoreboard = NULL;
    CM_THREAD_SPACE_UNIT *pThreadSpaceUnit = NULL;
    UINT kernelIndex = 0;
    UINT unassociated = 0;

    kernelCount = this->GetKernelCount();

    // pTSMapping[k] is a per-thread "seen" table for kernel k, allocated only
    // while kernel k is being checked; pKernelInScoreboard[k] notes whether
    // any thread-space unit referred to kernel k.
    pTSMapping = new(std::nothrow) BOOLEAN *[kernelCount];
    pKernelInScoreboard = new(std::nothrow) BOOLEAN[kernelCount];
    CMCHK_NULL_RETURN(pTSMapping, CM_OUT_OF_HOST_MEMORY);
    CMCHK_NULL_RETURN(pKernelInScoreboard, CM_OUT_OF_HOST_MEMORY);
    CmSafeMemSet(pTSMapping, 0, kernelCount * sizeof(BOOLEAN *));
    CmSafeMemSet(pKernelInScoreboard, 0, kernelCount * sizeof(BOOLEAN));

    for (i = 0; i < kernelCount; ++i)
    {
        pKernel_RT = this->GetKernelPointer(i);
        CMCHK_NULL(pKernel_RT);

        CMCHK_HR(pKernel_RT->GetThreadSpace(pKernelTS));
        CMCHK_NULL_RETURN(pKernelTS, CM_KERNEL_THREADSPACE_NOT_SET);

        CMCHK_HR(pKernelTS->GetThreadSpaceSize(width, height));
        CMCHK_HR(pKernel_RT->GetThreadCount(threadCount));

        // The thread space must hold exactly one unit per kernel thread.
        if (threadCount != (width * height))
        {
            CM_ASSERT(0);
            hr = CM_INVALID_KERNEL_THREADSPACE;
            goto finish;
        }

        if (pKernelTS->IsThreadAssociated())
        {
            pTSMapping[i] = new(std::nothrow) BOOLEAN[threadCount];
            CMCHK_NULL_RETURN(pTSMapping[i], CM_OUT_OF_HOST_MEMORY);
            CmSafeMemSet(pTSMapping[i], 0, threadCount * sizeof(BOOLEAN));
            pKernelInScoreboard[i] = FALSE;

            hr = pKernelTS->GetThreadSpaceUnit(pThreadSpaceUnit);
            if (hr != CM_SUCCESS || pThreadSpaceUnit == NULL)
            {
                CM_ASSERT(0);
                CmSafeDeleteArray(pTSMapping[i]);
                hr = CM_FAILURE;
                goto finish;
            }

            for (j = 0; j < width * height; ++j)
            {
                pKernTmp = static_cast < CmKernel * >(pThreadSpaceUnit[j].pKernel);
                if (pKernTmp == NULL)
                {
                    // Fall back to the thread space's own kernel pointer.
                    if (pKernelTS->GetNeedSetKernelPointer())
                    {
                        pKernTmp = pKernelTS->GetKernelPointer();
                    }
                    if (pKernTmp == NULL)
                    {
                        CM_ASSERT(0);
                        CmSafeDeleteArray(pTSMapping[i]);
                        hr = CM_FAILURE;
                        goto finish;
                    }
                }

                kernelIndex = pKernTmp->GetIndexInTask();

                // Only pTSMapping[i] (threadCount entries) is allocated at
                // this point. A unit that names another kernel, or a thread
                // id outside the kernel's thread count, would write outside
                // that table, so it is reported as a failure instead.
                if (kernelIndex != i ||
                    static_cast < UINT >(pThreadSpaceUnit[j].threadId) >= threadCount)
                {
                    CM_ASSERT(0);
                    CmSafeDeleteArray(pTSMapping[i]);
                    hr = CM_FAILURE;
                    goto finish;
                }

                pTSMapping[kernelIndex][pThreadSpaceUnit[j].threadId] = TRUE;
                pKernelInScoreboard[kernelIndex] = TRUE;
            }

            if (pKernelInScoreboard[i] == TRUE)
            {
                pKernel_RT->SetAssociatedToTSFlag(TRUE);

                // One missing thread is enough to fail the check.
                for (j = 0; j < threadCount; ++j)
                {
                    if (pTSMapping[i][j] == FALSE)
                    {
                        unassociated++;
                        break;
                    }
                }
            }

            CmSafeDeleteArray(pTSMapping[i]);
        }

        if (unassociated != 0)
        {
            CM_ASSERT(0);
            hr = CM_KERNEL_THREADSPACE_THREADS_NOT_ASSOCIATED;
            goto finish;
        }
    }

finish:
    CmSafeDeleteArray(pTSMapping);
    CmSafeDeleteArray(pKernelInScoreboard);
    return (hr == CM_SUCCESS) ? TRUE : FALSE;
}