/* Compare every scaling list against the default quantization matrix.
 * Returns TRUE when at least one applicable list differs from the default
 * (i.e. non-default scaling list data exists); returns FALSE when every
 * applicable list matches the default coefficients and, for 16x16 and
 * larger sizes, also carries the default DC value of 16.
 * (The previous comment here claimed the inverse; the return expression
 * below is true exactly when defaultCounter falls short of the all-default
 * count.)
 * NOTE(review): the "-4" assumes the four unused 32x32 list IDs can never
 * count as default, so the all-default total is NUM_LISTS * NUM_SIZES - 4;
 * confirm against how those four entries are initialized/loaded. */
bool ScalingList::checkDefaultScalingList() const
{
    int defaultCounter = 0;

    for (int s = 0; s < NUM_SIZES; s++)
        for (int l = 0; l < NUM_LISTS; l++)
            /* a list counts as "default" when its coefficients match the default
             * matrix and (for sizes with a signaled DC) its DC value is 16 */
            if (!memcmp(m_scalingListCoef[s][l], getScalingListDefaultAddress(s, l),
                        sizeof(int32_t) * X265_MIN(MAX_MATRIX_COEF_NUM, s_numCoefPerSize[s])) &&
                ((s < BLOCK_16x16) || (m_scalingListDC[s][l] == 16)))
                defaultCounter++;

    return defaultCounter != (NUM_LISTS * NUM_SIZES - 4); // -4 for 32x32
}
/** set quantized matrix coefficient for encode */ void ScalingList::setupQuantMatrices() { for (int size = 0; size < NUM_SIZES; size++) { int width = 1 << (size + 2); int ratio = width / X265_MIN(MAX_MATRIX_SIZE_NUM, width); int stride = X265_MIN(MAX_MATRIX_SIZE_NUM, width); int count = s_numCoefPerSize[size]; for (int list = 0; list < NUM_LISTS; list++) { int32_t *coeff = m_scalingListCoef[size][list]; int32_t dc = m_scalingListDC[size][list]; for (int rem = 0; rem < NUM_REM; rem++) { int32_t *quantCoeff = m_quantCoef[size][list][rem]; int32_t *dequantCoeff = m_dequantCoef[size][list][rem]; if (m_bEnabled) { processScalingListEnc(coeff, quantCoeff, s_quantScales[rem] << 4, width, width, ratio, stride, dc); processScalingListDec(coeff, dequantCoeff, s_invQuantScales[rem], width, width, ratio, stride, dc); } else { /* flat quant and dequant coefficients */ for (int i = 0; i < count; i++) { quantCoeff[i] = s_quantScales[rem]; dequantCoeff[i] = s_invQuantScales[rem]; } } } } } }
/* returns predicted list index if a match is found, else -1 */ int ScalingList::checkPredMode(int size, int list) const { for (int predList = list; predList >= 0; predList--) { // check DC value if (size < BLOCK_16x16 && m_scalingListDC[size][list] != m_scalingListDC[size][predList]) continue; // check value of matrix if (!memcmp(m_scalingListCoef[size][list], list == predList ? getScalingListDefaultAddress(size, predList) : m_scalingListCoef[size][predList], sizeof(int32_t) * X265_MIN(MAX_MATRIX_COEF_NUM, s_numCoefPerSize[size]))) return predList; } return -1; }
bool ScalingList::init() { bool ok = true; for (int sizeId = 0; sizeId < NUM_SIZES; sizeId++) { for (int listId = 0; listId < NUM_LISTS; listId++) { m_scalingListCoef[sizeId][listId] = X265_MALLOC(int32_t, X265_MIN(MAX_MATRIX_COEF_NUM, s_numCoefPerSize[sizeId])); ok &= !!m_scalingListCoef[sizeId][listId]; for (int rem = 0; rem < NUM_REM; rem++) { m_quantCoef[sizeId][listId][rem] = X265_MALLOC(int32_t, s_numCoefPerSize[sizeId]); m_dequantCoef[sizeId][listId][rem] = X265_MALLOC(int32_t, s_numCoefPerSize[sizeId]); ok &= m_quantCoef[sizeId][listId][rem] && m_dequantCoef[sizeId][listId][rem]; } } } return ok; }
else if (bitrate > levels[i].maxBitrateMain && levels[i].maxBitrateHigh == MAX_UINT) continue; else if (bitrate > levels[i].maxBitrateHigh) continue; else if (param.sourceWidth > sqrt(levels[i].maxLumaSamples * 8.0f)) continue; else if (param.sourceHeight > sqrt(levels[i].maxLumaSamples * 8.0f)) continue; uint32_t maxDpbSize = MaxDpbPicBuf; if (lumaSamples <= (levels[i].maxLumaSamples >> 2)) maxDpbSize = X265_MIN(4 * MaxDpbPicBuf, 16); else if (lumaSamples <= (levels[i].maxLumaSamples >> 1)) maxDpbSize = X265_MIN(2 * MaxDpbPicBuf, 16); else if (lumaSamples <= ((3 * levels[i].maxLumaSamples) >> 2)) maxDpbSize = X265_MIN((4 * MaxDpbPicBuf) / 3, 16); /* The value of sps_max_dec_pic_buffering_minus1[ HighestTid ] + 1 shall be less than * or equal to MaxDpbSize */ if (vps.maxDecPicBuffering > maxDpbSize) continue; /* For level 5 and higher levels, the value of CtbSizeY shall be equal to 32 or 64 */ if (levels[i].levelEnum >= Level::LEVEL5 && param.maxCUSize < 32) { x265_log(¶m, X265_LOG_WARNING, "level %s detected, but CTU size 16 is non-compliant\n", levels[i].name); vps.ptl.profileIdc = Profile::NONE; vps.ptl.levelIdc = Level::NONE; vps.ptl.tierFlag = Level::MAIN; x265_log(¶m, X265_LOG_INFO, "NONE profile, Level-NONE (Main tier)\n"); return;
bool ScalingList::parseScalingList(const char* filename) { FILE *fp = fopen(filename, "r"); if (!fp) { x265_log(NULL, X265_LOG_ERROR, "can't open scaling list file %s\n", filename); return true; } char line[1024]; int32_t *src = NULL; for (int sizeIdc = 0; sizeIdc < NUM_SIZES; sizeIdc++) { int size = X265_MIN(MAX_MATRIX_COEF_NUM, s_numCoefPerSize[sizeIdc]); for (int listIdc = 0; listIdc < NUM_LISTS; listIdc++) { src = m_scalingListCoef[sizeIdc][listIdc]; fseek(fp, 0, 0); do { char *ret = fgets(line, 1024, fp); if (!ret || (!strstr(line, MatrixType[sizeIdc][listIdc]) && feof(fp))) { x265_log(NULL, X265_LOG_ERROR, "can't read matrix from %s\n", filename); return true; } } while (!strstr(line, MatrixType[sizeIdc][listIdc])); for (int i = 0; i < size; i++) { int data; if (fscanf(fp, "%d,", &data) != 1) { x265_log(NULL, X265_LOG_ERROR, "can't read matrix from %s\n", filename); return true; } src[i] = data; } // set DC value for default matrix check m_scalingListDC[sizeIdc][listIdc] = src[0]; if (sizeIdc > BLOCK_8x8) { fseek(fp, 0, 0); do { char *ret = fgets(line, 1024, fp); if (!ret || (!strstr(line, MatrixType_DC[sizeIdc][listIdc]) && feof(fp))) { x265_log(NULL, X265_LOG_ERROR, "can't read DC from %s\n", filename); return true; } } while (!strstr(line, MatrixType_DC[sizeIdc][listIdc])); int data; if (fscanf(fp, "%d,", &data) != 1) { x265_log(NULL, X265_LOG_ERROR, "can't read matrix from %s\n", filename); return true; } // overwrite DC value when size of matrix is larger than 16x16 m_scalingListDC[sizeIdc][listIdc] = data; } } } fclose(fp); m_bEnabled = true; m_bDataPresent = !checkDefaultScalingList(); return false; }
/* Reset one scaling list to the default matrix and default DC value.
 * (The "Marix" typo in the name is part of the class interface and is kept
 * so existing callers keep compiling.) */
void ScalingList::processDefaultMarix(int sizeId, int listId)
{
    /* consistency fix: the coefficient tables are int32_t everywhere else in
     * this file (init/checkPredMode/checkDefaultScalingList all size their
     * memcmp/alloc with sizeof(int32_t)); this memcpy used sizeof(int) */
    memcpy(m_scalingListCoef[sizeId][listId], getScalingListDefaultAddress(sizeId, listId),
           sizeof(int32_t) * X265_MIN(MAX_MATRIX_COEF_NUM, s_numCoefPerSize[sizeId]));
    m_scalingListDC[sizeId][listId] = SCALING_LIST_DC;
}
ThreadPool* ThreadPool::allocThreadPools(x265_param* p, int& numPools) { enum { MAX_NODE_NUM = 127 }; int cpusPerNode[MAX_NODE_NUM + 1]; memset(cpusPerNode, 0, sizeof(cpusPerNode)); int numNumaNodes = X265_MIN(getNumaNodeCount(), MAX_NODE_NUM); int cpuCount = getCpuCount(); bool bNumaSupport = false; #if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 bNumaSupport = true; #elif HAVE_LIBNUMA bNumaSupport = numa_available() >= 0; #endif for (int i = 0; i < cpuCount; i++) { #if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 UCHAR node; if (GetNumaProcessorNode((UCHAR)i, &node)) cpusPerNode[X265_MIN(node, (UCHAR)MAX_NODE_NUM)]++; else #elif HAVE_LIBNUMA if (bNumaSupport >= 0) cpusPerNode[X265_MIN(numa_node_of_cpu(i), MAX_NODE_NUM)]++; else #endif cpusPerNode[0]++; } if (bNumaSupport && p->logLevel >= X265_LOG_DEBUG) for (int i = 0; i < numNumaNodes; i++) x265_log(p, X265_LOG_DEBUG, "detected NUMA node %d with %d logical cores\n", i, cpusPerNode[i]); /* limit nodes based on param->numaPools */ if (p->numaPools && *p->numaPools) { const char *nodeStr = p->numaPools; for (int i = 0; i < numNumaNodes; i++) { if (!*nodeStr) { cpusPerNode[i] = 0; continue; } else if (*nodeStr == '-') cpusPerNode[i] = 0; else if (*nodeStr == '*') break; else if (*nodeStr == '+') ; else { int count = atoi(nodeStr); cpusPerNode[i] = X265_MIN(count, cpusPerNode[i]); } /* consume current node string, comma, and white-space */ while (*nodeStr && *nodeStr != ',') ++nodeStr; if (*nodeStr == ',' || *nodeStr == ' ') ++nodeStr; } } // In the case that numa is disabled and we have more CPUs than 64, // spawn the last pool only if the # threads in that pool is > 1/2 max (heuristic) if ((numNumaNodes == 1) && (cpusPerNode[0] % MAX_POOL_THREADS < (MAX_POOL_THREADS / 2))) { cpusPerNode[0] -= (cpusPerNode[0] % MAX_POOL_THREADS); x265_log(p, X265_LOG_DEBUG, "Creating only %d worker threads to prevent asymmetry in pools; may not use all HW contexts\n", cpusPerNode[0]); } numPools = 0; 
for (int i = 0; i < numNumaNodes; i++) { if (bNumaSupport) x265_log(p, X265_LOG_DEBUG, "NUMA node %d may use %d logical cores\n", i, cpusPerNode[i]); if (cpusPerNode[i]) numPools += (cpusPerNode[i] + MAX_POOL_THREADS - 1) / MAX_POOL_THREADS; } if (!numPools) return NULL; if (numPools > p->frameNumThreads) { x265_log(p, X265_LOG_DEBUG, "Reducing number of thread pools for frame thread count\n"); numPools = X265_MAX(p->frameNumThreads / 2, 1); } ThreadPool *pools = new ThreadPool[numPools]; if (pools) { int maxProviders = (p->frameNumThreads + numPools - 1) / numPools + 1; /* +1 is Lookahead, always assigned to threadpool 0 */ int node = 0; for (int i = 0; i < numPools; i++) { while (!cpusPerNode[node]) node++; int cores = X265_MIN(MAX_POOL_THREADS, cpusPerNode[node]); if (!pools[i].create(cores, maxProviders, node)) { X265_FREE(pools); numPools = 0; return NULL; } if (numNumaNodes > 1) x265_log(p, X265_LOG_INFO, "Thread pool %d using %d threads on NUMA node %d\n", i, cores, node); else x265_log(p, X265_LOG_INFO, "Thread pool created using %d threads\n", cores); cpusPerNode[node] -= cores; } } else numPools = 0; return pools; }