Beispiel #1
0
/* check if use default quantization matrix
 * returns true if default quantization matrix is used in all sizes */
bool ScalingList::checkDefaultScalingList() const
{
    int defaultCounter = 0;

    for (int s = 0; s < NUM_SIZES; s++)
        for (int l = 0; l < NUM_LISTS; l++)
            if (!memcmp(m_scalingListCoef[s][l], getScalingListDefaultAddress(s, l),
                        sizeof(int32_t) * X265_MIN(MAX_MATRIX_COEF_NUM, s_numCoefPerSize[s])) &&
                ((s < BLOCK_16x16) || (m_scalingListDC[s][l] == 16)))
                defaultCounter++;

    return defaultCounter != (NUM_LISTS * NUM_SIZES - 4); // -4 for 32x32
}
Beispiel #2
0
/** set quantized matrix coefficient for encode */
void ScalingList::setupQuantMatrices()
{
    for (int size = 0; size < NUM_SIZES; size++)
    {
        int width = 1 << (size + 2);
        int ratio = width / X265_MIN(MAX_MATRIX_SIZE_NUM, width);
        int stride = X265_MIN(MAX_MATRIX_SIZE_NUM, width);
        int count = s_numCoefPerSize[size];

        for (int list = 0; list < NUM_LISTS; list++)
        {
            int32_t *coeff = m_scalingListCoef[size][list];
            int32_t dc = m_scalingListDC[size][list];

            for (int rem = 0; rem < NUM_REM; rem++)
            {
                int32_t *quantCoeff   = m_quantCoef[size][list][rem];
                int32_t *dequantCoeff = m_dequantCoef[size][list][rem];

                if (m_bEnabled)
                {
                    processScalingListEnc(coeff, quantCoeff, s_quantScales[rem] << 4, width, width, ratio, stride, dc);
                    processScalingListDec(coeff, dequantCoeff, s_invQuantScales[rem], width, width, ratio, stride, dc);
                }
                else
                {
                    /* flat quant and dequant coefficients */
                    for (int i = 0; i < count; i++)
                    {
                        quantCoeff[i] = s_quantScales[rem];
                        dequantCoeff[i] = s_invQuantScales[rem];
                    }
                }
            }
        }
    }
}
Beispiel #3
0
/* returns predicted list index if a match is found, else -1 */ 
int ScalingList::checkPredMode(int size, int list) const
{
    for (int predList = list; predList >= 0; predList--)
    {
        // check DC value
        if (size < BLOCK_16x16 && m_scalingListDC[size][list] != m_scalingListDC[size][predList])
            continue;

        // check value of matrix
        if (!memcmp(m_scalingListCoef[size][list],
                    list == predList ? getScalingListDefaultAddress(size, predList) : m_scalingListCoef[size][predList],
                    sizeof(int32_t) * X265_MIN(MAX_MATRIX_COEF_NUM, s_numCoefPerSize[size])))
            return predList;
    }

    return -1;
}
Beispiel #4
0
bool ScalingList::init()
{
    bool ok = true;
    for (int sizeId = 0; sizeId < NUM_SIZES; sizeId++)
    {
        for (int listId = 0; listId < NUM_LISTS; listId++)
        {
            m_scalingListCoef[sizeId][listId] = X265_MALLOC(int32_t, X265_MIN(MAX_MATRIX_COEF_NUM, s_numCoefPerSize[sizeId]));
            ok &= !!m_scalingListCoef[sizeId][listId];
            for (int rem = 0; rem < NUM_REM; rem++)
            {
                m_quantCoef[sizeId][listId][rem] = X265_MALLOC(int32_t, s_numCoefPerSize[sizeId]);
                m_dequantCoef[sizeId][listId][rem] = X265_MALLOC(int32_t, s_numCoefPerSize[sizeId]);
                ok &= m_quantCoef[sizeId][listId][rem] && m_dequantCoef[sizeId][listId][rem];
            }
        }
    }
    return ok;
}
Beispiel #5
0
            else if (bitrate > levels[i].maxBitrateMain && levels[i].maxBitrateHigh == MAX_UINT)
                continue;
            else if (bitrate > levels[i].maxBitrateHigh)
                continue;
            else if (param.sourceWidth > sqrt(levels[i].maxLumaSamples * 8.0f))
                continue;
            else if (param.sourceHeight > sqrt(levels[i].maxLumaSamples * 8.0f))
                continue;

            uint32_t maxDpbSize = MaxDpbPicBuf;
            if (lumaSamples <= (levels[i].maxLumaSamples >> 2))
                maxDpbSize = X265_MIN(4 * MaxDpbPicBuf, 16);
            else if (lumaSamples <= (levels[i].maxLumaSamples >> 1))
                maxDpbSize = X265_MIN(2 * MaxDpbPicBuf, 16);
            else if (lumaSamples <= ((3 * levels[i].maxLumaSamples) >> 2))
                maxDpbSize = X265_MIN((4 * MaxDpbPicBuf) / 3, 16);

            /* The value of sps_max_dec_pic_buffering_minus1[ HighestTid ] + 1 shall be less than
             * or equal to MaxDpbSize */
            if (vps.maxDecPicBuffering > maxDpbSize)
                continue;

            /* For level 5 and higher levels, the value of CtbSizeY shall be equal to 32 or 64 */
            if (levels[i].levelEnum >= Level::LEVEL5 && param.maxCUSize < 32)
            {
                x265_log(&param, X265_LOG_WARNING, "level %s detected, but CTU size 16 is non-compliant\n", levels[i].name);
                vps.ptl.profileIdc = Profile::NONE;
                vps.ptl.levelIdc = Level::NONE;
                vps.ptl.tierFlag = Level::MAIN;
                x265_log(&param, X265_LOG_INFO, "NONE profile, Level-NONE (Main tier)\n");
                return;
Beispiel #6
0
bool ScalingList::parseScalingList(const char* filename)
{
    FILE *fp = fopen(filename, "r");
    if (!fp)
    {
        x265_log(NULL, X265_LOG_ERROR, "can't open scaling list file %s\n", filename);
        return true;
    }

    char line[1024];
    int32_t *src = NULL;

    for (int sizeIdc = 0; sizeIdc < NUM_SIZES; sizeIdc++)
    {
        int size = X265_MIN(MAX_MATRIX_COEF_NUM, s_numCoefPerSize[sizeIdc]);
        for (int listIdc = 0; listIdc < NUM_LISTS; listIdc++)
        {
            src = m_scalingListCoef[sizeIdc][listIdc];

            fseek(fp, 0, 0);
            do
            {
                char *ret = fgets(line, 1024, fp);
                if (!ret || (!strstr(line, MatrixType[sizeIdc][listIdc]) && feof(fp)))
                {
                    x265_log(NULL, X265_LOG_ERROR, "can't read matrix from %s\n", filename);
                    return true;
                }
            }
            while (!strstr(line, MatrixType[sizeIdc][listIdc]));

            for (int i = 0; i < size; i++)
            {
                int data;
                if (fscanf(fp, "%d,", &data) != 1)
                {
                    x265_log(NULL, X265_LOG_ERROR, "can't read matrix from %s\n", filename);
                    return true;
                }
                src[i] = data;
            }

            // set DC value for default matrix check
            m_scalingListDC[sizeIdc][listIdc] = src[0];

            if (sizeIdc > BLOCK_8x8)
            {
                fseek(fp, 0, 0);
                do
                {
                    char *ret = fgets(line, 1024, fp);
                    if (!ret || (!strstr(line, MatrixType_DC[sizeIdc][listIdc]) && feof(fp)))
                    {
                        x265_log(NULL, X265_LOG_ERROR, "can't read DC from %s\n", filename);
                        return true;
                    }
                }
                while (!strstr(line, MatrixType_DC[sizeIdc][listIdc]));

                int data;
                if (fscanf(fp, "%d,", &data) != 1)
                {
                    x265_log(NULL, X265_LOG_ERROR, "can't read matrix from %s\n", filename);
                    return true;
                }

                // overwrite DC value when size of matrix is larger than 16x16
                m_scalingListDC[sizeIdc][listIdc] = data;
            }
        }
    }

    fclose(fp);

    m_bEnabled = true;
    m_bDataPresent = !checkDefaultScalingList();

    return false;
}
Beispiel #7
0
void ScalingList::processDefaultMarix(int sizeId, int listId)
{
    memcpy(m_scalingListCoef[sizeId][listId], getScalingListDefaultAddress(sizeId, listId), sizeof(int) * X265_MIN(MAX_MATRIX_COEF_NUM, s_numCoefPerSize[sizeId]));
    m_scalingListDC[sizeId][listId] = SCALING_LIST_DC;
}
Beispiel #8
0
ThreadPool* ThreadPool::allocThreadPools(x265_param* p, int& numPools)
{
    enum { MAX_NODE_NUM = 127 };
    int cpusPerNode[MAX_NODE_NUM + 1];

    memset(cpusPerNode, 0, sizeof(cpusPerNode));
    int numNumaNodes = X265_MIN(getNumaNodeCount(), MAX_NODE_NUM);
    int cpuCount = getCpuCount();
    bool bNumaSupport = false;

#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 
    bNumaSupport = true;
#elif HAVE_LIBNUMA
    bNumaSupport = numa_available() >= 0;
#endif


    for (int i = 0; i < cpuCount; i++)
    {
#if defined(_WIN32_WINNT) && _WIN32_WINNT >= _WIN32_WINNT_WIN7 
        UCHAR node;
        if (GetNumaProcessorNode((UCHAR)i, &node))
            cpusPerNode[X265_MIN(node, (UCHAR)MAX_NODE_NUM)]++;
        else
#elif HAVE_LIBNUMA
        if (bNumaSupport >= 0)
            cpusPerNode[X265_MIN(numa_node_of_cpu(i), MAX_NODE_NUM)]++;
        else
#endif
            cpusPerNode[0]++;
    }

    if (bNumaSupport && p->logLevel >= X265_LOG_DEBUG)
        for (int i = 0; i < numNumaNodes; i++)
            x265_log(p, X265_LOG_DEBUG, "detected NUMA node %d with %d logical cores\n", i, cpusPerNode[i]);

    /* limit nodes based on param->numaPools */
    if (p->numaPools && *p->numaPools)
    {
        const char *nodeStr = p->numaPools;
        for (int i = 0; i < numNumaNodes; i++)
        {
            if (!*nodeStr)
            {
                cpusPerNode[i] = 0;
                continue;
            }
            else if (*nodeStr == '-')
                cpusPerNode[i] = 0;
            else if (*nodeStr == '*')
                break;
            else if (*nodeStr == '+')
                ;
            else
            {
                int count = atoi(nodeStr);
                cpusPerNode[i] = X265_MIN(count, cpusPerNode[i]);
            }

            /* consume current node string, comma, and white-space */
            while (*nodeStr && *nodeStr != ',')
               ++nodeStr;
            if (*nodeStr == ',' || *nodeStr == ' ')
               ++nodeStr;
        }
    }

    // In the case that numa is disabled and we have more CPUs than 64,
    // spawn the last pool only if the # threads in that pool is > 1/2 max (heuristic)
    if ((numNumaNodes == 1) && (cpusPerNode[0] % MAX_POOL_THREADS < (MAX_POOL_THREADS / 2)))
    {
        cpusPerNode[0] -= (cpusPerNode[0] % MAX_POOL_THREADS);
        x265_log(p, X265_LOG_DEBUG, "Creating only %d worker threads to prevent asymmetry in pools; may not use all HW contexts\n", cpusPerNode[0]);
    }

    numPools = 0;
    for (int i = 0; i < numNumaNodes; i++)
    {
        if (bNumaSupport)
            x265_log(p, X265_LOG_DEBUG, "NUMA node %d may use %d logical cores\n", i, cpusPerNode[i]);
        if (cpusPerNode[i])
            numPools += (cpusPerNode[i] + MAX_POOL_THREADS - 1) / MAX_POOL_THREADS;
    }

    if (!numPools)
        return NULL;

    if (numPools > p->frameNumThreads)
    {
        x265_log(p, X265_LOG_DEBUG, "Reducing number of thread pools for frame thread count\n");
        numPools = X265_MAX(p->frameNumThreads / 2, 1);
    }

    ThreadPool *pools = new ThreadPool[numPools];
    if (pools)
    {
        int maxProviders = (p->frameNumThreads + numPools - 1) / numPools + 1; /* +1 is Lookahead, always assigned to threadpool 0 */
        int node = 0;
        for (int i = 0; i < numPools; i++)
        {
            while (!cpusPerNode[node])
                node++;
            int cores = X265_MIN(MAX_POOL_THREADS, cpusPerNode[node]);
            if (!pools[i].create(cores, maxProviders, node))
            {
                X265_FREE(pools);
                numPools = 0;
                return NULL;
            }
            if (numNumaNodes > 1)
                x265_log(p, X265_LOG_INFO, "Thread pool %d using %d threads on NUMA node %d\n", i, cores, node);
            else
                x265_log(p, X265_LOG_INFO, "Thread pool created using %d threads\n", cores);
            cpusPerNode[node] -= cores;
        }
    }
    else
        numPools = 0;
    return pools;
}