Beispiel #1
0
/*!
 * \brief  Fills the boundary-strength (BS) table of one macroblock (plain C/C++ path).
 *
 * Every uiBS[dir][edge] row holds four strength bytes and is written as one 32-bit word.
 *   - uiBS[0][0] is derived from the left neighbour  (pCurMb - 1),
 *   - uiBS[1][0] is derived from the top neighbour   (pCurMb - iMbStride),
 *   - uiBS[.][1..3] are produced by the DeblockingBSInsideMB* helpers, or zeroed for skip MBs.
 *
 * \param pFunc        function table; only pfSetNZCZero is used here
 * \param pCurMb       current macroblock; its neighbours are reached by pointer arithmetic
 * \param uiBS         output boundary-strength table
 * \param uiCurMbType  type of the current macroblock
 * \param iMbStride    distance, in macroblocks, from pCurMb back to the macroblock above it
 * \param iLeftFlag    non-zero when the left neighbour may be used
 * \param iTopFlag     non-zero when the top neighbour may be used
 */
void DeblockingBSCalc_c (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
                         int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) {
  uint32_t* pLeftEdgeBs = (uint32_t*)uiBS[0][0];
  uint32_t* pTopEdgeBs  = (uint32_t*)uiBS[1][0];

  // Edge shared with the left neighbour: strength 4 on all four samples when that MB is intra.
  if (!iLeftFlag) {
    *pLeftEdgeBs = 0;
  } else if (IS_INTRA ((pCurMb - 1)->uiMbType)) {
    *pLeftEdgeBs = 0x04040404;
  } else {
    *pLeftEdgeBs = DeblockingBSMarginalMBAvcbase (pCurMb, pCurMb - 1, 0);
  }

  // Edge shared with the top neighbour, same rule.
  if (!iTopFlag) {
    *pTopEdgeBs = 0;
  } else if (IS_INTRA ((pCurMb - iMbStride)->uiMbType)) {
    *pTopEdgeBs = 0x04040404;
  } else {
    *pTopEdgeBs = DeblockingBSMarginalMBAvcbase (pCurMb, (pCurMb - iMbStride), 1);
  }

  // Skip macroblocks: the three remaining edges of both directions get strength 0.
  if (uiCurMbType == MB_TYPE_SKIP) {
    for (int32_t iDir = 0; iDir < 2; ++iDir) {
      for (int32_t iEdge = 1; iEdge < 4; ++iEdge) {
        * (uint32_t*)uiBS[iDir][iEdge] = 0;
      }
    }
    return;
  }

  // Per the original note: turns every non-zero NZC entry into 1 before the BS helpers read it.
  pFunc->pfSetNZCZero (pCurMb->pNonZeroCount);

  if (uiCurMbType == MB_TYPE_16x16) {
    DeblockingBSInsideMBAvsbase (pCurMb->pNonZeroCount, uiBS, 1);
  } else {
    DeblockingBSInsideMBNormal (pCurMb, uiBS, pCurMb->pNonZeroCount);
  }
}
Beispiel #2
0
/*!
 * \brief  Deblocks one macroblock: intra MBs go straight to DeblockingIntraMb, all other
 *         types get a boundary-strength table computed here and are passed to DeblockingInterMb.
 *
 * \param pFunc    function table (pfDeblocking and pfSetNZCZero are used)
 * \param pCurMb   current macroblock; neighbours are reached as pCurMb - 1 and pCurMb - iMbStride
 * \param pFilter  filter state; supplies iMbStride and uiFilterIdc
 */
void DeblockingMbAvcbase (SWelsFuncPtrList* pFunc, SMB* pCurMb, SDeblockingFilter* pFilter) {
  uint8_t uiBS[2][4][4] = { 0 };

  const Mb_Type kuiMbType  = pCurMb->uiMbType;
  const int32_t kiMbStride = pFilter->iMbStride;
  const bool kbHasLeft     = (pCurMb->iMbX > 0);
  const bool kbHasTop      = (pCurMb->iMbY > 0);

  // Entry [0]: the neighbour merely exists; entry [1]: it also belongs to the same slice.
  // uiFilterIdc selects which of the two rules applies.
  bool bLeftUsable[2] = { kbHasLeft, (kbHasLeft && (pCurMb->uiSliceIdc == (pCurMb - 1)->uiSliceIdc)) };
  bool bTopUsable[2]  = { kbHasTop,  (kbHasTop && (pCurMb->uiSliceIdc == (pCurMb - kiMbStride)->uiSliceIdc)) };

  const int32_t kiLeftFlag = bLeftUsable[pFilter->uiFilterIdc];
  const int32_t kiTopFlag  = bTopUsable[pFilter->uiFilterIdc];

  // Intra macroblocks do not need the BS table.
  if (kuiMbType == MB_TYPE_INTRA4x4 || kuiMbType == MB_TYPE_INTRA16x16 || kuiMbType == MB_TYPE_INTRA_PCM) {
    DeblockingIntraMb (&pFunc->pfDeblocking, pCurMb, pFilter);
    return;
  }

  uint32_t* pLeftEdgeBs = (uint32_t*)uiBS[0][0];
  uint32_t* pTopEdgeBs  = (uint32_t*)uiBS[1][0];

  // Edge shared with the left neighbour: strength 4 everywhere when that MB is intra.
  if (!kiLeftFlag) {
    *pLeftEdgeBs = 0;
  } else if (IS_INTRA ((pCurMb - 1)->uiMbType)) {
    *pLeftEdgeBs = 0x04040404;
  } else {
    *pLeftEdgeBs = DeblockingBSMarginalMBAvcbase (pCurMb, pCurMb - 1, 0);
  }

  // Edge shared with the top neighbour, same rule.
  if (!kiTopFlag) {
    *pTopEdgeBs = 0;
  } else if (IS_INTRA ((pCurMb - kiMbStride)->uiMbType)) {
    *pTopEdgeBs = 0x04040404;
  } else {
    *pTopEdgeBs = DeblockingBSMarginalMBAvcbase (pCurMb, (pCurMb - kiMbStride), 1);
  }

  if (kuiMbType == MB_TYPE_SKIP) {
    // Skip macroblocks: the three remaining edges of both directions get strength 0.
    for (int32_t iDir = 0; iDir < 2; ++iDir) {
      for (int32_t iEdge = 1; iEdge < 4; ++iEdge) {
        * (uint32_t*)uiBS[iDir][iEdge] = 0;
      }
    }
  } else {
    // Per the original note: turns every non-zero NZC entry into 1 before the BS helpers read it.
    pFunc->pfSetNZCZero (pCurMb->pNonZeroCount);

    if (kuiMbType == MB_TYPE_16x16) {
      DeblockingBSInsideMBAvsbase (pCurMb->pNonZeroCount, uiBS, 1);
    } else {
      DeblockingBSInsideMBNormal (pCurMb, uiBS, pCurMb->pNonZeroCount);
    }
  }

  DeblockingInterMb (&pFunc->pfDeblocking, pCurMb, pFilter, uiBS);
}
Beispiel #3
0
/*!
 * \brief  Sums the four 8x8 SADs of every macroblock that is either not flagged as background
 *         or whose reference macroblock type is intra, and counts those macroblocks per GOM.
 *
 * Macroblocks are visited GOM by GOM (iMbNumInGom macroblocks per group, the last group may be
 * shorter). pGomForegroundBlockNum[j] is incremented, not reset, for every counted macroblock.
 *
 * \param pSrcPixMap  source picture; only its rectangle width/height are read
 * \param pRefPixMap  not used by this function
 * \return accumulated SAD (accumulated as uint32_t, returned as int32_t)
 */
int32_t CComplexityAnalysis::GetFrameSadExcludeBackground (SPixMap* pSrcPixMap, SPixMap* pRefPixMap) {
  int32_t iPicWidth  = pSrcPixMap->sRect.iRectWidth;
  int32_t iPicHeight = pSrcPixMap->sRect.iRectHeight;
  int32_t iMbCols    = iPicWidth  >> 4;
  int32_t iMbRows    = iPicHeight >> 4;
  int32_t iTotalMbs  = iMbCols * iMbRows;

  int32_t iGomSize  = m_sComplexityAnalysisParam.iMbNumInGom;
  int32_t iGomCount = (iTotalMbs + iGomSize - 1) / iGomSize;   // rounded up

  uint8_t* pBgFlags          = (uint8_t*)m_sComplexityAnalysisParam.pBackgroundMbFlag;
  uint32_t* pRefMbTypes      = (uint32_t*)m_sComplexityAnalysisParam.uiRefMbType;
  SVAACalcResult* pVaaResult = m_sComplexityAnalysisParam.pCalcResult;
  int32_t* pForegroundCount  = m_sComplexityAnalysisParam.pGomForegroundBlockNum;

  uint32_t uiSadTotal = 0;
  for (int32_t iGom = 0; iGom < iGomCount; ++iGom) {
    const int32_t kiFirstMb = iGom * iGomSize;
    const int32_t kiEndMb   = WELS_MIN ((iGom + 1) * iGomSize, iTotalMbs);

    for (int32_t iMb = kiFirstMb; iMb < kiEndMb; ++iMb) {
      // Background macroblocks with a non-intra reference type contribute nothing.
      if (pBgFlags[iMb] != 0 && !IS_INTRA (pRefMbTypes[iMb])) {
        continue;
      }

      pForegroundCount[iGom]++;
      for (int32_t iBlk = 0; iBlk < 4; ++iBlk) {
        uiSadTotal += pVaaResult->pSad8x8[iMb][iBlk];
      }
    }
  }

  return (uiSadTotal);
}
Beispiel #4
0
/*!
 * \brief  NEON variant of the boundary-strength calculation for one macroblock.
 *
 * DeblockingBSCalcEnc_neon fills uiBS first; afterwards only the two edges shared with
 * neighbouring macroblocks are patched:
 *   - neighbour not usable (flag is 0)  -> edge strength forced to 0,
 *   - neighbour usable and intra        -> edge strength forced to 4 on all four samples,
 *   - otherwise                         -> value written by the NEON routine is kept.
 *
 * pFunc and uiCurMbType are not used by this variant.
 */
void DeblockingBSCalc_neon (SWelsFuncPtrList* pFunc, SMB* pCurMb, uint8_t uiBS[2][4][4], Mb_Type uiCurMbType,
                            int32_t iMbStride, int32_t iLeftFlag, int32_t iTopFlag) {
  DeblockingBSCalcEnc_neon (pCurMb->pNonZeroCount, pCurMb->sMv, pCurMb->uiNeighborAvail, iMbStride, uiBS);

  uint32_t* pLeftEdgeBs = (uint32_t*)uiBS[0][0];
  uint32_t* pTopEdgeBs  = (uint32_t*)uiBS[1][0];

  // Left neighbour is pCurMb - 1.
  if (!iLeftFlag) {
    *pLeftEdgeBs = 0;
  } else if (IS_INTRA ((pCurMb - 1)->uiMbType)) {
    *pLeftEdgeBs = 0x04040404;
  }

  // Top neighbour is pCurMb - iMbStride.
  if (!iTopFlag) {
    *pTopEdgeBs = 0;
  } else if (IS_INTRA ((pCurMb - iMbStride)->uiMbType)) {
    *pTopEdgeBs = 0x04040404;
  }
}
Beispiel #5
0
/*!
 * \brief  Computes a SAD-based complexity value for every GOM (group of macroblocks) and the
 *         sum over all GOMs.
 *
 * For each GOM the macroblocks are walked one macroblock-row segment at a time and handed to
 * m_pfGomSad (selected by InitGomSadFunc from iCalcBgd) together with a flag that is true when
 * the macroblock is marked as background and its reference type is not intra.
 * Results: pGomComplexity[j] receives the GOM SAD, iFrameComplexity the sum over all GOMs.
 *
 * \param pSrcPixMap  source picture; only its rectangle width/height are read
 * \param pRefPixMap  not used by this function
 */
void CComplexityAnalysis::AnalyzeGomComplexityViaSad (SPixMap* pSrcPixMap, SPixMap* pRefPixMap) {
  int32_t iPicWidth  = pSrcPixMap->sRect.iRectWidth;
  int32_t iPicHeight = pSrcPixMap->sRect.iRectHeight;
  int32_t iMbCols    = iPicWidth  >> 4;
  int32_t iMbRows    = iPicHeight >> 4;
  int32_t iTotalMbs  = iMbCols * iMbRows;

  int32_t iGomSize  = m_sComplexityAnalysisParam.iMbNumInGom;
  int32_t iGomCount = (iTotalMbs + iGomSize - 1) / iGomSize;   // rounded up

  uint8_t* pBgFlags          = (uint8_t*)m_sComplexityAnalysisParam.pBackgroundMbFlag;
  uint32_t* pRefMbTypes      = (uint32_t*)m_sComplexityAnalysisParam.uiRefMbType;
  SVAACalcResult* pVaaResult = m_sComplexityAnalysisParam.pCalcResult;
  int32_t* pForegroundCount  = (int32_t*)m_sComplexityAnalysisParam.pGomForegroundBlockNum;
  int32_t* pComplexity       = (int32_t*)m_sComplexityAnalysisParam.pGomComplexity;

  uint32_t uiSadTotal = 0;

  InitGomSadFunc (m_pfGomSad, m_sComplexityAnalysisParam.iCalcBgd);

  for (int32_t iGom = 0; iGom < iGomCount; ++iGom) {
    uint32_t uiGomSad = 0;

    const int32_t kiGomFirstMb = iGom * iGomSize;
    const int32_t kiGomEndMb   = WELS_MIN ((iGom + 1) * iGomSize, iTotalMbs);

    // Number of macroblock rows this GOM touches (partial rows included).
    int32_t iRowsLeft = (kiGomEndMb + iMbCols - 1) / iMbCols - kiGomFirstMb / iMbCols;

    // First segment runs to the end of the starting row, or to the end of the GOM if sooner.
    int32_t iSegBegin = kiGomFirstMb;
    int32_t iSegEnd   = WELS_MIN ((iSegBegin / iMbCols + 1) * iMbCols, kiGomEndMb);

    do {
      for (int32_t iMb = iSegBegin; iMb < iSegEnd; ++iMb) {
        m_pfGomSad (&uiGomSad, pForegroundCount + iGom, pVaaResult->pSad8x8[iMb],
                    pBgFlags[iMb] && !IS_INTRA (pRefMbTypes[iMb]));
      }

      // Advance by one full macroblock row, clipped to the GOM end.
      iSegBegin = iSegEnd;
      iSegEnd   = WELS_MIN (iSegEnd + iMbCols, kiGomEndMb);
    } while (--iRowsLeft);

    pComplexity[iGom] = uiGomSad;
    uiSadTotal += pComplexity[iGom];
  }

  m_sComplexityAnalysisParam.iFrameComplexity = uiSadTotal;
}
Beispiel #6
0
/* Encode the type of the current macroblock (h->mb.i_type) as CABAC decisions.
 *
 * The bin string and the context indices depend on the slice type:
 *   - I slices:  first context is 3 + (number of available left/top neighbours that are
 *                not I_4x4), followed by contexts 3+3 .. 3+7 inside x264_cabac_mb_type_intra.
 *   - P slices:  contexts 14..17 for P_L0 / P_8x8; intra types send prefix bin 1 on
 *                context 14 and then the intra suffix on contexts 17..20.
 *   - otherwise (treated as B slices): first context is 27 + (number of available
 *                left/top neighbours that are neither B_SKIP nor B_DIRECT), remaining bins
 *                use contexts 27+3 .. 27+5; intra types use the suffix contexts 32..35.
 *
 * When h->sh.b_mbaff is set, h->mb.b_interlaced is emitted first (context
 * 70 + h->mb.cache.i_neighbour_interlaced) for macroblocks on an even row, and for
 * macroblocks on an odd row when the macroblock one stride above is a skip.
 *
 * h  - encoder state; macroblock and slice information is read from it
 * cb - CABAC encoder that receives the decisions
 */
static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
{
    const int i_mb_type = h->mb.i_type;

    if( h->sh.b_mbaff &&
        (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) )
    {
        x264_cabac_encode_decision_noup( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced );
    }

    if( h->sh.i_type == SLICE_TYPE_I )
    {
        /* ctx counts the available neighbours whose type is not I_4x4 (0..2) */
        int ctx = 0;
        if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left != I_4x4 )
            ctx++;
        if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != I_4x4 )
            ctx++;

        x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
    }
    else if( h->sh.i_type == SLICE_TYPE_P )
    {
        /* prefix: 14, suffix: 17 */
        if( i_mb_type == P_L0 )
        {
            /* bins: 0, then (partition != 16x16), then (partition == 16x8);
             * the last bin uses context 16 for 16x16 and context 17 otherwise */
            x264_cabac_encode_decision_noup( cb, 14, 0 );
            x264_cabac_encode_decision_noup( cb, 15, h->mb.i_partition != D_16x16 );
            x264_cabac_encode_decision_noup( cb, 17-(h->mb.i_partition == D_16x16), h->mb.i_partition == D_16x8 );
        }
        else if( i_mb_type == P_8x8 )
        {
            x264_cabac_encode_decision_noup( cb, 14, 0 );
            x264_cabac_encode_decision_noup( cb, 15, 0 );
            x264_cabac_encode_decision_noup( cb, 16, 1 );
        }
        else /* intra */
        {
            /* prefix */
            x264_cabac_encode_decision_noup( cb, 14, 1 );

            /* suffix */
            x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
        }
    }
    else //if( h->sh.i_type == SLICE_TYPE_B )
    {
        /* ctx counts the available neighbours that are neither B_SKIP nor B_DIRECT (0..2) */
        int ctx = 0;
        if( (h->mb.i_neighbour & MB_LEFT) && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
            ctx++;
        if( (h->mb.i_neighbour & MB_TOP) && h->mb.i_mb_type_top != B_SKIP && h->mb.i_mb_type_top != B_DIRECT )
            ctx++;

        if( i_mb_type == B_DIRECT )
        {
            /* B_DIRECT is a single 0 bin */
            x264_cabac_encode_decision_noup( cb, 27+ctx, 0 );
            return;
        }
        x264_cabac_encode_decision_noup( cb, 27+ctx, 1 );

        if( i_mb_type == B_8x8 )
        {
            /* fixed bin string 1 1 1 1 1 after the first bin */
            x264_cabac_encode_decision_noup( cb, 27+3,   1 );
            x264_cabac_encode_decision_noup( cb, 27+4,   1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision_noup( cb, 27+5,   1 );
        }
        else if( IS_INTRA( i_mb_type ) )
        {
            /* prefix */
            x264_cabac_encode_decision_noup( cb, 27+3,   1 );
            x264_cabac_encode_decision_noup( cb, 27+4,   1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision( cb, 27+5,   0 );
            x264_cabac_encode_decision( cb, 27+5,   1 );

            /* suffix */
            x264_cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
        }
        else
        {
            /* Packed bin strings, one row per B_Lx_Ly type (starting at B_L0_L0) and one
             * column per partition (starting at D_16x8). Bins are consumed from the least
             * significant bit; the value 1 left after shifting marks the end of the string.
             * NOTE(review): the 0 entries are presumably type/partition combinations that
             * never occur -- confirm against the enum definitions. */
            static const uint8_t i_mb_bits[9*3] =
            {
                0x31, 0x29, 0x4, /* L0 L0 */
                0x35, 0x2d, 0,   /* L0 L1 */
                0x43, 0x63, 0,   /* L0 BI */
                0x3d, 0x2f, 0,   /* L1 L0 */
                0x39, 0x25, 0x6, /* L1 L1 */
                0x53, 0x73, 0,   /* L1 BI */
                0x4b, 0x6b, 0,   /* BI L0 */
                0x5b, 0x7b, 0,   /* BI L1 */
                0x47, 0x67, 0x21 /* BI BI */
            };

            const int idx = (i_mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8);
            int bits = i_mb_bits[idx];

            /* first bin on context 27+3; the second bin uses 27+4 when the first bin was 1
             * and 27+5 when it was 0 */
            x264_cabac_encode_decision_noup( cb, 27+3, bits&1 );
            x264_cabac_encode_decision( cb, 27+5-(bits&1), (bits>>1)&1 ); bits >>= 2;
            if( bits != 1 )
            {
                x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
                x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
                x264_cabac_encode_decision( cb, 27+5, bits&1 ); bits >>= 1;
                if( bits != 1 )
                    x264_cabac_encode_decision_noup( cb, 27+5, bits&1 );
            }
        }
    }
}
Beispiel #7
0
/*
 * Reconstruct the current macroblock (position s->mb_x / s->mb_y) into
 * s->current_picture for pictures with separate, subsampled chroma planes
 * (the 4:2:2 case is selected by CHROMA422).
 *
 * Steps, in order:
 *   1. compute the destination pointers of the luma and the two chroma planes;
 *   2. for field macroblocks (!SIMPLE && MB_FIELD) double the line sizes, switch to
 *      the second half of h->block_offset and, under FRAME_MBAFF, rewrite ref_cache;
 *   3. intra PCM macroblocks: copy the raw samples stored in h->mb to the picture;
 *   4. otherwise: intra prediction or inter motion compensation, then the luma
 *      residual, then the chroma residual when (h->cbp & 0x30) is set;
 *   5. clear the coefficient buffer when the macroblock had coefficients or was intra.
 *
 * This body is a template: SIMPLE, PIXEL_SHIFT and FUNC() are supplied by the
 * including file.
 */
static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
{
    MpegEncContext *const s = &h->s;
    const int mb_x    = s->mb_x;
    const int mb_y    = s->mb_y;
    const int mb_xy   = h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];
    uint8_t *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i, j;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !SIMPLE && (s->qscale == 0 && h->sps.transform_bypass);
    /* is_h264 should always be true if SVQ3 is disabled. */
    const int is_h264 = !CONFIG_SVQ3_DECODER || SIMPLE || s->codec_id == AV_CODEC_ID_H264;
    void (*idct_add)(uint8_t *dst, int16_t *block, int stride);
    /* chroma block height in rows: 16 >> chroma_y_shift */
    const int block_h   = 16 >> s->chroma_y_shift;
    const int chroma422 = CHROMA422;

    /* top-left sample of this macroblock in each plane */
    dest_y  = s->current_picture.f.data[0] + ((mb_x << PIXEL_SHIFT)     + mb_y * s->linesize)  * 16;
    dest_cb = s->current_picture.f.data[1] +  (mb_x << PIXEL_SHIFT) * 8 + mb_y * s->uvlinesize * block_h;
    dest_cr = s->current_picture.f.data[2] +  (mb_x << PIXEL_SHIFT) * 8 + mb_y * s->uvlinesize * block_h;

    /* prefetch picture memory ahead of the current position; the chroma call passes
     * the cb->cr distance as its stride argument */
    s->vdsp.prefetch(dest_y  + (s->mb_x & 3) * 4 * s->linesize   + (64 << PIXEL_SHIFT), s->linesize,       4);
    s->vdsp.prefetch(dest_cb + (s->mb_x & 7)     * s->uvlinesize + (64 << PIXEL_SHIFT), dest_cr - dest_cb, 2);

    h->list_counts[mb_xy] = h->list_count;

    if (!SIMPLE && MB_FIELD) {
        /* field macroblock: rows of one field are two picture lines apart */
        linesize     = h->mb_linesize = s->linesize * 2;
        uvlinesize   = h->mb_uvlinesize = s->uvlinesize * 2;
        block_offset = &h->block_offset[48];
        /* odd macroblock row: step back so that dest points one picture line below the
         * start of the macroblock row above */
        if (mb_y & 1) { // FIXME move out of this function?
            dest_y  -= s->linesize * 15;
            dest_cb -= s->uvlinesize * (block_h - 1);
            dest_cr -= s->uvlinesize * (block_h - 1);
        }
        if (FRAME_MBAFF) {
            int list;
            /* replace every used reference index by (16 + ref) ^ (mb_y & 1);
             * in the per-8x8 case only entries with ref >= 0 are rewritten */
            for (list = 0; list < h->list_count; list++) {
                if (!USES_LIST(mb_type, list))
                    continue;
                if (IS_16X16(mb_type)) {
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16 + *ref) ^ (s->mb_y & 1), 1);
                } else {
                    for (i = 0; i < 16; i += 4) {
                        int ref = h->ref_cache[list][scan8[i]];
                        if (ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2,
                                           8, (16 + ref) ^ (s->mb_y & 1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize   = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
        // dct_offset = s->linesize * 16;
    }

    if (!SIMPLE && IS_INTRA_PCM(mb_type)) {
        /* intra PCM: h->mb holds raw samples instead of coefficients */
        const int bit_depth = h->sps.bit_depth_luma;
        if (PIXEL_SHIFT) {
            /* 16-bit sample storage: samples are read bit_depth bits at a time from h->mb.
             * Note that this j shadows the function-level j. */
            int j;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t *)h->mb,
                          ff_h264_mb_sizes[h->sps.chroma_format_idc] * bit_depth);

            for (i = 0; i < 16; i++) {
                uint16_t *tmp_y = (uint16_t *)(dest_y + i * linesize);
                for (j = 0; j < 16; j++)
                    tmp_y[j] = get_bits(&gb, bit_depth);
            }
            if (SIMPLE || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
                if (!h->sps.chroma_format_idc) {
                    /* chroma_format_idc == 0: no chroma in the stream, fill both planes
                     * with the mid-range value 1 << (bit_depth - 1) */
                    for (i = 0; i < block_h; i++) {
                        uint16_t *tmp_cb = (uint16_t *)(dest_cb + i * uvlinesize);
                        uint16_t *tmp_cr = (uint16_t *)(dest_cr + i * uvlinesize);
                        for (j = 0; j < 8; j++) {
                            tmp_cb[j] = tmp_cr[j] = 1 << (bit_depth - 1);
                        }
                    }
                } else {
                    /* all cb rows are read first, then all cr rows */
                    for (i = 0; i < block_h; i++) {
                        uint16_t *tmp_cb = (uint16_t *)(dest_cb + i * uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cb[j] = get_bits(&gb, bit_depth);
                    }
                    for (i = 0; i < block_h; i++) {
                        uint16_t *tmp_cr = (uint16_t *)(dest_cr + i * uvlinesize);
                        for (j = 0; j < 8; j++)
                            tmp_cr[j] = get_bits(&gb, bit_depth);
                    }
                }
            }
        } else {
            /* 8-bit sample storage: 16 luma rows of 16 bytes are copied straight from h->mb */
            for (i = 0; i < 16; i++)
                memcpy(dest_y + i * linesize, (uint8_t *)h->mb + i * 16, 16);
            if (SIMPLE || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
                if (!h->sps.chroma_format_idc) {
                    /* no chroma in the stream: fill 8 rows of both planes with the
                     * mid-range value (this loop uses a fixed 8, not block_h) */
                    for (i = 0; i < 8; i++) {
                        memset(dest_cb + i*uvlinesize, 1 << (bit_depth - 1), 8);
                        memset(dest_cr + i*uvlinesize, 1 << (bit_depth - 1), 8);
                    }
                } else {
                    /* cb samples follow the 256 luma bytes, cr follows the block_h * 8 cb bytes */
                    uint8_t *src_cb = (uint8_t *)h->mb + 256;
                    uint8_t *src_cr = (uint8_t *)h->mb + 256 + block_h * 8;
                    for (i = 0; i < block_h; i++) {
                        memcpy(dest_cb + i * uvlinesize, src_cb + i * 8, 8);
                        memcpy(dest_cr + i * uvlinesize, src_cr + i * 8, 8);
                    }
                }
            }
        }
    } else {
        if (IS_INTRA(mb_type)) {
            /* xchg_mb_border is called before and after prediction when deblocking is
             * enabled; the two calls differ only in the 7th argument (1, then 0) */
            if (h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize,
                               uvlinesize, 1, 0, SIMPLE, PIXEL_SHIFT);

            if (SIMPLE || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) {
                h->hpc.pred8x8[h->chroma_pred_mode](dest_cb, uvlinesize);
                h->hpc.pred8x8[h->chroma_pred_mode](dest_cr, uvlinesize);
            }

            hl_decode_mb_predict_luma(h, mb_type, is_h264, SIMPLE,
                                      transform_bypass, PIXEL_SHIFT,
                                      block_offset, linesize, dest_y, 0);

            if (h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize,
                               uvlinesize, 0, 0, SIMPLE, PIXEL_SHIFT);
        } else if (is_h264) {
            /* inter macroblock: motion compensation, 4:2:2 or 4:2:0 variant */
            if (chroma422) {
                FUNC(hl_motion_422)(h, dest_y, dest_cb, dest_cr,
                              s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                              s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                              h->h264dsp.weight_h264_pixels_tab,
                              h->h264dsp.biweight_h264_pixels_tab);
            } else {
                FUNC(hl_motion_420)(h, dest_y, dest_cb, dest_cr,
                              s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                              s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                              h->h264dsp.weight_h264_pixels_tab,
                              h->h264dsp.biweight_h264_pixels_tab);
            }
        }

        /* luma residual */
        hl_decode_mb_idct_luma(h, mb_type, is_h264, SIMPLE, transform_bypass,
                               PIXEL_SHIFT, block_offset, linesize, dest_y, 0);

        /* chroma residual, only when the chroma bits of the coded block pattern are set */
        if ((SIMPLE || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) &&
            (h->cbp & 0x30)) {
            uint8_t *dest[2] = { dest_cb, dest_cr };
            if (transform_bypass) {
                /* transform bypass: residual is added without an inverse transform */
                if (IS_INTRA(mb_type) && h->sps.profile_idc == 244 &&
                    (h->chroma_pred_mode == VERT_PRED8x8 ||
                     h->chroma_pred_mode == HOR_PRED8x8)) {
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0],
                                                            block_offset + 16,
                                                            h->mb + (16 * 16 * 1 << PIXEL_SHIFT),
                                                            uvlinesize);
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1],
                                                            block_offset + 32,
                                                            h->mb + (16 * 16 * 2 << PIXEL_SHIFT),
                                                            uvlinesize);
                } else {
                    idct_add = s->dsp.add_pixels4;
                    /* j = 1 is cb, j = 2 is cr; blocks j*16 .. j*16+3 always, and for
                     * 4:2:2 additionally j*16+4 .. j*16+7 (looked up at index i + 4) */
                    for (j = 1; j < 3; j++) {
                        for (i = j * 16; i < j * 16 + 4; i++)
                            if (h->non_zero_count_cache[scan8[i]] ||
                                dctcoef_get(h->mb, PIXEL_SHIFT, i * 16))
                                idct_add(dest[j - 1] + block_offset[i],
                                         h->mb + (i * 16 << PIXEL_SHIFT),
                                         uvlinesize);
                        if (chroma422) {
                            for (i = j * 16 + 4; i < j * 16 + 8; i++)
                                if (h->non_zero_count_cache[scan8[i + 4]] ||
                                    dctcoef_get(h->mb, PIXEL_SHIFT, i * 16))
                                    idct_add(dest[j - 1] + block_offset[i + 4],
                                             h->mb + (i * 16 << PIXEL_SHIFT),
                                             uvlinesize);
                        }
                    }
                }
            } else {
                if (is_h264) {
                    int qp[2];
                    /* 4:2:2 uses the chroma QP raised by 3 for the DC dequantisation */
                    if (chroma422) {
                        qp[0] = h->chroma_qp[0] + 3;
                        qp[1] = h->chroma_qp[1] + 3;
                    } else {
                        qp[0] = h->chroma_qp[0];
                        qp[1] = h->chroma_qp[1];
                    }
                    /* dequant table index: 1/2 for intra cb/cr, 4/5 for inter cb/cr */
                    if (h->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 0]])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16 * 16 * 1 << PIXEL_SHIFT),
                                                               h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][qp[0]][0]);
                    if (h->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 1]])
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16 * 16 * 2 << PIXEL_SHIFT),
                                                               h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][qp[1]][0]);
                    h->h264dsp.h264_idct_add8(dest, block_offset,
                                              h->mb, uvlinesize,
                                              h->non_zero_count_cache);
                } else if (CONFIG_SVQ3_DECODER) {
                    /* SVQ3 path: DC dequantisation is unconditional and the offsets into
                     * h->mb are not scaled by PIXEL_SHIFT */
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16 * 16 * 1,
                                                           h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][h->chroma_qp[0]][0]);
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16 * 16 * 2,
                                                           h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][h->chroma_qp[1]][0]);
                    for (j = 1; j < 3; j++) {
                        for (i = j * 16; i < j * 16 + 4; i++)
                            if (h->non_zero_count_cache[scan8[i]] || h->mb[i * 16]) {
                                uint8_t *const ptr = dest[j - 1] + block_offset[i];
                                ff_svq3_add_idct_c(ptr, h->mb + i * 16,
                                                   uvlinesize,
                                                   ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
                            }
                    }
                }
            }
        }
    }
    /* reset the coefficient buffer; done in two clear_blocks calls, the second starting
     * at coefficient 24 * 16 */
    if (h->cbp || IS_INTRA(mb_type)) {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb + (24 * 16 << PIXEL_SHIFT));
    }
}
Beispiel #8
0
/*
 * Reconstruct the current macroblock when all three planes share the luma geometry
 * (every plane is addressed with s->linesize and 16x16 samples per macroblock).
 *
 * Each plane p is treated like a luma plane: the luma prediction and luma residual
 * helpers are called once per plane with p as the plane index. plane_count is 3, or 1
 * when gray-only decoding is active (CONFIG_GRAY && CODEC_FLAG_GRAY in a non-SIMPLE build).
 *
 * This body is a template: SIMPLE, PIXEL_SHIFT and FUNC() are supplied by the
 * including file.
 */
static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h)
{
    MpegEncContext *const s = &h->s;
    const int mb_x    = s->mb_x;
    const int mb_y    = s->mb_y;
    const int mb_xy   = h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];
    uint8_t *dest[3];
    int linesize;
    int i, j, p;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !SIMPLE && (s->qscale == 0 && h->sps.transform_bypass);
    const int plane_count      = (SIMPLE || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) ? 3 : 1;

    /* top-left sample of this macroblock in every decoded plane, plus a prefetch */
    for (p = 0; p < plane_count; p++) {
        dest[p] = s->current_picture.f.data[p] +
                  ((mb_x << PIXEL_SHIFT) + mb_y * s->linesize) * 16;
        s->vdsp.prefetch(dest[p] + (s->mb_x & 3) * 4 * s->linesize + (64 << PIXEL_SHIFT),
                         s->linesize, 4);
    }

    h->list_counts[mb_xy] = h->list_count;

    if (!SIMPLE && MB_FIELD) {
        /* field macroblock: rows of one field are two picture lines apart */
        linesize     = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
        block_offset = &h->block_offset[48];
        /* NOTE(review): this loop always covers 3 planes, while dest[] above is only
         * initialised for plane_count planes -- confirm this is intended in gray mode */
        if (mb_y & 1) // FIXME move out of this function?
            for (p = 0; p < 3; p++)
                dest[p] -= s->linesize * 15;
        if (FRAME_MBAFF) {
            int list;
            /* replace every used reference index by (16 + ref) ^ (mb_y & 1);
             * in the per-8x8 case only entries with ref >= 0 are rewritten */
            for (list = 0; list < h->list_count; list++) {
                if (!USES_LIST(mb_type, list))
                    continue;
                if (IS_16X16(mb_type)) {
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16 + *ref) ^ (s->mb_y & 1), 1);
                } else {
                    for (i = 0; i < 16; i += 4) {
                        int ref = h->ref_cache[list][scan8[i]];
                        if (ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2,
                                           8, (16 + ref) ^ (s->mb_y & 1), 1);
                    }
                }
            }
        }
    } else {
        linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize;
    }

    if (!SIMPLE && IS_INTRA_PCM(mb_type)) {
        /* intra PCM: h->mb holds raw samples instead of coefficients */
        if (PIXEL_SHIFT) {
            /* 16-bit sample storage: 768 (= 3 * 256) samples of bit_depth bits each */
            const int bit_depth = h->sps.bit_depth_luma;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t *)h->mb, 768 * bit_depth);

            for (p = 0; p < plane_count; p++)
                for (i = 0; i < 16; i++) {
                    uint16_t *tmp = (uint16_t *)(dest[p] + i * linesize);
                    for (j = 0; j < 16; j++)
                        tmp[j] = get_bits(&gb, bit_depth);
                }
        } else {
            /* 8-bit sample storage: plane p occupies bytes p * 256 .. p * 256 + 255 of h->mb */
            for (p = 0; p < plane_count; p++)
                for (i = 0; i < 16; i++)
                    memcpy(dest[p] + i * linesize,
                           (uint8_t *)h->mb + p * 256 + i * 16, 16);
        }
    } else {
        if (IS_INTRA(mb_type)) {
            /* xchg_mb_border is called before and after prediction when deblocking is
             * enabled; the two calls differ only in the 7th argument (1, then 0) */
            if (h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize,
                               linesize, 1, 1, SIMPLE, PIXEL_SHIFT);

            /* every plane is predicted with the luma predictor */
            for (p = 0; p < plane_count; p++)
                hl_decode_mb_predict_luma(h, mb_type, 1, SIMPLE,
                                          transform_bypass, PIXEL_SHIFT,
                                          block_offset, linesize, dest[p], p);

            if (h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize,
                               linesize, 0, 1, SIMPLE, PIXEL_SHIFT);
        } else {
            /* inter macroblock: motion compensation for all three planes */
            FUNC(hl_motion_444)(h, dest[0], dest[1], dest[2],
                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                      h->h264dsp.weight_h264_pixels_tab,
                      h->h264dsp.biweight_h264_pixels_tab);
        }

        /* residual of every plane goes through the luma path */
        for (p = 0; p < plane_count; p++)
            hl_decode_mb_idct_luma(h, mb_type, 1, SIMPLE, transform_bypass,
                                   PIXEL_SHIFT, block_offset, linesize,
                                   dest[p], p);
    }
    /* reset the coefficient buffer; done in two clear_blocks calls, the second starting
     * at coefficient 24 * 16 */
    if (h->cbp || IS_INTRA(mb_type)) {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb + (24 * 16 << PIXEL_SHIFT));
    }
}
Beispiel #9
0
/*!
*************************************************************************************
* \brief
*    Mode Decision for a macroblock.
*
*    Candidate modes are compared by the cost values computed in this function
*    (the in-code comments refer to this as the "rd-off" / "non-rdopt" path).
*    The function proceeds in these stages:
*      1. Allocate a scratch picture buffer (temp_img) and set up the per-MB
*         encoding parameters and coding state.
*      2. Unless the macroblock is forced to intra: motion estimation for
*         modes 1..3, then the P8x8 sub-partition decision (with 8x8 and/or
*         4x4 transform depending on p_Inp->Transform8x8Mode), then the skip
*         motion vector search for P slices.
*      3. Direct mode check for B slices.
*      4. Intra 8x8, intra 4x4 and intra 16x16 checks, each compared against
*         currMB->min_rdcost.
*      5. Final parameter setup for the winning mode (residual coding, intra
*         prediction mode maps, chroma, motion vectors, SKIP detection),
*         followed by rate control, refresh map and adaptive rounding updates.
*
* \param currMB
*    Macroblock being encoded. Read and updated in place: among others
*    best_mode, min_rdcost, cbp, mb_type, ar_mode, i16offset,
*    luma_transform_size_8x8_flag and NoMbPartLessThan8x8Flag are written here.
*    The slice, image and input parameter structures are reached through
*    currMB->p_slice, currMB->p_Img and currMB->p_Inp.
*************************************************************************************
*/
void encode_one_macroblock_low (Macroblock *currMB)
{
    Slice *currSlice = currMB->p_slice;
    RDOPTStructure  *p_RDO = currSlice->p_RDO;
    ImageParameters *p_Img = currMB->p_Img;
    InputParameters *p_Inp = currMB->p_Inp;
    PicMotionParams *motion = &p_Img->enc_picture->motion;

    imgpel ***temp_img; // to temp store the Y data for 8x8 transform

    int         block, mode, i=0, j;
    RD_PARAMS   enc_mb;
    // NOTE(review): this initializer sets only bmcost[0] to INT_MAX; the remaining
    // four entries are zero-initialized. The entries read below appear to be
    // assigned first (directly or via list_prediction_cost) -- confirm.
    int         bmcost[5] = {INT_MAX};
    double      rd_cost = 0;
    int         cost = 0;
    int         min_cost = INT_MAX, cost_direct=0, have_direct=0;
    int         intra1 = 0;
    int         temp_cpb = 0;
    Boolean     best_transform_flag = FALSE;
    int         cost8x8_direct = 0;
    short       islice      = (short) (currSlice->slice_type == I_SLICE);
    short       bslice      = (short) (currSlice->slice_type == B_SLICE);
    short       pslice      = (short) ((currSlice->slice_type == P_SLICE) || (currSlice->slice_type == SP_SLICE));
    // Forced intra: every MB of an I slice, or a P/SP-slice MB whose row equals
    // p_Img->mb_y_upd while mb_y_upd differs from mb_y_intra.
    short       intra       = (short) (islice || (pslice && currMB->mb_y == p_Img->mb_y_upd && p_Img->mb_y_upd!=p_Img->mb_y_intra));
    int         lambda_mf[3];
    Block8x8Info *b8x8info   = p_Img->b8x8info;
    //int         mb_available[3] = { 1, 1, 1};

    char   **ipredmodes = p_Img->ipredmode;
    // NULL for I slices; only dereferenced in the SKIP check near the end of the
    // function, which is guarded by pslice.
    short   *allmvs = (currSlice->slice_type == I_SLICE) ? NULL: currSlice->all_mv[0][0][0][0][0];
    int     ****i4p;  //for non-RD-opt. mode (scratch pointer used to swap p_RDO->cofAC and currSlice->cofAC)
    imgpel  **mb_pred = currSlice->mb_pred[0];

    // Snapshot of the transform-size / partition flags taken before the direct
    // and intra checks, so a losing candidate can restore them.
    Boolean tmp_8x8_flag, tmp_no_mbpart;

    BestMode    md_best;
    Info8x8 best;

    init_md_best(&md_best);

    // Init best (need to create simple function)
    best.pdir = 0;
    best.bipred = 0;
    best.ref[LIST_0] = 0;
    best.ref[LIST_1] = -1;

    // Scratch buffer of 3 planes, MB_BLOCK_SIZE x MB_BLOCK_SIZE each; released by
    // free_mem3Dpel() at the end of this function (there is no early return).
    get_mem3Dpel(&temp_img, 3, MB_BLOCK_SIZE, MB_BLOCK_SIZE);

    intra |= RandomIntra (p_Img, currMB->mbAddrX);    // Forced Pseudo-Random Intra

    //===== Setup Macroblock encoding parameters =====
    init_enc_mb_params(currMB, &enc_mb, intra);
    if (p_Inp->AdaptiveRounding)
    {
        reset_adaptive_rounding(p_Img);
    }

    if (currSlice->MbaffFrameFlag)
    {
        reset_mb_nz_coeff(p_Img, currMB->mbAddrX);
    }

    //=====   S T O R E   C O D I N G   S T A T E   =====
    //---------------------------------------------------
    currSlice->store_coding_state (currMB, currSlice->p_RDO->cs_cm);

    if (!intra)
    {
        //===== set direct motion vectors =====
        currMB->best_mode = 10;  // let us set best_mode to an intra mode to avoid possible bug with RDOQ
        if (bslice && enc_mb.valid[0])
        {
            currSlice->Get_Direct_Motion_Vectors (currMB);
        }

        if (p_Inp->CtxAdptLagrangeMult == 1)
        {
            get_initial_mb16x16_cost(currMB);
        }

        //===== MOTION ESTIMATION FOR 16x16, 16x8, 8x16 BLOCKS =====
        for (mode = 1; mode < 4; mode++)
        {
            best.bipred = 0;
            best.mode = (char) mode;
            b8x8info->best[mode][0].bipred = 0;
            if (enc_mb.valid[mode])
            {
                // Mode 1 has one partition, modes 2 and 3 have two; cost accumulates
                // over the partitions of the current mode.
                for (cost=0, block=0; block<(mode==1?1:2); block++)
                {
                    update_lambda_costs(currMB, &enc_mb, lambda_mf);
                    PartitionMotionSearch (currMB, mode, block, lambda_mf);

                    //--- set 4x4 block indices (for getting MV) ---
                    // NOTE(review): i and j are not read again inside this loop in the
                    // code visible here; j is only reused later as a loop counter.
                    j = (block==1 && mode==2 ? 2 : 0);
                    i = (block==1 && mode==3 ? 2 : 0);

                    //--- get cost and reference frame for List 0 prediction ---
                    bmcost[LIST_0] = INT_MAX;
                    list_prediction_cost(currMB, LIST_0, block, mode, &enc_mb, bmcost, best.ref);

                    if (bslice)
                    {
                        //--- get cost and reference frame for List 1 prediction ---
                        bmcost[LIST_1] = INT_MAX;
                        list_prediction_cost(currMB, LIST_1, block, mode, &enc_mb, bmcost, best.ref);

                        // Compute bipredictive cost between best list 0 and best list 1 references
                        list_prediction_cost(currMB, BI_PRED, block, mode, &enc_mb, bmcost, best.ref);

                        // currently Bi predictive ME is only supported for modes 1, 2, 3 and ref 0
                        if (is_bipred_enabled(p_Inp, mode))
                        {
                            list_prediction_cost(currMB, BI_PRED_L0, block, mode, &enc_mb, bmcost, 0);
                            list_prediction_cost(currMB, BI_PRED_L1, block, mode, &enc_mb, bmcost, 0);
                        }
                        else
                        {
                            // Disabled candidates get INT_MAX as their cost.
                            bmcost[BI_PRED_L0] = INT_MAX;
                            bmcost[BI_PRED_L1] = INT_MAX;
                        }

                        // Determine prediction list based on mode cost
                        determine_prediction_list(bmcost, &best, &cost);
                    }
                    else // if (bslice)
                    {
                        // Non-B slices: always list 0, add its cost directly.
                        best.pdir  = 0;
                        cost      += bmcost[LIST_0];
                    }

                    assign_enc_picture_params(currMB, mode, &best, 2 * block);

                    //----- set reference frame and direction parameters -----
                    set_block8x8_info(b8x8info, mode, block, &best);

                    //--- set reference frames and motion vectors ---
                    // Done only after the first partition of the two-partition modes;
                    // presumably so the second partition's search can use them -- TODO confirm.
                    if (mode>1 && block==0)
                        currSlice->set_ref_and_motion_vectors (currMB, motion, &best, block);
                } // for (block=0; block<(mode==1?1:2); block++)

                currMB->luma_transform_size_8x8_flag = FALSE;
                if (p_Inp->Transform8x8Mode) //for inter rd-off, set 8x8 to do 8x8 transform
                {
                    SetModesAndRefframeForBlocks(currMB, (short) mode);
                    // TransformDecision receives &cost and may update it -- TODO confirm.
                    currMB->luma_transform_size_8x8_flag = (byte) TransformDecision(currMB, -1, &cost);
                }

                // Strictly lower cost wins; the transform flag of the winner is
                // remembered so it can be re-applied in the final setup stage.
                if (cost < min_cost)
                {
                    currMB->best_mode = (short) mode;
                    min_cost  = cost;
                    best_transform_flag = currMB->luma_transform_size_8x8_flag;

                    if (p_Inp->CtxAdptLagrangeMult == 1)
                    {
                        adjust_mb16x16_cost(currMB, cost);
                    }
                }
            } // if (enc_mb.valid[mode])
        } // for (mode=1; mode<4; mode++)

        if (enc_mb.valid[P8x8])
        {
            //===== store coding state of macroblock =====
            currSlice->store_coding_state (currMB, currSlice->p_RDO->cs_mb);
            memset( currSlice->cofAC[0][0][0], 0, 2080 * sizeof(int)); // 4 * 4 * 2 * 65

            currMB->valid_8x8 = FALSE;

            if (p_Inp->Transform8x8Mode)
            {
                ResetRD8x8Data(p_Img, p_RDO->tr8x8);
                //===========================================================
                // Check 8x8 partition with transform size 8x8
                //===========================================================
                //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
                for (cost_direct =  0, block = 0; block < 4; block++)
                {
                    submacroblock_mode_decision_low(currMB, &enc_mb, p_RDO->tr8x8, p_RDO->cofAC8x8ts[block],
                                                    &have_direct, block, &cost_direct, &cost, &cost8x8_direct, 1);

                    set_subblock8x8_info(b8x8info, P8x8, block, p_RDO->tr8x8);
                }

                currMB->luma_transform_size_8x8_flag = FALSE; //switch to 4x4 transform size
            }// if (p_Inp->Transform8x8Mode)


            // Transform8x8Mode == 2 means 8x8 transform only (see the switch in the
            // direct mode check below), so the 4x4 pass is skipped in that case.
            if (p_Inp->Transform8x8Mode != 2)
            {
                ResetRD8x8Data(p_Img, p_RDO->tr4x4);
                //=================================================================
                // Check 8x8, 8x4, 4x8 and 4x4 partitions with transform size 4x4
                //=================================================================
                //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
                for (cost_direct = 0, block=0; block<4; block++)
                {
                    submacroblock_mode_decision_low(currMB, &enc_mb, p_RDO->tr4x4, p_RDO->coefAC8x8[block],
                                                    &have_direct, block, &cost_direct, &cost, &cost8x8_direct, 0);

                    set_subblock8x8_info(b8x8info, P8x8, block, p_RDO->tr4x4);
                }
            }// if (p_Inp->Transform8x8Mode != 2)

            if (p_Inp->RCEnable)
                rc_store_diff(currSlice->diffy, &p_Img->pCurImg[currMB->opix_y], currMB->pix_x, mb_pred);

            //check cost for P8x8 for non-rdopt mode
            // Each operand only reads the tr4x4 / tr8x8 result that was actually
            // computed above for the current Transform8x8Mode.
            if (((p_Inp->Transform8x8Mode < 2) && (p_RDO->tr4x4->mb_p8x8_cost < min_cost)) ||
                    ((p_Inp->Transform8x8Mode >  0) && (p_RDO->tr8x8->mb_p8x8_cost < min_cost)))
            {
                currMB->best_mode = P8x8;
                if (p_Inp->Transform8x8Mode == 2)
                {
                    // 8x8 transform only
                    min_cost = p_RDO->tr8x8->mb_p8x8_cost;
                    currMB->luma_transform_size_8x8_flag = TRUE;
                }
                else if (p_Inp->Transform8x8Mode)
                {
                    // Both transform sizes were evaluated: take the cheaper one and
                    // break an exact tie with GetBestTransformP8x8().
                    if (p_RDO->tr8x8->mb_p8x8_cost < p_RDO->tr4x4->mb_p8x8_cost)
                    {
                        min_cost = p_RDO->tr8x8->mb_p8x8_cost;
                        currMB->luma_transform_size_8x8_flag = TRUE;
                    }
                    else if(p_RDO->tr4x4->mb_p8x8_cost < p_RDO->tr8x8->mb_p8x8_cost)
                    {
                        min_cost = p_RDO->tr4x4->mb_p8x8_cost;
                        currMB->luma_transform_size_8x8_flag = FALSE;
                    }
                    else
                    {
                        if (GetBestTransformP8x8(currMB) == 0)
                        {
                            min_cost = p_RDO->tr4x4->mb_p8x8_cost;
                            currMB->luma_transform_size_8x8_flag = FALSE;
                        }
                        else
                        {
                            min_cost = p_RDO->tr8x8->mb_p8x8_cost;
                            currMB->luma_transform_size_8x8_flag = TRUE;
                        }
                    }
                }
                else
                {
                    // 4x4 transform only
                    min_cost = p_RDO->tr4x4->mb_p8x8_cost;
                    currMB->luma_transform_size_8x8_flag = FALSE;
                }
            }// if ((p_RDO->tr4x4->mb_p8x8_cost < min_cost || p_RDO->tr8x8->mb_p8x8_cost < min_cost))
            p_Img->giRDOpt_B8OnlyFlag = FALSE;
        }

        // Find a motion vector for the Skip mode
        if(pslice)
            FindSkipModeMotionVector (currMB);
    }
    else // if (!intra)
    {
        // Forced intra: no inter candidate was evaluated.
        min_cost = INT_MAX;
    }


    //========= C H O O S E   B E S T   M A C R O B L O C K   M O D E =========
    //-------------------------------------------------------------------------
    tmp_8x8_flag  = currMB->luma_transform_size_8x8_flag;  //save 8x8_flag
    tmp_no_mbpart = currMB->NoMbPartLessThan8x8Flag;      //save no-part-less
    if ((p_Img->yuv_format != YUV400) && (p_Img->yuv_format != YUV444))
        // precompute all chroma intra prediction modes
        intra_chroma_prediction(currMB, NULL, NULL, NULL);

    if (enc_mb.valid[0] && bslice) // check DIRECT MODE
    {
        if(have_direct)
        {
            // Reuse the direct costs gathered during the P8x8 sub-partition loops.
            switch(p_Inp->Transform8x8Mode)
            {
            case 1: // Mixture of 8x8 & 4x4 transform
                cost = ((cost8x8_direct < cost_direct) || !(enc_mb.valid[5] && enc_mb.valid[6] && enc_mb.valid[7]))
                       ? cost8x8_direct : cost_direct;
                break;
            case 2: // 8x8 Transform only
                cost = cost8x8_direct;
                break;
            default: // 4x4 Transform only
                cost = cost_direct;
                break;
            }
        }
        else
        {   //!have_direct
            cost = GetDirectCostMB (currMB);
        }
        if (cost!=INT_MAX)
        {
            // Lower the direct cost by 16 * lambda_md (rounded); skipped for the
            // INT_MAX sentinel.
            cost -= (int)floor(16 * enc_mb.lambda_md + 0.4999);
        }

        // Note "<=": on an exact tie direct mode replaces the current best.
        if (cost <= min_cost)
        {
            if(p_Img->active_sps->direct_8x8_inference_flag && p_Inp->Transform8x8Mode)
            {
                if(p_Inp->Transform8x8Mode==2)
                    currMB->luma_transform_size_8x8_flag = TRUE;
                else
                {
                    if(cost8x8_direct < cost_direct)
                        currMB->luma_transform_size_8x8_flag = TRUE;
                    else
                        currMB->luma_transform_size_8x8_flag = FALSE;
                }
            }
            else
                currMB->luma_transform_size_8x8_flag = FALSE;

            //Rate control
            if (p_Inp->RCEnable)
                rc_store_diff(currSlice->diffy, &p_Img->pCurImg[currMB->opix_y], currMB->pix_x, mb_pred);

            min_cost  = cost;
            currMB->best_mode = 0;
            tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;
        }
        else
        {
            currMB->luma_transform_size_8x8_flag = (byte) tmp_8x8_flag; // restore if not best
            currMB->NoMbPartLessThan8x8Flag = (byte) tmp_no_mbpart; // restore if not best
        }
    }

    // From here on candidates are compared against currMB->min_rdcost (double),
    // seeded with the best integer inter/direct cost found so far.
    currMB->min_rdcost = (double) min_cost;

    if (enc_mb.valid[I8MB]) // check INTRA8x8
    {
        currMB->luma_transform_size_8x8_flag = TRUE; // at this point cost will ALWAYS be less than min_cost

        currMB->mb_type = currMB->ar_mode = I8MB;
        temp_cpb = Mode_Decision_for_Intra8x8Macroblock (currMB, enc_mb.lambda_md, &rd_cost);


        if (rd_cost <= currMB->min_rdcost) //HYU_NOTE. bug fix. 08/15/07
        {
            currMB->cbp = temp_cpb;
            if (p_Img->P444_joined)
            {
                currSlice->curr_cbp[0] = currSlice->cmp_cbp[1];
                currSlice->curr_cbp[1] = currSlice->cmp_cbp[2];
            }

            if(enc_mb.valid[I4MB])
            {
                //coeffs
                // Swap the two coefficient buffers so the intra 8x8 result is kept in
                // p_RDO->cofAC while the intra 4x4 check below runs; the 4x4 branch
                // swaps them back if it does not win.
                if (p_Inp->Transform8x8Mode != 2)
                {
                    i4p = p_RDO->cofAC;
                    p_RDO->cofAC = currSlice->cofAC;
                    currSlice->cofAC = i4p;
                }
            }

            // Save the intra 8x8 reconstruction into temp_img; it is copied back in
            // the final setup stage if I8MB is still the best mode.
            copy_image_data_16x16(temp_img[0], &p_Img->enc_picture->imgY[currMB->pix_y], 0, currMB->pix_x);

            if (p_Img->P444_joined)
            {
                copy_image_data_16x16(temp_img[1], &p_Img->enc_picture->imgUV[0][currMB->pix_y], 0, currMB->pix_x);
                copy_image_data_16x16(temp_img[2], &p_Img->enc_picture->imgUV[1][currMB->pix_y], 0, currMB->pix_x);
            }

            //Rate control
            if (p_Inp->RCEnable)
                rc_store_diff(currSlice->diffy, &p_Img->pCurImg[currMB->opix_y], currMB->pix_x, mb_pred);

            currMB->min_rdcost  = rd_cost;
            currMB->best_mode = I8MB;
            tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;
        }
        else
        {
            currMB->luma_transform_size_8x8_flag = (byte) tmp_8x8_flag; // restore if not best
            if (p_Img->P444_joined)
            {
                currMB->cbp |= currSlice->curr_cbp[0];
                currMB->cbp |= currSlice->curr_cbp[1];
                currSlice->cmp_cbp[1] = currMB->cbp;
                currSlice->cmp_cbp[2] = currMB->cbp;
            }
        }
    }

    if (enc_mb.valid[I4MB]) // check INTRA4x4
    {
        currMB->luma_transform_size_8x8_flag = FALSE;
        currMB->mb_type = currMB->ar_mode = I4MB;
        temp_cpb = Mode_Decision_for_Intra4x4Macroblock (currMB, enc_mb.lambda_md, &rd_cost);

        if (rd_cost <= currMB->min_rdcost)
        {
            currMB->cbp = temp_cpb;

            //Rate control
            if (p_Inp->RCEnable)
                rc_store_diff(currSlice->diffy, &p_Img->pCurImg[currMB->opix_y], currMB->pix_x, mb_pred);

            currMB->min_rdcost  = rd_cost;
            currMB->best_mode = I4MB;
            tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;
        }
        else
        {
            currMB->luma_transform_size_8x8_flag = (byte) tmp_8x8_flag; // restore if not best
            if (p_Img->P444_joined)
            {
                currMB->cbp |= currSlice->curr_cbp[0];
                currMB->cbp |= currSlice->curr_cbp[1];
                currSlice->cmp_cbp[1] = currMB->cbp;
                currSlice->cmp_cbp[2] = currMB->cbp;
            }
            //coeffs
            // Intra 4x4 lost: exchange the coefficient buffers again.
            // NOTE(review): this swap is unconditional, whereas the matching swap in
            // the intra 8x8 branch only happens when I8MB won and
            // Transform8x8Mode != 2 -- confirm the pairing is intended.
            i4p = p_RDO->cofAC;
            p_RDO->cofAC = currSlice->cofAC;
            currSlice->cofAC=i4p;
        }
    }
    if (enc_mb.valid[I16MB]) // check INTRA16x16
    {
        currMB->luma_transform_size_8x8_flag = FALSE;
        intrapred_16x16 (currMB, PLANE_Y);
        if (p_Img->P444_joined)
        {
            // Predict the two chroma planes as well, then switch back to luma.
            select_plane(p_Img, PLANE_U);
            intrapred_16x16 (currMB, PLANE_U);
            select_plane(p_Img, PLANE_V);
            intrapred_16x16 (currMB, PLANE_V);
            select_plane(p_Img, PLANE_Y);
        }

        rd_cost = currSlice->find_sad_16x16 (currMB);

        // Strict "<" here, unlike the "<=" used for the intra 8x8 / 4x4 checks.
        if (rd_cost < currMB->min_rdcost)
        {
            //Rate control
            if (p_Inp->RCEnable)
                rc_store_diff(currSlice->diffy, &p_Img->pCurImg[currMB->opix_y], currMB->pix_x, currSlice->mpr_16x16[0][(short) currMB->i16mode]);

            currMB->best_mode   = I16MB;
            currMB->min_rdcost  = rd_cost;
            currMB->cbp = currMB->trans_16x16 (currMB, PLANE_Y);

            if (p_Img->P444_joined)
            {
                select_plane(p_Img, PLANE_U);
                currSlice->cmp_cbp[1] = currMB->trans_16x16(currMB, PLANE_U);
                select_plane(p_Img, PLANE_V);
                currSlice->cmp_cbp[2] = currMB->trans_16x16(currMB, PLANE_V);

                // Merge the per-plane cbp values into one and write it back to
                // both chroma slots.
                select_plane(p_Img, PLANE_Y);
                currMB->cbp |= currSlice->cmp_cbp[1];
                currMB->cbp |= currSlice->cmp_cbp[2];
                currSlice->cmp_cbp[1] = currMB->cbp;
                currSlice->cmp_cbp[2] = currMB->cbp;
            }

        }
        else
        {
            currMB->luma_transform_size_8x8_flag = (byte) tmp_8x8_flag; // restore
            currMB->NoMbPartLessThan8x8Flag = (byte) tmp_no_mbpart;     // restore
        }
    }

    // Captured before the final setup below; passed to update_refresh_map().
    intra1 = IS_INTRA(currMB);

    //=====  S E T   F I N A L   M A C R O B L O C K   P A R A M E T E R S ======
    //---------------------------------------------------------------------------
    {
        //===== set parameters for chosen mode =====
        SetModesAndRefframeForBlocks (currMB, currMB->best_mode);

        if (currMB->best_mode == P8x8)
        {
            // Drop the 8x8 transform flag when the 8x8-transform pass produced no
            // coded coefficients (cbp8x8 == 0) and 4x4 is allowed.
            if (currMB->luma_transform_size_8x8_flag && (p_RDO->tr8x8->cbp8x8 == 0) && p_Inp->Transform8x8Mode != 2)
                currMB->luma_transform_size_8x8_flag = FALSE;

            SetCoeffAndReconstruction8x8 (currMB);

            // Inter MB: mark all intra prediction modes of this MB as DC_PRED.
            memset(currMB->intra_pred_modes, DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));
            for (j = currMB->block_y; j < currMB->block_y + BLOCK_MULTIPLE; j++)
                memset(&ipredmodes[j][currMB->block_x], DC_PRED, BLOCK_MULTIPLE * sizeof(char));
        }
        else
        {
            //===== set parameters for chosen mode =====
            if (currMB->best_mode == I8MB)
            {
                // Publish the intra 8x8 prediction modes as the MB's final modes.
                memcpy(currMB->intra_pred_modes,currMB->intra_pred_modes8x8, MB_BLOCK_PARTITIONS * sizeof(char));
                for(j = currMB->block_y; j < currMB->block_y + BLOCK_MULTIPLE; j++)
                    memcpy(&p_Img->ipredmode[j][currMB->block_x],&p_Img->ipredmode8x8[j][currMB->block_x], BLOCK_MULTIPLE * sizeof(char));

                //--- restore reconstruction for 8x8 transform ---
                copy_image_data_16x16(&p_Img->enc_picture->imgY[currMB->pix_y], temp_img[0], currMB->pix_x, 0);

                if (p_Img->P444_joined)
                {
                    copy_image_data_16x16(&p_Img->enc_picture->imgUV[0][currMB->pix_y], temp_img[1], currMB->pix_x, 0);
                    copy_image_data_16x16(&p_Img->enc_picture->imgUV[1][currMB->pix_y], temp_img[2], currMB->pix_x, 0);
                }
            }

            if ((currMB->best_mode!=I4MB)&&(currMB->best_mode != I8MB))
            {
                // Neither I4MB nor I8MB: reset the intra prediction mode maps to DC_PRED.
                memset(currMB->intra_pred_modes,DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));
                for(j = currMB->block_y; j < currMB->block_y + BLOCK_MULTIPLE; j++)
                    memset(&ipredmodes[j][currMB->block_x],DC_PRED, BLOCK_MULTIPLE * sizeof(char));
                currMB->ar_mode = currMB->best_mode;

                if (currMB->best_mode!=I16MB)
                {
                    // Modes 1..3: re-apply the transform flag recorded when the mode won.
                    if((currMB->best_mode >= 1) && (currMB->best_mode <= 3))
                        currMB->luma_transform_size_8x8_flag = (byte) best_transform_flag;

                    // NOTE(review): this test reads currSlice->P444_joined while every
                    // other test in this function reads p_Img->P444_joined -- confirm
                    // both fields exist and always agree.
                    if (currSlice->P444_joined)
                    {
                        luma_residual_coding_p444(currMB);
                        if((currMB->cbp==0 && currSlice->cmp_cbp[1] == 0 && currSlice->cmp_cbp[2] == 0) &&(currMB->best_mode == 0))
                            currMB->luma_transform_size_8x8_flag = FALSE;
                    }
                    else
                    {
                        luma_residual_coding(currMB);
                        if((currMB->cbp==0)&&(currMB->best_mode == 0))
                            currMB->luma_transform_size_8x8_flag = FALSE;
                    }

                    //Rate control
                    if (p_Inp->RCEnable)
                        rc_store_diff(currSlice->diffy, &p_Img->pCurImg[currMB->opix_y], currMB->pix_x, mb_pred);
                }
            }
        }
        //check luma cbp for transform size flag
        if (((currMB->cbp&15) == 0) && currMB->mb_type != I4MB && currMB->mb_type != I8MB)
            currMB->luma_transform_size_8x8_flag = FALSE;

        // precompute all chroma intra prediction modes
        if ((p_Img->yuv_format != YUV400) && (p_Img->yuv_format != YUV444))
            intra_chroma_prediction(currMB, NULL, NULL, NULL);

        currMB->i16offset = 0;

        if ((p_Img->yuv_format != YUV400) && (p_Img->yuv_format != YUV444))
            chroma_residual_coding (currMB);

        // Computed after chroma_residual_coding(), i.e. from the cbp value as it
        // stands at this point.
        if (currMB->best_mode == I16MB)
        {
            currMB->i16offset = I16Offset  (currMB->cbp, currMB->i16mode);
        }

        currSlice->SetMotionVectorsMB (currMB, motion);

        //===== check for SKIP mode =====
        // A P-slice MB in mode 1 with no coded coefficients, reference index 0 and
        // a motion vector equal to allmvs[0..1] is converted to mode 0 (mb_type and
        // all four b8x8 modes set to 0). The 4:4:4 joined variant additionally
        // requires both chroma cbp values to be zero.
        if(p_Img->P444_joined)
        {
            if ((pslice) && currMB->best_mode == 1 && currMB->cbp==0 && currSlice->cmp_cbp[1] == 0 && currSlice->cmp_cbp[2] == 0 &&
                    motion->ref_idx[LIST_0][currMB->block_y][currMB->block_x]    == 0 &&
                    motion->mv     [LIST_0][currMB->block_y][currMB->block_x][0] == allmvs[0] &&
                    motion->mv     [LIST_0][currMB->block_y][currMB->block_x][1] == allmvs[1])
            {
                currMB->mb_type = currMB->b8x8[0].mode = currMB->b8x8[1].mode = currMB->b8x8[2].mode = currMB->b8x8[3].mode = 0;
                currMB->luma_transform_size_8x8_flag = FALSE;
            }
        }
        else if ((pslice) && currMB->best_mode == 1 && currMB->cbp==0 &&
                 motion->ref_idx[LIST_0][currMB->block_y][currMB->block_x]    == 0 &&
                 motion->mv     [LIST_0][currMB->block_y][currMB->block_x][0] == allmvs[0] &&
                 motion->mv     [LIST_0][currMB->block_y][currMB->block_x][1] == allmvs[1])
        {
            currMB->mb_type = currMB->b8x8[0].mode = currMB->b8x8[1].mode = currMB->b8x8[2].mode = currMB->b8x8[3].mode = 0;
            currMB->luma_transform_size_8x8_flag = FALSE;
        }

        if (currSlice->MbaffFrameFlag || (p_Inp->UseRDOQuant && currSlice->RDOQ_QP_Num > 1))
            set_mbaff_parameters(currMB);
    }

    // Rate control
    if(p_Inp->RCEnable && p_Inp->RCUpdateMode <= MAX_RC_MODE)
        rc_store_mad(currMB);

    //===== Decide if this MB will restrict the reference frames =====
    if (p_Inp->RestrictRef)
        update_refresh_map(currMB, intra, intra1);


    /*update adaptive rounding offset p_Inp*/
    // NOTE(review): the reset at the top of this function is guarded by
    // p_Inp->AdaptiveRounding, this update by p_Img->AdaptiveRounding -- confirm
    // the two flags are meant to differ.
    if (p_Img->AdaptiveRounding)
    {
        update_offset_params(currMB, currMB->best_mode, currMB->luma_transform_size_8x8_flag);
    }

    free_mem3Dpel(temp_img);
}
Beispiel #10
0
/**
 * Show the reconstructed luma plane of the current frame in window 0 and draw
 * a per-macroblock overlay on top of it.
 *
 * For every macroblock whose type has a colour in the table below, the overlay
 * consists of an outline of the macroblock, and then either
 *  - for P_L0 / P_8x8 / P_SKIP: outlines of the partitions plus one motion
 *    vector per partition, or
 *  - for intra types: a short line pattern selected by the prediction mode.
 * Finally the display is synchronised and, if enabled, the function blocks
 * until a character is read from stdin.
 *
 * FIXME: B-type MBs not handled yet properly
 *
 * @param h  encoder handle; reads h->fdec, h->param, h->sps and h->visualize.
 */
void x264_visualize_show( x264_t *h )
{
    /* Colour per macroblock type. A type for which the lookup yields NULL
     * is skipped entirely. */
    static const stringlist_t mb_type_colors[] = {
        { I_4x4   , "red" },
        { I_8x8   , "#ff5640" },
        { I_16x16 , "#ff8060" },
        { I_PCM   , "violet" },
        { P_L0    , "SlateBlue" },
        { P_8x8   , "blue" },
        { P_SKIP  , "green" },
        { B_DIRECT, "yellow" },
        { B_L0_L0 , "yellow" },
        { B_L0_L1 , "yellow" },
        { B_L0_BI , "yellow" },
        { B_L1_L0 , "yellow" },
        { B_L1_L1 , "yellow" },
        { B_L1_BI , "yellow" },
        { B_BI_L0 , "yellow" },
        { B_BI_L1 , "yellow" },
        { B_BI_BI , "yellow" },
        { B_8x8   , "yellow" },
        { B_SKIP  , "yellow" },
    };

    /* Fixed display options */
    static const int waitkey = 1;     /* block on stdin after each frame */
    static const int drawbox = 1;     /* outline every block / partition */
    static const int borders = 0;     /* also show the padded area around the frame */
    static const int zoom = 2;        /* magnification factor */
    static const int pad = 32;        /* padding (in pixels) shown when borders is set */

    uint8_t *const luma = h->fdec->plane[0];
    const int stride = h->fdec->i_stride[0];
    const int width = h->param.i_width;
    const int height = h->param.i_height;
    int mb_xy;

    /* Background: the decoded luma plane, with or without its border. */
    if (!borders) {
        disp_gray_zoom(0, luma, width, height, stride, "fdec", zoom);
    } else {
        disp_gray_zoom(0, luma - pad*stride - pad, width+2*pad, height+2*pad, stride, "fdec", zoom);
    }

    for( mb_xy = 0; mb_xy < h->sps->i_mb_width * h->sps->i_mb_height; mb_xy++ )
    {
        visualize_t *const v = (visualize_t*)h->visualize + mb_xy;
        char *const col = GET_STRING(mb_type_colors, v->i_type);
        const int mb_x = mb_xy % h->sps->i_mb_width;
        const int mb_y = mb_xy / h->sps->i_mb_width;
        /* When the border is displayed, everything moves by the padding. */
        const int border_shift = borders ? pad*zoom : 0;
        /* Top-left corner of this macroblock in zoomed display coordinates */
        const int px = mb_x*16*zoom + border_shift;
        const int py = mb_y*16*zoom + border_shift;
        const int is_inter = (v->i_type==P_L0 || v->i_type==P_8x8 || v->i_type==P_SKIP);
        int list = 0;
        unsigned int row, blk;

        if (!col)
            continue;

        disp_setcolor(col);
        if (drawbox)
            disp_rect(0, px, py, px + 16*zoom - 1, py + 16*zoom - 1);

        if (is_inter) {
            /* Inter macroblock: one vector per partition, drawn from the
             * partition centre. */
            if (v->i_partition==D_16x16 || v->i_type==P_SKIP) {
                mv(px + 8*zoom, py + 8*zoom, v->mv[list][0][0], v->ref[list][0][0], zoom, col);
            }
            if (v->i_partition==D_16x8) {
                /* upper half, then lower half */
                if (drawbox)
                    disp_rect(0, px, py, px + 16*zoom, py + 8*zoom);
                mv(px + 8*zoom, py + 4*zoom, v->mv[list][0][0], v->ref[list][0][0], zoom, col);
                if (drawbox)
                    disp_rect(0, px, py + 8*zoom, px + 16*zoom, py + 16*zoom);
                mv(px + 8*zoom, py + 12*zoom, v->mv[list][2][0], v->ref[list][2][0], zoom, col);
            }
            if (v->i_partition==D_8x16) {
                /* left half, then right half */
                if (drawbox)
                    disp_rect(0, px, py, px + 8*zoom, py + 16*zoom);
                mv(px + 4*zoom, py + 8*zoom, v->mv[list][0][0], v->ref[list][0][0], zoom, col);
                if (drawbox)
                    disp_rect(0, px + 8*zoom, py, px + 16*zoom, py + 16*zoom);
                mv(px + 12*zoom, py + 8*zoom, v->mv[list][0][2], v->ref[list][0][2], zoom, col);
            }
            if (v->i_partition==D_8x8) {
                for (row=0; row<2; row++) {
                    for (blk=0; blk<2; blk++) {
                        const int sub = v->i_sub_partition[row*2+blk];
                        /* top-left corner of this 8x8 block */
                        const int bx = px + blk*8*zoom;
                        const int by = py + row*8*zoom;
                        /* indices of the 4x4 cells covered by this 8x8 block */
                        const unsigned int r0 = 2*row, r1 = 2*row+1;
                        const unsigned int c0 = 2*blk, c1 = 2*blk+1;

                        list = x264_mb_partition_listX_table[0][sub] ? 0 : 1; /* FIXME: not tested if this works */

                        if (IS_SUB8x8(sub)) {
                            if (drawbox)
                                disp_rect(0, bx, by, bx + 8*zoom, by + 8*zoom);
                            mv(bx + 4*zoom, by + 4*zoom, v->mv[list][r0][c0], v->ref[list][r0][c0], zoom, col);
                        }
                        if (IS_SUB8x4(sub)) {
                            if (drawbox) {
                                disp_rect(0, bx, by, bx + 8*zoom, by + 4*zoom);
                                disp_rect(0, bx, by + 4*zoom, bx + 8*zoom, by + 8*zoom);
                            }
                            mv(bx + 4*zoom, by + 2*zoom, v->mv[list][r0][c0], v->ref[list][r0][c0], zoom, col);
                            mv(bx + 4*zoom, by + 6*zoom, v->mv[list][r1][c0], v->ref[list][r1][c0], zoom, col);
                        }
                        if (IS_SUB4x8(sub)) {
                            if (drawbox) {
                                disp_rect(0, bx, by, bx + 4*zoom, by + 8*zoom);
                                disp_rect(0, bx + 4*zoom, by, bx + 8*zoom, by + 8*zoom);
                            }
                            mv(bx + 2*zoom, by + 4*zoom, v->mv[list][r0][c0], v->ref[list][r0][c0], zoom, col);
                            mv(bx + 6*zoom, by + 4*zoom, v->mv[list][r0][c1], v->ref[list][r0][c1], zoom, col);
                        }
                        if (IS_SUB4x4(sub)) {
                            if (drawbox) {
                                disp_rect(0, bx, by, bx + 4*zoom, by + 4*zoom);
                                disp_rect(0, bx + 4*zoom, by, bx + 8*zoom, by + 4*zoom);
                                disp_rect(0, bx, by + 4*zoom, bx + 4*zoom, by + 8*zoom);
                                disp_rect(0, bx + 4*zoom, by + 4*zoom, bx + 8*zoom, by + 8*zoom);
                            }
                            mv(bx + 2*zoom, by + 2*zoom, v->mv[list][r0][c0], v->ref[list][r0][c0], zoom, col);
                            mv(bx + 6*zoom, by + 2*zoom, v->mv[list][r0][c1], v->ref[list][r0][c1], zoom, col);
                            mv(bx + 2*zoom, by + 6*zoom, v->mv[list][r1][c0], v->ref[list][r1][c0], zoom, col);
                            mv(bx + 6*zoom, by + 6*zoom, v->mv[list][r1][c1], v->ref[list][r1][c1], zoom, col);
                        }
                    }
                }
            }
        }

        if (IS_INTRA(v->i_type) || v->i_type==I_PCM) {
            /* Intra macroblock: draw a mark for the prediction mode. */
            if (v->i_type==I_16x16) {
                /* Marks are placed on a grid inset by 2 (zoomed) pixels. */
                const int x_lo = px + 2*zoom,  y_lo = py + 2*zoom;
                const int x_mid = px + 8*zoom, y_mid = py + 8*zoom;
                const int x_hi = px + 14*zoom, y_hi = py + 14*zoom;

                switch (v->i_intra16x16_pred_mode) {
                case I_PRED_16x16_V:
                    disp_line(0, x_lo, y_lo, x_hi, y_lo);
                    break;
                case I_PRED_16x16_H:
                    disp_line(0, x_lo, y_lo, x_lo, y_hi);
                    break;
                case I_PRED_16x16_DC:
                case I_PRED_16x16_DC_LEFT:
                case I_PRED_16x16_DC_TOP:
                case I_PRED_16x16_DC_128:
                    /* all DC variants share one mark: both edges */
                    disp_line(0, x_lo, y_lo, x_hi, y_lo);
                    disp_line(0, x_lo, y_lo, x_lo, y_hi);
                    break;
                case I_PRED_16x16_P:
                    disp_line(0, x_lo, y_lo, x_mid, y_mid);
                    break;
                }
            }
            if (v->i_type==I_4x4 || v->i_type==I_8x8) {
                /* I_8x8 visits every second 4x4 cell and doubles the drawing unit. */
                const int step = v->i_type==I_8x8 ? 2 : 1;
                const int unit = zoom * step;

                for (row=0; row<4; row+=step) {
                    for (blk=0; blk<4; blk+=step) {
                        /* top-left corner of the block */
                        const int bx = px + blk*4*zoom;
                        const int by = py + row*4*zoom;
                        /* grid lines at 0, 1, 2 and 4 drawing units from that corner */
                        const int gx0 = bx,          gy0 = by;
                        const int gx1 = bx + 1*unit, gy1 = by + 1*unit;
                        const int gx2 = bx + 2*unit, gy2 = by + 2*unit;
                        const int gx4 = bx + 4*unit, gy4 = by + 4*unit;

                        if (drawbox)
                            disp_rect(0, bx, by, gx4, gy4);

                        switch (v->intra4x4_pred_mode[row][blk]) {
                        case I_PRED_4x4_V:
                            disp_line(0, gx0, gy1, gx4, gy1);
                            break;
                        case I_PRED_4x4_H:
                            disp_line(0, gx1, gy0, gx1, gy4);
                            break;
                        case I_PRED_4x4_DC:
                        case I_PRED_4x4_DC_LEFT:
                        case I_PRED_4x4_DC_TOP:
                        case I_PRED_4x4_DC_128:
                            /* all DC variants share one mark */
                            disp_line(0, gx1, gy1, gx4, gy1);
                            disp_line(0, gx1, gy1, gx1, gy4);
                            break;
                        case I_PRED_4x4_DDL:
                            disp_line(0, gx0, gy0, gx4, gy4);
                            break;
                        case I_PRED_4x4_DDR:
                            disp_line(0, gx0, gy4, gx4, gy0);
                            break;
                        case I_PRED_4x4_VR:
                            disp_line(0, gx0, gy2, gx4, gy1);
                            break;
                        case I_PRED_4x4_HD:
                            disp_line(0, gx2, gy0, gx1, gy4);
                            break;
                        case I_PRED_4x4_VL:
                            disp_line(0, gx0, gy1, gx4, gy2);
                            break;
                        case I_PRED_4x4_HU:
                            disp_line(0, gx1, gy0, gx2, gy4);
                            break;
                        }
                    }
                }
            }
        }
    }

    disp_sync();
    if (waitkey)
        getchar();
}
Beispiel #11
0
/* Write the mb_type syntax element of the current macroblock with CABAC.
 *
 * The bins that are emitted depend on the slice type in h->sh.i_type:
 *   - I slice: the intra type is coded through x264_cabac_mb_type_intra() with
 *     contexts starting at 3; the first context is offset by the number of
 *     available neighbours (left, top) whose type is not I_4x4.
 *   - P slice: bins use contexts 14..17; intra types get a single 1 bin on
 *     context 14 followed by the intra suffix starting at context 17. A P_L0
 *     macroblock whose partition is none of D_16x16/D_16x8/D_8x16 writes nothing.
 *   - B slice: bins use contexts 27..27+5; the first context is offset by the
 *     number of available neighbours that are neither B_SKIP nor B_DIRECT.
 *     Intra types are followed by the intra suffix starting at context 32.
 * An unexpected B macroblock type or an unknown slice type is reported through
 * x264_log() and no bins are written.
 */
static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
{
    const int i_mb_type = h->mb.i_type;

    /* ---------------------------------------------------------------- I -- */
    if( h->sh.i_type == SLICE_TYPE_I )
    {
        const int ctx = ( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4 )
                      + ( h->mb.i_mb_type_top  >= 0 && h->mb.i_mb_type_top  != I_4x4 );

        x264_cabac_mb_type_intra( h, cb, i_mb_type, 3+ctx, 3+3, 3+4, 3+5, 3+6, 3+7 );
        return;
    }

    /* ---------------------------------------------------------------- P -- */
    if( h->sh.i_type == SLICE_TYPE_P )
    {
        /* prefix: 14, suffix: 17 */
        if( i_mb_type == P_L0 )
        {
            switch( h->mb.i_partition )
            {
                case D_16x16:
                    x264_cabac_encode_decision( cb, 14, 0 );
                    x264_cabac_encode_decision( cb, 15, 0 );
                    x264_cabac_encode_decision( cb, 16, 0 );
                    break;
                case D_16x8:
                    x264_cabac_encode_decision( cb, 14, 0 );
                    x264_cabac_encode_decision( cb, 15, 1 );
                    x264_cabac_encode_decision( cb, 17, 1 );
                    break;
                case D_8x16:
                    x264_cabac_encode_decision( cb, 14, 0 );
                    x264_cabac_encode_decision( cb, 15, 1 );
                    x264_cabac_encode_decision( cb, 17, 0 );
                    break;
                default:
                    /* any other partition: nothing is written */
                    break;
            }
        }
        else if( i_mb_type == P_8x8 )
        {
            x264_cabac_encode_decision( cb, 14, 0 );
            x264_cabac_encode_decision( cb, 15, 0 );
            x264_cabac_encode_decision( cb, 16, 1 );
        }
        else
        {
            /* intra macroblock inside a P slice: prefix bin, then intra suffix */
            x264_cabac_encode_decision( cb, 14, 1 );
            x264_cabac_mb_type_intra( h, cb, i_mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
        }
        return;
    }

    /* ---------------------------------------------------------- unknown -- */
    if( h->sh.i_type != SLICE_TYPE_B )
    {
        x264_log(h, X264_LOG_ERROR, "unknown SLICE_TYPE unsupported in x264_macroblock_write_cabac\n" );
        return;
    }

    /* ---------------------------------------------------------------- B -- */
    {
        /* Number of bins used by each row of bin_bits. */
        static const int bin_count[21] =
        {
            3, 6, 6,    /* L0 L0 : 16x16, 16x8, 8x16 */
            3, 6, 6,    /* L1 L1 : 16x16, 16x8, 8x16 */
            6, 7, 7,    /* BI BI : 16x16, 16x8, 8x16 */
            6, 6,       /* L0 L1 : 16x8, 8x16 */
            6, 6,       /* L1 L0 : 16x8, 8x16 */
            7, 7,       /* L0 BI : 16x8, 8x16 */
            7, 7,       /* L1 BI : 16x8, 8x16 */
            7, 7,       /* BI L0 : 16x8, 8x16 */
            7, 7        /* BI L1 : 16x8, 8x16 */
        };
        /* Bin strings, one row per (type, partition) combination. */
        static const int bin_bits[21][7] =
        {
            { 1, 0, 0 },                /*  0: L0 L0 16x16 */
            { 1, 1, 0, 0, 0, 1 },       /*  1: L0 L0 16x8  */
            { 1, 1, 0, 0, 1, 0 },       /*  2: L0 L0 8x16  */
            { 1, 0, 1 },                /*  3: L1 L1 16x16 */
            { 1, 1, 0, 0, 1, 1 },       /*  4: L1 L1 16x8  */
            { 1, 1, 0, 1, 0, 0 },       /*  5: L1 L1 8x16  */
            { 1, 1, 0, 0, 0, 0 },       /*  6: BI BI 16x16 */
            { 1, 1, 1, 1, 0, 0, 0 },    /*  7: BI BI 16x8  */
            { 1, 1, 1, 1, 0, 0, 1 },    /*  8: BI BI 8x16  */
            { 1, 1, 0, 1, 0, 1 },       /*  9: L0 L1 16x8  */
            { 1, 1, 0, 1, 1, 0 },       /* 10: L0 L1 8x16  */
            { 1, 1, 0, 1, 1, 1 },       /* 11: L1 L0 16x8  */
            { 1, 1, 1, 1, 1, 0 },       /* 12: L1 L0 8x16  */
            { 1, 1, 1, 0, 0, 0, 0 },    /* 13: L0 BI 16x8  */
            { 1, 1, 1, 0, 0, 0, 1 },    /* 14: L0 BI 8x16  */
            { 1, 1, 1, 0, 0, 1, 0 },    /* 15: L1 BI 16x8  */
            { 1, 1, 1, 0, 0, 1, 1 },    /* 16: L1 BI 8x16  */
            { 1, 1, 1, 0, 1, 0, 0 },    /* 17: BI L0 16x8  */
            { 1, 1, 1, 0, 1, 0, 1 },    /* 18: BI L0 8x16  */
            { 1, 1, 1, 0, 1, 1, 0 },    /* 19: BI L1 16x8  */
            { 1, 1, 1, 0, 1, 1, 1 }     /* 20: BI L1 8x16  */
        };

        const int ctx =
            ( h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != B_SKIP && h->mb.i_mb_type_left != B_DIRECT )
          + ( h->mb.i_mb_type_top  >= 0 && h->mb.i_mb_type_top  != B_SKIP && h->mb.i_mb_type_top  != B_DIRECT );
        const int *bins;
        int row;
        int k;

        if( i_mb_type == B_DIRECT )
        {
            x264_cabac_encode_decision( cb, 27+ctx, 0 );
            return;
        }

        if( i_mb_type == B_8x8 || IS_INTRA( i_mb_type ) )
        {
            /* B_8x8 is coded 1 1 1 1 1 1, the intra prefix 1 1 1 1 0 1:
             * the two strings only differ in their fifth bin. */
            const int b_intra = ( i_mb_type != B_8x8 );

            x264_cabac_encode_decision( cb, 27+ctx, 1 );
            x264_cabac_encode_decision( cb, 27+3,   1 );
            x264_cabac_encode_decision( cb, 27+4,   1 );

            x264_cabac_encode_decision( cb, 27+5,   1 );
            x264_cabac_encode_decision( cb, 27+5,   b_intra ? 0 : 1 );
            x264_cabac_encode_decision( cb, 27+5,   1 );

            if( b_intra )
            {
                /* suffix */
                x264_cabac_mb_type_intra( h, cb, i_mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
            }
            return;
        }

        /* First table row belonging to this macroblock type. */
        switch( i_mb_type )
        {
            /* types that exist as D_16x16, D_16x8 and D_8x16 */
            case B_L0_L0: row = 0;  break;
            case B_L1_L1: row = 3;  break;
            case B_BI_BI: row = 6;  break;

            /* types that only exist as D_16x8 and D_8x16 */
            case B_L0_L1: row = 9;  break;
            case B_L1_L0: row = 11; break;
            case B_L0_BI: row = 13; break;
            case B_L1_BI: row = 15; break;
            case B_BI_L0: row = 17; break;
            case B_BI_L1: row = 19; break;

            default:
                x264_log(h, X264_LOG_ERROR, "error in B mb type\n" );
                return;
        }

        /* Step to the row of the actual partition shape. */
        if( row < 9 )
        {
            if( h->mb.i_partition == D_16x8 )
                row += 1;
            else if( h->mb.i_partition == D_8x16 )
                row += 2;
        }
        else if( h->mb.i_partition == D_8x16 )
        {
            row += 1;
        }

        bins = bin_bits[row];

        x264_cabac_encode_decision( cb, 27+ctx, bins[0] );
        x264_cabac_encode_decision( cb, 27+3,   bins[1] );
        /* the context of the third bin depends on the value of the second one */
        x264_cabac_encode_decision( cb, bins[1] != 0 ? 27+4 : 27+5, bins[2] );
        for( k = 3; k < bin_count[row]; k++ )
            x264_cabac_encode_decision( cb, 27+5, bins[k] );
    }
}
/*!
*************************************************************************************
* \brief
*    Mode Decision for a macroblock with error resilience
*
*    The mode-decision body is executed once, or twice when p_Inp->RestrictRef == 1
*    and the slice is a P/SP slice or a B slice with nal_reference_idc > 0. In the
*    two-run case p_Inp->rdopt is overwritten with 1 for the first run and 3 for the
*    second run (and is left at 3 when the function returns).
*
*    Each run:
*      - optionally resets adaptive rounding and (for MBAFF) the non-zero coefficients,
*      - stores the coding state,
*      - for non-intra macroblocks performs motion estimation for modes 1..3 and,
*        if enabled, the P8x8 sub-macroblock decisions with 8x8 and/or 4x4 transform,
*      - loops over the chroma intra prediction modes and the entries of
*        mb_mode_table, calling compute_mode_RD_cost() for every valid mode.
*
*    Afterwards the stored best-mode parameters are written back, the MAD is stored
*    for rate control and, when RestrictRef is set, the refresh map is updated.
*
* \param currMB
*    Macroblock to be encoded; also gives access to its slice (p_Slice), the video
*    parameters (p_Vid) and the input parameters (p_Inp).
*************************************************************************************
*/
void encode_one_macroblock_highloss (Macroblock *currMB)
{
  Slice *currSlice = currMB->p_Slice;
  RDOPTStructure  *p_RDO = currSlice->p_RDO;
  VideoParameters *p_Vid = currMB->p_Vid;
  InputParameters *p_Inp = currMB->p_Inp;
  PicMotionParams **motion = p_Vid->enc_picture->mv_info;

  int         max_index = 9;   // number of mb_mode_table entries that are tested
  int         rerun, block, index, mode, i, j;
  RD_PARAMS   enc_mb;
  // Only bmcost[0] receives DISTBLK_MAX from this initializer, the remaining
  // elements are zero-initialized; the entries are (re)assigned before use below.
  distblk     bmcost[5] = {DISTBLK_MAX};
  distblk     cost=0;
  // NOTE(review): min_cost is initialized once, outside the rerun loop, so a second
  // run compares against the minimum found in the first run unless the MB is intra.
  distblk     min_cost = DISTBLK_MAX;
  int         intra1 = 0;      // IS_INTRA(currMB) as observed after the first run
  int         mb_available[3];

  short       bslice      = (short) (currSlice->slice_type == B_SLICE);
  short       pslice      = (short) ((currSlice->slice_type == P_SLICE) || (currSlice->slice_type == SP_SLICE));
  // Intra is forced for I/SI slices and for P/SP macroblocks lying on row mb_y_upd
  // (as long as mb_y_upd differs from mb_y_intra); RandomIntra() may force it too.
  short       intra       = (short) ((currSlice->slice_type == I_SLICE) || (currSlice->slice_type == SI_SLICE) || (pslice && currMB->mb_y == p_Vid->mb_y_upd && p_Vid->mb_y_upd != p_Vid->mb_y_intra));
  int         lambda_mf[3];
  // Two encoding runs are needed only with RestrictRef == 1 in P/SP slices or in
  // B slices that are used for reference (nal_reference_idc > 0).
  short       runs        = (short) ((p_Inp->RestrictRef==1 && (pslice  || (bslice && p_Vid->nal_reference_idc>0))) ? 2 : 1);

  imgpel    **mb_pred  = currSlice->mb_pred[0];
  Block8x8Info *b8x8info = p_Vid->b8x8info;

  char       chroma_pred_mode_range[2];   // [0] = first, [1] = last chroma mode to test
  short       inter_skip = 0;
  BestMode    md_best;
  Info8x8     best;

  init_md_best(&md_best);

  // Init best (need to create simple function)
  best.pdir = 0;
  best.bipred = 0;
  best.ref[LIST_0] = 0;
  best.ref[LIST_1] = -1;

  intra |= RandomIntra (p_Vid, currMB->mbAddrX);    // Forced Pseudo-Random Intra

  //===== Setup Macroblock encoding parameters =====
  init_enc_mb_params(currMB, &enc_mb, intra);

  // Perform multiple encodings if rdopt with losses is enabled
  for (rerun=0; rerun<runs; rerun++)
  {
    // First run uses rdopt 1, second run rdopt 3; the input parameter is overwritten
    if (runs==2)
      p_Inp->rdopt= (rerun==0) ? 1 : 3;

    if (p_Inp->AdaptiveRounding)
    {
      reset_adaptive_rounding(p_Vid);
    }

    if (currSlice->mb_aff_frame_flag)
    {
      reset_mb_nz_coeff(p_Vid, currMB->mbAddrX);
    }

    //=====   S T O R E   C O D I N G   S T A T E   =====
    //---------------------------------------------------
    currSlice->store_coding_state (currMB, currSlice->p_RDO->cs_cm);

    if (!intra)
    {
      //===== set skip/direct motion vectors =====
      if (enc_mb.valid[0])
      {
        if (bslice)
          currSlice->Get_Direct_Motion_Vectors (currMB);
        else 
          FindSkipModeMotionVector (currMB);
      }
      if (p_Inp->CtxAdptLagrangeMult == 1)
      {
        get_initial_mb16x16_cost(currMB);
      }

      //===== MOTION ESTIMATION FOR 16x16, 16x8, 8x16 BLOCKS =====
      for (mode = 1; mode < 4; mode++)
      {
        best.mode = (char) mode;
        best.bipred = 0;
        b8x8info->best[mode][0].bipred = 0;

        if (enc_mb.valid[mode])
        {
          // mode 1 has a single partition, modes 2 and 3 have two
          for (cost=0, block=0; block<(mode==1?1:2); block++)
          {
            update_lambda_costs(currMB, &enc_mb, lambda_mf);
            PartitionMotionSearch (currMB, mode, block, lambda_mf);

            //--- set 4x4 block indices (for getting MV) ---
            j = (block==1 && mode==2 ? 2 : 0);
            i = (block==1 && mode==3 ? 2 : 0);

            //--- get cost and reference frame for List 0 prediction ---
            bmcost[LIST_0] = DISTBLK_MAX;
            list_prediction_cost(currMB, LIST_0, block, mode, &enc_mb, bmcost, best.ref);

            if (bslice)
            {
              //--- get cost and reference frame for List 1 prediction ---
              bmcost[LIST_1] = DISTBLK_MAX;
              list_prediction_cost(currMB, LIST_1, block, mode, &enc_mb, bmcost, best.ref);

              // Compute bipredictive cost between best list 0 and best list 1 references
              list_prediction_cost(currMB, BI_PRED, block, mode, &enc_mb, bmcost, best.ref);

              // currently Bi predictive ME is only supported for modes 1, 2, 3 and ref 0
              if (is_bipred_enabled(p_Vid, mode))
              {
                get_bipred_cost(currMB, mode, block, i, j, &best, &enc_mb, bmcost);
              }
              else
              {
                bmcost[BI_PRED_L0] = DISTBLK_MAX;
                bmcost[BI_PRED_L1] = DISTBLK_MAX;
              }

              // Determine prediction list based on mode cost
              determine_prediction_list(bmcost, &best, &cost);
            }
            else // if (bslice)
            {
              // P/SP slice: only list 0 is available, accumulate its cost
              best.pdir  = 0;
              cost      += bmcost[LIST_0];
            }

            assign_enc_picture_params(currMB, mode, &best, 2 * block);

            //----- set reference frame and direction parameters -----
            set_block8x8_info(b8x8info, mode, block, &best);

            //--- set reference frames and motion vectors ---
            if (mode>1 && block==0)
              currSlice->set_ref_and_motion_vectors (currMB, motion, &best, block);
          } // for (block=0; block<(mode==1?1:2); block++)
          // keep the cheapest of the 16x16 / 16x8 / 8x16 modes seen so far
          if (cost < min_cost)
          {
            md_best.mode = (byte) mode;
            md_best.cost = cost;
            currMB->best_mode = (short) mode;
            min_cost  = cost;
            if (p_Inp->CtxAdptLagrangeMult == 1)
            {
              adjust_mb16x16_cost(currMB, cost);
            }
          }
        } // if (enc_mb.valid[mode])
      } // for (mode=1; mode<4; mode++)

      if (enc_mb.valid[P8x8])
      {
        currMB->valid_8x8 = FALSE;

        if (p_Inp->Transform8x8Mode)
        {
          ResetRD8x8Data(p_Vid, p_RDO->tr8x8);
          currMB->luma_transform_size_8x8_flag = TRUE; //switch to 8x8 transform size
          //===========================================================
          // Check 8x8 partition with transform size 8x8
          //===========================================================
          //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
          for (block = 0; block < 4; block++)
          {
            currSlice->submacroblock_mode_decision(currMB, &enc_mb, p_RDO->tr8x8, p_RDO->cofAC8x8ts[block], block, &cost);
            // stop as soon as valid_8x8 is not set after the sub-macroblock decision
            if(!currMB->valid_8x8)
              break;
            set_subblock8x8_info(b8x8info, P8x8, block, p_RDO->tr8x8);
          }
        }// if (p_Inp->Transform8x8Mode)

        currMB->valid_4x4 = FALSE;
        if (p_Inp->Transform8x8Mode != 2)
        {
          currMB->luma_transform_size_8x8_flag = FALSE; //switch to 4x4 transform size
          ResetRD8x8Data(p_Vid, p_RDO->tr4x4);
          //=================================================================
          // Check 8x8, 8x4, 4x8 and 4x4 partitions with transform size 4x4
          //=================================================================
          //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
          for (block = 0; block < 4; block++)
          {
            currSlice->submacroblock_mode_decision(currMB, &enc_mb, p_RDO->tr4x4, p_RDO->coefAC8x8[block], block, &cost);
            // stop as soon as valid_4x4 is not set after the sub-macroblock decision
            if(!currMB->valid_4x4)
              break;
            set_subblock8x8_info(b8x8info, P8x8, block, p_RDO->tr4x4);
          }

        }// if (p_Inp->Transform8x8Mode != 2)

        if (p_Inp->RCEnable)
          rc_store_diff(currSlice->diffy, &p_Vid->pCurImg[currMB->opix_y], currMB->pix_x, mb_pred);

        p_Vid->giRDOpt_B8OnlyFlag = FALSE;
      }
    }
    else // if (!intra)
    {
      // intra macroblock: no inter cost is available
      min_cost = DISTBLK_MAX;
    }

    // Set Chroma mode
    set_chroma_pred_mode(currMB, enc_mb, mb_available, chroma_pred_mode_range);

    //========= C H O O S E   B E S T   M A C R O B L O C K   M O D E =========
    //-------------------------------------------------------------------------
    for (currMB->c_ipred_mode = chroma_pred_mode_range[0]; currMB->c_ipred_mode<=chroma_pred_mode_range[1]; currMB->c_ipred_mode++)
    {
      // bypass if c_ipred_mode is not allowed:
      //  - chroma intra modes other than DC are disabled (ChromaIntraDisable == 1), or
      //  - VERT_PRED_8 without mb_available[0], HOR_PRED_8 without mb_available[1],
      //    PLANE_8 without all three mb_available entries
      if ( (p_Vid->yuv_format != YUV400) &&
        (  ((!intra || !p_Inp->IntraDisableInterOnly) && p_Inp->ChromaIntraDisable == 1 && currMB->c_ipred_mode!=DC_PRED_8)
        || (currMB->c_ipred_mode == VERT_PRED_8 && !mb_available[0]) 
        || (currMB->c_ipred_mode == HOR_PRED_8  && !mb_available[1]) 
        || (currMB->c_ipred_mode == PLANE_8     && (!mb_available[1] || !mb_available[0] || !mb_available[2]))))
        continue;

      //===== GET BEST MACROBLOCK MODE =====
      for (index=0; index < max_index; index++)
      {
        mode = mb_mode_table[index];
        if (enc_mb.valid[mode])
        {
          if (p_Vid->yuv_format != YUV400)
          {           
            currMB->i16mode = 0; 
          }

          // Skip intra modes in inter slices if best mode is inter <P8x8 with cbp equal to 0    
          if (currSlice->P444_joined)
          { 
            // joined 4:4:4 coding: the cbp of the other two colour planes must be zero as well
            if (p_Inp->SkipIntraInInterSlices && !intra && mode >= I16MB 
              && currMB->best_mode <=3 && currMB->best_cbp == 0 && currSlice->cmp_cbp[1] == 0 && currSlice->cmp_cbp[2] == 0 && (currMB->min_rdcost < weighted_cost(enc_mb.lambda_mdfp,5)))
              continue;
          }
          else
          {
            if (p_Inp->SkipIntraInInterSlices)
            {
              if (!intra && mode >= I4MB)
              {
                if (currMB->best_mode <=3 && currMB->best_cbp == 0 && (currMB->min_rdcost < weighted_cost(enc_mb.lambda_mdfp, 5)))
                {
                  continue;
                }
                else if (currMB->best_mode == 0 && (currMB->min_rdcost < weighted_cost(enc_mb.lambda_mdfp,6)))
                {
                  continue;
                }
              }
            }
          }
          compute_mode_RD_cost(currMB, &enc_mb, (short) mode, &inter_skip);

        }
      }// for (index=0; index<max_index; index++)
    }// for (currMB->c_ipred_mode=chroma_pred_mode_range[0]; currMB->c_ipred_mode<=chroma_pred_mode_range[1]; currMB->c_ipred_mode++)

    restore_nz_coeff(currMB);

    // remember whether the first run ended with an intra macroblock
    if (rerun==0)
      intra1 = IS_INTRA(currMB);
  } // for (rerun=0; rerun<runs; rerun++)

  //=====  S E T   F I N A L   M A C R O B L O C K   P A R A M E T E R S ======
  //---------------------------------------------------------------------------
  update_qp_cbp_tmp(currMB, p_RDO->cbp);
  currSlice->set_stored_mb_parameters (currMB);

  // Rate control
  if(p_Inp->RCEnable && p_Inp->RCUpdateMode <= MAX_RC_MODE)
    rc_store_mad(currMB);

  //===== Decide if this MB will restrict the reference frames =====
  if (p_Inp->RestrictRef)
    update_refresh_map(currMB, intra, intra1);
}
/*****************************************************************************
 * dull_macroblock_analyse_P_BEST:
 *   Reduced analysis of a P macroblock. Only two inter partitions are
 *   considered: D_16x16 and, when X264_ANALYSE_PSUB16x16 is enabled, D_8x8
 *   (whose 8x8 blocks may be replaced by 4x4 sub-partitions when
 *   X264_ANALYSE_PSUB8x8 is enabled). The chosen inter cost is then compared
 *   with the I_16x16 / I_8x8 / I_4x4 costs through COPY2_IF_LT.
 *
 *   On the normal path h->mb.i_partition and h->mb.i_type are written and
 *   dull_analyse_update_cache_P() is called last. If the 16x16 search leaves
 *   h->mb.i_type == P_SKIP the function returns early, before that call.
 *****************************************************************************/
void dull_macroblock_analyse_P_BEST( x264_t *h )
{
    x264_mb_analysis_t analysis;
    unsigned int flags;
    int i_cost = COST_MAX;
    int i_type;
    int i_partition;    /* only ever D_16x16 or D_8x8 in this function */
    int i;

    dull_mb_analyse_init_P( h, &analysis );

    /*--------------------------- Do the analysis ---------------------------*/
    analysis.b_try_skip = 0;

    flags = h->param.analyse.inter;

    x264_mb_analyse_load_costs( h, &analysis );

    dull_mb_analyse_inter_p16x16_2( h, &analysis );

    /* The 16x16 search settled on P_SKIP: clear the remaining per-reference
     * 16x16 MV entries of this macroblock and stop here. */
    if( h->mb.i_type == P_SKIP )
    {
        for( i = 1; i < h->mb.pic.i_fref[0]; i++ )
            M32( h->mb.mvr[0][i][h->mb.i_mb_xy] ) = 0;
        return;
    }

    if( flags & X264_ANALYSE_PSUB16x16 )
    {
        if( h->param.analyse.b_mixed_references )
            x264_mb_analyse_inter_p8x8_mixed_ref( h, &analysis );
        else
            dull_mb_analyse_inter_p8x8_2( h, &analysis );
    }

    /* Select best inter mode */
    i_type = P_L0;
    i_partition = D_16x16;
    i_cost = analysis.l0.me16x16.cost;

    if( ( flags & X264_ANALYSE_PSUB16x16 ) &&
        analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost )
    {
        i_type = P_8x8;
        i_partition = D_8x8;
        i_cost = analysis.l0.i_cost8x8;

        /* Do sub 8x8 */
        if( flags & X264_ANALYSE_PSUB8x8 )
        {
            for( i = 0; i < 4; i++ )
            {
                x264_mb_analyse_inter_p4x4( h, &analysis, i );
                if( analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost )
                {
                    int i_cost8x8 = analysis.l0.i_cost4x4[i];
                    h->mb.i_sub_partition[i] = D_L0_4x4;

                    /* replace the 8x8 cost of this block by its 4x4 cost */
                    i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost;
                }
                x264_mb_cache_mv_p8x8( h, &analysis, i );
            }
            analysis.l0.i_cost8x8 = i_cost;
        }
    }

    h->mb.i_partition = i_partition;

    /* refine qpel */
    //FIXME mb_type costs?
    if( analysis.i_mbrd || !h->mb.i_subpel_refine )
    {
        /* refine later */
    }
    else if( i_partition == D_16x16 )
    {
        x264_me_refine_qpel( h, &analysis.l0.me16x16 );
        i_cost = analysis.l0.me16x16.cost;
    }
    else if( i_partition == D_8x8 )
    {
        int i8x8;
        i_cost = 0;
        for( i8x8 = 0; i8x8 < 4; i8x8++ )
        {
            switch( h->mb.i_sub_partition[i8x8] )
            {
                case D_L0_8x8:
                    x264_me_refine_qpel( h, &analysis.l0.me8x8[i8x8] );
                    i_cost += analysis.l0.me8x8[i8x8].cost;
                    break;
                case D_L0_8x4:
                    x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][0] );
                    x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][1] );
                    i_cost += analysis.l0.me8x4[i8x8][0].cost +
                              analysis.l0.me8x4[i8x8][1].cost;
                    break;
                case D_L0_4x8:
                    x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][0] );
                    x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][1] );
                    i_cost += analysis.l0.me4x8[i8x8][0].cost +
                              analysis.l0.me4x8[i8x8][1].cost;
                    break;

                case D_L0_4x4:
                    x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][0] );
                    x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][1] );
                    x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][2] );
                    x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][3] );
                    i_cost += analysis.l0.me4x4[i8x8][0].cost +
                              analysis.l0.me4x4[i8x8][1].cost +
                              analysis.l0.me4x4[i8x8][2].cost +
                              analysis.l0.me4x4[i8x8][3].cost;
                    break;
                default:
                    x264_log( h, X264_LOG_ERROR, "internal error (!8x8 && !4x4)\n" );
                    break;
            }
        }
    }

    /* Intra analysis. With chroma ME enabled the chroma intra cost is taken
     * out of the cost passed to the luma intra analysis and added back to
     * every intra candidate afterwards. */
    if( h->mb.b_chroma_me )
    {
        x264_mb_analyse_intra_chroma( h, &analysis );
        x264_mb_analyse_intra( h, &analysis, i_cost - analysis.i_satd_i8x8chroma );
        analysis.i_satd_i16x16 += analysis.i_satd_i8x8chroma;
        analysis.i_satd_i8x8 += analysis.i_satd_i8x8chroma;
        analysis.i_satd_i4x4 += analysis.i_satd_i8x8chroma;
    }
    else
        x264_mb_analyse_intra( h, &analysis, i_cost );

    COPY2_IF_LT( i_cost, analysis.i_satd_i16x16, i_type, I_16x16 );
    COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, i_type, I_8x8 );
    COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, i_type, I_4x4 );

    h->mb.i_type = i_type;

    /* RD-based refinement of the selected mode */
    if( analysis.i_mbrd >= 2 && h->mb.i_type != I_PCM )
    {
        if( IS_INTRA( h->mb.i_type ) )
        {
            x264_intra_rd_refine( h, &analysis );
        }
        else if( i_partition == D_16x16 )
        {
            x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.i_ref );
            analysis.l0.me16x16.cost = i_cost;
            x264_me_refine_qpel_rd( h, &analysis.l0.me16x16, analysis.i_lambda2, 0, 0 );
        }
        else if( i_partition == D_8x8 )
        {
            int i8x8;
            x264_analyse_update_cache( h, &analysis );
            for( i8x8 = 0; i8x8 < 4; i8x8++ )
            {
                if( h->mb.i_sub_partition[i8x8] == D_L0_8x8 )
                {
                    x264_me_refine_qpel_rd( h, &analysis.l0.me8x8[i8x8], analysis.i_lambda2, i8x8*4, 0 );
                }
                else if( h->mb.i_sub_partition[i8x8] == D_L0_8x4 )
                {
                    x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][1], analysis.i_lambda2, i8x8*4+2, 0 );
                }
                else if( h->mb.i_sub_partition[i8x8] == D_L0_4x8 )
                {
                    x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 );
                }
                else if( h->mb.i_sub_partition[i8x8] == D_L0_4x4 )
                {
                    x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][2], analysis.i_lambda2, i8x8*4+2, 0 );
                    x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][3], analysis.i_lambda2, i8x8*4+3, 0 );
                }
            }
        }
    }

    dull_analyse_update_cache_P( h, &analysis );
}
/*
 * Entry point of the auxiliary-CPU (TCSM1) H.264 macroblock reconstruction task.
 *
 * The function consumes per-macroblock argument records (H264_MB_DecARGs) that
 * are published through a FIFO write pointer (*fifo_wp), copies them into two
 * alternating local buffers (dMB_aux / dMB2) with MDMA channel 1, and for every
 * loop iteration:
 *   - runs intra prediction + IDCT for the macroblock whose type was latched
 *     in the previous iteration (mb_type_last),
 *   - starts motion compensation for the current macroblock (mb_type),
 *   - kicks the deblocking engine and runs filter_mb_dblk(),
 *   - DMA-copies an already deblocked macroblock to the frame buffer at the
 *     coordinates delayed by two iterations (mb_x_d2 / mb_y_d2),
 *   - swaps all ping-pong buffers.
 * After the loop the task-done word is copied from TCSM1 to TCSM0 and the CPU
 * executes "wait".
 *
 * The order of the statements below is part of the synchronisation with the
 * DMA, MC and deblocking engines; do not reorder them.
 *
 * NOTE(review): reconstruction of the "last" macroblock uses mb_type_last and
 * non_zero_count_cache_luma_last carried over from the previous iteration, but
 * reads prediction modes and coefficients from the *current* dMB_aux record.
 * Presumably the producer packs the record that way - confirm against the
 * code that fills H264_MB_DecARGs.
 */
__tcsm1_main int main() {

  int c;
  /* NOTE(review): presumably enables the media extension unit - confirm. */
  S32I2M(xr16,3);
  /* The result is never read; presumably kept only to reference _gp - confirm. */
  c = i_la(_gp); 
 
  int fifo_rp = 0;           /* local FIFO read index, compared against *fifo_wp */
  unsigned int XCHGtmp;      /* scratch for the XCHG2() swaps at the loop tail */
  H264_Frame_GlbARGs *dFRM = (H264_Frame_GlbARGs *)TCSM1_FRM_ARGS;
  H264_MB_DecARGs *dMB_aux = (H264_MB_DecARGs *)TCSM1_MBARGS_BUF;
  H264_MB_DecARGs *dMB2 = (H264_MB_DecARGs *)TCSM1_MBARGS_BUF2;
  H264_AUX_T *AUX_T = (H264_AUX_T *)TCSM1_AUX_T;
  MDMA_DesNode *MDMA1_TRAN = (MDMA_DesNode *)TCSM1_GP1_TRAN_CHAIN;
  MDMA_DesNode *MDMA1_ARG = (MDMA_DesNode *)TCSM1_GP1_ARG_CHAIN;
  H264_MB_DecARGs *dMBsrc;   /* next argument record to fetch */

  fifo_wp = (int *)TCSM1_FIFO_WP;
  dMBsrc = (H264_MB_DecARGs *)(dFRM->dMB_baseaddr_aux);
  int mb_num;
  int mb_start;
  int total_mbs;    

  mb_start = dFRM->first_mb_in_frame;
  total_mbs = dFRM->mb_width * dFRM->mb_height;

  int i;

  /* Local, swappable views of the two intra 4x4 neighbour offset tables. */
  uint16_t *intra_pred4x4_top_ptr[2];
  intra_pred4x4_top_ptr[0]=intra_pred4x4_top[0];
  intra_pred4x4_top_ptr[1]=intra_pred4x4_top[1];
  uint16_t *intra_pred4x4_left_ptr[2];
  intra_pred4x4_left_ptr[0]=intra_pred4x4_left[0];
  intra_pred4x4_left_ptr[1]=intra_pred4x4_left[1];

  /* Macroblock coordinates delayed by one (d1) and two (d2) iterations. */
  uint8_t mb_x_d1, mb_y_d1;
  uint8_t mb_x_d2, mb_y_d2;
  mb_x_d1 = mb_y_d1 = mb_x_d2 = mb_y_d2 =0;

  AUX_PMON_CREAT(mc);
  AUX_PMON_CREAT(idct);
  AUX_PMON_CREAT(dblk);
  AUX_PMON_CREAT(intra);
  AUX_PMON_CREAT(mdma);
  AUX_PMON_CREAT(sync);

  /* Busy-wait until the write pointer has moved past our read index. */
  do{
  }while(fifo_rp == *fifo_wp);
  fifo_rp++;

  /* Fetch the first argument record completely into dMB_aux. */
  MDMA1_ARG->TSA = get_phy_addr_aux((uint32_t)dMBsrc);
  MDMA1_ARG->TDA = (uint32_t)dMB_aux;
  MDMA1_ARG->STRD = MDMA_STRD(64, 64);
  MDMA1_ARG->UNIT = MDMA_UNIT(1,64,(sizeof(H264_MB_DecARGs)));
  SET_MDMA1_DHA((uint32_t)TCSM1_GP1_ARG_CHAIN);
  MDMA1_RUN();
  dMBsrc++;
  POLLING_MDMA1_END();

  /* Ring indices into AUX_T->BackupMBbottom_*: x0 is written, x1 is read. */
  char bakup_x0 = dFRM->mb_width-1;
  char bakup_x1 = 0;
  int mb_type_last= 0;

  volatile unsigned int *infar = (unsigned int *)TCSM1_H264_DBLK_INFAR;
  
  /* Luma non-zero counts latched at the end of the previous iteration. */
  uint8_t non_zero_count_cache_luma_last[16];

  /* NOTE(review): the "+2" presumably drains the two-iteration output delay
     (mb_x_d1/mb_x_d2) - confirm. */
  for ( mb_num = mb_start; (mb_num < total_mbs+2); mb_num ++ ) {

    AUX_PMON_ON(sync);

    /* Wait for the next argument record to be published. */
    do{
    }while(fifo_rp == *fifo_wp);
    fifo_rp++;

    /* Trim the DMA transfer of the next record: drop the first 53 words when
       next_mb_no_weight_flag is set and the last 192 words when
       next_mb_skip_flag is set.
       NOTE(review): presumably the weight tables and the coefficient array
       respectively - confirm against the H264_MB_DecARGs layout. */
    int gp1_tran_start = 0;
    int gp1_tran_len   = sizeof(H264_MB_DecARGs);
    if (dMB_aux->next_mb_no_weight_flag) {
      gp1_tran_start = (53 << 2);
      gp1_tran_len -= (53 << 2);
    }
    if (dMB_aux->next_mb_skip_flag) {
      gp1_tran_len -= (192 << 2);
    }

    AUX_PMON_OFF(sync);

    AUX_PMON_ON(mdma);
    AUX_PMON_OFF(mdma);

    AUX_PMON_ON(intra);

    /* Program the descriptor that fetches the next record into dMB2. */
    MDMA1_ARG->TSA = (get_phy_addr_aux((uint32_t)dMBsrc) + gp1_tran_start);
    MDMA1_ARG->TDA = ((uint32_t)dMB2 + gp1_tran_start);
    MDMA1_ARG->STRD = MDMA_STRD(64, 64);
    MDMA1_ARG->UNIT = MDMA_UNIT(1,64,gp1_tran_len);
    dMBsrc++;

    POLLING_MDMA1_END(); //ensure curr dblked MB has been trans out

    SET_MDMA1_DHA((uint32_t)TCSM1_GP1_ARG_CHAIN);
    MDMA1_RUN();

    int mb_x= dMB_aux->mb_x;
    int mb_y= dMB_aux->mb_y;
    int mb_type= dMB_aux->mb_type;

    /* The coefficients were not transferred for a skipped record, so clear
       the first coefficient of each of the 24 blocks explicitly. */
    if (dMB_aux->curr_mb_skip_flag)
      for(i=0; i<24; i++)
	dMB_aux->mb[i*16] = 0;
 
    AUX_T->mc_des_dirty=0;

    if(IS_INTRA(mb_type_last)){
      // chroma predict
      Intra_pred_chroma(dMB_aux->chroma_pred_mode, AUX_T->h264_urecon[1], 
			AUX_T->h264_urecon[0] + MB_CHROM_WIDTH, TCSM1_BOTTOM_U_13PIX+4);
      Intra_pred_chroma(dMB_aux->chroma_pred_mode, AUX_T->h264_vrecon[1], 
			AUX_T->h264_vrecon[0] + MB_CHROM_WIDTH, TCSM1_BOTTOM_V_13PIX+4);

      // luma predict
      if(IS_INTRA4x4(mb_type_last)){
	if(IS_8x8DCT(mb_type_last)){
	  /* Intra 8x8: one prediction + IDCT per 8x8 block (i = 0,4,8,12). */
	  for(i=0; i<16; i+=4){
	    /* The tables hold 16-bit offsets; OR-ing in 0xF4000000 forms the
	       full address. Explicit cast: integer -> pointer. */
	    uint8_t * src_left = (uint8_t *)((uint32_t)(intra_pred4x4_left_ptr[1][i]) | 0xF4000000);
	    uint8_t * src_top = (uint8_t *)((uint32_t)(intra_pred4x4_top_ptr[1][i]) | 0xF4000000);
	    uint8_t * src_topleft;
	    if ( i==8 )
	      src_topleft = src_left - RECON_BUF_STRIDE - 1;
	    else src_topleft = src_top - 1;

	    uint8_t * ptr= AUX_T->h264_yrecon[1] + MB_offset_recon[i];
	    int dir= dMB_aux->intra4x4_pred_mode_cache[ i ];
	    int nnz = non_zero_count_cache_luma_last[i];
	    Intra_pred_luma_8x8l(dir,ptr,(dMB_aux->topleft_samples_available<<i)&0x8000,
				 (dMB_aux->topright_samples_available<<i)&0x4000, src_left, src_top, src_topleft, RECON_BUF_STRIDE);
	    if(nnz){
	      /* A single non-zero coefficient that is the DC term takes the
	         DC-only shortcut. */
	      if(nnz == 1 && dMB_aux->mb[i*16])
		ff_h264_idct8_dc_add_c(ptr, dMB_aux->mb + i*16, RECON_BUF_STRIDE);
	      else
		ff_h264_idct8_add_c(ptr, dMB_aux->mb + i*16, RECON_BUF_STRIDE);
	    }
	  }
	}else {
	  /* Intra 4x4: one prediction + IDCT per 4x4 block. */
	  for(i=0; i<16; i++){
	    /* Explicit cast: integer -> pointer (see the 8x8 branch). */
	    uint8_t * src_left = (uint8_t *)((uint32_t)(intra_pred4x4_left_ptr[1][i]) | 0xf4000000);
	    uint8_t * src_top = (uint8_t *)((uint32_t)(intra_pred4x4_top_ptr[1][i]) | 0xf4000000);

	    uint8_t * src_topleft;
	    if ( (i==2) || (i==8) || (i==10) )
	      src_topleft = src_left - RECON_BUF_STRIDE;
	    else src_topleft = src_top;

	    uint8_t * ptr= AUX_T->h264_yrecon[1] + MB_offset_recon[i];
	    /* Initialised so that no indeterminate pointer value is passed to
	       Intra_pred_luma_4x4() for modes that do not set it below. */
	    uint8_t *topright = 0;
	    int dir= dMB_aux->intra4x4_pred_mode_cache[ i ];
	    int nnz, tr;

	    if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
	      int topright_avail= (dMB_aux->topright_samples_available<<i)&0x8000;
	      if(!topright_avail){
		/* Top-right unavailable: replicate the last top sample 4x. */
		tr= src_top[3]*0x01010101;
		topright= (uint8_t*) &tr;
	      }else{
		topright= src_top + 4;
	      }
	    }
	    Intra_pred_luma_4x4(dir, ptr, src_left, topright, src_top, src_topleft);

	    nnz = non_zero_count_cache_luma_last[i];
	    if(nnz){
	      if(nnz == 1 && dMB_aux->mb[i*16])
		ff_h264_idct_dc_add_c(ptr, dMB_aux->mb + i*16, RECON_BUF_STRIDE);
	      else
		ff_h264_idct_add_c(ptr, dMB_aux->mb + i*16, RECON_BUF_STRIDE);
	    }

	  }

	}
      }else{
	/* Intra 16x16: whole-MB prediction, DC dequant/transform, then a
	   4x4 IDCT (or DC-only add) per block. */
	Intra_pred_luma_16x16(dMB_aux->intra16x16_pred_mode, AUX_T->h264_yrecon[1], 
			      AUX_T->h264_yrecon[0] + MB_LUMA_WIDTH, TCSM1_BOTTOM_Y_25PIX+4);
	luma_dc_dequant_idct_c(dMB_aux->mb, dMB_aux->dequant4_coeff[0]);
	for(i=0; i<16; i++){
	  if(non_zero_count_cache_luma_last[i])
	    ff_h264_idct_add_c(AUX_T->h264_yrecon[1] + MB_offset_recon[i], 
			       dMB_aux->mb + i*16, RECON_BUF_STRIDE);
	  else if(dMB_aux->mb[i*16])
	    ff_h264_idct_dc_add_c(AUX_T->h264_yrecon[1] + MB_offset_recon[i],
				  dMB_aux->mb + i*16, RECON_BUF_STRIDE);
	}  
      }
    }

    AUX_PMON_OFF(intra);

    /* Build the motion-compensation descriptors for the current MB. */
    if(IS_INTER(mb_type)){
      hl_motion_hw(dFRM, dMB_aux, AUX_T);
    }

    /* Wait for the previously started MC job. */
    AUX_PMON_ON(mc);
    MC_POLLING_END();
    AUX_PMON_OFF(mc);

    /* Wait until the deblocking engine has written its completion value. */
    AUX_PMON_ON(dblk);
    while ((*infar)!= TCSM1_H264_DBLK_INFDA) {}
    AUX_PMON_OFF(dblk);

    /* hl_motion_hw() queued descriptors: terminate the chain and start MC. */
    if(AUX_T->mc_des_dirty){
      H264_MC_DesNode *h264_mc = (H264_MC_DesNode *)(AUX_T->h264_mc_des_ptr[0]);
      h264_mc[AUX_T->mc_des_dirty - 1].VNodeHead = H264_VHEAD_UNLINK;
      SET_MC_DHA((uint32_t)h264_mc);
      CLEAR_MC_TTEND();
      SET_MC_DCS();
    }

    /* For an intra MB, stage the saved bottom rows of the MB row above
       (7 words luma, 4 words per chroma plane) as top neighbours. */
    if(IS_INTRA(mb_type)){
      uint32_t * bakup_src = AUX_T->BackupMBbottom_Y[bakup_x1]-4;
      uint32_t * bakup_dst = TCSM1_BOTTOM_Y_25PIX;
      bakup_dst[0] = bakup_src[0];
      bakup_dst[1] = bakup_src[1];
      bakup_dst[2] = bakup_src[2];
      bakup_dst[3] = bakup_src[3];
      bakup_dst[4] = bakup_src[4];
      bakup_dst[5] = bakup_src[5];
      bakup_dst[6] = bakup_src[6];

      bakup_src = AUX_T->BackupMBbottom_U[bakup_x1]-4;
      bakup_dst = TCSM1_BOTTOM_U_13PIX;
      bakup_dst[0] = bakup_src[0];
      bakup_dst[1] = bakup_src[1];
      bakup_dst[2] = bakup_src[2];
      bakup_dst[3] = bakup_src[3];

      bakup_src = AUX_T->BackupMBbottom_V[bakup_x1]-4;
      bakup_dst = TCSM1_BOTTOM_V_13PIX;
      bakup_dst[0] = bakup_src[0];
      bakup_dst[1] = bakup_src[1];
      bakup_dst[2] = bakup_src[2];
      bakup_dst[3] = bakup_src[3];
    }

    if(IS_INTER(mb_type_last))
      hl_motion_hw_next(dMB_aux,AUX_T,mb_type_last);
   
    /* Copy sub_mb_type[0..3] with two 32-bit stores. */
    *(uint32_t*)&AUX_T->sub_mb_type[0] = *(uint32_t*)&dMB_aux->sub_mb_type[0];
    *(uint32_t*)&AUX_T->sub_mb_type[2] = *(uint32_t*)&dMB_aux->sub_mb_type[2];

    AUX_PMON_ON(idct);
    AUX_PMON_OFF(idct);

    /* I_PCM: the "coefficients" are raw samples; copy all 24 4x4 blocks
       straight into the reconstruction buffer. */
    if (IS_INTRA_PCM(mb_type)) {
      unsigned int x, y;
      for(i=0; i<24; i++) {
	for (y=0; y<4; y++) {
	  for (x=0; x<4; x++) {
	    *(AUX_T->h264_yrecon[0] + MB_offset_recon[i] + y*RECON_BUF_STRIDE + x) = dMB_aux->mb[i*16+y*4+x];
	  }
	}
      }
    } 

    {
      int linesize, uvlinesize;
      uint8_t *dest_y_d2, *dest_u_d2, *dest_v_d2;

      linesize = dFRM->linesize;
      uvlinesize = dFRM->uvlinesize;

      /* Frame-buffer destination of the MB processed two iterations ago. */
      dest_y_d2 = dFRM->current_picture.y_ptr + (mb_y_d2 * 16* linesize  ) + mb_x_d2 * 16;
      dest_u_d2 = dFRM->current_picture.u_ptr + (mb_y_d2 * 8 * uvlinesize) + mb_x_d2 * 8;
      dest_v_d2 = dFRM->current_picture.v_ptr + (mb_y_d2 * 8 * uvlinesize) + mb_x_d2 * 8;

      //move dblked MB out
      MDMA1_TRAN[0].TSA = AUX_T->h264_ydblk[1]-4;
      MDMA1_TRAN[0].TDA = get_phy_addr_aux((uint32_t)dest_y_d2-4);

      MDMA1_TRAN[1].TSA = AUX_T->h264_ydblk[1]-4+DBLK_U_OFST;
      MDMA1_TRAN[1].TDA = get_phy_addr_aux((uint32_t)dest_u_d2-4);
      //MDMA1_TRAN[1].UNIT = MDMA_UNIT(1,12,96);

      MDMA1_TRAN[2].TSA = AUX_T->h264_ydblk[1]-4+DBLK_V_OFST;
      MDMA1_TRAN[2].TDA = get_phy_addr_aux((uint32_t)dest_v_d2-4);

      POLLING_MDMA1_END(); //ensure next MB's args has been received

      SET_MDMA1_DHA((uint32_t)MDMA1_TRAN);
      MDMA1_RUN();
    }

    //---------idct for inter---------------
    if(IS_INTER(mb_type_last)){
      void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
      void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
      int di;   /* block index step: 4 for the 8x8 transform, 1 for 4x4 */
      if(IS_8x8DCT(mb_type_last)){
        idct_dc_add = ff_h264_idct8_dc_add_c;
        idct_add = ff_h264_idct8_add_c;
	di = 4;
      }else{
        idct_dc_add = ff_h264_idct_dc_add_c;
        idct_add = ff_h264_idct_add_c;
	di = 1;
      }

      for(i=0; i<16; i+=di){
	int nnz = non_zero_count_cache_luma_last[i];
	if(nnz){
	  if(nnz==1 && dMB_aux->mb[i*16])
	    idct_dc_add(AUX_T->h264_yrecon[1] + MB_offset_recon[i], 
			dMB_aux->mb + i*16, RECON_BUF_STRIDE);
	  else
	    idct_add(AUX_T->h264_yrecon[1] + MB_offset_recon[i], 
		     dMB_aux->mb + i*16, RECON_BUF_STRIDE);
	}
      }
    }

    //------------- chroma idct------------
    /* Runs for any non-zero previous MB type (blocks 16..23 are chroma). */
    if(mb_type_last){
      chroma_dc_dequant_idct_c(dMB_aux->mb + 16*16, dMB_aux->dequant4_coeff[1]);
      chroma_dc_dequant_idct_c(dMB_aux->mb + 16*16+4*16, dMB_aux->dequant4_coeff[2]);
      for(i=16; i<16+8; i++){
	if(dMB_aux->non_zero_count_cache_chroma[ i - 16 ]) {
	  ff_h264_idct_add_c(AUX_T->h264_yrecon[1] + MB_offset_recon[i], 
			     dMB_aux->mb + i*16, RECON_BUF_STRIDE);
	} else if (dMB_aux->mb[i*16]) {
	  ff_h264_idct_dc_add_c(AUX_T->h264_yrecon[1] + MB_offset_recon[i], 
				dMB_aux->mb + i*16, RECON_BUF_STRIDE);
	}
      }
    }

    /* Start the deblocking engine on the descriptor prepared earlier; the
       record at MB (0,0) has nothing pending yet. *infar is cleared first so
       the completion poll above can detect the new job finishing. */
    if(!(mb_x==0 && mb_y==0)){
      SET_DHA_DBLK((unsigned int)AUX_T->h264_dblk_des_ptr[1]);
      *infar = 0;
      SET_DCS_DBLK(0x1);
    }

    filter_mb_dblk(dFRM, dMB_aux, AUX_T);

    /* Save the bottom row of the reconstructed MB (4 words luma, 2 words per
       chroma plane) for intra prediction of the MB row below. */
    {
      uint32_t * bakup_src = AUX_T->h264_yrecon[1] + 15*RECON_BUF_STRIDE;
      uint32_t * bakup_dst = AUX_T->BackupMBbottom_Y[bakup_x0];
      bakup_dst[0] = bakup_src[0];
      bakup_dst[1] = bakup_src[1];
      bakup_dst[2] = bakup_src[2];
      bakup_dst[3] = bakup_src[3];

      bakup_src = AUX_T->h264_urecon[1] + 7*RECON_BUF_STRIDE;
      bakup_dst = AUX_T->BackupMBbottom_U[bakup_x0];
      bakup_dst[0] = bakup_src[0];
      bakup_dst[1] = bakup_src[1];

      bakup_src = AUX_T->h264_vrecon[1] + 7*RECON_BUF_STRIDE;
      bakup_dst = AUX_T->BackupMBbottom_V[bakup_x0];
      bakup_dst[0] = bakup_src[0];
      bakup_dst[1] = bakup_src[1];
    }

    /* Shift the coordinate delay line and latch state for the next pass. */
    mb_x_d2 = mb_x_d1;
    mb_y_d2 = mb_y_d1;
    mb_x_d1 = mb_x;
    mb_y_d1 = mb_y;

    mb_type_last=mb_type;

    for(i=0;i<16;i++)
      non_zero_count_cache_luma_last[i]=dMB_aux->non_zero_count_cache_luma[ scan5[i] ];

    // abnormal exit
    if (dMB_aux->deblocking_filter & 0x8) break;

    /* Advance both ring indices, wrapping at the end of the MB row. */
    bakup_x0=(bakup_x0==((dFRM->mb_width)-1))?0:(bakup_x0+1);//hit second line's tail
    bakup_x1=(bakup_x1==((dFRM->mb_width)-1))?0:(bakup_x1+1);
    /* Swap every ping-pong buffer pair for the next iteration. */
    XCHG2(AUX_T->h264_yrecon[0],AUX_T->h264_yrecon[1],XCHGtmp);
    XCHG2(AUX_T->h264_urecon[0],AUX_T->h264_urecon[1],XCHGtmp);
    XCHG2(AUX_T->h264_vrecon[0],AUX_T->h264_vrecon[1],XCHGtmp);
    XCHG2(AUX_T->h264_ydblk[0],AUX_T->h264_ydblk[1],XCHGtmp);
    XCHG2(AUX_T->h264_dblk_des_ptr[0],AUX_T->h264_dblk_des_ptr[1],XCHGtmp);
    XCHG2(AUX_T->h264_mc_des_ptr[0],AUX_T->h264_mc_des_ptr[1],XCHGtmp);
    XCHG2(intra_pred4x4_top_ptr[0],intra_pred4x4_top_ptr[1],XCHGtmp);
    XCHG2(intra_pred4x4_left_ptr[0],intra_pred4x4_left_ptr[1],XCHGtmp);
    XCHG2(dMB_aux,dMB2,XCHGtmp);

  }

  AUX_PMON_TRAN(mc,PMON_MC_BUF);
  AUX_PMON_TRAN(idct,PMON_IDCT_BUF);
  AUX_PMON_TRAN(dblk,PMON_DBLK_BUF);
  AUX_PMON_TRAN(intra,PMON_INTRA_BUF);
  AUX_PMON_TRAN(mdma,PMON_MDMA_BUF);
  AUX_PMON_TRAN(sync,PMON_SYNC_BUF);

  /*
    task_done: aux-cpu task done flag, only write by aux-cpu, only read by main-cpu
  */
  MDMA1_ARG->TSA = (TCSM1_TASK_DONE);
  MDMA1_ARG->TDA = (TCSM0_TASK_DONE);
  MDMA1_ARG->STRD = MDMA_STRD(4,4);
  MDMA1_ARG->UNIT = MDMA_UNIT(1,4,4);
  SET_MDMA1_DHA((uint32_t)TCSM1_GP1_ARG_CHAIN);
  MDMA1_RUN();

  /* The completion of this last DMA is not polled; a few nops, then idle. */
  i_nop;  
  i_nop;    
  i_nop;      
  i_nop;  
  __asm__ __volatile__ ("wait");

}
Beispiel #15
0
/*!
*************************************************************************************
* \brief
*    Mode Decision for a macroblock
*
*    Inter modes (16x16, 16x8, 8x16, P8x8, and DIRECT for B slices) are ranked
*    by the integer motion cost accumulated in \c cost / \c min_cost. The intra
*    modes (I8MB, I4MB, I16MB) are then compared against that value through
*    \c rd_cost / \c min_rd_cost. The winner is left in the file-level
*    \c best_mode, the macroblock parameters are finalised, residuals are
*    coded, and \c rdopt->min_rdcost / \c rdopt->min_dcost are set.
*
* \param currMB
*    macroblock to decide; its mode, cbp and transform-size fields are updated
*
* \note
*    Besides \c currMB the function reads and writes encoder state that is not
*    passed in (img, params, enc_picture, rdopt, best_mode, best8x8* tables,
*    cofAC, tr4x4 / tr8x8, ...). The order of the steps matters because of
*    that shared state.
*************************************************************************************
*/
void encode_one_macroblock_low (Macroblock *currMB)
{

  int         block, mode, i, j, k, dummy;
  char        best_pdir;
  RD_PARAMS   enc_mb;
  char        best_ref[2] = {0, -1};
  // only element 0 is INT_MAX; the remaining entries are zero-initialised
  int         bmcost[5] = {INT_MAX};
  double      rd_cost = 0, min_rd_cost = 1e30;
  int         cost = 0;
  int         min_cost = INT_MAX, cost_direct=0, have_direct=0, i16mode=0;
  int         intra1 = 0;
  int         temp_cpb = 0;
  int         best_transform_flag = 0;
  int         cost8x8_direct = 0;
  short       islice      = (short) (img->type==I_SLICE);
  short       bslice      = (short) (img->type==B_SLICE);
  short       pslice      = (short) ((img->type==P_SLICE) || (img->type==SP_SLICE));
  // intra-only decision: I slice, or a P-slice row selected by mb_y_upd
  short       intra       = (short) (islice || (pslice && img->mb_y==img->mb_y_upd && img->mb_y_upd!=img->mb_y_intra));
  int         lambda_mf[3];
  int         pix_x, pix_y;
  int         prev_mb_nr  = FmoGetPreviousMBNr(img->current_mb_nr);
  Macroblock* prevMB      = (prev_mb_nr >= 0) ? &img->mb_data[prev_mb_nr]:NULL ;

  char   **ipredmodes = img->ipredmode;
  // NULL in intra profile; only dereferenced in the P-slice skip check below
  short   *allmvs = params->IntraProfile ? NULL: img->all_mv[0][0][0][0][0];
  int     ****i4p;  //for non-RD-opt. mode
  imgpel  (*curr_mpr)[16] = img->mpr[0];

  int tmp_8x8_flag, tmp_no_mbpart;
  // Fast Mode Decision
  // never set to non-zero in this function, so the !inter_skip tests always pass
  short inter_skip = 0;


  if(params->SearchMode == UM_HEX)
  {
    UMHEX_decide_intrabk_SAD();
  }
  else if (params->SearchMode == UM_HEX_SIMPLE)
  {
    smpUMHEX_decide_intrabk_SAD();
  }

  intra |= RandomIntra (img->current_mb_nr);    // Forced Pseudo-Random Intra

  //===== Setup Macroblock encoding parameters =====
  init_enc_mb_params(currMB, &enc_mb, intra, bslice);


  // reset chroma intra predictor to default
  currMB->c_ipred_mode = DC_PRED_8;

  //=====   S T O R E   C O D I N G   S T A T E   =====
  //---------------------------------------------------
  store_coding_state (currMB, cs_cm);

  if (!intra)
  {
    //===== set direct motion vectors =====
    best_mode = 1;
    if (bslice)
    {
      Get_Direct_Motion_Vectors (currMB);
    }

    if (params->CtxAdptLagrangeMult == 1)
    {
      get_initial_mb16x16_cost(currMB);
    }

    //===== MOTION ESTIMATION FOR 16x16, 16x8, 8x16 BLOCKS =====
    for (min_cost=INT_MAX, mode=1; mode<4; mode++)
    {
      bi_pred_me = 0;
      img->bi_pred_me[mode]=0;
      if (enc_mb.valid[mode] && !inter_skip)
      {
        // mode 1 (16x16) has one partition, modes 2 and 3 have two
        for (cost=0, block=0; block<(mode==1?1:2); block++)
        {
          update_lambda_costs(&enc_mb, lambda_mf);
          PartitionMotionSearch (currMB, mode, block, lambda_mf);

          //--- set 4x4 block indices (for getting MV) ---
          j = (block==1 && mode==2 ? 2 : 0);
          i = (block==1 && mode==3 ? 2 : 0);

          //--- get cost and reference frame for List 0 prediction ---
          bmcost[LIST_0] = INT_MAX;
          list_prediction_cost(currMB, LIST_0, block, mode, enc_mb, bmcost, best_ref);

          if (bslice)
          {
            //--- get cost and reference frame for List 1 prediction ---
            bmcost[LIST_1] = INT_MAX;
            list_prediction_cost(currMB, LIST_1, block, mode, enc_mb, bmcost, best_ref);

            // Compute bipredictive cost between best list 0 and best list 1 references
            list_prediction_cost(currMB, BI_PRED, block, mode, enc_mb, bmcost, best_ref);

            // Finally, if mode 16x16, compute cost for bipredictive ME vector
            if (params->BiPredMotionEstimation && mode == 1)
            {
              list_prediction_cost(currMB, BI_PRED_L0, block, mode, enc_mb, bmcost, 0);
              list_prediction_cost(currMB, BI_PRED_L1, block, mode, enc_mb, bmcost, 0);
            }
            else
            {
              bmcost[BI_PRED_L0] = INT_MAX;
              bmcost[BI_PRED_L1] = INT_MAX;
            }

            // Determine prediction list based on mode cost
            determine_prediction_list(mode, bmcost, best_ref, &best_pdir, &cost, &bi_pred_me);
          }
          else // if (bslice)
          {
            best_pdir  = 0;
            cost      += bmcost[LIST_0];
          }

          assign_enc_picture_params(mode, best_pdir, block, enc_mb.list_offset[LIST_0], best_ref[LIST_0], best_ref[LIST_1], bslice);

          //----- set reference frame and direction parameters -----
          if (mode==3)
          {
            // 8x16: partition 'block' covers 8x8 blocks block and block+2
            best8x8l0ref [3][block  ] = best8x8l0ref [3][  block+2] = best_ref[LIST_0];
            best8x8pdir  [3][block  ] = best8x8pdir  [3][  block+2] = best_pdir;
            best8x8l1ref [3][block  ] = best8x8l1ref [3][  block+2] = best_ref[LIST_1];
          }
          else if (mode==2)
          {
            // 16x8: partition 'block' covers 8x8 blocks 2*block and 2*block+1
            best8x8l0ref [2][2*block] = best8x8l0ref [2][2*block+1] = best_ref[LIST_0];
            best8x8pdir  [2][2*block] = best8x8pdir  [2][2*block+1] = best_pdir;
            best8x8l1ref [2][2*block] = best8x8l1ref [2][2*block+1] = best_ref[LIST_1];
          }
          else
          {
            // 16x16: all four 8x8 blocks share the same references/direction
            memset(&best8x8l0ref [1][0], best_ref[LIST_0], 4 * sizeof(char));
            memset(&best8x8l1ref [1][0], best_ref[LIST_1], 4 * sizeof(char));
            best8x8pdir  [1][0] = best8x8pdir  [1][1] = best8x8pdir  [1][2] = best8x8pdir  [1][3] = best_pdir;
          }

          //--- set reference frames and motion vectors ---
          if (mode>1 && block==0)
            SetRefAndMotionVectors (currMB, block, mode, best_pdir, best_ref[LIST_0], best_ref[LIST_1]);
        } // for (block=0; block<(mode==1?1:2); block++)

        currMB->luma_transform_size_8x8_flag = 0;
        if (params->Transform8x8Mode) //for inter rd-off, set 8x8 to do 8x8 transform
        {
          SetModesAndRefframeForBlocks(currMB, mode);
          currMB->luma_transform_size_8x8_flag = TransformDecision(currMB, -1, &cost);
        }

        if ((!inter_skip) && (cost < min_cost))
        {
          best_mode = (short) mode;
          min_cost  = cost;
          // remembered so the flag can be restored if this mode finally wins
          best_transform_flag = currMB->luma_transform_size_8x8_flag;

          if (params->CtxAdptLagrangeMult == 1)
          {
            adjust_mb16x16_cost(cost);
          }
        }
      } // if (enc_mb.valid[mode])
    } // for (mode=1; mode<4; mode++)

    if ((!inter_skip) && enc_mb.valid[P8x8])
    {
      giRDOpt_B8OnlyFlag = 1;

      tr8x8.cost8x8 = INT_MAX;
      tr4x4.cost8x8 = INT_MAX;
      //===== store coding state of macroblock =====
      store_coding_state (currMB, cs_mb);

      currMB->all_blk_8x8 = -1;

      if (params->Transform8x8Mode)
      {
        tr8x8.cost8x8 = 0;
        //===========================================================
        // Check 8x8 partition with transform size 8x8
        //===========================================================
        //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
        for (cost_direct=cbp8x8=cbp_blk8x8=cnt_nonz_8x8=0, block = 0; block < 4; block++)
        {
          submacroblock_mode_decision(enc_mb, &tr8x8, currMB, cofAC8x8ts[0][block], cofAC8x8ts[1][block], cofAC8x8ts[2][block],
            &have_direct, bslice, block, &cost_direct, &cost, &cost8x8_direct, 1);
          best8x8mode       [block] = tr8x8.part8x8mode [block];
          best8x8pdir [P8x8][block] = tr8x8.part8x8pdir [block];
          best8x8l0ref[P8x8][block] = tr8x8.part8x8l0ref[block];
          best8x8l1ref[P8x8][block] = tr8x8.part8x8l1ref[block];
        }

        // following params could be added in RD_8x8DATA structure
        cbp8_8x8ts      = cbp8x8;
        cbp_blk8_8x8ts  = cbp_blk8x8;
        cnt_nonz8_8x8ts = cnt_nonz_8x8;
        currMB->luma_transform_size_8x8_flag = 0; //switch to 4x4 transform size

        //--- re-set coding state (as it was before 8x8 block coding) ---
        //reset_coding_state (currMB, cs_mb);
      }// if (params->Transform8x8Mode)


      if (params->Transform8x8Mode != 2)
      {
        tr4x4.cost8x8 = 0;
        //=================================================================
        // Check 8x8, 8x4, 4x8 and 4x4 partitions with transform size 4x4
        //=================================================================
        //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
        // NOTE: when both loops run, the best8x8* entries written by the
        // 8x8-transform loop above are overwritten here with the tr4x4 results
        for (cost_direct=cbp8x8=cbp_blk8x8=cnt_nonz_8x8=0, block=0; block<4; block++)
        {
          submacroblock_mode_decision(enc_mb, &tr4x4, currMB, cofAC8x8[block], cofAC8x8CbCr[0][block], cofAC8x8CbCr[1][block],
            &have_direct, bslice, block, &cost_direct, &cost, &cost8x8_direct, 0);

          best8x8mode       [block] = tr4x4.part8x8mode [block];
          best8x8pdir [P8x8][block] = tr4x4.part8x8pdir [block];
          best8x8l0ref[P8x8][block] = tr4x4.part8x8l0ref[block];
          best8x8l1ref[P8x8][block] = tr4x4.part8x8l1ref[block];
        }
        //--- re-set coding state (as it was before 8x8 block coding) ---
        // reset_coding_state (currMB, cs_mb);
      }// if (params->Transform8x8Mode != 2)

      //--- re-set coding state (as it was before 8x8 block coding) ---
      reset_coding_state (currMB, cs_mb);


      // This is not enabled yet since mpr has reverse order.
      // NOTE(review): the call below is active despite the comment above - confirm intent
      if (params->RCEnable)
        rc_store_diff(img->opix_x, img->opix_y, curr_mpr);

      //check cost for P8x8 for non-rdopt mode
      if (tr4x4.cost8x8 < min_cost || tr8x8.cost8x8 < min_cost)
      {
        best_mode = P8x8;
        if (params->Transform8x8Mode == 2)
        {
          // 8x8 transform only
          min_cost = tr8x8.cost8x8;
          currMB->luma_transform_size_8x8_flag=1;
        }
        else if (params->Transform8x8Mode)
        {
          // both transform sizes were evaluated: take the cheaper one,
          // and let GetBestTransformP8x8() break a tie
          if (tr8x8.cost8x8 < tr4x4.cost8x8)
          {
            min_cost = tr8x8.cost8x8;
            currMB->luma_transform_size_8x8_flag=1;
          }
          else if(tr4x4.cost8x8 < tr8x8.cost8x8)
          {
            min_cost = tr4x4.cost8x8;
            currMB->luma_transform_size_8x8_flag=0;
          }
          else
          {
            if (GetBestTransformP8x8() == 0)
            {
              min_cost = tr4x4.cost8x8;
              currMB->luma_transform_size_8x8_flag=0;
            }
            else
            {
              min_cost = tr8x8.cost8x8;
              currMB->luma_transform_size_8x8_flag=1;
            }
          }
        }
        else
        {
          // 4x4 transform only
          min_cost = tr4x4.cost8x8;
          currMB->luma_transform_size_8x8_flag=0;
        }
      }// if ((tr4x4.cost8x8 < min_cost || tr8x8.cost8x8 < min_cost))
      giRDOpt_B8OnlyFlag = 0;
    }
    else // if (enc_mb.valid[P8x8])
    {
      tr4x4.cost8x8 = INT_MAX;
    }

    // Find a motion vector for the Skip mode
    if(pslice)
      FindSkipModeMotionVector (currMB);
  }
  else // if (!intra)
  {
    min_cost = INT_MAX;
  }

  //========= C H O O S E   B E S T   M A C R O B L O C K   M O D E =========
  //-------------------------------------------------------------------------
  tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;  //save 8x8_flag
  tmp_no_mbpart = currMB->NoMbPartLessThan8x8Flag;      //save no-part-less
  if ((img->yuv_format != YUV400) && (img->yuv_format != YUV444))
    // precompute all chroma intra prediction modes
    IntraChromaPrediction(currMB, NULL, NULL, NULL);

  if (enc_mb.valid[0] && bslice) // check DIRECT MODE
  {
    if(have_direct)
    {
      // reuse the direct costs accumulated by submacroblock_mode_decision()
      switch(params->Transform8x8Mode)
      {
      case 1: // Mixture of 8x8 & 4x4 transform
        cost = ((cost8x8_direct < cost_direct) || !(enc_mb.valid[5] && enc_mb.valid[6] && enc_mb.valid[7]))
          ? cost8x8_direct : cost_direct;
        break;
      case 2: // 8x8 Transform only
        cost = cost8x8_direct;
        break;
      default: // 4x4 Transform only
        cost = cost_direct;
        break;
      }
    }
    else
    { //!have_direct
      cost = GetDirectCostMB (currMB, bslice);
    }
    if (cost!=INT_MAX)
    {
      // bias in favour of DIRECT: subtract round(16 * lambda_md)
      cost -= (int)floor(16*enc_mb.lambda_md+0.4999);
    }

    if (cost <= min_cost)
    {
      if(active_sps->direct_8x8_inference_flag && params->Transform8x8Mode)
      {
        if(params->Transform8x8Mode==2)
          currMB->luma_transform_size_8x8_flag=1;
        else
        {
          if(cost8x8_direct < cost_direct)
            currMB->luma_transform_size_8x8_flag=1;
          else
            currMB->luma_transform_size_8x8_flag=0;
        }
      }
      else
        currMB->luma_transform_size_8x8_flag=0;

      //Rate control
      if (params->RCEnable)
        rc_store_diff(img->opix_x, img->opix_y, curr_mpr);

      min_cost  = cost;
      best_mode = 0;
      tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;
    }
    else
    {
      currMB->luma_transform_size_8x8_flag = tmp_8x8_flag; // restore if not best
      currMB->NoMbPartLessThan8x8Flag = tmp_no_mbpart; // restore if not best
    }
  }

  // from here on the intra candidates are compared in the double domain
  min_rd_cost = (double) min_cost;

  if (enc_mb.valid[I8MB]) // check INTRA8x8
  {
    currMB->luma_transform_size_8x8_flag = 1; // at this point cost will ALWAYS be less than min_cost

    currMB->mb_type = I8MB;
    temp_cpb = Mode_Decision_for_new_Intra8x8Macroblock (currMB, enc_mb.lambda_md, &rd_cost);

    if (rd_cost <= min_rd_cost) //HYU_NOTE. bug fix. 08/15/07
    {
      currMB->cbp = temp_cpb;
      if (img->P444_joined)
      {
        curr_cbp[0] = cmp_cbp[1];
        curr_cbp[1] = cmp_cbp[2];
      }

      if(enc_mb.valid[I4MB])   //KHHan. bug fix. Oct.15.2007
      {
        //coeffs
        // swap the cofAC / img->cofAC pointers; the I4MB test below swaps
        // them again if I4MB is rejected
        if (params->Transform8x8Mode != 2) 
        {
          i4p=cofAC; cofAC=img->cofAC; img->cofAC=i4p;
        }
      }

      // save the I8MB luma reconstruction; it is copied back into
      // enc_picture further down if I8MB ends up as the best mode
      for(j=0; j<MB_BLOCK_SIZE; j++)
      {
        pix_y = img->pix_y + j;
        for(i=0; i<MB_BLOCK_SIZE; i++)
        {
          pix_x = img->pix_x + i;
          temp_imgY[j][i] = enc_picture->imgY[pix_y][pix_x];
        }
      }

      if (img->P444_joined)
      {
        for(j=0; j<MB_BLOCK_SIZE; j++)
        {
          pix_y = img->pix_y + j;
          for(i=0; i<MB_BLOCK_SIZE; i++)
          {
            pix_x = img->pix_x + i;
            temp_imgU[j][i] = enc_picture->imgUV[0][pix_y][pix_x];
            temp_imgV[j][i] = enc_picture->imgUV[1][pix_y][pix_x];
          }
        }
      }
      
      //Rate control
      if (params->RCEnable)
        rc_store_diff(img->opix_x, img->opix_y, curr_mpr);

      min_rd_cost  = rd_cost;
      best_mode = I8MB;
      tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;
    }
    else
    {
      currMB->luma_transform_size_8x8_flag = tmp_8x8_flag; // restore if not best
      if (img->P444_joined)
      {
        cmp_cbp[1] = curr_cbp[0];
        cmp_cbp[2] = curr_cbp[1];
        currMB->cbp |= cmp_cbp[1];
        currMB->cbp |= cmp_cbp[2];
        cmp_cbp[1] = currMB->cbp;
        cmp_cbp[2] = currMB->cbp;
      }
    }
  }

  if (enc_mb.valid[I4MB]) // check INTRA4x4
  {
    currMB->luma_transform_size_8x8_flag = 0;
    currMB->mb_type = I4MB;
    temp_cpb = Mode_Decision_for_Intra4x4Macroblock (currMB, enc_mb.lambda_md, &rd_cost);

    if (rd_cost <= min_rd_cost) 
    {
      currMB->cbp = temp_cpb;

      //Rate control
      if (params->RCEnable)
        rc_store_diff(img->opix_x, img->opix_y, curr_mpr);

      min_rd_cost  = rd_cost;
      best_mode = I4MB;
      tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;
    }
    else
    {
      currMB->luma_transform_size_8x8_flag = tmp_8x8_flag; // restore if not best
      if (img->P444_joined)
      {
        cmp_cbp[1] = curr_cbp[0];
        cmp_cbp[2] = curr_cbp[1];
        currMB->cbp |= cmp_cbp[1];
        currMB->cbp |= cmp_cbp[2];
        cmp_cbp[1] = currMB->cbp;
        cmp_cbp[2] = currMB->cbp;
      }
      //coeffs
      // I4MB rejected: swap the coefficient buffers (unconditional here,
      // unlike the guarded swap in the I8MB branch)
      i4p=cofAC; cofAC=img->cofAC; img->cofAC=i4p;
    }
  }
  if (enc_mb.valid[I16MB]) // check INTRA16x16
  {
    currMB->luma_transform_size_8x8_flag = 0;
    intrapred_16x16 (currMB, PLANE_Y);
    if (img->P444_joined)
    {
      select_plane(PLANE_U);
      intrapred_16x16 (currMB, PLANE_U);
      select_plane(PLANE_V);
      intrapred_16x16 (currMB, PLANE_V);
      select_plane(PLANE_Y);
    }
    // every FastIntra16x16 value currently selects the same SAD routine
    switch(params->FastIntra16x16)
    {
    case 0:
    default:
      find_sad_16x16 = find_sad_16x16_JM;
      break;
    }

    rd_cost = find_sad_16x16 (currMB, &i16mode);

    // strict '<' here, whereas the I8MB / I4MB tests above use '<='
    if (rd_cost < min_rd_cost)
    {
      //Rate control      
      if (params->RCEnable)
        rc_store_diff(img->opix_x,img->opix_y,img->mpr_16x16[0][i16mode]);

      best_mode   = I16MB;
      min_rd_cost  = rd_cost;
      currMB->cbp = pDCT_16x16 (currMB, PLANE_Y, i16mode);
      if (img->P444_joined)
      {
        select_plane(PLANE_U);
        cmp_cbp[1] = pDCT_16x16(currMB, PLANE_U, i16mode);
        select_plane(PLANE_V);
        cmp_cbp[2] = pDCT_16x16(currMB, PLANE_V, i16mode);

        select_plane(PLANE_Y);
        currMB->cbp |= cmp_cbp[1];
        currMB->cbp |= cmp_cbp[2];
        cmp_cbp[1] = currMB->cbp;
        cmp_cbp[2] = currMB->cbp;
      }

    }
    else
    {
      currMB->luma_transform_size_8x8_flag = tmp_8x8_flag; // restore
      currMB->NoMbPartLessThan8x8Flag = tmp_no_mbpart;     // restore
    }
  }

  // evaluated before SetModesAndRefframeForBlocks() below; passed to
  // update_refresh_map() at the end
  intra1 = IS_INTRA(currMB);

  //=====  S E T   F I N A L   M A C R O B L O C K   P A R A M E T E R S ======
  //---------------------------------------------------------------------------
  {
    //===== set parameters for chosen mode =====
    SetModesAndRefframeForBlocks (currMB, best_mode);

    if (best_mode==P8x8)
    {
      // no coded coefficients with the 8x8 transform: fall back to 4x4
      // unless 8x8 is the only allowed transform
      if (currMB->luma_transform_size_8x8_flag && (cbp8_8x8ts == 0) && params->Transform8x8Mode != 2)
        currMB->luma_transform_size_8x8_flag = 0;

      SetCoeffAndReconstruction8x8 (currMB);

      // inter MB: reset the stored intra prediction modes to DC_PRED
      // (k is initialised here but not used afterwards)
      memset(currMB->intra_pred_modes, DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));
      for (k=0, j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
        memset(&ipredmodes[j][img->block_x], DC_PRED, BLOCK_MULTIPLE * sizeof(char));
    }
    else
    {
      //===== set parameters for chosen mode =====
      if (best_mode == I8MB)
      {
        memcpy(currMB->intra_pred_modes,currMB->intra_pred_modes8x8, MB_BLOCK_PARTITIONS * sizeof(char));
        for(j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
          memcpy(&img->ipredmode[j][img->block_x],&img->ipredmode8x8[j][img->block_x], BLOCK_MULTIPLE * sizeof(char));

        //--- restore reconstruction for 8x8 transform ---
        for(j=0; j<MB_BLOCK_SIZE; j++)
        {
          memcpy(&enc_picture->imgY[img->pix_y + j][img->pix_x],temp_imgY[j], MB_BLOCK_SIZE * sizeof(imgpel));
        }
        if (img->P444_joined)
        {
          for(j=0; j<MB_BLOCK_SIZE; j++)
          {
            memcpy(&enc_picture->imgUV[0][img->pix_y + j][img->pix_x],temp_imgU[j], MB_BLOCK_SIZE * sizeof(imgpel)); 
            memcpy(&enc_picture->imgUV[1][img->pix_y + j][img->pix_x],temp_imgV[j], MB_BLOCK_SIZE * sizeof(imgpel));
          }
        }
      }

      if ((best_mode!=I4MB)&&(best_mode != I8MB))
      {
        memset(currMB->intra_pred_modes,DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));
        for(j = img->block_y; j < img->block_y + BLOCK_MULTIPLE; j++)
          memset(&ipredmodes[j][img->block_x],DC_PRED, BLOCK_MULTIPLE * sizeof(char));

        if (best_mode!=I16MB)
        {
          // modes 1..3: reinstate the transform flag chosen during ME
          if((best_mode>=1) && (best_mode<=3))
            currMB->luma_transform_size_8x8_flag = best_transform_flag;
          LumaResidualCoding (currMB);

          // DIRECT (mode 0) without any coded coefficients: clear the 8x8 flag
          if (img->P444_joined)
          {
            if((currMB->cbp==0 && cmp_cbp[1] == 0 && cmp_cbp[2] == 0) &&(best_mode==0))
              currMB->luma_transform_size_8x8_flag = 0;
          }
          else if((currMB->cbp==0)&&(best_mode==0))
            currMB->luma_transform_size_8x8_flag = 0;

          //Rate control
          if (params->RCEnable)
            rc_store_diff(img->opix_x,img->opix_y,curr_mpr);
        }
      }
    }
    //check luma cbp for transform size flag
    if (((currMB->cbp&15) == 0) && !(IS_OLDINTRA(currMB) || currMB->mb_type == I8MB))
      currMB->luma_transform_size_8x8_flag = 0;

    // precompute all chroma intra prediction modes
    if ((img->yuv_format != YUV400) && (img->yuv_format != YUV444))
      IntraChromaPrediction(currMB, NULL, NULL, NULL);

    img->i16offset = 0;
    dummy = 0;  // written but never read in this function

    if ((img->yuv_format != YUV400) && (img->yuv_format != YUV444))
      ChromaResidualCoding (currMB);

    if (best_mode==I16MB)
    {
      img->i16offset = I16Offset  (currMB->cbp, i16mode);
    }

    SetMotionVectorsMB (currMB, bslice);

    //===== check for SKIP mode =====
    // P slice, 16x16, nothing coded, ref 0, and the MV equals allmvs[0..1]
    // -> signal the MB as skipped (mb_type and all b8mode entries = 0)
    if(img->P444_joined)
    {
      if ((pslice) && best_mode==1 && currMB->cbp==0 && cmp_cbp[1] == 0 && cmp_cbp[2] == 0 &&
        enc_picture->ref_idx[LIST_0][img->block_y][img->block_x]    == 0 &&
        enc_picture->mv     [LIST_0][img->block_y][img->block_x][0] == allmvs[0] &&
        enc_picture->mv     [LIST_0][img->block_y][img->block_x][1] == allmvs[1])
      {
        currMB->mb_type = currMB->b8mode[0] = currMB->b8mode[1] = currMB->b8mode[2] = currMB->b8mode[3] = 0;
        currMB->luma_transform_size_8x8_flag = 0;
      }
    }
    else if ((pslice) && best_mode==1 && currMB->cbp==0 &&
      enc_picture->ref_idx[LIST_0][img->block_y][img->block_x]    == 0 &&
      enc_picture->mv     [LIST_0][img->block_y][img->block_x][0] == allmvs[0] &&
      enc_picture->mv     [LIST_0][img->block_y][img->block_x][1] == allmvs[1])
    {
      currMB->mb_type = currMB->b8mode[0] = currMB->b8mode[1] = currMB->b8mode[2] = currMB->b8mode[3] = 0;
      currMB->luma_transform_size_8x8_flag = 0;
    }

    if (img->MbaffFrameFlag || (params->UseRDOQuant && params->RDOQ_QP_Num > 1))
      set_mbaff_parameters(currMB);
  }

  // Rate control
  if(params->RCEnable && params->RCUpdateMode <= MAX_RC_MODE)
    rc_store_mad(currMB);
  update_qp_cbp(currMB, best_mode);

  rdopt->min_rdcost = min_rd_cost;
  rdopt->min_dcost = min_rd_cost;

  // NOTE(review): prevMB is NULL when FmoGetPreviousMBNr() returned a negative
  // number; presumably that cannot happen for an odd MB number in MBAFF - confirm
  if ( (img->MbaffFrameFlag)
    && (img->current_mb_nr%2)
    && (currMB->mb_type ? 0:((bslice) ? !currMB->cbp:1))  // bottom is skip
    && (prevMB->mb_type ? 0:((bslice) ? !prevMB->cbp:1))  // top is skip
    && !(field_flag_inference(currMB) == enc_mb.curr_mb_field)) // inferred field flag differs
  {
    rdopt->min_rdcost = 1e30;  // don't allow coding of a MB pair as skip if wrong inference
  }

  //===== Decide if this MB will restrict the reference frames =====
  if (params->RestrictRef)
    update_refresh_map(intra, intra1, currMB);

  if(params->SearchMode == UM_HEX)
  {
    UMHEX_skip_intrabk_SAD(best_mode, listXsize[enc_mb.list_offset[LIST_0]]);
  }
  else if(params->SearchMode == UM_HEX_SIMPLE)
  {
    smpUMHEX_skip_intrabk_SAD(best_mode, listXsize[enc_mb.list_offset[LIST_0]]);
  }

  //--- constrain intra prediction ---
  if(params->UseConstrainedIntraPred && (img->type==P_SLICE || img->type==B_SLICE))
  {
    img->intra_block[img->current_mb_nr] = IS_INTRA(currMB);
  }
}
Beispiel #16
0
void readLumaCoeff_B8(int block8x8, struct inp_par *inp, struct img_par *img)
{
    int i;
    int mb_nr          = img->current_mb_nr;
    Macroblock *currMB = &mb_data[mb_nr];
    const int cbp      = currMB->cbp;
    SyntaxElement currSE;
    int intra;
    int inumblk;                    /* number of blocks per CBP*/
    int inumcoeff;                  /* number of coeffs per block */
    int icoef;                      /* current coefficient */
    int ipos;
    int run, level;
    int ii,jj;
    int sbx, sby;
    int boff_x, boff_y;
    int any_coeff;
    int vlc_numcoef;
    int cbp_blk_mask;
    int tablenum;         //add by qwang
    static const int incVlc_intra[7] = { 0,1,2,4,7,10,3000};
    static const int incVlc_inter[7] = { 0,1,2,3,6,9,3000};

    int buffer_level[65];    //add by qwang
    int buffer_run[64];      //add by qwang
    int EOB_Pos_intra[7] = { -1, 8, 8, 8, 6, 0, 0};
    int EOB_Pos_inter[7] = {-1, 2, 2, 2, 2, 0, 0};
    char (*AVS_VLC_table_intra)[64][2];
    char (*AVS_VLC_table_inter)[64][2];

    int symbol2D,Golomb_se_type;
    const char (*table2D)[27];

    //digipro_1
    float QP99;
    long sum;
    int shift = 7;
    //digipro_0
    int val,  QPI;

    int qp, q_shift; // dequantization parameters
    static const int blkmode2ctx [4] = {LUMA_8x8, LUMA_8x4, LUMA_4x8, LUMA_4x4};

    // this has to be done for each subblock separately
    intra     = IS_INTRA(currMB);

    inumblk   = 1;
    inumcoeff = 65; //  all positions + EOB

    // ========= dequantization values ===========
    qp       = currMB->qp; // using old style qp.

    q_shift  = qp/QUANT_PERIOD;

    QP99 =(float)(pow(2.0, (float)(qp-5.0)/8.0));

    for (shift=1; shift<16; shift++)
    {
        QPI = (int)((1<<shift)*QP99+0.5);
        if (QPI>(1<<16))
        {
            shift -=1;
            QPI = (int)((1<<shift)*QP99+0.5);
            break;
        }
    }

    if (shift==16) shift=15;
    QPI = (int)((1<<shift)*QP99+0.5);

    //make decoder table for 2DVLC_INTRA code
    if(AVS_2DVLC_INTRA_dec[0][0][1]<0)   // Don't need to set this every time. rewrite later.
    {
        memset(AVS_2DVLC_INTRA_dec,-1,sizeof(AVS_2DVLC_INTRA_dec));
        for(i=0;i<7;i++)
        {
            table2D=AVS_2DVLC_INTRA[i];
            for(run=0;run<26;run++)
                for(level=0;level<27;level++)
                {
                    ipos=table2D[run][level];
                    assert(ipos<64);
                    if(ipos>=0)
                    {
                        if(i==0)
                        {
                            AVS_2DVLC_INTRA_dec[i][ipos][0]=level+1;
                            AVS_2DVLC_INTRA_dec[i][ipos][1]=run;

                            AVS_2DVLC_INTRA_dec[i][ipos+1][0]=-(level+1);
                            AVS_2DVLC_INTRA_dec[i][ipos+1][1]=run;
                        }
                        else
                        {
                            AVS_2DVLC_INTRA_dec[i][ipos][0]=level;
                            AVS_2DVLC_INTRA_dec[i][ipos][1]=run;

                            if(level)
                            {
                                AVS_2DVLC_INTRA_dec[i][ipos+1][0]=-(level);
                                AVS_2DVLC_INTRA_dec[i][ipos+1][1]=run;
                            }
                        }
                    }
                }
        }
        assert(AVS_2DVLC_INTRA_dec[0][0][1]>=0);        //otherwise, tables are bad.
    }

    //make decoder table for 2DVLC_INTER code
    if(AVS_2DVLC_INTER_dec[0][0][1]<0)                                                          // Don't need to set this every time. rewrite later.
    {
        memset(AVS_2DVLC_INTER_dec,-1,sizeof(AVS_2DVLC_INTER_dec));
        for(i=0;i<7;i++)
        {
            table2D=AVS_2DVLC_INTER[i];
            for(run=0;run<26;run++)
                for(level=0;level<27;level++)
                {
                    ipos=table2D[run][level];
                    assert(ipos<64);
                    if(ipos>=0)
                    {
                        if(i==0)
                        {
                            AVS_2DVLC_INTER_dec[i][ipos][0]=level+1;
                            AVS_2DVLC_INTER_dec[i][ipos][1]=run;

                            AVS_2DVLC_INTER_dec[i][ipos+1][0]=-(level+1);
                            AVS_2DVLC_INTER_dec[i][ipos+1][1]=run;
                        }
                        else
                        {
                            AVS_2DVLC_INTER_dec[i][ipos][0]=level;
                            AVS_2DVLC_INTER_dec[i][ipos][1]=run;

                            if(level)
                            {
                                AVS_2DVLC_INTER_dec[i][ipos+1][0]=-(level);
                                AVS_2DVLC_INTER_dec[i][ipos+1][1]=run;
                            }
                        }
                    }
                }
        }
        assert(AVS_2DVLC_INTER_dec[0][0][1]>=0);        //otherwise, tables are bad.
    }

    //clear cbp_blk bits of this 8x8 block (and not all 4!)
    cbp_blk_mask = cbp_blk_masks[0] ;
    if(block8x8&1)cbp_blk_mask<<=2;
    if(block8x8&2)cbp_blk_mask<<=8;
    currMB->cbp_blk&=~cbp_blk_mask;

    vlc_numcoef=-1;

    Golomb_se_type=SE_LUM_AC_INTER;
    if( intra )
    {
        vlc_numcoef=0;        //this means 'use numcoeffs symbol'.
        Golomb_se_type=SE_LUM_AC_INTRA;
    }

    AVS_VLC_table_intra = AVS_2DVLC_INTRA_dec;
    AVS_VLC_table_inter = AVS_2DVLC_INTER_dec;


    // === decoding ===
    if ( cbp & (1<<block8x8) )
    {
        // === set offset in current macroblock ===
        boff_x = ( (block8x8%2)<<3 );
        boff_y = ( (block8x8/2)<<3 );
        img->subblock_x = boff_x>>2;
        img->subblock_y = boff_y>>2;

        ipos  = -1;
        any_coeff=1;   //modified by qwang    any_coeff=0

        if(intra)
        {
            tablenum = 0;
            for(i=0; i<inumcoeff; i++)
            {
                //read 2D symbol
                currSE.type = Golomb_se_type;
                //currSE.golomb_grad = 2;
                //currSE.golomb_maxlevels=4;
                currSE.golomb_grad = VLC_Golomb_Order[0][tablenum][0];
                currSE.golomb_maxlevels = VLC_Golomb_Order[0][tablenum][1];
                readSyntaxElement_GOLOMB(&currSE,img,inp);
                symbol2D = currSE.value1;

                //if(symbol2D == EOB_Pos_intra[tablenum])
                if(symbol2D == EOB_Pos_intra[tablenum])
                {
                    vlc_numcoef = i;
                    break;
                }

                if(symbol2D < CODE2D_ESCAPE_SYMBOL)
                {
                    level = AVS_2DVLC_INTRA_dec[tablenum][symbol2D][0];
                    run   = AVS_2DVLC_INTRA_dec[tablenum][symbol2D][1];
                }
                else
                {
                    // changed by dj
                    run = (symbol2D-CODE2D_ESCAPE_SYMBOL)>>1;

                    //decode level
                    currSE.type=Golomb_se_type;
                    currSE.golomb_grad = 1;
                    currSE.golomb_maxlevels=11; //2007.05.09
                    readSyntaxElement_GOLOMB(&currSE,img,inp);
                    level = currSE.value1 + ((run>MaxRun[0][tablenum])?1:RefAbsLevel[tablenum][run]);
                    //        if( (symbol2D-CODE2D_ESCAPE_SYMBOL) & 1 )
                    if(symbol2D & 1)
                        level=-level;
                }

                // save level and run into the buffers
                buffer_level[i] = level;
                buffer_run[i]   = run;

                if(abs(level) > incVlc_intra[tablenum])
                {
                    if(abs(level) <= 2)
                        tablenum = abs(level);
                    else if(abs(level) <= 4)
                        tablenum = 3;
                    else if(abs(level) <= 7)
                        tablenum = 4;
                    else if(abs(level) <= 10)
                        tablenum = 5;
                    else
                        tablenum = 6;
                }
            }//loop for icoef

            //write the decoded level/run pairs into img->m7[][]
            for(i=(vlc_numcoef-1); i>=0; i--)
            {
                ipos += (buffer_run[i]+1);

                ii = SCAN[img->picture_structure][ipos][0];
                jj = SCAN[img->picture_structure][ipos][1];

                shift = IQ_SHIFT[qp];
                QPI   = IQ_TAB[qp];
                val = buffer_level[i];
                sum = (val*QPI+(1<<(shift-2)) )>>(shift-1);

                img->m7[boff_x + ii][boff_y + jj] = sum;
            }
        }//if (intra)
        else
        {
Beispiel #17
0
static void pred_spatial_direct_motion(H264Context * const h, int *mb_type){
    MpegEncContext * const s = &h->s;
    int b8_stride = 2;
    int b4_stride = h->b_stride;
    int mb_xy = h->mb_xy, mb_y = s->mb_y;
    int mb_type_col[2];
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
    const int8_t *l1ref0, *l1ref1;
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type= MB_TYPE_L0L1;
    int i8, i4;
    int ref[2];
    int mv[2];
    int list;

    assert(h->ref_list[1][0].f.reference & 3);

    await_reference_mb_row(h, &h->ref_list[1][0], s->mb_y + !!IS_INTERLACED(*mb_type));

#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)


    /* ref = min(neighbors) */
    for(list=0; list<2; list++){
        int left_ref = h->ref_cache[list][scan8[0] - 1];
        int top_ref  = h->ref_cache[list][scan8[0] - 8];
        int refc = h->ref_cache[list][scan8[0] - 8 + 4];
        const int16_t *C= h->mv_cache[list][ scan8[0] - 8 + 4];
        if(refc == PART_NOT_AVAILABLE){
            refc = h->ref_cache[list][scan8[0] - 8 - 1];
            C    = h-> mv_cache[list][scan8[0] - 8 - 1];
        }
        ref[list] = FFMIN3((unsigned)left_ref, (unsigned)top_ref, (unsigned)refc);
        if(ref[list] >= 0){
            //this is just pred_motion() but with the cases removed that cannot happen for direct blocks
            const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
            const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];

            int match_count= (left_ref==ref[list]) + (top_ref==ref[list]) + (refc==ref[list]);
            if(match_count > 1){ //most common
                mv[list]= pack16to32(mid_pred(A[0], B[0], C[0]),
                                     mid_pred(A[1], B[1], C[1]) );
            }else {
                assert(match_count==1);
                if(left_ref==ref[list]){
                    mv[list]= AV_RN32A(A);
                }else if(top_ref==ref[list]){
                    mv[list]= AV_RN32A(B);
                }else{
                    mv[list]= AV_RN32A(C);
                }
            }
        }else{
            int mask= ~(MB_TYPE_L0 << (2*list));
            mv[list] = 0;
            ref[list] = -1;
            if(!is_b8x8)
                *mb_type &= mask;
            sub_mb_type &= mask;
        }
    }
    if(ref[0] < 0 && ref[1] < 0){
        ref[0] = ref[1] = 0;
        if(!is_b8x8)
            *mb_type |= MB_TYPE_L0L1;
        sub_mb_type |= MB_TYPE_L0L1;
    }

    if(!(is_b8x8|mv[0]|mv[1])){
        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
        fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
        fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
        fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
        *mb_type= (*mb_type & ~(MB_TYPE_8x8|MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_P1L0|MB_TYPE_P1L1))|MB_TYPE_16x16|MB_TYPE_DIRECT2;
        return;
    }

    if (IS_INTERLACED(h->ref_list[1][0].f.mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
        if (!IS_INTERLACED(*mb_type)) {                          //     AFR/FR    -> AFL/FL
            mb_y = (s->mb_y&~1) + h->col_parity;
            mb_xy= s->mb_x + ((s->mb_y&~1) + h->col_parity)*s->mb_stride;
            b8_stride = 0;
        }else{
            mb_y  += h->col_fieldoff;
            mb_xy += s->mb_stride*h->col_fieldoff; // non zero for FL -> FL & differ parity
        }
        goto single_col;
    }else{                                               // AFL/AFR/FR/FL -> AFR/FR
        if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
            mb_y = s->mb_y&~1;
            mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
            mb_type_col[0] = h->ref_list[1][0].f.mb_type[mb_xy];
            mb_type_col[1] = h->ref_list[1][0].f.mb_type[mb_xy + s->mb_stride];
            b8_stride = 2+4*s->mb_stride;
            b4_stride *= 6;
            if (IS_INTERLACED(mb_type_col[0]) != IS_INTERLACED(mb_type_col[1])) {
                mb_type_col[0] &= ~MB_TYPE_INTERLACED;
                mb_type_col[1] &= ~MB_TYPE_INTERLACED;
            }

            sub_mb_type |= MB_TYPE_16x16|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
            if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
                && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
                && !is_b8x8){
                *mb_type   |= MB_TYPE_16x8 |MB_TYPE_DIRECT2; /* B_16x8 */
            }else{
                *mb_type   |= MB_TYPE_8x8;
            }
        }else{                                           //     AFR/FR    -> AFR/FR
single_col:
            mb_type_col[0] =
            mb_type_col[1] = h->ref_list[1][0].f.mb_type[mb_xy];

            sub_mb_type |= MB_TYPE_16x16|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
            if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
                *mb_type   |= MB_TYPE_16x16|MB_TYPE_DIRECT2; /* B_16x16 */
            }else if(!is_b8x8 && (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16))){
                *mb_type   |= MB_TYPE_DIRECT2 | (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16));
            }else{
                if(!h->sps.direct_8x8_inference_flag){
                    /* FIXME save sub mb types from previous frames (or derive from MVs)
                    * so we know exactly what block size to use */
                    sub_mb_type += (MB_TYPE_8x8-MB_TYPE_16x16); /* B_SUB_4x4 */
                }
                *mb_type   |= MB_TYPE_8x8;
            }
        }
    }

    await_reference_mb_row(h, &h->ref_list[1][0], mb_y);

    l1mv0  = &h->ref_list[1][0].f.motion_val[0][h->mb2b_xy [mb_xy]];
    l1mv1  = &h->ref_list[1][0].f.motion_val[1][h->mb2b_xy [mb_xy]];
    l1ref0 = &h->ref_list[1][0].f.ref_index [0][4 * mb_xy];
    l1ref1 = &h->ref_list[1][0].f.ref_index [1][4 * mb_xy];
    if(!b8_stride){
        if(s->mb_y&1){
            l1ref0 += 2;
            l1ref1 += 2;
            l1mv0  +=  2*b4_stride;
            l1mv1  +=  2*b4_stride;
        }
    }


        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
            int n=0;
            for(i8=0; i8<4; i8++){
                int x8 = i8&1;
                int y8 = i8>>1;
                int xy8 = x8+y8*b8_stride;
                int xy4 = 3*x8+y8*b4_stride;
                int a,b;

                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                    continue;
                h->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
                if(!IS_INTRA(mb_type_col[y8]) && !h->ref_list[1][0].long_ref
                   && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
                       || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
                    a=b=0;
                    if(ref[0] > 0)
                        a= mv[0];
                    if(ref[1] > 0)
                        b= mv[1];
                    n++;
                }else{
                    a= mv[0];
                    b= mv[1];
                }
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
            }
            if(!is_b8x8 && !(n&3))
                *mb_type= (*mb_type & ~(MB_TYPE_8x8|MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_P1L0|MB_TYPE_P1L1))|MB_TYPE_16x16|MB_TYPE_DIRECT2;
        }else if(IS_16X16(*mb_type)){
Beispiel #18
0
/* Write the CABAC bins describing the type of the current macroblock.
 *
 * h  : encoder state; reads h->mb (type, partition, neighbour types, MBAFF
 *      fields) and h->sh (slice type, b_mbaff).
 * cb : CABAC encoder that receives the decisions.
 *
 * The bin layout depends on the slice type (I, P or B). Any other slice type
 * is reported through x264_log() and produces no mb_type bins. */
static void x264_cabac_mb_type( x264_t *h, x264_cabac_t *cb )
{
    const int mb_type    = h->mb.i_type;
    const int slice_type = h->sh.i_type;

    /* With MBAFF enabled, h->mb.b_interlaced is coded for macroblocks on an
     * even row, and for odd-row macroblocks when the macroblock one row up
     * is a skip. */
    if( h->sh.b_mbaff )
    {
        const int on_even_row = !(h->mb.i_mb_y & 1);

        if( on_even_row || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride]) )
            x264_cabac_encode_decision( cb, 70 + h->mb.cache.i_neighbour_interlaced, h->mb.b_interlaced );
    }

    if( slice_type == SLICE_TYPE_I )
    {
        /* The first context is offset by the number of available neighbours
         * (left, top) whose type is not I_4x4. */
        const int left_not_i4x4 = h->mb.i_mb_type_left >= 0 && h->mb.i_mb_type_left != I_4x4;
        const int top_not_i4x4  = h->mb.i_mb_type_top  >= 0 && h->mb.i_mb_type_top  != I_4x4;

        x264_cabac_mb_type_intra( h, cb, i_mb_type_arg_unused_guard( mb_type ), 3 + left_not_i4x4 + top_not_i4x4, 3+3, 3+4, 3+5, 3+6, 3+7 );
        return;
    }

    if( slice_type == SLICE_TYPE_P )
    {
        /* prefix: 14, suffix: 17 */
        if( mb_type == P_L0 )
        {
            const int partition = h->mb.i_partition;

            if( partition == D_16x16 )
            {
                x264_cabac_encode_decision( cb, 14, 0 );
                x264_cabac_encode_decision( cb, 15, 0 );
                x264_cabac_encode_decision( cb, 16, 0 );
            }
            else if( partition == D_16x8 || partition == D_8x16 )
            {
                /* Both two-partition shapes share the first two bins; the
                 * last bin is 1 for 16x8 and 0 for 8x16. */
                x264_cabac_encode_decision( cb, 14, 0 );
                x264_cabac_encode_decision( cb, 15, 1 );
                x264_cabac_encode_decision( cb, 17, partition == D_16x8 );
            }
            /* any other partition value: nothing is written */
        }
        else if( mb_type == P_8x8 )
        {
            x264_cabac_encode_decision( cb, 14, 0 );
            x264_cabac_encode_decision( cb, 15, 0 );
            x264_cabac_encode_decision( cb, 16, 1 );
        }
        else
        {
            /* intra macroblock: one prefix bin, then the intra type as suffix */
            x264_cabac_encode_decision( cb, 14, 1 );
            x264_cabac_mb_type_intra( h, cb, mb_type, 17+0, 17+1, 17+2, 17+2, 17+3, 17+3 );
        }
        return;
    }

    if( slice_type == SLICE_TYPE_B )
    {
        /* The first context is offset by the number of available neighbours
         * (left, top) that are neither B_SKIP nor B_DIRECT. */
        const int left_counts = h->mb.i_mb_type_left >= 0 &&
                                h->mb.i_mb_type_left != B_SKIP &&
                                h->mb.i_mb_type_left != B_DIRECT;
        const int top_counts  = h->mb.i_mb_type_top >= 0 &&
                                h->mb.i_mb_type_top != B_SKIP &&
                                h->mb.i_mb_type_top != B_DIRECT;
        const int first_ctx   = 27 + left_counts + top_counts;

        if( mb_type == B_DIRECT )
        {
            x264_cabac_encode_decision( cb, first_ctx, 0 );
        }
        else if( mb_type == B_8x8 || IS_INTRA( mb_type ) )
        {
            /* B_8x8 and the intra prefix are the same six bins except for
             * the fifth one (1 for B_8x8, 0 for intra). */
            const int is_b8x8 = ( mb_type == B_8x8 );

            x264_cabac_encode_decision( cb, first_ctx, 1 );
            x264_cabac_encode_decision( cb, 27+3,      1 );
            x264_cabac_encode_decision( cb, 27+4,      1 );

            x264_cabac_encode_decision( cb, 27+5,      1 );
            x264_cabac_encode_decision( cb, 27+5,      is_b8x8 );
            x264_cabac_encode_decision( cb, 27+5,      1 );

            /* only intra carries a suffix */
            if( !is_b8x8 )
                x264_cabac_mb_type_intra( h, cb, mb_type, 32+0, 32+1, 32+2, 32+2, 32+3, 32+3 );
        }
        else
        {
            /* Remaining B types: the bin string is looked up by
             * (mb type relative to B_L0_L0, partition relative to D_16x8).
             * Each group of three columns is indexed by the partition offset. */
            static const int bin_count[9*3] =
            {
                6, 6, 3,    /* L0 L0 */
                6, 6, 0,    /* L0 L1 */
                7, 7, 0,    /* L0 BI */
                6, 6, 0,    /* L1 L0 */
                6, 6, 3,    /* L1 L1 */
                7, 7, 0,    /* L1 BI */
                7, 7, 0,    /* BI L0 */
                7, 7, 0,    /* BI L1 */
                7, 7, 6,    /* BI BI */
            };
            static const int bin_string[9*3][7] =
            {
                { 1,1,0,0,0,1   }, { 1,1,0,0,1,0,  }, { 1,0,0 },       /* L0 L0 */
                { 1,1,0,1,0,1   }, { 1,1,0,1,1,0   }, {0},             /* L0 L1 */
                { 1,1,1,0,0,0,0 }, { 1,1,1,0,0,0,1 }, {0},             /* L0 BI */
                { 1,1,0,1,1,1   }, { 1,1,1,1,1,0   }, {0},             /* L1 L0 */
                { 1,1,0,0,1,1   }, { 1,1,0,1,0,0   }, { 1,0,1 },       /* L1 L1 */
                { 1,1,1,0,0,1,0 }, { 1,1,1,0,0,1,1 }, {0},             /* L1 BI */
                { 1,1,1,0,1,0,0 }, { 1,1,1,0,1,0,1 }, {0},             /* BI L0 */
                { 1,1,1,0,1,1,0 }, { 1,1,1,0,1,1,1 }, {0},             /* BI L1 */
                { 1,1,1,1,0,0,0 }, { 1,1,1,1,0,0,1 }, { 1,1,0,0,0,0 }, /* BI BI */
            };

            const int  row  = (mb_type - B_L0_L0) * 3 + (h->mb.i_partition - D_16x8);
            const int *bins = bin_string[row];
            int pos = 3;

            /* The first three bins each use their own context; the context
             * of the third depends on the value of the second. */
            x264_cabac_encode_decision( cb, first_ctx,      bins[0] );
            x264_cabac_encode_decision( cb, 27+3,           bins[1] );
            x264_cabac_encode_decision( cb, 27+5 - bins[1], bins[2] );

            /* every remaining bin shares context 27+5 */
            while( pos < bin_count[row] )
            {
                x264_cabac_encode_decision( cb, 27+5, bins[pos] );
                pos++;
            }
        }
        return;
    }

    x264_log(h, X264_LOG_ERROR, "unknown SLICE_TYPE unsupported in x264_macroblock_write_cabac\n" );
}
Beispiel #19
0
static void FUNC(intra_pred)(HEVCContext *s, int x0, int y0, int log2_size, int c_idx)
{
#define PU(x) \
    ((x) >> s->sps->log2_min_pu_size)
#define MVF(x, y) \
    (s->ref->tab_mvf[(x) + (y) * min_pu_width])
#define MVF_PU(x, y) \
    MVF(PU(x0 + ((x) << hshift)), PU(y0 + ((y) << vshift)))
#define IS_INTRA(x, y) \
    (MVF_PU(x, y).pred_flag == PF_INTRA)
#define MIN_TB_ADDR_ZS(x, y) \
    s->pps->min_tb_addr_zs[(y) * s->sps->min_tb_width + (x)]
#define EXTEND_LEFT(ptr, start, length) \
        for (i = (start); i > (start) - (length); i--) \
            ptr[i - 1] = ptr[i]
#define EXTEND_RIGHT(ptr, start, length) \
        for (i = (start); i < (start) + (length); i++) \
            ptr[i] = ptr[i - 1]
#define EXTEND_UP(ptr, start, length)   EXTEND_LEFT(ptr, start, length)
#define EXTEND_DOWN(ptr, start, length) EXTEND_RIGHT(ptr, start, length)
#define EXTEND_LEFT_CIP(ptr, start, length) \
        for (i = (start); i > (start) - (length); i--) \
            if (!IS_INTRA(i - 1, -1)) \
                ptr[i - 1] = ptr[i]
#define EXTEND_RIGHT_CIP(ptr, start, length) \
        for (i = (start); i < (start) + (length); i++) \
            if (!IS_INTRA(i, -1)) \
                ptr[i] = ptr[i - 1]
#define EXTEND_UP_CIP(ptr, start, length) \
        for (i = (start); i > (start) - (length); i--) \
            if (!IS_INTRA(-1, i - 1)) \
                ptr[i - 1] = ptr[i]
#define EXTEND_UP_CIP_0(ptr, start, length) \
        for (i = (start); i > (start) - (length); i--) \
            ptr[i - 1] = ptr[i]
#define EXTEND_DOWN_CIP(ptr, start, length) \
        for (i = (start); i < (start) + (length); i++) \
            if (!IS_INTRA(-1, i)) \
                ptr[i] = ptr[i - 1]
    HEVCLocalContext *lc = s->HEVClc;
    int i;
    int hshift = s->sps->hshift[c_idx];
    int vshift = s->sps->vshift[c_idx];
    int size = (1 << log2_size);
    int size_in_luma = size << hshift;
    int size_in_tbs = size_in_luma >> s->sps->log2_min_tb_size;
    int x = x0 >> hshift;
    int y = y0 >> vshift;
    int x_tb = x0 >> s->sps->log2_min_tb_size;
    int y_tb = y0 >> s->sps->log2_min_tb_size;
    int cur_tb_addr = MIN_TB_ADDR_ZS(x_tb, y_tb);

    ptrdiff_t stride = s->frame->linesize[c_idx] / sizeof(pixel);
    pixel *src = (pixel*)s->frame->data[c_idx] + x + y * stride;

    int min_pu_width = s->sps->min_pu_width;

    enum IntraPredMode mode = c_idx ? lc->pu.intra_pred_mode_c :
                              lc->tu.cur_intra_pred_mode;

    pixel left_array[2 * MAX_TB_SIZE + 1];
    pixel filtered_left_array[2 * MAX_TB_SIZE + 1];
    pixel top_array[2 * MAX_TB_SIZE + 1];
    pixel filtered_top_array[2 * MAX_TB_SIZE + 1];

    pixel *left          = left_array + 1;
    pixel *top           = top_array  + 1;
    pixel *filtered_left = filtered_left_array + 1;
    pixel *filtered_top  = filtered_top_array  + 1;

    int cand_bottom_left = lc->na.cand_bottom_left && cur_tb_addr > MIN_TB_ADDR_ZS(x_tb - 1, y_tb + size_in_tbs);
    int cand_left        = lc->na.cand_left;
    int cand_up_left     = lc->na.cand_up_left;
    int cand_up          = lc->na.cand_up;
    int cand_up_right    = lc->na.cand_up_right && cur_tb_addr > MIN_TB_ADDR_ZS(x_tb + size_in_tbs, y_tb - 1);

    int bottom_left_size = (FFMIN(y0 + 2 * size_in_luma, s->sps->height) -
                            (y0 + size_in_luma)) >> vshift;
    int top_right_size   = (FFMIN(x0 + 2 * size_in_luma, s->sps->width) -
                            (x0 + size_in_luma)) >> hshift;

    if (s->pps->constrained_intra_pred_flag == 1) {
        int size_in_luma_pu = PU(size_in_luma);
        int on_pu_edge_x    = !(x0 & ((1 << s->sps->log2_min_pu_size) - 1));
        int on_pu_edge_y    = !(y0 & ((1 << s->sps->log2_min_pu_size) - 1));
        if (!size_in_luma_pu)
            size_in_luma_pu++;
        if (cand_bottom_left == 1 && on_pu_edge_x) {
            int x_left_pu   = PU(x0 - 1);
            int y_bottom_pu = PU(y0 + size_in_luma);
            int max = FFMIN(size_in_luma_pu, s->sps->min_pu_height - y_bottom_pu);
            cand_bottom_left = 0;
            for (i = 0; i < max; i++)
                cand_bottom_left |= (MVF(x_left_pu, y_bottom_pu + i).pred_flag == PF_INTRA);
        }
        if (cand_left == 1 && on_pu_edge_x) {
            int x_left_pu   = PU(x0 - 1);
            int y_left_pu   = PU(y0);
            int max = FFMIN(size_in_luma_pu, s->sps->min_pu_height - y_left_pu);
            cand_left = 0;
            for (i = 0; i < max; i++)
                cand_left |= (MVF(x_left_pu, y_left_pu + i).pred_flag == PF_INTRA);
        }
        if (cand_up_left == 1) {
            int x_left_pu   = PU(x0 - 1);
            int y_top_pu    = PU(y0 - 1);
            cand_up_left = MVF(x_left_pu, y_top_pu).pred_flag == PF_INTRA;
        }
        if (cand_up == 1 && on_pu_edge_y) {
            int x_top_pu    = PU(x0);
            int y_top_pu    = PU(y0 - 1);
            int max = FFMIN(size_in_luma_pu, s->sps->min_pu_width - x_top_pu);
            cand_up = 0;
            for (i = 0; i < max; i++)
                cand_up |= (MVF(x_top_pu + i, y_top_pu).pred_flag == PF_INTRA);
        }
        if (cand_up_right == 1 && on_pu_edge_y) {
            int y_top_pu    = PU(y0 - 1);
            int x_right_pu  = PU(x0 + size_in_luma);
            int max = FFMIN(size_in_luma_pu, s->sps->min_pu_width - x_right_pu);
            cand_up_right = 0;
            for (i = 0; i < max; i++)
                cand_up_right |= (MVF(x_right_pu + i, y_top_pu).pred_flag == PF_INTRA);
        }
        for (i = 0; i < 2 * MAX_TB_SIZE; i++) {
            left[i] = 128;
            top[i]  = 128;
        }
        top[-1] = 128;
    }
    if (cand_bottom_left) {
        for (i = size + bottom_left_size; i < (size << 1); i++)
            if (IS_INTRA(-1, size + bottom_left_size - 1) ||
                !s->pps->constrained_intra_pred_flag)
                left[i] = POS(-1, size + bottom_left_size - 1);
        for (i = size + bottom_left_size - 1; i >= size; i--)
            if (IS_INTRA(-1, i) || !s->pps->constrained_intra_pred_flag)
                left[i] = POS(-1, i);
    }
    if (cand_left)
        for (i = size - 1; i >= 0; i--)
            if (IS_INTRA(-1, i) || !s->pps->constrained_intra_pred_flag)
                left[i] = POS(-1, i);
    if (cand_up_left)
        if (IS_INTRA(-1, -1) || !s->pps->constrained_intra_pred_flag) {
            left[-1] = POS(-1, -1);
            top[-1]  = left[-1];
        }
    if (cand_up)
        for (i = size - 1; i >= 0; i--)
            if (IS_INTRA(i, -1) || !s->pps->constrained_intra_pred_flag)
                top[i] = POS(i, -1);
    if (cand_up_right) {
        for (i = size + top_right_size; i < (size << 1); i++)
            if (IS_INTRA(size + top_right_size - 1, -1) ||
                !s->pps->constrained_intra_pred_flag)
                top[i] = POS(size + top_right_size - 1, -1);
        for (i = size + top_right_size - 1; i >= size; i--)
            if (IS_INTRA(i, -1) || !s->pps->constrained_intra_pred_flag)
                top[i] = POS(i, -1);
    }

    if (s->pps->constrained_intra_pred_flag == 1) {
        if (cand_bottom_left || cand_left || cand_up_left || cand_up || cand_up_right) {
            int size_max_x = x0 + ((2 * size) << hshift) < s->sps->width ?
                                    2 * size : (s->sps->width - x0) >> hshift;
            int size_max_y = y0 + ((2 * size) << vshift) < s->sps->height ?
                                    2 * size : (s->sps->height - y0) >> vshift;
            int j = size + (cand_bottom_left? bottom_left_size: 0) -1;
            if (!cand_up_right) {
                size_max_x = x0 + ((size) << hshift) < s->sps->width ?
                                                    size : (s->sps->width - x0) >> hshift;
            }
            if (!cand_bottom_left) {
                size_max_y = y0 + (( size) << vshift) < s->sps->height ?
                                                     size : (s->sps->height - y0) >> vshift;
            }
/*!
*************************************************************************************
* \brief
*    Mode Decision for a macroblock with error resilience
*
*    Unless the macroblock is forced to intra, motion estimation is run for
*    modes 1..3 (16x16, 16x8, 8x16) and, if enabled, for the P8x8
*    sub-partitions. Afterwards the function loops over the allowed chroma
*    intra prediction modes and over the first max_index entries of
*    mb_mode_table[], calling compute_mode_RD_cost() for every mode that is
*    valid and not filtered out. The resulting min_rdcost / min_dcost are
*    written to rdopt.
*
* \param currMB
*    macroblock to encode; c_ipred_mode, all_blk_8x8 and
*    luma_transform_size_8x8_flag are written directly by this function
*
* \note
*    When params->RestrictRef == 1 and the slice is P/SP (or B with
*    nal_reference_idc > 0) the whole decision is executed twice, with
*    params->rdopt set to 1 for the first pass and 3 for the second.
*
* \note
*    best_mode, cbp, cbp8x8, cbp_blk8x8, cnt_nonz_8x8, tr8x8, tr4x4,
*    giRDOpt_B8OnlyFlag and the cofAC8x8* buffers are not declared in this
*    function; they are read and written as state shared with other code.
*************************************************************************************
*/
void encode_one_macroblock_highloss (Macroblock *currMB)
{
  int         max_index = 9;           // number of mb_mode_table[] entries tried in the mode loop
  int         rerun, block, index, mode, i, j, ctr16x16;
  char        best_pdir;
  RD_PARAMS   enc_mb;
  double      min_rdcost = 1e30;
  double      min_dcost = 1e30;
  char        best_ref[2] = {0, -1};
  // NOTE(review): only bmcost[0] is initialised to INT_MAX here, the other four
  // elements are zero-initialised. LIST_0/LIST_1 (and BI_PRED_L0/L1 when bipred
  // is disabled) are overwritten with INT_MAX before use below.
  int         bmcost[5] = {INT_MAX};
  int         cost=0;
  int         min_cost = INT_MAX, cost_direct=0, have_direct=0, i16mode=0;
  int         intra1 = 0;              // IS_INTRA(currMB) as observed after the first pass (rerun == 0)
  int         cost8x8_direct = 0;
  // Neighbour availability flags; only filled in by IntraChromaPrediction(),
  // i.e. when chroma is present and the colour planes are not coded independently.
  int         mb_available_up;
  int         mb_available_left;
  int         mb_available_up_left;
  int         best8x8l0ref, best8x8l1ref; 
  int         is_cavlc = (img->currentSlice->symbol_mode == CAVLC);

  short       islice      = (short) (img->type==I_SLICE);
  short       bslice      = (short) (img->type==B_SLICE);
  short       pslice      = (short) ((img->type==P_SLICE) || (img->type==SP_SLICE));
  // intra is forced for I slices and for P/SP macroblocks lying on row mb_y_upd
  // (as long as mb_y_upd differs from mb_y_intra); RandomIntra() may force it too.
  short       intra       = (short) (islice || (pslice && img->mb_y==img->mb_y_upd && img->mb_y_upd!=img->mb_y_intra));
  int         lambda_mf[3];
  // two passes only for RestrictRef == 1 in P/SP slices or in B slices used as reference
  short       runs        = (short) (params->RestrictRef==1 && (pslice  || (bslice && img->nal_reference_idc>0)) ? 2 : 1);

  int         prev_mb_nr  = FmoGetPreviousMBNr(img->current_mb_nr);
  // prevMB is NULL when FmoGetPreviousMBNr() returns a negative number
  Macroblock* prevMB      = (prev_mb_nr >= 0) ? &img->mb_data[prev_mb_nr]:NULL ;
  imgpel  (*mb_pred)[16] = img->mb_pred[0];
  Block8x8Info *b8x8info   = img->b8x8info;

  short   min_chroma_pred_mode, max_chroma_pred_mode;

  short   inter_skip = 0;
  short   bipred_me = 0;
  double  min_rate = 0;

  // search-mode specific preparation, only for the two UMHEX variants
  if(params->SearchMode == UM_HEX)
  {
    UMHEX_decide_intrabk_SAD();
  }
  else if (params->SearchMode == UM_HEX_SIMPLE)
  {
    smpUMHEX_decide_intrabk_SAD();
  }

  intra |= RandomIntra (img->current_mb_nr);    // Forced Pseudo-Random Intra

  //===== Setup Macroblock encoding parameters =====
  init_enc_mb_params(currMB, &enc_mb, intra, bslice);

  // Perform multiple encodings if rdopt with losses is enabled
  for (rerun=0; rerun<runs; rerun++)
  {
    // first pass uses rdopt mode 1, second pass rdopt mode 3
    if (runs==2)
      params->rdopt= (rerun==0) ? 1 : 3;

    // reset chroma intra predictor to default
    currMB->c_ipred_mode = DC_PRED_8;

    //=====   S T O R E   C O D I N G   S T A T E   =====
    //---------------------------------------------------
    store_coding_state (currMB, cs_cm);

    if (!intra)
    {
      //===== set direct motion vectors =====
      best_mode = 1;
      if (bslice)
      {
        Get_Direct_Motion_Vectors (currMB);
      }

      if (params->CtxAdptLagrangeMult == 1)
      {
        get_initial_mb16x16_cost(currMB);
      }

      //===== MOTION ESTIMATION FOR 16x16, 16x8, 8x16 BLOCKS =====
      for (min_cost=INT_MAX, mode=1; mode<4; mode++)
      {
        bipred_me = 0;
        b8x8info->bipred8x8me[mode][0] = 0;
        if (enc_mb.valid[mode])
        {
          // mode 1 has a single partition, modes 2 and 3 have two; cost is summed over them
          for (cost=0, block=0; block<(mode==1?1:2); block++)
          {
            update_lambda_costs(&enc_mb, lambda_mf);
            PartitionMotionSearch (currMB, mode, block, lambda_mf);

            //--- set 4x4 block indices (for getting MV) ---
            j = (block==1 && mode==2 ? 2 : 0);
            i = (block==1 && mode==3 ? 2 : 0);

            //--- get cost and reference frame for List 0 prediction ---
            bmcost[LIST_0] = INT_MAX;
            list_prediction_cost(currMB, LIST_0, block, mode, &enc_mb, bmcost, best_ref);

            if (bslice)
            {
              //--- get cost and reference frame for List 1 prediction ---
              bmcost[LIST_1] = INT_MAX;
              list_prediction_cost(currMB, LIST_1, block, mode, &enc_mb, bmcost, best_ref);

              // Compute bipredictive cost between best list 0 and best list 1 references
              list_prediction_cost(currMB, BI_PRED, block, mode, &enc_mb, bmcost, best_ref);

              // currently Bi prediction ME is only supported for modes 1, 2, 3
              if (is_bipred_enabled(mode))
              {
                list_prediction_cost(currMB, BI_PRED_L0, block, mode, &enc_mb, bmcost, 0);
                list_prediction_cost(currMB, BI_PRED_L1, block, mode, &enc_mb, bmcost, 0);
              }
              else
              {
                // exclude the bipred ME candidates from the comparison
                bmcost[BI_PRED_L0] = INT_MAX;
                bmcost[BI_PRED_L1] = INT_MAX;
              }

              // Determine prediction list based on mode cost
              determine_prediction_list(mode, bmcost, best_ref, &best_pdir, &cost, &bipred_me);
            }
            else // if (bslice)
            {
              // non-B slices always predict from list 0
              best_pdir  = 0;
              cost      += bmcost[LIST_0];
            }

            assign_enc_picture_params(mode, best_pdir, block, enc_mb.list_offset[LIST_0], best_ref[LIST_0], best_ref[LIST_1], bslice, bipred_me);
            //----- set reference frame and direction parameters -----
            set_block8x8_info(b8x8info, mode, block, best_ref, best_pdir, bipred_me);
            
            
            //--- set reference frames and motion vectors ---
            // only done for the first partition of the two-partition modes
            // (presumably so the second partition's search sees it -- confirm)
            if (mode>1 && block==0)
              SetRefAndMotionVectors (currMB, block, mode, best_pdir, best_ref[LIST_0], best_ref[LIST_1], bipred_me);
          } // for (block=0; block<(mode==1?1:2); block++)

          // remember the cheapest of the three large-partition modes
          if (cost < min_cost)
          {
            best_mode = (short) mode;
            min_cost  = cost;
            if (params->CtxAdptLagrangeMult == 1)
            {
              adjust_mb16x16_cost(cost);
            }
          }
        } // if (enc_mb.valid[mode])
      } // for (mode=1; mode<4; mode++)

    if (enc_mb.valid[P8x8])
      {
        // flag is raised for the duration of the sub-macroblock decision and cleared again below
        giRDOpt_B8OnlyFlag = 1;

        tr8x8.mb_p8x8_cost = INT_MAX;
        tr4x4.mb_p8x8_cost = INT_MAX;
        //===== store coding state of macroblock =====
        store_coding_state (currMB, cs_mb);

        currMB->all_blk_8x8 = -1;

        // Transform8x8Mode: 0 -> only the 4x4-transform pass below runs,
        // 2 -> only this 8x8-transform pass runs, other non-zero values -> both
        if (params->Transform8x8Mode)
        {
          tr8x8.mb_p8x8_cost = 0;
          //===========================================================
          // Check 8x8 partition with transform size 8x8
          //===========================================================
          //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
          for (cost_direct=cbp8x8=cbp_blk8x8=cnt_nonz_8x8=0, block=0; block<4; block++)
          {
            submacroblock_mode_decision(&enc_mb, &tr8x8, currMB, cofAC8x8ts[0][block], cofAC8x8ts[1][block], cofAC8x8ts[2][block],
              &have_direct, bslice, block, &cost_direct, &cost, &cost8x8_direct, 1, is_cavlc);
            set_subblock8x8_info(b8x8info, P8x8, block, &tr8x8);
          }

          // following params could be added in RD_8x8DATA structure
          // (saved here because the 4x4-transform pass below resets cbp8x8, cbp_blk8x8 and cnt_nonz_8x8)
          cbp8_8x8ts      = cbp8x8;
          cbp_blk8_8x8ts  = cbp_blk8x8;
          cnt_nonz8_8x8ts = cnt_nonz_8x8;
          currMB->luma_transform_size_8x8_flag = 0; //switch to 4x4 transform size

          //--- re-set coding state (as it was before 8x8 block coding) ---
          //reset_coding_state (currMB, cs_mb);
        }// if (params->Transform8x8Mode)


        if (params->Transform8x8Mode != 2)
        {
          tr4x4.mb_p8x8_cost = 0;
          //=================================================================
          // Check 8x8, 8x4, 4x8 and 4x4 partitions with transform size 4x4
          //=================================================================
          //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
          for (cost_direct=cbp8x8=cbp_blk8x8=cnt_nonz_8x8=0, block=0; block<4; block++)
          {
            submacroblock_mode_decision(&enc_mb, &tr4x4, currMB, cofAC8x8[block], cofAC8x8CbCr[0][block], cofAC8x8CbCr[1][block],
              &have_direct, bslice, block, &cost_direct, &cost, &cost8x8_direct, 0, is_cavlc);
            set_subblock8x8_info(b8x8info, P8x8, block, &tr4x4);
          }
          //--- re-set coding state (as it was before 8x8 block coding) ---
          // reset_coding_state (currMB, cs_mb);
        }// if (params->Transform8x8Mode != 2)

        //--- re-set coding state (as it was before 8x8 block coding) ---
        reset_coding_state (currMB, cs_mb);

        // This is not enabled yet since mpr has reverse order.
        // NOTE(review): the call below is active whenever RCEnable is set, so the
        // remark above looks stale -- confirm.
        if (params->RCEnable)
          rc_store_diff(img->opix_x, img->opix_y, mb_pred);

        //check cost for P8x8 for non-rdopt mode
        giRDOpt_B8OnlyFlag = 0;
      }
      else // if (enc_mb.valid[P8x8])
      {
        tr4x4.mb_p8x8_cost = INT_MAX;
      }

      // Find a motion vector for the Skip mode
    if(pslice)
        FindSkipModeMotionVector (currMB);
    }
    else // if (!intra)
    {
      // no inter search was performed for a forced-intra macroblock
      min_cost = INT_MAX;
    }

    //========= C H O O S E   B E S T   M A C R O B L O C K   M O D E =========
    //-------------------------------------------------------------------------

   {
     // Select the range of chroma intra prediction modes to iterate over.
     if ((img->yuv_format != YUV400) && !IS_INDEPENDENT(params))
     {
       // precompute all new chroma intra prediction modes
       IntraChromaPrediction(currMB, &mb_available_up, &mb_available_left, &mb_available_up_left);

       if (params->FastCrIntraDecision )
       {
         // fast path: the chroma mode is decided up front, so the loop below runs once
         IntraChromaRDDecision(currMB, enc_mb);
         min_chroma_pred_mode = (short) currMB->c_ipred_mode;
         max_chroma_pred_mode = (short) currMB->c_ipred_mode;
       }
       else
       {
         min_chroma_pred_mode = DC_PRED_8;
         max_chroma_pred_mode = PLANE_8;
       }
     }
     else
     {
       // no chroma or independently coded planes: only DC_PRED_8 is visited
       min_chroma_pred_mode = DC_PRED_8;
       max_chroma_pred_mode = DC_PRED_8;
     }

     for (currMB->c_ipred_mode=min_chroma_pred_mode; currMB->c_ipred_mode<=max_chroma_pred_mode; currMB->c_ipred_mode++)
     {
       // bypass if c_ipred_mode is not allowed
       // (non-DC modes disabled via ChromaIntraDisable, or the neighbours the mode needs are unavailable)
       if ( (img->yuv_format != YUV400) &&
         (  ((!intra || !params->IntraDisableInterOnly) && params->ChromaIntraDisable == 1 && currMB->c_ipred_mode!=DC_PRED_8)
         || (currMB->c_ipred_mode == VERT_PRED_8 && !mb_available_up)
         || (currMB->c_ipred_mode == HOR_PRED_8  && !mb_available_left)
         || (currMB->c_ipred_mode == PLANE_8     && (!mb_available_left || !mb_available_up || !mb_available_up_left))))
         continue;

       //===== GET BEST MACROBLOCK MODE =====
       for (ctr16x16=0, index=0; index < max_index; index++)
       {
         mode = mb_mode_table[index];

         if (img->yuv_format != YUV400)
         {
           i16mode = 0;
         }
         //--- for INTER16x16 check all prediction directions ---
         // update_prediction_for_mode16x16() receives &index and may therefore change the loop counter
         if (mode==1 && bslice)
         {
           update_prediction_for_mode16x16(b8x8info, ctr16x16, &index);
           ctr16x16++;
         }

         // Skip intra modes in inter slices if best mode is inter <P8x8 with cbp equal to 0
         if (params->SkipIntraInInterSlices && !intra && mode >= I4MB && best_mode <=3 && currMB->cbp == 0)
           continue;

      // check if weights are in valid range for biprediction.
         if (bslice && active_pps->weighted_bipred_idc == 1 && mode < P8x8)
         {
           int cur_blk, cur_comp;
           int weight_sum;
           Boolean invalid_mode = FALSE;
           for (cur_blk = 0; cur_blk < 4; cur_blk ++)
           {
             // only bi-predicted 8x8 blocks (pdir == 2) are checked
             if (b8x8info->best8x8pdir[mode][cur_blk] == 2)
             {
               // one component for YUV400, otherwise three
               for (cur_comp = 0; cur_comp < (active_sps->chroma_format_idc == YUV400 ? 1 : 3) ; cur_comp ++)
               {
                 best8x8l0ref = (int) b8x8info->best8x8l0ref[mode][cur_blk];
                 best8x8l1ref = (int) b8x8info->best8x8l1ref[mode][cur_blk];
                 weight_sum = wbp_weight[0][best8x8l0ref][best8x8l1ref][cur_comp] + wbp_weight[1][best8x8l0ref][best8x8l1ref][cur_comp];

                 // the mode is rejected when the sum of both weights leaves [-128, 127]
                 if (weight_sum < -128 ||  weight_sum > 127)
                 {
                   invalid_mode = TRUE;
                   break;
                 }
               }
               if (invalid_mode == TRUE)
                 break;
             }
           }
           if (invalid_mode == TRUE)
             continue;
         }

         if (enc_mb.valid[mode])
           compute_mode_RD_cost(mode, currMB, &enc_mb, &min_rdcost, &min_dcost, &min_rate, i16mode, bslice, &inter_skip, is_cavlc);

       }// for (ctr16x16=0, index=0; index<max_index; index++)
     }// for (currMB->c_ipred_mode=min_chroma_pred_mode; currMB->c_ipred_mode<=max_chroma_pred_mode; currMB->c_ipred_mode++)

#ifdef BEST_NZ_COEFF
     // copy the saved non-zero coefficient counts back for the current macroblock
     for (j=0;j<4;j++)
       for (i=0; i<(4+img->num_blk8x8_uv); i++)
         img->nz_coeff[img->current_mb_nr][j][i] = gaaiMBAFF_NZCoeff[j][i];
#endif
   }

   // remember whether the first pass ended up intra; passed to update_refresh_map() below
   if (rerun==0)
     intra1 = IS_INTRA(currMB);
  } // for (rerun=0; rerun<runs; rerun++)

  //=====  S E T   F I N A L   M A C R O B L O C K   P A R A M E T E R S ======
  //---------------------------------------------------------------------------

  update_qp_cbp_tmp(currMB, cbp, best_mode);
  set_stored_macroblock_parameters (currMB);

  // Rate control
  if(params->RCEnable && params->RCUpdateMode <= MAX_RC_MODE)
    rc_store_mad(currMB);
  update_qp_cbp(currMB, best_mode);

  rdopt->min_rdcost = min_rdcost;
  rdopt->min_dcost = min_dcost;

  // MBAFF: for an odd macroblock number, if both this macroblock and the previous
  // one have mb_type == 0 (and, in B slices, cbp == 0) but the inferred field flag
  // differs from enc_mb.curr_mb_field, make the cost prohibitive.
  // NOTE(review): prevMB is dereferenced without a NULL check; this relies on
  // FmoGetPreviousMBNr() returning a valid number whenever current_mb_nr is odd -- confirm.
  if ( (img->MbaffFrameFlag)
    && (img->current_mb_nr%2)
    && (currMB->mb_type ? 0:((bslice) ? !currMB->cbp:1))  // bottom is skip
    && (prevMB->mb_type ? 0:((bslice) ? !prevMB->cbp:1))  // top is skip
    && !(field_flag_inference(currMB) == enc_mb.curr_mb_field)) // inferred field flag is wrong
  {
    rdopt->min_rdcost = 1e30;  // don't allow coding of a MB pair as skip if wrong inference
  }

  //===== Decide if this MB will restrict the reference frames =====
  if (params->RestrictRef)
    update_refresh_map(intra, intra1, currMB);

  // search-mode specific post-processing, mirrors the preparation at the top of the function
  if(params->SearchMode == UM_HEX)
  {
    UMHEX_skip_intrabk_SAD(best_mode, listXsize[enc_mb.list_offset[LIST_0]]);
  }
  else if(params->SearchMode == UM_HEX_SIMPLE)
  {
    smpUMHEX_skip_intrabk_SAD(best_mode, listXsize[enc_mb.list_offset[LIST_0]]);
  }

  //--- constrain intra prediction ---
  // record per macroblock whether it was coded intra (P and B slices only)
  if(params->UseConstrainedIntraPred && (img->type==P_SLICE || img->type==B_SLICE))
  {
    img->intra_block[img->current_mb_nr] = IS_INTRA(currMB);
  }
}