Example #1
0
/*!
**************************************************************************************
* \brief 
*      Allocate memory for error resilient RDO.
*
* \param p_Vid
*      Video parameters. p_Vid->p_decs is overwritten with a newly allocated
*      Decoders structure.
* \param p_Inp
*      Input parameters. p_Inp->de selects which algorithm specific buffers are
*      allocated and p_Inp->NoOfDecoders sizes the LLN buffers.
*
* \return
*      Sum of the values returned by the get_mem3Dint / get_mem3Dpel /
*      get_mem4Dpel calls. The Decoders structure itself is not included.
*
* \note
*      no_mem_exit() is called when the Decoders structure cannot be allocated.
**************************************************************************************
*/
int allocate_errdo_mem(VideoParameters *p_Vid, InputParameters *p_Inp)
{
  int memory_size = 0;
  Decoders *decs;

  //allocate shared memory for all algorithms
  if ((p_Vid->p_decs = (Decoders *) malloc(sizeof(Decoders))) == NULL)
  {
    // malloc may fail; every statement below writes through this pointer
    no_mem_exit("allocate_errdo_mem: p_Vid->p_decs");
  }
  decs = p_Vid->p_decs;

  memory_size += get_mem3Dint(&decs->res_img, MAX_PLANE, MB_BLOCK_SIZE, MB_BLOCK_SIZE);
  memory_size += get_mem3Dint(&decs->res_mb_best8x8, MAX_PLANE, MB_BLOCK_SIZE, MB_BLOCK_SIZE);

  // malloc does not clear the structure, so every algorithm specific pointer
  // is reset explicitly before the selected algorithm allocates what it needs
  decs->RCD_bestY_mb                  = NULL;
  decs->RCD_bestY_b8x8                = NULL;
  decs->MVCD_bestY_mb                 = NULL;
  decs->MVCD_bestY_b8x8               = NULL;
  decs->flag_bestY_mb                 = NULL;
  decs->flag_bestY_b8x8               = NULL;
  decs->flag_wo_res                   = NULL;
  decs->flag_wo_res_bestY_b8x8        = NULL;
  decs->trans_dist_bestY_mb           = NULL;
  decs->trans_dist_bestY_b8x8         = NULL;
  decs->trans_dist_wo_res             = NULL;   //it is used for P8x8, where residual may be set to 0
  decs->trans_dist_wo_res_bestY_b8x8  = NULL;   //it is used for P8x8, where residual may be set to 0
  decs->trans_err_bestY_mb            = NULL;
  decs->trans_err_bestY_b8x8          = NULL;
  decs->trans_err_wo_res              = NULL;   //it is used for P8x8, where residual may be set to 0
  decs->trans_err_wo_res_bestY_b8x8   = NULL;   //it is used for P8x8, where residual may be set to 0
  decs->dec_mb_pred                   = NULL;
  decs->dec_mbY_best                  = NULL;
  decs->dec_mb_pred_best8x8           = NULL;
  decs->dec_mbY_best8x8               = NULL;
  decs->first_moment_bestY_mb         = NULL;
  decs->first_moment_bestY_b8x8       = NULL;
  decs->first_moment_pred_bestY_b8x8  = NULL;
  decs->first_moment_pred             = NULL;
  decs->second_moment_bestY_mb        = NULL;
  decs->second_moment_bestY_b8x8      = NULL;
  decs->second_moment_pred_bestY_b8x8 = NULL;
  decs->second_moment_pred            = NULL;

  //Zhifeng 090630
  switch (p_Inp->de)
  {
  case LLN:
    //allocate memory for lln algorithm: one macroblock sized buffer per decoder
    memory_size += get_mem3Dpel(&decs->dec_mb_pred, p_Inp->NoOfDecoders, MB_BLOCK_SIZE, MB_BLOCK_SIZE);
    memory_size += get_mem3Dpel(&decs->dec_mbY_best, p_Inp->NoOfDecoders, MB_BLOCK_SIZE, MB_BLOCK_SIZE);
    memory_size += get_mem4Dpel(&decs->dec_mbY_best8x8, 2, p_Inp->NoOfDecoders, MB_BLOCK_SIZE, MB_BLOCK_SIZE);
    memory_size += get_mem4Dpel(&decs->dec_mb_pred_best8x8, 2, p_Inp->NoOfDecoders, MB_BLOCK_SIZE, MB_BLOCK_SIZE);
    break;  
  default:
    // no additional buffers for any other setting of p_Inp->de
    ;
  }
  return memory_size;
}
Example #2
0
/*!
 *************************************************************************************
 * \brief
 *    allocate storable picture memory for errdo
 *
 * \param p
 *    Picture that receives the new Dist_Estm structure in p->de_mem.
 * \param p_Vid
 *    Video parameters; only yuv_format is read, to decide whether chroma
 *    buffers are allocated.
 * \param p_Inp
 *    Input parameters; de selects the algorithm, NoOfDecoders the number of
 *    decoder buffers for LLN.
 * \param size_x, size_y
 *    Luma dimensions used for dec_imgY and (divided by MB_BLOCK_SIZE) for
 *    mb_error_map.
 * \param size_x_cr, size_y_cr
 *    Chroma dimensions used for dec_imgUV.
 *
 * \note
 *    no_mem_exit() is called when an allocation done directly in this
 *    function fails.
 *************************************************************************************
*/
void errdo_alloc_storable_picture(StorablePicture *p, VideoParameters *p_Vid, InputParameters *p_Inp, int size_x, int size_y, int size_x_cr, int size_y_cr)
{
  Dist_Estm *s;
  int   dec, ndec, nplane;

  // malloc may fail; everything below writes through this pointer
  if ((p->de_mem = (Dist_Estm *)malloc( sizeof(Dist_Estm) )) == NULL)
  {
    no_mem_exit("errdo.c: de_mem");
  }
  s = p->de_mem;

  // malloc does not clear the structure: reset every buffer pointer so that
  // only the buffers of the selected algorithm end up non-NULL
  s->res_con_diff_Y   = NULL;
  s->res_con_diff_UV   = NULL;
  s->MV_con_diff_Y   = NULL;
  s->MV_con_diff_UV   = NULL;
  s->error_sign_flag_Y   = NULL;
  s->error_sign_flag_UV   = NULL;
  s->transmission_dist_Y   = NULL;
  s->transmission_dist_UV   = NULL;
  s->transmission_err_Y   = NULL;
  s->transmission_err_UV   = NULL;
  s->dec_imgY   = NULL;
  s->dec_imgUV  = NULL;
  s->mb_error_map = NULL;
  s->first_moment_Y   = NULL;
  s->first_moment_UV  = NULL;
  s->second_moment_Y   = NULL;
  s->second_moment_UV  = NULL;
  for (nplane = 0; nplane < 3; nplane++)
  {
    s->p_res_con_diff[nplane] = NULL;
    s->p_MV_con_diff[nplane] = NULL;
    s->p_error_sign_flag[nplane] = NULL;
    s->p_transmission_dist[nplane] = NULL;
    s->p_transmission_err[nplane] = NULL;
    s->p_dec_img[nplane] = NULL;
    s->p_first_moment[nplane] = NULL;
    s->p_second_moment[nplane] = NULL;
  }

  switch (p_Inp->de)
  {
  case LLN:
    ndec = p_Inp->NoOfDecoders;
    //check the consistent
    if (ndec == 0)
    {
      // the fallback only affects the local count; p_Inp->NoOfDecoders is left as is
      printf("Number of decoders cannot be zero for LLN and fast LLN algorithms, resetting to 30");
      ndec = 30;
    }
    get_mem3D(&(s->mb_error_map), ndec, size_y/MB_BLOCK_SIZE, size_x/MB_BLOCK_SIZE);
    get_mem3Dpel(&(s->dec_imgY), ndec, size_y, size_x);

    // This seems somewhat inefficient. Why not allocate array as [ndec][x] where x goes from 0 to 2?
    if ((s->p_dec_img[0] = (imgpel***)calloc(ndec,sizeof(imgpel**))) == NULL)
    {
      no_mem_exit("errdo.c: p_dec_img[0]");
    }

    if (p_Vid->yuv_format != YUV400)
    {
      get_mem4Dpel(&(s->dec_imgUV), ndec, 2, size_y_cr, size_x_cr);
      if ((s->p_dec_img[1] = (imgpel***)calloc(ndec,sizeof(imgpel**))) == NULL)
      {  
        no_mem_exit("errdo.c: p_dec_img[1]");
      }
      if ((s->p_dec_img[2] = (imgpel***)calloc(ndec,sizeof(imgpel**))) == NULL)
      {
        no_mem_exit("errdo.c: p_dec_img[2]");
      }
    }

    // p_dec_img[plane][dec] are aliases into dec_imgY / dec_imgUV, not separate storage
    for (dec = 0; dec < ndec; dec++)
    {
      s->p_dec_img[0][dec] = s->dec_imgY[dec];
    }

    if (p_Vid->yuv_format != YUV400)
    {
      for (dec = 0; dec < ndec; dec++)
      {
        s->p_dec_img[1][dec] = s->dec_imgUV[dec][0];
        s->p_dec_img[2][dec] = s->dec_imgUV[dec][1];
      }
    }

    break;
  default:
    // nothing to allocate for any other setting of p_Inp->de
    ;
  }
}
/*!
************************************************************************
* \brief
*    Dynamic memory allocation of frame size related global buffers.
*    Buffers allocated here must be freed in void free_global_buffers();
*    if a previous initialisation is still active it is released first.
*
*  \par Input:
*    None as parameters; sizes are read from the globals img and active_sps.
*
*  \par Output:
*    Sum of the sizes reported by the get_mem* helpers (and by
*    InitALFGlobalBuffers() when ADAPTIVE_LOOP_FILTER is defined). The arrays
*    obtained directly from calloc (mb_data, intra_block, quad) are not counted.
***********************************************************************
*/
int init_global_buffers()
{
    int i, quad_range, half_range;
    int memory_size = 0;
    int use_luma_range;

    // drop buffers left over from an earlier call
    if (global_init_done)
        free_global_buffers();

    // reference frame used in find_snr: luma always, chroma unless 4:0:0
    memory_size += get_mem2Dpel(&imgY_ref, img->height, img->width);

    if (active_sps->chroma_format_idc == YUV400)
    {
        imgUV_ref = NULL;
    }
    else
    {
        memory_size += get_mem3Dpel(&imgUV_ref, 2, img->height_cr, img->width_cr);
    }

    // per-macroblock arrays inside structure img
    img->mb_data = (Macroblock *) calloc(img->FrameSizeInMbs, sizeof(Macroblock));
    if (img->mb_data == NULL)
    {
        no_mem_exit("init_global_buffers: img->mb_data");
    }

    img->intra_block = (int*)calloc(img->FrameSizeInMbs, sizeof(int));
    if (img->intra_block == NULL)
    {
        no_mem_exit("init_global_buffers: img->intra_block");
    }

    memory_size += get_mem2Dint(&(img->ipredmode), 4*img->PicWidthInMbs , 4*img->FrameHeightInMbs);

    memory_size += get_mem2Dint(&(img->field_anchor),4*img->FrameHeightInMbs, 4*img->PicWidthInMbs);

    // weighted prediction tables
    memory_size += get_mem3Dint(&(img->wp_weight), 2, MAX_REFERENCE_PICTURES, 3);
    memory_size += get_mem3Dint(&(img->wp_offset), 6, MAX_REFERENCE_PICTURES, 3);
    memory_size += get_mem4Dint(&(img->wbp_weight), 6, MAX_REFERENCE_PICTURES, MAX_REFERENCE_PICTURES, 3);

    // CAVLC mem
    memory_size += get_mem3Dint(&(img->nz_coeff), img->FrameSizeInMbs, 4, 4 + img->num_blk8x8_uv);

    memory_size += get_mem2Dint(&(img->siblock),img->PicWidthInMbs  , img->FrameHeightInMbs);

    // table of squares: sized from the larger of the luma / chroma sample
    // ranges (luma alone when there is no chroma)
    use_luma_range = (img->max_imgpel_value > img->max_imgpel_value_uv
                      || active_sps->chroma_format_idc == YUV400);
    quad_range = use_luma_range ? (img->max_imgpel_value + 1) * 2
                                : (img->max_imgpel_value_uv + 1) * 2;

    img->quad = (int*)calloc (quad_range, sizeof(int));
    if (img->quad == NULL)
    {
        no_mem_exit ("init_img: img->quad");
    }

    // only the first half is filled; the rest keeps the zeros from calloc
    half_range = quad_range/2;
    i = 0;
    while (i < half_range)
    {
        img->quad[i] = i*i;
        ++i;
    }

#ifdef ADAPTIVE_FILTER
    memory_size += get_mem2Ddouble (&tmp_coef, 21, 16);
#endif

#ifdef ADAPTIVE_LOOP_FILTER
    memory_size += InitALFGlobalBuffers();
#endif

    // remember state so that the next call can free these buffers first
    global_init_done = 1;

    img->oldFrameSizeInMbs = img->FrameSizeInMbs;

    return (memory_size);
}
Example #4
0
/*!
*************************************************************************************
* \brief
*    Mode Decision for a macroblock
*
*    Unless the macroblock is forced to intra, the inter modes 1..3 and P8x8 are
*    evaluated first. Afterwards the B-slice direct mode and the intra 8x8, 4x4
*    and 16x16 modes are checked, each only if marked valid in enc_mb. The winner
*    is stored in currMB->best_mode and the final macroblock parameters
*    (transform size flag, intra prediction modes, residual coding, motion
*    vectors, SKIP detection) are set for it.
*
* \param currMB
*    Macroblock to process. The slice, image and input parameter structures are
*    taken from currMB->p_slice, currMB->p_Img and currMB->p_Inp.
*************************************************************************************
*/
void encode_one_macroblock_low (Macroblock *currMB)
{
    Slice *currSlice = currMB->p_slice;
    RDOPTStructure  *p_RDO = currSlice->p_RDO;
    ImageParameters *p_Img = currMB->p_Img;
    InputParameters *p_Inp = currMB->p_Inp;
    PicMotionParams *motion = &p_Img->enc_picture->motion;

    imgpel ***temp_img; // to temp store the Y data for 8x8 transform

    int         block, mode, i=0, j;
    RD_PARAMS   enc_mb;
    // NOTE: this initializer sets only bmcost[0] to INT_MAX; the other four
    // elements are zero-initialized.
    int         bmcost[5] = {INT_MAX};
    double      rd_cost = 0;
    int         cost = 0;
    int         min_cost = INT_MAX, cost_direct=0, have_direct=0;
    int         intra1 = 0;
    int         temp_cpb = 0;
    Boolean     best_transform_flag = FALSE;
    int         cost8x8_direct = 0;
    short       islice      = (short) (currSlice->slice_type == I_SLICE);
    short       bslice      = (short) (currSlice->slice_type == B_SLICE);
    short       pslice      = (short) ((currSlice->slice_type == P_SLICE) || (currSlice->slice_type == SP_SLICE));
    short       intra       = (short) (islice || (pslice && currMB->mb_y == p_Img->mb_y_upd && p_Img->mb_y_upd!=p_Img->mb_y_intra));
    int         lambda_mf[3];
    Block8x8Info *b8x8info   = p_Img->b8x8info;
    //int         mb_available[3] = { 1, 1, 1};

    char   **ipredmodes = p_Img->ipredmode;
    // allmvs is NULL for I slices; it is only dereferenced in the SKIP check
    // near the end, which is guarded by pslice.
    short   *allmvs = (currSlice->slice_type == I_SLICE) ? NULL: currSlice->all_mv[0][0][0][0][0];
    int     ****i4p;  //for non-RD-opt. mode
    imgpel  **mb_pred = currSlice->mb_pred[0];

    Boolean tmp_8x8_flag, tmp_no_mbpart;

    BestMode    md_best;
    Info8x8 best;

    init_md_best(&md_best);

    // Init best (need to create simple function)
    best.pdir = 0;
    best.bipred = 0;
    best.ref[LIST_0] = 0;
    best.ref[LIST_1] = -1;

    // scratch planes for the intra 8x8 reconstruction; released by
    // free_mem3Dpel() at the end of this function
    get_mem3Dpel(&temp_img, 3, MB_BLOCK_SIZE, MB_BLOCK_SIZE);

    intra |= RandomIntra (p_Img, currMB->mbAddrX);    // Forced Pseudo-Random Intra

    //===== Setup Macroblock encoding parameters =====
    init_enc_mb_params(currMB, &enc_mb, intra);
    if (p_Inp->AdaptiveRounding)
    {
        reset_adaptive_rounding(p_Img);
    }

    if (currSlice->MbaffFrameFlag)
    {
        reset_mb_nz_coeff(p_Img, currMB->mbAddrX);
    }

    //=====   S T O R E   C O D I N G   S T A T E   =====
    //---------------------------------------------------
    currSlice->store_coding_state (currMB, currSlice->p_RDO->cs_cm);

    if (!intra)
    {
        //===== set direct motion vectors =====
        currMB->best_mode = 10;  // let us set best_mode to an intra mode to avoid possible bug with RDOQ
        if (bslice && enc_mb.valid[0])
        {
            currSlice->Get_Direct_Motion_Vectors (currMB);
        }

        if (p_Inp->CtxAdptLagrangeMult == 1)
        {
            get_initial_mb16x16_cost(currMB);
        }

        //===== MOTION ESTIMATION FOR 16x16, 16x8, 8x16 BLOCKS =====
        for (mode = 1; mode < 4; mode++)
        {
            best.bipred = 0;
            best.mode = (char) mode;
            b8x8info->best[mode][0].bipred = 0;
            if (enc_mb.valid[mode])
            {
                // mode 1 has a single partition, modes 2 and 3 have two
                for (cost=0, block=0; block<(mode==1?1:2); block++)
                {
                    update_lambda_costs(currMB, &enc_mb, lambda_mf);
                    PartitionMotionSearch (currMB, mode, block, lambda_mf);

                    //--- set 4x4 block indizes (for getting MV) ---
                    j = (block==1 && mode==2 ? 2 : 0);
                    i = (block==1 && mode==3 ? 2 : 0);

                    //--- get cost and reference frame for List 0 prediction ---
                    bmcost[LIST_0] = INT_MAX;
                    list_prediction_cost(currMB, LIST_0, block, mode, &enc_mb, bmcost, best.ref);

                    if (bslice)
                    {
                        //--- get cost and reference frame for List 1 prediction ---
                        bmcost[LIST_1] = INT_MAX;
                        list_prediction_cost(currMB, LIST_1, block, mode, &enc_mb, bmcost, best.ref);

                        // Compute bipredictive cost between best list 0 and best list 1 references
                        list_prediction_cost(currMB, BI_PRED, block, mode, &enc_mb, bmcost, best.ref);

                        // currently Bi predictive ME is only supported for modes 1, 2, 3 and ref 0
                        if (is_bipred_enabled(p_Inp, mode))
                        {
                            list_prediction_cost(currMB, BI_PRED_L0, block, mode, &enc_mb, bmcost, 0);
                            list_prediction_cost(currMB, BI_PRED_L1, block, mode, &enc_mb, bmcost, 0);
                        }
                        else
                        {
                            bmcost[BI_PRED_L0] = INT_MAX;
                            bmcost[BI_PRED_L1] = INT_MAX;
                        }

                        // Determine prediction list based on mode cost
                        determine_prediction_list(bmcost, &best, &cost);
                    }
                    else // if (bslice)
                    {
                        best.pdir  = 0;
                        cost      += bmcost[LIST_0];
                    }

                    assign_enc_picture_params(currMB, mode, &best, 2 * block);

                    //----- set reference frame and direction parameters -----
                    set_block8x8_info(b8x8info, mode, block, &best);

                    //--- set reference frames and motion vectors ---
                    if (mode>1 && block==0)
                        currSlice->set_ref_and_motion_vectors (currMB, motion, &best, block);
                } // for (block=0; block<(mode==1?1:2); block++)

                currMB->luma_transform_size_8x8_flag = FALSE;
                if (p_Inp->Transform8x8Mode) //for inter rd-off, set 8x8 to do 8x8 transform
                {
                    SetModesAndRefframeForBlocks(currMB, (short) mode);
                    currMB->luma_transform_size_8x8_flag = (byte) TransformDecision(currMB, -1, &cost);
                }

                // keep the cheapest of modes 1..3 together with its transform size flag
                if (cost < min_cost)
                {
                    currMB->best_mode = (short) mode;
                    min_cost  = cost;
                    best_transform_flag = currMB->luma_transform_size_8x8_flag;

                    if (p_Inp->CtxAdptLagrangeMult == 1)
                    {
                        adjust_mb16x16_cost(currMB, cost);
                    }
                }
            } // if (enc_mb.valid[mode])
        } // for (mode=1; mode<4; mode++)

        if (enc_mb.valid[P8x8])
        {
            //===== store coding state of macroblock =====
            currSlice->store_coding_state (currMB, currSlice->p_RDO->cs_mb);
            memset( currSlice->cofAC[0][0][0], 0, 2080 * sizeof(int)); // 4 * 4 * 2 * 65

            currMB->valid_8x8 = FALSE;

            if (p_Inp->Transform8x8Mode)
            {
                ResetRD8x8Data(p_Img, p_RDO->tr8x8);
                //===========================================================
                // Check 8x8 partition with transform size 8x8
                //===========================================================
                //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
                for (cost_direct =  0, block = 0; block < 4; block++)
                {
                    submacroblock_mode_decision_low(currMB, &enc_mb, p_RDO->tr8x8, p_RDO->cofAC8x8ts[block],
                                                    &have_direct, block, &cost_direct, &cost, &cost8x8_direct, 1);

                    set_subblock8x8_info(b8x8info, P8x8, block, p_RDO->tr8x8);
                }

                currMB->luma_transform_size_8x8_flag = FALSE; //switch to 4x4 transform size
            }// if (p_Inp->Transform8x8Mode)


            if (p_Inp->Transform8x8Mode != 2)
            {
                ResetRD8x8Data(p_Img, p_RDO->tr4x4);
                //=================================================================
                // Check 8x8, 8x4, 4x8 and 4x4 partitions with transform size 4x4
                //=================================================================
                //=====  LOOP OVER 8x8 SUB-PARTITIONS  (Motion Estimation & Mode Decision) =====
                for (cost_direct = 0, block=0; block<4; block++)
                {
                    submacroblock_mode_decision_low(currMB, &enc_mb, p_RDO->tr4x4, p_RDO->coefAC8x8[block],
                                                    &have_direct, block, &cost_direct, &cost, &cost8x8_direct, 0);

                    set_subblock8x8_info(b8x8info, P8x8, block, p_RDO->tr4x4);
                }
            }// if (p_Inp->Transform8x8Mode != 2)

            if (p_Inp->RCEnable)
                rc_store_diff(currSlice->diffy, &p_Img->pCurImg[currMB->opix_y], currMB->pix_x, mb_pred);

            //check cost for P8x8 for non-rdopt mode
            if (((p_Inp->Transform8x8Mode < 2) && (p_RDO->tr4x4->mb_p8x8_cost < min_cost)) ||
                    ((p_Inp->Transform8x8Mode >  0) && (p_RDO->tr8x8->mb_p8x8_cost < min_cost)))
            {
                currMB->best_mode = P8x8;
                if (p_Inp->Transform8x8Mode == 2)
                {
                    min_cost = p_RDO->tr8x8->mb_p8x8_cost;
                    currMB->luma_transform_size_8x8_flag = TRUE;
                }
                else if (p_Inp->Transform8x8Mode)
                {
                    // both transform sizes were evaluated: take the cheaper one and
                    // let GetBestTransformP8x8() break a tie
                    if (p_RDO->tr8x8->mb_p8x8_cost < p_RDO->tr4x4->mb_p8x8_cost)
                    {
                        min_cost = p_RDO->tr8x8->mb_p8x8_cost;
                        currMB->luma_transform_size_8x8_flag = TRUE;
                    }
                    else if(p_RDO->tr4x4->mb_p8x8_cost < p_RDO->tr8x8->mb_p8x8_cost)
                    {
                        min_cost = p_RDO->tr4x4->mb_p8x8_cost;
                        currMB->luma_transform_size_8x8_flag = FALSE;
                    }
                    else
                    {
                        if (GetBestTransformP8x8(currMB) == 0)
                        {
                            min_cost = p_RDO->tr4x4->mb_p8x8_cost;
                            currMB->luma_transform_size_8x8_flag = FALSE;
                        }
                        else
                        {
                            min_cost = p_RDO->tr8x8->mb_p8x8_cost;
                            currMB->luma_transform_size_8x8_flag = TRUE;
                        }
                    }
                }
                else
                {
                    min_cost = p_RDO->tr4x4->mb_p8x8_cost;
                    currMB->luma_transform_size_8x8_flag = FALSE;
                }
            }// if ((p_RDO->tr4x4->mb_p8x8_cost < min_cost || p_RDO->tr8x8->mb_p8x8_cost < min_cost))
            p_Img->giRDOpt_B8OnlyFlag = FALSE;
        }

        // Find a motion vector for the Skip mode
        if(pslice)
            FindSkipModeMotionVector (currMB);
    }
    else // if (!intra)
    {
        min_cost = INT_MAX;
    }


    //========= C H O O S E   B E S T   M A C R O B L O C K   M O D E =========
    //-------------------------------------------------------------------------
    tmp_8x8_flag  = currMB->luma_transform_size_8x8_flag;  //save 8x8_flag
    tmp_no_mbpart = currMB->NoMbPartLessThan8x8Flag;      //save no-part-less
    if ((p_Img->yuv_format != YUV400) && (p_Img->yuv_format != YUV444))
        // precompute all chroma intra prediction modes
        intra_chroma_prediction(currMB, NULL, NULL, NULL);

    if (enc_mb.valid[0] && bslice) // check DIRECT MODE
    {
        if(have_direct)
        {
            switch(p_Inp->Transform8x8Mode)
            {
            case 1: // Mixture of 8x8 & 4x4 transform
                cost = ((cost8x8_direct < cost_direct) || !(enc_mb.valid[5] && enc_mb.valid[6] && enc_mb.valid[7]))
                       ? cost8x8_direct : cost_direct;
                break;
            case 2: // 8x8 Transform only
                cost = cost8x8_direct;
                break;
            default: // 4x4 Transform only
                cost = cost_direct;
                break;
            }
        }
        else
        {   //!have_direct
            cost = GetDirectCostMB (currMB);
        }
        // INT_MAX is left untouched, any other cost is reduced by a lambda dependent amount
        if (cost!=INT_MAX)
        {
            cost -= (int)floor(16 * enc_mb.lambda_md + 0.4999);
        }

        if (cost <= min_cost)
        {
            if(p_Img->active_sps->direct_8x8_inference_flag && p_Inp->Transform8x8Mode)
            {
                if(p_Inp->Transform8x8Mode==2)
                    currMB->luma_transform_size_8x8_flag = TRUE;
                else
                {
                    if(cost8x8_direct < cost_direct)
                        currMB->luma_transform_size_8x8_flag = TRUE;
                    else
                        currMB->luma_transform_size_8x8_flag = FALSE;
                }
            }
            else
                currMB->luma_transform_size_8x8_flag = FALSE;

            //Rate control
            if (p_Inp->RCEnable)
                rc_store_diff(currSlice->diffy, &p_Img->pCurImg[currMB->opix_y], currMB->pix_x, mb_pred);

            min_cost  = cost;
            currMB->best_mode = 0;
            tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;
        }
        else
        {
            currMB->luma_transform_size_8x8_flag = (byte) tmp_8x8_flag; // restore if not best
            currMB->NoMbPartLessThan8x8Flag = (byte) tmp_no_mbpart; // restore if not best
        }
    }

    // from here on the comparison is done against currMB->min_rdcost (double)
    currMB->min_rdcost = (double) min_cost;

    if (enc_mb.valid[I8MB]) // check INTRA8x8
    {
        currMB->luma_transform_size_8x8_flag = TRUE; // at this point cost will ALWAYS be less than min_cost

        currMB->mb_type = currMB->ar_mode = I8MB;
        temp_cpb = Mode_Decision_for_Intra8x8Macroblock (currMB, enc_mb.lambda_md, &rd_cost);


        if (rd_cost <= currMB->min_rdcost) //HYU_NOTE. bug fix. 08/15/07
        {
            currMB->cbp = temp_cpb;
            if (p_Img->P444_joined)
            {
                currSlice->curr_cbp[0] = currSlice->cmp_cbp[1];
                currSlice->curr_cbp[1] = currSlice->cmp_cbp[2];
            }

            if(enc_mb.valid[I4MB])
            {
                //coeffs
                if (p_Inp->Transform8x8Mode != 2)
                {
                    // swap the coefficient buffer pointers of p_RDO and currSlice
                    i4p = p_RDO->cofAC;
                    p_RDO->cofAC = currSlice->cofAC;
                    currSlice->cofAC = i4p;
                }
            }

            // save the reconstruction in temp_img; it is copied back further
            // below if I8MB ends up as best_mode
            copy_image_data_16x16(temp_img[0], &p_Img->enc_picture->imgY[currMB->pix_y], 0, currMB->pix_x);

            if (p_Img->P444_joined)
            {
                copy_image_data_16x16(temp_img[1], &p_Img->enc_picture->imgUV[0][currMB->pix_y], 0, currMB->pix_x);
                copy_image_data_16x16(temp_img[2], &p_Img->enc_picture->imgUV[1][currMB->pix_y], 0, currMB->pix_x);
            }

            //Rate control
            if (p_Inp->RCEnable)
                rc_store_diff(currSlice->diffy, &p_Img->pCurImg[currMB->opix_y], currMB->pix_x, mb_pred);

            currMB->min_rdcost  = rd_cost;
            currMB->best_mode = I8MB;
            tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;
        }
        else
        {
            currMB->luma_transform_size_8x8_flag = (byte) tmp_8x8_flag; // restore if not best
            if (p_Img->P444_joined)
            {
                currMB->cbp |= currSlice->curr_cbp[0];
                currMB->cbp |= currSlice->curr_cbp[1];
                currSlice->cmp_cbp[1] = currMB->cbp;
                currSlice->cmp_cbp[2] = currMB->cbp;
            }
        }
    }

    if (enc_mb.valid[I4MB]) // check INTRA4x4
    {
        currMB->luma_transform_size_8x8_flag = FALSE;
        currMB->mb_type = currMB->ar_mode = I4MB;
        temp_cpb = Mode_Decision_for_Intra4x4Macroblock (currMB, enc_mb.lambda_md, &rd_cost);

        if (rd_cost <= currMB->min_rdcost)
        {
            currMB->cbp = temp_cpb;

            //Rate control
            if (p_Inp->RCEnable)
                rc_store_diff(currSlice->diffy, &p_Img->pCurImg[currMB->opix_y], currMB->pix_x, mb_pred);

            currMB->min_rdcost  = rd_cost;
            currMB->best_mode = I4MB;
            tmp_8x8_flag = currMB->luma_transform_size_8x8_flag;
        }
        else
        {
            currMB->luma_transform_size_8x8_flag = (byte) tmp_8x8_flag; // restore if not best
            if (p_Img->P444_joined)
            {
                currMB->cbp |= currSlice->curr_cbp[0];
                currMB->cbp |= currSlice->curr_cbp[1];
                currSlice->cmp_cbp[1] = currMB->cbp;
                currSlice->cmp_cbp[2] = currMB->cbp;
            }
            //coeffs
            // swap the coefficient buffer pointers of p_RDO and currSlice
            i4p = p_RDO->cofAC;
            p_RDO->cofAC = currSlice->cofAC;
            currSlice->cofAC=i4p;
        }
    }
    if (enc_mb.valid[I16MB]) // check INTRA16x16
    {
        currMB->luma_transform_size_8x8_flag = FALSE;
        intrapred_16x16 (currMB, PLANE_Y);
        if (p_Img->P444_joined)
        {
            select_plane(p_Img, PLANE_U);
            intrapred_16x16 (currMB, PLANE_U);
            select_plane(p_Img, PLANE_V);
            intrapred_16x16 (currMB, PLANE_V);
            select_plane(p_Img, PLANE_Y);
        }

        rd_cost = currSlice->find_sad_16x16 (currMB);

        // note: strict '<' here, whereas the I8MB and I4MB checks above use '<='
        if (rd_cost < currMB->min_rdcost)
        {
            //Rate control
            if (p_Inp->RCEnable)
                rc_store_diff(currSlice->diffy, &p_Img->pCurImg[currMB->opix_y], currMB->pix_x, currSlice->mpr_16x16[0][(short) currMB->i16mode]);

            currMB->best_mode   = I16MB;
            currMB->min_rdcost  = rd_cost;
            currMB->cbp = currMB->trans_16x16 (currMB, PLANE_Y);

            if (p_Img->P444_joined)
            {
                select_plane(p_Img, PLANE_U);
                currSlice->cmp_cbp[1] = currMB->trans_16x16(currMB, PLANE_U);
                select_plane(p_Img, PLANE_V);
                currSlice->cmp_cbp[2] = currMB->trans_16x16(currMB, PLANE_V);

                select_plane(p_Img, PLANE_Y);
                currMB->cbp |= currSlice->cmp_cbp[1];
                currMB->cbp |= currSlice->cmp_cbp[2];
                currSlice->cmp_cbp[1] = currMB->cbp;
                currSlice->cmp_cbp[2] = currMB->cbp;
            }

        }
        else
        {
            currMB->luma_transform_size_8x8_flag = (byte) tmp_8x8_flag; // restore
            currMB->NoMbPartLessThan8x8Flag = (byte) tmp_no_mbpart;     // restore
        }
    }

    // value is handed to update_refresh_map() below
    intra1 = IS_INTRA(currMB);

    //=====  S E T   F I N A L   M A C R O B L O C K   P A R A M E T E R S ======
    //---------------------------------------------------------------------------
    {
        //===== set parameters for chosen mode =====
        SetModesAndRefframeForBlocks (currMB, currMB->best_mode);

        if (currMB->best_mode == P8x8)
        {
            if (currMB->luma_transform_size_8x8_flag && (p_RDO->tr8x8->cbp8x8 == 0) && p_Inp->Transform8x8Mode != 2)
                currMB->luma_transform_size_8x8_flag = FALSE;

            SetCoeffAndReconstruction8x8 (currMB);

            // an inter macroblock stores DC_PRED as intra prediction mode for all its blocks
            memset(currMB->intra_pred_modes, DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));
            for (j = currMB->block_y; j < currMB->block_y + BLOCK_MULTIPLE; j++)
                memset(&ipredmodes[j][currMB->block_x], DC_PRED, BLOCK_MULTIPLE * sizeof(char));
        }
        else
        {
            //===== set parameters for chosen mode =====
            if (currMB->best_mode == I8MB)
            {
                memcpy(currMB->intra_pred_modes,currMB->intra_pred_modes8x8, MB_BLOCK_PARTITIONS * sizeof(char));
                for(j = currMB->block_y; j < currMB->block_y + BLOCK_MULTIPLE; j++)
                    memcpy(&p_Img->ipredmode[j][currMB->block_x],&p_Img->ipredmode8x8[j][currMB->block_x], BLOCK_MULTIPLE * sizeof(char));

                //--- restore reconstruction for 8x8 transform ---
                copy_image_data_16x16(&p_Img->enc_picture->imgY[currMB->pix_y], temp_img[0], currMB->pix_x, 0);

                if (p_Img->P444_joined)
                {
                    copy_image_data_16x16(&p_Img->enc_picture->imgUV[0][currMB->pix_y], temp_img[1], currMB->pix_x, 0);
                    copy_image_data_16x16(&p_Img->enc_picture->imgUV[1][currMB->pix_y], temp_img[2], currMB->pix_x, 0);
                }
            }

            if ((currMB->best_mode!=I4MB)&&(currMB->best_mode != I8MB))
            {
                memset(currMB->intra_pred_modes,DC_PRED, MB_BLOCK_PARTITIONS * sizeof(char));
                for(j = currMB->block_y; j < currMB->block_y + BLOCK_MULTIPLE; j++)
                    memset(&ipredmodes[j][currMB->block_x],DC_PRED, BLOCK_MULTIPLE * sizeof(char));
                currMB->ar_mode = currMB->best_mode;

                if (currMB->best_mode!=I16MB)
                {
                    // modes 1..3: bring back the transform flag recorded with the best inter cost
                    if((currMB->best_mode >= 1) && (currMB->best_mode <= 3))
                        currMB->luma_transform_size_8x8_flag = (byte) best_transform_flag;

                    if (currSlice->P444_joined)
                    {
                        luma_residual_coding_p444(currMB);
                        if((currMB->cbp==0 && currSlice->cmp_cbp[1] == 0 && currSlice->cmp_cbp[2] == 0) &&(currMB->best_mode == 0))
                            currMB->luma_transform_size_8x8_flag = FALSE;
                    }
                    else
                    {
                        luma_residual_coding(currMB);
                        if((currMB->cbp==0)&&(currMB->best_mode == 0))
                            currMB->luma_transform_size_8x8_flag = FALSE;
                    }

                    //Rate control
                    if (p_Inp->RCEnable)
                        rc_store_diff(currSlice->diffy, &p_Img->pCurImg[currMB->opix_y], currMB->pix_x, mb_pred);
                }
            }
        }
        //check luma cbp for transform size flag
        if (((currMB->cbp&15) == 0) && currMB->mb_type != I4MB && currMB->mb_type != I8MB)
            currMB->luma_transform_size_8x8_flag = FALSE;

        // precompute all chroma intra prediction modes
        if ((p_Img->yuv_format != YUV400) && (p_Img->yuv_format != YUV444))
            intra_chroma_prediction(currMB, NULL, NULL, NULL);

        currMB->i16offset = 0;

        if ((p_Img->yuv_format != YUV400) && (p_Img->yuv_format != YUV444))
            chroma_residual_coding (currMB);

        if (currMB->best_mode == I16MB)
        {
            currMB->i16offset = I16Offset  (currMB->cbp, currMB->i16mode);
        }

        currSlice->SetMotionVectorsMB (currMB, motion);

        //===== check for SKIP mode =====
        // A P-slice macroblock with best_mode 1, no coded coefficients, reference
        // index 0 and a motion vector equal to allmvs[0..1] is turned into mode 0.
        if(p_Img->P444_joined)
        {
            if ((pslice) && currMB->best_mode == 1 && currMB->cbp==0 && currSlice->cmp_cbp[1] == 0 && currSlice->cmp_cbp[2] == 0 &&
                    motion->ref_idx[LIST_0][currMB->block_y][currMB->block_x]    == 0 &&
                    motion->mv     [LIST_0][currMB->block_y][currMB->block_x][0] == allmvs[0] &&
                    motion->mv     [LIST_0][currMB->block_y][currMB->block_x][1] == allmvs[1])
            {
                currMB->mb_type = currMB->b8x8[0].mode = currMB->b8x8[1].mode = currMB->b8x8[2].mode = currMB->b8x8[3].mode = 0;
                currMB->luma_transform_size_8x8_flag = FALSE;
            }
        }
        else if ((pslice) && currMB->best_mode == 1 && currMB->cbp==0 &&
                 motion->ref_idx[LIST_0][currMB->block_y][currMB->block_x]    == 0 &&
                 motion->mv     [LIST_0][currMB->block_y][currMB->block_x][0] == allmvs[0] &&
                 motion->mv     [LIST_0][currMB->block_y][currMB->block_x][1] == allmvs[1])
        {
            currMB->mb_type = currMB->b8x8[0].mode = currMB->b8x8[1].mode = currMB->b8x8[2].mode = currMB->b8x8[3].mode = 0;
            currMB->luma_transform_size_8x8_flag = FALSE;
        }

        if (currSlice->MbaffFrameFlag || (p_Inp->UseRDOQuant && currSlice->RDOQ_QP_Num > 1))
            set_mbaff_parameters(currMB);
    }

    // Rate control
    if(p_Inp->RCEnable && p_Inp->RCUpdateMode <= MAX_RC_MODE)
        rc_store_mad(currMB);

    //===== Decide if this MB will restrict the reference frames =====
    if (p_Inp->RestrictRef)
        update_refresh_map(currMB, intra, intra1);


    /*update adaptive rounding offset p_Inp*/
    // NOTE(review): this tests p_Img->AdaptiveRounding while the reset near the
    // top of the function tests p_Inp->AdaptiveRounding - confirm this is intended.
    if (p_Img->AdaptiveRounding)
    {
        update_offset_params(currMB, currMB->best_mode, currMB->luma_transform_size_8x8_flag);
    }

    // release the scratch planes allocated at the top of the function
    free_mem3Dpel(temp_img);
}