//! \brief Fill the motion-vector / reference-index cache for an inter macroblock
//!        from its decoded neighbors (left, top, left-top, right-top).
//!
//! The cache is a 30-entry scratch grid (6 columns x 5 rows): the current MB's
//! sixteen 4x4 blocks occupy a 4x4 region of it, and the border entries hold
//! the adjacent 4x4 blocks of the neighbor MBs so that MV prediction can index
//! all neighbors uniformly.  Cache index mapping used below:
//!   index 0            = left-top neighbor's bottom-right 4x4 block
//!   indices 1..4       = top neighbor's bottom row (scan 12..15)
//!   index 5            = right-top neighbor's bottom-left 4x4 block
//!   indices 6/12/18/24 = left neighbor's right column (scan 3/7/11/15)
//! Unavailable neighbors get REF_NOT_AVAIL, available-but-intra neighbors get
//! REF_NOT_IN_LIST; in both cases the corresponding MVs are zeroed.
//! Only list 0 (LIST_A index 0) is filled here.
void WelsFillCacheInter (PNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
                         int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30],
                         PDqLayer pCurLayer) {
  int32_t iCurXy = pCurLayer->iMbXyIndex;   // raster index of the current MB
  int32_t iTopXy = 0;
  int32_t iLeftXy = 0;
  int32_t iLeftTopXy = 0;
  int32_t iRightTopXy = 0;

  //stuff non_zero_coeff_count from pNeighAvail(left and top)
  WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);

  // Derive raster MB indices of the neighbors that are actually available.
  if (pNeighAvail->iTopAvail) {
    iTopXy = iCurXy - pCurLayer->iMbWidth;
  }
  if (pNeighAvail->iLeftAvail) {
    iLeftXy = iCurXy - 1;
  }
  if (pNeighAvail->iLeftTopAvail) {
    iLeftTopXy = iCurXy - 1 - pCurLayer->iMbWidth;
  }
  if (pNeighAvail->iRightTopAvail) {
    iRightTopXy = iCurXy + 1 - pCurLayer->iMbWidth;
  }

  //stuff mv_cache and iRefIdxArray from left and top (inter)
  // Left neighbor: copy its rightmost 4x4 column (scan 3/7/11/15) into cache
  // column 0 (cache indices 6/12/18/24).
  if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
    ST32 (iMvArray[0][ 6], LD32 (pCurLayer->pMv[0][iLeftXy][ 3]));
    ST32 (iMvArray[0][12], LD32 (pCurLayer->pMv[0][iLeftXy][ 7]));
    ST32 (iMvArray[0][18], LD32 (pCurLayer->pMv[0][iLeftXy][11]));
    ST32 (iMvArray[0][24], LD32 (pCurLayer->pMv[0][iLeftXy][15]));
    iRefIdxArray[0][ 6] = pCurLayer->pRefIndex[0][iLeftXy][ 3];
    iRefIdxArray[0][12] = pCurLayer->pRefIndex[0][iLeftXy][ 7];
    iRefIdxArray[0][18] = pCurLayer->pRefIndex[0][iLeftXy][11];
    iRefIdxArray[0][24] = pCurLayer->pRefIndex[0][iLeftXy][15];
  } else {
    ST32 (iMvArray[0][ 6], 0);
    ST32 (iMvArray[0][12], 0);
    ST32 (iMvArray[0][18], 0);
    ST32 (iMvArray[0][24], 0);
    if (0 == pNeighAvail->iLeftAvail) { //not available
      iRefIdxArray[0][ 6] = iRefIdxArray[0][12] =
        iRefIdxArray[0][18] = iRefIdxArray[0][24] = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iRefIdxArray[0][ 6] = iRefIdxArray[0][12] =
        iRefIdxArray[0][18] = iRefIdxArray[0][24] = REF_NOT_IN_LIST;
    }
  }

  // Left-top neighbor: its bottom-right 4x4 block (scan 15) -> cache index 0.
  if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
    ST32 (iMvArray[0][0], LD32 (pCurLayer->pMv[0][iLeftTopXy][15]));
    iRefIdxArray[0][0] = pCurLayer->pRefIndex[0][iLeftTopXy][15];
  } else {
    ST32 (iMvArray[0][0], 0);
    if (0 == pNeighAvail->iLeftTopAvail) { //not available
      iRefIdxArray[0][0] = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iRefIdxArray[0][0] = REF_NOT_IN_LIST;
    }
  }

  // Top neighbor: its bottom row (scan 12..15) -> cache indices 1..4.
  // 64-bit copies move two MVs (2 * int16_t[2]) at a time.
  if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
    ST64 (iMvArray[0][1], LD64 (pCurLayer->pMv[0][iTopXy][12]));
    ST64 (iMvArray[0][3], LD64 (pCurLayer->pMv[0][iTopXy][14]));
    ST32 (&iRefIdxArray[0][1], LD32 (&pCurLayer->pRefIndex[0][iTopXy][12]));
  } else {
    ST64 (iMvArray[0][1], 0);
    ST64 (iMvArray[0][3], 0);
    if (0 == pNeighAvail->iTopAvail) { //not available
      iRefIdxArray[0][1] = iRefIdxArray[0][2] =
        iRefIdxArray[0][3] = iRefIdxArray[0][4] = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iRefIdxArray[0][1] = iRefIdxArray[0][2] =
        iRefIdxArray[0][3] = iRefIdxArray[0][4] = REF_NOT_IN_LIST;
    }
  }

  // Right-top neighbor: its bottom-left 4x4 block (scan 12) -> cache index 5.
  if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
    ST32 (iMvArray[0][5], LD32 (pCurLayer->pMv[0][iRightTopXy][12]));
    iRefIdxArray[0][5] = pCurLayer->pRefIndex[0][iRightTopXy][12];
  } else {
    ST32 (iMvArray[0][5], 0);
    if (0 == pNeighAvail->iRightTopAvail) { //not available
      iRefIdxArray[0][5] = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iRefIdxArray[0][5] = REF_NOT_IN_LIST;
    }
  }

  //right-top 4*4 block unavailable
  // Interior right-border cache entries never have decoded data at fill time;
  // mark them unavailable so prediction falls back correctly.
  ST32 (iMvArray[0][ 9], 0);
  ST32 (iMvArray[0][21], 0);
  ST32 (iMvArray[0][11], 0);
  ST32 (iMvArray[0][17], 0);
  ST32 (iMvArray[0][23], 0);
  iRefIdxArray[0][ 9] = iRefIdxArray[0][21] = iRefIdxArray[0][11] =
    iRefIdxArray[0][17] = iRefIdxArray[0][23] = REF_NOT_AVAIL;
}
//! \brief Derive the motion vector predictor for a P-Skip macroblock from its
//!        spatial neighbors (H.264 spec 8.4.1.1 special P-Skip rule).
//!
//! Writes the predicted MV into iMvp[2] (x, y).  The predictor is forced to
//! (0,0) when the left or top neighbor is unavailable, or when either of them
//! uses reference index 0 with a zero MV; otherwise the standard median
//! prediction over A(left)/B(top)/C(right-top, falling back to D(left-top))
//! is applied.  Neighbor availability additionally requires the neighbor MB
//! to belong to the same slice (same pSliceIdc value).
void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
  bool bTopAvail, bLeftTopAvail, bRightTopAvail, bLeftAvail;
  int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc;
  int32_t iLeftTopType, iRightTopType, iTopType, iLeftType;
  int32_t iCurX, iCurY, iCurXy, iLeftXy, iTopXy, iLeftTopXy, iRightTopXy;

  int8_t iLeftRef;
  int8_t iTopRef;
  int8_t iRightTopRef;
  int8_t iLeftTopRef;
  int8_t iDiagonalRef;    // C (right-top) ref, or D (left-top) as fallback
  int8_t iMatchRef;       // count of neighbors referencing index 0
  int16_t iMvA[2], iMvB[2], iMvC[2], iMvD[2];   // left / top / right-top / left-top MVs

  iCurXy = pCurLayer->iMbXyIndex;
  iCurX = pCurLayer->iMbX;
  iCurY = pCurLayer->iMbY;
  iCurSliceIdc = pCurLayer->pSliceIdc[iCurXy];

  // Availability: the neighbor must exist inside the picture AND lie in the
  // same slice as the current MB.
  if (iCurX != 0) {
    iLeftXy = iCurXy - 1;
    iLeftSliceIdc = pCurLayer->pSliceIdc[iLeftXy];
    bLeftAvail = (iLeftSliceIdc == iCurSliceIdc);
  } else {
    bLeftAvail = 0;
    bLeftTopAvail = 0;
  }

  if (iCurY != 0) {
    iTopXy = iCurXy - pCurLayer->iMbWidth;
    iTopSliceIdc = pCurLayer->pSliceIdc[iTopXy];
    bTopAvail = (iTopSliceIdc == iCurSliceIdc);
    if (iCurX != 0) {
      iLeftTopXy = iTopXy - 1;
      iLeftTopSliceIdc = pCurLayer->pSliceIdc[iLeftTopXy];
      bLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc);
    } else {
      bLeftTopAvail = 0;
    }
    if (iCurX != (pCurLayer->iMbWidth - 1)) {
      iRightTopXy = iTopXy + 1;
      iRightTopSliceIdc = pCurLayer->pSliceIdc[iRightTopXy];
      bRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc);
    } else {
      bRightTopAvail = 0;
    }
  } else {
    bTopAvail = 0;
    bLeftTopAvail = 0;
    bRightTopAvail = 0;
  }

  // MB types of the available neighbors (0 when unavailable); the position
  // re-checks guard against indices that were never computed above.
  iLeftType = ((iCurX != 0 && bLeftAvail) ? pCurLayer->pMbType[iLeftXy] : 0);
  iTopType = ((iCurY != 0 && bTopAvail) ? pCurLayer->pMbType[iTopXy] : 0);
  iLeftTopType = ((iCurX != 0 && iCurY != 0 && bLeftTopAvail) ? pCurLayer->pMbType[iLeftTopXy] : 0);
  iRightTopType = ((iCurX != pCurLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail) ?
                   pCurLayer->pMbType[iRightTopXy] : 0);

  /*get neb mv&iRefIdxArray*/
  /*left*/
  // A = left neighbor's rightmost 4x4 block (scan index 3).
  if (bLeftAvail && IS_INTER (iLeftType)) {
    ST32 (iMvA, LD32 (pCurLayer->pMv[0][iLeftXy][3]));
    iLeftRef = pCurLayer->pRefIndex[0][iLeftXy][3];
  } else {
    ST32 (iMvA, 0);
    if (0 == bLeftAvail) { //not available
      iLeftRef = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iLeftRef = REF_NOT_IN_LIST;
    }
  }
  // P-Skip rule: left unavailable, or left uses ref 0 with zero MV -> MVP = 0.
  if (REF_NOT_AVAIL == iLeftRef ||
      (0 == iLeftRef && 0 == * (int32_t*)iMvA)) {
    ST32 (iMvp, 0);
    return;
  }

  /*top*/
  // B = top neighbor's bottom-left 4x4 block (scan index 12).
  if (bTopAvail && IS_INTER (iTopType)) {
    ST32 (iMvB, LD32 (pCurLayer->pMv[0][iTopXy][12]));
    iTopRef = pCurLayer->pRefIndex[0][iTopXy][12];
  } else {
    ST32 (iMvB, 0);
    if (0 == bTopAvail) { //not available
      iTopRef = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iTopRef = REF_NOT_IN_LIST;
    }
  }
  // Same P-Skip early exit for the top neighbor.
  if (REF_NOT_AVAIL == iTopRef ||
      (0 == iTopRef && 0 == * (int32_t*)iMvB)) {
    ST32 (iMvp, 0);
    return;
  }

  /*right_top*/
  // C = right-top neighbor's bottom-left 4x4 block (scan index 12).
  if (bRightTopAvail && IS_INTER (iRightTopType)) {
    ST32 (iMvC, LD32 (pCurLayer->pMv[0][iRightTopXy][12]));
    iRightTopRef = pCurLayer->pRefIndex[0][iRightTopXy][12];
  } else {
    ST32 (iMvC, 0);
    if (0 == bRightTopAvail) { //not available
      iRightTopRef = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iRightTopRef = REF_NOT_IN_LIST;
    }
  }

  /*left_top*/
  // D = left-top neighbor's bottom-right 4x4 block (scan index 15); only used
  // as a substitute for C when C is unavailable.
  if (bLeftTopAvail && IS_INTER (iLeftTopType)) {
    ST32 (iMvD, LD32 (pCurLayer->pMv[0][iLeftTopXy][15]));
    iLeftTopRef = pCurLayer->pRefIndex[0][iLeftTopXy][15];
  } else {
    ST32 (iMvD, 0);
    if (0 == bLeftTopAvail) { //not available
      iLeftTopRef = REF_NOT_AVAIL;
    } else { //available but is intra mb type
      iLeftTopRef = REF_NOT_IN_LIST;
    }
  }

  // Replace C with D when the right-top block is not available.
  iDiagonalRef = iRightTopRef;
  if (REF_NOT_AVAIL == iDiagonalRef) {
    iDiagonalRef = iLeftTopRef;
    * (int32_t*)iMvC = * (int32_t*)iMvD;
  }

  // Only the left neighbor carries usable data -> take A directly.
  if (REF_NOT_AVAIL == iTopRef && REF_NOT_AVAIL == iDiagonalRef && iLeftRef >= REF_NOT_IN_LIST) {
    ST32 (iMvp, LD32 (iMvA));
    return;
  }

  // Exactly one neighbor references index 0 -> use that neighbor's MV;
  // otherwise take the component-wise median of A, B, C.
  iMatchRef = (0 == iLeftRef) + (0 == iTopRef) + (0 == iDiagonalRef);
  if (1 == iMatchRef) {
    if (0 == iLeftRef) {
      ST32 (iMvp, LD32 (iMvA));
    } else if (0 == iTopRef) {
      ST32 (iMvp, LD32 (iMvB));
    } else {
      ST32 (iMvp, LD32 (iMvC));
    }
  } else {
    iMvp[0] = WelsMedian (iMvA[0], iMvB[0], iMvC[0]);
    iMvp[1] = WelsMedian (iMvA[1], iMvB[1], iMvC[1]);
  }
}
/*
 * Template-expanded inter motion compensation driver for one macroblock
 * (MCFUNC/PIXEL_SHIFT/CHROMA_IDC are supplied by the including template
 * header, instantiating this per bit depth / chroma format).
 *
 * Dispatches on the MB partitioning: 16x16, 16x8, 8x16, or 8x8 with
 * per-sub-MB 8x8/8x4/4x8/4x4 splits, calling mc_part() once per partition
 * with the matching qpel/chroma put/avg function sizes and weight tables.
 * Offsets are in 4x4-block units for the n/x/y partition coordinates; byte
 * offsets for chroma use PIXEL_SHIFT, row offsets use sl->mb_linesize.
 */
static void MCFUNC(hl_motion)(const H264Context *h, H264SliceContext *sl,
                              uint8_t *dest_y,
                              uint8_t *dest_cb, uint8_t *dest_cr,
                              qpel_mc_func(*qpix_put)[16],
                              const h264_chroma_mc_func(*chroma_put),
                              qpel_mc_func(*qpix_avg)[16],
                              const h264_chroma_mc_func(*chroma_avg),
                              const h264_weight_func *weight_op,
                              const h264_biweight_func *weight_avg)
{
    const int mb_xy   = sl->mb_xy;
    const int mb_type = h->cur_pic.mb_type[mb_xy];

    av_assert2(IS_INTER(mb_type));

    // In frame-threaded decoding, wait until the reference rows this MB
    // motion-compensates from have been decoded by other threads.
    if (HAVE_THREADS && (h->avctx->active_thread_type & FF_THREAD_FRAME))
        await_references(h, sl);
    prefetch_motion(h, sl, 0, PIXEL_SHIFT, CHROMA_IDC);

    if (IS_16X16(mb_type)) {
        // Single 16x16 partition: qpix index 0, full-size weight ops.
        mc_part(h, sl, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
    } else if (IS_16X8(mb_type)) {
        // Two 16x8 partitions stacked vertically (second at chroma byte
        // offset 8 << PIXEL_SHIFT, luma block row 8).
        mc_part(h, sl, 0, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, sl, 8, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    } else if (IS_8X16(mb_type)) {
        // Two 8x16 partitions side by side (second at x offset 4 blocks).
        mc_part(h, sl, 0, 0, 16, 8 * sl->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, sl, 4, 0, 16, 8 * sl->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    } else {
        int i;

        av_assert2(IS_8X8(mb_type));

        // Four 8x8 sub-macroblocks, each further split per sub_mb_type.
        for (i = 0; i < 4; i++) {
            const int sub_mb_type = sl->sub_mb_type[i];
            const int n  = 4 * i;                 // first 4x4 block of this sub-MB
            int x_offset = (i & 1) << 2;          // 0 or 4 (blocks)
            int y_offset = (i & 2) << 1;          // 0 or 4 (blocks)

            if (IS_SUB_8X8(sub_mb_type)) {
                mc_part(h, sl, n, 1, 8, 0, dest_y, dest_cb, dest_cr,
                        x_offset, y_offset,
                        qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                        &weight_op[1], &weight_avg[1],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            } else if (IS_SUB_8X4(sub_mb_type)) {
                mc_part(h, sl, n, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr,
                        x_offset, y_offset,
                        qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                        &weight_op[1], &weight_avg[1],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, sl, n + 2, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr,
                        x_offset, y_offset + 2,
                        qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                        &weight_op[1], &weight_avg[1],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            } else if (IS_SUB_4X8(sub_mb_type)) {
                mc_part(h, sl, n, 0, 8, 4 * sl->mb_linesize, dest_y, dest_cb, dest_cr,
                        x_offset, y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[2], &weight_avg[2],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, sl, n + 1, 0, 8, 4 * sl->mb_linesize, dest_y, dest_cb, dest_cr,
                        x_offset + 2, y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[2], &weight_avg[2],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            } else {
                int j;

                av_assert2(IS_SUB_4X4(sub_mb_type));
                // Four 4x4 blocks within this 8x8 sub-MB.
                for (j = 0; j < 4; j++) {
                    int sub_x_offset = x_offset + 2 * (j & 1);
                    int sub_y_offset = y_offset + (j & 2);

                    mc_part(h, sl, n + j, 1, 4, 0, dest_y, dest_cb, dest_cr,
                            sub_x_offset, sub_y_offset,
                            qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                            &weight_op[2], &weight_avg[2],
                            IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                }
            }
        }
    }

    // Prefetch list-1 references for B-predicted MBs.
    if (USES_LIST(mb_type, 1))
        prefetch_motion(h, sl, 1, PIXEL_SHIFT, CHROMA_IDC);
}
__tcsm1_main int main() { int c; S32I2M(xr16,3); c = i_la(_gp); int fifo_rp = 0; unsigned int XCHGtmp; H264_Frame_GlbARGs *dFRM = (H264_Frame_GlbARGs *)TCSM1_FRM_ARGS; H264_MB_DecARGs *dMB_aux = (H264_MB_DecARGs *)TCSM1_MBARGS_BUF; H264_MB_DecARGs *dMB2 = (H264_MB_DecARGs *)TCSM1_MBARGS_BUF2; H264_AUX_T *AUX_T = (H264_AUX_T *)TCSM1_AUX_T; MDMA_DesNode *MDMA1_TRAN = (MDMA_DesNode *)TCSM1_GP1_TRAN_CHAIN; MDMA_DesNode *MDMA1_ARG = (MDMA_DesNode *)TCSM1_GP1_ARG_CHAIN; H264_MB_DecARGs *dMBsrc; fifo_wp = (int *)TCSM1_FIFO_WP; dMBsrc = (H264_MB_DecARGs *)(dFRM->dMB_baseaddr_aux); int mb_num; int mb_start; int total_mbs; mb_start = dFRM->first_mb_in_frame; total_mbs = dFRM->mb_width * dFRM->mb_height; int i; uint16_t *intra_pred4x4_top_ptr[2]; intra_pred4x4_top_ptr[0]=intra_pred4x4_top[0]; intra_pred4x4_top_ptr[1]=intra_pred4x4_top[1]; uint16_t *intra_pred4x4_left_ptr[2]; intra_pred4x4_left_ptr[0]=intra_pred4x4_left[0]; intra_pred4x4_left_ptr[1]=intra_pred4x4_left[1]; uint8_t mb_x_d1, mb_y_d1; uint8_t mb_x_d2, mb_y_d2; mb_x_d1 = mb_y_d1 = mb_x_d2 = mb_y_d2 =0; AUX_PMON_CREAT(mc); AUX_PMON_CREAT(idct); AUX_PMON_CREAT(dblk); AUX_PMON_CREAT(intra); AUX_PMON_CREAT(mdma); AUX_PMON_CREAT(sync); do{ }while(fifo_rp == *fifo_wp); fifo_rp++; MDMA1_ARG->TSA = get_phy_addr_aux((uint32_t)dMBsrc); MDMA1_ARG->TDA = (uint32_t)dMB_aux; MDMA1_ARG->STRD = MDMA_STRD(64, 64); MDMA1_ARG->UNIT = MDMA_UNIT(1,64,(sizeof(H264_MB_DecARGs))); SET_MDMA1_DHA((uint32_t)TCSM1_GP1_ARG_CHAIN); MDMA1_RUN(); dMBsrc++; POLLING_MDMA1_END(); char bakup_x0 = dFRM->mb_width-1; char bakup_x1 = 0; int mb_type_last= 0; volatile unsigned int *infar = (unsigned int *)TCSM1_H264_DBLK_INFAR; uint8_t non_zero_count_cache_luma_last[16]; for ( mb_num = mb_start; (mb_num < total_mbs+2); mb_num ++ ) { AUX_PMON_ON(sync); do{ }while(fifo_rp == *fifo_wp); fifo_rp++; int gp1_tran_start = 0; int gp1_tran_len = sizeof(H264_MB_DecARGs); if (dMB_aux->next_mb_no_weight_flag) { gp1_tran_start = (53 << 2); gp1_tran_len -= (53 << 2); } 
if (dMB_aux->next_mb_skip_flag) { gp1_tran_len -= (192 << 2); } AUX_PMON_OFF(sync); AUX_PMON_ON(mdma); AUX_PMON_OFF(mdma); AUX_PMON_ON(intra); MDMA1_ARG->TSA = (get_phy_addr_aux((uint32_t)dMBsrc) + gp1_tran_start); MDMA1_ARG->TDA = ((uint32_t)dMB2 + gp1_tran_start); MDMA1_ARG->STRD = MDMA_STRD(64, 64); MDMA1_ARG->UNIT = MDMA_UNIT(1,64,gp1_tran_len); dMBsrc++; POLLING_MDMA1_END(); //ensure curr dblked MB has been trans out SET_MDMA1_DHA((uint32_t)TCSM1_GP1_ARG_CHAIN); MDMA1_RUN(); int mb_x= dMB_aux->mb_x; int mb_y= dMB_aux->mb_y; int mb_type= dMB_aux->mb_type; if (dMB_aux->curr_mb_skip_flag) for(i=0; i<24; i++) dMB_aux->mb[i*16] = 0; AUX_T->mc_des_dirty=0; if(IS_INTRA(mb_type_last)){ // chroma predict Intra_pred_chroma(dMB_aux->chroma_pred_mode, AUX_T->h264_urecon[1], AUX_T->h264_urecon[0] + MB_CHROM_WIDTH, TCSM1_BOTTOM_U_13PIX+4); Intra_pred_chroma(dMB_aux->chroma_pred_mode, AUX_T->h264_vrecon[1], AUX_T->h264_vrecon[0] + MB_CHROM_WIDTH, TCSM1_BOTTOM_V_13PIX+4); // luma predict if(IS_INTRA4x4(mb_type_last)){ if(IS_8x8DCT(mb_type_last)){ for(i=0; i<16; i+=4){ uint8_t * src_left = (uint32_t)(intra_pred4x4_left_ptr[1][i]) | 0xF4000000; uint8_t * src_top = (uint32_t)(intra_pred4x4_top_ptr[1][i]) | 0xF4000000; uint8_t * src_topleft; if ( i==8 ) src_topleft = src_left - RECON_BUF_STRIDE - 1; else src_topleft = src_top - 1; uint8_t * ptr= AUX_T->h264_yrecon[1] + MB_offset_recon[i]; int dir= dMB_aux->intra4x4_pred_mode_cache[ i ]; int nnz = non_zero_count_cache_luma_last[i]; Intra_pred_luma_8x8l(dir,ptr,(dMB_aux->topleft_samples_available<<i)&0x8000, (dMB_aux->topright_samples_available<<i)&0x4000, src_left, src_top, src_topleft, RECON_BUF_STRIDE); if(nnz){ if(nnz == 1 && dMB_aux->mb[i*16]) ff_h264_idct8_dc_add_c(ptr, dMB_aux->mb + i*16, RECON_BUF_STRIDE); else ff_h264_idct8_add_c(ptr, dMB_aux->mb + i*16, RECON_BUF_STRIDE); } } }else { for(i=0; i<16; i++){ uint8_t * src_left = (uint32_t)(intra_pred4x4_left_ptr[1][i]) | 0xf4000000; uint8_t * src_top = 
(uint32_t)(intra_pred4x4_top_ptr[1][i]) | 0xf4000000; uint8_t * src_topleft; if ( (i==2) || (i==8) || (i==10) ) src_topleft = src_left - RECON_BUF_STRIDE; else src_topleft = src_top; uint8_t * ptr= AUX_T->h264_yrecon[1] + MB_offset_recon[i]; uint8_t *topright; int dir= dMB_aux->intra4x4_pred_mode_cache[ i ]; int nnz, tr; if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ int topright_avail= (dMB_aux->topright_samples_available<<i)&0x8000; if(!topright_avail){ tr= src_top[3]*0x01010101; topright= (uint8_t*) &tr; }else{ topright= src_top + 4; } } Intra_pred_luma_4x4(dir, ptr, src_left, topright, src_top, src_topleft); nnz = non_zero_count_cache_luma_last[i]; if(nnz){ if(nnz == 1 && dMB_aux->mb[i*16]) ff_h264_idct_dc_add_c(ptr, dMB_aux->mb + i*16, RECON_BUF_STRIDE); else ff_h264_idct_add_c(ptr, dMB_aux->mb + i*16, RECON_BUF_STRIDE); } } } }else{ Intra_pred_luma_16x16(dMB_aux->intra16x16_pred_mode, AUX_T->h264_yrecon[1], AUX_T->h264_yrecon[0] + MB_LUMA_WIDTH, TCSM1_BOTTOM_Y_25PIX+4); luma_dc_dequant_idct_c(dMB_aux->mb, dMB_aux->dequant4_coeff[0]); for(i=0; i<16; i++){ if(non_zero_count_cache_luma_last[i]) ff_h264_idct_add_c(AUX_T->h264_yrecon[1] + MB_offset_recon[i], dMB_aux->mb + i*16, RECON_BUF_STRIDE); else if(dMB_aux->mb[i*16]) ff_h264_idct_dc_add_c(AUX_T->h264_yrecon[1] + MB_offset_recon[i], dMB_aux->mb + i*16, RECON_BUF_STRIDE); } } } AUX_PMON_OFF(intra); if(IS_INTER(mb_type)){ hl_motion_hw(dFRM, dMB_aux, AUX_T); } AUX_PMON_ON(mc); MC_POLLING_END(); AUX_PMON_OFF(mc); AUX_PMON_ON(dblk); while ((*infar)!= TCSM1_H264_DBLK_INFDA) {} AUX_PMON_OFF(dblk); if(AUX_T->mc_des_dirty){ H264_MC_DesNode *h264_mc = (H264_MC_DesNode *)(AUX_T->h264_mc_des_ptr[0]); h264_mc[AUX_T->mc_des_dirty - 1].VNodeHead = H264_VHEAD_UNLINK; SET_MC_DHA((uint32_t)h264_mc); CLEAR_MC_TTEND(); SET_MC_DCS(); } if(IS_INTRA(mb_type)){ uint32_t * bakup_src = AUX_T->BackupMBbottom_Y[bakup_x1]-4; uint32_t * bakup_dst = TCSM1_BOTTOM_Y_25PIX; bakup_dst[0] = bakup_src[0]; bakup_dst[1] = bakup_src[1]; 
bakup_dst[2] = bakup_src[2]; bakup_dst[3] = bakup_src[3]; bakup_dst[4] = bakup_src[4]; bakup_dst[5] = bakup_src[5]; bakup_dst[6] = bakup_src[6]; bakup_src = AUX_T->BackupMBbottom_U[bakup_x1]-4; bakup_dst = TCSM1_BOTTOM_U_13PIX; bakup_dst[0] = bakup_src[0]; bakup_dst[1] = bakup_src[1]; bakup_dst[2] = bakup_src[2]; bakup_dst[3] = bakup_src[3]; bakup_src = AUX_T->BackupMBbottom_V[bakup_x1]-4; bakup_dst = TCSM1_BOTTOM_V_13PIX; bakup_dst[0] = bakup_src[0]; bakup_dst[1] = bakup_src[1]; bakup_dst[2] = bakup_src[2]; bakup_dst[3] = bakup_src[3]; } if(IS_INTER(mb_type_last)) hl_motion_hw_next(dMB_aux,AUX_T,mb_type_last); *(uint32_t*)&AUX_T->sub_mb_type[0] = *(uint32_t*)&dMB_aux->sub_mb_type[0]; *(uint32_t*)&AUX_T->sub_mb_type[2] = *(uint32_t*)&dMB_aux->sub_mb_type[2]; AUX_PMON_ON(idct); AUX_PMON_OFF(idct); if (IS_INTRA_PCM(mb_type)) { unsigned int x, y; for(i=0; i<24; i++) { for (y=0; y<4; y++) { for (x=0; x<4; x++) { *(AUX_T->h264_yrecon[0] + MB_offset_recon[i] + y*RECON_BUF_STRIDE + x) = dMB_aux->mb[i*16+y*4+x]; } } } } { int linesize, uvlinesize; uint8_t *dest_y_d2, *dest_u_d2, *dest_v_d2; linesize = dFRM->linesize; uvlinesize = dFRM->uvlinesize; dest_y_d2 = dFRM->current_picture.y_ptr + (mb_y_d2 * 16* linesize ) + mb_x_d2 * 16; dest_u_d2 = dFRM->current_picture.u_ptr + (mb_y_d2 * 8 * uvlinesize) + mb_x_d2 * 8; dest_v_d2 = dFRM->current_picture.v_ptr + (mb_y_d2 * 8 * uvlinesize) + mb_x_d2 * 8; //move dblked MB out MDMA1_TRAN[0].TSA = AUX_T->h264_ydblk[1]-4; MDMA1_TRAN[0].TDA = get_phy_addr_aux((uint32_t)dest_y_d2-4); MDMA1_TRAN[1].TSA = AUX_T->h264_ydblk[1]-4+DBLK_U_OFST; MDMA1_TRAN[1].TDA = get_phy_addr_aux((uint32_t)dest_u_d2-4); //MDMA1_TRAN[1].UNIT = MDMA_UNIT(1,12,96); MDMA1_TRAN[2].TSA = AUX_T->h264_ydblk[1]-4+DBLK_V_OFST; MDMA1_TRAN[2].TDA = get_phy_addr_aux((uint32_t)dest_v_d2-4); POLLING_MDMA1_END(); //ensure next MB's args has been received SET_MDMA1_DHA((uint32_t)MDMA1_TRAN); MDMA1_RUN(); } //---------idct fo inter--------------- if(IS_INTER(mb_type_last)){ 
void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); int di; if(IS_8x8DCT(mb_type_last)){ idct_dc_add = ff_h264_idct8_dc_add_c; idct_add = ff_h264_idct8_add_c; di = 4; }else{ idct_dc_add = ff_h264_idct_dc_add_c; idct_add = ff_h264_idct_add_c; di = 1; } for(i=0; i<16; i+=di){ int nnz = non_zero_count_cache_luma_last[i]; if(nnz){ if(nnz==1 && dMB_aux->mb[i*16]) idct_dc_add(AUX_T->h264_yrecon[1] + MB_offset_recon[i], dMB_aux->mb + i*16, RECON_BUF_STRIDE); else idct_add(AUX_T->h264_yrecon[1] + MB_offset_recon[i], dMB_aux->mb + i*16, RECON_BUF_STRIDE); } } } //------------- chroma idct------------ if(mb_type_last){ chroma_dc_dequant_idct_c(dMB_aux->mb + 16*16, dMB_aux->dequant4_coeff[1]); chroma_dc_dequant_idct_c(dMB_aux->mb + 16*16+4*16, dMB_aux->dequant4_coeff[2]); for(i=16; i<16+8; i++){ if(dMB_aux->non_zero_count_cache_chroma[ i - 16 ]) { ff_h264_idct_add_c(AUX_T->h264_yrecon[1] + MB_offset_recon[i], dMB_aux->mb + i*16, RECON_BUF_STRIDE); } else if (dMB_aux->mb[i*16]) { ff_h264_idct_dc_add_c(AUX_T->h264_yrecon[1] + MB_offset_recon[i], dMB_aux->mb + i*16, RECON_BUF_STRIDE); } } } if(!(mb_x==0 && mb_y==0)){ SET_DHA_DBLK((unsigned int)AUX_T->h264_dblk_des_ptr[1]); *infar = 0; SET_DCS_DBLK(0x1); } filter_mb_dblk(dFRM, dMB_aux, AUX_T); { uint32_t * bakup_src = AUX_T->h264_yrecon[1] + 15*RECON_BUF_STRIDE; uint32_t * bakup_dst = AUX_T->BackupMBbottom_Y[bakup_x0]; bakup_dst[0] = bakup_src[0]; bakup_dst[1] = bakup_src[1]; bakup_dst[2] = bakup_src[2]; bakup_dst[3] = bakup_src[3]; bakup_src = AUX_T->h264_urecon[1] + 7*RECON_BUF_STRIDE; bakup_dst = AUX_T->BackupMBbottom_U[bakup_x0]; bakup_dst[0] = bakup_src[0]; bakup_dst[1] = bakup_src[1]; bakup_src = AUX_T->h264_vrecon[1] + 7*RECON_BUF_STRIDE; bakup_dst = AUX_T->BackupMBbottom_V[bakup_x0]; bakup_dst[0] = bakup_src[0]; bakup_dst[1] = bakup_src[1]; } mb_x_d2 = mb_x_d1; mb_y_d2 = mb_y_d1; mb_x_d1 = mb_x; mb_y_d1 = mb_y; mb_type_last=mb_type; for(i=0;i<16;i++) 
non_zero_count_cache_luma_last[i]=dMB_aux->non_zero_count_cache_luma[ scan5[i] ]; // abnormal exit if (dMB_aux->deblocking_filter & 0x8) break; bakup_x0=(bakup_x0==((dFRM->mb_width)-1))?0:(bakup_x0+1);//hit second line's tail bakup_x1=(bakup_x1==((dFRM->mb_width)-1))?0:(bakup_x1+1); XCHG2(AUX_T->h264_yrecon[0],AUX_T->h264_yrecon[1],XCHGtmp); XCHG2(AUX_T->h264_urecon[0],AUX_T->h264_urecon[1],XCHGtmp); XCHG2(AUX_T->h264_vrecon[0],AUX_T->h264_vrecon[1],XCHGtmp); XCHG2(AUX_T->h264_ydblk[0],AUX_T->h264_ydblk[1],XCHGtmp); XCHG2(AUX_T->h264_dblk_des_ptr[0],AUX_T->h264_dblk_des_ptr[1],XCHGtmp); XCHG2(AUX_T->h264_mc_des_ptr[0],AUX_T->h264_mc_des_ptr[1],XCHGtmp); XCHG2(intra_pred4x4_top_ptr[0],intra_pred4x4_top_ptr[1],XCHGtmp); XCHG2(intra_pred4x4_left_ptr[0],intra_pred4x4_left_ptr[1],XCHGtmp); XCHG2(dMB_aux,dMB2,XCHGtmp); } AUX_PMON_TRAN(mc,PMON_MC_BUF); AUX_PMON_TRAN(idct,PMON_IDCT_BUF); AUX_PMON_TRAN(dblk,PMON_DBLK_BUF); AUX_PMON_TRAN(intra,PMON_INTRA_BUF); AUX_PMON_TRAN(mdma,PMON_MDMA_BUF); AUX_PMON_TRAN(sync,PMON_SYNC_BUF); /* task_done: aux-cpu task done flag, only write by aux-cpu, only read by main-cpu */ MDMA1_ARG->TSA = (TCSM1_TASK_DONE); MDMA1_ARG->TDA = (TCSM0_TASK_DONE); MDMA1_ARG->STRD = MDMA_STRD(4,4); MDMA1_ARG->UNIT = MDMA_UNIT(1,4,4); SET_MDMA1_DHA((uint32_t)TCSM1_GP1_ARG_CHAIN); MDMA1_RUN(); i_nop; i_nop; i_nop; i_nop; __asm__ __volatile__ ("wait"); }