static void MCFUNC(hl_motion)(const H264Context *h, H264SliceContext *sl, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, qpel_mc_func(*qpix_put)[16], const h264_chroma_mc_func(*chroma_put), qpel_mc_func(*qpix_avg)[16], const h264_chroma_mc_func(*chroma_avg), const h264_weight_func *weight_op, const h264_biweight_func *weight_avg) { const int mb_xy = sl->mb_xy; const int mb_type = h->cur_pic.mb_type[mb_xy]; av_assert2(IS_INTER(mb_type)); if (HAVE_THREADS && (h->avctx->active_thread_type & FF_THREAD_FRAME)) await_references(h, sl); prefetch_motion(h, sl, 0, PIXEL_SHIFT, CHROMA_IDC); if (IS_16X16(mb_type)) { mc_part(h, sl, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], weight_op, weight_avg, IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); } else if (IS_16X8(mb_type)) { mc_part(h, sl, 0, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], weight_op, weight_avg, IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); mc_part(h, sl, 8, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 4, qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], weight_op, weight_avg, IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); } else if (IS_8X16(mb_type)) { mc_part(h, sl, 0, 0, 16, 8 * sl->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], &weight_op[1], &weight_avg[1], IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); mc_part(h, sl, 4, 0, 16, 8 * sl->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0, qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], &weight_op[1], &weight_avg[1], IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); } else { int i; av_assert2(IS_8X8(mb_type)); for (i = 0; i < 4; i++) { const int sub_mb_type = sl->sub_mb_type[i]; const int n = 4 * i; int x_offset = (i & 1) << 2; int y_offset = (i & 2) << 1; if (IS_SUB_8X8(sub_mb_type)) { mc_part(h, sl, n, 1, 8, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset, 
qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1], &weight_op[1], &weight_avg[1], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); } else if (IS_SUB_8X4(sub_mb_type)) { mc_part(h, sl, n, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], &weight_op[1], &weight_avg[1], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); mc_part(h, sl, n + 2, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, x_offset, y_offset + 2, qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], &weight_op[1], &weight_avg[1], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); } else if (IS_SUB_4X8(sub_mb_type)) { mc_part(h, sl, n, 0, 8, 4 * sl->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], &weight_op[2], &weight_avg[2], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); mc_part(h, sl, n + 1, 0, 8, 4 * sl->mb_linesize, dest_y, dest_cb, dest_cr, x_offset + 2, y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], &weight_op[2], &weight_avg[2], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); } else { int j; av_assert2(IS_SUB_4X4(sub_mb_type)); for (j = 0; j < 4; j++) { int sub_x_offset = x_offset + 2 * (j & 1); int sub_y_offset = y_offset + (j & 2); mc_part(h, sl, n + j, 1, 4, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset, qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2], &weight_op[2], &weight_avg[2], IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); } } } } if (USES_LIST(mb_type, 1)) prefetch_motion(h, sl, 1, PIXEL_SHIFT, CHROMA_IDC); }
void render_mbs() { H264Context *h = g_h; GPUH264Context * const g = &h->gpu; MpegEncContext * const s = &h->s; H264mb* blockStore = g->block_buffer; int i, l; int lists = (h->slice_type==FF_B_TYPE)?2:1; int dpb_pos = s->current_picture.gpu_dpb; printf("Attempting to motion compensate %d blocks\n", (g->end-g->start+1)); glNewList(dispList, GL_COMPILE); for(l=0; l < lists; l++) { glBegin(GL_QUADS); for(i= g->start; i <= g->end; i++) { const int mb_x = blockStore[i].mb_x; const int mb_y = blockStore[i].mb_y; const int mb_xy = mb_x + mb_y*s->mb_stride; const int mb_type = s->current_picture.mb_type[mb_xy]; int mv_x, mv_y, j; //RUDD TODO ignoring Intra blocks for now if(IS_INTER(mb_type)) { if(IS_16X16(mb_type) && IS_DIR(mb_type, 0, l)) { mv_x = blockStore[i].mv_cache[0][ scan8[0]][0]; mv_y = blockStore[i].mv_cache[0][ scan8[0]][1]; render_one_block(mb_x, mb_y, mv_x, mv_y, 16, 16, 0, 0, h->ref_list[l][h->ref_cache[l][ scan8[0] ]].gpu_dpb, dpb_pos); } else if(IS_16X8(mb_type)) { if(IS_DIR(mb_type, 0, l)) { mv_x = blockStore[i].mv_cache[l][ scan8[0]][0]; mv_y = blockStore[i].mv_cache[l][ scan8[0]][1]; render_one_block(mb_x, mb_y, mv_x, mv_y, 16, 8, 0, 0, h->ref_list[l][h->ref_cache[l][ scan8[0] ]].gpu_dpb, dpb_pos); } if(IS_DIR(mb_type, 1, l)) { mv_x = blockStore[i].mv_cache[0][ scan8[8]][0]; mv_y = blockStore[i].mv_cache[0][ scan8[8]][1]; render_one_block(mb_x, mb_y, mv_x, mv_y, 16, 8, 0, -8, h->ref_list[l][h->ref_cache[l][ scan8[8] ]].gpu_dpb, dpb_pos); } } else if(IS_8X16(mb_type)) { if(IS_DIR(mb_type, 0, l)) { mv_x = blockStore[i].mv_cache[0][ scan8[0]][0]; mv_y = blockStore[i].mv_cache[0][ scan8[0]][1]; render_one_block(mb_x, mb_y, mv_x, mv_y, 8, 16, 0, 0, h->ref_list[l][h->ref_cache[l][ scan8[0] ]].gpu_dpb, dpb_pos); } if(IS_DIR(mb_type, 1, l)) { mv_x = blockStore[i].mv_cache[0][ scan8[4]][0]; mv_y = blockStore[i].mv_cache[0][ scan8[4]][1]; render_one_block(mb_x, mb_y, mv_x, mv_y, 8, 16, 8, 0, h->ref_list[l][h->ref_cache[l][ scan8[4] ]].gpu_dpb, dpb_pos); } } else 
{ assert(IS_8X8(mb_type)); int j; for(j=0;j<4;j++) { const int sub_mb_type= h->sub_mb_type[j]; const int n= 4*j; int x_offset= (j&1); int y_offset= (j&2)>>1; if(!IS_DIR(sub_mb_type, 0, l)) continue; if(IS_SUB_8X8(sub_mb_type)) { mv_x = blockStore[i].mv_cache[0][ scan8[n]][0]; mv_y = blockStore[i].mv_cache[0][ scan8[n]][1]; render_one_block(mb_x, mb_y, mv_x, mv_y, 8, 8, 8*x_offset,-8*y_offset, h->ref_list[l][h->ref_cache[l][ scan8[n] ]].gpu_dpb, dpb_pos); } else if(IS_SUB_8X4(sub_mb_type)) { mv_x = blockStore[i].mv_cache[0][ scan8[n]][0]; mv_y = blockStore[i].mv_cache[0][ scan8[n]][1]; render_one_block(mb_x, mb_y, mv_x, mv_y, 8, 4, 8*x_offset,-8*y_offset, h->ref_list[l][h->ref_cache[l][ scan8[n] ]].gpu_dpb, dpb_pos); mv_x = blockStore[i].mv_cache[0][ scan8[n+2]][0]; mv_y = blockStore[i].mv_cache[0][ scan8[n+2]][1]; render_one_block(mb_x, mb_y, mv_x, mv_y, 8, 4, 8*x_offset,-8*y_offset-4, h->ref_list[l][h->ref_cache[l][ scan8[n+2] ]].gpu_dpb, dpb_pos); } else if(IS_SUB_4X8(sub_mb_type)) { mv_x = blockStore[i].mv_cache[0][ scan8[n]][0]; mv_y = blockStore[i].mv_cache[0][ scan8[n]][1]; render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 8, 8*x_offset,-8*y_offset, h->ref_list[l][h->ref_cache[l][ scan8[n] ]].gpu_dpb, dpb_pos); mv_x = blockStore[i].mv_cache[0][ scan8[n+1]][0]; mv_y = blockStore[i].mv_cache[0][ scan8[n+1]][1]; render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 8, 8*x_offset+4,-8*y_offset, h->ref_list[l][h->ref_cache[l][ scan8[n+1] ]].gpu_dpb, dpb_pos); } else { mv_x = blockStore[i].mv_cache[0][ scan8[n]][0]; mv_y = blockStore[i].mv_cache[0][ scan8[n]][1]; render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 4, 8*x_offset,-8*y_offset, h->ref_list[l][h->ref_cache[l][ scan8[n] ]].gpu_dpb, dpb_pos); mv_x = blockStore[i].mv_cache[0][ scan8[n+1]][0]; mv_y = blockStore[i].mv_cache[0][ scan8[n+1]][1]; render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 4, 8*x_offset+4,-8*y_offset, h->ref_list[l][h->ref_cache[l][ scan8[n+1] ]].gpu_dpb, dpb_pos); mv_x = blockStore[i].mv_cache[0][ scan8[n+2]][0]; 
mv_y = blockStore[i].mv_cache[0][ scan8[n+2]][1]; render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 4, 8*x_offset,-8*y_offset-4, h->ref_list[l][h->ref_cache[l][ scan8[n+2] ]].gpu_dpb, dpb_pos); mv_x = blockStore[i].mv_cache[0][ scan8[n+3]][0]; mv_y = blockStore[i].mv_cache[0][ scan8[n+3]][1]; render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 4, 8*x_offset+4,-8*y_offset-4, h->ref_list[l][h->ref_cache[l][ scan8[n+3] ]].gpu_dpb, dpb_pos); } } } } } } glEnd(); glEndList(); }
/*
 * Program the motion unit for one macroblock and write its task descriptors
 * into the buffer at motion_dha.
 *
 * Steps, in this order:
 *  1. The two destination registers are pointed at recon_buf and at
 *     recon_buf + PREVIOUS_OFFSET_U; both DSA registers are pointed at
 *     motion_dha + 0x108, whose first byte is then cleared.
 *  2. One motion_task() call is issued per partition of the macroblock
 *     (16x16, 16x8, 8x16, or the 8x8/8x4/4x8/4x4 pieces of each
 *     sub-macroblock), passing the partition's direction flags, its size
 *     codes (blkh/blkw) and its position codes (boy/box).
 *  3. TDD_DOE_SFT is OR-ed into descriptor word 2*tkn-1, a TDD_HEAD word
 *     carrying the task count and macroblock position is stored in the slot
 *     before the first task word, and a second TDD_HEAD word is stored at
 *     index 2*tkn.
 *
 * NOTE(review): the 2*tkn indexing suggests motion_task() emits two words per
 * task and increments the counter it is handed -- confirm in motion_task().
 *
 * SLICE_T    slice-level arguments, forwarded unchanged to motion_task()
 * dmb        macroblock control data (mb_type, sub_mb_type, mb_x, mb_y)
 * recon_buf  base of the output buffer
 * motion_dha base of the descriptor buffer
 */
void motion_execute(H264_Slice_GlbARGs *SLICE_T, H264_MB_Ctrl_DecARGs *dmb,
                    uint8_t *recon_buf, uint8_t *motion_dha)
{
    const int mb_type = dmb->mb_type;
    uint8_t *out_y = recon_buf;
    uint8_t *out_c = recon_buf + PREVIOUS_OFFSET_U;
    uint8_t *dsa   = motion_dha + 0x108;

    SET_REG1_DSTA(TCSM1_PADDR((int)out_y));
    SET_REG1_DSA(TCSM1_PADDR((int)dsa));
    SET_REG2_DSTA(TCSM1_PADDR((int)out_c));
    SET_REG2_DSA(TCSM1_PADDR((int)dsa));

    dsa[0] = 0x0;

    /* Word 0 of the buffer is reserved for the header written at the end,
     * so task words start one int further in. */
    volatile int *desc = (int *)motion_dha + 1;
    int task_cnt = 0;

    if (IS_16X16(mb_type)) {
        motion_task(SLICE_T, dmb, 0, 0, desc, &task_cnt,
                    IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                    3/*blkh*/, 3/*blkw*/, 0/*boy*/, 0/*box*/);
    } else if (IS_16X8(mb_type)) {
        motion_task(SLICE_T, dmb, 0, 0, desc, &task_cnt,
                    IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                    2/*blkh*/, 3/*blkw*/, 0/*boy*/, 0/*box*/);
        motion_task(SLICE_T, dmb, 1, 2, desc, &task_cnt,
                    IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                    2/*blkh*/, 3/*blkw*/, 2/*boy*/, 0/*box*/);
    } else if (IS_8X16(mb_type)) {
        motion_task(SLICE_T, dmb, 0, 0, desc, &task_cnt,
                    IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                    3/*blkh*/, 2/*blkw*/, 0/*boy*/, 0/*box*/);
        motion_task(SLICE_T, dmb, 1, 1, desc, &task_cnt,
                    IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                    3/*blkh*/, 2/*blkw*/, 0/*boy*/, 2/*box*/);
    } else {
        /* Four 8x8 sub-macroblocks; mv_n counts the tasks issued so far
         * across all of them. */
        int mv_n = 0;

        for (int blk8 = 0; blk8 < 4; blk8++) {
            const int sub_mb_type = dmb->sub_mb_type[blk8];
            const int use_l0 = IS_DIR(sub_mb_type, 0, 0);
            const int use_l1 = IS_DIR(sub_mb_type, 0, 1);
            const int boy0   = blk8 & 0x2;       /* 0 or 2 */
            const int box0   = (blk8 & 0x1) * 2; /* 0 or 2 */

            if (IS_SUB_8X8(sub_mb_type)) {
                motion_task(SLICE_T, dmb, mv_n, blk8, desc, &task_cnt,
                            use_l0, use_l1,
                            2/*blkh*/, 2/*blkw*/, boy0/*boy*/, box0/*box*/);
                mv_n++;
            } else if (IS_SUB_8X4(sub_mb_type)) {
                motion_task(SLICE_T, dmb, mv_n, blk8, desc, &task_cnt,
                            use_l0, use_l1,
                            1/*blkh*/, 2/*blkw*/, boy0/*boy*/, box0/*box*/);
                mv_n++;
                motion_task(SLICE_T, dmb, mv_n, blk8, desc, &task_cnt,
                            use_l0, use_l1,
                            1/*blkh*/, 2/*blkw*/, boy0 + 1/*boy*/, box0/*box*/);
                mv_n++;
            } else if (IS_SUB_4X8(sub_mb_type)) {
                motion_task(SLICE_T, dmb, mv_n, blk8, desc, &task_cnt,
                            use_l0, use_l1,
                            2/*blkh*/, 1/*blkw*/, boy0/*boy*/, box0/*box*/);
                mv_n++;
                motion_task(SLICE_T, dmb, mv_n, blk8, desc, &task_cnt,
                            use_l0, use_l1,
                            2/*blkh*/, 1/*blkw*/, boy0/*boy*/, box0 + 1/*box*/);
                mv_n++;
            } else {
                /* 4x4 pieces */
                for (int blk4 = 0; blk4 < 4; blk4++) {
                    motion_task(SLICE_T, dmb, mv_n, blk8, desc, &task_cnt,
                                use_l0, use_l1,
                                1/*blkh*/, 1/*blkw*/,
                                boy0 + (blk4 & 0x2) / 2/*boy*/,
                                box0 + (blk4 & 0x1)/*box*/);
                    mv_n++;
                }
            }
        }
    }

    desc[2 * task_cnt - 1] |= 0x1 << TDD_DOE_SFT;

    /* Header in the reserved slot in front of the task words. */
    desc[-1] = TDD_HEAD(1,/*vld*/
                        1,/*lk*/
                        0,/*sync*/
                        1,/*ch1pel*/
                        2,/*ch2pel*/
                        TDD_POS_SPEC,/*posmd*/
                        TDD_MV_AUTO,/*mvmd*/
                        1,/*ch2en*/
                        task_cnt,/*tkn*/
                        dmb->mb_y,/*mby*/
                        dmb->mb_x/*mbx*/);

    /* Second header word directly after the task words. */
    desc[2 * task_cnt] = TDD_HEAD(1,/*vld*/
                                  0,/*lk*/
                                  1,/*sync*/
                                  1,/*ch1pel*/
                                  2,/*ch2pel*/
                                  TDD_POS_SPEC,/*posmd*/
                                  TDD_MV_AUTO,/*mvmd*/
                                  1,/*ch2en*/
                                  0,/*tkn*/
                                  0xFF,/*mby*/
                                  0xFF/*mbx*/);
}