/**
 * @return the mb_type
 */
int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
    const int mb_index= s->mb_x + s->mb_y*s->mb_stride;
    const int colocated_mb_type= s->next_picture.mb_type[mb_index];
    uint16_t time_pp;
    uint16_t time_pb;
    int i;

    //FIXME avoid divides
    // try special case with shifts for 1 and 3 B-frames?

    if(IS_8X8(colocated_mb_type)){
        s->mv_type = MV_TYPE_8X8;
        for(i=0; i<4; i++){
            ff_mpeg4_set_one_direct_mv(s, mx, my, i);
        }
        return MB_TYPE_DIRECT2 | MB_TYPE_8x8 | MB_TYPE_L0L1;
    } else if(IS_INTERLACED(colocated_mb_type)){
        s->mv_type = MV_TYPE_FIELD;
        for(i=0; i<2; i++){
            int field_select= s->next_picture.ref_index[0][4*mb_index + 2*i];
            s->field_select[0][i]= field_select;
            s->field_select[1][i]= i;
            if(s->top_field_first){
                time_pp= s->pp_field_time - field_select + i;
                time_pb= s->pb_field_time - field_select + i;
            }else{
                time_pp= s->pp_field_time + field_select - i;
                time_pb= s->pb_field_time + field_select - i;
            }
            // NARFLEX: SageTV Fix divide by zero errors that can happen here
            s->mv[0][i][0] = (time_pp == 0) ? 0
                : (s->p_field_mv_table[i][0][mb_index][0]*time_pb/time_pp + mx);
            s->mv[0][i][1] = (time_pp == 0) ? 0
                : (s->p_field_mv_table[i][0][mb_index][1]*time_pb/time_pp + my);
            s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->p_field_mv_table[i][0][mb_index][0]
                : ((time_pp == 0) ? 0
                   : (s->p_field_mv_table[i][0][mb_index][0]*(time_pb - time_pp)/time_pp));
            s->mv[1][i][1] = my ? s->mv[0][i][1] - s->p_field_mv_table[i][0][mb_index][1]
                : ((time_pp == 0) ? 0
                   : (s->p_field_mv_table[i][0][mb_index][1]*(time_pb - time_pp)/time_pp));
        }
        return MB_TYPE_DIRECT2 | MB_TYPE_16x8 | MB_TYPE_L0L1 | MB_TYPE_INTERLACED;
    }else{
        ff_mpeg4_set_one_direct_mv(s, mx, my, 0);
        s->mv[0][1][0] = s->mv[0][2][0] = s->mv[0][3][0] = s->mv[0][0][0];
        s->mv[0][1][1] = s->mv[0][2][1] = s->mv[0][3][1] = s->mv[0][0][1];
        s->mv[1][1][0] = s->mv[1][2][0] = s->mv[1][3][0] = s->mv[1][0][0];
        s->mv[1][1][1] = s->mv[1][2][1] = s->mv[1][3][1] = s->mv[1][0][1];
        if((s->avctx->workaround_bugs & FF_BUG_DIRECT_BLOCKSIZE) || !s->quarter_sample)
            s->mv_type= MV_TYPE_16X16;
        else
            s->mv_type= MV_TYPE_8X8;
        return MB_TYPE_DIRECT2 | MB_TYPE_16x16 | MB_TYPE_L0L1; //Note see prev line
    }
}
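/*
 * Illustrative sketch (not part of the original source; direct_scale_mv is
 * our name): the field-direct branch above scales the colocated P-field
 * motion vector by the ratio of the B-to-reference and reference-to-
 * reference field distances, then adds the decoded delta.  The same rule,
 * standalone, with the divide-by-zero guard from the NARFLEX fix:
 */
static int direct_scale_mv(int mv_col, int time_pb, int time_pp, int delta)
{
    if (time_pp == 0)        /* corrupt stream: zero field distance */
        return 0;
    return mv_col * time_pb / time_pp + delta;
}
/* e.g. direct_scale_mv(8, 1, 2, 0) == 4: the B field lies halfway between
 * the references, so the forward MV is half the colocated MV. */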
static void pred_spatial_direct_motion(H264Context * const h, int *mb_type){
    MpegEncContext * const s = &h->s;
    int b8_stride = 2;
    int b4_stride = h->b_stride;
    int mb_xy = h->mb_xy, mb_y = s->mb_y;
    int mb_type_col[2];
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
    const int8_t *l1ref0, *l1ref1;
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type= MB_TYPE_L0L1;
    int i8, i4;
    int ref[2];
    int mv[2];
    int list;

    assert(h->ref_list[1][0].f.reference & 3);

    await_reference_mb_row(h, &h->ref_list[1][0], s->mb_y + !!IS_INTERLACED(*mb_type));

#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)

    /* ref = min(neighbors) */
    for(list=0; list<2; list++){
        int left_ref = h->ref_cache[list][scan8[0] - 1];
        int top_ref  = h->ref_cache[list][scan8[0] - 8];
        int refc     = h->ref_cache[list][scan8[0] - 8 + 4];
        const int16_t *C= h->mv_cache[list][ scan8[0] - 8 + 4];
        if(refc == PART_NOT_AVAILABLE){
            refc = h->ref_cache[list][scan8[0] - 8 - 1];
            C    = h->mv_cache[list][scan8[0] - 8 - 1];
        }
        ref[list] = FFMIN3((unsigned)left_ref, (unsigned)top_ref, (unsigned)refc);
        if(ref[list] >= 0){
            //this is just pred_motion() but with the cases removed that cannot happen for direct blocks
            const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
            const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];

            int match_count= (left_ref==ref[list]) + (top_ref==ref[list]) + (refc==ref[list]);
            if(match_count > 1){ //most common
                mv[list]= pack16to32(mid_pred(A[0], B[0], C[0]),
                                     mid_pred(A[1], B[1], C[1]) );
            }else {
                assert(match_count==1);
                if(left_ref==ref[list]){
                    mv[list]= AV_RN32A(A);
                }else if(top_ref==ref[list]){
                    mv[list]= AV_RN32A(B);
                }else{
                    mv[list]= AV_RN32A(C);
                }
            }
        }else{
            int mask= ~(MB_TYPE_L0 << (2*list));
            mv[list] = 0;
            ref[list] = -1;
            if(!is_b8x8)
                *mb_type &= mask;
            sub_mb_type &= mask;
        }
    }
    if(ref[0] < 0 && ref[1] < 0){
        ref[0] = ref[1] = 0;
        if(!is_b8x8)
            *mb_type |= MB_TYPE_L0L1;
        sub_mb_type |= MB_TYPE_L0L1;
    }

    if(!(is_b8x8|mv[0]|mv[1])){
        fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
        fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
        fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
        fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
        *mb_type= (*mb_type & ~(MB_TYPE_8x8|MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_P1L0|MB_TYPE_P1L1))
                 | MB_TYPE_16x16 | MB_TYPE_DIRECT2;
        return;
    }

    if (IS_INTERLACED(h->ref_list[1][0].f.mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
        if (!IS_INTERLACED(*mb_type)) {                      //     AFR/FR    -> AFL/FL
            mb_y = (s->mb_y&~1) + h->col_parity;
            mb_xy= s->mb_x + ((s->mb_y&~1) + h->col_parity)*s->mb_stride;
            b8_stride = 0;
        }else{
            mb_y  += h->col_fieldoff;
            mb_xy += s->mb_stride*h->col_fieldoff; // non zero for FL -> FL & differ parity
        }
        goto single_col;
    }else{                                               // AFL/AFR/FR/FL -> AFR/FR
        if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
            mb_y = s->mb_y&~1;
            mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
            mb_type_col[0] = h->ref_list[1][0].f.mb_type[mb_xy];
            mb_type_col[1] = h->ref_list[1][0].f.mb_type[mb_xy + s->mb_stride];
            b8_stride = 2+4*s->mb_stride;
            b4_stride *= 6;
            if (IS_INTERLACED(mb_type_col[0]) != IS_INTERLACED(mb_type_col[1])) {
                mb_type_col[0] &= ~MB_TYPE_INTERLACED;
                mb_type_col[1] &= ~MB_TYPE_INTERLACED;
            }

            sub_mb_type |= MB_TYPE_16x16|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
            if( (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
             && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
             && !is_b8x8){
                *mb_type |= MB_TYPE_16x8 |MB_TYPE_DIRECT2; /* B_16x8 */
            }else{
                *mb_type |= MB_TYPE_8x8;
            }
        }else{                                           //     AFR/FR    -> AFR/FR
single_col:
            mb_type_col[0] =
            mb_type_col[1] = h->ref_list[1][0].f.mb_type[mb_xy];

            sub_mb_type |= MB_TYPE_16x16|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
            if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
                *mb_type |= MB_TYPE_16x16|MB_TYPE_DIRECT2; /* B_16x16 */
            }else if(!is_b8x8 && (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16))){
                *mb_type |= MB_TYPE_DIRECT2 | (mb_type_col[0] & (MB_TYPE_16x8|MB_TYPE_8x16));
            }else{
                if(!h->sps.direct_8x8_inference_flag){
                    /* FIXME save sub mb types from previous frames (or derive from MVs)
                     * so we know exactly what block size to use */
                    sub_mb_type += (MB_TYPE_8x8-MB_TYPE_16x16); /* B_SUB_4x4 */
                }
                *mb_type |= MB_TYPE_8x8;
            }
        }
    }

    await_reference_mb_row(h, &h->ref_list[1][0], mb_y);

    l1mv0  = &h->ref_list[1][0].f.motion_val[0][h->mb2b_xy [mb_xy]];
    l1mv1  = &h->ref_list[1][0].f.motion_val[1][h->mb2b_xy [mb_xy]];
    l1ref0 = &h->ref_list[1][0].f.ref_index [0][4 * mb_xy];
    l1ref1 = &h->ref_list[1][0].f.ref_index [1][4 * mb_xy];
    if(!b8_stride){
        if(s->mb_y&1){
            l1ref0 += 2;
            l1ref1 += 2;
            l1mv0  += 2*b4_stride;
            l1mv1  += 2*b4_stride;
        }
    }

    if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
        int n=0;
        for(i8=0; i8<4; i8++){
            int x8 = i8&1;
            int y8 = i8>>1;
            int xy8 = x8+y8*b8_stride;
            int xy4 = 3*x8+y8*b4_stride;
            int a,b;

            if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                continue;
            h->sub_mb_type[i8] = sub_mb_type;

            fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
            fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
            if(!IS_INTRA(mb_type_col[y8]) && !h->ref_list[1][0].long_ref
               && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
                   || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0
                       && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
                a=b=0;
                if(ref[0] > 0)
                    a= mv[0];
                if(ref[1] > 0)
                    b= mv[1];
                n++;
            }else{
                a= mv[0];
                b= mv[1];
            }
            fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
            fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
        }
        if(!is_b8x8 && !(n&3))
            *mb_type= (*mb_type & ~(MB_TYPE_8x8|MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_P1L0|MB_TYPE_P1L1))
                     | MB_TYPE_16x16 | MB_TYPE_DIRECT2;
    }else if(IS_16X16(*mb_type)){
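/*
 * Illustrative sketch (not part of the original source; the helper names
 * below are ours, not FFmpeg's): the "ref = min(neighbors)" step above
 * relies on an unsigned compare so that unavailable neighbours (negative
 * ref indices) wrap to huge values and never win, and on a median when
 * more than one neighbour carries the winning reference.  In isolation:
 */
static int spatial_direct_ref(int left_ref, int top_ref, int refc)
{
    unsigned m = (unsigned)left_ref;                /* -1 wraps to UINT_MAX */
    if ((unsigned)top_ref < m) m = (unsigned)top_ref;
    if ((unsigned)refc    < m) m = (unsigned)refc;
    return (int)m;            /* still negative if no neighbour had a ref */
}

static int med3(int a, int b, int c)                /* same as mid_pred() */
{
    if (a > b) { int t = a; a = b; b = t; }         /* now a <= b         */
    if (c < b) b = c;                               /* b = min(b, c)      */
    return a > b ? a : b;                           /* max(a, min(b, c))  */
}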
void render_mbs()
{
    H264Context *h = g_h;
    GPUH264Context * const g = &h->gpu;
    MpegEncContext * const s = &h->s;
    H264mb* blockStore = g->block_buffer;
    int i, l, j;
    int lists = (h->slice_type==FF_B_TYPE) ? 2 : 1;
    int dpb_pos = s->current_picture.gpu_dpb;

    printf("Attempting to motion compensate %d blocks\n", (g->end-g->start+1));

    glNewList(dispList, GL_COMPILE);
    for(l=0; l < lists; l++) {
        glBegin(GL_QUADS);
        for(i= g->start; i <= g->end; i++) {
            const int mb_x = blockStore[i].mb_x;
            const int mb_y = blockStore[i].mb_y;
            const int mb_xy = mb_x + mb_y*s->mb_stride;
            const int mb_type = s->current_picture.mb_type[mb_xy];
            int mv_x, mv_y;

            //RUDD TODO ignoring Intra blocks for now
            if(IS_INTER(mb_type)) {
                if(IS_16X16(mb_type) && IS_DIR(mb_type, 0, l)) {
                    mv_x = blockStore[i].mv_cache[l][ scan8[0] ][0];
                    mv_y = blockStore[i].mv_cache[l][ scan8[0] ][1];
                    render_one_block(mb_x, mb_y, mv_x, mv_y, 16, 16, 0, 0,
                                     h->ref_list[l][h->ref_cache[l][ scan8[0] ]].gpu_dpb, dpb_pos);
                }
                else if(IS_16X8(mb_type)) {
                    if(IS_DIR(mb_type, 0, l)) {
                        mv_x = blockStore[i].mv_cache[l][ scan8[0] ][0];
                        mv_y = blockStore[i].mv_cache[l][ scan8[0] ][1];
                        render_one_block(mb_x, mb_y, mv_x, mv_y, 16, 8, 0, 0,
                                         h->ref_list[l][h->ref_cache[l][ scan8[0] ]].gpu_dpb, dpb_pos);
                    }
                    if(IS_DIR(mb_type, 1, l)) {
                        mv_x = blockStore[i].mv_cache[l][ scan8[8] ][0];
                        mv_y = blockStore[i].mv_cache[l][ scan8[8] ][1];
                        render_one_block(mb_x, mb_y, mv_x, mv_y, 16, 8, 0, -8,
                                         h->ref_list[l][h->ref_cache[l][ scan8[8] ]].gpu_dpb, dpb_pos);
                    }
                }
                else if(IS_8X16(mb_type)) {
                    if(IS_DIR(mb_type, 0, l)) {
                        mv_x = blockStore[i].mv_cache[l][ scan8[0] ][0];
                        mv_y = blockStore[i].mv_cache[l][ scan8[0] ][1];
                        render_one_block(mb_x, mb_y, mv_x, mv_y, 8, 16, 0, 0,
                                         h->ref_list[l][h->ref_cache[l][ scan8[0] ]].gpu_dpb, dpb_pos);
                    }
                    if(IS_DIR(mb_type, 1, l)) {
                        mv_x = blockStore[i].mv_cache[l][ scan8[4] ][0];
                        mv_y = blockStore[i].mv_cache[l][ scan8[4] ][1];
                        render_one_block(mb_x, mb_y, mv_x, mv_y, 8, 16, 8, 0,
                                         h->ref_list[l][h->ref_cache[l][ scan8[4] ]].gpu_dpb, dpb_pos);
                    }
                }
                else {
                    assert(IS_8X8(mb_type));
                    for(j=0;j<4;j++) {
                        const int sub_mb_type= h->sub_mb_type[j];
                        const int n= 4*j;
                        int x_offset= (j&1);
                        int y_offset= (j&2)>>1;

                        if(!IS_DIR(sub_mb_type, 0, l))
                            continue;

                        if(IS_SUB_8X8(sub_mb_type)) {
                            mv_x = blockStore[i].mv_cache[l][ scan8[n] ][0];
                            mv_y = blockStore[i].mv_cache[l][ scan8[n] ][1];
                            render_one_block(mb_x, mb_y, mv_x, mv_y, 8, 8, 8*x_offset, -8*y_offset,
                                             h->ref_list[l][h->ref_cache[l][ scan8[n] ]].gpu_dpb, dpb_pos);
                        }
                        else if(IS_SUB_8X4(sub_mb_type)) {
                            mv_x = blockStore[i].mv_cache[l][ scan8[n] ][0];
                            mv_y = blockStore[i].mv_cache[l][ scan8[n] ][1];
                            render_one_block(mb_x, mb_y, mv_x, mv_y, 8, 4, 8*x_offset, -8*y_offset,
                                             h->ref_list[l][h->ref_cache[l][ scan8[n] ]].gpu_dpb, dpb_pos);
                            mv_x = blockStore[i].mv_cache[l][ scan8[n+2] ][0];
                            mv_y = blockStore[i].mv_cache[l][ scan8[n+2] ][1];
                            render_one_block(mb_x, mb_y, mv_x, mv_y, 8, 4, 8*x_offset, -8*y_offset-4,
                                             h->ref_list[l][h->ref_cache[l][ scan8[n+2] ]].gpu_dpb, dpb_pos);
                        }
                        else if(IS_SUB_4X8(sub_mb_type)) {
                            mv_x = blockStore[i].mv_cache[l][ scan8[n] ][0];
                            mv_y = blockStore[i].mv_cache[l][ scan8[n] ][1];
                            render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 8, 8*x_offset, -8*y_offset,
                                             h->ref_list[l][h->ref_cache[l][ scan8[n] ]].gpu_dpb, dpb_pos);
                            mv_x = blockStore[i].mv_cache[l][ scan8[n+1] ][0];
                            mv_y = blockStore[i].mv_cache[l][ scan8[n+1] ][1];
                            render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 8, 8*x_offset+4, -8*y_offset,
                                             h->ref_list[l][h->ref_cache[l][ scan8[n+1] ]].gpu_dpb, dpb_pos);
                        }
                        else {
                            /* 4x4 sub-blocks */
                            mv_x = blockStore[i].mv_cache[l][ scan8[n] ][0];
                            mv_y = blockStore[i].mv_cache[l][ scan8[n] ][1];
                            render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 4, 8*x_offset, -8*y_offset,
                                             h->ref_list[l][h->ref_cache[l][ scan8[n] ]].gpu_dpb, dpb_pos);
                            mv_x = blockStore[i].mv_cache[l][ scan8[n+1] ][0];
                            mv_y = blockStore[i].mv_cache[l][ scan8[n+1] ][1];
                            render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 4, 8*x_offset+4, -8*y_offset,
                                             h->ref_list[l][h->ref_cache[l][ scan8[n+1] ]].gpu_dpb, dpb_pos);
                            mv_x = blockStore[i].mv_cache[l][ scan8[n+2] ][0];
                            mv_y = blockStore[i].mv_cache[l][ scan8[n+2] ][1];
                            render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 4, 8*x_offset, -8*y_offset-4,
                                             h->ref_list[l][h->ref_cache[l][ scan8[n+2] ]].gpu_dpb, dpb_pos);
                            mv_x = blockStore[i].mv_cache[l][ scan8[n+3] ][0];
                            mv_y = blockStore[i].mv_cache[l][ scan8[n+3] ][1];
                            render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 4, 8*x_offset+4, -8*y_offset-4,
                                             h->ref_list[l][h->ref_cache[l][ scan8[n+3] ]].gpu_dpb, dpb_pos);
                        }
                    }
                }
            }
        }
        glEnd();    /* pair each glBegin() with a glEnd() per reference list */
    }
    glEndList();
}
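/*
 * Illustrative sketch (not part of the original source): render_mbs()
 * only *compiles* its quads into the display list, so a caller has to
 * allocate the list id and replay it to actually draw.  Hypothetical
 * driver code, assuming the global `dispList` used above starts at 0:
 */
static void gpu_mc_frame(void)
{
    if (!dispList)
        dispList = glGenLists(1);   /* allocate the display-list id once  */
    render_mbs();                   /* GL_COMPILE: records, draws nothing */
    glCallList(dispList);           /* replay the recorded quads          */
}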
static void MCFUNC(hl_motion)(const H264Context *h, H264SliceContext *sl,
                              uint8_t *dest_y,
                              uint8_t *dest_cb, uint8_t *dest_cr,
                              qpel_mc_func(*qpix_put)[16],
                              const h264_chroma_mc_func(*chroma_put),
                              qpel_mc_func(*qpix_avg)[16],
                              const h264_chroma_mc_func(*chroma_avg),
                              const h264_weight_func *weight_op,
                              const h264_biweight_func *weight_avg)
{
    const int mb_xy   = sl->mb_xy;
    const int mb_type = h->cur_pic.mb_type[mb_xy];

    av_assert2(IS_INTER(mb_type));

    if (HAVE_THREADS && (h->avctx->active_thread_type & FF_THREAD_FRAME))
        await_references(h, sl);
    prefetch_motion(h, sl, 0, PIXEL_SHIFT, CHROMA_IDC);

    if (IS_16X16(mb_type)) {
        mc_part(h, sl, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
    } else if (IS_16X8(mb_type)) {
        mc_part(h, sl, 0, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, sl, 8, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    } else if (IS_8X16(mb_type)) {
        mc_part(h, sl, 0, 0, 16, 8 * sl->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
        mc_part(h, sl, 4, 0, 16, 8 * sl->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
    } else {
        int i;

        av_assert2(IS_8X8(mb_type));

        for (i = 0; i < 4; i++) {
            const int sub_mb_type = sl->sub_mb_type[i];
            const int n = 4 * i;
            int x_offset = (i & 1) << 2;
            int y_offset = (i & 2) << 1;

            if (IS_SUB_8X8(sub_mb_type)) {
                mc_part(h, sl, n, 1, 8, 0, dest_y, dest_cb, dest_cr,
                        x_offset, y_offset,
                        qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                        &weight_op[1], &weight_avg[1],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            } else if (IS_SUB_8X4(sub_mb_type)) {
                mc_part(h, sl, n, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr,
                        x_offset, y_offset,
                        qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                        &weight_op[1], &weight_avg[1],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, sl, n + 2, 0, 4, 4 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr,
                        x_offset, y_offset + 2,
                        qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                        &weight_op[1], &weight_avg[1],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            } else if (IS_SUB_4X8(sub_mb_type)) {
                mc_part(h, sl, n, 0, 8, 4 * sl->mb_linesize, dest_y, dest_cb, dest_cr,
                        x_offset, y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[2], &weight_avg[2],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                mc_part(h, sl, n + 1, 0, 8, 4 * sl->mb_linesize, dest_y, dest_cb, dest_cr,
                        x_offset + 2, y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[2], &weight_avg[2],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
            } else {
                int j;
                av_assert2(IS_SUB_4X4(sub_mb_type));
                for (j = 0; j < 4; j++) {
                    int sub_x_offset = x_offset + 2 * (j & 1);
                    int sub_y_offset = y_offset + (j & 2);
                    mc_part(h, sl, n + j, 1, 4, 0, dest_y, dest_cb, dest_cr,
                            sub_x_offset, sub_y_offset,
                            qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                            &weight_op[2], &weight_avg[2],
                            IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
                }
            }
        }
    }

    if (USES_LIST(mb_type, 1))
        prefetch_motion(h, sl, 1, PIXEL_SHIFT, CHROMA_IDC);
}
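/*
 * Illustrative sketch (not part of the original source; partition_offsets
 * is our name, and the 2-pixel unit is our reading of mc_part(), which is
 * not shown here): hl_motion() appears to express partition positions in
 * 2-pixel (4:2:0 chroma) units, so a 16x16 luma macroblock spans offsets
 * 0..7.  The 8x8 partition index i and 4x4 sub-index j then unpack as:
 */
static void partition_offsets(int i, int j, int *luma_x, int *luma_y)
{
    int x_offset = (i & 1) << 2;   /* 0 or 4: left/right 8x8 half   */
    int y_offset = (i & 2) << 1;   /* 0 or 4: top/bottom 8x8 half   */
    x_offset += 2 * (j & 1);       /* 4x4 sub-block within the 8x8  */
    y_offset += (j & 2);
    *luma_x = 2 * x_offset;        /* back to luma pixels           */
    *luma_y = 2 * y_offset;
}
/* e.g. i=3 (bottom-right 8x8), j=3 (its bottom-right 4x4) -> (12, 12) */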