Пример #1
0
/**
 * Run inter prediction (motion compensation) for the current macroblock.
 *
 * The macroblock type is read from h->cur_pic.mb_type[sl->mb_xy] and must
 * satisfy IS_INTER(). Depending on how the macroblock is partitioned
 * (16x16, 16x8, 8x16, or 8x8 with per-quadrant sub-partitions taken from
 * sl->sub_mb_type[]), mc_part() is invoked once per partition with the
 * matching entries of the put/avg function tables, the matching weight
 * function pointers and the IS_DIR() flags of that partition for list 0
 * and list 1.
 *
 * When frame threading is active, await_references() is called first.
 * prefetch_motion() is issued for list 0 before the partitions are
 * processed, and for list 1 afterwards if the macroblock uses list 1.
 */
static void MCFUNC(hl_motion)(const H264Context *h, H264SliceContext *sl,
                              uint8_t *dest_y,
                              uint8_t *dest_cb, uint8_t *dest_cr,
                              qpel_mc_func(*qpix_put)[16],
                              const h264_chroma_mc_func(*chroma_put),
                              qpel_mc_func(*qpix_avg)[16],
                              const h264_chroma_mc_func(*chroma_avg),
                              const h264_weight_func *weight_op,
                              const h264_biweight_func *weight_avg)
{
    const int mb_type = h->cur_pic.mb_type[sl->mb_xy];

    av_assert2(IS_INTER(mb_type));

    if (HAVE_THREADS && (h->avctx->active_thread_type & FF_THREAD_FRAME))
        await_references(h, sl);
    prefetch_motion(h, sl, 0, PIXEL_SHIFT, CHROMA_IDC);

    if (IS_16X16(mb_type)) {
        /* One partition covering the whole macroblock. */
        const int dir_l0 = IS_DIR(mb_type, 0, 0);
        const int dir_l1 = IS_DIR(mb_type, 0, 1);

        mc_part(h, sl, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg, dir_l0, dir_l1);
    } else if (IS_16X8(mb_type)) {
        /* Two partitions stacked vertically: part 0 on top, part 1 below. */
        const int top_l0    = IS_DIR(mb_type, 0, 0);
        const int top_l1    = IS_DIR(mb_type, 0, 1);
        const int bottom_l0 = IS_DIR(mb_type, 1, 0);
        const int bottom_l1 = IS_DIR(mb_type, 1, 1);

        mc_part(h, sl, 0, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg, top_l0, top_l1);
        mc_part(h, sl, 8, 0, 8, 8 << PIXEL_SHIFT, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg, bottom_l0, bottom_l1);
    } else if (IS_8X16(mb_type)) {
        /* Two partitions side by side: part 0 on the left, part 1 on the right. */
        const int left_l0  = IS_DIR(mb_type, 0, 0);
        const int left_l1  = IS_DIR(mb_type, 0, 1);
        const int right_l0 = IS_DIR(mb_type, 1, 0);
        const int right_l1 = IS_DIR(mb_type, 1, 1);

        mc_part(h, sl, 0, 0, 16, 8 * sl->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1], left_l0, left_l1);
        mc_part(h, sl, 4, 0, 16, 8 * sl->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1], right_l0, right_l1);
    } else {
        int quad;

        av_assert2(IS_8X8(mb_type));

        /* Four 8x8 quadrants, each with its own sub-macroblock type. */
        for (quad = 0; quad < 4; quad++) {
            const int sub_mb_type = sl->sub_mb_type[quad];
            const int first_blk   = 4 * quad;
            const int x_offset    = (quad & 1) * 4;
            const int y_offset    = (quad & 2) * 2;
            /* Every sub-partition of a quadrant shares the quadrant's
             * list 0 / list 1 usage flags. */
            const int dir_l0      = IS_DIR(sub_mb_type, 0, 0);
            const int dir_l1      = IS_DIR(sub_mb_type, 0, 1);

            if (IS_SUB_8X8(sub_mb_type)) {
                mc_part(h, sl, first_blk, 1, 8, 0, dest_y, dest_cb, dest_cr,
                        x_offset, y_offset,
                        qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                        &weight_op[1], &weight_avg[1], dir_l0, dir_l1);
            } else if (IS_SUB_8X4(sub_mb_type)) {
                mc_part(h, sl, first_blk, 0, 4, 4 << PIXEL_SHIFT,
                        dest_y, dest_cb, dest_cr, x_offset, y_offset,
                        qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                        &weight_op[1], &weight_avg[1], dir_l0, dir_l1);
                mc_part(h, sl, first_blk + 2, 0, 4, 4 << PIXEL_SHIFT,
                        dest_y, dest_cb, dest_cr, x_offset, y_offset + 2,
                        qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                        &weight_op[1], &weight_avg[1], dir_l0, dir_l1);
            } else if (IS_SUB_4X8(sub_mb_type)) {
                mc_part(h, sl, first_blk, 0, 8, 4 * sl->mb_linesize,
                        dest_y, dest_cb, dest_cr, x_offset, y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[2], &weight_avg[2], dir_l0, dir_l1);
                mc_part(h, sl, first_blk + 1, 0, 8, 4 * sl->mb_linesize,
                        dest_y, dest_cb, dest_cr, x_offset + 2, y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[2], &weight_avg[2], dir_l0, dir_l1);
            } else {
                int sub;

                av_assert2(IS_SUB_4X4(sub_mb_type));

                for (sub = 0; sub < 4; sub++) {
                    const int sub_x_offset = x_offset + 2 * (sub & 1);
                    const int sub_y_offset = y_offset + (sub & 2);

                    mc_part(h, sl, first_blk + sub, 1, 4, 0,
                            dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                            qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                            &weight_op[2], &weight_avg[2], dir_l0, dir_l1);
                }
            }
        }
    }

    if (USES_LIST(mb_type, 1))
        prefetch_motion(h, sl, 1, PIXEL_SHIFT, CHROMA_IDC);
}
Пример #2
0
void render_mbs()
{
  H264Context *h = g_h;
  GPUH264Context * const g = &h->gpu;
  MpegEncContext * const s = &h->s;
  H264mb* blockStore = g->block_buffer;
  int i, l;
  int lists = (h->slice_type==FF_B_TYPE)?2:1;
  int dpb_pos = s->current_picture.gpu_dpb;
  printf("Attempting to motion compensate %d blocks\n", (g->end-g->start+1));

  glNewList(dispList, GL_COMPILE);
  for(l=0; l < lists; l++)
  {
    glBegin(GL_QUADS);
    for(i= g->start; i <= g->end; i++)
    {
      const int mb_x = blockStore[i].mb_x;
      const int mb_y = blockStore[i].mb_y;
      const int mb_xy = mb_x + mb_y*s->mb_stride;
      const int mb_type = s->current_picture.mb_type[mb_xy];
      int mv_x, mv_y, j;
      
      //RUDD TODO ignoring Intra blocks for now
      if(IS_INTER(mb_type))
      {
        if(IS_16X16(mb_type) && IS_DIR(mb_type, 0, l))
          {
            mv_x = blockStore[i].mv_cache[0][ scan8[0]][0];
            mv_y = blockStore[i].mv_cache[0][ scan8[0]][1];
            render_one_block(mb_x, mb_y, mv_x, mv_y, 16, 16, 0, 0,
                             h->ref_list[l][h->ref_cache[l][ scan8[0] ]].gpu_dpb, dpb_pos);
          }
	else if(IS_16X8(mb_type))
        {
	  if(IS_DIR(mb_type, 0, l))
	  {
	      mv_x = blockStore[i].mv_cache[l][ scan8[0]][0];
	      mv_y = blockStore[i].mv_cache[l][ scan8[0]][1];
	      render_one_block(mb_x, mb_y, mv_x, mv_y, 16, 8, 0, 0,
			       h->ref_list[l][h->ref_cache[l][ scan8[0] ]].gpu_dpb, dpb_pos);
	  }
	  if(IS_DIR(mb_type, 1, l))
	  {
	    mv_x = blockStore[i].mv_cache[0][ scan8[8]][0];
	    mv_y = blockStore[i].mv_cache[0][ scan8[8]][1];
	    render_one_block(mb_x, mb_y, mv_x, mv_y, 16, 8, 0, -8,
			     h->ref_list[l][h->ref_cache[l][ scan8[8] ]].gpu_dpb, dpb_pos);	
	  }
	}
	else if(IS_8X16(mb_type))
	{
	  if(IS_DIR(mb_type, 0, l))
          {
	    mv_x = blockStore[i].mv_cache[0][ scan8[0]][0];
	    mv_y = blockStore[i].mv_cache[0][ scan8[0]][1];
	    render_one_block(mb_x, mb_y, mv_x, mv_y, 8, 16, 0, 0,
			     h->ref_list[l][h->ref_cache[l][ scan8[0] ]].gpu_dpb, dpb_pos);
	  }
	  if(IS_DIR(mb_type, 1, l))
	  {
	      mv_x = blockStore[i].mv_cache[0][ scan8[4]][0];
	      mv_y = blockStore[i].mv_cache[0][ scan8[4]][1];
	      render_one_block(mb_x, mb_y, mv_x, mv_y, 8, 16, 8, 0,
			       h->ref_list[l][h->ref_cache[l][ scan8[4] ]].gpu_dpb, dpb_pos);
	  }
	}
	else
        {
	  assert(IS_8X8(mb_type));
	  int j;
	  for(j=0;j<4;j++)
	  {
	    const int sub_mb_type= h->sub_mb_type[j];
	    const int n= 4*j;
	    int x_offset= (j&1);
	    int y_offset= (j&2)>>1;
	    if(!IS_DIR(sub_mb_type, 0, l))
              continue;

	    if(IS_SUB_8X8(sub_mb_type))
	    {
		mv_x = blockStore[i].mv_cache[0][ scan8[n]][0];
		mv_y = blockStore[i].mv_cache[0][ scan8[n]][1];
		render_one_block(mb_x, mb_y, mv_x, mv_y, 8, 8, 8*x_offset,-8*y_offset,
			      h->ref_list[l][h->ref_cache[l][ scan8[n] ]].gpu_dpb, dpb_pos);
	    }
	    else if(IS_SUB_8X4(sub_mb_type))
	    {
	      mv_x = blockStore[i].mv_cache[0][ scan8[n]][0];
	      mv_y = blockStore[i].mv_cache[0][ scan8[n]][1];
	      render_one_block(mb_x, mb_y, mv_x, mv_y, 8, 4, 8*x_offset,-8*y_offset,
			      h->ref_list[l][h->ref_cache[l][ scan8[n] ]].gpu_dpb, dpb_pos);

	      mv_x = blockStore[i].mv_cache[0][ scan8[n+2]][0];
	      mv_y = blockStore[i].mv_cache[0][ scan8[n+2]][1];
	      render_one_block(mb_x, mb_y, mv_x, mv_y, 8, 4, 8*x_offset,-8*y_offset-4,
			    h->ref_list[l][h->ref_cache[l][ scan8[n+2] ]].gpu_dpb, dpb_pos);
	    }
	    else if(IS_SUB_4X8(sub_mb_type))
            {
	      mv_x = blockStore[i].mv_cache[0][ scan8[n]][0];
	      mv_y = blockStore[i].mv_cache[0][ scan8[n]][1];
	      render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 8, 8*x_offset,-8*y_offset,
			    h->ref_list[l][h->ref_cache[l][ scan8[n] ]].gpu_dpb, dpb_pos);

	      mv_x = blockStore[i].mv_cache[0][ scan8[n+1]][0];
	      mv_y = blockStore[i].mv_cache[0][ scan8[n+1]][1];
	      render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 8, 8*x_offset+4,-8*y_offset,
			    h->ref_list[l][h->ref_cache[l][ scan8[n+1] ]].gpu_dpb, dpb_pos);
	    }
            else
	    {
	      mv_x = blockStore[i].mv_cache[0][ scan8[n]][0];
	      mv_y = blockStore[i].mv_cache[0][ scan8[n]][1];
	      render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 4, 8*x_offset,-8*y_offset,
			    h->ref_list[l][h->ref_cache[l][ scan8[n] ]].gpu_dpb, dpb_pos);

	      mv_x = blockStore[i].mv_cache[0][ scan8[n+1]][0];
	      mv_y = blockStore[i].mv_cache[0][ scan8[n+1]][1];
	      render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 4, 8*x_offset+4,-8*y_offset,
			    h->ref_list[l][h->ref_cache[l][ scan8[n+1] ]].gpu_dpb, dpb_pos);

	      mv_x = blockStore[i].mv_cache[0][ scan8[n+2]][0];
	      mv_y = blockStore[i].mv_cache[0][ scan8[n+2]][1];
	      render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 4, 8*x_offset,-8*y_offset-4,
			    h->ref_list[l][h->ref_cache[l][ scan8[n+2] ]].gpu_dpb, dpb_pos);

	      mv_x = blockStore[i].mv_cache[0][ scan8[n+3]][0];
	      mv_y = blockStore[i].mv_cache[0][ scan8[n+3]][1];
	      render_one_block(mb_x, mb_y, mv_x, mv_y, 4, 4, 8*x_offset+4,-8*y_offset-4,
			    h->ref_list[l][h->ref_cache[l][ scan8[n+3] ]].gpu_dpb, dpb_pos);
	    }
	  }
	}
      }
    }
  }
  glEnd();
  glEndList();
}
Пример #3
0
/*
 * Build the motion-compensation task descriptor chain for one macroblock and
 * program the output/status addresses it uses.
 *
 * SLICE_T    - slice-level arguments, forwarded unchanged to motion_task().
 * dmb        - per-macroblock control data; mb_type, sub_mb_type[], mb_x and
 *              mb_y are read here, the rest is forwarded to motion_task().
 * recon_buf  - output buffer: the first output address is recon_buf itself,
 *              the second is recon_buf + PREVIOUS_OFFSET_U.
 * motion_dha - start of the descriptor area. Word 0 receives the chain head,
 *              the task entries follow, and the byte at motion_dha + 0x108
 *              is cleared and used as the DSA address for both register sets.
 *
 * The macroblock is split according to mb_type (16x16, 16x8, 8x16, otherwise
 * four 8x8 quadrants with their own sub_mb_type) and motion_task() is called
 * once per partition. In the calls below, a blkh/blkw value of 3 is used for
 * a 16-pixel dimension, 2 for 8 pixels and 1 for 4 pixels; boy/box step by 2
 * per 8x8 quadrant and by 1 per 4-pixel block.
 *
 * NOTE(review): tkn is only passed by address to motion_task(); presumably
 * motion_task() increments it once per emitted task and each task occupies
 * two words of tdd (the 2*tkn indexing below relies on that) -- confirm
 * against motion_task().
 */
void motion_execute(H264_Slice_GlbARGs *SLICE_T, H264_MB_Ctrl_DecARGs *dmb, uint8_t *recon_buf,
		    uint8_t *motion_dha)
{
  uint8_t *motion_douty, *motion_doutc;
  uint8_t *motion_dsa = motion_dha + 0x108;
  const int mb_type= dmb->mb_type;

  motion_douty = recon_buf;
  motion_doutc = recon_buf + PREVIOUS_OFFSET_U;
  
  /* Program destination and DSA addresses for both register sets; the same
   * DSA address is used for both. */
  SET_REG1_DSTA(TCSM1_PADDR((int)motion_douty));
  SET_REG1_DSA(TCSM1_PADDR((int)motion_dsa));
  SET_REG2_DSTA(TCSM1_PADDR((int)motion_doutc));
  SET_REG2_DSA(TCSM1_PADDR((int)motion_dsa));

  volatile int *tdd = (int *)motion_dha;
  int tkn = 0;
  /* Clear the byte at the DSA address before any task is queued. */
  motion_dsa[0] = 0x0;
  /* Skip word 0: it is filled in last (as tdd[-1]) once tkn is known. */
  tdd++;
 
  if(IS_16X16(mb_type)){
    motion_task(SLICE_T, dmb, 0, 0, tdd, &tkn,
		IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
		3/*blkh*/, 3/*blkw*/, 0/*boy*/, 0/*box*/);
  }else if(IS_16X8(mb_type)){
    motion_task(SLICE_T, dmb, 0, 0, tdd, &tkn,
		IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
		2/*blkh*/, 3/*blkw*/, 0/*boy*/, 0/*box*/);

    motion_task(SLICE_T, dmb, 1, 2, tdd, &tkn,
    		IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
    		2/*blkh*/, 3/*blkw*/, 2/*boy*/, 0/*box*/);
  }else if(IS_8X16(mb_type)){
    motion_task(SLICE_T, dmb, 0, 0, tdd, &tkn,
		IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
		3/*blkh*/, 2/*blkw*/, 0/*boy*/, 0/*box*/);

    motion_task(SLICE_T, dmb, 1, 1, tdd, &tkn,
		IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
		3/*blkh*/, 2/*blkw*/, 0/*boy*/, 2/*box*/);
  }else{
    int i;
    /* Running index passed as the third motion_task() argument; it advances
     * by one for every sub-partition emitted, across all four quadrants. */
    int mv_n=0;
    for(i=0; i<4; i++){
      const int sub_mb_type= dmb->sub_mb_type[i];
      if(IS_SUB_8X8(sub_mb_type)){
	motion_task(SLICE_T, dmb, mv_n, i, tdd, &tkn,
		    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
		    2/*blkh*/, 2/*blkw*/, (i & 0x2)/*boy*/, (i & 0x1)*2/*box*/);
        mv_n++;
      }else if(IS_SUB_8X4(sub_mb_type)){
	motion_task(SLICE_T, dmb, mv_n, i, tdd, &tkn,
		    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
		    1/*blkh*/, 2/*blkw*/, (i & 0x2)/*boy*/, (i & 0x1)*2/*box*/);
        mv_n++;
	motion_task(SLICE_T, dmb, mv_n, i, tdd, &tkn,
		    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
		    1/*blkh*/, 2/*blkw*/, (i & 0x2)+1/*boy*/, (i & 0x1)*2/*box*/);
        mv_n++;
      }else if(IS_SUB_4X8(sub_mb_type)){
	motion_task(SLICE_T, dmb, mv_n, i, tdd, &tkn,
		    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
		    2/*blkh*/, 1/*blkw*/, (i & 0x2)/*boy*/, (i & 0x1)*2/*box*/);
        mv_n++;
	motion_task(SLICE_T, dmb, mv_n, i, tdd, &tkn,
		    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
		    2/*blkh*/, 1/*blkw*/, (i & 0x2)/*boy*/, (i & 0x1)*2+1/*box*/);
        mv_n++;
      }else{
	int j;
	for(j=0; j<4; j++){
	  motion_task(SLICE_T, dmb, mv_n, i, tdd, &tkn,
		      IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
		      1/*blkh*/, 1/*blkw*/, 
		      (i & 0x2) + (j & 0x2)/2/*boy*/, (i & 0x1)*2 + (j & 0x1)/*box*/);
          mv_n++;
	} //j
      } //BLK4X4
    } //i
  } //BLK8X8
  
  /* Set the TDD_DOE_SFT bit in the last word written for the final task.
   * NOTE(review): if no task was emitted (tkn == 0) this touches tdd[-1],
   * which is overwritten by the head word right below -- confirm tkn > 0 is
   * guaranteed for every inter macroblock. */
  tdd[2*tkn-1] |= 0x1<<TDD_DOE_SFT;
  /* Chain head in word 0 of motion_dha: carries the task count and the
   * macroblock position. */
  tdd[-1] = TDD_HEAD(1,/*vld*/
		     1,/*lk*/
		     0,/*sync*/
		     1,/*ch1pel*/
		     2,/*ch2pel*/ 
		     TDD_POS_SPEC,/*posmd*/
		     TDD_MV_AUTO,/*mvmd*/ 
		     1,/*ch2en*/
		     tkn,/*tkn*/
		     dmb->mb_y,/*mby*/
		     dmb->mb_x/*mbx*/);

  /* Trailing head word placed right after the last task: lk=0, sync=1,
   * zero tasks, and 0xFF for both macroblock coordinates. */
  tdd[2*tkn] = TDD_HEAD(1,/*vld*/
		      0,/*lk*/
		      1,/*sync*/
		      1,/*ch1pel*/
		      2,/*ch2pel*/ 
		      TDD_POS_SPEC,/*posmd*/
		      TDD_MV_AUTO,/*mvmd*/ 
		      1,/*ch2en*/
		      0,/*tkn*/
		      0xFF,/*mby*/
		      0xFF/*mbx*/);

}