Exemplo n.º 1
0
Arquivo: clpf.c Projeto: jmvalin/aom
// Iterate over blocks within a superblock
//
// Runs clpf_block() on every non-skipped block of the superblock whose
// top-left block is at mode-info position (xpos, ypos). Iteration is clipped
// to cm->mi_rows / cm->mi_cols. Only the first plane is processed unless
// CLPF_FILTER_ALL_PLANES is set.
//
//   frame_buffer  frame whose planes are filtered
//   cm            supplies mi_rows, mi_cols and mi_stride
//   xd            its plane[] array is re-pointed at each block through
//                 vp10_setup_dst_planes()
//   mi_8x8        mode-info pointer grid, indexed as row * cm->mi_stride + col
//   xpos, ypos    position of the superblock, in mode-info units
static void vp10_clpf_sb(const YV12_BUFFER_CONFIG *frame_buffer,
                         const VP10_COMMON *cm, MACROBLOCKD *xd,
                         MODE_INFO *const *mi_8x8, int xpos, int ypos) {
  // Temporary buffer (to allow SIMD parallelism)
  // Over-allocated by 15 bytes so that `buf` can be rounded up to the next
  // 16-byte boundary.
  uint8_t buf_unaligned[BS * BS + 15];
  uint8_t *buf = (uint8_t *)(((intptr_t)buf_unaligned + 15) & ~15);
  int x, y, p;

  for (p = 0; p < (CLPF_FILTER_ALL_PLANES ? MAX_MB_PLANE : 1); p++) {
    // Pass 1: filter every block of the superblock that lies inside the frame.
    for (y = 0; y < MI_BLOCK_SIZE && ypos + y < cm->mi_rows; y++) {
      for (x = 0; x < MI_BLOCK_SIZE && xpos + x < cm->mi_cols; x++) {
        const MB_MODE_INFO *mbmi =
            &mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;

        // Do not filter if there is no residual
        if (!mbmi->skip) {
          // Do not filter frame edges
          // Each flag is non-zero when the block has a neighbour inside the
          // frame on that side.
          int has_top = ypos + y > 0;
          int has_left = xpos + x > 0;
          int has_bottom = ypos + y < cm->mi_rows - 1;
          int has_right = xpos + x < cm->mi_cols - 1;
#if CLPF_ALLOW_BLOCK_PARALLELISM
          // Do not filter superblock edges
          has_top &= !!y;
          has_left &= !!x;
          has_bottom &= y != MI_BLOCK_SIZE - 1;
          has_right &= x != MI_BLOCK_SIZE - 1;
#endif
          // Point xd->plane[].dst at the current block.
          vp10_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
          // With CLPF_ALLOW_PIXEL_PARALLELISM the second pointer argument is
          // this block's slot in `buf` (row stride BS); otherwise it is the
          // frame buffer itself, with the frame stride.
          // NOTE(review): the `buf` offsets are computed in full MI_SIZE units
          // for every plane and assume BS >= MI_BLOCK_SIZE * MI_SIZE - confirm.
          clpf_block(
              xd->plane[p].dst.buf, CLPF_ALLOW_PIXEL_PARALLELISM
                                        ? buf + y * MI_SIZE * BS + x * MI_SIZE
                                        : xd->plane[p].dst.buf,
              xd->plane[p].dst.stride,
              CLPF_ALLOW_PIXEL_PARALLELISM ? BS : xd->plane[p].dst.stride,
              has_top, has_left, has_bottom, has_right,
              MI_SIZE >> xd->plane[p].subsampling_x,
              MI_SIZE >> xd->plane[p].subsampling_y);
        }
      }
    }
#if CLPF_ALLOW_PIXEL_PARALLELISM
    // Pass 2: copy the blocks held in `buf` back into the frame, using the
    // same skip test and the same `buf` offsets as pass 1.
    for (y = 0; y < MI_BLOCK_SIZE && ypos + y < cm->mi_rows; y++) {
      for (x = 0; x < MI_BLOCK_SIZE && xpos + x < cm->mi_cols; x++) {
        const MB_MODE_INFO *mbmi =
            &mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
        vp10_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
        if (!mbmi->skip) {
          int i = 0;
          // One memcpy per row; the row count and the row width are MI_SIZE
          // shifted right by the plane's subsampling_y / subsampling_x.
          for (i = 0; i<MI_SIZE>> xd->plane[p].subsampling_y; i++)
            memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
                   buf + (y * MI_SIZE + i) * BS + x * MI_SIZE,
                   MI_SIZE >> xd->plane[p].subsampling_x);
        }
      }
    }
Exemplo n.º 2
0
// Per-thread loop-filter body.
//
// Starting at mode-info row `start`, filters one superblock row, then jumps
// ahead by lf_sync->num_workers superblock rows, until `stop` is reached.
// Every superblock is bracketed by sync_read() / sync_write() on `lf_sync`.
//
//   frame_buffer  frame being filtered
//   cm            common state (mi grid, mi_cols, mi_stride)
//   planes        per-plane descriptors, re-pointed at each superblock
//   start, stop   half-open range of mode-info rows
//   y_only        non-zero: filter plane 0 only
//   lf_sync       row synchronisation state
static INLINE
void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer,
                             VP10_COMMON *const cm,
                             struct macroblockd_plane planes[MAX_MB_PLANE],
                             int start, int stop, int y_only,
                             VP9LfSync *const lf_sync) {
  const int plane_count = y_only ? 1 : MAX_MB_PLANE;
  const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
  int mi_row;

  // Choose the chroma filter routine once. The chroma subsampling is only
  // inspected when chroma planes will actually be filtered.
  enum lf_path path = LF_PATH_444;
  if (!y_only) {
    const int ss_y = planes[1].subsampling_y;
    const int ss_x = planes[1].subsampling_x;
    if (ss_y == 1 && ss_x == 1) {
      path = LF_PATH_420;
    } else if (ss_y != 0 || ss_x != 0) {
      path = LF_PATH_SLOW;
    }
  }

  mi_row = start;
  while (mi_row < stop) {
    MODE_INFO **const row_mi = cm->mi_grid_visible + mi_row * cm->mi_stride;
    const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2;
    int mi_col;

    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
      const int sb_col = mi_col >> MI_BLOCK_SIZE_LOG2;
      MODE_INFO **const sb_mi = row_mi + mi_col;
      LOOP_FILTER_MASK lfm;
      int pl;

      sync_read(lf_sync, sb_row, sb_col);

      vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);

      // TODO(JBB): Make setup_mask work for non 420.
      vp10_setup_mask(cm, mi_row, mi_col, sb_mi, cm->mi_stride, &lfm);

      // Plane 0 always takes the non-subsampled routine.
      vp10_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);

      for (pl = 1; pl < plane_count; ++pl) {
        struct macroblockd_plane *const pd = &planes[pl];

        if (path == LF_PATH_420) {
          vp10_filter_block_plane_ss11(cm, pd, mi_row, &lfm);
        } else if (path == LF_PATH_444) {
          vp10_filter_block_plane_ss00(cm, pd, mi_row, &lfm);
        } else if (path == LF_PATH_SLOW) {
          vp10_filter_block_plane_non420(cm, pd, sb_mi, mi_row, mi_col);
        }
      }

      sync_write(lf_sync, sb_row, sb_col, sb_cols);
    }

    mi_row += lf_sync->num_workers * MI_BLOCK_SIZE;
  }
}
Exemplo n.º 3
0
Arquivo: dering.c Projeto: jmvalin/aom
/* Runs the deringing filter over a whole frame, in place.
 *
 * The three planes of `frame` are first copied into temporary od_dering_in
 * buffers and the per-block skip flags into a byte map. od_dering() is then
 * called for each superblock and plane, and its output is written back into
 * the frame buffer.
 *
 *   frame         frame to filter (read and overwritten)
 *   cm            common state: mi grid, mi_rows/mi_cols/mi_stride, bit depth
 *   xd            its plane[] array is pointed at `frame` and used for all
 *                 pixel access
 *   global_level  frame-level filter level; with DERING_REFINEMENT it is
 *                 combined with the dering_gain of the superblock's first
 *                 block
 *
 * If any temporary buffer cannot be allocated, the function frees what it
 * obtained and returns, leaving the frame unfiltered.
 */
void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
                       MACROBLOCKD *xd, int global_level) {
    int r, c;
    int sbr, sbc;
    int nhsb, nvsb;
    /* Zero-initialised so the cleanup path may free every entry. */
    od_dering_in *src[3] = {0};
    unsigned char *bskip;
    int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = {{0}};
    int stride;
    int bsize[3];
    int dec[3];
    int pli;
    int coeff_shift = VPXMAX(cm->bit_depth - 8, 0);
    /* Superblock counts, rounded up so partial superblocks are included. */
    nvsb = (cm->mi_rows + MI_BLOCK_SIZE - 1)/MI_BLOCK_SIZE;
    nhsb = (cm->mi_cols + MI_BLOCK_SIZE - 1)/MI_BLOCK_SIZE;
    bskip = vpx_malloc(sizeof(*bskip)*cm->mi_rows*cm->mi_cols);
    vp10_setup_dst_planes(xd->plane, frame, 0, 0);
    for (pli = 0; pli < 3; pli++) {
        /* The horizontal subsampling shift is used for both dimensions. */
        dec[pli] = xd->plane[pli].subsampling_x;
        bsize[pli] = 8 >> dec[pli];
    }
    /* Every plane copy uses the plane-0 row stride. */
    stride = bsize[0]*cm->mi_cols;
    for (pli = 0; pli < 3; pli++) {
        /* Size by the element type; sizeof(*src) would be a pointer size. */
        src[pli] = vpx_malloc(sizeof(*src[pli])*cm->mi_rows*cm->mi_cols*64);
    }
    if (!bskip || !src[0] || !src[1] || !src[2]) {
        /* Allocation failed: bail out instead of dereferencing NULL. */
        goto cleanup;
    }
    /* Copy each plane into its temporary buffer. */
    for (pli = 0; pli < 3; pli++) {
        for (r = 0; r < bsize[pli]*cm->mi_rows; ++r) {
            for (c = 0; c < bsize[pli]*cm->mi_cols; ++c) {
#if CONFIG_VPX_HIGHBITDEPTH
                if (cm->use_highbitdepth) {
                    src[pli][r * stride + c] =
                        CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
                        [r * xd->plane[pli].dst.stride + c];
                } else {
#endif
                    src[pli][r * stride + c] =
                        xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
#if CONFIG_VPX_HIGHBITDEPTH
                }
#endif
            }
        }
    }
    /* Flatten the skip flags into a dense mi_rows x mi_cols byte map. */
    for (r = 0; r < cm->mi_rows; ++r) {
        for (c = 0; c < cm->mi_cols; ++c) {
            const MB_MODE_INFO *mbmi =
                &cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
            bskip[r * cm->mi_cols + c] = mbmi->skip;
        }
    }
    for (sbr = 0; sbr < nvsb; sbr++) {
        for (sbc = 0; sbc < nhsb; sbc++) {
            int level;
            int nhb, nvb;
            /* Number of blocks of this superblock that lie inside the frame. */
            nhb = VPXMIN(MI_BLOCK_SIZE, cm->mi_cols - MI_BLOCK_SIZE*sbc);
            nvb = VPXMIN(MI_BLOCK_SIZE, cm->mi_rows - MI_BLOCK_SIZE*sbr);
            for (pli = 0; pli < 3; pli++) {
                int16_t dst[MI_BLOCK_SIZE*MI_BLOCK_SIZE*8*8];
                int threshold;
#if DERING_REFINEMENT
                level = compute_level_from_index(
                            global_level,
                            cm->mi_grid_visible[MI_BLOCK_SIZE*sbr*cm->mi_stride +
                                                MI_BLOCK_SIZE*sbc]->mbmi.dering_gain);
#else
                level = global_level;
#endif
                /* FIXME: This is a temporary hack that uses more conservative
                   deringing for chroma. */
                if (pli) level = (level*5 + 4) >> 3;
                /* Level 0 when sb_all_skip() reports true for this superblock. */
                if (sb_all_skip(cm, sbr*MI_BLOCK_SIZE, sbc*MI_BLOCK_SIZE)) level = 0;
                threshold = level << coeff_shift;
                od_dering(
                    &OD_DERING_VTBL_C,
                    dst,
                    MI_BLOCK_SIZE*bsize[pli],
                    &src[pli][sbr*stride*bsize[pli]*MI_BLOCK_SIZE +
                              sbc*bsize[pli]*MI_BLOCK_SIZE],
                    stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
                    &bskip[MI_BLOCK_SIZE*sbr*cm->mi_cols + MI_BLOCK_SIZE*sbc],
                    cm->mi_cols, threshold, OD_DERING_NO_CHECK_OVERLAP, coeff_shift);
                /* Write the superblock from dst back into the frame. */
                for (r = 0; r < bsize[pli]*nvb; ++r) {
                    for (c = 0; c < bsize[pli]*nhb; ++c) {
#if CONFIG_VPX_HIGHBITDEPTH
                        if (cm->use_highbitdepth) {
                            CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
                            [xd->plane[pli].dst.stride*(bsize[pli]*MI_BLOCK_SIZE*sbr + r)
                             + sbc*bsize[pli]*MI_BLOCK_SIZE + c] =
                                 dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
                        } else {
#endif
                            xd->plane[pli].dst.buf[xd->plane[pli].dst.stride*
                                                   (bsize[pli]*MI_BLOCK_SIZE*sbr + r) +
                                                   sbc*bsize[pli]*MI_BLOCK_SIZE + c] =
                                                       dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
#if CONFIG_VPX_HIGHBITDEPTH
                        }
#endif
                    }
                }
            }
        }
    }
cleanup:
    for (pli = 0; pli < 3; pli++) {
        vpx_free(src[pli]);
    }
    vpx_free(bskip);
}