Exemplo n.º 1
0
// Iterate over blocks within a superblock
static void av1_clpf_sb(const YV12_BUFFER_CONFIG *frame_buffer,
                        const AV1_COMMON *cm, MACROBLOCKD *xd,
                        MODE_INFO *const *mi_8x8, int xpos, int ypos) {
  // Temporary buffer (to allow SIMD parallelism)
  uint8_t buf_unaligned[BS * BS + 15];
  uint8_t *buf = (uint8_t *)(((intptr_t)buf_unaligned + 15) & ~15);
  int x, y, p;

  for (p = 0; p < (CLPF_FILTER_ALL_PLANES ? MAX_MB_PLANE : 1); p++) {
    for (y = 0; y < MI_BLOCK_SIZE && ypos + y < cm->mi_rows; y++) {
      for (x = 0; x < MI_BLOCK_SIZE && xpos + x < cm->mi_cols; x++) {
        const MB_MODE_INFO *mbmi =
            &mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;

        // Do not filter if there is no residual
        if (!mbmi->skip) {
          // Do not filter frame edges
          int has_top = ypos + y > 0;
          int has_left = xpos + x > 0;
          int has_bottom = ypos + y < cm->mi_rows - 1;
          int has_right = xpos + x < cm->mi_cols - 1;
#if CLPF_ALLOW_BLOCK_PARALLELISM
          // Do not filter superblock edges
          has_top &= !!y;
          has_left &= !!x;
          has_bottom &= y != MI_BLOCK_SIZE - 1;
          has_right &= x != MI_BLOCK_SIZE - 1;
#endif
          av1_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
          clpf_block(
              xd->plane[p].dst.buf, CLPF_ALLOW_PIXEL_PARALLELISM
                                        ? buf + y * MI_SIZE * BS + x * MI_SIZE
                                        : xd->plane[p].dst.buf,
              xd->plane[p].dst.stride,
              CLPF_ALLOW_PIXEL_PARALLELISM ? BS : xd->plane[p].dst.stride,
              has_top, has_left, has_bottom, has_right,
              MI_SIZE >> xd->plane[p].subsampling_x,
              MI_SIZE >> xd->plane[p].subsampling_y);
        }
      }
    }
#if CLPF_ALLOW_PIXEL_PARALLELISM
    for (y = 0; y < MI_BLOCK_SIZE && ypos + y < cm->mi_rows; y++) {
      for (x = 0; x < MI_BLOCK_SIZE && xpos + x < cm->mi_cols; x++) {
        const MB_MODE_INFO *mbmi =
            &mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
        av1_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
        if (!mbmi->skip) {
          int i = 0;
          for (i = 0; i<MI_SIZE>> xd->plane[p].subsampling_y; i++)
            memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
                   buf + (y * MI_SIZE + i) * BS + x * MI_SIZE,
                   MI_SIZE >> xd->plane[p].subsampling_x);
        }
      }
    }
Exemplo n.º 2
0
static int loop_filter_row_worker(AV1LfSync *const lf_sync,
                                  LFWorkerData *const lf_data) {
  const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
  const int sb_cols =
      mi_cols_aligned_to_sb(lf_data->cm) >> lf_data->cm->mib_size_log2;
  int mi_row, mi_col;
#if !CONFIG_EXT_PARTITION_TYPES
  enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
#endif  // !CONFIG_EXT_PARTITION_TYPES

#if CONFIG_EXT_PARTITION
  printf(
      "STOPPING: This code has not been modified to work with the "
      "extended coding unit size experiment");
  exit(EXIT_FAILURE);
#endif  // CONFIG_EXT_PARTITION

  for (mi_row = lf_data->start; mi_row < lf_data->stop;
       mi_row += lf_sync->num_workers * lf_data->cm->mib_size) {
    MODE_INFO **const mi =
        lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;

    for (mi_col = 0; mi_col < lf_data->cm->mi_cols;
         mi_col += lf_data->cm->mib_size) {
      const int r = mi_row >> lf_data->cm->mib_size_log2;
      const int c = mi_col >> lf_data->cm->mib_size_log2;
#if !CONFIG_EXT_PARTITION_TYPES
      LOOP_FILTER_MASK lfm;
#endif
      int plane;

      sync_read(lf_sync, r, c);

      av1_setup_dst_planes(lf_data->planes, lf_data->cm->sb_size,
                           lf_data->frame_buffer, mi_row, mi_col);
#if CONFIG_EXT_PARTITION_TYPES
      for (plane = 0; plane < num_planes; ++plane) {
        av1_filter_block_plane_non420_ver(lf_data->cm, &lf_data->planes[plane],
                                          mi + mi_col, mi_row, mi_col, plane);
        av1_filter_block_plane_non420_hor(lf_data->cm, &lf_data->planes[plane],
                                          mi + mi_col, mi_row, mi_col, plane);
      }
#else
      av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
                     lf_data->cm->mi_stride, &lfm);

      for (plane = 0; plane < num_planes; ++plane) {
        loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane,
                                    mi + mi_col, mi_row, mi_col, path, &lfm);
        loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane,
                                    mi + mi_col, mi_row, mi_col, path, &lfm);
      }
#endif  // CONFIG_EXT_PARTITION_TYPES
      sync_write(lf_sync, r, c, sb_cols);
    }
  }
  return 1;
}
Exemplo n.º 3
0
static int loop_filter_hor_row_worker(AV1LfSync *const lf_sync,
                                      LFWorkerData *const lf_data) {
  const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
  const int sb_cols =
      mi_cols_aligned_to_sb(lf_data->cm) >> lf_data->cm->mib_size_log2;
  int mi_row, mi_col;
#if !CONFIG_EXT_PARTITION_TYPES
  enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
#endif

  for (mi_row = lf_data->start; mi_row < lf_data->stop;
       mi_row += lf_sync->num_workers * lf_data->cm->mib_size) {
    MODE_INFO **const mi =
        lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;

    for (mi_col = 0; mi_col < lf_data->cm->mi_cols;
         mi_col += lf_data->cm->mib_size) {
      const int r = mi_row >> lf_data->cm->mib_size_log2;
      const int c = mi_col >> lf_data->cm->mib_size_log2;
      LOOP_FILTER_MASK lfm;
      int plane;

      // TODO([email protected]): For better parallelization, reorder
      // the outer loop to column-based and remove the synchronizations here.
      sync_read(lf_sync, r, c);

      av1_setup_dst_planes(lf_data->planes, lf_data->cm->sb_size,
                           lf_data->frame_buffer, mi_row, mi_col);
      av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
                     lf_data->cm->mi_stride, &lfm);
#if CONFIG_EXT_PARTITION_TYPES
      for (plane = 0; plane < num_planes; ++plane)
        av1_filter_block_plane_non420_hor(lf_data->cm, &lf_data->planes[plane],
                                          mi + mi_col, mi_row, mi_col, plane);
#else
      for (plane = 0; plane < num_planes; ++plane)
        loop_filter_block_plane_hor(lf_data->cm, lf_data->planes, plane,
                                    mi + mi_col, mi_row, mi_col, path, &lfm);
#endif
      sync_write(lf_sync, r, c, sb_cols);
    }
  }
  return 1;
}
Exemplo n.º 4
0
static int loop_filter_ver_row_worker(AV1LfSync *const lf_sync,
                                      LFWorkerData *const lf_data) {
  const int num_planes = lf_data->y_only ? 1 : MAX_MB_PLANE;
  int mi_row, mi_col;
#if !CONFIG_EXT_PARTITION_TYPES
  enum lf_path path = get_loop_filter_path(lf_data->y_only, lf_data->planes);
#endif
  for (mi_row = lf_data->start; mi_row < lf_data->stop;
       mi_row += lf_sync->num_workers * lf_data->cm->mib_size) {
    MODE_INFO **const mi =
        lf_data->cm->mi_grid_visible + mi_row * lf_data->cm->mi_stride;

    for (mi_col = 0; mi_col < lf_data->cm->mi_cols;
         mi_col += lf_data->cm->mib_size) {
      LOOP_FILTER_MASK lfm;
      int plane;

      av1_setup_dst_planes(lf_data->planes, lf_data->cm->sb_size,
                           lf_data->frame_buffer, mi_row, mi_col);
      av1_setup_mask(lf_data->cm, mi_row, mi_col, mi + mi_col,
                     lf_data->cm->mi_stride, &lfm);

#if CONFIG_EXT_PARTITION_TYPES
      for (plane = 0; plane < num_planes; ++plane)
        av1_filter_block_plane_non420_ver(lf_data->cm, &lf_data->planes[plane],
                                          mi + mi_col, mi_row, mi_col, plane);
#else

      for (plane = 0; plane < num_planes; ++plane)
        loop_filter_block_plane_ver(lf_data->cm, lf_data->planes, plane,
                                    mi + mi_col, mi_row, mi_col, path, &lfm);
#endif
    }
  }
  return 1;
}
Exemplo n.º 5
0
void av1_dering_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                      MACROBLOCKD *xd, int global_level) {
  int r, c;
  int sbr, sbc;
  int nhsb, nvsb;
  int16_t src[OD_DERING_INBUF_SIZE];
  int16_t *linebuf[3];
  int16_t colbuf[3][OD_BSIZE_MAX + 2 * OD_FILT_VBORDER][OD_FILT_HBORDER];
  dering_list dlist[MAX_MIB_SIZE * MAX_MIB_SIZE];
  unsigned char *row_dering, *prev_row_dering, *curr_row_dering;
  int dering_count;
  int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
  int stride;
  int bsize[3];
  int dec[3];
  int pli;
  int dering_left;
  int coeff_shift = AOMMAX(cm->bit_depth - 8, 0);
  int nplanes;
  if (xd->plane[1].subsampling_x == xd->plane[1].subsampling_y &&
      xd->plane[2].subsampling_x == xd->plane[2].subsampling_y)
    nplanes = 3;
  else
    nplanes = 1;
  nvsb = (cm->mi_rows + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
  nhsb = (cm->mi_cols + MAX_MIB_SIZE - 1) / MAX_MIB_SIZE;
  av1_setup_dst_planes(xd->plane, frame, 0, 0);
  row_dering = aom_malloc(sizeof(*row_dering) * nhsb * 2);
  memset(row_dering, 1, sizeof(*row_dering) * (nhsb + 2) * 2);
  prev_row_dering = row_dering + 1;
  curr_row_dering = prev_row_dering + nhsb + 2;
  for (pli = 0; pli < nplanes; pli++) {
    dec[pli] = xd->plane[pli].subsampling_x;
    bsize[pli] = OD_DERING_SIZE_LOG2 - dec[pli];
  }
  stride = (cm->mi_cols << bsize[0]) + 2 * OD_FILT_HBORDER;
  for (pli = 0; pli < nplanes; pli++) {
    linebuf[pli] = aom_malloc(sizeof(*linebuf) * OD_FILT_VBORDER * stride);
  }
  for (sbr = 0; sbr < nvsb; sbr++) {
    for (pli = 0; pli < nplanes; pli++) {
      for (r = 0; r < (MAX_MIB_SIZE << bsize[pli]) + 2 * OD_FILT_VBORDER; r++) {
        for (c = 0; c < OD_FILT_HBORDER; c++) {
          colbuf[pli][r][c] = OD_DERING_VERY_LARGE;
        }
      }
    }
    dering_left = 1;
    for (sbc = 0; sbc < nhsb; sbc++) {
      int level;
      int nhb, nvb;
      int cstart = 0;
      if (!dering_left) cstart = -OD_FILT_HBORDER;
      nhb = AOMMIN(MAX_MIB_SIZE, cm->mi_cols - MAX_MIB_SIZE * sbc);
      nvb = AOMMIN(MAX_MIB_SIZE, cm->mi_rows - MAX_MIB_SIZE * sbr);
      level = compute_level_from_index(
          global_level, cm->mi_grid_visible[MAX_MIB_SIZE * sbr * cm->mi_stride +
                                            MAX_MIB_SIZE * sbc]
                            ->mbmi.dering_gain);
      curr_row_dering[sbc] = 0;
      if (level == 0 ||
          (dering_count = sb_compute_dering_list(
               cm, sbr * MAX_MIB_SIZE, sbc * MAX_MIB_SIZE, dlist)) == 0) {
        dering_left = 0;
        continue;
      }
      curr_row_dering[sbc] = 1;
      for (pli = 0; pli < nplanes; pli++) {
        int16_t dst[OD_BSIZE_MAX * OD_BSIZE_MAX];
        int threshold;
        int coffset;
        int rend, cend;
        if (sbc == nhsb - 1)
          cend = (nhb << bsize[pli]);
        else
          cend = (nhb << bsize[pli]) + OD_FILT_HBORDER;
        if (sbr == nvsb - 1)
          rend = (nvb << bsize[pli]);
        else
          rend = (nvb << bsize[pli]) + OD_FILT_VBORDER;
        coffset = sbc * MAX_MIB_SIZE << bsize[pli];
        if (sbc == nhsb - 1) {
          /* On the last superblock column, fill in the right border with
             OD_DERING_VERY_LARGE to avoid filtering with the outside. */
          for (r = 0; r < rend + OD_FILT_VBORDER; r++) {
            for (c = cend; c < (nhb << bsize[pli]) + OD_FILT_HBORDER; ++c) {
              src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] =
                  OD_DERING_VERY_LARGE;
            }
          }
        }
        if (sbr == nvsb - 1) {
          /* On the last superblock row, fill in the bottom border with
             OD_DERING_VERY_LARGE to avoid filtering with the outside. */
          for (r = rend; r < rend + OD_FILT_VBORDER; r++) {
            for (c = 0; c < (nhb << bsize[pli]) + 2 * OD_FILT_HBORDER; c++) {
              src[(r + OD_FILT_VBORDER) * OD_FILT_BSTRIDE + c] =
                  OD_DERING_VERY_LARGE;
            }
          }
        }
        /* Copy in the pixels we need from the current superblock for
           deringing.*/
        copy_sb8_16(
            cm,
            &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER + cstart],
            OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
            (MAX_MIB_SIZE << bsize[pli]) * sbr, coffset + cstart,
            xd->plane[pli].dst.stride, rend, cend - cstart);
        if (!prev_row_dering[sbc]) {
          copy_sb8_16(cm, &src[OD_FILT_HBORDER], OD_FILT_BSTRIDE,
                      xd->plane[pli].dst.buf,
                      (MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER,
                      coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER,
                      nhb << bsize[pli]);
        } else if (sbr > 0) {
          for (r = 0; r < OD_FILT_VBORDER; r++) {
            for (c = 0; c < nhb << bsize[pli]; c++) {
              src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] =
                  linebuf[pli][r * stride + coffset + c];
            }
          }
        } else {
          for (r = 0; r < OD_FILT_VBORDER; r++) {
            for (c = 0; c < nhb << bsize[pli]; c++) {
              src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] =
                  OD_DERING_VERY_LARGE;
            }
          }
        }
        if (!prev_row_dering[sbc - 1]) {
          copy_sb8_16(cm, src, OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
                      (MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER,
                      coffset - OD_FILT_HBORDER, xd->plane[pli].dst.stride,
                      OD_FILT_VBORDER, OD_FILT_HBORDER);
        } else if (sbr > 0 && sbc > 0) {
          for (r = 0; r < OD_FILT_VBORDER; r++) {
            for (c = -OD_FILT_HBORDER; c < 0; c++) {
              src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] =
                  linebuf[pli][r * stride + coffset + c];
            }
          }
        } else {
          for (r = 0; r < OD_FILT_VBORDER; r++) {
            for (c = -OD_FILT_HBORDER; c < 0; c++) {
              src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] =
                  OD_DERING_VERY_LARGE;
            }
          }
        }
        if (!prev_row_dering[sbc + 1]) {
          copy_sb8_16(cm, &src[OD_FILT_HBORDER + (nhb << bsize[pli])],
                      OD_FILT_BSTRIDE, xd->plane[pli].dst.buf,
                      (MAX_MIB_SIZE << bsize[pli]) * sbr - OD_FILT_VBORDER,
                      coffset + (nhb << bsize[pli]), xd->plane[pli].dst.stride,
                      OD_FILT_VBORDER, OD_FILT_HBORDER);
        } else if (sbr > 0 && sbc < nhsb - 1) {
          for (r = 0; r < OD_FILT_VBORDER; r++) {
            for (c = nhb << bsize[pli];
                 c < (nhb << bsize[pli]) + OD_FILT_HBORDER; c++) {
              src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] =
                  linebuf[pli][r * stride + coffset + c];
            }
          }
        } else {
          for (r = 0; r < OD_FILT_VBORDER; r++) {
            for (c = nhb << bsize[pli];
                 c < (nhb << bsize[pli]) + OD_FILT_HBORDER; c++) {
              src[r * OD_FILT_BSTRIDE + c + OD_FILT_HBORDER] =
                  OD_DERING_VERY_LARGE;
            }
          }
        }
        if (dering_left) {
          /* If we deringed the superblock on the left then we need to copy in
             saved pixels. */
          for (r = 0; r < rend + OD_FILT_VBORDER; r++) {
            for (c = 0; c < OD_FILT_HBORDER; c++) {
              src[r * OD_FILT_BSTRIDE + c] = colbuf[pli][r][c];
            }
          }
        }
        for (r = 0; r < rend + OD_FILT_VBORDER; r++) {
          for (c = 0; c < OD_FILT_HBORDER; c++) {
            /* Saving pixels in case we need to dering the superblock on the
               right. */
            colbuf[pli][r][c] =
                src[r * OD_FILT_BSTRIDE + c + (nhb << bsize[pli])];
          }
        }
        copy_sb8_16(cm, &linebuf[pli][coffset], stride, xd->plane[pli].dst.buf,
                    (MAX_MIB_SIZE << bsize[pli]) * (sbr + 1) - OD_FILT_VBORDER,
                    coffset, xd->plane[pli].dst.stride, OD_FILT_VBORDER,
                    (nhb << bsize[pli]));

        /* FIXME: This is a temporary hack that uses more conservative
           deringing for chroma. */
        if (pli)
          threshold = (level * 5 + 4) >> 3 << coeff_shift;
        else
          threshold = level << coeff_shift;
        if (threshold == 0) continue;
        od_dering(
            dst, &src[OD_FILT_VBORDER * OD_FILT_BSTRIDE + OD_FILT_HBORDER],
            dec[pli], dir, pli, dlist, dering_count, threshold, coeff_shift);
#if CONFIG_AOM_HIGHBITDEPTH
        if (cm->use_highbitdepth) {
          copy_dering_16bit_to_16bit(
              (int16_t *)&CONVERT_TO_SHORTPTR(
                  xd->plane[pli]
                      .dst.buf)[xd->plane[pli].dst.stride *
                                    (MAX_MIB_SIZE * sbr << bsize[pli]) +
                                (sbc * MAX_MIB_SIZE << bsize[pli])],
              xd->plane[pli].dst.stride, dst, dlist, dering_count,
              3 - dec[pli]);
        } else {
#endif
          copy_dering_16bit_to_8bit(
              &xd->plane[pli].dst.buf[xd->plane[pli].dst.stride *
                                          (MAX_MIB_SIZE * sbr << bsize[pli]) +
                                      (sbc * MAX_MIB_SIZE << bsize[pli])],
              xd->plane[pli].dst.stride, dst, dlist, dering_count, bsize[pli]);
#if CONFIG_AOM_HIGHBITDEPTH
        }
#endif
      }
      dering_left = 1;
    }
    {
      unsigned char *tmp;
      tmp = prev_row_dering;
      prev_row_dering = curr_row_dering;
      curr_row_dering = tmp;
    }
  }