// Iterate over blocks within a superblock
static void vp10_clpf_sb(const YV12_BUFFER_CONFIG *frame_buffer,
                         const VP10_COMMON *cm, MACROBLOCKD *xd,
                         MODE_INFO *const *mi_8x8, int xpos, int ypos) {
  // Temporary buffer (to allow SIMD parallelism)
  uint8_t buf_unaligned[BS * BS + 15];
  uint8_t *buf = (uint8_t *)(((intptr_t)buf_unaligned + 15) & ~15);
  int x, y, p;

  for (p = 0; p < (CLPF_FILTER_ALL_PLANES ? MAX_MB_PLANE : 1); p++) {
    for (y = 0; y < MI_BLOCK_SIZE && ypos + y < cm->mi_rows; y++) {
      for (x = 0; x < MI_BLOCK_SIZE && xpos + x < cm->mi_cols; x++) {
        const MB_MODE_INFO *mbmi =
            &mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;

        // Do not filter if there is no residual
        if (!mbmi->skip) {
          // Do not filter frame edges
          int has_top = ypos + y > 0;
          int has_left = xpos + x > 0;
          int has_bottom = ypos + y < cm->mi_rows - 1;
          int has_right = xpos + x < cm->mi_cols - 1;
#if CLPF_ALLOW_BLOCK_PARALLELISM
          // Do not filter superblock edges
          has_top &= !!y;
          has_left &= !!x;
          has_bottom &= y != MI_BLOCK_SIZE - 1;
          has_right &= x != MI_BLOCK_SIZE - 1;
#endif
          vp10_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
          clpf_block(xd->plane[p].dst.buf,
                     CLPF_ALLOW_PIXEL_PARALLELISM
                         ? buf + y * MI_SIZE * BS + x * MI_SIZE
                         : xd->plane[p].dst.buf,
                     xd->plane[p].dst.stride,
                     CLPF_ALLOW_PIXEL_PARALLELISM ? BS
                                                  : xd->plane[p].dst.stride,
                     has_top, has_left, has_bottom, has_right,
                     MI_SIZE >> xd->plane[p].subsampling_x,
                     MI_SIZE >> xd->plane[p].subsampling_y);
        }
      }
    }
#if CLPF_ALLOW_PIXEL_PARALLELISM
    // Copy the filtered blocks from the temporary buffer back into the frame.
    for (y = 0; y < MI_BLOCK_SIZE && ypos + y < cm->mi_rows; y++) {
      for (x = 0; x < MI_BLOCK_SIZE && xpos + x < cm->mi_cols; x++) {
        const MB_MODE_INFO *mbmi =
            &mi_8x8[(ypos + y) * cm->mi_stride + xpos + x]->mbmi;
        vp10_setup_dst_planes(xd->plane, frame_buffer, ypos + y, xpos + x);
        if (!mbmi->skip) {
          int i;
          for (i = 0; i < MI_SIZE >> xd->plane[p].subsampling_y; i++)
            memcpy(xd->plane[p].dst.buf + i * xd->plane[p].dst.stride,
                   buf + (y * MI_SIZE + i) * BS + x * MI_SIZE,
                   MI_SIZE >> xd->plane[p].subsampling_x);
        }
      }
    }
#endif
  }
}
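/* Illustrative only: a minimal sketch (not part of the original code) of how
 * a frame-level driver could walk the superblock grid and hand each
 * superblock to vp10_clpf_sb() above. The function name clpf_frame_sketch is
 * hypothetical; it assumes xpos/ypos are given in MI (8x8) units and that
 * cm->mi_grid_visible is the mode-info grid expected by the mi_8x8 argument,
 * which matches the indexing used inside vp10_clpf_sb. */
static void clpf_frame_sketch(const YV12_BUFFER_CONFIG *frame_buffer,
                              const VP10_COMMON *cm, MACROBLOCKD *xd) {
  int xpos, ypos;
  // Step over the frame one superblock (MI_BLOCK_SIZE x MI_BLOCK_SIZE MI
  // units) at a time; vp10_clpf_sb clips its own loops at the frame edges.
  for (ypos = 0; ypos < cm->mi_rows; ypos += MI_BLOCK_SIZE)
    for (xpos = 0; xpos < cm->mi_cols; xpos += MI_BLOCK_SIZE)
      vp10_clpf_sb(frame_buffer, cm, xd, cm->mi_grid_visible, xpos, ypos);
}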
// Implement row loopfiltering for each thread.
static INLINE void thread_loop_filter_rows(
    const YV12_BUFFER_CONFIG *const frame_buffer, VP10_COMMON *const cm,
    struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop,
    int y_only, VP9LfSync *const lf_sync) {
  const int num_planes = y_only ? 1 : MAX_MB_PLANE;
  const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2;
  int mi_row, mi_col;
  enum lf_path path;
  if (y_only)
    path = LF_PATH_444;
  else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1)
    path = LF_PATH_420;
  else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0)
    path = LF_PATH_444;
  else
    path = LF_PATH_SLOW;

  for (mi_row = start; mi_row < stop;
       mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) {
    MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride;

    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) {
      const int r = mi_row >> MI_BLOCK_SIZE_LOG2;
      const int c = mi_col >> MI_BLOCK_SIZE_LOG2;
      LOOP_FILTER_MASK lfm;
      int plane;

      sync_read(lf_sync, r, c);

      vp10_setup_dst_planes(planes, frame_buffer, mi_row, mi_col);

      // TODO(JBB): Make setup_mask work for non 420.
      vp10_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, &lfm);

      vp10_filter_block_plane_ss00(cm, &planes[0], mi_row, &lfm);
      for (plane = 1; plane < num_planes; ++plane) {
        switch (path) {
          case LF_PATH_420:
            vp10_filter_block_plane_ss11(cm, &planes[plane], mi_row, &lfm);
            break;
          case LF_PATH_444:
            vp10_filter_block_plane_ss00(cm, &planes[plane], mi_row, &lfm);
            break;
          case LF_PATH_SLOW:
            vp10_filter_block_plane_non420(cm, &planes[plane], mi + mi_col,
                                           mi_row, mi_col);
            break;
        }
      }

      sync_write(lf_sync, r, c, sb_cols);
    }
  }
}
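/* Illustrative only: a sketch (not part of the original code) of how per-row
 * loop-filter work could be handed to worker threads. Because
 * thread_loop_filter_rows() advances by lf_sync->num_workers * MI_BLOCK_SIZE,
 * giving worker i a start row of i * MI_BLOCK_SIZE interleaves superblock
 * rows across workers, and sync_read()/sync_write() then order the
 * top-neighbour dependency between adjacent rows. The struct and hook names
 * below are hypothetical. */
typedef struct {
  const YV12_BUFFER_CONFIG *frame_buffer;
  VP10_COMMON *cm;
  struct macroblockd_plane planes[MAX_MB_PLANE];
  int start;   // first MI row for this worker (i * MI_BLOCK_SIZE)
  int stop;    // one past the last MI row to filter
  int y_only;  // filter only the luma plane
  VP9LfSync *lf_sync;
} LFRowWorkerDataSketch;

// Worker hook: run the shared row-filtering loop on this worker's rows.
static int lf_row_worker_sketch(void *arg) {
  LFRowWorkerDataSketch *const d = (LFRowWorkerDataSketch *)arg;
  thread_loop_filter_rows(d->frame_buffer, d->cm, d->planes, d->start,
                          d->stop, d->y_only, d->lf_sync);
  return 1;
}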
void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
                       MACROBLOCKD *xd, int global_level) {
  int r, c;
  int sbr, sbc;
  int nhsb, nvsb;
  od_dering_in *src[3];
  unsigned char *bskip;
  int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
  int stride;
  int bsize[3];
  int dec[3];
  int pli;
  int coeff_shift = VPXMAX(cm->bit_depth - 8, 0);

  nvsb = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
  nhsb = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
  bskip = vpx_malloc(sizeof(*bskip) * cm->mi_rows * cm->mi_cols);
  vp10_setup_dst_planes(xd->plane, frame, 0, 0);
  for (pli = 0; pli < 3; pli++) {
    dec[pli] = xd->plane[pli].subsampling_x;
    bsize[pli] = 8 >> dec[pli];
  }
  stride = bsize[0] * cm->mi_cols;
  // Copy each plane into a contiguous 16-bit working buffer.
  for (pli = 0; pli < 3; pli++) {
    src[pli] = vpx_malloc(sizeof(*src[pli]) * cm->mi_rows * cm->mi_cols * 64);
    for (r = 0; r < bsize[pli] * cm->mi_rows; ++r) {
      for (c = 0; c < bsize[pli] * cm->mi_cols; ++c) {
#if CONFIG_VPX_HIGHBITDEPTH
        if (cm->use_highbitdepth) {
          src[pli][r * stride + c] = CONVERT_TO_SHORTPTR(
              xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c];
        } else {
#endif
          src[pli][r * stride + c] =
              xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
#if CONFIG_VPX_HIGHBITDEPTH
        }
#endif
      }
    }
  }
  // Record the skip flag for every 8x8 block.
  for (r = 0; r < cm->mi_rows; ++r) {
    for (c = 0; c < cm->mi_cols; ++c) {
      const MB_MODE_INFO *mbmi =
          &cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
      bskip[r * cm->mi_cols + c] = mbmi->skip;
    }
  }
  for (sbr = 0; sbr < nvsb; sbr++) {
    for (sbc = 0; sbc < nhsb; sbc++) {
      int level;
      int nhb, nvb;
      nhb = VPXMIN(MI_BLOCK_SIZE, cm->mi_cols - MI_BLOCK_SIZE * sbc);
      nvb = VPXMIN(MI_BLOCK_SIZE, cm->mi_rows - MI_BLOCK_SIZE * sbr);
      for (pli = 0; pli < 3; pli++) {
        int16_t dst[MI_BLOCK_SIZE * MI_BLOCK_SIZE * 8 * 8];
        int threshold;
#if DERING_REFINEMENT
        level = compute_level_from_index(
            global_level,
            cm->mi_grid_visible[MI_BLOCK_SIZE * sbr * cm->mi_stride +
                                MI_BLOCK_SIZE * sbc]->mbmi.dering_gain);
#else
        level = global_level;
#endif
        /* FIXME: This is a temporary hack that uses more conservative
           deringing for chroma. */
        if (pli) level = (level * 5 + 4) >> 3;
        if (sb_all_skip(cm, sbr * MI_BLOCK_SIZE, sbc * MI_BLOCK_SIZE))
          level = 0;
        threshold = level << coeff_shift;
        od_dering(&OD_DERING_VTBL_C, dst, MI_BLOCK_SIZE * bsize[pli],
                  &src[pli][sbr * stride * bsize[pli] * MI_BLOCK_SIZE +
                            sbc * bsize[pli] * MI_BLOCK_SIZE],
                  stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
                  &bskip[MI_BLOCK_SIZE * sbr * cm->mi_cols +
                         MI_BLOCK_SIZE * sbc],
                  cm->mi_cols, threshold, OD_DERING_NO_CHECK_OVERLAP,
                  coeff_shift);
        // Write the deringed superblock back into the frame buffer.
        for (r = 0; r < bsize[pli] * nvb; ++r) {
          for (c = 0; c < bsize[pli] * nhb; ++c) {
#if CONFIG_VPX_HIGHBITDEPTH
            if (cm->use_highbitdepth) {
              CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
                  [xd->plane[pli].dst.stride *
                       (bsize[pli] * MI_BLOCK_SIZE * sbr + r) +
                   sbc * bsize[pli] * MI_BLOCK_SIZE + c] =
                  dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
            } else {
#endif
              xd->plane[pli].dst.buf[xd->plane[pli].dst.stride *
                                         (bsize[pli] * MI_BLOCK_SIZE * sbr +
                                          r) +
                                     sbc * bsize[pli] * MI_BLOCK_SIZE + c] =
                  dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
#if CONFIG_VPX_HIGHBITDEPTH
            }
#endif
          }
        }
      }
    }
  }
  for (pli = 0; pli < 3; pli++) {
    vpx_free(src[pli]);
  }
  vpx_free(bskip);
}
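/* Illustrative only: vp10_dering_frame() above relies on an sb_all_skip()
 * helper that is not shown in this section. A plausible sketch is given
 * below, under the assumption (consistent with how the bskip array is built
 * from mbmi->skip) that it reports whether every 8x8 block inside the
 * superblock at (mi_row, mi_col) is a skip block, clipped to the frame
 * boundary. The actual implementation may differ. */
static int sb_all_skip_sketch(const VP10_COMMON *const cm, int mi_row,
                              int mi_col) {
  int r, c;
  const int maxr = VPXMIN(MI_BLOCK_SIZE, cm->mi_rows - mi_row);
  const int maxc = VPXMIN(MI_BLOCK_SIZE, cm->mi_cols - mi_col);
  for (r = 0; r < maxr; r++) {
    for (c = 0; c < maxc; c++) {
      // Any non-skip block means the superblock must be deringed.
      if (!cm->mi_grid_visible[(mi_row + r) * cm->mi_stride + mi_col + c]
               ->mbmi.skip)
        return 0;
    }
  }
  return 1;
}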