/**
 * Mark a square area of the LCU as intra coded.
 *
 * The area starts at (SUB_SCU(x_px), SUB_SCU(y_px)) and is
 * (LCU_WIDTH >> depth) wide. It is walked in SCU_WIDTH steps and each visited
 * CU gets:
 * - depth            depth, stored as MAX_DEPTH when depth is above MAX_DEPTH
 * - type             CU_INTRA
 * - intra.mode       pred_mode
 * - intra.mode_chroma chroma_mode
 * - part_size        part_mode
 */
static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pred_mode, int chroma_mode, int part_mode)
{
  // The area size is derived from the depth as passed in, before any clamping.
  const int size = LCU_WIDTH >> depth;
  const int left = SUB_SCU(x_px);
  const int top = SUB_SCU(y_px);
  const int right = left + size;
  const int bottom = top + size;

  // NxN is only expected one level below MAX_DEPTH.
  if (part_mode == SIZE_NxN) {
    assert(depth == MAX_DEPTH + 1);
    assert(size == SCU_WIDTH);
  }

  // Depths beyond MAX_DEPTH are stored as MAX_DEPTH and imply NxN.
  int stored_depth = depth;
  if (depth > MAX_DEPTH) {
    assert(part_mode == SIZE_NxN);
    stored_depth = MAX_DEPTH;
  }

  for (int row = top; row < bottom; row += SCU_WIDTH) {
    for (int col = left; col < right; col += SCU_WIDTH) {
      cu_info_t *target = LCU_GET_CU_AT_PX(lcu, col, row);
      target->part_size = part_mode;
      target->intra.mode_chroma = chroma_mode;
      target->intra.mode = pred_mode;
      target->type = CU_INTRA;
      target->depth = stored_depth;
    }
  }
}
/**
 * Copy all non-reference CU data from depth+1 to depth.
 *
 * For the (LCU_WIDTH >> depth) wide square at (SUB_SCU(x_px), SUB_SCU(y_px))
 * the following is copied from work_tree[depth + 1] to work_tree[depth]:
 * - the cu_info_t entries, stepping through the area in SCU_WIDTH increments
 * - the reconstructed luma pixels and the luma coefficients
 * - the reconstructed chroma pixels and the chroma coefficients, unless the
 *   source reconstruction has chroma format KVZ_CSP_400
 */
static void work_tree_copy_up(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1])
{
  assert(depth >= 0 && depth < MAX_PU_DEPTH);

  const lcu_t *const src = &work_tree[depth + 1];
  lcu_t *const dst = &work_tree[depth];

  const int left = SUB_SCU(x_px);
  const int top = SUB_SCU(y_px);
  const int size = LCU_WIDTH >> depth;
  const int right = left + size;
  const int bottom = top + size;

  // Copy non-reference CUs.
  for (int row = top; row < bottom; row += SCU_WIDTH) {
    for (int col = left; col < right; col += SCU_WIDTH) {
      const cu_info_t *cu_src = LCU_GET_CU_AT_PX(src, col, row);
      cu_info_t *cu_dst = LCU_GET_CU_AT_PX(dst, col, row);
      memcpy(cu_dst, cu_src, sizeof(*cu_dst));
    }
  }

  // Chroma planes are addressed with halved coordinates, size and stride.
  const int luma_stride = LCU_WIDTH;
  const int chroma_stride = LCU_WIDTH / 2;
  const int chroma_size = size / 2;
  const int luma_pos = left + top * LCU_WIDTH;
  const int chroma_pos = (left / 2) + (top / 2) * (LCU_WIDTH / 2);
  const int has_chroma = (src->rec.chroma_format != KVZ_CSP_400);

  // Copy reconstructed pixels.
  kvz_pixels_blit(&src->rec.y[luma_pos], &dst->rec.y[luma_pos],
                  size, size, luma_stride, luma_stride);

  // Copy coefficients up. They do not have to be copied down because they
  // are not used for the search.
  kvz_coefficients_blit(&src->coeff.y[luma_pos], &dst->coeff.y[luma_pos],
                        size, size, luma_stride, luma_stride);

  if (has_chroma) {
    kvz_pixels_blit(&src->rec.u[chroma_pos], &dst->rec.u[chroma_pos],
                    chroma_size, chroma_size, chroma_stride, chroma_stride);
    kvz_pixels_blit(&src->rec.v[chroma_pos], &dst->rec.v[chroma_pos],
                    chroma_size, chroma_size, chroma_stride, chroma_stride);

    kvz_coefficients_blit(&src->coeff.u[chroma_pos], &dst->coeff.u[chroma_pos],
                          chroma_size, chroma_size, chroma_stride, chroma_stride);
    kvz_coefficients_blit(&src->coeff.v[chroma_pos], &dst->coeff.v[chroma_pos],
                          chroma_size, chroma_size, chroma_stride, chroma_stride);
  }
}
/**
 * Store tr_depth in the CUs of a square area of the LCU.
 *
 * The area starts at (SUB_SCU(x_px), SUB_SCU(y_px)), is (LCU_WIDTH >> depth)
 * wide and is walked in SCU_WIDTH steps. The top-left CU is additionally
 * written once before the loops, so it is set even if the loops were to run
 * zero times.
 */
void kvz_lcu_set_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth)
{
  // Offsets are unsigned, so the coordinate sums below are evaluated as
  // unsigned values.
  const unsigned size = (unsigned)(LCU_WIDTH >> depth);
  const vector2d_t origin = { SUB_SCU(x_px), SUB_SCU(y_px) };

  cu_info_t *const first_cu = LCU_GET_CU_AT_PX(lcu, origin.x, origin.y);
  first_cu->tr_depth = tr_depth;

  for (unsigned dy = 0; dy < size; dy += SCU_WIDTH) {
    for (unsigned dx = 0; dx < size; dx += SCU_WIDTH) {
      LCU_GET_CU_AT_PX(lcu, origin.x + dx, origin.y + dy)->tr_depth = tr_depth;
    }
  }
}
/**
 * Call lcu_set_inter_pu for every prediction unit of a CU.
 *
 * The number of PUs comes from kvz_part_mode_num_parts[cur_cu->part_size].
 * Position and size of each PU are computed with the PU_GET_* macros from the
 * CU position inside the LCU and the CU width (LCU_WIDTH >> depth). The
 * cu_info_t handed to lcu_set_inter_pu is the one stored in lcu at the PU's
 * top-left position, not cur_cu itself.
 */
static void lcu_set_inter(lcu_t *lcu, int x_px, int y_px, int depth, cu_info_t *cur_cu)
{
  const int cu_width = LCU_WIDTH >> depth;
  const int cu_x = SUB_SCU(x_px);
  const int cu_y = SUB_SCU(y_px);
  const int part_count = kvz_part_mode_num_parts[cur_cu->part_size];

  int part = 0;
  while (part < part_count) {
    const int pu_x = PU_GET_X(cur_cu->part_size, cu_width, cu_x, part);
    const int pu_y = PU_GET_Y(cur_cu->part_size, cu_width, cu_y, part);
    const int pu_w = PU_GET_W(cur_cu->part_size, cu_width, part);
    const int pu_h = PU_GET_H(cur_cu->part_size, cu_width, part);

    cu_info_t *pu_info = LCU_GET_CU_AT_PX(lcu, pu_x, pu_y);
    lcu_set_inter_pu(lcu, pu_x, pu_y, pu_w, pu_h, pu_info);

    ++part;
  }
}
/**
 * Copy all non-reference CU data from depth to depth+1..MAX_PU_DEPTH.
 *
 * For the (LCU_WIDTH >> depth) wide square at (SUB_SCU(x_px), SUB_SCU(y_px)),
 * every deeper work tree level receives from work_tree[depth]:
 * - the cu_info_t entries, stepping through the area in SCU_WIDTH increments
 * - the reconstructed luma pixels
 * - the reconstructed chroma pixels, unless the source reconstruction has
 *   chroma format KVZ_CSP_400
 *
 * Coefficients are not copied by this function.
 */
static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1])
{
  assert(depth >= 0 && depth < MAX_PU_DEPTH);

  // TODO: this still duplicates most of work_tree_copy_up.
  const int size = LCU_WIDTH >> depth;
  const int left = SUB_SCU(x_px);
  const int top = SUB_SCU(y_px);
  const int right = left + size;
  const int bottom = top + size;

  // Chroma planes are addressed with halved coordinates, size and stride.
  const int luma_stride = LCU_WIDTH;
  const int chroma_stride = LCU_WIDTH / 2;
  const int chroma_size = size / 2;
  const int luma_pos = left + top * LCU_WIDTH;
  const int chroma_pos = (left / 2) + (top / 2) * (LCU_WIDTH / 2);

  lcu_t *const src = &work_tree[depth];

  for (int level = depth + 1; level <= MAX_PU_DEPTH; ++level) {
    lcu_t *const dst = &work_tree[level];

    // Copy non-reference CUs.
    for (int row = top; row < bottom; row += SCU_WIDTH) {
      for (int col = left; col < right; col += SCU_WIDTH) {
        const cu_info_t *cu_src = LCU_GET_CU_AT_PX(src, col, row);
        cu_info_t *cu_dst = LCU_GET_CU_AT_PX(dst, col, row);
        memcpy(cu_dst, cu_src, sizeof(*cu_dst));
      }
    }

    // Copy reconstructed pixels.
    kvz_pixels_blit(&src->rec.y[luma_pos], &dst->rec.y[luma_pos],
                    size, size, luma_stride, luma_stride);
    if (src->rec.chroma_format != KVZ_CSP_400) {
      kvz_pixels_blit(&src->rec.u[chroma_pos], &dst->rec.u[chroma_pos],
                      chroma_size, chroma_size, chroma_stride, chroma_stride);
      kvz_pixels_blit(&src->rec.v[chroma_pos], &dst->rec.v[chroma_pos],
                      chroma_size, chroma_size, chroma_stride, chroma_stride);
    }
  }
}
/**
 * Propagate luma coded block flags inside a CU.
 *
 * The CU area is (LCU_WIDTH >> depth) wide and starts at
 * (SUB_SCU(x_px), SUB_SCU(y_px)). It is walked in SCU_WIDTH steps. For each
 * visited CU the source is the CU found after masking both coordinates with
 * ~((width >> tr_split) - 1), where tr_split is
 * cur_cu->tr_depth - cur_cu->depth. The COLOR_Y flags are copied from that
 * source with cbf_copy unless source and destination are the same entry.
 */
static void lcu_set_coeff(lcu_t *lcu, int x_px, int y_px, int depth, cu_info_t *cur_cu)
{
  const uint32_t cu_width = LCU_WIDTH >> depth;
  const uint32_t left = SUB_SCU(x_px);
  const uint32_t top = SUB_SCU(y_px);
  const uint32_t right = left + cu_width;
  const uint32_t bottom = top + cu_width;

  // Width of one transform block after splitting the CU tr_split times.
  const uint32_t tr_split = cur_cu->tr_depth - cur_cu->depth;
  const uint32_t tu_width = cu_width >> tr_split;
  // Clears the low bits of a coordinate (a multiple of tu_width when
  // tu_width is a power of two).
  const uint32_t tu_mask = ~(tu_width - 1);

  for (uint32_t row = top; row < bottom; row += SCU_WIDTH) {
    for (uint32_t col = left; col < right; col += SCU_WIDTH) {
      cu_info_t *dst_cu = LCU_GET_CU_AT_PX(lcu, col, row);
      // Use TU top-left CU to propagate coeff flags
      cu_info_t *src_cu = LCU_GET_CU_AT_PX(lcu, col & tu_mask, row & tu_mask);

      if (dst_cu == src_cu) {
        continue;
      }

      // Chroma coeff data is not used, luma is needed for deblocking
      cbf_copy(&dst_cu->cbf, src_cu->cbf, COLOR_Y);
    }
  }
}
/**
 * Quantize the chroma residual for a region of the LCU (defined by x, y and
 * depth) and update the chroma reconstruction.
 *
 * When depth is 0 or cur_cu->tr_depth is greater than depth, the region is
 * split into four quadrants that are handled recursively at depth + 1, and
 * the function returns after propagating the children's flags.
 *
 * Inputs are:
 * - lcu->rec  u and v pixels for the area (the same buffer is passed to the
 *             quantization routine as input and as output)
 * - lcu->ref  u and v reference pixels for the area
 * - cur_cu    CU for the area; looked up from lcu at (x, y) when NULL
 *
 * Outputs are:
 * - lcu->rec    u and v pixels for the area
 * - lcu->coeff  u and v coefficients for the area
 * - cur_cu->cbf coded block flags for COLOR_U and COLOR_V
 */
void kvz_quantize_lcu_chroma_residual(encoder_state_t * const state, int32_t x, int32_t y, const uint8_t depth, cu_info_t *cur_cu, lcu_t* lcu)
{
  // we have 64>>depth transform size
  const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
  const int8_t width = LCU_WIDTH>>depth;

  if (cur_cu == NULL) {
    cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
  }

  // Tell clang-analyzer what is up. For some reason it can't figure out from
  // asserting just depth.
  assert(width == 4 || width == 8 || width == 16 || width == 32 || width == 64);

  // Split transform and increase depth
  if (depth == 0 || cur_cu->tr_depth > depth) {
    int offset = width / 2;
    kvz_quantize_lcu_chroma_residual(state, x,          y,          depth+1, NULL, lcu);
    kvz_quantize_lcu_chroma_residual(state, x + offset, y,          depth+1, NULL, lcu);
    kvz_quantize_lcu_chroma_residual(state, x,          y + offset, depth+1, NULL, lcu);
    kvz_quantize_lcu_chroma_residual(state, x + offset, y + offset, depth+1, NULL, lcu);

    // Propagate coded block flags from child CUs to parent CU.
    // Only the three non-top-left quadrants are listed here.
    // NOTE(review): presumably the top-left quadrant shares its CU entry with
    // cur_cu - confirm against cbf_set_conditionally.
    if (depth < MAX_DEPTH) {
      uint16_t child_cbfs[3] = {
        LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y         )->cbf,
        LCU_GET_CU_AT_PX(lcu, lcu_px.x,          lcu_px.y + offset)->cbf,
        LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset)->cbf,
      };
      cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_U);
      cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_V);
    }

    return;
  }

  // If luma is 4x4, do chroma for the 8x8 luma area when handling the top
  // left PU because the coordinates are correct.
  if (depth <= MAX_DEPTH || (lcu_px.x % 8 == 0 && lcu_px.y % 8 == 0)) {
    cbf_clear(&cur_cu->cbf, depth, COLOR_U);
    cbf_clear(&cur_cu->cbf, depth, COLOR_V);

    // Chroma planes are indexed with halved coordinates and LCU_WIDTH_C as
    // the row stride.
    const int chroma_offset = lcu_px.x / 2 + lcu_px.y / 2 * LCU_WIDTH_C;
    kvz_pixel *recbase_u = &lcu->rec.u[chroma_offset];
    kvz_pixel *recbase_v = &lcu->rec.v[chroma_offset];
    const kvz_pixel *base_u = &lcu->ref.u[chroma_offset];
    const kvz_pixel *base_v = &lcu->ref.v[chroma_offset];
    coeff_t *orig_coeff_u = &lcu->coeff.u[chroma_offset];
    coeff_t *orig_coeff_v = &lcu->coeff.v[chroma_offset];
    coeff_scan_order_t scan_idx_chroma;
    int tr_skip = 0;

    // At MAX_PU_DEPTH the chroma block is sized as if one level higher.
    int chroma_depth = (depth == MAX_PU_DEPTH ? depth - 1 : depth);
    int chroma_width = LCU_WIDTH_C >> chroma_depth;

    scan_idx_chroma = kvz_get_scan_order(cur_cu->type, cur_cu->intra.mode_chroma, depth);

    if (state->encoder_control->cfg->lossless) {
      if (bypass_transquant(chroma_width, LCU_WIDTH_C, LCU_WIDTH_C,
                            base_u, recbase_u, recbase_u, orig_coeff_u)) {
        cbf_set(&cur_cu->cbf, depth, COLOR_U);
      }
      if (bypass_transquant(chroma_width, LCU_WIDTH_C, LCU_WIDTH_C,
                            base_v, recbase_v, recbase_v, orig_coeff_v)) {
        cbf_set(&cur_cu->cbf, depth, COLOR_V);
      }
    } else {
      if (kvz_quantize_residual(state, cur_cu, chroma_width, COLOR_U, scan_idx_chroma, tr_skip,
                                LCU_WIDTH_C, LCU_WIDTH_C,
                                base_u, recbase_u, recbase_u, orig_coeff_u)) {
        cbf_set(&cur_cu->cbf, depth, COLOR_U);
      }
      if (kvz_quantize_residual(state, cur_cu, chroma_width, COLOR_V, scan_idx_chroma, tr_skip,
                                LCU_WIDTH_C, LCU_WIDTH_C,
                                base_v, recbase_v, recbase_v, orig_coeff_v)) {
        cbf_set(&cur_cu->cbf, depth, COLOR_V);
      }
    }
  }
}
/**
 * This function calculates the residual coefficients for a region of the LCU
 * (defined by x, y and depth) and updates the reconstruction with the
 * quantized residual.
 *
 * It handles recursion for transform split, but that currently only works
 * for 64x64 inter to 32x32 transform blocks.
 *
 * When depth is 0 or cur_pu->tr_depth is greater than depth, the region is
 * split into four quadrants that are handled recursively at depth + 1, and
 * the function returns after propagating the children's flags.
 *
 * Inputs are:
 * - lcu->rec  pixels after prediction for the area
 * - lcu->ref  reference pixels for the area
 * - cur_pu    CU for the area; looked up from lcu at (x, y) when NULL
 *
 * Outputs are:
 * - lcu->rec    reconstruction after quantized residual
 * - lcu->coeff  quantized coefficients for the area
 * - cur_pu->cbf coded block flags (COLOR_Y) for the area
 * - cur_pu->intra.tr_skip  passed to kvz_quantize_residual_trskip for 4x4
 *                          blocks when trskip is enabled
 */
void kvz_quantize_lcu_luma_residual(encoder_state_t * const state, int32_t x, int32_t y, const uint8_t depth, cu_info_t *cur_pu, lcu_t* lcu)
{
  // we have 64>>depth transform size
  const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };

  if (cur_pu == NULL) {
    cur_pu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
  }

  const int8_t width = LCU_WIDTH>>depth;

  // Tell clang-analyzer what is up. For some reason it can't figure out from
  // asserting just depth.
  assert(width == 4 || width == 8 || width == 16 || width == 32 || width == 64);

  // Split transform and increase depth
  if (depth == 0 || cur_pu->tr_depth > depth) {
    int offset = width / 2;
    kvz_quantize_lcu_luma_residual(state, x,          y,          depth+1, NULL, lcu);
    kvz_quantize_lcu_luma_residual(state, x + offset, y,          depth+1, NULL, lcu);
    kvz_quantize_lcu_luma_residual(state, x,          y + offset, depth+1, NULL, lcu);
    kvz_quantize_lcu_luma_residual(state, x + offset, y + offset, depth+1, NULL, lcu);

    // Propagate coded block flags from child CUs to parent CU.
    // Only the three non-top-left quadrants are listed here.
    // NOTE(review): presumably the top-left quadrant shares its CU entry with
    // cur_pu - confirm against cbf_set_conditionally.
    if (depth <= MAX_DEPTH) {
      uint16_t child_cbfs[3] = {
        LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y         )->cbf,
        LCU_GET_CU_AT_PX(lcu, lcu_px.x,          lcu_px.y + offset)->cbf,
        LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset)->cbf,
      };
      cbf_set_conditionally(&cur_pu->cbf, child_cbfs, depth, COLOR_Y);
    }

    return;
  }

  const int luma_offset = lcu_px.x + lcu_px.y * LCU_WIDTH;

  // Pointer to current location in the array with prediction.
  kvz_pixel *recbase_y = &lcu->rec.y[luma_offset];
  // Pointer to current location in the array with reference.
  const kvz_pixel *base_y = &lcu->ref.y[luma_offset];
  // Pointer to current location in the array with quantized coefficients.
  coeff_t *orig_coeff_y = &lcu->coeff.y[luma_offset];

  coeff_scan_order_t scan_idx_luma = kvz_get_scan_order(cur_pu->type, cur_pu->intra.mode, depth);

  // Clear coded block flag structures for depths lower than current depth.
  // This should ensure that the CBF data doesn't get corrupted if this function
  // is called more than once.
  cbf_clear(&cur_pu->cbf, depth, COLOR_Y);

  // Each path reports whether the block has coefficients; the flag is set
  // once below. recbase_y is passed both as an input and as the output.
  int has_coeffs;
  if (state->encoder_control->cfg->lossless) {
    has_coeffs = bypass_transquant(width, LCU_WIDTH, LCU_WIDTH,
                                   base_y, recbase_y, recbase_y, orig_coeff_y);
  } else if (width == 4 && state->encoder_control->trskip_enable) {
    // Try quantization with trskip and use it if it's better.
    has_coeffs = kvz_quantize_residual_trskip(
        state, cur_pu, width, COLOR_Y, scan_idx_luma,
        &cur_pu->intra.tr_skip,
        LCU_WIDTH, LCU_WIDTH,
        base_y, recbase_y, recbase_y, orig_coeff_y
    );
  } else {
    has_coeffs = kvz_quantize_residual(
        state, cur_pu, width, COLOR_Y, scan_idx_luma,
        0,
        LCU_WIDTH, LCU_WIDTH,
        base_y, recbase_y, recbase_y, orig_coeff_y
    );
  }

  if (has_coeffs) {
    cbf_set(&cur_pu->cbf, depth, COLOR_Y);
  }
}