/**
 * \brief Quantize the chroma (U and V) residual for a region of the LCU.
 *
 * The region is identified by x, y and depth; its luma width is
 * LCU_WIDTH >> depth.
 *
 * While depth is 0 or the CU's tr_depth is greater than depth, the region is
 * divided into four quadrants that are processed recursively at depth + 1.
 * Afterwards, for depth < MAX_DEPTH, the cbf words of the other three
 * quadrants are handed to cbf_set_conditionally() for COLOR_U and COLOR_V so
 * that the coded block flags propagate from the children to this CU.
 *
 * Otherwise the U and V coded block flags at this depth are cleared, each
 * plane is run through bypass_transquant() (lossless configuration) or
 * kvz_quantize_residual(), and the flag is set again for every plane whose
 * call returns non-zero.
 *
 * Buffers touched for a leaf (all indexed at the chroma position of the
 * region):
 * - lcu->ref.u / lcu->ref.v     passed as the reference pixels
 * - lcu->rec.u / lcu->rec.v     passed twice to the quantizer
 *                               (presumably prediction in, reconstruction
 *                               out, as documented for the luma variant;
 *                               confirm against kvz_quantize_residual)
 * - lcu->coeff.u / lcu->coeff.v passed as the coefficient destination
 * - cur_cu->cbf                 U and V flags at this depth
 *
 * \param state   encoder state; its encoder_control->cfg->lossless selects
 *                the bypass path
 * \param x       x-coordinate of the region; SUB_SCU(x) is used to index
 *                the LCU
 * \param y       y-coordinate of the region; SUB_SCU(y) is used to index
 *                the LCU
 * \param depth   depth of the region
 * \param cur_cu  CU for the region, or NULL to look it up in lcu
 * \param lcu     LCU whose buffers are read and updated
 */
void kvz_quantize_lcu_chroma_residual(encoder_state_t * const state,
                                      int32_t x, int32_t y, const uint8_t depth,
                                      cu_info_t *cur_cu, lcu_t* lcu)
{
  // Position used for every lookup into the LCU buffers.
  const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
  // Luma width of this region.
  const int8_t width = LCU_WIDTH >> depth;

  if (cur_cu == NULL) {
    cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
  }

  // Tell clang-analyzer what is up. For some reason it can't figure out from
  // asserting just depth.
  assert(width == 4 || width == 8 || width == 16 || width == 32 || width == 64);

  // Split transform and increase depth.
  if (depth == 0 || cur_cu->tr_depth > depth) {
    const int offset = width / 2;

    kvz_quantize_lcu_chroma_residual(state, x,          y,          depth + 1, NULL, lcu);
    kvz_quantize_lcu_chroma_residual(state, x + offset, y,          depth + 1, NULL, lcu);
    kvz_quantize_lcu_chroma_residual(state, x,          y + offset, depth + 1, NULL, lcu);
    kvz_quantize_lcu_chroma_residual(state, x + offset, y + offset, depth + 1, NULL, lcu);

    // Propagate coded block flags from child CUs to parent CU. cur_cu is the
    // CU at the top-left corner, so only the other three quadrants are read.
    if (depth < MAX_DEPTH) {
      uint16_t child_cbfs[3] = {
        LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y         )->cbf,
        LCU_GET_CU_AT_PX(lcu, lcu_px.x,          lcu_px.y + offset)->cbf,
        LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset)->cbf,
      };
      cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_U);
      cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_V);
    }

    return;
  }

  // If luma is 4x4, do chroma for the 8x8 luma area when handling the top
  // left PU because the coordinates are correct.
  if (depth <= MAX_DEPTH || (lcu_px.x % 8 == 0 && lcu_px.y % 8 == 0)) {
    cbf_clear(&cur_cu->cbf, depth, COLOR_U);
    cbf_clear(&cur_cu->cbf, depth, COLOR_V);

    // Chroma position: both coordinates halved, rows LCU_WIDTH_C apart.
    const int chroma_offset = lcu_px.x / 2 + lcu_px.y / 2 * LCU_WIDTH_C;
    kvz_pixel *recbase_u = &lcu->rec.u[chroma_offset];
    kvz_pixel *recbase_v = &lcu->rec.v[chroma_offset];
    const kvz_pixel *base_u = &lcu->ref.u[chroma_offset];
    const kvz_pixel *base_v = &lcu->ref.v[chroma_offset];
    coeff_t *orig_coeff_u = &lcu->coeff.u[chroma_offset];
    coeff_t *orig_coeff_v = &lcu->coeff.v[chroma_offset];

    // Transform skip is never requested for chroma here.
    const int tr_skip = 0;

    // At MAX_PU_DEPTH the chroma block is sized from the parent depth.
    const int chroma_depth = (depth == MAX_PU_DEPTH ? depth - 1 : depth);
    const int chroma_width = LCU_WIDTH_C >> chroma_depth;

    const coeff_scan_order_t scan_idx_chroma =
        kvz_get_scan_order(cur_cu->type, cur_cu->intra.mode_chroma, depth);

    if (state->encoder_control->cfg->lossless) {
      if (bypass_transquant(chroma_width, LCU_WIDTH_C, LCU_WIDTH_C,
                            base_u, recbase_u, recbase_u, orig_coeff_u)) {
        cbf_set(&cur_cu->cbf, depth, COLOR_U);
      }
      if (bypass_transquant(chroma_width, LCU_WIDTH_C, LCU_WIDTH_C,
                            base_v, recbase_v, recbase_v, orig_coeff_v)) {
        cbf_set(&cur_cu->cbf, depth, COLOR_V);
      }
    } else {
      if (kvz_quantize_residual(state, cur_cu, chroma_width, COLOR_U,
                                scan_idx_chroma, tr_skip,
                                LCU_WIDTH_C, LCU_WIDTH_C,
                                base_u, recbase_u, recbase_u, orig_coeff_u)) {
        cbf_set(&cur_cu->cbf, depth, COLOR_U);
      }
      if (kvz_quantize_residual(state, cur_cu, chroma_width, COLOR_V,
                                scan_idx_chroma, tr_skip,
                                LCU_WIDTH_C, LCU_WIDTH_C,
                                base_v, recbase_v, recbase_v, orig_coeff_v)) {
        cbf_set(&cur_cu->cbf, depth, COLOR_V);
      }
    }
  }
}
/**
 * \brief Estimate the rate-distortion cost of the luma component of a block.
 *
 * The block is LCU_WIDTH >> depth pixels wide. When the transform depth of
 * the CU stored in the LCU exceeds depth, the cost is the sum of the costs of
 * the four quadrants at depth + 1 plus the split_transform_flag bits weighted
 * by lambda. Otherwise the cost is
 *
 *   SSD(ref, rec) * LUMA_MULT + (flag bits + coefficient bits) * lambda
 *
 * where the SSD term is left at zero in the lossless configuration and the
 * coefficient bits come from kvz_get_coeff_cost().
 *
 * \param state    encoder state; supplies CABAC contexts, lambda and the
 *                 lossless setting
 * \param x_px     x-coordinate inside the LCU, asserted to be in
 *                 [0, LCU_WIDTH)
 * \param y_px     y-coordinate inside the LCU, asserted to be in
 *                 [0, LCU_WIDTH)
 * \param depth    depth of the block being costed
 * \param pred_cu  CU whose type, part_size, intra mode and luma cbf are used
 * \param lcu      LCU holding the reference, reconstruction, coefficients
 *                 and the CU that provides the transform parameters
 * \return cost of the block
 */
double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
                           const int x_px, const int y_px, const int depth,
                           const cu_info_t *const pred_cu,
                           lcu_t *const lcu)
{
  // The coordinates must point inside the LCU.
  assert(x_px >= 0 && x_px < LCU_WIDTH);
  assert(y_px >= 0 && y_px < LCU_WIDTH);

  const int width = LCU_WIDTH >> depth;
  const int lcu_index = y_px * LCU_WIDTH + x_px;

  // Transform parameters are taken from the CU stored in the LCU rather than
  // from pred_cu.
  cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
  const uint8_t tr_depth = tr_cu->tr_depth - depth;

  double flag_bits = 0;

  // transform_tree split_transform_flag bit cost.
  const bool intra_split_flag = pred_cu->type == CU_INTRA
                             && pred_cu->part_size == SIZE_NxN
                             && depth == 3;
  if (!intra_split_flag && width > TR_MIN_WIDTH && width <= TR_MAX_WIDTH) {
    const cabac_ctx_t *split_ctx = &(state->cabac.ctx.trans_subdiv_model[5 - (6 - depth)]);
    flag_bits += CTX_ENTROPY_FBITS(split_ctx, tr_depth > 0);
  }

  // Split transform: accumulate the quadrants in raster order.
  if (tr_depth > 0) {
    const int half = width / 2;
    double sum = 0;

    for (int quad = 0; quad < 4; ++quad) {
      const int quad_x = x_px + (quad & 1) * half;
      const int quad_y = y_px + (quad >> 1) * half;
      sum += kvz_cu_rd_cost_luma(state, quad_x, quad_y, depth + 1, pred_cu, lcu);
    }

    return sum + flag_bits * state->lambda;
  }

  // transform_tree cbf_luma bit cost.
  if (pred_cu->type == CU_INTRA
      || tr_depth > 0
      || cbf_is_set(tr_cu->cbf, depth, COLOR_U)
      || cbf_is_set(tr_cu->cbf, depth, COLOR_V))
  {
    const cabac_ctx_t *cbf_ctx = &(state->cabac.ctx.qt_cbf_model_luma[!tr_depth]);
    flag_bits += CTX_ENTROPY_FBITS(cbf_ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_Y));
  }

  // Distortion: SSD between reference and reconstruction, skipped when
  // lossless.
  int ssd = 0;
  if (!state->encoder_control->cfg->lossless) {
    ssd = kvz_pixels_calc_ssd(&lcu->ref.y[lcu_index], &lcu->rec.y[lcu_index],
                              LCU_WIDTH, LCU_WIDTH,
                              width);
  }

  // Rate: copy the coefficients into a packed buffer and code them with
  // cabac to get a better estimate of real coding costs.
  double coeff_bits = 0;
  coeff_t packed_coeffs[32 * 32];
  const int8_t luma_scan_mode = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);

  kvz_coefficients_blit(&lcu->coeff.y[lcu_index], packed_coeffs, width, width, LCU_WIDTH, width);
  coeff_bits += kvz_get_coeff_cost(state, packed_coeffs, width, 0, luma_scan_mode);

  const double bits = flag_bits + coeff_bits;
  return (double)ssd * LUMA_MULT + bits * state->lambda;
}
/**
 * \brief Quantize the luma residual for a region of the LCU.
 *
 * Computes the residual coefficients for the region given by x, y and depth
 * and updates the reconstruction with the quantized residual.
 *
 * Transform splits are handled by recursion: while depth is 0 or the PU's
 * tr_depth is greater than depth, the four quadrants are processed at
 * depth + 1 and the coded block flags are propagated back from them. The
 * original documentation notes that this recursion currently only works for
 * 64x64 inter blocks split into 32x32 transform blocks.
 *
 * Inputs:
 * - lcu->rec    pixels after prediction for the area
 * - lcu->ref    reference pixels for the area
 * - lcu->cu     CU data for the area
 *
 * Outputs:
 * - lcu->rec    reconstruction after quantized residual
 * - lcu->coeff  quantized coefficients for the area
 * - cbf         luma coded block flag of the PU at this depth
 * - tr_skip     cur_pu->intra.tr_skip, on the transform skip path
 *
 * \param state   encoder state
 * \param x       x-coordinate of the region; SUB_SCU(x) is used to index
 *                the LCU
 * \param y       y-coordinate of the region; SUB_SCU(y) is used to index
 *                the LCU
 * \param depth   depth of the region; the transform is LCU_WIDTH >> depth
 *                wide
 * \param cur_pu  PU for the region, or NULL to look it up in lcu
 * \param lcu     LCU whose buffers are read and updated
 */
void kvz_quantize_lcu_luma_residual(encoder_state_t * const state,
                                    int32_t x, int32_t y, const uint8_t depth,
                                    cu_info_t *cur_pu, lcu_t* lcu)
{
  const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };

  if (cur_pu == NULL) {
    cur_pu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
  }

  const int8_t width = LCU_WIDTH >> depth;

  // Spelled out for clang-analyzer, which cannot derive the possible widths
  // from an assertion on depth alone.
  assert(width == 4 || width == 8 || width == 16 || width == 32 || width == 64);

  // Split transform: recurse into the quadrants in raster order.
  if (depth == 0 || cur_pu->tr_depth > depth) {
    const int half = width / 2;

    for (int quad = 0; quad < 4; ++quad) {
      kvz_quantize_lcu_luma_residual(state,
                                     x + (quad & 1) * half,
                                     y + (quad >> 1) * half,
                                     depth + 1, NULL, lcu);
    }

    // Propagate coded block flags from child CUs to parent CU.
    if (depth <= MAX_DEPTH) {
      uint16_t child_cbfs[3] = {
        LCU_GET_CU_AT_PX(lcu, lcu_px.x + half, lcu_px.y       )->cbf,
        LCU_GET_CU_AT_PX(lcu, lcu_px.x,        lcu_px.y + half)->cbf,
        LCU_GET_CU_AT_PX(lcu, lcu_px.x + half, lcu_px.y + half)->cbf,
      };
      cbf_set_conditionally(&cur_pu->cbf, child_cbfs, depth, COLOR_Y);
    }

    return;
  }

  const int luma_offset = lcu_px.x + lcu_px.y * LCU_WIDTH;

  // Current location in the prediction/reconstruction, reference and
  // coefficient arrays.
  kvz_pixel *const rec_y = &lcu->rec.y[luma_offset];
  const kvz_pixel *const ref_y = &lcu->ref.y[luma_offset];
  coeff_t *const coeff_y = &lcu->coeff.y[luma_offset];

  const coeff_scan_order_t scan_idx_luma =
      kvz_get_scan_order(cur_pu->type, cur_pu->intra.mode, depth);

#if OPTIMIZATION_SKIP_RESIDUAL_ON_THRESHOLD
  // Not referenced anywhere in this function; kept so that builds with the
  // option enabled stay unchanged.
  uint32_t residual_sum = 0;
#endif

  // Start from a cleared flag so that calling this function more than once
  // for the same area cannot leave stale CBF data behind.
  cbf_clear(&cur_pu->cbf, depth, COLOR_Y);

  int has_coeffs;
  if (state->encoder_control->cfg->lossless) {
    has_coeffs = bypass_transquant(width, LCU_WIDTH, LCU_WIDTH,
                                   ref_y, rec_y, rec_y, coeff_y) != 0;
  } else if (width == 4 && state->encoder_control->trskip_enable) {
    // Try quantization with trskip and use it if it's better.
    has_coeffs = kvz_quantize_residual_trskip(state, cur_pu, width, COLOR_Y,
                                              scan_idx_luma, &cur_pu->intra.tr_skip,
                                              LCU_WIDTH, LCU_WIDTH,
                                              ref_y, rec_y, rec_y, coeff_y);
  } else {
    has_coeffs = kvz_quantize_residual(state, cur_pu, width, COLOR_Y,
                                       scan_idx_luma, 0,
                                       LCU_WIDTH, LCU_WIDTH,
                                       ref_y, rec_y, rec_y, coeff_y);
  }

  if (has_coeffs) {
    cbf_set(&cur_pu->cbf, depth, COLOR_Y);
  }
}