/**
 * Propagate one square area of the work tree from depth+1 up to depth.
 *
 * The area is (LCU_WIDTH >> depth) pixels wide and starts at the position
 * obtained by applying SUB_SCU to (x_px, y_px). For that area the CU info
 * structs, the reconstructed pixels and the coefficients are copied from
 * work_tree[depth + 1] into work_tree[depth].
 *
 * \param x_px       x-coordinate of the area, passed through SUB_SCU.
 * \param y_px       y-coordinate of the area, passed through SUB_SCU.
 * \param depth      Destination depth; must satisfy 0 <= depth < MAX_PU_DEPTH.
 * \param work_tree  Per-depth LCU working copies.
 */
static void work_tree_copy_up(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1])
{
  assert(depth >= 0 && depth < MAX_PU_DEPTH);

  lcu_t *const src = &work_tree[depth + 1];
  lcu_t *const dst = &work_tree[depth];

  const int left = SUB_SCU(x_px);
  const int top = SUB_SCU(y_px);
  const int size = LCU_WIDTH >> depth;
  const int right = left + size;
  const int bottom = top + size;

  // CU info: one struct per SCU inside the area.
  for (int cy = top; cy < bottom; cy += SCU_WIDTH) {
    for (int cx = left; cx < right; cx += SCU_WIDTH) {
      const cu_info_t *cu_src = LCU_GET_CU_AT_PX(src, cx, cy);
      cu_info_t *cu_dst = LCU_GET_CU_AT_PX(dst, cx, cy);
      memcpy(cu_dst, cu_src, sizeof(*cu_dst));
    }
  }

  // Offsets and dimensions shared by the pixel and coefficient copies.
  // Chroma planes are addressed with half the luma stride and half the
  // luma coordinates.
  const int luma_stride = LCU_WIDTH;
  const int chroma_stride = LCU_WIDTH / 2;
  const int chroma_size = size / 2;
  const int luma_offset = left + top * LCU_WIDTH;
  const int chroma_offset = (left / 2) + (top / 2) * (LCU_WIDTH / 2);
  const int has_chroma = src->rec.chroma_format != KVZ_CSP_400;

  // Reconstructed pixels.
  kvz_pixels_blit(&src->rec.y[luma_offset], &dst->rec.y[luma_offset],
                  size, size, luma_stride, luma_stride);
  if (has_chroma) {
    kvz_pixels_blit(&src->rec.u[chroma_offset], &dst->rec.u[chroma_offset],
                    chroma_size, chroma_size, chroma_stride, chroma_stride);
    kvz_pixels_blit(&src->rec.v[chroma_offset], &dst->rec.v[chroma_offset],
                    chroma_size, chroma_size, chroma_stride, chroma_stride);
  }

  // Coefficients only travel upwards. They are never copied down because
  // the search does not use them.
  kvz_coefficients_blit(&src->coeff.y[luma_offset], &dst->coeff.y[luma_offset],
                        size, size, luma_stride, luma_stride);
  if (has_chroma) {
    kvz_coefficients_blit(&src->coeff.u[chroma_offset], &dst->coeff.u[chroma_offset],
                          chroma_size, chroma_size, chroma_stride, chroma_stride);
    kvz_coefficients_blit(&src->coeff.v[chroma_offset], &dst->coeff.v[chroma_offset],
                          chroma_size, chroma_size, chroma_stride, chroma_stride);
  }
}
/** * \brief Like kvz_quantize_residual except that this uses trskip if that is better. * * Using this function saves one step of quantization and inverse quantization * compared to doing the decision separately from the actual operation. * * \param width Transform width. * \param color Color. * \param scan_order Coefficient scan order. * \param trskip_out Whether transform skip is used. * \param stride Stride for ref_in, pred_in rec_out and coeff_out. * \param ref_in Reference pixels. * \param pred_in Predicted pixels. * \param rec_out Reconstructed pixels. * \param coeff_out Coefficients used for reconstruction of rec_out. * * \returns Whether coeff_out contains any non-zero coefficients. */ int kvz_quantize_residual_trskip( encoder_state_t *const state, const cu_info_t *const cur_cu, const int width, const color_t color, const coeff_scan_order_t scan_order, int8_t *trskip_out, const int in_stride, const int out_stride, const kvz_pixel *const ref_in, const kvz_pixel *const pred_in, kvz_pixel *rec_out, coeff_t *coeff_out) { struct { kvz_pixel rec[4*4]; coeff_t coeff[4*4]; uint32_t cost; int has_coeffs; } skip, noskip, *best; const int bit_cost = (int)(state->global->cur_lambda_cost+0.5); noskip.has_coeffs = kvz_quantize_residual( state, cur_cu, width, color, scan_order, 0, in_stride, 4, ref_in, pred_in, noskip.rec, noskip.coeff); noskip.cost = kvz_pixels_calc_ssd(ref_in, noskip.rec, in_stride, 4, 4); noskip.cost += kvz_get_coeff_cost(state, noskip.coeff, 4, 0, scan_order) * bit_cost; skip.has_coeffs = kvz_quantize_residual( state, cur_cu, width, color, scan_order, 1, in_stride, 4, ref_in, pred_in, skip.rec, skip.coeff); skip.cost = kvz_pixels_calc_ssd(ref_in, skip.rec, in_stride, 4, 4); skip.cost += kvz_get_coeff_cost(state, skip.coeff, 4, 0, scan_order) * bit_cost; if (noskip.cost <= skip.cost) { *trskip_out = 0; best = &noskip; } else { *trskip_out = 1; best = &skip; } if (best->has_coeffs || rec_out != pred_in) { // If there is no residual and reconstruction is 
already in rec_out, // we can skip this. kvz_pixels_blit(best->rec, rec_out, width, width, 4, out_stride); } kvz_coefficients_blit(best->coeff, coeff_out, width, width, 4, out_stride); return best->has_coeffs; }
/**
 * Calculate the luma RD cost of a Coding Unit.
 *
 * Calculates the RDO cost of a single CU that will not be split further.
 * The cost combines the SSD between the reference and the reconstruction
 * (skipped in lossless mode) with the estimated bits of the transform tree
 * flags and the luma coefficients, weighted by state->lambda. When the
 * transform tree is split at this depth, the function recurses into the four
 * quadrants and adds only the split flag cost itself.
 *
 * \param state    Encoder state (CABAC contexts, configuration, lambda).
 * \param x_px     x-coordinate inside the LCU, in [0, LCU_WIDTH).
 * \param y_px     y-coordinate inside the LCU, in [0, LCU_WIDTH).
 * \param depth    Depth of the block; its width is LCU_WIDTH >> depth.
 * \param pred_cu  CU used for prediction parameters.
 * \param lcu      LCU holding the reference, reconstruction, coefficients
 *                 and the CU from which transform parameters are read.
 * \return Cost of block
 */
double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
                           const int x_px, const int y_px, const int depth,
                           const cu_info_t *const pred_cu,
                           lcu_t *const lcu)
{
  // The coordinates are relative to the LCU and must lie inside it.
  assert(x_px >= 0 && x_px < LCU_WIDTH);
  assert(y_px >= 0 && y_px < LCU_WIDTH);

  const int width = LCU_WIDTH >> depth;
  const int luma_offset = y_px * LCU_WIDTH + x_px;

  // tr_cu is used for TU parameters.
  cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);
  const uint8_t tr_depth = tr_cu->tr_depth - depth;

  double tr_tree_bits = 0;

  // Add transform_tree split_transform_flag bit cost.
  const bool intra_split_flag =
      pred_cu->type == CU_INTRA && pred_cu->part_size == SIZE_NxN && depth == 3;
  const bool split_flag_is_coded =
      width <= TR_MAX_WIDTH && width > TR_MIN_WIDTH && !intra_split_flag;
  if (split_flag_is_coded) {
    const cabac_ctx_t *split_ctx = &(state->cabac.ctx.trans_subdiv_model[5 - (6 - depth)]);
    tr_tree_bits += CTX_ENTROPY_FBITS(split_ctx, tr_depth > 0);
  }

  // Split transform tree: the cost is the sum over the four quadrants plus
  // the split flag coded at this level.
  if (tr_depth > 0) {
    const int half = width / 2;
    double sum = 0;

    // Quadrant order: top-left, top-right, bottom-left, bottom-right.
    for (int quadrant = 0; quadrant < 4; ++quadrant) {
      const int sub_x = x_px + (quadrant & 1) * half;
      const int sub_y = y_px + (quadrant >> 1) * half;
      sum += kvz_cu_rd_cost_luma(state, sub_x, sub_y, depth + 1, pred_cu, lcu);
    }

    return sum + tr_tree_bits * state->lambda;
  }

  // Add transform_tree cbf_luma bit cost.
  // tr_depth is zero from here on because of the early return above.
  const bool cbf_luma_is_coded =
      pred_cu->type == CU_INTRA ||
      tr_depth > 0 ||
      cbf_is_set(tr_cu->cbf, depth, COLOR_U) ||
      cbf_is_set(tr_cu->cbf, depth, COLOR_V);
  if (cbf_luma_is_coded) {
    const cabac_ctx_t *cbf_ctx = &(state->cabac.ctx.qt_cbf_model_luma[!tr_depth]);
    tr_tree_bits += CTX_ENTROPY_FBITS(cbf_ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_Y));
  }

  // SSD between reconstruction and original; left at zero in lossless mode.
  int ssd = 0;
  if (!state->encoder_control->cfg->lossless) {
    ssd = kvz_pixels_calc_ssd(&lcu->ref.y[luma_offset], &lcu->rec.y[luma_offset],
                              LCU_WIDTH, LCU_WIDTH,
                              width);
  }

  // Code coeffs using cabac to get a better estimate of real coding costs.
  // The block is first packed into a contiguous buffer with stride == width.
  double coeff_bits = 0;
  {
    coeff_t packed_coeffs[32 * 32];
    const int8_t scan_mode = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);

    kvz_coefficients_blit(&lcu->coeff.y[luma_offset], packed_coeffs,
                          width, width, LCU_WIDTH, width);
    coeff_bits += kvz_get_coeff_cost(state, packed_coeffs, width, 0, scan_mode);
  }

  const double bits = tr_tree_bits + coeff_bits;
  return (double)ssd * LUMA_MULT + bits * state->lambda;
}