Пример #1
0
/**
 * \brief Propagate the results stored at depth+1 one level up the work tree.
 *
 * For the square region of side (LCU_WIDTH >> depth) anchored at the
 * SUB_SCU()-reduced coordinates, copies from work_tree[depth + 1] into
 * work_tree[depth]:
 *  - the cu_info_t entries,
 *  - the reconstructed pixels,
 *  - the coefficients.
 * Chroma planes are skipped when the source reconstruction is KVZ_CSP_400.
 *
 * \param x_px       x coordinate of the region
 * \param y_px       y coordinate of the region
 * \param depth      destination depth, must be in [0, MAX_PU_DEPTH)
 * \param work_tree  array with one lcu_t per depth
 */
static void work_tree_copy_up(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1])
{
  assert(depth >= 0 && depth < MAX_PU_DEPTH);

  lcu_t *const src_lcu = &work_tree[depth + 1];
  lcu_t *const dst_lcu = &work_tree[depth];

  const int x_local = SUB_SCU(x_px);
  const int y_local = SUB_SCU(y_px);
  const int side    = LCU_WIDTH >> depth;
  const int x_end   = x_local + side;
  const int y_end   = y_local + side;

  // CU info, one entry per SCU_WIDTH step in both directions.
  for (int row = y_local; row < y_end; row += SCU_WIDTH) {
    for (int col = x_local; col < x_end; col += SCU_WIDTH) {
      const cu_info_t *src_cu = LCU_GET_CU_AT_PX(src_lcu, col, row);
      cu_info_t *dst_cu = LCU_GET_CU_AT_PX(dst_lcu, col, row);
      memcpy(dst_cu, src_cu, sizeof(*dst_cu));
    }
  }

  // Offsets of the region's first sample inside the luma and chroma planes.
  const int luma_index   = x_local + y_local * LCU_WIDTH;
  const int chroma_index = (x_local / 2) + (y_local / 2) * (LCU_WIDTH / 2);
  const int side_c       = side / 2;
  const int stride_c     = LCU_WIDTH / 2;

  const lcu_yuv_t *src_rec = &src_lcu->rec;
  lcu_yuv_t *dst_rec = &dst_lcu->rec;
  const lcu_coeff_t *src_coeff = &src_lcu->coeff;
  lcu_coeff_t *dst_coeff = &dst_lcu->coeff;

  // The source is only read below, so the chroma check can be done once.
  const int has_chroma = (src_rec->chroma_format != KVZ_CSP_400);

  // Reconstructed pixels.
  kvz_pixels_blit(&src_rec->y[luma_index], &dst_rec->y[luma_index],
                  side, side, LCU_WIDTH, LCU_WIDTH);
  if (has_chroma) {
    kvz_pixels_blit(&src_rec->u[chroma_index], &dst_rec->u[chroma_index],
                    side_c, side_c, stride_c, stride_c);
    kvz_pixels_blit(&src_rec->v[chroma_index], &dst_rec->v[chroma_index],
                    side_c, side_c, stride_c, stride_c);
  }

  // Coefficients only travel upwards; the search does not use them, so
  // there is no matching copy in the downward direction.
  kvz_coefficients_blit(&src_coeff->y[luma_index], &dst_coeff->y[luma_index],
                        side, side, LCU_WIDTH, LCU_WIDTH);
  if (has_chroma) {
    kvz_coefficients_blit(&src_coeff->u[chroma_index], &dst_coeff->u[chroma_index],
                          side_c, side_c, stride_c, stride_c);
    kvz_coefficients_blit(&src_coeff->v[chroma_index], &dst_coeff->v[chroma_index],
                          side_c, side_c, stride_c, stride_c);
  }
}
Пример #2
0
/**
 * \brief Store a transform depth in every CU of a square region of an LCU.
 *
 * \param lcu       LCU whose CU array is updated
 * \param x_px      x coordinate of the region; reduced with SUB_SCU()
 * \param y_px      y coordinate of the region; reduced with SUB_SCU()
 * \param depth     depth of the region; its side is LCU_WIDTH >> depth
 * \param tr_depth  value written to cu_info_t::tr_depth
 */
void kvz_lcu_set_trdepth(lcu_t *lcu, int x_px, int y_px, int depth, int tr_depth)
{
  const int x_local = SUB_SCU(x_px);
  const int y_local = SUB_SCU(y_px);
  const int side    = LCU_WIDTH >> depth;

  // The top-left CU is written unconditionally, so it is set even if the
  // loop below executes zero iterations.
  LCU_GET_CU_AT_PX(lcu, x_local, y_local)->tr_depth = tr_depth;

  for (unsigned dy = 0; dy < side; dy += SCU_WIDTH) {
    for (unsigned dx = 0; dx < side; dx += SCU_WIDTH) {
      LCU_GET_CU_AT_PX(lcu, x_local + dx, y_local + dy)->tr_depth = tr_depth;
    }
  }
}
Пример #3
0
/**
 * \brief Mark a square region of an LCU as intra coded.
 *
 * Writes depth, type (CU_INTRA), luma mode, chroma mode and partition mode
 * into every CU of the region of side (LCU_WIDTH >> depth).
 *
 * \param lcu          LCU whose CU array is updated
 * \param x_px         x coordinate of the region; reduced with SUB_SCU()
 * \param y_px         y coordinate of the region; reduced with SUB_SCU()
 * \param depth        depth of the region; values above MAX_DEPTH are only
 *                     expected together with SIZE_NxN and are stored as
 *                     MAX_DEPTH
 * \param pred_mode    value stored in intra.mode
 * \param chroma_mode  value stored in intra.mode_chroma
 * \param part_mode    value stored in part_size
 */
static void lcu_set_intra_mode(lcu_t *lcu, int x_px, int y_px, int depth, int pred_mode, int chroma_mode, int part_mode)
{
  // The side is derived from the depth as passed in, before any clamping.
  const int side = LCU_WIDTH >> depth;
  const int x0   = SUB_SCU(x_px);
  const int y0   = SUB_SCU(y_px);

  if (part_mode == SIZE_NxN) {
    assert(depth == MAX_DEPTH + 1);
    assert(side == SCU_WIDTH);
  }

  // Only NxN partitions may come in with a depth beyond MAX_DEPTH.
  assert(depth <= MAX_DEPTH || part_mode == SIZE_NxN);
  const int stored_depth = (depth > MAX_DEPTH) ? MAX_DEPTH : depth;

  for (int dy = 0; dy < side; dy += SCU_WIDTH) {
    for (int dx = 0; dx < side; dx += SCU_WIDTH) {
      cu_info_t *const target = LCU_GET_CU_AT_PX(lcu, x0 + dx, y0 + dy);
      target->depth = stored_depth;
      target->type = CU_INTRA;
      target->intra.mode = pred_mode;
      target->intra.mode_chroma = chroma_mode;
      target->part_size = part_mode;
    }
  }
}
Пример #4
0
/**
 * \brief Push the state stored at depth down to all deeper work tree levels.
 *
 * For the square region of side (LCU_WIDTH >> depth) anchored at the
 * SUB_SCU()-reduced coordinates, copies the cu_info_t entries and the
 * reconstructed pixels from work_tree[depth] into each of
 * work_tree[depth + 1] .. work_tree[MAX_PU_DEPTH]. Coefficients are not
 * copied. Chroma planes are skipped when the source reconstruction is
 * KVZ_CSP_400.
 *
 * \param x_px       x coordinate of the region
 * \param y_px       y coordinate of the region
 * \param depth      source depth, must be in [0, MAX_PU_DEPTH)
 * \param work_tree  array with one lcu_t per depth
 */
static void work_tree_copy_down(int x_px, int y_px, int depth, lcu_t work_tree[MAX_PU_DEPTH + 1])
{
  assert(depth >= 0 && depth < MAX_PU_DEPTH);

  const int side    = LCU_WIDTH >> depth;
  const int x_local = SUB_SCU(x_px);
  const int y_local = SUB_SCU(y_px);
  const int x_end   = x_local + side;
  const int y_end   = y_local + side;

  // Offsets of the region's first sample inside the luma and chroma planes.
  const int luma_index   = x_local + y_local * LCU_WIDTH;
  const int chroma_index = (x_local / 2) + (y_local / 2) * (LCU_WIDTH / 2);
  const int side_c       = side / 2;
  const int stride_c     = LCU_WIDTH / 2;

  lcu_t *const src_lcu = &work_tree[depth];
  lcu_yuv_t *src_rec = &src_lcu->rec;

  for (int level = depth + 1; level <= MAX_PU_DEPTH; ++level) {
    lcu_t *const dst_lcu = &work_tree[level];

    // CU info, one entry per SCU_WIDTH step in both directions.
    for (int row = y_local; row < y_end; row += SCU_WIDTH) {
      for (int col = x_local; col < x_end; col += SCU_WIDTH) {
        const cu_info_t *src_cu = LCU_GET_CU_AT_PX(src_lcu, col, row);
        cu_info_t *dst_cu = LCU_GET_CU_AT_PX(dst_lcu, col, row);
        memcpy(dst_cu, src_cu, sizeof(*dst_cu));
      }
    }

    // Reconstructed pixels.
    lcu_yuv_t *dst_rec = &dst_lcu->rec;
    kvz_pixels_blit(&src_rec->y[luma_index], &dst_rec->y[luma_index],
                    side, side, LCU_WIDTH, LCU_WIDTH);
    if (src_rec->chroma_format != KVZ_CSP_400) {
      kvz_pixels_blit(&src_rec->u[chroma_index], &dst_rec->u[chroma_index],
                      side_c, side_c, stride_c, stride_c);
      kvz_pixels_blit(&src_rec->v[chroma_index], &dst_rec->v[chroma_index],
                      side_c, side_c, stride_c, stride_c);
    }
  }
}
Пример #5
0
/**
 * \brief Spread luma coded block flags over the CUs of a region.
 *
 * The region of side (LCU_WIDTH >> depth) is treated as a grid of blocks of
 * side (width >> (tr_depth - depth)) taken from cur_cu. Every CU that is not
 * the top-left CU of its block receives the luma cbf of that top-left CU.
 *
 * \param lcu     LCU whose CU array is updated
 * \param x_px    x coordinate of the region; reduced with SUB_SCU()
 * \param y_px    y coordinate of the region; reduced with SUB_SCU()
 * \param depth   depth of the region
 * \param cur_cu  CU supplying tr_depth and depth for the block size
 */
static void lcu_set_coeff(lcu_t *lcu, int x_px, int y_px, int depth, cu_info_t *cur_cu)
{
  const uint32_t width    = LCU_WIDTH >> depth;
  const uint32_t x_begin  = SUB_SCU(x_px);
  const uint32_t y_begin  = SUB_SCU(y_px);
  const uint32_t x_end    = x_begin + width;
  const uint32_t y_end    = y_begin + width;

  const uint32_t tr_split = cur_cu->tr_depth-cur_cu->depth;
  const uint32_t tu_width = width >> tr_split;
  // Clearing the low bits of a coordinate snaps it to the start of its
  // block (assumes tu_width is a power of two).
  const uint32_t tu_mask  = ~(tu_width - 1);

  for (uint32_t row = y_begin; row < y_end; row += SCU_WIDTH) {
    for (uint32_t col = x_begin; col < x_end; col += SCU_WIDTH) {
      cu_info_t *target = LCU_GET_CU_AT_PX(lcu, col, row);
      cu_info_t *source = LCU_GET_CU_AT_PX(lcu, col & tu_mask, row & tu_mask);

      if (target == source) {
        continue;
      }

      // Only luma is propagated: chroma coeff data is not used, luma is
      // needed for deblocking.
      cbf_copy(&target->cbf, source->cbf, COLOR_Y);
    }
  }
}
Пример #6
0
/**
 * \brief Spread inter prediction data over every PU of a CU.
 *
 * Walks the prediction units defined by cur_cu->part_size and, for each one,
 * hands its position, size and top-left CU entry to lcu_set_inter_pu().
 *
 * \param lcu     LCU whose CU array is updated
 * \param x_px    x coordinate of the CU; reduced with SUB_SCU()
 * \param y_px    y coordinate of the CU; reduced with SUB_SCU()
 * \param depth   depth of the CU; its side is LCU_WIDTH >> depth
 * \param cur_cu  CU supplying the partition mode
 */
static void lcu_set_inter(lcu_t *lcu, int x_px, int y_px, int depth, cu_info_t *cur_cu)
{
  const int cu_side  = LCU_WIDTH >> depth;
  const int cu_x     = SUB_SCU(x_px);
  const int cu_y     = SUB_SCU(y_px);
  const int pu_count = kvz_part_mode_num_parts[cur_cu->part_size];

  int pu_index = 0;
  while (pu_index < pu_count) {
    // Geometry of this prediction unit inside the LCU.
    const int pu_x = PU_GET_X(cur_cu->part_size, cu_side, cu_x, pu_index);
    const int pu_y = PU_GET_Y(cur_cu->part_size, cu_side, cu_y, pu_index);
    const int pu_w = PU_GET_W(cur_cu->part_size, cu_side, pu_index);
    const int pu_h = PU_GET_H(cur_cu->part_size, cu_side, pu_index);

    // The CU entry at the PU's top-left corner is the copy source.
    cu_info_t *pu_origin = LCU_GET_CU_AT_PX(lcu, pu_x, pu_y);
    lcu_set_inter_pu(lcu, pu_x, pu_y, pu_w, pu_h, pu_origin);

    ++pu_index;
  }
}
Пример #7
0
/**
 * \brief Calculate the chroma part of the RD cost of a block.
 *
 * \param state    encoder state; supplies the CABAC contexts and lambda
 * \param x_px     x coordinate inside the LCU, must be in [0, LCU_WIDTH)
 * \param y_px     y coordinate inside the LCU, must be in [0, LCU_WIDTH)
 * \param depth    depth of the block being costed
 * \param pred_cu  CU whose depth and coded block flags are used
 * \param lcu      LCU holding the CU that supplies the transform depth
 * \return Cost of the block. Blocks whose coordinates are not both multiples
 *         of 8 cost 0. When the transform tree is split below this depth the
 *         result is the sum of the four quadrant costs plus the cost of the
 *         chroma cbf bits of this level weighted by lambda.
 */
double kvz_cu_rd_cost_chroma(const encoder_state_t *const state,
                         const int x_px, const int y_px, const int depth,
                         const cu_info_t *const pred_cu,
                         lcu_t *const lcu)
{
  const vector2d_t lcu_px = { x_px / 2, y_px / 2 };
  const int width = (depth <= MAX_DEPTH) ? LCU_WIDTH >> (depth + 1) : LCU_WIDTH >> depth;
  cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);

  double tr_tree_bits = 0;
  double coeff_bits = 0;

  assert(x_px >= 0 && x_px < LCU_WIDTH);
  assert(y_px >= 0 && y_px < LCU_WIDTH);

  if (x_px % 8 != 0 || y_px % 8 != 0) {
    // For MAX_PU_DEPTH calculate chroma for previous depth for the first
    // block and return 0 cost for all others.
    return 0;
  }

  if (depth < MAX_PU_DEPTH) {
    // Cost of the cbf_cb / cbf_cr flags of this level. A flag is only
    // counted at the root of the transform tree or when the parent level
    // had the corresponding flag set.
    const int tr_depth = depth - pred_cu->depth;
    const cabac_ctx_t *ctx = &(state->cabac.ctx.qt_cbf_model_chroma[tr_depth]);
    if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_U)) {
      tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_U));
    }
    if (tr_depth == 0 || cbf_is_set(pred_cu->cbf, depth - 1, COLOR_V)) {
      tr_tree_bits += CTX_ENTROPY_FBITS(ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_V));
    }
  }

  if (tr_cu->tr_depth > depth) {
    int offset = LCU_WIDTH >> (depth + 1);
    // The quadrant costs are doubles. Accumulating them in an int would
    // truncate the running total after every addition, so the sum is kept
    // as a double.
    double sum = 0;

    sum += kvz_cu_rd_cost_chroma(state, x_px, y_px, depth + 1, pred_cu, lcu);
    sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px, depth + 1, pred_cu, lcu);
    sum += kvz_cu_rd_cost_chroma(state, x_px, y_px + offset, depth + 1, pred_cu, lcu);
    sum += kvz_cu_rd_cost_chroma(state, x_px + offset, y_px + offset, depth + 1, pred_cu, lcu);

    return sum + tr_tree_bits * state->lambda;
  }
Пример #8
0
/**
 * \brief Copy the inter prediction data of one PU to every CU it covers.
 *
 * Each CU entry inside the width x height rectangle at (x_px, y_px), other
 * than cur_pu itself, is marked CU_INTER and receives cur_pu's depth,
 * partition size, transform depth, merged/skipped flags and inter data.
 *
 * \param lcu     LCU whose CU array is updated
 * \param x_px    x coordinate of the PU, used as given
 * \param y_px    y coordinate of the PU, used as given
 * \param width   PU width
 * \param height  PU height
 * \param cur_pu  source CU entry; never written by this function
 */
static void lcu_set_inter_pu(lcu_t *lcu, int x_px, int y_px, int width, int height, cu_info_t *cur_pu)
{
  const int x_end = x_px + width;
  const int y_end = y_px + height;

  for (int row = y_px; row < y_end; row += SCU_WIDTH) {
    for (int col = x_px; col < x_end; col += SCU_WIDTH) {
      cu_info_t *const target = LCU_GET_CU_AT_PX(lcu, col, row);

      // The source entry already holds the data; skip the self-copy.
      if (target == cur_pu) {
        continue;
      }

      target->depth     = cur_pu->depth;
      target->part_size = cur_pu->part_size;
      target->type      = CU_INTER;
      target->tr_depth  = cur_pu->tr_depth;
      target->merged    = cur_pu->merged;
      target->skipped   = cur_pu->skipped;
      memcpy(&target->inter, &cur_pu->inter, sizeof(cur_pu->inter));
    }
  }
}
Пример #9
0
/**
 * \brief Calculate the luma part of the RD cost of a block.
 *
 * \param state    encoder state; supplies CABAC contexts, config and lambda
 * \param x_px     x coordinate inside the LCU, must be in [0, LCU_WIDTH)
 * \param y_px     y coordinate inside the LCU, must be in [0, LCU_WIDTH)
 * \param depth    depth of the block; its side is LCU_WIDTH >> depth
 * \param pred_cu  CU used for prediction parameters
 * \param lcu      LCU holding the reference, reconstruction, coefficients
 *                 and the CU that supplies the transform depth
 * \return Cost of block
 *
 * When the transform tree is split below this depth, the function recurses
 * into the four quadrants and returns the sum of their costs plus the cost
 * of the split flag. Otherwise the cost is the SSD between reference and
 * reconstruction (0 in lossless mode) weighted by LUMA_MULT, plus lambda
 * times the transform tree and coefficient bits.
 */
double kvz_cu_rd_cost_luma(const encoder_state_t *const state,
                       const int x_px, const int y_px, const int depth,
                       const cu_info_t *const pred_cu,
                       lcu_t *const lcu)
{
  const int width = LCU_WIDTH >> depth;

  // tr_cu supplies the TU parameters.
  cu_info_t *const tr_cu = LCU_GET_CU_AT_PX(lcu, x_px, y_px);

  // The coordinates must be local to the LCU.
  assert(x_px >= 0 && x_px < LCU_WIDTH);
  assert(y_px >= 0 && y_px < LCU_WIDTH);

  // Number of transform splits remaining below this depth.
  const uint8_t tr_depth = tr_cu->tr_depth - depth;

  double tr_tree_bits = 0;

  // split_transform_flag bit cost.
  const bool intra_split_flag = pred_cu->type == CU_INTRA && pred_cu->part_size == SIZE_NxN && depth == 3;
  const bool split_flag_coded = width <= TR_MAX_WIDTH
                                && width > TR_MIN_WIDTH
                                && !intra_split_flag;
  if (split_flag_coded) {
    // Context index 5 - (6 - depth) simplifies to depth - 1.
    const cabac_ctx_t *split_ctx = &(state->cabac.ctx.trans_subdiv_model[depth - 1]);
    tr_tree_bits += CTX_ENTROPY_FBITS(split_ctx, tr_depth > 0);
  }

  if (tr_depth > 0) {
    // Visit the quadrants in raster order: top-left, top-right,
    // bottom-left, bottom-right.
    const int half = width / 2;
    double sum = 0;

    for (int quadrant = 0; quadrant < 4; ++quadrant) {
      const int dx = (quadrant & 1) ? half : 0;
      const int dy = (quadrant & 2) ? half : 0;
      sum += kvz_cu_rd_cost_luma(state, x_px + dx, y_px + dy, depth + 1, pred_cu, lcu);
    }

    return sum + tr_tree_bits * state->lambda;
  }

  // cbf_luma bit cost. tr_depth is zero from here on, so no check of it
  // is needed in the condition.
  if (pred_cu->type == CU_INTRA ||
      cbf_is_set(tr_cu->cbf, depth, COLOR_U) ||
      cbf_is_set(tr_cu->cbf, depth, COLOR_V))
  {
    const cabac_ctx_t *cbf_ctx = &(state->cabac.ctx.qt_cbf_model_luma[!tr_depth]);
    tr_tree_bits += CTX_ENTROPY_FBITS(cbf_ctx, cbf_is_set(pred_cu->cbf, depth, COLOR_Y));
  }

  // Offset of the block's first sample inside the luma planes.
  const int luma_index = y_px * LCU_WIDTH + x_px;

  // SSD between reconstruction and original; skipped in lossless mode.
  int ssd = 0;
  if (!state->encoder_control->cfg->lossless) {
    ssd = kvz_pixels_calc_ssd(&lcu->ref.y[luma_index], &lcu->rec.y[luma_index],
                              LCU_WIDTH, LCU_WIDTH,
                              width);
  }

  // Code coeffs using cabac to get a better estimate of real coding costs.
  // The block is first packed into a contiguous buffer with stride == width.
  double coeff_bits = 0;
  {
    coeff_t packed_coeffs[32 * 32];
    int8_t luma_scan_mode = kvz_get_scan_order(pred_cu->type, pred_cu->intra.mode, depth);

    kvz_coefficients_blit(&lcu->coeff.y[luma_index], packed_coeffs, width, width, LCU_WIDTH, width);
    coeff_bits += kvz_get_coeff_cost(state, packed_coeffs, width, 0, luma_scan_mode);
  }

  double bits = tr_tree_bits + coeff_bits;
  return (double)ssd * LUMA_MULT + bits * state->lambda;
}
Пример #10
0
/**
 * Calculate the chroma residual coefficients for a region of the LCU
 * (defined by x, y and depth), recursing into the four quadrants while the
 * transform tree is split.
 *
 * \param state   encoder state; the lossless setting selects between
 *                bypass_transquant() and kvz_quantize_residual()
 * \param x       x coordinate of the region; reduced with SUB_SCU()
 * \param y       y coordinate of the region; reduced with SUB_SCU()
 * \param depth   depth of the region
 * \param cur_cu  CU for the region, or NULL to look it up from lcu
 * \param lcu     LCU holding ref, rec, coeff and the CU array
 *
 * The U and V coded block flags of cur_cu at this depth are cleared and then
 * set again according to the return values of the quantization calls.
 */
void kvz_quantize_lcu_chroma_residual(encoder_state_t * const state, int32_t x, int32_t y, const uint8_t depth, cu_info_t *cur_cu, lcu_t* lcu)
{
  // we have 64>>depth transform size
  const vector2d_t lcu_px = { SUB_SCU(x), SUB_SCU(y) };
  const int8_t width = LCU_WIDTH>>depth;
  // Recursive calls pass NULL, so the CU is looked up from the LCU here.
  if (cur_cu == NULL) {
    cur_cu = LCU_GET_CU_AT_PX(lcu, lcu_px.x, lcu_px.y);
  }
  
  // Tell clang-analyzer what is up. For some reason it can't figure out from
  // asserting just depth.
  assert(width == 4 || width == 8 || width == 16 || width == 32 || width == 64);

  // Split transform and increase depth
  // Depth 0 is always split, regardless of the CU's tr_depth.
  if (depth == 0 || cur_cu->tr_depth > depth) {
    int offset = width / 2;
    kvz_quantize_lcu_chroma_residual(state, x,          y,          depth+1, NULL, lcu);
    kvz_quantize_lcu_chroma_residual(state, x + offset, y,          depth+1, NULL, lcu);
    kvz_quantize_lcu_chroma_residual(state, x,          y + offset, depth+1, NULL, lcu);
    kvz_quantize_lcu_chroma_residual(state, x + offset, y + offset, depth+1, NULL, lcu);

    // Propagate coded block flags from child CUs to parent CU.
    // Only the three non-top-left children are collected; cur_cu itself is
    // the entry at the top-left position.
    if (depth < MAX_DEPTH) {
      uint16_t child_cbfs[3] = {
        LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y         )->cbf,
        LCU_GET_CU_AT_PX(lcu, lcu_px.x,          lcu_px.y + offset)->cbf,
        LCU_GET_CU_AT_PX(lcu, lcu_px.x + offset, lcu_px.y + offset)->cbf,
      };
      cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_U);
      cbf_set_conditionally(&cur_cu->cbf, child_cbfs, depth, COLOR_V);
    }

    return;
  }

  // If luma is 4x4, do chroma for the 8x8 luma area when handling the top
  // left PU because the coordinates are correct.
  if (depth <= MAX_DEPTH || (lcu_px.x % 8 == 0 && lcu_px.y % 8 == 0)) {
    // Start from cleared flags; they are set again below when the
    // quantization call returns non-zero.
    cbf_clear(&cur_cu->cbf, depth, COLOR_U);
    cbf_clear(&cur_cu->cbf, depth, COLOR_V);

    // Offset of the region's first sample inside the chroma planes.
    const int chroma_offset = lcu_px.x / 2 + lcu_px.y / 2 * LCU_WIDTH_C;
    kvz_pixel *recbase_u = &lcu->rec.u[chroma_offset];
    kvz_pixel *recbase_v = &lcu->rec.v[chroma_offset];
    const kvz_pixel *base_u = &lcu->ref.u[chroma_offset];
    const kvz_pixel *base_v = &lcu->ref.v[chroma_offset];
    coeff_t *orig_coeff_u = &lcu->coeff.u[chroma_offset];
    coeff_t *orig_coeff_v = &lcu->coeff.v[chroma_offset];
    coeff_scan_order_t scan_idx_chroma;
    // Transform skip is never requested for chroma here.
    int tr_skip = 0;
    // At MAX_PU_DEPTH the chroma block size of the previous depth is used.
    int chroma_depth = (depth == MAX_PU_DEPTH ? depth - 1 : depth);
    int chroma_width = LCU_WIDTH_C >> chroma_depth;

    scan_idx_chroma = kvz_get_scan_order(cur_cu->type, cur_cu->intra.mode_chroma, depth);

    // NOTE(review): the rec pointer is passed twice to each call below,
    // presumably as both the prediction input and the reconstruction
    // output -- confirm against the callee signatures.
    if (state->encoder_control->cfg->lossless) {
      if (bypass_transquant(chroma_width,
                            LCU_WIDTH_C, LCU_WIDTH_C,
                            base_u, recbase_u,
                            recbase_u, orig_coeff_u)) {
        cbf_set(&cur_cu->cbf, depth, COLOR_U);
      }
      if (bypass_transquant(chroma_width,
                            LCU_WIDTH_C, LCU_WIDTH_C,
                            base_v, recbase_v,
                            recbase_v, orig_coeff_v)) {
        cbf_set(&cur_cu->cbf, depth, COLOR_V);
      }
    } else {
      if (kvz_quantize_residual(state, cur_cu, chroma_width, COLOR_U, scan_idx_chroma, tr_skip, LCU_WIDTH_C, LCU_WIDTH_C, base_u, recbase_u, recbase_u, orig_coeff_u)) {
        cbf_set(&cur_cu->cbf, depth, COLOR_U);
      }
      if (kvz_quantize_residual(state, cur_cu, chroma_width, COLOR_V, scan_idx_chroma, tr_skip, LCU_WIDTH_C, LCU_WIDTH_C, base_v, recbase_v, recbase_v, orig_coeff_v)) {
        cbf_set(&cur_cu->cbf, depth, COLOR_V);
      }
    }
  }
Пример #11
0
/**
 * This function calculates the luma residual coefficients for a region of
 * the LCU (defined by x, y and depth) and updates the reconstruction with
 * the quantized residual.
 *
 * It recurses into the four quadrants while the transform tree is split;
 * depth 0 is always split.
 *
 * Inputs are:
 * - lcu->rec  pixels after prediction for the area
 * - lcu->ref  reference pixels for the area
 * - lcu->cu   for the area
 *
 * Outputs are:
 * - lcu->rec  reconstruction after quantized residual
 * - lcu->coeff  quantized coefficients for the area
 * - lcu->cbf  coded block flags for the area
 * - lcu->cu.intra[].tr_skip  for the area
 *
 * \param state   encoder state; lossless and trskip_enable select the
 *                quantization routine
 * \param x       x coordinate of the region; reduced with SUB_SCU()
 * \param y       y coordinate of the region; reduced with SUB_SCU()
 * \param depth   depth of the region; its side is LCU_WIDTH >> depth
 * \param cur_pu  CU for the region, or NULL to look it up from lcu
 * \param lcu     LCU holding ref, rec, coeff and the CU array
 */
void kvz_quantize_lcu_luma_residual(encoder_state_t * const state, int32_t x, int32_t y, const uint8_t depth, cu_info_t *cur_pu, lcu_t* lcu)
{
  // we have 64>>depth transform size
  const int8_t width = LCU_WIDTH>>depth;
  const int x_local = SUB_SCU(x);
  const int y_local = SUB_SCU(y);

  // Recursive calls pass NULL, so the CU is looked up from the LCU here.
  if (cur_pu == NULL) {
    cur_pu = LCU_GET_CU_AT_PX(lcu, x_local, y_local);
  }

  // Tell clang-analyzer what is up. For some reason it can't figure out from
  // asserting just depth.
  assert(width == 4 || width == 8 || width == 16 || width == 32 || width == 64);

  // Split transform and increase depth
  if (depth == 0 || cur_pu->tr_depth > depth) {
    const int half = width / 2;

    // Quadrants in raster order: top-left, top-right, bottom-left,
    // bottom-right.
    for (int quadrant = 0; quadrant < 4; ++quadrant) {
      const int dx = (quadrant & 1) ? half : 0;
      const int dy = (quadrant & 2) ? half : 0;
      kvz_quantize_lcu_luma_residual(state, x + dx, y + dy, depth+1, NULL, lcu);
    }

    // Propagate coded block flags from child CUs to parent CU.
    if (depth <= MAX_DEPTH) {
      uint16_t child_cbfs[3] = {
        LCU_GET_CU_AT_PX(lcu, x_local + half, y_local       )->cbf,
        LCU_GET_CU_AT_PX(lcu, x_local,        y_local + half)->cbf,
        LCU_GET_CU_AT_PX(lcu, x_local + half, y_local + half)->cbf,
      };
      cbf_set_conditionally(&cur_pu->cbf, child_cbfs, depth, COLOR_Y);
    }

    return;
  }

  // Offset of the region's first sample inside the luma planes.
  const int luma_offset = x_local + y_local * LCU_WIDTH;

  // Prediction on input, reconstruction on output.
  kvz_pixel *recbase_y = &lcu->rec.y[luma_offset];
  // Reference pixels.
  const kvz_pixel *base_y = &lcu->ref.y[luma_offset];
  // Quantized coefficients.
  coeff_t *orig_coeff_y = &lcu->coeff.y[luma_offset];

  coeff_scan_order_t scan_idx_luma = kvz_get_scan_order(cur_pu->type, cur_pu->intra.mode, depth);

  #if OPTIMIZATION_SKIP_RESIDUAL_ON_THRESHOLD
  uint32_t residual_sum = 0;
  #endif

  // Clear the luma coded block flag at this depth first, so that the CBF
  // data doesn't get corrupted if this function is called more than once.
  cbf_clear(&cur_pu->cbf, depth, COLOR_Y);

  int has_coeffs;
  if (state->encoder_control->cfg->lossless) {
    has_coeffs = bypass_transquant(width,
                                   LCU_WIDTH, LCU_WIDTH,
                                   base_y, recbase_y,
                                   recbase_y, orig_coeff_y) ? 1 : 0;
  } else if (width == 4 && state->encoder_control->trskip_enable) {
    // Try quantization with trskip and use it if it's better.
    has_coeffs = kvz_quantize_residual_trskip(
        state, cur_pu, width, COLOR_Y, scan_idx_luma,
        &cur_pu->intra.tr_skip,
        LCU_WIDTH, LCU_WIDTH,
        base_y, recbase_y, recbase_y, orig_coeff_y
    );
  } else {
    has_coeffs = kvz_quantize_residual(
        state, cur_pu, width, COLOR_Y, scan_idx_luma,
        0,
        LCU_WIDTH, LCU_WIDTH,
        base_y, recbase_y, recbase_y, orig_coeff_y
    );
  }

  if (has_coeffs) {
    cbf_set(&cur_pu->cbf, depth, COLOR_Y);
  }
}