static void accumulate_fp_tile_stat(TileDataEnc *tile_data,
                                    TileDataEnc *tile_data_t) {
  tile_data->fp_data.intra_factor += tile_data_t->fp_data.intra_factor;
  tile_data->fp_data.brightness_factor +=
      tile_data_t->fp_data.brightness_factor;
  tile_data->fp_data.coded_error += tile_data_t->fp_data.coded_error;
  tile_data->fp_data.sr_coded_error += tile_data_t->fp_data.sr_coded_error;
  tile_data->fp_data.frame_noise_energy +=
      tile_data_t->fp_data.frame_noise_energy;
  tile_data->fp_data.intra_error += tile_data_t->fp_data.intra_error;
  tile_data->fp_data.intercount += tile_data_t->fp_data.intercount;
  tile_data->fp_data.second_ref_count += tile_data_t->fp_data.second_ref_count;
  tile_data->fp_data.neutral_count += tile_data_t->fp_data.neutral_count;
  tile_data->fp_data.intra_count_low += tile_data_t->fp_data.intra_count_low;
  tile_data->fp_data.intra_count_high += tile_data_t->fp_data.intra_count_high;
  tile_data->fp_data.intra_skip_count += tile_data_t->fp_data.intra_skip_count;
  tile_data->fp_data.mvcount += tile_data_t->fp_data.mvcount;
  tile_data->fp_data.sum_mvr += tile_data_t->fp_data.sum_mvr;
  tile_data->fp_data.sum_mvr_abs += tile_data_t->fp_data.sum_mvr_abs;
  tile_data->fp_data.sum_mvc += tile_data_t->fp_data.sum_mvc;
  tile_data->fp_data.sum_mvc_abs += tile_data_t->fp_data.sum_mvc_abs;
  tile_data->fp_data.sum_mvrs += tile_data_t->fp_data.sum_mvrs;
  tile_data->fp_data.sum_mvcs += tile_data_t->fp_data.sum_mvcs;
  tile_data->fp_data.sum_in_vectors += tile_data_t->fp_data.sum_in_vectors;
  tile_data->fp_data.intra_smooth_count +=
      tile_data_t->fp_data.intra_smooth_count;
  tile_data->fp_data.image_data_start_row =
      VPXMIN(tile_data->fp_data.image_data_start_row,
             tile_data_t->fp_data.image_data_start_row) == INVALID_ROW
          ? VPXMAX(tile_data->fp_data.image_data_start_row,
                   tile_data_t->fp_data.image_data_start_row)
          : VPXMIN(tile_data->fp_data.image_data_start_row,
                   tile_data_t->fp_data.image_data_start_row);
}
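// A minimal standalone sketch (hypothetical names, compiled as its own
// translation unit) of the image_data_start_row merge rule above.
// INVALID_ROW is a sentinel below any real row index (-1 in vp9_firstpass.h):
// if either side is still the sentinel, VPXMIN returns it and the VPXMAX
// fallback keeps the one valid row; once both sides are valid, the plain
// minimum wins.
#include <assert.h>

#define SKETCH_INVALID_ROW (-1)
#define SKETCH_MIN(a, b) ((a) < (b) ? (a) : (b))
#define SKETCH_MAX(a, b) ((a) > (b) ? (a) : (b))

static int merge_image_data_start_row(int a, int b) {
  return SKETCH_MIN(a, b) == SKETCH_INVALID_ROW ? SKETCH_MAX(a, b)
                                                : SKETCH_MIN(a, b);
}

int main(void) {
  assert(merge_image_data_start_row(SKETCH_INVALID_ROW, 7) == 7);
  assert(merge_image_data_start_row(4, 7) == 4);
  assert(merge_image_data_start_row(SKETCH_INVALID_ROW, SKETCH_INVALID_ROW) ==
         SKETCH_INVALID_ROW);
  return 0;
}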
// Select a segment for the current block.
// The choice of segment for a block depends on the ratio of the projected
// bits for the block vs a target average and its spatial complexity.
void vp10_caq_select_segment(VP10_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
                             int mi_row, int mi_col, int projected_rate) {
  VP10_COMMON *const cm = &cpi->common;

  const int mi_offset = mi_row * cm->mi_cols + mi_col;
  const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
  const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
  const int xmis = VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]);
  const int ymis = VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]);
  int x, y;
  int i;
  unsigned char segment;

  if (0) {
    segment = DEFAULT_AQ2_SEG;
  } else {
    // Rate depends on the fraction of a SB64 that is inside the frame
    // ((xmis * ymis) / (bw * bh)). It is converted to bits * 256 units.
    const int target_rate =
        (cpi->rc.sb64_target_rate * xmis * ymis * 256) / (bw * bh);
    double logvar;
    double low_var_thresh;
    const int aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth);

    vpx_clear_system_state();
    low_var_thresh =
        (cpi->oxcf.pass == 2)
            ? VPXMAX(cpi->twopass.mb_av_energy, MIN_DEFAULT_LV_THRESH)
            : DEFAULT_LV_THRESH;

    vp10_setup_src_planes(mb, cpi->Source, mi_row, mi_col);
    logvar = vp10_log_block_var(cpi, mb, bs);

    segment = AQ_C_SEGMENTS - 1;  // Just in case no break out below.
    for (i = 0; i < AQ_C_SEGMENTS; ++i) {
      // Test rate against a threshold value and variance against a threshold.
      // Increasing segment number (higher variance and complexity) = higher Q.
      if ((projected_rate < target_rate * aq_c_transitions[aq_strength][i]) &&
          (logvar < (low_var_thresh + aq_c_var_thresholds[aq_strength][i]))) {
        segment = i;
        break;
      }
    }
  }

  // Fill in the entries in the segment map corresponding to this SB64.
  for (y = 0; y < ymis; y++) {
    for (x = 0; x < xmis; x++) {
      cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment;
    }
  }
}
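// A standalone numeric sketch (hypothetical helper, own translation unit) of
// the target-rate scaling above: the per-SB64 bit budget is scaled by the
// fraction of the SB64's 8x8 units that lie inside the frame, expressed in
// bits * 256 units as the comment in vp10_caq_select_segment() states.
#include <stdio.h>

static int sketch_scaled_target_rate(int sb64_target_rate, int xmis,
                                     int ymis) {
  const int bw = 8, bh = 8;  // 8x8-unit dimensions of a full SB64.
  return (sb64_target_rate * xmis * ymis * 256) / (bw * bh);
}

int main(void) {
  // A fully visible SB64 vs. a half-width SB64 at the right frame edge.
  printf("%d\n", sketch_scaled_target_rate(1000, 8, 8));  // 256000
  printf("%d\n", sketch_scaled_target_rate(1000, 4, 8));  // 128000
  return 0;
}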
void vp9_encode_fp_row_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  TileDataEnc *first_tile_col;
  int num_workers = VPXMAX(cpi->oxcf.max_threads, 1);
  int i;

  if (multi_thread_ctxt->allocated_tile_cols < tile_cols ||
      multi_thread_ctxt->allocated_tile_rows < tile_rows ||
      multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) {
    vp9_row_mt_mem_dealloc(cpi);
    vp9_init_tile_data(cpi);
    vp9_row_mt_mem_alloc(cpi);
  } else {
    vp9_init_tile_data(cpi);
  }

  create_enc_workers(cpi, num_workers);

  vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);

  vp9_prepare_job_queue(cpi, FIRST_PASS_JOB);

  vp9_multi_thread_tile_init(cpi);

  for (i = 0; i < num_workers; i++) {
    EncWorkerData *thread_data;
    thread_data = &cpi->tile_thr_data[i];

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
    }
  }

  launch_enc_workers(cpi, first_pass_worker_hook, multi_thread_ctxt,
                     num_workers);

  first_tile_col = &cpi->tile_data[0];
  for (i = 1; i < tile_cols; i++) {
    TileDataEnc *this_tile = &cpi->tile_data[i];
    accumulate_fp_tile_stat(first_tile_col, this_tile);
  }
}
void vp10_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
                               struct macroblockd_plane planes[MAX_MB_PLANE],
                               int frame_filter_level, int y_only,
                               int partial_frame, VPxWorker *workers,
                               int num_workers, VP9LfSync *lf_sync) {
  int start_mi_row, end_mi_row, mi_rows_to_filter;

  if (!frame_filter_level) return;

  start_mi_row = 0;
  mi_rows_to_filter = cm->mi_rows;
  if (partial_frame && cm->mi_rows > 8) {
    start_mi_row = cm->mi_rows >> 1;
    start_mi_row &= 0xfffffff8;
    mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8);
  }

  // The remainder follows the corresponding vp9 row-MT implementation:
  // compute the end row, initialize the filter levels, then hand the row
  // range to the workers.
  end_mi_row = start_mi_row + mi_rows_to_filter;
  vp10_loop_filter_frame_init(cm, frame_filter_level);
  loop_filter_rows_mt(frame, cm, planes, start_mi_row, end_mi_row, y_only,
                      workers, num_workers, lf_sync);
}
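// A standalone sketch (own translation unit) of the partial-frame row
// selection above: start at the frame midpoint, align down to a multiple of
// 8 MI rows (the 0xfffffff8 mask clears the low three bits), and filter
// mi_rows / 8 rows but never fewer than 8.
#include <stdio.h>

#define SKETCH_MAX(a, b) ((a) > (b) ? (a) : (b))

static void sketch_partial_filter_range(int mi_rows, int *start, int *count) {
  *start = 0;
  *count = mi_rows;
  if (mi_rows > 8) {
    *start = (mi_rows >> 1) & ~7;  // Same effect as &= 0xfffffff8.
    *count = SKETCH_MAX(mi_rows / 8, 8);
  }
}

int main(void) {
  int start, count;
  sketch_partial_filter_range(68, &start, &count);  // e.g. a 68-MI-row frame
  printf("start=%d count=%d\n", start, count);      // start=32 count=8
  return 0;
}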
static int get_max_tile_cols(VP9_COMP *cpi) {
  const int aligned_width = ALIGN_POWER_OF_TWO(cpi->oxcf.width, MI_SIZE_LOG2);
  int mi_cols = aligned_width >> MI_SIZE_LOG2;
  int min_log2_tile_cols, max_log2_tile_cols;
  int log2_tile_cols;

  vp9_get_tile_n_bits(mi_cols, &min_log2_tile_cols, &max_log2_tile_cols);
  log2_tile_cols =
      clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols);
  if (cpi->oxcf.target_level == LEVEL_AUTO) {
    const int level_tile_cols =
        log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height);
    if (log2_tile_cols > level_tile_cols) {
      log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols);
    }
  }
  return (1 << log2_tile_cols);
}
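// A standalone sketch of the clamping above, with hypothetical log2 bounds
// standing in for vp9_get_tile_n_bits() and the level table: the requested
// log2 tile-column count is clamped to the legal [min, max] range, then
// optionally capped by the level-derived limit, but never pushed below the
// minimum the frame width requires.
#include <stdio.h>

#define SKETCH_MAX(a, b) ((a) > (b) ? (a) : (b))
#define SKETCH_CLAMP(v, lo, hi) ((v) < (lo) ? (lo) : (v) > (hi) ? (hi) : (v))

static int sketch_max_tile_cols(int requested_log2, int min_log2, int max_log2,
                                int level_log2_cap) {
  int log2_tile_cols = SKETCH_CLAMP(requested_log2, min_log2, max_log2);
  if (log2_tile_cols > level_log2_cap)
    log2_tile_cols = SKETCH_MAX(level_log2_cap, min_log2);
  return 1 << log2_tile_cols;
}

int main(void) {
  // Request log2 = 3 with legal range [0, 2] and level cap log2 = 1:
  // the clamp gives 2, the level cap lowers it to 1 -> 2 tile columns.
  printf("%d\n", sketch_max_tile_cols(3, 0, 2, 1));  // 2
  return 0;
}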
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double q;
#if CONFIG_VP9_HIGHBITDEPTH
  switch (bit_depth) {
    case VPX_BITS_8:
      q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
      break;
    case VPX_BITS_10:
      q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0;
      break;
    case VPX_BITS_12:
      q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0;
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return -1;
  }
#else
  (void)bit_depth;
  q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  // TODO(debargha): Adjust the function below.
  return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8);
}
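// A standalone numeric sketch of the threshold factor above, assuming
// RD_THRESH_POW is 1.25 (its value in vp9_rd.c) and that q has already been
// normalized by the per-bit-depth divisor (4, 16, or 64).
#include <math.h>
#include <stdio.h>

#define SKETCH_RD_THRESH_POW 1.25
#define SKETCH_MAX(a, b) ((a) > (b) ? (a) : (b))

static int sketch_rd_thresh_factor(double q) {
  return SKETCH_MAX((int)(pow(q, SKETCH_RD_THRESH_POW) * 5.12), 8);
}

int main(void) {
  printf("%d\n", sketch_rd_thresh_factor(1.0));   // 8: the floor of 8 wins
  printf("%d\n", sketch_rd_thresh_factor(32.0));  // 389
  return 0;
}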
void vp9_encode_tiles_row_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  int num_workers = VPXMAX(cpi->oxcf.max_threads, 1);
  int i;

  if (multi_thread_ctxt->allocated_tile_cols < tile_cols ||
      multi_thread_ctxt->allocated_tile_rows < tile_rows ||
      multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) {
    vp9_row_mt_mem_dealloc(cpi);
    vp9_init_tile_data(cpi);
    vp9_row_mt_mem_alloc(cpi);
  } else {
    vp9_init_tile_data(cpi);
  }

  create_enc_workers(cpi, num_workers);

  vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);

  vp9_prepare_job_queue(cpi, ENCODE_JOB);

  vp9_multi_thread_tile_init(cpi);

  for (i = 0; i < num_workers; i++) {
    EncWorkerData *thread_data;
    thread_data = &cpi->tile_thr_data[i];

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }

    // Handle use_nonrd_pick_mode case.
    if (cpi->sf.use_nonrd_pick_mode) {
      MACROBLOCK *const x = &thread_data->td->mb;
      MACROBLOCKD *const xd = &x->e_mbd;
      struct macroblock_plane *const p = x->plane;
      struct macroblockd_plane *const pd = xd->plane;
      PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
      int j;
      for (j = 0; j < MAX_MB_PLANE; ++j) {
        p[j].coeff = ctx->coeff_pbuf[j][0];
        p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
        pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
        p[j].eobs = ctx->eobs_pbuf[j][0];
      }
    }
  }

  launch_enc_workers(cpi, enc_row_mt_worker_hook, multi_thread_ctxt,
                     num_workers);

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
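// A standalone sketch (workers modeled sequentially) of the shape of the
// counts hand-off above: each worker accumulates into a private copy, and
// the copies are folded back into the frame-level totals afterwards. The
// `i < cpi->num_workers - 1` test above exists because in libvpx the last
// worker shares the main thread's storage (thread_data->td == &cpi->td), so
// folding it in would double-count; here every copy is private, so all are
// folded. The private copies start at zero, making the fold a simple sum.
#include <stdio.h>

enum { kSketchWorkers = 3, kSketchBins = 4 };

int main(void) {
  int frame_counts[kSketchBins] = { 0 };
  int thread_counts[kSketchWorkers][kSketchBins] = { { 0 } };
  int i, j;
  // "Encode": each worker bumps only its private counters.
  for (i = 0; i < kSketchWorkers; i++) thread_counts[i][i % kSketchBins] += 10;
  // Fold the private counters back into the frame-level totals.
  for (i = 0; i < kSketchWorkers; i++)
    for (j = 0; j < kSketchBins; j++) frame_counts[j] += thread_counts[i][j];
  for (j = 0; j < kSketchBins; j++) printf("%d ", frame_counts[j]);
  printf("\n");  // 10 10 10 0
  return 0;
}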
void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
                       MACROBLOCKD *xd, int global_level) {
  int r, c;
  int sbr, sbc;
  int nhsb, nvsb;
  od_dering_in *src[3];
  unsigned char *bskip;
  int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
  int stride;
  int bsize[3];
  int dec[3];
  int pli;
  int coeff_shift = VPXMAX(cm->bit_depth - 8, 0);
  nvsb = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
  nhsb = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
  bskip = vpx_malloc(sizeof(*bskip) * cm->mi_rows * cm->mi_cols);
  vp10_setup_dst_planes(xd->plane, frame, 0, 0);
  for (pli = 0; pli < 3; pli++) {
    dec[pli] = xd->plane[pli].subsampling_x;
    bsize[pli] = 8 >> dec[pli];
  }
  stride = bsize[0] * cm->mi_cols;
  for (pli = 0; pli < 3; pli++) {
    // Element size, not pointer size: sizeof(*src) would over-allocate.
    src[pli] = vpx_malloc(sizeof(*src[pli]) * cm->mi_rows * cm->mi_cols * 64);
    for (r = 0; r < bsize[pli] * cm->mi_rows; ++r) {
      for (c = 0; c < bsize[pli] * cm->mi_cols; ++c) {
#if CONFIG_VPX_HIGHBITDEPTH
        if (cm->use_highbitdepth) {
          src[pli][r * stride + c] = CONVERT_TO_SHORTPTR(
              xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c];
        } else {
#endif
          src[pli][r * stride + c] =
              xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
#if CONFIG_VPX_HIGHBITDEPTH
        }
#endif
      }
    }
  }
  for (r = 0; r < cm->mi_rows; ++r) {
    for (c = 0; c < cm->mi_cols; ++c) {
      const MB_MODE_INFO *mbmi =
          &cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
      bskip[r * cm->mi_cols + c] = mbmi->skip;
    }
  }
  for (sbr = 0; sbr < nvsb; sbr++) {
    for (sbc = 0; sbc < nhsb; sbc++) {
      int level;
      int nhb, nvb;
      nhb = VPXMIN(MI_BLOCK_SIZE, cm->mi_cols - MI_BLOCK_SIZE * sbc);
      nvb = VPXMIN(MI_BLOCK_SIZE, cm->mi_rows - MI_BLOCK_SIZE * sbr);
      for (pli = 0; pli < 3; pli++) {
        int16_t dst[MI_BLOCK_SIZE * MI_BLOCK_SIZE * 8 * 8];
        int threshold;
#if DERING_REFINEMENT
        level = compute_level_from_index(
            global_level,
            cm->mi_grid_visible[MI_BLOCK_SIZE * sbr * cm->mi_stride +
                                MI_BLOCK_SIZE * sbc]->mbmi.dering_gain);
#else
        level = global_level;
#endif
        /* FIXME: This is a temporary hack that uses more conservative
           deringing for chroma. */
        if (pli) level = (level * 5 + 4) >> 3;
        if (sb_all_skip(cm, sbr * MI_BLOCK_SIZE, sbc * MI_BLOCK_SIZE))
          level = 0;
        threshold = level << coeff_shift;
        od_dering(&OD_DERING_VTBL_C, dst, MI_BLOCK_SIZE * bsize[pli],
                  &src[pli][sbr * stride * bsize[pli] * MI_BLOCK_SIZE +
                            sbc * bsize[pli] * MI_BLOCK_SIZE],
                  stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
                  &bskip[MI_BLOCK_SIZE * sbr * cm->mi_cols +
                         MI_BLOCK_SIZE * sbc],
                  cm->mi_cols, threshold, OD_DERING_NO_CHECK_OVERLAP,
                  coeff_shift);
        for (r = 0; r < bsize[pli] * nvb; ++r) {
          for (c = 0; c < bsize[pli] * nhb; ++c) {
#if CONFIG_VPX_HIGHBITDEPTH
            if (cm->use_highbitdepth) {
              CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
                  [xd->plane[pli].dst.stride *
                       (bsize[pli] * MI_BLOCK_SIZE * sbr + r) +
                   sbc * bsize[pli] * MI_BLOCK_SIZE + c] =
                  dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
            } else {
#endif
              xd->plane[pli].dst.buf[xd->plane[pli].dst.stride *
                                         (bsize[pli] * MI_BLOCK_SIZE * sbr +
                                          r) +
                                     sbc * bsize[pli] * MI_BLOCK_SIZE + c] =
                  dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
#if CONFIG_VPX_HIGHBITDEPTH
            }
#endif
          }
        }
      }
    }
  }
  for (pli = 0; pli < 3; pli++) {
    vpx_free(src[pli]);
  }
  vpx_free(bskip);
}
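// A standalone numeric sketch of the conservative chroma scaling above:
// (level * 5 + 4) >> 3 computes level * 5/8 with rounding, so chroma planes
// are always deringed more gently than luma.
#include <stdio.h>

static int sketch_chroma_level(int level) { return (level * 5 + 4) >> 3; }

int main(void) {
  int level;
  for (level = 0; level <= 8; level += 2)
    printf("luma %d -> chroma %d\n", level, sketch_chroma_level(level));
  // luma 0 -> 0, 2 -> 1, 4 -> 3, 6 -> 4, 8 -> 5
  return 0;
}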