// Setup cyclic background refresh: set delta q and segmentation map.
void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const RATE_CONTROL *const rc = &cpi->rc;
  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
  struct segmentation *const seg = &cm->seg;
  const int apply_cyclic_refresh = apply_cyclic_refresh_bitrate(cm, rc);
  if (cm->current_video_frame == 0) cr->low_content_avg = 0.0;
  // Don't apply refresh on key frame or temporal enhancement layer frames.
  if (!apply_cyclic_refresh || (cm->frame_type == KEY_FRAME) ||
      (cpi->svc.temporal_layer_id > 0)) {
    // Set segmentation map to 0 and disable.
    unsigned char *const seg_map = cpi->segmentation_map;
    memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
    vp9_disable_segmentation(&cm->seg);
    if (cm->frame_type == KEY_FRAME) {
      memset(cr->last_coded_q_map, MAXQ,
             cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
      memset(cr->consec_zero_mv, 0,
             cm->mi_rows * cm->mi_cols * sizeof(*cr->consec_zero_mv));
      cr->sb_index = 0;
    }
    return;
  } else {
    int qindex_delta = 0;
    int qindex2;
    const double q = vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth);
    vpx_clear_system_state();
    // Set rate threshold to some multiple (set to 2 for now) of the target
    // rate (target is given by sb64_target_rate and scaled by 256).
    cr->thresh_rate_sb = ((int64_t)(rc->sb64_target_rate) << 8) << 2;
    // Distortion threshold, quadratic in Q, scale factor to be adjusted.
    // q will not exceed 457, so (q * q) is within 32bit; see:
    // vp9_convert_qindex_to_q(), vp9_ac_quant(), ac_qlookup*[].
    cr->thresh_dist_sb = ((int64_t)(q * q)) << 2;
    // Set up segmentation.
    // Clear down the segment map.
    vp9_enable_segmentation(&cm->seg);
    vp9_clearall_segfeatures(seg);
    // Select delta coding method.
    seg->abs_delta = SEGMENT_DELTADATA;
    // Note: setting temporal_update has no effect, as the seg-map coding
    // method (temporal or spatial) is determined in
    // vp9_choose_segmap_coding_method(), based on the coding cost of each
    // method. In error_resilient mode the last_frame_seg_map is set to 0, so
    // if temporal coding is used, it is relative to a zeroed previous map.
    // seg->temporal_update = 0;
    // Segment BASE "Q" feature is disabled so it defaults to the baseline Q.
    vp9_disable_segfeature(seg, CR_SEGMENT_ID_BASE, SEG_LVL_ALT_Q);
    // Use segment BOOST1 for in-frame Q adjustment.
    vp9_enable_segfeature(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q);
    // Use segment BOOST2 for more aggressive in-frame Q adjustment.
    vp9_enable_segfeature(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q);
    // Set the q delta for segment BOOST1.
    qindex_delta = compute_deltaq(cpi, cm->base_qindex, cr->rate_ratio_qdelta);
    cr->qindex_delta[1] = qindex_delta;
    // Compute rd-mult for segment BOOST1.
    qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ);
    cr->rdmult = vp9_compute_rd_mult(cpi, qindex2);
    vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q, qindex_delta);
    // Set a more aggressive (higher) q delta for segment BOOST2.
    qindex_delta = compute_deltaq(
        cpi, cm->base_qindex,
        VPXMIN(CR_MAX_RATE_TARGET_RATIO,
               0.1 * cr->rate_boost_fac * cr->rate_ratio_qdelta));
    cr->qindex_delta[2] = qindex_delta;
    vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta);
    // Reset if a resolution change has occurred.
    if (cpi->resize_pending != 0) vp9_cyclic_refresh_reset_resize(cpi);
    // Update the segmentation and refresh map.
    cyclic_refresh_update_map(cpi);
  }
}
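// Worked example (illustrative values, not from the source): with
// sb64_target_rate == 2500 the rate threshold above is
// (2500 << 8) << 2 == 2560000 (in bits * 256 units), and with q == 40.0 the
// distortion threshold is (int64_t)(40.0 * 40.0) << 2 == 6400. For BOOST2,
// hypothetical values rate_boost_fac == 15 and rate_ratio_qdelta == 2.0 give
// a rate ratio of VPXMIN(CR_MAX_RATE_TARGET_RATIO, 0.1 * 15 * 2.0) == 3.0,
// assuming CR_MAX_RATE_TARGET_RATIO is at least 3.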
static void accumulate_fp_tile_stat(TileDataEnc *tile_data,
                                    TileDataEnc *tile_data_t) {
  tile_data->fp_data.intra_factor += tile_data_t->fp_data.intra_factor;
  tile_data->fp_data.brightness_factor +=
      tile_data_t->fp_data.brightness_factor;
  tile_data->fp_data.coded_error += tile_data_t->fp_data.coded_error;
  tile_data->fp_data.sr_coded_error += tile_data_t->fp_data.sr_coded_error;
  tile_data->fp_data.frame_noise_energy +=
      tile_data_t->fp_data.frame_noise_energy;
  tile_data->fp_data.intra_error += tile_data_t->fp_data.intra_error;
  tile_data->fp_data.intercount += tile_data_t->fp_data.intercount;
  tile_data->fp_data.second_ref_count += tile_data_t->fp_data.second_ref_count;
  tile_data->fp_data.neutral_count += tile_data_t->fp_data.neutral_count;
  tile_data->fp_data.intra_count_low += tile_data_t->fp_data.intra_count_low;
  tile_data->fp_data.intra_count_high += tile_data_t->fp_data.intra_count_high;
  tile_data->fp_data.intra_skip_count += tile_data_t->fp_data.intra_skip_count;
  tile_data->fp_data.mvcount += tile_data_t->fp_data.mvcount;
  tile_data->fp_data.sum_mvr += tile_data_t->fp_data.sum_mvr;
  tile_data->fp_data.sum_mvr_abs += tile_data_t->fp_data.sum_mvr_abs;
  tile_data->fp_data.sum_mvc += tile_data_t->fp_data.sum_mvc;
  tile_data->fp_data.sum_mvc_abs += tile_data_t->fp_data.sum_mvc_abs;
  tile_data->fp_data.sum_mvrs += tile_data_t->fp_data.sum_mvrs;
  tile_data->fp_data.sum_mvcs += tile_data_t->fp_data.sum_mvcs;
  tile_data->fp_data.sum_in_vectors += tile_data_t->fp_data.sum_in_vectors;
  tile_data->fp_data.intra_smooth_count +=
      tile_data_t->fp_data.intra_smooth_count;
  tile_data->fp_data.image_data_start_row =
      VPXMIN(tile_data->fp_data.image_data_start_row,
             tile_data_t->fp_data.image_data_start_row) == INVALID_ROW
          ? VPXMAX(tile_data->fp_data.image_data_start_row,
                   tile_data_t->fp_data.image_data_start_row)
          : VPXMIN(tile_data->fp_data.image_data_start_row,
                   tile_data_t->fp_data.image_data_start_row);
}
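// The image_data_start_row merge above is easy to misread. An equivalent
// standalone helper is sketched below purely for illustration (the helper
// name is hypothetical, not part of the encoder), assuming INVALID_ROW is a
// sentinel that compares below any valid row index:
static int merge_image_data_start_row(int a, int b) {
  // If either side is still INVALID_ROW, VPXMIN picks the sentinel, so fall
  // back to the other (possibly also invalid) value via VPXMAX.
  if (VPXMIN(a, b) == INVALID_ROW) return VPXMAX(a, b);
  // Both rows are valid: keep the earlier one.
  return VPXMIN(a, b);
}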
void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi,
                                             const MODE_INFO *const mi,
                                             int mi_row, int mi_col,
                                             BLOCK_SIZE bsize) {
  const VP9_COMMON *const cm = &cpi->common;
  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
  const int bw = num_8x8_blocks_wide_lookup[bsize];
  const int bh = num_8x8_blocks_high_lookup[bsize];
  const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
  const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
  const int block_index = mi_row * cm->mi_cols + mi_col;
  int x, y;
  for (y = 0; y < ymis; y++)
    for (x = 0; x < xmis; x++) {
      int map_offset = block_index + y * cm->mi_cols + x;
      // Inter skip blocks were clearly not coded at the current qindex, so
      // don't update the map for them. For cases where motion is non-zero or
      // the reference frame isn't the previous frame, the previous value in
      // the map for this spatial location is not entirely correct.
      if ((!is_inter_block(mi) || !mi->skip) &&
          mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
        cr->last_coded_q_map[map_offset] =
            clamp(cm->base_qindex + cr->qindex_delta[mi->segment_id], 0, MAXQ);
      } else if (is_inter_block(mi) && mi->skip &&
                 mi->segment_id <= CR_SEGMENT_ID_BOOST2) {
        cr->last_coded_q_map[map_offset] = VPXMIN(
            clamp(cm->base_qindex + cr->qindex_delta[mi->segment_id], 0, MAXQ),
            cr->last_coded_q_map[map_offset]);
      }
    }
}
static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi,
                                                     SPEED_FEATURES *sf,
                                                     int speed) {
  VP9_COMMON *const cm = &cpi->common;

  if (speed >= 1) {
    if (VPXMIN(cm->width, cm->height) >= 720) {
      sf->disable_split_mask =
          cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
    } else {
      sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
    }
  }

  if (speed >= 2) {
    if (VPXMIN(cm->width, cm->height) >= 720) {
      sf->disable_split_mask =
          cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
    } else {
      sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
    }
  }

  if (speed >= 5) {
    if (VPXMIN(cm->width, cm->height) >= 720) {
      sf->partition_search_breakout_dist_thr = (1 << 25);
    } else {
      sf->partition_search_breakout_dist_thr = (1 << 23);
    }
  }

  if (speed >= 7) {
    sf->encode_breakout_thresh =
        (VPXMIN(cm->width, cm->height) >= 720) ? 800 : 300;
  }
}
// Prior to coding a given prediction block, of size bsize at (mi_row, mi_col),
// check if we should reset the segment_id, and update the cyclic_refresh map
// and segmentation map.
void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi,
                                       MB_MODE_INFO *const mbmi, int mi_row,
                                       int mi_col, BLOCK_SIZE bsize,
                                       int64_t rate, int64_t dist, int skip) {
  const VP9_COMMON *const cm = &cpi->common;
  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
  const int bw = num_8x8_blocks_wide_lookup[bsize];
  const int bh = num_8x8_blocks_high_lookup[bsize];
  const int xmis = VPXMIN(cm->mi_cols - mi_col, bw);
  const int ymis = VPXMIN(cm->mi_rows - mi_row, bh);
  const int block_index = mi_row * cm->mi_cols + mi_col;
  const int refresh_this_block =
      candidate_refresh_aq(cr, mbmi, rate, dist, bsize);
  // Default is to not update the refresh map.
  int new_map_value = cr->map[block_index];
  int x = 0;
  int y = 0;
  // If this block is labeled for refresh, check if we should reset the
  // segment_id.
  if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
    mbmi->segment_id = refresh_this_block;
    // Reset segment_id if it will be skipped.
    if (skip) mbmi->segment_id = CR_SEGMENT_ID_BASE;
  }
  // Update the cyclic refresh map, to be used for setting segmentation map
  // for the next frame. If the block will be refreshed this frame, mark it
  // as clean. The magnitude of the negative value influences how long before
  // we consider it for refresh again.
  if (cyclic_refresh_segment_id_boosted(mbmi->segment_id)) {
    new_map_value = -cr->time_for_refresh;
  } else if (refresh_this_block) {
    // Else if it is accepted as a candidate for refresh, and has not already
    // been refreshed (marked as 1), then mark it as a candidate for cleanup
    // at a future time (marked as 0); otherwise don't update it.
    if (cr->map[block_index] == 1) new_map_value = 0;
  } else {
    // Leave it marked as a block that is not a candidate for refresh.
    new_map_value = 1;
  }
  // Update entries in the cyclic refresh map with new_map_value, and
  // copy mbmi->segment_id into the global segmentation map.
  for (y = 0; y < ymis; y++)
    for (x = 0; x < xmis; x++) {
      int map_offset = block_index + y * cm->mi_cols + x;
      cr->map[map_offset] = new_map_value;
      cpi->segmentation_map[map_offset] = mbmi->segment_id;
    }
}
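// Map value lifecycle (a reading aid, derived from the code above and from
// cyclic_refresh_update_map() further below): a block refreshed now is
// stamped -cr->time_for_refresh and is incremented by one each time the
// refresh sweep passes over it, becoming a refresh candidate again once it
// reaches 0; a value of 1 marks a block that was rejected as a candidate.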
// Select a segment for the current block.
// The choice of segment for a block depends on the ratio of the projected
// bits for the block vs a target average and its spatial complexity.
void vp10_caq_select_segment(VP10_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs,
                             int mi_row, int mi_col, int projected_rate) {
  VP10_COMMON *const cm = &cpi->common;

  const int mi_offset = mi_row * cm->mi_cols + mi_col;
  const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
  const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
  const int xmis =
      VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]);
  const int ymis =
      VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]);
  int x, y;
  int i;
  unsigned char segment;

  if (0) {
    segment = DEFAULT_AQ2_SEG;
  } else {
    // Rate depends on fraction of a SB64 in frame (xmis * ymis / bw * bh).
    // It is converted to bits * 256 units.
    const int target_rate =
        (cpi->rc.sb64_target_rate * xmis * ymis * 256) / (bw * bh);
    double logvar;
    double low_var_thresh;
    const int aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth);

    vpx_clear_system_state();
    low_var_thresh =
        (cpi->oxcf.pass == 2)
            ? VPXMAX(cpi->twopass.mb_av_energy, MIN_DEFAULT_LV_THRESH)
            : DEFAULT_LV_THRESH;

    vp10_setup_src_planes(mb, cpi->Source, mi_row, mi_col);
    logvar = vp10_log_block_var(cpi, mb, bs);

    segment = AQ_C_SEGMENTS - 1;  // Just in case no break out below.
    for (i = 0; i < AQ_C_SEGMENTS; ++i) {
      // Test rate against a threshold value and variance against a threshold.
      // Increasing segment number (higher variance and complexity) = higher Q.
      if ((projected_rate < target_rate * aq_c_transitions[aq_strength][i]) &&
          (logvar < (low_var_thresh + aq_c_var_thresholds[aq_strength][i]))) {
        segment = i;
        break;
      }
    }
  }

  // Fill in the entries in the segment map corresponding to this SB64.
  for (y = 0; y < ymis; y++) {
    for (x = 0; x < xmis; x++) {
      cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment;
    }
  }
}
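// Worked example (illustrative): for a BLOCK_32X32 fully inside the frame,
// xmis * ymis == 4 * 4 and bw * bh == 8 * 8, so target_rate above is one
// quarter of sb64_target_rate, scaled by 256 to match projected_rate units.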
static void read_intra_frame_mode_info(VP10_COMMON *const cm,
                                       MACROBLOCKD *const xd, int mi_row,
                                       int mi_col, vpx_reader *r) {
  MODE_INFO *const mi = xd->mi[0];
  MB_MODE_INFO *const mbmi = &mi->mbmi;
  const MODE_INFO *above_mi = xd->above_mi;
  const MODE_INFO *left_mi = xd->left_mi;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  int i;
  const int mi_offset = mi_row * cm->mi_cols + mi_col;
  const int bw = xd->plane[0].n4_w >> 1;
  const int bh = xd->plane[0].n4_h >> 1;

  // TODO(slavarnway): move x_mis, y_mis into xd ?????
  const int x_mis = VPXMIN(cm->mi_cols - mi_col, bw);
  const int y_mis = VPXMIN(cm->mi_rows - mi_row, bh);

  mbmi->segment_id = read_intra_segment_id(cm, mi_offset, x_mis, y_mis, r);
  mbmi->skip = read_skip(cm, xd, mbmi->segment_id, r);
  mbmi->tx_size = read_tx_size(cm, xd, 1, r);
  mbmi->ref_frame[0] = INTRA_FRAME;
  mbmi->ref_frame[1] = NONE;

  switch (bsize) {
    case BLOCK_4X4:
      for (i = 0; i < 4; ++i)
        mi->bmi[i].as_mode =
            read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, i));
      mbmi->mode = mi->bmi[3].as_mode;
      break;
    case BLOCK_4X8:
      mi->bmi[0].as_mode = mi->bmi[2].as_mode =
          read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0));
      mi->bmi[1].as_mode = mi->bmi[3].as_mode = mbmi->mode =
          read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 1));
      break;
    case BLOCK_8X4:
      mi->bmi[0].as_mode = mi->bmi[1].as_mode =
          read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0));
      mi->bmi[2].as_mode = mi->bmi[3].as_mode = mbmi->mode =
          read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 2));
      break;
    default:
      mbmi->mode =
          read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0));
  }

  mbmi->uv_mode = read_intra_mode(r, vp10_kf_uv_mode_prob[mbmi->mode]);
}
void vpx_reader_fill(vpx_reader *r) {
  const uint8_t *const buffer_end = r->buffer_end;
  const uint8_t *buffer = r->buffer;
  const uint8_t *buffer_start = buffer;
  BD_VALUE value = r->value;
  int count = r->count;
  const size_t bytes_left = buffer_end - buffer;
  const size_t bits_left = bytes_left * CHAR_BIT;
  int shift = BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT);

  if (r->decrypt_cb) {
    size_t n = VPXMIN(sizeof(r->clear_buffer), bytes_left);
    r->decrypt_cb(r->decrypt_state, buffer, r->clear_buffer, (int)n);
    buffer = r->clear_buffer;
    buffer_start = r->clear_buffer;
  }
  if (bits_left > BD_VALUE_SIZE) {
    const int bits = (shift & 0xfffffff8) + CHAR_BIT;
    BD_VALUE nv;
    BD_VALUE big_endian_values;
    memcpy(&big_endian_values, buffer, sizeof(BD_VALUE));
#if SIZE_MAX == 0xffffffffffffffffULL
    big_endian_values = HToBE64(big_endian_values);
#else
    big_endian_values = HToBE32(big_endian_values);
#endif
    nv = big_endian_values >> (BD_VALUE_SIZE - bits);
    count += bits;
    buffer += (bits >> 3);
    value = r->value | (nv << (shift & 0x7));
  } else {
    // Slow path near the end of the buffer: append the remaining whole bytes
    // one at a time.
    const int bits_over = (int)(shift + CHAR_BIT - (int)bits_left);
    int loop_end = 0;
    if (bits_over >= 0) {
      count += LOTS_OF_BITS;
      loop_end = bits_over;
    }
    if (bits_over < 0 || bits_left) {
      while (shift >= loop_end) {
        count += CHAR_BIT;
        value |= (BD_VALUE)*buffer++ << shift;
        shift -= CHAR_BIT;
      }
    }
  }
  // 'buffer' may point into clear_buffer after decryption, so advance
  // r->buffer by the distance moved rather than assigning 'buffer' to it.
  r->buffer += buffer - buffer_start;
  r->value = value;
  r->count = count;
}
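// Worked example (illustrative): on a 64-bit build (BD_VALUE_SIZE == 64) with
// count == 8 bits already buffered, shift == 64 - 8 - (8 + 8) == 40, so the
// fast path above loads sizeof(BD_VALUE) bytes with one memcpy and appends
// bits == (40 & ~7) + 8 == 48 bits in a single shift/or.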
static void create_enc_workers(VP9_COMP *cpi, int num_workers) {
  VP9_COMMON *const cm = &cpi->common;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  int i;

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    // While using SVC, we need to allocate threads according to the highest
    // resolution. When row based multithreading is enabled, it is OK to
    // allocate more threads than the number of max tile columns.
    if (cpi->use_svc && !cpi->row_mt) {
      int max_tile_cols = get_max_tile_cols(cpi);
      allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
    }

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp9_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp9_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads.
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }
      winterface->sync(worker);
    }
  }
}
static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv,
                                              MV *dst_mv, int mb_row,
                                              int mb_col) {
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  const SEARCH_METHODS old_search_method = mv_sf->search_method;
  const vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];

  const int tmp_col_min = x->mv_col_min;
  const int tmp_col_max = x->mv_col_max;
  const int tmp_row_min = x->mv_row_min;
  const int tmp_row_max = x->mv_row_max;
  MV ref_full;
  int cost_list[5];

  // Further step/diamond searches as necessary.
  int step_param = mv_sf->reduce_first_step_size;
  step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2);

  vp9_set_mv_search_range(x, ref_mv);

  ref_full.col = ref_mv->col >> 3;
  ref_full.row = ref_mv->row >> 3;

  mv_sf->search_method = HEX;
  vp9_full_pixel_search(cpi, x, BLOCK_16X16, &ref_full, step_param,
                        x->errorperbit, cond_cost_list(cpi, cost_list), ref_mv,
                        dst_mv, 0, 0);
  mv_sf->search_method = old_search_method;

  // Try sub-pixel MC
  // if (bestsme > error_thresh && bestsme < INT_MAX)
  {
    uint32_t distortion;
    uint32_t sse;
    cpi->find_fractional_mv_step(
        x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit,
        &v_fn_ptr, 0, mv_sf->subpel_iters_per_step,
        cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0,
        0);
  }

  xd->mi[0]->mode = NEWMV;
  xd->mi[0]->mv[0].as_mv = *dst_mv;

  vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16);

  /* restore UMV window */
  x->mv_col_min = tmp_col_min;
  x->mv_col_max = tmp_col_max;
  x->mv_row_min = tmp_row_min;
  x->mv_row_max = tmp_row_max;

  return vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride,
                      xd->plane[0].dst.buf, xd->plane[0].dst.stride);
}
int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  const int64_t q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
#if CONFIG_VP9_HIGHBITDEPTH
  int64_t rdmult = 0;
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8: rdmult = 88 * q * q / 24; break;
    case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4); break;
    case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8); break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return -1;
  }
#else
  int64_t rdmult = 88 * q * q / 24;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
    const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));

    rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
    rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
  }
  if (rdmult < 1) rdmult = 1;
  return (int)rdmult;
}
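// Worked example (illustrative values): for 8-bit input, if vp9_dc_quant()
// returns q == 120, the base value is rdmult = 88 * 120 * 120 / 24 == 52800.
// The two-pass adjustment then scales by table entries in units of 1/128: an
// rd_frame_type_factor entry of 128 leaves rdmult unchanged, since
// (rdmult * 128) >> 7 == rdmult, while larger entries raise it.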
static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
                                struct macroblockd_plane planes[MAX_MB_PLANE],
                                int start, int stop, int y_only,
                                VPxWorker *workers, int nworkers,
                                VP9LfSync *lf_sync) {
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  // Number of superblock rows.
  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
  // Decoder may allocate more threads than number of tiles based on user's
  // input.
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int num_workers = VPXMIN(nworkers, tile_cols);
  int i;

  if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
      num_workers > lf_sync->num_workers) {
    vp10_loop_filter_dealloc(lf_sync);
    vp10_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
  }

  // Initialize cur_sb_col to -1 for all SB rows.
  memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);

  // Set up loopfilter thread data.
  // The decoder is capping num_workers because it has been observed that using
  // more threads on the loopfilter than there are cores will hurt performance
  // on Android. This is because the system will only schedule the tile decode
  // workers on cores equal to the number of tile columns. Then if the decoder
  // tries to use more threads for the loopfilter, it will hurt performance
  // because of contention. If the multithreading code changes in the future
  // then the number of workers used by the loopfilter should be revisited.
  for (i = 0; i < num_workers; ++i) {
    VPxWorker *const worker = &workers[i];
    LFWorkerData *const lf_data = &lf_sync->lfdata[i];

    worker->hook = (VPxWorkerHook)loop_filter_row_worker;
    worker->data1 = lf_sync;
    worker->data2 = lf_data;

    // Loopfilter data
    vp10_loop_filter_data_reset(lf_data, frame, cm, planes);
    lf_data->start = start + i * MI_BLOCK_SIZE;
    lf_data->stop = stop;
    lf_data->y_only = y_only;

    // Start loopfiltering
    if (i == num_workers - 1) {
      winterface->execute(worker);
    } else {
      winterface->launch(worker);
    }
  }

  // Wait till all rows are finished.
  for (i = 0; i < num_workers; ++i) {
    winterface->sync(&workers[i]);
  }
}
static TX_SIZE read_tx_size(VP10_COMMON *cm, MACROBLOCKD *xd, int allow_select,
                            vpx_reader *r) {
  TX_MODE tx_mode = cm->tx_mode;
  BLOCK_SIZE bsize = xd->mi[0]->mbmi.sb_type;
  const TX_SIZE max_tx_size = max_txsize_lookup[bsize];
  if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8)
    return read_selected_tx_size(cm, xd, max_tx_size, r);
  else
    return VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]);
}
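// Example (illustrative): with tx_mode == ONLY_4X4 the lookup yields TX_4X4
// and no bits are consumed; only TX_MODE_SELECT on a BLOCK_8X8-or-larger
// block entropy-codes the transform size per block via
// read_selected_tx_size().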
static vpx_codec_err_t vp8_peek_si_internal(const uint8_t *data,
                                            unsigned int data_sz,
                                            vpx_codec_stream_info_t *si,
                                            vpx_decrypt_cb decrypt_cb,
                                            void *decrypt_state) {
  vpx_codec_err_t res = VPX_CODEC_OK;

  assert(data != NULL);

  if (data + data_sz <= data) {
    res = VPX_CODEC_INVALID_PARAM;
  } else {
    /* Parse uncompressed part of key frame header.
     * 3 bytes:- including version, frame type and an offset
     * 3 bytes:- sync code (0x9d, 0x01, 0x2a)
     * 4 bytes:- including image width and height in the lowest 14 bits
     *           of each 2-byte value.
     */
    uint8_t clear_buffer[10];
    const uint8_t *clear = data;
    if (decrypt_cb) {
      int n = (int)VPXMIN(sizeof(clear_buffer), data_sz);
      decrypt_cb(decrypt_state, data, clear_buffer, n);
      clear = clear_buffer;
    }
    si->is_kf = 0;

    if (data_sz >= 10 && !(clear[0] & 0x01)) /* I-Frame */
    {
      si->is_kf = 1;

      /* vet via sync code */
      if (clear[3] != 0x9d || clear[4] != 0x01 || clear[5] != 0x2a)
        return VPX_CODEC_UNSUP_BITSTREAM;

      si->w = (clear[6] | (clear[7] << 8)) & 0x3fff;
      si->h = (clear[8] | (clear[9] << 8)) & 0x3fff;

      /*printf("w=%d, h=%d\n", si->w, si->h);*/
      if (!(si->h | si->w)) res = VPX_CODEC_UNSUP_BITSTREAM;
    } else {
      res = VPX_CODEC_UNSUP_BITSTREAM;
    }
  }
  return res;
}
void vp9_encode_tiles_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp9_init_tile_data(cpi);

  create_enc_workers(cpi, num_workers);

  for (i = 0; i < num_workers; i++) {
    EncWorkerData *thread_data;
    thread_data = &cpi->tile_thr_data[i];

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }

    // Handle use_nonrd_pick_mode case.
    if (cpi->sf.use_nonrd_pick_mode) {
      MACROBLOCK *const x = &thread_data->td->mb;
      MACROBLOCKD *const xd = &x->e_mbd;
      struct macroblock_plane *const p = x->plane;
      struct macroblockd_plane *const pd = xd->plane;
      PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
      int j;
      for (j = 0; j < MAX_MB_PLANE; ++j) {
        p[j].coeff = ctx->coeff_pbuf[j][0];
        p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
        pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
        p[j].eobs = ctx->eobs_pbuf[j][0];
      }
    }
  }

  launch_enc_workers(cpi, enc_worker_hook, NULL, num_workers);

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
static int dec_get_segment_id(const VP10_COMMON *cm,
                              const uint8_t *segment_ids, int mi_offset,
                              int x_mis, int y_mis) {
  int x, y, segment_id = INT_MAX;

  for (y = 0; y < y_mis; y++)
    for (x = 0; x < x_mis; x++)
      segment_id =
          VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]);

  assert(segment_id >= 0 && segment_id < MAX_SEGMENTS);
  return segment_id;
}
// Set golden frame update interval, for non-svc 1 pass CBR mode.
void vp9_cyclic_refresh_set_golden_update(VP9_COMP *const cpi) {
  RATE_CONTROL *const rc = &cpi->rc;
  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
  // Set minimum gf_interval for GF update to a multiple of the refresh period,
  // with some max limit. Depending on past encoding stats, GF flag may be
  // reset and update may not occur until next baseline_gf_interval.
  if (cr->percent_refresh > 0)
    rc->baseline_gf_interval = VPXMIN(4 * (100 / cr->percent_refresh), 40);
  else
    rc->baseline_gf_interval = 40;
  if (cpi->oxcf.rc_mode == VPX_VBR) rc->baseline_gf_interval = 20;
}
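// Worked example (illustrative): with percent_refresh == 10 the interval is
// VPXMIN(4 * (100 / 10), 40) == 40 frames; with percent_refresh == 20 it is
// VPXMIN(4 * 5, 40) == 20 frames, i.e. roughly four refresh cycles per golden
// frame update, capped at 40.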
static int read_inter_segment_id(VP10_COMMON *const cm, MACROBLOCKD *const xd,
                                 int mi_row, int mi_col, vpx_reader *r) {
  struct segmentation *const seg = &cm->seg;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  int predicted_segment_id, segment_id;
  const int mi_offset = mi_row * cm->mi_cols + mi_col;
  const int bw = xd->plane[0].n4_w >> 1;
  const int bh = xd->plane[0].n4_h >> 1;

  // TODO(slavarnway): move x_mis, y_mis into xd ?????
  const int x_mis = VPXMIN(cm->mi_cols - mi_col, bw);
  const int y_mis = VPXMIN(cm->mi_rows - mi_row, bh);

  if (!seg->enabled) return 0;  // Default for disabled segmentation

  predicted_segment_id = cm->last_frame_seg_map
                             ? dec_get_segment_id(cm, cm->last_frame_seg_map,
                                                  mi_offset, x_mis, y_mis)
                             : 0;

  if (!seg->update_map) {
    copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map,
                    mi_offset, x_mis, y_mis);
    return predicted_segment_id;
  }

  if (seg->temporal_update) {
    const vpx_prob pred_prob = vp10_get_pred_prob_seg_id(seg, xd);
    mbmi->seg_id_predicted = vpx_read(r, pred_prob);
    segment_id = mbmi->seg_id_predicted ? predicted_segment_id
                                        : read_segment_id(r, seg);
  } else {
    segment_id = read_segment_id(r, seg);
  }
  set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id);
  return segment_id;
}
static void encode_txfm_probs(VP9_COMMON *cm, vpx_writer *w,
                              FRAME_COUNTS *counts) {
  // Mode
  vpx_write_literal(w, VPXMIN(cm->tx_mode, ALLOW_32X32), 2);
  if (cm->tx_mode >= ALLOW_32X32)
    vpx_write_bit(w, cm->tx_mode == TX_MODE_SELECT);

  // Probabilities
  if (cm->tx_mode == TX_MODE_SELECT) {
    int i, j;
    unsigned int ct_8x8p[TX_SIZES - 3][2];
    unsigned int ct_16x16p[TX_SIZES - 2][2];
    unsigned int ct_32x32p[TX_SIZES - 1][2];

    for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
      tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], ct_8x8p);
      for (j = 0; j < TX_SIZES - 3; j++)
        vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p8x8[i][j], ct_8x8p[j]);
    }

    for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
      tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], ct_16x16p);
      for (j = 0; j < TX_SIZES - 2; j++)
        vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p16x16[i][j],
                                  ct_16x16p[j]);
    }

    for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
      tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], ct_32x32p);
      for (j = 0; j < TX_SIZES - 1; j++)
        vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p32x32[i][j],
                                  ct_32x32p[j]);
    }
  }
}
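// Encoding note (derived from the code above): TX_MODE has five values, so
// the header sends VPXMIN(tx_mode, ALLOW_32X32) as a 2-bit literal and, only
// when that value saturates at ALLOW_32X32, one extra bit to distinguish
// ALLOW_32X32 from TX_MODE_SELECT.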
static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data,
                                                unsigned int data_sz,
                                                vpx_codec_stream_info_t *si,
                                                int *is_intra_only,
                                                vpx_decrypt_cb decrypt_cb,
                                                void *decrypt_state) {
  int intra_only_flag = 0;
  uint8_t clear_buffer[9];

  if (data + data_sz <= data) return VPX_CODEC_INVALID_PARAM;

  si->is_kf = 0;
  si->w = si->h = 0;

  if (decrypt_cb) {
    data_sz = VPXMIN(sizeof(clear_buffer), data_sz);
    decrypt_cb(decrypt_state, data, clear_buffer, data_sz);
    data = clear_buffer;
  }

  {
    int show_frame;
    int error_resilient;
    struct vpx_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL };
    const int frame_marker = vpx_rb_read_literal(&rb, 2);
    const BITSTREAM_PROFILE profile = vp9_read_profile(&rb);

    if (frame_marker != VP9_FRAME_MARKER) return VPX_CODEC_UNSUP_BITSTREAM;

    if (profile >= MAX_PROFILES) return VPX_CODEC_UNSUP_BITSTREAM;

    if ((profile >= 2 && data_sz <= 1) || data_sz < 1)
      return VPX_CODEC_UNSUP_BITSTREAM;

    if (vpx_rb_read_bit(&rb)) {     // show an existing frame
      vpx_rb_read_literal(&rb, 3);  // Frame buffer to show.
      return VPX_CODEC_OK;
    }

    if (data_sz <= 8) return VPX_CODEC_UNSUP_BITSTREAM;

    si->is_kf = !vpx_rb_read_bit(&rb);
    show_frame = vpx_rb_read_bit(&rb);
    error_resilient = vpx_rb_read_bit(&rb);

    if (si->is_kf) {
      if (!vp9_read_sync_code(&rb)) return VPX_CODEC_UNSUP_BITSTREAM;

      if (!parse_bitdepth_colorspace_sampling(profile, &rb))
        return VPX_CODEC_UNSUP_BITSTREAM;
      vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h);
    } else {
      intra_only_flag = show_frame ? 0 : vpx_rb_read_bit(&rb);

      rb.bit_offset += error_resilient ? 0 : 2;  // reset_frame_context

      if (intra_only_flag) {
        if (!vp9_read_sync_code(&rb)) return VPX_CODEC_UNSUP_BITSTREAM;
        if (profile > PROFILE_0) {
          if (!parse_bitdepth_colorspace_sampling(profile, &rb))
            return VPX_CODEC_UNSUP_BITSTREAM;
        }
        rb.bit_offset += REF_FRAMES;  // refresh_frame_flags
        vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h);
      }
    }
  }
  if (is_intra_only != NULL) *is_intra_only = intra_only_flag;
  return VPX_CODEC_OK;
}
// Update the layer context from a change_config() call.
void vp9_update_layer_context_change_config(VP9_COMP *const cpi,
                                            const int target_bandwidth) {
  SVC *const svc = &cpi->svc;
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  const RATE_CONTROL *const rc = &cpi->rc;
  int sl, tl, layer = 0, spatial_layer_target;
  float bitrate_alloc = 1.0;

  if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) {
    for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
      for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
        layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
        svc->layer_context[layer].target_bandwidth =
            oxcf->layer_target_bitrate[layer];
      }

      layer = LAYER_IDS_TO_IDX(
          sl,
          ((oxcf->ts_number_layers - 1) < 0 ? 0 : (oxcf->ts_number_layers - 1)),
          oxcf->ts_number_layers);
      spatial_layer_target = svc->layer_context[layer].target_bandwidth =
          oxcf->layer_target_bitrate[layer];

      for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
        LAYER_CONTEXT *const lc =
            &svc->layer_context[sl * oxcf->ts_number_layers + tl];
        RATE_CONTROL *const lrc = &lc->rc;

        lc->spatial_layer_target_bandwidth = spatial_layer_target;
        bitrate_alloc = (float)lc->target_bandwidth / spatial_layer_target;
        lrc->starting_buffer_level =
            (int64_t)(rc->starting_buffer_level * bitrate_alloc);
        lrc->optimal_buffer_level =
            (int64_t)(rc->optimal_buffer_level * bitrate_alloc);
        lrc->maximum_buffer_size =
            (int64_t)(rc->maximum_buffer_size * bitrate_alloc);
        lrc->bits_off_target =
            VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size);
        lrc->buffer_level = VPXMIN(lrc->buffer_level, lrc->maximum_buffer_size);
        lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl];
        lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
        lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
        lrc->worst_quality = rc->worst_quality;
        lrc->best_quality = rc->best_quality;
      }
    }
  } else {
    int layer_end;

    if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
      layer_end = svc->number_temporal_layers;
    } else {
      layer_end = svc->number_spatial_layers;
    }

    for (layer = 0; layer < layer_end; ++layer) {
      LAYER_CONTEXT *const lc = &svc->layer_context[layer];
      RATE_CONTROL *const lrc = &lc->rc;

      lc->target_bandwidth = oxcf->layer_target_bitrate[layer];

      bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth;
      // Update buffer-related quantities.
      lrc->starting_buffer_level =
          (int64_t)(rc->starting_buffer_level * bitrate_alloc);
      lrc->optimal_buffer_level =
          (int64_t)(rc->optimal_buffer_level * bitrate_alloc);
      lrc->maximum_buffer_size =
          (int64_t)(rc->maximum_buffer_size * bitrate_alloc);
      lrc->bits_off_target =
          VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size);
      lrc->buffer_level = VPXMIN(lrc->buffer_level, lrc->maximum_buffer_size);
      // Update framerate-related quantities.
      if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) {
        lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer];
      } else {
        lc->framerate = cpi->framerate;
      }
      lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate);
      lrc->max_frame_bandwidth = rc->max_frame_bandwidth;
      // Update qp-related quantities.
      lrc->worst_quality = rc->worst_quality;
      lrc->best_quality = rc->best_quality;
    }
  }
}
void vp9_compute_skin_sb(VP9_COMP *const cpi, BLOCK_SIZE bsize, int mi_row,
                         int mi_col) {
  int i, j, num_bl;
  VP9_COMMON *const cm = &cpi->common;
  const uint8_t *src_y = cpi->Source->y_buffer;
  const uint8_t *src_u = cpi->Source->u_buffer;
  const uint8_t *src_v = cpi->Source->v_buffer;
  const int src_ystride = cpi->Source->y_stride;
  const int src_uvstride = cpi->Source->uv_stride;
  const int y_bsize = 4 << b_width_log2_lookup[bsize];
  const int uv_bsize = y_bsize >> 1;
  const int shy = (y_bsize == 8) ? 3 : 4;
  const int shuv = shy - 1;
  const int fac = y_bsize / 8;
  const int y_shift = src_ystride * (mi_row << 3) + (mi_col << 3);
  const int uv_shift = src_uvstride * (mi_row << 2) + (mi_col << 2);
  const int mi_row_limit = VPXMIN(mi_row + 8, cm->mi_rows - 2);
  const int mi_col_limit = VPXMIN(mi_col + 8, cm->mi_cols - 2);

  src_y += y_shift;
  src_u += uv_shift;
  src_v += uv_shift;

  for (i = mi_row; i < mi_row_limit; i += fac) {
    num_bl = 0;
    for (j = mi_col; j < mi_col_limit; j += fac) {
      int consec_zeromv = 0;
      int bl_index = i * cm->mi_cols + j;
      int bl_index1 = bl_index + 1;
      int bl_index2 = bl_index + cm->mi_cols;
      int bl_index3 = bl_index2 + 1;
      // Don't detect skin on the boundary.
      if (i == 0 || j == 0) continue;
      if (bsize == BLOCK_8X8)
        consec_zeromv = cpi->consec_zero_mv[bl_index];
      else
        consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index],
                               VPXMIN(cpi->consec_zero_mv[bl_index1],
                                      VPXMIN(cpi->consec_zero_mv[bl_index2],
                                             cpi->consec_zero_mv[bl_index3])));
      cpi->skin_map[bl_index] =
          vp9_compute_skin_block(src_y, src_u, src_v, src_ystride,
                                 src_uvstride, bsize, consec_zeromv, 0);
      num_bl++;
      src_y += y_bsize;
      src_u += uv_bsize;
      src_v += uv_bsize;
    }
    src_y += (src_ystride << shy) - (num_bl << shy);
    src_u += (src_uvstride << shuv) - (num_bl << shuv);
    src_v += (src_uvstride << shuv) - (num_bl << shuv);
  }

  // Remove isolated skin blocks (none of its neighbors are skin) and isolated
  // non-skin blocks (all of its neighbors are skin).
  // Skip 4 corner blocks which have only 3 neighbors to remove isolated skin
  // blocks. Skip superblock borders to remove isolated non-skin blocks.
  for (i = mi_row; i < mi_row_limit; i += fac) {
    for (j = mi_col; j < mi_col_limit; j += fac) {
      int bl_index = i * cm->mi_cols + j;
      int num_neighbor = 0;
      int mi, mj;
      int non_skin_threshold = 8;
      // Skip 4 corners.
      if ((i == mi_row && (j == mi_col || j == mi_col_limit - fac)) ||
          (i == mi_row_limit - fac && (j == mi_col || j == mi_col_limit - fac)))
        continue;
      // There are only 5 neighbors for non-skin blocks on the border.
      if (i == mi_row || i == mi_row_limit - fac || j == mi_col ||
          j == mi_col_limit - fac)
        non_skin_threshold = 5;

      for (mi = -fac; mi <= fac; mi += fac) {
        for (mj = -fac; mj <= fac; mj += fac) {
          if (i + mi >= mi_row && i + mi < mi_row_limit && j + mj >= mi_col &&
              j + mj < mi_col_limit) {
            int bl_neighbor_index = (i + mi) * cm->mi_cols + j + mj;
            if (cpi->skin_map[bl_neighbor_index]) num_neighbor++;
          }
        }
      }

      if (cpi->skin_map[bl_index] && num_neighbor < 2)
        cpi->skin_map[bl_index] = 0;
      if (!cpi->skin_map[bl_index] && num_neighbor == non_skin_threshold)
        cpi->skin_map[bl_index] = 1;
    }
  }
}
void vp9_encode_tiles_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp9_init_tile_data(cpi);

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    // While using SVC, we need to allocate threads according to the highest
    // resolution.
    if (cpi->use_svc) {
      int max_tile_cols = get_max_tile_cols(cpi);
      allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
    }

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp9_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp9_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads.
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }

      winterface->sync(worker);
    }
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *thread_data;

    worker->hook = (VPxWorkerHook)enc_worker_hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = NULL;
    thread_data = (EncWorkerData *)worker->data1;

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }

    // Handle use_nonrd_pick_mode case.
    if (cpi->sf.use_nonrd_pick_mode) {
      MACROBLOCK *const x = &thread_data->td->mb;
      MACROBLOCKD *const xd = &x->e_mbd;
      struct macroblock_plane *const p = x->plane;
      struct macroblockd_plane *const pd = xd->plane;
      PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
      int j;

      for (j = 0; j < MAX_MB_PLANE; ++j) {
        p[j].coeff = ctx->coeff_pbuf[j][0];
        p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
        pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
        p[j].eobs = ctx->eobs_pbuf[j][0];
      }
    }
  }

  // Encode a frame.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
int vp8_decode_frame(VP8D_COMP *pbi) {
  vp8_reader *const bc = &pbi->mbc[8];
  VP8_COMMON *const pc = &pbi->common;
  MACROBLOCKD *const xd = &pbi->mb;
  const unsigned char *data = pbi->fragments.ptrs[0];
  const unsigned int data_sz = pbi->fragments.sizes[0];
  const unsigned char *data_end = data + data_sz;
  ptrdiff_t first_partition_length_in_bytes;

  int i, j, k, l;
  const int *const mb_feature_data_bits = vp8_mb_feature_data_bits;
  int corrupt_tokens = 0;
  int prev_independent_partitions = pbi->independent_partitions;

  YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME];

  /* start with no corruption of current frame */
  xd->corrupted = 0;
  yv12_fb_new->corrupted = 0;

  if (data_end - data < 3) {
    if (!pbi->ec_active) {
      vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
                         "Truncated packet");
    }

    /* Declare the missing frame as an inter frame since it will
       be handled as an inter frame when we have estimated its
       motion vectors. */
    pc->frame_type = INTER_FRAME;
    pc->version = 0;
    pc->show_frame = 1;
    first_partition_length_in_bytes = 0;
  } else {
    unsigned char clear_buffer[10];
    const unsigned char *clear = data;
    if (pbi->decrypt_cb) {
      int n = (int)VPXMIN(sizeof(clear_buffer), data_sz);
      pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n);
      clear = clear_buffer;
    }

    pc->frame_type = (FRAME_TYPE)(clear[0] & 1);
    pc->version = (clear[0] >> 1) & 7;
    pc->show_frame = (clear[0] >> 4) & 1;
    first_partition_length_in_bytes =
        (clear[0] | (clear[1] << 8) | (clear[2] << 16)) >> 5;

    if (!pbi->ec_active &&
        (data + first_partition_length_in_bytes > data_end ||
         data + first_partition_length_in_bytes < data)) {
      vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
                         "Truncated packet or corrupt partition 0 length");
    }

    data += 3;
    clear += 3;

    vp8_setup_version(pc);

    if (pc->frame_type == KEY_FRAME) {
      /* vet via sync code */
      /* When error concealment is enabled we should only check the sync
       * code if we have enough bits available
       */
      if (data + 3 < data_end) {
        if (clear[0] != 0x9d || clear[1] != 0x01 || clear[2] != 0x2a) {
          vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM,
                             "Invalid frame sync code");
        }
      }

      /* If error concealment is enabled we should only parse the new size
       * if we have enough data. Otherwise we will end up with the wrong
       * size.
       */
      if (data + 6 < data_end) {
        pc->Width = (clear[3] | (clear[4] << 8)) & 0x3fff;
        pc->horiz_scale = clear[4] >> 6;
        pc->Height = (clear[5] | (clear[6] << 8)) & 0x3fff;
        pc->vert_scale = clear[6] >> 6;
        data += 7;
      } else if (!pbi->ec_active) {
        vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
                           "Truncated key frame header");
      } else {
        /* Error concealment is active, clear the frame. */
        data = data_end;
      }
    } else {
void vp10_encode_tiles_mt(VP10_COMP *cpi) {
  VP10_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp10_init_tile_data(cpi);

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp10_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp10_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads.
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }

      winterface->sync(worker);
    }
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *thread_data;

    worker->hook = (VPxWorkerHook)enc_worker_hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = NULL;
    thread_data = (EncWorkerData *)worker->data1;

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }
  }

  // Encode a frame.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      vp10_accumulate_frame_counts(cm, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
// Update the segmentation map, and related quantities: cyclic refresh map,
// refresh sb_index, and target number of blocks to be refreshed.
// The map is set to either 0/CR_SEGMENT_ID_BASE (no refresh) or to
// 1/CR_SEGMENT_ID_BOOST1 (refresh) for each superblock.
// Blocks labeled as BOOST1 may later get set to BOOST2 (during the
// encoding of the superblock).
static void cyclic_refresh_update_map(VP9_COMP *const cpi) {
  VP9_COMMON *const cm = &cpi->common;
  CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
  unsigned char *const seg_map = cpi->segmentation_map;
  int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame;
  int xmis, ymis, x, y;
  int consec_zero_mv_thresh = 0;
  int qindex_thresh = 0;
  int count_sel = 0;
  int count_tot = 0;
  memset(seg_map, CR_SEGMENT_ID_BASE, cm->mi_rows * cm->mi_cols);
  sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
  sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
  sbs_in_frame = sb_cols * sb_rows;
  // Number of target blocks to get the q delta (segment 1).
  block_count = cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100;
  // Set the segmentation map: cycle through the superblocks, starting at
  // cr->sb_index, and stopping when either block_count blocks have been found
  // to be refreshed, or we have passed through the whole frame.
  assert(cr->sb_index < sbs_in_frame);
  i = cr->sb_index;
  cr->target_num_seg_blocks = 0;
  if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) {
    consec_zero_mv_thresh = 100;
    if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium)
      consec_zero_mv_thresh = 80;
  }
  qindex_thresh =
      cpi->oxcf.content == VP9E_CONTENT_SCREEN
          ? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
          : vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex);
  do {
    int sum_map = 0;
    // Get the mi_row/mi_col corresponding to superblock index i.
    int sb_row_index = (i / sb_cols);
    int sb_col_index = i - sb_row_index * sb_cols;
    int mi_row = sb_row_index * MI_BLOCK_SIZE;
    int mi_col = sb_col_index * MI_BLOCK_SIZE;
    assert(mi_row >= 0 && mi_row < cm->mi_rows);
    assert(mi_col >= 0 && mi_col < cm->mi_cols);
    bl_index = mi_row * cm->mi_cols + mi_col;
    // Loop through all 8x8 blocks in superblock and update map.
    xmis =
        VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_64X64]);
    ymis =
        VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_64X64]);
    for (y = 0; y < ymis; y++) {
      for (x = 0; x < xmis; x++) {
        const int bl_index2 = bl_index + y * cm->mi_cols + x;
        // If the block is a candidate for clean up, then mark it for
        // possible boost/refresh (segment 1). The segment id may get reset
        // to 0 later if the block gets coded as anything other than ZEROMV.
        if (cr->map[bl_index2] == 0) {
          count_tot++;
          if (cr->last_coded_q_map[bl_index2] > qindex_thresh ||
              cr->consec_zero_mv[bl_index2] < consec_zero_mv_thresh) {
            sum_map++;
            count_sel++;
          }
        } else if (cr->map[bl_index2] < 0) {
          cr->map[bl_index2]++;
        }
      }
    }
    // Enforce constant segment over superblock.
    // If segment is at least half of superblock, set to 1.
    if (sum_map >= xmis * ymis / 2) {
      for (y = 0; y < ymis; y++)
        for (x = 0; x < xmis; x++) {
          seg_map[bl_index + y * cm->mi_cols + x] = CR_SEGMENT_ID_BOOST1;
        }
      cr->target_num_seg_blocks += xmis * ymis;
    }
    i++;
    if (i == sbs_in_frame) {
      i = 0;
    }
  } while (cr->target_num_seg_blocks < block_count && i != cr->sb_index);
  cr->sb_index = i;
  cr->reduce_refresh = 0;
  if (count_sel < (3 * count_tot) >> 2) cr->reduce_refresh = 1;
}
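// Worked example (illustrative): for a 640x360 frame, mi_cols == 80 and
// mi_rows == 45, so the frame has 3600 8x8 units; with percent_refresh == 10
// the target is block_count = 10 * 3600 / 100 == 360 units per frame, so a
// full refresh sweep takes roughly ten frames.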
static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi,
                                                       SPEED_FEATURES *sf,
                                                       int speed) {
  VP9_COMMON *const cm = &cpi->common;

  if (speed >= 1) {
    if (VPXMIN(cm->width, cm->height) >= 720) {
      sf->disable_split_mask =
          cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
      sf->partition_search_breakout_dist_thr = (1 << 23);
    } else {
      sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
      sf->partition_search_breakout_dist_thr = (1 << 21);
    }
  }

  if (speed >= 2) {
    if (VPXMIN(cm->width, cm->height) >= 720) {
      sf->disable_split_mask =
          cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
      sf->adaptive_pred_interp_filter = 0;
      sf->partition_search_breakout_dist_thr = (1 << 24);
      sf->partition_search_breakout_rate_thr = 120;
    } else {
      sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY;
      sf->partition_search_breakout_dist_thr = (1 << 22);
      sf->partition_search_breakout_rate_thr = 100;
    }
    sf->rd_auto_partition_min_limit = set_partition_min_limit(cm);
  }

  if (speed >= 3) {
    if (VPXMIN(cm->width, cm->height) >= 720) {
      sf->disable_split_mask = DISABLE_ALL_SPLIT;
      sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0;
      sf->partition_search_breakout_dist_thr = (1 << 25);
      sf->partition_search_breakout_rate_thr = 200;
    } else {
      sf->max_intra_bsize = BLOCK_32X32;
      sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT;
      sf->schedule_mode_search = cm->base_qindex < 175 ? 1 : 0;
      sf->partition_search_breakout_dist_thr = (1 << 23);
      sf->partition_search_breakout_rate_thr = 120;
    }
  }

  // If this is a two pass clip that fits the criteria for animated or
  // graphics content then reset disable_split_mask for speeds 1-4.
  // Also if the image edge is internal to the coded area.
  if ((speed >= 1) && (cpi->oxcf.pass == 2) &&
      ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ||
       (vp9_internal_image_edge(cpi)))) {
    sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
  }

  if (speed >= 4) {
    if (VPXMIN(cm->width, cm->height) >= 720) {
      sf->partition_search_breakout_dist_thr = (1 << 26);
    } else {
      sf->partition_search_breakout_dist_thr = (1 << 24);
    }
    sf->disable_split_mask = DISABLE_ALL_SPLIT;
  }
}
void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
                       MACROBLOCKD *xd, int global_level) {
  int r, c;
  int sbr, sbc;
  int nhsb, nvsb;
  od_dering_in *src[3];
  unsigned char *bskip;
  int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
  int stride;
  int bsize[3];
  int dec[3];
  int pli;
  int coeff_shift = VPXMAX(cm->bit_depth - 8, 0);
  nvsb = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
  nhsb = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
  bskip = vpx_malloc(sizeof(*bskip) * cm->mi_rows * cm->mi_cols);
  vp10_setup_dst_planes(xd->plane, frame, 0, 0);
  for (pli = 0; pli < 3; pli++) {
    dec[pli] = xd->plane[pli].subsampling_x;
    bsize[pli] = 8 >> dec[pli];
  }
  stride = bsize[0] * cm->mi_cols;
  for (pli = 0; pli < 3; pli++) {
    // sizeof(*src[pli]) is the per-sample element size of the plane buffer.
    src[pli] = vpx_malloc(sizeof(*src[pli]) * cm->mi_rows * cm->mi_cols * 64);
    for (r = 0; r < bsize[pli] * cm->mi_rows; ++r) {
      for (c = 0; c < bsize[pli] * cm->mi_cols; ++c) {
#if CONFIG_VPX_HIGHBITDEPTH
        if (cm->use_highbitdepth) {
          src[pli][r * stride + c] = CONVERT_TO_SHORTPTR(
              xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c];
        } else {
#endif
          src[pli][r * stride + c] =
              xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
#if CONFIG_VPX_HIGHBITDEPTH
        }
#endif
      }
    }
  }
  for (r = 0; r < cm->mi_rows; ++r) {
    for (c = 0; c < cm->mi_cols; ++c) {
      const MB_MODE_INFO *mbmi =
          &cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
      bskip[r * cm->mi_cols + c] = mbmi->skip;
    }
  }
  for (sbr = 0; sbr < nvsb; sbr++) {
    for (sbc = 0; sbc < nhsb; sbc++) {
      int level;
      int nhb, nvb;
      nhb = VPXMIN(MI_BLOCK_SIZE, cm->mi_cols - MI_BLOCK_SIZE * sbc);
      nvb = VPXMIN(MI_BLOCK_SIZE, cm->mi_rows - MI_BLOCK_SIZE * sbr);
      for (pli = 0; pli < 3; pli++) {
        int16_t dst[MI_BLOCK_SIZE * MI_BLOCK_SIZE * 8 * 8];
        int threshold;
#if DERING_REFINEMENT
        level = compute_level_from_index(
            global_level,
            cm->mi_grid_visible[MI_BLOCK_SIZE * sbr * cm->mi_stride +
                                MI_BLOCK_SIZE * sbc]->mbmi.dering_gain);
#else
        level = global_level;
#endif
        /* FIXME: This is a temporary hack that uses more conservative
           deringing for chroma. */
        if (pli) level = (level * 5 + 4) >> 3;
        if (sb_all_skip(cm, sbr * MI_BLOCK_SIZE, sbc * MI_BLOCK_SIZE))
          level = 0;
        threshold = level << coeff_shift;
        od_dering(&OD_DERING_VTBL_C, dst, MI_BLOCK_SIZE * bsize[pli],
                  &src[pli][sbr * stride * bsize[pli] * MI_BLOCK_SIZE +
                            sbc * bsize[pli] * MI_BLOCK_SIZE],
                  stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
                  &bskip[MI_BLOCK_SIZE * sbr * cm->mi_cols +
                         MI_BLOCK_SIZE * sbc],
                  cm->mi_cols, threshold, OD_DERING_NO_CHECK_OVERLAP,
                  coeff_shift);
        for (r = 0; r < bsize[pli] * nvb; ++r) {
          for (c = 0; c < bsize[pli] * nhb; ++c) {
#if CONFIG_VPX_HIGHBITDEPTH
            if (cm->use_highbitdepth) {
              CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
              [xd->plane[pli].dst.stride *
                   (bsize[pli] * MI_BLOCK_SIZE * sbr + r) +
               sbc * bsize[pli] * MI_BLOCK_SIZE + c] =
                  dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
            } else {
#endif
              xd->plane[pli]
                  .dst.buf[xd->plane[pli].dst.stride *
                               (bsize[pli] * MI_BLOCK_SIZE * sbr + r) +
                           sbc * bsize[pli] * MI_BLOCK_SIZE + c] =
                  dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
#if CONFIG_VPX_HIGHBITDEPTH
            }
#endif
          }
        }
      }
    }
  }
  for (pli = 0; pli < 3; pli++) {
    vpx_free(src[pli]);
  }
  vpx_free(bskip);
}