// Fills in the per-reference error statistics of one macroblock for the
// mbgraph analysis.
//
//   cpi                 encoder instance; its td.mb macroblock state is
//                       reused as scratch for the searches.
//   stats               output record; the INTRA_FRAME, GOLDEN_FRAME and
//                       ALTREF_FRAME entries of stats->ref are written.
//   buf                 source frame; only its luma plane is read.
//   mb_y_offset         offset added to every luma base pointer used here.
//   golden_ref          golden reference frame, or NULL to skip the search.
//   prev_golden_ref_mv  forwarded to do_16x16_motion_search().
//   alt_ref             alt-ref frame, or NULL to skip the search.
//   mb_row, mb_col      forwarded to do_16x16_motion_search().
static void update_mbgraph_mb_stats(VP9_COMP *cpi, MBGRAPH_MB_STATS *stats,
                                    YV12_BUFFER_CONFIG *buf, int mb_y_offset,
                                    YV12_BUFFER_CONFIG *golden_ref,
                                    const MV *prev_golden_ref_mv,
                                    YV12_BUFFER_CONFIG *alt_ref, int mb_row,
                                    int mb_col) {
  MACROBLOCK *const mb = &cpi->td.mb;
  MACROBLOCKD *const mbd = &mb->e_mbd;
  VP9_COMMON *common = &cpi->common;
  int err;

  // FIXME: only plane 0 (luma) is configured; chroma is ignored entirely.
  mb->plane[0].src.buf = buf->y_buffer + mb_y_offset;
  mb->plane[0].src.stride = buf->y_stride;

  mbd->plane[0].dst.buf = get_frame_new_buffer(common)->y_buffer + mb_y_offset;
  mbd->plane[0].dst.stride = get_frame_new_buffer(common)->y_stride;

  // Intra 16x16 prediction. The stored error is never allowed below 1.
  err = find_best_16x16_intra(cpi, &stats->ref[INTRA_FRAME].m.mode);
  stats->ref[INTRA_FRAME].err = (err > 0) ? err : 1;

  // Golden frame: without a reference the entry is marked with the worst
  // possible error and a zero vector; otherwise a motion search is run
  // against the golden luma plane.
  if (!golden_ref) {
    stats->ref[GOLDEN_FRAME].err = INT_MAX;
    stats->ref[GOLDEN_FRAME].m.mv.as_int = 0;
  } else {
    mbd->plane[0].pre[0].buf = golden_ref->y_buffer + mb_y_offset;
    mbd->plane[0].pre[0].stride = golden_ref->y_stride;
    err = do_16x16_motion_search(cpi, prev_golden_ref_mv,
                                 &stats->ref[GOLDEN_FRAME].m.mv, mb_row,
                                 mb_col);
    stats->ref[GOLDEN_FRAME].err = err;
  }

  // Alt-ref frame: same fallback when absent; when present the error comes
  // from do_16x16_zerozero_search(). This must follow the golden search
  // because both reuse the pre[0] prediction slot.
  if (!alt_ref) {
    stats->ref[ALTREF_FRAME].err = INT_MAX;
    stats->ref[ALTREF_FRAME].m.mv.as_int = 0;
  } else {
    mbd->plane[0].pre[0].buf = alt_ref->y_buffer + mb_y_offset;
    mbd->plane[0].pre[0].stride = alt_ref->y_stride;
    err = do_16x16_zerozero_search(cpi, &stats->ref[ALTREF_FRAME].m.mv);
    stats->ref[ALTREF_FRAME].err = err;
  }
}
/* Applies the signaled reference-buffer updates after a frame is decoded:
 * every visited slot of ref_frame_map is replaced by its next_ref_frame_map
 * entry and the counts on the previous buffers are dropped. */
static void swap_frame_buffers(VP9Decoder *pbi) {
  VP9_COMMON *const cm = &pbi->common;
  BufferPool *const pool = cm->buffer_pool;
  RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
  int mask = pbi->refresh_frame_flags;
  int slot = 0;
  int i;

  // Walk the slots covered by the refresh mask (up to its highest set bit).
  while (mask) {
    const int prev_idx = cm->ref_frame_map[slot];

    // Give up the hold the current thread took on this reference.
    decrease_ref_count(prev_idx, frame_bufs, pool);

    // A refreshed slot also gives up the reference map's own hold.
    if (mask & 1) {
      decrease_ref_count(prev_idx, frame_bufs, pool);
    }

    cm->ref_frame_map[slot] = cm->next_ref_frame_map[slot];
    ++slot;
    mask >>= 1;
  }

  // Remaining slots: only the current thread's hold is released, and only
  // when this is not a show-existing-frame.
  while (slot < REF_FRAMES && !cm->show_existing_frame) {
    const int prev_idx = cm->ref_frame_map[slot];

    decrease_ref_count(prev_idx, frame_bufs, pool);
    cm->ref_frame_map[slot] = cm->next_ref_frame_map[slot];
    ++slot;
  }

  pbi->hold_ref_buf = 0;
  cm->frame_to_show = get_frame_new_buffer(cm);

  frame_bufs[cm->new_fb_idx].ref_count -= 1;

  // The active references stay invalid until the next frame starts.
  for (i = 0; i < 3; ++i) {
    cm->frame_refs[i].idx = -1;
  }
}
/* Applies the signaled reference-buffer updates after a frame is decoded:
 * every slot selected by refresh_frame_flags is re-pointed at the newly
 * decoded buffer through ref_cnt_fb(). */
static void swap_frame_buffers(VP9D_COMP *pbi) {
  VP9_COMMON *const cm = &pbi->common;
  int flags = pbi->refresh_frame_flags;
  int slot;

  // Bit N of the flags selects entry N of ref_frame_map.
  for (slot = 0; flags; ++slot, flags >>= 1) {
    if (flags & 1) {
      ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->ref_frame_map[slot],
                 cm->new_fb_idx);
    }
  }

  cm->frame_to_show = get_frame_new_buffer(cm);
  --cm->fb_idx_ref_cnt[cm->new_fb_idx];

  // The active references stay invalid until the next frame starts.
  for (slot = 0; slot < 3; ++slot) {
    cm->active_ref_idx[slot] = INT_MAX;
  }
}
/* Applies the signaled reference-buffer updates after a frame is decoded:
 * every visited slot of ref_frame_map is replaced by its next_ref_frame_map
 * entry and the counts on the previous buffers are dropped, all while the
 * buffer pool is locked. */
static void swap_frame_buffers(AV1Decoder *pbi) {
  AV1_COMMON *const cm = &pbi->common;
  BufferPool *const pool = cm->buffer_pool;
  RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
  int mask;
  int slot = 0;

  lock_buffer_pool(pool);

  // Walk the slots covered by the refresh mask (up to its highest set bit).
  mask = pbi->refresh_frame_flags;
  while (mask) {
    const int prev_idx = cm->ref_frame_map[slot];

    // Give up the hold the current thread took on this reference.
    decrease_ref_count(prev_idx, frame_bufs, pool);

    // A refreshed slot also gives up the hold the reference map kept for
    // decoding the next frame.
    if (mask & 1) {
      decrease_ref_count(prev_idx, frame_bufs, pool);
    }

    cm->ref_frame_map[slot] = cm->next_ref_frame_map[slot];
    ++slot;
    mask >>= 1;
  }

  // Remaining slots: only the current thread's hold is released, and only
  // when this is not a show-existing-frame.
  while (slot < REF_FRAMES && !cm->show_existing_frame) {
    const int prev_idx = cm->ref_frame_map[slot];

    decrease_ref_count(prev_idx, frame_bufs, pool);
    cm->ref_frame_map[slot] = cm->next_ref_frame_map[slot];
    ++slot;
  }

  unlock_buffer_pool(pool);

  pbi->hold_ref_buf = 0;
  cm->frame_to_show = get_frame_new_buffer(cm);

  // TODO(zoeliu): fix the reference-buffer update for the
  // cm->frame_parallel_decode == 1 scenario.
  // The new buffer's count is dropped unless the frame is both decoded in
  // frame-parallel mode and shown.
  if (!(cm->frame_parallel_decode && cm->show_frame)) {
    lock_buffer_pool(pool);
    frame_bufs[cm->new_fb_idx].ref_count -= 1;
    unlock_buffer_pool(pool);
  }

  // The active references stay invalid until the next frame starts.
  for (slot = 0; slot < INTER_REFS_PER_FRAME; ++slot) {
    cm->frame_refs[slot].idx = INVALID_IDX;
    cm->frame_refs[slot].buf = NULL;
  }
}
/* Applies the signaled reference-buffer updates after a frame is decoded:
 * every slot selected by refresh_frame_flags is re-pointed at the newly
 * decoded buffer, and a previous buffer whose count reaches zero is handed
 * back through the release callback. */
static void swap_frame_buffers(VP9Decoder *pbi) {
  VP9_COMMON *const cm = &pbi->common;
  int flags = pbi->refresh_frame_flags;
  int slot;

  // Bit N of the flags selects entry N of ref_frame_map.
  for (slot = 0; flags; ++slot, flags >>= 1) {
    int prev_idx;

    if (!(flags & 1)) continue;

    // Remember the outgoing buffer before ref_cnt_fb() overwrites the slot.
    prev_idx = cm->ref_frame_map[slot];
    ref_cnt_fb(cm->frame_bufs, &cm->ref_frame_map[slot], cm->new_fb_idx);

    // A negative index means the slot held no buffer; otherwise release the
    // old buffer once nothing references it any more.
    if (prev_idx >= 0 && cm->frame_bufs[prev_idx].ref_count == 0) {
      cm->release_fb_cb(cm->cb_priv,
                        &cm->frame_bufs[prev_idx].raw_frame_buffer);
    }
  }

  cm->frame_to_show = get_frame_new_buffer(cm);
  --cm->frame_bufs[cm->new_fb_idx].ref_count;

  // The active references stay invalid until the next frame starts.
  for (slot = 0; slot < 3; ++slot) {
    cm->frame_refs[slot].idx = INT_MAX;
  }
}
// VP9 decoder: Implement multi-threaded loopfilter that uses the tile // threads. void vp9_loop_filter_frame_mt(VP9Decoder *pbi, VP9_COMMON *cm, int frame_filter_level, int y_only, int partial_frame) { VP9LfSync *const lf_sync = &pbi->lf_row_sync; // Number of superblock rows and cols const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; const int tile_cols = 1 << cm->log2_tile_cols; const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols); int i; // Allocate memory used in thread synchronization. // This always needs to be done even if frame_filter_level is 0. if (!cm->current_video_frame || cm->last_height != cm->height) { if (cm->last_height != cm->height) { const int aligned_last_height = ALIGN_POWER_OF_TWO(cm->last_height, MI_SIZE_LOG2); const int last_sb_rows = mi_cols_aligned_to_sb(aligned_last_height >> MI_SIZE_LOG2) >> MI_BLOCK_SIZE_LOG2; vp9_loop_filter_dealloc(lf_sync, last_sb_rows); } vp9_loop_filter_alloc(cm, lf_sync, sb_rows, cm->width); } if (!frame_filter_level) return; vp9_loop_filter_frame_init(cm, frame_filter_level); // Initialize cur_sb_col to -1 for all SB rows. vpx_memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); // Set up loopfilter thread data. // The decoder is using num_workers instead of pbi->num_tile_workers // because it has been observed that using more threads on the // loopfilter, than there are tile columns in the frame will hurt // performance on Android. This is because the system will only // schedule the tile decode workers on cores equal to the number // of tile columns. Then if the decoder tries to use more threads for the // loopfilter, it will hurt performance because of contention. If the // multithreading code changes in the future then the number of workers // used by the loopfilter should be revisited. 
for (i = 0; i < num_workers; ++i) { VP9Worker *const worker = &pbi->tile_workers[i]; TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; LFWorkerData *const lf_data = &tile_data->lfdata; worker->hook = (VP9WorkerHook)loop_filter_row_worker; // Loopfilter data lf_data->frame_buffer = get_frame_new_buffer(cm); lf_data->cm = cm; lf_data->xd = pbi->mb; lf_data->start = i; lf_data->stop = sb_rows; lf_data->y_only = y_only; // always do all planes in decoder lf_data->lf_sync = lf_sync; lf_data->num_lf_workers = num_workers; // Start loopfiltering if (i == num_workers - 1) { vp9_worker_execute(worker); } else { vp9_worker_launch(worker); } } // Wait till all rows are finished for (i = 0; i < num_workers; ++i) { vp9_worker_sync(&pbi->tile_workers[i]); } }