// Destroys a decoder instance.
//
// Ends the loop-filter worker and each of the pbi->num_tile_workers tile
// workers, frees the buffers hanging off |pbi| (loop-filter worker data, tile
// data, the tile worker arrays and, when tile workers exist, the loop-filter
// row-sync state), and finally frees |pbi| itself. Passing NULL is a no-op.
void av1_decoder_remove(AV1Decoder *pbi) {
  const AVxWorkerInterface *winterface;
  int t;

  if (pbi == NULL) return;

  winterface = aom_get_worker_interface();

  // Loop-filter worker and its private data.
  winterface->end(&pbi->lf_worker);
  aom_free(pbi->lf_worker.data1);
  aom_free(pbi->tile_data);

  // End every tile worker before the arrays backing them are released.
  for (t = 0; t < pbi->num_tile_workers; ++t) {
    winterface->end(&pbi->tile_workers[t]);
  }
  aom_free(pbi->tile_worker_data);
  aom_free(pbi->tile_worker_info);
  aom_free(pbi->tile_workers);

  // The row-sync state is only torn down when tile workers were in use.
  if (pbi->num_tile_workers > 0) av1_loop_filter_dealloc(&pbi->lf_row_sync);

#if CONFIG_ACCOUNTING
  aom_accounting_clear(&pbi->accounting);
#endif

  aom_free(pbi);
}
// Encodes the tiles of the current frame with a pool of worker threads.
//
// The worker count for this call is min(cpi->oxcf.max_threads, tile columns).
//
// First call only (cpi->num_workers == 0): allocates cpi->workers and
// cpi->tile_thr_data and initialises one worker per slot. Every slot except
// the last gets its own thread data (pc_tree, optional variance tree, frame
// counters) and has its thread created via reset(); the last slot reuses
// cpi->td.
//
// Every call: refreshes each worker's thread data from cpi, starts one job
// per worker (execute() for slot cpi->num_workers - 1, launch() for the
// others), waits for all of them, then folds the counters of the workers in
// slots below cpi->num_workers - 1 back into cm->counts and cpi->td.
//
// Allocation and thread-creation failures are reported through
// CHECK_MEM_ERROR / aom_internal_error on cm->error.
void av1_encode_tiles_mt(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
  const int num_workers = AOMMIN(cpi->oxcf.max_threads, cm->tile_cols);
  int w;

  av1_init_tile_data(cpi);

  // One-time creation of the workers and their per-thread data.
  if (cpi->num_workers == 0) {
    const int last = num_workers - 1;

    CHECK_MEM_ERROR(cm, cpi->workers,
                    aom_malloc(num_workers * sizeof(*cpi->workers)));
    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    aom_calloc(num_workers, sizeof(*cpi->tile_thr_data)));

    for (w = 0; w < num_workers; ++w) {
      AVxWorker *const worker = &cpi->workers[w];
      EncWorkerData *const wdata = &cpi->tile_thr_data[w];

      ++cpi->num_workers;
      winterface->init(worker);
      wdata->cpi = cpi;

      if (w >= last) {
        // Main thread acts as a worker and uses the thread data in cpi.
        wdata->td = &cpi->td;
      } else {
        // Private, zeroed thread data for this worker.
        CHECK_MEM_ERROR(cm, wdata->td,
                        aom_memalign(32, sizeof(*wdata->td)));
        av1_zero(*wdata->td);

        // Partition search trees.
        wdata->td->leaf_tree = NULL;
        wdata->td->pc_tree = NULL;
        av1_setup_pc_tree(cm, wdata->td);

        // The variance tree is only needed for variance-based partitioning.
        if (cpi->sf.partition_search_type == VAR_BASED_PARTITION)
          av1_setup_var_tree(cm, wdata->td);

        // Per-thread frame counters.
        CHECK_MEM_ERROR(cm, wdata->td->counts,
                        aom_calloc(1, sizeof(*wdata->td->counts)));

        // Create the thread itself.
        if (!winterface->reset(worker))
          aom_internal_error(&cm->error, AOM_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      }

      winterface->sync(worker);
    }
  }

  // Per-frame setup: bind the hook and refresh each worker's state from cpi.
  for (w = 0; w < num_workers; ++w) {
    AVxWorker *const worker = &cpi->workers[w];
    EncWorkerData *const wdata = &cpi->tile_thr_data[w];

    worker->hook = (AVxWorkerHook)enc_worker_hook;
    worker->data1 = wdata;
    worker->data2 = NULL;

    // Workers with their own thread data start from a copy of cpi's state.
    if (wdata->td != &cpi->td) {
      wdata->td->mb = cpi->td.mb;
      wdata->td->rd_counts = cpi->td.rd_counts;
    }
    if (wdata->td->counts != &cpi->common.counts) {
      memcpy(wdata->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }

#if CONFIG_PALETTE
    // Allocate buffers used by palette coding mode.
    // NOTE(review): this runs on every call, after the mb struct copy above
    // has replaced the worker's previous palette_buffer pointer; confirm
    // where the buffer from the previous call is released.
    if (cpi->common.allow_screen_content_tools && w < num_workers - 1) {
      MACROBLOCK *x = &wdata->td->mb;
      CHECK_MEM_ERROR(cm, x->palette_buffer,
                      aom_memalign(16, sizeof(*x->palette_buffer)));
    }
#endif  // CONFIG_PALETTE
  }

  // Start the jobs; worker w begins at tile w.
  for (w = 0; w < num_workers; ++w) {
    AVxWorker *const worker = &cpi->workers[w];
    EncWorkerData *const wdata = (EncWorkerData *)worker->data1;

    wdata->start = w;

    if (w != cpi->num_workers - 1)
      winterface->launch(worker);
    else
      winterface->execute(worker);
  }

  // Wait for every worker to finish.
  for (w = 0; w < num_workers; ++w) {
    winterface->sync(&cpi->workers[w]);
  }

  // Merge the counters of the workers in slots below cpi->num_workers - 1.
  for (w = 0; w < num_workers; ++w) {
    AVxWorker *const worker = &cpi->workers[w];
    EncWorkerData *const wdata = (EncWorkerData *)worker->data1;

    if (w >= cpi->num_workers - 1) continue;

    av1_accumulate_frame_counts(&cm->counts, wdata->td->counts);
    accumulate_rd_opt(&cpi->td, wdata->td);
  }
}
// Allocates and initialises a decoder instance.
//
// |pool| is stored as the decoder's frame buffer pool
// (pbi->common.buffer_pool).
//
// Returns the new decoder, or NULL if the decoder structure cannot be
// allocated or if an error is raised through cm->error while the instance is
// being set up. On that error path everything allocated here is released
// again.
AV1Decoder *av1_decoder_create(BufferPool *const pool) {
  // Both pointers are volatile because they are read again after longjmp().
  AV1Decoder *volatile const pbi = aom_memalign(32, sizeof(*pbi));
  AV1_COMMON *volatile const cm = pbi ? &pbi->common : NULL;

  if (!cm) return NULL;

  av1_zero(*pbi);

  if (setjmp(cm->error.jmp)) {
    cm->error.setjmp = 0;
    // av1_decoder_remove() does not release the frame contexts allocated
    // below, so free them here to avoid leaking them when setup fails part
    // way through. Both pointers are NULL until allocated (av1_zero above).
    aom_free(cm->frame_contexts);
    cm->frame_contexts = NULL;
    aom_free(cm->fc);
    cm->fc = NULL;
    av1_decoder_remove(pbi);
    return NULL;
  }

  cm->error.setjmp = 1;

  CHECK_MEM_ERROR(cm, cm->fc,
                  (FRAME_CONTEXT *)aom_memalign(32, sizeof(*cm->fc)));
  CHECK_MEM_ERROR(cm, cm->frame_contexts,
                  (FRAME_CONTEXT *)aom_memalign(
                      32, FRAME_CONTEXTS * sizeof(*cm->frame_contexts)));
  memset(cm->fc, 0, sizeof(*cm->fc));
  memset(cm->frame_contexts, 0, FRAME_CONTEXTS * sizeof(*cm->frame_contexts));

  pbi->need_resync = 1;
  once(initialize_dec);

  // Initialize the references to not point to any frame buffers.
  memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
  memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map));

  cm->current_video_frame = 0;
  pbi->ready_for_new_data = 1;
  pbi->common.buffer_pool = pool;

  cm->bit_depth = AOM_BITS_8;
  cm->dequant_bit_depth = AOM_BITS_8;

  // Decoder-specific mode-info callbacks.
  cm->alloc_mi = av1_dec_alloc_mi;
  cm->free_mi = av1_dec_free_mi;
  cm->setup_mi = av1_dec_setup_mi;

  av1_loop_filter_init(cm);

#if CONFIG_NCOBMC_ADAPT_WEIGHT
  get_default_ncobmc_kernels(cm);
#endif  // CONFIG_NCOBMC_ADAPT_WEIGHT

#if CONFIG_AOM_QM
  aom_qm_init(cm);
#endif
#if CONFIG_LOOP_RESTORATION
  av1_loop_restoration_precal();
#endif  // CONFIG_LOOP_RESTORATION
#if CONFIG_ACCOUNTING
  pbi->acct_enabled = 1;
  aom_accounting_init(&pbi->accounting);
#endif

  // Setup is complete; errors raised from here on must not jump back here.
  cm->error.setjmp = 0;

  aom_get_worker_interface()->init(&pbi->lf_worker);

  return pbi;
}
// av1_receive_compressed_data: hands one compressed frame (|size| bytes read
// through |*psource|) to the decoder instance |pbi|.
//
// NOTE(review): only the first part of this definition is visible here. The
// text stops right after the setjmp() error handler, so the frame decode
// itself and the normal return path are not covered by this comment.
//
// What the visible part does, in order:
//  - Resets cm->error.error_code to AOM_CODEC_OK.
//  - Treats size == 0 as a signal that frames are missing: when
//    cm->frame_refs[0].idx > 0, that reference buffer is marked corrupted.
//    NOTE(review): the test is `> 0`, so nothing is marked when idx is 0;
//    confirm whether `>= 0` was intended.
//  - Clears pbi->ready_for_new_data.
//  - When not in frame-parallel mode, and the buffer at cm->new_fb_idx has a
//    ref_count of 0, releases its raw frame buffer via pool->release_fb_cb.
//  - Picks a new buffer index with get_free_fb(); returns
//    AOM_CODEC_MEM_ERROR when that yields INVALID_IDX.
//  - Points cm->cur_frame and pbi->cur_buf at the new buffer and clears
//    pbi->hold_ref_buf. In frame-parallel mode this also records the owning
//    frame worker and resets the buffer's row/col progress to -1, between
//    av1_frameworker_lock_stats() and av1_frameworker_unlock_stats().
//  - Installs a setjmp() handler. When an error jumps back to it, the handler
//    clears cm->error.setjmp, sets pbi->ready_for_new_data, syncs the
//    loop-filter worker and every tile worker, then, with the buffer pool
//    locked, drops the reference counts held for the reference frames (only
//    if pbi->hold_ref_buf == 1) and for the current frame, calls
//    aom_clear_system_state() and returns -1.
int av1_receive_compressed_data(AV1Decoder *pbi, size_t size, const uint8_t **psource) { AV1_COMMON *volatile const cm = &pbi->common; BufferPool *volatile const pool = cm->buffer_pool; RefCntBuffer *volatile const frame_bufs = cm->buffer_pool->frame_bufs; const uint8_t *source = *psource; int retcode = 0; cm->error.error_code = AOM_CODEC_OK; if (size == 0) { // This is used to signal that we are missing frames. // We do not know if the missing frame(s) was supposed to update // any of the reference buffers, but we act conservative and // mark only the last buffer as corrupted. // // TODO(jkoleszar): Error concealment is undefined and non-normative // at this point, but if it becomes so, [0] may not always be the correct // thing to do here. if (cm->frame_refs[0].idx > 0) { assert(cm->frame_refs[0].buf != NULL); cm->frame_refs[0].buf->corrupted = 1; } } pbi->ready_for_new_data = 0; // Find a free buffer for the new frame, releasing the reference previously // held. // Check if the previous frame was a frame without any references to it. // Release frame buffer if not decoding in frame parallel mode. if (!cm->frame_parallel_decode && cm->new_fb_idx >= 0 && frame_bufs[cm->new_fb_idx].ref_count == 0) pool->release_fb_cb(pool->cb_priv, &frame_bufs[cm->new_fb_idx].raw_frame_buffer); // Find a free frame buffer. Return error if can not find any. cm->new_fb_idx = get_free_fb(cm); if (cm->new_fb_idx == INVALID_IDX) return AOM_CODEC_MEM_ERROR; // Assign a MV array to the frame buffer. cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx]; pbi->hold_ref_buf = 0; if (cm->frame_parallel_decode) { AVxWorker *const worker = pbi->frame_worker_owner; av1_frameworker_lock_stats(worker); frame_bufs[cm->new_fb_idx].frame_worker_owner = worker; // Reset decoding progress. 
pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; pbi->cur_buf->row = -1; pbi->cur_buf->col = -1; av1_frameworker_unlock_stats(worker); } else { pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; } if (setjmp(cm->error.jmp)) { const AVxWorkerInterface *const winterface = aom_get_worker_interface(); int i; cm->error.setjmp = 0; pbi->ready_for_new_data = 1; // Synchronize all threads immediately as a subsequent decode call may // cause a resize invalidating some allocations. winterface->sync(&pbi->lf_worker); for (i = 0; i < pbi->num_tile_workers; ++i) { winterface->sync(&pbi->tile_workers[i]); } lock_buffer_pool(pool); // Release all the reference buffers if worker thread is holding them. if (pbi->hold_ref_buf == 1) { int ref_index = 0, mask; for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { const int old_idx = cm->ref_frame_map[ref_index]; // Current thread releases the holding of reference frame. decrease_ref_count(old_idx, frame_bufs, pool); // Release the reference frame holding in the reference map for the // decoding of the next frame. if (mask & 1) decrease_ref_count(old_idx, frame_bufs, pool); ++ref_index; } // Current thread releases the holding of reference frame. for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { const int old_idx = cm->ref_frame_map[ref_index]; decrease_ref_count(old_idx, frame_bufs, pool); } pbi->hold_ref_buf = 0; } // Release current frame. decrease_ref_count(cm->new_fb_idx, frame_bufs, pool); unlock_buffer_pool(pool); aom_clear_system_state(); return -1; }
// Loop-filters superblock rows of |frame| using up to |nworkers| workers.
//
// The number of workers actually used is min(nworkers, cm->tile_cols).
// |lf_sync| is reallocated first when it has no sync range, when its row
// count differs from the frame's superblock row count, or when it was sized
// for fewer workers.
//
// With CONFIG_PARALLEL_DEBLOCKING two passes are run over the whole frame,
// vertical edges first and then horizontal edges; otherwise a single
// combined pass is run. In every pass worker w starts at row
// start + w * cm->mib_size and stops at |stop|; the last worker is started
// with execute(), the others with launch(), and all are synced before the
// pass ends.
static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                                struct macroblockd_plane *planes, int start,
                                int stop, int y_only, AVxWorker *workers,
                                int nworkers, AV1LfSync *lf_sync) {
#if CONFIG_EXT_PARTITION
  printf(
      "STOPPING: This code has not been modified to work with the "
      "extended coding unit size experiment");
  exit(EXIT_FAILURE);
#endif  // CONFIG_EXT_PARTITION
  const AVxWorkerInterface *const winterface = aom_get_worker_interface();
  // Number of superblock rows in the frame.
  const int sb_rows = mi_rows_aligned_to_sb(cm) >> cm->mib_size_log2;
  // The decoder may have been given more threads than there are tile
  // columns. The count is capped because using more loop-filter threads than
  // there are cores has been observed to hurt performance on Android, where
  // the tile decode workers are only scheduled on as many cores as there are
  // tile columns. Revisit this if the multithreading code changes.
  const int num_workers = AOMMIN(nworkers, cm->tile_cols);
#if CONFIG_PARALLEL_DEBLOCKING
  // Pass 1 filters all vertical edges, pass 2 all horizontal edges.
  const AVxWorkerHook pass_hooks[] = {
    (AVxWorkerHook)loop_filter_ver_row_worker,
    (AVxWorkerHook)loop_filter_hor_row_worker
  };
#else   // CONFIG_PARALLEL_DEBLOCKING
  const AVxWorkerHook pass_hooks[] = { (AVxWorkerHook)loop_filter_row_worker };
#endif  // CONFIG_PARALLEL_DEBLOCKING
  const int num_passes = (int)(sizeof(pass_hooks) / sizeof(pass_hooks[0]));
  int pass;
  int w;

  if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
      num_workers > lf_sync->num_workers) {
    av1_loop_filter_dealloc(lf_sync);
    av1_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
  }

  for (pass = 0; pass < num_passes; ++pass) {
    // Initialize cur_sb_col to -1 for all SB rows.
    memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);

    // Set up the loop-filter data of each worker and start it.
    for (w = 0; w < num_workers; ++w) {
      AVxWorker *const worker = &workers[w];
      LFWorkerData *const lf_data = &lf_sync->lfdata[w];

      worker->hook = pass_hooks[pass];
      worker->data1 = lf_sync;
      worker->data2 = lf_data;

      av1_loop_filter_data_reset(lf_data, frame, cm, planes);
      lf_data->start = start + w * cm->mib_size;
      lf_data->stop = stop;
      lf_data->y_only = y_only;

      if (w != num_workers - 1) {
        winterface->launch(worker);
      } else {
        winterface->execute(worker);
      }
    }

    // Wait till all rows are finished.
    for (w = 0; w < num_workers; ++w) {
      winterface->sync(&workers[w]);
    }
  }
}