void vp9_encode_tiles_mt(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols); int i; vp9_init_tile_data(cpi); create_enc_workers(cpi, num_workers); for (i = 0; i < num_workers; i++) { EncWorkerData *thread_data; thread_data = &cpi->tile_thr_data[i]; // Before encoding a frame, copy the thread data from cpi. if (thread_data->td != &cpi->td) { thread_data->td->mb = cpi->td.mb; thread_data->td->rd_counts = cpi->td.rd_counts; } if (thread_data->td->counts != &cpi->common.counts) { memcpy(thread_data->td->counts, &cpi->common.counts, sizeof(cpi->common.counts)); } // Handle use_nonrd_pick_mode case. if (cpi->sf.use_nonrd_pick_mode) { MACROBLOCK *const x = &thread_data->td->mb; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none; int j; for (j = 0; j < MAX_MB_PLANE; ++j) { p[j].coeff = ctx->coeff_pbuf[j][0]; p[j].qcoeff = ctx->qcoeff_pbuf[j][0]; pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0]; p[j].eobs = ctx->eobs_pbuf[j][0]; } } } launch_enc_workers(cpi, enc_worker_hook, NULL, num_workers); for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; EncWorkerData *const thread_data = (EncWorkerData *)worker->data1; // Accumulate counters. if (i < cpi->num_workers - 1) { vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0); accumulate_rd_opt(&cpi->td, thread_data->td); } } }
void av1_encode_tiles_mt(AV1_COMP *cpi) { AV1_COMMON *const cm = &cpi->common; const int tile_cols = cm->tile_cols; const AVxWorkerInterface *const winterface = aom_get_worker_interface(); const int num_workers = AOMMIN(cpi->oxcf.max_threads, tile_cols); int i; av1_init_tile_data(cpi); // Only run once to create threads and allocate thread data. if (cpi->num_workers == 0) { CHECK_MEM_ERROR(cm, cpi->workers, aom_malloc(num_workers * sizeof(*cpi->workers))); CHECK_MEM_ERROR(cm, cpi->tile_thr_data, aom_calloc(num_workers, sizeof(*cpi->tile_thr_data))); for (i = 0; i < num_workers; i++) { AVxWorker *const worker = &cpi->workers[i]; EncWorkerData *const thread_data = &cpi->tile_thr_data[i]; ++cpi->num_workers; winterface->init(worker); thread_data->cpi = cpi; if (i < num_workers - 1) { // Allocate thread data. CHECK_MEM_ERROR(cm, thread_data->td, aom_memalign(32, sizeof(*thread_data->td))); av1_zero(*thread_data->td); // Set up pc_tree. thread_data->td->leaf_tree = NULL; thread_data->td->pc_tree = NULL; av1_setup_pc_tree(cm, thread_data->td); // Set up variance tree if needed. if (cpi->sf.partition_search_type == VAR_BASED_PARTITION) av1_setup_var_tree(cm, thread_data->td); // Allocate frame counters in thread data. CHECK_MEM_ERROR(cm, thread_data->td->counts, aom_calloc(1, sizeof(*thread_data->td->counts))); // Create threads if (!winterface->reset(worker)) aom_internal_error(&cm->error, AOM_CODEC_ERROR, "Tile encoder thread creation failed"); } else { // Main thread acts as a worker and uses the thread data in cpi. thread_data->td = &cpi->td; } winterface->sync(worker); } } for (i = 0; i < num_workers; i++) { AVxWorker *const worker = &cpi->workers[i]; EncWorkerData *thread_data; worker->hook = (AVxWorkerHook)enc_worker_hook; worker->data1 = &cpi->tile_thr_data[i]; worker->data2 = NULL; thread_data = (EncWorkerData *)worker->data1; // Before encoding a frame, copy the thread data from cpi. if (thread_data->td != &cpi->td) { thread_data->td->mb = cpi->td.mb; thread_data->td->rd_counts = cpi->td.rd_counts; } if (thread_data->td->counts != &cpi->common.counts) { memcpy(thread_data->td->counts, &cpi->common.counts, sizeof(cpi->common.counts)); } #if CONFIG_PALETTE // Allocate buffers used by palette coding mode. if (cpi->common.allow_screen_content_tools && i < num_workers - 1) { MACROBLOCK *x = &thread_data->td->mb; CHECK_MEM_ERROR(cm, x->palette_buffer, aom_memalign(16, sizeof(*x->palette_buffer))); } #endif // CONFIG_PALETTE } // Encode a frame for (i = 0; i < num_workers; i++) { AVxWorker *const worker = &cpi->workers[i]; EncWorkerData *const thread_data = (EncWorkerData *)worker->data1; // Set the starting tile for each thread. thread_data->start = i; if (i == cpi->num_workers - 1) winterface->execute(worker); else winterface->launch(worker); } // Encoding ends. for (i = 0; i < num_workers; i++) { AVxWorker *const worker = &cpi->workers[i]; winterface->sync(worker); } for (i = 0; i < num_workers; i++) { AVxWorker *const worker = &cpi->workers[i]; EncWorkerData *const thread_data = (EncWorkerData *)worker->data1; // Accumulate counters. if (i < cpi->num_workers - 1) { av1_accumulate_frame_counts(&cm->counts, thread_data->td->counts); accumulate_rd_opt(&cpi->td, thread_data->td); } } }
void vp9_encode_tiles_row_mt(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt; int num_workers = VPXMAX(cpi->oxcf.max_threads, 1); int i; if (multi_thread_ctxt->allocated_tile_cols < tile_cols || multi_thread_ctxt->allocated_tile_rows < tile_rows || multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) { vp9_row_mt_mem_dealloc(cpi); vp9_init_tile_data(cpi); vp9_row_mt_mem_alloc(cpi); } else { vp9_init_tile_data(cpi); } create_enc_workers(cpi, num_workers); vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers); vp9_prepare_job_queue(cpi, ENCODE_JOB); vp9_multi_thread_tile_init(cpi); for (i = 0; i < num_workers; i++) { EncWorkerData *thread_data; thread_data = &cpi->tile_thr_data[i]; // Before encoding a frame, copy the thread data from cpi. if (thread_data->td != &cpi->td) { thread_data->td->mb = cpi->td.mb; thread_data->td->rd_counts = cpi->td.rd_counts; } if (thread_data->td->counts != &cpi->common.counts) { memcpy(thread_data->td->counts, &cpi->common.counts, sizeof(cpi->common.counts)); } // Handle use_nonrd_pick_mode case. if (cpi->sf.use_nonrd_pick_mode) { MACROBLOCK *const x = &thread_data->td->mb; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none; int j; for (j = 0; j < MAX_MB_PLANE; ++j) { p[j].coeff = ctx->coeff_pbuf[j][0]; p[j].qcoeff = ctx->qcoeff_pbuf[j][0]; pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0]; p[j].eobs = ctx->eobs_pbuf[j][0]; } } } launch_enc_workers(cpi, enc_row_mt_worker_hook, multi_thread_ctxt, num_workers); for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; EncWorkerData *const thread_data = (EncWorkerData *)worker->data1; // Accumulate counters. if (i < cpi->num_workers - 1) { vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0); accumulate_rd_opt(&cpi->td, thread_data->td); } } }
void vp10_encode_tiles_mt(VP10_COMP *cpi) { VP10_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols); int i; vp10_init_tile_data(cpi); // Only run once to create threads and allocate thread data. if (cpi->num_workers == 0) { int allocated_workers = num_workers; CHECK_MEM_ERROR(cm, cpi->workers, vpx_malloc(allocated_workers * sizeof(*cpi->workers))); CHECK_MEM_ERROR(cm, cpi->tile_thr_data, vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data))); for (i = 0; i < allocated_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; EncWorkerData *thread_data = &cpi->tile_thr_data[i]; ++cpi->num_workers; winterface->init(worker); if (i < allocated_workers - 1) { thread_data->cpi = cpi; // Allocate thread data. CHECK_MEM_ERROR(cm, thread_data->td, vpx_memalign(32, sizeof(*thread_data->td))); vp10_zero(*thread_data->td); // Set up pc_tree. thread_data->td->leaf_tree = NULL; thread_data->td->pc_tree = NULL; vp10_setup_pc_tree(cm, thread_data->td); // Allocate frame counters in thread data. CHECK_MEM_ERROR(cm, thread_data->td->counts, vpx_calloc(1, sizeof(*thread_data->td->counts))); // Create threads if (!winterface->reset(worker)) vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Tile encoder thread creation failed"); } else { // Main thread acts as a worker and uses the thread data in cpi. thread_data->cpi = cpi; thread_data->td = &cpi->td; } winterface->sync(worker); } } for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; EncWorkerData *thread_data; worker->hook = (VPxWorkerHook)enc_worker_hook; worker->data1 = &cpi->tile_thr_data[i]; worker->data2 = NULL; thread_data = (EncWorkerData*)worker->data1; // Before encoding a frame, copy the thread data from cpi. if (thread_data->td != &cpi->td) { thread_data->td->mb = cpi->td.mb; thread_data->td->rd_counts = cpi->td.rd_counts; } if (thread_data->td->counts != &cpi->common.counts) { memcpy(thread_data->td->counts, &cpi->common.counts, sizeof(cpi->common.counts)); } } // Encode a frame for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; EncWorkerData *const thread_data = (EncWorkerData*)worker->data1; // Set the starting tile for each thread. thread_data->start = i; if (i == cpi->num_workers - 1) winterface->execute(worker); else winterface->launch(worker); } // Encoding ends. for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; winterface->sync(worker); } for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; EncWorkerData *const thread_data = (EncWorkerData*)worker->data1; // Accumulate counters. if (i < cpi->num_workers - 1) { vp10_accumulate_frame_counts(cm, thread_data->td->counts, 0); accumulate_rd_opt(&cpi->td, thread_data->td); } } }
void vp9_encode_tiles_mt(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols); int i; vp9_init_tile_data(cpi); // Only run once to create threads and allocate thread data. if (cpi->num_workers == 0) { int allocated_workers = num_workers; // While using SVC, we need to allocate threads according to the highest // resolution. if (cpi->use_svc) { int max_tile_cols = get_max_tile_cols(cpi); allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols); } CHECK_MEM_ERROR(cm, cpi->workers, vpx_malloc(allocated_workers * sizeof(*cpi->workers))); CHECK_MEM_ERROR(cm, cpi->tile_thr_data, vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data))); for (i = 0; i < allocated_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; EncWorkerData *thread_data = &cpi->tile_thr_data[i]; ++cpi->num_workers; winterface->init(worker); if (i < allocated_workers - 1) { thread_data->cpi = cpi; // Allocate thread data. CHECK_MEM_ERROR(cm, thread_data->td, vpx_memalign(32, sizeof(*thread_data->td))); vp9_zero(*thread_data->td); // Set up pc_tree. thread_data->td->leaf_tree = NULL; thread_data->td->pc_tree = NULL; vp9_setup_pc_tree(cm, thread_data->td); // Allocate frame counters in thread data. CHECK_MEM_ERROR(cm, thread_data->td->counts, vpx_calloc(1, sizeof(*thread_data->td->counts))); // Create threads if (!winterface->reset(worker)) vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Tile encoder thread creation failed"); } else { // Main thread acts as a worker and uses the thread data in cpi. thread_data->cpi = cpi; thread_data->td = &cpi->td; } winterface->sync(worker); } } for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; EncWorkerData *thread_data; worker->hook = (VPxWorkerHook)enc_worker_hook; worker->data1 = &cpi->tile_thr_data[i]; worker->data2 = NULL; thread_data = (EncWorkerData*)worker->data1; // Before encoding a frame, copy the thread data from cpi. if (thread_data->td != &cpi->td) { thread_data->td->mb = cpi->td.mb; thread_data->td->rd_counts = cpi->td.rd_counts; } if (thread_data->td->counts != &cpi->common.counts) { memcpy(thread_data->td->counts, &cpi->common.counts, sizeof(cpi->common.counts)); } // Handle use_nonrd_pick_mode case. if (cpi->sf.use_nonrd_pick_mode) { MACROBLOCK *const x = &thread_data->td->mb; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none; int j; for (j = 0; j < MAX_MB_PLANE; ++j) { p[j].coeff = ctx->coeff_pbuf[j][0]; p[j].qcoeff = ctx->qcoeff_pbuf[j][0]; pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0]; p[j].eobs = ctx->eobs_pbuf[j][0]; } } } // Encode a frame for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; EncWorkerData *const thread_data = (EncWorkerData*)worker->data1; // Set the starting tile for each thread. thread_data->start = i; if (i == cpi->num_workers - 1) winterface->execute(worker); else winterface->launch(worker); } // Encoding ends. for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; winterface->sync(worker); } for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; EncWorkerData *const thread_data = (EncWorkerData*)worker->data1; // Accumulate counters. if (i < cpi->num_workers - 1) { vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0); accumulate_rd_opt(&cpi->td, thread_data->td); } } }