// Runs the FIRST_PASS_JOB queue on the encoder workers using
// first_pass_worker_hook, then hands cpi->tile_data[1 .. tile_cols - 1] to
// accumulate_fp_tile_stat() with cpi->tile_data[0] as the destination.
//
// cpi: encoder instance. Its row-MT context, worker data and tile data are
//      read and updated in place. Nothing is returned.
void vp9_encode_fp_row_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  // Evaluated once, before any deallocation, because the answer is needed on
  // both sides of vp9_init_tile_data().
  const int grow_row_mt_mem =
      multi_thread_ctxt->allocated_tile_cols < tile_cols ||
      multi_thread_ctxt->allocated_tile_rows < tile_rows ||
      multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows;
  int num_workers = VPXMAX(cpi->oxcf.max_threads, 1);
  TileDataEnc *stats_dst;
  int idx;

  // Row-MT memory is released and re-allocated around the tile data init only
  // when the current allocation is too small for this frame.
  if (grow_row_mt_mem) {
    vp9_row_mt_mem_dealloc(cpi);
  }
  vp9_init_tile_data(cpi);
  if (grow_row_mt_mem) {
    vp9_row_mt_mem_alloc(cpi);
  }

  create_enc_workers(cpi, num_workers);
  vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);
  vp9_prepare_job_queue(cpi, FIRST_PASS_JOB);
  vp9_multi_thread_tile_init(cpi);

  // Give every worker that owns separate thread data a fresh copy of the
  // macroblock state held in cpi.
  for (idx = 0; idx < num_workers; ++idx) {
    EncWorkerData *const worker_data = &cpi->tile_thr_data[idx];

    // A slot that points at cpi->td itself needs no copy.
    if (worker_data->td == &cpi->td) continue;
    worker_data->td->mb = cpi->td.mb;
  }

  launch_enc_workers(cpi, first_pass_worker_hook, multi_thread_ctxt,
                     num_workers);

  // Combine the remaining tile entries into the first one.
  stats_dst = &cpi->tile_data[0];
  for (idx = 1; idx < tile_cols; ++idx) {
    accumulate_fp_tile_stat(stats_dst, &cpi->tile_data[idx]);
  }
}
// Encodes the tiles of a frame with up to min(max_threads, tile_cols)
// workers running enc_worker_hook, then merges the per-worker frame counts
// and RD counters back into cpi.
//
// cpi: encoder instance. Worker thread data is refreshed from it before the
//      launch and its counters are updated afterwards. Nothing is returned.
void vp9_encode_tiles_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int w;

  vp9_init_tile_data(cpi);
  create_enc_workers(cpi, num_workers);

  for (w = 0; w < num_workers; ++w) {
    EncWorkerData *const worker_data = &cpi->tile_thr_data[w];

    // Workers with their own thread data start each frame from the state
    // currently held in cpi.
    if (worker_data->td != &cpi->td) {
      worker_data->td->mb = cpi->td.mb;
      worker_data->td->rd_counts = cpi->td.rd_counts;
    }
    // Same for the frame counters, unless the worker already points at the
    // shared ones (cm is &cpi->common).
    if (worker_data->td->counts != &cm->counts) {
      memcpy(worker_data->td->counts, &cm->counts, sizeof(cm->counts));
    }

    // In non-RD pick mode, point each plane's coefficient buffers at the
    // first buffer set of the root pick-mode context.
    if (cpi->sf.use_nonrd_pick_mode) {
      MACROBLOCK *const mb = &worker_data->td->mb;
      struct macroblock_plane *const mb_planes = mb->plane;
      struct macroblockd_plane *const mbd_planes = mb->e_mbd.plane;
      PICK_MODE_CONTEXT *const root_ctx = &worker_data->td->pc_root->none;
      int plane;

      for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
        mb_planes[plane].coeff = root_ctx->coeff_pbuf[plane][0];
        mb_planes[plane].qcoeff = root_ctx->qcoeff_pbuf[plane][0];
        mbd_planes[plane].dqcoeff = root_ctx->dqcoeff_pbuf[plane][0];
        mb_planes[plane].eobs = root_ctx->eobs_pbuf[plane][0];
      }
    }
  }

  launch_enc_workers(cpi, enc_worker_hook, NULL, num_workers);

  // Merge counters from every worker below index cpi->num_workers - 1.
  // NOTE(review): the last slot is presumably the one that works directly on
  // cpi->td and so has nothing to merge; confirm against create_enc_workers().
  for (w = 0; w < num_workers; ++w) {
    EncWorkerData *const worker_data = (EncWorkerData *)cpi->workers[w].data1;

    if (w >= cpi->num_workers - 1) continue;
    vp9_accumulate_frame_counts(&cm->counts, worker_data->td->counts, 0);
    accumulate_rd_opt(&cpi->td, worker_data->td);
  }
}
// Runs the ARNR_JOB queue on the encoder workers using
// temporal_filter_worker_hook.
//
// cpi: encoder instance. Its row-MT context, worker data and tile data are
//      read and updated in place. Nothing is returned.
void vp9_temporal_filter_row_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  // Evaluated once, before any deallocation, because the answer is needed on
  // both sides of vp9_init_tile_data().
  const int grow_row_mt_mem =
      multi_thread_ctxt->allocated_tile_cols < tile_cols ||
      multi_thread_ctxt->allocated_tile_rows < tile_rows ||
      multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows;
  int num_workers = cpi->num_workers;
  int idx;

  // Use the existing worker count; fall back to a single worker when none
  // has been recorded yet.
  if (!num_workers) {
    num_workers = 1;
  }

  // Row-MT memory is released and re-allocated around the tile data init only
  // when the current allocation is too small for this frame.
  if (grow_row_mt_mem) {
    vp9_row_mt_mem_dealloc(cpi);
  }
  vp9_init_tile_data(cpi);
  if (grow_row_mt_mem) {
    vp9_row_mt_mem_alloc(cpi);
  }

  create_enc_workers(cpi, num_workers);
  vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);
  vp9_prepare_job_queue(cpi, ARNR_JOB);

  // Give every worker that owns separate thread data a fresh copy of the
  // macroblock state held in cpi.
  for (idx = 0; idx < num_workers; ++idx) {
    EncWorkerData *const worker_data = &cpi->tile_thr_data[idx];

    // A slot that points at cpi->td itself needs no copy.
    if (worker_data->td == &cpi->td) continue;
    worker_data->td->mb = cpi->td.mb;
  }

  launch_enc_workers(cpi, temporal_filter_worker_hook, multi_thread_ctxt,
                     num_workers);
}
// Runs the ENCODE_JOB queue on the encoder workers using
// enc_row_mt_worker_hook, then merges the per-worker frame counts and RD
// counters back into cpi.
//
// cpi: encoder instance. Its row-MT context, worker data, tile data and
//      counters are read and updated in place. Nothing is returned.
void vp9_encode_tiles_row_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  // Evaluated once, before any deallocation, because the answer is needed on
  // both sides of vp9_init_tile_data().
  const int grow_row_mt_mem =
      multi_thread_ctxt->allocated_tile_cols < tile_cols ||
      multi_thread_ctxt->allocated_tile_rows < tile_rows ||
      multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows;
  int num_workers = VPXMAX(cpi->oxcf.max_threads, 1);
  int w;

  // Row-MT memory is released and re-allocated around the tile data init only
  // when the current allocation is too small for this frame.
  if (grow_row_mt_mem) {
    vp9_row_mt_mem_dealloc(cpi);
  }
  vp9_init_tile_data(cpi);
  if (grow_row_mt_mem) {
    vp9_row_mt_mem_alloc(cpi);
  }

  create_enc_workers(cpi, num_workers);
  vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers);
  vp9_prepare_job_queue(cpi, ENCODE_JOB);
  vp9_multi_thread_tile_init(cpi);

  for (w = 0; w < num_workers; ++w) {
    EncWorkerData *const worker_data = &cpi->tile_thr_data[w];

    // Workers with their own thread data start each frame from the state
    // currently held in cpi.
    if (worker_data->td != &cpi->td) {
      worker_data->td->mb = cpi->td.mb;
      worker_data->td->rd_counts = cpi->td.rd_counts;
    }
    // Same for the frame counters, unless the worker already points at the
    // shared ones (cm is &cpi->common).
    if (worker_data->td->counts != &cm->counts) {
      memcpy(worker_data->td->counts, &cm->counts, sizeof(cm->counts));
    }

    // In non-RD pick mode, point each plane's coefficient buffers at the
    // first buffer set of the root pick-mode context.
    if (cpi->sf.use_nonrd_pick_mode) {
      MACROBLOCK *const mb = &worker_data->td->mb;
      struct macroblock_plane *const mb_planes = mb->plane;
      struct macroblockd_plane *const mbd_planes = mb->e_mbd.plane;
      PICK_MODE_CONTEXT *const root_ctx = &worker_data->td->pc_root->none;
      int plane;

      for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
        mb_planes[plane].coeff = root_ctx->coeff_pbuf[plane][0];
        mb_planes[plane].qcoeff = root_ctx->qcoeff_pbuf[plane][0];
        mbd_planes[plane].dqcoeff = root_ctx->dqcoeff_pbuf[plane][0];
        mb_planes[plane].eobs = root_ctx->eobs_pbuf[plane][0];
      }
    }
  }

  launch_enc_workers(cpi, enc_row_mt_worker_hook, multi_thread_ctxt,
                     num_workers);

  // Merge counters from every worker below index cpi->num_workers - 1.
  // NOTE(review): the last slot is presumably the one that works directly on
  // cpi->td and so has nothing to merge; confirm against create_enc_workers().
  for (w = 0; w < num_workers; ++w) {
    EncWorkerData *const worker_data = (EncWorkerData *)cpi->workers[w].data1;

    if (w >= cpi->num_workers - 1) continue;
    vp9_accumulate_frame_counts(&cm->counts, worker_data->td->counts, 0);
    accumulate_rd_opt(&cpi->td, worker_data->td);
  }
}
// Encodes the tiles of a frame with up to min(max_threads, tile_cols)
// workers running enc_worker_hook. On the first call (cpi->num_workers == 0)
// the worker array and per-worker thread data are created; on every call the
// thread data is refreshed from cpi, the workers are run and synced, and the
// per-worker frame counts and RD counters are merged back into cpi.
//
// cpi: encoder instance, read and updated in place. Nothing is returned.
//      Allocation or thread-creation failures are reported through
//      CHECK_MEM_ERROR / vpx_internal_error on cm.
void vp9_encode_tiles_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp9_init_tile_data(cpi);

  // One-time setup: create the threads and allocate their thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers;

    if (cpi->use_svc) {
      // With SVC the pool is sized for the highest resolution, i.e. from the
      // maximum tile column count rather than this frame's.
      const int max_tile_cols = get_max_tile_cols(cpi);
      allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
    } else {
      allocated_workers = num_workers;
    }

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];
      const int is_last_slot = (i == allocated_workers - 1);

      ++cpi->num_workers;
      winterface->init(worker);

      thread_data->cpi = cpi;

      if (is_last_slot) {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->td = &cpi->td;
      } else {
        // Separate, zeroed thread data for this worker.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp9_zero(*thread_data->td);

        // Partition-tree storage for this worker.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp9_setup_pc_tree(cm, thread_data->td);

        // Private frame counters for this worker.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create the thread itself.
        if (!winterface->reset(worker)) {
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
        }
      }

      winterface->sync(worker);
    }
  }

  // Per-frame setup of the workers that will be used for this frame.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const worker_data = &cpi->tile_thr_data[i];

    worker->hook = (VPxWorkerHook)enc_worker_hook;
    worker->data1 = worker_data;
    worker->data2 = NULL;

    // Workers with their own thread data start each frame from the state
    // currently held in cpi.
    if (worker_data->td != &cpi->td) {
      worker_data->td->mb = cpi->td.mb;
      worker_data->td->rd_counts = cpi->td.rd_counts;
    }
    // Same for the frame counters, unless the worker already points at the
    // shared ones (cm is &cpi->common).
    if (worker_data->td->counts != &cm->counts) {
      memcpy(worker_data->td->counts, &cm->counts, sizeof(cm->counts));
    }

    // In non-RD pick mode, point each plane's coefficient buffers at the
    // first buffer set of the root pick-mode context.
    if (cpi->sf.use_nonrd_pick_mode) {
      MACROBLOCK *const mb = &worker_data->td->mb;
      struct macroblock_plane *const mb_planes = mb->plane;
      struct macroblockd_plane *const mbd_planes = mb->e_mbd.plane;
      PICK_MODE_CONTEXT *const root_ctx = &worker_data->td->pc_root->none;
      int plane;

      for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
        mb_planes[plane].coeff = root_ctx->coeff_pbuf[plane][0];
        mb_planes[plane].qcoeff = root_ctx->qcoeff_pbuf[plane][0];
        mbd_planes[plane].dqcoeff = root_ctx->dqcoeff_pbuf[plane][0];
        mb_planes[plane].eobs = root_ctx->eobs_pbuf[plane][0];
      }
    }
  }

  // Start encoding: worker i begins at tile i. The slot at index
  // cpi->num_workers - 1 is run through execute(), all others through
  // launch().
  // NOTE(review): execute() presumably runs the hook synchronously on the
  // calling thread; confirm against the worker interface.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const worker_data = (EncWorkerData *)worker->data1;

    worker_data->start = i;

    if (i != cpi->num_workers - 1) {
      winterface->launch(worker);
    } else {
      winterface->execute(worker);
    }
  }

  // Wait for every worker used for this frame.
  for (i = 0; i < num_workers; i++) {
    winterface->sync(&cpi->workers[i]);
  }

  // Merge counters from every worker below index cpi->num_workers - 1; the
  // last slot uses cpi->td directly (see the one-time setup above).
  for (i = 0; i < num_workers; i++) {
    EncWorkerData *const worker_data = (EncWorkerData *)cpi->workers[i].data1;

    if (i >= cpi->num_workers - 1) continue;
    vp9_accumulate_frame_counts(cm, worker_data->td->counts, 0);
    accumulate_rd_opt(&cpi->td, worker_data->td);
  }
}