// Allocate memory for lf row synchronization
void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows,
                           int width) {
  lf_sync->rows = rows;
#if CONFIG_MULTITHREAD
  {
    int i;

    CHECK_MEM_ERROR(cm, lf_sync->mutex_,
                    vpx_malloc(sizeof(*lf_sync->mutex_) * rows));
    for (i = 0; i < rows; ++i) {
      pthread_mutex_init(&lf_sync->mutex_[i], NULL);
    }

    CHECK_MEM_ERROR(cm, lf_sync->cond_,
                    vpx_malloc(sizeof(*lf_sync->cond_) * rows));
    for (i = 0; i < rows; ++i) {
      pthread_cond_init(&lf_sync->cond_[i], NULL);
    }
  }
#endif  // CONFIG_MULTITHREAD

  CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col,
                  vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows));

  // Set up nsync.
  lf_sync->sync_range = get_sync_range(width);
}
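// For context: a minimal sketch (not necessarily the exact library code) of
// the row-sync read that consumes the per-row mutex/cond pairs allocated
// above. Before filtering column c of row r, a worker waits until the row
// above has advanced at least sync_range superblock columns past c.
static INLINE void lf_sync_read_sketch(VP9LfSync *const lf_sync, int r,
                                       int c) {
#if CONFIG_MULTITHREAD
  const int nsync = lf_sync->sync_range;
  if (r && !(c & (nsync - 1))) {
    pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1];
    pthread_mutex_lock(mutex);
    // Block until the row above has filtered past column c + nsync.
    while (c > lf_sync->cur_sb_col[r - 1] - nsync) {
      pthread_cond_wait(&lf_sync->cond_[r - 1], mutex);
    }
    pthread_mutex_unlock(mutex);
  }
#endif  // CONFIG_MULTITHREAD
}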
CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) {
  size_t last_coded_q_map_size;
  size_t consec_zero_mv_size;
  CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr));
  if (cr == NULL) return NULL;

  cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map));
  if (cr->map == NULL) {
    vpx_free(cr);
    return NULL;
  }
  last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map);
  cr->last_coded_q_map = vpx_malloc(last_coded_q_map_size);
  if (cr->last_coded_q_map == NULL) {
    // Free earlier allocations on failure to avoid leaking them.
    vpx_free(cr->map);
    vpx_free(cr);
    return NULL;
  }
  assert(MAXQ <= 255);
  memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);

  consec_zero_mv_size = mi_rows * mi_cols * sizeof(*cr->consec_zero_mv);
  cr->consec_zero_mv = vpx_malloc(consec_zero_mv_size);
  if (cr->consec_zero_mv == NULL) {
    vpx_free(cr->last_coded_q_map);
    vpx_free(cr->map);
    vpx_free(cr);
    return NULL;
  }
  memset(cr->consec_zero_mv, 0, consec_zero_mv_size);

  return cr;
}
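// A matching teardown, sketched from the fields allocated above (libvpx's
// own vp9_cyclic_refresh_free should be equivalent in effect):
void cyclic_refresh_free_sketch(CYCLIC_REFRESH *cr) {
  if (cr != NULL) {
    vpx_free(cr->map);
    vpx_free(cr->last_coded_q_map);
    vpx_free(cr->consec_zero_mv);
    vpx_free(cr);
  }
}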
// Allocate memory for row synchronization
void vp9_row_mt_sync_mem_alloc(VP9RowMTSync *row_mt_sync, VP9_COMMON *cm,
                               int rows) {
  row_mt_sync->rows = rows;
#if CONFIG_MULTITHREAD
  {
    int i;

    CHECK_MEM_ERROR(cm, row_mt_sync->mutex_,
                    vpx_malloc(sizeof(*row_mt_sync->mutex_) * rows));
    if (row_mt_sync->mutex_) {
      for (i = 0; i < rows; ++i) {
        pthread_mutex_init(&row_mt_sync->mutex_[i], NULL);
      }
    }

    CHECK_MEM_ERROR(cm, row_mt_sync->cond_,
                    vpx_malloc(sizeof(*row_mt_sync->cond_) * rows));
    if (row_mt_sync->cond_) {
      for (i = 0; i < rows; ++i) {
        pthread_cond_init(&row_mt_sync->cond_[i], NULL);
      }
    }
  }
#endif  // CONFIG_MULTITHREAD

  CHECK_MEM_ERROR(cm, row_mt_sync->cur_col,
                  vpx_malloc(sizeof(*row_mt_sync->cur_col) * rows));

  // Set up nsync.
  row_mt_sync->sync_range = 1;
}
void vp9_create_encoding_threads(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
  int i;

  CHECK_MEM_ERROR(cm, cpi->enc_thread_hndl,
                  vpx_malloc(sizeof(*cpi->enc_thread_hndl) * cpi->max_threads));
  for (i = 0; i < cpi->max_threads; ++i) {
    VP9Worker *const worker = &cpi->enc_thread_hndl[i];
    winterface->init(worker);
    CHECK_MEM_ERROR(cm, worker->data1,
                    vpx_memalign(32, sizeof(thread_context)));
    worker->data2 = NULL;
    if (i < cpi->max_threads - 1 && !winterface->reset(worker)) {
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Encoder thread creation failed");
    }
  }

  // Set the row encoding hook on each worker.
  for (i = 0; i < cpi->max_threads; ++i) {
    winterface->sync(&cpi->enc_thread_hndl[i]);
    cpi->enc_thread_hndl[i].hook = (VP9WorkerHook)encoding_thread_process;
  }

  CHECK_MEM_ERROR(cm, cpi->cur_sb_col,
                  vpx_malloc(sizeof(*cpi->cur_sb_col) * cm->sb_rows));
  // Initialize cur_sb_col to -1 for all superblock rows.
  vpx_memset(cpi->cur_sb_col, -1, sizeof(*cpi->cur_sb_col) * cm->sb_rows);

  // Set up nsync (currently unused).
  cpi->sync_range = get_sync_range(cpi->oxcf.width);
}
void vp8cx_create_encoder_threads(VP8_COMP *cpi) {
  cpi->b_multi_threaded = 0;
  cpi->processor_core_count = 32; /* hardcoded; vp8_get_proc_core_count() */

  CHECK_MEM_ERROR(cpi->tplist,
                  vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows));

#if CONFIG_MULTITHREAD
  if (cpi->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) {
    int ithread;

    if (cpi->oxcf.multi_threaded > cpi->processor_core_count)
      cpi->encoding_thread_count = cpi->processor_core_count - 1;
    else
      cpi->encoding_thread_count = cpi->oxcf.multi_threaded - 1;

    CHECK_MEM_ERROR(cpi->h_encoding_thread,
                    vpx_malloc(sizeof(pthread_t) * cpi->encoding_thread_count));
    CHECK_MEM_ERROR(cpi->h_event_mbrencoding,
                    vpx_malloc(sizeof(sem_t) * cpi->encoding_thread_count));
    CHECK_MEM_ERROR(cpi->mb_row_ei,
                    vpx_memalign(32, sizeof(MB_ROW_COMP) *
                                         cpi->encoding_thread_count));
    vpx_memset(cpi->mb_row_ei, 0,
               sizeof(MB_ROW_COMP) * cpi->encoding_thread_count);
    CHECK_MEM_ERROR(cpi->en_thread_data,
                    vpx_malloc(sizeof(ENCODETHREAD_DATA) *
                               cpi->encoding_thread_count));

    /* Windows equivalent:
     * cpi->h_event_main = CreateEvent(NULL, FALSE, FALSE, NULL); */
    sem_init(&cpi->h_event_main, 0, 0);

    cpi->b_multi_threaded = 1;

    for (ithread = 0; ithread < cpi->encoding_thread_count; ithread++) {
      /* Windows equivalent:
       * cpi->h_event_mbrencoding[ithread] =
       *     CreateEvent(NULL, FALSE, FALSE, NULL); */
      sem_init(&cpi->h_event_mbrencoding[ithread], 0, 0);
      cpi->en_thread_data[ithread].ithread = ithread;
      cpi->en_thread_data[ithread].ptr1 = (void *)cpi;
      cpi->en_thread_data[ithread].ptr2 = (void *)&cpi->mb_row_ei[ithread];

      /* Windows equivalent: _beginthreadex(NULL, 0, thread_encoding_proc,
       * &cpi->en_thread_data[ithread], 0, NULL); */
      pthread_create(&cpi->h_encoding_thread[ithread], 0,
                     thread_encoding_proc, &cpi->en_thread_data[ithread]);
    }
  }
#endif
}
void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
                              VP9_COMMON *cm, int num_sbs, int max_threads,
                              int num_jobs) {
  int plane;
  const size_t dqcoeff_size = (num_sbs << DQCOEFFS_PER_SB_LOG2) *
                              sizeof(*row_mt_worker_data->dqcoeff[0]);
  row_mt_worker_data->num_jobs = num_jobs;
#if CONFIG_MULTITHREAD
  {
    int i;
    CHECK_MEM_ERROR(
        cm, row_mt_worker_data->recon_sync_mutex,
        vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_mutex) * num_jobs));
    if (row_mt_worker_data->recon_sync_mutex) {
      for (i = 0; i < num_jobs; ++i) {
        pthread_mutex_init(&row_mt_worker_data->recon_sync_mutex[i], NULL);
      }
    }

    CHECK_MEM_ERROR(
        cm, row_mt_worker_data->recon_sync_cond,
        vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_cond) * num_jobs));
    if (row_mt_worker_data->recon_sync_cond) {
      for (i = 0; i < num_jobs; ++i) {
        pthread_cond_init(&row_mt_worker_data->recon_sync_cond[i], NULL);
      }
    }
  }
#endif
  row_mt_worker_data->num_sbs = num_sbs;
  for (plane = 0; plane < 3; ++plane) {
    CHECK_MEM_ERROR(cm, row_mt_worker_data->dqcoeff[plane],
                    vpx_memalign(16, dqcoeff_size));
    memset(row_mt_worker_data->dqcoeff[plane], 0, dqcoeff_size);
    CHECK_MEM_ERROR(cm, row_mt_worker_data->eob[plane],
                    vpx_calloc(num_sbs << EOBS_PER_SB_LOG2,
                               sizeof(*row_mt_worker_data->eob[plane])));
  }
  CHECK_MEM_ERROR(cm, row_mt_worker_data->partition,
                  vpx_calloc(num_sbs * PARTITIONS_PER_SB,
                             sizeof(*row_mt_worker_data->partition)));
  CHECK_MEM_ERROR(cm, row_mt_worker_data->recon_map,
                  vpx_calloc(num_sbs, sizeof(*row_mt_worker_data->recon_map)));

  // Allocate memory for thread_data.
  if (row_mt_worker_data->thread_data == NULL) {
    const size_t thread_size =
        max_threads * sizeof(*row_mt_worker_data->thread_data);
    CHECK_MEM_ERROR(cm, row_mt_worker_data->thread_data,
                    vpx_memalign(32, thread_size));
  }
}
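// A hedged sketch of the matching teardown for the buffers allocated above
// (the library's actual routine may differ in detail): destroy one sync
// primitive per job, then release the per-plane and per-superblock buffers.
// thread_data is deliberately left alone here, since the allocator above
// reuses it across calls.
void dec_free_row_mt_mem_sketch(RowMTWorkerData *row_mt_worker_data) {
  if (row_mt_worker_data != NULL) {
    int plane;
#if CONFIG_MULTITHREAD
    int i;
    if (row_mt_worker_data->recon_sync_mutex != NULL) {
      for (i = 0; i < row_mt_worker_data->num_jobs; ++i)
        pthread_mutex_destroy(&row_mt_worker_data->recon_sync_mutex[i]);
      vpx_free(row_mt_worker_data->recon_sync_mutex);
      row_mt_worker_data->recon_sync_mutex = NULL;
    }
    if (row_mt_worker_data->recon_sync_cond != NULL) {
      for (i = 0; i < row_mt_worker_data->num_jobs; ++i)
        pthread_cond_destroy(&row_mt_worker_data->recon_sync_cond[i]);
      vpx_free(row_mt_worker_data->recon_sync_cond);
      row_mt_worker_data->recon_sync_cond = NULL;
    }
#endif
    for (plane = 0; plane < 3; ++plane) {
      vpx_free(row_mt_worker_data->eob[plane]);
      row_mt_worker_data->eob[plane] = NULL;
      vpx_free(row_mt_worker_data->dqcoeff[plane]);
      row_mt_worker_data->dqcoeff[plane] = NULL;
    }
    vpx_free(row_mt_worker_data->partition);
    row_mt_worker_data->partition = NULL;
    vpx_free(row_mt_worker_data->recon_map);
    row_mt_worker_data->recon_map = NULL;
  }
}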
static void create_enc_workers(VP9_COMP *cpi, int num_workers) {
  VP9_COMMON *const cm = &cpi->common;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  int i;

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    // While using SVC, we need to allocate threads according to the highest
    // resolution. When row based multithreading is enabled, it is OK to
    // allocate more threads than the number of max tile columns.
    if (cpi->use_svc && !cpi->row_mt) {
      int max_tile_cols = get_max_tile_cols(cpi);
      allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
    }

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp9_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp9_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads.
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }
      winterface->sync(worker);
    }
  }
}
struct frame_dec_param *frame_dec_param_get(struct task *tsk) {
  struct frame_dec_param *param;

  param = vpx_malloc(sizeof(*param));
  if (!param) {
    return NULL;
  }
  tsk->priv = param;
  tsk->dtor = task_dtor;
  return param;
}
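/* The task_dtor wired up above is not shown in this listing. A minimal
 * sketch, assuming the task owns its priv pointer and the destructor's only
 * job is to release it (the real hook may do more): */
static void task_dtor(struct task *tsk) {
  vpx_free(tsk->priv);
  tsk->priv = NULL;
}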
static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) {
  int i;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();

  ctx->last_show_frame = -1;
  ctx->next_submit_worker_id = 0;
  ctx->last_submit_worker_id = 0;
  ctx->next_output_worker_id = 0;
  ctx->frame_cache_read = 0;
  ctx->frame_cache_write = 0;
  ctx->num_cache_frames = 0;
  ctx->need_resync = 1;
  ctx->num_frame_workers =
      (ctx->frame_parallel_decode == 1) ? ctx->cfg.threads : 1;
  if (ctx->num_frame_workers > MAX_DECODE_THREADS)
    ctx->num_frame_workers = MAX_DECODE_THREADS;
  ctx->available_threads = ctx->num_frame_workers;
  ctx->flushed = 0;

  ctx->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool));
  if (ctx->buffer_pool == NULL) return VPX_CODEC_MEM_ERROR;

#if CONFIG_MULTITHREAD
  if (pthread_mutex_init(&ctx->buffer_pool->pool_mutex, NULL)) {
    set_error_detail(ctx, "Failed to allocate buffer pool mutex");
    return VPX_CODEC_MEM_ERROR;
  }
#endif

  ctx->frame_workers = (VPxWorker *)vpx_malloc(ctx->num_frame_workers *
                                               sizeof(*ctx->frame_workers));
  if (ctx->frame_workers == NULL) {
    set_error_detail(ctx, "Failed to allocate frame_workers");
    return VPX_CODEC_MEM_ERROR;
  }

  for (i = 0; i < ctx->num_frame_workers; ++i) {
    VPxWorker *const worker = &ctx->frame_workers[i];
    FrameWorkerData *frame_worker_data = NULL;
    winterface->init(worker);
    worker->data1 = vpx_memalign(32, sizeof(FrameWorkerData));
    if (worker->data1 == NULL) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data");
      return VPX_CODEC_MEM_ERROR;
    }
    frame_worker_data = (FrameWorkerData *)worker->data1;
    frame_worker_data->pbi = vp9_decoder_create(ctx->buffer_pool);
    if (frame_worker_data->pbi == NULL) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data");
      return VPX_CODEC_MEM_ERROR;
    }
    frame_worker_data->pbi->frame_worker_owner = worker;
    frame_worker_data->worker_id = i;
    frame_worker_data->scratch_buffer = NULL;
    frame_worker_data->scratch_buffer_size = 0;
    frame_worker_data->frame_context_ready = 0;
    frame_worker_data->received_frame = 0;
#if CONFIG_MULTITHREAD
    if (pthread_mutex_init(&frame_worker_data->stats_mutex, NULL)) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data mutex");
      return VPX_CODEC_MEM_ERROR;
    }
    if (pthread_cond_init(&frame_worker_data->stats_cond, NULL)) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data cond");
      return VPX_CODEC_MEM_ERROR;
    }
#endif
    // If decoding in serial mode, the FrameWorker thread may create tile
    // worker threads or a loopfilter thread.
    frame_worker_data->pbi->max_threads =
        (ctx->frame_parallel_decode == 0) ? ctx->cfg.threads : 0;

    frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order;
    frame_worker_data->pbi->frame_parallel_decode = ctx->frame_parallel_decode;
    frame_worker_data->pbi->common.frame_parallel_decode =
        ctx->frame_parallel_decode;
    worker->hook = (VPxWorkerHook)frame_worker_hook;
    if (!winterface->reset(worker)) {
      set_error_detail(ctx, "Frame Worker thread creation failed");
      return VPX_CODEC_MEM_ERROR;
    }
  }

  // If postprocessing was enabled by the application and a configuration
  // has not been provided, default it.
  if (!ctx->postproc_cfg_set &&
      (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC))
    set_default_ppflags(&ctx->postproc_cfg);

  init_buffer_callbacks(ctx);

  return VPX_CODEC_OK;
}
int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
  const VP8_COMMON *cm = &cpi->common;

  cpi->b_multi_threaded = 0;
  cpi->encoding_thread_count = 0;
  cpi->b_lpf_running = 0;

  if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) {
    int ithread;
    int th_count = cpi->oxcf.multi_threaded - 1;
    int rc = 0;

    /* don't allocate more threads than cores available */
    if (cpi->oxcf.multi_threaded > cm->processor_core_count)
      th_count = cm->processor_core_count - 1;

    /* we have th_count + 1 (main) threads processing one row each */
    /* no point to have more threads than the sync range allows */
    if (th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1)) {
      th_count = (cm->mb_cols / cpi->mt_sync_range) - 1;
    }

    if (th_count == 0) return 0;

    CHECK_MEM_ERROR(cpi->h_encoding_thread,
                    vpx_malloc(sizeof(pthread_t) * th_count));
    CHECK_MEM_ERROR(cpi->h_event_start_encoding,
                    vpx_malloc(sizeof(sem_t) * th_count));
    CHECK_MEM_ERROR(cpi->mb_row_ei,
                    vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
    vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
    CHECK_MEM_ERROR(cpi->en_thread_data,
                    vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));

    sem_init(&cpi->h_event_end_encoding, 0, 0);

    cpi->b_multi_threaded = 1;
    cpi->encoding_thread_count = th_count;

    for (ithread = 0; ithread < th_count; ithread++) {
      ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread];

      /* Setup block ptrs and offsets */
      vp8_setup_block_ptrs(&cpi->mb_row_ei[ithread].mb);
      vp8_setup_block_dptrs(&cpi->mb_row_ei[ithread].mb.e_mbd);

      sem_init(&cpi->h_event_start_encoding[ithread], 0, 0);

      ethd->ithread = ithread;
      ethd->ptr1 = (void *)cpi;
      ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread];

      rc = pthread_create(&cpi->h_encoding_thread[ithread], 0,
                          thread_encoding_proc, ethd);
      if (rc) break;
    }

    if (rc) {
      /* shutdown other threads */
      cpi->b_multi_threaded = 0;
      for (--ithread; ithread >= 0; ithread--) {
        pthread_join(cpi->h_encoding_thread[ithread], 0);
        sem_destroy(&cpi->h_event_start_encoding[ithread]);
      }
      sem_destroy(&cpi->h_event_end_encoding);

      /* free thread related resources */
      vpx_free(cpi->h_event_start_encoding);
      vpx_free(cpi->h_encoding_thread);
      vpx_free(cpi->mb_row_ei);
      vpx_free(cpi->en_thread_data);

      return -1;
    }

    {
      LPFTHREAD_DATA *lpfthd = &cpi->lpf_thread_data;

      sem_init(&cpi->h_event_start_lpf, 0, 0);
      sem_init(&cpi->h_event_end_lpf, 0, 0);

      lpfthd->ptr1 = (void *)cpi;
      rc = pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, lpfthd);

      if (rc) {
        /* shutdown other threads */
        cpi->b_multi_threaded = 0;
        for (--ithread; ithread >= 0; ithread--) {
          sem_post(&cpi->h_event_start_encoding[ithread]);
          pthread_join(cpi->h_encoding_thread[ithread], 0);
          sem_destroy(&cpi->h_event_start_encoding[ithread]);
        }
        sem_destroy(&cpi->h_event_end_encoding);
        sem_destroy(&cpi->h_event_end_lpf);
        sem_destroy(&cpi->h_event_start_lpf);

        /* free thread related resources */
        vpx_free(cpi->h_event_start_encoding);
        vpx_free(cpi->h_encoding_thread);
        vpx_free(cpi->mb_row_ei);
        vpx_free(cpi->en_thread_data);

        return -2;
      }
    }
  }
  return 0;
}
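/* Worked example of the thread-count clamp used above. This helper is a
 * standalone sketch, not library API: with mb_cols = 40 and
 * mt_sync_range = 4, at most (40 / 4) - 1 = 9 extra threads are useful,
 * because the main thread also encodes rows and adjacent rows must stay
 * at least mt_sync_range columns apart. */
static int clamp_encoding_threads_sketch(int requested, int core_count,
                                         int mb_cols, int mt_sync_range) {
  int th_count = requested - 1; /* the main thread encodes as well */
  if (requested > core_count) th_count = core_count - 1;
  if (th_count > (mb_cols / mt_sync_range) - 1)
    th_count = (mb_cols / mt_sync_range) - 1;
  return th_count < 0 ? 0 : th_count;
}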
void vp9_init_layer_context(VP9_COMP *const cpi) {
  SVC *const svc = &cpi->svc;
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  int mi_rows = cpi->common.mi_rows;
  int mi_cols = cpi->common.mi_cols;
  int sl, tl, i;
  int alt_ref_idx = svc->number_spatial_layers;

  svc->spatial_layer_id = 0;
  svc->temporal_layer_id = 0;
  svc->first_spatial_layer_to_encode = 0;
  svc->rc_drop_superframe = 0;
  svc->force_zero_mode_spatial_ref = 0;
  svc->use_base_mv = 0;
  svc->scaled_temp_is_alloc = 0;
  svc->scaled_one_half = 0;
  svc->current_superframe = 0;

  for (i = 0; i < REF_FRAMES; ++i) svc->ref_frame_index[i] = -1;
  for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
    cpi->svc.ext_frame_flags[sl] = 0;
    cpi->svc.ext_lst_fb_idx[sl] = 0;
    cpi->svc.ext_gld_fb_idx[sl] = 1;
    cpi->svc.ext_alt_fb_idx[sl] = 2;
  }

  if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) {
    if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img, SMALL_FRAME_WIDTH,
                                 SMALL_FRAME_HEIGHT, cpi->common.subsampling_x,
                                 cpi->common.subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                                 cpi->common.use_highbitdepth,
#endif
                                 VP9_ENC_BORDER_IN_PIXELS,
                                 cpi->common.byte_alignment, NULL, NULL, NULL))
      vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                         "Failed to allocate empty frame for multiple frame "
                         "contexts");
    memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80,
           cpi->svc.empty_frame.img.buffer_alloc_sz);
  }

  for (sl = 0; sl < oxcf->ss_number_layers; ++sl) {
    for (tl = 0; tl < oxcf->ts_number_layers; ++tl) {
      int layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers);
      LAYER_CONTEXT *const lc = &svc->layer_context[layer];
      RATE_CONTROL *const lrc = &lc->rc;
      int i;
      lc->current_video_frame_in_layer = 0;
      lc->layer_size = 0;
      lc->frames_from_key_frame = 0;
      lc->last_frame_type = FRAME_TYPES;
      lrc->ni_av_qi = oxcf->worst_allowed_q;
      lrc->total_actual_bits = 0;
      lrc->total_target_vs_actual = 0;
      lrc->ni_tot_qi = 0;
      lrc->tot_q = 0.0;
      lrc->avg_q = 0.0;
      lrc->ni_frames = 0;
      lrc->decimation_count = 0;
      lrc->decimation_factor = 0;

      for (i = 0; i < RATE_FACTOR_LEVELS; ++i) {
        lrc->rate_correction_factors[i] = 1.0;
      }

      if (cpi->oxcf.rc_mode == VPX_CBR) {
        lc->target_bandwidth = oxcf->layer_target_bitrate[layer];
        lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q;
        lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q;
        lrc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q;
      } else {
        lc->target_bandwidth = oxcf->layer_target_bitrate[layer];
        lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q;
        lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q;
        lrc->avg_frame_qindex[KEY_FRAME] =
            (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2;
        lrc->avg_frame_qindex[INTER_FRAME] =
            (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2;
        if (oxcf->ss_enable_auto_arf[sl])
          lc->alt_ref_idx = alt_ref_idx++;
        else
          lc->alt_ref_idx = INVALID_IDX;
        lc->gold_ref_idx = INVALID_IDX;
      }

      lrc->buffer_level =
          oxcf->starting_buffer_level_ms * lc->target_bandwidth / 1000;
      lrc->bits_off_target = lrc->buffer_level;

      // Initialize the cyclic refresh parameters. If spatial layers are used
      // (i.e., ss_number_layers > 1), these need to be updated per spatial
      // layer. Cyclic refresh is only applied on the base temporal layer.
      if (oxcf->ss_number_layers > 1 && tl == 0) {
        size_t last_coded_q_map_size;
        size_t consec_zero_mv_size;
        VP9_COMMON *const cm = &cpi->common;
        lc->sb_index = 0;
        CHECK_MEM_ERROR(cm, lc->map,
                        vpx_malloc(mi_rows * mi_cols * sizeof(*lc->map)));
        memset(lc->map, 0, mi_rows * mi_cols);
        last_coded_q_map_size =
            mi_rows * mi_cols * sizeof(*lc->last_coded_q_map);
        CHECK_MEM_ERROR(cm, lc->last_coded_q_map,
                        vpx_malloc(last_coded_q_map_size));
        assert(MAXQ <= 255);
        memset(lc->last_coded_q_map, MAXQ, last_coded_q_map_size);
        consec_zero_mv_size = mi_rows * mi_cols * sizeof(*lc->consec_zero_mv);
        CHECK_MEM_ERROR(cm, lc->consec_zero_mv,
                        vpx_malloc(consec_zero_mv_size));
        memset(lc->consec_zero_mv, 0, consec_zero_mv_size);
      }
    }
  }

  // Still have extra buffer for base layer golden frame
  if (!(svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) &&
      alt_ref_idx < REF_FRAMES)
    svc->layer_context[0].gold_ref_idx = alt_ref_idx;
}
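// For reference: LAYER_IDS_TO_IDX above flattens a (spatial, temporal) layer
// pair into a single index in layer_context[]. A sketch consistent with its
// use here, assuming row-major ordering by spatial layer:
//
//   #define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl))
//
// e.g. with ts_number_layers = 3, spatial layer 1 / temporal layer 2 maps
// to index 1 * 3 + 2 = 5.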
static void setup_token_decoder(VP8D_COMP *pbi,
                                const unsigned char *token_part_sizes) {
  vp8_reader *bool_decoder = &pbi->bc2;
  unsigned int partition_idx;
  int fragment_idx;
  int num_token_partitions;
  const unsigned char *first_fragment_end =
      pbi->fragments[0] + pbi->fragment_sizes[0];

  TOKEN_PARTITION multi_token_partition =
      (TOKEN_PARTITION)vp8_read_literal(&pbi->bc, 2);
  if (!vp8dx_bool_error(&pbi->bc))
    pbi->common.multi_token_partition = multi_token_partition;
  num_token_partitions = 1 << pbi->common.multi_token_partition;
  if (num_token_partitions > 1) {
    CHECK_MEM_ERROR(pbi->mbc,
                    vpx_malloc(num_token_partitions * sizeof(vp8_reader)));
    bool_decoder = pbi->mbc;
  }

  /* Check for partitions within the fragments and unpack the fragments
   * so that each fragment pointer points to its corresponding partition. */
  for (fragment_idx = 0; fragment_idx < pbi->num_fragments; ++fragment_idx) {
    unsigned int fragment_size = pbi->fragment_sizes[fragment_idx];
    const unsigned char *fragment_end =
        pbi->fragments[fragment_idx] + fragment_size;

    /* Special case for handling the first partition since we have already
     * read its size. */
    if (fragment_idx == 0) {
      /* Size of first partition + token partition sizes element */
      ptrdiff_t ext_first_part_size = token_part_sizes - pbi->fragments[0] +
                                      3 * (num_token_partitions - 1);
      fragment_size -= ext_first_part_size;
      if (fragment_size > 0) {
        pbi->fragment_sizes[0] = ext_first_part_size;
        /* The fragment contains an additional partition. Move to next. */
        fragment_idx++;
        pbi->fragments[fragment_idx] =
            pbi->fragments[0] + pbi->fragment_sizes[0];
      }
    }

    /* Split the chunk into partitions read from the bitstream */
    while (fragment_size > 0) {
      ptrdiff_t partition_size = read_available_partition_size(
          pbi, token_part_sizes, pbi->fragments[fragment_idx],
          first_fragment_end, fragment_end, fragment_idx - 1,
          num_token_partitions);
      pbi->fragment_sizes[fragment_idx] = partition_size;
      fragment_size -= partition_size;
      assert(fragment_idx <= num_token_partitions);
      if (fragment_size > 0) {
        /* The fragment contains an additional partition. Move to next. */
        fragment_idx++;
        pbi->fragments[fragment_idx] =
            pbi->fragments[fragment_idx - 1] + partition_size;
      }
    }
  }

  pbi->num_fragments = num_token_partitions + 1;

  for (partition_idx = 1; partition_idx < pbi->num_fragments;
       ++partition_idx) {
    if (vp8dx_start_decode(bool_decoder, pbi->fragments[partition_idx],
                           pbi->fragment_sizes[partition_idx]))
      vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR,
                         "Failed to allocate bool decoder %d", partition_idx);
    bool_decoder++;
  }

#if CONFIG_MULTITHREAD
  /* Clamp number of decoder threads */
  if (pbi->decoding_thread_count > num_token_partitions - 1)
    pbi->decoding_thread_count = num_token_partitions - 1;
#endif
}
void vp10_encode_tiles_mt(VP10_COMP *cpi) {
  VP10_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp10_init_tile_data(cpi);

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp10_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp10_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads.
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }

      winterface->sync(worker);
    }
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *thread_data;

    worker->hook = (VPxWorkerHook)enc_worker_hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = NULL;
    thread_data = (EncWorkerData *)worker->data1;

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }
  }

  // Encode a frame
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      vp10_accumulate_frame_counts(cm, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
void vp10_dering_frame(YV12_BUFFER_CONFIG *frame, VP10_COMMON *cm,
                       MACROBLOCKD *xd, int global_level) {
  int r, c;
  int sbr, sbc;
  int nhsb, nvsb;
  od_dering_in *src[3];
  unsigned char *bskip;
  int dir[OD_DERING_NBLOCKS][OD_DERING_NBLOCKS] = { { 0 } };
  int stride;
  int bsize[3];
  int dec[3];
  int pli;
  int coeff_shift = VPXMAX(cm->bit_depth - 8, 0);

  nvsb = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
  nhsb = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE;
  bskip = vpx_malloc(sizeof(*bskip) * cm->mi_rows * cm->mi_cols);
  vp10_setup_dst_planes(xd->plane, frame, 0, 0);
  for (pli = 0; pli < 3; pli++) {
    dec[pli] = xd->plane[pli].subsampling_x;
    bsize[pli] = 8 >> dec[pli];
  }
  stride = bsize[0] * cm->mi_cols;
  for (pli = 0; pli < 3; pli++) {
    /* Note: sizeof(*src[pli]) (one od_dering_in sample) is the intended
     * element size; sizeof(*src) would be the size of a pointer. */
    src[pli] = vpx_malloc(sizeof(*src[pli]) * cm->mi_rows * cm->mi_cols * 64);
    for (r = 0; r < bsize[pli] * cm->mi_rows; ++r) {
      for (c = 0; c < bsize[pli] * cm->mi_cols; ++c) {
#if CONFIG_VPX_HIGHBITDEPTH
        if (cm->use_highbitdepth) {
          src[pli][r * stride + c] = CONVERT_TO_SHORTPTR(
              xd->plane[pli].dst.buf)[r * xd->plane[pli].dst.stride + c];
        } else {
#endif
          src[pli][r * stride + c] =
              xd->plane[pli].dst.buf[r * xd->plane[pli].dst.stride + c];
#if CONFIG_VPX_HIGHBITDEPTH
        }
#endif
      }
    }
  }
  for (r = 0; r < cm->mi_rows; ++r) {
    for (c = 0; c < cm->mi_cols; ++c) {
      const MB_MODE_INFO *mbmi =
          &cm->mi_grid_visible[r * cm->mi_stride + c]->mbmi;
      bskip[r * cm->mi_cols + c] = mbmi->skip;
    }
  }
  for (sbr = 0; sbr < nvsb; sbr++) {
    for (sbc = 0; sbc < nhsb; sbc++) {
      int level;
      int nhb, nvb;
      nhb = VPXMIN(MI_BLOCK_SIZE, cm->mi_cols - MI_BLOCK_SIZE * sbc);
      nvb = VPXMIN(MI_BLOCK_SIZE, cm->mi_rows - MI_BLOCK_SIZE * sbr);
      for (pli = 0; pli < 3; pli++) {
        int16_t dst[MI_BLOCK_SIZE * MI_BLOCK_SIZE * 8 * 8];
        int threshold;
#if DERING_REFINEMENT
        level = compute_level_from_index(
            global_level,
            cm->mi_grid_visible[MI_BLOCK_SIZE * sbr * cm->mi_stride +
                                MI_BLOCK_SIZE * sbc]->mbmi.dering_gain);
#else
        level = global_level;
#endif
        /* FIXME: This is a temporary hack that uses more conservative
           deringing for chroma. */
        if (pli) level = (level * 5 + 4) >> 3;
        if (sb_all_skip(cm, sbr * MI_BLOCK_SIZE, sbc * MI_BLOCK_SIZE))
          level = 0;
        threshold = level << coeff_shift;
        od_dering(&OD_DERING_VTBL_C, dst, MI_BLOCK_SIZE * bsize[pli],
                  &src[pli][sbr * stride * bsize[pli] * MI_BLOCK_SIZE +
                            sbc * bsize[pli] * MI_BLOCK_SIZE],
                  stride, nhb, nvb, sbc, sbr, nhsb, nvsb, dec[pli], dir, pli,
                  &bskip[MI_BLOCK_SIZE * sbr * cm->mi_cols +
                         MI_BLOCK_SIZE * sbc],
                  cm->mi_cols, threshold, OD_DERING_NO_CHECK_OVERLAP,
                  coeff_shift);
        for (r = 0; r < bsize[pli] * nvb; ++r) {
          for (c = 0; c < bsize[pli] * nhb; ++c) {
#if CONFIG_VPX_HIGHBITDEPTH
            if (cm->use_highbitdepth) {
              CONVERT_TO_SHORTPTR(xd->plane[pli].dst.buf)
              [xd->plane[pli].dst.stride *
                   (bsize[pli] * MI_BLOCK_SIZE * sbr + r) +
               sbc * bsize[pli] * MI_BLOCK_SIZE + c] =
                  dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
            } else {
#endif
              xd->plane[pli].dst.buf[xd->plane[pli].dst.stride *
                                         (bsize[pli] * MI_BLOCK_SIZE * sbr +
                                          r) +
                                     sbc * bsize[pli] * MI_BLOCK_SIZE + c] =
                  dst[r * MI_BLOCK_SIZE * bsize[pli] + c];
#if CONFIG_VPX_HIGHBITDEPTH
            }
#endif
          }
        }
      }
    }
  }
  for (pli = 0; pli < 3; pli++) {
    vpx_free(src[pli]);
  }
  vpx_free(bskip);
}
void vp8cx_create_encoder_threads(VP8_COMP *cpi) {
  const VP8_COMMON *cm = &cpi->common;

  cpi->b_multi_threaded = 0;
  cpi->encoding_thread_count = 0;

  if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) {
    int ithread;
    int th_count = cpi->oxcf.multi_threaded - 1;

    /* don't allocate more threads than cores available */
    if (cpi->oxcf.multi_threaded > cm->processor_core_count)
      th_count = cm->processor_core_count - 1;

    /* we have th_count + 1 (main) threads processing one row each */
    /* no point to have more threads than the sync range allows */
    if (th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1)) {
      th_count = (cm->mb_cols / cpi->mt_sync_range) - 1;
    }

    if (th_count == 0) return;

    CHECK_MEM_ERROR(cpi->h_encoding_thread,
                    vpx_malloc(sizeof(pthread_t) * th_count));
    CHECK_MEM_ERROR(cpi->h_event_start_encoding,
                    vpx_malloc(sizeof(sem_t) * th_count));
    CHECK_MEM_ERROR(cpi->mb_row_ei,
                    vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
    vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
    CHECK_MEM_ERROR(cpi->en_thread_data,
                    vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));
    CHECK_MEM_ERROR(cpi->mt_current_mb_col,
                    vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));

    sem_init(&cpi->h_event_end_encoding, 0, 0);

    cpi->b_multi_threaded = 1;
    cpi->encoding_thread_count = th_count;

    for (ithread = 0; ithread < th_count; ithread++) {
      ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread];

      sem_init(&cpi->h_event_start_encoding[ithread], 0, 0);
      ethd->ithread = ithread;
      ethd->ptr1 = (void *)cpi;
      ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread];

      pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc,
                     ethd);
    }

    {
      LPFTHREAD_DATA *lpfthd = &cpi->lpf_thread_data;

      sem_init(&cpi->h_event_start_lpf, 0, 0);
      sem_init(&cpi->h_event_end_lpf, 0, 0);

      lpfthd->ptr1 = (void *)cpi;
      pthread_create(&cpi->h_filter_thread, 0, loopfilter_thread, lpfthd);
    }
  }
}
void vp9_encode_tiles_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp9_init_tile_data(cpi);

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    // While using SVC, we need to allocate threads according to the highest
    // resolution.
    if (cpi->use_svc) {
      int max_tile_cols = get_max_tile_cols(cpi);
      allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
    }

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp9_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp9_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads.
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }

      winterface->sync(worker);
    }
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *thread_data;

    worker->hook = (VPxWorkerHook)enc_worker_hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = NULL;
    thread_data = (EncWorkerData *)worker->data1;

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }

    // Handle use_nonrd_pick_mode case.
    if (cpi->sf.use_nonrd_pick_mode) {
      MACROBLOCK *const x = &thread_data->td->mb;
      MACROBLOCKD *const xd = &x->e_mbd;
      struct macroblock_plane *const p = x->plane;
      struct macroblockd_plane *const pd = xd->plane;
      PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
      int j;
      for (j = 0; j < MAX_MB_PLANE; ++j) {
        p[j].coeff = ctx->coeff_pbuf[j][0];
        p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
        pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
        p[j].eobs = ctx->eobs_pbuf[j][0];
      }
    }
  }

  // Encode a frame
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
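// For context: a hedged sketch of the enc_worker_hook referenced above. The
// shape is inferred from how thread_data->start and num_workers are used
// (the real hook may differ): each worker encodes the tiles whose index is
// congruent to its starting tile modulo the worker count.
static int enc_worker_hook_sketch(EncWorkerData *const thread_data,
                                  void *unused) {
  VP9_COMP *const cpi = thread_data->cpi;
  const VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  int t;

  (void)unused;

  for (t = thread_data->start; t < tile_rows * tile_cols;
       t += cpi->num_workers) {
    int tile_row = t / tile_cols;
    int tile_col = t % tile_cols;
    vp9_encode_tile(cpi, thread_data->td, tile_row, tile_col);
  }
  return 0;
}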