static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
                               PICK_MODE_CONTEXT *ctx) {
  const int num_blk = (num_4x4_blk < 4 ? 4 : num_4x4_blk);
  const int num_pix = num_blk << 4;
  int i, k;
  ctx->num_4x4_blk = num_blk;

  // Size the per-4x4-block flags to the rounded-up count stored in
  // ctx->num_4x4_blk; allocating only num_4x4_blk entries would
  // under-allocate whenever num_4x4_blk < 4.
  CHECK_MEM_ERROR(cm, ctx->zcoeff_blk, vpx_calloc(num_blk, sizeof(uint8_t)));
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    for (k = 0; k < 3; ++k) {
      CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
                      vpx_memalign(16, num_pix * sizeof(int16_t)));
      CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
                      vpx_memalign(16, num_pix * sizeof(int16_t)));
      CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
                      vpx_memalign(16, num_pix * sizeof(int16_t)));
      CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
                      vpx_memalign(16, num_pix * sizeof(uint16_t)));
      ctx->coeff_pbuf[i][k] = ctx->coeff[i][k];
      ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k];
      ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
      ctx->eobs_pbuf[i][k] = ctx->eobs[i][k];
    }
  }
}
void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
                              VP9_COMMON *cm, int num_sbs, int max_threads,
                              int num_jobs) {
  int plane;
  const size_t dqcoeff_size = (num_sbs << DQCOEFFS_PER_SB_LOG2) *
                              sizeof(*row_mt_worker_data->dqcoeff[0]);
  row_mt_worker_data->num_jobs = num_jobs;
#if CONFIG_MULTITHREAD
  {
    int i;
    CHECK_MEM_ERROR(
        cm, row_mt_worker_data->recon_sync_mutex,
        vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_mutex) * num_jobs));
    if (row_mt_worker_data->recon_sync_mutex) {
      for (i = 0; i < num_jobs; ++i) {
        pthread_mutex_init(&row_mt_worker_data->recon_sync_mutex[i], NULL);
      }
    }

    CHECK_MEM_ERROR(
        cm, row_mt_worker_data->recon_sync_cond,
        vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_cond) * num_jobs));
    if (row_mt_worker_data->recon_sync_cond) {
      for (i = 0; i < num_jobs; ++i) {
        pthread_cond_init(&row_mt_worker_data->recon_sync_cond[i], NULL);
      }
    }
  }
#endif
  row_mt_worker_data->num_sbs = num_sbs;
  for (plane = 0; plane < 3; ++plane) {
    CHECK_MEM_ERROR(cm, row_mt_worker_data->dqcoeff[plane],
                    vpx_memalign(16, dqcoeff_size));
    memset(row_mt_worker_data->dqcoeff[plane], 0, dqcoeff_size);
    CHECK_MEM_ERROR(cm, row_mt_worker_data->eob[plane],
                    vpx_calloc(num_sbs << EOBS_PER_SB_LOG2,
                               sizeof(*row_mt_worker_data->eob[plane])));
  }
  CHECK_MEM_ERROR(cm, row_mt_worker_data->partition,
                  vpx_calloc(num_sbs * PARTITIONS_PER_SB,
                             sizeof(*row_mt_worker_data->partition)));
  CHECK_MEM_ERROR(cm, row_mt_worker_data->recon_map,
                  vpx_calloc(num_sbs, sizeof(*row_mt_worker_data->recon_map)));

  // allocate memory for thread_data
  if (row_mt_worker_data->thread_data == NULL) {
    const size_t thread_size =
        max_threads * sizeof(*row_mt_worker_data->thread_data);
    CHECK_MEM_ERROR(cm, row_mt_worker_data->thread_data,
                    vpx_memalign(32, thread_size));
  }
}
int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width,
                                  int height, int border) {
  if (ybf) {
    int aligned_width = (width + 15) & ~15;
    int aligned_height = (height + 15) & ~15;
    int y_stride = ((aligned_width + 2 * border) + 31) & ~31;
    int yplane_size = (aligned_height + 2 * border) * y_stride;
    int uv_width = aligned_width >> 1;
    int uv_height = aligned_height >> 1;
    /** There is currently a bunch of code which assumes
     *  uv_stride == y_stride/2, so enforce this here. */
    int uv_stride = y_stride >> 1;
    int uvplane_size = (uv_height + border) * uv_stride;
    const int frame_size = yplane_size + 2 * uvplane_size;

    if (!ybf->buffer_alloc) {
      ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, frame_size);
      ybf->buffer_alloc_sz = frame_size;
    }

    if (!ybf->buffer_alloc || ybf->buffer_alloc_sz < frame_size) return -1;

    /* Only support allocating buffers that have a border that's a multiple
     * of 32. The border restriction is required to get 16-byte alignment of
     * the start of the chroma rows without introducing an arbitrary gap
     * between planes, which would break the semantics of things like
     * vpx_img_set_rect(). */
    if (border & 0x1f) return -3;

    ybf->y_crop_width = width;
    ybf->y_crop_height = height;
    ybf->y_width = aligned_width;
    ybf->y_height = aligned_height;
    ybf->y_stride = y_stride;

    ybf->uv_crop_width = (width + 1) / 2;
    ybf->uv_crop_height = (height + 1) / 2;
    ybf->uv_width = uv_width;
    ybf->uv_height = uv_height;
    ybf->uv_stride = uv_stride;

    ybf->alpha_width = 0;
    ybf->alpha_height = 0;
    ybf->alpha_stride = 0;

    ybf->border = border;
    ybf->frame_size = frame_size;

    ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border;
    ybf->u_buffer = ybf->buffer_alloc + yplane_size +
                    (border / 2 * uv_stride) + border / 2;
    ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size +
                    (border / 2 * uv_stride) + border / 2;
    ybf->alpha_buffer = NULL;

    ybf->corrupted = 0; /* assume not corrupted by errors */
    return 0;
  }
  return -2;
}
static void create_enc_workers(VP9_COMP *cpi, int num_workers) {
  VP9_COMMON *const cm = &cpi->common;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  int i;

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    // While using SVC, we need to allocate threads according to the highest
    // resolution. When row based multithreading is enabled, it is OK to
    // allocate more threads than the number of max tile columns.
    if (cpi->use_svc && !cpi->row_mt) {
      int max_tile_cols = get_max_tile_cols(cpi);
      allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
    }

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp9_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp9_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }
      winterface->sync(worker);
    }
  }
}
static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx,
                                    vpx_codec_priv_enc_mr_cfg_t *data) {
  // This function only allocates space for the vpx_codec_alg_priv_t
  // structure. More memory may be required at the time the stream
  // information becomes known.
  if (!ctx->priv) {
    vpx_codec_alg_priv_t *alg_priv = vpx_memalign(32, sizeof(*alg_priv));
    if (alg_priv == NULL) return VPX_CODEC_MEM_ERROR;

    vp9_zero(*alg_priv);

    ctx->priv = (vpx_codec_priv_t *)alg_priv;
    ctx->priv->sz = sizeof(*ctx->priv);
    ctx->priv->iface = ctx->iface;
    ctx->priv->alg_priv = alg_priv;
    ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si);
    ctx->priv->init_flags = ctx->init_flags;

    if (ctx->config.dec) {
      // Update the reference to the config structure to an internal copy.
      ctx->priv->alg_priv->cfg = *ctx->config.dec;
      ctx->config.dec = &ctx->priv->alg_priv->cfg;
    }
  }

  return VPX_CODEC_OK;
}
void vp9_create_encoding_threads(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const VP9WorkerInterface *const winterface = vp9_get_worker_interface();
  int i;

  CHECK_MEM_ERROR(cm, cpi->enc_thread_hndl,
                  vpx_malloc(sizeof(*cpi->enc_thread_hndl) *
                             cpi->max_threads));
  for (i = 0; i < cpi->max_threads; ++i) {
    VP9Worker *const worker = &cpi->enc_thread_hndl[i];
    winterface->init(worker);
    CHECK_MEM_ERROR(cm, worker->data1,
                    vpx_memalign(32, sizeof(thread_context)));
    worker->data2 = NULL;
    if (i < cpi->max_threads - 1 && !winterface->reset(worker)) {
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Tile encoder thread creation failed");
    }
  }
  // set row encoding hook
  for (i = 0; i < cpi->max_threads; ++i) {
    winterface->sync(&cpi->enc_thread_hndl[i]);
    cpi->enc_thread_hndl[i].hook = (VP9WorkerHook)encoding_thread_process;
  }
  CHECK_MEM_ERROR(cm, cpi->cur_sb_col,
                  vpx_malloc(sizeof(*cpi->cur_sb_col) * cm->sb_rows));
  // init cur sb col
  vpx_memset(cpi->cur_sb_col, -1, (sizeof(*cpi->cur_sb_col) * cm->sb_rows));
  // set up nsync (currently unused).
  cpi->sync_range = get_sync_range(cpi->oxcf.width);
}
void vp8cx_create_encoder_threads(VP8_COMP *cpi) {
  cpi->b_multi_threaded = 0;
  cpi->processor_core_count = 32;  // vp8_get_proc_core_count();

  CHECK_MEM_ERROR(cpi->tplist,
                  vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows));

#if CONFIG_MULTITHREAD
  if (cpi->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) {
    int ithread;

    if (cpi->oxcf.multi_threaded > cpi->processor_core_count)
      cpi->encoding_thread_count = cpi->processor_core_count - 1;
    else
      cpi->encoding_thread_count = cpi->oxcf.multi_threaded - 1;

    CHECK_MEM_ERROR(cpi->h_encoding_thread,
                    vpx_malloc(sizeof(pthread_t) *
                               cpi->encoding_thread_count));
    CHECK_MEM_ERROR(cpi->h_event_mbrencoding,
                    vpx_malloc(sizeof(sem_t) * cpi->encoding_thread_count));
    CHECK_MEM_ERROR(cpi->mb_row_ei,
                    vpx_memalign(32, sizeof(MB_ROW_COMP) *
                                 cpi->encoding_thread_count));
    vpx_memset(cpi->mb_row_ei, 0,
               sizeof(MB_ROW_COMP) * cpi->encoding_thread_count);
    CHECK_MEM_ERROR(cpi->en_thread_data,
                    vpx_malloc(sizeof(ENCODETHREAD_DATA) *
                               cpi->encoding_thread_count));

    // cpi->h_event_main = CreateEvent(NULL, FALSE, FALSE, NULL);
    sem_init(&cpi->h_event_main, 0, 0);

    cpi->b_multi_threaded = 1;

    // printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n",
    //        (cpi->encoding_thread_count + 1));

    for (ithread = 0; ithread < cpi->encoding_thread_count; ithread++) {
      // cpi->h_event_mbrencoding[ithread] =
      //     CreateEvent(NULL, FALSE, FALSE, NULL);
      sem_init(&cpi->h_event_mbrencoding[ithread], 0, 0);
      cpi->en_thread_data[ithread].ithread = ithread;
      cpi->en_thread_data[ithread].ptr1 = (void *)cpi;
      cpi->en_thread_data[ithread].ptr2 = (void *)&cpi->mb_row_ei[ithread];

      // printf(" call begin thread %d \n", ithread);
      // cpi->h_encoding_thread[ithread] = (HANDLE)_beginthreadex(
      //     NULL,                              // security
      //     0,                                 // stksize
      //     thread_encoding_proc,
      //     (&cpi->en_thread_data[ithread]),   // Thread data
      //     0,
      //     NULL);
      pthread_create(&cpi->h_encoding_thread[ithread], 0,
                     thread_encoding_proc, (&cpi->en_thread_data[ithread]));
    }
  }
#endif
}
static struct VP8D_COMP *create_decompressor(VP8D_CONFIG *oxcf) {
  VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP));

  if (!pbi) return NULL;

  memset(pbi, 0, sizeof(VP8D_COMP));

  if (setjmp(pbi->common.error.jmp)) {
    pbi->common.error.setjmp = 0;
    remove_decompressor(pbi);
    return 0;
  }

  pbi->common.error.setjmp = 1;
  vp8_create_common(&pbi->common);

  pbi->common.current_video_frame = 0;
  pbi->ready_for_new_data = 1;

  /* vp8cx_init_de_quantizer() is first called here. Add check in
   * frame_init_dequantizer() to avoid unnecessary calling of
   * vp8cx_init_de_quantizer() for every frame. */
  vp8cx_init_de_quantizer(pbi);

  vp8_loop_filter_init(&pbi->common);

  pbi->common.error.setjmp = 0;

#if CONFIG_ERROR_CONCEALMENT
  pbi->ec_enabled = oxcf->error_concealment;
  pbi->overlaps = NULL;
#else
  (void)oxcf;
  pbi->ec_enabled = 0;
#endif
  /* Error concealment is activated after a key frame has been
   * decoded without errors when error concealment is enabled. */
  pbi->ec_active = 0;

  pbi->decoded_key_frame = 0;

  /* Independent partitions is activated when a frame updates the
   * token probability table to have equal probabilities over the
   * PREV_COEF context. */
  pbi->independent_partitions = 0;

  vp8_setup_block_dptrs(&pbi->mb);
  return pbi;
}
VP9Decoder *vp9_decoder_create() {
  VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi));
  VP9_COMMON *const cm = pbi ? &pbi->common : NULL;

  if (!cm) return NULL;

  vp9_zero(*pbi);

  if (setjmp(cm->error.jmp)) {
    cm->error.setjmp = 0;
    vp9_decoder_remove(pbi);
    return NULL;
  }

  cm->error.setjmp = 1;

  CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
  CHECK_MEM_ERROR(cm, cm->frame_contexts,
                  (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS,
                                              sizeof(*cm->frame_contexts)));

  pbi->need_resync = 1;
  initialize_dec();

  // Initialize the references to not point to any frame buffers.
  vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));

  cm->current_video_frame = 0;
  pbi->ready_for_new_data = 1;
  cm->bit_depth = VPX_BITS_8;
  cm->dequant_bit_depth = VPX_BITS_8;

  cm->alloc_mi = vp9_dec_alloc_mi;
  cm->free_mi = vp9_dec_free_mi;
  cm->setup_mi = vp9_dec_setup_mi;

  // vp9_init_dequantizer() is first called here. Add check in
  // frame_init_dequantizer() to avoid unnecessary calling of
  // vp9_init_dequantizer() for every frame.
  vp9_init_dequantizer(cm);

  vp9_loop_filter_init(cm);

  cm->error.setjmp = 0;

  vp9_get_worker_interface()->init(&pbi->lf_worker);

  return pbi;
}
VP10Decoder *vp10_decoder_create(BufferPool *const pool) {
  VP10Decoder *volatile const pbi = vpx_memalign(32, sizeof(*pbi));
  VP10_COMMON *volatile const cm = pbi ? &pbi->common : NULL;

  if (!cm) return NULL;

  vp10_zero(*pbi);

  if (setjmp(cm->error.jmp)) {
    cm->error.setjmp = 0;
    vp10_decoder_remove(pbi);
    return NULL;
  }

  cm->error.setjmp = 1;

  CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
  CHECK_MEM_ERROR(cm, cm->frame_contexts,
                  (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS,
                                              sizeof(*cm->frame_contexts)));

  pbi->need_resync = 1;
  once(initialize_dec);

  // Initialize the references to not point to any frame buffers.
  memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
  memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map));

  cm->current_video_frame = 0;
  pbi->ready_for_new_data = 1;
  pbi->common.buffer_pool = pool;

  cm->bit_depth = VPX_BITS_8;
  cm->dequant_bit_depth = VPX_BITS_8;

  cm->alloc_mi = vp10_dec_alloc_mi;
  cm->free_mi = vp10_dec_free_mi;
  cm->setup_mi = vp10_dec_setup_mi;

  vp10_loop_filter_init(cm);

  cm->error.setjmp = 0;

  vpx_get_worker_interface()->init(&pbi->lf_worker);

  return pbi;
}
VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf) {
  VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP));

  if (!pbi) return NULL;

  vpx_memset(pbi, 0, sizeof(VP8D_COMP));

  if (setjmp(pbi->common.error.jmp)) {
    pbi->common.error.setjmp = 0;
    vp8dx_remove_decompressor(pbi);
    return 0;
  }

  pbi->common.error.setjmp = 1;
  vp8dx_initialize();

  vp8_create_common(&pbi->common);
  vp8_dmachine_specific_config(pbi);

  pbi->common.current_video_frame = 0;
  pbi->ready_for_new_data = 1;

  pbi->CPUFreq = 0; /* vp8_get_processor_freq(); */

#if CONFIG_MULTITHREAD
  pbi->max_threads = oxcf->max_threads;
  vp8_decoder_create_threads(pbi);
#endif

  /* vp8cx_init_de_quantizer() is first called here. Add check in
   * frame_init_dequantizer() to avoid unnecessary calling of
   * vp8cx_init_de_quantizer() for every frame. */
  vp8cx_init_de_quantizer(pbi);

  {
    VP8_COMMON *cm = &pbi->common;

    vp8_init_loop_filter(cm);
    cm->last_frame_type = KEY_FRAME;
    cm->last_filter_type = cm->filter_type;
    cm->last_sharpness_level = cm->sharpness_level;
  }

  pbi->common.error.setjmp = 0;
  return (VP8D_PTR)pbi;
}
VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
  VP9D_COMP *const pbi = vpx_memalign(32, sizeof(VP9D_COMP));
  VP9_COMMON *const cm = pbi ? &pbi->common : NULL;

  if (!cm) return NULL;

  vp9_zero(*pbi);

  // Initialize the references to not point to any frame buffers.
  memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));

  if (setjmp(cm->error.jmp)) {
    cm->error.setjmp = 0;
    vp9_remove_decompressor(pbi);
    return NULL;
  }

  cm->error.setjmp = 1;
  vp9_initialize_dec();

  vp9_create_common(cm);

  pbi->oxcf = *oxcf;
  pbi->ready_for_new_data = 1;
  cm->current_video_frame = 0;

  // vp9_init_dequantizer() is first called here. Add check in
  // frame_init_dequantizer() to avoid unnecessary calling of
  // vp9_init_dequantizer() for every frame.
  vp9_init_dequantizer(cm);

  vp9_loop_filter_init(cm);

  cm->error.setjmp = 0;
  pbi->decoded_key_frame = 0;

  init_macroblockd(pbi);

  vp9_worker_init(&pbi->lf_worker);

  return pbi;
}
VP9Decoder *vp9_decoder_create() {
  VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi));
  VP9_COMMON *const cm = pbi ? &pbi->common : NULL;

  if (!cm) return NULL;

  vp9_zero(*pbi);

  if (setjmp(cm->error.jmp)) {
    cm->error.setjmp = 0;
    vp9_decoder_remove(pbi);
    return NULL;
  }

  cm->error.setjmp = 1;
  initialize_dec();
  vp9_rtcd();

  // Initialize the references to not point to any frame buffers.
  vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));

  cm->current_video_frame = 0;
  pbi->ready_for_new_data = 1;

  // vp9_init_dequantizer() is first called here. Add check in
  // frame_init_dequantizer() to avoid unnecessary calling of
  // vp9_init_dequantizer() for every frame.
  vp9_init_dequantizer(cm);

  vp9_loop_filter_init(cm);

  cm->error.setjmp = 0;

  vp9_get_worker_interface()->init(&pbi->lf_worker);

  return pbi;
}
void vp8cx_create_encoder_threads(VP8_COMP *cpi) {
  const VP8_COMMON *cm = &cpi->common;

  cpi->b_multi_threaded = 0;
  cpi->encoding_thread_count = 0;

  if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) {
    int ithread;
    int th_count = cpi->oxcf.multi_threaded - 1;

    /* don't allocate more threads than cores available */
    if (cpi->oxcf.multi_threaded > cm->processor_core_count)
      th_count = cm->processor_core_count - 1;

    /* we have th_count + 1 (main) threads processing one row each */
    /* no point to have more threads than the sync range allows */
    if (th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1)) {
      th_count = (cm->mb_cols / cpi->mt_sync_range) - 1;
    }

    if (th_count == 0) return;

    CHECK_MEM_ERROR(cpi->h_encoding_thread,
                    vpx_malloc(sizeof(pthread_t) * th_count));
    CHECK_MEM_ERROR(cpi->h_event_start_encoding,
                    vpx_malloc(sizeof(sem_t) * th_count));
    CHECK_MEM_ERROR(cpi->mb_row_ei,
                    vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
    vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
    CHECK_MEM_ERROR(cpi->en_thread_data,
                    vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));
    CHECK_MEM_ERROR(cpi->mt_current_mb_col,
                    vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));

    sem_init(&cpi->h_event_end_encoding, 0, 0);

    cpi->b_multi_threaded = 1;
    cpi->encoding_thread_count = th_count;

    /* printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n",
       (cpi->encoding_thread_count + 1)); */

    for (ithread = 0; ithread < th_count; ithread++) {
      ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread];

      sem_init(&cpi->h_event_start_encoding[ithread], 0, 0);
      ethd->ithread = ithread;
      ethd->ptr1 = (void *)cpi;
      ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread];

      pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc,
                     ethd);
    }

    {
      LPFTHREAD_DATA *lpfthd = &cpi->lpf_thread_data;

      sem_init(&cpi->h_event_start_lpf, 0, 0);
      sem_init(&cpi->h_event_end_lpf, 0, 0);

      lpfthd->ptr1 = (void *)cpi;
      pthread_create(&cpi->h_filter_thread, 0, loopfilter_thread, lpfthd);
    }
  }
}
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) {
  int i;

  vp8_de_alloc_frame_buffers(oci);

  /* our internal buffers are always multiples of 16 */
  if ((width & 0xf) != 0) width += 16 - (width & 0xf);
  if ((height & 0xf) != 0) height += 16 - (height & 0xf);

  for (i = 0; i < NUM_YV12_BUFFERS; i++) {
    oci->fb_idx_ref_cnt[i] = 0;
    oci->yv12_fb[i].flags = 0;
    if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height,
                                    VP8BORDERINPIXELS) < 0)
      goto allocation_fail;
  }

  oci->new_fb_idx = 0;
  oci->lst_fb_idx = 1;
  oci->gld_fb_idx = 2;
  oci->alt_fb_idx = 3;

  oci->fb_idx_ref_cnt[0] = 1;
  oci->fb_idx_ref_cnt[1] = 1;
  oci->fb_idx_ref_cnt[2] = 1;
  oci->fb_idx_ref_cnt[3] = 1;

  if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16,
                                  VP8BORDERINPIXELS) < 0)
    goto allocation_fail;

  oci->mb_rows = height >> 4;
  oci->mb_cols = width >> 4;
  oci->MBs = oci->mb_rows * oci->mb_cols;
  oci->mode_info_stride = oci->mb_cols + 1;
  oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1),
                        sizeof(MODE_INFO));

  if (!oci->mip) goto allocation_fail;

  oci->mi = oci->mip + oci->mode_info_stride + 1;

  /* Allocation of previous mode info will be done in vp8_decode_frame()
   * as it is a decoder only data */

  oci->above_context =
      vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);

  if (!oci->above_context) goto allocation_fail;

#if CONFIG_POSTPROC
  if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height,
                                  VP8BORDERINPIXELS) < 0)
    goto allocation_fail;

  oci->post_proc_buffer_int_used = 0;
  vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
  vpx_memset(oci->post_proc_buffer.buffer_alloc, 128,
             oci->post_proc_buffer.frame_size);

  /* Allocate buffer to store post-processing filter coefficients.
   *
   * Note: Round up mb_cols to support SIMD reads
   */
  oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1));
  if (!oci->pp_limits_buffer) goto allocation_fail;
#endif

  return 0;

allocation_fail:
  vp8_de_alloc_frame_buffers(oci);
  return 1;
}
void vp9_encode_tiles_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp9_init_tile_data(cpi);

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    // While using SVC, we need to allocate threads according to the highest
    // resolution.
    if (cpi->use_svc) {
      int max_tile_cols = get_max_tile_cols(cpi);
      allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
    }

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp9_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp9_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }

      winterface->sync(worker);
    }
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *thread_data;

    worker->hook = (VPxWorkerHook)enc_worker_hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = NULL;
    thread_data = (EncWorkerData *)worker->data1;

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }

    // Handle use_nonrd_pick_mode case.
    if (cpi->sf.use_nonrd_pick_mode) {
      MACROBLOCK *const x = &thread_data->td->mb;
      MACROBLOCKD *const xd = &x->e_mbd;
      struct macroblock_plane *const p = x->plane;
      struct macroblockd_plane *const pd = xd->plane;
      PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
      int j;

      for (j = 0; j < MAX_MB_PLANE; ++j) {
        p[j].coeff = ctx->coeff_pbuf[j][0];
        p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
        pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
        p[j].eobs = ctx->eobs_pbuf[j][0];
      }
    }
  }

  // Encode a frame
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) {
  int i;

  vp8_de_alloc_frame_buffers(oci);

  if ((width & 0xf) != 0) width += 16 - (width & 0xf);
  if ((height & 0xf) != 0) height += 16 - (height & 0xf);

  for (i = 0; i < NUM_YV12_BUFFERS; i++) {
    oci->fb_idx_ref_cnt[i] = 0;
    oci->yv12_fb[i].flags = 0;
    if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height,
                                    VP8BORDERINPIXELS) < 0)
      goto allocation_fail;
  }

  oci->new_fb_idx = 0;
  oci->lst_fb_idx = 1;
  oci->gld_fb_idx = 2;
  oci->alt_fb_idx = 3;

  oci->fb_idx_ref_cnt[0] = 1;
  oci->fb_idx_ref_cnt[1] = 1;
  oci->fb_idx_ref_cnt[2] = 1;
  oci->fb_idx_ref_cnt[3] = 1;

  if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16,
                                  VP8BORDERINPIXELS) < 0)
    goto allocation_fail;

  oci->mb_rows = height >> 4;
  oci->mb_cols = width >> 4;
  oci->MBs = oci->mb_rows * oci->mb_cols;
  oci->mode_info_stride = oci->mb_cols + 1;
  oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1),
                        sizeof(MODE_INFO));

  if (!oci->mip) goto allocation_fail;

  oci->mi = oci->mip + oci->mode_info_stride + 1;

  oci->above_context =
      vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);

  if (!oci->above_context) goto allocation_fail;

#if CONFIG_POSTPROC
  if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height,
                                  VP8BORDERINPIXELS) < 0)
    goto allocation_fail;

  oci->post_proc_buffer_int_used = 0;
  vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
  vpx_memset(oci->post_proc_buffer.buffer_alloc, 128,
             oci->post_proc_buffer.frame_size);

  oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1));
  if (!oci->pp_limits_buffer) goto allocation_fail;
#endif

  return 0;

allocation_fail:
  vp8_de_alloc_frame_buffers(oci);
  return 1;
}
int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
                             int ss_x, int ss_y,
#if CONFIG_VP9_HIGHBITDEPTH
                             int use_highbitdepth,
#endif
                             int border, int byte_alignment,
                             vpx_codec_frame_buffer_t *fb,
                             vpx_get_frame_buffer_cb_fn_t cb, void *cb_priv) {
  if (ybf) {
    const int vp9_byte_align = (byte_alignment == 0) ? 1 : byte_alignment;
    const int aligned_width = (width + 7) & ~7;
    const int aligned_height = (height + 7) & ~7;
    const int y_stride = ((aligned_width + 2 * border) + 31) & ~31;
    const uint64_t yplane_size =
        (aligned_height + 2 * border) * (uint64_t)y_stride + byte_alignment;
    const int uv_width = aligned_width >> ss_x;
    const int uv_height = aligned_height >> ss_y;
    const int uv_stride = y_stride >> ss_x;
    const int uv_border_w = border >> ss_x;
    const int uv_border_h = border >> ss_y;
    const uint64_t uvplane_size =
        (uv_height + 2 * uv_border_h) * (uint64_t)uv_stride + byte_alignment;
#if CONFIG_ALPHA
    const int alpha_width = aligned_width;
    const int alpha_height = aligned_height;
    const int alpha_stride = y_stride;
    const int alpha_border_w = border;
    const int alpha_border_h = border;
    const uint64_t alpha_plane_size =
        (alpha_height + 2 * alpha_border_h) * (uint64_t)alpha_stride +
        byte_alignment;
#if CONFIG_VP9_HIGHBITDEPTH
    const uint64_t frame_size =
        (1 + use_highbitdepth) *
        (yplane_size + 2 * uvplane_size + alpha_plane_size);
#else
    const uint64_t frame_size =
        yplane_size + 2 * uvplane_size + alpha_plane_size;
#endif  // CONFIG_VP9_HIGHBITDEPTH
#else
#if CONFIG_VP9_HIGHBITDEPTH
    const uint64_t frame_size =
        (1 + use_highbitdepth) * (yplane_size + 2 * uvplane_size);
#else
    const uint64_t frame_size = yplane_size + 2 * uvplane_size;
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // CONFIG_ALPHA

    uint8_t *buf = NULL;

    if (cb != NULL) {
      const int align_addr_extra_size = 31;
      const uint64_t external_frame_size = frame_size + align_addr_extra_size;

      assert(fb != NULL);

      if (external_frame_size != (size_t)external_frame_size) return -1;

      // Allocation to hold larger frame, or first allocation.
      if (cb(cb_priv, (size_t)external_frame_size, fb) < 0) return -1;

      if (fb->data == NULL || fb->size < external_frame_size) return -1;

      ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32);
    } else if (frame_size > (size_t)ybf->buffer_alloc_sz) {
      // Allocation to hold larger frame, or first allocation.
      vpx_free(ybf->buffer_alloc);
      ybf->buffer_alloc = NULL;

      if (frame_size != (size_t)frame_size) return -1;

      ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, (size_t)frame_size);
      if (!ybf->buffer_alloc) return -1;

      ybf->buffer_alloc_sz = (int)frame_size;

      // This memset is needed for fixing valgrind error from C loop filter
      // due to access uninitialized memory in frame border. It could be
      // removed if border is totally removed.
      memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz);
    }

    /* Only support allocating buffers that have a border that's a multiple
     * of 32. The border restriction is required to get 16-byte alignment of
     * the start of the chroma rows without introducing an arbitrary gap
     * between planes, which would break the semantics of things like
     * vpx_img_set_rect(). */
    if (border & 0x1f) return -3;

    ybf->y_crop_width = width;
    ybf->y_crop_height = height;
    ybf->y_width = aligned_width;
    ybf->y_height = aligned_height;
    ybf->y_stride = y_stride;

    ybf->uv_crop_width = (width + ss_x) >> ss_x;
    ybf->uv_crop_height = (height + ss_y) >> ss_y;
    ybf->uv_width = uv_width;
    ybf->uv_height = uv_height;
    ybf->uv_stride = uv_stride;

    ybf->border = border;
    ybf->frame_size = (int)frame_size;
    ybf->subsampling_x = ss_x;
    ybf->subsampling_y = ss_y;

    buf = ybf->buffer_alloc;
#if CONFIG_VP9_HIGHBITDEPTH
    if (use_highbitdepth) {
      // Store uint16 addresses when using 16bit framebuffers
      buf = CONVERT_TO_BYTEPTR(ybf->buffer_alloc);
      ybf->flags = YV12_FLAG_HIGHBITDEPTH;
    } else {
      ybf->flags = 0;
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH

    ybf->y_buffer = (uint8_t *)yv12_align_addr(
        buf + (border * y_stride) + border, vp9_byte_align);
    ybf->u_buffer = (uint8_t *)yv12_align_addr(
        buf + yplane_size + (uv_border_h * uv_stride) + uv_border_w,
        vp9_byte_align);
    ybf->v_buffer = (uint8_t *)yv12_align_addr(
        buf + yplane_size + uvplane_size + (uv_border_h * uv_stride) +
            uv_border_w,
        vp9_byte_align);

#if CONFIG_ALPHA
    ybf->alpha_width = alpha_width;
    ybf->alpha_height = alpha_height;
    ybf->alpha_stride = alpha_stride;
    ybf->alpha_buffer = (uint8_t *)yv12_align_addr(
        buf + yplane_size + 2 * uvplane_size +
            (alpha_border_h * alpha_stride) + alpha_border_w,
        vp9_byte_align);
#endif
    ybf->corrupted = 0; /* assume not corrupted by errors */
    return 0;
  }
  return -2;
}
void vp10_encode_tiles_mt(VP10_COMP *cpi) {
  VP10_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp10_init_tile_data(cpi);

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));
    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp10_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp10_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }

      winterface->sync(worker);
    }
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *thread_data;

    worker->hook = (VPxWorkerHook)enc_worker_hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = NULL;
    thread_data = (EncWorkerData *)worker->data1;

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }
  }

  // Encode a frame
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      vp10_accumulate_frame_counts(cm, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
int vp9_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height,
                             int ss_x, int ss_y, int border,
                             vpx_codec_frame_buffer_t *fb,
                             vpx_get_frame_buffer_cb_fn_t cb, void *cb_priv) {
  if (ybf) {
    const int aligned_width = (width + 7) & ~7;
    const int aligned_height = (height + 7) & ~7;
    const int y_stride = ((aligned_width + 2 * border) + 31) & ~31;
    const int yplane_size = (aligned_height + 2 * border) * y_stride;
    const int uv_width = aligned_width >> ss_x;
    const int uv_height = aligned_height >> ss_y;
    const int uv_stride = y_stride >> ss_x;
    const int uv_border_w = border >> ss_x;
    const int uv_border_h = border >> ss_y;
    const int uvplane_size = (uv_height + 2 * uv_border_h) * uv_stride;
#if CONFIG_ALPHA
    const int alpha_width = aligned_width;
    const int alpha_height = aligned_height;
    const int alpha_stride = y_stride;
    const int alpha_border_w = border;
    const int alpha_border_h = border;
    const int alpha_plane_size =
        (alpha_height + 2 * alpha_border_h) * alpha_stride;
    const int frame_size = yplane_size + 2 * uvplane_size + alpha_plane_size;
#else
    const int frame_size = yplane_size + 2 * uvplane_size;
#endif

    if (cb != NULL) {
      const int align_addr_extra_size = 31;
      const size_t external_frame_size = frame_size + align_addr_extra_size;

      assert(fb != NULL);

      // Allocation to hold larger frame, or first allocation.
      if (cb(cb_priv, external_frame_size, fb) < 0) return -1;

      if (fb->data == NULL || fb->size < external_frame_size) return -1;

      // This memset is needed for fixing valgrind error from C loop filter
      // due to access uninitialized memory in frame border. It could be
      // removed if border is totally removed.
      vpx_memset(fb->data, 0, fb->size);

      ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32);
    } else if (frame_size > ybf->buffer_alloc_sz) {
      // Allocation to hold larger frame, or first allocation.
      if (ybf->buffer_alloc) vpx_free(ybf->buffer_alloc);
      ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, frame_size);
      if (!ybf->buffer_alloc) return -1;

      ybf->buffer_alloc_sz = frame_size;

      // This memset is needed for fixing valgrind error from C loop filter
      // due to access uninitialized memory in frame border. It could be
      // removed if border is totally removed.
      vpx_memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz);
    }

    /* Only support allocating buffers that have a border that's a multiple
     * of 32. The border restriction is required to get 16-byte alignment of
     * the start of the chroma rows without introducing an arbitrary gap
     * between planes, which would break the semantics of things like
     * vpx_img_set_rect(). */
    if (border & 0x1f) return -3;

    ybf->y_crop_width = width;
    ybf->y_crop_height = height;
    ybf->y_width = aligned_width;
    ybf->y_height = aligned_height;
    ybf->y_stride = y_stride;

    ybf->uv_crop_width = (width + ss_x) >> ss_x;
    ybf->uv_crop_height = (height + ss_y) >> ss_y;
    ybf->uv_width = uv_width;
    ybf->uv_height = uv_height;
    ybf->uv_stride = uv_stride;

    ybf->border = border;
    ybf->frame_size = frame_size;

    ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border;
    ybf->u_buffer = ybf->buffer_alloc + yplane_size +
                    (uv_border_h * uv_stride) + uv_border_w;
    ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size +
                    (uv_border_h * uv_stride) + uv_border_w;

#if CONFIG_ALPHA
    ybf->alpha_width = alpha_width;
    ybf->alpha_height = alpha_height;
    ybf->alpha_stride = alpha_stride;
    ybf->alpha_buffer = ybf->buffer_alloc + yplane_size + 2 * uvplane_size +
                        (alpha_border_h * alpha_stride) + alpha_border_w;
#endif
    ybf->corrupted = 0; /* assume not corrupted by errors */
    return 0;
  }
  return -2;
}
int vp8cx_create_encoder_threads(VP8_COMP *cpi) {
  const VP8_COMMON *cm = &cpi->common;

  cpi->b_multi_threaded = 0;
  cpi->encoding_thread_count = 0;
  cpi->b_lpf_running = 0;

  if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) {
    int ithread;
    int th_count = cpi->oxcf.multi_threaded - 1;
    int rc = 0;

    /* don't allocate more threads than cores available */
    if (cpi->oxcf.multi_threaded > cm->processor_core_count)
      th_count = cm->processor_core_count - 1;

    /* we have th_count + 1 (main) threads processing one row each */
    /* no point to have more threads than the sync range allows */
    if (th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1)) {
      th_count = (cm->mb_cols / cpi->mt_sync_range) - 1;
    }

    if (th_count == 0) return 0;

    CHECK_MEM_ERROR(cpi->h_encoding_thread,
                    vpx_malloc(sizeof(pthread_t) * th_count));
    CHECK_MEM_ERROR(cpi->h_event_start_encoding,
                    vpx_malloc(sizeof(sem_t) * th_count));
    CHECK_MEM_ERROR(cpi->mb_row_ei,
                    vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
    vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
    CHECK_MEM_ERROR(cpi->en_thread_data,
                    vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));

    sem_init(&cpi->h_event_end_encoding, 0, 0);

    cpi->b_multi_threaded = 1;
    cpi->encoding_thread_count = th_count;

    /* printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n",
       (cpi->encoding_thread_count + 1)); */

    for (ithread = 0; ithread < th_count; ithread++) {
      ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread];

      /* Setup block ptrs and offsets */
      vp8_setup_block_ptrs(&cpi->mb_row_ei[ithread].mb);
      vp8_setup_block_dptrs(&cpi->mb_row_ei[ithread].mb.e_mbd);

      sem_init(&cpi->h_event_start_encoding[ithread], 0, 0);

      ethd->ithread = ithread;
      ethd->ptr1 = (void *)cpi;
      ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread];

      rc = pthread_create(&cpi->h_encoding_thread[ithread], 0,
                          thread_encoding_proc, ethd);
      if (rc) break;
    }

    if (rc) {
      /* shutdown other threads */
      cpi->b_multi_threaded = 0;
      for (--ithread; ithread >= 0; ithread--) {
        pthread_join(cpi->h_encoding_thread[ithread], 0);
        sem_destroy(&cpi->h_event_start_encoding[ithread]);
      }
      sem_destroy(&cpi->h_event_end_encoding);

      /* free thread related resources */
      vpx_free(cpi->h_event_start_encoding);
      vpx_free(cpi->h_encoding_thread);
      vpx_free(cpi->mb_row_ei);
      vpx_free(cpi->en_thread_data);

      return -1;
    }

    {
      LPFTHREAD_DATA *lpfthd = &cpi->lpf_thread_data;

      sem_init(&cpi->h_event_start_lpf, 0, 0);
      sem_init(&cpi->h_event_end_lpf, 0, 0);

      lpfthd->ptr1 = (void *)cpi;
      rc = pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, lpfthd);

      if (rc) {
        /* shutdown other threads */
        cpi->b_multi_threaded = 0;
        for (--ithread; ithread >= 0; ithread--) {
          sem_post(&cpi->h_event_start_encoding[ithread]);
          pthread_join(cpi->h_encoding_thread[ithread], 0);
          sem_destroy(&cpi->h_event_start_encoding[ithread]);
        }
        sem_destroy(&cpi->h_event_end_encoding);
        sem_destroy(&cpi->h_event_end_lpf);
        sem_destroy(&cpi->h_event_start_lpf);

        /* free thread related resources */
        vpx_free(cpi->h_event_start_encoding);
        vpx_free(cpi->h_encoding_thread);
        vpx_free(cpi->mb_row_ei);
        vpx_free(cpi->en_thread_data);

        return -2;
      }
    }
  }
  return 0;
}
static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) {
  int i;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();

  ctx->last_show_frame = -1;
  ctx->next_submit_worker_id = 0;
  ctx->last_submit_worker_id = 0;
  ctx->next_output_worker_id = 0;
  ctx->frame_cache_read = 0;
  ctx->frame_cache_write = 0;
  ctx->num_cache_frames = 0;
  ctx->need_resync = 1;
  ctx->num_frame_workers =
      (ctx->frame_parallel_decode == 1) ? ctx->cfg.threads : 1;
  if (ctx->num_frame_workers > MAX_DECODE_THREADS)
    ctx->num_frame_workers = MAX_DECODE_THREADS;
  ctx->available_threads = ctx->num_frame_workers;
  ctx->flushed = 0;

  ctx->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool));
  if (ctx->buffer_pool == NULL) return VPX_CODEC_MEM_ERROR;

#if CONFIG_MULTITHREAD
  if (pthread_mutex_init(&ctx->buffer_pool->pool_mutex, NULL)) {
    set_error_detail(ctx, "Failed to allocate buffer pool mutex");
    return VPX_CODEC_MEM_ERROR;
  }
#endif

  ctx->frame_workers = (VPxWorker *)vpx_malloc(ctx->num_frame_workers *
                                               sizeof(*ctx->frame_workers));
  if (ctx->frame_workers == NULL) {
    set_error_detail(ctx, "Failed to allocate frame_workers");
    return VPX_CODEC_MEM_ERROR;
  }

  for (i = 0; i < ctx->num_frame_workers; ++i) {
    VPxWorker *const worker = &ctx->frame_workers[i];
    FrameWorkerData *frame_worker_data = NULL;
    winterface->init(worker);
    worker->data1 = vpx_memalign(32, sizeof(FrameWorkerData));
    if (worker->data1 == NULL) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data");
      return VPX_CODEC_MEM_ERROR;
    }
    frame_worker_data = (FrameWorkerData *)worker->data1;
    frame_worker_data->pbi = vp9_decoder_create(ctx->buffer_pool);
    if (frame_worker_data->pbi == NULL) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data");
      return VPX_CODEC_MEM_ERROR;
    }
    frame_worker_data->pbi->frame_worker_owner = worker;
    frame_worker_data->worker_id = i;
    frame_worker_data->scratch_buffer = NULL;
    frame_worker_data->scratch_buffer_size = 0;
    frame_worker_data->frame_context_ready = 0;
    frame_worker_data->received_frame = 0;

#if CONFIG_MULTITHREAD
    if (pthread_mutex_init(&frame_worker_data->stats_mutex, NULL)) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data mutex");
      return VPX_CODEC_MEM_ERROR;
    }

    if (pthread_cond_init(&frame_worker_data->stats_cond, NULL)) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data cond");
      return VPX_CODEC_MEM_ERROR;
    }
#endif

    // If decoding in serial mode, FrameWorker thread could create tile worker
    // thread or loopfilter thread.
    frame_worker_data->pbi->max_threads =
        (ctx->frame_parallel_decode == 0) ? ctx->cfg.threads : 0;

    frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order;
    frame_worker_data->pbi->frame_parallel_decode = ctx->frame_parallel_decode;
    frame_worker_data->pbi->common.frame_parallel_decode =
        ctx->frame_parallel_decode;
    worker->hook = (VPxWorkerHook)frame_worker_hook;
    if (!winterface->reset(worker)) {
      set_error_detail(ctx, "Frame Worker thread creation failed");
      return VPX_CODEC_MEM_ERROR;
    }
  }

  // If postprocessing was enabled by the application and a
  // configuration has not been provided, default it.
  if (!ctx->postproc_cfg_set &&
      (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC))
    set_default_ppflags(&ctx->postproc_cfg);

  init_buffer_callbacks(ctx);

  return VPX_CODEC_OK;
}
static vpx_codec_err_t decode_one_recon_ex(vpx_codec_alg_priv_t *ctx,
                                           const uint8_t **data,
                                           unsigned int data_sz,
                                           void *user_priv, int64_t deadline,
                                           void *texture) {
  vpx_codec_err_t res = VPX_CODEC_OK;
  VP9D_COMP *pbi;
  VP9D_COMP *pbi_storage;
  VP9D_COMP *my_pbi;
  static int flag = 0;
  int i_is_last_frame = 0;
  int ret = -1;

  struct vpx_usec_timer timer;
  unsigned long yuv2rgb_time = 0;
  unsigned long decode_time = 0;

  // ctx->img_avail = 0;
  vpx_usec_timer_start(&timer);

  if (data_sz == 0) {
    pbi = (VP9D_COMP *)ctx->pbi;
    if (!pbi->l_bufpool_flag_output) {
      return 0;
    }
  }

  /* Determine the stream parameters. Note that we rely on peek_si to
   * validate that we have a buffer that does not wrap around the top
   * of the heap. */
  if (!ctx->si.h)
    res = ctx->base.iface->dec.peek_si(*data, data_sz, &ctx->si);

  /* Perform deferred allocations, if required */
  if (!res && ctx->defer_alloc) {
    int i;

    for (i = 1; !res && i < NELEMENTS(ctx->mmaps); i++) {
      vpx_codec_dec_cfg_t cfg;

      cfg.w = ctx->si.w;
      cfg.h = ctx->si.h;
      ctx->mmaps[i].id = vp9_mem_req_segs[i].id;
      ctx->mmaps[i].sz = vp9_mem_req_segs[i].sz;
      ctx->mmaps[i].align = vp9_mem_req_segs[i].align;
      ctx->mmaps[i].flags = vp9_mem_req_segs[i].flags;

      if (!ctx->mmaps[i].sz)
        ctx->mmaps[i].sz =
            vp9_mem_req_segs[i].calc_sz(&cfg, ctx->base.init_flags);

      res = vpx_mmap_alloc(&ctx->mmaps[i]);
    }

    if (!res) vp9_finalize_mmaps(ctx);

    ctx->defer_alloc = 0;
  }

  /* Initialize the decoder instance on the first frame */
  if (!res && !ctx->decoder_init) {
    res = vpx_validate_mmaps(&ctx->si, ctx->mmaps, vp9_mem_req_segs,
                             NELEMENTS(vp9_mem_req_segs),
                             ctx->base.init_flags);

    if (!res) {
      VP9D_CONFIG oxcf;
      VP9D_PTR optr;
      VP9D_COMP *const new_pbi = vpx_memalign(32, sizeof(VP9D_COMP));
      VP9D_COMP *const new_pbi_two = vpx_memalign(32, sizeof(VP9D_COMP));

      vp9_initialize_dec();

      oxcf.width = ctx->si.w;
      oxcf.height = ctx->si.h;
      oxcf.version = 9;
      oxcf.postprocess = 0;
      oxcf.max_threads = ctx->cfg.threads;
      oxcf.inv_tile_order = ctx->invert_tile_order;
      optr = vp9_create_decompressor_recon(&oxcf);

      vp9_zero(*new_pbi);
      vp9_zero(*new_pbi_two);

      // If postprocessing was enabled by the application and a
      // configuration has not been provided, default it.
      if (!ctx->postproc_cfg_set &&
          (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) {
        ctx->postproc_cfg.post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK;
        ctx->postproc_cfg.deblocking_level = 4;
        ctx->postproc_cfg.noise_level = 0;
      }

      if (!optr) {
        res = VPX_CODEC_ERROR;
      } else {
        VP9D_COMP *const pbi = (VP9D_COMP *)optr;
        VP9_COMMON *const cm = &pbi->common;
        VP9_COMMON *const cm0 = &new_pbi->common;
        VP9_COMMON *const cm1 = &new_pbi_two->common;

        if (ctx->fb_list != NULL && ctx->realloc_fb_cb != NULL &&
            ctx->fb_count > 0) {
          cm->fb_list = ctx->fb_list;
          cm->fb_count = ctx->fb_count;
          cm->realloc_fb_cb = ctx->realloc_fb_cb;
          cm->user_priv = ctx->user_priv;
          CpuFlag = 1;
        } else {
          CpuFlag = 0;
          cm->fb_count = FRAME_BUFFERS;
        }
        cm->fb_lru = ctx->fb_lru;
        CHECK_MEM_ERROR(cm, cm->yv12_fb,
                        vpx_calloc(cm->fb_count, sizeof(*cm->yv12_fb)));
        CHECK_MEM_ERROR(cm, cm->fb_idx_ref_cnt,
                        vpx_calloc(cm->fb_count, sizeof(*cm->fb_idx_ref_cnt)));
        if (cm->fb_lru) {
          CHECK_MEM_ERROR(cm, cm->fb_idx_ref_lru,
                          vpx_calloc(cm->fb_count,
                                     sizeof(*cm->fb_idx_ref_lru)));
        }
        ctx->pbi = optr;
        ctx->storage_pbi[0] = new_pbi;
        ctx->storage_pbi[1] = new_pbi_two;

        // cm 0
        if (ctx->fb_list != NULL && ctx->realloc_fb_cb != NULL &&
            ctx->fb_count > 0) {
          cm0->fb_list = ctx->fb_list;
          cm0->fb_count = ctx->fb_count;
          cm0->realloc_fb_cb = ctx->realloc_fb_cb;
          cm0->user_priv = ctx->user_priv;
        } else {
          cm0->fb_count = FRAME_BUFFERS;
        }
        cm0->fb_lru = ctx->fb_lru;
        // CHECK_MEM_ERROR(cm, cm->yv12_fb,
        //                 vpx_calloc(cm->fb_count, sizeof(*cm->yv12_fb)));
        CHECK_MEM_ERROR(cm0, cm0->fb_idx_ref_cnt,
                        vpx_calloc(cm0->fb_count,
                                   sizeof(*cm0->fb_idx_ref_cnt)));
        if (cm0->fb_lru) {
          CHECK_MEM_ERROR(cm0, cm0->fb_idx_ref_lru,
                          vpx_calloc(cm0->fb_count,
                                     sizeof(*cm0->fb_idx_ref_lru)));
        }

        // cm 1
        if (ctx->fb_list != NULL && ctx->realloc_fb_cb != NULL &&
            ctx->fb_count > 0) {
          cm1->fb_list = ctx->fb_list;
          cm1->fb_count = ctx->fb_count;
          cm1->realloc_fb_cb = ctx->realloc_fb_cb;
          cm1->user_priv = ctx->user_priv;
        } else {
          cm1->fb_count = FRAME_BUFFERS;
        }
        cm1->fb_lru = ctx->fb_lru;
        // CHECK_MEM_ERROR(cm, cm->yv12_fb,
        //                 vpx_calloc(cm->fb_count, sizeof(*cm->yv12_fb)));
        CHECK_MEM_ERROR(cm1, cm1->fb_idx_ref_cnt,
                        vpx_calloc(cm1->fb_count,
                                   sizeof(*cm1->fb_idx_ref_cnt)));
        if (cm1->fb_lru) {
          CHECK_MEM_ERROR(cm1, cm1->fb_idx_ref_lru,
                          vpx_calloc(cm1->fb_count,
                                     sizeof(*cm1->fb_idx_ref_lru)));
        }
      }
    }

    ctx->decoder_init = 1;
  }

  if (!res && ctx->pbi) {
    YV12_BUFFER_CONFIG sd;
    int64_t time_stamp = 0, time_end_stamp = 0;
    vp9_ppflags_t flags = {0};

    if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) {
      flags.post_proc_flag =
#if CONFIG_POSTPROC_VISUALIZER
          (ctx->dbg_color_ref_frame_flag ? VP9D_DEBUG_CLR_FRM_REF_BLKS : 0) |
          (ctx->dbg_color_mb_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) |
          (ctx->dbg_color_b_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) |
          (ctx->dbg_display_mv_flag ? VP9D_DEBUG_DRAW_MV : 0) |
#endif
          ctx->postproc_cfg.post_proc_flag;

      flags.deblocking_level = ctx->postproc_cfg.deblocking_level;
      flags.noise_level = ctx->postproc_cfg.noise_level;
#if CONFIG_POSTPROC_VISUALIZER
      flags.display_ref_frame_flag = ctx->dbg_color_ref_frame_flag;
      flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag;
      flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag;
      flags.display_mv_flag = ctx->dbg_display_mv_flag;
#endif
    }

#if 0
    if (vp9_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) {
      VP9D_COMP *pbi = (VP9D_COMP *)ctx->pbi;
      res = update_error_state(ctx, &pbi->common.error);
    }

    if (!res && 0 == vp9_get_raw_frame(ctx->pbi, &sd, &time_stamp,
                                       &time_end_stamp, &flags)) {
      yuvconfig2image(&ctx->img, &sd, user_priv);
      ctx->img_avail = 1;
    }
#endif

    if (data_sz == 0) {
      i_is_last_frame = 1;
    }

    if (vp9_receive_compressed_data_recon(ctx->pbi, ctx->storage_pbi, data_sz,
                                          data, deadline, i_is_last_frame)) {
      pbi = (VP9D_COMP *)ctx->pbi;
      if (pbi->l_bufpool_flag_output == 0)
        pbi_storage = (VP9D_COMP *)ctx->storage_pbi[1];
      else
        pbi_storage =
            (VP9D_COMP *)ctx->storage_pbi[pbi->l_bufpool_flag_output & 1];

      res = update_error_state(ctx, &pbi_storage->common.error);
    }

    vpx_usec_timer_mark(&timer);
    decode_time = (unsigned int)vpx_usec_timer_elapsed(&timer);

    if (ctx->pbi) {
      pbi = (VP9D_COMP *)ctx->pbi;
      if (pbi->l_bufpool_flag_output) {
        ret = vp9_get_raw_frame(
            ctx->storage_pbi[pbi->l_bufpool_flag_output & 1], &sd,
            &time_stamp, &time_end_stamp, &flags);
        if (!pbi->res && 0 == ret) {
          // for render
          my_pbi =
              (VP9D_COMP *)(ctx->storage_pbi[pbi->l_bufpool_flag_output & 1]);

          yuv2rgba_ocl_obj.y_plane_offset =
              my_pbi->common.frame_to_show->y_buffer -
              inter_ocl_obj.buffer_pool_map_ptr;
          yuv2rgba_ocl_obj.u_plane_offset =
              my_pbi->common.frame_to_show->u_buffer -
              inter_ocl_obj.buffer_pool_map_ptr;
          yuv2rgba_ocl_obj.v_plane_offset =
              my_pbi->common.frame_to_show->v_buffer -
              inter_ocl_obj.buffer_pool_map_ptr;

          yuv2rgba_ocl_obj.Y_stride = my_pbi->common.frame_to_show->y_stride;
          yuv2rgba_ocl_obj.UV_stride = my_pbi->common.frame_to_show->uv_stride;

          yuv2rgba_ocl_obj.globalThreads[0] = my_pbi->common.width >> 1;
          yuv2rgba_ocl_obj.globalThreads[1] = my_pbi->common.height >> 1;

          vpx_usec_timer_start(&timer);
          vp9_yuv2rgba(&yuv2rgba_ocl_obj, texture);
          vpx_usec_timer_mark(&timer);
          yuv2rgb_time = (unsigned int)vpx_usec_timer_elapsed(&timer);

          fprintf(pLog,
                  "decode one frame time(without YUV to RGB): %lu us\n"
                  "the whole time of YUV to RGB: %lu us\n",
                  decode_time, yuv2rgb_time);
          // for render end

          yuvconfig2image(&ctx->img, &sd, user_priv);
          ctx->img_avail = 1;
        }