Example #1
0
static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk,
                               PICK_MODE_CONTEXT *ctx) {
  const int num_blk = (num_4x4_blk < 4 ? 4 : num_4x4_blk);
  const int num_pix = num_blk << 4;
  int i, k;
  ctx->num_4x4_blk = num_blk;

  CHECK_MEM_ERROR(cm, ctx->zcoeff_blk,
                  vpx_calloc(num_4x4_blk, sizeof(uint8_t)));
  for (i = 0; i < MAX_MB_PLANE; ++i) {
    for (k = 0; k < 3; ++k) {
      CHECK_MEM_ERROR(cm, ctx->coeff[i][k],
                      vpx_memalign(16, num_pix * sizeof(int16_t)));
      CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k],
                      vpx_memalign(16, num_pix * sizeof(int16_t)));
      CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k],
                      vpx_memalign(16, num_pix * sizeof(int16_t)));
      CHECK_MEM_ERROR(cm, ctx->eobs[i][k],
                      vpx_memalign(16, num_pix * sizeof(uint16_t)));
      ctx->coeff_pbuf[i][k]   = ctx->coeff[i][k];
      ctx->qcoeff_pbuf[i][k]  = ctx->qcoeff[i][k];
      ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k];
      ctx->eobs_pbuf[i][k]    = ctx->eobs[i][k];
    }
  }
}
Example #2
0
void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data,
                              VP9_COMMON *cm, int num_sbs, int max_threads,
                              int num_jobs) {
  int plane;
  const size_t dqcoeff_size = (num_sbs << DQCOEFFS_PER_SB_LOG2) *
                              sizeof(*row_mt_worker_data->dqcoeff[0]);
  row_mt_worker_data->num_jobs = num_jobs;
#if CONFIG_MULTITHREAD
  {
    int i;
    CHECK_MEM_ERROR(
        cm, row_mt_worker_data->recon_sync_mutex,
        vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_mutex) * num_jobs));
    if (row_mt_worker_data->recon_sync_mutex) {
      for (i = 0; i < num_jobs; ++i) {
        pthread_mutex_init(&row_mt_worker_data->recon_sync_mutex[i], NULL);
      }
    }

    CHECK_MEM_ERROR(
        cm, row_mt_worker_data->recon_sync_cond,
        vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_cond) * num_jobs));
    if (row_mt_worker_data->recon_sync_cond) {
      for (i = 0; i < num_jobs; ++i) {
        pthread_cond_init(&row_mt_worker_data->recon_sync_cond[i], NULL);
      }
    }
  }
#endif
  row_mt_worker_data->num_sbs = num_sbs;
  for (plane = 0; plane < 3; ++plane) {
    CHECK_MEM_ERROR(cm, row_mt_worker_data->dqcoeff[plane],
                    vpx_memalign(16, dqcoeff_size));
    memset(row_mt_worker_data->dqcoeff[plane], 0, dqcoeff_size);
    CHECK_MEM_ERROR(cm, row_mt_worker_data->eob[plane],
                    vpx_calloc(num_sbs << EOBS_PER_SB_LOG2,
                               sizeof(*row_mt_worker_data->eob[plane])));
  }
  CHECK_MEM_ERROR(cm, row_mt_worker_data->partition,
                  vpx_calloc(num_sbs * PARTITIONS_PER_SB,
                             sizeof(*row_mt_worker_data->partition)));
  CHECK_MEM_ERROR(cm, row_mt_worker_data->recon_map,
                  vpx_calloc(num_sbs, sizeof(*row_mt_worker_data->recon_map)));

  // allocate memory for thread_data
  if (row_mt_worker_data->thread_data == NULL) {
    const size_t thread_size =
        max_threads * sizeof(*row_mt_worker_data->thread_data);
    CHECK_MEM_ERROR(cm, row_mt_worker_data->thread_data,
                    vpx_memalign(32, thread_size));
  }
}
Example #3
0
int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
                                  int width, int height, int border) {
  if (ybf) {
    int aligned_width = (width + 15) & ~15;
    int aligned_height = (height + 15) & ~15;
    int y_stride = ((aligned_width + 2 * border) + 31) & ~31;
    int yplane_size = (aligned_height + 2 * border) * y_stride;
    int uv_width = aligned_width >> 1;
    int uv_height = aligned_height >> 1;
    /** There is currently a bunch of code which assumes
      *  uv_stride == y_stride/2, so enforce this here. */
    int uv_stride = y_stride >> 1;
    int uvplane_size = (uv_height + border) * uv_stride;
    const int frame_size = yplane_size + 2 * uvplane_size;

    if (!ybf->buffer_alloc) {
      ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, frame_size);
      ybf->buffer_alloc_sz = frame_size;
    }

    if (!ybf->buffer_alloc || ybf->buffer_alloc_sz < frame_size)
      return -1;

    /* Only support allocating buffers that have a border that's a multiple
     * of 32. The border restriction is required to get 16-byte alignment of
     * the start of the chroma rows without introducing an arbitrary gap
     * between planes, which would break the semantics of things like
     * vpx_img_set_rect(). */
    if (border & 0x1f)
      return -3;

    ybf->y_crop_width = width;
    ybf->y_crop_height = height;
    ybf->y_width  = aligned_width;
    ybf->y_height = aligned_height;
    ybf->y_stride = y_stride;

    ybf->uv_crop_width = (width + 1) / 2;
    ybf->uv_crop_height = (height + 1) / 2;
    ybf->uv_width = uv_width;
    ybf->uv_height = uv_height;
    ybf->uv_stride = uv_stride;

    ybf->alpha_width = 0;
    ybf->alpha_height = 0;
    ybf->alpha_stride = 0;

    ybf->border = border;
    ybf->frame_size = frame_size;

    ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border;
    ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2  * uv_stride) + border / 2;
    ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2  * uv_stride) + border / 2;
    ybf->alpha_buffer = NULL;

    ybf->corrupted = 0; /* assume not currupted by errors */
    return 0;
  }
  return -2;
}
Example #4
0
static void create_enc_workers(VP9_COMP *cpi, int num_workers) {
  VP9_COMMON *const cm = &cpi->common;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  int i;

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    // While using SVC, we need to allocate threads according to the highest
    // resolution. When row based multithreading is enabled, it is OK to
    // allocate more threads than the number of max tile columns.
    if (cpi->use_svc && !cpi->row_mt) {
      int max_tile_cols = get_max_tile_cols(cpi);
      allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
    }

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));

    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp9_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp9_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }
      winterface->sync(worker);
    }
  }
}
Example #5
0
static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx,
                            vpx_codec_priv_enc_mr_cfg_t *data) {
  // This function only allocates space for the vpx_codec_alg_priv_t
  // structure. More memory may be required at the time the stream
  // information becomes known.
  if (!ctx->priv) {
    vpx_codec_alg_priv_t *alg_priv = vpx_memalign(32, sizeof(*alg_priv));
    if (alg_priv == NULL)
      return VPX_CODEC_MEM_ERROR;

    vp9_zero(*alg_priv);

    ctx->priv = (vpx_codec_priv_t *)alg_priv;
    ctx->priv->sz = sizeof(*ctx->priv);
    ctx->priv->iface = ctx->iface;
    ctx->priv->alg_priv = alg_priv;
    ctx->priv->alg_priv->si.sz = sizeof(ctx->priv->alg_priv->si);
    ctx->priv->init_flags = ctx->init_flags;

    if (ctx->config.dec) {
      // Update the reference to the config structure to an internal copy.
      ctx->priv->alg_priv->cfg = *ctx->config.dec;
      ctx->config.dec = &ctx->priv->alg_priv->cfg;
    }
  }

  return VPX_CODEC_OK;
}
Example #6
0
void vp9_create_encoding_threads(VP9_COMP *cpi) {
  VP9_COMMON * const cm = &cpi->common;
  const VP9WorkerInterface * const winterface = vp9_get_worker_interface();
  int i;

  CHECK_MEM_ERROR(cm, cpi->enc_thread_hndl,
                  vpx_malloc(sizeof(*cpi->enc_thread_hndl) * cpi->max_threads));
  for (i = 0; i < cpi->max_threads; ++i) {
    VP9Worker * const worker = &cpi->enc_thread_hndl[i];
    winterface->init(worker);
    CHECK_MEM_ERROR(cm, worker->data1,
                    vpx_memalign(32, sizeof(thread_context)));
    worker->data2 = NULL;
    if (i < cpi->max_threads - 1 && !winterface->reset(worker)) {
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Tile decoder thread creation failed");
    }
  }
  // set row encoding hook
  for (i = 0; i < cpi->max_threads; ++i) {
    winterface->sync(&cpi->enc_thread_hndl[i]);
    cpi->enc_thread_hndl[i].hook = (VP9WorkerHook) encoding_thread_process;
  }
  CHECK_MEM_ERROR(cm, cpi->cur_sb_col,
                  vpx_malloc(sizeof(*cpi->cur_sb_col) * cm->sb_rows));
  // init cur sb col
  vpx_memset(cpi->cur_sb_col, -1, (sizeof(*cpi->cur_sb_col) * cm->sb_rows));
  // set up nsync (currently unused).
  cpi->sync_range = get_sync_range(cpi->oxcf.width);
}
void vp8cx_create_encoder_threads(VP8_COMP *cpi)
{
    cpi->b_multi_threaded = 0;

    cpi->processor_core_count = 32; //vp8_get_proc_core_count();

    CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cpi->common.mb_rows));

#if CONFIG_MULTITHREAD

    if (cpi->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
    {
        int ithread;

        if (cpi->oxcf.multi_threaded > cpi->processor_core_count)
            cpi->encoding_thread_count = cpi->processor_core_count - 1;
        else
            cpi->encoding_thread_count = cpi->oxcf.multi_threaded - 1;


        CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * cpi->encoding_thread_count));
        CHECK_MEM_ERROR(cpi->h_event_mbrencoding, vpx_malloc(sizeof(sem_t) * cpi->encoding_thread_count));
        CHECK_MEM_ERROR(cpi->mb_row_ei, vpx_memalign(32, sizeof(MB_ROW_COMP) * cpi->encoding_thread_count));
        vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * cpi->encoding_thread_count);
        CHECK_MEM_ERROR(cpi->en_thread_data, vpx_malloc(sizeof(ENCODETHREAD_DATA) * cpi->encoding_thread_count));
        //cpi->h_event_main = CreateEvent(NULL, FALSE, FALSE, NULL);
        sem_init(&cpi->h_event_main, 0, 0);

        cpi->b_multi_threaded = 1;

        //printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n", (cpi->encoding_thread_count +1));

        for (ithread = 0; ithread < cpi->encoding_thread_count; ithread++)
        {
            //cpi->h_event_mbrencoding[ithread] = CreateEvent(NULL, FALSE, FALSE, NULL);
            sem_init(&cpi->h_event_mbrencoding[ithread], 0, 0);
            cpi->en_thread_data[ithread].ithread = ithread;
            cpi->en_thread_data[ithread].ptr1 = (void *)cpi;
            cpi->en_thread_data[ithread].ptr2 = (void *)&cpi->mb_row_ei[ithread];

            //printf(" call begin thread %d \n", ithread);

            //cpi->h_encoding_thread[ithread] =   (HANDLE)_beginthreadex(
            //  NULL,           // security
            //  0,              // stksize
            //  thread_encoding_proc,
            //  (&cpi->en_thread_data[ithread]),          // Thread data
            //  0,
            //  NULL);

            pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, (&cpi->en_thread_data[ithread]));

        }

    }

#endif
}
Example #8
0
static struct VP8D_COMP * create_decompressor(VP8D_CONFIG *oxcf)
{
    VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP));

    if (!pbi)
        return NULL;

    memset(pbi, 0, sizeof(VP8D_COMP));

    if (setjmp(pbi->common.error.jmp))
    {
        pbi->common.error.setjmp = 0;
        remove_decompressor(pbi);
        return 0;
    }

    pbi->common.error.setjmp = 1;

    vp8_create_common(&pbi->common);

    pbi->common.current_video_frame = 0;
    pbi->ready_for_new_data = 1;

    /* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
     *  unnecessary calling of vp8cx_init_de_quantizer() for every frame.
     */
    vp8cx_init_de_quantizer(pbi);

    vp8_loop_filter_init(&pbi->common);

    pbi->common.error.setjmp = 0;

#if CONFIG_ERROR_CONCEALMENT
    pbi->ec_enabled = oxcf->error_concealment;
    pbi->overlaps = NULL;
#else
    (void)oxcf;
    pbi->ec_enabled = 0;
#endif
    /* Error concealment is activated after a key frame has been
     * decoded without errors when error concealment is enabled.
     */
    pbi->ec_active = 0;

    pbi->decoded_key_frame = 0;

    /* Independent partitions is activated when a frame updates the
     * token probability table to have equal probabilities over the
     * PREV_COEF context.
     */
    pbi->independent_partitions = 0;

    vp8_setup_block_dptrs(&pbi->mb);

    return pbi;
}
Example #9
0
VP9Decoder *vp9_decoder_create() {
  VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi));
  VP9_COMMON *const cm = pbi ? &pbi->common : NULL;

  if (!cm)
    return NULL;

  vp9_zero(*pbi);

  if (setjmp(cm->error.jmp)) {
    cm->error.setjmp = 0;
    vp9_decoder_remove(pbi);
    return NULL;
  }

  cm->error.setjmp = 1;

  CHECK_MEM_ERROR(cm, cm->fc,
                  (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
  CHECK_MEM_ERROR(cm, cm->frame_contexts,
                  (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS,
                  sizeof(*cm->frame_contexts)));

  pbi->need_resync = 1;
  initialize_dec();

  // Initialize the references to not point to any frame buffers.
  vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));

  cm->current_video_frame = 0;
  pbi->ready_for_new_data = 1;
  cm->bit_depth = VPX_BITS_8;
  cm->dequant_bit_depth = VPX_BITS_8;

  cm->alloc_mi = vp9_dec_alloc_mi;
  cm->free_mi = vp9_dec_free_mi;
  cm->setup_mi = vp9_dec_setup_mi;

  // vp9_init_dequantizer() is first called here. Add check in
  // frame_init_dequantizer() to avoid unnecessary calling of
  // vp9_init_dequantizer() for every frame.
  vp9_init_dequantizer(cm);

  vp9_loop_filter_init(cm);

  cm->error.setjmp = 0;

  vp9_get_worker_interface()->init(&pbi->lf_worker);

  return pbi;
}
Example #10
0
VP10Decoder *vp10_decoder_create(BufferPool *const pool) {
  VP10Decoder *volatile const pbi = vpx_memalign(32, sizeof(*pbi));
  VP10_COMMON *volatile const cm = pbi ? &pbi->common : NULL;

  if (!cm)
    return NULL;

  vp10_zero(*pbi);

  if (setjmp(cm->error.jmp)) {
    cm->error.setjmp = 0;
    vp10_decoder_remove(pbi);
    return NULL;
  }

  cm->error.setjmp = 1;

  CHECK_MEM_ERROR(cm, cm->fc,
                  (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
  CHECK_MEM_ERROR(cm, cm->frame_contexts,
                  (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS,
                  sizeof(*cm->frame_contexts)));

  pbi->need_resync = 1;
  once(initialize_dec);

  // Initialize the references to not point to any frame buffers.
  memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
  memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map));

  cm->current_video_frame = 0;
  pbi->ready_for_new_data = 1;
  pbi->common.buffer_pool = pool;

  cm->bit_depth = VPX_BITS_8;
  cm->dequant_bit_depth = VPX_BITS_8;

  cm->alloc_mi = vp10_dec_alloc_mi;
  cm->free_mi = vp10_dec_free_mi;
  cm->setup_mi = vp10_dec_setup_mi;

  vp10_loop_filter_init(cm);

  cm->error.setjmp = 0;

  vpx_get_worker_interface()->init(&pbi->lf_worker);

  return pbi;
}
Example #11
0
VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
{
    VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP));

    if (!pbi)
        return NULL;

    vpx_memset(pbi, 0, sizeof(VP8D_COMP));

    if (setjmp(pbi->common.error.jmp))
    {
        pbi->common.error.setjmp = 0;
        vp8dx_remove_decompressor(pbi);
        return 0;
    }

    pbi->common.error.setjmp = 1;
    vp8dx_initialize();

    vp8_create_common(&pbi->common);
    vp8_dmachine_specific_config(pbi);

    pbi->common.current_video_frame = 0;
    pbi->ready_for_new_data = 1;

    pbi->CPUFreq = 0; /*vp8_get_processor_freq();*/
#if CONFIG_MULTITHREAD
    pbi->max_threads = oxcf->max_threads;
    vp8_decoder_create_threads(pbi);
#endif

    /* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
     *  unnecessary calling of vp8cx_init_de_quantizer() for every frame.
     */
    vp8cx_init_de_quantizer(pbi);

    {
        VP8_COMMON *cm = &pbi->common;

        vp8_init_loop_filter(cm);
        cm->last_frame_type = KEY_FRAME;
        cm->last_filter_type = cm->filter_type;
        cm->last_sharpness_level = cm->sharpness_level;
    }

    pbi->common.error.setjmp = 0;
    return (VP8D_PTR) pbi;
}
Example #12
0
VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) {
  VP9D_COMP *const pbi = vpx_memalign(32, sizeof(VP9D_COMP));
  VP9_COMMON *const cm = pbi ? &pbi->common : NULL;

  if (!cm)
    return NULL;

  vp9_zero(*pbi);

  // Initialize the references to not point to any frame buffers.
  memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));

  if (setjmp(cm->error.jmp)) {
    cm->error.setjmp = 0;
    vp9_remove_decompressor(pbi);
    return NULL;
  }

  cm->error.setjmp = 1;
  vp9_initialize_dec();

  vp9_create_common(cm);

  pbi->oxcf = *oxcf;
  pbi->ready_for_new_data = 1;
  cm->current_video_frame = 0;

  // vp9_init_dequantizer() is first called here. Add check in
  // frame_init_dequantizer() to avoid unnecessary calling of
  // vp9_init_dequantizer() for every frame.
  vp9_init_dequantizer(cm);

  vp9_loop_filter_init(cm);

  cm->error.setjmp = 0;
  pbi->decoded_key_frame = 0;

  init_macroblockd(pbi);

  vp9_worker_init(&pbi->lf_worker);

  return pbi;
}
VP9Decoder *vp9_decoder_create() {
  VP9Decoder *const pbi = vpx_memalign(32, sizeof(*pbi));
  VP9_COMMON *const cm = pbi ? &pbi->common : NULL;

  if (!cm)
    return NULL;

  vp9_zero(*pbi);

  if (setjmp(cm->error.jmp)) {
    cm->error.setjmp = 0;
    vp9_decoder_remove(pbi);
    return NULL;
  }

  cm->error.setjmp = 1;
  initialize_dec();

  vp9_rtcd();

  // Initialize the references to not point to any frame buffers.
  vpx_memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));

  cm->current_video_frame = 0;
  pbi->ready_for_new_data = 1;

  // vp9_init_dequantizer() is first called here. Add check in
  // frame_init_dequantizer() to avoid unnecessary calling of
  // vp9_init_dequantizer() for every frame.
  vp9_init_dequantizer(cm);

  vp9_loop_filter_init(cm);

  cm->error.setjmp = 0;

  vp9_get_worker_interface()->init(&pbi->lf_worker);

  return pbi;
}
Example #14
0
void vp8cx_create_encoder_threads(VP8_COMP *cpi)
{
    const VP8_COMMON * cm = &cpi->common;

    cpi->b_multi_threaded = 0;
    cpi->encoding_thread_count = 0;

    if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
    {
        int ithread;
        int th_count = cpi->oxcf.multi_threaded - 1;

        /* don't allocate more threads than cores available */
        if (cpi->oxcf.multi_threaded > cm->processor_core_count)
            th_count = cm->processor_core_count - 1;

        /* we have th_count + 1 (main) threads processing one row each */
        /* no point to have more threads than the sync range allows */
        if(th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1))
        {
            th_count = (cm->mb_cols / cpi->mt_sync_range) - 1;
        }

        if(th_count == 0)
            return;

        CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * th_count));
        CHECK_MEM_ERROR(cpi->h_event_start_encoding, vpx_malloc(sizeof(sem_t) * th_count));
        CHECK_MEM_ERROR(cpi->mb_row_ei, vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
        vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
        CHECK_MEM_ERROR(cpi->en_thread_data,
                        vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));
        CHECK_MEM_ERROR(cpi->mt_current_mb_col,
                        vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows));

        sem_init(&cpi->h_event_end_encoding, 0, 0);

        cpi->b_multi_threaded = 1;
        cpi->encoding_thread_count = th_count;

        /*
        printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n",
               (cpi->encoding_thread_count +1));
        */

        for (ithread = 0; ithread < th_count; ithread++)
        {
            ENCODETHREAD_DATA * ethd = &cpi->en_thread_data[ithread];

            sem_init(&cpi->h_event_start_encoding[ithread], 0, 0);
            ethd->ithread = ithread;
            ethd->ptr1 = (void *)cpi;
            ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread];

            pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd);
        }

        {
            LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data;

            sem_init(&cpi->h_event_start_lpf, 0, 0);
            sem_init(&cpi->h_event_end_lpf, 0, 0);

            lpfthd->ptr1 = (void *)cpi;
            pthread_create(&cpi->h_filter_thread, 0, loopfilter_thread, lpfthd);
        }
    }

}
Example #15
0
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
{
    int i;

    vp8_de_alloc_frame_buffers(oci);

    /* our internal buffers are always multiples of 16 */
    if ((width & 0xf) != 0)
        width += 16 - (width & 0xf);

    if ((height & 0xf) != 0)
        height += 16 - (height & 0xf);


    for (i = 0; i < NUM_YV12_BUFFERS; i++)
    {
        oci->fb_idx_ref_cnt[i] = 0;
        oci->yv12_fb[i].flags = 0;
        if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
            goto allocation_fail;
    }

    oci->new_fb_idx = 0;
    oci->lst_fb_idx = 1;
    oci->gld_fb_idx = 2;
    oci->alt_fb_idx = 3;

    oci->fb_idx_ref_cnt[0] = 1;
    oci->fb_idx_ref_cnt[1] = 1;
    oci->fb_idx_ref_cnt[2] = 1;
    oci->fb_idx_ref_cnt[3] = 1;

    if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame,   width, 16, VP8BORDERINPIXELS) < 0)
        goto allocation_fail;

    oci->mb_rows = height >> 4;
    oci->mb_cols = width >> 4;
    oci->MBs = oci->mb_rows * oci->mb_cols;
    oci->mode_info_stride = oci->mb_cols + 1;
    oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));

    if (!oci->mip)
        goto allocation_fail;

    oci->mi = oci->mip + oci->mode_info_stride + 1;

    /* Allocation of previous mode info will be done in vp8_decode_frame()
     * as it is a decoder only data */

    oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);

    if (!oci->above_context)
        goto allocation_fail;

#if CONFIG_POSTPROC
    if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
        goto allocation_fail;

    oci->post_proc_buffer_int_used = 0;
    vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
    vpx_memset(oci->post_proc_buffer.buffer_alloc, 128,
               oci->post_proc_buffer.frame_size);

    /* Allocate buffer to store post-processing filter coefficients.
     *
     * Note: Round up mb_cols to support SIMD reads
     */
    oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1));
    if (!oci->pp_limits_buffer)
        goto allocation_fail;
#endif

    return 0;

allocation_fail:
    vp8_de_alloc_frame_buffers(oci);
    return 1;
}
Example #16
0
void vp9_encode_tiles_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp9_init_tile_data(cpi);

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    // While using SVC, we need to allocate threads according to the highest
    // resolution.
    if (cpi->use_svc) {
      int max_tile_cols = get_max_tile_cols(cpi);
      allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
    }

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));

    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers,
                    sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp9_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp9_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }

      winterface->sync(worker);
    }
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *thread_data;

    worker->hook = (VPxWorkerHook)enc_worker_hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = NULL;
    thread_data = (EncWorkerData*)worker->data1;

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }

    // Handle use_nonrd_pick_mode case.
    if (cpi->sf.use_nonrd_pick_mode) {
      MACROBLOCK *const x = &thread_data->td->mb;
      MACROBLOCKD *const xd = &x->e_mbd;
      struct macroblock_plane *const p = x->plane;
      struct macroblockd_plane *const pd = xd->plane;
      PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
      int j;

      for (j = 0; j < MAX_MB_PLANE; ++j) {
        p[j].coeff = ctx->coeff_pbuf[j][0];
        p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
        pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
        p[j].eobs = ctx->eobs_pbuf[j][0];
      }
    }
  }

  // Encode a frame
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
Example #17
0
int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
{
    int i;

    vp8_de_alloc_frame_buffers(oci);

    
    if ((width & 0xf) != 0)
        width += 16 - (width & 0xf);

    if ((height & 0xf) != 0)
        height += 16 - (height & 0xf);


    for (i = 0; i < NUM_YV12_BUFFERS; i++)
    {
        oci->fb_idx_ref_cnt[i] = 0;
        oci->yv12_fb[i].flags = 0;
        if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0)
            goto allocation_fail;
    }

    oci->new_fb_idx = 0;
    oci->lst_fb_idx = 1;
    oci->gld_fb_idx = 2;
    oci->alt_fb_idx = 3;

    oci->fb_idx_ref_cnt[0] = 1;
    oci->fb_idx_ref_cnt[1] = 1;
    oci->fb_idx_ref_cnt[2] = 1;
    oci->fb_idx_ref_cnt[3] = 1;

    if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame,   width, 16, VP8BORDERINPIXELS) < 0)
        goto allocation_fail;

    oci->mb_rows = height >> 4;
    oci->mb_cols = width >> 4;
    oci->MBs = oci->mb_rows * oci->mb_cols;
    oci->mode_info_stride = oci->mb_cols + 1;
    oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO));

    if (!oci->mip)
        goto allocation_fail;

    oci->mi = oci->mip + oci->mode_info_stride + 1;


    oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);

    if (!oci->above_context)
        goto allocation_fail;

#if CONFIG_POSTPROC
    if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
        goto allocation_fail;

    oci->post_proc_buffer_int_used = 0;
    vpx_memset(&oci->postproc_state, 0, sizeof(oci->postproc_state));
    vpx_memset(oci->post_proc_buffer.buffer_alloc, 128,
               oci->post_proc_buffer.frame_size);

    oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1));
    if (!oci->pp_limits_buffer)
        goto allocation_fail;
#endif

    return 0;

allocation_fail:
    vp8_de_alloc_frame_buffers(oci);
    return 1;
}
Example #18
0
int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
                             int width, int height,
                             int ss_x, int ss_y,
#if CONFIG_VP9_HIGHBITDEPTH
                             int use_highbitdepth,
#endif
                             int border,
                             int byte_alignment,
                             vpx_codec_frame_buffer_t *fb,
                             vpx_get_frame_buffer_cb_fn_t cb,
                             void *cb_priv) {
  if (ybf) {
    const int vp9_byte_align = (byte_alignment == 0) ? 1 : byte_alignment;
    const int aligned_width = (width + 7) & ~7;
    const int aligned_height = (height + 7) & ~7;
    const int y_stride = ((aligned_width + 2 * border) + 31) & ~31;
    const uint64_t yplane_size = (aligned_height + 2 * border) *
                                 (uint64_t)y_stride + byte_alignment;
    const int uv_width = aligned_width >> ss_x;
    const int uv_height = aligned_height >> ss_y;
    const int uv_stride = y_stride >> ss_x;
    const int uv_border_w = border >> ss_x;
    const int uv_border_h = border >> ss_y;
    const uint64_t uvplane_size = (uv_height + 2 * uv_border_h) *
                                  (uint64_t)uv_stride + byte_alignment;

#if CONFIG_ALPHA
    const int alpha_width = aligned_width;
    const int alpha_height = aligned_height;
    const int alpha_stride = y_stride;
    const int alpha_border_w = border;
    const int alpha_border_h = border;
    const uint64_t alpha_plane_size = (alpha_height + 2 * alpha_border_h) *
                                      (uint64_t)alpha_stride + byte_alignment;
#if CONFIG_VP9_HIGHBITDEPTH
    const uint64_t frame_size = (1 + use_highbitdepth) *
        (yplane_size + 2 * uvplane_size + alpha_plane_size);
#else
    const uint64_t frame_size = yplane_size + 2 * uvplane_size +
                                alpha_plane_size;
#endif  // CONFIG_VP9_HIGHBITDEPTH
#else
#if CONFIG_VP9_HIGHBITDEPTH
    const uint64_t frame_size =
        (1 + use_highbitdepth) * (yplane_size + 2 * uvplane_size);
#else
    const uint64_t frame_size = yplane_size + 2 * uvplane_size;
#endif  // CONFIG_VP9_HIGHBITDEPTH
#endif  // CONFIG_ALPHA

    uint8_t *buf = NULL;

    if (cb != NULL) {
      const int align_addr_extra_size = 31;
      const uint64_t external_frame_size = frame_size + align_addr_extra_size;

      assert(fb != NULL);

      if (external_frame_size != (size_t)external_frame_size)
        return -1;

      // Allocation to hold larger frame, or first allocation.
      if (cb(cb_priv, (size_t)external_frame_size, fb) < 0)
        return -1;

      if (fb->data == NULL || fb->size < external_frame_size)
        return -1;

      ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32);
    } else if (frame_size > (size_t)ybf->buffer_alloc_sz) {
      // Allocation to hold larger frame, or first allocation.
      vpx_free(ybf->buffer_alloc);
      ybf->buffer_alloc = NULL;

      if (frame_size != (size_t)frame_size)
        return -1;

      ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, (size_t)frame_size);
      if (!ybf->buffer_alloc)
        return -1;

      ybf->buffer_alloc_sz = (int)frame_size;

      // This memset is needed for fixing valgrind error from C loop filter
      // due to access uninitialized memory in frame border. It could be
      // removed if border is totally removed.
      memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz);
    }

    /* Only support allocating buffers that have a border that's a multiple
     * of 32. The border restriction is required to get 16-byte alignment of
     * the start of the chroma rows without introducing an arbitrary gap
     * between planes, which would break the semantics of things like
     * vpx_img_set_rect(). */
    if (border & 0x1f)
      return -3;

    ybf->y_crop_width = width;
    ybf->y_crop_height = height;
    ybf->y_width  = aligned_width;
    ybf->y_height = aligned_height;
    ybf->y_stride = y_stride;

    ybf->uv_crop_width = (width + ss_x) >> ss_x;
    ybf->uv_crop_height = (height + ss_y) >> ss_y;
    ybf->uv_width = uv_width;
    ybf->uv_height = uv_height;
    ybf->uv_stride = uv_stride;

    ybf->border = border;
    ybf->frame_size = (int)frame_size;
    ybf->subsampling_x = ss_x;
    ybf->subsampling_y = ss_y;

    buf = ybf->buffer_alloc;
#if CONFIG_VP9_HIGHBITDEPTH
    if (use_highbitdepth) {
      // Store uint16 addresses when using 16bit framebuffers
      buf = CONVERT_TO_BYTEPTR(ybf->buffer_alloc);
      ybf->flags = YV12_FLAG_HIGHBITDEPTH;
    } else {
      ybf->flags = 0;
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH

    ybf->y_buffer = (uint8_t *)yv12_align_addr(
        buf + (border * y_stride) + border, vp9_byte_align);
    ybf->u_buffer = (uint8_t *)yv12_align_addr(
        buf + yplane_size + (uv_border_h * uv_stride) + uv_border_w,
        vp9_byte_align);
    ybf->v_buffer = (uint8_t *)yv12_align_addr(
        buf + yplane_size + uvplane_size + (uv_border_h * uv_stride) +
        uv_border_w, vp9_byte_align);

#if CONFIG_ALPHA
    ybf->alpha_width = alpha_width;
    ybf->alpha_height = alpha_height;
    ybf->alpha_stride = alpha_stride;
    ybf->alpha_buffer = (uint8_t *)yv12_align_addr(
        buf + yplane_size + 2 * uvplane_size +
        (alpha_border_h * alpha_stride) + alpha_border_w, vp9_byte_align);
#endif
    ybf->corrupted = 0; /* assume not corrupted by errors */
    return 0;
  }
  return -2;
}
Example #19
0
void vp10_encode_tiles_mt(VP10_COMP *cpi) {
  VP10_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp10_init_tile_data(cpi);

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));

    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers,
                    sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp10_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp10_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }

      winterface->sync(worker);
    }
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *thread_data;

    worker->hook = (VPxWorkerHook)enc_worker_hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = NULL;
    thread_data = (EncWorkerData*)worker->data1;

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }
  }

  // Encode a frame
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      vp10_accumulate_frame_counts(cm, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
Example #20
0
int vp9_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf,
                             int width, int height,
                             int ss_x, int ss_y, int border,
                             vpx_codec_frame_buffer_t *fb,
                             vpx_get_frame_buffer_cb_fn_t cb,
                             void *cb_priv) {
  if (ybf) {
    const int aligned_width = (width + 7) & ~7;
    const int aligned_height = (height + 7) & ~7;
    const int y_stride = ((aligned_width + 2 * border) + 31) & ~31;
    const int yplane_size = (aligned_height + 2 * border) * y_stride;
    const int uv_width = aligned_width >> ss_x;
    const int uv_height = aligned_height >> ss_y;
    const int uv_stride = y_stride >> ss_x;
    const int uv_border_w = border >> ss_x;
    const int uv_border_h = border >> ss_y;
    const int uvplane_size = (uv_height + 2 * uv_border_h) * uv_stride;
#if CONFIG_ALPHA
    const int alpha_width = aligned_width;
    const int alpha_height = aligned_height;
    const int alpha_stride = y_stride;
    const int alpha_border_w = border;
    const int alpha_border_h = border;
    const int alpha_plane_size = (alpha_height + 2 * alpha_border_h) *
                                 alpha_stride;
    const int frame_size = yplane_size + 2 * uvplane_size +
                           alpha_plane_size;
#else
    const int frame_size = yplane_size + 2 * uvplane_size;
#endif
    if (cb != NULL) {
      const int align_addr_extra_size = 31;
      const size_t external_frame_size = frame_size + align_addr_extra_size;

      assert(fb != NULL);

      // Allocation to hold larger frame, or first allocation.
      if (cb(cb_priv, external_frame_size, fb) < 0)
        return -1;

      if (fb->data == NULL || fb->size < external_frame_size)
        return -1;

      // This memset is needed for fixing valgrind error from C loop filter
      // due to access uninitialized memory in frame border. It could be
      // removed if border is totally removed.
      vpx_memset(fb->data, 0, fb->size);

      ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32);
    } else if (frame_size > ybf->buffer_alloc_sz) {
      // Allocation to hold larger frame, or first allocation.
      if (ybf->buffer_alloc)
        vpx_free(ybf->buffer_alloc);
      ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, frame_size);
      if (!ybf->buffer_alloc)
        return -1;

      ybf->buffer_alloc_sz = frame_size;

      // This memset is needed for fixing valgrind error from C loop filter
      // due to access uninitialized memory in frame border. It could be
      // removed if border is totally removed.
      vpx_memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz);
    }

    /* Only support allocating buffers that have a border that's a multiple
     * of 32. The border restriction is required to get 16-byte alignment of
     * the start of the chroma rows without introducing an arbitrary gap
     * between planes, which would break the semantics of things like
     * vpx_img_set_rect(). */
    if (border & 0x1f)
      return -3;

    ybf->y_crop_width = width;
    ybf->y_crop_height = height;
    ybf->y_width  = aligned_width;
    ybf->y_height = aligned_height;
    ybf->y_stride = y_stride;

    ybf->uv_crop_width = (width + ss_x) >> ss_x;
    ybf->uv_crop_height = (height + ss_y) >> ss_y;
    ybf->uv_width = uv_width;
    ybf->uv_height = uv_height;
    ybf->uv_stride = uv_stride;

    ybf->border = border;
    ybf->frame_size = frame_size;

    ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border;
    ybf->u_buffer = ybf->buffer_alloc + yplane_size +
                    (uv_border_h * uv_stride) + uv_border_w;
    ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size +
                    (uv_border_h * uv_stride) + uv_border_w;

#if CONFIG_ALPHA
    ybf->alpha_width = alpha_width;
    ybf->alpha_height = alpha_height;
    ybf->alpha_stride = alpha_stride;
    ybf->alpha_buffer = ybf->buffer_alloc + yplane_size + 2 * uvplane_size +
                        (alpha_border_h * alpha_stride) + alpha_border_w;
#endif
    ybf->corrupted = 0; /* assume not corrupted by errors */
    return 0;
  }
  return -2;
}
Example #21
0
int vp8cx_create_encoder_threads(VP8_COMP *cpi)
{
    const VP8_COMMON * cm = &cpi->common;

    cpi->b_multi_threaded = 0;
    cpi->encoding_thread_count = 0;
    cpi->b_lpf_running = 0;

    if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1)
    {
        int ithread;
        int th_count = cpi->oxcf.multi_threaded - 1;
        int rc = 0;

        /* don't allocate more threads than cores available */
        if (cpi->oxcf.multi_threaded > cm->processor_core_count)
            th_count = cm->processor_core_count - 1;

        /* we have th_count + 1 (main) threads processing one row each */
        /* no point to have more threads than the sync range allows */
        if(th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1))
        {
            th_count = (cm->mb_cols / cpi->mt_sync_range) - 1;
        }

        if(th_count == 0)
            return 0;

        CHECK_MEM_ERROR(cpi->h_encoding_thread,
                        vpx_malloc(sizeof(pthread_t) * th_count));
        CHECK_MEM_ERROR(cpi->h_event_start_encoding,
                        vpx_malloc(sizeof(sem_t) * th_count));
        CHECK_MEM_ERROR(cpi->mb_row_ei,
                        vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count));
        vpx_memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count);
        CHECK_MEM_ERROR(cpi->en_thread_data,
                        vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count));

        sem_init(&cpi->h_event_end_encoding, 0, 0);

        cpi->b_multi_threaded = 1;
        cpi->encoding_thread_count = th_count;

        /*
        printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n",
               (cpi->encoding_thread_count +1));
        */

        for (ithread = 0; ithread < th_count; ithread++)
        {
            ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread];

            /* Setup block ptrs and offsets */
            vp8_setup_block_ptrs(&cpi->mb_row_ei[ithread].mb);
            vp8_setup_block_dptrs(&cpi->mb_row_ei[ithread].mb.e_mbd);

            sem_init(&cpi->h_event_start_encoding[ithread], 0, 0);

            ethd->ithread = ithread;
            ethd->ptr1 = (void *)cpi;
            ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread];

            rc = pthread_create(&cpi->h_encoding_thread[ithread], 0,
                                thread_encoding_proc, ethd);
            if(rc)
                break;
        }

        if(rc)
        {
            /* shutdown other threads */
            cpi->b_multi_threaded = 0;
            for(--ithread; ithread >= 0; ithread--)
            {
                pthread_join(cpi->h_encoding_thread[ithread], 0);
                sem_destroy(&cpi->h_event_start_encoding[ithread]);
            }
            sem_destroy(&cpi->h_event_end_encoding);

            /* free thread related resources */
            vpx_free(cpi->h_event_start_encoding);
            vpx_free(cpi->h_encoding_thread);
            vpx_free(cpi->mb_row_ei);
            vpx_free(cpi->en_thread_data);

            return -1;
        }


        {
            LPFTHREAD_DATA * lpfthd = &cpi->lpf_thread_data;

            sem_init(&cpi->h_event_start_lpf, 0, 0);
            sem_init(&cpi->h_event_end_lpf, 0, 0);

            lpfthd->ptr1 = (void *)cpi;
            rc = pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter,
                                lpfthd);

            if(rc)
            {
                /* shutdown other threads */
                cpi->b_multi_threaded = 0;
                for(--ithread; ithread >= 0; ithread--)
                {
                    sem_post(&cpi->h_event_start_encoding[ithread]);
                    pthread_join(cpi->h_encoding_thread[ithread], 0);
                    sem_destroy(&cpi->h_event_start_encoding[ithread]);
                }
                sem_destroy(&cpi->h_event_end_encoding);
                sem_destroy(&cpi->h_event_end_lpf);
                sem_destroy(&cpi->h_event_start_lpf);

                /* free thread related resources */
                vpx_free(cpi->h_event_start_encoding);
                vpx_free(cpi->h_encoding_thread);
                vpx_free(cpi->mb_row_ei);
                vpx_free(cpi->en_thread_data);

                return -2;
            }
        }
    }
    return 0;
}
Example #22
0
static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) {
  int i;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();

  ctx->last_show_frame = -1;
  ctx->next_submit_worker_id = 0;
  ctx->last_submit_worker_id = 0;
  ctx->next_output_worker_id = 0;
  ctx->frame_cache_read = 0;
  ctx->frame_cache_write = 0;
  ctx->num_cache_frames = 0;
  ctx->need_resync = 1;
  ctx->num_frame_workers =
      (ctx->frame_parallel_decode == 1) ? ctx->cfg.threads: 1;
  if (ctx->num_frame_workers > MAX_DECODE_THREADS)
    ctx->num_frame_workers = MAX_DECODE_THREADS;
  ctx->available_threads = ctx->num_frame_workers;
  ctx->flushed = 0;

  ctx->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool));
  if (ctx->buffer_pool == NULL)
    return VPX_CODEC_MEM_ERROR;

#if CONFIG_MULTITHREAD
    if (pthread_mutex_init(&ctx->buffer_pool->pool_mutex, NULL)) {
      set_error_detail(ctx, "Failed to allocate buffer pool mutex");
      return VPX_CODEC_MEM_ERROR;
    }
#endif

  ctx->frame_workers = (VPxWorker *)
      vpx_malloc(ctx->num_frame_workers * sizeof(*ctx->frame_workers));
  if (ctx->frame_workers == NULL) {
    set_error_detail(ctx, "Failed to allocate frame_workers");
    return VPX_CODEC_MEM_ERROR;
  }

  for (i = 0; i < ctx->num_frame_workers; ++i) {
    VPxWorker *const worker = &ctx->frame_workers[i];
    FrameWorkerData *frame_worker_data = NULL;
    winterface->init(worker);
    worker->data1 = vpx_memalign(32, sizeof(FrameWorkerData));
    if (worker->data1 == NULL) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data");
      return VPX_CODEC_MEM_ERROR;
    }
    frame_worker_data = (FrameWorkerData *)worker->data1;
    frame_worker_data->pbi = vp9_decoder_create(ctx->buffer_pool);
    if (frame_worker_data->pbi == NULL) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data");
      return VPX_CODEC_MEM_ERROR;
    }
    frame_worker_data->pbi->frame_worker_owner = worker;
    frame_worker_data->worker_id = i;
    frame_worker_data->scratch_buffer = NULL;
    frame_worker_data->scratch_buffer_size = 0;
    frame_worker_data->frame_context_ready = 0;
    frame_worker_data->received_frame = 0;
#if CONFIG_MULTITHREAD
    if (pthread_mutex_init(&frame_worker_data->stats_mutex, NULL)) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data mutex");
      return VPX_CODEC_MEM_ERROR;
    }

    if (pthread_cond_init(&frame_worker_data->stats_cond, NULL)) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data cond");
      return VPX_CODEC_MEM_ERROR;
    }
#endif
    // If decoding in serial mode, FrameWorker thread could create tile worker
    // thread or loopfilter thread.
    frame_worker_data->pbi->max_threads =
        (ctx->frame_parallel_decode == 0) ? ctx->cfg.threads : 0;

    frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order;
    frame_worker_data->pbi->frame_parallel_decode = ctx->frame_parallel_decode;
    frame_worker_data->pbi->common.frame_parallel_decode =
        ctx->frame_parallel_decode;
    worker->hook = (VPxWorkerHook)frame_worker_hook;
    if (!winterface->reset(worker)) {
      set_error_detail(ctx, "Frame Worker thread creation failed");
      return VPX_CODEC_MEM_ERROR;
    }
  }

  // If postprocessing was enabled by the application and a
  // configuration has not been provided, default it.
  if (!ctx->postproc_cfg_set &&
      (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC))
    set_default_ppflags(&ctx->postproc_cfg);

  init_buffer_callbacks(ctx);

  return VPX_CODEC_OK;
}
Example #23
0
static vpx_codec_err_t decode_one_recon_ex(vpx_codec_alg_priv_t *ctx,
                                  const uint8_t **data, unsigned int data_sz,
                                  void *user_priv, int64_t deadline, void *texture) {
  vpx_codec_err_t res = VPX_CODEC_OK;

  VP9D_COMP *pbi;
  VP9D_COMP *pbi_storage;
  VP9D_COMP *my_pbi;
  static int flag = 0;
  int i_is_last_frame = 0;
  int ret = -1;

  struct vpx_usec_timer timer;
  unsigned long yuv2rgb_time = 0;
  unsigned long decode_time = 0;

  // ctx->img_avail = 0;
  vpx_usec_timer_start(&timer);
  if (data_sz == 0) {
    pbi = (VP9D_COMP *)ctx->pbi;
    if (!pbi->l_bufpool_flag_output) {
      return 0;
    }
  }

  /* Determine the stream parameters. Note that we rely on peek_si to
   * validate that we have a buffer that does not wrap around the top
   * of the heap.
   */
  if (!ctx->si.h)
    res = ctx->base.iface->dec.peek_si(*data, data_sz, &ctx->si);


  /* Perform deferred allocations, if required */
  if (!res && ctx->defer_alloc) {
    int i;

    for (i = 1; !res && i < NELEMENTS(ctx->mmaps); i++) {
      vpx_codec_dec_cfg_t cfg;

      cfg.w = ctx->si.w;
      cfg.h = ctx->si.h;
      ctx->mmaps[i].id = vp9_mem_req_segs[i].id;
      ctx->mmaps[i].sz = vp9_mem_req_segs[i].sz;
      ctx->mmaps[i].align = vp9_mem_req_segs[i].align;
      ctx->mmaps[i].flags = vp9_mem_req_segs[i].flags;

      if (!ctx->mmaps[i].sz)
        ctx->mmaps[i].sz = vp9_mem_req_segs[i].calc_sz(&cfg,
                                                       ctx->base.init_flags);

      res = vpx_mmap_alloc(&ctx->mmaps[i]);
    }

    if (!res)
      vp9_finalize_mmaps(ctx);

    ctx->defer_alloc = 0;
  }

  /* Initialize the decoder instance on the first frame*/
  if (!res && !ctx->decoder_init) {
    res = vpx_validate_mmaps(&ctx->si, ctx->mmaps,
                             vp9_mem_req_segs, NELEMENTS(vp9_mem_req_segs),
                             ctx->base.init_flags);

    if (!res) {
      VP9D_CONFIG oxcf;
      VP9D_PTR optr;

      VP9D_COMP *const new_pbi = vpx_memalign(32, sizeof(VP9D_COMP));
      VP9D_COMP *const new_pbi_two = vpx_memalign(32, sizeof(VP9D_COMP));

      vp9_initialize_dec();

      oxcf.width = ctx->si.w;
      oxcf.height = ctx->si.h;
      oxcf.version = 9;
      oxcf.postprocess = 0;
      oxcf.max_threads = ctx->cfg.threads;
      oxcf.inv_tile_order = ctx->invert_tile_order;
      optr = vp9_create_decompressor_recon(&oxcf);

      vp9_zero(*new_pbi);
      vp9_zero(*new_pbi_two);

      // If postprocessing was enabled by the application and a
      // configuration has not been provided, default it.
      if (!ctx->postproc_cfg_set &&
          (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) {
        ctx->postproc_cfg.post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK;
        ctx->postproc_cfg.deblocking_level = 4;
        ctx->postproc_cfg.noise_level = 0;
      }

      if (!optr) {
        res = VPX_CODEC_ERROR;
      } else {
        VP9D_COMP *const pbi = (VP9D_COMP*)optr;
        VP9_COMMON *const cm = &pbi->common;

        VP9_COMMON *const cm0 = &new_pbi->common;
        VP9_COMMON *const cm1 = &new_pbi_two->common;

        if (ctx->fb_list != NULL && ctx->realloc_fb_cb != NULL &&
            ctx->fb_count > 0) {
          cm->fb_list = ctx->fb_list;
          cm->fb_count = ctx->fb_count;
          cm->realloc_fb_cb = ctx->realloc_fb_cb;
          cm->user_priv = ctx->user_priv;
          CpuFlag = 1;
        } else {
          CpuFlag = 0;
          cm->fb_count = FRAME_BUFFERS;
        }
        cm->fb_lru = ctx->fb_lru;
        CHECK_MEM_ERROR(cm, cm->yv12_fb,
                        vpx_calloc(cm->fb_count, sizeof(*cm->yv12_fb)));
        CHECK_MEM_ERROR(cm, cm->fb_idx_ref_cnt,
                        vpx_calloc(cm->fb_count, sizeof(*cm->fb_idx_ref_cnt)));
        if (cm->fb_lru) {
          CHECK_MEM_ERROR(cm, cm->fb_idx_ref_lru,
                          vpx_calloc(cm->fb_count,
                                     sizeof(*cm->fb_idx_ref_lru)));
        }
        ctx->pbi = optr;

        ctx->storage_pbi[0] = new_pbi;
        ctx->storage_pbi[1] = new_pbi_two;

        // cm 0
        if (ctx->fb_list != NULL && ctx->realloc_fb_cb != NULL &&
            ctx->fb_count > 0) {
          cm0->fb_list = ctx->fb_list;
          cm0->fb_count = ctx->fb_count;
          cm0->realloc_fb_cb = ctx->realloc_fb_cb;
          cm0->user_priv = ctx->user_priv;
        } else {
          cm0->fb_count = FRAME_BUFFERS;
        }
        cm0->fb_lru = ctx->fb_lru;
        // CHECK_MEM_ERROR(cm, cm->yv12_fb,
                        // vpx_calloc(cm->fb_count, sizeof(*cm->yv12_fb)));
        CHECK_MEM_ERROR(cm0, cm0->fb_idx_ref_cnt,
                        vpx_calloc(cm0->fb_count, sizeof(*cm0->fb_idx_ref_cnt)));
        if (cm0->fb_lru) {
          CHECK_MEM_ERROR(cm0, cm0->fb_idx_ref_lru,
                          vpx_calloc(cm0->fb_count,
                                     sizeof(*cm0->fb_idx_ref_lru)));
        }

        // cm 1
        if (ctx->fb_list != NULL && ctx->realloc_fb_cb != NULL &&
            ctx->fb_count > 0) {
          cm1->fb_list = ctx->fb_list;
          cm1->fb_count = ctx->fb_count;
          cm1->realloc_fb_cb = ctx->realloc_fb_cb;
          cm1->user_priv = ctx->user_priv;
        } else {
          cm1->fb_count = FRAME_BUFFERS;
        }
        cm1->fb_lru = ctx->fb_lru;
        // CHECK_MEM_ERROR(cm, cm->yv12_fb,
                        // vpx_calloc(cm->fb_count, sizeof(*cm->yv12_fb)));
        CHECK_MEM_ERROR(cm1, cm1->fb_idx_ref_cnt,
                        vpx_calloc(cm1->fb_count, sizeof(*cm1->fb_idx_ref_cnt)));
        if (cm1->fb_lru) {
          CHECK_MEM_ERROR(cm1, cm1->fb_idx_ref_lru,
                          vpx_calloc(cm1->fb_count,
                                     sizeof(*cm1->fb_idx_ref_lru)));
        }

      }
    }

    ctx->decoder_init = 1;
  }

  if (!res && ctx->pbi) {
    YV12_BUFFER_CONFIG sd;
    int64_t time_stamp = 0, time_end_stamp = 0;
    vp9_ppflags_t flags = {0};

    if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) {
      flags.post_proc_flag =
#if CONFIG_POSTPROC_VISUALIZER
          (ctx->dbg_color_ref_frame_flag ? VP9D_DEBUG_CLR_FRM_REF_BLKS : 0) |
          (ctx->dbg_color_mb_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) |
          (ctx->dbg_color_b_modes_flag ? VP9D_DEBUG_CLR_BLK_MODES : 0) |
          (ctx->dbg_display_mv_flag ? VP9D_DEBUG_DRAW_MV : 0) |
#endif
          ctx->postproc_cfg.post_proc_flag;

      flags.deblocking_level = ctx->postproc_cfg.deblocking_level;
      flags.noise_level = ctx->postproc_cfg.noise_level;
#if CONFIG_POSTPROC_VISUALIZER
      flags.display_ref_frame_flag = ctx->dbg_color_ref_frame_flag;
      flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag;
      flags.display_b_modes_flag = ctx->dbg_color_b_modes_flag;
      flags.display_mv_flag = ctx->dbg_display_mv_flag;
#endif
    }

#if 0
    if (vp9_receive_compressed_data(ctx->pbi, data_sz, data, deadline)) {
      VP9D_COMP *pbi = (VP9D_COMP*)ctx->pbi;
      res = update_error_state(ctx, &pbi->common.error);
    }

    if (!res && 0 == vp9_get_raw_frame(ctx->pbi, &sd, &time_stamp,
                                       &time_end_stamp, &flags)) {
      yuvconfig2image(&ctx->img, &sd, user_priv);
      ctx->img_avail = 1;
    }
#endif
    
    if (data_sz == 0) {
      i_is_last_frame = 1;
    }
    
    if (vp9_receive_compressed_data_recon(ctx->pbi, ctx->storage_pbi, data_sz, data,
      deadline, i_is_last_frame)) {
      pbi = (VP9D_COMP *)ctx->pbi;
      if (pbi->l_bufpool_flag_output == 0)
        pbi_storage = (VP9D_COMP *)ctx->storage_pbi[1];
      else
        pbi_storage = (VP9D_COMP *)ctx->storage_pbi[pbi->l_bufpool_flag_output & 1];
      res = update_error_state(ctx, &pbi_storage->common.error);
    }
	vpx_usec_timer_mark(&timer);
    decode_time = (unsigned int)vpx_usec_timer_elapsed(&timer);
    if (ctx->pbi) {
      pbi = (VP9D_COMP *)ctx->pbi;
      if (pbi->l_bufpool_flag_output) {
        ret = vp9_get_raw_frame(ctx->storage_pbi[pbi->l_bufpool_flag_output & 1],
              &sd, &time_stamp, &time_end_stamp, &flags);
        if (!pbi->res && 0 == ret ) {
          //for render
          my_pbi = (VP9D_COMP *)(ctx->storage_pbi[pbi->l_bufpool_flag_output & 1]);
          yuv2rgba_ocl_obj.y_plane_offset = my_pbi->common.frame_to_show->y_buffer - 
                                                inter_ocl_obj.buffer_pool_map_ptr;
          yuv2rgba_ocl_obj.u_plane_offset = my_pbi->common.frame_to_show->u_buffer - 
                                                inter_ocl_obj.buffer_pool_map_ptr;
          yuv2rgba_ocl_obj.v_plane_offset = my_pbi->common.frame_to_show->v_buffer - 
                                                inter_ocl_obj.buffer_pool_map_ptr;
 
          yuv2rgba_ocl_obj.Y_stride =  my_pbi->common.frame_to_show->y_stride;
          yuv2rgba_ocl_obj.UV_stride =  my_pbi->common.frame_to_show->uv_stride;
          yuv2rgba_ocl_obj.globalThreads[0] =  my_pbi->common.width >> 1;
          yuv2rgba_ocl_obj.globalThreads[1] =  my_pbi->common.height >> 1;
 
		  vpx_usec_timer_start(&timer);
          vp9_yuv2rgba(&yuv2rgba_ocl_obj, texture);
		  vpx_usec_timer_mark(&timer);
          yuv2rgb_time = (unsigned int)vpx_usec_timer_elapsed(&timer);
	      fprintf(pLog, "decode one frame time(without YUV to RGB): %lu us\n"
			            "the whole time of YUV to RGB:  %lu us\n", decode_time, yuv2rgb_time);
          // for render end
          yuvconfig2image(&ctx->img, &sd, user_priv);
          ctx->img_avail = 1;
        }