Example #1
0
void vp9_decoder_remove(VP9Decoder *pbi) {
  int i;

  if (!pbi) return;

  vpx_get_worker_interface()->end(&pbi->lf_worker);
  vpx_free(pbi->lf_worker.data1);

  for (i = 0; i < pbi->num_tile_workers; ++i) {
    VPxWorker *const worker = &pbi->tile_workers[i];
    vpx_get_worker_interface()->end(worker);
  }

  vpx_free(pbi->tile_worker_data);
  vpx_free(pbi->tile_workers);

  if (pbi->num_tile_workers > 0) {
    vp9_loop_filter_dealloc(&pbi->lf_row_sync);
  }

  if (pbi->row_mt == 1) {
    vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data);
    if (pbi->row_mt_worker_data != NULL) {
      vp9_jobq_deinit(&pbi->row_mt_worker_data->jobq);
      vpx_free(pbi->row_mt_worker_data->jobq_buf);
#if CONFIG_MULTITHREAD
      pthread_mutex_destroy(&pbi->row_mt_worker_data->recon_done_mutex);
#endif
    }
    vpx_free(pbi->row_mt_worker_data);
  }

  vp9_remove_common(&pbi->common);
  vpx_free(pbi);
}
Example #2
0
static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) {
  if (ctx->frame_workers != NULL) {
    int i;
    for (i = 0; i < ctx->num_frame_workers; ++i) {
      VPxWorker *const worker = &ctx->frame_workers[i];
      FrameWorkerData *const frame_worker_data =
          (FrameWorkerData *)worker->data1;
      vpx_get_worker_interface()->end(worker);
      vp9_remove_common(&frame_worker_data->pbi->common);
#if CONFIG_VP9_POSTPROC
      vp9_free_postproc_buffers(&frame_worker_data->pbi->common);
#endif
      vp9_decoder_remove(frame_worker_data->pbi);
      vpx_free(frame_worker_data->scratch_buffer);
#if CONFIG_MULTITHREAD
      pthread_mutex_destroy(&frame_worker_data->stats_mutex);
      pthread_cond_destroy(&frame_worker_data->stats_cond);
#endif
      vpx_free(frame_worker_data);
    }
#if CONFIG_MULTITHREAD
    pthread_mutex_destroy(&ctx->buffer_pool->pool_mutex);
#endif
  }

  if (ctx->buffer_pool) {
    vp9_free_ref_frame_buffers(ctx->buffer_pool);
    vp9_free_internal_frame_buffers(&ctx->buffer_pool->int_frame_buffers);
  }

  vpx_free(ctx->frame_workers);
  vpx_free(ctx->buffer_pool);
  vpx_free(ctx);
  return VPX_CODEC_OK;
}
Example #3
0
static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) {
  YV12_BUFFER_CONFIG sd;
  vp9_ppflags_t flags = {0, 0, 0};
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id];
  FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
  ctx->next_output_worker_id =
      (ctx->next_output_worker_id + 1) % ctx->num_frame_workers;
  // TODO(hkuang): Add worker error handling here.
  winterface->sync(worker);
  frame_worker_data->received_frame = 0;
  ++ctx->available_threads;

  check_resync(ctx, frame_worker_data->pbi);

  if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) {
    VP9_COMMON *const cm = &frame_worker_data->pbi->common;
    RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
    ctx->frame_cache[ctx->frame_cache_write].fb_idx = cm->new_fb_idx;
    yuvconfig2image(&ctx->frame_cache[ctx->frame_cache_write].img, &sd,
                    frame_worker_data->user_priv);
    ctx->frame_cache[ctx->frame_cache_write].img.fb_priv =
        frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
    ctx->frame_cache_write =
        (ctx->frame_cache_write + 1) % FRAME_CACHE_SIZE;
    ++ctx->num_cache_frames;
  }
}
Example #4
0
static void create_enc_workers(VP9_COMP *cpi, int num_workers) {
  VP9_COMMON *const cm = &cpi->common;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  int i;

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    // While using SVC, we need to allocate threads according to the highest
    // resolution. When row based multithreading is enabled, it is OK to
    // allocate more threads than the number of max tile columns.
    if (cpi->use_svc && !cpi->row_mt) {
      int max_tile_cols = get_max_tile_cols(cpi);
      allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
    }

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));

    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp9_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp9_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }
      winterface->sync(worker);
    }
  }
}
Example #5
0
static void launch_enc_workers(VP9_COMP *cpi, VPxWorkerHook hook, void *data2,
                               int num_workers) {
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  int i;

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    worker->hook = hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = data2;
  }

  // Encode a frame
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }
}
Example #6
0
static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame,
                                VP10_COMMON *cm,
                                struct macroblockd_plane planes[MAX_MB_PLANE],
                                int start, int stop, int y_only,
                                VPxWorker *workers, int nworkers,
                                VP9LfSync *lf_sync) {
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  // Number of superblock rows and cols
  const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2;
  // Decoder may allocate more threads than number of tiles based on user's
  // input.
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int num_workers = VPXMIN(nworkers, tile_cols);
  int i;

  if (!lf_sync->sync_range || sb_rows != lf_sync->rows ||
      num_workers > lf_sync->num_workers) {
    vp10_loop_filter_dealloc(lf_sync);
    vp10_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers);
  }

  // Initialize cur_sb_col to -1 for all SB rows.
  memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows);

  // Set up loopfilter thread data.
  // The decoder is capping num_workers because it has been observed that using
  // more threads on the loopfilter than there are cores will hurt performance
  // on Android. This is because the system will only schedule the tile decode
  // workers on cores equal to the number of tile columns. Then if the decoder
  // tries to use more threads for the loopfilter, it will hurt performance
  // because of contention. If the multithreading code changes in the future
  // then the number of workers used by the loopfilter should be revisited.
  for (i = 0; i < num_workers; ++i) {
    VPxWorker *const worker = &workers[i];
    LFWorkerData *const lf_data = &lf_sync->lfdata[i];

    worker->hook = (VPxWorkerHook)loop_filter_row_worker;
    worker->data1 = lf_sync;
    worker->data2 = lf_data;

    // Loopfilter data
    vp10_loop_filter_data_reset(lf_data, frame, cm, planes);
    lf_data->start = start + i * MI_BLOCK_SIZE;
    lf_data->stop = stop;
    lf_data->y_only = y_only;

    // Start loopfiltering
    if (i == num_workers - 1) {
      winterface->execute(worker);
    } else {
      winterface->launch(worker);
    }
  }

  // Wait till all rows are finished
  for (i = 0; i < num_workers; ++i) {
    winterface->sync(&workers[i]);
  }
}
Example #7
0
void vp9_decoder_remove(VP9Decoder *pbi) {
  int i;

  vpx_get_worker_interface()->end(&pbi->lf_worker);
  vpx_free(pbi->lf_worker.data1);
  vpx_free(pbi->tile_data);
  for (i = 0; i < pbi->num_tile_workers; ++i) {
    VPxWorker *const worker = &pbi->tile_workers[i];
    vpx_get_worker_interface()->end(worker);
  }
  vpx_free(pbi->tile_worker_data);
  vpx_free(pbi->tile_workers);

  if (pbi->num_tile_workers > 0) {
    vp9_loop_filter_dealloc(&pbi->lf_row_sync);
  }

  vpx_free(pbi);
}
Example #8
0
VP10Decoder *vp10_decoder_create(BufferPool *const pool) {
  VP10Decoder *volatile const pbi = vpx_memalign(32, sizeof(*pbi));
  VP10_COMMON *volatile const cm = pbi ? &pbi->common : NULL;

  if (!cm)
    return NULL;

  vp10_zero(*pbi);

  if (setjmp(cm->error.jmp)) {
    cm->error.setjmp = 0;
    vp10_decoder_remove(pbi);
    return NULL;
  }

  cm->error.setjmp = 1;

  CHECK_MEM_ERROR(cm, cm->fc,
                  (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc)));
  CHECK_MEM_ERROR(cm, cm->frame_contexts,
                  (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS,
                  sizeof(*cm->frame_contexts)));

  pbi->need_resync = 1;
  once(initialize_dec);

  // Initialize the references to not point to any frame buffers.
  memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map));
  memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map));

  cm->current_video_frame = 0;
  pbi->ready_for_new_data = 1;
  pbi->common.buffer_pool = pool;

  cm->bit_depth = VPX_BITS_8;
  cm->dequant_bit_depth = VPX_BITS_8;

  cm->alloc_mi = vp10_dec_alloc_mi;
  cm->free_mi = vp10_dec_free_mi;
  cm->setup_mi = vp10_dec_setup_mi;

  vp10_loop_filter_init(cm);

  cm->error.setjmp = 0;

  vpx_get_worker_interface()->init(&pbi->lf_worker);

  return pbi;
}
Example #9
0
static void release_fb_on_decoder_exit(VP9Decoder *pbi) {
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  VP9_COMMON *volatile const cm = &pbi->common;
  BufferPool *volatile const pool = cm->buffer_pool;
  RefCntBuffer *volatile const frame_bufs = cm->buffer_pool->frame_bufs;
  int i;

  // Synchronize all threads immediately as a subsequent decode call may
  // cause a resize invalidating some allocations.
  winterface->sync(&pbi->lf_worker);
  for (i = 0; i < pbi->num_tile_workers; ++i) {
    winterface->sync(&pbi->tile_workers[i]);
  }

  // Release all the reference buffers if worker thread is holding them.
  if (pbi->hold_ref_buf == 1) {
    int ref_index = 0, mask;
    for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
      const int old_idx = cm->ref_frame_map[ref_index];
      // Current thread releases the holding of reference frame.
      decrease_ref_count(old_idx, frame_bufs, pool);

      // Release the reference frame in reference map.
      if (mask & 1) {
        decrease_ref_count(old_idx, frame_bufs, pool);
      }
      ++ref_index;
    }

    // Current thread releases the holding of reference frame.
    for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) {
      const int old_idx = cm->ref_frame_map[ref_index];
      decrease_ref_count(old_idx, frame_bufs, pool);
    }
    pbi->hold_ref_buf = 0;
  }
Example #10
0
static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx,
                                      vpx_codec_iter_t *iter) {
  vpx_image_t *img = NULL;

  // Only return frame when all the cpu are busy or
  // application fluhsed the decoder in frame parallel decode.
  if (ctx->frame_parallel_decode && ctx->available_threads > 0 &&
      !ctx->flushed) {
    return NULL;
  }

  // Output the frames in the cache first.
  if (ctx->num_cache_frames > 0) {
    release_last_output_frame(ctx);
    ctx->last_show_frame  = ctx->frame_cache[ctx->frame_cache_read].fb_idx;
    if (ctx->need_resync)
      return NULL;
    img = &ctx->frame_cache[ctx->frame_cache_read].img;
    ctx->frame_cache_read = (ctx->frame_cache_read + 1) % FRAME_CACHE_SIZE;
    --ctx->num_cache_frames;
    return img;
  }

  // iter acts as a flip flop, so an image is only returned on the first
  // call to get_frame.
  if (*iter == NULL && ctx->frame_workers != NULL) {
    do {
      YV12_BUFFER_CONFIG sd;
      vp9_ppflags_t flags = {0, 0, 0};
      const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
      VPxWorker *const worker =
          &ctx->frame_workers[ctx->next_output_worker_id];
      FrameWorkerData *const frame_worker_data =
          (FrameWorkerData *)worker->data1;
      ctx->next_output_worker_id =
          (ctx->next_output_worker_id + 1) % ctx->num_frame_workers;
      if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
        set_ppflags(ctx, &flags);
      // Wait for the frame from worker thread.
      if (winterface->sync(worker)) {
        // Check if worker has received any frames.
        if (frame_worker_data->received_frame == 1) {
          ++ctx->available_threads;
          frame_worker_data->received_frame = 0;
          check_resync(ctx, frame_worker_data->pbi);
        }
        if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) {
          VP9_COMMON *const cm = &frame_worker_data->pbi->common;
          RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
          release_last_output_frame(ctx);
          ctx->last_show_frame = frame_worker_data->pbi->common.new_fb_idx;
          if (ctx->need_resync)
            return NULL;
          yuvconfig2image(&ctx->img, &sd, frame_worker_data->user_priv);
          ctx->img.fb_priv = frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv;
          img = &ctx->img;
          return img;
        }
      } else {
        // Decoding failed. Release the worker thread.
        frame_worker_data->received_frame = 0;
        ++ctx->available_threads;
        ctx->need_resync = 1;
        if (ctx->flushed != 1)
          return NULL;
      }
    } while (ctx->next_output_worker_id != ctx->next_submit_worker_id);
  }
  return NULL;
}
Example #11
0
static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
                                  const uint8_t **data, unsigned int data_sz,
                                  void *user_priv, int64_t deadline) {
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  (void)deadline;

  // Determine the stream parameters. Note that we rely on peek_si to
  // validate that we have a buffer that does not wrap around the top
  // of the heap.
  if (!ctx->si.h) {
    int is_intra_only = 0;
    const vpx_codec_err_t res =
        decoder_peek_si_internal(*data, data_sz, &ctx->si, &is_intra_only,
                                 ctx->decrypt_cb, ctx->decrypt_state);
    if (res != VPX_CODEC_OK)
      return res;

    if (!ctx->si.is_kf && !is_intra_only)
      return VPX_CODEC_ERROR;
  }

  if (!ctx->frame_parallel_decode) {
    VPxWorker *const worker = ctx->frame_workers;
    FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
    frame_worker_data->data = *data;
    frame_worker_data->data_size = data_sz;
    frame_worker_data->user_priv = user_priv;
    frame_worker_data->received_frame = 1;

    // Set these even if already initialized.  The caller may have changed the
    // decrypt config between frames.
    frame_worker_data->pbi->decrypt_cb = ctx->decrypt_cb;
    frame_worker_data->pbi->decrypt_state = ctx->decrypt_state;

    worker->had_error = 0;
    winterface->execute(worker);

    // Update data pointer after decode.
    *data = frame_worker_data->data_end;

    if (worker->had_error)
      return update_error_state(ctx, &frame_worker_data->pbi->common.error);

    check_resync(ctx, frame_worker_data->pbi);
  } else {
    VPxWorker *const worker = &ctx->frame_workers[ctx->next_submit_worker_id];
    FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
    // Copy context from last worker thread to next worker thread.
    if (ctx->next_submit_worker_id != ctx->last_submit_worker_id)
      vp9_frameworker_copy_context(
          &ctx->frame_workers[ctx->next_submit_worker_id],
          &ctx->frame_workers[ctx->last_submit_worker_id]);

    frame_worker_data->pbi->ready_for_new_data = 0;
    // Copy the compressed data into worker's internal buffer.
    // TODO(hkuang): Will all the workers allocate the same size
    // as the size of the first intra frame be better? This will
    // avoid too many deallocate and allocate.
    if (frame_worker_data->scratch_buffer_size < data_sz) {
      frame_worker_data->scratch_buffer =
          (uint8_t *)vpx_realloc(frame_worker_data->scratch_buffer, data_sz);
      if (frame_worker_data->scratch_buffer == NULL) {
        set_error_detail(ctx, "Failed to reallocate scratch buffer");
        return VPX_CODEC_MEM_ERROR;
      }
      frame_worker_data->scratch_buffer_size = data_sz;
    }
    frame_worker_data->data_size = data_sz;
    memcpy(frame_worker_data->scratch_buffer, *data, data_sz);

    frame_worker_data->frame_decoded = 0;
    frame_worker_data->frame_context_ready = 0;
    frame_worker_data->received_frame = 1;
    frame_worker_data->data = frame_worker_data->scratch_buffer;
    frame_worker_data->user_priv = user_priv;

    if (ctx->next_submit_worker_id != ctx->last_submit_worker_id)
      ctx->last_submit_worker_id =
          (ctx->last_submit_worker_id + 1) % ctx->num_frame_workers;

    ctx->next_submit_worker_id =
        (ctx->next_submit_worker_id + 1) % ctx->num_frame_workers;
    --ctx->available_threads;
    worker->had_error = 0;
    winterface->launch(worker);
  }

  return VPX_CODEC_OK;
}
Example #12
0
static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) {
  int i;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();

  ctx->last_show_frame = -1;
  ctx->next_submit_worker_id = 0;
  ctx->last_submit_worker_id = 0;
  ctx->next_output_worker_id = 0;
  ctx->frame_cache_read = 0;
  ctx->frame_cache_write = 0;
  ctx->num_cache_frames = 0;
  ctx->need_resync = 1;
  ctx->num_frame_workers =
      (ctx->frame_parallel_decode == 1) ? ctx->cfg.threads: 1;
  if (ctx->num_frame_workers > MAX_DECODE_THREADS)
    ctx->num_frame_workers = MAX_DECODE_THREADS;
  ctx->available_threads = ctx->num_frame_workers;
  ctx->flushed = 0;

  ctx->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool));
  if (ctx->buffer_pool == NULL)
    return VPX_CODEC_MEM_ERROR;

#if CONFIG_MULTITHREAD
    if (pthread_mutex_init(&ctx->buffer_pool->pool_mutex, NULL)) {
      set_error_detail(ctx, "Failed to allocate buffer pool mutex");
      return VPX_CODEC_MEM_ERROR;
    }
#endif

  ctx->frame_workers = (VPxWorker *)
      vpx_malloc(ctx->num_frame_workers * sizeof(*ctx->frame_workers));
  if (ctx->frame_workers == NULL) {
    set_error_detail(ctx, "Failed to allocate frame_workers");
    return VPX_CODEC_MEM_ERROR;
  }

  for (i = 0; i < ctx->num_frame_workers; ++i) {
    VPxWorker *const worker = &ctx->frame_workers[i];
    FrameWorkerData *frame_worker_data = NULL;
    winterface->init(worker);
    worker->data1 = vpx_memalign(32, sizeof(FrameWorkerData));
    if (worker->data1 == NULL) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data");
      return VPX_CODEC_MEM_ERROR;
    }
    frame_worker_data = (FrameWorkerData *)worker->data1;
    frame_worker_data->pbi = vp9_decoder_create(ctx->buffer_pool);
    if (frame_worker_data->pbi == NULL) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data");
      return VPX_CODEC_MEM_ERROR;
    }
    frame_worker_data->pbi->frame_worker_owner = worker;
    frame_worker_data->worker_id = i;
    frame_worker_data->scratch_buffer = NULL;
    frame_worker_data->scratch_buffer_size = 0;
    frame_worker_data->frame_context_ready = 0;
    frame_worker_data->received_frame = 0;
#if CONFIG_MULTITHREAD
    if (pthread_mutex_init(&frame_worker_data->stats_mutex, NULL)) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data mutex");
      return VPX_CODEC_MEM_ERROR;
    }

    if (pthread_cond_init(&frame_worker_data->stats_cond, NULL)) {
      set_error_detail(ctx, "Failed to allocate frame_worker_data cond");
      return VPX_CODEC_MEM_ERROR;
    }
#endif
    // If decoding in serial mode, FrameWorker thread could create tile worker
    // thread or loopfilter thread.
    frame_worker_data->pbi->max_threads =
        (ctx->frame_parallel_decode == 0) ? ctx->cfg.threads : 0;

    frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order;
    frame_worker_data->pbi->frame_parallel_decode = ctx->frame_parallel_decode;
    frame_worker_data->pbi->common.frame_parallel_decode =
        ctx->frame_parallel_decode;
    worker->hook = (VPxWorkerHook)frame_worker_hook;
    if (!winterface->reset(worker)) {
      set_error_detail(ctx, "Frame Worker thread creation failed");
      return VPX_CODEC_MEM_ERROR;
    }
  }

  // If postprocessing was enabled by the application and a
  // configuration has not been provided, default it.
  if (!ctx->postproc_cfg_set &&
      (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC))
    set_default_ppflags(&ctx->postproc_cfg);

  init_buffer_callbacks(ctx);

  return VPX_CODEC_OK;
}
Example #13
0
int vp10_receive_compressed_data(VP10Decoder *pbi,
                                size_t size, const uint8_t **psource) {
  VP10_COMMON *volatile const cm = &pbi->common;
  BufferPool *volatile const pool = cm->buffer_pool;
  RefCntBuffer *volatile const frame_bufs = cm->buffer_pool->frame_bufs;
  const uint8_t *source = *psource;
  int retcode = 0;
  cm->error.error_code = VPX_CODEC_OK;

  if (size == 0) {
    // This is used to signal that we are missing frames.
    // We do not know if the missing frame(s) was supposed to update
    // any of the reference buffers, but we act conservative and
    // mark only the last buffer as corrupted.
    //
    // TODO(jkoleszar): Error concealment is undefined and non-normative
    // at this point, but if it becomes so, [0] may not always be the correct
    // thing to do here.
    if (cm->frame_refs[0].idx > 0) {
      assert(cm->frame_refs[0].buf != NULL);
      cm->frame_refs[0].buf->corrupted = 1;
    }
  }

  pbi->ready_for_new_data = 0;

  // Check if the previous frame was a frame without any references to it.
  // Release frame buffer if not decoding in frame parallel mode.
  if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0
      && frame_bufs[cm->new_fb_idx].ref_count == 0)
    pool->release_fb_cb(pool->cb_priv,
                        &frame_bufs[cm->new_fb_idx].raw_frame_buffer);
  // Find a free frame buffer. Return error if can not find any.
  cm->new_fb_idx = get_free_fb(cm);
  if (cm->new_fb_idx == INVALID_IDX)
    return VPX_CODEC_MEM_ERROR;

  // Assign a MV array to the frame buffer.
  cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];

  pbi->hold_ref_buf = 0;
  if (pbi->frame_parallel_decode) {
    VPxWorker *const worker = pbi->frame_worker_owner;
    vp10_frameworker_lock_stats(worker);
    frame_bufs[cm->new_fb_idx].frame_worker_owner = worker;
    // Reset decoding progress.
    pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
    pbi->cur_buf->row = -1;
    pbi->cur_buf->col = -1;
    vp10_frameworker_unlock_stats(worker);
  } else {
    pbi->cur_buf = &frame_bufs[cm->new_fb_idx];
  }


  if (setjmp(cm->error.jmp)) {
    const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
    int i;

    cm->error.setjmp = 0;
    pbi->ready_for_new_data = 1;

    // Synchronize all threads immediately as a subsequent decode call may
    // cause a resize invalidating some allocations.
    winterface->sync(&pbi->lf_worker);
    for (i = 0; i < pbi->num_tile_workers; ++i) {
      winterface->sync(&pbi->tile_workers[i]);
    }

    lock_buffer_pool(pool);
    // Release all the reference buffers if worker thread is holding them.
    if (pbi->hold_ref_buf == 1) {
      int ref_index = 0, mask;
      for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
        const int old_idx = cm->ref_frame_map[ref_index];
        // Current thread releases the holding of reference frame.
        decrease_ref_count(old_idx, frame_bufs, pool);

        // Release the reference frame in reference map.
        if ((mask & 1) && old_idx >= 0) {
          decrease_ref_count(old_idx, frame_bufs, pool);
        }
        ++ref_index;
      }

      // Current thread releases the holding of reference frame.
      for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) {
        const int old_idx = cm->ref_frame_map[ref_index];
        decrease_ref_count(old_idx, frame_bufs, pool);
      }
      pbi->hold_ref_buf = 0;
    }
    // Release current frame.
    decrease_ref_count(cm->new_fb_idx, frame_bufs, pool);
    unlock_buffer_pool(pool);

    vpx_clear_system_state();
    return -1;
  }
Example #14
0
void vp10_encode_tiles_mt(VP10_COMP *cpi) {
  VP10_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp10_init_tile_data(cpi);

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));

    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers,
                    sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp10_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp10_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }

      winterface->sync(worker);
    }
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *thread_data;

    worker->hook = (VPxWorkerHook)enc_worker_hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = NULL;
    thread_data = (EncWorkerData*)worker->data1;

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }
  }

  // Encode a frame
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      vp10_accumulate_frame_counts(cm, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}
Example #15
0
void vp9_encode_tiles_mt(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
  const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols);
  int i;

  vp9_init_tile_data(cpi);

  // Only run once to create threads and allocate thread data.
  if (cpi->num_workers == 0) {
    int allocated_workers = num_workers;

    // While using SVC, we need to allocate threads according to the highest
    // resolution.
    if (cpi->use_svc) {
      int max_tile_cols = get_max_tile_cols(cpi);
      allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols);
    }

    CHECK_MEM_ERROR(cm, cpi->workers,
                    vpx_malloc(allocated_workers * sizeof(*cpi->workers)));

    CHECK_MEM_ERROR(cm, cpi->tile_thr_data,
                    vpx_calloc(allocated_workers,
                    sizeof(*cpi->tile_thr_data)));

    for (i = 0; i < allocated_workers; i++) {
      VPxWorker *const worker = &cpi->workers[i];
      EncWorkerData *thread_data = &cpi->tile_thr_data[i];

      ++cpi->num_workers;
      winterface->init(worker);

      if (i < allocated_workers - 1) {
        thread_data->cpi = cpi;

        // Allocate thread data.
        CHECK_MEM_ERROR(cm, thread_data->td,
                        vpx_memalign(32, sizeof(*thread_data->td)));
        vp9_zero(*thread_data->td);

        // Set up pc_tree.
        thread_data->td->leaf_tree = NULL;
        thread_data->td->pc_tree = NULL;
        vp9_setup_pc_tree(cm, thread_data->td);

        // Allocate frame counters in thread data.
        CHECK_MEM_ERROR(cm, thread_data->td->counts,
                        vpx_calloc(1, sizeof(*thread_data->td->counts)));

        // Create threads
        if (!winterface->reset(worker))
          vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                             "Tile encoder thread creation failed");
      } else {
        // Main thread acts as a worker and uses the thread data in cpi.
        thread_data->cpi = cpi;
        thread_data->td = &cpi->td;
      }

      winterface->sync(worker);
    }
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *thread_data;

    worker->hook = (VPxWorkerHook)enc_worker_hook;
    worker->data1 = &cpi->tile_thr_data[i];
    worker->data2 = NULL;
    thread_data = (EncWorkerData*)worker->data1;

    // Before encoding a frame, copy the thread data from cpi.
    if (thread_data->td != &cpi->td) {
      thread_data->td->mb = cpi->td.mb;
      thread_data->td->rd_counts = cpi->td.rd_counts;
    }
    if (thread_data->td->counts != &cpi->common.counts) {
      memcpy(thread_data->td->counts, &cpi->common.counts,
             sizeof(cpi->common.counts));
    }

    // Handle use_nonrd_pick_mode case.
    if (cpi->sf.use_nonrd_pick_mode) {
      MACROBLOCK *const x = &thread_data->td->mb;
      MACROBLOCKD *const xd = &x->e_mbd;
      struct macroblock_plane *const p = x->plane;
      struct macroblockd_plane *const pd = xd->plane;
      PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none;
      int j;

      for (j = 0; j < MAX_MB_PLANE; ++j) {
        p[j].coeff = ctx->coeff_pbuf[j][0];
        p[j].qcoeff = ctx->qcoeff_pbuf[j][0];
        pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0];
        p[j].eobs = ctx->eobs_pbuf[j][0];
      }
    }
  }

  // Encode a frame
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;

    // Set the starting tile for each thread.
    thread_data->start = i;

    if (i == cpi->num_workers - 1)
      winterface->execute(worker);
    else
      winterface->launch(worker);
  }

  // Encoding ends.
  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    winterface->sync(worker);
  }

  for (i = 0; i < num_workers; i++) {
    VPxWorker *const worker = &cpi->workers[i];
    EncWorkerData *const thread_data = (EncWorkerData*)worker->data1;

    // Accumulate counters.
    if (i < cpi->num_workers - 1) {
      vp9_accumulate_frame_counts(cm, thread_data->td->counts, 0);
      accumulate_rd_opt(&cpi->td, thread_data->td);
    }
  }
}